- Various updates.

git-svn-id: https://svn.php.net/repository/phpdoc/en/trunk@153565 c90b9560-bf6c-de11-be94-00142212c4b1
2025-03-16 00:48:54 +00:00 · 2004-03-13 11:45:37 +00:00 · 2004-03-13 11:45:37 +00:00 · 4126d53557
commit 4126d53557
parent 027a265be6
3 changed files with 376 additions and 307 deletions
--- a/reference/mbstring/configure.xml
+++ b/reference/mbstring/configure.xml
@ -1,12 +1,12 @@
 <?xml version="1.0" encoding="iso-8859-1"?>
-<!-- $Revision: 1.3 $ -->
+<!-- $Revision: 1.4 $ -->
 <section id="mbstring.installation">
  &reftitle.install;
  <para>
-   <literal>mbstring</literal> is an extended module. You must
-   enable the module with the <literal>configure</literal> script.
-   Refer to the <link linkend="installation">Install</link> section for
-   details.
+   <literal>mbstring</literal> is a non-default extension. This means it
+   is not enabled by default. You must explicitly enable the module with
+   the <literal>configure</literal> option. See the
+   <link linkend="installation">Install</link> section for details.
  </para>
  <simpara>
   The following configure options are related to the
@ -57,7 +57,7 @@
      <para>
       As of PHP 4.3.0, the option 
       <option role="configure">--enable-mbstr-enc-trans</option>
-       will be eliminated and replaced with 
+       was eliminated and replaced with the runtime setting
       <literal>mbstring.encoding_translation</literal>.
       HTTP input character encoding conversion is enabled
       when this is set to <literal>On</literal>
--- a/reference/mbstring/ini.xml
+++ b/reference/mbstring/ini.xml
@ -1,70 +1,70 @@
 <?xml version="1.0" encoding="iso-8859-1"?>
-<!-- $Revision: 1.9 $ -->
+<!-- $Revision: 1.10 $ -->
 <section id="mbstring.configuration">
 &reftitle.runtime;
 &extension.runtime;
 <para>
- <table>
-  <title>Multi-Byte String configuration options</title>
-  <tgroup cols="3">
-   <thead>
-    <row>
-     <entry>Name</entry>
-     <entry>Default</entry>
-     <entry>Changeable</entry>
-    </row>
-   </thead>
-   <tbody>
-    <row>
-     <entry>mbstring.language</entry>
-     <entry>"neutral"</entry>
-     <entry>PHP_INI_SYSTEM | PHP_INI_PERDIR</entry>
-    </row>
-    <row>
-     <entry>mbstring.detect_order</entry>
-     <entry>NULL</entry>
-     <entry>PHP_INI_ALL</entry>
-    </row>
-    <row>
-     <entry>mbstring.http_input</entry>
-     <entry>"pass"</entry>
-     <entry>PHP_INI_ALL</entry>
-    </row>
-    <row>
-     <entry>mbstring.http_output</entry>
-     <entry>"pass"</entry>
-     <entry>PHP_INI_ALL</entry>
-    </row>
-    <row>
-     <entry>mbstring.internal_encoding</entry>
-     <entry>NULL</entry>
-     <entry>PHP_INI_ALL</entry>
-    </row>
-    <row>
-     <entry>mbstring.script_encoding</entry>
-     <entry>NULL</entry>
-     <entry>PHP_INI_ALL</entry>
-    </row>
-    <row>
-     <entry>mbstring.substitute_character</entry>
-     <entry>NULL</entry>
-     <entry>PHP_INI_ALL</entry>
-    </row>
-    <row>
-     <entry>mbstring.func_overload</entry>
-     <entry>"0"</entry>
-     <entry>PHP_INI_SYSTEM | PHP_INI_PERDIR</entry>
-    </row>
-    <row>
-     <entry>mbstring.encoding_translation</entry>
-     <entry>"0"</entry>
-     <entry>PHP_INI_SYSTEM | PHP_INI_PERDIR</entry>
-    </row>
-   </tbody>
-  </tgroup>
- </table>
- For further details and definition of the PHP_INI_* constants see
- <function>ini_set</function>.
+  <table>
+   <title>mbstring configuration options</title>
+   <tgroup cols="3">
+    <thead>
+     <row>
+      <entry>Name</entry>
+      <entry>Default</entry>
+      <entry>Changeable</entry>
+     </row>
+    </thead>
+    <tbody>
+     <row>
+      <entry>mbstring.language</entry>
+      <entry>"neutral"</entry>
+      <entry>PHP_INI_SYSTEM | PHP_INI_PERDIR</entry>
+     </row>
+     <row>
+      <entry>mbstring.detect_order</entry>
+      <entry>NULL</entry>
+      <entry>PHP_INI_ALL</entry>
+     </row>
+     <row>
+      <entry>mbstring.http_input</entry>
+      <entry>"pass"</entry>
+      <entry>PHP_INI_ALL</entry>
+     </row>
+     <row>
+      <entry>mbstring.http_output</entry>
+      <entry>"pass"</entry>
+      <entry>PHP_INI_ALL</entry>
+     </row>
+     <row>
+      <entry>mbstring.internal_encoding</entry>
+      <entry>NULL</entry>
+      <entry>PHP_INI_ALL</entry>
+     </row>
+     <row>
+      <entry>mbstring.script_encoding</entry>
+      <entry>NULL</entry>
+      <entry>PHP_INI_ALL</entry>
+     </row>
+     <row>
+      <entry>mbstring.substitute_character</entry>
+      <entry>NULL</entry>
+      <entry>PHP_INI_ALL</entry>
+     </row>
+     <row>
+      <entry>mbstring.func_overload</entry>
+      <entry>"0"</entry>
+      <entry>PHP_INI_SYSTEM | PHP_INI_PERDIR</entry>
+     </row>
+     <row>
+      <entry>mbstring.encoding_translation</entry>
+      <entry>"0"</entry>
+      <entry>PHP_INI_SYSTEM | PHP_INI_PERDIR</entry>
+     </row>
+    </tbody>
+   </tgroup>
+  </table>
+  For the definition of the PHP_INI_* constants, please refer to
+  <function>ini_set</function>.
 </para>
 
 &ini.descriptions.title;
@ -73,37 +73,36 @@
  <itemizedlist>
   <listitem id="ini.mbstring.language">
    <simpara>
-     <literal>mbstring.language</literal> defines
-     default language used in mbstring.
-     Note that this option defines 
-     <literal>mbstring.internal_encoding</literal>
-     and <literal>mbstring.internal_encoding</literal>
-     should be placed after <literal>mbstring.language</literal>
-     in &php.ini;
+     <literal>mbstring.language</literal> is the default national
+     language setting (NLS) used in mbstring. Note that this option
+     automagically defines <literal>mbstring.internal_encoding</literal>, so
+     <literal>mbstring.internal_encoding</literal> should be placed
+     after <literal>mbstring.language</literal> in &php.ini;.
    </simpara>
   </listitem>
   <listitem id="ini.mbstring.encoding-translation">
    <simpara>
-     <literal>mbstring.encoding_translation</literal> enables
-     HTTP input character encoding detection and translation into
+     <literal>mbstring.encoding_translation</literal> enables the
+     transparent character encoding filter for the incoming HTTP queries,
+     which performs detection and conversion of the input encoding to the
     internal character encoding. 
    </simpara>
   </listitem>
   <listitem id="ini.mbstring.internal-encoding">
    <simpara>
-     <literal>mbstring.internal_encoding</literal> defines default
+     <literal>mbstring.internal_encoding</literal> defines the default
     internal character encoding.
    </simpara>
   </listitem>
   <listitem id="ini.mbstring.http-input">
    <simpara>
-     <literal>mbstring.http_input</literal> defines default HTTP
+     <literal>mbstring.http_input</literal> defines the default HTTP
     input character encoding.
    </simpara>
   </listitem>
   <listitem id="ini.mbstring.http-output">
    <simpara>
-     <literal>mbstring.http_output</literal> defines default HTTP
+     <literal>mbstring.http_output</literal> defines the default HTTP
     output character encoding.
    </simpara>
   </listitem>
@ -122,40 +121,31 @@
   </listitem>
   <listitem id="ini.mbstring.func-overload">
    <simpara>
-     <literal>mbstring.func_overload</literal>overload(replace) single byte
-     functions by mbstring functions. <function>mail</function>,
-     <function>ereg</function>, etc. are overloaded by
-     <function>mb_send_mail</function>, <function>mb_ereg</function>, etc.
-     Possible values are 0, 1, 2, 4 or a combination of them.
-     For example, 7 for overload everything.
-      0: No overload, 1: Overload <function>mail</function> function,
-      2: Overload str*() functions, 4: Overload ereg*() functions.
+     <literal>mbstring.func_overload</literal> overloads a set of single byte
+     functions by the mbstring counterparts. See
+     <link linkend="mbstring.overload">Function overloading</link> for more
+     information.
    </simpara>
   </listitem>
  </itemizedlist>
 </para>
 <para>
-  Web Browsers are supposed to use the same character encoding
-  when submitting form. However, browsers may not use the same
-  character encoding. See <function>mb_http_input</function> to
-  detect character encoding used by browsers.
+  According to the <ulink url="http://www.w3.org/TR/REC-html40/interact/forms.html#adef-accept-charset">HTML 4.01 specification</ulink>,
+  Web browsers are allowed to encode a form being submitted with a character
+  encoding different from the one used for the page.
+  See <function>mb_http_input</function> to detect character encoding
+  used by browsers.
 </para>
 <para>
-  If <literal>enctype</literal> is set to
-  <literal>multipart/form-data</literal> in HTML forms,
-  <literal>mbstring</literal> does not convert character encoding
-  in POST data. The user must convert them in the script, if
-  conversion is needed.
- </para>
- <para>
-  Although, browsers are smart enough to detect character encoding
-  in HTML. <literal>charset</literal> is better to be set in HTTP
-  header. Change <literal>default_charset</literal> according to
-  character encoding.
+  Although browsers are smart enough to detect the character encoding
+  of a given HTML document by using heuristics, it would be better to set the
+  <literal>charset</literal> parameter in the <literal>Content-Type</literal>
+  HTTP header to the appropriate value by <function>header</function> or
+  <link linkend="ini.sect.data-handling">default_charset</link> ini setting.
 </para>
 <para>
  <example>
-  <title>&php.ini; setting example</title>
+  <title>&php.ini; setting examples</title>
   <programlisting>
 <![CDATA[
 ; Set default language
--- a/reference/mbstring/reference.xml
+++ b/reference/mbstring/reference.xml
@ -1,5 +1,5 @@
 <?xml version="1.0" encoding="iso-8859-1"?>
-<!-- $Revision: 1.16 $ -->
+<!-- $Revision: 1.17 $ -->
 <reference id="ref.mbstring">
  <title>Multi-Byte String Functions</title> 
  <titleabbrev>Multi-Byte String</titleabbrev>
@ -8,94 +8,123 @@
   <section id="mbstring.intro">
    &reftitle.intro;
    <para>
-     There are many languages in which all characters can be expressed
-     by single byte. Multi-byte character codes are used to express
-     many characters for many languages. <literal>mbstring</literal>
-     is developed to handle Japanese characters. However, many
-     <literal>mbstring</literal> functions are able to handle
-     character encoding other than Japanese.
+     While there are many languages in which every necessary character can
+     be represented by a one-to-one mapping to an 8-bit value, there are also
+     several languages which require so many characters for written
+     communication that cannot be contained within the range a mere byte can
+     code. Multibyte character encoding schemes were developed to express
+     that many (more than 256) characters in the regular bytewise coding
+     system.
    </para>
    <para>
-     A multi-byte character encoding represents single character with
-     consecutive bytes. Some character encoding has shift(escape)
-     sequences to start/end multi-byte character strings. Therefore, a
-     multi-byte character string may be destroyed when it is divided
-     and/or counted unless multi-byte character encoding safe method
-     is used. This module provides multi-byte character safe string
-     functions and other utility functions such as conversion
-     functions.
+     When you manipulate (trim, split, splice, etc.) strings encoded in a
+     multibyte encoding, you need to use special functions since two or more
+     consecutive bytes may represent a single character in such encoding
+     schemes. Otherwise, if you apply a non-multibyte-aware string function
+     to the string, it probably fails to detect the beginning or ending of
+     the multibyte character and ends up with a corrupted garbage string that
+     most likely loses its original meaning.
    </para>
    <para>
-     Since PHP is basically designed for ISO-8859-1, some multi-byte
-     character encoding does not work well with PHP. Therefore, it is
-     important to set 
-     <literal>mbstring.language</literal> to appropriate language 
-     (i.e. "Japanese" for Japanese) and
-     <literal>mbstring.internal_encoding</literal> to a character
-     encoding that works with PHP.
+     <literal>mbstring</literal> provides these multibyte specific
+     string functions that help you deal with multibyte encodings in PHP,
+     which is basically supposed to be used with single byte encodings.
+     In addition to that, <literal>mbstring</literal> handles character
+     encoding conversion between the possible encoding pairs.
    </para>
    <para>
-     PHP 4 Character Encoding Requirements 
+     <literal>mbstring</literal> is also designed to handle Unicode-based
+     encodings such as UTF-8 and UCS-2 and many single-byte encodings
+     for convenience (listed below), whereas <literal>mbstring</literal> was
+     originally developed for use in Japanese web pages.
    </para>
-    <para>
-     <itemizedlist>
-      <listitem>
-       <simpara>
-        Per byte encoding
-       </simpara>
-      </listitem>
-      <listitem>
-       <simpara>
-        Single byte characters in range of <literal>00h-7fh</literal>
-        which is compatible with <literal>ASCII</literal>
-       </simpara>
-      </listitem>
-      <listitem>
-       <simpara>
-        Multi-byte characters without <literal>00h-7fh</literal>
-       </simpara>
-      </listitem>
-     </itemizedlist>
-    </para>
-    <para>
-     These are examples of internal character encoding that works with
-     PHP and does NOT work with PHP.
-     <informalexample>
-      <programlisting>
-<![CDATA[
-Character encodings work with PHP: 
-ISO-8859-*, EUC-JP, UTF-8

-Character encodings do NOT work with PHP:
-JIS, SJIS
+    <section id="mbstring.php4.req">
+     <title>PHP Character Encoding Requirements</title>
+     <para>
+      Encodings of the following types can be safely used with PHP.
+      <itemizedlist>
+       <listitem>
+        <para>
+         A singlebyte encoding,
+         <itemizedlist>
+          <listitem>
+           <simpara>
+            which has ASCII-compatible (ISO646 compatible) mappings for the
+            characters in range of <literal>00h</literal> to
+            <literal>7fh</literal>.
+           </simpara>
+          </listitem>
+         </itemizedlist>
+        </para>
+       </listitem>
+       <listitem>
+        <para>
+         A multibyte encoding,
+         <itemizedlist>
+          <listitem>
+           <simpara>
+            which has ASCII-compatible mappings for the characters in range of
+            <literal>00h</literal> to <literal>7fh</literal>.
+           </simpara>
+          </listitem>
+          <listitem>
+           <simpara>
+            which doesn't use ISO2022 escape sequences.
+           </simpara>
+          </listitem>
+          <listitem>
+           <simpara>
+            which doesn't use a value from <literal>00h</literal> to
+            <literal>7fh</literal> in any of the compounded bytes
+            that represent a single character.
+           </simpara>
+          </listitem>
+         </itemizedlist>  
+        </para>
+       </listitem>
+      </itemizedlist>
+     </para>
+     <para>
+      These are examples of character encodings that are unlikely to work
+      with PHP.
+      <informalexample>
+       <programlisting>
+<![CDATA[
+JIS, SJIS, ISO-2022-JP, BIG-5
 ]]>
-      </programlisting>
-     </informalexample>
-    </para>
-    <para>
-     Character encoding, that does not work with PHP, may be converted
-     with <literal>mbstring</literal>'s HTTP input/output conversion
-     feature/function.
-    </para>
-    <note>
-     <para>
-      SJIS should not be used for internal encoding unless the reader
-      is familiar with parser/compiler, character encoding and
-      character encoding issues.
+       </programlisting>
+      </informalexample>
     </para>
-    </note>
-    <note>
     <para>
-      If you use databases with PHP, it is recommended that you use the
-      same character encoding for both database and <literal>internal
-      encoding</literal> for ease of use and better performance.
+      Although PHP scripts written in any of those encodings might not work,
+      especially in the case where encoded strings appear as identifiers
+      or literals in the script, you can mostly avoid using these encodings
+      by setting up the <literal>mbstring</literal>'s transparent encoding
+      filter function for incoming HTTP queries.
+     </para>
+     <note>
+      <para>
+       It's highly discouraged to use SJIS, BIG5, CP936, CP949 and GB18030 for
+       the internal encoding unless you are familiar with the parser, the
+       scanner and the character encoding.
      </para>
-     <para>
-      If you are using PostgreSQL, it supports character
-      encoding that is different from backend character encoding. See
-      the PostgreSQL manual for details.
-     </para>
-    </note>
+     </note>
+
+     <note>
+      <para>
+       If you have some database connected with PHP, it is recommended that
+       you use the same character encoding for both database and the
+       <literal>internal encoding</literal> for ease of use and better
+       performance.
+      </para>
+      <para>
+       If you are using PostgreSQL, the character encoding used in the
+       database and the one used in PHP may differ, as PostgreSQL supports
+       automatic character set conversion between the backend and the frontend.
+      </para>
+     </note>
+    </section>
   </section>

   &reference.mbstring.configure;
@ -119,25 +148,21 @@ JIS, SJIS
     </para>
     <note>
      <para>
-       For PHP 4.3.2 or earlier, 
-       if <literal>enctype</literal> for HTML form is set to
-       <literal>multipart/form-data</literal>,
-       <literal>mbstring</literal> does not convert character encoding
-        in POST data. If it is the case, strings are needed to be
-       converted to internal character encoding.
+       In PHP 4.3.2 or earlier versions,
+       there is a limitation in this functionality that
+       <literal>mbstring</literal> does not perform character encoding
+       conversion in POST data if the <literal>enctype</literal> attribute in
+       the <literal>form</literal> element is set to
+       <literal>multipart/form-data</literal>. So you have to convert
+       the incoming data by yourself in this case if necessary.
      </para>
-     </note>
-     <note>
      <para>
-       Since PHP 4.3.3,
-       if <literal>enctype</literal> for HTML form is set to
-       <literal>multipart/form-data</literal>, and, 
-       <literal>mbstring.encoding_translation</literal> is set to 
-       On in &php.ini;
-       POST variables and uploaded filename will be converted to
-       internal character encoding.
-       But, characters specified in 'name' of HTML form will not be
-       converted.
+       Beginning with PHP 4.3.3, if <literal>enctype</literal> for HTML form is
+       set to <literal>multipart/form-data</literal> and
+       <literal>mbstring.encoding_translation</literal> is set to On
+       in &php.ini;, the POST'ed variables and the names of uploaded files
+       will be converted to the internal character encoding as well.
+       However, the conversion isn't applied to the query keys.
      </para>
     </note>
     <para>
@ -166,9 +191,8 @@ mbstring.encoding_translation = Off
        </para>
        <para>
         When using PHP as an Apache module, it is possible to
-         override PHP ini setting per Virtual Host in
-         &httpd.conf; or per directory with
-         &htaccess;. Refer to the <link
+         override those settings in each Virtual Host directive in
+         &httpd.conf; or per directory with &htaccess;. Refer to the <link
          linkend="configuration">Configuration</link> section and
         Apache Manual for details.
        </para>
@ -186,7 +210,7 @@ mbstring.encoding_translation = Off
        </para>
        <note>
         <para>
-          For PHP3-i18n users, <literal>mbstring</literal>'s output
+          PHP3-i18n users should note that <literal>mbstring</literal>'s output
          conversion differs from PHP3-i18n. Character encoding is
          converted using output buffer.
         </para>
@ -236,51 +260,101 @@ ob_start('mb_output_handler');
   <section id="mbstring.encodings">
     <title>Supported Character Encodings</title>
     <simpara>
-      Currently, the following character encoding is supported by the
-      <literal>mbstring</literal> module. Character encoding may
-      be specified for <literal>mbstring</literal> functions'
-      <literal>encoding</literal> parameter.
+      Currently the following character encodings are supported by the
+      <literal>mbstring</literal> module. Any of those character encodings
+      can be specified in the <literal>encoding</literal> parameter of
+      <literal>mbstring</literal> functions.
     </simpara>
     <para>
      The following character encoding is supported in this PHP
      extension: 
     </para>
-     <para>
-      <literal>UCS-4</literal>, <literal>UCS-4BE</literal>,
-      <literal>UCS-4LE</literal>, <literal>UCS-2</literal>,
-      <literal>UCS-2BE</literal>, <literal>UCS-2LE</literal>,
-      <literal>UTF-32</literal>, <literal>UTF-32BE</literal>,
-      <literal>UTF-32LE</literal>, <literal>UCS-2LE</literal>,
-      <literal>UTF-16</literal>, <literal>UTF-16BE</literal>,
-      <literal>UTF-16LE</literal>, <literal>UTF-8</literal>,
-      <literal>UTF-7</literal>, <literal>ASCII</literal>,
-      <literal>EUC-JP</literal>, <literal>SJIS</literal>,
-      <literal>eucJP-win</literal>, <literal>SJIS-win</literal>,
-      <literal>ISO-2022-JP</literal>, <literal>JIS</literal>,
-      <literal>ISO-8859-1</literal>, <literal>ISO-8859-2</literal>,
-      <literal>ISO-8859-3</literal>, <literal>ISO-8859-4</literal>,
-      <literal>ISO-8859-5</literal>, <literal>ISO-8859-6</literal>,
-      <literal>ISO-8859-7</literal>, <literal>ISO-8859-8</literal>,
-      <literal>ISO-8859-9</literal>, <literal>ISO-8859-10</literal>,
-      <literal>ISO-8859-13</literal>, <literal>ISO-8859-14</literal>,
-      <literal>ISO-8859-15</literal>, <literal>byte2be</literal>,
-      <literal>byte2le</literal>, <literal>byte4be</literal>,
-      <literal>byte4le</literal>, <literal>BASE64</literal>,
-      <literal>7bit</literal>, <literal>8bit</literal> and
-      <literal>UTF7-IMAP</literal>.
-     </para>
-     <para>
-      As of PHP 4.3.0, the following character encoding support will be added
-      experimentally :
-      <literal>EUC-CN</literal>, <literal>CP936</literal>, <literal>HZ</literal>, 
-      <literal>EUC-TW</literal>, <literal>CP950</literal>, <literal>BIG-5</literal>, 
-      <literal>EUC-KR</literal>, <literal>UHC</literal> (<literal>CP949</literal>), 
-      <literal>ISO-2022-KR</literal>,
-      <literal>Windows-1251</literal> (<literal>CP1251</literal>),
-      <literal>Windows-1252</literal> (<literal>CP1252</literal>),
-      <literal>CP866</literal>, 
-      <literal>KOI8-R</literal>.
-     </para>
+     <itemizedlist>
+      <listitem><simpara>UCS-4</simpara></listitem>
+      <listitem><simpara>UCS-4BE</simpara></listitem>
+     
+      <listitem><simpara>UCS-4LE</simpara></listitem>
+      <listitem><simpara>UCS-2</simpara></listitem>
+     
+      <listitem><simpara>UCS-2BE</simpara></listitem>
+      <listitem><simpara>UCS-2LE</simpara></listitem>
+     
+      <listitem><simpara>UTF-32</simpara></listitem>
+      <listitem><simpara>UTF-32BE</simpara></listitem>
+     
+      <listitem><simpara>UTF-32LE</simpara></listitem>
+      <listitem><simpara>UCS-2LE</simpara></listitem>
+     
+      <listitem><simpara>UTF-16</simpara></listitem>
+      <listitem><simpara>UTF-16BE</simpara></listitem>
+     
+      <listitem><simpara>UTF-16LE</simpara></listitem>
+      <listitem><simpara>UTF-8</simpara></listitem>
+     
+      <listitem><simpara>UTF-7</simpara></listitem>
+      <listitem><simpara>ASCII</simpara></listitem>
+     
+      <listitem><simpara>EUC-JP</simpara></listitem>
+      <listitem><simpara>SJIS</simpara></listitem>
+     
+      <listitem><simpara>eucJP-win</simpara></listitem>
+      <listitem><simpara>SJIS-win</simpara></listitem>
+     
+      <listitem><simpara>ISO-2022-JP</simpara></listitem>
+      <listitem><simpara>JIS</simpara></listitem>
+     
+      <listitem><simpara>ISO-8859-1</simpara></listitem>
+      <listitem><simpara>ISO-8859-2</simpara></listitem>
+     
+      <listitem><simpara>ISO-8859-3</simpara></listitem>
+      <listitem><simpara>ISO-8859-4</simpara></listitem>
+     
+      <listitem><simpara>ISO-8859-5</simpara></listitem>
+      <listitem><simpara>ISO-8859-6</simpara></listitem>
+     
+      <listitem><simpara>ISO-8859-7</simpara></listitem>
+      <listitem><simpara>ISO-8859-8</simpara></listitem>
+     
+      <listitem><simpara>ISO-8859-9</simpara></listitem>
+      <listitem><simpara>ISO-8859-10</simpara></listitem>
+     
+      <listitem><simpara>ISO-8859-13</simpara></listitem>
+      <listitem><simpara>ISO-8859-14</simpara></listitem>
+     
+      <listitem><simpara>ISO-8859-15</simpara></listitem>
+      <listitem><simpara>byte2be</simpara></listitem>
+     
+      <listitem><simpara>byte2le</simpara></listitem>
+      <listitem><simpara>byte4be</simpara></listitem>
+     
+      <listitem><simpara>byte4le</simpara></listitem>
+      <listitem><simpara>BASE64</simpara></listitem>
+     
+      <listitem><simpara>7bit</simpara></listitem>
+      <listitem><simpara>8bit</simpara></listitem>
+      <listitem><simpara>UTF7-IMAP</simpara></listitem>
+      <listitem><simpara>EUC-CN</simpara></listitem>
+      <listitem><simpara>CP936</simpara></listitem>
+      <listitem><simpara>HZ</simpara></listitem>
+      
+      <listitem><simpara>EUC-TW</simpara></listitem>
+      <listitem><simpara>CP950</simpara></listitem>
+      <listitem><simpara>BIG-5</simpara></listitem>
+      
+      <listitem><simpara>EUC-KR</simpara></listitem>
+      <listitem><simpara>UHC (CP949)</simpara></listitem>
+      
+      <listitem><simpara>ISO-2022-KR</simpara></listitem>
+     
+      <listitem><simpara>Windows-1251 (CP1251)</simpara></listitem>
+     
+      <listitem><simpara>Windows-1252 (CP1252)</simpara></listitem>
+     
+      <listitem><simpara>CP866</simpara></listitem>
+      
+      <listitem><simpara>KOI8-R</simpara></listitem>
+
+     </itemizedlist>
     <para>
      &php.ini; entry, which accepts encoding name,
      accepts &quot;<literal>auto</literal>&quot; and
@ -294,56 +368,48 @@ ob_start('mb_output_handler');
     </para>
     <para>
      If &quot;<literal>auto</literal>&quot; is set, it is expanded to
+      the list of encodings defined per the <link linkend="mbstring.configuration">NLS</link>.
+      For instance, if the NLS is set to <literal>Japanese</literal>,
+      the value is assumed to be
      &quot;<literal>ASCII,JIS,UTF-8,EUC-JP,SJIS</literal>&quot;.
     </para>
     <para>
      See also <function>mb_detect_order</function>
     </para>
-     <note>
-      <para>
-       &quot;Supported character encoding&quot; does not mean that it
-       works as internal character code.
-      </para>
-     </note>
   </section>
    
   <section id="mbstring.overload">
     <title>
-      Overloading PHP string functions with multi byte string functions
+      Function Overloading Feature
     </title>
     <para>
-      Because almost PHP application written for language using
-      single-byte character encoding, there are some difficulties for
-      multibyte string handling including Japanese. Most PHP string
-      functions such as <function>substr</function> do not support
-      multibyte strings.
+      You might often find it difficult to get an existing PHP application
+      to work in a given multibyte environment. That's mostly because lots of
+      PHP applications out there are written with the standard
+      string functions such as <function>substr</function>, which are
+      known to not properly handle multibyte-encoded strings.
     </para>
     <para>
-      Multibyte extension (mbstring) has some PHP string functions
-      with multibyte support (ex. <function>substr</function> supports
-      <function>mb_substr</function>).
+      mbstring supports a 'function overloading' feature which enables
+      you to add multibyte awareness to such an application without
+      code modification by overloading multibyte counterparts on
+      the standard string functions. For example,
+      <function>mb_substr</function> is called instead of
+      <function>substr</function> if function overloading is enabled.
+      This feature makes it easy to port applications that only support
+      single-byte encodings to a multibyte environment in many cases.
     </para>
     <para>
-      Multibyte extension (mbstring) also supports 'function
-      overloading' to add multibyte string functionality without
-      code modification. Using function overloading, some PHP string
-      functions will be overloaded multibyte string functions.
-      For example, <function>mb_substr</function> is called
-      instead of <function>substr</function> if function overloading
-      is enabled. Function overload makes easy to port application
-      supporting only single-byte encoding for multibyte application.
-     </para>
-     <para>
-      <literal>mbstring.func_overload</literal> in &php.ini; should be
-      set some positive value to use function overloading.
-      The value should specify the category of overloading functions,
-      should be set 1 to enable mail function overloading. 2 to enable
-      string functions, 4 to regular expression functions. For
-      example, if is set for 7, mail, strings, regex functions should
-      be overloaded. The list of overloaded functions are shown in
-      below.
+      To use the function overloading, set
+      <literal>mbstring.func_overload</literal> in &php.ini; to a
+      positive value that represents a combination of bitmasks specifying
+      the categories of functions to be overloaded. It should be set
+      to 1 to overload the <function>mail</function> function, 2 for string
+      functions, and 4 for regular expression functions. For example,
+      if it is set to 7, mail, string and regular expression functions will
+      be overloaded. The list of overloaded functions is shown below.
      <table>
-      <title>Functions to be overloaded</title>
+       <title>Functions to be overloaded</title>
       <tgroup cols="3">
        <thead>
         <row>
@ -417,7 +483,7 @@ ob_start('mb_output_handler');
          <entry>4</entry>
          <entry><function>split</function></entry>
          <entry><function>mb_split</function></entry>
-        	</row>
+         </row>
        </tbody>
       </tgroup>
      </table>
@ -425,46 +491,58 @@ ob_start('mb_output_handler');
   </section>

   <section id="mbstring.ja-basic">
-     <title>Basics of Japanese multi-byte characters</title>
+     <title>Basics of Japanese multi-byte encodings</title>
     <para>
-      Most Japanese characters need more than 1 byte per character. In
-      addition, several character encoding schemes are used under a
-      Japanese environment. There are EUC-JP, Shift_JIS(SJIS) and
-      ISO-2022-JP(JIS) character encoding. As Unicode becomes popular,
-      UTF-8 is used also. To develop Web applications for a Japanese
-      environment, it is important to use the character set for the
-      task in hand, whether HTTP input/output, RDBMS and E-mail.
+      It is often said to be quite hard to figure out how Japanese text is
+      handled by computers. This is not only because Japanese characters
+      can only be represented by multibyte encodings, but also because different
+      encoding standards are adopted for different purposes and platforms.
+      Moreover, quite a few character set standards are in use, which
+      differ slightly from one another. These facts have often led
+      developers into inevitable confusion.
+     </para>
+     <para> 
+      To create a working web application for the Japanese
+      environment, it is important to use the proper character encoding and
+      character set for the task in hand.
     </para>
     <para>
      <itemizedlist>
       <listitem>
-        <simpara>Storage for a character can be up to six
-         bytes</simpara>
+        <simpara>Storage for a character can be up to six bytes</simpara>
       </listitem>
       <listitem>
        <simpara>
-         A multi-byte character is usually twice of the width compared
-         to single-byte characters. Wider characters are called
-         "zen-kaku" - meaning full width, narrower characters are
-         called "han-kaku" - meaning half width. "zen-kaku" characters
-         are usually fixed width.
+         Most multibyte characters appear twice as wide as
+         a single-byte character on display. Those characters are called
+         "zen-kaku" in Japanese, which means "full width", and the other
+         (narrower) characters are called "han-kaku", which means "half width".
+         However, the graphical properties of the characters depend on
+         the glyphs of the typefaces used to display or print them.
        </simpara>
       </listitem>
       <listitem>
        <simpara>
-         Some character encoding defines shift(escape) sequence for
-         entering/exiting multi-byte character strings.
+         Some character encodings use shift (escape) sequences defined
+         in ISO 2022 to switch the code map of the specific code area
+         (<literal>00h</literal> to <literal>7fh</literal>).
        </simpara>
       </listitem>
       <listitem>
        <simpara>
-          ISO-2022-JP must be used for SMTP/NNTP.
+         ISO-2022-JP should be used in SMTP/NNTP, and headers and entities
+         should be reencoded as per RFC requirements. Although those are not
+         requisites, it's still a good idea because several popular user
+         agents cannot recognize any other encoding methods.
        </simpara>
       </listitem>
       <listitem>
-        <para>
-         &quot;i-mode&quot; web site is supposed to use SJIS.
-        </para>
+        <simpara>
+         Webpages created for mobile phone services such as
+         <ulink url="http://www.eurotechnology.com/imode/faq.html">i-mode</ulink>,
+         <ulink url="http://www.vodafone.jp/english/live/">Vodafone live!</ulink>, or <ulink url="http://www.au.kddi.com/english/ezweb/">ezweb</ulink>
+         are supposed to use Shift_JIS.
+        </simpara>
       </listitem>
      </itemizedlist>
     </para>
@ -473,14 +551,14 @@ ob_start('mb_output_handler');
   <section id="mbstring.ref">
     <title>References</title>
     <para>
-      Multi-byte character encoding and its related issues are very
-      complex. It is impossible to cover in sufficient detail
-      here. Please refer to the following URLs and other resources for
+      Multibyte character encoding schemes and their related issues are very
+      complicated. There is too little space here to cover them in sufficient detail.
+      Please refer to the following URLs and other resources for
      further readings.
      <itemizedlist>
       <listitem>
        <para>
-         Unicode/UTF/UCS/etc
+         Unicode materials
        </para>
        <para>
         <ulink url="&url.unicode;">&url.unicode;</ulink>
@ -488,13 +566,14 @@ ob_start('mb_output_handler');
       </listitem>
       <listitem>
        <para>
-         Japanese/Korean/Chinese character
-         information
+         Japanese/Korean/Chinese character information
        </para>
        <para>
-         <literal>
-         ftp://ftp.ora.com/pub/examples/nutshell/ujip/doc/cjk.inf
-         </literal>
+         <ulink url="ftp://ftp.ora.com/pub/examples/nutshell/ujip/doc/cjk.inf">
+          <literal>
+           ftp://ftp.ora.com/pub/examples/nutshell/ujip/doc/cjk.inf
+          </literal>
+         </ulink>
        </para>
       </listitem>
      </itemizedlist>