The ord() and chr() functions do not interpret their inputs as ASCII or any other encoding, but work with single bytes. This should be made clearer.

-- Provided by anonymous 73458 (rowan.collins@gmail.com) git-svn-id: https://svn.php.net/repository/phpdoc/en/trunk@345229 c90b9560-bf6c-de11-be94-00142212c4b1
2025-03-15 16:38:54 +00:00 · 2018-06-28 10:36:19 +00:00 · 2018-06-28 10:36:19 +00:00 · 79ebd89049
commit 79ebd89049
parent e92fd0468a
2 changed files with 64 additions and 14 deletions
--- a/reference/strings/functions/chr.xml
+++ b/reference/strings/functions/chr.xml
@@ -3,18 +3,21 @@
 <refentry xmlns:xlink="http://www.w3.org/1999/xlink" xmlns="http://docbook.org/ns/docbook" xml:id="function.chr">
 <refnamediv>
  <refname>chr</refname>
-  <refpurpose>Return a specific character</refpurpose>
+  <refpurpose>Generate a single-byte string from a number</refpurpose>
 </refnamediv>
 
 <refsect1 role="description">
  &reftitle.description;
  <methodsynopsis>
   <type>string</type><methodname>chr</methodname>
-   <methodparam><type>int</type><parameter>ascii</parameter></methodparam>
+   <methodparam><type>int</type><parameter>bytevalue</parameter></methodparam>
  </methodsynopsis>
  <para>
   Returns a one-character string containing the character specified
-   by <parameter>ascii</parameter>.
+   by interpreting <parameter>bytevalue</parameter> as an unsigned integer.
+  </para>
+  <para>
+   This can be used to create a one-character string in a single-byte encoding such as ASCII, ISO-8859, or Windows 1252, by passing the position of a desired character in the encoding's mapping table. However, note that this function is not aware of any string encoding, and in particular cannot be passed a Unicode code point value to generate a string in a multi-byte encoding like UTF-8 or UTF-16.
  </para>
  <para>
   This function complements <function>ord</function>. 
@@ -26,20 +29,20 @@
  <para>
   <variablelist>
    <varlistentry>
-     <term><parameter>ascii</parameter></term>
+     <term><parameter>bytevalue</parameter></term>
     <listitem>
      <para>
-       The extended ASCII code.
+       An integer between 0 and 255.
      </para>
      <para>
       Values outside the valid range (0..255) will be bitwise and'ed with 255,
       which is equivalent to the following algorithm:
       <programlisting role="php">
 <![CDATA[
-while ($ascii < 0) {
-    $ascii += 256;
+while ($bytevalue < 0) {
+    $bytevalue += 256;
 }
-$ascii %= 256;
+$bytevalue %= 256;
 ]]>
       </programlisting>
      </para>
@@ -52,7 +55,7 @@ $ascii %= 256;
 <refsect1 role="returnvalues">
  &reftitle.returnvalues;
  <para>
-   Returns the specified character.
+   A single-character string containing the specified byte.
  </para>
 </refsect1>
 <refsect1 role="examples">
@@ -63,6 +66,8 @@ $ascii %= 256;
    <programlisting role="php">
 <![CDATA[
 <?php
+// Assumes the string will be used as ASCII or an ASCII-compatible encoding
+
 $str = "The string ends in escape: ";
 $str .= chr(27); /* add an escape character at the end of $str */

@@ -90,6 +95,24 @@ aA
    </screen>
   </example>
  </para>
+  <para>
+   <example>
+    <title>Building a UTF-8 string from individual bytes</title>
+    <programlisting role="php">
+<![CDATA[
+<?php
+declare(encoding='UTF-8');
+$str = chr(240) . chr(159) . chr(144) . chr(152);
+echo $str;
+?>
+]]>
+    </programlisting>
+    &example.outputs;
+    <screen>
+🐘
+    </screen>
+   </example>
+  </para>
 </refsect1>

 <refsect1 role="seealso">
--- a/reference/strings/functions/ord.xml
+++ b/reference/strings/functions/ord.xml
@@ -3,7 +3,7 @@
 <refentry xmlns:xlink="http://www.w3.org/1999/xlink" xmlns="http://docbook.org/ns/docbook" xml:id="function.ord">
 <refnamediv>
  <refname>ord</refname>
-  <refpurpose>Return ASCII value of character</refpurpose>
+  <refpurpose>Convert the first byte of a string to a value between 0 and 255</refpurpose>
 </refnamediv>
 
 <refsect1 role="description">
@@ -13,8 +13,11 @@
   <methodparam><type>string</type><parameter>string</parameter></methodparam>
  </methodsynopsis>
  <para>
-   Returns the ASCII value of the first character of
-   <parameter>string</parameter>.
+   Interprets the binary value of the first byte of
+   <parameter>string</parameter> as an unsigned integer between 0 and 255.
+  </para>
+  <para>
+   If the string is in a single-byte encoding, such as ASCII, ISO-8859, or Windows 1252, this is equivalent to returning the position of the string's first character in the character set's mapping table. However, note that this function is not aware of any string encoding, and in particular will never identify a Unicode code point in a multi-byte encoding such as UTF-8 or UTF-16.
  </para>
  <para>
   This function complements <function>chr</function>.
@@ -40,7 +43,7 @@
 <refsect1 role="returnvalues">
  &reftitle.returnvalues;
  <para>
-   Returns the ASCII value as an integer.
+   An integer between 0 and 255.
  </para>
 </refsect1>

@@ -59,7 +62,31 @@ if (ord($str) == 10) {
 ?>
 ]]>
    </programlisting>
-   </example>   
+   </example>
+  </para>
+  <para>
+   <example>
+    <title>Examining the individual bytes of a UTF-8 string</title>
+    <programlisting role="php">
+<![CDATA[
+<?php
+declare(encoding='UTF-8');
+$str = "🐘";
+for ( $pos=0; $pos < strlen($str); $pos ++ ) {
+ $byte = substr($str, $pos);
+ echo 'Byte ' . $pos . ' of $str has value ' . ord($byte) . PHP_EOL;
+}
+?>
+]]>
+    </programlisting>
+    &example.outputs;
+    <screen>
+Byte 0 of $str has value 240
+Byte 1 of $str has value 159
+Byte 2 of $str has value 144
+Byte 3 of $str has value 152
+    </screen>
+   </example>
  </para>
 </refsect1>