mirror of
https://github.com/sigmasternchen/php-doc-en
synced 2025-03-19 02:18:56 +00:00

git-svn-id: https://svn.php.net/repository/phpdoc/en/trunk@43188 c90b9560-bf6c-de11-be94-00142212c4b1
463 lines
15 KiB
XML
463 lines
15 KiB
XML
<reference id="ref.regex">
|
|
<title>Regular Expression Functions (POSIX Extended)</title>
|
|
<titleabbrev>Regexps</titleabbrev>
|
|
|
|
<partintro>
|
|
<para>
|
|
Regular expressions are used for complex string manipulation in
|
|
PHP. The functions that support regular expressions are:
|
|
<itemizedlist>
|
|
<listitem>
|
|
<simpara><function>ereg</function></simpara>
|
|
</listitem>
|
|
<listitem>
|
|
<simpara><function>ereg_replace</function></simpara>
|
|
</listitem>
|
|
<listitem>
|
|
<simpara><function>eregi</function></simpara>
|
|
</listitem>
|
|
<listitem>
|
|
<simpara><function>eregi_replace</function></simpara>
|
|
</listitem>
|
|
<listitem>
|
|
<simpara><function>split</function></simpara>
|
|
</listitem>
|
|
<listitem>
|
|
<simpara><function>spliti</function></simpara>
|
|
</listitem>
|
|
</itemizedlist>
|
|
These functions all take a regular expression string as their
|
|
first argument. PHP uses the POSIX extended regular expressions
|
|
as defined by POSIX 1003.2. For a full description of POSIX
|
|
regular expressions see the regex man page included in the regex
|
|
directory in the PHP distribution. It's in manpage format, so
|
|
you'll want to do something along the lines of <command>man
|
|
/usr/local/src/regex/regex.7</command> in order to read it.
|
|
|
|
<!-- Should add discussion of PCRE functions here. -->
|
|
|
|
</para>
|
|
<para>
|
|
<example>
|
|
<title>Regular Expression Examples</title>
|
|
<programlisting role="php">
|
|
ereg ("abc", $string);
|
|
/* Returns true if "abc"
|
|
is found anywhere in $string. */
|
|
|
|
ereg ("^abc", $string);
|
|
/* Returns true if "abc"
|
|
is found at the beginning of $string. */
|
|
|
|
ereg ("abc$", $string);
|
|
/* Returns true if "abc"
|
|
is found at the end of $string. */
|
|
|
|
eregi ("(ozilla.[23]|MSIE.3)", $HTTP_USER_AGENT);
|
|
/* Returns true if client browser
|
|
is Netscape 2, 3 or MSIE 3. */
|
|
|
|
ereg ("([[:alnum:]]+) ([[:alnum:]]+) ([[:alnum:]]+)", $string,$regs);
|
|
/* Places three space separated words
|
|
into $regs[1], $regs[2] and $regs[3]. */
|
|
|
|
$string = ereg_replace ("^", "&lt;BR&gt;", $string);
|
|
/* Put a &lt;BR&gt; tag at the beginning of $string. */
|
|
|
|
$string = ereg_replace ("$", "&lt;BR&gt;", $string);
|
|
/* Put a &lt;BR&gt; tag at the end of $string. */
|
|
|
|
$string = ereg_replace ("\n", "", $string);
|
|
/* Get rid of any newline
|
|
characters in $string. */
|
|
</programlisting>
|
|
</example>
|
|
</para>
|
|
</partintro>
|
|
|
|
<refentry id="function.ereg">
|
|
<refnamediv>
|
|
<refname>ereg</refname>
|
|
<refpurpose>Regular expression match</refpurpose>
|
|
</refnamediv>
|
|
<refsect1>
|
|
<title>Description</title>
|
|
<funcsynopsis>
|
|
<funcprototype>
|
|
<funcdef>int <function>ereg</function></funcdef>
|
|
<paramdef>string <parameter>pattern</parameter></paramdef>
|
|
<paramdef>string <parameter>string</parameter></paramdef>
|
|
<paramdef>array
|
|
<parameter><optional>regs</optional></parameter>
|
|
</paramdef>
|
|
</funcprototype>
|
|
</funcsynopsis>
|
|
<simpara>
|
|
Searches a <parameter>string</parameter> for matches to the regular
|
|
expression given in <parameter>pattern</parameter>.
|
|
</simpara>
|
|
<simpara>
|
|
If matches are found for parenthesized substrings of
|
|
<parameter>pattern</parameter> and the function is called with
|
|
the third argument <parameter>regs</parameter>, the matches will
|
|
be stored in the elements of the array
|
|
<parameter>regs</parameter>. $regs[1] will contain the substring
|
|
which starts at the first left parenthesis; $regs[2] will contain
|
|
the substring starting at the second, and so on. $regs[0] will
|
|
contain a copy of the complete string matched.
|
|
</simpara>
|
|
<simpara>
|
|
If <function>ereg</function> finds any matches at all, $regs will
|
|
be filled with exactly ten elements, even though more or fewer
|
|
than ten parenthesized substrings may actually have matched.
|
|
This has no effect on <function>ereg</function>'s ability to
|
|
match more substrings. If no matches are found, $regs will not be
|
|
altered by <function>ereg</function>.
|
|
</simpara>
|
|
<simpara>
|
|
Searching is case sensitive.
|
|
</simpara>
|
|
<simpara>
|
|
Returns true if a match for <parameter>pattern</parameter> was
|
|
found in <parameter>string</parameter>, or false if no matches
|
|
were found or an error occurred.
|
|
</simpara>
|
|
<para>
|
|
The following code snippet takes a date in ISO format
|
|
(YYYY-MM-DD) and prints it in DD.MM.YYYY format:
|
|
<example>
|
|
<title><function>ereg</function> Example</title>
|
|
<programlisting role="php">
|
|
if (ereg ("([0-9]{4})-([0-9]{1,2})-([0-9]{1,2})", $date, $regs)) {
|
|
echo "$regs[3].$regs[2].$regs[1]";
|
|
} else {
|
|
echo "Invalid date format: $date";
|
|
}
|
|
</programlisting>
|
|
</example>
|
|
</para>
|
|
<simpara>
|
|
See also <function>eregi</function>,
|
|
<function>ereg_replace</function>, and
|
|
<function>eregi_replace</function>.
|
|
</simpara>
|
|
</refsect1>
|
|
</refentry>
|
|
|
|
<refentry id="function.ereg-replace">
|
|
<refnamediv>
|
|
<refname>ereg_replace</refname>
|
|
<refpurpose>Replace regular expression</refpurpose>
|
|
</refnamediv>
|
|
<refsect1>
|
|
<title>Description</title>
|
|
<funcsynopsis>
|
|
<funcprototype>
|
|
<funcdef>string <function>ereg_replace</function></funcdef>
|
|
<paramdef>string <parameter>pattern</parameter></paramdef>
|
|
<paramdef>string <parameter>replacement</parameter></paramdef>
|
|
<paramdef>string <parameter>string</parameter></paramdef>
|
|
</funcprototype>
|
|
</funcsynopsis>
|
|
<simpara>
|
|
This function scans <parameter>string</parameter> for matches to
|
|
<parameter>pattern</parameter>, then replaces the matched text
|
|
with <parameter>replacement</parameter>.
|
|
</simpara>
|
|
<simpara>
|
|
The modified string is returned. (Which may mean that the
|
|
original string is returned if there are no matches to be
|
|
replaced.)
|
|
</simpara>
|
|
<simpara>
|
|
If <parameter>pattern</parameter> contains parenthesized
|
|
substrings, <parameter>replacement</parameter> may contain
|
|
substrings of the form
|
|
<literal>\\<replaceable>digit</replaceable></literal>, which will
|
|
be replaced by the text matching the digit'th parenthesized
|
|
substring; <literal>\\0</literal> will produce the entire
|
|
contents of string. Up to nine substrings may be used.
|
|
Parentheses may be nested, in which case they are counted by the
|
|
opening parenthesis.
|
|
</simpara>
|
|
<simpara>
|
|
If no matches are found in <parameter>string</parameter>, then
|
|
<parameter>string</parameter> will be returned unchanged.
|
|
</simpara>
|
|
<para>
|
|
For example, the following code snippet prints "This was a test"
|
|
three times:
|
|
<example>
|
|
<title><function>ereg_replace</function> Example</title>
|
|
<programlisting role="php">
|
|
$string = "This is a test";
|
|
echo ereg_replace (" is", " was", $string);
|
|
echo ereg_replace ("( )is", "\\1was", $string);
|
|
echo ereg_replace ("(( )is)", "\\2was", $string);
|
|
</programlisting>
|
|
</example>
|
|
</para>
|
|
<para>
|
|
One thing to take note of is that if you use an integer value as
|
|
the <parameter>replacement</parameter> parameter, you may not get
|
|
the results you expect. This is because
|
|
<function>ereg_replace</function> will interpret the number as
|
|
the ordinal value of a character, and apply that. For instance:
|
|
<example>
|
|
<title><function>ereg_replace</function> Example</title>
|
|
<programlisting role="php">
|
|
&lt;?php
|
|
/* This will not work as expected. */
|
|
$num = 4;
|
|
$string = "This string has four words.";
|
|
$string = ereg_replace('four', $num, $string);
|
|
echo $string; /* Output: 'This string has words.' */
|
|
|
|
/* This will work. */
|
|
$num = '4';
|
|
$string = "This string has four words.";
|
|
$string = ereg_replace('four', $num, $string);
|
|
echo $string; /* Output: 'This string has 4 words.' */
|
|
?>
|
|
</programlisting>
|
|
</example>
|
|
</para>
|
|
<simpara>
|
|
See also <function>ereg</function>, <function>eregi</function>,
|
|
and <function>eregi_replace</function>.
|
|
</simpara>
|
|
</refsect1>
|
|
</refentry>
|
|
|
|
<refentry id="function.eregi">
|
|
<refnamediv>
|
|
<refname>eregi</refname>
|
|
<refpurpose>Case insensitive regular expression match</refpurpose>
|
|
</refnamediv>
|
|
<refsect1>
|
|
<title>Description</title>
|
|
<funcsynopsis>
|
|
<funcprototype>
|
|
<funcdef>int <function>eregi</function></funcdef>
|
|
<paramdef>string <parameter>pattern</parameter></paramdef>
|
|
<paramdef>string <parameter>string</parameter></paramdef>
|
|
<paramdef>array
|
|
<parameter><optional>regs</optional></parameter>
|
|
</paramdef>
|
|
</funcprototype>
|
|
</funcsynopsis>
|
|
<para>
|
|
This function is identical to <function>ereg</function> except
|
|
that this ignores case distinction when matching alphabetic
|
|
characters.
|
|
</para>
|
|
<para>
|
|
See also <function>ereg</function>,
|
|
<function>ereg_replace</function>, and
|
|
<function>eregi_replace</function>.
|
|
</para>
|
|
</refsect1>
|
|
</refentry>
|
|
|
|
<refentry id="function.eregi-replace">
|
|
<refnamediv>
|
|
<refname>eregi_replace</refname>
|
|
<refpurpose>Replace regular expression case insensitive</refpurpose>
|
|
</refnamediv>
|
|
<refsect1>
|
|
<title>Description</title>
|
|
<funcsynopsis>
|
|
<funcprototype>
|
|
<funcdef>string <function>eregi_replace</function></funcdef>
|
|
<paramdef>string <parameter>pattern</parameter></paramdef>
|
|
<paramdef>string <parameter>replacement</parameter></paramdef>
|
|
<paramdef>string <parameter>string</parameter></paramdef>
|
|
</funcprototype>
|
|
</funcsynopsis>
|
|
<para>
|
|
This function is identical to <function>ereg_replace</function>
|
|
except that this ignores case distinction when matching
|
|
alphabetic characters.
|
|
</para>
|
|
<para>
|
|
See also <function>ereg</function>, <function>eregi</function>,
|
|
and <function>ereg_replace</function>.
|
|
</para>
|
|
</refsect1>
|
|
</refentry>
|
|
|
|
<refentry id="function.split">
|
|
<refnamediv>
|
|
<refname>split</refname>
|
|
<refpurpose>Split string into array by regular expression</refpurpose>
|
|
</refnamediv>
|
|
<refsect1>
|
|
<title>Description</title>
|
|
<funcsynopsis>
|
|
<funcprototype>
|
|
<funcdef>array <function>split</function></funcdef>
|
|
<paramdef>string <parameter>pattern</parameter></paramdef>
|
|
<paramdef>string <parameter>string</parameter></paramdef>
|
|
<paramdef>int
|
|
<parameter><optional>limit</optional></parameter>
|
|
</paramdef>
|
|
</funcprototype>
|
|
</funcsynopsis>
|
|
<para>
|
|
Returns an array of strings, each of which is a substring of
|
|
<parameter>string</parameter> formed by splitting it on
|
|
boundaries formed by the regular expression
|
|
<parameter>pattern</parameter>. If <parameter>limit</parameter>
|
|
is set, the returned array will contain a maximum of
|
|
<parameter>limit</parameter> elements with the last element
|
|
containing the whole rest of <parameter>string</parameter>. If
|
|
an error occurs, <function>split</function> returns false.
|
|
</para>
|
|
<para>
|
|
To split off the first four fields from a line from
|
|
<filename>/etc/passwd</filename>:
|
|
<example>
|
|
<title><function>split</function> Example</title>
|
|
<programlisting role="php">
|
|
$passwd_list = split (":", $passwd_line, 5);
|
|
</programlisting>
|
|
</example>
|
|
</para>
|
|
<para>
|
|
To parse a date which may be delimited with slashes, dots, or
|
|
hyphens:
|
|
<example>
|
|
<title><function>split</function> Example</title>
|
|
<programlisting role="php">
|
|
$date = "04/30/1973"; // Delimiters may be slash, dot, or hyphen
|
|
list ($month, $day, $year) = split ('[/.-]', $date);
|
|
echo "Month: $month; Day: $day; Year: $year&lt;br&gt;\n";
|
|
</programlisting>
|
|
</example>
|
|
</para>
|
|
|
|
<para>
|
|
Note that <parameter>pattern</parameter> is case-sensitive.
|
|
</para>
|
|
|
|
<para>
|
|
Note that if you don't require the power of regular expressions,
|
|
it is faster to use <function>explode</function>, which doesn't
|
|
incur the overhead of the regular expression engine.
|
|
</para>
|
|
|
|
<para>
|
|
For users looking for a way to emulate Perl's <command>$chars =
|
|
split('', $str)</command> behaviour, please see the examples for
|
|
<function>preg_split</function>.
|
|
</para>
|
|
|
|
<para>
|
|
Please note that <parameter>pattern</parameter> is a regular
|
|
expression. If you want to split on any of the characters which
|
|
are considered special by regular expressions, you'll need to
|
|
escape them first. If you think <function>split</function> (or
|
|
any other regex function, for that matter) is doing something
|
|
weird, please read the file <filename>regex.7</filename>,
|
|
included in the <filename>regex/</filename> subdirectory of the
|
|
PHP distribution. It's in manpage format, so you'll want to do
|
|
something along the lines of <command>man
|
|
/usr/local/src/regex/regex.7</command> in order to read it.
|
|
</para>
|
|
|
|
<para>
|
|
See also: <function>spliti</function>,
|
|
<function>explode</function>, and <function>implode</function>.
|
|
</para>
|
|
|
|
</refsect1>
|
|
</refentry>
|
|
|
|
<refentry id="function.spliti">
|
|
<refnamediv>
|
|
<refname>spliti</refname>
|
|
<refpurpose>
|
|
Split string into array by regular expression case insensitive
|
|
</refpurpose>
|
|
</refnamediv>
|
|
<refsect1>
|
|
<title>Description</title>
|
|
<funcsynopsis>
|
|
<funcprototype>
|
|
<funcdef>array <function>spliti</function></funcdef>
|
|
<paramdef>string <parameter>pattern</parameter></paramdef>
|
|
<paramdef>string <parameter>string</parameter></paramdef>
|
|
<paramdef>int
|
|
<parameter><optional>limit</optional></parameter>
|
|
</paramdef>
|
|
</funcprototype>
|
|
</funcsynopsis>
|
|
<para>
|
|
This function is identical to <function>split</function> except
|
|
that this ignores case distinction when matching alphabetic
|
|
characters.
|
|
</para>
|
|
<para>
|
|
See also: <function>split</function>,
|
|
<function>explode</function> and
|
|
<function>implode</function>.
|
|
</para>
|
|
</refsect1>
|
|
</refentry>
|
|
|
|
<refentry id="function.sql-regcase">
|
|
<refnamediv>
|
|
<refname>sql_regcase</refname>
|
|
<refpurpose>
|
|
Make regular expression for case insensitive match
|
|
</refpurpose>
|
|
</refnamediv>
|
|
<refsect1>
|
|
<title>Description</title>
|
|
<funcsynopsis>
|
|
<funcprototype>
|
|
<funcdef>string <function>sql_regcase</function></funcdef>
|
|
<paramdef>string <parameter>string</parameter></paramdef>
|
|
</funcprototype>
|
|
</funcsynopsis>
|
|
<para>
|
|
Returns a valid regular expression which will match
|
|
<parameter>string</parameter>, ignoring case. This expression is
|
|
<parameter>string</parameter> with each character converted to a
|
|
bracket expression; this bracket expression contains that
|
|
character's uppercase and lowercase form if applicable, otherwise
|
|
it contains the original character twice.
|
|
<example>
|
|
<title><function>sql_regcase</function> Example</title>
|
|
<programlisting role="php">
|
|
echo sql_regcase ("Foo bar");
|
|
</programlisting>
|
|
</example>
|
|
prints <screen>[Ff][Oo][Oo] [Bb][Aa][Rr]</screen>.
|
|
</para>
|
|
<para>
|
|
This can be used to achieve case insensitive pattern matching in
|
|
products which support only case sensitive regular expressions.
|
|
</para>
|
|
</refsect1>
|
|
</refentry>
|
|
|
|
|
|
</reference>
|
|
|
|
<!-- Keep this comment at the end of the file
|
|
Local variables:
|
|
mode: sgml
|
|
sgml-omittag:t
|
|
sgml-shorttag:t
|
|
sgml-minimize-attributes:nil
|
|
sgml-always-quote-attributes:t
|
|
sgml-indent-step:1
|
|
sgml-indent-data:t
|
|
sgml-parent-document:nil
|
|
sgml-default-dtd-file:"../../manual.ced"
|
|
sgml-exposed-tags:nil
|
|
sgml-local-catalogs:nil
|
|
sgml-local-ecat-files:nil
|
|
End:
|
|
-->
|