From e43d662ec679628a17b738c10d32d53216831dfb Mon Sep 17 00:00:00 2001 From: irc-html Date: Mon, 21 Jan 2002 19:28:39 +0000 Subject: [PATCH] whitespace correction git-svn-id: https://svn.php.net/repository/phpdoc/en/trunk@68350 c90b9560-bf6c-de11-be94-00142212c4b1 --- functions/pcre.xml | 1079 ++++++++++++++++++++++---------------------- 1 file changed, 547 insertions(+), 532 deletions(-) diff --git a/functions/pcre.xml b/functions/pcre.xml index 0d5af5c379..0b57febdda 100644 --- a/functions/pcre.xml +++ b/functions/pcre.xml @@ -1,5 +1,5 @@ - + Regular Expression Functions (Perl-Compatible) PCRE @@ -36,17 +36,17 @@ - /href='(.*)' - missing ending delimiter + /href='(.*)' - missing ending delimiter - /\w+\s*\w+/J - unknown modifier 'J' + /\w+\s*\w+/J - unknown modifier 'J' - 1-\d3-\d3-\d4| - missing starting delimiter + 1-\d3-\d3-\d4| - missing starting delimiter @@ -65,7 +65,7 @@ - + preg_match @@ -145,13 +145,13 @@ echo "domain name is: ".$matches[0]."\n"; ]]> - This example will produce: - + This example will produce: + - - + + See also preg_match_all, preg_replace, and @@ -194,12 +194,12 @@ domain name is: php.net PREG_PATTERN_ORDER - - Orders results so that $matches[0] is an array of full - pattern matches, $matches[1] is an array of strings matched by - the first parenthesized subpattern, and so on. - - + + Orders results so that $matches[0] is an array of full + pattern matches, $matches[1] is an array of strings matched by + the first parenthesized subpattern, and so on. + + ]+>(.*)]+>|U", "example:
this is a test
", @@ -220,18 +220,18 @@ example: , this is a test and $out[1] contains an array of strings enclosed by tags.
-
+
PREG_SET_ORDER - - Orders results so that $matches[0] is an array of first set - of matches, $matches[1] is an array of second set of matches, - and so on. - - + + Orders results so that $matches[0] is an array of first set + of matches, $matches[1] is an array of second set of matches, + and so on. + + ]+>(.*)]+>|U", "example:
this is a test
", @@ -239,25 +239,26 @@ preg_match_all ("|<;[^>]+>(.*)]+>|U", print $out[0][0].", ".$out[0][1]."\n"; print $out[1][0].", ".$out[1][1]."\n"; ]]> -
-
- This example will produce: - - + + + This example will produce: + + example: , example:
this is a test
, this is a test ]]> -
-
- In this case, $matches[0] is the first set of matches, and - $matches[0][0] has text matched by the full pattern, $matches[0][1] - has text matched by the first subpattern and so on. Similarly, - $matches[1] is the second set of matches, etc. -
+
+
+ In this case, $matches[0] is the first set of matches, and + $matches[0][0] has text matched by the full pattern, $matches[0][1] + has text matched by the first subpattern and so on. Similarly, + $matches[1] is the second set of matches, etc. +
- + + If order is not specified, it is assumed to be PREG_PATTERN_ORDER. @@ -475,7 +476,7 @@ $text = preg_replace ($search, $replace, $document); Parameter limit was added after PHP 4.0.1pl2. - + See also preg_match, preg_match_all, and @@ -550,35 +551,35 @@ $text = preg_replace ($search, $replace, $document); - If limit is specified, then only substrings up to - limit are returned, and if - limit is -1, it actually means "no limit", which is - useful for specifying the flags. + If limit is specified, then only substrings up to + limit are returned, and if + limit is -1, it actually means "no limit", which is + useful for specifying the flags. - flags can be any combination of the following flags - (combined with bitwise | operator): + flags can be any combination of the following flags + (combined with bitwise | operator): - PREG_SPLIT_NO_EMPTY - - - If this flag is set, only non-empty pieces will be returned by - preg_split. - - + PREG_SPLIT_NO_EMPTY + + + If this flag is set, only non-empty pieces will be returned by + preg_split. + + - PREG_SPLIT_DELIM_CAPTURE - - - If this flag is set, parenthesized expression in the delimiter pattern - will be captured and returned as well. This flag was added for 4.0.5. - - + PREG_SPLIT_DELIM_CAPTURE + + + If this flag is set, parenthesized expression in the delimiter pattern + will be captured and returned as well. This flag was added for 4.0.5. + + - + @@ -739,159 +740,159 @@ $fl_array = preg_grep ("/^(\d+)?\.\d+$/", $array);
- i (PCRE_CASELESS) - - - If this modifier is set, letters in the pattern match both - upper and lower case letters. - - + i (PCRE_CASELESS) + + + If this modifier is set, letters in the pattern match both + upper and lower case letters. + + - m (PCRE_MULTILINE) - - - By default, PCRE treats the subject string as consisting of a - single "line" of characters (even if it actually contains - several newlines). The "start of line" metacharacter (^) - matches only at the start of the string, while the "end of - line" metacharacter ($) matches only at the end of the - string, or before a terminating newline (unless - D modifier is set). This is the same as - Perl. - - - When this modifier is set, the "start of line" and "end of - line" constructs match immediately following or immediately - before any newline in the subject string, respectively, as - well as at the very start and end. This is equivalent to - Perl's /m modifier. If there are no "\n" characters in a - subject string, or no occurrences of ^ or $ in a pattern, - setting this modifier has no effect. - - + m (PCRE_MULTILINE) + + + By default, PCRE treats the subject string as consisting of a + single "line" of characters (even if it actually contains + several newlines). The "start of line" metacharacter (^) + matches only at the start of the string, while the "end of + line" metacharacter ($) matches only at the end of the + string, or before a terminating newline (unless + D modifier is set). This is the same as + Perl. + + + When this modifier is set, the "start of line" and "end of + line" constructs match immediately following or immediately + before any newline in the subject string, respectively, as + well as at the very start and end. This is equivalent to + Perl's /m modifier. If there are no "\n" characters in a + subject string, or no occurrences of ^ or $ in a pattern, + setting this modifier has no effect. 
+ + - s (PCRE_DOTALL) - - - If this modifier is set, a dot metacharater in the pattern - matches all characters, including newlines. Without it, - newlines are excluded. This modifier is equivalent to Perl's - /s modifier. A negative class such as [^a] always matches a - newline character, independent of the setting of this - modifier. - - + s (PCRE_DOTALL) + + + If this modifier is set, a dot metacharacter in the pattern + matches all characters, including newlines. Without it, + newlines are excluded. This modifier is equivalent to Perl's + /s modifier. A negative class such as [^a] always matches a + newline character, independent of the setting of this + modifier. + + - x (PCRE_EXTENDED) - - - If this modifier is set, whitespace data characters in the - pattern are totally ignored except when escaped or inside a - character class, and characters between an unescaped # - outside a character class and the next newline character, - inclusive, are also ignored. This is equivalent to Perl's /x - modifier, and makes it possible to include comments inside - complicated patterns. Note, however, that this applies only - to data characters. Whitespace characters may never appear - within special character sequences in a pattern, for example - within the sequence (?( which introduces a conditional - subpattern. - - + x (PCRE_EXTENDED) + + + If this modifier is set, whitespace data characters in the + pattern are totally ignored except when escaped or inside a + character class, and characters between an unescaped # + outside a character class and the next newline character, + inclusive, are also ignored. This is equivalent to Perl's /x + modifier, and makes it possible to include comments inside + complicated patterns. Note, however, that this applies only + to data characters. Whitespace characters may never appear + within special character sequences in a pattern, for example + within the sequence (?( which introduces a conditional + subpattern. 
+ + - e - - - If this modifier is set, preg_replace - does normal substitution of backreferences in the - replacement string, evaluates it as PHP code, and uses the - result for replacing the search string. - - - Only preg_replace uses this modifier; - it is ignored by other PCRE functions. - - + e + + + If this modifier is set, preg_replace + does normal substitution of backreferences in the + replacement string, evaluates it as PHP code, and uses the + result for replacing the search string. + + + Only preg_replace uses this modifier; + it is ignored by other PCRE functions. + + - A (PCRE_ANCHORED) - - - If this modifier is set, the pattern is forced to be - "anchored", that is, it is constrained to match only at the - start of the string which is being searched (the "subject - string"). This effect can also be achieved by appropriate - constructs in the pattern itself, which is the only way to - do it in Perl. - - + A (PCRE_ANCHORED) + + + If this modifier is set, the pattern is forced to be + "anchored", that is, it is constrained to match only at the + start of the string which is being searched (the "subject + string"). This effect can also be achieved by appropriate + constructs in the pattern itself, which is the only way to + do it in Perl. + + - D (PCRE_DOLLAR_ENDONLY) - - - If this modifier is set, a dollar metacharacter in the pattern - matches only at the end of the subject string. Without this - modifier, a dollar also matches immediately before the final - character if it is a newline (but not before any other - newlines). This modifier is ignored if m - modifier is set. There is no equivalent to this modifier in - Perl. - - + D (PCRE_DOLLAR_ENDONLY) + + + If this modifier is set, a dollar metacharacter in the pattern + matches only at the end of the subject string. Without this + modifier, a dollar also matches immediately before the final + character if it is a newline (but not before any other + newlines). 
This modifier is ignored if m + modifier is set. There is no equivalent to this modifier in + Perl. + + - S - - - When a pattern is going to be used several times, it is - worth spending more time analyzing it in order to speed up - the time taken for matching. If this modifier is set, then - this extra analysis is performed. At present, studying a - pattern is useful only for non-anchored patterns that do not - have a single fixed starting character. - - + S + + + When a pattern is going to be used several times, it is + worth spending more time analyzing it in order to speed up + the time taken for matching. If this modifier is set, then + this extra analysis is performed. At present, studying a + pattern is useful only for non-anchored patterns that do not + have a single fixed starting character. + + - U (PCRE_UNGREEDY) - - - This modifier inverts the "greediness" of the quantifiers so - that they are not greedy by default, but become greedy if - followed by "?". It is not compatible with Perl. It can also - be set by a (?U) modifier setting within the pattern. - - + U (PCRE_UNGREEDY) + + + This modifier inverts the "greediness" of the quantifiers so + that they are not greedy by default, but become greedy if + followed by "?". It is not compatible with Perl. It can also + be set by a (?U) modifier setting within the pattern. + + - X (PCRE_EXTRA) - - - This modifier turns on additional functionality of PCRE that - is incompatible with Perl. Any backslash in a pattern that - is followed by a letter that has no special meaning causes - an error, thus reserving these combinations for future - expansion. By default, as in Perl, a backslash followed by a - letter with no special meaning is treated as a literal. - There are at present no other features controlled by this - modifier. - - + X (PCRE_EXTRA) + + + This modifier turns on additional functionality of PCRE that + is incompatible with Perl. 
Any backslash in a pattern that + is followed by a letter that has no special meaning causes + an error, thus reserving these combinations for future + expansion. By default, as in Perl, a backslash followed by a + letter with no special meaning is treated as a literal. + There are at present no other features controlled by this + modifier. + + - u (PCRE_UTF8) - - - This modifier turns on additional functionality of PCRE that - is incompatible with Perl. Pattern strings are treated as - UTF-8. This modifier is available from PHP 4.1.0 or greater. - - + u (PCRE_UTF8) + + + This modifier turns on additional functionality of PCRE that + is incompatible with Perl. Pattern strings are treated as + UTF-8. This modifier is available from PHP 4.1.0 or greater. + +
@@ -922,31 +923,31 @@ $fl_array = preg_grep ("/^(\d+)?\.\d+$/", $array); The differences described here are with respect to Perl 5.005. - - - By default, a whitespace character is any character that - the C library function isspace() recognizes, though it is - possible to compile PCRE with alternative character type - tables. Normally isspace() matches space, formfeed, newline, - carriage return, horizontal tab, and vertical tab. Perl 5 no - longer includes vertical tab in its set of whitespace char- - acters. The \v escape that was in the Perl documentation for - a long time was never in fact recognized. However, the char- - acter itself was treated as whitespace at least up to 5.002. - In 5.004 and 5.005 it does not match \s. - - - - + + + By default, a whitespace character is any character that + the C library function isspace() recognizes, though it is + possible to compile PCRE with alternative character type + tables. Normally isspace() matches space, formfeed, newline, + carriage return, horizontal tab, and vertical tab. Perl 5 no + longer includes vertical tab in its set of whitespace characters. + The \v escape that was in the Perl documentation for + a long time was never in fact recognized. However, the character + itself was treated as whitespace at least up to 5.002. + In 5.004 and 5.005 it does not match \s. + + + + PCRE does not allow repeat quantifiers on lookahead assertions. Perl permits them, but they do not mean what you might think. For example, (?!a){3} does not assert that the next three characters are not "a". It just asserts that the next character is not "a" three times. - - - - + + + + Capturing subpatterns that occur inside negative looka- head assertions are counted, but their entries in the offsets vector are never set. 
Perl sets its numerical vari- @@ -954,39 +955,39 @@ $fl_array = preg_grep ("/^(\d+)?\.\d+$/", $array); assertion fails to match something (thereby succeeding), but only if the negative lookahead assertion contains just one branch. - - - - + + + + Though binary zero characters are supported in the subject string, they are not allowed in a pattern string because it is passed as a normal C string, terminated by zero. The escape sequence "\0" can be used in the pattern to represent a binary zero. - - - - + + + + The following Perl escape sequences are not supported: \l, \u, \L, \U, \E, \Q. In fact these are implemented by Perl's general string-handling and are not part of its pattern matching engine. - - - - + + + + The Perl \G assertion is not supported as it is not relevant to single pattern matches. - - - - + + + + Fairly obviously, PCRE does not support the (?{code}) construction. - - - - + + + + There are at the time of writing some oddities in Perl 5.005_02 concerned with the settings of captured strings when part of a pattern is repeated. For example, matching @@ -997,23 +998,23 @@ $fl_array = preg_grep ("/^(\d+)?\.\d+$/", $array); In Perl 5.004 $2 is set in both cases, and that is also true of PCRE. If in the future Perl changes to a consistent state that is different, PCRE may change to follow. - - - - + + + + Another as yet unresolved discrepancy is that in Perl 5.005_02 the pattern /^(a)?(?(1)a|b)+$/ matches the string "a", whereas in PCRE it does not. However, in both Perl and PCRE /^(a)?a/ matched against "a" leaves $1 unset. - - - - + + + + PCRE provides some extensions to the Perl regular expression facilities: - - - + + + Although lookbehind assertions must match fixed length strings, each alternative branch of a lookbehind assertion can match a different length of string. Perl 5.005 requires @@ -1042,9 +1043,9 @@ $fl_array = preg_grep ("/^(\d+)?\.\d+$/", $array); - - - +
+ +
@@ -1070,8 +1071,8 @@ $fl_array = preg_grep ("/^(\d+)?\.\d+$/", $array); itself. - - Meta-characters + + Meta-characters The power of regular expressions comes from the ability to include alternatives and repetitions in the pat- @@ -1086,116 +1087,116 @@ $fl_array = preg_grep ("/^(\d+)?\.\d+$/", $array); Outside square brackets, the meta-characters are as follows: - \ - - - general escape character with several uses - - + \ + + + general escape character with several uses + + - ^ - - - assert start of subject (or line, in multiline mode) - - + ^ + + + assert start of subject (or line, in multiline mode) + + - $ - - - assert end of subject (or line, in multiline mode) - - + $ + + + assert end of subject (or line, in multiline mode) + + - . - - - match any character except newline (by default) - - + . + + + match any character except newline (by default) + + - [ - - - start character class definition - - + [ + + + start character class definition + + - ] - - + ] + + end character class definition - - + + - | - - + | + + start of alternative branch - - + + - ( - - + ( + + start subpattern - - + + - ) - - + ) + + end subpattern - - + + - ?
+ + extends the meaning of (, also 0 or 1 quantifier, also quantifier minimizer - - + + - * - - + * + + 0 or more quantifier - - + + - + - - + + + + 1 or more quantifier - - + + - { - - + { + + start min/max quantifier - - + + - } - - + } + + end min/max quantifier - - + + @@ -1204,36 +1205,36 @@ $fl_array = preg_grep ("/^(\d+)?\.\d+$/", $array); characters are: - \ - - + \ + + general escape character - - + + - ^ - - + ^ + + negate the class, but only if the first character - - + + - - - - + - + + indicates character range - - + + - ] - - + ] + + terminates the character class - - + + The following sections describe the use of each of the @@ -1277,76 +1278,76 @@ $fl_array = preg_grep ("/^(\d+)?\.\d+$/", $array); - \a - - + \a + + alarm, that is, the BEL character (hex 07) - - + + - \cx - - + \cx + + "control-x", where x is any character - - + + - \e - - + \e + + escape (hex 1B) - - + + - \f - - + \f + + formfeed (hex 0C) - - + + - \n - - + \n + + newline (hex 0A) - - + + - \r - - + \r + + carriage return (hex 0D) - - + + - \t - - + \t + + tab (hex 09) - - + + - \xhh - - + \xhh + + character with hex code hh - - + + - \ddd - - + \ddd + + character with octal code ddd, or backreference - - + + @@ -1389,80 +1390,80 @@ $fl_array = preg_grep ("/^(\d+)?\.\d+$/", $array); - \040 - - + \040 + + is another way of writing a space - - + + - \40 - - + \40 + + is the same, provided there are fewer than 40 previous capturing subpatterns - - + + - \7 - - + \7 + + is always a back reference - - + + - \11 - - + \11 + + might be a back reference, or another way of writing a tab - - + + - \011 - - + \011 + + is always a tab - - + + - \0113 - - + \0113 + + is a tab followed by the character "3" - - + + - \113 - - + \113 + + is the character with octal code 113 (since there can be no more than 99 back references) - - + + - \377 - - + \377 + + is a byte consisting entirely of 1 bits - - + + - \81 - - + \81 + + is either a back reference, or a binary zero followed by the two characters 
"8" and "1" - - + + @@ -1485,52 +1486,52 @@ $fl_array = preg_grep ("/^(\d+)?\.\d+$/", $array); - \d - - + \d + + any decimal digit - - + + - \D - - + \D + + any character that is not a decimal digit - - + + - \s - - + \s + + any whitespace character - - + + - \S - - + \S + + any character that is not a whitespace character - - + + - \w - - + \w + + any "word" character - - + + - \W - - + \W + + any "non-word" character - - + + @@ -1565,49 +1566,49 @@ $fl_array = preg_grep ("/^(\d+)?\.\d+$/", $array); backslashed assertions are - - - \b - - - word boundary - - - - - \B - - - not a word boundary - - - - - \A - - - start of subject (independent of multiline mode) - - - - - \Z - - - end of subject or newline at end (independent of - multiline mode) - - - - - \z - - - end of subject (independent of multiline mode) - - - - + + + \b + + + word boundary + + + + + \B + + + not a word boundary + + + + + \A + + + start of subject (independent of multiline mode) + + + + + \Z + + + end of subject or newline at end (independent of + multiline mode) + + + + + \z + + + end of subject (independent of multiline mode) + + + + These assertions may not appear in character classes (but @@ -1634,8 +1635,9 @@ $fl_array = preg_grep ("/^(\d+)?\.\d+$/", $array); string, whereas \z matches only at the end. - - Circumflex and dollar + + + Circumflex and dollar Outside a character class, in the default matching mode, the circumflex character is an assertion which is true only if @@ -1684,8 +1686,9 @@ $fl_array = preg_grep ("/^(\d+)?\.\d+$/", $array); whether PCRE_MULTILINE is set or not. - - FULL STOP + + + FULL STOP Outside a character class, a dot in the pattern matches any one character in the subject, including a non-printing @@ -1697,8 +1700,9 @@ $fl_array = preg_grep ("/^(\d+)?\.\d+$/", $array); in a character class. - - Square brackets + + + Square brackets An opening square bracket introduces a character class, ter- minated by a closing square bracket. 
A closing square @@ -1776,8 +1780,9 @@ $fl_array = preg_grep ("/^(\d+)?\.\d+$/", $array); classes, but it does no harm if they are escaped. - - Vertical bar + + + Vertical bar Vertical bar characters are used to separate alternative patterns. For example, the pattern @@ -1794,8 +1799,9 @@ $fl_array = preg_grep ("/^(\d+)?\.\d+$/", $array); subpattern. - - Internal option setting + + + Internal option setting The settings of PCRE_CASELESS , PCRE_MULTILINE , @@ -1866,8 +1872,9 @@ $fl_array = preg_grep ("/^(\d+)?\.\d+$/", $array); even when it is at top level. It is best put at the start. - - subpatterns + + + subpatterns Subpatterns are delimited by parentheses (round brackets), which can be nested. Marking part of a pattern as a subpat- @@ -1929,8 +1936,9 @@ $fl_array = preg_grep ("/^(\d+)?\.\d+$/", $array); the above patterns match "SUNDAY" as well as "Saturday". - - Repetition + + + Repetition Repetition is specified by quantifiers, which can follow any of the following items: @@ -2069,8 +2077,9 @@ $fl_array = preg_grep ("/^(\d+)?\.\d+$/", $array); "b". - - BACK REFERENCES + + + BACK REFERENCES Outside a character class, a backslash followed by a digit greater than 0 (and possibly further digits) is a back @@ -2136,8 +2145,9 @@ $fl_array = preg_grep ("/^(\d+)?\.\d+$/", $array); example above, or by a quantifier with a minimum of zero. - - Assertions + + + Assertions An assertion is a test on the characters following or preceding the current matching point that does not actually @@ -2257,8 +2267,9 @@ $fl_array = preg_grep ("/^(\d+)?\.\d+$/", $array); subpatterns. - - Once-only subpatterns + + + Once-only subpatterns With both maximizing and minimizing repetition, failure of what follows normally causes the repeated item to be re- @@ -2367,8 +2378,9 @@ $fl_array = preg_grep ("/^(\d+)?\.\d+$/", $array); pens quickly. 
- - Conditional subpatterns + + + Conditional subpatterns It is possible to cause the matching process to obey a subpattern conditionally or to choose between two alternative @@ -2426,8 +2438,9 @@ $fl_array = preg_grep ("/^(\d+)?\.\d+$/", $array); letters and dd are digits. - - Comments + + + Comments The sequence (?# marks the start of a comment which continues up to the next closing parenthesis. Nested @@ -2439,8 +2452,9 @@ $fl_array = preg_grep ("/^(\d+)?\.\d+$/", $array); ues up to the next newline character in the pattern. - - Recursive patterns + + + Recursive patterns Consider the problem of matching a string in parentheses, allowing for unlimited nested parentheses. Without the use @@ -2499,8 +2513,9 @@ $fl_array = preg_grep ("/^(\d+)?\.\d+$/", $array); recursion. - - Performance + + + Performance Certain items that may appear in patterns are more efficient than others. It is more efficient to use a character class