mirror of
https://github.com/sigmasternchen/php-doc-en
synced 2025-03-18 18:08:54 +00:00

git-svn-id: https://svn.php.net/repository/phpdoc/en/trunk@132133 c90b9560-bf6c-de11-be94-00142212c4b1
608 lines
19 KiB
XML
608 lines
19 KiB
XML
<?xml version="1.0" encoding="iso-8859-1"?>
|
|
<!-- $Revision: 1.10 $ -->
|
|
<reference id="ref.xml">
|
|
<title>XML parser functions</title>
|
|
<titleabbrev>XML</titleabbrev>
|
|
|
|
<partintro>
|
|
|
|
<section id="xml.intro">
|
|
&reftitle.intro;
|
|
<para>
|
|
XML (eXtensible Markup Language) is a data format for structured
|
|
document interchange on the Web. It is a standard defined by
|
|
the World Wide Web Consortium (W3C). Information about XML and
|
|
related technologies can be found at <ulink
|
|
url="&url.xml;">&url.xml;</ulink>.
|
|
</para>
|
|
<para>
|
|
This PHP extension implements support for James Clark's
|
|
<productname>expat</productname> in PHP. This toolkit lets you
|
|
parse, but not validate, XML documents. It supports three
|
|
source <link linkend="xml.encoding">character encodings</link>
|
|
also provided by PHP: <literal>US-ASCII</literal>,
|
|
<literal>ISO-8859-1</literal> and <literal>UTF-8</literal>.
|
|
<literal>UTF-16</literal> is not supported.
|
|
</para>
|
|
<para>
|
|
This extension lets you <link
|
|
linkend="function.xml-parser-create">create XML parsers</link>
|
|
and then define <emphasis>handlers</emphasis> for different XML
|
|
events. Each XML parser also has a few <link
|
|
linkend="function.xml-parser-set-option">parameters</link> you
|
|
can adjust.
|
|
</para>
|
|
</section>
|
|
|
|
<section id="xml.requirements">
|
|
&reftitle.required;
|
|
<para>
|
|
This extension uses <productname>expat</productname>, which can
|
|
be found at <ulink url="&url.expat;">&url.expat;</ulink>. The
|
|
Makefile that comes with expat does not build a library by
|
|
default; you can use this make rule for that:
|
|
<programlisting role="makefile">
|
|
<![CDATA[
|
|
# Archive the expat object files into a static library and index it.
# The two recipe lines below must begin with a tab character.
libexpat.a: $(OBJS)
	ar -rc $@ $(OBJS)
	ranlib $@
|
|
]]>
|
|
</programlisting>
|
|
A source RPM package of expat can be found at <ulink
|
|
url="&url.expat.rpm;">&url.expat.rpm;</ulink>.
|
|
</para>
|
|
</section>
|
|
|
|
&reference.xml.configure;
|
|
|
|
<section id="xml.configuration">
|
|
&reftitle.runtime;
|
|
&no.config;
|
|
</section>
|
|
|
|
<section id="xml.resources">
|
|
&reftitle.resources;
|
|
<section id="xml.resources.xml">
|
|
<title><literal>xml</literal></title>
|
|
<para>
|
|
The <literal>xml</literal> resource as returned by
|
|
<function>xml_parser_create</function> and
|
|
<function>xml_parser_create_ns</function> references an xml
|
|
parser instance to be used with the functions provided by this
|
|
extension.
|
|
</para>
|
|
</section>
|
|
</section>
|
|
|
|
&reference.xml.constants;
|
|
|
|
<section id="xml.eventhandlers">
|
|
<title>Event Handlers</title>
|
|
<para>
|
|
The XML event handlers defined are:
|
|
<table>
|
|
<title>Supported XML handlers</title>
|
|
<tgroup cols="2">
|
|
<thead>
|
|
<row>
|
|
<entry>PHP function to set handler</entry>
|
|
<entry>Event description</entry>
|
|
</row>
|
|
</thead>
|
|
<tbody>
|
|
<row>
|
|
<entry><function>xml_set_element_handler</function></entry>
|
|
<entry>
|
|
Element events are issued whenever the XML parser
|
|
encounters start or end tags. There are separate handlers
|
|
for start tags and end tags.
|
|
</entry>
|
|
</row>
|
|
<row>
|
|
<entry>
|
|
<function>xml_set_character_data_handler</function>
|
|
</entry>
|
|
<entry>
|
|
Character data is roughly all the non-markup contents of
|
|
XML documents, including whitespace between tags. Note
|
|
that the XML parser does not add or remove any whitespace;
|
|
it is up to the application (you) to decide whether
|
|
whitespace is significant.
|
|
</entry>
|
|
</row>
|
|
<row>
|
|
<entry>
|
|
<function>xml_set_processing_instruction_handler</function>
|
|
</entry>
|
|
<entry>
|
|
PHP programmers should be familiar with processing
|
|
instructions (PIs) already. &lt;?php ?&gt; is a processing
|
|
instruction, where <replaceable>php</replaceable> is called
|
|
the "PI target". The handling of these is
|
|
application-specific, except that all PI targets starting
|
|
with "XML" are reserved.
|
|
</entry>
|
|
</row>
|
|
<row>
|
|
<entry><function>xml_set_default_handler</function></entry>
|
|
<entry>
|
|
Whatever is not handled by another handler goes to the default
|
|
handler. You will get things like the XML and document
|
|
type declarations in the default handler.
|
|
</entry>
|
|
</row>
|
|
<row>
|
|
<entry>
|
|
<function>xml_set_unparsed_entity_decl_handler</function>
|
|
</entry>
|
|
<entry>
|
|
This handler will be called for declaration of an unparsed
|
|
(NDATA) entity.
|
|
</entry>
|
|
</row>
|
|
<row>
|
|
<entry>
|
|
<function>xml_set_notation_decl_handler</function>
|
|
</entry>
|
|
<entry>
|
|
This handler is called for declaration of a notation.
|
|
</entry>
|
|
</row>
|
|
<row>
|
|
<entry>
|
|
<function>xml_set_external_entity_ref_handler</function>
|
|
</entry>
|
|
<entry>
|
|
This handler is called when the XML parser finds a
|
|
reference to an external parsed general entity. This can
|
|
be a reference to a file or URL, for example. See <link
|
|
linkend="example.xml-external-entity">the external entity
|
|
example</link> for a demonstration.
|
|
</entry>
|
|
</row>
|
|
</tbody>
|
|
</tgroup>
|
|
</table>
|
|
</para>
|
|
</section>
|
|
|
|
<section id="xml.case-folding">
|
|
<title>Case Folding</title>
|
|
<para>
|
|
The element handler functions may get their element names
|
|
<glossterm>case-folded</glossterm>. Case-folding is defined by
|
|
the XML standard as "a process applied to a sequence of
|
|
characters, in which those identified as non-uppercase are
|
|
replaced by their uppercase equivalents". In other words, when
|
|
it comes to XML, case-folding simply means uppercasing.
|
|
</para>
|
|
<para>
|
|
By default, all the element names that are passed to the handler
|
|
functions are case-folded. This behaviour can be queried and
|
|
controlled per XML parser with the
|
|
<function>xml_parser_get_option</function> and
|
|
<function>xml_parser_set_option</function> functions,
|
|
respectively.
|
|
</para>
|
|
</section>
|
|
|
|
<section id="xml.error-codes">
|
|
<title>Error Codes</title>
|
|
<para>
|
|
The following constants are defined for XML error codes (as
|
|
returned by <function>xml_get_error_code</function>):
|
|
<simplelist>
|
|
<member>XML_ERROR_NONE</member>
|
|
<member>XML_ERROR_NO_MEMORY</member>
|
|
<member>XML_ERROR_SYNTAX</member>
|
|
<member>XML_ERROR_NO_ELEMENTS</member>
|
|
<member>XML_ERROR_INVALID_TOKEN</member>
|
|
<member>XML_ERROR_UNCLOSED_TOKEN</member>
|
|
<member>XML_ERROR_PARTIAL_CHAR</member>
|
|
<member>XML_ERROR_TAG_MISMATCH</member>
|
|
<member>XML_ERROR_DUPLICATE_ATTRIBUTE</member>
|
|
<member>XML_ERROR_JUNK_AFTER_DOC_ELEMENT</member>
|
|
<member>XML_ERROR_PARAM_ENTITY_REF</member>
|
|
<member>XML_ERROR_UNDEFINED_ENTITY</member>
|
|
<member>XML_ERROR_RECURSIVE_ENTITY_REF</member>
|
|
<member>XML_ERROR_ASYNC_ENTITY</member>
|
|
<member>XML_ERROR_BAD_CHAR_REF</member>
|
|
<member>XML_ERROR_BINARY_ENTITY_REF</member>
|
|
<member>XML_ERROR_ATTRIBUTE_EXTERNAL_ENTITY_REF</member>
|
|
<member>XML_ERROR_MISPLACED_XML_PI</member>
|
|
<member>XML_ERROR_UNKNOWN_ENCODING</member>
|
|
<member>XML_ERROR_INCORRECT_ENCODING</member>
|
|
<member>XML_ERROR_UNCLOSED_CDATA_SECTION</member>
|
|
<member>XML_ERROR_EXTERNAL_ENTITY_HANDLING</member>
|
|
</simplelist>
|
|
</para>
|
|
</section>
|
|
|
|
<section id="xml.encoding">
|
|
<title>Character Encoding</title>
|
|
<para>
|
|
PHP's XML extension supports the <ulink
|
|
url="&url.unicode;">Unicode</ulink> character set through
|
|
different <glossterm>character encoding</glossterm>s. There are
|
|
two types of character encodings, <glossterm>source
|
|
encoding</glossterm> and <glossterm>target encoding</glossterm>.
|
|
PHP's internal representation of the document is always encoded
|
|
with <literal>UTF-8</literal>.
|
|
</para>
|
|
<para>
|
|
Source encoding is done when an XML document is <link
|
|
linkend="function.xml-parse">parsed</link>. Upon <link
|
|
linkend="function.xml-parser-create">creating an XML
|
|
parser</link>, a source encoding can be specified (this encoding
|
|
cannot be changed later in the XML parser's lifetime). The
|
|
supported source encodings are <literal>ISO-8859-1</literal>,
|
|
<literal>US-ASCII</literal> and <literal>UTF-8</literal>. The
|
|
former two are single-byte encodings, which means that each
|
|
character is represented by a single byte.
|
|
<literal>UTF-8</literal> can encode characters composed by a
|
|
variable number of bits (up to 21) in one to four bytes. The
|
|
default source encoding used by PHP is
|
|
<literal>ISO-8859-1</literal>.
|
|
</para>
|
|
<para>
|
|
Target encoding is done when PHP passes data to XML handler
|
|
functions. When an XML parser is created, the target encoding
|
|
is set to the same as the source encoding, but this may be
|
|
changed at any point. The target encoding will affect character
|
|
data as well as tag names and processing instruction targets.
|
|
</para>
|
|
<para>
|
|
If the XML parser encounters characters outside the range that
|
|
its source encoding is capable of representing, it will return
|
|
an error.
|
|
</para>
|
|
<para>
|
|
If PHP encounters characters in the parsed XML document that can
|
|
not be represented in the chosen target encoding, the problem
|
|
characters will be "demoted". Currently, this means that such
|
|
characters are replaced by a question mark.
|
|
</para>
|
|
</section>
|
|
|
|
<section id="xml.examples">
|
|
&reftitle.examples;
|
|
<para>
|
|
Here are some example PHP scripts parsing XML documents.
|
|
</para>
|
|
<section id="example.xml-structure">
|
|
<title>XML Element Structure Example</title>
|
|
<para>
|
|
This first example displays the structure of the start elements in
|
|
a document with indentation.
|
|
<example>
|
|
<title>Show XML Element Structure</title>
|
|
<programlisting role="php">
|
|
<![CDATA[
|
|
<?php
|
|
$file = "data.xml";
// Nesting level of the element currently being parsed. A plain counter is
// sufficient because this script drives a single parser, and it avoids using
// the parser handle as an array offset (the handle is an object as of
// PHP 8.0, and objects cannot be array offsets).
$depth = 0;

// Start-tag handler: prints the element name indented by the current
// nesting level, then descends one level.
function startElement($parser, $name, $attrs) {
    global $depth;
    print str_repeat(" ", $depth);
    print "$name\n";
    $depth++;
}

// End-tag handler: returns to the nesting level of the parent element.
function endElement($parser, $name) {
    global $depth;
    $depth--;
}

$xml_parser = xml_parser_create();
xml_set_element_handler($xml_parser, "startElement", "endElement");
if (!($fp = fopen($file, "r"))) {
    die("could not open XML input");
}

// Feed the file to the parser in 4096-byte chunks. The chunk is compared
// against false and "" explicitly so that a chunk consisting of the single
// character "0" (which PHP treats as false) does not end the loop early.
while (($data = fread($fp, 4096)) !== false && $data !== "") {
    // feof() tells the parser whether this chunk is the last one
    if (!xml_parse($xml_parser, $data, feof($fp))) {
        die(sprintf("XML error: %s at line %d",
                    xml_error_string(xml_get_error_code($xml_parser)),
                    xml_get_current_line_number($xml_parser)));
    }
}
fclose($fp);
xml_parser_free($xml_parser);
|
|
?>
|
|
]]>
|
|
</programlisting>
|
|
</example>
|
|
</para>
|
|
</section>
|
|
|
|
<section id="example.xml-map-tags">
|
|
<title>XML Tag Mapping Example</title>
|
|
<para>
|
|
<example>
|
|
<title>Map XML to HTML</title>
|
|
<para>
|
|
This example maps tags in an XML document directly to HTML
|
|
tags. Elements not found in the "map array" are ignored. Of
|
|
course, this example will only work with a specific XML
|
|
document type.
|
|
</para>
|
|
<para>
|
|
<programlisting role="php">
|
|
<![CDATA[
|
|
<?php
|
|
$file = "data.xml";
// Maps case-folded XML element names to the HTML tag that replaces them.
// Elements that are not listed here produce no tag at all.
$map_array = array(
    "BOLD"     => "B",
    "EMPHASIS" => "I",
    "LITERAL"  => "TT"
);

// Start-tag handler: prints the opening HTML tag for a mapped element.
function startElement($parser, $name, $attrs) {
    global $map_array;
    // empty() skips unmapped names without raising an undefined-index notice
    if (!empty($map_array[$name])) {
        $htmltag = $map_array[$name];
        print "<$htmltag>";
    }
}

// End-tag handler: prints the closing HTML tag for a mapped element.
function endElement($parser, $name) {
    global $map_array;
    if (!empty($map_array[$name])) {
        $htmltag = $map_array[$name];
        print "</$htmltag>";
    }
}

// Character-data handler: passes text through unchanged.
function characterData($parser, $data) {
    print $data;
}

$xml_parser = xml_parser_create();
// use case-folding so we are sure to find the tag in $map_array
xml_parser_set_option($xml_parser, XML_OPTION_CASE_FOLDING, true);
xml_set_element_handler($xml_parser, "startElement", "endElement");
xml_set_character_data_handler($xml_parser, "characterData");
if (!($fp = fopen($file, "r"))) {
    die("could not open XML input");
}

// Feed the file to the parser in 4096-byte chunks. The chunk is compared
// against false and "" explicitly so that a chunk consisting of the single
// character "0" (which PHP treats as false) does not end the loop early.
while (($data = fread($fp, 4096)) !== false && $data !== "") {
    // feof() tells the parser whether this chunk is the last one
    if (!xml_parse($xml_parser, $data, feof($fp))) {
        die(sprintf("XML error: %s at line %d",
                    xml_error_string(xml_get_error_code($xml_parser)),
                    xml_get_current_line_number($xml_parser)));
    }
}
fclose($fp);
xml_parser_free($xml_parser);
|
|
?>
|
|
]]>
|
|
</programlisting>
|
|
</para>
|
|
</example>
|
|
</para>
|
|
</section>
|
|
|
|
<section id="example.xml-external-entity">
|
|
<title>XML External Entity Example</title>
|
|
<para>
|
|
This example highlights XML code. It illustrates how to use an
|
|
external entity reference handler to include and parse other
|
|
documents, as well as how PIs can be processed, and a way of
|
|
determining "trust" for PIs containing code.
|
|
</para>
|
|
<para>
|
|
XML documents that can be used for this example are found below
|
|
the example (<filename>xmltest.xml</filename> and
|
|
<filename>xmltest2.xml</filename>).
|
|
</para>
|
|
<para>
|
|
<example>
|
|
<title>External Entity Example</title>
|
|
<programlisting role="php">
|
|
<![CDATA[
|
|
<?php
|
|
// Top-level document to parse and highlight.
$file = "xmltest.xml";
|
|
|
|
// Decides whether PHP code embedded in $file may be executed.
//
// Only local files owned by ourselves are trusted: anything that looks like
// a URL (scheme://...) is rejected, and so is any file whose owner cannot be
// determined.
//
// Returns true when $file is trusted, false otherwise.
function trustedFile($file) {
    // preg_match() is used because eregi() was removed in PHP 7
    if (preg_match('!^[a-z]+://!i', $file)) {
        return false;
    }
    // fileowner() returns false on failure; check for that explicitly so a
    // failed lookup is never mistaken for uid 0
    $owner = @fileowner($file);
    return $owner !== false && $owner == getmyuid();
}
|
|
|
|
// Start-tag handler: prints the tag as colour-highlighted HTML.
//
// The surrounding angle brackets are written as &lt; and &gt; and attribute
// values are HTML-escaped, so the browser displays the markup instead of
// interpreting it.
function startElement($parser, $name, $attribs) {
    print "&lt;<font color=\"#0000cc\">$name</font>";
    // foreach is used because each() was removed in PHP 8; it also makes a
    // separate emptiness check unnecessary
    foreach ($attribs as $k => $v) {
        print " <font color=\"#009900\">$k</font>=\"<font color=\"#990000\">"
            . htmlspecialchars($v) . "</font>\"";
    }
    print "&gt;";
}
|
|
|
|
// End-tag handler: prints the closing tag as colour-highlighted HTML. The
// angle brackets are written as &lt; and &gt; so the browser displays them
// literally.
function endElement($parser, $name) {
    print "&lt;/<font color=\"#0000cc\">$name</font>&gt;";
}
|
|
|
|
// Character-data handler: prints the text in bold. The text is HTML-escaped
// first (as defaultHandler() does) so that characters such as < and & in the
// parsed document are displayed instead of being interpreted as markup.
function characterData($parser, $data) {
    print "<b>" . htmlspecialchars($data) . "</b>";
}
|
|
|
|
// Processing-instruction handler. Only the "php" target (matched
// case-insensitively) is handled; every other target is ignored.
//
// SECURITY: the body of the instruction is passed to eval() when the file it
// came from is accepted by trustedFile(). eval() executes arbitrary code, so
// this path must never be reachable for documents an attacker can influence.
function PIHandler($parser, $target, $data) {
    global $parser_file;

    if (strtolower($target) != "php") {
        return;
    }

    // Find the file this parser is reading. A resource handle is looked up by
    // its integer id; an object handle (PHP 8.0 and later) cannot be an array
    // offset, so its object hash is used instead.
    $key = is_object($parser) ? spl_object_hash($parser) : (int) $parser;
    $source = isset($parser_file[$key]) ? $parser_file[$key] : null;

    // If the parsed document is "trusted", we say it is safe to execute PHP
    // code inside it. If not (or if its origin is unknown), display the code
    // instead.
    if ($source !== null && trustedFile($source)) {
        eval($data);
    } else {
        printf("Untrusted PHP code: <i>%s</i>",
               htmlspecialchars($data));
    }
}
|
|
|
|
// Default handler: prints whatever no other handler consumed, HTML-escaped.
// Data that starts with "&" and ends with ";" is shown in purple; everything
// else is shown in a smaller font.
function defaultHandler($parser, $data) {
    $firstChar = substr($data, 0, 1);
    $lastChar  = substr($data, -1, 1);

    if ($firstChar == "&" && $lastChar == ";") {
        $format = '<font color="#aa00aa">%s</font>';
    } else {
        $format = '<font size="-1">%s</font>';
    }
    printf($format, htmlspecialchars($data));
}
|
|
|
|
// External-entity handler: opens the entity named by $systemId with a fresh
// parser (see new_xml_parser()) and parses it with the same handlers.
//
// Returns true when the entity was parsed completely, false when there is no
// system identifier, the entity cannot be opened, or it fails to parse.
function externalEntityRefHandler($parser, $openEntityNames, $base, $systemId,
                                  $publicId) {
    if (!$systemId) {
        return false;
    }

    $opened = new_xml_parser($systemId);
    if (!$opened) {
        printf("Could not open entity %s at %s\n", $openEntityNames,
               $systemId);
        return false;
    }
    // A separate variable keeps the calling parser ($parser) intact
    list($entityParser, $fp) = $opened;

    $ok = true;
    // Compare the chunk against false and "" explicitly so that a chunk
    // consisting of the single character "0" does not end the loop early
    while (($data = fread($fp, 4096)) !== false && $data !== "") {
        if (!xml_parse($entityParser, $data, feof($fp))) {
            printf("XML error: %s at line %d while parsing entity %s\n",
                   xml_error_string(xml_get_error_code($entityParser)),
                   xml_get_current_line_number($entityParser), $openEntityNames);
            $ok = false;
            break;
        }
    }

    // Release the file handle and the parser on success and failure alike
    fclose($fp);
    xml_parser_free($entityParser);
    return $ok;
}
|
|
|
|
// Opens $file and creates a parser for it with all of this example's
// handlers installed. The file name is recorded in the global $parser_file so
// that PIHandler() can later find out which file a parser is reading.
//
// Returns array($xml_parser, $fp) on success, or false when the file cannot
// be opened.
function new_xml_parser($file) {
    global $parser_file;

    // Open the file first, so that no parser is created (and left unfreed)
    // when the file cannot be opened
    if (!($fp = @fopen($file, "r"))) {
        return false;
    }

    $xml_parser = xml_parser_create();
    xml_parser_set_option($xml_parser, XML_OPTION_CASE_FOLDING, 1);
    xml_set_element_handler($xml_parser, "startElement", "endElement");
    xml_set_character_data_handler($xml_parser, "characterData");
    xml_set_processing_instruction_handler($xml_parser, "PIHandler");
    xml_set_default_handler($xml_parser, "defaultHandler");
    xml_set_external_entity_ref_handler($xml_parser, "externalEntityRefHandler");

    if (!is_array($parser_file)) {
        settype($parser_file, "array");
    }
    // A resource handle is recorded under its integer id; an object handle
    // (PHP 8.0 and later) cannot be an array offset, so its object hash is
    // used instead.
    $key = is_object($xml_parser) ? spl_object_hash($xml_parser) : (int) $xml_parser;
    $parser_file[$key] = $file;
    return array($xml_parser, $fp);
}
|
|
|
|
// Driver: parse $file and print it as highlighted HTML inside a <pre> block.
$opened = new_xml_parser($file);
if (!$opened) {
    die("could not open XML input");
}
list($xml_parser, $fp) = $opened;

print "<pre>";
// Feed the file to the parser in 4096-byte chunks. The chunk is compared
// against false and "" explicitly so that a chunk consisting of the single
// character "0" (which PHP treats as false) does not end the loop early.
while (($data = fread($fp, 4096)) !== false && $data !== "") {
    // feof() tells the parser whether this chunk is the last one
    if (!xml_parse($xml_parser, $data, feof($fp))) {
        die(sprintf("XML error: %s at line %d\n",
                    xml_error_string(xml_get_error_code($xml_parser)),
                    xml_get_current_line_number($xml_parser)));
    }
}
print "</pre>";
print "parse complete\n";
fclose($fp);
xml_parser_free($xml_parser);
|
|
|
|
?>
|
|
]]>
|
|
</programlisting>
|
|
</example>
|
|
</para>
|
|
<para>
|
|
<example>
|
|
<title>xmltest.xml</title>
|
|
<programlisting role="xml">
|
|
<![CDATA[
|
|
<?xml version='1.0'?>
|
|
<!DOCTYPE chapter SYSTEM "/just/a/test.dtd" [
|
|
<!ENTITY plainEntity "FOO entity">
|
|
<!ENTITY systemEntity SYSTEM "xmltest2.xml">
|
|
]>
|
|
<chapter>
|
|
<TITLE>Title &plainEntity;</TITLE>
|
|
<para>
|
|
<informaltable>
|
|
<tgroup cols="3">
|
|
<tbody>
|
|
<row><entry>a1</entry><entry morerows="1">b1</entry><entry>c1</entry></row>
|
|
<row><entry>a2</entry><entry>c2</entry></row>
|
|
<row><entry>a3</entry><entry>b3</entry><entry>c3</entry></row>
|
|
</tbody>
|
|
</tgroup>
|
|
</informaltable>
|
|
</para>
|
|
&systemEntity;
|
|
<section id="about">
|
|
<title>About this Document</title>
|
|
<para>
|
|
<!-- this is a comment -->
|
|
<?php print 'Hi! This is PHP version '.phpversion(); ?>
|
|
</para>
|
|
</section>
|
|
</chapter>
|
|
]]>
|
|
</programlisting>
|
|
</example>
|
|
</para>
|
|
<para>
|
|
This file is included from <filename>xmltest.xml</filename>:
|
|
<example>
|
|
<title>xmltest2.xml</title>
|
|
<programlisting role="xml">
|
|
<![CDATA[
|
|
<?xml version="1.0"?>
|
|
<!DOCTYPE foo [
|
|
<!ENTITY testEnt "test entity">
|
|
]>
|
|
<foo>
|
|
<element attrib="value"/>
|
|
&testEnt;
|
|
<?php print "This is some more PHP code being executed."; ?>
|
|
</foo>
|
|
]]>
|
|
</programlisting>
|
|
</example>
|
|
</para>
|
|
</section>
|
|
</section>
|
|
</partintro>
|
|
|
|
&reference.xml.functions;
|
|
|
|
</reference>
|
|
<!-- Keep this comment at the end of the file
|
|
Local variables:
|
|
mode: sgml
|
|
sgml-omittag:t
|
|
sgml-shorttag:t
|
|
sgml-minimize-attributes:nil
|
|
sgml-always-quote-attributes:t
|
|
sgml-indent-step:1
|
|
sgml-indent-data:t
|
|
indent-tabs-mode:nil
|
|
sgml-parent-document:nil
|
|
sgml-default-dtd-file:"../../../manual.ced"
|
|
sgml-exposed-tags:nil
|
|
sgml-local-catalogs:nil
|
|
sgml-local-ecat-files:nil
|
|
End:
|
|
vim600: syn=xml fen fdm=syntax fdl=2 si
|
|
vim: et tw=78 syn=sgml
|
|
vi: ts=1 sw=1
|
|
-->
|
|
|