wp/wp-includes/SimplePie/Parser.php
changeset 0 d970ebf37754
child 16 a86126ab1dd4
equal deleted inserted replaced
-1:000000000000 0:d970ebf37754
       
     1 <?php
       
     2 /**
       
     3  * SimplePie
       
     4  *
       
     5  * A PHP-Based RSS and Atom Feed Framework.
       
     6  * Takes the hard work out of managing a complete RSS/Atom solution.
       
     7  *
       
     8  * Copyright (c) 2004-2012, Ryan Parman, Geoffrey Sneddon, Ryan McCue, and contributors
       
     9  * All rights reserved.
       
    10  *
       
    11  * Redistribution and use in source and binary forms, with or without modification, are
       
    12  * permitted provided that the following conditions are met:
       
    13  *
       
    14  * 	* Redistributions of source code must retain the above copyright notice, this list of
       
    15  * 	  conditions and the following disclaimer.
       
    16  *
       
    17  * 	* Redistributions in binary form must reproduce the above copyright notice, this list
       
    18  * 	  of conditions and the following disclaimer in the documentation and/or other materials
       
    19  * 	  provided with the distribution.
       
    20  *
       
    21  * 	* Neither the name of the SimplePie Team nor the names of its contributors may be used
       
    22  * 	  to endorse or promote products derived from this software without specific prior
       
    23  * 	  written permission.
       
    24  *
       
    25  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS
       
    26  * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
       
    27  * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS
       
    28  * AND CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
       
    29  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
       
    30  * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
       
    31  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
       
    32  * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
       
    33  * POSSIBILITY OF SUCH DAMAGE.
       
    34  *
       
    35  * @package SimplePie
       
    36  * @version 1.3.1
       
    37  * @copyright 2004-2012 Ryan Parman, Geoffrey Sneddon, Ryan McCue
       
    38  * @author Ryan Parman
       
    39  * @author Geoffrey Sneddon
       
    40  * @author Ryan McCue
       
    41  * @link http://simplepie.org/ SimplePie
       
    42  * @license http://www.opensource.org/licenses/bsd-license.php BSD License
       
    43  */
       
    44 
       
    45 /**
       
    46  * Parses XML into something sane
       
    47  *
       
    48  *
       
    49  * This class can be overloaded with {@see SimplePie::set_parser_class()}
       
    50  *
       
    51  * @package SimplePie
       
    52  * @subpackage Parsing
       
    53  */
       
    54 class SimplePie_Parser
       
    55 {
       
    56 	var $error_code;
       
    57 	var $error_string;
       
    58 	var $current_line;
       
    59 	var $current_column;
       
    60 	var $current_byte;
       
    61 	var $separator = ' ';
       
    62 	var $namespace = array('');
       
    63 	var $element = array('');
       
    64 	var $xml_base = array('');
       
    65 	var $xml_base_explicit = array(false);
       
    66 	var $xml_lang = array('');
       
    67 	var $data = array();
       
    68 	var $datas = array(array());
       
    69 	var $current_xhtml_construct = -1;
       
    70 	var $encoding;
       
    71 	protected $registry;
       
    72 
       
    73 	public function set_registry(SimplePie_Registry $registry)
       
    74 	{
       
    75 		$this->registry = $registry;
       
    76 	}
       
    77 
       
    78 	public function parse(&$data, $encoding)
       
    79 	{
       
    80 		// Use UTF-8 if we get passed US-ASCII, as every US-ASCII character is a UTF-8 character
       
    81 		if (strtoupper($encoding) === 'US-ASCII')
       
    82 		{
       
    83 			$this->encoding = 'UTF-8';
       
    84 		}
       
    85 		else
       
    86 		{
       
    87 			$this->encoding = $encoding;
       
    88 		}
       
    89 
       
    90 		// Strip BOM:
       
    91 		// UTF-32 Big Endian BOM
       
    92 		if (substr($data, 0, 4) === "\x00\x00\xFE\xFF")
       
    93 		{
       
    94 			$data = substr($data, 4);
       
    95 		}
       
    96 		// UTF-32 Little Endian BOM
       
    97 		elseif (substr($data, 0, 4) === "\xFF\xFE\x00\x00")
       
    98 		{
       
    99 			$data = substr($data, 4);
       
   100 		}
       
   101 		// UTF-16 Big Endian BOM
       
   102 		elseif (substr($data, 0, 2) === "\xFE\xFF")
       
   103 		{
       
   104 			$data = substr($data, 2);
       
   105 		}
       
   106 		// UTF-16 Little Endian BOM
       
   107 		elseif (substr($data, 0, 2) === "\xFF\xFE")
       
   108 		{
       
   109 			$data = substr($data, 2);
       
   110 		}
       
   111 		// UTF-8 BOM
       
   112 		elseif (substr($data, 0, 3) === "\xEF\xBB\xBF")
       
   113 		{
       
   114 			$data = substr($data, 3);
       
   115 		}
       
   116 
       
   117 		if (substr($data, 0, 5) === '<?xml' && strspn(substr($data, 5, 1), "\x09\x0A\x0D\x20") && ($pos = strpos($data, '?>')) !== false)
       
   118 		{
       
   119 			$declaration = $this->registry->create('XML_Declaration_Parser', array(substr($data, 5, $pos - 5)));
       
   120 			if ($declaration->parse())
       
   121 			{
       
   122 				$data = substr($data, $pos + 2);
       
   123 				$data = '<?xml version="' . $declaration->version . '" encoding="' . $encoding . '" standalone="' . (($declaration->standalone) ? 'yes' : 'no') . '"?>' . $data;
       
   124 			}
       
   125 			else
       
   126 			{
       
   127 				$this->error_string = 'SimplePie bug! Please report this!';
       
   128 				return false;
       
   129 			}
       
   130 		}
       
   131 
       
   132 		$return = true;
       
   133 
       
   134 		static $xml_is_sane = null;
       
   135 		if ($xml_is_sane === null)
       
   136 		{
       
   137 			$parser_check = xml_parser_create();
       
   138 			xml_parse_into_struct($parser_check, '<foo>&amp;</foo>', $values);
       
   139 			xml_parser_free($parser_check);
       
   140 			$xml_is_sane = isset($values[0]['value']);
       
   141 		}
       
   142 
       
   143 		// Create the parser
       
   144 		if ($xml_is_sane)
       
   145 		{
       
   146 			$xml = xml_parser_create_ns($this->encoding, $this->separator);
       
   147 			xml_parser_set_option($xml, XML_OPTION_SKIP_WHITE, 1);
       
   148 			xml_parser_set_option($xml, XML_OPTION_CASE_FOLDING, 0);
       
   149 			xml_set_object($xml, $this);
       
   150 			xml_set_character_data_handler($xml, 'cdata');
       
   151 			xml_set_element_handler($xml, 'tag_open', 'tag_close');
       
   152 
       
   153 			// Parse!
       
   154 			if (!xml_parse($xml, $data, true))
       
   155 			{
       
   156 				$this->error_code = xml_get_error_code($xml);
       
   157 				$this->error_string = xml_error_string($this->error_code);
       
   158 				$return = false;
       
   159 			}
       
   160 			$this->current_line = xml_get_current_line_number($xml);
       
   161 			$this->current_column = xml_get_current_column_number($xml);
       
   162 			$this->current_byte = xml_get_current_byte_index($xml);
       
   163 			xml_parser_free($xml);
       
   164 			return $return;
       
   165 		}
       
   166 		else
       
   167 		{
       
   168 			libxml_clear_errors();
       
   169 			$xml = new XMLReader();
       
   170 			$xml->xml($data);
       
   171 			while (@$xml->read())
       
   172 			{
       
   173 				switch ($xml->nodeType)
       
   174 				{
       
   175 
       
   176 					case constant('XMLReader::END_ELEMENT'):
       
   177 						if ($xml->namespaceURI !== '')
       
   178 						{
       
   179 							$tagName = $xml->namespaceURI . $this->separator . $xml->localName;
       
   180 						}
       
   181 						else
       
   182 						{
       
   183 							$tagName = $xml->localName;
       
   184 						}
       
   185 						$this->tag_close(null, $tagName);
       
   186 						break;
       
   187 					case constant('XMLReader::ELEMENT'):
       
   188 						$empty = $xml->isEmptyElement;
       
   189 						if ($xml->namespaceURI !== '')
       
   190 						{
       
   191 							$tagName = $xml->namespaceURI . $this->separator . $xml->localName;
       
   192 						}
       
   193 						else
       
   194 						{
       
   195 							$tagName = $xml->localName;
       
   196 						}
       
   197 						$attributes = array();
       
   198 						while ($xml->moveToNextAttribute())
       
   199 						{
       
   200 							if ($xml->namespaceURI !== '')
       
   201 							{
       
   202 								$attrName = $xml->namespaceURI . $this->separator . $xml->localName;
       
   203 							}
       
   204 							else
       
   205 							{
       
   206 								$attrName = $xml->localName;
       
   207 							}
       
   208 							$attributes[$attrName] = $xml->value;
       
   209 						}
       
   210 						$this->tag_open(null, $tagName, $attributes);
       
   211 						if ($empty)
       
   212 						{
       
   213 							$this->tag_close(null, $tagName);
       
   214 						}
       
   215 						break;
       
   216 					case constant('XMLReader::TEXT'):
       
   217 
       
   218 					case constant('XMLReader::CDATA'):
       
   219 						$this->cdata(null, $xml->value);
       
   220 						break;
       
   221 				}
       
   222 			}
       
   223 			if ($error = libxml_get_last_error())
       
   224 			{
       
   225 				$this->error_code = $error->code;
       
   226 				$this->error_string = $error->message;
       
   227 				$this->current_line = $error->line;
       
   228 				$this->current_column = $error->column;
       
   229 				return false;
       
   230 			}
       
   231 			else
       
   232 			{
       
   233 				return true;
       
   234 			}
       
   235 		}
       
   236 	}
       
   237 
       
   238 	public function get_error_code()
       
   239 	{
       
   240 		return $this->error_code;
       
   241 	}
       
   242 
       
   243 	public function get_error_string()
       
   244 	{
       
   245 		return $this->error_string;
       
   246 	}
       
   247 
       
   248 	public function get_current_line()
       
   249 	{
       
   250 		return $this->current_line;
       
   251 	}
       
   252 
       
   253 	public function get_current_column()
       
   254 	{
       
   255 		return $this->current_column;
       
   256 	}
       
   257 
       
   258 	public function get_current_byte()
       
   259 	{
       
   260 		return $this->current_byte;
       
   261 	}
       
   262 
       
   263 	public function get_data()
       
   264 	{
       
   265 		return $this->data;
       
   266 	}
       
   267 
       
   268 	public function tag_open($parser, $tag, $attributes)
       
   269 	{
       
   270 		list($this->namespace[], $this->element[]) = $this->split_ns($tag);
       
   271 
       
   272 		$attribs = array();
       
   273 		foreach ($attributes as $name => $value)
       
   274 		{
       
   275 			list($attrib_namespace, $attribute) = $this->split_ns($name);
       
   276 			$attribs[$attrib_namespace][$attribute] = $value;
       
   277 		}
       
   278 
       
   279 		if (isset($attribs[SIMPLEPIE_NAMESPACE_XML]['base']))
       
   280 		{
       
   281 			$base = $this->registry->call('Misc', 'absolutize_url', array($attribs[SIMPLEPIE_NAMESPACE_XML]['base'], end($this->xml_base)));
       
   282 			if ($base !== false)
       
   283 			{
       
   284 				$this->xml_base[] = $base;
       
   285 				$this->xml_base_explicit[] = true;
       
   286 			}
       
   287 		}
       
   288 		else
       
   289 		{
       
   290 			$this->xml_base[] = end($this->xml_base);
       
   291 			$this->xml_base_explicit[] = end($this->xml_base_explicit);
       
   292 		}
       
   293 
       
   294 		if (isset($attribs[SIMPLEPIE_NAMESPACE_XML]['lang']))
       
   295 		{
       
   296 			$this->xml_lang[] = $attribs[SIMPLEPIE_NAMESPACE_XML]['lang'];
       
   297 		}
       
   298 		else
       
   299 		{
       
   300 			$this->xml_lang[] = end($this->xml_lang);
       
   301 		}
       
   302 
       
   303 		if ($this->current_xhtml_construct >= 0)
       
   304 		{
       
   305 			$this->current_xhtml_construct++;
       
   306 			if (end($this->namespace) === SIMPLEPIE_NAMESPACE_XHTML)
       
   307 			{
       
   308 				$this->data['data'] .= '<' . end($this->element);
       
   309 				if (isset($attribs['']))
       
   310 				{
       
   311 					foreach ($attribs[''] as $name => $value)
       
   312 					{
       
   313 						$this->data['data'] .= ' ' . $name . '="' . htmlspecialchars($value, ENT_COMPAT, $this->encoding) . '"';
       
   314 					}
       
   315 				}
       
   316 				$this->data['data'] .= '>';
       
   317 			}
       
   318 		}
       
   319 		else
       
   320 		{
       
   321 			$this->datas[] =& $this->data;
       
   322 			$this->data =& $this->data['child'][end($this->namespace)][end($this->element)][];
       
   323 			$this->data = array('data' => '', 'attribs' => $attribs, 'xml_base' => end($this->xml_base), 'xml_base_explicit' => end($this->xml_base_explicit), 'xml_lang' => end($this->xml_lang));
       
   324 			if ((end($this->namespace) === SIMPLEPIE_NAMESPACE_ATOM_03 && in_array(end($this->element), array('title', 'tagline', 'copyright', 'info', 'summary', 'content')) && isset($attribs['']['mode']) && $attribs['']['mode'] === 'xml')
       
   325 			|| (end($this->namespace) === SIMPLEPIE_NAMESPACE_ATOM_10 && in_array(end($this->element), array('rights', 'subtitle', 'summary', 'info', 'title', 'content')) && isset($attribs['']['type']) && $attribs['']['type'] === 'xhtml')
       
   326 			|| (end($this->namespace) === SIMPLEPIE_NAMESPACE_RSS_20 && in_array(end($this->element), array('title')))
       
   327 			|| (end($this->namespace) === SIMPLEPIE_NAMESPACE_RSS_090 && in_array(end($this->element), array('title')))
       
   328 			|| (end($this->namespace) === SIMPLEPIE_NAMESPACE_RSS_10 && in_array(end($this->element), array('title'))))
       
   329 			{
       
   330 				$this->current_xhtml_construct = 0;
       
   331 			}
       
   332 		}
       
   333 	}
       
   334 
       
   335 	public function cdata($parser, $cdata)
       
   336 	{
       
   337 		if ($this->current_xhtml_construct >= 0)
       
   338 		{
       
   339 			$this->data['data'] .= htmlspecialchars($cdata, ENT_QUOTES, $this->encoding);
       
   340 		}
       
   341 		else
       
   342 		{
       
   343 			$this->data['data'] .= $cdata;
       
   344 		}
       
   345 	}
       
   346 
       
   347 	public function tag_close($parser, $tag)
       
   348 	{
       
   349 		if ($this->current_xhtml_construct >= 0)
       
   350 		{
       
   351 			$this->current_xhtml_construct--;
       
   352 			if (end($this->namespace) === SIMPLEPIE_NAMESPACE_XHTML && !in_array(end($this->element), array('area', 'base', 'basefont', 'br', 'col', 'frame', 'hr', 'img', 'input', 'isindex', 'link', 'meta', 'param')))
       
   353 			{
       
   354 				$this->data['data'] .= '</' . end($this->element) . '>';
       
   355 			}
       
   356 		}
       
   357 		if ($this->current_xhtml_construct === -1)
       
   358 		{
       
   359 			$this->data =& $this->datas[count($this->datas) - 1];
       
   360 			array_pop($this->datas);
       
   361 		}
       
   362 
       
   363 		array_pop($this->element);
       
   364 		array_pop($this->namespace);
       
   365 		array_pop($this->xml_base);
       
   366 		array_pop($this->xml_base_explicit);
       
   367 		array_pop($this->xml_lang);
       
   368 	}
       
   369 
       
   370 	public function split_ns($string)
       
   371 	{
       
   372 		static $cache = array();
       
   373 		if (!isset($cache[$string]))
       
   374 		{
       
   375 			if ($pos = strpos($string, $this->separator))
       
   376 			{
       
   377 				static $separator_length;
       
   378 				if (!$separator_length)
       
   379 				{
       
   380 					$separator_length = strlen($this->separator);
       
   381 				}
       
   382 				$namespace = substr($string, 0, $pos);
       
   383 				$local_name = substr($string, $pos + $separator_length);
       
   384 				if (strtolower($namespace) === SIMPLEPIE_NAMESPACE_ITUNES)
       
   385 				{
       
   386 					$namespace = SIMPLEPIE_NAMESPACE_ITUNES;
       
   387 				}
       
   388 
       
   389 				// Normalize the Media RSS namespaces
       
   390 				if ($namespace === SIMPLEPIE_NAMESPACE_MEDIARSS_WRONG ||
       
   391 					$namespace === SIMPLEPIE_NAMESPACE_MEDIARSS_WRONG2 ||
       
   392 					$namespace === SIMPLEPIE_NAMESPACE_MEDIARSS_WRONG3 ||
       
   393 					$namespace === SIMPLEPIE_NAMESPACE_MEDIARSS_WRONG4 ||
       
   394 					$namespace === SIMPLEPIE_NAMESPACE_MEDIARSS_WRONG5 )
       
   395 				{
       
   396 					$namespace = SIMPLEPIE_NAMESPACE_MEDIARSS;
       
   397 				}
       
   398 				$cache[$string] = array($namespace, $local_name);
       
   399 			}
       
   400 			else
       
   401 			{
       
   402 				$cache[$string] = array('', $string);
       
   403 			}
       
   404 		}
       
   405 		return $cache[$string];
       
   406 	}
       
   407 }