wp/wp-includes/SimplePie/Sanitize.php
changeset 16 a86126ab1dd4
parent 0 d970ebf37754
child 19 3d72ae0968f4
equal deleted inserted replaced
15:3d4e9c994f10 16:a86126ab1dd4
     3  * SimplePie
     3  * SimplePie
     4  *
     4  *
     5  * A PHP-Based RSS and Atom Feed Framework.
     5  * A PHP-Based RSS and Atom Feed Framework.
     6  * Takes the hard work out of managing a complete RSS/Atom solution.
     6  * Takes the hard work out of managing a complete RSS/Atom solution.
     7  *
     7  *
     8  * Copyright (c) 2004-2012, Ryan Parman, Geoffrey Sneddon, Ryan McCue, and contributors
     8  * Copyright (c) 2004-2016, Ryan Parman, Sam Sneddon, Ryan McCue, and contributors
     9  * All rights reserved.
     9  * All rights reserved.
    10  *
    10  *
    11  * Redistribution and use in source and binary forms, with or without modification, are
    11  * Redistribution and use in source and binary forms, with or without modification, are
    12  * permitted provided that the following conditions are met:
    12  * permitted provided that the following conditions are met:
    13  *
    13  *
    31  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
    31  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
    32  * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
    32  * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
    33  * POSSIBILITY OF SUCH DAMAGE.
    33  * POSSIBILITY OF SUCH DAMAGE.
    34  *
    34  *
    35  * @package SimplePie
    35  * @package SimplePie
    36  * @version 1.3.1
    36  * @copyright 2004-2016 Ryan Parman, Sam Sneddon, Ryan McCue
    37  * @copyright 2004-2012 Ryan Parman, Geoffrey Sneddon, Ryan McCue
       
    38  * @author Ryan Parman
    37  * @author Ryan Parman
    39  * @author Geoffrey Sneddon
    38  * @author Sam Sneddon
    40  * @author Ryan McCue
    39  * @author Ryan McCue
    41  * @link http://simplepie.org/ SimplePie
    40  * @link http://simplepie.org/ SimplePie
    42  * @license http://www.opensource.org/licenses/bsd-license.php BSD License
    41  * @license http://www.opensource.org/licenses/bsd-license.php BSD License
    43  */
    42  */
    44 
    43 
    59 	// Options
    58 	// Options
    60 	var $remove_div = true;
    59 	var $remove_div = true;
    61 	var $image_handler = '';
    60 	var $image_handler = '';
    62 	var $strip_htmltags = array('base', 'blink', 'body', 'doctype', 'embed', 'font', 'form', 'frame', 'frameset', 'html', 'iframe', 'input', 'marquee', 'meta', 'noscript', 'object', 'param', 'script', 'style');
    61 	var $strip_htmltags = array('base', 'blink', 'body', 'doctype', 'embed', 'font', 'form', 'frame', 'frameset', 'html', 'iframe', 'input', 'marquee', 'meta', 'noscript', 'object', 'param', 'script', 'style');
    63 	var $encode_instead_of_strip = false;
    62 	var $encode_instead_of_strip = false;
    64 	var $strip_attributes = array('bgsound', 'class', 'expr', 'id', 'style', 'onclick', 'onerror', 'onfinish', 'onmouseover', 'onmouseout', 'onfocus', 'onblur', 'lowsrc', 'dynsrc');
    63 	var $strip_attributes = array('bgsound', 'expr', 'id', 'style', 'onclick', 'onerror', 'onfinish', 'onmouseover', 'onmouseout', 'onfocus', 'onblur', 'lowsrc', 'dynsrc');
       
    64 	var $add_attributes = array('audio' => array('preload' => 'none'), 'iframe' => array('sandbox' => 'allow-scripts allow-same-origin'), 'video' => array('preload' => 'none'));
    65 	var $strip_comments = false;
    65 	var $strip_comments = false;
    66 	var $output_encoding = 'UTF-8';
    66 	var $output_encoding = 'UTF-8';
    67 	var $enable_cache = true;
    67 	var $enable_cache = true;
    68 	var $cache_location = './cache';
    68 	var $cache_location = './cache';
    69 	var $cache_name_function = 'md5';
    69 	var $cache_name_function = 'md5';
   158 	public function encode_instead_of_strip($encode = false)
   158 	public function encode_instead_of_strip($encode = false)
   159 	{
   159 	{
   160 		$this->encode_instead_of_strip = (bool) $encode;
   160 		$this->encode_instead_of_strip = (bool) $encode;
   161 	}
   161 	}
   162 
   162 
   163 	public function strip_attributes($attribs = array('bgsound', 'class', 'expr', 'id', 'style', 'onclick', 'onerror', 'onfinish', 'onmouseover', 'onmouseout', 'onfocus', 'onblur', 'lowsrc', 'dynsrc'))
   163 	public function strip_attributes($attribs = array('bgsound', 'expr', 'id', 'style', 'onclick', 'onerror', 'onfinish', 'onmouseover', 'onmouseout', 'onfocus', 'onblur', 'lowsrc', 'dynsrc'))
   164 	{
   164 	{
   165 		if ($attribs)
   165 		if ($attribs)
   166 		{
   166 		{
   167 			if (is_array($attribs))
   167 			if (is_array($attribs))
   168 			{
   168 			{
   174 			}
   174 			}
   175 		}
   175 		}
   176 		else
   176 		else
   177 		{
   177 		{
   178 			$this->strip_attributes = false;
   178 			$this->strip_attributes = false;
       
   179 		}
       
   180 	}
       
   181 
       
   182 	public function add_attributes($attribs = array('audio' => array('preload' => 'none'), 'iframe' => array('sandbox' => 'allow-scripts allow-same-origin'), 'video' => array('preload' => 'none')))
       
   183 	{
       
   184 		if ($attribs)
       
   185 		{
       
   186 			if (is_array($attribs))
       
   187 			{
       
   188 				$this->add_attributes = $attribs;
       
   189 			}
       
   190 			else
       
   191 			{
       
   192 				$this->add_attributes = explode(',', $attribs);
       
   193 			}
       
   194 		}
       
   195 		else
       
   196 		{
       
   197 			$this->add_attributes = false;
   179 		}
   198 		}
   180 	}
   199 	}
   181 
   200 
   182 	public function strip_comments($strip = false)
   201 	public function strip_comments($strip = false)
   183 	{
   202 	{
   247 			if ($type & (SIMPLEPIE_CONSTRUCT_HTML | SIMPLEPIE_CONSTRUCT_XHTML))
   266 			if ($type & (SIMPLEPIE_CONSTRUCT_HTML | SIMPLEPIE_CONSTRUCT_XHTML))
   248 			{
   267 			{
   249 
   268 
   250 				if (!class_exists('DOMDocument'))
   269 				if (!class_exists('DOMDocument'))
   251 				{
   270 				{
   252 					$this->registry->call('Misc', 'error', array('DOMDocument not found, unable to use sanitizer', E_USER_WARNING, __FILE__, __LINE__));
   271 					throw new SimplePie_Exception('DOMDocument not found, unable to use sanitizer');
   253 					return '';
       
   254 				}
   272 				}
   255 				$document = new DOMDocument();
   273 				$document = new DOMDocument();
   256 				$document->encoding = 'UTF-8';
   274 				$document->encoding = 'UTF-8';
       
   275 
   257 				$data = $this->preprocess($data, $type);
   276 				$data = $this->preprocess($data, $type);
   258 
   277 
   259 				set_error_handler(array('SimplePie_Misc', 'silence_errors'));
   278 				set_error_handler(array('SimplePie_Misc', 'silence_errors'));
   260 				$document->loadHTML($data);
   279 				$document->loadHTML($data);
   261 				restore_error_handler();
   280 				restore_error_handler();
   262 
   281 
       
   282 				$xpath = new DOMXPath($document);
       
   283 
   263 				// Strip comments
   284 				// Strip comments
   264 				if ($this->strip_comments)
   285 				if ($this->strip_comments)
   265 				{
   286 				{
   266 					$xpath = new DOMXPath($document);
       
   267 					$comments = $xpath->query('//comment()');
   287 					$comments = $xpath->query('//comment()');
   268 
   288 
   269 					foreach ($comments as $comment)
   289 					foreach ($comments as $comment)
   270 					{
   290 					{
   271 						$comment->parentNode->removeChild($comment);
   291 						$comment->parentNode->removeChild($comment);
   277 				// http://diveintomark.org/archives/2003/06/12/how_to_consume_rss_safely
   297 				// http://diveintomark.org/archives/2003/06/12/how_to_consume_rss_safely
   278 				if ($this->strip_htmltags)
   298 				if ($this->strip_htmltags)
   279 				{
   299 				{
   280 					foreach ($this->strip_htmltags as $tag)
   300 					foreach ($this->strip_htmltags as $tag)
   281 					{
   301 					{
   282 						$this->strip_tag($tag, $document, $type);
   302 						$this->strip_tag($tag, $document, $xpath, $type);
   283 					}
   303 					}
   284 				}
   304 				}
   285 
   305 
   286 				if ($this->strip_attributes)
   306 				if ($this->strip_attributes)
   287 				{
   307 				{
   288 					foreach ($this->strip_attributes as $attrib)
   308 					foreach ($this->strip_attributes as $attrib)
   289 					{
   309 					{
   290 						$this->strip_attr($attrib, $document);
   310 						$this->strip_attr($attrib, $xpath);
       
   311 					}
       
   312 				}
       
   313 
       
   314 				if ($this->add_attributes)
       
   315 				{
       
   316 					foreach ($this->add_attributes as $tag => $valuePairs)
       
   317 					{
       
   318 						$this->add_attr($tag, $valuePairs, $document);
   291 					}
   319 					}
   292 				}
   320 				}
   293 
   321 
   294 				// Replace relative URLs
   322 				// Replace relative URLs
   295 				$this->base = $base;
   323 				$this->base = $base;
   324 									{
   352 									{
   325 										$img->setAttribute('src', $this->image_handler . $image_url);
   353 										$img->setAttribute('src', $this->image_handler . $image_url);
   326 									}
   354 									}
   327 									else
   355 									else
   328 									{
   356 									{
   329 										trigger_error("$this->cache_location is not writeable. Make sure you've set the correct relative or absolute path, and that the location is server-writable.", E_USER_WARNING);
   357 										trigger_error("$this->cache_location is not writable. Make sure you've set the correct relative or absolute path, and that the location is server-writable.", E_USER_WARNING);
   330 									}
   358 									}
   331 								}
   359 								}
   332 							}
   360 							}
   333 						}
   361 						}
   334 					}
   362 					}
   335 				}
   363 				}
   336 
   364 
   337 				// Remove the DOCTYPE
   365 				// Get content node
   338 				// Seems to cause segfaulting if we don't do this
   366 				$div = $document->getElementsByTagName('body')->item(0)->firstChild;
   339 				if ($document->firstChild instanceof DOMDocumentType)
       
   340 				{
       
   341 					$document->removeChild($document->firstChild);
       
   342 				}
       
   343 
       
   344 				// Move everything from the body to the root
       
   345 				$real_body = $document->getElementsByTagName('body')->item(0)->childNodes->item(0);
       
   346 				$document->replaceChild($real_body, $document->firstChild);
       
   347 
       
   348 				// Finally, convert to a HTML string
   367 				// Finally, convert to a HTML string
   349 				$data = trim($document->saveHTML());
   368 				$data = trim($document->saveHTML($div));
   350 
   369 
   351 				if ($this->remove_div)
   370 				if ($this->remove_div)
   352 				{
   371 				{
   353 					$data = preg_replace('/^<div' . SIMPLEPIE_PCRE_XML_ATTRIBUTE . '>/', '', $data);
   372 					$data = preg_replace('/^<div' . SIMPLEPIE_PCRE_XML_ATTRIBUTE . '>/', '', $data);
   354 					$data = preg_replace('/<\/div>$/', '', $data);
   373 					$data = preg_replace('/<\/div>$/', '', $data);
   382 	}
   401 	}
   383 
   402 
   384 	protected function preprocess($html, $type)
   403 	protected function preprocess($html, $type)
   385 	{
   404 	{
   386 		$ret = '';
   405 		$ret = '';
       
   406 		$html = preg_replace('%</?(?:html|body)[^>]*?'.'>%is', '', $html);
   387 		if ($type & ~SIMPLEPIE_CONSTRUCT_XHTML)
   407 		if ($type & ~SIMPLEPIE_CONSTRUCT_XHTML)
   388 		{
   408 		{
   389 			// Atom XHTML constructs are wrapped with a div by default
   409 			// Atom XHTML constructs are wrapped with a div by default
   390 			// Note: No protection if $html contains a stray </div>!
   410 			// Note: No protection if $html contains a stray </div>!
   391 			$html = '<div>' . $html . '</div>';
   411 			$html = '<div>' . $html . '</div>';
   454 		{
   474 		{
   455 			return '';
   475 			return '';
   456 		}
   476 		}
   457 	}
   477 	}
   458 
   478 
   459 	protected function strip_tag($tag, $document, $type)
   479 	protected function strip_tag($tag, $document, $xpath, $type)
   460 	{
   480 	{
   461 		$xpath = new DOMXPath($document);
       
   462 		$elements = $xpath->query('body//' . $tag);
   481 		$elements = $xpath->query('body//' . $tag);
   463 		if ($this->encode_instead_of_strip)
   482 		if ($this->encode_instead_of_strip)
   464 		{
   483 		{
   465 			foreach ($elements as $element)
   484 			foreach ($elements as $element)
   466 			{
   485 			{
   539 				$element->parentNode->replaceChild($fragment, $element);
   558 				$element->parentNode->replaceChild($fragment, $element);
   540 			}
   559 			}
   541 		}
   560 		}
   542 	}
   561 	}
   543 
   562 
   544 	protected function strip_attr($attrib, $document)
   563 	protected function strip_attr($attrib, $xpath)
   545 	{
   564 	{
   546 		$xpath = new DOMXPath($document);
       
   547 		$elements = $xpath->query('//*[@' . $attrib . ']');
   565 		$elements = $xpath->query('//*[@' . $attrib . ']');
   548 
   566 
   549 		foreach ($elements as $element)
   567 		foreach ($elements as $element)
   550 		{
   568 		{
   551 			$element->removeAttribute($attrib);
   569 			$element->removeAttribute($attrib);
   552 		}
   570 		}
   553 	}
   571 	}
       
   572 
       
   573 	protected function add_attr($tag, $valuePairs, $document)
       
   574 	{
       
   575 		$elements = $document->getElementsByTagName($tag);
       
   576 		foreach ($elements as $element)
       
   577 		{
       
   578 			foreach ($valuePairs as $attrib => $value)
       
   579 			{
       
   580 				$element->setAttribute($attrib, $value);
       
   581 			}
       
   582 		}
       
   583 	}
   554 }
   584 }