wp/wp-includes/SimplePie/Sanitize.php
author ymh <ymh.work@gmail.com>
Wed, 06 Nov 2013 03:21:17 +0000
changeset 0 d970ebf37754
child 16 a86126ab1dd4
permissions -rw-r--r--
first import
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
0
d970ebf37754 first import
ymh <ymh.work@gmail.com>
parents:
diff changeset
     1
<?php
d970ebf37754 first import
ymh <ymh.work@gmail.com>
parents:
diff changeset
     2
/**
d970ebf37754 first import
ymh <ymh.work@gmail.com>
parents:
diff changeset
     3
 * SimplePie
d970ebf37754 first import
ymh <ymh.work@gmail.com>
parents:
diff changeset
     4
 *
d970ebf37754 first import
ymh <ymh.work@gmail.com>
parents:
diff changeset
     5
 * A PHP-Based RSS and Atom Feed Framework.
d970ebf37754 first import
ymh <ymh.work@gmail.com>
parents:
diff changeset
     6
 * Takes the hard work out of managing a complete RSS/Atom solution.
d970ebf37754 first import
ymh <ymh.work@gmail.com>
parents:
diff changeset
     7
 *
d970ebf37754 first import
ymh <ymh.work@gmail.com>
parents:
diff changeset
     8
 * Copyright (c) 2004-2012, Ryan Parman, Geoffrey Sneddon, Ryan McCue, and contributors
d970ebf37754 first import
ymh <ymh.work@gmail.com>
parents:
diff changeset
     9
 * All rights reserved.
d970ebf37754 first import
ymh <ymh.work@gmail.com>
parents:
diff changeset
    10
 *
d970ebf37754 first import
ymh <ymh.work@gmail.com>
parents:
diff changeset
    11
 * Redistribution and use in source and binary forms, with or without modification, are
d970ebf37754 first import
ymh <ymh.work@gmail.com>
parents:
diff changeset
    12
 * permitted provided that the following conditions are met:
d970ebf37754 first import
ymh <ymh.work@gmail.com>
parents:
diff changeset
    13
 *
d970ebf37754 first import
ymh <ymh.work@gmail.com>
parents:
diff changeset
    14
 * 	* Redistributions of source code must retain the above copyright notice, this list of
d970ebf37754 first import
ymh <ymh.work@gmail.com>
parents:
diff changeset
    15
 * 	  conditions and the following disclaimer.
d970ebf37754 first import
ymh <ymh.work@gmail.com>
parents:
diff changeset
    16
 *
d970ebf37754 first import
ymh <ymh.work@gmail.com>
parents:
diff changeset
    17
 * 	* Redistributions in binary form must reproduce the above copyright notice, this list
d970ebf37754 first import
ymh <ymh.work@gmail.com>
parents:
diff changeset
    18
 * 	  of conditions and the following disclaimer in the documentation and/or other materials
d970ebf37754 first import
ymh <ymh.work@gmail.com>
parents:
diff changeset
    19
 * 	  provided with the distribution.
d970ebf37754 first import
ymh <ymh.work@gmail.com>
parents:
diff changeset
    20
 *
d970ebf37754 first import
ymh <ymh.work@gmail.com>
parents:
diff changeset
    21
 * 	* Neither the name of the SimplePie Team nor the names of its contributors may be used
d970ebf37754 first import
ymh <ymh.work@gmail.com>
parents:
diff changeset
    22
 * 	  to endorse or promote products derived from this software without specific prior
d970ebf37754 first import
ymh <ymh.work@gmail.com>
parents:
diff changeset
    23
 * 	  written permission.
d970ebf37754 first import
ymh <ymh.work@gmail.com>
parents:
diff changeset
    24
 *
d970ebf37754 first import
ymh <ymh.work@gmail.com>
parents:
diff changeset
    25
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS
d970ebf37754 first import
ymh <ymh.work@gmail.com>
parents:
diff changeset
    26
 * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
d970ebf37754 first import
ymh <ymh.work@gmail.com>
parents:
diff changeset
    27
 * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS
d970ebf37754 first import
ymh <ymh.work@gmail.com>
parents:
diff changeset
    28
 * AND CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
d970ebf37754 first import
ymh <ymh.work@gmail.com>
parents:
diff changeset
    29
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
d970ebf37754 first import
ymh <ymh.work@gmail.com>
parents:
diff changeset
    30
 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
d970ebf37754 first import
ymh <ymh.work@gmail.com>
parents:
diff changeset
    31
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
d970ebf37754 first import
ymh <ymh.work@gmail.com>
parents:
diff changeset
    32
 * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
d970ebf37754 first import
ymh <ymh.work@gmail.com>
parents:
diff changeset
    33
 * POSSIBILITY OF SUCH DAMAGE.
d970ebf37754 first import
ymh <ymh.work@gmail.com>
parents:
diff changeset
    34
 *
d970ebf37754 first import
ymh <ymh.work@gmail.com>
parents:
diff changeset
    35
 * @package SimplePie
d970ebf37754 first import
ymh <ymh.work@gmail.com>
parents:
diff changeset
    36
 * @version 1.3.1
d970ebf37754 first import
ymh <ymh.work@gmail.com>
parents:
diff changeset
    37
 * @copyright 2004-2012 Ryan Parman, Geoffrey Sneddon, Ryan McCue
d970ebf37754 first import
ymh <ymh.work@gmail.com>
parents:
diff changeset
    38
 * @author Ryan Parman
d970ebf37754 first import
ymh <ymh.work@gmail.com>
parents:
diff changeset
    39
 * @author Geoffrey Sneddon
d970ebf37754 first import
ymh <ymh.work@gmail.com>
parents:
diff changeset
    40
 * @author Ryan McCue
d970ebf37754 first import
ymh <ymh.work@gmail.com>
parents:
diff changeset
    41
 * @link http://simplepie.org/ SimplePie
d970ebf37754 first import
ymh <ymh.work@gmail.com>
parents:
diff changeset
    42
 * @license http://www.opensource.org/licenses/bsd-license.php BSD License
d970ebf37754 first import
ymh <ymh.work@gmail.com>
parents:
diff changeset
    43
 */
d970ebf37754 first import
ymh <ymh.work@gmail.com>
parents:
diff changeset
    44
d970ebf37754 first import
ymh <ymh.work@gmail.com>
parents:
diff changeset
    45
/**
d970ebf37754 first import
ymh <ymh.work@gmail.com>
parents:
diff changeset
    46
 * Used for data cleanup and post-processing
d970ebf37754 first import
ymh <ymh.work@gmail.com>
parents:
diff changeset
    47
 *
d970ebf37754 first import
ymh <ymh.work@gmail.com>
parents:
diff changeset
    48
 *
d970ebf37754 first import
ymh <ymh.work@gmail.com>
parents:
diff changeset
    49
 * This class can be overloaded with {@see SimplePie::set_sanitize_class()}
d970ebf37754 first import
ymh <ymh.work@gmail.com>
parents:
diff changeset
    50
 *
d970ebf37754 first import
ymh <ymh.work@gmail.com>
parents:
diff changeset
    51
 * @package SimplePie
d970ebf37754 first import
ymh <ymh.work@gmail.com>
parents:
diff changeset
    52
 * @todo Move to using an actual HTML parser (this will allow tags to be properly stripped, and to switch between HTML and XHTML), this will also make it easier to shorten a string while preserving HTML tags
d970ebf37754 first import
ymh <ymh.work@gmail.com>
parents:
diff changeset
    53
 */
d970ebf37754 first import
ymh <ymh.work@gmail.com>
parents:
diff changeset
    54
class SimplePie_Sanitize
d970ebf37754 first import
ymh <ymh.work@gmail.com>
parents:
diff changeset
    55
{
d970ebf37754 first import
ymh <ymh.work@gmail.com>
parents:
diff changeset
    56
	// Private vars
	var $base;

	/**
	 * Registry object stored by set_registry().
	 *
	 * Declared explicitly so that the assignment in set_registry() targets a
	 * real property instead of creating a dynamic one at runtime.
	 */
	var $registry;

	// Options
	var $remove_div = true;
	var $image_handler = '';
	var $strip_htmltags = array('base', 'blink', 'body', 'doctype', 'embed', 'font', 'form', 'frame', 'frameset', 'html', 'iframe', 'input', 'marquee', 'meta', 'noscript', 'object', 'param', 'script', 'style');
	var $encode_instead_of_strip = false;
	var $strip_attributes = array('bgsound', 'class', 'expr', 'id', 'style', 'onclick', 'onerror', 'onfinish', 'onmouseover', 'onmouseout', 'onfocus', 'onblur', 'lowsrc', 'dynsrc');
	var $strip_comments = false;
	var $output_encoding = 'UTF-8';
	var $enable_cache = true;
	var $cache_location = './cache';
	var $cache_name_function = 'md5';
	var $timeout = 10;
	var $useragent = '';
	var $force_fsockopen = false;
	var $replace_url_attributes = null;
d970ebf37754 first import
ymh <ymh.work@gmail.com>
parents:
diff changeset
    74
d970ebf37754 first import
ymh <ymh.work@gmail.com>
parents:
diff changeset
    75
	/**
	 * Create a sanitizer and initialise its URL-replacement map.
	 */
	public function __construct()
	{
		// Passing null asks set_url_replacements() to install its built-in map.
		$defaults = null;
		$this->set_url_replacements($defaults);
	}
d970ebf37754 first import
ymh <ymh.work@gmail.com>
parents:
diff changeset
    80
d970ebf37754 first import
ymh <ymh.work@gmail.com>
parents:
diff changeset
    81
	/**
	 * Store the remove_div option.
	 *
	 * @param bool $enable Value for the option; coerced to a boolean
	 */
	public function remove_div($enable = true)
	{
		$flag = (bool) $enable;
		$this->remove_div = $flag;
	}
d970ebf37754 first import
ymh <ymh.work@gmail.com>
parents:
diff changeset
    85
d970ebf37754 first import
ymh <ymh.work@gmail.com>
parents:
diff changeset
    86
	/**
	 * Store the image handler option.
	 *
	 * @param string|false $page Handler value; any falsy value stores false,
	 *                           anything else is stored as a string
	 */
	public function set_image_handler($page = false)
	{
		$this->image_handler = $page ? (string) $page : false;
	}
d970ebf37754 first import
ymh <ymh.work@gmail.com>
parents:
diff changeset
    97
d970ebf37754 first import
ymh <ymh.work@gmail.com>
parents:
diff changeset
    98
	/**
	 * Remember the registry object for later use by this sanitizer.
	 *
	 * @param SimplePie_Registry $registry Registry instance to keep
	 */
	public function set_registry(SimplePie_Registry $registry)
	{
		$this->registry = $registry;
	}
d970ebf37754 first import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   102
d970ebf37754 first import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   103
	/**
	 * Copy cache settings onto this sanitizer.
	 *
	 * Each setting is only overwritten when a usable value is supplied;
	 * otherwise the current value is left alone.
	 *
	 * @param bool|null $enable_cache Stored as a boolean unless null
	 * @param string $cache_location Stored as a string when truthy
	 * @param string $cache_name_function Stored as a string when truthy
	 * @param string $cache_class Accepted but not read by this method
	 */
	public function pass_cache_data($enable_cache = true, $cache_location = './cache', $cache_name_function = 'md5', $cache_class = 'SimplePie_Cache')
	{
		// null is the only value that leaves the flag untouched; false is stored.
		if ($enable_cache !== null)
		{
			$this->enable_cache = (bool) $enable_cache;
		}

		if (!empty($cache_location))
		{
			$this->cache_location = (string) $cache_location;
		}

		if (!empty($cache_name_function))
		{
			$this->cache_name_function = (string) $cache_name_function;
		}
	}
d970ebf37754 first import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   120
d970ebf37754 first import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   121
	/**
	 * Copy file-fetching settings onto this sanitizer.
	 *
	 * Each setting is only overwritten when a usable value is supplied;
	 * otherwise the current value is left alone. Values are stored with the
	 * same types as the declared property defaults (integer timeout, boolean
	 * force_fsockopen) rather than as strings.
	 *
	 * @param string $file_class Accepted but not read by this method
	 * @param int $timeout Timeout value; ignored unless it is a positive integer after casting
	 * @param string $useragent Stored as a string when truthy
	 * @param bool $force_fsockopen When truthy, the flag is switched on
	 */
	public function pass_file_data($file_class = 'SimplePie_File', $timeout = 10, $useragent = '', $force_fsockopen = false)
	{
		// Cast first so that a value which truncates to 0 cannot overwrite the current timeout.
		$seconds = (int) $timeout;
		if ($seconds > 0)
		{
			$this->timeout = $seconds;
		}

		if ($useragent)
		{
			$this->useragent = (string) $useragent;
		}

		// Store a real boolean instead of the string '1'. A falsy argument
		// still leaves the current setting unchanged, as before.
		if ($force_fsockopen)
		{
			$this->force_fsockopen = true;
		}
	}
d970ebf37754 first import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   138
d970ebf37754 first import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   139
	/**
	 * Set the list of HTML tag names to strip.
	 *
	 * @param array|string|false $tags Array of tag names, a comma-separated
	 *                                 string of tag names, or a falsy value
	 *                                 to store false
	 */
	public function strip_htmltags($tags = array('base', 'blink', 'body', 'doctype', 'embed', 'font', 'form', 'frame', 'frameset', 'html', 'iframe', 'input', 'marquee', 'meta', 'noscript', 'object', 'param', 'script', 'style'))
	{
		if (!$tags)
		{
			$this->strip_htmltags = false;
			return;
		}

		if (is_array($tags))
		{
			$this->strip_htmltags = $tags;
			return;
		}

		// Trim each entry so 'script, style' yields 'style' rather than ' style',
		// which could never match an element name.
		$this->strip_htmltags = array_map('trim', explode(',', $tags));
	}
d970ebf37754 first import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   157
d970ebf37754 first import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   158
	/**
	 * Store the encode_instead_of_strip option.
	 *
	 * @param bool $encode Value for the option; coerced to a boolean
	 */
	public function encode_instead_of_strip($encode = false)
	{
		$flag = (bool) $encode;
		$this->encode_instead_of_strip = $flag;
	}
d970ebf37754 first import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   162
d970ebf37754 first import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   163
	/**
	 * Set the list of attribute names to strip.
	 *
	 * @param array|string|false $attribs Array of attribute names, a
	 *                                    comma-separated string of attribute
	 *                                    names, or a falsy value to store false
	 */
	public function strip_attributes($attribs = array('bgsound', 'class', 'expr', 'id', 'style', 'onclick', 'onerror', 'onfinish', 'onmouseover', 'onmouseout', 'onfocus', 'onblur', 'lowsrc', 'dynsrc'))
	{
		if (!$attribs)
		{
			$this->strip_attributes = false;
			return;
		}

		if (is_array($attribs))
		{
			$this->strip_attributes = $attribs;
			return;
		}

		// Trim each entry so 'class, id' yields 'id' rather than ' id',
		// which could never match an attribute name.
		$this->strip_attributes = array_map('trim', explode(',', $attribs));
	}
d970ebf37754 first import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   181
d970ebf37754 first import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   182
	/**
	 * Store the strip_comments option.
	 *
	 * @param bool $strip Value for the option; coerced to a boolean
	 */
	public function strip_comments($strip = false)
	{
		$flag = (bool) $strip;
		$this->strip_comments = $flag;
	}
d970ebf37754 first import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   186
d970ebf37754 first import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   187
	/**
	 * Store the output encoding name.
	 *
	 * @param string $encoding Encoding name; coerced to a string
	 */
	public function set_output_encoding($encoding = 'UTF-8')
	{
		$name = (string) $encoding;
		$this->output_encoding = $name;
	}
d970ebf37754 first import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   191
d970ebf37754 first import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   192
	/**
d970ebf37754 first import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   193
	 * Set element/attribute key/value pairs of HTML attributes
d970ebf37754 first import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   194
	 * containing URLs that need to be resolved relative to the feed
d970ebf37754 first import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   195
	 *
d970ebf37754 first import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   196
	 * Defaults to |a|@href, |area|@href, |blockquote|@cite, |del|@cite,
d970ebf37754 first import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   197
	 * |form|@action, |img|@longdesc, |img|@src, |input|@src, |ins|@cite,
d970ebf37754 first import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   198
	 * |q|@cite
d970ebf37754 first import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   199
	 *
d970ebf37754 first import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   200
	 * @since 1.0
d970ebf37754 first import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   201
	 * @param array|null $element_attribute Element/attribute key/value pairs, null for default
d970ebf37754 first import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   202
	 */
d970ebf37754 first import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   203
	public function set_url_replacements($element_attribute = null)
	{
		// Built-in element => attribute(s) map, used only when the caller passes null.
		$defaults = array(
			'a' => 'href',
			'area' => 'href',
			'blockquote' => 'cite',
			'del' => 'cite',
			'form' => 'action',
			'img' => array(
				'longdesc',
				'src'
			),
			'input' => 'src',
			'ins' => 'cite',
			'q' => 'cite'
		);

		$map = ($element_attribute === null) ? $defaults : $element_attribute;

		// The array cast wraps a scalar argument in a single-element array.
		$this->replace_url_attributes = (array) $map;
	}
d970ebf37754 first import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   224
d970ebf37754 first import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   225
	public function sanitize($data, $type, $base = '')
d970ebf37754 first import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   226
	{
d970ebf37754 first import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   227
		$data = trim($data);
d970ebf37754 first import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   228
		if ($data !== '' || $type & SIMPLEPIE_CONSTRUCT_IRI)
d970ebf37754 first import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   229
		{
d970ebf37754 first import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   230
			if ($type & SIMPLEPIE_CONSTRUCT_MAYBE_HTML)
d970ebf37754 first import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   231
			{
d970ebf37754 first import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   232
				if (preg_match('/(&(#(x[0-9a-fA-F]+|[0-9]+)|[a-zA-Z0-9]+)|<\/[A-Za-z][^\x09\x0A\x0B\x0C\x0D\x20\x2F\x3E]*' . SIMPLEPIE_PCRE_HTML_ATTRIBUTE . '>)/', $data))
d970ebf37754 first import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   233
				{
d970ebf37754 first import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   234
					$type |= SIMPLEPIE_CONSTRUCT_HTML;
d970ebf37754 first import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   235
				}
d970ebf37754 first import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   236
				else
d970ebf37754 first import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   237
				{
d970ebf37754 first import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   238
					$type |= SIMPLEPIE_CONSTRUCT_TEXT;
d970ebf37754 first import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   239
				}
d970ebf37754 first import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   240
			}
d970ebf37754 first import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   241
d970ebf37754 first import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   242
			if ($type & SIMPLEPIE_CONSTRUCT_BASE64)
d970ebf37754 first import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   243
			{
d970ebf37754 first import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   244
				$data = base64_decode($data);
d970ebf37754 first import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   245
			}
d970ebf37754 first import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   246
d970ebf37754 first import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   247
			if ($type & (SIMPLEPIE_CONSTRUCT_HTML | SIMPLEPIE_CONSTRUCT_XHTML))
d970ebf37754 first import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   248
			{
d970ebf37754 first import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   249
d970ebf37754 first import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   250
				if (!class_exists('DOMDocument'))
d970ebf37754 first import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   251
				{
d970ebf37754 first import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   252
					$this->registry->call('Misc', 'error', array('DOMDocument not found, unable to use sanitizer', E_USER_WARNING, __FILE__, __LINE__));
d970ebf37754 first import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   253
					return '';
d970ebf37754 first import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   254
				}
d970ebf37754 first import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   255
				$document = new DOMDocument();
d970ebf37754 first import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   256
				$document->encoding = 'UTF-8';
d970ebf37754 first import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   257
				$data = $this->preprocess($data, $type);
d970ebf37754 first import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   258
d970ebf37754 first import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   259
				set_error_handler(array('SimplePie_Misc', 'silence_errors'));
d970ebf37754 first import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   260
				$document->loadHTML($data);
d970ebf37754 first import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   261
				restore_error_handler();
d970ebf37754 first import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   262
d970ebf37754 first import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   263
				// Strip comments
d970ebf37754 first import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   264
				if ($this->strip_comments)
d970ebf37754 first import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   265
				{
d970ebf37754 first import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   266
					$xpath = new DOMXPath($document);
d970ebf37754 first import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   267
					$comments = $xpath->query('//comment()');
d970ebf37754 first import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   268
d970ebf37754 first import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   269
					foreach ($comments as $comment)
d970ebf37754 first import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   270
					{
d970ebf37754 first import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   271
						$comment->parentNode->removeChild($comment);
d970ebf37754 first import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   272
					}
d970ebf37754 first import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   273
				}
d970ebf37754 first import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   274
d970ebf37754 first import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   275
				// Strip out HTML tags and attributes that might cause various security problems.
d970ebf37754 first import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   276
				// Based on recommendations by Mark Pilgrim at:
d970ebf37754 first import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   277
				// http://diveintomark.org/archives/2003/06/12/how_to_consume_rss_safely
d970ebf37754 first import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   278
				if ($this->strip_htmltags)
d970ebf37754 first import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   279
				{
d970ebf37754 first import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   280
					foreach ($this->strip_htmltags as $tag)
d970ebf37754 first import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   281
					{
d970ebf37754 first import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   282
						$this->strip_tag($tag, $document, $type);
d970ebf37754 first import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   283
					}
d970ebf37754 first import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   284
				}
d970ebf37754 first import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   285
d970ebf37754 first import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   286
				if ($this->strip_attributes)
d970ebf37754 first import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   287
				{
d970ebf37754 first import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   288
					foreach ($this->strip_attributes as $attrib)
d970ebf37754 first import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   289
					{
d970ebf37754 first import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   290
						$this->strip_attr($attrib, $document);
d970ebf37754 first import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   291
					}
d970ebf37754 first import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   292
				}
d970ebf37754 first import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   293
d970ebf37754 first import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   294
				// Replace relative URLs
d970ebf37754 first import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   295
				$this->base = $base;
d970ebf37754 first import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   296
				foreach ($this->replace_url_attributes as $element => $attributes)
d970ebf37754 first import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   297
				{
d970ebf37754 first import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   298
					$this->replace_urls($document, $element, $attributes);
d970ebf37754 first import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   299
				}
d970ebf37754 first import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   300
d970ebf37754 first import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   301
				// If image handling (caching, etc.) is enabled, cache and rewrite all the image tags.
d970ebf37754 first import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   302
				if (isset($this->image_handler) && ((string) $this->image_handler) !== '' && $this->enable_cache)
d970ebf37754 first import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   303
				{
d970ebf37754 first import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   304
					$images = $document->getElementsByTagName('img');
d970ebf37754 first import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   305
					foreach ($images as $img)
d970ebf37754 first import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   306
					{
d970ebf37754 first import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   307
						if ($img->hasAttribute('src'))
d970ebf37754 first import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   308
						{
d970ebf37754 first import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   309
							$image_url = call_user_func($this->cache_name_function, $img->getAttribute('src'));
d970ebf37754 first import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   310
							$cache = $this->registry->call('Cache', 'get_handler', array($this->cache_location, $image_url, 'spi'));
d970ebf37754 first import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   311
d970ebf37754 first import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   312
							if ($cache->load())
d970ebf37754 first import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   313
							{
d970ebf37754 first import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   314
								$img->setAttribute('src', $this->image_handler . $image_url);
d970ebf37754 first import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   315
							}
d970ebf37754 first import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   316
							else
d970ebf37754 first import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   317
							{
d970ebf37754 first import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   318
								$file = $this->registry->create('File', array($img->getAttribute('src'), $this->timeout, 5, array('X-FORWARDED-FOR' => $_SERVER['REMOTE_ADDR']), $this->useragent, $this->force_fsockopen));
d970ebf37754 first import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   319
								$headers = $file->headers;
d970ebf37754 first import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   320
d970ebf37754 first import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   321
								if ($file->success && ($file->method & SIMPLEPIE_FILE_SOURCE_REMOTE === 0 || ($file->status_code === 200 || $file->status_code > 206 && $file->status_code < 300)))
d970ebf37754 first import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   322
								{
d970ebf37754 first import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   323
									if ($cache->save(array('headers' => $file->headers, 'body' => $file->body)))
d970ebf37754 first import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   324
									{
d970ebf37754 first import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   325
										$img->setAttribute('src', $this->image_handler . $image_url);
d970ebf37754 first import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   326
									}
d970ebf37754 first import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   327
									else
d970ebf37754 first import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   328
									{
d970ebf37754 first import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   329
										trigger_error("$this->cache_location is not writeable. Make sure you've set the correct relative or absolute path, and that the location is server-writable.", E_USER_WARNING);
d970ebf37754 first import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   330
									}
d970ebf37754 first import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   331
								}
d970ebf37754 first import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   332
							}
d970ebf37754 first import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   333
						}
d970ebf37754 first import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   334
					}
d970ebf37754 first import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   335
				}
d970ebf37754 first import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   336
d970ebf37754 first import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   337
				// Remove the DOCTYPE
d970ebf37754 first import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   338
				// Seems to cause segfaulting if we don't do this
d970ebf37754 first import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   339
				if ($document->firstChild instanceof DOMDocumentType)
d970ebf37754 first import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   340
				{
d970ebf37754 first import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   341
					$document->removeChild($document->firstChild);
d970ebf37754 first import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   342
				}
d970ebf37754 first import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   343
d970ebf37754 first import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   344
				// Move everything from the body to the root
d970ebf37754 first import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   345
				$real_body = $document->getElementsByTagName('body')->item(0)->childNodes->item(0);
d970ebf37754 first import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   346
				$document->replaceChild($real_body, $document->firstChild);
d970ebf37754 first import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   347
d970ebf37754 first import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   348
				// Finally, convert to a HTML string
d970ebf37754 first import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   349
				$data = trim($document->saveHTML());
d970ebf37754 first import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   350
d970ebf37754 first import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   351
				if ($this->remove_div)
d970ebf37754 first import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   352
				{
d970ebf37754 first import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   353
					$data = preg_replace('/^<div' . SIMPLEPIE_PCRE_XML_ATTRIBUTE . '>/', '', $data);
d970ebf37754 first import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   354
					$data = preg_replace('/<\/div>$/', '', $data);
d970ebf37754 first import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   355
				}
d970ebf37754 first import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   356
				else
d970ebf37754 first import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   357
				{
d970ebf37754 first import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   358
					$data = preg_replace('/^<div' . SIMPLEPIE_PCRE_XML_ATTRIBUTE . '>/', '<div>', $data);
d970ebf37754 first import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   359
				}
d970ebf37754 first import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   360
			}
d970ebf37754 first import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   361
d970ebf37754 first import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   362
			if ($type & SIMPLEPIE_CONSTRUCT_IRI)
d970ebf37754 first import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   363
			{
d970ebf37754 first import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   364
				$absolute = $this->registry->call('Misc', 'absolutize_url', array($data, $base));
d970ebf37754 first import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   365
				if ($absolute !== false)
d970ebf37754 first import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   366
				{
d970ebf37754 first import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   367
					$data = $absolute;
d970ebf37754 first import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   368
				}
d970ebf37754 first import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   369
			}
d970ebf37754 first import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   370
d970ebf37754 first import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   371
			if ($type & (SIMPLEPIE_CONSTRUCT_TEXT | SIMPLEPIE_CONSTRUCT_IRI))
d970ebf37754 first import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   372
			{
d970ebf37754 first import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   373
				$data = htmlspecialchars($data, ENT_COMPAT, 'UTF-8');
d970ebf37754 first import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   374
			}
d970ebf37754 first import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   375
d970ebf37754 first import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   376
			if ($this->output_encoding !== 'UTF-8')
d970ebf37754 first import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   377
			{
d970ebf37754 first import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   378
				$data = $this->registry->call('Misc', 'change_encoding', array($data, 'UTF-8', $this->output_encoding));
d970ebf37754 first import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   379
			}
d970ebf37754 first import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   380
		}
d970ebf37754 first import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   381
		return $data;
d970ebf37754 first import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   382
	}
d970ebf37754 first import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   383
d970ebf37754 first import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   384
	/**
	 * Wrap a markup fragment in a complete document skeleton.
	 *
	 * The result always carries a DOCTYPE, a <head> with a UTF-8 Content-Type
	 * <meta>, and a <body> holding the fragment.
	 *
	 * @param string $html Markup fragment to wrap
	 * @param int $type Bitmask of SIMPLEPIE_CONSTRUCT_* flags describing the fragment
	 * @return string Full document source
	 */
	protected function preprocess($html, $type)
	{
		// True only when no flag other than SIMPLEPIE_CONSTRUCT_XHTML is set
		$only_xhtml = !($type & ~SIMPLEPIE_CONSTRUCT_XHTML);

		if ($only_xhtml)
		{
			$doctype = '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">';
			$mime = 'application/xhtml+xml';
		}
		else
		{
			// Atom XHTML constructs arrive wrapped in a div, so give every other
			// construct the same wrapper.
			// Note: a stray </div> inside $html is not guarded against.
			$html = '<div>' . $html . '</div>';
			$doctype = '<!DOCTYPE html>';
			$mime = 'text/html';
		}

		return $doctype
			. '<html><head>'
			. '<meta http-equiv="Content-Type" content="' . $mime . '; charset=utf-8" />'
			. '</head><body>' . $html . '</body></html>';
	}
d970ebf37754 first import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   406
d970ebf37754 first import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   407
	/**
	 * Make the given attributes of every $tag element absolute.
	 *
	 * Each attribute value is passed, together with $this->base, to the
	 * registry's Misc::absolutize_url; the attribute is overwritten only when
	 * that call returns something other than false. Nothing is done for a tag
	 * that is listed in $this->strip_htmltags.
	 *
	 * @param DOMDocument $document Document to modify in place
	 * @param string $tag Element name to look up
	 * @param string|array $attributes One attribute name, or a list of them
	 */
	public function replace_urls($document, $tag, $attributes)
	{
		if (is_array($this->strip_htmltags) && in_array($tag, $this->strip_htmltags))
		{
			return;
		}

		$names = is_array($attributes) ? $attributes : array($attributes);

		foreach ($document->getElementsByTagName($tag) as $node)
		{
			foreach ($names as $name)
			{
				if (!$node->hasAttribute($name))
				{
					continue;
				}

				$resolved = $this->registry->call('Misc', 'absolutize_url', array($node->getAttribute($name), $this->base));
				if ($resolved === false)
				{
					continue;
				}

				$node->setAttribute($name, $resolved);
			}
		}
	}
d970ebf37754 first import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   433
d970ebf37754 first import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   434
	/**
	 * Produce the replacement text for one matched tag.
	 *
	 * Reads $match[0] (whole match), $match[1] (compared against 'script' and
	 * 'style', and reused as the tag name), $match[2], $match[3] and $match[4].
	 * NOTE(review): the pattern that fills these groups is not visible here;
	 * presumably this is a preg_replace_callback() callback - confirm.
	 *
	 * @param array $match Match groups as described above
	 * @return string Replacement text
	 */
	public function do_strip_htmltags($match)
	{
		// A match "keeps its content" when group 4 exists and the tag is
		// neither script nor style.
		$keeps_content = isset($match[4]) && !in_array(strtolower($match[1]), array('script', 'style'));

		if (!$this->encode_instead_of_strip)
		{
			return $keeps_content ? $match[4] : '';
		}

		if (!$keeps_content)
		{
			return htmlspecialchars($match[0], ENT_COMPAT, 'UTF-8');
		}

		$name = htmlspecialchars($match[1], ENT_COMPAT, 'UTF-8');
		$rest = htmlspecialchars($match[2], ENT_COMPAT, 'UTF-8');

		return '&lt;' . $name . $rest . '&gt;' . $match[3] . '&lt;/' . $name . '&gt;';
	}
d970ebf37754 first import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   458
d970ebf37754 first import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   459
	/**
	 * Strip, or visibly encode, every $tag element found below <body>.
	 *
	 * Behaviour depends on $this->encode_instead_of_strip:
	 *  - off, script/style: the element is removed together with its content.
	 *  - off, any other tag: the element is replaced by its own child nodes.
	 *  - on, script/style: the element is replaced by its own child nodes.
	 *  - on, any other tag: the element is replaced by a text node holding the
	 *    opening tag, then its child nodes, then a text node holding the
	 *    closing tag.
	 *
	 * @param string $tag Element name to strip
	 * @param DOMDocument $document Document to modify in place
	 * @param int $type Bitmask of SIMPLEPIE_CONSTRUCT_* flags; decides how
	 *                  valueless attributes are written out when encoding
	 */
	protected function strip_tag($tag, $document, $type)
	{
		$xpath = new DOMXPath($document);
		$elements = $xpath->query('body//' . $tag);
		if ($elements === false)
		{
			// The expression could not be evaluated, so there is nothing to iterate
			return;
		}

		$is_script_or_style = in_array($tag, array('script', 'style'));

		if (!$this->encode_instead_of_strip && $is_script_or_style)
		{
			foreach ($elements as $element)
			{
				$element->parentNode->removeChild($element);
			}

			return;
		}

		// The tag itself is only shown as text when encoding a tag that is
		// neither script nor style
		$show_markup = $this->encode_instead_of_strip && !$is_script_or_style;

		foreach ($elements as $element)
		{
			$fragment = $document->createDocumentFragment();

			if ($show_markup)
			{
				$text = '<' . $tag;
				if ($element->hasAttributes())
				{
					$attrs = array();
					foreach ($element->attributes as $name => $attr)
					{
						$value = $attr->value;

						// Compare against '' rather than using empty(), which
						// would also catch the legitimate value "0"
						if ($value === '')
						{
							if ($type & SIMPLEPIE_CONSTRUCT_XHTML)
							{
								// In XHTML, empty values should never exist, so
								// the attribute name is repeated as its value
								$value = $name;
							}
							elseif ($type & SIMPLEPIE_CONSTRUCT_HTML)
							{
								// For HTML, a bare attribute name is fine
								$attrs[] = $name;
								continue;
							}
						}

						// Standard attribute text, using the value adjusted above
						$attrs[] = $name . '="' . $value . '"';
					}
					$text .= ' ' . implode(' ', $attrs);
				}
				$text .= '>';
				$fragment->appendChild(new DOMText($text));
			}

			// Appending a child to the fragment detaches it from $element, so
			// index 0 always refers to the next child still to be moved
			$number = $element->childNodes->length;
			for ($i = $number; $i > 0; $i--)
			{
				$child = $element->childNodes->item(0);
				$fragment->appendChild($child);
			}

			if ($show_markup)
			{
				$fragment->appendChild(new DOMText('</' . $tag . '>'));
			}

			$element->parentNode->replaceChild($fragment, $element);
		}
	}
d970ebf37754 first import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   543
d970ebf37754 first import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   544
	/**
	 * Remove the attribute $attrib from every element that carries it.
	 *
	 * @param string $attrib Attribute name; it is inserted verbatim into an
	 *                       XPath expression
	 * @param DOMDocument $document Document to modify in place
	 */
	protected function strip_attr($attrib, $document)
	{
		$xpath = new DOMXPath($document);
		$elements = $xpath->query('//*[@' . $attrib . ']');
		if ($elements === false)
		{
			// The expression could not be evaluated (for instance because
			// $attrib is not usable as an XPath name test), so there is
			// nothing to iterate
			return;
		}

		foreach ($elements as $element)
		{
			$element->removeAttribute($attrib);
		}
	}
d970ebf37754 first import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   554
}