|
1 <?php |
|
2 |
|
3 /** |
|
4 * SimplePie |
|
5 * |
|
6 * A PHP-Based RSS and Atom Feed Framework. |
|
7 * Takes the hard work out of managing a complete RSS/Atom solution. |
|
8 * |
|
9 * Copyright (c) 2004-2022, Ryan Parman, Sam Sneddon, Ryan McCue, and contributors |
|
10 * All rights reserved. |
|
11 * |
|
12 * Redistribution and use in source and binary forms, with or without modification, are |
|
13 * permitted provided that the following conditions are met: |
|
14 * |
|
15 * * Redistributions of source code must retain the above copyright notice, this list of |
|
16 * conditions and the following disclaimer. |
|
17 * |
|
18 * * Redistributions in binary form must reproduce the above copyright notice, this list |
|
19 * of conditions and the following disclaimer in the documentation and/or other materials |
|
20 * provided with the distribution. |
|
21 * |
|
22 * * Neither the name of the SimplePie Team nor the names of its contributors may be used |
|
23 * to endorse or promote products derived from this software without specific prior |
|
24 * written permission. |
|
25 * |
|
26 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS |
|
27 * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY |
|
28 * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS |
|
29 * AND CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR |
|
30 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR |
|
31 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY |
|
32 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR |
|
33 * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE |
|
34 * POSSIBILITY OF SUCH DAMAGE. |
|
35 * |
|
36 * @package SimplePie |
|
37 * @copyright 2004-2016 Ryan Parman, Sam Sneddon, Ryan McCue |
|
38 * @author Ryan Parman |
|
39 * @author Sam Sneddon |
|
40 * @author Ryan McCue |
|
41 * @link http://simplepie.org/ SimplePie |
|
42 * @license http://www.opensource.org/licenses/bsd-license.php BSD License |
|
43 */ |
|
44 |
|
45 namespace SimplePie; |
|
46 |
|
47 use InvalidArgumentException; |
|
48 use SimplePie\Cache\Base; |
|
49 use SimplePie\Cache\BaseDataCache; |
|
50 use SimplePie\Cache\CallableNameFilter; |
|
51 use SimplePie\Cache\DataCache; |
|
52 use SimplePie\Cache\NameFilter; |
|
53 |
|
54 /** |
|
55 * Used for data cleanup and post-processing |
|
56 * |
|
57 * |
|
58 * This class can be overloaded with {@see \SimplePie\SimplePie::set_sanitize_class()} |
|
59 * |
|
60 * @package SimplePie |
|
61 * @todo Move to using an actual HTML parser (this will allow tags to be properly stripped, and to switch between HTML and XHTML), this will also make it easier to shorten a string while preserving HTML tags |
|
62 */ |
|
63 class Sanitize implements RegistryAware |
|
64 { |
|
/**
 * Base URL used by replace_urls(); sanitize() overwrites it on every HTML/XHTML pass.
 */
public $base;

// ---- Options (normally changed through the setter methods of this class) ----

/** @var bool Whether sanitize() removes the wrapping <div> from its HTML output */
public $remove_div = true;

/** @var string|false URL prefix for cached images; '' or false disables image rewriting */
public $image_handler = '';

/** @var array|false Tag names that sanitize() strips; false disables tag stripping */
public $strip_htmltags = [
    'base', 'blink', 'body', 'doctype', 'embed', 'font', 'form', 'frame', 'frameset', 'html',
    'iframe', 'input', 'marquee', 'meta', 'noscript', 'object', 'param', 'script', 'style',
];

/** @var bool When true, stripped tags are kept as visible text instead of being removed */
public $encode_instead_of_strip = false;

/** @var array|false Attribute names that sanitize() removes; false disables the step */
public $strip_attributes = [
    'bgsound', 'expr', 'id', 'style', 'onclick', 'onerror', 'onfinish',
    'onmouseover', 'onmouseout', 'onfocus', 'onblur', 'lowsrc', 'dynsrc',
];

/** @var array|false Attribute names that sanitize() renames to "data-sanitized-<name>" */
public $rename_attributes = [];

/** @var array|false Map of tag name => [attribute => value] pairs that sanitize() sets */
public $add_attributes = [
    'audio' => ['preload' => 'none'],
    'iframe' => ['sandbox' => 'allow-scripts allow-same-origin'],
    'video' => ['preload' => 'none'],
];

/** @var bool Whether sanitize() removes comment nodes */
public $strip_comments = false;

/** @var string Target encoding; sanitize() converts its result when this is not 'UTF-8' */
public $output_encoding = 'UTF-8';

/** @var bool Image caching in sanitize() only runs while this is true */
public $enable_cache = true;

/** @var string Cache location handed to the legacy cache handler */
public $cache_location = './cache';

/** @var string Legacy callable name, recorded by pass_cache_data() for string arguments */
public $cache_name_function = 'md5';

/**
 * @var NameFilter
 */
private $cache_namefilter;

/** @var int|float|string Timeout passed to the File object that fetches images */
public $timeout = 10;

/** @var string User agent passed to the File object that fetches images */
public $useragent = '';

/** @var bool|string fsockopen flag passed to the File object that fetches images */
public $force_fsockopen = false;

/** @var array|null Map of element => attribute(s) holding URLs; filled by set_url_replacements() */
public $replace_url_attributes = null;

/** Registry assigned through set_registry() */
public $registry;

/**
 * @var DataCache|null
 */
private $cache = null;

/**
 * @var int Cache duration (in seconds)
 */
private $cache_duration = 3600;

/**
 * List of domains for which to force HTTPS.
 * @see \SimplePie\Sanitize::set_https_domains()
 * Array is a tree split at DNS levels. Example:
 * array('biz' => true, 'com' => array('example' => true), 'net' => array('example' => array('www' => true)))
 */
public $https_domains = [];
|
109 |
|
/**
 * Create a sanitizer whose URL-attribute map starts out with the built-in defaults.
 */
public function __construct()
{
    // null tells set_url_replacements() to install its default element/attribute map.
    $this->set_url_replacements(null);
}
|
115 |
|
/**
 * Choose whether sanitize() drops the wrapping <div> from its HTML output.
 *
 * @param mixed $enable Any value; it is stored after a boolean cast
 */
public function remove_div($enable = true)
{
    $flag = (bool) $enable;
    $this->remove_div = $flag;
}
|
120 |
|
/**
 * Set the URL prefix that sanitize() prepends to cached image names.
 *
 * @param string|false $page Handler prefix; any falsy value stores false, which disables rewriting
 */
public function set_image_handler($page = false)
{
    $this->image_handler = $page ? (string) $page : false;
}
|
129 |
|
/**
 * Store the registry that this sanitizer uses to create File objects
 * and to call Misc / Cache helpers.
 *
 * @param \SimplePie\Registry $registry
 */
public function set_registry(\SimplePie\Registry $registry)/* : void */
{
    $this->registry = $registry;
}
|
134 |
|
/**
 * Configure the cache used when sanitize() rewrites <img> sources.
 *
 * @param bool|null $enable_cache Stored as bool; null leaves the current setting untouched
 * @param string $cache_location Stored only when truthy
 * @param string|callable|NameFilter $cache_name_function A NameFilter, or (BC) a callable
 *        that is wrapped in a CallableNameFilter. Strings are also recorded in
 *        $this->cache_name_function.
 * @param string $cache_class Not used by this method; kept so existing call sites keep working
 * @param DataCache|null $cache Cache implementation; null keeps the current one
 *
 * @throws InvalidArgumentException When $cache_name_function is neither a string,
 *         a NameFilter, nor a callable object
 */
public function pass_cache_data($enable_cache = true, $cache_location = './cache', $cache_name_function = 'md5', $cache_class = 'SimplePie\Cache', ?DataCache $cache = null)
{
    if (isset($enable_cache)) {
        $this->enable_cache = (bool) $enable_cache;
    }

    if ($cache_location) {
        $this->cache_location = (string) $cache_location;
    }

    if (is_string($cache_name_function)) {
        // BC: $cache_name_function could be a callable as string
        // trigger_error(sprintf('Providing $cache_name_function as string in "%s()" is deprecated since SimplePie 1.8.0, provide as "%s" instead.', __METHOD__, NameFilter::class), \E_USER_DEPRECATED);
        $this->cache_name_function = $cache_name_function;

        $cache_name_function = new CallableNameFilter($cache_name_function);
    } elseif (!$cache_name_function instanceof NameFilter) {
        // Previously every object slipped through the type check and was stored as the
        // name filter, only to fail later when sanitize() called ->filter() on it.
        // Callable objects (closures, invokables) are wrapped; anything else is rejected.
        if (!is_object($cache_name_function) || !is_callable($cache_name_function)) {
            throw new InvalidArgumentException(sprintf(
                '%s(): Argument #3 ($cache_name_function) must be of type %s',
                __METHOD__,
                NameFilter::class
            ), 1);
        }

        $cache_name_function = new CallableNameFilter($cache_name_function);
    }

    $this->cache_namefilter = $cache_name_function;

    if ($cache !== null) {
        $this->cache = $cache;
    }
}
|
167 |
|
/**
 * Configure how sanitize() fetches remote images for the image cache.
 *
 * Each setting is only updated when the given value is truthy, so a falsy
 * argument leaves the current value in place.
 *
 * @param string $file_class Not used by this method; kept so existing call sites keep working
 * @param int|float $timeout Timeout handed to the File object
 * @param string $useragent User agent handed to the File object
 * @param bool $force_fsockopen fsockopen flag handed to the File object
 */
public function pass_file_data($file_class = 'SimplePie\File', $timeout = 10, $useragent = '', $force_fsockopen = false)
{
    if ($timeout) {
        // Keep the timeout numeric (it used to be cast to string by mistake).
        $this->timeout = is_int($timeout) ? $timeout : (float) $timeout;
    }

    if ($useragent) {
        $this->useragent = (string) $useragent;
    }

    if ($force_fsockopen) {
        // A flag, not text: it used to be stored as the string "1".
        $this->force_fsockopen = (bool) $force_fsockopen;
    }
}
|
182 |
|
/**
 * Set the list of tags that sanitize() strips.
 *
 * @param array|string|false $tags Array of tag names, or a comma-separated string
 *        (split on ',' without trimming); any falsy value disables tag stripping
 */
public function strip_htmltags($tags = ['base', 'blink', 'body', 'doctype', 'embed', 'font', 'form', 'frame', 'frameset', 'html', 'iframe', 'input', 'marquee', 'meta', 'noscript', 'object', 'param', 'script', 'style'])
{
    if (!$tags) {
        $this->strip_htmltags = false;

        return;
    }

    $this->strip_htmltags = is_array($tags) ? $tags : explode(',', $tags);
}
|
195 |
|
/**
 * Choose whether stripped tags are kept as visible text instead of being removed.
 *
 * @param mixed $encode Any value; it is stored after a boolean cast
 */
public function encode_instead_of_strip($encode = false)
{
    $flag = (bool) $encode;
    $this->encode_instead_of_strip = $flag;
}
|
200 |
|
/**
 * Set the attributes that sanitize() renames to "data-sanitized-<name>".
 *
 * @param array|string|false $attribs Array of attribute names, or a comma-separated
 *        string (split on ',' without trimming); any falsy value disables renaming
 */
public function rename_attributes($attribs = [])
{
    $names = false;

    if ($attribs) {
        $names = is_array($attribs) ? $attribs : explode(',', $attribs);
    }

    $this->rename_attributes = $names;
}
|
213 |
|
/**
 * Set the attributes that sanitize() removes from every element.
 *
 * @param array|string|false $attribs Array of attribute names, or a comma-separated
 *        string (split on ',' without trimming); any falsy value disables the step
 */
public function strip_attributes($attribs = ['bgsound', 'expr', 'id', 'style', 'onclick', 'onerror', 'onfinish', 'onmouseover', 'onmouseout', 'onfocus', 'onblur', 'lowsrc', 'dynsrc'])
{
    if (!$attribs) {
        $this->strip_attributes = false;
    } elseif (is_array($attribs)) {
        $this->strip_attributes = $attribs;
    } else {
        $this->strip_attributes = explode(',', $attribs);
    }
}
|
226 |
|
/**
 * Set the attributes that sanitize() forces onto specific tags.
 *
 * @param array|string|false $attribs Map of tag name => [attribute => value];
 *        any falsy value disables the step. A non-array value is split on ','.
 *        NOTE(review): that split yields a flat list, which does not look like the
 *        tag => pairs map that sanitize() iterates over; confirm whether any caller
 *        relies on the string form.
 */
public function add_attributes($attribs = ['audio' => ['preload' => 'none'], 'iframe' => ['sandbox' => 'allow-scripts allow-same-origin'], 'video' => ['preload' => 'none']])
{
    if (!$attribs) {
        $this->add_attributes = false;

        return;
    }

    if (!is_array($attribs)) {
        $attribs = explode(',', $attribs);
    }

    $this->add_attributes = $attribs;
}
|
239 |
|
/**
 * Choose whether sanitize() removes comment nodes from HTML/XHTML content.
 *
 * @param mixed $strip Any value; it is stored after a boolean cast
 */
public function strip_comments($strip = false)
{
    $flag = (bool) $strip;
    $this->strip_comments = $flag;
}
|
244 |
|
/**
 * Set the encoding of sanitize()'s result.
 *
 * sanitize() only converts its output when this differs from 'UTF-8'.
 *
 * @param string $encoding Encoding name; stored after a string cast
 */
public function set_output_encoding($encoding = 'UTF-8')
{
    $name = (string) $encoding;
    $this->output_encoding = $name;
}
|
249 |
|
/**
 * Set element/attribute key/value pairs of HTML attributes
 * containing URLs that need to be resolved relative to the feed
 *
 * Defaults to |a|@href, |area|@href, |audio|@src, |blockquote|@cite,
 * |del|@cite, |form|@action, |img|@longdesc, |img|@src, |input|@src,
 * |ins|@cite, |q|@cite, |source|@src, |video|@poster, |video|@src
 *
 * @since 1.0
 * @param array|null $element_attribute Element/attribute key/value pairs, null for default.
 *        Any other value is stored after an (array) cast.
 */
public function set_url_replacements($element_attribute = null)
{
    $defaults = [
        'a' => 'href',
        'area' => 'href',
        'audio' => 'src',
        'blockquote' => 'cite',
        'del' => 'cite',
        'form' => 'action',
        'img' => [
            'longdesc',
            'src'
        ],
        'input' => 'src',
        'ins' => 'cite',
        'q' => 'cite',
        'source' => 'src',
        'video' => [
            'poster',
            'src'
        ]
    ];

    $map = $element_attribute === null ? $defaults : $element_attribute;

    $this->replace_url_attributes = (array) $map;
}
|
287 |
|
/**
 * Set the list of domains for which to force HTTPS.
 *
 * Replaces the current list. Each domain is trimmed of dots and whitespace,
 * split on '.', and stored right-to-left as a nested array whose leaf is true,
 * e.g. 'www.example.net' becomes ['net' => ['example' => ['www' => true]]].
 * A domain already covered by a shorter entry is absorbed by that entry.
 *
 * @see \SimplePie\Sanitize::https_url()
 * Example array('biz', 'example.com', 'example.org', 'www.example.net');
 *
 * @param iterable $domains Domain names
 */
public function set_https_domains($domains)
{
    $this->https_domains = [];

    foreach ($domains as $domain) {
        $labels = array_reverse(explode('.', trim($domain, ". \t\n\r\0\x0B")));

        // Walk down the tree by reference, creating missing levels on the way.
        $cursor = &$this->https_domains;
        foreach ($labels as $label) {
            if ($cursor === true) {
                // A parent domain is already forced to HTTPS.
                break;
            }
            if (!isset($cursor[$label])) {
                $cursor[$label] = [];
            }
            $cursor = &$cursor[$label];
        }

        // Mark the reached level as "force HTTPS" (this also covers all subdomains).
        $cursor = true;
        unset($cursor);
    }
}
|
312 |
|
/**
 * Check if the domain is in the list of forced HTTPS.
 *
 * The domain is trimmed of dots and spaces, split on '.', and looked up
 * right-to-left in the $https_domains tree. It matches when the walk ends
 * on a node that is exactly true, so a listed domain also matches its subdomains.
 *
 * @param string|null|false $domain Host name; https_url() passes the result of
 *        parse_url(), which is not a string when the URL has no usable host
 * @return bool
 */
protected function is_https_domain($domain)
{
    // A missing host can never match, and must not reach trim() as null/false.
    if (!is_string($domain)) {
        return false;
    }

    $segments = array_reverse(explode('.', trim($domain, '. ')));

    // Read-only walk: a plain copy is enough, no reference into the tree is needed.
    $node = $this->https_domains;
    foreach ($segments as $segment) {
        if (!isset($node[$segment])) {
            break;
        }
        $node = $node[$segment];
    }

    return $node === true;
}
|
330 |
|
/**
 * Force HTTPS for selected Web sites.
 *
 * @param string $url URL to inspect
 * @return string The URL with "http" upgraded to "https" when it starts with
 *         "http://" (case-insensitive) and its host is a forced-HTTPS domain;
 *         otherwise the URL unchanged
 */
public function https_url($url)
{
    $is_plain_http = strtolower(substr($url, 0, 7)) === 'http://';

    if ($is_plain_http && $this->is_https_domain(parse_url($url, PHP_URL_HOST))) {
        // Insert the 's' right after "http".
        return substr_replace($url, 's', 4, 0);
    }

    return $url;
}
|
341 |
|
/**
 * Clean up a piece of feed data according to its construct type.
 *
 * Steps, in order:
 *  - CONSTRUCT_MAYBE_HTML is resolved to HTML or TEXT by looking for entities or closing tags.
 *  - CONSTRUCT_BASE64 data is decoded.
 *  - HTML/XHTML is parsed with DOMDocument, then comments, tags and attributes are
 *    stripped/renamed/added per the options, URL attributes are made absolute, images are
 *    optionally cached and rewritten, and the first child of <body> is serialised again.
 *  - CONSTRUCT_IRI data is made absolute against $base.
 *  - TEXT and IRI data is escaped with htmlspecialchars().
 *  - The result is converted from UTF-8 when another output encoding is configured.
 *
 * Empty data is returned as-is unless the type includes CONSTRUCT_IRI.
 *
 * @param string $data Raw data
 * @param int $type Bitmask of \SimplePie\SimplePie::CONSTRUCT_* constants
 * @param string $base Base URL for resolving relative URLs
 * @return string Sanitized data
 *
 * @throws \SimplePie\Exception When HTML/XHTML must be processed but DOMDocument is unavailable
 */
public function sanitize($data, $type, $base = '')
{
    $data = trim($data);

    if ($data === '' && !($type & \SimplePie\SimplePie::CONSTRUCT_IRI)) {
        return $data;
    }

    if ($type & \SimplePie\SimplePie::CONSTRUCT_MAYBE_HTML) {
        if (preg_match('/(&(#(x[0-9a-fA-F]+|[0-9]+)|[a-zA-Z0-9]+)|<\/[A-Za-z][^\x09\x0A\x0B\x0C\x0D\x20\x2F\x3E]*' . \SimplePie\SimplePie::PCRE_HTML_ATTRIBUTE . '>)/', $data)) {
            $type |= \SimplePie\SimplePie::CONSTRUCT_HTML;
        } else {
            $type |= \SimplePie\SimplePie::CONSTRUCT_TEXT;
        }
    }

    if ($type & \SimplePie\SimplePie::CONSTRUCT_BASE64) {
        $data = base64_decode($data);
    }

    if ($type & (\SimplePie\SimplePie::CONSTRUCT_HTML | \SimplePie\SimplePie::CONSTRUCT_XHTML)) {
        if (!class_exists('DOMDocument')) {
            throw new \SimplePie\Exception('DOMDocument not found, unable to use sanitizer');
        }
        $document = new \DOMDocument();
        $document->encoding = 'UTF-8';

        $data = $this->preprocess($data, $type);

        // Parser warnings about sloppy markup are silenced; the handler is always restored.
        set_error_handler(['SimplePie\Misc', 'silence_errors']);
        try {
            $document->loadHTML($data);
        } finally {
            restore_error_handler();
        }

        $xpath = new \DOMXPath($document);

        // Strip comments
        if ($this->strip_comments) {
            $comments = $xpath->query('//comment()');

            foreach ($comments as $comment) {
                $comment->parentNode->removeChild($comment);
            }
        }

        // Strip out HTML tags and attributes that might cause various security problems.
        // Based on recommendations by Mark Pilgrim at:
        // http://diveintomark.org/archives/2003/06/12/how_to_consume_rss_safely
        if ($this->strip_htmltags) {
            foreach ($this->strip_htmltags as $tag) {
                $this->strip_tag($tag, $document, $xpath, $type);
            }
        }

        if ($this->rename_attributes) {
            foreach ($this->rename_attributes as $attrib) {
                $this->rename_attr($attrib, $xpath);
            }
        }

        if ($this->strip_attributes) {
            foreach ($this->strip_attributes as $attrib) {
                $this->strip_attr($attrib, $xpath);
            }
        }

        if ($this->add_attributes) {
            foreach ($this->add_attributes as $tag => $valuePairs) {
                $this->add_attr($tag, $valuePairs, $document);
            }
        }

        // Replace relative URLs
        $this->base = $base;
        foreach ($this->replace_url_attributes as $element => $attributes) {
            $this->replace_urls($document, $element, $attributes);
        }

        // If image handling (caching, etc.) is enabled, cache and rewrite all the image tags.
        if (isset($this->image_handler) && ((string) $this->image_handler) !== '' && $this->enable_cache) {
            $images = $document->getElementsByTagName('img');

            foreach ($images as $img) {
                if (!$img->hasAttribute('src')) {
                    continue;
                }

                $src = $img->getAttribute('src');
                $image_url = $this->cache_namefilter->filter($src);
                $cache = $this->get_cache($image_url);

                if ($cache->get_data($image_url, false)) {
                    // Already cached: just point the tag at the handler.
                    $img->setAttribute('src', $this->image_handler . $image_url);
                    continue;
                }

                // REMOTE_ADDR is not set outside of a web request (e.g. CLI, cron).
                $forwarded = isset($_SERVER['REMOTE_ADDR']) ? ['X-FORWARDED-FOR' => $_SERVER['REMOTE_ADDR']] : [];
                $file = $this->registry->create(File::class, [$src, $this->timeout, 5, $forwarded, $this->useragent, $this->force_fsockopen]);

                // Parenthesised on purpose: "===" binds tighter than "&", so the unparenthesised
                // form compared the constant with 0 first and this test could never be true.
                $is_local = ($file->method & \SimplePie\SimplePie::FILE_SOURCE_REMOTE) === 0;
                $status_ok = $file->status_code === 200 || ($file->status_code > 206 && $file->status_code < 300);

                if ($file->success && ($is_local || $status_ok)) {
                    if ($cache->set_data($image_url, ['headers' => $file->headers, 'body' => $file->body], $this->cache_duration)) {
                        $img->setAttribute('src', $this->image_handler . $image_url);
                    } else {
                        trigger_error("$this->cache_location is not writable. Make sure you've set the correct relative or absolute path, and that the location is server-writable.", E_USER_WARNING);
                    }
                }
            }
        }

        // Get content node
        $div = $document->getElementsByTagName('body')->item(0)->firstChild;
        // Finally, convert to a HTML string
        $data = trim($document->saveHTML($div));

        if ($this->remove_div) {
            $data = preg_replace('/^<div' . \SimplePie\SimplePie::PCRE_XML_ATTRIBUTE . '>/', '', $data);
            $data = preg_replace('/<\/div>$/', '', $data);
        } else {
            $data = preg_replace('/^<div' . \SimplePie\SimplePie::PCRE_XML_ATTRIBUTE . '>/', '<div>', $data);
        }

        $data = str_replace('</source>', '', $data);
    }

    if ($type & \SimplePie\SimplePie::CONSTRUCT_IRI) {
        $absolute = $this->registry->call(Misc::class, 'absolutize_url', [$data, $base]);
        if ($absolute !== false) {
            $data = $absolute;
        }
    }

    if ($type & (\SimplePie\SimplePie::CONSTRUCT_TEXT | \SimplePie\SimplePie::CONSTRUCT_IRI)) {
        $data = htmlspecialchars($data, ENT_COMPAT, 'UTF-8');
    }

    if ($this->output_encoding !== 'UTF-8') {
        $data = $this->registry->call(Misc::class, 'change_encoding', [$data, 'UTF-8', $this->output_encoding]);
    }

    return $data;
}
|
474 |
|
/**
 * Wrap a markup fragment in a complete document that DOMDocument::loadHTML() can parse.
 *
 * Any <html>/<body> tags in the fragment are removed first. The fragment is then
 * placed inside a document with a doctype and a UTF-8 Content-Type <meta>.
 *
 * @param string $html Markup fragment
 * @param int $type Construct bitmask. When any bit other than CONSTRUCT_XHTML is set,
 *        the fragment is wrapped in a <div> and an HTML doctype is used; otherwise
 *        an XHTML 1.0 Strict doctype is used and no <div> is added.
 * @return string Full document source
 */
protected function preprocess($html, $type)
{
    $html = preg_replace('%</?(?:html|body)[^>]*?'.'>%is', '', $html);

    if ($type & ~\SimplePie\SimplePie::CONSTRUCT_XHTML) {
        // Atom XHTML constructs are wrapped with a div by default
        // Note: No protection if $html contains a stray </div>!
        $html = '<div>' . $html . '</div>';
        $doctype = '<!DOCTYPE html>';
        $content_type = 'text/html';
    } else {
        $doctype = '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">';
        $content_type = 'application/xhtml+xml';
    }

    return $doctype
        . '<html><head>'
        . '<meta http-equiv="Content-Type" content="' . $content_type . '; charset=utf-8" />'
        . '</head><body>' . $html . '</body></html>';
}
|
495 |
|
/**
 * Make the URL attributes of one tag absolute, and upgrade them to HTTPS where configured.
 *
 * Nothing happens for tags that are listed in $strip_htmltags. Attribute values
 * are resolved against $this->base; a value that cannot be resolved is left as-is.
 *
 * @param \DOMDocument $document Document to modify
 * @param string $tag Tag name
 * @param string|array $attributes Attribute name(s) that hold URLs
 */
public function replace_urls($document, $tag, $attributes)
{
    $attributes = is_array($attributes) ? $attributes : [$attributes];

    // Tags that get stripped do not need their URLs fixed.
    if (is_array($this->strip_htmltags) && in_array($tag, $this->strip_htmltags)) {
        return;
    }

    foreach ($document->getElementsByTagName($tag) as $element) {
        foreach ($attributes as $attribute) {
            if (!$element->hasAttribute($attribute)) {
                continue;
            }

            $value = $this->registry->call(Misc::class, 'absolutize_url', [$element->getAttribute($attribute), $this->base]);
            if ($value === false) {
                continue;
            }

            $element->setAttribute($attribute, $this->https_url($value));
        }
    }
}
|
517 |
|
/**
 * Produce the replacement text for one matched tag.
 *
 * Reads the match array by position: [0] whole match, [1] tag name,
 * [2] attribute text, [3] and [4] inner content. (Presumably a regex-replace
 * callback; the caller is not part of this class.)
 *
 * - Stripping mode: returns $match[4] for tags with content other than
 *   script/style, and '' otherwise.
 * - Encoding mode: returns the tag with name and attribute text escaped for
 *   tags with content other than script/style, and the whole match escaped otherwise.
 *
 * @param array $match Match array
 * @return string Replacement text
 */
public function do_strip_htmltags($match)
{
    $keeps_content = isset($match[4]) && !in_array(strtolower($match[1]), ['script', 'style']);

    if (!$this->encode_instead_of_strip) {
        return $keeps_content ? $match[4] : '';
    }

    if (!$keeps_content) {
        return htmlspecialchars($match[0], ENT_COMPAT, 'UTF-8');
    }

    $name = htmlspecialchars($match[1], ENT_COMPAT, 'UTF-8');
    $attributes = htmlspecialchars($match[2], ENT_COMPAT, 'UTF-8');

    return "<$name$attributes>$match[3]</$name>";
}
|
534 |
|
/**
 * Remove every occurrence of one tag from the document body.
 *
 * - Stripping mode, script/style: the element is removed together with its content.
 * - Stripping mode, other tags: the element is replaced by its children.
 * - Encoding mode: the element is replaced by its children; for tags other than
 *   script/style the opening and closing tag are kept as visible text around them.
 *
 * @param string $tag Tag name to strip
 * @param \DOMDocument $document Document being sanitized
 * @param \DOMXPath $xpath XPath helper bound to $document
 * @param int $type Construct bitmask; decides how empty attribute values are written
 */
protected function strip_tag($tag, $document, $xpath, $type)
{
    $elements = $xpath->query('body//' . $tag);
    $is_script_or_style = in_array($tag, ['script', 'style'], true);

    if (!$this->encode_instead_of_strip && $is_script_or_style) {
        foreach ($elements as $element) {
            $element->parentNode->removeChild($element);
        }

        return;
    }

    // Only encoding mode shows the tag itself, and never for script/style.
    $show_markup = $this->encode_instead_of_strip && !$is_script_or_style;

    foreach ($elements as $element) {
        $fragment = $document->createDocumentFragment();

        if ($show_markup) {
            $text = '<' . $tag;
            if ($element->hasAttributes()) {
                $attrs = [];
                foreach ($element->attributes as $name => $attr) {
                    $value = $attr->value;

                    // Compare with '' explicitly: empty() would also treat "0" as missing.
                    if ($value === '') {
                        if ($type & \SimplePie\SimplePie::CONSTRUCT_XHTML) {
                            // In XHTML, empty values should never exist, so we repeat the name
                            $value = $name;
                        } elseif ($type & \SimplePie\SimplePie::CONSTRUCT_HTML) {
                            // For HTML, a bare attribute name is fine
                            $attrs[] = $name;
                            continue;
                        }
                    }

                    // Standard attribute text
                    $attrs[] = $name . '="' . $value . '"';
                }
                $text .= ' ' . implode(' ', $attrs);
            }
            $text .= '>';
            $fragment->appendChild(new \DOMText($text));
        }

        // appendChild() moves the node, so the first child changes on every pass.
        while ($element->firstChild !== null) {
            $fragment->appendChild($element->firstChild);
        }

        if ($show_markup) {
            $fragment->appendChild(new \DOMText('</' . $tag . '>'));
        }

        $element->parentNode->replaceChild($fragment, $element);
    }
}
|
602 |
|
/**
 * Remove one attribute from every element that carries it.
 *
 * @param string $attrib Attribute name; it is inserted into the XPath query as-is
 * @param \DOMXPath $xpath XPath helper bound to the document being sanitized
 */
protected function strip_attr($attrib, $xpath)
{
    $selector = '//*[@' . $attrib . ']';

    foreach ($xpath->query($selector) as $node) {
        $node->removeAttribute($attrib);
    }
}
|
611 |
|
/**
 * Rename one attribute to "data-sanitized-<name>" on every element that carries it.
 *
 * The value is copied to the new attribute before the original is removed.
 *
 * @param string $attrib Attribute name; it is inserted into the XPath query as-is
 * @param \DOMXPath $xpath XPath helper bound to the document being sanitized
 */
protected function rename_attr($attrib, $xpath)
{
    $selector = '//*[@' . $attrib . ']';
    $renamed = 'data-sanitized-' . $attrib;

    foreach ($xpath->query($selector) as $node) {
        $node->setAttribute($renamed, $node->getAttribute($attrib));
        $node->removeAttribute($attrib);
    }
}
|
621 |
|
/**
 * Set a group of attributes on every element with the given tag name.
 *
 * Existing attributes of the same name are overwritten.
 *
 * @param string $tag Tag name
 * @param array $valuePairs Map of attribute name => value
 * @param \DOMDocument $document Document being sanitized
 */
protected function add_attr($tag, $valuePairs, $document)
{
    foreach ($document->getElementsByTagName($tag) as $node) {
        foreach ($valuePairs as $name => $value) {
            $node->setAttribute($name, $value);
        }
    }
}
|
631 |
|
/**
 * Get a DataCache
 *
 * Returns the cache passed to pass_cache_data() when there is one. Otherwise a
 * legacy cache handler for images is obtained through the registry and wrapped
 * in a BaseDataCache; that wrapper is created anew on every call.
 *
 * @param string $image_url Only needed for BC, can be removed in SimplePie 2.0.0
 *
 * @return DataCache
 */
private function get_cache($image_url = '')
{
    if ($this->cache !== null) {
        return $this->cache;
    }

    // @trigger_error(sprintf('Not providing as PSR-16 cache implementation is deprecated since SimplePie 1.8.0, please use "SimplePie\SimplePie::set_cache()".'), \E_USER_DEPRECATED);
    $handler = $this->registry->call(
        Cache::class,
        'get_handler',
        [$this->cache_location, $image_url, Base::TYPE_IMAGE]
    );

    return new BaseDataCache($handler);
}
|
654 } |
|
655 |
|
// Keep the pre-namespace class name usable for existing code.
class_alias(Sanitize::class, 'SimplePie_Sanitize');