changeset 16 | a86126ab1dd4 |
parent 0 | d970ebf37754 |
child 19 | 3d72ae0968f4 |
15:3d4e9c994f10 | 16:a86126ab1dd4 |
---|---|
3 * SimplePie |
3 * SimplePie |
4 * |
4 * |
5 * A PHP-Based RSS and Atom Feed Framework. |
5 * A PHP-Based RSS and Atom Feed Framework. |
6 * Takes the hard work out of managing a complete RSS/Atom solution. |
6 * Takes the hard work out of managing a complete RSS/Atom solution. |
7 * |
7 * |
8 * Copyright (c) 2004-2012, Ryan Parman, Geoffrey Sneddon, Ryan McCue, and contributors |
8 * Copyright (c) 2004-2016, Ryan Parman, Sam Sneddon, Ryan McCue, and contributors |
9 * All rights reserved. |
9 * All rights reserved. |
10 * |
10 * |
11 * Redistribution and use in source and binary forms, with or without modification, are |
11 * Redistribution and use in source and binary forms, with or without modification, are |
12 * permitted provided that the following conditions are met: |
12 * permitted provided that the following conditions are met: |
13 * |
13 * |
31 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR |
31 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR |
32 * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE |
32 * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE |
33 * POSSIBILITY OF SUCH DAMAGE. |
33 * POSSIBILITY OF SUCH DAMAGE. |
34 * |
34 * |
35 * @package SimplePie |
35 * @package SimplePie |
36 * @version 1.3.1 |
36 * @copyright 2004-2016 Ryan Parman, Sam Sneddon, Ryan McCue |
37 * @copyright 2004-2012 Ryan Parman, Geoffrey Sneddon, Ryan McCue |
|
38 * @author Ryan Parman |
37 * @author Ryan Parman |
39 * @author Geoffrey Sneddon |
38 * @author Sam Sneddon |
40 * @author Ryan McCue |
39 * @author Ryan McCue |
41 * @link http://simplepie.org/ SimplePie |
40 * @link http://simplepie.org/ SimplePie |
42 * @license http://www.opensource.org/licenses/bsd-license.php BSD License |
41 * @license http://www.opensource.org/licenses/bsd-license.php BSD License |
43 */ |
42 */ |
44 |
43 |
59 // Options |
58 // Options |
60 var $remove_div = true; |
59 var $remove_div = true; |
61 var $image_handler = ''; |
60 var $image_handler = ''; |
62 var $strip_htmltags = array('base', 'blink', 'body', 'doctype', 'embed', 'font', 'form', 'frame', 'frameset', 'html', 'iframe', 'input', 'marquee', 'meta', 'noscript', 'object', 'param', 'script', 'style'); |
61 var $strip_htmltags = array('base', 'blink', 'body', 'doctype', 'embed', 'font', 'form', 'frame', 'frameset', 'html', 'iframe', 'input', 'marquee', 'meta', 'noscript', 'object', 'param', 'script', 'style'); |
63 var $encode_instead_of_strip = false; |
62 var $encode_instead_of_strip = false; |
64 var $strip_attributes = array('bgsound', 'class', 'expr', 'id', 'style', 'onclick', 'onerror', 'onfinish', 'onmouseover', 'onmouseout', 'onfocus', 'onblur', 'lowsrc', 'dynsrc'); |
63 var $strip_attributes = array('bgsound', 'expr', 'id', 'style', 'onclick', 'onerror', 'onfinish', 'onmouseover', 'onmouseout', 'onfocus', 'onblur', 'lowsrc', 'dynsrc'); |
64 var $add_attributes = array('audio' => array('preload' => 'none'), 'iframe' => array('sandbox' => 'allow-scripts allow-same-origin'), 'video' => array('preload' => 'none')); |
|
65 var $strip_comments = false; |
65 var $strip_comments = false; |
66 var $output_encoding = 'UTF-8'; |
66 var $output_encoding = 'UTF-8'; |
67 var $enable_cache = true; |
67 var $enable_cache = true; |
68 var $cache_location = './cache'; |
68 var $cache_location = './cache'; |
69 var $cache_name_function = 'md5'; |
69 var $cache_name_function = 'md5'; |
158 public function encode_instead_of_strip($encode = false) |
158 public function encode_instead_of_strip($encode = false) |
159 { |
159 { |
160 $this->encode_instead_of_strip = (bool) $encode; |
160 $this->encode_instead_of_strip = (bool) $encode; |
161 } |
161 } |
162 |
162 |
163 public function strip_attributes($attribs = array('bgsound', 'class', 'expr', 'id', 'style', 'onclick', 'onerror', 'onfinish', 'onmouseover', 'onmouseout', 'onfocus', 'onblur', 'lowsrc', 'dynsrc')) |
163 public function strip_attributes($attribs = array('bgsound', 'expr', 'id', 'style', 'onclick', 'onerror', 'onfinish', 'onmouseover', 'onmouseout', 'onfocus', 'onblur', 'lowsrc', 'dynsrc')) |
164 { |
164 { |
165 if ($attribs) |
165 if ($attribs) |
166 { |
166 { |
167 if (is_array($attribs)) |
167 if (is_array($attribs)) |
168 { |
168 { |
174 } |
174 } |
175 } |
175 } |
176 else |
176 else |
177 { |
177 { |
178 $this->strip_attributes = false; |
178 $this->strip_attributes = false; |
179 } |
|
180 } |
|
181 |
|
182 public function add_attributes($attribs = array('audio' => array('preload' => 'none'), 'iframe' => array('sandbox' => 'allow-scripts allow-same-origin'), 'video' => array('preload' => 'none'))) |
|
183 { |
|
184 if ($attribs) |
|
185 { |
|
186 if (is_array($attribs)) |
|
187 { |
|
188 $this->add_attributes = $attribs; |
|
189 } |
|
190 else |
|
191 { |
|
192 $this->add_attributes = explode(',', $attribs); |
|
193 } |
|
194 } |
|
195 else |
|
196 { |
|
197 $this->add_attributes = false; |
|
179 } |
198 } |
180 } |
199 } |
181 |
200 |
182 public function strip_comments($strip = false) |
201 public function strip_comments($strip = false) |
183 { |
202 { |
247 if ($type & (SIMPLEPIE_CONSTRUCT_HTML | SIMPLEPIE_CONSTRUCT_XHTML)) |
266 if ($type & (SIMPLEPIE_CONSTRUCT_HTML | SIMPLEPIE_CONSTRUCT_XHTML)) |
248 { |
267 { |
249 |
268 |
250 if (!class_exists('DOMDocument')) |
269 if (!class_exists('DOMDocument')) |
251 { |
270 { |
252 $this->registry->call('Misc', 'error', array('DOMDocument not found, unable to use sanitizer', E_USER_WARNING, __FILE__, __LINE__)); |
271 throw new SimplePie_Exception('DOMDocument not found, unable to use sanitizer'); |
253 return ''; |
|
254 } |
272 } |
255 $document = new DOMDocument(); |
273 $document = new DOMDocument(); |
256 $document->encoding = 'UTF-8'; |
274 $document->encoding = 'UTF-8'; |
275 |
|
257 $data = $this->preprocess($data, $type); |
276 $data = $this->preprocess($data, $type); |
258 |
277 |
259 set_error_handler(array('SimplePie_Misc', 'silence_errors')); |
278 set_error_handler(array('SimplePie_Misc', 'silence_errors')); |
260 $document->loadHTML($data); |
279 $document->loadHTML($data); |
261 restore_error_handler(); |
280 restore_error_handler(); |
262 |
281 |
282 $xpath = new DOMXPath($document); |
|
283 |
|
263 // Strip comments |
284 // Strip comments |
264 if ($this->strip_comments) |
285 if ($this->strip_comments) |
265 { |
286 { |
266 $xpath = new DOMXPath($document); |
|
267 $comments = $xpath->query('//comment()'); |
287 $comments = $xpath->query('//comment()'); |
268 |
288 |
269 foreach ($comments as $comment) |
289 foreach ($comments as $comment) |
270 { |
290 { |
271 $comment->parentNode->removeChild($comment); |
291 $comment->parentNode->removeChild($comment); |
277 // http://diveintomark.org/archives/2003/06/12/how_to_consume_rss_safely |
297 // http://diveintomark.org/archives/2003/06/12/how_to_consume_rss_safely |
278 if ($this->strip_htmltags) |
298 if ($this->strip_htmltags) |
279 { |
299 { |
280 foreach ($this->strip_htmltags as $tag) |
300 foreach ($this->strip_htmltags as $tag) |
281 { |
301 { |
282 $this->strip_tag($tag, $document, $type); |
302 $this->strip_tag($tag, $document, $xpath, $type); |
283 } |
303 } |
284 } |
304 } |
285 |
305 |
286 if ($this->strip_attributes) |
306 if ($this->strip_attributes) |
287 { |
307 { |
288 foreach ($this->strip_attributes as $attrib) |
308 foreach ($this->strip_attributes as $attrib) |
289 { |
309 { |
290 $this->strip_attr($attrib, $document); |
310 $this->strip_attr($attrib, $xpath); |
311 } |
|
312 } |
|
313 |
|
314 if ($this->add_attributes) |
|
315 { |
|
316 foreach ($this->add_attributes as $tag => $valuePairs) |
|
317 { |
|
318 $this->add_attr($tag, $valuePairs, $document); |
|
291 } |
319 } |
292 } |
320 } |
293 |
321 |
294 // Replace relative URLs |
322 // Replace relative URLs |
295 $this->base = $base; |
323 $this->base = $base; |
324 { |
352 { |
325 $img->setAttribute('src', $this->image_handler . $image_url); |
353 $img->setAttribute('src', $this->image_handler . $image_url); |
326 } |
354 } |
327 else |
355 else |
328 { |
356 { |
329 trigger_error("$this->cache_location is not writeable. Make sure you've set the correct relative or absolute path, and that the location is server-writable.", E_USER_WARNING); |
357 trigger_error("$this->cache_location is not writable. Make sure you've set the correct relative or absolute path, and that the location is server-writable.", E_USER_WARNING); |
330 } |
358 } |
331 } |
359 } |
332 } |
360 } |
333 } |
361 } |
334 } |
362 } |
335 } |
363 } |
336 |
364 |
337 // Remove the DOCTYPE |
365 // Get content node |
338 // Seems to cause segfaulting if we don't do this |
366 $div = $document->getElementsByTagName('body')->item(0)->firstChild; |
339 if ($document->firstChild instanceof DOMDocumentType) |
|
340 { |
|
341 $document->removeChild($document->firstChild); |
|
342 } |
|
343 |
|
344 // Move everything from the body to the root |
|
345 $real_body = $document->getElementsByTagName('body')->item(0)->childNodes->item(0); |
|
346 $document->replaceChild($real_body, $document->firstChild); |
|
347 |
|
348 // Finally, convert to a HTML string |
367 // Finally, convert to a HTML string |
349 $data = trim($document->saveHTML()); |
368 $data = trim($document->saveHTML($div)); |
350 |
369 |
351 if ($this->remove_div) |
370 if ($this->remove_div) |
352 { |
371 { |
353 $data = preg_replace('/^<div' . SIMPLEPIE_PCRE_XML_ATTRIBUTE . '>/', '', $data); |
372 $data = preg_replace('/^<div' . SIMPLEPIE_PCRE_XML_ATTRIBUTE . '>/', '', $data); |
354 $data = preg_replace('/<\/div>$/', '', $data); |
373 $data = preg_replace('/<\/div>$/', '', $data); |
382 } |
401 } |
383 |
402 |
384 protected function preprocess($html, $type) |
403 protected function preprocess($html, $type) |
385 { |
404 { |
386 $ret = ''; |
405 $ret = ''; |
406 $html = preg_replace('%</?(?:html|body)[^>]*?'.'>%is', '', $html); |
|
387 if ($type & ~SIMPLEPIE_CONSTRUCT_XHTML) |
407 if ($type & ~SIMPLEPIE_CONSTRUCT_XHTML) |
388 { |
408 { |
389 // Atom XHTML constructs are wrapped with a div by default |
409 // Atom XHTML constructs are wrapped with a div by default |
390 // Note: No protection if $html contains a stray </div>! |
410 // Note: No protection if $html contains a stray </div>! |
391 $html = '<div>' . $html . '</div>'; |
411 $html = '<div>' . $html . '</div>'; |
454 { |
474 { |
455 return ''; |
475 return ''; |
456 } |
476 } |
457 } |
477 } |
458 |
478 |
459 protected function strip_tag($tag, $document, $type) |
479 protected function strip_tag($tag, $document, $xpath, $type) |
460 { |
480 { |
461 $xpath = new DOMXPath($document); |
|
462 $elements = $xpath->query('body//' . $tag); |
481 $elements = $xpath->query('body//' . $tag); |
463 if ($this->encode_instead_of_strip) |
482 if ($this->encode_instead_of_strip) |
464 { |
483 { |
465 foreach ($elements as $element) |
484 foreach ($elements as $element) |
466 { |
485 { |
539 $element->parentNode->replaceChild($fragment, $element); |
558 $element->parentNode->replaceChild($fragment, $element); |
540 } |
559 } |
541 } |
560 } |
542 } |
561 } |
543 |
562 |
544 protected function strip_attr($attrib, $document) |
563 protected function strip_attr($attrib, $xpath) |
545 { |
564 { |
546 $xpath = new DOMXPath($document); |
|
547 $elements = $xpath->query('//*[@' . $attrib . ']'); |
565 $elements = $xpath->query('//*[@' . $attrib . ']'); |
548 |
566 |
549 foreach ($elements as $element) |
567 foreach ($elements as $element) |
550 { |
568 { |
551 $element->removeAttribute($attrib); |
569 $element->removeAttribute($attrib); |
552 } |
570 } |
553 } |
571 } |
572 |
|
573 protected function add_attr($tag, $valuePairs, $document) |
|
574 { |
|
575 $elements = $document->getElementsByTagName($tag); |
|
576 foreach ($elements as $element) |
|
577 { |
|
578 foreach ($valuePairs as $attrib => $value) |
|
579 { |
|
580 $element->setAttribute($attrib, $value); |
|
581 } |
|
582 } |
|
583 } |
|
554 } |
584 } |