|
1 <?php |
|
2 /** |
|
3 * HTML/XHTML filter that only allows some elements and attributes |
|
4 * |
|
5 * Added wp_ prefix to avoid conflicts with existing kses users |
|
6 * |
|
7 * @version 0.2.2 |
|
8 * @copyright (C) 2002, 2003, 2005 |
|
9 * @author Ulf Harnhammar <metaur@users.sourceforge.net> |
|
10 * |
|
11 * @package External |
|
12 * @subpackage KSES |
|
13 * |
|
14 * @internal |
|
15 * *** CONTACT INFORMATION *** |
|
16 * E-mail: metaur at users dot sourceforge dot net |
|
17 * Web page: http://sourceforge.net/projects/kses |
|
18 * Paper mail: Ulf Harnhammar |
|
19 * Ymergatan 17 C |
|
20 * 753 25 Uppsala |
|
21 * SWEDEN |
|
22 * |
|
23 * [kses strips evil scripts!] |
|
24 */ |
|
25 |
|
/**
 * Switch that tells kses whether the tag whitelists are supplied elsewhere.
 *
 * Define CUSTOM_TAGS as true before this file is loaded (in a plugin file, or
 * in the deprecated my-hacks.php file) to override the default. When nothing
 * has defined it yet, it is set to false here.
 *
 * @since 1.2.0
 */
defined('CUSTOM_TAGS') || define('CUSTOM_TAGS', false);
|
34 |
|
if (!CUSTOM_TAGS) {
	/**
	 * Kses global for default allowable HTML tags.
	 *
	 * Maps each allowed element name to the attribute names allowed on it.
	 * Can be overridden by using the CUSTOM_TAGS constant.
	 *
	 * @global array $allowedposttags
	 * @since 2.0.0
	 */
	$allowedposttags = array(
		'address' => array(),
		'a' => array(
			'class' => array(),
			'href' => array(),
			'id' => array(),
			'title' => array(),
			'rel' => array(),
			'rev' => array(),
			'name' => array(),
			'target' => array(),
		),
		'abbr' => array(
			'class' => array(),
			'title' => array(),
		),
		'acronym' => array(
			'title' => array(),
		),
		'b' => array(),
		'big' => array(),
		'blockquote' => array(
			'id' => array(),
			'cite' => array(),
			'class' => array(),
			'lang' => array(),
			'xml:lang' => array(),
		),
		'br' => array(
			'class' => array(),
		),
		'button' => array(
			'disabled' => array(),
			'name' => array(),
			'type' => array(),
			'value' => array(),
		),
		'caption' => array(
			'align' => array(),
			'class' => array(),
		),
		'cite' => array(
			'class' => array(),
			'dir' => array(),
			'lang' => array(),
			'title' => array(),
		),
		'code' => array(
			'style' => array(),
		),
		'col' => array(
			'align' => array(),
			'char' => array(),
			'charoff' => array(),
			'span' => array(),
			'dir' => array(),
			'style' => array(),
			'valign' => array(),
			'width' => array(),
		),
		'del' => array(
			'datetime' => array(),
		),
		'dd' => array(),
		'div' => array(
			'align' => array(),
			'class' => array(),
			'dir' => array(),
			'lang' => array(),
			'style' => array(),
			'xml:lang' => array(),
		),
		'dl' => array(),
		'dt' => array(),
		'em' => array(),
		'fieldset' => array(),
		'font' => array(
			'color' => array(),
			'face' => array(),
			'size' => array(),
		),
		'form' => array(
			'action' => array(),
			'accept' => array(),
			'accept-charset' => array(),
			'enctype' => array(),
			'method' => array(),
			'name' => array(),
			'target' => array(),
		),
		'h1' => array(
			'align' => array(),
			'class' => array(),
			'id' => array(),
			'style' => array(),
		),
		'h2' => array(
			'align' => array(),
			'class' => array(),
			'id' => array(),
			'style' => array(),
		),
		'h3' => array(
			'align' => array(),
			'class' => array(),
			'id' => array(),
			'style' => array(),
		),
		'h4' => array(
			'align' => array(),
			'class' => array(),
			'id' => array(),
			'style' => array(),
		),
		'h5' => array(
			'align' => array(),
			'class' => array(),
			'id' => array(),
			'style' => array(),
		),
		'h6' => array(
			'align' => array(),
			'class' => array(),
			'id' => array(),
			'style' => array(),
		),
		'hr' => array(
			'align' => array(),
			'class' => array(),
			'noshade' => array(),
			'size' => array(),
			'width' => array(),
		),
		'i' => array(),
		'img' => array(
			'alt' => array(),
			'align' => array(),
			'border' => array(),
			'class' => array(),
			'height' => array(),
			'hspace' => array(),
			'longdesc' => array(),
			'vspace' => array(),
			'src' => array(),
			'style' => array(),
			'width' => array(),
		),
		'ins' => array(
			'datetime' => array(),
			'cite' => array(),
		),
		'kbd' => array(),
		'label' => array(
			'for' => array(),
		),
		'legend' => array(
			'align' => array(),
		),
		'li' => array(
			'align' => array(),
			'class' => array(),
		),
		'p' => array(
			'class' => array(),
			'align' => array(),
			'dir' => array(),
			'lang' => array(),
			'style' => array(),
			'xml:lang' => array(),
		),
		'pre' => array(
			'style' => array(),
			'width' => array(),
		),
		'q' => array(
			'cite' => array(),
		),
		's' => array(),
		'span' => array(
			'class' => array(),
			'dir' => array(),
			'align' => array(),
			'lang' => array(),
			'style' => array(),
			'title' => array(),
			'xml:lang' => array(),
		),
		'strike' => array(),
		'strong' => array(),
		'sub' => array(),
		'sup' => array(),
		'table' => array(
			'align' => array(),
			'bgcolor' => array(),
			'border' => array(),
			'cellpadding' => array(),
			'cellspacing' => array(),
			'class' => array(),
			'dir' => array(),
			'id' => array(),
			'rules' => array(),
			'style' => array(),
			'summary' => array(),
			'width' => array(),
		),
		'tbody' => array(
			'align' => array(),
			'char' => array(),
			'charoff' => array(),
			'valign' => array(),
		),
		'td' => array(
			'abbr' => array(),
			'align' => array(),
			'axis' => array(),
			'bgcolor' => array(),
			'char' => array(),
			'charoff' => array(),
			'class' => array(),
			'colspan' => array(),
			'dir' => array(),
			'headers' => array(),
			'height' => array(),
			'nowrap' => array(),
			'rowspan' => array(),
			'scope' => array(),
			'style' => array(),
			'valign' => array(),
			'width' => array(),
		),
		'textarea' => array(
			'cols' => array(),
			'rows' => array(),
			'disabled' => array(),
			'name' => array(),
			'readonly' => array(),
		),
		'tfoot' => array(
			'align' => array(),
			'char' => array(),
			'class' => array(),
			'charoff' => array(),
			'valign' => array(),
		),
		'th' => array(
			'abbr' => array(),
			'align' => array(),
			'axis' => array(),
			'bgcolor' => array(),
			'char' => array(),
			'charoff' => array(),
			'class' => array(),
			'colspan' => array(),
			'headers' => array(),
			'height' => array(),
			'nowrap' => array(),
			'rowspan' => array(),
			'scope' => array(),
			'valign' => array(),
			'width' => array(),
		),
		'thead' => array(
			'align' => array(),
			'char' => array(),
			'charoff' => array(),
			'class' => array(),
			'valign' => array(),
		),
		'title' => array(),
		'tr' => array(
			'align' => array(),
			'bgcolor' => array(),
			'char' => array(),
			'charoff' => array(),
			'class' => array(),
			'style' => array(),
			'valign' => array(),
		),
		'tt' => array(),
		'u' => array(),
		'ul' => array(
			'class' => array(),
			'style' => array(),
			'type' => array(),
		),
		'ol' => array(
			'class' => array(),
			'start' => array(),
			'style' => array(),
			'type' => array(),
		),
		'var' => array(),
	);

	/**
	 * Kses allowed HTML elements.
	 *
	 * A much smaller whitelist than $allowedposttags. The following elements
	 * are deliberately left out (they were present but commented out in the
	 * original list): br, dd, dl, dt, ins, li, ol, p, sub, sup, u, ul.
	 *
	 * @global array $allowedtags
	 * @since 1.0.0
	 */
	$allowedtags = array(
		'a' => array(
			'href' => array(),
			'title' => array(),
		),
		'abbr' => array(
			'title' => array(),
		),
		'acronym' => array(
			'title' => array(),
		),
		'b' => array(),
		'blockquote' => array(
			'cite' => array(),
		),
		'cite' => array(),
		'code' => array(),
		'del' => array(
			'datetime' => array(),
		),
		'em' => array(),
		'i' => array(),
		'q' => array(
			'cite' => array(),
		),
		'strike' => array(),
		'strong' => array(),
	);
}
|
337 |
|
/**
 * Filters content and keeps only allowable HTML elements.
 *
 * Makes sure that only the allowed HTML element names, attribute names and
 * attribute values, plus only sane HTML entities, will occur in $string. You
 * have to remove any slashes from PHP's magic quotes before you call this
 * function.
 *
 * The default allowed protocols are 'http', 'https', 'ftp', 'ftps', 'mailto',
 * 'news', 'irc', 'gopher', 'nntp', 'feed' and 'telnet'. This covers all
 * common link protocols, except for 'javascript' which should not be allowed
 * for untrusted users.
 *
 * @since 1.0.0
 *
 * @param string $string Content to filter through kses
 * @param array $allowed_html List of allowed HTML elements
 * @param array $allowed_protocols Optional. Allowed protocol in links.
 * @return string Filtered content with only allowed HTML elements
 */
function wp_kses($string, $allowed_html, $allowed_protocols = array ('http', 'https', 'ftp', 'ftps', 'mailto', 'news', 'irc', 'gopher', 'nntp', 'feed', 'telnet')) {
	# Lower-case the whitelist keys once; the hook and the splitter both get the normalized copy.
	$allowed_html_fixed = wp_kses_array_lc($allowed_html);

	# Pre-clean the content: NUL characters, Netscape 4 JS entities, then entity normalization.
	$string = wp_kses_normalize_entities(wp_kses_js_entities(wp_kses_no_null($string)));

	# WP changed the order of these funcs and added args to wp_kses_hook
	$string = wp_kses_hook($string, $allowed_html_fixed, $allowed_protocols);

	return wp_kses_split($string, $allowed_html_fixed, $allowed_protocols);
}
|
366 |
|
/**
 * You add any kses hooks here.
 *
 * There is currently only one kses WordPress hook, 'pre_kses', and it is
 * called here. All parameters are passed to the hooked callbacks, which are
 * expected to return a string.
 *
 * @since 1.0.0
 *
 * @param string $string Content to filter through kses
 * @param array $allowed_html List of allowed HTML elements
 * @param array $allowed_protocols Allowed protocol in links
 * @return string Filtered content through 'pre_kses' hook
 */
function wp_kses_hook($string, $allowed_html, $allowed_protocols) {
	return apply_filters('pre_kses', $string, $allowed_html, $allowed_protocols);
}
|
384 |
|
/**
 * Reports the version number of the bundled kses library.
 *
 * @since 1.0.0
 *
 * @return string KSES Version Number
 */
function wp_kses_version() {
	$version = '0.2.2';

	return $version;
}
|
395 |
|
/**
 * Searches for HTML tags, no matter how malformed.
 *
 * It also matches stray ">" characters. Every match (an HTML comment, a tag,
 * or a lone ">") is handed to wp_kses_split2() and replaced by its result.
 *
 * The whitelist and protocol list are published in the $pass_allowed_html and
 * $pass_allowed_protocols globals so that the regular expression callback can
 * reach them.
 *
 * @since 1.0.0
 *
 * @param string $string Content to filter
 * @param array $allowed_html Allowed HTML elements
 * @param array $allowed_protocols Allowed protocols to keep
 * @return string Content with fixed HTML tags
 */
function wp_kses_split($string, $allowed_html, $allowed_protocols) {
	global $pass_allowed_html, $pass_allowed_protocols;
	$pass_allowed_html = $allowed_html;
	$pass_allowed_protocols = $allowed_protocols;

	# A named callback replaces create_function(), which evaluates a string as
	# code on every call, is deprecated as of PHP 7.2 and removed in PHP 8.0.
	return preg_replace_callback('%((<!--.*?(-->|$))|(<[^>]*(>|$)|>))%', '_wp_kses_split_callback', $string);
}

/**
 * Callback for wp_kses_split(): filters one matched tag, comment or ">".
 *
 * @access private
 *
 * @param array $match preg_replace_callback() matches array
 * @return string Sanitized replacement for the matched text
 */
function _wp_kses_split_callback($match) {
	global $pass_allowed_html, $pass_allowed_protocols;

	return wp_kses_split2($match[1], $pass_allowed_html, $pass_allowed_protocols);
}
|
415 |
|
/**
 * Callback for wp_kses_split for fixing malformed HTML tags.
 *
 * This function does a lot of work. It rejects some very malformed things like
 * <:::>. It returns an empty string, if the element isn't allowed (look ma, no
 * strip_tags()!). Otherwise it splits the tag into an element and an attribute
 * list.
 *
 * After the tag is split into an element and an attribute list, it is run
 * through wp_kses_attr(), which removes illegal attributes, and the result is
 * returned. HTML comments are kept, but their content is filtered through
 * wp_kses() until it no longer changes.
 *
 * @access private
 * @since 1.0.0
 * @uses wp_kses_attr()
 *
 * @param string $string Content to filter
 * @param array $allowed_html Allowed HTML elements
 * @param array $allowed_protocols Allowed protocols to keep
 * @return string Fixed HTML element
 */
function wp_kses_split2($string, $allowed_html, $allowed_protocols) {
	$string = wp_kses_stripslashes($string);

	# It matched a ">" character
	if (substr($string, 0, 1) != '<')
		return '>';

	# Allow HTML comments
	if (preg_match('%^<!--(.*?)(-->)?$%', $string, $comment)) {
		$string = str_replace(array('<!--', '-->'), '', $comment[1]);

		# Filter the comment body repeatedly until a pass leaves it unchanged
		for (;;) {
			$newstring = wp_kses($string, $allowed_html, $allowed_protocols);
			if ($string == $newstring)
				break;
			$string = $newstring;
		}

		if ( $string == '' )
			return '';

		// prevent multiple dashes in comments
		$string = preg_replace('/--+/', '-', $string);
		// prevent three dashes closing a comment
		$string = preg_replace('/-$/', '', $string);

		return "<!--{$string}-->";
	}

	# It's seriously malformed
	if (!preg_match('%^<\s*(/\s*)?([a-zA-Z0-9]+)([^>]*)>?$%', $string, $tag))
		return '';

	$slash = trim($tag[1]);
	$elem = $tag[2];
	$attrlist = $tag[3];

	# They are using a not allowed HTML element
	if (!isset($allowed_html[strtolower($elem)]))
		return '';

	# No attributes are allowed for closing elements
	if ($slash != '')
		return "<$slash$elem>";

	# $slash is empty from here on, so the element name is passed on its own
	return wp_kses_attr($elem, $attrlist, $allowed_html, $allowed_protocols);
}
|
476 |
|
/**
 * Removes all attributes, if none are allowed for this element.
 *
 * If some are allowed it calls wp_kses_hair() to split them further, and then
 * it builds up new HTML code from the data that wp_kses_hair() returns. It
 * also removes "<" and ">" characters, if there are any left. One more thing
 * it does is to check if the tag has a closing XHTML slash, and if it does, it
 * puts one in the returned code as well.
 *
 * Element and attribute names are matched against the whitelist
 * case-insensitively; the same lower-cased attribute name decides whether the
 * value is passed through safecss_filter_attr(), so "STYLE" is filtered
 * exactly like "style".
 *
 * @since 1.0.0
 *
 * @param string $element HTML element/tag
 * @param string $attr HTML attributes from HTML element to closing HTML element tag
 * @param array $allowed_html Allowed HTML elements
 * @param array $allowed_protocols Allowed protocols to keep
 * @return string Sanitized HTML element
 */
function wp_kses_attr($element, $attr, $allowed_html, $allowed_protocols) {
	# Is there a closing XHTML slash at the end of the attributes?

	$xhtml_slash = '';
	if (preg_match('%\s/\s*$%', $attr))
		$xhtml_slash = ' /';

	# Are any attributes allowed at all for this element?
	# empty() also covers a missing or non-countable entry, which count()
	# rejects on current PHP versions.

	$element_lc = strtolower($element);
	if (empty($allowed_html[$element_lc]))
		return "<$element$xhtml_slash>";

	$allowed_attrs = $allowed_html[$element_lc];

	# Split it

	$attrarr = wp_kses_hair($attr, $allowed_protocols);

	# Go through $attrarr, and save the allowed attributes for this element
	# in $attr2

	$attr2 = '';

	foreach ($attrarr as $arreach) {
		$name_lc = strtolower($arreach['name']);

		if (!isset($allowed_attrs[$name_lc]))
			continue; # the attribute is not allowed

		$current = $allowed_attrs[$name_lc];
		if ($current == '')
			continue; # the attribute is not allowed

		if (!is_array($current)) {
			# there are no checks
			$attr2 .= ' '.$arreach['whole'];
			continue;
		}

		# there are some checks
		$ok = true;
		foreach ($current as $currkey => $currval) {
			if (!wp_kses_check_attr_val($arreach['value'], $arreach['vless'], $currkey, $currval)) {
				$ok = false;
				break;
			}
		}

		# Compare the lower-cased name: the whitelist lookup above is
		# case-insensitive, so a case-sensitive test here would let
		# STYLE="..." through without CSS filtering.
		if ( $name_lc == 'style' ) {
			$orig_value = $arreach['value'];

			$value = safecss_filter_attr($orig_value);

			if ( empty($value) )
				continue; # nothing left of the style value, drop the attribute

			$arreach['value'] = $value;

			$arreach['whole'] = str_replace($orig_value, $value, $arreach['whole']);
		}

		if ($ok)
			$attr2 .= ' '.$arreach['whole']; # it passed them
	} # foreach

	# Remove any "<" or ">" characters

	$attr2 = preg_replace('/[<>]/', '', $attr2);

	return "<$element$attr2$xhtml_slash>";
}
|
560 |
|
/**
 * Builds an attribute list from string containing attributes.
 *
 * This function does a lot of work. It parses an attribute list into an array
 * with attribute data, and tries to do the right thing even if it gets weird
 * input. It will add quotes around attribute values that don't have any quotes
 * or apostrophes around them, to make it easier to produce HTML code that will
 * conform to W3C's HTML specification. It will also remove bad URL protocols
 * from attribute values. It also reduces duplicate attributes by using the
 * attribute defined first (foo='bar' foo='baz' will result in foo='bar').
 *
 * Attribute names may contain letters, "-" and ":" (so "xml:lang" is parsed
 * as one name). Whether a value is treated as a URL is decided on the
 * lower-cased attribute name, so HREF, Href and href are all protocol-checked.
 *
 * Each entry of the returned array is keyed by the attribute name as written
 * and holds 'name', 'value', 'whole' (the rebuilt name=value text) and
 * 'vless' ('y' for a valueless attribute, 'n' otherwise).
 *
 * @since 1.0.0
 *
 * @param string $attr Attribute list from HTML element to closing HTML element tag
 * @param array $allowed_protocols Allowed protocols to keep
 * @return array List of attributes after parsing
 */
function wp_kses_hair($attr, $allowed_protocols) {
	$attrarr = array ();
	$mode = 0;
	$attrname = '';
	$uris = array('xmlns', 'profile', 'href', 'src', 'cite', 'classid', 'codebase', 'data', 'usemap', 'longdesc', 'action');

	# Loop through the whole attribute list

	while (strlen($attr) != 0) {
		$working = 0; # Was the last operation successful?

		switch ($mode) {
			case 0 : # attribute name, href for instance

				# ":" is accepted so that namespaced names such as xml:lang can be parsed
				if (preg_match('/^([-a-zA-Z:]+)/', $attr, $match)) {
					$attrname = $match[1];
					$working = $mode = 1;
					$attr = preg_replace('/^[-a-zA-Z:]+/', '', $attr);
				}

				break;

			case 1 : # equals sign or valueless ("selected")

				if (preg_match('/^\s*=\s*/', $attr)) # equals sign
				{
					$working = 1;
					$mode = 2;
					$attr = preg_replace('/^\s*=\s*/', '', $attr);
					break;
				}

				if (preg_match('/^\s+/', $attr)) # valueless
				{
					$working = 1;
					$mode = 0;
					if (FALSE === array_key_exists($attrname, $attrarr)) {
						$attrarr[$attrname] = array ('name' => $attrname, 'value' => '', 'whole' => $attrname, 'vless' => 'y');
					}
					$attr = preg_replace('/^\s+/', '', $attr);
				}

				break;

			case 2 : # attribute value, a URL after href= for instance

				# URL attributes are recognized case-insensitively; otherwise
				# HREF="javascript:..." would skip the protocol check.
				$is_uri = in_array(strtolower($attrname), $uris, true);

				if (preg_match('/^"([^"]*)"(\s+|$)/', $attr, $match))
					# "value"
				{
					$thisval = $match[1];
					if ( $is_uri )
						$thisval = wp_kses_bad_protocol($thisval, $allowed_protocols);

					if (FALSE === array_key_exists($attrname, $attrarr)) {
						$attrarr[$attrname] = array ('name' => $attrname, 'value' => $thisval, 'whole' => "$attrname=\"$thisval\"", 'vless' => 'n');
					}
					$working = 1;
					$mode = 0;
					$attr = preg_replace('/^"[^"]*"(\s+|$)/', '', $attr);
					break;
				}

				if (preg_match("/^'([^']*)'(\s+|$)/", $attr, $match))
					# 'value'
				{
					$thisval = $match[1];
					if ( $is_uri )
						$thisval = wp_kses_bad_protocol($thisval, $allowed_protocols);

					if (FALSE === array_key_exists($attrname, $attrarr)) {
						$attrarr[$attrname] = array ('name' => $attrname, 'value' => $thisval, 'whole' => "$attrname='$thisval'", 'vless' => 'n');
					}
					$working = 1;
					$mode = 0;
					$attr = preg_replace("/^'[^']*'(\s+|$)/", '', $attr);
					break;
				}

				if (preg_match("%^([^\s\"']+)(\s+|$)%", $attr, $match))
					# value
				{
					$thisval = $match[1];
					if ( $is_uri )
						$thisval = wp_kses_bad_protocol($thisval, $allowed_protocols);

					if (FALSE === array_key_exists($attrname, $attrarr)) {
						$attrarr[$attrname] = array ('name' => $attrname, 'value' => $thisval, 'whole' => "$attrname=\"$thisval\"", 'vless' => 'n');
					}
					# We add quotes to conform to W3C's HTML spec.
					$working = 1;
					$mode = 0;
					$attr = preg_replace("%^[^\s\"']+(\s+|$)%", '', $attr);
				}

				break;
		} # switch

		if ($working == 0) # not well formed, remove and try again
		{
			$attr = wp_kses_html_error($attr);
			$mode = 0;
		}
	} # while

	if ($mode == 1 && FALSE === array_key_exists($attrname, $attrarr))
		# special case, for when the attribute list ends with a valueless
		# attribute like "selected"
		$attrarr[$attrname] = array ('name' => $attrname, 'value' => '', 'whole' => $attrname, 'vless' => 'y');

	return $attrarr;
}
|
689 |
|
/**
 * Performs different checks for attribute values.
 *
 * The currently implemented checks are "maxlen", "minlen", "maxval", "minval"
 * and "valueless". The check name is matched case-insensitively; an unknown
 * check name always passes.
 *
 * @since 1.0.0
 *
 * @param string $value Attribute value
 * @param string $vless Whether the value is valueless or not. Use 'y' or 'n'
 * @param string $checkname What $checkvalue is checking for.
 * @param mixed $checkvalue What constraint the value should pass
 * @return bool Whether check passes (true) or not (false)
 */
function wp_kses_check_attr_val($value, $vless, $checkname, $checkvalue) {
	$check = strtolower($checkname);

	# maxlen: the attribute value must not be longer than the given value.
	# This can be used to avoid Buffer Overflows in WWW clients and various
	# Internet servers.
	if ($check == 'maxlen')
		return !(strlen($value) > $checkvalue);

	# minlen: the attribute value must not be shorter than the given value.
	if ($check == 'minlen')
		return !(strlen($value) < $checkvalue);

	# maxval / minval: the attribute value must be an integer from 0 and up,
	# without an excessive amount of zeroes or whitespace (to avoid Buffer
	# Overflows), and must not be greater (maxval) or smaller (minval) than
	# the given value. maxval can be used to avoid Denial of Service attacks.
	if ($check == 'maxval' || $check == 'minval') {
		$well_formed = (bool) preg_match('/^\s{0,6}[0-9]{1,6}\s{0,6}$/', $value);

		if ($check == 'maxval')
			$in_range = !($value > $checkvalue);
		else
			$in_range = !($value < $checkvalue);

		return $well_formed && $in_range;
	}

	# valueless: checks if the attribute has a value (like <a href="blah">)
	# or not (<option selected>). If the given value is a "y" or a "Y", the
	# attribute must not have a value. If it is an "n" or an "N", the
	# attribute must have one.
	if ($check == 'valueless')
		return !(strtolower($checkvalue) != $vless);

	return true;
}
|
761 |
|
/**
 * Sanitize string from bad protocols.
 *
 * This function removes all non-allowed protocols from the beginning of
 * $string. It ignores whitespace and the case of the letters, and it does
 * understand HTML entities. It keeps applying wp_kses_bad_protocol_once()
 * until a pass leaves the string unchanged, so it won't be fooled by a string
 * like "javascript:javascript:alert(57)".
 *
 * @since 1.0.0
 *
 * @param string $string Content to filter bad protocols from
 * @param array $allowed_protocols Allowed protocols to keep
 * @return string Filtered content
 */
function wp_kses_bad_protocol($string, $allowed_protocols) {
	$string = wp_kses_no_null($string);
	$string = preg_replace('/\xad+/', '', $string); # deals with Opera "feature"

	# At least one pass always runs; stop as soon as a pass changes nothing
	do {
		$previous = $string;
		$string = wp_kses_bad_protocol_once($previous, $allowed_protocols);
	} while ($string != $previous);

	return $string;
}
|
788 |
|
/**
 * Removes any NULL characters in $string.
 *
 * Both real NUL bytes and the literal two-character sequence backslash-zero
 * are stripped.
 *
 * @since 1.0.0
 *
 * @param string $string
 * @return string
 */
function wp_kses_no_null($string) {
	# The patterns are applied in order: NUL bytes first, then literal "\0" runs
	$null_patterns = array('/\0+/', '/(\\\\0)+/');

	return preg_replace($null_patterns, '', $string);
}
|
803 |
|
/**
 * Strips slashes from in front of quotes.
 *
 * This function changes the character sequence \" to just ". It leaves all
 * other slashes alone. It's really weird, but the quoting from
 * preg_replace(//e) seems to require this.
 *
 * @since 1.0.0
 *
 * @param string $string String to strip slashes
 * @return string Fixed strings with quoted slashes
 */
function wp_kses_stripslashes($string) {
	# The regex matches one literal backslash followed by a double quote
	$escaped_quote = '%\\\\"%';

	return preg_replace($escaped_quote, '"', $string);
}
|
819 |
|
/**
 * Goes through an array and changes the keys to all lower case.
 *
 * Both levels are lower-cased: the outer keys (element names) and the keys of
 * each inner array (attribute names). Inner values are copied unchanged.
 *
 * @since 1.0.0
 *
 * @param array $inarray Unfiltered array
 * @return array Fixed array with all lowercase keys
 */
function wp_kses_array_lc($inarray) {
	$lowered = array ();

	foreach ( (array) $inarray as $element => $attributes ) {
		$lowered_attributes = array ();

		foreach ( (array) $attributes as $attribute => $checks )
			$lowered_attributes[strtolower($attribute)] = $checks;

		$lowered[strtolower($element)] = $lowered_attributes;
	}

	return $lowered;
}
|
843 |
|
/**
 * Removes the HTML JavaScript entities found in early versions of Netscape 4.
 *
 * These have the form &{...}; an unterminated one is removed up to the end of
 * the string.
 *
 * @since 1.0.0
 *
 * @param string $string
 * @return string
 */
function wp_kses_js_entities($string) {
	$js_entity = '%&\s*\{[^}]*(\}\s*;?|$)%';

	return preg_replace($js_entity, '', $string);
}
|
855 |
|
/**
 * Handles parsing errors in wp_kses_hair().
 *
 * The general plan is to remove everything up to and including some
 * whitespace, but quoted runs ("..." or '...') are skipped as a whole, even
 * when they contain whitespace or are left unterminated.
 *
 * @since 1.0.0
 *
 * @param string $string
 * @return string
 */
function wp_kses_html_error($string) {
	$leading_junk = '/^("[^"]*("|$)|\'[^\']*(\'|$)|\S)*\s*/';

	return preg_replace($leading_junk, '', $string);
}
|
870 |
|
/**
 * Sanitizes content from bad protocols and other characters.
 *
 * This function searches for URL protocols at the beginning of $string, while
 * handling whitespace and HTML entities. The protocol separator is recognized
 * as a literal ":" as well as in its entity-encoded forms "&#58;" and
 * "&#x3a;".
 *
 * The allowed protocols are published in the $_kses_allowed_protocols global
 * for wp_kses_bad_protocol_once2().
 *
 * @since 1.0.0
 *
 * @param string $string Content to check for bad protocols
 * @param array $allowed_protocols Allowed protocols
 * @return string Sanitized content
 */
function wp_kses_bad_protocol_once($string, $allowed_protocols) {
	global $_kses_allowed_protocols;
	$_kses_allowed_protocols = $allowed_protocols;

	# Split off whatever precedes the first colon, literal or entity-encoded
	$string2 = preg_split('/:|&#58;|&#x3a;/i', $string, 2);
	if ( isset($string2[1]) && !preg_match('%/\?%', $string2[0]) )
		$string = wp_kses_bad_protocol_once2($string2[0]) . trim($string2[1]);
	else
		$string = preg_replace_callback('/^((&[^;]*;|[\sA-Za-z0-9])*)'.'(:|&#58;|&#[Xx]3[Aa];)\s*/', 'wp_kses_bad_protocol_once2', $string);

	return $string;
}
|
895 |
|
/**
 * Callback for wp_kses_bad_protocol_once() regular expression.
 *
 * This function processes URL protocols, checks to see if they're in the
 * white-list or not, and returns different data depending on the answer: the
 * normalized protocol followed by ":" when it is allowed, an empty string
 * otherwise.
 *
 * @access private
 * @since 1.0.0
 *
 * @param mixed $matches string or preg_replace_callback() matches array to check for bad protocols
 * @return string Sanitized content
 */
function wp_kses_bad_protocol_once2($matches) {
	global $_kses_allowed_protocols;

	if ( !is_array($matches) ) {
		$string = $matches;
	} elseif ( empty($matches[1]) ) {
		# a matches array without a usable protocol capture
		return '';
	} else {
		$string = $matches[1];
	}

	# Normalize the candidate protocol before comparing it with the white-list
	$protocol = wp_kses_decode_entities($string);
	$protocol = preg_replace('/\s/', '', $protocol);
	$protocol = wp_kses_no_null($protocol);
	$protocol = preg_replace('/\xad+/', '', $protocol); # deals with Opera "feature"
	$protocol = strtolower($protocol);

	foreach ( (array) $_kses_allowed_protocols as $one_protocol ) {
		if ( strtolower($one_protocol) == $protocol )
			return "$protocol:";
	}

	return '';
}
|
939 |
|
/**
 * Converts and fixes HTML entities.
 *
 * This function normalizes HTML entities. It will convert "AT&T" to the correct
 * "AT&amp;T", "&#00058;" to "&#58;", "&#XYZZY;" to "&amp;#XYZZY;" and so on.
 *
 * @since 1.0.0
 *
 * @param string $string Content to normalize entities
 * @return string Content with normalized entities
 */
function wp_kses_normalize_entities($string) {
	# Disarm all entities by converting & to &amp;

	$string = str_replace('&', '&amp;', $string);

	# Change back the allowed entities in our entity whitelist: named
	# entities, decimal numeric entities and hexadecimal numeric entities.
	# After the step above each of them starts with "&amp;".

	$string = preg_replace('/&amp;([A-Za-z][A-Za-z0-9]{0,19});/', '&\\1;', $string);
	$string = preg_replace_callback('/&amp;#0*([0-9]{1,5});/', 'wp_kses_normalize_entities2', $string);
	$string = preg_replace_callback('/&amp;#([Xx])0*(([0-9A-Fa-f]{2}){1,2});/', 'wp_kses_normalize_entities3', $string);

	return $string;
}
|
964 |
|
/**
 * Callback for wp_kses_normalize_entities() regular expression.
 *
 * Decides whether a decimal numeric entity may stay active. The entity is
 * emitted as "&#N;" only when N passes valid_unicode() and does not exceed
 * 65535 (16 bits); otherwise its ampersand stays escaped ("&amp;#N;") so the
 * browser shows it as literal text.
 *
 * @access private
 * @since 1.0.0
 *
 * @param array $matches preg_replace_callback() matches array; element 1 is
 *                       the number with leading zeros already stripped
 * @return string The active entity, the escaped entity, or an empty string
 *                when element 1 is missing or empty (this includes "0")
 */
function wp_kses_normalize_entities2($matches) {
	if ( empty($matches[1]) )
		return '';

	$i = $matches[1];

	if ( valid_unicode($i) && $i <= 65535 )
		return "&#$i;";

	// Invalid or too large: keep the entity disarmed.
	return "&amp;#$i;";
}
|
984 |
|
/**
 * Callback for wp_kses_normalize_entities() regular expression.
 *
 * Decides whether a hexadecimal numeric entity may stay active. The entity is
 * emitted as "&#xHH;" only when its value passes valid_unicode(); otherwise
 * its ampersand stays escaped ("&amp;#xHH;"). The "x" is always written in
 * lower case, whatever case the input used.
 *
 * @access private
 *
 * @param array $matches preg_replace_callback() matches array; element 2 holds
 *                       the hex digits with leading zeros already stripped
 * @return string The active entity, the escaped entity, or an empty string
 *                when element 2 is missing or empty
 */
function wp_kses_normalize_entities3($matches) {
	if ( empty($matches[2]) )
		return '';

	$hexchars = $matches[2];

	if ( valid_unicode(hexdec($hexchars)) )
		return "&#x$hexchars;";

	// Not a valid code point: keep the entity disarmed.
	return "&amp;#x$hexchars;";
}
|
1003 |
|
/**
 * Helper function to determine if a Unicode value is valid.
 *
 * Accepted values are tab (0x9), line feed (0xa), carriage return (0xd) and
 * the ranges 0x20-0xd7ff, 0xe000-0xfffd and 0x10000-0x10ffff. Everything else
 * is rejected, including the other values below 0x20, 0xd800-0xdfff, 0xfffe
 * and 0xffff.
 *
 * @param int $i Unicode value
 * @return bool true if the value was a valid Unicode number
 */
function valid_unicode($i) {
	// The only accepted values below 0x20.
	if ( $i == 0x9 || $i == 0xa || $i == 0xd )
		return true;

	if ( $i >= 0x20 && $i <= 0xd7ff )
		return true;

	if ( $i >= 0xe000 && $i <= 0xfffd )
		return true;

	return ( $i >= 0x10000 && $i <= 0x10ffff );
}
|
1016 |
|
/**
 * Convert all entities to their character counterparts.
 *
 * This function decodes numeric HTML entities (&#65; and &#x41;). It doesn't do
 * anything with other entities like &auml;, but we don't need them in the URL
 * protocol whitelisting system anyway.
 *
 * Each entity is decoded with chr(), i.e. to a single byte: values above 255
 * wrap around modulo 256.
 *
 * Named callbacks are used instead of create_function(), which no longer
 * exists in PHP 8.
 *
 * @since 1.0.0
 *
 * @param string $string Content to change entities
 * @return string Content after decoded entities
 */
function wp_kses_decode_entities($string) {
	$string = preg_replace_callback('/&#([0-9]+);/', '_wp_kses_decode_entities_chr', $string);
	$string = preg_replace_callback('/&#[Xx]([0-9A-Fa-f]+);/', '_wp_kses_decode_entities_chr_hexdec', $string);

	return $string;
}

/**
 * Callback for wp_kses_decode_entities(): decodes one decimal entity.
 *
 * @access private
 *
 * @param array $match preg_replace_callback() matches array; element 1 holds the decimal digits
 * @return string The single byte chr() produces for the value
 */
function _wp_kses_decode_entities_chr( $match ) {
	// chr() only uses the value modulo 256, and 10^8 is a multiple of 256, so
	// the last 8 digits give the same byte while never overflowing an integer,
	// however many digits the input supplies.
	return chr( (int) substr( $match[1], -8 ) );
}

/**
 * Callback for wp_kses_decode_entities(): decodes one hexadecimal entity.
 *
 * @access private
 *
 * @param array $match preg_replace_callback() matches array; element 1 holds the hex digits
 * @return string The single byte chr() produces for the value
 */
function _wp_kses_decode_entities_chr_hexdec( $match ) {
	// The value modulo 256 is exactly the last two hex digits, so longer
	// inputs cannot overflow hexdec().
	return chr( hexdec( substr( $match[1], -2 ) ) );
}
|
1035 |
|
/**
 * Sanitize content with allowed HTML Kses rules.
 *
 * Slashes are stripped from the data before it is filtered with wp_kses()
 * against the global $allowedtags, and added back to the result.
 *
 * @since 1.0.0
 * @uses $allowedtags
 *
 * @param string $data Content to filter
 * @return string Filtered content
 */
function wp_filter_kses($data) {
	global $allowedtags;

	$unslashed = stripslashes( $data );
	$filtered = wp_kses($unslashed, $allowedtags);

	return addslashes( $filtered );
}
|
1049 |
|
/**
 * Sanitize content for allowed HTML tags for post content.
 *
 * Post content refers to the page contents of the 'post' type and not $_POST
 * data from forms.
 *
 * Slashes are stripped from the data before it is filtered with wp_kses()
 * against the global $allowedposttags, and added back to the result.
 *
 * @since 2.0.0
 * @uses $allowedposttags
 *
 * @param string $data Post content to filter
 * @return string Filtered post content with allowed HTML tags and attributes intact.
 */
function wp_filter_post_kses($data) {
	global $allowedposttags;

	$unslashed = stripslashes( $data );
	$filtered = wp_kses($unslashed, $allowedposttags);

	return addslashes( $filtered );
}
|
1066 |
|
/**
 * Strips all of the HTML in the content.
 *
 * The data is unslashed, filtered with wp_kses() using an empty list of
 * allowed elements, and slashed again.
 *
 * @since 2.1.0
 *
 * @param string $data Content to strip all HTML from
 * @return string Filtered content without any HTML
 */
function wp_filter_nohtml_kses($data) {
	$no_elements_allowed = array();

	$unslashed = stripslashes( $data );
	$filtered = wp_kses($unslashed, $no_elements_allowed);

	return addslashes( $filtered );
}
|
1078 |
|
/**
 * Adds all Kses input form content filters.
 *
 * No explicit priority is passed for any hook. wp_filter_kses() is added to
 * the 'pre_comment_content' and 'title_save_pre' hooks.
 *
 * wp_filter_post_kses() is added to the 'content_save_pre',
 * 'excerpt_save_pre', and 'content_filtered_save_pre' hooks.
 *
 * @since 2.0.0
 * @uses add_filter() See description for what functions are added to what hooks.
 */
function kses_init_filters() {
	// Normal filtering.
	foreach ( array('pre_comment_content', 'title_save_pre') as $hook )
		add_filter($hook, 'wp_filter_kses');

	// Post filtering
	foreach ( array('content_save_pre', 'excerpt_save_pre', 'content_filtered_save_pre') as $hook )
		add_filter($hook, 'wp_filter_post_kses');
}
|
1101 |
|
/**
 * Removes all Kses input form content filters.
 *
 * Undoes exactly what kses_init_filters() registers: wp_filter_kses() on
 * 'pre_comment_content' and 'title_save_pre', and wp_filter_post_kses() on
 * 'content_save_pre', 'excerpt_save_pre' and 'content_filtered_save_pre'.
 *
 * Does not remove the kses_init() function from the 'init' hook, nor from the
 * 'set_current_user' hook.
 *
 * @since 2.0.6
 */
function kses_remove_filters() {
	// Normal filtering.
	foreach ( array('pre_comment_content', 'title_save_pre') as $hook )
		remove_filter($hook, 'wp_filter_kses');

	// Post filtering
	foreach ( array('content_save_pre', 'excerpt_save_pre', 'content_filtered_save_pre') as $hook )
		remove_filter($hook, 'wp_filter_post_kses');
}
|
1124 |
|
/**
 * Sets up most of the Kses filters for input form content.
 *
 * If you remove the kses_init() function from the 'init' and
 * 'set_current_user' hooks, then none of the Kses filter hooks will be added.
 *
 * All Kses filters are removed first. They are added back only when the
 * current user lacks the 'unfiltered_html' capability.
 *
 * @uses kses_remove_filters() Removes the Kses filters
 * @uses kses_init_filters() Adds the Kses filters back if the user
 * 		does not have unfiltered HTML capability.
 * @since 2.0.0
 */
function kses_init() {
	kses_remove_filters();

	// Users allowed to post unfiltered HTML keep the filters removed.
	if ( current_user_can('unfiltered_html') )
		return;

	kses_init_filters();
}
|
1147 |
|
// Run kses_init() on the 'init' and 'set_current_user' actions.
foreach ( array('init', 'set_current_user') as $kses_hook )
	add_action($kses_hook, 'kses_init');
unset($kses_hook);
|
1150 |
|
/**
 * Filters the contents of an inline style attribute.
 *
 * The CSS is passed through wp_kses_no_null() and stripped of newlines,
 * carriage returns and tabs. If it then contains a backslash, an opening
 * parenthesis, an ampersand or a comment opener, the whole value is rejected.
 * Otherwise it is split on ";" and only declarations whose property name is
 * in the list returned by the 'safe_style_css' filter are kept. Fragments
 * without a ":" are kept unchanged. Property names are compared
 * case-sensitively.
 *
 * @param string $css        Inline CSS to filter
 * @param string $deprecated Not used
 * @return string The kept declarations joined with ";", an empty string when
 *                the CSS is rejected, or the cleaned CSS unchanged when the
 *                filtered property list is empty
 */
function safecss_filter_attr( $css, $deprecated = '' ) {
	$css = wp_kses_no_null($css);
	$css = str_replace(array("\n","\r","\t"), '', $css);

	// Remove any inline css containing \ ( & or comments. Four backslashes are
	// needed in the single-quoted string so that PCRE receives an escaped
	// backslash; with only two, the class matched "(" and "&" but no backslash.
	if ( preg_match( '%[\\\\(&]|/\*%', $css ) )
		return '';

	// explode() replaces split(), which was removed in PHP 7.0.
	$css_array = explode( ';', trim( $css ) );
	$allowed_attr = apply_filters( 'safe_style_css', array( 'text-align', 'margin', 'color', 'float',
	'border', 'background', 'background-color', 'border-bottom', 'border-bottom-color',
	'border-bottom-style', 'border-bottom-width', 'border-collapse', 'border-color', 'border-left',
	'border-left-color', 'border-left-style', 'border-left-width', 'border-right', 'border-right-color',
	'border-right-style', 'border-right-width', 'border-spacing', 'border-style', 'border-top',
	'border-top-color', 'border-top-style', 'border-top-width', 'border-width', 'caption-side',
	'clear', 'cursor', 'direction', 'font', 'font-family', 'font-size', 'font-style',
	'font-variant', 'font-weight', 'height', 'letter-spacing', 'line-height', 'margin-bottom',
	'margin-left', 'margin-right', 'margin-top', 'overflow', 'padding', 'padding-bottom',
	'padding-left', 'padding-right', 'padding-top', 'text-decoration', 'text-indent', 'vertical-align',
	'width' ) );

	if ( empty($allowed_attr) )
		return $css;

	$css = '';
	foreach ( $css_array as $css_item ) {
		// Trim before the emptiness check so whitespace-only fragments do not
		// add stray semicolons to the result.
		$css_item = trim( $css_item );
		if ( $css_item == '' )
			continue;

		$found = false;
		if ( strpos( $css_item, ':' ) === false ) {
			$found = true;
		} else {
			// Only the property name (the text before the first colon) is checked.
			$parts = explode( ':', $css_item, 2 );
			if ( in_array( trim( $parts[0] ), (array) $allowed_attr, true ) )
				$found = true;
		}

		if ( $found ) {
			if ( $css != '' )
				$css .= ';';
			$css .= $css_item;
		}
	}

	return $css;
}