diff -r 000000000000 -r 03b0d1493584 web/wp-includes/kses.php --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/web/wp-includes/kses.php Wed Dec 23 17:55:33 2009 +0000 @@ -0,0 +1,1195 @@ + + * + * @package External + * @subpackage KSES + * + * @internal + * *** CONTACT INFORMATION *** + * E-mail: metaur at users dot sourceforge dot net + * Web page: http://sourceforge.net/projects/kses + * Paper mail: Ulf Harnhammar + * Ymergatan 17 C + * 753 25 Uppsala + * SWEDEN + * + * [kses strips evil scripts!] + */ + +/** + * You can override this in your my-hacks.php file. You can also override this + * in a plugin file. The use of my-hacks.php is deprecated. + * + * @since 1.2.0 + */ +if (!defined('CUSTOM_TAGS')) + define('CUSTOM_TAGS', false); + +if (!CUSTOM_TAGS) { + /** + * Kses global for default allowable HTML tags. + * + * Can be overridden by using the CUSTOM_TAGS constant. + * + * @global array $allowedposttags + * @since 2.0.0 + */ + $allowedposttags = array( + 'address' => array(), + 'a' => array( + 'class' => array (), + 'href' => array (), + 'id' => array (), + 'title' => array (), + 'rel' => array (), + 'rev' => array (), + 'name' => array (), + 'target' => array()), + 'abbr' => array( + 'class' => array (), + 'title' => array ()), + 'acronym' => array( + 'title' => array ()), + 'b' => array(), + 'big' => array(), + 'blockquote' => array( + 'id' => array (), + 'cite' => array (), + 'class' => array(), + 'lang' => array(), + 'xml:lang' => array()), + 'br' => array ( + 'class' => array ()), + 'button' => array( + 'disabled' => array (), + 'name' => array (), + 'type' => array (), + 'value' => array ()), + 'caption' => array( + 'align' => array (), + 'class' => array ()), + 'cite' => array ( + 'class' => array(), + 'dir' => array(), + 'lang' => array(), + 'title' => array ()), + 'code' => array ( + 'style' => array()), + 'col' => array( + 'align' => array (), + 'char' => array (), + 'charoff' => array (), + 'span' => array (), + 'dir' => array(), + 'style' => 
array (), + 'valign' => array (), + 'width' => array ()), + 'del' => array( + 'datetime' => array ()), + 'dd' => array(), + 'div' => array( + 'align' => array (), + 'class' => array (), + 'dir' => array (), + 'lang' => array(), + 'style' => array (), + 'xml:lang' => array()), + 'dl' => array(), + 'dt' => array(), + 'em' => array(), + 'fieldset' => array(), + 'font' => array( + 'color' => array (), + 'face' => array (), + 'size' => array ()), + 'form' => array( + 'action' => array (), + 'accept' => array (), + 'accept-charset' => array (), + 'enctype' => array (), + 'method' => array (), + 'name' => array (), + 'target' => array ()), + 'h1' => array( + 'align' => array (), + 'class' => array (), + 'id' => array (), + 'style' => array ()), + 'h2' => array ( + 'align' => array (), + 'class' => array (), + 'id' => array (), + 'style' => array ()), + 'h3' => array ( + 'align' => array (), + 'class' => array (), + 'id' => array (), + 'style' => array ()), + 'h4' => array ( + 'align' => array (), + 'class' => array (), + 'id' => array (), + 'style' => array ()), + 'h5' => array ( + 'align' => array (), + 'class' => array (), + 'id' => array (), + 'style' => array ()), + 'h6' => array ( + 'align' => array (), + 'class' => array (), + 'id' => array (), + 'style' => array ()), + 'hr' => array ( + 'align' => array (), + 'class' => array (), + 'noshade' => array (), + 'size' => array (), + 'width' => array ()), + 'i' => array(), + 'img' => array( + 'alt' => array (), + 'align' => array (), + 'border' => array (), + 'class' => array (), + 'height' => array (), + 'hspace' => array (), + 'longdesc' => array (), + 'vspace' => array (), + 'src' => array (), + 'style' => array (), + 'width' => array ()), + 'ins' => array( + 'datetime' => array (), + 'cite' => array ()), + 'kbd' => array(), + 'label' => array( + 'for' => array ()), + 'legend' => array( + 'align' => array ()), + 'li' => array ( + 'align' => array (), + 'class' => array ()), + 'p' => array( + 'class' => array (), + 
'align' => array (), + 'dir' => array(), + 'lang' => array(), + 'style' => array (), + 'xml:lang' => array()), + 'pre' => array( + 'style' => array(), + 'width' => array ()), + 'q' => array( + 'cite' => array ()), + 's' => array(), + 'span' => array ( + 'class' => array (), + 'dir' => array (), + 'align' => array (), + 'lang' => array (), + 'style' => array (), + 'title' => array (), + 'xml:lang' => array()), + 'strike' => array(), + 'strong' => array(), + 'sub' => array(), + 'sup' => array(), + 'table' => array( + 'align' => array (), + 'bgcolor' => array (), + 'border' => array (), + 'cellpadding' => array (), + 'cellspacing' => array (), + 'class' => array (), + 'dir' => array(), + 'id' => array(), + 'rules' => array (), + 'style' => array (), + 'summary' => array (), + 'width' => array ()), + 'tbody' => array( + 'align' => array (), + 'char' => array (), + 'charoff' => array (), + 'valign' => array ()), + 'td' => array( + 'abbr' => array (), + 'align' => array (), + 'axis' => array (), + 'bgcolor' => array (), + 'char' => array (), + 'charoff' => array (), + 'class' => array (), + 'colspan' => array (), + 'dir' => array(), + 'headers' => array (), + 'height' => array (), + 'nowrap' => array (), + 'rowspan' => array (), + 'scope' => array (), + 'style' => array (), + 'valign' => array (), + 'width' => array ()), + 'textarea' => array( + 'cols' => array (), + 'rows' => array (), + 'disabled' => array (), + 'name' => array (), + 'readonly' => array ()), + 'tfoot' => array( + 'align' => array (), + 'char' => array (), + 'class' => array (), + 'charoff' => array (), + 'valign' => array ()), + 'th' => array( + 'abbr' => array (), + 'align' => array (), + 'axis' => array (), + 'bgcolor' => array (), + 'char' => array (), + 'charoff' => array (), + 'class' => array (), + 'colspan' => array (), + 'headers' => array (), + 'height' => array (), + 'nowrap' => array (), + 'rowspan' => array (), + 'scope' => array (), + 'valign' => array (), + 'width' => array ()), + 'thead' 
=> array( + 'align' => array (), + 'char' => array (), + 'charoff' => array (), + 'class' => array (), + 'valign' => array ()), + 'title' => array(), + 'tr' => array( + 'align' => array (), + 'bgcolor' => array (), + 'char' => array (), + 'charoff' => array (), + 'class' => array (), + 'style' => array (), + 'valign' => array ()), + 'tt' => array(), + 'u' => array(), + 'ul' => array ( + 'class' => array (), + 'style' => array (), + 'type' => array ()), + 'ol' => array ( + 'class' => array (), + 'start' => array (), + 'style' => array (), + 'type' => array ()), + 'var' => array ()); + + /** + * Kses allowed HTML elements. + * + * @global array $allowedtags + * @since 1.0.0 + */ + $allowedtags = array( + 'a' => array( + 'href' => array (), + 'title' => array ()), + 'abbr' => array( + 'title' => array ()), + 'acronym' => array( + 'title' => array ()), + 'b' => array(), + 'blockquote' => array( + 'cite' => array ()), + // 'br' => array(), + 'cite' => array (), + 'code' => array(), + 'del' => array( + 'datetime' => array ()), + // 'dd' => array(), + // 'dl' => array(), + // 'dt' => array(), + 'em' => array (), 'i' => array (), + // 'ins' => array('datetime' => array(), 'cite' => array()), + // 'li' => array(), + // 'ol' => array(), + // 'p' => array(), + 'q' => array( + 'cite' => array ()), + 'strike' => array(), + 'strong' => array(), + // 'sub' => array(), + // 'sup' => array(), + // 'u' => array(), + // 'ul' => array(), + ); +} + +/** + * Filters content and keeps only allowable HTML elements. + * + * This function makes sure that only the allowed HTML element names, attribute + * names and attribute values plus only sane HTML entities will occur in + * $string. You have to remove any slashes from PHP's magic quotes before you + * call this function. + * + * The default allowed protocols are 'http', 'https', 'ftp', 'ftps', 'mailto', + * 'news', 'irc', 'gopher', 'nntp', 'feed', and finally 'telnet'. 
This covers all common + * link protocols, except for 'javascript' which should not be allowed for + * untrusted users. + * + * @since 1.0.0 + * + * @param string $string Content to filter through kses + * @param array $allowed_html List of allowed HTML elements + * @param array $allowed_protocols Optional. Allowed protocol in links. + * @return string Filtered content with only allowed HTML elements + */ +function wp_kses($string, $allowed_html, $allowed_protocols = array ('http', 'https', 'ftp', 'ftps', 'mailto', 'news', 'irc', 'gopher', 'nntp', 'feed', 'telnet')) { + $string = wp_kses_no_null($string); + $string = wp_kses_js_entities($string); + $string = wp_kses_normalize_entities($string); + $allowed_html_fixed = wp_kses_array_lc($allowed_html); + $string = wp_kses_hook($string, $allowed_html_fixed, $allowed_protocols); // WP changed the order of these funcs and added args to wp_kses_hook + return wp_kses_split($string, $allowed_html_fixed, $allowed_protocols); +} + +/** + * You add any kses hooks here. + * + * There is currently only one kses WordPress hook and it is called here. All + * parameters are passed to the hooks, which are expected to return a string. + * + * @since 1.0.0 + * + * @param string $string Content to filter through kses + * @param array $allowed_html List of allowed HTML elements + * @param array $allowed_protocols Allowed protocol in links + * @return string Filtered content through 'pre_kses' hook + */ +function wp_kses_hook($string, $allowed_html, $allowed_protocols) { + $string = apply_filters('pre_kses', $string, $allowed_html, $allowed_protocols); + return $string; +} + +/** + * This function returns kses' version number. + * + * @since 1.0.0 + * + * @return string KSES Version Number + */ +function wp_kses_version() { + return '0.2.2'; +} + +/** + * Searches for HTML tags, no matter how malformed. + * + * It also matches stray ">" characters. 
+ * + * @since 1.0.0 + * + * @param string $string Content to filter + * @param array $allowed_html Allowed HTML elements + * @param array $allowed_protocols Allowed protocols to keep + * @return string Content with fixed HTML tags + */ +function wp_kses_split($string, $allowed_html, $allowed_protocols) { + global $pass_allowed_html, $pass_allowed_protocols; + $pass_allowed_html = $allowed_html; + $pass_allowed_protocols = $allowed_protocols; + return preg_replace_callback('%((<!--.*?(-->|$))|(<[^>]*(>|$)|>))%', + create_function('$match', 'global $pass_allowed_html, $pass_allowed_protocols; return wp_kses_split2($match[1], $pass_allowed_html, $pass_allowed_protocols);'), $string); +} + +/** + * Callback for wp_kses_split for fixing malformed HTML tags. + * + * This function does a lot of work. It rejects some very malformed things like + * <:::>. It returns an empty string, if the element isn't allowed (look ma, no + * strip_tags()!). Otherwise it splits the tag into an element and an attribute + * list. + * + * After the tag is split into an element and an attribute list, it is run + * through another filter which will remove illegal attributes and once that is + * completed, will be returned. 
+ * + * @access private + * @since 1.0.0 + * @uses wp_kses_attr() + * + * @param string $string Content to filter + * @param array $allowed_html Allowed HTML elements + * @param array $allowed_protocols Allowed protocols to keep + * @return string Fixed HTML element + */ +function wp_kses_split2($string, $allowed_html, $allowed_protocols) { + $string = wp_kses_stripslashes($string); + + if (substr($string, 0, 1) != '<') + return '&gt;'; + # It matched a ">" character + + if (preg_match('%^<!--(.*?)(-->)?$%', $string, $matches)) { + $string = str_replace(array('<!--', '-->'), '', $matches[1]); + while ( $string != $newstring = wp_kses($string, $allowed_html, $allowed_protocols) ) + $string = $newstring; + if ( $string == '' ) + return ''; + // prevent multiple dashes in comments + $string = preg_replace('/--+/', '-', $string); + // prevent three dashes closing a comment + $string = preg_replace('/-$/', '', $string); + return "<!--{$string}-->"; + } + # Allow HTML comments + + if (!preg_match('%^<\s*(/\s*)?([a-zA-Z0-9]+)([^>]*)>?$%', $string, $matches)) + return ''; + # It's seriously malformed + + $slash = trim($matches[1]); + $elem = $matches[2]; + $attrlist = $matches[3]; + + if (!@isset($allowed_html[strtolower($elem)])) + return ''; + # They are using a not allowed HTML element + + if ($slash != '') + return "<$slash$elem>"; + # No attributes are allowed for closing elements + + return wp_kses_attr("$slash$elem", $attrlist, $allowed_html, $allowed_protocols); +} + +/** + * Removes all attributes, if none are allowed for this element. + * + * If some are allowed it calls wp_kses_hair() to split them further, and then + * it builds up new HTML code from the data that wp_kses_hair() returns. It also + * removes "<" and ">" characters, if there are any left. One more thing it does + * is to check if the tag has a closing XHTML slash, and if it does, it puts one + * in the returned code as well. 
+ * + * @since 1.0.0 + * + * @param string $element HTML element/tag + * @param string $attr HTML attributes from HTML element to closing HTML element tag + * @param array $allowed_html Allowed HTML elements + * @param array $allowed_protocols Allowed protocols to keep + * @return string Sanitized HTML element + */ +function wp_kses_attr($element, $attr, $allowed_html, $allowed_protocols) { + # Is there a closing XHTML slash at the end of the attributes? + + $xhtml_slash = ''; + if (preg_match('%\s/\s*$%', $attr)) + $xhtml_slash = ' /'; + + # Are any attributes allowed at all for this element? + + if (@ count($allowed_html[strtolower($element)]) == 0) + return "<$element$xhtml_slash>"; + + # Split it + + $attrarr = wp_kses_hair($attr, $allowed_protocols); + + # Go through $attrarr, and save the allowed attributes for this element + # in $attr2 + + $attr2 = ''; + + foreach ($attrarr as $arreach) { + if (!@ isset ($allowed_html[strtolower($element)][strtolower($arreach['name'])])) + continue; # the attribute is not allowed + + $current = $allowed_html[strtolower($element)][strtolower($arreach['name'])]; + if ($current == '') + continue; # the attribute is not allowed + + if (!is_array($current)) + $attr2 .= ' '.$arreach['whole']; + # there are no checks + + else { + # there are some checks + $ok = true; + foreach ($current as $currkey => $currval) + if (!wp_kses_check_attr_val($arreach['value'], $arreach['vless'], $currkey, $currval)) { + $ok = false; + break; + } + + if ( $arreach['name'] == 'style' ) { + $orig_value = $arreach['value']; + + $value = safecss_filter_attr($orig_value); + + if ( empty($value) ) + continue; + + $arreach['value'] = $value; + + $arreach['whole'] = str_replace($orig_value, $value, $arreach['whole']); + } + + if ($ok) + $attr2 .= ' '.$arreach['whole']; # it passed them + } # if !is_array($current) + } # foreach + + # Remove any "<" or ">" characters + + $attr2 = preg_replace('/[<>]/', '', $attr2); + + return "<$element$attr2$xhtml_slash>"; 
+} + +/** + * Builds an attribute list from string containing attributes. + * + * This function does a lot of work. It parses an attribute list into an array + * with attribute data, and tries to do the right thing even if it gets weird + * input. It will add quotes around attribute values that don't have any quotes + * or apostrophes around them, to make it easier to produce HTML code that will + * conform to W3C's HTML specification. It will also remove bad URL protocols + * from attribute values. It also reduces duplicate attributes by using the + * attribute defined first (foo='bar' foo='baz' will result in foo='bar'). + * + * @since 1.0.0 + * + * @param string $attr Attribute list from HTML element to closing HTML element tag + * @param array $allowed_protocols Allowed protocols to keep + * @return array List of attributes after parsing + */ +function wp_kses_hair($attr, $allowed_protocols) { + $attrarr = array (); + $mode = 0; + $attrname = ''; + $uris = array('xmlns', 'profile', 'href', 'src', 'cite', 'classid', 'codebase', 'data', 'usemap', 'longdesc', 'action'); + + # Loop through the whole attribute list + + while (strlen($attr) != 0) { + $working = 0; # Was the last operation successful? 
+ + switch ($mode) { + case 0 : # attribute name, href for instance + + if (preg_match('/^([-a-zA-Z]+)/', $attr, $match)) { + $attrname = $match[1]; + $working = $mode = 1; + $attr = preg_replace('/^[-a-zA-Z]+/', '', $attr); + } + + break; + + case 1 : # equals sign or valueless ("selected") + + if (preg_match('/^\s*=\s*/', $attr)) # equals sign + { + $working = 1; + $mode = 2; + $attr = preg_replace('/^\s*=\s*/', '', $attr); + break; + } + + if (preg_match('/^\s+/', $attr)) # valueless + { + $working = 1; + $mode = 0; + if(FALSE === array_key_exists($attrname, $attrarr)) { + $attrarr[$attrname] = array ('name' => $attrname, 'value' => '', 'whole' => $attrname, 'vless' => 'y'); + } + $attr = preg_replace('/^\s+/', '', $attr); + } + + break; + + case 2 : # attribute value, a URL after href= for instance + + if (preg_match('/^"([^"]*)"(\s+|$)/', $attr, $match)) + # "value" + { + $thisval = $match[1]; + if ( in_array($attrname, $uris) ) + $thisval = wp_kses_bad_protocol($thisval, $allowed_protocols); + + if(FALSE === array_key_exists($attrname, $attrarr)) { + $attrarr[$attrname] = array ('name' => $attrname, 'value' => $thisval, 'whole' => "$attrname=\"$thisval\"", 'vless' => 'n'); + } + $working = 1; + $mode = 0; + $attr = preg_replace('/^"[^"]*"(\s+|$)/', '', $attr); + break; + } + + if (preg_match("/^'([^']*)'(\s+|$)/", $attr, $match)) + # 'value' + { + $thisval = $match[1]; + if ( in_array($attrname, $uris) ) + $thisval = wp_kses_bad_protocol($thisval, $allowed_protocols); + + if(FALSE === array_key_exists($attrname, $attrarr)) { + $attrarr[$attrname] = array ('name' => $attrname, 'value' => $thisval, 'whole' => "$attrname='$thisval'", 'vless' => 'n'); + } + $working = 1; + $mode = 0; + $attr = preg_replace("/^'[^']*'(\s+|$)/", '', $attr); + break; + } + + if (preg_match("%^([^\s\"']+)(\s+|$)%", $attr, $match)) + # value + { + $thisval = $match[1]; + if ( in_array($attrname, $uris) ) + $thisval = wp_kses_bad_protocol($thisval, $allowed_protocols); + + if(FALSE === 
array_key_exists($attrname, $attrarr)) { + $attrarr[$attrname] = array ('name' => $attrname, 'value' => $thisval, 'whole' => "$attrname=\"$thisval\"", 'vless' => 'n'); + } + # We add quotes to conform to W3C's HTML spec. + $working = 1; + $mode = 0; + $attr = preg_replace("%^[^\s\"']+(\s+|$)%", '', $attr); + } + + break; + } # switch + + if ($working == 0) # not well formed, remove and try again + { + $attr = wp_kses_html_error($attr); + $mode = 0; + } + } # while + + if ($mode == 1 && FALSE === array_key_exists($attrname, $attrarr)) + # special case, for when the attribute list ends with a valueless + # attribute like "selected" + $attrarr[$attrname] = array ('name' => $attrname, 'value' => '', 'whole' => $attrname, 'vless' => 'y'); + + return $attrarr; +} + +/** + * Performs different checks for attribute values. + * + * The currently implemented checks are "maxlen", "minlen", "maxval", "minval" + * and "valueless" with even more checks to come soon. + * + * @since 1.0.0 + * + * @param string $value Attribute value + * @param string $vless Whether the value is valueless or not. Use 'y' or 'n' + * @param string $checkname What $checkvalue is checking for. + * @param mixed $checkvalue What constraint the value should pass + * @return bool Whether check passes (true) or not (false) + */ +function wp_kses_check_attr_val($value, $vless, $checkname, $checkvalue) { + $ok = true; + + switch (strtolower($checkname)) { + case 'maxlen' : + # The maxlen check makes sure that the attribute value has a length not + # greater than the given value. This can be used to avoid Buffer Overflows + # in WWW clients and various Internet servers. + + if (strlen($value) > $checkvalue) + $ok = false; + break; + + case 'minlen' : + # The minlen check makes sure that the attribute value has a length not + # smaller than the given value. 
+ + if (strlen($value) < $checkvalue) + $ok = false; + break; + + case 'maxval' : + # The maxval check does two things: it checks that the attribute value is + # an integer from 0 and up, without an excessive amount of zeroes or + # whitespace (to avoid Buffer Overflows). It also checks that the attribute + # value is not greater than the given value. + # This check can be used to avoid Denial of Service attacks. + + if (!preg_match('/^\s{0,6}[0-9]{1,6}\s{0,6}$/', $value)) + $ok = false; + if ($value > $checkvalue) + $ok = false; + break; + + case 'minval' : + # The minval check checks that the attribute value is a positive integer, + # and that it is not smaller than the given value. + + if (!preg_match('/^\s{0,6}[0-9]{1,6}\s{0,6}$/', $value)) + $ok = false; + if ($value < $checkvalue) + $ok = false; + break; + + case 'valueless' : + # The valueless check checks if the attribute has a value + # (like ) or not (