11 function _($string) { |
11 function _($string) { |
12 return $string; |
12 return $string; |
13 } |
13 } |
14 } |
14 } |
15 |
15 |
/**
 * Returns whether PCRE/u (PCRE_UTF8 modifier) is available for use.
 *
 * The answer is cached in a function-static variable, so the probe below
 * runs at most once per request unless the cache is reset.
 *
 * @ignore
 * @since 4.2.2
 * @access private
 *
 * @param bool $set - Used for testing only
 *             null   : default - get PCRE/u capability
 *             false  : Used for testing - return false for future calls to this function
 *             'reset': Used for testing - restore default behavior of this function
 * @return mixed The cached value: the result of the preg_match() probe
 *               (1 on a match, false on failure), or whatever was last
 *               passed in through $set.
 */
function _wp_can_use_pcre_u( $set = null ) {
	// 'reset' is the sentinel meaning "not probed yet".
	static $utf8_pcre = 'reset';

	// Any non-null argument overrides the cached value (testing hook).
	if ( null !== $set ) {
		$utf8_pcre = $set;
	}

	if ( 'reset' === $utf8_pcre ) {
		// Probe with a trivial /u pattern; errors are suppressed so that a
		// failing probe only shows up through the return value.
		$utf8_pcre = @preg_match( '/^./u', 'a' );
	}

	return $utf8_pcre;
}
|
41 |
|
/*
 * Fallback mb_substr(): declared only when no function of that name exists
 * yet. All of the work is delegated to _mb_substr().
 */
if ( ! function_exists( 'mb_substr' ) ) {
	function mb_substr( $str, $start, $length = null, $encoding = null ) {
		$substring = _mb_substr( $str, $start, $length, $encoding );

		return $substring;
	}
}
|
47 |
|
/**
 * Character-aware substring used by the mb_substr() fallback.
 *
 * Only understands UTF-8 and 8bit. All other character sets will be treated as 8bit.
 * For $encoding === UTF-8, the $str input is expected to be a valid UTF-8 byte sequence.
 * The behavior of this function for invalid inputs is undefined.
 *
 * @param string      $str      The string to extract from.
 * @param int         $start    Offset of the first character; passed through to
 *                              substr() / array_slice(), so negative values count
 *                              from the end.
 * @param int|null    $length   Maximum number of characters, or null for "to the end".
 * @param string|null $encoding Character set name; null reads the 'blog_charset' option.
 * @return string The extracted portion of $str.
 */
function _mb_substr( $str, $start, $length = null, $encoding = null ) {
	$charset = ( null === $encoding ) ? get_option( 'blog_charset' ) : $encoding;

	// Everything that is not one of these spellings is handled byte-wise.
	$utf8_names = array( 'utf8', 'utf-8', 'UTF8', 'UTF-8' );
	if ( ! in_array( $charset, $utf8_names ) ) {
		if ( null === $length ) {
			return substr( $str, $start );
		}

		return substr( $str, $start, $length );
	}

	if ( _wp_can_use_pcre_u() ) {
		// Let the regex engine's unicode support break the string into characters.
		preg_match_all( '/./us', $str, $matches );
		$characters = $matches[0];

		if ( null === $length ) {
			$selected = array_slice( $characters, $start );
		} else {
			$selected = array_slice( $characters, $start, $length );
		}

		return implode( '', $selected );
	}

	// No PCRE/u: recognise UTF-8 sequences by their byte patterns instead.
	$regex = '/(
		  [\x00-\x7F]                  # single-byte sequences   0xxxxxxx
		| [\xC2-\xDF][\x80-\xBF]       # double-byte sequences   110xxxxx 10xxxxxx
		| \xE0[\xA0-\xBF][\x80-\xBF]   # triple-byte sequences   1110xxxx 10xxxxxx * 2
		| [\xE1-\xEC][\x80-\xBF]{2}
		| \xED[\x80-\x9F][\x80-\xBF]
		| [\xEE-\xEF][\x80-\xBF]{2}
		| \xF0[\x90-\xBF][\x80-\xBF]{2} # four-byte sequences   11110xxx 10xxxxxx * 3
		| [\xF1-\xF3][\x80-\xBF]{3}
		| \xF4[\x80-\x8F][\x80-\xBF]{2}
	)/x';

	// Seeded with one dummy element because each round begins by popping one.
	$characters = array( '' );
	do {
		// The tail of the previous round is about to be split again; drop it first.
		array_pop( $characters );

		// Split into characters, at most 1000 pieces (the last piece holds any remainder).
		$pieces = preg_split( $regex, $str, 1000, PREG_SPLIT_DELIM_CAPTURE | PREG_SPLIT_NO_EMPTY );

		$characters = array_merge( $characters, $pieces );

		// Nothing (or a single piece) came back: there is no remainder to process.
		if ( count( $pieces ) <= 1 ) {
			break;
		}

		// The last piece becomes the input of the next round.
		$str = array_pop( $pieces );
	} while ( $str );

	return implode( '', array_slice( $characters, $start, $length ) );
}
|
96 |
|
/*
 * Fallback mb_strlen(): declared only when no function of that name exists
 * yet. All of the work is delegated to _mb_strlen().
 */
if ( ! function_exists( 'mb_strlen' ) ) {
	function mb_strlen( $str, $encoding = null ) {
		$length = _mb_strlen( $str, $encoding );

		return $length;
	}
}
|
102 |
|
/**
 * Character-aware string length used by the mb_strlen() fallback.
 *
 * Only understands UTF-8 and 8bit. All other character sets will be treated as 8bit.
 * For $encoding === UTF-8, the $str input is expected to be a valid UTF-8 byte sequence.
 * The behavior of this function for invalid inputs is undefined.
 *
 * @param string      $str      The string to measure.
 * @param string|null $encoding Character set name; null reads the 'blog_charset' option.
 * @return int Number of characters for UTF-8 input, number of bytes otherwise.
 */
function _mb_strlen( $str, $encoding = null ) {
	if ( null === $encoding ) {
		$encoding = get_option( 'blog_charset' );
	}

	// The solution below works only for UTF-8,
	// so in case of a different charset just use built-in strlen()
	if ( ! in_array( $encoding, array( 'utf8', 'utf-8', 'UTF8', 'UTF-8' ) ) ) {
		return strlen( $str );
	}

	if ( _wp_can_use_pcre_u() ) {
		// Use the regex unicode support to separate the UTF-8 characters into an array
		preg_match_all( '/./us', $str, $match );
		return count( $match[0] );
	}

	// No PCRE/u: recognise UTF-8 sequences by their byte patterns instead.
	$regex = '/(?:
		  [\x00-\x7F]                  # single-byte sequences   0xxxxxxx
		| [\xC2-\xDF][\x80-\xBF]       # double-byte sequences   110xxxxx 10xxxxxx
		| \xE0[\xA0-\xBF][\x80-\xBF]   # triple-byte sequences   1110xxxx 10xxxxxx * 2
		| [\xE1-\xEC][\x80-\xBF]{2}
		| \xED[\x80-\x9F][\x80-\xBF]
		| [\xEE-\xEF][\x80-\xBF]{2}
		| \xF0[\x90-\xBF][\x80-\xBF]{2} # four-byte sequences   11110xxx 10xxxxxx * 3
		| [\xF1-\xF3][\x80-\xBF]{3}
		| \xF4[\x80-\x8F][\x80-\xBF]{2}
	)/x';

	$count = 1; // Start at 1 instead of 0 since the first thing we do is decrement
	do {
		// We had some string left over from the last round, but we counted it in that last round.
		$count--;

		// Split by UTF-8 character, limit to 1000 characters (last array element will contain the rest of the string)
		$pieces = preg_split( $regex, $str, 1000 );

		// Increment
		$count += count( $pieces );

		// Whatever is left over becomes the input of the next round.
		$str = array_pop( $pieces );

		// Compare against '' explicitly: a leftover of exactly "0" is falsy in
		// PHP, and a plain truthiness test would stop early and undercount.
	} while ( is_string( $str ) && '' !== $str );

	// Fencepost: preg_split() always returns one extra item in the array
	return --$count;
}
34 |
152 |
35 if ( !function_exists('hash_hmac') ): |
153 if ( !function_exists('hash_hmac') ): |
36 function hash_hmac($algo, $data, $key, $raw_output = false) { |
154 function hash_hmac($algo, $data, $key, $raw_output = false) { |
37 return _hash_hmac($algo, $data, $key, $raw_output); |
155 return _hash_hmac($algo, $data, $key, $raw_output); |