139 $codepoints = array(); |
139 $codepoints = array(); |
140 |
140 |
141 // Get number of bytes |
141 // Get number of bytes |
142 $strlen = strlen($input); |
142 $strlen = strlen($input); |
143 |
143 |
|
144 // phpcs:ignore Generic.CodeAnalysis.JumbledIncrementer -- This is a deliberate choice. |
144 for ($position = 0; $position < $strlen; $position++) { |
145 for ($position = 0; $position < $strlen; $position++) { |
145 $value = ord($input[$position]); |
146 $value = ord($input[$position]); |
146 |
147 |
147 // One byte sequence: |
148 // One byte sequence: |
148 if ((~$value & 0x80) === 0x80) { |
149 if ((~$value & 0x80) === 0x80) { |
149 $character = $value; |
150 $character = $value; |
150 $length = 1; |
151 $length = 1; |
151 $remaining = 0; |
152 $remaining = 0; |
152 } |
153 } |
153 // Two byte sequence: |
154 // Two byte sequence: |
154 elseif (($value & 0xE0) === 0xC0) { |
155 elseif (($value & 0xE0) === 0xC0) { |
155 $character = ($value & 0x1F) << 6; |
156 $character = ($value & 0x1F) << 6; |
156 $length = 2; |
157 $length = 2; |
157 $remaining = 1; |
158 $remaining = 1; |
158 } |
159 } |
159 // Three byte sequence: |
160 // Three byte sequence: |
160 elseif (($value & 0xF0) === 0xE0) { |
161 elseif (($value & 0xF0) === 0xE0) { |
161 $character = ($value & 0x0F) << 12; |
162 $character = ($value & 0x0F) << 12; |
162 $length = 3; |
163 $length = 3; |
163 $remaining = 2; |
164 $remaining = 2; |
164 } |
165 } |
165 // Four byte sequence: |
166 // Four byte sequence: |
166 elseif (($value & 0xF8) === 0xF0) { |
167 elseif (($value & 0xF8) === 0xF0) { |
167 $character = ($value & 0x07) << 18; |
168 $character = ($value & 0x07) << 18; |
168 $length = 4; |
169 $length = 4; |
169 $remaining = 3; |
170 $remaining = 3; |
170 } |
171 } |
171 // Invalid byte: |
172 // Invalid byte: |
172 else { |
173 else { |
173 throw new Requests_Exception('Invalid Unicode codepoint', 'idna.invalidcodepoint', $value); |
174 throw new Requests_Exception('Invalid Unicode codepoint', 'idna.invalidcodepoint', $value); |
183 // If it is invalid, count the sequence as invalid and reprocess the current byte: |
184 // If it is invalid, count the sequence as invalid and reprocess the current byte: |
184 if (($value & 0xC0) !== 0x80) { |
185 if (($value & 0xC0) !== 0x80) { |
185 throw new Requests_Exception('Invalid Unicode codepoint', 'idna.invalidcodepoint', $character); |
186 throw new Requests_Exception('Invalid Unicode codepoint', 'idna.invalidcodepoint', $character); |
186 } |
187 } |
187 |
188 |
188 $character |= ($value & 0x3F) << (--$remaining * 6); |
189 --$remaining; |
|
190 $character |= ($value & 0x3F) << ($remaining * 6); |
189 } |
191 } |
190 $position--; |
192 $position--; |
191 } |
193 } |
192 |
194 |
193 if ( |
195 if (// Non-shortest form sequences are invalid |
194 // Non-shortest form sequences are invalid |
196 $length > 1 && $character <= 0x7F |
195 $length > 1 && $character <= 0x7F |
|
196 || $length > 2 && $character <= 0x7FF |
197 || $length > 2 && $character <= 0x7FF |
197 || $length > 3 && $character <= 0xFFFF |
198 || $length > 3 && $character <= 0xFFFF |
198 // Outside of range of ucschar codepoints |
199 // Outside of range of ucschar codepoints |
199 // Noncharacters |
200 // Noncharacters |
200 || ($character & 0xFFFE) === 0xFFFE |
201 || ($character & 0xFFFE) === 0xFFFE |
201 || $character >= 0xFDD0 && $character <= 0xFDEF |
202 || $character >= 0xFDD0 && $character <= 0xFDEF |
202 || ( |
203 || ( |
203 // Everything else not in ucschar |
204 // Everything else not in ucschar |
204 $character > 0xD7FF && $character < 0xF900 |
205 $character > 0xD7FF && $character < 0xF900 |
205 || $character < 0x20 |
206 || $character < 0x20 |
206 || $character > 0x7E && $character < 0xA0 |
207 || $character > 0x7E && $character < 0xA0 |
207 || $character > 0xEFFFD |
208 || $character > 0xEFFFD |
208 ) |
209 ) |
209 ) { |
210 ) { |
225 * @param string $input UTF-8 encoded string to encode |
226 * @param string $input UTF-8 encoded string to encode |
226 * @return string Punycode-encoded string |
227 * @return string Punycode-encoded string |
227 */ |
228 */ |
228 public static function punycode_encode($input) { |
229 public static function punycode_encode($input) { |
229 $output = ''; |
230 $output = ''; |
230 # let n = initial_n |
231 // let n = initial_n |
231 $n = self::BOOTSTRAP_INITIAL_N; |
232 $n = self::BOOTSTRAP_INITIAL_N; |
232 # let delta = 0 |
233 // let delta = 0 |
233 $delta = 0; |
234 $delta = 0; |
234 # let bias = initial_bias |
235 // let bias = initial_bias |
235 $bias = self::BOOTSTRAP_INITIAL_BIAS; |
236 $bias = self::BOOTSTRAP_INITIAL_BIAS; |
236 # let h = b = the number of basic code points in the input |
237 // let h = b = the number of basic code points in the input |
237 $h = $b = 0; // see loop |
238 $h = 0; |
238 # copy them to the output in order |
239 $b = 0; // see loop |
|
240 // copy them to the output in order |
239 $codepoints = self::utf8_to_codepoints($input); |
241 $codepoints = self::utf8_to_codepoints($input); |
240 $extended = array(); |
242 $extended = array(); |
241 |
243 |
242 foreach ($codepoints as $char) { |
244 foreach ($codepoints as $char) { |
243 if ($char < 128) { |
245 if ($char < 128) { |
244 // Character is valid ASCII |
246 // Character is valid ASCII |
245 // TODO: this should also check if it's valid for a URL |
247 // TODO: this should also check if it's valid for a URL |
258 } |
260 } |
259 } |
261 } |
260 $extended = array_keys($extended); |
262 $extended = array_keys($extended); |
261 sort($extended); |
263 sort($extended); |
262 $b = $h; |
264 $b = $h; |
263 # [copy them] followed by a delimiter if b > 0 |
265 // [copy them] followed by a delimiter if b > 0 |
264 if (strlen($output) > 0) { |
266 if (strlen($output) > 0) { |
265 $output .= '-'; |
267 $output .= '-'; |
266 } |
268 } |
267 # {if the input contains a non-basic code point < n then fail} |
269 // {if the input contains a non-basic code point < n then fail} |
268 # while h < length(input) do begin |
270 // while h < length(input) do begin |
269 while ($h < count($codepoints)) { |
271 $codepointcount = count($codepoints); |
270 # let m = the minimum code point >= n in the input |
272 while ($h < $codepointcount) { |
|
273 // let m = the minimum code point >= n in the input |
271 $m = array_shift($extended); |
274 $m = array_shift($extended); |
272 //printf('next code point to insert is %s' . PHP_EOL, dechex($m)); |
275 //printf('next code point to insert is %s' . PHP_EOL, dechex($m)); |
273 # let delta = delta + (m - n) * (h + 1), fail on overflow |
276 // let delta = delta + (m - n) * (h + 1), fail on overflow |
274 $delta += ($m - $n) * ($h + 1); |
277 $delta += ($m - $n) * ($h + 1); |
275 # let n = m |
278 // let n = m |
276 $n = $m; |
279 $n = $m; |
277 # for each code point c in the input (in order) do begin |
280 // for each code point c in the input (in order) do begin |
278 for ($num = 0; $num < count($codepoints); $num++) { |
281 for ($num = 0; $num < $codepointcount; $num++) { |
279 $c = $codepoints[$num]; |
282 $c = $codepoints[$num]; |
280 # if c < n then increment delta, fail on overflow |
283 // if c < n then increment delta, fail on overflow |
281 if ($c < $n) { |
284 if ($c < $n) { |
282 $delta++; |
285 $delta++; |
283 } |
286 } |
284 # if c == n then begin |
287 // if c == n then begin |
285 elseif ($c === $n) { |
288 elseif ($c === $n) { |
286 # let q = delta |
289 // let q = delta |
287 $q = $delta; |
290 $q = $delta; |
288 # for k = base to infinity in steps of base do begin |
291 // for k = base to infinity in steps of base do begin |
289 for ($k = self::BOOTSTRAP_BASE; ; $k += self::BOOTSTRAP_BASE) { |
292 for ($k = self::BOOTSTRAP_BASE; ; $k += self::BOOTSTRAP_BASE) { |
290 # let t = tmin if k <= bias {+ tmin}, or |
293 // let t = tmin if k <= bias {+ tmin}, or |
291 # tmax if k >= bias + tmax, or k - bias otherwise |
294 // tmax if k >= bias + tmax, or k - bias otherwise |
292 if ($k <= ($bias + self::BOOTSTRAP_TMIN)) { |
295 if ($k <= ($bias + self::BOOTSTRAP_TMIN)) { |
293 $t = self::BOOTSTRAP_TMIN; |
296 $t = self::BOOTSTRAP_TMIN; |
294 } |
297 } |
295 elseif ($k >= ($bias + self::BOOTSTRAP_TMAX)) { |
298 elseif ($k >= ($bias + self::BOOTSTRAP_TMAX)) { |
296 $t = self::BOOTSTRAP_TMAX; |
299 $t = self::BOOTSTRAP_TMAX; |
297 } |
300 } |
298 else { |
301 else { |
299 $t = $k - $bias; |
302 $t = $k - $bias; |
300 } |
303 } |
301 # if q < t then break |
304 // if q < t then break |
302 if ($q < $t) { |
305 if ($q < $t) { |
303 break; |
306 break; |
304 } |
307 } |
305 # output the code point for digit t + ((q - t) mod (base - t)) |
308 // output the code point for digit t + ((q - t) mod (base - t)) |
306 $digit = $t + (($q - $t) % (self::BOOTSTRAP_BASE - $t)); |
309 $digit = $t + (($q - $t) % (self::BOOTSTRAP_BASE - $t)); |
307 $output .= self::digit_to_char($digit); |
310 $output .= self::digit_to_char($digit); |
308 # let q = (q - t) div (base - t) |
311 // let q = (q - t) div (base - t) |
309 $q = floor(($q - $t) / (self::BOOTSTRAP_BASE - $t)); |
312 $q = floor(($q - $t) / (self::BOOTSTRAP_BASE - $t)); |
310 # end |
313 } // end |
311 } |
314 // output the code point for digit q |
312 # output the code point for digit q |
|
313 $output .= self::digit_to_char($q); |
315 $output .= self::digit_to_char($q); |
314 # let bias = adapt(delta, h + 1, test h equals b?) |
316 // let bias = adapt(delta, h + 1, test h equals b?) |
315 $bias = self::adapt($delta, $h + 1, $h === $b); |
317 $bias = self::adapt($delta, $h + 1, $h === $b); |
316 # let delta = 0 |
318 // let delta = 0 |
317 $delta = 0; |
319 $delta = 0; |
318 # increment h |
320 // increment h |
319 $h++; |
321 $h++; |
320 # end |
322 } // end |
321 } |
323 } // end |
322 # end |
324 // increment delta and n |
323 } |
|
324 # increment delta and n |
|
325 $delta++; |
325 $delta++; |
326 $n++; |
326 $n++; |
327 # end |
327 } // end |
328 } |
|
329 |
328 |
330 return $output; |
329 return $output; |
331 } |
330 } |
332 |
331 |
333 /** |
332 /** |
356 * @see https://tools.ietf.org/html/rfc3492#section-6.1 |
355 * @see https://tools.ietf.org/html/rfc3492#section-6.1 |
357 * @param int $delta |
356 * @param int $delta |
358 * @param int $numpoints |
357 * @param int $numpoints |
359 * @param bool $firsttime |
358 * @param bool $firsttime |
360 * @return int New bias |
359 * @return int New bias |
|
360 * |
|
361 * function adapt(delta,numpoints,firsttime): |
361 */ |
362 */ |
protected static function adapt($delta, $numpoints, $firsttime) {
	// Bias adaptation step of the Punycode encoder (RFC 3492, section 6.1).
	// Every "div" in the RFC pseudocode is an integer division. PHP's floor()
	// returns a float, so each result is cast back to int; this keeps the
	// arithmetic integral and makes the return value match the documented
	// `int` return type instead of leaking a float bias to the caller.

	// if firsttime then let delta = delta div damp
	// else let delta = delta div 2
	$divisor = $firsttime ? self::BOOTSTRAP_DAMP : 2;
	$delta   = (int) floor($delta / $divisor);

	// let delta = delta + (delta div numpoints)
	$delta += (int) floor($delta / $numpoints);

	// let k = 0
	$k = 0;

	// Loop-invariant parts of the RFC's while condition, computed once.
	$range = self::BOOTSTRAP_BASE - self::BOOTSTRAP_TMIN;
	$max   = (int) floor(($range * self::BOOTSTRAP_TMAX) / 2);

	// while delta > ((base - tmin) * tmax) div 2 do begin
	while ($delta > $max) {
		// let delta = delta div (base - tmin)
		$delta = (int) floor($delta / $range);
		// let k = k + base
		$k += self::BOOTSTRAP_BASE;
	} // end

	// return k + (((base - tmin + 1) * delta) div (delta + skew))
	return $k + (int) floor((($range + 1) * $delta) / ($delta + self::BOOTSTRAP_SKEW));
}
388 } |
387 } |