|
1 <?php |
|
2 /** |
|
3 * Zend Framework |
|
4 * |
|
5 * LICENSE |
|
6 * |
|
7 * This source file is subject to the new BSD license that is bundled |
|
8 * with this package in the file LICENSE.txt. |
|
9 * It is also available through the world-wide-web at this URL: |
|
10 * http://framework.zend.com/license/new-bsd |
|
11 * If you did not receive a copy of the license and are unable to |
|
12 * obtain it through the world-wide-web, please send an email |
|
13 * to license@zend.com so we can send you a copy immediately. |
|
14 * |
|
15 * @category Zend |
|
16 * @package Zend_Json |
|
17 * @copyright Copyright (c) 2005-2010 Zend Technologies USA Inc. (http://www.zend.com) |
|
18 * @license http://framework.zend.com/license/new-bsd New BSD License |
|
19 * @version $Id: Decoder.php 22653 2010-07-22 18:41:39Z mabe $ |
|
20 */ |
|
21 |
|
22 /** |
|
23 * @see Zend_Json |
|
24 */ |
|
25 require_once 'Zend/Json.php'; |
|
26 |
|
/**
 * Decode a JSON encoded string to PHP variable constructs.
 *
 * The decoder is a small hand-written tokenizer plus a recursive-descent
 * parser. It is instantiated only through the static {@link decode()}
 * entry point; the constructor is protected.
 *
 * @category   Zend
 * @package    Zend_Json
 * @copyright  Copyright (c) 2005-2010 Zend Technologies USA Inc. (http://www.zend.com)
 * @license    http://framework.zend.com/license/new-bsd     New BSD License
 */
class Zend_Json_Decoder
{
    /**
     * Parse tokens used to decode the JSON object. These are not
     * for public consumption, they are just used internally to the
     * class.
     *
     * EOF doubles as "no token recognised", which is why the parser loops
     * test the token for truthiness.
     */
    const EOF      = 0;
    const DATUM    = 1;
    const LBRACE   = 2;
    const LBRACKET = 3;
    const RBRACE   = 4;
    const RBRACKET = 5;
    const COMMA    = 6;
    const COLON    = 7;

    /**
     * The source being decoded (after \uXXXX escapes have been expanded)
     *
     * @var string
     */
    protected $_source;

    /**
     * Caches the source length (in bytes)
     *
     * @var int
     */
    protected $_sourceLength;

    /**
     * The byte offset within the source being decoded
     *
     * @var int
     */
    protected $_offset;

    /**
     * The current token being considered in the parser cycle
     *
     * @var int
     */
    protected $_token;

    /**
     * The PHP value carried by the current token when it is a DATUM
     * (string, number, boolean or null); null for every other token.
     *
     * Declared explicitly so that the tokenizer does not create it as a
     * dynamic property.
     *
     * @var mixed
     */
    protected $_tokenValue;

    /**
     * Flag indicating how objects should be decoded
     *
     * @var int
     */
    protected $_decodeType;

    /**
     * Constructor
     *
     * Expands \uXXXX escapes in the source, resets the tokenizer state and
     * reads the first token so that {@link _decodeValue()} can start
     * immediately.
     *
     * NOTE(review): because \uXXXX escapes are expanded over the whole
     * source *before* tokenizing, an escape that decodes to a quote or a
     * backslash (e.g. \u0022), or an escaped backslash followed by "uXXXX",
     * can be misread by the tokenizer. Fixing that means moving the
     * expansion into the string tokenizer; left as-is here to keep the
     * existing behaviour.
     *
     * @param string $source     String source to decode
     * @param int    $decodeType How objects should be decoded -- see
     * {@link Zend_Json::TYPE_ARRAY} and {@link Zend_Json::TYPE_OBJECT} for
     * valid values
     * @return void
     */
    protected function __construct($source, $decodeType)
    {
        // Set defaults
        $this->_source       = self::decodeUnicodeString($source);
        $this->_sourceLength = strlen($this->_source);
        $this->_token        = self::EOF;
        $this->_tokenValue   = null;
        $this->_offset       = 0;

        // Normalize and set $decodeType: anything that is not one of the two
        // known constants falls back to TYPE_ARRAY. The comparison is
        // intentionally left loose to preserve what callers get today.
        if (!in_array($decodeType, array(Zend_Json::TYPE_ARRAY, Zend_Json::TYPE_OBJECT))) {
            $decodeType = Zend_Json::TYPE_ARRAY;
        }
        $this->_decodeType = $decodeType;

        // Set pointer at first token
        $this->_getNextToken();
    }

    /**
     * Decode a JSON source string
     *
     * Decodes a JSON encoded string. The value returned will be one of the
     * following:
     *        - integer
     *        - float
     *        - boolean
     *        - null
     *        - string
     *        - StdClass
     *        - array
     *        - array of one or more of the above types
     *
     * By default, decoded objects will be returned as associative arrays; to
     * return a StdClass object instead, pass {@link Zend_Json::TYPE_OBJECT} to
     * the $objectDecodeType parameter.
     *
     * Throws a Zend_Json_Exception if the source is null or not a string,
     * or if the tokenizer/parser rejects the input.
     *
     * @static
     * @access public
     * @param string $source String to be decoded
     * @param int $objectDecodeType How objects should be decoded; should be
     * either or {@link Zend_Json::TYPE_ARRAY} or
     * {@link Zend_Json::TYPE_OBJECT}; defaults to TYPE_ARRAY
     * @return mixed
     * @throws Zend_Json_Exception
     */
    public static function decode($source = null, $objectDecodeType = Zend_Json::TYPE_ARRAY)
    {
        if (null === $source) {
            require_once 'Zend/Json/Exception.php';
            throw new Zend_Json_Exception('Must specify JSON encoded source for decoding');
        } elseif (!is_string($source)) {
            require_once 'Zend/Json/Exception.php';
            throw new Zend_Json_Exception('Can only decode JSON encoded strings');
        }

        $decoder = new self($source, $objectDecodeType);

        return $decoder->_decodeValue();
    }

    /**
     * Recursive driving routine for the supported top-level token types
     *
     * Dispatches on the current token: a DATUM is returned directly (and the
     * tokenizer advanced), "{" and "[" are handed to the object/array
     * decoders. Any other token yields null without consuming input.
     *
     * @return mixed
     */
    protected function _decodeValue()
    {
        switch ($this->_token) {
            case self::DATUM:
                $result = $this->_tokenValue;
                $this->_getNextToken();
                return $result;

            case self::LBRACE:
                return $this->_decodeObject();

            case self::LBRACKET:
                return $this->_decodeArray();

            default:
                return null;
        }
    }

    /**
     * Decodes an object of the form:
     *  { "attribute" : value, "attribute2" : value,...}
     *
     * Every key must be a string DATUM followed by ":"; members are
     * separated by ",". No key receives special treatment.
     *
     * Decodes to either an array or StdClass object, based on the value of
     * {@link $_decodeType}. If invalid $_decodeType present, returns as an
     * array.
     *
     * @return array|StdClass
     * @throws Zend_Json_Exception on a missing key, ":" or ","
     */
    protected function _decodeObject()
    {
        $members = array();
        $tok     = $this->_getNextToken(); // Move past the '{'

        // A falsy token is EOF: an unterminated object simply ends the loop.
        while ($tok && $tok != self::RBRACE) {
            if ($tok != self::DATUM || !is_string($this->_tokenValue)) {
                require_once 'Zend/Json/Exception.php';
                throw new Zend_Json_Exception('Missing key in object encoding: ' . $this->_source);
            }

            $key = $this->_tokenValue;
            $tok = $this->_getNextToken();

            if ($tok != self::COLON) {
                require_once 'Zend/Json/Exception.php';
                throw new Zend_Json_Exception('Missing ":" in object encoding: ' . $this->_source);
            }

            // Step onto the first token of the value, then decode it;
            // _decodeValue() leaves the tokenizer on the token after it.
            $this->_getNextToken();
            $members[$key] = $this->_decodeValue();
            $tok           = $this->_token;

            if ($tok == self::RBRACE) {
                break;
            }

            if ($tok != self::COMMA) {
                require_once 'Zend/Json/Exception.php';
                throw new Zend_Json_Exception('Missing "," in object encoding: ' . $this->_source);
            }

            $tok = $this->_getNextToken();
        }

        switch ($this->_decodeType) {
            case Zend_Json::TYPE_OBJECT:
                // Create new StdClass and populate with $members
                $result = new StdClass();
                foreach ($members as $key => $value) {
                    $result->$key = $value;
                }
                break;
            case Zend_Json::TYPE_ARRAY:
            default:
                $result = $members;
                break;
        }

        // Move past the closing '}'
        $this->_getNextToken();
        return $result;
    }

    /**
     * Decodes a JSON array format:
     *    [element, element2,...,elementN]
     *
     * @return array
     * @throws Zend_Json_Exception when two elements are not separated by ","
     */
    protected function _decodeArray()
    {
        $result = array();
        $tok    = $this->_getNextToken(); // Move past the '['
        $index  = 0;

        // A falsy token is EOF: an unterminated array simply ends the loop.
        while ($tok && $tok != self::RBRACKET) {
            $result[$index++] = $this->_decodeValue();

            $tok = $this->_token;

            if ($tok == self::RBRACKET || !$tok) {
                break;
            }

            if ($tok != self::COMMA) {
                require_once 'Zend/Json/Exception.php';
                throw new Zend_Json_Exception('Missing "," in array encoding: ' . $this->_source);
            }

            $tok = $this->_getNextToken();
        }

        // Move past the closing ']'
        $this->_getNextToken();
        return $result;
    }

    /**
     * Advances the offset past any whitespace characters at the current
     * position of the source input.
     *
     * The skipped set is tab, backspace, form feed, line feed, carriage
     * return and space.
     *
     * @return void
     */
    protected function _eatWhitespace()
    {
        // The offset can sit past the end after an unterminated string;
        // there is nothing to skip in that case.
        if ($this->_offset < $this->_sourceLength) {
            $this->_offset += strspn($this->_source, "\t\x08\x0c\n\r ", $this->_offset);
        }
    }

    /**
     * Retrieves the next token from the source stream
     *
     * Updates {@link $_token}, {@link $_tokenValue} and {@link $_offset}.
     *
     * @return int Token constant value specified in class definition
     * @throws Zend_Json_Exception on an illegal escape sequence, a malformed
     *                             literal or number, or an unknown character
     */
    protected function _getNextToken()
    {
        $this->_token      = self::EOF;
        $this->_tokenValue = null;
        $this->_eatWhitespace();

        if ($this->_offset >= $this->_sourceLength) {
            return self::EOF;
        }

        $str        = $this->_source;
        $str_length = $this->_sourceLength;
        $i          = $this->_offset;
        $start      = $i;

        switch ($str[$i]) {
            case '{':
                $this->_token = self::LBRACE;
                break;
            case '}':
                $this->_token = self::RBRACE;
                break;
            case '[':
                $this->_token = self::LBRACKET;
                break;
            case ']':
                $this->_token = self::RBRACKET;
                break;
            case ',':
                $this->_token = self::COMMA;
                break;
            case ':':
                $this->_token = self::COLON;
                break;
            case '"':
                // Character that each accepted backslash escape stands for.
                $escapes = array(
                    '"'  => '"',
                    '\\' => '\\',
                    '/'  => '/',
                    'b'  => "\x08",
                    'f'  => "\x0c",
                    'n'  => "\x0a",
                    'r'  => "\x0d",
                    't'  => "\x09",
                    '\'' => '\'',
                );

                $result = '';
                do {
                    $i++;
                    if ($i >= $str_length) {
                        // Unterminated string: accept what was read so far.
                        break;
                    }

                    $chr = $str[$i];

                    if ($chr == '\\') {
                        $i++;
                        if ($i >= $str_length) {
                            break;
                        }
                        $chr = $str[$i];
                        if (!isset($escapes[$chr])) {
                            require_once 'Zend/Json/Exception.php';
                            throw new Zend_Json_Exception("Illegal escape sequence '" . $chr . "'");
                        }
                        $result .= $escapes[$chr];
                    } elseif ($chr == '"') {
                        // Closing quote; $i now points at it.
                        break;
                    } else {
                        $result .= $chr;
                    }
                } while ($i < $str_length);

                $this->_token      = self::DATUM;
                $this->_tokenValue = $result;
                break;
            case 't':
            case 'f':
            case 'n':
                // Bare literals: the leading character selects the only word
                // that is acceptable at this position.
                $literals = array(
                    't' => array('true', true),
                    'f' => array('false', false),
                    'n' => array('null', null),
                );
                $word       = $literals[$str[$i]][0];
                $wordLength = strlen($word);

                if (substr($str, $start, $wordLength) !== $word) {
                    require_once 'Zend/Json/Exception.php';
                    throw new Zend_Json_Exception('Illegal Token');
                }

                $this->_token      = self::DATUM;
                $this->_tokenValue = $literals[$str[$i]][1];
                $i                += $wordLength - 1; // Point at the word's last character
                break;
        }

        if ($this->_token != self::EOF) {
            $this->_offset = $i + 1; // Consume the last token character
            return $this->_token;
        }

        // Nothing matched so far: the only remaining legal token is a number.
        $chr = $str[$i];
        if ($chr == '-' || $chr == '.' || ($chr >= '0' && $chr <= '9')) {
            if (preg_match('/-?([0-9])*(\.[0-9]*)?((e|E)((-|\+)?)[0-9]+)?/s',
                $str, $matches, PREG_OFFSET_CAPTURE, $start) && $matches[0][1] == $start) {

                $datum = $matches[0][0];

                if (is_numeric($datum)) {
                    if (preg_match('/^0\d+$/', $datum)) {
                        require_once 'Zend/Json/Exception.php';
                        throw new Zend_Json_Exception("Octal notation not supported by JSON (value: $datum)");
                    } else {
                        // Prefer an integer when the value survives the
                        // round trip, otherwise keep the float.
                        $val  = intval($datum);
                        $fVal = floatval($datum);
                        $this->_tokenValue = ($val == $fVal ? $val : $fVal);
                    }
                } else {
                    require_once 'Zend/Json/Exception.php';
                    throw new Zend_Json_Exception("Illegal number format: $datum");
                }

                $this->_token  = self::DATUM;
                $this->_offset = $start + strlen($datum);
            }
        } else {
            require_once 'Zend/Json/Exception.php';
            throw new Zend_Json_Exception('Illegal Token');
        }

        return $this->_token;
    }

    /**
     * Decode Unicode Characters from \u0000 ASCII syntax.
     *
     * Walks the input byte by byte and builds a new string:
     *  - a \uXXXX escape is replaced by the UTF-8 encoding of that UTF-16
     *    code unit; a high surrogate immediately followed by a low surrogate
     *    escape is combined into a single 4-byte UTF-8 character;
     *  - bytes 0x20-0x7F are copied as-is;
     *  - a UTF-8 lead byte is copied together with the continuation bytes
     *    its bit pattern announces;
     *  - every other byte (control characters below 0x20, stray
     *    continuation bytes) is dropped.
     *
     * This algorithm was originally developed for the
     * Solar Framework by Paul M. Jones
     *
     * @link   http://solarphp.com/
     * @link   http://svn.solarphp.com/core/trunk/Solar/Json.php
     * @param  string $chrs
     * @return string
     */
    public static function decodeUnicodeString($chrs)
    {
        $utf8        = '';
        $strlen_chrs = strlen($chrs);

        for ($i = 0; $i < $strlen_chrs; $i++) {
            $ord_chrs_c = ord($chrs[$i]);

            if ($ord_chrs_c === 0x5C
                && preg_match('/^\\\\u([0-9A-F]{4})/i', substr($chrs, $i, 6), $escape)
            ) {
                // Escaped UTF-16 code unit, as two big-endian bytes.
                $unit     = hexdec($escape[1]);
                $utf16    = pack('n', $unit);
                $consumed = 6;

                // A high surrogate followed by an escaped low surrogate is
                // one character above U+FFFF; convert both halves together.
                if ($unit >= 0xD800 && $unit <= 0xDBFF
                    && preg_match('/^\\\\u(D[C-F][0-9A-F]{2})/i', substr($chrs, $i + 6, 6), $pair)
                ) {
                    $utf16   .= pack('n', hexdec($pair[1]));
                    $consumed = 12;
                }

                $utf8 .= self::_utf162utf8($utf16);
                $i    += $consumed - 1;
            } elseif ($ord_chrs_c >= 0x20 && $ord_chrs_c <= 0x7F) {
                $utf8 .= $chrs[$i];
            } elseif (($ord_chrs_c & 0xE0) == 0xC0) {
                // characters U-00000080 - U-000007FF, mask 110XXXXX
                // see http://www.cl.cam.ac.uk/~mgk25/unicode.html#utf-8
                $utf8 .= substr($chrs, $i, 2);
                ++$i;
            } elseif (($ord_chrs_c & 0xF0) == 0xE0) {
                // characters U-00000800 - U-0000FFFF, mask 1110XXXX
                $utf8 .= substr($chrs, $i, 3);
                $i += 2;
            } elseif (($ord_chrs_c & 0xF8) == 0xF0) {
                // characters U-00010000 - U-001FFFFF, mask 11110XXX
                $utf8 .= substr($chrs, $i, 4);
                $i += 3;
            } elseif (($ord_chrs_c & 0xFC) == 0xF8) {
                // characters U-00200000 - U-03FFFFFF, mask 111110XX
                $utf8 .= substr($chrs, $i, 5);
                $i += 4;
            } elseif (($ord_chrs_c & 0xFE) == 0xFC) {
                // characters U-04000000 - U-7FFFFFFF, mask 1111110X
                $utf8 .= substr($chrs, $i, 6);
                $i += 5;
            }
            // Any other byte is intentionally skipped.
        }

        return $utf8;
    }

    /**
     * Convert a string from one UTF-16 char to one UTF-8 char.
     *
     * Normally should be handled by mb_convert_encoding, but
     * provides a slower PHP-only method for installations
     * that lack the multibyte string extension.
     *
     * The input is either one big-endian code unit (2 bytes) or a
     * surrogate pair (4 bytes).
     *
     * This method is from the Solar Framework by Paul M. Jones
     *
     * @link   http://solarphp.com
     * @param  string $utf16 UTF-16 character
     * @return string UTF-8 character
     */
    protected static function _utf162utf8($utf16)
    {
        // Check for mb extension otherwise do by hand.
        if (function_exists('mb_convert_encoding')) {
            return mb_convert_encoding($utf16, 'UTF-8', 'UTF-16');
        }

        if (strlen($utf16) === 4) {
            // Surrogate pair: rebuild the code point, emit 4 UTF-8 bytes.
            $high      = (ord($utf16[0]) << 8) | ord($utf16[1]);
            $low       = (ord($utf16[2]) << 8) | ord($utf16[3]);
            $codePoint = 0x10000 + (($high - 0xD800) << 10) + ($low - 0xDC00);

            return chr(0xF0 | (($codePoint >> 18) & 0x07))
                 . chr(0x80 | (($codePoint >> 12) & 0x3F))
                 . chr(0x80 | (($codePoint >> 6) & 0x3F))
                 . chr(0x80 | ($codePoint & 0x3F));
        }

        $bytes = (ord($utf16[0]) << 8) | ord($utf16[1]);

        if ((0x7F & $bytes) == $bytes) {
            // plain ASCII range: a single byte
            // see: http://www.cl.cam.ac.uk/~mgk25/unicode.html#utf-8
            return chr(0x7F & $bytes);
        }

        if ((0x07FF & $bytes) == $bytes) {
            // return a 2-byte UTF-8 character
            return chr(0xC0 | (($bytes >> 6) & 0x1F))
                 . chr(0x80 | ($bytes & 0x3F));
        }

        if ((0xFFFF & $bytes) == $bytes) {
            // return a 3-byte UTF-8 character
            return chr(0xE0 | (($bytes >> 12) & 0x0F))
                 . chr(0x80 | (($bytes >> 6) & 0x3F))
                 . chr(0x80 | ($bytes & 0x3F));
        }

        // Not reachable for a 16-bit value; kept as a safe fallback.
        return '';
    }
}
|
578 |