web/lib/Zend/Json/Decoder.php
changeset 64 162c1de6545a
parent 19 1c2f13fd785c
child 68 ecaf28ffe26e
equal deleted inserted replaced
63:5b37998e522e 64:162c1de6545a
       
     1 <?php
       
     2 /**
       
     3  * Zend Framework
       
     4  *
       
     5  * LICENSE
       
     6  *
       
     7  * This source file is subject to the new BSD license that is bundled
       
     8  * with this package in the file LICENSE.txt.
       
     9  * It is also available through the world-wide-web at this URL:
       
    10  * http://framework.zend.com/license/new-bsd
       
    11  * If you did not receive a copy of the license and are unable to
       
    12  * obtain it through the world-wide-web, please send an email
       
    13  * to license@zend.com so we can send you a copy immediately.
       
    14  *
       
    15  * @category   Zend
       
    16  * @package    Zend_Json
       
    17  * @copyright  Copyright (c) 2005-2010 Zend Technologies USA Inc. (http://www.zend.com)
       
    18  * @license    http://framework.zend.com/license/new-bsd     New BSD License
       
    19  * @version    $Id: Decoder.php 22653 2010-07-22 18:41:39Z mabe $
       
    20  */
       
    21 
       
    22 /**
       
    23  * @see Zend_Json
       
    24  */
       
    25 require_once 'Zend/Json.php';
       
    26 
       
    27 /**
       
    28  * Decode JSON encoded string to PHP variable constructs
       
    29  *
       
    30  * @category   Zend
       
    31  * @package    Zend_Json
       
    32  * @copyright  Copyright (c) 2005-2010 Zend Technologies USA Inc. (http://www.zend.com)
       
    33  * @license    http://framework.zend.com/license/new-bsd     New BSD License
       
    34  */
       
    35 class Zend_Json_Decoder
       
    36 {
       
    37     /**
       
    38      * Parse tokens used to decode the JSON object. These are not
       
    39      * for public consumption, they are just used internally to the
       
    40      * class.
       
    41      */
       
    42     const EOF         = 0;
       
    43     const DATUM        = 1;
       
    44     const LBRACE    = 2;
       
    45     const LBRACKET    = 3;
       
    46     const RBRACE     = 4;
       
    47     const RBRACKET    = 5;
       
    48     const COMMA       = 6;
       
    49     const COLON        = 7;
       
    50 
       
    51     /**
       
    52      * Use to maintain a "pointer" to the source being decoded
       
    53      *
       
    54      * @var string
       
    55      */
       
    56     protected $_source;
       
    57 
       
    58     /**
       
    59      * Caches the source length
       
    60      *
       
    61      * @var int
       
    62      */
       
    63     protected $_sourceLength;
       
    64 
       
    65     /**
       
    66      * The offset within the souce being decoded
       
    67      *
       
    68      * @var int
       
    69      *
       
    70      */
       
    71     protected $_offset;
       
    72 
       
    73     /**
       
    74      * The current token being considered in the parser cycle
       
    75      *
       
    76      * @var int
       
    77      */
       
    78     protected $_token;
       
    79 
       
    80     /**
       
    81      * Flag indicating how objects should be decoded
       
    82      *
       
    83      * @var int
       
    84      * @access protected
       
    85      */
       
    86     protected $_decodeType;
       
    87 
       
    88     /**
       
    89      * Constructor
       
    90      *
       
    91      * @param string $source String source to decode
       
    92      * @param int $decodeType How objects should be decoded -- see
       
    93      * {@link Zend_Json::TYPE_ARRAY} and {@link Zend_Json::TYPE_OBJECT} for
       
    94      * valid values
       
    95      * @return void
       
    96      */
       
    97     protected function __construct($source, $decodeType)
       
    98     {
       
    99         // Set defaults
       
   100         $this->_source       = self::decodeUnicodeString($source);
       
   101         $this->_sourceLength = strlen($this->_source);
       
   102         $this->_token        = self::EOF;
       
   103         $this->_offset       = 0;
       
   104 
       
   105         // Normalize and set $decodeType
       
   106         if (!in_array($decodeType, array(Zend_Json::TYPE_ARRAY, Zend_Json::TYPE_OBJECT)))
       
   107         {
       
   108             $decodeType = Zend_Json::TYPE_ARRAY;
       
   109         }
       
   110         $this->_decodeType   = $decodeType;
       
   111 
       
   112         // Set pointer at first token
       
   113         $this->_getNextToken();
       
   114     }
       
   115 
       
   116     /**
       
   117      * Decode a JSON source string
       
   118      *
       
   119      * Decodes a JSON encoded string. The value returned will be one of the
       
   120      * following:
       
   121      *        - integer
       
   122      *        - float
       
   123      *        - boolean
       
   124      *        - null
       
   125      *      - StdClass
       
   126      *      - array
       
   127      *         - array of one or more of the above types
       
   128      *
       
   129      * By default, decoded objects will be returned as associative arrays; to
       
   130      * return a StdClass object instead, pass {@link Zend_Json::TYPE_OBJECT} to
       
   131      * the $objectDecodeType parameter.
       
   132      *
       
   133      * Throws a Zend_Json_Exception if the source string is null.
       
   134      *
       
   135      * @static
       
   136      * @access public
       
   137      * @param string $source String to be decoded
       
   138      * @param int $objectDecodeType How objects should be decoded; should be
       
   139      * either or {@link Zend_Json::TYPE_ARRAY} or
       
   140      * {@link Zend_Json::TYPE_OBJECT}; defaults to TYPE_ARRAY
       
   141      * @return mixed
       
   142      * @throws Zend_Json_Exception
       
   143      */
       
   144     public static function decode($source = null, $objectDecodeType = Zend_Json::TYPE_ARRAY)
       
   145     {
       
   146         if (null === $source) {
       
   147             require_once 'Zend/Json/Exception.php';
       
   148             throw new Zend_Json_Exception('Must specify JSON encoded source for decoding');
       
   149         } elseif (!is_string($source)) {
       
   150             require_once 'Zend/Json/Exception.php';
       
   151             throw new Zend_Json_Exception('Can only decode JSON encoded strings');
       
   152         }
       
   153 
       
   154         $decoder = new self($source, $objectDecodeType);
       
   155 
       
   156         return $decoder->_decodeValue();
       
   157     }
       
   158 
       
   159 
       
   160     /**
       
   161      * Recursive driving rountine for supported toplevel tops
       
   162      *
       
   163      * @return mixed
       
   164      */
       
   165     protected function _decodeValue()
       
   166     {
       
   167         switch ($this->_token) {
       
   168             case self::DATUM:
       
   169                 $result  = $this->_tokenValue;
       
   170                 $this->_getNextToken();
       
   171                 return($result);
       
   172                 break;
       
   173             case self::LBRACE:
       
   174                 return($this->_decodeObject());
       
   175                 break;
       
   176             case self::LBRACKET:
       
   177                 return($this->_decodeArray());
       
   178                 break;
       
   179             default:
       
   180                 return null;
       
   181                 break;
       
   182         }
       
   183     }
       
   184 
       
   185     /**
       
   186      * Decodes an object of the form:
       
   187      *  { "attribute: value, "attribute2" : value,...}
       
   188      *
       
   189      * If Zend_Json_Encoder was used to encode the original object then
       
   190      * a special attribute called __className which specifies a class
       
   191      * name that should wrap the data contained within the encoded source.
       
   192      *
       
   193      * Decodes to either an array or StdClass object, based on the value of
       
   194      * {@link $_decodeType}. If invalid $_decodeType present, returns as an
       
   195      * array.
       
   196      *
       
   197      * @return array|StdClass
       
   198      */
       
   199     protected function _decodeObject()
       
   200     {
       
   201         $members = array();
       
   202         $tok = $this->_getNextToken();
       
   203 
       
   204         while ($tok && $tok != self::RBRACE) {
       
   205             if ($tok != self::DATUM || ! is_string($this->_tokenValue)) {
       
   206                 require_once 'Zend/Json/Exception.php';
       
   207                 throw new Zend_Json_Exception('Missing key in object encoding: ' . $this->_source);
       
   208             }
       
   209 
       
   210             $key = $this->_tokenValue;
       
   211             $tok = $this->_getNextToken();
       
   212 
       
   213             if ($tok != self::COLON) {
       
   214                 require_once 'Zend/Json/Exception.php';
       
   215                 throw new Zend_Json_Exception('Missing ":" in object encoding: ' . $this->_source);
       
   216             }
       
   217 
       
   218             $tok = $this->_getNextToken();
       
   219             $members[$key] = $this->_decodeValue();
       
   220             $tok = $this->_token;
       
   221 
       
   222             if ($tok == self::RBRACE) {
       
   223                 break;
       
   224             }
       
   225 
       
   226             if ($tok != self::COMMA) {
       
   227                 require_once 'Zend/Json/Exception.php';
       
   228                 throw new Zend_Json_Exception('Missing "," in object encoding: ' . $this->_source);
       
   229             }
       
   230 
       
   231             $tok = $this->_getNextToken();
       
   232         }
       
   233 
       
   234         switch ($this->_decodeType) {
       
   235             case Zend_Json::TYPE_OBJECT:
       
   236                 // Create new StdClass and populate with $members
       
   237                 $result = new StdClass();
       
   238                 foreach ($members as $key => $value) {
       
   239                     $result->$key = $value;
       
   240                 }
       
   241                 break;
       
   242             case Zend_Json::TYPE_ARRAY:
       
   243             default:
       
   244                 $result = $members;
       
   245                 break;
       
   246         }
       
   247 
       
   248         $this->_getNextToken();
       
   249         return $result;
       
   250     }
       
   251 
       
   252     /**
       
   253      * Decodes a JSON array format:
       
   254      *    [element, element2,...,elementN]
       
   255      *
       
   256      * @return array
       
   257      */
       
   258     protected function _decodeArray()
       
   259     {
       
   260         $result = array();
       
   261         $starttok = $tok = $this->_getNextToken(); // Move past the '['
       
   262         $index  = 0;
       
   263 
       
   264         while ($tok && $tok != self::RBRACKET) {
       
   265             $result[$index++] = $this->_decodeValue();
       
   266 
       
   267             $tok = $this->_token;
       
   268 
       
   269             if ($tok == self::RBRACKET || !$tok) {
       
   270                 break;
       
   271             }
       
   272 
       
   273             if ($tok != self::COMMA) {
       
   274                 require_once 'Zend/Json/Exception.php';
       
   275                 throw new Zend_Json_Exception('Missing "," in array encoding: ' . $this->_source);
       
   276             }
       
   277 
       
   278             $tok = $this->_getNextToken();
       
   279         }
       
   280 
       
   281         $this->_getNextToken();
       
   282         return($result);
       
   283     }
       
   284 
       
   285 
       
   286     /**
       
   287      * Removes whitepsace characters from the source input
       
   288      */
       
   289     protected function _eatWhitespace()
       
   290     {
       
   291         if (preg_match(
       
   292                 '/([\t\b\f\n\r ])*/s',
       
   293                 $this->_source,
       
   294                 $matches,
       
   295                 PREG_OFFSET_CAPTURE,
       
   296                 $this->_offset)
       
   297             && $matches[0][1] == $this->_offset)
       
   298         {
       
   299             $this->_offset += strlen($matches[0][0]);
       
   300         }
       
   301     }
       
   302 
       
   303 
       
   304     /**
       
   305      * Retrieves the next token from the source stream
       
   306      *
       
   307      * @return int Token constant value specified in class definition
       
   308      */
       
   309     protected function _getNextToken()
       
   310     {
       
   311         $this->_token      = self::EOF;
       
   312         $this->_tokenValue = null;
       
   313         $this->_eatWhitespace();
       
   314 
       
   315         if ($this->_offset >= $this->_sourceLength) {
       
   316             return(self::EOF);
       
   317         }
       
   318 
       
   319         $str        = $this->_source;
       
   320         $str_length = $this->_sourceLength;
       
   321         $i          = $this->_offset;
       
   322         $start      = $i;
       
   323 
       
   324         switch ($str{$i}) {
       
   325             case '{':
       
   326                $this->_token = self::LBRACE;
       
   327                break;
       
   328             case '}':
       
   329                 $this->_token = self::RBRACE;
       
   330                 break;
       
   331             case '[':
       
   332                 $this->_token = self::LBRACKET;
       
   333                 break;
       
   334             case ']':
       
   335                 $this->_token = self::RBRACKET;
       
   336                 break;
       
   337             case ',':
       
   338                 $this->_token = self::COMMA;
       
   339                 break;
       
   340             case ':':
       
   341                 $this->_token = self::COLON;
       
   342                 break;
       
   343             case  '"':
       
   344                 $result = '';
       
   345                 do {
       
   346                     $i++;
       
   347                     if ($i >= $str_length) {
       
   348                         break;
       
   349                     }
       
   350 
       
   351                     $chr = $str{$i};
       
   352 
       
   353                     if ($chr == '\\') {
       
   354                         $i++;
       
   355                         if ($i >= $str_length) {
       
   356                             break;
       
   357                         }
       
   358                         $chr = $str{$i};
       
   359                         switch ($chr) {
       
   360                             case '"' :
       
   361                                 $result .= '"';
       
   362                                 break;
       
   363                             case '\\':
       
   364                                 $result .= '\\';
       
   365                                 break;
       
   366                             case '/' :
       
   367                                 $result .= '/';
       
   368                                 break;
       
   369                             case 'b' :
       
   370                                 $result .= "\x08";
       
   371                                 break;
       
   372                             case 'f' :
       
   373                                 $result .= "\x0c";
       
   374                                 break;
       
   375                             case 'n' :
       
   376                                 $result .= "\x0a";
       
   377                                 break;
       
   378                             case 'r' :
       
   379                                 $result .= "\x0d";
       
   380                                 break;
       
   381                             case 't' :
       
   382                                 $result .= "\x09";
       
   383                                 break;
       
   384                             case '\'' :
       
   385                                 $result .= '\'';
       
   386                                 break;
       
   387                             default:
       
   388                                 require_once 'Zend/Json/Exception.php';
       
   389                                 throw new Zend_Json_Exception("Illegal escape "
       
   390                                     .  "sequence '" . $chr . "'");
       
   391                         }
       
   392                     } elseif($chr == '"') {
       
   393                         break;
       
   394                     } else {
       
   395                         $result .= $chr;
       
   396                     }
       
   397                 } while ($i < $str_length);
       
   398 
       
   399                 $this->_token = self::DATUM;
       
   400                 //$this->_tokenValue = substr($str, $start + 1, $i - $start - 1);
       
   401                 $this->_tokenValue = $result;
       
   402                 break;
       
   403             case 't':
       
   404                 if (($i+ 3) < $str_length && substr($str, $start, 4) == "true") {
       
   405                     $this->_token = self::DATUM;
       
   406                 }
       
   407                 $this->_tokenValue = true;
       
   408                 $i += 3;
       
   409                 break;
       
   410             case 'f':
       
   411                 if (($i+ 4) < $str_length && substr($str, $start, 5) == "false") {
       
   412                     $this->_token = self::DATUM;
       
   413                 }
       
   414                 $this->_tokenValue = false;
       
   415                 $i += 4;
       
   416                 break;
       
   417             case 'n':
       
   418                 if (($i+ 3) < $str_length && substr($str, $start, 4) == "null") {
       
   419                     $this->_token = self::DATUM;
       
   420                 }
       
   421                 $this->_tokenValue = NULL;
       
   422                 $i += 3;
       
   423                 break;
       
   424         }
       
   425 
       
   426         if ($this->_token != self::EOF) {
       
   427             $this->_offset = $i + 1; // Consume the last token character
       
   428             return($this->_token);
       
   429         }
       
   430 
       
   431         $chr = $str{$i};
       
   432         if ($chr == '-' || $chr == '.' || ($chr >= '0' && $chr <= '9')) {
       
   433             if (preg_match('/-?([0-9])*(\.[0-9]*)?((e|E)((-|\+)?)[0-9]+)?/s',
       
   434                 $str, $matches, PREG_OFFSET_CAPTURE, $start) && $matches[0][1] == $start) {
       
   435 
       
   436                 $datum = $matches[0][0];
       
   437 
       
   438                 if (is_numeric($datum)) {
       
   439                     if (preg_match('/^0\d+$/', $datum)) {
       
   440                         require_once 'Zend/Json/Exception.php';
       
   441                         throw new Zend_Json_Exception("Octal notation not supported by JSON (value: $datum)");
       
   442                     } else {
       
   443                         $val  = intval($datum);
       
   444                         $fVal = floatval($datum);
       
   445                         $this->_tokenValue = ($val == $fVal ? $val : $fVal);
       
   446                     }
       
   447                 } else {
       
   448                     require_once 'Zend/Json/Exception.php';
       
   449                     throw new Zend_Json_Exception("Illegal number format: $datum");
       
   450                 }
       
   451 
       
   452                 $this->_token = self::DATUM;
       
   453                 $this->_offset = $start + strlen($datum);
       
   454             }
       
   455         } else {
       
   456             require_once 'Zend/Json/Exception.php';
       
   457             throw new Zend_Json_Exception('Illegal Token');
       
   458         }
       
   459 
       
   460         return($this->_token);
       
   461     }
       
   462 
       
   463     /**
       
   464      * Decode Unicode Characters from \u0000 ASCII syntax.
       
   465      *
       
   466      * This algorithm was originally developed for the
       
   467      * Solar Framework by Paul M. Jones
       
   468      *
       
   469      * @link   http://solarphp.com/
       
   470      * @link   http://svn.solarphp.com/core/trunk/Solar/Json.php
       
   471      * @param  string $value
       
   472      * @return string
       
   473      */
       
   474     public static function decodeUnicodeString($chrs)
       
   475     {
       
   476         $delim       = substr($chrs, 0, 1);
       
   477         $utf8        = '';
       
   478         $strlen_chrs = strlen($chrs);
       
   479 
       
   480         for($i = 0; $i < $strlen_chrs; $i++) {
       
   481 
       
   482             $substr_chrs_c_2 = substr($chrs, $i, 2);
       
   483             $ord_chrs_c = ord($chrs[$i]);
       
   484 
       
   485             switch (true) {
       
   486                 case preg_match('/\\\u[0-9A-F]{4}/i', substr($chrs, $i, 6)):
       
   487                     // single, escaped unicode character
       
   488                     $utf16 = chr(hexdec(substr($chrs, ($i + 2), 2)))
       
   489                            . chr(hexdec(substr($chrs, ($i + 4), 2)));
       
   490                     $utf8 .= self::_utf162utf8($utf16);
       
   491                     $i += 5;
       
   492                     break;
       
   493                 case ($ord_chrs_c >= 0x20) && ($ord_chrs_c <= 0x7F):
       
   494                     $utf8 .= $chrs{$i};
       
   495                     break;
       
   496                 case ($ord_chrs_c & 0xE0) == 0xC0:
       
   497                     // characters U-00000080 - U-000007FF, mask 110XXXXX
       
   498                     //see http://www.cl.cam.ac.uk/~mgk25/unicode.html#utf-8
       
   499                     $utf8 .= substr($chrs, $i, 2);
       
   500                     ++$i;
       
   501                     break;
       
   502                 case ($ord_chrs_c & 0xF0) == 0xE0:
       
   503                     // characters U-00000800 - U-0000FFFF, mask 1110XXXX
       
   504                     // see http://www.cl.cam.ac.uk/~mgk25/unicode.html#utf-8
       
   505                     $utf8 .= substr($chrs, $i, 3);
       
   506                     $i += 2;
       
   507                     break;
       
   508                 case ($ord_chrs_c & 0xF8) == 0xF0:
       
   509                     // characters U-00010000 - U-001FFFFF, mask 11110XXX
       
   510                     // see http://www.cl.cam.ac.uk/~mgk25/unicode.html#utf-8
       
   511                     $utf8 .= substr($chrs, $i, 4);
       
   512                     $i += 3;
       
   513                     break;
       
   514                 case ($ord_chrs_c & 0xFC) == 0xF8:
       
   515                     // characters U-00200000 - U-03FFFFFF, mask 111110XX
       
   516                     // see http://www.cl.cam.ac.uk/~mgk25/unicode.html#utf-8
       
   517                     $utf8 .= substr($chrs, $i, 5);
       
   518                     $i += 4;
       
   519                     break;
       
   520                 case ($ord_chrs_c & 0xFE) == 0xFC:
       
   521                     // characters U-04000000 - U-7FFFFFFF, mask 1111110X
       
   522                     // see http://www.cl.cam.ac.uk/~mgk25/unicode.html#utf-8
       
   523                     $utf8 .= substr($chrs, $i, 6);
       
   524                     $i += 5;
       
   525                     break;
       
   526             }
       
   527         }
       
   528 
       
   529         return $utf8;
       
   530     }
       
   531 
       
   532     /**
       
   533      * Convert a string from one UTF-16 char to one UTF-8 char.
       
   534      *
       
   535      * Normally should be handled by mb_convert_encoding, but
       
   536      * provides a slower PHP-only method for installations
       
   537      * that lack the multibye string extension.
       
   538      *
       
   539      * This method is from the Solar Framework by Paul M. Jones
       
   540      *
       
   541      * @link   http://solarphp.com
       
   542      * @param  string $utf16 UTF-16 character
       
   543      * @return string UTF-8 character
       
   544      */
       
   545     protected static function _utf162utf8($utf16)
       
   546     {
       
   547         // Check for mb extension otherwise do by hand.
       
   548         if( function_exists('mb_convert_encoding') ) {
       
   549             return mb_convert_encoding($utf16, 'UTF-8', 'UTF-16');
       
   550         }
       
   551 
       
   552         $bytes = (ord($utf16{0}) << 8) | ord($utf16{1});
       
   553 
       
   554         switch (true) {
       
   555             case ((0x7F & $bytes) == $bytes):
       
   556                 // this case should never be reached, because we are in ASCII range
       
   557                 // see: http://www.cl.cam.ac.uk/~mgk25/unicode.html#utf-8
       
   558                 return chr(0x7F & $bytes);
       
   559 
       
   560             case (0x07FF & $bytes) == $bytes:
       
   561                 // return a 2-byte UTF-8 character
       
   562                 // see: http://www.cl.cam.ac.uk/~mgk25/unicode.html#utf-8
       
   563                 return chr(0xC0 | (($bytes >> 6) & 0x1F))
       
   564                      . chr(0x80 | ($bytes & 0x3F));
       
   565 
       
   566             case (0xFFFF & $bytes) == $bytes:
       
   567                 // return a 3-byte UTF-8 character
       
   568                 // see: http://www.cl.cam.ac.uk/~mgk25/unicode.html#utf-8
       
   569                 return chr(0xE0 | (($bytes >> 12) & 0x0F))
       
   570                      . chr(0x80 | (($bytes >> 6) & 0x3F))
       
   571                      . chr(0x80 | ($bytes & 0x3F));
       
   572         }
       
   573 
       
   574         // ignoring UTF-32 for now, sorry
       
   575         return '';
       
   576     }
       
   577 }
       
   578