web/lib/django/utils/simplejson/decoder.py
changeset 0 0d40e90630ef
equal deleted inserted replaced
-1:000000000000 0:0d40e90630ef
       
     1 """Implementation of JSONDecoder
       
     2 """
       
     3 import re
       
     4 import sys
       
     5 import struct
       
     6 
       
     7 from django.utils.simplejson.scanner import make_scanner
       
# No accelerated scanstring is bound in this module; because this is None,
# the ``c_scanstring or py_scanstring`` selection further down always picks
# the pure-Python implementation.
c_scanstring = None

# Public names exported by ``from ... import *``.
__all__ = ['JSONDecoder']

# Flags shared by every regular expression compiled in this module.
FLAGS = re.VERBOSE | re.MULTILINE | re.DOTALL
       
    13 
       
    14 def _floatconstants():
       
    15     _BYTES = '7FF80000000000007FF0000000000000'.decode('hex')
       
    16     if sys.byteorder != 'big':
       
    17         _BYTES = _BYTES[:8][::-1] + _BYTES[8:][::-1]
       
    18     nan, inf = struct.unpack('dd', _BYTES)
       
    19     return nan, inf, -inf
       
    20 
       
    21 NaN, PosInf, NegInf = _floatconstants()
       
    22 
       
    23 
       
    24 def linecol(doc, pos):
       
    25     lineno = doc.count('\n', 0, pos) + 1
       
    26     if lineno == 1:
       
    27         colno = pos
       
    28     else:
       
    29         colno = pos - doc.rindex('\n', 0, pos)
       
    30     return lineno, colno
       
    31 
       
    32 
       
    33 def errmsg(msg, doc, pos, end=None):
       
    34     # Note that this function is called from _speedups
       
    35     lineno, colno = linecol(doc, pos)
       
    36     if end is None:
       
    37         return '%s: line %d column %d (char %d)' % (msg, lineno, colno, pos)
       
    38     endlineno, endcolno = linecol(doc, end)
       
    39     return '%s: line %d column %d - line %d column %d (char %d - %d)' % (
       
    40         msg, lineno, colno, endlineno, endcolno, pos, end)
       
    41 
       
    42 
       
# Maps the non-standard JSON constant names to their float values; its
# __getitem__ is JSONDecoder's default ``parse_constant``.
_CONSTANTS = {
    '-Infinity': NegInf,
    'Infinity': PosInf,
    'NaN': NaN,
}

# Group 1: a (possibly empty) run of string content, matched lazily.
# Group 2: the terminator that ends the run -- a double quote, a backslash,
# or a control character in the range \x00-\x1f.
STRINGCHUNK = re.compile(r'(.*?)(["\\\x00-\x1f])', FLAGS)
# Single-character escapes: the character following a backslash mapped to
# the text it stands for.
BACKSLASH = {
    '"': u'"', '\\': u'\\', '/': u'/',
    'b': u'\b', 'f': u'\f', 'n': u'\n', 'r': u'\r', 't': u'\t',
}

# Encoding py_scanstring falls back to when it is called with encoding=None.
DEFAULT_ENCODING = "utf-8"
       
    56 
       
    57 def py_scanstring(s, end, encoding=None, strict=True, _b=BACKSLASH, _m=STRINGCHUNK.match):
       
    58     """Scan the string s for a JSON string. End is the index of the
       
    59     character in s after the quote that started the JSON string.
       
    60     Unescapes all valid JSON string escape sequences and raises ValueError
       
    61     on attempt to decode an invalid string. If strict is False then literal
       
    62     control characters are allowed in the string.
       
    63     
       
    64     Returns a tuple of the decoded string and the index of the character in s
       
    65     after the end quote."""
       
    66     if encoding is None:
       
    67         encoding = DEFAULT_ENCODING
       
    68     chunks = []
       
    69     _append = chunks.append
       
    70     begin = end - 1
       
    71     while 1:
       
    72         chunk = _m(s, end)
       
    73         if chunk is None:
       
    74             raise ValueError(
       
    75                 errmsg("Unterminated string starting at", s, begin))
       
    76         end = chunk.end()
       
    77         content, terminator = chunk.groups()
       
    78         # Content is contains zero or more unescaped string characters
       
    79         if content:
       
    80             if not isinstance(content, unicode):
       
    81                 content = unicode(content, encoding)
       
    82             _append(content)
       
    83         # Terminator is the end of string, a literal control character,
       
    84         # or a backslash denoting that an escape sequence follows
       
    85         if terminator == '"':
       
    86             break
       
    87         elif terminator != '\\':
       
    88             if strict:
       
    89                 msg = "Invalid control character %r at" % (terminator,)
       
    90                 raise ValueError(msg, s, end)
       
    91             else:
       
    92                 _append(terminator)
       
    93                 continue
       
    94         try:
       
    95             esc = s[end]
       
    96         except IndexError:
       
    97             raise ValueError(
       
    98                 errmsg("Unterminated string starting at", s, begin))
       
    99         # If not a unicode escape sequence, must be in the lookup table
       
   100         if esc != 'u':
       
   101             try:
       
   102                 char = _b[esc]
       
   103             except KeyError:
       
   104                 raise ValueError(
       
   105                     errmsg("Invalid \\escape: %r" % (esc,), s, end))
       
   106             end += 1
       
   107         else:
       
   108             # Unicode escape sequence
       
   109             esc = s[end + 1:end + 5]
       
   110             next_end = end + 5
       
   111             if len(esc) != 4:
       
   112                 msg = "Invalid \\uXXXX escape"
       
   113                 raise ValueError(errmsg(msg, s, end))
       
   114             uni = int(esc, 16)
       
   115             # Check for surrogate pair on UCS-4 systems
       
   116             if 0xd800 <= uni <= 0xdbff and sys.maxunicode > 65535:
       
   117                 msg = "Invalid \\uXXXX\\uXXXX surrogate pair"
       
   118                 if not s[end + 5:end + 7] == '\\u':
       
   119                     raise ValueError(errmsg(msg, s, end))
       
   120                 esc2 = s[end + 7:end + 11]
       
   121                 if len(esc2) != 4:
       
   122                     raise ValueError(errmsg(msg, s, end))
       
   123                 uni2 = int(esc2, 16)
       
   124                 uni = 0x10000 + (((uni - 0xd800) << 10) | (uni2 - 0xdc00))
       
   125                 next_end += 6
       
   126             char = unichr(uni)
       
   127             end = next_end
       
   128         # Append the unescaped character
       
   129         _append(char)
       
   130     return u''.join(chunks), end
       
   131 
       
   132 
       
# Use speedup if available
scanstring = c_scanstring or py_scanstring

# Matches a (possibly empty) run of JSON whitespace: space, tab, LF, CR.
WHITESPACE = re.compile(r'[ \t\n\r]*', FLAGS)
# The same four whitespace characters as a plain string, used for
# single-character ``in`` tests before falling back to the regex.
WHITESPACE_STR = ' \t\n\r'
       
   138 
       
   139 def JSONObject((s, end), encoding, strict, scan_once, object_hook, _w=WHITESPACE.match, _ws=WHITESPACE_STR):
       
   140     pairs = {}
       
   141     # Use a slice to prevent IndexError from being raised, the following
       
   142     # check will raise a more specific ValueError if the string is empty
       
   143     nextchar = s[end:end + 1]
       
   144     # Normally we expect nextchar == '"'
       
   145     if nextchar != '"':
       
   146         if nextchar in _ws:
       
   147             end = _w(s, end).end()
       
   148             nextchar = s[end:end + 1]
       
   149         # Trivial empty object
       
   150         if nextchar == '}':
       
   151             return pairs, end + 1
       
   152         elif nextchar != '"':
       
   153             raise ValueError(errmsg("Expecting property name", s, end))
       
   154     end += 1
       
   155     while True:
       
   156         key, end = scanstring(s, end, encoding, strict)
       
   157 
       
   158         # To skip some function call overhead we optimize the fast paths where
       
   159         # the JSON key separator is ": " or just ":".
       
   160         if s[end:end + 1] != ':':
       
   161             end = _w(s, end).end()
       
   162             if s[end:end + 1] != ':':
       
   163                 raise ValueError(errmsg("Expecting : delimiter", s, end))
       
   164 
       
   165         end += 1
       
   166 
       
   167         try:
       
   168             if s[end] in _ws:
       
   169                 end += 1
       
   170                 if s[end] in _ws:
       
   171                     end = _w(s, end + 1).end()
       
   172         except IndexError:
       
   173             pass
       
   174 
       
   175         try:
       
   176             value, end = scan_once(s, end)
       
   177         except StopIteration:
       
   178             raise ValueError(errmsg("Expecting object", s, end))
       
   179         pairs[key] = value
       
   180 
       
   181         try:
       
   182             nextchar = s[end]
       
   183             if nextchar in _ws:
       
   184                 end = _w(s, end + 1).end()
       
   185                 nextchar = s[end]
       
   186         except IndexError:
       
   187             nextchar = ''
       
   188         end += 1
       
   189 
       
   190         if nextchar == '}':
       
   191             break
       
   192         elif nextchar != ',':
       
   193             raise ValueError(errmsg("Expecting , delimiter", s, end - 1))
       
   194 
       
   195         try:
       
   196             nextchar = s[end]
       
   197             if nextchar in _ws:
       
   198                 end += 1
       
   199                 nextchar = s[end]
       
   200                 if nextchar in _ws:
       
   201                     end = _w(s, end + 1).end()
       
   202                     nextchar = s[end]
       
   203         except IndexError:
       
   204             nextchar = ''
       
   205 
       
   206         end += 1
       
   207         if nextchar != '"':
       
   208             raise ValueError(errmsg("Expecting property name", s, end - 1))
       
   209 
       
   210     if object_hook is not None:
       
   211         pairs = object_hook(pairs)
       
   212     return pairs, end
       
   213 
       
   214 def JSONArray((s, end), scan_once, _w=WHITESPACE.match, _ws=WHITESPACE_STR):
       
   215     values = []
       
   216     nextchar = s[end:end + 1]
       
   217     if nextchar in _ws:
       
   218         end = _w(s, end + 1).end()
       
   219         nextchar = s[end:end + 1]
       
   220     # Look-ahead for trivial empty array
       
   221     if nextchar == ']':
       
   222         return values, end + 1
       
   223     _append = values.append
       
   224     while True:
       
   225         try:
       
   226             value, end = scan_once(s, end)
       
   227         except StopIteration:
       
   228             raise ValueError(errmsg("Expecting object", s, end))
       
   229         _append(value)
       
   230         nextchar = s[end:end + 1]
       
   231         if nextchar in _ws:
       
   232             end = _w(s, end + 1).end()
       
   233             nextchar = s[end:end + 1]
       
   234         end += 1
       
   235         if nextchar == ']':
       
   236             break
       
   237         elif nextchar != ',':
       
   238             raise ValueError(errmsg("Expecting , delimiter", s, end))
       
   239 
       
   240         try:
       
   241             if s[end] in _ws:
       
   242                 end += 1
       
   243                 if s[end] in _ws:
       
   244                     end = _w(s, end + 1).end()
       
   245         except IndexError:
       
   246             pass
       
   247 
       
   248     return values, end
       
   249 
       
   250 class JSONDecoder(object):
       
   251     """Simple JSON <http://json.org> decoder
       
   252 
       
   253     Performs the following translations in decoding by default:
       
   254 
       
   255     +---------------+-------------------+
       
   256     | JSON          | Python            |
       
   257     +===============+===================+
       
   258     | object        | dict              |
       
   259     +---------------+-------------------+
       
   260     | array         | list              |
       
   261     +---------------+-------------------+
       
   262     | string        | unicode           |
       
   263     +---------------+-------------------+
       
   264     | number (int)  | int, long         |
       
   265     +---------------+-------------------+
       
   266     | number (real) | float             |
       
   267     +---------------+-------------------+
       
   268     | true          | True              |
       
   269     +---------------+-------------------+
       
   270     | false         | False             |
       
   271     +---------------+-------------------+
       
   272     | null          | None              |
       
   273     +---------------+-------------------+
       
   274 
       
   275     It also understands ``NaN``, ``Infinity``, and ``-Infinity`` as
       
   276     their corresponding ``float`` values, which is outside the JSON spec.
       
   277 
       
   278     """
       
   279 
       
   280     def __init__(self, encoding=None, object_hook=None, parse_float=None,
       
   281             parse_int=None, parse_constant=None, strict=True):
       
   282         """``encoding`` determines the encoding used to interpret any ``str``
       
   283         objects decoded by this instance (utf-8 by default).  It has no
       
   284         effect when decoding ``unicode`` objects.
       
   285 
       
   286         Note that currently only encodings that are a superset of ASCII work,
       
   287         strings of other encodings should be passed in as ``unicode``.
       
   288 
       
   289         ``object_hook``, if specified, will be called with the result
       
   290         of every JSON object decoded and its return value will be used in
       
   291         place of the given ``dict``.  This can be used to provide custom
       
   292         deserializations (e.g. to support JSON-RPC class hinting).
       
   293 
       
   294         ``parse_float``, if specified, will be called with the string
       
   295         of every JSON float to be decoded. By default this is equivalent to
       
   296         float(num_str). This can be used to use another datatype or parser
       
   297         for JSON floats (e.g. decimal.Decimal).
       
   298 
       
   299         ``parse_int``, if specified, will be called with the string
       
   300         of every JSON int to be decoded. By default this is equivalent to
       
   301         int(num_str). This can be used to use another datatype or parser
       
   302         for JSON integers (e.g. float).
       
   303 
       
   304         ``parse_constant``, if specified, will be called with one of the
       
   305         following strings: -Infinity, Infinity, NaN.
       
   306         This can be used to raise an exception if invalid JSON numbers
       
   307         are encountered.
       
   308 
       
   309         """
       
   310         self.encoding = encoding
       
   311         self.object_hook = object_hook
       
   312         self.parse_float = parse_float or float
       
   313         self.parse_int = parse_int or int
       
   314         self.parse_constant = parse_constant or _CONSTANTS.__getitem__
       
   315         self.strict = strict
       
   316         self.parse_object = JSONObject
       
   317         self.parse_array = JSONArray
       
   318         self.parse_string = scanstring
       
   319         self.scan_once = make_scanner(self)
       
   320 
       
   321     def decode(self, s, _w=WHITESPACE.match):
       
   322         """Return the Python representation of ``s`` (a ``str`` or ``unicode``
       
   323         instance containing a JSON document)
       
   324 
       
   325         """
       
   326         obj, end = self.raw_decode(s, idx=_w(s, 0).end())
       
   327         end = _w(s, end).end()
       
   328         if end != len(s):
       
   329             raise ValueError(errmsg("Extra data", s, end, len(s)))
       
   330         return obj
       
   331 
       
   332     def raw_decode(self, s, idx=0):
       
   333         """Decode a JSON document from ``s`` (a ``str`` or ``unicode`` beginning
       
   334         with a JSON document) and return a 2-tuple of the Python
       
   335         representation and the index in ``s`` where the document ended.
       
   336 
       
   337         This can be used to decode a JSON document from a string that may
       
   338         have extraneous data at the end.
       
   339 
       
   340         """
       
   341         try:
       
   342             obj, end = self.scan_once(s, idx)
       
   343         except StopIteration:
       
   344             raise ValueError("No JSON object could be decoded")
       
   345         return obj, end