web/lib/django/utils/simplejson/encoder.py
changeset 0 0d40e90630ef
equal deleted inserted replaced
-1:000000000000 0:0d40e90630ef
       
     1 """Implementation of JSONEncoder
       
     2 """
       
     3 import re
       
     4 
       
     5 c_encode_basestring_ascii = None
       
     6 c_make_encoder = None
       
     7 
       
     8 ESCAPE = re.compile(r'[\x00-\x1f\\"\b\f\n\r\t]')
       
     9 ESCAPE_ASCII = re.compile(r'([\\"]|[^\ -~])')
       
    10 HAS_UTF8 = re.compile(r'[\x80-\xff]')
       
    11 ESCAPE_DCT = {
       
    12     '\\': '\\\\',
       
    13     '"': '\\"',
       
    14     '\b': '\\b',
       
    15     '\f': '\\f',
       
    16     '\n': '\\n',
       
    17     '\r': '\\r',
       
    18     '\t': '\\t',
       
    19 }
       
    20 for i in range(0x20):
       
    21     ESCAPE_DCT.setdefault(chr(i), '\\u%04x' % (i,))
       
    22 
       
    23 # Assume this produces an infinity on all machines (probably not guaranteed)
       
    24 INFINITY = float('1e66666')
       
    25 FLOAT_REPR = repr
       
    26 
       
    27 def encode_basestring(s):
       
    28     """Return a JSON representation of a Python string
       
    29 
       
    30     """
       
    31     def replace(match):
       
    32         return ESCAPE_DCT[match.group(0)]
       
    33     return '"' + ESCAPE.sub(replace, s) + '"'
       
    34 
       
    35 
       
    36 def py_encode_basestring_ascii(s):
       
    37     """Return an ASCII-only JSON representation of a Python string
       
    38 
       
    39     """
       
    40     if isinstance(s, str) and HAS_UTF8.search(s) is not None:
       
    41         s = s.decode('utf-8')
       
    42     def replace(match):
       
    43         s = match.group(0)
       
    44         try:
       
    45             return ESCAPE_DCT[s]
       
    46         except KeyError:
       
    47             n = ord(s)
       
    48             if n < 0x10000:
       
    49                 return '\\u%04x' % (n,)
       
    50             else:
       
    51                 # surrogate pair
       
    52                 n -= 0x10000
       
    53                 s1 = 0xd800 | ((n >> 10) & 0x3ff)
       
    54                 s2 = 0xdc00 | (n & 0x3ff)
       
    55                 return '\\u%04x\\u%04x' % (s1, s2)
       
    56     return '"' + str(ESCAPE_ASCII.sub(replace, s)) + '"'
       
    57 
       
    58 
       
    59 encode_basestring_ascii = c_encode_basestring_ascii or py_encode_basestring_ascii
       
    60 
       
    61 class JSONEncoder(object):
       
    62     """Extensible JSON <http://json.org> encoder for Python data structures.
       
    63 
       
    64     Supports the following objects and types by default:
       
    65 
       
    66     +-------------------+---------------+
       
    67     | Python            | JSON          |
       
    68     +===================+===============+
       
    69     | dict              | object        |
       
    70     +-------------------+---------------+
       
    71     | list, tuple       | array         |
       
    72     +-------------------+---------------+
       
    73     | str, unicode      | string        |
       
    74     +-------------------+---------------+
       
    75     | int, long, float  | number        |
       
    76     +-------------------+---------------+
       
    77     | True              | true          |
       
    78     +-------------------+---------------+
       
    79     | False             | false         |
       
    80     +-------------------+---------------+
       
    81     | None              | null          |
       
    82     +-------------------+---------------+
       
    83 
       
    84     To extend this to recognize other objects, subclass and implement a
       
    85     ``.default()`` method with another method that returns a serializable
       
    86     object for ``o`` if possible, otherwise it should call the superclass
       
    87     implementation (to raise ``TypeError``).
       
    88 
       
    89     """
       
    90     item_separator = ', '
       
    91     key_separator = ': '
       
    92     def __init__(self, skipkeys=False, ensure_ascii=True,
       
    93             check_circular=True, allow_nan=True, sort_keys=False,
       
    94             indent=None, separators=None, encoding='utf-8', default=None):
       
    95         """Constructor for JSONEncoder, with sensible defaults.
       
    96 
       
    97         If skipkeys is False, then it is a TypeError to attempt
       
    98         encoding of keys that are not str, int, long, float or None.  If
       
    99         skipkeys is True, such items are simply skipped.
       
   100 
       
   101         If ensure_ascii is True, the output is guaranteed to be str
       
   102         objects with all incoming unicode characters escaped.  If
       
   103         ensure_ascii is false, the output will be unicode object.
       
   104 
       
   105         If check_circular is True, then lists, dicts, and custom encoded
       
   106         objects will be checked for circular references during encoding to
       
   107         prevent an infinite recursion (which would cause an OverflowError).
       
   108         Otherwise, no such check takes place.
       
   109 
       
   110         If allow_nan is True, then NaN, Infinity, and -Infinity will be
       
   111         encoded as such.  This behavior is not JSON specification compliant,
       
   112         but is consistent with most JavaScript based encoders and decoders.
       
   113         Otherwise, it will be a ValueError to encode such floats.
       
   114 
       
   115         If sort_keys is True, then the output of dictionaries will be
       
   116         sorted by key; this is useful for regression tests to ensure
       
   117         that JSON serializations can be compared on a day-to-day basis.
       
   118 
       
   119         If indent is a non-negative integer, then JSON array
       
   120         elements and object members will be pretty-printed with that
       
   121         indent level.  An indent level of 0 will only insert newlines.
       
   122         None is the most compact representation.
       
   123 
       
   124         If specified, separators should be a (item_separator, key_separator)
       
   125         tuple.  The default is (', ', ': ').  To get the most compact JSON
       
   126         representation you should specify (',', ':') to eliminate whitespace.
       
   127 
       
   128         If specified, default is a function that gets called for objects
       
   129         that can't otherwise be serialized.  It should return a JSON encodable
       
   130         version of the object or raise a ``TypeError``.
       
   131 
       
   132         If encoding is not None, then all input strings will be
       
   133         transformed into unicode using that encoding prior to JSON-encoding.
       
   134         The default is UTF-8.
       
   135 
       
   136         """
       
   137 
       
   138         self.skipkeys = skipkeys
       
   139         self.ensure_ascii = ensure_ascii
       
   140         self.check_circular = check_circular
       
   141         self.allow_nan = allow_nan
       
   142         self.sort_keys = sort_keys
       
   143         self.indent = indent
       
   144         if separators is not None:
       
   145             self.item_separator, self.key_separator = separators
       
   146         if default is not None:
       
   147             self.default = default
       
   148         self.encoding = encoding
       
   149 
       
   150     def default(self, o):
       
   151         """Implement this method in a subclass such that it returns
       
   152         a serializable object for ``o``, or calls the base implementation
       
   153         (to raise a ``TypeError``).
       
   154 
       
   155         For example, to support arbitrary iterators, you could
       
   156         implement default like this::
       
   157 
       
   158             def default(self, o):
       
   159                 try:
       
   160                     iterable = iter(o)
       
   161                 except TypeError:
       
   162                     pass
       
   163                 else:
       
   164                     return list(iterable)
       
   165                 return JSONEncoder.default(self, o)
       
   166 
       
   167         """
       
   168         raise TypeError("%r is not JSON serializable" % (o,))
       
   169 
       
   170     def encode(self, o):
       
   171         """Return a JSON string representation of a Python data structure.
       
   172 
       
   173         >>> JSONEncoder().encode({"foo": ["bar", "baz"]})
       
   174         '{"foo": ["bar", "baz"]}'
       
   175 
       
   176         """
       
   177         # This is for extremely simple cases and benchmarks.
       
   178         if isinstance(o, basestring):
       
   179             if isinstance(o, str):
       
   180                 _encoding = self.encoding
       
   181                 if (_encoding is not None
       
   182                         and not (_encoding == 'utf-8')):
       
   183                     o = o.decode(_encoding)
       
   184             if self.ensure_ascii:
       
   185                 return encode_basestring_ascii(o)
       
   186             else:
       
   187                 return encode_basestring(o)
       
   188         # This doesn't pass the iterator directly to ''.join() because the
       
   189         # exceptions aren't as detailed.  The list call should be roughly
       
   190         # equivalent to the PySequence_Fast that ''.join() would do.
       
   191         chunks = self.iterencode(o, _one_shot=True)
       
   192         if not isinstance(chunks, (list, tuple)):
       
   193             chunks = list(chunks)
       
   194         return ''.join(chunks)
       
   195 
       
   196     def iterencode(self, o, _one_shot=False):
       
   197         """Encode the given object and yield each string
       
   198         representation as available.
       
   199 
       
   200         For example::
       
   201 
       
   202             for chunk in JSONEncoder().iterencode(bigobject):
       
   203                 mysocket.write(chunk)
       
   204 
       
   205         """
       
   206         if self.check_circular:
       
   207             markers = {}
       
   208         else:
       
   209             markers = None
       
   210         if self.ensure_ascii:
       
   211             _encoder = encode_basestring_ascii
       
   212         else:
       
   213             _encoder = encode_basestring
       
   214         if self.encoding != 'utf-8':
       
   215             def _encoder(o, _orig_encoder=_encoder, _encoding=self.encoding):
       
   216                 if isinstance(o, str):
       
   217                     o = o.decode(_encoding)
       
   218                 return _orig_encoder(o)
       
   219 
       
   220         def floatstr(o, allow_nan=self.allow_nan, _repr=FLOAT_REPR, _inf=INFINITY, _neginf=-INFINITY):
       
   221             # Check for specials.  Note that this type of test is processor- and/or
       
   222             # platform-specific, so do tests which don't depend on the internals.
       
   223 
       
   224             if o != o:
       
   225                 text = 'NaN'
       
   226             elif o == _inf:
       
   227                 text = 'Infinity'
       
   228             elif o == _neginf:
       
   229                 text = '-Infinity'
       
   230             else:
       
   231                 return _repr(o)
       
   232 
       
   233             if not allow_nan:
       
   234                 raise ValueError("Out of range float values are not JSON compliant: %r"
       
   235                     % (o,))
       
   236 
       
   237             return text
       
   238 
       
   239 
       
   240         if _one_shot and c_make_encoder is not None and not self.indent and not self.sort_keys:
       
   241             _iterencode = c_make_encoder(
       
   242                 markers, self.default, _encoder, self.indent,
       
   243                 self.key_separator, self.item_separator, self.sort_keys,
       
   244                 self.skipkeys, self.allow_nan)
       
   245         else:
       
   246             _iterencode = _make_iterencode(
       
   247                 markers, self.default, _encoder, self.indent, floatstr,
       
   248                 self.key_separator, self.item_separator, self.sort_keys,
       
   249                 self.skipkeys, _one_shot)
       
   250         return _iterencode(o, 0)
       
   251 
       
   252 def _make_iterencode(markers, _default, _encoder, _indent, _floatstr, _key_separator, _item_separator, _sort_keys, _skipkeys, _one_shot,
       
   253         ## HACK: hand-optimized bytecode; turn globals into locals
       
   254         False=False,
       
   255         True=True,
       
   256         ValueError=ValueError,
       
   257         basestring=basestring,
       
   258         dict=dict,
       
   259         float=float,
       
   260         id=id,
       
   261         int=int,
       
   262         isinstance=isinstance,
       
   263         list=list,
       
   264         long=long,
       
   265         str=str,
       
   266         tuple=tuple,
       
   267     ):
       
   268 
       
   269     def _iterencode_list(lst, _current_indent_level):
       
   270         if not lst:
       
   271             yield '[]'
       
   272             return
       
   273         if markers is not None:
       
   274             markerid = id(lst)
       
   275             if markerid in markers:
       
   276                 raise ValueError("Circular reference detected")
       
   277             markers[markerid] = lst
       
   278         buf = '['
       
   279         if _indent is not None:
       
   280             _current_indent_level += 1
       
   281             newline_indent = '\n' + (' ' * (_indent * _current_indent_level))
       
   282             separator = _item_separator + newline_indent
       
   283             buf += newline_indent
       
   284         else:
       
   285             newline_indent = None
       
   286             separator = _item_separator
       
   287         first = True
       
   288         for value in lst:
       
   289             if first:
       
   290                 first = False
       
   291             else:
       
   292                 buf = separator
       
   293             if isinstance(value, basestring):
       
   294                 yield buf + _encoder(value)
       
   295             elif value is None:
       
   296                 yield buf + 'null'
       
   297             elif value is True:
       
   298                 yield buf + 'true'
       
   299             elif value is False:
       
   300                 yield buf + 'false'
       
   301             elif isinstance(value, (int, long)):
       
   302                 yield buf + str(value)
       
   303             elif isinstance(value, float):
       
   304                 yield buf + _floatstr(value)
       
   305             else:
       
   306                 yield buf
       
   307                 if isinstance(value, (list, tuple)):
       
   308                     chunks = _iterencode_list(value, _current_indent_level)
       
   309                 elif isinstance(value, dict):
       
   310                     chunks = _iterencode_dict(value, _current_indent_level)
       
   311                 else:
       
   312                     chunks = _iterencode(value, _current_indent_level)
       
   313                 for chunk in chunks:
       
   314                     yield chunk
       
   315         if newline_indent is not None:
       
   316             _current_indent_level -= 1
       
   317             yield '\n' + (' ' * (_indent * _current_indent_level))
       
   318         yield ']'
       
   319         if markers is not None:
       
   320             del markers[markerid]
       
   321 
       
   322     def _iterencode_dict(dct, _current_indent_level):
       
   323         if not dct:
       
   324             yield '{}'
       
   325             return
       
   326         if markers is not None:
       
   327             markerid = id(dct)
       
   328             if markerid in markers:
       
   329                 raise ValueError("Circular reference detected")
       
   330             markers[markerid] = dct
       
   331         yield '{'
       
   332         if _indent is not None:
       
   333             _current_indent_level += 1
       
   334             newline_indent = '\n' + (' ' * (_indent * _current_indent_level))
       
   335             item_separator = _item_separator + newline_indent
       
   336             yield newline_indent
       
   337         else:
       
   338             newline_indent = None
       
   339             item_separator = _item_separator
       
   340         first = True
       
   341         if _sort_keys:
       
   342             items = dct.items()
       
   343             items.sort(key=lambda kv: kv[0])
       
   344         else:
       
   345             items = dct.iteritems()
       
   346         for key, value in items:
       
   347             if isinstance(key, basestring):
       
   348                 pass
       
   349             # JavaScript is weakly typed for these, so it makes sense to
       
   350             # also allow them.  Many encoders seem to do something like this.
       
   351             elif isinstance(key, float):
       
   352                 key = _floatstr(key)
       
   353             elif isinstance(key, (int, long)):
       
   354                 key = str(key)
       
   355             elif key is True:
       
   356                 key = 'true'
       
   357             elif key is False:
       
   358                 key = 'false'
       
   359             elif key is None:
       
   360                 key = 'null'
       
   361             elif _skipkeys:
       
   362                 continue
       
   363             else:
       
   364                 raise TypeError("key %r is not a string" % (key,))
       
   365             if first:
       
   366                 first = False
       
   367             else:
       
   368                 yield item_separator
       
   369             yield _encoder(key)
       
   370             yield _key_separator
       
   371             if isinstance(value, basestring):
       
   372                 yield _encoder(value)
       
   373             elif value is None:
       
   374                 yield 'null'
       
   375             elif value is True:
       
   376                 yield 'true'
       
   377             elif value is False:
       
   378                 yield 'false'
       
   379             elif isinstance(value, (int, long)):
       
   380                 yield str(value)
       
   381             elif isinstance(value, float):
       
   382                 yield _floatstr(value)
       
   383             else:
       
   384                 if isinstance(value, (list, tuple)):
       
   385                     chunks = _iterencode_list(value, _current_indent_level)
       
   386                 elif isinstance(value, dict):
       
   387                     chunks = _iterencode_dict(value, _current_indent_level)
       
   388                 else:
       
   389                     chunks = _iterencode(value, _current_indent_level)
       
   390                 for chunk in chunks:
       
   391                     yield chunk
       
   392         if newline_indent is not None:
       
   393             _current_indent_level -= 1
       
   394             yield '\n' + (' ' * (_indent * _current_indent_level))
       
   395         yield '}'
       
   396         if markers is not None:
       
   397             del markers[markerid]
       
   398 
       
   399     def _iterencode(o, _current_indent_level):
       
   400         if isinstance(o, basestring):
       
   401             yield _encoder(o)
       
   402         elif o is None:
       
   403             yield 'null'
       
   404         elif o is True:
       
   405             yield 'true'
       
   406         elif o is False:
       
   407             yield 'false'
       
   408         elif isinstance(o, (int, long)):
       
   409             yield str(o)
       
   410         elif isinstance(o, float):
       
   411             yield _floatstr(o)
       
   412         elif isinstance(o, (list, tuple)):
       
   413             for chunk in _iterencode_list(o, _current_indent_level):
       
   414                 yield chunk
       
   415         elif isinstance(o, dict):
       
   416             for chunk in _iterencode_dict(o, _current_indent_level):
       
   417                 yield chunk
       
   418         else:
       
   419             if markers is not None:
       
   420                 markerid = id(o)
       
   421                 if markerid in markers:
       
   422                     raise ValueError("Circular reference detected")
       
   423                 markers[markerid] = o
       
   424             o = _default(o)
       
   425             for chunk in _iterencode(o, _current_indent_level):
       
   426                 yield chunk
       
   427             if markers is not None:
       
   428                 del markers[markerid]
       
   429 
       
   430     return _iterencode