|
1 """Implementation of JSONDecoder |
|
2 """ |
|
3 import re |
|
4 import sys |
|
5 import struct |
|
6 |
|
7 from django.utils.simplejson.scanner import make_scanner |
|
# Slot for an accelerated scanstring implementation.  It is None here, so the
# ``c_scanstring or py_scanstring`` selection later in this module always
# resolves to the pure-Python scanner.
c_scanstring = None

# Names exported by ``from <this module> import *``.
__all__ = ['JSONDecoder']

# Flags passed to every regular expression compiled in this module.
FLAGS = re.VERBOSE | re.MULTILINE | re.DOTALL
|
13 |
|
14 def _floatconstants(): |
|
15 _BYTES = '7FF80000000000007FF0000000000000'.decode('hex') |
|
16 if sys.byteorder != 'big': |
|
17 _BYTES = _BYTES[:8][::-1] + _BYTES[8:][::-1] |
|
18 nan, inf = struct.unpack('dd', _BYTES) |
|
19 return nan, inf, -inf |
|
20 |
|
21 NaN, PosInf, NegInf = _floatconstants() |
|
22 |
|
23 |
|
def linecol(doc, pos):
    """Translate a character offset into a ``(lineno, colno)`` pair.

    ``doc`` is the full document text and ``pos`` an index into it.  Both
    the line number and the column number are 1-based.

    The column used to be 0-based on the first line but 1-based on every
    following line; it is now 1-based everywhere so positions in error
    messages are consistent.
    """
    lineno = doc.count('\n', 0, pos) + 1
    # rfind() returns -1 when there is no preceding newline, which makes the
    # first line come out as ``pos + 1`` without a special case.
    colno = pos - doc.rfind('\n', 0, pos)
    return lineno, colno
|
31 |
|
32 |
|
def errmsg(msg, doc, pos, end=None):
    """Format ``msg`` together with a position (or a range) inside ``doc``.

    With only ``pos`` the result names a single line/column/character
    location; when ``end`` is given as well, the result describes the span
    from ``pos`` to ``end``.  Line and column values come from ``linecol``.
    """
    # Note that this function is called from _speedups
    start_line, start_col = linecol(doc, pos)
    if end is not None:
        stop_line, stop_col = linecol(doc, end)
        span_template = (
            '%s: line %d column %d - line %d column %d (char %d - %d)')
        return span_template % (
            msg, start_line, start_col, stop_line, stop_col, pos, end)
    return '%s: line %d column %d (char %d)' % (
        msg, start_line, start_col, pos)
|
41 |
|
42 |
|
# Non-standard JSON literals mapped to the float constants built above.
# JSONDecoder uses this table's __getitem__ as its default parse_constant.
_CONSTANTS = {
    '-Infinity': NegInf,
    'Infinity': PosInf,
    'NaN': NaN,
}

# Matches one chunk of a JSON string body: a lazily matched run of characters
# (group 1) followed by the first double quote, backslash, or control
# character in the range \x00-\x1f (group 2).
STRINGCHUNK = re.compile(r'(.*?)(["\\\x00-\x1f])', FLAGS)
# Single-character escapes: the character following the backslash mapped to
# the unicode character it stands for.
BACKSLASH = {
    '"': u'"', '\\': u'\\', '/': u'/',
    'b': u'\b', 'f': u'\f', 'n': u'\n', 'r': u'\r', 't': u'\t',
}

# Encoding py_scanstring falls back to when it is called with encoding=None.
DEFAULT_ENCODING = "utf-8"
|
56 |
|
def _decode_uXXXX(s, pos):
    """Return the integer value of the four hex digits that follow s[pos].

    ``pos`` is the index of the ``u`` of a backslash-u escape.  Raises
    ValueError (formatted through errmsg) when fewer than four characters
    remain or when any of them is not a hexadecimal digit.
    """
    esc = s[pos + 1:pos + 5]
    if len(esc) == 4:
        # int(esc, 16) on its own would also accept forms such as '0x1f' or
        # '+12a', so every character is checked explicitly.
        for digit in esc:
            if digit not in '0123456789abcdefABCDEF':
                break
        else:
            return int(esc, 16)
    raise ValueError(errmsg("Invalid \\uXXXX escape", s, pos))


def py_scanstring(s, end, encoding=None, strict=True, _b=BACKSLASH, _m=STRINGCHUNK.match):
    """Scan the string s for a JSON string and return its decoded value.

    ``end`` is the index of the character in ``s`` after the quote that
    started the JSON string.  ``encoding`` is used to decode ``str`` content
    to ``unicode`` (DEFAULT_ENCODING when None).  If ``strict`` is False,
    literal control characters are allowed inside the string.  ``_b`` and
    ``_m`` are pre-bound lookups and are not meant to be passed by callers.

    Returns a tuple of the decoded unicode string and the index of the
    character in ``s`` after the end quote.

    Raises ValueError for an unterminated string, an unknown backslash
    escape, a malformed unicode escape or surrogate pair, and (in strict
    mode) a literal control character.
    """
    if encoding is None:
        encoding = DEFAULT_ENCODING
    chunks = []
    _append = chunks.append
    # Index of the opening quote, used when reporting an unterminated string.
    begin = end - 1
    while 1:
        chunk = _m(s, end)
        if chunk is None:
            raise ValueError(
                errmsg("Unterminated string starting at", s, begin))
        end = chunk.end()
        content, terminator = chunk.groups()
        # Content contains zero or more unescaped string characters
        if content:
            if not isinstance(content, unicode):
                content = unicode(content, encoding)
            _append(content)
        # Terminator is the end of string, a literal control character,
        # or a backslash denoting that an escape sequence follows
        if terminator == '"':
            break
        elif terminator != '\\':
            if strict:
                msg = "Invalid control character %r at" % (terminator,)
                # Format through errmsg like every other error here; the
                # bare ``ValueError(msg, s, end)`` form put the whole
                # document into the exception arguments instead.
                raise ValueError(errmsg(msg, s, end))
            else:
                _append(terminator)
                continue
        try:
            esc = s[end]
        except IndexError:
            raise ValueError(
                errmsg("Unterminated string starting at", s, begin))
        # If not a unicode escape sequence, must be in the lookup table
        if esc != 'u':
            try:
                char = _b[esc]
            except KeyError:
                raise ValueError(
                    errmsg("Invalid \\escape: %r" % (esc,), s, end))
            end += 1
        else:
            # Unicode escape sequence
            uni = _decode_uXXXX(s, end)
            next_end = end + 5
            # Check for surrogate pair on UCS-4 systems
            if 0xd800 <= uni <= 0xdbff and sys.maxunicode > 65535:
                msg = "Invalid \\uXXXX\\uXXXX surrogate pair"
                if s[end + 5:end + 7] != '\\u':
                    raise ValueError(errmsg(msg, s, end))
                try:
                    # The 'u' of the second escape sits at end + 6.
                    uni2 = _decode_uXXXX(s, end + 6)
                except ValueError:
                    raise ValueError(errmsg(msg, s, end))
                # The second half must be a low surrogate, otherwise the
                # combination below yields a wrong or out-of-range value.
                if not 0xdc00 <= uni2 <= 0xdfff:
                    raise ValueError(errmsg(msg, s, end))
                uni = 0x10000 + (((uni - 0xd800) << 10) | (uni2 - 0xdc00))
                next_end += 6
            char = unichr(uni)
            end = next_end
        # Append the unescaped character
        _append(char)
    return u''.join(chunks), end
|
131 |
|
132 |
|
# Use speedup if available; otherwise fall back to the pure-Python scanner.
scanstring = c_scanstring or py_scanstring

# Matches a (possibly empty) run of space, tab, newline and carriage return.
WHITESPACE = re.compile(r'[ \t\n\r]*', FLAGS)
# The same four characters as a plain string, used for ``in`` membership
# tests on a single character before falling back to the regex.
WHITESPACE_STR = ' \t\n\r'
|
138 |
|
def JSONObject(s_and_end, encoding, strict, scan_once, object_hook, _w=WHITESPACE.match, _ws=WHITESPACE_STR):
    """Parse the members of a JSON object into a dict.

    ``s_and_end`` is a ``(s, end)`` tuple: the document and the index at
    which scanning starts (presumably just past the opening ``{`` -- the
    caller is outside this block).  It is taken as one positional argument
    and unpacked here because tuple parameters in the signature are
    Python-2-only syntax; callers still pass the same tuple.

    ``encoding`` and ``strict`` are forwarded to ``scanstring`` for the
    keys, ``scan_once(s, idx)`` parses each value, and ``object_hook``, when
    not None, is applied to the finished dict -- including an empty one.

    Returns ``(result, end)`` where ``end`` is the index after the closing
    ``}``.  Raises ValueError when a property name, ``:``, value, or ``,``
    is missing.
    """
    s, end = s_and_end
    pairs = {}
    # Use a slice to prevent IndexError from being raised, the following
    # check will raise a more specific ValueError if the string is empty
    nextchar = s[end:end + 1]
    # Normally we expect nextchar == '"'
    if nextchar != '"':
        if nextchar in _ws:
            end = _w(s, end).end()
            nextchar = s[end:end + 1]
        # Trivial empty object
        if nextchar == '}':
            # The hook must see every decoded object, so the empty-object
            # shortcut may not bypass it.
            if object_hook is not None:
                pairs = object_hook(pairs)
            return pairs, end + 1
        elif nextchar != '"':
            raise ValueError(errmsg("Expecting property name", s, end))
    end += 1
    while True:
        key, end = scanstring(s, end, encoding, strict)

        # To skip some function call overhead we optimize the fast paths where
        # the JSON key separator is ": " or just ":".
        if s[end:end + 1] != ':':
            end = _w(s, end).end()
            if s[end:end + 1] != ':':
                raise ValueError(errmsg("Expecting : delimiter", s, end))

        end += 1

        # Skip whitespace before the value; running off the end of the
        # document is left for scan_once to report.
        try:
            if s[end] in _ws:
                end += 1
                if s[end] in _ws:
                    end = _w(s, end + 1).end()
        except IndexError:
            pass

        try:
            value, end = scan_once(s, end)
        except StopIteration:
            raise ValueError(errmsg("Expecting object", s, end))
        pairs[key] = value

        try:
            nextchar = s[end]
            if nextchar in _ws:
                end = _w(s, end + 1).end()
                nextchar = s[end]
        except IndexError:
            nextchar = ''
        end += 1

        if nextchar == '}':
            break
        elif nextchar != ',':
            # end was already advanced past the offending character.
            raise ValueError(errmsg("Expecting , delimiter", s, end - 1))

        try:
            nextchar = s[end]
            if nextchar in _ws:
                end += 1
                nextchar = s[end]
                if nextchar in _ws:
                    end = _w(s, end + 1).end()
                    nextchar = s[end]
        except IndexError:
            nextchar = ''

        end += 1
        if nextchar != '"':
            raise ValueError(errmsg("Expecting property name", s, end - 1))

    if object_hook is not None:
        pairs = object_hook(pairs)
    return pairs, end
|
213 |
|
def JSONArray(s_and_end, scan_once, _w=WHITESPACE.match, _ws=WHITESPACE_STR):
    """Parse the elements of a JSON array into a list.

    ``s_and_end`` is a ``(s, end)`` tuple: the document and the index at
    which scanning starts (presumably just past the opening ``[`` -- the
    caller is outside this block).  It is taken as one positional argument
    and unpacked here because tuple parameters in the signature are
    Python-2-only syntax; callers still pass the same tuple.

    ``scan_once(s, idx)`` parses each element.

    Returns ``(values, end)`` where ``end`` is the index after the closing
    ``]``.  Raises ValueError when an element or a ``,`` is missing.
    """
    s, end = s_and_end
    values = []
    nextchar = s[end:end + 1]
    if nextchar in _ws:
        end = _w(s, end + 1).end()
        nextchar = s[end:end + 1]
    # Look-ahead for trivial empty array
    if nextchar == ']':
        return values, end + 1
    _append = values.append
    while True:
        try:
            value, end = scan_once(s, end)
        except StopIteration:
            raise ValueError(errmsg("Expecting object", s, end))
        _append(value)
        nextchar = s[end:end + 1]
        if nextchar in _ws:
            end = _w(s, end + 1).end()
            nextchar = s[end:end + 1]
        end += 1
        if nextchar == ']':
            break
        elif nextchar != ',':
            # end has already been advanced past the offending character, so
            # report end - 1 (the same convention JSONObject uses).
            raise ValueError(errmsg("Expecting , delimiter", s, end - 1))

        # Skip whitespace before the next element; running off the end of
        # the document is left for scan_once to report.
        try:
            if s[end] in _ws:
                end += 1
                if s[end] in _ws:
                    end = _w(s, end + 1).end()
        except IndexError:
            pass

    return values, end
|
249 |
|
class JSONDecoder(object):
    """Simple JSON <http://json.org> decoder

    By default, decoding maps JSON values to Python values as follows:

    +---------------+-------------------+
    | JSON          | Python            |
    +===============+===================+
    | object        | dict              |
    +---------------+-------------------+
    | array         | list              |
    +---------------+-------------------+
    | string        | unicode           |
    +---------------+-------------------+
    | number (int)  | int, long         |
    +---------------+-------------------+
    | number (real) | float             |
    +---------------+-------------------+
    | true          | True              |
    +---------------+-------------------+
    | false         | False             |
    +---------------+-------------------+
    | null          | None              |
    +---------------+-------------------+

    ``NaN``, ``Infinity``, and ``-Infinity`` are also understood and decoded
    to the corresponding ``float`` values, although they are outside the
    JSON spec.

    """

    def __init__(self, encoding=None, object_hook=None, parse_float=None,
            parse_int=None, parse_constant=None, strict=True):
        """Configure the decoder.

        ``encoding`` determines the encoding used to interpret any ``str``
        objects decoded by this instance (utf-8 by default).  It has no
        effect when decoding ``unicode`` objects.

        Note that currently only encodings that are a superset of ASCII
        work; strings of other encodings should be passed in as ``unicode``.

        ``object_hook``, if specified, will be called with the result of
        every JSON object decoded and its return value will be used in place
        of the given ``dict``.  This can be used to provide custom
        deserializations (e.g. to support JSON-RPC class hinting).

        ``parse_float``, if specified, will be called with the string of
        every JSON float to be decoded.  By default this is equivalent to
        float(num_str).  This can be used to use another datatype or parser
        for JSON floats (e.g. decimal.Decimal).

        ``parse_int``, if specified, will be called with the string of every
        JSON int to be decoded.  By default this is equivalent to
        int(num_str).  This can be used to use another datatype or parser
        for JSON integers (e.g. float).

        ``parse_constant``, if specified, will be called with one of the
        following strings: -Infinity, Infinity, NaN.  This can be used to
        raise an exception if invalid JSON numbers are encountered.

        ``strict`` is stored on the instance as given.

        """
        # Fall back to the built-in converters for any parser left unset.
        if not parse_float:
            parse_float = float
        if not parse_int:
            parse_int = int
        if not parse_constant:
            parse_constant = _CONSTANTS.__getitem__

        # Options supplied by the caller.
        self.encoding = encoding
        self.object_hook = object_hook
        self.strict = strict

        # Value parsers.
        self.parse_float = parse_float
        self.parse_int = parse_int
        self.parse_constant = parse_constant

        # Structural parsers defined in this module.
        self.parse_object = JSONObject
        self.parse_array = JSONArray
        self.parse_string = scanstring

        # Built last: make_scanner receives the fully configured instance.
        self.scan_once = make_scanner(self)

    def decode(self, s, _w=WHITESPACE.match):
        """Return the Python representation of ``s`` (a ``str`` or
        ``unicode`` instance containing a JSON document).

        Leading and trailing whitespace is skipped; anything else after the
        document raises ValueError ("Extra data").

        """
        start = _w(s, 0).end()
        obj, end = self.raw_decode(s, idx=start)
        end = _w(s, end).end()
        if end == len(s):
            return obj
        raise ValueError(errmsg("Extra data", s, end, len(s)))

    def raw_decode(self, s, idx=0):
        """Decode a JSON document from ``s`` (a ``str`` or ``unicode``
        beginning with a JSON document), starting at index ``idx``.

        Returns a 2-tuple of the Python representation and the index in
        ``s`` where the document ended.  This can be used to decode a JSON
        document from a string that may have extraneous data at the end.

        Raises ValueError when the scanner signals, via StopIteration, that
        no value could be read.

        """
        try:
            result, stop = self.scan_once(s, idx)
        except StopIteration:
            raise ValueError("No JSON object could be decoded")
        else:
            return result, stop