OLD | NEW |
| (Empty) |
1 """Implementation of JSONDecoder | |
2 """ | |
3 import re | |
4 import sys | |
5 import struct | |
6 | |
7 from simplejson.scanner import make_scanner | |
8 def _import_c_scanstring(): | |
9 try: | |
10 from simplejson._speedups import scanstring | |
11 return scanstring | |
12 except ImportError: | |
13 return None | |
14 c_scanstring = _import_c_scanstring() | |
15 | |
16 __all__ = ['JSONDecoder'] | |
17 | |
# Flags passed to the regular expressions compiled in this module
# (STRINGCHUNK and WHITESPACE).
FLAGS = re.DOTALL | re.MULTILINE | re.VERBOSE
19 | |
20 def _floatconstants(): | |
21 _BYTES = '7FF80000000000007FF0000000000000'.decode('hex') | |
22 # The struct module in Python 2.4 would get frexp() out of range here | |
23 # when an endian is specified in the format string. Fixed in Python 2.5+ | |
24 if sys.byteorder != 'big': | |
25 _BYTES = _BYTES[:8][::-1] + _BYTES[8:][::-1] | |
26 nan, inf = struct.unpack('dd', _BYTES) | |
27 return nan, inf, -inf | |
28 | |
29 NaN, PosInf, NegInf = _floatconstants() | |
30 | |
31 | |
class JSONDecodeError(ValueError):
    """Subclass of ValueError with the following additional properties:

    msg: The unformatted error message
    doc: The JSON document being parsed
    pos: The start index of doc where parsing failed
    end: The end index of doc where parsing failed (may be None)
    lineno: The line corresponding to pos
    colno: The column corresponding to pos
    endlineno: The line corresponding to end (may be None)
    endcolno: The column corresponding to end (may be None)

    """
    def __init__(self, msg, doc, pos, end=None):
        # The ValueError message is the formatted, human-readable form;
        # the raw pieces stay available as attributes.
        ValueError.__init__(self, errmsg(msg, doc, pos, end=end))
        self.msg = msg
        self.doc = doc
        self.pos = pos
        self.end = end
        self.lineno, self.colno = linecol(doc, pos)
        if end is not None:
            self.endlineno, self.endcolno = linecol(doc, end)
        else:
            self.endlineno, self.endcolno = None, None

    def __reduce__(self):
        # Default exception pickling replays only ``self.args`` (the single
        # formatted message), which does not satisfy __init__'s required
        # (msg, doc, pos) parameters. Rebuild from the original arguments.
        return self.__class__, (self.msg, self.doc, self.pos, self.end)
56 | |
57 | |
def linecol(doc, pos):
    """Return the 1-based ``(lineno, colno)`` of index *pos* within *doc*.

    Lines are delimited by ``'\\n'``. The column is counted from the
    character after the previous newline, or from the start of *doc* on
    the first line, so the first character of every line is column 1.
    """
    lineno = doc.count('\n', 0, pos) + 1
    # rfind returns -1 when there is no earlier newline, which makes the
    # first line 1-based exactly like every following line.
    colno = pos - doc.rfind('\n', 0, pos)
    return lineno, colno
65 | |
66 | |
def errmsg(msg, doc, pos, end=None):
    """Format *msg* with the line/column location of *pos* (and *end*).

    With only *pos*, the result names a single location; when *end* is
    given, it names the span from *pos* to *end*.
    """
    # Note that this function is called from _speedups
    # %-formatting is used here; presumably str.format was avoided for
    # compatibility with older interpreters -- TODO confirm.
    start_line, start_col = linecol(doc, pos)
    if end is not None:
        stop_line, stop_col = linecol(doc, end)
        template = '%s: line %d column %d - line %d column %d (char %d - %d)'
        return template % (
            msg, start_line, start_col, stop_line, stop_col, pos, end)
    template = '%s: line %d column %d (char %d)'
    return template % (msg, start_line, start_col, pos)
80 | |
81 | |
# JSON spellings of the non-finite floats, mapped to their Python values.
_CONSTANTS = dict([
    ('-Infinity', NegInf),
    ('Infinity', PosInf),
    ('NaN', NaN),
])

# Lazily matches a run of ordinary characters (group 1) followed by the
# first quote, backslash or control character (group 2).
STRINGCHUNK = re.compile(r'(.*?)(["\\\x00-\x1f])', FLAGS)

# Single-character escapes: the character following a backslash mapped to
# the text it decodes to.
BACKSLASH = {
    '"': u'"',
    '\\': u'\\',
    '/': u'/',
    'b': u'\b',
    'f': u'\f',
    'n': u'\n',
    'r': u'\r',
    't': u'\t',
}

# Encoding used by py_scanstring when the caller passes encoding=None.
DEFAULT_ENCODING = "utf-8"
95 | |
def _parse_uXXXX(s, start):
    """Return the value of the four hex digits at ``s[start:start + 4]``.

    Returns None when fewer than four characters remain or when any of
    them is not a hexadecimal digit. Checking each character explicitly
    rejects forms that ``int(..., 16)`` would tolerate, such as a sign,
    surrounding whitespace or a ``0x`` prefix.
    """
    digits = s[start:start + 4]
    if len(digits) != 4:
        return None
    for ch in digits:
        if ch not in '0123456789abcdefABCDEF':
            return None
    return int(digits, 16)


def py_scanstring(s, end, encoding=None, strict=True,
        _b=BACKSLASH, _m=STRINGCHUNK.match):
    """Scan the string s for a JSON string. End is the index of the
    character in s after the quote that started the JSON string.
    Unescapes all valid JSON string escape sequences and raises
    JSONDecodeError (a ValueError subclass) on attempt to decode an invalid
    string. If strict is False then literal control characters are allowed
    in the string.

    encoding is used to decode byte-string content to unicode and defaults
    to DEFAULT_ENCODING when None. _b and _m are bound as defaults so the
    loop uses local lookups; callers are not expected to pass them.

    Returns a tuple of the decoded string and the index of the character in s
    after the end quote."""
    if encoding is None:
        encoding = DEFAULT_ENCODING
    chunks = []
    _append = chunks.append
    # Index of the opening quote, used when reporting unterminated strings
    begin = end - 1
    while 1:
        chunk = _m(s, end)
        if chunk is None:
            raise JSONDecodeError(
                "Unterminated string starting at", s, begin)
        end = chunk.end()
        content, terminator = chunk.groups()
        # Content contains zero or more unescaped string characters
        if content:
            if not isinstance(content, unicode):
                content = unicode(content, encoding)
            _append(content)
        # Terminator is the end of string, a literal control character,
        # or a backslash denoting that an escape sequence follows
        if terminator == '"':
            break
        elif terminator != '\\':
            if strict:
                msg = "Invalid control character %r at" % (terminator,)
                raise JSONDecodeError(msg, s, end)
            else:
                _append(terminator)
                continue
        try:
            esc = s[end]
        except IndexError:
            raise JSONDecodeError(
                "Unterminated string starting at", s, begin)
        # If not a unicode escape sequence, must be in the lookup table
        if esc != 'u':
            try:
                char = _b[esc]
            except KeyError:
                msg = "Invalid \\escape: " + repr(esc)
                raise JSONDecodeError(msg, s, end)
            end += 1
        else:
            # Unicode escape sequence: exactly four hex digits after the 'u'
            uni = _parse_uXXXX(s, end + 1)
            if uni is None:
                msg = "Invalid \\uXXXX escape"
                raise JSONDecodeError(msg, s, end)
            next_end = end + 5
            # Check for surrogate pair on UCS-4 systems
            if 0xd800 <= uni <= 0xdbff and sys.maxunicode > 65535:
                msg = "Invalid \\uXXXX\\uXXXX surrogate pair"
                if not s[end + 5:end + 7] == '\\u':
                    raise JSONDecodeError(msg, s, end)
                uni2 = _parse_uXXXX(s, end + 7)
                # The second escape must be a low surrogate, otherwise the
                # combination below would yield an unrelated code point
                if uni2 is None or not 0xdc00 <= uni2 <= 0xdfff:
                    raise JSONDecodeError(msg, s, end)
                uni = 0x10000 + (((uni - 0xd800) << 10) | (uni2 - 0xdc00))
                next_end += 6
            char = unichr(uni)
            end = next_end
        # Append the unescaped character
        _append(char)
    return u''.join(chunks), end


# Use speedup if available
scanstring = c_scanstring or py_scanstring
176 | |
# Matches a (possibly empty) run of JSON whitespace; WHITESPACE_STR holds
# the same characters for cheap single-character membership tests.
WHITESPACE = re.compile(r'[ \t\n\r]*', FLAGS)
WHITESPACE_STR = ' \t\n\r'

def JSONObject(state, encoding, strict, scan_once, object_hook,
        object_pairs_hook, memo=None,
        _w=WHITESPACE.match, _ws=WHITESPACE_STR):
    """Parse a JSON object and return ``(result, end)``.

    state is the 2-tuple ``(s, end)``: the document and the index at which
    to start looking for the first property name or the closing ``}``
    (presumably just past the opening ``{`` -- set by the caller).
    encoding and strict are forwarded to scanstring for each key.
    scan_once(s, idx) must return ``(value, end)`` or raise StopIteration
    when no value starts at idx. memo, if given, is used to share equal key
    strings via ``memo.setdefault``.

    result is ``object_pairs_hook(pairs)`` when that hook is given (it takes
    priority), otherwise a dict, passed through object_hook if given. end is
    the index just past the closing ``}``.

    Raises JSONDecodeError on a missing property name, ``:`` or ``,``
    delimiter, or value.
    """
    # Unpacked explicitly rather than in the signature: tuple parameters
    # were removed from the language by PEP 3113. Positional callers are
    # unaffected.
    s, end = state
    # Backwards compatibility
    if memo is None:
        memo = {}
    memo_get = memo.setdefault
    pairs = []
    # Use a slice to prevent IndexError from being raised, the following
    # check will raise a more specific ValueError if the string is empty
    nextchar = s[end:end + 1]
    # Normally we expect nextchar == '"'
    if nextchar != '"':
        if nextchar in _ws:
            end = _w(s, end).end()
            nextchar = s[end:end + 1]
        # Trivial empty object
        if nextchar == '}':
            if object_pairs_hook is not None:
                result = object_pairs_hook(pairs)
                return result, end + 1
            pairs = {}
            if object_hook is not None:
                pairs = object_hook(pairs)
            return pairs, end + 1
        elif nextchar != '"':
            raise JSONDecodeError("Expecting property name", s, end)
    end += 1
    while True:
        key, end = scanstring(s, end, encoding, strict)
        key = memo_get(key, key)

        # To skip some function call overhead we optimize the fast paths where
        # the JSON key separator is ": " or just ":".
        if s[end:end + 1] != ':':
            end = _w(s, end).end()
            if s[end:end + 1] != ':':
                raise JSONDecodeError("Expecting : delimiter", s, end)

        end += 1

        # Skip whitespace before the value; running off the end of s is
        # left for scan_once to report.
        try:
            if s[end] in _ws:
                end += 1
                if s[end] in _ws:
                    end = _w(s, end + 1).end()
        except IndexError:
            pass

        try:
            value, end = scan_once(s, end)
        except StopIteration:
            raise JSONDecodeError("Expecting object", s, end)
        pairs.append((key, value))

        try:
            nextchar = s[end]
            if nextchar in _ws:
                end = _w(s, end + 1).end()
                nextchar = s[end]
        except IndexError:
            nextchar = ''
        end += 1

        if nextchar == '}':
            break
        elif nextchar != ',':
            # end was already advanced past the offending character
            raise JSONDecodeError("Expecting , delimiter", s, end - 1)

        try:
            nextchar = s[end]
            if nextchar in _ws:
                end += 1
                nextchar = s[end]
                if nextchar in _ws:
                    end = _w(s, end + 1).end()
                    nextchar = s[end]
        except IndexError:
            nextchar = ''

        end += 1
        if nextchar != '"':
            raise JSONDecodeError("Expecting property name", s, end - 1)

    if object_pairs_hook is not None:
        result = object_pairs_hook(pairs)
        return result, end
    pairs = dict(pairs)
    if object_hook is not None:
        pairs = object_hook(pairs)
    return pairs, end
271 | |
def JSONArray(state, scan_once, _w=WHITESPACE.match, _ws=WHITESPACE_STR):
    """Parse a JSON array and return ``(values, end)``.

    state is the 2-tuple ``(s, end)``: the document and the index at which
    to start looking for the first element or the closing ``]``
    (presumably just past the opening ``[`` -- set by the caller).
    scan_once(s, idx) must return ``(value, end)`` or raise StopIteration
    when no value starts at idx.

    values is a list of the decoded elements and end is the index just past
    the closing ``]``.

    Raises JSONDecodeError when an element or the ``,`` delimiter is
    missing.
    """
    # Unpacked explicitly rather than in the signature: tuple parameters
    # were removed from the language by PEP 3113. Positional callers are
    # unaffected.
    s, end = state
    values = []
    nextchar = s[end:end + 1]
    if nextchar in _ws:
        end = _w(s, end + 1).end()
        nextchar = s[end:end + 1]
    # Look-ahead for trivial empty array
    if nextchar == ']':
        return values, end + 1
    _append = values.append
    while True:
        try:
            value, end = scan_once(s, end)
        except StopIteration:
            raise JSONDecodeError("Expecting object", s, end)
        _append(value)
        nextchar = s[end:end + 1]
        if nextchar in _ws:
            end = _w(s, end + 1).end()
            nextchar = s[end:end + 1]
        end += 1
        if nextchar == ']':
            break
        elif nextchar != ',':
            # end was already advanced past the offending character, so
            # report end - 1 (the same position JSONObject reports).
            raise JSONDecodeError("Expecting , delimiter", s, end - 1)

        # Skip whitespace before the next element; running off the end of
        # s is left for scan_once to report.
        try:
            if s[end] in _ws:
                end += 1
                if s[end] in _ws:
                    end = _w(s, end + 1).end()
        except IndexError:
            pass

    return values, end
307 | |
class JSONDecoder(object):
    """Simple JSON <http://json.org> decoder

    By default JSON values are translated to Python values as follows:

    +---------------+-------------------+
    | JSON          | Python            |
    +===============+===================+
    | object        | dict              |
    +---------------+-------------------+
    | array         | list              |
    +---------------+-------------------+
    | string        | unicode           |
    +---------------+-------------------+
    | number (int)  | int, long         |
    +---------------+-------------------+
    | number (real) | float             |
    +---------------+-------------------+
    | true          | True              |
    +---------------+-------------------+
    | false         | False             |
    +---------------+-------------------+
    | null          | None              |
    +---------------+-------------------+

    ``NaN``, ``Infinity``, and ``-Infinity`` are also accepted and decoded
    to the corresponding ``float`` values, although they are not part of
    the JSON spec.

    """

    def __init__(self, encoding=None, object_hook=None, parse_float=None,
            parse_int=None, parse_constant=None, strict=True,
            object_pairs_hook=None):
        """
        *encoding* names the encoding used to interpret :class:`str`
        objects decoded by this instance (``'utf-8'`` by default). It has
        no effect when decoding :class:`unicode` objects.

        Only encodings that are a superset of ASCII currently work; text in
        any other encoding should be passed in as :class:`unicode`.

        *object_hook*, if given, is called with the :class:`dict` built for
        every decoded JSON object, and its return value is used in place of
        that dict. This can be used for custom deserializations (e.g. to
        support JSON-RPC class hinting).

        *object_pairs_hook*, if given, is called with the ordered list of
        ``(key, value)`` pairs of every decoded object literal, and its
        return value is used instead of the :class:`dict`. This supports
        decoders that rely on the order in which pairs appear (for example,
        :func:`collections.OrderedDict` remembers insertion order). When
        *object_hook* is also given, *object_pairs_hook* takes priority.

        *parse_float*, if given, is called with the string of every JSON
        float to be decoded; the default is equivalent to
        ``float(num_str)``. Use it to produce another datatype
        (e.g. :class:`decimal.Decimal`).

        *parse_int*, if given, is called with the string of every JSON int
        to be decoded; the default is equivalent to ``int(num_str)``. Use
        it to produce another datatype (e.g. :class:`float`).

        *parse_constant*, if given, is called with one of the strings
        ``'-Infinity'``, ``'Infinity'``, ``'NaN'``. It can be used to raise
        an exception when these invalid JSON numbers are encountered.

        *strict* controls what happens when an unescaped control character
        appears inside a string: with ``True`` (the default) it is a parse
        error, with ``False`` the character is accepted.

        """
        # Substitute the built-in converters for any falsy hook.
        if not parse_float:
            parse_float = float
        if not parse_int:
            parse_int = int
        if not parse_constant:
            parse_constant = _CONSTANTS.__getitem__

        self.encoding = encoding
        self.strict = strict
        self.object_hook = object_hook
        self.object_pairs_hook = object_pairs_hook
        self.parse_float = parse_float
        self.parse_int = parse_int
        self.parse_constant = parse_constant
        self.parse_object = JSONObject
        self.parse_array = JSONArray
        self.parse_string = scanstring
        self.memo = {}
        # Built last: make_scanner receives this fully configured instance.
        self.scan_once = make_scanner(self)

    def decode(self, s, _w=WHITESPACE.match):
        """Return the Python representation of ``s`` (a ``str`` or
        ``unicode`` instance containing a JSON document).

        Whitespace around the document is ignored; any other trailing
        content raises JSONDecodeError ("Extra data").

        """
        start = _w(s, 0).end()
        obj, end = self.raw_decode(s, idx=start)
        end = _w(s, end).end()
        if end == len(s):
            return obj
        raise JSONDecodeError("Extra data", s, end, len(s))

    def raw_decode(self, s, idx=0):
        """Decode the JSON document that begins at index ``idx`` of ``s``
        (a ``str`` or ``unicode``) and return a 2-tuple of the Python
        representation and the index in ``s`` where the document ended.

        Unlike :meth:`decode`, extraneous data after the document is
        allowed. Raises JSONDecodeError if no document can be decoded.

        """
        try:
            value, stop = self.scan_once(s, idx)
        except StopIteration:
            raise JSONDecodeError("No JSON object could be decoded", s, idx)
        else:
            return value, stop
OLD | NEW |