OLD | NEW |
| (Empty) |
1 """Implementation of JSONEncoder | |
2 """ | |
3 import re | |
4 from decimal import Decimal | |
5 | |
def _import_speedups():
    """Locate the optional C accelerators.

    Returns a ``(encode_basestring_ascii, make_encoder)`` pair taken from
    ``simplejson._speedups``, or ``(None, None)`` when that extension
    module cannot be imported.
    """
    try:
        from simplejson import _speedups
    except ImportError:
        # No compiled extension available: callers fall back to pure Python.
        return None, None
    else:
        return _speedups.encode_basestring_ascii, _speedups.make_encoder


c_encode_basestring_ascii, c_make_encoder = _import_speedups()
13 | |
14 from simplejson.decoder import PosInf | |
15 | |
# Characters that need escaping when non-ASCII output is allowed:
# control characters, the backslash and the double quote.
ESCAPE = re.compile(r'[\x00-\x1f\\"\b\f\n\r\t]')
# Characters that need escaping for ASCII-only output: backslash, double
# quote, and anything outside the printable range space..tilde.
ESCAPE_ASCII = re.compile(r'([\\"]|[^\ -~])')
# Detects characters in the \x80-\xff range; the string encoders use it to
# decide whether a ``str`` has to be decoded as UTF-8 first.
HAS_UTF8 = re.compile(r'[\x80-\xff]')

# Maps a character to its JSON escape sequence.  The short escapes are
# listed explicitly; the remaining control characters are filled in below
# using the generic \uXXXX form.
ESCAPE_DCT = dict([
    ('\\', '\\\\'),
    ('"', '\\"'),
    ('\b', '\\b'),
    ('\f', '\\f'),
    ('\n', '\\n'),
    ('\r', '\\r'),
    ('\t', '\\t'),
])
for i in range(0x20):
    # %-formatting rather than str.format() is used for the escape.
    if chr(i) not in ESCAPE_DCT:
        ESCAPE_DCT[chr(i)] = '\\u%04x' % (i,)

# Function used to render ordinary (finite) floats.
FLOAT_REPR = repr
33 | |
def encode_basestring(s):
    """Return a JSON representation of a Python string

    A ``str`` containing characters in the ``\\x80-\\xff`` range is first
    decoded as UTF-8.  Every character matched by ``ESCAPE`` is replaced
    with its entry from ``ESCAPE_DCT`` and the result is returned as a
    unicode string wrapped in double quotes.
    """
    needs_decoding = isinstance(s, str) and HAS_UTF8.search(s) is not None
    if needs_decoding:
        s = s.decode('utf-8')

    def replace(match):
        return ESCAPE_DCT[match.group(0)]

    escaped = ESCAPE.sub(replace, s)
    return u'"' + escaped + u'"'
43 | |
44 | |
def py_encode_basestring_ascii(s):
    """Return an ASCII-only JSON representation of a Python string

    A ``str`` containing characters in the ``\\x80-\\xff`` range is first
    decoded as UTF-8.  Characters matched by ``ESCAPE_ASCII`` are replaced
    by their short escape from ``ESCAPE_DCT`` when one exists, otherwise by
    a ``\\uXXXX`` escape (two of them, as a surrogate pair, for code points
    of 0x10000 and above).  The result is a ``str`` in double quotes.
    """
    if isinstance(s, str) and HAS_UTF8.search(s) is not None:
        s = s.decode('utf-8')

    def replace(match):
        char = match.group(0)
        if char in ESCAPE_DCT:
            return ESCAPE_DCT[char]
        codepoint = ord(char)
        if codepoint < 0x10000:
            return '\\u%04x' % (codepoint,)
        # Outside the Basic Multilingual Plane: split into a surrogate pair.
        codepoint -= 0x10000
        high = 0xd800 | ((codepoint >> 10) & 0x3ff)
        low = 0xdc00 | (codepoint & 0x3ff)
        return '\\u%04x\\u%04x' % (high, low)

    return '"' + str(ESCAPE_ASCII.sub(replace, s)) + '"'


# Use the C implementation when the speedups module provided one,
# otherwise the pure-Python version above.
encode_basestring_ascii = (
    c_encode_basestring_ascii or py_encode_basestring_ascii)
72 | |
class JSONEncoder(object):
    """Extensible JSON <http://json.org> encoder for Python data structures.

    Supports the following objects and types by default:

    +-------------------+---------------+
    | Python            | JSON          |
    +===================+===============+
    | dict              | object        |
    +-------------------+---------------+
    | list, tuple       | array         |
    +-------------------+---------------+
    | str, unicode      | string        |
    +-------------------+---------------+
    | int, long, float  | number        |
    +-------------------+---------------+
    | True              | true          |
    +-------------------+---------------+
    | False             | false         |
    +-------------------+---------------+
    | None              | null          |
    +-------------------+---------------+

    To extend this to recognize other objects, subclass and implement a
    ``.default()`` method that returns a serializable object for ``o`` if
    possible; otherwise it should call the superclass implementation (to
    raise ``TypeError``).

    """
    # Class-level defaults; ``separators`` in __init__ overrides them per
    # instance.
    item_separator = ', '
    key_separator = ': '

    def __init__(self, skipkeys=False, ensure_ascii=True,
            check_circular=True, allow_nan=True, sort_keys=False,
            indent=None, separators=None, encoding='utf-8', default=None,
            use_decimal=False):
        """Constructor for JSONEncoder, with sensible defaults.

        If skipkeys is false, then it is a TypeError to attempt
        encoding of keys that are not str, int, long, float or None.  If
        skipkeys is True, such items are simply skipped.

        If ensure_ascii is true, the output is guaranteed to be str
        objects with all incoming unicode characters escaped.  If
        ensure_ascii is false, the output will be unicode object.

        If check_circular is true, then lists, dicts, and custom encoded
        objects will be checked for circular references during encoding to
        prevent an infinite recursion (which would cause an OverflowError).
        Otherwise, no such check takes place.

        If allow_nan is true, then NaN, Infinity, and -Infinity will be
        encoded as such.  This behavior is not JSON specification compliant,
        but is consistent with most JavaScript based encoders and decoders.
        Otherwise, it will be a ValueError to encode such floats.

        If sort_keys is true, then the output of dictionaries will be
        sorted by key; this is useful for regression tests to ensure
        that JSON serializations can be compared on a day-to-day basis.

        If indent is a string, then JSON array elements and object members
        will be pretty-printed with a newline followed by that string repeated
        for each level of nesting. ``None`` (the default) selects the most
        compact representation without any newlines. For backwards
        compatibility with versions of simplejson earlier than 2.1.0, an
        integer is also accepted and is converted to a string with that many
        spaces.

        If specified, separators should be a (item_separator, key_separator)
        tuple.  The default is (', ', ': ').  To get the most compact JSON
        representation you should specify (',', ':') to eliminate whitespace.

        If specified, default is a function that gets called for objects
        that can't otherwise be serialized.  It should return a JSON encodable
        version of the object or raise a ``TypeError``.

        If encoding is not None, then all input strings will be
        transformed into unicode using that encoding prior to JSON-encoding.
        The default is UTF-8.  With ``None`` this class applies no extra
        decoding step of its own.

        If use_decimal is true (not the default), ``decimal.Decimal`` will
        be supported directly by the encoder. For the inverse, decode JSON
        with ``parse_float=decimal.Decimal``.

        """

        self.skipkeys = skipkeys
        self.ensure_ascii = ensure_ascii
        self.check_circular = check_circular
        self.allow_nan = allow_nan
        self.sort_keys = sort_keys
        self.use_decimal = use_decimal
        # Legacy API: an integer indent means "that many spaces".
        if isinstance(indent, (int, long)):
            indent = ' ' * indent
        self.indent = indent
        if separators is not None:
            self.item_separator, self.key_separator = separators
        if default is not None:
            # Shadows the ``default`` method on this instance only.
            self.default = default
        self.encoding = encoding

    def default(self, o):
        """Implement this method in a subclass such that it returns
        a serializable object for ``o``, or calls the base implementation
        (to raise a ``TypeError``).

        For example, to support arbitrary iterators, you could
        implement default like this::

            def default(self, o):
                try:
                    iterable = iter(o)
                except TypeError:
                    pass
                else:
                    return list(iterable)
                return JSONEncoder.default(self, o)

        """
        raise TypeError(repr(o) + " is not JSON serializable")

    def encode(self, o):
        """Return a JSON string representation of a Python data structure.

        >>> from simplejson import JSONEncoder
        >>> JSONEncoder().encode({"foo": ["bar", "baz"]})
        '{"foo": ["bar", "baz"]}'

        """
        # This is for extremely simple cases and benchmarks.
        if isinstance(o, basestring):
            if isinstance(o, str):
                _encoding = self.encoding
                # UTF-8 byte strings are decoded by the string encoders
                # themselves, so only other encodings are decoded here.
                if (_encoding is not None
                        and not (_encoding == 'utf-8')):
                    o = o.decode(_encoding)
            if self.ensure_ascii:
                return encode_basestring_ascii(o)
            else:
                return encode_basestring(o)
        # This doesn't pass the iterator directly to ''.join() because the
        # exceptions aren't as detailed.  The list call should be roughly
        # equivalent to the PySequence_Fast that ''.join() would do.
        chunks = self.iterencode(o, _one_shot=True)
        if not isinstance(chunks, (list, tuple)):
            chunks = list(chunks)
        if self.ensure_ascii:
            return ''.join(chunks)
        else:
            return u''.join(chunks)

    def iterencode(self, o, _one_shot=False):
        """Encode the given object and yield each string
        representation as available.

        For example::

            for chunk in JSONEncoder().iterencode(bigobject):
                mysocket.write(chunk)

        ``_one_shot`` is a private flag set by ``encode``: when it is true,
        the C encoder is available, and neither ``indent`` nor ``sort_keys``
        is in use, the C encoder is used instead of the Python generators.

        Raises ``ValueError`` for NaN/Infinity floats when ``allow_nan`` is
        false.

        """
        if self.check_circular:
            markers = {}
        else:
            markers = None
        if self.ensure_ascii:
            _encoder = encode_basestring_ascii
        else:
            _encoder = encode_basestring
        # Only wrap the encoder when there is a real, non-UTF-8 encoding to
        # apply.  ``encoding=None`` means "do not decode" (as in the
        # ``encode`` fast path); without this guard a ``str`` value would
        # hit ``o.decode(None)`` and raise TypeError.
        if self.encoding is not None and self.encoding != 'utf-8':
            def _encoder(o, _orig_encoder=_encoder, _encoding=self.encoding):
                if isinstance(o, str):
                    o = o.decode(_encoding)
                return _orig_encoder(o)

        def floatstr(o, allow_nan=self.allow_nan,
                _repr=FLOAT_REPR, _inf=PosInf, _neginf=-PosInf):
            # Check for specials. Note that this type of test is processor
            # and/or platform-specific, so do tests which don't depend on
            # the internals.

            if o != o:
                text = 'NaN'
            elif o == _inf:
                text = 'Infinity'
            elif o == _neginf:
                text = '-Infinity'
            else:
                return _repr(o)

            if not allow_nan:
                raise ValueError(
                    "Out of range float values are not JSON compliant: " +
                    repr(o))

            return text

        # Passed to the C encoder only; always emptied before returning.
        key_memo = {}
        if (_one_shot and c_make_encoder is not None
                and self.indent is None and not self.sort_keys):
            _iterencode = c_make_encoder(
                markers, self.default, _encoder, self.indent,
                self.key_separator, self.item_separator, self.sort_keys,
                self.skipkeys, self.allow_nan, key_memo, self.use_decimal)
        else:
            _iterencode = _make_iterencode(
                markers, self.default, _encoder, self.indent, floatstr,
                self.key_separator, self.item_separator, self.sort_keys,
                self.skipkeys, _one_shot, self.use_decimal)
        try:
            return _iterencode(o, 0)
        finally:
            key_memo.clear()
286 | |
class JSONEncoderForHTML(JSONEncoder):
    """An encoder that produces JSON safe to embed in HTML.

    To embed JSON content in, say, a script tag on a web page, the
    characters &, < and > should be escaped. They cannot be escaped
    with the usual entities (e.g. ``&amp;``) because they are not expanded
    within <script> tags.  They are emitted as ``\\u0026``, ``\\u003c``
    and ``\\u003e`` instead.
    """

    def encode(self, o):
        # JSONEncoder.encode has a fast path for plain strings that never
        # calls iterencode, so always go through iterencode here to make
        # sure the escaping below is applied.
        pieces = self.iterencode(o, True)
        if not self.ensure_ascii:
            return u''.join(pieces)
        return ''.join(pieces)

    def iterencode(self, o, _one_shot=False):
        parent = super(JSONEncoderForHTML, self)
        for piece in parent.iterencode(o, _one_shot):
            # Escape the three HTML-significant characters in each chunk.
            yield (piece.replace('&', '\\u0026')
                        .replace('<', '\\u003c')
                        .replace('>', '\\u003e'))
312 | |
313 | |
def _make_iterencode(markers, _default, _encoder, _indent, _floatstr,
        _key_separator, _item_separator, _sort_keys, _skipkeys, _one_shot,
        _use_decimal,
        ## HACK: hand-optimized bytecode; turn globals into locals
        False=False,
        True=True,
        ValueError=ValueError,
        basestring=basestring,
        Decimal=Decimal,
        dict=dict,
        float=float,
        id=id,
        int=int,
        isinstance=isinstance,
        list=list,
        long=long,
        str=str,
        tuple=tuple,
    ):
    """Build and return the pure-Python ``_iterencode(o, level)`` generator.

    The returned function yields the JSON text for ``o`` as a sequence of
    string chunks.

    ``markers`` is either ``None`` (no circular-reference checking) or a
    dict mapping ``id(obj)`` to the containers/objects currently being
    encoded.  ``_default`` is called for objects of no directly supported
    type, ``_encoder`` turns a string into its quoted JSON form, and
    ``_floatstr`` renders floats.  ``_indent`` is ``None`` for compact
    output or the string repeated once per nesting level.  ``_one_shot``
    is accepted but not referenced in this function body.  The keyword
    parameters after ``_use_decimal`` only rebind builtins as locals and
    are not meant to be passed by callers.

    The generators raise ``ValueError`` on a circular reference (when
    ``markers`` is in use) and ``TypeError`` for an unsupported dict key
    when ``_skipkeys`` is false.
    """

    def _iterencode_list(lst, _current_indent_level):
        # Yield the JSON array text for a list or tuple.
        if not lst:
            yield '[]'
            return
        if markers is not None:
            markerid = id(lst)
            if markerid in markers:
                raise ValueError("Circular reference detected")
            markers[markerid] = lst
        # ``buf`` holds the text to emit before the next item: the opening
        # bracket for the first item, the separator afterwards.
        buf = '['
        if _indent is not None:
            _current_indent_level += 1
            newline_indent = '\n' + (_indent * _current_indent_level)
            separator = _item_separator + newline_indent
            buf += newline_indent
        else:
            newline_indent = None
            separator = _item_separator
        first = True
        for value in lst:
            if first:
                first = False
            else:
                buf = separator
            # True/False are tested by identity before the int check
            # because bool is a subclass of int.
            if isinstance(value, basestring):
                yield buf + _encoder(value)
            elif value is None:
                yield buf + 'null'
            elif value is True:
                yield buf + 'true'
            elif value is False:
                yield buf + 'false'
            elif isinstance(value, (int, long)):
                yield buf + str(value)
            elif isinstance(value, float):
                yield buf + _floatstr(value)
            elif _use_decimal and isinstance(value, Decimal):
                yield buf + str(value)
            else:
                # Nested container or custom object: emit the pending
                # prefix, then delegate to the matching generator.
                yield buf
                if isinstance(value, (list, tuple)):
                    chunks = _iterencode_list(value, _current_indent_level)
                elif isinstance(value, dict):
                    chunks = _iterencode_dict(value, _current_indent_level)
                else:
                    chunks = _iterencode(value, _current_indent_level)
                for chunk in chunks:
                    yield chunk
        if newline_indent is not None:
            _current_indent_level -= 1
            yield '\n' + (_indent * _current_indent_level)
        yield ']'
        if markers is not None:
            del markers[markerid]

    def _iterencode_dict(dct, _current_indent_level):
        # Yield the JSON object text for a dict.
        if not dct:
            yield '{}'
            return
        if markers is not None:
            markerid = id(dct)
            if markerid in markers:
                raise ValueError("Circular reference detected")
            markers[markerid] = dct
        yield '{'
        if _indent is not None:
            _current_indent_level += 1
            newline_indent = '\n' + (_indent * _current_indent_level)
            item_separator = _item_separator + newline_indent
            yield newline_indent
        else:
            newline_indent = None
            item_separator = _item_separator
        first = True
        if _sort_keys:
            # Sort on the original keys, before they are coerced to strings.
            items = dct.items()
            items.sort(key=lambda kv: kv[0])
        else:
            items = dct.iteritems()
        for key, value in items:
            if isinstance(key, basestring):
                pass
            # JavaScript is weakly typed for these, so it makes sense to
            # also allow them.  Many encoders seem to do something like this.
            elif isinstance(key, float):
                key = _floatstr(key)
            elif key is True:
                key = 'true'
            elif key is False:
                key = 'false'
            elif key is None:
                key = 'null'
            elif isinstance(key, (int, long)):
                key = str(key)
            elif _skipkeys:
                continue
            else:
                raise TypeError("key " + repr(key) + " is not a string")
            if first:
                first = False
            else:
                yield item_separator
            yield _encoder(key)
            yield _key_separator
            if isinstance(value, basestring):
                yield _encoder(value)
            elif value is None:
                yield 'null'
            elif value is True:
                yield 'true'
            elif value is False:
                yield 'false'
            elif isinstance(value, (int, long)):
                yield str(value)
            elif isinstance(value, float):
                yield _floatstr(value)
            elif _use_decimal and isinstance(value, Decimal):
                yield str(value)
            else:
                # Nested container or custom object: delegate.
                if isinstance(value, (list, tuple)):
                    chunks = _iterencode_list(value, _current_indent_level)
                elif isinstance(value, dict):
                    chunks = _iterencode_dict(value, _current_indent_level)
                else:
                    chunks = _iterencode(value, _current_indent_level)
                for chunk in chunks:
                    yield chunk
        if newline_indent is not None:
            _current_indent_level -= 1
            yield '\n' + (_indent * _current_indent_level)
        yield '}'
        if markers is not None:
            del markers[markerid]

    def _iterencode(o, _current_indent_level):
        # Top-level dispatcher: yield the JSON text for any object.
        if isinstance(o, basestring):
            yield _encoder(o)
        elif o is None:
            yield 'null'
        elif o is True:
            yield 'true'
        elif o is False:
            yield 'false'
        elif isinstance(o, (int, long)):
            yield str(o)
        elif isinstance(o, float):
            yield _floatstr(o)
        elif isinstance(o, (list, tuple)):
            for chunk in _iterencode_list(o, _current_indent_level):
                yield chunk
        elif isinstance(o, dict):
            for chunk in _iterencode_dict(o, _current_indent_level):
                yield chunk
        elif _use_decimal and isinstance(o, Decimal):
            yield str(o)
        else:
            # Unknown type: convert it with _default and encode the result.
            # The original object is tracked in ``markers`` so a _default
            # that keeps leading back to it is reported as circular.
            if markers is not None:
                markerid = id(o)
                if markerid in markers:
                    raise ValueError("Circular reference detected")
                markers[markerid] = o
            o = _default(o)
            for chunk in _iterencode(o, _current_indent_level):
                yield chunk
            if markers is not None:
                del markers[markerid]

    return _iterencode
OLD | NEW |