Index: src/third_party/pylib/simplejson/decoder.py |
=================================================================== |
--- src/third_party/pylib/simplejson/decoder.py (revision 9275) |
+++ src/third_party/pylib/simplejson/decoder.py (working copy) |
@@ -1,421 +0,0 @@ |
-"""Implementation of JSONDecoder |
-""" |
-import re |
-import sys |
-import struct |
- |
-from simplejson.scanner import make_scanner |
-def _import_c_scanstring(): |
- try: |
- from simplejson._speedups import scanstring |
- return scanstring |
- except ImportError: |
- return None |
-c_scanstring = _import_c_scanstring() |
- |
-__all__ = ['JSONDecoder'] |
- |
-FLAGS = re.VERBOSE | re.MULTILINE | re.DOTALL |
- |
-def _floatconstants(): |
- _BYTES = '7FF80000000000007FF0000000000000'.decode('hex') |
- # The struct module in Python 2.4 would get frexp() out of range here |
- # when an endian is specified in the format string. Fixed in Python 2.5+ |
- if sys.byteorder != 'big': |
- _BYTES = _BYTES[:8][::-1] + _BYTES[8:][::-1] |
- nan, inf = struct.unpack('dd', _BYTES) |
- return nan, inf, -inf |
- |
-NaN, PosInf, NegInf = _floatconstants() |
- |
- |
-class JSONDecodeError(ValueError): |
- """Subclass of ValueError with the following additional properties: |
- |
- msg: The unformatted error message |
- doc: The JSON document being parsed |
- pos: The start index of doc where parsing failed |
- end: The end index of doc where parsing failed (may be None) |
- lineno: The line corresponding to pos |
- colno: The column corresponding to pos |
- endlineno: The line corresponding to end (may be None) |
- endcolno: The column corresponding to end (may be None) |
- |
- """ |
- def __init__(self, msg, doc, pos, end=None): |
- ValueError.__init__(self, errmsg(msg, doc, pos, end=end)) |
- self.msg = msg |
- self.doc = doc |
- self.pos = pos |
- self.end = end |
- self.lineno, self.colno = linecol(doc, pos) |
- if end is not None: |
- self.endlineno, self.endcolno = linecol(doc, end) |
- else: |
- self.endlineno, self.endcolno = None, None |
- |
- |
-def linecol(doc, pos): |
- lineno = doc.count('\n', 0, pos) + 1 |
- if lineno == 1: |
- colno = pos |
- else: |
- colno = pos - doc.rindex('\n', 0, pos) |
- return lineno, colno |
- |
- |
-def errmsg(msg, doc, pos, end=None): |
- # Note that this function is called from _speedups |
- lineno, colno = linecol(doc, pos) |
- if end is None: |
- #fmt = '{0}: line {1} column {2} (char {3})' |
- #return fmt.format(msg, lineno, colno, pos) |
- fmt = '%s: line %d column %d (char %d)' |
- return fmt % (msg, lineno, colno, pos) |
- endlineno, endcolno = linecol(doc, end) |
- #fmt = '{0}: line {1} column {2} - line {3} column {4} (char {5} - {6})' |
- #return fmt.format(msg, lineno, colno, endlineno, endcolno, pos, end) |
- fmt = '%s: line %d column %d - line %d column %d (char %d - %d)' |
- return fmt % (msg, lineno, colno, endlineno, endcolno, pos, end) |
- |
- |
-_CONSTANTS = { |
- '-Infinity': NegInf, |
- 'Infinity': PosInf, |
- 'NaN': NaN, |
-} |
- |
-STRINGCHUNK = re.compile(r'(.*?)(["\\\x00-\x1f])', FLAGS) |
-BACKSLASH = { |
- '"': u'"', '\\': u'\\', '/': u'/', |
- 'b': u'\b', 'f': u'\f', 'n': u'\n', 'r': u'\r', 't': u'\t', |
-} |
- |
-DEFAULT_ENCODING = "utf-8" |
- |
-def py_scanstring(s, end, encoding=None, strict=True, |
- _b=BACKSLASH, _m=STRINGCHUNK.match): |
- """Scan the string s for a JSON string. End is the index of the |
- character in s after the quote that started the JSON string. |
- Unescapes all valid JSON string escape sequences and raises ValueError |
- on attempt to decode an invalid string. If strict is False then literal |
- control characters are allowed in the string. |
- |
- Returns a tuple of the decoded string and the index of the character in s |
- after the end quote.""" |
- if encoding is None: |
- encoding = DEFAULT_ENCODING |
- chunks = [] |
- _append = chunks.append |
- begin = end - 1 |
- while 1: |
- chunk = _m(s, end) |
- if chunk is None: |
- raise JSONDecodeError( |
- "Unterminated string starting at", s, begin) |
- end = chunk.end() |
- content, terminator = chunk.groups() |
- # Content is contains zero or more unescaped string characters |
- if content: |
- if not isinstance(content, unicode): |
- content = unicode(content, encoding) |
- _append(content) |
- # Terminator is the end of string, a literal control character, |
- # or a backslash denoting that an escape sequence follows |
- if terminator == '"': |
- break |
- elif terminator != '\\': |
- if strict: |
- msg = "Invalid control character %r at" % (terminator,) |
- #msg = "Invalid control character {0!r} at".format(terminator) |
- raise JSONDecodeError(msg, s, end) |
- else: |
- _append(terminator) |
- continue |
- try: |
- esc = s[end] |
- except IndexError: |
- raise JSONDecodeError( |
- "Unterminated string starting at", s, begin) |
- # If not a unicode escape sequence, must be in the lookup table |
- if esc != 'u': |
- try: |
- char = _b[esc] |
- except KeyError: |
- msg = "Invalid \\escape: " + repr(esc) |
- raise JSONDecodeError(msg, s, end) |
- end += 1 |
- else: |
- # Unicode escape sequence |
- esc = s[end + 1:end + 5] |
- next_end = end + 5 |
- if len(esc) != 4: |
- msg = "Invalid \\uXXXX escape" |
- raise JSONDecodeError(msg, s, end) |
- uni = int(esc, 16) |
- # Check for surrogate pair on UCS-4 systems |
- if 0xd800 <= uni <= 0xdbff and sys.maxunicode > 65535: |
- msg = "Invalid \\uXXXX\\uXXXX surrogate pair" |
- if not s[end + 5:end + 7] == '\\u': |
- raise JSONDecodeError(msg, s, end) |
- esc2 = s[end + 7:end + 11] |
- if len(esc2) != 4: |
- raise JSONDecodeError(msg, s, end) |
- uni2 = int(esc2, 16) |
- uni = 0x10000 + (((uni - 0xd800) << 10) | (uni2 - 0xdc00)) |
- next_end += 6 |
- char = unichr(uni) |
- end = next_end |
- # Append the unescaped character |
- _append(char) |
- return u''.join(chunks), end |
- |
- |
-# Use speedup if available |
-scanstring = c_scanstring or py_scanstring |
- |
-WHITESPACE = re.compile(r'[ \t\n\r]*', FLAGS) |
-WHITESPACE_STR = ' \t\n\r' |
- |
-def JSONObject((s, end), encoding, strict, scan_once, object_hook, |
- object_pairs_hook, memo=None, |
- _w=WHITESPACE.match, _ws=WHITESPACE_STR): |
- # Backwards compatibility |
- if memo is None: |
- memo = {} |
- memo_get = memo.setdefault |
- pairs = [] |
- # Use a slice to prevent IndexError from being raised, the following |
- # check will raise a more specific ValueError if the string is empty |
- nextchar = s[end:end + 1] |
- # Normally we expect nextchar == '"' |
- if nextchar != '"': |
- if nextchar in _ws: |
- end = _w(s, end).end() |
- nextchar = s[end:end + 1] |
- # Trivial empty object |
- if nextchar == '}': |
- if object_pairs_hook is not None: |
- result = object_pairs_hook(pairs) |
- return result, end + 1 |
- pairs = {} |
- if object_hook is not None: |
- pairs = object_hook(pairs) |
- return pairs, end + 1 |
- elif nextchar != '"': |
- raise JSONDecodeError("Expecting property name", s, end) |
- end += 1 |
- while True: |
- key, end = scanstring(s, end, encoding, strict) |
- key = memo_get(key, key) |
- |
- # To skip some function call overhead we optimize the fast paths where |
- # the JSON key separator is ": " or just ":". |
- if s[end:end + 1] != ':': |
- end = _w(s, end).end() |
- if s[end:end + 1] != ':': |
- raise JSONDecodeError("Expecting : delimiter", s, end) |
- |
- end += 1 |
- |
- try: |
- if s[end] in _ws: |
- end += 1 |
- if s[end] in _ws: |
- end = _w(s, end + 1).end() |
- except IndexError: |
- pass |
- |
- try: |
- value, end = scan_once(s, end) |
- except StopIteration: |
- raise JSONDecodeError("Expecting object", s, end) |
- pairs.append((key, value)) |
- |
- try: |
- nextchar = s[end] |
- if nextchar in _ws: |
- end = _w(s, end + 1).end() |
- nextchar = s[end] |
- except IndexError: |
- nextchar = '' |
- end += 1 |
- |
- if nextchar == '}': |
- break |
- elif nextchar != ',': |
- raise JSONDecodeError("Expecting , delimiter", s, end - 1) |
- |
- try: |
- nextchar = s[end] |
- if nextchar in _ws: |
- end += 1 |
- nextchar = s[end] |
- if nextchar in _ws: |
- end = _w(s, end + 1).end() |
- nextchar = s[end] |
- except IndexError: |
- nextchar = '' |
- |
- end += 1 |
- if nextchar != '"': |
- raise JSONDecodeError("Expecting property name", s, end - 1) |
- |
- if object_pairs_hook is not None: |
- result = object_pairs_hook(pairs) |
- return result, end |
- pairs = dict(pairs) |
- if object_hook is not None: |
- pairs = object_hook(pairs) |
- return pairs, end |
- |
-def JSONArray((s, end), scan_once, _w=WHITESPACE.match, _ws=WHITESPACE_STR): |
- values = [] |
- nextchar = s[end:end + 1] |
- if nextchar in _ws: |
- end = _w(s, end + 1).end() |
- nextchar = s[end:end + 1] |
- # Look-ahead for trivial empty array |
- if nextchar == ']': |
- return values, end + 1 |
- _append = values.append |
- while True: |
- try: |
- value, end = scan_once(s, end) |
- except StopIteration: |
- raise JSONDecodeError("Expecting object", s, end) |
- _append(value) |
- nextchar = s[end:end + 1] |
- if nextchar in _ws: |
- end = _w(s, end + 1).end() |
- nextchar = s[end:end + 1] |
- end += 1 |
- if nextchar == ']': |
- break |
- elif nextchar != ',': |
- raise JSONDecodeError("Expecting , delimiter", s, end) |
- |
- try: |
- if s[end] in _ws: |
- end += 1 |
- if s[end] in _ws: |
- end = _w(s, end + 1).end() |
- except IndexError: |
- pass |
- |
- return values, end |
- |
-class JSONDecoder(object): |
- """Simple JSON <http://json.org> decoder |
- |
- Performs the following translations in decoding by default: |
- |
- +---------------+-------------------+ |
- | JSON | Python | |
- +===============+===================+ |
- | object | dict | |
- +---------------+-------------------+ |
- | array | list | |
- +---------------+-------------------+ |
- | string | unicode | |
- +---------------+-------------------+ |
- | number (int) | int, long | |
- +---------------+-------------------+ |
- | number (real) | float | |
- +---------------+-------------------+ |
- | true | True | |
- +---------------+-------------------+ |
- | false | False | |
- +---------------+-------------------+ |
- | null | None | |
- +---------------+-------------------+ |
- |
- It also understands ``NaN``, ``Infinity``, and ``-Infinity`` as |
- their corresponding ``float`` values, which is outside the JSON spec. |
- |
- """ |
- |
- def __init__(self, encoding=None, object_hook=None, parse_float=None, |
- parse_int=None, parse_constant=None, strict=True, |
- object_pairs_hook=None): |
- """ |
- *encoding* determines the encoding used to interpret any |
- :class:`str` objects decoded by this instance (``'utf-8'`` by |
- default). It has no effect when decoding :class:`unicode` objects. |
- |
- Note that currently only encodings that are a superset of ASCII work, |
- strings of other encodings should be passed in as :class:`unicode`. |
- |
- *object_hook*, if specified, will be called with the result of every |
- JSON object decoded and its return value will be used in place of the |
- given :class:`dict`. This can be used to provide custom |
- deserializations (e.g. to support JSON-RPC class hinting). |
- |
- *object_pairs_hook* is an optional function that will be called with |
- the result of any object literal decode with an ordered list of pairs. |
- The return value of *object_pairs_hook* will be used instead of the |
- :class:`dict`. This feature can be used to implement custom decoders |
- that rely on the order that the key and value pairs are decoded (for |
- example, :func:`collections.OrderedDict` will remember the order of |
- insertion). If *object_hook* is also defined, the *object_pairs_hook* |
- takes priority. |
- |
- *parse_float*, if specified, will be called with the string of every |
- JSON float to be decoded. By default, this is equivalent to |
- ``float(num_str)``. This can be used to use another datatype or parser |
- for JSON floats (e.g. :class:`decimal.Decimal`). |
- |
- *parse_int*, if specified, will be called with the string of every |
- JSON int to be decoded. By default, this is equivalent to |
- ``int(num_str)``. This can be used to use another datatype or parser |
- for JSON integers (e.g. :class:`float`). |
- |
- *parse_constant*, if specified, will be called with one of the |
- following strings: ``'-Infinity'``, ``'Infinity'``, ``'NaN'``. This |
- can be used to raise an exception if invalid JSON numbers are |
- encountered. |
- |
- *strict* controls the parser's behavior when it encounters an |
- invalid control character in a string. The default setting of |
- ``True`` means that unescaped control characters are parse errors, if |
- ``False`` then control characters will be allowed in strings. |
- |
- """ |
- self.encoding = encoding |
- self.object_hook = object_hook |
- self.object_pairs_hook = object_pairs_hook |
- self.parse_float = parse_float or float |
- self.parse_int = parse_int or int |
- self.parse_constant = parse_constant or _CONSTANTS.__getitem__ |
- self.strict = strict |
- self.parse_object = JSONObject |
- self.parse_array = JSONArray |
- self.parse_string = scanstring |
- self.memo = {} |
- self.scan_once = make_scanner(self) |
- |
- def decode(self, s, _w=WHITESPACE.match): |
- """Return the Python representation of ``s`` (a ``str`` or ``unicode`` |
- instance containing a JSON document) |
- |
- """ |
- obj, end = self.raw_decode(s, idx=_w(s, 0).end()) |
- end = _w(s, end).end() |
- if end != len(s): |
- raise JSONDecodeError("Extra data", s, end, len(s)) |
- return obj |
- |
- def raw_decode(self, s, idx=0): |
- """Decode a JSON document from ``s`` (a ``str`` or ``unicode`` |
- beginning with a JSON document) and return a 2-tuple of the Python |
- representation and the index in ``s`` where the document ended. |
- |
- This can be used to decode a JSON document from a string that may |
- have extraneous data at the end. |
- |
- """ |
- try: |
- obj, end = self.scan_once(s, idx) |
- except StopIteration: |
- raise JSONDecodeError("No JSON object could be decoded", s, idx) |
- return obj, end |