Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(212)

Unified Diff: base/json/json_parser.cc

Issue 10035042: Rewrite base::JSONReader to be 35-40% faster, depending on the input string. (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src
Patch Set: Really fix Windows, address comments Created 8 years, 7 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « base/json/json_parser.h ('k') | base/json/json_parser_unittest.cc » ('j') | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: base/json/json_parser.cc
diff --git a/base/json/json_parser.cc b/base/json/json_parser.cc
new file mode 100644
index 0000000000000000000000000000000000000000..de2f55fabfe4fa0fd413f8eb343c5b8b559929b9
--- /dev/null
+++ b/base/json/json_parser.cc
@@ -0,0 +1,972 @@
+// Copyright (c) 2012 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "base/json/json_parser.h"
+
+#include "base/float_util.h"
+#include "base/logging.h"
+#include "base/memory/scoped_ptr.h"
+#include "base/string_number_conversions.h"
+#include "base/string_util.h"
+#include "base/stringprintf.h"
+#include "base/third_party/icu/icu_utf.h"
+#include "base/utf_string_conversion_utils.h"
+#include "base/utf_string_conversions.h"
+#include "base/values.h"
+
+namespace base {
+namespace internal {
+
+namespace {
+
+const int kStackMaxDepth = 100;
+
+const int32 kExtendedASCIIStart = 0x80;
+
+// This and the class below are used to own the JSON input string for when
+// string tokens are stored as StringPiece instead of std::string. This
+// optimization avoids about 2/3rds of string memory copies. The constructor
+// takes the input string and swaps its data into the new instance. The real
+// root value is also Swap()ed into the new instance.
+class DictionaryHiddenRootValue : public base::DictionaryValue {
+ public:
+ DictionaryHiddenRootValue(std::string* json, Value* root) {
+ DCHECK(root->IsType(Value::TYPE_DICTIONARY));
+ DictionaryValue::Swap(static_cast<DictionaryValue*>(root));
+ json->swap(json_);
+ }
+
+ virtual void Swap(DictionaryValue* other) OVERRIDE {
+ DVLOG(1) << "Swap()ing a DictionaryValue inefficiently.";
+
+ // First deep copy to convert JSONStringValue to std::string and swap that
+ // copy with |other|, which contains the new contents of |this|.
+ scoped_ptr<base::DictionaryValue> copy(DeepCopy());
+ copy->Swap(other);
+
+ // Then erase the contents of the current dictionary and swap in the
+ // new contents, originally from |other|.
+ Clear();
+ json_.clear();
+ DictionaryValue::Swap(copy.get());
+ }
+
+ // Not overriding DictionaryValue::Remove because it just calls through to
+ // the method below.
+
+ virtual bool RemoveWithoutPathExpansion(const std::string& key,
+ Value** out) OVERRIDE {
+ // If the caller won't take ownership of the removed value, just call up.
+ if (!out)
+ return DictionaryValue::RemoveWithoutPathExpansion(key, out);
+
+ DVLOG(1) << "Remove()ing from a DictionaryValue inefficiently.";
+
+ // Otherwise, remove the value while its still "owned" by this and copy it
+ // to convert any JSONStringValues to std::string.
+ Value* out_owned = NULL;
+ if (!DictionaryValue::RemoveWithoutPathExpansion(key, &out_owned))
+ return false;
+
+ *out = out_owned->DeepCopy();
+ delete out_owned;
+
+ return true;
+ }
+
+ private:
+ std::string json_;
+
+ DISALLOW_COPY_AND_ASSIGN(DictionaryHiddenRootValue);
+};
+
+class ListHiddenRootValue : public base::ListValue {
+ public:
+ ListHiddenRootValue(std::string* json, Value* root) {
+ DCHECK(root->IsType(Value::TYPE_LIST));
+ ListValue::Swap(static_cast<ListValue*>(root));
+ json->swap(json_);
+ }
+
+ virtual void Swap(ListValue* other) OVERRIDE {
+ DVLOG(1) << "Swap()ing a ListValue inefficiently.";
+
+ // First deep copy to convert JSONStringValue to std::string and swap that
+ // copy with |other|, which contains the new contents of |this|.
+ scoped_ptr<base::ListValue> copy(DeepCopy());
+ copy->Swap(other);
+
+ // Then erase the contents of the current list and swap in the new contents,
+ // originally from |other|.
+ Clear();
+ json_.clear();
+ ListValue::Swap(copy.get());
+ }
+
+ virtual bool Remove(size_t index, Value** out) OVERRIDE {
+ // If the caller won't take ownership of the removed value, just call up.
+ if (!out)
+ return ListValue::Remove(index, out);
+
+ DVLOG(1) << "Remove()ing from a ListValue inefficiently.";
+
+ // Otherwise, remove the value while its still "owned" by this and copy it
+ // to convert any JSONStringValues to std::string.
+ Value* out_owned = NULL;
+ if (!ListValue::Remove(index, &out_owned))
+ return false;
+
+ *out = out_owned->DeepCopy();
+ delete out_owned;
+
+ return true;
+ }
+
+ private:
+ std::string json_;
+
+ DISALLOW_COPY_AND_ASSIGN(ListHiddenRootValue);
+};
+
+// A variant on StringValue that uses StringPiece instead of copying the string
+// into the Value. This can only be stored in a child of hidden root (above),
+// otherwise the referenced string will not be guaranteed to outlive it.
+class JSONStringValue : public base::Value {
+ public:
+ explicit JSONStringValue(const base::StringPiece& piece)
+ : Value(TYPE_STRING),
+ string_piece_(piece) {
+ }
+
+ // Value:
+ bool GetAsString(std::string* out_value) const OVERRIDE {
+ string_piece_.CopyToString(out_value);
+ return true;
+ }
+ bool GetAsString(string16* out_value) const OVERRIDE {
+ *out_value = UTF8ToUTF16(string_piece_);
+ return true;
+ }
+ virtual Value* DeepCopy() const OVERRIDE {
+ return Value::CreateStringValue(string_piece_.as_string());
+ }
+ virtual bool Equals(const Value* other) const OVERRIDE {
+ std::string other_string;
+ return other->IsType(TYPE_STRING) && other->GetAsString(&other_string) &&
+ StringPiece(other_string) == string_piece_;
+ }
+
+ private:
+ // The location in the original input stream.
+ base::StringPiece string_piece_;
+
+ DISALLOW_COPY_AND_ASSIGN(JSONStringValue);
+};
+
+// Simple class that checks for maximum recursion/"stack overflow."
+class StackMarker {
+ public:
+ explicit StackMarker(int* depth) : depth_(depth) {
+ ++(*depth_);
+ DCHECK_LE(*depth_, kStackMaxDepth);
+ }
+ ~StackMarker() {
+ --(*depth_);
+ }
+
+ bool IsTooDeep() const {
+ return *depth_ >= kStackMaxDepth;
+ }
+
+ private:
+ int* const depth_;
+
+ DISALLOW_COPY_AND_ASSIGN(StackMarker);
+};
+
+} // namespace
+
+JSONParser::JSONParser(int options)
+ : options_(options),
+ start_pos_(NULL),
+ pos_(NULL),
+ end_pos_(NULL),
+ index_(0),
+ stack_depth_(0),
+ line_number_(0),
+ index_last_line_(0),
+ error_code_(JSONReader::JSON_NO_ERROR),
+ error_line_(0),
+ error_column_(0) {
+}
+
+JSONParser::~JSONParser() {
+}
+
+Value* JSONParser::Parse(const std::string& input) {
+ // TODO(rsesek): Windows has problems with StringPiece/hidden roots. Fix
+ // <http://crbug.com/126107> when my Windows box arrives.
+#if defined(OS_WIN)
+ options_ |= JSON_DETACHABLE_CHILDREN;
+#endif
+
+ std::string input_copy;
+ // If the children of a JSON root can be detached, then hidden roots cannot
+ // be used, so do not bother copying the input because StringPiece will not
+ // be used anywhere.
+ if (!(options_ & JSON_DETACHABLE_CHILDREN)) {
+ input_copy = input;
+ start_pos_ = input_copy.data();
+ } else {
+ start_pos_ = input.data();
+ }
+ pos_ = start_pos_;
+ end_pos_ = start_pos_ + input.length();
+ index_ = 0;
+ line_number_ = 1;
+ index_last_line_ = 0;
+
+ error_code_ = JSONReader::JSON_NO_ERROR;
+ error_line_ = 0;
+ error_column_ = 0;
+
+ // When the input JSON string starts with a UTF-8 Byte-Order-Mark
+ // <0xEF 0xBB 0xBF>, advance the start position to avoid the
+ // ParseNextToken function mis-treating a Unicode BOM as an invalid
+ // character and returning NULL.
+ if (CanConsume(3) && static_cast<uint8>(*pos_) == 0xEF &&
+ static_cast<uint8>(*(pos_ + 1)) == 0xBB &&
+ static_cast<uint8>(*(pos_ + 2)) == 0xBF) {
+ NextNChars(3);
+ }
+
+ // Parse the first and all subsequent tokens.
+ scoped_ptr<Value> root(ParseNextToken());
+ if (!root.get())
+ return NULL;
+
+ // Make sure the input stream is at an end.
+ if (GetNextToken() != T_END_OF_INPUT) {
+ if (!CanConsume(1) || (NextChar() && GetNextToken() != T_END_OF_INPUT)) {
+ ReportError(JSONReader::JSON_UNEXPECTED_DATA_AFTER_ROOT, 1);
+ return NULL;
+ }
+ }
+
+ // Dictionaries and lists can contain JSONStringValues, so wrap them in a
+ // hidden root.
+ if (!(options_ & JSON_DETACHABLE_CHILDREN)) {
+ if (root->IsType(Value::TYPE_DICTIONARY)) {
+ return new DictionaryHiddenRootValue(&input_copy, root.release());
+ } else if (root->IsType(Value::TYPE_LIST)) {
+ return new ListHiddenRootValue(&input_copy, root.release());
+ } else if (root->IsType(Value::TYPE_STRING)) {
+ // A string type could be a JSONStringValue, but because there's no
+ // corresponding HiddenRootValue, the memory will be lost. Deep copy to
+ // preserve it.
+ return root->DeepCopy();
+ }
+ }
+
+ // All other values can be returned directly.
+ return root.release();
+}
+
+JSONReader::JsonParseError JSONParser::error_code() const {
+ return error_code_;
+}
+
+std::string JSONParser::GetErrorMessage() const {
+ return FormatErrorMessage(error_line_, error_column_,
+ JSONReader::ErrorCodeToString(error_code_));
+}
+
+// StringBuilder ///////////////////////////////////////////////////////////////
+
+JSONParser::StringBuilder::StringBuilder()
+ : pos_(NULL),
+ length_(0),
+ string_(NULL) {
+}
+
+JSONParser::StringBuilder::StringBuilder(const char* pos)
+ : pos_(pos),
+ length_(0),
+ string_(NULL) {
+}
+
+void JSONParser::StringBuilder::Swap(StringBuilder* other) {
+ std::swap(other->string_, string_);
+ std::swap(other->pos_, pos_);
+ std::swap(other->length_, length_);
+}
+
+JSONParser::StringBuilder::~StringBuilder() {
+ delete string_;
+}
+
+void JSONParser::StringBuilder::Append(const char& c) {
+ DCHECK_GE(c, 0);
+ DCHECK_LT(c, 128);
+
+ if (string_)
+ string_->push_back(c);
+ else
+ ++length_;
+}
+
+void JSONParser::StringBuilder::AppendString(const std::string& str) {
+ DCHECK(string_);
+ string_->append(str);
+}
+
+void JSONParser::StringBuilder::Convert() {
+ if (string_)
+ return;
+ string_ = new std::string(pos_, length_);
+}
+
+bool JSONParser::StringBuilder::CanBeStringPiece() const {
+ return !string_;
+}
+
+StringPiece JSONParser::StringBuilder::AsStringPiece() {
+ if (string_)
+ return StringPiece();
+ return StringPiece(pos_, length_);
+}
+
+const std::string& JSONParser::StringBuilder::AsString() {
+ if (!string_)
+ Convert();
+ return *string_;
+}
+
+// JSONParser private //////////////////////////////////////////////////////////
+
+inline bool JSONParser::CanConsume(int length) {
+ return pos_ + length <= end_pos_;
+}
+
+const char* JSONParser::NextChar() {
+ DCHECK(CanConsume(1));
+ ++index_;
+ ++pos_;
+ return pos_;
+}
+
+void JSONParser::NextNChars(int n) {
+ DCHECK(CanConsume(n));
+ index_ += n;
+ pos_ += n;
+}
+
+JSONParser::Token JSONParser::GetNextToken() {
+ EatWhitespaceAndComments();
+ if (!CanConsume(1))
+ return T_END_OF_INPUT;
+
+ switch (*pos_) {
+ case '{':
+ return T_OBJECT_BEGIN;
+ case '}':
+ return T_OBJECT_END;
+ case '[':
+ return T_ARRAY_BEGIN;
+ case ']':
+ return T_ARRAY_END;
+ case '"':
+ return T_STRING;
+ case '0':
+ case '1':
+ case '2':
+ case '3':
+ case '4':
+ case '5':
+ case '6':
+ case '7':
+ case '8':
+ case '9':
+ case '-':
+ return T_NUMBER;
+ case 't':
+ return T_BOOL_TRUE;
+ case 'f':
+ return T_BOOL_FALSE;
+ case 'n':
+ return T_NULL;
+ case ',':
+ return T_LIST_SEPARATOR;
+ case ':':
+ return T_OBJECT_PAIR_SEPARATOR;
+ default:
+ return T_INVALID_TOKEN;
+ }
+}
+
+void JSONParser::EatWhitespaceAndComments() {
+ while (pos_ < end_pos_) {
+ switch (*pos_) {
+ case '\r':
+ case '\n':
+ index_last_line_ = index_;
+ ++line_number_;
+ // Fall through.
+ case ' ':
+ case '\t':
+ NextChar();
+ break;
+ case '/':
+ if (!EatComment())
+ return;
+ break;
+ default:
+ return;
+ }
+ }
+}
+
+bool JSONParser::EatComment() {
+ if (*pos_ != '/' || !CanConsume(1))
+ return false;
+
+ char next_char = *NextChar();
+ if (next_char == '/') {
+ // Single line comment, read to newline.
+ while (CanConsume(1)) {
+ char next_char = *NextChar();
+ if (next_char == '\n' || next_char == '\r')
+ return true;
+ }
+ } else if (next_char == '*') {
+ // Block comment, read until end marker.
+ while (CanConsume(2)) {
+ if (*NextChar() == '*' && *NextChar() == '/') {
+ // EatWhitespaceAndComments will inspect pos_, which will still be on
+ // the last / of the comment, so advance once more (which may also be
+ // end of input).
+ NextChar();
+ return true;
+ }
+ }
+
+ // If the comment is unterminated, GetNextToken will report T_END_OF_INPUT.
+ }
+
+ return false;
+}
+
+Value* JSONParser::ParseNextToken() {
+ return ParseToken(GetNextToken());
+}
+
+Value* JSONParser::ParseToken(Token token) {
+ switch (token) {
+ case T_OBJECT_BEGIN:
+ return ConsumeDictionary();
+ case T_ARRAY_BEGIN:
+ return ConsumeList();
+ case T_STRING:
+ return ConsumeString();
+ case T_NUMBER:
+ return ConsumeNumber();
+ case T_BOOL_TRUE:
+ case T_BOOL_FALSE:
+ case T_NULL:
+ return ConsumeLiteral();
+ default:
+ ReportError(JSONReader::JSON_UNEXPECTED_TOKEN, 1);
+ return NULL;
+ }
+}
+
+Value* JSONParser::ConsumeDictionary() {
+ if (*pos_ != '{') {
+ ReportError(JSONReader::JSON_UNEXPECTED_TOKEN, 1);
+ return NULL;
+ }
+
+ StackMarker depth_check(&stack_depth_);
+ if (depth_check.IsTooDeep()) {
+ ReportError(JSONReader::JSON_TOO_MUCH_NESTING, 1);
+ return NULL;
+ }
+
+ scoped_ptr<DictionaryValue> dict(new DictionaryValue);
+
+ NextChar();
+ Token token = GetNextToken();
+ while (token != T_OBJECT_END) {
+ if (token != T_STRING) {
+ ReportError(JSONReader::JSON_UNQUOTED_DICTIONARY_KEY, 1);
+ return NULL;
+ }
+
+ // First consume the key.
+ StringBuilder key;
+ if (!ConsumeStringRaw(&key)) {
+ return NULL;
+ }
+
+ // Read the separator.
+ NextChar();
+ token = GetNextToken();
+ if (token != T_OBJECT_PAIR_SEPARATOR) {
+ ReportError(JSONReader::JSON_SYNTAX_ERROR, 1);
+ return NULL;
+ }
+
+ // The token is the value. Ownership transfers to |dict|.
+ NextChar();
+ Value* value = ParseNextToken();
+ if (!value) {
+ return NULL;
+ }
+
+ dict->SetWithoutPathExpansion(key.AsString(), value);
+
+ NextChar();
+ token = GetNextToken();
+ if (token == T_LIST_SEPARATOR) {
+ NextChar();
+ token = GetNextToken();
+ if (token == T_OBJECT_END && !(options_ & JSON_ALLOW_TRAILING_COMMAS)) {
+ ReportError(JSONReader::JSON_TRAILING_COMMA, 1);
+ return NULL;
+ }
+ } else if (token != T_OBJECT_END) {
+ ReportError(JSONReader::JSON_SYNTAX_ERROR, 0);
+ return NULL;
+ }
+ }
+
+ if (token != T_OBJECT_END)
+ return NULL;
+
+ return dict.release();
+}
+
+Value* JSONParser::ConsumeList() {
+ if (*pos_ != '[') {
+ ReportError(JSONReader::JSON_UNEXPECTED_TOKEN, 1);
+ return NULL;
+ }
+
+ StackMarker depth_check(&stack_depth_);
+ if (depth_check.IsTooDeep()) {
+ ReportError(JSONReader::JSON_TOO_MUCH_NESTING, 1);
+ return NULL;
+ }
+
+ scoped_ptr<ListValue> list(new ListValue);
+
+ NextChar();
+ Token token = GetNextToken();
+ while (token != T_ARRAY_END) {
+ Value* item = ParseToken(token);
+ if (!item) {
+ // ReportError from deeper level.
+ return NULL;
+ }
+
+ list->Append(item);
+
+ NextChar();
+ token = GetNextToken();
+ if (token == T_LIST_SEPARATOR) {
+ NextChar();
+ token = GetNextToken();
+ if (token == T_ARRAY_END && !(options_ & JSON_ALLOW_TRAILING_COMMAS)) {
+ ReportError(JSONReader::JSON_TRAILING_COMMA, 1);
+ return NULL;
+ }
+ } else if (token != T_ARRAY_END) {
+ ReportError(JSONReader::JSON_SYNTAX_ERROR, 1);
+ return NULL;
+ }
+ }
+
+ if (token != T_ARRAY_END)
+ return NULL;
+
+ return list.release();
+}
+
+Value* JSONParser::ConsumeString() {
+ StringBuilder string;
+ if (!ConsumeStringRaw(&string))
+ return NULL;
+
+ // Create the Value representation, either using a hidden root, if configured
+ // to do so, and the string can be represented by StringPiece.
+ if (string.CanBeStringPiece() && !(options_ & JSON_DETACHABLE_CHILDREN)) {
+ return new JSONStringValue(string.AsStringPiece());
+ } else {
+ if (string.CanBeStringPiece())
+ string.Convert();
+ return new StringValue(string.AsString());
+ }
+}
+
+bool JSONParser::ConsumeStringRaw(StringBuilder* out) {
+ if (*pos_ != '"') {
+ ReportError(JSONReader::JSON_UNEXPECTED_TOKEN, 1);
+ return false;
+ }
+
+ // StringBuilder will internally build a StringPiece unless a UTF-16
+ // conversion occurs, at which point it will perform a copy into a
+ // std::string.
+ StringBuilder string(NextChar());
+
+ int length = end_pos_ - start_pos_;
+ int32 next_char = 0;
+
+ while (CanConsume(1)) {
+ pos_ = start_pos_ + index_; // CBU8_NEXT is postcrement.
+ CBU8_NEXT(start_pos_, index_, length, next_char);
+ if (next_char < 0 || !IsValidCharacter(next_char)) {
+ ReportError(JSONReader::JSON_UNSUPPORTED_ENCODING, 1);
+ return false;
+ }
+
+ // If this character is an escape sequence...
+ if (next_char == '\\') {
+ // The input string will be adjusted (either by combining the two
+ // characters of an encoded escape sequence, or with a UTF conversion),
+ // so using StringPiece isn't possible -- force a conversion.
+ string.Convert();
+
+ if (!CanConsume(1)) {
+ ReportError(JSONReader::JSON_INVALID_ESCAPE, 0);
+ return false;
+ }
+
+ switch (*NextChar()) {
+ // Allowed esape sequences:
+ case 'x': { // UTF-8 sequence.
+ // UTF-8 \x escape sequences are not allowed in the spec, but they
+ // are supported here for backwards-compatiblity with the old parser.
+ if (!CanConsume(2)) {
+ ReportError(JSONReader::JSON_INVALID_ESCAPE, 1);
+ return false;
+ }
+
+ int hex_digit = 0;
+ if (!HexStringToInt(StringPiece(NextChar(), 2), &hex_digit)) {
+ ReportError(JSONReader::JSON_INVALID_ESCAPE, -1);
+ return false;
+ }
+ NextChar();
+
+ if (hex_digit < kExtendedASCIIStart)
+ string.Append(hex_digit);
+ else
+ DecodeUTF8(hex_digit, &string);
+ break;
+ }
+ case 'u': { // UTF-16 sequence.
+ // UTF units are of the form \uXXXX.
+ if (!CanConsume(5)) { // 5 being 'u' and four HEX digits.
+ ReportError(JSONReader::JSON_INVALID_ESCAPE, 0);
+ return false;
+ }
+
+ // Skip the 'u'.
+ NextChar();
+
+ std::string utf8_units;
+ if (!DecodeUTF16(&utf8_units)) {
+ ReportError(JSONReader::JSON_INVALID_ESCAPE, -1);
+ return false;
+ }
+
+ string.AppendString(utf8_units);
+ break;
+ }
+ case '"':
+ string.Append('"');
+ break;
+ case '\\':
+ string.Append('\\');
+ break;
+ case '/':
+ string.Append('/');
+ break;
+ case 'b':
+ string.Append('\b');
+ break;
+ case 'f':
+ string.Append('\f');
+ break;
+ case 'n':
+ string.Append('\n');
+ break;
+ case 'r':
+ string.Append('\r');
+ break;
+ case 't':
+ string.Append('\t');
+ break;
+ case 'v': // Not listed as valid escape sequence in the RFC.
+ string.Append('\v');
+ break;
+ // All other escape squences are illegal.
+ default:
+ ReportError(JSONReader::JSON_INVALID_ESCAPE, 0);
+ return false;
+ }
+ } else if (next_char == '"') {
+ --index_; // Rewind by one because of CBU8_NEXT.
+ out->Swap(&string);
+ return true;
+ } else {
+ if (next_char < kExtendedASCIIStart)
+ string.Append(next_char);
+ else
+ DecodeUTF8(next_char, &string);
+ }
+ }
+
+ ReportError(JSONReader::JSON_SYNTAX_ERROR, 0);
+ return false;
+}
+
+// Entry is at the first X in \uXXXX.
+bool JSONParser::DecodeUTF16(std::string* dest_string) {
+ if (!CanConsume(4))
+ return false;
+
+ // This is a 32-bit field because the shift operations in the
+ // conversion process below cause MSVC to error about "data loss."
+ // This only stores UTF-16 code units, though.
+ // Consume the UTF-16 code unit, which may be a high surrogate.
+ int code_unit16_high = 0;
+ if (!HexStringToInt(StringPiece(pos_, 4), &code_unit16_high))
+ return false;
+
+ // Only add 3, not 4, because at the end of this iteration, the parser has
+ // finished working with the last digit of the UTF sequence, meaning that
+ // the next iteration will advance to the next byte.
+ NextNChars(3);
+
+ // Used to convert the UTF-16 code units to a code point and then to a UTF-8
+ // code unit sequence.
+ char code_point[8] = { 0 };
+ size_t offset = 0;
+
+ // If this is a high surrogate, consume the next code unit to get the
+ // low surrogate.
+ if (CBU16_IS_SURROGATE(code_unit16_high)) {
+ // Make sure this is the high surrogate. If not, it's an encoding
+ // error.
+ if (!CBU16_IS_SURROGATE_LEAD(code_unit16_high))
+ return false;
+
+ // Make sure that the token has more characters to consume the
+ // lower surrogate.
+ if (!CanConsume(6)) // 6 being '\' 'u' and four HEX digits.
+ return false;
+ if (*NextChar() != '\\' || *NextChar() != 'u')
+ return false;
+
+ NextChar(); // Read past 'u'.
+ int code_unit16_low = 0;
+ if (!HexStringToInt(StringPiece(pos_, 4), &code_unit16_low))
+ return false;
+
+ NextNChars(3);
+
+ if (!CBU16_IS_TRAIL(code_unit16_low)) {
+ return false;
+ }
+
+ uint32 code_unit32 = CBU16_GET_SUPPLEMENTARY(code_unit16_high,
+ code_unit16_low);
+ offset = 0;
+ CBU8_APPEND_UNSAFE(code_point, offset, code_unit32);
+ } else {
+ // Not a surrogate.
+ DCHECK(CBU16_IS_SINGLE(code_unit16_high));
+ CBU8_APPEND_UNSAFE(code_point, offset, code_unit16_high);
+ }
+
+ dest_string->append(code_point);
+ return true;
+}
+
+void JSONParser::DecodeUTF8(const int32& point, StringBuilder* dest) {
+ // Anything outside of the basic ASCII plane will need to be decomposed from
+ // int32 to a multi-byte sequence.
+ if (point < kExtendedASCIIStart) {
+ dest->Append(point);
+ } else {
+ char utf8_units[4] = { 0 };
+ int offset = 0;
+ CBU8_APPEND_UNSAFE(utf8_units, offset, point);
+ dest->Convert();
+ dest->AppendString(utf8_units);
+ }
+}
+
+Value* JSONParser::ConsumeNumber() {
+ const char* num_start = pos_;
+ const int start_index = index_;
+ int end_index = start_index;
+
+ if (*pos_ == '-')
+ NextChar();
+
+ if (!ReadInt(false)) {
+ ReportError(JSONReader::JSON_SYNTAX_ERROR, 1);
+ return NULL;
+ }
+ end_index = index_;
+
+ // The optional fraction part.
+ if (*pos_ == '.') {
+ if (!CanConsume(1)) {
+ ReportError(JSONReader::JSON_SYNTAX_ERROR, 1);
+ return NULL;
+ }
+ NextChar();
+ if (!ReadInt(true)) {
+ ReportError(JSONReader::JSON_SYNTAX_ERROR, 1);
+ return NULL;
+ }
+ end_index = index_;
+ }
+
+ // Optional exponent part.
+ if (*pos_ == 'e' || *pos_ == 'E') {
+ NextChar();
+ if (*pos_ == '-' || *pos_ == '+')
+ NextChar();
+ if (!ReadInt(true)) {
+ ReportError(JSONReader::JSON_SYNTAX_ERROR, 1);
+ return NULL;
+ }
+ end_index = index_;
+ }
+
+ // ReadInt is greedy because numbers have no easily detectable sentinel,
+ // so save off where the parser should be on exit (see Consume invariant at
+ // the top of the header), then make sure the next token is one which is
+ // valid.
+ const char* exit_pos = pos_ - 1;
+ int exit_index = index_ - 1;
+
+ switch (GetNextToken()) {
+ case T_OBJECT_END:
+ case T_ARRAY_END:
+ case T_LIST_SEPARATOR:
+ case T_END_OF_INPUT:
+ break;
+ default:
+ ReportError(JSONReader::JSON_SYNTAX_ERROR, 1);
+ return NULL;
+ }
+
+ pos_ = exit_pos;
+ index_ = exit_index;
+
+ StringPiece num_string(num_start, end_index - start_index);
+
+ int num_int;
+ if (StringToInt(num_string, &num_int))
+ return Value::CreateIntegerValue(num_int);
+
+ double num_double;
+ if (base::StringToDouble(num_string.as_string(), &num_double) &&
+ IsFinite(num_double)) {
+ return Value::CreateDoubleValue(num_double);
+ }
+
+ return NULL;
+}
+
+bool JSONParser::ReadInt(bool allow_leading_zeros) {
+ char first = *pos_;
+ int len = 0;
+
+ char c = first;
+ while (CanConsume(1) && IsAsciiDigit(c)) {
+ c = *NextChar();
+ ++len;
+ }
+
+ if (len == 0)
+ return false;
+
+ if (!allow_leading_zeros && len > 1 && first == '0')
+ return false;
+
+ return true;
+}
+
+Value* JSONParser::ConsumeLiteral() {
+ switch (*pos_) {
+ case 't': {
+ const char* kTrueLiteral = "true";
+ const int kTrueLen = static_cast<int>(strlen(kTrueLiteral));
+ if (!CanConsume(kTrueLen - 1) ||
+ !StringsAreEqual(pos_, kTrueLiteral, kTrueLen)) {
+ ReportError(JSONReader::JSON_SYNTAX_ERROR, 1);
+ return NULL;
+ }
+ NextNChars(kTrueLen - 1);
+ return Value::CreateBooleanValue(true);
+ }
+ case 'f': {
+ const char* kFalseLiteral = "false";
+ const int kFalseLen = static_cast<int>(strlen(kFalseLiteral));
+ if (!CanConsume(kFalseLen - 1) ||
+ !StringsAreEqual(pos_, kFalseLiteral, kFalseLen)) {
+ ReportError(JSONReader::JSON_SYNTAX_ERROR, 1);
+ return NULL;
+ }
+ NextNChars(kFalseLen - 1);
+ return Value::CreateBooleanValue(false);
+ }
+ case 'n': {
+ const char* kNullLiteral = "null";
+ const int kNullLen = static_cast<int>(strlen(kNullLiteral));
+ if (!CanConsume(kNullLen - 1) ||
+ !StringsAreEqual(pos_, kNullLiteral, kNullLen)) {
+ ReportError(JSONReader::JSON_SYNTAX_ERROR, 1);
+ return NULL;
+ }
+ NextNChars(kNullLen - 1);
+ return Value::CreateNullValue();
+ }
+ default:
+ ReportError(JSONReader::JSON_UNEXPECTED_TOKEN, 1);
+ return NULL;
+ }
+}
+
+// static
+bool JSONParser::StringsAreEqual(const char* one, const char* two, size_t len) {
+ return strncmp(one, two, len) == 0;
+}
+
+void JSONParser::ReportError(JSONReader::JsonParseError code,
+ int column_adjust) {
+ error_code_ = code;
+ error_line_ = line_number_;
+ error_column_ = index_ - index_last_line_ + column_adjust;
+}
+
+// static
+std::string JSONParser::FormatErrorMessage(int line, int column,
+ const std::string& description) {
+ if (line || column) {
+ return StringPrintf("Line: %i, column: %i, %s",
+ line, column, description.c_str());
+ }
+ return description;
+}
+
+} // namespace internal
+} // namespace base
« no previous file with comments | « base/json/json_parser.h ('k') | base/json/json_parser_unittest.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698