base/json/json_parser.cc - Issue 10035042: Rewrite base::JSONReader to be 35-40% faster, depending on the input string.

Unified Diff: base/json/json_parser.cc

Issue 10035042: Rewrite base::JSONReader to be 35-40% faster, depending on the input string. (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src

Patch Set: '' Created 8 years, 8 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Download patch

Index: base/json/json_parser.cc

diff --git a/base/json/json_parser.cc b/base/json/json_parser.cc

new file mode 100644

index 0000000000000000000000000000000000000000..960ca6f9d718cec64305b46761153671e6b3d4d8

--- /dev/null

+++ b/base/json/json_parser.cc

@@ -0,0 +1,882 @@

+// Use of this source code is governed by a BSD-style license that can be

+// found in the LICENSE file.

+#include "base/json/json_parser.h"

+#include "base/float_util.h"

+#include "base/logging.h"

+#include "base/memory/scoped_ptr.h"

+#include "base/stringprintf.h"

+#include "base/string_number_conversions.h"

Mark Mentovai 2012/04/19 16:40:11 '_' < 'p' I would have stayed quiet about it, but

Robert Sesek 2012/05/03 15:34:52 Done.

+#include "base/string_util.h"

+#include "base/third_party/icu/icu_utf.h"

+#include "base/utf_string_conversion_utils.h"

+#include "base/utf_string_conversions.h"

+#include "base/values.h"

+namespace {

+const int kStackMaxDepth = 100;

+const int32 kExtendedASCIIStart = 0x80;

+// This and the class below are used to own the JSON input string for when

+// string tokens are stored as StringPiece instead of std::string. This

+// optimization avoids about 2/3rds of string memory copies. The constructor

+// takes the input string and swaps its data into the new instance. The real

+// root value is also Swap()ed into the new instance.

+class DictionaryHiddenRootValue : public base::DictionaryValue {

+ public:

+ DictionaryHiddenRootValue(std::string* json, Value* root) {

+ CHECK(root->IsType(Value::TYPE_DICTIONARY));

Mark Mentovai 2012/04/19 16:40:11 Can this be a DCHECK? Same on line 50.

Robert Sesek 2012/05/03 15:34:52 Done.

+ Swap(static_cast<DictionaryValue*>(root));

+ json->swap(json_);

+ }

+ virtual base::DictionaryValue* DeepCopy() const OVERRIDE {

+ scoped_ptr<base::Value> data(base::DictionaryValue::DeepCopy());

+ std::string json(json_);

+ return new DictionaryHiddenRootValue(&json, data.get());

+ }

+ private:

+ std::string json_;

Mark Mentovai 2012/04/19 16:40:11 DISALLOW_COPY_AND_ASSIGN? Maybe not. Don’t know ho

Robert Sesek 2012/05/03 15:34:52 Done.

+};

+class ListHiddenRootValue : public base::ListValue {

+ public:

+ ListHiddenRootValue(std::string* json, Value* root) {

+ CHECK(root->IsType(Value::TYPE_LIST));

+ Swap(static_cast<ListValue*>(root));

+ json->swap(json_);

+ }

+ virtual base::ListValue* DeepCopy() const OVERRIDE {

+ scoped_ptr<base::Value> data(base::ListValue::DeepCopy());

+ std::string json(json_);

+ return new ListHiddenRootValue(&json, data.get());

+ }

+ private:

+ std::string json_;

+};

+// A variant on StringValue that uses StringPiece instead of copying the string

+// into the Value. This can only be stored in a child of hidden root (above),

+// otherwise the referenced string will not be guaranteed to outlive it.

+class JSONStringValue : public base::Value {

+ public:

+ explicit JSONStringValue(const base::StringPiece& piece)

+ : Value(TYPE_STRING),

+ string_piece_(piece) {

+ }

+ // Value:

+ bool GetAsString(std::string* out_value) const OVERRIDE {

+ string_piece_.CopyToString(out_value);

+ return true;

+ }

+ bool GetAsString(string16* out_value) const OVERRIDE {

+ *out_value = UTF8ToUTF16(string_piece_);

+ return true;

+ }

+ virtual Value* DeepCopy() const OVERRIDE {

+ return Value::CreateStringValue(string_piece_.as_string());

+ }

+ virtual bool Equals(const Value* other) const OVERRIDE {

+ std::string other_string;

+ return other->IsType(TYPE_STRING) && other->GetAsString(&other_string) &&

Mark Mentovai 2012/04/19 16:40:11 Isn’t it cheaper to do the comparison of other and

Robert Sesek 2012/05/03 15:34:52 Yes it is. Good idea.

+ other_string == string_piece_.as_string();

+ }

+ private:

+ // The location in the original input stream.

+ base::StringPiece string_piece_;

Mark Mentovai 2012/04/19 16:40:11 DISALLOW_COPY_AND_ASSIGN? Maybe not. Don’t know ho

Robert Sesek 2012/05/03 15:34:52 Done.

+};

+// Simple class that checks for maximum recursion/"stack overflow."

+class StackMarker {

+ public:

+ StackMarker(int* depth) : depth_(depth) {

Mark Mentovai 2012/04/19 16:40:11 explicit

Robert Sesek 2012/05/03 15:34:52 Done.

+ ++(*depth_);

+ }

+ ~StackMarker() {

+ --(*depth_);

+ }

+ bool IsTooDeep() {

Mark Mentovai 2012/04/19 16:40:11 Can be const.

Robert Sesek 2012/05/03 15:34:52 Done.

+ return *depth_ >= kStackMaxDepth;

+ }

+ private:

+ int* depth_;

Mark Mentovai 2012/04/19 16:40:11 DISALLOW_COPY_AND_ASSIGN

Mark Mentovai 2012/04/19 16:40:11 The pointer (not value) can be const, which is nic

Robert Sesek 2012/05/03 15:34:52 Done.

+};

+} // namespace

+namespace base {

+namespace internal {

+JSONParser::JSONParser(int options)

+ : options_(options),

+ start_pos_(NULL),

+ pos_(0),

+ index_(0),

Mark Mentovai 2012/04/19 16:40:11 Is end_pos_ missing intentionally?

Robert Sesek 2012/05/03 15:34:52 Nope.

+ stack_depth_(0),

+ line_number_(0),

+ index_last_line_(0),

+ error_code_(JSONReader::JSON_NO_ERROR),

+ error_line_(0),

+ error_column_(0) {

+JSONParser::~JSONParser() {

+Value* JSONParser::Parse(const std::string& input) {

Mark Mentovai 2012/04/19 16:40:11 Perhaps this can even accept StringPiece input, po

+ std::string input_copy;

+ // If the children of a JSON root can be detached, then hidden roots cannot

+ // be used, so do not bother copying the input because StringPiece will not

+ // be used anywhere.

+ if (!(options_ & JSON_DETACHABLE_CHILDREN)) {

+ input_copy = input;

+ start_pos_ = input_copy.data();

+ } else {

+ start_pos_ = input.data();

+ }

+ pos_ = start_pos_;

+ end_pos_ = start_pos_ + input.length();

+ index_ = 0;

+ line_number_ = 1;

+ index_last_line_ = 0;

+ error_code_ = JSONReader::JSON_NO_ERROR;

+ error_line_ = 0;

+ error_column_ = 0;

+ // When the input JSON string starts with a UTF-8 Byte-Order-Mark (U+FEFF)

Mark Mentovai 2012/04/19 16:40:11 That’s a UTF-16 BOM. Your comment makes it sound

Robert Sesek 2012/05/03 15:34:52 Isn't U+FEFF the BOM code point, which in UTF-16 i

+ // or <0xEF 0xBB 0xBF>, advance the start position to avoid the

+ // ParseNextToken function mis-treating a Unicode BOM as an invalid

+ // character and returning NULL.

+ if (CanConsume(3) && static_cast<uint8>(*pos_) == 0xEF &&

+ static_cast<uint8>(*(pos_ + 1)) == 0xBB &&

+ static_cast<uint8>(*(pos_ + 2)) == 0xBF) {

+ NextNChars(3);

+ }

+ // Parse the first and all subsequent tokens.

+ scoped_ptr<Value> root(ParseNextToken());

+ if (!root.get())

+ return NULL;

+ // Make sure the input stream is at an end.

+ if (GetNextToken() != T_END_OF_INPUT) {

+ if (!CanConsume(1) || (NextChar() && GetNextToken() != T_END_OF_INPUT)) {

+ ReportError(JSONReader::JSON_UNEXPECTED_DATA_AFTER_ROOT, 1);

Mark Mentovai 2012/04/19 16:40:11 What’s the “, 1” doing here? The unexpected data m

+ return NULL;

+ }

+ // Dictionaries and lists can contain JSONStringValues, so wrap them in a

+ // hidden root.

+ if (!(options_ & JSON_DETACHABLE_CHILDREN)) {

+ if (root->IsType(Value::TYPE_DICTIONARY)) {

+ return new DictionaryHiddenRootValue(&input_copy, root.release());

+ } else if (root->IsType(Value::TYPE_LIST)) {

+ return new ListHiddenRootValue(&input_copy, root.release());

+ } else if (root->IsType(Value::TYPE_STRING)) {

+ // A string type could be a JSONStringValue, but because there's no

+ // corresponding HiddenRootValue, the memory will be lost. Deep copy to

+ // preserve it.

+ return root->DeepCopy();

+ }

+ // All other values can be returned directly.

+ return root.release();

+JSONReader::JsonParseError JSONParser::error_code() const {

+ return error_code_;

+std::string JSONParser::GetErrorMessage() const {

+ return FormatErrorMessage(error_line_, error_column_,

+ JSONReader::ErrorCodeToString(error_code_));

+// StringBuilder ///////////////////////////////////////////////////////////////

+JSONParser::StringBuilder::StringBuilder()

+ : pos_(NULL),

+ length_(0),

+ string_(NULL) {

+JSONParser::StringBuilder::StringBuilder(const char* pos)

+ : pos_(pos),

+ length_(0),

+ string_(NULL) {

+void JSONParser::StringBuilder::Swap(StringBuilder* other) {

+ if (other->string_) {

Mark Mentovai 2012/04/19 16:40:12 This implementation seems wrong. What if other->st

Robert Sesek 2012/05/03 15:34:52 Done.

+ std::swap(other->string_, string_);

+ } else {

+ std::swap(other->pos_, pos_);

+ std::swap(other->length_, length_);

+ }

+JSONParser::StringBuilder::~StringBuilder() {

+ delete string_;

+void JSONParser::StringBuilder::Append(const int32& c) {

+ if (string_) {

+ string_->push_back(c);

Mark Mentovai 2012/04/19 16:40:12 The header never said what restrictions were place

+ } else {

+ // A code point is 32 bits/4 bytes, which needs to be mapped to 1-byte

+ // |char|s.

+ if (c < kExtendedASCIIStart) {

+ ++length_;

+ } else if (c < 0x0800) {

+ length_ += 2;

+ } else if (c < 0x1000) {

+ length_ += 3;

+ } else {

+ length_ += 4;

+ }

+void JSONParser::StringBuilder::AppendString(const std::string& str) {

+ DCHECK(string_);

+ string_->append(str);

+void JSONParser::StringBuilder::Convert() {

+ if (string_)

+ return;

+ string_ = new std::string(pos_, length_);

+bool JSONParser::StringBuilder::CanBeStringPiece() {

+ return !string_;

Mark Mentovai 2012/04/19 16:40:12 Oh, so once something is converted to a string, it

Robert Sesek 2012/05/03 15:34:52 Clarified.

+StringPiece JSONParser::StringBuilder::AsStringPiece() {

+ if (string_)

+ return StringPiece();

+ return StringPiece(pos_, length_);

+std::string JSONParser::StringBuilder::AsString() {

+ if (!string_)

+ Convert();

+ return *string_;

Mark Mentovai 2012/04/19 16:40:12 I may have said this in the other file, but if thi

Robert Sesek 2012/05/03 15:34:52 Done.

+// JSONParser private //////////////////////////////////////////////////////////

Mark Mentovai 2012/04/19 16:40:12 Stopping here for lunch.

Robert Sesek 2012/05/03 15:34:52 The flatbreads were good today, yes?

+inline bool JSONParser::CanConsume(int length) {

+ return pos_ + length <= end_pos_;

+const char* JSONParser::NextChar() {

+ DCHECK(CanConsume(1));

+ ++index_;

+ ++pos_;

+ return pos_;

+void JSONParser::NextNChars(int n) {

+ DCHECK(CanConsume(n));

+ index_ += n;

+ pos_ += n;

+JSONParser::Token JSONParser::GetNextToken() {

+ EatWhitespaceAndComments();

+ if (!CanConsume(1))

+ return T_END_OF_INPUT;

+ switch (*pos_) {

+ case '{':

+ return T_OBJECT_BEGIN;

+ case '}':

+ return T_OBJECT_END;

+ case '[':

+ return T_ARRAY_BEGIN;

+ case ']':

+ return T_ARRAY_END;

+ case '"':

+ return T_STRING;

+ case '0':

+ case '1':

+ case '2':

+ case '3':

+ case '4':

+ case '5':

+ case '6':

+ case '7':

+ case '8':

+ case '9':

+ case '-':

+ return T_NUMBER;

+ case 't':

+ return T_BOOL_TRUE;

+ case 'f':

+ return T_BOOL_FALSE;

+ case 'n':

+ return T_NULL;

+ case ',':

+ return T_LIST_SEPARATOR;

+ case ':':

+ return T_OBJECT_PAIR_SEPARATOR;

+ default:

+ return T_INVALID_TOKEN;

+ }

+void JSONParser::EatWhitespaceAndComments() {

+ while (pos_ < end_pos_) {

+ switch (*pos_) {

+ case '\r':

+ case '\n':

+ index_last_line_ = index_;

+ ++line_number_;

+ // Fall through.

+ case ' ':

+ case '\t':

+ NextChar();

+ break;

+ case '/':

+ if (!EatComment())

+ return;

+ break;

+ default:

+ return;

+ }

+bool JSONParser::EatComment() {

+ if (*pos_ != '/' || !CanConsume(1))

+ return false;

+ char next_char = *NextChar();

+ if (next_char == '/') {

+ // Single line comment, read to newline.

+ while (CanConsume(1)) {

+ char next_char = *NextChar();

+ if (next_char == '\n' || next_char == '\r')

+ return true;

+ }

+ } else if (next_char == '*') {

+ // Block comment, read until end marker.

+ while (CanConsume(2)) {

+ if (*NextChar() == '*' && *NextChar() == '/') {

+ // EatWhitespaceAndComments will inspect pos_, which will still be on

+ // the last / of the comment, so advance once more (which may also be

+ // end of input).

+ NextChar();

+ return true;

+ }

+ return false;

+Value* JSONParser::ParseNextToken() {

+ return ParseToken(GetNextToken());

+Value* JSONParser::ParseToken(Token token) {

+ switch (token) {

+ case T_OBJECT_BEGIN:

+ return ConsumeDictionary();

+ case T_ARRAY_BEGIN:

+ return ConsumeList();

+ case T_STRING:

+ return ConsumeString();

+ case T_NUMBER:

+ return ConsumeNumber();

+ case T_BOOL_TRUE:

+ case T_BOOL_FALSE:

+ case T_NULL:

+ return ConsumeLiteral();

+ default:

+ ReportError(JSONReader::JSON_UNEXPECTED_TOKEN, 1);

+ return NULL;

+ }

+Value* JSONParser::ConsumeDictionary() {

+ if (*pos_ != '{') {

+ ReportError(JSONReader::JSON_UNEXPECTED_TOKEN, 1);

+ return NULL;

+ }

+ StackMarker depth_check(&stack_depth_);

+ if (depth_check.IsTooDeep()) {

+ ReportError(JSONReader::JSON_TOO_MUCH_NESTING, 1);

+ return NULL;

+ }

+ scoped_ptr<DictionaryValue> dict(new DictionaryValue);

+ NextChar();

+ Token token = GetNextToken();

+ while (token != T_OBJECT_END) {

+ if (token != T_STRING) {

+ ReportError(JSONReader::JSON_UNQUOTED_DICTIONARY_KEY, 1);

+ return NULL;

+ }

+ // First consume the key.

+ StringBuilder key;

+ if (!ConsumeStringRaw(&key)) {

+ return NULL;

+ }

+ // Read the separator.

+ NextChar();

+ token = GetNextToken();

+ if (token != T_OBJECT_PAIR_SEPARATOR) {

+ ReportError(JSONReader::JSON_SYNTAX_ERROR, 1);

+ return NULL;

+ }

+ // The token is the value. Ownership transfers to |dict|.

+ NextChar();

+ Value* value = ParseNextToken();

+ if (!value) {

+ return NULL;

+ }

+ dict->SetWithoutPathExpansion(key.AsString(), value);

+ NextChar();

+ token = GetNextToken();

+ if (token == T_LIST_SEPARATOR) {

+ NextChar();

+ token = GetNextToken();

+ if (token == T_OBJECT_END && !(options_ & JSON_ALLOW_TRAILING_COMMAS)) {

+ ReportError(JSONReader::JSON_TRAILING_COMMA, 1);

+ return NULL;

+ }

+ } else if (token != T_OBJECT_END) {

+ ReportError(JSONReader::JSON_SYNTAX_ERROR, 0);

+ return NULL;

+ }

+ if (token != T_OBJECT_END)

+ return NULL;

+ return dict.release();

+Value* JSONParser::ConsumeList() {

+ if (*pos_ != '[') {

+ ReportError(JSONReader::JSON_UNEXPECTED_TOKEN, 1);

+ return NULL;

+ }

+ StackMarker depth_check(&stack_depth_);

+ if (depth_check.IsTooDeep()) {

+ ReportError(JSONReader::JSON_TOO_MUCH_NESTING, 1);

+ return NULL;

+ }

+ scoped_ptr<ListValue> list(new ListValue);

+ NextChar();

+ Token token = GetNextToken();

+ while (token != T_ARRAY_END) {

+ Value* item = ParseToken(token);

+ if (!item) {

+ // ReportError from deeper level.

+ return NULL;

+ }

+ list->Append(item);

+ NextChar();

+ token = GetNextToken();

+ if (token == T_LIST_SEPARATOR) {

+ NextChar();

+ token = GetNextToken();

+ if (token == T_ARRAY_END && !(options_ & JSON_ALLOW_TRAILING_COMMAS)) {

+ ReportError(JSONReader::JSON_TRAILING_COMMA, 1);

+ return NULL;

+ }

+ } else if (token != T_ARRAY_END) {

+ ReportError(JSONReader::JSON_SYNTAX_ERROR, 1);

+ return NULL;

+ }

+ if (token != T_ARRAY_END)

+ return NULL;

+ return list.release();

+Value* JSONParser::ConsumeString() {

+ StringBuilder string;

+ if (!ConsumeStringRaw(&string))

+ return NULL;

+ // Create the Value representation, either using a hidden root, if configured

+ // to do so, and the string can be represented by StringPiece.

+ if (string.CanBeStringPiece() && !(options_ & JSON_DETACHABLE_CHILDREN)) {

+ return new JSONStringValue(string.AsStringPiece());

+ } else {

+ if (string.CanBeStringPiece())

+ string.Convert();

+ return new StringValue(string.AsString());

+ }

+bool JSONParser::ConsumeStringRaw(StringBuilder* out) {

+ if (*pos_ != '"') {

+ ReportError(JSONReader::JSON_UNEXPECTED_TOKEN, 1);

+ return false;

+ }

+ // StringBuilder will internally build a StringPiece unless a UTF-16

+ // conversion occurs, at which point it will perform a copy into a

+ // std::string.

+ StringBuilder string(NextChar());

+ int length = end_pos_ - start_pos_;

+ int32 next_char = 0;

+ DCHECK_EQ(*pos_, *(start_pos_ + index_));

+ while (CanConsume(1)) {

+ pos_ = start_pos_ + index_; // CBU8_NEXT is postcrement.

+ CBU8_NEXT(start_pos_, index_, length, next_char);

+ if (next_char < 0 || !IsValidCharacter(next_char)) {

+ ReportError(JSONReader::JSON_UNSUPPORTED_ENCODING, 1);

+ return false;

+ }

+ // If this character is an escape sequence...

+ if (next_char == '\\') {

+ // The input string will be adjusted (either by combining the two

+ // characters of an encoded escape sequence, or with a UTF conversion),

+ // so using StringPiece isn't possible -- force a conversion.

+ string.Convert();

+ if (!CanConsume(1)) {

+ ReportError(JSONReader::JSON_INVALID_ESCAPE, 0);

+ return false;

+ }

+ switch (*NextChar()) {

+ // Allowed esape sequences:

+ case 'x': { // UTF-8 sequence.

+ if (!CanConsume(2)) {

+ ReportError(JSONReader::JSON_INVALID_ESCAPE, 1);

+ return false;

+ }

+ int hex_digit = 0;

+ if (!HexStringToInt(StringPiece(NextChar(), 2), &hex_digit)) {

+ ReportError(JSONReader::JSON_INVALID_ESCAPE, -1);

+ return false;

+ }

+ NextChar();

+ string.Append(hex_digit);

+ break;

+ }

+ case 'u': { // UTF-16 sequence.

+ // UTF units are of the form \uXXXX.

+ if (!CanConsume(5)) { // 5 being 'u' and four HEX digits.

+ ReportError(JSONReader::JSON_INVALID_ESCAPE, 0);

+ return false;

+ }

+ // Skip the 'u'.

+ NextChar();

+ std::string utf8_units;

+ if (!DecodeUTF16(&utf8_units)) {

+ ReportError(JSONReader::JSON_INVALID_ESCAPE, -1);

+ return false;

+ }

+ string.AppendString(utf8_units);

+ break;

+ }

+ case '"':

+ string.Append('"');

+ break;

+ case '\\':

+ string.Append('\\');

+ break;

+ case '/':

+ string.Append('/');

+ break;

+ case 'b':

+ string.Append('\b');

+ break;

+ case 'f':

+ string.Append('\f');

+ break;

+ case 'n':

+ string.Append('\n');

+ break;

+ case 'r':

+ string.Append('\r');

+ break;

+ case 't':

+ string.Append('\t');

+ break;

+ case 'v': // Not listed as valid escape sequence in the RFC.

+ string.Append('\v');

+ break;

+ // All other escape squences are illegal.

+ default:

+ ReportError(JSONReader::JSON_INVALID_ESCAPE, 0);

+ return false;

+ }

+ } else if (next_char == '"') {

+ --index_; // Rewind by one because of CBU8_NEXT.

+ out->Swap(&string);

+ return true;

+ } else if (next_char < kExtendedASCIIStart) {

+ string.Append(next_char);

+ } else {

+ // Anything outside of the basic ASCII plane will need to be

+ // decomposed from int32 to a multi-byte sequence.

+ char utf8_units[4] = { 0 };

+ int offset = 0;

+ string.Convert();

+ CBU8_APPEND_UNSAFE(utf8_units, offset, next_char);

+ string.AppendString(utf8_units);

+ }

+ ReportError(JSONReader::JSON_SYNTAX_ERROR, 0);

+ return false;

+// Entry is at the first X in \uXXXX.

+bool JSONParser::DecodeUTF16(std::string* dest_string) {

+ if (!CanConsume(4))

+ return false;

+ // This is a 32-bit field because the shift operations in the

+ // conversion process below cause MSVC to error about "data loss."

+ // This only stores UTF-16 code units, though.

+ // Consume the UTF-16 code unit, which may be a high surrogate.

+ int code_unit16_high = 0;

+ if (!HexStringToInt(StringPiece(pos_, 4), &code_unit16_high))

+ return false;

+ // Only add 3, not 4, because at the end of this iteration, the parser has

+ // finished working with the last digit of the UTF sequence, meaning that

+ // the next spin of the loop will advance to the next byte.

+ NextNChars(3);

+ // If this is a high surrogate, consume the next code unit to get the

+ // low surrogate.

+ int code_unit16_low = 0;

+ if (CBU16_IS_SURROGATE(code_unit16_high)) {

+ // Make sure this is the high surrogate. If not, it's an encoding

+ // error.

+ if (!CBU16_IS_SURROGATE_LEAD(code_unit16_high))

+ return false;

+ // Make sure that the token has more characters to consume the

+ // lower surrogate.

+ if (!CanConsume(6)) // 6 being '\' 'u' and four HEX digits.

+ return false;

+ if (*NextChar() != '\\' || *NextChar() != 'u')

+ return false;

+ NextChar(); // Read past 'u'.

+ if (!HexStringToInt(StringPiece(pos_, 4), &code_unit16_low))

+ return false;

+ NextNChars(3);

+ if (!CBU16_IS_SURROGATE(code_unit16_low) ||

+ !CBU16_IS_TRAIL(code_unit16_low)) {

+ return false;

+ }

+ } else if (!CBU16_IS_SINGLE(code_unit16_high)) {

+ // If this is not a code point, it's an encoding error.

+ return false;

+ }

+ // Convert the UTF-16 code units to a code point and then to a UTF-8

+ // code unit sequence.

+ char code_point[8] = { 0 };

+ size_t offset = 0;

+ if (!code_unit16_low) {

+ CBU8_APPEND_UNSAFE(code_point, offset, code_unit16_high);

+ } else {

+ uint32 code_unit32 = CBU16_GET_SUPPLEMENTARY(code_unit16_high,

+ code_unit16_low);

+ offset = 0;

+ CBU8_APPEND_UNSAFE(code_point, offset, code_unit32);

+ }

+ dest_string->append(code_point);

+ return true;

+Value* JSONParser::ConsumeNumber() {

+ const char* num_start = pos_;

+ const int start_index = index_;

+ int end_index = start_index;

+ if (*pos_ == '-')

+ NextChar();

+ if (!ReadInt(false)) {

+ ReportError(JSONReader::JSON_SYNTAX_ERROR, 1);

+ return NULL;

+ }

+ end_index = index_;

+ // The optional faction part.

+ if (*pos_ == '.') {

+ if (!CanConsume(1)) {

+ ReportError(JSONReader::JSON_SYNTAX_ERROR, 1);

+ return NULL;

+ }

+ NextChar();

+ if (!ReadInt(true)) {

+ ReportError(JSONReader::JSON_SYNTAX_ERROR, 1);

+ return NULL;

+ }

+ end_index = index_;

+ }

+ // Optional exponent part.

+ if (*pos_ == 'e' || *pos_ == 'E') {

+ NextChar();

+ if (*pos_ == '-' || *pos_ == '+')

+ NextChar();

+ if (!ReadInt(true)) {

+ ReportError(JSONReader::JSON_SYNTAX_ERROR, 1);

+ return NULL;

+ }

+ end_index = index_;

+ }

+ // ReadInt is greedy because numbers have no easily detectable sentinel,

+ // so save off where the parser should be on exit (see Consume invariant at

+ // the top of the header), then make sure the next token is one which is

+ // valid.

+ const char* exit_pos = pos_ - 1;

+ int exit_index = index_ - 1;

+ switch (GetNextToken()) {

+ case T_OBJECT_END:

+ case T_ARRAY_END:

+ case T_LIST_SEPARATOR:

+ case T_END_OF_INPUT:

+ break;

+ default:

+ ReportError(JSONReader::JSON_SYNTAX_ERROR, 1);

+ return NULL;

+ }

+ pos_ = exit_pos;

+ index_ = exit_index;

+ StringPiece num_string(num_start, end_index - start_index);

+ int num_int;

+ if (StringToInt(num_string, &num_int))

+ return Value::CreateIntegerValue(num_int);

+ double num_double;

+ if (base::StringToDouble(num_string.as_string(), &num_double) &&

+ IsFinite(num_double)) {

+ return Value::CreateDoubleValue(num_double);

+ }

+ return NULL;

+bool JSONParser::ReadInt(bool allow_leading_zeros) {

+ char first = *pos_;

+ int len = 0;

+ char c = first;

+ while (CanConsume(1) && IsAsciiDigit(c)) {

+ c = *NextChar();

+ ++len;

+ }

+ if (len == 0)

+ return false;

+ if (!allow_leading_zeros && len > 1 && first == '0')

+ return false;

+ return true;

+Value* JSONParser::ConsumeLiteral() {

+ switch (*pos_) {

+ case 't':

+ if (!CanConsume(3) || !StringsAreEqual(pos_, "true", 4)) {

+ ReportError(JSONReader::JSON_SYNTAX_ERROR, 1);

+ return NULL;

+ }

+ NextNChars(3);

+ return Value::CreateBooleanValue(true);

+ case 'f':

+ if (!CanConsume(4) || !StringsAreEqual(pos_, "false", 5)) {

+ ReportError(JSONReader::JSON_SYNTAX_ERROR, 1);

+ return NULL;

+ }

+ NextNChars(4);

+ return Value::CreateBooleanValue(false);

+ case 'n':

+ if (!CanConsume(3) || !StringsAreEqual(pos_, "null", 4)) {

+ ReportError(JSONReader::JSON_SYNTAX_ERROR, 1);

+ return NULL;

+ }

+ NextNChars(3);

+ return Value::CreateNullValue();

+ default:

+ ReportError(JSONReader::JSON_UNEXPECTED_TOKEN, 1);

+ return NULL;

+ }

+bool JSONParser::StringsAreEqual(const char* one, const char* two, size_t len) {

+ return strncmp(one, two, len) == 0;

+void JSONParser::ReportError(JSONReader::JsonParseError code,

+ int column_adjust) {

+ error_code_ = code;

+ error_line_ = line_number_;

+ error_column_ = index_ - index_last_line_ + column_adjust;

+// static

+std::string JSONParser::FormatErrorMessage(int line, int column,

+ const std::string& description) {

+ if (line || column) {

+ return base::StringPrintf(

tfarina 2012/04/19 22:54:47 nit: base:: here is not necessary as we are in bas

Robert Sesek 2012/05/03 15:34:52 Done.

+ "Line: %i, column: %i, %s", line, column, description.c_str());

+ }

+ return description;

+} // namespace internal

+} // namespace base

« base/json/json_parser.h ('K') | « base/json/json_parser.h ('k') | base/json/json_parser_unittest.cc » ('j') | base/json/json_parser_unittest.cc » ('J')