Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(324)

Unified Diff: base/json/json_reader.cc

Issue 10035042: Rewrite base::JSONReader to be 35-40% faster, depending on the input string. (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src
Patch Set: Really fix Windows, address comments Created 8 years, 7 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « base/json/json_reader.h ('k') | base/json/json_reader_unittest.cc » ('j') | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: base/json/json_reader.cc
diff --git a/base/json/json_reader.cc b/base/json/json_reader.cc
index 34574788671043c836dc88746810634de6719c32..fb1459b4ceb33aa42225c431888c8f2767a5dc53 100644
--- a/base/json/json_reader.cc
+++ b/base/json/json_reader.cc
@@ -4,73 +4,17 @@
#include "base/json/json_reader.h"
-#include "base/float_util.h"
+#include "base/json/json_parser.h"
#include "base/logging.h"
-#include "base/memory/scoped_ptr.h"
-#include "base/stringprintf.h"
-#include "base/string_number_conversions.h"
-#include "base/string_piece.h"
-#include "base/string_util.h"
-#include "base/third_party/icu/icu_utf.h"
-#include "base/utf_string_conversions.h"
-#include "base/values.h"
-
-namespace {
-
-const char kNullString[] = "null";
-const char kTrueString[] = "true";
-const char kFalseString[] = "false";
-
-const int kStackLimit = 100;
-
-// A helper method for ParseNumberToken. It reads an int from the end of
-// token. The method returns false if there is no valid integer at the end of
-// the token.
-bool ReadInt(base::JSONReader::Token& token, bool can_have_leading_zeros) {
- char first = token.NextChar();
- int len = 0;
-
- // Read in more digits.
- char c = first;
- while ('\0' != c && IsAsciiDigit(c)) {
- ++token.length;
- ++len;
- c = token.NextChar();
- }
- // We need at least 1 digit.
- if (len == 0)
- return false;
-
- if (!can_have_leading_zeros && len > 1 && '0' == first)
- return false;
-
- return true;
-}
-
-// A helper method for ParseStringToken. It reads |digits| hex digits from the
-// token. If the sequence of digits is not valid (contains other characters),
-// the method returns false.
-bool ReadHexDigits(base::JSONReader::Token& token, int digits) {
- for (int i = 1; i <= digits; ++i) {
- char c = *(token.begin + token.length + i);
- if (c == '\0' || !IsHexDigit(c))
- return false;
- }
-
- token.length += digits;
- return true;
-}
-
-} // namespace
namespace base {
-const char* JSONReader::kBadRootElementType =
- "Root value must be an array or object.";
const char* JSONReader::kInvalidEscape =
"Invalid escape sequence.";
const char* JSONReader::kSyntaxError =
"Syntax error.";
+const char* JSONReader::kUnexpectedToken =
+ "Unexpected token.";
const char* JSONReader::kTrailingComma =
"Trailing comma not allowed.";
const char* JSONReader::kTooMuchNesting =
@@ -83,24 +27,27 @@ const char* JSONReader::kUnquotedDictionaryKey =
"Dictionary keys must be quoted.";
JSONReader::JSONReader()
- : start_pos_(NULL),
- json_pos_(NULL),
- end_pos_(NULL),
- stack_depth_(0),
- allow_trailing_comma_(false),
- error_code_(JSON_NO_ERROR),
- error_line_(0),
- error_col_(0) {}
+ : parser_(new internal::JSONParser(JSON_PARSE_RFC)) {
+}
+
+JSONReader::JSONReader(int options)
+ : parser_(new internal::JSONParser(options)) {
+}
+
+JSONReader::~JSONReader() {
+}
// static
Value* JSONReader::Read(const std::string& json) {
- return Read(json, JSON_PARSE_RFC);
+ internal::JSONParser parser(JSON_PARSE_RFC);
+ return parser.Parse(json);
}
// static
Value* JSONReader::Read(const std::string& json,
int options) {
- return ReadAndReturnError(json, options, NULL, NULL);
+ internal::JSONParser parser(options);
+ return parser.Parse(json);
}
// static
@@ -108,16 +55,15 @@ Value* JSONReader::ReadAndReturnError(const std::string& json,
int options,
int* error_code_out,
std::string* error_msg_out) {
- JSONReader reader = JSONReader();
- Value* root = reader.JsonToValue(json, false,
- (options & JSON_ALLOW_TRAILING_COMMAS) != 0);
+ internal::JSONParser parser(options);
+ Value* root = parser.Parse(json);
if (root)
return root;
if (error_code_out)
- *error_code_out = reader.error_code();
+ *error_code_out = parser.error_code();
if (error_msg_out)
- *error_msg_out = reader.GetErrorMessage();
+ *error_msg_out = parser.GetErrorMessage();
return NULL;
}
@@ -127,12 +73,12 @@ std::string JSONReader::ErrorCodeToString(JsonParseError error_code) {
switch (error_code) {
case JSON_NO_ERROR:
return std::string();
- case JSON_BAD_ROOT_ELEMENT_TYPE:
- return kBadRootElementType;
case JSON_INVALID_ESCAPE:
return kInvalidEscape;
case JSON_SYNTAX_ERROR:
return kSyntaxError;
+ case JSON_UNEXPECTED_TOKEN:
+ return kUnexpectedToken;
case JSON_TRAILING_COMMA:
return kTrailingComma;
case JSON_TOO_MUCH_NESTING:
@@ -149,586 +95,16 @@ std::string JSONReader::ErrorCodeToString(JsonParseError error_code) {
}
}
-std::string JSONReader::GetErrorMessage() const {
- return FormatErrorMessage(error_line_, error_col_,
- ErrorCodeToString(error_code_));
-}
-
-Value* JSONReader::JsonToValue(const std::string& json, bool check_root,
- bool allow_trailing_comma) {
- // The input must be in UTF-8.
- if (!IsStringUTF8(json.data())) {
- error_code_ = JSON_UNSUPPORTED_ENCODING;
- return NULL;
- }
-
- start_pos_ = json.data();
- end_pos_ = start_pos_ + json.size();
-
- // When the input JSON string starts with a UTF-8 Byte-Order-Mark (U+FEFF)
- // or <0xEF 0xBB 0xBF>, advance the start position to prevent the
- // JSONReader::BuildValue() function from mis-treating a Unicode BOM as an
- // invalid character and returning NULL.
- if (json.size() >= 3 && static_cast<uint8>(start_pos_[0]) == 0xEF &&
- static_cast<uint8>(start_pos_[1]) == 0xBB &&
- static_cast<uint8>(start_pos_[2]) == 0xBF) {
- start_pos_ += 3;
- }
-
- json_pos_ = start_pos_;
- allow_trailing_comma_ = allow_trailing_comma;
- stack_depth_ = 0;
- error_code_ = JSON_NO_ERROR;
-
- scoped_ptr<Value> root(BuildValue(check_root));
- if (root.get()) {
- if (ParseToken().type == Token::END_OF_INPUT) {
- return root.release();
- } else {
- SetErrorCode(JSON_UNEXPECTED_DATA_AFTER_ROOT, json_pos_);
- }
- }
-
- // Default to calling errors "syntax errors".
- if (error_code_ == 0)
- SetErrorCode(JSON_SYNTAX_ERROR, json_pos_);
-
- return NULL;
-}
-
-// static
-std::string JSONReader::FormatErrorMessage(int line, int column,
- const std::string& description) {
- if (line || column) {
- return base::StringPrintf(
- "Line: %i, column: %i, %s", line, column, description.c_str());
- }
- return description;
+Value* JSONReader::ReadToValue(const std::string& json) {
+ return parser_->Parse(json);
}
-Value* JSONReader::BuildValue(bool is_root) {
- ++stack_depth_;
- if (stack_depth_ > kStackLimit) {
- SetErrorCode(JSON_TOO_MUCH_NESTING, json_pos_);
- return NULL;
- }
-
- Token token = ParseToken();
- // The root token must be an array or an object.
- if (is_root && token.type != Token::OBJECT_BEGIN &&
- token.type != Token::ARRAY_BEGIN) {
- SetErrorCode(JSON_BAD_ROOT_ELEMENT_TYPE, json_pos_);
- return NULL;
- }
-
- scoped_ptr<Value> node;
-
- switch (token.type) {
- case Token::END_OF_INPUT:
- case Token::INVALID_TOKEN:
- return NULL;
-
- case Token::NULL_TOKEN:
- node.reset(Value::CreateNullValue());
- break;
-
- case Token::BOOL_TRUE:
- node.reset(Value::CreateBooleanValue(true));
- break;
-
- case Token::BOOL_FALSE:
- node.reset(Value::CreateBooleanValue(false));
- break;
-
- case Token::NUMBER:
- node.reset(DecodeNumber(token));
- if (!node.get())
- return NULL;
- break;
-
- case Token::STRING:
- node.reset(DecodeString(token));
- if (!node.get())
- return NULL;
- break;
-
- case Token::ARRAY_BEGIN:
- {
- json_pos_ += token.length;
- token = ParseToken();
-
- node.reset(new ListValue());
- while (token.type != Token::ARRAY_END) {
- Value* array_node = BuildValue(false);
- if (!array_node)
- return NULL;
- static_cast<ListValue*>(node.get())->Append(array_node);
-
- // After a list value, we expect a comma or the end of the list.
- token = ParseToken();
- if (token.type == Token::LIST_SEPARATOR) {
- json_pos_ += token.length;
- token = ParseToken();
- // Trailing commas are invalid according to the JSON RFC, but some
- // consumers need the parsing leniency, so handle accordingly.
- if (token.type == Token::ARRAY_END) {
- if (!allow_trailing_comma_) {
- SetErrorCode(JSON_TRAILING_COMMA, json_pos_);
- return NULL;
- }
- // Trailing comma OK, stop parsing the Array.
- break;
- }
- } else if (token.type != Token::ARRAY_END) {
- // Unexpected value after list value. Bail out.
- return NULL;
- }
- }
- if (token.type != Token::ARRAY_END) {
- return NULL;
- }
- break;
- }
-
- case Token::OBJECT_BEGIN:
- {
- json_pos_ += token.length;
- token = ParseToken();
-
- node.reset(new DictionaryValue);
- while (token.type != Token::OBJECT_END) {
- if (token.type != Token::STRING) {
- SetErrorCode(JSON_UNQUOTED_DICTIONARY_KEY, json_pos_);
- return NULL;
- }
- scoped_ptr<Value> dict_key_value(DecodeString(token));
- if (!dict_key_value.get())
- return NULL;
-
- // Convert the key into a std::string.
- std::string dict_key;
- bool success = dict_key_value->GetAsString(&dict_key);
- DCHECK(success);
-
- json_pos_ += token.length;
- token = ParseToken();
- if (token.type != Token::OBJECT_PAIR_SEPARATOR)
- return NULL;
-
- json_pos_ += token.length;
- token = ParseToken();
- Value* dict_value = BuildValue(false);
- if (!dict_value)
- return NULL;
- static_cast<DictionaryValue*>(node.get())->SetWithoutPathExpansion(
- dict_key, dict_value);
-
- // After a key/value pair, we expect a comma or the end of the
- // object.
- token = ParseToken();
- if (token.type == Token::LIST_SEPARATOR) {
- json_pos_ += token.length;
- token = ParseToken();
- // Trailing commas are invalid according to the JSON RFC, but some
- // consumers need the parsing leniency, so handle accordingly.
- if (token.type == Token::OBJECT_END) {
- if (!allow_trailing_comma_) {
- SetErrorCode(JSON_TRAILING_COMMA, json_pos_);
- return NULL;
- }
- // Trailing comma OK, stop parsing the Object.
- break;
- }
- } else if (token.type != Token::OBJECT_END) {
- // Unexpected value after last object value. Bail out.
- return NULL;
- }
- }
- if (token.type != Token::OBJECT_END)
- return NULL;
-
- break;
- }
-
- default:
- // We got a token that's not a value.
- return NULL;
- }
- json_pos_ += token.length;
-
- --stack_depth_;
- return node.release();
+JSONReader::JsonParseError JSONReader::error_code() const {
+ return parser_->error_code();
}
-JSONReader::Token JSONReader::ParseNumberToken() {
- // We just grab the number here. We validate the size in DecodeNumber.
- // According to RFC4627, a valid number is: [minus] int [frac] [exp]
- Token token(Token::NUMBER, json_pos_, 0);
- char c = *json_pos_;
- if ('-' == c) {
- ++token.length;
- c = token.NextChar();
- }
-
- if (!ReadInt(token, false))
- return Token::CreateInvalidToken();
-
- // Optional fraction part
- c = token.NextChar();
- if ('.' == c) {
- ++token.length;
- if (!ReadInt(token, true))
- return Token::CreateInvalidToken();
- c = token.NextChar();
- }
-
- // Optional exponent part
- if ('e' == c || 'E' == c) {
- ++token.length;
- c = token.NextChar();
- if ('-' == c || '+' == c) {
- ++token.length;
- c = token.NextChar();
- }
- if (!ReadInt(token, true))
- return Token::CreateInvalidToken();
- }
-
- return token;
-}
-
-Value* JSONReader::DecodeNumber(const Token& token) {
- const std::string num_string(token.begin, token.length);
-
- int num_int;
- if (StringToInt(num_string, &num_int))
- return Value::CreateIntegerValue(num_int);
-
- double num_double;
- if (StringToDouble(num_string, &num_double) && base::IsFinite(num_double))
- return Value::CreateDoubleValue(num_double);
-
- return NULL;
-}
-
-JSONReader::Token JSONReader::ParseStringToken() {
- Token token(Token::STRING, json_pos_, 1);
- char c = token.NextChar();
- while (json_pos_ + token.length < end_pos_) {
- if ('\\' == c) {
- ++token.length;
- c = token.NextChar();
- // Make sure the escaped char is valid.
- switch (c) {
- case 'x':
- if (!ReadHexDigits(token, 2)) {
- SetErrorCode(JSON_INVALID_ESCAPE, json_pos_ + token.length);
- return Token::CreateInvalidToken();
- }
- break;
- case 'u':
- if (!ReadHexDigits(token, 4)) {
- SetErrorCode(JSON_INVALID_ESCAPE, json_pos_ + token.length);
- return Token::CreateInvalidToken();
- }
- break;
- case '\\':
- case '/':
- case 'b':
- case 'f':
- case 'n':
- case 'r':
- case 't':
- case 'v':
- case '"':
- break;
- default:
- SetErrorCode(JSON_INVALID_ESCAPE, json_pos_ + token.length);
- return Token::CreateInvalidToken();
- }
- } else if ('"' == c) {
- ++token.length;
- return token;
- }
- ++token.length;
- c = token.NextChar();
- }
- return Token::CreateInvalidToken();
-}
-
-Value* JSONReader::DecodeString(const Token& token) {
- std::string decoded_str;
- decoded_str.reserve(token.length - 2);
-
- for (int i = 1; i < token.length - 1; ++i) {
- char c = *(token.begin + i);
- if ('\\' == c) {
- ++i;
- c = *(token.begin + i);
- switch (c) {
- case '"':
- case '/':
- case '\\':
- decoded_str.push_back(c);
- break;
- case 'b':
- decoded_str.push_back('\b');
- break;
- case 'f':
- decoded_str.push_back('\f');
- break;
- case 'n':
- decoded_str.push_back('\n');
- break;
- case 'r':
- decoded_str.push_back('\r');
- break;
- case 't':
- decoded_str.push_back('\t');
- break;
- case 'v':
- decoded_str.push_back('\v');
- break;
-
- case 'x': {
- if (i + 2 >= token.length)
- return NULL;
- int hex_digit = 0;
- if (!HexStringToInt(StringPiece(token.begin + i + 1, 2), &hex_digit))
- return NULL;
- decoded_str.push_back(hex_digit);
- i += 2;
- break;
- }
- case 'u':
- if (!ConvertUTF16Units(token, &i, &decoded_str))
- return NULL;
- break;
-
- default:
- // We should only have valid strings at this point. If not,
- // ParseStringToken didn't do its job.
- NOTREACHED();
- return NULL;
- }
- } else {
- // Not escaped
- decoded_str.push_back(c);
- }
- }
- return Value::CreateStringValue(decoded_str);
-}
-
-bool JSONReader::ConvertUTF16Units(const Token& token,
- int* i,
- std::string* dest_string) {
- if (*i + 4 >= token.length)
- return false;
-
- // This is a 32-bit field because the shift operations in the
- // conversion process below cause MSVC to error about "data loss."
- // This only stores UTF-16 code units, though.
- // Consume the UTF-16 code unit, which may be a high surrogate.
- int code_unit16_high = 0;
- if (!HexStringToInt(StringPiece(token.begin + *i + 1, 4), &code_unit16_high))
- return false;
- *i += 4;
-
- // If this is a high surrogate, consume the next code unit to get the
- // low surrogate.
- int code_unit16_low = 0;
- if (CBU16_IS_SURROGATE(code_unit16_high)) {
- // Make sure this is the high surrogate. If not, it's an encoding
- // error.
- if (!CBU16_IS_SURROGATE_LEAD(code_unit16_high))
- return false;
-
- // Make sure that the token has more characters to consume the
- // lower surrogate.
- if (*i + 6 >= token.length)
- return false;
- if (*(++(*i) + token.begin) != '\\' || *(++(*i) + token.begin) != 'u')
- return false;
-
- if (!HexStringToInt(StringPiece(token.begin + *i + 1, 4), &code_unit16_low))
- return false;
- *i += 4;
- if (!CBU16_IS_SURROGATE(code_unit16_low) ||
- !CBU16_IS_TRAIL(code_unit16_low)) {
- return false;
- }
- } else if (!CBU16_IS_SINGLE(code_unit16_high)) {
- // If this is not a code point, it's an encoding error.
- return false;
- }
-
- // Convert the UTF-16 code units to a code point and then to a UTF-8
- // code unit sequence.
- char code_point[8] = { 0 };
- size_t offset = 0;
- if (!code_unit16_low) {
- CBU8_APPEND_UNSAFE(code_point, offset, code_unit16_high);
- } else {
- uint32 code_unit32 = CBU16_GET_SUPPLEMENTARY(code_unit16_high,
- code_unit16_low);
- offset = 0;
- CBU8_APPEND_UNSAFE(code_point, offset, code_unit32);
- }
- dest_string->append(code_point);
- return true;
-}
-
-JSONReader::Token JSONReader::ParseToken() {
- EatWhitespaceAndComments();
-
- Token token(Token::INVALID_TOKEN, 0, 0);
- switch (*json_pos_) {
- case '\0':
- token.type = Token::END_OF_INPUT;
- break;
-
- case 'n':
- if (NextStringMatch(kNullString, arraysize(kNullString) - 1))
- token = Token(Token::NULL_TOKEN, json_pos_, 4);
- break;
-
- case 't':
- if (NextStringMatch(kTrueString, arraysize(kTrueString) - 1))
- token = Token(Token::BOOL_TRUE, json_pos_, 4);
- break;
-
- case 'f':
- if (NextStringMatch(kFalseString, arraysize(kFalseString) - 1))
- token = Token(Token::BOOL_FALSE, json_pos_, 5);
- break;
-
- case '[':
- token = Token(Token::ARRAY_BEGIN, json_pos_, 1);
- break;
-
- case ']':
- token = Token(Token::ARRAY_END, json_pos_, 1);
- break;
-
- case ',':
- token = Token(Token::LIST_SEPARATOR, json_pos_, 1);
- break;
-
- case '{':
- token = Token(Token::OBJECT_BEGIN, json_pos_, 1);
- break;
-
- case '}':
- token = Token(Token::OBJECT_END, json_pos_, 1);
- break;
-
- case ':':
- token = Token(Token::OBJECT_PAIR_SEPARATOR, json_pos_, 1);
- break;
-
- case '0':
- case '1':
- case '2':
- case '3':
- case '4':
- case '5':
- case '6':
- case '7':
- case '8':
- case '9':
- case '-':
- token = ParseNumberToken();
- break;
-
- case '"':
- token = ParseStringToken();
- break;
- }
- return token;
-}
-
-void JSONReader::EatWhitespaceAndComments() {
- while (json_pos_ != end_pos_) {
- switch (*json_pos_) {
- case ' ':
- case '\n':
- case '\r':
- case '\t':
- ++json_pos_;
- break;
- case '/':
- // TODO(tc): This isn't in the RFC so it should be a parser flag.
- if (!EatComment())
- return;
- break;
- default:
- // Not a whitespace char, just exit.
- return;
- }
- }
-}
-
-bool JSONReader::EatComment() {
- if ('/' != *json_pos_)
- return false;
-
- char next_char = *(json_pos_ + 1);
- if ('/' == next_char) {
- // Line comment, read until \n or \r
- json_pos_ += 2;
- while (json_pos_ != end_pos_) {
- switch (*json_pos_) {
- case '\n':
- case '\r':
- ++json_pos_;
- return true;
- default:
- ++json_pos_;
- }
- }
- } else if ('*' == next_char) {
- // Block comment, read until */
- json_pos_ += 2;
- while (json_pos_ != end_pos_) {
- if ('*' == *json_pos_ && '/' == *(json_pos_ + 1)) {
- json_pos_ += 2;
- return true;
- }
- ++json_pos_;
- }
- } else {
- return false;
- }
- return true;
-}
-
-bool JSONReader::NextStringMatch(const char* str, size_t length) {
- return strncmp(json_pos_, str, length) == 0;
-}
-
-void JSONReader::SetErrorCode(JsonParseError error,
- const char* error_pos) {
- int line_number = 1;
- int column_number = 1;
-
- // Figure out the line and column the error occurred at.
- for (const char* pos = start_pos_; pos != error_pos; ++pos) {
- if (pos > end_pos_) {
- NOTREACHED();
- return;
- }
-
- if (*pos == '\n') {
- ++line_number;
- column_number = 1;
- } else {
- ++column_number;
- }
- }
-
- error_line_ = line_number;
- error_col_ = column_number;
- error_code_ = error;
+std::string JSONReader::GetErrorMessage() const {
+ return parser_->GetErrorMessage();
}
} // namespace base
« no previous file with comments | « base/json/json_reader.h ('k') | base/json/json_reader_unittest.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698