base/json/json_parser.cc - Issue 10035042: Rewrite base::JSONReader to be 35-40% faster, depending on the input string.

Side by Side Diff: base/json/json_parser.cc

Issue 10035042: Rewrite base::JSONReader to be 35-40% faster, depending on the input string. (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src

Patch Set: '' Created 8 years, 8 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

OLD	NEW
(Empty)
	1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.

	2 // Use of this source code is governed by a BSD-style license that can be

	3 // found in the LICENSE file.

	4

	5 #include "base/json/json_parser.h"

	6

	7 #include "base/float_util.h"

	8 #include "base/logging.h"

	9 #include "base/memory/scoped_ptr.h"

	10 #include "base/stringprintf.h"

	11 #include "base/string_number_conversions.h"
	Mark Mentovai 2012/04/19 16:40:11: '_' < 'p'. I would have stayed quiet about it, but on lines 14 and 15 you sorted '_' < 's', so this is inconsistent. Robert Sesek 2012/05/03 15:34:52: Done.
	12 #include "base/string_util.h"

	13 #include "base/third_party/icu/icu_utf.h"

	14 #include "base/utf_string_conversion_utils.h"

	15 #include "base/utf_string_conversions.h"

	16 #include "base/values.h"

	17

	18 namespace {

	19

	20 const int kStackMaxDepth = 100;

	21

	22 const int32 kExtendedASCIIStart = 0x80;

	23

	24 // This and the class below are used to own the JSON input string for when

	25 // string tokens are stored as StringPiece instead of std::string. This

	26 // optimization avoids about 2/3rds of string memory copies. The constructor

	27 // takes the input string and swaps its data into the new instance. The real

	28 // root value is also Swap()ed into the new instance.

	29 class DictionaryHiddenRootValue : public base::DictionaryValue {

	30 public:

	31 DictionaryHiddenRootValue(std::string* json, Value* root) {

	32 CHECK(root->IsType(Value::TYPE_DICTIONARY));
	Mark Mentovai 2012/04/19 16:40:11 Can this be a DCHECK? Same on line 50. Can this be a DCHECK? Same on line 50. Robert Sesek 2012/05/03 15:34:52 Done. Show quoted text On 2012/04/19 16:40:11, Mark Mentovai wrote: > Can this be a DCHECK? > > Same on line 50. Done.
	33 Swap(static_cast<DictionaryValue*>(root));

	34 json->swap(json_);

	35 }

	36

	37 virtual base::DictionaryValue* DeepCopy() const OVERRIDE {

	38 scoped_ptr<base::Value> data(base::DictionaryValue::DeepCopy());

	39 std::string json(json_);

	40 return new DictionaryHiddenRootValue(&json, data.get());

	41 }

	42

	43 private:

	44 std::string json_;
	Mark Mentovai 2012/04/19 16:40:11 DISALLOW_COPY_AND_ASSIGN? Maybe not. Don’t know ho DISALLOW_COPY_AND_ASSIGN? Maybe not. Don’t know how you’re using it yet. Same on line 62. Robert Sesek 2012/05/03 15:34:52 Done. Show quoted text On 2012/04/19 16:40:11, Mark Mentovai wrote: > DISALLOW_COPY_AND_ASSIGN? Maybe not. Don’t know how you’re using it yet. > > Same on line 62. Done.
	45 };

	46

	47 class ListHiddenRootValue : public base::ListValue {

	48 public:

	49 ListHiddenRootValue(std::string* json, Value* root) {

	50 CHECK(root->IsType(Value::TYPE_LIST));

	51 Swap(static_cast<ListValue*>(root));

	52 json->swap(json_);

	53 }

	54

	55 virtual base::ListValue* DeepCopy() const OVERRIDE {

	56 scoped_ptr<base::Value> data(base::ListValue::DeepCopy());

	57 std::string json(json_);

	58 return new ListHiddenRootValue(&json, data.get());

	59 }

	60

	61 private:

	62 std::string json_;

	63 };

	64

	65 // A variant on StringValue that uses StringPiece instead of copying the string

	66 // into the Value. This can only be stored in a child of hidden root (above),

	67 // otherwise the referenced string will not be guaranteed to outlive it.

	68 class JSONStringValue : public base::Value {

	69 public:

	70 explicit JSONStringValue(const base::StringPiece& piece)

	71 : Value(TYPE_STRING),

	72 string_piece_(piece) {

	73 }

	74

	75 // Value:

	76 bool GetAsString(std::string* out_value) const OVERRIDE {

	77 string_piece_.CopyToString(out_value);

	78 return true;

	79 }

	80 bool GetAsString(string16* out_value) const OVERRIDE {

	81 *out_value = UTF8ToUTF16(string_piece_);

	82 return true;

	83 }

	84 virtual Value* DeepCopy() const OVERRIDE {

	85 return Value::CreateStringValue(string_piece_.as_string());

	86 }

	87 virtual bool Equals(const Value* other) const OVERRIDE {

	88 std::string other_string;

	89 return other->IsType(TYPE_STRING) && other->GetAsString(&other_string) &&
	Mark Mentovai 2012/04/19 16:40:11 Isn’t it cheaper to do the comparison of other and Isn’t it cheaper to do the comparison of other and string_piece_ as StringPieces instead of forcing everyone to be a std::string? Robert Sesek 2012/05/03 15:34:52 Yes it is. Good idea. Show quoted text On 2012/04/19 16:40:11, Mark Mentovai wrote: > Isn’t it cheaper to do the comparison of other and string_piece_ as StringPieces > instead of forcing everyone to be a std::string? Yes it is. Good idea.
	90 other_string == string_piece_.as_string();

	91 }

	92

	93 private:

	94 // The location in the original input stream.

	95 base::StringPiece string_piece_;
	Mark Mentovai 2012/04/19 16:40:11 DISALLOW_COPY_AND_ASSIGN? Maybe not. Don’t know ho DISALLOW_COPY_AND_ASSIGN? Maybe not. Don’t know how you’re using it yet. Robert Sesek 2012/05/03 15:34:52 Done. Show quoted text On 2012/04/19 16:40:11, Mark Mentovai wrote: > DISALLOW_COPY_AND_ASSIGN? Maybe not. Don’t know how you’re using it yet. Done.
	96 };

	97

	98 // Simple class that checks for maximum recursion/"stack overflow."

	99 class StackMarker {

	100 public:

	101 StackMarker(int* depth) : depth_(depth) {
	Mark Mentovai 2012/04/19 16:40:11 explicit explicit Robert Sesek 2012/05/03 15:34:52 Done. Show quoted text On 2012/04/19 16:40:11, Mark Mentovai wrote: > explicit Done.
	102 ++(*depth_);

	103 }

	104 ~StackMarker() {

	105 --(*depth_);

	106 }

	107

	108 bool IsTooDeep() {
	Mark Mentovai 2012/04/19 16:40:11 Can be const. Can be const. Robert Sesek 2012/05/03 15:34:52 Done. Show quoted text On 2012/04/19 16:40:11, Mark Mentovai wrote: > Can be const. Done.
	109 return *depth_ >= kStackMaxDepth;

	110 }

	111

	112 private:

	113 int* depth_;
	Mark Mentovai 2012/04/19 16:40:11 DISALLOW_COPY_AND_ASSIGN DISALLOW_COPY_AND_ASSIGN Mark Mentovai 2012/04/19 16:40:11 The pointer (not value) can be const, which is nic The pointer (not value) can be const, which is nice in cases like this because it keeps you from accidentally incrementing or decrementing the pointer itself. Robert Sesek 2012/05/03 15:34:52 Done. Show quoted text On 2012/04/19 16:40:11, Mark Mentovai wrote: > DISALLOW_COPY_AND_ASSIGN Done. Robert Sesek 2012/05/03 15:34:52 Done. Show quoted text On 2012/04/19 16:40:11, Mark Mentovai wrote: > The pointer (not value) can be const, which is nice in cases like this because > it keeps you from accidentally incrementing or decrementing the pointer itself. Done.
	114 };

	115

	116 } // namespace

	117

	118 namespace base {

	119 namespace internal {

	120

	121 JSONParser::JSONParser(int options)

	122 : options_(options),

	123 start_pos_(NULL),

	124 pos_(0),

	125 index_(0),
	Mark Mentovai 2012/04/19 16:40:11 Is end_pos_ missing intentionally? Is end_pos_ missing intentionally? Robert Sesek 2012/05/03 15:34:52 Nope. Show quoted text On 2012/04/19 16:40:11, Mark Mentovai wrote: > Is end_pos_ missing intentionally? Nope.
	126 stack_depth_(0),

	127 line_number_(0),

	128 index_last_line_(0),

	129 error_code_(JSONReader::JSON_NO_ERROR),

	130 error_line_(0),

	131 error_column_(0) {

	132 }

	133

	134 JSONParser::~JSONParser() {

	135 }

	136

	137 Value* JSONParser::Parse(const std::string& input) {
	Mark Mentovai 2012/04/19 16:40:11 Perhaps this can even accept StringPiece input, po Perhaps this can even accept StringPiece input, potentially saving another copy (depending on the callers).
	138 std::string input_copy;

	139 // If the children of a JSON root can be detached, then hidden roots cannot

	140 // be used, so do not bother copying the input because StringPiece will not

	141 // be used anywhere.

	142 if (!(options_ & JSON_DETACHABLE_CHILDREN)) {

	143 input_copy = input;

	144 start_pos_ = input_copy.data();

	145 } else {

	146 start_pos_ = input.data();

	147 }

	148 pos_ = start_pos_;

	149 end_pos_ = start_pos_ + input.length();

	150 index_ = 0;

	151 line_number_ = 1;

	152 index_last_line_ = 0;

	153

	154 error_code_ = JSONReader::JSON_NO_ERROR;

	155 error_line_ = 0;

	156 error_column_ = 0;

	157

	158 // When the input JSON string starts with a UTF-8 Byte-Order-Mark (U+FEFF)
	Mark Mentovai 2012/04/19 16:40:11 That’s a UTF-16 BOM. Your comment makes it sound That’s a UTF-16 BOM. Your comment makes it sound like this will accept a UTF-16 BOM at the beginning, but that’s not true. (And if it were true, and it accepted a UTF-16 BOM and then continued to parse UTF-8, it’d be pretty weird.) Robert Sesek 2012/05/03 15:34:52 Isn't U+FEFF the BOM code point, which in UTF-16 i Show quoted text On 2012/04/19 16:40:11, Mark Mentovai wrote: > That’s a UTF-16 BOM. > > Your comment makes it sound like this will accept a UTF-16 BOM at the beginning, > but that’s not true. (And if it were true, and it accepted a UTF-16 BOM and then > continued to parse UTF-8, it’d be pretty weird.) Isn't U+FEFF the BOM code point, which in UTF-16 is a single unit that's the same as the point, but in UTF-8 is those three? That's what I was trying to say, but I can see why that'd be confusing, so fixed.
	159 // or <0xEF 0xBB 0xBF>, advance the start position to avoid the

	160 // ParseNextToken function mis-treating a Unicode BOM as an invalid

	161 // character and returning NULL.

	162 if (CanConsume(3) && static_cast<uint8>(*pos_) == 0xEF &&

	163 static_cast<uint8>(*(pos_ + 1)) == 0xBB &&

	164 static_cast<uint8>(*(pos_ + 2)) == 0xBF) {

	165 NextNChars(3);

	166 }

	167

	168 // Parse the first and all subsequent tokens.

	169 scoped_ptr<Value> root(ParseNextToken());

	170 if (!root.get())

	171 return NULL;

	172

	173 // Make sure the input stream is at an end.

	174 if (GetNextToken() != T_END_OF_INPUT) {

	175 if (!CanConsume(1) \|\| (NextChar() && GetNextToken() != T_END_OF_INPUT)) {

	176 ReportError(JSONReader::JSON_UNEXPECTED_DATA_AFTER_ROOT, 1);
	Mark Mentovai 2012/04/19 16:40:11 What’s the “, 1” doing here? The unexpected data m What’s the “, 1” doing here? The unexpected data might not be one column after what ends the expected root.
	177 return NULL;

	178 }

	179 }

	180

	181 // Dictionaries and lists can contain JSONStringValues, so wrap them in a

	182 // hidden root.

	183 if (!(options_ & JSON_DETACHABLE_CHILDREN)) {

	184 if (root->IsType(Value::TYPE_DICTIONARY)) {

	185 return new DictionaryHiddenRootValue(&input_copy, root.release());

	186 } else if (root->IsType(Value::TYPE_LIST)) {

	187 return new ListHiddenRootValue(&input_copy, root.release());

	188 } else if (root->IsType(Value::TYPE_STRING)) {

	189 // A string type could be a JSONStringValue, but because there's no

	190 // corresponding HiddenRootValue, the memory will be lost. Deep copy to

	191 // preserve it.

	192 return root->DeepCopy();

	193 }

	194 }

	195

	196 // All other values can be returned directly.

	197 return root.release();

	198 }

	199

	200 JSONReader::JsonParseError JSONParser::error_code() const {

	201 return error_code_;

	202 }

	203

	204 std::string JSONParser::GetErrorMessage() const {

	205 return FormatErrorMessage(error_line_, error_column_,

	206 JSONReader::ErrorCodeToString(error_code_));

	207 }

	208

	209 // StringBuilder ///////////////////////////////////////////////////////////////

	210

	211 JSONParser::StringBuilder::StringBuilder()

	212 : pos_(NULL),

	213 length_(0),

	214 string_(NULL) {

	215 }

	216

	217 JSONParser::StringBuilder::StringBuilder(const char* pos)

	218 : pos_(pos),

	219 length_(0),

	220 string_(NULL) {

	221 }

	222

	223 void JSONParser::StringBuilder::Swap(StringBuilder* other) {

	224 if (other->string_) {
	Mark Mentovai 2012/04/19 16:40:12 This implementation seems wrong. What if other->st This implementation seems wrong. What if other->string_ is set, string_ is not, and pos_ and length_ are set? Shouldn’t you just be swapping all three fields without regard to whether other->string_ is set? Robert Sesek 2012/05/03 15:34:52 Done. Show quoted text On 2012/04/19 16:40:12, Mark Mentovai wrote: > This implementation seems wrong. What if other->string_ is set, string_ is not, > and pos_ and length_ are set? > > Shouldn’t you just be swapping all three fields without regard to whether > other->string_ is set? Done.
	225 std::swap(other->string_, string_);

	226 } else {

	227 std::swap(other->pos_, pos_);

	228 std::swap(other->length_, length_);

	229 }

	230 }

	231

	232 JSONParser::StringBuilder::~StringBuilder() {

	233 delete string_;

	234 }

	235

	236 void JSONParser::StringBuilder::Append(const int32& c) {

	237 if (string_) {

	238 string_->push_back(c);
	Mark Mentovai 2012/04/19 16:40:12 The header never said what restrictions were place The header never said what restrictions were placed on the value of “c”, but using an int32 and not having any checks around the value, not to mention the “else” branch of this conditional, seems to imply that you want to accept arbitrary Unicode code points. You can’t push a code point (int32) onto a std::string (effectively char*) like this. Shouldn’t you be pushing one to four characters onto the string depending on the value of c, like you do below? Shouldn’t you be UTF-8-encoding c?
	239 } else {

	240 // A code point is 32 bits/4 bytes, which needs to be mapped to 1-byte

	241 // \|char\|s.

	242 if (c < kExtendedASCIIStart) {

	243 ++length_;

	244 } else if (c < 0x0800) {

	245 length_ += 2;

	246 } else if (c < 0x1000) {

	247 length_ += 3;

	248 } else {

	249 length_ += 4;

	250 }

	251 }

	252 }

	253

	254 void JSONParser::StringBuilder::AppendString(const std::string& str) {

	255 DCHECK(string_);

	256 string_->append(str);

	257 }

	258

	259 void JSONParser::StringBuilder::Convert() {

	260 if (string_)

	261 return;

	262 string_ = new std::string(pos_, length_);

	263 }

	264

	265 bool JSONParser::StringBuilder::CanBeStringPiece() {

	266 return !string_;
	Mark Mentovai 2012/04/19 16:40:12 Oh, so once something is converted to a string, it Oh, so once something is converted to a string, it can never be a StringPiece again? That wasn’t clear to me based on the header. Robert Sesek 2012/05/03 15:34:52 Clarified. Show quoted text On 2012/04/19 16:40:12, Mark Mentovai wrote: > Oh, so once something is converted to a string, it can never be a StringPiece > again? That wasn’t clear to me based on the header. Clarified.
	267 }

	268

	269 StringPiece JSONParser::StringBuilder::AsStringPiece() {

	270 if (string_)

	271 return StringPiece();

	272 return StringPiece(pos_, length_);

	273 }

	274

	275 std::string JSONParser::StringBuilder::AsString() {

	276 if (!string_)

	277 Convert();

	278 return *string_;
	Mark Mentovai 2012/04/19 16:40:12 I may have said this in the other file, but if thi I may have said this in the other file, but if this can return a const&, that’d be better. Robert Sesek 2012/05/03 15:34:52 Done. Show quoted text On 2012/04/19 16:40:12, Mark Mentovai wrote: > I may have said this in the other file, but if this can return a const&, that’d > be better. Done.
	279 }

	280

	281 // JSONParser private //////////////////////////////////////////////////////////
	Mark Mentovai 2012/04/19 16:40:12: Stopping here for lunch. Robert Sesek 2012/05/03 15:34:52: The flatbreads were good today, yes?
	282

	283 inline bool JSONParser::CanConsume(int length) {

	284 return pos_ + length <= end_pos_;

	285 }

	286

	287 const char* JSONParser::NextChar() {

	288 DCHECK(CanConsume(1));

	289 ++index_;

	290 ++pos_;

	291 return pos_;

	292 }

	293

	294 void JSONParser::NextNChars(int n) {

	295 DCHECK(CanConsume(n));

	296 index_ += n;

	297 pos_ += n;

	298 }

	299

	300 JSONParser::Token JSONParser::GetNextToken() {

	301 EatWhitespaceAndComments();

	302 if (!CanConsume(1))

	303 return T_END_OF_INPUT;

	304

	305 switch (*pos_) {

	306 case '{':

	307 return T_OBJECT_BEGIN;

	308 case '}':

	309 return T_OBJECT_END;

	310 case '[':

	311 return T_ARRAY_BEGIN;

	312 case ']':

	313 return T_ARRAY_END;

	314 case '"':

	315 return T_STRING;

	316 case '0':

	317 case '1':

	318 case '2':

	319 case '3':

	320 case '4':

	321 case '5':

	322 case '6':

	323 case '7':

	324 case '8':

	325 case '9':

	326 case '-':

	327 return T_NUMBER;

	328 case 't':

	329 return T_BOOL_TRUE;

	330 case 'f':

	331 return T_BOOL_FALSE;

	332 case 'n':

	333 return T_NULL;

	334 case ',':

	335 return T_LIST_SEPARATOR;

	336 case ':':

	337 return T_OBJECT_PAIR_SEPARATOR;

	338 default:

	339 return T_INVALID_TOKEN;

	340 }

	341 }

	342

	343 void JSONParser::EatWhitespaceAndComments() {

	344 while (pos_ < end_pos_) {

	345 switch (*pos_) {

	346 case '\r':

	347 case '\n':

	348 index_last_line_ = index_;

	349 ++line_number_;

	350 // Fall through.

	351 case ' ':

	352 case '\t':

	353 NextChar();

	354 break;

	355 case '/':

	356 if (!EatComment())

	357 return;

	358 break;

	359 default:

	360 return;

	361 }

	362 }

	363 }

	364

	365 bool JSONParser::EatComment() {

	366 if (*pos_ != '/' \|\| !CanConsume(1))

	367 return false;

	368

	369 char next_char = *NextChar();

	370 if (next_char == '/') {

	371 // Single line comment, read to newline.

	372 while (CanConsume(1)) {

	373 char next_char = *NextChar();

	374 if (next_char == '\n' \|\| next_char == '\r')

	375 return true;

	376 }

	377 } else if (next_char == '*') {

	378 // Block comment, read until end marker.

	379 while (CanConsume(2)) {

	380 if (NextChar() == '' && *NextChar() == '/') {

	381 // EatWhitespaceAndComments will inspect pos_, which will still be on

	382 // the last / of the comment, so advance once more (which may also be

	383 // end of input).

	384 NextChar();

	385 return true;

	386 }

	387 }

	388 }

	389

	390 return false;

	391 }

	392

	393 Value* JSONParser::ParseNextToken() {

	394 return ParseToken(GetNextToken());

	395 }

	396

	397 Value* JSONParser::ParseToken(Token token) {

	398 switch (token) {

	399 case T_OBJECT_BEGIN:

	400 return ConsumeDictionary();

	401 case T_ARRAY_BEGIN:

	402 return ConsumeList();

	403 case T_STRING:

	404 return ConsumeString();

	405 case T_NUMBER:

	406 return ConsumeNumber();

	407 case T_BOOL_TRUE:

	408 case T_BOOL_FALSE:

	409 case T_NULL:

	410 return ConsumeLiteral();

	411 default:

	412 ReportError(JSONReader::JSON_UNEXPECTED_TOKEN, 1);

	413 return NULL;

	414 }

	415 }

	416

	417 Value* JSONParser::ConsumeDictionary() {

	418 if (*pos_ != '{') {

	419 ReportError(JSONReader::JSON_UNEXPECTED_TOKEN, 1);

	420 return NULL;

	421 }

	422

	423 StackMarker depth_check(&stack_depth_);

	424 if (depth_check.IsTooDeep()) {

	425 ReportError(JSONReader::JSON_TOO_MUCH_NESTING, 1);

	426 return NULL;

	427 }

	428

	429 scoped_ptr<DictionaryValue> dict(new DictionaryValue);

	430

	431 NextChar();

	432 Token token = GetNextToken();

	433 while (token != T_OBJECT_END) {

	434 if (token != T_STRING) {

	435 ReportError(JSONReader::JSON_UNQUOTED_DICTIONARY_KEY, 1);

	436 return NULL;

	437 }

	438

	439 // First consume the key.

	440 StringBuilder key;

	441 if (!ConsumeStringRaw(&key)) {

	442 return NULL;

	443 }

	444

	445 // Read the separator.

	446 NextChar();

	447 token = GetNextToken();

	448 if (token != T_OBJECT_PAIR_SEPARATOR) {

	449 ReportError(JSONReader::JSON_SYNTAX_ERROR, 1);

	450 return NULL;

	451 }

	452

	453 // The token is the value. Ownership transfers to \|dict\|.

	454 NextChar();

	455 Value* value = ParseNextToken();

	456 if (!value) {

	457 return NULL;

	458 }

	459

	460 dict->SetWithoutPathExpansion(key.AsString(), value);

	461

	462 NextChar();

	463 token = GetNextToken();

	464 if (token == T_LIST_SEPARATOR) {

	465 NextChar();

	466 token = GetNextToken();

	467 if (token == T_OBJECT_END && !(options_ & JSON_ALLOW_TRAILING_COMMAS)) {

	468 ReportError(JSONReader::JSON_TRAILING_COMMA, 1);

	469 return NULL;

	470 }

	471 } else if (token != T_OBJECT_END) {

	472 ReportError(JSONReader::JSON_SYNTAX_ERROR, 0);

	473 return NULL;

	474 }

	475 }

	476

	477 if (token != T_OBJECT_END)

	478 return NULL;

	479

	480 return dict.release();

	481 }

	482

	483 Value* JSONParser::ConsumeList() {

	484 if (*pos_ != '[') {

	485 ReportError(JSONReader::JSON_UNEXPECTED_TOKEN, 1);

	486 return NULL;

	487 }

	488

	489 StackMarker depth_check(&stack_depth_);

	490 if (depth_check.IsTooDeep()) {

	491 ReportError(JSONReader::JSON_TOO_MUCH_NESTING, 1);

	492 return NULL;

	493 }

	494

	495 scoped_ptr<ListValue> list(new ListValue);

	496

	497 NextChar();

	498 Token token = GetNextToken();

	499 while (token != T_ARRAY_END) {

	500 Value* item = ParseToken(token);

	501 if (!item) {

	502 // ReportError from deeper level.

	503 return NULL;

	504 }

	505

	506 list->Append(item);

	507

	508 NextChar();

	509 token = GetNextToken();

	510 if (token == T_LIST_SEPARATOR) {

	511 NextChar();

	512 token = GetNextToken();

	513 if (token == T_ARRAY_END && !(options_ & JSON_ALLOW_TRAILING_COMMAS)) {

	514 ReportError(JSONReader::JSON_TRAILING_COMMA, 1);

	515 return NULL;

	516 }

	517 } else if (token != T_ARRAY_END) {

	518 ReportError(JSONReader::JSON_SYNTAX_ERROR, 1);

	519 return NULL;

	520 }

	521 }

	522

	523 if (token != T_ARRAY_END)

	524 return NULL;

	525

	526 return list.release();

	527 }

	528

	529 Value* JSONParser::ConsumeString() {

	530 StringBuilder string;

	531 if (!ConsumeStringRaw(&string))

	532 return NULL;

	533

	534 // Create the Value representation, either using a hidden root, if configured

	535 // to do so, and the string can be represented by StringPiece.

	536 if (string.CanBeStringPiece() && !(options_ & JSON_DETACHABLE_CHILDREN)) {

	537 return new JSONStringValue(string.AsStringPiece());

	538 } else {

	539 if (string.CanBeStringPiece())

	540 string.Convert();

	541 return new StringValue(string.AsString());

	542 }

	543 }

	544

	545 bool JSONParser::ConsumeStringRaw(StringBuilder* out) {

	546 if (*pos_ != '"') {

	547 ReportError(JSONReader::JSON_UNEXPECTED_TOKEN, 1);

	548 return false;

	549 }

	550

	551 // StringBuilder will internally build a StringPiece unless a UTF-16

	552 // conversion occurs, at which point it will perform a copy into a

	553 // std::string.

	554 StringBuilder string(NextChar());

	555

	556 int length = end_pos_ - start_pos_;

	557 int32 next_char = 0;

	558

	559 DCHECK_EQ(pos_, (start_pos_ + index_));

	560

	561 while (CanConsume(1)) {

	562 pos_ = start_pos_ + index_; // CBU8_NEXT is postcrement.

	563 CBU8_NEXT(start_pos_, index_, length, next_char);

	564 if (next_char < 0 \|\| !IsValidCharacter(next_char)) {

	565 ReportError(JSONReader::JSON_UNSUPPORTED_ENCODING, 1);

	566 return false;

	567 }

	568

	569 // If this character is an escape sequence...

	570 if (next_char == '\\') {

	571 // The input string will be adjusted (either by combining the two

	572 // characters of an encoded escape sequence, or with a UTF conversion),

	573 // so using StringPiece isn't possible -- force a conversion.

	574 string.Convert();

	575

	576 if (!CanConsume(1)) {

	577 ReportError(JSONReader::JSON_INVALID_ESCAPE, 0);

	578 return false;

	579 }

	580

	581 switch (*NextChar()) {

	582 // Allowed esape sequences:

	583 case 'x': { // UTF-8 sequence.

	584 if (!CanConsume(2)) {

	585 ReportError(JSONReader::JSON_INVALID_ESCAPE, 1);

	586 return false;

	587 }

	588

	589 int hex_digit = 0;

	590 if (!HexStringToInt(StringPiece(NextChar(), 2), &hex_digit)) {

	591 ReportError(JSONReader::JSON_INVALID_ESCAPE, -1);

	592 return false;

	593 }

	594 NextChar();

	595

	596 string.Append(hex_digit);

	597 break;

	598 }

	599 case 'u': { // UTF-16 sequence.

	600 // UTF units are of the form \uXXXX.

	601 if (!CanConsume(5)) { // 5 being 'u' and four HEX digits.

	602 ReportError(JSONReader::JSON_INVALID_ESCAPE, 0);

	603 return false;

	604 }

	605

	606 // Skip the 'u'.

	607 NextChar();

	608

	609 std::string utf8_units;

	610 if (!DecodeUTF16(&utf8_units)) {

	611 ReportError(JSONReader::JSON_INVALID_ESCAPE, -1);

	612 return false;

	613 }

	614

	615 string.AppendString(utf8_units);

	616 break;

	617 }

	618 case '"':

	619 string.Append('"');

	620 break;

	621 case '\\':

	622 string.Append('\\');

	623 break;

	624 case '/':

	625 string.Append('/');

	626 break;

	627 case 'b':

	628 string.Append('\b');

	629 break;

	630 case 'f':

	631 string.Append('\f');

	632 break;

	633 case 'n':

	634 string.Append('\n');

	635 break;

	636 case 'r':

	637 string.Append('\r');

	638 break;

	639 case 't':

	640 string.Append('\t');

	641 break;

	642 case 'v': // Not listed as valid escape sequence in the RFC.

	643 string.Append('\v');

	644 break;

	645 // All other escape squences are illegal.

	646 default:

	647 ReportError(JSONReader::JSON_INVALID_ESCAPE, 0);

	648 return false;

	649 }

	650 } else if (next_char == '"') {

	651 --index_; // Rewind by one because of CBU8_NEXT.

	652 out->Swap(&string);

	653 return true;

	654 } else if (next_char < kExtendedASCIIStart) {

	655 string.Append(next_char);

	656 } else {

	657 // Anything outside of the basic ASCII plane will need to be

	658 // decomposed from int32 to a multi-byte sequence.

	659 char utf8_units[4] = { 0 };

	660 int offset = 0;

	661 string.Convert();

	662 CBU8_APPEND_UNSAFE(utf8_units, offset, next_char);

	663 string.AppendString(utf8_units);

	664 }

	665 }

	666

	667 ReportError(JSONReader::JSON_SYNTAX_ERROR, 0);

	668 return false;

	669 }

	670

	671 // Entry is at the first X in \uXXXX.

	672 bool JSONParser::DecodeUTF16(std::string* dest_string) {

	673 if (!CanConsume(4))

	674 return false;

	675

	676 // This is a 32-bit field because the shift operations in the

	677 // conversion process below cause MSVC to error about "data loss."

	678 // This only stores UTF-16 code units, though.

	679 // Consume the UTF-16 code unit, which may be a high surrogate.

	680 int code_unit16_high = 0;

	681 if (!HexStringToInt(StringPiece(pos_, 4), &code_unit16_high))

	682 return false;

	683

	684 // Only add 3, not 4, because at the end of this iteration, the parser has

	685 // finished working with the last digit of the UTF sequence, meaning that

	686 // the next spin of the loop will advance to the next byte.

	687 NextNChars(3);

	688

	689 // If this is a high surrogate, consume the next code unit to get the

	690 // low surrogate.

	691 int code_unit16_low = 0;

	692 if (CBU16_IS_SURROGATE(code_unit16_high)) {

	693 // Make sure this is the high surrogate. If not, it's an encoding

	694 // error.

	695 if (!CBU16_IS_SURROGATE_LEAD(code_unit16_high))

	696 return false;

	697

	698 // Make sure that the token has more characters to consume the

	699 // lower surrogate.

	700 if (!CanConsume(6)) // 6 being '\' 'u' and four HEX digits.

	701 return false;

	702 if (NextChar() != '\\' \|\| NextChar() != 'u')

	703 return false;

	704

	705 NextChar(); // Read past 'u'.

	706 if (!HexStringToInt(StringPiece(pos_, 4), &code_unit16_low))

	707 return false;

	708

	709 NextNChars(3);

	710

	711 if (!CBU16_IS_SURROGATE(code_unit16_low) \|\|

	712 !CBU16_IS_TRAIL(code_unit16_low)) {

	713 return false;

	714 }

	715 } else if (!CBU16_IS_SINGLE(code_unit16_high)) {

	716 // If this is not a code point, it's an encoding error.

	717 return false;

	718 }

	719

	720 // Convert the UTF-16 code units to a code point and then to a UTF-8

	721 // code unit sequence.

	722 char code_point[8] = { 0 };

	723 size_t offset = 0;

	724 if (!code_unit16_low) {

	725 CBU8_APPEND_UNSAFE(code_point, offset, code_unit16_high);

	726 } else {

	727 uint32 code_unit32 = CBU16_GET_SUPPLEMENTARY(code_unit16_high,

	728 code_unit16_low);

	729 offset = 0;

	730 CBU8_APPEND_UNSAFE(code_point, offset, code_unit32);

	731 }

	732 dest_string->append(code_point);

	733 return true;

	734 }

	735

	736 Value* JSONParser::ConsumeNumber() {

	737 const char* num_start = pos_;

	738 const int start_index = index_;

	739 int end_index = start_index;

	740

	741 if (*pos_ == '-')

	742 NextChar();

	743

	744 if (!ReadInt(false)) {

	745 ReportError(JSONReader::JSON_SYNTAX_ERROR, 1);

	746 return NULL;

	747 }

	748 end_index = index_;

	749

	750 // The optional faction part.

	751 if (*pos_ == '.') {

	752 if (!CanConsume(1)) {

	753 ReportError(JSONReader::JSON_SYNTAX_ERROR, 1);

	754 return NULL;

	755 }

	756 NextChar();

	757 if (!ReadInt(true)) {

	758 ReportError(JSONReader::JSON_SYNTAX_ERROR, 1);

	759 return NULL;

	760 }

	761 end_index = index_;

	762 }

	763

	764 // Optional exponent part.

	765 if (pos_ == 'e' \|\| pos_ == 'E') {

	766 NextChar();

	767 if (pos_ == '-' \|\| pos_ == '+')

	768 NextChar();

	769 if (!ReadInt(true)) {

	770 ReportError(JSONReader::JSON_SYNTAX_ERROR, 1);

	771 return NULL;

	772 }

	773 end_index = index_;

	774 }

	775

	776 // ReadInt is greedy because numbers have no easily detectable sentinel,

	777 // so save off where the parser should be on exit (see Consume invariant at

	778 // the top of the header), then make sure the next token is one which is

	779 // valid.

	780 const char* exit_pos = pos_ - 1;

	781 int exit_index = index_ - 1;

	782

	783 switch (GetNextToken()) {

	784 case T_OBJECT_END:

	785 case T_ARRAY_END:

	786 case T_LIST_SEPARATOR:

	787 case T_END_OF_INPUT:

	788 break;

	789 default:

	790 ReportError(JSONReader::JSON_SYNTAX_ERROR, 1);

	791 return NULL;

	792 }

	793

	794 pos_ = exit_pos;

	795 index_ = exit_index;

	796

	797 StringPiece num_string(num_start, end_index - start_index);

	798

	799 int num_int;

	800 if (StringToInt(num_string, &num_int))

	801 return Value::CreateIntegerValue(num_int);

	802

	803 double num_double;

	804 if (base::StringToDouble(num_string.as_string(), &num_double) &&

	805 IsFinite(num_double)) {

	806 return Value::CreateDoubleValue(num_double);

	807 }

	808

	809 return NULL;

	810 }

	811

	812 bool JSONParser::ReadInt(bool allow_leading_zeros) {

	813 char first = *pos_;

	814 int len = 0;

	815

	816 char c = first;

	817 while (CanConsume(1) && IsAsciiDigit(c)) {

	818 c = *NextChar();

	819 ++len;

	820 }

	821

	822 if (len == 0)

	823 return false;

	824

	825 if (!allow_leading_zeros && len > 1 && first == '0')

	826 return false;

	827

	828 return true;

	829 }

	830

	831 Value* JSONParser::ConsumeLiteral() {

	832 switch (*pos_) {

	833 case 't':

	834 if (!CanConsume(3) \|\| !StringsAreEqual(pos_, "true", 4)) {

	835 ReportError(JSONReader::JSON_SYNTAX_ERROR, 1);

	836 return NULL;

	837 }

	838 NextNChars(3);

	839 return Value::CreateBooleanValue(true);

	840 case 'f':

	841 if (!CanConsume(4) \|\| !StringsAreEqual(pos_, "false", 5)) {

	842 ReportError(JSONReader::JSON_SYNTAX_ERROR, 1);

	843 return NULL;

	844 }

	845 NextNChars(4);

	846 return Value::CreateBooleanValue(false);

	847 case 'n':

	848 if (!CanConsume(3) \|\| !StringsAreEqual(pos_, "null", 4)) {

	849 ReportError(JSONReader::JSON_SYNTAX_ERROR, 1);

	850 return NULL;

	851 }

	852 NextNChars(3);

	853 return Value::CreateNullValue();

	854 default:

	855 ReportError(JSONReader::JSON_UNEXPECTED_TOKEN, 1);

	856 return NULL;

	857 }

	858 }

	859

	860 bool JSONParser::StringsAreEqual(const char* one, const char* two, size_t len) {

	861 return strncmp(one, two, len) == 0;

	862 }

	863

	864 void JSONParser::ReportError(JSONReader::JsonParseError code,

	865 int column_adjust) {

	866 error_code_ = code;

	867 error_line_ = line_number_;

	868 error_column_ = index_ - index_last_line_ + column_adjust;

	869 }

	870

	871 // static

	872 std::string JSONParser::FormatErrorMessage(int line, int column,

	873 const std::string& description) {

	874 if (line \|\| column) {

	875 return base::StringPrintf(
	tfarina 2012/04/19 22:54:47 nit: base:: here is not necessary as we are in bas nit: base:: here is not necessary as we are in base namespace. Robert Sesek 2012/05/03 15:34:52 Done. Show quoted text On 2012/04/19 22:54:47, tfarina wrote: > nit: base:: here is not necessary as we are in base namespace. Done.
	876 "Line: %i, column: %i, %s", line, column, description.c_str());

	877 }

	878 return description;

	879 }

	880

	881 } // namespace internal

	882 } // namespace base

OLD	NEW

« base/json/json_parser.h ('K') | « base/json/json_parser.h ('k') | base/json/json_parser_unittest.cc » ('j') | base/json/json_parser_unittest.cc » ('J')