base/json/json_parser.cc - Issue 10035042: Rewrite base::JSONReader to be 35-40% faster, depending on the input string.

Side by Side Diff: base/json/json_parser.cc

Issue 10035042: Rewrite base::JSONReader to be 35-40% faster, depending on the input string. (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src

Patch Set: Created 8 years, 8 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

OLD	NEW
(Empty)
	1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.

	2 // Use of this source code is governed by a BSD-style license that can be

	3 // found in the LICENSE file.

	4

	5 #include "base/json/json_parser.h"

	6

	7 #include "base/float_util.h"

	8 #include "base/logging.h"

	9 #include "base/memory/scoped_ptr.h"

	10 #include "base/stringprintf.h"

	11 #include "base/string_number_conversions.h"

	12 #include "base/string_util.h"

	13 #include "base/third_party/icu/icu_utf.h"

	14 #include "base/utf_string_conversion_utils.h"

	15 #include "base/utf_string_conversions.h"

	16 #include "base/values.h"

	17

	18 namespace {

	19

	20 const int kStackMaxDepth = 100;

	21

	22 const int32 kExtendedASCIIStart = 0x80;

	23

	24 // This and the class below are used to own the JSON input string for when

	25 // string tokens are stored as StringPiece instead of std::string. This

	26 // optimization avoids about 2/3rds of string memory copies. The constructor

	27 // takes the input string and swaps its data into the new instance. The real

	28 // root value is also Swap()ed into the new instance.

	29 class DictionaryHiddenRootValue : public base::DictionaryValue {

	30 public:

	31 DictionaryHiddenRootValue(std::string* json, Value* root) {

	32 CHECK(root->IsType(Value::TYPE_DICTIONARY));

	33 Swap(static_cast<DictionaryValue*>(root));

	34 json->swap(json_);

	35 }

	36

	37 virtual base::DictionaryValue* DeepCopy() const OVERRIDE {

	38 scoped_ptr<base::Value> data(base::DictionaryValue::DeepCopy());

	39 std::string json(json_);

	40 return new DictionaryHiddenRootValue(&json, data.get());

	41 }

	42

	43 private:

	44 std::string json_;

	45 };

	46

	47 class ListHiddenRootValue : public base::ListValue {

	48 public:

	49 ListHiddenRootValue(std::string* json, Value* root) {

	50 CHECK(root->IsType(Value::TYPE_LIST));

	51 Swap(static_cast<ListValue*>(root));

	52 json->swap(json_);

	53 }

	54

	55 virtual base::ListValue* DeepCopy() const OVERRIDE {

	56 scoped_ptr<base::Value> data(base::ListValue::DeepCopy());

	57 std::string json(json_);

	58 return new ListHiddenRootValue(&json, data.get());

	59 }

	60

	61 private:

	62 std::string json_;

	63 };

	64

	65 // A variant on StringValue that uses StringPiece instead of copying the string

	66 // into the Value. This can only be stored in a child of hidden root (above),

	67 // otherwise the referenced string will not be guaranteed to outlive it.

	68 class JSONStringValue : public base::Value {

	69 public:

	70 explicit JSONStringValue(const base::StringPiece& piece)

	71 : Value(TYPE_STRING),

	72 string_piece_(piece) {

	73 }

	74

	75 // Value:

	76 bool GetAsString(std::string* out_value) const OVERRIDE {

	77 string_piece_.CopyToString(out_value);

	78 return true;

	79 }

	80 bool GetAsString(string16* out_value) const OVERRIDE {

	81 *out_value = UTF8ToUTF16(string_piece_);

	82 return true;

	83 }

	84 virtual Value* DeepCopy() const OVERRIDE {

	85 return Value::CreateStringValue(string_piece_.as_string());

	86 }

	87 virtual bool Equals(const Value* other) const OVERRIDE {

	88 std::string other_string;

	89 return other->IsType(TYPE_STRING) && other->GetAsString(&other_string) &&

	90 other_string == string_piece_.as_string();

	91 }

	92

	93 private:

	94 // The location in the original input stream.

	95 base::StringPiece string_piece_;

	96 };

	97

	98 // Simple class that checks for maximum recursion/"stack overflow."

	99 class StackMarker {

	100 public:

	101 StackMarker(int* depth) : depth_(depth) {

	102 ++(*depth_);

	103 }

	104 ~StackMarker() {

	105 --(*depth_);

	106 }

	107

	108 bool IsTooDeep() {

	109 return *depth_ >= kStackMaxDepth;

	110 }

	111

	112 private:

	113 int* depth_;

	114 };

	115

	116 } // namespace

	117

	118 namespace base {

	119 namespace internal {

	120

	121 JSONParser::JSONParser(int options)

	122 : options_(options),

	123 start_pos_(NULL),

	124 pos_(0),

	125 index_(0),

	126 stack_depth_(0),

	127 line_number_(0),

	128 index_last_line_(0),

	129 error_code_(JSONReader::JSON_NO_ERROR),

	130 error_line_(0),

	131 error_column_(0) {

	132 }

	133

	134 JSONParser::~JSONParser() {

	135 }

	136

	137 Value* JSONParser::Parse(const std::string& input) {

	138 std::string input_copy;

	139 // If the children of a JSON root can be detached, then hidden roots cannot

	140 // be used, so do not bother copying the input because StringPiece will not

	141 // be used anywhere.

	142 if (!(options_ & JSON_DETACHABLE_CHILDREN)) {

	143 input_copy = input;

	144 start_pos_ = input_copy.data();

	145 } else {

	146 start_pos_ = input.data();

	147 }

	148 pos_ = start_pos_;

	149 end_pos_ = start_pos_ + input.length();

	150 index_ = 0;

	151 line_number_ = 1;

	152 index_last_line_ = 0;

	153

	154 error_code_ = JSONReader::JSON_NO_ERROR;

	155 error_line_ = 0;

	156 error_column_ = 0;

	157

	158 // When the input JSON string starts with a UTF-8 Byte-Order-Mark (U+FEFF)

	159 // or <0xEF 0xBB 0xBF>, advance the start position to avoid the

	160 // ParseNextToken function mis-treating a Unicode BOM as an invalid

	161 // character and returning NULL.

	162 if (CanConsume(3) && static_cast<uint8>(*pos_) == 0xEF &&

	163 static_cast<uint8>(*(pos_ + 1)) == 0xBB &&

	164 static_cast<uint8>(*(pos_ + 2)) == 0xBF) {

	165 NextNChars(3);

	166 }

	167

	168 // Parse the first and all subsequent tokens.

	169 scoped_ptr<Value> root(ParseNextToken());

	170 if (!root.get())

	171 return NULL;

	172

	173 // Make sure the input stream is at an end.

	174 if (GetNextToken() != T_END_OF_INPUT) {

	175 if (!CanConsume(1) \|\| (NextChar() && GetNextToken() != T_END_OF_INPUT)) {

	176 ReportError(JSONReader::JSON_UNEXPECTED_DATA_AFTER_ROOT, 1);

	177 return NULL;

	178 }

	179 }

	180

	181 // Dictionaries and lists can contain JSONStringValues, so wrap them in a

	182 // hidden root.

	183 if (!(options_ & JSON_DETACHABLE_CHILDREN)) {

	184 if (root->IsType(Value::TYPE_DICTIONARY)) {

	185 return new DictionaryHiddenRootValue(&input_copy, root.release());

	186 } else if (root->IsType(Value::TYPE_LIST)) {

	187 return new ListHiddenRootValue(&input_copy, root.release());

	188 } else if (root->IsType(Value::TYPE_STRING)) {

	189 // A string type could be a JSONStringValue, but because there's no

	190 // corresponding HiddenRootValue, the memory will be lost. Deep copy to

	191 // preserve it.

	192 return root->DeepCopy();

	193 }

	194 }

	195

	196 // All other values can be returned directly.

	197 return root.release();

	198 }

	199

	200 JSONReader::JsonParseError JSONParser::error_code() const {

	201 return error_code_;

	202 }

	203

	204 std::string JSONParser::GetErrorMessage() const {

	205 return FormatErrorMessage(error_line_, error_column_,

	206 JSONReader::ErrorCodeToString(error_code_));

	207 }

	208

	209 // StringBuilder ///////////////////////////////////////////////////////////////

	210

	211 JSONParser::StringBuilder::StringBuilder()

	212 : pos_(NULL),

	213 length_(0),

	214 string_(NULL) {

	215 }

	216

	217 JSONParser::StringBuilder::StringBuilder(const char* pos)

	218 : pos_(pos),

	219 length_(0),

	220 string_(NULL) {

	221 }

	222

	223 void JSONParser::StringBuilder::Swap(StringBuilder* other) {

	224 if (other->string_) {

	225 std::swap(other->string_, string_);

	226 } else {

	227 std::swap(other->pos_, pos_);

	228 std::swap(other->length_, length_);

	229 }

	230 }

	231

	232 JSONParser::StringBuilder::~StringBuilder() {

	233 delete string_;

	234 }

	235

	236 void JSONParser::StringBuilder::Append(const int32& c) {

	237 if (string_) {

	238 string_->push_back(c);

	239 } else {

	240 // A code point is 32 bits/4 bytes, which needs to be mapped to 1-byte

	241 // \|char\|s.

	242 if (c < kExtendedASCIIStart) {

	243 ++length_;

	244 } else if (c < 0x0800) {

	245 length_ += 2;

	246 } else if (c < 0x1000) {

	247 length_ += 3;

	248 } else {

	249 length_ += 4;

	250 }

	251 }

	252 }

	253

	254 void JSONParser::StringBuilder::AppendString(const std::string& str) {

	255 DCHECK(string_);

	256 string_->append(str);

	257 }

	258

	259 void JSONParser::StringBuilder::Convert() {

	260 if (string_)

	261 return;

	262 string_ = new std::string(pos_, length_);

	263 }

	264

	265 bool JSONParser::StringBuilder::CanBeStringPiece() {

	266 return !string_;

	267 }

	268

	269 StringPiece JSONParser::StringBuilder::AsStringPiece() {

	270 if (string_)

	271 return StringPiece();

	272 return StringPiece(pos_, length_);

	273 }

	274

	275 std::string JSONParser::StringBuilder::AsString() {

	276 if (!string_)

	277 Convert();

	278 return *string_;

	279 }

	280

	281 // JSONParser private //////////////////////////////////////////////////////////

	282

	283 inline bool JSONParser::CanConsume(int length) {

	284 return pos_ + length <= end_pos_;

	285 }

	286

	287 const char* JSONParser::NextChar() {

	288 DCHECK(CanConsume(1));

	289 ++index_;

	290 ++pos_;

	291 return pos_;

	292 }

	293

	294 void JSONParser::NextNChars(int n) {

	295 DCHECK(CanConsume(n));

	296 index_ += n;

	297 pos_ += n;

	298 }

	299

	300 JSONParser::Token JSONParser::GetNextToken() {

	301 EatWhitespaceAndComments();

	302 if (!CanConsume(1))

	303 return T_END_OF_INPUT;

	304

	305 switch (*pos_) {

	306 case '{':

	307 return T_OBJECT_BEGIN;

	308 case '}':

	309 return T_OBJECT_END;

	310 case '[':

	311 return T_ARRAY_BEGIN;

	312 case ']':

	313 return T_ARRAY_END;

	314 case '"':

	315 return T_STRING;

	316 case '0':

	317 case '1':

	318 case '2':

	319 case '3':

	320 case '4':

	321 case '5':

	322 case '6':

	323 case '7':

	324 case '8':

	325 case '9':

	326 case '-':

	327 return T_NUMBER;

	328 case 't':

	329 return T_BOOL_TRUE;

	330 case 'f':

	331 return T_BOOL_FALSE;

	332 case 'n':

	333 return T_NULL;

	334 case ',':

	335 return T_LIST_SEPARATOR;

	336 case ':':

	337 return T_OBJECT_PAIR_SEPARATOR;

	338 default:

	339 return T_INVALID_TOKEN;

	340 }

	341 }

	342

	343 void JSONParser::EatWhitespaceAndComments() {

	344 while (pos_ < end_pos_) {

	345 switch (*pos_) {

	346 case '\r':

	347 case '\n':

	348 index_last_line_ = index_;

	349 ++line_number_;

	350 // Fall through.

	351 case ' ':

	352 case '\t':

	353 NextChar();

	354 break;

	355 case '/':

	356 if (!EatComment())

	357 return;

	358 break;

	359 default:

	360 return;

	361 }

	362 }

	363 }

	364

	365 bool JSONParser::EatComment() {

	366 if (*pos_ != '/' \|\| !CanConsume(1))

	367 return false;

	368

	369 char next_char = *NextChar();

	370 if (next_char == '/') {

	371 // Single line comment, read to newline.

	372 while (CanConsume(1)) {

	373 char next_char = *NextChar();

	374 if (next_char == '\n' \|\| next_char == '\r')

	375 return true;

	376 }

	377 } else if (next_char == '*') {

	378 // Block comment, read until end marker.

	379 while (CanConsume(2)) {

	380 if (NextChar() == '' && *NextChar() == '/') {

	381 // EatWhitespaceAndComments will inspect pos_, which will still be on

	382 // the last / of the comment, so advance once more (which may also be

	383 // end of input).

	384 NextChar();

	385 return true;

	386 }

	387 }

	388 }

	389

	390 return false;

	391 }

	392

	393 Value* JSONParser::ParseNextToken() {

	394 return ParseToken(GetNextToken());

	395 }

	396

	397 Value* JSONParser::ParseToken(Token token) {

	398 switch (token) {

	399 case T_OBJECT_BEGIN:

	400 return ConsumeDictionary();

	401 case T_ARRAY_BEGIN:

	402 return ConsumeList();

	403 case T_STRING:

	404 return ConsumeString();

	405 case T_NUMBER:

	406 return ConsumeNumber();

	407 case T_BOOL_TRUE:

	408 case T_BOOL_FALSE:

	409 case T_NULL:

	410 return ConsumeLiteral();

	411 default:

	412 ReportError(JSONReader::JSON_UNEXPECTED_TOKEN, 1);

	413 return NULL;

	414 }

	415 }

	416

	417 Value* JSONParser::ConsumeDictionary() {

	418 if (*pos_ != '{') {

	419 ReportError(JSONReader::JSON_UNEXPECTED_TOKEN, 1);

	420 return NULL;

	421 }

	422

	423 StackMarker depth_check(&stack_depth_);

	424 if (depth_check.IsTooDeep()) {

	425 ReportError(JSONReader::JSON_TOO_MUCH_NESTING, 1);

	426 return NULL;

	427 }

	428

	429 scoped_ptr<DictionaryValue> dict(new DictionaryValue);

	430

	431 NextChar();

	432 Token token = GetNextToken();

	433 while (token != T_OBJECT_END) {

	434 if (token != T_STRING) {

	435 ReportError(JSONReader::JSON_UNQUOTED_DICTIONARY_KEY, 1);

	436 return NULL;

	437 }

	438

	439 // First consume the key.

	440 StringBuilder key;

	441 if (!ConsumeStringRaw(&key)) {

	442 return NULL;

	443 }

	444

	445 // Read the separator.

	446 NextChar();

	447 token = GetNextToken();

	448 if (token != T_OBJECT_PAIR_SEPARATOR) {

	449 ReportError(JSONReader::JSON_SYNTAX_ERROR, 1);

	450 return NULL;

	451 }

	452

	453 // The token is the value. Ownership transfers to \|dict\|.

	454 NextChar();

	455 Value* value = ParseNextToken();

	456 if (!value) {

	457 return NULL;

	458 }

	459

	460 dict->SetWithoutPathExpansion(key.AsString(), value);

	461

	462 NextChar();

	463 token = GetNextToken();

	464 if (token == T_LIST_SEPARATOR) {

	465 NextChar();

	466 token = GetNextToken();

	467 if (token == T_OBJECT_END && !(options_ & JSON_ALLOW_TRAILING_COMMAS)) {

	468 ReportError(JSONReader::JSON_TRAILING_COMMA, 1);

	469 return NULL;

	470 }

	471 } else if (token != T_OBJECT_END) {

	472 ReportError(JSONReader::JSON_SYNTAX_ERROR, 0);

	473 return NULL;

	474 }

	475 }

	476

	477 if (token != T_OBJECT_END)

	478 return NULL;

	479

	480 return dict.release();

	481 }

	482

	483 Value* JSONParser::ConsumeList() {

	484 if (*pos_ != '[') {

	485 ReportError(JSONReader::JSON_UNEXPECTED_TOKEN, 1);

	486 return NULL;

	487 }

	488

	489 StackMarker depth_check(&stack_depth_);

	490 if (depth_check.IsTooDeep()) {

	491 ReportError(JSONReader::JSON_TOO_MUCH_NESTING, 1);

	492 return NULL;

	493 }

	494

	495 scoped_ptr<ListValue> list(new ListValue);

	496

	497 NextChar();

	498 Token token = GetNextToken();

	499 while (token != T_ARRAY_END) {

	500 Value* item = ParseToken(token);

	501 if (!item) {

	502 // ReportError from deeper level.

	503 return NULL;

	504 }

	505

	506 list->Append(item);

	507

	508 NextChar();

	509 token = GetNextToken();

	510 if (token == T_LIST_SEPARATOR) {

	511 NextChar();

	512 token = GetNextToken();

	513 if (token == T_ARRAY_END && !(options_ & JSON_ALLOW_TRAILING_COMMAS)) {

	514 ReportError(JSONReader::JSON_TRAILING_COMMA, 1);

	515 return NULL;

	516 }

	517 } else if (token != T_ARRAY_END) {

	518 ReportError(JSONReader::JSON_SYNTAX_ERROR, 1);

	519 return NULL;

	520 }

	521 }

	522

	523 if (token != T_ARRAY_END)

	524 return NULL;

	525

	526 return list.release();

	527 }

	528

	529 Value* JSONParser::ConsumeString() {

	530 StringBuilder string;

	531 if (!ConsumeStringRaw(&string))

	532 return NULL;

	533

	534 // Create the Value representation, either using a hidden root, if configured

	535 // to do so, and the string can be represented by StringPiece.

	536 if (string.CanBeStringPiece() && !(options_ & JSON_DETACHABLE_CHILDREN)) {

	537 return new JSONStringValue(string.AsStringPiece());

	538 } else {

	539 if (string.CanBeStringPiece())

	540 string.Convert();

	541 return new StringValue(string.AsString());

	542 }

	543 }

	544

	545 bool JSONParser::ConsumeStringRaw(StringBuilder* out) {

	546 if (*pos_ != '"') {

	547 ReportError(JSONReader::JSON_UNEXPECTED_TOKEN, 1);

	548 return false;

	549 }

	550

	551 // StringBuilder will internally build a StringPiece unless a UTF-16

	552 // conversion occurs, at which point it will perform a copy into a

	553 // std::string.

	554 StringBuilder string(NextChar());

	555

	556 int length = end_pos_ - start_pos_;

	557 int32 next_char = 0;

	558

	559 DCHECK_EQ(pos_, (start_pos_ + index_));

	560

	561 while (CanConsume(1)) {

	562 pos_ = start_pos_ + index_; // CBU8_NEXT is postcrement.

	563 CBU8_NEXT(start_pos_, index_, length, next_char);

	564 if (next_char < 0 \|\| !IsValidCharacter(next_char)) {

	565 ReportError(JSONReader::JSON_UNSUPPORTED_ENCODING, 1);

	566 return false;

	567 }

	568

	569 // If this character is an escape sequence...

	570 if (next_char == '\\') {

	571 // The input string will be adjusted (either by combining the two

	572 // characters of an encoded escape sequence, or with a UTF conversion),

	573 // so using StringPiece isn't possible -- force a conversion.

	574 string.Convert();

	575

	576 if (!CanConsume(1)) {

	577 ReportError(JSONReader::JSON_INVALID_ESCAPE, 0);

	578 return false;

	579 }

	580

	581 switch (*NextChar()) {

	582 // Allowed esape sequences:

	583 case 'x': { // UTF-8 sequence.

	584 if (!CanConsume(2)) {

	585 ReportError(JSONReader::JSON_INVALID_ESCAPE, 1);

	586 return false;

	587 }

	588

	589 int hex_digit = 0;

	590 if (!HexStringToInt(StringPiece(NextChar(), 2), &hex_digit)) {

	591 ReportError(JSONReader::JSON_INVALID_ESCAPE, -1);

	592 return false;

	593 }

	594 NextChar();

	595

	596 string.Append(hex_digit);

	597 break;

	598 }

	599 case 'u': { // UTF-16 sequence.

	600 // UTF units are of the form \uXXXX.

	601 if (!CanConsume(5)) { // 5 being 'u' and four HEX digits.

	602 ReportError(JSONReader::JSON_INVALID_ESCAPE, 0);

	603 return false;

	604 }

	605

	606 // Skip the 'u'.

	607 NextChar();

	608

	609 std::string utf8_units;

	610 if (!DecodeUTF16(&utf8_units)) {

	611 ReportError(JSONReader::JSON_INVALID_ESCAPE, -1);

	612 return false;

	613 }

	614

	615 string.AppendString(utf8_units);

	616 break;

	617 }

	618 case '"':

	619 string.Append('"');

	620 break;

	621 case '\\':

	622 string.Append('\\');

	623 break;

	624 case '/':

	625 string.Append('/');

	626 break;

	627 case 'b':

	628 string.Append('\b');

	629 break;

	630 case 'f':

	631 string.Append('\f');

	632 break;

	633 case 'n':

	634 string.Append('\n');

	635 break;

	636 case 'r':

	637 string.Append('\r');

	638 break;

	639 case 't':

	640 string.Append('\t');

	641 break;

	642 case 'v': // Not listed as valid escape sequence in the RFC.

	643 string.Append('\v');

	644 break;

	645 // All other escape squences are illegal.

	646 default:

	647 ReportError(JSONReader::JSON_INVALID_ESCAPE, 0);

	648 return false;

	649 }

	650 } else if (next_char == '"') {

	651 --index_; // Rewind by one because of CBU8_NEXT.

	652 out->Swap(&string);

	653 return true;

	654 } else if (next_char > kExtendedASCIIStart) {

	655 // Anything outside of the basic ASCII plane will need to be

	656 // decomposed from int32 to a multi-byte sequence.

	657 char utf8_units[4] = { 0 };

	658 int offset = 0;

	659 string.Convert();

	660 CBU8_APPEND_UNSAFE(utf8_units, offset, next_char);

	661 string.AppendString(utf8_units);

	662 } else {

	663 string.Append(next_char);

	664 }

	665 }

	666

	667 ReportError(JSONReader::JSON_SYNTAX_ERROR, 0);

	668 return false;

	669 }

	670

	671 // Entry is at the first X in \uXXXX.

	672 bool JSONParser::DecodeUTF16(std::string* dest_string) {

	673 if (!CanConsume(4))

	674 return false;

	675

	676 // This is a 32-bit field because the shift operations in the

	677 // conversion process below cause MSVC to error about "data loss."

	678 // This only stores UTF-16 code units, though.

	679 // Consume the UTF-16 code unit, which may be a high surrogate.

	680 int code_unit16_high = 0;

	681 if (!HexStringToInt(StringPiece(pos_, 4), &code_unit16_high))

	682 return false;

	683

	684 // Only add 3, not 4, because at the end of this iteration, the parser has

	685 // finished working with the last digit of the UTF sequence, meaning that

	686 // the next spin of the loop will advance to the next byte.

	687 NextNChars(3);

	688

	689 // If this is a high surrogate, consume the next code unit to get the

	690 // low surrogate.

	691 int code_unit16_low = 0;

	692 if (CBU16_IS_SURROGATE(code_unit16_high)) {

	693 // Make sure this is the high surrogate. If not, it's an encoding

	694 // error.

	695 if (!CBU16_IS_SURROGATE_LEAD(code_unit16_high))

	696 return false;

	697

	698 // Make sure that the token has more characters to consume the

	699 // lower surrogate.

	700 if (!CanConsume(6)) // 6 being '\' 'u' and four HEX digits.

	701 return false;

	702 if (NextChar() != '\\' \|\| NextChar() != 'u')

	703 return false;

	704

	705 NextChar(); // Read past 'u'.

	706 if (!HexStringToInt(StringPiece(pos_, 4), &code_unit16_low))

	707 return false;

	708

	709 NextNChars(3);

	710

	711 if (!CBU16_IS_SURROGATE(code_unit16_low) \|\|

	712 !CBU16_IS_TRAIL(code_unit16_low)) {

	713 return false;

	714 }

	715 } else if (!CBU16_IS_SINGLE(code_unit16_high)) {

	716 // If this is not a code point, it's an encoding error.

	717 return false;

	718 }

	719

	720 // Convert the UTF-16 code units to a code point and then to a UTF-8

	721 // code unit sequence.

	722 char code_point[8] = { 0 };

	723 size_t offset = 0;

	724 if (!code_unit16_low) {

	725 CBU8_APPEND_UNSAFE(code_point, offset, code_unit16_high);

	726 } else {

	727 uint32 code_unit32 = CBU16_GET_SUPPLEMENTARY(code_unit16_high,

	728 code_unit16_low);

	729 offset = 0;

	730 CBU8_APPEND_UNSAFE(code_point, offset, code_unit32);

	731 }

	732 dest_string->append(code_point);

	733 return true;

	734 }

	735

	736 Value* JSONParser::ConsumeNumber() {

	737 const char* num_start = pos_;

	738 const int start_index = index_;

	739 int end_index = start_index;

	740

	741 if (*pos_ == '-')

	742 NextChar();

	743

	744 if (!ReadInt(false)) {

	745 ReportError(JSONReader::JSON_SYNTAX_ERROR, 1);

	746 return NULL;

	747 }

	748 end_index = index_;

	749

	750 // The optional faction part.

	751 if (*pos_ == '.') {

	752 if (!CanConsume(1)) {

	753 ReportError(JSONReader::JSON_SYNTAX_ERROR, 1);

	754 return NULL;

	755 }

	756 NextChar();

	757 if (!ReadInt(true)) {

	758 ReportError(JSONReader::JSON_SYNTAX_ERROR, 1);

	759 return NULL;

	760 }

	761 end_index = index_;

	762 }

	763

	764 // Optional exponent part.

	765 if (pos_ == 'e' \|\| pos_ == 'E') {

	766 NextChar();

	767 if (pos_ == '-' \|\| pos_ == '+')

	768 NextChar();

	769 if (!ReadInt(true)) {

	770 ReportError(JSONReader::JSON_SYNTAX_ERROR, 1);

	771 return NULL;

	772 }

	773 end_index = index_;

	774 }

	775

	776 // ReadInt is greedy because numbers have no easily detectable sentinel,

	777 // so save off where the parser should be on exit (see Consume invariant at

	778 // the top of the header), then make sure the next token is one which is

	779 // valid.

	780 const char* exit_pos = pos_ - 1;

	781 int exit_index = index_ - 1;

	782

	783 switch (GetNextToken()) {

	784 case T_OBJECT_END:

	785 case T_ARRAY_END:

	786 case T_LIST_SEPARATOR:

	787 case T_END_OF_INPUT:

	788 break;

	789 default:

	790 ReportError(JSONReader::JSON_SYNTAX_ERROR, 1);

	791 return NULL;

	792 }

	793

	794 pos_ = exit_pos;

	795 index_ = exit_index;

	796

	797 StringPiece num_string(num_start, end_index - start_index);

	798

	799 int num_int;

	800 if (StringToInt(num_string, &num_int))

	801 return Value::CreateIntegerValue(num_int);

	802

	803 double num_double;

	804 if (base::StringToDouble(num_string.as_string(), &num_double) &&

	805 IsFinite(num_double)) {

	806 return Value::CreateDoubleValue(num_double);

	807 }

	808

	809 return NULL;

	810 }

	811

	812 bool JSONParser::ReadInt(bool allow_leading_zeros) {

	813 char first = *pos_;

	814 int len = 0;

	815

	816 char c = first;

	817 while (CanConsume(1) && IsAsciiDigit(c)) {

	818 c = *NextChar();

	819 ++len;

	820 }

	821

	822 if (len == 0)

	823 return false;

	824

	825 if (!allow_leading_zeros && len > 1 && first == '0')

	826 return false;

	827

	828 return true;

	829 }

	830

	831 Value* JSONParser::ConsumeLiteral() {

	832 switch (*pos_) {

	833 case 't':

	834 if (!CanConsume(3) \|\| !StringsAreEqual(pos_, "true", 4)) {

	835 ReportError(JSONReader::JSON_SYNTAX_ERROR, 1);

	836 return NULL;

	837 }

	838 NextNChars(3);

	839 return Value::CreateBooleanValue(true);

	840 case 'f':

	841 if (!CanConsume(4) \|\| !StringsAreEqual(pos_, "false", 5)) {

	842 ReportError(JSONReader::JSON_SYNTAX_ERROR, 1);

	843 return NULL;

	844 }

	845 NextNChars(4);

	846 return Value::CreateBooleanValue(false);

	847 case 'n':

	848 if (!CanConsume(3) \|\| !StringsAreEqual(pos_, "null", 4)) {

	849 ReportError(JSONReader::JSON_SYNTAX_ERROR, 1);

	850 return NULL;

	851 }

	852 NextNChars(3);

	853 return Value::CreateNullValue();

	854 default:

	855 ReportError(JSONReader::JSON_UNEXPECTED_TOKEN, 1);

	856 return NULL;

	857 }

	858 }

	859

	860 bool JSONParser::StringsAreEqual(const char* one, const char* two, size_t len) {

	861 return strncmp(one, two, len) == 0;

	862 }

	863

	864 void JSONParser::ReportError(JSONReader::JsonParseError code,

	865 int column_adjust) {

	866 error_code_ = code;

	867 error_line_ = line_number_;

	868 error_column_ = index_ - index_last_line_ + column_adjust;

	869 }

	870

	871 // static

	872 std::string JSONParser::FormatErrorMessage(int line, int column,

	873 const std::string& description) {

	874 if (line \|\| column) {

	875 return base::StringPrintf(

	876 "Line: %i, column: %i, %s", line, column, description.c_str());

	877 }

	878 return description;

	879 }

	880

	881 } // namespace internal

	882 } // namespace base

OLD	NEW

« base/json/json_parser.h ('K') | « base/json/json_parser.h ('k') | base/json/json_parser_unittest.cc » ('j') | no next file with comments »