base/json/json_parser.cc - Issue 10035042: Rewrite base::JSONReader to be 35-40% faster, depending on the input string.

Side by Side Diff: base/json/json_parser.cc

Issue 10035042: Rewrite base::JSONReader to be 35-40% faster, depending on the input string. (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src

Patch Set: Really fix Windows, address comments Created 8 years, 7 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

OLD	NEW
(Empty)
	1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.

	2 // Use of this source code is governed by a BSD-style license that can be

	3 // found in the LICENSE file.

	4

	5 #include "base/json/json_parser.h"

	6

	7 #include "base/float_util.h"

	8 #include "base/logging.h"

	9 #include "base/memory/scoped_ptr.h"

	10 #include "base/string_number_conversions.h"

	11 #include "base/string_util.h"

	12 #include "base/stringprintf.h"

	13 #include "base/third_party/icu/icu_utf.h"

	14 #include "base/utf_string_conversion_utils.h"

	15 #include "base/utf_string_conversions.h"

	16 #include "base/values.h"

	17

	18 namespace base {

	19 namespace internal {

	20

	21 namespace {

	22

	23 const int kStackMaxDepth = 100;

	24

	25 const int32 kExtendedASCIIStart = 0x80;

	26

	27 // This and the class below are used to own the JSON input string for when

	28 // string tokens are stored as StringPiece instead of std::string. This

	29 // optimization avoids about 2/3rds of string memory copies. The constructor

	30 // takes the input string and swaps its data into the new instance. The real

	31 // root value is also Swap()ed into the new instance.

	32 class DictionaryHiddenRootValue : public base::DictionaryValue {

	33 public:

	34 DictionaryHiddenRootValue(std::string* json, Value* root) {

	35 DCHECK(root->IsType(Value::TYPE_DICTIONARY));

	36 DictionaryValue::Swap(static_cast<DictionaryValue*>(root));

	37 json->swap(json_);

	38 }

	39

	40 virtual void Swap(DictionaryValue* other) OVERRIDE {

	41 DVLOG(1) << "Swap()ing a DictionaryValue inefficiently.";

	42

	43 // First deep copy to convert JSONStringValue to std::string and swap that

	44 // copy with \|other\|, which contains the new contents of \|this\|.

	45 scoped_ptr<base::DictionaryValue> copy(DeepCopy());

	46 copy->Swap(other);

	47

	48 // Then erase the contents of the current dictionary and swap in the

	49 // new contents, originally from \|other\|.

	50 Clear();

	51 json_.clear();

	52 DictionaryValue::Swap(copy.get());

	53 }

	54

	55 // Not overriding DictionaryValue::Remove because it just calls through to

	56 // the method below.

	57

	58 virtual bool RemoveWithoutPathExpansion(const std::string& key,

	59 Value** out) OVERRIDE {

	60 // If the caller won't take ownership of the removed value, just call up.

	61 if (!out)

	62 return DictionaryValue::RemoveWithoutPathExpansion(key, out);

	63

	64 DVLOG(1) << "Remove()ing from a DictionaryValue inefficiently.";

	65

	66 // Otherwise, remove the value while its still "owned" by this and copy it

	67 // to convert any JSONStringValues to std::string.

	68 Value* out_owned = NULL;

	69 if (!DictionaryValue::RemoveWithoutPathExpansion(key, &out_owned))

	70 return false;

	71

	72 *out = out_owned->DeepCopy();

	73 delete out_owned;

	74

	75 return true;

	76 }

	77

	78 private:

	79 std::string json_;

	80

	81 DISALLOW_COPY_AND_ASSIGN(DictionaryHiddenRootValue);

	82 };

	83

	84 class ListHiddenRootValue : public base::ListValue {

	85 public:

	86 ListHiddenRootValue(std::string* json, Value* root) {

	87 DCHECK(root->IsType(Value::TYPE_LIST));

	88 ListValue::Swap(static_cast<ListValue*>(root));

	89 json->swap(json_);

	90 }

	91

	92 virtual void Swap(ListValue* other) OVERRIDE {

	93 DVLOG(1) << "Swap()ing a ListValue inefficiently.";

	94

	95 // First deep copy to convert JSONStringValue to std::string and swap that

	96 // copy with \|other\|, which contains the new contents of \|this\|.

	97 scoped_ptr<base::ListValue> copy(DeepCopy());

	98 copy->Swap(other);

	99

	100 // Then erase the contents of the current list and swap in the new contents,

	101 // originally from \|other\|.

	102 Clear();

	103 json_.clear();

	104 ListValue::Swap(copy.get());

	105 }

	106

	107 virtual bool Remove(size_t index, Value** out) OVERRIDE {

	108 // If the caller won't take ownership of the removed value, just call up.

	109 if (!out)

	110 return ListValue::Remove(index, out);

	111

	112 DVLOG(1) << "Remove()ing from a ListValue inefficiently.";

	113

	114 // Otherwise, remove the value while its still "owned" by this and copy it

	115 // to convert any JSONStringValues to std::string.

	116 Value* out_owned = NULL;

	117 if (!ListValue::Remove(index, &out_owned))

	118 return false;

	119

	120 *out = out_owned->DeepCopy();

	121 delete out_owned;

	122

	123 return true;

	124 }

	125

	126 private:

	127 std::string json_;

	128

	129 DISALLOW_COPY_AND_ASSIGN(ListHiddenRootValue);

	130 };

	131

	132 // A variant on StringValue that uses StringPiece instead of copying the string

	133 // into the Value. This can only be stored in a child of hidden root (above),

	134 // otherwise the referenced string will not be guaranteed to outlive it.

	135 class JSONStringValue : public base::Value {

	136 public:

	137 explicit JSONStringValue(const base::StringPiece& piece)

	138 : Value(TYPE_STRING),

	139 string_piece_(piece) {

	140 }

	141

	142 // Value:

	143 bool GetAsString(std::string* out_value) const OVERRIDE {

	144 string_piece_.CopyToString(out_value);

	145 return true;

	146 }

	147 bool GetAsString(string16* out_value) const OVERRIDE {

	148 *out_value = UTF8ToUTF16(string_piece_);

	149 return true;

	150 }

	151 virtual Value* DeepCopy() const OVERRIDE {

	152 return Value::CreateStringValue(string_piece_.as_string());

	153 }

	154 virtual bool Equals(const Value* other) const OVERRIDE {

	155 std::string other_string;

	156 return other->IsType(TYPE_STRING) && other->GetAsString(&other_string) &&

	157 StringPiece(other_string) == string_piece_;

	158 }

	159

	160 private:

	161 // The location in the original input stream.

	162 base::StringPiece string_piece_;

	163

	164 DISALLOW_COPY_AND_ASSIGN(JSONStringValue);

	165 };

	166

	167 // Simple class that checks for maximum recursion/"stack overflow."

	168 class StackMarker {

	169 public:

	170 explicit StackMarker(int* depth) : depth_(depth) {

	171 ++(*depth_);

	172 DCHECK_LE(*depth_, kStackMaxDepth);

	173 }

	174 ~StackMarker() {

	175 --(*depth_);

	176 }

	177

	178 bool IsTooDeep() const {

	179 return *depth_ >= kStackMaxDepth;

	180 }

	181

	182 private:

	183 int* const depth_;

	184

	185 DISALLOW_COPY_AND_ASSIGN(StackMarker);

	186 };

	187

	188 } // namespace

	189

	190 JSONParser::JSONParser(int options)

	191 : options_(options),

	192 start_pos_(NULL),

	193 pos_(NULL),

	194 end_pos_(NULL),

	195 index_(0),

	196 stack_depth_(0),

	197 line_number_(0),

	198 index_last_line_(0),

	199 error_code_(JSONReader::JSON_NO_ERROR),

	200 error_line_(0),

	201 error_column_(0) {

	202 }

	203

	204 JSONParser::~JSONParser() {

	205 }

	206

	207 Value* JSONParser::Parse(const std::string& input) {

	208 // TODO(rsesek): Windows has problems with StringPiece/hidden roots. Fix

	209 // <http://crbug.com/126107> when my Windows box arrives.

	210 #if defined(OS_WIN)

	211 options_ \|= JSON_DETACHABLE_CHILDREN;

	212 #endif

	213

	214 std::string input_copy;

	215 // If the children of a JSON root can be detached, then hidden roots cannot

	216 // be used, so do not bother copying the input because StringPiece will not

	217 // be used anywhere.

	218 if (!(options_ & JSON_DETACHABLE_CHILDREN)) {

	219 input_copy = input;

	220 start_pos_ = input_copy.data();

	221 } else {

	222 start_pos_ = input.data();

	223 }

	224 pos_ = start_pos_;

	225 end_pos_ = start_pos_ + input.length();

	226 index_ = 0;

	227 line_number_ = 1;

	228 index_last_line_ = 0;

	229

	230 error_code_ = JSONReader::JSON_NO_ERROR;

	231 error_line_ = 0;

	232 error_column_ = 0;

	233

	234 // When the input JSON string starts with a UTF-8 Byte-Order-Mark

	235 // <0xEF 0xBB 0xBF>, advance the start position to avoid the

	236 // ParseNextToken function mis-treating a Unicode BOM as an invalid

	237 // character and returning NULL.

	238 if (CanConsume(3) && static_cast<uint8>(*pos_) == 0xEF &&

	239 static_cast<uint8>(*(pos_ + 1)) == 0xBB &&

	240 static_cast<uint8>(*(pos_ + 2)) == 0xBF) {

	241 NextNChars(3);

	242 }

	243

	244 // Parse the first and all subsequent tokens.

	245 scoped_ptr<Value> root(ParseNextToken());

	246 if (!root.get())

	247 return NULL;

	248

	249 // Make sure the input stream is at an end.

	250 if (GetNextToken() != T_END_OF_INPUT) {

	251 if (!CanConsume(1) \|\| (NextChar() && GetNextToken() != T_END_OF_INPUT)) {

	252 ReportError(JSONReader::JSON_UNEXPECTED_DATA_AFTER_ROOT, 1);

	253 return NULL;

	254 }

	255 }

	256

	257 // Dictionaries and lists can contain JSONStringValues, so wrap them in a

	258 // hidden root.

	259 if (!(options_ & JSON_DETACHABLE_CHILDREN)) {

	260 if (root->IsType(Value::TYPE_DICTIONARY)) {

	261 return new DictionaryHiddenRootValue(&input_copy, root.release());

	262 } else if (root->IsType(Value::TYPE_LIST)) {

	263 return new ListHiddenRootValue(&input_copy, root.release());

	264 } else if (root->IsType(Value::TYPE_STRING)) {

	265 // A string type could be a JSONStringValue, but because there's no

	266 // corresponding HiddenRootValue, the memory will be lost. Deep copy to

	267 // preserve it.

	268 return root->DeepCopy();

	269 }

	270 }

	271

	272 // All other values can be returned directly.

	273 return root.release();

	274 }

	275

	276 JSONReader::JsonParseError JSONParser::error_code() const {

	277 return error_code_;

	278 }

	279

	280 std::string JSONParser::GetErrorMessage() const {

	281 return FormatErrorMessage(error_line_, error_column_,

	282 JSONReader::ErrorCodeToString(error_code_));

	283 }

	284

	285 // StringBuilder ///////////////////////////////////////////////////////////////

	286

	287 JSONParser::StringBuilder::StringBuilder()

	288 : pos_(NULL),

	289 length_(0),

	290 string_(NULL) {

	291 }

	292

	293 JSONParser::StringBuilder::StringBuilder(const char* pos)

	294 : pos_(pos),

	295 length_(0),

	296 string_(NULL) {

	297 }

	298

	299 void JSONParser::StringBuilder::Swap(StringBuilder* other) {

	300 std::swap(other->string_, string_);

	301 std::swap(other->pos_, pos_);

	302 std::swap(other->length_, length_);

	303 }

	304

	305 JSONParser::StringBuilder::~StringBuilder() {

	306 delete string_;

	307 }

	308

	309 void JSONParser::StringBuilder::Append(const char& c) {

	310 DCHECK_GE(c, 0);

	311 DCHECK_LT(c, 128);

	312

	313 if (string_)

	314 string_->push_back(c);

	315 else

	316 ++length_;

	317 }

	318

	319 void JSONParser::StringBuilder::AppendString(const std::string& str) {

	320 DCHECK(string_);

	321 string_->append(str);

	322 }

	323

	324 void JSONParser::StringBuilder::Convert() {

	325 if (string_)

	326 return;

	327 string_ = new std::string(pos_, length_);

	328 }

	329

	330 bool JSONParser::StringBuilder::CanBeStringPiece() const {

	331 return !string_;

	332 }

	333

	334 StringPiece JSONParser::StringBuilder::AsStringPiece() {

	335 if (string_)

	336 return StringPiece();

	337 return StringPiece(pos_, length_);

	338 }

	339

	340 const std::string& JSONParser::StringBuilder::AsString() {

	341 if (!string_)

	342 Convert();

	343 return *string_;

	344 }

	345

	346 // JSONParser private //////////////////////////////////////////////////////////

	347

	348 inline bool JSONParser::CanConsume(int length) {

	349 return pos_ + length <= end_pos_;

	350 }

	351

	352 const char* JSONParser::NextChar() {

	353 DCHECK(CanConsume(1));

	354 ++index_;

	355 ++pos_;

	356 return pos_;

	357 }

	358

	359 void JSONParser::NextNChars(int n) {

	360 DCHECK(CanConsume(n));

	361 index_ += n;

	362 pos_ += n;

	363 }

	364

	365 JSONParser::Token JSONParser::GetNextToken() {

	366 EatWhitespaceAndComments();

	367 if (!CanConsume(1))

	368 return T_END_OF_INPUT;

	369

	370 switch (*pos_) {

	371 case '{':

	372 return T_OBJECT_BEGIN;

	373 case '}':

	374 return T_OBJECT_END;

	375 case '[':

	376 return T_ARRAY_BEGIN;

	377 case ']':

	378 return T_ARRAY_END;

	379 case '"':

	380 return T_STRING;

	381 case '0':

	382 case '1':

	383 case '2':

	384 case '3':

	385 case '4':

	386 case '5':

	387 case '6':

	388 case '7':

	389 case '8':

	390 case '9':

	391 case '-':

	392 return T_NUMBER;

	393 case 't':

	394 return T_BOOL_TRUE;

	395 case 'f':

	396 return T_BOOL_FALSE;

	397 case 'n':

	398 return T_NULL;

	399 case ',':

	400 return T_LIST_SEPARATOR;

	401 case ':':

	402 return T_OBJECT_PAIR_SEPARATOR;

	403 default:

	404 return T_INVALID_TOKEN;

	405 }

	406 }

	407

	408 void JSONParser::EatWhitespaceAndComments() {

	409 while (pos_ < end_pos_) {

	410 switch (*pos_) {

	411 case '\r':

	412 case '\n':

	413 index_last_line_ = index_;

	414 ++line_number_;

	415 // Fall through.

	416 case ' ':

	417 case '\t':

	418 NextChar();

	419 break;

	420 case '/':

	421 if (!EatComment())

	422 return;

	423 break;

	424 default:

	425 return;

	426 }

	427 }

	428 }

	429

	430 bool JSONParser::EatComment() {

	431 if (*pos_ != '/' \|\| !CanConsume(1))

	432 return false;

	433

	434 char next_char = *NextChar();

	435 if (next_char == '/') {

	436 // Single line comment, read to newline.

	437 while (CanConsume(1)) {

	438 char next_char = *NextChar();

	439 if (next_char == '\n' \|\| next_char == '\r')

	440 return true;

	441 }

	442 } else if (next_char == '*') {

	443 // Block comment, read until end marker.

	444 while (CanConsume(2)) {

	445 if (NextChar() == '' && *NextChar() == '/') {

	446 // EatWhitespaceAndComments will inspect pos_, which will still be on

	447 // the last / of the comment, so advance once more (which may also be

	448 // end of input).

	449 NextChar();

	450 return true;

	451 }

	452 }

	453

	454 // If the comment is unterminated, GetNextToken will report T_END_OF_INPUT.

	455 }

	456

	457 return false;

	458 }

	459

	460 Value* JSONParser::ParseNextToken() {

	461 return ParseToken(GetNextToken());

	462 }

	463

	464 Value* JSONParser::ParseToken(Token token) {

	465 switch (token) {

	466 case T_OBJECT_BEGIN:

	467 return ConsumeDictionary();

	468 case T_ARRAY_BEGIN:

	469 return ConsumeList();

	470 case T_STRING:

	471 return ConsumeString();

	472 case T_NUMBER:

	473 return ConsumeNumber();

	474 case T_BOOL_TRUE:

	475 case T_BOOL_FALSE:

	476 case T_NULL:

	477 return ConsumeLiteral();

	478 default:

	479 ReportError(JSONReader::JSON_UNEXPECTED_TOKEN, 1);

	480 return NULL;

	481 }

	482 }

	483

	484 Value* JSONParser::ConsumeDictionary() {

	485 if (*pos_ != '{') {

	486 ReportError(JSONReader::JSON_UNEXPECTED_TOKEN, 1);

	487 return NULL;

	488 }

	489

	490 StackMarker depth_check(&stack_depth_);

	491 if (depth_check.IsTooDeep()) {

	492 ReportError(JSONReader::JSON_TOO_MUCH_NESTING, 1);

	493 return NULL;

	494 }

	495

	496 scoped_ptr<DictionaryValue> dict(new DictionaryValue);

	497

	498 NextChar();

	499 Token token = GetNextToken();

	500 while (token != T_OBJECT_END) {

	501 if (token != T_STRING) {

	502 ReportError(JSONReader::JSON_UNQUOTED_DICTIONARY_KEY, 1);

	503 return NULL;

	504 }

	505

	506 // First consume the key.

	507 StringBuilder key;

	508 if (!ConsumeStringRaw(&key)) {

	509 return NULL;

	510 }

	511

	512 // Read the separator.

	513 NextChar();

	514 token = GetNextToken();

	515 if (token != T_OBJECT_PAIR_SEPARATOR) {

	516 ReportError(JSONReader::JSON_SYNTAX_ERROR, 1);

	517 return NULL;

	518 }

	519

	520 // The token is the value. Ownership transfers to \|dict\|.

	521 NextChar();

	522 Value* value = ParseNextToken();

	523 if (!value) {

	524 return NULL;

	525 }

	526

	527 dict->SetWithoutPathExpansion(key.AsString(), value);

	528

	529 NextChar();

	530 token = GetNextToken();

	531 if (token == T_LIST_SEPARATOR) {

	532 NextChar();

	533 token = GetNextToken();

	534 if (token == T_OBJECT_END && !(options_ & JSON_ALLOW_TRAILING_COMMAS)) {

	535 ReportError(JSONReader::JSON_TRAILING_COMMA, 1);

	536 return NULL;

	537 }

	538 } else if (token != T_OBJECT_END) {

	539 ReportError(JSONReader::JSON_SYNTAX_ERROR, 0);

	540 return NULL;

	541 }

	542 }

	543

	544 if (token != T_OBJECT_END)

	545 return NULL;

	546

	547 return dict.release();

	548 }

	549

	550 Value* JSONParser::ConsumeList() {

	551 if (*pos_ != '[') {

	552 ReportError(JSONReader::JSON_UNEXPECTED_TOKEN, 1);

	553 return NULL;

	554 }

	555

	556 StackMarker depth_check(&stack_depth_);

	557 if (depth_check.IsTooDeep()) {

	558 ReportError(JSONReader::JSON_TOO_MUCH_NESTING, 1);

	559 return NULL;

	560 }

	561

	562 scoped_ptr<ListValue> list(new ListValue);

	563

	564 NextChar();

	565 Token token = GetNextToken();

	566 while (token != T_ARRAY_END) {

	567 Value* item = ParseToken(token);

	568 if (!item) {

	569 // ReportError from deeper level.

	570 return NULL;

	571 }

	572

	573 list->Append(item);

	574

	575 NextChar();

	576 token = GetNextToken();

	577 if (token == T_LIST_SEPARATOR) {

	578 NextChar();

	579 token = GetNextToken();

	580 if (token == T_ARRAY_END && !(options_ & JSON_ALLOW_TRAILING_COMMAS)) {

	581 ReportError(JSONReader::JSON_TRAILING_COMMA, 1);

	582 return NULL;

	583 }

	584 } else if (token != T_ARRAY_END) {

	585 ReportError(JSONReader::JSON_SYNTAX_ERROR, 1);

	586 return NULL;

	587 }

	588 }

	589

	590 if (token != T_ARRAY_END)

	591 return NULL;

	592

	593 return list.release();

	594 }

	595

	596 Value* JSONParser::ConsumeString() {

	597 StringBuilder string;

	598 if (!ConsumeStringRaw(&string))

	599 return NULL;

	600

	601 // Create the Value representation, either using a hidden root, if configured

	602 // to do so, and the string can be represented by StringPiece.

	603 if (string.CanBeStringPiece() && !(options_ & JSON_DETACHABLE_CHILDREN)) {

	604 return new JSONStringValue(string.AsStringPiece());

	605 } else {

	606 if (string.CanBeStringPiece())

	607 string.Convert();

	608 return new StringValue(string.AsString());

	609 }

	610 }

	611

	612 bool JSONParser::ConsumeStringRaw(StringBuilder* out) {

	613 if (*pos_ != '"') {

	614 ReportError(JSONReader::JSON_UNEXPECTED_TOKEN, 1);

	615 return false;

	616 }

	617

	618 // StringBuilder will internally build a StringPiece unless a UTF-16

	619 // conversion occurs, at which point it will perform a copy into a

	620 // std::string.

	621 StringBuilder string(NextChar());

	622

	623 int length = end_pos_ - start_pos_;

	624 int32 next_char = 0;

	625

	626 while (CanConsume(1)) {

	627 pos_ = start_pos_ + index_; // CBU8_NEXT is postcrement.

	628 CBU8_NEXT(start_pos_, index_, length, next_char);

	629 if (next_char < 0 \|\| !IsValidCharacter(next_char)) {

	630 ReportError(JSONReader::JSON_UNSUPPORTED_ENCODING, 1);

	631 return false;

	632 }

	633

	634 // If this character is an escape sequence...

	635 if (next_char == '\\') {

	636 // The input string will be adjusted (either by combining the two

	637 // characters of an encoded escape sequence, or with a UTF conversion),

	638 // so using StringPiece isn't possible -- force a conversion.

	639 string.Convert();

	640

	641 if (!CanConsume(1)) {

	642 ReportError(JSONReader::JSON_INVALID_ESCAPE, 0);

	643 return false;

	644 }

	645

	646 switch (*NextChar()) {

	647 // Allowed esape sequences:

	648 case 'x': { // UTF-8 sequence.

	649 // UTF-8 \x escape sequences are not allowed in the spec, but they

	650 // are supported here for backwards-compatiblity with the old parser.

	651 if (!CanConsume(2)) {

	652 ReportError(JSONReader::JSON_INVALID_ESCAPE, 1);

	653 return false;

	654 }

	655

	656 int hex_digit = 0;

	657 if (!HexStringToInt(StringPiece(NextChar(), 2), &hex_digit)) {

	658 ReportError(JSONReader::JSON_INVALID_ESCAPE, -1);

	659 return false;

	660 }

	661 NextChar();

	662

	663 if (hex_digit < kExtendedASCIIStart)

	664 string.Append(hex_digit);

	665 else

	666 DecodeUTF8(hex_digit, &string);

	667 break;

	668 }

	669 case 'u': { // UTF-16 sequence.

	670 // UTF units are of the form \uXXXX.

	671 if (!CanConsume(5)) { // 5 being 'u' and four HEX digits.

	672 ReportError(JSONReader::JSON_INVALID_ESCAPE, 0);

	673 return false;

	674 }

	675

	676 // Skip the 'u'.

	677 NextChar();

	678

	679 std::string utf8_units;

	680 if (!DecodeUTF16(&utf8_units)) {

	681 ReportError(JSONReader::JSON_INVALID_ESCAPE, -1);

	682 return false;

	683 }

	684

	685 string.AppendString(utf8_units);

	686 break;

	687 }

	688 case '"':

	689 string.Append('"');

	690 break;

	691 case '\\':

	692 string.Append('\\');

	693 break;

	694 case '/':

	695 string.Append('/');

	696 break;

	697 case 'b':

	698 string.Append('\b');

	699 break;

	700 case 'f':

	701 string.Append('\f');

	702 break;

	703 case 'n':

	704 string.Append('\n');

	705 break;

	706 case 'r':

	707 string.Append('\r');

	708 break;

	709 case 't':

	710 string.Append('\t');

	711 break;

	712 case 'v': // Not listed as valid escape sequence in the RFC.

	713 string.Append('\v');

	714 break;

	715 // All other escape squences are illegal.

	716 default:

	717 ReportError(JSONReader::JSON_INVALID_ESCAPE, 0);

	718 return false;

	719 }

	720 } else if (next_char == '"') {

	721 --index_; // Rewind by one because of CBU8_NEXT.

	722 out->Swap(&string);

	723 return true;

	724 } else {

	725 if (next_char < kExtendedASCIIStart)

	726 string.Append(next_char);

	727 else

	728 DecodeUTF8(next_char, &string);

	729 }

	730 }

	731

	732 ReportError(JSONReader::JSON_SYNTAX_ERROR, 0);

	733 return false;

	734 }

	735

	736 // Entry is at the first X in \uXXXX.

	737 bool JSONParser::DecodeUTF16(std::string* dest_string) {

	738 if (!CanConsume(4))

	739 return false;

	740

	741 // This is a 32-bit field because the shift operations in the

	742 // conversion process below cause MSVC to error about "data loss."

	743 // This only stores UTF-16 code units, though.

	744 // Consume the UTF-16 code unit, which may be a high surrogate.

	745 int code_unit16_high = 0;

	746 if (!HexStringToInt(StringPiece(pos_, 4), &code_unit16_high))

	747 return false;

	748

	749 // Only add 3, not 4, because at the end of this iteration, the parser has

	750 // finished working with the last digit of the UTF sequence, meaning that

	751 // the next iteration will advance to the next byte.

	752 NextNChars(3);

	753

	754 // Used to convert the UTF-16 code units to a code point and then to a UTF-8

	755 // code unit sequence.

	756 char code_point[8] = { 0 };

	757 size_t offset = 0;

	758

	759 // If this is a high surrogate, consume the next code unit to get the

	760 // low surrogate.

	761 if (CBU16_IS_SURROGATE(code_unit16_high)) {

	762 // Make sure this is the high surrogate. If not, it's an encoding

	763 // error.

	764 if (!CBU16_IS_SURROGATE_LEAD(code_unit16_high))

	765 return false;

	766

	767 // Make sure that the token has more characters to consume the

	768 // lower surrogate.

	769 if (!CanConsume(6)) // 6 being '\' 'u' and four HEX digits.

	770 return false;

	771 if (NextChar() != '\\' \|\| NextChar() != 'u')

	772 return false;

	773

	774 NextChar(); // Read past 'u'.

	775 int code_unit16_low = 0;

	776 if (!HexStringToInt(StringPiece(pos_, 4), &code_unit16_low))

	777 return false;

	778

	779 NextNChars(3);

	780

	781 if (!CBU16_IS_TRAIL(code_unit16_low)) {

	782 return false;

	783 }

	784

	785 uint32 code_unit32 = CBU16_GET_SUPPLEMENTARY(code_unit16_high,

	786 code_unit16_low);

	787 offset = 0;

	788 CBU8_APPEND_UNSAFE(code_point, offset, code_unit32);

	789 } else {

	790 // Not a surrogate.

	791 DCHECK(CBU16_IS_SINGLE(code_unit16_high));

	792 CBU8_APPEND_UNSAFE(code_point, offset, code_unit16_high);

	793 }

	794

	795 dest_string->append(code_point);

	796 return true;

	797 }

	798

	799 void JSONParser::DecodeUTF8(const int32& point, StringBuilder* dest) {

	800 // Anything outside of the basic ASCII plane will need to be decomposed from

	801 // int32 to a multi-byte sequence.

	802 if (point < kExtendedASCIIStart) {

	803 dest->Append(point);

	804 } else {

	805 char utf8_units[4] = { 0 };

	806 int offset = 0;

	807 CBU8_APPEND_UNSAFE(utf8_units, offset, point);

	808 dest->Convert();

	809 dest->AppendString(utf8_units);

	810 }

	811 }

	812

	813 Value* JSONParser::ConsumeNumber() {

	814 const char* num_start = pos_;

	815 const int start_index = index_;

	816 int end_index = start_index;

	817

	818 if (*pos_ == '-')

	819 NextChar();

	820

	821 if (!ReadInt(false)) {

	822 ReportError(JSONReader::JSON_SYNTAX_ERROR, 1);

	823 return NULL;

	824 }

	825 end_index = index_;

	826

	827 // The optional fraction part.

	828 if (*pos_ == '.') {

	829 if (!CanConsume(1)) {

	830 ReportError(JSONReader::JSON_SYNTAX_ERROR, 1);

	831 return NULL;

	832 }

	833 NextChar();

	834 if (!ReadInt(true)) {

	835 ReportError(JSONReader::JSON_SYNTAX_ERROR, 1);

	836 return NULL;

	837 }

	838 end_index = index_;

	839 }

	840

	841 // Optional exponent part.

	842 if (pos_ == 'e' \|\| pos_ == 'E') {

	843 NextChar();

	844 if (pos_ == '-' \|\| pos_ == '+')

	845 NextChar();

	846 if (!ReadInt(true)) {

	847 ReportError(JSONReader::JSON_SYNTAX_ERROR, 1);

	848 return NULL;

	849 }

	850 end_index = index_;

	851 }

	852

	853 // ReadInt is greedy because numbers have no easily detectable sentinel,

	854 // so save off where the parser should be on exit (see Consume invariant at

	855 // the top of the header), then make sure the next token is one which is

	856 // valid.

	857 const char* exit_pos = pos_ - 1;

	858 int exit_index = index_ - 1;

	859

	860 switch (GetNextToken()) {

	861 case T_OBJECT_END:

	862 case T_ARRAY_END:

	863 case T_LIST_SEPARATOR:

	864 case T_END_OF_INPUT:

	865 break;

	866 default:

	867 ReportError(JSONReader::JSON_SYNTAX_ERROR, 1);

	868 return NULL;

	869 }

	870

	871 pos_ = exit_pos;

	872 index_ = exit_index;

	873

	874 StringPiece num_string(num_start, end_index - start_index);

	875

	876 int num_int;

	877 if (StringToInt(num_string, &num_int))

	878 return Value::CreateIntegerValue(num_int);

	879

	880 double num_double;

	881 if (base::StringToDouble(num_string.as_string(), &num_double) &&

	882 IsFinite(num_double)) {

	883 return Value::CreateDoubleValue(num_double);

	884 }

	885

	886 return NULL;

	887 }

	888

	889 bool JSONParser::ReadInt(bool allow_leading_zeros) {

	890 char first = *pos_;

	891 int len = 0;

	892

	893 char c = first;

	894 while (CanConsume(1) && IsAsciiDigit(c)) {

	895 c = *NextChar();

	896 ++len;

	897 }

	898

	899 if (len == 0)

	900 return false;

	901

	902 if (!allow_leading_zeros && len > 1 && first == '0')

	903 return false;

	904

	905 return true;

	906 }

	907

	908 Value* JSONParser::ConsumeLiteral() {

	909 switch (*pos_) {

	910 case 't': {

	911 const char* kTrueLiteral = "true";

	912 const int kTrueLen = static_cast<int>(strlen(kTrueLiteral));

	913 if (!CanConsume(kTrueLen - 1) \|\|

	914 !StringsAreEqual(pos_, kTrueLiteral, kTrueLen)) {

	915 ReportError(JSONReader::JSON_SYNTAX_ERROR, 1);

	916 return NULL;

	917 }

	918 NextNChars(kTrueLen - 1);

	919 return Value::CreateBooleanValue(true);

	920 }

	921 case 'f': {

	922 const char* kFalseLiteral = "false";

	923 const int kFalseLen = static_cast<int>(strlen(kFalseLiteral));

	924 if (!CanConsume(kFalseLen - 1) \|\|

	925 !StringsAreEqual(pos_, kFalseLiteral, kFalseLen)) {

	926 ReportError(JSONReader::JSON_SYNTAX_ERROR, 1);

	927 return NULL;

	928 }

	929 NextNChars(kFalseLen - 1);

	930 return Value::CreateBooleanValue(false);

	931 }

	932 case 'n': {

	933 const char* kNullLiteral = "null";

	934 const int kNullLen = static_cast<int>(strlen(kNullLiteral));

	935 if (!CanConsume(kNullLen - 1) \|\|

	936 !StringsAreEqual(pos_, kNullLiteral, kNullLen)) {

	937 ReportError(JSONReader::JSON_SYNTAX_ERROR, 1);

	938 return NULL;

	939 }

	940 NextNChars(kNullLen - 1);

	941 return Value::CreateNullValue();

	942 }

	943 default:

	944 ReportError(JSONReader::JSON_UNEXPECTED_TOKEN, 1);

	945 return NULL;

	946 }

	947 }

	948

	949 // static

	950 bool JSONParser::StringsAreEqual(const char* one, const char* two, size_t len) {

	951 return strncmp(one, two, len) == 0;

	952 }

	953

	954 void JSONParser::ReportError(JSONReader::JsonParseError code,

	955 int column_adjust) {

	956 error_code_ = code;

	957 error_line_ = line_number_;

	958 error_column_ = index_ - index_last_line_ + column_adjust;

	959 }

	960

	961 // static

	962 std::string JSONParser::FormatErrorMessage(int line, int column,

	963 const std::string& description) {

	964 if (line \|\| column) {

	965 return StringPrintf("Line: %i, column: %i, %s",

	966 line, column, description.c_str());

	967 }

	968 return description;

	969 }

	970

	971 } // namespace internal

	972 } // namespace base

OLD	NEW

« no previous file with comments | « base/json/json_parser.h ('k') | base/json/json_parser_unittest.cc » ('j') | no next file with comments »