Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(349)

Side by Side Diff: base/json/json_parser.cc

Issue 10035042: Rewrite base::JSONReader to be 35-40% faster, depending on the input string. (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src
Patch Set: Really fix Windows, address comments Created 8 years, 7 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
« no previous file with comments | « base/json/json_parser.h ('k') | base/json/json_parser_unittest.cc » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
(Empty)
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "base/json/json_parser.h"
6
7 #include "base/float_util.h"
8 #include "base/logging.h"
9 #include "base/memory/scoped_ptr.h"
10 #include "base/string_number_conversions.h"
11 #include "base/string_util.h"
12 #include "base/stringprintf.h"
13 #include "base/third_party/icu/icu_utf.h"
14 #include "base/utf_string_conversion_utils.h"
15 #include "base/utf_string_conversions.h"
16 #include "base/values.h"
17
18 namespace base {
19 namespace internal {
20
21 namespace {
22
// Nesting limit for lists and dictionaries: StackMarker::IsTooDeep() reports
// true once the running depth reaches this value.
const int kStackMaxDepth = 100;

// First code point outside 7-bit ASCII. Code points below this value are
// appended to a StringBuilder as a single byte; larger ones are handed to
// DecodeUTF8().
const int32 kExtendedASCIIStart = 0x80;
26
// This and the class below are used to own the JSON input string for when
// string tokens are stored as StringPiece instead of std::string. This
// optimization avoids about 2/3rds of string memory copies. The constructor
// takes the input string and swaps its data into the new instance. The real
// root value is also Swap()ed into the new instance.
class DictionaryHiddenRootValue : public base::DictionaryValue {
 public:
  // Swaps the contents of |root| (which must be a dictionary) and of |json|
  // into this object. Only the contents are taken: the |root| object itself
  // is not deleted here, so the caller remains responsible for it.
  DictionaryHiddenRootValue(std::string* json, Value* root) {
    DCHECK(root->IsType(Value::TYPE_DICTIONARY));
    DictionaryValue::Swap(static_cast<DictionaryValue*>(root));
    json->swap(json_);
  }

  // Exchanges contents with |other|. What |other| receives is a deep copy of
  // this dictionary, so it holds no values that point into |json_|.
  virtual void Swap(DictionaryValue* other) OVERRIDE {
    DVLOG(1) << "Swap()ing a DictionaryValue inefficiently.";

    // First deep copy to convert JSONStringValue to std::string and swap that
    // copy with |other|, which contains the new contents of |this|.
    scoped_ptr<base::DictionaryValue> copy(DeepCopy());
    copy->Swap(other);

    // Then erase the contents of the current dictionary and swap in the
    // new contents, originally from |other|. The values are cleared before
    // the string they may reference is.
    Clear();
    json_.clear();
    DictionaryValue::Swap(copy.get());
  }

  // Not overriding DictionaryValue::Remove because it just calls through to
  // the method below.

  // Removes |key|. When |out| is non-NULL the caller receives a deep copy of
  // the removed value rather than the stored object itself.
  virtual bool RemoveWithoutPathExpansion(const std::string& key,
                                          Value** out) OVERRIDE {
    // If the caller won't take ownership of the removed value, just call up.
    if (!out)
      return DictionaryValue::RemoveWithoutPathExpansion(key, out);

    DVLOG(1) << "Remove()ing from a DictionaryValue inefficiently.";

    // Otherwise, remove the value while it's still "owned" by this and copy it
    // to convert any JSONStringValues to std::string.
    Value* out_owned = NULL;
    if (!DictionaryValue::RemoveWithoutPathExpansion(key, &out_owned))
      return false;

    *out = out_owned->DeepCopy();
    delete out_owned;

    return true;
  }

 private:
  // The JSON text swapped in by the constructor.
  std::string json_;

  DISALLOW_COPY_AND_ASSIGN(DictionaryHiddenRootValue);
};
83
// A ListValue that also holds the JSON text it was parsed from, so that
// string values stored as StringPiece have a backing string to refer to.
class ListHiddenRootValue : public base::ListValue {
 public:
  // Swaps the contents of |root| (which must be a list) and of |json| into
  // this object. Only the contents are taken: the |root| object itself is not
  // deleted here, so the caller remains responsible for it.
  ListHiddenRootValue(std::string* json, Value* root) {
    DCHECK(root->IsType(Value::TYPE_LIST));
    ListValue::Swap(static_cast<ListValue*>(root));
    json->swap(json_);
  }

  // Exchanges contents with |other|. What |other| receives is a deep copy of
  // this list, so it holds no values that point into |json_|.
  virtual void Swap(ListValue* other) OVERRIDE {
    DVLOG(1) << "Swap()ing a ListValue inefficiently.";

    // First deep copy to convert JSONStringValue to std::string and swap that
    // copy with |other|, which contains the new contents of |this|.
    scoped_ptr<base::ListValue> copy(DeepCopy());
    copy->Swap(other);

    // Then erase the contents of the current list and swap in the new contents,
    // originally from |other|. The values are cleared before the string they
    // may reference is.
    Clear();
    json_.clear();
    ListValue::Swap(copy.get());
  }

  // Removes the element at |index|. When |out| is non-NULL the caller receives
  // a deep copy of the removed value rather than the stored object itself.
  virtual bool Remove(size_t index, Value** out) OVERRIDE {
    // If the caller won't take ownership of the removed value, just call up.
    if (!out)
      return ListValue::Remove(index, out);

    DVLOG(1) << "Remove()ing from a ListValue inefficiently.";

    // Otherwise, remove the value while it's still "owned" by this and copy it
    // to convert any JSONStringValues to std::string.
    Value* out_owned = NULL;
    if (!ListValue::Remove(index, &out_owned))
      return false;

    *out = out_owned->DeepCopy();
    delete out_owned;

    return true;
  }

 private:
  // The JSON text swapped in by the constructor.
  std::string json_;

  DISALLOW_COPY_AND_ASSIGN(ListHiddenRootValue);
};
131
132 // A variant on StringValue that uses StringPiece instead of copying the string
133 // into the Value. This can only be stored in a child of hidden root (above),
134 // otherwise the referenced string will not be guaranteed to outlive it.
135 class JSONStringValue : public base::Value {
136 public:
137 explicit JSONStringValue(const base::StringPiece& piece)
138 : Value(TYPE_STRING),
139 string_piece_(piece) {
140 }
141
142 // Value:
143 bool GetAsString(std::string* out_value) const OVERRIDE {
144 string_piece_.CopyToString(out_value);
145 return true;
146 }
147 bool GetAsString(string16* out_value) const OVERRIDE {
148 *out_value = UTF8ToUTF16(string_piece_);
149 return true;
150 }
151 virtual Value* DeepCopy() const OVERRIDE {
152 return Value::CreateStringValue(string_piece_.as_string());
153 }
154 virtual bool Equals(const Value* other) const OVERRIDE {
155 std::string other_string;
156 return other->IsType(TYPE_STRING) && other->GetAsString(&other_string) &&
157 StringPiece(other_string) == string_piece_;
158 }
159
160 private:
161 // The location in the original input stream.
162 base::StringPiece string_piece_;
163
164 DISALLOW_COPY_AND_ASSIGN(JSONStringValue);
165 };
166
167 // Simple class that checks for maximum recursion/"stack overflow."
168 class StackMarker {
169 public:
170 explicit StackMarker(int* depth) : depth_(depth) {
171 ++(*depth_);
172 DCHECK_LE(*depth_, kStackMaxDepth);
173 }
174 ~StackMarker() {
175 --(*depth_);
176 }
177
178 bool IsTooDeep() const {
179 return *depth_ >= kStackMaxDepth;
180 }
181
182 private:
183 int* const depth_;
184
185 DISALLOW_COPY_AND_ASSIGN(StackMarker);
186 };
187
188 } // namespace
189
// Stores |options| and puts every cursor, counter and error field into an
// empty state. Parse() re-initializes the per-input fields on each call.
JSONParser::JSONParser(int options)
    : options_(options),
      start_pos_(NULL),
      pos_(NULL),
      end_pos_(NULL),
      index_(0),
      stack_depth_(0),
      line_number_(0),
      index_last_line_(0),
      error_code_(JSONReader::JSON_NO_ERROR),
      error_line_(0),
      error_column_(0) {
}
203
// Nothing to release explicitly.
JSONParser::~JSONParser() {
}
206
207 Value* JSONParser::Parse(const std::string& input) {
208 // TODO(rsesek): Windows has problems with StringPiece/hidden roots. Fix
209 // <http://crbug.com/126107> when my Windows box arrives.
210 #if defined(OS_WIN)
211 options_ |= JSON_DETACHABLE_CHILDREN;
212 #endif
213
214 std::string input_copy;
215 // If the children of a JSON root can be detached, then hidden roots cannot
216 // be used, so do not bother copying the input because StringPiece will not
217 // be used anywhere.
218 if (!(options_ & JSON_DETACHABLE_CHILDREN)) {
219 input_copy = input;
220 start_pos_ = input_copy.data();
221 } else {
222 start_pos_ = input.data();
223 }
224 pos_ = start_pos_;
225 end_pos_ = start_pos_ + input.length();
226 index_ = 0;
227 line_number_ = 1;
228 index_last_line_ = 0;
229
230 error_code_ = JSONReader::JSON_NO_ERROR;
231 error_line_ = 0;
232 error_column_ = 0;
233
234 // When the input JSON string starts with a UTF-8 Byte-Order-Mark
235 // <0xEF 0xBB 0xBF>, advance the start position to avoid the
236 // ParseNextToken function mis-treating a Unicode BOM as an invalid
237 // character and returning NULL.
238 if (CanConsume(3) && static_cast<uint8>(*pos_) == 0xEF &&
239 static_cast<uint8>(*(pos_ + 1)) == 0xBB &&
240 static_cast<uint8>(*(pos_ + 2)) == 0xBF) {
241 NextNChars(3);
242 }
243
244 // Parse the first and all subsequent tokens.
245 scoped_ptr<Value> root(ParseNextToken());
246 if (!root.get())
247 return NULL;
248
249 // Make sure the input stream is at an end.
250 if (GetNextToken() != T_END_OF_INPUT) {
251 if (!CanConsume(1) || (NextChar() && GetNextToken() != T_END_OF_INPUT)) {
252 ReportError(JSONReader::JSON_UNEXPECTED_DATA_AFTER_ROOT, 1);
253 return NULL;
254 }
255 }
256
257 // Dictionaries and lists can contain JSONStringValues, so wrap them in a
258 // hidden root.
259 if (!(options_ & JSON_DETACHABLE_CHILDREN)) {
260 if (root->IsType(Value::TYPE_DICTIONARY)) {
261 return new DictionaryHiddenRootValue(&input_copy, root.release());
262 } else if (root->IsType(Value::TYPE_LIST)) {
263 return new ListHiddenRootValue(&input_copy, root.release());
264 } else if (root->IsType(Value::TYPE_STRING)) {
265 // A string type could be a JSONStringValue, but because there's no
266 // corresponding HiddenRootValue, the memory will be lost. Deep copy to
267 // preserve it.
268 return root->DeepCopy();
269 }
270 }
271
272 // All other values can be returned directly.
273 return root.release();
274 }
275
// Returns the error code recorded by the most recent ReportError() call, or
// JSON_NO_ERROR if none was recorded since the fields were last reset.
JSONReader::JsonParseError JSONParser::error_code() const {
  return error_code_;
}
279
280 std::string JSONParser::GetErrorMessage() const {
281 return FormatErrorMessage(error_line_, error_column_,
282 JSONReader::ErrorCodeToString(error_code_));
283 }
284
285 // StringBuilder ///////////////////////////////////////////////////////////////
286
// Creates an empty builder with no source position and no owned string.
JSONParser::StringBuilder::StringBuilder()
    : pos_(NULL),
      length_(0),
      string_(NULL) {
}
292
// Creates a builder whose piece starts at |pos| with length zero. No string
// is allocated until Convert() is called.
JSONParser::StringBuilder::StringBuilder(const char* pos)
    : pos_(pos),
      length_(0),
      string_(NULL) {
}
298
299 void JSONParser::StringBuilder::Swap(StringBuilder* other) {
300 std::swap(other->string_, string_);
301 std::swap(other->pos_, pos_);
302 std::swap(other->length_, length_);
303 }
304
// Frees the owned string, if Convert() ever allocated one.
JSONParser::StringBuilder::~StringBuilder() {
  delete string_;
}
308
309 void JSONParser::StringBuilder::Append(const char& c) {
310 DCHECK_GE(c, 0);
311 DCHECK_LT(c, 128);
312
313 if (string_)
314 string_->push_back(c);
315 else
316 ++length_;
317 }
318
319 void JSONParser::StringBuilder::AppendString(const std::string& str) {
320 DCHECK(string_);
321 string_->append(str);
322 }
323
324 void JSONParser::StringBuilder::Convert() {
325 if (string_)
326 return;
327 string_ = new std::string(pos_, length_);
328 }
329
330 bool JSONParser::StringBuilder::CanBeStringPiece() const {
331 return !string_;
332 }
333
334 StringPiece JSONParser::StringBuilder::AsStringPiece() {
335 if (string_)
336 return StringPiece();
337 return StringPiece(pos_, length_);
338 }
339
340 const std::string& JSONParser::StringBuilder::AsString() {
341 if (!string_)
342 Convert();
343 return *string_;
344 }
345
346 // JSONParser private //////////////////////////////////////////////////////////
347
348 inline bool JSONParser::CanConsume(int length) {
349 return pos_ + length <= end_pos_;
350 }
351
352 const char* JSONParser::NextChar() {
353 DCHECK(CanConsume(1));
354 ++index_;
355 ++pos_;
356 return pos_;
357 }
358
359 void JSONParser::NextNChars(int n) {
360 DCHECK(CanConsume(n));
361 index_ += n;
362 pos_ += n;
363 }
364
365 JSONParser::Token JSONParser::GetNextToken() {
366 EatWhitespaceAndComments();
367 if (!CanConsume(1))
368 return T_END_OF_INPUT;
369
370 switch (*pos_) {
371 case '{':
372 return T_OBJECT_BEGIN;
373 case '}':
374 return T_OBJECT_END;
375 case '[':
376 return T_ARRAY_BEGIN;
377 case ']':
378 return T_ARRAY_END;
379 case '"':
380 return T_STRING;
381 case '0':
382 case '1':
383 case '2':
384 case '3':
385 case '4':
386 case '5':
387 case '6':
388 case '7':
389 case '8':
390 case '9':
391 case '-':
392 return T_NUMBER;
393 case 't':
394 return T_BOOL_TRUE;
395 case 'f':
396 return T_BOOL_FALSE;
397 case 'n':
398 return T_NULL;
399 case ',':
400 return T_LIST_SEPARATOR;
401 case ':':
402 return T_OBJECT_PAIR_SEPARATOR;
403 default:
404 return T_INVALID_TOKEN;
405 }
406 }
407
408 void JSONParser::EatWhitespaceAndComments() {
409 while (pos_ < end_pos_) {
410 switch (*pos_) {
411 case '\r':
412 case '\n':
413 index_last_line_ = index_;
414 ++line_number_;
415 // Fall through.
416 case ' ':
417 case '\t':
418 NextChar();
419 break;
420 case '/':
421 if (!EatComment())
422 return;
423 break;
424 default:
425 return;
426 }
427 }
428 }
429
430 bool JSONParser::EatComment() {
431 if (*pos_ != '/' || !CanConsume(1))
432 return false;
433
434 char next_char = *NextChar();
435 if (next_char == '/') {
436 // Single line comment, read to newline.
437 while (CanConsume(1)) {
438 char next_char = *NextChar();
439 if (next_char == '\n' || next_char == '\r')
440 return true;
441 }
442 } else if (next_char == '*') {
443 // Block comment, read until end marker.
444 while (CanConsume(2)) {
445 if (*NextChar() == '*' && *NextChar() == '/') {
446 // EatWhitespaceAndComments will inspect pos_, which will still be on
447 // the last / of the comment, so advance once more (which may also be
448 // end of input).
449 NextChar();
450 return true;
451 }
452 }
453
454 // If the comment is unterminated, GetNextToken will report T_END_OF_INPUT.
455 }
456
457 return false;
458 }
459
460 Value* JSONParser::ParseNextToken() {
461 return ParseToken(GetNextToken());
462 }
463
464 Value* JSONParser::ParseToken(Token token) {
465 switch (token) {
466 case T_OBJECT_BEGIN:
467 return ConsumeDictionary();
468 case T_ARRAY_BEGIN:
469 return ConsumeList();
470 case T_STRING:
471 return ConsumeString();
472 case T_NUMBER:
473 return ConsumeNumber();
474 case T_BOOL_TRUE:
475 case T_BOOL_FALSE:
476 case T_NULL:
477 return ConsumeLiteral();
478 default:
479 ReportError(JSONReader::JSON_UNEXPECTED_TOKEN, 1);
480 return NULL;
481 }
482 }
483
// Parses a dictionary starting at the '{' under the cursor. Returns the new
// DictionaryValue (owned by the caller) with the cursor left on the closing
// '}', or NULL after an error.
Value* JSONParser::ConsumeDictionary() {
  if (*pos_ != '{') {
    ReportError(JSONReader::JSON_UNEXPECTED_TOKEN, 1);
    return NULL;
  }

  // Counts this nesting level for as long as this call is active.
  StackMarker depth_check(&stack_depth_);
  if (depth_check.IsTooDeep()) {
    ReportError(JSONReader::JSON_TOO_MUCH_NESTING, 1);
    return NULL;
  }

  // Holds the partially built dictionary so early returns free it.
  scoped_ptr<DictionaryValue> dict(new DictionaryValue);

  // Step past the '{'.
  NextChar();
  Token token = GetNextToken();
  while (token != T_OBJECT_END) {
    if (token != T_STRING) {
      ReportError(JSONReader::JSON_UNQUOTED_DICTIONARY_KEY, 1);
      return NULL;
    }

    // First consume the key.
    StringBuilder key;
    if (!ConsumeStringRaw(&key)) {
      return NULL;
    }

    // Read the separator. The NextChar() steps past the key's closing quote.
    NextChar();
    token = GetNextToken();
    if (token != T_OBJECT_PAIR_SEPARATOR) {
      ReportError(JSONReader::JSON_SYNTAX_ERROR, 1);
      return NULL;
    }

    // The token is the value. Ownership transfers to |dict|.
    NextChar();
    Value* value = ParseNextToken();
    if (!value) {
      return NULL;
    }

    dict->SetWithoutPathExpansion(key.AsString(), value);

    // Step past the last character of the value, then expect either a comma
    // or the closing brace.
    NextChar();
    token = GetNextToken();
    if (token == T_LIST_SEPARATOR) {
      NextChar();
      token = GetNextToken();
      if (token == T_OBJECT_END && !(options_ & JSON_ALLOW_TRAILING_COMMAS)) {
        ReportError(JSONReader::JSON_TRAILING_COMMA, 1);
        return NULL;
      }
    } else if (token != T_OBJECT_END) {
      ReportError(JSONReader::JSON_SYNTAX_ERROR, 0);
      return NULL;
    }
  }

  // The loop above only exits with |token| == T_OBJECT_END, so this check is
  // purely defensive.
  if (token != T_OBJECT_END)
    return NULL;

  return dict.release();
}
549
// Parses a list starting at the '[' under the cursor. Returns the new
// ListValue (owned by the caller) with the cursor left on the closing ']',
// or NULL after an error.
Value* JSONParser::ConsumeList() {
  if (*pos_ != '[') {
    ReportError(JSONReader::JSON_UNEXPECTED_TOKEN, 1);
    return NULL;
  }

  // Counts this nesting level for as long as this call is active.
  StackMarker depth_check(&stack_depth_);
  if (depth_check.IsTooDeep()) {
    ReportError(JSONReader::JSON_TOO_MUCH_NESTING, 1);
    return NULL;
  }

  // Holds the partially built list so early returns free it.
  scoped_ptr<ListValue> list(new ListValue);

  // Step past the '['.
  NextChar();
  Token token = GetNextToken();
  while (token != T_ARRAY_END) {
    Value* item = ParseToken(token);
    if (!item) {
      // ReportError from deeper level.
      return NULL;
    }

    // Ownership of |item| passes to |list|.
    list->Append(item);

    // Step past the last character of the item, then expect either a comma
    // or the closing bracket.
    NextChar();
    token = GetNextToken();
    if (token == T_LIST_SEPARATOR) {
      NextChar();
      token = GetNextToken();
      if (token == T_ARRAY_END && !(options_ & JSON_ALLOW_TRAILING_COMMAS)) {
        ReportError(JSONReader::JSON_TRAILING_COMMA, 1);
        return NULL;
      }
    } else if (token != T_ARRAY_END) {
      ReportError(JSONReader::JSON_SYNTAX_ERROR, 1);
      return NULL;
    }
  }

  // The loop above only exits with |token| == T_ARRAY_END, so this check is
  // purely defensive.
  if (token != T_ARRAY_END)
    return NULL;

  return list.release();
}
595
596 Value* JSONParser::ConsumeString() {
597 StringBuilder string;
598 if (!ConsumeStringRaw(&string))
599 return NULL;
600
601 // Create the Value representation, either using a hidden root, if configured
602 // to do so, and the string can be represented by StringPiece.
603 if (string.CanBeStringPiece() && !(options_ & JSON_DETACHABLE_CHILDREN)) {
604 return new JSONStringValue(string.AsStringPiece());
605 } else {
606 if (string.CanBeStringPiece())
607 string.Convert();
608 return new StringValue(string.AsString());
609 }
610 }
611
// Reads the quoted string starting at the '"' under the cursor into |out|.
// Returns true with the cursor left on the closing quote; returns false after
// reporting an error for bad encoding, a bad escape sequence, or a string
// that never terminates. |out| is only modified on success.
bool JSONParser::ConsumeStringRaw(StringBuilder* out) {
  if (*pos_ != '"') {
    ReportError(JSONReader::JSON_UNEXPECTED_TOKEN, 1);
    return false;
  }

  // StringBuilder will internally build a StringPiece unless a UTF-16
  // conversion occurs, at which point it will perform a copy into a
  // std::string.
  StringBuilder string(NextChar());

  int length = end_pos_ - start_pos_;
  int32 next_char = 0;

  while (CanConsume(1)) {
    // Record where this character starts; CBU8_NEXT then decodes it into
    // |next_char| and moves |index_| past it.
    pos_ = start_pos_ + index_;  // CBU8_NEXT is postcrement.
    CBU8_NEXT(start_pos_, index_, length, next_char);
    if (next_char < 0 || !IsValidCharacter(next_char)) {
      ReportError(JSONReader::JSON_UNSUPPORTED_ENCODING, 1);
      return false;
    }

    // If this character is an escape sequence...
    if (next_char == '\\') {
      // The input string will be adjusted (either by combining the two
      // characters of an encoded escape sequence, or with a UTF conversion),
      // so using StringPiece isn't possible -- force a conversion.
      string.Convert();

      if (!CanConsume(1)) {
        ReportError(JSONReader::JSON_INVALID_ESCAPE, 0);
        return false;
      }

      switch (*NextChar()) {
        // Allowed escape sequences:
        case 'x': {  // UTF-8 sequence.
          // UTF-8 \x escape sequences are not allowed in the spec, but they
          // are supported here for backwards-compatibility with the old
          // parser.
          if (!CanConsume(2)) {
            ReportError(JSONReader::JSON_INVALID_ESCAPE, 1);
            return false;
          }

          // Parse the two characters after the 'x' as one hex value, then
          // step onto the second of them.
          int hex_digit = 0;
          if (!HexStringToInt(StringPiece(NextChar(), 2), &hex_digit)) {
            ReportError(JSONReader::JSON_INVALID_ESCAPE, -1);
            return false;
          }
          NextChar();

          if (hex_digit < kExtendedASCIIStart)
            string.Append(hex_digit);
          else
            DecodeUTF8(hex_digit, &string);
          break;
        }
        case 'u': {  // UTF-16 sequence.
          // UTF units are of the form \uXXXX.
          if (!CanConsume(5)) {  // 5 being 'u' and four HEX digits.
            ReportError(JSONReader::JSON_INVALID_ESCAPE, 0);
            return false;
          }

          // Skip the 'u'.
          NextChar();

          std::string utf8_units;
          if (!DecodeUTF16(&utf8_units)) {
            ReportError(JSONReader::JSON_INVALID_ESCAPE, -1);
            return false;
          }

          string.AppendString(utf8_units);
          break;
        }
        case '"':
          string.Append('"');
          break;
        case '\\':
          string.Append('\\');
          break;
        case '/':
          string.Append('/');
          break;
        case 'b':
          string.Append('\b');
          break;
        case 'f':
          string.Append('\f');
          break;
        case 'n':
          string.Append('\n');
          break;
        case 'r':
          string.Append('\r');
          break;
        case 't':
          string.Append('\t');
          break;
        case 'v':  // Not listed as valid escape sequence in the RFC.
          string.Append('\v');
          break;
        // All other escape sequences are illegal.
        default:
          ReportError(JSONReader::JSON_INVALID_ESCAPE, 0);
          return false;
      }
    } else if (next_char == '"') {
      // Closing quote: hand the accumulated contents to the caller.
      --index_;  // Rewind by one because of CBU8_NEXT.
      out->Swap(&string);
      return true;
    } else {
      if (next_char < kExtendedASCIIStart)
        string.Append(next_char);
      else
        DecodeUTF8(next_char, &string);
    }
  }

  // Ran out of input before finding the closing quote.
  ReportError(JSONReader::JSON_SYNTAX_ERROR, 0);
  return false;
}
735
// Decodes a \uXXXX escape (and, for a high surrogate, the \uXXXX low
// surrogate that must follow it) and appends the UTF-8 encoding of the
// resulting code point to |dest_string|. Returns false on malformed input.
// Entry is at the first X in \uXXXX. On success the cursor is left on the
// last hex digit consumed.
bool JSONParser::DecodeUTF16(std::string* dest_string) {
  if (!CanConsume(4))
    return false;

  // This is a 32-bit field because the shift operations in the
  // conversion process below cause MSVC to error about "data loss."
  // This only stores UTF-16 code units, though.
  // Consume the UTF-16 code unit, which may be a high surrogate.
  int code_unit16_high = 0;
  if (!HexStringToInt(StringPiece(pos_, 4), &code_unit16_high))
    return false;

  // Only add 3, not 4, because at the end of this iteration, the parser has
  // finished working with the last digit of the UTF sequence, meaning that
  // the next iteration will advance to the next byte.
  NextNChars(3);

  // Used to convert the UTF-16 code units to a code point and then to a UTF-8
  // code unit sequence. Zero-filled so it can be appended as a C string below.
  char code_point[8] = { 0 };
  size_t offset = 0;

  // If this is a high surrogate, consume the next code unit to get the
  // low surrogate.
  if (CBU16_IS_SURROGATE(code_unit16_high)) {
    // Make sure this is the high surrogate. If not, it's an encoding
    // error.
    if (!CBU16_IS_SURROGATE_LEAD(code_unit16_high))
      return false;

    // Make sure that the token has more characters to consume the
    // lower surrogate.
    if (!CanConsume(6))  // 6 being '\' 'u' and four HEX digits.
      return false;
    if (*NextChar() != '\\' || *NextChar() != 'u')
      return false;

    NextChar();  // Read past 'u'.
    int code_unit16_low = 0;
    if (!HexStringToInt(StringPiece(pos_, 4), &code_unit16_low))
      return false;

    // As above, stop on the last hex digit rather than past it.
    NextNChars(3);

    if (!CBU16_IS_TRAIL(code_unit16_low)) {
      return false;
    }

    // Combine the surrogate pair into one code point and encode it.
    uint32 code_unit32 = CBU16_GET_SUPPLEMENTARY(code_unit16_high,
                                                 code_unit16_low);
    offset = 0;
    CBU8_APPEND_UNSAFE(code_point, offset, code_unit32);
  } else {
    // Not a surrogate.
    DCHECK(CBU16_IS_SINGLE(code_unit16_high));
    CBU8_APPEND_UNSAFE(code_point, offset, code_unit16_high);
  }

  // NOTE(review): |code_point| is appended as a NUL-terminated C string, so a
  // decoded U+0000 presumably contributes no bytes — confirm this is intended.
  dest_string->append(code_point);
  return true;
}
798
799 void JSONParser::DecodeUTF8(const int32& point, StringBuilder* dest) {
800 // Anything outside of the basic ASCII plane will need to be decomposed from
801 // int32 to a multi-byte sequence.
802 if (point < kExtendedASCIIStart) {
803 dest->Append(point);
804 } else {
805 char utf8_units[4] = { 0 };
806 int offset = 0;
807 CBU8_APPEND_UNSAFE(utf8_units, offset, point);
808 dest->Convert();
809 dest->AppendString(utf8_units);
810 }
811 }
812
// Parses the number starting under the cursor: an optional '-', an integer
// part without superfluous leading zeros, an optional fraction and an
// optional exponent. Returns an integer Value when the text converts with
// StringToInt, otherwise a double Value when it converts to a finite double,
// otherwise NULL. On success the cursor is left on the last character of the
// number.
Value* JSONParser::ConsumeNumber() {
  const char* num_start = pos_;
  const int start_index = index_;
  int end_index = start_index;

  if (*pos_ == '-')
    NextChar();

  if (!ReadInt(false)) {
    ReportError(JSONReader::JSON_SYNTAX_ERROR, 1);
    return NULL;
  }
  end_index = index_;

  // The optional fraction part.
  if (*pos_ == '.') {
    if (!CanConsume(1)) {
      ReportError(JSONReader::JSON_SYNTAX_ERROR, 1);
      return NULL;
    }
    NextChar();
    if (!ReadInt(true)) {
      ReportError(JSONReader::JSON_SYNTAX_ERROR, 1);
      return NULL;
    }
    end_index = index_;
  }

  // Optional exponent part.
  if (*pos_ == 'e' || *pos_ == 'E') {
    NextChar();
    if (*pos_ == '-' || *pos_ == '+')
      NextChar();
    if (!ReadInt(true)) {
      ReportError(JSONReader::JSON_SYNTAX_ERROR, 1);
      return NULL;
    }
    end_index = index_;
  }

  // ReadInt is greedy because numbers have no easily detectable sentinel,
  // so save off where the parser should be on exit (see Consume invariant at
  // the top of the header), then make sure the next token is one which is
  // valid.
  const char* exit_pos = pos_ - 1;
  int exit_index = index_ - 1;

  switch (GetNextToken()) {
    case T_OBJECT_END:
    case T_ARRAY_END:
    case T_LIST_SEPARATOR:
    case T_END_OF_INPUT:
      break;
    default:
      ReportError(JSONReader::JSON_SYNTAX_ERROR, 1);
      return NULL;
  }

  // GetNextToken() may have skipped whitespace or comments; rewind to the
  // last character of the number.
  pos_ = exit_pos;
  index_ = exit_index;

  StringPiece num_string(num_start, end_index - start_index);

  // Try the narrower integer conversion first.
  int num_int;
  if (StringToInt(num_string, &num_int))
    return Value::CreateIntegerValue(num_int);

  double num_double;
  if (base::StringToDouble(num_string.as_string(), &num_double) &&
      IsFinite(num_double)) {
    return Value::CreateDoubleValue(num_double);
  }

  // Neither conversion succeeded. Note that no error is reported on this
  // path.
  return NULL;
}
888
889 bool JSONParser::ReadInt(bool allow_leading_zeros) {
890 char first = *pos_;
891 int len = 0;
892
893 char c = first;
894 while (CanConsume(1) && IsAsciiDigit(c)) {
895 c = *NextChar();
896 ++len;
897 }
898
899 if (len == 0)
900 return false;
901
902 if (!allow_leading_zeros && len > 1 && first == '0')
903 return false;
904
905 return true;
906 }
907
908 Value* JSONParser::ConsumeLiteral() {
909 switch (*pos_) {
910 case 't': {
911 const char* kTrueLiteral = "true";
912 const int kTrueLen = static_cast<int>(strlen(kTrueLiteral));
913 if (!CanConsume(kTrueLen - 1) ||
914 !StringsAreEqual(pos_, kTrueLiteral, kTrueLen)) {
915 ReportError(JSONReader::JSON_SYNTAX_ERROR, 1);
916 return NULL;
917 }
918 NextNChars(kTrueLen - 1);
919 return Value::CreateBooleanValue(true);
920 }
921 case 'f': {
922 const char* kFalseLiteral = "false";
923 const int kFalseLen = static_cast<int>(strlen(kFalseLiteral));
924 if (!CanConsume(kFalseLen - 1) ||
925 !StringsAreEqual(pos_, kFalseLiteral, kFalseLen)) {
926 ReportError(JSONReader::JSON_SYNTAX_ERROR, 1);
927 return NULL;
928 }
929 NextNChars(kFalseLen - 1);
930 return Value::CreateBooleanValue(false);
931 }
932 case 'n': {
933 const char* kNullLiteral = "null";
934 const int kNullLen = static_cast<int>(strlen(kNullLiteral));
935 if (!CanConsume(kNullLen - 1) ||
936 !StringsAreEqual(pos_, kNullLiteral, kNullLen)) {
937 ReportError(JSONReader::JSON_SYNTAX_ERROR, 1);
938 return NULL;
939 }
940 NextNChars(kNullLen - 1);
941 return Value::CreateNullValue();
942 }
943 default:
944 ReportError(JSONReader::JSON_UNEXPECTED_TOKEN, 1);
945 return NULL;
946 }
947 }
948
949 // static
950 bool JSONParser::StringsAreEqual(const char* one, const char* two, size_t len) {
951 return strncmp(one, two, len) == 0;
952 }
953
954 void JSONParser::ReportError(JSONReader::JsonParseError code,
955 int column_adjust) {
956 error_code_ = code;
957 error_line_ = line_number_;
958 error_column_ = index_ - index_last_line_ + column_adjust;
959 }
960
961 // static
962 std::string JSONParser::FormatErrorMessage(int line, int column,
963 const std::string& description) {
964 if (line || column) {
965 return StringPrintf("Line: %i, column: %i, %s",
966 line, column, description.c_str());
967 }
968 return description;
969 }
970
971 } // namespace internal
972 } // namespace base
OLDNEW
« no previous file with comments | « base/json/json_parser.h ('k') | base/json/json_parser_unittest.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698