Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(289)

Side by Side Diff: base/json/json_parser.cc

Issue 10035042: Rewrite base::JSONReader to be 35-40% faster, depending on the input string. (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src
Patch Set: '' Created 8 years, 8 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
OLDNEW
(Empty)
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "base/json/json_parser.h"
6
7 #include "base/float_util.h"
8 #include "base/logging.h"
9 #include "base/memory/scoped_ptr.h"
10 #include "base/stringprintf.h"
11 #include "base/string_number_conversions.h"
Mark Mentovai 2012/04/19 16:40:11 '_' < 'p' I would have stayed quiet about it, but
Robert Sesek 2012/05/03 15:34:52 Done.
12 #include "base/string_util.h"
13 #include "base/third_party/icu/icu_utf.h"
14 #include "base/utf_string_conversion_utils.h"
15 #include "base/utf_string_conversions.h"
16 #include "base/values.h"
17
18 namespace {
19
20 const int kStackMaxDepth = 100;
21
22 const int32 kExtendedASCIIStart = 0x80;
23
24 // This and the class below are used to own the JSON input string for when
25 // string tokens are stored as StringPiece instead of std::string. This
26 // optimization avoids about 2/3rds of string memory copies. The constructor
27 // takes the input string and swaps its data into the new instance. The real
28 // root value is also Swap()ed into the new instance.
29 class DictionaryHiddenRootValue : public base::DictionaryValue {
30 public:
31 DictionaryHiddenRootValue(std::string* json, Value* root) {
32 CHECK(root->IsType(Value::TYPE_DICTIONARY));
Mark Mentovai 2012/04/19 16:40:11 Can this be a DCHECK? Same on line 50.
Robert Sesek 2012/05/03 15:34:52 Done.
33 Swap(static_cast<DictionaryValue*>(root));
34 json->swap(json_);
35 }
36
37 virtual base::DictionaryValue* DeepCopy() const OVERRIDE {
38 scoped_ptr<base::Value> data(base::DictionaryValue::DeepCopy());
39 std::string json(json_);
40 return new DictionaryHiddenRootValue(&json, data.get());
41 }
42
43 private:
44 std::string json_;
Mark Mentovai 2012/04/19 16:40:11 DISALLOW_COPY_AND_ASSIGN? Maybe not. Don’t know ho
Robert Sesek 2012/05/03 15:34:52 Done.
45 };
46
47 class ListHiddenRootValue : public base::ListValue {
48 public:
49 ListHiddenRootValue(std::string* json, Value* root) {
50 CHECK(root->IsType(Value::TYPE_LIST));
51 Swap(static_cast<ListValue*>(root));
52 json->swap(json_);
53 }
54
55 virtual base::ListValue* DeepCopy() const OVERRIDE {
56 scoped_ptr<base::Value> data(base::ListValue::DeepCopy());
57 std::string json(json_);
58 return new ListHiddenRootValue(&json, data.get());
59 }
60
61 private:
62 std::string json_;
63 };
64
65 // A variant on StringValue that uses StringPiece instead of copying the string
66 // into the Value. This can only be stored in a child of hidden root (above),
67 // otherwise the referenced string will not be guaranteed to outlive it.
68 class JSONStringValue : public base::Value {
69 public:
70 explicit JSONStringValue(const base::StringPiece& piece)
71 : Value(TYPE_STRING),
72 string_piece_(piece) {
73 }
74
75 // Value:
76 bool GetAsString(std::string* out_value) const OVERRIDE {
77 string_piece_.CopyToString(out_value);
78 return true;
79 }
80 bool GetAsString(string16* out_value) const OVERRIDE {
81 *out_value = UTF8ToUTF16(string_piece_);
82 return true;
83 }
84 virtual Value* DeepCopy() const OVERRIDE {
85 return Value::CreateStringValue(string_piece_.as_string());
86 }
87 virtual bool Equals(const Value* other) const OVERRIDE {
88 std::string other_string;
89 return other->IsType(TYPE_STRING) && other->GetAsString(&other_string) &&
Mark Mentovai 2012/04/19 16:40:11 Isn’t it cheaper to do the comparison of other and
Robert Sesek 2012/05/03 15:34:52 Yes it is. Good idea.
90 other_string == string_piece_.as_string();
91 }
92
93 private:
94 // The location in the original input stream.
95 base::StringPiece string_piece_;
Mark Mentovai 2012/04/19 16:40:11 DISALLOW_COPY_AND_ASSIGN? Maybe not. Don’t know ho
Robert Sesek 2012/05/03 15:34:52 Done.
96 };
97
98 // Simple class that checks for maximum recursion/"stack overflow."
99 class StackMarker {
100 public:
101 StackMarker(int* depth) : depth_(depth) {
Mark Mentovai 2012/04/19 16:40:11 explicit
Robert Sesek 2012/05/03 15:34:52 Done.
102 ++(*depth_);
103 }
104 ~StackMarker() {
105 --(*depth_);
106 }
107
108 bool IsTooDeep() {
Mark Mentovai 2012/04/19 16:40:11 Can be const.
Robert Sesek 2012/05/03 15:34:52 Done.
109 return *depth_ >= kStackMaxDepth;
110 }
111
112 private:
113 int* depth_;
Mark Mentovai 2012/04/19 16:40:11 DISALLOW_COPY_AND_ASSIGN
Mark Mentovai 2012/04/19 16:40:11 The pointer (not value) can be const, which is nic
Robert Sesek 2012/05/03 15:34:52 Done.
Robert Sesek 2012/05/03 15:34:52 Done.
114 };
115
116 } // namespace
117
118 namespace base {
119 namespace internal {
120
121 JSONParser::JSONParser(int options)
122 : options_(options),
123 start_pos_(NULL),
124 pos_(0),
125 index_(0),
Mark Mentovai 2012/04/19 16:40:11 Is end_pos_ missing intentionally?
Robert Sesek 2012/05/03 15:34:52 Nope.
126 stack_depth_(0),
127 line_number_(0),
128 index_last_line_(0),
129 error_code_(JSONReader::JSON_NO_ERROR),
130 error_line_(0),
131 error_column_(0) {
132 }
133
134 JSONParser::~JSONParser() {
135 }
136
137 Value* JSONParser::Parse(const std::string& input) {
Mark Mentovai 2012/04/19 16:40:11 Perhaps this can even accept StringPiece input, po
138 std::string input_copy;
139 // If the children of a JSON root can be detached, then hidden roots cannot
140 // be used, so do not bother copying the input because StringPiece will not
141 // be used anywhere.
142 if (!(options_ & JSON_DETACHABLE_CHILDREN)) {
143 input_copy = input;
144 start_pos_ = input_copy.data();
145 } else {
146 start_pos_ = input.data();
147 }
148 pos_ = start_pos_;
149 end_pos_ = start_pos_ + input.length();
150 index_ = 0;
151 line_number_ = 1;
152 index_last_line_ = 0;
153
154 error_code_ = JSONReader::JSON_NO_ERROR;
155 error_line_ = 0;
156 error_column_ = 0;
157
158 // When the input JSON string starts with a UTF-8 Byte-Order-Mark (U+FEFF)
Mark Mentovai 2012/04/19 16:40:11 That’s a UTF-16 BOM. Your comment makes it sound
Robert Sesek 2012/05/03 15:34:52 Isn't U+FEFF the BOM code point, which in UTF-16 i
159 // or <0xEF 0xBB 0xBF>, advance the start position to avoid the
160 // ParseNextToken function mis-treating a Unicode BOM as an invalid
161 // character and returning NULL.
162 if (CanConsume(3) && static_cast<uint8>(*pos_) == 0xEF &&
163 static_cast<uint8>(*(pos_ + 1)) == 0xBB &&
164 static_cast<uint8>(*(pos_ + 2)) == 0xBF) {
165 NextNChars(3);
166 }
167
168 // Parse the first and all subsequent tokens.
169 scoped_ptr<Value> root(ParseNextToken());
170 if (!root.get())
171 return NULL;
172
173 // Make sure the input stream is at an end.
174 if (GetNextToken() != T_END_OF_INPUT) {
175 if (!CanConsume(1) || (NextChar() && GetNextToken() != T_END_OF_INPUT)) {
176 ReportError(JSONReader::JSON_UNEXPECTED_DATA_AFTER_ROOT, 1);
Mark Mentovai 2012/04/19 16:40:11 What’s the “, 1” doing here? The unexpected data m
177 return NULL;
178 }
179 }
180
181 // Dictionaries and lists can contain JSONStringValues, so wrap them in a
182 // hidden root.
183 if (!(options_ & JSON_DETACHABLE_CHILDREN)) {
184 if (root->IsType(Value::TYPE_DICTIONARY)) {
185 return new DictionaryHiddenRootValue(&input_copy, root.release());
186 } else if (root->IsType(Value::TYPE_LIST)) {
187 return new ListHiddenRootValue(&input_copy, root.release());
188 } else if (root->IsType(Value::TYPE_STRING)) {
189 // A string type could be a JSONStringValue, but because there's no
190 // corresponding HiddenRootValue, the memory will be lost. Deep copy to
191 // preserve it.
192 return root->DeepCopy();
193 }
194 }
195
196 // All other values can be returned directly.
197 return root.release();
198 }
199
200 JSONReader::JsonParseError JSONParser::error_code() const {
201 return error_code_;
202 }
203
204 std::string JSONParser::GetErrorMessage() const {
205 return FormatErrorMessage(error_line_, error_column_,
206 JSONReader::ErrorCodeToString(error_code_));
207 }
208
209 // StringBuilder ///////////////////////////////////////////////////////////////
210
211 JSONParser::StringBuilder::StringBuilder()
212 : pos_(NULL),
213 length_(0),
214 string_(NULL) {
215 }
216
217 JSONParser::StringBuilder::StringBuilder(const char* pos)
218 : pos_(pos),
219 length_(0),
220 string_(NULL) {
221 }
222
223 void JSONParser::StringBuilder::Swap(StringBuilder* other) {
224 if (other->string_) {
Mark Mentovai 2012/04/19 16:40:12 This implementation seems wrong. What if other->st
Robert Sesek 2012/05/03 15:34:52 Done.
225 std::swap(other->string_, string_);
226 } else {
227 std::swap(other->pos_, pos_);
228 std::swap(other->length_, length_);
229 }
230 }
231
232 JSONParser::StringBuilder::~StringBuilder() {
233 delete string_;
234 }
235
236 void JSONParser::StringBuilder::Append(const int32& c) {
237 if (string_) {
238 string_->push_back(c);
Mark Mentovai 2012/04/19 16:40:12 The header never said what restrictions were place
239 } else {
240 // A code point is 32 bits/4 bytes, which needs to be mapped to 1-byte
241 // |char|s.
242 if (c < kExtendedASCIIStart) {
243 ++length_;
244 } else if (c < 0x0800) {
245 length_ += 2;
246 } else if (c < 0x1000) {
247 length_ += 3;
248 } else {
249 length_ += 4;
250 }
251 }
252 }
253
254 void JSONParser::StringBuilder::AppendString(const std::string& str) {
255 DCHECK(string_);
256 string_->append(str);
257 }
258
259 void JSONParser::StringBuilder::Convert() {
260 if (string_)
261 return;
262 string_ = new std::string(pos_, length_);
263 }
264
265 bool JSONParser::StringBuilder::CanBeStringPiece() {
266 return !string_;
Mark Mentovai 2012/04/19 16:40:12 Oh, so once something is converted to a string, it
Robert Sesek 2012/05/03 15:34:52 Clarified.
267 }
268
269 StringPiece JSONParser::StringBuilder::AsStringPiece() {
270 if (string_)
271 return StringPiece();
272 return StringPiece(pos_, length_);
273 }
274
275 std::string JSONParser::StringBuilder::AsString() {
276 if (!string_)
277 Convert();
278 return *string_;
Mark Mentovai 2012/04/19 16:40:12 I may have said this in the other file, but if thi
Robert Sesek 2012/05/03 15:34:52 Done.
279 }
280
281 // JSONParser private //////////////////////////////////////////////////////////
Mark Mentovai 2012/04/19 16:40:12 Stopping here for lunch.
Robert Sesek 2012/05/03 15:34:52 The flatbreads were good today, yes?
282
283 inline bool JSONParser::CanConsume(int length) {
284 return pos_ + length <= end_pos_;
285 }
286
287 const char* JSONParser::NextChar() {
288 DCHECK(CanConsume(1));
289 ++index_;
290 ++pos_;
291 return pos_;
292 }
293
294 void JSONParser::NextNChars(int n) {
295 DCHECK(CanConsume(n));
296 index_ += n;
297 pos_ += n;
298 }
299
300 JSONParser::Token JSONParser::GetNextToken() {
301 EatWhitespaceAndComments();
302 if (!CanConsume(1))
303 return T_END_OF_INPUT;
304
305 switch (*pos_) {
306 case '{':
307 return T_OBJECT_BEGIN;
308 case '}':
309 return T_OBJECT_END;
310 case '[':
311 return T_ARRAY_BEGIN;
312 case ']':
313 return T_ARRAY_END;
314 case '"':
315 return T_STRING;
316 case '0':
317 case '1':
318 case '2':
319 case '3':
320 case '4':
321 case '5':
322 case '6':
323 case '7':
324 case '8':
325 case '9':
326 case '-':
327 return T_NUMBER;
328 case 't':
329 return T_BOOL_TRUE;
330 case 'f':
331 return T_BOOL_FALSE;
332 case 'n':
333 return T_NULL;
334 case ',':
335 return T_LIST_SEPARATOR;
336 case ':':
337 return T_OBJECT_PAIR_SEPARATOR;
338 default:
339 return T_INVALID_TOKEN;
340 }
341 }
342
343 void JSONParser::EatWhitespaceAndComments() {
344 while (pos_ < end_pos_) {
345 switch (*pos_) {
346 case '\r':
347 case '\n':
348 index_last_line_ = index_;
349 ++line_number_;
350 // Fall through.
351 case ' ':
352 case '\t':
353 NextChar();
354 break;
355 case '/':
356 if (!EatComment())
357 return;
358 break;
359 default:
360 return;
361 }
362 }
363 }
364
365 bool JSONParser::EatComment() {
366 if (*pos_ != '/' || !CanConsume(1))
367 return false;
368
369 char next_char = *NextChar();
370 if (next_char == '/') {
371 // Single line comment, read to newline.
372 while (CanConsume(1)) {
373 char next_char = *NextChar();
374 if (next_char == '\n' || next_char == '\r')
375 return true;
376 }
377 } else if (next_char == '*') {
378 // Block comment, read until end marker.
379 while (CanConsume(2)) {
380 if (*NextChar() == '*' && *NextChar() == '/') {
381 // EatWhitespaceAndComments will inspect pos_, which will still be on
382 // the last / of the comment, so advance once more (which may also be
383 // end of input).
384 NextChar();
385 return true;
386 }
387 }
388 }
389
390 return false;
391 }
392
393 Value* JSONParser::ParseNextToken() {
394 return ParseToken(GetNextToken());
395 }
396
397 Value* JSONParser::ParseToken(Token token) {
398 switch (token) {
399 case T_OBJECT_BEGIN:
400 return ConsumeDictionary();
401 case T_ARRAY_BEGIN:
402 return ConsumeList();
403 case T_STRING:
404 return ConsumeString();
405 case T_NUMBER:
406 return ConsumeNumber();
407 case T_BOOL_TRUE:
408 case T_BOOL_FALSE:
409 case T_NULL:
410 return ConsumeLiteral();
411 default:
412 ReportError(JSONReader::JSON_UNEXPECTED_TOKEN, 1);
413 return NULL;
414 }
415 }
416
417 Value* JSONParser::ConsumeDictionary() {
418 if (*pos_ != '{') {
419 ReportError(JSONReader::JSON_UNEXPECTED_TOKEN, 1);
420 return NULL;
421 }
422
423 StackMarker depth_check(&stack_depth_);
424 if (depth_check.IsTooDeep()) {
425 ReportError(JSONReader::JSON_TOO_MUCH_NESTING, 1);
426 return NULL;
427 }
428
429 scoped_ptr<DictionaryValue> dict(new DictionaryValue);
430
431 NextChar();
432 Token token = GetNextToken();
433 while (token != T_OBJECT_END) {
434 if (token != T_STRING) {
435 ReportError(JSONReader::JSON_UNQUOTED_DICTIONARY_KEY, 1);
436 return NULL;
437 }
438
439 // First consume the key.
440 StringBuilder key;
441 if (!ConsumeStringRaw(&key)) {
442 return NULL;
443 }
444
445 // Read the separator.
446 NextChar();
447 token = GetNextToken();
448 if (token != T_OBJECT_PAIR_SEPARATOR) {
449 ReportError(JSONReader::JSON_SYNTAX_ERROR, 1);
450 return NULL;
451 }
452
453 // The token is the value. Ownership transfers to |dict|.
454 NextChar();
455 Value* value = ParseNextToken();
456 if (!value) {
457 return NULL;
458 }
459
460 dict->SetWithoutPathExpansion(key.AsString(), value);
461
462 NextChar();
463 token = GetNextToken();
464 if (token == T_LIST_SEPARATOR) {
465 NextChar();
466 token = GetNextToken();
467 if (token == T_OBJECT_END && !(options_ & JSON_ALLOW_TRAILING_COMMAS)) {
468 ReportError(JSONReader::JSON_TRAILING_COMMA, 1);
469 return NULL;
470 }
471 } else if (token != T_OBJECT_END) {
472 ReportError(JSONReader::JSON_SYNTAX_ERROR, 0);
473 return NULL;
474 }
475 }
476
477 if (token != T_OBJECT_END)
478 return NULL;
479
480 return dict.release();
481 }
482
483 Value* JSONParser::ConsumeList() {
484 if (*pos_ != '[') {
485 ReportError(JSONReader::JSON_UNEXPECTED_TOKEN, 1);
486 return NULL;
487 }
488
489 StackMarker depth_check(&stack_depth_);
490 if (depth_check.IsTooDeep()) {
491 ReportError(JSONReader::JSON_TOO_MUCH_NESTING, 1);
492 return NULL;
493 }
494
495 scoped_ptr<ListValue> list(new ListValue);
496
497 NextChar();
498 Token token = GetNextToken();
499 while (token != T_ARRAY_END) {
500 Value* item = ParseToken(token);
501 if (!item) {
502 // ReportError from deeper level.
503 return NULL;
504 }
505
506 list->Append(item);
507
508 NextChar();
509 token = GetNextToken();
510 if (token == T_LIST_SEPARATOR) {
511 NextChar();
512 token = GetNextToken();
513 if (token == T_ARRAY_END && !(options_ & JSON_ALLOW_TRAILING_COMMAS)) {
514 ReportError(JSONReader::JSON_TRAILING_COMMA, 1);
515 return NULL;
516 }
517 } else if (token != T_ARRAY_END) {
518 ReportError(JSONReader::JSON_SYNTAX_ERROR, 1);
519 return NULL;
520 }
521 }
522
523 if (token != T_ARRAY_END)
524 return NULL;
525
526 return list.release();
527 }
528
529 Value* JSONParser::ConsumeString() {
530 StringBuilder string;
531 if (!ConsumeStringRaw(&string))
532 return NULL;
533
534 // Create the Value representation, either using a hidden root, if configured
535 // to do so, and the string can be represented by StringPiece.
536 if (string.CanBeStringPiece() && !(options_ & JSON_DETACHABLE_CHILDREN)) {
537 return new JSONStringValue(string.AsStringPiece());
538 } else {
539 if (string.CanBeStringPiece())
540 string.Convert();
541 return new StringValue(string.AsString());
542 }
543 }
544
545 bool JSONParser::ConsumeStringRaw(StringBuilder* out) {
546 if (*pos_ != '"') {
547 ReportError(JSONReader::JSON_UNEXPECTED_TOKEN, 1);
548 return false;
549 }
550
551 // StringBuilder will internally build a StringPiece unless a UTF-16
552 // conversion occurs, at which point it will perform a copy into a
553 // std::string.
554 StringBuilder string(NextChar());
555
556 int length = end_pos_ - start_pos_;
557 int32 next_char = 0;
558
559 DCHECK_EQ(*pos_, *(start_pos_ + index_));
560
561 while (CanConsume(1)) {
562 pos_ = start_pos_ + index_; // CBU8_NEXT is postcrement.
563 CBU8_NEXT(start_pos_, index_, length, next_char);
564 if (next_char < 0 || !IsValidCharacter(next_char)) {
565 ReportError(JSONReader::JSON_UNSUPPORTED_ENCODING, 1);
566 return false;
567 }
568
569 // If this character is an escape sequence...
570 if (next_char == '\\') {
571 // The input string will be adjusted (either by combining the two
572 // characters of an encoded escape sequence, or with a UTF conversion),
573 // so using StringPiece isn't possible -- force a conversion.
574 string.Convert();
575
576 if (!CanConsume(1)) {
577 ReportError(JSONReader::JSON_INVALID_ESCAPE, 0);
578 return false;
579 }
580
581 switch (*NextChar()) {
582 // Allowed esape sequences:
583 case 'x': { // UTF-8 sequence.
584 if (!CanConsume(2)) {
585 ReportError(JSONReader::JSON_INVALID_ESCAPE, 1);
586 return false;
587 }
588
589 int hex_digit = 0;
590 if (!HexStringToInt(StringPiece(NextChar(), 2), &hex_digit)) {
591 ReportError(JSONReader::JSON_INVALID_ESCAPE, -1);
592 return false;
593 }
594 NextChar();
595
596 string.Append(hex_digit);
597 break;
598 }
599 case 'u': { // UTF-16 sequence.
600 // UTF units are of the form \uXXXX.
601 if (!CanConsume(5)) { // 5 being 'u' and four HEX digits.
602 ReportError(JSONReader::JSON_INVALID_ESCAPE, 0);
603 return false;
604 }
605
606 // Skip the 'u'.
607 NextChar();
608
609 std::string utf8_units;
610 if (!DecodeUTF16(&utf8_units)) {
611 ReportError(JSONReader::JSON_INVALID_ESCAPE, -1);
612 return false;
613 }
614
615 string.AppendString(utf8_units);
616 break;
617 }
618 case '"':
619 string.Append('"');
620 break;
621 case '\\':
622 string.Append('\\');
623 break;
624 case '/':
625 string.Append('/');
626 break;
627 case 'b':
628 string.Append('\b');
629 break;
630 case 'f':
631 string.Append('\f');
632 break;
633 case 'n':
634 string.Append('\n');
635 break;
636 case 'r':
637 string.Append('\r');
638 break;
639 case 't':
640 string.Append('\t');
641 break;
642 case 'v': // Not listed as valid escape sequence in the RFC.
643 string.Append('\v');
644 break;
645 // All other escape squences are illegal.
646 default:
647 ReportError(JSONReader::JSON_INVALID_ESCAPE, 0);
648 return false;
649 }
650 } else if (next_char == '"') {
651 --index_; // Rewind by one because of CBU8_NEXT.
652 out->Swap(&string);
653 return true;
654 } else if (next_char < kExtendedASCIIStart) {
655 string.Append(next_char);
656 } else {
657 // Anything outside of the basic ASCII plane will need to be
658 // decomposed from int32 to a multi-byte sequence.
659 char utf8_units[4] = { 0 };
660 int offset = 0;
661 string.Convert();
662 CBU8_APPEND_UNSAFE(utf8_units, offset, next_char);
663 string.AppendString(utf8_units);
664 }
665 }
666
667 ReportError(JSONReader::JSON_SYNTAX_ERROR, 0);
668 return false;
669 }
670
671 // Entry is at the first X in \uXXXX.
672 bool JSONParser::DecodeUTF16(std::string* dest_string) {
673 if (!CanConsume(4))
674 return false;
675
676 // This is a 32-bit field because the shift operations in the
677 // conversion process below cause MSVC to error about "data loss."
678 // This only stores UTF-16 code units, though.
679 // Consume the UTF-16 code unit, which may be a high surrogate.
680 int code_unit16_high = 0;
681 if (!HexStringToInt(StringPiece(pos_, 4), &code_unit16_high))
682 return false;
683
684 // Only add 3, not 4, because at the end of this iteration, the parser has
685 // finished working with the last digit of the UTF sequence, meaning that
686 // the next spin of the loop will advance to the next byte.
687 NextNChars(3);
688
689 // If this is a high surrogate, consume the next code unit to get the
690 // low surrogate.
691 int code_unit16_low = 0;
692 if (CBU16_IS_SURROGATE(code_unit16_high)) {
693 // Make sure this is the high surrogate. If not, it's an encoding
694 // error.
695 if (!CBU16_IS_SURROGATE_LEAD(code_unit16_high))
696 return false;
697
698 // Make sure that the token has more characters to consume the
699 // lower surrogate.
700 if (!CanConsume(6)) // 6 being '\' 'u' and four HEX digits.
701 return false;
702 if (*NextChar() != '\\' || *NextChar() != 'u')
703 return false;
704
705 NextChar(); // Read past 'u'.
706 if (!HexStringToInt(StringPiece(pos_, 4), &code_unit16_low))
707 return false;
708
709 NextNChars(3);
710
711 if (!CBU16_IS_SURROGATE(code_unit16_low) ||
712 !CBU16_IS_TRAIL(code_unit16_low)) {
713 return false;
714 }
715 } else if (!CBU16_IS_SINGLE(code_unit16_high)) {
716 // If this is not a code point, it's an encoding error.
717 return false;
718 }
719
720 // Convert the UTF-16 code units to a code point and then to a UTF-8
721 // code unit sequence.
722 char code_point[8] = { 0 };
723 size_t offset = 0;
724 if (!code_unit16_low) {
725 CBU8_APPEND_UNSAFE(code_point, offset, code_unit16_high);
726 } else {
727 uint32 code_unit32 = CBU16_GET_SUPPLEMENTARY(code_unit16_high,
728 code_unit16_low);
729 offset = 0;
730 CBU8_APPEND_UNSAFE(code_point, offset, code_unit32);
731 }
732 dest_string->append(code_point);
733 return true;
734 }
735
736 Value* JSONParser::ConsumeNumber() {
737 const char* num_start = pos_;
738 const int start_index = index_;
739 int end_index = start_index;
740
741 if (*pos_ == '-')
742 NextChar();
743
744 if (!ReadInt(false)) {
745 ReportError(JSONReader::JSON_SYNTAX_ERROR, 1);
746 return NULL;
747 }
748 end_index = index_;
749
750 // The optional faction part.
751 if (*pos_ == '.') {
752 if (!CanConsume(1)) {
753 ReportError(JSONReader::JSON_SYNTAX_ERROR, 1);
754 return NULL;
755 }
756 NextChar();
757 if (!ReadInt(true)) {
758 ReportError(JSONReader::JSON_SYNTAX_ERROR, 1);
759 return NULL;
760 }
761 end_index = index_;
762 }
763
764 // Optional exponent part.
765 if (*pos_ == 'e' || *pos_ == 'E') {
766 NextChar();
767 if (*pos_ == '-' || *pos_ == '+')
768 NextChar();
769 if (!ReadInt(true)) {
770 ReportError(JSONReader::JSON_SYNTAX_ERROR, 1);
771 return NULL;
772 }
773 end_index = index_;
774 }
775
776 // ReadInt is greedy because numbers have no easily detectable sentinel,
777 // so save off where the parser should be on exit (see Consume invariant at
778 // the top of the header), then make sure the next token is one which is
779 // valid.
780 const char* exit_pos = pos_ - 1;
781 int exit_index = index_ - 1;
782
783 switch (GetNextToken()) {
784 case T_OBJECT_END:
785 case T_ARRAY_END:
786 case T_LIST_SEPARATOR:
787 case T_END_OF_INPUT:
788 break;
789 default:
790 ReportError(JSONReader::JSON_SYNTAX_ERROR, 1);
791 return NULL;
792 }
793
794 pos_ = exit_pos;
795 index_ = exit_index;
796
797 StringPiece num_string(num_start, end_index - start_index);
798
799 int num_int;
800 if (StringToInt(num_string, &num_int))
801 return Value::CreateIntegerValue(num_int);
802
803 double num_double;
804 if (base::StringToDouble(num_string.as_string(), &num_double) &&
805 IsFinite(num_double)) {
806 return Value::CreateDoubleValue(num_double);
807 }
808
809 return NULL;
810 }
811
812 bool JSONParser::ReadInt(bool allow_leading_zeros) {
813 char first = *pos_;
814 int len = 0;
815
816 char c = first;
817 while (CanConsume(1) && IsAsciiDigit(c)) {
818 c = *NextChar();
819 ++len;
820 }
821
822 if (len == 0)
823 return false;
824
825 if (!allow_leading_zeros && len > 1 && first == '0')
826 return false;
827
828 return true;
829 }
830
831 Value* JSONParser::ConsumeLiteral() {
832 switch (*pos_) {
833 case 't':
834 if (!CanConsume(3) || !StringsAreEqual(pos_, "true", 4)) {
835 ReportError(JSONReader::JSON_SYNTAX_ERROR, 1);
836 return NULL;
837 }
838 NextNChars(3);
839 return Value::CreateBooleanValue(true);
840 case 'f':
841 if (!CanConsume(4) || !StringsAreEqual(pos_, "false", 5)) {
842 ReportError(JSONReader::JSON_SYNTAX_ERROR, 1);
843 return NULL;
844 }
845 NextNChars(4);
846 return Value::CreateBooleanValue(false);
847 case 'n':
848 if (!CanConsume(3) || !StringsAreEqual(pos_, "null", 4)) {
849 ReportError(JSONReader::JSON_SYNTAX_ERROR, 1);
850 return NULL;
851 }
852 NextNChars(3);
853 return Value::CreateNullValue();
854 default:
855 ReportError(JSONReader::JSON_UNEXPECTED_TOKEN, 1);
856 return NULL;
857 }
858 }
859
860 bool JSONParser::StringsAreEqual(const char* one, const char* two, size_t len) {
861 return strncmp(one, two, len) == 0;
862 }
863
864 void JSONParser::ReportError(JSONReader::JsonParseError code,
865 int column_adjust) {
866 error_code_ = code;
867 error_line_ = line_number_;
868 error_column_ = index_ - index_last_line_ + column_adjust;
869 }
870
871 // static
872 std::string JSONParser::FormatErrorMessage(int line, int column,
873 const std::string& description) {
874 if (line || column) {
875 return base::StringPrintf(
tfarina 2012/04/19 22:54:47 nit: base:: here is not necessary as we are in bas
Robert Sesek 2012/05/03 15:34:52 Done.
876 "Line: %i, column: %i, %s", line, column, description.c_str());
877 }
878 return description;
879 }
880
881 } // namespace internal
882 } // namespace base
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698