OLD | NEW |
---|---|
(Empty) | |
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. | |
2 // Use of this source code is governed by a BSD-style license that can be | |
3 // found in the LICENSE file. | |
4 | |
5 #include "base/json/json_parser.h" | |
6 | |
7 #include "base/float_util.h" | |
8 #include "base/logging.h" | |
9 #include "base/memory/scoped_ptr.h" | |
10 #include "base/stringprintf.h" | |
11 #include "base/string_number_conversions.h" | |
Mark Mentovai
2012/04/19 16:40:11
'_' < 'p'
I would have stayed quiet about it, but
Robert Sesek
2012/05/03 15:34:52
Done.
| |
12 #include "base/string_util.h" | |
13 #include "base/third_party/icu/icu_utf.h" | |
14 #include "base/utf_string_conversion_utils.h" | |
15 #include "base/utf_string_conversions.h" | |
16 #include "base/values.h" | |
17 | |
18 namespace { | |
19 | |
// Nesting limit for lists and dictionaries: StackMarker::IsTooDeep() reports
// true once the tracked depth reaches this value.
20 const int kStackMaxDepth = 100; | |
21 | |
// Code points below this value are handled as single bytes by the string
// parsing code; values at or above it take the multi-byte path.
22 const int32 kExtendedASCIIStart = 0x80; | |
23 | |
24 // This and the class below are used to own the JSON input string for when | |
25 // string tokens are stored as StringPiece instead of std::string. This | |
26 // optimization avoids about 2/3rds of string memory copies. The constructor | |
27 // takes the input string and swaps its data into the new instance. The real | |
28 // root value is also Swap()ed into the new instance. | |
29 class DictionaryHiddenRootValue : public base::DictionaryValue { | |
30 public: | |
31 DictionaryHiddenRootValue(std::string* json, Value* root) { | |
32 CHECK(root->IsType(Value::TYPE_DICTIONARY)); | |
Mark Mentovai
2012/04/19 16:40:11
Can this be a DCHECK?
Same on line 50.
Robert Sesek
2012/05/03 15:34:52
Done.
| |
33 Swap(static_cast<DictionaryValue*>(root)); | |
34 json->swap(json_); | |
35 } | |
36 | |
37 virtual base::DictionaryValue* DeepCopy() const OVERRIDE { | |
38 scoped_ptr<base::Value> data(base::DictionaryValue::DeepCopy()); | |
39 std::string json(json_); | |
40 return new DictionaryHiddenRootValue(&json, data.get()); | |
41 } | |
42 | |
43 private: | |
44 std::string json_; | |
Mark Mentovai
2012/04/19 16:40:11
DISALLOW_COPY_AND_ASSIGN? Maybe not. Don’t know ho
Robert Sesek
2012/05/03 15:34:52
Done.
| |
45 }; | |
46 | |
47 class ListHiddenRootValue : public base::ListValue { | |
48 public: | |
49 ListHiddenRootValue(std::string* json, Value* root) { | |
50 CHECK(root->IsType(Value::TYPE_LIST)); | |
51 Swap(static_cast<ListValue*>(root)); | |
52 json->swap(json_); | |
53 } | |
54 | |
55 virtual base::ListValue* DeepCopy() const OVERRIDE { | |
56 scoped_ptr<base::Value> data(base::ListValue::DeepCopy()); | |
57 std::string json(json_); | |
58 return new ListHiddenRootValue(&json, data.get()); | |
59 } | |
60 | |
61 private: | |
62 std::string json_; | |
63 }; | |
64 | |
65 // A variant on StringValue that uses StringPiece instead of copying the string | |
66 // into the Value. This can only be stored in a child of hidden root (above), | |
67 // otherwise the referenced string will not be guaranteed to outlive it. | |
68 class JSONStringValue : public base::Value { | |
69 public: | |
70 explicit JSONStringValue(const base::StringPiece& piece) | |
71 : Value(TYPE_STRING), | |
72 string_piece_(piece) { | |
73 } | |
74 | |
75 // Value: | |
76 bool GetAsString(std::string* out_value) const OVERRIDE { | |
77 string_piece_.CopyToString(out_value); | |
78 return true; | |
79 } | |
80 bool GetAsString(string16* out_value) const OVERRIDE { | |
81 *out_value = UTF8ToUTF16(string_piece_); | |
82 return true; | |
83 } | |
84 virtual Value* DeepCopy() const OVERRIDE { | |
85 return Value::CreateStringValue(string_piece_.as_string()); | |
86 } | |
87 virtual bool Equals(const Value* other) const OVERRIDE { | |
88 std::string other_string; | |
89 return other->IsType(TYPE_STRING) && other->GetAsString(&other_string) && | |
Mark Mentovai
2012/04/19 16:40:11
Isn’t it cheaper to do the comparison of other and
Robert Sesek
2012/05/03 15:34:52
Yes it is. Good idea.
| |
90 other_string == string_piece_.as_string(); | |
91 } | |
92 | |
93 private: | |
94 // The location in the original input stream. | |
95 base::StringPiece string_piece_; | |
Mark Mentovai
2012/04/19 16:40:11
DISALLOW_COPY_AND_ASSIGN? Maybe not. Don’t know ho
Robert Sesek
2012/05/03 15:34:52
Done.
| |
96 }; | |
97 | |
98 // Simple class that checks for maximum recursion/"stack overflow." | |
99 class StackMarker { | |
100 public: | |
101 StackMarker(int* depth) : depth_(depth) { | |
Mark Mentovai
2012/04/19 16:40:11
explicit
Robert Sesek
2012/05/03 15:34:52
Done.
| |
102 ++(*depth_); | |
103 } | |
104 ~StackMarker() { | |
105 --(*depth_); | |
106 } | |
107 | |
108 bool IsTooDeep() { | |
Mark Mentovai
2012/04/19 16:40:11
Can be const.
Robert Sesek
2012/05/03 15:34:52
Done.
| |
109 return *depth_ >= kStackMaxDepth; | |
110 } | |
111 | |
112 private: | |
113 int* depth_; | |
Mark Mentovai
2012/04/19 16:40:11
DISALLOW_COPY_AND_ASSIGN
Mark Mentovai
2012/04/19 16:40:11
The pointer (not value) can be const, which is nic
Robert Sesek
2012/05/03 15:34:52
Done.
Robert Sesek
2012/05/03 15:34:52
Done.
| |
114 }; | |
115 | |
116 } // namespace | |
117 | |
118 namespace base { | |
119 namespace internal { | |
120 | |
121 JSONParser::JSONParser(int options) | |
122 : options_(options), | |
123 start_pos_(NULL), | |
124 pos_(0), | |
125 index_(0), | |
Mark Mentovai
2012/04/19 16:40:11
Is end_pos_ missing intentionally?
Robert Sesek
2012/05/03 15:34:52
Nope.
| |
126 stack_depth_(0), | |
127 line_number_(0), | |
128 index_last_line_(0), | |
129 error_code_(JSONReader::JSON_NO_ERROR), | |
130 error_line_(0), | |
131 error_column_(0) { | |
132 } | |
133 | |
134 JSONParser::~JSONParser() { | |
135 } | |
136 | |
137 Value* JSONParser::Parse(const std::string& input) { | |
Mark Mentovai
2012/04/19 16:40:11
Perhaps this can even accept StringPiece input, po
| |
138 std::string input_copy; | |
139 // If the children of a JSON root can be detached, then hidden roots cannot | |
140 // be used, so do not bother copying the input because StringPiece will not | |
141 // be used anywhere. | |
142 if (!(options_ & JSON_DETACHABLE_CHILDREN)) { | |
143 input_copy = input; | |
144 start_pos_ = input_copy.data(); | |
145 } else { | |
146 start_pos_ = input.data(); | |
147 } | |
148 pos_ = start_pos_; | |
149 end_pos_ = start_pos_ + input.length(); | |
150 index_ = 0; | |
151 line_number_ = 1; | |
152 index_last_line_ = 0; | |
153 | |
154 error_code_ = JSONReader::JSON_NO_ERROR; | |
155 error_line_ = 0; | |
156 error_column_ = 0; | |
157 | |
158 // When the input JSON string starts with a UTF-8 Byte-Order-Mark (U+FEFF) | |
Mark Mentovai
2012/04/19 16:40:11
That’s a UTF-16 BOM.
Your comment makes it sound
Robert Sesek
2012/05/03 15:34:52
Isn't U+FEFF the BOM code point, which in UTF-16 i
| |
159 // or <0xEF 0xBB 0xBF>, advance the start position to avoid the | |
160 // ParseNextToken function mis-treating a Unicode BOM as an invalid | |
161 // character and returning NULL. | |
162 if (CanConsume(3) && static_cast<uint8>(*pos_) == 0xEF && | |
163 static_cast<uint8>(*(pos_ + 1)) == 0xBB && | |
164 static_cast<uint8>(*(pos_ + 2)) == 0xBF) { | |
165 NextNChars(3); | |
166 } | |
167 | |
168 // Parse the first and all subsequent tokens. | |
169 scoped_ptr<Value> root(ParseNextToken()); | |
170 if (!root.get()) | |
171 return NULL; | |
172 | |
173 // Make sure the input stream is at an end. | |
174 if (GetNextToken() != T_END_OF_INPUT) { | |
175 if (!CanConsume(1) || (NextChar() && GetNextToken() != T_END_OF_INPUT)) { | |
176 ReportError(JSONReader::JSON_UNEXPECTED_DATA_AFTER_ROOT, 1); | |
Mark Mentovai
2012/04/19 16:40:11
What’s the “, 1” doing here? The unexpected data m
| |
177 return NULL; | |
178 } | |
179 } | |
180 | |
181 // Dictionaries and lists can contain JSONStringValues, so wrap them in a | |
182 // hidden root. | |
183 if (!(options_ & JSON_DETACHABLE_CHILDREN)) { | |
184 if (root->IsType(Value::TYPE_DICTIONARY)) { | |
185 return new DictionaryHiddenRootValue(&input_copy, root.release()); | |
186 } else if (root->IsType(Value::TYPE_LIST)) { | |
187 return new ListHiddenRootValue(&input_copy, root.release()); | |
188 } else if (root->IsType(Value::TYPE_STRING)) { | |
189 // A string type could be a JSONStringValue, but because there's no | |
190 // corresponding HiddenRootValue, the memory will be lost. Deep copy to | |
191 // preserve it. | |
192 return root->DeepCopy(); | |
193 } | |
194 } | |
195 | |
196 // All other values can be returned directly. | |
197 return root.release(); | |
198 } | |
199 | |
200 JSONReader::JsonParseError JSONParser::error_code() const { | |
201 return error_code_; | |
202 } | |
203 | |
204 std::string JSONParser::GetErrorMessage() const { | |
205 return FormatErrorMessage(error_line_, error_column_, | |
206 JSONReader::ErrorCodeToString(error_code_)); | |
207 } | |
208 | |
209 // StringBuilder /////////////////////////////////////////////////////////////// | |
210 | |
211 JSONParser::StringBuilder::StringBuilder() | |
212 : pos_(NULL), | |
213 length_(0), | |
214 string_(NULL) { | |
215 } | |
216 | |
217 JSONParser::StringBuilder::StringBuilder(const char* pos) | |
218 : pos_(pos), | |
219 length_(0), | |
220 string_(NULL) { | |
221 } | |
222 | |
223 void JSONParser::StringBuilder::Swap(StringBuilder* other) { | |
224 if (other->string_) { | |
Mark Mentovai
2012/04/19 16:40:12
This implementation seems wrong. What if other->st
Robert Sesek
2012/05/03 15:34:52
Done.
| |
225 std::swap(other->string_, string_); | |
226 } else { | |
227 std::swap(other->pos_, pos_); | |
228 std::swap(other->length_, length_); | |
229 } | |
230 } | |
231 | |
232 JSONParser::StringBuilder::~StringBuilder() { | |
233 delete string_; | |
234 } | |
235 | |
236 void JSONParser::StringBuilder::Append(const int32& c) { | |
237 if (string_) { | |
238 string_->push_back(c); | |
Mark Mentovai
2012/04/19 16:40:12
The header never said what restrictions were place
| |
239 } else { | |
240 // A code point is 32 bits/4 bytes, which needs to be mapped to 1-byte | |
241 // |char|s. | |
242 if (c < kExtendedASCIIStart) { | |
243 ++length_; | |
244 } else if (c < 0x0800) { | |
245 length_ += 2; | |
246 } else if (c < 0x1000) { | |
247 length_ += 3; | |
248 } else { | |
249 length_ += 4; | |
250 } | |
251 } | |
252 } | |
253 | |
254 void JSONParser::StringBuilder::AppendString(const std::string& str) { | |
255 DCHECK(string_); | |
256 string_->append(str); | |
257 } | |
258 | |
259 void JSONParser::StringBuilder::Convert() { | |
260 if (string_) | |
261 return; | |
262 string_ = new std::string(pos_, length_); | |
263 } | |
264 | |
265 bool JSONParser::StringBuilder::CanBeStringPiece() { | |
266 return !string_; | |
Mark Mentovai
2012/04/19 16:40:12
Oh, so once something is converted to a string, it
Robert Sesek
2012/05/03 15:34:52
Clarified.
| |
267 } | |
268 | |
269 StringPiece JSONParser::StringBuilder::AsStringPiece() { | |
270 if (string_) | |
271 return StringPiece(); | |
272 return StringPiece(pos_, length_); | |
273 } | |
274 | |
275 std::string JSONParser::StringBuilder::AsString() { | |
276 if (!string_) | |
277 Convert(); | |
278 return *string_; | |
Mark Mentovai
2012/04/19 16:40:12
I may have said this in the other file, but if thi
Robert Sesek
2012/05/03 15:34:52
Done.
| |
279 } | |
280 | |
281 // JSONParser private ////////////////////////////////////////////////////////// | |
Mark Mentovai
2012/04/19 16:40:12
Stopping here for lunch.
Robert Sesek
2012/05/03 15:34:52
The flatbreads were good today, yes?
| |
282 | |
283 inline bool JSONParser::CanConsume(int length) { | |
284 return pos_ + length <= end_pos_; | |
285 } | |
286 | |
287 const char* JSONParser::NextChar() { | |
288 DCHECK(CanConsume(1)); | |
289 ++index_; | |
290 ++pos_; | |
291 return pos_; | |
292 } | |
293 | |
294 void JSONParser::NextNChars(int n) { | |
295 DCHECK(CanConsume(n)); | |
296 index_ += n; | |
297 pos_ += n; | |
298 } | |
299 | |
300 JSONParser::Token JSONParser::GetNextToken() { | |
301 EatWhitespaceAndComments(); | |
302 if (!CanConsume(1)) | |
303 return T_END_OF_INPUT; | |
304 | |
305 switch (*pos_) { | |
306 case '{': | |
307 return T_OBJECT_BEGIN; | |
308 case '}': | |
309 return T_OBJECT_END; | |
310 case '[': | |
311 return T_ARRAY_BEGIN; | |
312 case ']': | |
313 return T_ARRAY_END; | |
314 case '"': | |
315 return T_STRING; | |
316 case '0': | |
317 case '1': | |
318 case '2': | |
319 case '3': | |
320 case '4': | |
321 case '5': | |
322 case '6': | |
323 case '7': | |
324 case '8': | |
325 case '9': | |
326 case '-': | |
327 return T_NUMBER; | |
328 case 't': | |
329 return T_BOOL_TRUE; | |
330 case 'f': | |
331 return T_BOOL_FALSE; | |
332 case 'n': | |
333 return T_NULL; | |
334 case ',': | |
335 return T_LIST_SEPARATOR; | |
336 case ':': | |
337 return T_OBJECT_PAIR_SEPARATOR; | |
338 default: | |
339 return T_INVALID_TOKEN; | |
340 } | |
341 } | |
342 | |
343 void JSONParser::EatWhitespaceAndComments() { | |
344 while (pos_ < end_pos_) { | |
345 switch (*pos_) { | |
346 case '\r': | |
347 case '\n': | |
348 index_last_line_ = index_; | |
349 ++line_number_; | |
350 // Fall through. | |
351 case ' ': | |
352 case '\t': | |
353 NextChar(); | |
354 break; | |
355 case '/': | |
356 if (!EatComment()) | |
357 return; | |
358 break; | |
359 default: | |
360 return; | |
361 } | |
362 } | |
363 } | |
364 | |
365 bool JSONParser::EatComment() { | |
366 if (*pos_ != '/' || !CanConsume(1)) | |
367 return false; | |
368 | |
369 char next_char = *NextChar(); | |
370 if (next_char == '/') { | |
371 // Single line comment, read to newline. | |
372 while (CanConsume(1)) { | |
373 char next_char = *NextChar(); | |
374 if (next_char == '\n' || next_char == '\r') | |
375 return true; | |
376 } | |
377 } else if (next_char == '*') { | |
378 // Block comment, read until end marker. | |
379 while (CanConsume(2)) { | |
380 if (*NextChar() == '*' && *NextChar() == '/') { | |
381 // EatWhitespaceAndComments will inspect pos_, which will still be on | |
382 // the last / of the comment, so advance once more (which may also be | |
383 // end of input). | |
384 NextChar(); | |
385 return true; | |
386 } | |
387 } | |
388 } | |
389 | |
390 return false; | |
391 } | |
392 | |
393 Value* JSONParser::ParseNextToken() { | |
394 return ParseToken(GetNextToken()); | |
395 } | |
396 | |
397 Value* JSONParser::ParseToken(Token token) { | |
398 switch (token) { | |
399 case T_OBJECT_BEGIN: | |
400 return ConsumeDictionary(); | |
401 case T_ARRAY_BEGIN: | |
402 return ConsumeList(); | |
403 case T_STRING: | |
404 return ConsumeString(); | |
405 case T_NUMBER: | |
406 return ConsumeNumber(); | |
407 case T_BOOL_TRUE: | |
408 case T_BOOL_FALSE: | |
409 case T_NULL: | |
410 return ConsumeLiteral(); | |
411 default: | |
412 ReportError(JSONReader::JSON_UNEXPECTED_TOKEN, 1); | |
413 return NULL; | |
414 } | |
415 } | |
416 | |
417 Value* JSONParser::ConsumeDictionary() { | |
418 if (*pos_ != '{') { | |
419 ReportError(JSONReader::JSON_UNEXPECTED_TOKEN, 1); | |
420 return NULL; | |
421 } | |
422 | |
423 StackMarker depth_check(&stack_depth_); | |
424 if (depth_check.IsTooDeep()) { | |
425 ReportError(JSONReader::JSON_TOO_MUCH_NESTING, 1); | |
426 return NULL; | |
427 } | |
428 | |
429 scoped_ptr<DictionaryValue> dict(new DictionaryValue); | |
430 | |
431 NextChar(); | |
432 Token token = GetNextToken(); | |
433 while (token != T_OBJECT_END) { | |
434 if (token != T_STRING) { | |
435 ReportError(JSONReader::JSON_UNQUOTED_DICTIONARY_KEY, 1); | |
436 return NULL; | |
437 } | |
438 | |
439 // First consume the key. | |
440 StringBuilder key; | |
441 if (!ConsumeStringRaw(&key)) { | |
442 return NULL; | |
443 } | |
444 | |
445 // Read the separator. | |
446 NextChar(); | |
447 token = GetNextToken(); | |
448 if (token != T_OBJECT_PAIR_SEPARATOR) { | |
449 ReportError(JSONReader::JSON_SYNTAX_ERROR, 1); | |
450 return NULL; | |
451 } | |
452 | |
453 // The token is the value. Ownership transfers to |dict|. | |
454 NextChar(); | |
455 Value* value = ParseNextToken(); | |
456 if (!value) { | |
457 return NULL; | |
458 } | |
459 | |
460 dict->SetWithoutPathExpansion(key.AsString(), value); | |
461 | |
462 NextChar(); | |
463 token = GetNextToken(); | |
464 if (token == T_LIST_SEPARATOR) { | |
465 NextChar(); | |
466 token = GetNextToken(); | |
467 if (token == T_OBJECT_END && !(options_ & JSON_ALLOW_TRAILING_COMMAS)) { | |
468 ReportError(JSONReader::JSON_TRAILING_COMMA, 1); | |
469 return NULL; | |
470 } | |
471 } else if (token != T_OBJECT_END) { | |
472 ReportError(JSONReader::JSON_SYNTAX_ERROR, 0); | |
473 return NULL; | |
474 } | |
475 } | |
476 | |
477 if (token != T_OBJECT_END) | |
478 return NULL; | |
479 | |
480 return dict.release(); | |
481 } | |
482 | |
483 Value* JSONParser::ConsumeList() { | |
484 if (*pos_ != '[') { | |
485 ReportError(JSONReader::JSON_UNEXPECTED_TOKEN, 1); | |
486 return NULL; | |
487 } | |
488 | |
489 StackMarker depth_check(&stack_depth_); | |
490 if (depth_check.IsTooDeep()) { | |
491 ReportError(JSONReader::JSON_TOO_MUCH_NESTING, 1); | |
492 return NULL; | |
493 } | |
494 | |
495 scoped_ptr<ListValue> list(new ListValue); | |
496 | |
497 NextChar(); | |
498 Token token = GetNextToken(); | |
499 while (token != T_ARRAY_END) { | |
500 Value* item = ParseToken(token); | |
501 if (!item) { | |
502 // ReportError from deeper level. | |
503 return NULL; | |
504 } | |
505 | |
506 list->Append(item); | |
507 | |
508 NextChar(); | |
509 token = GetNextToken(); | |
510 if (token == T_LIST_SEPARATOR) { | |
511 NextChar(); | |
512 token = GetNextToken(); | |
513 if (token == T_ARRAY_END && !(options_ & JSON_ALLOW_TRAILING_COMMAS)) { | |
514 ReportError(JSONReader::JSON_TRAILING_COMMA, 1); | |
515 return NULL; | |
516 } | |
517 } else if (token != T_ARRAY_END) { | |
518 ReportError(JSONReader::JSON_SYNTAX_ERROR, 1); | |
519 return NULL; | |
520 } | |
521 } | |
522 | |
523 if (token != T_ARRAY_END) | |
524 return NULL; | |
525 | |
526 return list.release(); | |
527 } | |
528 | |
529 Value* JSONParser::ConsumeString() { | |
530 StringBuilder string; | |
531 if (!ConsumeStringRaw(&string)) | |
532 return NULL; | |
533 | |
534 // Create the Value representation, either using a hidden root, if configured | |
535 // to do so, and the string can be represented by StringPiece. | |
536 if (string.CanBeStringPiece() && !(options_ & JSON_DETACHABLE_CHILDREN)) { | |
537 return new JSONStringValue(string.AsStringPiece()); | |
538 } else { | |
539 if (string.CanBeStringPiece()) | |
540 string.Convert(); | |
541 return new StringValue(string.AsString()); | |
542 } | |
543 } | |
544 | |
545 bool JSONParser::ConsumeStringRaw(StringBuilder* out) { | |
546 if (*pos_ != '"') { | |
547 ReportError(JSONReader::JSON_UNEXPECTED_TOKEN, 1); | |
548 return false; | |
549 } | |
550 | |
551 // StringBuilder will internally build a StringPiece unless a UTF-16 | |
552 // conversion occurs, at which point it will perform a copy into a | |
553 // std::string. | |
554 StringBuilder string(NextChar()); | |
555 | |
556 int length = end_pos_ - start_pos_; | |
557 int32 next_char = 0; | |
558 | |
559 DCHECK_EQ(*pos_, *(start_pos_ + index_)); | |
560 | |
561 while (CanConsume(1)) { | |
562 pos_ = start_pos_ + index_; // CBU8_NEXT is postcrement. | |
563 CBU8_NEXT(start_pos_, index_, length, next_char); | |
564 if (next_char < 0 || !IsValidCharacter(next_char)) { | |
565 ReportError(JSONReader::JSON_UNSUPPORTED_ENCODING, 1); | |
566 return false; | |
567 } | |
568 | |
569 // If this character is an escape sequence... | |
570 if (next_char == '\\') { | |
571 // The input string will be adjusted (either by combining the two | |
572 // characters of an encoded escape sequence, or with a UTF conversion), | |
573 // so using StringPiece isn't possible -- force a conversion. | |
574 string.Convert(); | |
575 | |
576 if (!CanConsume(1)) { | |
577 ReportError(JSONReader::JSON_INVALID_ESCAPE, 0); | |
578 return false; | |
579 } | |
580 | |
581 switch (*NextChar()) { | |
582 // Allowed esape sequences: | |
583 case 'x': { // UTF-8 sequence. | |
584 if (!CanConsume(2)) { | |
585 ReportError(JSONReader::JSON_INVALID_ESCAPE, 1); | |
586 return false; | |
587 } | |
588 | |
589 int hex_digit = 0; | |
590 if (!HexStringToInt(StringPiece(NextChar(), 2), &hex_digit)) { | |
591 ReportError(JSONReader::JSON_INVALID_ESCAPE, -1); | |
592 return false; | |
593 } | |
594 NextChar(); | |
595 | |
596 string.Append(hex_digit); | |
597 break; | |
598 } | |
599 case 'u': { // UTF-16 sequence. | |
600 // UTF units are of the form \uXXXX. | |
601 if (!CanConsume(5)) { // 5 being 'u' and four HEX digits. | |
602 ReportError(JSONReader::JSON_INVALID_ESCAPE, 0); | |
603 return false; | |
604 } | |
605 | |
606 // Skip the 'u'. | |
607 NextChar(); | |
608 | |
609 std::string utf8_units; | |
610 if (!DecodeUTF16(&utf8_units)) { | |
611 ReportError(JSONReader::JSON_INVALID_ESCAPE, -1); | |
612 return false; | |
613 } | |
614 | |
615 string.AppendString(utf8_units); | |
616 break; | |
617 } | |
618 case '"': | |
619 string.Append('"'); | |
620 break; | |
621 case '\\': | |
622 string.Append('\\'); | |
623 break; | |
624 case '/': | |
625 string.Append('/'); | |
626 break; | |
627 case 'b': | |
628 string.Append('\b'); | |
629 break; | |
630 case 'f': | |
631 string.Append('\f'); | |
632 break; | |
633 case 'n': | |
634 string.Append('\n'); | |
635 break; | |
636 case 'r': | |
637 string.Append('\r'); | |
638 break; | |
639 case 't': | |
640 string.Append('\t'); | |
641 break; | |
642 case 'v': // Not listed as valid escape sequence in the RFC. | |
643 string.Append('\v'); | |
644 break; | |
645 // All other escape squences are illegal. | |
646 default: | |
647 ReportError(JSONReader::JSON_INVALID_ESCAPE, 0); | |
648 return false; | |
649 } | |
650 } else if (next_char == '"') { | |
651 --index_; // Rewind by one because of CBU8_NEXT. | |
652 out->Swap(&string); | |
653 return true; | |
654 } else if (next_char < kExtendedASCIIStart) { | |
655 string.Append(next_char); | |
656 } else { | |
657 // Anything outside of the basic ASCII plane will need to be | |
658 // decomposed from int32 to a multi-byte sequence. | |
659 char utf8_units[4] = { 0 }; | |
660 int offset = 0; | |
661 string.Convert(); | |
662 CBU8_APPEND_UNSAFE(utf8_units, offset, next_char); | |
663 string.AppendString(utf8_units); | |
664 } | |
665 } | |
666 | |
667 ReportError(JSONReader::JSON_SYNTAX_ERROR, 0); | |
668 return false; | |
669 } | |
670 | |
671 // Entry is at the first X in \uXXXX. | |
672 bool JSONParser::DecodeUTF16(std::string* dest_string) { | |
673 if (!CanConsume(4)) | |
674 return false; | |
675 | |
676 // This is a 32-bit field because the shift operations in the | |
677 // conversion process below cause MSVC to error about "data loss." | |
678 // This only stores UTF-16 code units, though. | |
679 // Consume the UTF-16 code unit, which may be a high surrogate. | |
680 int code_unit16_high = 0; | |
681 if (!HexStringToInt(StringPiece(pos_, 4), &code_unit16_high)) | |
682 return false; | |
683 | |
684 // Only add 3, not 4, because at the end of this iteration, the parser has | |
685 // finished working with the last digit of the UTF sequence, meaning that | |
686 // the next spin of the loop will advance to the next byte. | |
687 NextNChars(3); | |
688 | |
689 // If this is a high surrogate, consume the next code unit to get the | |
690 // low surrogate. | |
691 int code_unit16_low = 0; | |
692 if (CBU16_IS_SURROGATE(code_unit16_high)) { | |
693 // Make sure this is the high surrogate. If not, it's an encoding | |
694 // error. | |
695 if (!CBU16_IS_SURROGATE_LEAD(code_unit16_high)) | |
696 return false; | |
697 | |
698 // Make sure that the token has more characters to consume the | |
699 // lower surrogate. | |
700 if (!CanConsume(6)) // 6 being '\' 'u' and four HEX digits. | |
701 return false; | |
702 if (*NextChar() != '\\' || *NextChar() != 'u') | |
703 return false; | |
704 | |
705 NextChar(); // Read past 'u'. | |
706 if (!HexStringToInt(StringPiece(pos_, 4), &code_unit16_low)) | |
707 return false; | |
708 | |
709 NextNChars(3); | |
710 | |
711 if (!CBU16_IS_SURROGATE(code_unit16_low) || | |
712 !CBU16_IS_TRAIL(code_unit16_low)) { | |
713 return false; | |
714 } | |
715 } else if (!CBU16_IS_SINGLE(code_unit16_high)) { | |
716 // If this is not a code point, it's an encoding error. | |
717 return false; | |
718 } | |
719 | |
720 // Convert the UTF-16 code units to a code point and then to a UTF-8 | |
721 // code unit sequence. | |
722 char code_point[8] = { 0 }; | |
723 size_t offset = 0; | |
724 if (!code_unit16_low) { | |
725 CBU8_APPEND_UNSAFE(code_point, offset, code_unit16_high); | |
726 } else { | |
727 uint32 code_unit32 = CBU16_GET_SUPPLEMENTARY(code_unit16_high, | |
728 code_unit16_low); | |
729 offset = 0; | |
730 CBU8_APPEND_UNSAFE(code_point, offset, code_unit32); | |
731 } | |
732 dest_string->append(code_point); | |
733 return true; | |
734 } | |
735 | |
736 Value* JSONParser::ConsumeNumber() { | |
737 const char* num_start = pos_; | |
738 const int start_index = index_; | |
739 int end_index = start_index; | |
740 | |
741 if (*pos_ == '-') | |
742 NextChar(); | |
743 | |
744 if (!ReadInt(false)) { | |
745 ReportError(JSONReader::JSON_SYNTAX_ERROR, 1); | |
746 return NULL; | |
747 } | |
748 end_index = index_; | |
749 | |
750 // The optional faction part. | |
751 if (*pos_ == '.') { | |
752 if (!CanConsume(1)) { | |
753 ReportError(JSONReader::JSON_SYNTAX_ERROR, 1); | |
754 return NULL; | |
755 } | |
756 NextChar(); | |
757 if (!ReadInt(true)) { | |
758 ReportError(JSONReader::JSON_SYNTAX_ERROR, 1); | |
759 return NULL; | |
760 } | |
761 end_index = index_; | |
762 } | |
763 | |
764 // Optional exponent part. | |
765 if (*pos_ == 'e' || *pos_ == 'E') { | |
766 NextChar(); | |
767 if (*pos_ == '-' || *pos_ == '+') | |
768 NextChar(); | |
769 if (!ReadInt(true)) { | |
770 ReportError(JSONReader::JSON_SYNTAX_ERROR, 1); | |
771 return NULL; | |
772 } | |
773 end_index = index_; | |
774 } | |
775 | |
776 // ReadInt is greedy because numbers have no easily detectable sentinel, | |
777 // so save off where the parser should be on exit (see Consume invariant at | |
778 // the top of the header), then make sure the next token is one which is | |
779 // valid. | |
780 const char* exit_pos = pos_ - 1; | |
781 int exit_index = index_ - 1; | |
782 | |
783 switch (GetNextToken()) { | |
784 case T_OBJECT_END: | |
785 case T_ARRAY_END: | |
786 case T_LIST_SEPARATOR: | |
787 case T_END_OF_INPUT: | |
788 break; | |
789 default: | |
790 ReportError(JSONReader::JSON_SYNTAX_ERROR, 1); | |
791 return NULL; | |
792 } | |
793 | |
794 pos_ = exit_pos; | |
795 index_ = exit_index; | |
796 | |
797 StringPiece num_string(num_start, end_index - start_index); | |
798 | |
799 int num_int; | |
800 if (StringToInt(num_string, &num_int)) | |
801 return Value::CreateIntegerValue(num_int); | |
802 | |
803 double num_double; | |
804 if (base::StringToDouble(num_string.as_string(), &num_double) && | |
805 IsFinite(num_double)) { | |
806 return Value::CreateDoubleValue(num_double); | |
807 } | |
808 | |
809 return NULL; | |
810 } | |
811 | |
812 bool JSONParser::ReadInt(bool allow_leading_zeros) { | |
813 char first = *pos_; | |
814 int len = 0; | |
815 | |
816 char c = first; | |
817 while (CanConsume(1) && IsAsciiDigit(c)) { | |
818 c = *NextChar(); | |
819 ++len; | |
820 } | |
821 | |
822 if (len == 0) | |
823 return false; | |
824 | |
825 if (!allow_leading_zeros && len > 1 && first == '0') | |
826 return false; | |
827 | |
828 return true; | |
829 } | |
830 | |
831 Value* JSONParser::ConsumeLiteral() { | |
832 switch (*pos_) { | |
833 case 't': | |
834 if (!CanConsume(3) || !StringsAreEqual(pos_, "true", 4)) { | |
835 ReportError(JSONReader::JSON_SYNTAX_ERROR, 1); | |
836 return NULL; | |
837 } | |
838 NextNChars(3); | |
839 return Value::CreateBooleanValue(true); | |
840 case 'f': | |
841 if (!CanConsume(4) || !StringsAreEqual(pos_, "false", 5)) { | |
842 ReportError(JSONReader::JSON_SYNTAX_ERROR, 1); | |
843 return NULL; | |
844 } | |
845 NextNChars(4); | |
846 return Value::CreateBooleanValue(false); | |
847 case 'n': | |
848 if (!CanConsume(3) || !StringsAreEqual(pos_, "null", 4)) { | |
849 ReportError(JSONReader::JSON_SYNTAX_ERROR, 1); | |
850 return NULL; | |
851 } | |
852 NextNChars(3); | |
853 return Value::CreateNullValue(); | |
854 default: | |
855 ReportError(JSONReader::JSON_UNEXPECTED_TOKEN, 1); | |
856 return NULL; | |
857 } | |
858 } | |
859 | |
860 bool JSONParser::StringsAreEqual(const char* one, const char* two, size_t len) { | |
861 return strncmp(one, two, len) == 0; | |
862 } | |
863 | |
864 void JSONParser::ReportError(JSONReader::JsonParseError code, | |
865 int column_adjust) { | |
866 error_code_ = code; | |
867 error_line_ = line_number_; | |
868 error_column_ = index_ - index_last_line_ + column_adjust; | |
869 } | |
870 | |
871 // static | |
872 std::string JSONParser::FormatErrorMessage(int line, int column, | |
873 const std::string& description) { | |
874 if (line || column) { | |
875 return base::StringPrintf( | |
tfarina
2012/04/19 22:54:47
nit: base:: here is not necessary as we are in bas
Robert Sesek
2012/05/03 15:34:52
Done.
| |
876 "Line: %i, column: %i, %s", line, column, description.c_str()); | |
877 } | |
878 return description; | |
879 } | |
880 | |
881 } // namespace internal | |
882 } // namespace base | |
OLD | NEW |