OLD | NEW |
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #include "base/json/json_reader.h" | 5 #include "base/json/json_reader.h" |
6 | 6 |
7 #include "base/float_util.h" | 7 #include "base/json/json_parser.h" |
8 #include "base/logging.h" | 8 #include "base/logging.h" |
9 #include "base/memory/scoped_ptr.h" | |
10 #include "base/stringprintf.h" | |
11 #include "base/string_number_conversions.h" | |
12 #include "base/string_piece.h" | |
13 #include "base/string_util.h" | |
14 #include "base/third_party/icu/icu_utf.h" | |
15 #include "base/utf_string_conversions.h" | |
16 #include "base/values.h" | |
17 | |
18 namespace { | |
19 | |
20 const char kNullString[] = "null"; | |
21 const char kTrueString[] = "true"; | |
22 const char kFalseString[] = "false"; | |
23 | |
24 const int kStackLimit = 100; | |
25 | |
26 // A helper method for ParseNumberToken. It reads an int from the end of | |
27 // token. The method returns false if there is no valid integer at the end of | |
28 // the token. | |
29 bool ReadInt(base::JSONReader::Token& token, bool can_have_leading_zeros) { | |
30 char first = token.NextChar(); | |
31 int len = 0; | |
32 | |
33 // Read in more digits. | |
34 char c = first; | |
35 while ('\0' != c && IsAsciiDigit(c)) { | |
36 ++token.length; | |
37 ++len; | |
38 c = token.NextChar(); | |
39 } | |
40 // We need at least 1 digit. | |
41 if (len == 0) | |
42 return false; | |
43 | |
44 if (!can_have_leading_zeros && len > 1 && '0' == first) | |
45 return false; | |
46 | |
47 return true; | |
48 } | |
49 | |
50 // A helper method for ParseStringToken. It reads |digits| hex digits from the | |
51 // token. If the sequence of digits is not valid (contains other characters), | |
52 // the method returns false. | |
53 bool ReadHexDigits(base::JSONReader::Token& token, int digits) { | |
54 for (int i = 1; i <= digits; ++i) { | |
55 char c = *(token.begin + token.length + i); | |
56 if (c == '\0' || !IsHexDigit(c)) | |
57 return false; | |
58 } | |
59 | |
60 token.length += digits; | |
61 return true; | |
62 } | |
63 | |
64 } // namespace | |
65 | 9 |
66 namespace base { | 10 namespace base { |
67 | 11 |
68 const char* JSONReader::kBadRootElementType = | |
69 "Root value must be an array or object."; | |
70 const char* JSONReader::kInvalidEscape = | 12 const char* JSONReader::kInvalidEscape = |
71 "Invalid escape sequence."; | 13 "Invalid escape sequence."; |
72 const char* JSONReader::kSyntaxError = | 14 const char* JSONReader::kSyntaxError = |
73 "Syntax error."; | 15 "Syntax error."; |
| 16 const char* JSONReader::kUnexpectedToken = |
| 17 "Unexpected token."; |
74 const char* JSONReader::kTrailingComma = | 18 const char* JSONReader::kTrailingComma = |
75 "Trailing comma not allowed."; | 19 "Trailing comma not allowed."; |
76 const char* JSONReader::kTooMuchNesting = | 20 const char* JSONReader::kTooMuchNesting = |
77 "Too much nesting."; | 21 "Too much nesting."; |
78 const char* JSONReader::kUnexpectedDataAfterRoot = | 22 const char* JSONReader::kUnexpectedDataAfterRoot = |
79 "Unexpected data after root element."; | 23 "Unexpected data after root element."; |
80 const char* JSONReader::kUnsupportedEncoding = | 24 const char* JSONReader::kUnsupportedEncoding = |
81 "Unsupported encoding. JSON must be UTF-8."; | 25 "Unsupported encoding. JSON must be UTF-8."; |
82 const char* JSONReader::kUnquotedDictionaryKey = | 26 const char* JSONReader::kUnquotedDictionaryKey = |
83 "Dictionary keys must be quoted."; | 27 "Dictionary keys must be quoted."; |
84 | 28 |
85 JSONReader::JSONReader() | 29 JSONReader::JSONReader() |
86 : start_pos_(NULL), | 30 : parser_(new internal::JSONParser(JSON_PARSE_RFC)) { |
87 json_pos_(NULL), | 31 } |
88 end_pos_(NULL), | 32 |
89 stack_depth_(0), | 33 JSONReader::JSONReader(int options) |
90 allow_trailing_comma_(false), | 34 : parser_(new internal::JSONParser(options)) { |
91 error_code_(JSON_NO_ERROR), | 35 } |
92 error_line_(0), | 36 |
93 error_col_(0) {} | 37 JSONReader::~JSONReader() { |
| 38 } |
94 | 39 |
95 // static | 40 // static |
96 Value* JSONReader::Read(const std::string& json) { | 41 Value* JSONReader::Read(const std::string& json) { |
97 return Read(json, JSON_PARSE_RFC); | 42 internal::JSONParser parser(JSON_PARSE_RFC); |
| 43 return parser.Parse(json); |
98 } | 44 } |
99 | 45 |
100 // static | 46 // static |
101 Value* JSONReader::Read(const std::string& json, | 47 Value* JSONReader::Read(const std::string& json, |
102 int options) { | 48 int options) { |
103 return ReadAndReturnError(json, options, NULL, NULL); | 49 internal::JSONParser parser(options); |
| 50 return parser.Parse(json); |
104 } | 51 } |
105 | 52 |
106 // static | 53 // static |
107 Value* JSONReader::ReadAndReturnError(const std::string& json, | 54 Value* JSONReader::ReadAndReturnError(const std::string& json, |
108 int options, | 55 int options, |
109 int* error_code_out, | 56 int* error_code_out, |
110 std::string* error_msg_out) { | 57 std::string* error_msg_out) { |
111 JSONReader reader = JSONReader(); | 58 internal::JSONParser parser(options); |
112 Value* root = reader.JsonToValue(json, false, | 59 Value* root = parser.Parse(json); |
113 (options & JSON_ALLOW_TRAILING_COMMAS) != 0); | |
114 if (root) | 60 if (root) |
115 return root; | 61 return root; |
116 | 62 |
117 if (error_code_out) | 63 if (error_code_out) |
118 *error_code_out = reader.error_code(); | 64 *error_code_out = parser.error_code(); |
119 if (error_msg_out) | 65 if (error_msg_out) |
120 *error_msg_out = reader.GetErrorMessage(); | 66 *error_msg_out = parser.GetErrorMessage(); |
121 | 67 |
122 return NULL; | 68 return NULL; |
123 } | 69 } |
124 | 70 |
125 // static | 71 // static |
126 std::string JSONReader::ErrorCodeToString(JsonParseError error_code) { | 72 std::string JSONReader::ErrorCodeToString(JsonParseError error_code) { |
127 switch (error_code) { | 73 switch (error_code) { |
128 case JSON_NO_ERROR: | 74 case JSON_NO_ERROR: |
129 return std::string(); | 75 return std::string(); |
130 case JSON_BAD_ROOT_ELEMENT_TYPE: | |
131 return kBadRootElementType; | |
132 case JSON_INVALID_ESCAPE: | 76 case JSON_INVALID_ESCAPE: |
133 return kInvalidEscape; | 77 return kInvalidEscape; |
134 case JSON_SYNTAX_ERROR: | 78 case JSON_SYNTAX_ERROR: |
135 return kSyntaxError; | 79 return kSyntaxError; |
| 80 case JSON_UNEXPECTED_TOKEN: |
| 81 return kUnexpectedToken; |
136 case JSON_TRAILING_COMMA: | 82 case JSON_TRAILING_COMMA: |
137 return kTrailingComma; | 83 return kTrailingComma; |
138 case JSON_TOO_MUCH_NESTING: | 84 case JSON_TOO_MUCH_NESTING: |
139 return kTooMuchNesting; | 85 return kTooMuchNesting; |
140 case JSON_UNEXPECTED_DATA_AFTER_ROOT: | 86 case JSON_UNEXPECTED_DATA_AFTER_ROOT: |
141 return kUnexpectedDataAfterRoot; | 87 return kUnexpectedDataAfterRoot; |
142 case JSON_UNSUPPORTED_ENCODING: | 88 case JSON_UNSUPPORTED_ENCODING: |
143 return kUnsupportedEncoding; | 89 return kUnsupportedEncoding; |
144 case JSON_UNQUOTED_DICTIONARY_KEY: | 90 case JSON_UNQUOTED_DICTIONARY_KEY: |
145 return kUnquotedDictionaryKey; | 91 return kUnquotedDictionaryKey; |
146 default: | 92 default: |
147 NOTREACHED(); | 93 NOTREACHED(); |
148 return std::string(); | 94 return std::string(); |
149 } | 95 } |
150 } | 96 } |
151 | 97 |
152 std::string JSONReader::GetErrorMessage() const { | 98 Value* JSONReader::ReadToValue(const std::string& json) { |
153 return FormatErrorMessage(error_line_, error_col_, | 99 return parser_->Parse(json); |
154 ErrorCodeToString(error_code_)); | |
155 } | 100 } |
156 | 101 |
157 Value* JSONReader::JsonToValue(const std::string& json, bool check_root, | 102 JSONReader::JsonParseError JSONReader::error_code() const { |
158 bool allow_trailing_comma) { | 103 return parser_->error_code(); |
159 // The input must be in UTF-8. | |
160 if (!IsStringUTF8(json.data())) { | |
161 error_code_ = JSON_UNSUPPORTED_ENCODING; | |
162 return NULL; | |
163 } | |
164 | |
165 start_pos_ = json.data(); | |
166 end_pos_ = start_pos_ + json.size(); | |
167 | |
168 // When the input JSON string starts with a UTF-8 Byte-Order-Mark (U+FEFF) | |
169 // or <0xEF 0xBB 0xBF>, advance the start position to prevent the | |
170 // JSONReader::BuildValue() function from mis-treating a Unicode BOM as an | |
171 // invalid character and returning NULL. | |
172 if (json.size() >= 3 && static_cast<uint8>(start_pos_[0]) == 0xEF && | |
173 static_cast<uint8>(start_pos_[1]) == 0xBB && | |
174 static_cast<uint8>(start_pos_[2]) == 0xBF) { | |
175 start_pos_ += 3; | |
176 } | |
177 | |
178 json_pos_ = start_pos_; | |
179 allow_trailing_comma_ = allow_trailing_comma; | |
180 stack_depth_ = 0; | |
181 error_code_ = JSON_NO_ERROR; | |
182 | |
183 scoped_ptr<Value> root(BuildValue(check_root)); | |
184 if (root.get()) { | |
185 if (ParseToken().type == Token::END_OF_INPUT) { | |
186 return root.release(); | |
187 } else { | |
188 SetErrorCode(JSON_UNEXPECTED_DATA_AFTER_ROOT, json_pos_); | |
189 } | |
190 } | |
191 | |
192 // Default to calling errors "syntax errors". | |
193 if (error_code_ == 0) | |
194 SetErrorCode(JSON_SYNTAX_ERROR, json_pos_); | |
195 | |
196 return NULL; | |
197 } | 104 } |
198 | 105 |
199 // static | 106 std::string JSONReader::GetErrorMessage() const { |
200 std::string JSONReader::FormatErrorMessage(int line, int column, | 107 return parser_->GetErrorMessage(); |
201 const std::string& description) { | |
202 if (line || column) { | |
203 return base::StringPrintf( | |
204 "Line: %i, column: %i, %s", line, column, description.c_str()); | |
205 } | |
206 return description; | |
207 } | |
208 | |
209 Value* JSONReader::BuildValue(bool is_root) { | |
210 ++stack_depth_; | |
211 if (stack_depth_ > kStackLimit) { | |
212 SetErrorCode(JSON_TOO_MUCH_NESTING, json_pos_); | |
213 return NULL; | |
214 } | |
215 | |
216 Token token = ParseToken(); | |
217 // The root token must be an array or an object. | |
218 if (is_root && token.type != Token::OBJECT_BEGIN && | |
219 token.type != Token::ARRAY_BEGIN) { | |
220 SetErrorCode(JSON_BAD_ROOT_ELEMENT_TYPE, json_pos_); | |
221 return NULL; | |
222 } | |
223 | |
224 scoped_ptr<Value> node; | |
225 | |
226 switch (token.type) { | |
227 case Token::END_OF_INPUT: | |
228 case Token::INVALID_TOKEN: | |
229 return NULL; | |
230 | |
231 case Token::NULL_TOKEN: | |
232 node.reset(Value::CreateNullValue()); | |
233 break; | |
234 | |
235 case Token::BOOL_TRUE: | |
236 node.reset(Value::CreateBooleanValue(true)); | |
237 break; | |
238 | |
239 case Token::BOOL_FALSE: | |
240 node.reset(Value::CreateBooleanValue(false)); | |
241 break; | |
242 | |
243 case Token::NUMBER: | |
244 node.reset(DecodeNumber(token)); | |
245 if (!node.get()) | |
246 return NULL; | |
247 break; | |
248 | |
249 case Token::STRING: | |
250 node.reset(DecodeString(token)); | |
251 if (!node.get()) | |
252 return NULL; | |
253 break; | |
254 | |
255 case Token::ARRAY_BEGIN: | |
256 { | |
257 json_pos_ += token.length; | |
258 token = ParseToken(); | |
259 | |
260 node.reset(new ListValue()); | |
261 while (token.type != Token::ARRAY_END) { | |
262 Value* array_node = BuildValue(false); | |
263 if (!array_node) | |
264 return NULL; | |
265 static_cast<ListValue*>(node.get())->Append(array_node); | |
266 | |
267 // After a list value, we expect a comma or the end of the list. | |
268 token = ParseToken(); | |
269 if (token.type == Token::LIST_SEPARATOR) { | |
270 json_pos_ += token.length; | |
271 token = ParseToken(); | |
272 // Trailing commas are invalid according to the JSON RFC, but some | |
273 // consumers need the parsing leniency, so handle accordingly. | |
274 if (token.type == Token::ARRAY_END) { | |
275 if (!allow_trailing_comma_) { | |
276 SetErrorCode(JSON_TRAILING_COMMA, json_pos_); | |
277 return NULL; | |
278 } | |
279 // Trailing comma OK, stop parsing the Array. | |
280 break; | |
281 } | |
282 } else if (token.type != Token::ARRAY_END) { | |
283 // Unexpected value after list value. Bail out. | |
284 return NULL; | |
285 } | |
286 } | |
287 if (token.type != Token::ARRAY_END) { | |
288 return NULL; | |
289 } | |
290 break; | |
291 } | |
292 | |
293 case Token::OBJECT_BEGIN: | |
294 { | |
295 json_pos_ += token.length; | |
296 token = ParseToken(); | |
297 | |
298 node.reset(new DictionaryValue); | |
299 while (token.type != Token::OBJECT_END) { | |
300 if (token.type != Token::STRING) { | |
301 SetErrorCode(JSON_UNQUOTED_DICTIONARY_KEY, json_pos_); | |
302 return NULL; | |
303 } | |
304 scoped_ptr<Value> dict_key_value(DecodeString(token)); | |
305 if (!dict_key_value.get()) | |
306 return NULL; | |
307 | |
308 // Convert the key into a std::string. | |
309 std::string dict_key; | |
310 bool success = dict_key_value->GetAsString(&dict_key); | |
311 DCHECK(success); | |
312 | |
313 json_pos_ += token.length; | |
314 token = ParseToken(); | |
315 if (token.type != Token::OBJECT_PAIR_SEPARATOR) | |
316 return NULL; | |
317 | |
318 json_pos_ += token.length; | |
319 token = ParseToken(); | |
320 Value* dict_value = BuildValue(false); | |
321 if (!dict_value) | |
322 return NULL; | |
323 static_cast<DictionaryValue*>(node.get())->SetWithoutPathExpansion( | |
324 dict_key, dict_value); | |
325 | |
326 // After a key/value pair, we expect a comma or the end of the | |
327 // object. | |
328 token = ParseToken(); | |
329 if (token.type == Token::LIST_SEPARATOR) { | |
330 json_pos_ += token.length; | |
331 token = ParseToken(); | |
332 // Trailing commas are invalid according to the JSON RFC, but some | |
333 // consumers need the parsing leniency, so handle accordingly. | |
334 if (token.type == Token::OBJECT_END) { | |
335 if (!allow_trailing_comma_) { | |
336 SetErrorCode(JSON_TRAILING_COMMA, json_pos_); | |
337 return NULL; | |
338 } | |
339 // Trailing comma OK, stop parsing the Object. | |
340 break; | |
341 } | |
342 } else if (token.type != Token::OBJECT_END) { | |
343 // Unexpected value after last object value. Bail out. | |
344 return NULL; | |
345 } | |
346 } | |
347 if (token.type != Token::OBJECT_END) | |
348 return NULL; | |
349 | |
350 break; | |
351 } | |
352 | |
353 default: | |
354 // We got a token that's not a value. | |
355 return NULL; | |
356 } | |
357 json_pos_ += token.length; | |
358 | |
359 --stack_depth_; | |
360 return node.release(); | |
361 } | |
362 | |
363 JSONReader::Token JSONReader::ParseNumberToken() { | |
364 // We just grab the number here. We validate the size in DecodeNumber. | |
365 // According to RFC4627, a valid number is: [minus] int [frac] [exp] | |
366 Token token(Token::NUMBER, json_pos_, 0); | |
367 char c = *json_pos_; | |
368 if ('-' == c) { | |
369 ++token.length; | |
370 c = token.NextChar(); | |
371 } | |
372 | |
373 if (!ReadInt(token, false)) | |
374 return Token::CreateInvalidToken(); | |
375 | |
376 // Optional fraction part | |
377 c = token.NextChar(); | |
378 if ('.' == c) { | |
379 ++token.length; | |
380 if (!ReadInt(token, true)) | |
381 return Token::CreateInvalidToken(); | |
382 c = token.NextChar(); | |
383 } | |
384 | |
385 // Optional exponent part | |
386 if ('e' == c || 'E' == c) { | |
387 ++token.length; | |
388 c = token.NextChar(); | |
389 if ('-' == c || '+' == c) { | |
390 ++token.length; | |
391 c = token.NextChar(); | |
392 } | |
393 if (!ReadInt(token, true)) | |
394 return Token::CreateInvalidToken(); | |
395 } | |
396 | |
397 return token; | |
398 } | |
399 | |
400 Value* JSONReader::DecodeNumber(const Token& token) { | |
401 const std::string num_string(token.begin, token.length); | |
402 | |
403 int num_int; | |
404 if (StringToInt(num_string, &num_int)) | |
405 return Value::CreateIntegerValue(num_int); | |
406 | |
407 double num_double; | |
408 if (StringToDouble(num_string, &num_double) && base::IsFinite(num_double)) | |
409 return Value::CreateDoubleValue(num_double); | |
410 | |
411 return NULL; | |
412 } | |
413 | |
414 JSONReader::Token JSONReader::ParseStringToken() { | |
415 Token token(Token::STRING, json_pos_, 1); | |
416 char c = token.NextChar(); | |
417 while (json_pos_ + token.length < end_pos_) { | |
418 if ('\\' == c) { | |
419 ++token.length; | |
420 c = token.NextChar(); | |
421 // Make sure the escaped char is valid. | |
422 switch (c) { | |
423 case 'x': | |
424 if (!ReadHexDigits(token, 2)) { | |
425 SetErrorCode(JSON_INVALID_ESCAPE, json_pos_ + token.length); | |
426 return Token::CreateInvalidToken(); | |
427 } | |
428 break; | |
429 case 'u': | |
430 if (!ReadHexDigits(token, 4)) { | |
431 SetErrorCode(JSON_INVALID_ESCAPE, json_pos_ + token.length); | |
432 return Token::CreateInvalidToken(); | |
433 } | |
434 break; | |
435 case '\\': | |
436 case '/': | |
437 case 'b': | |
438 case 'f': | |
439 case 'n': | |
440 case 'r': | |
441 case 't': | |
442 case 'v': | |
443 case '"': | |
444 break; | |
445 default: | |
446 SetErrorCode(JSON_INVALID_ESCAPE, json_pos_ + token.length); | |
447 return Token::CreateInvalidToken(); | |
448 } | |
449 } else if ('"' == c) { | |
450 ++token.length; | |
451 return token; | |
452 } | |
453 ++token.length; | |
454 c = token.NextChar(); | |
455 } | |
456 return Token::CreateInvalidToken(); | |
457 } | |
458 | |
459 Value* JSONReader::DecodeString(const Token& token) { | |
460 std::string decoded_str; | |
461 decoded_str.reserve(token.length - 2); | |
462 | |
463 for (int i = 1; i < token.length - 1; ++i) { | |
464 char c = *(token.begin + i); | |
465 if ('\\' == c) { | |
466 ++i; | |
467 c = *(token.begin + i); | |
468 switch (c) { | |
469 case '"': | |
470 case '/': | |
471 case '\\': | |
472 decoded_str.push_back(c); | |
473 break; | |
474 case 'b': | |
475 decoded_str.push_back('\b'); | |
476 break; | |
477 case 'f': | |
478 decoded_str.push_back('\f'); | |
479 break; | |
480 case 'n': | |
481 decoded_str.push_back('\n'); | |
482 break; | |
483 case 'r': | |
484 decoded_str.push_back('\r'); | |
485 break; | |
486 case 't': | |
487 decoded_str.push_back('\t'); | |
488 break; | |
489 case 'v': | |
490 decoded_str.push_back('\v'); | |
491 break; | |
492 | |
493 case 'x': { | |
494 if (i + 2 >= token.length) | |
495 return NULL; | |
496 int hex_digit = 0; | |
497 if (!HexStringToInt(StringPiece(token.begin + i + 1, 2), &hex_digit)) | |
498 return NULL; | |
499 decoded_str.push_back(hex_digit); | |
500 i += 2; | |
501 break; | |
502 } | |
503 case 'u': | |
504 if (!ConvertUTF16Units(token, &i, &decoded_str)) | |
505 return NULL; | |
506 break; | |
507 | |
508 default: | |
509 // We should only have valid strings at this point. If not, | |
510 // ParseStringToken didn't do its job. | |
511 NOTREACHED(); | |
512 return NULL; | |
513 } | |
514 } else { | |
515 // Not escaped | |
516 decoded_str.push_back(c); | |
517 } | |
518 } | |
519 return Value::CreateStringValue(decoded_str); | |
520 } | |
521 | |
522 bool JSONReader::ConvertUTF16Units(const Token& token, | |
523 int* i, | |
524 std::string* dest_string) { | |
525 if (*i + 4 >= token.length) | |
526 return false; | |
527 | |
528 // This is a 32-bit field because the shift operations in the | |
529 // conversion process below cause MSVC to error about "data loss." | |
530 // This only stores UTF-16 code units, though. | |
531 // Consume the UTF-16 code unit, which may be a high surrogate. | |
532 int code_unit16_high = 0; | |
533 if (!HexStringToInt(StringPiece(token.begin + *i + 1, 4), &code_unit16_high)) | |
534 return false; | |
535 *i += 4; | |
536 | |
537 // If this is a high surrogate, consume the next code unit to get the | |
538 // low surrogate. | |
539 int code_unit16_low = 0; | |
540 if (CBU16_IS_SURROGATE(code_unit16_high)) { | |
541 // Make sure this is the high surrogate. If not, it's an encoding | |
542 // error. | |
543 if (!CBU16_IS_SURROGATE_LEAD(code_unit16_high)) | |
544 return false; | |
545 | |
546 // Make sure that the token has more characters to consume the | |
547 // lower surrogate. | |
548 if (*i + 6 >= token.length) | |
549 return false; | |
550 if (*(++(*i) + token.begin) != '\\' || *(++(*i) + token.begin) != 'u') | |
551 return false; | |
552 | |
553 if (!HexStringToInt(StringPiece(token.begin + *i + 1, 4), &code_unit16_low)) | |
554 return false; | |
555 *i += 4; | |
556 if (!CBU16_IS_SURROGATE(code_unit16_low) || | |
557 !CBU16_IS_TRAIL(code_unit16_low)) { | |
558 return false; | |
559 } | |
560 } else if (!CBU16_IS_SINGLE(code_unit16_high)) { | |
561 // If this is not a code point, it's an encoding error. | |
562 return false; | |
563 } | |
564 | |
565 // Convert the UTF-16 code units to a code point and then to a UTF-8 | |
566 // code unit sequence. | |
567 char code_point[8] = { 0 }; | |
568 size_t offset = 0; | |
569 if (!code_unit16_low) { | |
570 CBU8_APPEND_UNSAFE(code_point, offset, code_unit16_high); | |
571 } else { | |
572 uint32 code_unit32 = CBU16_GET_SUPPLEMENTARY(code_unit16_high, | |
573 code_unit16_low); | |
574 offset = 0; | |
575 CBU8_APPEND_UNSAFE(code_point, offset, code_unit32); | |
576 } | |
577 dest_string->append(code_point); | |
578 return true; | |
579 } | |
580 | |
581 JSONReader::Token JSONReader::ParseToken() { | |
582 EatWhitespaceAndComments(); | |
583 | |
584 Token token(Token::INVALID_TOKEN, 0, 0); | |
585 switch (*json_pos_) { | |
586 case '\0': | |
587 token.type = Token::END_OF_INPUT; | |
588 break; | |
589 | |
590 case 'n': | |
591 if (NextStringMatch(kNullString, arraysize(kNullString) - 1)) | |
592 token = Token(Token::NULL_TOKEN, json_pos_, 4); | |
593 break; | |
594 | |
595 case 't': | |
596 if (NextStringMatch(kTrueString, arraysize(kTrueString) - 1)) | |
597 token = Token(Token::BOOL_TRUE, json_pos_, 4); | |
598 break; | |
599 | |
600 case 'f': | |
601 if (NextStringMatch(kFalseString, arraysize(kFalseString) - 1)) | |
602 token = Token(Token::BOOL_FALSE, json_pos_, 5); | |
603 break; | |
604 | |
605 case '[': | |
606 token = Token(Token::ARRAY_BEGIN, json_pos_, 1); | |
607 break; | |
608 | |
609 case ']': | |
610 token = Token(Token::ARRAY_END, json_pos_, 1); | |
611 break; | |
612 | |
613 case ',': | |
614 token = Token(Token::LIST_SEPARATOR, json_pos_, 1); | |
615 break; | |
616 | |
617 case '{': | |
618 token = Token(Token::OBJECT_BEGIN, json_pos_, 1); | |
619 break; | |
620 | |
621 case '}': | |
622 token = Token(Token::OBJECT_END, json_pos_, 1); | |
623 break; | |
624 | |
625 case ':': | |
626 token = Token(Token::OBJECT_PAIR_SEPARATOR, json_pos_, 1); | |
627 break; | |
628 | |
629 case '0': | |
630 case '1': | |
631 case '2': | |
632 case '3': | |
633 case '4': | |
634 case '5': | |
635 case '6': | |
636 case '7': | |
637 case '8': | |
638 case '9': | |
639 case '-': | |
640 token = ParseNumberToken(); | |
641 break; | |
642 | |
643 case '"': | |
644 token = ParseStringToken(); | |
645 break; | |
646 } | |
647 return token; | |
648 } | |
649 | |
650 void JSONReader::EatWhitespaceAndComments() { | |
651 while (json_pos_ != end_pos_) { | |
652 switch (*json_pos_) { | |
653 case ' ': | |
654 case '\n': | |
655 case '\r': | |
656 case '\t': | |
657 ++json_pos_; | |
658 break; | |
659 case '/': | |
660 // TODO(tc): This isn't in the RFC so it should be a parser flag. | |
661 if (!EatComment()) | |
662 return; | |
663 break; | |
664 default: | |
665 // Not a whitespace char, just exit. | |
666 return; | |
667 } | |
668 } | |
669 } | |
670 | |
671 bool JSONReader::EatComment() { | |
672 if ('/' != *json_pos_) | |
673 return false; | |
674 | |
675 char next_char = *(json_pos_ + 1); | |
676 if ('/' == next_char) { | |
677 // Line comment, read until \n or \r | |
678 json_pos_ += 2; | |
679 while (json_pos_ != end_pos_) { | |
680 switch (*json_pos_) { | |
681 case '\n': | |
682 case '\r': | |
683 ++json_pos_; | |
684 return true; | |
685 default: | |
686 ++json_pos_; | |
687 } | |
688 } | |
689 } else if ('*' == next_char) { | |
690 // Block comment, read until */ | |
691 json_pos_ += 2; | |
692 while (json_pos_ != end_pos_) { | |
693 if ('*' == *json_pos_ && '/' == *(json_pos_ + 1)) { | |
694 json_pos_ += 2; | |
695 return true; | |
696 } | |
697 ++json_pos_; | |
698 } | |
699 } else { | |
700 return false; | |
701 } | |
702 return true; | |
703 } | |
704 | |
705 bool JSONReader::NextStringMatch(const char* str, size_t length) { | |
706 return strncmp(json_pos_, str, length) == 0; | |
707 } | |
708 | |
709 void JSONReader::SetErrorCode(JsonParseError error, | |
710 const char* error_pos) { | |
711 int line_number = 1; | |
712 int column_number = 1; | |
713 | |
714 // Figure out the line and column the error occurred at. | |
715 for (const char* pos = start_pos_; pos != error_pos; ++pos) { | |
716 if (pos > end_pos_) { | |
717 NOTREACHED(); | |
718 return; | |
719 } | |
720 | |
721 if (*pos == '\n') { | |
722 ++line_number; | |
723 column_number = 1; | |
724 } else { | |
725 ++column_number; | |
726 } | |
727 } | |
728 | |
729 error_line_ = line_number; | |
730 error_col_ = column_number; | |
731 error_code_ = error; | |
732 } | 108 } |
733 | 109 |
734 } // namespace base | 110 } // namespace base |
OLD | NEW |