OLD | NEW |
(Empty) | |
| 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. |
| 4 |
| 5 #include "base/json/json_parser.h" |
| 6 |
| 7 #include "base/float_util.h" |
| 8 #include "base/logging.h" |
| 9 #include "base/memory/scoped_ptr.h" |
| 10 #include "base/string_number_conversions.h" |
| 11 #include "base/string_util.h" |
| 12 #include "base/stringprintf.h" |
| 13 #include "base/third_party/icu/icu_utf.h" |
| 14 #include "base/utf_string_conversion_utils.h" |
| 15 #include "base/utf_string_conversions.h" |
| 16 #include "base/values.h" |
| 17 |
| 18 namespace base { |
| 19 namespace internal { |
| 20 |
| 21 namespace { |
| 22 |
// Nesting limit for dictionaries and lists. StackMarker::IsTooDeep() reports
// true once the shared depth counter reaches this value.
const int kStackMaxDepth = 100;

// First code point outside 7-bit ASCII. Code points below this value are
// appended to a string as a single byte; anything at or above it goes through
// DecodeUTF8().
const int32 kExtendedASCIIStart = 0x80;
| 26 |
| 27 // This and the class below are used to own the JSON input string for when |
| 28 // string tokens are stored as StringPiece instead of std::string. This |
| 29 // optimization avoids about 2/3rds of string memory copies. The constructor |
| 30 // takes the input string and swaps its data into the new instance. The real |
| 31 // root value is also Swap()ed into the new instance. |
| 32 class DictionaryHiddenRootValue : public base::DictionaryValue { |
| 33 public: |
| 34 DictionaryHiddenRootValue(std::string* json, Value* root) { |
| 35 DCHECK(root->IsType(Value::TYPE_DICTIONARY)); |
| 36 DictionaryValue::Swap(static_cast<DictionaryValue*>(root)); |
| 37 json->swap(json_); |
| 38 } |
| 39 |
| 40 virtual void Swap(DictionaryValue* other) OVERRIDE { |
| 41 DVLOG(1) << "Swap()ing a DictionaryValue inefficiently."; |
| 42 |
| 43 // First deep copy to convert JSONStringValue to std::string and swap that |
| 44 // copy with |other|, which contains the new contents of |this|. |
| 45 scoped_ptr<base::DictionaryValue> copy(DeepCopy()); |
| 46 copy->Swap(other); |
| 47 |
| 48 // Then erase the contents of the current dictionary and swap in the |
| 49 // new contents, originally from |other|. |
| 50 Clear(); |
| 51 json_.clear(); |
| 52 DictionaryValue::Swap(copy.get()); |
| 53 } |
| 54 |
| 55 // Not overriding DictionaryValue::Remove because it just calls through to |
| 56 // the method below. |
| 57 |
| 58 virtual bool RemoveWithoutPathExpansion(const std::string& key, |
| 59 Value** out) OVERRIDE { |
| 60 // If the caller won't take ownership of the removed value, just call up. |
| 61 if (!out) |
| 62 return DictionaryValue::RemoveWithoutPathExpansion(key, out); |
| 63 |
| 64 DVLOG(1) << "Remove()ing from a DictionaryValue inefficiently."; |
| 65 |
| 66 // Otherwise, remove the value while its still "owned" by this and copy it |
| 67 // to convert any JSONStringValues to std::string. |
| 68 Value* out_owned = NULL; |
| 69 if (!DictionaryValue::RemoveWithoutPathExpansion(key, &out_owned)) |
| 70 return false; |
| 71 |
| 72 *out = out_owned->DeepCopy(); |
| 73 delete out_owned; |
| 74 |
| 75 return true; |
| 76 } |
| 77 |
| 78 private: |
| 79 std::string json_; |
| 80 |
| 81 DISALLOW_COPY_AND_ASSIGN(DictionaryHiddenRootValue); |
| 82 }; |
| 83 |
| 84 class ListHiddenRootValue : public base::ListValue { |
| 85 public: |
| 86 ListHiddenRootValue(std::string* json, Value* root) { |
| 87 DCHECK(root->IsType(Value::TYPE_LIST)); |
| 88 ListValue::Swap(static_cast<ListValue*>(root)); |
| 89 json->swap(json_); |
| 90 } |
| 91 |
| 92 virtual void Swap(ListValue* other) OVERRIDE { |
| 93 DVLOG(1) << "Swap()ing a ListValue inefficiently."; |
| 94 |
| 95 // First deep copy to convert JSONStringValue to std::string and swap that |
| 96 // copy with |other|, which contains the new contents of |this|. |
| 97 scoped_ptr<base::ListValue> copy(DeepCopy()); |
| 98 copy->Swap(other); |
| 99 |
| 100 // Then erase the contents of the current list and swap in the new contents, |
| 101 // originally from |other|. |
| 102 Clear(); |
| 103 json_.clear(); |
| 104 ListValue::Swap(copy.get()); |
| 105 } |
| 106 |
| 107 virtual bool Remove(size_t index, Value** out) OVERRIDE { |
| 108 // If the caller won't take ownership of the removed value, just call up. |
| 109 if (!out) |
| 110 return ListValue::Remove(index, out); |
| 111 |
| 112 DVLOG(1) << "Remove()ing from a ListValue inefficiently."; |
| 113 |
| 114 // Otherwise, remove the value while its still "owned" by this and copy it |
| 115 // to convert any JSONStringValues to std::string. |
| 116 Value* out_owned = NULL; |
| 117 if (!ListValue::Remove(index, &out_owned)) |
| 118 return false; |
| 119 |
| 120 *out = out_owned->DeepCopy(); |
| 121 delete out_owned; |
| 122 |
| 123 return true; |
| 124 } |
| 125 |
| 126 private: |
| 127 std::string json_; |
| 128 |
| 129 DISALLOW_COPY_AND_ASSIGN(ListHiddenRootValue); |
| 130 }; |
| 131 |
| 132 // A variant on StringValue that uses StringPiece instead of copying the string |
| 133 // into the Value. This can only be stored in a child of hidden root (above), |
| 134 // otherwise the referenced string will not be guaranteed to outlive it. |
| 135 class JSONStringValue : public base::Value { |
| 136 public: |
| 137 explicit JSONStringValue(const base::StringPiece& piece) |
| 138 : Value(TYPE_STRING), |
| 139 string_piece_(piece) { |
| 140 } |
| 141 |
| 142 // Value: |
| 143 bool GetAsString(std::string* out_value) const OVERRIDE { |
| 144 string_piece_.CopyToString(out_value); |
| 145 return true; |
| 146 } |
| 147 bool GetAsString(string16* out_value) const OVERRIDE { |
| 148 *out_value = UTF8ToUTF16(string_piece_); |
| 149 return true; |
| 150 } |
| 151 virtual Value* DeepCopy() const OVERRIDE { |
| 152 return Value::CreateStringValue(string_piece_.as_string()); |
| 153 } |
| 154 virtual bool Equals(const Value* other) const OVERRIDE { |
| 155 std::string other_string; |
| 156 return other->IsType(TYPE_STRING) && other->GetAsString(&other_string) && |
| 157 StringPiece(other_string) == string_piece_; |
| 158 } |
| 159 |
| 160 private: |
| 161 // The location in the original input stream. |
| 162 base::StringPiece string_piece_; |
| 163 |
| 164 DISALLOW_COPY_AND_ASSIGN(JSONStringValue); |
| 165 }; |
| 166 |
| 167 // Simple class that checks for maximum recursion/"stack overflow." |
| 168 class StackMarker { |
| 169 public: |
| 170 explicit StackMarker(int* depth) : depth_(depth) { |
| 171 ++(*depth_); |
| 172 DCHECK_LE(*depth_, kStackMaxDepth); |
| 173 } |
| 174 ~StackMarker() { |
| 175 --(*depth_); |
| 176 } |
| 177 |
| 178 bool IsTooDeep() const { |
| 179 return *depth_ >= kStackMaxDepth; |
| 180 } |
| 181 |
| 182 private: |
| 183 int* const depth_; |
| 184 |
| 185 DISALLOW_COPY_AND_ASSIGN(StackMarker); |
| 186 }; |
| 187 |
| 188 } // namespace |
| 189 |
// Stores |options| and zeroes all cursor and error state. The input pointers
// stay NULL until Parse() points them at a buffer.
JSONParser::JSONParser(int options)
    : options_(options),
      start_pos_(NULL),
      pos_(NULL),
      end_pos_(NULL),
      index_(0),
      stack_depth_(0),
      line_number_(0),
      index_last_line_(0),
      error_code_(JSONReader::JSON_NO_ERROR),
      error_line_(0),
      error_column_(0) {
}
| 203 |
// Nothing to release: the cursor members are plain pointers into the input
// and are never deleted by the parser.
JSONParser::~JSONParser() {
}
| 206 |
| 207 Value* JSONParser::Parse(const std::string& input) { |
| 208 // TODO(rsesek): Windows has problems with StringPiece/hidden roots. Fix |
| 209 // <http://crbug.com/126107> when my Windows box arrives. |
| 210 #if defined(OS_WIN) |
| 211 options_ |= JSON_DETACHABLE_CHILDREN; |
| 212 #endif |
| 213 |
| 214 std::string input_copy; |
| 215 // If the children of a JSON root can be detached, then hidden roots cannot |
| 216 // be used, so do not bother copying the input because StringPiece will not |
| 217 // be used anywhere. |
| 218 if (!(options_ & JSON_DETACHABLE_CHILDREN)) { |
| 219 input_copy = input; |
| 220 start_pos_ = input_copy.data(); |
| 221 } else { |
| 222 start_pos_ = input.data(); |
| 223 } |
| 224 pos_ = start_pos_; |
| 225 end_pos_ = start_pos_ + input.length(); |
| 226 index_ = 0; |
| 227 line_number_ = 1; |
| 228 index_last_line_ = 0; |
| 229 |
| 230 error_code_ = JSONReader::JSON_NO_ERROR; |
| 231 error_line_ = 0; |
| 232 error_column_ = 0; |
| 233 |
| 234 // When the input JSON string starts with a UTF-8 Byte-Order-Mark |
| 235 // <0xEF 0xBB 0xBF>, advance the start position to avoid the |
| 236 // ParseNextToken function mis-treating a Unicode BOM as an invalid |
| 237 // character and returning NULL. |
| 238 if (CanConsume(3) && static_cast<uint8>(*pos_) == 0xEF && |
| 239 static_cast<uint8>(*(pos_ + 1)) == 0xBB && |
| 240 static_cast<uint8>(*(pos_ + 2)) == 0xBF) { |
| 241 NextNChars(3); |
| 242 } |
| 243 |
| 244 // Parse the first and all subsequent tokens. |
| 245 scoped_ptr<Value> root(ParseNextToken()); |
| 246 if (!root.get()) |
| 247 return NULL; |
| 248 |
| 249 // Make sure the input stream is at an end. |
| 250 if (GetNextToken() != T_END_OF_INPUT) { |
| 251 if (!CanConsume(1) || (NextChar() && GetNextToken() != T_END_OF_INPUT)) { |
| 252 ReportError(JSONReader::JSON_UNEXPECTED_DATA_AFTER_ROOT, 1); |
| 253 return NULL; |
| 254 } |
| 255 } |
| 256 |
| 257 // Dictionaries and lists can contain JSONStringValues, so wrap them in a |
| 258 // hidden root. |
| 259 if (!(options_ & JSON_DETACHABLE_CHILDREN)) { |
| 260 if (root->IsType(Value::TYPE_DICTIONARY)) { |
| 261 return new DictionaryHiddenRootValue(&input_copy, root.release()); |
| 262 } else if (root->IsType(Value::TYPE_LIST)) { |
| 263 return new ListHiddenRootValue(&input_copy, root.release()); |
| 264 } else if (root->IsType(Value::TYPE_STRING)) { |
| 265 // A string type could be a JSONStringValue, but because there's no |
| 266 // corresponding HiddenRootValue, the memory will be lost. Deep copy to |
| 267 // preserve it. |
| 268 return root->DeepCopy(); |
| 269 } |
| 270 } |
| 271 |
| 272 // All other values can be returned directly. |
| 273 return root.release(); |
| 274 } |
| 275 |
// Returns the error code recorded by the most recent ReportError() call, or
// JSON_NO_ERROR if none has been recorded since Parse() reset it.
JSONReader::JsonParseError JSONParser::error_code() const {
  return error_code_;
}
| 279 |
| 280 std::string JSONParser::GetErrorMessage() const { |
| 281 return FormatErrorMessage(error_line_, error_column_, |
| 282 JSONReader::ErrorCodeToString(error_code_)); |
| 283 } |
| 284 |
| 285 // StringBuilder /////////////////////////////////////////////////////////////// |
| 286 |
// Creates an empty builder with no input position and no owned string.
JSONParser::StringBuilder::StringBuilder()
    : pos_(NULL),
      length_(0),
      string_(NULL) {
}
| 292 |
// Creates a builder whose text starts at |pos|. Until Convert() is called the
// builder only counts characters from |pos|; it copies nothing.
JSONParser::StringBuilder::StringBuilder(const char* pos)
    : pos_(pos),
      length_(0),
      string_(NULL) {
}
| 298 |
| 299 void JSONParser::StringBuilder::Swap(StringBuilder* other) { |
| 300 std::swap(other->string_, string_); |
| 301 std::swap(other->pos_, pos_); |
| 302 std::swap(other->length_, length_); |
| 303 } |
| 304 |
// Frees the owned std::string, if Convert() ever created one (deleting NULL
// is a no-op).
JSONParser::StringBuilder::~StringBuilder() {
  delete string_;
}
| 308 |
| 309 void JSONParser::StringBuilder::Append(const char& c) { |
| 310 DCHECK_GE(c, 0); |
| 311 DCHECK_LT(c, 128); |
| 312 |
| 313 if (string_) |
| 314 string_->push_back(c); |
| 315 else |
| 316 ++length_; |
| 317 } |
| 318 |
| 319 void JSONParser::StringBuilder::AppendString(const std::string& str) { |
| 320 DCHECK(string_); |
| 321 string_->append(str); |
| 322 } |
| 323 |
| 324 void JSONParser::StringBuilder::Convert() { |
| 325 if (string_) |
| 326 return; |
| 327 string_ = new std::string(pos_, length_); |
| 328 } |
| 329 |
| 330 bool JSONParser::StringBuilder::CanBeStringPiece() const { |
| 331 return !string_; |
| 332 } |
| 333 |
| 334 StringPiece JSONParser::StringBuilder::AsStringPiece() { |
| 335 if (string_) |
| 336 return StringPiece(); |
| 337 return StringPiece(pos_, length_); |
| 338 } |
| 339 |
| 340 const std::string& JSONParser::StringBuilder::AsString() { |
| 341 if (!string_) |
| 342 Convert(); |
| 343 return *string_; |
| 344 } |
| 345 |
| 346 // JSONParser private ////////////////////////////////////////////////////////// |
| 347 |
| 348 inline bool JSONParser::CanConsume(int length) { |
| 349 return pos_ + length <= end_pos_; |
| 350 } |
| 351 |
| 352 const char* JSONParser::NextChar() { |
| 353 DCHECK(CanConsume(1)); |
| 354 ++index_; |
| 355 ++pos_; |
| 356 return pos_; |
| 357 } |
| 358 |
| 359 void JSONParser::NextNChars(int n) { |
| 360 DCHECK(CanConsume(n)); |
| 361 index_ += n; |
| 362 pos_ += n; |
| 363 } |
| 364 |
| 365 JSONParser::Token JSONParser::GetNextToken() { |
| 366 EatWhitespaceAndComments(); |
| 367 if (!CanConsume(1)) |
| 368 return T_END_OF_INPUT; |
| 369 |
| 370 switch (*pos_) { |
| 371 case '{': |
| 372 return T_OBJECT_BEGIN; |
| 373 case '}': |
| 374 return T_OBJECT_END; |
| 375 case '[': |
| 376 return T_ARRAY_BEGIN; |
| 377 case ']': |
| 378 return T_ARRAY_END; |
| 379 case '"': |
| 380 return T_STRING; |
| 381 case '0': |
| 382 case '1': |
| 383 case '2': |
| 384 case '3': |
| 385 case '4': |
| 386 case '5': |
| 387 case '6': |
| 388 case '7': |
| 389 case '8': |
| 390 case '9': |
| 391 case '-': |
| 392 return T_NUMBER; |
| 393 case 't': |
| 394 return T_BOOL_TRUE; |
| 395 case 'f': |
| 396 return T_BOOL_FALSE; |
| 397 case 'n': |
| 398 return T_NULL; |
| 399 case ',': |
| 400 return T_LIST_SEPARATOR; |
| 401 case ':': |
| 402 return T_OBJECT_PAIR_SEPARATOR; |
| 403 default: |
| 404 return T_INVALID_TOKEN; |
| 405 } |
| 406 } |
| 407 |
| 408 void JSONParser::EatWhitespaceAndComments() { |
| 409 while (pos_ < end_pos_) { |
| 410 switch (*pos_) { |
| 411 case '\r': |
| 412 case '\n': |
| 413 index_last_line_ = index_; |
| 414 ++line_number_; |
| 415 // Fall through. |
| 416 case ' ': |
| 417 case '\t': |
| 418 NextChar(); |
| 419 break; |
| 420 case '/': |
| 421 if (!EatComment()) |
| 422 return; |
| 423 break; |
| 424 default: |
| 425 return; |
| 426 } |
| 427 } |
| 428 } |
| 429 |
| 430 bool JSONParser::EatComment() { |
| 431 if (*pos_ != '/' || !CanConsume(1)) |
| 432 return false; |
| 433 |
| 434 char next_char = *NextChar(); |
| 435 if (next_char == '/') { |
| 436 // Single line comment, read to newline. |
| 437 while (CanConsume(1)) { |
| 438 char next_char = *NextChar(); |
| 439 if (next_char == '\n' || next_char == '\r') |
| 440 return true; |
| 441 } |
| 442 } else if (next_char == '*') { |
| 443 // Block comment, read until end marker. |
| 444 while (CanConsume(2)) { |
| 445 if (*NextChar() == '*' && *NextChar() == '/') { |
| 446 // EatWhitespaceAndComments will inspect pos_, which will still be on |
| 447 // the last / of the comment, so advance once more (which may also be |
| 448 // end of input). |
| 449 NextChar(); |
| 450 return true; |
| 451 } |
| 452 } |
| 453 |
| 454 // If the comment is unterminated, GetNextToken will report T_END_OF_INPUT. |
| 455 } |
| 456 |
| 457 return false; |
| 458 } |
| 459 |
| 460 Value* JSONParser::ParseNextToken() { |
| 461 return ParseToken(GetNextToken()); |
| 462 } |
| 463 |
| 464 Value* JSONParser::ParseToken(Token token) { |
| 465 switch (token) { |
| 466 case T_OBJECT_BEGIN: |
| 467 return ConsumeDictionary(); |
| 468 case T_ARRAY_BEGIN: |
| 469 return ConsumeList(); |
| 470 case T_STRING: |
| 471 return ConsumeString(); |
| 472 case T_NUMBER: |
| 473 return ConsumeNumber(); |
| 474 case T_BOOL_TRUE: |
| 475 case T_BOOL_FALSE: |
| 476 case T_NULL: |
| 477 return ConsumeLiteral(); |
| 478 default: |
| 479 ReportError(JSONReader::JSON_UNEXPECTED_TOKEN, 1); |
| 480 return NULL; |
| 481 } |
| 482 } |
| 483 |
| 484 Value* JSONParser::ConsumeDictionary() { |
| 485 if (*pos_ != '{') { |
| 486 ReportError(JSONReader::JSON_UNEXPECTED_TOKEN, 1); |
| 487 return NULL; |
| 488 } |
| 489 |
| 490 StackMarker depth_check(&stack_depth_); |
| 491 if (depth_check.IsTooDeep()) { |
| 492 ReportError(JSONReader::JSON_TOO_MUCH_NESTING, 1); |
| 493 return NULL; |
| 494 } |
| 495 |
| 496 scoped_ptr<DictionaryValue> dict(new DictionaryValue); |
| 497 |
| 498 NextChar(); |
| 499 Token token = GetNextToken(); |
| 500 while (token != T_OBJECT_END) { |
| 501 if (token != T_STRING) { |
| 502 ReportError(JSONReader::JSON_UNQUOTED_DICTIONARY_KEY, 1); |
| 503 return NULL; |
| 504 } |
| 505 |
| 506 // First consume the key. |
| 507 StringBuilder key; |
| 508 if (!ConsumeStringRaw(&key)) { |
| 509 return NULL; |
| 510 } |
| 511 |
| 512 // Read the separator. |
| 513 NextChar(); |
| 514 token = GetNextToken(); |
| 515 if (token != T_OBJECT_PAIR_SEPARATOR) { |
| 516 ReportError(JSONReader::JSON_SYNTAX_ERROR, 1); |
| 517 return NULL; |
| 518 } |
| 519 |
| 520 // The token is the value. Ownership transfers to |dict|. |
| 521 NextChar(); |
| 522 Value* value = ParseNextToken(); |
| 523 if (!value) { |
| 524 return NULL; |
| 525 } |
| 526 |
| 527 dict->SetWithoutPathExpansion(key.AsString(), value); |
| 528 |
| 529 NextChar(); |
| 530 token = GetNextToken(); |
| 531 if (token == T_LIST_SEPARATOR) { |
| 532 NextChar(); |
| 533 token = GetNextToken(); |
| 534 if (token == T_OBJECT_END && !(options_ & JSON_ALLOW_TRAILING_COMMAS)) { |
| 535 ReportError(JSONReader::JSON_TRAILING_COMMA, 1); |
| 536 return NULL; |
| 537 } |
| 538 } else if (token != T_OBJECT_END) { |
| 539 ReportError(JSONReader::JSON_SYNTAX_ERROR, 0); |
| 540 return NULL; |
| 541 } |
| 542 } |
| 543 |
| 544 if (token != T_OBJECT_END) |
| 545 return NULL; |
| 546 |
| 547 return dict.release(); |
| 548 } |
| 549 |
| 550 Value* JSONParser::ConsumeList() { |
| 551 if (*pos_ != '[') { |
| 552 ReportError(JSONReader::JSON_UNEXPECTED_TOKEN, 1); |
| 553 return NULL; |
| 554 } |
| 555 |
| 556 StackMarker depth_check(&stack_depth_); |
| 557 if (depth_check.IsTooDeep()) { |
| 558 ReportError(JSONReader::JSON_TOO_MUCH_NESTING, 1); |
| 559 return NULL; |
| 560 } |
| 561 |
| 562 scoped_ptr<ListValue> list(new ListValue); |
| 563 |
| 564 NextChar(); |
| 565 Token token = GetNextToken(); |
| 566 while (token != T_ARRAY_END) { |
| 567 Value* item = ParseToken(token); |
| 568 if (!item) { |
| 569 // ReportError from deeper level. |
| 570 return NULL; |
| 571 } |
| 572 |
| 573 list->Append(item); |
| 574 |
| 575 NextChar(); |
| 576 token = GetNextToken(); |
| 577 if (token == T_LIST_SEPARATOR) { |
| 578 NextChar(); |
| 579 token = GetNextToken(); |
| 580 if (token == T_ARRAY_END && !(options_ & JSON_ALLOW_TRAILING_COMMAS)) { |
| 581 ReportError(JSONReader::JSON_TRAILING_COMMA, 1); |
| 582 return NULL; |
| 583 } |
| 584 } else if (token != T_ARRAY_END) { |
| 585 ReportError(JSONReader::JSON_SYNTAX_ERROR, 1); |
| 586 return NULL; |
| 587 } |
| 588 } |
| 589 |
| 590 if (token != T_ARRAY_END) |
| 591 return NULL; |
| 592 |
| 593 return list.release(); |
| 594 } |
| 595 |
| 596 Value* JSONParser::ConsumeString() { |
| 597 StringBuilder string; |
| 598 if (!ConsumeStringRaw(&string)) |
| 599 return NULL; |
| 600 |
| 601 // Create the Value representation, either using a hidden root, if configured |
| 602 // to do so, and the string can be represented by StringPiece. |
| 603 if (string.CanBeStringPiece() && !(options_ & JSON_DETACHABLE_CHILDREN)) { |
| 604 return new JSONStringValue(string.AsStringPiece()); |
| 605 } else { |
| 606 if (string.CanBeStringPiece()) |
| 607 string.Convert(); |
| 608 return new StringValue(string.AsString()); |
| 609 } |
| 610 } |
| 611 |
| 612 bool JSONParser::ConsumeStringRaw(StringBuilder* out) { |
| 613 if (*pos_ != '"') { |
| 614 ReportError(JSONReader::JSON_UNEXPECTED_TOKEN, 1); |
| 615 return false; |
| 616 } |
| 617 |
| 618 // StringBuilder will internally build a StringPiece unless a UTF-16 |
| 619 // conversion occurs, at which point it will perform a copy into a |
| 620 // std::string. |
| 621 StringBuilder string(NextChar()); |
| 622 |
| 623 int length = end_pos_ - start_pos_; |
| 624 int32 next_char = 0; |
| 625 |
| 626 while (CanConsume(1)) { |
| 627 pos_ = start_pos_ + index_; // CBU8_NEXT is postcrement. |
| 628 CBU8_NEXT(start_pos_, index_, length, next_char); |
| 629 if (next_char < 0 || !IsValidCharacter(next_char)) { |
| 630 ReportError(JSONReader::JSON_UNSUPPORTED_ENCODING, 1); |
| 631 return false; |
| 632 } |
| 633 |
| 634 // If this character is an escape sequence... |
| 635 if (next_char == '\\') { |
| 636 // The input string will be adjusted (either by combining the two |
| 637 // characters of an encoded escape sequence, or with a UTF conversion), |
| 638 // so using StringPiece isn't possible -- force a conversion. |
| 639 string.Convert(); |
| 640 |
| 641 if (!CanConsume(1)) { |
| 642 ReportError(JSONReader::JSON_INVALID_ESCAPE, 0); |
| 643 return false; |
| 644 } |
| 645 |
| 646 switch (*NextChar()) { |
| 647 // Allowed esape sequences: |
| 648 case 'x': { // UTF-8 sequence. |
| 649 // UTF-8 \x escape sequences are not allowed in the spec, but they |
| 650 // are supported here for backwards-compatiblity with the old parser. |
| 651 if (!CanConsume(2)) { |
| 652 ReportError(JSONReader::JSON_INVALID_ESCAPE, 1); |
| 653 return false; |
| 654 } |
| 655 |
| 656 int hex_digit = 0; |
| 657 if (!HexStringToInt(StringPiece(NextChar(), 2), &hex_digit)) { |
| 658 ReportError(JSONReader::JSON_INVALID_ESCAPE, -1); |
| 659 return false; |
| 660 } |
| 661 NextChar(); |
| 662 |
| 663 if (hex_digit < kExtendedASCIIStart) |
| 664 string.Append(hex_digit); |
| 665 else |
| 666 DecodeUTF8(hex_digit, &string); |
| 667 break; |
| 668 } |
| 669 case 'u': { // UTF-16 sequence. |
| 670 // UTF units are of the form \uXXXX. |
| 671 if (!CanConsume(5)) { // 5 being 'u' and four HEX digits. |
| 672 ReportError(JSONReader::JSON_INVALID_ESCAPE, 0); |
| 673 return false; |
| 674 } |
| 675 |
| 676 // Skip the 'u'. |
| 677 NextChar(); |
| 678 |
| 679 std::string utf8_units; |
| 680 if (!DecodeUTF16(&utf8_units)) { |
| 681 ReportError(JSONReader::JSON_INVALID_ESCAPE, -1); |
| 682 return false; |
| 683 } |
| 684 |
| 685 string.AppendString(utf8_units); |
| 686 break; |
| 687 } |
| 688 case '"': |
| 689 string.Append('"'); |
| 690 break; |
| 691 case '\\': |
| 692 string.Append('\\'); |
| 693 break; |
| 694 case '/': |
| 695 string.Append('/'); |
| 696 break; |
| 697 case 'b': |
| 698 string.Append('\b'); |
| 699 break; |
| 700 case 'f': |
| 701 string.Append('\f'); |
| 702 break; |
| 703 case 'n': |
| 704 string.Append('\n'); |
| 705 break; |
| 706 case 'r': |
| 707 string.Append('\r'); |
| 708 break; |
| 709 case 't': |
| 710 string.Append('\t'); |
| 711 break; |
| 712 case 'v': // Not listed as valid escape sequence in the RFC. |
| 713 string.Append('\v'); |
| 714 break; |
| 715 // All other escape squences are illegal. |
| 716 default: |
| 717 ReportError(JSONReader::JSON_INVALID_ESCAPE, 0); |
| 718 return false; |
| 719 } |
| 720 } else if (next_char == '"') { |
| 721 --index_; // Rewind by one because of CBU8_NEXT. |
| 722 out->Swap(&string); |
| 723 return true; |
| 724 } else { |
| 725 if (next_char < kExtendedASCIIStart) |
| 726 string.Append(next_char); |
| 727 else |
| 728 DecodeUTF8(next_char, &string); |
| 729 } |
| 730 } |
| 731 |
| 732 ReportError(JSONReader::JSON_SYNTAX_ERROR, 0); |
| 733 return false; |
| 734 } |
| 735 |
| 736 // Entry is at the first X in \uXXXX. |
| 737 bool JSONParser::DecodeUTF16(std::string* dest_string) { |
| 738 if (!CanConsume(4)) |
| 739 return false; |
| 740 |
| 741 // This is a 32-bit field because the shift operations in the |
| 742 // conversion process below cause MSVC to error about "data loss." |
| 743 // This only stores UTF-16 code units, though. |
| 744 // Consume the UTF-16 code unit, which may be a high surrogate. |
| 745 int code_unit16_high = 0; |
| 746 if (!HexStringToInt(StringPiece(pos_, 4), &code_unit16_high)) |
| 747 return false; |
| 748 |
| 749 // Only add 3, not 4, because at the end of this iteration, the parser has |
| 750 // finished working with the last digit of the UTF sequence, meaning that |
| 751 // the next iteration will advance to the next byte. |
| 752 NextNChars(3); |
| 753 |
| 754 // Used to convert the UTF-16 code units to a code point and then to a UTF-8 |
| 755 // code unit sequence. |
| 756 char code_point[8] = { 0 }; |
| 757 size_t offset = 0; |
| 758 |
| 759 // If this is a high surrogate, consume the next code unit to get the |
| 760 // low surrogate. |
| 761 if (CBU16_IS_SURROGATE(code_unit16_high)) { |
| 762 // Make sure this is the high surrogate. If not, it's an encoding |
| 763 // error. |
| 764 if (!CBU16_IS_SURROGATE_LEAD(code_unit16_high)) |
| 765 return false; |
| 766 |
| 767 // Make sure that the token has more characters to consume the |
| 768 // lower surrogate. |
| 769 if (!CanConsume(6)) // 6 being '\' 'u' and four HEX digits. |
| 770 return false; |
| 771 if (*NextChar() != '\\' || *NextChar() != 'u') |
| 772 return false; |
| 773 |
| 774 NextChar(); // Read past 'u'. |
| 775 int code_unit16_low = 0; |
| 776 if (!HexStringToInt(StringPiece(pos_, 4), &code_unit16_low)) |
| 777 return false; |
| 778 |
| 779 NextNChars(3); |
| 780 |
| 781 if (!CBU16_IS_TRAIL(code_unit16_low)) { |
| 782 return false; |
| 783 } |
| 784 |
| 785 uint32 code_unit32 = CBU16_GET_SUPPLEMENTARY(code_unit16_high, |
| 786 code_unit16_low); |
| 787 offset = 0; |
| 788 CBU8_APPEND_UNSAFE(code_point, offset, code_unit32); |
| 789 } else { |
| 790 // Not a surrogate. |
| 791 DCHECK(CBU16_IS_SINGLE(code_unit16_high)); |
| 792 CBU8_APPEND_UNSAFE(code_point, offset, code_unit16_high); |
| 793 } |
| 794 |
| 795 dest_string->append(code_point); |
| 796 return true; |
| 797 } |
| 798 |
| 799 void JSONParser::DecodeUTF8(const int32& point, StringBuilder* dest) { |
| 800 // Anything outside of the basic ASCII plane will need to be decomposed from |
| 801 // int32 to a multi-byte sequence. |
| 802 if (point < kExtendedASCIIStart) { |
| 803 dest->Append(point); |
| 804 } else { |
| 805 char utf8_units[4] = { 0 }; |
| 806 int offset = 0; |
| 807 CBU8_APPEND_UNSAFE(utf8_units, offset, point); |
| 808 dest->Convert(); |
| 809 dest->AppendString(utf8_units); |
| 810 } |
| 811 } |
| 812 |
| 813 Value* JSONParser::ConsumeNumber() { |
| 814 const char* num_start = pos_; |
| 815 const int start_index = index_; |
| 816 int end_index = start_index; |
| 817 |
| 818 if (*pos_ == '-') |
| 819 NextChar(); |
| 820 |
| 821 if (!ReadInt(false)) { |
| 822 ReportError(JSONReader::JSON_SYNTAX_ERROR, 1); |
| 823 return NULL; |
| 824 } |
| 825 end_index = index_; |
| 826 |
| 827 // The optional fraction part. |
| 828 if (*pos_ == '.') { |
| 829 if (!CanConsume(1)) { |
| 830 ReportError(JSONReader::JSON_SYNTAX_ERROR, 1); |
| 831 return NULL; |
| 832 } |
| 833 NextChar(); |
| 834 if (!ReadInt(true)) { |
| 835 ReportError(JSONReader::JSON_SYNTAX_ERROR, 1); |
| 836 return NULL; |
| 837 } |
| 838 end_index = index_; |
| 839 } |
| 840 |
| 841 // Optional exponent part. |
| 842 if (*pos_ == 'e' || *pos_ == 'E') { |
| 843 NextChar(); |
| 844 if (*pos_ == '-' || *pos_ == '+') |
| 845 NextChar(); |
| 846 if (!ReadInt(true)) { |
| 847 ReportError(JSONReader::JSON_SYNTAX_ERROR, 1); |
| 848 return NULL; |
| 849 } |
| 850 end_index = index_; |
| 851 } |
| 852 |
| 853 // ReadInt is greedy because numbers have no easily detectable sentinel, |
| 854 // so save off where the parser should be on exit (see Consume invariant at |
| 855 // the top of the header), then make sure the next token is one which is |
| 856 // valid. |
| 857 const char* exit_pos = pos_ - 1; |
| 858 int exit_index = index_ - 1; |
| 859 |
| 860 switch (GetNextToken()) { |
| 861 case T_OBJECT_END: |
| 862 case T_ARRAY_END: |
| 863 case T_LIST_SEPARATOR: |
| 864 case T_END_OF_INPUT: |
| 865 break; |
| 866 default: |
| 867 ReportError(JSONReader::JSON_SYNTAX_ERROR, 1); |
| 868 return NULL; |
| 869 } |
| 870 |
| 871 pos_ = exit_pos; |
| 872 index_ = exit_index; |
| 873 |
| 874 StringPiece num_string(num_start, end_index - start_index); |
| 875 |
| 876 int num_int; |
| 877 if (StringToInt(num_string, &num_int)) |
| 878 return Value::CreateIntegerValue(num_int); |
| 879 |
| 880 double num_double; |
| 881 if (base::StringToDouble(num_string.as_string(), &num_double) && |
| 882 IsFinite(num_double)) { |
| 883 return Value::CreateDoubleValue(num_double); |
| 884 } |
| 885 |
| 886 return NULL; |
| 887 } |
| 888 |
| 889 bool JSONParser::ReadInt(bool allow_leading_zeros) { |
| 890 char first = *pos_; |
| 891 int len = 0; |
| 892 |
| 893 char c = first; |
| 894 while (CanConsume(1) && IsAsciiDigit(c)) { |
| 895 c = *NextChar(); |
| 896 ++len; |
| 897 } |
| 898 |
| 899 if (len == 0) |
| 900 return false; |
| 901 |
| 902 if (!allow_leading_zeros && len > 1 && first == '0') |
| 903 return false; |
| 904 |
| 905 return true; |
| 906 } |
| 907 |
| 908 Value* JSONParser::ConsumeLiteral() { |
| 909 switch (*pos_) { |
| 910 case 't': { |
| 911 const char* kTrueLiteral = "true"; |
| 912 const int kTrueLen = static_cast<int>(strlen(kTrueLiteral)); |
| 913 if (!CanConsume(kTrueLen - 1) || |
| 914 !StringsAreEqual(pos_, kTrueLiteral, kTrueLen)) { |
| 915 ReportError(JSONReader::JSON_SYNTAX_ERROR, 1); |
| 916 return NULL; |
| 917 } |
| 918 NextNChars(kTrueLen - 1); |
| 919 return Value::CreateBooleanValue(true); |
| 920 } |
| 921 case 'f': { |
| 922 const char* kFalseLiteral = "false"; |
| 923 const int kFalseLen = static_cast<int>(strlen(kFalseLiteral)); |
| 924 if (!CanConsume(kFalseLen - 1) || |
| 925 !StringsAreEqual(pos_, kFalseLiteral, kFalseLen)) { |
| 926 ReportError(JSONReader::JSON_SYNTAX_ERROR, 1); |
| 927 return NULL; |
| 928 } |
| 929 NextNChars(kFalseLen - 1); |
| 930 return Value::CreateBooleanValue(false); |
| 931 } |
| 932 case 'n': { |
| 933 const char* kNullLiteral = "null"; |
| 934 const int kNullLen = static_cast<int>(strlen(kNullLiteral)); |
| 935 if (!CanConsume(kNullLen - 1) || |
| 936 !StringsAreEqual(pos_, kNullLiteral, kNullLen)) { |
| 937 ReportError(JSONReader::JSON_SYNTAX_ERROR, 1); |
| 938 return NULL; |
| 939 } |
| 940 NextNChars(kNullLen - 1); |
| 941 return Value::CreateNullValue(); |
| 942 } |
| 943 default: |
| 944 ReportError(JSONReader::JSON_UNEXPECTED_TOKEN, 1); |
| 945 return NULL; |
| 946 } |
| 947 } |
| 948 |
| 949 // static |
| 950 bool JSONParser::StringsAreEqual(const char* one, const char* two, size_t len) { |
| 951 return strncmp(one, two, len) == 0; |
| 952 } |
| 953 |
| 954 void JSONParser::ReportError(JSONReader::JsonParseError code, |
| 955 int column_adjust) { |
| 956 error_code_ = code; |
| 957 error_line_ = line_number_; |
| 958 error_column_ = index_ - index_last_line_ + column_adjust; |
| 959 } |
| 960 |
| 961 // static |
| 962 std::string JSONParser::FormatErrorMessage(int line, int column, |
| 963 const std::string& description) { |
| 964 if (line || column) { |
| 965 return StringPrintf("Line: %i, column: %i, %s", |
| 966 line, column, description.c_str()); |
| 967 } |
| 968 return description; |
| 969 } |
| 970 |
| 971 } // namespace internal |
| 972 } // namespace base |
OLD | NEW |