| OLD | NEW |
| 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 // | 4 // |
| 5 // A JSON parser. Converts strings of JSON into a Value object (see | 5 // A JSON parser. Converts strings of JSON into a Value object (see |
| 6 // base/values.h). | 6 // base/values.h). |
| 7 // http://www.ietf.org/rfc/rfc4627.txt?number=4627 | 7 // http://www.ietf.org/rfc/rfc4627.txt?number=4627 |
| 8 // | 8 // |
| 9 // Known limitations/deviations from the RFC: | 9 // Known limitations/deviations from the RFC: |
| 10 // - Only knows how to parse ints within the range of a signed 32 bit int and | 10 // - Only knows how to parse ints within the range of a signed 32 bit int and |
| (...skipping 15 matching lines...) Expand all Loading... |
| 26 // TODO(tc): Add an option to disable comment stripping | 26 // TODO(tc): Add an option to disable comment stripping |
| 27 | 27 |
| 28 #ifndef BASE_JSON_JSON_READER_H_ | 28 #ifndef BASE_JSON_JSON_READER_H_ |
| 29 #define BASE_JSON_JSON_READER_H_ | 29 #define BASE_JSON_JSON_READER_H_ |
| 30 #pragma once | 30 #pragma once |
| 31 | 31 |
| 32 #include <string> | 32 #include <string> |
| 33 | 33 |
| 34 #include "base/base_export.h" | 34 #include "base/base_export.h" |
| 35 #include "base/basictypes.h" | 35 #include "base/basictypes.h" |
| 36 | 36 #include "base/memory/scoped_ptr.h" |
| 37 // Chromium and Chromium OS check out gtest to different places, so we're | |
| 38 // unable to compile on both if we include gtest_prod.h here. Instead, include | |
| 39 // its only contents -- this will need to be updated if the macro ever changes. | |
| 40 #define FRIEND_TEST(test_case_name, test_name)\ | |
| 41 friend class test_case_name##_##test_name##_Test | |
| 42 | |
| 43 #define FRIEND_TEST_ALL_PREFIXES(test_case_name, test_name) \ | |
| 44 FRIEND_TEST(test_case_name, test_name); \ | |
| 45 FRIEND_TEST(test_case_name, DISABLED_##test_name); \ | |
| 46 FRIEND_TEST(test_case_name, FLAKY_##test_name); \ | |
| 47 FRIEND_TEST(test_case_name, FAILS_##test_name) | |
| 48 | 37 |
| 49 namespace base { | 38 namespace base { |
| 39 class Value; |
| 50 | 40 |
| 51 class Value; | 41 namespace internal { |
| 42 class JSONParser; |
| 43 } |
| 44 } |
| 45 |
| 46 namespace base { |
| 52 | 47 |
| 53 enum JSONParserOptions { | 48 enum JSONParserOptions { |
| 54 // Parses the input strictly according to RFC 4627, except for where noted | 49 // Parses the input strictly according to RFC 4627, except for where noted |
| 55 // above. | 50 // above. |
| 56 JSON_PARSE_RFC = 0, | 51 JSON_PARSE_RFC = 0, |
| 57 | 52 |
| 58 // Allows commas to exist after the last element in structures. | 53 // Allows commas to exist after the last element in structures. |
| 59 JSON_ALLOW_TRAILING_COMMAS = 1 << 0, | 54 JSON_ALLOW_TRAILING_COMMAS = 1 << 0, |
| 55 |
| 56 // The parser can perform optimizations by placing hidden data in the root of |
| 57 // the JSON object, which speeds up certain operations on children. However, |
| 58 // if the child is Remove()d from root, it would result in use-after-free |
| 59 // unless it is DeepCopy()ed or this option is used. |
| 60 JSON_DETACHABLE_CHILDREN = 1 << 1, |
| 60 }; | 61 }; |
| 61 | 62 |
| 62 class BASE_EXPORT JSONReader { | 63 class BASE_EXPORT JSONReader { |
| 63 public: | 64 public: |
| 64 // A struct to hold a JS token. | |
| 65 class Token { | |
| 66 public: | |
| 67 enum Type { | |
| 68 OBJECT_BEGIN, // { | |
| 69 OBJECT_END, // } | |
| 70 ARRAY_BEGIN, // [ | |
| 71 ARRAY_END, // ] | |
| 72 STRING, | |
| 73 NUMBER, | |
| 74 BOOL_TRUE, // true | |
| 75 BOOL_FALSE, // false | |
| 76 NULL_TOKEN, // null | |
| 77 LIST_SEPARATOR, // , | |
| 78 OBJECT_PAIR_SEPARATOR, // : | |
| 79 END_OF_INPUT, | |
| 80 INVALID_TOKEN, | |
| 81 }; | |
| 82 | |
| 83 Token(Type t, const char* b, int len) | |
| 84 : type(t), begin(b), length(len) {} | |
| 85 | |
| 86 // Get the character that's one past the end of this token. | |
| 87 char NextChar() { | |
| 88 return *(begin + length); | |
| 89 } | |
| 90 | |
| 91 static Token CreateInvalidToken() { | |
| 92 return Token(INVALID_TOKEN, 0, 0); | |
| 93 } | |
| 94 | |
| 95 Type type; | |
| 96 | |
| 97 // A pointer into JSONReader::json_pos_ that's the beginning of this token. | |
| 98 const char* begin; | |
| 99 | |
| 100 // End should be one char past the end of the token. | |
| 101 int length; | |
| 102 }; | |
| 103 | |
| 104 // Error codes during parsing. | 65 // Error codes during parsing. |
| 105 enum JsonParseError { | 66 enum JsonParseError { |
| 106 JSON_NO_ERROR = 0, | 67 JSON_NO_ERROR = 0, |
| 107 JSON_BAD_ROOT_ELEMENT_TYPE, | |
| 108 JSON_INVALID_ESCAPE, | 68 JSON_INVALID_ESCAPE, |
| 109 JSON_SYNTAX_ERROR, | 69 JSON_SYNTAX_ERROR, |
| 70 JSON_UNEXPECTED_TOKEN, |
| 110 JSON_TRAILING_COMMA, | 71 JSON_TRAILING_COMMA, |
| 111 JSON_TOO_MUCH_NESTING, | 72 JSON_TOO_MUCH_NESTING, |
| 112 JSON_UNEXPECTED_DATA_AFTER_ROOT, | 73 JSON_UNEXPECTED_DATA_AFTER_ROOT, |
| 113 JSON_UNSUPPORTED_ENCODING, | 74 JSON_UNSUPPORTED_ENCODING, |
| 114 JSON_UNQUOTED_DICTIONARY_KEY, | 75 JSON_UNQUOTED_DICTIONARY_KEY, |
| 115 }; | 76 }; |
| 116 | 77 |
| 117 // String versions of parse error codes. | 78 // String versions of parse error codes. |
| 118 static const char* kBadRootElementType; | |
| 119 static const char* kInvalidEscape; | 79 static const char* kInvalidEscape; |
| 120 static const char* kSyntaxError; | 80 static const char* kSyntaxError; |
| 81 static const char* kUnexpectedToken; |
| 121 static const char* kTrailingComma; | 82 static const char* kTrailingComma; |
| 122 static const char* kTooMuchNesting; | 83 static const char* kTooMuchNesting; |
| 123 static const char* kUnexpectedDataAfterRoot; | 84 static const char* kUnexpectedDataAfterRoot; |
| 124 static const char* kUnsupportedEncoding; | 85 static const char* kUnsupportedEncoding; |
| 125 static const char* kUnquotedDictionaryKey; | 86 static const char* kUnquotedDictionaryKey; |
| 126 | 87 |
| 88 // Constructs a reader with the default options, JSON_PARSE_RFC. |
| 127 JSONReader(); | 89 JSONReader(); |
| 128 | 90 |
| 91 // Constructs a reader with custom options. |
| 92 explicit JSONReader(int options); |
| 93 |
| 94 ~JSONReader(); |
| 95 |
| 129 // Reads and parses |json|, returning a Value. The caller owns the returned | 96 // Reads and parses |json|, returning a Value. The caller owns the returned |
| 130 // instance. If |json| is not a properly formed JSON string, returns NULL. | 97 // instance. If |json| is not a properly formed JSON string, returns NULL. |
| 131 static Value* Read(const std::string& json); | 98 static Value* Read(const std::string& json); |
| 132 | 99 |
| 133 // Reads and parses |json|, returning a Value owned by the caller. The | 100 // Reads and parses |json|, returning a Value owned by the caller. The |
| 134 // parser respects the given |options|. If the input is not properly formed, | 101 // parser respects the given |options|. If the input is not properly formed, |
| 135 // returns NULL. | 102 // returns NULL. |
| 136 static Value* Read(const std::string& json, int options); | 103 static Value* Read(const std::string& json, int options); |
| 137 | 104 |
| 138 // Reads and parses |json| like Read(). |error_code_out| and |error_msg_out| | 105 // Reads and parses |json| like Read(). |error_code_out| and |error_msg_out| |
| 139 // are optional. If specified and NULL is returned, they will be populated | 106 // are optional. If specified and NULL is returned, they will be populated |
| 140 // with an error code and a formatted error message (including error location if | 107 // with an error code and a formatted error message (including error location if |
| 141 // appropriate). Otherwise, they will be unmodified. | 108 // appropriate). Otherwise, they will be unmodified. |
| 142 static Value* ReadAndReturnError(const std::string& json, | 109 static Value* ReadAndReturnError(const std::string& json, |
| 143 int options, // JSONParserOptions | 110 int options, // JSONParserOptions |
| 144 int* error_code_out, | 111 int* error_code_out, |
| 145 std::string* error_msg_out); | 112 std::string* error_msg_out); |
| 146 | 113 |
| 147 // Converts a JSON parse error code into a human readable message. | 114 // Converts a JSON parse error code into a human readable message. |
| 148 // Returns an empty string if error_code is JSON_NO_ERROR. | 115 // Returns an empty string if error_code is JSON_NO_ERROR. |
| 149 static std::string ErrorCodeToString(JsonParseError error_code); | 116 static std::string ErrorCodeToString(JsonParseError error_code); |
| 150 | 117 |
| 151 // Returns the error code if the last call to JsonToValue() failed. | 118 // Parses an input string into a Value that is owned by the caller. |
| 119 Value* ReadToValue(const std::string& json); |
| 120 |
| 121 // Returns the error code if the last call to ReadToValue() failed. |
| 152 // Returns JSON_NO_ERROR otherwise. | 122 // Returns JSON_NO_ERROR otherwise. |
| 153 JsonParseError error_code() const { return error_code_; } | 123 JsonParseError error_code() const; |
| 154 | 124 |
| 155 // Converts error_code_ to a human-readable string, including line and column | 125 // Converts error_code_ to a human-readable string, including line and column |
| 156 // numbers if appropriate. | 126 // numbers if appropriate. |
| 157 std::string GetErrorMessage() const; | 127 std::string GetErrorMessage() const; |
| 158 | 128 |
| 159 // Reads and parses |json|, returning a Value. The caller owns the returned | |
| 160 // instance. If |json| is not a properly formed JSON string, returns NULL and | |
| 161 // a detailed error can be retrieved from |error_message()|. | |
| 162 // If |check_root| is true, we require that the root object be an object or | |
| 163 // array. Otherwise, it can be any valid JSON type. | |
| 164 // If |allow_trailing_comma| is true, we will ignore trailing commas in | |
| 165 // objects and arrays even though this goes against the RFC. | |
| 166 Value* JsonToValue(const std::string& json, bool check_root, | |
| 167 bool allow_trailing_comma); | |
| 168 | |
| 169 private: | 129 private: |
| 170 FRIEND_TEST_ALL_PREFIXES(JSONReaderTest, Reading); | 130 scoped_ptr<internal::JSONParser> parser_; |
| 171 FRIEND_TEST_ALL_PREFIXES(JSONReaderTest, ErrorMessages); | |
| 172 | |
| 173 static std::string FormatErrorMessage(int line, int column, | |
| 174 const std::string& description); | |
| 175 | |
| 176 // Recursively build Value. Returns NULL if we don't have a valid JSON | |
| 177 // string. If |is_root| is true, we verify that the root element is either | |
| 178 // an object or an array. | |
| 179 Value* BuildValue(bool is_root); | |
| 180 | |
| 181 // Parses a sequence of characters into a Token::NUMBER. If the sequence of | |
| 182 // characters is not a valid number, returns a Token::INVALID_TOKEN. Note | |
| 183 // that DecodeNumber is used to actually convert from a string to an | |
| 184 // int/double. | |
| 185 Token ParseNumberToken(); | |
| 186 | |
| 187 // Try and convert the substring that token holds into an int or a double. If | |
| 188 // we can (ie., no overflow), return the value, else return NULL. | |
| 189 Value* DecodeNumber(const Token& token); | |
| 190 | |
| 191 // Parses a sequence of characters into a Token::STRING. If the sequence of | |
| 192 // characters is not a valid string, returns a Token::INVALID_TOKEN. Note | |
| 193 // that DecodeString is used to actually decode the escaped string into an | |
| 194 // actual wstring. | |
| 195 Token ParseStringToken(); | |
| 196 | |
| 197 // Convert the substring into a value string. This should always succeed | |
| 198 // (otherwise ParseStringToken would have failed). | |
| 199 Value* DecodeString(const Token& token); | |
| 200 | |
| 201 // Helper function for DecodeString that consumes UTF16 [0,2] code units and | |
| 202 // converts them to UTF8 code units. |token| is the string token in which the | |
| 203 // units should be read, |i| is the position in the token at which the first | |
| 204 // code unit starts, immediately after the |\u|. This will be mutated if code | |
| 205 // units are consumed. |dest_string| is a string to which the UTF8 code unit | |
| 206 // should be appended. Returns true on success and false if there's an | |
| 207 // encoding error. | |
| 208 bool ConvertUTF16Units(const Token& token, | |
| 209 int* i, | |
| 210 std::string* dest_string); | |
| 211 | |
| 212 // Grabs the next token in the JSON stream. This does not increment the | |
| 213 // stream so it can be used to look ahead at the next token. | |
| 214 Token ParseToken(); | |
| 215 | |
| 216 // Increments |json_pos_| past leading whitespace and comments. | |
| 217 void EatWhitespaceAndComments(); | |
| 218 | |
| 219 // If |json_pos_| is at the start of a comment, eat it, otherwise, returns | |
| 220 // false. | |
| 221 bool EatComment(); | |
| 222 | |
| 223 // Checks if |json_pos_| matches str. | |
| 224 bool NextStringMatch(const char* str, size_t length); | |
| 225 | |
| 226 // Sets the error code that will be returned to the caller. The current | |
| 227 // line and column are determined and added into the final message. | |
| 228 void SetErrorCode(const JsonParseError error, const char* error_pos); | |
| 229 | |
| 230 // Pointer to the starting position in the input string. | |
| 231 const char* start_pos_; | |
| 232 | |
| 233 // Pointer to the current position in the input string. | |
| 234 const char* json_pos_; | |
| 235 | |
| 236 // Pointer to the last position in the input string. | |
| 237 const char* end_pos_; | |
| 238 | |
| 239 // Used to keep track of how many nested lists/dicts there are. | |
| 240 int stack_depth_; | |
| 241 | |
| 242 // A parser flag that allows trailing commas in objects and arrays. | |
| 243 bool allow_trailing_comma_; | |
| 244 | |
| 245 // Contains the error code for the last call to JsonToValue(), if any. | |
| 246 JsonParseError error_code_; | |
| 247 int error_line_; | |
| 248 int error_col_; | |
| 249 | |
| 250 DISALLOW_COPY_AND_ASSIGN(JSONReader); | |
| 251 }; | 131 }; |
| 252 | 132 |
| 253 } // namespace base | 133 } // namespace base |
| 254 | 134 |
| 255 #endif // BASE_JSON_JSON_READER_H_ | 135 #endif // BASE_JSON_JSON_READER_H_ |
| OLD | NEW |