OLD | NEW |
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 // | 4 // |
5 // A JSON parser. Converts strings of JSON into a Value object (see | 5 // A JSON parser. Converts strings of JSON into a Value object (see |
6 // base/values.h). | 6 // base/values.h). |
7 // http://www.ietf.org/rfc/rfc4627.txt?number=4627 | 7 // http://www.ietf.org/rfc/rfc4627.txt?number=4627 |
8 // | 8 // |
9 // Known limitations/deviations from the RFC: | 9 // Known limitations/deviations from the RFC: |
10 // - Only knows how to parse ints within the range of a signed 32 bit int and | 10 // - Only knows how to parse ints within the range of a signed 32 bit int and |
(...skipping 15 matching lines...) Expand all Loading... |
26 // TODO(tc): Add an option to disable comment stripping | 26 // TODO(tc): Add an option to disable comment stripping |
27 | 27 |
28 #ifndef BASE_JSON_JSON_READER_H_ | 28 #ifndef BASE_JSON_JSON_READER_H_ |
29 #define BASE_JSON_JSON_READER_H_ | 29 #define BASE_JSON_JSON_READER_H_ |
30 #pragma once | 30 #pragma once |
31 | 31 |
32 #include <string> | 32 #include <string> |
33 | 33 |
34 #include "base/base_export.h" | 34 #include "base/base_export.h" |
35 #include "base/basictypes.h" | 35 #include "base/basictypes.h" |
36 | 36 #include "base/memory/scoped_ptr.h" |
37 // Chromium and Chromium OS check out gtest to different places, so we're | |
38 // unable to compile on both if we include gtest_prod.h here. Instead, include | |
39 // its only contents -- this will need to be updated if the macro ever changes. | |
40 #define FRIEND_TEST(test_case_name, test_name)\ | |
41 friend class test_case_name##_##test_name##_Test | |
42 | |
43 #define FRIEND_TEST_ALL_PREFIXES(test_case_name, test_name) \ | |
44 FRIEND_TEST(test_case_name, test_name); \ | |
45 FRIEND_TEST(test_case_name, DISABLED_##test_name); \ | |
46 FRIEND_TEST(test_case_name, FLAKY_##test_name); \ | |
47 FRIEND_TEST(test_case_name, FAILS_##test_name) | |
48 | 37 |
49 namespace base { | 38 namespace base { |
| 39 class Value; |
50 | 40 |
51 class Value; | 41 namespace internal { |
| 42 class JSONParser; |
| 43 } |
| 44 } |
| 45 |
| 46 namespace base { |
52 | 47 |
53 enum JSONParserOptions { | 48 enum JSONParserOptions { |
54 // Parses the input strictly according to RFC 4627, except for where noted | 49 // Parses the input strictly according to RFC 4627, except for where noted |
55 // above. | 50 // above. |
56 JSON_PARSE_RFC = 0, | 51 JSON_PARSE_RFC = 0, |
57 | 52 |
58 // Allows commas to exist after the last element in structures. | 53 // Allows commas to exist after the last element in structures. |
59 JSON_ALLOW_TRAILING_COMMAS = 1 << 0, | 54 JSON_ALLOW_TRAILING_COMMAS = 1 << 0, |
| 55 |
| 56 // The parser can perform optimizations by placing hidden data in the root of |
| 57 // the JSON object, which speeds up certain operations on children. However, |
| 58 // if the child is Remove()d from root, it would result in use-after-free |
| 59 // unless it is DeepCopy()ed or this option is used. |
| 60 JSON_DETACHABLE_CHILDREN = 1 << 1, |
60 }; | 61 }; |
61 | 62 |
62 class BASE_EXPORT JSONReader { | 63 class BASE_EXPORT JSONReader { |
63 public: | 64 public: |
64 // A struct to hold a JS token. | |
65 class Token { | |
66 public: | |
67 enum Type { | |
68 OBJECT_BEGIN, // { | |
69 OBJECT_END, // } | |
70 ARRAY_BEGIN, // [ | |
71 ARRAY_END, // ] | |
72 STRING, | |
73 NUMBER, | |
74 BOOL_TRUE, // true | |
75 BOOL_FALSE, // false | |
76 NULL_TOKEN, // null | |
77 LIST_SEPARATOR, // , | |
78 OBJECT_PAIR_SEPARATOR, // : | |
79 END_OF_INPUT, | |
80 INVALID_TOKEN, | |
81 }; | |
82 | |
83 Token(Type t, const char* b, int len) | |
84 : type(t), begin(b), length(len) {} | |
85 | |
86 // Get the character that's one past the end of this token. | |
87 char NextChar() { | |
88 return *(begin + length); | |
89 } | |
90 | |
91 static Token CreateInvalidToken() { | |
92 return Token(INVALID_TOKEN, 0, 0); | |
93 } | |
94 | |
95 Type type; | |
96 | |
97 // A pointer into JSONReader::json_pos_ that's the beginning of this token. | |
98 const char* begin; | |
99 | |
100 // End should be one char past the end of the token. | |
101 int length; | |
102 }; | |
103 | |
104 // Error codes during parsing. | 65 // Error codes during parsing. |
105 enum JsonParseError { | 66 enum JsonParseError { |
106 JSON_NO_ERROR = 0, | 67 JSON_NO_ERROR = 0, |
107 JSON_BAD_ROOT_ELEMENT_TYPE, | |
108 JSON_INVALID_ESCAPE, | 68 JSON_INVALID_ESCAPE, |
109 JSON_SYNTAX_ERROR, | 69 JSON_SYNTAX_ERROR, |
| 70 JSON_UNEXPECTED_TOKEN, |
110 JSON_TRAILING_COMMA, | 71 JSON_TRAILING_COMMA, |
111 JSON_TOO_MUCH_NESTING, | 72 JSON_TOO_MUCH_NESTING, |
112 JSON_UNEXPECTED_DATA_AFTER_ROOT, | 73 JSON_UNEXPECTED_DATA_AFTER_ROOT, |
113 JSON_UNSUPPORTED_ENCODING, | 74 JSON_UNSUPPORTED_ENCODING, |
114 JSON_UNQUOTED_DICTIONARY_KEY, | 75 JSON_UNQUOTED_DICTIONARY_KEY, |
115 }; | 76 }; |
116 | 77 |
117 // String versions of parse error codes. | 78 // String versions of parse error codes. |
118 static const char* kBadRootElementType; | |
119 static const char* kInvalidEscape; | 79 static const char* kInvalidEscape; |
120 static const char* kSyntaxError; | 80 static const char* kSyntaxError; |
| 81 static const char* kUnexpectedToken; |
121 static const char* kTrailingComma; | 82 static const char* kTrailingComma; |
122 static const char* kTooMuchNesting; | 83 static const char* kTooMuchNesting; |
123 static const char* kUnexpectedDataAfterRoot; | 84 static const char* kUnexpectedDataAfterRoot; |
124 static const char* kUnsupportedEncoding; | 85 static const char* kUnsupportedEncoding; |
125 static const char* kUnquotedDictionaryKey; | 86 static const char* kUnquotedDictionaryKey; |
126 | 87 |
| 88 // Constructs a reader with the default options, JSON_PARSE_RFC. |
127 JSONReader(); | 89 JSONReader(); |
128 | 90 |
| 91 // Constructs a reader with custom options. |
| 92 explicit JSONReader(int options); |
| 93 |
| 94 ~JSONReader(); |
| 95 |
129 // Reads and parses |json|, returning a Value. The caller owns the returned | 96 // Reads and parses |json|, returning a Value. The caller owns the returned |
130 // instance. If |json| is not a properly formed JSON string, returns NULL. | 97 // instance. If |json| is not a properly formed JSON string, returns NULL. |
131 static Value* Read(const std::string& json); | 98 static Value* Read(const std::string& json); |
132 | 99 |
133 // Reads and parses |json|, returning a Value owned by the caller. The | 100 // Reads and parses |json|, returning a Value owned by the caller. The |
134 // parser respects the given |options|. If the input is not properly formed, | 101 // parser respects the given |options|. If the input is not properly formed, |
135 // returns NULL. | 102 // returns NULL. |
136 static Value* Read(const std::string& json, int options); | 103 static Value* Read(const std::string& json, int options); |
137 | 104 |
138 // Reads and parses |json| like Read(). |error_code_out| and |error_msg_out| | 105 // Reads and parses |json| like Read(). |error_code_out| and |error_msg_out| |
139 // are optional. If specified and NULL is returned, they will be populated | 106 // are optional. If specified and NULL is returned, they will be populated |
140 // an error code and a formatted error message (including error location if | 107 // an error code and a formatted error message (including error location if |
141 // appropriate). Otherwise, they will be unmodified. | 108 // appropriate). Otherwise, they will be unmodified. |
142 static Value* ReadAndReturnError(const std::string& json, | 109 static Value* ReadAndReturnError(const std::string& json, |
143 int options, // JSONParserOptions | 110 int options, // JSONParserOptions |
144 int* error_code_out, | 111 int* error_code_out, |
145 std::string* error_msg_out); | 112 std::string* error_msg_out); |
146 | 113 |
147 // Converts a JSON parse error code into a human readable message. | 114 // Converts a JSON parse error code into a human readable message. |
148 // Returns an empty string if error_code is JSON_NO_ERROR. | 115 // Returns an empty string if error_code is JSON_NO_ERROR. |
149 static std::string ErrorCodeToString(JsonParseError error_code); | 116 static std::string ErrorCodeToString(JsonParseError error_code); |
150 | 117 |
151 // Returns the error code if the last call to JsonToValue() failed. | 118 // Parses an input string into a Value that is owned by the caller. |
| 119 Value* ReadToValue(const std::string& json); |
| 120 |
| 121 // Returns the error code if the last call to ReadToValue() failed. |
152 // Returns JSON_NO_ERROR otherwise. | 122 // Returns JSON_NO_ERROR otherwise. |
153 JsonParseError error_code() const { return error_code_; } | 123 JsonParseError error_code() const; |
154 | 124 |
155 // Converts error_code_ to a human-readable string, including line and column | 125 // Converts error_code_ to a human-readable string, including line and column |
156 // numbers if appropriate. | 126 // numbers if appropriate. |
157 std::string GetErrorMessage() const; | 127 std::string GetErrorMessage() const; |
158 | 128 |
159 // Reads and parses |json|, returning a Value. The caller owns the returned | |
160 // instance. If |json| is not a properly formed JSON string, returns NULL and | |
161 // a detailed error can be retrieved from |error_message()|. | |
162 // If |check_root| is true, we require that the root object be an object or | |
163 // array. Otherwise, it can be any valid JSON type. | |
164 // If |allow_trailing_comma| is true, we will ignore trailing commas in | |
165 // objects and arrays even though this goes against the RFC. | |
166 Value* JsonToValue(const std::string& json, bool check_root, | |
167 bool allow_trailing_comma); | |
168 | |
169 private: | 129 private: |
170 FRIEND_TEST_ALL_PREFIXES(JSONReaderTest, Reading); | 130 scoped_ptr<internal::JSONParser> parser_; |
171 FRIEND_TEST_ALL_PREFIXES(JSONReaderTest, ErrorMessages); | |
172 | |
173 static std::string FormatErrorMessage(int line, int column, | |
174 const std::string& description); | |
175 | |
176 // Recursively build Value. Returns NULL if we don't have a valid JSON | |
177 // string. If |is_root| is true, we verify that the root element is either | |
178 // an object or an array. | |
179 Value* BuildValue(bool is_root); | |
180 | |
181 // Parses a sequence of characters into a Token::NUMBER. If the sequence of | |
182 // characters is not a valid number, returns a Token::INVALID_TOKEN. Note | |
183 // that DecodeNumber is used to actually convert from a string to an | |
184 // int/double. | |
185 Token ParseNumberToken(); | |
186 | |
187 // Try and convert the substring that token holds into an int or a double. If | |
188 // we can (i.e., no overflow), return the value, else return NULL. | |
189 Value* DecodeNumber(const Token& token); | |
190 | |
191 // Parses a sequence of characters into a Token::STRING. If the sequence of | |
192 // characters is not a valid string, returns a Token::INVALID_TOKEN. Note | |
193 // that DecodeString is used to actually decode the escaped string into an | |
194 // actual wstring. | |
195 Token ParseStringToken(); | |
196 | |
197 // Convert the substring into a value string. This should always succeed | |
198 // (otherwise ParseStringToken would have failed). | |
199 Value* DecodeString(const Token& token); | |
200 | |
201 // Helper function for DecodeString that consumes UTF16 [0,2] code units and | |
202 // converts them to UTF8 code units. |token| is the string token in which the | |
203 // units should be read, |i| is the position in the token at which the first | |
204 // code unit starts, immediately after the |\u|. This will be mutated if code | |
205 // units are consumed. |dest_string| is a string to which the UTF8 code unit | |
206 // should be appended. Returns true on success and false if there's an | |
207 // encoding error. | |
208 bool ConvertUTF16Units(const Token& token, | |
209 int* i, | |
210 std::string* dest_string); | |
211 | |
212 // Grabs the next token in the JSON stream. This does not increment the | |
213 // stream so it can be used to look ahead at the next token. | |
214 Token ParseToken(); | |
215 | |
216 // Increments |json_pos_| past leading whitespace and comments. | |
217 void EatWhitespaceAndComments(); | |
218 | |
219 // If |json_pos_| is at the start of a comment, eat it, otherwise, returns | |
220 // false. | |
221 bool EatComment(); | |
222 | |
223 // Checks if |json_pos_| matches str. | |
224 bool NextStringMatch(const char* str, size_t length); | |
225 | |
226 // Sets the error code that will be returned to the caller. The current | |
227 // line and column are determined and added into the final message. | |
228 void SetErrorCode(const JsonParseError error, const char* error_pos); | |
229 | |
230 // Pointer to the starting position in the input string. | |
231 const char* start_pos_; | |
232 | |
233 // Pointer to the current position in the input string. | |
234 const char* json_pos_; | |
235 | |
236 // Pointer to the last position in the input string. | |
237 const char* end_pos_; | |
238 | |
239 // Used to keep track of how many nested lists/dicts there are. | |
240 int stack_depth_; | |
241 | |
242 // A parser flag that allows trailing commas in objects and arrays. | |
243 bool allow_trailing_comma_; | |
244 | |
245 // Contains the error code for the last call to JsonToValue(), if any. | |
246 JsonParseError error_code_; | |
247 int error_line_; | |
248 int error_col_; | |
249 | |
250 DISALLOW_COPY_AND_ASSIGN(JSONReader); | |
251 }; | 131 }; |
252 | 132 |
253 } // namespace base | 133 } // namespace base |
254 | 134 |
255 #endif // BASE_JSON_JSON_READER_H_ | 135 #endif // BASE_JSON_JSON_READER_H_ |
OLD | NEW |