Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(751)

Side by Side Diff: base/json/json_reader.h

Issue 10035042: Rewrite base::JSONReader to be 35-40% faster, depending on the input string. (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src
Patch Set: Really fix Windows, address comments Created 8 years, 7 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
« no previous file with comments | « base/json/json_parser_unittest.cc ('k') | base/json/json_reader.cc » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 // 4 //
5 // A JSON parser. Converts strings of JSON into a Value object (see 5 // A JSON parser. Converts strings of JSON into a Value object (see
6 // base/values.h). 6 // base/values.h).
7 // http://www.ietf.org/rfc/rfc4627.txt?number=4627 7 // http://www.ietf.org/rfc/rfc4627.txt?number=4627
8 // 8 //
9 // Known limitations/deviations from the RFC: 9 // Known limitations/deviations from the RFC:
10 // - Only knows how to parse ints within the range of a signed 32 bit int and 10 // - Only knows how to parse ints within the range of a signed 32 bit int and
(...skipping 15 matching lines...) Expand all
26 // TODO(tc): Add an option to disable comment stripping 26 // TODO(tc): Add an option to disable comment stripping
27 27
28 #ifndef BASE_JSON_JSON_READER_H_ 28 #ifndef BASE_JSON_JSON_READER_H_
29 #define BASE_JSON_JSON_READER_H_ 29 #define BASE_JSON_JSON_READER_H_
30 #pragma once 30 #pragma once
31 31
32 #include <string> 32 #include <string>
33 33
34 #include "base/base_export.h" 34 #include "base/base_export.h"
35 #include "base/basictypes.h" 35 #include "base/basictypes.h"
36 36 #include "base/memory/scoped_ptr.h"
37 // Chromium and Chromium OS check out gtest to different places, so we're
38 // unable to compile on both if we include gtest_prod.h here. Instead, include
39 // its only contents -- this will need to be updated if the macro ever changes.
40 #define FRIEND_TEST(test_case_name, test_name)\
41 friend class test_case_name##_##test_name##_Test
42
43 #define FRIEND_TEST_ALL_PREFIXES(test_case_name, test_name) \
44 FRIEND_TEST(test_case_name, test_name); \
45 FRIEND_TEST(test_case_name, DISABLED_##test_name); \
46 FRIEND_TEST(test_case_name, FLAKY_##test_name); \
47 FRIEND_TEST(test_case_name, FAILS_##test_name)
48 37
49 namespace base { 38 namespace base {
39 class Value;
50 40
51 class Value; 41 namespace internal {
42 class JSONParser;
43 }
44 }
45
46 namespace base {
52 47
53 enum JSONParserOptions { 48 enum JSONParserOptions {
54 // Parses the input strictly according to RFC 4627, except for where noted 49 // Parses the input strictly according to RFC 4627, except for where noted
55 // above. 50 // above.
56 JSON_PARSE_RFC = 0, 51 JSON_PARSE_RFC = 0,
57 52
58 // Allows commas to exist after the last element in structures. 53 // Allows commas to exist after the last element in structures.
59 JSON_ALLOW_TRAILING_COMMAS = 1 << 0, 54 JSON_ALLOW_TRAILING_COMMAS = 1 << 0,
55
56 // The parser can perform optimizations by placing hidden data in the root of
57 // the JSON object, which speeds up certain operations on children. However,
58 // if the child is Remove()d from root, it would result in use-after-free
59 // unless it is DeepCopy()ed or this option is used.
60 JSON_DETACHABLE_CHILDREN = 1 << 1,
60 }; 61 };
61 62
62 class BASE_EXPORT JSONReader { 63 class BASE_EXPORT JSONReader {
63 public: 64 public:
64 // A class to hold a JSON token.
65 class Token {
66 public:
67 enum Type {
68 OBJECT_BEGIN, // {
69 OBJECT_END, // }
70 ARRAY_BEGIN, // [
71 ARRAY_END, // ]
72 STRING,
73 NUMBER,
74 BOOL_TRUE, // true
75 BOOL_FALSE, // false
76 NULL_TOKEN, // null
77 LIST_SEPARATOR, // ,
78 OBJECT_PAIR_SEPARATOR, // :
79 END_OF_INPUT,
80 INVALID_TOKEN,
81 };
82
83 Token(Type t, const char* b, int len)
84 : type(t), begin(b), length(len) {}
85
86 // Get the character that's one past the end of this token.
87 char NextChar() {
88 return *(begin + length);
89 }
90
91 static Token CreateInvalidToken() {
92 return Token(INVALID_TOKEN, 0, 0);
93 }
94
95 Type type;
96
97 // A pointer into JSONReader::json_pos_ that's the beginning of this token.
98 const char* begin;
99
100 // Length of the token; |begin| + |length| is one char past its end.
101 int length;
102 };
103
104 // Error codes during parsing. 65 // Error codes during parsing.
105 enum JsonParseError { 66 enum JsonParseError {
106 JSON_NO_ERROR = 0, 67 JSON_NO_ERROR = 0,
107 JSON_BAD_ROOT_ELEMENT_TYPE,
108 JSON_INVALID_ESCAPE, 68 JSON_INVALID_ESCAPE,
109 JSON_SYNTAX_ERROR, 69 JSON_SYNTAX_ERROR,
70 JSON_UNEXPECTED_TOKEN,
110 JSON_TRAILING_COMMA, 71 JSON_TRAILING_COMMA,
111 JSON_TOO_MUCH_NESTING, 72 JSON_TOO_MUCH_NESTING,
112 JSON_UNEXPECTED_DATA_AFTER_ROOT, 73 JSON_UNEXPECTED_DATA_AFTER_ROOT,
113 JSON_UNSUPPORTED_ENCODING, 74 JSON_UNSUPPORTED_ENCODING,
114 JSON_UNQUOTED_DICTIONARY_KEY, 75 JSON_UNQUOTED_DICTIONARY_KEY,
115 }; 76 };
116 77
117 // String versions of parse error codes. 78 // String versions of parse error codes.
118 static const char* kBadRootElementType;
119 static const char* kInvalidEscape; 79 static const char* kInvalidEscape;
120 static const char* kSyntaxError; 80 static const char* kSyntaxError;
81 static const char* kUnexpectedToken;
121 static const char* kTrailingComma; 82 static const char* kTrailingComma;
122 static const char* kTooMuchNesting; 83 static const char* kTooMuchNesting;
123 static const char* kUnexpectedDataAfterRoot; 84 static const char* kUnexpectedDataAfterRoot;
124 static const char* kUnsupportedEncoding; 85 static const char* kUnsupportedEncoding;
125 static const char* kUnquotedDictionaryKey; 86 static const char* kUnquotedDictionaryKey;
126 87
88 // Constructs a reader with the default options, JSON_PARSE_RFC.
127 JSONReader(); 89 JSONReader();
128 90
91 // Constructs a reader with custom options.
92 explicit JSONReader(int options);
93
94 ~JSONReader();
95
129 // Reads and parses |json|, returning a Value. The caller owns the returned 96 // Reads and parses |json|, returning a Value. The caller owns the returned
130 // instance. If |json| is not a properly formed JSON string, returns NULL. 97 // instance. If |json| is not a properly formed JSON string, returns NULL.
131 static Value* Read(const std::string& json); 98 static Value* Read(const std::string& json);
132 99
133 // Reads and parses |json|, returning a Value owned by the caller. The 100 // Reads and parses |json|, returning a Value owned by the caller. The
134 // parser respects the given |options|. If the input is not properly formed, 101 // parser respects the given |options|. If the input is not properly formed,
135 // returns NULL. 102 // returns NULL.
136 static Value* Read(const std::string& json, int options); 103 static Value* Read(const std::string& json, int options);
137 104
138 // Reads and parses |json| like Read(). |error_code_out| and |error_msg_out| 105 // Reads and parses |json| like Read(). |error_code_out| and |error_msg_out|
139 // are optional. If specified and NULL is returned, they will be populated 106 // are optional. If specified and NULL is returned, they will be populated
140 // with an error code and a formatted error message (including error 107 // with an error code and a formatted error message (including error
141 // location if appropriate). Otherwise, they will be unmodified. 108 // location if appropriate). Otherwise, they will be unmodified.
142 static Value* ReadAndReturnError(const std::string& json, 109 static Value* ReadAndReturnError(const std::string& json,
143 int options, // JSONParserOptions 110 int options, // JSONParserOptions
144 int* error_code_out, 111 int* error_code_out,
145 std::string* error_msg_out); 112 std::string* error_msg_out);
146 113
147 // Converts a JSON parse error code into a human readable message. 114 // Converts a JSON parse error code into a human readable message.
148 // Returns an empty string if error_code is JSON_NO_ERROR. 115 // Returns an empty string if error_code is JSON_NO_ERROR.
149 static std::string ErrorCodeToString(JsonParseError error_code); 116 static std::string ErrorCodeToString(JsonParseError error_code);
150 117
151 // Returns the error code if the last call to JsonToValue() failed. 118 // Parses an input string into a Value that is owned by the caller.
119 Value* ReadToValue(const std::string& json);
120
121 // Returns the error code if the last call to ReadToValue() failed.
152 // Returns JSON_NO_ERROR otherwise. 122 // Returns JSON_NO_ERROR otherwise.
153 JsonParseError error_code() const { return error_code_; } 123 JsonParseError error_code() const;
154 124
155 // Converts error_code_ to a human-readable string, including line and column 125 // Converts error_code_ to a human-readable string, including line and column
156 // numbers if appropriate. 126 // numbers if appropriate.
157 std::string GetErrorMessage() const; 127 std::string GetErrorMessage() const;
158 128
159 // Reads and parses |json|, returning a Value. The caller owns the returned
160 // instance. If |json| is not a properly formed JSON string, returns NULL and
161 // a detailed error can be retrieved from |GetErrorMessage()|.
162 // If |check_root| is true, we require that the root object be an object or
163 // array. Otherwise, it can be any valid JSON type.
164 // If |allow_trailing_comma| is true, we will ignore trailing commas in
165 // objects and arrays even though this goes against the RFC.
166 Value* JsonToValue(const std::string& json, bool check_root,
167 bool allow_trailing_comma);
168
169 private: 129 private:
170 FRIEND_TEST_ALL_PREFIXES(JSONReaderTest, Reading); 130 scoped_ptr<internal::JSONParser> parser_;
171 FRIEND_TEST_ALL_PREFIXES(JSONReaderTest, ErrorMessages);
172
173 static std::string FormatErrorMessage(int line, int column,
174 const std::string& description);
175
176 // Recursively build Value. Returns NULL if we don't have a valid JSON
177 // string. If |is_root| is true, we verify that the root element is either
178 // an object or an array.
179 Value* BuildValue(bool is_root);
180
181 // Parses a sequence of characters into a Token::NUMBER. If the sequence of
182 // characters is not a valid number, returns a Token::INVALID_TOKEN. Note
183 // that DecodeNumber is used to actually convert from a string to an
184 // int/double.
185 Token ParseNumberToken();
186
187 // Try and convert the substring that token holds into an int or a double. If
188 // we can (i.e., no overflow), return the value, else return NULL.
189 Value* DecodeNumber(const Token& token);
190
191 // Parses a sequence of characters into a Token::STRING. If the sequence of
192 // characters is not a valid string, returns a Token::INVALID_TOKEN. Note
193 // that DecodeString is used to actually decode the escaped string into an
194 // actual wstring.
195 Token ParseStringToken();
196
197 // Convert the substring into a value string. This should always succeed
198 // (otherwise ParseStringToken would have failed).
199 Value* DecodeString(const Token& token);
200
201 // Helper function for DecodeString that consumes UTF16 [0,2] code units and
202 // converts them to UTF8 code units. |token| is the string token in which the
203 // units should be read, |i| is the position in the token at which the first
204 // code unit starts, immediately after the |\u|. This will be mutated if code
205 // units are consumed. |dest_string| is a string to which the UTF8 code unit
206 // should be appended. Returns true on success and false if there's an
207 // encoding error.
208 bool ConvertUTF16Units(const Token& token,
209 int* i,
210 std::string* dest_string);
211
212 // Grabs the next token in the JSON stream. This does not increment the
213 // stream so it can be used to look ahead at the next token.
214 Token ParseToken();
215
216 // Increments |json_pos_| past leading whitespace and comments.
217 void EatWhitespaceAndComments();
218
219 // If |json_pos_| is at the start of a comment, eat it, otherwise, returns
220 // false.
221 bool EatComment();
222
223 // Checks if |json_pos_| matches str.
224 bool NextStringMatch(const char* str, size_t length);
225
226 // Sets the error code that will be returned to the caller. The current
227 // line and column are determined and added into the final message.
228 void SetErrorCode(const JsonParseError error, const char* error_pos);
229
230 // Pointer to the starting position in the input string.
231 const char* start_pos_;
232
233 // Pointer to the current position in the input string.
234 const char* json_pos_;
235
236 // Pointer to the last position in the input string.
237 const char* end_pos_;
238
239 // Used to keep track of how many nested lists/dicts there are.
240 int stack_depth_;
241
242 // A parser flag that allows trailing commas in objects and arrays.
243 bool allow_trailing_comma_;
244
245 // Contains the error code for the last call to JsonToValue(), if any.
246 JsonParseError error_code_;
247 int error_line_;
248 int error_col_;
249
250 DISALLOW_COPY_AND_ASSIGN(JSONReader);
251 }; 131 };
252 132
253 } // namespace base 133 } // namespace base
254 134
255 #endif // BASE_JSON_JSON_READER_H_ 135 #endif // BASE_JSON_JSON_READER_H_
OLDNEW
« no previous file with comments | « base/json/json_parser_unittest.cc ('k') | base/json/json_reader.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698