Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(285)

Side by Side Diff: base/json/json_parser.h

Issue 10035042: Rewrite base::JSONReader to be 35-40% faster, depending on the input string. (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src
Patch Set: Really fix Windows, address comments Created 8 years, 7 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
« no previous file with comments | « base/debug/trace_event_unittest.cc ('k') | base/json/json_parser.cc » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
(Empty)
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #ifndef BASE_JSON_JSON_PARSER_H_
6 #define BASE_JSON_JSON_PARSER_H_
7 #pragma once
8
9 #include <string>
10
11 #include "base/base_export.h"
12 #include "base/basictypes.h"
13 #include "base/compiler_specific.h"
14 #include "base/json/json_reader.h"
15 #include "base/string_piece.h"
16
17 #if !defined(OS_CHROMEOS)
18 #include "base/gtest_prod_util.h"
19 #endif
20
21 namespace base {
22 class Value;
23 }
24
25 #if defined(OS_CHROMEOS)
26 // Chromium and Chromium OS check out gtest to different places, so this header
27 // cannot compile on both if gtest_prod.h is included here. Instead, its only
28 // contents are duplicated below -- keep them in sync if the macro ever changes.
29 #define FRIEND_TEST(test_case_name, test_name)\
30 friend class test_case_name##_##test_name##_Test
31
32 #define FRIEND_TEST_ALL_PREFIXES(test_case_name, test_name) \
33 FRIEND_TEST(test_case_name, test_name); \
34 FRIEND_TEST(test_case_name, DISABLED_##test_name); \
35 FRIEND_TEST(test_case_name, FLAKY_##test_name); \
36 FRIEND_TEST(test_case_name, FAILS_##test_name)
37 #endif // OS_CHROMEOS
38
39 namespace base {
40 namespace internal {
41
42 class JSONParserTest;
43
44 // The implementation behind the JSONReader interface. This class is not meant
45 // to be used directly; it encapsulates logic that need not be exposed publicly.
46 //
47 // This parser guarantees O(n) time through the input string. It also optimizes
48 // base::StringValue by using StringPiece where possible when returning Value
49 // objects by using "hidden roots," discussed in the implementation.
50 //
51 // Iteration happens on the byte level, with the functions CanConsume and
52 // NextChar. The conversion from byte to JSON token happens in
53 // GetNextToken/ParseToken without advancing the parser; that is, tokenization
54 // operates on the current parser position without moving past the token.
55 //
56 // Built on top of these are a family of Consume functions that iterate
57 // internally. Invariant: on entry of a Consume function, the parser is wound
58 // to the first byte of a valid JSON token. On exit, it is on the last byte
59 // of a token, such that the next iteration of the parser will be at the byte
60 // immediately following the token, which would likely be the first byte of the
61 // next token.
62 class BASE_EXPORT_PRIVATE JSONParser {
63 public:
64 explicit JSONParser(int options);  // |options|: presumably the base::JSONParserOptions kept in |options_| -- confirm in the .cc.
65 ~JSONParser();
66
67 // Parses the input string according to the set options and returns the
68 // result as a Value owned by the caller.
69 Value* Parse(const std::string& input);
70
71 // Returns the error code (see |error_code_|).
72 JSONReader::JsonParseError error_code() const;
73
74 // Returns the human-friendly error message.
75 std::string GetErrorMessage() const;
76
77 private:
78 enum Token {
79 T_OBJECT_BEGIN, // {
80 T_OBJECT_END, // }
81 T_ARRAY_BEGIN, // [
82 T_ARRAY_END, // ]
83 T_STRING,
84 T_NUMBER,
85 T_BOOL_TRUE, // true
86 T_BOOL_FALSE, // false
87 T_NULL, // null
88 T_LIST_SEPARATOR, // ,
89 T_OBJECT_PAIR_SEPARATOR, // :
90 T_END_OF_INPUT,
91 T_INVALID_TOKEN,
92 };
93
94 // A helper class used for parsing strings. One optimization performed is to
95 // create base::Value with a StringPiece to avoid unnecessary std::string
96 // copies. This is not possible if the input string needs to be decoded from
97 // UTF-16 to UTF-8, or if an escape sequence causes characters to be skipped.
98 // This class centralizes that logic.
99 class StringBuilder {
100 public:
101 // Empty constructor. Used for creating a builder with which to Swap().
102 StringBuilder();
103
104 // |pos| is the beginning of an input string, excluding the leading |"|.
105 explicit StringBuilder(const char* pos);
106
107 ~StringBuilder();
108
109 // Swaps the contents of |other| with this.
110 void Swap(StringBuilder* other);
111
112 // Either increases the |length_| of the string or copies the character if
113 // the StringBuilder has been converted. |c| must be in the basic ASCII
114 // plane; all other characters need to be in UTF-8 units, appended with
115 // AppendString below.
116 void Append(const char& c);
117
118 // Appends |str| to the copied std::string. The builder must already have been Convert()ed.
119 void AppendString(const std::string& str);
120
121 // Converts the builder from its default StringPiece to a full std::string,
122 // performing a copy. Once a builder is converted, it cannot be made a
123 // StringPiece again.
124 void Convert();
125
126 // Returns whether the builder can be converted to a StringPiece.
127 bool CanBeStringPiece() const;
128
129 // Returns the StringPiece representation. Returns an empty piece if it
130 // cannot be converted.
131 StringPiece AsStringPiece();
132
133 // Returns the builder as a std::string.
134 const std::string& AsString();
135
136 private:
137 // The beginning of the input string.
138 const char* pos_;
139
140 // Number of bytes in |pos_| that make up the string being built.
141 size_t length_;
142
143 // The copied string representation. NULL until Convert() is called.
144 // Strong reference; a raw pointer is used because scoped_ptr<T> has too much overhead here.
145 std::string* string_;
146 };
147
148 // Quick check that the stream has capacity to consume |length| more bytes.
149 bool CanConsume(int length);
150
151 // The basic way to consume a single character in the stream. Consumes one
152 // byte of the input stream and returns a pointer to the rest of it.
153 const char* NextChar();
154
155 // Performs the equivalent of NextChar() |n| times.
156 void NextNChars(int n);
157
158 // Skips over whitespace and comments to find the next token in the stream.
159 // Only whitespace and comments are consumed; the parser is not advanced past the token itself.
160 Token GetNextToken();
161
162 // Consumes whitespace characters and comments until a character that is
163 // neither is encountered.
164 void EatWhitespaceAndComments();
165 // Helper function that consumes a comment, assuming that the parser is
166 // currently wound to a '/'.
167 bool EatComment();
168
169 // Calls GetNextToken() and then ParseToken(). Caller owns the result.
170 Value* ParseNextToken();
171
172 // Takes a token that represents the start of a Value ("a structural token"
173 // in RFC terms) and consumes it, returning the result as an object the
174 // caller owns.
175 Value* ParseToken(Token token);
176
177 // Assuming that the parser is currently wound to '{', this parses a JSON
178 // object into a DictionaryValue.
179 Value* ConsumeDictionary();
180
181 // Assuming that the parser is wound to '[', this parses a JSON list into a
182 // ListValue.
183 Value* ConsumeList();
184
185 // Calls through ConsumeStringRaw and wraps it in a value.
186 Value* ConsumeString();
187
188 // Assuming that the parser is wound to a double quote, this parses a string,
189 // decoding any escape sequences and converts UTF-16 to UTF-8. Returns true on
190 // success and Swap()s the result into |out|. Returns false on failure with
191 // error information set.
192 bool ConsumeStringRaw(StringBuilder* out);
193 // Helper function for ConsumeStringRaw() that consumes the next 4 or 10
194 // bytes (parser is wound to the first character of a HEX sequence, with the
195 // potential for consuming another \uXXXX for a surrogate). Returns true on
196 // success and places the UTF8 code units in |dest_string|, and false on
197 // failure.
198 bool DecodeUTF16(std::string* dest_string);
199 // Helper function for ConsumeStringRaw() that takes a single code point,
200 // encodes it as UTF-8 code units, and appends them to the given builder. The
201 // point must be valid.
202 void DecodeUTF8(const int32& point, StringBuilder* dest);
203
204 // Assuming that the parser is wound to the start of a valid JSON number,
205 // this parses and converts it to either an int or double value.
206 Value* ConsumeNumber();
207 // Helper that reads characters that are ints. Returns true if a number was
208 // read and false on error.
209 bool ReadInt(bool allow_leading_zeros);
210
211 // Consumes the literal values of |true|, |false|, and |null|, assuming the
212 // parser is wound to the first character of any of those.
213 Value* ConsumeLiteral();
214
215 // Compares two string buffers of a given length.
216 static bool StringsAreEqual(const char* left, const char* right, size_t len);
217
218 // Sets the error information to |code| at the current column, based on
219 // |index_| and |index_last_line_|, with an optional positive/negative
220 // adjustment by |column_adjust|.
221 void ReportError(JSONReader::JsonParseError code, int column_adjust);
222
223 // Given the line and column number of an error, formats one of the error
224 // message constants from json_reader.h for human display.
225 static std::string FormatErrorMessage(int line, int column,
226 const std::string& description);
227
228 // base::JSONParserOptions that control parsing.
229 int options_;
230
231 // Pointer to the start of the input data.
232 const char* start_pos_;
233
234 // Pointer to the current position in the input data. Equivalent to
235 // |start_pos_ + index_|.
236 const char* pos_;
237
238 // Pointer to the last character of the input data.
239 const char* end_pos_;
240
241 // The index in the input stream to which the parser is wound.
242 int index_;
243
244 // The number of times the parser has recursed (current stack depth).
245 int stack_depth_;
246
247 // The line number that the parser is at currently.
248 int line_number_;
249
250 // The last value of |index_| on the previous line.
251 int index_last_line_;
252
253 // Error information.
254 JSONReader::JsonParseError error_code_;
255 int error_line_;
256 int error_column_;
257
258 friend class JSONParserTest;
259 FRIEND_TEST_ALL_PREFIXES(JSONParserTest, NextChar);
260 FRIEND_TEST_ALL_PREFIXES(JSONParserTest, ConsumeDictionary);
261 FRIEND_TEST_ALL_PREFIXES(JSONParserTest, ConsumeList);
262 FRIEND_TEST_ALL_PREFIXES(JSONParserTest, ConsumeString);
263 FRIEND_TEST_ALL_PREFIXES(JSONParserTest, ConsumeLiterals);
264 FRIEND_TEST_ALL_PREFIXES(JSONParserTest, ConsumeNumbers);
265 FRIEND_TEST_ALL_PREFIXES(JSONParserTest, ErrorMessages);
266
267 DISALLOW_COPY_AND_ASSIGN(JSONParser);
268 };
269
270 } // namespace internal
271 } // namespace base
272
273 #endif // BASE_JSON_JSON_PARSER_H_
OLDNEW
« no previous file with comments | « base/debug/trace_event_unittest.cc ('k') | base/json/json_parser.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698