Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(79)

Side by Side Diff: base/json/json_parser.h

Issue 10035042: Rewrite base::JSONReader to be 35-40% faster, depending on the input string. (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src
Patch Set: '' Created 8 years, 8 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
OLDNEW
(Empty)
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #ifndef BASE_JSON_JSON_PARSER_H_
6 #define BASE_JSON_JSON_PARSER_H_
7
Mark Mentovai 2012/04/19 16:40:12 #pragma once
Robert Sesek 2012/05/03 15:34:52 Done.
8 #include <string>
9
10 #include "base/basictypes.h"
11 #include "base/compiler_specific.h"
12 #include "base/json/json_reader.h"
13 #include "base/string_piece.h"
14
15 namespace base {
16 class Value;
17 }
Mark Mentovai 2012/04/19 16:40:12 } // namespace base
Robert Sesek 2012/05/03 15:34:52 I don't do this for forward declares.
18
19 // Chromium and Chromium OS check out gtest to different places, so we're
Mark Mentovai 2012/04/19 16:40:12 Who are “we?”
Robert Sesek 2012/05/03 15:34:52 This was copied… done.
20 // unable to compile on both if we include gtest_prod.h here. Instead, include
Mark Mentovai 2012/04/19 16:40:12 Isn’t there some macro that’s set if you’re building…
Robert Sesek 2012/05/03 15:34:52 No idea. I couldn't find one.
21 // its only contents -- this will need to be updated if the macro ever changes.
22 #define FRIEND_TEST(test_case_name, test_name)\
23 friend class test_case_name##_##test_name##_Test
24
25 #define FRIEND_TEST_ALL_PREFIXES(test_case_name, test_name) \
26 FRIEND_TEST(test_case_name, test_name); \
27 FRIEND_TEST(test_case_name, DISABLED_##test_name); \
28 FRIEND_TEST(test_case_name, FLAKY_##test_name); \
29 FRIEND_TEST(test_case_name, FAILS_##test_name)
30
31 namespace base {
32 namespace internal {
33
34 class JSONParserTest;
35
36 // The implementation behind the JSONReader interface. This class is not meant
37 // to be used directly; it encapsulates logic that need not be exposed publicly.
38 //
39 // This parser guarantees O(n) time through the input string. It also optimizes
40 // base::StringValue by using StringPiece where possible when returning Value
41 // objects by using "hidden roots," discussed in the implementation.
42 //
43 // Iteration happens on the byte level, with the functions CanConsume and
44 // NextChar. The conversion from byte to JSON token happens without advancing
45 // the parser in GetNextToken/ParseToken, that is that tokenization operates on
Mark Mentovai 2012/04/19 16:40:12 The “, that is that” construction is hard to read.
Robert Sesek 2012/05/03 15:34:52 Done.
46 // the current parser position without advancing.
47 //
48 // Built on top of these are a family of Consume functions that iterate
49 // internally. Invariant: on entry of a Consume function, the parser is wound
50 // to the first byte of a valid JSON token. On exit, it is on the last byte
51 // of a token, such that the next loop of the parser will be at the byte
Mark Mentovai 2012/04/19 16:40:12 Nit: “iteration,” not “loop.”
Robert Sesek 2012/05/03 15:34:52 Done.
52 // immediately following the token, which would likely be the first byte of the
53 // next token.
54 class JSONParser {
55 public:
56 explicit JSONParser(int options);
57 virtual ~JSONParser();
tfarina 2012/04/19 22:48:18 nit: I think this doesn't need to be virtual.
Robert Sesek 2012/05/03 15:34:52 Done.
58
59 // Parses the input string according to the set options and returns the
60 // result as a Value owned by the caller.
61 Value* Parse(const std::string& input);
62
63 // Returns the error code.
64 JSONReader::JsonParseError error_code() const;
65
66 // Returns the human-friendly error message.
67 std::string GetErrorMessage() const;
68
69 private:
70 enum Token {
71 T_OBJECT_BEGIN, // {
72 T_OBJECT_END, // }
73 T_ARRAY_BEGIN, // [
74 T_ARRAY_END, // ]
75 T_STRING,
76 T_NUMBER,
77 T_BOOL_TRUE, // true
78 T_BOOL_FALSE, // false
79 T_NULL, // null
80 T_LIST_SEPARATOR, // ,
81 T_OBJECT_PAIR_SEPARATOR, // :
82 T_END_OF_INPUT,
83 T_INVALID_TOKEN,
84 };
85
86 // A helper class used for parsing strings. One optimization performed is to
87 // create base::Value with a StringPiece to avoid unnecessary std::string
88 // copies. This is not possible if the input string needs to be decoded from
89 // UTF-16 to UTF-8, or if an escape sequence causes characters to be skipped.
90 // This class centralizes that logic.
91 class StringBuilder {
92 public:
93 // Empty constructor. Used for creating a builder with which to Swap().
94 StringBuilder();
95
96 // |pos| is the beginning of an input string, excluding the |"|.
97 explicit StringBuilder(const char* pos);
98 ~StringBuilder();
Mark Mentovai 2012/04/19 16:40:12 Blank line before to make it apparent that the comment…
Robert Sesek 2012/05/03 15:34:52 Done.
99
100 // Swaps the contents of |other| with this.
101 void Swap(StringBuilder* other);
102
103 // Either increases the |length_| of the string or copies the character if
104 // the StringBuilder has been converted.
105 void Append(const int32& c);
106
107 // Appends a string to the std::string. Must be Convert()ed to use.
108 void AppendString(const std::string& str);
109
110 // Converts the builder from its default StringPiece to a full std::string,
111 // performing a copy.
112 void Convert();
113
114 // Returns whether the builder can be converted to a StringPiece.
115 bool CanBeStringPiece();
Mark Mentovai 2012/04/19 16:40:12 Can this be a const function?
Robert Sesek 2012/05/03 15:34:52 Done.
116
117 // Returns the StringPiece representation. Returns an empty piece if it
118 // cannot be converted.
Mark Mentovai 2012/04/19 16:40:12 Lines 110-111 told me that Convert() converts from
Robert Sesek 2012/05/03 15:34:52 I don't think that's what it says…
119 StringPiece AsStringPiece();
Mark Mentovai 2012/04/19 16:40:12 Can this return a const ref?
Robert Sesek 2012/05/03 15:34:52 No, but StringPiece is cheap, so it's okay.
120
121 // Returns the builder as a std::string.
122 std::string AsString();
Mark Mentovai 2012/04/19 16:40:12 Can this return a const ref too?
Robert Sesek 2012/05/03 15:34:52 Yes.
123
124 private:
125 // The beginning of the input string.
126 const char* pos_;
127 // Number of bytes in |pos_| that compose its length.
Mark Mentovai 2012/04/19 16:40:12 Blank line before this.
Mark Mentovai 2012/04/19 16:40:12 I don’t know for sure what you mean by “compose its length”…
Robert Sesek 2012/05/03 15:34:52 Done.
Robert Sesek 2012/05/03 15:34:52 Done.
Robert Sesek 2012/05/03 15:34:52 Done.
128 size_t length_;
129 // The copied string representation. NULL until Convert() is called.
Mark Mentovai 2012/04/19 16:40:12 Blank line before this too.
Robert Sesek 2012/05/03 15:34:52 Done.
130 // Strong. scoped_ptr<T> has too much of an overhead here.
131 std::string* string_;
132 };
133
134 // Quick check that the stream has enough to consume |length| more bytes.
Mark Mentovai 2012/04/19 16:40:12 The stream has enough…what?
Robert Sesek 2012/05/03 15:34:52 Done.
135 inline bool CanConsume(int length);
Mark Mentovai 2012/04/19 16:40:12 Get rid of the “inline”.
Robert Sesek 2012/05/03 15:34:52 Done.
136
137 // The basic way to consume a single character in the stream. Consumes one
138 // byte of the input stream and returns a pointer to the rest of it.
139 const char* NextChar();
140 // Performs the equivalent of NextChar N times.
Mark Mentovai 2012/04/19 16:40:12 Blank line before.
Robert Sesek 2012/05/03 15:34:52 Done.
141 void NextNChars(int n);
142
143 // Skips over whitespace and comments to find the next token in the stream.
144 // This does not advance the parser for non-whitespace or comment chars.
145 Token GetNextToken();
146
147 // Consumes whitespace characters and comments until the next non-that is
Mark Mentovai 2012/04/19 16:40:12 😃
148 // encountered.
149 void EatWhitespaceAndComments();
150 // Helper function that consumes a comment, assuming that the parser is
Mark Mentovai 2012/04/19 16:40:12 Blank line before.
Robert Sesek 2012/05/03 15:34:52 This and the remaining instances are done deliberately…
151 // currently wound to a '/'.
152 bool EatComment();
153
154 // Calls GetNextToken() and then ParseToken(). Caller owns the result.
155 Value* ParseNextToken();
156
157 // Takes a token that represents the start of a Value ("a structural token"
158 // in RFC terms) and consumes it, returning the result as an object the
159 // caller owns.
160 Value* ParseToken(Token token);
161
162 // Assuming that the parser is currently wound to '{', this parses a JSON
163 // object into a DictionaryValue.
164 Value* ConsumeDictionary();
165
166 // Assuming that the parser is wound to '[', this parses a JSON list into a
167 // ListValue.
168 Value* ConsumeList();
169
170 // Calls through ConsumeStringRaw and wraps it in a value.
171 Value* ConsumeString();
172
173 // Assuming that the parser is wound to a double quote, this parses a string,
174 // potentially performing a UTF-16 to UTF-8 conversion. Returns true on
Mark Mentovai 2012/04/19 16:40:12 What’s the “potential?” When does it and when does
Robert Sesek 2012/05/03 15:34:52 Done.
175 // success and Swap()s the result into |out|. Returns false on failure with
176 // error information set.
177 bool ConsumeStringRaw(StringBuilder* out);
178 // Helper function for ConsumeStringRaw() that consumes the next four to 10
Mark Mentovai 2012/04/19 16:40:12 Blank line before.
Mark Mentovai 2012/04/19 16:40:12 Four OR ten, not four TO ten, right?
Robert Sesek 2012/05/03 15:34:52 Done.
Robert Sesek 2012/05/03 15:34:52 Done.
179 // bytes (parser is wound to the first character of a HEX sequence, with the
180 // potential for consuming another \uXXXX for a surrogate). Returns true on
181 // success and places the UTF8 code units in |dest_string|, and false on
182 // failure.
183 bool DecodeUTF16(std::string* dest_string);
184
185 // Assuming that the parser is wound to the start of a valid JSON number,
186 // this parses and converts it to either an int or double value.
187 Value* ConsumeNumber();
188 // Helper that reads characters that are ints. Returns true if a number was
Mark Mentovai 2012/04/19 16:40:12 Blank line before.
189 // read and false on error.
190 bool ReadInt(bool allow_leading_zeros);
191
192 // Consumes the literal values of |true|, |false|, and |null|, assuming the
193 // parser is wound to the first character of any of those.
194 Value* ConsumeLiteral();
195
196 // Compares two string buffers of a given length.
197 bool StringsAreEqual(const char* left, const char* right, size_t len);
Mark Mentovai 2012/04/19 16:40:12 This one seems like it could be static. It may no
Robert Sesek 2012/05/03 15:34:52 Made static. Considering this entire file is an implementation…
198
199 // Sets the error information to |code| at the current column, based on
200 // |index_| and |index_last_line_|, with an optional positive/negative
201 // adjustment by |column_adjust|.
202 void ReportError(JSONReader::JsonParseError code, int column_adjust);
203 static std::string FormatErrorMessage(int line, int column,
Mark Mentovai 2012/04/19 16:40:12 What does this do?
Robert Sesek 2012/05/03 15:34:52 Done.
204 const std::string& description);
205
206 // Options that control parsing.
Mark Mentovai 2012/04/19 16:40:12 What values might this have? Oh, base::JSONParserOptions…
Robert Sesek 2012/05/03 15:34:52 Done.
207 int options_;
208
209 // Pointer to the start of the input data.
210 const char* start_pos_;
211 // Pointer to the current position in the input data. Equivalent to
Mark Mentovai 2012/04/19 16:40:12 Blank line before this, and 214, and 216.
Robert Sesek 2012/05/03 15:34:52 Done.
212 // |start_pos_ + index_|.
Mark Mentovai 2012/04/19 16:40:12 If this is equivalent to something else that’s already…
Robert Sesek 2012/05/03 15:34:52 Because both are checked frequently and independently…
213 const char* pos_;
214 // Pointer to the last character of the input data.
215 const char* end_pos_;
216 // The index in the input stream to which the parser is wound.
217 int index_;
218
219 // The number of times the parser has recursed (current stack depth).
220 int stack_depth_;
221
222 // The line number that the parser is at currently.
223 int line_number_;
224 // The last value of |index_| on the previous line.
Mark Mentovai 2012/04/19 16:40:12 Blank line before.
Robert Sesek 2012/05/03 15:34:52 Done.
225 int index_last_line_;
226
227 // Error information.
228 JSONReader::JsonParseError error_code_;
229 int error_line_;
230 int error_column_;
231
232 friend class JSONParserTest;
233 FRIEND_TEST_ALL_PREFIXES(JSONParserTest, NextChar);
234 FRIEND_TEST_ALL_PREFIXES(JSONParserTest, ConsumeDictionary);
235 FRIEND_TEST_ALL_PREFIXES(JSONParserTest, ConsumeList);
236 FRIEND_TEST_ALL_PREFIXES(JSONParserTest, ConsumeString);
237 FRIEND_TEST_ALL_PREFIXES(JSONParserTest, ConsumeLiterals);
238 FRIEND_TEST_ALL_PREFIXES(JSONParserTest, ConsumeNumbers);
239 FRIEND_TEST_ALL_PREFIXES(JSONParserTest, ErrorMessages);
240 DISALLOW_COPY_AND_ASSIGN(JSONParser);
Mark Mentovai 2012/04/19 16:40:12 Blank line before.
Robert Sesek 2012/05/03 15:34:52 Done.
241 };
242
243 } // namespace internal
244 } // namespace base
245
246 #endif // BASE_JSON_JSON_PARSER_H_
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698