Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(4)

Side by Side Diff: src/lexer/lexer.re

Issue 26762004: Some C++ style fixes in lexer.re. (Closed) Base URL: https://v8.googlecode.com/svn/branches/experimental/parser
Patch Set: Align in DoubleQuoteString rule Created 7 years, 2 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
« no previous file with comments | « no previous file | no next file » | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 #include <fcntl.h> 1 #include <fcntl.h>
2 #include <stdio.h> 2 #include <stdio.h>
3 #include <stddef.h> 3 #include <stddef.h>
4 #include <stdlib.h> 4 #include <stdlib.h>
5 #include <string.h> 5 #include <string.h>
6 6
7 /* 7 // TODO:
8 TODO: 8 // - SpiderMonkey compatibility hack: " --> something" is treated
9 - SpiderMonkey compatibility hack: " --> something" is treated as a single line comment. 9 // as a single line comment.
10 - An identifier cannot start immediately after a number. 10 // - An identifier cannot start immediately after a number.
11
12 */
13
14
15 11
16 enum Condition { 12 enum Condition {
17 EConditionNormal, 13 kConditionNormal,
18 EConditionDoubleQuoteString, 14 kConditionDoubleQuoteString,
19 EConditionSingleQuoteString, 15 kConditionSingleQuoteString,
20 EConditionIdentifier, 16 kConditionIdentifier,
21 EConditionSingleLineComment, 17 kConditionSingleLineComment,
22 EConditionMultiLineComment, 18 kConditionMultiLineComment,
23 EConditionHtmlComment 19 kConditionHtmlComment
24 }; 20 };
25 21
26 #if defined(WIN32) 22 #if defined(WIN32)
27 23
28 typedef signed char int8_t; 24 typedef signed char int8_t;
29 typedef signed short int16_t; 25 typedef signed short int16_t;
30 typedef signed int int32_t; 26 typedef signed int int32_t;
31 27
32 typedef unsigned char uint8_t; 28 typedef unsigned char uint8_t;
33 typedef unsigned short uint16_t; 29 typedef unsigned short uint16_t;
34 typedef unsigned int uint32_t; 30 typedef unsigned int uint32_t;
35 31
36 #else 32 #else
37 33
38 #include <stdint.h> 34 #include <stdint.h>
39 #include <unistd.h> 35 #include <unistd.h>
40 36
41 #ifndef O_BINARY 37 #ifndef O_BINARY
42 #define O_BINARY 0 38 #define O_BINARY 0
43 #endif
44
45 #endif 39 #endif
46 40
41 #endif // defined(WIN32)
42
47 #include "lexer.h" 43 #include "lexer.h"
44
48 using namespace v8::internal; 45 using namespace v8::internal;
49 46
50 // ----------------------------------------------------------------------
51 #define PUSH_TOKEN(T) { send(T); SKIP(); } 47 #define PUSH_TOKEN(T) { send(T); SKIP(); }
52 #define PUSH_LINE_TERMINATOR() { SKIP(); } 48 #define PUSH_LINE_TERMINATOR() { SKIP(); }
53 #define TERMINATE_ILLEGAL() { return 1; } 49 #define TERMINATE_ILLEGAL() { return 1; }
54 50
55 // ---------------------------------------------------------------------- 51 class PushScanner {
56 class PushScanner
57 {
58
59 private:
60
61 bool eof;
62 int32_t state;
63 int32_t condition;
64
65 uint8_t *limit;
66 uint8_t *start;
67 uint8_t *cursor;
68 uint8_t *marker;
69 int real_start;
70
71 uint8_t *buffer;
72 uint8_t *bufferEnd;
73
74 uint8_t yych;
75 uint32_t yyaccept;
76
77 ExperimentalScanner* sink_;
78 52
79 public: 53 public:
80 54 PushScanner(ExperimentalScanner* sink):
81 // ---------------------------------------------------------------------- 55 eof_(false),
82 PushScanner(ExperimentalScanner* sink) 56 state_(-1),
83 { 57 condition_(kConditionNormal),
84 limit = 0; 58 limit_(NULL),
85 start = 0; 59 start_(NULL),
86 state = -1; 60 cursor_(NULL),
87 condition = EConditionNormal; 61 marker_(NULL),
88 cursor = 0; 62 real_start_(0),
89 marker = 0; 63 buffer_(NULL),
90 buffer = 0; 64 buffer_end_(NULL),
91 eof = false; 65 yych(0),
92 bufferEnd = 0; 66 yyaccept(0),
93 sink_ = sink; 67 sink_(sink) {
94 real_start = 0; 68 }
95 } 69
96 70 ~PushScanner() {
97 // ---------------------------------------------------------------------- 71 }
98 ~PushScanner() 72
99 { 73 void send(Token::Value token) {
100 } 74 int beg = (start_ - buffer_) + real_start_;
101 75 int end = (cursor_ - buffer_) + real_start_;
102 // ---------------------------------------------------------------------- 76 if (FLAG_trace_lexer) {
103 void send(Token::Value token) { 77 printf("got %s at (%d, %d): ", Token::Name(token), beg, end);
104 int beg = (start - buffer) + real_start; 78 for (uint8_t* s = start_; s != cursor_; s++) printf("%c", (char)*s);
105 int end = (cursor - buffer) + real_start; 79 printf(".\n");
106 if (FLAG_trace_lexer) { 80 }
107 printf("got %s at (%d, %d): ", Token::Name(token), beg, end); 81 sink_->Record(token, beg, end);
108 for (uint8_t* s = start; s != cursor; s++) printf("%c", (char)*s); 82 }
109 printf(".\n"); 83
110 } 84 uint32_t push(const void *input, int input_size) {
111 sink_->Record(token, beg, end); 85 if (FLAG_trace_lexer) {
112 } 86 printf(
113 87 "scanner is receiving a new data batch of length %d\n"
114 // ---------------------------------------------------------------------- 88 "scanner continues with saved state_ = %d\n",
115 uint32_t push(const void *input, int input_size) { 89 input_size,
116 if (FLAG_trace_lexer) { 90 state_
117 printf( 91 );
118 "scanner is receiving a new data batch of length %d\n" 92 }
119 "scanner continues with saved state = %d\n", 93
120 input_size, 94 // Data source is signaling end of file when batch size
121 state 95 // is less than max_fill. This is slightly annoying because
122 ); 96 // max_fill is a value that can only be known after re2c does
123 } 97 // its thing. Practically though, max_fill is never bigger than
124 98 // the longest keyword, so given our grammar, 32 is a safe bet.
125 /* 99
126 * Data source is signaling end of file when batch size 100 uint8_t null[64];
127 * is less than maxFill. This is slightly annoying because 101 const int max_fill = 32;
128 * maxFill is a value that can only be known after re2c does 102 if (input_size < max_fill) { // FIXME: do something about this!!!
129 * its thing. Practically though, maxFill is never bigger than 103 eof_ = true;
130 * the longest keyword, so given our grammar, 32 is a safe bet. 104 input = null;
131 */ 105 input_size = sizeof(null);
132 uint8_t null[64]; 106 memset(null, 0, sizeof(null));
133 const int maxFill = 32; 107 }
134 if(input_size<maxFill) // FIXME: do something about this!!! 108
135 { 109
136 eof = true; 110 // When we get here, we have a partially
137 input = null; 111 // consumed buffer_ which is in the following state_:
138 input_size = sizeof(null); 112 // last valid char last valid buffer_ spot
139 memset(null, 0, sizeof(null)); 113 // v v
140 } 114 // +-------------------+-------------+---------------+-------------+----------------------+
141 115 // ^ ^ ^ ^ ^ ^
142 /* 116 // buffer_ start_ marker_ cursor_ limit_ buffer_end_
143 * When we get here, we have a partially 117 //
144 * consumed buffer which is in the following state: 118 // We need to stretch the buffer_ and concatenate the new chunk of input to it
145 * last valid char last valid buffer spot 119
146 * v v 120 size_t used = limit_ - buffer_;
147 * +-------------------+-------------+---------------+-------------+----------------------+ 121 size_t needed = used + input_size;
148 * ^ ^ ^ ^ ^ ^ 122 size_t allocated = buffer_end_ - buffer_;
149 * buffer start marker cursor limit bufferEnd 123 if(allocated < needed) {
150 * 124 size_t limit__offset = limit_ - buffer_;
151 * We need to stretch the buffer and concatenate the new chunk of input to it 125 size_t start__offset = start_ - buffer_;
152 * 126 size_t marker__offset = marker_ - buffer_;
153 */ 127 size_t cursor__offset = cursor_ - buffer_;
154 size_t used = limit-buffer; 128
155 size_t needed = used+input_size; 129 buffer_ = (uint8_t*)realloc(buffer_, needed);
156 size_t allocated = bufferEnd-buffer; 130 buffer_end_ = needed + buffer_;
157 if(allocated<needed) 131
158 { 132 marker_ = marker__offset + buffer_;
159 size_t limitOffset = limit-buffer; 133 cursor_ = cursor__offset + buffer_;
160 size_t startOffset = start-buffer; 134 start_ = buffer_ + start__offset;
161 size_t markerOffset = marker-buffer; 135 limit_ = limit__offset + buffer_;
162 size_t cursorOffset = cursor-buffer; 136 }
163 137 memcpy(limit_, input, input_size);
164 buffer = (uint8_t*)realloc(buffer, needed); 138 limit_ += input_size;
165 bufferEnd = needed+buffer; 139
166 140 // The scanner start_s here
167 marker = markerOffset + buffer; 141 #define YYLIMIT limit_
168 cursor = cursorOffset + buffer; 142 #define YYCURSOR cursor_
169 start = buffer + startOffset; 143 #define YYMARKER marker_
170 limit = limitOffset + buffer; 144 #define YYCTYPE uint8_t
171 } 145
172 memcpy(limit, input, input_size); 146 #define SKIP() { start_ = cursor_; YYSETCONDITION(kConditionNormal); goto yy0; }
173 limit += input_size; 147 #define YYFILL(n) { goto fill; }
174 148
175 // The scanner starts here 149 #define YYGETSTATE() state_
176 #define YYLIMIT limit 150 #define YYSETSTATE(x) { state_ = (x); }
177 #define YYCURSOR cursor 151
178 #define YYMARKER marker 152 #define YYGETCONDITION() condition_
179 #define YYCTYPE uint8_t 153 #define YYSETCONDITION(x) { condition_ = (x); }
180 154
181 #define SKIP() { start = cursor; YYSETCONDITION(EConditionNormal); goto yy0; } 155 start_:
182 #define YYFILL(n) { goto fill; } 156 if (FLAG_trace_lexer) {
183 157 printf("Starting a round; state_: %d, condition_: %d\n", state_, condition_);
184 #define YYGETSTATE() state 158 }
185 #define YYSETSTATE(x) { state = (x); } 159
186 160 /*!re2c
187 #define YYGETCONDITION() condition 161 re2c:indent:top = 1;
188 #define YYSETCONDITION(x) { condition = (x); } 162 re2c:yych:conversion = 0;
189 163 re2c:condenumprefix = kCondition;
190 start: 164 re2c:define:YYCONDTYPE = Condition;
191 165
192 if (FLAG_trace_lexer) { 166 eof = "\000";
193 printf("Starting a round; state: %d, condition: %d\n", state, condition); 167 any = [\000-\377];
194 } 168 whitespace_char = [ \t\v\f\r];
195 169 whitespace = whitespace_char+;
196 /*!re2c 170 identifier_start_ = [$_\\a-zA-z];
197 re2c:indent:top = 1; 171 identifier_char = [$_\\a-zA-z0-9];
198 re2c:yych:conversion = 0; 172 line_terminator = [\n\r]+;
199 re2c:condenumprefix = ECondition; 173 digit = [0-9];
200 re2c:define:YYCONDTYPE = Condition; 174 hex_digit = [0-9a-fA-F];
201 175 maybe_exponent = ('e' [-+]? digit+)?;
202 eof = "\000"; 176
203 any = [\000-\377]; 177 <Normal> "|=" { PUSH_TOKEN(Token::ASSIGN_BIT_OR); }
204 whitespace_char = [ \t\v\f\r]; 178 <Normal> "^=" { PUSH_TOKEN(Token::ASSIGN_BIT_XOR); }
205 whitespace = whitespace_char+; 179 <Normal> "&=" { PUSH_TOKEN(Token::ASSIGN_BIT_AND); }
206 identifier_start = [$_\\a-zA-z]; 180 <Normal> "+=" { PUSH_TOKEN(Token::ASSIGN_ADD); }
207 identifier_char = [$_\\a-zA-z0-9]; 181 <Normal> "-=" { PUSH_TOKEN(Token::ASSIGN_SUB); }
208 line_terminator = [\n\r]+; 182 <Normal> "*=" { PUSH_TOKEN(Token::ASSIGN_MUL); }
209 digit = [0-9]; 183 <Normal> "/=" { PUSH_TOKEN(Token::ASSIGN_DIV); }
210 hex_digit = [0-9a-fA-F]; 184 <Normal> "%=" { PUSH_TOKEN(Token::ASSIGN_MOD); }
211 maybe_exponent = ('e' [-+]? digit+)?; 185
212 186 <Normal> "===" { PUSH_TOKEN(Token::EQ_STRICT); }
213 <Normal> "|=" { PUSH_TOKEN(Token::ASSIGN_BIT_OR); } 187 <Normal> "==" { PUSH_TOKEN(Token::EQ); }
214 <Normal> "^=" { PUSH_TOKEN(Token::ASSIGN_BIT_XOR); } 188 <Normal> "=" { PUSH_TOKEN(Token::ASSIGN); }
215 <Normal> "&=" { PUSH_TOKEN(Token::ASSIGN_BIT_AND); } 189 <Normal> "!==" { PUSH_TOKEN(Token::NE_STRICT); }
216 <Normal> "+=" { PUSH_TOKEN(Token::ASSIGN_ADD); } 190 <Normal> "!=" { PUSH_TOKEN(Token::NE); }
217 <Normal> "-=" { PUSH_TOKEN(Token::ASSIGN_SUB); } 191 <Normal> "!" { PUSH_TOKEN(Token::NOT); }
218 <Normal> "*=" { PUSH_TOKEN(Token::ASSIGN_MUL); } 192
219 <Normal> "/=" { PUSH_TOKEN(Token::ASSIGN_DIV); } 193 <Normal> "//" :=> SingleLineComment
220 <Normal> "%=" { PUSH_TOKEN(Token::ASSIGN_MOD); } 194 <Normal> "/*" :=> MultiLineComment
221 195 <Normal> "<!--" :=> HtmlComment
222 <Normal> "===" { PUSH_TOKEN(Token::EQ_STRICT); } 196
223 <Normal> "==" { PUSH_TOKEN(Token::EQ); } 197 <Normal> ">>>=" { PUSH_TOKEN(Token::ASSIGN_SHR); }
224 <Normal> "=" { PUSH_TOKEN(Token::ASSIGN); } 198 <Normal> "<<=" { PUSH_TOKEN(Token::ASSIGN_SHL); }
225 <Normal> "!==" { PUSH_TOKEN(Token::NE_STRICT); } 199 <Normal> ">>=" { PUSH_TOKEN(Token::ASSIGN_SAR); }
226 <Normal> "!=" { PUSH_TOKEN(Token::NE); } 200 <Normal> "<=" { PUSH_TOKEN(Token::LTE); }
227 <Normal> "!" { PUSH_TOKEN(Token::NOT); } 201 <Normal> ">=" { PUSH_TOKEN(Token::GTE); }
228 202 <Normal> "<<" { PUSH_TOKEN(Token::SHL); }
229 <Normal> "//" :=> SingleLineComment 203 <Normal> ">>" { PUSH_TOKEN(Token::SAR); }
230 <Normal> "/*" :=> MultiLineComment 204 <Normal> "<" { PUSH_TOKEN(Token::LT); }
231 <Normal> "<!--" :=> HtmlComment 205 <Normal> ">" { PUSH_TOKEN(Token::GT); }
232 206
233 <Normal> ">>>=" { PUSH_TOKEN(Token::ASSIGN_SHR); } 207 <Normal> '0x' hex_digit+ { PUSH_TOKEN(Token::NUMBER); }
234 <Normal> "<<=" { PUSH_TOKEN(Token::ASSIGN_SHL); } 208 <Normal> "." digit+ maybe_exponent { PUSH_TOKEN(Token::NUMBER); }
235 <Normal> ">>=" { PUSH_TOKEN(Token::ASSIGN_SAR); } 209 <Normal> digit+ ("." digit+)? maybe_exponent { PUSH_TOKEN(Token::NUMBER); }
236 <Normal> "<=" { PUSH_TOKEN(Token::LTE); } 210
237 <Normal> ">=" { PUSH_TOKEN(Token::GTE); } 211 <Normal> "(" { PUSH_TOKEN(Token::LPAREN); }
238 <Normal> "<<" { PUSH_TOKEN(Token::SHL); } 212 <Normal> ")" { PUSH_TOKEN(Token::RPAREN); }
239 <Normal> ">>" { PUSH_TOKEN(Token::SAR); } 213 <Normal> "[" { PUSH_TOKEN(Token::LBRACK); }
240 <Normal> "<" { PUSH_TOKEN(Token::LT); } 214 <Normal> "]" { PUSH_TOKEN(Token::RBRACK); }
241 <Normal> ">" { PUSH_TOKEN(Token::GT); } 215 <Normal> "{" { PUSH_TOKEN(Token::LBRACE); }
242 216 <Normal> "}" { PUSH_TOKEN(Token::RBRACE); }
243 <Normal> '0x' hex_digit+ { PUSH_TOKEN(Token::NUMBER); } 217 <Normal> ":" { PUSH_TOKEN(Token::COLON); }
244 <Normal> "." digit+ maybe_exponent { PUSH_TOKEN(Token::NUMBER); } 218 <Normal> ";" { PUSH_TOKEN(Token::SEMICOLON); }
245 <Normal> digit+ ("." digit+)? maybe_exponent { PUSH_TOKEN(Token::NUMBER); } 219 <Normal> "." { PUSH_TOKEN(Token::PERIOD); }
246 220 <Normal> "?" { PUSH_TOKEN(Token::CONDITIONAL); }
247 <Normal> "(" { PUSH_TOKEN(Token::LPAREN); } 221 <Normal> "++" { PUSH_TOKEN(Token::INC); }
248 <Normal> ")" { PUSH_TOKEN(Token::RPAREN); } 222 <Normal> "--" { PUSH_TOKEN(Token::DEC); }
249 <Normal> "[" { PUSH_TOKEN(Token::LBRACK); } 223
250 <Normal> "]" { PUSH_TOKEN(Token::RBRACK); } 224 <Normal> "||" { PUSH_TOKEN(Token::OR); }
251 <Normal> "{" { PUSH_TOKEN(Token::LBRACE); } 225 <Normal> "&&" { PUSH_TOKEN(Token::AND); }
252 <Normal> "}" { PUSH_TOKEN(Token::RBRACE); } 226
253 <Normal> ":" { PUSH_TOKEN(Token::COLON); } 227 <Normal> "|" { PUSH_TOKEN(Token::BIT_OR); }
254 <Normal> ";" { PUSH_TOKEN(Token::SEMICOLON); } 228 <Normal> "^" { PUSH_TOKEN(Token::BIT_XOR); }
255 <Normal> "." { PUSH_TOKEN(Token::PERIOD); } 229 <Normal> "&" { PUSH_TOKEN(Token::BIT_AND); }
256 <Normal> "?" { PUSH_TOKEN(Token::CONDITIONAL); } 230 <Normal> "+" { PUSH_TOKEN(Token::ADD); }
257 <Normal> "++" { PUSH_TOKEN(Token::INC); } 231 <Normal> "-" { PUSH_TOKEN(Token::SUB); }
258 <Normal> "--" { PUSH_TOKEN(Token::DEC); } 232 <Normal> "*" { PUSH_TOKEN(Token::MUL); }
259 233 <Normal> "/" { PUSH_TOKEN(Token::DIV); }
260 <Normal> "||" { PUSH_TOKEN(Token::OR); } 234 <Normal> "%" { PUSH_TOKEN(Token::MOD); }
261 <Normal> "&&" { PUSH_TOKEN(Token::AND); } 235 <Normal> "~" { PUSH_TOKEN(Token::BIT_NOT); }
262 236 <Normal> "," { PUSH_TOKEN(Token::COMMA); }
263 <Normal> "|" { PUSH_TOKEN(Token::BIT_OR); } 237
264 <Normal> "^" { PUSH_TOKEN(Token::BIT_XOR); } 238 <Normal> line_terminator+ { PUSH_LINE_TERMINATOR(); }
265 <Normal> "&" { PUSH_TOKEN(Token::BIT_AND); } 239 <Normal> whitespace { SKIP(); }
266 <Normal> "+" { PUSH_TOKEN(Token::ADD); } 240
267 <Normal> "-" { PUSH_TOKEN(Token::SUB); } 241 <Normal> ["] :=> DoubleQuoteString
268 <Normal> "*" { PUSH_TOKEN(Token::MUL); } 242 <Normal> ['] :=> SingleQuoteString
269 <Normal> "/" { PUSH_TOKEN(Token::DIV); } 243
270 <Normal> "%" { PUSH_TOKEN(Token::MOD); } 244 <Normal> identifier_start_ :=> Identifier
271 <Normal> "~" { PUSH_TOKEN(Token::BIT_NOT); } 245
272 <Normal> "," { PUSH_TOKEN(Token::COMMA); } 246 <Normal> eof { PUSH_TOKEN(Token::EOS); return 1; }
273 247 <Normal> any { TERMINATE_ILLEGAL(); }
274 <Normal> line_terminator+ { PUSH_LINE_TERMINATOR(); } 248
275 <Normal> whitespace { SKIP(); } 249 <DoubleQuoteString> "\\\"" { goto yy0; }
276 250 <DoubleQuoteString> '"' { PUSH_TOKEN(Token::STRING);}
277 <Normal> ["] :=> DoubleQuoteString 251 <DoubleQuoteString> any { goto yy0; }
278 <Normal> ['] :=> SingleQuoteString 252
279 253 <SingleQuoteString> "\\'" { goto yy0; }
280 <Normal> identifier_start :=> Identifier 254 <SingleQuoteString> "'" { PUSH_TOKEN(Token::STRING);}
281 255 <SingleQuoteString> any { goto yy0; }
282 <Normal> eof { PUSH_TOKEN(Token::EOS); return 1; } 256
283 <Normal> any { TERMINATE_ILLEGAL(); } 257 <Identifier> identifier_char+ { goto yy0; }
284 258 <Identifier> any { cursor_--; PUSH_TOKEN(Token::IDENTIFIER); }
285 <DoubleQuoteString> "\\\"" { goto yy0; } 259
286 <DoubleQuoteString> '"' { PUSH_TOKEN(Token::STRING);} 260 <SingleLineComment> line_terminator { PUSH_LINE_TERMINATOR();}
287 <DoubleQuoteString> any { goto yy0; } 261 <SingleLineComment> eof { PUSH_LINE_TERMINATOR();}
288 262 <SingleLineComment> any { goto yy0; }
289 <SingleQuoteString> "\\'" { goto yy0; } 263
290 <SingleQuoteString> "'" { PUSH_TOKEN(Token::STRING);} 264 <MultiLineComment> [*][//] { PUSH_LINE_TERMINATOR();}
291 <SingleQuoteString> any { goto yy0; } 265 <MultiLineComment> eof { TERMINATE_ILLEGAL(); }
292 266 <MultiLineComment> any { goto yy0; }
293 <Identifier> identifier_char+ { goto yy0; } 267
294 <Identifier> any { cursor--; PUSH_TOKEN(Token::IDENTIFIER); } 268 <HtmlComment> eof { TERMINATE_ILLEGAL(); }
295 269 <HtmlComment> "-->" { PUSH_LINE_TERMINATOR();}
296 <SingleLineComment> line_terminator { PUSH_LINE_TERMINATOR();} 270 <HtmlComment> any { goto yy0; }
297 <SingleLineComment> eof { PUSH_LINE_TERMINATOR();} 271 */
298 <SingleLineComment> any { goto yy0; } 272
299 273 fill:
300 <MultiLineComment> [*][//] { PUSH_LINE_TERMINATOR();} 274 int unfinished_size = cursor_-start_;
301 <MultiLineComment> eof { TERMINATE_ILLEGAL(); } 275 if (FLAG_trace_lexer) {
302 <MultiLineComment> any { goto yy0; } 276 printf(
303 277 "scanner needs a refill. Exiting for now with:\n"
304 <HtmlComment> eof { TERMINATE_ILLEGAL(); } 278 " saved fill state_ = %d\n"
305 <HtmlComment> "-->" { PUSH_LINE_TERMINATOR();} 279 " unfinished token size = %d\n",
306 <HtmlComment> any { goto yy0; } 280 state_,
307 */ 281 unfinished_size
308 282 );
309 fill: 283 if(0 < unfinished_size && start_ < limit_) {
310 int unfinishedSize = cursor-start; 284 printf(" unfinished token is: ");
311 if (FLAG_trace_lexer) { 285 fwrite(start_, 1, cursor_-start_, stdout);
312 printf( 286 putchar('\n');
313 "scanner needs a refill. Exiting for now with:\n" 287 }
314 " saved fill state = %d\n" 288 putchar('\n');
315 " unfinished token size = %d\n", 289 }
316 state, 290
317 unfinishedSize 291 if (eof_) goto start_;
318 ); 292
319 if(0 < unfinishedSize && start < limit) { 293 // Once we get here, we can get rid of
320 printf(" unfinished token is: "); 294 // everything before start_ and after limit_.
321 fwrite(start, 1, cursor-start, stdout); 295
322 putchar('\n'); 296 if (buffer_ < start_) {
323 } 297 size_t start__offset = start_ - buffer_;
324 putchar('\n'); 298 memmove(buffer_, start_, limit_ - start_);
325 } 299 marker_ -= start__offset;
326 300 cursor_ -= start__offset;
327 /* 301 limit_ -= start__offset;
328 * Once we get here, we can get rid of 302 start_ -= start__offset;
329 * everything before start and after limit. 303 real_start_ += start__offset;
330 */ 304 }
331 if (eof == true) goto start; 305 return 0;
332 if (buffer < start) { 306 }
333 size_t start_offset = start - buffer; 307
334 memmove(buffer, start, limit - start); 308 private:
335 marker -= start_offset; 309 bool eof_;
336 cursor -= start_offset; 310 int32_t state_;
337 limit -= start_offset; 311 int32_t condition_;
338 start -= start_offset; 312
339 real_start += start_offset; 313 uint8_t* limit_;
340 } 314 uint8_t* start_;
341 return 0; 315 uint8_t* cursor_;
342 } 316 uint8_t* marker_;
317 int real_start_;
318
319 uint8_t* buffer_;
320 uint8_t* buffer_end_;
321
322 uint8_t yych;
323 uint32_t yyaccept;
324
325 ExperimentalScanner* sink_;
343 }; 326 };
344 327
345 328
346 ExperimentalScanner::ExperimentalScanner(const char* fname) : 329 ExperimentalScanner::ExperimentalScanner(const char* fname) :
347 current_(0), fetched_(0) { 330 current_(0), fetched_(0) {
348 file_ = fopen(fname, "rb"); 331 file_ = fopen(fname, "rb");
349 scanner_ = new PushScanner(this); 332 scanner_ = new PushScanner(this);
350 } 333 }
351 334
352 335
(...skipping 12 matching lines...) Expand all
365 348
366 349
367 Token::Value ExperimentalScanner::Next(int* beg_pos, int* end_pos) { 350 Token::Value ExperimentalScanner::Next(int* beg_pos, int* end_pos) {
368 if (current_ == fetched_) { 351 if (current_ == fetched_) {
369 FillTokens(); 352 FillTokens();
370 } 353 }
371 *beg_pos = beg_[current_]; 354 *beg_pos = beg_[current_];
372 *end_pos = end_[current_]; 355 *end_pos = end_[current_];
373 Token::Value res = token_[current_]; 356 Token::Value res = token_[current_];
374 if (token_[current_] != Token::Token::EOS && 357 if (token_[current_] != Token::Token::EOS &&
375 token_[current_] != Token::ILLEGAL) current_++; 358 token_[current_] != Token::ILLEGAL) {
359 current_++;
360 }
376 return res; 361 return res;
377 } 362 }
378 363
379 364
380 void ExperimentalScanner::Record(Token::Value token, int beg, int end) { 365 void ExperimentalScanner::Record(Token::Value token, int beg, int end) {
381 if (token == Token::EOS) end--; 366 if (token == Token::EOS) end--;
382 token_[fetched_] = token; 367 token_[fetched_] = token;
383 beg_[fetched_] = beg; 368 beg_[fetched_] = beg;
384 end_[fetched_] = end; 369 end_[fetched_] = end;
385 fetched_++; 370 fetched_++;
386 } 371 }
OLDNEW
« no previous file with comments | « no previous file | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698