Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(119)

Side by Side Diff: src/lexer/lexer.re

Issue 26764004: Invoke generated lexer along with baseline lexer to compare results. (Closed) Base URL: https://v8.googlecode.com/svn/branches/experimental/parser
Patch Set: Created 7 years, 2 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
« no previous file with comments | « src/lexer/lexer.gyp ('k') | src/lexer/lexer-shell.cc » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 #include <fcntl.h> 1 #include <fcntl.h>
2 #include <stdio.h> 2 #include <stdio.h>
3 #include <stddef.h> 3 #include <stddef.h>
4 #include <stdlib.h> 4 #include <stdlib.h>
5 #include <string.h> 5 #include <string.h>
6 6
7
8 /* 7 /*
9 TODO: 8 TODO:
10 - SpiderMonkey compatibility hack: " --> something" is treated as a single line comment. 9 - SpiderMonkey compatibility hack: " --> something" is treated as a single line comment.
11 - An identifier cannot start immediately after a number. 10 - An identifier cannot start immediately after a number.
12 11
13 */ 12 */
14 13
15 14
16 /*!types:re2c */ 15
16 enum Condition {
17 EConditionNormal,
18 EConditionDoubleQuoteString,
19 EConditionSingleQuoteString,
20 EConditionIdentifier,
21 EConditionSingleLineComment,
22 EConditionMultiLineComment,
23 EConditionHtmlComment
ulan 2013/10/14 14:28:52 Had to move it here from the generated file, becau
24 };
17 25
18 #if defined(WIN32) 26 #if defined(WIN32)
19 27
20 typedef signed char int8_t; 28 typedef signed char int8_t;
21 typedef signed short int16_t; 29 typedef signed short int16_t;
22 typedef signed int int32_t; 30 typedef signed int int32_t;
23 31
24 typedef unsigned char uint8_t; 32 typedef unsigned char uint8_t;
25 typedef unsigned short uint16_t; 33 typedef unsigned short uint16_t;
26 typedef unsigned int uint32_t; 34 typedef unsigned int uint32_t;
27 35
28 #else 36 #else
29 37
30 #include <stdint.h> 38 #include <stdint.h>
31 #include <unistd.h> 39 #include <unistd.h>
32 40
33 #ifndef O_BINARY 41 #ifndef O_BINARY
34 #define O_BINARY 0 42 #define O_BINARY 0
35 #endif 43 #endif
36 44
37 #endif 45 #endif
38 46
39 // ---------------------------------------------------------------------- 47 #include "lexer.h"
40 #define PUSH_EOS(T) { printf("got eos\n"); } 48 using namespace v8::internal;
41 #define PUSH_TOKEN(T) { \
42 printf("got token %s (%d)\n", tokenNames[T], T); \
43 SKIP(); }
44 #define PUSH_STRING() { \
45 printf("got string\n"); \
46 size_t tokenSize = cursor-start; \
47 fwrite(start, tokenSize, 1, stdout); \
48 printf("\n"); \
49 SKIP(); }
50 #define PUSH_NUMBER() { \
51 printf("got number\n"); \
52 size_t tokenSize = cursor-start; \
53 fwrite(start, tokenSize, 1, stdout); \
54 printf("\n"); \
55 SKIP(); }
56 #define PUSH_IDENTIFIER() { \
57 --cursor; \
58 printf("got identifier: "); \
59 size_t tokenSize = cursor-start; \
60 fwrite(start, tokenSize, 1, stdout); \
61 printf("\n"); \
62 SKIP(); }
63 #define PUSH_LINE_TERMINATOR() { printf("got line terminator\n"); SKIP();}
64 #define TERMINATE_ILLEGAL() { return 1; }
65
66 #define TOKENS \
67 TOK(EOS) \
68 TOK(LPAREN) \
69 TOK(RPAREN) \
70 TOK(LBRACK) \
71 TOK(RBRACK) \
72 TOK(LBRACE) \
73 TOK(RBRACE) \
74 TOK(COLON) \
75 TOK(SEMICOLON) \
76 TOK(PERIOD) \
77 TOK(CONDITIONAL) \
78 TOK(INC) \
79 TOK(DEC) \
80 TOK(ASSIGN) \
81 TOK(ASSIGN_BIT_OR) \
82 TOK(ASSIGN_BIT_XOR) \
83 TOK(ASSIGN_BIT_AND) \
84 TOK(ASSIGN_SHL) \
85 TOK(ASSIGN_SAR) \
86 TOK(ASSIGN_SHR) \
87 TOK(ASSIGN_ADD) \
88 TOK(ASSIGN_SUB) \
89 TOK(ASSIGN_MUL) \
90 TOK(ASSIGN_DIV) \
91 TOK(ASSIGN_MOD) \
92 TOK(COMMA) \
93 TOK(OR) \
94 TOK(AND) \
95 TOK(BIT_OR) \
96 TOK(BIT_XOR) \
97 TOK(BIT_AND) \
98 TOK(SHL) \
99 TOK(SAR) \
100 TOK(ADD) \
101 TOK(SUB) \
102 TOK(MUL) \
103 TOK(DIV) \
104 TOK(MOD) \
105 TOK(EQ) \
106 TOK(NE) \
107 TOK(EQ_STRICT) \
108 TOK(NE_STRICT) \
109 TOK(LT) \
110 TOK(GT) \
111 TOK(LTE) \
112 TOK(GTE) \
113 TOK(NOT) \
114 TOK(BIT_NOT) \
115 49
116 // ---------------------------------------------------------------------- 50 // ----------------------------------------------------------------------
117 static const char *tokenNames[] = 51 #define PUSH_TOKEN(T) { send(T); SKIP(); }
118 { 52 #define PUSH_LINE_TERMINATOR() { SKIP(); }
119 #define TOK(x) #x, 53 #define TERMINATE_ILLEGAL() { return 1; }
120 TOKENS
121 #undef TOK
122 };
123 54
124 // ---------------------------------------------------------------------- 55 // ----------------------------------------------------------------------
125 class PushScanner 56 class PushScanner
126 { 57 {
127 public:
128
129 enum Token
130 {
131 #define TOK(x) x,
132 TOKENS
133 #undef TOK
134 };
135 58
136 private: 59 private:
137 60
138 bool eof; 61 bool eof;
139 int32_t state; 62 int32_t state;
140 int32_t condition; 63 int32_t condition;
141 64
142 uint8_t *limit; 65 uint8_t *limit;
143 uint8_t *start; 66 uint8_t *start;
144 uint8_t *cursor; 67 uint8_t *cursor;
145 uint8_t *marker; 68 uint8_t *marker;
69 int real_start;
146 70
147 uint8_t *buffer; 71 uint8_t *buffer;
148 uint8_t *bufferEnd; 72 uint8_t *bufferEnd;
149 73
150 uint8_t yych; 74 uint8_t yych;
151 uint32_t yyaccept; 75 uint32_t yyaccept;
152 76
77 ExperimentalScanner* sink_;
78
153 public: 79 public:
154 80
155 // ---------------------------------------------------------------------- 81 // ----------------------------------------------------------------------
156 PushScanner() 82 PushScanner(ExperimentalScanner* sink)
157 { 83 {
158 limit = 0; 84 limit = 0;
159 start = 0; 85 start = 0;
160 state = -1; 86 state = -1;
161 condition = EConditionNormal; 87 condition = EConditionNormal;
162 cursor = 0; 88 cursor = 0;
163 marker = 0; 89 marker = 0;
164 buffer = 0; 90 buffer = 0;
165 eof = false; 91 eof = false;
166 bufferEnd = 0; 92 bufferEnd = 0;
93 sink_ = sink;
94 real_start = 0;
167 } 95 }
168 96
169 // ---------------------------------------------------------------------- 97 // ----------------------------------------------------------------------
170 ~PushScanner() 98 ~PushScanner()
171 { 99 {
172 } 100 }
173 101
174 // ---------------------------------------------------------------------- 102 // ----------------------------------------------------------------------
175 void send( 103 void send(Token::Value token) {
176 Token token 104 int beg = (start - buffer) + real_start;
177 ) 105 int end = (cursor - buffer) + real_start;
178 { 106 if (FLAG_trace_lexer) {
179 size_t tokenSize = cursor-start; 107 printf("got %s at (%d, %d): ", Token::Name(token), beg, end);
180 const char *tokenName = tokenNames[token]; 108 for (uint8_t* s = start; s != cursor; s++) printf("%c", (char)*s);
181 printf( 109 printf(".\n");
182 "scanner is pushing out a token of type %d (%s)",
183 token,
184 tokenName
185 );
186
187 if(token==EOS) putchar('\n');
188 else
189 {
190 size_t tokenNameSize = strlen(tokenNames[token]);
191 size_t padSize = 20-(20<tokenNameSize ? 20 : tokenNameSize);
192 for(size_t i=0; i<padSize; ++i) putchar(' ');
193 printf(" : ---->");
194
195 fwrite(
196 start,
197 tokenSize,
198 1,
199 stdout
200 );
201
202 printf("<----\n");
203 } 110 }
111 sink_->Record(token, beg, end);
204 } 112 }
205 113
206 // ---------------------------------------------------------------------- 114 // ----------------------------------------------------------------------
207 uint32_t push( 115 uint32_t push(const void *input, int input_size) {
208 const void *input, 116 if (FLAG_trace_lexer) {
209 ssize_t inputSize 117 printf(
210 ) 118 "scanner is receiving a new data batch of length %d\n"
211 { 119 "scanner continues with saved state = %d\n",
212 printf( 120 input_size,
213 "scanner is receiving a new data batch of length %ld\n" 121 state
214 "scanner continues with saved state = %d\n", 122 );
215 inputSize, 123 }
216 state
217 );
218 124
219 /* 125 /*
220 * Data source is signaling end of file when batch size 126 * Data source is signaling end of file when batch size
221 * is less than maxFill. This is slightly annoying because 127 * is less than maxFill. This is slightly annoying because
222 * maxFill is a value that can only be known after re2c does 128 * maxFill is a value that can only be known after re2c does
223 * its thing. Practically though, maxFill is never bigger than 129 * its thing. Practically though, maxFill is never bigger than
224 * the longest keyword, so given our grammar, 32 is a safe bet. 130 * the longest keyword, so given our grammar, 32 is a safe bet.
225 */ 131 */
226 uint8_t null[64]; 132 uint8_t null[64];
227 const ssize_t maxFill = 32; 133 const int maxFill = 32;
228 if(inputSize<maxFill) // FIXME: do something about this!!! 134 if(input_size<maxFill) // FIXME: do something about this!!!
229 { 135 {
230 eof = true; 136 eof = true;
231 input = null; 137 input = null;
232 inputSize = sizeof(null); 138 input_size = sizeof(null);
233 memset(null, 0, sizeof(null)); 139 memset(null, 0, sizeof(null));
234 } 140 }
235 141
236 /* 142 /*
237 * When we get here, we have a partially 143 * When we get here, we have a partially
238 * consumed buffer which is in the following state: 144 * consumed buffer which is in the following state:
239 * last valid char last valid buffer spot 145 * last valid char last valid buffer spot
240 * v v 146 * v v
241 * +-------------------+-------------+---------------+-------------+----------------------+ 147 * +-------------------+-------------+---------------+-------------+----------------------+
242 * ^ ^ ^ ^ ^ ^ 148 * ^ ^ ^ ^ ^ ^
243 * buffer start marker cursor limit bufferEnd 149 * buffer start marker cursor limit bufferEnd
244 * 150 *
245 * We need to stretch the buffer and concatenate the new chunk of input to it 151 * We need to stretch the buffer and concatenate the new chunk of input to it
246 * 152 *
247 */ 153 */
248 size_t used = limit-buffer; 154 size_t used = limit-buffer;
249 size_t needed = used+inputSize; 155 size_t needed = used+input_size;
250 size_t allocated = bufferEnd-buffer; 156 size_t allocated = bufferEnd-buffer;
251 if(allocated<needed) 157 if(allocated<needed)
252 { 158 {
253 size_t limitOffset = limit-buffer; 159 size_t limitOffset = limit-buffer;
254 size_t startOffset = start-buffer; 160 size_t startOffset = start-buffer;
255 size_t markerOffset = marker-buffer; 161 size_t markerOffset = marker-buffer;
256 size_t cursorOffset = cursor-buffer; 162 size_t cursorOffset = cursor-buffer;
257 163
258 buffer = (uint8_t*)realloc(buffer, needed); 164 buffer = (uint8_t*)realloc(buffer, needed);
259 bufferEnd = needed+buffer; 165 bufferEnd = needed+buffer;
260 166
261 marker = markerOffset + buffer; 167 marker = markerOffset + buffer;
262 cursor = cursorOffset + buffer; 168 cursor = cursorOffset + buffer;
263 start = buffer + startOffset; 169 start = buffer + startOffset;
264 limit = limitOffset + buffer; 170 limit = limitOffset + buffer;
265 } 171 }
266 memcpy(limit, input, inputSize); 172 memcpy(limit, input, input_size);
267 limit += inputSize; 173 limit += input_size;
268 174
269 // The scanner starts here 175 // The scanner starts here
270 #define YYLIMIT limit 176 #define YYLIMIT limit
271 #define YYCURSOR cursor 177 #define YYCURSOR cursor
272 #define YYMARKER marker 178 #define YYMARKER marker
273 #define YYCTYPE uint8_t 179 #define YYCTYPE uint8_t
274 180
275 #define SKIP() { start = cursor; YYSETCONDITION(EConditionNorma l); goto yy0; } 181 #define SKIP() { start = cursor; YYSETCONDITION(EConditionNorma l); goto yy0; }
276 #define YYFILL(n) { goto fill; } 182 #define YYFILL(n) { goto fill; }
277 183
278 #define YYGETSTATE() state 184 #define YYGETSTATE() state
279 #define YYSETSTATE(x) { state = (x); } 185 #define YYSETSTATE(x) { state = (x); }
280 186
281 #define YYGETCONDITION() condition 187 #define YYGETCONDITION() condition
282 #define YYSETCONDITION(x) { condition = (x); } 188 #define YYSETCONDITION(x) { condition = (x); }
283 189
284 start: 190 start:
285 191
286 printf("Starting a round; state: %d, condition: %d\n", state, condition) ; 192 if (FLAG_trace_lexer) {
193 printf("Starting a round; state: %d, condition: %d\n", state, condit ion);
194 }
287 195
288 /*!re2c 196 /*!re2c
289 re2c:indent:top = 1; 197 re2c:indent:top = 1;
290 re2c:yych:conversion = 0; 198 re2c:yych:conversion = 0;
291 re2c:condenumprefix = ECondition; 199 re2c:condenumprefix = ECondition;
292 re2c:define:YYCONDTYPE = Condition; 200 re2c:define:YYCONDTYPE = Condition;
293 201
294 eof = "\000"; 202 eof = "\000";
295 any = [\000-\377]; 203 any = [\000-\377];
296 whitespace_char = [ \t\v\f\r]; 204 whitespace_char = [ \t\v\f\r];
297 whitespace = whitespace_char+; 205 whitespace = whitespace_char+;
298 identifier_start = [$_\\a-zA-z]; 206 identifier_start = [$_\\a-zA-z];
299 identifier_char = [$_\\a-zA-z0-9]; 207 identifier_char = [$_\\a-zA-z0-9];
300 line_terminator = [\n\r]+; 208 line_terminator = [\n\r]+;
301 digit = [0-9]; 209 digit = [0-9];
302 hex_digit = [0-9a-fA-F]; 210 hex_digit = [0-9a-fA-F];
303 maybe_exponent = ('e' [-+]? digit+)?; 211 maybe_exponent = ('e' [-+]? digit+)?;
304 212
305 <Normal> "|=" { PUSH_TOKEN(ASSIGN_BIT_OR); } 213 <Normal> "|=" { PUSH_TOKEN(Token::ASSIGN_BIT_OR); }
306 <Normal> "^=" { PUSH_TOKEN(ASSIGN_BIT_XOR); } 214 <Normal> "^=" { PUSH_TOKEN(Token::ASSIGN_BIT_XOR); }
307 <Normal> "&=" { PUSH_TOKEN(ASSIGN_BIT_AND); } 215 <Normal> "&=" { PUSH_TOKEN(Token::ASSIGN_BIT_AND); }
308 <Normal> "+=" { PUSH_TOKEN(ASSIGN_ADD); } 216 <Normal> "+=" { PUSH_TOKEN(Token::ASSIGN_ADD); }
309 <Normal> "-=" { PUSH_TOKEN(ASSIGN_SUB); } 217 <Normal> "-=" { PUSH_TOKEN(Token::ASSIGN_SUB); }
310 <Normal> "*=" { PUSH_TOKEN(ASSIGN_MUL); } 218 <Normal> "*=" { PUSH_TOKEN(Token::ASSIGN_MUL); }
311 <Normal> "/=" { PUSH_TOKEN(ASSIGN_DIV); } 219 <Normal> "/=" { PUSH_TOKEN(Token::ASSIGN_DIV); }
312 <Normal> "%=" { PUSH_TOKEN(ASSIGN_MOD); } 220 <Normal> "%=" { PUSH_TOKEN(Token::ASSIGN_MOD); }
313 221
314 <Normal> "===" { PUSH_TOKEN(EQ_STRICT); } 222 <Normal> "===" { PUSH_TOKEN(Token::EQ_STRICT); }
315 <Normal> "==" { PUSH_TOKEN(EQ); } 223 <Normal> "==" { PUSH_TOKEN(Token::EQ); }
316 <Normal> "=" { PUSH_TOKEN(ASSIGN); } 224 <Normal> "=" { PUSH_TOKEN(Token::ASSIGN); }
317 <Normal> "!==" { PUSH_TOKEN(NE_STRICT); } 225 <Normal> "!==" { PUSH_TOKEN(Token::NE_STRICT); }
318 <Normal> "!=" { PUSH_TOKEN(NE); } 226 <Normal> "!=" { PUSH_TOKEN(Token::NE); }
319 <Normal> "!" { PUSH_TOKEN(NOT); } 227 <Normal> "!" { PUSH_TOKEN(Token::NOT); }
320 228
321 <Normal> "//" :=> SingleLineComment 229 <Normal> "//" :=> SingleLineComment
322 <Normal> "/*" :=> MultiLineComment 230 <Normal> "/*" :=> MultiLineComment
323 <Normal> "<!--" :=> HtmlComment 231 <Normal> "<!--" :=> HtmlComment
324 232
325 <Normal> ">>>=" { PUSH_TOKEN(ASSIGN_SHR); } 233 <Normal> ">>>=" { PUSH_TOKEN(Token::ASSIGN_SHR); }
326 <Normal> "<<=" { PUSH_TOKEN(ASSIGN_SHL); } 234 <Normal> "<<=" { PUSH_TOKEN(Token::ASSIGN_SHL); }
327 <Normal> ">>=" { PUSH_TOKEN(ASSIGN_SAR); } 235 <Normal> ">>=" { PUSH_TOKEN(Token::ASSIGN_SAR); }
328 <Normal> "<=" { PUSH_TOKEN(LTE); } 236 <Normal> "<=" { PUSH_TOKEN(Token::LTE); }
329 <Normal> ">=" { PUSH_TOKEN(GTE); } 237 <Normal> ">=" { PUSH_TOKEN(Token::GTE); }
330 <Normal> "<<" { PUSH_TOKEN(SHL); } 238 <Normal> "<<" { PUSH_TOKEN(Token::SHL); }
331 <Normal> ">>" { PUSH_TOKEN(SAR); } 239 <Normal> ">>" { PUSH_TOKEN(Token::SAR); }
332 <Normal> "<" { PUSH_TOKEN(LT); } 240 <Normal> "<" { PUSH_TOKEN(Token::LT); }
333 <Normal> ">" { PUSH_TOKEN(GT); } 241 <Normal> ">" { PUSH_TOKEN(Token::GT); }
334 242
335 <Normal> '0x' hex_digit+ { PUSH_NUMBER(); } 243 <Normal> '0x' hex_digit+ { PUSH_TOKEN(Token::NUMBER); }
336 <Normal> "." digit+ maybe_exponent { PUSH_NUMBER(); } 244 <Normal> "." digit+ maybe_exponent { PUSH_TOKEN(Token::NUMBER); }
337 <Normal> digit+ ("." digit+)? maybe_exponent { PUSH_NUMBER(); } 245 <Normal> digit+ ("." digit+)? maybe_exponent { PUSH_TOKEN(Token::NUMBER) ; }
338 246
339 <Normal> "(" { PUSH_TOKEN(LPAREN); } 247 <Normal> "(" { PUSH_TOKEN(Token::LPAREN); }
340 <Normal> ")" { PUSH_TOKEN(RPAREN); } 248 <Normal> ")" { PUSH_TOKEN(Token::RPAREN); }
341 <Normal> "[" { PUSH_TOKEN(LBRACK); } 249 <Normal> "[" { PUSH_TOKEN(Token::LBRACK); }
342 <Normal> "]" { PUSH_TOKEN(RBRACK); } 250 <Normal> "]" { PUSH_TOKEN(Token::RBRACK); }
343 <Normal> "{" { PUSH_TOKEN(LBRACE); } 251 <Normal> "{" { PUSH_TOKEN(Token::LBRACE); }
344 <Normal> "}" { PUSH_TOKEN(RBRACE); } 252 <Normal> "}" { PUSH_TOKEN(Token::RBRACE); }
345 <Normal> ":" { PUSH_TOKEN(COLON); } 253 <Normal> ":" { PUSH_TOKEN(Token::COLON); }
346 <Normal> ";" { PUSH_TOKEN(SEMICOLON); } 254 <Normal> ";" { PUSH_TOKEN(Token::SEMICOLON); }
347 <Normal> "." { PUSH_TOKEN(PERIOD); } 255 <Normal> "." { PUSH_TOKEN(Token::PERIOD); }
348 <Normal> "?" { PUSH_TOKEN(CONDITIONAL); } 256 <Normal> "?" { PUSH_TOKEN(Token::CONDITIONAL); }
349 <Normal> "++" { PUSH_TOKEN(INC); } 257 <Normal> "++" { PUSH_TOKEN(Token::INC); }
350 <Normal> "--" { PUSH_TOKEN(DEC); } 258 <Normal> "--" { PUSH_TOKEN(Token::DEC); }
351 259
352 <Normal> "||" { PUSH_TOKEN(OR); } 260 <Normal> "||" { PUSH_TOKEN(Token::OR); }
353 <Normal> "&&" { PUSH_TOKEN(AND); } 261 <Normal> "&&" { PUSH_TOKEN(Token::AND); }
354 262
355 <Normal> "|" { PUSH_TOKEN(BIT_OR); } 263 <Normal> "|" { PUSH_TOKEN(Token::BIT_OR); }
356 <Normal> "^" { PUSH_TOKEN(BIT_XOR); } 264 <Normal> "^" { PUSH_TOKEN(Token::BIT_XOR); }
357 <Normal> "&" { PUSH_TOKEN(BIT_AND); } 265 <Normal> "&" { PUSH_TOKEN(Token::BIT_AND); }
358 <Normal> "+" { PUSH_TOKEN(ADD); } 266 <Normal> "+" { PUSH_TOKEN(Token::ADD); }
359 <Normal> "-" { PUSH_TOKEN(SUB); } 267 <Normal> "-" { PUSH_TOKEN(Token::SUB); }
360 <Normal> "*" { PUSH_TOKEN(MUL); } 268 <Normal> "*" { PUSH_TOKEN(Token::MUL); }
361 <Normal> "/" { PUSH_TOKEN(DIV); } 269 <Normal> "/" { PUSH_TOKEN(Token::DIV); }
362 <Normal> "%" { PUSH_TOKEN(MOD); } 270 <Normal> "%" { PUSH_TOKEN(Token::MOD); }
363 <Normal> "~" { PUSH_TOKEN(BIT_NOT); } 271 <Normal> "~" { PUSH_TOKEN(Token::BIT_NOT); }
364 <Normal> "," { PUSH_TOKEN(COMMA); } 272 <Normal> "," { PUSH_TOKEN(Token::COMMA); }
365 273
366 <Normal> line_terminator+ { PUSH_LINE_TERMINATOR(); } 274 <Normal> line_terminator+ { PUSH_LINE_TERMINATOR(); }
367 <Normal> whitespace { SKIP(); } 275 <Normal> whitespace { SKIP(); }
368 276
369 <Normal> ["] :=> DoubleQuoteString 277 <Normal> ["] :=> DoubleQuoteString
370 <Normal> ['] :=> SingleQuoteString 278 <Normal> ['] :=> SingleQuoteString
371 279
372 <Normal> identifier_start :=> Identifier 280 <Normal> identifier_start :=> Identifier
373 281
374 <Normal> eof { PUSH_EOS(); return 1; } 282 <Normal> eof { PUSH_TOKEN(Token::EOS); return 1; }
375 <Normal> any { TERMINATE_ILLEGAL(); } 283 <Normal> any { TERMINATE_ILLEGAL(); }
376 284
377 <DoubleQuoteString> "\\\"" { goto yy0; } 285 <DoubleQuoteString> "\\\"" { goto yy0; }
378 <DoubleQuoteString> '"' { PUSH_STRING();} 286 <DoubleQuoteString> '"' { PUSH_TOKEN(Token::STRING);}
379 <DoubleQuoteString> any { goto yy0; } 287 <DoubleQuoteString> any { goto yy0; }
380 288
381 <SingleQuoteString> "\\'" { goto yy0; } 289 <SingleQuoteString> "\\'" { goto yy0; }
382 <SingleQuoteString> "'" { PUSH_STRING();} 290 <SingleQuoteString> "'" { PUSH_TOKEN(Token::STRING);}
383 <SingleQuoteString> any { goto yy0; } 291 <SingleQuoteString> any { goto yy0; }
384 292
385 <Identifier> identifier_char+ { goto yy0; } 293 <Identifier> identifier_char+ { goto yy0; }
386 <Identifier> any { PUSH_IDENTIFIER(); } 294 <Identifier> any { cursor--; PUSH_TOKEN(Token::IDENTIFIE R); }
387 295
388 <SingleLineComment> line_terminator { PUSH_LINE_TERMINATOR();} 296 <SingleLineComment> line_terminator { PUSH_LINE_TERMINATOR();}
389 <SingleLineComment> eof { PUSH_LINE_TERMINATOR();} 297 <SingleLineComment> eof { PUSH_LINE_TERMINATOR();}
390 <SingleLineComment> any { goto yy0; } 298 <SingleLineComment> any { goto yy0; }
391 299
392 <MultiLineComment> [*][//] { PUSH_LINE_TERMINATOR();} 300 <MultiLineComment> [*][//] { PUSH_LINE_TERMINATOR();}
393 <MultiLineComment> eof { TERMINATE_ILLEGAL(); } 301 <MultiLineComment> eof { TERMINATE_ILLEGAL(); }
394 <MultiLineComment> any { goto yy0; } 302 <MultiLineComment> any { goto yy0; }
395 303
396 <HtmlComment> eof { TERMINATE_ILLEGAL(); } 304 <HtmlComment> eof { TERMINATE_ILLEGAL(); }
397 <HtmlComment> "-->" { PUSH_LINE_TERMINATOR();} 305 <HtmlComment> "-->" { PUSH_LINE_TERMINATOR();}
398 <HtmlComment> any { goto yy0; } 306 <HtmlComment> any { goto yy0; }
399 */ 307 */
400 308
401 fill: 309 fill:
402 ssize_t unfinishedSize = cursor-start; 310 int unfinishedSize = cursor-start;
403 printf( 311 if (FLAG_trace_lexer) {
404 "scanner needs a refill. Exiting for now with:\n" 312 printf(
405 " saved fill state = %d\n" 313 "scanner needs a refill. Exiting for now with:\n"
406 " unfinished token size = %ld\n", 314 " saved fill state = %d\n"
407 state, 315 " unfinished token size = %d\n",
408 unfinishedSize 316 state,
409 ); 317 unfinishedSize
410 318 );
411 if(0<unfinishedSize && start<limit) 319 if(0 < unfinishedSize && start < limit) {
412 { 320 printf(" unfinished token is: ");
413 printf(" unfinished token is: "); 321 fwrite(start, 1, cursor-start, stdout);
414 fwrite(start, 1, cursor-start, stdout); 322 putchar('\n');
323 }
415 putchar('\n'); 324 putchar('\n');
416 } 325 }
417 putchar('\n');
418 326
419 /* 327 /*
420 * Once we get here, we can get rid of 328 * Once we get here, we can get rid of
421 * everything before start and after limit. 329 * everything before start and after limit.
422 */ 330 */
423 if(eof==true) goto start; 331 if (eof == true) goto start;
424 if(buffer<start) 332 if (buffer < start) {
425 { 333 size_t start_offset = start - buffer;
426 size_t startOffset = start-buffer; 334 memmove(buffer, start, limit - start);
427 memmove(buffer, start, limit-start); 335 marker -= start_offset;
428 marker -= startOffset; 336 cursor -= start_offset;
429 cursor -= startOffset; 337 limit -= start_offset;
430 limit -= startOffset; 338 start -= start_offset;
431 start -= startOffset; 339 real_start += start_offset;
432 } 340 }
433 return 0; 341 return 0;
434 } 342 }
435 }; 343 };
436 344
437 // ----------------------------------------------------------------------
438 int main(
439 int argc,
440 char **argv
441 )
442 {
443 // Parse cmd line
444 int input = 0;
445 if(1<argc)
446 {
447 input = open(argv[1], O_RDONLY | O_BINARY);
448 if(input<0)
449 {
450 fprintf(
451 stderr,
452 "could not open file %s\n",
453 argv[1]
454 );
455 exit(1);
456 }
457 }
458 345
459 /* 346 ExperimentalScanner::ExperimentalScanner(const char* fname) :
460 * Tokenize input file by pushing batches 347 current_(0), fetched_(0) {
461 * of data one by one into the scanner. 348 file_ = fopen(fname, "rb");
462 */ 349 scanner_ = new PushScanner(this);
463 const size_t batchSize = 256; 350 }
464 uint8_t buffer[batchSize];
465 PushScanner scanner;
466 while(1)
467 {
468 ssize_t n = read(input, buffer, batchSize);
469 if (scanner.push(buffer, n)) {
470 printf("Scanner: illegal data\n");
471 return 1;
472 }
473 if(n<batchSize) break;
474 }
475 scanner.push(0, -1);
476 close(input);
477 351
478 // Done 352
479 return 0; 353 ExperimentalScanner::~ExperimentalScanner() {
354 fclose(file_);
480 } 355 }
356
357
358 void ExperimentalScanner::FillTokens() {
359 current_ = 0;
360 fetched_ = 0;
361 uint8_t chars[BUFFER_SIZE];
362 int n = static_cast<int>(fread(&chars, 1, BUFFER_SIZE, file_));
363 scanner_->push(chars, n);
364 }
365
366
367 Token::Value ExperimentalScanner::Next(int* beg_pos, int* end_pos) {
368 if (current_ == fetched_) {
369 FillTokens();
370 }
371 *beg_pos = beg_[current_];
372 *end_pos = end_[current_];
373 Token::Value res = token_[current_];
374 if (token_[current_] != Token::Token::EOS &&
375 token_[current_] != Token::ILLEGAL) current_++;
376 return res;
377 }
378
379
380 void ExperimentalScanner::Record(Token::Value token, int beg, int end) {
381 if (token == Token::EOS) end--;
382 token_[fetched_] = token;
383 beg_[fetched_] = beg;
384 end_[fetched_] = end;
385 fetched_++;
386 }
OLDNEW
« no previous file with comments | « src/lexer/lexer.gyp ('k') | src/lexer/lexer-shell.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698