Chromium Code Reviews

| OLD | NEW |
|---|---|
| 1 #include <fcntl.h> | 1 #include <fcntl.h> |
| 2 #include <stdio.h> | 2 #include <stdio.h> |
| 3 #include <stddef.h> | 3 #include <stddef.h> |
| 4 #include <stdlib.h> | 4 #include <stdlib.h> |
| 5 #include <string.h> | 5 #include <string.h> |
| 6 | 6 |
| 7 | |
| 8 /* | 7 /* |
| 9 TODO: | 8 TODO: |
| 10 - SpiderMonkey compatibility hack: " --> something" is treated as a single line comment. | 9 - SpiderMonkey compatibility hack: " --> something" is treated as a single line comment. |
| 11 - An identifier cannot start immediately after a number. | 10 - An identifier cannot start immediately after a number. |
| 12 | 11 |
| 13 */ | 12 */ |
| 14 | 13 |
| 15 | 14 |
| 16 /*!types:re2c */ | 15 |
| 16 enum Condition { | |
| 17 EConditionNormal, | |
| 18 EConditionDoubleQuoteString, | |
| 19 EConditionSingleQuoteString, | |
| 20 EConditionIdentifier, | |
| 21 EConditionSingleLineComment, | |
| 22 EConditionMultiLineComment, | |
| 23 EConditionHtmlComment | |
|
ulan
2013/10/14 14:28:52
Had to move it here from the generated file, becau
| |
| 24 }; | |
| 17 | 25 |
| 18 #if defined(WIN32) | 26 #if defined(WIN32) |
| 19 | 27 |
| 20 typedef signed char int8_t; | 28 typedef signed char int8_t; |
| 21 typedef signed short int16_t; | 29 typedef signed short int16_t; |
| 22 typedef signed int int32_t; | 30 typedef signed int int32_t; |
| 23 | 31 |
| 24 typedef unsigned char uint8_t; | 32 typedef unsigned char uint8_t; |
| 25 typedef unsigned short uint16_t; | 33 typedef unsigned short uint16_t; |
| 26 typedef unsigned int uint32_t; | 34 typedef unsigned int uint32_t; |
| 27 | 35 |
| 28 #else | 36 #else |
| 29 | 37 |
| 30 #include <stdint.h> | 38 #include <stdint.h> |
| 31 #include <unistd.h> | 39 #include <unistd.h> |
| 32 | 40 |
| 33 #ifndef O_BINARY | 41 #ifndef O_BINARY |
| 34 #define O_BINARY 0 | 42 #define O_BINARY 0 |
| 35 #endif | 43 #endif |
| 36 | 44 |
| 37 #endif | 45 #endif |
| 38 | 46 |
| 39 // ---------------------------------------------------------------------- | 47 #include "lexer.h" |
| 40 #define PUSH_EOS(T) { printf("got eos\n"); } | 48 using namespace v8::internal; |
| 41 #define PUSH_TOKEN(T) { \ | |
| 42 printf("got token %s (%d)\n", tokenNames[T], T); \ | |
| 43 SKIP(); } | |
| 44 #define PUSH_STRING() { \ | |
| 45 printf("got string\n"); \ | |
| 46 size_t tokenSize = cursor-start; \ | |
| 47 fwrite(start, tokenSize, 1, stdout); \ | |
| 48 printf("\n"); \ | |
| 49 SKIP(); } | |
| 50 #define PUSH_NUMBER() { \ | |
| 51 printf("got number\n"); \ | |
| 52 size_t tokenSize = cursor-start; \ | |
| 53 fwrite(start, tokenSize, 1, stdout); \ | |
| 54 printf("\n"); \ | |
| 55 SKIP(); } | |
| 56 #define PUSH_IDENTIFIER() { \ | |
| 57 --cursor; \ | |
| 58 printf("got identifier: "); \ | |
| 59 size_t tokenSize = cursor-start; \ | |
| 60 fwrite(start, tokenSize, 1, stdout); \ | |
| 61 printf("\n"); \ | |
| 62 SKIP(); } | |
| 63 #define PUSH_LINE_TERMINATOR() { printf("got line terminator\n"); SKIP();} | |
| 64 #define TERMINATE_ILLEGAL() { return 1; } | |
| 65 | |
| 66 #define TOKENS \ | |
| 67 TOK(EOS) \ | |
| 68 TOK(LPAREN) \ | |
| 69 TOK(RPAREN) \ | |
| 70 TOK(LBRACK) \ | |
| 71 TOK(RBRACK) \ | |
| 72 TOK(LBRACE) \ | |
| 73 TOK(RBRACE) \ | |
| 74 TOK(COLON) \ | |
| 75 TOK(SEMICOLON) \ | |
| 76 TOK(PERIOD) \ | |
| 77 TOK(CONDITIONAL) \ | |
| 78 TOK(INC) \ | |
| 79 TOK(DEC) \ | |
| 80 TOK(ASSIGN) \ | |
| 81 TOK(ASSIGN_BIT_OR) \ | |
| 82 TOK(ASSIGN_BIT_XOR) \ | |
| 83 TOK(ASSIGN_BIT_AND) \ | |
| 84 TOK(ASSIGN_SHL) \ | |
| 85 TOK(ASSIGN_SAR) \ | |
| 86 TOK(ASSIGN_SHR) \ | |
| 87 TOK(ASSIGN_ADD) \ | |
| 88 TOK(ASSIGN_SUB) \ | |
| 89 TOK(ASSIGN_MUL) \ | |
| 90 TOK(ASSIGN_DIV) \ | |
| 91 TOK(ASSIGN_MOD) \ | |
| 92 TOK(COMMA) \ | |
| 93 TOK(OR) \ | |
| 94 TOK(AND) \ | |
| 95 TOK(BIT_OR) \ | |
| 96 TOK(BIT_XOR) \ | |
| 97 TOK(BIT_AND) \ | |
| 98 TOK(SHL) \ | |
| 99 TOK(SAR) \ | |
| 100 TOK(ADD) \ | |
| 101 TOK(SUB) \ | |
| 102 TOK(MUL) \ | |
| 103 TOK(DIV) \ | |
| 104 TOK(MOD) \ | |
| 105 TOK(EQ) \ | |
| 106 TOK(NE) \ | |
| 107 TOK(EQ_STRICT) \ | |
| 108 TOK(NE_STRICT) \ | |
| 109 TOK(LT) \ | |
| 110 TOK(GT) \ | |
| 111 TOK(LTE) \ | |
| 112 TOK(GTE) \ | |
| 113 TOK(NOT) \ | |
| 114 TOK(BIT_NOT) \ | |
| 115 | 49 |
| 116 // ---------------------------------------------------------------------- | 50 // ---------------------------------------------------------------------- |
| 117 static const char *tokenNames[] = | 51 #define PUSH_TOKEN(T) { send(T); SKIP(); } |
| 118 { | 52 #define PUSH_LINE_TERMINATOR() { SKIP(); } |
| 119 #define TOK(x) #x, | 53 #define TERMINATE_ILLEGAL() { return 1; } |
| 120 TOKENS | |
| 121 #undef TOK | |
| 122 }; | |
| 123 | 54 |
| 124 // ---------------------------------------------------------------------- | 55 // ---------------------------------------------------------------------- |
| 125 class PushScanner | 56 class PushScanner |
| 126 { | 57 { |
| 127 public: | |
| 128 | |
| 129 enum Token | |
| 130 { | |
| 131 #define TOK(x) x, | |
| 132 TOKENS | |
| 133 #undef TOK | |
| 134 }; | |
| 135 | 58 |
| 136 private: | 59 private: |
| 137 | 60 |
| 138 bool eof; | 61 bool eof; |
| 139 int32_t state; | 62 int32_t state; |
| 140 int32_t condition; | 63 int32_t condition; |
| 141 | 64 |
| 142 uint8_t *limit; | 65 uint8_t *limit; |
| 143 uint8_t *start; | 66 uint8_t *start; |
| 144 uint8_t *cursor; | 67 uint8_t *cursor; |
| 145 uint8_t *marker; | 68 uint8_t *marker; |
| 69 int real_start; | |
| 146 | 70 |
| 147 uint8_t *buffer; | 71 uint8_t *buffer; |
| 148 uint8_t *bufferEnd; | 72 uint8_t *bufferEnd; |
| 149 | 73 |
| 150 uint8_t yych; | 74 uint8_t yych; |
| 151 uint32_t yyaccept; | 75 uint32_t yyaccept; |
| 152 | 76 |
| 77 ExperimentalScanner* sink_; | |
| 78 | |
| 153 public: | 79 public: |
| 154 | 80 |
| 155 // ---------------------------------------------------------------------- | 81 // ---------------------------------------------------------------------- |
| 156 PushScanner() | 82 PushScanner(ExperimentalScanner* sink) |
| 157 { | 83 { |
| 158 limit = 0; | 84 limit = 0; |
| 159 start = 0; | 85 start = 0; |
| 160 state = -1; | 86 state = -1; |
| 161 condition = EConditionNormal; | 87 condition = EConditionNormal; |
| 162 cursor = 0; | 88 cursor = 0; |
| 163 marker = 0; | 89 marker = 0; |
| 164 buffer = 0; | 90 buffer = 0; |
| 165 eof = false; | 91 eof = false; |
| 166 bufferEnd = 0; | 92 bufferEnd = 0; |
| 93 sink_ = sink; | |
| 94 real_start = 0; | |
| 167 } | 95 } |
| 168 | 96 |
| 169 // ---------------------------------------------------------------------- | 97 // ---------------------------------------------------------------------- |
| 170 ~PushScanner() | 98 ~PushScanner() |
| 171 { | 99 { |
| 172 } | 100 } |
| 173 | 101 |
| 174 // ---------------------------------------------------------------------- | 102 // ---------------------------------------------------------------------- |
| 175 void send( | 103 void send(Token::Value token) { |
| 176 Token token | 104 int beg = (start - buffer) + real_start; |
| 177 ) | 105 int end = (cursor - buffer) + real_start; |
| 178 { | 106 if (FLAG_trace_lexer) { |
| 179 size_t tokenSize = cursor-start; | 107 printf("got %s at (%d, %d): ", Token::Name(token), beg, end); |
| 180 const char *tokenName = tokenNames[token]; | 108 for (uint8_t* s = start; s != cursor; s++) printf("%c", (char)*s); |
| 181 printf( | 109 printf(".\n"); |
| 182 "scanner is pushing out a token of type %d (%s)", | |
| 183 token, | |
| 184 tokenName | |
| 185 ); | |
| 186 | |
| 187 if(token==EOS) putchar('\n'); | |
| 188 else | |
| 189 { | |
| 190 size_t tokenNameSize = strlen(tokenNames[token]); | |
| 191 size_t padSize = 20-(20<tokenNameSize ? 20 : tokenNameSize); | |
| 192 for(size_t i=0; i<padSize; ++i) putchar(' '); | |
| 193 printf(" : ---->"); | |
| 194 | |
| 195 fwrite( | |
| 196 start, | |
| 197 tokenSize, | |
| 198 1, | |
| 199 stdout | |
| 200 ); | |
| 201 | |
| 202 printf("<----\n"); | |
| 203 } | 110 } |
| 111 sink_->Record(token, beg, end); | |
| 204 } | 112 } |
| 205 | 113 |
| 206 // ---------------------------------------------------------------------- | 114 // ---------------------------------------------------------------------- |
| 207 uint32_t push( | 115 uint32_t push(const void *input, int input_size) { |
| 208 const void *input, | 116 if (FLAG_trace_lexer) { |
| 209 ssize_t inputSize | 117 printf( |
| 210 ) | 118 "scanner is receiving a new data batch of length %d\n" |
| 211 { | 119 "scanner continues with saved state = %d\n", |
| 212 printf( | 120 input_size, |
| 213 "scanner is receiving a new data batch of length %ld\n" | 121 state |
| 214 "scanner continues with saved state = %d\n", | 122 ); |
| 215 inputSize, | 123 } |
| 216 state | |
| 217 ); | |
| 218 | 124 |
| 219 /* | 125 /* |
| 220 * Data source is signaling end of file when batch size | 126 * Data source is signaling end of file when batch size |
| 221 * is less than maxFill. This is slightly annoying because | 127 * is less than maxFill. This is slightly annoying because |
| 222 * maxFill is a value that can only be known after re2c does | 128 * maxFill is a value that can only be known after re2c does |
| 223 * its thing. Practically though, maxFill is never bigger than | 129 * its thing. Practically though, maxFill is never bigger than |
| 224 * the longest keyword, so given our grammar, 32 is a safe bet. | 130 * the longest keyword, so given our grammar, 32 is a safe bet. |
| 225 */ | 131 */ |
| 226 uint8_t null[64]; | 132 uint8_t null[64]; |
| 227 const ssize_t maxFill = 32; | 133 const int maxFill = 32; |
| 228 if(inputSize<maxFill) // FIXME: do something about this!!! | 134 if(input_size<maxFill) // FIXME: do something about this!!! |
| 229 { | 135 { |
| 230 eof = true; | 136 eof = true; |
| 231 input = null; | 137 input = null; |
| 232 inputSize = sizeof(null); | 138 input_size = sizeof(null); |
| 233 memset(null, 0, sizeof(null)); | 139 memset(null, 0, sizeof(null)); |
| 234 } | 140 } |
| 235 | 141 |
| 236 /* | 142 /* |
| 237 * When we get here, we have a partially | 143 * When we get here, we have a partially |
| 238 * consumed buffer which is in the following state: | 144 * consumed buffer which is in the following state: |
| 239 * last valid char last valid buffer spot | 145 * last valid char last valid buffer spot |
| 240 * v v | 146 * v v |
| 241 * +-------------------+-------------+---------------+-------------+----------------------+ | 147 * +-------------------+-------------+---------------+-------------+----------------------+ |
| 242 * ^ ^ ^ ^ ^ ^ | 148 * ^ ^ ^ ^ ^ ^ |
| 243 * buffer start marker cursor limit bufferEnd | 149 * buffer start marker cursor limit bufferEnd |
| 244 * | 150 * |
| 245 * We need to stretch the buffer and concatenate the new chunk of input to it | 151 * We need to stretch the buffer and concatenate the new chunk of input to it |
| 246 * | 152 * |
| 247 */ | 153 */ |
| 248 size_t used = limit-buffer; | 154 size_t used = limit-buffer; |
| 249 size_t needed = used+inputSize; | 155 size_t needed = used+input_size; |
| 250 size_t allocated = bufferEnd-buffer; | 156 size_t allocated = bufferEnd-buffer; |
| 251 if(allocated<needed) | 157 if(allocated<needed) |
| 252 { | 158 { |
| 253 size_t limitOffset = limit-buffer; | 159 size_t limitOffset = limit-buffer; |
| 254 size_t startOffset = start-buffer; | 160 size_t startOffset = start-buffer; |
| 255 size_t markerOffset = marker-buffer; | 161 size_t markerOffset = marker-buffer; |
| 256 size_t cursorOffset = cursor-buffer; | 162 size_t cursorOffset = cursor-buffer; |
| 257 | 163 |
| 258 buffer = (uint8_t*)realloc(buffer, needed); | 164 buffer = (uint8_t*)realloc(buffer, needed); |
| 259 bufferEnd = needed+buffer; | 165 bufferEnd = needed+buffer; |
| 260 | 166 |
| 261 marker = markerOffset + buffer; | 167 marker = markerOffset + buffer; |
| 262 cursor = cursorOffset + buffer; | 168 cursor = cursorOffset + buffer; |
| 263 start = buffer + startOffset; | 169 start = buffer + startOffset; |
| 264 limit = limitOffset + buffer; | 170 limit = limitOffset + buffer; |
| 265 } | 171 } |
| 266 memcpy(limit, input, inputSize); | 172 memcpy(limit, input, input_size); |
| 267 limit += inputSize; | 173 limit += input_size; |
| 268 | 174 |
| 269 // The scanner starts here | 175 // The scanner starts here |
| 270 #define YYLIMIT limit | 176 #define YYLIMIT limit |
| 271 #define YYCURSOR cursor | 177 #define YYCURSOR cursor |
| 272 #define YYMARKER marker | 178 #define YYMARKER marker |
| 273 #define YYCTYPE uint8_t | 179 #define YYCTYPE uint8_t |
| 274 | 180 |
| 275 #define SKIP() { start = cursor; YYSETCONDITION(EConditionNorma l); goto yy0; } | 181 #define SKIP() { start = cursor; YYSETCONDITION(EConditionNorma l); goto yy0; } |
| 276 #define YYFILL(n) { goto fill; } | 182 #define YYFILL(n) { goto fill; } |
| 277 | 183 |
| 278 #define YYGETSTATE() state | 184 #define YYGETSTATE() state |
| 279 #define YYSETSTATE(x) { state = (x); } | 185 #define YYSETSTATE(x) { state = (x); } |
| 280 | 186 |
| 281 #define YYGETCONDITION() condition | 187 #define YYGETCONDITION() condition |
| 282 #define YYSETCONDITION(x) { condition = (x); } | 188 #define YYSETCONDITION(x) { condition = (x); } |
| 283 | 189 |
| 284 start: | 190 start: |
| 285 | 191 |
| 286 printf("Starting a round; state: %d, condition: %d\n", state, condition) ; | 192 if (FLAG_trace_lexer) { |
| 193 printf("Starting a round; state: %d, condition: %d\n", state, condit ion); | |
| 194 } | |
| 287 | 195 |
| 288 /*!re2c | 196 /*!re2c |
| 289 re2c:indent:top = 1; | 197 re2c:indent:top = 1; |
| 290 re2c:yych:conversion = 0; | 198 re2c:yych:conversion = 0; |
| 291 re2c:condenumprefix = ECondition; | 199 re2c:condenumprefix = ECondition; |
| 292 re2c:define:YYCONDTYPE = Condition; | 200 re2c:define:YYCONDTYPE = Condition; |
| 293 | 201 |
| 294 eof = "\000"; | 202 eof = "\000"; |
| 295 any = [\000-\377]; | 203 any = [\000-\377]; |
| 296 whitespace_char = [ \t\v\f\r]; | 204 whitespace_char = [ \t\v\f\r]; |
| 297 whitespace = whitespace_char+; | 205 whitespace = whitespace_char+; |
| 298 identifier_start = [$_\\a-zA-z]; | 206 identifier_start = [$_\\a-zA-z]; |
| 299 identifier_char = [$_\\a-zA-z0-9]; | 207 identifier_char = [$_\\a-zA-z0-9]; |
| 300 line_terminator = [\n\r]+; | 208 line_terminator = [\n\r]+; |
| 301 digit = [0-9]; | 209 digit = [0-9]; |
| 302 hex_digit = [0-9a-fA-F]; | 210 hex_digit = [0-9a-fA-F]; |
| 303 maybe_exponent = ('e' [-+]? digit+)?; | 211 maybe_exponent = ('e' [-+]? digit+)?; |
| 304 | 212 |
| 305 <Normal> "|=" { PUSH_TOKEN(ASSIGN_BIT_OR); } | 213 <Normal> "|=" { PUSH_TOKEN(Token::ASSIGN_BIT_OR); } |
| 306 <Normal> "^=" { PUSH_TOKEN(ASSIGN_BIT_XOR); } | 214 <Normal> "^=" { PUSH_TOKEN(Token::ASSIGN_BIT_XOR); } |
| 307 <Normal> "&=" { PUSH_TOKEN(ASSIGN_BIT_AND); } | 215 <Normal> "&=" { PUSH_TOKEN(Token::ASSIGN_BIT_AND); } |
| 308 <Normal> "+=" { PUSH_TOKEN(ASSIGN_ADD); } | 216 <Normal> "+=" { PUSH_TOKEN(Token::ASSIGN_ADD); } |
| 309 <Normal> "-=" { PUSH_TOKEN(ASSIGN_SUB); } | 217 <Normal> "-=" { PUSH_TOKEN(Token::ASSIGN_SUB); } |
| 310 <Normal> "*=" { PUSH_TOKEN(ASSIGN_MUL); } | 218 <Normal> "*=" { PUSH_TOKEN(Token::ASSIGN_MUL); } |
| 311 <Normal> "/=" { PUSH_TOKEN(ASSIGN_DIV); } | 219 <Normal> "/=" { PUSH_TOKEN(Token::ASSIGN_DIV); } |
| 312 <Normal> "%=" { PUSH_TOKEN(ASSIGN_MOD); } | 220 <Normal> "%=" { PUSH_TOKEN(Token::ASSIGN_MOD); } |
| 313 | 221 |
| 314 <Normal> "===" { PUSH_TOKEN(EQ_STRICT); } | 222 <Normal> "===" { PUSH_TOKEN(Token::EQ_STRICT); } |
| 315 <Normal> "==" { PUSH_TOKEN(EQ); } | 223 <Normal> "==" { PUSH_TOKEN(Token::EQ); } |
| 316 <Normal> "=" { PUSH_TOKEN(ASSIGN); } | 224 <Normal> "=" { PUSH_TOKEN(Token::ASSIGN); } |
| 317 <Normal> "!==" { PUSH_TOKEN(NE_STRICT); } | 225 <Normal> "!==" { PUSH_TOKEN(Token::NE_STRICT); } |
| 318 <Normal> "!=" { PUSH_TOKEN(NE); } | 226 <Normal> "!=" { PUSH_TOKEN(Token::NE); } |
| 319 <Normal> "!" { PUSH_TOKEN(NOT); } | 227 <Normal> "!" { PUSH_TOKEN(Token::NOT); } |
| 320 | 228 |
| 321 <Normal> "//" :=> SingleLineComment | 229 <Normal> "//" :=> SingleLineComment |
| 322 <Normal> "/*" :=> MultiLineComment | 230 <Normal> "/*" :=> MultiLineComment |
| 323 <Normal> "<!--" :=> HtmlComment | 231 <Normal> "<!--" :=> HtmlComment |
| 324 | 232 |
| 325 <Normal> ">>>=" { PUSH_TOKEN(ASSIGN_SHR); } | 233 <Normal> ">>>=" { PUSH_TOKEN(Token::ASSIGN_SHR); } |
| 326 <Normal> "<<=" { PUSH_TOKEN(ASSIGN_SHL); } | 234 <Normal> "<<=" { PUSH_TOKEN(Token::ASSIGN_SHL); } |
| 327 <Normal> ">>=" { PUSH_TOKEN(ASSIGN_SAR); } | 235 <Normal> ">>=" { PUSH_TOKEN(Token::ASSIGN_SAR); } |
| 328 <Normal> "<=" { PUSH_TOKEN(LTE); } | 236 <Normal> "<=" { PUSH_TOKEN(Token::LTE); } |
| 329 <Normal> ">=" { PUSH_TOKEN(GTE); } | 237 <Normal> ">=" { PUSH_TOKEN(Token::GTE); } |
| 330 <Normal> "<<" { PUSH_TOKEN(SHL); } | 238 <Normal> "<<" { PUSH_TOKEN(Token::SHL); } |
| 331 <Normal> ">>" { PUSH_TOKEN(SAR); } | 239 <Normal> ">>" { PUSH_TOKEN(Token::SAR); } |
| 332 <Normal> "<" { PUSH_TOKEN(LT); } | 240 <Normal> "<" { PUSH_TOKEN(Token::LT); } |
| 333 <Normal> ">" { PUSH_TOKEN(GT); } | 241 <Normal> ">" { PUSH_TOKEN(Token::GT); } |
| 334 | 242 |
| 335 <Normal> '0x' hex_digit+ { PUSH_NUMBER(); } | 243 <Normal> '0x' hex_digit+ { PUSH_TOKEN(Token::NUMBER); } |
| 336 <Normal> "." digit+ maybe_exponent { PUSH_NUMBER(); } | 244 <Normal> "." digit+ maybe_exponent { PUSH_TOKEN(Token::NUMBER); } |
| 337 <Normal> digit+ ("." digit+)? maybe_exponent { PUSH_NUMBER(); } | 245 <Normal> digit+ ("." digit+)? maybe_exponent { PUSH_TOKEN(Token::NUMBER) ; } |
| 338 | 246 |
| 339 <Normal> "(" { PUSH_TOKEN(LPAREN); } | 247 <Normal> "(" { PUSH_TOKEN(Token::LPAREN); } |
| 340 <Normal> ")" { PUSH_TOKEN(RPAREN); } | 248 <Normal> ")" { PUSH_TOKEN(Token::RPAREN); } |
| 341 <Normal> "[" { PUSH_TOKEN(LBRACK); } | 249 <Normal> "[" { PUSH_TOKEN(Token::LBRACK); } |
| 342 <Normal> "]" { PUSH_TOKEN(RBRACK); } | 250 <Normal> "]" { PUSH_TOKEN(Token::RBRACK); } |
| 343 <Normal> "{" { PUSH_TOKEN(LBRACE); } | 251 <Normal> "{" { PUSH_TOKEN(Token::LBRACE); } |
| 344 <Normal> "}" { PUSH_TOKEN(RBRACE); } | 252 <Normal> "}" { PUSH_TOKEN(Token::RBRACE); } |
| 345 <Normal> ":" { PUSH_TOKEN(COLON); } | 253 <Normal> ":" { PUSH_TOKEN(Token::COLON); } |
| 346 <Normal> ";" { PUSH_TOKEN(SEMICOLON); } | 254 <Normal> ";" { PUSH_TOKEN(Token::SEMICOLON); } |
| 347 <Normal> "." { PUSH_TOKEN(PERIOD); } | 255 <Normal> "." { PUSH_TOKEN(Token::PERIOD); } |
| 348 <Normal> "?" { PUSH_TOKEN(CONDITIONAL); } | 256 <Normal> "?" { PUSH_TOKEN(Token::CONDITIONAL); } |
| 349 <Normal> "++" { PUSH_TOKEN(INC); } | 257 <Normal> "++" { PUSH_TOKEN(Token::INC); } |
| 350 <Normal> "--" { PUSH_TOKEN(DEC); } | 258 <Normal> "--" { PUSH_TOKEN(Token::DEC); } |
| 351 | 259 |
| 352 <Normal> "||" { PUSH_TOKEN(OR); } | 260 <Normal> "||" { PUSH_TOKEN(Token::OR); } |
| 353 <Normal> "&&" { PUSH_TOKEN(AND); } | 261 <Normal> "&&" { PUSH_TOKEN(Token::AND); } |
| 354 | 262 |
| 355 <Normal> "|" { PUSH_TOKEN(BIT_OR); } | 263 <Normal> "|" { PUSH_TOKEN(Token::BIT_OR); } |
| 356 <Normal> "^" { PUSH_TOKEN(BIT_XOR); } | 264 <Normal> "^" { PUSH_TOKEN(Token::BIT_XOR); } |
| 357 <Normal> "&" { PUSH_TOKEN(BIT_AND); } | 265 <Normal> "&" { PUSH_TOKEN(Token::BIT_AND); } |
| 358 <Normal> "+" { PUSH_TOKEN(ADD); } | 266 <Normal> "+" { PUSH_TOKEN(Token::ADD); } |
| 359 <Normal> "-" { PUSH_TOKEN(SUB); } | 267 <Normal> "-" { PUSH_TOKEN(Token::SUB); } |
| 360 <Normal> "*" { PUSH_TOKEN(MUL); } | 268 <Normal> "*" { PUSH_TOKEN(Token::MUL); } |
| 361 <Normal> "/" { PUSH_TOKEN(DIV); } | 269 <Normal> "/" { PUSH_TOKEN(Token::DIV); } |
| 362 <Normal> "%" { PUSH_TOKEN(MOD); } | 270 <Normal> "%" { PUSH_TOKEN(Token::MOD); } |
| 363 <Normal> "~" { PUSH_TOKEN(BIT_NOT); } | 271 <Normal> "~" { PUSH_TOKEN(Token::BIT_NOT); } |
| 364 <Normal> "," { PUSH_TOKEN(COMMA); } | 272 <Normal> "," { PUSH_TOKEN(Token::COMMA); } |
| 365 | 273 |
| 366 <Normal> line_terminator+ { PUSH_LINE_TERMINATOR(); } | 274 <Normal> line_terminator+ { PUSH_LINE_TERMINATOR(); } |
| 367 <Normal> whitespace { SKIP(); } | 275 <Normal> whitespace { SKIP(); } |
| 368 | 276 |
| 369 <Normal> ["] :=> DoubleQuoteString | 277 <Normal> ["] :=> DoubleQuoteString |
| 370 <Normal> ['] :=> SingleQuoteString | 278 <Normal> ['] :=> SingleQuoteString |
| 371 | 279 |
| 372 <Normal> identifier_start :=> Identifier | 280 <Normal> identifier_start :=> Identifier |
| 373 | 281 |
| 374 <Normal> eof { PUSH_EOS(); return 1; } | 282 <Normal> eof { PUSH_TOKEN(Token::EOS); return 1; } |
| 375 <Normal> any { TERMINATE_ILLEGAL(); } | 283 <Normal> any { TERMINATE_ILLEGAL(); } |
| 376 | 284 |
| 377 <DoubleQuoteString> "\\\"" { goto yy0; } | 285 <DoubleQuoteString> "\\\"" { goto yy0; } |
| 378 <DoubleQuoteString> '"' { PUSH_STRING();} | 286 <DoubleQuoteString> '"' { PUSH_TOKEN(Token::STRING);} |
| 379 <DoubleQuoteString> any { goto yy0; } | 287 <DoubleQuoteString> any { goto yy0; } |
| 380 | 288 |
| 381 <SingleQuoteString> "\\'" { goto yy0; } | 289 <SingleQuoteString> "\\'" { goto yy0; } |
| 382 <SingleQuoteString> "'" { PUSH_STRING();} | 290 <SingleQuoteString> "'" { PUSH_TOKEN(Token::STRING);} |
| 383 <SingleQuoteString> any { goto yy0; } | 291 <SingleQuoteString> any { goto yy0; } |
| 384 | 292 |
| 385 <Identifier> identifier_char+ { goto yy0; } | 293 <Identifier> identifier_char+ { goto yy0; } |
| 386 <Identifier> any { PUSH_IDENTIFIER(); } | 294 <Identifier> any { cursor--; PUSH_TOKEN(Token::IDENTIFIE R); } |
| 387 | 295 |
| 388 <SingleLineComment> line_terminator { PUSH_LINE_TERMINATOR();} | 296 <SingleLineComment> line_terminator { PUSH_LINE_TERMINATOR();} |
| 389 <SingleLineComment> eof { PUSH_LINE_TERMINATOR();} | 297 <SingleLineComment> eof { PUSH_LINE_TERMINATOR();} |
| 390 <SingleLineComment> any { goto yy0; } | 298 <SingleLineComment> any { goto yy0; } |
| 391 | 299 |
| 392 <MultiLineComment> [*][//] { PUSH_LINE_TERMINATOR();} | 300 <MultiLineComment> [*][//] { PUSH_LINE_TERMINATOR();} |
| 393 <MultiLineComment> eof { TERMINATE_ILLEGAL(); } | 301 <MultiLineComment> eof { TERMINATE_ILLEGAL(); } |
| 394 <MultiLineComment> any { goto yy0; } | 302 <MultiLineComment> any { goto yy0; } |
| 395 | 303 |
| 396 <HtmlComment> eof { TERMINATE_ILLEGAL(); } | 304 <HtmlComment> eof { TERMINATE_ILLEGAL(); } |
| 397 <HtmlComment> "-->" { PUSH_LINE_TERMINATOR();} | 305 <HtmlComment> "-->" { PUSH_LINE_TERMINATOR();} |
| 398 <HtmlComment> any { goto yy0; } | 306 <HtmlComment> any { goto yy0; } |
| 399 */ | 307 */ |
| 400 | 308 |
| 401 fill: | 309 fill: |
| 402 ssize_t unfinishedSize = cursor-start; | 310 int unfinishedSize = cursor-start; |
| 403 printf( | 311 if (FLAG_trace_lexer) { |
| 404 "scanner needs a refill. Exiting for now with:\n" | 312 printf( |
| 405 " saved fill state = %d\n" | 313 "scanner needs a refill. Exiting for now with:\n" |
| 406 " unfinished token size = %ld\n", | 314 " saved fill state = %d\n" |
| 407 state, | 315 " unfinished token size = %d\n", |
| 408 unfinishedSize | 316 state, |
| 409 ); | 317 unfinishedSize |
| 410 | 318 ); |
| 411 if(0<unfinishedSize && start<limit) | 319 if(0 < unfinishedSize && start < limit) { |
| 412 { | 320 printf(" unfinished token is: "); |
| 413 printf(" unfinished token is: "); | 321 fwrite(start, 1, cursor-start, stdout); |
| 414 fwrite(start, 1, cursor-start, stdout); | 322 putchar('\n'); |
| 323 } | |
| 415 putchar('\n'); | 324 putchar('\n'); |
| 416 } | 325 } |
| 417 putchar('\n'); | |
| 418 | 326 |
| 419 /* | 327 /* |
| 420 * Once we get here, we can get rid of | 328 * Once we get here, we can get rid of |
| 421 * everything before start and after limit. | 329 * everything before start and after limit. |
| 422 */ | 330 */ |
| 423 if(eof==true) goto start; | 331 if (eof == true) goto start; |
| 424 if(buffer<start) | 332 if (buffer < start) { |
| 425 { | 333 size_t start_offset = start - buffer; |
| 426 size_t startOffset = start-buffer; | 334 memmove(buffer, start, limit - start); |
| 427 memmove(buffer, start, limit-start); | 335 marker -= start_offset; |
| 428 marker -= startOffset; | 336 cursor -= start_offset; |
| 429 cursor -= startOffset; | 337 limit -= start_offset; |
| 430 limit -= startOffset; | 338 start -= start_offset; |
| 431 start -= startOffset; | 339 real_start += start_offset; |
| 432 } | 340 } |
| 433 return 0; | 341 return 0; |
| 434 } | 342 } |
| 435 }; | 343 }; |
| 436 | 344 |
| 437 // ---------------------------------------------------------------------- | |
| 438 int main( | |
| 439 int argc, | |
| 440 char **argv | |
| 441 ) | |
| 442 { | |
| 443 // Parse cmd line | |
| 444 int input = 0; | |
| 445 if(1<argc) | |
| 446 { | |
| 447 input = open(argv[1], O_RDONLY | O_BINARY); | |
| 448 if(input<0) | |
| 449 { | |
| 450 fprintf( | |
| 451 stderr, | |
| 452 "could not open file %s\n", | |
| 453 argv[1] | |
| 454 ); | |
| 455 exit(1); | |
| 456 } | |
| 457 } | |
| 458 | 345 |
| 459 /* | 346 ExperimentalScanner::ExperimentalScanner(const char* fname) : |
| 460 * Tokenize input file by pushing batches | 347 current_(0), fetched_(0) { |
| 461 * of data one by one into the scanner. | 348 file_ = fopen(fname, "rb"); |
| 462 */ | 349 scanner_ = new PushScanner(this); |
| 463 const size_t batchSize = 256; | 350 } |
| 464 uint8_t buffer[batchSize]; | |
| 465 PushScanner scanner; | |
| 466 while(1) | |
| 467 { | |
| 468 ssize_t n = read(input, buffer, batchSize); | |
| 469 if (scanner.push(buffer, n)) { | |
| 470 printf("Scanner: illegal data\n"); | |
| 471 return 1; | |
| 472 } | |
| 473 if(n<batchSize) break; | |
| 474 } | |
| 475 scanner.push(0, -1); | |
| 476 close(input); | |
| 477 | 351 |
| 478 // Done | 352 |
| 479 return 0; | 353 ExperimentalScanner::~ExperimentalScanner() { |
| 354 fclose(file_); | |
| 480 } | 355 } |
| 356 | |
| 357 | |
| 358 void ExperimentalScanner::FillTokens() { | |
| 359 current_ = 0; | |
| 360 fetched_ = 0; | |
| 361 uint8_t chars[BUFFER_SIZE]; | |
| 362 int n = static_cast<int>(fread(&chars, 1, BUFFER_SIZE, file_)); | |
| 363 scanner_->push(chars, n); | |
| 364 } | |
| 365 | |
| 366 | |
| 367 Token::Value ExperimentalScanner::Next(int* beg_pos, int* end_pos) { | |
| 368 if (current_ == fetched_) { | |
| 369 FillTokens(); | |
| 370 } | |
| 371 *beg_pos = beg_[current_]; | |
| 372 *end_pos = end_[current_]; | |
| 373 Token::Value res = token_[current_]; | |
| 374 if (token_[current_] != Token::Token::EOS && | |
| 375 token_[current_] != Token::ILLEGAL) current_++; | |
| 376 return res; | |
| 377 } | |
| 378 | |
| 379 | |
| 380 void ExperimentalScanner::Record(Token::Value token, int beg, int end) { | |
| 381 if (token == Token::EOS) end--; | |
| 382 token_[fetched_] = token; | |
| 383 beg_[fetched_] = beg; | |
| 384 end_[fetched_] = end; | |
| 385 fetched_++; | |
| 386 } | |
| OLD | NEW |