| OLD | NEW |
| 1 #include <fcntl.h> | 1 #include <fcntl.h> |
| 2 #include <stdio.h> | 2 #include <stdio.h> |
| 3 #include <stddef.h> | 3 #include <stddef.h> |
| 4 #include <stdlib.h> | 4 #include <stdlib.h> |
| 5 #include <string.h> | 5 #include <string.h> |
| 6 | 6 |
| 7 /* | 7 // TODO: |
| 8 TODO: | 8 // - SpiderMonkey compatibility hack: " --> something" is treated |
| 9 - SpiderMonkey compatibility hack: " --> something" is treated as a single line comment. | 9 // as a single line comment. |
| 10 - An identifier cannot start immediately after a number. | 10 // - An identifier cannot start immediately after a number. |
| 11 | |
| 12 */ | |
| 13 | |
| 14 | |
| 15 | 11 |
| 16 enum Condition { | 12 enum Condition { |
| 17 EConditionNormal, | 13 kConditionNormal, |
| 18 EConditionDoubleQuoteString, | 14 kConditionDoubleQuoteString, |
| 19 EConditionSingleQuoteString, | 15 kConditionSingleQuoteString, |
| 20 EConditionIdentifier, | 16 kConditionIdentifier, |
| 21 EConditionSingleLineComment, | 17 kConditionSingleLineComment, |
| 22 EConditionMultiLineComment, | 18 kConditionMultiLineComment, |
| 23 EConditionHtmlComment | 19 kConditionHtmlComment |
| 24 }; | 20 }; |
| 25 | 21 |
| 26 #if defined(WIN32) | 22 #if defined(WIN32) |
| 27 | 23 |
| 28 typedef signed char int8_t; | 24 typedef signed char int8_t; |
| 29 typedef signed short int16_t; | 25 typedef signed short int16_t; |
| 30 typedef signed int int32_t; | 26 typedef signed int int32_t; |
| 31 | 27 |
| 32 typedef unsigned char uint8_t; | 28 typedef unsigned char uint8_t; |
| 33 typedef unsigned short uint16_t; | 29 typedef unsigned short uint16_t; |
| 34 typedef unsigned int uint32_t; | 30 typedef unsigned int uint32_t; |
| 35 | 31 |
| 36 #else | 32 #else |
| 37 | 33 |
| 38 #include <stdint.h> | 34 #include <stdint.h> |
| 39 #include <unistd.h> | 35 #include <unistd.h> |
| 40 | 36 |
| 41 #ifndef O_BINARY | 37 #ifndef O_BINARY |
| 42 #define O_BINARY 0 | 38 #define O_BINARY 0 |
| 43 #endif | |
| 44 | |
| 45 #endif | 39 #endif |
| 46 | 40 |
| 41 #endif // defined(WIN32) |
| 42 |
| 47 #include "lexer.h" | 43 #include "lexer.h" |
| 44 |
| 48 using namespace v8::internal; | 45 using namespace v8::internal; |
| 49 | 46 |
| 50 // ---------------------------------------------------------------------- | |
| 51 #define PUSH_TOKEN(T) { send(T); SKIP(); } | 47 #define PUSH_TOKEN(T) { send(T); SKIP(); } |
| 52 #define PUSH_LINE_TERMINATOR() { SKIP(); } | 48 #define PUSH_LINE_TERMINATOR() { SKIP(); } |
| 53 #define TERMINATE_ILLEGAL() { return 1; } | 49 #define TERMINATE_ILLEGAL() { return 1; } |
| 54 | 50 |
| 55 // ---------------------------------------------------------------------- | 51 class PushScanner { |
| 56 class PushScanner | |
| 57 { | |
| 58 | |
| 59 private: | |
| 60 | |
| 61 bool eof; | |
| 62 int32_t state; | |
| 63 int32_t condition; | |
| 64 | |
| 65 uint8_t *limit; | |
| 66 uint8_t *start; | |
| 67 uint8_t *cursor; | |
| 68 uint8_t *marker; | |
| 69 int real_start; | |
| 70 | |
| 71 uint8_t *buffer; | |
| 72 uint8_t *bufferEnd; | |
| 73 | |
| 74 uint8_t yych; | |
| 75 uint32_t yyaccept; | |
| 76 | |
| 77 ExperimentalScanner* sink_; | |
| 78 | 52 |
| 79 public: | 53 public: |
| 80 | 54 PushScanner(ExperimentalScanner* sink): |
| 81 // ---------------------------------------------------------------------- | 55 eof_(false), |
| 82 PushScanner(ExperimentalScanner* sink) | 56 state_(-1), |
| 83 { | 57 condition_(kConditionNormal), |
| 84 limit = 0; | 58 limit_(NULL), |
| 85 start = 0; | 59 start_(NULL), |
| 86 state = -1; | 60 cursor_(NULL), |
| 87 condition = EConditionNormal; | 61 marker_(NULL), |
| 88 cursor = 0; | 62 real_start_(0), |
| 89 marker = 0; | 63 buffer_(NULL), |
| 90 buffer = 0; | 64 buffer_end_(NULL), |
| 91 eof = false; | 65 yych(0), |
| 92 bufferEnd = 0; | 66 yyaccept(0), |
| 93 sink_ = sink; | 67 sink_(sink) { |
| 94 real_start = 0; | 68 } |
| 95 } | 69 |
| 96 | 70 ~PushScanner() { |
| 97 // ---------------------------------------------------------------------- | 71 } |
| 98 ~PushScanner() | 72 |
| 99 { | 73 void send(Token::Value token) { |
| 100 } | 74 int beg = (start_ - buffer_) + real_start_; |
| 101 | 75 int end = (cursor_ - buffer_) + real_start_; |
| 102 // ---------------------------------------------------------------------- | 76 if (FLAG_trace_lexer) { |
| 103 void send(Token::Value token) { | 77 printf("got %s at (%d, %d): ", Token::Name(token), beg, end); |
| 104 int beg = (start - buffer) + real_start; | 78 for (uint8_t* s = start_; s != cursor_; s++) printf("%c", (char)*s); |
| 105 int end = (cursor - buffer) + real_start; | 79 printf(".\n"); |
| 106 if (FLAG_trace_lexer) { | 80 } |
| 107 printf("got %s at (%d, %d): ", Token::Name(token), beg, end); | 81 sink_->Record(token, beg, end); |
| 108 for (uint8_t* s = start; s != cursor; s++) printf("%c", (char)*s); | 82 } |
| 109 printf(".\n"); | 83 |
| 110 } | 84 uint32_t push(const void *input, int input_size) { |
| 111 sink_->Record(token, beg, end); | 85 if (FLAG_trace_lexer) { |
| 112 } | 86 printf( |
| 113 | 87 "scanner is receiving a new data batch of length %d\n" |
| 114 // ---------------------------------------------------------------------- | 88 "scanner continues with saved state_ = %d\n", |
| 115 uint32_t push(const void *input, int input_size) { | 89 input_size, |
| 116 if (FLAG_trace_lexer) { | 90 state_ |
| 117 printf( | 91 ); |
| 118 "scanner is receiving a new data batch of length %d\n" | 92 } |
| 119 "scanner continues with saved state = %d\n", | 93 |
| 120 input_size, | 94 // Data source is signaling end of file when batch size |
| 121 state | 95 // is less than max_fill. This is slightly annoying because |
| 122 ); | 96 // max_fill is a value that can only be known after re2c does |
| 123 } | 97 // its thing. Practically though, max_fill is never bigger than |
| 124 | 98 // the longest keyword, so given our grammar, 32 is a safe bet. |
| 125 /* | 99 |
| 126 * Data source is signaling end of file when batch size | 100 uint8_t null[64]; |
| 127 * is less than maxFill. This is slightly annoying because | 101 const int max_fill = 32; |
| 128 * maxFill is a value that can only be known after re2c does | 102 if (input_size < max_fill) { // FIXME: do something about this!!! |
| 129 * its thing. Practically though, maxFill is never bigger than | 103 eof_ = true; |
| 130 * the longest keyword, so given our grammar, 32 is a safe bet. | 104 input = null; |
| 131 */ | 105 input_size = sizeof(null); |
| 132 uint8_t null[64]; | 106 memset(null, 0, sizeof(null)); |
| 133 const int maxFill = 32; | 107 } |
| 134 if(input_size<maxFill) // FIXME: do something about this!!! | 108 |
| 135 { | 109 |
| 136 eof = true; | 110 // When we get here, we have a partially |
| 137 input = null; | 111 // consumed buffer_ which is in the following state_: |
| 138 input_size = sizeof(null); | 112 // last valid char last valid buffer_ spot |
| 139 memset(null, 0, sizeof(null)); | 113 // v v |
| 140 } | 114 // +-------------------+-------------+---------------+-------------+----------------------+ |
| 141 | 115 // ^ ^ ^ ^ ^ ^ |
| 142 /* | 116 // buffer_ start_ marker_ cursor_ limit_ buffer_end_ |
| 143 * When we get here, we have a partially | 117 // |
| 144 * consumed buffer which is in the following state: | 118 // We need to stretch the buffer_ and concatenate the new chunk of input to it |
| 145 * last valid char last valid buffer spot | 119 |
| 146 * v v | 120 size_t used = limit_ - buffer_; |
| 147 * +-------------------+-------------+---------------+-------------+----------------------+ | 121 size_t needed = used + input_size; |
| 148 * ^ ^ ^ ^ ^ ^ | 122 size_t allocated = buffer_end_ - buffer_; |
| 149 * buffer start marker cursor limit bufferEnd | 123 if(allocated < needed) { |
| 150 * | 124 size_t limit__offset = limit_ - buffer_; |
| 151 * We need to stretch the buffer and concatenate the new chunk of input to it | 125 size_t start__offset = start_ - buffer_; |
| 152 * | 126 size_t marker__offset = marker_ - buffer_; |
| 153 */ | 127 size_t cursor__offset = cursor_ - buffer_; |
| 154 size_t used = limit-buffer; | 128 |
| 155 size_t needed = used+input_size; | 129 buffer_ = (uint8_t*)realloc(buffer_, needed); |
| 156 size_t allocated = bufferEnd-buffer; | 130 buffer_end_ = needed + buffer_; |
| 157 if(allocated<needed) | 131 |
| 158 { | 132 marker_ = marker__offset + buffer_; |
| 159 size_t limitOffset = limit-buffer; | 133 cursor_ = cursor__offset + buffer_; |
| 160 size_t startOffset = start-buffer; | 134 start_ = buffer_ + start__offset; |
| 161 size_t markerOffset = marker-buffer; | 135 limit_ = limit__offset + buffer_; |
| 162 size_t cursorOffset = cursor-buffer; | 136 } |
| 163 | 137 memcpy(limit_, input, input_size); |
| 164 buffer = (uint8_t*)realloc(buffer, needed); | 138 limit_ += input_size; |
| 165 bufferEnd = needed+buffer; | 139 |
| 166 | 140 // The scanner start_s here |
| 167 marker = markerOffset + buffer; | 141 #define YYLIMIT limit_ |
| 168 cursor = cursorOffset + buffer; | 142 #define YYCURSOR cursor_ |
| 169 start = buffer + startOffset; | 143 #define YYMARKER marker_ |
| 170 limit = limitOffset + buffer; | 144 #define YYCTYPE uint8_t |
| 171 } | 145 |
| 172 memcpy(limit, input, input_size); | 146 #define SKIP() { start_ = cursor_; YYSETCONDITION(kConditionNormal); goto yy0; } |
| 173 limit += input_size; | 147 #define YYFILL(n) { goto fill; } |
| 174 | 148 |
| 175 // The scanner starts here | 149 #define YYGETSTATE() state_ |
| 176 #define YYLIMIT limit | 150 #define YYSETSTATE(x) { state_ = (x); } |
| 177 #define YYCURSOR cursor | 151 |
| 178 #define YYMARKER marker | 152 #define YYGETCONDITION() condition_ |
| 179 #define YYCTYPE uint8_t | 153 #define YYSETCONDITION(x) { condition_ = (x); } |
| 180 | 154 |
| 181 #define SKIP() { start = cursor; YYSETCONDITION(EConditionNormal); goto yy0; } | 155 start_: |
| 182 #define YYFILL(n) { goto fill; } | 156 if (FLAG_trace_lexer) { |
| 183 | 157 printf("Starting a round; state_: %d, condition_: %d\n", state_, condition_); |
| 184 #define YYGETSTATE() state | 158 } |
| 185 #define YYSETSTATE(x) { state = (x); } | 159 |
| 186 | 160 /*!re2c |
| 187 #define YYGETCONDITION() condition | 161 re2c:indent:top = 1; |
| 188 #define YYSETCONDITION(x) { condition = (x); } | 162 re2c:yych:conversion = 0; |
| 189 | 163 re2c:condenumprefix = kCondition; |
| 190 start: | 164 re2c:define:YYCONDTYPE = Condition; |
| 191 | 165 |
| 192 if (FLAG_trace_lexer) { | 166 eof = "\000"; |
| 193 printf("Starting a round; state: %d, condition: %d\n", state, condition); | 167 any = [\000-\377]; |
| 194 } | 168 whitespace_char = [ \t\v\f\r]; |
| 195 | 169 whitespace = whitespace_char+; |
| 196 /*!re2c | 170 identifier_start_ = [$_\\a-zA-z]; |
| 197 re2c:indent:top = 1; | 171 identifier_char = [$_\\a-zA-z0-9]; |
| 198 re2c:yych:conversion = 0; | 172 line_terminator = [\n\r]+; |
| 199 re2c:condenumprefix = ECondition; | 173 digit = [0-9]; |
| 200 re2c:define:YYCONDTYPE = Condition; | 174 hex_digit = [0-9a-fA-F]; |
| 201 | 175 maybe_exponent = ('e' [-+]? digit+)?; |
| 202 eof = "\000"; | 176 |
| 203 any = [\000-\377]; | 177 <Normal> "|=" { PUSH_TOKEN(Token::ASSIGN_BIT_OR); } |
| 204 whitespace_char = [ \t\v\f\r]; | 178 <Normal> "^=" { PUSH_TOKEN(Token::ASSIGN_BIT_XOR); } |
| 205 whitespace = whitespace_char+; | 179 <Normal> "&=" { PUSH_TOKEN(Token::ASSIGN_BIT_AND); } |
| 206 identifier_start = [$_\\a-zA-z]; | 180 <Normal> "+=" { PUSH_TOKEN(Token::ASSIGN_ADD); } |
| 207 identifier_char = [$_\\a-zA-z0-9]; | 181 <Normal> "-=" { PUSH_TOKEN(Token::ASSIGN_SUB); } |
| 208 line_terminator = [\n\r]+; | 182 <Normal> "*=" { PUSH_TOKEN(Token::ASSIGN_MUL); } |
| 209 digit = [0-9]; | 183 <Normal> "/=" { PUSH_TOKEN(Token::ASSIGN_DIV); } |
| 210 hex_digit = [0-9a-fA-F]; | 184 <Normal> "%=" { PUSH_TOKEN(Token::ASSIGN_MOD); } |
| 211 maybe_exponent = ('e' [-+]? digit+)?; | 185 |
| 212 | 186 <Normal> "===" { PUSH_TOKEN(Token::EQ_STRICT); } |
| 213 <Normal> "|=" { PUSH_TOKEN(Token::ASSIGN_BIT_OR); } | 187 <Normal> "==" { PUSH_TOKEN(Token::EQ); } |
| 214 <Normal> "^=" { PUSH_TOKEN(Token::ASSIGN_BIT_XOR); } | 188 <Normal> "=" { PUSH_TOKEN(Token::ASSIGN); } |
| 215 <Normal> "&=" { PUSH_TOKEN(Token::ASSIGN_BIT_AND); } | 189 <Normal> "!==" { PUSH_TOKEN(Token::NE_STRICT); } |
| 216 <Normal> "+=" { PUSH_TOKEN(Token::ASSIGN_ADD); } | 190 <Normal> "!=" { PUSH_TOKEN(Token::NE); } |
| 217 <Normal> "-=" { PUSH_TOKEN(Token::ASSIGN_SUB); } | 191 <Normal> "!" { PUSH_TOKEN(Token::NOT); } |
| 218 <Normal> "*=" { PUSH_TOKEN(Token::ASSIGN_MUL); } | 192 |
| 219 <Normal> "/=" { PUSH_TOKEN(Token::ASSIGN_DIV); } | 193 <Normal> "//" :=> SingleLineComment |
| 220 <Normal> "%=" { PUSH_TOKEN(Token::ASSIGN_MOD); } | 194 <Normal> "/*" :=> MultiLineComment |
| 221 | 195 <Normal> "<!--" :=> HtmlComment |
| 222 <Normal> "===" { PUSH_TOKEN(Token::EQ_STRICT); } | 196 |
| 223 <Normal> "==" { PUSH_TOKEN(Token::EQ); } | 197 <Normal> ">>>=" { PUSH_TOKEN(Token::ASSIGN_SHR); } |
| 224 <Normal> "=" { PUSH_TOKEN(Token::ASSIGN); } | 198 <Normal> "<<=" { PUSH_TOKEN(Token::ASSIGN_SHL); } |
| 225 <Normal> "!==" { PUSH_TOKEN(Token::NE_STRICT); } | 199 <Normal> ">>=" { PUSH_TOKEN(Token::ASSIGN_SAR); } |
| 226 <Normal> "!=" { PUSH_TOKEN(Token::NE); } | 200 <Normal> "<=" { PUSH_TOKEN(Token::LTE); } |
| 227 <Normal> "!" { PUSH_TOKEN(Token::NOT); } | 201 <Normal> ">=" { PUSH_TOKEN(Token::GTE); } |
| 228 | 202 <Normal> "<<" { PUSH_TOKEN(Token::SHL); } |
| 229 <Normal> "//" :=> SingleLineComment | 203 <Normal> ">>" { PUSH_TOKEN(Token::SAR); } |
| 230 <Normal> "/*" :=> MultiLineComment | 204 <Normal> "<" { PUSH_TOKEN(Token::LT); } |
| 231 <Normal> "<!--" :=> HtmlComment | 205 <Normal> ">" { PUSH_TOKEN(Token::GT); } |
| 232 | 206 |
| 233 <Normal> ">>>=" { PUSH_TOKEN(Token::ASSIGN_SHR); } | 207 <Normal> '0x' hex_digit+ { PUSH_TOKEN(Token::NUMBER); } |
| 234 <Normal> "<<=" { PUSH_TOKEN(Token::ASSIGN_SHL); } | 208 <Normal> "." digit+ maybe_exponent { PUSH_TOKEN(Token::NUMBER); } |
| 235 <Normal> ">>=" { PUSH_TOKEN(Token::ASSIGN_SAR); } | 209 <Normal> digit+ ("." digit+)? maybe_exponent { PUSH_TOKEN(Token::NUMBER); } |
| 236 <Normal> "<=" { PUSH_TOKEN(Token::LTE); } | 210 |
| 237 <Normal> ">=" { PUSH_TOKEN(Token::GTE); } | 211 <Normal> "(" { PUSH_TOKEN(Token::LPAREN); } |
| 238 <Normal> "<<" { PUSH_TOKEN(Token::SHL); } | 212 <Normal> ")" { PUSH_TOKEN(Token::RPAREN); } |
| 239 <Normal> ">>" { PUSH_TOKEN(Token::SAR); } | 213 <Normal> "[" { PUSH_TOKEN(Token::LBRACK); } |
| 240 <Normal> "<" { PUSH_TOKEN(Token::LT); } | 214 <Normal> "]" { PUSH_TOKEN(Token::RBRACK); } |
| 241 <Normal> ">" { PUSH_TOKEN(Token::GT); } | 215 <Normal> "{" { PUSH_TOKEN(Token::LBRACE); } |
| 242 | 216 <Normal> "}" { PUSH_TOKEN(Token::RBRACE); } |
| 243 <Normal> '0x' hex_digit+ { PUSH_TOKEN(Token::NUMBER); } | 217 <Normal> ":" { PUSH_TOKEN(Token::COLON); } |
| 244 <Normal> "." digit+ maybe_exponent { PUSH_TOKEN(Token::NUMBER); } | 218 <Normal> ";" { PUSH_TOKEN(Token::SEMICOLON); } |
| 245 <Normal> digit+ ("." digit+)? maybe_exponent { PUSH_TOKEN(Token::NUMBER); } | 219 <Normal> "." { PUSH_TOKEN(Token::PERIOD); } |
| 246 | 220 <Normal> "?" { PUSH_TOKEN(Token::CONDITIONAL); } |
| 247 <Normal> "(" { PUSH_TOKEN(Token::LPAREN); } | 221 <Normal> "++" { PUSH_TOKEN(Token::INC); } |
| 248 <Normal> ")" { PUSH_TOKEN(Token::RPAREN); } | 222 <Normal> "--" { PUSH_TOKEN(Token::DEC); } |
| 249 <Normal> "[" { PUSH_TOKEN(Token::LBRACK); } | 223 |
| 250 <Normal> "]" { PUSH_TOKEN(Token::RBRACK); } | 224 <Normal> "||" { PUSH_TOKEN(Token::OR); } |
| 251 <Normal> "{" { PUSH_TOKEN(Token::LBRACE); } | 225 <Normal> "&&" { PUSH_TOKEN(Token::AND); } |
| 252 <Normal> "}" { PUSH_TOKEN(Token::RBRACE); } | 226 |
| 253 <Normal> ":" { PUSH_TOKEN(Token::COLON); } | 227 <Normal> "|" { PUSH_TOKEN(Token::BIT_OR); } |
| 254 <Normal> ";" { PUSH_TOKEN(Token::SEMICOLON); } | 228 <Normal> "^" { PUSH_TOKEN(Token::BIT_XOR); } |
| 255 <Normal> "." { PUSH_TOKEN(Token::PERIOD); } | 229 <Normal> "&" { PUSH_TOKEN(Token::BIT_AND); } |
| 256 <Normal> "?" { PUSH_TOKEN(Token::CONDITIONAL); } | 230 <Normal> "+" { PUSH_TOKEN(Token::ADD); } |
| 257 <Normal> "++" { PUSH_TOKEN(Token::INC); } | 231 <Normal> "-" { PUSH_TOKEN(Token::SUB); } |
| 258 <Normal> "--" { PUSH_TOKEN(Token::DEC); } | 232 <Normal> "*" { PUSH_TOKEN(Token::MUL); } |
| 259 | 233 <Normal> "/" { PUSH_TOKEN(Token::DIV); } |
| 260 <Normal> "||" { PUSH_TOKEN(Token::OR); } | 234 <Normal> "%" { PUSH_TOKEN(Token::MOD); } |
| 261 <Normal> "&&" { PUSH_TOKEN(Token::AND); } | 235 <Normal> "~" { PUSH_TOKEN(Token::BIT_NOT); } |
| 262 | 236 <Normal> "," { PUSH_TOKEN(Token::COMMA); } |
| 263 <Normal> "|" { PUSH_TOKEN(Token::BIT_OR); } | 237 |
| 264 <Normal> "^" { PUSH_TOKEN(Token::BIT_XOR); } | 238 <Normal> line_terminator+ { PUSH_LINE_TERMINATOR(); } |
| 265 <Normal> "&" { PUSH_TOKEN(Token::BIT_AND); } | 239 <Normal> whitespace { SKIP(); } |
| 266 <Normal> "+" { PUSH_TOKEN(Token::ADD); } | 240 |
| 267 <Normal> "-" { PUSH_TOKEN(Token::SUB); } | 241 <Normal> ["] :=> DoubleQuoteString |
| 268 <Normal> "*" { PUSH_TOKEN(Token::MUL); } | 242 <Normal> ['] :=> SingleQuoteString |
| 269 <Normal> "/" { PUSH_TOKEN(Token::DIV); } | 243 |
| 270 <Normal> "%" { PUSH_TOKEN(Token::MOD); } | 244 <Normal> identifier_start_ :=> Identifier |
| 271 <Normal> "~" { PUSH_TOKEN(Token::BIT_NOT); } | 245 |
| 272 <Normal> "," { PUSH_TOKEN(Token::COMMA); } | 246 <Normal> eof { PUSH_TOKEN(Token::EOS); return 1; } |
| 273 | 247 <Normal> any { TERMINATE_ILLEGAL(); } |
| 274 <Normal> line_terminator+ { PUSH_LINE_TERMINATOR(); } | 248 |
| 275 <Normal> whitespace { SKIP(); } | 249 <DoubleQuoteString> "\\\"" { goto yy0; } |
| 276 | 250 <DoubleQuoteString> '"' { PUSH_TOKEN(Token::STRING);} |
| 277 <Normal> ["] :=> DoubleQuoteString | 251 <DoubleQuoteString> any { goto yy0; } |
| 278 <Normal> ['] :=> SingleQuoteString | 252 |
| 279 | 253 <SingleQuoteString> "\\'" { goto yy0; } |
| 280 <Normal> identifier_start :=> Identifier | 254 <SingleQuoteString> "'" { PUSH_TOKEN(Token::STRING);} |
| 281 | 255 <SingleQuoteString> any { goto yy0; } |
| 282 <Normal> eof { PUSH_TOKEN(Token::EOS); return 1; } | 256 |
| 283 <Normal> any { TERMINATE_ILLEGAL(); } | 257 <Identifier> identifier_char+ { goto yy0; } |
| 284 | 258 <Identifier> any { cursor_--; PUSH_TOKEN(Token::IDENTIFIER); } |
| 285 <DoubleQuoteString> "\\\"" { goto yy0; } | 259 |
| 286 <DoubleQuoteString> '"' { PUSH_TOKEN(Token::STRING);} | 260 <SingleLineComment> line_terminator { PUSH_LINE_TERMINATOR();} |
| 287 <DoubleQuoteString> any { goto yy0; } | 261 <SingleLineComment> eof { PUSH_LINE_TERMINATOR();} |
| 288 | 262 <SingleLineComment> any { goto yy0; } |
| 289 <SingleQuoteString> "\\'" { goto yy0; } | 263 |
| 290 <SingleQuoteString> "'" { PUSH_TOKEN(Token::STRING);} | 264 <MultiLineComment> [*][//] { PUSH_LINE_TERMINATOR();} |
| 291 <SingleQuoteString> any { goto yy0; } | 265 <MultiLineComment> eof { TERMINATE_ILLEGAL(); } |
| 292 | 266 <MultiLineComment> any { goto yy0; } |
| 293 <Identifier> identifier_char+ { goto yy0; } | 267 |
| 294 <Identifier> any { cursor--; PUSH_TOKEN(Token::IDENTIFIER); } | 268 <HtmlComment> eof { TERMINATE_ILLEGAL(); } |
| 295 | 269 <HtmlComment> "-->" { PUSH_LINE_TERMINATOR();} |
| 296 <SingleLineComment> line_terminator { PUSH_LINE_TERMINATOR();} | 270 <HtmlComment> any { goto yy0; } |
| 297 <SingleLineComment> eof { PUSH_LINE_TERMINATOR();} | 271 */ |
| 298 <SingleLineComment> any { goto yy0; } | 272 |
| 299 | 273 fill: |
| 300 <MultiLineComment> [*][//] { PUSH_LINE_TERMINATOR();} | 274 int unfinished_size = cursor_-start_; |
| 301 <MultiLineComment> eof { TERMINATE_ILLEGAL(); } | 275 if (FLAG_trace_lexer) { |
| 302 <MultiLineComment> any { goto yy0; } | 276 printf( |
| 303 | 277 "scanner needs a refill. Exiting for now with:\n" |
| 304 <HtmlComment> eof { TERMINATE_ILLEGAL(); } | 278 " saved fill state_ = %d\n" |
| 305 <HtmlComment> "-->" { PUSH_LINE_TERMINATOR();} | 279 " unfinished token size = %d\n", |
| 306 <HtmlComment> any { goto yy0; } | 280 state_, |
| 307 */ | 281 unfinished_size |
| 308 | 282 ); |
| 309 fill: | 283 if(0 < unfinished_size && start_ < limit_) { |
| 310 int unfinishedSize = cursor-start; | 284 printf(" unfinished token is: "); |
| 311 if (FLAG_trace_lexer) { | 285 fwrite(start_, 1, cursor_-start_, stdout); |
| 312 printf( | 286 putchar('\n'); |
| 313 "scanner needs a refill. Exiting for now with:\n" | 287 } |
| 314 " saved fill state = %d\n" | 288 putchar('\n'); |
| 315 " unfinished token size = %d\n", | 289 } |
| 316 state, | 290 |
| 317 unfinishedSize | 291 if (eof_) goto start_; |
| 318 ); | 292 |
| 319 if(0 < unfinishedSize && start < limit) { | 293 // Once we get here, we can get rid of |
| 320 printf(" unfinished token is: "); | 294 // everything before start_ and after limit_. |
| 321 fwrite(start, 1, cursor-start, stdout); | 295 |
| 322 putchar('\n'); | 296 if (buffer_ < start_) { |
| 323 } | 297 size_t start__offset = start_ - buffer_; |
| 324 putchar('\n'); | 298 memmove(buffer_, start_, limit_ - start_); |
| 325 } | 299 marker_ -= start__offset; |
| 326 | 300 cursor_ -= start__offset; |
| 327 /* | 301 limit_ -= start__offset; |
| 328 * Once we get here, we can get rid of | 302 start_ -= start__offset; |
| 329 * everything before start and after limit. | 303 real_start_ += start__offset; |
| 330 */ | 304 } |
| 331 if (eof == true) goto start; | 305 return 0; |
| 332 if (buffer < start) { | 306 } |
| 333 size_t start_offset = start - buffer; | 307 |
| 334 memmove(buffer, start, limit - start); | 308 private: |
| 335 marker -= start_offset; | 309 bool eof_; |
| 336 cursor -= start_offset; | 310 int32_t state_; |
| 337 limit -= start_offset; | 311 int32_t condition_; |
| 338 start -= start_offset; | 312 |
| 339 real_start += start_offset; | 313 uint8_t* limit_; |
| 340 } | 314 uint8_t* start_; |
| 341 return 0; | 315 uint8_t* cursor_; |
| 342 } | 316 uint8_t* marker_; |
| 317 int real_start_; |
| 318 |
| 319 uint8_t* buffer_; |
| 320 uint8_t* buffer_end_; |
| 321 |
| 322 uint8_t yych; |
| 323 uint32_t yyaccept; |
| 324 |
| 325 ExperimentalScanner* sink_; |
| 343 }; | 326 }; |
| 344 | 327 |
| 345 | 328 |
| 346 ExperimentalScanner::ExperimentalScanner(const char* fname) : | 329 ExperimentalScanner::ExperimentalScanner(const char* fname) : |
| 347 current_(0), fetched_(0) { | 330 current_(0), fetched_(0) { |
| 348 file_ = fopen(fname, "rb"); | 331 file_ = fopen(fname, "rb"); |
| 349 scanner_ = new PushScanner(this); | 332 scanner_ = new PushScanner(this); |
| 350 } | 333 } |
| 351 | 334 |
| 352 | 335 |
| (...skipping 12 matching lines...) Expand all Loading... |
| 365 | 348 |
| 366 | 349 |
| 367 Token::Value ExperimentalScanner::Next(int* beg_pos, int* end_pos) { | 350 Token::Value ExperimentalScanner::Next(int* beg_pos, int* end_pos) { |
| 368 if (current_ == fetched_) { | 351 if (current_ == fetched_) { |
| 369 FillTokens(); | 352 FillTokens(); |
| 370 } | 353 } |
| 371 *beg_pos = beg_[current_]; | 354 *beg_pos = beg_[current_]; |
| 372 *end_pos = end_[current_]; | 355 *end_pos = end_[current_]; |
| 373 Token::Value res = token_[current_]; | 356 Token::Value res = token_[current_]; |
| 374 if (token_[current_] != Token::Token::EOS && | 357 if (token_[current_] != Token::Token::EOS && |
| 375 token_[current_] != Token::ILLEGAL) current_++; | 358 token_[current_] != Token::ILLEGAL) { |
| 359 current_++; |
| 360 } |
| 376 return res; | 361 return res; |
| 377 } | 362 } |
| 378 | 363 |
| 379 | 364 |
| 380 void ExperimentalScanner::Record(Token::Value token, int beg, int end) { | 365 void ExperimentalScanner::Record(Token::Value token, int beg, int end) { |
| 381 if (token == Token::EOS) end--; | 366 if (token == Token::EOS) end--; |
| 382 token_[fetched_] = token; | 367 token_[fetched_] = token; |
| 383 beg_[fetched_] = beg; | 368 beg_[fetched_] = beg; |
| 384 end_[fetched_] = end; | 369 end_[fetched_] = end; |
| 385 fetched_++; | 370 fetched_++; |
| 386 } | 371 } |
| OLD | NEW |