Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(96)

Unified Diff: src/lexer/lexer.re

Issue 26764004: Invoke generated lexer along with baseline lexer to compare results. (Closed) Base URL: https://v8.googlecode.com/svn/branches/experimental/parser
Patch Set: Created 7 years, 2 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « src/lexer/lexer.gyp ('k') | src/lexer/lexer-shell.cc » ('j') | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: src/lexer/lexer.re
diff --git a/src/lexer/lexer.re b/src/lexer/lexer.re
index 370de928ae7e3dc92aa924eac4674741a3a4a020..d2a8603b6107e7d5e61af0d1bb83d71b0d1ce10f 100644
--- a/src/lexer/lexer.re
+++ b/src/lexer/lexer.re
@@ -4,7 +4,6 @@
#include <stdlib.h>
#include <string.h>
-
/*
TODO:
- SpiderMonkey compatibility hack: " --> something" is treated as a single line comment.
@@ -13,7 +12,16 @@ TODO:
*/
-/*!types:re2c */
+
+enum Condition {
+ EConditionNormal,
+ EConditionDoubleQuoteString,
+ EConditionSingleQuoteString,
+ EConditionIdentifier,
+ EConditionSingleLineComment,
+ EConditionMultiLineComment,
+ EConditionHtmlComment
ulan 2013/10/14 14:28:52 Had to move it here from the generated file, because…
+};
#if defined(WIN32)
@@ -36,102 +44,17 @@ TODO:
#endif
-// ----------------------------------------------------------------------
-#define PUSH_EOS(T) { printf("got eos\n"); }
-#define PUSH_TOKEN(T) { \
- printf("got token %s (%d)\n", tokenNames[T], T); \
- SKIP(); }
-#define PUSH_STRING() { \
- printf("got string\n"); \
- size_t tokenSize = cursor-start; \
- fwrite(start, tokenSize, 1, stdout); \
- printf("\n"); \
- SKIP(); }
-#define PUSH_NUMBER() { \
- printf("got number\n"); \
- size_t tokenSize = cursor-start; \
- fwrite(start, tokenSize, 1, stdout); \
- printf("\n"); \
- SKIP(); }
-#define PUSH_IDENTIFIER() { \
- --cursor; \
- printf("got identifier: "); \
- size_t tokenSize = cursor-start; \
- fwrite(start, tokenSize, 1, stdout); \
- printf("\n"); \
- SKIP(); }
-#define PUSH_LINE_TERMINATOR() { printf("got line terminator\n"); SKIP();}
-#define TERMINATE_ILLEGAL() { return 1; }
-
-#define TOKENS \
- TOK(EOS) \
- TOK(LPAREN) \
- TOK(RPAREN) \
- TOK(LBRACK) \
- TOK(RBRACK) \
- TOK(LBRACE) \
- TOK(RBRACE) \
- TOK(COLON) \
- TOK(SEMICOLON) \
- TOK(PERIOD) \
- TOK(CONDITIONAL) \
- TOK(INC) \
- TOK(DEC) \
- TOK(ASSIGN) \
- TOK(ASSIGN_BIT_OR) \
- TOK(ASSIGN_BIT_XOR) \
- TOK(ASSIGN_BIT_AND) \
- TOK(ASSIGN_SHL) \
- TOK(ASSIGN_SAR) \
- TOK(ASSIGN_SHR) \
- TOK(ASSIGN_ADD) \
- TOK(ASSIGN_SUB) \
- TOK(ASSIGN_MUL) \
- TOK(ASSIGN_DIV) \
- TOK(ASSIGN_MOD) \
- TOK(COMMA) \
- TOK(OR) \
- TOK(AND) \
- TOK(BIT_OR) \
- TOK(BIT_XOR) \
- TOK(BIT_AND) \
- TOK(SHL) \
- TOK(SAR) \
- TOK(ADD) \
- TOK(SUB) \
- TOK(MUL) \
- TOK(DIV) \
- TOK(MOD) \
- TOK(EQ) \
- TOK(NE) \
- TOK(EQ_STRICT) \
- TOK(NE_STRICT) \
- TOK(LT) \
- TOK(GT) \
- TOK(LTE) \
- TOK(GTE) \
- TOK(NOT) \
- TOK(BIT_NOT) \
+#include "lexer.h"
+using namespace v8::internal;
// ----------------------------------------------------------------------
-static const char *tokenNames[] =
-{
- #define TOK(x) #x,
- TOKENS
- #undef TOK
-};
+#define PUSH_TOKEN(T) { send(T); SKIP(); }
+#define PUSH_LINE_TERMINATOR() { SKIP(); }
+#define TERMINATE_ILLEGAL() { return 1; }
// ----------------------------------------------------------------------
class PushScanner
{
-public:
-
- enum Token
- {
- #define TOK(x) x,
- TOKENS
- #undef TOK
- };
private:
@@ -143,6 +66,7 @@ private:
uint8_t *start;
uint8_t *cursor;
uint8_t *marker;
+ int real_start;
uint8_t *buffer;
uint8_t *bufferEnd;
@@ -150,10 +74,12 @@ private:
uint8_t yych;
uint32_t yyaccept;
+ ExperimentalScanner* sink_;
+
public:
// ----------------------------------------------------------------------
- PushScanner()
+ PushScanner(ExperimentalScanner* sink)
{
limit = 0;
start = 0;
@@ -164,6 +90,8 @@ public:
buffer = 0;
eof = false;
bufferEnd = 0;
+ sink_ = sink;
+ real_start = 0;
}
// ----------------------------------------------------------------------
@@ -172,49 +100,27 @@ public:
}
// ----------------------------------------------------------------------
- void send(
- Token token
- )
- {
- size_t tokenSize = cursor-start;
- const char *tokenName = tokenNames[token];
- printf(
- "scanner is pushing out a token of type %d (%s)",
- token,
- tokenName
- );
-
- if(token==EOS) putchar('\n');
- else
- {
- size_t tokenNameSize = strlen(tokenNames[token]);
- size_t padSize = 20-(20<tokenNameSize ? 20 : tokenNameSize);
- for(size_t i=0; i<padSize; ++i) putchar(' ');
- printf(" : ---->");
-
- fwrite(
- start,
- tokenSize,
- 1,
- stdout
- );
-
- printf("<----\n");
+ void send(Token::Value token) {
+ int beg = (start - buffer) + real_start;
+ int end = (cursor - buffer) + real_start;
+ if (FLAG_trace_lexer) {
+ printf("got %s at (%d, %d): ", Token::Name(token), beg, end);
+ for (uint8_t* s = start; s != cursor; s++) printf("%c", (char)*s);
+ printf(".\n");
}
+ sink_->Record(token, beg, end);
}
// ----------------------------------------------------------------------
- uint32_t push(
- const void *input,
- ssize_t inputSize
- )
- {
- printf(
- "scanner is receiving a new data batch of length %ld\n"
- "scanner continues with saved state = %d\n",
- inputSize,
- state
- );
+ uint32_t push(const void *input, int input_size) {
+ if (FLAG_trace_lexer) {
+ printf(
+ "scanner is receiving a new data batch of length %d\n"
+ "scanner continues with saved state = %d\n",
+ input_size,
+ state
+ );
+ }
/*
* Data source is signaling end of file when batch size
@@ -224,12 +130,12 @@ public:
* the longest keyword, so given our grammar, 32 is a safe bet.
*/
uint8_t null[64];
- const ssize_t maxFill = 32;
- if(inputSize<maxFill) // FIXME: do something about this!!!
+ const int maxFill = 32;
+ if(input_size<maxFill) // FIXME: do something about this!!!
{
eof = true;
input = null;
- inputSize = sizeof(null);
+ input_size = sizeof(null);
memset(null, 0, sizeof(null));
}
@@ -246,7 +152,7 @@ public:
*
*/
size_t used = limit-buffer;
- size_t needed = used+inputSize;
+ size_t needed = used+input_size;
size_t allocated = bufferEnd-buffer;
if(allocated<needed)
{
@@ -263,8 +169,8 @@ public:
start = buffer + startOffset;
limit = limitOffset + buffer;
}
- memcpy(limit, input, inputSize);
- limit += inputSize;
+ memcpy(limit, input, input_size);
+ limit += input_size;
// The scanner starts here
#define YYLIMIT limit
@@ -283,7 +189,9 @@ public:
start:
- printf("Starting a round; state: %d, condition: %d\n", state, condition);
+ if (FLAG_trace_lexer) {
+ printf("Starting a round; state: %d, condition: %d\n", state, condition);
+ }
/*!re2c
re2c:indent:top = 1;
@@ -302,66 +210,66 @@ public:
hex_digit = [0-9a-fA-F];
maybe_exponent = ('e' [-+]? digit+)?;
- <Normal> "|=" { PUSH_TOKEN(ASSIGN_BIT_OR); }
- <Normal> "^=" { PUSH_TOKEN(ASSIGN_BIT_XOR); }
- <Normal> "&=" { PUSH_TOKEN(ASSIGN_BIT_AND); }
- <Normal> "+=" { PUSH_TOKEN(ASSIGN_ADD); }
- <Normal> "-=" { PUSH_TOKEN(ASSIGN_SUB); }
- <Normal> "*=" { PUSH_TOKEN(ASSIGN_MUL); }
- <Normal> "/=" { PUSH_TOKEN(ASSIGN_DIV); }
- <Normal> "%=" { PUSH_TOKEN(ASSIGN_MOD); }
-
- <Normal> "===" { PUSH_TOKEN(EQ_STRICT); }
- <Normal> "==" { PUSH_TOKEN(EQ); }
- <Normal> "=" { PUSH_TOKEN(ASSIGN); }
- <Normal> "!==" { PUSH_TOKEN(NE_STRICT); }
- <Normal> "!=" { PUSH_TOKEN(NE); }
- <Normal> "!" { PUSH_TOKEN(NOT); }
+ <Normal> "|=" { PUSH_TOKEN(Token::ASSIGN_BIT_OR); }
+ <Normal> "^=" { PUSH_TOKEN(Token::ASSIGN_BIT_XOR); }
+ <Normal> "&=" { PUSH_TOKEN(Token::ASSIGN_BIT_AND); }
+ <Normal> "+=" { PUSH_TOKEN(Token::ASSIGN_ADD); }
+ <Normal> "-=" { PUSH_TOKEN(Token::ASSIGN_SUB); }
+ <Normal> "*=" { PUSH_TOKEN(Token::ASSIGN_MUL); }
+ <Normal> "/=" { PUSH_TOKEN(Token::ASSIGN_DIV); }
+ <Normal> "%=" { PUSH_TOKEN(Token::ASSIGN_MOD); }
+
+ <Normal> "===" { PUSH_TOKEN(Token::EQ_STRICT); }
+ <Normal> "==" { PUSH_TOKEN(Token::EQ); }
+ <Normal> "=" { PUSH_TOKEN(Token::ASSIGN); }
+ <Normal> "!==" { PUSH_TOKEN(Token::NE_STRICT); }
+ <Normal> "!=" { PUSH_TOKEN(Token::NE); }
+ <Normal> "!" { PUSH_TOKEN(Token::NOT); }
<Normal> "//" :=> SingleLineComment
<Normal> "/*" :=> MultiLineComment
<Normal> "<!--" :=> HtmlComment
- <Normal> ">>>=" { PUSH_TOKEN(ASSIGN_SHR); }
- <Normal> "<<=" { PUSH_TOKEN(ASSIGN_SHL); }
- <Normal> ">>=" { PUSH_TOKEN(ASSIGN_SAR); }
- <Normal> "<=" { PUSH_TOKEN(LTE); }
- <Normal> ">=" { PUSH_TOKEN(GTE); }
- <Normal> "<<" { PUSH_TOKEN(SHL); }
- <Normal> ">>" { PUSH_TOKEN(SAR); }
- <Normal> "<" { PUSH_TOKEN(LT); }
- <Normal> ">" { PUSH_TOKEN(GT); }
-
- <Normal> '0x' hex_digit+ { PUSH_NUMBER(); }
- <Normal> "." digit+ maybe_exponent { PUSH_NUMBER(); }
- <Normal> digit+ ("." digit+)? maybe_exponent { PUSH_NUMBER(); }
-
- <Normal> "(" { PUSH_TOKEN(LPAREN); }
- <Normal> ")" { PUSH_TOKEN(RPAREN); }
- <Normal> "[" { PUSH_TOKEN(LBRACK); }
- <Normal> "]" { PUSH_TOKEN(RBRACK); }
- <Normal> "{" { PUSH_TOKEN(LBRACE); }
- <Normal> "}" { PUSH_TOKEN(RBRACE); }
- <Normal> ":" { PUSH_TOKEN(COLON); }
- <Normal> ";" { PUSH_TOKEN(SEMICOLON); }
- <Normal> "." { PUSH_TOKEN(PERIOD); }
- <Normal> "?" { PUSH_TOKEN(CONDITIONAL); }
- <Normal> "++" { PUSH_TOKEN(INC); }
- <Normal> "--" { PUSH_TOKEN(DEC); }
-
- <Normal> "||" { PUSH_TOKEN(OR); }
- <Normal> "&&" { PUSH_TOKEN(AND); }
-
- <Normal> "|" { PUSH_TOKEN(BIT_OR); }
- <Normal> "^" { PUSH_TOKEN(BIT_XOR); }
- <Normal> "&" { PUSH_TOKEN(BIT_AND); }
- <Normal> "+" { PUSH_TOKEN(ADD); }
- <Normal> "-" { PUSH_TOKEN(SUB); }
- <Normal> "*" { PUSH_TOKEN(MUL); }
- <Normal> "/" { PUSH_TOKEN(DIV); }
- <Normal> "%" { PUSH_TOKEN(MOD); }
- <Normal> "~" { PUSH_TOKEN(BIT_NOT); }
- <Normal> "," { PUSH_TOKEN(COMMA); }
+ <Normal> ">>>=" { PUSH_TOKEN(Token::ASSIGN_SHR); }
+ <Normal> "<<=" { PUSH_TOKEN(Token::ASSIGN_SHL); }
+ <Normal> ">>=" { PUSH_TOKEN(Token::ASSIGN_SAR); }
+ <Normal> "<=" { PUSH_TOKEN(Token::LTE); }
+ <Normal> ">=" { PUSH_TOKEN(Token::GTE); }
+ <Normal> "<<" { PUSH_TOKEN(Token::SHL); }
+ <Normal> ">>" { PUSH_TOKEN(Token::SAR); }
+ <Normal> "<" { PUSH_TOKEN(Token::LT); }
+ <Normal> ">" { PUSH_TOKEN(Token::GT); }
+
+ <Normal> '0x' hex_digit+ { PUSH_TOKEN(Token::NUMBER); }
+ <Normal> "." digit+ maybe_exponent { PUSH_TOKEN(Token::NUMBER); }
+ <Normal> digit+ ("." digit+)? maybe_exponent { PUSH_TOKEN(Token::NUMBER); }
+
+ <Normal> "(" { PUSH_TOKEN(Token::LPAREN); }
+ <Normal> ")" { PUSH_TOKEN(Token::RPAREN); }
+ <Normal> "[" { PUSH_TOKEN(Token::LBRACK); }
+ <Normal> "]" { PUSH_TOKEN(Token::RBRACK); }
+ <Normal> "{" { PUSH_TOKEN(Token::LBRACE); }
+ <Normal> "}" { PUSH_TOKEN(Token::RBRACE); }
+ <Normal> ":" { PUSH_TOKEN(Token::COLON); }
+ <Normal> ";" { PUSH_TOKEN(Token::SEMICOLON); }
+ <Normal> "." { PUSH_TOKEN(Token::PERIOD); }
+ <Normal> "?" { PUSH_TOKEN(Token::CONDITIONAL); }
+ <Normal> "++" { PUSH_TOKEN(Token::INC); }
+ <Normal> "--" { PUSH_TOKEN(Token::DEC); }
+
+ <Normal> "||" { PUSH_TOKEN(Token::OR); }
+ <Normal> "&&" { PUSH_TOKEN(Token::AND); }
+
+ <Normal> "|" { PUSH_TOKEN(Token::BIT_OR); }
+ <Normal> "^" { PUSH_TOKEN(Token::BIT_XOR); }
+ <Normal> "&" { PUSH_TOKEN(Token::BIT_AND); }
+ <Normal> "+" { PUSH_TOKEN(Token::ADD); }
+ <Normal> "-" { PUSH_TOKEN(Token::SUB); }
+ <Normal> "*" { PUSH_TOKEN(Token::MUL); }
+ <Normal> "/" { PUSH_TOKEN(Token::DIV); }
+ <Normal> "%" { PUSH_TOKEN(Token::MOD); }
+ <Normal> "~" { PUSH_TOKEN(Token::BIT_NOT); }
+ <Normal> "," { PUSH_TOKEN(Token::COMMA); }
<Normal> line_terminator+ { PUSH_LINE_TERMINATOR(); }
<Normal> whitespace { SKIP(); }
@@ -371,19 +279,19 @@ public:
<Normal> identifier_start :=> Identifier
- <Normal> eof { PUSH_EOS(); return 1; }
+ <Normal> eof { PUSH_TOKEN(Token::EOS); return 1; }
<Normal> any { TERMINATE_ILLEGAL(); }
<DoubleQuoteString> "\\\"" { goto yy0; }
- <DoubleQuoteString> '"' { PUSH_STRING();}
+ <DoubleQuoteString> '"' { PUSH_TOKEN(Token::STRING);}
<DoubleQuoteString> any { goto yy0; }
<SingleQuoteString> "\\'" { goto yy0; }
- <SingleQuoteString> "'" { PUSH_STRING();}
+ <SingleQuoteString> "'" { PUSH_TOKEN(Token::STRING);}
<SingleQuoteString> any { goto yy0; }
<Identifier> identifier_char+ { goto yy0; }
- <Identifier> any { PUSH_IDENTIFIER(); }
+ <Identifier> any { cursor--; PUSH_TOKEN(Token::IDENTIFIER); }
<SingleLineComment> line_terminator { PUSH_LINE_TERMINATOR();}
<SingleLineComment> eof { PUSH_LINE_TERMINATOR();}
@@ -399,82 +307,80 @@ public:
*/
fill:
- ssize_t unfinishedSize = cursor-start;
- printf(
- "scanner needs a refill. Exiting for now with:\n"
- " saved fill state = %d\n"
- " unfinished token size = %ld\n",
- state,
- unfinishedSize
- );
-
- if(0<unfinishedSize && start<limit)
- {
- printf(" unfinished token is: ");
- fwrite(start, 1, cursor-start, stdout);
+ int unfinishedSize = cursor-start;
+ if (FLAG_trace_lexer) {
+ printf(
+ "scanner needs a refill. Exiting for now with:\n"
+ " saved fill state = %d\n"
+ " unfinished token size = %d\n",
+ state,
+ unfinishedSize
+ );
+ if(0 < unfinishedSize && start < limit) {
+ printf(" unfinished token is: ");
+ fwrite(start, 1, cursor-start, stdout);
+ putchar('\n');
+ }
putchar('\n');
}
- putchar('\n');
/*
* Once we get here, we can get rid of
* everything before start and after limit.
*/
- if(eof==true) goto start;
- if(buffer<start)
- {
- size_t startOffset = start-buffer;
- memmove(buffer, start, limit-start);
- marker -= startOffset;
- cursor -= startOffset;
- limit -= startOffset;
- start -= startOffset;
+ if (eof == true) goto start;
+ if (buffer < start) {
+ size_t start_offset = start - buffer;
+ memmove(buffer, start, limit - start);
+ marker -= start_offset;
+ cursor -= start_offset;
+ limit -= start_offset;
+ start -= start_offset;
+ real_start += start_offset;
}
return 0;
}
};
-// ----------------------------------------------------------------------
-int main(
- int argc,
- char **argv
-)
-{
- // Parse cmd line
- int input = 0;
- if(1<argc)
- {
- input = open(argv[1], O_RDONLY | O_BINARY);
- if(input<0)
- {
- fprintf(
- stderr,
- "could not open file %s\n",
- argv[1]
- );
- exit(1);
- }
- }
- /*
- * Tokenize input file by pushing batches
- * of data one by one into the scanner.
- */
- const size_t batchSize = 256;
- uint8_t buffer[batchSize];
- PushScanner scanner;
- while(1)
- {
- ssize_t n = read(input, buffer, batchSize);
- if (scanner.push(buffer, n)) {
- printf("Scanner: illegal data\n");
- return 1;
- }
- if(n<batchSize) break;
- }
- scanner.push(0, -1);
- close(input);
+ExperimentalScanner::ExperimentalScanner(const char* fname) :
+ current_(0), fetched_(0) {
+ file_ = fopen(fname, "rb");
+ scanner_ = new PushScanner(this);
+}
+
+
+ExperimentalScanner::~ExperimentalScanner() {
+ fclose(file_);
+}
+
+
+void ExperimentalScanner::FillTokens() {
+ current_ = 0;
+ fetched_ = 0;
+ uint8_t chars[BUFFER_SIZE];
+ int n = static_cast<int>(fread(&chars, 1, BUFFER_SIZE, file_));
+ scanner_->push(chars, n);
+}
+
+
+Token::Value ExperimentalScanner::Next(int* beg_pos, int* end_pos) {
+ if (current_ == fetched_) {
+ FillTokens();
+ }
+ *beg_pos = beg_[current_];
+ *end_pos = end_[current_];
+ Token::Value res = token_[current_];
+ if (token_[current_] != Token::Token::EOS &&
+ token_[current_] != Token::ILLEGAL) current_++;
+ return res;
+}
+
- // Done
- return 0;
+void ExperimentalScanner::Record(Token::Value token, int beg, int end) {
+ if (token == Token::EOS) end--;
+ token_[fetched_] = token;
+ beg_[fetched_] = beg;
+ end_[fetched_] = end;
+ fetched_++;
}
« no previous file with comments | « src/lexer/lexer.gyp ('k') | src/lexer/lexer-shell.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698