| Index: src/trusted/validator_arm/dgen_input.py
|
| ===================================================================
|
| --- src/trusted/validator_arm/dgen_input.py (revision 8275)
|
| +++ src/trusted/validator_arm/dgen_input.py (working copy)
|
| @@ -1,8 +1,8 @@
|
| #!/usr/bin/python
|
| #
|
| -# Copyright 2012 The Native Client Authors. All rights reserved.
|
| -# Use of this source code is governed by a BSD-style license that can
|
| -# be found in the LICENSE file.
|
| +# Copyright (c) 2012 The Native Client Authors. All rights reserved.
|
| +# Use of this source code is governed by a BSD-style license that can be
|
| +# found in the LICENSE file.
|
| #
|
|
|
| """
|
| @@ -11,124 +11,322 @@
|
| The grammar implemented here is roughly (taking some liberties with whitespace
|
| and comment parsing):
|
|
|
| -table_file ::= ( BLANK_LINE | table_def ) end_of_file ;
|
| -table_def ::= "--" IDENT CITATION NL
|
| - table_header
|
| - ( table_row )+ ;
|
| -table_header ::= ( IDENT "(" BITRANGE ")" )+ ;
|
| -table_row ::= ( PATTERN )+ ACTION ;
|
| +table_file ::= table+ eof ;
|
|
|
| -IDENT = /[a-z0-9_]+/
|
| -CITATION = "(" /[^)]+/ ")"
|
| -BITRANGE = /[0-9]+/ (":" /[0-9]+/)?
|
| -PATTERN = /[10x_]+/
|
| -ACTION = ( "=" IDENT | "->" IDENT ) ( "(" IDENT ")" )?
|
| -NL = a newline
|
| -BLANK_LINE = what you might expect it to be
|
| +arch ::= '(' word+ ')'
|
| +citation ::= '(' word+ ')'
|
| +decoder_action ::= id (id (word (id)?)?)?
|
| +decoder_method ::= '->' id
|
| +footer ::= '+' '-' '-'
|
| +header ::= "|" (id '(' int (':' int)? ')')+
|
| +int ::= word (where word is a sequence of digits)
|
| +id ::= word (where word is sequence of letters, digits and _)
|
| +parenthesized_exp ::= '(' (word | punctuation)+ ')'
|
| +row ::= '|' pattern+ (decoder_action arch? | decoder_method)?
|
| +table ::= table_desc header row+ footer
|
| +table_desc ::= '+' '-' '-' id citation?
|
| +
|
| +If a decoder_action has more than one element, the interpretation is as follows:
|
| + id[0] = action (plus optional architecture) to apply.
|
| + id[1] = Arm rule action corresponds to.
|
| + word = Bit pattern of rule.
|
| + id[3] = Name defining additional constraints for match.
|
| """
|
|
|
| import re
|
| import dgen_core
|
|
|
| -# These globals track the parser state.
|
| -_in = None
|
| -_line_no = None
|
| -_tables = None
|
| -_line = None
|
| -_last_row = None
|
| +def parse_tables(input):
|
| + """Entry point for the parser. Input should be a file or file-like."""
|
| + parser = Parser()
|
| + return parser.parse(input)
|
|
|
| +class Token(object):
|
| + """Holds a (characterized) unit of text for the parser."""
|
|
|
| -def parse_tables(input):
|
| - """Entry point for the parser. Input should be a file or file-like."""
|
| - global _in, _line_no, _tables
|
| - _in = input
|
| - _line_no = 0
|
| - _tables = []
|
| - next_line()
|
| + def __init__(self, kind, value=None):
|
| + self.kind = kind
|
| + self.value = value if value else kind
|
|
|
| - while not end_of_file():
|
| - blank_line() or table_def() or unexpected()
|
| +class Parser(object):
|
| + """Parses a set of tables from the input file."""
|
|
|
| - return _tables
|
| + def __init__(self):
|
| + self.words = [] # Words left on current line, not yet parsed.
|
| + self.line_no = 0 # The current line being parsed
|
| + self.token = None # The next token from the input.
|
| + self.at_eof = False # True when end of file reached
|
| + # Punctuation allowed. Must be ordered such that if
|
| + # p1 != p2 are in the list, and p1.startswith(p2), then
|
| + # p1 must appear before p2.
|
| + self.punctuation = ['->', '-', '+', '(', ')', '=', ':', '"', '|']
|
|
|
| + def parse(self, input):
|
| + self.input = input # The remaining input to parse
|
| + decoder = dgen_core.Decoder() # The generated decoder of parse tables.
|
| + # Read tables while there.
|
| + while self._next_token().kind == '+':
|
| + self._table(decoder)
|
|
|
| -def blank_line():
|
| - if _line:
|
| - return False
|
| + if not self._next_token().kind == 'eof':
|
| + self._unexpected('unrecognized input found')
|
| + if not decoder.primary:
|
| + self._unexpected('No primary table defined')
|
| + if not decoder.tables():
|
| + self._unexpected('No tables defined')
|
| + return decoder
|
|
|
| - next_line();
|
| - return True
|
| + def _arch(self):
|
| + """ arch ::= '(' word+ ')' """
|
| + return ' '.join(self._parenthesized_exp())
|
|
|
| + def _citation(self):
|
| + """ citation ::= '(' word+ ')' """
|
| + return ' '.join(self._parenthesized_exp())
|
|
|
| -def table_def():
|
| - global _last_row
|
| + def _decoder_action(self):
|
| + """ decoder_action ::= id (id (word (id)?)?)? """
|
| + self._read_token('=')
|
| + name = None
|
| + rule = None
|
| + pattern = None
|
| + constraints = None
|
| + count = 1
|
| + while self._next_token().kind not in ['|', '+', '(']:
|
| + if count == 1:
|
| + name = _name_if_not_none(self._id())
|
| + elif count == 2:
|
| + rule = _name_if_not_none(self._id())
|
| + elif count == 3:
|
| + pattern = _name_if_not_none(self._read_token('word').value)
|
| + elif count == 4:
|
| + constraints = _name_if_not_none(self._id())
|
| + else:
|
| + self._unexpected('Too many entries in decoder action')
|
| + count += 1
|
| + return dgen_core.DecoderAction(name, rule, pattern, constraints)
|
|
|
| - m = re.match(r'^-- ([^ ]+) \(([^)]+)\)', _line)
|
| - if not m: return False
|
| + def _decoder_method(self):
|
| + """ decoder_method ::= '->' id """
|
| + self._read_token('->')
|
| + name = self._id()
|
| + return dgen_core.DecoderMethod(name)
|
|
|
| - table = dgen_core.Table(m.group(1), m.group(2))
|
| - next_line()
|
| - while blank_line(): pass
|
| + def _footer(self):
|
| + """ footer ::= '+' '-' '-' """
|
| + self._read_token('+')
|
| + self._read_token('-')
|
| + self._read_token('-')
|
|
|
| - table_header(table)
|
| - _last_row = None
|
| - while not end_of_file() and not blank_line():
|
| - table_row(table)
|
| + def _header(self, table):
|
| + """ header ::= "|" (id '(' int (':' int)? ')')+ """
|
| + self._read_token('|')
|
| + while not self._next_token().kind == '|':
|
| + name = self._read_token('word').value
|
| + self._read_token('(')
|
| + hi_bit = self._int()
|
| + lo_bit = hi_bit
|
| + if self._next_token().kind == ':':
|
| + self._read_token(':')
|
| + lo_bit = self._int()
|
| + self._read_token(')')
|
| + table.add_column(name, hi_bit, lo_bit)
|
|
|
| - _tables.append(table)
|
| - return True
|
| + def _int(self):
|
| + """ int ::= word
|
|
|
| + Int is a sequence of digits. Returns the corresponding integer.
|
| + """
|
| + word = self._read_token('word').value
|
| + m = re.match(r'^([0-9]+)$', word)
|
| + if m:
|
| + return int(word)
|
| + else:
|
| + self._unexpected('integer expected but found "%s"' % word)
|
|
|
| -def table_header(table):
|
| - for col in _line.split():
|
| - m = re.match(r'^([a-z0-9_]+)\(([0-9]+)(:([0-9]+))?\)$', col, re.I)
|
| - if not m: raise Exception('Invalid column header: %s' % col)
|
| + def _id(self):
|
| + """ id ::= word
|
|
|
| - hi_bit = int(m.group(2))
|
| - if m.group(4):
|
| - lo_bit = int(m.group(4))
|
| + Word starts with a letter, and followed by letters, digits,
|
| + and underscores. Returns the corresponding identifier.
|
| + """
|
| + ident = self._read_token('word').value
|
| + m = re.match(r'^[a-zA-Z][a-zA-Z0-9_]*$', ident)
|
| + if not m:
|
| + self._unexpected('"%s" is not a valid identifier' % ident)
|
| + return ident
|
| +
|
| + def _parenthesized_exp(self, minlength=1):
|
| + """ parenthesized_exp ::= '(' (word | punctuation)+ ')'
|
| +
|
| + The punctuation doesn't include ')'.
|
| + Returns the sequence of token values parsed.
|
| + """
|
| + self._read_token('(')
|
| + words = []
|
| + while not self._at_eof() and self._next_token().kind != ')':
|
| + words.append(self._read_token().value)
|
| + if len(words) < minlength:
|
| + self._unexpected("len(parenthesized expression) < %s" % minlength)
|
| + self._read_token(')')
|
| + return words
|
| +
|
| + def _repeat_entry_from_last(self, n,
|
| + cur_pattern, cur_action, cur_arch,
|
| + last_patterns, last_action, last_arch):
|
| + """Gets the nth entry from the last row and returns it.
|
| +
|
| + Arguments are:
|
| + n - The (column) to be repeated from the last row.
|
| + cur_pattern - The current pattern being built
|
| + cur_action - The current action being built.
|
| + """
|
| +
|
| + def _row(self, table, last_patterns=None,
|
| + last_action=None, last_arch= None):
|
| + """ row ::= '|' pattern+ (decoder_action arch? | decoder_method)?
|
| +
|
| + Passed in sequence of patterns and action from last row,
|
| + and returns list of patterns and action from this row.
|
| + """
|
| + patterns = []
|
| + expanded_patterns = []
|
| + action = None
|
| + arch = None
|
| + self._read_token('|')
|
| + while self._next_token().kind not in ['=', '->', '|', '+']:
|
| + pattern = None
|
| + if self._next_token().kind == '"':
|
| + # comment?
|
| + self._read_token('"')
|
| + if last_patterns:
|
| + pat_len = len(patterns)
|
| + last_pat_len = len(last_patterns)
|
| + if pat_len < last_pat_len:
|
| + pattern = last_patterns[len(patterns)]
|
| + elif pat_len == last_pat_len:
|
| + action = last_action
|
| + arch = last_arch
|
| + break
|
| + else:
|
| + self._unexpected('Can\'t determine value for "')
|
| else:
|
| - lo_bit = hi_bit
|
| - table.add_column(m.group(1), hi_bit, lo_bit)
|
| - next_line()
|
| + self._unexpected('Can\'t determine value for "')
|
| + elif self._next_token().kind == '-':
|
| + pattern = self._read_token('-').value
|
| + else:
|
| + pattern = self._read_token('word').value
|
| + if pattern:
|
| + col = len(patterns)
|
| + patterns.append(pattern)
|
| + pattern = table.define_pattern(pattern, col)
|
| + expanded_patterns.append(pattern)
|
| + else:
|
| + self._unexpected("Pattern illegal, can't continue")
|
| + if self._next_token().kind == '=':
|
| + if action:
|
| + self._unexpected('action specified when " defined action')
|
| + action = self._decoder_action()
|
| + if self._next_token().kind == '(':
|
| + arch = self._arch()
|
| + elif self._next_token().kind == '->':
|
| + if action:
|
| + self._unexpected('action specified when " defined action')
|
| + action = self._decoder_method()
|
| + if self._next_token().kind in ['|', '+']:
|
| + if not action:
|
| + self._unexpected('No action defined for row')
|
| + table.add_row(expanded_patterns, action, arch)
|
| + return (patterns, action, arch)
|
| + else:
|
| + self._unexpected('Malformed row')
|
|
|
| + def _table(self, decoder):
|
| + """ table ::= table_desc header row+ footer """
|
| + table = self._table_desc()
|
| + print 'Reading table %s...' % table.name
|
| + self._header(table)
|
| + (pattern, action, arch) = self._row(table)
|
| + while not self._next_token().kind == '+':
|
| + (pattern, action, arch) = self._row(table, pattern, action, arch)
|
| + if not decoder.add(table):
|
| + self._unexpected('Multiple tables with name %s' % table.name)
|
| + self._footer()
|
|
|
| -def table_row(table):
|
| - global _last_row
|
| + def _table_desc(self):
|
| + """ table_desc ::= '+' '-' '-' id citation? """
|
| + self._read_token('+')
|
| + self._read_token('-')
|
| + self._read_token('-')
|
| + name = self._id()
|
| + citation = None
|
| + if self._next_token().kind == '(':
|
| + citation = self._citation()
|
| + return dgen_core.Table(name, citation)
|
|
|
| - row = _line.split()
|
| - for i in range(0, len(row)):
|
| - if row[i] == '"': row[i] = _last_row[i]
|
| - _last_row = row
|
| + def _at_eof(self):
|
| + """Returns true if next token is the eof token."""
|
| + return self._next_token().kind == 'eof'
|
|
|
| - action = row[-1]
|
| - patterns = row[:-1]
|
| - table.add_row(patterns, action)
|
| - next_line()
|
| + def _read_token(self, kind=None):
|
| + """Reads and returns the next token from input."""
|
| + token = self._next_token()
|
| + self.token = None
|
| + if kind and kind != token.kind:
|
| + self._unexpected('Expected "%s" but found "%s"'
|
| + % (kind, token.kind))
|
| + return token
|
|
|
| + def _next_token(self):
|
| + """Returns the next token from the input."""
|
| + # First see if cached.
|
| + if self.token: return self.token
|
|
|
| -def end_of_file():
|
| - return _line is None
|
| + # If no more tokens left on the current line. read
|
| + # input till more tokens are found
|
| + while not self.at_eof and not self.words:
|
| + self.words = self._read_line().split()
|
|
|
| + if self.words:
|
| + # More tokens found. Convert the first word to a token.
|
| + word = self.words.pop(0)
|
| + # First remove any applicable punctuation.
|
| + for p in self.punctuation:
|
| + index = word.find(p)
|
| + if index == 0:
|
| + # Found punctuation, return it.
|
| + self._pushback(word[len(p):])
|
| + self.token = Token(p)
|
| + return self.token
|
| + elif index > 0:
|
| + self._pushback(word[index:])
|
| + word = word[:index]
|
| + # if reached, word doesn't contain any punctuation, so return it.
|
| + self.token = Token('word', word)
|
| + else:
|
| + # No more tokens found, assume eof.
|
| + self.token = Token('eof')
|
| + return self.token
|
|
|
| -def next_line():
|
| - "Reads the next non-comment line"
|
| - global _line_no, _line
|
| + def _pushback(self, word):
|
| + """Puts word back onto the list of words."""
|
| + if word:
|
| + self.words.insert(0, word)
|
|
|
| - _line_no += 1
|
| - _line = _in.readline()
|
| - while True:
|
| - if _line:
|
| - if _line[0] == '#':
|
| - # skip comment line and continue search.
|
| - _line_no += 1
|
| - _line = _in.readline()
|
| - continue
|
| - _line = re.sub(r'#.*', '', _line).strip()
|
| - else:
|
| - _line = None
|
| - # if reached, found line.
|
| - return
|
| + def _read_line(self):
|
| + """Reads the next line of input, and returns it. Otherwise None."""
|
| + self.line_no += 1
|
| + line = self.input.readline()
|
| + if line:
|
| + return re.sub(r'#.*', '', line).strip()
|
| + else:
|
| + self.at_eof = True
|
| + return ''
|
|
|
| -def unexpected():
|
| - raise Exception('Line %d: Unexpected line in input: %s' % (_line_no, _line))
|
| + def _unexpected(self, context='Unexpected line in input'):
|
| + """Reports that we didn't find the expected context."""
|
| + raise Exception('Line %d: %s' % (self.line_no, context))
|
| +
|
| +def _name_if_not_none(id):
|
| + """ Returns id unless string 'None' """
|
| + return None if id and id == 'None' else id
|
|
|