Chromium Code Reviews| Index: src/trusted/validator_arm/dgen_input.py |
| =================================================================== |
| --- src/trusted/validator_arm/dgen_input.py (revision 8275) |
| +++ src/trusted/validator_arm/dgen_input.py (working copy) |
| @@ -1,8 +1,8 @@ |
| #!/usr/bin/python |
| # |
| -# Copyright 2012 The Native Client Authors. All rights reserved. |
| -# Use of this source code is governed by a BSD-style license that can |
| -# be found in the LICENSE file. |
| +# Copyright (c) 2012 The Native Client Authors. All rights reserved. |
| +# Use of this source code is governed by a BSD-style license that can be |
| +# found in the LICENSE file. |
| # |
| """ |
| @@ -11,124 +11,310 @@ |
| The grammar implemented here is roughly (taking some liberties with whitespace |
| and comment parsing): |
| -table_file ::= ( BLANK_LINE | table_def ) end_of_file ; |
| -table_def ::= "--" IDENT CITATION NL |
| - table_header |
| - ( table_row )+ ; |
| -table_header ::= ( IDENT "(" BITRANGE ")" )+ ; |
| -table_row ::= ( PATTERN )+ ACTION ; |
| +table_file ::= table+ eof ; |
| -IDENT = /[a-z0-9_]+/ |
| -CITATION = "(" /[^)]+/ ")" |
| -BITRANGE = /[0-9]+/ (":" /[0-9]+/)? |
| -PATTERN = /[10x_]+/ |
| -ACTION = ( "=" IDENT | "->" IDENT ) ( "(" IDENT ")" )? |
| -NL = a newline |
| -BLANK_LINE = what you might expect it to be |
| +arch ::= '(' word+ ')' |
| +citation ::= '(' word+ ')' |
| +decoder_action ::= id (id (word (id)?)?)? |
| +decoder_method ::= '->' id |
| +footer ::= '+' '-' '-' |
| +header ::= "|" (id '(' int (':' int)? ')')+ |
| +int ::= word (where word is a sequence of digits) |
| +id ::= word (where word is sequence of letters, digits and _) |
| +parenthesized_exp ::= '(' (word | punctuation)+ ')' |
| +row ::= '|' pattern+ (decoder_action arch? | decoder_method)? |
| +table ::= table_desc header row+ footer |
| +table_desc ::= '+' '-' '-' id citation? |
| + |
| +If a decoder_action has more than one element, the interpretation is as follows: |
| + id[0] = action (plus optional architecture) to apply. |
| + id[1] = Arm rule action corresponds to. |
| + word = Bit pattern of rule. |
| + id[3] = Name defining additional constraints for match. |
| """ |
| import re |
| import dgen_core |
| -# These globals track the parser state. |
| -_in = None |
| -_line_no = None |
| -_tables = None |
| -_line = None |
| -_last_row = None |
| +def parse_tables(input): |
| + """Entry point for the parser. Input should be a file or file-like.""" |
| + parser = Parser() |
| + return parser.parse(input) |
| +class Token(object): |
| + """Holds a (characterized) unit of text for the parser.""" |
| -def parse_tables(input): |
| - """Entry point for the parser. Input should be a file or file-like.""" |
| - global _in, _line_no, _tables |
| - _in = input |
| - _line_no = 0 |
| - _tables = [] |
| - next_line() |
| + def __init__(self, kind, value=None): |
| + self.kind = kind |
| + self.value = value if value else kind |
| - while not end_of_file(): |
| - blank_line() or table_def() or unexpected() |
| +class Parser(object): |
| + """Parses a set of tables from the input file.""" |
| - return _tables |
| + def __init__(self): |
| + self.words = [] # Words left on current line, not yet parsed. |
| + self.line_no = 0 # The current line being parsed |
| + self.token = None # The next token from the input. |
| + self.at_eof = False # True when end of file reached |
| + # Punctuation allowed. Must be ordered such that if |
| + # p1 != p2 are in the list, and p1.startswith(p2), then |
| + # p1 must appear before p2. |
| + self.punctuation = ['->', '-', '+', '(', ')', '=', ':', '"', '|'] |
| + def parse(self, input): |
| + self.input = input # The remaining input to parse |
| + decoder = dgen_core.Decoder() # The generated decoder of parse tables. |
| + # Read tables while there are more to parse. |
| + while self._next_token().kind == '+': |
| + self._table(decoder) |
| -def blank_line(): |
| - if _line: |
| - return False |
| + if not self._next_token().kind == 'eof': |
| + self._unexpected('unrecognized input found') |
| + if not decoder.primary: |
| + self._unexpected('No primary table defined') |
| + if not decoder.tables(): |
| + self._unexpected('No tables defined') |
| + return decoder |
| - next_line(); |
| - return True |
| + def _arch(self): |
| + """ arch ::= '(' word+ ')' """ |
| + return ' '.join(self._parenthesized_exp()) |
| + def _citation(self): |
| + """ citation ::= '(' word+ ')' """ |
| + return ' '.join(self._parenthesized_exp()) |
| -def table_def(): |
| - global _last_row |
| + def _decoder_action(self): |
| + """ decoder_action ::= id (id (word (id)?)?)? """ |
| + self._read_token('=') |
| + name = None |
| + rule = None |
| + pattern = None |
| + constraints = None |
| + count = 1 |
| + while self._next_token().kind not in ['|', '+', '(']: |
|
robertm
2012/04/17 17:12:19
not using a loop might be clearer
|
| + if count == 1: |
| + name = _name_if_not_none(self._id()) |
| + elif count == 2: |
| + rule = _name_if_not_none(self._id()) |
| + elif count == 3: |
| + pattern = _name_if_not_none(self._read_token('word').value) |
| + elif count == 4: |
| + constraints = _name_if_not_none(self._id()) |
| + else: |
| + self._unexpected('Too many entries in decoder action') |
| + count += 1 |
| + return dgen_core.DecoderAction(name, rule, pattern, constraints) |
| - m = re.match(r'^-- ([^ ]+) \(([^)]+)\)', _line) |
| - if not m: return False |
| + def _decoder_method(self): |
| + """ decoder_method ::= '->' id """ |
| + self._read_token('->') |
| + name = self._id() |
| + return dgen_core.DecoderMethod(name) |
| - table = dgen_core.Table(m.group(1), m.group(2)) |
| - next_line() |
| - while blank_line(): pass |
| + def _footer(self): |
| + """ footer ::= '+' '-' '-' """ |
| + self._read_token('+') |
| + self._read_token('-') |
| + self._read_token('-') |
| - table_header(table) |
| - _last_row = None |
| - while not end_of_file() and not blank_line(): |
| - table_row(table) |
| + def _header(self, table): |
| + """ header ::= "|" (id '(' int (':' int)? ')')+ """ |
| + self._read_token('|') |
| + while not self._next_token().kind == '|': |
| + name = self._read_token('word').value |
| + self._read_token('(') |
| + hi_bit = self._int() |
| + lo_bit = hi_bit |
| + if self._next_token().kind == ':': |
| + self._read_token(':') |
| + lo_bit = self._int() |
| + self._read_token(')') |
| + table.add_column(name, hi_bit, lo_bit) |
| - _tables.append(table) |
| - return True |
| + def _int(self): |
| + """ int ::= word |
| + Int is a sequence of digits. Returns the corresponding integer. |
| + """ |
| + word = self._read_token('word').value |
| + m = re.match(r'^([0-9]+)$', word) |
| + if m: |
| + return int(word) |
| + else: |
| + self._unexpected('integer expected but found "%s"' % word) |
| -def table_header(table): |
| - for col in _line.split(): |
| - m = re.match(r'^([a-z0-9_]+)\(([0-9]+)(:([0-9]+))?\)$', col, re.I) |
| - if not m: raise Exception('Invalid column header: %s' % col) |
| + def _id(self): |
| + """ id ::= word |
| - hi_bit = int(m.group(2)) |
| - if m.group(4): |
| - lo_bit = int(m.group(4)) |
| + Word starts with a letter, and followed by letters, digits, |
| + and underscores. Returns the corresponding identifier. |
| + """ |
| + ident = self._read_token('word').value |
| + m = re.match(r'^[a-zA-Z][a-zA-Z0-9_]*$', ident) |
| + if not m: |
| + self._unexpected('"%s" is not a valid identifier' % ident) |
| + return ident |
| + |
| + def _parenthesized_exp(self, minlength=1): |
| + """ parenthesized_exp ::= '(' (word | punctuation)+ ')' |
| + |
| + The punctuation doesn't include ')'. |
| + Returns the sequence of token values parsed. |
| + """ |
| + self._read_token('(') |
| + words = [] |
| + while not self._at_eof() and self._next_token().kind != ')': |
| + words.append(self._read_token().value) |
| + if len(words) < minlength: |
| + self._unexpected("len(parenthesized expression) < %s" % minlength) |
| + self._read_token(')') |
| + return words |
| + |
| + def _row(self, table, last_patterns=None, |
| + last_action=None, last_arch= None): |
| + """ row ::= '|' pattern+ (decoder_action arch? | decoder_method)? |
| + |
| + Passed in sequence of patterns and action from last row, |
| + and returns list of patterns and action from this row. |
| + """ |
| + patterns = [] |
| + expanded_patterns = [] |
| + action = None |
| + arch = None |
| + self._read_token('|') |
| + while self._next_token().kind not in ['=', '->', '|', '+']: |
| + pattern = None |
| + if self._next_token().kind == '"': |
| + self._read_token('"') |
|
robertm
2012/04/17 17:12:19
add a comment here
|
| + if last_patterns: |
| + pat_len = len(patterns) |
| + last_pat_len = len(last_patterns) |
| + if pat_len < last_pat_len: |
| + pattern = last_patterns[len(patterns)] |
| + elif pat_len == last_pat_len: |
| + action = last_action |
| + arch = last_arch |
| + break |
| + else: |
| + self._unexpected('Can\'t determine value for "') |
| else: |
| - lo_bit = hi_bit |
| - table.add_column(m.group(1), hi_bit, lo_bit) |
| - next_line() |
| + self._unexpected('Can\'t determine value for "') |
| + elif self._next_token().kind == '-': |
| + pattern = self._read_token('-').value |
| + else: |
| + pattern = self._read_token('word').value |
| + if pattern: |
| + col = len(patterns) |
| + patterns.append(pattern) |
| + pattern = table.define_pattern(pattern, col) |
| + expanded_patterns.append(pattern) |
| + else: |
| + self._unexpected("Pattern illegal, can't continue") |
| + if self._next_token().kind == '=': |
| + if action: |
| + self._unexpected('action specified when " defined action') |
| + action = self._decoder_action() |
| + if self._next_token().kind == '(': |
| + arch = self._arch() |
| + elif self._next_token().kind == '->': |
| + if action: |
| + self._unexpected('action specified when " defined action') |
| + action = self._decoder_method() |
| + if self._next_token().kind in ['|', '+']: |
| + if not action: |
| + self._unexpected('No action defined for row') |
| + table.add_row(expanded_patterns, action, arch) |
| + return (patterns, action, arch) |
| + else: |
| + self._unexpected('Malformed row') |
| + def _table(self, decoder): |
| + """ table ::= table_desc header row+ footer """ |
| + table = self._table_desc() |
| + print 'Reading table %s...' % table.name |
| + self._header(table) |
| + (pattern, action, arch) = self._row(table) |
| + while not self._next_token().kind == '+': |
| + (pattern, action, arch) = self._row(table, pattern, action, arch) |
| + if not decoder.add(table): |
| + self._unexpected('Multiple tables with name %s' % table.name) |
| + self._footer() |
| -def table_row(table): |
| - global _last_row |
| + def _table_desc(self): |
| + """ table_desc ::= '+' '-' '-' id citation? """ |
| + self._read_token('+') |
| + self._read_token('-') |
| + self._read_token('-') |
| + name = self._id() |
| + citation = None |
| + if self._next_token().kind == '(': |
| + citation = self._citation() |
| + return dgen_core.Table(name, citation) |
| - row = _line.split() |
| - for i in range(0, len(row)): |
| - if row[i] == '"': row[i] = _last_row[i] |
| - _last_row = row |
| + def _at_eof(self): |
| + """Returns true if next token is the eof token.""" |
| + return self._next_token().kind == 'eof' |
| - action = row[-1] |
| - patterns = row[:-1] |
| - table.add_row(patterns, action) |
| - next_line() |
| + def _read_token(self, kind=None): |
| + """Reads and returns the next token from input.""" |
| + token = self._next_token() |
| + self.token = None |
| + if kind and kind != token.kind: |
| + self._unexpected('Expected "%s" but found "%s"' |
| + % (kind, token.kind)) |
| + return token |
| + def _next_token(self): |
| + """Returns the next token from the input.""" |
| + # First see if cached. |
| + if self.token: return self.token |
| -def end_of_file(): |
| - return _line is None |
| + # If no more tokens left on the current line. read |
| + # input till more tokens are found |
| + while not self.at_eof and not self.words: |
| + self.words = self._read_line().split() |
| + if self.words: |
| + # More tokens found. Convert the first word to a token. |
| + word = self.words.pop(0) |
| + # First remove any applicable punctuation. |
| + for p in self.punctuation: |
| + index = word.find(p) |
| + if index == 0: |
| + # Found punctuation, return it. |
| + self._pushback(word[len(p):]) |
| + self.token = Token(p) |
| + return self.token |
| + elif index > 0: |
| + self._pushback(word[index:]) |
| + word = word[:index] |
| + # if reached, word doesn't contain any punctuation, so return it. |
| + self.token = Token('word', word) |
| + else: |
| + # No more tokens found, assume eof. |
| + self.token = Token('eof') |
| + return self.token |
| -def next_line(): |
| - "Reads the next non-comment line" |
| - global _line_no, _line |
| + def _pushback(self, word): |
| + """Puts word back onto the list of words.""" |
| + if word: |
| + self.words.insert(0, word) |
| - _line_no += 1 |
| - _line = _in.readline() |
| - while True: |
| - if _line: |
| - if _line[0] == '#': |
| - # skip comment line and continue search. |
| - _line_no += 1 |
| - _line = _in.readline() |
| - continue |
| - _line = re.sub(r'#.*', '', _line).strip() |
| - else: |
| - _line = None |
| - # if reached, found line. |
| - return |
| + def _read_line(self): |
| + """Reads the next line of input, and returns it. Otherwise None.""" |
| + self.line_no += 1 |
| + line = self.input.readline() |
| + if line: |
| + return re.sub(r'#.*', '', line).strip() |
| + else: |
| + self.at_eof = True |
| + return '' |
| -def unexpected(): |
| - raise Exception('Line %d: Unexpected line in input: %s' % (_line_no, _line)) |
| + def _unexpected(self, context='Unexpected line in input'): |
| + """Reports that we didn't find the expected context.""" |
| + raise Exception('Line %d: %s' % (self.line_no, context)) |
| + |
| +def _name_if_not_none(id): |
| + """ Returns id unless string 'None' """ |
| + return None if id and id == 'None' else id |