| OLD | NEW |
| 1 #!/usr/bin/python | 1 #!/usr/bin/python |
| 2 # | 2 # |
| 3 # Copyright 2012 The Native Client Authors. All rights reserved. | 3 # Copyright (c) 2012 The Native Client Authors. All rights reserved. |
| 4 # Use of this source code is governed by a BSD-style license that can | 4 # Use of this source code is governed by a BSD-style license that can be |
| 5 # be found in the LICENSE file. | 5 # found in the LICENSE file. |
| 6 # | 6 # |
| 7 | 7 |
| 8 """ | 8 """ |
| 9 A simple recursive-descent parser for the table file format. | 9 A simple recursive-descent parser for the table file format. |
| 10 | 10 |
| 11 The grammar implemented here is roughly (taking some liberties with whitespace | 11 The grammar implemented here is roughly (taking some liberties with whitespace |
| 12 and comment parsing): | 12 and comment parsing): |
| 13 | 13 |
| 14 table_file ::= ( BLANK_LINE | table_def ) end_of_file ; | 14 table_file ::= table+ eof ; |
| 15 table_def ::= "--" IDENT CITATION NL | 15 |
| 16 table_header | 16 action ::= decoder_action arch | decoder_method | '"' |
| 17 ( table_row )+ ; | 17 arch ::= '(' word+ ')' |
| 18 table_header ::= ( IDENT "(" BITRANGE ")" )+ ; | 18 citation ::= '(' word+ ')' |
| 19 table_row ::= ( PATTERN )+ ACTION ; | 19 decoder_action ::= id (id (word (id)?)?)? |
| 20 | 20 decoder_method ::= '->' id |
| 21 IDENT = /[a-z0-9_]+/ | 21 footer ::= '+' '-' '-' |
| 22 CITATION = "(" /[^)]+/ ")" | 22 header ::= "|" (id '(' int (':' int)? ')')+ |
| 23 BITRANGE = /[0-9]+/ (":" /[0-9]+/)? | 23 int ::= word (where word is a sequence of digits) |
| 24 PATTERN = /[10x_]+/ | 24 id ::= word (where word is sequence of letters, digits and _) |
| 25 ACTION = ( "=" IDENT | "->" IDENT ) ( "(" IDENT ")" )? | 25 parenthesized_exp ::= '(' (word | punctuation)+ ')' |
| 26 NL = a newline | 26 pattern ::= 'word' | '-' | '"' |
| 27 BLANK_LINE = what you might expect it to be | 27 row ::= '|' pattern+ action |
| 28 table ::= table_desc header row+ footer |
| 29 table_desc ::= '+' '-' '-' id citation? |
| 30 |
| 31 If a decoder_action has more than one element, the interpretation is as follows: |
| 32 id[0] = action (plus optional architecture) to apply. |
| 33 id[1] = Arm rule action corresponds to. |
| 34 word = Bit pattern of rule. |
| 35 id[3] = Name defining additional constraints for match. |
| 28 """ | 36 """ |
| 29 | 37 |
| 30 import re | 38 import re |
| 31 import dgen_core | 39 import dgen_core |
| 32 | 40 |
| 33 # These globals track the parser state. | |
| 34 _in = None | |
| 35 _line_no = None | |
| 36 _tables = None | |
| 37 _line = None | |
| 38 _last_row = None | |
| 39 | |
| 40 | |
def parse_tables(input):
  """Entry point for the parser. Input should be a file or file-like."""
  return Parser().parse(input)
| 45 _line_no = 0 | 45 |
class Token(object):
  """Holds a (characterized) unit of text for the parser.

  kind is the token category: a punctuation string, 'word', or 'eof'.
  value is the token text; when omitted (or empty) it defaults to kind.
  """

  def __init__(self, kind, value=None):
    self.kind = kind
    # Falsy values (None or '') fall back to the kind string.
    self.value = value or kind
| 52 return _tables | 52 |
class Parser(object):
  """Parses a set of tables from the input file.

  The parser is line oriented: each input line has comments stripped and
  is split into whitespace-separated words; words are further split into
  punctuation tokens and 'word' tokens (see _next_token).
  """

  def __init__(self):
    self._words = []           # Words of the current line, not yet parsed.
    self._line_no = 0          # The current line being parsed.
    self._token = None         # Lookahead token (cached by _next_token).
    self._reached_eof = False  # True when end of file reached.
    # Punctuation allowed. Must be ordered such that if p1 != p2 are in
    # the list, and p1.startswith(p2), then p1 must appear before p2
    # (so the longest match wins, e.g. '->' before '-').
    self._punctuation = ['->', '-', '+', '(', ')', '=', ':', '"', '|']

  def parse(self, input):
    """Parses the tables in input (a file or file-like object).

    Returns the generated dgen_core.Decoder. Raises an Exception on
    malformed input, or if no tables / no primary table were defined.
    """
    self.input = input             # The remaining input to parse.
    decoder = dgen_core.Decoder()  # The generated decoder of parse tables.
    # Read tables while there.
    while self._next_token().kind == '+':
      self._table(decoder)

    if self._next_token().kind != 'eof':
      self._unexpected('unrecognized input found')
    if not decoder.primary:
      self._unexpected('No primary table defined')
    if not decoder.tables():
      self._unexpected('No tables defined')
    return decoder

  def _action(self, last_action, last_arch):
    """ action ::= decoder_action arch | decoder_method | '"'

    A '"' repeats the previous row's action/architecture (passed in as
    last_action/last_arch). Returns the (action, arch) pair parsed.
    """
    kind = self._next_token().kind
    if kind == '"':
      self._read_token('"')
      return (last_action, last_arch)
    if kind == '=':
      action = self._decoder_action()
      arch = None
      if self._next_token().kind == '(':
        arch = self._arch()
      return (action, arch)
    if kind == '->':
      return (self._decoder_method(), None)
    self._unexpected("Row doesn't define an action")

  # NOTE: a dead stub `_pattern(self, col_no, last_patterns, last_action,
  # last_arch)` that was shadowed by the real _pattern definition below
  # has been removed.

  def _arch(self):
    """ arch ::= '(' word+ ')' """
    return ' '.join(self._parenthesized_exp())

  def _citation(self):
    """ citation ::= '(' word+ ')' """
    return ' '.join(self._parenthesized_exp())

  def _read_id_or_none(self, read_id):
    """Reads an optional trailing element of a decoder_action.

    Returns None if the next token ends the action ('|', '+' or '('),
    or if the element read is the literal 'None'. Otherwise returns the
    identifier (read_id=True) or raw word (read_id=False) that was read.
    """
    if self._next_token().kind in ['|', '+', '(']:
      return None
    name = self._id() if read_id else self._read_token('word').value
    return None if name == 'None' else name

  def _decoder_action(self):
    """ decoder_action ::= id (id (word (id)?)?)?

    The elements are, in order: the action name (plus optional
    architecture), the Arm rule the action corresponds to, the bit
    pattern of the rule, and the name defining additional constraints
    for the match.
    """
    self._read_token('=')
    name = self._read_id_or_none(True)
    rule = self._read_id_or_none(True)
    pattern = self._read_id_or_none(False)
    constraints = self._read_id_or_none(True)
    return dgen_core.DecoderAction(name, rule, pattern, constraints)

  def _decoder_method(self):
    """ decoder_method ::= '->' id """
    self._read_token('->')
    name = self._id()
    return dgen_core.DecoderMethod(name)

  def _footer(self):
    """ footer ::= '+' '-' '-' """
    self._read_token('+')
    self._read_token('-')
    self._read_token('-')

  def _header(self, table):
    """ header ::= "|" (id '(' int (':' int)? ')')+

    Adds the parsed columns to table.
    """
    self._read_token('|')
    while self._next_token().kind != '|':
      name = self._read_token('word').value
      self._read_token('(')
      hi_bit = self._int()
      lo_bit = hi_bit  # A single int means a one-bit column.
      if self._next_token().kind == ':':
        self._read_token(':')
        lo_bit = self._int()
      self._read_token(')')
      table.add_column(name, hi_bit, lo_bit)

  def _int(self):
    """ int ::= word

    The word must be a sequence of digits. Returns the corresponding
    integer.
    """
    word = self._read_token('word').value
    if not re.match(r'^[0-9]+$', word):
      self._unexpected('integer expected but found "%s"' % word)
    return int(word)

  def _id(self):
    """ id ::= word

    The word must start with a letter or underscore, followed by
    letters, digits, and underscores. Returns the identifier.
    """
    ident = self._read_token('word').value
    # Fixed character class: the previous pattern used [a-zA-z], whose
    # range also matched the punctuation characters between 'Z' and 'a'.
    if not re.match(r'^[a-zA-Z_][a-zA-Z0-9_]*$', ident):
      self._unexpected('"%s" is not a valid identifier' % ident)
    return ident

  def _parenthesized_exp(self, minlength=1):
    """ parenthesized_exp ::= '(' (word | punctuation)+ ')'

    The punctuation doesn't include ')'. Raises unless at least
    minlength tokens appear before the closing ')'.
    Returns the sequence of token values parsed.
    """
    self._read_token('(')
    words = []
    while not self._at_eof() and self._next_token().kind != ')':
      words.append(self._read_token().value)
    if len(words) < minlength:
      self._unexpected('len(parenthesized expression) < %s' % minlength)
    self._read_token(')')
    return words

  def _pattern(self, last_pattern):
    """ pattern ::= 'word' | '-' | '"'

    A '"' repeats the pattern of the same column on the previous row
    (passed in as last_pattern). Returns the pattern parsed.
    """
    kind = self._next_token().kind
    if kind == '"':
      self._read_token('"')
      return last_pattern
    if kind in ['-', 'word']:
      return self._read_token().value
    self._unexpected('Malformed pattern')

  def _row(self, table, last_patterns=None, last_action=None, last_arch=None):
    """ row ::= '|' pattern+ (decoder_action arch? | decoder_method)?

    Passed in sequence of patterns and action from last row,
    and returns list of patterns and action from this row.
    """
    patterns = []           # Patterns as found on input.
    expanded_patterns = []  # Patterns after being expanded.
    self._read_token('|')
    num_patterns = 0
    num_patterns_last = len(last_patterns) if last_patterns else None
    while self._next_token().kind not in ['=', '->', '|', '+']:
      if last_patterns and num_patterns >= num_patterns_last:
        # Processed patterns in this row, since width is now the
        # same as last row.
        break
      last_pattern = last_patterns[num_patterns] if last_patterns else None
      pattern = self._pattern(last_pattern)
      patterns.append(pattern)
      expanded_patterns.append(table.define_pattern(pattern, num_patterns))
      num_patterns += 1

    (action, arch) = self._action(last_action, last_arch)
    table.add_row(expanded_patterns, action, arch)
    return (patterns, action, arch)

  def _table(self, decoder):
    """ table ::= table_desc header row+ footer """
    table = self._table_desc()
    # Parenthesized form works as both Python 2 statement and 3 function.
    print('Reading table %s...' % table.name)
    self._header(table)
    (pattern, action, arch) = self._row(table)
    while self._next_token().kind != '+':
      (pattern, action, arch) = self._row(table, pattern, action, arch)
    if not decoder.add(table):
      self._unexpected('Multiple tables with name %s' % table.name)
    self._footer()

  def _table_desc(self):
    """ table_desc ::= '+' '-' '-' id citation? """
    self._read_token('+')
    self._read_token('-')
    self._read_token('-')
    name = self._id()
    citation = None
    if self._next_token().kind == '(':
      citation = self._citation()
    return dgen_core.Table(name, citation)

  def _at_eof(self):
    """Returns true if next token is the eof token."""
    return self._next_token().kind == 'eof'

  def _read_token(self, kind=None):
    """Reads and returns the next token, consuming it.

    If kind is given, raises unless the token is of that kind.
    """
    token = self._next_token()
    self._token = None
    if kind and kind != token.kind:
      self._unexpected('Expected "%s" but found "%s"'
                       % (kind, token.kind))
    return token

  def _next_token(self):
    """Returns (without consuming) the next token from the input."""
    # First see if a token is already cached.
    if self._token:
      return self._token

    # If no more tokens are left on the current line, read input
    # until more tokens are found (or end of file is reached).
    while not self._reached_eof and not self._words:
      self._words = self._read_line().split()

    if self._words:
      # More tokens found. Convert the first word to a token.
      word = self._words.pop(0)
      # First remove any applicable punctuation.
      for p in self._punctuation:
        index = word.find(p)
        if index == 0:
          # Found punctuation; return it.
          self._pushback(word[len(p):])
          self._token = Token(p)
          return self._token
        elif index > 0:
          # Punctuation further in; split it off and keep scanning.
          self._pushback(word[index:])
          word = word[:index]
      # If reached, word doesn't contain any punctuation, so return it.
      self._token = Token('word', word)
    else:
      # No more tokens found, assume eof.
      self._token = Token('eof')
    return self._token

  def _pushback(self, word):
    """Puts word back onto the list of words."""
    if word:
      self._words.insert(0, word)

  def _read_line(self):
    """Reads the next line of input (comments stripped), and returns it.

    Returns '' and sets _reached_eof once end of file is reached.
    """
    self._line_no += 1
    line = self.input.readline()
    if line:
      return re.sub(r'#.*', '', line).strip()
    self._reached_eof = True
    return ''

  def _unexpected(self, context='Unexpected line in input'):
    """Reports that we didn't find the expected context."""
    raise Exception('Line %d: %s' % (self._line_no, context))
| OLD | NEW |