| OLD | NEW |
| 1 #!/usr/bin/python | 1 #!/usr/bin/python |
| 2 # | 2 # |
| 3 # Copyright 2012 The Native Client Authors. All rights reserved. | 3 # Copyright (c) 2012 The Native Client Authors. All rights reserved. |
| 4 # Use of this source code is governed by a BSD-style license that can | 4 # Use of this source code is governed by a BSD-style license that can be |
| 5 # be found in the LICENSE file. | 5 # found in the LICENSE file. |
| 6 # | 6 # |
| 7 | 7 |
| 8 """ | 8 """ |
| 9 A simple recursive-descent parser for the table file format. | 9 A simple recursive-descent parser for the table file format. |
| 10 | 10 |
| 11 The grammar implemented here is roughly (taking some liberties with whitespace | 11 The grammar implemented here is roughly (taking some liberties with whitespace |
| 12 and comment parsing): | 12 and comment parsing): |
| 13 | 13 |
| 14 table_file ::= ( BLANK_LINE | table_def ) end_of_file ; | 14 table_file ::= table+ eof ; |
| 15 table_def ::= "--" IDENT CITATION NL | 15 |
| 16 table_header | 16 arch ::= '(' word+ ')' |
| 17 ( table_row )+ ; | 17 citation ::= '(' word+ ')' |
| 18 table_header ::= ( IDENT "(" BITRANGE ")" )+ ; | 18 decoder_action ::= id (id (word (id)?)?)? |
| 19 table_row ::= ( PATTERN )+ ACTION ; | 19 decoder_method ::= '->' id |
| 20 | 20 footer ::= '+' '-' '-' |
| 21 IDENT = /[a-z0-9_]+/ | 21 header ::= "|" (id '(' int (':' int)? ')')+ |
| 22 CITATION = "(" /[^)]+/ ")" | 22 int ::= word (where word is a sequence of digits) |
| 23 BITRANGE = /[0-9]+/ (":" /[0-9]+/)? | 23 id ::= word (where word is sequence of letters, digits and _) |
| 24 PATTERN = /[10x_]+/ | 24 parenthesized_exp ::= '(' (word | punctuation)+ ')' |
| 25 ACTION = ( "=" IDENT | "->" IDENT ) ( "(" IDENT ")" )? | 25 row ::= '|' pattern+ (decoder_action arch? | decoder_method)? |
| 26 NL = a newline | 26 table ::= table_desc header row+ footer |
| 27 BLANK_LINE = what you might expect it to be | 27 table_desc ::= '+' '-' '-' id citation? |
| 28 |
| 29 If a decoder_action has more than one element, the interpretation is as follows: |
| 30 id[0] = action (plus optional architecture) to apply. |
| 31 id[1] = Arm rule that the action corresponds to. |
| 32 word = Bit pattern of rule. |
| 33 id[3] = Name defining additional constraints for match. |
| 28 """ | 34 """ |
| 29 | 35 |
| 30 import re | 36 import re |
| 31 import dgen_core | 37 import dgen_core |
| 32 | 38 |
| 33 # These globals track the parser state. | |
| 34 _in = None | |
| 35 _line_no = None | |
| 36 _tables = None | |
| 37 _line = None | |
| 38 _last_row = None | |
| 39 | |
| 40 | |
| 41 def parse_tables(input): | 39 def parse_tables(input): |
| 42 """Entry point for the parser. Input should be a file or file-like.""" | 40 """Entry point for the parser. Input should be a file or file-like.""" |
| 43 global _in, _line_no, _tables | 41 parser = Parser() |
| 44 _in = input | 42 return parser.parse(input) |
| 45 _line_no = 0 | 43 |
| 46 _tables = [] | 44 class Token(object): |
| 47 next_line() | 45 """Holds a (characterized) unit of text for the parser.""" |
| 48 | 46 |
| 49 while not end_of_file(): | 47 def __init__(self, kind, value=None): |
| 50 blank_line() or table_def() or unexpected() | 48 self.kind = kind |
| 51 | 49 self.value = value if value else kind |
| 52 return _tables | 50 |
| 53 | 51 class Parser(object): |
| 54 | 52 """Parses a set of tables from the input file.""" |
| 55 def blank_line(): | 53 |
| 56 if _line: | 54 def __init__(self): |
| 57 return False | 55 self.words = [] # Words left on current line, not yet parsed. |
| 58 | 56 self.line_no = 0 # The current line being parsed |
| 59 next_line(); | 57 self.token = None # The next token from the input. |
| 60 return True | 58 self.at_eof = False # True when end of file reached |
| 61 | 59 # Punctuation allowed. Must be ordered such that if |
| 62 | 60 # p1 != p2 are in the list, and p1.startswith(p2), then |
| 63 def table_def(): | 61 # p1 must appear before p2. |
| 64 global _last_row | 62 self.punctuation = ['->', '-', '+', '(', ')', '=', ':', '"', '|'] |
| 65 | 63 |
| 66 m = re.match(r'^-- ([^ ]+) \(([^)]+)\)', _line) | 64 def parse(self, input): |
| 67 if not m: return False | 65 self.input = input # The remaining input to parse |
| 68 | 66 decoder = dgen_core.Decoder() # The generated decoder of parse tables. |
| 69 table = dgen_core.Table(m.group(1), m.group(2)) | 67 # Read tables while there. |
| 70 next_line() | 68 while self._next_token().kind == '+': |
| 71 while blank_line(): pass | 69 self._table(decoder) |
| 72 | 70 |
| 73 table_header(table) | 71 if not self._next_token().kind == 'eof': |
| 74 _last_row = None | 72 self._unexpected('unrecognized input found') |
| 75 while not end_of_file() and not blank_line(): | 73 if not decoder.primary: |
| 76 table_row(table) | 74 self._unexpected('No primary table defined') |
| 77 | 75 if not decoder.tables(): |
| 78 _tables.append(table) | 76 self._unexpected('No tables defined') |
| 79 return True | 77 return decoder |
| 80 | 78 |
| 81 | 79 def _arch(self): |
| 82 def table_header(table): | 80 """ arch ::= '(' word+ ')' """ |
| 83 for col in _line.split(): | 81 return ' '.join(self._parenthesized_exp()) |
| 84 m = re.match(r'^([a-z0-9_]+)\(([0-9]+)(:([0-9]+))?\)$', col, re.I) | 82 |
| 85 if not m: raise Exception('Invalid column header: %s' % col) | 83 def _citation(self): |
| 86 | 84 """ citation ::= '(' word+ ')' """ |
| 87 hi_bit = int(m.group(2)) | 85 return ' '.join(self._parenthesized_exp()) |
| 88 if m.group(4): | 86 |
| 89 lo_bit = int(m.group(4)) | 87 def _decoder_action(self): |
| 88 """ decoder_action ::= id (id (word (id)?)?)? """ |
| 89 self._read_token('=') |
| 90 name = None |
| 91 rule = None |
| 92 pattern = None |
| 93 constraints = None |
| 94 count = 1 |
| 95 while self._next_token().kind not in ['|', '+', '(']: |
| 96 if count == 1: |
| 97 name = _name_if_not_none(self._id()) |
| 98 elif count == 2: |
| 99 rule = _name_if_not_none(self._id()) |
| 100 elif count == 3: |
| 101 pattern = _name_if_not_none(self._read_token('word').value) |
| 102 elif count == 4: |
| 103 constraints = _name_if_not_none(self._id()) |
| 104 else: |
| 105 self._unexpected('Too many entries in decoder action') |
| 106 count += 1 |
| 107 return dgen_core.DecoderAction(name, rule, pattern, constraints) |
| 108 |
| 109 def _decoder_method(self): |
| 110 """ decoder_method ::= '->' id """ |
| 111 self._read_token('->') |
| 112 name = self._id() |
| 113 return dgen_core.DecoderMethod(name) |
| 114 |
| 115 def _footer(self): |
| 116 """ footer ::= '+' '-' '-' """ |
| 117 self._read_token('+') |
| 118 self._read_token('-') |
| 119 self._read_token('-') |
| 120 |
| 121 def _header(self, table): |
| 122 """ header ::= "|" (id '(' int (':' int)? ')')+ """ |
| 123 self._read_token('|') |
| 124 while not self._next_token().kind == '|': |
| 125 name = self._read_token('word').value |
| 126 self._read_token('(') |
| 127 hi_bit = self._int() |
| 128 lo_bit = hi_bit |
| 129 if self._next_token().kind == ':': |
| 130 self._read_token(':') |
| 131 lo_bit = self._int() |
| 132 self._read_token(')') |
| 133 table.add_column(name, hi_bit, lo_bit) |
| 134 |
| 135 def _int(self): |
| 136 """ int ::= word |
| 137 |
| 138 Int is a sequence of digits. Returns the corresponding integer. |
| 139 """ |
| 140 word = self._read_token('word').value |
| 141 m = re.match(r'^([0-9]+)$', word) |
| 142 if m: |
| 143 return int(word) |
| 144 else: |
| 145 self._unexpected('integer expected but found "%s"' % word) |
| 146 |
| 147 def _id(self): |
| 148 """ id ::= word |
| 149 |
| 150 Word starts with a letter, and followed by letters, digits, |
| 151 and underscores. Returns the corresponding identifier. |
| 152 """ |
| 153 ident = self._read_token('word').value |
| 154 m = re.match(r'^[a-zA-z][a-zA-z0-9_]*$', ident) |
| 155 if not m: |
| 156 self._unexpected('"%s" is not a valid identifier' % ident) |
| 157 return ident |
| 158 |
| 159 def _parenthesized_exp(self, minlength=1): |
| 160 """ parenthesized_exp ::= '(' (word | punctuation)+ ')' |
| 161 |
| 162 The punctuation doesn't include ')'. |
| 163 Returns the sequence of token values parsed. |
| 164 """ |
| 165 self._read_token('(') |
| 166 words = [] |
| 167 while not self._at_eof() and self._next_token().kind != ')': |
| 168 words.append(self._read_token().value) |
| 169 if len(words) < minlength: |
| 170 self._unexpected("len(parenthesized expresssion) < %s" % minlength) |
| 171 self._read_token(')') |
| 172 return words |
| 173 |
| 174 def _repeat_entry_from_last(self, n, |
| 175 cur_pattern, cur_action, cur_arch, |
| 176 last_patterns, last_action, last_arch): |
| 177 """Gets the nth entry from the last row and returns it. |
| 178 |
| 179 Arguments are: |
| 180 n - The (column) to be repeated from the last row. |
| 181 cur_pattern - The current pattern being built |
| 182 cur_act |
| 183 """ |
| 184 |
| 185 def _row(self, table, last_patterns=None, |
| 186 last_action=None, last_arch= None): |
| 187 """ row ::= '|' pattern+ (decoder_action arch? | decoder_method)? |
| 188 |
| 189 Takes the patterns, action, and arch parsed from the last row (used to |
| 190 fill in '"' entries), and returns the (patterns, action, arch) of this row. |
| 191 """ |
| 192 patterns = [] |
| 193 expanded_patterns = [] |
| 194 action = None |
| 195 arch = None |
| 196 self._read_token('|') |
| 197 while self._next_token().kind not in ['=', '->', '|', '+']: |
| 198 pattern = None |
| 199 if self._next_token().kind == '"': |
| 200 # Ditto mark: repeat the corresponding entry from the last row. |
| 201 self._read_token('"') |
| 202 if last_patterns: |
| 203 pat_len = len(patterns) |
| 204 last_pat_len = len(last_patterns) |
| 205 if pat_len < last_pat_len: |
| 206 pattern = last_patterns[len(patterns)] |
| 207 elif pat_len == last_pat_len: |
| 208 action = last_action |
| 209 arch = last_arch |
| 210 break |
| 211 else: |
| 212 self._unexpected('Can\'t determine value for "') |
| 90 else: | 213 else: |
| 91 lo_bit = hi_bit | 214 self._unexpected('Can\'t determine value for "') |
| 92 table.add_column(m.group(1), hi_bit, lo_bit) | 215 elif self._next_token().kind == '-': |
| 93 next_line() | 216 pattern = self._read_token('-').value |
| 94 | |
| 95 | |
| 96 def table_row(table): | |
| 97 global _last_row | |
| 98 | |
| 99 row = _line.split() | |
| 100 for i in range(0, len(row)): | |
| 101 if row[i] == '"': row[i] = _last_row[i] | |
| 102 _last_row = row | |
| 103 | |
| 104 action = row[-1] | |
| 105 patterns = row[:-1] | |
| 106 table.add_row(patterns, action) | |
| 107 next_line() | |
| 108 | |
| 109 | |
| 110 def end_of_file(): | |
| 111 return _line is None | |
| 112 | |
| 113 | |
| 114 def next_line(): | |
| 115 "Reads the next non-comment line" | |
| 116 global _line_no, _line | |
| 117 | |
| 118 _line_no += 1 | |
| 119 _line = _in.readline() | |
| 120 while True: | |
| 121 if _line: | |
| 122 if _line[0] == '#': | |
| 123 # skip comment line and continue search. | |
| 124 _line_no += 1 | |
| 125 _line = _in.readline() | |
| 126 continue | |
| 127 _line = re.sub(r'#.*', '', _line).strip() | |
| 128 else: | 217 else: |
| 129 _line = None | 218 pattern = self._read_token('word').value |
| 130 # if reached, found line. | 219 if pattern: |
| 131 return | 220 col = len(patterns) |
| 132 | 221 patterns.append(pattern) |
| 133 def unexpected(): | 222 pattern = table.define_pattern(pattern, col) |
| 134 raise Exception('Line %d: Unexpected line in input: %s' % (_line_no, _line)) | 223 expanded_patterns.append(pattern) |
| 224 else: |
| 225 self._unexpected("Pattern illegal, can't continue") |
| 226 if self._next_token().kind == '=': |
| 227 if action: |
| 228 self._unexpected('action specified when " defined action') |
| 229 action = self._decoder_action() |
| 230 if self._next_token().kind == '(': |
| 231 arch = self._arch() |
| 232 elif self._next_token().kind == '->': |
| 233 if action: |
| 234 self._unexpected('action specified when " defined action') |
| 235 action = self._decoder_method() |
| 236 if self._next_token().kind in ['|', '+']: |
| 237 if not action: |
| 238 self._unexpected('No action defined for row') |
| 239 table.add_row(expanded_patterns, action, arch) |
| 240 return (patterns, action, arch) |
| 241 else: |
| 242 self._unexpected('Malformed row') |
| 243 |
| 244 def _table(self, decoder): |
| 245 """ table ::= table_desc header row+ footer """ |
| 246 table = self._table_desc() |
| 247 print 'Reading table %s...' % table.name |
| 248 self._header(table) |
| 249 (pattern, action, arch) = self._row(table) |
| 250 while not self._next_token().kind == '+': |
| 251 (pattern, action, arch) = self._row(table, pattern, action, arch) |
| 252 if not decoder.add(table): |
| 253 self._unexpected('Multiple tables with name %s' % table.name) |
| 254 self._footer() |
| 255 |
| 256 def _table_desc(self): |
| 257 """ table_desc ::= '+' '-' '-' id citation? """ |
| 258 self._read_token('+') |
| 259 self._read_token('-') |
| 260 self._read_token('-') |
| 261 name = self._id() |
| 262 citation = None |
| 263 if self._next_token().kind == '(': |
| 264 citation = self._citation() |
| 265 return dgen_core.Table(name, citation) |
| 266 |
| 267 def _at_eof(self): |
| 268 """Returns true if next token is the eof token.""" |
| 269 return self._next_token().kind == 'eof' |
| 270 |
| 271 def _read_token(self, kind=None): |
| 272 """Reads and returns the next token from input.""" |
| 273 token = self._next_token() |
| 274 self.token = None |
| 275 if kind and kind != token.kind: |
| 276 self._unexpected('Expected "%s" but found "%s"' |
| 277 % (kind, token.kind)) |
| 278 return token |
| 279 |
| 280 def _next_token(self): |
| 281 """Returns the next token from the input without consuming it (the token stays cached).""" |
| 282 # First see if cached. |
| 283 if self.token: return self.token |
| 284 |
| 285 # If no more tokens are left on the current line, read |
| 286 # input until more tokens are found. |
| 287 while not self.at_eof and not self.words: |
| 288 self.words = self._read_line().split() |
| 289 |
| 290 if self.words: |
| 291 # More tokens found. Convert the first word to a token. |
| 292 word = self.words.pop(0) |
| 293 # First remove any applicable punctuation. |
| 294 for p in self.punctuation: |
| 295 index = word.find(p) |
| 296 if index == 0: |
| 297 # Found punctuation, return it. |
| 298 self._pushback(word[len(p):]) |
| 299 self.token = Token(p) |
| 300 return self.token |
| 301 elif index > 0: |
| 302 self._pushback(word[index:]) |
| 303 word = word[:index] |
| 304 # if reached, word doesn't contain any punctuation, so return it. |
| 305 self.token = Token('word', word) |
| 306 else: |
| 307 # No more tokens found, assume eof. |
| 308 self.token = Token('eof') |
| 309 return self.token |
| 310 |
| 311 def _pushback(self, word): |
| 312 """Puts word back onto the list of words.""" |
| 313 if word: |
| 314 self.words.insert(0, word) |
| 315 |
| 316 def _read_line(self): |
| 317 """Reads the next line of input and returns it with any '#' comment and surrounding whitespace removed. Returns '' at end of input.""" |
| 318 self.line_no += 1 |
| 319 line = self.input.readline() |
| 320 if line: |
| 321 return re.sub(r'#.*', '', line).strip() |
| 322 else: |
| 323 self.at_eof = True |
| 324 return '' |
| 325 |
| 326 def _unexpected(self, context='Unexpected line in input'): |
| 327 """Raises an Exception whose message holds the current line number and the given context.""" |
| 328 raise Exception('Line %d: %s' % (self.line_no, context)) |
| 329 |
| 330 def _name_if_not_none(id): |
| 331 """ Returns id, or None if id is the string 'None'. """ |
| 332 return None if id and id == 'None' else id |
| OLD | NEW |