Chromium Code Reviews

| OLD | NEW |
|---|---|
| 1 #!/usr/bin/python | 1 #!/usr/bin/python |
| 2 # | 2 # |
| 3 # Copyright 2012 The Native Client Authors. All rights reserved. | 3 # Copyright (c) 2012 The Native Client Authors. All rights reserved. |
| 4 # Use of this source code is governed by a BSD-style license that can | 4 # Use of this source code is governed by a BSD-style license that can be |
| 5 # be found in the LICENSE file. | 5 # found in the LICENSE file. |
| 6 # | 6 # |
| 7 | 7 |
| 8 """ | 8 """ |
| 9 A simple recursive-descent parser for the table file format. | 9 A simple recursive-descent parser for the table file format. |
| 10 | 10 |
| 11 The grammar implemented here is roughly (taking some liberties with whitespace | 11 The grammar implemented here is roughly (taking some liberties with whitespace |
| 12 and comment parsing): | 12 and comment parsing): |
| 13 | 13 |
| 14 table_file ::= ( BLANK_LINE | table_def ) end_of_file ; | 14 table_file ::= table+ eof ; |
| 15 table_def ::= "--" IDENT CITATION NL | 15 |
| 16 table_header | 16 arch ::= '(' word+ ')' |
| 17 ( table_row )+ ; | 17 citation ::= '(' word+ ')' |
| 18 table_header ::= ( IDENT "(" BITRANGE ")" )+ ; | 18 decoder_action ::= id (id (word (id)?)?)? |
| 19 table_row ::= ( PATTERN )+ ACTION ; | 19 decoder_method ::= '->' id |
| 20 | 20 footer ::= '+' '-' '-' |
| 21 IDENT = /[a-z0-9_]+/ | 21 header ::= "|" (id '(' int (':' int)? ')')+ |
| 22 CITATION = "(" /[^)]+/ ")" | 22 int ::= word (where word is a sequence of digits) |
| 23 BITRANGE = /[0-9]+/ (":" /[0-9]+/)? | 23 id ::= word (where word is a sequence of letters, digits and _) |
| 24 PATTERN = /[10x_]+/ | 24 parenthesized_exp ::= '(' (word | punctuation)+ ')' |
| 25 ACTION = ( "=" IDENT | "->" IDENT ) ( "(" IDENT ")" )? | 25 row ::= '|' pattern+ (decoder_action arch? | decoder_method)? |
| 26 NL = a newline | 26 table ::= table_desc header row+ footer |
| 27 BLANK_LINE = what you might expect it to be | 27 table_desc ::= '+' '-' '-' id citation? |
| 28 | |
| 29 If a decoder_action has more than one element, the interpretation is as follows: | |
| 30 id[0] = action (plus optional architecture) to apply. | |
| 31 id[1] = Arm rule the action corresponds to. | |
| 32 word = Bit pattern of rule. | |
| 33 id[3] = Name defining additional constraints for match. | |
| 28 """ | 34 """ |
| 29 | 35 |
| 30 import re | 36 import re |
| 31 import dgen_core | 37 import dgen_core |
| 32 | 38 |
| 33 # These globals track the parser state. | |
| 34 _in = None | |
| 35 _line_no = None | |
| 36 _tables = None | |
| 37 _line = None | |
| 38 _last_row = None | |
| 39 | |
| 40 | |
| 41 def parse_tables(input): | 39 def parse_tables(input): |
| 42 """Entry point for the parser. Input should be a file or file-like.""" | 40 """Entry point for the parser. Input should be a file or file-like.""" |
| 43 global _in, _line_no, _tables | 41 parser = Parser() |
| 44 _in = input | 42 return parser.parse(input) |
| 45 _line_no = 0 | 43 |
| 46 _tables = [] | 44 class Token(object): |
| 47 next_line() | 45 """Holds a (characterized) unit of text for the parser.""" |
| 48 | 46 |
| 49 while not end_of_file(): | 47 def __init__(self, kind, value=None): |
| 50 blank_line() or table_def() or unexpected() | 48 self.kind = kind |
| 51 | 49 self.value = value if value else kind |
| 52 return _tables | 50 |
| 53 | 51 class Parser(object): |
| 54 | 52 """Parses a set of tables from the input file.""" |
| 55 def blank_line(): | 53 |
| 56 if _line: | 54 def __init__(self): |
| 57 return False | 55 self.words = [] # Words left on current line, not yet parsed. |
| 58 | 56 self.line_no = 0 # The current line being parsed |
| 59 next_line(); | 57 self.token = None # The next token from the input. |
| 60 return True | 58 self.at_eof = False # True when end of file reached |
| 61 | 59 # Punctuation allowed. Must be ordered such that if |
| 62 | 60 # p1 != p2 are in the list, and p1.startswith(p2), then |
| 63 def table_def(): | 61 # p1 must appear before p2. |
| 64 global _last_row | 62 self.punctuation = ['->', '-', '+', '(', ')', '=', ':', '"', '|'] |
| 65 | 63 |
| 66 m = re.match(r'^-- ([^ ]+) \(([^)]+)\)', _line) | 64 def parse(self, input): |
| 67 if not m: return False | 65 self.input = input # The remaining input to parse |
| 68 | 66 decoder = dgen_core.Decoder() # The generated decoder of parse tables. |
| 69 table = dgen_core.Table(m.group(1), m.group(2)) | 67 # Read tables while there. |
| 70 next_line() | 68 while self._next_token().kind == '+': |
| 71 while blank_line(): pass | 69 self._table(decoder) |
| 72 | 70 |
| 73 table_header(table) | 71 if not self._next_token().kind == 'eof': |
| 74 _last_row = None | 72 self._unexpected('unrecognized input found') |
| 75 while not end_of_file() and not blank_line(): | 73 if not decoder.primary: |
| 76 table_row(table) | 74 self._unexpected('No primary table defined') |
| 77 | 75 if not decoder.tables(): |
| 78 _tables.append(table) | 76 self._unexpected('No tables defined') |
| 79 return True | 77 return decoder |
| 80 | 78 |
| 81 | 79 def _arch(self): |
| 82 def table_header(table): | 80 """ arch ::= '(' word+ ')' """ |
| 83 for col in _line.split(): | 81 return ' '.join(self._parenthesized_exp()) |
| 84 m = re.match(r'^([a-z0-9_]+)\(([0-9]+)(:([0-9]+))?\)$', col, re.I) | 82 |
| 85 if not m: raise Exception('Invalid column header: %s' % col) | 83 def _citation(self): |
| 86 | 84 """ citation ::= '(' word+ ')' """ |
| 87 hi_bit = int(m.group(2)) | 85 return ' '.join(self._parenthesized_exp()) |
| 88 if m.group(4): | 86 |
| 89 lo_bit = int(m.group(4)) | 87 def _decoder_action(self): |
| 88 """ decoder_action ::= id (id (word (id)?)?)? """ | |
| 89 self._read_token('=') | |
| 90 name = None | |
| 91 rule = None | |
| 92 pattern = None | |
| 93 constraints = None | |
| 94 count = 1 | |
| 95 while self._next_token().kind not in ['|', '+', '(']: | |
|
robertm
2012/04/17 17:12:19
not using a loop might be clearer
| |
| 96 if count == 1: | |
| 97 name = _name_if_not_none(self._id()) | |
| 98 elif count == 2: | |
| 99 rule = _name_if_not_none(self._id()) | |
| 100 elif count == 3: | |
| 101 pattern = _name_if_not_none(self._read_token('word').value) | |
| 102 elif count == 4: | |
| 103 constraints = _name_if_not_none(self._id()) | |
| 104 else: | |
| 105 self._unexpected('Too many entries in decoder action') | |
| 106 count += 1 | |
| 107 return dgen_core.DecoderAction(name, rule, pattern, constraints) | |
| 108 | |
| 109 def _decoder_method(self): | |
| 110 """ decoder_method ::= '->' id """ | |
| 111 self._read_token('->') | |
| 112 name = self._id() | |
| 113 return dgen_core.DecoderMethod(name) | |
| 114 | |
| 115 def _footer(self): | |
| 116 """ footer ::= '+' '-' '-' """ | |
| 117 self._read_token('+') | |
| 118 self._read_token('-') | |
| 119 self._read_token('-') | |
| 120 | |
| 121 def _header(self, table): | |
| 122 """ header ::= "|" (id '(' int (':' int)? ')')+ """ | |
| 123 self._read_token('|') | |
| 124 while not self._next_token().kind == '|': | |
| 125 name = self._read_token('word').value | |
| 126 self._read_token('(') | |
| 127 hi_bit = self._int() | |
| 128 lo_bit = hi_bit | |
| 129 if self._next_token().kind == ':': | |
| 130 self._read_token(':') | |
| 131 lo_bit = self._int() | |
| 132 self._read_token(')') | |
| 133 table.add_column(name, hi_bit, lo_bit) | |
| 134 | |
| 135 def _int(self): | |
| 136 """ int ::= word | |
| 137 | |
| 138 Int is a sequence of digits. Returns the corresponding integer. | |
| 139 """ | |
| 140 word = self._read_token('word').value | |
| 141 m = re.match(r'^([0-9]+)$', word) | |
| 142 if m: | |
| 143 return int(word) | |
| 144 else: | |
| 145 self._unexpected('integer expected but found "%s"' % word) | |
| 146 | |
| 147 def _id(self): | |
| 148 """ id ::= word | |
| 149 | |
| 150 Word starts with a letter, and is followed by letters, digits, | |
| 151 and underscores. Returns the corresponding identifier. | |
| 152 """ | |
| 153 ident = self._read_token('word').value | |
| 154 m = re.match(r'^[a-zA-z][a-zA-z0-9_]*$', ident) | |
| 155 if not m: | |
| 156 self._unexpected('"%s" is not a valid identifier' % ident) | |
| 157 return ident | |
| 158 | |
| 159 def _parenthesized_exp(self, minlength=1): | |
| 160 """ parenthesized_exp ::= '(' (word | punctuation)+ ')' | |
| 161 | |
| 162 The punctuation doesn't include ')'. | |
| 163 Returns the sequence of token values parsed. | |
| 164 """ | |
| 165 self._read_token('(') | |
| 166 words = [] | |
| 167 while not self._at_eof() and self._next_token().kind != ')': | |
| 168 words.append(self._read_token().value) | |
| 169 if len(words) < minlength: | |
| 170 self._unexpected("len(parenthesized expresssion) < %s" % minlength) | |
| 171 self._read_token(')') | |
| 172 return words | |
| 173 | |
| 174 def _row(self, table, last_patterns=None, | |
| 175 last_action=None, last_arch= None): | |
| 176 """ row ::= '|' pattern+ (decoder_action arch? | decoder_method)? | |
| 177 | |
| 178 Passed in sequence of patterns and action from last row, | |
| 179 and returns list of patterns and action from this row. | |
| 180 """ | |
| 181 patterns = [] | |
| 182 expanded_patterns = [] | |
| 183 action = None | |
| 184 arch = None | |
| 185 self._read_token('|') | |
| 186 while self._next_token().kind not in ['=', '->', '|', '+']: | |
| 187 pattern = None | |
| 188 if self._next_token().kind == '"': | |
| 189 self._read_token('"') | |
|
robertm
2012/04/17 17:12:19
add a comment here
| |
| 190 if last_patterns: | |
| 191 pat_len = len(patterns) | |
| 192 last_pat_len = len(last_patterns) | |
| 193 if pat_len < last_pat_len: | |
| 194 pattern = last_patterns[len(patterns)] | |
| 195 elif pat_len == last_pat_len: | |
| 196 action = last_action | |
| 197 arch = last_arch | |
| 198 break | |
| 199 else: | |
| 200 self._unexpected('Can\'t determine value for "') | |
| 90 else: | 201 else: |
| 91 lo_bit = hi_bit | 202 self._unexpected('Can\'t determine value for "') |
| 92 table.add_column(m.group(1), hi_bit, lo_bit) | 203 elif self._next_token().kind == '-': |
| 93 next_line() | 204 pattern = self._read_token('-').value |
| 94 | |
| 95 | |
| 96 def table_row(table): | |
| 97 global _last_row | |
| 98 | |
| 99 row = _line.split() | |
| 100 for i in range(0, len(row)): | |
| 101 if row[i] == '"': row[i] = _last_row[i] | |
| 102 _last_row = row | |
| 103 | |
| 104 action = row[-1] | |
| 105 patterns = row[:-1] | |
| 106 table.add_row(patterns, action) | |
| 107 next_line() | |
| 108 | |
| 109 | |
| 110 def end_of_file(): | |
| 111 return _line is None | |
| 112 | |
| 113 | |
| 114 def next_line(): | |
| 115 "Reads the next non-comment line" | |
| 116 global _line_no, _line | |
| 117 | |
| 118 _line_no += 1 | |
| 119 _line = _in.readline() | |
| 120 while True: | |
| 121 if _line: | |
| 122 if _line[0] == '#': | |
| 123 # skip comment line and continue search. | |
| 124 _line_no += 1 | |
| 125 _line = _in.readline() | |
| 126 continue | |
| 127 _line = re.sub(r'#.*', '', _line).strip() | |
| 128 else: | 205 else: |
| 129 _line = None | 206 pattern = self._read_token('word').value |
| 130 # if reached, found line. | 207 if pattern: |
| 131 return | 208 col = len(patterns) |
| 132 | 209 patterns.append(pattern) |
| 133 def unexpected(): | 210 pattern = table.define_pattern(pattern, col) |
| 134 raise Exception('Line %d: Unexpected line in input: %s' % (_line_no, _line)) | 211 expanded_patterns.append(pattern) |
| 212 else: | |
| 213 self._unexpected("Pattern illegal, can't continue") | |
| 214 if self._next_token().kind == '=': | |
| 215 if action: | |
| 216 self._unexpected('action specified when " defined action') | |
| 217 action = self._decoder_action() | |
| 218 if self._next_token().kind == '(': | |
| 219 arch = self._arch() | |
| 220 elif self._next_token().kind == '->': | |
| 221 if action: | |
| 222 self._unexpected('action specified when " defined action') | |
| 223 action = self._decoder_method() | |
| 224 if self._next_token().kind in ['|', '+']: | |
| 225 if not action: | |
| 226 self._unexpected('No action defined for row') | |
| 227 table.add_row(expanded_patterns, action, arch) | |
| 228 return (patterns, action, arch) | |
| 229 else: | |
| 230 self._unexpected('Malformed row') | |
| 231 | |
| 232 def _table(self, decoder): | |
| 233 """ table ::= table_desc header row+ footer """ | |
| 234 table = self._table_desc() | |
| 235 print 'Reading table %s...' % table.name | |
| 236 self._header(table) | |
| 237 (pattern, action, arch) = self._row(table) | |
| 238 while not self._next_token().kind == '+': | |
| 239 (pattern, action, arch) = self._row(table, pattern, action, arch) | |
| 240 if not decoder.add(table): | |
| 241 self._unexpected('Multiple tables with name %s' % table.name) | |
| 242 self._footer() | |
| 243 | |
| 244 def _table_desc(self): | |
| 245 """ table_desc ::= '+' '-' '-' id citation? """ | |
| 246 self._read_token('+') | |
| 247 self._read_token('-') | |
| 248 self._read_token('-') | |
| 249 name = self._id() | |
| 250 citation = None | |
| 251 if self._next_token().kind == '(': | |
| 252 citation = self._citation() | |
| 253 return dgen_core.Table(name, citation) | |
| 254 | |
| 255 def _at_eof(self): | |
| 256 """Returns true if next token is the eof token.""" | |
| 257 return self._next_token().kind == 'eof' | |
| 258 | |
| 259 def _read_token(self, kind=None): | |
| 260 """Reads and returns the next token from input.""" | |
| 261 token = self._next_token() | |
| 262 self.token = None | |
| 263 if kind and kind != token.kind: | |
| 264 self._unexpected('Expected "%s" but found "%s"' | |
| 265 % (kind, token.kind)) | |
| 266 return token | |
| 267 | |
| 268 def _next_token(self): | |
| 269 """Returns the next token from the input.""" | |
| 270 # First see if cached. | |
| 271 if self.token: return self.token | |
| 272 | |
| 273 # If no more tokens left on the current line, read | |
| 274 # input till more tokens are found | |
| 275 while not self.at_eof and not self.words: | |
| 276 self.words = self._read_line().split() | |
| 277 | |
| 278 if self.words: | |
| 279 # More tokens found. Convert the first word to a token. | |
| 280 word = self.words.pop(0) | |
| 281 # First remove any applicable punctuation. | |
| 282 for p in self.punctuation: | |
| 283 index = word.find(p) | |
| 284 if index == 0: | |
| 285 # Found punctuation, return it. | |
| 286 self._pushback(word[len(p):]) | |
| 287 self.token = Token(p) | |
| 288 return self.token | |
| 289 elif index > 0: | |
| 290 self._pushback(word[index:]) | |
| 291 word = word[:index] | |
| 292 # if reached, word doesn't contain any punctuation, so return it. | |
| 293 self.token = Token('word', word) | |
| 294 else: | |
| 295 # No more tokens found, assume eof. | |
| 296 self.token = Token('eof') | |
| 297 return self.token | |
| 298 | |
| 299 def _pushback(self, word): | |
| 300 """Puts word back onto the list of words.""" | |
| 301 if word: | |
| 302 self.words.insert(0, word) | |
| 303 | |
| 304 def _read_line(self): | |
| 305 """Reads the next line of input, and returns it. Returns '' at end of file.""" | |
| 306 self.line_no += 1 | |
| 307 line = self.input.readline() | |
| 308 if line: | |
| 309 return re.sub(r'#.*', '', line).strip() | |
| 310 else: | |
| 311 self.at_eof = True | |
| 312 return '' | |
| 313 | |
| 314 def _unexpected(self, context='Unexpected line in input'): | |
| 315 """Reports that we didn't find the expected context. """ | |
| 316 raise Exception('Line %d: %s' % (self.line_no, context)) | |
| 317 | |
| 318 def _name_if_not_none(id): | |
| 319 """ Returns id unless string 'None' """ | |
| 320 return None if id and id == 'None' else id | |
| OLD | NEW |