OLD | NEW |
| (Empty) |
1 #!/usr/bin/python | |
2 # Copyright (c) 2012 The Native Client Authors. All rights reserved. | |
3 # Use of this source code is governed by a BSD-style license that can be | |
4 # found in the LICENSE file. | |
5 | |
6 | |
7 """Generates C equivalent of the DFA defined in a dot file for the decoder test. | |
8 | |
9 Reads the dot file from stdin and writes the output to stdout. | |
10 | |
11 Some assumptions are made that does not allow parsing arbitrary dot files: | |
12 * The digraph definition starts by listing all nodes with a number of shapes: | |
13 * "shape = doublecircle" denotes a final state node | |
14 * all other states are listed under "shape = point" or "shape = circle" | |
15 * The initial state is pointed to by the edge going from node named "ENTRY". | |
16 * Each edge definition fits in one line. | |
17 * Edge label describes a list of bytes, byte ranges (written as "A..B", both A | |
18 and B are included in the range) or "DEF" (a synonym of "0..255"). | |
19 | |
20 An example of a dot file that can be parsed by this script: | |
21 digraph one_instruction_x86_64 { | |
22 node [ shape = point ]; | |
23 ENTRY; | |
24 node [ fixedsize = true, height = 0.65, shape = doublecircle ]; | |
25 3; | |
26 node [ shape = circle ]; | |
27 1 -> 2 [ label = "0..3 / action_1" ]; | |
28 2 -> 3 [ label = "DEF / action_2" ]; | |
29 1 -> 3 [ label = "15" ]; | |
30 ENTRY -> 1 [ label = "IN" ]; | |
31 } | |
32 """ | |
33 | |
34 import re | |
35 import sys | |
36 | |
37 | |
38 # Actions that trigger begin/end of a multibyte field that does not affect | |
39 # the structure of a command. | |
40 anyfield_begin_actions = [ | |
41 'disp8_operand_begin', 'disp32_operand_begin', 'disp64_operand_begin', | |
42 'rel8_operand_begin', 'rel16_operand_begin', 'rel32_operand_begin', | |
43 'imm8_operand_begin', 'imm16_operand_begin', 'imm32_operand_begin', | |
44 'imm64_operand_begin' | |
45 ] | |
46 | |
47 anyfield_end_actions = [ | |
48 'disp8_operand_end', 'disp32_operand_end', 'disp64_operand_end', | |
49 'rel8_operand_end', 'rel16_operand_end', 'rel32_operand_end', | |
50 'imm8_operand_end', 'imm16_operand_end', 'imm32_operand_end', | |
51 'imm64_operand_end' | |
52 ] | |
53 | |
54 | |
55 def CheckOnlyDefaultState(states, st): | |
56 """Asserts that there is only one transition, the default one.""" | |
57 | |
58 assert(states[st]['default_to'] != None and | |
59 len(states[st]['transitions']) == 0) | |
60 | |
61 | |
62 def InitStateIfNeeded(states, st): | |
63 """Initialize the information necessary for each state if it was not done. | |
64 | |
65 Each state is represented by the dictionary with keys: | |
66 'is_final': whether the state is final | |
67 'anyfield_begin': whether this state starts a multibyte field in an | |
68 instruction | |
69 'anyfield_end': whether this state ends a multibyte field | |
70 'default_to': a name of a state to transition to by default (i.e. if no | |
71 value from explicit transitions matches) | |
72 'transitions': A list of transitions. Each transition is a dictionary with | |
73 keys: | |
74 'begin_byte': the byte that begins the range of bytes, inclusive | |
75 'end_byte': the byte that ends the range, inclusive | |
76 'state_to': name of the state to transition to | |
77 | |
78 Args: | |
79 states: The dict containing information associated with each state. | |
80 st: The name of the state. | |
81 | |
82 Returns: | |
83 A state. For example: | |
84 {'is_final': False, | |
85 'anyfield_begin': False, | |
86 'anyfield_end': False, | |
87 'transitions': [{'begin_byte': '0', | |
88 'end_byte': '0', | |
89 'state_to': '100', | |
90 }], | |
91 'default_to': '200', | |
92 } | |
93 """ | |
94 | |
95 if not states.get(st, None): | |
96 states[st] = {'is_final': False, | |
97 'transitions': [], | |
98 'anyfield_begin': False, | |
99 'anyfield_end': False, | |
100 'default_to': None, | |
101 } | |
102 return states[st] | |
103 | |
104 | |
105 def Main(): | |
106 met_finals = False | |
107 started_edges = False | |
108 states = {} | |
109 max_state = 0 | |
110 for line in sys.stdin.readlines(): | |
111 # Find out which states are final. | |
112 if not started_edges: | |
113 if met_finals: | |
114 if re.match(r'\tnode \[', line): | |
115 started_edges = True | |
116 continue | |
117 final_state_m = re.match(r'\t(\w+);', line) | |
118 assert(final_state_m) | |
119 st_name = final_state_m.group(1) | |
120 InitStateIfNeeded(states, st_name)['is_final'] = True | |
121 if not met_finals and re.match(r'\tnode .* shape = doublecircle', line): | |
122 met_finals = True | |
123 continue | |
124 | |
125 # Parse an edge or state. | |
126 edge_m = re.match(r'\t(\w+) -> (\w+) \[([^\]]+)\];$', line) | |
127 if edge_m: | |
128 state_from = edge_m.group(1) | |
129 state_to = edge_m.group(2) | |
130 max_state = max(max_state, int(state_to)) | |
131 try: | |
132 max_state = max(max_state, int(state_from)) | |
133 except ValueError: | |
134 if state_from == 'ENTRY': | |
135 start_state = state_to | |
136 continue | |
137 | |
138 # Parse edge label. | |
139 text_m = re.match(r' label = "([^"]+)" ', edge_m.group(3)) | |
140 assert(text_m) | |
141 label_text = text_m.group(1) | |
142 | |
143 while True: | |
144 target_m = re.match(r'(, )?(\w+\.\.\w+|\w+)(.*)$', | |
145 label_text) | |
146 if not target_m: | |
147 break | |
148 label_text = target_m.group(3) | |
149 byte_range_text = target_m.group(2) | |
150 byte_range_m = re.match(r'(\w+)\.\.(\w+)', byte_range_text) | |
151 if byte_range_text == 'DEF': | |
152 InitStateIfNeeded(states, state_from)['default_to'] = state_to | |
153 else: | |
154 if byte_range_m: | |
155 begin_byte = byte_range_m.group(1) | |
156 end_byte = byte_range_m.group(2) | |
157 else: | |
158 begin_byte = byte_range_text | |
159 end_byte = begin_byte | |
160 InitStateIfNeeded(states, state_from) | |
161 InitStateIfNeeded(states, state_to) | |
162 states[state_from]['transitions'].append({'begin_byte': begin_byte, | |
163 'end_byte': end_byte, | |
164 'state_to': state_to, | |
165 }) | |
166 while True: | |
167 action_m = re.match('( / |, )(\w+)(.*)$', label_text) | |
168 if not action_m: | |
169 break | |
170 label_text = action_m.group(3) | |
171 action_name = action_m.group(2) | |
172 if action_name in anyfield_begin_actions: | |
173 InitStateIfNeeded(states, state_from)['anyfield_begin'] = True | |
174 CheckOnlyDefaultState(states, state_from) | |
175 if action_name in anyfield_end_actions: | |
176 InitStateIfNeeded(states, state_from)['anyfield_end'] = True | |
177 CheckOnlyDefaultState(states, state_from) | |
178 | |
179 # Generate the C definitions based on parsed dot file. | |
180 print '/* This code was generated by parse_dfa.py. */' | |
181 print | |
182 print '#include "test_dfa.h"' | |
183 print | |
184 print 'struct state states[] = {' | |
185 def BoolToStr(bool_value): | |
186 ret = 'false' | |
187 if bool_value: | |
188 ret = ' true' | |
189 return ret | |
190 for i in xrange(max_state + 1): | |
191 if not states.get(str(i),None): | |
192 print (' { .is_final = false, .anyfield_begin = false,' + | |
193 ' .anyfield_end = false }, /* %d */' % i) | |
194 continue | |
195 st = states[str(i)] | |
196 print (' { .is_final = %s, .anyfield_begin = %s,' + | |
197 ' .anyfield_end = %s }, /* %d */') % ( | |
198 BoolToStr(st['is_final']), | |
199 BoolToStr(st['anyfield_begin']), | |
200 BoolToStr(st['anyfield_end']), | |
201 i) | |
202 print '};' | |
203 print | |
204 | |
205 print 'void InitTransitions() {' | |
206 for i in xrange(max_state + 1): | |
207 if states.get(str(i),None): | |
208 default_to = states[str(i)]['default_to'] | |
209 default_transition_only = True | |
210 default_bytes = [True] * 256 | |
211 for tr in states[str(i)]['transitions']: | |
212 default_transition_only = False | |
213 for j in xrange(int(tr['begin_byte']), int(tr['end_byte']) + 1): | |
214 default_bytes[j] = False | |
215 print 'AddRange({0}, {1}, {2}, {3});'.format( | |
216 str(i), tr['begin_byte'], tr['end_byte'], tr['state_to']) | |
217 if default_to != None and default_transition_only: | |
218 print 'AddRange({0}, {1}, {2}, {3});'.format( | |
219 str(i), '0', '255', default_to) | |
220 elif default_to != None: | |
221 for j in xrange(0, 256): | |
222 if default_bytes[j]: | |
223 print 'AddRange({0}, {1}, {2}, {3});'.format( | |
224 str(i), str(j), str(j), default_to) | |
225 | |
226 print '}' | |
227 print | |
228 print 'uint16_t start_state = {0};'.format(start_state) | |
229 print | |
230 print 'uint16_t NumStates() { return %d; }' % (max_state + 1) | |
231 | |
232 | |
233 if __name__ == '__main__': | |
234 sys.exit(Main()) | |
OLD | NEW |