src/trusted/validator_ragel/parse_dfa.py - Issue 9348082: Move unreviewed files to unreviewed subdirectory

Side by Side Diff: src/trusted/validator_ragel/parse_dfa.py

Issue 9348082: Move unreviewed files to unreviewed subdirectory (Closed) Base URL: svn://svn.chromium.org/native_client/trunk/src/native_client/

Patch Set: Created 8 years, 10 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

OLD	NEW
	(Empty)
1 #!/usr/bin/python

2 # Copyright (c) 2012 The Native Client Authors. All rights reserved.

3 # Use of this source code is governed by a BSD-style license that can be

4 # found in the LICENSE file.

5

6

7 """Generates C equivalent of the DFA defined in a dot file for the decoder test.

8

9 Reads the dot file from stdin and writes the output to stdout.

10

11 Some assumptions are made that does not allow parsing arbitrary dot files:

12 * The digraph definition starts by listing all nodes with a number of shapes:

13 * "shape = doublecircle" denotes a final state node

14 * all other states are listed under "shape = point" or "shape = circle"

15 * The initial state is pointed to by the edge going from node named "ENTRY".

16 * Each edge definition fits in one line.

17 * Edge label describes a list of bytes, byte ranges (written as "A..B", both A

18 and B are included in the range) or "DEF" (a synonym of "0..255").

19

20 An example of a dot file that can be parsed by this script:

21 digraph one_instruction_x86_64 {

22 node [ shape = point ];

23 ENTRY;

24 node [ fixedsize = true, height = 0.65, shape = doublecircle ];

25 3;

26 node [ shape = circle ];

27 1 -> 2 [ label = "0..3 / action_1" ];

28 2 -> 3 [ label = "DEF / action_2" ];

29 1 -> 3 [ label = "15" ];

30 ENTRY -> 1 [ label = "IN" ];

31 }

32 """

33

34 import re

35 import sys

36

37

38 # Actions that trigger begin/end of a multibyte field that does not affect

39 # the structure of a command.

40 anyfield_begin_actions = [

41 'disp8_operand_begin', 'disp32_operand_begin', 'disp64_operand_begin',

42 'rel8_operand_begin', 'rel16_operand_begin', 'rel32_operand_begin',

43 'imm8_operand_begin', 'imm16_operand_begin', 'imm32_operand_begin',

44 'imm64_operand_begin'

45 ]

46

47 anyfield_end_actions = [

48 'disp8_operand_end', 'disp32_operand_end', 'disp64_operand_end',

49 'rel8_operand_end', 'rel16_operand_end', 'rel32_operand_end',

50 'imm8_operand_end', 'imm16_operand_end', 'imm32_operand_end',

51 'imm64_operand_end'

52 ]

53

54

55 def CheckOnlyDefaultState(states, st):

56 """Asserts that there is only one transition, the default one."""

57

58 assert(states[st]['default_to'] != None and

59 len(states[st]['transitions']) == 0)

60

61

62 def InitStateIfNeeded(states, st):

63 """Initialize the information necessary for each state if it was not done.

64

65 Each state is represented by the dictionary with keys:

66 'is_final': whether the state is final

67 'anyfield_begin': whether this state starts a multibyte field in an

68 instruction

69 'anyfield_end': whether this state ends a multibyte field

70 'default_to': a name of a state to transition to by default (i.e. if no

71 value from explicit transitions matches)

72 'transitions': A list of transitions. Each transition is a dictionary with

73 keys:

74 'begin_byte': the byte that begins the range of bytes, inclusive

75 'end_byte': the byte that ends the range, inclusive

76 'state_to': name of the state to transition to

77

78 Args:

79 states: The dict containing information associated with each state.

80 st: The name of the state.

81

82 Returns:

83 A state. For example:

84 {'is_final': False,

85 'anyfield_begin': False,

86 'anyfield_end': False,

87 'transitions': [{'begin_byte': '0',

88 'end_byte': '0',

89 'state_to': '100',

90 }],

91 'default_to': '200',

92 }

93 """

94

95 if not states.get(st, None):

96 states[st] = {'is_final': False,

97 'transitions': [],

98 'anyfield_begin': False,

99 'anyfield_end': False,

100 'default_to': None,

101 }

102 return states[st]

103

104

105 def Main():

106 met_finals = False

107 started_edges = False

108 states = {}

109 max_state = 0

110 for line in sys.stdin.readlines():

111 # Find out which states are final.

112 if not started_edges:

113 if met_finals:

114 if re.match(r'\tnode \[', line):

115 started_edges = True

116 continue

117 final_state_m = re.match(r'\t(\w+);', line)

118 assert(final_state_m)

119 st_name = final_state_m.group(1)

120 InitStateIfNeeded(states, st_name)['is_final'] = True

121 if not met_finals and re.match(r'\tnode .* shape = doublecircle', line):

122 met_finals = True

123 continue

124

125 # Parse an edge or state.

126 edge_m = re.match(r'\t(\w+) -> (\w+) \[([^\]]+)\];$', line)

127 if edge_m:

128 state_from = edge_m.group(1)

129 state_to = edge_m.group(2)

130 max_state = max(max_state, int(state_to))

131 try:

132 max_state = max(max_state, int(state_from))

133 except ValueError:

134 if state_from == 'ENTRY':

135 start_state = state_to

136 continue

137

138 # Parse edge label.

139 text_m = re.match(r' label = "([^"]+)" ', edge_m.group(3))

140 assert(text_m)

141 label_text = text_m.group(1)

142

143 while True:

144 target_m = re.match(r'(, )?(\w+\.\.\w+\|\w+)(.*)$',

145 label_text)

146 if not target_m:

147 break

148 label_text = target_m.group(3)

149 byte_range_text = target_m.group(2)

150 byte_range_m = re.match(r'(\w+)\.\.(\w+)', byte_range_text)

151 if byte_range_text == 'DEF':

152 InitStateIfNeeded(states, state_from)['default_to'] = state_to

153 else:

154 if byte_range_m:

155 begin_byte = byte_range_m.group(1)

156 end_byte = byte_range_m.group(2)

157 else:

158 begin_byte = byte_range_text

159 end_byte = begin_byte

160 InitStateIfNeeded(states, state_from)

161 InitStateIfNeeded(states, state_to)

162 states[state_from]['transitions'].append({'begin_byte': begin_byte,

163 'end_byte': end_byte,

164 'state_to': state_to,

165 })

166 while True:

167 action_m = re.match('( / \|, )(\w+)(.*)$', label_text)

168 if not action_m:

169 break

170 label_text = action_m.group(3)

171 action_name = action_m.group(2)

172 if action_name in anyfield_begin_actions:

173 InitStateIfNeeded(states, state_from)['anyfield_begin'] = True

174 CheckOnlyDefaultState(states, state_from)

175 if action_name in anyfield_end_actions:

176 InitStateIfNeeded(states, state_from)['anyfield_end'] = True

177 CheckOnlyDefaultState(states, state_from)

178

179 # Generate the C definitions based on parsed dot file.

180 print '/* This code was generated by parse_dfa.py. */'

181 print

182 print '#include "test_dfa.h"'

183 print

184 print 'struct state states[] = {'

185 def BoolToStr(bool_value):

186 ret = 'false'

187 if bool_value:

188 ret = ' true'

189 return ret

190 for i in xrange(max_state + 1):

191 if not states.get(str(i),None):

192 print (' { .is_final = false, .anyfield_begin = false,' +

193 ' .anyfield_end = false }, /* %d */' % i)

194 continue

195 st = states[str(i)]

196 print (' { .is_final = %s, .anyfield_begin = %s,' +

197 ' .anyfield_end = %s }, /* %d */') % (

198 BoolToStr(st['is_final']),

199 BoolToStr(st['anyfield_begin']),

200 BoolToStr(st['anyfield_end']),

201 i)

202 print '};'

203 print

204

205 print 'void InitTransitions() {'

206 for i in xrange(max_state + 1):

207 if states.get(str(i),None):

208 default_to = states[str(i)]['default_to']

209 default_transition_only = True

210 default_bytes = [True] * 256

211 for tr in states[str(i)]['transitions']:

212 default_transition_only = False

213 for j in xrange(int(tr['begin_byte']), int(tr['end_byte']) + 1):

214 default_bytes[j] = False

215 print 'AddRange({0}, {1}, {2}, {3});'.format(

216 str(i), tr['begin_byte'], tr['end_byte'], tr['state_to'])

217 if default_to != None and default_transition_only:

218 print 'AddRange({0}, {1}, {2}, {3});'.format(

219 str(i), '0', '255', default_to)

220 elif default_to != None:

221 for j in xrange(0, 256):

222 if default_bytes[j]:

223 print 'AddRange({0}, {1}, {2}, {3});'.format(

224 str(i), str(j), str(j), default_to)

225

226 print '}'

227 print

228 print 'uint16_t start_state = {0};'.format(start_state)

229 print

230 print 'uint16_t NumStates() { return %d; }' % (max_state + 1)

231

232

233 if __name__ == '__main__':

234 sys.exit(Main())

OLD	NEW

« no previous file with comments | « src/trusted/validator_ragel/one-instruction-x86_64.rl ('k') | src/trusted/validator_ragel/run_objdump_test.py » ('j') | no next file with comments »