OLD | NEW |
---|---|
(Empty) | |
1 # Copyright 2014 The LUCI Authors. All rights reserved. | |
2 # Use of this source code is governed under the Apache License, Version 2.0 | |
3 # that can be found in the LICENSE file. | |
4 | |
5 """Provides simulator test coverage for individual recipes.""" | |
6 | |
7 import ast | |
8 import copy | |
9 import inspect | |
10 import re | |
11 import itertools | |
12 | |
13 from collections import OrderedDict, namedtuple, deque, defaultdict | |
14 | |
15 from . import env | |
16 import astunparse | |
17 import expect_tests | |
18 | |
19 | |
20 class _resolved(ast.AST): | |
21 """_resolved is a fake AST node which represents a resolved sub-expression. | |
22 It's used by _checkTransformer to replace portions of its AST with their | |
23 resolved equivalents.""" | |
24 def __init__(self, representation, value): | |
25 super(_resolved, self).__init__() | |
26 self.representation = representation | |
27 self.value = value | |
28 | |
29 | |
30 class _checkTransformer(ast.NodeTransformer): | |
31 """_checkTransformer is an ast NodeTransformer which extracts the helpful | |
32 subexpressions from a python expression (specificially, from an invocation of | |
33 the Checker). These subexpressions will be printed along with the check's | |
34 source code statement to provide context for the failed check. | |
35 | |
36 It knows the following transformations: | |
37 * all python identifiers will be resolved to their local variable meaning. | |
38 * `___ in <instance of dict>` will cause dict.keys() to be printed in lieu | |
39 of the entire dictionary. | |
40 * `a[b][c]` will cause `a[b]` and `a[b][c]` to be printed (for an arbitrary | |
41 level of recursion) | |
42 | |
43 The transformed ast is NOT a valid python AST... In particular, every reduced | |
44 subexpression will be a _resolved() where the `representation` is the code for | |
45 the subexpression (It could be any valid expression like `foo.bar()`), | |
46 and the `value` will be the eval'd value for that element. | |
47 | |
48 In addition to this, there will be a list of _resolved nodes in the | |
49 transformer's `extra` attribute for additional expressions which should be | |
50 printed for debugging usefulness, but didn't fit into the ast tree anywhere. | |
51 """ | |
52 | |
53 def __init__(self, lvars, gvars): | |
54 self.lvars = lvars | |
55 self.gvars = gvars | |
56 self.extras = [] | |
57 | |
58 def visit_Compare(self, node): | |
59 """Compare nodes occur for all sequences of comparison (`in`, gt, lt, etc.) | |
60 operators. We only want to match `___ in instanceof(dict)` here, so we | |
61 restrict this to Compare ops with a single operator which is `In`. | |
martiniss
2016/10/13 22:54:12
... which is `In` or `not in`?
iannucci
2016/10/13 23:05:05
done
| |
62 """ | |
63 node = self.generic_visit(node) | |
64 | |
65 if len(node.ops) == 1 and isinstance(node.ops[0], (ast.In, ast.NotIn)): | |
66 cmps = node.comparators | |
67 if len(cmps) == 1 and isinstance(cmps[0], _resolved): | |
68 rslvd = cmps[0] | |
69 if isinstance(rslvd.value, dict): | |
70 node = ast.Compare( | |
71 node.left, | |
72 node.ops, | |
73 [_resolved(rslvd.representation+".keys()", | |
74 sorted(rslvd.value.keys()))]) | |
75 | |
76 return node | |
77 | |
78 def visit_Subscript(self, node): | |
79 """Subscript nodes are anything which is __[__]. We only want to match __[x] | |
80 here so where the [x] is a regular Index expression (not an elipsis or | |
81 slice). We only handle cases where x is a constant, or a resolvable variable | |
82 lookup (so a variable lookup, index, etc.).""" | |
83 node = self.generic_visit(node) | |
84 | |
85 if (isinstance(node.slice, ast.Index) and | |
86 isinstance(node.value, _resolved)): | |
87 sliceVal = MISSING | |
88 sliceRepr = '' | |
89 if isinstance(node.slice.value, _resolved): | |
90 # (a[b])[c] | |
91 # will include `a[b]` in the extras. | |
92 self.extras.append(node.slice.value) | |
93 sliceVal = node.slice.value.value | |
94 sliceRepr = node.slice.value.representation | |
95 elif isinstance(node.slice.value, ast.Num): | |
96 sliceVal = node.slice.value.n | |
97 sliceRepr = repr(sliceVal) | |
98 elif isinstance(node.slice.value, ast.Str): | |
99 sliceVal = node.slice.value.s | |
100 sliceRepr = repr(sliceVal) | |
101 if sliceVal is not MISSING: | |
102 node = _resolved( | |
103 '%s[%s]' % (node.value.representation, sliceRepr), | |
104 node.value.value[sliceVal]) | |
105 | |
106 return node | |
107 | |
108 def visit_Name(self, node): | |
109 """Matches a single, simple identifier (e.g. variable). | |
110 | |
111 This will lookup the variable value from python constants (e.g. True), | |
112 followed by the frame's local variables, and finally by the frame's global | |
113 variables. | |
114 """ | |
115 consts = {'True': True, 'False': False, 'None': None} | |
martiniss
2016/10/13 22:54:12
O_O I didn't know these kinds of constants are ide
iannucci
2016/10/13 23:05:05
Yep
| |
116 val = consts.get( | |
117 node.id, self.lvars.get( | |
118 node.id, self.gvars.get( | |
119 node.id, MISSING))) | |
120 if val is not MISSING: | |
121 return _resolved(node.id, val) | |
122 return node | |
123 | |
124 | |
125 def render_user_value(val): | |
126 """Takes a subexpression user value, and attempts to render it in the most | |
127 useful way possible. | |
128 | |
129 Currently this will use render_re for compiled regular expressions, and will | |
130 fall back to repr() for everything else. | |
131 | |
132 It should be the goal of this function to return an `eval`able string that | |
133 would yield the equivalent value in a python interpreter. | |
134 """ | |
135 if isinstance(val, re._pattern_type): | |
136 return render_re(val) | |
137 return repr(val) | |
138 | |
139 | |
140 def render_re(regex): | |
141 """Renders a repr()-style value for a compiled regular expression.""" | |
142 actual_flags = [] | |
143 if regex.flags: | |
144 flags = [ | |
145 (re.IGNORECASE, 'IGNORECASE'), | |
146 (re.LOCALE, 'LOCALE'), | |
147 (re.UNICODE, 'UNICODE'), | |
148 (re.MULTILINE, 'MULTILINE'), | |
149 (re.DOTALL, 'DOTALL'), | |
150 (re.VERBOSE, 'VERBOSE'), | |
151 ] | |
152 for val, name in flags: | |
153 if regex.flags & val: | |
154 actual_flags.append(name) | |
155 if actual_flags: | |
156 return 're.compile(%r, %s)' % (regex.pattern, '|'.join(actual_flags)) | |
157 else: | |
158 return 're.compile(%r)' % regex.pattern | |
159 | |
160 | |
161 class Checker(object): | |
162 # filename -> {lineno -> [statements]} | |
163 _PARSED_FILE_CACHE = defaultdict(lambda: defaultdict(list)) | |
164 | |
165 def __init__(self, filename, lineno, func, args, kwargs, *ignores): | |
166 self.failed_checks = [] | |
167 | |
168 # _ignore_set is the set of objects that we should never print as local | |
169 # variables. We start this set off by including the actual Checker object, | |
170 # since there's no value to printing that. | |
171 self._ignore_set = {id(x) for x in ignores+(self,)} | |
172 | |
173 self._ctx_filename = filename | |
174 self._ctx_lineno = lineno | |
175 self._ctx_funcname = _nameOfCallable(func) | |
176 self._ctx_args = map(repr, args) | |
177 self._ctx_kwargs = {k: repr(v) for k, v in kwargs.iteritems()} | |
178 | |
179 def _get_statements_for_frame(self, frame): | |
180 """This parses the file containing frame, and then extracts all simple | |
181 statements (i.e. those which do not contain other statements). It then | |
182 returns the list of all statements (as AST nodes) which occur on the line | |
183 number indicated by the frame. | |
184 | |
185 The parse and statement extraction is cached in the _PARSED_FILE_CACHE class | |
186 variable, so multiple assertions in the same file only pay the parsing cost | |
187 once. | |
188 """ | |
189 raw_frame, filename, lineno, _, _, _ = frame | |
190 if filename not in self._PARSED_FILE_CACHE: | |
191 to_push = ['body', 'orelse', 'finalbody', 'excepthandler'] | |
martiniss
2016/10/13 22:54:12
Can you document what these are? I'm not sure what
iannucci
2016/10/13 23:05:05
added a bunch of comments
| |
192 lines, _ = inspect.findsource(raw_frame) | |
193 queue = deque([ast.parse(''.join(lines), filename)]) | |
194 while queue: | |
195 node = queue.pop() | |
196 had_statements = False | |
197 for key in to_push: | |
198 val = getattr(node, key, MISSING) | |
199 if val is not MISSING: | |
200 had_statements = True | |
201 queue.extend(val[::-1]) | |
202 if had_statements: | |
203 continue | |
204 max_line = max(map(lambda n: getattr(n, 'lineno', 0), ast.walk(node))) | |
205 self._PARSED_FILE_CACHE[filename][max_line].append(node) | |
206 return self._PARSED_FILE_CACHE[filename][lineno] | |
207 | |
208 def _process_frame(self, frame, with_vars): | |
209 """This processes a stack frame into an expect_tests.CheckFrame, which | |
210 includes file name, line number, function name (of the function containing | |
211 the frame), the parsed statement at that line, and the relevant local | |
212 variables/subexpressions (if with_vars is True). | |
213 | |
214 In addition to transforming the expression with _checkTransformer, this | |
215 will: | |
216 * omit subexpressions which resolve to callable()'s | |
217 * omit the overall step ordered dictionary | |
218 * transform all subexpression values using render_user_value(). | |
219 """ | |
220 nodes = self._get_statements_for_frame(frame) | |
221 raw_frame, filename, lineno, func_name, _, _ = frame | |
222 | |
223 varmap = None | |
224 if with_vars: | |
225 varmap = {} | |
226 | |
227 xfrmr = _checkTransformer(raw_frame.f_locals, raw_frame.f_globals) | |
228 xfrmd = xfrmr.visit(ast.Module(copy.deepcopy(nodes))) | |
229 | |
230 for n in itertools.chain(ast.walk(xfrmd), xfrmr.extras): | |
231 if isinstance(n, _resolved): | |
232 val = n.value | |
233 if isinstance(val, ast.AST): | |
234 continue | |
235 if n.representation in ('True', 'False', 'None'): | |
236 continue | |
237 if callable(val) or id(val) in self._ignore_set: | |
238 continue | |
239 if n.representation not in varmap: | |
240 varmap[n.representation] = render_user_value(val) | |
241 | |
242 return expect_tests.CheckFrame( | |
243 filename, | |
244 lineno, | |
245 func_name, | |
246 '; '.join(astunparse.unparse(n).strip() for n in nodes), | |
247 varmap | |
248 ) | |
249 | |
250 def _call_impl(self, hint, exp): | |
251 """This implements the bulk of what happens when you run `check(exp)`. It | |
252 will crawl back up the stack and extract information about all of the frames | |
253 which are relevent to the check, including file:lineno and the code | |
254 statement which occurs at that location for all the frames. | |
255 | |
256 On the last frame (the one that actually contains the check call), it will | |
257 also try to obtain relevant local values in the check so they can be printed | |
258 with the check to aid in debugging and diagnosis. It uses the parsed | |
259 statement found at that line to find all referenced local variables in that | |
260 frame. | |
261 """ | |
262 | |
263 if exp: | |
264 # TODO(iannucci): collect this in verbose mode. | |
265 # this check passed | |
266 return | |
267 | |
268 try: | |
269 frames = inspect.stack()[2:] | |
270 | |
271 # grab all frames which have self as a local variable (e.g. frames | |
272 # associated with this checker), excluding self.__call__. | |
273 try: | |
274 i = 0 | |
275 for i, f in enumerate(frames): | |
276 if self not in f[0].f_locals.itervalues(): | |
277 break | |
278 keep_frames = [self._process_frame(f, j == 0) | |
279 for j, f in enumerate(frames[:i-1])] | |
280 finally: | |
281 del f | |
282 | |
283 # order it so that innermost frame is at the bottom | |
284 keep_frames = keep_frames[::-1] | |
285 | |
286 self.failed_checks.append(expect_tests.Check( | |
287 hint, | |
288 self._ctx_filename, | |
289 self._ctx_lineno, | |
290 self._ctx_funcname, | |
291 self._ctx_args, | |
292 self._ctx_kwargs, | |
293 keep_frames, | |
294 False | |
295 )) | |
296 finally: | |
297 # avoid reference cycle as suggested by inspect docs. | |
298 del frames | |
299 | |
300 def __call__(self, arg1, arg2=None): | |
301 if arg2 is not None: | |
302 hint = arg1 | |
303 exp = arg2 | |
304 else: | |
305 hint = None | |
306 exp = arg1 | |
307 self._call_impl(hint, exp) | |
308 | |
309 | |
310 MISSING = object() | |
311 | |
312 | |
313 def VerifySubset(a, b): | |
314 """Verify subset verifies that `a` is a subset of `b` where a and b are both | |
315 JSON-ish types. They are also permitted to be OrderedDicts instead of | |
316 dictionaries. | |
317 | |
318 This verifies that a introduces no extra dictionary keys, list elements, etc. | |
319 and also ensures that the order of entries in an ordered type (such as a list | |
320 or an OrderedDict) remain the same from a to b. This also verifies that types | |
321 are consistent between a and b. | |
322 | |
323 As a special case, empty and single-element dictionaries are considered | |
324 subsets of an OrderedDict, even though their types don't precisely match. | |
325 | |
326 If a is a vaild subset of b, this returns None. Otherwise this returns | |
martiniss
2016/10/13 22:54:12
typo
iannucci
2016/10/13 23:05:05
Done.
| |
327 a descriptive message of what went wrong. | |
328 | |
329 Example: | |
330 print 'object'+VerifySubset({'a': 'thing'}, {'b': 'other', 'a': 'prime'}) | |
331 | |
332 OUTPUT: | |
333 object['a']: 'thing' != 'prime' | |
334 """ | |
335 if a is b: | |
336 return | |
337 | |
338 if isinstance(b, OrderedDict) and isinstance(a, dict): | |
339 # 0 and 1-element dicts can stand in for OrderedDicts. | |
340 if len(a) == 0: | |
341 return | |
342 elif len(a) == 1: | |
343 a = OrderedDict([a.popitem()]) | |
344 | |
345 if type(a) != type(b): | |
346 return ': type mismatch: %r v %r' % (type(a).__name__, type(b).__name__) | |
347 | |
348 if isinstance(a, OrderedDict): | |
349 last_idx = 0 | |
350 b_reverse_index = {k: (i, v) for i, (k, v) in enumerate(b.iteritems())} | |
351 for k, v in a.iteritems(): | |
352 j, b_val = b_reverse_index.get(k, (MISSING, MISSING)) | |
353 if j is MISSING: | |
354 return ': added key %r' % k | |
355 | |
356 if j < last_idx: | |
357 return ': key %r is out of order' % k | |
358 # j == last_idx is not possible, these are OrderedDicts | |
359 last_idx = j | |
360 | |
361 msg = VerifySubset(v, b_val) | |
362 if msg: | |
363 return '[%r]%s' % (k, msg) | |
364 | |
365 elif isinstance(a, dict): | |
366 for k, v in a.iteritems(): | |
367 b_val = b.get(k, MISSING) | |
368 if b_val is MISSING: | |
369 return ': added key %r' % k | |
370 | |
371 msg = VerifySubset(v, b_val) | |
372 if msg: | |
373 return '[%r]%s' % (k, msg) | |
374 | |
375 elif isinstance(a, list): | |
376 if len(a) > len(b): | |
377 return ': too long: %d v %d' % (len(a), len(b)) | |
378 | |
379 bi = ai = 0 | |
380 while bi < len(b) - 1 and ai < len(a) - 1: | |
381 msg = VerifySubset(a[ai], b[bi]) | |
382 if msg is None: | |
383 ai += 1 | |
384 bi += 1 | |
385 if ai != len(a) - 1: | |
386 return ': added %d elements' % (len(a)-1-ai) | |
387 | |
388 elif isinstance(a, (basestring, int, bool, type(None))): | |
389 if a != b: | |
390 return ': %r != %r' % (a, b) | |
391 | |
392 else: | |
393 return ': unknown type: %r' % (type(a).__name__) | |
394 | |
395 | |
396 def _nameOfCallable(c): | |
397 if hasattr(c, '__call__'): | |
398 return c.__class__.__name__+'.__call__' | |
399 if inspect.ismethod(c): | |
400 return c.im_class.__name__+'.'+c.__name__ | |
401 if inspect.isfunction(c): | |
402 return c.__name__ | |
403 return repr(c) | |
OLD | NEW |