Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(536)

Side by Side Diff: recipe_engine/checker.py

Issue 2387763003: Add initial postprocess unit test thingy. (Closed)
Patch Set: Rebase Created 4 years, 2 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « no previous file | recipe_engine/env.py » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
(Empty)
1 # Copyright 2016 The LUCI Authors. All rights reserved.
2 # Use of this source code is governed under the Apache License, Version 2.0
3 # that can be found in the LICENSE file.
4
5 """Implements a Checker object which can be used in place of `assert` to check
6 conditions inside tests, but with much more debugging information, including
7 a smart selection of local variables mentioned inside of the call to check."""
8
9 import ast
10 import copy
11 import inspect
12 import re
13 import itertools
14
15 from collections import OrderedDict, namedtuple, deque, defaultdict
16
17 from . import env
18 import astunparse
19 import expect_tests
20
21
22 class _resolved(ast.AST):
23 """_resolved is a fake AST node which represents a resolved sub-expression.
24 It's used by _checkTransformer to replace portions of its AST with their
25 resolved equivalents."""
26 def __init__(self, representation, value):
27 super(_resolved, self).__init__()
28 self.representation = representation
29 self.value = value
30
31
32 class _checkTransformer(ast.NodeTransformer):
33 """_checkTransformer is an ast NodeTransformer which extracts the helpful
34 subexpressions from a python expression (specificially, from an invocation of
35 the Checker). These subexpressions will be printed along with the check's
36 source code statement to provide context for the failed check.
37
38 It knows the following transformations:
39 * all python identifiers will be resolved to their local variable meaning.
40 * `___ in <instance of dict>` will cause dict.keys() to be printed in lieu
41 of the entire dictionary.
42 * `a[b][c]` will cause `a[b]` and `a[b][c]` to be printed (for an arbitrary
43 level of recursion)
44
45 The transformed ast is NOT a valid python AST... In particular, every reduced
46 subexpression will be a _resolved() where the `representation` is the code for
47 the subexpression (It could be any valid expression like `foo.bar()`),
48 and the `value` will be the eval'd value for that element.
49
50 In addition to this, there will be a list of _resolved nodes in the
51 transformer's `extra` attribute for additional expressions which should be
52 printed for debugging usefulness, but didn't fit into the ast tree anywhere.
53 """
54
55 def __init__(self, lvars, gvars):
56 self.lvars = lvars
57 self.gvars = gvars
58 self.extras = []
59
60 def visit_Compare(self, node):
61 """Compare nodes occur for all sequences of comparison (`in`, gt, lt, etc.)
62 operators. We only want to match `___ in instanceof(dict)` here, so we
63 restrict this to Compare ops with a single operator which is `In` or
64 `NotIn`.
65 """
66 node = self.generic_visit(node)
67
68 if len(node.ops) == 1 and isinstance(node.ops[0], (ast.In, ast.NotIn)):
69 cmps = node.comparators
70 if len(cmps) == 1 and isinstance(cmps[0], _resolved):
71 rslvd = cmps[0]
72 if isinstance(rslvd.value, dict):
73 node = ast.Compare(
74 node.left,
75 node.ops,
76 [_resolved(rslvd.representation+".keys()",
77 sorted(rslvd.value.keys()))])
78
79 return node
80
81 def visit_Subscript(self, node):
82 """Subscript nodes are anything which is __[__]. We only want to match __[x]
83 here so where the [x] is a regular Index expression (not an elipsis or
84 slice). We only handle cases where x is a constant, or a resolvable variable
85 lookup (so a variable lookup, index, etc.)."""
86 node = self.generic_visit(node)
87
88 if (isinstance(node.slice, ast.Index) and
89 isinstance(node.value, _resolved)):
90 sliceVal = MISSING
91 sliceRepr = ''
92 if isinstance(node.slice.value, _resolved):
93 # (a[b])[c]
94 # will include `a[b]` in the extras.
95 self.extras.append(node.slice.value)
96 sliceVal = node.slice.value.value
97 sliceRepr = node.slice.value.representation
98 elif isinstance(node.slice.value, ast.Num):
99 sliceVal = node.slice.value.n
100 sliceRepr = repr(sliceVal)
101 elif isinstance(node.slice.value, ast.Str):
102 sliceVal = node.slice.value.s
103 sliceRepr = repr(sliceVal)
104 if sliceVal is not MISSING:
105 node = _resolved(
106 '%s[%s]' % (node.value.representation, sliceRepr),
107 node.value.value[sliceVal])
108
109 return node
110
111 def visit_Name(self, node):
112 """Matches a single, simple identifier (e.g. variable).
113
114 This will lookup the variable value from python constants (e.g. True),
115 followed by the frame's local variables, and finally by the frame's global
116 variables.
117 """
118 consts = {'True': True, 'False': False, 'None': None}
119 val = consts.get(
120 node.id, self.lvars.get(
121 node.id, self.gvars.get(
122 node.id, MISSING)))
123 if val is not MISSING:
124 return _resolved(node.id, val)
125 return node
126
127
128 def render_user_value(val):
129 """Takes a subexpression user value, and attempts to render it in the most
130 useful way possible.
131
132 Currently this will use render_re for compiled regular expressions, and will
133 fall back to repr() for everything else.
134
135 It should be the goal of this function to return an `eval`able string that
136 would yield the equivalent value in a python interpreter.
137 """
138 if isinstance(val, re._pattern_type):
139 return render_re(val)
140 return repr(val)
141
142
143 def render_re(regex):
144 """Renders a repr()-style value for a compiled regular expression."""
145 actual_flags = []
146 if regex.flags:
147 flags = [
148 (re.IGNORECASE, 'IGNORECASE'),
149 (re.LOCALE, 'LOCALE'),
150 (re.UNICODE, 'UNICODE'),
151 (re.MULTILINE, 'MULTILINE'),
152 (re.DOTALL, 'DOTALL'),
153 (re.VERBOSE, 'VERBOSE'),
154 ]
155 for val, name in flags:
156 if regex.flags & val:
157 actual_flags.append(name)
158 if actual_flags:
159 return 're.compile(%r, %s)' % (regex.pattern, '|'.join(actual_flags))
160 else:
161 return 're.compile(%r)' % regex.pattern
162
163
164 class Checker(object):
165 # filename -> {lineno -> [statements]}
166 _PARSED_FILE_CACHE = defaultdict(lambda: defaultdict(list))
167
168 def __init__(self, filename, lineno, func, args, kwargs, *ignores):
169 self.failed_checks = []
170
171 # _ignore_set is the set of objects that we should never print as local
172 # variables. We start this set off by including the actual Checker object,
173 # since there's no value to printing that.
174 self._ignore_set = {id(x) for x in ignores+(self,)}
175
176 self._ctx_filename = filename
177 self._ctx_lineno = lineno
178 self._ctx_funcname = _nameOfCallable(func)
179 self._ctx_args = map(repr, args)
180 self._ctx_kwargs = {k: repr(v) for k, v in kwargs.iteritems()}
181
182 def _get_statements_for_frame(self, frame):
183 """This parses the file containing frame, and then extracts all simple
184 statements (i.e. those which do not contain other statements). It then
185 returns the list of all statements (as AST nodes) which occur on the line
186 number indicated by the frame.
187
188 The parse and statement extraction is cached in the _PARSED_FILE_CACHE class
189 variable, so multiple assertions in the same file only pay the parsing cost
190 once.
191 """
192 raw_frame, filename, lineno, _, _, _ = frame
193 if filename not in self._PARSED_FILE_CACHE:
194 # multi-statement nodes like Module, FunctionDef, etc. have attributes on
195 # them like 'body' which house the list of statements they contain. The
196 # `to_push` list here is the set of all such attributes across all ast
197 # nodes. The goal is to add the CONTENTS of all multi-statement statements
198 # to the queue, and anything else is considered a 'single statement' for
199 # the purposes of this code.
200 to_push = ['body', 'orelse', 'finalbody', 'excepthandler']
201 lines, _ = inspect.findsource(raw_frame)
202 # Start with the entire parsed document (probably ast.Module).
203 queue = deque([ast.parse(''.join(lines), filename)])
204 while queue:
205 node = queue.pop()
206 had_statements = False
207 # Try to find any nested statements and push them into queue if they
208 # exist.
209 for key in to_push:
210 val = getattr(node, key, MISSING)
211 if val is not MISSING:
212 had_statements = True
213 # Because we're popping things off the start of the queue, and we
214 # want to append nodes to _PARSED_FILE_CACHE, we reverse the
215 # statements when we extend the queue with them.
216 queue.extend(val[::-1])
217 if had_statements:
218 continue
219 # node is a 'simple' statement (doesn't contain any nested statements),
220 # so find it's maxiumum line-number (e.g. the line number that would
221 # show up in a stack trace), and add it to _PARSED_FILE_CACHE. Note that
222 # even though this is a simple statement, it could still span multiple
223 # lines.
224 max_line = max(map(lambda n: getattr(n, 'lineno', 0), ast.walk(node)))
225 self._PARSED_FILE_CACHE[filename][max_line].append(node)
226 return self._PARSED_FILE_CACHE[filename][lineno]
227
228 def _process_frame(self, frame, with_vars):
229 """This processes a stack frame into an expect_tests.CheckFrame, which
230 includes file name, line number, function name (of the function containing
231 the frame), the parsed statement at that line, and the relevant local
232 variables/subexpressions (if with_vars is True).
233
234 In addition to transforming the expression with _checkTransformer, this
235 will:
236 * omit subexpressions which resolve to callable()'s
237 * omit the overall step ordered dictionary
238 * transform all subexpression values using render_user_value().
239 """
240 nodes = self._get_statements_for_frame(frame)
241 raw_frame, filename, lineno, func_name, _, _ = frame
242
243 varmap = None
244 if with_vars:
245 varmap = {}
246
247 xfrmr = _checkTransformer(raw_frame.f_locals, raw_frame.f_globals)
248 xfrmd = xfrmr.visit(ast.Module(copy.deepcopy(nodes)))
249
250 for n in itertools.chain(ast.walk(xfrmd), xfrmr.extras):
251 if isinstance(n, _resolved):
252 val = n.value
253 if isinstance(val, ast.AST):
254 continue
255 if n.representation in ('True', 'False', 'None'):
256 continue
257 if callable(val) or id(val) in self._ignore_set:
258 continue
259 if n.representation not in varmap:
260 varmap[n.representation] = render_user_value(val)
261
262 return expect_tests.CheckFrame(
263 filename,
264 lineno,
265 func_name,
266 '; '.join(astunparse.unparse(n).strip() for n in nodes),
267 varmap
268 )
269
270 def _call_impl(self, hint, exp):
271 """This implements the bulk of what happens when you run `check(exp)`. It
272 will crawl back up the stack and extract information about all of the frames
273 which are relevent to the check, including file:lineno and the code
274 statement which occurs at that location for all the frames.
275
276 On the last frame (the one that actually contains the check call), it will
277 also try to obtain relevant local values in the check so they can be printed
278 with the check to aid in debugging and diagnosis. It uses the parsed
279 statement found at that line to find all referenced local variables in that
280 frame.
281 """
282
283 if exp:
284 # TODO(iannucci): collect this in verbose mode.
285 # this check passed
286 return
287
288 try:
289 frames = inspect.stack()[2:]
290
291 # grab all frames which have self as a local variable (e.g. frames
292 # associated with this checker), excluding self.__call__.
293 try:
294 i = 0
295 for i, f in enumerate(frames):
296 if self not in f[0].f_locals.itervalues():
297 break
298 keep_frames = [self._process_frame(f, j == 0)
299 for j, f in enumerate(frames[:i-1])]
300 finally:
301 del f
302
303 # order it so that innermost frame is at the bottom
304 keep_frames = keep_frames[::-1]
305
306 self.failed_checks.append(expect_tests.Check(
307 hint,
308 self._ctx_filename,
309 self._ctx_lineno,
310 self._ctx_funcname,
311 self._ctx_args,
312 self._ctx_kwargs,
313 keep_frames,
314 False
315 ))
316 finally:
317 # avoid reference cycle as suggested by inspect docs.
318 del frames
319
320 def __call__(self, arg1, arg2=None):
321 if arg2 is not None:
322 hint = arg1
323 exp = arg2
324 else:
325 hint = None
326 exp = arg1
327 self._call_impl(hint, exp)
328
329
330 MISSING = object()
331
332
333 def VerifySubset(a, b):
334 """Verify subset verifies that `a` is a subset of `b` where a and b are both
335 JSON-ish types. They are also permitted to be OrderedDicts instead of
336 dictionaries.
337
338 This verifies that a introduces no extra dictionary keys, list elements, etc.
339 and also ensures that the order of entries in an ordered type (such as a list
340 or an OrderedDict) remain the same from a to b. This also verifies that types
341 are consistent between a and b.
342
343 As a special case, empty and single-element dictionaries are considered
344 subsets of an OrderedDict, even though their types don't precisely match.
345
346 If a is a valid subset of b, this returns None. Otherwise this returns
347 a descriptive message of what went wrong.
348
349 Example:
350 print 'object'+VerifySubset({'a': 'thing'}, {'b': 'other', 'a': 'prime'})
351
352 OUTPUT:
353 object['a']: 'thing' != 'prime'
354 """
355 if a is b:
356 return
357
358 if isinstance(b, OrderedDict) and isinstance(a, dict):
359 # 0 and 1-element dicts can stand in for OrderedDicts.
360 if len(a) == 0:
361 return
362 elif len(a) == 1:
363 a = OrderedDict([next(a.iteritems())])
364
365 if type(a) != type(b):
366 return ': type mismatch: %r v %r' % (type(a).__name__, type(b).__name__)
367
368 if isinstance(a, OrderedDict):
369 last_idx = 0
370 b_reverse_index = {k: (i, v) for i, (k, v) in enumerate(b.iteritems())}
371 for k, v in a.iteritems():
372 j, b_val = b_reverse_index.get(k, (MISSING, MISSING))
373 if j is MISSING:
374 return ': added key %r' % k
375
376 if j < last_idx:
377 return ': key %r is out of order' % k
378 # j == last_idx is not possible, these are OrderedDicts
379 last_idx = j
380
381 msg = VerifySubset(v, b_val)
382 if msg:
383 return '[%r]%s' % (k, msg)
384
385 elif isinstance(a, dict):
386 for k, v in a.iteritems():
387 b_val = b.get(k, MISSING)
388 if b_val is MISSING:
389 return ': added key %r' % k
390
391 msg = VerifySubset(v, b_val)
392 if msg:
393 return '[%r]%s' % (k, msg)
394
395 elif isinstance(a, list):
396 if len(a) > len(b):
397 return ': too long: %d v %d' % (len(a), len(b))
398
399 bi = ai = 0
400 while bi < len(b) - 1 and ai < len(a) - 1:
401 msg = VerifySubset(a[ai], b[bi])
402 if msg is None:
403 ai += 1
404 bi += 1
405 if ai != len(a) - 1:
406 return ': added %d elements' % (len(a)-1-ai)
407
408 elif isinstance(a, (basestring, int, bool, type(None))):
409 if a != b:
410 return ': %r != %r' % (a, b)
411
412 else:
413 return ': unknown type: %r' % (type(a).__name__)
414
415
416 def _nameOfCallable(c):
417 if hasattr(c, '__call__'):
418 return c.__class__.__name__+'.__call__'
419 if inspect.ismethod(c):
420 return c.im_class.__name__+'.'+c.__name__
421 if inspect.isfunction(c):
422 return c.__name__
423 return repr(c)
OLDNEW
« no previous file with comments | « no previous file | recipe_engine/env.py » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698