Index: recipe_engine/checker.py |
diff --git a/recipe_engine/checker.py b/recipe_engine/checker.py |
new file mode 100644 |
index 0000000000000000000000000000000000000000..9b51004ccae92d8bc818e25203f5a819153c7dd7 |
--- /dev/null |
+++ b/recipe_engine/checker.py |
@@ -0,0 +1,330 @@ |
+# Copyright 2014 The LUCI Authors. All rights reserved. |
+# Use of this source code is governed under the Apache License, Version 2.0 |
+# that can be found in the LICENSE file. |
+ |
+"""Provides simulator test coverage for individual recipes.""" |
+ |
+import ast |
+import copy |
+import re |
+import inspect |
+ |
+from collections import OrderedDict |
+ |
+from . import env |
+import astunparse |
+import expect_tests |
+ |
class _checkTransformer(ast.NodeTransformer):
  """Extracts the helpful subexpressions from a python expression.

  _checkTransformer is an ast NodeTransformer which is applied to a statement
  (specifically, to one containing an invocation of the Checker). The
  subexpressions it finds will be printed along with the check's source code
  statement to provide context for the failed check.

  It knows the following transformations:
    * all python identifiers will be resolved to their local variable meaning.
    * `___ in <instance of dict>` will cause dict.keys() to be printed in lieu
      of the entire dictionary.
    * `a[b][c]` will cause `a[b]` and `a[b][c]` to be printed (for an arbitrary
      level of recursion)

  The transformed ast is NOT a valid python AST... In particular, every reduced
  subexpression will be an ast.Name() where the id is the code for the
  subexpression (which may not be a valid name! It could be `foo.bar()`.), and
  the ctx will be the eval'd value for that element.

  In addition to this, there will be a list of ast.Name nodes in the
  transformer's `extras` attribute for additional expressions which should be
  printed for debugging usefulness, but didn't fit into the ast tree anywhere.
  """

  def __init__(self, lvars, gvars):
    """Stores the namespaces used to evaluate subexpressions.

    Args:
      lvars: mapping passed to eval() as the locals.
      gvars: mapping passed to eval() as the globals.
    """
    self.lvars = lvars
    self.gvars = gvars
    self.extras = []

  def _eval(self, node):
    """Evaluates `node` and returns it in reduced form.

    The node is unparsed back to source and eval'd against the stored
    globals/locals. On success the result is an ast.Name whose `id` is the
    source text and whose `ctx` is the computed value. If evaluation raises,
    the node is returned unchanged so that it is simply left out of the
    rendered context.
    """
    code = astunparse.unparse(node).strip()
    try:
      thing = eval(code, self.gvars, self.lvars)
    except Exception:  # pylint: disable=broad-except
      # Evaluation is best-effort. A subexpression that was short-circuited
      # when the check originally ran (e.g. `k in d and d[k] == 1`) can raise
      # KeyError/IndexError/AttributeError/... when it is evaluated here; that
      # must not take down the reporting of the failed check itself.
      return node
    return ast.Name(code, thing)

  def visit_Compare(self, node):
    """Matches `___ in <instance of dict>`.

    When the right hand side resolved to a dict, the comparison is rebuilt so
    that the right hand side is `<name>.keys()` (with the dict's keys as its
    value) instead of the whole dictionary.
    """
    node = self.generic_visit(node)

    if len(node.ops) == 1 and isinstance(node.ops[0], ast.In):
      cmps = node.comparators
      if len(cmps) == 1 and isinstance(cmps[0], ast.Name):
        name = cmps[0]
        # For a reduced node, `ctx` holds the evaluated value.
        if isinstance(name.ctx, dict):
          node = ast.Compare(
            node.left,
            node.ops,
            [ast.Name(name.id+".keys()", name.ctx.keys())])

    return node

  def visit_Subscript(self, node):
    """Matches `___[a]`.

    The subscript expression as a whole is reduced to its value. If the index
    is a plain name, the index's own value is additionally recorded in
    `self.extras`.
    """
    node = self.generic_visit(node)
    if isinstance(node.slice, ast.Index):
      if isinstance(node.slice.value, ast.Name):
        self.extras.append(self._eval(node.slice.value))
      node = self._eval(node)
    return node

  def visit_Name(self, node):
    """Matches a bare identifier (e.g. `foo`) and reduces it to its value."""
    return self._eval(node)
+ |
+ |
# The class of compiled regular expressions. It is derived from an actual
# compiled pattern instead of the private `re._pattern_type` attribute, which
# is an implementation detail that is not available on every Python version.
_RE_PATTERN_TYPE = type(re.compile(''))


def render_user_value(val):
  """Takes a subexpression user value, and attempts to render it in the most
  useful way possible.

  Currently this will use render_re for compiled regular expressions, and will
  fall back to repr() for everything else.

  It should be the goal of this function to return an `eval`able string that
  would yield the equivalent value in a python interpreter.

  Args:
    val: the value to render.

  Returns:
    A string rendering of `val`.
  """
  if isinstance(val, _RE_PATTERN_TYPE):
    return render_re(val)
  return repr(val)
+ |
+ |
def render_re(regex):
  """Renders a repr()-style value for a compiled regular expression.

  The output looks like `re.compile(<pattern repr>)`, or
  `re.compile(<pattern repr>, FLAG|FLAG...)` when any of the known flags are
  set on the pattern. Flag names are emitted in a fixed order.
  """
  known_flags = (
    (re.IGNORECASE, 'IGNORECASE'),
    (re.LOCALE, 'LOCALE'),
    (re.UNICODE, 'UNICODE'),
    (re.MULTILINE, 'MULTILINE'),
    (re.DOTALL, 'DOTALL'),
    (re.VERBOSE, 'VERBOSE'),
  )
  flag_names = [label for bit, label in known_flags if regex.flags & bit]
  if not flag_names:
    return 're.compile(%r)' % regex.pattern
  return 're.compile(%r, %s)' % (regex.pattern, '|'.join(flag_names))
+ |
+ |
class Checker(object):
  """Callable which records failed checks together with debugging context.

  An instance is invoked as `check(exp)` or `check(hint, exp)`. A truthy `exp`
  records nothing. A falsy `exp` appends an expect_tests.Check describing the
  failure (source location, statement and relevant local values) to
  `self._failed_checks`.
  """

  def __init__(self, filename, lineno, funcname, args, kwargs, *ignores):
    """
    Args:
      filename, lineno, funcname: stored as-is and attached to every recorded
        failure. Presumably they identify the function the checker is handed
        to -- TODO confirm against the caller.
      args: sequence of positional arguments; only their repr() is kept.
      kwargs: dict of keyword arguments; only the repr() of each value is
        kept.
      *ignores: objects which must never be printed as local variables.
    """
    self._failed_checks = []

    # _ignore_set is the set of objects that we should never print as local
    # variables. We start this set off by including the actual Checker object,
    # since there's no value to printing that.
    self._ignore_set = {id(x) for x in ignores+(self,)}

    self._ctx_filename = filename
    self._ctx_lineno = lineno
    self._ctx_funcname = funcname
    self._ctx_args = [repr(arg) for arg in args]
    self._ctx_kwargs = {k: repr(v) for k, v in kwargs.items()}

  def _process_frame(self, frame, with_vars):
    """This processes a stack frame into an expect_tests.CheckFrame, which
    includes file name, line number, function name (of the function containing
    the frame), the parsed statement at that line, and the relevant local
    variables/subexpressions (if with_vars is True).

    In addition to transforming the expression with _checkTransformer, this
    will:
      * omit subexpressions which resolve to callable()'s
      * omit every object registered in self._ignore_set (the checker itself
        plus the `ignores` given to the constructor)
      * transform all subexpression values using render_user_value().

    Args:
      frame: a 6-item frame record as found in inspect.stack().
      with_vars: if True, collect the rendered subexpression values; otherwise
        the frame is created with None in place of the variable map.

    Returns:
      An expect_tests.CheckFrame.
    """
    raw_frame, filename, lineno, func_name, _, _ = frame

    filelines, _ = inspect.findsource(raw_frame)

    # This loop tries to parse a node out of the source which ends at the line
    # that shows up in the frame. To do this, it tries parsing that line, and
    # if that fails, it prepends the previous line. It keeps doing this (all
    # the way back to the first line of the file) until it gets a successful
    # parse.
    #
    # NOTE(review): the scan stops at the first suffix which parses; it does
    # not verify that the suffix is the complete statement. A reviewer raised
    # a concern about this (their comment is truncated in the review record).
    node = None
    for i in range(lineno-1, -1, -1):
      try:
        node = ast.parse(''.join(filelines[i:lineno]).strip())
        break
      except SyntaxError:
        continue

    if node is None:
      # Nothing ending at this line could be parsed. Report the raw source
      # line rather than failing while reporting a failed check.
      statement = ''.join(filelines[lineno-1:lineno]).strip()
    else:
      statement = astunparse.unparse(node).strip()

    varmap = None
    if with_vars:
      varmap = {}

      if node is not None:
        xfrmr = _checkTransformer(raw_frame.f_locals, raw_frame.f_globals)

        def add_node(n):
          if not isinstance(n, ast.Name):
            return
          val = n.ctx
          if isinstance(val, ast.AST):
            # ctx is still a real ast context: this name was never resolved.
            return
          if callable(val) or id(val) in self._ignore_set:
            return
          if n.id not in varmap:
            varmap[n.id] = render_user_value(val)

        # The transformer mutates the tree, so it is given a copy.
        for n in ast.walk(xfrmr.visit(copy.deepcopy(node))):
          add_node(n)
        # TODO(iannucci): only add extras if verbose is True
        for n in xfrmr.extras:
          add_node(n)

    return expect_tests.CheckFrame(
      filename,
      lineno,
      func_name,
      statement,
      varmap
    )

  def _call_impl(self, hint, exp):
    """This implements the bulk of what happens when you run `check(exp)`. It
    will crawl back up the stack and extract information about all of the frames
    which are relevant to the check, including file:lineno and the code
    statement which occurs at that location for all the frames.

    On the last frame (the one that actually contains the check call), it will
    also try to obtain relevant local values in the check so they can be printed
    with the check to aid in debugging and diagnosis. It uses the parsed
    statement found at that line to find all referenced local variables in that
    frame.

    This must be called directly from __call__: the two innermost stack
    entries (this method and its caller) are skipped unconditionally.
    """

    if exp:
      # TODO(iannucci): collect this in verbose mode.
      # this check passed
      return

    # Both names are bound up front so that the cleanup below is always valid,
    # even if inspect.stack() raises or yields no frames.
    frames = None
    frame_info = None
    try:
      frames = inspect.stack()[2:]

      # Find the first frame which does not have self as a local variable.
      # Identity is used so that no __eq__ of an unrelated local is invoked.
      i = 0
      for i, frame_info in enumerate(frames):
        local_values = frame_info[0].f_locals.values()
        if not any(val is self for val in local_values):
          break

      # Keep the frames associated with this checker. Only the innermost one
      # (the one containing the check call) gets its variables rendered.
      # NOTE(review): the slice stops at i-1, i.e. it also drops the last
      # frame before the boundary -- presumably the frame which handed the
      # checker to user code; confirm before changing.
      keep_frames = []
      for j, frame_info in enumerate(frames[:i-1]):
        keep_frames.append(self._process_frame(frame_info, j == 0))

      # order it so that the innermost frame is the last element of the list
      keep_frames.reverse()

      self._failed_checks.append(expect_tests.Check(
        hint,
        self._ctx_filename,
        self._ctx_lineno,
        self._ctx_funcname,
        self._ctx_args,
        self._ctx_kwargs,
        keep_frames,
        False
      ))
    finally:
      # avoid reference cycle as suggested by inspect docs.
      del frames, frame_info

  def __call__(self, arg1, arg2=None):
    """Runs a check: `check(exp)` or `check(hint, exp)`.

    Args:
      arg1: the expression result when called with one argument, otherwise
        the hint to record with the check.
      arg2: the expression result when a hint is given.

    NOTE: None is what marks "no hint given", so `check(hint, None)` is
    interpreted as `check(hint)`.
    """
    if arg2 is not None:
      hint = arg1
      exp = arg2
    else:
      hint = None
      exp = arg1
    self._call_impl(hint, exp)
+ |
+ |
MISSING = object()

# The string types accepted as leaf values by VerifySubset.
try:
  _STRING_TYPES = (basestring,)
except NameError:
  # Interpreters without `basestring` only have `str`.
  _STRING_TYPES = (str,)


def VerifySubset(a, b):
  """Verifies that `a` is a subset of `b`.

  The comparison is recursive and depends on the type of the values:
    * OrderedDict: every key of `a` must be in `b`, in the same relative
      order, and each value must be a subset of the corresponding value.
    * dict: every key of `a` must be in `b`, and each value must be a subset
      of the corresponding value.
    * list: the elements of `a` must match elements of `b`, in order; `b` may
      contain additional elements in between.
    * strings, ints, bools and None must be equal.

  Apart from one exception, `a` and `b` must have exactly the same type. The
  exception: when `b` is an OrderedDict, `a` may be a plain dict with zero or
  one entries. A plain dict with more entries than that is reported as a type
  mismatch.

  Neither argument is modified.

  Args:
    a: the candidate subset.
    b: the value which `a` is verified against.

  Returns:
    None if `a` is a subset of `b`. Otherwise a string describing the first
    difference found, e.g. "['key']: 1 != 2".
  """
  if a is b:
    return None

  if isinstance(b, OrderedDict) and isinstance(a, dict):
    if len(a) == 0:
      # {} is an order-preserving subset of OrderedDict().
      return None
    elif len(a) == 1:
      # A single entry is trivially ordered. Copy instead of popping so that
      # the caller's dict is left intact.
      a = OrderedDict(a)
    # With more than one entry, a plain dict falls through to the type check
    # below and is reported as a mismatch.

  if type(a) != type(b):
    return ': type mismatch: %r v %r' % (type(a).__name__, type(b).__name__)

  if isinstance(a, OrderedDict):
    last_idx = 0
    b_reverse_index = {k: (i, v) for i, (k, v) in enumerate(b.items())}
    for k, v in a.items():
      j, b_val = b_reverse_index.get(k, (MISSING, MISSING))
      if j is MISSING:
        return ': added key %r' % k

      if j < last_idx:
        return ': key %r is out of order' % k
      # j == last_idx is not possible, these are OrderedDicts
      last_idx = j

      msg = VerifySubset(v, b_val)
      if msg:
        return '[%r]%s' % (k, msg)

  elif isinstance(a, dict):
    for k, v in a.items():
      b_val = b.get(k, MISSING)
      if b_val is MISSING:
        return ': added key %r' % k

      msg = VerifySubset(v, b_val)
      if msg:
        return '[%r]%s' % (k, msg)

  elif isinstance(a, list):
    # technically we could implement an edit-distance algorithm to show what the
    # smallest delta is between a and b... probably extreme.
    #
    # The matching below is greedy: each element of `a` is paired with the
    # first not-yet-consumed element of `b` that it is a subset of. This is
    # straightforward for simple types; for lists of complex types (e.g.
    # dicts, other lists) a greedy pairing may not be the best one. The only
    # place where complex types may show up would be in a `$result`, which can
    # hold arbitrary JSON.

    if len(a) > len(b):
      return ': too long: %d v %d' % (len(a), len(b))

    bi = ai = 0
    # Every element of both lists takes part, including the last ones.
    while bi < len(b) and ai < len(a):
      if VerifySubset(a[ai], b[bi]) is None:
        ai += 1
      bi += 1
    if ai != len(a):
      return ': added %d elements' % (len(a)-ai)

  elif isinstance(a, _STRING_TYPES + (int, bool, type(None))):
    if a != b:
      return ': %r != %r' % (a, b)

  else:
    return ': unknown type: %r' % (type(a).__name__)

  return None
+ |
+ |
+def _nameOfCallable(c): |
+ if inspect.isfunction(c): |
+ return c.__name__ |
+ if inspect.ismethod(c): |
+ return c.im_class.__name__+'.'+c.__name__ |
+ if hasattr(c, '__class__') and hasattr(c, '__call__'): |
+ return c.__class__.__name__+'.__call__' |
+ return repr(c) |