recipe_engine/checker.py - Issue 2387763003: Add initial postprocess unit test thingy.

Side by Side Diff: recipe_engine/checker.py

Issue 2387763003: Add initial postprocess unit test thingy. (Closed)

Patch Set: Rebase Created 4 years, 2 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

OLD	NEW
(Empty)
	1 # Copyright 2016 The LUCI Authors. All rights reserved.

	2 # Use of this source code is governed under the Apache License, Version 2.0

	3 # that can be found in the LICENSE file.

	4

	5 """Implements a Checker object which can be used in place of `assert` to check

	6 conditions inside tests, but with much more debugging information, including

	7 a smart selection of local variables mentioned inside of the call to check."""

	8

	9 import ast

	10 import copy

	11 import inspect

	12 import re

	13 import itertools

	14

	15 from collections import OrderedDict, namedtuple, deque, defaultdict

	16

	17 from . import env

	18 import astunparse

	19 import expect_tests

	20

	21

	22 class _resolved(ast.AST):

	23 """_resolved is a fake AST node which represents a resolved sub-expression.

	24 It's used by _checkTransformer to replace portions of its AST with their

	25 resolved equivalents."""

	26 def __init__(self, representation, value):

	27 super(_resolved, self).__init__()

	28 self.representation = representation

	29 self.value = value

	30

	31

	32 class _checkTransformer(ast.NodeTransformer):

	33 """_checkTransformer is an ast NodeTransformer which extracts the helpful

	34 subexpressions from a python expression (specificially, from an invocation of

	35 the Checker). These subexpressions will be printed along with the check's

	36 source code statement to provide context for the failed check.

	37

	38 It knows the following transformations:

	39 * all python identifiers will be resolved to their local variable meaning.

	40 * `___ in <instance of dict>` will cause dict.keys() to be printed in lieu

	41 of the entire dictionary.

	42 * `a[b][c]` will cause `a[b]` and `a[b][c]` to be printed (for an arbitrary

	43 level of recursion)

	44

	45 The transformed ast is NOT a valid python AST... In particular, every reduced

	46 subexpression will be a _resolved() where the `representation` is the code for

	47 the subexpression (It could be any valid expression like `foo.bar()`),

	48 and the `value` will be the eval'd value for that element.

	49

	50 In addition to this, there will be a list of _resolved nodes in the

	51 transformer's `extra` attribute for additional expressions which should be

	52 printed for debugging usefulness, but didn't fit into the ast tree anywhere.

	53 """

	54

	55 def __init__(self, lvars, gvars):

	56 self.lvars = lvars

	57 self.gvars = gvars

	58 self.extras = []

	59

	60 def visit_Compare(self, node):

	61 """Compare nodes occur for all sequences of comparison (`in`, gt, lt, etc.)

	62 operators. We only want to match `___ in instanceof(dict)` here, so we

	63 restrict this to Compare ops with a single operator which is `In` or

	64 `NotIn`.

	65 """

	66 node = self.generic_visit(node)

	67

	68 if len(node.ops) == 1 and isinstance(node.ops[0], (ast.In, ast.NotIn)):

	69 cmps = node.comparators

	70 if len(cmps) == 1 and isinstance(cmps[0], _resolved):

	71 rslvd = cmps[0]

	72 if isinstance(rslvd.value, dict):

	73 node = ast.Compare(

	74 node.left,

	75 node.ops,

	76 [_resolved(rslvd.representation+".keys()",

	77 sorted(rslvd.value.keys()))])

	78

	79 return node

	80

	81 def visit_Subscript(self, node):

	82 """Subscript nodes are anything which is __[__]. We only want to match __[x]

	83 here so where the [x] is a regular Index expression (not an elipsis or

	84 slice). We only handle cases where x is a constant, or a resolvable variable

	85 lookup (so a variable lookup, index, etc.)."""

	86 node = self.generic_visit(node)

	87

	88 if (isinstance(node.slice, ast.Index) and

	89 isinstance(node.value, _resolved)):

	90 sliceVal = MISSING

	91 sliceRepr = ''

	92 if isinstance(node.slice.value, _resolved):

	93 # (a[b])[c]

	94 # will include `a[b]` in the extras.

	95 self.extras.append(node.slice.value)

	96 sliceVal = node.slice.value.value

	97 sliceRepr = node.slice.value.representation

	98 elif isinstance(node.slice.value, ast.Num):

	99 sliceVal = node.slice.value.n

	100 sliceRepr = repr(sliceVal)

	101 elif isinstance(node.slice.value, ast.Str):

	102 sliceVal = node.slice.value.s

	103 sliceRepr = repr(sliceVal)

	104 if sliceVal is not MISSING:

	105 node = _resolved(

	106 '%s[%s]' % (node.value.representation, sliceRepr),

	107 node.value.value[sliceVal])

	108

	109 return node

	110

	111 def visit_Name(self, node):

	112 """Matches a single, simple identifier (e.g. variable).

	113

	114 This will lookup the variable value from python constants (e.g. True),

	115 followed by the frame's local variables, and finally by the frame's global

	116 variables.

	117 """

	118 consts = {'True': True, 'False': False, 'None': None}

	119 val = consts.get(

	120 node.id, self.lvars.get(

	121 node.id, self.gvars.get(

	122 node.id, MISSING)))

	123 if val is not MISSING:

	124 return _resolved(node.id, val)

	125 return node

	126

	127

	128 def render_user_value(val):

	129 """Takes a subexpression user value, and attempts to render it in the most

	130 useful way possible.

	131

	132 Currently this will use render_re for compiled regular expressions, and will

	133 fall back to repr() for everything else.

	134

	135 It should be the goal of this function to return an `eval`able string that

	136 would yield the equivalent value in a python interpreter.

	137 """

	138 if isinstance(val, re._pattern_type):

	139 return render_re(val)

	140 return repr(val)

	141

	142

	143 def render_re(regex):

	144 """Renders a repr()-style value for a compiled regular expression."""

	145 actual_flags = []

	146 if regex.flags:

	147 flags = [

	148 (re.IGNORECASE, 'IGNORECASE'),

	149 (re.LOCALE, 'LOCALE'),

	150 (re.UNICODE, 'UNICODE'),

	151 (re.MULTILINE, 'MULTILINE'),

	152 (re.DOTALL, 'DOTALL'),

	153 (re.VERBOSE, 'VERBOSE'),

	154 ]

	155 for val, name in flags:

	156 if regex.flags & val:

	157 actual_flags.append(name)

	158 if actual_flags:

	159 return 're.compile(%r, %s)' % (regex.pattern, '\|'.join(actual_flags))

	160 else:

	161 return 're.compile(%r)' % regex.pattern

	162

	163

	164 class Checker(object):

	165 # filename -> {lineno -> [statements]}

	166 _PARSED_FILE_CACHE = defaultdict(lambda: defaultdict(list))

	167

	168 def __init__(self, filename, lineno, func, args, kwargs, *ignores):

	169 self.failed_checks = []

	170

	171 # _ignore_set is the set of objects that we should never print as local

	172 # variables. We start this set off by including the actual Checker object,

	173 # since there's no value to printing that.

	174 self._ignore_set = {id(x) for x in ignores+(self,)}

	175

	176 self._ctx_filename = filename

	177 self._ctx_lineno = lineno

	178 self._ctx_funcname = _nameOfCallable(func)

	179 self._ctx_args = map(repr, args)

	180 self._ctx_kwargs = {k: repr(v) for k, v in kwargs.iteritems()}

	181

	182 def _get_statements_for_frame(self, frame):

	183 """This parses the file containing frame, and then extracts all simple

	184 statements (i.e. those which do not contain other statements). It then

	185 returns the list of all statements (as AST nodes) which occur on the line

	186 number indicated by the frame.

	187

	188 The parse and statement extraction is cached in the _PARSED_FILE_CACHE class

	189 variable, so multiple assertions in the same file only pay the parsing cost

	190 once.

	191 """

	192 raw_frame, filename, lineno, _, _, _ = frame

	193 if filename not in self._PARSED_FILE_CACHE:

	194 # multi-statement nodes like Module, FunctionDef, etc. have attributes on

	195 # them like 'body' which house the list of statements they contain. The

	196 # `to_push` list here is the set of all such attributes across all ast

	197 # nodes. The goal is to add the CONTENTS of all multi-statement statements

	198 # to the queue, and anything else is considered a 'single statement' for

	199 # the purposes of this code.

	200 to_push = ['body', 'orelse', 'finalbody', 'excepthandler']

	201 lines, _ = inspect.findsource(raw_frame)

	202 # Start with the entire parsed document (probably ast.Module).

	203 queue = deque([ast.parse(''.join(lines), filename)])

	204 while queue:

	205 node = queue.pop()

	206 had_statements = False

	207 # Try to find any nested statements and push them into queue if they

	208 # exist.

	209 for key in to_push:

	210 val = getattr(node, key, MISSING)

	211 if val is not MISSING:

	212 had_statements = True

	213 # Because we're popping things off the start of the queue, and we

	214 # want to append nodes to _PARSED_FILE_CACHE, we reverse the

	215 # statements when we extend the queue with them.

	216 queue.extend(val[::-1])

	217 if had_statements:

	218 continue

	219 # node is a 'simple' statement (doesn't contain any nested statements),

	220 # so find it's maxiumum line-number (e.g. the line number that would

	221 # show up in a stack trace), and add it to _PARSED_FILE_CACHE. Note that

	222 # even though this is a simple statement, it could still span multiple

	223 # lines.

	224 max_line = max(map(lambda n: getattr(n, 'lineno', 0), ast.walk(node)))

	225 self._PARSED_FILE_CACHE[filename][max_line].append(node)

	226 return self._PARSED_FILE_CACHE[filename][lineno]

	227

	228 def _process_frame(self, frame, with_vars):

	229 """This processes a stack frame into an expect_tests.CheckFrame, which

	230 includes file name, line number, function name (of the function containing

	231 the frame), the parsed statement at that line, and the relevant local

	232 variables/subexpressions (if with_vars is True).

	233

	234 In addition to transforming the expression with _checkTransformer, this

	235 will:

	236 * omit subexpressions which resolve to callable()'s

	237 * omit the overall step ordered dictionary

	238 * transform all subexpression values using render_user_value().

	239 """

	240 nodes = self._get_statements_for_frame(frame)

	241 raw_frame, filename, lineno, func_name, _, _ = frame

	242

	243 varmap = None

	244 if with_vars:

	245 varmap = {}

	246

	247 xfrmr = _checkTransformer(raw_frame.f_locals, raw_frame.f_globals)

	248 xfrmd = xfrmr.visit(ast.Module(copy.deepcopy(nodes)))

	249

	250 for n in itertools.chain(ast.walk(xfrmd), xfrmr.extras):

	251 if isinstance(n, _resolved):

	252 val = n.value

	253 if isinstance(val, ast.AST):

	254 continue

	255 if n.representation in ('True', 'False', 'None'):

	256 continue

	257 if callable(val) or id(val) in self._ignore_set:

	258 continue

	259 if n.representation not in varmap:

	260 varmap[n.representation] = render_user_value(val)

	261

	262 return expect_tests.CheckFrame(

	263 filename,

	264 lineno,

	265 func_name,

	266 '; '.join(astunparse.unparse(n).strip() for n in nodes),

	267 varmap

	268 )

	269

	270 def _call_impl(self, hint, exp):

	271 """This implements the bulk of what happens when you run `check(exp)`. It

	272 will crawl back up the stack and extract information about all of the frames

	273 which are relevent to the check, including file:lineno and the code

	274 statement which occurs at that location for all the frames.

	275

	276 On the last frame (the one that actually contains the check call), it will

	277 also try to obtain relevant local values in the check so they can be printed

	278 with the check to aid in debugging and diagnosis. It uses the parsed

	279 statement found at that line to find all referenced local variables in that

	280 frame.

	281 """

	282

	283 if exp:

	284 # TODO(iannucci): collect this in verbose mode.

	285 # this check passed

	286 return

	287

	288 try:

	289 frames = inspect.stack()[2:]

	290

	291 # grab all frames which have self as a local variable (e.g. frames

	292 # associated with this checker), excluding self.__call__.

	293 try:

	294 i = 0

	295 for i, f in enumerate(frames):

	296 if self not in f[0].f_locals.itervalues():

	297 break

	298 keep_frames = [self._process_frame(f, j == 0)

	299 for j, f in enumerate(frames[:i-1])]

	300 finally:

	301 del f

	302

	303 # order it so that innermost frame is at the bottom

	304 keep_frames = keep_frames[::-1]

	305

	306 self.failed_checks.append(expect_tests.Check(

	307 hint,

	308 self._ctx_filename,

	309 self._ctx_lineno,

	310 self._ctx_funcname,

	311 self._ctx_args,

	312 self._ctx_kwargs,

	313 keep_frames,

	314 False

	315 ))

	316 finally:

	317 # avoid reference cycle as suggested by inspect docs.

	318 del frames

	319

	320 def __call__(self, arg1, arg2=None):

	321 if arg2 is not None:

	322 hint = arg1

	323 exp = arg2

	324 else:

	325 hint = None

	326 exp = arg1

	327 self._call_impl(hint, exp)

	328

	329

	330 MISSING = object()

	331

	332

	333 def VerifySubset(a, b):

	334 """Verify subset verifies that `a` is a subset of `b` where a and b are both

	335 JSON-ish types. They are also permitted to be OrderedDicts instead of

	336 dictionaries.

	337

	338 This verifies that a introduces no extra dictionary keys, list elements, etc.

	339 and also ensures that the order of entries in an ordered type (such as a list

	340 or an OrderedDict) remain the same from a to b. This also verifies that types

	341 are consistent between a and b.

	342

	343 As a special case, empty and single-element dictionaries are considered

	344 subsets of an OrderedDict, even though their types don't precisely match.

	345

	346 If a is a valid subset of b, this returns None. Otherwise this returns

	347 a descriptive message of what went wrong.

	348

	349 Example:

	350 print 'object'+VerifySubset({'a': 'thing'}, {'b': 'other', 'a': 'prime'})

	351

	352 OUTPUT:

	353 object['a']: 'thing' != 'prime'

	354 """

	355 if a is b:

	356 return

	357

	358 if isinstance(b, OrderedDict) and isinstance(a, dict):

	359 # 0 and 1-element dicts can stand in for OrderedDicts.

	360 if len(a) == 0:

	361 return

	362 elif len(a) == 1:

	363 a = OrderedDict([next(a.iteritems())])

	364

	365 if type(a) != type(b):

	366 return ': type mismatch: %r v %r' % (type(a).__name__, type(b).__name__)

	367

	368 if isinstance(a, OrderedDict):

	369 last_idx = 0

	370 b_reverse_index = {k: (i, v) for i, (k, v) in enumerate(b.iteritems())}

	371 for k, v in a.iteritems():

	372 j, b_val = b_reverse_index.get(k, (MISSING, MISSING))

	373 if j is MISSING:

	374 return ': added key %r' % k

	375

	376 if j < last_idx:

	377 return ': key %r is out of order' % k

	378 # j == last_idx is not possible, these are OrderedDicts

	379 last_idx = j

	380

	381 msg = VerifySubset(v, b_val)

	382 if msg:

	383 return '[%r]%s' % (k, msg)

	384

	385 elif isinstance(a, dict):

	386 for k, v in a.iteritems():

	387 b_val = b.get(k, MISSING)

	388 if b_val is MISSING:

	389 return ': added key %r' % k

	390

	391 msg = VerifySubset(v, b_val)

	392 if msg:

	393 return '[%r]%s' % (k, msg)

	394

	395 elif isinstance(a, list):

	396 if len(a) > len(b):

	397 return ': too long: %d v %d' % (len(a), len(b))

	398

	399 bi = ai = 0

	400 while bi < len(b) - 1 and ai < len(a) - 1:

	401 msg = VerifySubset(a[ai], b[bi])

	402 if msg is None:

	403 ai += 1

	404 bi += 1

	405 if ai != len(a) - 1:

	406 return ': added %d elements' % (len(a)-1-ai)

	407

	408 elif isinstance(a, (basestring, int, bool, type(None))):

	409 if a != b:

	410 return ': %r != %r' % (a, b)

	411

	412 else:

	413 return ': unknown type: %r' % (type(a).__name__)

	414

	415

	416 def _nameOfCallable(c):

	417 if hasattr(c, '__call__'):

	418 return c.__class__.__name__+'.__call__'

	419 if inspect.ismethod(c):

	420 return c.im_class.__name__+'.'+c.__name__

	421 if inspect.isfunction(c):

	422 return c.__name__

	423 return repr(c)

OLD	NEW

« no previous file with comments | « no previous file | recipe_engine/env.py » ('j') | no next file with comments »