OLD | NEW |
(Empty) | |
| 1 # Copyright 2016 The LUCI Authors. All rights reserved. |
| 2 # Use of this source code is governed under the Apache License, Version 2.0 |
| 3 # that can be found in the LICENSE file. |
| 4 |
| 5 """Implements a Checker object which can be used in place of `assert` to check |
| 6 conditions inside tests, but with much more debugging information, including |
| 7 a smart selection of local variables mentioned inside of the call to check.""" |
| 8 |
| 9 import ast |
| 10 import copy |
| 11 import inspect |
| 12 import re |
| 13 import itertools |
| 14 |
| 15 from collections import OrderedDict, namedtuple, deque, defaultdict |
| 16 |
| 17 from . import env |
| 18 import astunparse |
| 19 import expect_tests |
| 20 |
| 21 |
| 22 class _resolved(ast.AST): |
| 23 """_resolved is a fake AST node which represents a resolved sub-expression. |
| 24 It's used by _checkTransformer to replace portions of its AST with their |
| 25 resolved equivalents.""" |
| 26 def __init__(self, representation, value): |
| 27 super(_resolved, self).__init__() |
| 28 self.representation = representation |
| 29 self.value = value |
| 30 |
| 31 |
| 32 class _checkTransformer(ast.NodeTransformer): |
| 33 """_checkTransformer is an ast NodeTransformer which extracts the helpful |
| 34 subexpressions from a python expression (specificially, from an invocation of |
| 35 the Checker). These subexpressions will be printed along with the check's |
| 36 source code statement to provide context for the failed check. |
| 37 |
| 38 It knows the following transformations: |
| 39 * all python identifiers will be resolved to their local variable meaning. |
| 40 * `___ in <instance of dict>` will cause dict.keys() to be printed in lieu |
| 41 of the entire dictionary. |
| 42 * `a[b][c]` will cause `a[b]` and `a[b][c]` to be printed (for an arbitrary |
| 43 level of recursion) |
| 44 |
| 45 The transformed ast is NOT a valid python AST... In particular, every reduced |
| 46 subexpression will be a _resolved() where the `representation` is the code for |
| 47 the subexpression (It could be any valid expression like `foo.bar()`), |
| 48 and the `value` will be the eval'd value for that element. |
| 49 |
| 50 In addition to this, there will be a list of _resolved nodes in the |
| 51 transformer's `extra` attribute for additional expressions which should be |
| 52 printed for debugging usefulness, but didn't fit into the ast tree anywhere. |
| 53 """ |
| 54 |
| 55 def __init__(self, lvars, gvars): |
| 56 self.lvars = lvars |
| 57 self.gvars = gvars |
| 58 self.extras = [] |
| 59 |
| 60 def visit_Compare(self, node): |
| 61 """Compare nodes occur for all sequences of comparison (`in`, gt, lt, etc.) |
| 62 operators. We only want to match `___ in instanceof(dict)` here, so we |
| 63 restrict this to Compare ops with a single operator which is `In` or |
| 64 `NotIn`. |
| 65 """ |
| 66 node = self.generic_visit(node) |
| 67 |
| 68 if len(node.ops) == 1 and isinstance(node.ops[0], (ast.In, ast.NotIn)): |
| 69 cmps = node.comparators |
| 70 if len(cmps) == 1 and isinstance(cmps[0], _resolved): |
| 71 rslvd = cmps[0] |
| 72 if isinstance(rslvd.value, dict): |
| 73 node = ast.Compare( |
| 74 node.left, |
| 75 node.ops, |
| 76 [_resolved(rslvd.representation+".keys()", |
| 77 sorted(rslvd.value.keys()))]) |
| 78 |
| 79 return node |
| 80 |
| 81 def visit_Subscript(self, node): |
| 82 """Subscript nodes are anything which is __[__]. We only want to match __[x] |
| 83 here so where the [x] is a regular Index expression (not an elipsis or |
| 84 slice). We only handle cases where x is a constant, or a resolvable variable |
| 85 lookup (so a variable lookup, index, etc.).""" |
| 86 node = self.generic_visit(node) |
| 87 |
| 88 if (isinstance(node.slice, ast.Index) and |
| 89 isinstance(node.value, _resolved)): |
| 90 sliceVal = MISSING |
| 91 sliceRepr = '' |
| 92 if isinstance(node.slice.value, _resolved): |
| 93 # (a[b])[c] |
| 94 # will include `a[b]` in the extras. |
| 95 self.extras.append(node.slice.value) |
| 96 sliceVal = node.slice.value.value |
| 97 sliceRepr = node.slice.value.representation |
| 98 elif isinstance(node.slice.value, ast.Num): |
| 99 sliceVal = node.slice.value.n |
| 100 sliceRepr = repr(sliceVal) |
| 101 elif isinstance(node.slice.value, ast.Str): |
| 102 sliceVal = node.slice.value.s |
| 103 sliceRepr = repr(sliceVal) |
| 104 if sliceVal is not MISSING: |
| 105 node = _resolved( |
| 106 '%s[%s]' % (node.value.representation, sliceRepr), |
| 107 node.value.value[sliceVal]) |
| 108 |
| 109 return node |
| 110 |
| 111 def visit_Name(self, node): |
| 112 """Matches a single, simple identifier (e.g. variable). |
| 113 |
| 114 This will lookup the variable value from python constants (e.g. True), |
| 115 followed by the frame's local variables, and finally by the frame's global |
| 116 variables. |
| 117 """ |
| 118 consts = {'True': True, 'False': False, 'None': None} |
| 119 val = consts.get( |
| 120 node.id, self.lvars.get( |
| 121 node.id, self.gvars.get( |
| 122 node.id, MISSING))) |
| 123 if val is not MISSING: |
| 124 return _resolved(node.id, val) |
| 125 return node |
| 126 |
| 127 |
| 128 def render_user_value(val): |
| 129 """Takes a subexpression user value, and attempts to render it in the most |
| 130 useful way possible. |
| 131 |
| 132 Currently this will use render_re for compiled regular expressions, and will |
| 133 fall back to repr() for everything else. |
| 134 |
| 135 It should be the goal of this function to return an `eval`able string that |
| 136 would yield the equivalent value in a python interpreter. |
| 137 """ |
| 138 if isinstance(val, re._pattern_type): |
| 139 return render_re(val) |
| 140 return repr(val) |
| 141 |
| 142 |
| 143 def render_re(regex): |
| 144 """Renders a repr()-style value for a compiled regular expression.""" |
| 145 actual_flags = [] |
| 146 if regex.flags: |
| 147 flags = [ |
| 148 (re.IGNORECASE, 'IGNORECASE'), |
| 149 (re.LOCALE, 'LOCALE'), |
| 150 (re.UNICODE, 'UNICODE'), |
| 151 (re.MULTILINE, 'MULTILINE'), |
| 152 (re.DOTALL, 'DOTALL'), |
| 153 (re.VERBOSE, 'VERBOSE'), |
| 154 ] |
| 155 for val, name in flags: |
| 156 if regex.flags & val: |
| 157 actual_flags.append(name) |
| 158 if actual_flags: |
| 159 return 're.compile(%r, %s)' % (regex.pattern, '|'.join(actual_flags)) |
| 160 else: |
| 161 return 're.compile(%r)' % regex.pattern |
| 162 |
| 163 |
| 164 class Checker(object): |
| 165 # filename -> {lineno -> [statements]} |
| 166 _PARSED_FILE_CACHE = defaultdict(lambda: defaultdict(list)) |
| 167 |
| 168 def __init__(self, filename, lineno, func, args, kwargs, *ignores): |
| 169 self.failed_checks = [] |
| 170 |
| 171 # _ignore_set is the set of objects that we should never print as local |
| 172 # variables. We start this set off by including the actual Checker object, |
| 173 # since there's no value to printing that. |
| 174 self._ignore_set = {id(x) for x in ignores+(self,)} |
| 175 |
| 176 self._ctx_filename = filename |
| 177 self._ctx_lineno = lineno |
| 178 self._ctx_funcname = _nameOfCallable(func) |
| 179 self._ctx_args = map(repr, args) |
| 180 self._ctx_kwargs = {k: repr(v) for k, v in kwargs.iteritems()} |
| 181 |
| 182 def _get_statements_for_frame(self, frame): |
| 183 """This parses the file containing frame, and then extracts all simple |
| 184 statements (i.e. those which do not contain other statements). It then |
| 185 returns the list of all statements (as AST nodes) which occur on the line |
| 186 number indicated by the frame. |
| 187 |
| 188 The parse and statement extraction is cached in the _PARSED_FILE_CACHE class |
| 189 variable, so multiple assertions in the same file only pay the parsing cost |
| 190 once. |
| 191 """ |
| 192 raw_frame, filename, lineno, _, _, _ = frame |
| 193 if filename not in self._PARSED_FILE_CACHE: |
| 194 # multi-statement nodes like Module, FunctionDef, etc. have attributes on |
| 195 # them like 'body' which house the list of statements they contain. The |
| 196 # `to_push` list here is the set of all such attributes across all ast |
| 197 # nodes. The goal is to add the CONTENTS of all multi-statement statements |
| 198 # to the queue, and anything else is considered a 'single statement' for |
| 199 # the purposes of this code. |
| 200 to_push = ['body', 'orelse', 'finalbody', 'excepthandler'] |
| 201 lines, _ = inspect.findsource(raw_frame) |
| 202 # Start with the entire parsed document (probably ast.Module). |
| 203 queue = deque([ast.parse(''.join(lines), filename)]) |
| 204 while queue: |
| 205 node = queue.pop() |
| 206 had_statements = False |
| 207 # Try to find any nested statements and push them into queue if they |
| 208 # exist. |
| 209 for key in to_push: |
| 210 val = getattr(node, key, MISSING) |
| 211 if val is not MISSING: |
| 212 had_statements = True |
| 213 # Because we're popping things off the start of the queue, and we |
| 214 # want to append nodes to _PARSED_FILE_CACHE, we reverse the |
| 215 # statements when we extend the queue with them. |
| 216 queue.extend(val[::-1]) |
| 217 if had_statements: |
| 218 continue |
| 219 # node is a 'simple' statement (doesn't contain any nested statements), |
| 220 # so find it's maxiumum line-number (e.g. the line number that would |
| 221 # show up in a stack trace), and add it to _PARSED_FILE_CACHE. Note that |
| 222 # even though this is a simple statement, it could still span multiple |
| 223 # lines. |
| 224 max_line = max(map(lambda n: getattr(n, 'lineno', 0), ast.walk(node))) |
| 225 self._PARSED_FILE_CACHE[filename][max_line].append(node) |
| 226 return self._PARSED_FILE_CACHE[filename][lineno] |
| 227 |
| 228 def _process_frame(self, frame, with_vars): |
| 229 """This processes a stack frame into an expect_tests.CheckFrame, which |
| 230 includes file name, line number, function name (of the function containing |
| 231 the frame), the parsed statement at that line, and the relevant local |
| 232 variables/subexpressions (if with_vars is True). |
| 233 |
| 234 In addition to transforming the expression with _checkTransformer, this |
| 235 will: |
| 236 * omit subexpressions which resolve to callable()'s |
| 237 * omit the overall step ordered dictionary |
| 238 * transform all subexpression values using render_user_value(). |
| 239 """ |
| 240 nodes = self._get_statements_for_frame(frame) |
| 241 raw_frame, filename, lineno, func_name, _, _ = frame |
| 242 |
| 243 varmap = None |
| 244 if with_vars: |
| 245 varmap = {} |
| 246 |
| 247 xfrmr = _checkTransformer(raw_frame.f_locals, raw_frame.f_globals) |
| 248 xfrmd = xfrmr.visit(ast.Module(copy.deepcopy(nodes))) |
| 249 |
| 250 for n in itertools.chain(ast.walk(xfrmd), xfrmr.extras): |
| 251 if isinstance(n, _resolved): |
| 252 val = n.value |
| 253 if isinstance(val, ast.AST): |
| 254 continue |
| 255 if n.representation in ('True', 'False', 'None'): |
| 256 continue |
| 257 if callable(val) or id(val) in self._ignore_set: |
| 258 continue |
| 259 if n.representation not in varmap: |
| 260 varmap[n.representation] = render_user_value(val) |
| 261 |
| 262 return expect_tests.CheckFrame( |
| 263 filename, |
| 264 lineno, |
| 265 func_name, |
| 266 '; '.join(astunparse.unparse(n).strip() for n in nodes), |
| 267 varmap |
| 268 ) |
| 269 |
| 270 def _call_impl(self, hint, exp): |
| 271 """This implements the bulk of what happens when you run `check(exp)`. It |
| 272 will crawl back up the stack and extract information about all of the frames |
| 273 which are relevent to the check, including file:lineno and the code |
| 274 statement which occurs at that location for all the frames. |
| 275 |
| 276 On the last frame (the one that actually contains the check call), it will |
| 277 also try to obtain relevant local values in the check so they can be printed |
| 278 with the check to aid in debugging and diagnosis. It uses the parsed |
| 279 statement found at that line to find all referenced local variables in that |
| 280 frame. |
| 281 """ |
| 282 |
| 283 if exp: |
| 284 # TODO(iannucci): collect this in verbose mode. |
| 285 # this check passed |
| 286 return |
| 287 |
| 288 try: |
| 289 frames = inspect.stack()[2:] |
| 290 |
| 291 # grab all frames which have self as a local variable (e.g. frames |
| 292 # associated with this checker), excluding self.__call__. |
| 293 try: |
| 294 i = 0 |
| 295 for i, f in enumerate(frames): |
| 296 if self not in f[0].f_locals.itervalues(): |
| 297 break |
| 298 keep_frames = [self._process_frame(f, j == 0) |
| 299 for j, f in enumerate(frames[:i-1])] |
| 300 finally: |
| 301 del f |
| 302 |
| 303 # order it so that innermost frame is at the bottom |
| 304 keep_frames = keep_frames[::-1] |
| 305 |
| 306 self.failed_checks.append(expect_tests.Check( |
| 307 hint, |
| 308 self._ctx_filename, |
| 309 self._ctx_lineno, |
| 310 self._ctx_funcname, |
| 311 self._ctx_args, |
| 312 self._ctx_kwargs, |
| 313 keep_frames, |
| 314 False |
| 315 )) |
| 316 finally: |
| 317 # avoid reference cycle as suggested by inspect docs. |
| 318 del frames |
| 319 |
| 320 def __call__(self, arg1, arg2=None): |
| 321 if arg2 is not None: |
| 322 hint = arg1 |
| 323 exp = arg2 |
| 324 else: |
| 325 hint = None |
| 326 exp = arg1 |
| 327 self._call_impl(hint, exp) |
| 328 |
| 329 |
| 330 MISSING = object() |
| 331 |
| 332 |
| 333 def VerifySubset(a, b): |
| 334 """Verify subset verifies that `a` is a subset of `b` where a and b are both |
| 335 JSON-ish types. They are also permitted to be OrderedDicts instead of |
| 336 dictionaries. |
| 337 |
| 338 This verifies that a introduces no extra dictionary keys, list elements, etc. |
| 339 and also ensures that the order of entries in an ordered type (such as a list |
| 340 or an OrderedDict) remain the same from a to b. This also verifies that types |
| 341 are consistent between a and b. |
| 342 |
| 343 As a special case, empty and single-element dictionaries are considered |
| 344 subsets of an OrderedDict, even though their types don't precisely match. |
| 345 |
| 346 If a is a valid subset of b, this returns None. Otherwise this returns |
| 347 a descriptive message of what went wrong. |
| 348 |
| 349 Example: |
| 350 print 'object'+VerifySubset({'a': 'thing'}, {'b': 'other', 'a': 'prime'}) |
| 351 |
| 352 OUTPUT: |
| 353 object['a']: 'thing' != 'prime' |
| 354 """ |
| 355 if a is b: |
| 356 return |
| 357 |
| 358 if isinstance(b, OrderedDict) and isinstance(a, dict): |
| 359 # 0 and 1-element dicts can stand in for OrderedDicts. |
| 360 if len(a) == 0: |
| 361 return |
| 362 elif len(a) == 1: |
| 363 a = OrderedDict([next(a.iteritems())]) |
| 364 |
| 365 if type(a) != type(b): |
| 366 return ': type mismatch: %r v %r' % (type(a).__name__, type(b).__name__) |
| 367 |
| 368 if isinstance(a, OrderedDict): |
| 369 last_idx = 0 |
| 370 b_reverse_index = {k: (i, v) for i, (k, v) in enumerate(b.iteritems())} |
| 371 for k, v in a.iteritems(): |
| 372 j, b_val = b_reverse_index.get(k, (MISSING, MISSING)) |
| 373 if j is MISSING: |
| 374 return ': added key %r' % k |
| 375 |
| 376 if j < last_idx: |
| 377 return ': key %r is out of order' % k |
| 378 # j == last_idx is not possible, these are OrderedDicts |
| 379 last_idx = j |
| 380 |
| 381 msg = VerifySubset(v, b_val) |
| 382 if msg: |
| 383 return '[%r]%s' % (k, msg) |
| 384 |
| 385 elif isinstance(a, dict): |
| 386 for k, v in a.iteritems(): |
| 387 b_val = b.get(k, MISSING) |
| 388 if b_val is MISSING: |
| 389 return ': added key %r' % k |
| 390 |
| 391 msg = VerifySubset(v, b_val) |
| 392 if msg: |
| 393 return '[%r]%s' % (k, msg) |
| 394 |
| 395 elif isinstance(a, list): |
| 396 if len(a) > len(b): |
| 397 return ': too long: %d v %d' % (len(a), len(b)) |
| 398 |
| 399 bi = ai = 0 |
| 400 while bi < len(b) - 1 and ai < len(a) - 1: |
| 401 msg = VerifySubset(a[ai], b[bi]) |
| 402 if msg is None: |
| 403 ai += 1 |
| 404 bi += 1 |
| 405 if ai != len(a) - 1: |
| 406 return ': added %d elements' % (len(a)-1-ai) |
| 407 |
| 408 elif isinstance(a, (basestring, int, bool, type(None))): |
| 409 if a != b: |
| 410 return ': %r != %r' % (a, b) |
| 411 |
| 412 else: |
| 413 return ': unknown type: %r' % (type(a).__name__) |
| 414 |
| 415 |
| 416 def _nameOfCallable(c): |
| 417 if hasattr(c, '__call__'): |
| 418 return c.__class__.__name__+'.__call__' |
| 419 if inspect.ismethod(c): |
| 420 return c.im_class.__name__+'.'+c.__name__ |
| 421 if inspect.isfunction(c): |
| 422 return c.__name__ |
| 423 return repr(c) |
OLD | NEW |