Chromium Code Reviews| Index: testing_support/git_test_utils.py |
| diff --git a/testing_support/git_test_utils.py b/testing_support/git_test_utils.py |
| new file mode 100644 |
| index 0000000000000000000000000000000000000000..789f33162fcd31a5b99c2980ce45ba3a514b3712 |
| --- /dev/null |
| +++ b/testing_support/git_test_utils.py |
| @@ -0,0 +1,414 @@ |
| +# Copyright (c) 2013 The Chromium Authors. All rights reserved. |
| +# Use of this source code is governed by a BSD-style license that can be |
| +# found in the LICENSE file. |
| + |
| +import atexit |
| +import collections |
| +import copy |
| +import datetime |
| +import hashlib |
| +import os |
| +import shutil |
| +import subprocess |
| +import tempfile |
| +import unittest |
| + |
| + |
| +def git_hash_data(data, typ='blob'): |
| + """Calculate the git-style SHA1 for some data. |
| + |
| + Only supports 'blob' type data at the moment. |
| + """ |
| + assert typ == 'blob', "Only support blobs for now" |
| + return hashlib.sha1("blob %s\0%s" % (len(data), data)).hexdigest() |
| + |
| + |
| +class OrderedSet(collections.MutableSet): |
| + # from http://code.activestate.com/recipes/576694/ |
| + def __init__(self, iterable=None): |
| + self.end = end = [] |
| + end += [None, end, end] # sentinel node for doubly linked list |
| + self.data = {} # key --> [key, prev, next] |
| + if iterable is not None: |
| + self |= iterable |
| + |
| + def __contains__(self, key): |
| + return key in self.data |
| + |
| + def __eq__(self, other): |
| + if isinstance(other, OrderedSet): |
| + return len(self) == len(other) and list(self) == list(other) |
| + return set(self) == set(other) |
| + |
| + def __len__(self): |
| + return len(self.data) |
| + |
| + def __iter__(self): |
| + end = self.end |
| + curr = end[2] |
| + while curr is not end: |
| + yield curr[0] |
| + curr = curr[2] |
| + |
| + def __repr__(self): |
| + if not self: |
| + return '%s()' % (self.__class__.__name__,) |
| + return '%s(%r)' % (self.__class__.__name__, list(self)) |
| + |
| + def __reversed__(self): |
| + end = self.end |
| + curr = end[1] |
| + while curr is not end: |
| + yield curr[0] |
| + curr = curr[1] |
| + |
| + def add(self, key): |
| + if key not in self.data: |
| + end = self.end |
| + curr = end[1] |
| + curr[2] = end[1] = self.data[key] = [key, curr, end] |
| + |
| + def difference_update(self, *others): |
| + for other in others: |
| + for i in other: |
| + self.discard(i) |
| + |
| + def discard(self, key): |
| + if key in self.data: |
| + key, prev, nxt = self.data.pop(key) |
| + prev[2] = nxt |
| + nxt[1] = prev |
| + |
| + def pop(self, last=True): # pylint: disable=W0221 |
| + if not self: |
| + raise KeyError('set is empty') |
| + key = self.end[1][0] if last else self.end[2][0] |
| + self.discard(key) |
| + return key |
| + |
| + |
| +class GitRepoSchema(object): |
| + """A declarative git testing repo. |
| + |
| + Pass a schema to __init__ in the form of: |
| + A B C D |
| + B E D |
| + |
| + This is the repo |
| + |
| + A - B - C - D |
| + \ E / |
| + |
| + Whitespace doesn't matter. Each line is a declaration of which commits come |
| + before which other commits. |
| + |
| + Every commit gets a tag 'tag_%(commit)s' |
| + Every unique terminal commit gets a branch 'branch_%(commit)s' |
| + Last commit in First line is the branch 'master' |
| + Root commits get a ref 'root_%(commit)s' |
| + |
| + Timestamps are in topo order, earlier commits (as indicated by their presence |
| + in the schema) get earlier timestamps. Stamps start at the Unix Epoch, and |
| + increment by 1 day each. |
| + """ |
| + COMMIT = collections.namedtuple('COMMIT', 'name parents is_branch is_root') |
| + |
| + def walk(self): |
| + """Generator to walk the repo schema from roots to tips. |
| + |
| + Generates GitRepoSchema.COMMIT objects for each commit. |
| + |
| + Throws an AssertionError if it detects a cycle. |
| + """ |
| + is_root = True |
| + par_map = copy.deepcopy(self.par_map) |
| + while par_map: |
| + empty_keys = set(k for k, v in par_map.iteritems() if not v) |
| + if empty_keys: |
| + for k in sorted(empty_keys): |
| + yield self.COMMIT(k, self.par_map[k], |
| + not any(k in v for v in self.par_map.itervalues()), |
| + is_root) |
| + del par_map[k] |
| + for v in par_map.itervalues(): |
| + v.difference_update(empty_keys) |
| + is_root = False |
| + else: |
| + assert False, "Cycle detected! %s" % par_map |
| + |
| + def add_commits(self, schema): |
| + """Adds more commits from a schema into the existing Schema. |
| + |
| + Args: |
| + schema (str) - See class docstring for info on schema format. |
| + |
| + Throws an AssertionError if it detects a cycle. |
| + """ |
| + for commits in (l.split() for l in schema.splitlines() if l.strip()): |
| + parent = None |
| + for commit in commits: |
| + if commit not in self.par_map: |
| + self.par_map[commit] = OrderedSet() |
| + if parent is not None: |
| + self.par_map[commit].add(parent) |
| + parent = commit |
| + if parent and self.master is None: |
| + self.master = parent |
| + for _ in self.walk(): # This will throw if there are any cycles. |
| + pass |
| + |
| + def __init__(self, repo_schema="", |
|
M-A Ruel
2013/11/08 19:32:23
''
iannucci
2013/11/11 22:59:24
Done.
|
| + content_fn=lambda v: {v: {'data': v}}): |
| + """Builds a new GitRepoSchema. |
| + |
| + Args: |
| + repo_schema (str) - Initial schema for this repo. See class docstring for |
| + info on the schema format. |
| + content_fn ((commit_name) -> commit_data) - A function which will be |
| + lazily called to obtain data for each commit. The results of this |
| + function are cached (i.e. it will never be called twice for the same |
| + commit_name). See the docstring on the GitRepo class for the format of |
| + the data returned by this function. |
| + """ |
| + self.master = None |
| + self.par_map = {} |
| + self.data_cache = {} |
| + self.content_fn = content_fn |
| + self.add_commits(repo_schema) |
| + |
| + def reify(self): |
| + """Returns a real GitRepo for this GitRepoSchema""" |
| + return GitRepo(self) |
| + |
| + def data_for(self, commit): |
| + """Method to obtain data for a commit. |
| + |
| + See the docstring on the GitRepo class for the format of the returned data. |
| + |
| + Caches the result on this GitRepoSchema instance. |
| + """ |
| + if commit not in self.data_cache: |
| + self.data_cache[commit] = self.content_fn(commit) |
| + return self.data_cache[commit] |
| + |
| + |
| +class GitRepo(object): |
| + """Creates a real git repo for a GitRepoSchema. |
| + |
| + Obtains schema and content information from the GitRepoSchema. |
| + |
| + The format for the commit data supplied by GitRepoSchema.data_for is: |
| + { |
| + SPECIAL_KEY: special_value, |
| + ... |
| + "path/to/some/file": { 'data': "some data content for this file", |
| + 'mode': 0755 }, |
| + ... |
| + } |
| + |
| + The SPECIAL_KEYs are the following attribues of the GitRepo class: |
| + * AUTHOR_NAME |
| + * AUTHOR_EMAIL |
| + * AUTHOR_DATE - must be a datetime.datetime instance |
| + * COMMITTER_NAME |
| + * COMMITTER_EMAIL |
| + * COMMITTER_DATE - must be a datetime.datetime instance |
| + |
| + For file content, if 'data' is None, then this commit will `git rm` that file. |
| + """ |
| + BASE_TEMP_DIR = tempfile.mkdtemp(suffix='base', prefix='git_repo') |
| + atexit.register(shutil.rmtree, BASE_TEMP_DIR) |
| + |
| + # Singleton objects to specify specific data in a commit dictionary. |
| + AUTHOR_NAME = object() |
| + AUTHOR_EMAIL = object() |
| + AUTHOR_DATE = object() |
| + COMMITTER_NAME = object() |
| + COMMITTER_EMAIL = object() |
| + COMMITTER_DATE = object() |
| + |
| + DEFAULT_AUTHOR_NAME = 'Author McAuthorly' |
| + DEFAULT_AUTHOR_EMAIL = 'author@example.com' |
| + DEFAULT_COMMITTER_NAME = 'Charles Committish' |
| + DEFAULT_COMMITTER_EMAIL = 'commitish@example.com' |
| + |
| + COMMAND_OUTPUT = collections.namedtuple('COMMAND_OUTPUT', 'retcode stdout') |
| + |
| + def __init__(self, schema): |
| + """Makes new GitRepo. |
| + |
| + Automatically creates a temp folder under GitRepo.BASE_TEMP_DIR. It's |
| + recommended that you clean this repo up by calling nuke() on it, but if not, |
| + GitRepo will automatically clean up all allocated repos at the exit of the |
| + program (assuming a normal exit like with sys.exit) |
| + |
| + Args: |
| + schema - An instance of GitRepoSchema |
| + """ |
| + self.repo_path = tempfile.mkdtemp(dir=self.BASE_TEMP_DIR) |
| + self.commit_map = {} |
| + self._date = datetime.datetime(1970, 1, 1) |
| + |
| + self.git('init') |
| + for commit in schema.walk(): |
| + self._add_schema_commit(commit, schema.data_for(commit.name)) |
| + if schema.master: |
| + self.git('update-ref', 'master', self[schema.master]) |
| + |
| + def __getitem__(self, commit_name): |
| + """Allows you to get the hash of a commit by it's schema name. |
|
M-A Ruel
2013/11/08 19:32:23
Gets the hash of a ...
iannucci
2013/11/11 22:59:24
Done. Fixed the "it's" too :)
|
| + |
| + >>> r = GitRepo(GitRepoSchema('A B C')) |
| + >>> r['B'] |
| + '7381febe1da03b09da47f009963ab7998a974935' |
| + """ |
| + return self.commit_map[commit_name] |
| + |
| + def _add_schema_commit(self, commit, data): |
| + data = data or {} |
| + |
| + if commit.parents: |
| + parents = list(commit.parents) |
| + self.git('checkout', '--detach', '-q', self[parents[0]]) |
| + if len(parents) > 1: |
| + self.git('merge', '--no-commit', '-q', *[self[x] for x in parents[1:]]) |
| + else: |
| + self.git('checkout', '--orphan', 'root_%s' % commit.name) |
| + self.git('rm', '-rf', '.') |
| + |
| + env = {} |
| + for prefix in ('AUTHOR', 'COMMITTER'): |
| + for suffix in ('NAME', 'EMAIL', 'DATE'): |
| + singleton = '%s_%s' % (prefix, suffix) |
| + key = getattr(self, singleton) |
| + if key in data: |
| + val = data[key] |
| + else: |
| + if suffix == 'DATE': |
| + val = self._date |
| + self._date += datetime.timedelta(days=1) |
| + else: |
| + val = getattr(self, 'DEFAULT_%s' % singleton) |
| + env['GIT_%s' % singleton] = str(val) |
| + |
| + for fname, file_data in data.iteritems(): |
| + deleted = False |
| + if 'data' in file_data: |
| + data = file_data.get('data') |
| + if data is None: |
| + deleted = True |
| + self.git('rm', fname) |
| + else: |
| + path = os.path.join(self.repo_path, fname) |
| + pardir = os.path.dirname(path) |
| + if not os.path.exists(pardir): |
| + os.makedirs(pardir) |
| + with open(path, 'wb') as f: |
| + f.write(data) |
| + |
| + mode = file_data.get('mode') |
| + if mode and not deleted: |
| + os.chmod(path, mode) |
| + |
| + self.git('add', fname) |
| + |
| + rslt = self.git('commit', '--allow-empty', '-m', commit.name, env=env) |
| + assert rslt.retcode == 0, 'Failed to commit %s' % str(commit) |
| + self.commit_map[commit.name] = self.git('rev-parse', 'HEAD').stdout.strip() |
| + self.git('tag', 'tag_%s' % commit.name, self[commit.name]) |
| + if commit.is_branch: |
| + self.git('update-ref', 'branch_%s' % commit.name, self[commit.name]) |
| + |
| + def git(self, *args, **kwargs): |
| + """Runs a git command specified by |args| in this repo.""" |
| + assert self.repo_path is not None |
| + try: |
| + with open(os.devnull, 'wb') as devnull: |
| + output = subprocess.check_output( |
| + ('git',) + args, cwd=self.repo_path, stderr=devnull, **kwargs) |
| + return self.COMMAND_OUTPUT(0, output) |
| + except subprocess.CalledProcessError as e: |
| + return self.COMMAND_OUTPUT(e.returncode, e.output) |
| + |
| + def nuke(self): |
| + """Obliterates the git repo on disk. |
| + |
| + Causes this GitRepo to be unusable. |
| + """ |
| + shutil.rmtree(self.repo_path) |
| + self.repo_path = None |
| + |
| + def run(self, fn, *args, **kwargs): |
| + """Run a python function with the given args and kwargs with the cwd set to |
| + the git repo.""" |
| + assert self.repo_path is not None |
| + curdir = os.getcwd() |
| + try: |
| + os.chdir(self.repo_path) |
| + return fn(*args, **kwargs) |
| + finally: |
| + os.chdir(curdir) |
| + |
| + |
| +class GitRepoSchemaTestBase(unittest.TestCase): |
| + """A TestCase with a built-in GitRepoSchema. |
| + |
| + Expects a class variable REPO to be a GitRepoSchema string in the form |
| + described by that class. |
| + |
| + You may also set class variables in the form COMMIT_%(commit_name)s, which |
| + provide the content for the given commit_name commits. |
| + |
| + You probably will end up using either GitRepoReadOnlyTestBase or |
| + GitRepoReadWriteTestBase for real tests. |
| + """ |
| + REPO = None |
| + |
| + @classmethod |
| + def getRepoContent(cls, commit): |
| + return getattr(cls, 'COMMIT_%s' % commit, None) |
| + |
| + @classmethod |
| + def setUpClass(cls): |
| + super(GitRepoSchemaTestBase, cls).setUpClass() |
| + assert cls.REPO is not None |
| + cls.r_schema = GitRepoSchema(cls.REPO, cls.getRepoContent) |
| + |
| + |
| +class GitRepoReadOnlyTestBase(GitRepoSchemaTestBase): |
| + """Injects a GitRepo object given the schema and content from |
| + GitRepoSchemaTestBase into TestCase classes which subclass this. |
| + |
| + This GitRepo will appear as self.repo, and will be deleted and recreated once |
| + for the duration of all the tests in the subclass. |
| + """ |
| + REPO = None |
| + |
| + @classmethod |
| + def setUpClass(cls): |
| + super(GitRepoReadOnlyTestBase, cls).setUpClass() |
| + assert cls.REPO is not None |
| + cls.repo = cls.r_schema.reify() |
| + |
| + @classmethod |
| + def tearDownClass(cls): |
| + cls.repo.nuke() |
| + super(GitRepoReadOnlyTestBase, cls).tearDownClass() |
| + |
| + |
| +class GitRepoReadWriteTestBase(GitRepoSchemaTestBase): |
| + """Injects a GitRepo object given the schema and content from |
| + GitRepoSchemaTestBase into TestCase classes which subclass this. |
| + |
| + This GitRepo will appear as self.repo, and will be deleted and recreated for |
| + each test function in the subclass. |
| + """ |
| + REPO = None |
| + |
| + def setUp(self): |
| + super(GitRepoReadWriteTestBase, self).setUp() |
| + self.repo = self.r_schema.reify() |
| + |
| + def tearDown(self): |
| + self.repo.nuke() |
| + super(GitRepoReadWriteTestBase, self).tearDown() |