Chromium Code Reviews| Index: git_common.py | 
| diff --git a/git_common.py b/git_common.py | 
| new file mode 100644 | 
| index 0000000000000000000000000000000000000000..f4cfab7c33a50a8f0825d800593eb9ac495277f3 | 
| --- /dev/null | 
| +++ b/git_common.py | 
| @@ -0,0 +1,285 @@ | 
| +# Copyright (c) 2013 The Chromium Authors. All rights reserved. | 
| +# Use of this source code is governed by a BSD-style license that can be | 
| +# found in the LICENSE file. | 
| + | 
| +# Monkeypatch IMapIterator so that Ctrl-C can kill everything properly. | 
| +# Derived from https://gist.github.com/aljungberg/626518 | 
| +import multiprocessing.pool | 
| +from multiprocessing.pool import IMapIterator | 
def wrapper(func):
  """Wraps IMapIterator.next so it is always called with a timeout.

  This is the Ctrl-C monkeypatch described above: when the caller does not
  supply a timeout, a huge but finite one is substituted.

  Args:
    func - The original IMapIterator.next implementation.

  Returns a replacement method with the signature (self, timeout=None).
  """
  def wrap(self, timeout=None):
    # Only substitute the default when no timeout was given. An explicit
    # timeout of 0 is a legitimate "poll, don't block" request and must not
    # be silently turned into an effectively infinite wait.
    if timeout is None:
      timeout = 1e100
    return func(self, timeout=timeout)
  return wrap
IMapIterator.next = wrapper(IMapIterator.next)
# Keep the Python 3 iterator protocol entry point pointing at the same wrapper.
IMapIterator.__next__ = IMapIterator.next
| + | 
| + | 
| +import binascii | 
| +import contextlib | 
| +import functools | 
| +import logging | 
| +import signal | 
| +import subprocess | 
| +import sys | 
| +import tempfile | 
| +import threading | 
| + | 
| + | 
# Name of the git executable that run() invokes: 'git.bat' when sys.platform
# starts with 'win', plain 'git' everywhere else.
GIT_EXE = 'git.bat' if sys.platform.startswith('win') else 'git'
| + | 
| + | 
class CalledProcessError(Exception):
  """Raised when a command exits with a non-zero status.

  Attributes:
    returncode (int) - The exit status of the command.
    cmd - The command that was run, as it was handed to Popen.
  """
  def __init__(self, returncode, cmd):
    # Forward the arguments to Exception so that self.args is populated.
    # Pickle rebuilds exceptions as cls(*self.args); with empty args the
    # exception could not be sent back from a multiprocessing worker.
    super(CalledProcessError, self).__init__(returncode, cmd)
    self.returncode = returncode
    self.cmd = cmd

  def __str__(self):
    return (
        'Command "%s" returned non-zero exit status %d' %
        (self.cmd, self.returncode))
| + | 
| + | 
def memoize_one(f):
  """Caches the results of a pure function that takes exactly one argument.

  A result of None is never stored, so f is called again the next time that
  argument is seen.

  The returned function carries one mutable attribute:
    * cache (dict) - Maps arg to f(arg)

  To reset the cache (e.g. between unit tests), just do:
    |your_function|.cache.clear()
  """
  # Review note (M-A Ruel, 2013/11/08): would prefer this decorator to be
  # thread safe. [comment truncated in this capture]
  # Reply (iannucci, 2013/11/11): made a threadsafe=bool option mandatory.
  # [comment truncated in this capture]
  cache = {}

  @functools.wraps(f)
  def inner(arg):
    cached = cache.get(arg)
    if cached is not None:
      return cached
    computed = f(arg)
    if computed is not None:
      cache[arg] = computed
    return computed

  inner.cache = cache
  return inner
| + | 
| + | 
def _ScopedPool_initer(orig, orig_args):  # pragma: no cover
  """Initializer run in each of ScopedPool's subprocesses.

  Makes the subprocess ignore SIGINT, then runs the caller's own initializer
  (if one was given) with its arguments. This helps ScopedPool handle Ctrl-C's
  correctly.
  """
  signal.signal(signal.SIGINT, signal.SIG_IGN)
  if orig:
    orig(*orig_args)


@contextlib.contextmanager
def ScopedPool(*args, **kwargs):
  """Context manager yielding a pool that is cleaned up when the scope exits.

  Args:
    kind (keyword only) - 'threads' selects a multiprocessing.pool.ThreadPool;
      anything else selects a process-based multiprocessing.pool.Pool.
    All other arguments are passed through to the pool constructor.

  On a normal exit the pool is closed; if the body raises, the pool is
  terminated and the exception propagates. The pool is joined either way.
  """
  use_threads = kwargs.pop('kind', None) == 'threads'
  if use_threads:
    pool = multiprocessing.pool.ThreadPool(*args, **kwargs)
  else:
    # Route the caller's initializer through _ScopedPool_initer so that every
    # subprocess ignores SIGINT before the caller's initializer runs.
    pool_kwargs = dict(
        kwargs,
        initializer=_ScopedPool_initer,
        initargs=(kwargs.get('initializer'), kwargs.get('initargs', ())))
    pool = multiprocessing.pool.Pool(*args, **pool_kwargs)

  try:
    yield pool
    pool.close()
  except:  # Deliberately bare: KeyboardInterrupt must also terminate the pool.
    pool.terminate()
    raise
  finally:
    pool.join()
| + | 
| + | 
class ProgressPrinter(object):
  """Threaded single-stat status message printer."""
  def __init__(self, fmt, enabled=None, stream=sys.stderr, period=0.5):
    """Create a ProgressPrinter.

    Use it as a context manager which produces a simple 'increment' method:

      with ProgressPrinter('(%%(count)d/%d)' % 1000) as inc:
        for i in range(1000):
          # do stuff
          if i % 10 == 0:
            inc(10)

    Args:
      fmt - String format with a single '%(count)d' where the counter value
        should go.
      enabled (bool) - If this is None, will default to True if
        logging.getLogger() is set to INFO or more verbose.
      stream (file-like) - The stream to print status messages to.
      period (float) - The time in seconds for the printer thread to wait
        between printing.
    """
    self.fmt = fmt
    if enabled is None:  # pragma: no cover
      self.enabled = logging.getLogger().isEnabledFor(logging.INFO)
    else:
      self.enabled = enabled

    self._count = 0
    self._dead = False
    self._dead_cond = threading.Condition()
    self._stream = stream
    self._thread = threading.Thread(target=self._run)
    self._period = period

  def _emit(self, s):
    """Writes s to the stream, prefixed with a carriage return, if enabled."""
    if self.enabled:
      self._stream.write('\r'+s)
      self._stream.flush()

  def _run(self):
    """Printer thread body: prints the count every period until stopped."""
    with self._dead_cond:
      while not self._dead:
        self._emit(self.fmt % {'count': self._count})
        # wait() releases the lock, so __exit__ can get in and wake us early.
        self._dead_cond.wait(self._period)
      # Final status line, terminated with a newline.
      self._emit((self.fmt+'\n') % {'count': self._count})

  def inc(self, amount=1):
    """Adds amount to the counter shown in the status message."""
    self._count += amount

  def __enter__(self):
    self._thread.start()
    return self.inc

  def __exit__(self, _exc_type, _exc_value, _traceback):
    with self._dead_cond:
      # Flip the flag under the same lock the printer thread checks it under.
      self._dead = True
      # notify_all is the supported spelling; the camelCase notifyAll alias is
      # deprecated and emits a DeprecationWarning on newer interpreters.
      self._dead_cond.notify_all()
    self._thread.join()
    del self._thread
| + | 
| + | 
def parse_committishes(*committishes):
  """Returns the binary-encoded git hashes for one or more committishes.

  A committish is anything which can resolve to a commit. Popular examples:
    * "HEAD"
    * "origin/master"
    * "cool_branch~2"

  etc.

  Returns a list with one binary (unhexlified) hash per line of output from
  hashes().

  Raises:
    Exception - if git fails to resolve the given committishes.
  """
  # Review note (M-A Ruel, 2013/11/08): suggested the summary line "Returns
  # binary-encoded git ref (hash) for one o..." [comment truncated in this
  # capture]. Reply (iannucci, 2013/11/11): agreed; "the terminology in the
  # git docs is N..." [comment truncated in this capture].
  try:
    # Only the git invocation can raise CalledProcessError.
    hex_hashes = hashes(*committishes)
  except CalledProcessError:
    raise Exception('one of %s does not seem to be a valid commitish.' %
                    str(committishes))
  # Build a real list rather than relying on map(), which is lazy on Python 3.
  return [binascii.unhexlify(hex_hash) for hex_hash in hex_hashes]
| + | 
| + | 
| +def _check_output(*popenargs, **kwargs): | 
| + """Run a Popen command, and return the stdout as a string. | 
| 
 
M-A Ruel
2013/11/08 19:32:23
Runs
and what about check_output? But it doesn't
 
iannucci
2013/11/11 22:59:24
Done
 
 | 
| + | 
| + Throws CalledProcessError if the command returns non-zero. | 
| + | 
| + kwargs: | 
| + indata (str) - Data to provide to the command on stdin. Mutually exclusive | 
| + with the Popen kwarg 'stdin'. | 
| + | 
| + Other than that, popenargs is *args to Popen, and **kwargs is... **kwargs to | 
| + Popen. | 
| + """ | 
| + kwargs.setdefault('stdout', subprocess.PIPE) | 
| + kwargs.setdefault('stderr', subprocess.PIPE) | 
| + indata = kwargs.pop('indata', None) | 
| + if indata is not None: | 
| + kwargs['stdin'] = subprocess.PIPE | 
| + process = subprocess.Popen(*popenargs, **kwargs) | 
| + output, _ = process.communicate(indata) | 
| + if process.returncode: | 
| + cmd = kwargs.get('args') | 
| + if cmd is None: | 
| + cmd = popenargs[0] | 
| + raise CalledProcessError(process.returncode, cmd) | 
| + return output | 
| + | 
| + | 
def run(*cmd, **kwargs):
  """Runs a git command and returns its stdout, strip()'d.

  The command line is GIT_EXE followed by cmd; kwargs are passed through to
  _check_output. The command is logged at DEBUG level before it is run.

  An empty (or None) output is returned as ''.
  """
  argv = (GIT_EXE,) + cmd
  logging.debug('running: %s', " ".join(map(repr, argv)))
  output = _check_output(argv, **kwargs)
  if not output:
    return ''
  return output.strip()
| + | 
| + | 
def hashes(*reflike):
  """Runs `git rev-parse` on the given arguments.

  Returns the output as a list of lines.
  """
  output = run('rev-parse', *reflike)
  return output.splitlines()
| + | 
| + | 
def intern_f(f, kind='blob'):
  """Interns a file object into the git object store.

  The file object is always closed before returning, even if git fails.

  Args:
    f (file-like object) - The file-like object to intern
    kind (git object type) - One of 'blob', 'commit', 'tree', 'tag'.

  Returns the git hash of the interned object (hex encoded).
  """
  try:
    return run('hash-object', '-t', kind, '-w', '--stdin', stdin=f)
  finally:
    # Close on the error path too, so a failed git call doesn't leak f.
    f.close()
| + | 
| + | 
def tree(treeish, recurse=False):
  """Lists the entries of a git tree.

  Args:
    treeish - a git name which resolves to a tree (or to a commit).
    recurse - include just this tree, or all of its descendants too.

  Returns a dict formatted like:
    { 'file_name': (mode, type, ref) }
  or None if the git command fails.

  mode is a string of octal digits, exactly as printed by git, where:
    * 040000 - Directory
    * 100644 - Regular non-executable file
    * 100664 - Regular non-executable group-writeable file
    * 100755 - Regular executable file
    * 120000 - Symbolic link
    * 160000 - Gitlink

  type is a string where it's one of 'blob', 'commit', 'tree', 'tag'.

  ref is the hex encoded hash of the entry.
  """
  opts = ['ls-tree', '--full-tree']
  if recurse:
    opts.append('-r')
  opts.append(treeish)
  try:
    # Only the git invocation can raise CalledProcessError.
    listing = run(*opts)
  except CalledProcessError:
    return None

  ret = {}
  for line in listing.splitlines():
    # Each line is '<mode> <type> <ref>' + one separator character + '<name>'.
    # Peel off the three metadata fields, then take everything after the
    # single separator that follows ref. Splitting the whole line on runs of
    # whitespace would strip leading whitespace from the file name.
    mode, typ, rest = line.split(None, 2)
    ref = rest.split(None, 1)[0]
    name = rest[len(ref) + 1:]
    ret[name] = (mode, typ, ref)
  return ret
| + | 
| + | 
def mktree(treedict):
  """Creates a git tree object from treedict and returns its hash.

  The entries are written NUL-terminated to a temporary file, which is then
  fed to `git mktree -z` on stdin.

  See tree for the values of mode, type, and ref.

  Args:
    treedict - { name: (mode, type, ref) }
  """
  entries = [
      '%s %s %s\t%s\0' % (mode, typ, ref, name)
      for name, (mode, typ, ref) in treedict.iteritems()]
  with tempfile.TemporaryFile() as f:
    f.write(''.join(entries))
    # Rewind so git reads the entries from the start of the file.
    f.seek(0)
    return run('mktree', '-z', stdin=f)