Chromium Code Reviews| Index: git_number.py |
| diff --git a/git_number.py b/git_number.py |
| new file mode 100755 |
| index 0000000000000000000000000000000000000000..198e31b45f828a7f943bb046bafc4f2eb6b13153 |
| --- /dev/null |
| +++ b/git_number.py |
| @@ -0,0 +1,217 @@ |
| +#!/usr/bin/env python |
| +# Copyright (c) 2013 The Chromium Authors. All rights reserved. |
| +# Use of this source code is governed by a BSD-style license that can be |
| +# found in the LICENSE file. |
| + |
| +import collections |
| +import optparse |
| +import os |
| +import struct |
| +import subprocess |
| +import sys |
| +import tempfile |
| + |
| +import git_common |
| +from git_common import run_git, ProgressPrinter |
| + |
| + |
| +# struct layout of one cache record: network byte order ('!'), a 20-byte |
| +# binary commit id, then an unsigned 32-bit generation number. |
| +CHUNK_FMT = '!20sL' |
| +# Size in bytes of one packed record. |
| +CHUNK_SIZE = struct.calcsize(CHUNK_FMT) |
| +# Maps a tree prefix to the count of numbers added under it by load(); |
| +# finalize() writes back exactly these trees. |
| +DIRTY_TREES = collections.defaultdict(int) |
| +# Git ref under which the number cache is stored. |
| +REF = 'refs/number/commits' |
| + |
| +# Number of bytes to use for the prefix on our internal number structure. |
| +# 0 is slow to deserialize. 2 creates way too much bookkeeping overhead (would |
| +# need to reimplement cache data structures to be a bit more sophisticated than |
| +# dicts). 1 seems to be just right. |
| +PREFIX_LEN = 1 |
| + |
| + |
| +@git_common.memoize_one |
| +def get_number_tree(prefix_bytes): |
| + """Return a dictionary of the blob contents specified by |prefix_bytes|. |
| + This is in the form of {<full binary ref>: <gen num> ...} |
| + |
| + >>> get_number_tree('\x83\xb4') |
| + {'\x83\xb4\xe3\xe4W\xf9J*\x8f/c\x16\xecD\xd1\x04\x8b\xa9qz': 169, ...} |
| + """ |
| + ret = {} |
| + ref = '%s:%s' % (REF, git_common.pathlify(prefix_bytes)) |
| + |
| + p = subprocess.Popen(['git', 'cat-file', 'blob', ref], |
| + stdout=subprocess.PIPE, stderr=subprocess.PIPE) |
| + raw = buffer(p.communicate()[0]) |
| + for i in xrange(len(raw) / CHUNK_SIZE): |
| + commit_id, num = struct.unpack_from(CHUNK_FMT, raw, i * CHUNK_SIZE) |
| + ret[commit_id] = num |
| + |
| + return ret |
| + |
| + |
| +@git_common.memoize_one |
| +def get_num(commit_id): |
| + """Takes a hash and returns the generation number for it or None if the |
| + commit_id is unknown.""" |
| + return get_number_tree(commit_id[:PREFIX_LEN]).get(commit_id) |
| + |
| + |
| +def intern_number_tree(tree): |
| + """Transforms a number tree (in the form returned by |get_number_tree|) into |
| + a git blob. |
| + |
| + Returns the git blob id as hex-encoded string. |
| + |
| + >>> d = {'\x83\xb4\xe3\xe4W\xf9J*\x8f/c\x16\xecD\xd1\x04\x8b\xa9qz': 169} |
| + >>> intern_number_tree(d) |
| + 'c552317aa95ca8c3f6aae3357a4be299fbcb25ce' |
| + """ |
| + with tempfile.TemporaryFile() as f: |
| + for k, v in sorted(tree.iteritems()): |
| + f.write(struct.pack(CHUNK_FMT, k, v)) |
| + f.seek(0) |
| + return git_common.git_intern_f(f) |
| + |
| + |
| +def leaf_map_fn((pre, tree)): |
| + """Converts a prefix and number tree into a git index line.""" |
| + return '100644 blob %s\t%s\0' % (intern_number_tree(tree), |
| + git_common.pathlify(pre)) |
| + |
| + |
| +def finalize(targets): |
| + """Saves all cache data to the git repository. |
| + |
| + After calculating the generation number for |targets|, call finalize() to |
| + save all the work to the git repository. |
| + |
| + This in particular saves the trees referred to by DIRTY_TREES. |
| + """ |
| + if not DIRTY_TREES: |
| + return |
| + |
| + msg = 'git-number Added %s numbers' % sum(DIRTY_TREES.itervalues()) |
| + |
| + idx = os.path.join(run_git('rev-parse', '--git-dir'), 'number.idx') |
| + env = os.environ.copy() |
| + env['GIT_INDEX_FILE'] = idx |
| + |
| + with ProgressPrinter('Finalizing: (%%(count)d/%d)' % len(DIRTY_TREES)) as inc: |
| + run_git('read-tree', REF, env=env) |
| + |
| + prefixes_trees = ((p, get_number_tree(p)) for p in sorted(DIRTY_TREES)) |
| + updater = subprocess.Popen(['git', 'update-index', '-z', '--index-info'], |
| + stdin=subprocess.PIPE, env=env) |
| + |
| + with git_common.ScopedPool() as leaf_pool: |
| + for item in leaf_pool.imap(leaf_map_fn, prefixes_trees): |
| + updater.stdin.write(item) |
| + inc() |
| + |
| + updater.stdin.close() |
| + updater.wait() |
| + |
| + tree_id = run_git('write-tree', env=env) |
| + commit_cmd = ['commit-tree', '-m', msg, '-p', git_common.git_hash(REF)] |
| + for t in targets: |
| + commit_cmd += ['-p', git_common.hexlify(t)] |
| + commit_cmd.append(tree_id) |
| + commit_id = run_git(*commit_cmd) |
| + run_git('update-ref', REF, commit_id) |
| + |
| + |
| +def preload_tree(prefix): |
| + """Returns the prefix and parsed tree object for the specified prefix.""" |
| + return prefix, get_number_tree(prefix) |
| + |
| + |
| +def all_prefixes(depth=PREFIX_LEN): |
| + for x in (chr(i) for i in xrange(255)): |
| + if depth > 1: |
| + for r in all_prefixes(depth-1): |
| + yield x+r |
| + else: |
| + yield x |
| + |
| + |
| +def load(targets): |
| +  """Load/calculate the generation numbers for targets. |

M-A Ruel
2013/10/24 13:23:03
Loads and then calculates
iannucci
2013/10/25 00:52:41
Clarified this docstring.
|
| + |
| +  Args: |
| +    targets - An iterable of binary-encoded full git commit id hashes. |
| +  """ |
| +  # Fast path: every target already has a cached number. |
| +  if all(get_num(t) is not None for t in targets): |
| +    return |
| + |
| +  # First run: create REF as a commit of an empty tree so that the |
| +  # '^' + REF exclusion below has something to refer to. |
| +  if git_common.git_tree(REF) is None: |
| +    empty = git_common.git_mktree({}) |
| +    commit_id = run_git('commit-tree', '-m', 'Initial commit from git-number', |
| +                        empty) |
| +    run_git('update-ref', REF, commit_id) |
| + |
| +  with git_common.ScopedPool() as pool: |
| +    # Start preloading every prefix tree in the pool while rev-list runs. |
| +    preload_iter = pool.imap_unordered(preload_tree, all_prefixes()) |
| + |
| +    rev_list = [] |
| + |
| +    with ProgressPrinter('Loading commits: %(count)d') as inc: |
| +      # Curiously, buffering the list into memory seems to be the fastest |
| +      # approach in python (as opposed to iterating over the lines in the |
| +      # stdout as they're produced). GIL strikes again :/ |
| +      cmd = [ |
| +        'rev-list', '--topo-order', '--parents', '--reverse', '^' + REF |
| +      ] + map(git_common.hexlify, targets) |
| +      # With --parents each line is a commit id followed by its parent ids; |
| +      # --topo-order --reverse lists parents before their children. |
| +      for line in run_git(*cmd).splitlines(): |
| +        tokens = map(git_common.unhexlify, line.split()) |
| +        rev_list.append((tokens[0], tokens[1:])) |
| +        inc() |
| + |
| +    # Seed the memoization cache with the preloaded trees. |
| +    for prefix, tree in preload_iter: |
| +      get_number_tree.cache[prefix] = tree |
| + |
| +  with ProgressPrinter('Counting: %%(count)d/%d' % len(rev_list)) as inc: |
| +    for commit_id, pars in rev_list: |
| +      # A commit's number is one more than its highest-numbered parent; a |
| +      # commit without parents gets 0. |
| +      num = max(map(get_num, pars)) + 1 if pars else 0 |
| + |
| +      prefix = commit_id[:PREFIX_LEN] |
| +      get_number_tree(prefix)[commit_id] = num |
| +      # Remember which trees changed so finalize() knows what to write. |
| +      DIRTY_TREES[prefix] += 1 |
| +      get_num.cache[commit_id] = num |
| + |
| +      inc() |
| + |
| + |
| +def main(): |
| + try: |
| + parser = optparse.OptionParser( |
| + usage='usage: %prog [options] [<committish>]\n\n' |
| + '<committish> defaults to HEAD') |
| + parser.add_option('--no-cache', action='store_true', |
| + help='Do not actually cache anything we calculate.') |
| + parser.add_option('--reset', action='store_true', |
| + help='Reset the generation number cache and quit.') |
| + parser.add_option('-v', '--verbose', action='count', default=0, |
| + help='Be verbose. Use more times for more verbosity.') |
| + opts, args = parser.parse_args() |
| + |
| + if not args: |
| + args = ['HEAD'] |
| + |
| + git_common.VERBOSE_LEVEL = opts.verbose |
| + |
| + if opts.reset: |
|
M-A Ruel
2013/10/24 13:23:03
Personally, I'd start the try: here, or preferably
iannucci
2013/10/25 00:52:41
Cleaned this up as you suggest. Much better now :)
|
| + run_git('update-ref', '-d', REF) |
| + return 0 |
| + |
| + targets = git_common.parse_committishes(*args) |
| + load(targets) |
| + for t in targets: |
| + print get_num(t) |
| + if not opts.no_cache: |
| + finalize(targets) |
| + |
| + return 0 |
| + except KeyboardInterrupt: |
| + pass |
| + |
|
M-A Ruel
2013/10/24 13:23:03
2 lines
iannucci
2013/10/25 00:52:41
Done.
|
| +if __name__ == '__main__': |
| + sys.exit(main()) |