Chromium Code Reviews| OLD | NEW |
|---|---|
| (Empty) | |
| 1 #!/usr/bin/env python | |
| 2 # Copyright (c) 2013 The Chromium Authors. All rights reserved. | |
| 3 # Use of this source code is governed by a BSD-style license that can be | |
| 4 # found in the LICENSE file. | |
| 5 | |
| 6 """Usage: %prog [options] [<commitref>]* | |
| 7 | |
| 8 If no <commitref>'s are supplied, it defaults to HEAD. | |
| 9 | |
| 10 Calculates the generation number for one or more commits in a git repo. | |
| 11 | |
| 12 Generation number of a commit C with parents P is defined as: | |
| 13 generation_number(C, []) = 0 | |
| 14 generation_number(C, P) = max(map(generation_number, P)) + 1 | |
| 15 | |
| 16 This number can be used to order commits relative to each other, as long as for | |
| 17 any pair of the commits, one is an ancestor of the other. | |
| 18 | |
| 19 Since calculating the generation number of a commit requires walking that | |
| 20 commit's entire history, this script caches all calculated data inside the git | |
| 21 repo that it operates on in the ref 'refs/number/commits'. | |
| 22 """ | |
| 23 | |
| 24 import binascii | |
| 25 import collections | |
| 26 import logging | |
| 27 import optparse | |
| 28 import os | |
| 29 import struct | |
| 30 import sys | |
| 31 import tempfile | |
| 32 | |
| 33 import git_common as git | |
| 34 import subprocess2 | |
| 35 | |
| 36 CHUNK_FMT = '!20sL' | |
| 37 CHUNK_SIZE = struct.calcsize(CHUNK_FMT) | |
| 38 DIRTY_TREES = collections.defaultdict(int) | |
| 39 REF = 'refs/number/commits' | |
| 40 | |
| 41 # Number of bytes to use for the prefix on our internal number structure. | |
| 42 # 0 is slow to deserialize. 2 creates way too much bookeeping overhead (would | |
| 43 # need to reimplement cache data structures to be a bit more sophisticated than | |
| 44 # dicts. 1 seems to be just right. | |
| 45 PREFIX_LEN = 1 | |
| 46 | |
| 47 # Set this to 'threads' to gather coverage data while testing. | |
| 48 POOL_KIND = 'procs' | |
| 49 | |
| 50 | |
| 51 def pathlify(hash_prefix): | |
| 52 """Converts a binary object hash prefix into a posix path, one folder per | |
| 53 byte. | |
| 54 | |
| 55 >>> pathlify('\xDE\xAD') | |
| 56 'de/ad' | |
| 57 """ | |
| 58 return '/'.join('%02x' % ord(b) for b in hash_prefix) | |
| 59 | |
| 60 | |
| 61 @git.memoize_one(threadsafe=False) | |
| 62 def get_number_tree(prefix_bytes): | |
| 63 """Returns a dictionary of the git-number registry specified by | |
| 64 |prefix_bytes|. | |
| 65 | |
| 66 This is in the form of {<full binary ref>: <gen num> ...} | |
| 67 | |
| 68 >>> get_number_tree('\x83\xb4') | |
| 69 {'\x83\xb4\xe3\xe4W\xf9J*\x8f/c\x16\xecD\xd1\x04\x8b\xa9qz': 169, ...} | |
| 70 """ | |
| 71 ref = '%s:%s' % (REF, pathlify(prefix_bytes)) | |
| 72 | |
| 73 try: | |
| 74 raw = buffer(git.run('cat-file', 'blob', ref, autostrip=False)) | |
| 75 return dict(struct.unpack_from(CHUNK_FMT, raw, i * CHUNK_SIZE) | |
| 76 for i in xrange(len(raw) / CHUNK_SIZE)) | |
| 77 except subprocess2.CalledProcessError: | |
| 78 return {} | |
| 79 | |
| 80 | |
| 81 @git.memoize_one(threadsafe=False) | |
| 82 def get_num(commit_hash): | |
| 83 """Takes a commit hash and returns the generation number for it or None if | |
| 84 the commit hash doesn't have a computed value yet.""" | |
| 85 return get_number_tree(commit_hash[:PREFIX_LEN]).get(commit_hash) | |
| 86 | |
| 87 | |
| 88 def clear_caches(on_disk=False): | |
| 89 """Clears in-process caches for e.g. unit testing.""" | |
| 90 get_number_tree.clear() | |
| 91 get_num.clear() | |
| 92 if on_disk: | |
| 93 git.run('update-ref', '-d', REF) | |
| 94 | |
| 95 | |
| 96 def intern_number_tree(tree): | |
| 97 """Transforms a number tree (in the form returned by |get_number_tree|) into | |
| 98 a git blob. | |
| 99 | |
| 100 Returns the git blob id as hex-encoded string. | |
| 101 | |
| 102 >>> d = {'\x83\xb4\xe3\xe4W\xf9J*\x8f/c\x16\xecD\xd1\x04\x8b\xa9qz': 169} | |
| 103 >>> intern_number_tree(d) | |
| 104 'c552317aa95ca8c3f6aae3357a4be299fbcb25ce' | |
| 105 """ | |
| 106 with tempfile.TemporaryFile() as f: | |
| 107 for k, v in sorted(tree.iteritems()): | |
| 108 f.write(struct.pack(CHUNK_FMT, k, v)) | |
| 109 f.seek(0) | |
| 110 return git.intern_f(f) | |
| 111 | |
| 112 | |
| 113 def leaf_map_fn((pre, tree)): | |
| 114 """Converts a prefix and number tree into a git index line.""" | |
| 115 return '100644 blob %s\t%s\0' % (intern_number_tree(tree), pathlify(pre)) | |
| 116 | |
| 117 | |
| 118 def finalize(targets): | |
| 119 """Saves all cache data to the git repository. | |
| 120 | |
| 121 After calculating the generation number for |targets|, call finalize() to | |
| 122 save all the work to the git repository. | |
| 123 | |
| 124 This in particular saves the trees referred to by DIRTY_TREES. | |
| 125 """ | |
| 126 if not DIRTY_TREES: | |
| 127 return | |
| 128 | |
| 129 msg = 'git-number Added %s numbers' % sum(DIRTY_TREES.itervalues()) | |
| 130 | |
| 131 idx = os.path.join(git.run('rev-parse', '--git-dir'), 'number.idx') | |
| 132 env = os.environ.copy() | |
| 133 env['GIT_INDEX_FILE'] = idx | |
| 134 | |
| 135 progress_message = 'Finalizing: (%%(count)d/%d)' % len(DIRTY_TREES) | |
| 136 with git.ProgressPrinter(progress_message) as inc: | |
| 137 git.run('read-tree', REF, env=env) | |
| 138 | |
| 139 prefixes_trees = ((p, get_number_tree(p)) for p in sorted(DIRTY_TREES)) | |
| 140 updater = subprocess2.Popen(['git', 'update-index', '-z', '--index-info'], | |
| 141 stdin=subprocess2.PIPE, env=env) | |
| 142 | |
| 143 with git.ScopedPool(kind=POOL_KIND) as leaf_pool: | |
| 144 for item in leaf_pool.imap(leaf_map_fn, prefixes_trees): | |
| 145 updater.stdin.write(item) | |
| 146 inc() | |
| 147 | |
| 148 updater.stdin.close() | |
| 149 updater.wait() | |
| 150 assert updater.returncode == 0 | |
| 151 | |
| 152 tree_id = git.run('write-tree', env=env) | |
| 153 commit_cmd = ['commit-tree', '-m', msg, '-p'] + git.hashes(REF) | |
| 154 for t in targets: | |
| 155 commit_cmd += ['-p', binascii.hexlify(t)] | |
|
M-A Ruel
2013/11/15 22:37:06
append
especially that the next line is using appe
iannucci
2013/11/15 23:18:00
extend :)
done.
| |
| 156 commit_cmd.append(tree_id) | |
| 157 commit_hash = git.run(*commit_cmd) | |
| 158 git.run('update-ref', REF, commit_hash) | |
| 159 DIRTY_TREES.clear() | |
| 160 | |
| 161 | |
| 162 def preload_tree(prefix): | |
| 163 """Returns the prefix and parsed tree object for the specified prefix.""" | |
| 164 return prefix, get_number_tree(prefix) | |
| 165 | |
| 166 | |
| 167 def all_prefixes(depth=PREFIX_LEN): | |
| 168 for x in (chr(i) for i in xrange(255)): | |
| 169 # This isn't covered because PREFIX_LEN currently == 1 | |
| 170 if depth > 1: # pragma: no cover | |
| 171 for r in all_prefixes(depth - 1): | |
| 172 yield x + r | |
| 173 else: | |
| 174 yield x | |
| 175 | |
| 176 | |
| 177 def load_generation_numbers(targets): | |
| 178 """Populates the caches of get_num and get_number_tree so they contain | |
| 179 the results for |targets|. | |
| 180 | |
| 181 Loads cached numbers from disk, and calculates missing numbers if one or | |
| 182 more of |targets| is newer than the cached calculations. | |
| 183 | |
| 184 Args: | |
| 185 targets - An iterable of binary-encoded full git commit hashes. | |
| 186 """ | |
| 187 # In case they pass us a generator, listify targets. | |
| 188 targets = list(targets) | |
| 189 | |
| 190 if all(get_num(t) is not None for t in targets): | |
| 191 return | |
| 192 | |
| 193 if git.tree(REF) is None: | |
| 194 empty = git.mktree({}) | |
| 195 commit_hash = git.run('commit-tree', '-m', 'Initial commit from git-number', | |
| 196 empty) | |
|
M-A Ruel
2013/11/15 22:37:06
wrong alignment
iannucci
2013/11/15 23:18:00
Done.
| |
| 197 git.run('update-ref', REF, commit_hash) | |
| 198 | |
| 199 with git.ScopedPool(kind=POOL_KIND) as pool: | |
| 200 preload_iter = pool.imap_unordered(preload_tree, all_prefixes()) | |
| 201 | |
| 202 rev_list = [] | |
| 203 | |
| 204 with git.ProgressPrinter('Loading commits: %(count)d') as inc: | |
| 205 # Curiously, buffering the list into memory seems to be the fastest | |
| 206 # approach in python (as opposed to iterating over the lines in the | |
| 207 # stdout as they're produced). GIL strikes again :/ | |
| 208 cmd = [ | |
| 209 'rev-list', '--topo-order', '--parents', '--reverse', '^' + REF | |
|
M-A Ruel
2013/11/15 22:37:06
Always keep a comma at the end when it's not termi
iannucci
2013/11/15 23:18:00
Done.
| |
| 210 ] + map(binascii.hexlify, targets) | |
| 211 for line in git.run(*cmd).splitlines(): | |
| 212 tokens = map(binascii.unhexlify, line.split()) | |
| 213 rev_list.append((tokens[0], tokens[1:])) | |
| 214 inc() | |
| 215 | |
| 216 get_number_tree.update(preload_iter) | |
| 217 | |
| 218 with git.ProgressPrinter('Counting: %%(count)d/%d' % len(rev_list)) as inc: | |
| 219 for commit_hash, pars in rev_list: | |
| 220 num = max(map(get_num, pars)) + 1 if pars else 0 | |
| 221 | |
| 222 prefix = commit_hash[:PREFIX_LEN] | |
| 223 get_number_tree(prefix)[commit_hash] = num | |
| 224 DIRTY_TREES[prefix] += 1 | |
| 225 get_num.set(commit_hash, num) | |
| 226 | |
| 227 inc() | |
| 228 | |
| 229 | |
| 230 def main(): # pragma: no cover | |
| 231 parser = optparse.OptionParser(usage=sys.modules[__name__].__doc__) | |
| 232 parser.add_option('--no-cache', action='store_true', | |
| 233 help='Do not actually cache anything we calculate.') | |
| 234 parser.add_option('--reset', action='store_true', | |
| 235 help='Reset the generation number cache and quit.') | |
| 236 parser.add_option('-v', '--verbose', action='count', default=0, | |
| 237 help='Be verbose. Use more times for more verbosity.') | |
| 238 opts, args = parser.parse_args() | |
| 239 | |
| 240 levels = [logging.ERROR, logging.INFO, logging.DEBUG] | |
| 241 logging.basicConfig(level=levels[min(opts.verbose, len(levels) - 1)]) | |
| 242 | |
| 243 try: | |
| 244 if opts.reset: | |
| 245 clear_caches(on_disk=True) | |
| 246 return | |
| 247 | |
| 248 try: | |
| 249 targets = git.parse_commitrefs(*(args or ['HEAD'])) | |
| 250 except git.BadCommitRefException as e: | |
| 251 parser.error(e) | |
| 252 | |
| 253 load_generation_numbers(targets) | |
| 254 if not opts.no_cache: | |
| 255 finalize(targets) | |
| 256 | |
| 257 print '\n'.join(str(get_num(x)) for x in targets) | |
|
M-A Ruel
2013/11/15 22:37:06
Sometimes you do map, sometimes you use a generato
iannucci
2013/11/15 23:18:00
Yeah... this was an obvious one. Done.
| |
| 258 return 0 | |
| 259 except KeyboardInterrupt: | |
| 260 return 1 | |
| 261 | |
| 262 | |
| 263 if __name__ == '__main__': # pragma: no cover | |
| 264 sys.exit(main()) | |
| OLD | NEW |