Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(723)

Side by Side Diff: git_number.py

Issue 26109002: Add git-number script to calculate generation numbers for commits. (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/tools/depot_tools
Patch Set: address comments Created 7 years, 1 month ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
OLDNEW
(Empty)
1 #!/usr/bin/env python
2 # Copyright (c) 2013 The Chromium Authors. All rights reserved.
3 # Use of this source code is governed by a BSD-style license that can be
4 # found in the LICENSE file.
5
6 """Usage: %prog [options] [<committish>]*
7
8 If <committish> is omitted, it defaults to HEAD.
9
10 Calculates the generation number for one or more commits in a git repo.
11
12 Generation number of a commit C with parents P is defined as:
13 generation_number(C, []) = 0
14 generation_number(C, P) = max(map(generation_number, P)) + 1
15
16 This number can be used to order commits relative to each other, as long as for
17 any pair of the commits, one is an ancestor of the other.
18
19 Since calculating the generation number of a commit requires walking that
20 commit's entire history, this script caches all calculated data inside the git
21 repo that it operates on in the ref 'refs/number/commits'.
22 """
23
24 import binascii
25 import collections
26 import logging
27 import optparse
28 import os
29 import struct
30 import subprocess
31 import sys
32 import tempfile
33
34 import git_common as git
35
36 CHUNK_FMT = '!20sL'
37 CHUNK_SIZE = struct.calcsize(CHUNK_FMT)
38 DIRTY_TREES = collections.defaultdict(int)
39 REF = 'refs/number/commits'
40
41 # Number of bytes to use for the prefix on our internal number structure.
42 # 0 is slow to deserialize. 2 creates way too much bookeeping overhead (would
43 # need to reimplement cache data structures to be a bit more sophisticated than
44 # dicts. 1 seems to be just right.
45 PREFIX_LEN = 1
46
47 # Set this to 'threads' to gather coverage data while testing.
48 POOL_KIND = 'procs'
49
50
51 def pathlify(hash_prefix):
52 """Convert a binary git hash prefix into a posix path, one folder per byte.
M-A Ruel 2013/11/08 19:32:23 Converts S'es everywhere.
iannucci 2013/11/11 22:59:24 DE-IMPERITIZE ALL THE VERBS!
53
54 >>> pathlify("\xDE\xAD")
55 "de/ad"
56 """
57 return '/'.join('%02x' % ord(b) for b in hash_prefix)
58
59
60 @git.memoize_one
61 def get_number_tree(prefix_bytes):
62 """Returns a dictionary of the blob contents specified by |prefix_bytes|.
63 This is in the form of {<full binary ref>: <gen num> ...}
64
65 >>> get_number_tree('\x83\xb4')
66 {'\x83\xb4\xe3\xe4W\xf9J*\x8f/c\x16\xecD\xd1\x04\x8b\xa9qz': 169, ...}
67 """
68 ret = {}
69 ref = '%s:%s' % (REF, pathlify(prefix_bytes))
70
71 p = subprocess.Popen(['git', 'cat-file', 'blob', ref],
M-A Ruel 2013/11/08 19:32:23 Why not use git.run()?
iannucci 2013/11/11 22:59:24 Good question... I'm pretty sure there was a reaso
72 stdout=subprocess.PIPE, stderr=subprocess.PIPE)
73 raw = buffer(p.communicate()[0])
74 for i in xrange(len(raw) / CHUNK_SIZE):
75 commit_hash, num = struct.unpack_from(CHUNK_FMT, raw, i * CHUNK_SIZE)
76 ret[commit_hash] = num
77
78 return ret
79
80
81 @git.memoize_one
82 def get_num(commit_hash):
83 """Takes a hash and returns the generation number for it or None if the
84 commit_hash is unknown."""
85 return get_number_tree(commit_hash[:PREFIX_LEN]).get(commit_hash)
86
87
88 def clear_caches():
89 """Call to clear in-process caches for e.g. unit testing."""
90 get_number_tree.cache.clear()
91 get_num.cache.clear()
92
93
94 def intern_number_tree(tree):
95 """Transforms a number tree (in the form returned by |get_number_tree|) into
96 a git blob.
97
98 Returns the git blob id as hex-encoded string.
99
100 >>> d = {'\x83\xb4\xe3\xe4W\xf9J*\x8f/c\x16\xecD\xd1\x04\x8b\xa9qz': 169}
101 >>> intern_number_tree(d)
102 'c552317aa95ca8c3f6aae3357a4be299fbcb25ce'
103 """
104 with tempfile.TemporaryFile() as f:
105 for k, v in sorted(tree.iteritems()):
106 f.write(struct.pack(CHUNK_FMT, k, v))
107 f.seek(0)
108 return git.intern_f(f)
109
110
111 def leaf_map_fn((pre, tree)):
112 """Converts a prefix and number tree into a git index line."""
113 return '100644 blob %s\t%s\0' % (intern_number_tree(tree), pathlify(pre))
114
115
116 def finalize(targets):
117 """Saves all cache data to the git repository.
118
119 After calculating the generation number for |targets|, call finalize() to
120 save all the work to the git repository.
121
122 This in particular saves the trees referred to by DIRTY_TREES.
123 """
124 if not DIRTY_TREES:
125 return
126
127 msg = 'git-number Added %s numbers' % sum(DIRTY_TREES.itervalues())
128
129 idx = os.path.join(git.run('rev-parse', '--git-dir'), 'number.idx')
130 env = os.environ.copy()
131 env['GIT_INDEX_FILE'] = idx
132
133 progress_message = 'Finalizing: (%%(count)d/%d)' % len(DIRTY_TREES)
134 with git.ProgressPrinter(progress_message) as inc:
135 git.run('read-tree', REF, env=env)
136
137 prefixes_trees = ((p, get_number_tree(p)) for p in sorted(DIRTY_TREES))
138 updater = subprocess.Popen(['git', 'update-index', '-z', '--index-info'],
139 stdin=subprocess.PIPE, env=env)
140
141 with git.ScopedPool(kind=POOL_KIND) as leaf_pool:
142 for item in leaf_pool.imap(leaf_map_fn, prefixes_trees):
143 updater.stdin.write(item)
144 inc()
145
146 updater.stdin.close()
147 updater.wait()
148
149 tree_id = git.run('write-tree', env=env)
150 commit_cmd = ['commit-tree', '-m', msg, '-p'] + git.hashes(REF)
151 for t in targets:
152 commit_cmd += ['-p', binascii.hexlify(t)]
153 commit_cmd.append(tree_id)
154 commit_hash = git.run(*commit_cmd)
155 git.run('update-ref', REF, commit_hash)
156 DIRTY_TREES.clear()
157
158
159 def preload_tree(prefix):
160 """Returns the prefix and parsed tree object for the specified prefix."""
161 return prefix, get_number_tree(prefix)
162
163
164 def all_prefixes(depth=PREFIX_LEN):
165 for x in (chr(i) for i in xrange(255)):
166 # This isn't covered because PREFIX_LEN currently == 1
167 if depth > 1: # pragma: no cover
168 for r in all_prefixes(depth-1):
169 yield x+r
170 else:
171 yield x
172
173
174 def load(targets):
175 """Load the generation numbers for targets. Calculates missing numbers if
176 one or more of the targets is past the cached calculations.
177
178 Args:
179 targets - An iterable of binary-encoded full git commit id hashes.
180 """
181 if all(get_num(t) is not None for t in targets):
182 return
183
184 if git.tree(REF) is None:
185 empty = git.mktree({})
186 commit_hash = git.run('commit-tree', '-m', 'Initial commit from git-number',
187 empty)
188 git.run('update-ref', REF, commit_hash)
189
190 with git.ScopedPool(kind=POOL_KIND) as pool:
191 preload_iter = pool.imap_unordered(preload_tree, all_prefixes())
192
193 rev_list = []
194
195 with git.ProgressPrinter('Loading commits: %(count)d') as inc:
196 # Curiously, buffering the list into memory seems to be the fastest
197 # approach in python (as opposed to iterating over the lines in the
198 # stdout as they're produced). GIL strikes again :/
199 cmd = [
200 'rev-list', '--topo-order', '--parents', '--reverse', '^' + REF
201 ] + map(binascii.hexlify, targets)
202 for line in git.run(*cmd).splitlines():
203 tokens = map(binascii.unhexlify, line.split())
204 rev_list.append((tokens[0], tokens[1:]))
205 inc()
206
207 for prefix, tree in preload_iter:
208 get_number_tree.cache[prefix] = tree
209
210 with git.ProgressPrinter('Counting: %%(count)d/%d' % len(rev_list)) as inc:
211 for commit_hash, pars in rev_list:
212 num = max(map(get_num, pars)) + 1 if pars else 0
213
214 prefix = commit_hash[:PREFIX_LEN]
215 get_number_tree(prefix)[commit_hash] = num
216 DIRTY_TREES[prefix] += 1
217 get_num.cache[commit_hash] = num
218
219 inc()
220
221
222 def git_number(do_reset, do_cache, target_refs):
223 if do_reset:
224 git.run('update-ref', '-d', REF)
225 return
226
227 targets = git.parse_committishes(*target_refs)
228 load(targets)
229 ret = map(get_num, targets)
230 if do_cache:
231 finalize(targets)
232
233 return ret
234
235
236 def main(): # pragma: no cover
237 parser = optparse.OptionParser(usage=sys.modules[__name__].__doc__)
238 parser.add_option('--no-cache', action='store_true',
239 help='Do not actually cache anything we calculate.')
240 parser.add_option('--reset', action='store_true',
241 help='Reset the generation number cache and quit.')
242 parser.add_option('-v', '--verbose', action='count', default=0,
243 help='Be verbose. Use more times for more verbosity.')
244 opts, args = parser.parse_args()
245
246 if opts.verbose == 1:
247 logging.getLogger().setLevel(logging.INFO)
248 elif opts.verbose >= 2:
249 logging.getLogger().setLevel(logging.DEBUG)
250
251 try:
252 ret = git_number(opts.reset, not opts.no_cache, args or ['HEAD'])
253 print '\n'.join(map(str, ret))
254 return 0
255 except KeyboardInterrupt:
256 pass
257
258
259 if __name__ == '__main__': # pragma: no cover
260 sys.exit(main())
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698