Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(196)

Side by Side Diff: git_number.py

Issue 26109002: Add git-number script to calculate generation numbers for commits. (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/tools/depot_tools
Patch Set: more fixins Created 7 years, 1 month ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
OLDNEW
(Empty)
1 #!/usr/bin/env python
2 # Copyright (c) 2013 The Chromium Authors. All rights reserved.
3 # Use of this source code is governed by a BSD-style license that can be
4 # found in the LICENSE file.
5
6 """Usage: %prog [options] [<commitref>]*
7
8 If no <commitref>'s are supplied, it defaults to HEAD.
9
10 Calculates the generation number for one or more commits in a git repo.
11
12 Generation number of a commit C with parents P is defined as:
13 generation_number(C, []) = 0
14 generation_number(C, P) = max(map(generation_number, P)) + 1
15
16 This number can be used to order commits relative to each other, as long as for
17 any pair of the commits, one is an ancestor of the other.
18
19 Since calculating the generation number of a commit requires walking that
20 commit's entire history, this script caches all calculated data inside the git
21 repo that it operates on in the ref 'refs/number/commits'.
22 """
23
24 import binascii
25 import collections
26 import logging
27 import optparse
28 import os
29 import struct
30 import sys
31 import tempfile
32
33 import git_common as git
34 import subprocess2
35
36 CHUNK_FMT = '!20sL'
37 CHUNK_SIZE = struct.calcsize(CHUNK_FMT)
38 DIRTY_TREES = collections.defaultdict(int)
39 REF = 'refs/number/commits'
40
41 # Number of bytes to use for the prefix on our internal number structure.
42 # 0 is slow to deserialize. 2 creates way too much bookeeping overhead (would
43 # need to reimplement cache data structures to be a bit more sophisticated than
44 # dicts. 1 seems to be just right.
45 PREFIX_LEN = 1
46
47 # Set this to 'threads' to gather coverage data while testing.
48 POOL_KIND = 'procs'
49
50
51 def pathlify(hash_prefix):
52 """Converts a binary object hash prefix into a posix path, one folder per
53 byte.
54
55 >>> pathlify('\xDE\xAD')
56 'de/ad'
57 """
58 return '/'.join('%02x' % ord(b) for b in hash_prefix)
59
60
61 @git.memoize_one(threadsafe=False)
62 def get_number_tree(prefix_bytes):
63 """Returns a dictionary of the git-number registry specified by
64 |prefix_bytes|.
65
66 This is in the form of {<full binary ref>: <gen num> ...}
67
68 >>> get_number_tree('\x83\xb4')
69 {'\x83\xb4\xe3\xe4W\xf9J*\x8f/c\x16\xecD\xd1\x04\x8b\xa9qz': 169, ...}
70 """
71 ref = '%s:%s' % (REF, pathlify(prefix_bytes))
72
73 try:
74 raw = buffer(git.run('cat-file', 'blob', ref, autostrip=False))
75 return dict(struct.unpack_from(CHUNK_FMT, raw, i * CHUNK_SIZE)
76 for i in xrange(len(raw) / CHUNK_SIZE))
77 except subprocess2.CalledProcessError:
78 return {}
79
80
81 @git.memoize_one(threadsafe=False)
82 def get_num(commit_hash):
83 """Takes a commit hash and returns the generation number for it or None if
84 the commit hash doesn't have a computed value yet."""
85 return get_number_tree(commit_hash[:PREFIX_LEN]).get(commit_hash)
86
87
88 def clear_caches(on_disk=False):
89 """Clears in-process caches for e.g. unit testing."""
90 get_number_tree.clear()
91 get_num.clear()
92 if on_disk:
93 git.run('update-ref', '-d', REF)
94
95
96 def intern_number_tree(tree):
97 """Transforms a number tree (in the form returned by |get_number_tree|) into
98 a git blob.
99
100 Returns the git blob id as hex-encoded string.
101
102 >>> d = {'\x83\xb4\xe3\xe4W\xf9J*\x8f/c\x16\xecD\xd1\x04\x8b\xa9qz': 169}
103 >>> intern_number_tree(d)
104 'c552317aa95ca8c3f6aae3357a4be299fbcb25ce'
105 """
106 with tempfile.TemporaryFile() as f:
107 for k, v in sorted(tree.iteritems()):
108 f.write(struct.pack(CHUNK_FMT, k, v))
109 f.seek(0)
110 return git.intern_f(f)
111
112
113 def leaf_map_fn((pre, tree)):
114 """Converts a prefix and number tree into a git index line."""
115 return '100644 blob %s\t%s\0' % (intern_number_tree(tree), pathlify(pre))
116
117
118 def finalize(targets):
119 """Saves all cache data to the git repository.
120
121 After calculating the generation number for |targets|, call finalize() to
122 save all the work to the git repository.
123
124 This in particular saves the trees referred to by DIRTY_TREES.
125 """
126 if not DIRTY_TREES:
127 return
128
129 msg = 'git-number Added %s numbers' % sum(DIRTY_TREES.itervalues())
130
131 idx = os.path.join(git.run('rev-parse', '--git-dir'), 'number.idx')
132 env = os.environ.copy()
133 env['GIT_INDEX_FILE'] = idx
134
135 progress_message = 'Finalizing: (%%(count)d/%d)' % len(DIRTY_TREES)
136 with git.ProgressPrinter(progress_message) as inc:
137 git.run('read-tree', REF, env=env)
138
139 prefixes_trees = ((p, get_number_tree(p)) for p in sorted(DIRTY_TREES))
140 updater = subprocess2.Popen(['git', 'update-index', '-z', '--index-info'],
141 stdin=subprocess2.PIPE, env=env)
142
143 with git.ScopedPool(kind=POOL_KIND) as leaf_pool:
144 for item in leaf_pool.imap(leaf_map_fn, prefixes_trees):
145 updater.stdin.write(item)
146 inc()
147
148 updater.stdin.close()
149 updater.wait()
150 assert updater.returncode == 0
151
152 tree_id = git.run('write-tree', env=env)
153 commit_cmd = ['commit-tree', '-m', msg, '-p'] + git.hashes(REF)
154 for t in targets:
155 commit_cmd += ['-p', binascii.hexlify(t)]
M-A Ruel 2013/11/15 22:37:06 append especially that the next line is using appe
iannucci 2013/11/15 23:18:00 extend :) done.
156 commit_cmd.append(tree_id)
157 commit_hash = git.run(*commit_cmd)
158 git.run('update-ref', REF, commit_hash)
159 DIRTY_TREES.clear()
160
161
162 def preload_tree(prefix):
163 """Returns the prefix and parsed tree object for the specified prefix."""
164 return prefix, get_number_tree(prefix)
165
166
167 def all_prefixes(depth=PREFIX_LEN):
168 for x in (chr(i) for i in xrange(255)):
169 # This isn't covered because PREFIX_LEN currently == 1
170 if depth > 1: # pragma: no cover
171 for r in all_prefixes(depth - 1):
172 yield x + r
173 else:
174 yield x
175
176
177 def load_generation_numbers(targets):
178 """Populates the caches of get_num and get_number_tree so they contain
179 the results for |targets|.
180
181 Loads cached numbers from disk, and calculates missing numbers if one or
182 more of |targets| is newer than the cached calculations.
183
184 Args:
185 targets - An iterable of binary-encoded full git commit hashes.
186 """
187 # In case they pass us a generator, listify targets.
188 targets = list(targets)
189
190 if all(get_num(t) is not None for t in targets):
191 return
192
193 if git.tree(REF) is None:
194 empty = git.mktree({})
195 commit_hash = git.run('commit-tree', '-m', 'Initial commit from git-number',
196 empty)
M-A Ruel 2013/11/15 22:37:06 wrong alignment
iannucci 2013/11/15 23:18:00 Done.
197 git.run('update-ref', REF, commit_hash)
198
199 with git.ScopedPool(kind=POOL_KIND) as pool:
200 preload_iter = pool.imap_unordered(preload_tree, all_prefixes())
201
202 rev_list = []
203
204 with git.ProgressPrinter('Loading commits: %(count)d') as inc:
205 # Curiously, buffering the list into memory seems to be the fastest
206 # approach in python (as opposed to iterating over the lines in the
207 # stdout as they're produced). GIL strikes again :/
208 cmd = [
209 'rev-list', '--topo-order', '--parents', '--reverse', '^' + REF
M-A Ruel 2013/11/15 22:37:06 Always keep a comma at the end when it's not termi
iannucci 2013/11/15 23:18:00 Done.
210 ] + map(binascii.hexlify, targets)
211 for line in git.run(*cmd).splitlines():
212 tokens = map(binascii.unhexlify, line.split())
213 rev_list.append((tokens[0], tokens[1:]))
214 inc()
215
216 get_number_tree.update(preload_iter)
217
218 with git.ProgressPrinter('Counting: %%(count)d/%d' % len(rev_list)) as inc:
219 for commit_hash, pars in rev_list:
220 num = max(map(get_num, pars)) + 1 if pars else 0
221
222 prefix = commit_hash[:PREFIX_LEN]
223 get_number_tree(prefix)[commit_hash] = num
224 DIRTY_TREES[prefix] += 1
225 get_num.set(commit_hash, num)
226
227 inc()
228
229
230 def main(): # pragma: no cover
231 parser = optparse.OptionParser(usage=sys.modules[__name__].__doc__)
232 parser.add_option('--no-cache', action='store_true',
233 help='Do not actually cache anything we calculate.')
234 parser.add_option('--reset', action='store_true',
235 help='Reset the generation number cache and quit.')
236 parser.add_option('-v', '--verbose', action='count', default=0,
237 help='Be verbose. Use more times for more verbosity.')
238 opts, args = parser.parse_args()
239
240 levels = [logging.ERROR, logging.INFO, logging.DEBUG]
241 logging.basicConfig(level=levels[min(opts.verbose, len(levels) - 1)])
242
243 try:
244 if opts.reset:
245 clear_caches(on_disk=True)
246 return
247
248 try:
249 targets = git.parse_commitrefs(*(args or ['HEAD']))
250 except git.BadCommitRefException as e:
251 parser.error(e)
252
253 load_generation_numbers(targets)
254 if not opts.no_cache:
255 finalize(targets)
256
257 print '\n'.join(str(get_num(x)) for x in targets)
M-A Ruel 2013/11/15 22:37:06 Sometimes you do map, sometimes you use a generato
iannucci 2013/11/15 23:18:00 Yeah... this was an obvious one. Done.
258 return 0
259 except KeyboardInterrupt:
260 return 1
261
262
263 if __name__ == '__main__': # pragma: no cover
264 sys.exit(main())
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698