Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(546)

Side by Side Diff: git_number.py

Issue 26109002: Add git-number script to calculate generation numbers for commits. (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/tools/depot_tools
Patch Set: Now with tests! Created 7 years, 1 month ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
OLDNEW
(Empty)
1 #!/usr/bin/env python
2 # Copyright (c) 2013 The Chromium Authors. All rights reserved.
3 # Use of this source code is governed by a BSD-style license that can be
4 # found in the LICENSE file.
5
M-A Ruel 2013/11/07 20:59:11 I like a short module docstring that tells what th
iannucci 2013/11/07 21:44:57 Done. (though I used usage= because description wa
6 import binascii
7 import collections
8 import logging
9 import optparse
10 import os
11 import struct
12 import subprocess
13 import sys
14 import tempfile
15
16 import git_common as git
17
# On-disk record layout for one numbered commit: network byte order ('!'), a
# 20-byte string (the binary commit hash) followed by an unsigned long (the
# generation number).
CHUNK_FMT = '!20sL'
# Size in bytes of a single packed CHUNK_FMT record.
CHUNK_SIZE = struct.calcsize(CHUNK_FMT)
# Maps a hash prefix to the count of numbers added under that prefix since the
# last finalize(); load() increments it and finalize() consumes and clears it.
DIRTY_TREES = collections.defaultdict(int)
# The git ref under which the generation-number blobs are stored.
REF = 'refs/number/commits'

# Number of bytes to use for the prefix on our internal number structure.
# 0 is slow to deserialize. 2 creates way too much bookkeeping overhead (would
# need to reimplement cache data structures to be a bit more sophisticated than
# dicts). 1 seems to be just right.
PREFIX_LEN = 1

# Set this to 'threads' to gather coverage data while testing.
POOL_KIND = 'procs'
31
M-A Ruel 2013/11/07 20:59:11 one more line
iannucci 2013/11/07 21:44:57 Done.
@git.memoize_one
def get_number_tree(prefix_bytes):
  """Returns a dictionary of the blob contents specified by |prefix_bytes|.

  The blob is read with `git cat-file blob` from the path
  git.pathlify(prefix_bytes) underneath REF. Only stdout is consumed; stderr is
  captured and dropped and the exit status is never inspected, so a read that
  produces no output simply yields an empty dictionary.

  The result is in the form of {<full binary ref>: <gen num> ...}. Trailing
  bytes that do not fill a complete CHUNK_SIZE record are ignored.

  >>> get_number_tree('\x83\xb4')
  {'\x83\xb4\xe3\xe4W\xf9J*\x8f/c\x16\xecD\xd1\x04\x8b\xa9qz': 169, ...}
  """
  blob_path = '%s:%s' % (REF, git.pathlify(prefix_bytes))
  reader = subprocess.Popen(['git', 'cat-file', 'blob', blob_path],
                            stdout=subprocess.PIPE, stderr=subprocess.PIPE)
  records = buffer(reader.communicate()[0])

  # Every fixed-size record unpacks to a (commit hash, generation number) pair,
  # which is exactly the item shape dict() accepts.
  record_count = len(records) // CHUNK_SIZE
  return dict(struct.unpack_from(CHUNK_FMT, records, idx * CHUNK_SIZE)
              for idx in xrange(record_count))
51
52
@git.memoize_one
def get_num(commit_hash):
  """Returns the generation number for |commit_hash|, or None if unknown.

  |commit_hash| is looked up in the number tree selected by its first
  PREFIX_LEN bytes.
  """
  number_tree = get_number_tree(commit_hash[:PREFIX_LEN])
  return number_tree.get(commit_hash)
58
M-A Ruel 2013/11/07 20:59:11 space more
iannucci 2013/11/07 21:44:57 Done.
def clear_caches():
  """Empties the memoization caches of get_number_tree and get_num."""
  # NOTE(review): both caches are module-global state; the review thread on
  # this function raised refactoring them away as a possible follow-up.
  for memoized in (get_number_tree, get_num):
    memoized.cache.clear()
62
def intern_number_tree(tree):
  """Transforms a number tree (in the form returned by |get_number_tree|) into
  a git blob.

  The entries are serialized as consecutive CHUNK_FMT records, ordered by
  commit hash, into a temporary file which is then handed to git.intern_f.

  Returns the git blob id as hex-encoded string.

  >>> d = {'\x83\xb4\xe3\xe4W\xf9J*\x8f/c\x16\xecD\xd1\x04\x8b\xa9qz': 169}
  >>> intern_number_tree(d)
  'c552317aa95ca8c3f6aae3357a4be299fbcb25ce'
  """
  with tempfile.TemporaryFile() as blob_file:
    # Keys are unique, so ordering by key matches ordering the (key, value)
    # pairs.
    for commit_hash in sorted(tree):
      record = struct.pack(CHUNK_FMT, commit_hash, tree[commit_hash])
      blob_file.write(record)
    # Rewind so the consumer reads the blob from its first byte.
    blob_file.seek(0)
    return git.intern_f(blob_file)
78
79
def leaf_map_fn(pre_tree):
  """Converts a prefix and number tree into a git index line.

  Args:
    pre_tree - A (prefix, number tree) pair. It arrives as one tuple because
        finalize() feeds this function through a pool's imap(), which passes
        each item as a single argument.

  Returns the NUL-terminated '<mode> blob <sha>\\t<path>' entry that finalize()
  writes to `git update-index -z --index-info`.
  """
  # Unpack in the body rather than in the signature: tuple parameters are
  # Python 2 only syntax (removed by PEP 3113).
  pre, tree = pre_tree
  return '100644 blob %s\t%s\0' % (intern_number_tree(tree),
                                   git.pathlify(pre))
84
85
def finalize(targets):
  """Saves all cache data to the git repository.

  After calculating the generation number for |targets|, call finalize() to
  save all the work to the git repository.

  This in particular saves the trees referred to by DIRTY_TREES: each dirty
  prefix is re-serialized into a blob, staged in a private index file
  ('number.idx' inside the git dir), written out as a tree and committed on
  top of REF. Does nothing if DIRTY_TREES is empty.

  Args:
    targets - An iterable of binary-encoded full git commit id hashes. Each is
        recorded as an extra parent of the new REF commit.

  Raises:
    subprocess.CalledProcessError - if `git update-index` exits non-zero. In
        that case nothing is committed and DIRTY_TREES is left untouched.
  """
  if not DIRTY_TREES:
    return

  msg = 'git-number Added %s numbers' % sum(DIRTY_TREES.itervalues())

  # Stage into a dedicated index file so the user's real index is not touched.
  idx = os.path.join(git.run('rev-parse', '--git-dir'), 'number.idx')
  env = os.environ.copy()
  env['GIT_INDEX_FILE'] = idx

  progress_message = 'Finalizing: (%%(count)d/%d)' % len(DIRTY_TREES)
  with git.ProgressPrinter(progress_message) as inc:
    # Start from the tree currently stored at REF; only dirty prefixes are
    # replaced below.
    git.run('read-tree', REF, env=env)

    prefixes_trees = ((p, get_number_tree(p)) for p in sorted(DIRTY_TREES))
    update_cmd = ['git', 'update-index', '-z', '--index-info']
    updater = subprocess.Popen(update_cmd, stdin=subprocess.PIPE, env=env)

    with git.ScopedPool(kind=POOL_KIND) as leaf_pool:
      for item in leaf_pool.imap(leaf_map_fn, prefixes_trees):
        updater.stdin.write(item)
        inc()

    updater.stdin.close()
    # Do not commit a tree built from an index that failed to update.
    returncode = updater.wait()
    if returncode:
      raise subprocess.CalledProcessError(returncode, update_cmd)

    tree_id = git.run('write-tree', env=env)
    # Parents: the previous REF commit(s) plus every target.
    commit_cmd = ['commit-tree', '-m', msg, '-p'] + git.hashes(REF)
    for t in targets:
      commit_cmd += ['-p', binascii.hexlify(t)]
    commit_cmd.append(tree_id)
    commit_hash = git.run(*commit_cmd)
    git.run('update-ref', REF, commit_hash)
  DIRTY_TREES.clear()
127
128
def preload_tree(prefix):
  """Loads the number tree for |prefix|.

  Returns a (prefix, tree) pair, so that results coming back from an unordered
  pool map can still be matched to the prefix they belong to.
  """
  tree = get_number_tree(prefix)
  return prefix, tree
132
133
def all_prefixes(depth=PREFIX_LEN):
  """Yields every possible binary hash prefix that is |depth| bytes long.

  Args:
    depth - The prefix length in bytes; defaults to PREFIX_LEN.

  Yields 256 ** depth strings in ascending byte order.
  """
  # A byte takes the values 0..255 inclusive, hence 256 (not 255) candidates;
  # stopping at 255 would leave the '\xff' prefix out.
  for x in (chr(i) for i in xrange(256)):
    # This isn't covered because PREFIX_LEN currently == 1
    if depth > 1:  # pragma: no cover
      for r in all_prefixes(depth-1):
        yield x+r
    else:
      yield x
142
143
def load(targets):
  """Load the generation numbers for targets. Calculates missing numbers if
  one or more of the targets is past the cached calculations.

  Nothing is returned: computed numbers are stored in the memoization caches
  of get_num and get_number_tree, and every touched prefix is counted in
  DIRTY_TREES so that finalize() can persist it later.

  Args:
    targets - An iterable of binary-encoded full git commit id hashes. It is
        iterated more than once.
  """
  # Fast path: every target already has a number.
  if all(get_num(t) is not None for t in targets):
    return

  # First run: create an empty root commit so that REF exists.
  if git.tree(REF) is None:
    empty = git.mktree({})
    commit_hash = git.run('commit-tree', '-m', 'Initial commit from git-number',
                          empty)
    git.run('update-ref', REF, commit_hash)

  with git.ScopedPool(kind=POOL_KIND) as pool:
    # Kick off loading of every number tree in the pool while rev-list runs
    # below.
    preload_iter = pool.imap_unordered(preload_tree, all_prefixes())

    rev_list = []

    with git.ProgressPrinter('Loading commits: %(count)d') as inc:
      # Curiously, buffering the list into memory seems to be the fastest
      # approach in python (as opposed to iterating over the lines in the
      # stdout as they're produced). GIL strikes again :/
      #
      # '^' + REF excludes everything reachable from REF; finalize() records
      # previously numbered targets as parents of REF's commit. With
      # --topo-order --reverse, parents are listed before their children.
      cmd = [
        'rev-list', '--topo-order', '--parents', '--reverse', '^' + REF
      ] + map(binascii.hexlify, targets)
      for line in git.run(*cmd).splitlines():
        # Each line is '<commit> <parent>...' in hex; keep them in binary form.
        tokens = map(binascii.unhexlify, line.split())
        rev_list.append((tokens[0], tokens[1:]))
        inc()

    # Seed the memoization cache with the trees loaded by the pool.
    for prefix, tree in preload_iter:
      get_number_tree.cache[prefix] = tree

  with git.ProgressPrinter('Counting: %%(count)d/%d' % len(rev_list)) as inc:
    for commit_hash, pars in rev_list:
      # A root commit is generation 0; otherwise one more than the highest
      # parent.
      num = max(map(get_num, pars)) + 1 if pars else 0

      prefix = commit_hash[:PREFIX_LEN]
      # Record the number in the cached tree and mark the prefix dirty.
      get_number_tree(prefix)[commit_hash] = num
      DIRTY_TREES[prefix] += 1
      get_num.cache[commit_hash] = num

      inc()
190
191
def git_number(do_reset, do_cache, target_refs):
  """Computes generation numbers for |target_refs|, or resets the cache.

  Args:
    do_reset - If true, delete REF and do nothing else.
    do_cache - If true, persist newly calculated numbers with finalize().
    target_refs - Committishes to resolve and number.

  Returns a list with one generation number per resolved target, or None when
  |do_reset| is set.
  """
  if do_reset:
    git.run('update-ref', '-d', REF)
    return None

  targets = git.parse_committishes(*target_refs)
  load(targets)
  numbers = [get_num(target) for target in targets]
  if do_cache:
    finalize(targets)

  return numbers
204
205
def main():  # pragma: no cover
  """Command line entry point.

  Parses the options, runs git_number() and prints one generation number per
  line for the requested committishes (HEAD when none are given).

  Returns the process exit status: 0 on success, 1 if interrupted.
  """
  parser = optparse.OptionParser(
      usage='usage: %prog [options] [<committish>]\n\n'
            '<committish> defaults to HEAD')
  parser.add_option('--no-cache', action='store_true',
                    help='Do not actually cache anything we calculate.')
  parser.add_option('--reset', action='store_true',
                    help='Reset the generation number cache and quit.')
  parser.add_option('-v', '--verbose', action='count', default=0,
                    help='Be verbose. Use more times for more verbosity.')
  opts, args = parser.parse_args()

  if opts.verbose == 1:
    logging.getLogger().setLevel(logging.INFO)
  elif opts.verbose >= 2:
    logging.getLogger().setLevel(logging.DEBUG)

  try:
    ret = git_number(opts.reset, not opts.no_cache, args or ['HEAD'])
    # git_number() returns None after --reset; there is nothing to print then.
    if ret is not None:
      print('\n'.join(map(str, ret)))
    return 0
  except KeyboardInterrupt:
    # Report the interruption as a failure instead of exiting with status 0.
    return 1
230
# Run main() only when executed as a script, and use its return value as the
# process exit status.
if __name__ == '__main__':  # pragma: no cover
  sys.exit(main())
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698