Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(186)

Side by Side Diff: git_number.py

Issue 26109002: Add git-number script to calculate generation numbers for commits. (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/tools/depot_tools
Patch Set: Address comments (reupload!) Created 7 years, 2 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
« git_common.py ('K') | « git_common.py ('k') | no next file » | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
(Empty)
1 #!/usr/bin/env python
2 # Copyright (c) 2013 The Chromium Authors. All rights reserved.
3 # Use of this source code is governed by a BSD-style license that can be
4 # found in the LICENSE file.
5
6 import collections
7 import optparse
8 import os
9 import struct
10 import subprocess
11 import sys
12 import tempfile
13
14 import git_common
15 from git_common import run_git, ProgressPrinter
16
17
18 CHUNK_FMT = '!20sL'
19 CHUNK_SIZE = struct.calcsize(CHUNK_FMT)
20 DIRTY_TREES = collections.defaultdict(int)
21 REF = 'refs/number/commits'
22
23 # Number of bytes to use for the prefix on our internal number structure.
24 # 0 is slow to deserialize. 2 creates way too much bookkeeping overhead (would
25 # need to reimplement cache data structures to be a bit more sophisticated than
26 # dicts). 1 seems to be just right.
27 PREFIX_LEN = 1
28
29
@git_common.memoize_one
def get_number_tree(prefix_bytes):
  """Loads the generation-number table stored under |prefix_bytes|.

  Reads the blob at REF:<pathlified prefix> with `git cat-file` and decodes it
  as a sequence of CHUNK_FMT records. If git writes nothing to stdout (for
  example because no such blob exists yet), the result is an empty dict.

  Returns a dict in the form of {<full binary ref>: <gen num> ...}

  >>> get_number_tree('\x83\xb4')
  {'\x83\xb4\xe3\xe4W\xf9J*\x8f/c\x16\xecD\xd1\x04\x8b\xa9qz': 169, ...}
  """
  blob_ref = '%s:%s' % (REF, git_common.pathlify(prefix_bytes))
  proc = subprocess.Popen(['git', 'cat-file', 'blob', blob_ref],
                          stdout=subprocess.PIPE, stderr=subprocess.PIPE)
  # Only stdout is used; whatever git writes to stderr is read and dropped.
  data = buffer(proc.communicate()[0])

  # Only whole records are decoded; a trailing partial record is ignored.
  record_count = len(data) // CHUNK_SIZE
  return dict(struct.unpack_from(CHUNK_FMT, data, index * CHUNK_SIZE)
              for index in xrange(record_count))
49
50
@git_common.memoize_one
def get_num(commit_id):
  """Looks up the generation number recorded for |commit_id|.

  Returns the number, or None when |commit_id| is not present in the number
  tree for its prefix.
  """
  prefix = commit_id[:PREFIX_LEN]
  tree = get_number_tree(prefix)
  return tree.get(commit_id)
56
57
def intern_number_tree(tree):
  """Serializes a number tree (as returned by |get_number_tree|) to a git blob.

  Records are written in sorted commit-id order, each packed with CHUNK_FMT,
  into a temporary file which is then handed to git_common.git_intern_f.

  Returns the git blob id as hex-encoded string.

  >>> d = {'\x83\xb4\xe3\xe4W\xf9J*\x8f/c\x16\xecD\xd1\x04\x8b\xa9qz': 169}
  >>> intern_number_tree(d)
  'c552317aa95ca8c3f6aae3357a4be299fbcb25ce'
  """
  with tempfile.TemporaryFile() as blob_file:
    for commit_id in sorted(tree):
      record = struct.pack(CHUNK_FMT, commit_id, tree[commit_id])
      blob_file.write(record)
    # Rewind so the whole file is read back from the start.
    blob_file.seek(0)
    return git_common.git_intern_f(blob_file)
73
74
def leaf_map_fn(pre_tree):
  """Converts a prefix and number tree into a git index line.

  Takes one (prefix, tree) tuple rather than two arguments so that it can be
  handed straight to a pool's imap(). The tuple is unpacked in the body
  because tuple parameters in the signature are not valid syntax on Python 3.

  Args:
    pre_tree - A (prefix, number tree) pair.

  Returns a NUL-terminated '<mode> blob <blob id>\\t<path>' entry.
  """
  pre, tree = pre_tree
  return '100644 blob %s\t%s\0' % (intern_number_tree(tree),
                                   git_common.pathlify(pre))
79
80
def finalize(targets):
  """Saves all cache data to the git repository.

  After calculating the generation number for |targets|, call finalize() to
  save all the work to the git repository.

  This in particular saves the trees referred to by DIRTY_TREES: every dirty
  prefix is re-serialized into a blob, staged through the index file
  <git-dir>/number.idx, written out as a tree and committed on top of REF.
  DIRTY_TREES is emptied once the new commit has been stored.

  Args:
    targets - An iterable of binary-encoded full git commit id hashes. Each one
        is recorded as an extra parent of the new commit on REF.

  Raises:
    subprocess.CalledProcessError - if `git update-index` exits non-zero.
  """
  if not DIRTY_TREES:
    return

  msg = 'git-number Added %s numbers' % sum(DIRTY_TREES.itervalues())

  # Point git at a dedicated index file instead of the repository's default.
  idx = os.path.join(run_git('rev-parse', '--git-dir'), 'number.idx')
  env = os.environ.copy()
  env['GIT_INDEX_FILE'] = idx

  with ProgressPrinter('Finalizing: (%%(count)d/%d)' % len(DIRTY_TREES)) as inc:
    run_git('read-tree', REF, env=env)

    prefixes_trees = ((p, get_number_tree(p)) for p in sorted(DIRTY_TREES))
    update_cmd = ['git', 'update-index', '-z', '--index-info']
    updater = subprocess.Popen(update_cmd, stdin=subprocess.PIPE, env=env)

    with git_common.ScopedPool() as leaf_pool:
      for item in leaf_pool.imap(leaf_map_fn, prefixes_trees):
        updater.stdin.write(item)
        inc()

    updater.stdin.close()
    # A failed update-index may leave the index incomplete; refuse to turn
    # that into a commit.
    if updater.wait() != 0:
      raise subprocess.CalledProcessError(updater.returncode, update_cmd)

    tree_id = run_git('write-tree', env=env)
    commit_cmd = ['commit-tree', '-m', msg, '-p', git_common.git_hash(REF)]
    for t in targets:
      commit_cmd += ['-p', git_common.hexlify(t)]
    commit_cmd.append(tree_id)
    commit_id = run_git(*commit_cmd)
    run_git('update-ref', REF, commit_id)

  # Everything has been stored, so a repeated finalize() must be a no-op
  # rather than a second commit of the same trees.
  DIRTY_TREES.clear()
120
121
def preload_tree(prefix):
  """Parses the number tree for |prefix| and returns a (prefix, tree) pair."""
  tree = get_number_tree(prefix)
  return prefix, tree
125
126
def all_prefixes(depth=PREFIX_LEN):
  """Yields every possible byte-string prefix that is |depth| bytes long.

  Each byte ranges over all 256 values (0x00 through 0xff inclusive), so
  256 ** depth prefixes are produced in total.

  Args:
    depth - Number of bytes in each generated prefix; expected to be >= 1.
  """
  # The upper bound must be 256: stopping at 255 would skip the 0xff byte.
  for x in (chr(i) for i in xrange(256)):
    if depth > 1:
      for r in all_prefixes(depth - 1):
        yield x + r
    else:
      yield x
134
135
def load(targets):
  """Loads cached generation numbers, then calculates any that are missing.

  Numbers calculated here are stored only in the in-memory caches (and
  counted in DIRTY_TREES); nothing is written to the repository except for
  creating REF with an empty initial commit when it does not exist yet.

  Args:
    targets - An iterable of binary-encoded full git commit id hashes.
  """
  # |targets| is walked more than once below; materialize it so that a
  # one-shot iterator is not already exhausted when rev-list is invoked.
  targets = list(targets)
  if all(get_num(t) is not None for t in targets):
    return

  if git_common.git_tree(REF) is None:
    empty = git_common.git_mktree({})
    commit_id = run_git('commit-tree', '-m', 'Initial commit from git-number',
                        empty)
    run_git('update-ref', REF, commit_id)

  with git_common.ScopedPool() as pool:
    # Kick off parsing of every prefix tree while rev-list runs below.
    preload_iter = pool.imap_unordered(preload_tree, all_prefixes())

    rev_list = []

    with ProgressPrinter('Loading commits: %(count)d') as inc:
      # Curiously, buffering the list into memory seems to be the fastest
      # approach in python (as opposed to iterating over the lines in the
      # stdout as they're produced). GIL strikes again :/
      cmd = [
        'rev-list', '--topo-order', '--parents', '--reverse', '^' + REF
      ] + [git_common.hexlify(t) for t in targets]
      for line in run_git(*cmd).splitlines():
        # With --parents each line is '<commit> <parent>...'.
        tokens = [git_common.unhexlify(tok) for tok in line.split()]
        rev_list.append((tokens[0], tokens[1:]))
        inc()

    for prefix, tree in preload_iter:
      get_number_tree.cache[prefix] = tree

  with ProgressPrinter('Counting: %%(count)d/%d' % len(rev_list)) as inc:
    # rev_list is in --topo-order --reverse order, i.e. parents come before
    # their children, so a parent's number is available when it is needed.
    for commit_id, pars in rev_list:
      num = max(map(get_num, pars)) + 1 if pars else 0

      prefix = commit_id[:PREFIX_LEN]
      get_number_tree(prefix)[commit_id] = num
      DIRTY_TREES[prefix] += 1
      get_num.cache[commit_id] = num

      inc()
181
182
def main():
  """Command-line entry point.

  Prints the generation number of each requested committish (HEAD when none
  is given), one per line, and saves newly calculated numbers unless
  --no-cache was passed. With --reset, deletes REF and quits instead.

  Returns the process exit code: 0 on success, 1 when interrupted by the
  user (KeyboardInterrupt).
  """
  parser = optparse.OptionParser(
      usage='usage: %prog [options] [<committish>]\n\n'
            '<committish> defaults to HEAD')
  parser.add_option('--no-cache', action='store_true',
                    help='Do not actually cache anything we calculate.')
  parser.add_option('--reset', action='store_true',
                    help='Reset the generation number cache and quit.')
  parser.add_option('-v', '--verbose', action='count', default=0,
                    help='Be verbose. Use more times for more verbosity.')
  opts, args = parser.parse_args()

  if not args:
    args = ['HEAD']

  git_common.VERBOSE_LEVEL = opts.verbose

  # Only the part that actually talks to git is guarded against Ctrl-C.
  try:
    if opts.reset:
      run_git('update-ref', '-d', REF)
      return 0

    targets = git_common.parse_committishes(*args)
    load(targets)
    for t in targets:
      print(get_num(t))
    if not opts.no_cache:
      finalize(targets)

    return 0
  except KeyboardInterrupt:
    # Falling off the end would return None, which sys.exit() reports as
    # success; an interrupted run must not look like a clean one.
    return 1
215
M-A Ruel 2013/10/24 13:23:03 2 lines
iannucci 2013/10/25 00:52:41 Done.
if __name__ == '__main__':
  # Hand main()'s return value to the interpreter as the exit status.
  exit_status = main()
  sys.exit(exit_status)
OLDNEW
« git_common.py ('K') | « git_common.py ('k') | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698