OLD | NEW |
| (Empty) |
1 # Copyright 2014 The Chromium Authors. All rights reserved. | |
2 # Use of this source code is governed by a BSD-style license that can be | |
3 # found in the LICENSE file. | |
4 | |
5 """Gnumbd (Git NUMBer Daemon): Adds metadata to git commits as they land in | |
6 a primary repo. | |
7 | |
8 This is a simple daemon which takes commits pushed to a pending ref, alters | |
9 their message with metadata, and then pushes the altered commits to a parallel | |
10 ref. | |
11 """ | |
12 | |
13 import collections | |
14 import logging | |
15 import re | |
16 import sys | |
17 import time | |
18 | |
19 LOGGER = logging.getLogger(__name__) | |
20 | |
21 from infra.services.gnumbd.support import git, data, util | |
22 | |
23 | |
# NOTE(review): DEFAULT_CONFIG_REF and DEFAULT_REPO_DIR are not referenced in
# the visible part of this module; presumably they are defaults consumed by
# the daemon's entry point -- confirm against the caller.
DEFAULT_CONFIG_REF = 'refs/pending-config/main'
DEFAULT_REPO_DIR = 'gnumbd_repos'
# Footers whose key starts with this prefix are owned by the daemon: they are
# stripped by content_of() and dropped from user commits by
# synthesize_commit().
FOOTER_PREFIX = 'Cr-'
# Footer read by get_position() and written by synthesize_commit().
COMMIT_POSITION = FOOTER_PREFIX + 'Commit-Position'
# takes a Ref and a number
# Produces '<ref.ref>@{#<number>}'; the first argument must expose a .ref
# attribute and the second must be an integer.
FMT_COMMIT_POSITION = '{.ref}@{{#{:d}}}'.format
# Footer written by synthesize_commit() and carried over from the parent.
BRANCHED_FROM = FOOTER_PREFIX + 'Branched-From'
# Footer used by get_position() as a fallback source for the position number.
GIT_SVN_ID = 'git-svn-id'
32 | |
33 | |
34 ################################################################################ | |
35 # Exceptions | |
36 ################################################################################ | |
37 | |
class MalformedPositionFooter(Exception):
  """Raised when a position footer is present but cannot be parsed.

  The exception message has the form:
    in <repr(commit)>: "<header>: <value>"
  """

  def __init__(self, commit, header, value):
    message = 'in {!r}: "{}: {}"'.format(commit, header, value)
    super(MalformedPositionFooter, self).__init__(message)
42 | |
43 | |
class NoPositionData(Exception):
  """Raised when a commit carries no usable position information.

  get_position() raises this when neither the COMMIT_POSITION footer nor the
  git-svn-id footer is present on the commit.
  """

  def __init__(self, commit):
    template = 'No {!r} or git-svn-id found for {!r}'
    super(NoPositionData, self).__init__(
        template.format(COMMIT_POSITION, commit))
48 | |
49 | |
50 ################################################################################ | |
51 # Commit Manipulation | |
52 ################################################################################ | |
53 | |
def content_of(commit):
  """Returns the comparable content of |commit| as a data.CommitData object.

  The result is built so that a gnumbd-landed commit and the commit it was
  synthesized from compare as equal. To get there, the following are removed
  from the commit's data:
    * the parent(s)
    * the committer date (replaced by data.NULL_TIMESTAMP)
    * every footer whose key begins with 'Cr-'
    * the 'git-svn-id' footer

  If |commit| is None, git.INVALID is returned instead.

  The computed value is cached on the |commit| instance itself (attribute
  '_cr_content'), so repeated calls for the same instance are cheap.
  """
  if commit is None:
    return git.INVALID

  # Fast path: a previous call already stashed the result on the instance.
  if hasattr(commit, '_cr_content'):
    return commit._cr_content  # pylint: disable=W0212

  original = commit.data
  stripped = util.thaw(original.footers)

  # A footer value of None marks that footer for removal.
  doomed = [GIT_SVN_ID]
  doomed.extend(
      name for name in stripped.keys() if name.startswith(FOOTER_PREFIX))
  for name in doomed:
    stripped[name] = None

  blank_committer = original.committer.alter(timestamp=data.NULL_TIMESTAMP)
  commit._cr_content = original.alter(  # pylint: disable=W0212
      parents=(),
      committer=blank_committer,
      footers=stripped)
  return commit._cr_content  # pylint: disable=W0212
82 | |
83 | |
def get_position(commit, _position_re=re.compile(r'^(.*)@\{#(\d+)\}$')):
  """Returns (ref, position number) for the given |commit|.

  Looks for the Cr-Commit-Position footer, whose value must look like
  '<ref>@{#<number>}'. If that footer is unavailable, falls back to the
  git-svn-id footer, taking the number after the '@' in its first
  whitespace-separated token and passing back ref as None.

  Args:
    commit: the commit to inspect; its footers are read from
      commit.data.footers and its repo from commit.repo.
    _position_re: pre-compiled pattern for the footer value. This is an
      implementation detail (compiled once at definition time); callers
      should not pass it.

  Returns:
    A (git.Ref or None, int) tuple.

  Raises:
    MalformedPositionFooter: a footer was found but could not be parsed. This
      includes a Cr-Commit-Position footer with no digits (e.g. 'ref@{#}').
    NoPositionData: neither footer is present.
  """
  f = commit.data.footers
  current_pos = f.get(COMMIT_POSITION)
  if current_pos:
    # Footer values are sequences; exactly one position footer is expected.
    assert len(current_pos) == 1
    current_pos = current_pos[0]

    # The pattern requires at least one digit, so int() below cannot fail on
    # an empty group; a digit-less footer is reported as malformed here.
    m = _position_re.match(current_pos)
    if not m:
      raise MalformedPositionFooter(commit, COMMIT_POSITION, current_pos)
    parent_ref = git.Ref(commit.repo, m.group(1))
    parent_num = int(m.group(2))
  else:
    # TODO(iannucci): Remove this and rely on a manual initial commit?
    svn_pos = f.get(GIT_SVN_ID)
    if not svn_pos:
      raise NoPositionData(commit)

    assert len(svn_pos) == 1
    svn_pos = svn_pos[0]
    # git-svn-id carries no ref information.
    parent_ref = None
    try:
      # First token is '<something>@<number>'; take the part after the '@'.
      parent_num = int(svn_pos.split()[0].split('@')[1])
    except (IndexError, ValueError):
      raise MalformedPositionFooter(commit, GIT_SVN_ID, svn_pos)

  return parent_ref, parent_num
118 | |
119 | |
def synthesize_commit(commit, new_parent, ref, clock=time):
  """Synthesizes a new Commit given |new_parent| and ref.

  The new commit will contain a Cr-Commit-Position footer, and possibly
  Cr-Branched-From footers (if commit is on a branch).

  Any footers on |commit| that start with 'Cr-', or that are 'git-svn-id', are
  treated as user-supplied noise: they are logged and dropped. They never
  override the footers synthesized here.

  The new commit's committer date will also be updated to 'time.time()', or
  the new parent's date + 1, whichever is higher. This means that within a
  branch, commit timestamps will always increase (at least from the point
  where this daemon went into service).

  May raise whatever get_position(new_parent) raises.

  @type commit: git.Commit
  @type new_parent: git.Commit
  @type ref: git.Ref
  @kind clock: implements .time(), used for testing determinism.
  """
  # TODO(iannucci): See if there are any other footers we want to carry over
  # between new_parent and commit
  footers = collections.OrderedDict()
  parent_ref, parent_num = get_position(new_parent)
  # if parent_ref wasn't encoded, assume that the parent is on the same ref.
  if parent_ref is None:
    parent_ref = ref

  if parent_ref != ref:
    # First commit on a new ref: numbering restarts at 1 and the branch point
    # is recorded in front of any branch points inherited from the parent.
    footers[COMMIT_POSITION] = [FMT_COMMIT_POSITION(ref, 1)]
    footers[BRANCHED_FROM] = [
        '%s-%s' % (new_parent.hsh, FMT_COMMIT_POSITION(parent_ref, parent_num))
    ] + list(new_parent.data.footers.get(BRANCHED_FROM, []))
  else:
    footers[COMMIT_POSITION] = [FMT_COMMIT_POSITION(ref, parent_num + 1)]
    footers[BRANCHED_FROM] = new_parent.data.footers.get(BRANCHED_FROM, ())

  # TODO(iannucci): We could be more order-preserving of user supplied footers
  # but I'm inclined not to care. This loop will be enough to keep stuff from
  # Gerrit-landed commits.
  for key, value in commit.data.footers.iteritems():
    if key.startswith(FOOTER_PREFIX) or key == GIT_SVN_ID:
      LOGGER.warn('Dropping key on user commit %s: %r -> %r',
                  commit.hsh, key, value)
      # A None value marks the footer for removal. Do not clobber the footers
      # computed above: if the user commit already carried e.g. a
      # Cr-Commit-Position footer, the synthesized value must win.
      if key not in footers:
        footers[key] = None

  # Ensure that every commit has a time which is at least 1 second after its
  # parent, and reset the tz to UTC.
  parent_time = new_parent.data.committer.timestamp.secs
  new_parents = [] if new_parent is git.INVALID else [new_parent.hsh]
  new_committer = commit.data.committer.alter(
      timestamp=data.NULL_TIMESTAMP.alter(
          secs=max(int(clock.time()), parent_time + 1)))

  return commit.alter(
      parents=new_parents,
      committer=new_committer,
      footers=footers,
  )
175 | |
176 | |
177 ################################################################################ | |
178 # Core functionality | |
179 ################################################################################ | |
def get_new_commits(real_ref, pending_tag, pending_tip):
  """Return a list of new pending commits to process.

  Ideally, real_ref, pending_tag and pending_tip should look something like:

    A  B  C  D  E  F    <- pending_tip
          ^ pending_tag
    A' B' C'            <- real_ref (e.g. master)

  And this method would return [D E F].

  If pending_tag lags behind (i.e. real_ref already contains the equivalent
  of commits which come after pending_tag), the already-processed commits
  are skipped, pending_tag is fast-forwarded to the pending commit whose
  content matches real_ref's tip, and the remaining commits (possibly none)
  are returned. For example, with pending_tag at A in the picture above,
  pending_tag is advanced to C and [D E F] is returned.

  Other discrepancies are errors: they are logged and this method returns an
  empty list. This includes a pending commit for which commit.parent is
  git.INVALID, and a catch-up position whose content does not match
  real_ref's tip.

  @type pending_tag: git.Ref
  @type pending_tip: git.Ref
  @type real_ref: git.Ref
  @returns [git.Commit]
  """
  assert pending_tag.commit != pending_tip.commit
  new_commits = list(pending_tag.to(pending_tip))
  if not new_commits:
    LOGGER.error('%r doesn\'t match %r, but there are no new_commits?',
                 pending_tag.ref, pending_tip.ref)
    return []

  # Count the leading commits which have already landed on real_ref: walk
  # forward until we find the commit whose parent matches real_ref's tip.
  lag = 0
  for commit in new_commits:
    parent = commit.parent
    if parent is git.INVALID:
      LOGGER.error('Cannot process pending merge commit %r', commit)
      return []

    if content_of(parent) == content_of(real_ref.commit):
      break

    LOGGER.warn('Skipping already-processed commit on real_ref %r: %r',
                real_ref, commit.hsh)
    lag += 1

  if lag > 0:
    LOGGER.warn('Catching up pending_tag %r (was %d behind)', pending_tag, lag)
    new_tag_val = new_commits[lag - 1]
    # Covers the case where the loop above ran off the end without finding a
    # match: the last skipped commit must itself correspond to real_ref.
    if content_of(new_tag_val) != content_of(real_ref.commit):
      LOGGER.error('Content of new tag %r does not match content of %r!',
                   new_tag_val.hsh, real_ref.commit.hsh)
      return []
    new_commits = new_commits[lag:]
    pending_tag.fast_forward_push(new_tag_val)

    if not new_commits:
      # Report the lag itself; the remaining list is empty by definition.
      LOGGER.warn(
          'Tag was lagging for %r by %d, but no new commits are pending',
          real_ref, lag)
      return []

  return new_commits
252 | |
253 | |
def process_ref(real_ref, pending_tag, new_commits, clock=time):
  """Given a |real_ref|, its corresponding |pending_tag|, and a list of
  |new_commits|, copy the |new_commits| to |real_ref|, and advance |pending_tag|
  to match.

  Assumes that pending_tag starts at the equivalent of real_ref, and that
  all commits in new_commits exist on pending_tag..pending_tip.

  Given:

    A  B  C  D  E  F    <- pending_tip
          ^ pending_tag
    A' B' C'            <- real_ref (e.g. master)

  and new_commits == [D E F], this function will produce:

    A  B  C  D  E  F    <- pending_tip
                   ^ pending_tag
    A' B' C' D' E' F'   <- real_ref (e.g. master)

  For each commit, real_ref is pushed first and pending_tag second.

  @type real_ref: git.Ref
  @type pending_tag: git.Ref
  @type new_commits: [git.Commit]
  @kind clock: implements .time(), used for testing determinism.
  """
  # TODO(iannucci): use push --force-with-lease to reset pending to the real
  # ref?
  # TODO(iannucci): The ACL rejection message for the real ref should point
  # users to the pending ref.
  assert content_of(pending_tag.commit) == content_of(real_ref.commit)
  real_parent = real_ref.commit
  for commit in new_commits:
    # Each pending commit must sit directly on top of the one just landed.
    assert content_of(commit.parent) == content_of(real_parent)
    synth_commit = synthesize_commit(commit, real_parent, real_ref, clock)

    # TODO(iannucci): do multi-ref atomic push here.
    LOGGER.info('Pushing synthesized commit %r for %r', synth_commit.hsh,
                commit.hsh)
    real_ref.fast_forward_push(synth_commit)

    LOGGER.debug('Pushing pending_tag %r', pending_tag)
    pending_tag.fast_forward_push(commit)
    real_parent = synth_commit
297 | |
298 | |
def process_repo(repo, cref, clock=time):
  """Execute a single pass over a fetched Repo.

  Will call |process_ref| for every branch indicated by the enabled_refglobs
  config option.

  |cref| is indexed for the 'pending_tag_prefix', 'pending_ref_prefix' and
  'enabled_refglobs' options. For every pending ref matching an enabled glob,
  the corresponding real ref and pending tag are located; refs where either
  is missing are logged and skipped.

  Errors while processing one ref are logged and do not prevent the remaining
  refs from being processed.

  @kind clock: implements .time(), used for testing determinism.
  """
  pending_tag_prefix = cref['pending_tag_prefix']
  pending_ref_prefix = cref['pending_ref_prefix']
  enabled_refglobs = cref['enabled_refglobs']

  def join(prefix, ref):
    # Re-root |ref| (expected to start with 'refs/') under |prefix|.
    return git.Ref(repo, '/'.join((prefix, ref.ref[len('refs/'):])))

  for refglob in enabled_refglobs:
    glob = join(pending_ref_prefix, git.Ref(repo, refglob))
    for pending_tip in repo.refglob(glob.ref):
      # Reset per iteration so the catch-all handler below never reports a
      # stale (or unbound) real_ref if constructing it fails.
      real_ref = None
      # TODO(iannucci): each real_ref could have its own thread.
      try:
        # Only swap the first occurrence: the same text appearing later in
        # the ref name is part of the branch name and must be left alone.
        real_ref = git.Ref(repo, pending_tip.ref.replace(
            pending_ref_prefix, 'refs', 1))

        if real_ref.commit is git.INVALID:
          LOGGER.error('Missing real ref %r', real_ref)
          continue

        LOGGER.info('Processing %r', real_ref)
        pending_tag = join(pending_tag_prefix, real_ref)

        if pending_tag.commit is git.INVALID:
          LOGGER.error('Missing pending tag %r for %r', pending_tag, real_ref)
          continue

        if pending_tag.commit != pending_tip.commit:
          new_commits = get_new_commits(real_ref, pending_tag, pending_tip)
          if new_commits:
            process_ref(real_ref, pending_tag, new_commits, clock)
        else:
          if content_of(pending_tag.commit) != content_of(real_ref.commit):
            LOGGER.error('%r and %r match, but %r\'s content doesn\'t match!',
                         pending_tag, pending_tip, real_ref)
          else:
            LOGGER.info('%r is up to date', real_ref)
      except (NoPositionData, MalformedPositionFooter) as e:
        LOGGER.error('%s %s', e.__class__.__name__, e)
      except Exception:  # pragma: no cover
        LOGGER.exception('Uncaught exception while processing %r', real_ref)
345 | |
346 | |
def inner_loop(repo, cref, clock=time):
  """Runs one daemon iteration: fetch |repo|, refresh |cref|, process refs.

  The fetch's output is forwarded to this process's stdout and stderr.

  @kind clock: implements .time(), used for testing determinism.
  """
  LOGGER.debug('fetching %r', repo)
  streams = {'stdout': sys.stdout, 'stderr': sys.stderr}
  repo.run('fetch', **streams)

  # Re-evaluate the config after the fetch, before it is consulted.
  cref.evaluate()
  process_repo(repo, cref, clock)
OLD | NEW |