OLD | NEW |
| (Empty) |
1 # Copyright 2014 The Chromium Authors. All rights reserved. | |
2 # Use of this source code is governed by a BSD-style license that can be | |
3 # found in the LICENSE file. | |
4 import collections | |
5 import hashlib | |
6 import logging | |
7 import re | |
8 | |
9 from cStringIO import StringIO | |
10 | |
11 from infra.services.gnumbd.support.util import cached_property, freeze | |
12 | |
# Module-level logger named after this module; the commit parser below uses it
# to emit warnings about unexpected headers and malformed footers.
LOGGER = logging.getLogger(__name__)
14 | |
15 ################################################################################ | |
16 # Exceptions | |
17 ################################################################################ | |
18 | |
class PartialCommit(Exception):
  """Signals that the raw content of a commit is incomplete.

  Args:
    hsh - identifier of the commit, interpolated into the message.
    raw - the offending raw content; also kept on the `raw` attribute.
  """

  def __init__(self, hsh, raw):
    self.raw = raw
    message = 'Commit %s has partial content: %r' % (hsh, raw)
    super(PartialCommit, self).__init__(message)
24 | |
25 | |
class UnexpectedHeader(Exception):
  """Signals a commit header that is not acceptable where it was found.

  Args:
    hsh - identifier of the commit, interpolated into the message.
    header - the name of the offending header.
    value - the value carried by the offending header.
  """

  def __init__(self, hsh, header, value):
    message = 'Unexpected header in commit %s: %r -> %r' % (
        hsh, header, value)
    super(UnexpectedHeader, self).__init__(message)
30 | |
31 | |
32 ################################################################################ | |
33 # Base Class | |
34 ################################################################################ | |
35 | |
class Alterable(object):
  """Interface for value objects that are copied-with-changes, not mutated.

  Every method here raises NotImplementedError; subclasses supply the real
  behaviour.
  """

  def to_dict(self):  # pragma: no cover
    """Returns this object's fields as a shallow dictionary.

    Shallow means values are not converted recursively, so they may themselves
    be Alterable instances.
    """
    raise NotImplementedError()

  def alter(self, **kwargs):  # pragma: no cover
    """Returns a copy of self in which each field named in kwargs is replaced
    by the supplied value."""
    raise NotImplementedError()

  @classmethod
  def from_raw(cls, data):  # pragma: no cover
    """Builds an instance of this class by parsing the string |data|."""
    raise NotImplementedError()
51 | |
52 | |
53 ################################################################################ | |
54 # Implementation | |
55 ################################################################################ | |
56 | |
class CommitTimestamp(Alterable):
  """An immutable git-style timestamp: a seconds count plus a timezone offset.

  The string form is '<secs> <sign><HH><MM>', e.g. '1400000000 -0700'.

  Args:
    secs - the seconds count; must be an int.
    sign - the sign of the timezone offset; exactly '+' or '-'.
    hours - the hours part of the offset; 0 <= hours < 24.
    mins - the minutes part of the offset; 0 <= mins < 60.
  """

  def __init__(self, secs, sign, hours, mins):
    super(CommitTimestamp, self).__init__()
    assert isinstance(secs, int)
    # `sign in '+-'` would be a substring test and would therefore also accept
    # '' and '+-'; compare against the two legal values explicitly.
    assert sign in ('+', '-')
    assert 0 <= hours < 24
    assert 0 <= mins < 60

    self._secs = secs
    self._sign = sign
    self._hours = hours
    self._mins = mins

  # Comparison & Representation
  def __eq__(self, other):
    if self is other:
      return True
    if not isinstance(other, CommitTimestamp):
      return False
    return (
        self.secs == other.secs and
        self.sign == other.sign and
        self.hours == other.hours and
        self.mins == other.mins
    )

  def __ne__(self, other):
    return not (self == other)

  def __hash__(self):
    # Defined alongside __eq__ so that equal timestamps also hash equally.
    return hash((self.secs, self.sign, self.hours, self.mins))

  def __repr__(self):
    return 'CommitTimestamp(%r, %r, %r, %r)' % (
        self.secs, self.sign, self.hours, self.mins)

  def __str__(self):
    return '%s %s' % (self.secs, self.tz_str)

  # Accessors (read-only views of the constructor arguments)
  @property
  def hours(self):
    return self._hours

  @property
  def mins(self):
    return self._mins

  @property
  def secs(self):
    return self._secs

  @property
  def sign(self):
    return self._sign

  @property
  def tz_str(self):
    """The timezone offset rendered as sign + zero-padded HHMM."""
    return '%s%02d%02d' % (self.sign, self.hours, self.mins)

  # Methods
  def to_dict(self):
    """Returns the constructor arguments as a {name: value} dictionary."""
    return {
        'secs': self.secs,
        'sign': self.sign,
        'hours': self.hours,
        'mins': self.mins,
    }

  def alter(self, **kwargs):
    """Returns a new CommitTimestamp with the fields in kwargs replaced.

    Only 'secs', 'sign', 'hours' and 'mins' are accepted; anything else trips
    an assertion.
    """
    new_args = self.to_dict()
    assert set(new_args).issuperset(kwargs.keys())
    new_args.update(kwargs)
    return CommitTimestamp(**new_args)

  @classmethod
  def from_raw(cls, data):
    """Parses a string of the form '\\d+ [+-]HHMM' into a CommitTimestamp."""
    secs, tz = data.split(' ')
    return cls(int(secs), tz[0], int(tz[1:3]), int(tz[3:5]))
117 | |
118 | |
# A shared all-zero timestamp: 0 seconds with a '+0000' timezone offset.
NULL_TIMESTAMP = CommitTimestamp(0, '+', 0, 0)
120 | |
121 | |
class CommitUser(Alterable):
  """An immutable author/committer identity: a name, an email and a timestamp.

  The string form is '<user> <<email>> <timestamp>', which is also the format
  that from_raw() parses.

  Args:
    user - non-empty string holding the user's name.
    email - non-empty string holding the user's email address.
    timestamp - a CommitTimestamp.
  """

  def __init__(self, user, email, timestamp):
    super(CommitUser, self).__init__()
    assert isinstance(user, basestring) and user
    assert isinstance(email, basestring) and email
    assert isinstance(timestamp, CommitTimestamp)
    self._user = user
    self._email = email
    self._timestamp = timestamp

  # Comparison & Representation
  def __eq__(self, other):
    if self is other:
      return True
    if not isinstance(other, CommitUser):
      return False
    return (
        self.user == other.user and
        self.email == other.email and
        self.timestamp == other.timestamp
    )

  def __ne__(self, other):
    return not (self == other)

  def __hash__(self):
    # Defined alongside __eq__ so that equal users also hash equally. The
    # timestamp is hashed through its fields (the same ones its __eq__
    # compares) so the result does not depend on the timestamp's own __hash__.
    ts = self.timestamp
    return hash(
        (self.user, self.email, ts.secs, ts.sign, ts.hours, ts.mins))

  def __repr__(self):
    return 'CommitUser(%r, %r, %r)' % (self.user, self.email, self.timestamp)

  def __str__(self):
    return '%s <%s> %s' % (self.user, self.email, self.timestamp)

  # Accessors (read-only views of the constructor arguments)
  @property
  def user(self):
    return self._user

  @property
  def email(self):
    return self._email

  @property
  def timestamp(self):
    return self._timestamp

  # Methods
  def to_dict(self):
    """Returns the constructor arguments as a shallow {name: value} dict.

    The 'timestamp' value is the CommitTimestamp itself, not its dictionary.
    """
    return {
        'user': self.user,
        'email': self.email,
        'timestamp': self.timestamp,
    }

  def alter(self, **kwargs):
    """Returns a new CommitUser with the fields in kwargs replaced.

    Only 'user', 'email' and 'timestamp' are accepted; anything else trips an
    assertion.
    """
    new_args = self.to_dict()
    assert set(new_args).issuperset(kwargs.keys())
    new_args.update(kwargs)
    return CommitUser(**new_args)

  @classmethod
  def from_raw(cls, data):
    """Parses "safe_string() ' <' safe_string() '> ' [TIMESTAMP]"."""
    # Split on the first ' <' and then on the first '> '; whatever remains is
    # handed to CommitTimestamp.from_raw.
    user, rest = data.split(' <', 1)
    email, rest = rest.split('> ', 1)
    return cls(user, email, CommitTimestamp.from_raw(rest))
173 | |
174 | |
class CommitData(Alterable):
  """A workable data representation of a git commit object.

  Knows how to parse all the standard fields of a git commit object:
    * tree
    * parent(s)
    * author
    * committer
    * commit message

  Also knows how to parse 'footers' which are an informally-defined mechanism to
  append key-value pairs to the ends of commit messages.

  Footers are stored internally as a list of (key, value) pairs. This is in
  order to provide full round-trip compatibility for CommitData, since footers
  have no implied ordering, other than the ordering in the commit. Consider the
  footers:

    A: 1
    B: 2
    A: 3

  In order to represent this as something better than a list which maintains the
  round-trip invariant, we would need a (Frozen)OrderedMultiDict, which would be
  tricky to implement.

  Author and committer are treated as the format defined by CommitUser

  Args:
    tree - the tree hash (string matching HASH_RE, no newline).
    parents - sequence of parent hashes (same constraints as tree).
    author - a CommitUser.
    committer - a CommitUser.
    other_header_lines - sequence of (key, value) str pairs for any header
      other than tree/parent/author/committer.
    message_lines - sequence of str, one per line of the commit message.
    footer_lines - sequence of (key, value) str pairs.
  """
  FOOTER_RE = re.compile(r'([-a-zA-Z]+): (.*)')
  HASH_RE = re.compile(r'[0-9a-f]{40}')

  def __init__(self, tree, parents, author, committer, other_header_lines,
               message_lines, footer_lines):
    super(CommitData, self).__init__()
    assert all('\n' not in h and self.HASH_RE.match(h) for h in parents)
    assert '\n' not in tree and self.HASH_RE.match(tree)
    assert isinstance(author, CommitUser)
    assert isinstance(committer, CommitUser)
    assert all(isinstance(l, str) for l in message_lines)
    assert all(len(i) == 2 and all(isinstance(x, str) for x in i)
               for i in other_header_lines)
    assert all(len(i) == 2 and all(isinstance(x, str) for x in i)
               for i in footer_lines)

    self._parents = freeze(parents)
    self._tree = tree
    self._author = author
    self._committer = committer
    self._other_header_lines = freeze(other_header_lines)
    self._message_lines = freeze(message_lines)
    self._footer_lines = freeze(footer_lines)

  # Comparison & Representation
  def __eq__(self, other):
    # Two commits are equal exactly when their serialized forms hash equally.
    return (self is other) or (
        isinstance(other, CommitData) and self.hsh == other.hsh)

  def __ne__(self, other):
    return not (self == other)

  def __hash__(self):
    # Defined alongside __eq__ (which compares hsh) so that equal commits also
    # hash equally.
    return hash(self.hsh)

  def __repr__(self):
    return (
        'CommitData({tree!r}, {parents!r}, {author!r}, {committer!r}, '
        '{other_header_lines!r}, {message_lines!r}, {footer_lines!r})'
    ).format(**self.to_dict())

  def __str__(self):
    """Produces a string representation of this CommitData suitable for
    consumption by `git hash-object`.
    """
    ret = StringIO()
    print >> ret, 'tree', self.tree
    for parent in self.parents:
      print >> ret, 'parent', parent
    print >> ret, 'author', self.author
    print >> ret, 'committer', self.committer
    for key, value in self.other_header_lines:
      print >> ret, key, value
    # A blank line separates the headers from the message.
    print >> ret
    print >> ret, '\n'.join(self.message_lines)
    if self.footer_lines:
      # ... and another one separates the message from the footers.
      print >> ret
      for key, value in self.footer_lines:
        print >> ret, '%s: %s' % (key, value)
    return ret.getvalue()

  # Accessors
  # pylint: disable=W0212
  author = property(lambda self: self._author)
  committer = property(lambda self: self._committer)
  footer_lines = property(lambda self: self._footer_lines)
  message_lines = property(lambda self: self._message_lines)
  other_header_lines = property(lambda self: self._other_header_lines)
  parents = property(lambda self: self._parents)
  tree = property(lambda self: self._tree)

  @cached_property
  def footers(self):
    """The footers grouped by key: a frozen ordered {key: [values]} mapping,
    with keys in order of first appearance."""
    ret = collections.OrderedDict()
    for key, value in self.footer_lines:
      ret.setdefault(key, []).append(value)
    return freeze(ret)

  @cached_property
  def other_headers(self):
    """The non-standard headers grouped by key, in the same shape as
    `footers`."""
    ret = collections.OrderedDict()
    for key, value in self.other_header_lines:
      ret.setdefault(key, []).append(value)
    return freeze(ret)

  @cached_property
  def hsh(self):
    """Hex SHA-1 of str(self) (the bare content; no object header is
    prepended here)."""
    return hashlib.sha1(str(self)).hexdigest()

  # Methods
  def to_dict(self):
    """Returns the constructor arguments as a shallow {name: value} dict."""
    return {
        k: getattr(self, k)
        for k in ['parents', 'tree', 'author', 'committer',
                  'other_header_lines', 'message_lines', 'footer_lines']
    }

  def alter(self, **kwargs):
    """In addition to the normal fields on this class, you may also provide
    'footers' and 'other_headers' instead of 'footer_lines' and
    'other_header_lines' respectively.

    These are an OrderedDict, which will be merged into the existing *_lines
    as described by merge_lines.
    """
    new_args = self.to_dict()
    if 'footers' in kwargs:
      assert 'footer_lines' not in kwargs
      new_args['footer_lines'] = self.merge_lines(
          self.footer_lines, kwargs.pop('footers'))
    if 'other_headers' in kwargs:
      assert 'other_header_lines' not in kwargs
      new_args['other_header_lines'] = self.merge_lines(
          self.other_header_lines, kwargs.pop('other_headers'))
    assert set(new_args).issuperset(kwargs.keys())
    new_args.update(kwargs)
    return CommitData(**new_args)

  @staticmethod
  def merge_lines(old_lines, new_dict):
    """Produces new footer or other_header_lines given the old lines and the
    new dictionary.

    Preserves the order of |old_lines| as much as possible.

    Rules:
      * If a key is in new_dict, but the key is not in old_lines, the new
        lines are added at the end.
      * If a key is not in new_dict, it is passed through.
      * If a key is equal to None in new_dict, lines with that key are removed.
      * If a key is present in both, all entries in new_dict for that key are
        inserted at the location of the first line in old_lines for that key
        (and any other lines in old_lines with that key are removed).

    Args:
      old_lines - a sequence of (key, value) pairs
      new_dict - an OrderedDict of {key: [values]} or {key: None}

    Returns:
      A new list of (key, value) tuples.
    """
    old_keys = {key for key, _ in old_lines}
    # Any falsy value (None, but also an empty list) marks a key for deletion.
    del_keys = {key for key, values in new_dict.items() if not values}
    replace_keys = (set(new_dict) & old_keys) - del_keys

    new_lines = []
    replaced = set()
    for key, value in old_lines:
      if key in del_keys:
        continue
      if key not in replace_keys:
        # Keys that new_dict does not mention keep their exact positions, even
        # when the same key occurs on several non-adjacent lines.
        new_lines.append((key, value))
      elif key not in replaced:
        # First occurrence of a replaced key: splice all of its new values in
        # here. Later occurrences of the key fall through and are dropped.
        replaced.add(key)
        new_lines.extend((key, v) for v in new_dict[key])

    for add_key in new_dict:  # Preserve sort order of new lines
      if add_key in old_keys or add_key in del_keys:
        continue
      new_lines.extend((add_key, v) for v in new_dict[add_key])

    return new_lines

  @classmethod
  def from_raw(cls, data):
    """Turns the raw output of `git cat-file commit` into a CommitData.

    Raises:
      PartialCommit - if |data| does not end in a newline, or if the tree,
        author or committer header is missing.
      UnexpectedHeader - if the tree, author or committer header is repeated.
    """
    users = {}
    parents = []
    tree = None

    # The hash is only needed for diagnostics, so compute it lazily (and at
    # most once). It closes over |data|, which therefore must never be rebound
    # below.
    hsh_ref = []
    def hsh_fn():
      if not hsh_ref:
        hsh_ref.append(hashlib.sha1(data).hexdigest())
      return hsh_ref[0]

    if data[-1:] != '\n':
      raise PartialCommit(hsh_fn(), data)

    raw_lines = data.splitlines()
    other_header_lines = []
    header_count = 0
    for line in raw_lines:
      if not line:
        # The first empty line ends the header section.
        break
      header, value = line.split(' ', 1)
      if header == 'parent':
        parents.append(value)
      elif header in ('author', 'committer'):
        if header in users:
          raise UnexpectedHeader(hsh_fn(), header, value)
        users[header] = CommitUser.from_raw(value)
      elif header == 'tree':
        if tree:
          raise UnexpectedHeader(hsh_fn(), header, value)
        tree = value
      else:
        LOGGER.warning('Unexpected header in git commit %r: %r -> %r',
                       hsh_fn(), header, value)
        other_header_lines.append((header, value))
      header_count += 1

    # Skip the headers plus the empty separator line.
    raw_message = raw_lines[header_count + 1:]

    # footers are lines in the form:
    #   ...message...
    #   <empty line>
    #   foo: data
    #   bar: other data
    #   ...
    #
    # If no empty line is found, they're considered not to exist.
    # If one line in the footers doesn't match the 'key: value' format, none
    # of the footers are considered to exist.
    message_lines = raw_message
    footer_lines = []

    scanned = 0
    for line in reversed(raw_message):
      if not line:
        # Drop the footers and the empty line that precedes them.
        message_lines = raw_message[:-(scanned + 1)]
        break

      m = cls.FOOTER_RE.match(line)
      if m:
        footer_lines.append((m.group(1), m.group(2)))
      else:
        if scanned:
          footer_lines = []
          LOGGER.warning('Malformed footers')
        break
      scanned += 1
    else:
      LOGGER.warning('Footers comprise entire message')
      message_lines = []

    # Footers were collected bottom-up; restore their order in the commit.
    footer_lines.reverse()

    if not tree or set(('author', 'committer')).difference(users.keys()):
      raise PartialCommit(hsh_fn(), data)

    return cls(tree, parents, users['author'], users['committer'],
               other_header_lines, message_lines, footer_lines)
OLD | NEW |