Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(151)

Side by Side Diff: infra/services/gnumbd/support/data.py

Issue 355153002: Refactor infra git libs and testing. (Closed) Base URL: https://chromium.googlesource.com/infra/infra@fake_testing_support
Patch Set: Change config ref to have a standard naming scheme Created 6 years, 5 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
(Empty)
1 # Copyright 2014 The Chromium Authors. All rights reserved.
2 # Use of this source code is governed by a BSD-style license that can be
3 # found in the LICENSE file.
4 import collections
5 import hashlib
6 import logging
7 import re
8
9 from cStringIO import StringIO
10
11 from infra.services.gnumbd.support.util import cached_property, freeze
12
# Module-level logger, named after this module via __name__.
LOGGER = logging.getLogger(__name__)
14
15 ################################################################################
16 # Exceptions
17 ################################################################################
18
class PartialCommit(Exception):
  """Raised when raw commit content is incomplete.

  Attributes:
    hsh - the hash string identifying the offending commit content
    raw - the raw content that was supplied
  """

  def __init__(self, hsh, raw):
    super(PartialCommit, self).__init__(
        'Commit %s has partial content: %r' % (hsh, raw))
    # Keep both pieces so handlers do not have to parse the message.
    self.hsh = hsh
    self.raw = raw
24
25
class UnexpectedHeader(Exception):
  """Raised when a commit header appears where it is not allowed.

  Attributes:
    hsh - the hash string identifying the offending commit content
    header - the name of the unexpected header
    value - the value that followed the header name
  """

  def __init__(self, hsh, header, value):
    super(UnexpectedHeader, self).__init__(
        'Unexpected header in commit %s: %r -> %r' % (hsh, header, value))
    # Expose the details so handlers do not have to parse the message.
    self.hsh = hsh
    self.header = header
    self.value = value
30
31
32 ################################################################################
33 # Base Class
34 ################################################################################
35
class Alterable(object):
  """Interface for value objects that can be copied with some fields replaced.

  Every method here raises NotImplementedError; subclasses provide the real
  behavior.
  """

  def to_dict(self):  # pragma: no cover
    """Returns a shallow dict of this object's fields.

    Values are not converted recursively, so they may themselves be Alterable
    instances.
    """
    raise NotImplementedError

  def alter(self, **kwargs):  # pragma: no cover
    """Returns a copy of self in which each field named in kwargs takes the
    given value and all other fields are carried over."""
    raise NotImplementedError

  @classmethod
  def from_raw(cls, data):  # pragma: no cover
    """Builds an instance of cls by parsing the string |data|."""
    raise NotImplementedError
51
52
53 ################################################################################
54 # Implementation
55 ################################################################################
56
class CommitTimestamp(Alterable):
  """An immutable '<secs> <sign><HH><MM>' timestamp as found in a commit.

  Instances are equal when all four fields are equal, and hash consistently
  with that equality.
  """

  def __init__(self, secs, sign, hours, mins):
    """
    Args:
      secs - int, the leading seconds value
      sign - '+' or '-', the sign of the zone offset
      hours - hours part of the zone offset, 0 <= hours < 24
      mins - minutes part of the zone offset, 0 <= mins < 60
    """
    super(CommitTimestamp, self).__init__()
    assert isinstance(secs, int)
    # Compare against a tuple rather than the string '+-': substring
    # membership would also let '' and '+-' through.
    assert sign in ('+', '-')
    assert 0 <= hours < 24
    assert 0 <= mins < 60

    self._secs = secs
    self._sign = sign
    self._hours = hours
    self._mins = mins

  # Comparison & Representation
  def __eq__(self, other):
    return (self is other) or (
        isinstance(other, CommitTimestamp) and (
            self.secs == other.secs and
            self.sign == other.sign and
            self.hours == other.hours and
            self.mins == other.mins
        )
    )

  def __ne__(self, other):
    return not (self == other)

  def __hash__(self):
    # Must agree with __eq__: equal timestamps have to land in the same
    # dict/set bucket.
    return hash((self.secs, self.sign, self.hours, self.mins))

  def __repr__(self):
    return 'CommitTimestamp(%r, %r, %r, %r)' % (
        self.secs, self.sign, self.hours, self.mins)

  def __str__(self):
    """Returns the '<secs> <sign><HH><MM>' form that from_raw() parses."""
    return '%s %s' % (self.secs, self.tz_str)

  # Accessors
  # pylint: disable=W0212
  hours = property(lambda self: self._hours)
  mins = property(lambda self: self._mins)
  secs = property(lambda self: self._secs)
  sign = property(lambda self: self._sign)

  @property
  def tz_str(self):
    """The zone offset as sign plus zero-padded hours and minutes."""
    return '%s%02d%02d' % (self.sign, self.hours, self.mins)

  # Methods
  def to_dict(self):
    """Returns {'secs', 'sign', 'hours', 'mins'} mapped to their values."""
    return {k: getattr(self, k) for k in ['secs', 'sign', 'hours', 'mins']}

  def alter(self, **kwargs):
    """Returns a new CommitTimestamp with the fields in kwargs replaced.

    Only the keys produced by to_dict() are accepted.
    """
    new_args = self.to_dict()
    assert set(new_args).issuperset(kwargs.keys())
    new_args.update(kwargs)
    return CommitTimestamp(**new_args)

  @classmethod
  def from_raw(cls, data):
    """Parses a '<secs> <sign><HH><MM>' string into an instance of cls."""
    # \d+ [+-]HHMM
    secs, tz = data.split(' ')
    return cls(int(secs), tz[0], int(tz[1:3]), int(tz[3:5]))
117
118
# Zero-valued timestamp: secs=0 with a '+0000' zone offset.
NULL_TIMESTAMP = CommitTimestamp(0, '+', 0, 0)
120
121
class CommitUser(Alterable):
  """An immutable '<user> <<email>> <timestamp>' identity from a commit.

  Instances are equal when user, email and timestamp are all equal, and hash
  consistently with that equality.
  """

  def __init__(self, user, email, timestamp):
    """
    Args:
      user - non-empty string, the user's name
      email - non-empty string, the user's email address
      timestamp - a CommitTimestamp
    """
    super(CommitUser, self).__init__()
    assert isinstance(user, basestring) and user
    assert isinstance(email, basestring) and email
    assert isinstance(timestamp, CommitTimestamp)
    self._user = user
    self._email = email
    self._timestamp = timestamp

  # Comparison & Representation
  def __eq__(self, other):
    return (self is other) or (
        isinstance(other, CommitUser) and (
            self.user == other.user and
            self.email == other.email and
            self.timestamp == other.timestamp
        )
    )

  def __ne__(self, other):
    return not (self == other)

  def __hash__(self):
    # Must agree with __eq__. The timestamp is hashed through its string form,
    # which is built only from the fields its equality compares, so this does
    # not depend on how the timestamp object itself hashes.
    return hash((self.user, self.email, str(self.timestamp)))

  def __repr__(self):
    return 'CommitUser(%r, %r, %r)' % (self.user, self.email, self.timestamp)

  def __str__(self):
    """Returns the '<user> <<email>> <timestamp>' form from_raw() parses."""
    return '%s <%s> %s' % (self.user, self.email, self.timestamp)

  # Accessors
  # pylint: disable=W0212
  user = property(lambda self: self._user)
  email = property(lambda self: self._email)
  timestamp = property(lambda self: self._timestamp)

  # Methods
  def to_dict(self):
    """Returns {'user', 'email', 'timestamp'} mapped to their values.

    The timestamp value is the CommitTimestamp itself, not a nested dict.
    """
    return {k: getattr(self, k) for k in ['user', 'email', 'timestamp']}

  def alter(self, **kwargs):
    """Returns a new CommitUser with the fields in kwargs replaced.

    Only the keys produced by to_dict() are accepted.
    """
    new_args = self.to_dict()
    assert set(new_args).issuperset(kwargs.keys())
    new_args.update(kwargs)
    return CommitUser(**new_args)

  @classmethod
  def from_raw(cls, data):
    """Parses a '<user> <<email>> <timestamp>' string into an instance of cls.

    The split happens at the first ' <' and then at the first '> ' after it;
    the remainder is handed to CommitTimestamp.from_raw.
    """
    # safe_string() ' <' safe_string() '> ' [TIMESTAMP]
    user, rest = data.split(' <', 1)
    email, rest = rest.split('> ', 1)
    return cls(user, email, CommitTimestamp.from_raw(rest))
173
174
class CommitData(Alterable):
  """A workable data representation of a git commit object.

  Knows how to parse all the standard fields of a git commit object:
    * tree
    * parent(s)
    * author
    * committer
    * commit message

  Also knows how to parse 'footers' which are an informally-defined mechanism to
  append key-value pairs to the ends of commit messages.

  Footers are stored internally as a list of (key, value) pairs. This is in
  order to provide full round-trip compatibility for CommitData, since footers
  have no implied ordering, other than the ordering in the commit. Consider the
  footers:

    A: 1
    B: 2
    A: 3

  In order to represent this as something better than a list which maintains the
  round-trip invariant, we would need a (Frozen)OrderedMultiDict, which would be
  tricky to implement.

  Author and committer are treated as the format defined by CommitUser.

  Two CommitData are equal when their `hsh` values are equal, and they hash
  consistently with that equality.
  """
  FOOTER_RE = re.compile(r'([-a-zA-Z]+): (.*)')
  HASH_RE = re.compile(r'[0-9a-f]{40}')

  def __init__(self, tree, parents, author, committer, other_header_lines,
               message_lines, footer_lines):
    """
    Args:
      tree - hash string of the tree
      parents - sequence of parent hash strings
      author - a CommitUser
      committer - a CommitUser
      other_header_lines - sequence of (key, value) string pairs for headers
        other than tree/parent/author/committer
      message_lines - sequence of strings, the message body without footers
      footer_lines - sequence of (key, value) string pairs
    """
    super(CommitData, self).__init__()
    assert all('\n' not in h and self.HASH_RE.match(h) for h in parents)
    assert '\n' not in tree and self.HASH_RE.match(tree)
    assert isinstance(author, CommitUser)
    assert isinstance(committer, CommitUser)
    assert all(isinstance(l, str) for l in message_lines)
    assert all(len(i) == 2 and all(isinstance(x, str) for x in i)
               for i in other_header_lines)
    assert all(len(i) == 2 and all(isinstance(x, str) for x in i)
               for i in footer_lines)

    # Collections are stored through freeze() (from support.util).
    self._parents = freeze(parents)
    self._tree = tree
    self._author = author
    self._committer = committer
    self._other_header_lines = freeze(other_header_lines)
    self._message_lines = freeze(message_lines)
    self._footer_lines = freeze(footer_lines)

  # Comparison & Representation
  def __eq__(self, other):
    return (self is other) or (
        isinstance(other, CommitData) and (
            self.hsh == other.hsh
        )
    )

  def __ne__(self, other):
    return not (self == other)

  def __hash__(self):
    # Must agree with __eq__, which compares `hsh` only.
    return hash(self.hsh)

  def __repr__(self):
    return (
        'CommitData({tree!r}, {parents!r}, {author!r}, {committer!r}, '
        '{other_header_lines!r}, {message_lines!r}, {footer_lines!r})'
    ).format(**self.to_dict())

  def __str__(self):
    """Produces a string representation of this CommitData suitable for
    consumption by `git hash-object`.
    """
    # Each entry becomes one newline-terminated output line. The message is a
    # single entry that may itself contain newlines.
    out = ['tree %s' % (self.tree,)]
    out.extend('parent %s' % (parent,) for parent in self.parents)
    out.append('author %s' % (self.author,))
    out.append('committer %s' % (self.committer,))
    out.extend('%s %s' % (key, value)
               for key, value in self.other_header_lines)
    out.append('')
    out.append('\n'.join(self.message_lines))
    if self.footer_lines:
      out.append('')
      out.extend('%s: %s' % (key, value) for key, value in self.footer_lines)
    return ''.join(line + '\n' for line in out)

  # Accessors
  # pylint: disable=W0212
  author = property(lambda self: self._author)
  committer = property(lambda self: self._committer)
  footer_lines = property(lambda self: self._footer_lines)
  message_lines = property(lambda self: self._message_lines)
  other_header_lines = property(lambda self: self._other_header_lines)
  parents = property(lambda self: self._parents)
  tree = property(lambda self: self._tree)

  @cached_property
  def footers(self):
    """footer_lines grouped as {key: [values]}, keys in first-seen order."""
    ret = collections.OrderedDict()
    for key, value in self.footer_lines:
      ret.setdefault(key, []).append(value)
    return freeze(ret)

  @cached_property
  def other_headers(self):
    """other_header_lines grouped as {key: [values]}, keys in first-seen
    order."""
    ret = collections.OrderedDict()
    for key, value in self.other_header_lines:
      ret.setdefault(key, []).append(value)
    return freeze(ret)

  @cached_property
  def hsh(self):
    """Hex SHA-1 digest of str(self)."""
    return hashlib.sha1(str(self)).hexdigest()

  # Methods
  def to_dict(self):
    """Returns the seven constructor arguments mapped to their values."""
    return {
        k: getattr(self, k)
        for k in ['parents', 'tree', 'author', 'committer',
                  'other_header_lines', 'message_lines', 'footer_lines']
    }

  def alter(self, **kwargs):
    """In addition to the normal fields on this class, you may also provide
    'footers' and 'other_headers' instead of 'footer_lines' and
    'other_header_lines' respectively.

    These are an OrderedDict, which will be merged into the existing *_lines
    as described by merge_lines.
    """
    new_args = self.to_dict()
    if 'footers' in kwargs:
      assert 'footer_lines' not in kwargs
      new_args['footer_lines'] = self.merge_lines(
          self.footer_lines, kwargs.pop('footers'))
    if 'other_headers' in kwargs:
      assert 'other_header_lines' not in kwargs
      new_args['other_header_lines'] = self.merge_lines(
          self.other_header_lines, kwargs.pop('other_headers'))
    assert set(new_args).issuperset(kwargs.keys())
    new_args.update(kwargs)
    return CommitData(**new_args)

  @staticmethod
  def merge_lines(old_lines, new_dict):
    """Produces new footer or other_header_lines given the old lines and the
    new dictionary.

    Preserves the order of |old_lines| as much as possible.

    Rules:
      * If a key is in new_dict, but the key is not in old_lines, the new
        lines are added at the end.
      * If a key is not in new_dict, its lines are passed through untouched,
        each at its original position.
      * If a key maps to None (or any empty value) in new_dict, lines with
        that key are removed.
      * If a key is present in both, all entries in new_dict for that key are
        inserted at the location of the first line in old_lines for that key
        (and any other lines in old_lines with that key are removed).

    Args:
      old_lines - a sequence of (key, value) pairs
      new_dict - an OrderedDict of {key: [values]} or {key: None}

    Returns a list of (key, value) pairs.
    """
    old_keys = {key for key, _ in old_lines}
    del_keys = {key for key in new_dict if not new_dict[key]}

    new_lines = []
    replaced = set()
    for key, value in old_lines:
      if key in del_keys:
        continue
      if key in new_dict:
        # Emit the replacement values once, where the key first appeared;
        # later old lines with this key are dropped.
        if key not in replaced:
          replaced.add(key)
          new_lines.extend((key, v) for v in new_dict[key])
        continue
      new_lines.append((key, value))

    for add_key in new_dict:  # Preserve sort order of new lines
      if add_key in old_keys or add_key in del_keys:
        continue
      new_lines.extend((add_key, v) for v in new_dict[add_key])

    return new_lines

  @classmethod
  def from_raw(cls, data):
    """Turns the raw output of `git cat-file commit` into a CommitData.

    Args:
      data - the raw commit content as a string; must end with a newline

    Raises:
      PartialCommit - if data does not end with a newline, or if the tree,
        author or committer header is missing
      UnexpectedHeader - if a tree, author or committer header is repeated

    A header line containing no space makes the header/value split fail with
    ValueError.
    """
    users = {}
    parents = []
    tree = None
    hsh_ref = []
    def hsh_fn():
      # Computed lazily and memoized: only error and log paths need it.
      if not hsh_ref:
        hsh_ref.append(hashlib.sha1(data).hexdigest())
      return hsh_ref[0]

    if data[-1:] != '\n':
      raise PartialCommit(hsh_fn(), data)

    i = 0
    raw_lines = data.splitlines()
    other_header_lines = []
    for line in raw_lines:
      if not line:
        # The first empty line separates the headers from the message.
        break
      # Do not reuse the name `data` here: hsh_fn() and the PartialCommit
      # raised below both need the full raw content.
      header, value = line.split(' ', 1)
      if header == 'parent':
        parents.append(value)
      elif header in ('author', 'committer'):
        if header in users:
          raise UnexpectedHeader(hsh_fn(), header, value)
        users[header] = CommitUser.from_raw(value)
      elif header == 'tree':
        if tree:
          raise UnexpectedHeader(hsh_fn(), header, value)
        tree = value
      else:
        LOGGER.warning('Unexpected header in git commit %r: %r -> %r',
                       hsh_fn(), header, value)
        other_header_lines.append((header, value))
      i += 1

    # i is now the index of the separating empty line (or len(raw_lines)).
    raw_message = raw_lines[i+1:]

    # footers are lines in the form:
    #   ...message...
    #   <empty line>
    #   foo: data
    #   bar: other data
    #   ...
    #
    # If no empty line is found, they're considered not to exist.
    # If one line in the footers doesn't match the 'key: value' format, none
    # of the footers are considered to exist.
    message_lines = raw_message
    footer_lines = []

    # Walk backwards from the end; i counts the footer candidates seen so far.
    i = 0
    for line in reversed(raw_message):
      if not line:
        # Drop the footers and the empty separator from the message.
        message_lines = raw_message[:-(i+1)]
        break

      m = cls.FOOTER_RE.match(line)
      if m:
        footer_lines.append((m.group(1), m.group(2)))
      else:
        if i:
          footer_lines = []
          LOGGER.warning('Malformed footers')
        break
      i += 1
    else:
      LOGGER.warning('Footers comprise entire message')
      message_lines = []

    # Footers were collected bottom-up; restore commit order.
    footer_lines.reverse()

    if not tree or set(('author', 'committer')).difference(users.keys()):
      raise PartialCommit(hsh_fn(), data)

    return cls(tree, parents, users['author'], users['committer'],
               other_header_lines, message_lines, footer_lines)
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698