Index: infra/services/gnumbd/support/data.py |
diff --git a/infra/services/gnumbd/support/data.py b/infra/services/gnumbd/support/data.py |
deleted file mode 100644 |
index 4a3a6c3e23f5d0d28a46861805559407d3b3b25b..0000000000000000000000000000000000000000 |
--- a/infra/services/gnumbd/support/data.py |
+++ /dev/null |
@@ -1,454 +0,0 @@ |
-# Copyright 2014 The Chromium Authors. All rights reserved. |
-# Use of this source code is governed by a BSD-style license that can be |
-# found in the LICENSE file. |
-import collections |
-import hashlib |
-import logging |
-import re |
- |
-from cStringIO import StringIO |
- |
-from infra.services.gnumbd.support.util import cached_property, freeze |
- |
# Module-level logger, named after this module's import path.
LOGGER = logging.getLogger(__name__)
- |
-################################################################################ |
-# Exceptions |
-################################################################################ |
- |
class PartialCommit(Exception):
  """Raised when raw commit content is incomplete.

  Attributes:
    hsh - the hash string that was passed in to identify the commit.
    raw - the raw content that could not be interpreted as a full commit.
  """

  def __init__(self, hsh, raw):
    super(PartialCommit, self).__init__(
        'Commit %s has partial content: %r' % (hsh, raw))
    # Keep both inputs so handlers don't have to parse them out of the message.
    self.hsh = hsh
    self.raw = raw
- |
- |
class UnexpectedHeader(Exception):
  """Raised when a commit header appears where it is not allowed.

  Attributes:
    hsh - the hash string that was passed in to identify the commit.
    header - the name of the offending header.
    value - the value that accompanied the offending header.
  """

  def __init__(self, hsh, header, value):
    super(UnexpectedHeader, self).__init__(
        'Unexpected header in commit %s: %r -> %r' % (hsh, header, value))
    # Expose the structured fields, not just the formatted message.
    self.hsh = hsh
    self.header = header
    self.value = value
- |
- |
-################################################################################ |
-# Base Class |
-################################################################################ |
- |
class Alterable(object):
  """Abstract interface for value objects that are copied rather than mutated.

  Every method here raises NotImplementedError; subclasses are expected to
  override all three.
  """

  def to_dict(self):  # pragma: no cover
    """The shallow dictionary representation of this object (i.e. the dictionary
    may contain Alterable instances as values)."""
    raise NotImplementedError()

  def alter(self, **kwargs):  # pragma: no cover
    """Returns a copy of self, except with the fields listed in kwargs replaced
    with new values."""
    raise NotImplementedError()

  @classmethod
  def from_raw(cls, data):  # pragma: no cover
    """Construct an instance of this class from a string."""
    raise NotImplementedError()
- |
- |
-################################################################################ |
-# Implementation |
-################################################################################ |
- |
class CommitTimestamp(Alterable):
  """A read-only timestamp made of a seconds count and a timezone offset.

  The offset is held as a sign character plus separate hour and minute
  magnitudes, mirroring the '<secs> [+-]HHMM' text that from_raw() parses and
  that str() produces.
  """

  def __init__(self, secs, sign, hours, mins):
    """
    Args:
      secs - int number of seconds.
      sign - '+' or '-', the direction of the timezone offset.
      hours - offset hours, 0 <= hours < 24.
      mins - offset minutes, 0 <= mins < 60.
    """
    super(CommitTimestamp, self).__init__()
    assert isinstance(secs, int)
    # Compare against the two legal values explicitly; a substring test
    # (`sign in '+-'`) would also let '' and '+-' through.
    assert sign in ('+', '-')
    assert 0 <= hours < 24
    assert 0 <= mins < 60

    self._secs = secs
    self._sign = sign
    self._hours = hours
    self._mins = mins

  # Comparison & Representation
  def __eq__(self, other):
    return (self is other) or (
        isinstance(other, CommitTimestamp) and (
            self.secs == other.secs and
            self.sign == other.sign and
            self.hours == other.hours and
            self.mins == other.mins
        )
    )

  def __ne__(self, other):
    return not (self == other)

  def __hash__(self):
    # Must agree with __eq__: equal timestamps hash equally.
    return hash((self.secs, self.sign, self.hours, self.mins))

  def __repr__(self):
    return 'CommitTimestamp(%r, %r, %r, %r)' % (
        self.secs, self.sign, self.hours, self.mins)

  def __str__(self):
    return '%s %s' % (self.secs, self.tz_str)

  # Accessors
  # pylint: disable=W0212
  hours = property(lambda self: self._hours)
  mins = property(lambda self: self._mins)
  secs = property(lambda self: self._secs)
  sign = property(lambda self: self._sign)

  @property
  def tz_str(self):
    """The timezone offset rendered as [+-]HHMM."""
    return '%s%02d%02d' % (self.sign, self.hours, self.mins)

  # Methods
  def to_dict(self):
    """Returns the four constructor arguments as a dict keyed by name."""
    return {k: getattr(self, k) for k in ['secs', 'sign', 'hours', 'mins']}

  def alter(self, **kwargs):
    """Returns a new CommitTimestamp with the given fields replaced.

    Only 'secs', 'sign', 'hours' and 'mins' are accepted as keys.
    """
    new_args = self.to_dict()
    assert set(new_args).issuperset(kwargs.keys())
    new_args.update(kwargs)
    return CommitTimestamp(**new_args)

  @classmethod
  def from_raw(cls, data):
    """Parses '<secs> [+-]HHMM' into an instance of cls."""
    # \d+ [+-]HHMM
    secs, tz = data.split(' ')
    return cls(int(secs), tz[0], int(tz[1:3]), int(tz[3:5]))
- |
- |
# Shared timestamp with zero seconds and a +0000 offset (renders as '0 +0000').
NULL_TIMESTAMP = CommitTimestamp(0, '+', 0, 0)
- |
- |
class CommitUser(Alterable):
  """A read-only (user, email, timestamp) triple.

  The text form is 'user <email> timestamp', which is what from_raw() parses
  and what str() produces.
  """

  def __init__(self, user, email, timestamp):
    """
    Args:
      user - non-empty string.
      email - non-empty string.
      timestamp - a CommitTimestamp.
    """
    super(CommitUser, self).__init__()
    assert isinstance(user, basestring) and user
    assert isinstance(email, basestring) and email
    assert isinstance(timestamp, CommitTimestamp)
    self._user = user
    self._email = email
    self._timestamp = timestamp

  # Comparison & Representation
  def __eq__(self, other):
    return (self is other) or (
        isinstance(other, CommitUser) and (
            self.user == other.user and
            self.email == other.email and
            self.timestamp == other.timestamp
        )
    )

  def __ne__(self, other):
    return not (self == other)

  def __hash__(self):
    # Must agree with __eq__. The timestamp contributes through its text form,
    # which is derived from exactly the fields its equality compares, so two
    # equal timestamps always contribute the same value.
    return hash((self.user, self.email, str(self.timestamp)))

  def __repr__(self):
    return 'CommitUser(%r, %r, %r)' % (self.user, self.email, self.timestamp)

  def __str__(self):
    return '%s <%s> %s' % (self.user, self.email, self.timestamp)

  # Accessors
  # pylint: disable=W0212
  user = property(lambda self: self._user)
  email = property(lambda self: self._email)
  timestamp = property(lambda self: self._timestamp)

  # Methods
  def to_dict(self):
    """Returns the three constructor arguments as a dict keyed by name.

    The 'timestamp' value is the CommitTimestamp itself, not a nested dict.
    """
    return {k: getattr(self, k) for k in ['user', 'email', 'timestamp']}

  def alter(self, **kwargs):
    """Returns a new CommitUser with the given fields replaced.

    Only 'user', 'email' and 'timestamp' are accepted as keys.
    """
    new_args = self.to_dict()
    assert set(new_args).issuperset(kwargs.keys())
    new_args.update(kwargs)
    return CommitUser(**new_args)

  @classmethod
  def from_raw(cls, data):
    """Parses 'user <email> timestamp' into an instance of cls."""
    # safe_string() ' <' safe_string() '> ' [TIMESTAMP]
    # Split on the first ' <' and the first '> ' only, so whatever follows is
    # handed to CommitTimestamp untouched.
    user, rest = data.split(' <', 1)
    email, rest = rest.split('> ', 1)
    return cls(user, email, CommitTimestamp.from_raw(rest))
- |
- |
class CommitData(Alterable):
  """A workable data representation of a git commit object.

  Knows how to parse all the standard fields of a git commit object:
    * tree
    * parent(s)
    * author
    * committer
    * commit message

  Also knows how to parse 'footers' which are an informally-defined mechanism to
  append key-value pairs to the ends of commit messages.

  Footers are stored internally as a list of (key, value) pairs. This is in
  order to provide full round-trip compatibility for CommitData, since footers
  have no implied ordering, other than the ordering in the commit. Consider the
  footers:

    A: 1
    B: 2
    A: 3

  In order to represent this as something better than a list which maintains the
  round-trip invariant, we would need a (Frozen)OrderedMultiDict, which would be
  tricky to implement.

  Author and committer are treated as the format defined by CommitUser
  """
  # A footer line is 'Key: value', where Key is letters and dashes only.
  FOOTER_RE = re.compile(r'([-a-zA-Z]+): (.*)')
  # Used with .match(), so this checks that a string *starts* with 40 lowercase
  # hex digits.
  HASH_RE = re.compile(r'[0-9a-f]{40}')

  def __init__(self, tree, parents, author, committer, other_header_lines,
               message_lines, footer_lines):
    """
    Args:
      tree - hash string of the tree.
      parents - sequence of parent hash strings (may be empty).
      author - CommitUser.
      committer - CommitUser.
      other_header_lines - sequence of (key, value) str pairs for headers other
        than tree/parent/author/committer.
      message_lines - sequence of str, one per line of the message body.
      footer_lines - sequence of (key, value) str pairs.
    """
    super(CommitData, self).__init__()
    assert all('\n' not in h and self.HASH_RE.match(h) for h in parents)
    assert '\n' not in tree and self.HASH_RE.match(tree)
    assert isinstance(author, CommitUser)
    assert isinstance(committer, CommitUser)
    assert all(isinstance(l, str) for l in message_lines)
    assert all(len(i) == 2 and all(isinstance(x, str) for x in i)
               for i in other_header_lines)
    assert all(len(i) == 2 and all(isinstance(x, str) for x in i)
               for i in footer_lines)

    # freeze() comes from support.util; presumably it returns an immutable
    # copy of the container -- confirm against that module.
    self._parents = freeze(parents)
    self._tree = tree
    self._author = author
    self._committer = committer
    self._other_header_lines = freeze(other_header_lines)
    self._message_lines = freeze(message_lines)
    self._footer_lines = freeze(footer_lines)

  # Comparison & Representation
  def __eq__(self, other):
    # Two commits are equal when their serialized forms hash identically.
    return (self is other) or (
        isinstance(other, CommitData) and (
            self.hsh == other.hsh
        )
    )

  def __ne__(self, other):
    return not (self == other)

  def __hash__(self):
    # Must agree with __eq__, which compares hsh.
    return hash(self.hsh)

  def __repr__(self):
    return (
        'CommitData({tree!r}, {parents!r}, {author!r}, {committer!r}, '
        '{other_header_lines!r}, {message_lines!r}, {footer_lines!r})'
    ).format(**self.to_dict())

  def __str__(self):
    """Produces a string representation of this CommitData suitable for
    consumption by `git hash-object`.
    """
    ret = StringIO()
    ret.write('tree %s\n' % (self.tree,))
    for parent in self.parents:
      ret.write('parent %s\n' % (parent,))
    ret.write('author %s\n' % (self.author,))
    ret.write('committer %s\n' % (self.committer,))
    for key, value in self.other_header_lines:
      ret.write('%s %s\n' % (key, value))
    # A blank line separates the headers from the message.
    ret.write('\n')
    ret.write('\n'.join(self.message_lines) + '\n')
    if self.footer_lines:
      # A blank line separates the message from the footers.
      ret.write('\n')
      for key, value in self.footer_lines:
        ret.write('%s: %s\n' % (key, value))
    return ret.getvalue()

  # Accessors
  # pylint: disable=W0212
  author = property(lambda self: self._author)
  committer = property(lambda self: self._committer)
  footer_lines = property(lambda self: self._footer_lines)
  message_lines = property(lambda self: self._message_lines)
  other_header_lines = property(lambda self: self._other_header_lines)
  parents = property(lambda self: self._parents)
  tree = property(lambda self: self._tree)

  @cached_property
  def footers(self):
    """footer_lines grouped as an ordered {key: [values]} mapping.

    Keys appear in order of first occurrence; each key's values keep their
    relative order.
    """
    ret = collections.OrderedDict()
    for key, value in self.footer_lines:
      ret.setdefault(key, []).append(value)
    return freeze(ret)

  @cached_property
  def other_headers(self):
    """other_header_lines grouped as an ordered {key: [values]} mapping.

    Keys appear in order of first occurrence; each key's values keep their
    relative order.
    """
    ret = collections.OrderedDict()
    for key, value in self.other_header_lines:
      ret.setdefault(key, []).append(value)
    return freeze(ret)

  @cached_property
  def hsh(self):
    """SHA-1 hex digest of str(self).

    Only the serialized text is hashed; nothing is prepended to it here.
    """
    return hashlib.sha1(str(self)).hexdigest()

  # Methods
  def to_dict(self):
    """Returns the seven constructor arguments as a dict keyed by name."""
    return {
        k: getattr(self, k)
        for k in ['parents', 'tree', 'author', 'committer',
                  'other_header_lines', 'message_lines', 'footer_lines']
    }

  def alter(self, **kwargs):
    """In addition to the normal fields on this class, you may also provide
    'footers' and 'other_headers' instead of 'footer_lines' and
    'other_header_lines' respectively.

    These are an OrderedDict, which will be merged into the existing *_lines
    as described by merge_lines.
    """
    new_args = self.to_dict()
    if 'footers' in kwargs:
      assert 'footer_lines' not in kwargs
      new_args['footer_lines'] = self.merge_lines(
          self.footer_lines, kwargs.pop('footers'))
    if 'other_headers' in kwargs:
      assert 'other_header_lines' not in kwargs
      new_args['other_header_lines'] = self.merge_lines(
          self.other_header_lines, kwargs.pop('other_headers'))
    assert set(new_args).issuperset(kwargs.keys())
    new_args.update(kwargs)
    return CommitData(**new_args)

  @staticmethod
  def merge_lines(old_lines, new_dict):
    """Produces new footer or other_header_lines given the old lines and the
    new dictionary.

    Preserves the order of |old_lines| as much as possible.

    Rules:
      * If a key is in new_dict, but the key is not in old_lines, the new
        lines are added at the end.
      * If a key is not in new_dict, it is passed through.
      * If a key is equal to None in new_dict, lines with that key are removed.
      * If a key is present in both, all entries in new_dict for that key are
        inserted at the location of the first line in old_lines for that key
        (and any other lines in old_lines with that key are removed).

    Args:
      old_lines - a sequence of (key, value) pairs
      new_dict - an OrderedDict of {key: [values]} or {key: None}
    """
    old_dict = collections.OrderedDict()
    for key, value in old_lines:
      old_dict.setdefault(key, []).append(value)

    old_keys = set(old_dict)

    # Any falsy value (None, but also an empty list) marks a key for deletion.
    del_keys = {k for k, v in new_dict.items() if not v}
    new_keys = ({k for k, v in new_dict.items() if v} | old_keys) - del_keys

    # delete keys
    new_lines = [(k, v) for k, v in old_lines if k in new_keys]

    # NOTE(review): this loop also visits surviving old keys that are absent
    # from new_dict. Their old values are regrouped at the first occurrence, so
    # A:1, B:2, A:3 comes back as A:1, A:3, B:2 even when A was not touched.
    # That looks at odds with the "passed through" rule above -- confirm
    # whether callers rely on it before changing.
    for change_key in (new_keys & old_keys):
      insert_idx = None
      to_nuke = set()
      for i, (k, v) in enumerate(new_lines):
        if k == change_key:
          if insert_idx is None:
            insert_idx = i
          to_nuke.add(i)
      assert to_nuke  # because it's in old_keys
      new_lines = [(k, v) for i, (k, v) in enumerate(new_lines)
                   if i not in to_nuke]
      # Every removed index is >= insert_idx, so insert_idx still points at
      # the slot where the first occurrence used to be.
      new_lines[insert_idx:insert_idx] = [
          (change_key, v)
          for v in new_dict.get(change_key, old_dict[change_key])
      ]

    for add_key in new_dict:  # Preserve sort order of new lines
      if add_key in old_keys or add_key in del_keys:
        continue
      new_lines.extend((add_key, v) for v in new_dict[add_key])

    return new_lines

  @classmethod
  def from_raw(cls, data):
    """Turns the raw output of `git cat-file commit` into a CommitData.

    Raises:
      PartialCommit - if data does not end in a newline, or lacks a tree,
        author or committer header.
      UnexpectedHeader - if tree, author or committer appears more than once.
    """
    users = {}
    parents = []
    tree = None
    hsh_ref = []
    def hsh_fn():
      # Lazily compute (and memoize) the digest of the raw input; it is only
      # needed for log and exception messages.
      if not hsh_ref:
        hsh_ref.append(hashlib.sha1(data).hexdigest())
      return hsh_ref[0]

    if data[-1:] != '\n':
      raise PartialCommit(hsh_fn(), data)

    header_count = 0
    raw_lines = data.splitlines()
    other_header_lines = []
    for line in raw_lines:
      if not line:
        # The first blank line ends the header section.
        break
      # Do not reuse the name `data` here: hsh_fn() and the PartialCommit
      # raised below both need the untouched raw input.
      header, value = line.split(' ', 1)
      if header == 'parent':
        parents.append(value)
      elif header in ('author', 'committer'):
        if header in users:
          raise UnexpectedHeader(hsh_fn(), header, value)
        users[header] = CommitUser.from_raw(value)
      elif header == 'tree':
        if tree:
          raise UnexpectedHeader(hsh_fn(), header, value)
        tree = value
      else:
        LOGGER.warn('Unexpected header in git commit %r: %r -> %r',
                    hsh_fn(), header, value)
        other_header_lines.append((header, value))
      header_count += 1

    # Skip the headers and the blank separator line after them.
    raw_message = raw_lines[header_count+1:]

    # footers are lines in the form:
    #   ...message...
    #   <empty line>
    #   foo: data
    #   bar: other data
    #   ...
    #
    # If no empty line is found, they're considered not to exist.
    # If one line in the footers doesn't match the 'key: value' format, none
    # of the footers are considered to exist.
    message_lines = raw_message
    footer_lines = []

    # Walk up from the last line; `seen` counts the footer lines accepted.
    seen = 0
    for line in reversed(raw_message):
      if not line:
        # Found the separator: everything above it is the message.
        message_lines = raw_message[:-(seen+1)]
        break

      m = cls.FOOTER_RE.match(line)
      if m:
        footer_lines.append((m.group(1), m.group(2)))
      else:
        if seen:
          footer_lines = []
          LOGGER.warn('Malformed footers')
        break
      seen += 1
    else:
      # No blank line and no non-footer line anywhere in the message.
      LOGGER.warn('Footers comprise entire message')
      message_lines = []

    # Footers were collected bottom-up; restore commit order.
    footer_lines.reverse()

    if not tree or set(('author', 'committer')).difference(users.keys()):
      raise PartialCommit(hsh_fn(), data)

    return cls(tree, parents, users['author'], users['committer'],
               other_header_lines, message_lines, footer_lines)