Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(28)

Unified Diff: infra/services/gnumbd/support/data.py

Issue 355153002: Refactor infra git libs and testing. (Closed) Base URL: https://chromium.googlesource.com/infra/infra@fake_testing_support
Patch Set: Change config ref to have a sandard naming scheme Created 6 years, 6 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
Index: infra/services/gnumbd/support/data.py
diff --git a/infra/services/gnumbd/support/data.py b/infra/services/gnumbd/support/data.py
deleted file mode 100644
index 4a3a6c3e23f5d0d28a46861805559407d3b3b25b..0000000000000000000000000000000000000000
--- a/infra/services/gnumbd/support/data.py
+++ /dev/null
@@ -1,454 +0,0 @@
-# Copyright 2014 The Chromium Authors. All rights reserved.
-# Use of this source code is governed by a BSD-style license that can be
-# found in the LICENSE file.
-import collections
-import hashlib
-import logging
-import re
-
-from cStringIO import StringIO
-
-from infra.services.gnumbd.support.util import cached_property, freeze
-
-LOGGER = logging.getLogger(__name__)
-
-################################################################################
-# Exceptions
-################################################################################
-
-class PartialCommit(Exception):
- def __init__(self, hsh, raw):
- super(PartialCommit, self).__init__(
- 'Commit %s has partial content: %r' % (hsh, raw))
- self.raw = raw
-
-
-class UnexpectedHeader(Exception):
- def __init__(self, hsh, header, value):
- super(UnexpectedHeader, self).__init__(
- 'Unexpected header in commit %s: %r -> %r' % (hsh, header, value))
-
-
-################################################################################
-# Base Class
-################################################################################
-
-class Alterable(object):
- def to_dict(self): # pragma: no cover
- """The shallow dictionary representation of this object (i.e. the dictionary
- may contain Alterable instances as values)."""
- raise NotImplementedError()
-
- def alter(self, **kwargs): # pragma: no cover
- """Returns a copy of self, except with the fields listed in kwargs replaced
- with new values."""
- raise NotImplementedError()
-
- @classmethod
- def from_raw(cls, data): # pragma: no cover
- """Construct an instance of this class from a string."""
- raise NotImplementedError()
-
-
-################################################################################
-# Implementation
-################################################################################
-
-class CommitTimestamp(Alterable):
- def __init__(self, secs, sign, hours, mins):
- super(CommitTimestamp, self).__init__()
- assert isinstance(secs, int)
- assert sign in '+-'
- assert 0 <= hours < 24
- assert 0 <= mins < 60
-
- self._secs = secs
- self._sign = sign
- self._hours = hours
- self._mins = mins
-
- # Comparison & Representation
- def __eq__(self, other):
- return (self is other) or (
- isinstance(other, CommitTimestamp) and (
- self.secs == other.secs and
- self.sign == other.sign and
- self.hours == other.hours and
- self.mins == other.mins
- )
- )
-
- def __ne__(self, other):
- return not (self == other)
-
- def __repr__(self):
- return 'CommitTimestamp(%r, %r, %r, %r)' % (
- self.secs, self.sign, self.hours, self.mins)
-
- def __str__(self):
- return '%s %s' % (self.secs, self.tz_str)
-
- # Accessors
- # pylint: disable=W0212
- hours = property(lambda self: self._hours)
- mins = property(lambda self: self._mins)
- secs = property(lambda self: self._secs)
- sign = property(lambda self: self._sign)
-
- @property
- def tz_str(self):
- return '%s%02d%02d' % (self.sign, self.hours, self.mins)
-
- # Methods
- def to_dict(self):
- return {k: getattr(self, k) for k in ['secs', 'sign', 'hours', 'mins']}
-
- def alter(self, **kwargs):
- new_args = self.to_dict()
- assert set(new_args).issuperset(kwargs.keys())
- new_args.update(kwargs)
- return CommitTimestamp(**new_args)
-
- @classmethod
- def from_raw(cls, data):
- # \d+ [+-]HHMM
- secs, tz = data.split(' ')
- return cls(int(secs), tz[0], int(tz[1:3]), int(tz[3:5]))
-
-
-NULL_TIMESTAMP = CommitTimestamp(0, '+', 0, 0)
-
-
-class CommitUser(Alterable):
- def __init__(self, user, email, timestamp):
- super(CommitUser, self).__init__()
- assert isinstance(user, basestring) and user
- assert isinstance(email, basestring) and email
- assert isinstance(timestamp, CommitTimestamp)
- self._user = user
- self._email = email
- self._timestamp = timestamp
-
- # Comparison & Representation
- def __eq__(self, other):
- return (self is other) or (
- isinstance(other, CommitUser) and (
- self.user == other.user and
- self.email == other.email and
- self.timestamp == other.timestamp
- )
- )
-
- def __ne__(self, other):
- return not (self == other)
-
- def __repr__(self):
- return 'CommitUser(%r, %r, %r)' % (self.user, self.email, self.timestamp)
-
- def __str__(self):
- return '%s <%s> %s' % (self.user, self.email, self.timestamp)
-
- # Accessors
- # pylint: disable=W0212
- user = property(lambda self: self._user)
- email = property(lambda self: self._email)
- timestamp = property(lambda self: self._timestamp)
-
- # Methods
- def to_dict(self):
- return {k: getattr(self, k) for k in ['user', 'email', 'timestamp']}
-
- def alter(self, **kwargs):
- new_args = self.to_dict()
- assert set(new_args).issuperset(kwargs.keys())
- new_args.update(kwargs)
- return CommitUser(**new_args)
-
- @classmethod
- def from_raw(cls, data):
- # safe_string() ' <' safe_string() '> ' [TIMESTAMP]
- user, rest = data.split(' <', 1)
- email, rest = rest.split('> ', 1)
- return cls(user, email, CommitTimestamp.from_raw(rest))
-
-
-class CommitData(Alterable):
- """A workable data representation of a git commit object.
-
- Knows how to parse all the standard fields of a git commit object:
- * tree
- * parent(s)
- * author
- * committer
- * commit message
-
- Also knows how to parse 'footers' which are an informally-defined mechanism to
- append key-value pairs to the ends of commit messages.
-
- Footers are stored internally as a list of (key, value) pairs. This is in
- order to provide full round-trip compatibility for CommitData, since footers
- have no implied ordering, other than the ordering in the commit. Consider the
- footers:
-
- A: 1
- B: 2
- A: 3
-
- In order to represent this as something better than a list which maintains the
- round-trip invariant, we would need a (Frozen)OrderedMultiDict, which would be
- tricky to implement.
-
- Author and committer are treated as the format defined by CommitUser
- """
- FOOTER_RE = re.compile(r'([-a-zA-Z]+): (.*)')
- HASH_RE = re.compile(r'[0-9a-f]{40}')
-
- def __init__(self, tree, parents, author, committer, other_header_lines,
- message_lines, footer_lines):
- super(CommitData, self).__init__()
- assert all('\n' not in h and self.HASH_RE.match(h) for h in parents)
- assert '\n' not in tree and self.HASH_RE.match(tree)
- assert isinstance(author, CommitUser)
- assert isinstance(committer, CommitUser)
- assert all(isinstance(l, str) for l in message_lines)
- assert all(len(i) == 2 and all(isinstance(x, str) for x in i)
- for i in other_header_lines)
- assert all(len(i) == 2 and all(isinstance(x, str) for x in i)
- for i in footer_lines)
-
- self._parents = freeze(parents)
- self._tree = tree
- self._author = author
- self._committer = committer
- self._other_header_lines = freeze(other_header_lines)
- self._message_lines = freeze(message_lines)
- self._footer_lines = freeze(footer_lines)
-
- # Comparison & Representation
- def __eq__(self, other):
- return (self is other) or (
- isinstance(other, CommitData) and (
- self.hsh == other.hsh
- )
- )
-
- def __ne__(self, other):
- return not (self == other)
-
- def __repr__(self):
- return (
- 'CommitData({tree!r}, {parents!r}, {author!r}, {committer!r}, '
- '{other_header_lines!r}, {message_lines!r}, {footer_lines!r})'
- ).format(**self.to_dict())
-
- def __str__(self):
- """Produces a string representation of this CommitData suitable for
- consumption by `git hash-object`.
- """
- ret = StringIO()
- print >> ret, 'tree', self.tree
- for parent in self.parents:
- print >> ret, 'parent', parent
- print >> ret, 'author', self.author
- print >> ret, 'committer', self.committer
- for key, value in self.other_header_lines:
- print >> ret, key, value
- print >> ret
- print >> ret, '\n'.join(self.message_lines)
- if self.footer_lines:
- print >> ret
- for key, value in self.footer_lines:
- print >> ret, '%s: %s' % (key, value)
- return ret.getvalue()
-
- # Accessors
- # pylint: disable=W0212
- author = property(lambda self: self._author)
- committer = property(lambda self: self._committer)
- footer_lines = property(lambda self: self._footer_lines)
- message_lines = property(lambda self: self._message_lines)
- other_header_lines = property(lambda self: self._other_header_lines)
- parents = property(lambda self: self._parents)
- tree = property(lambda self: self._tree)
-
- @cached_property
- def footers(self):
- ret = collections.OrderedDict()
- for key, value in self.footer_lines:
- ret.setdefault(key, []).append(value)
- return freeze(ret)
-
- @cached_property
- def other_headers(self):
- ret = collections.OrderedDict()
- for key, value in self.other_header_lines:
- ret.setdefault(key, []).append(value)
- return freeze(ret)
-
- @cached_property
- def hsh(self):
- return hashlib.sha1(str(self)).hexdigest()
-
- # Methods
- def to_dict(self):
- return {
- k: getattr(self, k)
- for k in ['parents', 'tree', 'author', 'committer',
- 'other_header_lines', 'message_lines', 'footer_lines']
- }
-
- def alter(self, **kwargs):
- """In addition to the normal fields on this class, you may also provide
- 'footers' and 'other_headers' instead of 'footer_lines' and
- 'other_header_lines' respectively.
-
- These are an OrderedDict, which will be merged into the existing *_lines
- as described by merge_lines.
- """
- new_args = self.to_dict()
- if 'footers' in kwargs:
- assert 'footer_lines' not in kwargs
- new_args['footer_lines'] = self.merge_lines(
- self.footer_lines, kwargs.pop('footers'))
- if 'other_headers' in kwargs:
- assert 'other_header_lines' not in kwargs
- new_args['other_header_lines'] = self.merge_lines(
- self.other_header_lines, kwargs.pop('other_headers'))
- assert set(new_args).issuperset(kwargs.keys())
- new_args.update(kwargs)
- return CommitData(**new_args)
-
- @staticmethod
- def merge_lines(old_lines, new_dict):
- """Produces new footer or other_header_lines given the old lines and the
- new dictionary.
-
- Preserves the order of |old_lines| as much as possible.
-
- Rules:
- * If a key is in new_dict, but the key is not in old_lines, the new
- lines are added at the end.
- * If a key is not in new_dict, it is passed through.
- * If a key is equal to None in new_dict, lines with that key are removed.
- * If a key is present in both, all entries in new_dict for that key are
- inserted at the location of the first line in old_lines for that key
- (and any other lines in old_lines with that key are removed).
-
- Args:
- old_lines - a sequence of (key, value) pairs
- new_dict - an OrderedDict of {key: [values]} or {key: None}
- """
- old_dict = collections.OrderedDict()
- for key, value in old_lines:
- old_dict.setdefault(key, []).append(value)
-
- old_keys = set(old_dict)
-
- del_keys = {k for k, v in new_dict.iteritems() if not v}
- new_keys = ({k for k, v in new_dict.iteritems() if v} | old_keys) - del_keys
-
- # delete keys
- new_lines = [(k, v) for k, v in old_lines if k in new_keys]
-
- for change_key in (new_keys & old_keys):
- insert_idx = None
- to_nuke = set()
- for i, (k, v) in enumerate(new_lines):
- if k == change_key:
- if insert_idx is None:
- insert_idx = i
- to_nuke.add(i)
- assert to_nuke # because it's in old_keys
- new_lines = [(k, v) for i, (k, v) in enumerate(new_lines)
- if i not in to_nuke]
- new_lines[insert_idx:insert_idx] = [
- (change_key, v)
- for v in new_dict.get(change_key, old_dict[change_key])
- ]
-
- for add_key in new_dict: # Preserve sort order of new lines
- if add_key in old_keys or add_key in del_keys:
- continue
- new_lines.extend((add_key, v) for v in new_dict[add_key])
-
- return new_lines
-
- @classmethod
- def from_raw(cls, data):
- """Turns the raw output of `git cat-file commit` into a CommitData."""
- users = {}
- parents = []
- tree = None
- hsh_ref = []
- def hsh_fn():
- if not hsh_ref:
- hsh_ref.append(hashlib.sha1(data).hexdigest())
- return hsh_ref[0]
-
- if data[-1:] != '\n':
- raise PartialCommit(hsh_fn(), data)
-
- i = 0
- raw_lines = data.splitlines()
- other_header_lines = []
- for line in raw_lines:
- if not line:
- break
- header, data = line.split(' ', 1)
- if header == 'parent':
- parents.append(data)
- elif header in ('author', 'committer'):
- if header in users:
- raise UnexpectedHeader(hsh_fn(), header, data)
- users[header] = CommitUser.from_raw(data)
- elif header == 'tree':
- if tree:
- raise UnexpectedHeader(hsh_fn(), header, data)
- tree = data
- else:
- LOGGER.warn('Unexpected header in git commit %r: %r -> %r',
- hsh_fn(), header, data)
- other_header_lines.append((header, data))
- i += 1
-
- raw_message = raw_lines[i+1:]
-
- # footers are lines in the form:
- # ...message...
- # <empty line>
- # foo: data
- # bar: other data
- # ...
- #
- # If no empty line is found, they're considered not to exist.
- # If one line in the footers doesn't match the 'key: value' format, none
- # of the footers are considered to exist.
- message_lines = raw_message
- footer_lines = []
-
- i = 0
- for line in reversed(raw_message):
- if not line:
- message_lines = raw_message[:-(i+1)]
- break
-
- m = cls.FOOTER_RE.match(line)
- if m:
- footer_lines.append((m.group(1), m.group(2)))
- else:
- if i:
- footer_lines = []
- LOGGER.warn('Malformed footers')
- break
- i += 1
- else:
- LOGGER.warn('Footers comprise entire message')
- message_lines = []
-
- footer_lines.reverse()
-
- if not tree or set(('author', 'committer')).difference(users.keys()):
- raise PartialCommit(hsh_fn(), data)
-
- return cls(tree, parents, users['author'], users['committer'],
- other_header_lines, message_lines, footer_lines)

Powered by Google App Engine
This is Rietveld 408576698