| Index: owners.py
|
| diff --git a/owners.py b/owners.py
|
| index 1b1775f08c48ecbbb720c7d622c30d5932e7f84e..a863a7853ca78ffa9740289cd1d90e558071e813 100644
|
| --- a/owners.py
|
| +++ b/owners.py
|
| @@ -49,6 +49,7 @@ Examples for all of these combinations can be found in tests/owners_unittest.py.
|
| """
|
|
|
| import collections
|
| +import random
|
| import re
|
|
|
|
|
| @@ -252,60 +253,60 @@ class Database(object):
|
| ('%s is not a "set" directive, "*", '
|
| 'or an email address: "%s"' % (line_type, directive)))
|
|
|
| -
|
| def _covering_set_of_owners_for(self, files):
|
| - # Get the set of directories from the files.
|
| - dirs = set()
|
| - for f in files:
|
| - dirs.add(self._enclosing_dir_with_owners(f))
|
| -
|
| -
|
| - owned_dirs = {}
|
| - dir_owners = {}
|
| -
|
| + dirs_remaining = set(self._enclosing_dir_with_owners(f) for f in files)
|
| + all_possible_owners = self._all_possible_owners(dirs_remaining)
|
| + suggested_owners = set()
|
| + while dirs_remaining:
|
| + owner = self.lowest_cost_owner(all_possible_owners, dirs_remaining)
|
| + suggested_owners.add(owner)
|
| + for dirname, _ in all_possible_owners[owner]:
|
| + dirs_remaining.remove(dirname)
|
| + return suggested_owners
|
| +
|
| + def _all_possible_owners(self, dirs):
|
| + """Returns a list of (potential owner, distance-from-dir) tuples; a
|
| + distance of 1 is the lowest/closest possible distance (which makes the
|
| + subsequent math easier)."""
|
| + all_possible_owners = {}
|
| for current_dir in dirs:
|
| - # Get the list of owners for each directory.
|
| - current_owners = set()
|
| dirname = current_dir
|
| - while dirname in self.owners_for:
|
| - current_owners |= self.owners_for[dirname]
|
| + distance = 1
|
| + while True:
|
| + for owner in self.owners_for.get(dirname, []):
|
| + all_possible_owners.setdefault(owner, [])
|
| + # It's possible the same owner might match a directory from
|
| + # multiple files, and we only want the closest entry.
|
| + if not any(current_dir == el[0] for el in all_possible_owners[owner]):
|
| + all_possible_owners[owner].append((current_dir, distance))
|
| if self._stop_looking(dirname):
|
| break
|
| - prev_parent = dirname
|
| dirname = self.os_path.dirname(dirname)
|
| - if prev_parent == dirname:
|
| - break
|
| -
|
| - # Map each directory to a list of its owners.
|
| - dir_owners[current_dir] = current_owners
|
| -
|
| - # Add the directory to the list of each owner.
|
| - for owner in current_owners:
|
| - owned_dirs.setdefault(owner, set()).add(current_dir)
|
| -
|
| - final_owners = set()
|
| - while dirs:
|
| - # Find the owner that has the most directories.
|
| - max_count = 0
|
| - max_owner = None
|
| - owner_count = {}
|
| - for dirname in dirs:
|
| - for owner in dir_owners[dirname]:
|
| - count = owner_count.get(owner, 0) + 1
|
| - owner_count[owner] = count
|
| - if count >= max_count:
|
| - max_owner = owner
|
| - max_count = count
|
| -
|
| - # If no more directories have OWNERS, we're done.
|
| - if not max_owner:
|
| - break
|
| -
|
| - final_owners.add(max_owner)
|
| -
|
| - # Remove all directories owned by the current owner from the remaining
|
| - # list.
|
| - for dirname in owned_dirs[max_owner]:
|
| - dirs.discard(dirname)
|
| -
|
| - return final_owners
|
| + distance += 1
|
| + return all_possible_owners
|
| +
|
| + @staticmethod
|
| + def lowest_cost_owner(all_possible_owners, dirs):
|
| + # We want to minimize both the number of reviewers and the distance
|
| + # from the files/dirs needing reviews. The "pow(X, 1.75)" below is
|
| + # an arbitrarily-selected scaling factor that seems to work well - it
|
| + # will select one reviewer in the parent directory over three reviewers
|
| + # in subdirs, but not one reviewer over just two.
|
| + total_costs_by_owner = {}
|
| + for owner in all_possible_owners:
|
| + total_distance = 0
|
| + num_directories_owned = 0
|
| + for dirname, distance in all_possible_owners[owner]:
|
| + if dirname in dirs:
|
| + total_distance += distance
|
| + num_directories_owned += 1
|
| + if num_directories_owned:
|
| + total_costs_by_owner[owner] = (total_distance /
|
| + pow(num_directories_owned, 1.75))
|
| +
|
| + # Return the lowest cost owner. In the case of a tie, pick one randomly.
|
| + lowest_cost = min(total_costs_by_owner.itervalues())
|
| + lowest_cost_owners = filter(
|
| + lambda owner: total_costs_by_owner[owner] == lowest_cost,
|
| + total_costs_by_owner)
|
| + return random.Random().choice(lowest_cost_owners)
|
|
|