| Index: tools/isolate/run_test_from_archive.py | 
| diff --git a/tools/isolate/run_test_from_archive.py b/tools/isolate/run_test_from_archive.py | 
| index f0c039321ce342d5098fcfa4ab794476d06aefea..9290072dddb8f6b3545519b6340de37b7636f4e9 100755 | 
| --- a/tools/isolate/run_test_from_archive.py | 
| +++ b/tools/isolate/run_test_from_archive.py | 
| @@ -9,6 +9,7 @@ Keeps a local cache. | 
| """ | 
|  | 
| import ctypes | 
| +import hashlib | 
| import json | 
| import logging | 
| import optparse | 
| @@ -144,13 +145,6 @@ def is_same_filesystem(path1, path2): | 
| return os.stat(path1).st_dev == os.stat(path2).st_dev | 
|  | 
|  | 
| -def open_remote(file_or_url): | 
| -  """Reads a file or url.""" | 
| -  if re.match(r'^https?://.+$', file_or_url): | 
| -    return urllib.urlopen(file_or_url) | 
| -  return open(file_or_url, 'rb') | 
| - | 
| - | 
| def get_free_space(path): | 
| """Returns the number of free bytes.""" | 
| if sys.platform == 'win32': | 
| @@ -216,6 +210,13 @@ def load_manifest(content): | 
| raise ConfigError( | 
| 'Did not expect both \'sha-1\' and \'link\', got: %r' % subvalue) | 
|  | 
| +    elif key == 'includes': | 
| +      if not isinstance(value, list): | 
| +        raise ConfigError('Expected list, got %r' % value) | 
| +      for subvalue in value: | 
| +        if not RE_IS_SHA1.match(subvalue): | 
| +          raise ConfigError('Expected sha-1, got %r' % subvalue) | 
| + | 
| elif key == 'read_only': | 
| if not isinstance(value, bool): | 
| raise ConfigError('Expected bool, got %r' % value) | 
| @@ -413,12 +414,14 @@ class Cache(object): | 
| # oldest item. | 
| self.state = [] | 
|  | 
| +    # Items currently being fetched. Keep it local to reduce lock contention. | 
| +    self._pending_queue = set() | 
| + | 
| # Profiling values. | 
| # The files added and removed are stored as tuples of the filename and | 
| # the file size. | 
| self.files_added = [] | 
| self.files_removed = [] | 
| -    self.time_retrieving_files = 0 | 
|  | 
| if not os.path.isdir(self.cache_dir): | 
| os.makedirs(self.cache_dir) | 
| @@ -443,8 +446,6 @@ class Cache(object): | 
| len(self.files_added)) | 
| logging.info('Size of files added to cache: %i', | 
| sum(item[1] for item in self.files_added)) | 
| -    logging.info('Time taken (in seconds) to add files to cache: %s', | 
| -                 self.time_retrieving_files) | 
| logging.debug('All files added:') | 
| logging.debug(self.files_added) | 
|  | 
| @@ -510,30 +511,30 @@ class Cache(object): | 
|  | 
| self.save() | 
|  | 
| -  def retrieve(self, item): | 
| -    """Retrieves a file from the remote and add it to the cache.""" | 
| +  def retrieve(self, priority, item): | 
| +    """Retrieves a file from the remote, if not already cached, and adds it to | 
| +    the cache. | 
| +    """ | 
| assert not '/' in item | 
| +    path = self.path(item) | 
| try: | 
| index = self.state.index(item) | 
| # Was already in cache. Update it's LRU value. | 
| self.state.pop(index) | 
| self.state.append(item) | 
| -      return False | 
| +      os.utime(path, None) | 
| except ValueError: | 
| -      out = self.path(item) | 
| -      start_retrieve = time.time() | 
| -      self.remote.fetch_item(Remote.MED, item, out) | 
| -      # TODO(maruel): Temporarily fetch the files serially. | 
| -      self.remote.get_result() | 
| -      if os.path.exists(out): | 
| -        self.state.append(item) | 
| -        self.files_added.append((out, os.stat(out).st_size)) | 
| -      else: | 
| -        logging.error('File, %s, not placed in cache' % item) | 
| -      self.time_retrieving_files += time.time() - start_retrieve | 
| -      return True | 
| -    finally: | 
| -      self.save() | 
| +      if item in self._pending_queue: | 
| +        # Already pending. The same object could be referenced multiple times. | 
| +        return | 
| +      self.remote.fetch_item(priority, item, path) | 
| +      self._pending_queue.add(item) | 
| + | 
| +  def add(self, filepath, obj): | 
| +    """Forcibly adds a file to the cache.""" | 
| +    if not obj in self.state: | 
| +      link_file(self.path(obj), filepath, HARDLINK) | 
| +      self.state.append(obj) | 
|  | 
| def path(self, item): | 
| """Returns the path to one item.""" | 
| @@ -543,51 +544,240 @@ class Cache(object): | 
| """Saves the LRU ordering.""" | 
| json.dump(self.state, open(self.state_file, 'wb'), separators=(',',':')) | 
|  | 
| +  def wait_for(self, items): | 
| +    """Starts a loop that waits for at least one of |items| to be retrieved. | 
| + | 
| +    Returns the first item retrieved. | 
| +    """ | 
| +    # Flush items already present. | 
| +    for item in items: | 
| +      if item in self.state: | 
| +        return item | 
| + | 
| +    assert all(i in self._pending_queue for i in items), ( | 
| +        items, self._pending_queue) | 
| +    # Note that: | 
| +    #   len(self._pending_queue) == | 
| +    #   ( len(self.remote._workers) - self.remote._ready + | 
| +    #     len(self._remote._queue) + len(self._remote.done)) | 
| +    # There is no lock-free way to verify that. | 
| +    while self._pending_queue: | 
| +      item = self.remote.get_result() | 
| +      self._pending_queue.remove(item) | 
| +      self.state.append(item) | 
| +      if item in items: | 
| +        return item | 
| + | 
| + | 
| +class Manifest(object): | 
| +  """Represents a single parsed manifest, e.g. a .results file.""" | 
| +  def __init__(self, obj_hash): | 
| +    """|obj_hash| is really the sha-1 of the file.""" | 
| +    logging.debug('Manifest(%s)' % obj_hash) | 
| +    self.obj_hash = obj_hash | 
| +    # Set once all the left-side of the tree is parsed. 'Tree' here means the | 
| +    # manifest and all the manifests recursively included by it with 'includes' | 
| +    # key. The order of each manifest sha-1 in 'includes' is important, as the | 
| +    # later ones are not processed until the earlier ones are retrieved and read. | 
| +    self.can_fetch = False | 
| + | 
| +    # Raw data. | 
| +    self.data = {} | 
| +    # A Manifest instance, one per object in self.includes. | 
| +    self.children = [] | 
| + | 
| +    # Set once the manifest is loaded. | 
| +    self._manifest_parsed = False | 
| +    # Set once the files are fetched. | 
| +    self.files_fetched = False | 
| + | 
| +  def load(self, content): | 
| +    """Verifies the manifest is valid and loads this object with the json data. | 
| +    """ | 
| +    logging.debug('Manifest.load(%s)' % self.obj_hash) | 
| +    assert not self._manifest_parsed | 
| +    self.data = load_manifest(content) | 
| +    self.children = [Manifest(i) for i in self.data.get('includes', [])] | 
| +    self._manifest_parsed = True | 
| + | 
| +  def fetch_files(self, cache, files): | 
| +    """Adds files in this manifest not present in files dictionary. | 
|  | 
| -def run_tha_test(manifest, cache_dir, remote, policies): | 
| +    Preemptively request files. | 
| + | 
| +    Note that |files| is modified by this function. | 
| +    """ | 
| +    assert self.can_fetch | 
| +    if not self._manifest_parsed or self.files_fetched: | 
| +      return | 
| +    logging.info('fetch_files(%s)' % self.obj_hash) | 
| +    for filepath, properties in self.data.get('files', {}).iteritems(): | 
| +      # Root manifest has priority on the files being mapped. In particular, | 
| +      # overridden files must not be fetched. | 
| +      if filepath not in files: | 
| +        files[filepath] = properties | 
| +        if 'sha-1' in properties: | 
| +          # Preemptively request files. | 
| +          logging.info('fetching %s' % filepath) | 
| +          cache.retrieve(Remote.MED, properties['sha-1']) | 
| +    self.files_fetched = True | 
| + | 
| + | 
| +class Settings(object): | 
| +  """Results of a completely parsed manifest.""" | 
| +  def __init__(self): | 
| +    self.command = [] | 
| +    self.files = {} | 
| +    self.read_only = None | 
| +    self.relative_cwd = None | 
| +    # The main manifest. | 
| +    self.root = None | 
| +    logging.debug('Settings') | 
| + | 
| +  def load(self, cache, root_manifest_hash): | 
| +    """Loads the manifest and all the included manifests asynchronously. | 
| + | 
| +    It enables support for included manifests. They are processed in strict order | 
| +    but fetched asynchronously from the cache. This is important so that a file | 
| +    in an included manifest that is overridden by an embedding manifest is not | 
| +    fetched needlessly. The includes are fetched in one pass and the files are | 
| +    fetched as soon as all the manifests on the left-side of the tree were | 
| +    fetched. | 
| + | 
| +    The prioritization is very important here for nested manifests. 'includes' | 
| +    have the highest priority and the algorithm is optimized for both deep and | 
| +    wide manifests. A deep one is a long chain of manifests referenced one at a | 
| +    time by one item in 'includes'. A wide one has a large number of 'includes' | 
| +    in a single manifest. 'left' is defined as an included manifest earlier in | 
| +    the 'includes' list. So the order of the elements in 'includes' is | 
| +    important. | 
| +    """ | 
| +    self.root = Manifest(root_manifest_hash) | 
| +    cache.retrieve(Remote.HIGH, root_manifest_hash) | 
| +    pending = {root_manifest_hash: self.root} | 
| +    # Keeps the list of retrieved items to refuse recursive includes. | 
| +    retrieved = [root_manifest_hash] | 
| + | 
| +    def update_self(node): | 
| +      node.fetch_files(cache, self.files) | 
| +      # Grabs properties. | 
| +      if not self.command and node.data.get('command'): | 
| +        self.command = node.data['command'] | 
| +      if self.read_only is None and node.data.get('read_only') is not None: | 
| +        self.read_only = node.data['read_only'] | 
| +      if (self.relative_cwd is None and | 
| +          node.data.get('relative_cwd') is not None): | 
| +        self.relative_cwd = node.data['relative_cwd'] | 
| + | 
| +    def traverse_tree(node): | 
| +      if node.can_fetch: | 
| +        if not node.files_fetched: | 
| +          update_self(node) | 
| +        will_break = False | 
| +        for i in node.children: | 
| +          if not i.can_fetch: | 
| +            if will_break: | 
| +              break | 
| +            # Automatically mark the first one as fetchable. | 
| +            i.can_fetch = True | 
| +            will_break = True | 
| +          traverse_tree(i) | 
| + | 
| +    while pending: | 
| +      item_hash = cache.wait_for(pending) | 
| +      item = pending.pop(item_hash) | 
| +      item.load(open(cache.path(item_hash), 'r').read()) | 
| +      if item_hash == root_manifest_hash: | 
| +        # It's the root item. | 
| +        item.can_fetch = True | 
| + | 
| +      for new_child in item.children: | 
| +        h = new_child.obj_hash | 
| +        if h in retrieved: | 
| +          raise ConfigError('Manifest %s is retrieved recursively' % h) | 
| +        pending[h] = new_child | 
| +        cache.retrieve(Remote.HIGH, h) | 
| + | 
| +      # Traverse the whole tree to see if files can now be fetched. | 
| +      traverse_tree(self.root) | 
| +    def check(n): | 
| +      return all(check(x) for x in n.children) and n.files_fetched | 
| +    assert check(self.root) | 
| +    self.relative_cwd = self.relative_cwd or '' | 
| +    self.read_only = self.read_only or False | 
| + | 
| + | 
| +def run_tha_test(manifest_hash, cache_dir, remote, policies): | 
| """Downloads the dependencies in the cache, hardlinks them into a temporary | 
| directory and runs the executable. | 
| """ | 
| +  settings = Settings() | 
| with Cache(cache_dir, Remote(remote), policies) as cache: | 
| outdir = make_temp_dir('run_tha_test', cache_dir) | 
| - | 
| -    if not 'files' in manifest: | 
| -      print >> sys.stderr, 'No file to map' | 
| -      return 1 | 
| -    if not 'command' in manifest: | 
| -      print >> sys.stderr, 'No command to map run' | 
| -      return 1 | 
| - | 
| try: | 
| -      with Profiler('GetFiles') as _prof: | 
| -        for filepath, properties in manifest['files'].iteritems(): | 
| +      # Initiate all the files download. | 
| +      with Profiler('GetManifests') as _prof: | 
| +        # Optionally support local files. | 
| +        if not RE_IS_SHA1.match(manifest_hash): | 
| +          # Adds it to the cache. While not strictly necessary, this simplifies | 
| +          # the rest. | 
| +          h = hashlib.sha1(open(manifest_hash, 'r').read()).hexdigest() | 
| +          cache.add(manifest_hash, h) | 
| +          manifest_hash = h | 
| +        settings.load(cache, manifest_hash) | 
| + | 
| +      if not settings.command: | 
| +        print >> sys.stderr, 'No command to run' | 
| +        return 1 | 
| + | 
| +      with Profiler('GetRest') as _prof: | 
| +        logging.debug('Creating directories') | 
| +        # Creates the tree of directories to create. | 
| +        directories = set(os.path.dirname(f) for f in settings.files) | 
| +        for item in directories: | 
| +          directories.add(os.path.dirname(item)) | 
| +        for d in sorted(directories): | 
| +          if d: | 
| +            os.mkdir(os.path.join(outdir, d)) | 
| + | 
| +        # Creates the links if necessary. | 
| +        for filepath, properties in settings.files.iteritems(): | 
| +          if 'link' not in properties: | 
| +            continue | 
| outfile = os.path.join(outdir, filepath) | 
| -          outfiledir = os.path.dirname(outfile) | 
| -          if not os.path.isdir(outfiledir): | 
| -            os.makedirs(outfiledir) | 
| -          if 'sha-1' in properties: | 
| -            # A normal file. | 
| -            infile = properties['sha-1'] | 
| -            cache.retrieve(infile) | 
| -            link_file(outfile, cache.path(infile), HARDLINK) | 
| -          elif 'link' in properties: | 
| -            # A symlink. | 
| -            os.symlink(properties['link'], outfile) | 
| -          else: | 
| -            raise ConfigError('Unexpected entry: %s' % properties) | 
| +          os.symlink(properties['link'], outfile) | 
| +          if 'mode' in properties: | 
| +            # It's not set on Windows. | 
| +            os.chmod(outfile, properties['mode']) | 
| + | 
| +        # Remaining files to be processed. | 
| +        # Note that files could still not be downloaded yet at this point. | 
| +        remaining = dict( | 
| +            (props['sha-1'], (filepath, props)) | 
| +            for filepath, props in settings.files.iteritems() | 
| +            if 'sha-1' in props) | 
| + | 
| +        # Do bookkeeping while files are being downloaded in the background. | 
| +        cwd = os.path.join(outdir, settings.relative_cwd) | 
| +        if not os.path.isdir(cwd): | 
| +          os.makedirs(cwd) | 
| +        cmd = settings.command[:] | 
| +        # Ensure paths are correctly separated on windows. | 
| +        cmd[0] = cmd[0].replace('/', os.path.sep) | 
| +        cmd = fix_python_path(cmd) | 
| + | 
| +        # Now block on the remaining files to be downloaded and mapped. | 
| +        while remaining: | 
| +          obj = cache.wait_for(remaining) | 
| +          filepath, properties = remaining.pop(obj) | 
| +          outfile = os.path.join(outdir, filepath) | 
| +          link_file(outfile, cache.path(obj), HARDLINK) | 
| if 'mode' in properties: | 
| # It's not set on Windows. | 
| os.chmod(outfile, properties['mode']) | 
|  | 
| -      cwd = os.path.join(outdir, manifest.get('relative_cwd', '')) | 
| -      if not os.path.isdir(cwd): | 
| -        os.makedirs(cwd) | 
| -      if manifest.get('read_only'): | 
| +      if settings.read_only: | 
| make_writable(outdir, True) | 
| -      cmd = manifest['command'] | 
| -      # Ensure paths are correctly separated on windows. | 
| -      cmd[0] = cmd[0].replace('/', os.path.sep) | 
| -      cmd = fix_python_path(cmd) | 
| logging.info('Running %s, cwd=%s' % (cmd, cwd)) | 
| try: | 
| with Profiler('RunTest') as _prof: | 
| @@ -659,21 +849,11 @@ def main(): | 
| if args: | 
| parser.error('Unsupported args %s' % ' '.join(args)) | 
|  | 
| -  if options.hash: | 
| -    # First calculate the reference to it. | 
| -    options.manifest = '%s/%s' % (options.remote.rstrip('/'), options.hash) | 
| -  try: | 
| -    manifest = load_manifest(open_remote(options.manifest).read()) | 
| -  except IOError as e: | 
| -    parser.error( | 
| -        'Failed to read manifest %s; remote:%s; hash:%s; %s' % | 
| -        (options.manifest, options.remote, options.hash, str(e))) | 
| - | 
| policies = CachePolicies( | 
| options.max_cache_size, options.min_free_space, options.max_items) | 
| try: | 
| return run_tha_test( | 
| -        manifest, | 
| +        options.manifest or options.hash, | 
| os.path.abspath(options.cache), | 
| options.remote, | 
| policies) | 
|  |