| Index: tools/telemetry/telemetry/page_set_archive_info.py
|
| diff --git a/tools/telemetry/telemetry/page_set_archive_info.py b/tools/telemetry/telemetry/page_set_archive_info.py
|
| new file mode 100644
|
| index 0000000000000000000000000000000000000000..4eda1d69f3406f2cd2f237d7fdbea06bb9e4a1c0
|
| --- /dev/null
|
| +++ b/tools/telemetry/telemetry/page_set_archive_info.py
|
| @@ -0,0 +1,123 @@
|
| +# Copyright (c) 2013 The Chromium Authors. All rights reserved.
|
| +# Use of this source code is governed by a BSD-style license that can be
|
| +# found in the LICENSE file.
|
| +import json
|
| +import logging
|
| +import os
|
| +import re
|
| +import shutil
|
| +
|
| +class PageSetArchiveInfo(object):
|
| +  """Tracks which Web Page Replay (.wpr) archive serves each page url.
|
| +
|
| +  The mapping is read from, and written back to, a JSON metadata file whose
|
| +  'archives' entry maps a .wpr file name (relative to the directory of the
|
| +  metadata file) to the list of page urls stored in that archive.
|
| +  """
|
| +
|
| +  # Archive names have the format "some_thing_number.wpr". The pattern is
|
| +  # anchored at the end so that e.g. "x_1.wpr.bak" is rejected.
|
| +  _WPR_FILE_NAME_RE = re.compile(r'(?P<BASE>.*)_(?P<NUMBER>[0-9]+)\.wpr$')
|
| +
|
| +  def __init__(self, archive_data_file_path, page_set_file_path, data):
|
| +    """Initializes the info from already-parsed metadata.
|
| +
|
| +    Args:
|
| +      archive_data_file_path: Path of the metadata file. _WriteToFile writes
|
| +          to it and .wpr file names are resolved against its directory.
|
| +      page_set_file_path: Path of the page set file the metadata describes.
|
| +      data: Parsed metadata. data['archives'] maps a .wpr file name to the
|
| +          list of urls it supports; that dict is kept by reference and is
|
| +          modified in place when pages are re-recorded.
|
| +
|
| +    Raises:
|
| +      KeyError: If data has no 'archives' entry.
|
| +    """
|
| +    self._archive_data_file_path = archive_data_file_path
|
| +    self._archive_data_file_dir = os.path.dirname(archive_data_file_path)
|
| +    # Back pointer to the page set file.
|
| +    self._page_set_file_path = page_set_file_path
|
| +
|
| +    # Map from the relative path (as it appears in the metadata file) of the
|
| +    # .wpr file to a list of urls it supports.
|
| +    self._wpr_file_to_urls = data['archives']
|
| +
|
| +    # Map from the page url to a relative path (as it appears in the metadata
|
| +    # file) of the .wpr file.
|
| +    self._url_to_wpr_file = dict()
|
| +    for wpr_file, page_urls in self._wpr_file_to_urls.items():
|
| +      for url in page_urls:
|
| +        self._url_to_wpr_file[url] = wpr_file
|
| +
|
| +    # Path of the recording in progress, if any. While it is set,
|
| +    # WprFilePathForPage returns it for every page.
|
| +    self.temp_target_wpr_file_path = None
|
| +
|
| +  @classmethod
|
| +  def FromFile(cls, file_path, page_set_file_path):
|
| +    """Creates an instance from the JSON metadata file at |file_path|.
|
| +
|
| +    Args:
|
| +      file_path: Path of the metadata file to parse.
|
| +      page_set_file_path: Path of the page set file the metadata describes.
|
| +
|
| +    Returns:
|
| +      A new instance of |cls| backed by the parsed metadata.
|
| +    """
|
| +    with open(file_path, 'r') as f:
|
| +      data = json.load(f)
|
| +    return cls(file_path, page_set_file_path, data)
|
| +
|
| +  def WprFilePathForPage(self, page):
|
| +    """Returns the path of the .wpr file to use for |page|.
|
| +
|
| +    While a temporary recording is registered, its path is returned for every
|
| +    page. Otherwise the result is the absolute path of the archive listed for
|
| +    page.url, or None if the url is not in any archive.
|
| +    """
|
| +    if self.temp_target_wpr_file_path:
|
| +      return self.temp_target_wpr_file_path
|
| +    wpr_file = self._url_to_wpr_file.get(page.url)
|
| +    if wpr_file:
|
| +      return self._WprFileNameToPath(wpr_file)
|
| +    return None
|
| +
|
| +  def AddNewTemporaryRecording(self, temp_target_wpr_file_path):
|
| +    """Registers |temp_target_wpr_file_path| as the recording in progress."""
|
| +    self.temp_target_wpr_file_path = temp_target_wpr_file_path
|
| +
|
| +  def AddRecordedPages(self, pages):
|
| +    """Turns the temporary recording into a new archive holding |pages|.
|
| +
|
| +    Moves the temporary .wpr file to the next free archive name, points every
|
| +    page in |pages| at that archive, rewrites the metadata file and deletes
|
| +    the archives that no longer serve any url.
|
| +
|
| +    Args:
|
| +      pages: Iterable of objects with a 'url' attribute.
|
| +
|
| +    Raises:
|
| +      RuntimeError: If no temporary recording has been registered.
|
| +    """
|
| +    if not self.temp_target_wpr_file_path:
|
| +      raise RuntimeError(
|
| +          'AddRecordedPages called without a temporary recording.')
|
| +    (target_wpr_file, target_wpr_file_path) = self._NextWprFileName()
|
| +    # Move before touching the metadata: if the move fails, the in-memory
|
| +    # maps still describe what is on disk.
|
| +    shutil.move(self.temp_target_wpr_file_path, target_wpr_file_path)
|
| +    # The temporary file no longer exists, so stop handing out its path.
|
| +    self.temp_target_wpr_file_path = None
|
| +    for page in pages:
|
| +      self._SetWprFileForPage(page, target_wpr_file)
|
| +    self._WriteToFile()
|
| +    self._DeleteAbandonedWprFiles()
|
| +
|
| +  def _DeleteAbandonedWprFiles(self):
|
| +    """Forgets and deletes the archives that no longer contain any url."""
|
| +    # Update the metadata so that the abandoned wpr files don't have empty url
|
| +    # arrays.
|
| +    for wpr_file in self._AbandonedWprFiles():
|
| +      del self._wpr_file_to_urls[wpr_file]
|
| +      # Don't fail if we're unable to delete some of the files.
|
| +      wpr_file_path = self._WprFileNameToPath(wpr_file)
|
| +      try:
|
| +        os.remove(wpr_file_path)
|
| +      except OSError:
|
| +        logging.warning('Failed to delete file: %s', wpr_file_path)
|
| +
|
| +  def _AbandonedWprFiles(self):
|
| +    """Returns the names of the archives whose url list is empty."""
|
| +    # items() rather than iteritems() so this runs on Python 2 and 3. The
|
| +    # result is a new list, so callers may delete from the map while
|
| +    # iterating over it.
|
| +    return [wpr_file for wpr_file, urls in self._wpr_file_to_urls.items()
|
| +            if not urls]
|
| +
|
| +  def _WriteToFile(self):
|
| +    """Writes the metadata into the file passed as constructor parameter."""
|
| +    metadata = dict()
|
| +    metadata['description'] = (
|
| +        'Describes the Web Page Replay archives for a page set. Don\'t edit by '
|
| +        'hand! Use record_wpr for updating.')
|
| +    # Pointer from the metadata to the page set .json file.
|
| +    metadata['page_set'] = os.path.relpath(self._page_set_file_path,
|
| +                                           self._archive_data_file_dir)
|
| +    # Don't write data for abandoned archives.
|
| +    metadata['archives'] = dict(
|
| +        (wpr_file, urls)
|
| +        for wpr_file, urls in self._wpr_file_to_urls.items() if urls)
|
| +
|
| +    # Leaving the with block flushes and closes the file.
|
| +    with open(self._archive_data_file_path, 'w') as f:
|
| +      json.dump(metadata, f, indent=4)
|
| +
|
| +  def _WprFileNameToPath(self, wpr_file):
|
| +    """Returns the absolute path of |wpr_file| next to the metadata file."""
|
| +    return os.path.abspath(os.path.join(self._archive_data_file_dir, wpr_file))
|
| +
|
| +  def _NextWprFileName(self):
|
| +    """Creates a new file name for a wpr archive file.
|
| +
|
| +    Returns:
|
| +      A (file name, absolute path) tuple. The name reuses the base shared by
|
| +      the existing archives with a number one above the highest one in use.
|
| +      If there are no archives yet, the base is the name of the page set file
|
| +      without its extension and the number is 0.
|
| +
|
| +    Raises:
|
| +      Exception: If an existing archive name does not have the format
|
| +          "some_thing_number.wpr" or the archives do not share one base.
|
| +    """
|
| +    highest_number = -1
|
| +    base = None
|
| +    for wpr_file in self._wpr_file_to_urls:
|
| +      match = self._WPR_FILE_NAME_RE.match(wpr_file)
|
| +      if not match:
|
| +        raise Exception('Illegal wpr file name ' + wpr_file)
|
| +      highest_number = max(int(match.group('NUMBER')), highest_number)
|
| +      if base and match.group('BASE') != base:
|
| +        raise Exception('Illegal wpr file name ' + wpr_file +
|
| +                        ', doesn\'t begin with ' + base)
|
| +      base = match.group('BASE')
|
| +    if base is None:
|
| +      # No archives exist yet. Without this fallback the new archive would be
|
| +      # called "None_000.wpr".
|
| +      base = os.path.splitext(os.path.basename(self._page_set_file_path))[0]
|
| +    new_filename = '%s_%03d.wpr' % (base, highest_number + 1)
|
| +    return new_filename, self._WprFileNameToPath(new_filename)
|
| +
|
| +  def _SetWprFileForPage(self, page, wpr_file):
|
| +    """For modifying the metadata when we're going to record a new archive.
|
| +
|
| +    Removes page.url from the archive it is currently listed in (if any) and
|
| +    lists it under |wpr_file| instead. Only the in-memory maps change.
|
| +    """
|
| +    old_wpr_file = self._url_to_wpr_file.get(page.url)
|
| +    if old_wpr_file:
|
| +      self._wpr_file_to_urls[old_wpr_file].remove(page.url)
|
| +    self._url_to_wpr_file[page.url] = wpr_file
|
| +    self._wpr_file_to_urls.setdefault(wpr_file, []).append(page.url)
|
|
|