OLD | NEW |
(Empty) | |
| 1 # Copyright (c) 2013 The Chromium Authors. All rights reserved. |
| 2 # Use of this source code is governed by a BSD-style license that can be |
| 3 # found in the LICENSE file. |
| 4 import collections |
| 5 import json |
| 6 import logging |
| 7 import os |
| 8 import re |
| 9 |
class PageSetArchiveInfo(object):
  """Bookkeeping for the Web Page Replay (.wpr) archives of a page set.

  Keeps two views of the same metadata: which urls each .wpr file supports,
  and which .wpr file serves each url. The .wpr file names are stored
  relative to the directory that holds the archive metadata file.
  """

  def __init__(self, archive_data_file_path, page_set_file_path, data):
    """Builds the lookup tables from already-parsed metadata.

    Args:
      archive_data_file_path: Path of the archive metadata file. WriteToFile
          writes to this path, and .wpr file names are resolved relative to
          its directory.
      page_set_file_path: Path of the page set file this metadata belongs to.
      data: Mapping with an 'archives' entry that maps each .wpr file name to
          the list of page urls it supports. It is not modified.
    """
    self._archive_data_file_path = archive_data_file_path
    self._archive_data_file_dir = os.path.dirname(archive_data_file_path)
    # Back pointer to the page set file.
    self._page_set_file_path = page_set_file_path

    # Map from the relative path (as it appears in the metadata file) of the
    # .wpr file to a list of urls it supports. The url lists are copied
    # because _SetWprFileForPage edits them in place and the caller's |data|
    # must stay untouched.
    self._wpr_file_to_urls = collections.OrderedDict(
        (wpr_file, list(urls))
        for wpr_file, urls in data['archives'].items())

    # Map from the page url to a relative path (as it appears in the metadata
    # file) of the .wpr file. If a url is listed under several archives, the
    # last one listed wins.
    self._url_to_wpr_file = dict()
    for wpr_file, urls in self._wpr_file_to_urls.items():
      for url in urls:
        self._url_to_wpr_file[url] = wpr_file

  @classmethod
  def FromFile(cls, file_path, page_set_file_path):
    """Creates an instance from the JSON metadata file at |file_path|.

    Args:
      file_path: Path of the JSON archive metadata file to read.
      page_set_file_path: Path of the page set file the metadata belongs to.

    Returns:
      A new instance populated from the file contents.
    """
    with open(file_path, 'r') as f:
      # Keep the archives in the order they have on disk, so that reading and
      # rewriting the file does not shuffle its entries.
      data = json.load(f, object_pairs_hook=collections.OrderedDict)
    return cls(file_path, page_set_file_path, data)

  def WprFileForPage(self, page):
    """Returns the relative .wpr file name for |page.url|, or None."""
    return self._url_to_wpr_file.get(page.url, None)

  def WprFilePathForPage(self, page):
    """Returns the absolute .wpr file path for |page.url|, or None."""
    wpr_file = self.WprFileForPage(page)
    if wpr_file:
      return self._WprFileNameToPath(wpr_file)
    return None

  def AddNewRecording(self, pages):
    """Assigns all |pages| to a newly named .wpr file.

    Only the in-memory metadata is updated; no file is created here.

    Args:
      pages: Iterable of objects with a |url| attribute.

    Returns:
      The absolute path of the new .wpr file.
    """
    (target_wpr_file, target_wpr_file_path) = self._NextWprFileName()
    for page in pages:
      self._SetWprFileForPage(page, target_wpr_file)
    return target_wpr_file_path

  def DeleteAbandonedWprFiles(self):
    """Forgets every .wpr file that has no urls left and deletes it from disk.

    A file that cannot be deleted is logged and otherwise ignored.
    """
    # Update the metadata so that the abandoned wpr files don't have empty url
    # arrays.
    for wpr_file in self.AbandonedWprFiles():
      del self._wpr_file_to_urls[wpr_file]
      wpr_file_path = self._WprFileNameToPath(wpr_file)
      # Don't fail if we're unable to delete some of the files; the broad
      # except is deliberate best-effort cleanup.
      try:
        os.remove(wpr_file_path)
      except Exception:
        logging.warning('Failed to delete file: %s', wpr_file_path)

  def WriteToFile(self):
    """Writes the metadata into the file passed as constructor parameter."""
    metadata = dict()
    metadata['description'] = (
        'Describes the Web Page Replay archives for a page set. Don\'t edit by '
        'hand! Use record_wpr for updating.')
    # Pointer from the metadata to the page set .json file.
    metadata['page_set'] = os.path.relpath(self._page_set_file_path,
                                           self._archive_data_file_dir)
    # Don't write data for abandoned archives (those with no urls left).
    metadata['archives'] = collections.OrderedDict(
        (wpr_file, urls)
        for wpr_file, urls in self._wpr_file_to_urls.items() if urls)

    # Leaving the with block closes the file, which also flushes it.
    with open(self._archive_data_file_path, 'w') as f:
      json.dump(metadata, f, indent=4)

  def AbandonedWprFiles(self):
    """Returns the names of the .wpr files that no longer support any url."""
    # items() rather than iteritems() so this runs on Python 2 and Python 3.
    return [wpr_file
            for wpr_file, urls in self._wpr_file_to_urls.items()
            if not urls]

  def _WprFileNameToPath(self, wpr_file):
    """Resolves a relative .wpr file name against the metadata directory."""
    return os.path.abspath(os.path.join(self._archive_data_file_dir, wpr_file))

  def _NextWprFileName(self):
    """Creates a new file name for a wpr archive file.

    Returns:
      A (relative file name, absolute file path) tuple. The number in the
      name is one more than the highest number among the known archives.

    Raises:
      Exception: If a known archive name is not of the expected format or
          does not share its base name with the other archives.
    """
    # The names are of the format "some_thing_number.wpr". Read the numbers.
    highest_number = -1
    base = None
    for wpr_file in self._wpr_file_to_urls:
      match = re.match(r'(?P<BASE>.*)_(?P<NUMBER>[0-9]+)\.wpr', wpr_file)
      if not match:
        raise Exception('Illegal wpr file name ' + wpr_file)
      highest_number = max(int(match.group('NUMBER')), highest_number)
      if base and match.group('BASE') != base:
        raise Exception('Illegal wpr file name ' + wpr_file +
                        ', doesn\'t begin with ' + base)
      base = match.group('BASE')
    if base is None:
      # There is no existing archive to take the base name from, so derive it
      # from the metadata file name instead of emitting "None_000.wpr".
      metadata_file_name = os.path.basename(self._archive_data_file_path)
      base = os.path.splitext(metadata_file_name)[0]
    new_filename = '%s_%03d.wpr' % (base, highest_number + 1)
    return new_filename, self._WprFileNameToPath(new_filename)

  def _SetWprFileForPage(self, page, wpr_file):
    """For modifying the metadata when we're going to record a new archive."""
    old_wpr_file = self.WprFileForPage(page)
    if old_wpr_file:
      # The old archive no longer supports this url. Its url list may become
      # empty, which is what AbandonedWprFiles reports.
      self._wpr_file_to_urls[old_wpr_file].remove(page.url)
    self._url_to_wpr_file[page.url] = wpr_file
    if wpr_file not in self._wpr_file_to_urls:
      self._wpr_file_to_urls[wpr_file] = []
    self._wpr_file_to_urls[wpr_file].append(page.url)
OLD | NEW |