OLD | NEW |
| (Empty) |
# Copyright (c) 2013 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
4 import json | |
5 import logging | |
6 import os | |
7 import re | |
8 import shutil | |
9 | |
class PageSetArchiveInfo(object):
  """Metadata mapping the pages of a page set to their .wpr archive files.

  The metadata is read from / written to a JSON file whose 'archives' entry
  maps a .wpr file name (relative to the metadata file's directory) to the
  list of page urls stored in that archive.
  """

  # Archive names have the format "some_thing_number.wpr".
  _WPR_FILE_NAME_RE = re.compile(r'(?P<BASE>.*)_(?P<NUMBER>[0-9]+)\.wpr')

  def __init__(self, archive_data_file_path, page_set_file_path, data):
    """Builds the url <-> archive lookup tables.

    Args:
      archive_data_file_path: Path of the JSON metadata file; _WriteToFile
          writes to this path.
      page_set_file_path: Path of the page set file the metadata belongs to.
      data: Dict with an 'archives' key mapping each .wpr file name to the
          list of urls it contains. The mapping is kept by reference and is
          modified in place when new recordings are added.

    Raises:
      KeyError: If |data| has no 'archives' entry.
    """
    self._archive_data_file_path = archive_data_file_path
    self._archive_data_file_dir = os.path.dirname(archive_data_file_path)
    # Back pointer to the page set file.
    self._page_set_file_path = page_set_file_path

    # Map from the relative path (as it appears in the metadata file) of the
    # .wpr file to a list of urls it supports.
    self._wpr_file_to_urls = data['archives']

    # Map from the page url to a relative path (as it appears in the metadata
    # file) of the .wpr file.
    self._url_to_wpr_file = dict()
    for wpr_file, page_urls in self._wpr_file_to_urls.items():
      for url in page_urls:
        self._url_to_wpr_file[url] = wpr_file

    # Path of a recording in progress; while set, it is returned for every
    # page by WprFilePathForPage.
    self.temp_target_wpr_file_path = None

  @classmethod
  def FromFile(cls, file_path, page_set_file_path):
    """Creates an instance from the JSON metadata file at |file_path|.

    If the file does not exist, an instance without any archives is returned.
    """
    if os.path.exists(file_path):
      with open(file_path, 'r') as f:
        data = json.load(f)
        return cls(file_path, page_set_file_path, data)
    return cls(file_path, page_set_file_path, {'archives': {}})

  def WprFilePathForPage(self, page):
    """Returns the absolute path of the archive to use for |page|.

    A pending temporary recording takes precedence over the recorded
    archives. Returns None if no archive is known for |page.url|.
    """
    if self.temp_target_wpr_file_path:
      return self.temp_target_wpr_file_path
    wpr_file = self._url_to_wpr_file.get(page.url, None)
    if wpr_file:
      return self._WprFileNameToPath(wpr_file)
    return None

  def AddNewTemporaryRecording(self, temp_target_wpr_file_path):
    """Registers the file a new recording is being written to."""
    self.temp_target_wpr_file_path = temp_target_wpr_file_path

  def AddRecordedPages(self, pages):
    """Turns the temporary recording into the archive for |pages|.

    The temporary file is moved to the next free archive name next to the
    metadata file, the metadata file is rewritten, and archives that no
    longer contain any page are deleted.

    Args:
      pages: Iterable of objects with a |url| attribute.
    """
    (target_wpr_file, target_wpr_file_path) = self._NextWprFileName()
    for page in pages:
      self._SetWprFileForPage(page, target_wpr_file)
    shutil.move(self.temp_target_wpr_file_path, target_wpr_file_path)
    # The temporary file is gone now; forget it so that WprFilePathForPage
    # resolves pages through the metadata instead of returning a stale path.
    self.temp_target_wpr_file_path = None
    self._WriteToFile()
    self._DeleteAbandonedWprFiles()

  def _DeleteAbandonedWprFiles(self):
    """Drops archives without urls from the metadata and from disk."""
    # Update the metadata so that the abandoned wpr files don't have empty url
    # arrays.
    for wpr_file in self._AbandonedWprFiles():
      del self._wpr_file_to_urls[wpr_file]
      # Don't fail if we're unable to delete some of the files.
      wpr_file_path = self._WprFileNameToPath(wpr_file)
      try:
        os.remove(wpr_file_path)
      except OSError:
        logging.warning('Failed to delete file: %s', wpr_file_path)

  def _AbandonedWprFiles(self):
    """Returns a list of the archive names whose url list is empty."""
    # items() (rather than the Python 2 only iteritems()) keeps this working
    # on both Python 2 and Python 3.
    return [wpr_file for wpr_file, urls in self._wpr_file_to_urls.items()
            if not urls]

  def _WriteToFile(self):
    """Writes the metadata into the file passed as constructor parameter."""
    metadata = dict()
    metadata['description'] = (
        'Describes the Web Page Replay archives for a page set. Don\'t edit by '
        'hand! Use record_wpr for updating.')
    # Pointer from the metadata to the page set .json file.
    metadata['page_set'] = os.path.relpath(self._page_set_file_path,
                                           self._archive_data_file_dir)
    # Don't write data for abandoned archives.
    metadata['archives'] = dict(
        (wpr_file, urls)
        for wpr_file, urls in self._wpr_file_to_urls.items() if urls)

    with open(self._archive_data_file_path, 'w') as f:
      json.dump(metadata, f, indent=4)

  def _WprFileNameToPath(self, wpr_file):
    """Returns the absolute path of |wpr_file| next to the metadata file."""
    return os.path.abspath(os.path.join(self._archive_data_file_dir, wpr_file))

  def _NextWprFileName(self):
    """Creates a new file name for a wpr archive file.

    Returns:
      A (file name, absolute file path) tuple. The number in the name is one
      higher than the highest number among the known archives.

    Raises:
      Exception: If a known archive name is not of the expected format, or
          the known archives don't share a common base name.
    """
    # The names are of the format "some_thing_number.wpr". Read the numbers.
    highest_number = -1
    base = None
    for wpr_file in self._wpr_file_to_urls:
      match = self._WPR_FILE_NAME_RE.match(wpr_file)
      if not match:
        raise Exception('Illegal wpr file name ' + wpr_file)
      highest_number = max(int(match.group('NUMBER')), highest_number)
      if base and match.group('BASE') != base:
        raise Exception('Illegal wpr file name ' + wpr_file +
                        ', doesn\'t begin with ' + base)
      base = match.group('BASE')
    if not base:
      # If we're creating a completely new info file, use the base name of the
      # page set file.
      base = os.path.splitext(os.path.basename(self._page_set_file_path))[0]
    new_filename = '%s_%03d.wpr' % (base, highest_number + 1)
    return new_filename, self._WprFileNameToPath(new_filename)

  def _SetWprFileForPage(self, page, wpr_file):
    """For modifying the metadata when we're going to record a new archive.

    Moves |page.url| from the archive it currently belongs to (if any) to
    |wpr_file|. Only the in-memory tables are changed.
    """
    old_wpr_file = self._url_to_wpr_file.get(page.url, None)
    if old_wpr_file:
      self._wpr_file_to_urls[old_wpr_file].remove(page.url)
    self._url_to_wpr_file[page.url] = wpr_file
    if wpr_file not in self._wpr_file_to_urls:
      self._wpr_file_to_urls[wpr_file] = []
    self._wpr_file_to_urls[wpr_file].append(page.url)
OLD | NEW |