Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(406)

Side by Side Diff: tools/telemetry/telemetry/page_set_archive_info.py

Issue 11881051: Telemetry: add a metadata layer between page set and .wpr. (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src
Patch Set: . Created 7 years, 10 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
OLDNEW
(Empty)
1 # Copyright (c) 2013 The Chromium Authors. All rights reserved.
2 # Use of this source code is governed by a BSD-style license that can be
3 # found in the LICENSE file.
4 import collections
5 import json
6 import logging
7 import os
8 import re
9
class PageSetArchiveInfo(object):
  """Metadata layer between a page set and its Web Page Replay archives.

  Keeps two views of the same information in sync:
    * wpr file name -> list of page urls recorded in that file, and
    * page url -> wpr file name.

  Wpr file names are stored relative to the directory of the metadata file
  (exactly as they appear in that file) and are of the form
  "<base>_<number>.wpr".
  """

  def __init__(self, archive_data_file_path, page_set_file_path, data):
    """Builds the in-memory metadata.

    Args:
      archive_data_file_path: Path of the metadata file. WriteToFile writes
          to this path and wpr file names are resolved against its directory.
      page_set_file_path: Path of the page set file this metadata belongs to.
      data: Mapping with an 'archives' entry that maps each wpr file name to
          the list of urls recorded in it.
    """
    self._archive_data_file_path = archive_data_file_path
    self._archive_data_file_dir = os.path.dirname(archive_data_file_path)
    # Back pointer to the page set file.
    self._page_set_file_path = page_set_file_path

    # Map from the relative path (as it appears in the metadata file) of the
    # .wpr file to a list of urls it supports. The url lists are copied so
    # that later re-recordings never mutate the caller's |data|.
    self._wpr_file_to_urls = collections.OrderedDict(
        (wpr_file, list(urls))
        for wpr_file, urls in data['archives'].items())

    # Map from the page url to a relative path (as it appears in the metadata
    # file) of the .wpr file.
    self._url_to_wpr_file = dict()
    for wpr_file, urls in self._wpr_file_to_urls.items():
      for url in urls:
        self._url_to_wpr_file[url] = wpr_file

  @classmethod
  def FromFile(cls, file_path, page_set_file_path):
    """Creates an instance from the JSON metadata file at |file_path|.

    Args:
      file_path: Path of the JSON metadata file to read.
      page_set_file_path: Path of the page set file the metadata belongs to.

    Returns:
      A new instance populated from the file contents.
    """
    with open(file_path, 'r') as f:
      # Preserve the order in which the archives are listed in the file.
      data = json.load(f, object_pairs_hook=collections.OrderedDict)
    return cls(file_path, page_set_file_path, data)

  def WprFileForPage(self, page):
    """Returns the relative wpr file name for |page.url|, or None."""
    return self._url_to_wpr_file.get(page.url)

  def WprFilePathForPage(self, page):
    """Returns the absolute wpr file path for |page.url|, or None."""
    wpr_file = self.WprFileForPage(page)
    if wpr_file:
      return self._WprFileNameToPath(wpr_file)
    return None

  def AddNewRecording(self, pages):
    """Assigns all of |pages| to a newly named wpr file.

    Only the metadata is updated; no file is created here.

    Args:
      pages: Iterable of objects with a |url| attribute.

    Returns:
      The absolute path of the new wpr file.

    Raises:
      Exception: If an existing wpr file name is malformed (see
          _NextWprFileName).
    """
    (target_wpr_file, target_wpr_file_path) = self._NextWprFileName()
    for page in pages:
      self._SetWprFileForPage(page, target_wpr_file)
    return target_wpr_file_path

  def DeleteAbandonedWprFiles(self):
    """Drops wpr files without urls from the metadata and from disk."""
    # Update the metadata so that the abandoned wpr files don't have empty url
    # arrays.
    for wpr_file in self.AbandonedWprFiles():
      del self._wpr_file_to_urls[wpr_file]
      # Don't fail if we're unable to delete some of the files.
      wpr_file_path = self._WprFileNameToPath(wpr_file)
      try:
        os.remove(wpr_file_path)
      except OSError:
        logging.warning('Failed to delete file: %s', wpr_file_path)

  def WriteToFile(self):
    """Writes the metadata into the file passed as constructor parameter."""
    metadata = dict()
    metadata['description'] = (
        'Describes the Web Page Replay archives for a page set. Don\'t edit by '
        'hand! Use record_wpr for updating.')
    # Pointer from the metadata to the page set .json file.
    metadata['page_set'] = os.path.relpath(self._page_set_file_path,
                                           self._archive_data_file_dir)
    metadata['archives'] = self._wpr_file_to_urls.copy()
    # Don't write data for abandoned archives.
    for wpr_file in self.AbandonedWprFiles():
      del metadata['archives'][wpr_file]

    # Leaving the with block closes (and therefore flushes) the file.
    with open(self._archive_data_file_path, 'w') as f:
      json.dump(metadata, f, indent=4)

  def AbandonedWprFiles(self):
    """Returns the names of the wpr files that no longer hold any url."""
    return [wpr_file
            for wpr_file, urls in self._wpr_file_to_urls.items()
            if not urls]

  def _WprFileNameToPath(self, wpr_file):
    """Resolves a relative wpr file name against the metadata directory."""
    return os.path.abspath(os.path.join(self._archive_data_file_dir, wpr_file))

  def _NextWprFileName(self):
    """Creates a new file name for a wpr archive file.

    Returns:
      A (relative file name, absolute file path) tuple.

    Raises:
      Exception: If an existing name is not of the form "<base>_<number>.wpr"
          or the existing names don't all share the same base.
    """
    # The names are of the format "some_thing_number.wpr". Read the numbers.
    highest_number = -1
    base = None
    for wpr_file in self._wpr_file_to_urls:
      match = re.match(r'(?P<BASE>.*)_(?P<NUMBER>[0-9]+)\.wpr', wpr_file)
      if not match:
        raise Exception('Illegal wpr file name ' + wpr_file)
      highest_number = max(int(match.group('NUMBER')), highest_number)
      if base and match.group('BASE') != base:
        raise Exception('Illegal wpr file name ' + wpr_file +
                        ', doesn\'t begin with ' + base)
      base = match.group('BASE')
    if base is None:
      # No archives recorded yet: name the first one after the page set file
      # instead of producing "None_000.wpr".
      base = os.path.splitext(os.path.basename(self._page_set_file_path))[0]
    new_filename = '%s_%03d.wpr' % (base, highest_number + 1)
    return new_filename, self._WprFileNameToPath(new_filename)

  def _SetWprFileForPage(self, page, wpr_file):
    """For modifying the metadata when we're going to record a new archive."""
    old_wpr_file = self.WprFileForPage(page)
    if old_wpr_file:
      # The page moves out of its previous archive; that archive becomes
      # abandoned once its url list is empty.
      self._wpr_file_to_urls[old_wpr_file].remove(page.url)
    self._url_to_wpr_file[page.url] = wpr_file
    self._wpr_file_to_urls.setdefault(wpr_file, []).append(page.url)
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698