tools/telemetry/telemetry/page_set_archive_info.py - Issue 11881051: Telemetry: add a metadata layer between page set and .wpr.

Side by Side Diff: tools/telemetry/telemetry/page_set_archive_info.py

Issue 11881051: Telemetry: add a metadata layer between page set and .wpr. (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src

Patch Set: . Created 7 years, 10 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

« tools/telemetry/telemetry/page_set.py ('K') | « tools/telemetry/telemetry/page_set.py ('k') | tools/telemetry/telemetry/page_set_archive_info_unittest.py » ('j') | tools/telemetry/telemetry/record_wpr.py » ('J')
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Hide Comments ('s')

OLD	NEW
(Empty)
	1 # Copyright (c) 2013 The Chromium Authors. All rights reserved.

	2 # Use of this source code is governed by a BSD-style license that can be

	3 # found in the LICENSE file.

	4 import collections

	5 import json

	6 import logging

	7 import os

	8 import re

	9

	10 class PageSetArchiveInfo(object):

	11 def __init__(self, archive_data_file_path, page_set_file_path, data):

	12 self._archive_data_file_path = archive_data_file_path

	13 self._archive_data_file_dir = os.path.dirname(archive_data_file_path)

	14 # Back pointer to the page set file.

	15 self._page_set_file_path = page_set_file_path

	16

	17 # Map from the relative path (as it appears in the metadata file) of the

	18 # .wpr file to a list of urls it supports.

	19 self._wpr_file_to_urls = collections.OrderedDict(data['archives'])

	20

	21 # Map from the page url to a relative path (as it appears in the metadata

	22 # file) of the .wpr file.

	23 self._url_to_wpr_file = dict()

	24 # Find out the wpr file names for each page.

	25 for wpr_file in data['archives']:

	26 page_urls = data['archives'][wpr_file]

	27 for url in page_urls:

	28 self._url_to_wpr_file[url] = wpr_file

	29

	30 @classmethod

	31 def FromFile(cls, file_path, page_set_file_path):

	32 with open(file_path, 'r') as f:

	33 data = json.load(f)

	34 return cls(file_path, page_set_file_path, data)

	35

	36 def WprFileForPage(self, page):

	37 return self._url_to_wpr_file.get(page.url, None)

	38

	39 def WprFilePathForPage(self, page):

	40 wpr_file = self.WprFileForPage(page)

	41 if wpr_file:

	42 return self._WprFileNameToPath(wpr_file)

	43 return None

	44

	45 def AddNewRecording(self, pages):

	46 (target_wpr_file, target_wpr_file_path) = self._NextWprFileName()

	47 for page in pages:

	48 self._SetWprFileForPage(page, target_wpr_file)

	49 return target_wpr_file_path

	50

	51 def DeleteAbandonedWprFiles(self):

	52 # Update the metadata so that the abandoned wpr files don't have empty url

	53 # arrays.

	54 abandoned_wpr_files = self.AbandonedWprFiles()

	55 for wpr_file in abandoned_wpr_files:

	56 del self._wpr_file_to_urls[wpr_file]

	57 # Don't fail if we're unable to delete some of the files.

	58 wpr_file_path = self._WprFileNameToPath(wpr_file)

	59 try:

	60 os.remove(wpr_file_path)

	61 except Exception:

	62 logging.warning('Failed to delete file: %s' % wpr_file_path)

	63

	64 def WriteToFile(self):

	65 """Writes the metadata into the file passed as constructor parameter."""

	66 metadata = dict()

	67 metadata['description'] = (

	68 'Describes the Web Page Replay archives for a page set. Don\'t edit by '

	69 'hand! Use record_wpr for updating.')

	70 # Pointer from the metadata to the page set .json file.

	71 metadata['page_set'] = os.path.relpath(self._page_set_file_path,

	72 self._archive_data_file_dir)

	73 metadata['archives'] = self._wpr_file_to_urls.copy()

	74 # Don't write data for abandones archives.

	75 abandoned_wpr_files = self.AbandonedWprFiles()

	76 for wpr_file in abandoned_wpr_files:

	77 del metadata['archives'][wpr_file]

	78

	79 with open(self._archive_data_file_path, 'w') as f:

	80 json.dump(metadata, f, indent=4)

	81 f.flush()

	82

	83 def AbandonedWprFiles(self):

	84 abandoned_wpr_files = []

	85 for wpr_file, urls in self._wpr_file_to_urls.iteritems():

	86 if not urls:

	87 abandoned_wpr_files.append(wpr_file)

	88 return abandoned_wpr_files

	89

	90 def _WprFileNameToPath(self, wpr_file):

	91 return os.path.abspath(os.path.join(self._archive_data_file_dir, wpr_file))

	92

	93 def _NextWprFileName(self):

	94 """Creates a new file name for a wpr archive file."""

	95 # The names are of the format "some_thing_number.wpr". Read the numbers.

	96 highest_number = -1

	97 base = None

	98 for wpr_file in self._wpr_file_to_urls:

	99 match = re.match(r'(?P<BASE>.*)_(?P<NUMBER>[0-9]+)\.wpr', wpr_file)

	100 if not match:

	101 raise Exception('Illegal wpr file name ' + wpr_file)

	102 highest_number = max(int(match.groupdict()['NUMBER']), highest_number)

	103 if base and match.groupdict()['BASE'] != base:

	104 raise Exception('Illegal wpr file name ' + wpr_file +

	105 ', doesn\'t begin with ' + base)

	106 base = match.groupdict()['BASE']

	107 new_filename = '%s_%03d.wpr' % (base, highest_number + 1)

	108 return new_filename, self._WprFileNameToPath(new_filename)

	109

	110 def _SetWprFileForPage(self, page, wpr_file):

	111 """For modifying the metadata when we're going to record a new archive."""

	112 old_wpr_file = self.WprFileForPage(page)

	113 if old_wpr_file:

	114 self._wpr_file_to_urls[old_wpr_file].remove(page.url)

	115 self._url_to_wpr_file[page.url] = wpr_file

	116 if wpr_file not in self._wpr_file_to_urls:

	117 self._wpr_file_to_urls[wpr_file] = []

	118 self._wpr_file_to_urls[wpr_file].append(page.url)

OLD	NEW