tools/telemetry/telemetry/page/page_set_archive_info.py - platform/external/chromium_org - Git at Google

 # Copyright (c) 2013 The Chromium Authors. All rights reserved.
 # Use of this source code is governed by a BSD-style license that can be
 # found in the LICENSE file.
 import json
 import logging
 import os
 import re
 import shutil

 from telemetry.page import cloud_storage


 def _UpdateHashFile(file_path):
   """Stores the hash of |file_path| in a sibling '<file_path>.sha1' file.

   The hash value is whatever cloud_storage.GetHash returns for |file_path|.

   Args:
     file_path: Path of the file whose hash file should be (re)written.
   """
   hash_file_path = file_path + '.sha1'
   # The hash file is opened (and truncated) before the hash is computed.
   with open(hash_file_path, 'wb') as hash_file:
     file_hash = cloud_storage.GetHash(file_path)
     hash_file.write(file_hash)
     hash_file.flush()


 class PageSetArchiveInfo(object):
   """Tracks which Web Page Replay (.wpr) archive holds each page url.

   The mapping is read from, and written back to, a JSON metadata file whose
   'archives' entry maps a .wpr file name (relative to the metadata file's
   directory) to the list of urls stored in that archive.
   """

   def __init__(self, archive_data_file_path, page_set_file_path, data):
     """Builds the url <-> archive maps from already parsed metadata.

     Args:
       archive_data_file_path: Path of the JSON metadata file.
       page_set_file_path: Path of the page set file the metadata belongs to.
       data: Parsed metadata. data['archives'] maps each .wpr file name to a
           list of urls. The 'archives' dict is kept by reference and is
           mutated by later calls on this object.
     """
     self._archive_data_file_path = archive_data_file_path
     self._archive_data_file_dir = os.path.dirname(archive_data_file_path)
     # Back pointer to the page set file.
     self._page_set_file_path = page_set_file_path

     # Map from the relative path (as it appears in the metadata file) of the
     # .wpr file to a list of urls it supports.
     self._wpr_file_to_urls = data['archives']

     # Archive names are relative to the metadata file, so resolve them first
     # instead of letting them be interpreted against the working directory.
     # NOTE(review): assumes GetIfChanged expects a local file path, as the
     # call in FromFile suggests -- confirm against cloud_storage.
     for wpr_file in self._wpr_file_to_urls:
       cloud_storage.GetIfChanged(cloud_storage.DEFAULT_BUCKET,
                                  self._WprFileNameToPath(wpr_file))

     # Map from the page url to a relative path (as it appears in the metadata
     # file) of the .wpr file.
     self._url_to_wpr_file = {}
     for wpr_file, page_urls in self._wpr_file_to_urls.items():
       for url in page_urls:
         self._url_to_wpr_file[url] = wpr_file
     # Path of a recording in progress; set by AddNewTemporaryRecording.
     self.temp_target_wpr_file_path = None

   @classmethod
   def FromFile(cls, file_path, page_set_file_path):
     """Creates an instance from the metadata file at |file_path|.

     Args:
       file_path: Path of the JSON metadata file. It may not exist yet.
       page_set_file_path: Path of the page set file.

     Returns:
       A new instance; it has no archives when |file_path| does not exist.
     """
     cloud_storage.GetIfChanged(cloud_storage.DEFAULT_BUCKET, file_path)

     if not os.path.exists(file_path):
       return cls(file_path, page_set_file_path, {'archives': {}})
     with open(file_path, 'r') as f:
       data = json.load(f)
     return cls(file_path, page_set_file_path, data)

   def WprFilePathForPage(self, page):
     """Returns the archive path to use for |page|, or None if there is none.

     While a temporary recording is registered, its path is returned for
     every page.

     Args:
       page: Object with a 'url' attribute.
     """
     if self.temp_target_wpr_file_path:
       return self.temp_target_wpr_file_path
     wpr_file = self._url_to_wpr_file.get(page.url)
     if wpr_file:
       return self._WprFileNameToPath(wpr_file)
     return None

   def AddNewTemporaryRecording(self, temp_target_wpr_file_path):
     """Registers |temp_target_wpr_file_path| as the recording in progress."""
     self.temp_target_wpr_file_path = temp_target_wpr_file_path

   def AddRecordedPages(self, urls):
     """Turns the temporary recording into the archive for |urls|.

     Moves the file registered through AddNewTemporaryRecording to the next
     free archive name, rewrites the metadata and hash files, and deletes
     archives that no longer serve any url.

     Args:
       urls: Urls contained in the temporary recording.
     """
     (target_wpr_file, target_wpr_file_path) = self._NextWprFileName()
     for url in urls:
       self._SetWprFileForPage(url, target_wpr_file)
     shutil.move(self.temp_target_wpr_file_path, target_wpr_file_path)
     _UpdateHashFile(target_wpr_file_path)
     self._WriteToFile()
     self._DeleteAbandonedWprFiles()

   def _DeleteAbandonedWprFiles(self):
     """Forgets archives without urls and removes their files from disk."""
     # Update the metadata so that the abandoned wpr files don't have empty url
     # arrays.
     for wpr_file in self._AbandonedWprFiles():
       del self._wpr_file_to_urls[wpr_file]
       # Don't fail if we're unable to delete some of the files.
       wpr_file_path = self._WprFileNameToPath(wpr_file)
       try:
         os.remove(wpr_file_path)
       except Exception:
         logging.warning('Failed to delete file: %s', wpr_file_path)

   def _AbandonedWprFiles(self):
     """Returns the names of the archives whose url list is empty."""
     # items() instead of iteritems(): the latter does not exist on Python 3.
     return [wpr_file for wpr_file, urls in self._wpr_file_to_urls.items()
             if not urls]

   def _WriteToFile(self):
     """Writes the metadata into the file passed as constructor parameter."""
     metadata = dict()
     metadata['description'] = (
         'Describes the Web Page Replay archives for a page set. Don\'t edit by '
         'hand! Use record_wpr for updating.')
     # Pointer from the metadata to the page set .json file.
     metadata['page_set'] = os.path.relpath(self._page_set_file_path,
                                            self._archive_data_file_dir)
     # Don't write data for abandoned archives (those without urls).
     metadata['archives'] = dict(
         (wpr_file, urls)
         for wpr_file, urls in self._wpr_file_to_urls.items() if urls)

     with open(self._archive_data_file_path, 'w') as f:
       json.dump(metadata, f, indent=4)
       f.flush()
     _UpdateHashFile(self._archive_data_file_path)

   def _WprFileNameToPath(self, wpr_file):
     """Returns the absolute path of |wpr_file| inside the metadata directory."""
     return os.path.abspath(os.path.join(self._archive_data_file_dir, wpr_file))

   def _NextWprFileName(self):
     """Creates a new file name for a wpr archive file.

     Returns:
       A (file name, absolute file path) tuple for the new archive.

     Raises:
       Exception: An existing archive name is not of the form
           "<base>_<number>.wpr", or the archives do not share one base.
     """
     # The names are of the format "some_thing_number.wpr". Read the numbers.
     highest_number = -1
     base = None
     for wpr_file in self._wpr_file_to_urls:
       match = re.match(r'(?P<BASE>.*)_(?P<NUMBER>[0-9]+)\.wpr', wpr_file)
       if not match:
         raise Exception('Illegal wpr file name ' + wpr_file)
       highest_number = max(int(match.group('NUMBER')), highest_number)
       if base and match.group('BASE') != base:
         raise Exception('Illegal wpr file name ' + wpr_file +
                         ', doesn\'t begin with ' + base)
       base = match.group('BASE')
     if not base:
       # If we're creating a completely new info file, use the base name of the
       # page set file.
       base = os.path.splitext(os.path.basename(self._page_set_file_path))[0]
     new_filename = '%s_%03d.wpr' % (base, highest_number + 1)
     return new_filename, self._WprFileNameToPath(new_filename)

   def _SetWprFileForPage(self, url, wpr_file):
     """For modifying the metadata when we're going to record a new archive.

     Moves |url| from the archive it was previously in (if any) to |wpr_file|
     in both maps. Only the in-memory maps change; nothing is written to disk.
     """
     old_wpr_file = self._url_to_wpr_file.get(url)
     if old_wpr_file:
       self._wpr_file_to_urls[old_wpr_file].remove(url)
     self._url_to_wpr_file[url] = wpr_file
     self._wpr_file_to_urls.setdefault(wpr_file, []).append(url)
	# Copyright (c) 2013 The Chromium Authors. All rights reserved.
	# Use of this source code is governed by a BSD-style license that can be
	# found in the LICENSE file.
	import json
	import logging
	import os
	import re
	import shutil

	from telemetry.page import cloud_storage


	def _UpdateHashFile(file_path):
	  """Stores the hash of |file_path| in a sibling '<file_path>.sha1' file.

	  The hash value is whatever cloud_storage.GetHash returns for |file_path|.

	  Args:
	    file_path: Path of the file whose hash file should be (re)written.
	  """
	  hash_file_path = file_path + '.sha1'
	  # The hash file is opened (and truncated) before the hash is computed.
	  with open(hash_file_path, 'wb') as hash_file:
	    file_hash = cloud_storage.GetHash(file_path)
	    hash_file.write(file_hash)
	    hash_file.flush()


	class PageSetArchiveInfo(object):
	  """Tracks which Web Page Replay (.wpr) archive holds each page url.

	  The mapping is read from, and written back to, a JSON metadata file whose
	  'archives' entry maps a .wpr file name (relative to the metadata file's
	  directory) to the list of urls stored in that archive.
	  """

	  def __init__(self, archive_data_file_path, page_set_file_path, data):
	    """Builds the url <-> archive maps from already parsed metadata.

	    Args:
	      archive_data_file_path: Path of the JSON metadata file.
	      page_set_file_path: Path of the page set file the metadata belongs to.
	      data: Parsed metadata. data['archives'] maps each .wpr file name to a
	          list of urls. The 'archives' dict is kept by reference and is
	          mutated by later calls on this object.
	    """
	    self._archive_data_file_path = archive_data_file_path
	    self._archive_data_file_dir = os.path.dirname(archive_data_file_path)
	    # Back pointer to the page set file.
	    self._page_set_file_path = page_set_file_path

	    # Map from the relative path (as it appears in the metadata file) of the
	    # .wpr file to a list of urls it supports.
	    self._wpr_file_to_urls = data['archives']

	    # Archive names are relative to the metadata file, so resolve them first
	    # instead of letting them be interpreted against the working directory.
	    # NOTE(review): assumes GetIfChanged expects a local file path, as the
	    # call in FromFile suggests -- confirm against cloud_storage.
	    for wpr_file in self._wpr_file_to_urls:
	      cloud_storage.GetIfChanged(cloud_storage.DEFAULT_BUCKET,
	                                 self._WprFileNameToPath(wpr_file))

	    # Map from the page url to a relative path (as it appears in the metadata
	    # file) of the .wpr file.
	    self._url_to_wpr_file = {}
	    for wpr_file, page_urls in self._wpr_file_to_urls.items():
	      for url in page_urls:
	        self._url_to_wpr_file[url] = wpr_file
	    # Path of a recording in progress; set by AddNewTemporaryRecording.
	    self.temp_target_wpr_file_path = None

	  @classmethod
	  def FromFile(cls, file_path, page_set_file_path):
	    """Creates an instance from the metadata file at |file_path|.

	    Args:
	      file_path: Path of the JSON metadata file. It may not exist yet.
	      page_set_file_path: Path of the page set file.

	    Returns:
	      A new instance; it has no archives when |file_path| does not exist.
	    """
	    cloud_storage.GetIfChanged(cloud_storage.DEFAULT_BUCKET, file_path)

	    if not os.path.exists(file_path):
	      return cls(file_path, page_set_file_path, {'archives': {}})
	    with open(file_path, 'r') as f:
	      data = json.load(f)
	    return cls(file_path, page_set_file_path, data)

	  def WprFilePathForPage(self, page):
	    """Returns the archive path to use for |page|, or None if there is none.

	    While a temporary recording is registered, its path is returned for
	    every page.

	    Args:
	      page: Object with a 'url' attribute.
	    """
	    if self.temp_target_wpr_file_path:
	      return self.temp_target_wpr_file_path
	    wpr_file = self._url_to_wpr_file.get(page.url)
	    if wpr_file:
	      return self._WprFileNameToPath(wpr_file)
	    return None

	  def AddNewTemporaryRecording(self, temp_target_wpr_file_path):
	    """Registers |temp_target_wpr_file_path| as the recording in progress."""
	    self.temp_target_wpr_file_path = temp_target_wpr_file_path

	  def AddRecordedPages(self, urls):
	    """Turns the temporary recording into the archive for |urls|.

	    Moves the file registered through AddNewTemporaryRecording to the next
	    free archive name, rewrites the metadata and hash files, and deletes
	    archives that no longer serve any url.

	    Args:
	      urls: Urls contained in the temporary recording.
	    """
	    (target_wpr_file, target_wpr_file_path) = self._NextWprFileName()
	    for url in urls:
	      self._SetWprFileForPage(url, target_wpr_file)
	    shutil.move(self.temp_target_wpr_file_path, target_wpr_file_path)
	    _UpdateHashFile(target_wpr_file_path)
	    self._WriteToFile()
	    self._DeleteAbandonedWprFiles()

	  def _DeleteAbandonedWprFiles(self):
	    """Forgets archives without urls and removes their files from disk."""
	    # Update the metadata so that the abandoned wpr files don't have empty url
	    # arrays.
	    for wpr_file in self._AbandonedWprFiles():
	      del self._wpr_file_to_urls[wpr_file]
	      # Don't fail if we're unable to delete some of the files.
	      wpr_file_path = self._WprFileNameToPath(wpr_file)
	      try:
	        os.remove(wpr_file_path)
	      except Exception:
	        logging.warning('Failed to delete file: %s', wpr_file_path)

	  def _AbandonedWprFiles(self):
	    """Returns the names of the archives whose url list is empty."""
	    # items() instead of iteritems(): the latter does not exist on Python 3.
	    return [wpr_file for wpr_file, urls in self._wpr_file_to_urls.items()
	            if not urls]

	  def _WriteToFile(self):
	    """Writes the metadata into the file passed as constructor parameter."""
	    metadata = dict()
	    metadata['description'] = (
	        'Describes the Web Page Replay archives for a page set. Don\'t edit by '
	        'hand! Use record_wpr for updating.')
	    # Pointer from the metadata to the page set .json file.
	    metadata['page_set'] = os.path.relpath(self._page_set_file_path,
	                                           self._archive_data_file_dir)
	    # Don't write data for abandoned archives (those without urls).
	    metadata['archives'] = dict(
	        (wpr_file, urls)
	        for wpr_file, urls in self._wpr_file_to_urls.items() if urls)

	    with open(self._archive_data_file_path, 'w') as f:
	      json.dump(metadata, f, indent=4)
	      f.flush()
	    _UpdateHashFile(self._archive_data_file_path)

	  def _WprFileNameToPath(self, wpr_file):
	    """Returns the absolute path of |wpr_file| inside the metadata directory."""
	    return os.path.abspath(os.path.join(self._archive_data_file_dir, wpr_file))

	  def _NextWprFileName(self):
	    """Creates a new file name for a wpr archive file.

	    Returns:
	      A (file name, absolute file path) tuple for the new archive.

	    Raises:
	      Exception: An existing archive name is not of the form
	          "<base>_<number>.wpr", or the archives do not share one base.
	    """
	    # The names are of the format "some_thing_number.wpr". Read the numbers.
	    highest_number = -1
	    base = None
	    for wpr_file in self._wpr_file_to_urls:
	      match = re.match(r'(?P<BASE>.*)_(?P<NUMBER>[0-9]+)\.wpr', wpr_file)
	      if not match:
	        raise Exception('Illegal wpr file name ' + wpr_file)
	      highest_number = max(int(match.group('NUMBER')), highest_number)
	      if base and match.group('BASE') != base:
	        raise Exception('Illegal wpr file name ' + wpr_file +
	                        ', doesn\'t begin with ' + base)
	      base = match.group('BASE')
	    if not base:
	      # If we're creating a completely new info file, use the base name of the
	      # page set file.
	      base = os.path.splitext(os.path.basename(self._page_set_file_path))[0]
	    new_filename = '%s_%03d.wpr' % (base, highest_number + 1)
	    return new_filename, self._WprFileNameToPath(new_filename)

	  def _SetWprFileForPage(self, url, wpr_file):
	    """For modifying the metadata when we're going to record a new archive.

	    Moves |url| from the archive it was previously in (if any) to |wpr_file|
	    in both maps. Only the in-memory maps change; nothing is written to disk.
	    """
	    old_wpr_file = self._url_to_wpr_file.get(url)
	    if old_wpr_file:
	      self._wpr_file_to_urls[old_wpr_file].remove(url)
	    self._url_to_wpr_file[url] = wpr_file
	    self._wpr_file_to_urls.setdefault(wpr_file, []).append(url)