# Copyright 2013 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

import json
import logging
import os
import re
import shutil
import tempfile

from catapult_base import cloud_storage  # pylint: disable=import-error
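
# The archive info file read by this module is a JSON file of roughly the
# following shape (an illustrative sketch; the archive and story names are
# hypothetical):
#
#   {
#       "description": "Describes the Web Page Replay archives ...",
#       "archives": {
#           "story_set_000.wpr": ["story_a", "story_b"],
#           "story_set_001.wpr": ["story_c"]
#       }
#   }
#
# 'archives' maps each .wpr file (relative to this file's directory) to the
# story names it supports; _WriteToFile below emits exactly this structure.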


def AssertValidCloudStorageBucket(bucket):
  is_valid = bucket in (None,
                        cloud_storage.PUBLIC_BUCKET,
                        cloud_storage.PARTNER_BUCKET,
                        cloud_storage.INTERNAL_BUCKET)
  if not is_valid:
    raise ValueError("Cloud storage privacy bucket %s is invalid" % bucket)


class ArchiveError(Exception):
  pass


class WprArchiveInfo(object):
  def __init__(self, file_path, data, bucket):
    AssertValidCloudStorageBucket(bucket)
    self._file_path = file_path
    self._base_dir = os.path.dirname(file_path)
    self._data = data
    self._bucket = bucket

    # Ensure directory exists.
    if not os.path.exists(self._base_dir):
      os.makedirs(self._base_dir)

    # Map from the relative path (as it appears in the metadata file) of the
    # .wpr file to a list of story names it supports.
    self._wpr_file_to_story_names = data['archives']

    # Map from the story name to a relative path (as it appears
    # in the metadata file) of the .wpr file.
    self._story_name_to_wpr_file = dict()
    # Find out the wpr file names for each story.
    for wpr_file in data['archives']:
      story_names = data['archives'][wpr_file]
      for story_name in story_names:
        self._story_name_to_wpr_file[story_name] = wpr_file

    self.temp_target_wpr_file_path = None

  @classmethod
  def FromFile(cls, file_path, bucket):
    if os.path.exists(file_path):
      with open(file_path, 'r') as f:
        data = json.load(f)
        return cls(file_path, data, bucket)
    return cls(file_path, {'archives': {}}, bucket)
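
  # A minimal usage sketch (the path, bucket, and story object are
  # illustrative, not part of this module):
  #
  #   archive_info = WprArchiveInfo.FromFile(
  #       '/path/to/story_set.json', cloud_storage.PUBLIC_BUCKET)
  #   archive_info.DownloadArchivesIfNeeded()
  #   wpr_path = archive_info.WprFilePathForStory(story)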

  def DownloadArchivesIfNeeded(self):
    """Downloads archives iff the Archive has a bucket parameter and the user
    has permission to access the bucket.

    Raises cloud storage Permissions or Credentials error when there is no
    local copy of the archive and the user doesn't have permission to access
    the archive's bucket.

    Warns when a bucket is not specified or when the user doesn't have
    permission to access the archive's bucket but a local copy of the archive
    exists.
    """
    # Download all .wpr files.
    if not self._bucket:
      logging.warning('Story set in %s has no bucket specified, and '
                      'cannot be downloaded from cloud_storage.',
                      self._file_path)
      return

    assert 'archives' in self._data, (
        "Invalid data format in %s. 'archives' field is needed" %
        self._file_path)
    for archive_path in self._data['archives']:
      archive_path = self._WprFileNameToPath(archive_path)
      try:
        cloud_storage.GetIfChanged(archive_path, self._bucket)
      except (cloud_storage.CredentialsError, cloud_storage.PermissionError):
        if os.path.exists(archive_path):
          # If the archive exists, assume the user recorded their own and
          # simply warn.
          logging.warning('Need credentials to update WPR archive: %s',
                          archive_path)
        else:
          logging.error("You either aren't authenticated or don't have "
                        "permission to use the archives for this page set."
                        "\nYou may need to run gsutil config."
                        "\nYou can find instructions for gsutil config at: "
                        "http://www.chromium.org/developers/telemetry/"
                        "upload_to_cloud_storage")
          raise

  def WprFilePathForStory(self, story):
    if self.temp_target_wpr_file_path:
      return self.temp_target_wpr_file_path
    wpr_file = self._story_name_to_wpr_file.get(story.display_name, None)
    if wpr_file is None and hasattr(story, 'url'):
      # Some old pages always use the URL to identify a page rather than the
      # display_name, so try to look for that.
      wpr_file = self._story_name_to_wpr_file.get(story.url, None)
    if wpr_file:
      return self._WprFileNameToPath(wpr_file)
    return None

  def AddNewTemporaryRecording(self, temp_wpr_file_path=None):
    if temp_wpr_file_path is None:
      temp_wpr_file_handle, temp_wpr_file_path = tempfile.mkstemp()
      os.close(temp_wpr_file_handle)
    self.temp_target_wpr_file_path = temp_wpr_file_path
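
  # Sketch of how a recording tool is expected to drive this class
  # (record_wpr is the intended client per the metadata description; the
  # variable names below are illustrative):
  #
  #   archive_info.AddNewTemporaryRecording()
  #   ... record stories into archive_info.temp_target_wpr_file_path ...
  #   archive_info.AddRecordedStories(recorded_stories,
  #                                   upload_to_cloud_storage=True)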

  def AddRecordedStories(self, stories, upload_to_cloud_storage=False):
    if not stories:
      os.remove(self.temp_target_wpr_file_path)
      return

    (target_wpr_file, target_wpr_file_path) = self._NextWprFileName()
    for story in stories:
      self._SetWprFileForStory(story.display_name, target_wpr_file)
    shutil.move(self.temp_target_wpr_file_path, target_wpr_file_path)

    # Update the .sha1 hash file next to the archive.
    target_wpr_file_hash = cloud_storage.CalculateHash(target_wpr_file_path)
    with open(target_wpr_file_path + '.sha1', 'wb') as f:
      f.write(target_wpr_file_hash)
      f.flush()

    self._WriteToFile()
    self._DeleteAbandonedWprFiles()

    # Upload to cloud storage.
    if upload_to_cloud_storage:
      if not self._bucket:
        logging.warning('StorySet must have bucket specified to upload '
                        'stories to cloud storage.')
        return
      try:
        cloud_storage.Insert(self._bucket, target_wpr_file_hash,
                             target_wpr_file_path)
      except cloud_storage.CloudStorageError as e:
        logging.warning('Failed to upload wpr file %s to cloud storage. '
                        'Error: %s', target_wpr_file_path, e)

  def _DeleteAbandonedWprFiles(self):
    # Update the metadata so that the abandoned wpr files don't have
    # empty story name arrays.
    abandoned_wpr_files = self._AbandonedWprFiles()
    for wpr_file in abandoned_wpr_files:
      del self._wpr_file_to_story_names[wpr_file]
      # Don't fail if we're unable to delete some of the files.
      wpr_file_path = self._WprFileNameToPath(wpr_file)
      try:
        os.remove(wpr_file_path)
      except Exception:
        logging.warning('Failed to delete file: %s' % wpr_file_path)

  def _AbandonedWprFiles(self):
    abandoned_wpr_files = []
    for wpr_file, story_names in (
        self._wpr_file_to_story_names.iteritems()):
      if not story_names:
        abandoned_wpr_files.append(wpr_file)
    return abandoned_wpr_files

  def _WriteToFile(self):
    """Writes the metadata into the file passed as constructor parameter."""
    metadata = dict()
    metadata['description'] = (
        'Describes the Web Page Replay archives for a story set. '
        'Don\'t edit by hand! Use record_wpr for updating.')
    metadata['archives'] = self._wpr_file_to_story_names.copy()
    # Don't write data for abandoned archives.
    abandoned_wpr_files = self._AbandonedWprFiles()
    for wpr_file in abandoned_wpr_files:
      del metadata['archives'][wpr_file]

    with open(self._file_path, 'w') as f:
      json.dump(metadata, f, indent=4)
      f.flush()

  def _WprFileNameToPath(self, wpr_file):
    return os.path.abspath(os.path.join(self._base_dir, wpr_file))

  def _NextWprFileName(self):
    """Creates a new file name for a wpr archive file."""
    # The names are of the format "some_thing_number.wpr". Read the numbers.
    highest_number = -1
    base = None
    for wpr_file in self._wpr_file_to_story_names:
      match = re.match(r'(?P<BASE>.*)_(?P<NUMBER>[0-9]+)\.wpr', wpr_file)
      if not match:
        raise Exception('Illegal wpr file name ' + wpr_file)
      highest_number = max(int(match.groupdict()['NUMBER']), highest_number)
      if base and match.groupdict()['BASE'] != base:
        raise Exception('Illegal wpr file name ' + wpr_file +
                        ', doesn\'t begin with ' + base)
      base = match.groupdict()['BASE']
    if not base:
      # If we're creating a completely new info file, use the base name of the
      # story set file.
      base = os.path.splitext(os.path.basename(self._file_path))[0]
    new_filename = '%s_%03d.wpr' % (base, highest_number + 1)
    return new_filename, self._WprFileNameToPath(new_filename)

  def _SetWprFileForStory(self, story_name, wpr_file):
    """For modifying the metadata when we're going to record a new archive."""
    old_wpr_file = self._story_name_to_wpr_file.get(story_name, None)
    if old_wpr_file:
      self._wpr_file_to_story_names[old_wpr_file].remove(story_name)
    self._story_name_to_wpr_file[story_name] = wpr_file
    if wpr_file not in self._wpr_file_to_story_names:
      self._wpr_file_to_story_names[wpr_file] = []
    self._wpr_file_to_story_names[wpr_file].append(story_name)