chrome/common/extensions/docs/server2/subversion_file_system.py - platform/external/chromium_org - Git at Google

 # Copyright (c) 2012 The Chromium Authors. All rights reserved.
 # Use of this source code is governed by a BSD-style license that can be
 # found in the LICENSE file.

 import posixpath
 import traceback
 import xml.dom.minidom as xml
 from xml.parsers.expat import ExpatError

 from appengine_url_fetcher import AppEngineUrlFetcher
 from appengine_wrappers import IsDownloadError
 from docs_server_utils import StringIdentity
 from file_system import (
     FileNotFoundError, FileSystem, FileSystemError, StatInfo)
 from future import Future
 import url_constants


 def _ParseHTML(html):
   '''Parses |html| with minidom, dropping every line that expat rejects.

   Unfortunately, the viewvc page has a stray </div> tag, so this takes care
   of all mismatched tags: whenever parsing fails, the line the parser
   complained about is removed and parsing is retried.

   Returns the parsed xml.dom.minidom Document. Re-raises the ExpatError when
   removing the reported line would leave |html| unchanged (for example when
   |html| is empty), because retrying could then never succeed.
   '''
   # Iterate rather than recurse so that a page with many bad lines does not
   # consume one stack frame per removed line.
   while True:
     try:
       return xml.parseString(html)
     except ExpatError as e:
       remaining = '\n'.join(
           line for (i, line) in enumerate(html.split('\n'))
           if e.lineno != i + 1)
       if remaining == html:
         # Nothing was removed, so the very same error would recur forever.
         raise
       html = remaining

 def _InnerText(node):
   '''Returns the concatenated text of |node| and all of its descendants,
   similar to node.innerText in the JS DOM. Whitespace is stripped from the
   text of every descendant as well as from the final result.
   '''
   pieces = [node.nodeValue] if node.nodeValue else []
   # Not every object passed in necessarily has children, so fall back to an
   # empty sequence when the attribute is missing.
   pieces.extend(_InnerText(child) for child in getattr(node, 'childNodes', ()))
   return ''.join(pieces).strip()

 def _CreateStatInfo(html):
   '''Scrapes the markup of a ViewVC directory page into a StatInfo.

   Returns StatInfo(parent_version, child_versions). |parent_version| is the
   text of the first link in the "Directory revision:" row, or None if no such
   row was found. |child_versions| maps the name found in each 5-cell row
   (directories end in /) to its version as a string.

   Raises FileSystemError if the "Directory revision:" row does not contain
   exactly 2 links, or if a second such row is encountered. Raises ValueError
   if the directory revision is not an integer.
   '''
   parent_version = None
   child_versions = {}

   # Try all of the tables until we find the ones that contain the data (the
   # directory and file versions are in different tables).
   for table in _ParseHTML(html).getElementsByTagName('table'):
     # Within the table there is a list of files. However, there may be some
     # things beforehand; a header, "parent directory" list, etc. We will deal
     # with that below by being generous and just ignoring such rows.
     for row in table.getElementsByTagName('tr'):
       cells = row.getElementsByTagName('td')

       # The version of the directory will eventually appear in the soup of
       # table rows, like this:
       #
       # <tr>
       #   <td>Directory revision:</td>
       #   <td><a href=... title="Revision 214692">214692</a> (of...)</td>
       # </tr>
       #
       # So look out for that.
       if len(cells) == 2 and _InnerText(cells[0]) == 'Directory revision:':
         links = cells[1].getElementsByTagName('a')
         if len(links) != 2:
           raise FileSystemError(
               ('ViewVC assumption invalid: directory '
                'revision content did not have 2 <a> '
                ' elements, instead %s') % _InnerText(cells[1]))
         this_parent_version = _InnerText(links[0])
         int(this_parent_version)  # sanity check
         if parent_version is not None:
           # The whole message must be parenthesised before formatting: %
           # binds tighter than +, so formatting only the last fragment with
           # a 2-tuple raises TypeError instead of the intended error.
           raise FileSystemError(
               ('There was already a parent version %s, and '
                ' we just found a second at %s') %
               (parent_version, this_parent_version))
         parent_version = this_parent_version

       # The version of each file is a list of rows with 5 cells: name,
       # version, age, author, and last log entry. Maybe the columns will
       # change; we're at the mercy of viewvc, but this constant can be easily
       # updated.
       if len(cells) != 5:
         continue
       name_element, version_element = cells[0], cells[1]

       name = _InnerText(name_element)  # note: will end in / for directories
       try:
         version = int(_InnerText(version_element))
       except ValueError:
         # Not a file row after all (e.g. a header); ignore it.
         continue
       child_versions[name] = str(version)

     if parent_version and child_versions:
       break

   return StatInfo(parent_version, child_versions)


 class SubversionFileSystem(FileSystem):
   '''FileSystem implementation that fetches resources from src.chromium.org.

   File and directory contents are requested through |file_fetcher|; version
   information is scraped from the pages returned by |stat_fetcher|.
   '''
   @staticmethod
   def Create(branch='trunk', revision=None):
     '''Builds a SubversionFileSystem for |branch|, optionally pinned to
     |revision|. 'trunk' maps to trunk/src, anything else to
     branches/<branch>/src.
     '''
     svn_path = 'trunk/src' if branch == 'trunk' else 'branches/%s/src' % branch
     file_fetcher = AppEngineUrlFetcher(
         '%s/%s' % (url_constants.SVN_URL, svn_path))
     stat_fetcher = AppEngineUrlFetcher(
         '%s/%s' % (url_constants.VIEWVC_URL, svn_path))
     return SubversionFileSystem(
         file_fetcher, stat_fetcher, svn_path, revision=revision)

   def __init__(self, file_fetcher, stat_fetcher, svn_path, revision=None):
     '''Stores the fetcher used by Read, the fetcher used by Stat, the SVN path
     used for the identity, and the optional revision to pin requests to.
     '''
     self._svn_path = svn_path
     self._revision = revision
     self._file_fetcher = file_fetcher
     self._stat_fetcher = stat_fetcher

   def Read(self, paths, skip_not_found=False):
     '''Starts a fetch for every path in |paths| and returns a Future that
     resolves to a dict of path -> content. For paths ending in / the content
     is the list of entry names, otherwise it is the fetched body.

     With |skip_not_found|, paths that 404 or hit a download error are left out
     of the result; otherwise they raise FileNotFoundError. Any other failure
     raises FileSystemError.
     '''
     # |_file_fetcher| gets from svn.chromium.org which uses p= for version.
     if self._revision is None:
       query = None
     else:
       query = 'p=%s' % self._revision

     def with_query(path):
       if query is None:
         return path
       return '%s?%s' % (path, query)

     def parse_listing(listing):
       # The entries of a directory listing are the text of its links, minus
       # the link to the parent directory.
       anchors = xml.parseString(listing).getElementsByTagName('a')
       entries = [anchor.childNodes[0].data for anchor in anchors]
       if '..' in entries:
         entries.remove('..')
       return entries

     # All fetches are started up front, as (path, Future) pairs.
     pending = []
     for path in paths:
       pending.append((path, self._file_fetcher.FetchAsync(with_query(path))))

     def resolve():
       contents = {}
       for path, fetch in pending:
         try:
           response = fetch.Get()
         except Exception as e:
           if skip_not_found and IsDownloadError(e):
             continue
           if IsDownloadError(e):
             exc_type = FileNotFoundError
           else:
             exc_type = FileSystemError
           raise exc_type('%s fetching %s for Get: %s' %
                          (type(e).__name__, path, traceback.format_exc()))
         status = response.status_code
         if status == 404:
           if skip_not_found:
             continue
           raise FileNotFoundError(
               'Got 404 when fetching %s for Get, content %s' %
               (path, response.content))
         if status != 200:
           raise FileSystemError('Got %s when fetching %s for Get, content %s' %
                                 (status, path, response.content))
         if path.endswith('/'):
           contents[path] = parse_listing(response.content)
         else:
           contents[path] = response.content
       return contents

     return Future(callback=resolve)

   def Refresh(self):
     '''There is nothing to refresh; returns an already-resolved Future.
     '''
     return Future(value=())

   def Stat(self, path):
     '''Synchronous version of StatAsync.
     '''
     stat_future = self.StatAsync(path)
     return stat_future.Get()

   def StatAsync(self, path):
     '''Fetches the page of the directory containing |path| and returns a
     Future resolving to a StatInfo: the directory's own StatInfo (including
     child versions) if |path| is empty or ends in /, otherwise a StatInfo
     holding only the version of the file.

     Resolution raises FileNotFoundError for download errors, non-200
     responses, and files missing from the listing; FileSystemError for other
     fetch failures or when the directory version cannot be found.
     '''
     parent_dir, base_name = posixpath.split(path)
     if self._revision is None:
       stat_url = parent_dir
     else:
       # |_stat_fetcher| uses viewvc which uses pathrev= for version.
       stat_url = parent_dir + '?pathrev=%s' % self._revision

     result_future = self._stat_fetcher.FetchAsync(stat_url)

     def resolve():
       try:
         response = result_future.Get()
       except Exception as e:
         if IsDownloadError(e):
           exc_type = FileNotFoundError
         else:
           exc_type = FileSystemError
         raise exc_type('%s fetching %s for Stat: %s' %
                        (type(e).__name__, path, traceback.format_exc()))

       if response.status_code == 404:
         raise FileNotFoundError(
             'Got 404 when fetching %s for Stat, content %s' %
             (path, response.content))
       # NOTE(review): unlike Read, every other non-200 status is also reported
       # as FileNotFoundError here - confirm whether that is intended.
       if response.status_code != 200:
         raise FileNotFoundError(
             'Got %s when fetching %s for Stat, content %s' %
             (response.status_code, path, response.content))

       dir_stat = _CreateStatInfo(response.content)
       if dir_stat.version is None:
         raise FileSystemError('Failed to find version of dir %s' % stat_url)
       if path == '' or path.endswith('/'):
         return dir_stat
       if base_name not in dir_stat.child_versions:
         raise FileNotFoundError(
             '%s from %s was not in child versions for Stat' %
             (base_name, path))
       return StatInfo(dir_stat.child_versions[base_name])

     return Future(callback=resolve)

   def GetIdentity(self):
     '''Returns "<class name>@<identity of the svn path>".
     '''
     # NOTE: no revision here, since it would mess up the caching of reads. It
     # probably doesn't matter since all the caching classes will use the result
     # of Stat to decide whether to re-read - and Stat has a ceiling of the
     # revision - so when the revision changes, so might Stat. That is enough.
     class_name = self.__class__.__name__
     path_identity = StringIdentity(self._svn_path)
     return '@'.join((class_name, path_identity))
	# Copyright (c) 2012 The Chromium Authors. All rights reserved.
	# Use of this source code is governed by a BSD-style license that can be
	# found in the LICENSE file.

	import posixpath
	import traceback
	import xml.dom.minidom as xml
	from xml.parsers.expat import ExpatError

	from appengine_url_fetcher import AppEngineUrlFetcher
	from appengine_wrappers import IsDownloadError
	from docs_server_utils import StringIdentity
	from file_system import (
	FileNotFoundError, FileSystem, FileSystemError, StatInfo)
	from future import Future
	import url_constants


	def _ParseHTML(html):
	  '''Parses |html| with minidom, dropping every line that expat rejects.

	  Unfortunately, the viewvc page has a stray </div> tag, so this takes care
	  of all mismatched tags: whenever parsing fails, the line the parser
	  complained about is removed and parsing is retried.

	  Returns the parsed xml.dom.minidom Document. Re-raises the ExpatError when
	  removing the reported line would leave |html| unchanged (for example when
	  |html| is empty), because retrying could then never succeed.
	  '''
	  # Iterate rather than recurse so that a page with many bad lines does not
	  # consume one stack frame per removed line.
	  while True:
	    try:
	      return xml.parseString(html)
	    except ExpatError as e:
	      remaining = '\n'.join(
	          line for (i, line) in enumerate(html.split('\n'))
	          if e.lineno != i + 1)
	      if remaining == html:
	        # Nothing was removed, so the very same error would recur forever.
	        raise
	      html = remaining

	def _InnerText(node):
	  '''Like node.innerText in JS DOM, but strips surrounding whitespace.

	  Returns the text of |node| followed by the text of each of its children,
	  gathered recursively. Whitespace is stripped at every level, so the text
	  of each child is stripped before being joined.
	  '''
	  text = []
	  if node.nodeValue:
	    text.append(node.nodeValue)
	  # Only descend when the object actually exposes children.
	  if hasattr(node, 'childNodes'):
	    for child_node in node.childNodes:
	      text.append(_InnerText(child_node))
	  return ''.join(text).strip()

	def _CreateStatInfo(html):
	  '''Scrapes the markup of a ViewVC directory page into a StatInfo.

	  Returns StatInfo(parent_version, child_versions). |parent_version| is the
	  text of the first link in the "Directory revision:" row, or None if no such
	  row was found. |child_versions| maps the name found in each 5-cell row
	  (directories end in /) to its version as a string.

	  Raises FileSystemError if the "Directory revision:" row does not contain
	  exactly 2 links, or if a second such row is encountered. Raises ValueError
	  if the directory revision is not an integer.
	  '''
	  parent_version = None
	  child_versions = {}

	  # Try all of the tables until we find the ones that contain the data (the
	  # directory and file versions are in different tables).
	  for table in _ParseHTML(html).getElementsByTagName('table'):
	    # Within the table there is a list of files. However, there may be some
	    # things beforehand; a header, "parent directory" list, etc. We will deal
	    # with that below by being generous and just ignoring such rows.
	    rows = table.getElementsByTagName('tr')

	    for row in rows:
	      cells = row.getElementsByTagName('td')

	      # The version of the directory will eventually appear in the soup of
	      # table rows, like this:
	      #
	      # <tr>
	      #   <td>Directory revision:</td>
	      #   <td><a href=... title="Revision 214692">214692</a> (of...)</td>
	      # </tr>
	      #
	      # So look out for that.
	      if len(cells) == 2 and _InnerText(cells[0]) == 'Directory revision:':
	        links = cells[1].getElementsByTagName('a')
	        if len(links) != 2:
	          raise FileSystemError(
	              ('ViewVC assumption invalid: directory '
	               'revision content did not have 2 <a> '
	               ' elements, instead %s') % _InnerText(cells[1]))
	        this_parent_version = _InnerText(links[0])
	        int(this_parent_version)  # sanity check
	        if parent_version is not None:
	          # The whole message must be parenthesised before formatting: %
	          # binds tighter than +, so formatting only the last fragment with
	          # a 2-tuple raises TypeError instead of the intended error.
	          raise FileSystemError(
	              ('There was already a parent version %s, and '
	               ' we just found a second at %s') %
	              (parent_version, this_parent_version))
	        parent_version = this_parent_version

	      # The version of each file is a list of rows with 5 cells: name,
	      # version, age, author, and last log entry. Maybe the columns will
	      # change; we're at the mercy of viewvc, but this constant can be easily
	      # updated.
	      if len(cells) != 5:
	        continue
	      name_element, version_element, _, __, ___ = cells

	      name = _InnerText(name_element)  # note: will end in / for directories
	      try:
	        version = int(_InnerText(version_element))
	      except ValueError:
	        # Not a file row after all (e.g. a header); ignore it.
	        continue
	      child_versions[name] = str(version)

	    if parent_version and child_versions:
	      break

	  return StatInfo(parent_version, child_versions)


	class SubversionFileSystem(FileSystem):
	  '''Class to fetch resources from src.chromium.org.

	  File and directory contents are requested through |file_fetcher|; version
	  information is scraped from the pages returned by |stat_fetcher|.
	  '''
	  @staticmethod
	  def Create(branch='trunk', revision=None):
	    '''Builds a SubversionFileSystem for |branch|, optionally pinned to
	    |revision|. 'trunk' maps to trunk/src, anything else to
	    branches/<branch>/src.
	    '''
	    if branch == 'trunk':
	      svn_path = 'trunk/src'
	    else:
	      svn_path = 'branches/%s/src' % branch
	    return SubversionFileSystem(
	        AppEngineUrlFetcher('%s/%s' % (url_constants.SVN_URL, svn_path)),
	        AppEngineUrlFetcher('%s/%s' % (url_constants.VIEWVC_URL, svn_path)),
	        svn_path,
	        revision=revision)

	  def __init__(self, file_fetcher, stat_fetcher, svn_path, revision=None):
	    '''Stores the fetcher used by Read, the fetcher used by Stat, the SVN
	    path used for the identity, and the optional revision to pin requests to.
	    '''
	    self._file_fetcher = file_fetcher
	    self._stat_fetcher = stat_fetcher
	    self._svn_path = svn_path
	    self._revision = revision

	  def Read(self, paths, skip_not_found=False):
	    '''Starts a fetch for every path in |paths| and returns a Future that
	    resolves to a dict of path -> content. For paths ending in / the content
	    is the list of entry names, otherwise it is the fetched body.

	    With |skip_not_found|, paths that 404 or hit a download error are left
	    out of the result; otherwise they raise FileNotFoundError. Any other
	    failure raises FileSystemError.
	    '''
	    args = None
	    if self._revision is not None:
	      # |fetcher| gets from svn.chromium.org which uses p= for version.
	      args = 'p=%s' % self._revision

	    def apply_args(path):
	      return path if args is None else '%s?%s' % (path, args)

	    def list_dir(directory):
	      # The entries of a directory listing are the text of its links, minus
	      # the link to the parent directory.
	      dom = xml.parseString(directory)
	      files = [elem.childNodes[0].data
	               for elem in dom.getElementsByTagName('a')]
	      if '..' in files:
	        files.remove('..')
	      return files

	    # A list of tuples of the form (path, Future).
	    fetches = [(path, self._file_fetcher.FetchAsync(apply_args(path)))
	               for path in paths]

	    def resolve():
	      value = {}
	      for path, future in fetches:
	        try:
	          result = future.Get()
	        except Exception as e:
	          if skip_not_found and IsDownloadError(e): continue
	          exc_type = (FileNotFoundError if IsDownloadError(e)
	                                       else FileSystemError)
	          raise exc_type('%s fetching %s for Get: %s' %
	                         (type(e).__name__, path, traceback.format_exc()))
	        if result.status_code == 404:
	          if skip_not_found: continue
	          raise FileNotFoundError(
	              'Got 404 when fetching %s for Get, content %s' %
	              (path, result.content))
	        if result.status_code != 200:
	          raise FileSystemError('Got %s when fetching %s for Get, content %s' %
	              (result.status_code, path, result.content))
	        if path.endswith('/'):
	          value[path] = list_dir(result.content)
	        else:
	          value[path] = result.content
	      return value
	    return Future(callback=resolve)

	  def Refresh(self):
	    '''There is nothing to refresh; returns an already-resolved Future.
	    '''
	    return Future(value=())

	  def Stat(self, path):
	    '''Synchronous version of StatAsync.
	    '''
	    return self.StatAsync(path).Get()

	  def StatAsync(self, path):
	    '''Fetches the page of the directory containing |path| and returns a
	    Future resolving to a StatInfo: the directory's own StatInfo (including
	    child versions) if |path| is empty or ends in /, otherwise a StatInfo
	    holding only the version of the file.

	    Resolution raises FileNotFoundError for download errors, non-200
	    responses, and files missing from the listing; FileSystemError for other
	    fetch failures or when the directory version cannot be found.
	    '''
	    directory, filename = posixpath.split(path)
	    if self._revision is not None:
	      # |stat_fetch| uses viewvc which uses pathrev= for version.
	      directory += '?pathrev=%s' % self._revision

	    result_future = self._stat_fetcher.FetchAsync(directory)
	    def resolve():
	      try:
	        result = result_future.Get()
	      except Exception as e:
	        exc_type = FileNotFoundError if IsDownloadError(e) else FileSystemError
	        raise exc_type('%s fetching %s for Stat: %s' %
	                       (type(e).__name__, path, traceback.format_exc()))

	      if result.status_code == 404:
	        raise FileNotFoundError('Got 404 when fetching %s for Stat, '
	                                'content %s' % (path, result.content))
	      # NOTE(review): unlike Read, every other non-200 status is also
	      # reported as FileNotFoundError here - confirm whether that is intended.
	      if result.status_code != 200:
	        raise FileNotFoundError('Got %s when fetching %s for Stat, content %s' %
	                                (result.status_code, path, result.content))

	      stat_info = _CreateStatInfo(result.content)
	      if stat_info.version is None:
	        raise FileSystemError('Failed to find version of dir %s' % directory)
	      if path == '' or path.endswith('/'):
	        return stat_info
	      if filename not in stat_info.child_versions:
	        raise FileNotFoundError(
	            '%s from %s was not in child versions for Stat' % (filename, path))
	      return StatInfo(stat_info.child_versions[filename])

	    return Future(callback=resolve)

	  def GetIdentity(self):
	    '''Returns "<class name>@<identity of the svn path>".
	    '''
	    # NOTE: no revision here, since it would mess up the caching of reads. It
	    # probably doesn't matter since all the caching classes will use the result
	    # of Stat to decide whether to re-read - and Stat has a ceiling of the
	    # revision - so when the revision changes, so might Stat. That is enough.
	    return '@'.join((self.__class__.__name__, StringIdentity(self._svn_path)))