blob: 40fa8ae773befad7705f412b2691d47438278269 [file] [log] [blame]
# Copyright 2013 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
import json
import logging
from cStringIO import StringIO
import posixpath
import sys
from zipfile import BadZipfile, ZipFile
import appengine_blobstore as blobstore
from appengine_url_fetcher import AppEngineUrlFetcher
from appengine_wrappers import urlfetch
from docs_server_utils import StringIdentity
from file_system import FileNotFoundError, FileSystem, StatInfo
from future import Future, Gettable
from object_store_creator import ObjectStoreCreator
import url_constants
# Namespace passed to the blobstore when a downloaded repository zip is stored.
_GITHUB_REPOS_NAMESPACE = 'GithubRepos'
def _LoadCredentials(object_store_creator):
  '''Returns a (username, password) tuple read from the 'password' category
  object store that |object_store_creator| creates for GithubFileSystem.

  The store is created with start_empty=False and app_version=None. Each
  element is looked up with .get() on the fetched data, so a missing entry is
  presumably returned as None rather than raising (assumes the data is
  dict-like; confirm against the object store implementation).
  '''
  credential_keys = ('username', 'password')
  store = object_store_creator.Create(GithubFileSystem,
                                      app_version=None,
                                      category='password',
                                      start_empty=False)
  credentials = store.GetMulti(credential_keys).Get()
  return tuple(credentials.get(key) for key in credential_keys)
class GithubFileSystem(FileSystem):
  '''Allows reading from a github.com repository.

  The repository contents are held in memory as a zip archive which is only
  populated by Refresh(). Until a Refresh() has been evaluated successfully
  the file system behaves as if the repository were empty.
  '''

  @staticmethod
  def Create(owner, repo, object_store_creator):
    '''Creates a GithubFileSystem that corresponds to a single github
    repository specified by |owner| and |repo|.
    '''
    return GithubFileSystem(
        url_constants.GITHUB_REPOS,
        owner,
        repo,
        object_store_creator,
        AppEngineUrlFetcher)

  @staticmethod
  def ForTest(repo, fake_fetcher, path=None, object_store_creator=None):
    '''Creates a GithubFileSystem that can be used for testing. It reads zip
    files and commit data from server2/test_data/github_file_system/test_owner
    instead of github.com. It reads from files specified by |repo|.

    |path| overrides the default 'test_data/github_file_system' base location
    and |object_store_creator| defaults to ObjectStoreCreator.ForTest().
    '''
    return GithubFileSystem(
        path if path is not None else 'test_data/github_file_system',
        'test_owner',
        repo,
        object_store_creator or ObjectStoreCreator.ForTest(),
        fake_fetcher)

  def __init__(self, base_url, owner, repo, object_store_creator, Fetcher):
    '''|Fetcher| is a class (or factory) which is called with the repository
    URL, built as |base_url|/|owner|/|repo|, to create the fetcher used for
    every request.
    '''
    # Key under which this repository's version is kept in the stat cache.
    self._repo_key = '%s/%s' % (owner, repo)
    self._repo_url = '%s/%s/%s' % (base_url, owner, repo)
    self._blobstore = blobstore.AppEngineBlobstore()
    # Lookup the chrome github api credentials.
    self._username, self._password = _LoadCredentials(object_store_creator)
    self._fetcher = Fetcher(self._repo_url)
    self._stat_cache = object_store_creator.Create(
        GithubFileSystem, category='stat-cache')
    # Resolves to None until Refresh() has downloaded the repository zip.
    self._repo_zip = Future(value=None)

  def _GetNamelist(self):
    '''Returns a list of all file names in a repository zip file, or an empty
    list if no zip file has been loaded yet.
    '''
    repo_zip = self._repo_zip.Get()
    if repo_zip is None:
      return []
    return repo_zip.namelist()

  def _GetVersion(self):
    '''Returns the currently cached version of the repository. The version is
    a 'sha' hash value.
    '''
    return self._stat_cache.Get(self._repo_key).Get()

  def _FetchLiveVersion(self):
    '''Fetches the current repository version from github.com and returns it.
    The version is the 'sha' hash of the tree of the HEAD commit.

    Returns None, after logging a warning, if the response cannot be parsed.
    '''
    # TODO(kalman): Do this asynchronously (use FetchAsync).
    result = self._fetcher.Fetch(
        'commits/HEAD', username=self._username, password=self._password)
    try:
      return json.loads(result.content)['commit']['tree']['sha']
    except (KeyError, TypeError, ValueError):
      # ValueError: the content is not JSON. KeyError/TypeError: it is JSON
      # but not shaped like a commit (e.g. an error object or a list).
      logging.warning('Error parsing JSON from repo %s' % self._repo_url)
      return None

  def Refresh(self):
    '''Compares the cached and live stat versions to see if the cached
    repository is out of date. If it is, or if no repository zip has been
    loaded into this instance yet, an async fetch is started and a Future is
    returned. When this Future is evaluated, the fetch will be completed and
    the results cached.

    If no update is needed, or the live version could not be determined, a
    Future that resolves to None is returned and nothing is fetched.
    '''
    version = self._FetchLiveVersion()
    repo_zip_url = self._repo_url + '/zipball'

    def persist_fetch(fetch):
      '''Completes |fetch| and stores the results in blobstore.

      Download failures and malformed zip files are logged and otherwise
      ignored, leaving the previously loaded repository (if any) in place.
      '''
      try:
        blob = fetch.Get().content
      except urlfetch.DownloadError as error:
        logging.error(
            '%s: Failed to download zip file from repository %s' %
            (error, repo_zip_url))
        return
      try:
        repo_zip = ZipFile(StringIO(blob))
      except BadZipfile as error:
        logging.error(
            '%s: Bad zip file returned from url %s' % (error, repo_zip_url))
        return
      self._blobstore.Set(repo_zip_url, blob, _GITHUB_REPOS_NAMESPACE)
      self._repo_zip = Future(value=repo_zip)
      self._stat_cache.Set(self._repo_key, version)

    if version is None:
      # Without a live version there is nothing to compare against and
      # nothing meaningful to record in the stat cache.
      return Future(value=None)

    # The cached version is stored under |self._repo_key| (see persist_fetch),
    # so that is the key which must be compared. The zip itself only lives in
    # memory, so it also has to be fetched when this instance has not loaded
    # one yet, even if the stat cache already holds the live version.
    if version != self._GetVersion() or self._repo_zip.Get() is None:
      fetch = self._fetcher.FetchAsync(
          'zipball', username=self._username, password=self._password)
      return Future(delegate=Gettable(lambda: persist_fetch(fetch)))
    return Future(value=None)

  def Read(self, paths, binary=False):
    '''Returns a Future to a dictionary mapping |paths| to the contents of the
    file at each path. If path ends with a '/' (or is empty), it is treated as
    a directory and is mapped to a list of filenames in that directory.

    If the repository is empty, or any file in |paths| is missing from it,
    the returned Future raises FileNotFoundError when evaluated.

    |binary| is ignored.
    '''
    names = self._GetNamelist()
    if not names:
      # No files in this repository.
      def raise_file_not_found():
        raise FileNotFoundError('No paths can be found, repository is empty')
      return Future(delegate=Gettable(raise_file_not_found))

    # The first path component of the first entry is taken to be the root
    # directory of the archive; requested paths are resolved relative to it.
    prefix = names[0].split('/')[0]
    repo_zip = self._repo_zip.Get()
    reads = {}
    for path in paths:
      full_path = posixpath.join(prefix, path)
      if path == '' or path.endswith('/'):  # If path is a directory...
        children = []
        for name in names:
          if not name.startswith(full_path) or name == full_path:
            continue
          child = name[len(full_path):]
          # Keep direct children only. The last character is not inspected
          # so that immediate sub-directories (which end in '/') are listed.
          if '/' not in child[:-1]:
            children.append(child)
        reads[path] = children
      else:
        try:
          reads[path] = repo_zip.read(full_path)
        except KeyError as error:
          return Future(exc_info=(FileNotFoundError,
                                  FileNotFoundError(error),
                                  sys.exc_info()[2]))
    return Future(value=reads)

  def Stat(self, path):
    '''Stats |path| returning its version as a StatInfo object. If |path| ends
    with a '/' (or is empty), it is assumed to be a directory and the StatInfo
    object returned includes child_versions for the files directly inside it.

    File paths do not include the name of the zip file, which is arbitrary and
    useless to consumers.

    Every path is reported with the same version: the repository version
    currently held in the stat cache.

    Raises FileNotFoundError if |path| is not in the repository.
    '''
    # Trim off the zip file's name. Entries without a '/' have nothing below
    # the zip file's name, so they cannot correspond to a repository path.
    path = path.lstrip('/')
    trimmed = [name.split('/', 1)[1]
               for name in self._GetNamelist() if '/' in name]
    if path not in trimmed:
      raise FileNotFoundError("No stat found for '%s' in %s" % (path, trimmed))
    version = self._GetVersion()
    child_paths = {}
    if path == '' or path.endswith('/'):
      # Deal with a directory.
      # NOTE(review): unlike Read(), sub-directories (names ending in '/') are
      # excluded from the children here; confirm that this is intended.
      for name in trimmed:
        if not name.startswith(path) or name == path:
          continue
        filename = name[len(path):]
        if '/' not in filename:
          child_paths[filename] = StatInfo(version)
    return StatInfo(version, child_paths or None)

  def GetIdentity(self):
    '''Returns a string identifying this file system, derived from the class
    name and the owner/repo key.
    '''
    return '%s' % StringIdentity(self.__class__.__name__ + self._repo_key)

  def __repr__(self):
    return '<%s: key=%s, url=%s>' % (type(self).__name__,
                                     self._repo_key,
                                     self._repo_url)