| # Copyright 2014 The Chromium Authors. All rights reserved. |
| # Use of this source code is governed by a BSD-style license that can be |
| # found in the LICENSE file. |
| |
| |
| from base64 import b64decode |
| from itertools import izip |
| import json |
| import logging |
| import posixpath |
| import time |
| import traceback |
| |
| from appengine_url_fetcher import AppEngineUrlFetcher |
| from appengine_wrappers import IsDownloadError, app_identity |
| from docs_server_utils import StringIdentity |
| from file_system import (FileNotFoundError, |
| FileSystem, |
| FileSystemError, |
| FileSystemThrottledError, |
| StatInfo) |
| from future import All, Future |
| from path_util import AssertIsValid, IsDirectory, ToDirectory |
| from third_party.json_schema_compiler.memoize import memoize |
| from url_constants import (GITILES_BASE, |
| GITILES_SRC_ROOT, |
| GITILES_BRANCHES_PATH, |
| GITILES_OAUTH2_SCOPE) |
| |
| |
# Query string appended to a Gitiles URL to request the JSON view. Used in this
# file for directory listings and for branch/commit info.
_JSON_FORMAT = '?format=JSON'
# Query string appended to a Gitiles URL to request the TEXT view. Used in this
# file for non-directory paths; the content is base64-decoded by the reader.
_TEXT_FORMAT = '?format=TEXT'
# Path component placed right after GITILES_BASE when an access token is
# available (see GitilesFileSystem.Create).
_AUTH_PATH_PREFIX = '/a'
| |
| |
| def _ParseGitilesJson(json_data): |
| '''json.loads with fix-up for non-executable JSON. Use this to parse any JSON |
| data coming from Gitiles views. |
| ''' |
| return json.loads(json_data[json_data.find('{'):]) |
| |
| |
def _CreateStatInfo(json_data):
  '''Builds a StatInfo from the Gitiles JSON view of a directory.

  The StatInfo's first argument is the tree ID found under 'id' in
  |json_data|; its second argument maps the name of every item under 'entries'
  to that item's ID.
  '''
  parsed = _ParseGitilesJson(json_data)
  tree_id = parsed['id']
  child_ids = {entry['name']: entry['id'] for entry in parsed['entries']}
  return StatInfo(tree_id, child_ids)
| |
| |
class GitilesFileSystem(FileSystem):
  '''Class to fetch filesystem data from the Chromium project's gitiles
  service.
  '''
  # Tokens that have already been written to the log. Kept on the class so
  # that all instances share it and each distinct token is logged only once.
  _logged_tokens = set()

  @classmethod
  def Create(cls, branch='master', commit=None):
    '''Creates a GitilesFileSystem that fetches via an AppEngineUrlFetcher.

    The base URL is chosen as follows: a truthy |commit| wins and the URL ends
    in that commit; otherwise a |branch| equal to 'master' maps to
    '.../master'; any other |branch| is placed under GITILES_BRANCHES_PATH.
    _AUTH_PATH_PREFIX is inserted after GITILES_BASE whenever the access token
    is not None.
    '''
    token, _ = app_identity.get_access_token(GITILES_OAUTH2_SCOPE)

    # Log the access token (once per token) so that it can be sneakily re-used
    # in development.
    # NOTE(review): this writes a live credential to the logs; it is kept
    # because it is deliberate, but confirm it is acceptable in production.
    if token not in cls._logged_tokens:
      logging.info('Got token %s for scope %s' % (token, GITILES_OAUTH2_SCOPE))
      cls._logged_tokens.add(token)

    path_prefix = '' if token is None else _AUTH_PATH_PREFIX
    if commit:
      base_url = '%s%s/%s/%s' % (
          GITILES_BASE, path_prefix, GITILES_SRC_ROOT, commit)
    elif branch == 'master':
      # Compare by value, not identity: 'is' only works when both strings
      # happen to be the same interned object. GetIdentity uses == as well.
      base_url = '%s%s/%s/master' % (
          GITILES_BASE, path_prefix, GITILES_SRC_ROOT)
    else:
      base_url = '%s%s/%s/%s/%s' % (
          GITILES_BASE, path_prefix, GITILES_SRC_ROOT,
          GITILES_BRANCHES_PATH, branch)
    return GitilesFileSystem(AppEngineUrlFetcher(), base_url, branch, commit)

  def __init__(self, fetcher, base_url, branch, commit):
    '''Stores the collaborators; performs no I/O.

    |fetcher| must provide FetchAsync(url, access_token=...). |base_url| is
    the URL that all fetched paths are appended to. |branch| and |commit| are
    kept for GetIdentity and GetVersion.
    '''
    self._fetcher = fetcher
    self._base_url = base_url
    self._branch = branch
    self._commit = commit

  def _FetchAsync(self, url):
    '''Convenience wrapper for fetcher.FetchAsync, so callers don't
    need to use posixpath.join.

    |url| is appended to the base URL after a '/' and is checked with
    AssertIsValid first. The access token is looked up on every call and
    passed to the fetcher. Returns whatever the fetcher's FetchAsync returns.
    '''
    AssertIsValid(url)
    access_token, _ = app_identity.get_access_token(GITILES_OAUTH2_SCOPE)
    return self._fetcher.FetchAsync('%s/%s' % (self._base_url, url),
                                    access_token=access_token)

  def _ResolveFetchContent(self, path, fetch_future, skip_not_found=False):
    '''Returns a future to cleanly resolve |fetch_future|.

    The success callback yields the response content for a 200, and None for
    a 404 when |skip_not_found| is set. It raises FileNotFoundError for any
    other 404, FileSystemThrottledError for a 429 and FileSystemError for
    every other status code. The second callback handed to Then() receives an
    exception: it yields None for a download error when |skip_not_found| is
    set, and otherwise raises FileNotFoundError (download error) or
    FileSystemError (anything else). |path| is only used in messages.
    '''
    def handle(e):
      if skip_not_found and IsDownloadError(e):
        return None
      exc_type = FileNotFoundError if IsDownloadError(e) else FileSystemError
      raise exc_type('%s fetching %s for Get from %s: %s' %
          (type(e).__name__, path, self._base_url, traceback.format_exc()))

    def get_content(result):
      if result.status_code == 404:
        if skip_not_found:
          return None
        raise FileNotFoundError('Got 404 when fetching %s for Get from %s' %
                                (path, self._base_url))
      if result.status_code == 429:
        # The same text is both logged and carried by the exception.
        message = ('Access throttled when fetching %s for Get from %s' %
                   (path, self._base_url))
        logging.warning(message)
        raise FileSystemThrottledError(message)
      if result.status_code != 200:
        raise FileSystemError(
            'Got %s when fetching %s for Get from %s, content %s' %
            (result.status_code, path, self._base_url, result.content))
      return result.content

    return fetch_future.Then(get_content, handle)

  def Read(self, paths, skip_not_found=False):
    '''Fetches every path in |paths| and returns a future of a dict.

    The dict maps each path to a list of entry names (sub-directories get a
    trailing '/') when the path is a directory, or to the base64-decoded
    content otherwise. Paths whose content resolved to None (only possible
    with |skip_not_found|) are left out of the dict.
    '''
    # Directory content is formatted in JSON in Gitiles as follows:
    #
    # {
    #   "id": "12a5464de48d2c46bc0b2dc78fafed75aab554fa", # The tree ID.
    #   "entries": [
    #     {
    #       "mode": 33188,
    #       "type": "blob",
    #       "id": "ab971ca447bc4bce415ed4498369e00164d91cb6", # File ID.
    #       "name": ".gitignore"
    #     },
    #     ...
    #   ]
    # }
    def list_dir(json_data):
      entries = _ParseGitilesJson(json_data).get('entries', [])
      return [e['name'] + ('/' if e['type'] == 'tree' else '') for e in entries]

    def fixup_url_format(path):
      # By default, Gitiles URLs display resources in HTML. To get resources
      # suitable for our consumption, a '?format=' string must be appended to
      # the URL. The format may be one of 'JSON' or 'TEXT' for directory or
      # text resources, respectively.
      return path + (_JSON_FORMAT if IsDirectory(path) else _TEXT_FORMAT)

    # A list of tuples of the form (path, Future). Being a real list, it can
    # be walked again when the results arrive, even if |paths| itself was a
    # one-shot iterable that is exhausted by now.
    fetches = [(path, self._FetchAsync(fixup_url_format(path)))
               for path in paths]

    def parse_contents(results):
      value = {}
      # Pair results with the paths recorded in |fetches|, not with |paths|.
      for (path, _), content in zip(fetches, results):
        if content is None:
          continue
        # Gitiles encodes text content in base64 (see
        # http://tools.ietf.org/html/rfc4648 for info about base64).
        value[path] = (list_dir if IsDirectory(path) else b64decode)(content)
      return value

    return All(self._ResolveFetchContent(path, future, skip_not_found)
               for path, future in fetches).Then(parse_contents)

  def Refresh(self):
    '''Nothing to refresh; returns a Future already holding an empty tuple.'''
    return Future(value=())

  @memoize
  def _GetCommitInfo(self, key):
    '''Gets the commit information specified by |key|.

    The JSON view for commit info looks like:
      {
        "commit": "8fd578e1a7b142cd10a4387861f05fb9459b69e2", # Commit ID.
        "tree": "3ade65d8a91eadd009a6c9feea8f87db2c528a53", # Tree ID.
        "parents": [
          "a477c787fe847ae0482329f69b39ce0fde047359" # Previous commit ID.
        ],
        "author": {
          "name": "...",
          "email": "...",
          "time": "Tue Aug 12 17:17:21 2014"
        },
        "committer": {
          "name": "...",
          "email": "...",
          "time": "Tue Aug 12 17:18:28 2014"
        },
        "message": "...",
        "tree_diff": [...]
      }

    Returns a future that resolves to the value stored under |key|.
    '''
    # Commit information for a branch is obtained by appending '?format=JSON'
    # to the branch URL. Note that '<gitiles_url>/<branch>?format=JSON' is
    # different from '<gitiles_url>/<branch>/?format=JSON': the latter serves
    # the root directory JSON content, whereas the former serves the branch
    # commit info JSON content.

    access_token, _ = app_identity.get_access_token(GITILES_OAUTH2_SCOPE)
    fetch_future = self._fetcher.FetchAsync(self._base_url + _JSON_FORMAT,
                                            access_token=access_token)
    content_future = self._ResolveFetchContent(self._base_url, fetch_future)
    # The parameter is not called 'json' so it cannot shadow the json module.
    return content_future.Then(
        lambda json_data: _ParseGitilesJson(json_data)[key])

  def GetCommitID(self):
    '''Returns a future that resolves to the commit ID for this branch.
    '''
    return self._GetCommitInfo('commit')

  def GetPreviousCommitID(self):
    '''Returns a future that resolves to the previous commit ID for this branch.

    This is the first item of the commit's "parents" list, so the list is
    assumed to be non-empty.
    '''
    return self._GetCommitInfo('parents').Then(lambda parents: parents[0])

  def StatAsync(self, path):
    '''Returns a future of the StatInfo for |path|.

    The JSON listing of the directory part of |path| is fetched. For a
    directory |path| the StatInfo built from that listing is returned as is;
    for a file, a StatInfo holding only the file's ID from the listing is
    returned. Raises FileNotFoundError when the file is not in the listing,
    and FileSystemError when the listing has no version.
    '''
    dir_, filename = posixpath.split(path)
    def stat(content):
      stat_info = _CreateStatInfo(content)
      if stat_info.version is None:
        raise FileSystemError('Failed to find version of dir %s' % dir_)
      if IsDirectory(path):
        return stat_info
      if filename not in stat_info.child_versions:
        raise FileNotFoundError(
            '%s from %s was not in child versions for Stat' % (filename, path))
      return StatInfo(stat_info.child_versions[filename])

    fetch_future = self._FetchAsync(ToDirectory(dir_) + _JSON_FORMAT)
    return self._ResolveFetchContent(path, fetch_future).Then(stat)

  def GetIdentity(self):
    '''Returns '<class name>@<StringIdentity of the source URL>'.

    The URL ends in 'master' for the master branch, in the commit when one is
    set, and otherwise in the branch under GITILES_BRANCHES_PATH.
    '''
    if self._branch == 'master':
      # A master FS always carries the same identity even if pinned to a commit.
      str_id = 'master'
    elif self._commit is not None:
      str_id = self._commit
    else:
      str_id = '%s/%s' % (GITILES_BRANCHES_PATH, self._branch)
    return '@'.join((self.__class__.__name__, StringIdentity(
        '%s/%s/%s' % (GITILES_BASE, GITILES_SRC_ROOT, str_id))))

  def GetVersion(self):
    '''Returns the commit this file system was created with (may be None).'''
    return self._commit