blob: d68c6e5114f21036f41d6d7dc05073f1e6b67ec9 [file] [log] [blame]
# Copyright 2014 Google Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""An OAuth2 client library.
This library provides a client implementation of the OAuth2 protocol (see
https://developers.google.com/storage/docs/authentication.html#oauth).
**** Experimental API ****
This module is experimental and is subject to modification or removal without
notice.
"""
# This implementation is a wrapper around the oauth2client implementation
# that implements caching of access tokens independent of refresh
# tokens (in the python API client oauth2client, there is a single class that
# encapsulates both refresh and access tokens).
from __future__ import absolute_import
import cgi
import datetime
import errno
from hashlib import sha1
import json
import logging
import os
import socket
import tempfile
import threading
import urllib
if os.environ.get('USER_AGENT'):
import boto
boto.UserAgent += os.environ.get('USER_AGENT')
from boto import config
import httplib2
from oauth2client import service_account
from oauth2client.client import AccessTokenRefreshError
from oauth2client.client import Credentials
from oauth2client.client import EXPIRY_FORMAT
from oauth2client.client import HAS_CRYPTO
from oauth2client.client import OAuth2Credentials
from retry_decorator.retry_decorator import retry as Retry
import socks
if HAS_CRYPTO:
from oauth2client.client import SignedJwtAssertionCredentials
LOG = logging.getLogger('oauth2_client')
# Lock used for checking/exchanging refresh token, so multithreaded
# operation doesn't attempt concurrent refreshes.
token_exchange_lock = threading.Lock()
DEFAULT_SCOPE = 'https://www.googleapis.com/auth/devstorage.full_control'
METADATA_SERVER = 'http://metadata.google.internal'
META_TOKEN_URI = (METADATA_SERVER + '/computeMetadata/v1/instance/'
'service-accounts/default/token')
META_HEADERS = {
'X-Google-Metadata-Request': 'True'
}
# Note: this is copied from gsutil's gslib.cred_types. It should be kept in
# sync. Also note that this library does not use HMAC, but it's preserved from
# gsutil's copy to maintain compatibility.
class CredTypes(object):
HMAC = "HMAC"
OAUTH2_SERVICE_ACCOUNT = "OAuth 2.0 Service Account"
OAUTH2_USER_ACCOUNT = "Oauth 2.0 User Account"
GCE = "GCE"
class Error(Exception):
"""Base exception for the OAuth2 module."""
pass
class AuthorizationCodeExchangeError(Error):
"""Error trying to exchange an authorization code into a refresh token."""
pass
class TokenCache(object):
"""Interface for OAuth2 token caches."""
def PutToken(self, key, value):
raise NotImplementedError
def GetToken(self, key):
raise NotImplementedError
class NoopTokenCache(TokenCache):
"""A stub implementation of TokenCache that does nothing."""
def PutToken(self, key, value):
pass
def GetToken(self, key):
return None
class InMemoryTokenCache(TokenCache):
"""An in-memory token cache.
The cache is implemented by a python dict, and inherits the thread-safety
properties of dict.
"""
def __init__(self):
super(InMemoryTokenCache, self).__init__()
self.cache = dict()
def PutToken(self, key, value):
LOG.debug('InMemoryTokenCache.PutToken: key=%s', key)
self.cache[key] = value
def GetToken(self, key):
value = self.cache.get(key, None)
LOG.debug('InMemoryTokenCache.GetToken: key=%s%s present',
key, ' not' if value is None else '')
return value
class FileSystemTokenCache(TokenCache):
"""An implementation of a token cache that persists tokens on disk.
Each token object in the cache is stored in serialized form in a separate
file. The cache file's name can be configured via a path pattern that is
parameterized by the key under which a value is cached and optionally the
current processes uid as obtained by os.getuid().
Since file names are generally publicly visible in the system, it is important
that the cache key does not leak information about the token's value. If
client code computes cache keys from token values, a cryptographically strong
one-way function must be used.
"""
def __init__(self, path_pattern=None):
"""Creates a FileSystemTokenCache.
Args:
path_pattern: Optional string argument to specify the path pattern for
cache files. The argument should be a path with format placeholders
'%(key)s' and optionally '%(uid)s'. If the argument is omitted, the
default pattern
<tmpdir>/oauth2client-tokencache.%(uid)s.%(key)s
is used, where <tmpdir> is replaced with the system temp dir as
obtained from tempfile.gettempdir().
"""
super(FileSystemTokenCache, self).__init__()
self.path_pattern = path_pattern
if not path_pattern:
self.path_pattern = os.path.join(
tempfile.gettempdir(), 'oauth2_client-tokencache.%(uid)s.%(key)s')
def CacheFileName(self, key):
uid = '_'
try:
# os.getuid() doesn't seem to work in Windows
uid = str(os.getuid())
except:
pass
return self.path_pattern % {'key': key, 'uid': uid}
def PutToken(self, key, value):
"""Serializes the value to the key's filename.
To ensure that written tokens aren't leaked to a different users, we
a) unlink an existing cache file, if any (to ensure we don't fall victim
to symlink attacks and the like),
b) create a new file with O_CREAT | O_EXCL (to ensure nobody is trying to
race us)
If either of these steps fail, we simply give up (but log a warning). Not
caching access tokens is not catastrophic, and failure to create a file
can happen for either of the following reasons:
- someone is attacking us as above, in which case we want to default to
safe operation (not write the token);
- another legitimate process is racing us; in this case one of the two
will win and write the access token, which is fine;
- we don't have permission to remove the old file or write to the
specified directory, in which case we can't recover
Args:
key: the hash key to store.
value: the access_token value to serialize.
"""
cache_file = self.CacheFileName(key)
LOG.debug('FileSystemTokenCache.PutToken: key=%s, cache_file=%s',
key, cache_file)
try:
os.unlink(cache_file)
except:
# Ignore failure to unlink the file; if the file exists and can't be
# unlinked, the subsequent open with O_CREAT | O_EXCL will fail.
pass
flags = os.O_RDWR | os.O_CREAT | os.O_EXCL
# Accommodate Windows; stolen from python2.6/tempfile.py.
if hasattr(os, 'O_NOINHERIT'):
flags |= os.O_NOINHERIT
if hasattr(os, 'O_BINARY'):
flags |= os.O_BINARY
try:
fd = os.open(cache_file, flags, 0600)
except (OSError, IOError) as e:
LOG.warning('FileSystemTokenCache.PutToken: '
'Failed to create cache file %s: %s', cache_file, e)
return
f = os.fdopen(fd, 'w+b')
f.write(value.Serialize())
f.close()
def GetToken(self, key):
"""Returns a deserialized access token from the key's filename."""
value = None
cache_file = self.CacheFileName(key)
try:
f = open(cache_file)
value = AccessToken.UnSerialize(f.read())
f.close()
except (IOError, OSError) as e:
if e.errno != errno.ENOENT:
LOG.warning('FileSystemTokenCache.GetToken: '
'Failed to read cache file %s: %s', cache_file, e)
except Exception as e:
LOG.warning('FileSystemTokenCache.GetToken: '
'Failed to read cache file %s (possibly corrupted): %s',
cache_file, e)
LOG.debug('FileSystemTokenCache.GetToken: key=%s%s present (cache_file=%s)',
key, ' not' if value is None else '', cache_file)
return value
class OAuth2Client(object):
"""Common logic for OAuth2 clients."""
def __init__(self, cache_key_base, access_token_cache=None,
datetime_strategy=datetime.datetime, auth_uri=None,
token_uri=None, disable_ssl_certificate_validation=False,
proxy_host=None, proxy_port=None, proxy_user=None,
proxy_pass=None, ca_certs_file=None):
# datetime_strategy is used to invoke utcnow() on; it is injected into the
# constructor for unit testing purposes.
self.auth_uri = auth_uri
self.token_uri = token_uri
self.cache_key_base = cache_key_base
self.datetime_strategy = datetime_strategy
self.access_token_cache = access_token_cache or InMemoryTokenCache()
self.disable_ssl_certificate_validation = disable_ssl_certificate_validation
self.ca_certs_file = ca_certs_file
if proxy_host and proxy_port:
self._proxy_info = httplib2.ProxyInfo(socks.PROXY_TYPE_HTTP,
proxy_host,
proxy_port,
proxy_user=proxy_user,
proxy_pass=proxy_pass,
proxy_rdns=True)
else:
self._proxy_info = None
def CreateHttpRequest(self):
return httplib2.Http(
ca_certs=self.ca_certs_file,
disable_ssl_certificate_validation=(
self.disable_ssl_certificate_validation),
proxy_info=self._proxy_info)
def GetAccessToken(self):
"""Obtains an access token for this client.
This client's access token cache is first checked for an existing,
not-yet-expired access token. If none is found, the client obtains a fresh
access token from the OAuth2 provider's token endpoint.
Returns:
The cached or freshly obtained AccessToken.
Raises:
AccessTokenRefreshError if an error occurs.
"""
# Ensure only one thread at a time attempts to get (and possibly refresh)
# the access token. This doesn't prevent concurrent refresh attempts across
# multiple gsutil instances, but at least protects against multiple threads
# simultaneously attempting to refresh when gsutil -m is used.
token_exchange_lock.acquire()
try:
cache_key = self.CacheKey()
LOG.debug('GetAccessToken: checking cache for key %s', cache_key)
access_token = self.access_token_cache.GetToken(cache_key)
LOG.debug('GetAccessToken: token from cache: %s', access_token)
if access_token is None or access_token.ShouldRefresh():
LOG.debug('GetAccessToken: fetching fresh access token...')
access_token = self.FetchAccessToken()
LOG.debug('GetAccessToken: fresh access token: %s', access_token)
self.access_token_cache.PutToken(cache_key, access_token)
return access_token
finally:
token_exchange_lock.release()
def CacheKey(self):
"""Computes a cache key.
The cache key is computed as the SHA1 hash of the refresh token for user
accounts, or the hash of the gs_service_client_id for service accounts,
which satisfies the FileSystemTokenCache requirement that cache keys do not
leak information about token values.
Returns:
A hash key.
"""
h = sha1()
h.update(self.cache_key_base)
return h.hexdigest()
def GetAuthorizationHeader(self):
"""Gets the access token HTTP authorization header value.
Returns:
The value of an Authorization HTTP header that authenticates
requests with an OAuth2 access token.
"""
return 'Bearer %s' % self.GetAccessToken().token
class _BaseOAuth2ServiceAccountClient(OAuth2Client):
"""Base class for OAuth2ServiceAccountClients.
Args:
client_id: The OAuth2 client ID of this client.
access_token_cache: An optional instance of a TokenCache. If omitted or
None, an InMemoryTokenCache is used.
auth_uri: The URI for OAuth2 authorization.
token_uri: The URI used to refresh access tokens.
datetime_strategy: datetime module strategy to use.
disable_ssl_certificate_validation: True if certifications should not be
validated.
proxy_host: An optional string specifying the host name of an HTTP proxy
to be used.
proxy_port: An optional int specifying the port number of an HTTP proxy
to be used.
proxy_user: An optional string specifying the user name for interacting
with the HTTP proxy.
proxy_pass: An optional string specifying the password for interacting
with the HTTP proxy.
ca_certs_file: The cacerts.txt file to use.
"""
def __init__(self, client_id, access_token_cache=None, auth_uri=None,
token_uri=None, datetime_strategy=datetime.datetime,
disable_ssl_certificate_validation=False,
proxy_host=None, proxy_port=None, proxy_user=None,
proxy_pass=None, ca_certs_file=None):
super(_BaseOAuth2ServiceAccountClient, self).__init__(
cache_key_base=client_id, auth_uri=auth_uri, token_uri=token_uri,
access_token_cache=access_token_cache,
datetime_strategy=datetime_strategy,
disable_ssl_certificate_validation=disable_ssl_certificate_validation,
proxy_host=proxy_host, proxy_port=proxy_port, proxy_user=proxy_user,
proxy_pass=proxy_pass, ca_certs_file=ca_certs_file)
self._client_id = client_id
def FetchAccessToken(self):
credentials = self.GetCredentials()
http = self.CreateHttpRequest()
credentials.refresh(http)
return AccessToken(credentials.access_token, credentials.token_expiry,
datetime_strategy=self.datetime_strategy)
class OAuth2ServiceAccountClient(_BaseOAuth2ServiceAccountClient):
"""An OAuth2 service account client using .p12 or .pem keys."""
def __init__(self, client_id, private_key, password,
access_token_cache=None, auth_uri=None, token_uri=None,
datetime_strategy=datetime.datetime,
disable_ssl_certificate_validation=False,
proxy_host=None, proxy_port=None, proxy_user=None,
proxy_pass=None, ca_certs_file=None):
# Avoid long repeated kwargs list.
# pylint: disable=g-doc-args
"""Creates an OAuth2ServiceAccountClient.
Args:
client_id: The OAuth2 client ID of this client.
private_key: The private key associated with this service account.
password: The private key password used for the crypto signer.
Keyword arguments match the _BaseOAuth2ServiceAccountClient class.
"""
# pylint: enable=g-doc-args
super(OAuth2ServiceAccountClient, self).__init__(
client_id, auth_uri=auth_uri, token_uri=token_uri,
access_token_cache=access_token_cache,
datetime_strategy=datetime_strategy,
disable_ssl_certificate_validation=disable_ssl_certificate_validation,
proxy_host=proxy_host, proxy_port=proxy_port, proxy_user=proxy_user,
proxy_pass=proxy_pass, ca_certs_file=ca_certs_file)
self._private_key = private_key
self._password = password
def GetCredentials(self):
if HAS_CRYPTO:
return SignedJwtAssertionCredentials(
self._client_id, self._private_key, scope=DEFAULT_SCOPE,
private_key_password=self._password)
else:
raise MissingDependencyError(
'Service account authentication requires PyOpenSSL. Please install '
'this library and try again.')
# TODO: oauth2client should expose _ServiceAccountCredentials as it is the only
# way to properly set scopes. In the longer term this class should probably
# be refactored into oauth2client directly in a way that allows for setting of
# user agent and scopes. https://github.com/google/oauth2client/issues/164
# pylint: disable=protected-access
class ServiceAccountCredentials(service_account._ServiceAccountCredentials):
def to_json(self):
self.service_account_name = self._service_account_email
strip = (['_private_key'] +
Credentials.NON_SERIALIZED_MEMBERS)
return super(ServiceAccountCredentials, self)._to_json(strip)
@classmethod
def from_json(cls, s):
try:
data = json.loads(s)
retval = ServiceAccountCredentials(
service_account_id=data['_service_account_id'],
service_account_email=data['_service_account_email'],
private_key_id=data['_private_key_id'],
private_key_pkcs8_text=data['_private_key_pkcs8_text'],
scopes=[DEFAULT_SCOPE])
# TODO: Need to define user agent here,
# but it is not known until runtime.
retval.invalid = data['invalid']
retval.access_token = data['access_token']
if 'token_expiry' in data:
retval.token_expiry = datetime.datetime.strptime(
data['token_expiry'], EXPIRY_FORMAT)
return retval
except KeyError, e:
raise Exception('Your JSON credentials are invalid; '
'missing required entry %s.' % e[0])
# pylint: enable=protected-access
class OAuth2JsonServiceAccountClient(_BaseOAuth2ServiceAccountClient):
"""An OAuth2 service account client using .json keys."""
def __init__(self, client_id, service_account_email, private_key_id,
private_key_pkcs8_text, access_token_cache=None, auth_uri=None,
token_uri=None, datetime_strategy=datetime.datetime,
disable_ssl_certificate_validation=False,
proxy_host=None, proxy_port=None, proxy_user=None,
proxy_pass=None, ca_certs_file=None):
# Avoid long repeated kwargs list.
# pylint: disable=g-doc-args
"""Creates an OAuth2JsonServiceAccountClient.
Args:
client_id: The OAuth2 client ID of this client.
client_email: The email associated with this client.
private_key_id: The private key id associated with this service account.
private_key_pkcs8_text: The pkcs8 text containing the private key data.
Keyword arguments match the _BaseOAuth2ServiceAccountClient class.
"""
# pylint: enable=g-doc-args
super(OAuth2JsonServiceAccountClient, self).__init__(
client_id, auth_uri=auth_uri, token_uri=token_uri,
access_token_cache=access_token_cache,
datetime_strategy=datetime_strategy,
disable_ssl_certificate_validation=disable_ssl_certificate_validation,
proxy_host=proxy_host, proxy_port=proxy_port, proxy_user=proxy_user,
proxy_pass=proxy_pass, ca_certs_file=ca_certs_file)
self._service_account_email = service_account_email
self._private_key_id = private_key_id
self._private_key_pkcs8_text = private_key_pkcs8_text
def GetCredentials(self):
return ServiceAccountCredentials(
service_account_id=self._client_id,
service_account_email=self._service_account_email,
private_key_id=self._private_key_id,
private_key_pkcs8_text=self._private_key_pkcs8_text,
scopes=[DEFAULT_SCOPE])
# TODO: Need to plumb user agent through here.
class GsAccessTokenRefreshError(Exception):
"""Transient error when requesting access token."""
def __init__(self, e):
super(Exception, self).__init__(e)
class GsInvalidRefreshTokenError(Exception):
def __init__(self, e):
super(Exception, self).__init__(e)
class MissingDependencyError(Exception):
def __init__(self, e):
super(Exception, self).__init__(e)
class OAuth2UserAccountClient(OAuth2Client):
"""An OAuth2 client."""
def __init__(self, token_uri, client_id, client_secret, refresh_token,
auth_uri=None, access_token_cache=None,
datetime_strategy=datetime.datetime,
disable_ssl_certificate_validation=False,
proxy_host=None, proxy_port=None, proxy_user=None,
proxy_pass=None, ca_certs_file=None):
"""Creates an OAuth2UserAccountClient.
Args:
token_uri: The URI used to refresh access tokens.
client_id: The OAuth2 client ID of this client.
client_secret: The OAuth2 client secret of this client.
refresh_token: The token used to refresh the access token.
auth_uri: The URI for OAuth2 authorization.
access_token_cache: An optional instance of a TokenCache. If omitted or
None, an InMemoryTokenCache is used.
datetime_strategy: datetime module strategy to use.
disable_ssl_certificate_validation: True if certifications should not be
validated.
proxy_host: An optional string specifying the host name of an HTTP proxy
to be used.
proxy_port: An optional int specifying the port number of an HTTP proxy
to be used.
proxy_user: An optional string specifying the user name for interacting
with the HTTP proxy.
proxy_pass: An optional string specifying the password for interacting
with the HTTP proxy.
ca_certs_file: The cacerts.txt file to use.
"""
super(OAuth2UserAccountClient, self).__init__(
cache_key_base=refresh_token, auth_uri=auth_uri, token_uri=token_uri,
access_token_cache=access_token_cache,
datetime_strategy=datetime_strategy,
disable_ssl_certificate_validation=disable_ssl_certificate_validation,
proxy_host=proxy_host, proxy_port=proxy_port, proxy_user=proxy_user,
proxy_pass=proxy_pass, ca_certs_file=ca_certs_file)
self.token_uri = token_uri
self.client_id = client_id
self.client_secret = client_secret
self.refresh_token = refresh_token
def GetCredentials(self):
"""Fetches a credentials objects from the provider's token endpoint."""
access_token = self.GetAccessToken()
credentials = OAuth2Credentials(
access_token.token, self.client_id, self.client_secret,
self.refresh_token, access_token.expiry, self.token_uri, None)
return credentials
@Retry(GsAccessTokenRefreshError,
tries=config.get('OAuth2', 'oauth2_refresh_retries', 6),
timeout_secs=1)
def FetchAccessToken(self):
"""Fetches an access token from the provider's token endpoint.
Fetches an access token from this client's OAuth2 provider's token endpoint.
Returns:
The fetched AccessToken.
"""
try:
http = self.CreateHttpRequest()
credentials = OAuth2Credentials(None, self.client_id, self.client_secret,
self.refresh_token, None, self.token_uri, None)
credentials.refresh(http)
return AccessToken(credentials.access_token,
credentials.token_expiry, datetime_strategy=self.datetime_strategy)
except AccessTokenRefreshError, e:
if 'Invalid response 403' in e.message:
# This is the most we can do at the moment to accurately detect rate
# limiting errors since they come back as 403s with no further
# information.
raise GsAccessTokenRefreshError(e)
elif 'invalid_grant' in e.message:
LOG.info("""
Attempted to retrieve an access token from an invalid refresh token. Two common
cases in which you will see this error are:
1. Your refresh token was revoked.
2. Your refresh token was typed incorrectly.
""")
raise GsInvalidRefreshTokenError(e)
else:
raise
class OAuth2GCEClient(OAuth2Client):
"""OAuth2 client for GCE instance."""
def __init__(self):
super(OAuth2GCEClient, self).__init__(
cache_key_base='',
# Only InMemoryTokenCache can be used with empty cache_key_base.
access_token_cache=InMemoryTokenCache())
@Retry(GsAccessTokenRefreshError,
tries=6,
timeout_secs=1)
def FetchAccessToken(self):
response = None
try:
http = httplib2.Http()
response, content = http.request(META_TOKEN_URI, method='GET',
body=None, headers=META_HEADERS)
except Exception:
raise GsAccessTokenRefreshError()
if response.status == 200:
d = json.loads(content)
return AccessToken(
d['access_token'],
datetime.datetime.now() +
datetime.timedelta(seconds=d.get('expires_in', 0)),
datetime_strategy=self.datetime_strategy)
def _IsGCE():
try:
http = httplib2.Http()
response, _ = http.request(METADATA_SERVER)
return response.status == 200
except (httplib2.ServerNotFoundError, socket.error):
# We might see something like "No route to host" propagated as a socket
# error. We might also catch transient socket errors, but at that point
# we're going to fail anyway, just with a different error message. With
# this approach, we'll avoid having to enumerate all possible non-transient
# socket errors.
return False
except Exception, e:
LOG.warning("Failed to determine whether we're running on GCE, so we'll"
"assume that we aren't: %s", e)
return False
return False
def CreateOAuth2GCEClient():
return OAuth2GCEClient() if _IsGCE() else None
class AccessToken(object):
"""Encapsulates an OAuth2 access token."""
def __init__(self, token, expiry, datetime_strategy=datetime.datetime):
self.token = token
self.expiry = expiry
self.datetime_strategy = datetime_strategy
@staticmethod
def UnSerialize(query):
"""Creates an AccessToken object from its serialized form."""
def GetValue(d, key):
return (d.get(key, [None]))[0]
kv = cgi.parse_qs(query)
if not kv['token']:
return None
expiry = None
expiry_tuple = GetValue(kv, 'expiry')
if expiry_tuple:
try:
expiry = datetime.datetime(
*[int(n) for n in expiry_tuple.split(',')])
except:
return None
return AccessToken(GetValue(kv, 'token'), expiry)
def Serialize(self):
"""Serializes this object as URI-encoded key-value pairs."""
# There's got to be a better way to serialize a datetime. Unfortunately,
# there is no reliable way to convert into a unix epoch.
kv = {'token': self.token}
if self.expiry:
t = self.expiry
tupl = (t.year, t.month, t.day, t.hour, t.minute, t.second, t.microsecond)
kv['expiry'] = ','.join([str(i) for i in tupl])
return urllib.urlencode(kv)
def ShouldRefresh(self, time_delta=300):
"""Whether the access token needs to be refreshed.
Args:
time_delta: refresh access token when it expires within time_delta secs.
Returns:
True if the token is expired or about to expire, False if the
token should be expected to work. Note that the token may still
be rejected, e.g. if it has been revoked server-side.
"""
if self.expiry is None:
return False
return (self.datetime_strategy.utcnow()
+ datetime.timedelta(seconds=time_delta) > self.expiry)
def __eq__(self, other):
return self.token == other.token and self.expiry == other.expiry
def __ne__(self, other):
return not self.__eq__(other)
def __str__(self):
return 'AccessToken(token=%s, expiry=%sZ)' % (self.token, self.expiry)