| from __future__ import absolute_import |
| import collections |
| import functools |
| import logging |
| import warnings |
| |
| from ._collections import RecentlyUsedContainer |
| from .connectionpool import HTTPConnectionPool, HTTPSConnectionPool |
| from .connectionpool import port_by_scheme |
| from .exceptions import ( |
| LocationValueError, |
| MaxRetryError, |
| ProxySchemeUnknown, |
| InvalidProxyConfigurationWarning, |
| ) |
| from .packages import six |
| from .packages.six.moves.urllib.parse import urljoin |
| from .request import RequestMethods |
| from .util.url import parse_url |
| from .util.retry import Retry |
| |
| |
# Names exported by a star-import of this module.
__all__ = ["PoolManager", "ProxyManager", "proxy_from_url"]


# Module-level logger, named after this module's import path.
log = logging.getLogger(__name__)
| |
# Pool keyword arguments that configure TLS. ``PoolManager._new_pool`` removes
# every one of these from the request context before it builds a pool for the
# plain "http" scheme.
SSL_KEYWORDS = (
    "key_file",
    "cert_file",
    "cert_reqs",
    "ca_certs",
    "ssl_version",
    "ca_cert_dir",
    "ssl_context",
    "key_password",
)
| |
# All known keyword arguments that could be provided to the pool manager, its
# pools, or the underlying connections. This is used to construct a pool key.
# Each field is the request-context key with a "key_" prefix (the default key
# normalizer applies that prefix), which is why context keys that already
# start with "key_" or "_" show up as "key_key_..." and "key__...".
_key_fields = (
    "key_scheme",  # str
    "key_host",  # str
    "key_port",  # int
    "key_timeout",  # int or float or Timeout
    "key_retries",  # int or Retry
    "key_strict",  # bool
    "key_block",  # bool
    "key_source_address",  # str
    "key_key_file",  # str
    "key_key_password",  # str
    "key_cert_file",  # str
    "key_cert_reqs",  # str
    "key_ca_certs",  # str
    "key_ssl_version",  # str
    "key_ca_cert_dir",  # str
    "key_ssl_context",  # instance of ssl.SSLContext or urllib3.util.ssl_.SSLContext
    "key_maxsize",  # int
    "key_headers",  # dict
    "key__proxy",  # parsed proxy url
    "key__proxy_headers",  # dict
    "key_socket_options",  # list of (level (int), optname (int), value (int or str)) tuples
    "key__socks_options",  # dict
    "key_assert_hostname",  # bool or string
    "key_assert_fingerprint",  # str
    "key_server_hostname",  # str
)

#: The namedtuple class used to construct keys for the connection pool.
#: All custom key schemes should include the fields in this key at a minimum.
PoolKey = collections.namedtuple("PoolKey", _key_fields)
| |
| |
def _default_key_normalizer(key_class, request_context):
    """
    Build a hashable pool key from a request context mapping.

    The scheme and host are lower-cased (RFC 3986 treats both as
    case-insensitive), mapping-valued entries are frozen, and every entry is
    renamed with a ``key_`` prefix so it lines up with the fields of
    ``key_class``. Fields of ``key_class`` that the context does not supply
    are filled in with ``None``. To change this behaviour, register a
    different callable in ``key_fn_by_scheme``.

    :param key_class:
        The namedtuple class to instantiate. It must at least declare the
        ``key_scheme`` and ``key_host`` fields.
    :type key_class: namedtuple
    :param request_context:
        A dictionary-like object holding the context for a request. It is
        copied, never modified.
    :type request_context: dict

    :return: A namedtuple that can be used as a connection pool key.
    :rtype: PoolKey
    """
    # Work on a private copy; the caller's mapping stays untouched.
    normalized = request_context.copy()

    for name in ("scheme", "host"):
        normalized[name] = normalized[name].lower()

    # Dictionaries are not hashable, so freeze them into sets of item pairs.
    for name in ("headers", "_proxy_headers", "_socks_options"):
        mapping = normalized.get(name)
        if mapping is not None:
            normalized[name] = frozenset(mapping.items())

    # socket_options is commonly a list; a tuple is its hashable equivalent.
    if normalized.get("socket_options") is not None:
        normalized["socket_options"] = tuple(normalized["socket_options"])

    # namedtuple fields may not start with '_', hence the "key_" prefix.
    # The rename happens in place over a snapshot of the current keys.
    for name in tuple(normalized):
        normalized["key_" + name] = normalized.pop(name)

    # Any field the context did not mention defaults to None.
    for field_name in key_class._fields:
        normalized.setdefault(field_name, None)

    return key_class(**normalized)
| |
| |
#: Maps a URL scheme to the callable that turns a request context into a pool
#: key. Replace entries here to change how keys are built for every
#: PoolManager created afterwards; each PoolManager takes its own copy of this
#: dictionary, so a single instance can also be customised on its own.
key_fn_by_scheme = {
    scheme: functools.partial(_default_key_normalizer, PoolKey)
    for scheme in ("http", "https")
}

#: Maps a URL scheme to the connection pool class used for it.
pool_classes_by_scheme = dict(http=HTTPConnectionPool, https=HTTPSConnectionPool)
| |
| |
class PoolManager(RequestMethods):
    """
    Allows for arbitrary requests while transparently keeping track of
    necessary connection pools for you.

    :param num_pools:
        Number of connection pools to cache before discarding the least
        recently used pool.

    :param headers:
        Headers to include with all requests, unless other headers are given
        explicitly.

    :param \\**connection_pool_kw:
        Additional parameters are used to create fresh
        :class:`urllib3.connectionpool.ConnectionPool` instances.

    Example::

        >>> manager = PoolManager(num_pools=2)
        >>> r = manager.request('GET', 'http://google.com/')
        >>> r = manager.request('GET', 'http://google.com/mail')
        >>> r = manager.request('GET', 'http://yahoo.com/')
        >>> len(manager.pools)
        2

    """

    # Overridden by ProxyManager; ``None`` means requests are sent directly.
    proxy = None

    def __init__(self, num_pools=10, headers=None, **connection_pool_kw):
        RequestMethods.__init__(self, headers)
        self.connection_pool_kw = connection_pool_kw
        # Pools evicted from the container are closed via ``dispose_func``.
        self.pools = RecentlyUsedContainer(num_pools, dispose_func=lambda p: p.close())

        # Locally set the pool classes and keys so other PoolManagers can
        # override them.
        self.pool_classes_by_scheme = pool_classes_by_scheme
        self.key_fn_by_scheme = key_fn_by_scheme.copy()

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        self.clear()
        # Return False to re-raise any potential exceptions
        return False

    def _new_pool(self, scheme, host, port, request_context=None):
        """
        Create a new :class:`ConnectionPool` based on host, port, scheme, and
        any additional pool keyword arguments.

        If ``request_context`` is provided, it is provided as keyword arguments
        to the pool class used; otherwise a copy of ``self.connection_pool_kw``
        is used. Note that the ``scheme``, ``host`` and ``port`` entries (and,
        for the ``http`` scheme, every name in ``SSL_KEYWORDS``) are removed
        from ``request_context`` in place.

        This method is used to actually create the connection pools handed
        out by :meth:`connection_from_url` and companion methods. It is
        intended to be overridden for customization.
        """
        pool_cls = self.pool_classes_by_scheme[scheme]
        if request_context is None:
            request_context = self.connection_pool_kw.copy()

        # Although the context has everything necessary to create the pool,
        # this function has historically only used the scheme, host, and port
        # in the positional args. When an API change is acceptable these can
        # be removed.
        for key in ("scheme", "host", "port"):
            request_context.pop(key, None)

        # Plain HTTP pools are not handed the TLS-related options.
        if scheme == "http":
            for kw in SSL_KEYWORDS:
                request_context.pop(kw, None)

        return pool_cls(host, port, **request_context)

    def clear(self):
        """
        Empty our store of pools and direct them all to close.

        This will not affect in-flight connections, but they will not be
        re-used after completion.
        """
        self.pools.clear()

    def connection_from_host(self, host, port=None, scheme="http", pool_kwargs=None):
        """
        Get a :class:`ConnectionPool` based on the host, port, and scheme.

        If ``port`` isn't given, it will be derived from the ``scheme`` using
        ``urllib3.connectionpool.port_by_scheme``. If ``pool_kwargs`` is
        provided, it is merged with the instance's ``connection_pool_kw``
        variable and used to create the new connection pool, if one is
        needed.

        :raises LocationValueError: if ``host`` is empty or ``None``.
        """

        if not host:
            raise LocationValueError("No host specified.")

        request_context = self._merge_pool_kwargs(pool_kwargs)
        request_context["scheme"] = scheme or "http"
        if not port:
            # Unknown schemes fall back to port 80.
            port = port_by_scheme.get(request_context["scheme"].lower(), 80)
        request_context["port"] = port
        request_context["host"] = host

        return self.connection_from_context(request_context)

    def connection_from_context(self, request_context):
        """
        Get a :class:`ConnectionPool` based on the request context.

        ``request_context`` must at least contain the ``scheme`` key and its
        value must be a key in ``key_fn_by_scheme`` instance variable.
        """
        scheme = request_context["scheme"].lower()
        pool_key_constructor = self.key_fn_by_scheme[scheme]
        pool_key = pool_key_constructor(request_context)

        return self.connection_from_pool_key(pool_key, request_context=request_context)

    def connection_from_pool_key(self, pool_key, request_context=None):
        """
        Get a :class:`ConnectionPool` based on the provided pool key.

        ``pool_key`` should be a namedtuple that only contains immutable
        objects. When no pool is cached under ``pool_key``, a new one is
        created from ``request_context``, which must then contain the
        ``scheme``, ``host``, and ``port`` keys.
        """
        with self.pools.lock:
            # If the scheme, host, or port doesn't match existing open
            # connections, open a new ConnectionPool.
            pool = self.pools.get(pool_key)
            if pool:
                return pool

            # Make a fresh ConnectionPool of the desired type
            scheme = request_context["scheme"]
            host = request_context["host"]
            port = request_context["port"]
            pool = self._new_pool(scheme, host, port, request_context=request_context)
            self.pools[pool_key] = pool

        return pool

    def connection_from_url(self, url, pool_kwargs=None):
        """
        Similar to :func:`urllib3.connectionpool.connection_from_url`.

        If ``pool_kwargs`` is not provided and a new pool needs to be
        constructed, ``self.connection_pool_kw`` is used to initialize
        the :class:`urllib3.connectionpool.ConnectionPool`. If ``pool_kwargs``
        is provided, it is merged over ``self.connection_pool_kw`` (see
        :meth:`connection_from_host`). Note that if a new pool does not
        need to be created for the request, the provided ``pool_kwargs`` are
        not used.
        """
        u = parse_url(url)
        return self.connection_from_host(
            u.host, port=u.port, scheme=u.scheme, pool_kwargs=pool_kwargs
        )

    def _merge_pool_kwargs(self, override):
        """
        Merge a dictionary of override values for self.connection_pool_kw.

        This does not modify self.connection_pool_kw and returns a new dict.
        Any keys in the override dictionary with a value of ``None`` are
        removed from the merged dictionary.
        """
        base_pool_kwargs = self.connection_pool_kw.copy()
        if override:
            for key, value in override.items():
                if value is None:
                    try:
                        del base_pool_kwargs[key]
                    except KeyError:
                        pass
                else:
                    base_pool_kwargs[key] = value
        return base_pool_kwargs

    def urlopen(self, method, url, redirect=True, **kw):
        """
        Same as :meth:`urllib3.connectionpool.HTTPConnectionPool.urlopen`
        with custom cross-host redirect logic and only sends the request-uri
        portion of the ``url``.

        The given ``url`` parameter must be absolute, such that an appropriate
        :class:`urllib3.connectionpool.ConnectionPool` can be chosen for it.

        :param method: HTTP method to use for the request.
        :param url: Absolute URL to request.
        :param redirect:
            When true, redirect responses are followed by this method. The
            pool itself is always called with ``redirect=False``.
        :param \\**kw:
            Passed through to the pool's ``urlopen``. A ``headers`` mapping
            given here is never modified by this method.
        :return: The response of the final request.
        :raises MaxRetryError:
            if the redirect budget is exhausted and
            ``retries.raise_on_redirect`` is true.
        """
        u = parse_url(url)
        conn = self.connection_from_host(u.host, port=u.port, scheme=u.scheme)

        # Redirects are handled here so that they can cross hosts.
        kw["assert_same_host"] = False
        kw["redirect"] = False

        if "headers" not in kw:
            kw["headers"] = self.headers.copy()

        # A plain-HTTP request through a proxy carries the absolute URL.
        if self.proxy is not None and u.scheme == "http":
            response = conn.urlopen(method, url, **kw)
        else:
            response = conn.urlopen(method, u.request_uri, **kw)

        redirect_location = redirect and response.get_redirect_location()
        if not redirect_location:
            return response

        # Support relative URLs for redirecting.
        redirect_location = urljoin(url, redirect_location)

        # RFC 7231, Section 6.4.4
        if response.status == 303:
            method = "GET"

        retries = kw.get("retries")
        if not isinstance(retries, Retry):
            retries = Retry.from_int(retries, redirect=redirect)

        # Strip headers marked as unsafe to forward to the redirected location.
        # Check remove_headers_on_redirect to avoid a potential network call within
        # conn.is_same_host() which may use socket.gethostbyname() in the future.
        if retries.remove_headers_on_redirect and not conn.is_same_host(
            redirect_location
        ):
            # Strip from a copy: ``kw["headers"]`` may be the very mapping the
            # caller passed in, and it must not lose entries as a side effect.
            new_headers = kw["headers"].copy()
            for header in kw["headers"]:
                if header.lower() in retries.remove_headers_on_redirect:
                    new_headers.pop(header, None)
            kw["headers"] = new_headers

        try:
            retries = retries.increment(method, url, response=response, _pool=conn)
        except MaxRetryError:
            if retries.raise_on_redirect:
                response.drain_conn()
                raise
            return response

        kw["retries"] = retries
        kw["redirect"] = redirect

        log.info("Redirecting %s -> %s", url, redirect_location)

        response.drain_conn()
        return self.urlopen(method, redirect_location, **kw)
| |
| |
class ProxyManager(PoolManager):
    """
    A :class:`PoolManager` that routes every request through one proxy.
    HTTPS URLs are reached with the CONNECT method.

    :param proxy_url:
        The URL of the proxy to be used.

    :param proxy_headers:
        A dictionary containing headers that will be sent to the proxy. In case
        of HTTP they are being sent with each request, while in the
        HTTPS/CONNECT case they are sent only once. Could be used for proxy
        authentication.

    Example:
        >>> proxy = urllib3.ProxyManager('http://localhost:3128/')
        >>> r1 = proxy.request('GET', 'http://google.com/')
        >>> r2 = proxy.request('GET', 'http://httpbin.org/')
        >>> len(proxy.pools)
        1
        >>> r3 = proxy.request('GET', 'https://httpbin.org/')
        >>> r4 = proxy.request('GET', 'https://twitter.com/')
        >>> len(proxy.pools)
        3

    """

    def __init__(
        self,
        proxy_url,
        num_pools=10,
        headers=None,
        proxy_headers=None,
        **connection_pool_kw
    ):
        # An existing pool may be given in place of a URL; rebuild its URL.
        if isinstance(proxy_url, HTTPConnectionPool):
            pool = proxy_url
            proxy_url = "%s://%s:%i" % (pool.scheme, pool.host, pool.port)

        parsed = parse_url(proxy_url)
        if not parsed.port:
            # No explicit port: use the scheme's default, or 80 if unknown.
            parsed = parsed._replace(port=port_by_scheme.get(parsed.scheme, 80))

        if parsed.scheme not in ("http", "https"):
            raise ProxySchemeUnknown(parsed.scheme)

        self.proxy = parsed
        self.proxy_headers = proxy_headers or {}

        # Every pool created by this manager is told about the proxy.
        connection_pool_kw["_proxy"] = self.proxy
        connection_pool_kw["_proxy_headers"] = self.proxy_headers

        super(ProxyManager, self).__init__(num_pools, headers, **connection_pool_kw)

    def connection_from_host(self, host, port=None, scheme="http", pool_kwargs=None):
        """
        Return the pool for ``host`` when ``scheme`` is ``"https"``; for any
        other scheme return the pool that talks to the proxy itself.
        """
        if scheme != "https":
            # Everything that is not HTTPS shares the pool keyed on the proxy.
            host = self.proxy.host
            port = self.proxy.port
            scheme = self.proxy.scheme

        return super(ProxyManager, self).connection_from_host(
            host, port, scheme, pool_kwargs=pool_kwargs
        )

    def _set_proxy_headers(self, url, headers=None):
        """
        Return the headers a proxy needs, namely Accept and Host, overlaid
        with the caller's ``headers`` so user-provided values take priority.
        """
        merged = {"Accept": "*/*"}

        target = parse_url(url).netloc
        if target:
            merged["Host"] = target

        if headers:
            merged.update(headers)
        return merged

    def _validate_proxy_scheme_url_selection(self, url_scheme):
        """
        Warn when an HTTPS URL is about to be requested through a proxy that
        is itself addressed over HTTPS.
        """
        if url_scheme != "https" or self.proxy.scheme != "https":
            return

        warnings.warn(
            "Your proxy configuration specified an HTTPS scheme for the proxy. "
            "Are you sure you want to use HTTPS to contact the proxy? "
            "This most likely indicates an error in your configuration. "
            "Read this issue for more info: "
            "https://github.com/urllib3/urllib3/issues/1850",
            InvalidProxyConfigurationWarning,
            stacklevel=3,
        )

    def urlopen(self, method, url, redirect=True, **kw):
        "Same as HTTP(S)ConnectionPool.urlopen, ``url`` must be absolute."
        parsed = parse_url(url)
        self._validate_proxy_scheme_url_selection(parsed.scheme)

        if parsed.scheme == "http":
            # For proxied HTTPS requests, httplib sets the necessary headers
            # on the CONNECT to the proxy. For HTTP, we'll definitely
            # need to set 'Host' at the very least.
            supplied = kw.get("headers", self.headers)
            kw["headers"] = self._set_proxy_headers(url, supplied)

        return super(ProxyManager, self).urlopen(method, url, redirect=redirect, **kw)
| |
| |
def proxy_from_url(url, **kw):
    """
    Create a :class:`ProxyManager` for the proxy at ``url``.

    :param url: The URL of the proxy; passed on as ``proxy_url``.
    :param \\**kw: Forwarded unchanged to the :class:`ProxyManager` constructor.
    :return: The newly created :class:`ProxyManager`.
    """
    manager = ProxyManager(proxy_url=url, **kw)
    return manager