| # (c) 2005 Ian Bicking and contributors; written for Paste (http://pythonpaste.org) |
| # Licensed under the MIT license: http://www.opensource.org/licenses/mit-license.php |
| """ |
| An application that proxies WSGI requests to a remote server. |
| |
| TODO: |
| |
| * Send ``Via`` header? It's not clear to me this is a Via in the |
| style of a typical proxy. |
| |
| * Other headers or metadata? I put in X-Forwarded-For, but that's it. |
| |
| * Signed data of non-HTTP keys? This would be for things like |
| REMOTE_USER. |
| |
| * Something to indicate what the original URL was? The original host, |
| scheme, and base path. |
| |
| * Rewriting ``Location`` headers? mod_proxy does this. |
| |
| * Rewriting body? (Probably not on this one -- that can be done with |
| a different middleware that wraps this middleware) |
| |
| * Example:: |
| |
| use = egg:Paste#proxy |
| address = http://server3:8680/exist/rest/db/orgs/sch/config/ |
| allowed_request_methods = GET |
| |
| """ |
| |
| from six.moves import http_client as httplib |
| from six.moves.urllib import parse as urlparse |
| from six.moves.urllib.parse import quote |
| import six |
| |
| from paste import httpexceptions |
| from paste.util.converters import aslist |
| |
# Remove these headers from response (specify lower case header
# names).  These are the hop-by-hop headers, which describe a single
# connection rather than the message being relayed:
filtered_headers = (
    'transfer-encoding',
    'connection',
    'keep-alive',
    'proxy-authenticate',
    'proxy-authorization',
    'te',
    # RFC 2616's hop-by-hop list spells this "Trailers", but the header
    # that is actually sent is "Trailer"; filter both spellings.
    'trailer',
    'trailers',
    'upgrade',
)
| |
class Proxy(object):
    """
    A WSGI application that forwards every request to a fixed remote
    ``address`` and relays the remote response back to the caller.

    ``address``
        the URL to forward to; its scheme must be ``http`` or
        ``https`` (checked when a request is handled) and its path,
        when present, is the base that ``PATH_INFO`` is joined onto

    ``allowed_request_methods``
        an iterable of request method names; when it is non-empty,
        requests using any other method get an ``HTTPBadRequest``
        response instead of being forwarded

    ``suppress_http_headers``
        an iterable of request header names (compared in lower case)
        that are not passed on to the remote server
    """

    def __init__(self, address, allowed_request_methods=(),
                 suppress_http_headers=()):
        self.address = address
        self.parsed = urlparse.urlsplit(address)
        self.scheme = self.parsed[0].lower()
        self.host = self.parsed[1]
        self.path = self.parsed[2]
        # Empty entries are dropped and names are normalised to lower
        # case so that the comparisons in __call__ are case-insensitive.
        self.allowed_request_methods = [
            x.lower() for x in allowed_request_methods if x]

        self.suppress_http_headers = [
            x.lower() for x in suppress_http_headers if x]

    def __call__(self, environ, start_response):
        """
        Forward the request described by ``environ`` to the remote
        server and return its response body as a one-item list.

        Raises ``ValueError`` when the configured address uses a scheme
        other than ``http`` or ``https``.
        """
        if (self.allowed_request_methods and
                environ['REQUEST_METHOD'].lower()
                not in self.allowed_request_methods):
            return httpexceptions.HTTPBadRequest("Disallowed")(
                environ, start_response)

        if self.scheme == 'http':
            ConnClass = httplib.HTTPConnection
        elif self.scheme == 'https':
            ConnClass = httplib.HTTPSConnection
        else:
            raise ValueError(
                "Unknown scheme for %r: %r" % (self.address, self.scheme))
        conn = ConnClass(self.host)
        # Everything from here on runs under try/finally so that the
        # connection is released even if building or sending the request,
        # or relaying the response, raises.
        try:
            # Rebuild the request headers from the HTTP_* environ keys.
            headers = {}
            for key, value in environ.items():
                if key.startswith('HTTP_'):
                    key = key[5:].lower().replace('_', '-')
                    if key == 'host' or key in self.suppress_http_headers:
                        continue
                    headers[key] = value
            # The Host header always names the remote server, not the
            # host the client originally addressed.
            headers['host'] = self.host
            if 'REMOTE_ADDR' in environ:
                headers['x-forwarded-for'] = environ['REMOTE_ADDR']
            if environ.get('CONTENT_TYPE'):
                headers['content-type'] = environ['CONTENT_TYPE']
            if environ.get('CONTENT_LENGTH'):
                if environ['CONTENT_LENGTH'] == '-1':
                    # This is a special case, where the content length
                    # is basically undetermined: read everything and
                    # send the real size.
                    body = environ['wsgi.input'].read(-1)
                    headers['content-length'] = str(len(body))
                else:
                    headers['content-length'] = environ['CONTENT_LENGTH']
                    length = int(environ['CONTENT_LENGTH'])
                    body = environ['wsgi.input'].read(length)
            else:
                body = ''

            path_info = quote(environ['PATH_INFO'])
            if self.path:
                # Drop the leading slash so that PATH_INFO is resolved
                # relative to the configured base path.
                request_path = path_info
                if request_path and request_path[0] == '/':
                    request_path = request_path[1:]

                path = urlparse.urljoin(self.path, request_path)
            else:
                path = path_info
            if environ.get('QUERY_STRING'):
                path += '?' + environ['QUERY_STRING']

            conn.request(environ['REQUEST_METHOD'],
                         path,
                         body, headers)
            res = conn.getresponse()
            headers_out = parse_headers(res.msg)

            status = '%s %s' % (res.status, res.reason)
            start_response(status, headers_out)
            # @@: Default?
            length = res.getheader('content-length')
            if length is not None:
                body = res.read(int(length))
            else:
                body = res.read()
        finally:
            conn.close()
        return [body]
| |
def make_proxy(global_conf, address, allowed_request_methods="",
               suppress_http_headers=""):
    """
    Build a ``Proxy`` WSGI application that forwards to another
    address.  ``global_conf`` is accepted but not used.

    ``address``
        the full URL ending with a trailing ``/``

    ``allowed_request_methods``
        a space separated list of request methods (e.g., ``GET POST``)

    ``suppress_http_headers``
        a space separated list of http headers (lower case, without
        the leading ``http_``) that should not be passed on to the
        target host
    """
    # Both settings are run through ``aslist`` before being handed to
    # the Proxy constructor.
    return Proxy(
        address,
        allowed_request_methods=aslist(allowed_request_methods),
        suppress_http_headers=aslist(suppress_http_headers))
| |
| |
class TransparentProxy(object):

    """
    A proxy that sends the request just as it was given, including
    respecting HTTP_HOST, wsgi.url_scheme, etc.

    This is a way of translating WSGI requests directly to real HTTP
    requests.  All information goes in the environment; modify it to
    modify the way the request is made.

    If you specify ``force_host`` (and optionally ``force_scheme``)
    then HTTP_HOST won't be used to determine where to connect to;
    instead a specific host will be connected to, but the ``Host``
    header in the request will remain intact.
    """

    def __init__(self, force_host=None,
                 force_scheme='http'):
        self.force_host = force_host
        self.force_scheme = force_scheme

    def __repr__(self):
        return '<%s %s force_host=%r force_scheme=%r>' % (
            self.__class__.__name__,
            hex(id(self)),
            self.force_host, self.force_scheme)

    def __call__(self, environ, start_response):
        """
        Send the request described by ``environ`` as a real HTTP
        request and return the response body as a one-item list.

        Raises ``ValueError`` when the connection scheme is neither
        ``http`` nor ``https``, or when ``environ`` has no
        ``HTTP_HOST`` key.
        """
        scheme = environ['wsgi.url_scheme']
        # force_scheme only takes effect together with force_host.
        if self.force_host is None:
            conn_scheme = scheme
        else:
            conn_scheme = self.force_scheme
        if conn_scheme == 'http':
            ConnClass = httplib.HTTPConnection
        elif conn_scheme == 'https':
            ConnClass = httplib.HTTPSConnection
        else:
            # Report the scheme that was actually rejected, which is
            # force_scheme when a host is being forced.
            raise ValueError(
                "Unknown scheme %r" % conn_scheme)
        if 'HTTP_HOST' not in environ:
            raise ValueError(
                "WSGI environ must contain an HTTP_HOST key")
        host = environ['HTTP_HOST']
        if self.force_host is None:
            conn_host = host
        else:
            conn_host = self.force_host
        conn = ConnClass(conn_host)
        # Everything from here on runs under try/finally so that the
        # connection is released even if building or sending the request,
        # or relaying the response, raises.
        try:
            # Rebuild the request headers from the HTTP_* environ keys.
            headers = {}
            for key, value in environ.items():
                if key.startswith('HTTP_'):
                    key = key[5:].lower().replace('_', '-')
                    headers[key] = value
            headers['host'] = host
            # An X-Forwarded-For header supplied with the request is
            # left untouched.
            if ('REMOTE_ADDR' in environ
                    and 'HTTP_X_FORWARDED_FOR' not in environ):
                headers['x-forwarded-for'] = environ['REMOTE_ADDR']
            if environ.get('CONTENT_TYPE'):
                headers['content-type'] = environ['CONTENT_TYPE']
            if environ.get('CONTENT_LENGTH'):
                length = int(environ['CONTENT_LENGTH'])
                body = environ['wsgi.input'].read(length)
                if length == -1:
                    # Undetermined length: record the size that was
                    # actually read back into the environ.
                    environ['CONTENT_LENGTH'] = str(len(body))
            else:
                body = ''

            path = (environ.get('SCRIPT_NAME', '')
                    + environ.get('PATH_INFO', ''))
            path = quote(path)
            # Only append a query string when there is one, so an empty
            # QUERY_STRING does not leave a dangling "?" on the URL.
            if environ.get('QUERY_STRING'):
                path += '?' + environ['QUERY_STRING']
            conn.request(environ['REQUEST_METHOD'],
                         path, body, headers)
            res = conn.getresponse()
            headers_out = parse_headers(res.msg)

            status = '%s %s' % (res.status, res.reason)
            start_response(status, headers_out)
            # @@: Default?
            length = res.getheader('content-length')
            if length is not None:
                body = res.read(int(length))
            else:
                body = res.read()
        finally:
            conn.close()
        return [body]
| |
def parse_headers(message):
    """
    Turn a Message object into a list of WSGI-style headers.

    Returns a list of ``(name, value)`` tuples.  Headers whose lower
    case name appears in ``filtered_headers`` are left out.

    On Python 2 the raw header lines are parsed by hand; a
    ``ValueError`` is raised if the first line is a continuation line
    or if a line contains no ``:`` separator.
    """
    headers_out = []
    if six.PY3:
        for header, value in message.items():
            if header.lower() not in filtered_headers:
                headers_out.append((header, value))
    else:
        for full_header in message.headers:
            if not full_header:
                # Shouldn't happen, but we'll just ignore
                continue
            if full_header[0].isspace():
                # Continuation line, add to the last header
                if not headers_out:
                    raise ValueError(
                        "First header starts with a space (%r)" % full_header)
                last_header, last_value = headers_out.pop()
                value = last_value + ' ' + full_header.strip()
                headers_out.append((last_header, value))
                continue
            try:
                header, value = full_header.split(':', 1)
            except ValueError:
                # Unpacking fails only when the line has no ':'; catch
                # just that rather than every exception.
                raise ValueError("Invalid header: %r" % full_header)
            value = value.strip()
            if header.lower() not in filtered_headers:
                headers_out.append((header, value))
    return headers_out
| |
def make_transparent_proxy(
        global_conf, force_host=None, force_scheme='http'):
    """
    Create a proxy that connects to a specific host, but does
    absolutely no other filtering, including the Host header.

    ``global_conf`` is accepted but not used; ``force_host`` and
    ``force_scheme`` are handed to ``TransparentProxy`` unchanged.
    """
    options = dict(force_host=force_host, force_scheme=force_scheme)
    return TransparentProxy(**options)