blob: 67d4b1be94131829f8a75d07b876f6cf37c5ba48 [file] [log] [blame]
# (c) 2005 Ian Bicking and contributors; written for Paste (http://pythonpaste.org)
# Licensed under the MIT license: http://www.opensource.org/licenses/mit-license.php
"""
An application that proxies WSGI requests to a remote server.
TODO:
* Send ``Via`` header? It's not clear to me this is a Via in the
style of a typical proxy.
* Other headers or metadata? I put in X-Forwarded-For, but that's it.
* Signed data of non-HTTP keys? This would be for things like
REMOTE_USER.
* Something to indicate what the original URL was? The original host,
scheme, and base path.
* Rewriting ``Location`` headers? mod_proxy does this.
* Rewriting body? (Probably not on this one -- that can be done with
a different middleware that wraps this middleware)
* Example::
use = egg:Paste#proxy
address = http://server3:8680/exist/rest/db/orgs/sch/config/
allowed_request_methods = GET
"""
from six.moves import http_client as httplib
from six.moves.urllib import parse as urlparse
from six.moves.urllib.parse import quote
import six
from paste import httpexceptions
from paste.util.converters import aslist
# Remove these headers from response (specify lower case header
# names):
filtered_headers = (
'transfer-encoding',
'connection',
'keep-alive',
'proxy-authenticate',
'proxy-authorization',
'te',
'trailers',
'upgrade',
)
class Proxy(object):
def __init__(self, address, allowed_request_methods=(),
suppress_http_headers=()):
self.address = address
self.parsed = urlparse.urlsplit(address)
self.scheme = self.parsed[0].lower()
self.host = self.parsed[1]
self.path = self.parsed[2]
self.allowed_request_methods = [
x.lower() for x in allowed_request_methods if x]
self.suppress_http_headers = [
x.lower() for x in suppress_http_headers if x]
def __call__(self, environ, start_response):
if (self.allowed_request_methods and
environ['REQUEST_METHOD'].lower() not in self.allowed_request_methods):
return httpexceptions.HTTPBadRequest("Disallowed")(environ, start_response)
if self.scheme == 'http':
ConnClass = httplib.HTTPConnection
elif self.scheme == 'https':
ConnClass = httplib.HTTPSConnection
else:
raise ValueError(
"Unknown scheme for %r: %r" % (self.address, self.scheme))
conn = ConnClass(self.host)
headers = {}
for key, value in environ.items():
if key.startswith('HTTP_'):
key = key[5:].lower().replace('_', '-')
if key == 'host' or key in self.suppress_http_headers:
continue
headers[key] = value
headers['host'] = self.host
if 'REMOTE_ADDR' in environ:
headers['x-forwarded-for'] = environ['REMOTE_ADDR']
if environ.get('CONTENT_TYPE'):
headers['content-type'] = environ['CONTENT_TYPE']
if environ.get('CONTENT_LENGTH'):
if environ['CONTENT_LENGTH'] == '-1':
# This is a special case, where the content length is basically undetermined
body = environ['wsgi.input'].read(-1)
headers['content-length'] = str(len(body))
else:
headers['content-length'] = environ['CONTENT_LENGTH']
length = int(environ['CONTENT_LENGTH'])
body = environ['wsgi.input'].read(length)
else:
body = ''
path_info = quote(environ['PATH_INFO'])
if self.path:
request_path = path_info
if request_path and request_path[0] == '/':
request_path = request_path[1:]
path = urlparse.urljoin(self.path, request_path)
else:
path = path_info
if environ.get('QUERY_STRING'):
path += '?' + environ['QUERY_STRING']
conn.request(environ['REQUEST_METHOD'],
path,
body, headers)
res = conn.getresponse()
headers_out = parse_headers(res.msg)
status = '%s %s' % (res.status, res.reason)
start_response(status, headers_out)
# @@: Default?
length = res.getheader('content-length')
if length is not None:
body = res.read(int(length))
else:
body = res.read()
conn.close()
return [body]
def make_proxy(global_conf, address, allowed_request_methods="",
suppress_http_headers=""):
"""
Make a WSGI application that proxies to another address:
``address``
the full URL ending with a trailing ``/``
``allowed_request_methods``:
a space seperated list of request methods (e.g., ``GET POST``)
``suppress_http_headers``
a space seperated list of http headers (lower case, without
the leading ``http_``) that should not be passed on to target
host
"""
allowed_request_methods = aslist(allowed_request_methods)
suppress_http_headers = aslist(suppress_http_headers)
return Proxy(
address,
allowed_request_methods=allowed_request_methods,
suppress_http_headers=suppress_http_headers)
class TransparentProxy(object):
"""
A proxy that sends the request just as it was given, including
respecting HTTP_HOST, wsgi.url_scheme, etc.
This is a way of translating WSGI requests directly to real HTTP
requests. All information goes in the environment; modify it to
modify the way the request is made.
If you specify ``force_host`` (and optionally ``force_scheme``)
then HTTP_HOST won't be used to determine where to connect to;
instead a specific host will be connected to, but the ``Host``
header in the request will remain intact.
"""
def __init__(self, force_host=None,
force_scheme='http'):
self.force_host = force_host
self.force_scheme = force_scheme
def __repr__(self):
return '<%s %s force_host=%r force_scheme=%r>' % (
self.__class__.__name__,
hex(id(self)),
self.force_host, self.force_scheme)
def __call__(self, environ, start_response):
scheme = environ['wsgi.url_scheme']
if self.force_host is None:
conn_scheme = scheme
else:
conn_scheme = self.force_scheme
if conn_scheme == 'http':
ConnClass = httplib.HTTPConnection
elif conn_scheme == 'https':
ConnClass = httplib.HTTPSConnection
else:
raise ValueError(
"Unknown scheme %r" % scheme)
if 'HTTP_HOST' not in environ:
raise ValueError(
"WSGI environ must contain an HTTP_HOST key")
host = environ['HTTP_HOST']
if self.force_host is None:
conn_host = host
else:
conn_host = self.force_host
conn = ConnClass(conn_host)
headers = {}
for key, value in environ.items():
if key.startswith('HTTP_'):
key = key[5:].lower().replace('_', '-')
headers[key] = value
headers['host'] = host
if 'REMOTE_ADDR' in environ and 'HTTP_X_FORWARDED_FOR' not in environ:
headers['x-forwarded-for'] = environ['REMOTE_ADDR']
if environ.get('CONTENT_TYPE'):
headers['content-type'] = environ['CONTENT_TYPE']
if environ.get('CONTENT_LENGTH'):
length = int(environ['CONTENT_LENGTH'])
body = environ['wsgi.input'].read(length)
if length == -1:
environ['CONTENT_LENGTH'] = str(len(body))
elif 'CONTENT_LENGTH' not in environ:
body = ''
length = 0
else:
body = ''
length = 0
path = (environ.get('SCRIPT_NAME', '')
+ environ.get('PATH_INFO', ''))
path = quote(path)
if 'QUERY_STRING' in environ:
path += '?' + environ['QUERY_STRING']
conn.request(environ['REQUEST_METHOD'],
path, body, headers)
res = conn.getresponse()
headers_out = parse_headers(res.msg)
status = '%s %s' % (res.status, res.reason)
start_response(status, headers_out)
# @@: Default?
length = res.getheader('content-length')
if length is not None:
body = res.read(int(length))
else:
body = res.read()
conn.close()
return [body]
def parse_headers(message):
"""
Turn a Message object into a list of WSGI-style headers.
"""
headers_out = []
if six.PY3:
for header, value in message.items():
if header.lower() not in filtered_headers:
headers_out.append((header, value))
else:
for full_header in message.headers:
if not full_header:
# Shouldn't happen, but we'll just ignore
continue
if full_header[0].isspace():
# Continuation line, add to the last header
if not headers_out:
raise ValueError(
"First header starts with a space (%r)" % full_header)
last_header, last_value = headers_out.pop()
value = last_value + ' ' + full_header.strip()
headers_out.append((last_header, value))
continue
try:
header, value = full_header.split(':', 1)
except:
raise ValueError("Invalid header: %r" % full_header)
value = value.strip()
if header.lower() not in filtered_headers:
headers_out.append((header, value))
return headers_out
def make_transparent_proxy(
global_conf, force_host=None, force_scheme='http'):
"""
Create a proxy that connects to a specific host, but does
absolutely no other filtering, including the Host header.
"""
return TransparentProxy(force_host=force_host,
force_scheme=force_scheme)