blob: 746410b19e57b8753e654796525a7a1c1e956d6b [file] [log] [blame]
# -*- test-case-name: twisted.web.test.test_proxy -*-
# Copyright (c) 2001-2010 Twisted Matrix Laboratories.
# See LICENSE for details.
"""
Simplistic HTTP proxy support.
This comes in two main variants - the Proxy and the ReverseProxy.
When a Proxy is in use, a browser trying to connect to a server (say,
www.yahoo.com) will be intercepted by the Proxy, and the proxy will covertly
connect to the server, and return the result.
When a ReverseProxy is in use, the client connects directly to the ReverseProxy
(say, www.yahoo.com) which farms off the request to one of a pool of servers,
and returns the result.
Normally, a Proxy is used on the client end of an Internet connection, while a
ReverseProxy is used on the server end.
"""
import urlparse
from urllib import quote as urlquote
from twisted.internet import reactor
from twisted.internet.protocol import ClientFactory
from twisted.web.resource import Resource
from twisted.web.server import NOT_DONE_YET
from twisted.web.http import HTTPClient, Request, HTTPChannel
class ProxyClient(HTTPClient):
"""
Used by ProxyClientFactory to implement a simple web proxy.
@ivar _finished: A flag which indicates whether or not the original request
has been finished yet.
"""
_finished = False
def __init__(self, command, rest, version, headers, data, father):
self.father = father
self.command = command
self.rest = rest
if "proxy-connection" in headers:
del headers["proxy-connection"]
headers["connection"] = "close"
headers.pop('keep-alive', None)
self.headers = headers
self.data = data
def connectionMade(self):
self.sendCommand(self.command, self.rest)
for header, value in self.headers.items():
self.sendHeader(header, value)
self.endHeaders()
self.transport.write(self.data)
def handleStatus(self, version, code, message):
self.father.setResponseCode(int(code), message)
def handleHeader(self, key, value):
# t.web.server.Request sets default values for these headers in its
# 'process' method. When these headers are received from the remote
# server, they ought to override the defaults, rather than append to
# them.
if key.lower() in ['server', 'date', 'content-type']:
self.father.responseHeaders.setRawHeaders(key, [value])
else:
self.father.responseHeaders.addRawHeader(key, value)
def handleResponsePart(self, buffer):
self.father.write(buffer)
def handleResponseEnd(self):
"""
Finish the original request, indicating that the response has been
completely written to it, and disconnect the outgoing transport.
"""
if not self._finished:
self._finished = True
self.father.finish()
self.transport.loseConnection()
class ProxyClientFactory(ClientFactory):
"""
Used by ProxyRequest to implement a simple web proxy.
"""
protocol = ProxyClient
def __init__(self, command, rest, version, headers, data, father):
self.father = father
self.command = command
self.rest = rest
self.headers = headers
self.data = data
self.version = version
def buildProtocol(self, addr):
return self.protocol(self.command, self.rest, self.version,
self.headers, self.data, self.father)
def clientConnectionFailed(self, connector, reason):
"""
Report a connection failure in a response to the incoming request as
an error.
"""
self.father.setResponseCode(501, "Gateway error")
self.father.responseHeaders.addRawHeader("Content-Type", "text/html")
self.father.write("<H1>Could not connect</H1>")
self.father.finish()
class ProxyRequest(Request):
"""
Used by Proxy to implement a simple web proxy.
@ivar reactor: the reactor used to create connections.
@type reactor: object providing L{twisted.internet.interfaces.IReactorTCP}
"""
protocols = {'http': ProxyClientFactory}
ports = {'http': 80}
def __init__(self, channel, queued, reactor=reactor):
Request.__init__(self, channel, queued)
self.reactor = reactor
def process(self):
parsed = urlparse.urlparse(self.uri)
protocol = parsed[0]
host = parsed[1]
port = self.ports[protocol]
if ':' in host:
host, port = host.split(':')
port = int(port)
rest = urlparse.urlunparse(('', '') + parsed[2:])
if not rest:
rest = rest + '/'
class_ = self.protocols[protocol]
headers = self.getAllHeaders().copy()
if 'host' not in headers:
headers['host'] = host
self.content.seek(0, 0)
s = self.content.read()
clientFactory = class_(self.method, rest, self.clientproto, headers,
s, self)
self.reactor.connectTCP(host, port, clientFactory)
class Proxy(HTTPChannel):
"""
This class implements a simple web proxy.
Since it inherits from L{twisted.protocols.http.HTTPChannel}, to use it you
should do something like this::
from twisted.web import http
f = http.HTTPFactory()
f.protocol = Proxy
Make the HTTPFactory a listener on a port as per usual, and you have
a fully-functioning web proxy!
"""
requestFactory = ProxyRequest
class ReverseProxyRequest(Request):
"""
Used by ReverseProxy to implement a simple reverse proxy.
@ivar proxyClientFactoryClass: a proxy client factory class, used to create
new connections.
@type proxyClientFactoryClass: L{ClientFactory}
@ivar reactor: the reactor used to create connections.
@type reactor: object providing L{twisted.internet.interfaces.IReactorTCP}
"""
proxyClientFactoryClass = ProxyClientFactory
def __init__(self, channel, queued, reactor=reactor):
Request.__init__(self, channel, queued)
self.reactor = reactor
def process(self):
"""
Handle this request by connecting to the proxied server and forwarding
it there, then forwarding the response back as the response to this
request.
"""
self.received_headers['host'] = self.factory.host
clientFactory = self.proxyClientFactoryClass(
self.method, self.uri, self.clientproto, self.getAllHeaders(),
self.content.read(), self)
self.reactor.connectTCP(self.factory.host, self.factory.port,
clientFactory)
class ReverseProxy(HTTPChannel):
"""
Implements a simple reverse proxy.
For details of usage, see the file examples/proxy.py.
"""
requestFactory = ReverseProxyRequest
class ReverseProxyResource(Resource):
"""
Resource that renders the results gotten from another server
Put this resource in the tree to cause everything below it to be relayed
to a different server.
@ivar proxyClientFactoryClass: a proxy client factory class, used to create
new connections.
@type proxyClientFactoryClass: L{ClientFactory}
@ivar reactor: the reactor used to create connections.
@type reactor: object providing L{twisted.internet.interfaces.IReactorTCP}
"""
proxyClientFactoryClass = ProxyClientFactory
def __init__(self, host, port, path, reactor=reactor):
"""
@param host: the host of the web server to proxy.
@type host: C{str}
@param port: the port of the web server to proxy.
@type port: C{port}
@param path: the base path to fetch data from. Note that you shouldn't
put any trailing slashes in it, it will be added automatically in
request. For example, if you put B{/foo}, a request on B{/bar} will
be proxied to B{/foo/bar}. Any required encoding of special
characters (such as " " or "/") should have been done already.
@type path: C{str}
"""
Resource.__init__(self)
self.host = host
self.port = port
self.path = path
self.reactor = reactor
def getChild(self, path, request):
"""
Create and return a proxy resource with the same proxy configuration
as this one, except that its path also contains the segment given by
C{path} at the end.
"""
return ReverseProxyResource(
self.host, self.port, self.path + '/' + urlquote(path, safe=""),
self.reactor)
def render(self, request):
"""
Render a request by forwarding it to the proxied server.
"""
# RFC 2616 tells us that we can omit the port if it's the default port,
# but we have to provide it otherwise
if self.port == 80:
host = self.host
else:
host = "%s:%d" % (self.host, self.port)
request.received_headers['host'] = host
request.content.seek(0, 0)
qs = urlparse.urlparse(request.uri)[4]
if qs:
rest = self.path + '?' + qs
else:
rest = self.path
clientFactory = self.proxyClientFactoryClass(
request.method, rest, request.clientproto,
request.getAllHeaders(), request.content.read(), request)
self.reactor.connectTCP(self.host, self.port, clientFactory)
return NOT_DONE_YET