root / trunk / twisted / web / proxy.py

Revision 26744, 8.9 kB (checked in by exarkun, 2 months ago)

Merge finish-proxied-requests-2677

Author: exarkun
Reviewer: therve
Fixes: #2677

Use the incoming IRequest object in the HTTP proxy when generating
a response to it rather than using its undocumented transport object.
This makes the proxy more generally useful (as it might support custom
request objects now), fixes a bug which led to proxied requests not
being logged, and simplifies the proxy code slightly by removing the
need for it to know much about the protocol-level formatting of HTTP.

Line 
1 # -*- test-case-name: twisted.web.test.test_proxy -*-
2 # Copyright (c) 2001-2007 Twisted Matrix Laboratories.
3 # See LICENSE for details.
4
5 """
6 Simplistic HTTP proxy support.
7
8 This comes in two main variants - the Proxy and the ReverseProxy.
9
10 When a Proxy is in use, a browser trying to connect to a server (say,
11 www.yahoo.com) will be intercepted by the Proxy, and the proxy will covertly
12 connect to the server, and return the result.
13
14 When a ReverseProxy is in use, the client connects directly to the ReverseProxy
15 (say, www.yahoo.com) which farms off the request to one of a pool of servers,
16 and returns the result.
17
18 Normally, a Proxy is used on the client end of an Internet connection, while a
19 ReverseProxy is used on the server end.
20 """
21
22 import urlparse
23 from urllib import quote as urlquote
24
25 from twisted.internet import reactor
26 from twisted.internet.protocol import ClientFactory
27 from twisted.web.resource import Resource
28 from twisted.web.server import NOT_DONE_YET
29 from twisted.web.http import HTTPClient, Request, HTTPChannel
30
31
32
class ProxyClient(HTTPClient):
    """
    Used by ProxyClientFactory to implement a simple web proxy.

    Once connected, the stored request is sent to the server and every part
    of the server's response is relayed to C{father}.

    @ivar father: the incoming request object the response is written to.
    @ivar command: the HTTP method sent to the server.
    @ivar rest: the request target sent after the method.
    @ivar headers: a mapping of header names to values sent with the request.
    @ivar data: the request body, written after the headers.
    @ivar _finished: A flag which indicates whether or not the original request
        has been finished yet.
    """
    _finished = False

    def __init__(self, command, rest, version, headers, data, father):
        """
        Store the request to forward.  C{version} is accepted for interface
        compatibility but is not used by this class.

        Note that C{headers} is modified in place: hop-by-hop headers are
        removed and C{connection} is forced to C{close}.
        """
        self.father = father
        self.command = command
        self.rest = rest
        # These headers only describe the client <-> proxy connection and
        # must not be forwarded to the next server.
        headers.pop("proxy-connection", None)
        # A keep-alive header contradicts the "connection: close" set below.
        headers.pop("keep-alive", None)
        headers["connection"] = "close"
        self.headers = headers
        self.data = data


    def connectionMade(self):
        """
        Send the stored command, headers and body to the connected server.
        """
        self.sendCommand(self.command, self.rest)
        for header, value in self.headers.items():
            self.sendHeader(header, value)
        self.endHeaders()
        self.transport.write(self.data)


    def handleStatus(self, version, code, message):
        """
        Copy the status code and message of the response to the original
        request.
        """
        self.father.setResponseCode(int(code), message)


    def handleHeader(self, key, value):
        """
        Copy a response header to the original request.
        """
        # The incoming request may already carry default values for these
        # headers (twisted.web.server.Request sets them before rendering a
        # resource).  The values received from the remote server ought to
        # override such defaults rather than be appended next to them.
        if key.lower() in ("server", "date", "content-type"):
            self.father.responseHeaders.setRawHeaders(key, [value])
        else:
            self.father.responseHeaders.addRawHeader(key, value)


    def handleResponsePart(self, buffer):
        """
        Relay a chunk of the response body to the original request.
        """
        self.father.write(buffer)


    def handleResponseEnd(self):
        """
        Finish the original request, indicating that the response has been
        completely written to it, and disconnect the outgoing transport.
        """
        # The flag guards against finishing the original request twice.
        if not self._finished:
            self._finished = True
            self.father.finish()
            self.transport.loseConnection()
82
83
84
class ProxyClientFactory(ClientFactory):
    """
    Client factory which remembers one request and builds the protocol that
    forwards it.  Used by ProxyRequest to implement a simple web proxy.
    """

    protocol = ProxyClient


    def __init__(self, command, rest, version, headers, data, father):
        """
        Remember the pieces of the request to forward, and C{father}, the
        incoming request the response is reported to.
        """
        self.command, self.rest, self.version = command, rest, version
        self.headers, self.data = headers, data
        self.father = father


    def buildProtocol(self, addr):
        """
        Create a C{self.protocol} instance from the stored request.  C{addr}
        is not used.
        """
        build = self.protocol
        return build(self.command, self.rest, self.version,
                     self.headers, self.data, self.father)


    def clientConnectionFailed(self, connector, reason):
        """
        Answer the incoming request with an error page when the outgoing
        connection could not be established.
        """
        incoming = self.father
        incoming.setResponseCode(501, "Gateway error")
        incoming.responseHeaders.addRawHeader("Content-Type", "text/html")
        incoming.write("<H1>Could not connect</H1>")
        incoming.finish()
116
117
118
class ProxyRequest(Request):
    """
    Used by Proxy to implement a simple web proxy.

    @ivar protocols: maps a URI scheme to the client factory class used to
        forward requests for that scheme.
    @ivar ports: maps a URI scheme to the port used when the URI does not
        name one.

    @ivar reactor: the reactor used to create connections.
    @type reactor: object providing L{twisted.internet.interfaces.IReactorTCP}
    """

    protocols = {'http': ProxyClientFactory}
    ports = {'http': 80}

    def __init__(self, channel, queued, reactor=reactor):
        Request.__init__(self, channel, queued)
        self.reactor = reactor


    def process(self):
        """
        Forward this request to the server named in its URI.

        The URI is split into the host to connect to and the remainder which
        is sent to that host; the response is relayed back to this request by
        the client factory.  A scheme missing from C{protocols} or C{ports}
        raises C{KeyError}.
        """
        parsed = urlparse.urlparse(self.uri)
        protocol = parsed[0]
        netloc = parsed[1]
        host = netloc
        port = self.ports[protocol]
        if ':' in host:
            host, port = host.split(':')
            port = int(port)
        # Everything after the network location is what the server is asked
        # for; an empty remainder means the root resource.
        rest = urlparse.urlunparse(('', '') + parsed[2:])
        if not rest:
            rest = rest + '/'
        class_ = self.protocols[protocol]
        headers = self.getAllHeaders().copy()
        if 'host' not in headers:
            # RFC 2616 section 14.23: the Host header must carry the port
            # whenever the URI names one, so use the network location as it
            # was given rather than the bare host name.
            headers['host'] = netloc
        self.content.seek(0, 0)
        s = self.content.read()
        clientFactory = class_(self.method, rest, self.clientproto, headers,
                               s, self)
        self.reactor.connectTCP(host, port, clientFactory)
155
156
157
class Proxy(HTTPChannel):
    """
    An HTTP channel which acts as a simple web proxy.

    The only thing it changes is C{requestFactory}, which is set to
    L{ProxyRequest}.  As a subclass of L{twisted.web.http.HTTPChannel}, it is
    meant to be installed as the protocol of an HTTP factory, for example::

        from twisted.web import http
        f = http.HTTPFactory()
        f.protocol = Proxy

    Then make the HTTPFactory listen on a port as per usual, and you have a
    fully-functioning web proxy!
    """

    requestFactory = ProxyRequest
174
175
176
class ReverseProxyRequest(Request):
    """
    Request which ReverseProxy uses to implement a simple reverse proxy: it
    is forwarded to the host and port found on C{self.factory}.

    @ivar proxyClientFactoryClass: a proxy client factory class, used to create
        new connections.
    @type proxyClientFactoryClass: L{ClientFactory}

    @ivar reactor: the reactor used to create connections.
    @type reactor: object providing L{twisted.internet.interfaces.IReactorTCP}
    """

    proxyClientFactoryClass = ProxyClientFactory

    def __init__(self, channel, queued, reactor=reactor):
        Request.__init__(self, channel, queued)
        self.reactor = reactor


    def process(self):
        """
        Forward this request to the proxied server, and have the response
        sent back as the response to this request.
        """
        upstream = self.factory
        # The host header is rewritten before the headers are collected, so
        # that the forwarded request names the proxied server.
        self.received_headers['host'] = upstream.host
        outgoingHeaders = self.getAllHeaders()
        body = self.content.read()
        makeFactory = self.proxyClientFactoryClass
        clientFactory = makeFactory(
            self.method, self.uri, self.clientproto, outgoingHeaders, body,
            self)
        self.reactor.connectTCP(upstream.host, upstream.port, clientFactory)
208
209
210
class ReverseProxy(HTTPChannel):
    """
    An HTTP channel which implements a simple reverse proxy.

    Its C{requestFactory} is set to L{ReverseProxyRequest}.  For details of
    usage, see the file examples/proxy.py.
    """

    requestFactory = ReverseProxyRequest
219
220
221
class ReverseProxyResource(Resource):
    """
    Resource that renders the results gotten from another server

    Put this resource in the tree to cause everything below it to be relayed
    to a different server.

    @ivar proxyClientFactoryClass: a proxy client factory class, used to create
        new connections.
    @type proxyClientFactoryClass: L{ClientFactory}

    @ivar reactor: the reactor used to create connections.
    @type reactor: object providing L{twisted.internet.interfaces.IReactorTCP}
    """

    proxyClientFactoryClass = ProxyClientFactory


    def __init__(self, host, port, path, reactor=reactor):
        """
        @param host: the host of the web server to proxy.
        @type host: C{str}

        @param port: the port of the web server to proxy.
        @type port: C{int}

        @param path: the base path to fetch data from. Note that you shouldn't
            put any trailing slashes in it, it will be added automatically in
            request. For example, if you put B{/foo}, a request on B{/bar} will
            be proxied to B{/foo/bar}.  Any required encoding of special
            characters (such as " " or "/") should have been done already.

        @type path: C{str}

        @param reactor: the reactor used to create connections; defaults to
            the global reactor.
        """
        Resource.__init__(self)
        self.host = host
        self.port = port
        self.path = path
        self.reactor = reactor


    def getChild(self, path, request):
        """
        Create and return a proxy resource with the same proxy configuration
        as this one, except that its path also contains the segment given by
        C{path} at the end.
        """
        # The segment is quoted with no safe characters, so that a "/" inside
        # it cannot be mistaken for a path separator.  The reactor is passed
        # on explicitly; otherwise the child would fall back to the global
        # reactor instead of the one this resource was configured with.
        return ReverseProxyResource(
            self.host, self.port, self.path + '/' + urlquote(path, safe=""),
            self.reactor)


    def render(self, request):
        """
        Render a request by forwarding it to the proxied server.

        @return: C{NOT_DONE_YET}, as the response is written later by the
            client created from C{proxyClientFactoryClass}.
        """
        # RFC 2616 tells us that we can omit the port if it's the default port,
        # but we have to provide it otherwise
        if self.port == 80:
            host = self.host
        else:
            host = "%s:%d" % (self.host, self.port)
        request.received_headers['host'] = host
        request.content.seek(0, 0)
        # Only the query string of the incoming URI is kept; the path sent to
        # the proxied server is the one accumulated by getChild.
        qs = urlparse.urlparse(request.uri)[4]
        if qs:
            rest = self.path + '?' + qs
        else:
            rest = self.path
        clientFactory = self.proxyClientFactoryClass(
            request.method, rest, request.clientproto,
            request.getAllHeaders(), request.content.read(), request)
        self.reactor.connectTCP(self.host, self.port, clientFactory)
        return NOT_DONE_YET
Note: See TracBrowser for help on using the browser.