root / trunk / twisted / web2 / http.py

Revision 24441, 17.2 kB (checked in by thijs, 1 year ago)

Merge maintainer-email-2438: Get rid of references to maintainer email addresses from code.

Author: thijs
Reviewer: exarkun
Fixes: #2438

Line 
1 # -*- test-case-name: twisted.web2.test.test_http -*-
2 # Copyright (c) 2001-2004 Twisted Matrix Laboratories.
3 # See LICENSE for details.
4
5 """HyperText Transfer Protocol implementation.
6
7 The second coming.
8
9 Maintainer: James Y Knight
10
11 """
12 #        import traceback; log.msg(''.join(traceback.format_stack()))
13
14 # system imports
15 import socket
16 import time
17 import cgi
18
19 # twisted imports
20 from twisted.internet import interfaces, error
21 from twisted.python import log, components
22 from zope.interface import implements
23
24 # sibling imports
25 from twisted.web2 import responsecode
26 from twisted.web2 import http_headers
27 from twisted.web2 import iweb
28 from twisted.web2 import stream
29 from twisted.web2.stream import IByteStream
30
# Default port number for each known URI scheme. splitHostPort() falls back
# to this table when the "host:port" string carries no usable port.
defaultPortForScheme = {'http': 80, 'https':443, 'ftp':21}
32
def splitHostPort(scheme, hostport):
    """Break a C{"host:port"} string into its host and port components.

    Only the first colon is treated as the separator. When no port is
    given, or the port is not a valid integer, the default registered for
    C{scheme} in C{defaultPortForScheme} is used, or C{0} when the scheme is
    not known.

    @param scheme: URI scheme name used to look up the fallback port.
    @param hostport: a string of the form C{"host"} or C{"host:port"}.
    @return: a tuple of C{(hostname, portnumber)}.
    """
    pieces = hostport.split(':', 1)
    host = pieces[0]
    if len(pieces) > 1:
        try:
            return host, int(pieces[1])
        except ValueError:
            # Unparseable port: fall through to the scheme's default.
            pass
    return host, defaultPortForScheme.get(scheme, 0)
46
47
def parseVersion(strversion):
    """Parse a version string of the form Protocol '/' Major '.' Minor.

    For example, C{'HTTP/1.1'} yields C{('http', 1, 1)}; the protocol name
    is lower-cased.

    @param strversion: the version string to parse.
    @return: a C{(protocol, major, minor)} tuple.
    @raise ValueError: if the string does not have exactly one C{'/'} and
        one C{'.'}, if either number is not an integer, or if either number
        is negative.
    """
    protocol, numbers = strversion.split('/')
    majorText, minorText = numbers.split('.')
    major = int(majorText)
    minor = int(minorText)
    if min(major, minor) < 0:
        raise ValueError("negative number")
    return (protocol.lower(), major, minor)
59
60
class HTTPError(Exception):
    """An Exception for propagating HTTP Error Responses.

    The response to be sent is stored on the C{response} attribute.
    """

    def __init__(self, codeOrResponse):
        """
        @param codeOrResponse: The numeric HTTP code or a complete
            http.Response object. It is adapted to L{iweb.IResponse}.
        @type codeOrResponse: C{int} or L{http.Response}
        """
        Exception.__init__(self)
        adapted = iweb.IResponse(codeOrResponse)
        self.response = adapted

    def __repr__(self):
        className = self.__class__.__name__
        return "<%s %s>" % (className, self.response)
74
75
class Response(object):
    """An HTTP Response that is to be sent to the client.

    Class-level defaults: C{code} is L{responsecode.OK}, and C{stream} is
    C{None} (no body) unless one is supplied to the constructor.
    """
    implements(iweb.IResponse)

    code = responsecode.OK
    headers = None
    stream = None

    def __init__(self, code=None, headers=None, stream=None):
        """
        @param code: The HTTP status code for this Response. When C{None},
            the class default is kept.
        @type code: C{int}

        @param headers: Headers to be sent to the client. A C{dict} is
            wrapped in L{twisted.web2.http_headers.Headers}; C{None} gives
            a fresh, empty Headers object.
        @type headers: C{dict}, L{twisted.web2.http_headers.Headers}, or
            C{None}

        @param stream: Content body to send to the HTTP client. It is
            adapted to L{twisted.web2.stream.IByteStream}.
        @type stream: L{twisted.web2.stream.IByteStream}
        """
        if code is not None:
            self.code = int(code)

        if headers is None:
            headers = http_headers.Headers()
        elif isinstance(headers, dict):
            headers = http_headers.Headers(headers)
        self.headers = headers

        if stream is not None:
            self.stream = IByteStream(stream)

    def __repr__(self):
        streamlen = None
        if self.stream is not None:
            streamlen = self.stream.length

        details = (self.__module__, self.__class__.__name__, self.code, streamlen)
        return "<%s.%s code=%d, streamlen=%s>" % details
118
119
class StatusResponse (Response):
    """
    A L{Response} whose body is a small HTML page made of a title and a
    description of what happened.
    """
    def __init__(self, code, description, title=None):
        """
        @param code: a response code in L{responsecode.RESPONSES}.
        @param description: a string description. It is HTML-escaped before
            being placed in the page.
        @param title: the message title.  If not specified or C{None}, defaults
            to the escaped C{responsecode.RESPONSES[code]}. A title supplied
            by the caller is inserted into the page as given, without
            escaping.
        """
        if title is None:
            title = cgi.escape(responsecode.RESPONSES[code])

        fragments = [
            "<html>",
            "<head>",
            "<title>%s</title>" % (title,),
            "</head>",
            "<body>",
            "<h1>%s</h1>" % (title,),
            "<p>%s</p>" % (cgi.escape(description),),
            "</body>",
            "</html>",
        ]
        output = "".join(fragments)

        # A unicode page is sent as UTF-8 bytes and labelled as such in the
        # content-type; a byte-string page gets no charset parameter.
        mime_params = {}
        if isinstance(output, unicode):
            output = output.encode("utf-8")
            mime_params["charset"] = "utf-8"

        super(StatusResponse, self).__init__(code=code, stream=output)

        contentType = http_headers.MimeType("text", "html", mime_params)
        self.headers.setHeader("content-type", contentType)

        self.description = description

    def __repr__(self):
        parts = (self.__class__.__name__, self.code, self.description)
        return "<%s %s %s>" % parts
161
162
class RedirectResponse (StatusResponse):
    """
    A L{StatusResponse} with code L{responsecode.MOVED_PERMANENTLY} that
    redirects the client to another network location.
    """
    def __init__(self, location):
        """
        @param location: the URI to redirect to. It is used both in the
            page description and as the value of the C{location} header.
        """
        description = "Document moved to %s." % (location,)
        super(RedirectResponse, self).__init__(
            responsecode.MOVED_PERMANENTLY, description)

        self.headers.setHeader("location", location)
177
178        
def NotModifiedResponse(oldResponse=None):
    """Build a L{Response} with code L{responsecode.NOT_MODIFIED}.

    @param oldResponse: the response that would otherwise have been sent,
        or C{None}. When given, a fixed set of its headers (those present
        on it) is copied onto the new response; no other headers are
        carried over.
    @return: a new L{Response} without a body stream.
    """
    headers = None
    if oldResponse is not None:
        headers = http_headers.Headers()
        preserved = (
            # Required from sec 10.3.5:
            'date', 'etag', 'content-location', 'expires',
            'cache-control', 'vary',
            # Others:
            'server', 'proxy-authenticate', 'www-authenticate', 'warning')
        for name in preserved:
            rawValues = oldResponse.headers.getRawHeaders(name)
            if rawValues is None:
                continue
            headers.setRawHeaders(name, rawValues)
    return Response(code=responsecode.NOT_MODIFIED, headers=headers)
194    
195
def checkPreconditions(request, response=None, entityExists=True, etag=None, lastModified=None):
    """Check to see if this request passes the conditional checks specified
    by the client. May raise an HTTPError with result codes L{NOT_MODIFIED}
    or L{PRECONDITION_FAILED}, as appropriate.

    This function is called automatically as an output filter for GET and
    HEAD requests. With GET/HEAD, it is not important for the precondition
    check to occur before doing the action, as the method is non-destructive.

    However, if you are implementing other request methods, like PUT
    for your resource, you will need to call this after determining
    the etag and last-modified time of the existing resource but
    before actually doing the requested action. In that case, pass those
    values in through the C{etag} and C{lastModified} arguments instead of
    passing a response.

    This examines the conditional request headers (If-Match,
    If-Unmodified-Since, If-Modified-Since and If-None-Match), compares
    them with the etag and last-modified date of the resource, and raises
    an HTTPError carrying the appropriate response when a check fails.

    @param request: The request whose headers (and method) are examined.

    @param response: This should be provided for GET/HEAD methods. If
             it is specified, the etag and lastModified arguments will
             be retrieved automatically from the response headers and
             shouldn't be separately specified. Not providing the
             response with a GET request may cause the emitted
             "Not Modified" responses to be non-conformant.

    @param entityExists: Set to False if the entity in question doesn't
             yet exist. Necessary for PUT support with 'If-None-Match: *'.

    @param etag: The etag of the resource to check against, or None.

    @param lastModified: The last modified date of the resource to check
              against, or None.

    @return: C{False} if a response was given whose code is neither 2xx
             nor L{PRECONDITION_FAILED} (no checks are made in that case);
             otherwise C{None} when all preconditions pass.

    @raise HTTPError: Raised when the preconditions fail, in order to
             abort processing and emit an error page.

    """
    if response:
        # The validators come from the response itself, so the caller must
        # not also pass them explicitly.
        assert etag is None and lastModified is None
        # if the code is some sort of error code, don't do anything
        if not ((response.code >= 200 and response.code <= 299)
                or response.code == responsecode.PRECONDITION_FAILED):
            return False
        etag = response.headers.getHeader("etag")
        lastModified = response.headers.getHeader("last-modified")

    def matchETag(tags, allowWeak):
        # True-ish if the resource's etag satisfies the list of tags taken
        # from an If-Match / If-None-Match header. '*' matches any existing
        # entity. A weak resource etag can only match when allowWeak is set,
        # and strong comparison is used whenever allowWeak is not set.
        # Note the non-'*' result is a (possibly empty) list or False, not
        # strictly a bool; callers only test its truth value.
        if entityExists and '*' in tags:
            return True
        if etag is None:
            return False
        return ((allowWeak or not etag.weak) and
                ([etagmatch for etagmatch in tags if etag.match(etagmatch, strongCompare=not allowWeak)]))

    # First check if-match/if-unmodified-since
    # If either one fails, we return PRECONDITION_FAILED
    match = request.headers.getHeader("if-match")
    if match:
        if not matchETag(match, False):
            raise HTTPError(StatusResponse(responsecode.PRECONDITION_FAILED, "Requested resource does not have a matching ETag."))

    unmod_since = request.headers.getHeader("if-unmodified-since")
    if unmod_since:
        # An unknown modification date is treated as a failure here.
        if not lastModified or lastModified > unmod_since:
            raise HTTPError(StatusResponse(responsecode.PRECONDITION_FAILED, "Requested resource has changed."))

    # Now check if-none-match/if-modified-since.
    # This bit is tricky, because of the requirements when both IMS and INM
    # are present. In that case, you can't return a failure code
    # unless *both* checks think it failed.
    # Also, if the INM check succeeds, ignore IMS, because INM is treated
    # as more reliable.

    # I hope I got the logic right here...the RFC is quite poorly written
    # in this area. Someone might want to verify the testcase against
    # RFC wording.

    # notModified is effectively tri-state: None (or another false value
    # that is not False) means IMS gave no verdict, because the header is
    # absent or lastModified is unknown; True means the resource is
    # unchanged since IMS; False means IMS says it has changed.
    # If IMS header is later than current time, ignore it.
    # NOTE(review): a future-dated IMS actually yields False rather than
    # None below, which also suppresses a matching If-None-Match -- confirm
    # this is the intended meaning of "ignore".
    notModified = None
    ims = request.headers.getHeader('if-modified-since')
    if ims:
        notModified = (ims < time.time() and lastModified and lastModified <= ims)

    inm = request.headers.getHeader("if-none-match")
    if inm:
        # With INM present, a failure is only reported when IMS does not
        # explicitly disagree (notModified is not False) and the etag matches.
        if request.method in ("HEAD", "GET"):
            # If it's a range request, don't allow a weak ETag, as that
            # would break.
            canBeWeak = not request.headers.hasHeader('Range')
            if notModified != False and matchETag(inm, canBeWeak):
                raise HTTPError(NotModifiedResponse(response))
        else:
            if notModified != False and matchETag(inm, False):
                raise HTTPError(StatusResponse(responsecode.PRECONDITION_FAILED, "Requested resource has a matching ETag."))
    else:
        # No INM header: IMS alone decides, and only an explicit True counts.
        if notModified == True:
            if request.method in ("HEAD", "GET"):
                raise HTTPError(NotModifiedResponse(response))
            else:
                # S14.25 doesn't actually say what to do for a failing IMS on
                # non-GET methods. But Precondition Failed makes sense to me.
                raise HTTPError(StatusResponse(responsecode.PRECONDITION_FAILED, "Requested resource has not changed."))
299
def checkIfRange(request, response):
    """Evaluate the request's If-Range header, if any, against the response.

    An If-Range value that is an L{http_headers.ETag} is strongly compared
    with the response's etag header; any other value is compared for
    equality with the response's last-modified header.

    @param request: the request whose C{if-range} header is examined.
    @param response: the response supplying the C{etag} and
        C{last-modified} headers to compare against.
    @return: a true value if the server should return partial data; always
        C{True} when the request has no If-Range header.
    """
    condition = request.headers.getHeader("if-range")

    if condition is None:
        return True

    if not isinstance(condition, http_headers.ETag):
        return condition == response.headers.getHeader("last-modified")

    currentTag = response.headers.getHeader("etag")
    return condition.match(currentTag, strongCompare=True)
312
313
class _NotifyingProducerStream(stream.ProducerStream):
    """
    A L{stream.ProducerStream} that invokes a callback the first time it is
    read from, unless data is written to it or it is finished first.

    @ivar doStartReading: the pending zero-argument callback, or C{None}
        once it has been called or discarded.
    """
    doStartReading = None

    def __init__(self, length=None, doStartReading=None):
        """
        @param length: passed through to L{stream.ProducerStream}.
        @param doStartReading: callable to invoke on the first read, or
            C{None} for no notification.
        """
        stream.ProducerStream.__init__(self, length=length)
        self.doStartReading = doStartReading

    def read(self):
        """Fire the pending callback (at most once), then read as usual."""
        notify = self.doStartReading
        if notify is not None:
            # Clear the attribute before calling so it cannot fire twice.
            self.doStartReading = None
            notify()

        return stream.ProducerStream.read(self)

    def write(self, data):
        """Discard any pending callback, then write C{data} as usual."""
        self.doStartReading = None
        stream.ProducerStream.write(self, data)

    def finish(self):
        """Discard any pending callback, then finish the stream as usual."""
        self.doStartReading = None
        stream.ProducerStream.finish(self)
336
337
# Response codes that must have empty bodies. Request.writeResponse() closes
# any response stream for these codes instead of sending it.
NO_BODY_CODES = (responsecode.NO_CONTENT, responsecode.NOT_MODIFIED)
340
class Request(object):
    """A HTTP request.

    Subclasses should override the process() method to determine how
    the request will be processed.

    @ivar chanRequest: the channel request this request is associated with.
    @ivar method: The HTTP method that was used.
    @ivar uri: The full URI that was requested (includes arguments).
    @ivar headers: All received headers
    @ivar clientproto: client HTTP version
    @ivar stream: incoming data stream.
    """

    implements(iweb.IRequest, interfaces.IConsumer)

    # Expect header values this implementation is able to honour.
    known_expects = ('100-continue',)

    def __init__(self, chanRequest, command, path, version, contentLength, headers):
        """
        @param chanRequest: the channel request we're associated with.
        @param command: the HTTP method, stored as C{method}.
        @param path: the requested URI, stored as C{uri}.
        @param version: the client's HTTP version, stored as C{clientproto}.
        @param contentLength: length given to the incoming data stream.
        @param headers: the received headers.
        """
        self.chanRequest = chanRequest
        self.method = command
        self.uri = path
        self.clientproto = version

        self.headers = headers

        # When the client asked for "100-continue", the intermediate response
        # is deferred until something first reads from the request stream.
        doStartReading = None
        if '100-continue' in self.headers.getHeader('expect', ()):
            doStartReading = self._sendContinue
        self.stream = _NotifyingProducerStream(contentLength, doStartReading)
        self.stream.registerProducer(self.chanRequest, True)

    def checkExpect(self):
        """Ensure there are no expectations that cannot be met.

        Each value of the Expect header is checked against
        self.known_expects.

        @raise HTTPError: with L{responsecode.EXPECTATION_FAILED} on the
            first unknown expectation.
        """
        for expectation in self.headers.getHeader('expect', ()):
            if expectation not in self.known_expects:
                raise HTTPError(responsecode.EXPECTATION_FAILED)

    def process(self):
        """Called by channel to let you process the request.

        Does nothing here; can be overridden by a subclass to do something
        useful."""
        pass

    def handleContentChunk(self, data):
        """Callback from channel when a piece of data has been received.
        The data is written to .stream"""
        self.stream.write(data)

    def handleContentComplete(self):
        """Callback from channel when all data has been received.
        Unregisters the producer from .stream and then finishes it."""
        self.stream.unregisterProducer()
        self.stream.finish()

    def connectionLost(self, reason):
        """The connection was lost; nothing is done here."""
        pass

    def __repr__(self):
        return '<%s %s %s>'% (self.method, self.uri, self.clientproto)

    def _sendContinue(self):
        """Ask the channel request to emit an intermediate 100 response."""
        self.chanRequest.writeIntermediateResponse(responsecode.CONTINUE)

    def _finished(self, x):
        """We are finished writing data; tell the channel request."""
        self.chanRequest.finish()

    def _error(self, reason):
        """Errback used while writing the response body.

        A lost connection is only logged as a message. Any other failure
        is logged as an error and the connection is aborted.
        """
        if not reason.check(error.ConnectionLost):
            log.err(reason)
            # Only bother with cleanup on errors other than lost connection.
            self.chanRequest.abortConnection()
            return
        log.msg("Request error: " + reason.getErrorMessage())

    def writeResponse(self, response):
        """
        Write a response: headers first, then the body stream unless the
        request method or the response code forbids a body.

        @param response: the response to send; its C{code}, C{headers} and
            C{stream} attributes are used.
        """
        if self.stream.doStartReading is not None:
            # Expect: 100-continue was requested, but 100 response has not been
            # sent, and there's a possibility that data is still waiting to be
            # sent.
            #
            # Ideally this means the remote side will not send any data.
            # However, because of compatibility requirements, it might timeout,
            # and decide to do so anyways at the same time we're sending back
            # this response. Thus, the read state is unknown after this.
            # We must close the connection.
            self.chanRequest.channel.setReadPersistent(False)
            # Nothing more will be read
            self.chanRequest.allContentReceived()

        if response.code != responsecode.NOT_MODIFIED:
            # Not modified response is *special* and doesn't get a content-length.
            # Otherwise: no stream means length 0, and a stream of unknown
            # length (None) leaves the header unset.
            if response.stream is None:
                contentLength = 0
            else:
                contentLength = response.stream.length
            if contentLength is not None:
                response.headers.setHeader('content-length', contentLength)
        self.chanRequest.writeHeaders(response.code, response.headers)

        # if this is a "HEAD" request, or a special response code,
        # don't return any data.
        suppressBody = self.method == "HEAD" or response.code in NO_BODY_CODES
        if not suppressBody:
            d = stream.StreamProducer(response.stream).beginProducing(self.chanRequest)
            d.addCallback(self._finished).addErrback(self._error)
            return

        if response.stream is not None:
            response.stream.close()
        self._finished(None)
458
459    
# NOTE(review): compat is imported here rather than with the other sibling
# imports at the top of the file, presumably to avoid a circular import --
# confirm before moving it.
from twisted.web2 import compat
components.registerAdapter(compat.makeOldRequestAdapter, iweb.IRequest, iweb.IOldRequest)
components.registerAdapter(compat.OldNevowResourceAdapter, iweb.IOldNevowResource, iweb.IResource)
# Lets a bare integer status code be adapted to a Response, as done by
# HTTPError when it is given a numeric code.
components.registerAdapter(Response, int, iweb.IResponse)

try:
    # If twisted.web is installed, add an adapter for it
    from twisted.web import resource
except ImportError:
    # twisted.web is optional, so only its absence is tolerated. A bare
    # except here would also hide KeyboardInterrupt, SystemExit and genuine
    # errors raised while importing twisted.web.
    pass
else:
    components.registerAdapter(compat.OldResourceAdapter, resource.IResource, iweb.IOldNevowResource)

__all__ = ['HTTPError', 'NotModifiedResponse', 'Request', 'Response', 'checkIfRange', 'checkPreconditions', 'defaultPortForScheme', 'parseVersion', 'splitHostPort']
474
Note: See TracBrowser for help on using the browser.