root / trunk / twisted / web2 / server.py

Revision 25457, 20.7 kB (checked in by exarkun, 8 months ago)

Merge hashlib-2763-3

Author: wsanchez, exarkun
Reviewer: exarkun, mwhudson
Fixes: #2763

Replace uses of md5 and sha modules in Twisted with use of a new twisted.python.hashlib
module which transparently uses the new hashlib standard library module if it is available
or falls back to md5 and sha if not.

Line 
1 # -*- test-case-name: twisted.web2.test.test_server -*-
2 # Copyright (c) 2001-2008 Twisted Matrix Laboratories.
3 # See LICENSE for details.
4
5 """
6 This is a web-server which integrates with the twisted.internet
7 infrastructure.
8 """
9
10 # System Imports
11 import cgi, time, urlparse
12 from urllib import quote, unquote
13 from urlparse import urlsplit
14
15 import weakref
16
17 from zope.interface import implements
18 # Twisted Imports
19 from twisted.internet import defer
20 from twisted.python import log, failure
21
22 # Sibling Imports
23 from twisted.web2 import http, iweb, fileupload, responsecode
24 from twisted.web2 import http_headers
25 from twisted.web2.filter.range import rangefilter
26 from twisted.web2 import error
27
28 from twisted.web2 import version as web2_version
29 from twisted import __version__ as twisted_version
30
# Server identification string; defaultHeadersFilter uses it as the value of
# the 'server' response header when a response does not already have one.
VERSION = "Twisted/%s TwistedWeb/%s" % (twisted_version, web2_version)
# Unique sentinel object.  NOTE(review): nothing in the visible part of this
# module refers to it -- confirm whether external code still relies on it.
_errorMarker = object()
33
34
def defaultHeadersFilter(request, response):
    """
    Response filter which fills in the C{server} and C{date} headers.

    A header is only added when the response does not already carry it, so
    values set by a resource are never overwritten.

    @param request: the request being answered (unused).
    @param response: the response whose headers are completed in place.
    @return: the same C{response} object.
    """
    headers = response.headers
    # Each default is produced lazily so that nothing is computed for a
    # header which is already present.
    defaults = (
        ('server', lambda: VERSION),
        ('date', time.time),
    )
    for headerName, makeValue in defaults:
        if not headers.hasHeader(headerName):
            headers.setHeader(headerName, makeValue())
    return response
# Request._cbFinishRender only checks for the presence of this attribute: it
# makes the filter run for non-2xx responses as well.
defaultHeadersFilter.handleErrors = True
42
def preconditionfilter(request, response):
    """
    Response filter which checks request preconditions for GET and HEAD.

    For any other method the response is passed through untouched.

    @param request: the request being answered.
    @param response: the response about to be sent.
    @return: the same C{response} object.
    """
    if request.method not in ("GET", "HEAD"):
        return response
    http.checkPreconditions(request, response)
    return response
47
def doTrace(request):
    """
    Build a response which echoes the request line and raw headers back.

    @param request: an object adaptable to L{iweb.IRequest}.
    @return: an L{http.Response} with status OK, content type
        C{message/http} and the reconstructed request text as its body.
    """
    request = iweb.IRequest(request)

    requestLine = "%s %s HTTP/%d.%d\r\n" % (
        request.method, request.uri,
        request.clientproto[0], request.clientproto[1])

    # One "Name: value" line per raw header value, in the order reported by
    # the header collection.
    headerLines = [
        "%s: %s\r\n" % (name, value)
        for name, valuelist in request.headers.getAllRawHeaders()
        for value in valuelist
    ]

    body = requestLine + ''.join(headerLines)
    contentType = http_headers.MimeType('message', 'http')
    return http.Response(
        responsecode.OK,
        {'content-type': contentType},
        body)
63
64
def parsePOSTData(request, maxMem=100*1024, maxFields=1024,
                  maxSize=10*1024*1024):
    """
    Parse the body of a POST request into C{request.args} (and
    C{request.files} for multipart uploads).

    Nothing is parsed when the request stream is empty or when no
    content-type header is present.

    @param request: the request to parse.
    @type request: L{twisted.web2.http.Request}.
    @param maxMem: maximum memory used during the parsing of the data.
    @type maxMem: C{int}
    @param maxFields: maximum number of form fields allowed.
    @type maxFields: C{int}
    @param maxSize: maximum size of file upload allowed.
    @type maxSize: C{int}

    @return: a deferred that will fire when the parsing is done. The deferred
        itself doesn't hold a return value, the request is modified directly.
        It fails with an L{http.HTTPError} carrying a BAD_REQUEST response
        when the content type is unsupported, the multipart boundary is
        missing, or the body is malformed.
    @rtype: C{defer.Deferred}
    """
    if request.stream.length == 0:
        return defer.succeed(None)

    contentType = request.headers.getHeader('content-type')
    if contentType is None:
        return defer.succeed(None)

    def badRequest(message):
        # Every failure reported by this function is a 400 response.
        return http.HTTPError(
            http.StatusResponse(responsecode.BAD_REQUEST, message))

    def storeArgs(parsedArgs):
        request.args.update(parsedArgs)

    def storeArgsAndFiles(parsed):
        parsedArgs, parsedFiles = parsed
        request.args.update(parsedArgs)
        request.files.update(parsedFiles)

    def mimeErrorToBadRequest(f):
        # Only malformed-body errors are translated; anything else
        # propagates unchanged.
        f.trap(fileupload.MimeFormatError)
        raise badRequest(str(f.value))

    mediaType = (contentType.mediaType, contentType.mediaSubtype)

    if mediaType == ('application', 'x-www-form-urlencoded'):
        d = fileupload.parse_urlencoded(request.stream)
        d.addCallbacks(storeArgs, mimeErrorToBadRequest)
        return d

    if mediaType == ('multipart', 'form-data'):
        boundary = contentType.params.get('boundary')
        if boundary is None:
            return defer.fail(
                badRequest("Boundary not specified in Content-Type."))
        d = fileupload.parseMultipartFormData(request.stream, boundary,
                                              maxMem, maxFields, maxSize)
        d.addCallbacks(storeArgsAndFiles, mimeErrorToBadRequest)
        return d

    return defer.fail(
        badRequest("Invalid content-type: %s/%s" % mediaType))
129
130
class StopTraversal(object):
    """
    Marker returned from C{locateChild} in place of the remaining path to
    tell L{Request._handleSegment} to stop handling path segments.

    The class object itself is used as the marker (it is compared with
    C{is}); it is never instantiated by this module.
    """
137
138
class Request(http.Request):
    """
    An HTTP request which locates a resource on a L{Site} and renders it.

    @ivar site: the L{Site} this request belongs to.
    @ivar remoteAddr: the address of the client, as reported by the channel
        request.

    @ivar scheme: the URL scheme (C{'http'} or C{'https'}).
    @ivar host: the host name the request was addressed to.
    @ivar port: the port the request was addressed to.
    @ivar path: The path only (arguments not included).
    @ivar params: the C{;params} part of the URL, if any.
    @ivar querystring: the raw query string, if any.

    @ivar args: All of the arguments, including URL and POST arguments.
    @type args: A mapping of strings (the argument names) to lists of values.
                i.e., ?foo=bar&foo=baz&quux=spam results in
                {'foo': ['bar', 'baz'], 'quux': ['spam']}.
    @ivar files: uploaded files, filled in by L{parsePOSTData}.

    @ivar prepath: the (unquoted) path segments already traversed.
    @ivar postpath: the (unquoted) path segments still to be traversed.

    @ivar resources: every resource visited while handling this request.
    @ivar responseFilters: per-request copy of the class-level filter list,
        applied in order to the response before it is written.
    """
    implements(iweb.IRequest)

    site = None
    _initialprepath = None
    responseFilters = [rangefilter, preconditionfilter,
                       error.defaultErrorHandler, defaultHeadersFilter]

    def __init__(self, *args, **kw):
        """
        Accepts the arguments of L{http.Request} plus two optional keywords.

        @keyword site: the L{Site} this request belongs to.
        @keyword prepathuri: the part of the path which has already been
            processed (see L{_parseURL}).
        """
        # These keywords are ours; they must not reach http.Request.
        if 'site' in kw:
            self.site = kw.pop('site')
        if 'prepathuri' in kw:
            self._initialprepath = kw.pop('prepathuri')

        # Copy response filters from the class so that addResponseFilter
        # only affects this request.
        self.responseFilters = self.responseFilters[:]
        self.files = {}
        self.resources = []
        http.Request.__init__(self, *args, **kw)

    def addResponseFilter(self, f, atEnd=False):
        """
        Register a response filter for this request.

        @param f: a callable taking C{(request, response)} and returning a
            response.
        @param atEnd: if true the filter runs after the existing ones,
            otherwise it runs first.
        """
        if atEnd:
            self.responseFilters.append(f)
        else:
            self.responseFilters.insert(0, f)

    def unparseURL(self, scheme=None, host=None, port=None,
                   path=None, params=None, querystring=None, fragment=None):
        """Turn the request path into a url string. For any pieces of
        the url that are not specified, use the value from the
        request. The arguments have the same meaning as the same named
        attributes of Request.

        @return: the assembled URL as a string.  The port is omitted when it
            is the default port for the scheme.
        """

        if scheme is None: scheme = self.scheme
        if host is None: host = self.host
        if port is None: port = self.port
        if path is None: path = self.path
        if params is None: params = self.params
        # The default has to be stored under the name handed to urlunparse
        # below; otherwise the request's own query string is never used.
        if querystring is None: querystring = self.querystring
        if fragment is None: fragment = ''

        if port == http.defaultPortForScheme.get(scheme, 0):
            hostport = host
        else:
            hostport = host + ':' + str(port)

        return urlparse.urlunparse((
            scheme, hostport, path,
            params, querystring, fragment))

    def _parseURL(self):
        """
        Split C{self.uri} into scheme, host, path, params and querystring,
        and derive C{args}, C{prepath} and C{postpath} from them.
        """
        if self.uri[0] == '/':
            # Can't use urlparse for request_uri because urlparse
            # wants to be given an absolute or relative URI, not just
            # an abs_path, and thus gets '//foo' wrong.
            self.scheme = self.host = self.path = self.params = self.querystring = ''
            if '?' in self.uri:
                self.path, self.querystring = self.uri.split('?', 1)
            else:
                self.path = self.uri
            if ';' in self.path:
                self.path, self.params = self.path.split(';', 1)
        else:
            # It is an absolute uri, use standard urlparse
            (self.scheme, self.host, self.path,
             self.params, self.querystring, fragment) = urlparse.urlparse(self.uri)

        if self.querystring:
            # True: keep arguments which have a blank value.
            self.args = cgi.parse_qs(self.querystring, True)
        else:
            self.args = {}

        path = [unquote(segment) for segment in self.path[1:].split('/')]
        if self._initialprepath:
            # We were given an initial prepath -- this is for supporting
            # CGI-ish applications where part of the path has already
            # been processed
            prepath = [unquote(segment)
                       for segment in self._initialprepath[1:].split('/')]

            if path[:len(prepath)] == prepath:
                self.prepath = prepath
                self.postpath = path[len(prepath):]
            else:
                # The request path does not start with the given prepath,
                # so nothing counts as already processed.
                self.prepath = []
                self.postpath = path
        else:
            self.prepath = []
            self.postpath = path

    def _fixupURLParts(self):
        """
        Fill in the scheme, host and port which L{_parseURL} could not
        determine from the request line alone.

        @raise http.HTTPError: BAD_REQUEST when an HTTP/1.1 (or later)
            client supplied no host at all.
        """
        hostaddr, secure = self.chanRequest.getHostInfo()
        if not self.scheme:
            # 'secure' is used as an index: false -> 'http', true -> 'https'.
            self.scheme = ('http', 'https')[secure]

        if self.host:
            self.host, self.port = http.splitHostPort(self.scheme, self.host)
        else:
            # If GET line wasn't an absolute URL
            host = self.headers.getHeader('host')
            if host:
                self.host, self.port = http.splitHostPort(self.scheme, host)
            else:
                # When no hostname specified anywhere, either raise an
                # error, or use the interface hostname, depending on
                # protocol version
                if self.clientproto >= (1,1):
                    raise http.HTTPError(responsecode.BAD_REQUEST)
                self.host = hostaddr.host
                self.port = hostaddr.port


    def process(self):
        "Process a request."
        try:
            self.checkExpect()
            resp = self.preprocessRequest()
            if resp is not None:
                self._cbFinishRender(resp).addErrback(self._processingFailed)
                return
            self._parseURL()
            self._fixupURLParts()
            self.remoteAddr = self.chanRequest.getRemoteHost()
        except:
            # Deliberately catches everything: whatever goes wrong while
            # preparing the request is turned into an error response.
            self._processingFailed(failure.Failure())
            return

        # Locate the resource, render it, filter and write the response;
        # any failure along the way ends up in _processingFailed.
        d = defer.Deferred()
        d.addCallback(self._getChild, self.site.resource, self.postpath)
        d.addCallback(lambda res, req: res.renderHTTP(req), self)
        d.addCallback(self._cbFinishRender)
        d.addErrback(self._processingFailed)
        d.callback(None)

    def preprocessRequest(self):
        """Do any request processing that doesn't follow the normal
        resource lookup procedure. "OPTIONS *" is handled here, for
        example. This would also be the place to do any CONNECT
        processing.

        @return: a response to send immediately, or C{None} to continue with
            the normal resource lookup.
        """

        if self.method == "OPTIONS" and self.uri == "*":
            response = http.Response(responsecode.OK)
            response.headers.setHeader('allow', ('GET', 'HEAD', 'OPTIONS', 'TRACE'))
            return response
        # This is where CONNECT would go if we wanted it
        return None

    def _getChild(self, _, res, path, updatepaths=True):
        """Call res.locateChild, and pass the result on to _handleSegment.

        @param _: ignored; present so this can be used as a Deferred
            callback.
        @param res: the resource to start from.
        @param path: the list of path segments still to be resolved.
        @param updatepaths: whether C{prepath}/C{postpath} should be updated
            as segments are consumed.
        @return: the located resource, or a Deferred firing with it.
        """

        self.resources.append(res)

        if not path:
            return res

        result = res.locateChild(self, path)
        if isinstance(result, defer.Deferred):
            return result.addCallback(self._handleSegment, res, path, updatepaths)
        else:
            return self._handleSegment(result, res, path, updatepaths)

    def _handleSegment(self, result, res, path, updatepaths):
        """Handle the result of a locateChild call done in _getChild.

        @param result: the C{(resource, remaining path)} pair returned by
            C{locateChild}.
        @param res: the resource C{locateChild} was called on.
        @param path: the path which was passed to C{locateChild}.
        @param updatepaths: see L{_getChild}.
        @raise http.HTTPError: NOT_FOUND when no child resource was found.
        """

        newres, newpath = result
        # If the child resource is None then display a error page
        if newres is None:
            raise http.HTTPError(responsecode.NOT_FOUND)

        # If we got a deferred then we need to call back later, once the
        # child is actually available.
        if isinstance(newres, defer.Deferred):
            return newres.addCallback(
                lambda actualRes: self._handleSegment(
                    (actualRes, newpath), res, path, updatepaths)
                )

        if path:
            url = quote("/" + "/".join(path))
        else:
            url = "/"

        if newpath is StopTraversal:
            # We need to rethink how to do this.  Ideally StopTraversal
            # would only be accepted together with the resource itself
            # (newres is res), raising ValueError otherwise; that check is
            # currently disabled, so the returned resource is ignored and
            # traversal stops at res.
            self._rememberResource(res, url)
            return res

        newres = iweb.IResource(newres)
        if newres is res:
            assert not newpath is path, "URL traversal cycle detected when attempting to locateChild %r from resource %r." % (path, res)
            assert len(newpath) < len(path), "Infinite loop impending..."

        if updatepaths:
            # We found a Resource... update the request.prepath and postpath
            for x in xrange(len(path) - len(newpath)):
                self.prepath.append(self.postpath.pop(0))

        child = self._getChild(None, newres, newpath, updatepaths=updatepaths)
        self._rememberResource(child, url)

        return child

    # NOTE(review): this mapping lives on the class, so it is shared by every
    # Request rather than being per-request as the docstrings below suggest
    # -- confirm whether that is intended.
    _urlsByResource = weakref.WeakKeyDictionary()

    def _rememberResource(self, resource, url):
        """
        Remember the URL of a visited resource.

        @return: C{resource}, so this can be used as a Deferred callback.
        """
        self._urlsByResource[resource] = url
        return resource

    def urlForResource(self, resource):
        """
        Looks up the URL of the given resource if this resource was found while
        processing this request.  Specifically, this includes the requested
        resource, and resources looked up via L{locateResource}.

        Note that a resource may be found at multiple URIs; if the same resource
        is visited at more than one location while processing this request,
        this method will return one of those URLs, but which one is not defined,
        nor whether the same URL is returned in subsequent calls.

        @param resource: the resource to find a URI for.  This resource must
            have been obtained from the request (ie. via its C{uri} attribute, or
            through its C{locateResource} or C{locateChildResource} methods).
        @return: a valid URL for C{resource} in this request.
        @raise NoURLForResourceError: if C{resource} has no URL in this request
            (because it was not obtained from the request).
        """
        # Keep the lookup result in its own name so that the error below
        # reports the resource which was asked about.
        url = self._urlsByResource.get(resource, None)
        if url is None:
            raise NoURLForResourceError(resource)
        return url

    def locateResource(self, url):
        """
        Looks up the resource with the given URL.
        @param url: The URL of the desired resource.
        @return: a L{Deferred} resulting in the L{IResource} at the
            given URL or C{None} if no such resource can be located.
            C{None} is returned directly (not wrapped in a Deferred) when
            C{url} is C{None}.
        @raise HTTPError: If C{url} is not a URL on the site that this
            request is being applied to.  The contained response will
            have a status code of L{responsecode.BAD_GATEWAY}.
        @raise HTTPError: If C{url} contains a query or fragment.
            The contained response will have a status code of
            L{responsecode.BAD_REQUEST}.
        """
        if url is None: return None

        #
        # Parse the URL
        #
        (scheme, host, path, query, fragment) = urlsplit(url)

        if query or fragment:
            raise http.HTTPError(http.StatusResponse(
                responsecode.BAD_REQUEST,
                "URL may not contain a query or fragment: %s" % (url,)
            ))

        # The caller shouldn't be asking a request on one server to lookup a
        # resource on some other server.
        if (scheme and scheme != self.scheme) or (host and host != self.headers.getHeader("host")):
            # The parenthesised part describes this site, so it is built
            # from the request's own scheme and host, not from the URL's.
            raise http.HTTPError(http.StatusResponse(
                responsecode.BAD_GATEWAY,
                "URL is not on this site (%s://%s/): %s" % (self.scheme, self.headers.getHeader("host"), url)
            ))

        segments = path.split("/")
        assert segments[0] == "", "URL path didn't begin with '/': %s" % (path,)
        segments = [unquote(segment) for segment in segments[1:]]

        def notFound(f):
            # A NOT_FOUND error means "no such resource" (None); any other
            # failure is passed on unchanged.
            f.trap(http.HTTPError)
            if f.value.response.code != responsecode.NOT_FOUND:
                return f
            return None

        d = defer.maybeDeferred(self._getChild, None, self.site.resource, segments, updatepaths=False)
        d.addCallback(self._rememberResource, path)
        d.addErrback(notFound)
        return d

    def locateChildResource(self, parent, childName):
        """
        Looks up the child resource with the given name given the parent
        resource.  This is similar to locateResource(), but doesn't have to
        start the lookup from the root resource, so it is potentially faster.
        @param parent: the parent of the resource being looked up.  This resource
            must have been obtained from the request (ie. via its C{uri} attribute,
            or through its C{locateResource} or C{locateChildResource} methods).
        @param childName: the name of the child of C{parent} to look up.
            It may not contain a C{'/'}.
        @return: a L{Deferred} resulting in the L{IResource} at the
            given URL or C{None} if no such resource can be located.
            C{None} is returned directly (not wrapped in a Deferred) when
            C{parent} or C{childName} is C{None}.
        @raise NoURLForResourceError: if C{parent} was not obtained from the
            request.
        """
        if parent is None or childName is None:
            return None

        assert "/" not in childName, "Child name may not contain '/': %s" % (childName,)

        parentURL = self.urlForResource(parent)
        if not parentURL.endswith("/"):
            parentURL += "/"
        url = parentURL + quote(childName)

        segment = childName

        def notFound(f):
            # A NOT_FOUND error means "no such resource" (None); any other
            # failure is passed on unchanged.
            f.trap(http.HTTPError)
            if f.value.response.code != responsecode.NOT_FOUND:
                return f
            return None

        d = defer.maybeDeferred(self._getChild, None, parent, [segment], updatepaths=False)
        d.addCallback(self._rememberResource, url)
        d.addErrback(notFound)
        return d

    def _processingFailed(self, reason):
        """
        Turn a processing failure into a response and send it.

        @param reason: the L{failure.Failure} describing what went wrong.
        @return: a Deferred which fires once the error response has been
            handled.
        """
        if reason.check(http.HTTPError) is not None:
            # If the exception was an HTTPError, leave it alone
            d = defer.succeed(reason.value.response)
        else:
            # Otherwise, it was a random exception, so give a
            # ICanHandleException implementer a chance to render the page.
            def _processingFailed_inner(reason):
                # Falls back to this request when no adapter is registered.
                handler = iweb.ICanHandleException(self, self)
                return handler.renderHTTP_exception(self, reason)
            d = defer.maybeDeferred(_processingFailed_inner, reason)

        d.addCallback(self._cbFinishRender)
        d.addErrback(self._processingReallyFailed, reason)
        return d

    def _processingReallyFailed(self, reason, origReason):
        """
        Last resort: rendering the error page failed as well.  Log both
        failures and write a fixed Internal Server Error response.

        @param reason: the failure raised while rendering the error page.
        @param origReason: the failure which caused the error page to be
            rendered in the first place.
        """
        log.msg("Exception rendering error page:", isErr=1)
        log.err(reason)
        log.msg("Original exception:", isErr=1)
        log.err(origReason)

        body = ("<html><head><title>Internal Server Error</title></head>"
                "<body><h1>Internal Server Error</h1>An error occurred rendering the requested page. Additionally, an error occured rendering the error page.</body></html>")

        response = http.Response(
            responsecode.INTERNAL_SERVER_ERROR,
            {'content-type': http_headers.MimeType('text','html')},
            body)
        self.writeResponse(response)

    def _cbFinishRender(self, result):
        """
        Finish rendering: filter and write a response, or keep rendering
        if a resource was returned instead.

        @param result: something adaptable to L{iweb.IResponse} or to
            L{iweb.IResource}.
        @return: a Deferred which fires when C{result} has been dealt with.
        @raise TypeError: if C{result} is neither a response nor a resource.
        """
        def filterit(response, f):
            # Filters only see successful (2xx) responses unless they
            # declare a 'handleErrors' attribute.
            if (hasattr(f, 'handleErrors') or
                (response.code >= 200 and response.code < 300)):
                return f(self, response)
            else:
                return response

        response = iweb.IResponse(result, None)
        if response:
            d = defer.Deferred()
            for f in self.responseFilters:
                d.addCallback(filterit, f)
            d.addCallback(self.writeResponse)
            d.callback(response)
            return d

        resource = iweb.IResource(result, None)
        if resource:
            # Rendering produced another resource: render that one too.
            self.resources.append(resource)
            d = defer.maybeDeferred(resource.renderHTTP, self)
            d.addCallback(self._cbFinishRender)
            return d

        raise TypeError("html is not a resource or a response")

    def renderHTTP_exception(self, req, reason):
        """
        Default exception handler: log the failure and return a generic
        Internal Server Error response.

        @param req: the request being processed (unused).
        @param reason: the L{failure.Failure} to log.
        @return: an L{http.Response} with status INTERNAL_SERVER_ERROR.
        """
        log.msg("Exception rendering:", isErr=1)
        log.err(reason)

        body = ("<html><head><title>Internal Server Error</title></head>"
                "<body><h1>Internal Server Error</h1>An error occurred rendering the requested page. More information is available in the server log.</body></html>")

        return http.Response(
            responsecode.INTERNAL_SERVER_ERROR,
            {'content-type': http_headers.MimeType('text','html')},
            body)
559
class Site(object):
    """
    Factory producing L{Request} objects bound to a root resource.

    @ivar resource: the root resource, adapted to L{iweb.IResource}.
    """

    def __init__(self, resource):
        """
        @param resource: the root resource of the site; it must be
            adaptable to L{iweb.IResource}.
        """
        rootResource = iweb.IResource(resource)
        self.resource = rootResource

    def __call__(self, *args, **kwargs):
        """
        Create a L{Request} for this site; all arguments are passed on to
        the L{Request} constructor.
        """
        request = Request(site=self, *args, **kwargs)
        return request
568
569
class NoURLForResourceError(RuntimeError):
    """
    Raised when a resource has no known URL in the current request.

    @ivar resource: the resource for which no URL is known.
    """

    def __init__(self, resource):
        message = "Resource %r has no URL in this request." % (resource,)
        RuntimeError.__init__(self, message)
        self.resource = resource
574
575
# Public names of this module, in their original order.
__all__ = [
    'Request',
    'Site',
    'StopTraversal',
    'VERSION',
    'defaultHeadersFilter',
    'doTrace',
    'parsePOSTData',
    'preconditionfilter',
    'NoURLForResourceError',
]
Note: See TracBrowser for help on using the browser.