root / trunk / twisted / web2 / fileupload.py

Revision 18460, 11.9 kB (checked in by foom, 3 years ago)

Fix wsgi.input's readline, readlines, and __iter__ methods. In detail:

  • Remove the maxLength argument of BufferedStream.readline.
  • Add a size argument to wsgi.input's readline(). This is not part of the WSGI spec, but code
    such as the Python stdlib's cgi.py requires it anyway. (#1451)
  • Add a size argument to BufferedStream.readline, to enable the above.
  • Fix wsgi.readlines() and wsgi.readline() to not add extraneous delimiters. (#2170)
  • Make BufferedStream.readline return the delimiter as part of the result, to enable the above.
  • Add an __iter__ method to wsgi.input, which is required by the spec. (#2166)
  • Change fileupload's use of BufferedStream.readline to conform with the above changes.

Also, add some docstrings.

Author: mkerrin, jknight
Reviewer: exarkun
Merges branch: readline-1451
Fixes #1451
Fixes #2166
Fixes #2170

Line 
1 from __future__ import generators
2
3 import re
4 from zope.interface import implements
5 import urllib
6 import tempfile
7
8 from twisted.internet import defer
9 from twisted.web2.stream import IStream, FileStream, BufferedStream, readStream
10 from twisted.web2.stream import generatorToStream, readAndDiscard
11 from twisted.web2 import http_headers
12 from cStringIO import StringIO
13
14 ###################################
15 #####  Multipart MIME Reader  #####
16 ###################################
17
class MimeFormatError(Exception):
    """Raised by the parsers in this module when a request body is
    malformed or exceeds one of the configured limits."""
20
# Browsers do not apply sensible quoting rules to this header: a
# literal, unescaped '"' may appear inside the filename.  A generic
# header parser therefore cannot be used; instead the filename group
# below is greedy and runs up to the last quote of the header value.
cd_regexp = re.compile(
    ' *form-data; *name="([^"]*)"(?:; *filename="(.*)")?$',
    re.IGNORECASE)


def parseContentDispositionFormData(value):
    """Split a form-data Content-Disposition header value.

    Returns a (name, filename) tuple; filename is None when the header
    carries no filename parameter.

    Raises ValueError if the value does not have the expected
    'form-data; name="..."[; filename="..."]' shape.
    """
    parsed = cd_regexp.match(value)
    if parsed is None:
        # Error parsing.
        raise ValueError("Unknown content-disposition format.")
    return parsed.group(1), parsed.group(2)
37
38
#@defer.deferredGenerator
def _readHeaders(stream):
    """Read the MIME headers of a single part.

    Assumes the boundary line preceding the part has just been consumed
    from C{stream}.  Header lines are read with C{stream.readline} (at
    most 1024 bytes each) until the empty line that ends the headers.

    The generator is wrapped with C{defer.deferredGenerator}; its final
    yielded value is the tuple (fieldname, filename, ctype).  C{ctype}
    defaults to application/octet-stream when the part has no
    Content-Type header.

    Raises MimeFormatError if the stream ends early, a header line does
    not end with CRLF within the size limit, a header line has no ':',
    or no usable Content-Disposition header is present.  A malformed
    Content-Disposition value raises ValueError (from
    parseContentDispositionFormData).
    """

    ctype = fieldname = filename = None

    # Now read headers
    while 1:
        line = stream.readline(size=1024)
        if isinstance(line, defer.Deferred):
            line = defer.waitForDeferred(line)
            yield line
            line = line.getResult()
        if not line.endswith('\r\n'):
            # readline returns the delimiter as part of the line, so a
            # missing CRLF means either EOF or a truncated (too long) line.
            if line == "":
                raise MimeFormatError("Unexpected end of stream.")
            else:
                raise MimeFormatError("Header line too long")

        line = line[:-2] # strip \r\n
        if line == "":
            break # End of headers

        parts = line.split(':', 1)
        if len(parts) != 2:
            raise MimeFormatError("Header did not have a :")
        name, value = parts
        name = name.lower()

        if name == "content-type":
            ctype = http_headers.parseContentType(http_headers.tokenize((value,), foldCase=False))
        elif name == "content-disposition":
            fieldname, filename = parseContentDispositionFormData(value)

    if ctype is None:
        # This used to be a no-op comparison ("=="), which left ctype as
        # None for parts without a Content-Type header.
        ctype = http_headers.MimeType('application', 'octet-stream')
    if fieldname is None:
        raise MimeFormatError('Content-disposition invalid or omitted.')

    # End of headers, return (field name, content-type, filename)
    yield fieldname, filename, ctype
    return
_readHeaders = defer.deferredGenerator(_readHeaders)
86
87
88 class _BoundaryWatchingStream(object):
89     def __init__(self, stream, boundary):
90         self.stream = stream
91         self.boundary = boundary
92         self.data = ''
93         self.deferred = defer.Deferred()
94        
95     length = None # unknown
96     def read(self):
97         if self.stream is None:
98             if self.deferred is not None:
99                 deferred = self.deferred
100                 self.deferred = None
101                 deferred.callback(None)
102             return None
103         newdata = self.stream.read()
104         if isinstance(newdata, defer.Deferred):
105             return newdata.addCallbacks(self._gotRead, self._gotError)
106         return self._gotRead(newdata)
107
108     def _gotRead(self, newdata):
109         if not newdata:
110             raise MimeFormatError("Unexpected EOF")
111         # BLECH, converting buffer back into string.
112         self.data += str(newdata)
113         data = self.data
114         boundary = self.boundary
115         off = data.find(boundary)
116        
117         if off == -1:
118             # No full boundary, check for the first character
119             off = data.rfind(boundary[0], max(0, len(data)-len(boundary)))
120             if off != -1:
121                 # We could have a partial boundary, store it for next time
122                 self.data = data[off:]
123                 return data[:off]
124             else:
125                 self.data = ''
126                 return data
127         else:
128             self.stream.pushback(data[off+len(boundary):])
129             self.stream = None
130             return data[:off]
131
132     def _gotError(self, err):
133         # Propogate error back to MultipartMimeStream also
134         if self.deferred is not None:
135             deferred = self.deferred
136             self.deferred = None
137             deferred.errback(err)
138         return err
139    
140     def close(self):
141         # Assume error will be raised again and handled by MMS?
142         readAndDiscard(self).addErrback(lambda _: None)
143        
class MultipartMimeStream(object):
    """Reads a multipart MIME body one part at a time.

    Each successful C{read()} produces the headers of the next part plus
    a stream over that part's data.
    """
    implements(IStream)

    def __init__(self, stream, boundary):
        self.stream = BufferedStream(stream)
        self.boundary = "--"+boundary
        self.first = True

    def read(self):
        """
        Return a deferred which fires with a tuple of
        (fieldname, filename, ctype, dataStream) for the next part, or
        with None once the closing boundary has been read.

        Format errors are delivered to the errback.

        IMPORTANT: the dataStream returned by this call *must* be
        exhausted before .read() is called again!
        """
        if self.first:
            self.first = False
            readBoundary = self._readFirstBoundary
        else:
            readBoundary = self._readBoundaryLine
        d = readBoundary()
        d.addCallback(self._doReadHeaders)
        d.addCallback(self._gotHeaders)
        return d

    def _readFirstBoundary(self):
        """Consume the opening boundary line; the result is True."""
        opening = self.stream.readline(size=1024)
        if isinstance(opening, defer.Deferred):
            opening = defer.waitForDeferred(opening)
            yield opening
            opening = opening.getResult()

        expected = self.boundary + '\r\n'
        if opening != expected:
            raise MimeFormatError("Extra data before first boundary: %r looking for: %r" % (opening, expected))

        # Every later boundary is preceded by the CRLF ending the part data.
        self.boundary = "\r\n"+self.boundary
        yield True
        return
    _readFirstBoundary = defer.deferredGenerator(_readFirstBoundary)

    def _readBoundaryLine(self):
        """Consume the rest of a boundary line; the result is True when
        another part follows and False at the closing boundary."""
        rest = self.stream.readline(size=1024)
        if isinstance(rest, defer.Deferred):
            rest = defer.waitForDeferred(rest)
            yield rest
            rest = rest.getResult()

        if rest == "\r\n":
            yield True
            return
        if rest != "--\r\n":
            raise MimeFormatError("Unexpected data on same line as boundary: %r" % (rest,))
        # THE END!
        yield False
        return
    _readBoundaryLine = defer.deferredGenerator(_readBoundaryLine)

    def _doReadHeaders(self, morefields):
        """Read the next part's headers, or pass None on when done."""
        if morefields:
            return _readHeaders(self.stream)
        return None

    def _gotHeaders(self, headers):
        """Append a boundary-watching data stream to the parsed headers."""
        if headers is None:
            return None
        bws = _BoundaryWatchingStream(self.stream, self.boundary)
        self.deferred = bws.deferred
        return tuple(headers) + (bws,)
220
221
def readIntoFile(stream, outFile, maxlen):
    """Copy C{stream} into C{outFile}, failing with MimeFormatError as
    soon as more than C{maxlen} bytes have been received.

    Returns the Deferred produced by readStream, which is triggered on
    finish.
    """
    # Mutable container so the nested callback can update the running total.
    received = {'bytes': 0}

    def consume(chunk):
        received['bytes'] += len(chunk)
        if received['bytes'] > maxlen:
            raise MimeFormatError("Maximum length of %d bytes exceeded." %
                                  maxlen)
        outFile.write(chunk)

    finished = readStream(stream, consume)
    # Results and failures alike are passed through unchanged.
    return finished.addBoth(lambda result: result)
237
#@defer.deferredGenerator
def parseMultipartFormData(stream, boundary,
                           maxMem=100*1024, maxFields=1024, maxSize=10*1024*1024):
    """Parse a multipart/form-data body into form fields and file uploads.

    @param stream: the request body stream.
    @param boundary: the multipart boundary, without the leading "--".
    @param maxMem: total number of bytes allowed for ordinary
        (non-file) fields, which are buffered in memory.
    @param maxFields: maximum number of parts accepted.
    @param maxSize: total number of bytes allowed for the whole body;
        file uploads are spooled to temporary files.

    The generator is wrapped with C{defer.deferredGenerator}; its final
    yielded value is the tuple (args, files), where args maps each field
    name to a list of values and files maps each field name to a list of
    (filename, ctype, fileobject) tuples with the file positioned at 0.

    Raises MimeFormatError when a limit is exceeded or the body is
    malformed.
    """
    # If the stream length is known to be too large upfront, abort immediately
    if stream.length is not None and stream.length > maxSize:
        raise MimeFormatError("Maximum length of %d bytes exceeded." %
                                  maxSize)

    mms = MultipartMimeStream(stream, boundary)
    numFields = 0
    args = {}
    files = {}

    while 1:
        datas = mms.read()
        if isinstance(datas, defer.Deferred):
            datas = defer.waitForDeferred(datas)
            yield datas
            datas = datas.getResult()
        if datas is None:
            break

        numFields+=1
        # Fail only once the limit is really exceeded; comparing with ==
        # rejected the maxFields-th field itself.
        if numFields > maxFields:
            raise MimeFormatError("Maximum number of fields %d exceeded"%maxFields)

        # Parse data (the part's stream gets its own name so it does not
        # shadow the ``stream`` argument)
        fieldname, filename, ctype, partStream = datas
        if filename is None:
            # Not a file
            outfile = StringIO()
            maxBuf = min(maxSize, maxMem)
        else:
            outfile = tempfile.NamedTemporaryFile()
            maxBuf = maxSize
        x = readIntoFile(partStream, outfile, maxBuf)
        if isinstance(x, defer.Deferred):
            x = defer.waitForDeferred(x)
            yield x
            x = x.getResult()
        if filename is None:
            # Is a normal form field
            outfile.seek(0)
            data = outfile.read()
            args.setdefault(fieldname, []).append(data)
            # Charge the field against both remaining budgets
            maxMem -= len(data)
            maxSize -= len(data)
        else:
            # Is a file upload
            maxSize -= outfile.tell()
            outfile.seek(0)
            files.setdefault(fieldname, []).append((filename, ctype, outfile))


    yield args, files
    return
parseMultipartFormData = defer.deferredGenerator(parseMultipartFormData)
296
297 ###################################
298 ##### x-www-urlencoded reader #####
299 ###################################
300
301
def parse_urlencoded_stream(input, maxMem=100*1024,
                     keep_blank_values=False, strict_parsing=False):
    """Incrementally decode an x-www-form-urlencoded body.

    Written as a generator to be wrapped by C{generatorToStream}: it
    yields C{input.wait} before each C{input.next()} call and otherwise
    yields one (name, value) tuple per decoded field.

    @param input: iterator-like source of body chunks, as supplied by
        the generatorToStream wrapper.
    @param maxMem: maximum total number of body bytes accepted.
    @param keep_blank_values: if true, fields with an empty value are
        reported too instead of being dropped.
    @param strict_parsing: if true, a field without '=' raises
        MimeFormatError instead of being skipped.

    Raises MimeFormatError when more than C{maxMem} bytes are received.
    """
    lastdata = ''
    still_going=1
    # maxMem is counted down below; remember the configured limit so the
    # error message can report it rather than the negative remainder.
    memLimit = maxMem

    while still_going:
        try:
            yield input.wait
            data = input.next()
        except StopIteration:
            # End of input: whatever is left over is the final pair.
            pairs = [lastdata]
            still_going=0
        else:
            maxMem -= len(data)
            if maxMem < 0:
                raise MimeFormatError("Maximum length of %d bytes exceeded." %
                                      memLimit)
            pairs = str(data).split('&')
            pairs[0] = lastdata + pairs[0]
            # The last piece may continue in the next chunk; hold it back.
            lastdata=pairs.pop()

        for name_value in pairs:
            nv = name_value.split('=', 1)
            if len(nv) != 2:
                if strict_parsing:
                    # Format the message before constructing the exception;
                    # applying % to the exception instance raised TypeError.
                    raise MimeFormatError("bad query field: %s" % (repr(name_value),))
                continue
            if len(nv[1]) or keep_blank_values:
                name = urllib.unquote(nv[0].replace('+', ' '))
                value = urllib.unquote(nv[1].replace('+', ' '))
                yield name, value
parse_urlencoded_stream = generatorToStream(parse_urlencoded_stream)
334
def parse_urlencoded(stream, maxMem=100*1024, maxFields=1024,
                     keep_blank_values=False, strict_parsing=False):
    """Parse an x-www-form-urlencoded body into a dictionary.

    @param stream: the request body stream.
    @param maxMem: maximum total number of body bytes accepted.
    @param maxFields: maximum number of fields accepted.
    @param keep_blank_values: passed to parse_urlencoded_stream.
    @param strict_parsing: passed to parse_urlencoded_stream.

    The generator is wrapped with C{defer.deferredGenerator}; its final
    yielded value is a dict mapping each field name to the list of its
    values, in order of appearance.

    Raises MimeFormatError when more than C{maxFields} fields are
    present, in addition to the errors of parse_urlencoded_stream.
    """
    d = {}
    numFields = 0

    s=parse_urlencoded_stream(stream, maxMem, keep_blank_values, strict_parsing)

    while 1:
        datas = s.read()
        if isinstance(datas, defer.Deferred):
            datas = defer.waitForDeferred(datas)
            yield datas
            datas = datas.getResult()
        if datas is None:
            break
        name, value = datas

        numFields += 1
        # Fail only once the limit is really exceeded; comparing with ==
        # rejected the maxFields-th field itself.
        if numFields > maxFields:
            raise MimeFormatError("Maximum number of fields %d exceeded"%maxFields)

        d.setdefault(name, []).append(value)
    yield d
    return
parse_urlencoded = defer.deferredGenerator(parse_urlencoded)
363
364
if __name__ == '__main__':
    # Manual smoke test: parse the multipart body stored in upload.txt and
    # print the result; failures are reported through twisted's log.
    parsed = parseMultipartFormData(
        FileStream(open("upload.txt")), "----------0xKhTmLbOuNdArY")
    from twisted.python import log
    parsed.addErrback(log.err)

    def _show(result):
        print(result)
    parsed.addCallback(_show)

# Public names of this module.
__all__ = [
    'parseMultipartFormData',
    'parse_urlencoded',
    'parse_urlencoded_stream',
    'MultipartMimeStream',
    'MimeFormatError',
]
Note: See TracBrowser for help on using the browser.