=== modified file 'twisted/web/http.py'
--- twisted/web/http.py	2009-04-21 12:41:39 +0000
+++ twisted/web/http.py	2009-04-24 05:42:44 +0000
@@ -9,7 +9,6 @@
 
 Future Plans:
  - HTTP client support will at some point be refactored to support HTTP/1.1.
- - Accept chunked data from clients in server.
  - Other missing HTTP features from the RFC.
 
 Maintainer: Itamar Shtull-Trauring
@@ -36,6 +35,7 @@
 from twisted.python import log
 try: # try importing the fast, C version
     from twisted.protocols._c_urlarg import unquote
+    unquote # shut up pyflakes
 except ImportError:
     from urllib import unquote
 
@@ -1279,7 +1279,7 @@
     Protocol for decoding I{chunked} Transfer-Encoding, as defined by RFC 2616,
     section 3.6.1.  This protocol can interpret the contents of a request or
     response body which uses the I{chunked} Transfer-Encoding.  It cannot
-    interpret any of the rest of the HTTP protocol.
+    interpret any of the rest of the HTTP protocol.  It ignores trailers.
 
     It may make sense for _ChunkedTransferDecoder to be an actual IProtocol
     implementation.  Currently, the only user of this class will only ever
@@ -1298,7 +1298,8 @@
     @ivar finishCallback: A one-argument callable which will be invoked when
         the terminal chunk is received.  It will be invoked with all bytes
         which were delivered to this protocol which came after the terminal
-        chunk.
+        chunk.  These bytes are *not* the trailer; they might be the beginning
+        of the next request or response.
 
     @ivar length: Counter keeping track of how many more bytes in a chunk there
         are to receive.
@@ -1315,12 +1316,17 @@
         will be accepted.
     """
     state = 'chunk-length'
-    finish = False
 
     def __init__(self, dataCallback, finishCallback):
         self.dataCallback = dataCallback
         self.finishCallback = finishCallback
         self._buffer = ''
+        
+        # While an HTTP/1.1 chunk has no size limit in the specification, a
+        # reasonable limit must be established to prevent untrusted input from
+        # causing excessive string concatenation in the parser. A limit of 17 bytes
+        # (max FFFFFFFFFFFFFFFFF) can support chunks up to 2**68-1 bytes.
+        self._maximumChunkSizeStringLength = 17
 
 
     def dataReceived(self, data):
@@ -1335,33 +1341,74 @@
                 if '\r\n' in data:
                     line, rest = data.split('\r\n', 1)
                     parts = line.split(';')
-                    self.length = int(parts[0], 16)
+                    # HEX in RFC 2616 section 2.2 does not include the minus
+                    # sign; negative numbers and negative zero are not allowed.
+                    if parts[0][0] == '-':
+                        raise RuntimeError(
+                            "_ChunkedTransferDecoder.dataReceived received "
+                            "negative chunk length in parts %s" % (repr(parts),))
+                    if len(parts[0]) > self._maximumChunkSizeStringLength:
+                        raise RuntimeError(
+                            "_ChunkedTransferDecoder.dataReceived received "
+                            "too-long chunk length in parts %s" % (repr(parts),))
+                    try:
+                        self.length = int(parts[0], 16)
+                    except ValueError:
+                        raise RuntimeError(
+                            "_ChunkedTransferDecoder.dataReceived received "
+                            "unparsable chunk length in parts %s" % (repr(parts),))
                     if self.length == 0:
                         self.state = 'trailer'
-                        self.finish = True
                     else:
                         self.state = 'body'
                     data = rest
                 else:
+                    # Throw away HTTP/1.1 chunk-extensions every time,
+                    # but keep the semicolon so that additional chunk-extension
+                    # data doesn't get interpreted as part of the chunk-length.
+                    if ';' in data:
+                        reattachCR = (data[-1] == '\r')
+                        data = data[:data.find(';')+1].strip()
+                        if reattachCR:
+                            data += '\r'
+                        extraByte = 1
+                    else:
+                        extraByte = 0
+
+                    if len(data) > (self._maximumChunkSizeStringLength + extraByte):
+                        raise RuntimeError(
+                            "_ChunkedTransferDecoder.dataReceived buffered "
+                            "too-long chunk length %s" % (repr(data),))
                     self._buffer = data
                     data = ''
-            elif self.state == 'trailer':
+            elif self.state == 'crlf':
                 if data.startswith('\r\n'):
                     data = data[2:]
-                    if self.finish:
-                        self.state = 'finished'
-                        self.finishCallback(data)
-                        data = ''
-                    else:
-                        self.state = 'chunk-length'
-                else:
+                    self.state = 'chunk-length'
+                elif data == '\r':
                     self._buffer = data
                     data = ''
+                else:
+                    raise RuntimeError(
+                        "_ChunkedTransferDecoder.dataReceived was looking for "
+                        "CRLF, not %s" % (repr(data),))
+            elif self.state == 'trailer':
+                # The goal is to throw away as much of the trailer as
+                # possible every time, while hoping to get the end-of-trailer.
+                CRLF_at = data.find('\r\n')
+                if CRLF_at != -1:
+                    # The *first* '\r\n' ends the trailer.
+                    data = data[CRLF_at+2:]
+                    self.state = 'finished'
+                    self.finishCallback(data)
+                elif data[-1] == '\r':
+                    self._buffer = data[-1]
+                data = ''   
             elif self.state == 'body':
                 if len(data) >= self.length:
                     chunk, data = data[:self.length], data[self.length:]
                     self.dataCallback(chunk)
-                    self.state = 'trailer'
+                    self.state = 'crlf'
                 elif len(data) < self.length:
                     self.length -= len(data)
                     self.dataCallback(data)

=== modified file 'twisted/web/test/test_http.py'
--- twisted/web/test/test_http.py	2009-01-26 00:53:09 +0000
+++ twisted/web/test/test_http.py	2009-04-24 05:51:16 +0000
@@ -527,8 +527,12 @@
         """
         p = http._ChunkedTransferDecoder(None, lambda bytes: None)
         p.dataReceived('0\r\n\r\n')
-        self.assertRaises(RuntimeError, p.dataReceived, 'hello')
-
+        exc = self.assertRaises(RuntimeError, p.dataReceived, 'hello')
+        self.assertEqual(
+            str(exc),
+            "_ChunkedTransferDecoder.dataReceived called after last "
+            "chunk was processed")
+            
 
     def test_earlyConnectionLose(self):
         """
@@ -574,6 +578,168 @@
         self.assertEqual(successes, [True])
 
 
+    def test_trailerUsesNoMemory(self):
+        """
+        L{_ChunkedTransferDecoder.dataReceived} does not waste memory
+        buffering pieces of the trailer, which is always ignored anyway.
+
+        This test is very implementation-specific because the parser exhibits
+        no public behavior while ignoring the trailer.
+        """
+        L = []
+        finished = []
+        p = http._ChunkedTransferDecoder(L.append, finished.append)
+        p.dataReceived('3\r\nabc\r\n0\r\nTRAILER')
+        self.assertEqual(len(p._buffer), 0)
+        p.dataReceived('MORE TRAILER')
+        self.assertEqual(len(p._buffer), 0)
+        p.dataReceived('Here comes a CR: \r')
+        self.assertEqual(len(p._buffer), 1)
+        p.dataReceived('But no newline!')
+        self.assertEqual(len(p._buffer), 0)
+        p.dataReceived('Really finish the trailer now: \r\n')
+        self.assertEqual(len(p._buffer), 0)
+        self.assertEqual(L, ['abc'])
+        self.assertEqual(finished, [''])
+
+
+    def test_chunkExtensionsUseNoMemory(self):
+        """
+        L{_ChunkedTransferDecoder.dataReceived} does not waste memory
+        buffering pieces of chunk extensions, which are always ignored anyway.
+
+        This test is very implementation-specific because the parser exhibits
+        no public behavior while ignoring the chunk extensions.
+        """
+        L = []
+        finished = []
+        p = http._ChunkedTransferDecoder(L.append, finished.append)
+        p.dataReceived('3\r\nabc\r\n4; hello=yes')
+        originalLength = len(p._buffer)
+        # feed it some more ignored chunk-extension
+        p.dataReceived('-still-ignored')
+        self.assertEqual(len(p._buffer), originalLength)
+
+
+    def test_limitedChunkLengthBuffering(self):
+        """
+        L{_ChunkedTransferDecoder.dataReceived} does not allow input
+        to endlessly fill its buffer with a chunk length string.
+        """
+        L = []
+        p = http._ChunkedTransferDecoder(L.append, None)
+        max = p._maximumChunkSizeStringLength
+
+        p.dataReceived('2\r\nab\r\n')
+        exc = self.assertRaises(RuntimeError, lambda: p.dataReceived('3' * (max+1)))
+        self.assertEqual(
+            str(exc),
+            "_ChunkedTransferDecoder.dataReceived buffered too-long "
+            "chunk length '333333333333333333'")
+
+
+    def test_limitedChunkLengthBufferingShort(self):
+        """
+        L{_ChunkedTransferDecoder.dataReceived} does not allow input
+        to endlessly fill its buffer with a chunk length string, even when
+        the data is delivered with multiple calls.
+        """
+        L = []
+        p = http._ChunkedTransferDecoder(L.append, None)
+        max = p._maximumChunkSizeStringLength
+
+        p.dataReceived('2\r\nab\r\n')
+        for s in '3' * max:
+            p.dataReceived(s)
+        exc = self.assertRaises(RuntimeError, lambda: p.dataReceived('3' * 1))
+        self.assertEqual(
+            str(exc),
+            "_ChunkedTransferDecoder.dataReceived buffered too-long "
+            "chunk length '333333333333333333'")
+
+
+    def test_chunkLengthNotTooLong(self):
+        """
+
+        """
+        L = []
+        p = http._ChunkedTransferDecoder(L.append, None)
+        max = p._maximumChunkSizeStringLength
+
+        p.dataReceived('2\r\nab\r\n')
+
+        chunkLenString = ('3' * (max+1))
+        exc = self.assertRaises(RuntimeError,
+            lambda: p.dataReceived(chunkLenString + '\r\n'))
+            
+        self.assertEqual(
+            str(exc),
+            "_ChunkedTransferDecoder.dataReceived received "
+            "too-long chunk length in parts %s" % (repr([chunkLenString]),))
+
+
+    def test_chunkLengthSemicolonMath(self):
+        """
+        L{_ChunkedTransferDecoder.dataReceived} doesn't include
+        the length of the semicolon or chunk-extension data when
+        determining the length of the chunk-length bytes.
+        """
+        L = []
+        p = http._ChunkedTransferDecoder(L.append, None)
+        max = p._maximumChunkSizeStringLength
+
+        p.dataReceived((('3' * (max)) + '; long-extension-completely-ignored=yes'))
+
+
+    def test_chunkLengthNotUnparsable(self):
+        """
+
+        """
+        L = []
+        p = http._ChunkedTransferDecoder(L.append, None)
+
+        p.dataReceived('2\r\nab\r\n')
+
+        chunkLenString = ('G')
+        exc = self.assertRaises(RuntimeError,
+            lambda: p.dataReceived(chunkLenString + '\r\n'))
+
+        self.assertEqual(
+            str(exc),
+            "_ChunkedTransferDecoder.dataReceived received "
+            "unparsable chunk length in parts %s" % (repr([chunkLenString]),))
+
+
+    def test_chunkLengthNotNegative(self):
+        """
+
+        """
+        L = []
+        p = http._ChunkedTransferDecoder(L.append, None)
+
+        p.dataReceived('2\r\nab\r\n')
+        exc = self.assertRaises(RuntimeError, lambda: p.dataReceived('-1\r\n'))
+        self.assertEqual(
+            str(exc),
+            "_ChunkedTransferDecoder.dataReceived received "
+            "negative chunk length in parts %s" % (repr(['-1']),))
+
+
+    def test_chunkLengthNotNegativeZero(self):
+        """
+
+        """
+        L = []
+        p = http._ChunkedTransferDecoder(L.append, None)
+
+        p.dataReceived('2\r\nab\r\n')
+        exc = self.assertRaises(RuntimeError, lambda: p.dataReceived('-0\r\n'))
+        self.assertEqual(
+            str(exc),
+            "_ChunkedTransferDecoder.dataReceived received "
+            "negative chunk length in parts %s" % (repr(['-0']),))
+
+
 
 class ChunkingTestCase(unittest.TestCase):
 

