Ticket #3795: secure-chunked-3795-6.diff

File secure-chunked-3795-6.diff, 18.9 KB (added by ivank, 8 years ago)

address exarkun's first review; fix trailer-parsing problems

  • twisted/web/http.py

    === modified file 'twisted/web/http.py'
     
    99
    1010Future Plans:
    1111 - HTTP client support will at some point be refactored to support HTTP/1.1.
    12  - Accept chunked data from clients in server.
    1312 - Other missing HTTP features from the RFC.
    1413
    1514Maintainer: Itamar Shtull-Trauring
     
    3635from twisted.python import log
    3736try: # try importing the fast, C version
    3837    from twisted.protocols._c_urlarg import unquote
     38    unquote # shut up pyflakes
    3939except ImportError:
    4040    from urllib import unquote
    4141
     
    12791279    Protocol for decoding I{chunked} Transfer-Encoding, as defined by RFC 2616,
    12801280    section 3.6.1.  This protocol can interpret the contents of a request or
    12811281    response body which uses the I{chunked} Transfer-Encoding.  It cannot
    1282     interpret any of the rest of the HTTP protocol.
     1282    interpret any of the rest of the HTTP protocol.  It ignores trailers.
    12831283
    12841284    It may make sense for _ChunkedTransferDecoder to be an actual IProtocol
    12851285    implementation.  Currently, the only user of this class will only ever
     
    12981298    @ivar finishCallback: A one-argument callable which will be invoked when
    12991299        the terminal chunk is received.  It will be invoked with all bytes
    13001300        which were delivered to this protocol which came after the terminal
    1301         chunk.
     1301        chunk.  These bytes are I{not} the trailer; they might be the beginning
     1302        of the next request or response.
    13021303
    13031304    @ivar length: Counter keeping track of how many more bytes in a chunk there
    13041305        are to receive.
    13051306
    13061307    @ivar state: One of C{'chunk-length'}, C{'trailer'}, C{'body'}, or
    13071308        C{'finished'}.  For C{'chunk-length'}, data for the chunk length line
    1308         is currently being read.  For C{'trailer'}, the CR LF pair which
    1309         follows each chunk is being read.  For C{'body'}, the contents of a
    1310         chunk are being read.  For C{'finished'}, the last chunk has been
    1311         completely read and no more input is valid.
     1309        is currently being read.  For C{'body'}, the contents of a chunk are
     1310        being read.  For C{'crlf'}, the CR LF pair which follows each chunk is
     1311        being read.  For C{'trailer'}, the trailer is being read and ignored.
     1312        For C{'finished'}, the last chunk has been completely read and no more
     1313        input is valid.
    13121314
    1313     @ivar finish: A flag indicating that the last chunk has been started.  When
    1314         it finishes, the state will change to C{'finished'} and no more data
    1315         will be accepted.
     1315    @ivar _bodyEndsWith: One of I{CR LF} or I{CR LF CR LF}.  When I{CR LF}, the
     1316        parser is still searching for the end of an empty trailer.  When
     1317        I{CR LF CR LF}, the parser is searching for the end of a non-empty
     1318        trailer.
    13161319    """
    13171320    state = 'chunk-length'
    1318     finish = False
    13191321
    13201322    def __init__(self, dataCallback, finishCallback):
    13211323        self.dataCallback = dataCallback
    13221324        self.finishCallback = finishCallback
    13231325        self._buffer = ''
     1326        self._bodyEndsWith = '\r\n'
     1327
     1328        # While an HTTP/1.1 chunk has no size limit in the specification, a
     1329        # reasonable limit must be established to prevent untrusted input from
     1330        # causing excessive string concatenation in the parser. A limit of 17 bytes
     1331        # (max FFFFFFFFFFFFFFFFF) can support chunks up to 2**68-1 bytes.
     1332        self._maximumChunkSizeStringLength = 17
    13241333
    13251334
    13261335    def dataReceived(self, data):
     
    13341343            if self.state == 'chunk-length':
    13351344                if '\r\n' in data:
    13361345                    line, rest = data.split('\r\n', 1)
    1337                     parts = line.split(';')
    1338                     self.length = int(parts[0], 16)
     1346                    parts = line.split(';', 1)
     1347                    chunkSizeString = parts[0]
     1348                    if len(chunkSizeString) > self._maximumChunkSizeStringLength:
     1349                        raise RuntimeError(
     1350                            "_ChunkedTransferDecoder.dataReceived received "
     1351                            "too-long chunk length in parts %s" % (parts,))
     1352                    # HEX in RFC 2616 section 2.2 does not include the minus
     1353                    # sign, but int('-0', 16) == 0, so 'negative zero' chunks
     1354                    # are accepted here.
     1355                    # Spaces around the HEX are not allowed, but int(..., 16)
     1356                    # will still parse it, so padded HEX is accepted here.
     1357                    try:
     1358                        self.length = int(chunkSizeString, 16)
     1359                    except ValueError:
     1360                        raise RuntimeError(
     1361                            "_ChunkedTransferDecoder.dataReceived received "
     1362                            "unparsable chunk length in parts %s" % (parts,))
     1363                    if self.length < 0:
     1364                        raise RuntimeError(
     1365                            "_ChunkedTransferDecoder.dataReceived received "
     1366                            "negative chunk length in parts %s" % (parts,))
    13391367                    if self.length == 0:
    13401368                        self.state = 'trailer'
    1341                         self.finish = True
    13421369                    else:
    13431370                        self.state = 'body'
    13441371                    data = rest
    13451372                else:
    1346                     self._buffer = data
    1347                     data = ''
    1348             elif self.state == 'trailer':
    1349                 if data.startswith('\r\n'):
    1350                     data = data[2:]
    1351                     if self.finish:
    1352                         self.state = 'finished'
    1353                         self.finishCallback(data)
    1354                         data = ''
     1373                    # Throw away HTTP/1.1 chunk-extensions every time, but keep
     1374                    # the semicolon so that additional chunk-extension data
     1375                    # doesn't get interpreted as part of the chunk-length.
     1376                    if ';' in data:
     1377                        reattachCR = (data[-1] == '\r')
     1378                        data = data[:data.find(';') + 1]
     1379                        if reattachCR:
     1380                            data += '\r'
     1381                        extraByte = 1
    13551382                    else:
    1356                         self.state = 'chunk-length'
    1357                 else:
     1383                        extraByte = 0
     1384
     1385                    if len(data) > (self._maximumChunkSizeStringLength + extraByte):
     1386                        raise RuntimeError(
     1387                            "_ChunkedTransferDecoder.dataReceived buffered "
     1388                            "too-long chunk length %s" % (repr(data),))
    13581389                    self._buffer = data
    13591390                    data = ''
    13601391            elif self.state == 'body':
    13611392                if len(data) >= self.length:
    13621393                    chunk, data = data[:self.length], data[self.length:]
    13631394                    self.dataCallback(chunk)
    1364                     self.state = 'trailer'
     1395                    self.state = 'crlf'
    13651396                elif len(data) < self.length:
    13661397                    self.length -= len(data)
    13671398                    self.dataCallback(data)
    13681399                    data = ''
     1400            elif self.state == 'crlf':
     1401                if data.startswith('\r\n'):
     1402                    data = data[2:]
     1403                    self.state = 'chunk-length'
     1404                elif data == '\r':
     1405                    self._buffer = data
     1406                    data = ''
     1407                else:
     1408                    raise RuntimeError(
     1409                        "_ChunkedTransferDecoder.dataReceived was looking for "
     1410                        "CRLF, not %s" % (repr(data),))
     1411            elif self.state == 'trailer':
     1412                # The goal is to throw away as much of the trailer as possible
     1413                # every time, while hoping to get the end-of-trailer.
     1414               
     1415                if self._bodyEndsWith == '\r\n' and data == '\r':
     1416                    # This case is ambiguous until dataReceived gets another byte.
     1417                    # `data' could be the CR in the CRLF to terminate an empty
     1418                    # trailer, or the beginning of an non-empty trailer
     1419                    # starting with \r.
     1420                    self._buffer = data
     1421                    data = ''
     1422                    return
     1423               
     1424                trailerEnd = data.find(self._bodyEndsWith)
     1425                if self._bodyEndsWith == '\r\n' and trailerEnd != 0:
     1426                    self._bodyEndsWith = '\r\n\r\n'
     1427                    trailerEnd = data.find(self._bodyEndsWith)
     1428
     1429                if trailerEnd != -1:
     1430                    data = data[trailerEnd + len(self._bodyEndsWith):]
     1431                    self.state = 'finished'
     1432                    self.finishCallback(data)
     1433                else:
     1434                    for ending in ('\r\n\r', '\r\n', '\r'):
     1435                        if data.endswith(ending):
     1436                            self._buffer = ending
     1437                            break
     1438                data = ''
    13691439            elif self.state == 'finished':
    13701440                raise RuntimeError(
    13711441                    "_ChunkedTransferDecoder.dataReceived called after last "
  • twisted/web/test/test_http.py

    === modified file 'twisted/web/test/test_http.py'
     
    497497        self.assertEqual(L, ['abc'])
    498498
    499499
     500    def test_extensionsShort(self):
     501        """
     502        L{_ChunkedTransferDecoder.dataReceived} disregards chunk-extension
     503        fields, even when the data is delivered with multiple calls.
     504
     505        This should exercise the reattachCR condition in the parser.
     506        """
     507        L = []
     508        p = http._ChunkedTransferDecoder(L.append, None)
     509        for s in '3; x-foo=bar\r\nabc\r\n':
     510            p.dataReceived(s)
     511        self.assertEqual(L, ['a', 'b', 'c'])
     512
     513
    500514    def test_finish(self):
    501515        """
    502516        L{_ChunkedTransferDecoder.dataReceived} interprets a zero-length
     
    520534        self.assertEqual(finished, ['hello'])
    521535
    522536
     537    def test_extraTrailer(self):
     538        """
     539
     540        """
     541        finished = []
     542        p = http._ChunkedTransferDecoder(None, finished.append)
     543        p.dataReceived('0\r\nLINE 1\r\n\r\nhello')
     544        self.assertEqual(finished, ['hello'])
     545
     546
     547    def test_extraTrailerMultiline(self):
     548        """
     549        L{_ChunkedTransferDecoder.dataReceived} understands the trailers can
     550        span multiple entity-headers. But since the parser ignores trailers, it
     551        can treat entity-headers as lines.
     552        """
     553        finished = []
     554        p = http._ChunkedTransferDecoder(None, finished.append)
     555        p.dataReceived('0\r\nLINE 1\r\nLINE 2\r\n\r\nhello')
     556        self.assertEqual(finished, ['hello'])
     557
     558
     559    def test_extraTrailerMultilineShort(self):
     560        """
     561        L{_ChunkedTransferDecoder.dataReceived} understands the trailers can
     562        span multiple entity-headers, when delivered with multiple calls.
     563        """
     564        finished = []
     565        p = http._ChunkedTransferDecoder(None, finished.append)
     566        for s in '0\r\nLINE 1\r\nLINE 2\r\n\r':
     567            p.dataReceived(s)
     568        p.dataReceived('\nhello')
     569        self.assertEqual(finished, ['hello'])
     570
     571
    523572    def test_afterFinished(self):
    524573        """
    525574        L{_ChunkedTransferDecoder.dataReceived} raises L{RuntimeError} if it
     
    527576        """
    528577        p = http._ChunkedTransferDecoder(None, lambda bytes: None)
    529578        p.dataReceived('0\r\n\r\n')
    530         self.assertRaises(RuntimeError, p.dataReceived, 'hello')
    531 
     579        exc = self.assertRaises(RuntimeError, p.dataReceived, 'hello')
     580        self.assertEqual(
     581            str(exc),
     582            "_ChunkedTransferDecoder.dataReceived called after last "
     583            "chunk was processed")
     584           
    532585
    533586    def test_earlyConnectionLose(self):
    534587        """
     
    574627        self.assertEqual(successes, [True])
    575628
    576629
     630    def test_trailerUsesNoMemory(self):
     631        """
     632        L{_ChunkedTransferDecoder.dataReceived} does not waste memory
     633        buffering pieces of the trailer, which is always ignored anyway.
     634
     635        This test is very implementation-specific because the parser exhibits
     636        no public behavior while ignoring the trailer.
     637        """
     638        L = []
     639        p = http._ChunkedTransferDecoder(L.append, lambda bytes: None)
     640        p.dataReceived('3\r\nabc\r\n0\r\nTrailer')
     641        self.assertEqual(len(p._buffer), 0)
     642        p.dataReceived('More trailer')
     643        self.assertEqual(len(p._buffer), 0)
     644        p.dataReceived('Here comes a CR: \r')
     645        self.assertEqual(len(p._buffer), 1)
     646        p.dataReceived('But no newline!')
     647        self.assertEqual(len(p._buffer), 0)
     648        p.dataReceived('Make it think it might end: \r\n\r')
     649        self.assertEqual(len(p._buffer), 3)
     650        p.dataReceived("But it didn't!")
     651        self.assertEqual(len(p._buffer), 0)
     652        p.dataReceived('Really finish the trailer now: \r\n\r\n')
     653        self.assertEqual(len(p._buffer), 0)
     654        self.assertEqual(L, ['abc'])
     655
     656
     657    def test_chunkExtensionsUseNoMemory(self):
     658        """
     659        L{_ChunkedTransferDecoder.dataReceived} does not waste memory
     660        buffering pieces of chunk extensions, which are always ignored anyway.
     661
     662        This test is very implementation-specific because the parser exhibits
     663        no public behavior while ignoring the chunk extensions.
     664        """
     665        L = []
     666        finished = []
     667        p = http._ChunkedTransferDecoder(L.append, finished.append)
     668        p.dataReceived('3\r\nabc\r\n4; hello=yes')
     669        originalLength = len(p._buffer)
     670        # feed it some more ignored chunk-extension
     671        p.dataReceived('-still-ignored')
     672        self.assertEqual(len(p._buffer), originalLength)
     673
     674
     675    def test_limitedChunkLengthBuffering(self):
     676        """
     677        L{_ChunkedTransferDecoder.dataReceived} does not allow input
     678        to endlessly fill its buffer with a chunk length string.
     679        """
     680        L = []
     681        p = http._ChunkedTransferDecoder(L.append, None)
     682        max = p._maximumChunkSizeStringLength
     683
     684        p.dataReceived('2\r\nab\r\n')
     685        exc = self.assertRaises(RuntimeError, p.dataReceived, '3' * (max + 1))
     686        self.assertEqual(
     687            str(exc),
     688            "_ChunkedTransferDecoder.dataReceived buffered too-long "
     689            "chunk length '333333333333333333'")
     690
     691
     692    def test_limitedChunkLengthBufferingShort(self):
     693        """
     694        L{_ChunkedTransferDecoder.dataReceived} does not allow input
     695        to endlessly fill its buffer with a chunk length string, even when
     696        the data is delivered with multiple calls.
     697        """
     698        L = []
     699        p = http._ChunkedTransferDecoder(L.append, None)
     700        max = p._maximumChunkSizeStringLength
     701
     702        p.dataReceived('2\r\nab\r\n')
     703        for s in '3' * max:
     704            p.dataReceived(s)
     705        exc = self.assertRaises(RuntimeError, p.dataReceived, '3' * 1)
     706        self.assertEqual(
     707            str(exc),
     708            "_ChunkedTransferDecoder.dataReceived buffered too-long "
     709            "chunk length '333333333333333333'")
     710
     711
     712    def test_chunkLengthNotTooLong(self):
     713        """
     714
     715        """
     716        L = []
     717        p = http._ChunkedTransferDecoder(L.append, None)
     718        max = p._maximumChunkSizeStringLength
     719
     720        p.dataReceived('2\r\nab\r\n')
     721
     722        chunkLenString = ('3' * (max+1))
     723        exc = self.assertRaises(
     724            RuntimeError, p.dataReceived, chunkLenString + '\r\n')
     725           
     726        self.assertEqual(
     727            str(exc),
     728            "_ChunkedTransferDecoder.dataReceived received "
     729            "too-long chunk length in parts %s" % (repr([chunkLenString]),))
     730
     731
     732    def test_chunkLengthSemicolonMath(self):
     733        """
     734        L{_ChunkedTransferDecoder.dataReceived} doesn't include
     735        the length of the semicolon or chunk-extension data when
     736        determining the length of the chunk-length bytes.
     737        """
     738        L = []
     739        p = http._ChunkedTransferDecoder(L.append, None)
     740        max = p._maximumChunkSizeStringLength
     741
     742        p.dataReceived((('3' * (max)) + '; long-extension-completely-ignored=yes'))
     743
     744
     745    def test_chunkLengthNotUnparsable(self):
     746        """
     747
     748        """
     749        L = []
     750        p = http._ChunkedTransferDecoder(L.append, None)
     751
     752        p.dataReceived('2\r\nab\r\n')
     753
     754        chunkLenString = ('G')
     755        exc = self.assertRaises(
     756            RuntimeError, p.dataReceived, chunkLenString + '\r\n')
     757
     758        self.assertEqual(
     759            str(exc),
     760            "_ChunkedTransferDecoder.dataReceived received "
     761            "unparsable chunk length in parts %s" % (repr([chunkLenString]),))
     762
     763
     764    def test_chunkLengthNotNegative(self):
     765        """
     766
     767        """
     768        L = []
     769        p = http._ChunkedTransferDecoder(L.append, None)
     770
     771        p.dataReceived('2\r\nab\r\n')
     772        exc = self.assertRaises(RuntimeError, p.dataReceived, '-1\r\n')
     773        self.assertEqual(
     774            str(exc),
     775            "_ChunkedTransferDecoder.dataReceived received "
     776            "negative chunk length in parts %s" % (repr(['-1']),))
     777
     778
     779    def test_chunkLengthNotNegativeWithPadding(self):
     780        """
     781
     782        """
     783        L = []
     784        p = http._ChunkedTransferDecoder(L.append, None)
     785
     786        p.dataReceived('2\r\nab\r\n')
     787        exc = self.assertRaises(RuntimeError, p.dataReceived, ' -1\r\n')
     788        self.assertEqual(
     789            str(exc),
     790            "_ChunkedTransferDecoder.dataReceived received "
     791            "negative chunk length in parts %s" % (repr([' -1']),))
     792
     793
     794    def test_afterChunkNotCRLFErrorByte1(self):
     795        """
     796
     797        """
     798        L = []
     799        p = http._ChunkedTransferDecoder(L.append, None)
     800
     801        p.dataReceived('2\r\nab')
     802        exc = self.assertRaises(RuntimeError, p.dataReceived, 'X')
     803        self.assertEqual(
     804            str(exc),
     805            "_ChunkedTransferDecoder.dataReceived was looking for "
     806            "CRLF, not %s" % (repr('X'),))
     807
     808
     809    def test_afterChunkNotCRLFErrorTwoBytes(self):
     810        """
     811
     812        """
     813        L = []
     814        p = http._ChunkedTransferDecoder(L.append, None)
     815
     816        p.dataReceived('2\r\nab')
     817        exc = self.assertRaises(RuntimeError, p.dataReceived, '\rX')
     818        self.assertEqual(
     819            str(exc),
     820            "_ChunkedTransferDecoder.dataReceived was looking for "
     821            "CRLF, not %s" % (repr('\rX'),))
     822
     823
     824    def test_afterChunkNotCRLFErrorByte2(self):
     825        """
     826
     827        """
     828        L = []
     829        p = http._ChunkedTransferDecoder(L.append, None)
     830
     831        p.dataReceived('2\r\nab')
     832        p.dataReceived('\r')
     833        exc = self.assertRaises(RuntimeError, p.dataReceived, 'X')
     834        self.assertEqual(
     835            str(exc),
     836            "_ChunkedTransferDecoder.dataReceived was looking for "
     837            "CRLF, not %s" % (repr('\rX'),))
     838
     839
     840
     841    def test_chunkLengthNegativeZeroOkay(self):
     842        """
     843
     844        """
     845        L = []
     846        p = http._ChunkedTransferDecoder(L.append, None)
     847
     848        p.dataReceived('2\r\nab\r\n')
     849        p.dataReceived('-0\r\n')
     850
     851
    577852
    578853class ChunkingTestCase(unittest.TestCase):
    579854