Ticket #3795: secure-chunked-3795-6.diff

File secure-chunked-3795-6.diff, 18.9 KB (added by ivank, 6 years ago)

address exarkun's first review; fix trailer-parsing problems

  • twisted/web/http.py

    === modified file 'twisted/web/http.py'
     
    99 
    1010Future Plans: 
    1111 - HTTP client support will at some point be refactored to support HTTP/1.1. 
    12  - Accept chunked data from clients in server. 
    1312 - Other missing HTTP features from the RFC. 
    1413 
    1514Maintainer: Itamar Shtull-Trauring 
     
    3635from twisted.python import log 
    3736try: # try importing the fast, C version 
    3837    from twisted.protocols._c_urlarg import unquote 
     38    unquote # shut up pyflakes 
    3939except ImportError: 
    4040    from urllib import unquote 
    4141 
     
    12791279    Protocol for decoding I{chunked} Transfer-Encoding, as defined by RFC 2616, 
    12801280    section 3.6.1.  This protocol can interpret the contents of a request or 
    12811281    response body which uses the I{chunked} Transfer-Encoding.  It cannot 
    1282     interpret any of the rest of the HTTP protocol. 
     1282    interpret any of the rest of the HTTP protocol.  It ignores trailers. 
    12831283 
    12841284    It may make sense for _ChunkedTransferDecoder to be an actual IProtocol 
    12851285    implementation.  Currently, the only user of this class will only ever 
     
    12981298    @ivar finishCallback: A one-argument callable which will be invoked when 
    12991299        the terminal chunk is received.  It will be invoked with all bytes 
    13001300        which were delivered to this protocol which came after the terminal 
    1301         chunk. 
     1301        chunk.  These bytes are I{not} the trailer; they might be the beginning 
     1302        of the next request or response. 
    13021303 
    13031304    @ivar length: Counter keeping track of how many more bytes in a chunk there 
    13041305        are to receive. 
    13051306 
    13061307    @ivar state: One of C{'chunk-length'}, C{'trailer'}, C{'body'}, or 
    13071308        C{'finished'}.  For C{'chunk-length'}, data for the chunk length line 
    1308         is currently being read.  For C{'trailer'}, the CR LF pair which 
    1309         follows each chunk is being read.  For C{'body'}, the contents of a 
    1310         chunk are being read.  For C{'finished'}, the last chunk has been 
    1311         completely read and no more input is valid. 
     1309        is currently being read.  For C{'body'}, the contents of a chunk are 
     1310        being read.  For C{'crlf'}, the CR LF pair which follows each chunk is 
     1311        being read.  For C{'trailer'}, the trailer is being read and ignored. 
     1312        For C{'finished'}, the last chunk has been completely read and no more 
     1313        input is valid. 
    13121314 
    1313     @ivar finish: A flag indicating that the last chunk has been started.  When 
    1314         it finishes, the state will change to C{'finished'} and no more data 
    1315         will be accepted. 
     1315    @ivar _bodyEndsWith: One of I{CR LF} or I{CR LF CR LF}.  When I{CR LF}, the 
     1316        parser is still searching for the end of an empty trailer.  When 
     1317        I{CR LF CR LF}, the parser is searching for the end of a non-empty 
     1318        trailer. 
    13161319    """ 
    13171320    state = 'chunk-length' 
    1318     finish = False 
    13191321 
    13201322    def __init__(self, dataCallback, finishCallback): 
    13211323        self.dataCallback = dataCallback 
    13221324        self.finishCallback = finishCallback 
    13231325        self._buffer = '' 
     1326        self._bodyEndsWith = '\r\n' 
     1327 
     1328        # While an HTTP/1.1 chunk has no size limit in the specification, a 
     1329        # reasonable limit must be established to prevent untrusted input from 
     1330        # causing excessive string concatenation in the parser. A limit of 17 bytes 
     1331        # (max FFFFFFFFFFFFFFFFF) can support chunks up to 2**68-1 bytes. 
     1332        self._maximumChunkSizeStringLength = 17 
    13241333 
    13251334 
    13261335    def dataReceived(self, data): 
     
    13341343            if self.state == 'chunk-length': 
    13351344                if '\r\n' in data: 
    13361345                    line, rest = data.split('\r\n', 1) 
    1337                     parts = line.split(';') 
    1338                     self.length = int(parts[0], 16) 
     1346                    parts = line.split(';', 1) 
     1347                    chunkSizeString = parts[0] 
     1348                    if len(chunkSizeString) > self._maximumChunkSizeStringLength: 
     1349                        raise RuntimeError( 
     1350                            "_ChunkedTransferDecoder.dataReceived received " 
     1351                            "too-long chunk length in parts %s" % (parts,)) 
     1352                    # HEX in RFC 2616 section 2.2 does not include the minus 
     1353                    # sign, but int('-0', 16) == 0, so 'negative zero' chunks 
     1354                    # are accepted here. 
     1355                    # Spaces around the HEX are not allowed, but int(..., 16) 
     1356                    # will still parse it, so padded HEX is accepted here. 
     1357                    try: 
     1358                        self.length = int(chunkSizeString, 16) 
     1359                    except ValueError: 
     1360                        raise RuntimeError( 
     1361                            "_ChunkedTransferDecoder.dataReceived received " 
     1362                            "unparsable chunk length in parts %s" % (parts,)) 
     1363                    if self.length < 0: 
     1364                        raise RuntimeError( 
     1365                            "_ChunkedTransferDecoder.dataReceived received " 
     1366                            "negative chunk length in parts %s" % (parts,)) 
    13391367                    if self.length == 0: 
    13401368                        self.state = 'trailer' 
    1341                         self.finish = True 
    13421369                    else: 
    13431370                        self.state = 'body' 
    13441371                    data = rest 
    13451372                else: 
    1346                     self._buffer = data 
    1347                     data = '' 
    1348             elif self.state == 'trailer': 
    1349                 if data.startswith('\r\n'): 
    1350                     data = data[2:] 
    1351                     if self.finish: 
    1352                         self.state = 'finished' 
    1353                         self.finishCallback(data) 
    1354                         data = '' 
     1373                    # Throw away HTTP/1.1 chunk-extensions every time, but keep 
     1374                    # the semicolon so that additional chunk-extension data 
     1375                    # doesn't get interpreted as part of the chunk-length. 
     1376                    if ';' in data: 
     1377                        reattachCR = (data[-1] == '\r') 
     1378                        data = data[:data.find(';') + 1] 
     1379                        if reattachCR: 
     1380                            data += '\r' 
     1381                        extraByte = 1 
    13551382                    else: 
    1356                         self.state = 'chunk-length' 
    1357                 else: 
     1383                        extraByte = 0 
     1384 
     1385                    if len(data) > (self._maximumChunkSizeStringLength + extraByte): 
     1386                        raise RuntimeError( 
     1387                            "_ChunkedTransferDecoder.dataReceived buffered " 
     1388                            "too-long chunk length %s" % (repr(data),)) 
    13581389                    self._buffer = data 
    13591390                    data = '' 
    13601391            elif self.state == 'body': 
    13611392                if len(data) >= self.length: 
    13621393                    chunk, data = data[:self.length], data[self.length:] 
    13631394                    self.dataCallback(chunk) 
    1364                     self.state = 'trailer' 
     1395                    self.state = 'crlf' 
    13651396                elif len(data) < self.length: 
    13661397                    self.length -= len(data) 
    13671398                    self.dataCallback(data) 
    13681399                    data = '' 
     1400            elif self.state == 'crlf': 
     1401                if data.startswith('\r\n'): 
     1402                    data = data[2:] 
     1403                    self.state = 'chunk-length' 
     1404                elif data == '\r': 
     1405                    self._buffer = data 
     1406                    data = '' 
     1407                else: 
     1408                    raise RuntimeError( 
     1409                        "_ChunkedTransferDecoder.dataReceived was looking for " 
     1410                        "CRLF, not %s" % (repr(data),)) 
     1411            elif self.state == 'trailer': 
     1412                # The goal is to throw away as much of the trailer as possible 
     1413                # every time, while hoping to get the end-of-trailer. 
     1414                 
     1415                if self._bodyEndsWith == '\r\n' and data == '\r': 
     1416                    # This case is ambiguous until dataReceived gets another byte. 
     1417                    # `data' could be the CR in the CRLF to terminate an empty 
     1418                    # trailer, or the beginning of an non-empty trailer 
     1419                    # starting with \r. 
     1420                    self._buffer = data 
     1421                    data = '' 
     1422                    return 
     1423                 
     1424                trailerEnd = data.find(self._bodyEndsWith) 
     1425                if self._bodyEndsWith == '\r\n' and trailerEnd != 0: 
     1426                    self._bodyEndsWith = '\r\n\r\n' 
     1427                    trailerEnd = data.find(self._bodyEndsWith) 
     1428 
     1429                if trailerEnd != -1: 
     1430                    data = data[trailerEnd + len(self._bodyEndsWith):] 
     1431                    self.state = 'finished' 
     1432                    self.finishCallback(data) 
     1433                else: 
     1434                    for ending in ('\r\n\r', '\r\n', '\r'): 
     1435                        if data.endswith(ending): 
     1436                            self._buffer = ending 
     1437                            break 
     1438                data = '' 
    13691439            elif self.state == 'finished': 
    13701440                raise RuntimeError( 
    13711441                    "_ChunkedTransferDecoder.dataReceived called after last " 
  • twisted/web/test/test_http.py

    === modified file 'twisted/web/test/test_http.py'
     
    497497        self.assertEqual(L, ['abc']) 
    498498 
    499499 
     500    def test_extensionsShort(self): 
     501        """ 
     502        L{_ChunkedTransferDecoder.dataReceived} disregards chunk-extension 
     503        fields, even when the data is delivered with multiple calls. 
     504 
     505        This should exercise the reattachCR condition in the parser. 
     506        """ 
     507        L = [] 
     508        p = http._ChunkedTransferDecoder(L.append, None) 
     509        for s in '3; x-foo=bar\r\nabc\r\n': 
     510            p.dataReceived(s) 
     511        self.assertEqual(L, ['a', 'b', 'c']) 
     512 
     513 
    500514    def test_finish(self): 
    501515        """ 
    502516        L{_ChunkedTransferDecoder.dataReceived} interprets a zero-length 
     
    520534        self.assertEqual(finished, ['hello']) 
    521535 
    522536 
     537    def test_extraTrailer(self): 
     538        """ 
     539 
     540        """ 
     541        finished = [] 
     542        p = http._ChunkedTransferDecoder(None, finished.append) 
     543        p.dataReceived('0\r\nLINE 1\r\n\r\nhello') 
     544        self.assertEqual(finished, ['hello']) 
     545 
     546 
     547    def test_extraTrailerMultiline(self): 
     548        """ 
     549        L{_ChunkedTransferDecoder.dataReceived} understands the trailers can 
     550        span multiple entity-headers. But since the parser ignores trailers, it 
     551        can treat entity-headers as lines. 
     552        """ 
     553        finished = [] 
     554        p = http._ChunkedTransferDecoder(None, finished.append) 
     555        p.dataReceived('0\r\nLINE 1\r\nLINE 2\r\n\r\nhello') 
     556        self.assertEqual(finished, ['hello']) 
     557 
     558 
     559    def test_extraTrailerMultilineShort(self): 
     560        """ 
     561        L{_ChunkedTransferDecoder.dataReceived} understands the trailers can 
     562        span multiple entity-headers, when delivered with multiple calls. 
     563        """ 
     564        finished = [] 
     565        p = http._ChunkedTransferDecoder(None, finished.append) 
     566        for s in '0\r\nLINE 1\r\nLINE 2\r\n\r': 
     567            p.dataReceived(s) 
     568        p.dataReceived('\nhello') 
     569        self.assertEqual(finished, ['hello']) 
     570 
     571 
    523572    def test_afterFinished(self): 
    524573        """ 
    525574        L{_ChunkedTransferDecoder.dataReceived} raises L{RuntimeError} if it 
     
    527576        """ 
    528577        p = http._ChunkedTransferDecoder(None, lambda bytes: None) 
    529578        p.dataReceived('0\r\n\r\n') 
    530         self.assertRaises(RuntimeError, p.dataReceived, 'hello') 
    531  
     579        exc = self.assertRaises(RuntimeError, p.dataReceived, 'hello') 
     580        self.assertEqual( 
     581            str(exc), 
     582            "_ChunkedTransferDecoder.dataReceived called after last " 
     583            "chunk was processed") 
     584             
    532585 
    533586    def test_earlyConnectionLose(self): 
    534587        """ 
     
    574627        self.assertEqual(successes, [True]) 
    575628 
    576629 
     630    def test_trailerUsesNoMemory(self): 
     631        """ 
     632        L{_ChunkedTransferDecoder.dataReceived} does not waste memory 
     633        buffering pieces of the trailer, which is always ignored anyway. 
     634 
     635        This test is very implementation-specific because the parser exhibits 
     636        no public behavior while ignoring the trailer. 
     637        """ 
     638        L = [] 
     639        p = http._ChunkedTransferDecoder(L.append, lambda bytes: None) 
     640        p.dataReceived('3\r\nabc\r\n0\r\nTrailer') 
     641        self.assertEqual(len(p._buffer), 0) 
     642        p.dataReceived('More trailer') 
     643        self.assertEqual(len(p._buffer), 0) 
     644        p.dataReceived('Here comes a CR: \r') 
     645        self.assertEqual(len(p._buffer), 1) 
     646        p.dataReceived('But no newline!') 
     647        self.assertEqual(len(p._buffer), 0) 
     648        p.dataReceived('Make it think it might end: \r\n\r') 
     649        self.assertEqual(len(p._buffer), 3) 
     650        p.dataReceived("But it didn't!") 
     651        self.assertEqual(len(p._buffer), 0) 
     652        p.dataReceived('Really finish the trailer now: \r\n\r\n') 
     653        self.assertEqual(len(p._buffer), 0) 
     654        self.assertEqual(L, ['abc']) 
     655 
     656 
     657    def test_chunkExtensionsUseNoMemory(self): 
     658        """ 
     659        L{_ChunkedTransferDecoder.dataReceived} does not waste memory 
     660        buffering pieces of chunk extensions, which are always ignored anyway. 
     661 
     662        This test is very implementation-specific because the parser exhibits 
     663        no public behavior while ignoring the chunk extensions. 
     664        """ 
     665        L = [] 
     666        finished = [] 
     667        p = http._ChunkedTransferDecoder(L.append, finished.append) 
     668        p.dataReceived('3\r\nabc\r\n4; hello=yes') 
     669        originalLength = len(p._buffer) 
     670        # feed it some more ignored chunk-extension 
     671        p.dataReceived('-still-ignored') 
     672        self.assertEqual(len(p._buffer), originalLength) 
     673 
     674 
     675    def test_limitedChunkLengthBuffering(self): 
     676        """ 
     677        L{_ChunkedTransferDecoder.dataReceived} does not allow input 
     678        to endlessly fill its buffer with a chunk length string. 
     679        """ 
     680        L = [] 
     681        p = http._ChunkedTransferDecoder(L.append, None) 
     682        max = p._maximumChunkSizeStringLength 
     683 
     684        p.dataReceived('2\r\nab\r\n') 
     685        exc = self.assertRaises(RuntimeError, p.dataReceived, '3' * (max + 1)) 
     686        self.assertEqual( 
     687            str(exc), 
     688            "_ChunkedTransferDecoder.dataReceived buffered too-long " 
     689            "chunk length '333333333333333333'") 
     690 
     691 
     692    def test_limitedChunkLengthBufferingShort(self): 
     693        """ 
     694        L{_ChunkedTransferDecoder.dataReceived} does not allow input 
     695        to endlessly fill its buffer with a chunk length string, even when 
     696        the data is delivered with multiple calls. 
     697        """ 
     698        L = [] 
     699        p = http._ChunkedTransferDecoder(L.append, None) 
     700        max = p._maximumChunkSizeStringLength 
     701 
     702        p.dataReceived('2\r\nab\r\n') 
     703        for s in '3' * max: 
     704            p.dataReceived(s) 
     705        exc = self.assertRaises(RuntimeError, p.dataReceived, '3' * 1) 
     706        self.assertEqual( 
     707            str(exc), 
     708            "_ChunkedTransferDecoder.dataReceived buffered too-long " 
     709            "chunk length '333333333333333333'") 
     710 
     711 
     712    def test_chunkLengthNotTooLong(self): 
     713        """ 
     714 
     715        """ 
     716        L = [] 
     717        p = http._ChunkedTransferDecoder(L.append, None) 
     718        max = p._maximumChunkSizeStringLength 
     719 
     720        p.dataReceived('2\r\nab\r\n') 
     721 
     722        chunkLenString = ('3' * (max+1)) 
     723        exc = self.assertRaises( 
     724            RuntimeError, p.dataReceived, chunkLenString + '\r\n') 
     725             
     726        self.assertEqual( 
     727            str(exc), 
     728            "_ChunkedTransferDecoder.dataReceived received " 
     729            "too-long chunk length in parts %s" % (repr([chunkLenString]),)) 
     730 
     731 
     732    def test_chunkLengthSemicolonMath(self): 
     733        """ 
     734        L{_ChunkedTransferDecoder.dataReceived} doesn't include 
     735        the length of the semicolon or chunk-extension data when 
     736        determining the length of the chunk-length bytes. 
     737        """ 
     738        L = [] 
     739        p = http._ChunkedTransferDecoder(L.append, None) 
     740        max = p._maximumChunkSizeStringLength 
     741 
     742        p.dataReceived((('3' * (max)) + '; long-extension-completely-ignored=yes')) 
     743 
     744 
     745    def test_chunkLengthNotUnparsable(self): 
     746        """ 
     747 
     748        """ 
     749        L = [] 
     750        p = http._ChunkedTransferDecoder(L.append, None) 
     751 
     752        p.dataReceived('2\r\nab\r\n') 
     753 
     754        chunkLenString = ('G') 
     755        exc = self.assertRaises( 
     756            RuntimeError, p.dataReceived, chunkLenString + '\r\n') 
     757 
     758        self.assertEqual( 
     759            str(exc), 
     760            "_ChunkedTransferDecoder.dataReceived received " 
     761            "unparsable chunk length in parts %s" % (repr([chunkLenString]),)) 
     762 
     763 
     764    def test_chunkLengthNotNegative(self): 
     765        """ 
     766 
     767        """ 
     768        L = [] 
     769        p = http._ChunkedTransferDecoder(L.append, None) 
     770 
     771        p.dataReceived('2\r\nab\r\n') 
     772        exc = self.assertRaises(RuntimeError, p.dataReceived, '-1\r\n') 
     773        self.assertEqual( 
     774            str(exc), 
     775            "_ChunkedTransferDecoder.dataReceived received " 
     776            "negative chunk length in parts %s" % (repr(['-1']),)) 
     777 
     778 
     779    def test_chunkLengthNotNegativeWithPadding(self): 
     780        """ 
     781 
     782        """ 
     783        L = [] 
     784        p = http._ChunkedTransferDecoder(L.append, None) 
     785 
     786        p.dataReceived('2\r\nab\r\n') 
     787        exc = self.assertRaises(RuntimeError, p.dataReceived, ' -1\r\n') 
     788        self.assertEqual( 
     789            str(exc), 
     790            "_ChunkedTransferDecoder.dataReceived received " 
     791            "negative chunk length in parts %s" % (repr([' -1']),)) 
     792 
     793 
     794    def test_afterChunkNotCRLFErrorByte1(self): 
     795        """ 
     796 
     797        """ 
     798        L = [] 
     799        p = http._ChunkedTransferDecoder(L.append, None) 
     800 
     801        p.dataReceived('2\r\nab') 
     802        exc = self.assertRaises(RuntimeError, p.dataReceived, 'X') 
     803        self.assertEqual( 
     804            str(exc), 
     805            "_ChunkedTransferDecoder.dataReceived was looking for " 
     806            "CRLF, not %s" % (repr('X'),)) 
     807 
     808 
     809    def test_afterChunkNotCRLFErrorTwoBytes(self): 
     810        """ 
     811 
     812        """ 
     813        L = [] 
     814        p = http._ChunkedTransferDecoder(L.append, None) 
     815 
     816        p.dataReceived('2\r\nab') 
     817        exc = self.assertRaises(RuntimeError, p.dataReceived, '\rX') 
     818        self.assertEqual( 
     819            str(exc), 
     820            "_ChunkedTransferDecoder.dataReceived was looking for " 
     821            "CRLF, not %s" % (repr('\rX'),)) 
     822 
     823 
     824    def test_afterChunkNotCRLFErrorByte2(self): 
     825        """ 
     826 
     827        """ 
     828        L = [] 
     829        p = http._ChunkedTransferDecoder(L.append, None) 
     830 
     831        p.dataReceived('2\r\nab') 
     832        p.dataReceived('\r') 
     833        exc = self.assertRaises(RuntimeError, p.dataReceived, 'X') 
     834        self.assertEqual( 
     835            str(exc), 
     836            "_ChunkedTransferDecoder.dataReceived was looking for " 
     837            "CRLF, not %s" % (repr('\rX'),)) 
     838 
     839 
     840 
     841    def test_chunkLengthNegativeZeroOkay(self): 
     842        """ 
     843 
     844        """ 
     845        L = [] 
     846        p = http._ChunkedTransferDecoder(L.append, None) 
     847 
     848        p.dataReceived('2\r\nab\r\n') 
     849        p.dataReceived('-0\r\n') 
     850 
     851 
    577852 
    578853class ChunkingTestCase(unittest.TestCase): 
    579854