Ticket #3795: secure-chunked-3795-8.diff

File secure-chunked-3795-8.diff, 20.4 KB (added by ivank, 6 years ago)

a faster/cleaner version of the splitting idea in patch 7; but the implementation is still confusing

  • twisted/web/http.py

    === modified file 'twisted/web/http.py'
     
    99 
    1010Future Plans: 
    1111 - HTTP client support will at some point be refactored to support HTTP/1.1. 
    12  - Accept chunked data from clients in server. 
    1312 - Other missing HTTP features from the RFC. 
    1413 
    1514Maintainer: Itamar Shtull-Trauring 
     
    3635from twisted.python import log 
    3736try: # try importing the fast, C version 
    3837    from twisted.protocols._c_urlarg import unquote 
     38    unquote # shut up pyflakes 
    3939except ImportError: 
    4040    from urllib import unquote 
    4141 
     
    12791279    Protocol for decoding I{chunked} Transfer-Encoding, as defined by RFC 2616, 
    12801280    section 3.6.1.  This protocol can interpret the contents of a request or 
    12811281    response body which uses the I{chunked} Transfer-Encoding.  It cannot 
    1282     interpret any of the rest of the HTTP protocol. 
     1282    interpret any of the rest of the HTTP protocol.  It ignores trailers. 
    12831283 
    12841284    It may make sense for _ChunkedTransferDecoder to be an actual IProtocol 
    12851285    implementation.  Currently, the only user of this class will only ever 
     
    12981298    @ivar finishCallback: A one-argument callable which will be invoked when 
    12991299        the terminal chunk is received.  It will be invoked with all bytes 
    13001300        which were delivered to this protocol which came after the terminal 
    1301         chunk. 
     1301        chunk.  These bytes are B{not} the trailer; they might be the beginning 
     1302        of the next request or response. 
    13021303 
    13031304    @ivar length: Counter keeping track of how many more bytes in a chunk there 
    13041305        are to receive. 
    13051306 
    13061307    @ivar state: One of C{'chunk-length'}, C{'trailer'}, C{'body'}, or 
    13071308        C{'finished'}.  For C{'chunk-length'}, data for the chunk length line 
    1308         is currently being read.  For C{'trailer'}, the CR LF pair which 
    1309         follows each chunk is being read.  For C{'body'}, the contents of a 
    1310         chunk are being read.  For C{'finished'}, the last chunk has been 
    1311         completely read and no more input is valid. 
     1309        is currently being read.  For C{'body'}, the contents of a chunk are 
     1310        being read.  For C{'crlf'}, the CR LF pair which follows each chunk is 
     1311        being read.  For C{'trailer'}, the trailer is being read and ignored. 
     1312        For C{'almost-finished'}, the extra bytes for the C{finishCallback} are 
     1313        being prepared. For C{'finished'}, no more input is valid, and an 
     1314        exception is raised. 
    13121315 
    1313     @ivar finish: A flag indicating that the last chunk has been started.  When 
    1314         it finishes, the state will change to C{'finished'} and no more data 
    1315         will be accepted. 
     1316    @ivar _bodyEndsWith: One of I{CR LF} or I{CR LF CR LF}.  When I{CR LF}, the 
     1317        parser is still searching for the end of an empty trailer.  When 
     1318        I{CR LF CR LF}, the parser is searching for the end of a non-empty 
     1319        trailer. 
    13161320    """ 
    13171321    state = 'chunk-length' 
    1318     finish = False 
    13191322 
    13201323    def __init__(self, dataCallback, finishCallback): 
    13211324        self.dataCallback = dataCallback 
    13221325        self.finishCallback = finishCallback 
    13231326        self._buffer = '' 
     1327        self._bodyEndsWith = '\r\n' 
     1328 
     1329        # While an HTTP/1.1 chunk has no size limit in the specification, a 
     1330        # reasonable limit must be established to prevent untrusted input from 
     1331        # causing excessive string concatenation in the parser. A limit of 17 bytes 
     1332        # (max FFFFFFFFFFFFFFFFF) can support chunks up to 2**68-1 bytes. 
     1333        self._maximumChunkSizeStringLength = 17 
     1334 
     1335        # This list will (very temporarily) buffer the bytes to be sent to 
     1336        # `finishCallback'. Another call to `dataReceived' will never add more 
     1337        # to this buffer. 
     1338        self._extraBytes = [] 
    13241339 
    13251340 
    13261341    def dataReceived(self, data): 
     
    13281343        Interpret data from a request or response body which uses the 
    13291344        I{chunked} Transfer-Encoding. 
    13301345        """ 
     1346 
     1347        # This is a dumb-by-design "tokenizer" which prevents `_handlePiece' 
     1348        # from repeatedly slicing (and thus copying) the "same" data, when 
     1349        # small chunks in a large `data' string is given to `dataReceived'. 
     1350        blocks = data.split('\r\n') 
     1351        lastBlockNum = len(blocks) - 1 
     1352        for n, block in enumerate(blocks): 
     1353            self._handlePiece(block) 
     1354            if n != lastBlockNum: 
     1355                # string split removed the separator, but the parser needs it. 
     1356                self._handlePiece('\r\n') 
     1357 
     1358        if self.state == 'almost-finished': 
     1359            self.state = 'finished' 
     1360            self.finishCallback(''.join(self._extraBytes)) 
     1361 
     1362 
     1363    def _handlePiece(self, data): 
     1364        """ 
     1365        Interpret a smaller piece of data. Always call C{dataReceived} instead. 
     1366        This method cannot move into the C{'finished'} state and call 
     1367        C{finishCallback}. 
     1368        """ 
    13311369        data = self._buffer + data 
    13321370        self._buffer = '' 
    13331371        while data: 
    13341372            if self.state == 'chunk-length': 
    13351373                if '\r\n' in data: 
    13361374                    line, rest = data.split('\r\n', 1) 
    1337                     parts = line.split(';') 
    1338                     self.length = int(parts[0], 16) 
     1375                    parts = line.split(';', 1) 
     1376                    chunkSizeString = parts[0] 
     1377                    if len(chunkSizeString) > self._maximumChunkSizeStringLength: 
     1378                        raise RuntimeError( 
     1379                            "_ChunkedTransferDecoder.dataReceived received " 
     1380                            "too-long chunk length %s" % (repr(chunkSizeString),)) 
     1381                    # HEX in RFC 2616 section 2.2 does not include the minus 
     1382                    # sign, but int('-0', 16) == 0, so 'negative zero' chunks 
     1383                    # are accepted here. 
     1384                    # Spaces around the HEX are not allowed, but int(..., 16) 
     1385                    # will still parse it, so padded HEX is accepted here. 
     1386                    try: 
     1387                        self.length = int(chunkSizeString, 16) 
     1388                    except ValueError: 
     1389                        raise RuntimeError( 
     1390                            "_ChunkedTransferDecoder.dataReceived received " 
     1391                            "unparsable chunk length in parts %s" % (parts,)) 
     1392                    if self.length < 0: 
     1393                        raise RuntimeError( 
     1394                            "_ChunkedTransferDecoder.dataReceived received " 
     1395                            "negative chunk length in parts %s" % (parts,)) 
    13391396                    if self.length == 0: 
    13401397                        self.state = 'trailer' 
    1341                         self.finish = True 
    13421398                    else: 
    13431399                        self.state = 'body' 
    13441400                    data = rest 
    13451401                else: 
    1346                     self._buffer = data 
    1347                     data = '' 
    1348             elif self.state == 'trailer': 
    1349                 if data.startswith('\r\n'): 
    1350                     data = data[2:] 
    1351                     if self.finish: 
    1352                         self.state = 'finished' 
    1353                         self.finishCallback(data) 
    1354                         data = '' 
     1402                    # Throw away HTTP/1.1 chunk-extensions every time, but keep 
     1403                    # the semicolon so that additional chunk-extension data 
     1404                    # doesn't get interpreted as part of the chunk-length. 
     1405                    if ';' in data: 
     1406                        reattachCR = (data[-1] == '\r') 
     1407                        data = data[:data.find(';') + 1] 
     1408                        if reattachCR: 
     1409                            data += '\r' 
     1410                        extraByte = 1 
    13551411                    else: 
    1356                         self.state = 'chunk-length' 
    1357                 else: 
     1412                        extraByte = 0 
     1413 
     1414                    if len(data) > (self._maximumChunkSizeStringLength + extraByte): 
     1415                        raise RuntimeError( 
     1416                            "_ChunkedTransferDecoder.dataReceived received " 
     1417                            "too-long chunk length %s" % (repr(data),)) 
    13581418                    self._buffer = data 
    13591419                    data = '' 
    13601420            elif self.state == 'body': 
    13611421                if len(data) >= self.length: 
    13621422                    chunk, data = data[:self.length], data[self.length:] 
    13631423                    self.dataCallback(chunk) 
    1364                     self.state = 'trailer' 
     1424                    self.state = 'crlf' 
    13651425                elif len(data) < self.length: 
    13661426                    self.length -= len(data) 
    13671427                    self.dataCallback(data) 
    13681428                    data = '' 
     1429            elif self.state == 'crlf': 
     1430                if data.startswith('\r\n'): 
     1431                    data = data[2:] 
     1432                    self.state = 'chunk-length' 
     1433                elif data == '\r': 
     1434                    self._buffer = data 
     1435                    data = '' 
     1436                else: 
     1437                    raise RuntimeError( 
     1438                        "_ChunkedTransferDecoder.dataReceived was looking for " 
     1439                        "CRLF, not %s" % (repr(data),)) 
     1440            elif self.state == 'trailer': 
     1441                # The goal is to throw away as much of the trailer as possible 
     1442                # every time, while hoping to get the end-of-trailer. 
     1443 
     1444                if self._bodyEndsWith == '\r\n' and data == '\r': 
     1445                    # This case is ambiguous until dataReceived gets another byte. 
     1446                    # `data' could be the CR in the CRLF to terminate an empty 
     1447                    # trailer, or the beginning of an non-empty trailer 
     1448                    # starting with \r. 
     1449                    self._buffer = data 
     1450                    data = '' 
     1451                    return 
     1452 
     1453                trailerEnd = data.find(self._bodyEndsWith) 
     1454                if self._bodyEndsWith == '\r\n' and trailerEnd != 0: 
     1455                    self._bodyEndsWith = '\r\n\r\n' 
     1456                    trailerEnd = data.find(self._bodyEndsWith) 
     1457 
     1458                if trailerEnd != -1: 
     1459                    data = data[trailerEnd + len(self._bodyEndsWith):] 
     1460                    self.state = 'almost-finished' 
     1461                    self._extraBytes.append(data) 
     1462                else: 
     1463                    for ending in ('\r\n\r', '\r\n', '\r'): 
     1464                        if data.endswith(ending): 
     1465                            self._buffer = ending 
     1466                            break 
     1467                data = '' 
     1468            elif self.state == 'almost-finished': 
     1469                self._extraBytes.append(data) 
     1470                data = '' 
    13691471            elif self.state == 'finished': 
    13701472                raise RuntimeError( 
    13711473                    "_ChunkedTransferDecoder.dataReceived called after last " 
  • twisted/web/test/test_http.py

    === modified file 'twisted/web/test/test_http.py'
     
    497497        self.assertEqual(L, ['abc']) 
    498498 
    499499 
     500    def test_extensionsShort(self): 
     501        """ 
     502        L{_ChunkedTransferDecoder.dataReceived} disregards chunk-extension 
     503        fields, even when the data is delivered with multiple calls. 
     504 
     505        This should exercise the reattachCR condition in the parser. 
     506        """ 
     507        L = [] 
     508        p = http._ChunkedTransferDecoder(L.append, None) 
     509        for s in '3; x-foo=bar\r\nabc\r\n': 
     510            p.dataReceived(s) 
     511        self.assertEqual(L, ['a', 'b', 'c']) 
     512 
     513 
    500514    def test_finish(self): 
    501515        """ 
    502516        L{_ChunkedTransferDecoder.dataReceived} interprets a zero-length 
     
    520534        self.assertEqual(finished, ['hello']) 
    521535 
    522536 
     537    def test_extraTrailer(self): 
     538        """ 
     539 
     540        """ 
     541        finished = [] 
     542        p = http._ChunkedTransferDecoder(None, finished.append) 
     543        p.dataReceived('0\r\nLINE 1\r\n\r\nhello') 
     544        self.assertEqual(finished, ['hello']) 
     545 
     546 
     547    def test_extraTrailerMultiline(self): 
     548        """ 
     549        L{_ChunkedTransferDecoder.dataReceived} understands the trailers can 
     550        span multiple entity-headers. But since the parser ignores trailers, it 
     551        can treat entity-headers as lines. 
     552        """ 
     553        finished = [] 
     554        p = http._ChunkedTransferDecoder(None, finished.append) 
     555        p.dataReceived('0\r\nLINE 1\r\nLINE 2\r\n\r\nhello') 
     556        self.assertEqual(finished, ['hello']) 
     557 
     558 
     559    def test_extraTrailerMultilineShort(self): 
     560        """ 
     561        L{_ChunkedTransferDecoder.dataReceived} understands the trailers can 
     562        span multiple entity-headers, when delivered with multiple calls. 
     563        """ 
     564        finished = [] 
     565        p = http._ChunkedTransferDecoder(None, finished.append) 
     566        for s in '0\r\nLINE 1\r\nLINE 2\r\n\r': 
     567            p.dataReceived(s) 
     568        p.dataReceived('\nhello') 
     569        self.assertEqual(finished, ['hello']) 
     570 
     571 
    523572    def test_afterFinished(self): 
    524573        """ 
    525574        L{_ChunkedTransferDecoder.dataReceived} raises L{RuntimeError} if it 
     
    527576        """ 
    528577        p = http._ChunkedTransferDecoder(None, lambda bytes: None) 
    529578        p.dataReceived('0\r\n\r\n') 
    530         self.assertRaises(RuntimeError, p.dataReceived, 'hello') 
    531  
     579        exc = self.assertRaises(RuntimeError, p.dataReceived, 'hello') 
     580        self.assertEqual( 
     581            str(exc), 
     582            "_ChunkedTransferDecoder.dataReceived called after last " 
     583            "chunk was processed") 
     584             
    532585 
    533586    def test_earlyConnectionLose(self): 
    534587        """ 
     
    574627        self.assertEqual(successes, [True]) 
    575628 
    576629 
     630    def test_trailerUsesNoMemory(self): 
     631        """ 
     632        L{_ChunkedTransferDecoder.dataReceived} does not waste memory 
     633        buffering pieces of the trailer, which is always ignored anyway. 
     634 
     635        This test is very implementation-specific because the parser exhibits 
     636        no public behavior while ignoring the trailer. 
     637        """ 
     638        L = [] 
     639        p = http._ChunkedTransferDecoder(L.append, lambda bytes: None) 
     640        p.dataReceived('3\r\nabc\r\n0\r\nTrailer') 
     641        self.assertEqual(len(p._buffer), 0) 
     642        p.dataReceived('More trailer') 
     643        self.assertEqual(len(p._buffer), 0) 
     644        p.dataReceived('Here comes a CR: \r') 
     645        self.assertEqual(len(p._buffer), 1) 
     646        p.dataReceived('But no newline!') 
     647        self.assertEqual(len(p._buffer), 0) 
     648        p.dataReceived('Make it think it might end: \r\n\r') 
     649        self.assertEqual(len(p._buffer), 3) 
     650        p.dataReceived("But it didn't!") 
     651        self.assertEqual(len(p._buffer), 0) 
     652        p.dataReceived('Really finish the trailer now: \r\n\r\n') 
     653        self.assertEqual(len(p._buffer), 0) 
     654        self.assertEqual(L, ['abc']) 
     655 
     656 
     657    def test_chunkExtensionsUseNoMemory(self): 
     658        """ 
     659        L{_ChunkedTransferDecoder.dataReceived} does not waste memory 
     660        buffering pieces of chunk extensions, which are always ignored anyway. 
     661 
     662        This test is very implementation-specific because the parser exhibits 
     663        no public behavior while ignoring the chunk extensions. 
     664        """ 
     665        L = [] 
     666        finished = [] 
     667        p = http._ChunkedTransferDecoder(L.append, finished.append) 
     668        p.dataReceived('3\r\nabc\r\n4; hello=yes') 
     669        originalLength = len(p._buffer) 
     670        # feed it some more ignored chunk-extension 
     671        p.dataReceived('-still-ignored') 
     672        self.assertEqual(len(p._buffer), originalLength) 
     673 
     674 
     675    def test_limitedChunkLengthBuffering(self): 
     676        """ 
     677        L{_ChunkedTransferDecoder.dataReceived} does not allow input 
     678        to endlessly fill its buffer with a chunk length string. 
     679        """ 
     680        L = [] 
     681        p = http._ChunkedTransferDecoder(L.append, None) 
     682        max = p._maximumChunkSizeStringLength 
     683 
     684        p.dataReceived('2\r\nab\r\n') 
     685        exc = self.assertRaises(RuntimeError, p.dataReceived, '3' * (max + 1)) 
     686        self.assertEqual( 
     687            str(exc), 
     688            "_ChunkedTransferDecoder.dataReceived received too-long " 
     689            "chunk length '333333333333333333'") 
     690 
     691 
     692    def test_limitedChunkLengthBufferingShort(self): 
     693        """ 
     694        L{_ChunkedTransferDecoder.dataReceived} does not allow input 
     695        to endlessly fill its buffer with a chunk length string, even when 
     696        the data is delivered with multiple calls. 
     697        """ 
     698        L = [] 
     699        p = http._ChunkedTransferDecoder(L.append, None) 
     700        max = p._maximumChunkSizeStringLength 
     701 
     702        p.dataReceived('2\r\nab\r\n') 
     703        for s in '3' * max: 
     704            p.dataReceived(s) 
     705        exc = self.assertRaises(RuntimeError, p.dataReceived, '3' * 1) 
     706        self.assertEqual( 
     707            str(exc), 
     708            "_ChunkedTransferDecoder.dataReceived received too-long " 
     709            "chunk length '333333333333333333'") 
     710 
     711 
     712    def test_chunkLengthNotTooLong(self): 
     713        """ 
     714 
     715        """ 
     716        L = [] 
     717        p = http._ChunkedTransferDecoder(L.append, None) 
     718        max = p._maximumChunkSizeStringLength 
     719 
     720        p.dataReceived('2\r\nab\r\n') 
     721 
     722        chunkLenString = ('3' * (max+1)) 
     723        exc = self.assertRaises( 
     724            RuntimeError, p.dataReceived, chunkLenString + '\r\n') 
     725             
     726        self.assertEqual( 
     727            str(exc), 
     728            "_ChunkedTransferDecoder.dataReceived received " 
     729            "too-long chunk length %s" % (repr(chunkLenString),)) 
     730 
     731 
     732    def test_chunkLengthSemicolonMath(self): 
     733        """ 
     734        L{_ChunkedTransferDecoder.dataReceived} doesn't include 
     735        the length of the semicolon or chunk-extension data when 
     736        determining the length of the chunk-length bytes. 
     737        """ 
     738        L = [] 
     739        p = http._ChunkedTransferDecoder(L.append, None) 
     740        max = p._maximumChunkSizeStringLength 
     741 
     742        p.dataReceived((('3' * (max)) + '; long-extension-completely-ignored=yes')) 
     743 
     744 
     745    def test_chunkLengthNotUnparsable(self): 
     746        """ 
     747 
     748        """ 
     749        L = [] 
     750        p = http._ChunkedTransferDecoder(L.append, None) 
     751 
     752        p.dataReceived('2\r\nab\r\n') 
     753 
     754        chunkLenString = ('G') 
     755        exc = self.assertRaises( 
     756            RuntimeError, p.dataReceived, chunkLenString + '\r\n') 
     757 
     758        self.assertEqual( 
     759            str(exc), 
     760            "_ChunkedTransferDecoder.dataReceived received " 
     761            "unparsable chunk length in parts %s" % (repr([chunkLenString]),)) 
     762 
     763 
     764    def test_chunkLengthNotNegative(self): 
     765        """ 
     766 
     767        """ 
     768        L = [] 
     769        p = http._ChunkedTransferDecoder(L.append, None) 
     770 
     771        p.dataReceived('2\r\nab\r\n') 
     772        exc = self.assertRaises(RuntimeError, p.dataReceived, '-1\r\n') 
     773        self.assertEqual( 
     774            str(exc), 
     775            "_ChunkedTransferDecoder.dataReceived received " 
     776            "negative chunk length in parts %s" % (repr(['-1']),)) 
     777 
     778 
     779    def test_chunkLengthNotNegativeWithPadding(self): 
     780        """ 
     781 
     782        """ 
     783        L = [] 
     784        p = http._ChunkedTransferDecoder(L.append, None) 
     785 
     786        p.dataReceived('2\r\nab\r\n') 
     787        exc = self.assertRaises(RuntimeError, p.dataReceived, ' -1\r\n') 
     788        self.assertEqual( 
     789            str(exc), 
     790            "_ChunkedTransferDecoder.dataReceived received " 
     791            "negative chunk length in parts %s" % (repr([' -1']),)) 
     792 
     793 
     794    def test_afterChunkNotCRLFErrorByte1(self): 
     795        """ 
     796 
     797        """ 
     798        L = [] 
     799        p = http._ChunkedTransferDecoder(L.append, None) 
     800 
     801        p.dataReceived('2\r\nab') 
     802        exc = self.assertRaises(RuntimeError, p.dataReceived, 'X') 
     803        self.assertEqual( 
     804            str(exc), 
     805            "_ChunkedTransferDecoder.dataReceived was looking for " 
     806            "CRLF, not %s" % (repr('X'),)) 
     807 
     808 
     809    def test_afterChunkNotCRLFErrorTwoBytes(self): 
     810        """ 
     811 
     812        """ 
     813        L = [] 
     814        p = http._ChunkedTransferDecoder(L.append, None) 
     815 
     816        p.dataReceived('2\r\nab') 
     817        exc = self.assertRaises(RuntimeError, p.dataReceived, '\rX') 
     818        self.assertEqual( 
     819            str(exc), 
     820            "_ChunkedTransferDecoder.dataReceived was looking for " 
     821            "CRLF, not %s" % (repr('\rX'),)) 
     822 
     823 
     824    def test_afterChunkNotCRLFErrorByte2(self): 
     825        """ 
     826 
     827        """ 
     828        L = [] 
     829        p = http._ChunkedTransferDecoder(L.append, None) 
     830 
     831        p.dataReceived('2\r\nab') 
     832        p.dataReceived('\r') 
     833        exc = self.assertRaises(RuntimeError, p.dataReceived, 'X') 
     834        self.assertEqual( 
     835            str(exc), 
     836            "_ChunkedTransferDecoder.dataReceived was looking for " 
     837            "CRLF, not %s" % (repr('\rX'),)) 
     838 
     839 
     840 
     841    def test_chunkLengthNegativeZeroOkay(self): 
     842        """ 
     843 
     844        """ 
     845        L = [] 
     846        p = http._ChunkedTransferDecoder(L.append, None) 
     847 
     848        p.dataReceived('2\r\nab\r\n') 
     849        p.dataReceived('-0\r\n') 
     850 
     851 
    577852 
    578853class ChunkingTestCase(unittest.TestCase): 
    579854