Ticket #3795: secure-chunked-3795-7-hack.diff

File secure-chunked-3795-7-hack.diff, 20.3 KB (added by ivank, 6 years ago)

make strings with small chunks parse faster at the expense of normal ones (horrible hack)

  • twisted/web/http.py

    === modified file 'twisted/web/http.py'
     
    99 
    1010Future Plans: 
    1111 - HTTP client support will at some point be refactored to support HTTP/1.1. 
    12  - Accept chunked data from clients in server. 
    1312 - Other missing HTTP features from the RFC. 
    1413 
    1514Maintainer: Itamar Shtull-Trauring 
     
    2625import calendar 
    2726import warnings 
    2827import os 
     28import re 
    2929from urlparse import urlparse as _urlparse 
    3030 
    3131from zope.interface import implements 
     
    3636from twisted.python import log 
    3737try: # try importing the fast, C version 
    3838    from twisted.protocols._c_urlarg import unquote 
     39    unquote # shut up pyflakes 
    3940except ImportError: 
    4041    from urllib import unquote 
    4142 
     
    12791280    Protocol for decoding I{chunked} Transfer-Encoding, as defined by RFC 2616, 
    12801281    section 3.6.1.  This protocol can interpret the contents of a request or 
    12811282    response body which uses the I{chunked} Transfer-Encoding.  It cannot 
    1282     interpret any of the rest of the HTTP protocol. 
     1283    interpret any of the rest of the HTTP protocol.  It ignores trailers. 
    12831284 
    12841285    It may make sense for _ChunkedTransferDecoder to be an actual IProtocol 
    12851286    implementation.  Currently, the only user of this class will only ever 
     
    12981299    @ivar finishCallback: A one-argument callable which will be invoked when 
    12991300        the terminal chunk is received.  It will be invoked with all bytes 
    13001301        which were delivered to this protocol which came after the terminal 
    1301         chunk. 
     1302        chunk.  These bytes are I{not} the trailer; they might be the beginning 
     1303        of the next request or response. 
    13021304 
    13031305    @ivar length: Counter keeping track of how many more bytes in a chunk there 
    13041306        are to receive. 
    13051307 
    13061308    @ivar state: One of C{'chunk-length'}, C{'body'}, C{'crlf'}, C{'trailer'}, or 
    13071309        C{'finished'}.  For C{'chunk-length'}, data for the chunk length line 
    1308         is currently being read.  For C{'trailer'}, the CR LF pair which 
    1309         follows each chunk is being read.  For C{'body'}, the contents of a 
    1310         chunk are being read.  For C{'finished'}, the last chunk has been 
    1311         completely read and no more input is valid. 
     1310        is currently being read.  For C{'body'}, the contents of a chunk are 
     1311        being read.  For C{'crlf'}, the CR LF pair which follows each chunk is 
     1312        being read.  For C{'trailer'}, the trailer is being read and ignored. 
     1313        For C{'finished'}, the last chunk has been completely read and no more 
     1314        input is valid. 
    13121315 
    1313     @ivar finish: A flag indicating that the last chunk has been started.  When 
    1314         it finishes, the state will change to C{'finished'} and no more data 
    1315         will be accepted. 
     1316    @ivar _bodyEndsWith: One of I{CR LF} or I{CR LF CR LF}.  When I{CR LF}, the 
     1317        parser is still searching for the end of an empty trailer.  When 
     1318        I{CR LF CR LF}, the parser is searching for the end of a non-empty 
     1319        trailer. 
    13161320    """ 
    13171321    state = 'chunk-length' 
    1318     finish = False 
    13191322 
    13201323    def __init__(self, dataCallback, finishCallback): 
    13211324        self.dataCallback = dataCallback 
    13221325        self.finishCallback = finishCallback 
    13231326        self._buffer = '' 
     1327        self._bodyEndsWith = '\r\n' 
     1328 
     1329        # While an HTTP/1.1 chunk has no size limit in the specification, a 
     1330        # reasonable limit must be established to prevent untrusted input from 
     1331        # causing excessive string concatenation in the parser. A limit of 17 bytes 
     1332        # (max FFFFFFFFFFFFFFFFF) can support chunks up to 2**68-1 bytes. 
     1333        self._maximumChunkSizeStringLength = 17 
     1334 
     1335        self._splitter = re.compile(r'(\r\n)') 
     1336        self._extraBytes = [] 
    13241337 
    13251338 
    13261339    def dataReceived(self, data): 
     
    13281341        Interpret data from a request or response body which uses the 
    13291342        I{chunked} Transfer-Encoding. 
    13301343        """ 
    1331         data = self._buffer + data 
    1332         self._buffer = '' 
     1344 
     1345        # If there was a _buffer left over from last time, send it in 
     1346        # directly to avoid one string concatenation. 
     1347        if self._buffer: 
     1348            block = self._buffer 
     1349            self._buffer = '' 
     1350            self._handleBlock(block) 
     1351 
     1352        if self.state == 'finished': 
     1353            raise RuntimeError( 
     1354                "_ChunkedTransferDecoder.dataReceived called after last " 
     1355                "chunk was processed") 
     1356 
     1357        # This is a dumb-by-design "tokenizer" for faster processing of 
     1358        # small chunks delivered in a big `data' string. 
     1359        for block in self._splitter.split(data): 
     1360            self._handleBlock(block) 
     1361        if self._extraBytes: 
     1362            self.finishCallback(''.join(self._extraBytes)) 
     1363 
     1364 
     1365    def _handleBlock(self, data): 
     1366        if self._buffer: 
     1367            data = self._buffer + data 
     1368            self._buffer = '' 
    13331369        while data: 
    13341370            if self.state == 'chunk-length': 
    13351371                if '\r\n' in data: 
    13361372                    line, rest = data.split('\r\n', 1) 
    1337                     parts = line.split(';') 
    1338                     self.length = int(parts[0], 16) 
     1373                    parts = line.split(';', 1) 
     1374                    chunkSizeString = parts[0] 
     1375                    if len(chunkSizeString) > self._maximumChunkSizeStringLength: 
     1376                        raise RuntimeError( 
     1377                            "_ChunkedTransferDecoder.dataReceived received " 
     1378                            "too-long chunk length %s" % (repr(chunkSizeString),)) 
     1379                    # HEX in RFC 2616 section 2.2 does not include the minus 
     1380                    # sign, but int('-0', 16) == 0, so 'negative zero' chunks 
     1381                    # are accepted here. 
     1382                    # Spaces around the HEX are not allowed, but int(..., 16) 
     1383                    # will still parse it, so padded HEX is accepted here. 
     1384                    try: 
     1385                        self.length = int(chunkSizeString, 16) 
     1386                    except ValueError: 
     1387                        raise RuntimeError( 
     1388                            "_ChunkedTransferDecoder.dataReceived received " 
     1389                            "unparsable chunk length in parts %s" % (parts,)) 
     1390                    if self.length < 0: 
     1391                        raise RuntimeError( 
     1392                            "_ChunkedTransferDecoder.dataReceived received " 
     1393                            "negative chunk length in parts %s" % (parts,)) 
    13391394                    if self.length == 0: 
    13401395                        self.state = 'trailer' 
    1341                         self.finish = True 
    13421396                    else: 
    13431397                        self.state = 'body' 
    13441398                    data = rest 
    13451399                else: 
    1346                     self._buffer = data 
    1347                     data = '' 
    1348             elif self.state == 'trailer': 
    1349                 if data.startswith('\r\n'): 
    1350                     data = data[2:] 
    1351                     if self.finish: 
    1352                         self.state = 'finished' 
    1353                         self.finishCallback(data) 
    1354                         data = '' 
     1400                    # Throw away HTTP/1.1 chunk-extensions every time, but keep 
     1401                    # the semicolon so that additional chunk-extension data 
     1402                    # doesn't get interpreted as part of the chunk-length. 
     1403                    if ';' in data: 
     1404                        reattachCR = (data[-1] == '\r') 
     1405                        data = data[:data.find(';') + 1] 
     1406                        if reattachCR: 
     1407                            data += '\r' 
     1408                        extraByte = 1 
    13551409                    else: 
    1356                         self.state = 'chunk-length' 
    1357                 else: 
     1410                        extraByte = 0 
     1411 
     1412                    if len(data) > (self._maximumChunkSizeStringLength + extraByte): 
     1413                        raise RuntimeError( 
     1414                            "_ChunkedTransferDecoder.dataReceived received " 
     1415                            "too-long chunk length %s" % (repr(data),)) 
    13581416                    self._buffer = data 
    13591417                    data = '' 
    13601418            elif self.state == 'body': 
    13611419                if len(data) >= self.length: 
    13621420                    chunk, data = data[:self.length], data[self.length:] 
    13631421                    self.dataCallback(chunk) 
    1364                     self.state = 'trailer' 
     1422                    self.state = 'crlf' 
    13651423                elif len(data) < self.length: 
    13661424                    self.length -= len(data) 
    13671425                    self.dataCallback(data) 
    13681426                    data = '' 
     1427            elif self.state == 'crlf': 
     1428                if data.startswith('\r\n'): 
     1429                    data = data[2:] 
     1430                    self.state = 'chunk-length' 
     1431                elif data == '\r': 
     1432                    self._buffer = data 
     1433                    data = '' 
     1434                else: 
     1435                    raise RuntimeError( 
     1436                        "_ChunkedTransferDecoder.dataReceived was looking for " 
     1437                        "CRLF, not %s" % (repr(data),)) 
     1438            elif self.state == 'trailer': 
     1439                # The goal is to throw away as much of the trailer as possible 
     1440                # every time, while hoping to get the end-of-trailer. 
     1441 
     1442                if self._bodyEndsWith == '\r\n' and data == '\r': 
     1443                    # This case is ambiguous until dataReceived gets another byte. 
     1444                    # `data' could be the CR in the CRLF to terminate an empty 
     1445                    # trailer, or the beginning of a non-empty trailer 
     1446                    # starting with \r. 
     1447                    self._buffer = data 
     1448                    data = '' 
     1449                    return 
     1450 
     1451                trailerEnd = data.find(self._bodyEndsWith) 
     1452                if self._bodyEndsWith == '\r\n' and trailerEnd != 0: 
     1453                    self._bodyEndsWith = '\r\n\r\n' 
     1454                    trailerEnd = data.find(self._bodyEndsWith) 
     1455 
     1456                if trailerEnd != -1: 
     1457                    data = data[trailerEnd + len(self._bodyEndsWith):] 
     1458                    self.state = 'finished' 
     1459                    self._extraBytes.append(data) 
     1460                else: 
     1461                    for ending in ('\r\n\r', '\r\n', '\r'): 
     1462                        if data.endswith(ending): 
     1463                            self._buffer = ending 
     1464                            break 
     1465                data = '' 
    13691466            elif self.state == 'finished': 
    1370                 raise RuntimeError( 
    1371                     "_ChunkedTransferDecoder.dataReceived called after last " 
    1372                     "chunk was processed") 
     1467                self._extraBytes.append(data) 
     1468                data = '' 
    13731469 
    13741470 
    13751471    def noMoreData(self): 
  • twisted/web/test/test_http.py

    === modified file 'twisted/web/test/test_http.py'
     
    497497        self.assertEqual(L, ['abc']) 
    498498 
    499499 
     500    def test_extensionsShort(self): 
     501        """ 
     502        L{_ChunkedTransferDecoder.dataReceived} disregards chunk-extension 
     503        fields, even when the data is delivered with multiple calls. 
     504 
     505        This should exercise the reattachCR condition in the parser. 
     506        """ 
     507        L = [] 
     508        p = http._ChunkedTransferDecoder(L.append, None) 
     509        for s in '3; x-foo=bar\r\nabc\r\n': 
     510            p.dataReceived(s) 
     511        self.assertEqual(L, ['a', 'b', 'c']) 
     512 
     513 
    500514    def test_finish(self): 
    501515        """ 
    502516        L{_ChunkedTransferDecoder.dataReceived} interprets a zero-length 
     
    520534        self.assertEqual(finished, ['hello']) 
    521535 
    522536 
     537    def test_extraTrailer(self): 
     538        """ 
     539        L{_ChunkedTransferDecoder.dataReceived} skips a one-line trailer. 
     540        """ 
     541        finished = [] 
     542        p = http._ChunkedTransferDecoder(None, finished.append) 
     543        p.dataReceived('0\r\nLINE 1\r\n\r\nhello') 
     544        self.assertEqual(finished, ['hello']) 
     545 
     546 
     547    def test_extraTrailerMultiline(self): 
     548        """ 
     549        L{_ChunkedTransferDecoder.dataReceived} understands that trailers can 
     550        span multiple entity-headers.  Because the parser ignores trailers, it 
     551        can treat each entity-header as an opaque line. 
     552        """ 
     553        finished = [] 
     554        p = http._ChunkedTransferDecoder(None, finished.append) 
     555        p.dataReceived('0\r\nLINE 1\r\nLINE 2\r\n\r\nhello') 
     556        self.assertEqual(finished, ['hello']) 
     557 
     558 
     559    def test_extraTrailerMultilineShort(self): 
     560        """ 
     561        L{_ChunkedTransferDecoder.dataReceived} understands that trailers can 
     562        span multiple entity-headers, even when delivered with multiple calls. 
     563        """ 
     564        finished = [] 
     565        p = http._ChunkedTransferDecoder(None, finished.append) 
     566        for s in '0\r\nLINE 1\r\nLINE 2\r\n\r': 
     567            p.dataReceived(s) 
     568        p.dataReceived('\nhello') 
     569        self.assertEqual(finished, ['hello']) 
     570 
     571 
    523572    def test_afterFinished(self): 
    524573        """ 
    525574        L{_ChunkedTransferDecoder.dataReceived} raises L{RuntimeError} if it 
     
    527576        """ 
    528577        p = http._ChunkedTransferDecoder(None, lambda bytes: None) 
    529578        p.dataReceived('0\r\n\r\n') 
    530         self.assertRaises(RuntimeError, p.dataReceived, 'hello') 
    531  
     579        exc = self.assertRaises(RuntimeError, p.dataReceived, 'hello') 
     580        self.assertEqual( 
     581            str(exc), 
     582            "_ChunkedTransferDecoder.dataReceived called after last " 
     583            "chunk was processed") 
     584             
    532585 
    533586    def test_earlyConnectionLose(self): 
    534587        """ 
     
    574627        self.assertEqual(successes, [True]) 
    575628 
    576629 
     630    def test_trailerUsesNoMemory(self): 
     631        """ 
     632        L{_ChunkedTransferDecoder.dataReceived} does not waste memory 
     633        buffering pieces of the trailer, which is always ignored anyway. 
     634 
     635        This test is very implementation-specific because the parser exhibits 
     636        no public behavior while ignoring the trailer. 
     637        """ 
     638        L = [] 
     639        p = http._ChunkedTransferDecoder(L.append, lambda bytes: None) 
     640        p.dataReceived('3\r\nabc\r\n0\r\nTrailer') 
     641        self.assertEqual(len(p._buffer), 0) 
     642        p.dataReceived('More trailer') 
     643        self.assertEqual(len(p._buffer), 0) 
     644        p.dataReceived('Here comes a CR: \r') 
     645        self.assertEqual(len(p._buffer), 1) 
     646        p.dataReceived('But no newline!') 
     647        self.assertEqual(len(p._buffer), 0) 
     648        p.dataReceived('Make it think it might end: \r\n\r') 
     649        self.assertEqual(len(p._buffer), 3) 
     650        p.dataReceived("But it didn't!") 
     651        self.assertEqual(len(p._buffer), 0) 
     652        p.dataReceived('Really finish the trailer now: \r\n\r\n') 
     653        self.assertEqual(len(p._buffer), 0) 
     654        self.assertEqual(L, ['abc']) 
     655 
     656 
     657    def test_chunkExtensionsUseNoMemory(self): 
     658        """ 
     659        L{_ChunkedTransferDecoder.dataReceived} does not waste memory 
     660        buffering pieces of chunk extensions, which are always ignored anyway. 
     661 
     662        This test is very implementation-specific because the parser exhibits 
     663        no public behavior while ignoring the chunk extensions. 
     664        """ 
     665        L = [] 
     666        finished = [] 
     667        p = http._ChunkedTransferDecoder(L.append, finished.append) 
     668        p.dataReceived('3\r\nabc\r\n4; hello=yes') 
     669        originalLength = len(p._buffer) 
     670        # feed it some more ignored chunk-extension 
     671        p.dataReceived('-still-ignored') 
     672        self.assertEqual(len(p._buffer), originalLength) 
     673 
     674 
     675    def test_limitedChunkLengthBuffering(self): 
     676        """ 
     677        L{_ChunkedTransferDecoder.dataReceived} does not allow input 
     678        to endlessly fill its buffer with a chunk length string. 
     679        """ 
     680        L = [] 
     681        p = http._ChunkedTransferDecoder(L.append, None) 
     682        max = p._maximumChunkSizeStringLength 
     683 
     684        p.dataReceived('2\r\nab\r\n') 
     685        exc = self.assertRaises(RuntimeError, p.dataReceived, '3' * (max + 1)) 
     686        self.assertEqual( 
     687            str(exc), 
     688            "_ChunkedTransferDecoder.dataReceived received too-long " 
     689            "chunk length '333333333333333333'") 
     690 
     691 
     692    def test_limitedChunkLengthBufferingShort(self): 
     693        """ 
     694        L{_ChunkedTransferDecoder.dataReceived} does not allow input 
     695        to endlessly fill its buffer with a chunk length string, even when 
     696        the data is delivered with multiple calls. 
     697        """ 
     698        L = [] 
     699        p = http._ChunkedTransferDecoder(L.append, None) 
     700        max = p._maximumChunkSizeStringLength 
     701 
     702        p.dataReceived('2\r\nab\r\n') 
     703        for s in '3' * max: 
     704            p.dataReceived(s) 
     705        exc = self.assertRaises(RuntimeError, p.dataReceived, '3' * 1) 
     706        self.assertEqual( 
     707            str(exc), 
     708            "_ChunkedTransferDecoder.dataReceived received too-long " 
     709            "chunk length '333333333333333333'") 
     710 
     711 
     712    def test_chunkLengthNotTooLong(self): 
     713        """ 
     714 
     715        """ 
     716        L = [] 
     717        p = http._ChunkedTransferDecoder(L.append, None) 
     718        max = p._maximumChunkSizeStringLength 
     719 
     720        p.dataReceived('2\r\nab\r\n') 
     721 
     722        chunkLenString = ('3' * (max+1)) 
     723        exc = self.assertRaises( 
     724            RuntimeError, p.dataReceived, chunkLenString + '\r\n') 
     725             
     726        self.assertEqual( 
     727            str(exc), 
     728            "_ChunkedTransferDecoder.dataReceived received " 
     729            "too-long chunk length %s" % (repr(chunkLenString),)) 
     730 
     731 
     732    def test_chunkLengthSemicolonMath(self): 
     733        """ 
     734        L{_ChunkedTransferDecoder.dataReceived} doesn't include 
     735        the length of the semicolon or chunk-extension data when 
     736        determining the length of the chunk-length bytes. 
     737        """ 
     738        L = [] 
     739        p = http._ChunkedTransferDecoder(L.append, None) 
     740        max = p._maximumChunkSizeStringLength 
     741 
     742        p.dataReceived((('3' * (max)) + '; long-extension-completely-ignored=yes')) 
     743 
     744 
     745    def test_chunkLengthNotUnparsable(self): 
     746        """ 
     747 
     748        """ 
     749        L = [] 
     750        p = http._ChunkedTransferDecoder(L.append, None) 
     751 
     752        p.dataReceived('2\r\nab\r\n') 
     753 
     754        chunkLenString = ('G') 
     755        exc = self.assertRaises( 
     756            RuntimeError, p.dataReceived, chunkLenString + '\r\n') 
     757 
     758        self.assertEqual( 
     759            str(exc), 
     760            "_ChunkedTransferDecoder.dataReceived received " 
     761            "unparsable chunk length in parts %s" % (repr([chunkLenString]),)) 
     762 
     763 
     764    def test_chunkLengthNotNegative(self): 
     765        """ 
     766 
     767        """ 
     768        L = [] 
     769        p = http._ChunkedTransferDecoder(L.append, None) 
     770 
     771        p.dataReceived('2\r\nab\r\n') 
     772        exc = self.assertRaises(RuntimeError, p.dataReceived, '-1\r\n') 
     773        self.assertEqual( 
     774            str(exc), 
     775            "_ChunkedTransferDecoder.dataReceived received " 
     776            "negative chunk length in parts %s" % (repr(['-1']),)) 
     777 
     778 
     779    def test_chunkLengthNotNegativeWithPadding(self): 
     780        """ 
     781 
     782        """ 
     783        L = [] 
     784        p = http._ChunkedTransferDecoder(L.append, None) 
     785 
     786        p.dataReceived('2\r\nab\r\n') 
     787        exc = self.assertRaises(RuntimeError, p.dataReceived, ' -1\r\n') 
     788        self.assertEqual( 
     789            str(exc), 
     790            "_ChunkedTransferDecoder.dataReceived received " 
     791            "negative chunk length in parts %s" % (repr([' -1']),)) 
     792 
     793 
     794    def test_afterChunkNotCRLFErrorByte1(self): 
     795        """ 
     796 
     797        """ 
     798        L = [] 
     799        p = http._ChunkedTransferDecoder(L.append, None) 
     800 
     801        p.dataReceived('2\r\nab') 
     802        exc = self.assertRaises(RuntimeError, p.dataReceived, 'X') 
     803        self.assertEqual( 
     804            str(exc), 
     805            "_ChunkedTransferDecoder.dataReceived was looking for " 
     806            "CRLF, not %s" % (repr('X'),)) 
     807 
     808 
     809    def test_afterChunkNotCRLFErrorTwoBytes(self): 
     810        """ 
     811 
     812        """ 
     813        L = [] 
     814        p = http._ChunkedTransferDecoder(L.append, None) 
     815 
     816        p.dataReceived('2\r\nab') 
     817        exc = self.assertRaises(RuntimeError, p.dataReceived, '\rX') 
     818        self.assertEqual( 
     819            str(exc), 
     820            "_ChunkedTransferDecoder.dataReceived was looking for " 
     821            "CRLF, not %s" % (repr('\rX'),)) 
     822 
     823 
     824    def test_afterChunkNotCRLFErrorByte2(self): 
     825        """ 
     826 
     827        """ 
     828        L = [] 
     829        p = http._ChunkedTransferDecoder(L.append, None) 
     830 
     831        p.dataReceived('2\r\nab') 
     832        p.dataReceived('\r') 
     833        exc = self.assertRaises(RuntimeError, p.dataReceived, 'X') 
     834        self.assertEqual( 
     835            str(exc), 
     836            "_ChunkedTransferDecoder.dataReceived was looking for " 
     837            "CRLF, not %s" % (repr('\rX'),)) 
     838 
     839 
     840 
     841    def test_chunkLengthNegativeZeroOkay(self): 
     842        """ 
     843 
     844        """ 
     845        L = [] 
     846        p = http._ChunkedTransferDecoder(L.append, None) 
     847 
     848        p.dataReceived('2\r\nab\r\n') 
     849        p.dataReceived('-0\r\n') 
     850 
     851 
    577852 
    578853class ChunkingTestCase(unittest.TestCase): 
    579854