Ticket #3795: secure-chunked-3795-8.diff

File secure-chunked-3795-8.diff, 20.4 KB (added by ivank, 7 years ago)

A faster/cleaner version of the splitting idea from patch 7, but the implementation is still confusing.
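
For context (not part of the patch), here is a minimal sketch of how the decoder is driven, mirroring the attached tests: decoded chunk data is handed to dataCallback, and any bytes arriving after the (ignored) trailer are handed to finishCallback. _ChunkedTransferDecoder is a private class, used directly here purely for illustration; the snippet assumes the patched twisted.web.http is importable.

    # A minimal usage sketch, assuming the patched twisted.web.http is on the path.
    from twisted.web import http

    bodyParts = []   # receives decoded chunk data
    leftovers = []   # receives bytes which follow the terminal chunk's trailer

    decoder = http._ChunkedTransferDecoder(bodyParts.append, leftovers.append)

    # Two chunks ("abc" and "de"), a zero-length terminal chunk, an ignored
    # trailer line, and then bytes belonging to the next request.
    decoder.dataReceived(
        '3\r\nabc\r\n'
        '2\r\nde\r\n'
        '0\r\n'
        'X-Trailer: ignored\r\n'
        '\r\n'
        'GET /')

    assert bodyParts == ['abc', 'de']
    assert ''.join(leftovers) == 'GET /'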

  • twisted/web/http.py

    === modified file 'twisted/web/http.py'
     
    99
    1010Future Plans:
    1111 - HTTP client support will at some point be refactored to support HTTP/1.1.
    12  - Accept chunked data from clients in server.
    1312 - Other missing HTTP features from the RFC.
    1413
    1514Maintainer: Itamar Shtull-Trauring
     
    3635from twisted.python import log
    3736try: # try importing the fast, C version
    3837    from twisted.protocols._c_urlarg import unquote
     38    unquote # shut up pyflakes
    3939except ImportError:
    4040    from urllib import unquote
    4141
     
    12791279    Protocol for decoding I{chunked} Transfer-Encoding, as defined by RFC 2616,
    12801280    section 3.6.1.  This protocol can interpret the contents of a request or
    12811281    response body which uses the I{chunked} Transfer-Encoding.  It cannot
    1282     interpret any of the rest of the HTTP protocol.
     1282    interpret any of the rest of the HTTP protocol.  It ignores trailers.
    12831283
    12841284    It may make sense for _ChunkedTransferDecoder to be an actual IProtocol
    12851285    implementation.  Currently, the only user of this class will only ever
     
    12981298    @ivar finishCallback: A one-argument callable which will be invoked when
    12991299        the terminal chunk is received.  It will be invoked with all bytes
    13001300        which were delivered to this protocol which came after the terminal
    1301         chunk.
     1301        chunk.  These bytes are B{not} the trailer; they might be the beginning
     1302        of the next request or response.
    13021303
    13031304    @ivar length: Counter keeping track of how many more bytes in a chunk there
    13041305        are to receive.
    13051306
    13061307    @ivar state: One of C{'chunk-length'}, C{'trailer'}, C{'body'}, or
    13071308        C{'finished'}.  For C{'chunk-length'}, data for the chunk length line
    1308         is currently being read.  For C{'trailer'}, the CR LF pair which
    1309         follows each chunk is being read.  For C{'body'}, the contents of a
    1310         chunk are being read.  For C{'finished'}, the last chunk has been
    1311         completely read and no more input is valid.
     1309        is currently being read.  For C{'body'}, the contents of a chunk are
     1310        being read.  For C{'crlf'}, the CR LF pair which follows each chunk is
     1311        being read.  For C{'trailer'}, the trailer is being read and ignored.
     1312        For C{'almost-finished'}, the extra bytes for the C{finishCallback} are
      1313        being prepared.  For C{'finished'}, no more input is valid; any further
      1314        data raises an exception.
    13121315
    1313     @ivar finish: A flag indicating that the last chunk has been started.  When
    1314         it finishes, the state will change to C{'finished'} and no more data
    1315         will be accepted.
     1316    @ivar _bodyEndsWith: One of I{CR LF} or I{CR LF CR LF}.  When I{CR LF}, the
     1317        parser is still searching for the end of an empty trailer.  When
     1318        I{CR LF CR LF}, the parser is searching for the end of a non-empty
     1319        trailer.
    13161320    """
    13171321    state = 'chunk-length'
    1318     finish = False
    13191322
    13201323    def __init__(self, dataCallback, finishCallback):
    13211324        self.dataCallback = dataCallback
    13221325        self.finishCallback = finishCallback
    13231326        self._buffer = ''
     1327        self._bodyEndsWith = '\r\n'
     1328
     1329        # While an HTTP/1.1 chunk has no size limit in the specification, a
     1330        # reasonable limit must be established to prevent untrusted input from
     1331        # causing excessive string concatenation in the parser. A limit of 17 bytes
     1332        # (max FFFFFFFFFFFFFFFFF) can support chunks up to 2**68-1 bytes.
     1333        self._maximumChunkSizeStringLength = 17
     1334
     1335        # This list will (very temporarily) buffer the bytes to be sent to
     1336        # `finishCallback'. Another call to `dataReceived' will never add more
     1337        # to this buffer.
     1338        self._extraBytes = []
    13241339
    13251340
    13261341    def dataReceived(self, data):
     
    13281343        Interpret data from a request or response body which uses the
    13291344        I{chunked} Transfer-Encoding.
    13301345        """
     1346
     1347        # This is a dumb-by-design "tokenizer" which prevents `_handlePiece'
     1348        # from repeatedly slicing (and thus copying) the "same" data, when
      1349        # small chunks in a large `data' string are given to `dataReceived'.
     1350        blocks = data.split('\r\n')
     1351        lastBlockNum = len(blocks) - 1
     1352        for n, block in enumerate(blocks):
     1353            self._handlePiece(block)
     1354            if n != lastBlockNum:
     1355                # string split removed the separator, but the parser needs it.
     1356                self._handlePiece('\r\n')
     1357
     1358        if self.state == 'almost-finished':
     1359            self.state = 'finished'
     1360            self.finishCallback(''.join(self._extraBytes))
     1361
     1362
     1363    def _handlePiece(self, data):
     1364        """
     1365        Interpret a smaller piece of data. Always call C{dataReceived} instead.
      1366        This method never moves into the C{'finished'} state or calls
      1367        C{finishCallback}; C{dataReceived} takes care of that.
     1368        """
    13311369        data = self._buffer + data
    13321370        self._buffer = ''
    13331371        while data:
    13341372            if self.state == 'chunk-length':
    13351373                if '\r\n' in data:
    13361374                    line, rest = data.split('\r\n', 1)
    1337                     parts = line.split(';')
    1338                     self.length = int(parts[0], 16)
     1375                    parts = line.split(';', 1)
     1376                    chunkSizeString = parts[0]
     1377                    if len(chunkSizeString) > self._maximumChunkSizeStringLength:
     1378                        raise RuntimeError(
     1379                            "_ChunkedTransferDecoder.dataReceived received "
     1380                            "too-long chunk length %s" % (repr(chunkSizeString),))
     1381                    # HEX in RFC 2616 section 2.2 does not include the minus
     1382                    # sign, but int('-0', 16) == 0, so 'negative zero' chunks
     1383                    # are accepted here.
     1384                    # Spaces around the HEX are not allowed, but int(..., 16)
     1385                    # will still parse it, so padded HEX is accepted here.
     1386                    try:
     1387                        self.length = int(chunkSizeString, 16)
     1388                    except ValueError:
     1389                        raise RuntimeError(
     1390                            "_ChunkedTransferDecoder.dataReceived received "
     1391                            "unparsable chunk length in parts %s" % (parts,))
     1392                    if self.length < 0:
     1393                        raise RuntimeError(
     1394                            "_ChunkedTransferDecoder.dataReceived received "
     1395                            "negative chunk length in parts %s" % (parts,))
    13391396                    if self.length == 0:
    13401397                        self.state = 'trailer'
    1341                         self.finish = True
    13421398                    else:
    13431399                        self.state = 'body'
    13441400                    data = rest
    13451401                else:
    1346                     self._buffer = data
    1347                     data = ''
    1348             elif self.state == 'trailer':
    1349                 if data.startswith('\r\n'):
    1350                     data = data[2:]
    1351                     if self.finish:
    1352                         self.state = 'finished'
    1353                         self.finishCallback(data)
    1354                         data = ''
     1402                    # Throw away HTTP/1.1 chunk-extensions every time, but keep
     1403                    # the semicolon so that additional chunk-extension data
     1404                    # doesn't get interpreted as part of the chunk-length.
     1405                    if ';' in data:
     1406                        reattachCR = (data[-1] == '\r')
     1407                        data = data[:data.find(';') + 1]
     1408                        if reattachCR:
     1409                            data += '\r'
     1410                        extraByte = 1
    13551411                    else:
    1356                         self.state = 'chunk-length'
    1357                 else:
     1412                        extraByte = 0
     1413
     1414                    if len(data) > (self._maximumChunkSizeStringLength + extraByte):
     1415                        raise RuntimeError(
     1416                            "_ChunkedTransferDecoder.dataReceived received "
     1417                            "too-long chunk length %s" % (repr(data),))
    13581418                    self._buffer = data
    13591419                    data = ''
    13601420            elif self.state == 'body':
    13611421                if len(data) >= self.length:
    13621422                    chunk, data = data[:self.length], data[self.length:]
    13631423                    self.dataCallback(chunk)
    1364                     self.state = 'trailer'
     1424                    self.state = 'crlf'
    13651425                elif len(data) < self.length:
    13661426                    self.length -= len(data)
    13671427                    self.dataCallback(data)
    13681428                    data = ''
     1429            elif self.state == 'crlf':
     1430                if data.startswith('\r\n'):
     1431                    data = data[2:]
     1432                    self.state = 'chunk-length'
     1433                elif data == '\r':
     1434                    self._buffer = data
     1435                    data = ''
     1436                else:
     1437                    raise RuntimeError(
     1438                        "_ChunkedTransferDecoder.dataReceived was looking for "
     1439                        "CRLF, not %s" % (repr(data),))
     1440            elif self.state == 'trailer':
     1441                # The goal is to throw away as much of the trailer as possible
      1442                # on every call, while watching for the end of the trailer.
     1443
     1444                if self._bodyEndsWith == '\r\n' and data == '\r':
     1445                    # This case is ambiguous until dataReceived gets another byte.
     1446                    # `data' could be the CR in the CRLF to terminate an empty
      1447                    # trailer, or the beginning of a non-empty trailer
     1448                    # starting with \r.
     1449                    self._buffer = data
     1450                    data = ''
     1451                    return
     1452
     1453                trailerEnd = data.find(self._bodyEndsWith)
     1454                if self._bodyEndsWith == '\r\n' and trailerEnd != 0:
     1455                    self._bodyEndsWith = '\r\n\r\n'
     1456                    trailerEnd = data.find(self._bodyEndsWith)
     1457
     1458                if trailerEnd != -1:
     1459                    data = data[trailerEnd + len(self._bodyEndsWith):]
     1460                    self.state = 'almost-finished'
     1461                    self._extraBytes.append(data)
     1462                else:
     1463                    for ending in ('\r\n\r', '\r\n', '\r'):
     1464                        if data.endswith(ending):
     1465                            self._buffer = ending
     1466                            break
     1467                data = ''
     1468            elif self.state == 'almost-finished':
     1469                self._extraBytes.append(data)
     1470                data = ''
    13691471            elif self.state == 'finished':
    13701472                raise RuntimeError(
    13711473                    "_ChunkedTransferDecoder.dataReceived called after last "
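
The heart of this revision is the split-on-CRLF tokenizing at the top of dataReceived: whole CRLF-delimited pieces are handed to _handlePiece, with the separator that str.split() consumed re-inserted between them, so a large input string is sliced once instead of being repeatedly re-copied by the state machine. A standalone sketch of that idea follows (illustration only, not the patch code; handlePiece here just records what it is given):

    def tokenize(data, handlePiece):
        # Split once, then hand over the pieces and the separators in order.
        blocks = data.split('\r\n')
        lastBlockNum = len(blocks) - 1
        for n, block in enumerate(blocks):
            handlePiece(block)
            if n != lastBlockNum:
                # str.split() removed the separator, but the parser needs it.
                handlePiece('\r\n')

    pieces = []
    tokenize('3\r\nabc\r\n0\r\n\r\n', pieces.append)
    assert pieces == ['3', '\r\n', 'abc', '\r\n', '0', '\r\n', '', '\r\n', '']
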
  • twisted/web/test/test_http.py

    === modified file 'twisted/web/test/test_http.py'
     
    497497        self.assertEqual(L, ['abc'])
    498498
    499499
     500    def test_extensionsShort(self):
     501        """
     502        L{_ChunkedTransferDecoder.dataReceived} disregards chunk-extension
     503        fields, even when the data is delivered with multiple calls.
     504
     505        This should exercise the reattachCR condition in the parser.
     506        """
     507        L = []
     508        p = http._ChunkedTransferDecoder(L.append, None)
     509        for s in '3; x-foo=bar\r\nabc\r\n':
     510            p.dataReceived(s)
     511        self.assertEqual(L, ['a', 'b', 'c'])
     512
     513
    500514    def test_finish(self):
    501515        """
    502516        L{_ChunkedTransferDecoder.dataReceived} interprets a zero-length
     
    520534        self.assertEqual(finished, ['hello'])
    521535
    522536
     537    def test_extraTrailer(self):
     538        """
      539        L{_ChunkedTransferDecoder.dataReceived} ignores a single-line trailer.
     540        """
     541        finished = []
     542        p = http._ChunkedTransferDecoder(None, finished.append)
     543        p.dataReceived('0\r\nLINE 1\r\n\r\nhello')
     544        self.assertEqual(finished, ['hello'])
     545
     546
     547    def test_extraTrailerMultiline(self):
     548        """
      549        L{_ChunkedTransferDecoder.dataReceived} understands that a trailer can
      550        span multiple entity-headers.  Since the parser ignores trailers, it
      551        can treat each entity-header as an opaque line.
     552        """
     553        finished = []
     554        p = http._ChunkedTransferDecoder(None, finished.append)
     555        p.dataReceived('0\r\nLINE 1\r\nLINE 2\r\n\r\nhello')
     556        self.assertEqual(finished, ['hello'])
     557
     558
     559    def test_extraTrailerMultilineShort(self):
     560        """
      561        L{_ChunkedTransferDecoder.dataReceived} understands that a trailer can
      562        span multiple entity-headers, even when delivered with multiple calls.
     563        """
     564        finished = []
     565        p = http._ChunkedTransferDecoder(None, finished.append)
     566        for s in '0\r\nLINE 1\r\nLINE 2\r\n\r':
     567            p.dataReceived(s)
     568        p.dataReceived('\nhello')
     569        self.assertEqual(finished, ['hello'])
     570
     571
    523572    def test_afterFinished(self):
    524573        """
    525574        L{_ChunkedTransferDecoder.dataReceived} raises L{RuntimeError} if it
     
    527576        """
    528577        p = http._ChunkedTransferDecoder(None, lambda bytes: None)
    529578        p.dataReceived('0\r\n\r\n')
    530         self.assertRaises(RuntimeError, p.dataReceived, 'hello')
    531 
     579        exc = self.assertRaises(RuntimeError, p.dataReceived, 'hello')
     580        self.assertEqual(
     581            str(exc),
     582            "_ChunkedTransferDecoder.dataReceived called after last "
     583            "chunk was processed")
      584
    532585
    533586    def test_earlyConnectionLose(self):
    534587        """
     
    574627        self.assertEqual(successes, [True])
    575628
    576629
     630    def test_trailerUsesNoMemory(self):
     631        """
     632        L{_ChunkedTransferDecoder.dataReceived} does not waste memory
     633        buffering pieces of the trailer, which is always ignored anyway.
     634
     635        This test is very implementation-specific because the parser exhibits
     636        no public behavior while ignoring the trailer.
     637        """
     638        L = []
     639        p = http._ChunkedTransferDecoder(L.append, lambda bytes: None)
     640        p.dataReceived('3\r\nabc\r\n0\r\nTrailer')
     641        self.assertEqual(len(p._buffer), 0)
     642        p.dataReceived('More trailer')
     643        self.assertEqual(len(p._buffer), 0)
     644        p.dataReceived('Here comes a CR: \r')
     645        self.assertEqual(len(p._buffer), 1)
     646        p.dataReceived('But no newline!')
     647        self.assertEqual(len(p._buffer), 0)
     648        p.dataReceived('Make it think it might end: \r\n\r')
     649        self.assertEqual(len(p._buffer), 3)
     650        p.dataReceived("But it didn't!")
     651        self.assertEqual(len(p._buffer), 0)
     652        p.dataReceived('Really finish the trailer now: \r\n\r\n')
     653        self.assertEqual(len(p._buffer), 0)
     654        self.assertEqual(L, ['abc'])
     655
     656
     657    def test_chunkExtensionsUseNoMemory(self):
     658        """
     659        L{_ChunkedTransferDecoder.dataReceived} does not waste memory
     660        buffering pieces of chunk extensions, which are always ignored anyway.
     661
     662        This test is very implementation-specific because the parser exhibits
     663        no public behavior while ignoring the chunk extensions.
     664        """
     665        L = []
     666        finished = []
     667        p = http._ChunkedTransferDecoder(L.append, finished.append)
     668        p.dataReceived('3\r\nabc\r\n4; hello=yes')
     669        originalLength = len(p._buffer)
     670        # feed it some more ignored chunk-extension
     671        p.dataReceived('-still-ignored')
     672        self.assertEqual(len(p._buffer), originalLength)
     673
     674
     675    def test_limitedChunkLengthBuffering(self):
     676        """
     677        L{_ChunkedTransferDecoder.dataReceived} does not allow input
     678        to endlessly fill its buffer with a chunk length string.
     679        """
     680        L = []
     681        p = http._ChunkedTransferDecoder(L.append, None)
     682        max = p._maximumChunkSizeStringLength
     683
     684        p.dataReceived('2\r\nab\r\n')
     685        exc = self.assertRaises(RuntimeError, p.dataReceived, '3' * (max + 1))
     686        self.assertEqual(
     687            str(exc),
     688            "_ChunkedTransferDecoder.dataReceived received too-long "
     689            "chunk length '333333333333333333'")
     690
     691
     692    def test_limitedChunkLengthBufferingShort(self):
     693        """
     694        L{_ChunkedTransferDecoder.dataReceived} does not allow input
     695        to endlessly fill its buffer with a chunk length string, even when
     696        the data is delivered with multiple calls.
     697        """
     698        L = []
     699        p = http._ChunkedTransferDecoder(L.append, None)
     700        max = p._maximumChunkSizeStringLength
     701
     702        p.dataReceived('2\r\nab\r\n')
     703        for s in '3' * max:
     704            p.dataReceived(s)
     705        exc = self.assertRaises(RuntimeError, p.dataReceived, '3' * 1)
     706        self.assertEqual(
     707            str(exc),
     708            "_ChunkedTransferDecoder.dataReceived received too-long "
     709            "chunk length '333333333333333333'")
     710
     711
     712    def test_chunkLengthNotTooLong(self):
     713        """
      714        L{_ChunkedTransferDecoder.dataReceived} rejects an over-long chunk-length line.
     715        """
     716        L = []
     717        p = http._ChunkedTransferDecoder(L.append, None)
     718        max = p._maximumChunkSizeStringLength
     719
     720        p.dataReceived('2\r\nab\r\n')
     721
     722        chunkLenString = ('3' * (max+1))
     723        exc = self.assertRaises(
     724            RuntimeError, p.dataReceived, chunkLenString + '\r\n')
      725
     726        self.assertEqual(
     727            str(exc),
     728            "_ChunkedTransferDecoder.dataReceived received "
     729            "too-long chunk length %s" % (repr(chunkLenString),))
     730
     731
     732    def test_chunkLengthSemicolonMath(self):
     733        """
     734        L{_ChunkedTransferDecoder.dataReceived} doesn't include
     735        the length of the semicolon or chunk-extension data when
     736        determining the length of the chunk-length bytes.
     737        """
     738        L = []
     739        p = http._ChunkedTransferDecoder(L.append, None)
     740        max = p._maximumChunkSizeStringLength
     741
      742        p.dataReceived('3' * max + '; long-extension-completely-ignored=yes')
     743
     744
     745    def test_chunkLengthNotUnparsable(self):
     746        """
      747        L{_ChunkedTransferDecoder.dataReceived} rejects a non-hexadecimal chunk length.
     748        """
     749        L = []
     750        p = http._ChunkedTransferDecoder(L.append, None)
     751
     752        p.dataReceived('2\r\nab\r\n')
     753
     754        chunkLenString = ('G')
     755        exc = self.assertRaises(
     756            RuntimeError, p.dataReceived, chunkLenString + '\r\n')
     757
     758        self.assertEqual(
     759            str(exc),
     760            "_ChunkedTransferDecoder.dataReceived received "
     761            "unparsable chunk length in parts %s" % (repr([chunkLenString]),))
     762
     763
     764    def test_chunkLengthNotNegative(self):
     765        """
      766        L{_ChunkedTransferDecoder.dataReceived} rejects a negative chunk length.
     767        """
     768        L = []
     769        p = http._ChunkedTransferDecoder(L.append, None)
     770
     771        p.dataReceived('2\r\nab\r\n')
     772        exc = self.assertRaises(RuntimeError, p.dataReceived, '-1\r\n')
     773        self.assertEqual(
     774            str(exc),
     775            "_ChunkedTransferDecoder.dataReceived received "
     776            "negative chunk length in parts %s" % (repr(['-1']),))
     777
     778
     779    def test_chunkLengthNotNegativeWithPadding(self):
     780        """
      781        L{_ChunkedTransferDecoder.dataReceived} rejects a padded negative chunk length.
     782        """
     783        L = []
     784        p = http._ChunkedTransferDecoder(L.append, None)
     785
     786        p.dataReceived('2\r\nab\r\n')
     787        exc = self.assertRaises(RuntimeError, p.dataReceived, ' -1\r\n')
     788        self.assertEqual(
     789            str(exc),
     790            "_ChunkedTransferDecoder.dataReceived received "
     791            "negative chunk length in parts %s" % (repr([' -1']),))
     792
     793
     794    def test_afterChunkNotCRLFErrorByte1(self):
     795        """
      796        L{_ChunkedTransferDecoder.dataReceived} rejects a chunk body not followed by CR LF.
     797        """
     798        L = []
     799        p = http._ChunkedTransferDecoder(L.append, None)
     800
     801        p.dataReceived('2\r\nab')
     802        exc = self.assertRaises(RuntimeError, p.dataReceived, 'X')
     803        self.assertEqual(
     804            str(exc),
     805            "_ChunkedTransferDecoder.dataReceived was looking for "
     806            "CRLF, not %s" % (repr('X'),))
     807
     808
     809    def test_afterChunkNotCRLFErrorTwoBytes(self):
     810        """
      811        L{_ChunkedTransferDecoder.dataReceived} rejects CR followed by a non-LF byte.
     812        """
     813        L = []
     814        p = http._ChunkedTransferDecoder(L.append, None)
     815
     816        p.dataReceived('2\r\nab')
     817        exc = self.assertRaises(RuntimeError, p.dataReceived, '\rX')
     818        self.assertEqual(
     819            str(exc),
     820            "_ChunkedTransferDecoder.dataReceived was looking for "
     821            "CRLF, not %s" % (repr('\rX'),))
     822
     823
     824    def test_afterChunkNotCRLFErrorByte2(self):
     825        """
      826        L{_ChunkedTransferDecoder.dataReceived} rejects a CR/non-LF pair split across calls.
     827        """
     828        L = []
     829        p = http._ChunkedTransferDecoder(L.append, None)
     830
     831        p.dataReceived('2\r\nab')
     832        p.dataReceived('\r')
     833        exc = self.assertRaises(RuntimeError, p.dataReceived, 'X')
     834        self.assertEqual(
     835            str(exc),
     836            "_ChunkedTransferDecoder.dataReceived was looking for "
     837            "CRLF, not %s" % (repr('\rX'),))
     838
     839
     840
     841    def test_chunkLengthNegativeZeroOkay(self):
     842        """
      843        L{_ChunkedTransferDecoder.dataReceived} accepts a chunk length of C{'-0'} as zero.
     844        """
     845        L = []
     846        p = http._ChunkedTransferDecoder(L.append, None)
     847
     848        p.dataReceived('2\r\nab\r\n')
     849        p.dataReceived('-0\r\n')
     850
     851
    577852
    578853class ChunkingTestCase(unittest.TestCase):
    579854