Ticket #3795: secure-chunked-3795-2.diff

File secure-chunked-3795-2.diff, 19.8 KB (added by ivank, 7 years ago)

secure _ChunkedTransferDecoder while remaining bug-compatible

  • twisted/web/http.py

    === modified file 'twisted/web/http.py' (properties changed: -x to +x)
     
    99
    1010Future Plans:
    1111 - HTTP client support will at some point be refactored to support HTTP/1.1.
    12  - Accept chunked data from clients in server.
    1312 - Other missing HTTP features from the RFC.
    1413
    1514Maintainer: Itamar Shtull-Trauring
     
    3635from twisted.python import log
    3736try: # try importing the fast, C version
    3837    from twisted.protocols._c_urlarg import unquote
     38    unquote # shut up pyflakes
    3939except ImportError:
    4040    from urllib import unquote
    4141
     
    12961296        time application data is received.
    12971297
    12981298    @ivar finishCallback: A one-argument callable which will be invoked when
    1299         the terminal chunk is received.  It will be invoked with all bytes
    1300         which were delivered to this protocol which came after the terminal
    1301         chunk.
     1299        the beginning of the terminal chunk is received.  It will be invoked
     1300        with an empty string, or one or more bytes, depending on how many
     1301        bytes happened to be delivered at that time. Use terminalDelivery
     1302        to get the entire terminal chunk.
     1303
     1304    @ivar terminalDelivery: A one-argument callable which will be invoked
     1305        whenever one or more bytes of the terminal chunk are received.
     1306        It will be invoked one or more times with the bytes which were
     1307        delivered to this protocol which came after the terminal chunk.
    13021308
    13031309    @ivar length: Counter keeping track of how many more bytes in a chunk there
    13041310        are to receive.
     
    13131319    @ivar finish: A flag indicating that the last chunk has been started.  When
    13141320        it finishes, the state will change to C{'finished'} and no more data
    13151321        will be accepted.
     1322
     1323    @ivar _maximumChunkSize: The maximum size (in bytes) of chunk to
     1324        support. The implementation MAY accept larger chunks.
    13161325    """
    13171326    state = 'chunk-length'
    13181327    finish = False
    13191328
    1320     def __init__(self, dataCallback, finishCallback):
     1329    # 2**64 in hex is 10000000000000000 and requires 17 bytes to represent
     1330    # in chunked encoding. This is used only to limit the amount of (non-data)
     1331    # bytes to be buffered.
     1332    _maximumChunkSize = 2**64
     1333
     1334    def __init__(self, dataCallback, finishCallback, terminalDelivery=None):
    13211335        self.dataCallback = dataCallback
    13221336        self.finishCallback = finishCallback
     1337        self.terminalDelivery = terminalDelivery
    13231338        self._buffer = ''
     1339        self._maximumChunkLengthBytes = len(hex(self._maximumChunkSize)[2:])
    13241340
    13251341
    13261342    def dataReceived(self, data):
     
    13311347        data = self._buffer + data
    13321348        self._buffer = ''
    13331349        while data:
    1334             if self.state == 'chunk-length':
     1350            if self.state == 'ignore-extension':
     1351                if '\r' in data:
     1352                    if '\r\n' in data:
     1353                        self.state = 'chunk-length'
     1354                    else:
     1355                        self._buffer = self._savedPreExtensionChunkBytes
     1356                        if data[-1] == '\r':
     1357                            # The next byte to arrive could be a \n.
     1358                            # In that case, the 'ignore-extension' logic will run again
     1359                            # and switch to state 'chunk-length'.
     1360
     1361                            # The next byte to arrive could be NOT \n.
     1362                            # In that case, the stray \r in the chunk extension will be
     1363                            # ignored, and self._buffer will be reset back to
     1364                            # self._savedPreExtensionChunkBytes
     1365
     1366                            self._buffer += '\r'
     1367                        data = ''
     1368                else:
     1369                    self._buffer = self._savedPreExtensionChunkBytes
     1370                    data = ''
     1371            elif self.state == 'chunk-length':
    13351372                if '\r\n' in data:
    13361373                    line, rest = data.split('\r\n', 1)
    13371374                    parts = line.split(';')
    1338                     self.length = int(parts[0], 16)
    1339                     if self.length == 0:
     1375                    if len(parts[0].strip()) > self._maximumChunkLengthBytes:
     1376                        raise RuntimeError(
     1377                            "_ChunkedTransferDecoder.dataReceived got strange bytes for "
     1378                            "chunk size in parts %s" % (repr(parts),))
     1379                    try:
     1380                        length = int(parts[0], 16)
     1381                    except ValueError:
     1382                        raise RuntimeError(
     1383                            "_ChunkedTransferDecoder.dataReceived got invalid "
     1384                            "chunk size in parts %s" % (repr(parts),))
     1385                    self.length = length
     1386                    if self.length < 0:
     1387                        raise RuntimeError(
     1388                            "_ChunkedTransferDecoder.dataReceived got negative "
     1389                            "chunk size in parts %s" % (repr(parts),))
     1390                    elif self.length == 0:
    13401391                        self.state = 'trailer'
    13411392                        self.finish = True
    13421393                    else:
    13431394                        self.state = 'body'
     1395
    13441396                    data = rest
    13451397                else:
    1346                     self._buffer = data
    1347                     data = ''
     1398                    beforeSemicolon = data.split(';', 1)[0].strip()
     1399                    if len(beforeSemicolon) > self._maximumChunkLengthBytes:
     1400                        raise RuntimeError(
     1401                            "_ChunkedTransferDecoder.dataReceived"
     1402                            "got too many bytes for chunk-length.")
     1403                    if ';' in data:
     1404                        self._savedPreExtensionChunkBytes = beforeSemicolon
     1405                        self.state = 'ignore-extension'
     1406                        data = ''
     1407                    else:
     1408                        self._buffer = data
     1409                        data = ''
    13481410            elif self.state == 'trailer':
    1349                 if data.startswith('\r\n'):
     1411                if data.startswith('\r\n'):               
    13501412                    data = data[2:]
    13511413                    if self.finish:
    1352                         self.state = 'finished'
     1414                        # state must be changed to 'terminal-chunk' before finishCallback
     1415                        # so that twisted.web.test.test_http.test_reentrantFinishedNoMoreData passes
     1416                        self.state = 'terminal-chunk'
     1417                        # this is not the entire terminal chunk, but it is called
     1418                        # for backwards compatibility.
    13531419                        self.finishCallback(data)
    1354                         data = ''
    13551420                    else:
    13561421                        self.state = 'chunk-length'
    1357                 else:
     1422                elif data == '\r':
    13581423                    self._buffer = data
    13591424                    data = ''
     1425                else:
     1426                    raise RuntimeError(
     1427                        "_ChunkedTransferDecoder.dataReceived got bytes %s "
     1428                        "instead of the desired CRLF trailer." % (repr(data),))
     1429            elif self.state == 'terminal-chunk':
     1430                if data.endswith('\r\n\r\n'):
     1431                    b = 4
     1432                elif data.endswith('\r\n\r'):
     1433                    b = 3
     1434                elif data.endswith('\r\n'):
     1435                    b = 2
     1436                elif data.endswith('\r'):
     1437                    b = 1
     1438                else:
     1439                    b = 0
     1440
     1441                if self.terminalDelivery:
     1442                    if b == 0:
     1443                        if data:
     1444                            self.terminalDelivery(data)
     1445                    else:
     1446                        part = data[:-b]
     1447                        if part:
     1448                            self.terminalDelivery(part)
     1449
     1450                if b is 0:
     1451                    data = ''
     1452                else:
     1453                    self._buffer = data[-b:]
     1454                    data = ''
     1455
     1456                if b is 4:
     1457                    self.state = 'finished'
     1458
    13601459            elif self.state == 'body':
    13611460                if len(data) >= self.length:
    13621461                    chunk, data = data[:self.length], data[self.length:]
     
    13771476        Verify that all data has been received.  If it has not been, raise
    13781477        L{_DataLoss}.
    13791478        """
    1380         if self.state != 'finished':
     1479        # trailers are optional, and we don't know if we'll ever get one.
     1480        if self.state not in ('finished', 'terminal-chunk'):
    13811481            raise _DataLoss(
    13821482                "Chunked decoder in %r state, still expecting more data to "
    13831483                "get to finished state." % (self.state,))
  • twisted/web/test/test_http.py

    === modified file 'twisted/web/test/test_http.py' (properties changed: -x to +x)
     
    461461        self.assertEqual(L, ['abc', '12345', '0123456789'])
    462462
    463463
     464    def test_decodingBadTrailerFirstByte(self):
     465        """
     466        L{_ChunkedTransferDecoder.dataReceived} raises RuntimeError when
     467        it expects the first byte of the trailer but gets something else.
     468        """
     469        L = []
     470        p = http._ChunkedTransferDecoder(L.append, None)
     471        p.dataReceived('3\r\nabc\r\n4\r\n1234')
     472        self.assertRaises(RuntimeError, lambda: p.dataReceived('5'))
     473
     474
     475    def test_decodingButBadSecondByte(self):
     476        """
     477        L{_ChunkedTransferDecoder.dataReceived} raises RuntimeError when
     478        it expects the second byte of the trailer but gets something else.
     479        """
     480        L = []
     481        p = http._ChunkedTransferDecoder(L.append, None)
     482        p.dataReceived('3\r\nabc\r\n4\r\n1234')
     483        p.dataReceived('\r')
     484        self.assertRaises(RuntimeError, lambda: p.dataReceived('x'))
     485
     486
     487    def test_chunkLengthNegative(self):
     488        """
     489        L{_ChunkedTransferDecoder.dataReceived} raises RuntimeError
     490        when the chunk length string is negative.
     491        """
     492        L = []
     493        p = http._ChunkedTransferDecoder(L.append, None)
     494        self.assertRaises(RuntimeError, lambda: p.dataReceived('-3\r\nsomething'))
     495
     496
     497    def test_chunkLengthNegativeZero(self):
     498        """
     499        L{_ChunkedTransferDecoder.dataReceived} does not raise RuntimeError
     500        when the chunk length is "-0".
     501
     502        At this time, stricter RFC2616 validation would bring no benefits.
     503        """
     504        L = []
     505        finished = []
     506        p = http._ChunkedTransferDecoder(L.append, finished.append)
     507        p.dataReceived('1\r\nX\r\n-0\r\n\r\n')
     508        self.assertEqual(L, ['X'])
     509        self.assertEqual(finished, [''])
     510
     511
     512    def test_tooLongChunkLength(self):
     513        """
     514        L{_ChunkedTransferDecoder.dataReceived} raises RuntimeError
     515        when the chunk length string is too long.
     516        """
     517        L = []
     518        p = http._ChunkedTransferDecoder(L.append, None)
     519        max = p._maximumChunkLengthBytes
     520
     521        self.assertRaises(RuntimeError, lambda: p.dataReceived(
     522            ('9' * (max+1)) + '\r\n' + ('s' * (max+1))))
     523
     524
     525    def test_tooLongChunkLengthWithExtension(self):
     526        """
     527        L{_ChunkedTransferDecoder.dataReceived} immediately raises
     528        RuntimeError when the chunk length string is too long, even when
     529        it contains the beginning of a chunk extension.
     530        """
     531        L = []
     532        p = http._ChunkedTransferDecoder(L.append, None)
     533        max = p._maximumChunkLengthBytes
     534
     535        self.assertRaises(RuntimeError, lambda: p.dataReceived(
     536            (('9' * (max+1)) + ';')))
     537
     538
     539    def test_tooLongChunkLengthWithExtensionGoodMath(self):
     540        """
     541        L{_ChunkedTransferDecoder.dataReceived} doesn't include
     542        the length of the semicolon when determining the length
     543        of the chunked length string.
     544        """
     545        L = []
     546        p = http._ChunkedTransferDecoder(L.append, None)
     547        max = p._maximumChunkLengthBytes
     548
     549        p.dataReceived((('9' * (max)) + ';'))
     550
     551
     552    def test_chunkTooLongSeparate(self):
     553        """
     554        L{_ChunkedTransferDecoder.dataReceived} raises RuntimeError
     555        when the chunk length string is too long, even when the last byte
     556        is sent separately.
     557        """
     558        L = []
     559        p = http._ChunkedTransferDecoder(L.append, None)
     560        max = p._maximumChunkLengthBytes
     561
     562        p.dataReceived(('9' * max))
     563
     564        self.assertRaises(RuntimeError, lambda: p.dataReceived('9'))
     565
     566
     567    def test_chunkInvalidHex(self):
     568        """
     569        L{_ChunkedTransferDecoder.dataReceived} raises RuntimeError
     570        when the chunk is invalid hex.
     571        """
     572        L = []
     573        p = http._ChunkedTransferDecoder(L.append, None)
     574        self.assertRaises(RuntimeError, lambda: p.dataReceived(
     575            '9G\r\nsomething'))
     576
     577
     578    def test_chunkInvalidGarbage(self):
     579        """
     580        L{_ChunkedTransferDecoder.dataReceived} raises RuntimeError
     581        when the chunk is garbage.
     582        """
     583        L = []
     584        p = http._ChunkedTransferDecoder(L.append, None)
     585        self.assertRaises(RuntimeError, lambda: p.dataReceived(
     586            '#$\][,.\'!@#*&\r\nsomething'))
     587
     588
     589    def test_shortStrayCRInExtension(self):
     590        """
     591        L{_ChunkedTransferDecoder.dataReceived} ignores CR in chunk
     592        extensions.
     593        """
     594        L = []
     595        finished = []
     596        p = http._ChunkedTransferDecoder(L.append, finished.append)
     597        for s in '3\r\nabc\r\n5; extension=some\rthing\r\n12345\r\n0\r\n\r\n':
     598            p.dataReceived(s)
     599        self.assertEqual(L, ['a', 'b', 'c', '1', '2', '3', '4', '5'])
     600        self.assertEqual(finished, [''])
     601
     602
    464603    def test_short(self):
    465604        """
    466605        L{_ChunkedTransferDecoder.dataReceived} decodes chunks broken up and
     
    497636        self.assertEqual(L, ['abc'])
    498637
    499638
     639    def test_extensions(self):
     640        """
     641        L{_ChunkedTransferDecoder.dataReceived} disregards chunk-extension
     642        fields, even when they are long.
     643        """
     644        L = []
     645        p = http._ChunkedTransferDecoder(L.append, None)
     646        p.dataReceived('3; x-foo-long-long-long=bar-pretty-long\r\nabc\r\n')
     647        self.assertEqual(L, ['abc'])
     648
     649
     650    def test_extensionsShortDelivery(self):
     651        """
     652        L{_ChunkedTransferDecoder.dataReceived} disregards chunk-extension
     653        fields, even when delivered in multiple calls.
     654        """
     655        L = []
     656        p = http._ChunkedTransferDecoder(L.append, None)
     657        for s in '3; x-foo-long-long-long=bar-pretty-long\r\nabc\r\n':
     658            p.dataReceived(s)
     659        self.assertEqual(L, ['a', 'b', 'c'])
     660
     661
     662    def test_extensionsShortDeliveryVariant(self):
     663        """
     664        L{_ChunkedTransferDecoder.dataReceived} disregards chunk-extension
     665        fields, even when delivered in multiple calls, with the first call including
     666        the semicolon and space.
     667        """
     668        L = []
     669        p = http._ChunkedTransferDecoder(L.append, None)
     670        p.dataReceived('3; ')
     671        for s in 'x-foo-long-long-long=bar-pretty-long\r\nabc\r\n':
     672            p.dataReceived(s)
     673        self.assertEqual(L, ['a', 'b', 'c'])
     674
     675
     676    def test_extensionsShortDeliveryTwoBytes(self):
     677        """
     678        L{_ChunkedTransferDecoder.dataReceived} disregards chunk-extension
     679        fields, even when the chunk requires two bytes of chunk-length bytes to
     680        represent the length.
     681        """
     682        L = []
     683        p = http._ChunkedTransferDecoder(L.append, None)
     684        # 0x14 == 20
     685        for s in '14; x-foo-long-long-long=bar-pretty-long\r\n'+('a'*20)+'\r\n':
     686            p.dataReceived(s)
     687        self.assertEqual(L, ['a']*20)
     688
     689
    500690    def test_finish(self):
    501691        """
    502692        L{_ChunkedTransferDecoder.dataReceived} interprets a zero-length
     
    520710        self.assertEqual(finished, ['hello'])
    521711
    522712
    523     def test_afterFinished(self):
     713    def test_extraOnlyFirst(self):
     714        """
     715        Only the first part of the terminal chunk gets passed to finishCallback.
     716        """
     717        finished = []
     718        p = http._ChunkedTransferDecoder(None, finished.append)
     719        p.dataReceived('0\r\n\r\nh')
     720        p.dataReceived('ello')
     721        self.assertEqual(finished, ['h'])
     722
     723
     724    def test_extraOnlyFirstAndTerminal(self):
     725        """
     726        Only the first part of the terminal chunk gets passed to finishCallback,
     727        but the terminalDelivery can get the whole thing.
     728        """
     729        finished = []
     730        terminal = []
     731        p = http._ChunkedTransferDecoder(None, finished.append,
     732            terminalDelivery=terminal.append)
     733        p.dataReceived('0\r\n\r\nh')
     734        p.dataReceived('ello')
     735        self.assertEqual(finished, ['h'])
     736        self.assertEqual(terminal, ['h', 'ello'])
     737
     738
     739    def test_extraOnlyFirstAndTerminalAndData(self):
     740        """
     741        Only the first part of the terminal chunk gets passed to finishCallback,
     742        and the terminalDelivery can get the whole thing.
     743
     744        Test that this works when there are data chunks, too.
     745        """
     746        data = []
     747        finished = []
     748        terminal = []
     749        p = http._ChunkedTransferDecoder(data.append, finished.append,
     750            terminalDelivery=terminal.append)
     751        p.dataReceived('1\r\nX\r\n0\r\n\r\nh')
     752        p.dataReceived('ello')
     753        self.assertEqual(data, ['X'])
     754        self.assertEqual(finished, ['h'])
     755        self.assertEqual(terminal, ['h', 'ello'])
     756
     757
     758    def test_extraOnlyFirstAndTerminalAndDataAndCR(self):
     759        """
     760        A disaster.
     761        """
     762        data = []
     763        finished = []
     764        terminal = []
     765        p = http._ChunkedTransferDecoder(data.append, finished.append,
     766            terminalDelivery=terminal.append)
     767        p.dataReceived('1\r\nX\r\n0\r\n\r\nh\r')
     768        p.dataReceived('\n\r\n')
     769        self.assertEqual(data, ['X'])
     770        self.assertEqual(finished, ['h\r'])
     771        self.assertEqual(terminal, ['h'])
     772
     773
     774    def test_extraOnlyFirstAndTerminalAndDataAndCRLF(self):
     775        """
     776        Another disaster.
     777        """
     778        data = []
     779        finished = []
     780        terminal = []
     781        p = http._ChunkedTransferDecoder(data.append, finished.append,
     782            terminalDelivery=terminal.append)
     783        p.dataReceived('1\r\nX\r\n0\r\n\r\nh\r\n')
     784        p.dataReceived('P')
     785        p.dataReceived('Q')
     786        p.dataReceived('\r\n\r')
     787        p.dataReceived('R')
     788        p.dataReceived('\r\n\r\n')
     789        self.assertEqual(data, ['X'])
     790        self.assertEqual(finished, ['h\r\n'])
     791
     792        # This is very strange, but it is the intended behavior.
     793        # \r 's and \n 's are buffered slightly until it is sure that
     794        # it's not an \r\n\r\n
     795        self.assertEqual(terminal, ['h', '\r\nP', 'Q', '\r\n\rR'])
     796
     797
     798    def test_afterFinishedNoData(self):
    524799        """
    525800        L{_ChunkedTransferDecoder.dataReceived} raises L{RuntimeError} if it
    526         is called after it has seen the last chunk.
     801        is called after it has seen the last chunk (parsing without any data sent earlier).
    527802        """
    528         p = http._ChunkedTransferDecoder(None, lambda bytes: None)
     803        finish = []
     804        p = http._ChunkedTransferDecoder(None, finish.append)
    529805        p.dataReceived('0\r\n\r\n')
     806        self.assertEqual(finish, [''])
     807        p.dataReceived('TRAILER\r\n\r\n')
     808        self.assertRaises(RuntimeError, p.dataReceived, 'hello')
     809
     810
     811    def test_afterFinishedWithData(self):
     812        """
     813        L{_ChunkedTransferDecoder.dataReceived} raises L{RuntimeError} if it
     814        is called after it has seen the last chunk (parsing with data sent earlier).
     815        """
     816        data = []
     817        finish = []
     818        p = http._ChunkedTransferDecoder(data.append, finish.append)
     819        p.dataReceived('1\r\nX\r\n0\r\n\r\n')
     820        self.assertEqual(finish, [''])
     821        self.assertEqual(data, ['X'])
     822        p.dataReceived('TRAILER\r\n\r\n')     
    530823        self.assertRaises(RuntimeError, p.dataReceived, 'hello')
    531824
    532825