Ticket #3795: secure-chunked-3795-8.diff
| File secure-chunked-3795-8.diff, 20.4 KB (added by ivank, 4 years ago) |
|---|
-
twisted/web/http.py
=== modified file 'twisted/web/http.py'
9 9 10 10 Future Plans: 11 11 - HTTP client support will at some point be refactored to support HTTP/1.1. 12 - Accept chunked data from clients in server.13 12 - Other missing HTTP features from the RFC. 14 13 15 14 Maintainer: Itamar Shtull-Trauring … … 36 35 from twisted.python import log 37 36 try: # try importing the fast, C version 38 37 from twisted.protocols._c_urlarg import unquote 38 unquote # shut up pyflakes 39 39 except ImportError: 40 40 from urllib import unquote 41 41 … … 1279 1279 Protocol for decoding I{chunked} Transfer-Encoding, as defined by RFC 2616, 1280 1280 section 3.6.1. This protocol can interpret the contents of a request or 1281 1281 response body which uses the I{chunked} Transfer-Encoding. It cannot 1282 interpret any of the rest of the HTTP protocol. 1282 interpret any of the rest of the HTTP protocol. It ignores trailers. 1283 1283 1284 1284 It may make sense for _ChunkedTransferDecoder to be an actual IProtocol 1285 1285 implementation. Currently, the only user of this class will only ever … … 1298 1298 @ivar finishCallback: A one-argument callable which will be invoked when 1299 1299 the terminal chunk is received. It will be invoked with all bytes 1300 1300 which were delivered to this protocol which came after the terminal 1301 chunk. 1301 chunk. These bytes are B{not} the trailer; they might be the beginning 1302 of the next request or response. 1302 1303 1303 1304 @ivar length: Counter keeping track of how many more bytes in a chunk there 1304 1305 are to receive. 1305 1306 1306 1307 @ivar state: One of C{'chunk-length'}, C{'trailer'}, C{'body'}, or 1307 1308 C{'finished'}. For C{'chunk-length'}, data for the chunk length line 1308 is currently being read. For C{'trailer'}, the CR LF pair which 1309 follows each chunk is being read. For C{'body'}, the contents of a 1310 chunk are being read. For C{'finished'}, the last chunk has been 1311 completely read and no more input is valid. 1309 is currently being read. 
For C{'body'}, the contents of a chunk are 1310 being read. For C{'crlf'}, the CR LF pair which follows each chunk is 1311 being read. For C{'trailer'}, the trailer is being read and ignored. 1312 For C{'almost-finished'}, the extra bytes for the C{finishCallback} are 1313 being prepared. For C{'finished'}, no more input is valid, and an 1314 exception is raised. 1312 1315 1313 @ivar finish: A flag indicating that the last chunk has been started. When 1314 it finishes, the state will change to C{'finished'} and no more data 1315 will be accepted. 1316 @ivar _bodyEndsWith: One of I{CR LF} or I{CR LF CR LF}. When I{CR LF}, the 1317 parser is still searching for the end of an empty trailer. When 1318 I{CR LF CR LF}, the parser is searching for the end of a non-empty 1319 trailer. 1316 1320 """ 1317 1321 state = 'chunk-length' 1318 finish = False1319 1322 1320 1323 def __init__(self, dataCallback, finishCallback): 1321 1324 self.dataCallback = dataCallback 1322 1325 self.finishCallback = finishCallback 1323 1326 self._buffer = '' 1327 self._bodyEndsWith = '\r\n' 1328 1329 # While an HTTP/1.1 chunk has no size limit in the specification, a 1330 # reasonable limit must be established to prevent untrusted input from 1331 # causing excessive string concatenation in the parser. A limit of 17 bytes 1332 # (max FFFFFFFFFFFFFFFFF) can support chunks up to 2**68-1 bytes. 1333 self._maximumChunkSizeStringLength = 17 1334 1335 # This list will (very temporarily) buffer the bytes to be sent to 1336 # `finishCallback'. Another call to `dataReceived' will never add more 1337 # to this buffer. 1338 self._extraBytes = [] 1324 1339 1325 1340 1326 1341 def dataReceived(self, data): … … 1328 1343 Interpret data from a request or response body which uses the 1329 1344 I{chunked} Transfer-Encoding. 
1330 1345 """ 1346 1347 # This is a dumb-by-design "tokenizer" which prevents `_handlePiece' 1348 # from repeatedly slicing (and thus copying) the "same" data, when 1349 # a large `data' string containing small chunks is given to `dataReceived'. 1350 blocks = data.split('\r\n') 1351 lastBlockNum = len(blocks) - 1 1352 for n, block in enumerate(blocks): 1353 self._handlePiece(block) 1354 if n != lastBlockNum: 1355 # string split removed the separator, but the parser needs it. 1356 self._handlePiece('\r\n') 1357 1358 if self.state == 'almost-finished': 1359 self.state = 'finished' 1360 self.finishCallback(''.join(self._extraBytes)) 1361 1362 1363 def _handlePiece(self, data): 1364 """ 1365 Interpret a smaller piece of data. Always call C{dataReceived} instead. 1366 This method cannot move into the C{'finished'} state and call 1367 C{finishCallback}. 1368 """ 1331 1369 data = self._buffer + data 1332 1370 self._buffer = '' 1333 1371 while data: 1334 1372 if self.state == 'chunk-length': 1335 1373 if '\r\n' in data: 1336 1374 line, rest = data.split('\r\n', 1) 1337 parts = line.split(';') 1338 self.length = int(parts[0], 16) 1375 parts = line.split(';', 1) 1376 chunkSizeString = parts[0] 1377 if len(chunkSizeString) > self._maximumChunkSizeStringLength: 1378 raise RuntimeError( 1379 "_ChunkedTransferDecoder.dataReceived received " 1380 "too-long chunk length %s" % (repr(chunkSizeString),)) 1381 # HEX in RFC 2616 section 2.2 does not include the minus 1382 # sign, but int('-0', 16) == 0, so 'negative zero' chunks 1383 # are accepted here. 1384 # Spaces around the HEX are not allowed, but int(..., 16) 1385 # will still parse it, so padded HEX is accepted here. 
1386 try: 1387 self.length = int(chunkSizeString, 16) 1388 except ValueError: 1389 raise RuntimeError( 1390 "_ChunkedTransferDecoder.dataReceived received " 1391 "unparsable chunk length in parts %s" % (parts,)) 1392 if self.length < 0: 1393 raise RuntimeError( 1394 "_ChunkedTransferDecoder.dataReceived received " 1395 "negative chunk length in parts %s" % (parts,)) 1339 1396 if self.length == 0: 1340 1397 self.state = 'trailer' 1341 self.finish = True1342 1398 else: 1343 1399 self.state = 'body' 1344 1400 data = rest 1345 1401 else: 1346 self._buffer = data1347 data = ''1348 elif self.state == 'trailer':1349 if data.startswith('\r\n'):1350 data = data[2:]1351 if self.finish:1352 self.state = 'finished'1353 self.finishCallback(data)1354 data = ''1402 # Throw away HTTP/1.1 chunk-extensions every time, but keep 1403 # the semicolon so that additional chunk-extension data 1404 # doesn't get interpreted as part of the chunk-length. 1405 if ';' in data: 1406 reattachCR = (data[-1] == '\r') 1407 data = data[:data.find(';') + 1] 1408 if reattachCR: 1409 data += '\r' 1410 extraByte = 1 1355 1411 else: 1356 self.state = 'chunk-length' 1357 else: 1412 extraByte = 0 1413 1414 if len(data) > (self._maximumChunkSizeStringLength + extraByte): 1415 raise RuntimeError( 1416 "_ChunkedTransferDecoder.dataReceived received " 1417 "too-long chunk length %s" % (repr(data),)) 1358 1418 self._buffer = data 1359 1419 data = '' 1360 1420 elif self.state == 'body': 1361 1421 if len(data) >= self.length: 1362 1422 chunk, data = data[:self.length], data[self.length:] 1363 1423 self.dataCallback(chunk) 1364 self.state = ' trailer'1424 self.state = 'crlf' 1365 1425 elif len(data) < self.length: 1366 1426 self.length -= len(data) 1367 1427 self.dataCallback(data) 1368 1428 data = '' 1429 elif self.state == 'crlf': 1430 if data.startswith('\r\n'): 1431 data = data[2:] 1432 self.state = 'chunk-length' 1433 elif data == '\r': 1434 self._buffer = data 1435 data = '' 1436 else: 1437 raise 
RuntimeError( 1438 "_ChunkedTransferDecoder.dataReceived was looking for " 1439 "CRLF, not %s" % (repr(data),)) 1440 elif self.state == 'trailer': 1441 # The goal is to throw away as much of the trailer as possible 1442 # every time, while hoping to get the end-of-trailer. 1443 1444 if self._bodyEndsWith == '\r\n' and data == '\r': 1445 # This case is ambiguous until dataReceived gets another byte. 1446 # `data' could be the CR in the CRLF to terminate an empty 1447 # trailer, or the beginning of a non-empty trailer 1448 # starting with \r. 1449 self._buffer = data 1450 data = '' 1451 return 1452 1453 trailerEnd = data.find(self._bodyEndsWith) 1454 if self._bodyEndsWith == '\r\n' and trailerEnd != 0: 1455 self._bodyEndsWith = '\r\n\r\n' 1456 trailerEnd = data.find(self._bodyEndsWith) 1457 1458 if trailerEnd != -1: 1459 data = data[trailerEnd + len(self._bodyEndsWith):] 1460 self.state = 'almost-finished' 1461 self._extraBytes.append(data) 1462 else: 1463 for ending in ('\r\n\r', '\r\n', '\r'): 1464 if data.endswith(ending): 1465 self._buffer = ending 1466 break 1467 data = '' 1468 elif self.state == 'almost-finished': 1469 self._extraBytes.append(data) 1470 data = '' 1369 1471 elif self.state == 'finished': 1370 1472 raise RuntimeError( 1371 1473 "_ChunkedTransferDecoder.dataReceived called after last " -
twisted/web/test/test_http.py
=== modified file 'twisted/web/test/test_http.py'
497 497 self.assertEqual(L, ['abc']) 498 498 499 499 500 def test_extensionsShort(self): 501 """ 502 L{_ChunkedTransferDecoder.dataReceived} disregards chunk-extension 503 fields, even when the data is delivered with multiple calls. 504 505 This should exercise the reattachCR condition in the parser. 506 """ 507 L = [] 508 p = http._ChunkedTransferDecoder(L.append, None) 509 for s in '3; x-foo=bar\r\nabc\r\n': 510 p.dataReceived(s) 511 self.assertEqual(L, ['a', 'b', 'c']) 512 513 500 514 def test_finish(self): 501 515 """ 502 516 L{_ChunkedTransferDecoder.dataReceived} interprets a zero-length … … 520 534 self.assertEqual(finished, ['hello']) 521 535 522 536 537 def test_extraTrailer(self): 538 """ 539 540 """ 541 finished = [] 542 p = http._ChunkedTransferDecoder(None, finished.append) 543 p.dataReceived('0\r\nLINE 1\r\n\r\nhello') 544 self.assertEqual(finished, ['hello']) 545 546 547 def test_extraTrailerMultiline(self): 548 """ 549 L{_ChunkedTransferDecoder.dataReceived} understands that trailers can 550 span multiple entity-headers. But since the parser ignores trailers, it 551 can treat entity-headers as lines. 552 """ 553 finished = [] 554 p = http._ChunkedTransferDecoder(None, finished.append) 555 p.dataReceived('0\r\nLINE 1\r\nLINE 2\r\n\r\nhello') 556 self.assertEqual(finished, ['hello']) 557 558 559 def test_extraTrailerMultilineShort(self): 560 """ 561 L{_ChunkedTransferDecoder.dataReceived} understands that trailers can 562 span multiple entity-headers, when delivered with multiple calls. 
563 """ 564 finished = [] 565 p = http._ChunkedTransferDecoder(None, finished.append) 566 for s in '0\r\nLINE 1\r\nLINE 2\r\n\r': 567 p.dataReceived(s) 568 p.dataReceived('\nhello') 569 self.assertEqual(finished, ['hello']) 570 571 523 572 def test_afterFinished(self): 524 573 """ 525 574 L{_ChunkedTransferDecoder.dataReceived} raises L{RuntimeError} if it … … 527 576 """ 528 577 p = http._ChunkedTransferDecoder(None, lambda bytes: None) 529 578 p.dataReceived('0\r\n\r\n') 530 self.assertRaises(RuntimeError, p.dataReceived, 'hello') 531 579 exc = self.assertRaises(RuntimeError, p.dataReceived, 'hello') 580 self.assertEqual( 581 str(exc), 582 "_ChunkedTransferDecoder.dataReceived called after last " 583 "chunk was processed") 584 532 585 533 586 def test_earlyConnectionLose(self): 534 587 """ … … 574 627 self.assertEqual(successes, [True]) 575 628 576 629 630 def test_trailerUsesNoMemory(self): 631 """ 632 L{_ChunkedTransferDecoder.dataReceived} does not waste memory 633 buffering pieces of the trailer, which is always ignored anyway. 634 635 This test is very implementation-specific because the parser exhibits 636 no public behavior while ignoring the trailer. 
637 """ 638 L = [] 639 p = http._ChunkedTransferDecoder(L.append, lambda bytes: None) 640 p.dataReceived('3\r\nabc\r\n0\r\nTrailer') 641 self.assertEqual(len(p._buffer), 0) 642 p.dataReceived('More trailer') 643 self.assertEqual(len(p._buffer), 0) 644 p.dataReceived('Here comes a CR: \r') 645 self.assertEqual(len(p._buffer), 1) 646 p.dataReceived('But no newline!') 647 self.assertEqual(len(p._buffer), 0) 648 p.dataReceived('Make it think it might end: \r\n\r') 649 self.assertEqual(len(p._buffer), 3) 650 p.dataReceived("But it didn't!") 651 self.assertEqual(len(p._buffer), 0) 652 p.dataReceived('Really finish the trailer now: \r\n\r\n') 653 self.assertEqual(len(p._buffer), 0) 654 self.assertEqual(L, ['abc']) 655 656 657 def test_chunkExtensionsUseNoMemory(self): 658 """ 659 L{_ChunkedTransferDecoder.dataReceived} does not waste memory 660 buffering pieces of chunk extensions, which are always ignored anyway. 661 662 This test is very implementation-specific because the parser exhibits 663 no public behavior while ignoring the chunk extensions. 664 """ 665 L = [] 666 finished = [] 667 p = http._ChunkedTransferDecoder(L.append, finished.append) 668 p.dataReceived('3\r\nabc\r\n4; hello=yes') 669 originalLength = len(p._buffer) 670 # feed it some more ignored chunk-extension 671 p.dataReceived('-still-ignored') 672 self.assertEqual(len(p._buffer), originalLength) 673 674 675 def test_limitedChunkLengthBuffering(self): 676 """ 677 L{_ChunkedTransferDecoder.dataReceived} does not allow input 678 to endlessly fill its buffer with a chunk length string. 
679 """ 680 L = [] 681 p = http._ChunkedTransferDecoder(L.append, None) 682 max = p._maximumChunkSizeStringLength 683 684 p.dataReceived('2\r\nab\r\n') 685 exc = self.assertRaises(RuntimeError, p.dataReceived, '3' * (max + 1)) 686 self.assertEqual( 687 str(exc), 688 "_ChunkedTransferDecoder.dataReceived received too-long " 689 "chunk length '333333333333333333'") 690 691 692 def test_limitedChunkLengthBufferingShort(self): 693 """ 694 L{_ChunkedTransferDecoder.dataReceived} does not allow input 695 to endlessly fill its buffer with a chunk length string, even when 696 the data is delivered with multiple calls. 697 """ 698 L = [] 699 p = http._ChunkedTransferDecoder(L.append, None) 700 max = p._maximumChunkSizeStringLength 701 702 p.dataReceived('2\r\nab\r\n') 703 for s in '3' * max: 704 p.dataReceived(s) 705 exc = self.assertRaises(RuntimeError, p.dataReceived, '3' * 1) 706 self.assertEqual( 707 str(exc), 708 "_ChunkedTransferDecoder.dataReceived received too-long " 709 "chunk length '333333333333333333'") 710 711 712 def test_chunkLengthNotTooLong(self): 713 """ 714 715 """ 716 L = [] 717 p = http._ChunkedTransferDecoder(L.append, None) 718 max = p._maximumChunkSizeStringLength 719 720 p.dataReceived('2\r\nab\r\n') 721 722 chunkLenString = ('3' * (max+1)) 723 exc = self.assertRaises( 724 RuntimeError, p.dataReceived, chunkLenString + '\r\n') 725 726 self.assertEqual( 727 str(exc), 728 "_ChunkedTransferDecoder.dataReceived received " 729 "too-long chunk length %s" % (repr(chunkLenString),)) 730 731 732 def test_chunkLengthSemicolonMath(self): 733 """ 734 L{_ChunkedTransferDecoder.dataReceived} doesn't include 735 the length of the semicolon or chunk-extension data when 736 determining the length of the chunk-length bytes. 
737 """ 738 L = [] 739 p = http._ChunkedTransferDecoder(L.append, None) 740 max = p._maximumChunkSizeStringLength 741 742 p.dataReceived((('3' * (max)) + '; long-extension-completely-ignored=yes')) 743 744 745 def test_chunkLengthNotUnparsable(self): 746 """ 747 748 """ 749 L = [] 750 p = http._ChunkedTransferDecoder(L.append, None) 751 752 p.dataReceived('2\r\nab\r\n') 753 754 chunkLenString = ('G') 755 exc = self.assertRaises( 756 RuntimeError, p.dataReceived, chunkLenString + '\r\n') 757 758 self.assertEqual( 759 str(exc), 760 "_ChunkedTransferDecoder.dataReceived received " 761 "unparsable chunk length in parts %s" % (repr([chunkLenString]),)) 762 763 764 def test_chunkLengthNotNegative(self): 765 """ 766 767 """ 768 L = [] 769 p = http._ChunkedTransferDecoder(L.append, None) 770 771 p.dataReceived('2\r\nab\r\n') 772 exc = self.assertRaises(RuntimeError, p.dataReceived, '-1\r\n') 773 self.assertEqual( 774 str(exc), 775 "_ChunkedTransferDecoder.dataReceived received " 776 "negative chunk length in parts %s" % (repr(['-1']),)) 777 778 779 def test_chunkLengthNotNegativeWithPadding(self): 780 """ 781 782 """ 783 L = [] 784 p = http._ChunkedTransferDecoder(L.append, None) 785 786 p.dataReceived('2\r\nab\r\n') 787 exc = self.assertRaises(RuntimeError, p.dataReceived, ' -1\r\n') 788 self.assertEqual( 789 str(exc), 790 "_ChunkedTransferDecoder.dataReceived received " 791 "negative chunk length in parts %s" % (repr([' -1']),)) 792 793 794 def test_afterChunkNotCRLFErrorByte1(self): 795 """ 796 797 """ 798 L = [] 799 p = http._ChunkedTransferDecoder(L.append, None) 800 801 p.dataReceived('2\r\nab') 802 exc = self.assertRaises(RuntimeError, p.dataReceived, 'X') 803 self.assertEqual( 804 str(exc), 805 "_ChunkedTransferDecoder.dataReceived was looking for " 806 "CRLF, not %s" % (repr('X'),)) 807 808 809 def test_afterChunkNotCRLFErrorTwoBytes(self): 810 """ 811 812 """ 813 L = [] 814 p = http._ChunkedTransferDecoder(L.append, None) 815 816 p.dataReceived('2\r\nab') 817 exc 
= self.assertRaises(RuntimeError, p.dataReceived, '\rX') 818 self.assertEqual( 819 str(exc), 820 "_ChunkedTransferDecoder.dataReceived was looking for " 821 "CRLF, not %s" % (repr('\rX'),)) 822 823 824 def test_afterChunkNotCRLFErrorByte2(self): 825 """ 826 827 """ 828 L = [] 829 p = http._ChunkedTransferDecoder(L.append, None) 830 831 p.dataReceived('2\r\nab') 832 p.dataReceived('\r') 833 exc = self.assertRaises(RuntimeError, p.dataReceived, 'X') 834 self.assertEqual( 835 str(exc), 836 "_ChunkedTransferDecoder.dataReceived was looking for " 837 "CRLF, not %s" % (repr('\rX'),)) 838 839 840 841 def test_chunkLengthNegativeZeroOkay(self): 842 """ 843 844 """ 845 L = [] 846 p = http._ChunkedTransferDecoder(L.append, None) 847 848 p.dataReceived('2\r\nab\r\n') 849 p.dataReceived('-0\r\n') 850 851 577 852 578 853 class ChunkingTestCase(unittest.TestCase): 579 854
