Ticket #3795: secure-chunked-3795-6.diff
| File secure-chunked-3795-6.diff, 18.9 KB (added by ivank, 4 years ago) |
|---|
-
twisted/web/http.py
=== modified file 'twisted/web/http.py'
9 9 10 10 Future Plans: 11 11 - HTTP client support will at some point be refactored to support HTTP/1.1. 12 - Accept chunked data from clients in server.13 12 - Other missing HTTP features from the RFC. 14 13 15 14 Maintainer: Itamar Shtull-Trauring … … 36 35 from twisted.python import log 37 36 try: # try importing the fast, C version 38 37 from twisted.protocols._c_urlarg import unquote 38 unquote # shut up pyflakes 39 39 except ImportError: 40 40 from urllib import unquote 41 41 … … 1279 1279 Protocol for decoding I{chunked} Transfer-Encoding, as defined by RFC 2616, 1280 1280 section 3.6.1. This protocol can interpret the contents of a request or 1281 1281 response body which uses the I{chunked} Transfer-Encoding. It cannot 1282 interpret any of the rest of the HTTP protocol. 1282 interpret any of the rest of the HTTP protocol. It ignores trailers. 1283 1283 1284 1284 It may make sense for _ChunkedTransferDecoder to be an actual IProtocol 1285 1285 implementation. Currently, the only user of this class will only ever … … 1298 1298 @ivar finishCallback: A one-argument callable which will be invoked when 1299 1299 the terminal chunk is received. It will be invoked with all bytes 1300 1300 which were delivered to this protocol which came after the terminal 1301 chunk. 1301 chunk. These bytes are I{not} the trailer; they might be the beginning 1302 of the next request or response. 1302 1303 1303 1304 @ivar length: Counter keeping track of how many more bytes in a chunk there 1304 1305 are to receive. 1305 1306 1306 1307 @ivar state: One of C{'chunk-length'}, C{'trailer'}, C{'body'}, or 1307 1308 C{'finished'}. For C{'chunk-length'}, data for the chunk length line 1308 is currently being read. For C{'trailer'}, the CR LF pair which 1309 follows each chunk is being read. For C{'body'}, the contents of a 1310 chunk are being read. For C{'finished'}, the last chunk has been 1311 completely read and no more input is valid. 1309 is currently being read. For C{'body'}, the contents of a chunk are 1310 being read. For C{'crlf'}, the CR LF pair which follows each chunk is 1311 being read. For C{'trailer'}, the trailer is being read and ignored. 1312 For C{'finished'}, the last chunk has been completely read and no more 1313 input is valid. 1312 1314 1313 @ivar finish: A flag indicating that the last chunk has been started. When 1314 it finishes, the state will change to C{'finished'} and no more data 1315 will be accepted. 1315 @ivar _bodyEndsWith: One of I{CR LF} or I{CR LF CR LF}. When I{CR LF}, the 1316 parser is still searching for the end of an empty trailer. When 1317 I{CR LF CR LF}, the parser is searching for the end of a non-empty 1318 trailer. 1316 1319 """ 1317 1320 state = 'chunk-length' 1318 finish = False1319 1321 1320 1322 def __init__(self, dataCallback, finishCallback): 1321 1323 self.dataCallback = dataCallback 1322 1324 self.finishCallback = finishCallback 1323 1325 self._buffer = '' 1326 self._bodyEndsWith = '\r\n' 1327 1328 # While an HTTP/1.1 chunk has no size limit in the specification, a 1329 # reasonable limit must be established to prevent untrusted input from 1330 # causing excessive string concatenation in the parser. A limit of 17 bytes 1331 # (max FFFFFFFFFFFFFFFFF) can support chunks up to 2**68-1 bytes. 1332 self._maximumChunkSizeStringLength = 17 1324 1333 1325 1334 1326 1335 def dataReceived(self, data): … … 1334 1343 if self.state == 'chunk-length': 1335 1344 if '\r\n' in data: 1336 1345 line, rest = data.split('\r\n', 1) 1337 parts = line.split(';') 1338 self.length = int(parts[0], 16) 1346 parts = line.split(';', 1) 1347 chunkSizeString = parts[0] 1348 if len(chunkSizeString) > self._maximumChunkSizeStringLength: 1349 raise RuntimeError( 1350 "_ChunkedTransferDecoder.dataReceived received " 1351 "too-long chunk length in parts %s" % (parts,)) 1352 # HEX in RFC 2616 section 2.2 does not include the minus 1353 # sign, but int('-0', 16) == 0, so 'negative zero' chunks 1354 # are accepted here. 1355 # Spaces around the HEX are not allowed, but int(..., 16) 1356 # will still parse it, so padded HEX is accepted here. 1357 try: 1358 self.length = int(chunkSizeString, 16) 1359 except ValueError: 1360 raise RuntimeError( 1361 "_ChunkedTransferDecoder.dataReceived received " 1362 "unparsable chunk length in parts %s" % (parts,)) 1363 if self.length < 0: 1364 raise RuntimeError( 1365 "_ChunkedTransferDecoder.dataReceived received " 1366 "negative chunk length in parts %s" % (parts,)) 1339 1367 if self.length == 0: 1340 1368 self.state = 'trailer' 1341 self.finish = True1342 1369 else: 1343 1370 self.state = 'body' 1344 1371 data = rest 1345 1372 else: 1346 self._buffer = data1347 data = ''1348 elif self.state == 'trailer':1349 if data.startswith('\r\n'):1350 data = data[2:]1351 if self.finish:1352 self.state = 'finished'1353 self.finishCallback(data)1354 data = ''1373 # Throw away HTTP/1.1 chunk-extensions every time, but keep 1374 # the semicolon so that additional chunk-extension data 1375 # doesn't get interpreted as part of the chunk-length. 1376 if ';' in data: 1377 reattachCR = (data[-1] == '\r') 1378 data = data[:data.find(';') + 1] 1379 if reattachCR: 1380 data += '\r' 1381 extraByte = 1 1355 1382 else: 1356 self.state = 'chunk-length' 1357 else: 1383 extraByte = 0 1384 1385 if len(data) > (self._maximumChunkSizeStringLength + extraByte): 1386 raise RuntimeError( 1387 "_ChunkedTransferDecoder.dataReceived buffered " 1388 "too-long chunk length %s" % (repr(data),)) 1358 1389 self._buffer = data 1359 1390 data = '' 1360 1391 elif self.state == 'body': 1361 1392 if len(data) >= self.length: 1362 1393 chunk, data = data[:self.length], data[self.length:] 1363 1394 self.dataCallback(chunk) 1364 self.state = ' trailer'1395 self.state = 'crlf' 1365 1396 elif len(data) < self.length: 1366 1397 self.length -= len(data) 1367 1398 self.dataCallback(data) 1368 1399 data = '' 1400 elif self.state == 'crlf': 1401 if data.startswith('\r\n'): 1402 data = data[2:] 1403 self.state = 'chunk-length' 1404 elif data == '\r': 1405 self._buffer = data 1406 data = '' 1407 else: 1408 raise RuntimeError( 1409 "_ChunkedTransferDecoder.dataReceived was looking for " 1410 "CRLF, not %s" % (repr(data),)) 1411 elif self.state == 'trailer': 1412 # The goal is to throw away as much of the trailer as possible 1413 # every time, while hoping to get the end-of-trailer. 1414 1415 if self._bodyEndsWith == '\r\n' and data == '\r': 1416 # This case is ambiguous until dataReceived gets another byte. 1417 # `data' could be the CR in the CRLF to terminate an empty 1418 # trailer, or the beginning of an non-empty trailer 1419 # starting with \r. 1420 self._buffer = data 1421 data = '' 1422 return 1423 1424 trailerEnd = data.find(self._bodyEndsWith) 1425 if self._bodyEndsWith == '\r\n' and trailerEnd != 0: 1426 self._bodyEndsWith = '\r\n\r\n' 1427 trailerEnd = data.find(self._bodyEndsWith) 1428 1429 if trailerEnd != -1: 1430 data = data[trailerEnd + len(self._bodyEndsWith):] 1431 self.state = 'finished' 1432 self.finishCallback(data) 1433 else: 1434 for ending in ('\r\n\r', '\r\n', '\r'): 1435 if data.endswith(ending): 1436 self._buffer = ending 1437 break 1438 data = '' 1369 1439 elif self.state == 'finished': 1370 1440 raise RuntimeError( 1371 1441 "_ChunkedTransferDecoder.dataReceived called after last " -
twisted/web/test/test_http.py
=== modified file 'twisted/web/test/test_http.py'
497 497 self.assertEqual(L, ['abc']) 498 498 499 499 500 def test_extensionsShort(self): 501 """ 502 L{_ChunkedTransferDecoder.dataReceived} disregards chunk-extension 503 fields, even when the data is delivered with multiple calls. 504 505 This should exercise the reattachCR condition in the parser. 506 """ 507 L = [] 508 p = http._ChunkedTransferDecoder(L.append, None) 509 for s in '3; x-foo=bar\r\nabc\r\n': 510 p.dataReceived(s) 511 self.assertEqual(L, ['a', 'b', 'c']) 512 513 500 514 def test_finish(self): 501 515 """ 502 516 L{_ChunkedTransferDecoder.dataReceived} interprets a zero-length … … 520 534 self.assertEqual(finished, ['hello']) 521 535 522 536 537 def test_extraTrailer(self): 538 """ 539 540 """ 541 finished = [] 542 p = http._ChunkedTransferDecoder(None, finished.append) 543 p.dataReceived('0\r\nLINE 1\r\n\r\nhello') 544 self.assertEqual(finished, ['hello']) 545 546 547 def test_extraTrailerMultiline(self): 548 """ 549 L{_ChunkedTransferDecoder.dataReceived} understands the trailers can 550 span multiple entity-headers. But since the parser ignores trailers, it 551 can treat entity-headers as lines. 552 """ 553 finished = [] 554 p = http._ChunkedTransferDecoder(None, finished.append) 555 p.dataReceived('0\r\nLINE 1\r\nLINE 2\r\n\r\nhello') 556 self.assertEqual(finished, ['hello']) 557 558 559 def test_extraTrailerMultilineShort(self): 560 """ 561 L{_ChunkedTransferDecoder.dataReceived} understands the trailers can 562 span multiple entity-headers, when delivered with multiple calls. 563 """ 564 finished = [] 565 p = http._ChunkedTransferDecoder(None, finished.append) 566 for s in '0\r\nLINE 1\r\nLINE 2\r\n\r': 567 p.dataReceived(s) 568 p.dataReceived('\nhello') 569 self.assertEqual(finished, ['hello']) 570 571 523 572 def test_afterFinished(self): 524 573 """ 525 574 L{_ChunkedTransferDecoder.dataReceived} raises L{RuntimeError} if it … … 527 576 """ 528 577 p = http._ChunkedTransferDecoder(None, lambda bytes: None) 529 578 p.dataReceived('0\r\n\r\n') 530 self.assertRaises(RuntimeError, p.dataReceived, 'hello') 531 579 exc = self.assertRaises(RuntimeError, p.dataReceived, 'hello') 580 self.assertEqual( 581 str(exc), 582 "_ChunkedTransferDecoder.dataReceived called after last " 583 "chunk was processed") 584 532 585 533 586 def test_earlyConnectionLose(self): 534 587 """ … … 574 627 self.assertEqual(successes, [True]) 575 628 576 629 630 def test_trailerUsesNoMemory(self): 631 """ 632 L{_ChunkedTransferDecoder.dataReceived} does not waste memory 633 buffering pieces of the trailer, which is always ignored anyway. 634 635 This test is very implementation-specific because the parser exhibits 636 no public behavior while ignoring the trailer. 637 """ 638 L = [] 639 p = http._ChunkedTransferDecoder(L.append, lambda bytes: None) 640 p.dataReceived('3\r\nabc\r\n0\r\nTrailer') 641 self.assertEqual(len(p._buffer), 0) 642 p.dataReceived('More trailer') 643 self.assertEqual(len(p._buffer), 0) 644 p.dataReceived('Here comes a CR: \r') 645 self.assertEqual(len(p._buffer), 1) 646 p.dataReceived('But no newline!') 647 self.assertEqual(len(p._buffer), 0) 648 p.dataReceived('Make it think it might end: \r\n\r') 649 self.assertEqual(len(p._buffer), 3) 650 p.dataReceived("But it didn't!") 651 self.assertEqual(len(p._buffer), 0) 652 p.dataReceived('Really finish the trailer now: \r\n\r\n') 653 self.assertEqual(len(p._buffer), 0) 654 self.assertEqual(L, ['abc']) 655 656 657 def test_chunkExtensionsUseNoMemory(self): 658 """ 659 L{_ChunkedTransferDecoder.dataReceived} does not waste memory 660 buffering pieces of chunk extensions, which are always ignored anyway. 661 662 This test is very implementation-specific because the parser exhibits 663 no public behavior while ignoring the chunk extensions. 664 """ 665 L = [] 666 finished = [] 667 p = http._ChunkedTransferDecoder(L.append, finished.append) 668 p.dataReceived('3\r\nabc\r\n4; hello=yes') 669 originalLength = len(p._buffer) 670 # feed it some more ignored chunk-extension 671 p.dataReceived('-still-ignored') 672 self.assertEqual(len(p._buffer), originalLength) 673 674 675 def test_limitedChunkLengthBuffering(self): 676 """ 677 L{_ChunkedTransferDecoder.dataReceived} does not allow input 678 to endlessly fill its buffer with a chunk length string. 679 """ 680 L = [] 681 p = http._ChunkedTransferDecoder(L.append, None) 682 max = p._maximumChunkSizeStringLength 683 684 p.dataReceived('2\r\nab\r\n') 685 exc = self.assertRaises(RuntimeError, p.dataReceived, '3' * (max + 1)) 686 self.assertEqual( 687 str(exc), 688 "_ChunkedTransferDecoder.dataReceived buffered too-long " 689 "chunk length '333333333333333333'") 690 691 692 def test_limitedChunkLengthBufferingShort(self): 693 """ 694 L{_ChunkedTransferDecoder.dataReceived} does not allow input 695 to endlessly fill its buffer with a chunk length string, even when 696 the data is delivered with multiple calls. 697 """ 698 L = [] 699 p = http._ChunkedTransferDecoder(L.append, None) 700 max = p._maximumChunkSizeStringLength 701 702 p.dataReceived('2\r\nab\r\n') 703 for s in '3' * max: 704 p.dataReceived(s) 705 exc = self.assertRaises(RuntimeError, p.dataReceived, '3' * 1) 706 self.assertEqual( 707 str(exc), 708 "_ChunkedTransferDecoder.dataReceived buffered too-long " 709 "chunk length '333333333333333333'") 710 711 712 def test_chunkLengthNotTooLong(self): 713 """ 714 715 """ 716 L = [] 717 p = http._ChunkedTransferDecoder(L.append, None) 718 max = p._maximumChunkSizeStringLength 719 720 p.dataReceived('2\r\nab\r\n') 721 722 chunkLenString = ('3' * (max+1)) 723 exc = self.assertRaises( 724 RuntimeError, p.dataReceived, chunkLenString + '\r\n') 725 726 self.assertEqual( 727 str(exc), 728 "_ChunkedTransferDecoder.dataReceived received " 729 "too-long chunk length in parts %s" % (repr([chunkLenString]),)) 730 731 732 def test_chunkLengthSemicolonMath(self): 733 """ 734 L{_ChunkedTransferDecoder.dataReceived} doesn't include 735 the length of the semicolon or chunk-extension data when 736 determining the length of the chunk-length bytes. 737 """ 738 L = [] 739 p = http._ChunkedTransferDecoder(L.append, None) 740 max = p._maximumChunkSizeStringLength 741 742 p.dataReceived((('3' * (max)) + '; long-extension-completely-ignored=yes')) 743 744 745 def test_chunkLengthNotUnparsable(self): 746 """ 747 748 """ 749 L = [] 750 p = http._ChunkedTransferDecoder(L.append, None) 751 752 p.dataReceived('2\r\nab\r\n') 753 754 chunkLenString = ('G') 755 exc = self.assertRaises( 756 RuntimeError, p.dataReceived, chunkLenString + '\r\n') 757 758 self.assertEqual( 759 str(exc), 760 "_ChunkedTransferDecoder.dataReceived received " 761 "unparsable chunk length in parts %s" % (repr([chunkLenString]),)) 762 763 764 def test_chunkLengthNotNegative(self): 765 """ 766 767 """ 768 L = [] 769 p = http._ChunkedTransferDecoder(L.append, None) 770 771 p.dataReceived('2\r\nab\r\n') 772 exc = self.assertRaises(RuntimeError, p.dataReceived, '-1\r\n') 773 self.assertEqual( 774 str(exc), 775 "_ChunkedTransferDecoder.dataReceived received " 776 "negative chunk length in parts %s" % (repr(['-1']),)) 777 778 779 def test_chunkLengthNotNegativeWithPadding(self): 780 """ 781 782 """ 783 L = [] 784 p = http._ChunkedTransferDecoder(L.append, None) 785 786 p.dataReceived('2\r\nab\r\n') 787 exc = self.assertRaises(RuntimeError, p.dataReceived, ' -1\r\n') 788 self.assertEqual( 789 str(exc), 790 "_ChunkedTransferDecoder.dataReceived received " 791 "negative chunk length in parts %s" % (repr([' -1']),)) 792 793 794 def test_afterChunkNotCRLFErrorByte1(self): 795 """ 796 797 """ 798 L = [] 799 p = http._ChunkedTransferDecoder(L.append, None) 800 801 p.dataReceived('2\r\nab') 802 exc = self.assertRaises(RuntimeError, p.dataReceived, 'X') 803 self.assertEqual( 804 str(exc), 805 "_ChunkedTransferDecoder.dataReceived was looking for " 806 "CRLF, not %s" % (repr('X'),)) 807 808 809 def test_afterChunkNotCRLFErrorTwoBytes(self): 810 """ 811 812 """ 813 L = [] 814 p = http._ChunkedTransferDecoder(L.append, None) 815 816 p.dataReceived('2\r\nab') 817 exc = self.assertRaises(RuntimeError, p.dataReceived, '\rX') 818 self.assertEqual( 819 str(exc), 820 "_ChunkedTransferDecoder.dataReceived was looking for " 821 "CRLF, not %s" % (repr('\rX'),)) 822 823 824 def test_afterChunkNotCRLFErrorByte2(self): 825 """ 826 827 """ 828 L = [] 829 p = http._ChunkedTransferDecoder(L.append, None) 830 831 p.dataReceived('2\r\nab') 832 p.dataReceived('\r') 833 exc = self.assertRaises(RuntimeError, p.dataReceived, 'X') 834 self.assertEqual( 835 str(exc), 836 "_ChunkedTransferDecoder.dataReceived was looking for " 837 "CRLF, not %s" % (repr('\rX'),)) 838 839 840 841 def test_chunkLengthNegativeZeroOkay(self): 842 """ 843 844 """ 845 L = [] 846 p = http._ChunkedTransferDecoder(L.append, None) 847 848 p.dataReceived('2\r\nab\r\n') 849 p.dataReceived('-0\r\n') 850 851 577 852 578 853 class ChunkingTestCase(unittest.TestCase): 579 854
