Ticket #3803: better-LineReceiver-2.diff

File better-LineReceiver-2.diff, 14.6 KB (added by ivank, 12 years ago)

changes over #1: remove cStringIO nonsense, add test for delimiter change while in raw mode

  • twisted/protocols/basic.py

    === modified file 'twisted/protocols/basic.py'
     
    1313import re
    1414import struct
    1515
     16try:
     17    from collections import deque
     18    deque # shut up pyflakes
     19except ImportError:
     20    class deque(list):
     21        def popleft(self):
     22            return self.pop(0)
     23
     24from cStringIO import StringIO
    1625from zope.interface import implements
    1726
    1827# Twisted imports
     
    184193
    185194
    186195class LineReceiver(protocol.Protocol, _PauseableMixin):
    187     """A protocol that receives lines and/or raw data, depending on mode.
     196    r"""
     197    A protocol that receives lines and/or raw data, depending on mode.
    188198
    189199    In line mode, each line that's received becomes a callback to
    190200    L{lineReceived}.  In raw data mode, each chunk of raw data becomes a
     
    193203
    194204    This is useful for line-oriented protocols such as IRC, HTTP, POP, etc.
    195205
     206    LineReceiver
     207        does not rely on string concatenation optimizations available in
     208        CPython 2.5+ and other implementations.
     209
     210        stays fast when `data' contains many lines delivered at once, unless
     211        there is excess toggling between line mode and raw mode, with a large
     212        `extra' being passed to setLineMode each time.
     213            (note: many lines at once may be slow if Python is missing
     214            collections.deque, available since CPython 2.4)
     215
     216        searches for the delimiter only in recently-received data, preventing
     217        unnecessary searching of the delimiter in a long buffer.
     218
     219    XXX TODO: Why is this here?
     220    Fixed:
     221        #3277 - LineReceiver may drop a delimiter (newline) when calling ...
     222        #3050 - t.p.basic.LineReceiver StackOverflow
     223    Probably fixed: (TODO: add tests to make sure)
     224        #3353 - lineLengthExceeded behaviour varies between LineReceiver and
     225            LineOnlyReceiver
     226    Not fixed (old behavior kept):
     227        #2215 - If lineReceived returns true value, the connection is shut down
     228            and this value used as an error message
     229        #3542 - twisted.protocols.basic.LineReceiver's lineLengthExceeded
     230            should not cause transport to shut down without reporting an error ...
     231
    196232    @cvar delimiter: The line-ending delimiter to use. By default this is
    197                      '\\r\\n'.
    198     @cvar MAX_LENGTH: The maximum length of a line to allow (If a
    199                       sent line is longer than this, the connection is dropped).
    200                       Default is 16384.
     233        '\r\n'.
     234    @cvar MAX_LENGTH: The maximum length of a line to allow, excluding
     235        the delimiter. If a received line is longer than this,
     236        L{lineLengthExceeded} is called, which by default drops the connection.
     237        Default is 16384.
    201238    """
    202239    line_mode = 1
    203     __buffer = ''
     240    _lineBuffer = None
     241    _buffer = None
    204242    delimiter = '\r\n'
    205243    MAX_LENGTH = 16384
    206244
     245    # When clearing _buffer, the implementation should always create new
     246    # StringIO objects instead of truncate(0), because truncate(0) does not
     247    # free any memory with cStringIO.StringIO, and doesn't free much memory in
     248    # StringIO.StringIO (tested CPython 2.6.1).
     249
    207250    def clearLineBuffer(self):
    208251        """
    209252        Clear buffered data.
     
    211254        @return: All of the cleared buffered data.
    212255        @rtype: C{str}
    213256        """
    214         b = self.__buffer
    215         self.__buffer = ""
     257
     258        if self._buffer is None:
     259            self._buffer = StringIO()
     260        if self._lineBuffer is None:
     261            self._lineBuffer = deque()
     262
     263        # This temporarily appends _buffer into _lineBuffer to avoid creating
     264        # an extra temporary list or string.
     265        self._buffer.seek(0, 0)
     266        self._lineBuffer.append(self._buffer.read())
     267        b = self.delimiter.join(self._lineBuffer)
     268        self._lineBuffer = deque()
     269        self._buffer = StringIO()
     270       
    216271        return b
    217272
     273    def _addToBuffer(self, data):
     274        """
     275        Append L{data} to the internal buffer.
     276        """
     277
     278        # When in line mode, this will convert data in L{_buffer} into lines in
     279        # L{_lineBuffer}.
     280
     281        # _addToBuffer is called internally even when paused, so that the
     282        # delimiter search optimization doesn't break.
     283
     284        if self._buffer is None:
     285            self._buffer = StringIO()
     286        if self._lineBuffer is None:
     287            self._lineBuffer = deque()
     288
     289        self._buffer.write(data)
     290
     291        if self.line_mode:
     292            # The idea is to look for the delimiter in a subset of the buffer.
     293            # This prevents slowdown if the line length is long and the bytes
     294            # are being received slowly.
     295            self._buffer.seek(-(len(data)+len(self.delimiter)), 2)
     296
     297            # This does two things: get up to len(self.delimiter) bytes,
     298            # and always seek to the very end.
     299            searchArea = self._buffer.read()
     300
     301            if self.delimiter in searchArea:
     302                self._buffer.seek(0, 0)
     303                splitted = self._buffer.read().split(self.delimiter)
     304                self._buffer = StringIO()
     305                self._buffer.write(splitted.pop())
     306                self._lineBuffer.extend(splitted)
     307
    218308    def dataReceived(self, data):
    219         """Protocol.dataReceived.
     309        """
    220310        Translates bytes into lines, and calls lineReceived (or
    221311        rawDataReceived, depending on mode.)
    222312        """
    223         self.__buffer = self.__buffer+data
    224         while self.line_mode and not self.paused:
    225             try:
    226                 line, self.__buffer = self.__buffer.split(self.delimiter, 1)
    227             except ValueError:
    228                 if len(self.__buffer) > self.MAX_LENGTH:
    229                     line, self.__buffer = self.__buffer, ''
    230                     return self.lineLengthExceeded(line)
    231                 break
    232             else:
    233                 linelength = len(line)
    234                 if linelength > self.MAX_LENGTH:
    235                     exceeded = line + self.__buffer
    236                     self.__buffer = ''
     313
     314        self._addToBuffer(data)
     315
     316        while not self.paused:
     317            if self.line_mode:
     318                if not self._lineBuffer: # no more lines
     319                    # Only *after* there are no more lines is it appropriate to
     320                    # return with lineLengthExceeded due to the _buffer's
     321                    # unsplittable excess size.
     322
     323                    # The old LineReceiver would reject a line smaller than the
     324                    # MAX_LENGTH if only part of the delimiter had arrived.
     325                    # This bug is fixed. `minus one' because if the end of
     326                    # delimiter came, it would have been split and handled already.
     327                    if self._buffer.tell() > self.MAX_LENGTH + len(self.delimiter) - 1:
     328                        return self.lineLengthExceeded(self.clearLineBuffer())
     329                    break
     330
     331                line = self._lineBuffer.popleft()
     332                if len(line) > self.MAX_LENGTH:
     333                    exceeded = line + self.delimiter + self.clearLineBuffer()
    237334                    return self.lineLengthExceeded(exceeded)
    238335                why = self.lineReceived(line)
    239336                if why or self.transport and self.transport.disconnecting:
     337                    # disconnect.
     338
     339                    # "The original reason for this behavior is a micro-optimization
     340                    # to avoid the necessity of raising exceptions in order to drop the
     341                    # connection. It's very old, and probably not terribly effective as an
     342                    # optimization. However, I certainly don't care enough about this to
     343                    # change it, especially given that it might break existing code that
     344                    # relied upon this bizarre convention. For what it's worth, it's a mirror
     345                    # of the same convention in dataReceived."
     346                    #   - glyph, http://twistedmatrix.com/trac/ticket/2215
    240347                    return why
    241         else:
    242             if not self.paused:
    243                 data=self.__buffer
    244                 self.__buffer=''
     348            else:
     349                data = self.clearLineBuffer()
    245350                if data:
    246                     return self.rawDataReceived(data)
     351                    why = self.rawDataReceived(data)
     352                    if why or self.transport and self.transport.disconnecting:
     353                        # disconnect. (see above comment)
     354                        return why
     355                else:
     356                    break         
    247357
    248358    def setLineMode(self, extra=''):
    249359        """Sets the line-mode of this receiver.
     
    257367        within a lineReceived callback.
    258368        """
    259369        self.line_mode = 1
    260         if extra:
    261             return self.dataReceived(extra)
     370        self._addToBuffer(extra)
    262371
    263372    def setRawMode(self):
    264373        """Sets the raw mode of this receiver.
  • twisted/test/test_protocols.py

    === modified file 'twisted/test/test_protocols.py'
     
    1313from twisted.test import proto_helpers
    1414
    1515
     16class FlippingLineTester(basic.LineReceiver):
     17    """
     18    A line receiver that flips between line and raw data modes after one byte.
     19    """
     20
     21    delimiter = '\n'
     22
     23    lines = None
     24    raws = 0
     25
     26    def lineReceived(self, line):
     27        """
     28        Set the mode to raw.
     29        """
     30        if self.lines is None:
     31            self.lines = []
     32        self.lines.append(line)           
     33        self.setRawMode()
     34
     35    def rawDataReceived(self, data):
     36        """
     37        Set the mode back to line.
     38        """
     39        self.raws += 1
     40        self.setLineMode(data[1:])
     41
     42
     43
     44class ChangeDelimReceiver(basic.LineReceiver):
     45    """
     46    """
     47    lines = []
     48    datas = []
     49    switched = False
     50
     51    def rawDataReceived(self, data):
     52        self.datas.append(data)
     53        self.delimiter = 'X'
     54        if len(self.datas) == 1:
     55            self.setLineMode()
     56
     57    def lineReceived(self, line):
     58        self.lines.append(line)
     59        if not self.switched:
     60            self.setRawMode()
     61            self.switched = True
     62
     63
     64
    1665class LineTester(basic.LineReceiver):
    1766    """
    1867    A line receiver that parses data received and takes actions on some tokens.
     
    229278    rawpause_output2 = ['twiddle1', 'twiddle2', 'len 5', 'rawpause', '12345',
    230279                        'twiddle3']
    231280
     281    def test_pausing2(self):
     282        """
     283        Pausing doesn't interfere with the StringIO seek (delimiter search) optimizations.
     284        """
     285
     286        t = proto_helpers.StringTransport()
     287
     288        class Rec1(basic.LineReceiver):
     289            lines = []
     290            def lineReceived(self, line):
     291                self.lines.append(line)
     292        lr = Rec1()
     293        lr.makeConnection(t)
     294
     295        lr.dataReceived('hello1')
     296        lr.pauseProducing()
     297        lr.dataReceived('hello2\r\n')
     298        lr.dataReceived('hello3')
     299        lr.resumeProducing()
     300        self.assertEqual(lr.lines, ['hello1hello2'])
     301
    232302    def test_rawPausing(self):
    233303        """
    234304        Test pause inside raw data receiving.
     
    275345                          ['produce', 'hello world', 'unproduce', 'goodbye'])
    276346
    277347
     348    def test_longLineWithDelimiter(self):
     349        """
     350        When MAX_LENGTH is exceeded *and* a delimiter has been received,
     351        lineLengthExceeded is called with the right bytes.
     352
     353        See http://twistedmatrix.com/trac/ticket/3277
     354        """
     355        # Set up a line receiver with a short MAX_LENGTH that logs
     356        # lineLengthExceeded events.
     357        class LineReceiverThatRecords(basic.LineReceiver):
     358            MAX_LENGTH = 10
     359            def connectionMade(self):
     360                self.calls = []
     361            def lineReceived(self, line):
     362                self.calls.append(('lineReceived', line))
     363            def lineLengthExceeded(self, line):
     364                self.calls.append(('lineLengthExceeded', line))
     365        lineReceiver = LineReceiverThatRecords()
     366        t = proto_helpers.StringIOWithoutClosing()
     367        lineReceiver.makeConnection(protocol.FileWrapper(t))
     368        # Call dataReceived with two lines, the first longer than MAX_LENGTH.
     369        longLine = ('x' * 11) + '\r\n'
     370        nextLine = 'next line\r\n'
     371        lineReceiver.dataReceived(longLine + nextLine)
     372        # We expect lineLengthExceeded to be called with exactly what we just
     373        # passed dataReceived.  lineReceived is not called.
     374        expectedCalls = [('lineLengthExceeded', longLine + nextLine)]
     375        self.assertEqual(expectedCalls, lineReceiver.calls)
     376
     377
    278378    def test_clearLineBuffer(self):
    279379        """
    280380        L{LineReceiver.clearLineBuffer} removes all buffered data and returns
     
    297397        self.assertEqual(protocol.rest, '')
    298398
    299399
     400    def test_clearLineBuffer2(self):
     401        """
     402        L{LineReceiver.clearLineBuffer} removes all buffered data and returns
     403        it as a C{str} and can be called from beneath C{dataReceived}.
     404
     405        (without a non-re-entrant clearLineBuffer call)
     406        """
     407        class ClearingReceiver(basic.LineReceiver):
     408            lines = []
     409            def lineReceived(self, line):
     410                #print 'lines was', self.lines
     411                self.lines.append(line)
     412
     413        protocol = ClearingReceiver()
     414        protocol.dataReceived('foo\r\nbar\r\nbaz')
     415        self.assertEqual(protocol.lines, ['foo', 'bar'])
     416        rest = protocol.clearLineBuffer()
     417        self.assertEqual(rest, 'baz')
     418
     419        # Deliver another line to make sure the previously buffered data is
     420        # really gone.
     421        protocol.dataReceived('quux\r\n')
     422        self.assertEqual(protocol.lines, ['foo', 'bar', 'quux'])
     423        rest = protocol.clearLineBuffer()
     424        self.assertEqual(rest, '')
     425
     426
     427    def testStackRecursion(self):
     428        """
     429        Switch modes many times on the same data, and make sure the stack
     430        does not overflow.
     431        """
     432        import sys
     433
     434        a = FlippingLineTester()
     435        t = proto_helpers.StringIOWithoutClosing()
     436        a.makeConnection(protocol.FileWrapper(t))
     437        limit = sys.getrecursionlimit()
     438        a.dataReceived('x\nx' * limit)
     439        self.assertEqual(a.lines, ['x'] * limit)
     440        self.assertEqual(a.raws, limit)
     441
     442
     443    def test_changeDelimiter(self):
     444        """
     445        Changing the delimiter is okay in some cases.
     446        """
     447        protocol = ChangeDelimReceiver()
     448        protocol.dataReceived('foo\r\nbar')
     449        protocol.dataReceived('XbazXrest')
     450        self.assertEqual(protocol.lines, ['foo', '', 'baz'])
     451        self.assertEqual(protocol.datas, ['bar'])
     452
     453
    300454
    301455class LineOnlyReceiverTestCase(unittest.TestCase):
    302456    """