Ticket #3803: better-LineReceiver-1.diff

File better-LineReceiver-1.diff, 14.1 KB (added by ivank, 12 years ago)

"attached implementation"

  • twisted/protocols/basic.py

    === modified file 'twisted/protocols/basic.py'
     
    1313import re
    1414import struct
    1515
     16try:
     17    from collections import deque
     18    deque # shut up pyflakes
     19except ImportError:
     20    class deque(list):
     21        def popleft(self):
     22            return self.pop(0)
     23
     24# cStringIO and io.BytesIO as of CPython 2.6.1 are much slower than StringIO
     25# for many appends.  StringIO in CPython 2.3.7 is even faster for many
     26# appends than in CPython 2.6.1, so it's probably safe to use StringIO
     27# everywhere.
     28from StringIO import StringIO
    1629from zope.interface import implements
    1730
    1831# Twisted imports
     
    184197
    185198
    186199class LineReceiver(protocol.Protocol, _PauseableMixin):
    187     """A protocol that receives lines and/or raw data, depending on mode.
     200    r"""
     201    A protocol that receives lines and/or raw data, depending on mode.
    188202
    189203    In line mode, each line that's received becomes a callback to
    190204    L{lineReceived}.  In raw data mode, each chunk of raw data becomes a
     
    193207
    194208    This is useful for line-oriented protocols such as IRC, HTTP, POP, etc.
    195209
     210    LineReceiver
     211        does not rely on string concatenation optimizations available in
     212        CPython 2.5+ and other implementations.
     213
     214        stays fast when `data' contains many lines delivered at once, unless
     215        there is excess toggling between line mode and raw mode, with a large
     216        `extra' being passed to setLineMode each time.
     217            (note: many lines at once may be slow if Python is missing
     218            collections.deque, available since CPython 2.4)
     219
     220        searches for the delimiter only in recently-received data, preventing
     221        unnecessary searching of the delimiter in a long buffer.
     222
     223    XXX TODO: Why is this here?
     224    Fixed:
     225        #3277 - LineReceiver may drop a delimiter (newline) when calling ...
     226        #3050 - t.p.basic.LineReceiver StackOverflow
     227    Probably fixed: (TODO: add tests to make sure)
     228        #3353 - lineLengthExceeded behaviour varies between LineReceiver and
     229            LineOnlyReceiver
     230    Not fixed (old behavior kept):
     231        #2215 - If lineReceived returns true value, the connection is shut down
     232            and this value used as an error message
     233        #3542 - twisted.protocols.basic.LineReceiver's lineLengthExceeded
     234            should not cause transport to shut down without reporting an error ...
     235
    196236    @cvar delimiter: The line-ending delimiter to use. By default this is
    197                      '\\r\\n'.
    198     @cvar MAX_LENGTH: The maximum length of a line to allow (If a
    199                       sent line is longer than this, the connection is dropped).
    200                       Default is 16384.
     237        '\r\n'.
     238    @cvar MAX_LENGTH: The maximum length of a line to allow, excluding
     239        the delimiter. If a received line is longer than this,
     240        L{lineLengthExceeded} is called, which by default drops the connection.
     241        Default is 16384.
    201242    """
    202243    line_mode = 1
    203     __buffer = ''
     244    _lineBuffer = None
     245    _buffer = None
    204246    delimiter = '\r\n'
    205247    MAX_LENGTH = 16384
    206248
     249    # When clearing _buffer, the implementation should always create new
     250    # StringIO objects instead of truncate(0), because truncate(0) does not
     251    # free any memory with cStringIO.StringIO, and doesn't free much memory in
     252    # StringIO.StringIO (tested CPython 2.6.1).
     253
    207254    def clearLineBuffer(self):
    208255        """
    209256        Clear buffered data.
     
    211258        @return: All of the cleared buffered data.
    212259        @rtype: C{str}
    213260        """
    214         b = self.__buffer
    215         self.__buffer = ""
     261
     262        if self._buffer is None:
     263            self._buffer = StringIO()
     264        if self._lineBuffer is None:
     265            self._lineBuffer = deque()
     266
     267        # This temporarily appends _buffer into _lineBuffer to avoid creating
     268        # an extra temporary list or string.
     269        self._buffer.seek(0, 0)
     270        self._lineBuffer.append(self._buffer.read())
     271        b = self.delimiter.join(self._lineBuffer)
     272        self._lineBuffer = deque()
     273        self._buffer = StringIO()
     274       
    216275        return b
    217276
     277    def _addToBuffer(self, data):
     278        """
     279        Append L{data} to the internal buffer.
     280        """
     281
     282        # When in line mode, this will convert data in L{_buffer} into lines in
     283        # L{_lineBuffer}.
     284
     285        # _addToBuffer is called internally even when paused, so that the
     286        # delimiter search optimization doesn't break.
     287
     288        if self._buffer is None:
     289            self._buffer = StringIO()
     290        if self._lineBuffer is None:
     291            self._lineBuffer = deque()
     292
     293        self._buffer.write(data)
     294
     295        if self.line_mode:
     296            # The idea is to look for the delimiter in a subset of the buffer.
     297            # This prevents slowdown if the line length is long and the bytes
     298            # are being received slowly.
     299            self._buffer.seek(-(len(data)+len(self.delimiter)), 2)
     300
     301            # This does two things: read the tail of the buffer (at most
     302            # len(data) + len(self.delimiter) bytes), and leave the position at the very end.
     303            searchArea = self._buffer.read()
     304
     305            if self.delimiter in searchArea:
     306                self._buffer.seek(0, 0)
     307                splitted = self._buffer.read().split(self.delimiter)
     308                self._buffer = StringIO()
     309                self._buffer.write(splitted.pop())
     310                self._lineBuffer.extend(splitted)
     311
    218312    def dataReceived(self, data):
    219         """Protocol.dataReceived.
     313        """
    220314        Translates bytes into lines, and calls lineReceived (or
    221315        rawDataReceived, depending on mode.)
    222316        """
    223         self.__buffer = self.__buffer+data
    224         while self.line_mode and not self.paused:
    225             try:
    226                 line, self.__buffer = self.__buffer.split(self.delimiter, 1)
    227             except ValueError:
    228                 if len(self.__buffer) > self.MAX_LENGTH:
    229                     line, self.__buffer = self.__buffer, ''
    230                     return self.lineLengthExceeded(line)
    231                 break
    232             else:
    233                 linelength = len(line)
    234                 if linelength > self.MAX_LENGTH:
    235                     exceeded = line + self.__buffer
    236                     self.__buffer = ''
     317
     318        self._addToBuffer(data)
     319
     320        while not self.paused:
     321            if self.line_mode:
     322                if not self._lineBuffer: # no more lines
     323                    # Only *after* there are no more lines is it appropriate to
     324                    # return with lineLengthExceeded due to the _buffer's
     325                    # unsplittable excess size.
     326
     327                    # The old LineReceiver would reject a line smaller than the
     328                    # MAX_LENGTH if only part of the delimiter had arrived.
     329                    # This bug is fixed. `minus one' because if the end of
     330                    # delimiter came, it would have been split and handled already.
     331                    if self._buffer.tell() > self.MAX_LENGTH + len(self.delimiter) - 1:
     332                        return self.lineLengthExceeded(self.clearLineBuffer())
     333                    break
     334
     335                line = self._lineBuffer.popleft()
     336                if len(line) > self.MAX_LENGTH:
     337                    exceeded = line + self.delimiter + self.clearLineBuffer()
    237338                    return self.lineLengthExceeded(exceeded)
    238339                why = self.lineReceived(line)
    239340                if why or self.transport and self.transport.disconnecting:
     341                    # disconnect.
     342
     343                    # "The original reason for this behavior is a micro-optimization
     344                    # to avoid the necessity of raising exceptions in order to drop the
     345                    # connection. It's very old, and probably not terribly effective as an
     346                    # optimization. However, I certainly don't care enough about this to
     347                    # change it, especially given that it might break existing code that
     348                    # relied upon this bizarre convention. For what it's worth, it's a mirror
     349                    # of the same convention in dataReceived."
     350                    #   - glyph, http://twistedmatrix.com/trac/ticket/2215
    240351                    return why
    241         else:
    242             if not self.paused:
    243                 data=self.__buffer
    244                 self.__buffer=''
     352            else:
     353                data = self.clearLineBuffer()
    245354                if data:
    246                     return self.rawDataReceived(data)
     355                    why = self.rawDataReceived(data)
     356                    if why or self.transport and self.transport.disconnecting:
     357                        # disconnect. (see above comment)
     358                        return why
     359                else:
     360                    break         
    247361
    248362    def setLineMode(self, extra=''):
    249363        """Sets the line-mode of this receiver.
     
    257371        within a lineReceived callback.
    258372        """
    259373        self.line_mode = 1
    260         if extra:
    261             return self.dataReceived(extra)
     374        self._addToBuffer(extra)
    262375
    263376    def setRawMode(self):
    264377        """Sets the raw mode of this receiver.
  • twisted/test/test_protocols.py

    === modified file 'twisted/test/test_protocols.py'
     
    1313from twisted.test import proto_helpers
    1414
    1515
     16class FlippingLineTester(basic.LineReceiver):
     17    """
     18    A line receiver that flips between line and raw data modes after one byte.
     19    """
     20
     21    delimiter = '\n'
     22
     23    lines = None
     24    raws = 0
     25
     26    def lineReceived(self, line):
     27        """
     28        Set the mode to raw.
     29        """
     30        if self.lines is None:
     31            self.lines = []
     32        self.lines.append(line)           
     33        self.setRawMode()
     34
     35    def rawDataReceived(self, data):
     36        """
     37        Set the mode back to line.
     38        """
     39        self.raws += 1
     40        self.setLineMode(data[1:])
     41
     42
    1643class LineTester(basic.LineReceiver):
    1744    """
    1845    A line receiver that parses data received and make actions on some tokens.
     
    229256    rawpause_output2 = ['twiddle1', 'twiddle2', 'len 5', 'rawpause', '12345',
    230257                        'twiddle3']
    231258
     259    def test_pausing2(self):
     260        """
     261        Pausing doesn't interfere with the StringIO seek (delimiter search) optimizations.
     262        """
     263
     264        t = proto_helpers.StringTransport()
     265
     266        class Rec1(basic.LineReceiver):
     267            lines = []
     268            def lineReceived(self, line):
     269                self.lines.append(line)
     270        lr = Rec1()
     271        lr.makeConnection(t)
     272
     273        lr.dataReceived('hello1')
     274        lr.pauseProducing()
     275        lr.dataReceived('hello2\r\n')
     276        lr.dataReceived('hello3')
     277        lr.resumeProducing()
     278        self.assertEqual(lr.lines, ['hello1hello2'])
     279
    232280    def test_rawPausing(self):
    233281        """
    234282        Test pause inside raw date receiving.
     
    275323                          ['produce', 'hello world', 'unproduce', 'goodbye'])
    276324
    277325
     326    def test_longLineWithDelimiter(self):
     327        """
     328        When MAX_LENGTH is exceeded *and* a delimiter has been received,
     329        lineLengthExceeded is called with the right bytes.
     330
     331        See http://twistedmatrix.com/trac/ticket/3277
     332        """
     333        # Set up a line receiver with a short MAX_LENGTH that logs
     334        # lineLengthExceeded events.
     335        class LineReceiverThatRecords(basic.LineReceiver):
     336            MAX_LENGTH = 10
     337            def connectionMade(self):
     338                self.calls = []
     339            def lineReceived(self, line):
     340                self.calls.append(('lineReceived', line))
     341            def lineLengthExceeded(self, line):
     342                self.calls.append(('lineLengthExceeded', line))
     343        lineReceiver = LineReceiverThatRecords()
     344        t = proto_helpers.StringIOWithoutClosing()
     345        lineReceiver.makeConnection(protocol.FileWrapper(t))
     346        # Call dataReceived with two lines, the first longer than MAX_LENGTH.
     347        longLine = ('x' * 11) + '\r\n'
     348        nextLine = 'next line\r\n'
     349        lineReceiver.dataReceived(longLine + nextLine)
     350        # We expect lineLengthExceeded to be called with exactly what we just
     351        # passed dataReceived.  lineReceived is not called.
     352        expectedCalls = [('lineLengthExceeded', longLine + nextLine)]
     353        self.assertEqual(expectedCalls, lineReceiver.calls)
     354
     355
    278356    def test_clearLineBuffer(self):
    279357        """
    280358        L{LineReceiver.clearLineBuffer} removes all buffered data and returns
     
    297375        self.assertEqual(protocol.rest, '')
    298376
    299377
     378    def test_clearLineBuffer2(self):
     379        """
     380        L{LineReceiver.clearLineBuffer} removes all buffered data and returns
     381        it as a C{str} and can be called from beneath C{dataReceived}.
     382
     383        (without a non-re-entrant clearLineBuffer call)
     384        """
     385        class ClearingReceiver(basic.LineReceiver):
     386            lines = []
     387            def lineReceived(self, line):
     388                #print 'lines was', self.lines
     389                self.lines.append(line)
     390
     391        protocol = ClearingReceiver()
     392        protocol.dataReceived('foo\r\nbar\r\nbaz')
     393        self.assertEqual(protocol.lines, ['foo', 'bar'])
     394        rest = protocol.clearLineBuffer()
     395        self.assertEqual(rest, 'baz')
     396
     397        # Deliver another line to make sure the previously buffered data is
     398        # really gone.
     399        protocol.dataReceived('quux\r\n')
     400        self.assertEqual(protocol.lines, ['foo', 'bar', 'quux'])
     401        rest = protocol.clearLineBuffer()
     402        self.assertEqual(rest, '')
     403
     404
     405    def testStackRecursion(self):
     406        """
     407        Switch modes many times on the same data, and make sure the stack
     408        does not overflow.
     409        """
     410        import sys
     411
     412        a = FlippingLineTester()
     413        t = proto_helpers.StringIOWithoutClosing()
     414        a.makeConnection(protocol.FileWrapper(t))
     415        limit = sys.getrecursionlimit()
     416        a.dataReceived('x\nx' * limit)
     417        self.assertEqual(a.lines, ['x'] * limit)
     418        self.assertEqual(a.raws, limit)
     419
     420
    300421
    301422class LineOnlyReceiverTestCase(unittest.TestCase):
    302423    """