[Twisted-web] cache freeing with timeout and max size limit per page

Andrea Arcangeli andrea at cpushare.com
Thu Sep 8 00:44:23 MDT 2005


On Sat, Sep 03, 2005 at 03:37:40AM +0200, Andrea Arcangeli wrote:
> And here again my latest version of the usual caching patches based on
> dialtone's stuff, that prevents high traffic pages to hurt.

I reworked the whole caching patch, because I have too many variations of
the same page (with different args) and I can't keep all of them in
memory at the same time, so now I'm freeing the cache once the timeout
triggers (it's invalid cache anyway, so it isn't worth keeping in RAM)
and I let the traffic choose and cache the high-traffic ones. I also
added a per-page cache limit to be sure not to swap/oom-kill etc...
Even when the cache size limit triggers, the cache code has the positive
effect of preventing the same query from being run many times for all
waiting connections.

I believe these features could be useful for more than just my little
webapp.

All you have to do to enable caching is to add:

	cache = True
	lifetime = 0 (infinite/no timeout) or >0 (timeout in seconds); default is 0
	max_cache_size = bytes (0 means to send to all waiting clients
				but no cache), None is the default
				(None means cache size unlimited)

to the declaration of your rend.Page-derived instances.
	
With this applied I can as usual serve >200 req/sec, saturating the
bandwidth of the server; without it I can serve only 2-3 req/sec.

Despite being very new (not well tested) code, I tried to put it online,
let's see what happens ;).

Index: Nevow/nevow/util.py
===================================================================
--- Nevow/nevow/util.py	(revision 1765)
+++ Nevow/nevow/util.py	(working copy)
@@ -133,6 +133,7 @@
     from twisted.python.failure import Failure
     from twisted.trial.unittest import deferredError
     from twisted.python import log
+    from twisted.internet import reactor
 
     try:
         # work with twisted before retrial
Index: Nevow/nevow/rend.py
===================================================================
--- Nevow/nevow/rend.py	(revision 1765)
+++ Nevow/nevow/rend.py	(working copy)
@@ -481,6 +481,56 @@
         self.children[name] = child
     
 
+class PageCache(object):
+    def __init__(self):
+        self.__db = {}
+    def cacheIDX(self, ctx):
+        return str(url.URL.fromContext(ctx))
+    def __storeCache(self, cacheIDX, c):
+        self.__db[cacheIDX] = c
+    def __deleteCache(self, cacheIDX):
+        del self.__db[cacheIDX]
+    def __deleteCacheData(self, cacheIDX, page):
+        size = self.__db[cacheIDX][1]
+        assert len(self.__db[cacheIDX][0]) == size
+        page.subCacheSize(size)
+        self.__deleteCache(cacheIDX)
+    def __lookupCache(self, cacheIDX):
+        return self.__db.get(cacheIDX)
+    def getCache(self, ctx):
+        cacheIDX = self.cacheIDX(ctx)
+        c = self.__lookupCache(cacheIDX)
+
+        if c is None:
+            self.__storeCache(cacheIDX, [util.Deferred()])
+            return
+
+        if isinstance(c[0], util.Deferred):
+            d = util.Deferred()
+            c.append(d)
+            return d
+
+        return c[0]
+    def cacheRendered(self, ctx, data, page):
+        cacheIDX = self.cacheIDX(ctx)
+        defer_list = self.__lookupCache(cacheIDX)
+        assert(isinstance(defer_list[0], util.Deferred))
+        size = len(data)
+        if page.canCache(size):
+            # overwrite the deferred with the data
+            timer = None
+            if page.lifetime > 0:
+                timer = util.reactor.callLater(page.lifetime,
+                                               self.__deleteCacheData, cacheIDX, page)
+            page.addCacheSize(size)
+            self.__storeCache(cacheIDX, (data, size, timer, ))
+        else:
+            self.__deleteCache(cacheIDX)
+        for d in defer_list:
+            d.callback(data)
+
+_CACHE = PageCache()
+
 class Page(Fragment, ConfigurableFactory, ChildLookupMixin):
     """A page is the main Nevow resource and renders a document loaded
     via the document factory (docFactory).
@@ -494,8 +544,27 @@
     afterRender = None
     addSlash = None
 
+    cache = False
+    lifetime = 0
+    max_cache_size = None
+    __cache_size = 0
+
     flattenFactory = lambda self, *args: flat.flattenFactory(*args)
 
+    def hasCache(self, ctx):
+        if not self.cache:
+            return
+        return _CACHE.getCache(ctx)
+    def addCacheSize(self, size):
+        assert self.canCache(size)
+        self.__cache_size += size
+    def subCacheSize(self, size):
+        self.__cache_size -= size
+        assert self.__cache_size >= 0
+    def canCache(self, size):
+        return self.max_cache_size is None or \
+               self.__cache_size + size <= self.max_cache_size
+
     def renderHTTP(self, ctx):
         if self.beforeRender is not None:
             return util.maybeDeferred(self.beforeRender,ctx).addCallback(
@@ -520,11 +589,20 @@
             if self.afterRender is not None:
                 return util.maybeDeferred(self.afterRender,ctx)
 
-        if self.buffered:
+        c = self.hasCache(ctx)
+        if c is not None:
+            assert self.afterRender is None
+            finishRequest()
+            return c
+
+        if self.buffered or self.cache:
             io = StringIO()
             writer = io.write
             def finisher(result):
-                request.write(io.getvalue())
+                c = io.getvalue()
+                if self.cache:
+                    _CACHE.cacheRendered(ctx, c, self)
+                request.write(c)
                 return util.maybeDeferred(finishRequest).addCallback(lambda r: result)
         else:
             writer = request.write



More information about the Twisted-web mailing list