[Twisted-Python] deferDirWalk code snippet

Michal Pasternak michal at pasternak.w.lub.pl
Wed Mar 3 15:16:14 EST 2004


Hi,

attached is a simple Deferred-based analogue of os.path.walk. It can be paused
and it can be restarted. I wrote this code while building a simple GTK backup
application. Perhaps it could be placed in the examples somewhere (or even
included in the library). If anyone would like to comment on my code, feel
free to do so; I always like constructive criticism ;)

Keep hacking,
-- 
Michal Pasternak :: http://pasternak.w.lub.pl :: http://winsrc.sf.net
"There's so much comedy on television. Does that cause comedy in the streets?" 
	-- Dick Cavett
-------------- next part --------------
#
# deferDirWalk.py
# (C) 2004 Michal Pasternak
# This code is public domain
#

import dircache
import os

from twisted.internet import defer
from twisted.python import log

class deferDirWalk:
    """
    Deferred-based analogue of os.path.walk.

    The tree below baseDir is walked breadth-first, one directory per
    reactor iteration, so the event loop stays responsive.  Symbolic links
    are never followed.  While walking, the instance accumulates:

      fileDict    -- maps each regular file's path to (size, mtime);
                     mtime is None unless getMTime was requested
      totalSize   -- sum of the sizes stored in fileDict
      dirsToCheck -- queue of directories still waiting to be listed
      working     -- True between run() and the firing of the Deferred
    """

    def __init__(self, baseDir, statusProc = None, skipList = None, getMTime = False):
        """
        baseDir is the directory at which the walk starts,
        statusProc is called after every processed directory with 3
        arguments: total files processed, total size of files processed and
        the name of the directory that was just processed,
        skipList is a list of directory names (single path components, not
        full paths) that should be skipped; None means "skip nothing",
        getMTime is a boolean value, which tells the walker to get mtimes of
        files also
        """
        self.baseDir = baseDir
        # Build a fresh list per instance: a mutable default argument would
        # be shared by every walker created without an explicit skipList.
        if skipList is None:
            skipList = []
        self.skipList = skipList
        self.statusProc = statusProc
        self.getMTime = getMTime
        self.restart()

    def restart(self):
        """
        Forget all collected results and queue baseDir again; also clears
        the pause flag.  Does not schedule anything by itself.
        """
        self.dirsToCheck = [os.path.realpath(self.baseDir)]
        self.totalSize = 0
        self.doPause = False
        self.working = False
        self.fileDict = {}

    def run(self):
        """
        Schedule the walk and return a Deferred.  It fires with the tuple
        (fileDict, totalSize) once every queued directory was processed, or
        with a Failure if processing a directory raised unexpectedly.
        """
        # Imported here rather than at module level, so that importing this
        # module does not by itself pull in the default reactor before the
        # application had a chance to install another one.
        from twisted.internet import reactor

        self.d = defer.Deferred()
        self.working = True
        reactor.callLater(0.1, self.nextStep)
        return self.d

    def nextStep(self):
        """
        Process the directory at the head of the queue and reschedule
        itself; fire the Deferred once the queue is empty.  While paused,
        nothing is processed, the method only keeps rescheduling itself.
        """
        from twisted.internet import reactor

        if not self.dirsToCheck:
            self.working = False
            self.d.callback((self.fileDict, self.totalSize))
            return

        if not self.doPause:
            currentDir = self.dirsToCheck.pop(0)
            try:
                self._scanDirectory(currentDir)
                if self.statusProc:
                    self.statusProc(len(self.fileDict),
                                    self.totalSize,
                                    currentDir)
            except Exception:
                # An exception escaping from a callLater call would stop the
                # walk without ever firing the Deferred; hand the error to
                # the caller instead (errback() without arguments wraps the
                # exception currently being handled).
                self.working = False
                self.d.errback()
                return
        reactor.callLater(0.01, self.nextStep)

    def _scanDirectory(self, dirPath):
        """
        Record the regular files found directly in dirPath and queue its
        subdirectories (except those named in skipList).  A directory that
        cannot be listed, or a file that cannot be stat'ed, is logged and
        skipped.
        """
        try:
            entries = os.listdir(dirPath)
        except OSError:
            log.msg("Cannot list %s" % dirPath)
            return
        # dircache.listdir, used previously, returned sorted listings; keep
        # the traversal order stable.
        entries.sort()

        for entry in entries:
            p = os.path.join(dirPath, entry)
            if os.path.islink(p):
                continue
            if os.path.isdir(p):
                if entry not in self.skipList:
                    self.dirsToCheck.append(p)
            elif os.path.isfile(p):
                try:
                    size = os.path.getsize(p)
                    if self.getMTime:
                        mtime = os.path.getmtime(p)
                    else:
                        mtime = None
                except OSError:
                    log.msg("Cannot stat %s" % p)
                    continue
                self.fileDict[p] = (size, mtime)
                self.totalSize += size

    def pause(self, doPause):
        """
        if doPause is True, work of walker is paused;
        it is unpaused if it is not
        """
        self.doPause = doPause


if __name__ == "__main__":
    # Command-line demo: walk the directory given as the first argument,
    # show progress on one console line and print the totals at the end.
    #
    # These imports stay at module scope (inside the guard, but not inside a
    # function) so that `os` and `reactor` are bound as module globals for
    # any code above that looks them up that way.
    from twisted.internet import reactor
    import os
    import sys

    def archiveWalkStatus(fileCount, fileSize, lastDir):
        """Progress callback: rewrite the status line with running totals."""
        megabytes = fileSize / (1024.0 * 1024.0)
        sys.stdout.write("%s (analyzed %i files (total size: %.2f MB))...\r" % (lastDir, fileCount, megabytes))
        sys.stdout.flush()

    def archiveWalkDone(retVal):
        """Success callback: print the totals and stop the reactor."""
        (fileDict, fileSize) = retVal
        sys.stdout.write("\nJob done!\n")
        sys.stdout.write("Totals:\n\tfiles: %i\n\tsize: %i bytes\n\n" % (len(fileDict), fileSize))
        sys.stdout.flush()
        reactor.stop()

    def archiveWalkFailed(failure):
        """Error callback: report the failure and stop the reactor."""
        # Without this, any failure in the chain would leave reactor.run()
        # blocking forever.
        sys.stderr.write("\nWalk failed: %s\n" % failure.getErrorMessage())
        reactor.stop()

    if len(sys.argv) < 2:
        sys.stdout.write("usage: deferDirWalk.py dirname\n")
        sys.exit(1)

    archiveWalker = deferDirWalk(os.path.realpath(sys.argv[1]), archiveWalkStatus)
    walkDone = archiveWalker.run()
    walkDone.addCallback(archiveWalkDone)
    walkDone.addErrback(archiveWalkFailed)
    reactor.run()


More information about the Twisted-Python mailing list