Changeset 22464
- Timestamp:
- 02/07/2008 03:23:49 AM (3 years ago)
- Location:
- trunk/twisted
- Files:
-
- 1 removed
- 1 modified
- 1 copied
-
python/test/test_zipstream.py (copied) (copied from branches/filezipfile-2996/twisted/python/test/test_zipstream.py)
-
python/zipstream.py (modified) (7 diffs)
-
test/test_zipstream.py (deleted)
Legend:
- Unmodified
- Added
- Removed
-
trunk/twisted/python/zipstream.py
r17448 r22464 1 """An extremely asynch approach to unzipping files. This allows you 2 to unzip a little bit of a file at a time, which means it can 3 integrate nicely with a reactor.1 # -*- test-case-name: twisted.python.test.test_zipstream -*- 2 # Copyright (c) 2001-2008 Twisted Matrix Laboratories. 3 # See LICENSE for details. 4 4 5 5 """ 6 7 from __future__ import generators 8 6 An incremental approach to unzipping files. This allows you to unzip a little 7 bit of a file at a time, which means you can report progress as a file unzips. 8 """ 9 10 import warnings 9 11 import zipfile 10 12 import os.path 11 import binascii12 13 import zlib 13 14 import struct 14 15 16 _fileHeaderSize = struct.calcsize(zipfile.structFileHeader) 17 15 18 class ChunkingZipFile(zipfile.ZipFile): 16 """A ZipFile object which, with readfile(), also gives you access 17 to a filelike object for each entry. 18 """ 19 """ 20 A ZipFile object which, with readfile(), also gives you access to a 21 filelike object for each entry. 22 """ 23 19 24 def readfile(self, name): 20 """Return file-like object for name.""" 25 """ 26 Return file-like object for name. 27 """ 21 28 if self.mode not in ("r", "a"): 22 raise RuntimeError , 'read() requires mode "r" or "a"'29 raise RuntimeError('read() requires mode "r" or "a"') 23 30 if not self.fp: 24 raise RuntimeError , \25 "Attempt to read ZIP archive that was already closed"31 raise RuntimeError( 32 "Attempt to read ZIP archive that was already closed") 26 33 zinfo = self.getinfo(name) 27 34 28 35 self.fp.seek(zinfo.header_offset, 0) 29 36 30 # Skip the file header: 31 fheader = self.fp.read(30) 37 fheader = self.fp.read(_fileHeaderSize) 32 38 if fheader[0:4] != zipfile.stringFileHeader: 33 raise zipfile.BadZipfile , "Bad magic number for file header"39 raise zipfile.BadZipfile("Bad magic number for file header") 34 40 35 41 fheader = struct.unpack(zipfile.structFileHeader, fheader) 36 42 fname = self.fp.read(fheader[zipfile._FH_FILENAME_LENGTH]) 43 37 44 if fheader[zipfile._FH_EXTRA_FIELD_LENGTH]: 38 45 self.fp.read(fheader[zipfile._FH_EXTRA_FIELD_LENGTH]) 39 46 40 47 if fname != zinfo.orig_filename: 41 raise zipfile.BadZipfile , \42 'File name in directory "%s" and header "%s" differ.' % (43 zinfo.orig_filename, fname)48 raise zipfile.BadZipfile( 49 'File name in directory "%s" and header "%s" differ.' % ( 50 zinfo.orig_filename, fname)) 44 51 45 52 if zinfo.compress_type == zipfile.ZIP_STORED: 46 return ZipFileEntry(self .fp, zinfo.compress_size)53 return ZipFileEntry(self, zinfo.compress_size) 47 54 elif zinfo.compress_type == zipfile.ZIP_DEFLATED: 48 if not zlib: 49 raise RuntimeError, \ 50 "De-compression requires the (missing) zlib module" 51 return DeflatedZipFileEntry(self.fp, zinfo.compress_size) 55 return DeflatedZipFileEntry(self, zinfo.compress_size) 52 56 else: 53 raise zipfile.BadZipfile, \ 54 "Unsupported compression method %d for file %s" % \ 55 (zinfo.compress_type, name) 56 57 def read(self, name): 58 """Return file bytes (as a string) for name.""" 59 f = self.readfile(name) 60 zinfo = self.getinfo(name) 61 bytes = f.read() 62 crc = binascii.crc32(bytes) 63 if crc != zinfo.CRC: 64 raise zipfile.BadZipfile, "Bad CRC-32 for file %s" % name 65 return bytes 66 67 68 class ZipFileEntry: 69 """File-like object used to read an uncompressed entry in a ZipFile""" 70 71 def __init__(self, fp, length): 72 self.fp = fp 73 self.readBytes = 0 57 raise zipfile.BadZipfile( 58 "Unsupported compression method %d for file %s" % 59 (zinfo.compress_type, name)) 60 61 62 63 class _FileEntry(object): 64 """ 65 Abstract superclass of both compressed and uncompressed variants of 66 file-like objects within a zip archive. 67 68 @ivar chunkingZipFile: a chunking zip file. 69 @type chunkingZipFile: L{ChunkingZipFile} 70 71 @ivar length: The number of bytes within the zip file that represent this 72 file. (This is the size on disk, not the number of decompressed bytes 73 which will result from reading it.) 74 75 @ivar fp: the underlying file object (that contains pkzip data). Do not 76 touch this, please. It will quite likely move or go away. 77 78 @ivar closed: File-like 'closed' attribute; True before this file has been 79 closed, False after. 80 @type closed: L{bool} 81 82 @ivar finished: An older, broken synonym for 'closed'. Do not touch this, 83 please. 84 @type finished: L{int} 85 """ 86 def __init__(self, chunkingZipFile, length): 87 """ 88 Create a L{_FileEntry} from a L{ChunkingZipFile}. 89 """ 90 self.chunkingZipFile = chunkingZipFile 91 self.fp = self.chunkingZipFile.fp 74 92 self.length = length 75 93 self.finished = 0 76 94 self.closed = False 95 96 97 def isatty(self): 98 """ 99 Returns false because zip files should not be ttys 100 """ 101 return False 102 103 104 def close(self): 105 """ 106 Close self (file-like object) 107 """ 108 self.closed = True 109 self.finished = 1 110 del self.fp 111 112 113 def readline(self): 114 """ 115 Read a line. 116 """ 117 bytes = "" 118 for byte in iter(lambda : self.read(1), ""): 119 bytes += byte 120 if byte == "\n": 121 break 122 return bytes 123 124 125 def next(self): 126 """ 127 Implement next as file does (like readline, except raises StopIteration 128 at EOF) 129 """ 130 nextline = self.readline() 131 if nextline: 132 return nextline 133 raise StopIteration() 134 135 136 def readlines(self): 137 """ 138 Returns a list of all the lines 139 """ 140 return list(self) 141 142 143 def xreadlines(self): 144 """ 145 Returns an iterator (so self) 146 """ 147 return self 148 149 150 def __iter__(self): 151 """ 152 Returns an iterator (so self) 153 """ 154 return self 155 156 157 158 class ZipFileEntry(_FileEntry): 159 """ 160 File-like object used to read an uncompressed entry in a ZipFile 161 """ 162 163 def __init__(self, chunkingZipFile, length): 164 _FileEntry.__init__(self, chunkingZipFile, length) 165 self.readBytes = 0 166 167 77 168 def tell(self): 78 169 return self.readBytes 79 170 171 80 172 def read(self, n=None): 81 173 if n is None: … … 83 175 if n == 0 or self.finished: 84 176 return '' 85 86 data = self.fp.read(min(n, self.length - self.readBytes))177 data = self.chunkingZipFile.fp.read( 178 min(n, self.length - self.readBytes)) 87 179 self.readBytes += len(data) 88 180 if self.readBytes == self.length or len(data) < n: … … 90 182 return data 91 183 92 def close(self): 93 self.finished = 1 94 del self.fp 95 96 97 class DeflatedZipFileEntry: 98 """File-like object used to read a deflated entry in a ZipFile""" 99 100 def __init__(self, fp, length): 101 self.fp = fp 184 185 186 class DeflatedZipFileEntry(_FileEntry): 187 """ 188 File-like object used to read a deflated entry in a ZipFile 189 """ 190 191 def __init__(self, chunkingZipFile, length): 192 _FileEntry.__init__(self, chunkingZipFile, length) 102 193 self.returnedBytes = 0 103 194 self.readBytes = 0 104 195 self.decomp = zlib.decompressobj(-15) 105 196 self.buffer = "" 106 self.length = length 107 self.finished = 0 108 197 198 109 199 def tell(self): 110 200 return self.returnedBytes 111 201 202 112 203 def read(self, n=None): 113 204 if self.finished: … … 115 206 if n is None: 116 207 result = [self.buffer,] 117 result.append(self.decomp.decompress(self.fp.read(self.length - self.readBytes))) 208 result.append( 209 self.decomp.decompress( 210 self.chunkingZipFile.fp.read( 211 self.length - self.readBytes))) 118 212 result.append(self.decomp.decompress("Z")) 119 213 result.append(self.decomp.flush()) … … 125 219 else: 126 220 while len(self.buffer) < n: 127 data = self.fp.read(min(n, 1024, self.length - self.readBytes)) 221 data = self.chunkingZipFile.fp.read( 222 min(n, 1024, self.length - self.readBytes)) 128 223 self.readBytes += len(data) 129 224 if not data: 130 result = self.buffer + self.decomp.decompress("Z") + self.decomp.flush() 225 result = (self.buffer 226 + self.decomp.decompress("Z") 227 + self.decomp.flush()) 131 228 self.finished = 1 132 229 self.buffer = "" … … 139 236 self.returnedBytes += len(result) 140 237 return result 141 142 def close(self): 143 self.finished = 1 144 del self.fp 238 145 239 146 240 147 241 def unzip(filename, directory=".", overwrite=0): 148 """Unzip the file 242 """ 243 Unzip the file 244 149 245 @param filename: the name of the zip file 150 246 @param directory: the directory into which the files will be … … 157 253 pass 158 254 159 DIR_BIT=16 255 DIR_BIT = 16 256 160 257 def unzipIter(filename, directory='.', overwrite=0): 161 """Return a generator for the zipfile. This implementation will 162 yield after every file. 258 """ 259 Return a generator for the zipfile. This implementation will yield 260 after every file. 163 261 164 262 The value it yields is the number of files left to unzip. 165 263 """ 166 zf=zipfile.ZipFile(filename, 'r') 167 names=zf.namelist() 168 if not os.path.exists(directory): os.makedirs(directory) 169 remaining=countZipFileEntries(filename) 264 zf = zipfile.ZipFile(filename, 'r') 265 names = zf.namelist() 266 if not os.path.exists(directory): 267 os.makedirs(directory) 268 remaining = len(zf.namelist()) 170 269 for entry in names: 171 remaining =remaining -1172 isdir =zf.getinfo(entry).external_attr & DIR_BIT173 f =os.path.join(directory, entry)270 remaining -= 1 271 isdir = zf.getinfo(entry).external_attr & DIR_BIT 272 f = os.path.join(directory, entry) 174 273 if isdir: 175 274 # overwrite flag only applies to files 176 if not os.path.exists(f): os.makedirs(f) 275 if not os.path.exists(f): 276 os.makedirs(f) 177 277 else: 178 278 # create the directory the file will be in first, 179 279 # since we can't guarantee it exists 180 fdir =os.path.split(f)[0]280 fdir = os.path.split(f)[0] 181 281 if not os.path.exists(fdir): 182 282 os.makedirs(f) 183 283 if overwrite or not os.path.exists(f): 184 outfile =file(f, 'wb')284 outfile = file(f, 'wb') 185 285 outfile.write(zf.read(entry)) 186 286 outfile.close() 187 287 yield remaining 188 288 289 189 290 def countZipFileChunks(filename, chunksize): 190 """Predict the number of chunks that will be extracted from the 191 entire zipfile, given chunksize blocks. 192 """ 193 totalchunks=0 194 zf=ChunkingZipFile(filename) 291 """ 292 Predict the number of chunks that will be extracted from the entire 293 zipfile, given chunksize blocks. 294 """ 295 totalchunks = 0 296 zf = ChunkingZipFile(filename) 195 297 for info in zf.infolist(): 196 totalchunks =totalchunks+countFileChunks(info, chunksize)298 totalchunks += countFileChunks(info, chunksize) 197 299 return totalchunks 198 300 301 199 302 def countFileChunks(zipinfo, chunksize): 200 size=zipinfo.file_size 201 count=size/chunksize 202 if size%chunksize > 0: 203 count=count+1 204 # each file counts as at least one chunk 303 """ 304 Count the number of chunks that will result from the given L{ZipInfo}. 305 306 @param zipinfo: a L{zipfile.ZipInfo} instance describing an entry in a zip 307 archive to be counted. 308 309 @return: the number of chunks present in the zip file. (Even an empty file 310 counts as one chunk.) 311 @rtype: L{int} 312 """ 313 count, extra = divmod(zipinfo.file_size, chunksize) 314 if extra > 0: 315 count += 1 205 316 return count or 1 206 317 318 207 319 def countZipFileEntries(filename): 208 zf=zipfile.ZipFile(filename) 320 """ 321 Count the number of entries in a zip archive. (Don't use this function.) 322 323 @param filename: The filename of a zip archive. 324 @type filename: L{str} 325 """ 326 warnings.warn("countZipFileEntries is deprecated.", 327 DeprecationWarning, 2) 328 zf = zipfile.ZipFile(filename) 209 329 return len(zf.namelist()) 330 210 331 211 332 def unzipIterChunky(filename, directory='.', overwrite=0, 212 333 chunksize=4096): 213 """Return a generator for the zipfile. This implementation will 214 yield after every chunksize uncompressed bytes, or at the end of a 215 file, whichever comes first. 334 """ 335 Return a generator for the zipfile. This implementation will yield after 336 every chunksize uncompressed bytes, or at the end of a file, whichever 337 comes first. 216 338 217 339 The value it yields is the number of chunks left to unzip. 218 340 """ 219 czf=ChunkingZipFile(filename, 'r') 220 if not os.path.exists(directory): os.makedirs(directory) 221 remaining=countZipFileChunks(filename, chunksize) 222 names=czf.namelist() 223 infos=czf.infolist() 224 341 czf = ChunkingZipFile(filename, 'r') 342 if not os.path.exists(directory): 343 os.makedirs(directory) 344 remaining = countZipFileChunks(filename, chunksize) 345 names = czf.namelist() 346 infos = czf.infolist() 347 225 348 for entry, info in zip(names, infos): 226 isdir =info.external_attr & DIR_BIT227 f =os.path.join(directory, entry)349 isdir = info.external_attr & DIR_BIT 350 f = os.path.join(directory, entry) 228 351 if isdir: 229 352 # overwrite flag only applies to files 230 if not os.path.exists(f): os.makedirs(f)231 remaining=remaining-1232 assert remaining>=0353 if not os.path.exists(f): 354 os.makedirs(f) 355 remaining -= 1 233 356 yield remaining 234 357 else: 235 358 # create the directory the file will be in first, 236 359 # since we can't guarantee it exists 237 fdir =os.path.split(f)[0]360 fdir = os.path.split(f)[0] 238 361 if not os.path.exists(fdir): 239 362 os.makedirs(f) 240 363 if overwrite or not os.path.exists(f): 241 outfile=file(f, 'wb') 242 fp=czf.readfile(entry) 243 if info.file_size==0: 244 remaining=remaining-1 245 assert remaining>=0 364 outfile = file(f, 'wb') 365 fp = czf.readfile(entry) 366 if info.file_size == 0: 367 remaining -= 1 246 368 yield remaining 247 fread=fp.read 248 ftell=fp.tell 249 owrite=outfile.write 250 size=info.file_size 251 while ftell() < size: 252 hunk=fread(chunksize) 253 owrite(hunk) 254 remaining=remaining-1 255 assert remaining>=0 369 while fp.tell() < info.file_size: 370 hunk = fp.read(chunksize) 371 outfile.write(hunk) 372 remaining -= 1 256 373 yield remaining 257 374 outfile.close() 258 375 else: 259 remaining=remaining-countFileChunks(info, chunksize) 260 assert remaining>=0 376 remaining -= countFileChunks(info, chunksize) 261 377 yield remaining
