"""
Read and write ZIP files.

XXX references to utf-8 need further investigation.
"""
import binascii
import functools
import importlib.util
import io
import itertools
import os
import posixpath
import shutil
import stat
import struct
import sys
import threading
import time
import contextlib

try:
    import zlib  # We may need its compression method
    crc32 = zlib.crc32
except ImportError:
    zlib = None
    crc32 = binascii.crc32

try:
    import bz2  # We may need its compression method
except ImportError:
    bz2 = None

try:
    import lzma  # We may need its compression method
except ImportError:
    lzma = None

__all__ = ["BadZipFile", "BadZipfile", "error",
           "ZIP_STORED", "ZIP_DEFLATED", "ZIP_BZIP2", "ZIP_LZMA",
           "is_zipfile", "ZipInfo", "ZipFile", "PyZipFile", "LargeZipFile",
           "Path"]

class BadZipFile(Exception):
    pass


class LargeZipFile(Exception):
    """
    Raised when a zipfile being written requires ZIP64 extensions
    and those extensions are disabled.
    """

error = BadZipfile = BadZipFile      # Pre-3.2 compatibility names


ZIP64_LIMIT = (1 << 31) - 1
ZIP_FILECOUNT_LIMIT = (1 << 16) - 1
ZIP_MAX_COMMENT = (1 << 16) - 1

# constants for Zip file compression methods
ZIP_STORED = 0
ZIP_DEFLATED = 8
ZIP_BZIP2 = 12
ZIP_LZMA = 14
# Other ZIP compression methods not supported

DEFAULT_VERSION = 20
ZIP64_VERSION = 45
BZIP2_VERSION = 46
LZMA_VERSION = 63
# we recognize (but not necessarily support) all features up to that version
MAX_EXTRACT_VERSION = 63

# Below are some formats and associated data for reading/writing headers using
# the struct module.  The names and structures of headers/records are those used
# in the PKWARE description of the ZIP file format:
#     http://www.pkware.com/documents/casestudies/APPNOTE.TXT
# (URL valid as of January 2008)

# The "end of central directory" structure, magic number, size, and indices
# (section V.I in the format document)
structEndArchive = b"<4s4H2LH"
stringEndArchive = b"PK\005\006"
sizeEndCentDir = struct.calcsize(structEndArchive)

_ECD_SIGNATURE = 0
_ECD_DISK_NUMBER = 1
_ECD_DISK_START = 2
_ECD_ENTRIES_THIS_DISK = 3
_ECD_ENTRIES_TOTAL = 4
_ECD_SIZE = 5
_ECD_OFFSET = 6
_ECD_COMMENT_SIZE = 7
# These last two indices are not part of the structure as defined in the
# spec, but they are used internally by this module as a convenience
_ECD_COMMENT = 8
_ECD_LOCATION = 9

# The "central directory" structure, magic number, size, and indices
# of entries in the structure (section V.F in the format document)
structCentralDir = "<4s4B4HL2L5H2L"
stringCentralDir = b"PK\001\002"
sizeCentralDir = struct.calcsize(structCentralDir)

# indexes of entries in the central directory structure
_CD_SIGNATURE = 0
_CD_CREATE_VERSION = 1
_CD_CREATE_SYSTEM = 2
_CD_EXTRACT_VERSION = 3
_CD_EXTRACT_SYSTEM = 4
_CD_FLAG_BITS = 5
_CD_COMPRESS_TYPE = 6
_CD_TIME = 7
_CD_DATE = 8
_CD_CRC = 9
_CD_COMPRESSED_SIZE = 10
_CD_UNCOMPRESSED_SIZE = 11
_CD_FILENAME_LENGTH = 12
_CD_EXTRA_FIELD_LENGTH = 13
_CD_COMMENT_LENGTH = 14
_CD_DISK_NUMBER_START = 15
_CD_INTERNAL_FILE_ATTRIBUTES = 16
_CD_EXTERNAL_FILE_ATTRIBUTES = 17
_CD_LOCAL_HEADER_OFFSET = 18

# The "local file header" structure, magic number, size, and indices
# (section V.A in the format document)
structFileHeader = "<4s2B4HL2L2H"
stringFileHeader = b"PK\003\004"
sizeFileHeader = struct.calcsize(structFileHeader)

_FH_SIGNATURE = 0
_FH_EXTRACT_VERSION = 1
_FH_EXTRACT_SYSTEM = 2
_FH_GENERAL_PURPOSE_FLAG_BITS = 3
_FH_COMPRESSION_METHOD = 4
_FH_LAST_MOD_TIME = 5
_FH_LAST_MOD_DATE = 6
_FH_CRC = 7
_FH_COMPRESSED_SIZE = 8
_FH_UNCOMPRESSED_SIZE = 9
_FH_FILENAME_LENGTH = 10
_FH_EXTRA_FIELD_LENGTH = 11

# The "Zip64 end of central directory locator" structure, magic number, and size
structEndArchive64Locator = "<4sLQL"
stringEndArchive64Locator = b"PK\x06\x07"
sizeEndCentDir64Locator = struct.calcsize(structEndArchive64Locator)

# The "Zip64 end of central directory" record, magic number, size, and indices
# (section V.G in the format document)
structEndArchive64 = "<4sQ2H2L4Q"
stringEndArchive64 = b"PK\x06\x06"
sizeEndCentDir64 = struct.calcsize(structEndArchive64)

_CD64_SIGNATURE = 0
_CD64_DIRECTORY_RECSIZE = 1
_CD64_CREATE_VERSION = 2
_CD64_EXTRACT_VERSION = 3
_CD64_DISK_NUMBER = 4
_CD64_DISK_NUMBER_START = 5
_CD64_NUMBER_ENTRIES_THIS_DISK = 6
_CD64_NUMBER_ENTRIES_TOTAL = 7
_CD64_DIRECTORY_SIZE = 8
_CD64_OFFSET_START_CENTDIR = 9

_DD_SIGNATURE = 0x08074b50

_EXTRA_FIELD_STRUCT = struct.Struct('<HH')

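# As an illustration of how these struct formats are used (a sketch, not part
# of the module's logic): the end-of-central-directory record of an empty,
# comment-less archive is just the 22-byte signature block padded with zeros.
#
#     empty_eocd = stringEndArchive + b"\x00" * (sizeEndCentDir - 4)
#     struct.unpack(structEndArchive, empty_eocd)
#     # -> (b'PK\x05\x06', 0, 0, 0, 0, 0, 0, 0)
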
def _strip_extra(extra, xids):
    # Remove Extra Fields with specified IDs.
    unpack = _EXTRA_FIELD_STRUCT.unpack
    modified = False
    buffer = []
    start = i = 0
    while i + 4 <= len(extra):
        xid, xlen = unpack(extra[i : i + 4])
        j = i + 4 + xlen
        if xid in xids:
            if i != start:
                buffer.append(extra[start : i])
            start = j
            modified = True
        i = j
    if not modified:
        return extra
    if start < len(extra):
        # Keep whatever follows the last stripped field; otherwise a trailing
        # unstripped field would be silently dropped.
        buffer.append(extra[start:])
    return b''.join(buffer)

def _check_zipfile(fp):
    try:
        if _EndRecData(fp):
            return True    # file has correct magic number
    except OSError:
        pass
    return False

def is_zipfile(filename):
    """Quickly see if a file is a ZIP file by checking the magic number.

    The filename argument may be a file or file-like object too.
    """
    result = False
    try:
        if hasattr(filename, "read"):
            result = _check_zipfile(fp=filename)
        else:
            with open(filename, "rb") as fp:
                result = _check_zipfile(fp)
    except OSError:
        pass
    return result

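# For example (path is hypothetical), callers typically gate ZipFile() on this
# cheap magic-number check:
#
#     if is_zipfile("downloads/data.zip"):
#         with ZipFile("downloads/data.zip") as zf:
#             zf.printdir()
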
def _EndRecData64(fpin, offset, endrec):
    """
    Read the ZIP64 end-of-archive records and use that to update endrec
    """
    try:
        fpin.seek(offset - sizeEndCentDir64Locator, 2)
    except OSError:
        # If the seek fails, the file is not large enough to contain a ZIP64
        # end-of-archive record, so just return the end record we were given.
        return endrec

    data = fpin.read(sizeEndCentDir64Locator)
    if len(data) != sizeEndCentDir64Locator:
        return endrec
    sig, diskno, reloff, disks = struct.unpack(structEndArchive64Locator, data)
    if sig != stringEndArchive64Locator:
        return endrec

    if diskno != 0 or disks > 1:
        raise BadZipFile("zipfiles that span multiple disks are not supported")

    # Assume no 'zip64 extensible data'
    fpin.seek(offset - sizeEndCentDir64Locator - sizeEndCentDir64, 2)
    data = fpin.read(sizeEndCentDir64)
    if len(data) != sizeEndCentDir64:
        return endrec
    sig, sz, create_version, read_version, disk_num, disk_dir, \
        dircount, dircount2, dirsize, diroffset = \
        struct.unpack(structEndArchive64, data)
    if sig != stringEndArchive64:
        return endrec

    # Update the original endrec using data from the ZIP64 record
    endrec[_ECD_SIGNATURE] = sig
    endrec[_ECD_DISK_NUMBER] = disk_num
    endrec[_ECD_DISK_START] = disk_dir
    endrec[_ECD_ENTRIES_THIS_DISK] = dircount
    endrec[_ECD_ENTRIES_TOTAL] = dircount2
    endrec[_ECD_SIZE] = dirsize
    endrec[_ECD_OFFSET] = diroffset
    return endrec


def _EndRecData(fpin):
    """Return data from the "End of Central Directory" record, or None.

    The data is a list of the nine items in the ZIP "End of central dir"
    record followed by a tenth item, the file seek offset of this record."""

    # Determine file size
    fpin.seek(0, 2)
    filesize = fpin.tell()

    # Check to see if this is ZIP file with no archive comment (the
    # "end of central directory" structure should be the last item in the
    # file if this is the case).
    try:
        fpin.seek(-sizeEndCentDir, 2)
    except OSError:
        return None
    data = fpin.read()
    if (len(data) == sizeEndCentDir and
        data[0:4] == stringEndArchive and
        data[-2:] == b"\000\000"):
        # the signature is correct and there's no comment, unpack structure
        endrec = struct.unpack(structEndArchive, data)
        endrec = list(endrec)

        # Append a blank comment and record start offset
        endrec.append(b"")
        endrec.append(filesize - sizeEndCentDir)

        # Try to read the "Zip64 end of central directory" structure
        return _EndRecData64(fpin, -sizeEndCentDir, endrec)

    # Either this is not a ZIP file, or it is a ZIP file with an archive
    # comment.  Search the end of the file for the "end of central directory"
    # record signature. The comment is the last item in the ZIP file and may be
    # up to 64K long.  It is assumed that the "end of central directory" magic
    # number does not appear in the comment.
    maxCommentStart = max(filesize - (1 << 16) - sizeEndCentDir, 0)
    fpin.seek(maxCommentStart, 0)
    data = fpin.read()
    start = data.rfind(stringEndArchive)
    if start >= 0:
        # found the magic number; attempt to unpack and interpret
        recData = data[start:start+sizeEndCentDir]
        if len(recData) != sizeEndCentDir:
            # Zip file is corrupted.
            return None
        endrec = list(struct.unpack(structEndArchive, recData))
        commentSize = endrec[_ECD_COMMENT_SIZE]  # as claimed by the zip file
        comment = data[start+sizeEndCentDir:start+sizeEndCentDir+commentSize]
        endrec.append(comment)
        endrec.append(maxCommentStart + start)

        # Try to read the "Zip64 end of central directory" structure
        return _EndRecData64(fpin, maxCommentStart + start - filesize,
                             endrec)

    # Unable to find a valid end of central directory structure
    return None


class ZipInfo (object):
    """Class with attributes describing each file in the ZIP archive."""

    __slots__ = (
        'orig_filename',
        'filename',
        'date_time',
        'compress_type',
        '_compresslevel',
        'comment',
        'extra',
        'create_system',
        'create_version',
        'extract_version',
        'reserved',
        'flag_bits',
        'volume',
        'internal_attr',
        'external_attr',
        'header_offset',
        'CRC',
        'compress_size',
        'file_size',
        '_raw_time',
    )

    def __init__(self, filename="NoName", date_time=(1980,1,1,0,0,0)):
        self.orig_filename = filename   # Original file name in archive

        # Terminate the file name at the first null byte.  Null bytes in file
        # names are used as tricks by viruses in archives.
        null_byte = filename.find(chr(0))
        if null_byte >= 0:
            filename = filename[0:null_byte]
        # This is used to ensure paths in generated ZIP files always use
        # forward slashes as the directory separator, as required by the
        # ZIP format specification.
        if os.sep != "/" and os.sep in filename:
            filename = filename.replace(os.sep, "/")

        self.filename = filename        # Normalized file name
        self.date_time = date_time      # year, month, day, hour, min, sec

        if date_time[0] < 1980:
            raise ValueError('ZIP does not support timestamps before 1980')

        # Standard values:
        self.compress_type = ZIP_STORED # Type of compression for the file
        self._compresslevel = None      # Level for the compressor
        self.comment = b""              # Comment for each file
        self.extra = b""                # ZIP extra data
        if sys.platform == 'win32':
            self.create_system = 0      # System which created ZIP archive
        else:
            # Assume everything else is unix-y
            self.create_system = 3      # System which created ZIP archive
        self.create_version = DEFAULT_VERSION   # Version which created ZIP archive
        self.extract_version = DEFAULT_VERSION  # Version needed to extract archive
        self.reserved = 0               # Must be zero
        self.flag_bits = 0              # ZIP flag bits
        self.volume = 0                 # Volume number of file header
        self.internal_attr = 0          # Internal attributes
        self.external_attr = 0          # External file attributes
        # Other attributes are set by class ZipFile:
        # header_offset         Byte offset to the file header
        # CRC                   CRC-32 of the uncompressed file
        # compress_size         Size of the compressed file
        # file_size             Size of the uncompressed file

    def __repr__(self):
        result = ['<%s filename=%r' % (self.__class__.__name__, self.filename)]
        if self.compress_type != ZIP_STORED:
            result.append(' compress_type=%s' %
                          compressor_names.get(self.compress_type,
                                               self.compress_type))
        hi = self.external_attr >> 16
        lo = self.external_attr & 0xFFFF
        if hi:
            result.append(' filemode=%r' % stat.filemode(hi))
        if lo:
            result.append(' external_attr=%#x' % lo)
        isdir = self.is_dir()
        if not isdir or self.file_size:
            result.append(' file_size=%r' % self.file_size)
        if ((not isdir or self.compress_size) and
            (self.compress_type != ZIP_STORED or
             self.file_size != self.compress_size)):
            result.append(' compress_size=%r' % self.compress_size)
        result.append('>')
        return ''.join(result)

    def FileHeader(self, zip64=None):
        """Return the per-file header as a bytes object."""
        dt = self.date_time
        dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
        dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)
        if self.flag_bits & 0x08:
            # Set these to zero because we write them after the file data
            CRC = compress_size = file_size = 0
        else:
            CRC = self.CRC
            compress_size = self.compress_size
            file_size = self.file_size

        extra = self.extra

        min_version = 0
        if zip64 is None:
            zip64 = file_size > ZIP64_LIMIT or compress_size > ZIP64_LIMIT
        if zip64:
            fmt = '<HHQQ'
            extra = extra + struct.pack(fmt,
                                        1, struct.calcsize(fmt)-4, file_size, compress_size)
        if file_size > ZIP64_LIMIT or compress_size > ZIP64_LIMIT:
            if not zip64:
                raise LargeZipFile("Filesize would require ZIP64 extensions")
            # File is larger than what fits into a 4 byte integer,
            # fall back to the ZIP64 extension
            file_size = 0xffffffff
            compress_size = 0xffffffff
            min_version = ZIP64_VERSION

        if self.compress_type == ZIP_BZIP2:
            min_version = max(BZIP2_VERSION, min_version)
        elif self.compress_type == ZIP_LZMA:
            min_version = max(LZMA_VERSION, min_version)

        self.extract_version = max(min_version, self.extract_version)
        self.create_version = max(min_version, self.create_version)
        filename, flag_bits = self._encodeFilenameFlags()
        header = struct.pack(structFileHeader, stringFileHeader,
                             self.extract_version, self.reserved, flag_bits,
                             self.compress_type, dostime, dosdate, CRC,
                             compress_size, file_size,
                             len(filename), len(extra))
        return header + filename + extra

    def _encodeFilenameFlags(self):
        try:
            return self.filename.encode('ascii'), self.flag_bits
        except UnicodeEncodeError:
            return self.filename.encode('utf-8'), self.flag_bits | 0x800

    def _decodeExtra(self):
        # Try to decode the extra field.
        extra = self.extra
        unpack = struct.unpack
        while len(extra) >= 4:
            tp, ln = unpack('<HH', extra[:4])
            if ln+4 > len(extra):
                raise BadZipFile("Corrupt extra field %04x (size=%d)" % (tp, ln))
            if tp == 0x0001:
                if ln >= 24:
                    counts = unpack('<QQQ', extra[4:28])
                elif ln == 16:
                    counts = unpack('<QQ', extra[4:20])
                elif ln == 8:
                    counts = unpack('<Q', extra[4:12])
                elif ln == 0:
                    counts = ()
                else:
                    raise BadZipFile("Corrupt extra field %04x (size=%d)" % (tp, ln))

                idx = 0

                # ZIP64 extension (large files and/or large archives)
                if self.file_size in (0xffffffffffffffff, 0xffffffff):
                    if len(counts) <= idx:
                        raise BadZipFile(
                            "Corrupt zip64 extra field. File size not found."
                        )
                    self.file_size = counts[idx]
                    idx += 1

                if self.compress_size == 0xFFFFFFFF:
                    if len(counts) <= idx:
                        raise BadZipFile(
                            "Corrupt zip64 extra field. Compress size not found."
                        )
                    self.compress_size = counts[idx]
                    idx += 1

                if self.header_offset == 0xffffffff:
                    if len(counts) <= idx:
                        raise BadZipFile(
                            "Corrupt zip64 extra field. Header offset not found."
                        )
                    old = self.header_offset
                    self.header_offset = counts[idx]
                    idx += 1

            extra = extra[ln+4:]

    @classmethod
    def from_file(cls, filename, arcname=None, *, strict_timestamps=True):
        """Construct an appropriate ZipInfo for a file on the filesystem.

        filename should be the path to a file or directory on the filesystem.

        arcname is the name which it will have within the archive (by default,
        this will be the same as filename, but without a drive letter and with
        leading path separators removed).
        """
        if isinstance(filename, os.PathLike):
            filename = os.fspath(filename)
        st = os.stat(filename)
        isdir = stat.S_ISDIR(st.st_mode)
        mtime = time.localtime(st.st_mtime)
        date_time = mtime[0:6]
        if not strict_timestamps and date_time[0] < 1980:
            date_time = (1980, 1, 1, 0, 0, 0)
        elif not strict_timestamps and date_time[0] > 2107:
            date_time = (2107, 12, 31, 23, 59, 59)
        # Create ZipInfo instance to store file information
        if arcname is None:
            arcname = filename
        arcname = os.path.normpath(os.path.splitdrive(arcname)[1])
        while arcname[0] in (os.sep, os.altsep):
            arcname = arcname[1:]
        if isdir:
            arcname += '/'
        zinfo = cls(arcname, date_time)
        zinfo.external_attr = (st.st_mode & 0xFFFF) << 16  # Unix attributes
        if isdir:
            zinfo.file_size = 0
            zinfo.external_attr |= 0x10  # MS-DOS directory flag
        else:
            zinfo.file_size = st.st_size

        return zinfo

    def is_dir(self):
        """Return True if this archive member is a directory."""
        return self.filename[-1] == '/'


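# A sketch of preparing a member by hand with ZipInfo.from_file (paths and
# names below are hypothetical):
#
#     zi = ZipInfo.from_file("docs/report.txt", arcname="report.txt")
#     zi.compress_type = ZIP_DEFLATED
#     with ZipFile("out.zip", "w") as zf:
#         with open("docs/report.txt", "rb") as src, zf.open(zi, "w") as dest:
#             shutil.copyfileobj(src, dest)
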
# ZIP encryption uses the CRC32 one-byte primitive for scrambling some
# internal keys. We noticed that a direct implementation is faster than
# relying on binascii.crc32().

_crctable = None
def _gen_crc(crc):
    for j in range(8):
        if crc & 1:
            crc = (crc >> 1) ^ 0xEDB88320
        else:
            crc >>= 1
    return crc

# ZIP supports a password-based form of encryption. Even though known
# plaintext attacks have been found against it, it is still useful
# to be able to get data out of such a file.
#
# Usage:
#     zd = _ZipDecrypter(mypwd)
#     plain_bytes = zd(cypher_bytes)
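#
# Expanding on that usage (a sketch; 'encrypted' stands for the raw bytes of
# an encrypted member): the first 12 decrypted bytes are the encryption
# header, whose last byte is the password check byte.
#
#     zd = _ZipDecrypter(b"secret")
#     check = zd(encrypted[:12])[11]
#     plain = zd(encrypted[12:])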

def _ZipDecrypter(pwd):
    key0 = 305419896
    key1 = 591751049
    key2 = 878082192

    global _crctable
    if _crctable is None:
        _crctable = list(map(_gen_crc, range(256)))
    crctable = _crctable

    def crc32(ch, crc):
        """Compute the CRC32 primitive on one byte."""
        return (crc >> 8) ^ crctable[(crc ^ ch) & 0xFF]

    def update_keys(c):
        nonlocal key0, key1, key2
        key0 = crc32(c, key0)
        key1 = (key1 + (key0 & 0xFF)) & 0xFFFFFFFF
        key1 = (key1 * 134775813 + 1) & 0xFFFFFFFF
        key2 = crc32(key1 >> 24, key2)

    for p in pwd:
        update_keys(p)

    def decrypter(data):
        """Decrypt a bytes object."""
        result = bytearray()
        append = result.append
        for c in data:
            k = key2 | 2
            c ^= ((k * (k^1)) >> 8) & 0xFF
            update_keys(c)
            append(c)
        return bytes(result)

    return decrypter


class LZMACompressor:

    def __init__(self):
        self._comp = None

    def _init(self):
        props = lzma._encode_filter_properties({'id': lzma.FILTER_LZMA1})
        self._comp = lzma.LZMACompressor(lzma.FORMAT_RAW, filters=[
            lzma._decode_filter_properties(lzma.FILTER_LZMA1, props)
        ])
        return struct.pack('<BBH', 9, 4, len(props)) + props

    def compress(self, data):
        if self._comp is None:
            return self._init() + self._comp.compress(data)
        return self._comp.compress(data)

    def flush(self):
        if self._comp is None:
            return self._init() + self._comp.flush()
        return self._comp.flush()


class LZMADecompressor:

    def __init__(self):
        self._decomp = None
        self._unconsumed = b''
        self.eof = False

    def decompress(self, data):
        if self._decomp is None:
            self._unconsumed += data
            if len(self._unconsumed) <= 4:
                return b''
            psize, = struct.unpack('<H', self._unconsumed[2:4])
            if len(self._unconsumed) <= 4 + psize:
                return b''

            self._decomp = lzma.LZMADecompressor(lzma.FORMAT_RAW, filters=[
                lzma._decode_filter_properties(lzma.FILTER_LZMA1,
                                               self._unconsumed[4:4 + psize])
            ])
            data = self._unconsumed[4 + psize:]
            del self._unconsumed

        result = self._decomp.decompress(data)
        self.eof = self._decomp.eof
        return result


compressor_names = {
    0: 'store',
    1: 'shrink',
    2: 'reduce',
    3: 'reduce',
    4: 'reduce',
    5: 'reduce',
    6: 'implode',
    7: 'tokenize',
    8: 'deflate',
    9: 'deflate64',
    10: 'implode',
    12: 'bzip2',
    14: 'lzma',
    18: 'terse',
    19: 'lz77',
    97: 'wavpack',
    98: 'ppmd',
}

def _check_compression(compression):
    if compression == ZIP_STORED:
        pass
    elif compression == ZIP_DEFLATED:
        if not zlib:
            raise RuntimeError(
                "Compression requires the (missing) zlib module")
    elif compression == ZIP_BZIP2:
        if not bz2:
            raise RuntimeError(
                "Compression requires the (missing) bz2 module")
    elif compression == ZIP_LZMA:
        if not lzma:
            raise RuntimeError(
                "Compression requires the (missing) lzma module")
    else:
        raise NotImplementedError("That compression method is not supported")


def _get_compressor(compress_type, compresslevel=None):
    if compress_type == ZIP_DEFLATED:
        if compresslevel is not None:
            return zlib.compressobj(compresslevel, zlib.DEFLATED, -15)
        return zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION, zlib.DEFLATED, -15)
    elif compress_type == ZIP_BZIP2:
        if compresslevel is not None:
            return bz2.BZ2Compressor(compresslevel)
        return bz2.BZ2Compressor()
    # compresslevel is ignored for ZIP_LZMA
    elif compress_type == ZIP_LZMA:
        return LZMACompressor()
    else:
        return None


def _get_decompressor(compress_type):
    _check_compression(compress_type)
    if compress_type == ZIP_STORED:
        return None
    elif compress_type == ZIP_DEFLATED:
        return zlib.decompressobj(-15)
    elif compress_type == ZIP_BZIP2:
        return bz2.BZ2Decompressor()
    elif compress_type == ZIP_LZMA:
        return LZMADecompressor()
    else:
        descr = compressor_names.get(compress_type)
        if descr:
            raise NotImplementedError("compression type %d (%s)" % (compress_type, descr))
        else:
            raise NotImplementedError("compression type %d" % (compress_type,))


class _SharedFile:
    def __init__(self, file, pos, close, lock, writing):
        self._file = file
        self._pos = pos
        self._close = close
        self._lock = lock
        self._writing = writing
        self.seekable = file.seekable
        self.tell = file.tell

    def seek(self, offset, whence=0):
        with self._lock:
            if self._writing():
                raise ValueError("Can't reposition in the ZIP file while "
                                 "there is an open writing handle on it. "
                                 "Close the writing handle before trying to read.")
            self._file.seek(offset, whence)
            self._pos = self._file.tell()
            return self._pos

    def read(self, n=-1):
        with self._lock:
            if self._writing():
                raise ValueError("Can't read from the ZIP file while there "
                                 "is an open writing handle on it. "
                                 "Close the writing handle before trying to read.")
            self._file.seek(self._pos)
            data = self._file.read(n)
            self._pos = self._file.tell()
            return data

    def close(self):
        if self._file is not None:
            fileobj = self._file
            self._file = None
            self._close(fileobj)

# Provide the tell method for unseekable stream
class _Tellable:
    def __init__(self, fp):
        self.fp = fp
        self.offset = 0

    def write(self, data):
        n = self.fp.write(data)
        self.offset += n
        return n

    def tell(self):
        return self.offset

    def flush(self):
        self.fp.flush()

    def close(self):
        self.fp.close()


class ZipExtFile(io.BufferedIOBase):
    """File-like object for reading an archive member.
       Is returned by ZipFile.open().
    """

    # Max size supported by decompressor.
    MAX_N = 1 << 31 - 1

    # Read from compressed files in 4k blocks.
    MIN_READ_SIZE = 4096

    # Chunk size to read during seek
    MAX_SEEK_READ = 1 << 24

    def __init__(self, fileobj, mode, zipinfo, pwd=None,
                 close_fileobj=False):
        self._fileobj = fileobj
        self._pwd = pwd
        self._close_fileobj = close_fileobj

        self._compress_type = zipinfo.compress_type
        self._compress_left = zipinfo.compress_size
        self._left = zipinfo.file_size

        self._decompressor = _get_decompressor(self._compress_type)

        self._eof = False
        self._readbuffer = b''
        self._offset = 0

        self.newlines = None

        self.mode = mode
        self.name = zipinfo.filename

        if hasattr(zipinfo, 'CRC'):
            self._expected_crc = zipinfo.CRC
            self._running_crc = crc32(b'')
        else:
            self._expected_crc = None

        self._seekable = False
        try:
            if fileobj.seekable():
                self._orig_compress_start = fileobj.tell()
                self._orig_compress_size = zipinfo.compress_size
                self._orig_file_size = zipinfo.file_size
                self._orig_start_crc = self._running_crc
                self._seekable = True
        except AttributeError:
            pass

        self._decrypter = None
        if pwd:
            if zipinfo.flag_bits & 0x8:
                # compare against the file type from extended local headers
                check_byte = (zipinfo._raw_time >> 8) & 0xff
            else:
                # compare against the CRC otherwise
                check_byte = (zipinfo.CRC >> 24) & 0xff
            h = self._init_decrypter()
            if h != check_byte:
                raise RuntimeError("Bad password for file %r" % zipinfo.orig_filename)


    def _init_decrypter(self):
        self._decrypter = _ZipDecrypter(self._pwd)
        # The first 12 bytes in the cypher stream is an encryption header
        # used to strengthen the algorithm. The first 11 bytes are
        # completely random, while the 12th contains the MSB of the CRC,
        # or the MSB of the file time depending on the header type
        # and is used to check the correctness of the password.
        header = self._fileobj.read(12)
        self._compress_left -= 12
        return self._decrypter(header)[11]

    def __repr__(self):
        result = ['<%s.%s' % (self.__class__.__module__,
                              self.__class__.__qualname__)]
        if not self.closed:
            result.append(' name=%r mode=%r' % (self.name, self.mode))
            if self._compress_type != ZIP_STORED:
                result.append(' compress_type=%s' %
                              compressor_names.get(self._compress_type,
                                                   self._compress_type))
        else:
            result.append(' [closed]')
        result.append('>')
        return ''.join(result)

    def readline(self, limit=-1):
        """Read and return a line from the stream.

        If limit is specified, at most limit bytes will be read.
        """

        if limit < 0:
            # Shortcut common case - newline found in buffer.
            i = self._readbuffer.find(b'\n', self._offset) + 1
            if i > 0:
                line = self._readbuffer[self._offset: i]
                self._offset = i
                return line

        return io.BufferedIOBase.readline(self, limit)

    def peek(self, n=1):
        """Returns buffered bytes without advancing the position."""
        if n > len(self._readbuffer) - self._offset:
            chunk = self.read(n)
            if len(chunk) > self._offset:
                self._readbuffer = chunk + self._readbuffer[self._offset:]
                self._offset = 0
            else:
                self._offset -= len(chunk)

        # Return up to 512 bytes to reduce allocation overhead for tight loops.
        return self._readbuffer[self._offset: self._offset + 512]

    def readable(self):
        return True

    def read(self, n=-1):
        """Read and return up to n bytes.
        If the argument is omitted, None, or negative, data is read and returned until EOF is reached.
        """
        if n is None or n < 0:
            buf = self._readbuffer[self._offset:]
            self._readbuffer = b''
            self._offset = 0
            while not self._eof:
                buf += self._read1(self.MAX_N)
            return buf

        end = n + self._offset
        if end < len(self._readbuffer):
            buf = self._readbuffer[self._offset:end]
            self._offset = end
            return buf

        n = end - len(self._readbuffer)
        buf = self._readbuffer[self._offset:]
        self._readbuffer = b''
        self._offset = 0
        while n > 0 and not self._eof:
            data = self._read1(n)
            if n < len(data):
                self._readbuffer = data
                self._offset = n
                buf += data[:n]
                break
            buf += data
            n -= len(data)
        return buf

    def _update_crc(self, newdata):
        # Update the CRC using the given data.
        if self._expected_crc is None:
            # No need to compute the CRC if we don't have a reference value
            return
        self._running_crc = crc32(newdata, self._running_crc)
        # Check the CRC if we're at the end of the file
        if self._eof and self._running_crc != self._expected_crc:
            raise BadZipFile("Bad CRC-32 for file %r" % self.name)

    def read1(self, n):
        """Read up to n bytes with at most one read() system call."""

        if n is None or n < 0:
            buf = self._readbuffer[self._offset:]
            self._readbuffer = b''
            self._offset = 0
            while not self._eof:
                data = self._read1(self.MAX_N)
                if data:
                    buf += data
                    break
            return buf

        end = n + self._offset
        if end < len(self._readbuffer):
            buf = self._readbuffer[self._offset:end]
            self._offset = end
            return buf

        n = end - len(self._readbuffer)
        buf = self._readbuffer[self._offset:]
        self._readbuffer = b''
        self._offset = 0
        if n > 0:
            while not self._eof:
                data = self._read1(n)
                if n < len(data):
                    self._readbuffer = data
                    self._offset = n
                    buf += data[:n]
                    break
                if data:
                    buf += data
                    break
        return buf

    def _read1(self, n):
        # Read up to n compressed bytes with at most one read() system call,
        # decrypt and decompress them.
        if self._eof or n <= 0:
            return b''

        # Read from file.
        if self._compress_type == ZIP_DEFLATED:
            ## Handle unconsumed data.
            data = self._decompressor.unconsumed_tail
            if n > len(data):
                data += self._read2(n - len(data))
        else:
            data = self._read2(n)

        if self._compress_type == ZIP_STORED:
            self._eof = self._compress_left <= 0
        elif self._compress_type == ZIP_DEFLATED:
            n = max(n, self.MIN_READ_SIZE)
            data = self._decompressor.decompress(data, n)
            self._eof = (self._decompressor.eof or
                         self._compress_left <= 0 and
                         not self._decompressor.unconsumed_tail)
            if self._eof:
                data += self._decompressor.flush()
        else:
            data = self._decompressor.decompress(data)
            self._eof = self._decompressor.eof or self._compress_left <= 0

        data = data[:self._left]
        self._left -= len(data)
        if self._left <= 0:
            self._eof = True
        self._update_crc(data)
        return data

    def _read2(self, n):
        if self._compress_left <= 0:
            return b''

        n = max(n, self.MIN_READ_SIZE)
        n = min(n, self._compress_left)

        data = self._fileobj.read(n)
        self._compress_left -= len(data)
        if not data:
            raise EOFError

        if self._decrypter is not None:
            data = self._decrypter(data)
        return data

    def close(self):
        try:
            if self._close_fileobj:
                self._fileobj.close()
        finally:
            super().close()

    def seekable(self):
        return self._seekable

    def seek(self, offset, whence=0):
        if not self._seekable:
            raise io.UnsupportedOperation("underlying stream is not seekable")
        curr_pos = self.tell()
        if whence == 0:  # Seek from start of file
            new_pos = offset
        elif whence == 1:  # Seek from current position
            new_pos = curr_pos + offset
        elif whence == 2:  # Seek from EOF
            new_pos = self._orig_file_size + offset
        else:
            raise ValueError("whence must be os.SEEK_SET (0), "
                             "os.SEEK_CUR (1), or os.SEEK_END (2)")

        if new_pos > self._orig_file_size:
            new_pos = self._orig_file_size

        if new_pos < 0:
            new_pos = 0

        read_offset = new_pos - curr_pos
        buff_offset = read_offset + self._offset

        if buff_offset >= 0 and buff_offset < len(self._readbuffer):
            # Just move the _offset index if the new position is in the _readbuffer
            self._offset = buff_offset
            read_offset = 0
        elif read_offset < 0:
            # Position is before the current position. Reset the ZipExtFile
            self._fileobj.seek(self._orig_compress_start)
            self._running_crc = self._orig_start_crc
            self._compress_left = self._orig_compress_size
            self._left = self._orig_file_size
            self._readbuffer = b''
            self._offset = 0
            self._decompressor = _get_decompressor(self._compress_type)
            self._eof = False
            read_offset = new_pos
            if self._decrypter is not None:
                self._init_decrypter()

        while read_offset > 0:
            read_len = min(self.MAX_SEEK_READ, read_offset)
            self.read(read_len)
            read_offset -= read_len

        return self.tell()

    def tell(self):
        if not self._seekable:
            raise io.UnsupportedOperation("underlying stream is not seekable")
        filepos = self._orig_file_size - self._left - len(self._readbuffer) + self._offset
        return filepos


class _ZipWriteFile(io.BufferedIOBase):
    def __init__(self, zf, zinfo, zip64):
        self._zinfo = zinfo
        self._zip64 = zip64
        self._zipfile = zf
        self._compressor = _get_compressor(zinfo.compress_type,
                                           zinfo._compresslevel)
        self._file_size = 0
        self._compress_size = 0
        self._crc = 0

    @property
    def _fileobj(self):
        return self._zipfile.fp

    def writable(self):
        return True

    def write(self, data):
        if self.closed:
            raise ValueError('I/O operation on closed file.')
        nbytes = len(data)
        self._file_size += nbytes
        self._crc = crc32(data, self._crc)
        if self._compressor:
            data = self._compressor.compress(data)
            self._compress_size += len(data)
        self._fileobj.write(data)
        return nbytes

    def close(self):
        if self.closed:
            return
        try:
            super().close()
            # Flush any data from the compressor, and update header info
            if self._compressor:
                buf = self._compressor.flush()
                self._compress_size += len(buf)
                self._fileobj.write(buf)
                self._zinfo.compress_size = self._compress_size
            else:
                self._zinfo.compress_size = self._file_size
            self._zinfo.CRC = self._crc
            self._zinfo.file_size = self._file_size

            # Write updated header info
            if self._zinfo.flag_bits & 0x08:
                # Write CRC and file sizes after the file data
                fmt = '<LLQQ' if self._zip64 else '<LLLL'
                self._fileobj.write(struct.pack(fmt, _DD_SIGNATURE, self._zinfo.CRC,
                                                self._zinfo.compress_size, self._zinfo.file_size))
                self._zipfile.start_dir = self._fileobj.tell()
            else:
                if not self._zip64:
                    if self._file_size > ZIP64_LIMIT:
                        raise RuntimeError(
                            'File size unexpectedly exceeded ZIP64 limit')
                    if self._compress_size > ZIP64_LIMIT:
                        raise RuntimeError(
                            'Compressed size unexpectedly exceeded ZIP64 limit')
                # Seek backwards and write file header (which will now include
                # correct CRC and file sizes)

                # Preserve current position in file
                self._zipfile.start_dir = self._fileobj.tell()
                self._fileobj.seek(self._zinfo.header_offset)
                self._fileobj.write(self._zinfo.FileHeader(self._zip64))
                self._fileobj.seek(self._zipfile.start_dir)

            # Successfully written: Add file to our caches
            self._zipfile.filelist.append(self._zinfo)
            self._zipfile.NameToInfo[self._zinfo.filename] = self._zinfo
        finally:
            self._zipfile._writing = False



class ZipFile:
    """ Class with methods to open, read, write, close, list zip files.

    z = ZipFile(file, mode="r", compression=ZIP_STORED, allowZip64=True,
                compresslevel=None)

    file: Either the path to the file, or a file-like object.
          If it is a path, the file will be opened and closed by ZipFile.
    mode: The mode can be either read 'r', write 'w', exclusive create 'x',
          or append 'a'.
    compression: ZIP_STORED (no compression), ZIP_DEFLATED (requires zlib),
                 ZIP_BZIP2 (requires bz2) or ZIP_LZMA (requires lzma).
    allowZip64: if True ZipFile will create files with ZIP64 extensions when
                needed, otherwise it will raise an exception when this would
                be necessary.
    compresslevel: None (default for the given compression type) or an integer
                   specifying the level to pass to the compressor.
                   When using ZIP_STORED or ZIP_LZMA this keyword has no effect.
                   When using ZIP_DEFLATED integers 0 through 9 are accepted.
                   When using ZIP_BZIP2 integers 1 through 9 are accepted.

    """

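    # Constructor sketch (the archive and file paths are hypothetical):
    #
    #     with ZipFile("archive.zip", "a", compression=ZIP_DEFLATED,
    #                  compresslevel=9) as zf:
    #         zf.write("notes.txt")
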
    fp = None                   # Set here since __del__ checks it
    _windows_illegal_name_trans_table = None

    def __init__(self, file, mode="r", compression=ZIP_STORED, allowZip64=True,
                 compresslevel=None, *, strict_timestamps=True):
        """Open the ZIP file with mode read 'r', write 'w', exclusive create 'x',
        or append 'a'."""
        if mode not in ('r', 'w', 'x', 'a'):
            raise ValueError("ZipFile requires mode 'r', 'w', 'x', or 'a'")

        _check_compression(compression)

        self._allowZip64 = allowZip64
        self._didModify = False
        self.debug = 0  # Level of printing: 0 through 3
        self.NameToInfo = {}    # Find file info given name
        self.filelist = []      # List of ZipInfo instances for archive
        self.compression = compression  # Method of compression
        self.compresslevel = compresslevel
        self.mode = mode
        self.pwd = None
        self._comment = b''
        self._strict_timestamps = strict_timestamps

        # Check if we were passed a file-like object
        if isinstance(file, os.PathLike):
            file = os.fspath(file)
        if isinstance(file, str):
            # No, it's a filename
            self._filePassed = 0
            self.filename = file
            modeDict = {'r' : 'rb', 'w': 'w+b', 'x': 'x+b', 'a' : 'r+b',
                        'r+b': 'w+b', 'w+b': 'wb', 'x+b': 'xb'}
            filemode = modeDict[mode]
            while True:
                try:
                    self.fp = io.open(file, filemode)
                except OSError:
                    if filemode in modeDict:
                        filemode = modeDict[filemode]
                        continue
                    raise
                break
        else:
            self._filePassed = 1
            self.fp = file
            self.filename = getattr(file, 'name', None)
        self._fileRefCnt = 1
        self._lock = threading.RLock()
        self._seekable = True
        self._writing = False

        try:
            if mode == 'r':
                self._RealGetContents()
            elif mode in ('w', 'x'):
                # set the modified flag so central directory gets written
                # even if no files are added to the archive
                self._didModify = True
                try:
                    self.start_dir = self.fp.tell()
                except (AttributeError, OSError):
                    self.fp = _Tellable(self.fp)
                    self.start_dir = 0
                    self._seekable = False
                else:
                    # Some file-like objects can provide tell() but not seek()
                    try:
                        self.fp.seek(self.start_dir)
                    except (AttributeError, OSError):
                        self._seekable = False
            elif mode == 'a':
                try:
                    # See if file is a zip file
                    self._RealGetContents()
                    # seek to start of directory and overwrite
                    self.fp.seek(self.start_dir)
                except BadZipFile:
                    # file is not a zip file, just append
                    self.fp.seek(0, 2)

                    # set the modified flag so central directory gets written
                    # even if no files are added to the archive
                    self._didModify = True
                    self.start_dir = self.fp.tell()
            else:
                raise ValueError("Mode must be 'r', 'w', 'x', or 'a'")
        except:
            fp = self.fp
            self.fp = None
            self._fpclose(fp)
            raise

    def __enter__(self):
        return self

    def __exit__(self, type, value, traceback):
        self.close()

    def __repr__(self):
        result = ['<%s.%s' % (self.__class__.__module__,
                              self.__class__.__qualname__)]
        if self.fp is not None:
            if self._filePassed:
                result.append(' file=%r' % self.fp)
            elif self.filename is not None:
                result.append(' filename=%r' % self.filename)
            result.append(' mode=%r' % self.mode)
        else:
            result.append(' [closed]')
        result.append('>')
        return ''.join(result)

    def _RealGetContents(self):
        """Read in the table of contents for the ZIP file."""
        fp = self.fp
        try:
            endrec = _EndRecData(fp)
        except OSError:
            raise BadZipFile("File is not a zip file")
        if not endrec:
            raise BadZipFile("File is not a zip file")
        if self.debug > 1:
            print(endrec)
        size_cd = endrec[_ECD_SIZE]             # bytes in central directory
        offset_cd = endrec[_ECD_OFFSET]         # offset of central directory
        self._comment = endrec[_ECD_COMMENT]    # archive comment

        # "concat" is zero, unless zip was concatenated to another file
        concat = endrec[_ECD_LOCATION] - size_cd - offset_cd
        if endrec[_ECD_SIGNATURE] == stringEndArchive64:
            # If Zip64 extension structures are present, account for them
            concat -= (sizeEndCentDir64 + sizeEndCentDir64Locator)

        if self.debug > 2:
            inferred = concat + offset_cd
            print("given, inferred, offset", offset_cd, inferred, concat)
        # self.start_dir:  Position of start of central directory
        self.start_dir = offset_cd + concat
        fp.seek(self.start_dir, 0)
        data = fp.read(size_cd)
        fp = io.BytesIO(data)
        total = 0
        while total < size_cd:
            centdir = fp.read(sizeCentralDir)
            if len(centdir) != sizeCentralDir:
                raise BadZipFile("Truncated central directory")
            centdir = struct.unpack(structCentralDir, centdir)
            if centdir[_CD_SIGNATURE] != stringCentralDir:
                raise BadZipFile("Bad magic number for central directory")
            if self.debug > 2:
                print(centdir)
            filename = fp.read(centdir[_CD_FILENAME_LENGTH])
            flags = centdir[5]
            if flags & 0x800:
                # UTF-8 file names extension
                filename = filename.decode('utf-8')
            else:
                # Historical ZIP filename encoding
                filename = filename.decode('cp437')
            # Create ZipInfo instance to store file information
            x = ZipInfo(filename)
            x.extra = fp.read(centdir[_CD_EXTRA_FIELD_LENGTH])
            x.comment = fp.read(centdir[_CD_COMMENT_LENGTH])
            x.header_offset = centdir[_CD_LOCAL_HEADER_OFFSET]
            (x.create_version, x.create_system, x.extract_version, x.reserved,
             x.flag_bits, x.compress_type, t, d,
             x.CRC, x.compress_size, x.file_size) = centdir[1:12]
            if x.extract_version > MAX_EXTRACT_VERSION:
                raise NotImplementedError("zip file version %.1f" %
                                          (x.extract_version / 10))
            x.volume, x.internal_attr, x.external_attr = centdir[15:18]
            # Convert date/time code to (year, month, day, hour, min, sec)
            x._raw_time = t
            x.date_time = ( (d>>9)+1980, (d>>5)&0xF, d&0x1F,
                            t>>11, (t>>5)&0x3F, (t&0x1F) * 2 )

            x._decodeExtra()
            x.header_offset = x.header_offset + concat
            self.filelist.append(x)
            self.NameToInfo[x.filename] = x

            # update total bytes read from central directory
            total = (total + sizeCentralDir + centdir[_CD_FILENAME_LENGTH]
                     + centdir[_CD_EXTRA_FIELD_LENGTH]
                     + centdir[_CD_COMMENT_LENGTH])

        if self.debug > 2:
            print("total", total)


    def namelist(self):
        """Return a list of file names in the archive."""
        return [data.filename for data in self.filelist]

    def infolist(self):
        """Return a list of class ZipInfo instances for files in the
        archive."""
        return self.filelist

    def printdir(self, file=None):
        """Print a table of contents for the zip file."""
        print("%-46s %19s %12s" % ("File Name", "Modified    ", "Size"),
              file=file)
        for zinfo in self.filelist:
            date = "%d-%02d-%02d %02d:%02d:%02d" % zinfo.date_time[:6]
            print("%-46s %s %12d" % (zinfo.filename, date, zinfo.file_size),
                  file=file)

    def testzip(self):
        """Read all the files and check the CRC."""
        chunk_size = 2 ** 20
        for zinfo in self.filelist:
            try:
                # Read by chunks, to avoid an OverflowError or a
                # MemoryError with very large embedded files.
                with self.open(zinfo.filename, "r") as f:
                    while f.read(chunk_size):     # Check CRC-32
                        pass
            except BadZipFile:
                return zinfo.filename

    def getinfo(self, name):
        """Return the instance of ZipInfo given 'name'."""
        info = self.NameToInfo.get(name)
        if info is None:
            raise KeyError(
                'There is no item named %r in the archive' % name)

        return info

    def setpassword(self, pwd):
        """Set default password for encrypted files."""
        if pwd and not isinstance(pwd, bytes):
            raise TypeError("pwd: expected bytes, got %s" % type(pwd).__name__)
        if pwd:
            self.pwd = pwd
        else:
            self.pwd = None

    @property
    def comment(self):
        """The comment text associated with the ZIP file."""
        return self._comment

    @comment.setter
    def comment(self, comment):
        if not isinstance(comment, bytes):
            raise TypeError("comment: expected bytes, got %s" % type(comment).__name__)
        # check for valid comment length
        if len(comment) > ZIP_MAX_COMMENT:
            import warnings
            warnings.warn('Archive comment is too long; truncating to %d bytes'
                          % ZIP_MAX_COMMENT, stacklevel=2)
            comment = comment[:ZIP_MAX_COMMENT]
        self._comment = comment
        self._didModify = True

    def read(self, name, pwd=None):
        """Return file bytes for name."""
        with self.open(name, "r", pwd) as fp:
            return fp.read()

    def open(self, name, mode="r", pwd=None, *, force_zip64=False):
        """Return file-like object for 'name'.

        name is a string for the file name within the ZIP file, or a ZipInfo
        object.

        mode should be 'r' to read a file already in the ZIP file, or 'w' to
        write to a file newly added to the archive.

        pwd is the password to decrypt files (only used for reading).

        When writing, if the file size is not known in advance but may exceed
        2 GiB, pass force_zip64 to use the ZIP64 format, which can handle large
        files.  If the size is known in advance, it is best to pass a ZipInfo
        instance for name, with zinfo.file_size set.
        """
        if mode not in {"r", "w"}:
            raise ValueError('open() requires mode "r" or "w"')
        if pwd and not isinstance(pwd, bytes):
            raise TypeError("pwd: expected bytes, got %s" % type(pwd).__name__)
        if pwd and (mode == "w"):
            raise ValueError("pwd is only supported for reading files")
        if not self.fp:
            raise ValueError(
                "Attempt to use ZIP archive that was already closed")

        # Make sure we have an info object
        if isinstance(name, ZipInfo):
            # 'name' is already an info object
            zinfo = name
        elif mode == 'w':
            zinfo = ZipInfo(name)
            zinfo.compress_type = self.compression
            zinfo._compresslevel = self.compresslevel
        else:
            # Get info object for name
            zinfo = self.getinfo(name)

        if mode == 'w':
            return self._open_to_write(zinfo, force_zip64=force_zip64)

        if self._writing:
            raise ValueError("Can't read from the ZIP file while there "
                             "is an open writing handle on it. "
                             "Close the writing handle before trying to read.")

        # Open for reading:
        self._fileRefCnt += 1
        zef_file = _SharedFile(self.fp, zinfo.header_offset,
                               self._fpclose, self._lock, lambda: self._writing)
        try:
            # Skip the file header:
            fheader = zef_file.read(sizeFileHeader)
            if len(fheader) != sizeFileHeader:
                raise BadZipFile("Truncated file header")
            fheader = struct.unpack(structFileHeader, fheader)
            if fheader[_FH_SIGNATURE] != stringFileHeader:
                raise BadZipFile("Bad magic number for file header")

            fname = zef_file.read(fheader[_FH_FILENAME_LENGTH])
            if fheader[_FH_EXTRA_FIELD_LENGTH]:
                zef_file.read(fheader[_FH_EXTRA_FIELD_LENGTH])

            if zinfo.flag_bits & 0x20:
                # Zip 2.7: compressed patched data
                raise NotImplementedError("compressed patched data (flag bit 5)")

            if zinfo.flag_bits & 0x40:
                # strong encryption
                raise NotImplementedError("strong encryption (flag bit 6)")

            if fheader[_FH_GENERAL_PURPOSE_FLAG_BITS] & 0x800:
                # UTF-8 filename
                fname_str = fname.decode("utf-8")
            else:
                fname_str = fname.decode("cp437")

            if fname_str != zinfo.orig_filename:
                raise BadZipFile(
                    'File name in directory %r and header %r differ.'
                    % (zinfo.orig_filename, fname))

            # check for encrypted flag & handle password
            is_encrypted = zinfo.flag_bits & 0x1
            if is_encrypted:
                if not pwd:
                    pwd = self.pwd
                if not pwd:
                    raise RuntimeError("File %r is encrypted, password "
                                       "required for extraction" % name)
            else:
                pwd = None

            return ZipExtFile(zef_file, mode, zinfo, pwd, True)
        except:
            zef_file.close()
            raise

    def _open_to_write(self, zinfo, force_zip64=False):
        if force_zip64 and not self._allowZip64:
            raise ValueError(
                "force_zip64 is True, but allowZip64 was False when opening "
                "the ZIP file."
            )
        if self._writing:
            raise ValueError("Can't write to the ZIP file while there is "
                             "another write handle open on it. "
                             "Close the first handle before opening another.")

        # Sizes and CRC are overwritten with correct data after processing the file
        if not hasattr(zinfo, 'file_size'):
            zinfo.file_size = 0
        zinfo.compress_size = 0
        zinfo.CRC = 0

        zinfo.flag_bits = 0x00
        if zinfo.compress_type == ZIP_LZMA:
            # Compressed data includes an end-of-stream (EOS) marker
            zinfo.flag_bits |= 0x02
        if not self._seekable:
            zinfo.flag_bits |= 0x08

        if not zinfo.external_attr:
            zinfo.external_attr = 0o600 << 16  # permissions: ?rw-------

        # Compressed size can be larger than uncompressed size
        zip64 = self._allowZip64 and \
                (force_zip64 or zinfo.file_size * 1.05 > ZIP64_LIMIT)

        if self._seekable:
            self.fp.seek(self.start_dir)
        zinfo.header_offset = self.fp.tell()

        self._writecheck(zinfo)
        self._didModify = True

        self.fp.write(zinfo.FileHeader(zip64))

        self._writing = True
        return _ZipWriteFile(self, zinfo, zip64)

    def extract(self, member, path=None, pwd=None):
        """Extract a member from the archive to the current working directory,
           using its full name. Its file information is extracted as accurately
           as possible. `member' may be a filename or a ZipInfo object. You can
           specify a different directory using `path'.
        """
        if path is None:
            path = os.getcwd()
        else:
            path = os.fspath(path)

        return self._extract_member(member, path, pwd)

    def extractall(self, path=None, members=None, pwd=None):
        """Extract all members from the archive to the current working
           directory. `path' specifies a different directory to extract to.
           `members' is optional and must be a subset of the list returned
           by namelist().
        """
        if members is None:
            members = self.namelist()

        if path is None:
            path = os.getcwd()
        else:
            path = os.fspath(path)

        for zipinfo in members:
            self._extract_member(zipinfo, path, pwd)

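    # Extraction sketch (archive, member names and target directory are
    # hypothetical):
    #
    #     with ZipFile("archive.zip") as zf:
    #         zf.extract("docs/readme.txt", path="unpacked")
    #         zf.extractall(path="unpacked", members=["a.txt", "b.txt"])
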
    @classmethod
    def _sanitize_windows_name(cls, arcname, pathsep):
        """Replace bad characters and remove trailing dots from parts."""
        table = cls._windows_illegal_name_trans_table
        if not table:
            illegal = ':<>|"?*'
            table = str.maketrans(illegal, '_' * len(illegal))
            cls._windows_illegal_name_trans_table = table
        arcname = arcname.translate(table)
        # remove trailing dots
        arcname = (x.rstrip('.') for x in arcname.split(pathsep))
        # rejoin, removing empty parts.
        arcname = pathsep.join(x for x in arcname if x)
        return arcname

    def _extract_member(self, member, targetpath, pwd):
        """Extract the ZipInfo object 'member' to a physical
           file on the path targetpath.
        """
        if not isinstance(member, ZipInfo):
            member = self.getinfo(member)

        # build the destination pathname, replacing
        # forward slashes to platform specific separators.
        arcname = member.filename.replace('/', os.path.sep)

        if os.path.altsep:
            arcname = arcname.replace(os.path.altsep, os.path.sep)
        # interpret absolute pathname as relative, remove drive letter or
        # UNC path, redundant separators, "." and ".." components.
        arcname = os.path.splitdrive(arcname)[1]
        invalid_path_parts = ('', os.path.curdir, os.path.pardir)
        arcname = os.path.sep.join(x for x in arcname.split(os.path.sep)
                                   if x not in invalid_path_parts)
        if os.path.sep == '\\':
            # filter illegal characters on Windows
            arcname = self._sanitize_windows_name(arcname, os.path.sep)

        targetpath = os.path.join(targetpath, arcname)
        targetpath = os.path.normpath(targetpath)

        # Create all upper directories if necessary.
        upperdirs = os.path.dirname(targetpath)
        if upperdirs and not os.path.exists(upperdirs):
            os.makedirs(upperdirs)

        if member.is_dir():
            if not os.path.isdir(targetpath):
                os.mkdir(targetpath)
            return targetpath

        with self.open(member, pwd=pwd) as source, \
             open(targetpath, "wb") as target:
            shutil.copyfileobj(source, target)

        return targetpath

    def _writecheck(self, zinfo):
        """Check for errors before writing a file to the archive."""
        if zinfo.filename in self.NameToInfo:
            import warnings
            warnings.warn('Duplicate name: %r' % zinfo.filename, stacklevel=3)
        if self.mode not in ('w', 'x', 'a'):
            raise ValueError("write() requires mode 'w', 'x', or 'a'")
        if not self.fp:
            raise ValueError(
                "Attempt to write ZIP archive that was already closed")
        _check_compression(zinfo.compress_type)
        if not self._allowZip64:
            requires_zip64 = None
            if len(self.filelist) >= ZIP_FILECOUNT_LIMIT:
                requires_zip64 = "Files count"
            elif zinfo.file_size > ZIP64_LIMIT:
                requires_zip64 = "Filesize"
            elif zinfo.header_offset > ZIP64_LIMIT:
                requires_zip64 = "Zipfile size"
            if requires_zip64:
                raise LargeZipFile(requires_zip64 +
                                   " would require ZIP64 extensions")

    def write(self, filename, arcname=None,
              compress_type=None, compresslevel=None):
        """Put the bytes from filename into the archive under the name
        arcname."""
        if not self.fp:
            raise ValueError(
                "Attempt to write to ZIP archive that was already closed")
        if self._writing:
            raise ValueError(
                "Can't write to ZIP archive while an open writing handle exists"
            )

        zinfo = ZipInfo.from_file(filename, arcname,
                                  strict_timestamps=self._strict_timestamps)

        if zinfo.is_dir():
            zinfo.compress_size = 0
            zinfo.CRC = 0
        else:
            if compress_type is not None:
                zinfo.compress_type = compress_type
            else:
                zinfo.compress_type = self.compression

            if compresslevel is not None:
                zinfo._compresslevel = compresslevel
            else:
                zinfo._compresslevel = self.compresslevel

        if zinfo.is_dir():
            with self._lock:
                if self._seekable:
                    self.fp.seek(self.start_dir)
                zinfo.header_offset = self.fp.tell()  # Start of header bytes
                if zinfo.compress_type == ZIP_LZMA:
                    # Compressed data includes an end-of-stream (EOS) marker
                    zinfo.flag_bits |= 0x02

                self._writecheck(zinfo)
                self._didModify = True

                self.filelist.append(zinfo)
                self.NameToInfo[zinfo.filename] = zinfo
                self.fp.write(zinfo.FileHeader(False))
                self.start_dir = self.fp.tell()
        else:
            with open(filename, "rb") as src, self.open(zinfo, 'w') as dest:
                shutil.copyfileobj(src, dest, 1024*8)

    def writestr(self, zinfo_or_arcname, data,
                 compress_type=None, compresslevel=None):
        """Write a file into the archive.  The contents is 'data', which
        may be either a 'str' or a 'bytes' instance; if it is a 'str',
        it is encoded as UTF-8 first.
        'zinfo_or_arcname' is either a ZipInfo instance or
        the name of the file in the archive."""
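        # e.g. (the member name and payload are hypothetical):
        #
        #     zf.writestr("metadata.json", '{"version": 1}',
        #                 compress_type=ZIP_DEFLATED)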
1785 if isinstance(data, str):
1786 data = data.encode("utf-8")
1787 if not isinstance(zinfo_or_arcname, ZipInfo):
1788 zinfo = ZipInfo(filename=zinfo_or_arcname,
1789 date_time=time.localtime(time.time())[:6])
1790 zinfo.compress_type = self.compression
1791 zinfo._compresslevel = self.compresslevel
1792 if zinfo.filename[-1] == '/':
1793 zinfo.external_attr = 0o40775 << 16 # drwxrwxr-x
1794 zinfo.external_attr |= 0x10 # MS-DOS directory flag
1795 else:
1796 zinfo.external_attr = 0o600 << 16 # ?rw-------
1797 else:
1798 zinfo = zinfo_or_arcname
1799
1800 if not self.fp:
1801 raise ValueError(
1802 "Attempt to write to ZIP archive that was already closed")
1803 if self._writing:
1804 raise ValueError(
1805 "Can't write to ZIP archive while an open writing handle exists."
1806 )
1807
1808 if compress_type is not None:
1809 zinfo.compress_type = compress_type
1810
1811 if compresslevel is not None:
1812 zinfo._compresslevel = compresslevel
1813
1814 zinfo.file_size = len(data) # Uncompressed size
1815 with self._lock:
1816 with self.open(zinfo, mode='w') as dest:
1817 dest.write(data)
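    # Illustrative use of writestr() (a sketch, not executed here):
    #
    #     with ZipFile("archive.zip", "w") as zf:
    #         zf.writestr("hello.txt", "hello world")   # str is encoded as UTF-8
    #         info = ZipInfo("data/blob.bin", date_time=(2024, 1, 1, 0, 0, 0))
    #         info.compress_type = ZIP_DEFLATED
    #         zf.writestr(info, b"\x00\x01\x02")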
1818
1819 def __del__(self):
1820 """Call the "close()" method in case the user forgot."""
1821 self.close()
1822
1823 def close(self):
1824 """Close the file, and for mode 'w', 'x' and 'a' write the ending
1825 records."""
1826 if self.fp is None:
1827 return
1828
1829 if self._writing:
1830 raise ValueError("Can't close the ZIP file while there is "
1831 "an open writing handle on it. "
1832 "Close the writing handle before closing the zip.")
1833
1834 try:
1835 if self.mode in ('w', 'x', 'a') and self._didModify: # write ending records
1836 with self._lock:
1837 if self._seekable:
1838 self.fp.seek(self.start_dir)
1839 self._write_end_record()
1840 finally:
1841 fp = self.fp
1842 self.fp = None
1843 self._fpclose(fp)
1844
1845 def _write_end_record(self):
1846 for zinfo in self.filelist: # write central directory
1847 dt = zinfo.date_time
1848 dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
1849 dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)
1850 extra = []
1851 if zinfo.file_size > ZIP64_LIMIT \
1852 or zinfo.compress_size > ZIP64_LIMIT:
1853 extra.append(zinfo.file_size)
1854 extra.append(zinfo.compress_size)
1855 file_size = 0xffffffff
1856 compress_size = 0xffffffff
1857 else:
1858 file_size = zinfo.file_size
1859 compress_size = zinfo.compress_size
1860
1861 if zinfo.header_offset > ZIP64_LIMIT:
1862 extra.append(zinfo.header_offset)
1863 header_offset = 0xffffffff
1864 else:
1865 header_offset = zinfo.header_offset
1866
1867 extra_data = zinfo.extra
1868 min_version = 0
1869 if extra:
1870                # Prepend a ZIP64 extra field to the extra data
1871 extra_data = _strip_extra(extra_data, (1,))
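                # ZIP64 extended-information extra field: 2-byte header ID
                # (0x0001), 2-byte data length, then the 64-bit values
                # (file size, compressed size and/or header offset) that
                # overflowed the 32-bit fields above.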
1872 extra_data = struct.pack(
1873 '<HH' + 'Q'*len(extra),
1874 1, 8*len(extra), *extra) + extra_data
1875
1876 min_version = ZIP64_VERSION
1877
1878 if zinfo.compress_type == ZIP_BZIP2:
1879 min_version = max(BZIP2_VERSION, min_version)
1880 elif zinfo.compress_type == ZIP_LZMA:
1881 min_version = max(LZMA_VERSION, min_version)
1882
1883 extract_version = max(min_version, zinfo.extract_version)
1884 create_version = max(min_version, zinfo.create_version)
1885 try:
1886 filename, flag_bits = zinfo._encodeFilenameFlags()
1887 centdir = struct.pack(structCentralDir,
1888 stringCentralDir, create_version,
1889 zinfo.create_system, extract_version, zinfo.reserved,
1890 flag_bits, zinfo.compress_type, dostime, dosdate,
1891 zinfo.CRC, compress_size, file_size,
1892 len(filename), len(extra_data), len(zinfo.comment),
1893 0, zinfo.internal_attr, zinfo.external_attr,
1894 header_offset)
1895 except DeprecationWarning:
1896 print((structCentralDir, stringCentralDir, create_version,
1897 zinfo.create_system, extract_version, zinfo.reserved,
1898 zinfo.flag_bits, zinfo.compress_type, dostime, dosdate,
1899 zinfo.CRC, compress_size, file_size,
1900 len(zinfo.filename), len(extra_data), len(zinfo.comment),
1901 0, zinfo.internal_attr, zinfo.external_attr,
1902 header_offset), file=sys.stderr)
1903 raise
1904 self.fp.write(centdir)
1905 self.fp.write(filename)
1906 self.fp.write(extra_data)
1907 self.fp.write(zinfo.comment)
1908
1909 pos2 = self.fp.tell()
1910 # Write end-of-zip-archive record
1911 centDirCount = len(self.filelist)
1912 centDirSize = pos2 - self.start_dir
1913 centDirOffset = self.start_dir
1914 requires_zip64 = None
1915 if centDirCount > ZIP_FILECOUNT_LIMIT:
1916 requires_zip64 = "Files count"
1917 elif centDirOffset > ZIP64_LIMIT:
1918 requires_zip64 = "Central directory offset"
1919 elif centDirSize > ZIP64_LIMIT:
1920 requires_zip64 = "Central directory size"
1921 if requires_zip64:
1922 # Need to write the ZIP64 end-of-archive records
1923 if not self._allowZip64:
1924 raise LargeZipFile(requires_zip64 +
1925 " would require ZIP64 extensions")
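            # ZIP64 "end of central directory" record: fixed record size
            # (44), version made by (45), version needed (45), disk
            # numbers, entry counts, and central directory size/offset.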
1926 zip64endrec = struct.pack(
1927 structEndArchive64, stringEndArchive64,
1928 44, 45, 45, 0, 0, centDirCount, centDirCount,
1929 centDirSize, centDirOffset)
1930 self.fp.write(zip64endrec)
1931
1932 zip64locrec = struct.pack(
1933 structEndArchive64Locator,
1934 stringEndArchive64Locator, 0, pos2, 1)
1935 self.fp.write(zip64locrec)
1936 centDirCount = min(centDirCount, 0xFFFF)
1937 centDirSize = min(centDirSize, 0xFFFFFFFF)
1938 centDirOffset = min(centDirOffset, 0xFFFFFFFF)
1939
1940 endrec = struct.pack(structEndArchive, stringEndArchive,
1941 0, 0, centDirCount, centDirCount,
1942 centDirSize, centDirOffset, len(self._comment))
1943 self.fp.write(endrec)
1944 self.fp.write(self._comment)
1945 if self.mode == "a":
1946 self.fp.truncate()
1947 self.fp.flush()
1948
1949 def _fpclose(self, fp):
1950 assert self._fileRefCnt > 0
1951 self._fileRefCnt -= 1
1952 if not self._fileRefCnt and not self._filePassed:
1953 fp.close()
1954
1955
1956class PyZipFile(ZipFile):
1957 """Class to create ZIP archives with Python library files and packages."""
1958
1959 def __init__(self, file, mode="r", compression=ZIP_STORED,
1960 allowZip64=True, optimize=-1):
1961 ZipFile.__init__(self, file, mode=mode, compression=compression,
1962 allowZip64=allowZip64)
1963 self._optimize = optimize
1964
1965 def writepy(self, pathname, basename="", filterfunc=None):
1966        """Add all Python modules from "pathname" to the ZIP archive.
1967
1968        If pathname is a package directory, search the directory and
1969        all package subdirectories recursively for all *.py files and
1970        add the corresponding modules to the archive. If pathname is a
1971        plain directory, add the *.py files found directly in it.
1972        Otherwise, pathname must be a Python *.py file and that module
1973        is added to the archive. Added modules are always stored as
1974        module.pyc; this method compiles module.py into module.pyc
1975        if necessary.
1976        If filterfunc is given, it is called with each path before it is
1977        added; if it returns a false value, the file or directory is skipped.
1978 """
1979 pathname = os.fspath(pathname)
1980 if filterfunc and not filterfunc(pathname):
1981 if self.debug:
1982 label = 'path' if os.path.isdir(pathname) else 'file'
1983 print('%s %r skipped by filterfunc' % (label, pathname))
1984 return
1985 dir, name = os.path.split(pathname)
1986 if os.path.isdir(pathname):
1987 initname = os.path.join(pathname, "__init__.py")
1988 if os.path.isfile(initname):
1989 # This is a package directory, add it
1990 if basename:
1991 basename = "%s/%s" % (basename, name)
1992 else:
1993 basename = name
1994 if self.debug:
1995 print("Adding package in", pathname, "as", basename)
1996 fname, arcname = self._get_codename(initname[0:-3], basename)
1997 if self.debug:
1998 print("Adding", arcname)
1999 self.write(fname, arcname)
2000 dirlist = sorted(os.listdir(pathname))
2001 dirlist.remove("__init__.py")
2002 # Add all *.py files and package subdirectories
2003 for filename in dirlist:
2004 path = os.path.join(pathname, filename)
2005 root, ext = os.path.splitext(filename)
2006 if os.path.isdir(path):
2007 if os.path.isfile(os.path.join(path, "__init__.py")):
2008 # This is a package directory, add it
2009 self.writepy(path, basename,
2010 filterfunc=filterfunc) # Recursive call
2011 elif ext == ".py":
2012 if filterfunc and not filterfunc(path):
2013 if self.debug:
2014 print('file %r skipped by filterfunc' % path)
2015 continue
2016 fname, arcname = self._get_codename(path[0:-3],
2017 basename)
2018 if self.debug:
2019 print("Adding", arcname)
2020 self.write(fname, arcname)
2021 else:
2022 # This is NOT a package directory, add its files at top level
2023 if self.debug:
2024 print("Adding files from directory", pathname)
2025 for filename in sorted(os.listdir(pathname)):
2026 path = os.path.join(pathname, filename)
2027 root, ext = os.path.splitext(filename)
2028 if ext == ".py":
2029 if filterfunc and not filterfunc(path):
2030 if self.debug:
2031 print('file %r skipped by filterfunc' % path)
2032 continue
2033 fname, arcname = self._get_codename(path[0:-3],
2034 basename)
2035 if self.debug:
2036 print("Adding", arcname)
2037 self.write(fname, arcname)
2038 else:
2039 if pathname[-3:] != ".py":
2040 raise RuntimeError(
2041 'Files added with writepy() must end with ".py"')
2042 fname, arcname = self._get_codename(pathname[0:-3], basename)
2043 if self.debug:
2044 print("Adding file", arcname)
2045 self.write(fname, arcname)
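    # Illustrative use of writepy() (a sketch, not executed here; "mypkg"
    # is a hypothetical package directory):
    #
    #     with PyZipFile("lib.zip", "w", optimize=2) as pzf:
    #         pzf.writepy("mypkg", filterfunc=lambda p: "test" not in p)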
2046
2047 def _get_codename(self, pathname, basename):
2048 """Return (filename, archivename) for the path.
2049
2050 Given a module name path, return the correct file path and
2051 archive name, compiling if necessary. For example, given
2052 /python/lib/string, return (/python/lib/string.pyc, string).
2053        /python/lib/string, return (/python/lib/string.pyc, string.pyc).
2054 def _compile(file, optimize=-1):
2055 import py_compile
2056 if self.debug:
2057 print("Compiling", file)
2058 try:
2059 py_compile.compile(file, doraise=True, optimize=optimize)
2060 except py_compile.PyCompileError as err:
2061 print(err.msg)
2062 return False
2063 return True
2064
2065 file_py = pathname + ".py"
2066 file_pyc = pathname + ".pyc"
2067 pycache_opt0 = importlib.util.cache_from_source(file_py, optimization='')
2068 pycache_opt1 = importlib.util.cache_from_source(file_py, optimization=1)
2069 pycache_opt2 = importlib.util.cache_from_source(file_py, optimization=2)
2070 if self._optimize == -1:
2071 # legacy mode: use whatever file is present
2072 if (os.path.isfile(file_pyc) and
2073 os.stat(file_pyc).st_mtime >= os.stat(file_py).st_mtime):
2074 # Use .pyc file.
2075 arcname = fname = file_pyc
2076 elif (os.path.isfile(pycache_opt0) and
2077 os.stat(pycache_opt0).st_mtime >= os.stat(file_py).st_mtime):
2078 # Use the __pycache__/*.pyc file, but write it to the legacy pyc
2079 # file name in the archive.
2080 fname = pycache_opt0
2081 arcname = file_pyc
2082 elif (os.path.isfile(pycache_opt1) and
2083 os.stat(pycache_opt1).st_mtime >= os.stat(file_py).st_mtime):
2084 # Use the __pycache__/*.pyc file, but write it to the legacy pyc
2085 # file name in the archive.
2086 fname = pycache_opt1
2087 arcname = file_pyc
2088 elif (os.path.isfile(pycache_opt2) and
2089 os.stat(pycache_opt2).st_mtime >= os.stat(file_py).st_mtime):
2090 # Use the __pycache__/*.pyc file, but write it to the legacy pyc
2091 # file name in the archive.
2092 fname = pycache_opt2
2093 arcname = file_pyc
2094 else:
2095 # Compile py into PEP 3147 pyc file.
2096 if _compile(file_py):
2097 if sys.flags.optimize == 0:
2098 fname = pycache_opt0
2099 elif sys.flags.optimize == 1:
2100 fname = pycache_opt1
2101 else:
2102 fname = pycache_opt2
2103 arcname = file_pyc
2104 else:
2105 fname = arcname = file_py
2106 else:
2107 # new mode: use given optimization level
2108 if self._optimize == 0:
2109 fname = pycache_opt0
2110 arcname = file_pyc
2111 else:
2112 arcname = file_pyc
2113 if self._optimize == 1:
2114 fname = pycache_opt1
2115 elif self._optimize == 2:
2116 fname = pycache_opt2
2117 else:
2118 msg = "invalid value for 'optimize': {!r}".format(self._optimize)
2119 raise ValueError(msg)
2120 if not (os.path.isfile(fname) and
2121 os.stat(fname).st_mtime >= os.stat(file_py).st_mtime):
2122 if not _compile(file_py, optimize=self._optimize):
2123 fname = arcname = file_py
2124 archivename = os.path.split(arcname)[1]
2125 if basename:
2126 archivename = "%s/%s" % (basename, archivename)
2127 return (fname, archivename)
2128
2129
2130def _parents(path):
2131 """
2132 Given a path with elements separated by
2133 posixpath.sep, generate all parents of that path.
2134
2135 >>> list(_parents('b/d'))
2136 ['b']
2137 >>> list(_parents('/b/d/'))
2138 ['/b']
2139 >>> list(_parents('b/d/f/'))
2140 ['b/d', 'b']
2141 >>> list(_parents('b'))
2142 []
2143 >>> list(_parents(''))
2144 []
2145 """
2146 return itertools.islice(_ancestry(path), 1, None)
2147
2148
2149def _ancestry(path):
2150 """
2151 Given a path with elements separated by
2152 posixpath.sep, generate all elements of that path
2153
2154 >>> list(_ancestry('b/d'))
2155 ['b/d', 'b']
2156 >>> list(_ancestry('/b/d/'))
2157 ['/b/d', '/b']
2158 >>> list(_ancestry('b/d/f/'))
2159 ['b/d/f', 'b/d', 'b']
2160 >>> list(_ancestry('b'))
2161 ['b']
2162 >>> list(_ancestry(''))
2163 []
2164 """
2165 path = path.rstrip(posixpath.sep)
2166 while path and path != posixpath.sep:
2167 yield path
2168 path, tail = posixpath.split(path)
2169
2170
2171_dedupe = dict.fromkeys
2172"""Deduplicate an iterable in original order"""
2173
2174
2175def _difference(minuend, subtrahend):
2176 """
2177 Return items in minuend not in subtrahend, retaining order
2178 with O(1) lookup.
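
    For example:

    >>> list(_difference('abcd', 'bd'))
    ['a', 'c']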
2179 """
2180 return itertools.filterfalse(set(subtrahend).__contains__, minuend)
2181
2182
2183class CompleteDirs(ZipFile):
2184 """
2185 A ZipFile subclass that ensures that implied directories
2186 are always included in the namelist.
2187 """
2188
2189 @staticmethod
2190 def _implied_dirs(names):
2191 parents = itertools.chain.from_iterable(map(_parents, names))
2192 as_dirs = (p + posixpath.sep for p in parents)
2193 return _dedupe(_difference(as_dirs, names))
2194
2195 def namelist(self):
2196 names = super(CompleteDirs, self).namelist()
2197 return names + list(self._implied_dirs(names))
2198
2199 def _name_set(self):
2200 return set(self.namelist())
2201
2202 def resolve_dir(self, name):
2203 """
2204 If the name represents a directory, return that name
2205 as a directory (with the trailing slash).
2206 """
2207 names = self._name_set()
2208 dirname = name + '/'
2209 dir_match = name not in names and dirname in names
2210 return dirname if dir_match else name
2211
2212 @classmethod
2213 def make(cls, source):
2214 """
2215 Given a source (filename or zipfile), return an
2216 appropriate CompleteDirs subclass.
2217 """
2218 if isinstance(source, CompleteDirs):
2219 return source
2220
2221 if not isinstance(source, ZipFile):
2222 return cls(source)
2223
2224 # Only allow for FastPath when supplied zipfile is read-only
2225        # Only allow FastLookup when the supplied zipfile is read-only
2226 cls = CompleteDirs
2227
2228 res = cls.__new__(cls)
2229 vars(res).update(vars(source))
2230 return res
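    # Illustrative use of make() (a sketch, not executed here; "archive.zip"
    # is a hypothetical file). It accepts a filename, an open ZipFile, or an
    # existing CompleteDirs instance:
    #
    #     cd = CompleteDirs.make("archive.zip")
    #     cd.namelist()   # includes implied directory entries such as "b/"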
2231
2232
2233class FastLookup(CompleteDirs):
2234 """
2235 ZipFile subclass to ensure implicit
2236 dirs exist and are resolved rapidly.
2237 """
2238 def namelist(self):
2239 with contextlib.suppress(AttributeError):
2240 return self.__names
2241 self.__names = super(FastLookup, self).namelist()
2242 return self.__names
2243
2244 def _name_set(self):
2245 with contextlib.suppress(AttributeError):
2246 return self.__lookup
2247 self.__lookup = super(FastLookup, self)._name_set()
2248 return self.__lookup
2249
2250
2251class Path:
2252 """
2253 A pathlib-compatible interface for zip files.
2254
2255 Consider a zip file with this structure::
2256
2257 .
2258 ├── a.txt
2259 └── b
2260 ├── c.txt
2261 └── d
2262 └── e.txt
2263
2264 >>> data = io.BytesIO()
2265 >>> zf = ZipFile(data, 'w')
2266 >>> zf.writestr('a.txt', 'content of a')
2267 >>> zf.writestr('b/c.txt', 'content of c')
2268 >>> zf.writestr('b/d/e.txt', 'content of e')
2269 >>> zf.filename = 'abcde.zip'
2270
2271 Path accepts the zipfile object itself or a filename
2272
2273 >>> root = Path(zf)
2274
2275 From there, several path operations are available.
2276
2277 Directory iteration (including the zip file itself):
2278
2279 >>> a, b = root.iterdir()
2280 >>> a
2281 Path('abcde.zip', 'a.txt')
2282 >>> b
2283 Path('abcde.zip', 'b/')
2284
2285 name property:
2286
2287 >>> b.name
2288 'b'
2289
2290 join with divide operator:
2291
2292 >>> c = b / 'c.txt'
2293 >>> c
2294 Path('abcde.zip', 'b/c.txt')
2295 >>> c.name
2296 'c.txt'
2297
2298 Read text:
2299
2300 >>> c.read_text()
2301 'content of c'
2302
2303 existence:
2304
2305 >>> c.exists()
2306 True
2307 >>> (b / 'missing.txt').exists()
2308 False
2309
2310 Coercion to string:
2311
2312 >>> str(c)
2313 'abcde.zip/b/c.txt'
2314 """
2315
2316 __repr = "{self.__class__.__name__}({self.root.filename!r}, {self.at!r})"
2317
2318 def __init__(self, root, at=""):
2319 self.root = FastLookup.make(root)
2320 self.at = at
2321
2322 @property
2323 def open(self):
2324 return functools.partial(self.root.open, self.at)
2325
2326 @property
2327 def name(self):
2328 return posixpath.basename(self.at.rstrip("/"))
2329
2330 def read_text(self, *args, **kwargs):
2331 with self.open() as strm:
2332 return io.TextIOWrapper(strm, *args, **kwargs).read()
2333
2334 def read_bytes(self):
2335 with self.open() as strm:
2336 return strm.read()
2337
2338 def _is_child(self, path):
2339 return posixpath.dirname(path.at.rstrip("/")) == self.at.rstrip("/")
2340
2341 def _next(self, at):
2342 return Path(self.root, at)
2343
2344 def is_dir(self):
2345 return not self.at or self.at.endswith("/")
2346
2347 def is_file(self):
2348 return not self.is_dir()
2349
2350 def exists(self):
2351 return self.at in self.root._name_set()
2352
2353 def iterdir(self):
2354 if not self.is_dir():
2355 raise ValueError("Can't listdir a file")
2356 subs = map(self._next, self.root.namelist())
2357 return filter(self._is_child, subs)
2358
2359 def __str__(self):
2360 return posixpath.join(self.root.filename, self.at)
2361
2362 def __repr__(self):
2363 return self.__repr.format(self=self)
2364
2365 def joinpath(self, add):
2366 next = posixpath.join(self.at, add)
2367 return self._next(self.root.resolve_dir(next))
2368
2369 __truediv__ = joinpath
2370
2371 @property
2372 def parent(self):
2373 parent_at = posixpath.dirname(self.at.rstrip('/'))
2374 if parent_at:
2375 parent_at += '/'
2376 return self._next(parent_at)
2377
2378
2379def main(args=None):
2380 import argparse
2381
2382    description = 'A simple command-line interface for the zipfile module.'
2383 parser = argparse.ArgumentParser(description=description)
2384 group = parser.add_mutually_exclusive_group(required=True)
2385 group.add_argument('-l', '--list', metavar='<zipfile>',
2386 help='Show listing of a zipfile')
2387 group.add_argument('-e', '--extract', nargs=2,
2388 metavar=('<zipfile>', '<output_dir>'),
2389 help='Extract zipfile into target dir')
2390 group.add_argument('-c', '--create', nargs='+',
2391 metavar=('<name>', '<file>'),
2392 help='Create zipfile from sources')
2393 group.add_argument('-t', '--test', metavar='<zipfile>',
2394 help='Test if a zipfile is valid')
2395 args = parser.parse_args(args)
2396
2397 if args.test is not None:
2398 src = args.test
2399 with ZipFile(src, 'r') as zf:
2400 badfile = zf.testzip()
2401 if badfile:
2402 print("The following enclosed file is corrupted: {!r}".format(badfile))
2403 print("Done testing")
2404
2405 elif args.list is not None:
2406 src = args.list
2407 with ZipFile(src, 'r') as zf:
2408 zf.printdir()
2409
2410 elif args.extract is not None:
2411 src, curdir = args.extract
2412 with ZipFile(src, 'r') as zf:
2413 zf.extractall(curdir)
2414
2415 elif args.create is not None:
2416 zip_name = args.create.pop(0)
2417 files = args.create
2418
2419 def addToZip(zf, path, zippath):
2420 if os.path.isfile(path):
2421 zf.write(path, zippath, ZIP_DEFLATED)
2422 elif os.path.isdir(path):
2423 if zippath:
2424 zf.write(path, zippath)
2425 for nm in sorted(os.listdir(path)):
2426 addToZip(zf,
2427 os.path.join(path, nm), os.path.join(zippath, nm))
2428 # else: ignore
2429
2430 with ZipFile(zip_name, 'w') as zf:
2431 for path in files:
2432 zippath = os.path.basename(path)
2433 if not zippath:
2434 zippath = os.path.basename(os.path.dirname(path))
2435 if zippath in ('', os.curdir, os.pardir):
2436 zippath = ''
2437 addToZip(zf, path, zippath)
2438
2439
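# Illustrative command lines for the CLI defined above (paths are
# hypothetical):
#
#     python -m zipfile -c archive.zip file1.txt somedir/
#     python -m zipfile -l archive.zip
#     python -m zipfile -e archive.zip outdir/
#     python -m zipfile -t archive.zip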
2440if __name__ == "__main__":
2441 main()