From 455e3c8aea76c8e6aaa873df7cc210f6d6c5e719 Mon Sep 17 00:00:00 2001 From: JuniorJPDJ Date: Mon, 31 Aug 2020 05:30:22 +0200 Subject: [PATCH] better approach for seek in non-compressed ZipExtFile from zipfile --- Lib/zipfile.py | 21 +++++++++++++++------ 1 file changed, 15 insertions(+), 6 deletions(-) diff --git a/Lib/zipfile.py b/Lib/zipfile.py index 3efeecb13bd179..1d3577af9ee016 100644 --- a/Lib/zipfile.py +++ b/Lib/zipfile.py @@ -833,10 +833,10 @@ def __init__(self, fileobj, mode, zipinfo, pwd=None, self.name = zipinfo.filename if hasattr(zipinfo, 'CRC'): - self._expected_crc = zipinfo.CRC + self._expected_crc = self._orig_crc = zipinfo.CRC self._running_crc = crc32(b'') else: - self._expected_crc = None + self._expected_crc = self._orig_crc = None self._seekable = False try: @@ -1067,17 +1067,17 @@ def seekable(self): raise ValueError("I/O operation on closed file.") return self._seekable - def seek(self, offset, whence=0): + def seek(self, offset, whence=os.SEEK_SET): if self.closed: raise ValueError("seek on closed file.") if not self._seekable: raise io.UnsupportedOperation("underlying stream is not seekable") curr_pos = self.tell() - if whence == 0: # Seek from start of file + if whence == os.SEEK_SET: new_pos = offset - elif whence == 1: # Seek from current position + elif whence == os.SEEK_CUR: new_pos = curr_pos + offset - elif whence == 2: # Seek from EOF + elif whence == os.SEEK_END: new_pos = self._orig_file_size + offset else: raise ValueError("whence must be os.SEEK_SET (0), " @@ -1100,6 +1100,7 @@ def seek(self, offset, whence=0): # Position is before the current position. Reset the ZipExtFile self._fileobj.seek(self._orig_compress_start) self._running_crc = self._orig_start_crc + self._expected_crc = self._orig_crc self._compress_left = self._orig_compress_size self._left = self._orig_file_size self._readbuffer = b'' @@ -1110,6 +1111,14 @@ def seek(self, offset, whence=0): if self._decrypter is not None: self._init_decrypter() + if read_offset > 0 and self._compress_type == ZIP_STORED: + # disable CRC checking after first seeking - it would be invalid + self._expected_crc = None + + self._fileobj.seek(read_offset, os.SEEK_CUR) + self._left -= read_offset + self._offset = read_offset = 0 + while read_offset > 0: read_len = min(self.MAX_SEEK_READ, read_offset) self.read(read_len)