Skip to content

bpo-44173: better approach for seeking in non-compressed ZipExtFile #26227

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 1 commit into from
Closed
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 15 additions & 6 deletions Lib/zipfile.py
Original file line number Diff line number Diff line change
Expand Up @@ -833,10 +833,10 @@ def __init__(self, fileobj, mode, zipinfo, pwd=None,
self.name = zipinfo.filename

if hasattr(zipinfo, 'CRC'):
self._expected_crc = zipinfo.CRC
self._expected_crc = self._orig_crc = zipinfo.CRC
self._running_crc = crc32(b'')
else:
self._expected_crc = None
self._expected_crc = self._orig_crc = None

self._seekable = False
try:
Expand Down Expand Up @@ -1067,17 +1067,17 @@ def seekable(self):
raise ValueError("I/O operation on closed file.")
return self._seekable

def seek(self, offset, whence=0):
def seek(self, offset, whence=os.SEEK_SET):
if self.closed:
raise ValueError("seek on closed file.")
if not self._seekable:
raise io.UnsupportedOperation("underlying stream is not seekable")
curr_pos = self.tell()
if whence == 0: # Seek from start of file
if whence == os.SEEK_SET:
new_pos = offset
elif whence == 1: # Seek from current position
elif whence == os.SEEK_CUR:
new_pos = curr_pos + offset
elif whence == 2: # Seek from EOF
elif whence == os.SEEK_END:
new_pos = self._orig_file_size + offset
else:
raise ValueError("whence must be os.SEEK_SET (0), "
Expand All @@ -1100,6 +1100,7 @@ def seek(self, offset, whence=0):
# Position is before the current position. Reset the ZipExtFile
self._fileobj.seek(self._orig_compress_start)
self._running_crc = self._orig_start_crc
self._expected_crc = self._orig_crc
self._compress_left = self._orig_compress_size
self._left = self._orig_file_size
self._readbuffer = b''
Expand All @@ -1110,6 +1111,14 @@ def seek(self, offset, whence=0):
if self._decrypter is not None:
self._init_decrypter()

if read_offset > 0 and self._compress_type == ZIP_STORED:
# Disable CRC checking after the first direct seek: the skipped bytes are
# never read, so the running CRC could no longer match the expected value.
self._expected_crc = None

self._fileobj.seek(read_offset, os.SEEK_CUR)
self._left -= read_offset
self._offset = read_offset = 0

while read_offset > 0:
read_len = min(self.MAX_SEEK_READ, read_offset)
self.read(read_len)
Expand Down