Skip to content

Commit 330f1d5

Browse files
authored
gh-88339: enable fast seeking of uncompressed unencrypted zipfile.ZipExtFile (GH-27737)
Avoid reading all of the intermediate data in uncompressed items in a zip file when the user seeks forward. Contributed by: @JuniorJPDJ
1 parent 56af5a2 commit 330f1d5

File tree

3 files changed

+22
-5
lines changed

3 files changed

+22
-5
lines changed

Lib/test/test_zipfile.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2032,6 +2032,7 @@ def test_seek_tell(self):
20322032
fp.seek(bloc, os.SEEK_CUR)
20332033
self.assertEqual(fp.tell(), bloc)
20342034
self.assertEqual(fp.read(5), txt[bloc:bloc+5])
2035+
self.assertEqual(fp.tell(), bloc + 5)
20352036
fp.seek(0, os.SEEK_END)
20362037
self.assertEqual(fp.tell(), len(txt))
20372038
fp.seek(0, os.SEEK_SET)
@@ -2049,6 +2050,7 @@ def test_seek_tell(self):
20492050
fp.seek(bloc, os.SEEK_CUR)
20502051
self.assertEqual(fp.tell(), bloc)
20512052
self.assertEqual(fp.read(5), txt[bloc:bloc+5])
2053+
self.assertEqual(fp.tell(), bloc + 5)
20522054
fp.seek(0, os.SEEK_END)
20532055
self.assertEqual(fp.tell(), len(txt))
20542056
fp.seek(0, os.SEEK_SET)

Lib/zipfile.py

Lines changed: 19 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -847,6 +847,7 @@ def __init__(self, fileobj, mode, zipinfo, pwd=None,
847847
self._orig_compress_size = zipinfo.compress_size
848848
self._orig_file_size = zipinfo.file_size
849849
self._orig_start_crc = self._running_crc
850+
self._orig_crc = self._expected_crc
850851
self._seekable = True
851852
except AttributeError:
852853
pass
@@ -1069,17 +1070,17 @@ def seekable(self):
10691070
raise ValueError("I/O operation on closed file.")
10701071
return self._seekable
10711072

1072-
def seek(self, offset, whence=0):
1073+
def seek(self, offset, whence=os.SEEK_SET):
10731074
if self.closed:
10741075
raise ValueError("seek on closed file.")
10751076
if not self._seekable:
10761077
raise io.UnsupportedOperation("underlying stream is not seekable")
10771078
curr_pos = self.tell()
1078-
if whence == 0: # Seek from start of file
1079+
if whence == os.SEEK_SET:
10791080
new_pos = offset
1080-
elif whence == 1: # Seek from current position
1081+
elif whence == os.SEEK_CUR:
10811082
new_pos = curr_pos + offset
1082-
elif whence == 2: # Seek from EOF
1083+
elif whence == os.SEEK_END:
10831084
new_pos = self._orig_file_size + offset
10841085
else:
10851086
raise ValueError("whence must be os.SEEK_SET (0), "
@@ -1094,14 +1095,27 @@ def seek(self, offset, whence=0):
10941095
read_offset = new_pos - curr_pos
10951096
buff_offset = read_offset + self._offset
10961097

1097-
if buff_offset >= 0 and buff_offset < len(self._readbuffer):
1098+
# Fast seek uncompressed unencrypted file
1099+
if self._compress_type == ZIP_STORED and self._decrypter is None and read_offset > 0:
1100+
# disable CRC checking after first seeking - it would be invalid
1101+
self._expected_crc = None
1102+
# seek actual file taking already buffered data into account
1103+
read_offset -= len(self._readbuffer) - self._offset
1104+
self._fileobj.seek(read_offset, os.SEEK_CUR)
1105+
self._left -= read_offset
1106+
read_offset = 0
1107+
# flush read buffer
1108+
self._readbuffer = b''
1109+
self._offset = 0
1110+
elif buff_offset >= 0 and buff_offset < len(self._readbuffer):
10981111
# Just move the _offset index if the new position is in the _readbuffer
10991112
self._offset = buff_offset
11001113
read_offset = 0
11011114
elif read_offset < 0:
11021115
# Position is before the current position. Reset the ZipExtFile
11031116
self._fileobj.seek(self._orig_compress_start)
11041117
self._running_crc = self._orig_start_crc
1118+
self._expected_crc = self._orig_crc
11051119
self._compress_left = self._orig_compress_size
11061120
self._left = self._orig_file_size
11071121
self._readbuffer = b''
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Enable fast seeking of uncompressed, unencrypted :class:`zipfile.ZipExtFile` objects: seeking forward no longer reads all of the intermediate data.

0 commit comments

Comments
 (0)