Skip to content

Commit 12c171f

Browse files
authored
Use the data_filter when extracting tarballs, if it's available. (#12214)
Previous behaviour is used on Python without PEP 706 tarfile filters. (Note that the feature is now in security releases of all supported versions.)

A custom filter (which wraps `data_filter`) is used to retain pip-specific behaviour:
- Removing a common leading directory
- Setting the mode (Unix permissions)

Compared to the previous behaviour, if a file can't be unpacked, the unpacking operation will fail with `InstallationError`, rather than skipping the individual file with a `logger.warning`. This means that "some corrupt tar files" now can't be unpacked.

Note that PEP 721 limits itself to sdists, but this change also affects unpacking of any other tar file.
1 parent 71a08a7 commit 12c171f

File tree

3 files changed

+148
-51
lines changed

3 files changed

+148
-51
lines changed

news/12111.feature.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
PEP 721: Use the ``data_filter`` when extracting tarballs, if it's available.

src/pip/_internal/utils/unpacking.py

Lines changed: 120 additions & 50 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
import os
66
import shutil
77
import stat
8+
import sys
89
import tarfile
910
import zipfile
1011
from typing import Iterable, List, Optional
@@ -85,12 +86,16 @@ def is_within_directory(directory: str, target: str) -> bool:
8586
return prefix == abs_directory
8687

8788

89+
def _get_default_mode_plus_executable() -> int:
90+
return 0o777 & ~current_umask() | 0o111
91+
92+
8893
def set_extracted_file_to_default_mode_plus_executable(path: str) -> None:
8994
"""
9095
Make file present at path have execute for user/group/world
9196
(chmod +x) is no-op on windows per python docs
9297
"""
93-
os.chmod(path, (0o777 & ~current_umask() | 0o111))
98+
os.chmod(path, _get_default_mode_plus_executable())
9499

95100

96101
def zip_item_is_executable(info: ZipInfo) -> bool:
@@ -151,8 +156,8 @@ def untar_file(filename: str, location: str) -> None:
151156
Untar the file (with path `filename`) to the destination `location`.
152157
All files are written based on system defaults and umask (i.e. permissions
153158
are not preserved), except that regular file members with any execute
154-
permissions (user, group, or world) have "chmod +x" applied after being
155-
written. Note that for windows, any execute changes using os.chmod are
159+
permissions (user, group, or world) have "chmod +x" applied on top of the
160+
default. Note that for windows, any execute changes using os.chmod are
156161
no-ops per the python docs.
157162
"""
158163
ensure_dir(location)
@@ -170,62 +175,127 @@ def untar_file(filename: str, location: str) -> None:
170175
filename,
171176
)
172177
mode = "r:*"
178+
173179
tar = tarfile.open(filename, mode, encoding="utf-8")
174180
try:
175181
leading = has_leading_dir([member.name for member in tar.getmembers()])
176-
for member in tar.getmembers():
177-
fn = member.name
178-
if leading:
179-
fn = split_leading_dir(fn)[1]
180-
path = os.path.join(location, fn)
181-
if not is_within_directory(location, path):
182-
message = (
183-
"The tar file ({}) has a file ({}) trying to install "
184-
"outside target directory ({})"
185-
)
186-
raise InstallationError(message.format(filename, path, location))
187-
if member.isdir():
188-
ensure_dir(path)
189-
elif member.issym():
190-
try:
191-
tar._extract_member(member, path)
192-
except Exception as exc:
193-
# Some corrupt tar files seem to produce this
194-
# (specifically bad symlinks)
195-
logger.warning(
196-
"In the tar file %s the member %s is invalid: %s",
197-
filename,
198-
member.name,
199-
exc,
200-
)
201-
continue
202-
else:
182+
183+
# PEP 706 added `tarfile.data_filter`, and made some other changes to
184+
# Python's tarfile module (see below). The features were backported to
185+
# security releases.
186+
try:
187+
data_filter = tarfile.data_filter
188+
except AttributeError:
189+
_untar_without_filter(filename, location, tar, leading)
190+
else:
191+
default_mode_plus_executable = _get_default_mode_plus_executable()
192+
193+
def pip_filter(member: tarfile.TarInfo, path: str) -> tarfile.TarInfo:
194+
if leading:
195+
member.name = split_leading_dir(member.name)[1]
196+
orig_mode = member.mode
203197
try:
204-
fp = tar.extractfile(member)
205-
except (KeyError, AttributeError) as exc:
206-
# Some corrupt tar files seem to produce this
207-
# (specifically bad symlinks)
208-
logger.warning(
209-
"In the tar file %s the member %s is invalid: %s",
210-
filename,
211-
member.name,
212-
exc,
198+
try:
199+
member = data_filter(member, location)
200+
except tarfile.LinkOutsideDestinationError:
201+
if sys.version_info[:3] in {
202+
(3, 8, 17),
203+
(3, 9, 17),
204+
(3, 10, 12),
205+
(3, 11, 4),
206+
}:
207+
# The tarfile filter in specific Python versions
208+
# raises LinkOutsideDestinationError on valid input
209+
# (https://github.com/python/cpython/issues/107845)
210+
# Ignore the error there, but do use the
211+
# more lax `tar_filter`
212+
member = tarfile.tar_filter(member, location)
213+
else:
214+
raise
215+
except tarfile.TarError as exc:
216+
message = "Invalid member in the tar file {}: {}"
217+
# Filter error messages mention the member name.
218+
# No need to add it here.
219+
raise InstallationError(
220+
message.format(
221+
filename,
222+
exc,
223+
)
213224
)
214-
continue
215-
ensure_dir(os.path.dirname(path))
216-
assert fp is not None
217-
with open(path, "wb") as destfp:
218-
shutil.copyfileobj(fp, destfp)
219-
fp.close()
220-
# Update the timestamp (useful for cython compiled files)
221-
tar.utime(member, path)
222-
# member have any execute permissions for user/group/world?
223-
if member.mode & 0o111:
224-
set_extracted_file_to_default_mode_plus_executable(path)
225+
if member.isfile() and orig_mode & 0o111:
226+
member.mode = default_mode_plus_executable
227+
else:
228+
# See PEP 706 note above.
229+
# The PEP changed this from `int` to `Optional[int]`,
230+
# where None means "use the default". Mypy doesn't
231+
# know this yet.
232+
member.mode = None # type: ignore [assignment]
233+
return member
234+
235+
tar.extractall(location, filter=pip_filter)
236+
225237
finally:
226238
tar.close()
227239

228240

241+
def _untar_without_filter(
242+
filename: str,
243+
location: str,
244+
tar: tarfile.TarFile,
245+
leading: bool,
246+
) -> None:
247+
"""Fallback for Python without tarfile.data_filter"""
248+
for member in tar.getmembers():
249+
fn = member.name
250+
if leading:
251+
fn = split_leading_dir(fn)[1]
252+
path = os.path.join(location, fn)
253+
if not is_within_directory(location, path):
254+
message = (
255+
"The tar file ({}) has a file ({}) trying to install "
256+
"outside target directory ({})"
257+
)
258+
raise InstallationError(message.format(filename, path, location))
259+
if member.isdir():
260+
ensure_dir(path)
261+
elif member.issym():
262+
try:
263+
tar._extract_member(member, path)
264+
except Exception as exc:
265+
# Some corrupt tar files seem to produce this
266+
# (specifically bad symlinks)
267+
logger.warning(
268+
"In the tar file %s the member %s is invalid: %s",
269+
filename,
270+
member.name,
271+
exc,
272+
)
273+
continue
274+
else:
275+
try:
276+
fp = tar.extractfile(member)
277+
except (KeyError, AttributeError) as exc:
278+
# Some corrupt tar files seem to produce this
279+
# (specifically bad symlinks)
280+
logger.warning(
281+
"In the tar file %s the member %s is invalid: %s",
282+
filename,
283+
member.name,
284+
exc,
285+
)
286+
continue
287+
ensure_dir(os.path.dirname(path))
288+
assert fp is not None
289+
with open(path, "wb") as destfp:
290+
shutil.copyfileobj(fp, destfp)
291+
fp.close()
292+
# Update the timestamp (useful for cython compiled files)
293+
tar.utime(member, path)
294+
# member have any execute permissions for user/group/world?
295+
if member.mode & 0o111:
296+
set_extracted_file_to_default_mode_plus_executable(path)
297+
298+
229299
def unpack_file(
230300
filename: str,
231301
location: str,

tests/unit/test_utils_unpacking.py

Lines changed: 27 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -155,7 +155,13 @@ def test_unpack_tar_failure(self) -> None:
155155
test_tar = self.make_tar_file("test_tar.tar", files)
156156
with pytest.raises(InstallationError) as e:
157157
untar_file(test_tar, self.tempdir)
158-
assert "trying to install outside target directory" in str(e.value)
158+
159+
# The error message comes from tarfile.data_filter when it is available,
160+
# otherwise from pip's own check.
161+
if hasattr(tarfile, "data_filter"):
162+
assert "is outside the destination" in str(e.value)
163+
else:
164+
assert "trying to install outside target directory" in str(e.value)
159165

160166
def test_unpack_tar_success(self) -> None:
161167
"""
@@ -171,6 +177,26 @@ def test_unpack_tar_success(self) -> None:
171177
test_tar = self.make_tar_file("test_tar.tar", files)
172178
untar_file(test_tar, self.tempdir)
173179

180+
@pytest.mark.skipif(
181+
not hasattr(tarfile, "data_filter"),
182+
reason="tarfile filters (PEP-721) not available",
183+
)
184+
def test_unpack_tar_filter(self) -> None:
185+
"""
186+
Test that the tarfile.data_filter is used to disallow dangerous
187+
behaviour (PEP-721)
188+
"""
189+
test_tar = os.path.join(self.tempdir, "test_tar_filter.tar")
190+
with tarfile.open(test_tar, "w") as mytar:
191+
file_tarinfo = tarfile.TarInfo("bad-link")
192+
file_tarinfo.type = tarfile.SYMTYPE
193+
file_tarinfo.linkname = "../../../../pwn"
194+
mytar.addfile(file_tarinfo, io.BytesIO(b""))
195+
with pytest.raises(InstallationError) as e:
196+
untar_file(test_tar, self.tempdir)
197+
198+
assert "is outside the destination" in str(e.value)
199+
174200

175201
def test_unpack_tar_unicode(tmpdir: Path) -> None:
176202
test_tar = tmpdir / "test.tar"

0 commit comments

Comments
 (0)