Skip to content

Commit 15f0863

Browse files
authored
Merge pull request #8223 from uranusjr/unicode-wheel
2 parents 9935311 + 0a31845 commit 15f0863

File tree

6 files changed

+158
-71
lines changed

6 files changed

+158
-71
lines changed

news/5712.bugfix

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
Correctly treat wheels containing non-ASCII file contents so they can be
2+
installed on Windows.

src/pip/_internal/operations/install/wheel.py

Lines changed: 99 additions & 59 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,6 @@
11
"""Support for installing and building the "wheel" binary package format.
22
"""
33

4-
# The following comment should be removed at some point in the future.
5-
# mypy: strict-optional=False
6-
74
from __future__ import absolute_import
85

96
import collections
@@ -24,7 +21,14 @@
2421
from pip._vendor import pkg_resources
2522
from pip._vendor.distlib.scripts import ScriptMaker
2623
from pip._vendor.distlib.util import get_export_entry
27-
from pip._vendor.six import StringIO
24+
from pip._vendor.six import (
25+
PY2,
26+
StringIO,
27+
ensure_str,
28+
ensure_text,
29+
itervalues,
30+
text_type,
31+
)
2832

2933
from pip._internal.exceptions import InstallationError
3034
from pip._internal.locations import get_major_minor_version
@@ -43,28 +47,35 @@
4347
from pip._internal.utils.typing import cast
4448
else:
4549
from email.message import Message
46-
import typing # noqa F401
4750
from typing import (
48-
Dict, List, Optional, Sequence, Tuple, Any,
49-
Iterable, Iterator, Callable, Set, IO, cast
51+
Any,
52+
Callable,
53+
Dict,
54+
IO,
55+
Iterable,
56+
Iterator,
57+
List,
58+
NewType,
59+
Optional,
60+
Sequence,
61+
Set,
62+
Tuple,
63+
Union,
64+
cast,
5065
)
5166

5267
from pip._internal.models.scheme import Scheme
5368
from pip._internal.utils.filesystem import NamedTemporaryFileResult
5469

55-
InstalledCSVRow = Tuple[str, ...]
70+
RecordPath = NewType('RecordPath', text_type)
71+
InstalledCSVRow = Tuple[RecordPath, str, Union[int, str]]
5672

5773

5874
logger = logging.getLogger(__name__)
5975

6076

61-
def normpath(src, p):
62-
# type: (str, str) -> str
63-
return os.path.relpath(src, p).replace(os.path.sep, '/')
64-
65-
6677
def rehash(path, blocksize=1 << 20):
67-
# type: (str, int) -> Tuple[str, str]
78+
# type: (text_type, int) -> Tuple[str, str]
6879
"""Return (encoded_digest, length) for path using hashlib.sha256()"""
6980
h, length = hash_file(path, blocksize)
7081
digest = 'sha256=' + urlsafe_b64encode(
@@ -79,14 +90,14 @@ def csv_io_kwargs(mode):
7990
"""Return keyword arguments to properly open a CSV file
8091
in the given mode.
8192
"""
82-
if sys.version_info.major < 3:
93+
if PY2:
8394
return {'mode': '{}b'.format(mode)}
8495
else:
85-
return {'mode': mode, 'newline': ''}
96+
return {'mode': mode, 'newline': '', 'encoding': 'utf-8'}
8697

8798

8899
def fix_script(path):
89-
# type: (str) -> Optional[bool]
100+
# type: (text_type) -> Optional[bool]
90101
"""Replace #!python with #!/path/to/python
91102
Return True if file was changed.
92103
"""
@@ -217,9 +228,12 @@ def message_about_scripts_not_on_PATH(scripts):
217228
return "\n".join(msg_lines)
218229

219230

220-
def sorted_outrows(outrows):
221-
# type: (Iterable[InstalledCSVRow]) -> List[InstalledCSVRow]
222-
"""Return the given rows of a RECORD file in sorted order.
231+
def _normalized_outrows(outrows):
232+
# type: (Iterable[InstalledCSVRow]) -> List[Tuple[str, str, str]]
233+
"""Normalize the given rows of a RECORD file.
234+
235+
Items in each row are converted into str. Rows are then sorted to make
236+
the value more predictable for tests.
223237
224238
Each row is a 3-tuple (path, hash, size) and corresponds to a record of
225239
a RECORD file (see PEP 376 and PEP 427 for details). For the rows
@@ -234,13 +248,35 @@ def sorted_outrows(outrows):
234248
# coerce each element to a string to avoid a TypeError in this case.
235249
# For additional background, see--
236250
# https://github.com/pypa/pip/issues/5868
237-
return sorted(outrows, key=lambda row: tuple(str(x) for x in row))
251+
return sorted(
252+
(ensure_str(record_path, encoding='utf-8'), hash_, str(size))
253+
for record_path, hash_, size in outrows
254+
)
255+
256+
257+
def _record_to_fs_path(record_path):
258+
# type: (RecordPath) -> text_type
259+
return record_path
260+
261+
262+
def _fs_to_record_path(path, relative_to=None):
263+
# type: (text_type, Optional[text_type]) -> RecordPath
264+
if relative_to is not None:
265+
path = os.path.relpath(path, relative_to)
266+
path = path.replace(os.path.sep, '/')
267+
return cast('RecordPath', path)
268+
269+
270+
def _parse_record_path(record_column):
271+
# type: (str) -> RecordPath
272+
p = ensure_text(record_column, encoding='utf-8')
273+
return cast('RecordPath', p)
238274

239275

240276
def get_csv_rows_for_installed(
241277
old_csv_rows, # type: Iterable[List[str]]
242-
installed, # type: Dict[str, str]
243-
changed, # type: Set[str]
278+
installed, # type: Dict[RecordPath, RecordPath]
279+
changed, # type: Set[RecordPath]
244280
generated, # type: List[str]
245281
lib_dir, # type: str
246282
):
@@ -255,21 +291,20 @@ def get_csv_rows_for_installed(
255291
logger.warning(
256292
'RECORD line has more than three elements: {}'.format(row)
257293
)
258-
# Make a copy because we are mutating the row.
259-
row = list(row)
260-
old_path = row[0]
261-
new_path = installed.pop(old_path, old_path)
262-
row[0] = new_path
263-
if new_path in changed:
264-
digest, length = rehash(new_path)
265-
row[1] = digest
266-
row[2] = length
267-
installed_rows.append(tuple(row))
294+
old_record_path = _parse_record_path(row[0])
295+
new_record_path = installed.pop(old_record_path, old_record_path)
296+
if new_record_path in changed:
297+
digest, length = rehash(_record_to_fs_path(new_record_path))
298+
else:
299+
digest = row[1] if len(row) > 1 else ''
300+
length = row[2] if len(row) > 2 else ''
301+
installed_rows.append((new_record_path, digest, length))
268302
for f in generated:
303+
path = _fs_to_record_path(f, lib_dir)
269304
digest, length = rehash(f)
270-
installed_rows.append((normpath(f, lib_dir), digest, str(length)))
271-
for f in installed:
272-
installed_rows.append((installed[f], '', ''))
305+
installed_rows.append((path, digest, length))
306+
for installed_record_path in itervalues(installed):
307+
installed_rows.append((installed_record_path, '', ''))
273308
return installed_rows
274309

275310

@@ -338,8 +373,8 @@ def install_unpacked_wheel(
338373
# installed = files copied from the wheel to the destination
339374
# changed = files changed while installing (scripts #! line typically)
340375
# generated = files newly generated during the install (script wrappers)
341-
installed = {} # type: Dict[str, str]
342-
changed = set()
376+
installed = {} # type: Dict[RecordPath, RecordPath]
377+
changed = set() # type: Set[RecordPath]
343378
generated = [] # type: List[str]
344379

345380
# Compile all of the pyc files that we're going to be installing
@@ -351,20 +386,20 @@ def install_unpacked_wheel(
351386
logger.debug(stdout.getvalue())
352387

353388
def record_installed(srcfile, destfile, modified=False):
354-
# type: (str, str, bool) -> None
389+
# type: (text_type, text_type, bool) -> None
355390
"""Map archive RECORD paths to installation RECORD paths."""
356-
oldpath = normpath(srcfile, wheeldir)
357-
newpath = normpath(destfile, lib_dir)
391+
oldpath = _fs_to_record_path(srcfile, wheeldir)
392+
newpath = _fs_to_record_path(destfile, lib_dir)
358393
installed[oldpath] = newpath
359394
if modified:
360-
changed.add(destfile)
395+
changed.add(_fs_to_record_path(destfile))
361396

362397
def clobber(
363-
source, # type: str
364-
dest, # type: str
398+
source, # type: text_type
399+
dest, # type: text_type
365400
is_base, # type: bool
366-
fixer=None, # type: Optional[Callable[[str], Any]]
367-
filter=None # type: Optional[Callable[[str], bool]]
401+
fixer=None, # type: Optional[Callable[[text_type], Any]]
402+
filter=None # type: Optional[Callable[[text_type], bool]]
368403
):
369404
# type: (...) -> None
370405
ensure_dir(dest) # common for the 'include' path
@@ -423,7 +458,11 @@ def clobber(
423458
changed = fixer(destfile)
424459
record_installed(srcfile, destfile, changed)
425460

426-
clobber(source, lib_dir, True)
461+
clobber(
462+
ensure_text(source, encoding=sys.getfilesystemencoding()),
463+
ensure_text(lib_dir, encoding=sys.getfilesystemencoding()),
464+
True,
465+
)
427466

428467
dest_info_dir = os.path.join(lib_dir, info_dir)
429468

@@ -432,7 +471,7 @@ def clobber(
432471
console, gui = get_entrypoints(ep_file)
433472

434473
def is_entrypoint_wrapper(name):
435-
# type: (str) -> bool
474+
# type: (text_type) -> bool
436475
# EP, EP.exe and EP-script.py are scripts generated for
437476
# entry point EP by setuptools
438477
if name.lower().endswith('.exe'):
@@ -456,7 +495,13 @@ def is_entrypoint_wrapper(name):
456495
filter = is_entrypoint_wrapper
457496
source = os.path.join(wheeldir, datadir, subdir)
458497
dest = getattr(scheme, subdir)
459-
clobber(source, dest, False, fixer=fixer, filter=filter)
498+
clobber(
499+
ensure_text(source, encoding=sys.getfilesystemencoding()),
500+
ensure_text(dest, encoding=sys.getfilesystemencoding()),
501+
False,
502+
fixer=fixer,
503+
filter=filter,
504+
)
460505

461506
maker = PipScriptMaker(None, scheme.scripts)
462507

@@ -606,16 +651,11 @@ def _generate_file(path, **kwargs):
606651
generated=generated,
607652
lib_dir=lib_dir)
608653
with _generate_file(record_path, **csv_io_kwargs('w')) as record_file:
609-
610-
# The type mypy infers for record_file using reveal_type
611-
# is different for Python 3 (typing.IO[Any]) and
612-
# Python 2 (typing.BinaryIO), leading us to explicitly
613-
# cast to typing.IO[str] as a workaround
614-
# for bad Python 2 behaviour
615-
record_file_obj = cast('IO[str]', record_file)
616-
617-
writer = csv.writer(record_file_obj)
618-
writer.writerows(sorted_outrows(rows)) # sort to simplify testing
654+
# The type mypy infers for record_file is different for Python 3
655+
# (typing.IO[Any]) and Python 2 (typing.BinaryIO). We explicitly
656+
# cast to typing.IO[str] as a workaround.
657+
writer = csv.writer(cast('IO[str]', record_file))
658+
writer.writerows(_normalized_outrows(rows))
619659

620660

621661
def install_wheel(

src/pip/_internal/utils/misc.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -131,7 +131,7 @@ def get_prog():
131131
# Retry every half second for up to 3 seconds
132132
@retry(stop_max_delay=3000, wait_fixed=500)
133133
def rmtree(dir, ignore_errors=False):
134-
# type: (str, bool) -> None
134+
# type: (Text, bool) -> None
135135
shutil.rmtree(dir, ignore_errors=ignore_errors,
136136
onerror=rmtree_errorhandler)
137137

@@ -876,7 +876,7 @@ def is_console_interactive():
876876

877877

878878
def hash_file(path, blocksize=1 << 20):
879-
# type: (str, int) -> Tuple[Any, int]
879+
# type: (Text, int) -> Tuple[Any, int]
880880
"""Return (hash, length) for path using hashlib.sha256()
881881
"""
882882

src/pip/_internal/utils/temp_dir.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
from contextlib import contextmanager
99

1010
from pip._vendor.contextlib2 import ExitStack
11+
from pip._vendor.six import ensure_text
1112

1213
from pip._internal.utils.misc import enum, rmtree
1314
from pip._internal.utils.typing import MYPY_CHECK_RUNNING
@@ -193,7 +194,9 @@ def cleanup(self):
193194
"""
194195
self._deleted = True
195196
if os.path.exists(self._path):
196-
rmtree(self._path)
197+
# Make sure to pass unicode on Python 2 to make the contents also
198+
# use unicode, ensuring non-ASCII names can be represented.
199+
rmtree(ensure_text(self._path))
197200

198201

199202
class AdjacentTempDirectory(TempDirectory):

tests/functional/test_install_wheel.py

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
# -*- coding: utf-8 -*-
2+
13
import distutils
24
import glob
35
import os
@@ -125,6 +127,36 @@ def test_basic_install_from_wheel_file(script, data):
125127
result.stdout)
126128

127129

130+
# Installation seems to work on Python 2, but scripttest fails to check it.
131+
# Not worth fixing now, since Python 2 support is being dropped soon anyway.
132+
@skip_if_python2
133+
def test_basic_install_from_unicode_wheel(script, data):
134+
"""
135+
Test installing from a wheel that contains files with non-ASCII names
136+
"""
137+
make_wheel(
138+
'unicode_package',
139+
'1.0',
140+
extra_files={
141+
'வணக்கம்/__init__.py': b'',
142+
'வணக்கம்/નમસ્તે.py': b'',
143+
},
144+
).save_to_dir(script.scratch_path)
145+
146+
result = script.pip(
147+
'install', 'unicode_package==1.0', '--no-index',
148+
'--find-links', script.scratch_path,
149+
)
150+
dist_info_folder = script.site_packages / 'unicode_package-1.0.dist-info'
151+
assert dist_info_folder in result.files_created, str(result)
152+
153+
file1 = script.site_packages.joinpath('வணக்கம்', '__init__.py')
154+
assert file1 in result.files_created, str(result)
155+
156+
file2 = script.site_packages.joinpath('வணக்கம்', 'નમસ્તે.py')
157+
assert file2 in result.files_created, str(result)
158+
159+
128160
def test_install_from_wheel_with_headers(script, data):
129161
"""
130162
Test installing from a wheel file with headers

0 commit comments

Comments
 (0)