From cdee9e2c331f8f16118e2e2d0ea403cdce8b8c27 Mon Sep 17 00:00:00 2001 From: Pradyun Gedam Date: Sun, 15 Dec 2019 04:00:07 +0530 Subject: [PATCH 1/5] Add configuration for using vendoring --- pyproject.toml | 39 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 39 insertions(+) diff --git a/pyproject.toml b/pyproject.toml index aa798360c5b..645d54a7550 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -18,3 +18,42 @@ type = [ { name = "Improved Documentation", directory = "doc", showcontent = true }, { name = "Trivial Changes", directory = "trivial", showcontent = false }, ] + +[tool.vendoring] +destination = "src/pip/_vendor/" +requirements = "src/pip/_vendor/vendor.txt" +namespace = "pip._vendor" + +protected-files = ["__init__.py", "README.rst", "vendor.txt"] +patches-dir = "tools/automation/vendoring/patches" + +[tool.vendoring.transformations] +substitute = [ + # pkg_resource's vendored packages are directly vendored in pip. + { match='pkg_resources\.extern', replace='pip._vendor' }, + { match='from \.extern', replace='from pip._vendor' }, +] +drop = [ + # contains unnecessary scripts + "bin/", + # interpreter and OS specific msgpack libs + "msgpack/*.so", + # unneeded parts of setuptools + "easy_install.py", + "setuptools", + "pkg_resources/_vendor/", + "pkg_resources/extern/", +] + +[tool.vendoring.typing-stubs] +six = ["six.__init__", "six.moves.__init__", "six.moves.configparser"] +appdirs = [] +contextlib2 = [] + +[tool.vendoring.license.directories] +setuptools = "pkg_resources" +msgpack-python = "msgpack" + +[tool.vendoring.license.fallback-urls] +pytoml = "https://github.com/avakar/pytoml/raw/master/LICENSE" +webencodings = "https://github.com/SimonSapin/python-webencodings/raw/master/LICENSE" From 248f6b2fc6be8fec63bdbce7dd7882bbafc57b20 Mon Sep 17 00:00:00 2001 From: Pradyun Gedam Date: Sun, 15 Dec 2019 04:00:25 +0530 Subject: [PATCH 2/5] Switch tox -e vendoring, to use vendoring --- tox.ini | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tox.ini b/tox.ini index de1263fc47c..5f9b6f4284b 100644 --- a/tox.ini +++ b/tox.ini @@ -48,16 +48,16 @@ commands = pre-commit run [] --all-files --show-diff-on-failure [testenv:vendoring] +basepython = python3.8 skip_install = True commands_pre = deps = - invoke - requests + vendoring == 0.2.2 # Required, otherwise we interpret --no-binary :all: as # "do not build wheels", which fails for PEP 517 requirements pip>=19.3.1 whitelist_externals = git commands = # Check that the vendoring is up-to-date - invoke vendoring.update + vendoring sync . -v git diff --exit-code From 53aaa3e40bcdddf3d4b816de5365aa7534ceefdd Mon Sep 17 00:00:00 2001 From: Pradyun Gedam Date: Sun, 15 Dec 2019 04:01:33 +0530 Subject: [PATCH 3/5] Drop invoke task for vendoring dependencies This is no longer needed, since `vendoring` does what we need here. --- tasks/__init__.py | 4 - tools/automation/vendoring/__init__.py | 282 ------------------------- tools/automation/vendoring/typing.py | 59 ------ 3 files changed, 345 deletions(-) delete mode 100644 tasks/__init__.py delete mode 100644 tools/automation/vendoring/__init__.py delete mode 100644 tools/automation/vendoring/typing.py diff --git a/tasks/__init__.py b/tasks/__init__.py deleted file mode 100644 index 9591fb9ef05..00000000000 --- a/tasks/__init__.py +++ /dev/null @@ -1,4 +0,0 @@ -import invoke -from tools.automation import vendoring - -ns = invoke.Collection(vendoring) diff --git a/tools/automation/vendoring/__init__.py b/tools/automation/vendoring/__init__.py deleted file mode 100644 index 1c2ae3a6948..00000000000 --- a/tools/automation/vendoring/__init__.py +++ /dev/null @@ -1,282 +0,0 @@ -""""Vendoring script, python 3.5 with requests needed""" - -# The following comment should be removed at some point in the future. -# mypy: disallow-untyped-defs=False - -import re -import shutil -import tarfile -import zipfile -from pathlib import Path - -import invoke -import requests - -from .typing import generate_stubs - -FILE_WHITE_LIST = ( - 'Makefile', - 'vendor.txt', - '__init__.py', - 'README.rst', -) - -# libraries that have directories with different names -LIBRARY_DIRNAMES = { - 'setuptools': 'pkg_resources', - 'msgpack-python': 'msgpack', -} - -# from time to time, remove the no longer needed ones -HARDCODED_LICENSE_URLS = { - 'pytoml': 'https://github.com/avakar/pytoml/raw/master/LICENSE', - 'webencodings': 'https://github.com/SimonSapin/python-webencodings/raw/' - 'master/LICENSE', -} - - -def drop_dir(path, **kwargs): - shutil.rmtree(str(path), **kwargs) - - -def remove_all(paths): - for path in paths: - if path.is_dir(): - drop_dir(path) - else: - path.unlink() - - -def log(msg): - print('[vendoring.update] ' + msg) - - -def _get_vendor_dir(ctx): - git_root = ctx.run('git rev-parse --show-toplevel', hide=True).stdout - return Path(git_root.strip()) / 'src' / 'pip' / '_vendor' - - -def clean_vendor(ctx, vendor_dir): - # Old _vendor cleanup - remove_all(vendor_dir.glob('*.pyc')) - log('Cleaning %s' % vendor_dir) - for item in vendor_dir.iterdir(): - if item.is_dir(): - shutil.rmtree(str(item)) - elif item.name not in FILE_WHITE_LIST: - item.unlink() - else: - log('Skipping %s' % item) - - -def detect_vendored_libs(vendor_dir): - retval = [] - for item in vendor_dir.iterdir(): - if item.is_dir(): - retval.append(item.name) - elif item.name.endswith(".pyi"): - continue - elif "LICENSE" in item.name or "COPYING" in item.name: - continue - elif item.name not in FILE_WHITE_LIST: - retval.append(item.name[:-3]) - return retval - - -def rewrite_imports(package_dir, vendored_libs): - for item in package_dir.iterdir(): - if item.is_dir(): - rewrite_imports(item, vendored_libs) - elif item.name.endswith('.py'): - rewrite_file_imports(item, vendored_libs) - - -def rewrite_file_imports(item, vendored_libs): - """Rewrite 'import xxx' and 'from xxx import' for vendored_libs""" - text = item.read_text(encoding='utf-8') - # Revendor pkg_resources.extern first - text = re.sub(r'pkg_resources\.extern', r'pip._vendor', text) - text = re.sub(r'from \.extern', r'from pip._vendor', text) - for lib in vendored_libs: - text = re.sub( - r'(\n\s*|^)import %s(\n\s*)' % lib, - r'\1from pip._vendor import %s\2' % lib, - text, - ) - text = re.sub( - r'(\n\s*|^)from %s(\.|\s+)' % lib, - r'\1from pip._vendor.%s\2' % lib, - text, - ) - item.write_text(text, encoding='utf-8') - - -def apply_patch(ctx, patch_file_path): - log('Applying patch %s' % patch_file_path.name) - ctx.run('git apply --verbose %s' % patch_file_path) - - -def vendor(ctx, vendor_dir): - log('Reinstalling vendored libraries') - # We use --no-deps because we want to ensure that all of our dependencies - # are added to vendor.txt, this includes all dependencies recursively up - # the chain. - ctx.run( - 'pip install -t {0} -r {0}/vendor.txt --no-compile --no-deps'.format( - str(vendor_dir), - ) - ) - remove_all(vendor_dir.glob('*.dist-info')) - remove_all(vendor_dir.glob('*.egg-info')) - - # Cleanup setuptools unneeded parts - (vendor_dir / 'easy_install.py').unlink() - drop_dir(vendor_dir / 'setuptools') - drop_dir(vendor_dir / 'pkg_resources' / '_vendor') - drop_dir(vendor_dir / 'pkg_resources' / 'extern') - - # Drop the bin directory (contains easy_install, distro, chardetect etc.) - # Might not appear on all OSes, so ignoring errors - drop_dir(vendor_dir / 'bin', ignore_errors=True) - - # Drop interpreter and OS specific msgpack libs. - # Pip will rely on the python-only fallback instead. - remove_all(vendor_dir.glob('msgpack/*.so')) - - # Detect the vendored packages/modules - vendored_libs = detect_vendored_libs(vendor_dir) - log("Detected vendored libraries: %s" % ", ".join(vendored_libs)) - - # Global import rewrites - log("Rewriting all imports related to vendored libs") - for item in vendor_dir.iterdir(): - if item.is_dir(): - rewrite_imports(item, vendored_libs) - elif item.name not in FILE_WHITE_LIST: - rewrite_file_imports(item, vendored_libs) - - # Special cases: apply stored patches - log("Apply patches") - patch_dir = Path(__file__).parent / 'patches' - for patch in patch_dir.glob('*.patch'): - apply_patch(ctx, patch) - - -def download_licenses(ctx, vendor_dir): - log('Downloading licenses') - tmp_dir = vendor_dir / '__tmp__' - ctx.run( - 'pip download -r {0}/vendor.txt --no-binary ' - ':all: --no-deps -d {1}'.format( - str(vendor_dir), - str(tmp_dir), - ) - ) - for sdist in tmp_dir.iterdir(): - extract_license(vendor_dir, sdist) - drop_dir(tmp_dir) - - -def extract_license(vendor_dir, sdist): - if sdist.suffixes[-2] == '.tar': - ext = sdist.suffixes[-1][1:] - with tarfile.open(sdist, mode='r:{}'.format(ext)) as tar: - found = find_and_extract_license(vendor_dir, tar, tar.getmembers()) - elif sdist.suffixes[-1] == '.zip': - with zipfile.ZipFile(sdist) as zip: - found = find_and_extract_license(vendor_dir, zip, zip.infolist()) - else: - raise NotImplementedError('new sdist type!') - - if not found: - log('License not found in {}, will download'.format(sdist.name)) - license_fallback(vendor_dir, sdist.name) - - -def find_and_extract_license(vendor_dir, tar, members): - found = False - for member in members: - try: - name = member.name - except AttributeError: # zipfile - name = member.filename - if 'LICENSE' in name or 'COPYING' in name: - if '/test' in name: - # some testing licenses in html5lib and distlib - log('Ignoring {}'.format(name)) - continue - found = True - extract_license_member(vendor_dir, tar, member, name) - return found - - -def license_fallback(vendor_dir, sdist_name): - """Hardcoded license URLs. Check when updating if those are still needed""" - libname = libname_from_dir(sdist_name) - if libname not in HARDCODED_LICENSE_URLS: - raise ValueError('No hardcoded URL for {} license'.format(libname)) - - url = HARDCODED_LICENSE_URLS[libname] - _, _, name = url.rpartition('/') - dest = license_destination(vendor_dir, libname, name) - log('Downloading {}'.format(url)) - r = requests.get(url, allow_redirects=True) - r.raise_for_status() - dest.write_bytes(r.content) - - -def libname_from_dir(dirname): - """Reconstruct the library name without it's version""" - parts = [] - for part in dirname.split('-'): - if part[0].isdigit(): - break - parts.append(part) - return '-'.join(parts) - - -def license_destination(vendor_dir, libname, filename): - """Given the (reconstructed) library name, find appropriate destination""" - normal = vendor_dir / libname - if normal.is_dir(): - return normal / filename - lowercase = vendor_dir / libname.lower() - if lowercase.is_dir(): - return lowercase / filename - if libname in LIBRARY_DIRNAMES: - return vendor_dir / LIBRARY_DIRNAMES[libname] / filename - # fallback to libname.LICENSE (used for nondirs) - return vendor_dir / '{}.{}'.format(libname, filename) - - -def extract_license_member(vendor_dir, tar, member, name): - mpath = Path(name) # relative path inside the sdist - dirname = list(mpath.parents)[-2].name # -1 is . - libname = libname_from_dir(dirname) - dest = license_destination(vendor_dir, libname, mpath.name) - dest_relative = dest.relative_to(Path.cwd()) - log('Extracting {} into {}'.format(name, dest_relative)) - try: - fileobj = tar.extractfile(member) - dest.write_bytes(fileobj.read()) - except AttributeError: # zipfile - dest.write_bytes(tar.read(member)) - - -@invoke.task -def update_stubs(ctx): - vendor_dir = _get_vendor_dir(ctx) - vendored_libs = detect_vendored_libs(vendor_dir) - - print("[vendoring.update_stubs] Add mypy stubs") - generate_stubs(vendor_dir, vendored_libs) - - -@invoke.task(name="update", post=[update_stubs]) -def main(ctx): - vendor_dir = _get_vendor_dir(ctx) - log('Using vendor dir: %s' % vendor_dir) - clean_vendor(ctx, vendor_dir) - vendor(ctx, vendor_dir) - download_licenses(ctx, vendor_dir) - log('Revendoring complete') diff --git a/tools/automation/vendoring/typing.py b/tools/automation/vendoring/typing.py deleted file mode 100644 index 35f7e0bfff0..00000000000 --- a/tools/automation/vendoring/typing.py +++ /dev/null @@ -1,59 +0,0 @@ -"""Logic for adding static typing related stubs of vendored dependencies. - -We autogenerate `.pyi` stub files for the vendored modules, when vendoring. -These .pyi files are not distributed (thanks to MANIFEST.in). The stub files -are merely `from ... import *` but they do what they're supposed to and mypy -is able to find the correct declarations using these files. -""" - -import os -from pathlib import Path -from typing import Dict, Iterable, List, Tuple - -EXTRA_STUBS_NEEDED = { - # Some projects need stubs other than a simple .pyi - "six": [ - "six.__init__", - "six.moves.__init__", - "six.moves.configparser", - ], - # Some projects should not have stubs because they're a single module - "appdirs": [], - "contextlib2": [], -} # type: Dict[str, List[str]] - - -def determine_stub_files(lib): - # type: (str) -> Iterable[Tuple[str, str]] - # There's no special handling needed -- a .pyi file is good enough - if lib not in EXTRA_STUBS_NEEDED: - yield lib + ".pyi", lib - return - - # Need to generate the given stubs, with the correct import names - for import_name in EXTRA_STUBS_NEEDED[lib]: - rel_location = import_name.replace(".", os.sep) + ".pyi" - - # Writing an __init__.pyi file -> don't import from `pkg.__init__` - if import_name.endswith(".__init__"): - import_name = import_name[:-9] - - yield rel_location, import_name - - -def write_stub(destination, import_name): - # type: (Path, str) -> None - # Create the parent directories if needed. - if not destination.parent.exists(): - destination.parent.mkdir() - - # Write `from ... import *` in the stub file. - destination.write_text("from %s import *" % import_name) - - -def generate_stubs(vendor_dir, libraries): - # type: (Path, List[str]) -> None - for lib in libraries: - for rel_location, import_name in determine_stub_files(lib): - destination = vendor_dir / rel_location - write_stub(destination, import_name) From 618b2d839391e9041cd9e5430a7ba4d0d6bad073 Mon Sep 17 00:00:00 2001 From: Pradyun Gedam Date: Sun, 15 Dec 2019 12:34:35 +0530 Subject: [PATCH 4/5] :newspaper: --- news/4785.process | 1 + 1 file changed, 1 insertion(+) create mode 100644 news/4785.process diff --git a/news/4785.process b/news/4785.process new file mode 100644 index 00000000000..022510f4b18 --- /dev/null +++ b/news/4785.process @@ -0,0 +1 @@ +Switch to a dedicated CLI tool for vendoring dependencies. From 6929cc6d6ba1b6bda4252832749790ed10e55a71 Mon Sep 17 00:00:00 2001 From: Pradyun Gedam Date: Sun, 15 Dec 2019 21:22:04 +0530 Subject: [PATCH 5/5] Address review comments These were all nitpicks but hey, that's good news. :) --- pyproject.toml | 4 ++-- tox.ini | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 645d54a7550..01fae701523 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -30,8 +30,8 @@ patches-dir = "tools/automation/vendoring/patches" [tool.vendoring.transformations] substitute = [ # pkg_resource's vendored packages are directly vendored in pip. - { match='pkg_resources\.extern', replace='pip._vendor' }, - { match='from \.extern', replace='from pip._vendor' }, + { match='pkg_resources\.extern', replace="pip._vendor" }, + { match='from \.extern', replace="from pip._vendor" }, ] drop = [ # contains unnecessary scripts diff --git a/tox.ini b/tox.ini index 5f9b6f4284b..6be97232065 100644 --- a/tox.ini +++ b/tox.ini @@ -52,7 +52,7 @@ basepython = python3.8 skip_install = True commands_pre = deps = - vendoring == 0.2.2 + vendoring==0.2.2 # Required, otherwise we interpret --no-binary :all: as # "do not build wheels", which fails for PEP 517 requirements pip>=19.3.1