From 51140bcbb65e14358f7573d03838677216967d5b Mon Sep 17 00:00:00 2001 From: Ignas Anikevicius <240938+aignas@users.noreply.github.com> Date: Sat, 3 May 2025 22:36:44 +0900 Subject: [PATCH 1/3] feat: add an env variable to toggle pipstar This is a flag to start leveraging the new codepaths. Work towards #260 --- CHANGELOG.md | 2 + docs/environment-variables.md | 9 + python/private/internal_config_repo.bzl | 4 + .../private/pypi/whl_installer/arguments.py | 5 + .../pypi/whl_installer/wheel_installer.py | 44 ++-- python/private/pypi/whl_library.bzl | 207 ++++++++++++------ 6 files changed, 185 insertions(+), 86 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index a8cac4c5cd..01be94aa6b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -80,6 +80,8 @@ END_UNRELEASED_TEMPLATE * Repo utilities `execute_unchecked`, `execute_checked`, and `execute_checked_stdout` now support `log_stdout` and `log_stderr` keyword arg booleans. When these are `True` (the default), the subprocess's stdout/stderr will be logged. +* (pypi) A new environment variable `RULES_PYTHON_ENABLE_PIPSTAR` to toggle the + Starlark implementation of the whl METADATA parsing. Set it to `1` to enable. {#v0-0-0-removed} ### Removed diff --git a/docs/environment-variables.md b/docs/environment-variables.md index 49fdf766f6..26c171095d 100644 --- a/docs/environment-variables.md +++ b/docs/environment-variables.md @@ -60,6 +60,15 @@ The default became `1` if unspecified ::: :::: +::::{envvar} RULES_PYTHON_ENABLE_PIPSTAR + +When `1`, the rules_python Starlark implementation of the pypi/pip integration is used +instead of the legacy Python scripts. 
+ +:::{versionadded} VERSION_NEXT_FEATURE +::: +:::: + ::::{envvar} RULES_PYTHON_EXTRACT_ROOT Directory to use as the root for creating files necessary for bootstrapping so diff --git a/python/private/internal_config_repo.bzl b/python/private/internal_config_repo.bzl index a5c4787161..cfe2fdfd77 100644 --- a/python/private/internal_config_repo.bzl +++ b/python/private/internal_config_repo.bzl @@ -20,6 +20,8 @@ settings for rules to later use. load(":repo_utils.bzl", "repo_utils") +_ENABLE_PIPSTAR_ENVVAR_NAME = "RULES_PYTHON_ENABLE_PIPSTAR" +_ENABLE_PIPSTAR_DEFAULT = "0" _ENABLE_PYSTAR_ENVVAR_NAME = "RULES_PYTHON_ENABLE_PYSTAR" _ENABLE_PYSTAR_DEFAULT = "1" _ENABLE_DEPRECATION_WARNINGS_ENVVAR_NAME = "RULES_PYTHON_DEPRECATION_WARNINGS" @@ -28,6 +30,7 @@ _ENABLE_DEPRECATION_WARNINGS_DEFAULT = "0" _CONFIG_TEMPLATE = """\ config = struct( enable_pystar = {enable_pystar}, + enable_pipstar = {enable_pipstar}, enable_deprecation_warnings = {enable_deprecation_warnings}, BuiltinPyInfo = getattr(getattr(native, "legacy_globals", None), "PyInfo", {builtin_py_info_symbol}), BuiltinPyRuntimeInfo = getattr(getattr(native, "legacy_globals", None), "PyRuntimeInfo", {builtin_py_runtime_info_symbol}), @@ -84,6 +87,7 @@ def _internal_config_repo_impl(rctx): rctx.file("rules_python_config.bzl", _CONFIG_TEMPLATE.format( enable_pystar = enable_pystar, + enable_pipstar = _bool_from_environ(rctx, _ENABLE_PIPSTAR_ENVVAR_NAME, _ENABLE_PIPSTAR_DEFAULT), enable_deprecation_warnings = _bool_from_environ(rctx, _ENABLE_DEPRECATION_WARNINGS_ENVVAR_NAME, _ENABLE_DEPRECATION_WARNINGS_DEFAULT), builtin_py_info_symbol = builtin_py_info_symbol, builtin_py_runtime_info_symbol = builtin_py_runtime_info_symbol, diff --git a/python/private/pypi/whl_installer/arguments.py b/python/private/pypi/whl_installer/arguments.py index 29bea8026e..ea609bef9d 100644 --- a/python/private/pypi/whl_installer/arguments.py +++ b/python/private/pypi/whl_installer/arguments.py @@ -47,6 +47,11 @@ def parser(**kwargs: Any) -> 
argparse.ArgumentParser: type=Platform.from_string, help="Platforms to target dependencies. Can be used multiple times.", ) + parser.add_argument( + "--enable-pipstar", + action="store_true", + help="Disable certain code paths if we expect to process the whl in Starlark.", + ) parser.add_argument( "--pip_data_exclude", action="store", diff --git a/python/private/pypi/whl_installer/wheel_installer.py b/python/private/pypi/whl_installer/wheel_installer.py index a48df699ba..2db03e039d 100644 --- a/python/private/pypi/whl_installer/wheel_installer.py +++ b/python/private/pypi/whl_installer/wheel_installer.py @@ -104,6 +104,7 @@ def _setup_namespace_pkg_compatibility(wheel_dir: str) -> None: def _extract_wheel( wheel_file: str, extras: Dict[str, Set[str]], + enable_pipstar: bool, enable_implicit_namespace_pkgs: bool, platforms: List[wheel.Platform], installation_dir: Path = Path("."), @@ -114,6 +115,7 @@ def _extract_wheel( wheel_file: the filepath of the .whl installation_dir: the destination directory for installation of the wheel. extras: a list of extras to add as dependencies for the installed wheel + enable_pipstar: if true, turns off certain operations. 
enable_implicit_namespace_pkgs: if true, disables conversion of implicit namespace packages and will unzip as-is """ @@ -123,26 +125,31 @@ def _extract_wheel( if not enable_implicit_namespace_pkgs: _setup_namespace_pkg_compatibility(installation_dir) - extras_requested = extras[whl.name] if whl.name in extras else set() - - dependencies = whl.dependencies(extras_requested, platforms) + metadata = { + "python_version": f"{sys.version_info[0]}.{sys.version_info[1]}.{sys.version_info[2]}", + "entry_points": [ + { + "name": name, + "module": module, + "attribute": attribute, + } + for name, (module, attribute) in sorted(whl.entry_points().items()) + ], + } + if not enable_pipstar: + extras_requested = extras[whl.name] if whl.name in extras else set() + dependencies = whl.dependencies(extras_requested, platforms) + + metadata.update( + { + "name": whl.name, + "version": whl.version, + "deps": dependencies.deps, + "deps_by_platform": dependencies.deps_select, + } + ) with open(os.path.join(installation_dir, "metadata.json"), "w") as f: - metadata = { - "name": whl.name, - "version": whl.version, - "deps": dependencies.deps, - "python_version": f"{sys.version_info[0]}.{sys.version_info[1]}.{sys.version_info[2]}", - "deps_by_platform": dependencies.deps_select, - "entry_points": [ - { - "name": name, - "module": module, - "attribute": attribute, - } - for name, (module, attribute) in sorted(whl.entry_points().items()) - ], - } json.dump(metadata, f) @@ -161,6 +168,7 @@ def main() -> None: _extract_wheel( wheel_file=whl, extras=extras, + enable_pipstar=args.enable_pipstar, enable_implicit_namespace_pkgs=args.enable_implicit_namespace_pkgs, platforms=arguments.get_platforms(args), ) diff --git a/python/private/pypi/whl_library.bzl b/python/private/pypi/whl_library.bzl index 0c09f7960a..160bb5b799 100644 --- a/python/private/pypi/whl_library.bzl +++ b/python/private/pypi/whl_library.bzl @@ -14,6 +14,7 @@ "" +load("@rules_python_internal//:rules_python_config.bzl", rp_config = 
"config") load("//python/private:auth.bzl", "AUTH_ATTRS", "get_auth") load("//python/private:envsubst.bzl", "envsubst") load("//python/private:is_standalone_interpreter.bzl", "is_standalone_interpreter") @@ -21,9 +22,11 @@ load("//python/private:repo_utils.bzl", "REPO_DEBUG_ENV_VAR", "repo_utils") load(":attrs.bzl", "ATTRS", "use_isolated") load(":deps.bzl", "all_repo_names", "record_files") load(":generate_whl_library_build_bazel.bzl", "generate_whl_library_build_bazel") +load(":parse_requirements.bzl", "host_platform") load(":parse_whl_name.bzl", "parse_whl_name") load(":patch_whl.bzl", "patch_whl") load(":pypi_repo_utils.bzl", "pypi_repo_utils") +load(":whl_metadata.bzl", "whl_metadata") load(":whl_target_platforms.bzl", "whl_target_platforms") _CPPFLAGS = "CPPFLAGS" @@ -340,79 +343,147 @@ def _whl_library_impl(rctx): timeout = rctx.attr.timeout, ) - target_platforms = rctx.attr.experimental_target_platforms or [] - if target_platforms: - parsed_whl = parse_whl_name(whl_path.basename) - - # NOTE @aignas 2023-12-04: if the wheel is a platform specific wheel, we - # only include deps for that target platform - if parsed_whl.platform_tag != "any": - target_platforms = [ - p.target_platform - for p in whl_target_platforms( - platform_tag = parsed_whl.platform_tag, - abi_tag = parsed_whl.abi_tag.strip("tm"), - ) - ] - - pypi_repo_utils.execute_checked( - rctx, - op = "whl_library.ExtractWheel({}, {})".format(rctx.attr.name, whl_path), - python = python_interpreter, - arguments = args + [ - "--whl-file", - whl_path, - ] + ["--platform={}".format(p) for p in target_platforms], - srcs = rctx.attr._python_srcs, - environment = environment, - quiet = rctx.attr.quiet, - timeout = rctx.attr.timeout, - logger = logger, - ) + if rp_config.enable_pipstar: + pypi_repo_utils.execute_checked( + rctx, + op = "whl_library.ExtractWheel({}, {})".format(rctx.attr.name, whl_path), + python = python_interpreter, + arguments = args + [ + "--whl-file", + whl_path, + "--enable-pipstar", + 
], + srcs = rctx.attr._python_srcs, + environment = environment, + quiet = rctx.attr.quiet, + timeout = rctx.attr.timeout, + logger = logger, + ) - metadata = json.decode(rctx.read("metadata.json")) - rctx.delete("metadata.json") + metadata = json.decode(rctx.read("metadata.json")) + rctx.delete("metadata.json") + python_version = metadata["python_version"] - # NOTE @aignas 2024-06-22: this has to live on until we stop supporting - # passing `twine` as a `:pkg` library via the `WORKSPACE` builds. - # - # See ../../packaging.bzl line 190 - entry_points = {} - for item in metadata["entry_points"]: - name = item["name"] - module = item["module"] - attribute = item["attribute"] - - # There is an extreme edge-case with entry_points that end with `.py` - # See: https://github.com/bazelbuild/bazel/blob/09c621e4cf5b968f4c6cdf905ab142d5961f9ddc/src/test/java/com/google/devtools/build/lib/rules/python/PyBinaryConfiguredTargetTest.java#L174 - entry_point_without_py = name[:-3] + "_py" if name.endswith(".py") else name - entry_point_target_name = ( - _WHEEL_ENTRY_POINT_PREFIX + "_" + entry_point_without_py + # NOTE @aignas 2024-06-22: this has to live on until we stop supporting + # passing `twine` as a `:pkg` library via the `WORKSPACE` builds. 
+ # + # See ../../packaging.bzl line 190 + entry_points = {} + for item in metadata["entry_points"]: + name = item["name"] + module = item["module"] + attribute = item["attribute"] + + # There is an extreme edge-case with entry_points that end with `.py` + # See: https://github.com/bazelbuild/bazel/blob/09c621e4cf5b968f4c6cdf905ab142d5961f9ddc/src/test/java/com/google/devtools/build/lib/rules/python/PyBinaryConfiguredTargetTest.java#L174 + entry_point_without_py = name[:-3] + "_py" if name.endswith(".py") else name + entry_point_target_name = ( + _WHEEL_ENTRY_POINT_PREFIX + "_" + entry_point_without_py + ) + entry_point_script_name = entry_point_target_name + ".py" + + rctx.file( + entry_point_script_name, + _generate_entry_point_contents(module, attribute), + ) + entry_points[entry_point_without_py] = entry_point_script_name + + metadata = whl_metadata( + install_dir = whl_path.dirname.get_child("site-packages"), + read_fn = rctx.read, + logger = logger, ) - entry_point_script_name = entry_point_target_name + ".py" - rctx.file( - entry_point_script_name, - _generate_entry_point_contents(module, attribute), + build_file_contents = generate_whl_library_build_bazel( + name = whl_path.basename, + dep_template = rctx.attr.dep_template or "@{}{{name}}//:{{target}}".format(rctx.attr.repo_prefix), + entry_points = entry_points, + metadata_name = metadata.name, + metadata_version = metadata.version, + default_python_version = python_version, + requires_dist = metadata.requires_dist, + target_platforms = rctx.attr.experimental_target_platforms or [host_platform(rctx)], + # TODO @aignas 2025-04-14: load through the hub: + annotation = None if not rctx.attr.annotation else struct(**json.decode(rctx.read(rctx.attr.annotation))), + data_exclude = rctx.attr.pip_data_exclude, + group_deps = rctx.attr.group_deps, + group_name = rctx.attr.group_name, ) - entry_points[entry_point_without_py] = entry_point_script_name - - build_file_contents = generate_whl_library_build_bazel( - name 
= whl_path.basename, - dep_template = rctx.attr.dep_template or "@{}{{name}}//:{{target}}".format(rctx.attr.repo_prefix), - entry_points = entry_points, - # TODO @aignas 2025-04-14: load through the hub: - dependencies = metadata["deps"], - dependencies_by_platform = metadata["deps_by_platform"], - annotation = None if not rctx.attr.annotation else struct(**json.decode(rctx.read(rctx.attr.annotation))), - data_exclude = rctx.attr.pip_data_exclude, - group_deps = rctx.attr.group_deps, - group_name = rctx.attr.group_name, - tags = [ - "pypi_name={}".format(metadata["name"]), - "pypi_version={}".format(metadata["version"]), - ], - ) + else: + target_platforms = rctx.attr.experimental_target_platforms or [] + if target_platforms: + parsed_whl = parse_whl_name(whl_path.basename) + + # NOTE @aignas 2023-12-04: if the wheel is a platform specific wheel, we + # only include deps for that target platform + if parsed_whl.platform_tag != "any": + target_platforms = [ + p.target_platform + for p in whl_target_platforms( + platform_tag = parsed_whl.platform_tag, + abi_tag = parsed_whl.abi_tag.strip("tm"), + ) + ] + + pypi_repo_utils.execute_checked( + rctx, + op = "whl_library.ExtractWheel({}, {})".format(rctx.attr.name, whl_path), + python = python_interpreter, + arguments = args + [ + "--whl-file", + whl_path, + ] + ["--platform={}".format(p) for p in target_platforms], + srcs = rctx.attr._python_srcs, + environment = environment, + quiet = rctx.attr.quiet, + timeout = rctx.attr.timeout, + logger = logger, + ) + + metadata = json.decode(rctx.read("metadata.json")) + rctx.delete("metadata.json") + + # NOTE @aignas 2024-06-22: this has to live on until we stop supporting + # passing `twine` as a `:pkg` library via the `WORKSPACE` builds. 
+ # + # See ../../packaging.bzl line 190 + entry_points = {} + for item in metadata["entry_points"]: + name = item["name"] + module = item["module"] + attribute = item["attribute"] + + # There is an extreme edge-case with entry_points that end with `.py` + # See: https://github.com/bazelbuild/bazel/blob/09c621e4cf5b968f4c6cdf905ab142d5961f9ddc/src/test/java/com/google/devtools/build/lib/rules/python/PyBinaryConfiguredTargetTest.java#L174 + entry_point_without_py = name[:-3] + "_py" if name.endswith(".py") else name + entry_point_target_name = ( + _WHEEL_ENTRY_POINT_PREFIX + "_" + entry_point_without_py + ) + entry_point_script_name = entry_point_target_name + ".py" + + rctx.file( + entry_point_script_name, + _generate_entry_point_contents(module, attribute), + ) + entry_points[entry_point_without_py] = entry_point_script_name + + build_file_contents = generate_whl_library_build_bazel( + name = whl_path.basename, + dep_template = rctx.attr.dep_template or "@{}{{name}}//:{{target}}".format(rctx.attr.repo_prefix), + entry_points = entry_points, + # TODO @aignas 2025-04-14: load through the hub: + dependencies = metadata["deps"], + dependencies_by_platform = metadata["deps_by_platform"], + annotation = None if not rctx.attr.annotation else struct(**json.decode(rctx.read(rctx.attr.annotation))), + data_exclude = rctx.attr.pip_data_exclude, + group_deps = rctx.attr.group_deps, + group_name = rctx.attr.group_name, + tags = [ + "pypi_name={}".format(metadata["name"]), + "pypi_version={}".format(metadata["version"]), + ], + ) + rctx.file("BUILD.bazel", build_file_contents) return From 79a5fc3d0dcd47f2ed7d4bac869a42df17510388 Mon Sep 17 00:00:00 2001 From: Richard Levasseur Date: Sat, 3 May 2025 13:43:39 -0700 Subject: [PATCH 2/3] make changelog entry more user-focused --- CHANGELOG.md | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 01be94aa6b..a04ff60aa9 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -80,8 +80,9 @@ 
END_UNRELEASED_TEMPLATE * Repo utilities `execute_unchecked`, `execute_checked`, and `execute_checked_stdout` now support `log_stdout` and `log_stderr` keyword arg booleans. When these are `True` (the default), the subprocess's stdout/stderr will be logged. -* (pypi) A new environment variable `RULES_PYTHON_ENABLE_PIPSTAR` to toggle the - Starlark implementation of the whl METADATA parsing. Set it to `1` to enable. +* (pypi) `RULES_PYTHON_ENABLE_PIPSTAR` environment variable: when `1`, the Starlark + implementation of wheel METADATA parsing is used (which has improved multi-platform + build support). {#v0-0-0-removed} ### Removed From d04733ec328d9111749602de630330a0b1913a25 Mon Sep 17 00:00:00 2001 From: Richard Levasseur Date: Sat, 3 May 2025 13:45:46 -0700 Subject: [PATCH 3/3] wheel installer test: add missing arg --- tests/pypi/whl_installer/wheel_installer_test.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/pypi/whl_installer/wheel_installer_test.py b/tests/pypi/whl_installer/wheel_installer_test.py index b736877e81..e838047925 100644 --- a/tests/pypi/whl_installer/wheel_installer_test.py +++ b/tests/pypi/whl_installer/wheel_installer_test.py @@ -72,6 +72,7 @@ def test_wheel_exists(self) -> None: extras={}, enable_implicit_namespace_pkgs=False, platforms=[], + enable_pipstar = False, ) want_files = [