Skip to content

Commit 68511b4

Browse files
committed
refactor(pypi): implement PEP508 compliant marker evaluation
This implements the PEP508 compliant marker evaluation in starlark and removes the need for the Python interpreter when evaluating requirements files passed to `pip.parse`. This makes the evaluation faster and allows us to fix a few known issues (bazel-contrib#2690). In the future the intent is to move the `METADATA` parsing to pure starlark so that the `RequiresDist` could be parsed in starlark at the macro evaluation or analysis phases. This should make it possible to more easily solve the design problem that more and more things need to be passed to `whl_library` as args to have a robust dependency parsing: * bazel-contrib#2319 needs the full Python version to have correct cross-platform compatible METADATA parsing and passing it to `Python` and back makes it difficult/annoying to implement. * Parsing the `METADATA` file requires the precise list of target platform or the list of available packages in the `requirements.txt`. This means that without it we cannot trim the dependency tree in the `whl_library`. Doing this at macro loading phase allows us to depend on `.bzl` files in the `hub_repository` and more effectively pass information. Fixes bazel-contrib#2423
1 parent 175fe4c commit 68511b4

15 files changed

+1027
-178
lines changed

python/private/pypi/BUILD.bazel

+34-1
Original file line numberDiff line numberDiff line change
@@ -75,7 +75,9 @@ bzl_library(
7575
name = "evaluate_markers_bzl",
7676
srcs = ["evaluate_markers.bzl"],
7777
deps = [
78-
":pypi_repo_utils_bzl",
78+
":pep508_env_bzl",
79+
":pep508_evaluate_bzl",
80+
":pep508_req_bzl",
7981
],
8082
)
8183

@@ -208,6 +210,37 @@ bzl_library(
208210
],
209211
)
210212

213+
bzl_library(
214+
name = "pep508_bzl",
215+
srcs = ["pep508.bzl"],
216+
deps = [
217+
":pep508_env_bzl",
218+
":pep508_evaluate_bzl",
219+
],
220+
)
221+
222+
bzl_library(
223+
name = "pep508_env_bzl",
224+
srcs = ["pep508_env.bzl"],
225+
)
226+
227+
bzl_library(
228+
name = "pep508_evaluate_bzl",
229+
srcs = ["pep508_evaluate.bzl"],
230+
deps = [
231+
"//python/private:enum_bzl",
232+
"//python/private:semver_bzl",
233+
],
234+
)
235+
236+
bzl_library(
237+
name = "pep508_req_bzl",
238+
srcs = ["pep508_req.bzl"],
239+
deps = [
240+
"//python/private:normalize_name_bzl",
241+
],
242+
)
243+
211244
bzl_library(
212245
name = "pip_bzl",
213246
srcs = ["pip.bzl"],

python/private/pypi/evaluate_markers.bzl

+13-54
Original file line numberDiff line numberDiff line change
@@ -14,65 +14,24 @@
1414

1515
"""A simple function that evaluates markers using a python interpreter."""
1616

17-
load(":deps.bzl", "record_files")
18-
load(":pypi_repo_utils.bzl", "pypi_repo_utils")
17+
load(":pep508_env.bzl", "env", _platform_from_str = "platform_from_str")
18+
load(":pep508_evaluate.bzl", "evaluate")
19+
load(":pep508_req.bzl", _req = "requirement")
1920

20-
# Used as a default value in a rule to ensure we fetch the dependencies.
21-
SRCS = [
22-
# When the version, or any of the files in `packaging` package changes,
23-
# this file will change as well.
24-
record_files["pypi__packaging"],
25-
Label("//python/private/pypi/requirements_parser:resolve_target_platforms.py"),
26-
Label("//python/private/pypi/whl_installer:platform.py"),
27-
]
28-
29-
def evaluate_markers(mrctx, *, requirements, python_interpreter, python_interpreter_target, srcs, logger = None):
21+
def evaluate_markers(requirements):
3022
"""Return the list of supported platforms per requirements line.
3123
3224
Args:
33-
mrctx: repository_ctx or module_ctx.
34-
requirements: list[str] of the requirement file lines to evaluate.
35-
python_interpreter: str, path to the python_interpreter to use to
36-
evaluate the env markers in the given requirements files. It will
37-
be only called if the requirements files have env markers. This
38-
should be something that is in your PATH or an absolute path.
39-
python_interpreter_target: Label, same as python_interpreter, but in a
40-
label format.
41-
srcs: list[Label], the value of SRCS passed from the `rctx` or `mctx` to this function.
42-
logger: repo_utils.logger or None, a simple struct to log diagnostic
43-
messages. Defaults to None.
25+
requirements: dict[str, list[str]] of the requirement file lines to evaluate.
4426
4527
Returns:
4628
dict of string lists with target platforms
4729
"""
48-
if not requirements:
49-
return {}
50-
51-
in_file = mrctx.path("requirements_with_markers.in.json")
52-
out_file = mrctx.path("requirements_with_markers.out.json")
53-
mrctx.file(in_file, json.encode(requirements))
54-
55-
pypi_repo_utils.execute_checked(
56-
mrctx,
57-
op = "ResolveRequirementEnvMarkers({})".format(in_file),
58-
python = pypi_repo_utils.resolve_python_interpreter(
59-
mrctx,
60-
python_interpreter = python_interpreter,
61-
python_interpreter_target = python_interpreter_target,
62-
),
63-
arguments = [
64-
"-m",
65-
"python.private.pypi.requirements_parser.resolve_target_platforms",
66-
in_file,
67-
out_file,
68-
],
69-
srcs = srcs,
70-
environment = {
71-
"PYTHONPATH": [
72-
Label("@pypi__packaging//:BUILD.bazel"),
73-
Label("//:BUILD.bazel"),
74-
],
75-
},
76-
logger = logger,
77-
)
78-
return json.decode(mrctx.read(out_file))
30+
ret = {}
31+
for req_string, platforms in requirements.items():
32+
req = _req(req_string)
33+
for platform in platforms:
34+
if evaluate(req.marker, env = env(_platform_from_str(platform, None))):
35+
ret.setdefault(req_string, []).append(platform)
36+
37+
return ret

python/private/pypi/extension.bzl

+5-30
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@ load("//python/private:repo_utils.bzl", "repo_utils")
2222
load("//python/private:semver.bzl", "semver")
2323
load("//python/private:version_label.bzl", "version_label")
2424
load(":attrs.bzl", "use_isolated")
25-
load(":evaluate_markers.bzl", "evaluate_markers", EVALUATE_MARKERS_SRCS = "SRCS")
25+
load(":evaluate_markers.bzl", "evaluate_markers")
2626
load(":hub_repository.bzl", "hub_repository", "whl_config_settings_to_json")
2727
load(":parse_requirements.bzl", "parse_requirements")
2828
load(":parse_whl_name.bzl", "parse_whl_name")
@@ -166,28 +166,10 @@ def _create_whl_repos(
166166
),
167167
extra_pip_args = pip_attr.extra_pip_args,
168168
get_index_urls = get_index_urls,
169-
# NOTE @aignas 2024-08-02: , we will execute any interpreter that we find either
170-
# in the PATH or if specified as a label. We will configure the env
171-
# markers when evaluating the requirement lines based on the output
172-
# from the `requirements_files_by_platform` which should have something
173-
# similar to:
174-
# {
175-
# "//:requirements.txt": ["cp311_linux_x86_64", ...]
176-
# }
177-
#
178-
# We know the target python versions that we need to evaluate the
179-
# markers for and thus we don't need to use multiple python interpreter
180-
# instances to perform this manipulation. This function should be executed
181-
# only once by the underlying code to minimize the overhead needed to
182-
# spin up a Python interpreter.
183-
evaluate_markers = lambda module_ctx, requirements: evaluate_markers(
184-
module_ctx,
185-
requirements = requirements,
186-
python_interpreter = pip_attr.python_interpreter,
187-
python_interpreter_target = python_interpreter_target,
188-
srcs = pip_attr._evaluate_markers_srcs,
189-
logger = logger,
190-
),
169+
# NOTE @aignas 2025-02-24: we will use the "cp3xx_os_arch" platform labels
170+
# for converting to the PEP508 environment and will evaluate them in starlark
171+
# without involving the interpreter at all.
172+
evaluate_markers = evaluate_markers,
191173
logger = logger,
192174
)
193175

@@ -764,13 +746,6 @@ a corresponding `python.toolchain()` configured.
764746
doc = """\
765747
A dict of labels to wheel names that is typically generated by the whl_modifications.
766748
The labels are JSON config files describing the modifications.
767-
""",
768-
),
769-
"_evaluate_markers_srcs": attr.label_list(
770-
default = EVALUATE_MARKERS_SRCS,
771-
doc = """\
772-
The list of labels to use as SRCS for the marker evaluation code. This ensures that the
773-
code will be re-evaluated when any of files in the default changes.
774749
""",
775750
),
776751
}, **ATTRS)

python/private/pypi/parse_requirements.bzl

+6-6
Original file line numberDiff line numberDiff line change
@@ -67,10 +67,10 @@ def parse_requirements(
6767
of the distribution URLs from a PyPI index. Accepts ctx and
6868
distribution names to query.
6969
evaluate_markers: A function to use to evaluate the requirements.
70-
Accepts the ctx and a dict where keys are requirement lines to
71-
evaluate against the platforms stored as values in the input dict.
72-
Returns the same dict, but with values being platforms that are
73-
compatible with the requirements line.
70+
Accepts a dict where keys are requirement lines to evaluate against
71+
the platforms stored as values in the input dict. Returns the same
72+
dict, but with values being platforms that are compatible with the
73+
requirements line.
7474
logger: repo_utils.logger or None, a simple struct to log diagnostic messages.
7575
7676
Returns:
@@ -93,7 +93,7 @@ def parse_requirements(
9393
9494
The second element is extra_pip_args should be passed to `whl_library`.
9595
"""
96-
evaluate_markers = evaluate_markers or (lambda *_: {})
96+
evaluate_markers = evaluate_markers or (lambda _: {})
9797
options = {}
9898
requirements = {}
9999
for file, plats in requirements_by_platform.items():
@@ -168,7 +168,7 @@ def parse_requirements(
168168
# to do, we could use Python to parse the requirement lines and infer the
169169
# URL of the files to download things from. This should be important for
170170
# VCS package references.
171-
env_marker_target_platforms = evaluate_markers(ctx, reqs_with_env_markers)
171+
env_marker_target_platforms = evaluate_markers(reqs_with_env_markers)
172172
if logger:
173173
logger.debug(lambda: "Evaluated env markers from:\n{}\n\nTo:\n{}".format(
174174
reqs_with_env_markers,

python/private/pypi/pep508.bzl

+23
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
# Copyright 2025 The Bazel Authors. All rights reserved.
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
"""This module is for implementing PEP508 in starlark as FeatureFlagInfo
16+
"""
17+
18+
load(":pep508_env.bzl", _env = "env")
19+
load(":pep508_evaluate.bzl", _evaluate = "evaluate", _to_string = "to_string")
20+
21+
to_string = _to_string
22+
evaluate = _evaluate
23+
env = _env

python/private/pypi/pep508_env.bzl

+109
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,109 @@
1+
# Copyright 2025 The Bazel Authors. All rights reserved.
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
"""This module is for implementing PEP508 environment definition.
16+
"""
17+
18+
_platform_machine_values = {
19+
"aarch64": "arm64",
20+
"ppc": "ppc64le",
21+
"s390x": "s390x",
22+
"x86_32": "i386",
23+
"x86_64": "x86_64",
24+
}
25+
_platform_system_values = {
26+
"linux": "Linux",
27+
"osx": "Darwin",
28+
"windows": "Windows",
29+
}
30+
_sys_platform_values = {
31+
"linux": "posix",
32+
"osx": "darwin",
33+
"windows": "win32",
34+
}
35+
_os_name_values = {
36+
"linux": "posix",
37+
"osx": "posix",
38+
"windows": "nt",
39+
}
40+
41+
def env(target_platform, *, extra = None):
42+
"""Return an env target platform
43+
44+
Args:
45+
target_platform: {type}`str` the target platform identifier, e.g.
46+
`cp33_linux_aarch64`
47+
extra: {type}`str` the extra value to be added into the env.
48+
49+
Returns:
50+
A dict that can be used as `env` in the marker evaluation.
51+
"""
52+
53+
# TODO @aignas 2025-02-13: consider moving this into config settings.
54+
55+
env = {"extra": extra} if extra != None else {}
56+
env = env | {
57+
"implementation_name": "cpython",
58+
"platform_python_implementation": "CPython",
59+
"platform_release": "",
60+
"platform_version": "",
61+
}
62+
if target_platform.abi:
63+
minor_version, _, micro_version = target_platform.abi[3:].partition(".")
64+
micro_version = micro_version or "0"
65+
env = env | {
66+
"implementation_version": "3.{}.{}".format(minor_version, micro_version),
67+
"python_full_version": "3.{}.{}".format(minor_version, micro_version),
68+
"python_version": "3.{}".format(minor_version),
69+
}
70+
if target_platform.os and target_platform.arch:
71+
os = target_platform.os
72+
arch = target_platform.arch
73+
env = env | {
74+
"os_name": _os_name_values.get(os, ""),
75+
"platform_machine": "aarch64" if (os, arch) == ("linux", "aarch64") else _platform_machine_values.get(arch, ""),
76+
"platform_system": _platform_system_values.get(os, ""),
77+
"sys_platform": _sys_platform_values.get(os, ""),
78+
}
79+
80+
# This is split by topic
81+
return env
82+
83+
def _platform(*, abi = None, os = None, arch = None):
84+
return struct(
85+
abi = abi,
86+
os = os,
87+
arch = arch,
88+
)
89+
90+
def platform_from_str(p, python_version):
91+
"""Return a platform from a string.
92+
93+
Args:
94+
p: {type}`str` the actual string.
95+
python_version: {type}`str` the python version to add to platform if needed.
96+
97+
Returns:
98+
A struct that is returned by the `_platform` function.
99+
"""
100+
if p.startswith("cp"):
101+
abi, _, p = p.partition("_")
102+
elif python_version:
103+
major, _, tail = python_version.partition(".")
104+
abi = "cp{}{}".format(major, tail)
105+
else:
106+
abi = None
107+
108+
os, _, arch = p.partition("_")
109+
return _platform(abi = abi, os = os or None, arch = arch or None)

0 commit comments

Comments
 (0)