Skip to content

Commit b4b0424

Browse files
facutuesca, woodruffw, and di
authored
Verify URLs that link to the project page on PyPI (#16485)
* Verify URLs that link to the project page on PyPI
* fix test name
* use single helper function to verify urls
* Update warehouse/forklift/legacy.py (Co-authored-by: William Woodruff <[email protected]>)
* lint
* fix missing coverage
---------
Co-authored-by: William Woodruff <[email protected]>
Co-authored-by: Dustin Ingram <[email protected]>
1 parent 42a6ad1 commit b4b0424

File tree

2 files changed

+185
-10
lines changed

2 files changed

+185
-10
lines changed

tests/unit/forklift/test_legacy.py

+138-5
Original file line numberDiff line numberDiff line change
@@ -4864,12 +4864,145 @@ def test_missing_trailing_slash_redirect(pyramid_request):
48644864
"https://github.com",
48654865
False,
48664866
),
4867-
( # Publisher URL is None
4868-
"https://github.com/owner/project",
4869-
None,
4867+
],
4868+
)
4869+
def test_verify_url_with_trusted_publisher(url, publisher_url, expected):
4870+
assert legacy._verify_url_with_trusted_publisher(url, publisher_url) == expected
4871+
4872+
4873+
@pytest.mark.parametrize(
    ("url", "project_name", "project_normalized_name", "expected"),
    [
        (  # PyPI /project/ case
            "https://pypi.org/project/myproject",
            "myproject",
            "myproject",
            True,
        ),
        (  # PyPI /p/ case
            "https://pypi.org/p/myproject",
            "myproject",
            "myproject",
            True,
        ),
        (  # pypi.python.org /project/ case
            "https://pypi.python.org/project/myproject",
            "myproject",
            "myproject",
            True,
        ),
        (  # pypi.python.org /p/ case
            "https://pypi.python.org/p/myproject",
            "myproject",
            "myproject",
            True,
        ),
        (  # python.org/pypi/ case
            "https://python.org/pypi/myproject",
            "myproject",
            "myproject",
            True,
        ),
        (  # URL uses the non-normalized project name
            "https://pypi.org/project/my_project",
            "my_project",
            "my-project",
            True,
        ),
        (  # URL uses the normalized project name
            "https://pypi.org/project/my-project",
            "my_project",
            "my-project",
            True,
        ),
        (  # Trailing slash
            "https://pypi.org/project/myproject/",
            "myproject",
            "myproject",
            True,
        ),
        (  # Domains are case insensitive
            "https://PyPI.org/project/myproject",
            "myproject",
            "myproject",
            True,
        ),
        (  # Paths are case-sensitive
            "https://pypi.org/Project/myproject",
            "myproject",
            "myproject",
            False,
        ),
        (  # Wrong domain
            "https://example.com/project/myproject",
            "myproject",
            "myproject",
            False,
        ),
        (  # Wrong path
            "https://pypi.org/something/myproject",
            "myproject",
            "myproject",
            False,
        ),
        (  # Path has extra components (valid prefix, so only the suffix is wrong)
            "https://pypi.org/project/myproject/something",
            "myproject",
            "myproject",
            False,
        ),
        (  # Wrong package name
            "https://pypi.org/project/otherproject",
            "myproject",
            "myproject",
            False,
        ),
        (  # Similar package name: URL name is a prefix of the project name
            "https://pypi.org/project/myproject",
            "myproject2",
            "myproject2",
            False,
        ),
        (  # Similar package name: project name is a prefix of the URL name
            "https://pypi.org/project/myproject2",
            "myproject",
            "myproject",
            False,
        ),
    ],
)
def test_verify_url_pypi(url, project_name, project_normalized_name, expected):
    """
    Check `_verify_url_pypi` against accepted and rejected project page URLs.

    Each case supplies the URL under test, the project's name and normalized
    name, and whether the URL is expected to be verified.
    """
    assert (
        legacy._verify_url_pypi(url, project_name, project_normalized_name) == expected
    )
4984+
4985+
4986+
def test_verify_url():
    """
    Smoke-test the `_verify_url` dispatcher.

    `_verify_url` only delegates to `_verify_url_pypi` and
    `_verify_url_with_trusted_publisher`; the detailed verification logic is
    covered by the tests for those two functions.
    """
    project = {
        "project_name": "myproject",
        "project_normalized_name": "myproject",
    }

    # Verified through the PyPI project page check, with no publisher URL.
    pypi_page_verified = legacy._verify_url(
        url="https://pypi.org/project/myproject/",
        publisher_url=None,
        **project,
    )
    assert pypi_page_verified

    # Verified through the Trusted Publisher URL check.
    publisher_verified = legacy._verify_url(
        url="https://github.com/org/myproject/issues",
        publisher_url="https://github.com/org/myproject",
        **project,
    )
    assert publisher_verified

    # Matches neither check.
    unrelated_verified = legacy._verify_url(
        url="example.com",
        publisher_url="https://github.com/or/myproject",
        **project,
    )
    assert not unrelated_verified

warehouse/forklift/legacy.py

+47-5
Original file line numberDiff line numberDiff line change
@@ -457,7 +457,31 @@ def _process_attestations(request, distribution: Distribution):
457457
metrics.increment("warehouse.upload.attestations.ok")
458458

459459

460-
def _verify_url(url: str, publisher_url: str | None) -> bool:
460+
# URL prefixes under which a project's page is considered to live on PyPI.
_pypi_project_urls = [
    "https://pypi.org/project/",
    "https://pypi.org/p/",
    "https://pypi.python.org/project/",
    "https://pypi.python.org/p/",
    "https://python.org/pypi/",
]


def _verify_url_pypi(url: str, project_name: str, project_normalized_name: str) -> bool:
    """
    Verify a given URL against the known PyPI project page URLs

    A URL is verified when, after RFC 3986 normalization, it equals one of the
    prefixes in `_pypi_project_urls` followed by either the project name or the
    normalized project name, with or without a trailing slash.
    """
    normalized_url = rfc3986.api.uri_reference(url).normalize()
    # A set, so that identical name and normalized name are only tried once.
    accepted_names = {project_name, project_normalized_name}

    for prefix in _pypi_project_urls:
        for name in accepted_names:
            for trailing_slash in ["/", ""]:
                candidate = rfc3986.api.uri_reference(
                    f"{prefix}{name}{trailing_slash}"
                )
                if normalized_url == candidate.normalize():
                    return True

    return False
482+
483+
484+
def _verify_url_with_trusted_publisher(url: str, publisher_url: str) -> bool:
461485
"""
462486
Verify a given URL against a Trusted Publisher URL
463487
@@ -473,9 +497,6 @@ def _verify_url(url: str, publisher_url: str | None) -> bool:
473497
the authority includes the host, and in practice neither URL should have user
474498
nor port information.
475499
"""
476-
if not publisher_url:
477-
return False
478-
479500
publisher_uri = rfc3986.api.uri_reference(publisher_url).normalize()
480501
user_uri = rfc3986.api.uri_reference(url).normalize()
481502
if publisher_uri.path is None:
@@ -496,6 +517,22 @@ def _verify_url(url: str, publisher_url: str | None) -> bool:
496517
)
497518

498519

520+
def _verify_url(
    url: str, publisher_url: str | None, project_name: str, project_normalized_name: str
) -> bool:
    """
    Verify a given URL for a project

    The URL is verified if it is a PyPI project page for the project (see
    `_verify_url_pypi`). Otherwise, when a Trusted Publisher URL is available,
    the result of `_verify_url_with_trusted_publisher` is returned. Without a
    publisher URL the URL is not verified.
    """
    is_pypi_project_page = _verify_url_pypi(
        url=url,
        project_name=project_name,
        project_normalized_name=project_normalized_name,
    )
    if is_pypi_project_page:
        return True

    if publisher_url:
        return _verify_url_with_trusted_publisher(
            url=url, publisher_url=publisher_url
        )

    return False
534+
535+
499536
def _sort_releases(request: Request, project: Project):
500537
releases = (
501538
request.db.query(Release)
@@ -866,7 +903,12 @@ def file_upload(request):
866903
else {
867904
name: {
868905
"url": url,
869-
"verified": _verify_url(url=url, publisher_url=publisher_base_url),
906+
"verified": _verify_url(
907+
url=url,
908+
publisher_url=publisher_base_url,
909+
project_name=project.name,
910+
project_normalized_name=project.normalized_name,
911+
),
870912
}
871913
for name, url in meta.project_urls.items()
872914
}

0 commit comments

Comments
 (0)