Skip to content

Commit 1e0357d

Browse files
facutuescawoodruffwdi
authored
Verify Home-Page and Download-URL metadata URLs (#16568)
* Verify Home-Page and Download-URL metadata URLs * Update warehouse/forklift/legacy.py Co-authored-by: Dustin Ingram <[email protected]> --------- Co-authored-by: William Woodruff <[email protected]> Co-authored-by: Dustin Ingram <[email protected]>
1 parent be8e7fb commit 1e0357d

File tree

4 files changed

+189
-28
lines changed

4 files changed

+189
-28
lines changed

tests/unit/forklift/test_legacy.py

+96-4
Original file line numberDiff line numberDiff line change
@@ -3898,26 +3898,110 @@ def test_new_release_url_verified(
38983898
assert release_db.urls_by_verify_status(verified=expected) == {"Test": url}
38993899
assert not release_db.urls_by_verify_status(verified=not expected)
39003900

3901+
@pytest.mark.parametrize(
3902+
("url", "expected"),
3903+
[
3904+
("https://google.com", False), # Totally different
3905+
("https://github.com/foo", False), # Missing parts
3906+
("https://github.com/foo/bar/", True), # Exactly the same
3907+
("https://github.com/foo/bar/readme.md", True), # Additional parts
3908+
("https://github.com/foo/bar", True), # Missing trailing slash
3909+
],
3910+
)
3911+
def test_new_release_homepage_download_urls_verified(
3912+
self, monkeypatch, pyramid_config, db_request, metrics, url, expected
3913+
):
3914+
project = ProjectFactory.create()
3915+
publisher = GitHubPublisherFactory.create(projects=[project])
3916+
publisher.repository_owner = "foo"
3917+
publisher.repository_name = "bar"
3918+
claims = {"sha": "somesha"}
3919+
identity = PublisherTokenContext(publisher, SignedClaims(claims))
3920+
db_request.oidc_publisher = identity.publisher
3921+
db_request.oidc_claims = identity.claims
3922+
3923+
db_request.db.add(Classifier(classifier="Environment :: Other Environment"))
3924+
db_request.db.add(Classifier(classifier="Programming Language :: Python"))
3925+
3926+
filename = "{}-{}.tar.gz".format(project.name, "1.0")
3927+
3928+
pyramid_config.testing_securitypolicy(identity=identity)
3929+
db_request.user_agent = "warehouse-tests/6.6.6"
3930+
db_request.POST = MultiDict(
3931+
{
3932+
"metadata_version": "1.2",
3933+
"name": project.name,
3934+
"version": "1.0",
3935+
"summary": "This is my summary!",
3936+
"filetype": "sdist",
3937+
"md5_digest": _TAR_GZ_PKG_MD5,
3938+
"content": pretend.stub(
3939+
filename=filename,
3940+
file=io.BytesIO(_TAR_GZ_PKG_TESTDATA),
3941+
type="application/tar",
3942+
),
3943+
}
3944+
)
3945+
db_request.POST.extend(
3946+
[
3947+
("classifiers", "Environment :: Other Environment"),
3948+
("classifiers", "Programming Language :: Python"),
3949+
("requires_dist", "foo"),
3950+
("requires_dist", "bar (>1.0)"),
3951+
("home_page", url),
3952+
("download_url", url),
3953+
("requires_external", "Cheese (>1.0)"),
3954+
("provides", "testing"),
3955+
]
3956+
)
3957+
3958+
storage_service = pretend.stub(store=lambda path, filepath, meta: None)
3959+
db_request.find_service = lambda svc, name=None, context=None: {
3960+
IFileStorage: storage_service,
3961+
IMetricsService: metrics,
3962+
}.get(svc)
3963+
3964+
legacy.file_upload(db_request)
3965+
release_db = (
3966+
db_request.db.query(Release).filter(Release.project == project).one()
3967+
)
3968+
assert release_db.urls_by_verify_status(verified=expected) == {
3969+
"Homepage": url,
3970+
"Download": url,
3971+
}
3972+
assert not release_db.urls_by_verify_status(verified=not expected)
3973+
3974+
@pytest.mark.parametrize(
3975+
("home_page_verified", "download_url_verified"),
3976+
[(False, False), (False, True), (True, False), (True, True)],
3977+
)
39013978
def test_new_publisher_verifies_existing_release_url(
39023979
self,
39033980
monkeypatch,
39043981
pyramid_config,
39053982
db_request,
39063983
metrics,
3984+
home_page_verified,
3985+
download_url_verified,
39073986
):
39083987
repo_name = "my_new_repo"
39093988
verified_url = "https://github.com/foo/bar"
39103989
unverified_url = f"https://github.com/foo/{repo_name}"
39113990

39123991
project = ProjectFactory.create()
39133992
release = ReleaseFactory.create(project=project, version="1.0")
3914-
# We start with an existing release, with one verified URL and one unverified
3915-
# URL. Uploading a new file with a Trusted Publisher that matches the unverified
3916-
# URL should mark it as verified.
3993+
# We start with an existing release, with one verified URL and some unverified
3994+
# URLs. Uploading a new file with a Trusted Publisher that matches the
3995+
# unverified URLs should mark them as verified.
39173996
release.project_urls = {
39183997
"verified_url": {"url": verified_url, "verified": True},
39193998
"unverified_url": {"url": unverified_url, "verified": False},
39203999
}
4000+
release.home_page = verified_url if home_page_verified else unverified_url
4001+
release.home_page_verified = home_page_verified
4002+
release.download_url = verified_url if download_url_verified else unverified_url
4003+
release.download_url_verified = download_url_verified
4004+
39214005
publisher = GitHubPublisherFactory.create(projects=[project])
39224006
publisher.repository_owner = "foo"
39234007
publisher.repository_name = repo_name
@@ -3956,6 +4040,11 @@ def test_new_publisher_verifies_existing_release_url(
39564040
("requires_dist", "bar (>1.0)"),
39574041
("requires_external", "Cheese (>1.0)"),
39584042
("provides", "testing"),
4043+
("home_page", verified_url if home_page_verified else unverified_url),
4044+
(
4045+
"download_url",
4046+
verified_url if download_url_verified else unverified_url,
4047+
),
39594048
]
39604049
)
39614050
db_request.POST.add("project_urls", f"verified_url, {verified_url}")
@@ -3969,13 +4058,16 @@ def test_new_publisher_verifies_existing_release_url(
39694058

39704059
legacy.file_upload(db_request)
39714060

3972-
# After successful upload, the Release should have now both URLs verified
4061+
# After successful upload, the Release should have now all URLs verified
39734062
release_db = (
39744063
db_request.db.query(Release).filter(Release.project == project).one()
39754064
)
4065+
39764066
assert release_db.urls_by_verify_status(verified=True) == {
39774067
"unverified_url": unverified_url,
39784068
"verified_url": verified_url,
4069+
"Homepage": release.home_page,
4070+
"Download": release.download_url,
39794071
}
39804072
assert not release_db.urls_by_verify_status(verified=False)
39814073

warehouse/forklift/legacy.py

+33-2
Original file line numberDiff line numberDiff line change
@@ -879,6 +879,30 @@ def file_upload(request):
879879
for name, url in meta.project_urls.items()
880880
}
881881
)
882+
home_page = meta.home_page
883+
home_page_verified = (
884+
False
885+
if home_page is None
886+
else _verify_url(
887+
url=home_page,
888+
publisher=request.oidc_publisher,
889+
project_name=project.name,
890+
project_normalized_name=project.normalized_name,
891+
)
892+
)
893+
894+
download_url = meta.download_url
895+
download_url_verified = (
896+
False
897+
if download_url is None
898+
else _verify_url(
899+
url=download_url,
900+
publisher=request.oidc_publisher,
901+
project_name=project.name,
902+
project_normalized_name=project.normalized_name,
903+
)
904+
)
905+
882906
try:
883907
is_new_release = False
884908
canonical_version = packaging.utils.canonicalize_version(meta.version)
@@ -934,6 +958,10 @@ def file_upload(request):
934958
html=rendered or "",
935959
rendered_by=readme.renderer_version(),
936960
),
961+
home_page=home_page,
962+
home_page_verified=home_page_verified,
963+
download_url=download_url,
964+
download_url_verified=download_url_verified,
937965
project_urls=project_urls,
938966
# TODO: Fix this, we currently treat platform as if it is a single
939967
# use field, but in reality it is a multi-use field, which the
@@ -964,8 +992,6 @@ def file_upload(request):
964992
"author_email",
965993
"maintainer",
966994
"maintainer_email",
967-
"home_page",
968-
"download_url",
969995
"provides_extra",
970996
}
971997
},
@@ -1384,6 +1410,11 @@ def file_upload(request):
13841410
):
13851411
release_url.verified = True
13861412

1413+
if home_page_verified and not release.home_page_verified:
1414+
release.home_page_verified = True
1415+
if download_url_verified and not release.download_url_verified:
1416+
release.download_url_verified = True
1417+
13871418
request.db.flush() # flush db now so server default values are populated for celery
13881419

13891420
# Push updates to BigQuery
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,51 @@
1+
# Licensed under the Apache License, Version 2.0 (the "License");
2+
# you may not use this file except in compliance with the License.
3+
# You may obtain a copy of the License at
4+
#
5+
# http://www.apache.org/licenses/LICENSE-2.0
6+
#
7+
# Unless required by applicable law or agreed to in writing, software
8+
# distributed under the License is distributed on an "AS IS" BASIS,
9+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
10+
# See the License for the specific language governing permissions and
11+
# limitations under the License.
12+
"""
13+
Add columns to Release for verified URL status
14+
15+
Revision ID: 606abd3b8e7f
16+
Revises: 7ca0f1f5e7b3
17+
Create Date: 2024-08-26 08:31:32.734838
18+
"""
19+
20+
import sqlalchemy as sa
21+
22+
from alembic import op
23+
24+
revision = "606abd3b8e7f"
25+
down_revision = "7ca0f1f5e7b3"
26+
27+
28+
def upgrade():
29+
op.add_column(
30+
"releases",
31+
sa.Column(
32+
"home_page_verified",
33+
sa.Boolean(),
34+
server_default=sa.text("false"),
35+
nullable=False,
36+
),
37+
)
38+
op.add_column(
39+
"releases",
40+
sa.Column(
41+
"download_url_verified",
42+
sa.Boolean(),
43+
server_default=sa.text("false"),
44+
nullable=False,
45+
),
46+
)
47+
48+
49+
def downgrade():
50+
op.drop_column("releases", "download_url_verified")
51+
op.drop_column("releases", "home_page_verified")

warehouse/packaging/models.py

+9-22
Original file line numberDiff line numberDiff line change
@@ -559,6 +559,7 @@ def __table_args__(cls): # noqa
559559
maintainer: Mapped[str | None]
560560
maintainer_email: Mapped[str | None]
561561
home_page: Mapped[str | None]
562+
home_page_verified: Mapped[bool_false]
562563
license: Mapped[str | None]
563564
summary: Mapped[str | None]
564565
keywords: Mapped[str | None]
@@ -571,6 +572,7 @@ def __table_args__(cls): # noqa
571572
)
572573
platform: Mapped[str | None]
573574
download_url: Mapped[str | None]
575+
download_url_verified: Mapped[bool_false]
574576
_pypi_ordering: Mapped[int | None]
575577
requires_python: Mapped[str | None] = mapped_column(Text)
576578
created: Mapped[datetime_now] = mapped_column()
@@ -690,30 +692,15 @@ def urls(self):
690692
return _urls
691693

692694
def urls_by_verify_status(self, *, verified: bool):
693-
verified_urls = {
695+
matching_urls = {
694696
release_url.url
695-
for release_url in self._project_urls.values() # type: ignore[attr-defined]
696-
if release_url.verified
697+
for release_url in self._project_urls.values() # type: ignore[attr-defined] # noqa: E501
698+
if release_url.verified == verified
697699
}
698-
699-
if verified:
700-
matching_urls = verified_urls
701-
else:
702-
matching_urls = {
703-
release_url.url
704-
for release_url in self._project_urls.values() # type: ignore[attr-defined] # noqa: E501
705-
if not release_url.verified
706-
}
707-
# The Homepage and Download URLs in the release metadata are currently not
708-
# verified, so we return them if the user requests non-verified URLs *and*
709-
# they are not verified in `project_urls`.
710-
matching_urls.update(
711-
{
712-
url
713-
for url in (self.home_page, self.download_url)
714-
if url is not None and url not in verified_urls
715-
}
716-
)
700+
if self.home_page and self.home_page_verified == verified:
701+
matching_urls.add(self.home_page)
702+
if self.download_url and self.download_url_verified == verified:
703+
matching_urls.add(self.download_url)
717704

718705
# Filter the output of `Release.urls`, since it has custom logic to de-duplicate
719706
# release URLs

0 commit comments

Comments
 (0)