Skip to content

Commit 4190dab

Browse files
committed
chore: refactor provenance l3 check
Signed-off-by: Ben Selwyn-Smith <[email protected]>
1 parent 44d2e2c commit 4190dab

File tree

14 files changed

+470
-612
lines changed

14 files changed

+470
-612
lines changed

src/macaron/provenance/__init__.py

+4
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
# Copyright (c) 2024 - 2024, Oracle and/or its affiliates. All rights reserved.
2+
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/.
3+
4+
"""This package contains the provenance tools for software components."""

src/macaron/repo_finder/provenance_extractor.py renamed to src/macaron/provenance/provenance_extractor.py

+2-28
Original file line numberDiff line numberDiff line change
@@ -6,15 +6,10 @@
66
import urllib.parse
77

88
from packageurl import PackageURL
9-
from pydriller import Git
109

1110
from macaron.errors import ProvenanceError
1211
from macaron.json_tools import JsonType, json_extract
13-
from macaron.repo_finder.commit_finder import (
14-
AbstractPurlType,
15-
determine_abstract_purl_type,
16-
extract_commit_from_version,
17-
)
12+
from macaron.repo_finder.commit_finder import AbstractPurlType, determine_abstract_purl_type
1813
from macaron.repo_finder.repo_finder import to_domain_from_known_purl_types
1914
from macaron.slsa_analyzer.provenance.intoto import InTotoPayload, InTotoV1Payload, InTotoV01Payload
2015

@@ -254,7 +249,7 @@ def check_if_input_repo_provenance_conflict(
254249
repo_path_input: str | None
255250
The repo URL from input.
256251
provenance_repo_url: str | None
257-
The repo URL from provenance.
252+
The repo URL from provenance.
258253
259254
Returns
260255
-------
@@ -275,27 +270,18 @@ def check_if_input_repo_provenance_conflict(
275270

276271

277272
def check_if_input_purl_provenance_conflict(
278-
git_obj: Git,
279273
repo_path_input: bool,
280-
digest_input: bool,
281274
provenance_repo_url: str | None,
282-
provenance_commit_digest: str | None,
283275
purl: PackageURL,
284276
) -> bool:
285277
"""Test if the input repository type PURL's repo matches the contents of the provenance.
286278
287279
Parameters
288280
----------
289-
git_obj: Git
290-
The Git object.
291281
repo_path_input: bool
292282
True if there is a repo as input.
293-
digest_input: str
294-
True if there is a commit as input.
295283
provenance_repo_url: str | None
296284
The repo url from provenance.
297-
provenance_commit_digest: str | None
298-
The commit digest from provenance.
299285
purl: PackageURL
300286
The input repository PURL.
301287
@@ -318,18 +304,6 @@ def check_if_input_purl_provenance_conflict(
318304
)
319305
return True
320306

321-
# Check the PURL commit against the provenance.
322-
if not digest_input and provenance_commit_digest and purl.version:
323-
purl_commit = extract_commit_from_version(git_obj, purl.version)
324-
if purl_commit and purl_commit != provenance_commit_digest:
325-
logger.debug(
326-
"The commit digest passed via purl input does not match what exists in the "
327-
"provenance. Purl Commit: %s, Provenance Commit: %s.",
328-
purl_commit,
329-
provenance_commit_digest,
330-
)
331-
return True
332-
333307
return False
334308

335309

src/macaron/repo_finder/provenance_finder.py renamed to src/macaron/provenance/provenance_finder.py

+42-139
Original file line numberDiff line numberDiff line change
@@ -107,38 +107,6 @@ def _find_provenance(self, discovery_functions: list[partial[list[InTotoPayload]
107107
logger.debug("No provenance found.")
108108
return []
109109

110-
def verify_provenance(self, purl: PackageURL, provenance: list[InTotoPayload]) -> bool:
111-
"""Verify the passed provenance.
112-
113-
Parameters
114-
----------
115-
purl: PackageURL
116-
The PURL of the analysis target.
117-
provenance: list[InTotoPayload]
118-
The list of provenance.
119-
120-
Returns
121-
-------
122-
bool
123-
True if the provenance could be verified, or False otherwise.
124-
"""
125-
if determine_abstract_purl_type(purl) == AbstractPurlType.REPOSITORY:
126-
# Do not perform default verification for repository type targets.
127-
return False
128-
129-
verification_function = None
130-
131-
if purl.type == "npm":
132-
verification_function = partial(verify_npm_provenance, purl, provenance)
133-
134-
# TODO other verification functions go here.
135-
136-
if verification_function:
137-
return verification_function()
138-
139-
logger.debug("Provenance verification not supported for PURL type: %s", purl.type)
140-
return False
141-
142110

143111
def find_npm_provenance(purl: PackageURL, registry: NPMRegistry) -> list[InTotoPayload]:
144112
"""Find and download the NPM based provenance for the passed PURL.
@@ -213,72 +181,6 @@ def find_npm_provenance(purl: PackageURL, registry: NPMRegistry) -> list[InTotoP
213181
return []
214182

215183

216-
def verify_npm_provenance(purl: PackageURL, provenance: list[InTotoPayload]) -> bool:
217-
"""Compare the unsigned payload subject digest with the signed payload digest, if available.
218-
219-
Parameters
220-
----------
221-
purl: PackageURL
222-
The PURL of the analysis target.
223-
provenance: list[InTotoPayload]
224-
The provenances to verify.
225-
226-
Returns
227-
-------
228-
bool
229-
True if the provenance was verified, or False otherwise.
230-
"""
231-
if len(provenance) != 2:
232-
logger.debug("Expected unsigned and signed provenance.")
233-
return False
234-
235-
signed_subjects = provenance[1].statement.get("subject")
236-
if not signed_subjects:
237-
return False
238-
239-
unsigned_subjects = provenance[0].statement.get("subject")
240-
if not unsigned_subjects:
241-
return False
242-
243-
found_signed_subject = None
244-
for signed_subject in signed_subjects:
245-
name = signed_subject.get("name")
246-
if name and name == str(purl):
247-
found_signed_subject = signed_subject
248-
break
249-
250-
if not found_signed_subject:
251-
return False
252-
253-
found_unsigned_subject = None
254-
for unsigned_subject in unsigned_subjects:
255-
name = unsigned_subject.get("name")
256-
if name and name == str(purl):
257-
found_unsigned_subject = unsigned_subject
258-
break
259-
260-
if not found_unsigned_subject:
261-
return False
262-
263-
signed_digest = found_signed_subject.get("digest")
264-
unsigned_digest = found_unsigned_subject.get("digest")
265-
if not (signed_digest and unsigned_digest):
266-
return False
267-
268-
# For signed and unsigned to match, the digests must be identical.
269-
if signed_digest != unsigned_digest:
270-
return False
271-
272-
key = list(signed_digest.keys())[0]
273-
logger.debug(
274-
"Verified provenance against signed companion. Signed: %s, Unsigned: %s.",
275-
signed_digest[key][:7],
276-
unsigned_digest[key][:7],
277-
)
278-
279-
return True
280-
281-
282184
def find_gav_provenance(purl: PackageURL, registry: JFrogMavenRegistry) -> list[InTotoPayload]:
283185
"""Find and download the GAV based provenance for the passed PURL.
284186
@@ -377,7 +279,9 @@ def find_gav_provenance(purl: PackageURL, registry: JFrogMavenRegistry) -> list[
377279
return provenances[:1]
378280

379281

380-
def find_provenance_from_ci(analyze_ctx: AnalyzeContext, git_obj: Git | None) -> InTotoPayload | None:
282+
def find_provenance_from_ci(
283+
analyze_ctx: AnalyzeContext, git_obj: Git | None, download_path: str
284+
) -> InTotoPayload | None:
381285
"""Try to find provenance from CI services of the repository.
382286
383287
Note that we stop going through the CI services once we encounter a CI service
@@ -389,9 +293,11 @@ def find_provenance_from_ci(analyze_ctx: AnalyzeContext, git_obj: Git | None) ->
389293
Parameters
390294
----------
391295
analyze_ctx: AnalyzeContext
392-
The contenxt of the ongoing analysis.
296+
The context of the ongoing analysis.
393297
git_obj: Git | None
394298
The Pydriller Git object representing the repository, if any.
299+
download_path: str
300+
The pre-existing location to download discovered files to.
395301
396302
Returns
397303
-------
@@ -468,66 +374,63 @@ def find_provenance_from_ci(analyze_ctx: AnalyzeContext, git_obj: Git | None) ->
468374
ci_info["provenance_assets"].extend(provenance_assets)
469375

470376
# Download the provenance assets and load the provenance payloads.
471-
download_provenances_from_github_actions_ci_service(
472-
ci_info,
473-
)
377+
download_provenances_from_ci_service(ci_info, download_path)
474378

475379
# TODO consider how to handle multiple payloads here.
476380
return ci_info["provenances"][0].payload if ci_info["provenances"] else None
477381

478382
return None
479383

480384

481-
def download_provenances_from_github_actions_ci_service(ci_info: CIInfo) -> None:
385+
def download_provenances_from_ci_service(ci_info: CIInfo, download_path: str) -> None:
482386
"""Download provenances from GitHub Actions.
483387
484388
Parameters
485389
----------
486390
ci_info: CIInfo,
487391
A ``CIInfo`` instance that holds a GitHub Actions git service object.
392+
download_path: str
393+
The pre-existing location to download discovered files to.
488394
"""
489395
ci_service = ci_info["service"]
490396
prov_assets = ci_info["provenance_assets"]
491-
397+
if not os.path.isdir(download_path):
398+
logger.debug("Download location is not a valid directory.")
399+
return
492400
try:
493-
with tempfile.TemporaryDirectory() as temp_path:
494-
downloaded_provs = []
495-
for prov_asset in prov_assets:
496-
# Check the size before downloading.
497-
if prov_asset.size_in_bytes > defaults.getint(
498-
"slsa.verifier",
499-
"max_download_size",
500-
fallback=1000000,
501-
):
502-
logger.info(
503-
"Skip verifying the provenance %s: asset size too large.",
504-
prov_asset.name,
505-
)
506-
continue
401+
downloaded_provs = []
402+
for prov_asset in prov_assets:
403+
# Check the size before downloading.
404+
if prov_asset.size_in_bytes > defaults.getint("slsa.verifier", "max_download_size", fallback=1000000):
405+
logger.info(
406+
"Skip verifying the provenance %s: asset size too large.",
407+
prov_asset.name,
408+
)
409+
continue
507410

508-
provenance_filepath = os.path.join(temp_path, prov_asset.name)
411+
provenance_filepath = os.path.join(download_path, prov_asset.name)
509412

510-
if not ci_service.api_client.download_asset(
511-
prov_asset.url,
512-
provenance_filepath,
513-
):
514-
logger.debug(
515-
"Could not download the provenance %s. Skip verifying...",
516-
prov_asset.name,
517-
)
518-
continue
413+
if not ci_service.api_client.download_asset(
414+
prov_asset.url,
415+
provenance_filepath,
416+
):
417+
logger.debug(
418+
"Could not download the provenance %s. Skip verifying...",
419+
prov_asset.name,
420+
)
421+
continue
519422

520-
# Read the provenance.
521-
try:
522-
payload = load_provenance_payload(provenance_filepath)
523-
except LoadIntotoAttestationError as error:
524-
logger.error("Error logging provenance: %s", error)
525-
continue
423+
# Read the provenance.
424+
try:
425+
payload = load_provenance_payload(provenance_filepath)
426+
except LoadIntotoAttestationError as error:
427+
logger.error("Error logging provenance: %s", error)
428+
continue
526429

527-
# Add the provenance file.
528-
downloaded_provs.append(SLSAProvenanceData(payload=payload, asset=prov_asset))
430+
# Add the provenance file.
431+
downloaded_provs.append(SLSAProvenanceData(payload=payload, asset=prov_asset))
529432

530-
# Persist the provenance payloads into the CIInfo object.
531-
ci_info["provenances"] = downloaded_provs
433+
# Persist the provenance payloads into the CIInfo object.
434+
ci_info["provenances"] = downloaded_provs
532435
except OSError as error:
533436
logger.error("Error while storing provenance in the temporary directory: %s", error)

0 commit comments

Comments
 (0)