Skip to content

Commit bf8e9ed

Browse files
committed
feat: add GitHub attestation discovery
Signed-off-by: Ben Selwyn-Smith <[email protected]>
1 parent 0a81f5e commit bf8e9ed

File tree

9 files changed

+328
-17
lines changed

9 files changed

+328
-17
lines changed

src/macaron/artifact/local_artifact.py

+56-1
Original file line numberDiff line numberDiff line change
@@ -1,16 +1,21 @@
1-
# Copyright (c) 2024 - 2024, Oracle and/or its affiliates. All rights reserved.
1+
# Copyright (c) 2024 - 2025, Oracle and/or its affiliates. All rights reserved.
22
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/.
33

44
"""This module declares types and utilities for handling local artifacts."""
55

66
import fnmatch
77
import glob
8+
import hashlib
9+
import logging
810
import os
911

1012
from packageurl import PackageURL
1113

1214
from macaron.artifact.maven import construct_maven_repository_path
1315
from macaron.errors import LocalArtifactFinderError
16+
from macaron.slsa_analyzer.package_registry import MavenCentralRegistry
17+
18+
logger: logging.Logger = logging.getLogger(__name__)
1419

1520

1621
def construct_local_artifact_dirs_glob_pattern_maven_purl(maven_purl: PackageURL) -> list[str] | None:
@@ -247,3 +252,53 @@ def get_local_artifact_dirs(
247252
)
248253

249254
raise LocalArtifactFinderError(f"Unsupported PURL type {purl_type}")
255+
256+
257+
def get_local_artifact_hash(purl: PackageURL, artifact_dirs: list[str], hash_algorithm_name: str) -> str | None:
    """Compute the hash of the local artifact.

    Each directory in ``artifact_dirs`` is probed in order for the artifact file name
    derived from ``purl``. The digest of the first candidate that can be read and
    hashed is returned; candidates that cannot be read or hashed are skipped.

    Parameters
    ----------
    purl: PackageURL
        The PURL of the artifact being sought.
    artifact_dirs: list[str]
        The possible locations of the artifact.
    hash_algorithm_name: str
        The hash algorithm to use.

    Returns
    -------
    str | None
        The hash, or None if not found.
    """
    if not artifact_dirs:
        logger.debug("No artifact directories provided.")
        return None

    if not purl.version:
        logger.debug("PURL is missing version.")
        return None

    # Only Maven artifacts have a known file naming scheme here.
    artifact_target = None
    if purl.type == "maven":
        artifact_target = MavenCentralRegistry.get_artifact_file_name(purl)

    if not artifact_target:
        logger.debug("PURL type not supported: %s", purl.type)
        return None

    for artifact_dir in artifact_dirs:
        full_path = os.path.join(artifact_dir, artifact_target)
        # Only regular files can be hashed; a directory with the same name must be skipped.
        if not os.path.isfile(full_path):
            continue

        try:
            with open(full_path, "rb") as file:
                hash_result = hashlib.file_digest(file, hash_algorithm_name)
        except ValueError as error:
            # hashlib rejects an unknown algorithm name with ValueError.
            logger.debug("Error while hashing file: %s", error)
            continue
        except OSError as error:
            # The file vanished or is unreadable; another directory may still hold a usable copy.
            logger.debug("Error while reading file: %s", error)
            continue

        return hash_result.hexdigest()

    return None

src/macaron/slsa_analyzer/analyzer.py

+104-13
Original file line numberDiff line numberDiff line change
@@ -4,11 +4,14 @@
44
"""This module handles the cloning and analyzing a Git repo."""
55

66
import glob
7+
import hashlib
8+
import json
79
import logging
810
import os
911
import re
1012
import sys
1113
import tempfile
14+
import urllib.parse
1215
from collections.abc import Mapping
1316
from datetime import datetime, timezone
1417
from pathlib import Path
@@ -20,7 +23,10 @@
2023
from sqlalchemy.orm import Session
2124

2225
from macaron import __version__
23-
from macaron.artifact.local_artifact import get_local_artifact_dirs
26+
from macaron.artifact.local_artifact import (
27+
get_local_artifact_dirs,
28+
get_local_artifact_hash,
29+
)
2430
from macaron.config.global_config import global_config
2531
from macaron.config.target_config import Configuration
2632
from macaron.database.database_manager import DatabaseManager, get_db_manager, get_db_session
@@ -41,6 +47,7 @@
4147
ProvenanceError,
4248
PURLNotFoundError,
4349
)
50+
from macaron.json_tools import json_extract
4451
from macaron.output_reporter.reporter import FileReporter
4552
from macaron.output_reporter.results import Record, Report, SCMStatus
4653
from macaron.provenance import provenance_verifier
@@ -66,12 +73,14 @@
6673
from macaron.slsa_analyzer.checks import * # pylint: disable=wildcard-import,unused-wildcard-import # noqa: F401,F403
6774
from macaron.slsa_analyzer.ci_service import CI_SERVICES
6875
from macaron.slsa_analyzer.database_store import store_analyze_context_to_db
69-
from macaron.slsa_analyzer.git_service import GIT_SERVICES, BaseGitService
76+
from macaron.slsa_analyzer.git_service import GIT_SERVICES, BaseGitService, GitHub
7077
from macaron.slsa_analyzer.git_service.base_git_service import NoneGitService
7178
from macaron.slsa_analyzer.git_url import GIT_REPOS_DIR
72-
from macaron.slsa_analyzer.package_registry import PACKAGE_REGISTRIES
79+
from macaron.slsa_analyzer.package_registry import PACKAGE_REGISTRIES, MavenCentralRegistry
7380
from macaron.slsa_analyzer.provenance.expectations.expectation_registry import ExpectationRegistry
7481
from macaron.slsa_analyzer.provenance.intoto import InTotoPayload, InTotoV01Payload
82+
from macaron.slsa_analyzer.provenance.intoto.errors import LoadIntotoAttestationError
83+
from macaron.slsa_analyzer.provenance.loader import load_provenance_payload
7584
from macaron.slsa_analyzer.provenance.slsa import SLSAProvenanceData
7685
from macaron.slsa_analyzer.registry import registry
7786
from macaron.slsa_analyzer.specs.ci_spec import CIInfo
@@ -395,6 +404,17 @@ def run_single(
395404
status=SCMStatus.ANALYSIS_FAILED,
396405
)
397406

407+
local_artifact_dirs = None
408+
if parsed_purl and parsed_purl.type in self.local_artifact_repo_mapper:
409+
local_artifact_repo_path = self.local_artifact_repo_mapper[parsed_purl.type]
410+
try:
411+
local_artifact_dirs = get_local_artifact_dirs(
412+
purl=parsed_purl,
413+
local_artifact_repo_path=local_artifact_repo_path,
414+
)
415+
except LocalArtifactFinderError as error:
416+
logger.debug(error)
417+
398418
# Prepare the repo.
399419
git_obj = None
400420
commit_finder_outcome = CommitFinderInfo.NOT_USED
@@ -472,6 +492,37 @@ def run_single(
472492
git_service = self._determine_git_service(analyze_ctx)
473493
self._determine_ci_services(analyze_ctx, git_service)
474494
self._determine_build_tools(analyze_ctx, git_service)
495+
496+
# Try to find an attestation from GitHub, if applicable.
497+
if parsed_purl and not provenance_payload and analysis_target.repo_path and isinstance(git_service, GitHub):
498+
# Try to discover GitHub attestation for the target software component.
499+
url = None
500+
try:
501+
url = urllib.parse.urlparse(analysis_target.repo_path)
502+
except TypeError as error:
503+
logger.debug("Failed to parse repository path as URL: %s", error)
504+
if url and url.hostname == "github.com":
505+
artifact_hash = self.get_artifact_hash(parsed_purl, local_artifact_dirs, hashlib.sha256())
506+
if artifact_hash:
507+
git_attestation_dict = git_service.api_client.get_attestation(
508+
analyze_ctx.component.repository.full_name, artifact_hash
509+
)
510+
if git_attestation_dict:
511+
git_attestation_list = json_extract(git_attestation_dict, ["attestations"], list)
512+
if git_attestation_list:
513+
git_attestation = git_attestation_list[0]
514+
515+
with tempfile.TemporaryDirectory() as temp_dir:
516+
attestation_file = os.path.join(temp_dir, "attestation")
517+
with open(attestation_file, "w", encoding="UTF-8") as file:
518+
json.dump(git_attestation, file)
519+
520+
try:
521+
payload = load_provenance_payload(attestation_file)
522+
provenance_payload = payload
523+
except LoadIntotoAttestationError as error:
524+
logger.debug("Failed to load provenance payload: %s", error)
525+
475526
if parsed_purl is not None:
476527
self._verify_repository_link(parsed_purl, analyze_ctx)
477528
self._determine_package_registries(analyze_ctx)
@@ -533,16 +584,8 @@ def run_single(
533584

534585
analyze_ctx.dynamic_data["validate_malware"] = validate_malware
535586

536-
if parsed_purl and parsed_purl.type in self.local_artifact_repo_mapper:
537-
local_artifact_repo_path = self.local_artifact_repo_mapper[parsed_purl.type]
538-
try:
539-
local_artifact_dirs = get_local_artifact_dirs(
540-
purl=parsed_purl,
541-
local_artifact_repo_path=local_artifact_repo_path,
542-
)
543-
analyze_ctx.dynamic_data["local_artifact_paths"].extend(local_artifact_dirs)
544-
except LocalArtifactFinderError as error:
545-
logger.debug(error)
587+
if local_artifact_dirs:
588+
analyze_ctx.dynamic_data["local_artifact_paths"].extend(local_artifact_dirs)
546589

547590
analyze_ctx.check_results = registry.scan(analyze_ctx)
548591

@@ -926,6 +969,54 @@ def get_analyze_ctx(self, component: Component) -> AnalyzeContext:
926969

927970
return analyze_ctx
928971

972+
def get_artifact_hash(
    self, purl: PackageURL, cached_artifacts: list[str] | None, hash_algorithm: Any
) -> str | None:
    """Return the hash of the artifact identified by the PURL, preferring local files over a download.

    Parameters
    ----------
    purl: PackageURL
        The PURL of the artifact.
    cached_artifacts: list[str] | None
        The list of local files that match the PURL.
    hash_algorithm: Any
        The hash algorithm to use.

    Returns
    -------
    str | None
        The hash of the artifact, or None if not found.
    """
    # A locally available artifact avoids a download altogether.
    if cached_artifacts and (
        local_hash := get_local_artifact_hash(purl, cached_artifacts, hash_algorithm.name)
    ):
        return local_hash

    purl_type = purl.type

    if purl_type == "maven":
        # Download the artifact through the first registered Maven Central registry.
        registry = None
        for candidate in PACKAGE_REGISTRIES:
            if isinstance(candidate, MavenCentralRegistry):
                registry = candidate
                break

        if not registry:
            return None

        return registry.get_artifact_hash(purl, hash_algorithm)

    # TODO implement PyPI support; until then "pypi" yields no hash and no log entry.
    if purl_type != "pypi":
        logger.debug("Purl type '%s' not yet supported for GitHub attestation discovery.", purl_type)

    return None
1019+
9291020
def _determine_git_service(self, analyze_ctx: AnalyzeContext) -> BaseGitService:
9301021
"""Determine the Git service used by the software component."""
9311022
remote_path = analyze_ctx.component.repository.remote_path if analyze_ctx.component.repository else None

src/macaron/slsa_analyzer/git_service/api_client.py

+20-1
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
# Copyright (c) 2022 - 2024, Oracle and/or its affiliates. All rights reserved.
1+
# Copyright (c) 2022 - 2025, Oracle and/or its affiliates. All rights reserved.
22
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/.
33

44
"""The module provides API clients for VCS services, such as GitHub."""
@@ -659,6 +659,25 @@ def download_asset(self, url: str, download_path: str) -> bool:
659659

660660
return True
661661

662+
def get_attestation(self, full_name: str, artifact_hash: str) -> dict:
    """Fetch the attestation that GitHub holds for the given artifact hash, if any.

    Parameters
    ----------
    full_name : str
        The full name of the repo.
    artifact_hash: str
        The SHA256 hash of an artifact.

    Returns
    -------
    dict
        The attestation data, or an empty dict if not found.
    """
    endpoint = f"{GhAPIClient._REPO_END_POINT}/{full_name}/attestations/sha256:{artifact_hash}"
    attestation = send_get_http(endpoint, self.headers)

    # Normalise any falsy response to an empty dict.
    if not attestation:
        return {}

    return attestation
680+
662681

663682
def get_default_gh_client(access_token: str) -> GhAPIClient:
664683
"""Return a GhAPIClient instance with default values.

src/macaron/slsa_analyzer/package_registry/maven_central_registry.py

+72
Original file line numberDiff line numberDiff line change
@@ -6,10 +6,13 @@
66
import logging
77
import urllib.parse
88
from datetime import datetime, timezone
9+
from typing import Any
910

1011
import requests
1112
from packageurl import PackageURL
13+
from requests import RequestException
1214

15+
from macaron.artifact.maven import construct_maven_repository_path
1316
from macaron.config.defaults import defaults
1417
from macaron.errors import ConfigurationError, InvalidHTTPResponseError
1518
from macaron.slsa_analyzer.build_tool.base_build_tool import BaseBuildTool
@@ -262,3 +265,72 @@ def find_publish_timestamp(self, purl: str) -> datetime:
262265
raise InvalidHTTPResponseError(f"The timestamp returned by {url} is invalid") from error
263266

264267
raise InvalidHTTPResponseError(f"Invalid response from Maven central for {url}.")
268+
269+
@staticmethod
def get_artifact_file_name(purl: PackageURL) -> str | None:
    """Build the jar file name for the passed PURL following the Maven registry naming standard.

    Parameters
    ----------
    purl: PackageURL
        The PURL of the artifact.

    Returns
    -------
    str | None
        The artifact file name, or None if invalid.
    """
    version = purl.version
    if version:
        # Layout: <name>-<version>.jar
        return "".join([purl.name, "-", version, ".jar"])

    return None
287+
288+
def get_artifact_hash(self, purl: PackageURL, hash_algorithm: Any) -> str | None:
    """Return the hash of the artifact found by the passed purl relevant to the registry's URL.

    The artifact is streamed from the registry and fed to ``hash_algorithm`` as the
    chunks arrive, so the file is never written to disk.

    Parameters
    ----------
    purl: PackageURL
        The purl of the artifact.
    hash_algorithm: Any
        The hash algorithm to use. It is updated in place with the downloaded bytes
        and must provide ``update`` and ``hexdigest``.

    Returns
    -------
    str | None
        The hash of the artifact, or None if not found.
    """
    if not (purl.namespace and purl.version):
        return None

    artifact_path = construct_maven_repository_path(purl.namespace, purl.name, purl.version)
    file_name = MavenCentralRegistry.get_artifact_file_name(purl)
    if not file_name:
        return None

    artifact_url = self.registry_url + "/" + artifact_path + "/" + file_name
    logger.debug("Search for artifact using URL: %s", artifact_url)

    # Reading one byte per iteration (the iter_content default) is needlessly slow for artifacts.
    chunk_size = 8192

    try:
        response = requests.get(artifact_url, stream=True, timeout=40)
    except RequestException as error:
        # Connection failures and timeouts are raised here, before any HTTP status exists.
        logger.debug("Error while requesting target file: %s", error)
        return None

    # A streamed response holds its connection until closed; release it on every exit path.
    with response:
        try:
            response.raise_for_status()
        except requests.exceptions.HTTPError as http_err:
            logger.debug("HTTP error occurred: %s", http_err)
            return None

        if response.status_code != 200:
            return None

        # Download file and compute hash as chunks are received.
        try:
            for chunk in response.iter_content(chunk_size=chunk_size):
                hash_algorithm.update(chunk)
        except RequestException as error:
            # Something went wrong with the request, abort.
            logger.debug("Error while streaming target file: %s", error)
            return None

    artifact_hash: str = hash_algorithm.hexdigest()
    logger.debug("Computed hash of artifact: %s", artifact_hash)
    return artifact_hash

0 commit comments

Comments
 (0)