Skip to content

Commit acfb75b

Browse files
committed
feat: add a new check to validate npm provenance and extract facts for policy engine
Signed-off-by: behnazh-w <[email protected]>
1 parent 9ef9d50 commit acfb75b

File tree

8 files changed

+491
-15
lines changed

8 files changed

+491
-15
lines changed

src/macaron/database/table_definitions.py

+184-7
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,7 @@
3434

3535
from macaron.artifact.maven import MavenSubjectPURLMatcher
3636
from macaron.database.database_manager import ORMBase
37-
from macaron.database.db_custom_types import RFC3339DateTime
37+
from macaron.database.db_custom_types import DBJsonDict, RFC3339DateTime
3838
from macaron.errors import InvalidPURLError
3939
from macaron.slsa_analyzer.provenance.intoto import InTotoPayload, ProvenanceSubjectPURLMatcher
4040
from macaron.slsa_analyzer.slsa_req import ReqName
@@ -161,7 +161,7 @@ class Component(PackageURLMixin, ORMBase):
161161
checkfacts: Mapped[list["CheckFacts"]] = relationship(back_populates="component", lazy="immediate")
162162

163163
#: The one-to-many relationship with provenances.
164-
provenance: Mapped[list["Provenance"]] = relationship(back_populates="component", lazy="immediate")
164+
provenance: Mapped[list["ProvenanceFacts"]] = relationship(back_populates="component", lazy="immediate")
165165

166166
#: The bidirectional many-to-many relationship for component dependencies.
167167
dependencies: Mapped[list["Component"]] = relationship(
@@ -464,7 +464,7 @@ class CheckFacts(ORMBase):
464464
}
465465

466466

467-
class Provenance(ORMBase):
467+
class ProvenanceFacts(ORMBase):
468468
"""ORM class for a provenance document."""
469469

470470
__tablename__ = "_provenance"
@@ -479,7 +479,7 @@ class Provenance(ORMBase):
479479
component: Mapped["Component"] = relationship(back_populates="provenance")
480480

481481
#: The SLSA version.
482-
version: Mapped[str] = mapped_column(String, nullable=False)
482+
version: Mapped[str] = mapped_column(String, nullable=True)
483483

484484
#: The release tag commit sha.
485485
release_commit_sha: Mapped[str] = mapped_column(String, nullable=True)
@@ -488,12 +488,189 @@ class Provenance(ORMBase):
488488
release_tag: Mapped[str] = mapped_column(String, nullable=True)
489489

490490
#: The provenance payload content in JSON format.
491-
provenance_json: Mapped[str] = mapped_column(String, nullable=False)
491+
provenance_json: Mapped[dict] = mapped_column(DBJsonDict, nullable=False)
492+
493+
#: The provenance statement.
494+
statement: Mapped["Statement"] = relationship(back_populates="provenance")
492495

493496
#: A one-to-many relationship with the release artifacts.
494497
artifact: Mapped[list["ReleaseArtifact"]] = relationship(back_populates="provenance")
495498

496499

500+
class Statement(ORMBase):
501+
"""The ORM class for provenance statement."""
502+
503+
__tablename__ = "_statement"
504+
505+
#: The primary key.
506+
id: Mapped[int] = mapped_column(Integer, primary_key=True, autoincrement=True) # noqa: A003
507+
508+
#: The foreign key to the provenance.
509+
provenance_id: Mapped[int] = mapped_column(Integer, ForeignKey(ProvenanceFacts.id), nullable=False)
510+
511+
#: A one-to-one relationship with the provenance.
512+
provenance: Mapped["ProvenanceFacts"] = relationship(back_populates="statement")
513+
514+
#: Statement type.
515+
_type: Mapped[str] = mapped_column(String, nullable=False)
516+
517+
#: Predicate Type.
518+
predicate_type: Mapped[str] = mapped_column(String, nullable=False)
519+
520+
#: Provenance Subjects.
521+
subject: Mapped[list["ProvenanceSubjectRaw"]] = relationship(back_populates="statement")
522+
523+
#: Provenance predicate.
524+
predicate: Mapped["Predicate"] = relationship(back_populates="statement")
525+
526+
527+
class ProvenanceSubjectRaw(ORMBase):
528+
"""The ORM class for the provenance subject containing all the information."""
529+
530+
__tablename__ = "_subject"
531+
532+
#: The primary key.
533+
id: Mapped[int] = mapped_column(Integer, primary_key=True, autoincrement=True) # noqa: A003
534+
535+
#: The foreign key to the provenance statement.
536+
statement_id: Mapped[int] = mapped_column(Integer, ForeignKey(Statement.id), nullable=False)
537+
538+
#: A many-to-one relationship with the provenance statement.
539+
statement: Mapped["Statement"] = relationship(back_populates="subject")
540+
541+
#: Subject name.
542+
name: Mapped[str] = mapped_column(String, nullable=False)
543+
544+
#: Subject digests.
545+
digest: Mapped["SubjectDigest"] = relationship(back_populates="subject")
546+
547+
548+
class SubjectDigest(ORMBase):
549+
"""The ORM class for the provenance subject digest."""
550+
551+
__tablename__ = "_subject_digest"
552+
553+
#: The primary key.
554+
id: Mapped[int] = mapped_column(Integer, primary_key=True, autoincrement=True) # noqa: A003
555+
556+
#: The foreign key to the provenance subject.
557+
subject_id: Mapped[int] = mapped_column(Integer, ForeignKey(ProvenanceSubjectRaw.id), nullable=False)
558+
559+
#: A one-to-one relationship with provenance subject.
560+
subject: Mapped["ProvenanceSubjectRaw"] = relationship(back_populates="digest")
561+
562+
#: Digest.
563+
sha512: Mapped[str] = mapped_column(String, nullable=False)
564+
565+
566+
class Predicate(ORMBase):
567+
"""The ORM class for provenance predicate."""
568+
569+
__tablename__ = "_predicate"
570+
571+
#: The primary key.
572+
id: Mapped[int] = mapped_column(Integer, primary_key=True, autoincrement=True) # noqa: A003
573+
574+
#: The foreign key to the provenance statement.
575+
statement_id: Mapped[int] = mapped_column(Integer, ForeignKey(Statement.id), nullable=False)
576+
577+
#: A one-to-one relationship with provenance statement.
578+
statement: Mapped["Statement"] = relationship(back_populates="predicate")
579+
580+
#: Build definition.
581+
build_definition: Mapped["BuildDefinition"] = relationship(back_populates="predicate")
582+
583+
584+
class BuildDefinition(ORMBase):
585+
"""The ORM class for provenance predicate build definition."""
586+
587+
__tablename__ = "_build_definition"
588+
589+
#: The primary key.
590+
id: Mapped[int] = mapped_column(Integer, primary_key=True, autoincrement=True) # noqa: A003
591+
592+
#: The foreign key to the provenance predicate.
593+
predicate_id: Mapped[int] = mapped_column(Integer, ForeignKey(Predicate.id), nullable=False)
594+
595+
#: A one-to-one relationship with provenance predicate.
596+
predicate: Mapped["Predicate"] = relationship(back_populates="build_definition")
597+
598+
#: Build type.
599+
build_type: Mapped[str] = mapped_column(String, nullable=False)
600+
601+
#: External parameters in build definitions.
602+
external_parameters: Mapped["ExternalParameters"] = relationship(back_populates="build_definition")
603+
604+
#: Internal parameters in build definitions.
605+
internal_parameters: Mapped["InternalParameters"] = relationship(back_populates="build_definition")
606+
607+
608+
class ExternalParameters(ORMBase):
609+
"""The ORM class for provenance predicate build definition external parameters."""
610+
611+
__tablename__ = "_external_parameters"
612+
613+
#: The primary key.
614+
id: Mapped[int] = mapped_column(Integer, primary_key=True, autoincrement=True) # noqa: A003
615+
616+
#: The foreign key to the build definition.
617+
build_definition_id: Mapped[int] = mapped_column(Integer, ForeignKey(BuildDefinition.id), nullable=False)
618+
619+
#: A one-to-one relationship with build definition.
620+
build_definition: Mapped["BuildDefinition"] = relationship(back_populates="external_parameters")
621+
622+
#: The workflow in the external parameters.
623+
workflow: Mapped["Workflow"] = relationship(back_populates="external_parameters")
624+
625+
626+
class Workflow(ORMBase):
627+
"""The ORM class for provenance predicate build definition external parameters workflows."""
628+
629+
__tablename__ = "_workflow"
630+
631+
#: The primary key.
632+
id: Mapped[int] = mapped_column(Integer, primary_key=True, autoincrement=True) # noqa: A003
633+
634+
#: The foreign key to the external parameters.
635+
external_parameters_id: Mapped[int] = mapped_column(Integer, ForeignKey(ExternalParameters.id), nullable=False)
636+
637+
#: A one-to-one relationship with external_parameters.
638+
external_parameters: Mapped["ExternalParameters"] = relationship(back_populates="workflow")
639+
640+
#: Workflow reference.
641+
ref: Mapped[str] = mapped_column(String, nullable=False)
642+
643+
#: Workflow repository.
644+
repository: Mapped[str] = mapped_column(String, nullable=False)
645+
646+
#: Workflow path.
647+
path: Mapped[str] = mapped_column(String, nullable=False)
648+
649+
650+
class InternalParameters(ORMBase):
651+
"""The ORM class for provenance predicate build definition internal parameters."""
652+
653+
__tablename__ = "_internal_parameters"
654+
655+
#: The primary key.
656+
id: Mapped[int] = mapped_column(Integer, primary_key=True, autoincrement=True) # noqa: A003
657+
658+
#: The foreign key to the build definition.
659+
build_definition_id: Mapped[int] = mapped_column(Integer, ForeignKey(BuildDefinition.id), nullable=False)
660+
661+
#: A one-to-one relationship with build definition.
662+
build_definition: Mapped["BuildDefinition"] = relationship(back_populates="internal_parameters")
663+
664+
#: The GitHub event that triggered the publish.
665+
github_event_name: Mapped[str] = mapped_column(String, nullable=False)
666+
667+
#: The GitHub repository ID that triggered the publish.
668+
github_repository_id: Mapped[str] = mapped_column(String, nullable=False)
669+
670+
#: The GitHub repository owner ID that triggered the publish.
671+
github_repository_owner_id: Mapped[str] = mapped_column(String, nullable=False)
672+
673+
497674
class ReleaseArtifact(ORMBase):
498675
"""The ORM class for release artifacts."""
499676

@@ -509,10 +686,10 @@ class ReleaseArtifact(ORMBase):
509686
slsa_verified: Mapped[bool] = mapped_column(Boolean, nullable=True)
510687

511688
#: The foreign key to the SLSA provenance.
512-
provenance_id: Mapped[int] = mapped_column(Integer, ForeignKey(Provenance.id), nullable=True)
689+
provenance_id: Mapped[int] = mapped_column(Integer, ForeignKey(ProvenanceFacts.id), nullable=True)
513690

514691
#: A many-to-one relationship with the SLSA provenance.
515-
provenance: Mapped["Provenance"] = relationship(back_populates="artifact")
692+
provenance: Mapped["ProvenanceFacts"] = relationship(back_populates="artifact")
516693

517694
#: The one-to-many relationship with the hash digests for this artifact.
518695
digests: Mapped[list["HashDigest"]] = relationship(back_populates="artifact")
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,90 @@
1+
/* Copyright (c) 2023 - 2024, Oracle and/or its affiliates. All rights reserved. */
2+
/* Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. */
3+
4+
/* Souffle datalog rules to assist in authoring in-toto policies.*/
5+
6+
/**
7+
* This relation provides the external parameters of a SLSA v1 provenance generated by npm.
8+
The external parameters include details of the triggering hosted build service workflow.
9+
10+
Here is the related section of an example predicate we process in this relation:
11+
"externalParameters": {
12+
"workflow": {
13+
"ref": "refs/heads/main",
14+
"repository": "https://github.com/npm/node-semver",
15+
"path": ".github/workflows/release.yml"
16+
}
17+
},
18+
19+
Parameters:
20+
component_id: number
21+
The target software component id.
22+
purl: symbol
23+
The Package URL identifier for the provenance subject.
24+
ref: symbol
25+
The Git reference.
26+
repository: symbol
27+
The repository URL.
28+
path: symbol
29+
The GitHub Actions workflow path.
30+
31+
*/
32+
.decl slsa_v1_npm_external_parameters(component_id: number, purl: symbol, ref: symbol, repository: symbol, path: symbol)
33+
34+
slsa_v1_npm_external_parameters(component_id, purl, ref, repository, path):-
35+
provenance(prov_id, component_id, _, _, _, _),
36+
statement(stmt_id, prov_id, "https://in-toto.io/Statement/v1", "https://slsa.dev/provenance/v1"),
37+
subject(sub_id, stmt_id, purl),
38+
predicate(pred_id, stmt_id),
39+
build_definition(build_id, pred_id, _),
40+
external_parameters(external_params_id, build_id),
41+
workflow(_, external_params_id, ref, repository, path).
42+
43+
/**
44+
* This relation provides the internal parameters of a SLSA v1 provenance generated by npm.
45+
The internal parameters include details of the GitHub event and repository that triggered the publish.
46+
47+
Here is the related section of an example predicate we process in this relation:
48+
"internalParameters": {
49+
"github": {
50+
"event_name": "push",
51+
"repository_id": "1357199",
52+
"repository_owner_id": "6078720"
53+
}
54+
},
55+
56+
57+
Parameters:
58+
component_id: number
59+
The target software component id.
60+
purl: symbol
61+
The Package URL identifier for the provenance subject.
62+
github_event_name: symbol
63+
The GitHub event that triggered the publish.
64+
github_repository_id: symbol
65+
The GitHub repository ID that triggered the publish.
66+
github_repository_owner_id: symbol
67+
The GitHub repository owner ID that triggered the publish.
68+
69+
*/
70+
.decl slsa_v1_npm_internal_parameters(
71+
component_id: number,
72+
purl: symbol,
73+
github_event_name: symbol,
74+
github_repository_id: symbol,
75+
github_repository_owner_id: symbol
76+
)
77+
78+
slsa_v1_npm_internal_parameters(
79+
component_id,
80+
purl,
81+
github_event_name,
82+
github_repository_id,
83+
github_repository_owner_id
84+
):-
85+
provenance(prov_id, component_id, _, _, _, _),
86+
statement(stmt_id, prov_id, "https://in-toto.io/Statement/v1", "https://slsa.dev/provenance/v1"),
87+
subject(sub_id, stmt_id, purl),
88+
predicate(pred_id, stmt_id),
89+
build_definition(build_id, pred_id, _),
90+
internal_parameters(_, build_id, github_event_name, github_repository_id, github_repository_owner_id).

src/macaron/policy_engine/prelude/prelude.dl

+2-1
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
/* Copyright (c) 2023 - 2023, Oracle and/or its affiliates. All rights reserved. */
1+
/* Copyright (c) 2023 - 2024, Oracle and/or its affiliates. All rights reserved. */
22
/* Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. */
33

44
/**
@@ -13,6 +13,7 @@
1313
#include "helper_rules.dl"
1414
#include "policy.dl"
1515
#include "aggregate_rules.dl"
16+
#include "intoto_policies.dl"
1617

1718
/* The fact import statements generated by the policy engine */
1819
#include "import_data.dl"
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,53 @@
1+
/* Copyright (c) 2024 - 2024, Oracle and/or its affiliates. All rights reserved. */
2+
/* Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. */
3+
4+
#include "prelude.dl"
5+
6+
Policy("test_policy", component_id, "") :-
7+
// Checks if the npm attestation has been successfully processed.
8+
check_passed(component_id, "mcn_npm_attestation_validation_1"),
9+
// This relation provides the external parameters of a SLSA v1 provenance generated by npm.
10+
slsa_v1_npm_external_parameters(component_id, purl, ref, repository, path),
11+
// This relation provides the internal parameters of a SLSA v1 provenance generated by npm.
12+
slsa_v1_npm_internal_parameters(component_id, purl, event_name, repository_id, repository_owner_id),
13+
// This match constraint makes sure the subjects we are interested in exist in the provenance.
14+
match("pkg:npm/semver@.*", purl),
15+
// Here we can add constraints that we are interested in.
16+
approved_refs(ref),
17+
approved_repository_owner_ids(repository_owner_id),
18+
repository = "https://github.com/npm/node-semver",
19+
path = ".github/workflows/release.yml".
20+
21+
Policy("test_policy", component_id, "") :-
22+
// Checks if the npm attestation has been successfully processed.
23+
check_passed(component_id, "mcn_npm_attestation_validation_1"),
24+
// Checks if the repository URL in the provenance matches the repository metadata on deps.dev.
25+
check_passed(component_id, "mcn_provenance_derived_repo_1"),
26+
// Checks if the commit hash in the provenance matches the release tag.
27+
check_passed(component_id, "mcn_provenance_derived_commit_1"),
28+
// This relation provides the external parameters of a SLSA v1 provenance generated by npm.
29+
slsa_v1_npm_external_parameters(component_id, purl, ref, repository, path),
30+
// This relation provides the internal parameters of a SLSA v1 provenance generated by npm.
31+
slsa_v1_npm_internal_parameters(component_id, purl, event_name, repository_id, repository_owner_id),
32+
// This match constraint makes sure the subjects we are interested in exist in the provenance.
33+
match("pkg:npm/semver@.*", purl),
34+
// Here we can add constraints that we are interested in.
35+
approved_refs(ref),
36+
approved_repository_owner_ids(repository_owner_id),
37+
path = ".github/workflows/release.yml".
38+
39+
// Create a relation containing the approved Git branches for publishing the artifact.
40+
.decl approved_refs(name: symbol)
41+
approved_refs("refs/heads/main").
42+
approved_refs("refs/heads/master").
43+
approved_refs("refs/heads/release").
44+
45+
// Create a relation containing the approved repository owner IDs for publishing the artifact.
46+
.decl approved_repository_owner_ids(name: symbol)
47+
approved_repository_owner_ids("6078720").
48+
approved_repository_owner_ids("71096353").
49+
50+
// Apply the policy to the desired software components.
51+
apply_policy_to("test_policy", component_id) :-
52+
is_component(component_id, purl),
53+
match("pkg:npm/semver@.*", purl).

0 commit comments

Comments
 (0)