diff --git a/CHANGELOG.rst b/CHANGELOG.rst index 703bc7f88..e9739afad 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -4,6 +4,11 @@ Changelog v31.1.0 (unreleased) -------------------- +- Add a new "inspect manifest" pipeline to resolve packages from manifest, lockfile, + and SBOM. The resolved packages are created as discovered packages. + Support PyPI "requirements.txt" files and AboutCode ".ABOUT" files + https://github.com/nexB/scancode.io/issues/284 + - Add a new "check vulnerabilities" pipeline to lookup vulnerabilities in the VulnerableCode database for all project discovered packages. Vulnerability data is stored in the extra_data field of each package. diff --git a/docs/built-in-pipelines.rst b/docs/built-in-pipelines.rst index f758eff36..151f9f150 100644 --- a/docs/built-in-pipelines.rst +++ b/docs/built-in-pipelines.rst @@ -39,6 +39,16 @@ Docker Windows Image Analysis :members: :member-order: bysource +.. _pipeline_inspect_manifest: + +Inspect Manifest +---------------- +.. autoclass:: scanpipe.pipelines.inspect_manifest.InspectManifest() + :members: + :member-order: bysource + +.. _pipeline_load_inventory: + Load Inventory From Scan ------------------------ .. autoclass:: scanpipe.pipelines.load_inventory.LoadInventory() diff --git a/docs/scanpipe-pipes.rst b/docs/scanpipe-pipes.rst index b974e42cf..65a6ec002 100644 --- a/docs/scanpipe-pipes.rst +++ b/docs/scanpipe-pipes.rst @@ -23,10 +23,6 @@ Docker .. automodule:: scanpipe.pipes.docker :members: -Windows -------- -.. automodule:: scanpipe.pipes.windows - :members: Fetch ----- @@ -45,9 +41,13 @@ Output :members: :exclude-members: JSONResultsGenerator +Resolve +------- +.. automodule:: scanpipe.pipes.resolve + :members: + RootFS ------ - .. automodule:: scanpipe.pipes.rootfs :members: @@ -55,3 +55,13 @@ ScanCode -------- .. automodule:: scanpipe.pipes.scancode :members: + +VulnerableCode +-------------- +.. automodule:: scanpipe.pipes.vulnerablecode + :members: + +Windows +------- +.. 
@classmethod
def clean_data(cls, data, include_none=False):
    """
    Return the `data` dict keeping only entries for fields available in the model.

    The field names are read once from `cls.model_fields()` and kept in a set,
    instead of calling `cls.model_fields()` again for each entry of `data`.

    The `include_none` argument is accepted for backward compatibility but is
    currently ignored: values are never inspected, so an entry with a `None`
    (or otherwise empty) value is kept as long as its key is a model field.
    """
    # Loop-invariant: resolve the model field names a single time.
    model_fields = set(cls.model_fields())
    return {
        field_name: value
        for field_name, value in data.items()
        if field_name in model_fields
    }
class InspectManifest(Pipeline):
    """
    A pipeline that inspects the manifest files provided as project inputs and
    resolves the packages they declare.

    Supports:
    - PyPI "requirements.txt" files
    - AboutCode ".ABOUT" files
    """

    @classmethod
    def steps(cls):
        return (
            cls.get_manifest_inputs,
            cls.create_packages_from_manifest,
        )

    def get_manifest_inputs(self):
        """
        Stores the absolute location, as a string, of each project input in
        `self.input_locations`.
        """
        locations = []
        for input_path in self.project.inputs():
            locations.append(str(input_path.absolute()))
        self.input_locations = locations

    def create_packages_from_manifest(self):
        """
        Resolves each input location into packages and records every resolved
        package on the project.

        Raises an Exception on the first input that has no package type, no
        registered resolver, or no resolved packages.
        """
        for location in self.input_locations:
            for package_data in self._resolve_packages(location):
                update_or_create_package(self.project, package_data)

    @staticmethod
    def _resolve_packages(input_location):
        """
        Returns the packages resolved from `input_location`, raising an
        Exception when any stage of the resolution yields nothing.
        """
        package_type = resolve.get_default_package_type(input_location)
        if not package_type:
            raise Exception(f"No package type found for {input_location}")

        resolver = resolve.resolver_registry.get(package_type)
        if not resolver:
            raise Exception(
                f'No resolver for package type "{package_type}" for '
                f"{input_location}"
            )

        packages = resolver(input_location=input_location)
        if not packages:
            raise Exception(f"No packages could be resolved for {input_location}")

        return packages
# Utilities to resolve packages from manifest, lockfile, and SBOM.


def resolve_pypi_packages(input_location):
    """
    Return the packages resolved from the `input_location` PyPI requirements
    file.
    """
    resolution = resolver_api(
        prefer_source=True,
        requirement_files=[input_location],
    )
    return resolution.packages


def resolve_about_packages(input_location):
    """
    Return a one-item list holding the package data loaded from the
    `input_location` .ABOUT file.

    When the file declares a `package_url`, its non-empty components are
    merged into the data before it is reduced to the DiscoveredPackage fields.
    """
    about_data = About(location=input_location).as_dict()

    package_url = about_data.get("package_url")
    if package_url:
        purl_fields = PackageURL.from_string(package_url).to_dict(encode=True)
        # Empty Package URL components must not override the .ABOUT values.
        about_data.update(
            {name: value for name, value in purl_fields.items() if value}
        )

    return [DiscoveredPackage.clean_data(about_data)]


def get_default_package_type(input_location):
    """
    Return the package type associated with the provided `input_location`,
    or None when no handler recognizes it as one of its datafiles.

    The type is taken from the first matching handler. It is the key used to
    look up the related resolver function in `resolver_registry`.
    """
    matching_types = (
        handler.default_package_type
        for handler in APPLICATION_PACKAGE_DATAFILE_HANDLERS
        if handler.is_datafile(input_location)
    )
    return next(matching_types, None)


# Mapping between a `default_package_type` and its related resolver function.
resolver_registry = {
    "about": resolve_about_packages,
    "pypi": resolve_pypi_packages,
}
def test_scanpipe_inspect_manifest_pipeline_integration_test(self):
    # Failure path: the only input is an empty temporary file with a random
    # name, and the pipeline is expected to stop with exit code 1.
    pipeline_name = "inspect_manifest"
    project1 = Project.objects.create(name="Analysis")

    run = project1.add_pipeline(pipeline_name)
    pipeline = run.make_pipeline_instance()

    # mkstemp() returns a (file descriptor, path) pair; only the path is used.
    project1.move_input_from(tempfile.mkstemp()[1])
    exitcode, out = pipeline.execute()
    self.assertEqual(1, exitcode, msg=out)
    self.assertIn("No package type found for", out)

@mock.patch("scanpipe.pipes.resolve.resolver_api")
def test_scanpipe_inspect_manifest_pipeline_pypi_integration_test(
    self, resolver_api
):
    # `resolver_api` is patched, so its return value is fully controlled by
    # this test instead of coming from a real resolution.
    pipeline_name = "inspect_manifest"
    project1 = Project.objects.create(name="Analysis")

    run = project1.add_pipeline(pipeline_name)
    pipeline = run.make_pipeline_instance()

    # First run: the resolver reports no packages, the pipeline must fail.
    resolver_api.return_value = mock.Mock(packages=[])
    project1.move_input_from(tempfile.mkstemp(suffix="requirements.txt")[1])
    exitcode, out = pipeline.execute()
    self.assertEqual(1, exitcode, msg=out)
    self.assertIn("No packages could be resolved", out)

    # Second run on the same pipeline instance: one package is returned and
    # the execution is expected to succeed.
    resolver_api.return_value = mock.Mock(packages=[package_data1])
    exitcode, out = pipeline.execute()
    self.assertEqual(0, exitcode, msg=out)

    self.assertEqual(1, project1.discoveredpackages.count())
    discoveredpackage = project1.discoveredpackages.get()
    # NOTE(review): these fields are skipped from the comparison, presumably
    # because they are stored in a different form than in `package_data1`
    # -- TODO confirm.
    exclude_fields = ["qualifiers", "release_date", "size"]
    # Only non-empty input values are compared with the stored package.
    for field_name, value in package_data1.items():
        if value and field_name not in exclude_fields:
            self.assertEqual(value, getattr(discoveredpackage, field_name))

def test_scanpipe_inspect_manifest_pipeline_aboutfile_integration_test(self):
    # Success path using the Django .ABOUT test data file as the input.
    pipeline_name = "inspect_manifest"
    project1 = Project.objects.create(name="Analysis")

    input_location = self.data_location / "Django-4.0.8-py3-none-any.whl.ABOUT"
    project1.copy_input_from(input_location)

    run = project1.add_pipeline(pipeline_name)
    pipeline = run.make_pipeline_instance()

    exitcode, out = pipeline.execute()
    self.assertEqual(0, exitcode, msg=out)

    self.assertEqual(1, project1.discoveredpackages.count())
    discoveredpackage = project1.discoveredpackages.get()
    # The .ABOUT file has no "type" entry: "pypi" is expected to come from
    # its `package_url` value, "pkg:pypi/django@4.0.8".
    self.assertEqual("pypi", discoveredpackage.type)
    self.assertEqual("django", discoveredpackage.name)
    self.assertEqual("4.0.8", discoveredpackage.version)
    self.assertEqual("bsd-new", discoveredpackage.license_expression)
+import datetime import json import os import tempfile @@ -1055,6 +1056,7 @@ def test_scanpipe_pipes_update_or_create_package(self): package = update_or_create_package(p1, package_data1) self.assertEqual("pkg:deb/debian/adduser@3.118?arch=all", package.purl) self.assertEqual("", package.primary_language) + self.assertEqual(datetime.date(1999, 10, 10), package.release_date) updated_data = dict(package_data1) updated_data["primary_language"] = "Python" @@ -1067,9 +1069,11 @@ def test_scanpipe_pipes_update_or_create_package(self): package_data2 = dict(package_data1) package_data2["name"] = "new name" package_data2["package_uid"] = "" + package_data2["release_date"] = "2020-11-01T01:40:20" package2 = update_or_create_package(p1, package_data2, resource1) self.assertNotEqual(package.pk, package2.pk) self.assertIn(resource1, package2.codebase_resources.all()) + self.assertEqual(datetime.date(2020, 11, 1), package2.release_date) class ScanPipePipesTransactionTest(TransactionTestCase): diff --git a/setup.cfg b/setup.cfg index 15fe387d0..ccd60a616 100644 --- a/setup.cfg +++ b/setup.cfg @@ -75,6 +75,9 @@ install_requires = commoncode==31.0.0 # FetchCode fetchcode-container==1.2.3.210512; sys_platform == "linux" + # Inspectors + python-inspector==0.9.2 + aboutcode-toolkit==7.2.0 # Utilities XlsxWriter==3.0.3 requests==2.28.1 @@ -107,6 +110,7 @@ scancodeio_pipelines = check_vulnerabilities = scanpipe.pipelines.check_vulnerabilities:CheckVulnerabilities docker = scanpipe.pipelines.docker:Docker docker_windows = scanpipe.pipelines.docker_windows:DockerWindows + inspect_manifest = scanpipe.pipelines.inspect_manifest:InspectManifest load_inventory = scanpipe.pipelines.load_inventory:LoadInventory root_filesystems = scanpipe.pipelines.root_filesystems:RootFS scan_codebase = scanpipe.pipelines.scan_codebase:ScanCodebase