Skip to content

Commit 472eb07

Browse files
committed
Re-add update_or_create_dependencies #447
* Create DiscoveredDependencies in load_codebase pipeline * Update tests to check for DiscoveredDependencies * Update test expectations Signed-off-by: Jono Yang <[email protected]>
1 parent 7629d55 commit 472eb07

9 files changed

+215
-6
lines changed

CHANGELOG.rst

+20-1
Original file line numberDiff line numberDiff line change
@@ -56,13 +56,32 @@ v31.0.0 (next)
5656

5757
https://github.com/nexB/scancode.io/issues/413
5858

59+
- Update application Package scanning step to reflect the updates in
60+
scancode-toolkit package scanning.
61+
62+
- Package data detected from a file are now stored on the
63+
CodebaseResource.package_data field.
64+
- A second processing step is now done after scanning for Package data, where
65+
Package Resources are determined and DiscoveredPackages and
66+
DiscoveredDependencies are created.
67+
68+
https://github.com/nexB/scancode.io/issues/444
69+
5970
- CodebaseResource.name now contains the bare file name with extension, as
6071
opposed to just the bare file name without extension.
61-
https://github.com/nexB/scancode.io/issues/467
6272

6373
- Using a name stripped from its extension was something that was not used in
6474
other AboutCode project or tools.
6575

76+
https://github.com/nexB/scancode.io/issues/467
77+
78+
- Add the model DiscoveredDependency. This represents Package dependencies
79+
discovered in a Project. The ``scan_codebase`` and ``scan_packages`` pipelines
80+
have been updated to create DiscoveredDependency objects.
81+
https://github.com/nexB/scancode.io/issues/447
82+
83+
- The ``dependencies`` field has been removed from the DiscoveredPackage model.
84+
6685
v30.2.0 (2021-12-17)
6786
--------------------
6887

scanpipe/models.py

+56-1
Original file line numberDiff line numberDiff line change
@@ -1983,12 +1983,67 @@ def create_from_data(cls, project, dependency_data):
19831983
Creates and returns a DiscoveredDependency for a `project` from the
19841984
`dependency_data`.
19851985
"""
1986+
required_fields = ["purl", "dependency_uid"]
1987+
missing_values = [
1988+
field_name
1989+
for field_name in required_fields
1990+
if not dependency_data.get(field_name)
1991+
]
1992+
1993+
if missing_values:
1994+
message = (
1995+
f"No values for the following required fields: "
1996+
f"{', '.join(missing_values)}"
1997+
)
1998+
1999+
project.add_error(error=message, model=cls, details=dependency_data)
2000+
return
2001+
19862002
if "resolved_package" in dependency_data:
19872003
dependency_data.pop("resolved_package")
1988-
discovered_dependency = cls(project=project, **dependency_data)
2004+
2005+
cleaned_dependency_data = {
2006+
field_name: value
2007+
for field_name, value in dependency_data.items()
2008+
if field_name in DiscoveredDependency.model_fields() and value
2009+
}
2010+
discovered_dependency = cls(
2011+
project=project,
2012+
**cleaned_dependency_data
2013+
)
19892014
discovered_dependency.save()
2015+
19902016
return discovered_dependency
19912017

2018+
def update_from_data(self, dependency_data):
    """
    Apply the non-empty values of `dependency_data` to this discovered dependency.

    Only the keys that are DiscoveredDependency model fields are considered, and
    a field is assigned only when the provided value differs from its current one.
    The `save()` is called once, and only if at least one field was modified.
    Returns the list of the modified field names.
    """
    known_fields = DiscoveredDependency.model_fields()
    changed = []

    for name, incoming in dependency_data.items():
        # Falsy values (None, "", False, 0, empty containers) are never applied,
        # so this method cannot reset a field to an empty or False value.
        if not incoming or name not in known_fields:
            continue

        existing = getattr(self, name, None)
        if existing and existing == incoming:
            # Already holding that value, nothing to change for this field.
            continue

        setattr(self, name, incoming)
        changed.append(name)

    if changed:
        self.save()

    return changed
2046+
19922047

19932048
class WebhookSubscription(UUIDPKModel, ProjectRelatedModel):
19942049
target_url = models.URLField(_("Target URL"), max_length=1024)

scanpipe/pipes/__init__.py

+23
Original file line numberDiff line numberDiff line change
@@ -105,6 +105,29 @@ def update_or_create_package(project, package_data, codebase_resource=None):
105105
return package
106106

107107

108+
def update_or_create_dependencies(project, dependency_data):
    """
    Gets and updates, or creates, a DiscoveredDependency then returns it.

    The `dependency_uid` and `for_package_uid` values of the `dependency_data`
    mapping are used, along with the `project`, as the unique key to look up an
    existing DiscoveredDependency.
    When one is found, it is updated from `dependency_data`; otherwise, a new
    DiscoveredDependency is created from `dependency_data`.
    """
    lookup = {
        "project": project,
        "dependency_uid": dependency_data.get("dependency_uid"),
        "for_package_uid": dependency_data.get("for_package_uid"),
    }

    try:
        existing = DiscoveredDependency.objects.get(**lookup)
    except DiscoveredDependency.DoesNotExist:
        existing = None

    if not existing:
        return DiscoveredDependency.create_from_data(project, dependency_data)

    existing.update_from_data(dependency_data)
    return existing
129+
130+
108131
def analyze_scanned_files(project):
109132
"""
110133
Sets the status for CodebaseResource to unknown or no license.

scanpipe/pipes/scancode.py

+11
Original file line numberDiff line numberDiff line change
@@ -488,6 +488,16 @@ def create_discovered_packages(project, scanned_codebase):
488488
pipes.update_or_create_package(project, package_data)
489489

490490

491+
def create_discovered_dependencies(project, scanned_codebase):
    """
    Stores each dependency listed on the attributes of `scanned_codebase`, a
    ScanCode scancode.resource.Codebase object, in the database as a
    DiscoveredDependency of `project`.
    Nothing is done when the codebase attributes have no `dependencies`.
    """
    codebase_attributes = scanned_codebase.attributes
    if not hasattr(codebase_attributes, "dependencies"):
        return

    for dependency_data in codebase_attributes.dependencies:
        pipes.update_or_create_dependencies(project, dependency_data)
499+
500+
491501
def set_codebase_resource_for_package(codebase_resource, discovered_package):
492502
"""
493503
Assigns the `discovered_package` to the `codebase_resource` and set its
@@ -578,4 +588,5 @@ def create_inventory_from_scan(project, input_location):
578588
"""
579589
scanned_codebase = get_virtual_codebase(project, input_location)
580590
create_discovered_packages(project, scanned_codebase)
591+
create_discovered_dependencies(project, scanned_codebase)
581592
create_codebase_resources(project, scanned_codebase)

scanpipe/tests/__init__.py

+13
Original file line numberDiff line numberDiff line change
@@ -89,6 +89,19 @@
8989
"package_uid": "pkg:deb/debian/[email protected]?uuid=610bed29-ce39-40e7-92d6-fd8b",
9090
}
9191

92+
# Sample dependency mapping, in the shape of a ScanCode scan "dependencies" entry,
# shared by the tests as input for DiscoveredDependency creation.
dependency_data1 = {
    "purl": "pkg:pypi/dask",
    "extracted_requirement": "dask<2023.0.0,>=2022.6.0",
    "scope": "install",
    "is_runtime": True,
    "is_optional": False,
    "is_resolved": False,
    "dependency_uid": "pkg:pypi/dask?uuid=e656b571-7d3f-46d1-b95b-8f037aef9692",
    "for_package_uid": "pkg:pypi/daglib@0.3.2?uuid=4d1f048b-a155-4f95-8cf9-185ab872ab4c",
    "datafile_path": "daglib-0.3.2.tar.gz-extract/daglib-0.3.2/PKG-INFO",
    "datasource_id": "pypi_sdist_pkginfo",
}
104+
92105
license_policies = [
93106
{
94107
"license_key": "apache-2.0",

scanpipe/tests/data/asgiref-3.3.0_load_inventory_expected.json

+50-1
Original file line numberDiff line numberDiff line change
@@ -119,7 +119,56 @@
119119
"source_packages": []
120120
}
121121
],
122-
"dependencies": [],
122+
"dependencies": [
123+
{
124+
"purl": "pkg:pypi/pytest",
125+
"extracted_requirement": "",
126+
"scope": "tests",
127+
"is_runtime": true,
128+
"is_optional": false,
129+
"is_resolved": false,
130+
"dependency_uid": "pkg:pypi/pytest?uuid=fixed-uid-done-for-testing-5642512d1758",
131+
"for_package_uid": "pkg:pypi/asgiref@3.3.0?uuid=fixed-uid-done-for-testing-5642512d1758",
132+
"datafile_path": "codebase/asgiref-3.3.0-py3-none-any.whl",
133+
"datasource_id": "pypi_wheel"
134+
},
135+
{
136+
"purl": "pkg:pypi/pytest",
137+
"extracted_requirement": "",
138+
"scope": "tests",
139+
"is_runtime": true,
140+
"is_optional": false,
141+
"is_resolved": false,
142+
"dependency_uid": "pkg:pypi/pytest?uuid=fixed-uid-done-for-testing-5642512d1758",
143+
"for_package_uid": "pkg:pypi/asgiref@3.3.0?uuid=fixed-uid-done-for-testing-5642512d1758",
144+
"datafile_path": "codebase/asgiref-3.3.0-py3-none-any.whl-extract/asgiref-3.3.0.dist-info/METADATA",
145+
"datasource_id": "pypi_wheel_metadata"
146+
},
147+
{
148+
"purl": "pkg:pypi/pytest-asyncio",
149+
"extracted_requirement": "",
150+
"scope": "tests",
151+
"is_runtime": true,
152+
"is_optional": false,
153+
"is_resolved": false,
154+
"dependency_uid": "pkg:pypi/pytest-asyncio?uuid=fixed-uid-done-for-testing-5642512d1758",
155+
"for_package_uid": "pkg:pypi/asgiref@3.3.0?uuid=fixed-uid-done-for-testing-5642512d1758",
156+
"datafile_path": "codebase/asgiref-3.3.0-py3-none-any.whl",
157+
"datasource_id": "pypi_wheel"
158+
},
159+
{
160+
"purl": "pkg:pypi/pytest-asyncio",
161+
"extracted_requirement": "",
162+
"scope": "tests",
163+
"is_runtime": true,
164+
"is_optional": false,
165+
"is_resolved": false,
166+
"dependency_uid": "pkg:pypi/pytest-asyncio?uuid=fixed-uid-done-for-testing-5642512d1758",
167+
"for_package_uid": "pkg:pypi/asgiref@3.3.0?uuid=fixed-uid-done-for-testing-5642512d1758",
168+
"datafile_path": "codebase/asgiref-3.3.0-py3-none-any.whl-extract/asgiref-3.3.0.dist-info/METADATA",
169+
"datasource_id": "pypi_wheel_metadata"
170+
}
171+
],
123172
"files": [
124173
{
125174
"for_packages": [

scanpipe/tests/test_models.py

+33
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,7 @@
4646

4747
from scancodeio import __version__ as scancodeio_version
4848
from scanpipe.models import CodebaseResource
49+
from scanpipe.models import DiscoveredDependency
4950
from scanpipe.models import DiscoveredPackage
5051
from scanpipe.models import Project
5152
from scanpipe.models import ProjectError
@@ -57,6 +58,7 @@
5758
from scanpipe.pipes.input import copy_inputs
5859
from scanpipe.tests import license_policies_index
5960
from scanpipe.tests import mocked_now
61+
from scanpipe.tests import dependency_data1
6062
from scanpipe.tests import package_data1
6163
from scanpipe.tests.pipelines.do_nothing import DoNothing
6264

@@ -1416,6 +1418,37 @@ def test_scanpipe_discovered_package_model_create_from_data(self):
14161418
self.assertEqual(package_count, DiscoveredPackage.objects.count())
14171419
self.assertEqual(project_error_count, ProjectError.objects.count())
14181420

1421+
@skipIf(connection.vendor == "sqlite", "No max_length constraints on SQLite.")
def test_scanpipe_discovered_dependency_model_create_from_data(self):
    """
    Check that `DiscoveredDependency.create_from_data` stores the provided values,
    and that data missing a required field creates a ProjectError instead of a
    DiscoveredDependency.
    """
    project1 = Project.objects.create(name="Analysis")

    dependency = DiscoveredDependency.create_from_data(project1, dependency_data1)
    self.assertEqual(project1, dependency.project)

    # Text values are compared against the fixture itself rather than against
    # duplicated literals, so the expectations cannot drift from the input data.
    text_fields = (
        "purl",
        "extracted_requirement",
        "scope",
        "dependency_uid",
        "for_package_uid",
        "datafile_path",
        "datasource_id",
    )
    for field_name in text_fields:
        with self.subTest(field=field_name):
            self.assertEqual(
                dependency_data1[field_name], getattr(dependency, field_name)
            )

    self.assertTrue(dependency.is_runtime)
    self.assertFalse(dependency.is_optional)
    self.assertFalse(dependency.is_resolved)

    # An empty required field must not create a dependency: None is returned
    # and the problem is recorded as a ProjectError on the project.
    dependency_count = DiscoveredDependency.objects.count()
    incomplete_data = dict(dependency_data1)
    incomplete_data["dependency_uid"] = ""
    self.assertIsNone(
        DiscoveredDependency.create_from_data(project1, incomplete_data)
    )
    self.assertEqual(dependency_count, DiscoveredDependency.objects.count())

    error = project1.projecterrors.latest("created_date")
    self.assertEqual("DiscoveredDependency", error.model)
    expected_message = "No values for the following required fields: dependency_uid"
    self.assertEqual(expected_message, error.message)
    self.assertEqual(dependency_data1["purl"], error.details["purl"])
    self.assertEqual("", error.details["dependency_uid"])
    self.assertEqual("", error.traceback)
1450+
1451+
14191452
def test_scanpipe_discovered_package_model_unique_package_uid_in_project(self):
14201453
project1 = Project.objects.create(name="Analysis")
14211454

scanpipe/tests/test_pipelines.py

+3-3
Original file line numberDiff line numberDiff line change
@@ -302,7 +302,7 @@ def test_scanpipe_scan_package_pipeline_integration_test(self):
302302

303303
self.assertEqual(4, project1.codebaseresources.count())
304304
self.assertEqual(1, project1.discoveredpackages.count())
305-
self.assertEqual(0, project1.discovereddependencys.count())
305+
self.assertEqual(1, project1.discovereddependencys.count())
306306

307307
scancode_file = project1.get_latest_output(filename="scancode")
308308
expected_file = self.data_location / "is-npm-1.0.0_scan_package.json"
@@ -336,7 +336,7 @@ def test_scanpipe_scan_package_pipeline_integration_test_multiple_packages(self)
336336

337337
self.assertEqual(9, project1.codebaseresources.count())
338338
self.assertEqual(2, project1.discoveredpackages.count())
339-
self.assertEqual(0, project1.discovereddependencys.count())
339+
self.assertEqual(2, project1.discovereddependencys.count())
340340

341341
scancode_file = project1.get_latest_output(filename="scancode")
342342
expected_file = self.data_location / "multiple-is-npm-1.0.0_scan_package.json"
@@ -500,7 +500,7 @@ def test_scanpipe_load_inventory_pipeline_integration_test(self):
500500

501501
self.assertEqual(18, project1.codebaseresources.count())
502502
self.assertEqual(2, project1.discoveredpackages.count())
503-
self.assertEqual(0, project1.discovereddependencys.count())
503+
self.assertEqual(4, project1.discovereddependencys.count())
504504

505505
result_file = output.to_json(project1)
506506
expected_file = (

scanpipe/tests/test_pipes.py

+6
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,7 @@
3939
from scancode.interrupt import TimeoutError as InterruptTimeoutError
4040

4141
from scanpipe.models import CodebaseResource
42+
from scanpipe.models import DiscoveredDependency
4243
from scanpipe.models import DiscoveredPackage
4344
from scanpipe.models import Project
4445
from scanpipe.models import ProjectError
@@ -528,9 +529,11 @@ def test_scanpipe_pipes_scancode_virtual_codebase(self):
528529

529530
scancode.create_codebase_resources(project, virtual_codebase)
530531
scancode.create_discovered_packages(project, virtual_codebase)
532+
scancode.create_discovered_dependencies(project, virtual_codebase)
531533

532534
self.assertEqual(18, CodebaseResource.objects.count())
533535
self.assertEqual(1, DiscoveredPackage.objects.count())
536+
self.assertEqual(1, DiscoveredDependency.objects.count())
534537
# Make sure the root is not created as a CodebaseResource, walk(skip_root=True)
535538
self.assertFalse(CodebaseResource.objects.filter(path="codebase").exists())
536539

@@ -547,8 +550,10 @@ def test_scanpipe_pipes_scancode_virtual_codebase(self):
547550
# The functions can be called again and existing objects are skipped
548551
scancode.create_codebase_resources(project, virtual_codebase)
549552
scancode.create_discovered_packages(project, virtual_codebase)
553+
scancode.create_discovered_dependencies(project, virtual_codebase)
550554
self.assertEqual(18, CodebaseResource.objects.count())
551555
self.assertEqual(1, DiscoveredPackage.objects.count())
556+
self.assertEqual(1, DiscoveredDependency.objects.count())
552557

553558
def test_scanpipe_pipes_scancode_create_codebase_resources_inject_policy(self):
554559
project = Project.objects.create(name="asgiref")
@@ -560,6 +565,7 @@ def test_scanpipe_pipes_scancode_create_codebase_resources_inject_policy(self):
560565

561566
scanpipe_app.license_policies_index = license_policies_index
562567
scancode.create_discovered_packages(project, virtual_codebase)
568+
scancode.create_discovered_dependencies(project, virtual_codebase)
563569
scancode.create_codebase_resources(project, virtual_codebase)
564570
resources = project.codebaseresources
565571

0 commit comments

Comments
 (0)