Skip to content

Commit d1c9379

Browse files
committed
Correctly compute_normalized_license for debian
* The license of Debian installed system packages was not detected correctly. In particular, merging copyright detection details with other metadata was not working. Reference: aboutcode-org/scancode.io#478 Signed-off-by: Philippe Ombredanne <[email protected]>
1 parent 86a71a5 commit d1c9379

14 files changed

+1324
-306
lines changed

src/packagedcode/debian.py

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,9 +28,11 @@
2828

2929
TRACE = SCANCODE_DEBUG_PACKAGE_API
3030

31+
3132
def logger_debug(*args):
3233
pass
3334

35+
3436
logger = logging.getLogger(__name__)
3537

3638
if TRACE:
@@ -43,7 +45,6 @@ def logger_debug(*args):
4345
' '.join(isinstance(a, str) and a or repr(a) for a in args)
4446
)
4547

46-
4748
# TODO: add dependencies
4849

4950

@@ -282,14 +283,27 @@ def assemble(cls, package_data, resource, codebase):
282283
resources = []
283284
# TODO: keep track of missing files
284285
for res in root_resource.walk(codebase):
286+
if TRACE:
287+
logger_debug(f' debian: assemble: root_walk: res: {res}')
285288
if not res.path.endswith(assemblable_paths):
286289
continue
287290

288291
for pkgdt in res.package_data:
289292
package_data = models.PackageData.from_dict(pkgdt)
293+
if TRACE:
294+
# logger_debug(f' debian: assemble: root_walk: package_data: {package_data}')
295+
logger_debug(f' debian: assemble: root_walk: package_data: {package_data.license_expression}')
296+
297+
# Most debian secondary files are only specific to a name. We
298+
# have a few cases where the arch is included in the lists and
299+
# md5sums.
290300
package.update(
291301
package_data=package_data,
292302
datafile_path=res.path,
303+
replace=False,
304+
include_version=False,
305+
include_qualifiers=False,
306+
include_subpath=False,
293307
)
294308
package_file_references.extend(package_data.file_references)
295309

src/packagedcode/models.py

Lines changed: 83 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1207,6 +1207,13 @@ def __attrs_post_init__(self, *args, **kwargs):
12071207
def to_dict(self):
12081208
return super().to_dict(with_details=False)
12091209

1210+
def to_package_data(self):
1211+
mapping = super().to_dict(with_details=True)
1212+
mapping.pop('package_uid', None)
1213+
mapping.pop('datafile_paths', None)
1214+
mapping.pop('datasource_ids', None)
1215+
return PackageData.from_dict(mapping)
1216+
12101217
@classmethod
12111218
def from_package_data(cls, package_data, datafile_path):
12121219
"""
@@ -1255,7 +1262,15 @@ def is_compatible(self, package_data, include_qualifiers=True):
12551262
and self.primary_language == package_data.primary_language
12561263
)
12571264

1258-
def update(self, package_data, datafile_path, replace=False):
1265+
def update(
1266+
self,
1267+
package_data,
1268+
datafile_path,
1269+
replace=False,
1270+
include_version=True,
1271+
include_qualifiers=False,
1272+
include_subpath=False,
1273+
):
12591274
"""
12601275
Update this Package with data from the ``package_data`` PackageData.
12611276
@@ -1281,9 +1296,15 @@ def update(self, package_data, datafile_path, replace=False):
12811296
if isinstance(package_data, dict):
12821297
package_data = PackageData.from_dict(package_data)
12831298

1284-
if not self.is_compatible(package_data, include_qualifiers=False):
1299+
if not is_compatible(
1300+
purl1=self,
1301+
purl2=package_data,
1302+
include_version=include_version,
1303+
include_qualifiers=include_qualifiers,
1304+
include_subpath=include_subpath,
1305+
):
12851306
if TRACE_UPDATE:
1286-
logger_debug(f'update: {self.purl} not compatible with: {package_data.purl}')
1307+
logger_debug(f'update: skipping: {self.purl} is not compatible with: {package_data.purl}')
12871308
return False
12881309

12891310
# always append these new items
@@ -1345,6 +1366,56 @@ def get_packages_files(self, codebase):
13451366
yield resource
13461367

13471368

1369+
def is_compatible(
1370+
purl1,
1371+
purl2,
1372+
include_version=True,
1373+
include_qualifiers=True,
1374+
include_subpath=True,
1375+
):
1376+
"""
1377+
Return True if the ``purl1`` PackageURL-like object is compatible with
1378+
the ``purl2`` PackageURL-like object, e.g. it is about the same package.
1379+
PackageData objects are PackageURL-like.
1380+
1381+
For example::
1382+
>>> p1 = PackageURL.from_string('pkg:deb/[email protected]?arch=arm64')
1383+
>>> p2 = PackageURL.from_string('pkg:deb/[email protected]')
1384+
>>> p3 = PackageURL.from_string('pkg:deb/libssl')
1385+
>>> p4 = PackageURL.from_string('pkg:deb/libncurses5')
1386+
>>> p5 = PackageURL.from_string('pkg:deb/[email protected]?arch=arm64#/sbin')
1387+
>>> is_compatible(p1, p2)
1388+
False
1389+
>>> is_compatible(p1, p2, include_qualifiers=False)
1390+
True
1391+
>>> is_compatible(p1, p4)
1392+
False
1393+
>>> is_compatible(p1, p4, include_version=False, include_qualifiers=False)
1394+
True
1395+
>>> is_compatible(p3, p4)
1396+
False
1397+
>>> is_compatible(p1, p5)
1398+
False
1399+
>>> is_compatible(p1, p5, include_subpath=False)
1400+
True
1401+
"""
1402+
is_compatible = (
1403+
purl1.type == purl2.type
1404+
and purl1.namespace == purl2.namespace
1405+
and purl1.name == purl2.name
1406+
)
1407+
if include_version:
1408+
is_compatible = is_compatible and (purl1.version == purl2.version)
1409+
1410+
if include_qualifiers:
1411+
is_compatible = is_compatible and (purl1.qualifiers == purl2.qualifiers)
1412+
1413+
if include_subpath:
1414+
is_compatible = is_compatible and (purl1.subpath == purl2.subpath)
1415+
1416+
return is_compatible
1417+
1418+
13481419
@attr.attributes(slots=True)
13491420
class PackageWithResources(Package):
13501421
"""
@@ -1384,7 +1455,15 @@ def merge_sequences(list1, list2, **kwargs):
13841455
merged = []
13851456
existing = set()
13861457
for item in list1 + list2:
1387-
key = item.to_tuple(**kwargs)
1458+
try:
1459+
if hasattr(item, 'to_tuple'):
1460+
key = item.to_tuple(**kwargs)
1461+
else:
1462+
key = to_tuple(kwargs)
1463+
1464+
except Exception as e:
1465+
raise Exception(f'Failed to merge sequences: {item}', f'kwargs: {kwargs}') from e
1466+
13881467
if not key in existing:
13891468
merged.append(item)
13901469
existing.add(key)

tests/packagedcode/data/chef/package.scan.expected.json

Lines changed: 4 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -1,30 +1,4 @@
11
{
2-
"headers": [
3-
{
4-
"tool_name": "scancode-toolkit",
5-
"options": {
6-
"input": "<path>",
7-
"--json": "<file>",
8-
"--package": true
9-
},
10-
"notice": "Generated with ScanCode and provided on an \"AS IS\" BASIS, WITHOUT WARRANTIES\nOR CONDITIONS OF ANY KIND, either express or implied. No content created from\nScanCode should be considered or used as legal advice. Consult an Attorney\nfor any legal advice.\nScanCode is a free software code scanning tool from nexB Inc. and others.\nVisit https://github.com/nexB/scancode-toolkit/ for support and download.",
11-
"output_format_version": "2.0.0",
12-
"message": null,
13-
"errors": [],
14-
"warnings": [],
15-
"extra_data": {
16-
"system_environment": {
17-
"operating_system": "linux",
18-
"cpu_architecture": "64",
19-
"platform": "Linux-5.4.0-109-generic-x86_64-with-Ubuntu-18.04-bionic",
20-
"platform_version": "#123~18.04.1-Ubuntu SMP Fri Apr 8 09:48:52 UTC 2022",
21-
"python_version": "3.6.9 (default, Mar 15 2022, 13:55:28) \n[GCC 8.4.0]"
22-
},
23-
"spdx_license_list_version": "3.16",
24-
"files_count": 2
25-
}
26-
}
27-
],
282
"dependencies": [
293
{
304
"purl": "pkg:chef/nodejs",
@@ -95,10 +69,12 @@
9569
"api_data_url": "https://supermarket.chef.io/api/v1/cookbooks/301/versions/0.1.0",
9670
"package_uid": "pkg:chef/[email protected]?uuid=fixed-uid-done-for-testing-5642512d1758",
9771
"datafile_paths": [
98-
"package/metadata.rb"
72+
"package/metadata.rb",
73+
"package/metadata.json"
9974
],
10075
"datasource_ids": [
101-
"chef_cookbook_metadata_rb"
76+
"chef_cookbook_metadata_rb",
77+
"chef_cookbook_metadata_json"
10278
],
10379
"purl": "pkg:chef/[email protected]"
10480
}

tests/packagedcode/data/debian/basic-rootfs-expected.json

Lines changed: 21 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -1,30 +1,4 @@
11
{
2-
"headers": [
3-
{
4-
"tool_name": "scancode-toolkit",
5-
"options": {
6-
"input": "<path>",
7-
"--json-pp": "<file>",
8-
"--package": true
9-
},
10-
"notice": "Generated with ScanCode and provided on an \"AS IS\" BASIS, WITHOUT WARRANTIES\nOR CONDITIONS OF ANY KIND, either express or implied. No content created from\nScanCode should be considered or used as legal advice. Consult an Attorney\nfor any legal advice.\nScanCode is a free software code scanning tool from nexB Inc. and others.\nVisit https://github.com/nexB/scancode-toolkit/ for support and download.",
11-
"output_format_version": "2.0.0",
12-
"message": null,
13-
"errors": [],
14-
"warnings": [],
15-
"extra_data": {
16-
"system_environment": {
17-
"operating_system": "linux",
18-
"cpu_architecture": "64",
19-
"platform": "Linux-5.4.0-109-generic-x86_64-with-Ubuntu-18.04-bionic",
20-
"platform_version": "#123~18.04.1-Ubuntu SMP Fri Apr 8 09:48:52 UTC 2022",
21-
"python_version": "3.6.9 (default, Mar 15 2022, 13:55:28) \n[GCC 8.4.0]"
22-
},
23-
"spdx_license_list_version": "3.16",
24-
"files_count": 5
25-
}
26-
}
27-
],
282
"dependencies": [],
293
"packages": [
304
{
@@ -61,8 +35,8 @@
6135
"bug_tracking_url": null,
6236
"code_view_url": null,
6337
"vcs_url": null,
64-
"copyright": null,
65-
"license_expression": null,
38+
"copyright": "Copyright (c) 1998-2016 Free Software Foundation, Inc.\nCopyright (c) 2001 by Pradeep Padala\nCopyright (c) 1994 X Consortium\nCopyright (c) 1980, 1991, 1992, 1993 The Regents of the University of California\nCopyright 1996-2007 by Thomas E. Dickey",
39+
"license_expression": "x11-fsf AND x11-xconsortium AND bsd-new AND x11-fsf",
6640
"declared_license": null,
6741
"notice_text": null,
6842
"source_packages": [
@@ -114,10 +88,14 @@
11488
"api_data_url": null,
11589
"package_uid": "pkg:deb/[email protected]?architecture=amd64&uuid=fixed-uid-done-for-testing-5642512d1758",
11690
"datafile_paths": [
117-
"basic-rootfs.tar.gz/var/lib/dpkg/status"
91+
"basic-rootfs.tar.gz/var/lib/dpkg/status",
92+
"basic-rootfs.tar.gz/usr/share/doc/libncurses5/copyright",
93+
"basic-rootfs.tar.gz/var/lib/dpkg/info/libncurses5:amd64.md5sums"
11894
],
11995
"datasource_ids": [
120-
"debian_installed_status_db"
96+
"debian_installed_status_db",
97+
"debian_copyright_in_package",
98+
"debian_installed_md5sums"
12199
],
122100
"purl": "pkg:deb/[email protected]?architecture=amd64"
123101
},
@@ -155,9 +133,13 @@
155133
"bug_tracking_url": null,
156134
"code_view_url": null,
157135
"vcs_url": null,
158-
"copyright": null,
159-
"license_expression": null,
160-
"declared_license": null,
136+
"copyright": "Copyright 2013 Jiri Pirko <[email protected]>\nCopyright 2014 Andrew Ayer <[email protected]>",
137+
"license_expression": "(lgpl-2.1-plus AND lgpl-2.1-plus AND lgpl-2.1) AND (lgpl-2.1-plus AND lgpl-2.1-plus AND lgpl-2.1)",
138+
"declared_license": [
139+
"LGPL-2.1+",
140+
"LGPL-2.1+",
141+
"LGPL-2.1+"
142+
],
161143
"notice_text": null,
162144
"source_packages": [
163145
"pkg:deb/libndp"
@@ -190,10 +172,14 @@
190172
"api_data_url": null,
191173
"package_uid": "pkg:deb/[email protected]?architecture=amd64&uuid=fixed-uid-done-for-testing-5642512d1758",
192174
"datafile_paths": [
193-
"basic-rootfs.tar.gz/var/lib/dpkg/status"
175+
"basic-rootfs.tar.gz/var/lib/dpkg/status",
176+
"basic-rootfs.tar.gz/usr/share/doc/libndp0/copyright",
177+
"basic-rootfs.tar.gz/var/lib/dpkg/info/libndp0:amd64.md5sums"
194178
],
195179
"datasource_ids": [
196-
"debian_installed_status_db"
180+
"debian_installed_status_db",
181+
"debian_copyright_in_package",
182+
"debian_installed_md5sums"
197183
],
198184
"purl": "pkg:deb/[email protected]?architecture=amd64"
199185
}

0 commit comments

Comments
 (0)