From 3b45dc6a8aaeeeaa9b1c6d6c6b97c9c6265f4fd3 Mon Sep 17 00:00:00 2001 From: Jono Yang Date: Fri, 29 Jul 2022 17:21:51 -0700 Subject: [PATCH 1/2] Add package_adder argument to assemble() #3034 * Add test for add_to_package Signed-off-by: Jono Yang --- src/packagedcode/about.py | 8 ++-- src/packagedcode/alpine.py | 8 ++-- src/packagedcode/build.py | 20 +++++---- src/packagedcode/build_gradle.py | 3 +- src/packagedcode/cargo.py | 6 ++- src/packagedcode/chef.py | 3 +- src/packagedcode/cocoapods.py | 12 +++++- src/packagedcode/conda.py | 3 +- src/packagedcode/debian.py | 45 +++++++++----------- src/packagedcode/debian_copyright.py | 10 ++--- src/packagedcode/godeps.py | 4 +- src/packagedcode/golang.py | 3 +- src/packagedcode/jar_manifest.py | 9 +++- src/packagedcode/maven.py | 16 +++++-- src/packagedcode/models.py | 52 ++++++++++++++--------- src/packagedcode/npm.py | 11 ++--- src/packagedcode/opam.py | 4 +- src/packagedcode/phpcomposer.py | 7 +-- src/packagedcode/plugin_package.py | 13 ++++-- src/packagedcode/pubspec.py | 3 +- src/packagedcode/pypi.py | 36 ++++++---------- src/packagedcode/rpm.py | 5 +-- src/packagedcode/rubygems.py | 28 ++++++------ src/packagedcode/win_reg.py | 8 ++-- tests/packagedcode/test_package_models.py | 27 +++++++++++- 25 files changed, 204 insertions(+), 140 deletions(-) diff --git a/src/packagedcode/about.py b/src/packagedcode/about.py index e68c7b5f5fd..0a0f197971a 100644 --- a/src/packagedcode/about.py +++ b/src/packagedcode/about.py @@ -107,7 +107,7 @@ def parse(cls, location): ) @classmethod - def assemble(cls, package_data, resource, codebase): + def assemble(cls, package_data, resource, codebase, package_adder): """ Yield a Package. Note that ABOUT files do not carry dependencies. """ @@ -122,8 +122,7 @@ def assemble(cls, package_data, resource, codebase): # NOTE: we do not attach files to the Package level. 
Instead we # update `for_package` in the file - resource.for_packages.append(package_uid) - resource.save(codebase) + package_adder(package_uid, resource, codebase) if not package.license_expression: package.license_expression = cls.compute_normalized_license(package) @@ -151,8 +150,7 @@ def assemble(cls, package_data, resource, codebase): # path is found and processed: remove it, so we can # check if we found all of them del file_references_by_path[res.path] - res.for_packages.append(package_uid) - res.save(codebase) + package_adder(package_uid, res, codebase) yield res diff --git a/src/packagedcode/alpine.py b/src/packagedcode/alpine.py index e69500f1f71..9dca74f3b9e 100644 --- a/src/packagedcode/alpine.py +++ b/src/packagedcode/alpine.py @@ -64,7 +64,7 @@ def compute_normalized_license(cls, package): return detected @classmethod - def assemble(cls, package_data, resource, codebase): + def assemble(cls, package_data, resource, codebase, package_adder): # get the root resource of the rootfs levels_up = len('lib/apk/db/installed'.split('/')) root_resource = get_ancestor( @@ -107,8 +107,7 @@ def assemble(cls, package_data, resource, codebase): # path is found and processed: remove it, so we can check if we # found all of them del file_references_by_path[res.path] - res.for_packages.append(package_uid) - res.save(codebase) + package_adder(package_uid, res, codebase) resources.append(res) # if we have left over file references, add these to extra data @@ -139,11 +138,12 @@ def compute_normalized_license(cls, package): return detected @classmethod - def assign_package_to_resources(cls, package, resource, codebase): + def assign_package_to_resources(cls, package, resource, codebase, package_adder): models.DatafileHandler.assign_package_to_parent_tree( package=package, resource=resource, codebase=codebase, + package_adder=package_adder, ) diff --git a/src/packagedcode/build.py b/src/packagedcode/build.py index 49d6afb7276..eb1bd7a04f4 100644 --- 
a/src/packagedcode/build.py +++ b/src/packagedcode/build.py @@ -50,11 +50,12 @@ def parse(cls, location): ) @classmethod - def assign_package_to_resources(cls, package, resource, codebase): + def assign_package_to_resources(cls, package, resource, codebase, package_adder): models.DatafileHandler.assign_package_to_parent_tree( package=package, resource=resource, codebase=codebase, + package_adder=package_adder, ) @@ -75,7 +76,7 @@ class BaseStarlarkManifestHandler(models.DatafileHandler): """ @classmethod - def assemble(cls, package_data, resource, codebase): + def assemble(cls, package_data, resource, codebase, package_adder): """ Given a ``package_data`` PackageData found in the ``resource`` datafile of the ``codebase``, assemble package their files and dependencies @@ -100,6 +101,7 @@ def assemble(cls, package_data, resource, codebase): package=package, resource=resource, codebase=codebase, + package_adder=package_adder ) yield package @@ -175,14 +177,13 @@ def parse(cls, location): ) @classmethod - def assign_package_to_resources(cls, package, resource, codebase, skip_name=None): + def assign_package_to_resources(cls, package, resource, codebase, package_adder, skip_name=None): package_uid = package.package_uid if not package_uid: return parent = resource.parent(codebase) for res in walk_build(resource=parent, codebase=codebase, skip_name=skip_name): - res.for_packages.append(package_uid) - res.save(codebase) + package_adder(package_uid, res, codebase) def walk_build(resource, codebase, skip_name): @@ -235,11 +236,12 @@ class BazelBuildHandler(BaseStarlarkManifestHandler): documentation_url = 'https://bazel.build/' @classmethod - def assign_package_to_resources(cls, package, resource, codebase, skip_name='BUILD'): + def assign_package_to_resources(cls, package, resource, codebase, package_adder, skip_name='BUILD'): return super().assign_package_to_resources( package=package, resource=resource, codebase=codebase, + package_adder=package_adder, 
skip_name=skip_name, ) @@ -252,11 +254,12 @@ class BuckPackageHandler(BaseStarlarkManifestHandler): documentation_url = 'https://buck.build/' @classmethod - def assign_package_to_resources(cls, package, resource, codebase, skip_name='BUCK'): + def assign_package_to_resources(cls, package, resource, codebase, package_adder, skip_name='BUCK'): return super().assign_package_to_resources( package=package, resource=resource, codebase=codebase, + package_adder=package_adder, skip_name=skip_name, ) @@ -377,9 +380,10 @@ def compute_normalized_license(cls, package): return models.compute_normalized_license(declared_license) @classmethod - def assign_package_to_resources(cls, package, resource, codebase): + def assign_package_to_resources(cls, package, resource, codebase, package_adder): models.DatafileHandler.assign_package_to_parent_tree( package_=package, resource=resource, codebase=codebase, + package_adder=package_adder, ) diff --git a/src/packagedcode/build_gradle.py b/src/packagedcode/build_gradle.py index 915a08ddf9a..e46acc91249 100644 --- a/src/packagedcode/build_gradle.py +++ b/src/packagedcode/build_gradle.py @@ -65,11 +65,12 @@ def parse(cls, location): # TODO: handle complex cases of nested builds with many packages @classmethod - def assign_package_to_resources(cls, package, resource, codebase): + def assign_package_to_resources(cls, package, resource, codebase, package_adder): models.DatafileHandler.assign_package_to_parent_tree( package=package, resource=resource, codebase=codebase, + package_adder=package_adder, ) diff --git a/src/packagedcode/cargo.py b/src/packagedcode/cargo.py index 76241487404..42f36e267df 100644 --- a/src/packagedcode/cargo.py +++ b/src/packagedcode/cargo.py @@ -85,7 +85,7 @@ def parse(cls, location): ) @classmethod - def assemble(cls, package_data, resource, codebase): + def assemble(cls, package_data, resource, codebase, package_adder): """ Assemble Cargo.toml and possible Cargo.lock datafiles """ @@ -93,6 +93,7 @@ def assemble(cls, 
package_data, resource, codebase): datafile_name_patterns=('Cargo.toml', 'cargo.toml', 'Cargo.lock', 'cargo.lock'), directory=resource.parent(codebase), codebase=codebase, + package_adder=package_adder, ) @@ -144,7 +145,7 @@ def parse(cls, location): ) @classmethod - def assemble(cls, package_data, resource, codebase): + def assemble(cls, package_data, resource, codebase, package_adder): """ Assemble Cargo.toml and possible Cargo.lock datafiles """ @@ -152,6 +153,7 @@ def assemble(cls, package_data, resource, codebase): datafile_name_patterns=('Cargo.toml', 'Cargo.lock',), directory=resource.parent(codebase), codebase=codebase, + package_adder=package_adder, ) diff --git a/src/packagedcode/chef.py b/src/packagedcode/chef.py index 17f8401c7d9..7915ea20dfe 100644 --- a/src/packagedcode/chef.py +++ b/src/packagedcode/chef.py @@ -142,7 +142,7 @@ def format(self, tokens, outfile): class BaseChefMetadataHandler(models.DatafileHandler): @classmethod - def assemble(cls, package_data, resource, codebase): + def assemble(cls, package_data, resource, codebase, package_adder): """ Assemble Package from Chef metadata.rb, then from metadata.json files. """ @@ -150,6 +150,7 @@ def assemble(cls, package_data, resource, codebase): datafile_name_patterns=('metadata.rb', 'metadata.json',), directory=resource.parent(codebase), codebase=codebase, + package_adder=package_adder, ) diff --git a/src/packagedcode/cocoapods.py b/src/packagedcode/cocoapods.py index 22a9c5a959b..1668b8a5766 100644 --- a/src/packagedcode/cocoapods.py +++ b/src/packagedcode/cocoapods.py @@ -114,7 +114,7 @@ def get_first_three_md5_hash_characters(podname): class BasePodHandler(models.DatafileHandler): @classmethod - def assemble(cls, package_data, resource, codebase): + def assemble(cls, package_data, resource, codebase, package_adder): """ Assemble pod packages and dependencies and handle the specific cases where there are more than one podspec in the same directory. 
@@ -147,6 +147,7 @@ def assemble(cls, package_data, resource, codebase): datafile_name_patterns=datafile_name_patterns, directory=parent, codebase=codebase, + package_adder=package_adder, ) elif has_multiple_podspec: @@ -160,6 +161,7 @@ def assemble(cls, package_data, resource, codebase): datafile_name_patterns=datafile_name_patterns, directory=parent, codebase=codebase, + package_adder=package_adder, ) for resource in sibling_podspecs: @@ -170,7 +172,12 @@ def assemble(cls, package_data, resource, codebase): package_data=package_data, datafile_path=datafile_path, ) - cls.assign_package_to_resources(package, resource, codebase) + cls.assign_package_to_resources( + package=package, + resource=resource, + codebase=codebase, + package_adder=package_adder, + ) yield package yield resource @@ -180,6 +187,7 @@ def assemble(cls, package_data, resource, codebase): datafile_name_patterns=datafile_name_patterns, directory=parent, codebase=codebase, + package_adder=package_adder, ) diff --git a/src/packagedcode/conda.py b/src/packagedcode/conda.py index 5d08eb84077..bfcbabe738e 100644 --- a/src/packagedcode/conda.py +++ b/src/packagedcode/conda.py @@ -70,11 +70,12 @@ def get_conda_root(cls, resource, codebase): return resource.parent(codebase) @classmethod - def assign_package_to_resources(cls, package, resource, codebase): + def assign_package_to_resources(cls, package, resource, codebase, package_adder): return models.DatafileHandler.assign_package_to_resources( package=package, resource=cls.get_conda_root(resource, codebase), codebase=codebase, + package_adder=package_adder, ) @classmethod diff --git a/src/packagedcode/debian.py b/src/packagedcode/debian.py index 8f072794f29..8a65bb72b2c 100644 --- a/src/packagedcode/debian.py +++ b/src/packagedcode/debian.py @@ -66,9 +66,9 @@ def parse(cls, location): ) @classmethod - def assign_package_to_resources(cls, package, resource, codebase): + def assign_package_to_resources(cls, package, resource, codebase, package_adder): # 
only assign this resource - return models.DatafileHandler.assign_package_to_resources(package, resource, codebase) + return models.DatafileHandler.assign_package_to_resources(package, resource, codebase, package_adder) # TODO: introspect archive @@ -91,9 +91,9 @@ def parse(cls, location): ) @classmethod - def assign_package_to_resources(cls, package, resource, codebase): + def assign_package_to_resources(cls, package, resource, codebase, package_adder): # only assign this resource - return models.DatafileHandler.assign_package_to_resources(package, resource, codebase) + return models.DatafileHandler.assign_package_to_resources(package, resource, codebase, package_adder) # TODO: introspect archive @@ -116,9 +116,9 @@ def parse(cls, location): ) @classmethod - def assign_package_to_resources(cls, package, resource, codebase): + def assign_package_to_resources(cls, package, resource, codebase, package_adder): # only assign this resource - return models.DatafileHandler.assign_package_to_resources(package, resource, codebase) + return models.DatafileHandler.assign_package_to_resources(package, resource, codebase, package_adder) # TODO: also look into neighboring md5sum and data.tarball copyright files!!! @@ -139,11 +139,11 @@ def parse(cls, location): ) @classmethod - def assign_package_to_resources(cls, package, resource, codebase): + def assign_package_to_resources(cls, package, resource, codebase, package_adder): # two levels up root = resource.parent(codebase).parent(codebase) if root: - return models.DatafileHandler.assign_package_to_resources(package, root, codebase) + return models.DatafileHandler.assign_package_to_resources(package, root, codebase, package_adder) # TODO: also look into neighboring copyright files!!! 
@@ -166,11 +166,11 @@ def parse(cls, location): ) @classmethod - def assign_package_to_resources(cls, package, resource, codebase): + def assign_package_to_resources(cls, package, resource, codebase, package_adder): # two levels up root = resource.parent(codebase).parent(codebase) if root: - return models.DatafileHandler.assign_package_to_resources(package, root, codebase) + return models.DatafileHandler.assign_package_to_resources(package, root, codebase, package_adder) class DebianDscFileHandler(models.DatafileHandler): @@ -197,9 +197,9 @@ def parse(cls, location): ) @classmethod - def assign_package_to_resources(cls, package, resource, codebase): + def assign_package_to_resources(cls, package, resource, codebase, package_adder): # only assign this resource - return models.DatafileHandler.assign_package_to_resources(package, resource, codebase) + return models.DatafileHandler.assign_package_to_resources(package, resource, codebase, package_adder) class DebianInstalledStatusDatabaseHandler(models.DatafileHandler): @@ -221,7 +221,7 @@ def parse(cls, location): ) @classmethod - def assemble(cls, package_data, resource, codebase): + def assemble(cls, package_data, resource, codebase, package_adder): # get the root resource of the rootfs levels_up = len('var/lib/dpkg/status'.split('/')) root_resource = get_ancestor( @@ -307,8 +307,7 @@ def assemble(cls, package_data, resource, codebase): ) package_file_references.extend(package_data.file_references) - res.for_packages.append(package_uid) - res.save(codebase) + package_adder(package_uid, res, codebase) # yield possible dependencies dependent_packages = package_data.dependencies @@ -343,8 +342,7 @@ def assemble(cls, package_data, resource, codebase): # path is found and processed: remove it, so we can check if we found all of them del file_references_by_path[res.path] - res.for_packages.append(package_uid) - res.save(codebase) + package_adder(package_uid, res, codebase) resources.append(res) @@ -381,7 +379,7 @@ def 
parse(cls, location): ) @classmethod - def assemble(cls, package_data, resource, codebase): + def assemble(cls, package_data, resource, codebase, package_adder): # get the root resource of the rootfs levels_up = len('var/lib/dpkg/status.d/name'.split('/')) root_resource = get_ancestor( @@ -415,8 +413,7 @@ def assemble(cls, package_data, resource, codebase): datafile_path=res.path, ) - res.for_packages.append(package_uid) - res.save(codebase) + package_adder(package_uid, res, codebase) resources.append(res) @@ -453,7 +450,7 @@ def parse(cls, location): ) @classmethod - def assemble(cls, package_data, resource, codebase): + def assemble(cls, package_data, resource, codebase, package_adder): # this is assembled only from a database entry return @@ -479,7 +476,7 @@ def parse(cls, location): ) @classmethod - def assemble(cls, package_data, resource, codebase): + def assemble(cls, package_data, resource, codebase, package_adder): # this is assembled only from a database entry return [] @@ -504,11 +501,11 @@ def parse(cls, location): ) @classmethod - def assign_package_to_resources(cls, package, resource, codebase): + def assign_package_to_resources(cls, package, resource, codebase, package_adder): # two levels up root = resource.parent(codebase).parent(codebase) if root: - return models.DatafileHandler.assign_package_to_resources(package, root, codebase) + return models.DatafileHandler.assign_package_to_resources(package, root, codebase, package_adder) def build_package_data_from_package_filename(filename, datasource_id, package_type,): diff --git a/src/packagedcode/debian_copyright.py b/src/packagedcode/debian_copyright.py index d95978574a7..aa038cce8c3 100644 --- a/src/packagedcode/debian_copyright.py +++ b/src/packagedcode/debian_copyright.py @@ -130,11 +130,11 @@ class DebianCopyrightFileInSourceHandler(BaseDebianCopyrightFileHandler): ) @classmethod - def assign_package_to_resources(cls, package, resource, codebase): + def assign_package_to_resources(cls, package, 
resource, codebase, package_adder): # two levels up root = resource.parent(codebase).parent(codebase) if root: - return cls.assign_package_to_resources(package, root, codebase) + return cls.assign_package_to_resources(package, root, codebase, package_adder) # TODO: distiguish the cased of an installed package vs. the case of an extracted .deb @@ -150,7 +150,7 @@ class DebianCopyrightFileInPackageHandler(BaseDebianCopyrightFileHandler): ) @classmethod - def assemble(cls, package_data, resource, codebase): + def assemble(cls, package_data, resource, codebase, package_adder): # DO NOTHING: let other handler reuse this return [] @@ -167,9 +167,9 @@ class StandaloneDebianCopyrightFileHandler(BaseDebianCopyrightFileHandler): ) @classmethod - def assemble(cls, package_data, resource, codebase): + def assemble(cls, package_data, resource, codebase, package_adder): # assemble is the default - yield from super().assemble(package_data, resource, codebase) + yield from super().assemble(package_data, resource, codebase, package_adder) class NotReallyStructuredCopyrightFile(Exception): diff --git a/src/packagedcode/godeps.py b/src/packagedcode/godeps.py index 06633fdbd0c..68ff54787ff 100644 --- a/src/packagedcode/godeps.py +++ b/src/packagedcode/godeps.py @@ -74,8 +74,8 @@ def parse(cls, location): ) @classmethod - def assign_package_to_resources(cls, package, resource, codebase): - models.DatafileHandler.assign_package_to_parent_tree(package, resource, codebase) + def assign_package_to_resources(cls, package, resource, codebase, package_adder): + models.DatafileHandler.assign_package_to_parent_tree(package, resource, codebase, package_adder) @attr.s diff --git a/src/packagedcode/golang.py b/src/packagedcode/golang.py index e78477e33dc..6075c713e86 100644 --- a/src/packagedcode/golang.py +++ b/src/packagedcode/golang.py @@ -28,7 +28,7 @@ class BaseGoModuleHandler(models.DatafileHandler): @classmethod - def assemble(cls, package_data, resource, codebase): + def assemble(cls, 
package_data, resource, codebase, package_adder): """ Always use go.mod first then go.sum """ @@ -36,6 +36,7 @@ def assemble(cls, package_data, resource, codebase): datafile_name_patterns=('go.mod', 'go.sum',), directory=resource.parent(codebase), codebase=codebase, + package_adder=package_adder, ) diff --git a/src/packagedcode/jar_manifest.py b/src/packagedcode/jar_manifest.py index bed3348a8c2..a918f1ac196 100644 --- a/src/packagedcode/jar_manifest.py +++ b/src/packagedcode/jar_manifest.py @@ -46,14 +46,19 @@ def parse(cls, location): yield package_data @classmethod - def assign_package_to_resources(cls, package, resource, codebase): + def assign_package_to_resources(cls, package, resource, codebase, package_adder): # we want to root of the jar, two levels up parent = resource.parent(codebase) if parent: parent = resource.parent(codebase) if parent: - models.DatafileHandler.assign_package_to_resources(package, resource=parent, codebase=codebase) + models.DatafileHandler.assign_package_to_resources( + package, + resource=parent, + codebase=codebase, + package_adder=package_adder, + ) diff --git a/src/packagedcode/maven.py b/src/packagedcode/maven.py index 434fc285d0d..f81c9d6e607 100644 --- a/src/packagedcode/maven.py +++ b/src/packagedcode/maven.py @@ -105,7 +105,7 @@ def parse(cls, location, base_url='https://repo1.maven.org/maven2'): ) @classmethod - def assign_package_to_resources(cls, package, resource, codebase): + def assign_package_to_resources(cls, package, resource, codebase, package_adder): """ Set the "for_packages" attributes to ``package`` for the whole resource tree of a ``resource`` object in the ``codebase``. 
@@ -115,7 +115,12 @@ def assign_package_to_resources(cls, package, resource, codebase): if resource.path.endswith('.pom'): # we only treat the parent as the root - return models.DatafileHandler.assign_package_to_parent_tree(package, resource, codebase) + return models.DatafileHandler.assign_package_to_parent_tree( + package, + resource, + codebase, + package_adder + ) # the root is either the parent or further up for poms stored under # a META-INF dir @@ -139,7 +144,12 @@ def assign_package_to_resources(cls, package, resource, codebase): if not root: root = resource.parent(codebase) - return models.DatafileHandler.assign_package_to_resources(package, resource=root, codebase=codebase) + return models.DatafileHandler.assign_package_to_resources( + package, + resource=root, + codebase=codebase, + package_adder=package_adder + ) @classmethod def compute_normalized_license(cls, package): diff --git a/src/packagedcode/models.py b/src/packagedcode/models.py index 8e1d9ca091b..8c8b1d4631c 100644 --- a/src/packagedcode/models.py +++ b/src/packagedcode/models.py @@ -803,6 +803,15 @@ def compute_normalized_license(declared_license, expression_symbols=None): return 'unknown' +def add_to_package(package_uid, resource, codebase): + """ + Append `package_uid` to `resource.for_packages`, if the attribute exists. + """ + if hasattr(resource, 'for_packages') and isinstance(resource.for_packages, list): + resource.for_packages.append(package_uid) + resource.save(codebase) + + class DatafileHandler: """ A base handler class to handle any package manifests, lockfiles and data @@ -888,13 +897,15 @@ def parse(cls, location): raise NotImplementedError @classmethod - def assemble(cls, package_data, resource, codebase): + def assemble(cls, package_data, resource, codebase, package_adder=add_to_package): """ Given a ``package_data`` PackageData found in the ``resource`` datafile of the ``codebase``, assemble package their files and dependencies from one or more datafiles. 
- Update ``codebase`` Resources with the package they are for. + Update ``codebase`` Resources with the package they are for, using the + function ``package_adder`` to associate Resources to the Package they + are part of. Yield items that can be of these types: @@ -931,6 +942,7 @@ def assemble(cls, package_data, resource, codebase): package=package, resource=resource, codebase=codebase, + package_adder=package_adder, ) yield package @@ -973,7 +985,7 @@ def compute_normalized_license(cls, package): return license_expression @classmethod - def assign_package_to_resources(cls, package, resource, codebase): + def assign_package_to_resources(cls, package, resource, codebase, package_adder=add_to_package): """ Set the "for_packages" attributes to ``package`` given a starting ``resource`` in the ``codebase``. @@ -990,14 +1002,12 @@ def assign_package_to_resources(cls, package, resource, codebase): # update `for_packages` of a codebase resource. package_uid = package.package_uid if resource and package_uid: - resource.for_packages.append(package_uid) - resource.save(codebase) + package_adder(package_uid, resource, codebase) for res in resource.walk(codebase): - res.for_packages.append(package_uid) - res.save(codebase) + package_adder(package_uid, res, codebase) @classmethod - def assign_package_to_parent_tree(cls, package, resource, codebase): + def assign_package_to_parent_tree(cls, package, resource, codebase, package_adder=add_to_package): """ Set the "for_packages" attributes to ``package`` for the whole resource tree of the parent of a ``resource`` object in the @@ -1009,12 +1019,12 @@ def assign_package_to_parent_tree(cls, package, resource, codebase): """ if resource.has_parent(): parent = resource.parent(codebase) - cls.assign_package_to_resources(package, parent, codebase) + cls.assign_package_to_resources(package, parent, codebase, package_adder) else: - cls.assign_package_to_resources(package, resource, codebase) + cls.assign_package_to_resources(package, 
resource, codebase, package_adder) @classmethod - def assemble_from_many(cls, pkgdata_resources, codebase,): + def assemble_from_many(cls, pkgdata_resources, codebase, package_adder=add_to_package): """ Yield Package, Resources or Dependency given a ``pkgdata_resources`` list of tuple (PackageData, Resource) in ``codebase``. @@ -1050,8 +1060,7 @@ def assemble_from_many(cls, pkgdata_resources, codebase,): ) package_uid = package.package_uid if package_uid: - resource.for_packages.append(package_uid) - resource.save(codebase) + package_adder(package_uid, resource, codebase) else: # FIXME: What is the package_data is NOT for the same package as package? # FIXME: What if the update did not do anything? (it does return True or False) @@ -1061,8 +1070,7 @@ def assemble_from_many(cls, pkgdata_resources, codebase,): datafile_path=resource.path, ) if package_uid: - resource.for_packages.append(package_uid) - resource.save(codebase) + package_adder(package_uid, resource, codebase) # in all cases yield possible dependencies dependent_packages = package_data.dependencies @@ -1080,8 +1088,7 @@ def assemble_from_many(cls, pkgdata_resources, codebase,): # the whole parent subtree of the base_resource is for this package if package_uid: for res in base_resource.walk(codebase): - res.for_packages.append(package_uid) - res.save(codebase) + package_adder(package_uid, res, codebase) if package: if not package.license_expression: @@ -1089,7 +1096,13 @@ def assemble_from_many(cls, pkgdata_resources, codebase,): yield package @classmethod - def assemble_from_many_datafiles(cls, datafile_name_patterns, directory, codebase): + def assemble_from_many_datafiles( + cls, + datafile_name_patterns, + directory, + codebase, + package_adder=add_to_package, + ): """ Assemble Package and Dependency from package data of the datafiles found in multiple ``datafile_name_patterns`` name patterns (case- sensitive) @@ -1135,6 +1148,7 @@ def assemble_from_many_datafiles(cls, datafile_name_patterns, 
directory, codebas yield from cls.assemble_from_many( pkgdata_resources=pkgdata_resources, codebase=codebase, + package_adder=package_adder, ) @classmethod @@ -1157,7 +1171,7 @@ class NonAssemblableDatafileHandler(DatafileHandler): """ @classmethod - def assemble(cls, package_data, resource, codebase): + def assemble(cls, package_data, resource, codebase, package_adder): return [] diff --git a/src/packagedcode/npm.py b/src/packagedcode/npm.py index 515b5d44358..2a50d1434a3 100644 --- a/src/packagedcode/npm.py +++ b/src/packagedcode/npm.py @@ -41,7 +41,7 @@ class BaseNpmHandler(models.DatafileHandler): @classmethod - def assemble(cls, package_data, resource, codebase): + def assemble(cls, package_data, resource, codebase, package_adder): """ If ``resource``, or one of its siblings, is a package.json file, use it to create and yield the package, the package dependencies, and the @@ -96,13 +96,11 @@ def assemble(cls, package_data, resource, codebase): if root: for npm_res in cls.walk_npm(resource=root, codebase=codebase): if package_uid and package_uid not in npm_res.for_packages: - npm_res.for_packages.append(package_uid) - npm_res.save(codebase) + package_adder(package_uid, npm_res, codebase) yield npm_res elif codebase.has_single_resource: if package_uid and package_uid not in package_resource.for_packages: - package_resource.for_packages.append(package_uid) - package_resource.save(codebase) + package_adder(package_uid, package_resource, codebase) yield package_resource else: @@ -120,8 +118,7 @@ def assemble(cls, package_data, resource, codebase): yield from yield_dependencies_from_package_resource(lock_file, package_uid) if package_uid and package_uid not in lock_file.for_packages: - lock_file.for_packages.append(package_uid) - lock_file.save(codebase) + package_adder(package_uid, lock_file, codebase) yield lock_file else: # we do not have a package.json diff --git a/src/packagedcode/opam.py b/src/packagedcode/opam.py index 2373a0ec3c1..695a8d00413 100644 --- 
a/src/packagedcode/opam.py +++ b/src/packagedcode/opam.py @@ -118,8 +118,8 @@ def parse(cls, location): yield package_data @classmethod - def assign_package_to_resources(cls, package, resource, codebase): - return models.DatafileHandler.assign_package_to_parent_tree(package, resource, codebase) + def assign_package_to_resources(cls, package, resource, codebase, package_adder): + return models.DatafileHandler.assign_package_to_parent_tree(package, resource, codebase, package_adder) def get_repository_homepage_url(name): diff --git a/src/packagedcode/phpcomposer.py b/src/packagedcode/phpcomposer.py index a4aca458a22..8a281f7d856 100644 --- a/src/packagedcode/phpcomposer.py +++ b/src/packagedcode/phpcomposer.py @@ -26,7 +26,7 @@ class BasePhpComposerHandler(models.DatafileHandler): @classmethod - def assemble(cls, package_data, resource, codebase): + def assemble(cls, package_data, resource, codebase, package_adder): datafile_name_patterns = ( 'composer.json', 'composer.lock', @@ -41,11 +41,12 @@ def assemble(cls, package_data, resource, codebase): datafile_name_patterns=datafile_name_patterns, directory=dir_resource, codebase=codebase, + package_adder=package_adder, ) @classmethod - def assign_package_to_resources(cls, package, resource, codebase): - return models.DatafileHandler.assign_package_to_parent_tree(package, resource, codebase) + def assign_package_to_resources(cls, package, resource, codebase, package_adder): + return models.DatafileHandler.assign_package_to_parent_tree(package, resource, codebase, package_adder) @classmethod def compute_normalized_license(cls, package): diff --git a/src/packagedcode/plugin_package.py b/src/packagedcode/plugin_package.py index 35d4bda28a5..547aa7d6d02 100644 --- a/src/packagedcode/plugin_package.py +++ b/src/packagedcode/plugin_package.py @@ -22,6 +22,7 @@ from plugincode.scan import ScanPlugin from packagedcode import get_package_handler +from packagedcode.models import add_to_package from packagedcode.models import 
Dependency from packagedcode.models import Package from packagedcode.models import PackageData @@ -185,17 +186,22 @@ def get_installed_packages(root_dir, processes=2, **kwargs): yield from packages_by_uid.values() -def create_package_and_deps(codebase, strip_root=False, **kwargs): +def create_package_and_deps(codebase, package_adder=add_to_package, strip_root=False, **kwargs): """ Create and save top-level Package and Dependency from the parsed package data present in the codebase. """ - packages, dependencies = get_package_and_deps(codebase, strip_root=strip_root, **kwargs) + packages, dependencies = get_package_and_deps( + codebase, + package_adder=package_adder, + strip_root=strip_root, + **kwargs + ) codebase.attributes.packages.extend(pkg.to_dict() for pkg in packages) codebase.attributes.dependencies.extend(dep.to_dict() for dep in dependencies) -def get_package_and_deps(codebase, strip_root=False, **kwargs): +def get_package_and_deps(codebase, package_adder=add_to_package, strip_root=False, **kwargs): """ Return a tuple of (Packages list, Dependency list) from the parsed package data present in the codebase files.package_data attributes. 
@@ -234,6 +240,7 @@ def get_package_and_deps(codebase, strip_root=False, **kwargs): package_data=package_data, resource=resource, codebase=codebase, + package_adder=package_adder, ) for item in items: diff --git a/src/packagedcode/pubspec.py b/src/packagedcode/pubspec.py index 694bf9b4972..9fb1d5b487c 100644 --- a/src/packagedcode/pubspec.py +++ b/src/packagedcode/pubspec.py @@ -39,7 +39,7 @@ class BaseDartPubspecHandler(models.DatafileHandler): @classmethod - def assemble(cls, package_data, resource, codebase): + def assemble(cls, package_data, resource, codebase, package_adder): datafile_name_patterns = \ DartPubspecYamlHandler.path_patterns + DartPubspecLockHandler.path_patterns @@ -52,6 +52,7 @@ def assemble(cls, package_data, resource, codebase): datafile_name_patterns=datafile_name_patterns, directory=dir_resource, codebase=codebase, + package_adder=package_adder, ) @classmethod diff --git a/src/packagedcode/pypi.py b/src/packagedcode/pypi.py index 7a2eb3df73c..a2369d8b388 100644 --- a/src/packagedcode/pypi.py +++ b/src/packagedcode/pypi.py @@ -94,11 +94,11 @@ def parse(cls, location): ) @classmethod - def assign_package_to_resources(cls, package, resource, codebase): + def assign_package_to_resources(cls, package, resource, codebase, package_adder): # two levels up root = resource.parent(codebase).parent(codebase) if root: - return models.DatafileHandler.assign_package_to_resources(package, root, codebase) + return models.DatafileHandler.assign_package_to_resources(package, root, codebase, package_adder) class PythonEditableInstallationPkgInfoFile(BasePypiHandler): @@ -118,9 +118,9 @@ def parse(cls, location): ) @classmethod - def assign_package_to_resources(cls, package, resource, codebase): + def assign_package_to_resources(cls, package, resource, codebase, package_adder): # only the parent for now... 
though it can be more complex - return models.DatafileHandler.assign_package_to_parent_tree(package, resource, codebase) + return models.DatafileHandler.assign_package_to_parent_tree(package, resource, codebase, package_adder) def create_package_from_package_data(package_data, datafile_path): @@ -140,7 +140,7 @@ class BaseExtractedPythonLayout(BasePypiHandler): """ @classmethod - def assemble(cls, package_data, resource, codebase): + def assemble(cls, package_data, resource, codebase, package_adder): # a source distribution can have many manifests datafile_name_patterns = ( 'Pipfile.lock', @@ -171,8 +171,7 @@ def assemble(cls, package_data, resource, codebase): ) yield package - package_resource.for_packages.append(package.package_uid) - package_resource.save(codebase) + package_adder(package.package_uid, package_resource, codebase) yield package_resource yield from yield_dependencies_from_package_data( @@ -208,8 +207,7 @@ def assemble(cls, package_data, resource, codebase): package.update(setup_pkg_data, setup_resource.path) if package: for setup_resource, setup_pkg_data in setup_package_data: - setup_resource.for_packages.append(package.package_uid) - setup_resource.save(codebase) + package_adder(package.package_uid, setup_resource, codebase) yield setup_resource yield from yield_dependencies_from_package_data( @@ -229,13 +227,11 @@ def assemble(cls, package_data, resource, codebase): if py_res.is_dir: continue if package_uid and package_uid not in py_res.for_packages: - py_res.for_packages.append(package_uid) - py_res.save(codebase) + package_adder(package_uid, py_res, codebase) yield py_res elif codebase.has_single_resource: if package_uid and package_uid not in package_resource.for_packages: - package_resource.for_packages.append(package_uid) - package_resource.save(codebase) + package_adder(package_uid, package_resource, codebase) else: package_uid = None @@ -249,8 +245,7 @@ def assemble(cls, package_data, resource, codebase): ) if package_uid and package_uid 
not in sibling.for_packages: - sibling.for_packages.append(package_uid) - sibling.save(codebase) + package_adder(package_uid, sibling, codebase) yield sibling @classmethod @@ -316,7 +311,7 @@ def parse(cls, location): ) @classmethod - def assign_package_to_resources(cls, package, resource, codebase): + def assign_package_to_resources(cls, package, resource, codebase, package_adder): """ Assign files to package for an installed wheel. This requires a bit of navigation around as the files can be in multiple places. @@ -336,8 +331,7 @@ def assign_package_to_resources(cls, package, resource, codebase): if package_uid: # save thyself! - resource.for_packages.append(package_uid) - resource.save(codebase) + package_adder(package_uid, resource, codebase) # collect actual paths based on the file references for file_ref in package_data.file_references: @@ -358,8 +352,7 @@ def assign_package_to_resources(cls, package, resource, codebase): continue else: if package_uid: - ref_resource.for_packages.append(package_uid) - ref_resource.save(codebase) + package_adder(package_uid, ref_resource, codebase) else: ref_resource = get_resource_for_path( path=path_ref, @@ -367,8 +360,7 @@ def assign_package_to_resources(cls, package, resource, codebase): codebase=codebase, ) if ref_resource and package_uid: - ref_resource.for_packages.append(package_uid) - ref_resource.save(codebase) + package_adder(package_uid, ref_resource, codebase) def get_resource_for_path(path, root, codebase): diff --git a/src/packagedcode/rpm.py b/src/packagedcode/rpm.py index 645c2b0d720..85aced5f0eb 100644 --- a/src/packagedcode/rpm.py +++ b/src/packagedcode/rpm.py @@ -146,7 +146,7 @@ def compute_normalized_license(cls, package): return detected @classmethod - def assemble(cls, package_data, resource, codebase): + def assemble(cls, package_data, resource, codebase, package_adder): # get the root resource of the rootfs # take the 1st pattern as a reference # for instance: '*usr/lib/sysimage/rpm/Packages.db' @@ 
-201,8 +201,7 @@ def assemble(cls, package_data, resource, codebase): if package_uid: # path is found and processed: remove it, so we can check if we # found all of them - res.for_packages.append(package_uid) - res.save(codebase) + package_adder(package_uid, res, codebase) resources.append(res) # if we have left over file references, add these to extra data diff --git a/src/packagedcode/rubygems.py b/src/packagedcode/rubygems.py index 6b4f7d16808..0edc69cdc2d 100644 --- a/src/packagedcode/rubygems.py +++ b/src/packagedcode/rubygems.py @@ -58,7 +58,7 @@ def parse(cls, location): ) -def assemble_extracted_gem(cls, package_data, resource, codebase): +def assemble_extracted_gem(cls, package_data, resource, codebase, package_adder): """ An assemble implementation shared by handlers for manifests found in an extracted gem using extractcode. @@ -76,6 +76,7 @@ def assemble_extracted_gem(cls, package_data, resource, codebase): datafile_name_patterns=datafile_name_patterns, directory=gemroot, codebase=codebase, + package_adder=package_adder, ) @@ -101,14 +102,14 @@ def parse(cls, location): ) @classmethod - def assemble(cls, package_data, resource, codebase): + def assemble(cls, package_data, resource, codebase, package_adder): yield from assemble_extracted_gem(cls, package_data, resource, codebase) class BaseGemProjectHandler(BaseGemHandler): @classmethod - def assemble(cls, package_data, resource, codebase): + def assemble(cls, package_data, resource, codebase, package_adder): datafile_name_patterns = ( '*.gemspec', 'Gemfile', @@ -119,11 +120,12 @@ def assemble(cls, package_data, resource, codebase): datafile_name_patterns=datafile_name_patterns, directory=resource.parent(codebase), codebase=codebase, + package_adder=package_adder, ) @classmethod - def assign_package_to_resources(cls, package, resource, codebase): - return models.DatafileHandler.assign_package_to_parent_tree(package, resource, codebase) + def assign_package_to_resources(cls, package, resource, codebase, 
package_adder): + return models.DatafileHandler.assign_package_to_parent_tree(package, resource, codebase, package_adder) class GemspecHandler(BaseGemHandler): @@ -187,8 +189,8 @@ class GemspecInExtractedGemHandler(GemspecHandler): description = 'RubyGems gemspec manifest - extracted data layout' @classmethod - def assemble(cls, package_data, resource, codebase): - yield from assemble_extracted_gem(cls, package_data, resource, codebase) + def assemble(cls, package_data, resource, codebase, package_adder): + yield from assemble_extracted_gem(cls, package_data, resource, codebase, package_adder) class GemspecInInstalledVendorBundleSpecificationsHandler(GemspecHandler): @@ -213,9 +215,9 @@ class GemspecInInstalledVendorBundleSpecificationsHandler(GemspecHandler): description = 'RubyGems gemspec manifest - installed vendor/bundle/specifications layout' @classmethod - def assemble(cls, package_data, resource, codebase): + def assemble(cls, package_data, resource, codebase, package_adder): # TODO: consider assembling datafiles across vendor/ subdirs - yield from models.DatafileHandler.assemble(package_data, resource, codebase) + yield from models.DatafileHandler.assemble(package_data, resource, codebase, package_adder) # Note: we subclass GemspecHandler as the parsing code can handle both Ruby files @@ -235,8 +237,8 @@ class GemfileInExtractedGemHandler(GemfileHandler): description = 'RubyGems Bundler Gemfile - extracted layout' @classmethod - def assemble(cls, package_data, resource, codebase): - return assemble_extracted_gem(cls, package_data, resource, codebase) + def assemble(cls, package_data, resource, codebase, package_adder): + return assemble_extracted_gem(cls, package_data, resource, codebase, package_adder) class GemfileLockHandler(BaseGemProjectHandler): @@ -311,8 +313,8 @@ class GemfileLockInExtractedGemHandler(GemfileLockHandler): description = 'RubyGems Bundler Gemfile.lock - extracted layout' @classmethod - def assemble(cls, package_data, resource, 
codebase): - yield from assemble_extracted_gem(cls, package_data, resource, codebase) + def assemble(cls, package_data, resource, codebase, package_adder): + yield from assemble_extracted_gem(cls, package_data, resource, codebase, package_adder) def compute_normalized_license(declared_license): diff --git a/src/packagedcode/win_reg.py b/src/packagedcode/win_reg.py index 1761482a72a..4d4d7c82462 100644 --- a/src/packagedcode/win_reg.py +++ b/src/packagedcode/win_reg.py @@ -378,11 +378,10 @@ def get_root_resource(cls, resource, codebase): return resource @classmethod - def assign_package_to_resources(cls, package, resource, codebase): + def assign_package_to_resources(cls, package, resource, codebase, package_adder): package_uid = package.package_uid if package_uid: - resource.for_packages.append(package_uid) - resource.save(codebase) + package_adder(package_uid, resource, codebase) refs = package.file_references if not refs: @@ -411,8 +410,7 @@ def assign_package_to_resources(cls, package, resource, codebase): # path is found and processed: remove it, so we can check if we # found all of them del refs_by_path[res.path] - res.for_packages.append(package_uid) - res.save(codebase) + package_adder(package_uid, res, codebase) # if we have left over file references, add these to extra data if refs_by_path: diff --git a/tests/packagedcode/test_package_models.py b/tests/packagedcode/test_package_models.py index 95d17915969..97657aed551 100644 --- a/tests/packagedcode/test_package_models.py +++ b/tests/packagedcode/test_package_models.py @@ -14,9 +14,12 @@ from packagedcode import ALL_DATAFILE_HANDLERS from packagedcode.models import PackageData from packagedcode.models import Party +from packagedcode.plugin_package import PackageScanner from packages_test_utils import PackageTester from scancode_config import REGEN_TEST_FIXTURES +from commoncode.resource import Codebase + class TestModels(PackageTester): test_data_dir = os.path.join(os.path.dirname(__file__), 'data') @@ 
-140,7 +143,7 @@ def test_package_data_datasource_id_are_unique(self): pdh.datasource_id not in seen ), f'Duplicated datasource_id: {pdh!r} with {seen[pdhid]!r}' seen[pdh.datasource_id] = pdh - + def test_package_data_file_patterns_are_tuples(self): """ Check that all file patterns are tuples, as if they are @@ -152,3 +155,25 @@ def test_package_data_file_patterns_are_tuples(self): assert type(pdh.path_patterns) == tuple, pdh if pdh.filetypes: assert type(pdh.filetypes) == tuple, pdh + + def test_add_to_package(self): + test_loc = self.get_test_loc('npm/electron') + test_package = models.Package( + type='npm', + name='electron', + version='3.1.11', + ) + test_package_uid = test_package.package_uid + test_codebase = Codebase( + location=test_loc, + codebase_attributes=PackageScanner.codebase_attributes, + resource_attributes=PackageScanner.resource_attributes + ) + test_resource = test_codebase.get_resource('electron/package/package.json') + assert test_package_uid not in test_resource.for_packages + models.add_to_package( + test_package.package_uid, + test_resource, + test_codebase + ) + assert test_package.package_uid in test_resource.for_packages From 230f128acbb72445118df261b6fa6a9187392af8 Mon Sep 17 00:00:00 2001 From: Jono Yang Date: Mon, 1 Aug 2022 12:36:33 -0700 Subject: [PATCH 2/2] Add test for custom package_adder #3034 * Update CHANGELOG.rst Signed-off-by: Jono Yang --- CHANGELOG.rst | 12 +- .../models/get_package_resources.scan.json | 213 ++++++++++++++++++ tests/packagedcode/test_package_models.py | 34 +++ 3 files changed, 255 insertions(+), 4 deletions(-) create mode 100644 tests/packagedcode/data/models/get_package_resources.scan.json diff --git a/CHANGELOG.rst b/CHANGELOG.rst index 9453790449e..18500e422da 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -167,8 +167,12 @@ Package detection: of the new format where there is no root conceptually, just a list of files for each package. 
- - There is a new resource-level attribute `for_packages` which refers to packages - through package_uuids (pURL + uuid string). + - There is a new resource-level attribute `for_packages` which refers to + packages through package_uids (pURL + uuid string). A `package_adder` + function is now used to associate a Package to a Resource that is part of + it. This gives us the flexibility to use the packagedcode Package handlers + in other contexts where `for_packages` on Resource is not implemented in the + same way as scancode-toolkit. - The package_data attribute `dependencies` (which is a list of DependentPackages), now has a new attribute `resolved_package` with a package data mapping. @@ -336,8 +340,8 @@ Miscellaneous - Added support for usage of shortcut flags - `-A` or `--about` - - `-q` or `--quiet` - - `-v` or `--verbose` + - `-q` or `--quiet` + - `-v` or `--verbose` - `-V` or `--version` can be used. diff --git a/tests/packagedcode/data/models/get_package_resources.scan.json b/tests/packagedcode/data/models/get_package_resources.scan.json new file mode 100644 index 00000000000..30107ab22b7 --- /dev/null +++ b/tests/packagedcode/data/models/get_package_resources.scan.json @@ -0,0 +1,213 @@ +{ + "headers": [ + { + "tool_name": "scancode-toolkit", + "tool_version": "31.0.0rc3", + "options": { + "input": [ + "." + ], + "--info": true, + "--json-pp": "../get_package_resources.scan.json", + "--package": true + }, + "notice": "Generated with ScanCode and provided on an \"AS IS\" BASIS, WITHOUT WARRANTIES\nOR CONDITIONS OF ANY KIND, either express or implied. No content created from\nScanCode should be considered or used as legal advice. Consult an Attorney\nfor any legal advice.\nScanCode is a free software code scanning tool from nexB Inc. 
and others.\nVisit https://github.com/nexB/scancode-toolkit/ for support and download.", + "start_timestamp": "2022-08-01T185859.869487", + "end_timestamp": "2022-08-01T185902.232500", + "output_format_version": "2.0.0", + "duration": 2.363025665283203, + "message": null, + "errors": [], + "warnings": [], + "extra_data": { + "system_environment": { + "operating_system": "linux", + "cpu_architecture": "64", + "platform": "Linux-5.4.0-122-generic-x86_64-with-glibc2.27", + "platform_version": "#138~18.04.1-Ubuntu SMP Fri Jun 24 14:14:03 UTC 2022", + "python_version": "3.10.4 (main, May 22 2022, 00:46:26) [GCC 7.5.0]" + }, + "spdx_license_list_version": "3.17", + "files_count": 3 + } + } + ], + "dependencies": [], + "packages": [], + "files": [ + { + "path": "get_package_resources", + "type": "directory", + "name": "get_package_resources", + "base_name": "get_package_resources", + "extension": "", + "size": 0, + "date": null, + "sha1": null, + "md5": null, + "sha256": null, + "mime_type": null, + "file_type": null, + "programming_language": null, + "is_binary": false, + "is_text": false, + "is_archive": false, + "is_media": false, + "is_source": false, + "is_script": false, + "package_data": [], + "for_packages": [], + "files_count": 3, + "dirs_count": 1, + "size_count": 63, + "scan_errors": [] + }, + { + "path": "get_package_resources/package.json", + "type": "file", + "name": "package.json", + "base_name": "package", + "extension": ".json", + "size": 63, + "date": "2022-02-15", + "sha1": "47ad6746a065a22a4c2a119cb425c250bddfbf88", + "md5": "5cbb43ca814ec801e8673e348e8eec19", + "sha256": "aac331f407d4d662a2897eafeb4f05a1c2343218220d99779c9553d3b92bd4ac", + "mime_type": "application/json", + "file_type": "JSON data", + "programming_language": null, + "is_binary": false, + "is_text": true, + "is_archive": false, + "is_media": false, + "is_source": false, + "is_script": false, + "package_data": [ + { + "type": "npm", + "namespace": null, + "name": "test", + "version": 
"0.1.0", + "qualifiers": {}, + "subpath": null, + "primary_language": "JavaScript", + "description": null, + "release_date": null, + "parties": [], + "keywords": [], + "homepage_url": null, + "download_url": "https://registry.npmjs.org/test/-/test-0.1.0.tgz", + "size": null, + "sha1": null, + "md5": null, + "sha256": null, + "sha512": null, + "bug_tracking_url": null, + "code_view_url": null, + "vcs_url": null, + "copyright": null, + "license_expression": "mit", + "declared_license": [ + "MIT" + ], + "notice_text": null, + "source_packages": [], + "file_references": [], + "extra_data": {}, + "dependencies": [], + "repository_homepage_url": "https://www.npmjs.com/package/test", + "repository_download_url": "https://registry.npmjs.org/test/-/test-0.1.0.tgz", + "api_data_url": "https://registry.npmjs.org/test/0.1.0", + "datasource_id": "npm_package_json", + "purl": "pkg:npm/test@0.1.0" + } + ], + "for_packages": [], + "files_count": 0, + "dirs_count": 0, + "size_count": 0, + "scan_errors": [] + }, + { + "path": "get_package_resources/this-should-be-returned", + "type": "file", + "name": "this-should-be-returned", + "base_name": "this-should-be-returned", + "extension": "", + "size": 0, + "date": "2022-02-15", + "sha1": null, + "md5": null, + "sha256": null, + "mime_type": "inode/x-empty", + "file_type": "empty", + "programming_language": null, + "is_binary": false, + "is_text": true, + "is_archive": false, + "is_media": false, + "is_source": false, + "is_script": false, + "package_data": [], + "for_packages": [], + "files_count": 0, + "dirs_count": 0, + "size_count": 0, + "scan_errors": [] + }, + { + "path": "get_package_resources/node_modules", + "type": "directory", + "name": "node_modules", + "base_name": "node_modules", + "extension": "", + "size": 0, + "date": null, + "sha1": null, + "md5": null, + "sha256": null, + "mime_type": null, + "file_type": null, + "programming_language": null, + "is_binary": false, + "is_text": false, + "is_archive": false, + 
"is_media": false, + "is_source": false, + "is_script": false, + "package_data": [], + "for_packages": [], + "files_count": 1, + "dirs_count": 0, + "size_count": 0, + "scan_errors": [] + }, + { + "path": "get_package_resources/node_modules/this-should-not-be-returned", + "type": "file", + "name": "this-should-not-be-returned", + "base_name": "this-should-not-be-returned", + "extension": "", + "size": 0, + "date": "2022-02-15", + "sha1": null, + "md5": null, + "sha256": null, + "mime_type": "inode/x-empty", + "file_type": "empty", + "programming_language": null, + "is_binary": false, + "is_text": true, + "is_archive": false, + "is_media": false, + "is_source": false, + "is_script": false, + "package_data": [], + "for_packages": [], + "files_count": 0, + "dirs_count": 0, + "size_count": 0, + "scan_errors": [] + } + ] +} \ No newline at end of file diff --git a/tests/packagedcode/test_package_models.py b/tests/packagedcode/test_package_models.py index 97657aed551..e8ea0726240 100644 --- a/tests/packagedcode/test_package_models.py +++ b/tests/packagedcode/test_package_models.py @@ -7,6 +7,7 @@ # See https://aboutcode.org for more information about nexB OSS projects. 
# +from cgi import test import os.path from packagedcode import misc @@ -14,11 +15,14 @@ from packagedcode import ALL_DATAFILE_HANDLERS from packagedcode.models import PackageData from packagedcode.models import Party +from packagedcode.plugin_package import get_package_and_deps from packagedcode.plugin_package import PackageScanner from packages_test_utils import PackageTester +from scancode.cli_test_utils import purl_with_fake_uuid from scancode_config import REGEN_TEST_FIXTURES from commoncode.resource import Codebase +from commoncode.resource import VirtualCodebase class TestModels(PackageTester): @@ -177,3 +181,33 @@ def test_add_to_package(self): test_codebase ) assert test_package.package_uid in test_resource.for_packages + + def test_assembly_custom_package_adder(self): + def test_package_adder(package_uid, resource, codebase): + """ + Add `package_uid` to `resource.extra_data` + """ + if 'for_packages' in resource.extra_data: + resource.extra_data['for_packages'].append(package_uid) + else: + resource.extra_data['for_packages'] = [package_uid] + resource.save(codebase) + + # This scan does not contain top-level Packages or Dependencies since we + # want to run `get_package_and_deps` to create them + test_loc = self.get_test_loc('models/get_package_resources.scan.json') + test_codebase = VirtualCodebase(location=test_loc) + packages, dependencies = get_package_and_deps(test_codebase, test_package_adder) + + assert len(packages) == 1 + assert not dependencies + + package = packages[0] + package_uid = package.package_uid + test_package_uid = purl_with_fake_uuid(package_uid) + + for resource in test_codebase.walk(): + for_packages = resource.extra_data.get('for_packages', []) + for package_uid in for_packages: + normalized_package_uid = purl_with_fake_uuid(package_uid) + assert normalized_package_uid == test_package_uid