-
Notifications
You must be signed in to change notification settings - Fork 108
Add ecosystem specific inclusions or exclusions #1550
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
88bc201
a185d7d
0530bbe
32e1543
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -29,9 +29,10 @@ | |
from scanpipe.pipes import matchcode | ||
from scanpipe.pipes import purldb | ||
from scanpipe.pipes import scancode | ||
from scanpipe.pipes.d2d_config import DefaultEcosystemConfig | ||
|
||
|
||
class DeployToDevelop(Pipeline): | ||
class DeployToDevelop(Pipeline, DefaultEcosystemConfig): | ||
""" | ||
Establish relationships between two code trees: deployment and development. | ||
|
||
|
@@ -64,6 +65,8 @@ def steps(cls): | |
cls.flag_empty_files, | ||
cls.flag_whitespace_files, | ||
cls.flag_ignored_resources, | ||
cls.load_ecosystem_config, | ||
cls.load_ecosystem_config_ruby, | ||
cls.map_about_files, | ||
cls.map_checksum, | ||
cls.match_archives_to_purldb, | ||
|
@@ -91,33 +94,6 @@ def steps(cls): | |
cls.create_local_files_packages, | ||
) | ||
|
||
purldb_package_extensions = [".jar", ".war", ".zip"] | ||
purldb_resource_extensions = [ | ||
".map", | ||
".js", | ||
".mjs", | ||
".ts", | ||
".d.ts", | ||
".jsx", | ||
".tsx", | ||
".css", | ||
".scss", | ||
".less", | ||
".sass", | ||
".soy", | ||
".class", | ||
] | ||
doc_extensions = [ | ||
".pdf", | ||
".doc", | ||
".docx", | ||
".ppt", | ||
".pptx", | ||
".tex", | ||
".odt", | ||
".odp", | ||
] | ||
|
||
def get_inputs(self): | ||
"""Locate the ``from`` and ``to`` input files.""" | ||
self.from_files, self.to_files = d2d.get_inputs(self.project) | ||
|
@@ -152,6 +128,15 @@ def flag_whitespace_files(self): | |
"""Flag whitespace files with size less than or equal to 100 byte as ignored.""" | ||
d2d.flag_whitespace_files(project=self.project) | ||
|
||
def load_ecosystem_config(self):
    """
    Load the ecosystem specific d2d configurations on this pipeline, using
    the selected groups of the pipeline as the ecosystem options.
    """
    selected_options = self.selected_groups
    d2d.load_ecosystem_config(pipeline=self, options=selected_options)
|
||
@optional_step("Ruby")
def load_ecosystem_config_ruby(self):
    """
    Load Ruby specific configurations for d2d steps.

    This step has no body of its own.
    NOTE(review): presumably it only exists so that the "Ruby" option can be
    selected as a group, the actual loading being done by the
    ``load_ecosystem_config`` step from the selected groups -- confirm.
    """
Comment on lines
+135
to
+138
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Is this a leftover or are we planning to duplicate those methods? |
||
|
||
def map_about_files(self): | ||
"""Map ``from/`` .ABOUT files to their related ``to/`` resources.""" | ||
d2d.map_about_files(project=self.project, logger=self.log) | ||
|
@@ -268,6 +253,7 @@ def flag_mapped_resources_archives_and_ignored_directories(self): | |
def perform_house_keeping_tasks(self): | ||
""" | ||
On deployed side | ||
- Ignore specific files based on ecosystem based configurations. | ||
- PurlDB match files with ``no-java-source`` and empty status, | ||
if no match is found update status to ``requires-review``. | ||
- Update status for uninteresting files. | ||
|
@@ -278,6 +264,11 @@ def perform_house_keeping_tasks(self): | |
""" | ||
d2d.match_resources_with_no_java_source(project=self.project, logger=self.log) | ||
d2d.handle_dangling_deployed_legal_files(project=self.project, logger=self.log) | ||
d2d.ignore_unmapped_resources_from_config( | ||
project=self.project, | ||
patterns_to_ignore=self.deployed_resource_path_exclusions, | ||
logger=self.log, | ||
) | ||
d2d.match_unmapped_resources( | ||
project=self.project, | ||
matched_extensions=self.purldb_resource_extensions, | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -51,6 +51,7 @@ | |
from scanpipe.models import CodebaseRelation | ||
from scanpipe.models import CodebaseResource | ||
from scanpipe.models import convert_glob_to_django_regex | ||
from scanpipe.pipes import d2d_config | ||
from scanpipe.pipes import flag | ||
from scanpipe.pipes import get_resource_diff_ratio | ||
from scanpipe.pipes import js | ||
|
@@ -66,6 +67,16 @@ | |
TO = "to/" | ||
|
||
|
||
# Ecosystem configuration classes available to the d2d pipeline.
# ``load_ecosystem_config`` indexes them by their ``ecosystem_option`` value.
ECOSYSTEM_CONFIGS = [
    d2d_config.DefaultEcosystemConfig,
    d2d_config.JavaEcosystemConfig,
    d2d_config.JavaScriptEcosystemConfig,
    d2d_config.RubyEcosystemConfig,
    d2d_config.RustEcosystemConfig,
    d2d_config.GoEcosystemConfig,
]
Comment on lines
+70
to
+77
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Should this exist directly in the d2d_config module instead? |
||
|
||
|
||
def get_inputs(project): | ||
""" | ||
Locate the ``from`` and ``to`` input files in project inputs/ directory. | ||
|
@@ -114,6 +125,55 @@ def get_best_path_matches(to_resource, matches): | |
return matches | ||
|
||
|
||
def load_ecosystem_config(pipeline, options):
    """
    Apply ecosystem specific configurations to the `pipeline`.

    The "Default" configuration, common across ecosystems, is always applied
    first. Then, for each entry of `options` matching the ``ecosystem_option``
    of a class listed in ``ECOSYSTEM_CONFIGS``, the configuration of that
    ecosystem is applied too. Entries of `options` which do not match any
    known ecosystem are skipped.

    "Applying" a configuration is delegated to ``add_ecosystem_config``, which
    sets the resulting configuration values as attributes on the `pipeline`.
    Nothing is returned.

    `options` may be ``None`` or empty, in which case only the "Default"
    configuration is applied.
    """
    configs_by_ecosystem = {
        ecosystem.ecosystem_option: ecosystem for ecosystem in ECOSYSTEM_CONFIGS
    }

    # Add default configurations which are common across ecosystems
    add_ecosystem_config(
        pipeline=pipeline,
        configs_by_ecosystem=configs_by_ecosystem,
        selected_option="Default",
    )

    # Add configurations for each selected ecosystem.
    # The selection may be unset (None): treat it as "no ecosystem selected".
    for selected_option in options or []:
        if selected_option not in configs_by_ecosystem:
            continue

        add_ecosystem_config(
            pipeline=pipeline,
            configs_by_ecosystem=configs_by_ecosystem,
            selected_option=selected_option,
        )
|
||
|
||
def add_ecosystem_config(pipeline, configs_by_ecosystem, selected_option):
    """
    Merge the configuration of the `selected_option` ecosystem into `pipeline`.

    `configs_by_ecosystem` maps an ecosystem option name to its configuration
    class. For each of the supported configuration attributes, the values of
    the selected ecosystem are appended to the values already set on the
    `pipeline`, skipping the values which are already present. The merged
    result is set on the `pipeline` as a new list, such that the lists declared
    on the configuration classes are never modified.

    Nothing is done when `selected_option` is not in `configs_by_ecosystem`.
    Attributes for which the selected ecosystem has no values are left
    untouched on the `pipeline`.
    """
    d2d_pipeline_configs = [
        "purldb_package_extensions",
        "purldb_resource_extensions",
        "deployed_resource_path_exclusions",
    ]

    ecosystem_config = configs_by_ecosystem.get(selected_option)
    if ecosystem_config is None:
        return

    for pipeline_config in d2d_pipeline_configs:
        config_value = getattr(ecosystem_config, pipeline_config)
        if not config_value:
            continue

        # Always build a new list: ``list.extend`` works in place and returns
        # None, and the current value may be a list shared at the class level.
        merged_values = list(getattr(pipeline, pipeline_config) or [])
        for value in config_value:
            if value not in merged_values:
                merged_values.append(value)

        setattr(pipeline, pipeline_config, merged_values)
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Not ideal to set values from all the way down here, shouldn't we return those to a higher location that will explicitly set the values? |
||
|
||
|
||
def get_from_files_for_scanning(resources): | ||
""" | ||
Return resources in the "from/" side which has been mapped to the "to/" | ||
|
@@ -1460,6 +1520,20 @@ def match_resources_with_no_java_source(project, logger=None): | |
) | ||
|
||
|
||
def ignore_unmapped_resources_from_config(project, patterns_to_ignore, logger=None):
    """
    Flag the unmapped ``to/`` resources of `project` matching `patterns_to_ignore`.

    The ``to/`` resources without a status are passed along with the
    `patterns_to_ignore` to ``flag.flag_ignored_patterns``, using the
    ``flag.IGNORED_FROM_CONFIG`` status. When a `logger` callable is provided,
    it is called with a message reporting the count returned by that call.
    """
    unmapped_to_resources = project.codebaseresources.to_codebase().no_status()
    ignored_resources_count = flag.flag_ignored_patterns(
        codebaseresources=unmapped_to_resources,
        patterns=patterns_to_ignore,
        status=flag.IGNORED_FROM_CONFIG,
    )
    if logger:
        logger(
            f"Ignoring {ignored_resources_count:,d} to/ resources with "
            "ecosystem specific configurations."
        )
|
||
|
||
def match_unmapped_resources(project, matched_extensions=None, logger=None): | ||
""" | ||
Match resources with empty status to PurlDB, if unmatched | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,115 @@ | ||
# SPDX-License-Identifier: Apache-2.0 | ||
# | ||
# http://nexb.com and https://github.com/aboutcode-org/scancode.io | ||
# The ScanCode.io software is licensed under the Apache License version 2.0. | ||
# Data generated with ScanCode.io is provided as-is without warranties. | ||
# ScanCode is a trademark of nexB Inc. | ||
# | ||
# You may not use this software except in compliance with the License. | ||
# You may obtain a copy of the License at: http://apache.org/licenses/LICENSE-2.0 | ||
# Unless required by applicable law or agreed to in writing, software distributed | ||
# under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR | ||
# CONDITIONS OF ANY KIND, either express or implied. See the License for the | ||
# specific language governing permissions and limitations under the License. | ||
# | ||
# Data Generated with ScanCode.io is provided on an "AS IS" BASIS, WITHOUT WARRANTIES | ||
# OR CONDITIONS OF ANY KIND, either express or implied. No content created from | ||
# ScanCode.io should be considered or used as legal advice. Consult an Attorney | ||
# for any legal advice. | ||
# | ||
# ScanCode.io is a free software code scanning tool from nexB Inc. and others. | ||
# Visit https://github.com/aboutcode-org/scancode.io for support and download. | ||
|
||
|
||
class EcosystemConfig:
    """
    Base class for ecosystem specific configurations, to be subclassed
    once for each ecosystem.
    """

    # NOTE: the list attributes below are defined at the class level and are
    # shared with every subclass which does not override them: build a new
    # list rather than mutating one of these in place.

    # This should be defined for each ecosystem which
    # is an option in the pipelines
    ecosystem_option = None

    # These are extensions for packages of this ecosystem which
    # need to be matched from purldb
    purldb_package_extensions = []

    # These are extensions for resources of this ecosystem which
    # need to be matched from purldb
    purldb_resource_extensions = []

    # Extensions for document files which do not require review
    doc_extensions = []

    # Paths in the deployed binaries/archives (on the to/ side) which
    # do not need review even if they are not matched to the source side
    deployed_resource_path_exclusions = []

    # Paths in the development/source archive (on the from/ side) which
    # should not be considered even if unmapped to the deployed side when
    # assessing what to review on the deployed side
    devel_resource_path_exclusions = []

    # Symbols which are found in ecosystem specific standard libraries
    # which are not so useful in mapping
    standard_symbols_to_exclude = []
|
||
|
||
class DefaultEcosystemConfig(EcosystemConfig):
    """Configurations which are common across multiple ecosystems."""

    ecosystem_option = "Default"
    # Archive extensions which are not specific to a single ecosystem
    purldb_package_extensions = [".zip", ".tar.gz", ".tar.xz"]
    # Path pattern excluded on the development (from/) side
    devel_resource_path_exclusions = ["*/tests/*"]
    # Extensions for document files
    doc_extensions = [
        ".pdf",
        ".doc",
        ".docx",
        ".ppt",
        ".pptx",
        ".tex",
        ".odt",
        ".odp",
    ]
|
||
|
||
class JavaEcosystemConfig(EcosystemConfig):
    """Configurations specific to the "Java" ecosystem option."""

    ecosystem_option = "Java"
    # Package and resource extensions for this ecosystem
    purldb_package_extensions = [".jar", ".war"]
    purldb_resource_extensions = [".class"]
|
||
|
||
class JavaScriptEcosystemConfig(EcosystemConfig):
    """Configurations specific to the "JavaScript" ecosystem option."""

    ecosystem_option = "JavaScript"
    # Resource extensions for this ecosystem
    purldb_resource_extensions = [
        ".map",
        ".js",
        ".mjs",
        ".ts",
        ".d.ts",
        ".jsx",
        ".tsx",
        ".css",
        ".scss",
        ".less",
        ".sass",
        ".soy",
    ]
|
||
|
||
class GoEcosystemConfig(EcosystemConfig):
    """Configurations specific to the "Go" ecosystem option."""

    ecosystem_option = "Go"
    # Resource extensions for this ecosystem
    purldb_resource_extensions = [".go"]
|
||
|
||
class RustEcosystemConfig(EcosystemConfig):
    """Configurations specific to the "Rust" ecosystem option."""

    ecosystem_option = "Rust"
    # Resource extensions for this ecosystem
    purldb_resource_extensions = [".rs"]
|
||
|
||
class RubyEcosystemConfig(EcosystemConfig):
    """Configurations specific to the "Ruby" ecosystem option."""

    ecosystem_option = "Ruby"
    # Package and resource extensions for this ecosystem
    purldb_package_extensions = [".gem"]
    purldb_resource_extensions = [".rb"]
    # Path patterns on the deployed (to/) side excluded for this ecosystem
    deployed_resource_path_exclusions = ["*checksums.yaml.gz*", "*metadata.gz*"]
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think we should make this work without the need for the extra mixing. See further suggestions.