-
Notifications
You must be signed in to change notification settings - Fork 109
Add ecosystem specific inclusions or exclusions #1550
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from 1 commit
88bc201
a185d7d
0530bbe
32e1543
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,115 @@ | ||
# SPDX-License-Identifier: Apache-2.0 | ||
# | ||
# http://nexb.com and https://github.com/aboutcode-org/scancode.io | ||
# The ScanCode.io software is licensed under the Apache License version 2.0. | ||
# Data generated with ScanCode.io is provided as-is without warranties. | ||
# ScanCode is a trademark of nexB Inc. | ||
# | ||
# You may not use this software except in compliance with the License. | ||
# You may obtain a copy of the License at: http://apache.org/licenses/LICENSE-2.0 | ||
# Unless required by applicable law or agreed to in writing, software distributed | ||
# under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR | ||
# CONDITIONS OF ANY KIND, either express or implied. See the License for the | ||
# specific language governing permissions and limitations under the License. | ||
# | ||
# Data Generated with ScanCode.io is provided on an "AS IS" BASIS, WITHOUT WARRANTIES | ||
# OR CONDITIONS OF ANY KIND, either express or implied. No content created from | ||
# ScanCode.io should be considered or used as legal advice. Consult an Attorney | ||
# for any legal advice. | ||
# | ||
# ScanCode.io is a free software code scanning tool from nexB Inc. and others. | ||
# Visit https://github.com/aboutcode-org/scancode.io for support and download. | ||
|
||
|
||
class EcosystemConfig:
    """
    Declare the settings that an ecosystem specific configuration can provide.

    Each ecosystem gets its own subclass which overrides the attributes
    relevant to it; attributes left untouched keep the empty defaults below.
    """

    # Name of the pipeline option this configuration corresponds to.
    # It should be defined for each ecosystem offered as a pipeline option.
    ecosystem_option = None

    # Extensions of the packages of this ecosystem which need to be
    # matched from purldb.
    purldb_package_extensions = []

    # Extensions of the resources of this ecosystem which need to be
    # matched from purldb.
    purldb_resource_extensions = []

    # Extensions of documentation files which do not require review.
    doc_extensions = []

    # Paths in the deployed binaries/archives (on the to/ side) which do not
    # need review, even if they are not matched to the source side.
    deployed_resource_path_exclusions = []

    # Paths in the development/source archive (on the from/ side) which
    # should not be considered, even if unmapped to the deployed side, when
    # assessing what to review on the deployed side.
    devel_resource_path_exclusions = []

    # Symbols found in the standard libraries of this ecosystem which are
    # not so useful in mapping.
    standard_symbols_to_exclude = []
|
||
|
||
class DefaultEcosystemConfig(EcosystemConfig):
    """Hold the settings which are common across multiple ecosystems."""

    ecosystem_option = "Default"

    # Generic archive formats, not tied to a single ecosystem.
    purldb_package_extensions = [".zip", ".tar.gz", ".tar.xz"]

    # Office and typesetting document formats.
    doc_extensions = [
        ".pdf",
        ".doc",
        ".docx",
        ".ppt",
        ".pptx",
        ".tex",
        ".odt",
        ".odp",
    ]

    devel_resource_path_exclusions = ["*/tests/*"]
|
||
|
||
class JavaEcosystemConfig(EcosystemConfig):
    """Settings for the "Java" option: archive packages and compiled classes."""

    ecosystem_option = "Java"

    purldb_package_extensions = [".jar", ".war"]

    purldb_resource_extensions = [".class"]
|
||
|
||
class JavaScriptEcosystemConfig(EcosystemConfig):
    """Settings for the "JavaScript" option: script, typing and stylesheet files."""

    ecosystem_option = "JavaScript"

    # Only resource extensions are declared; no package extension is set
    # for this ecosystem.
    purldb_resource_extensions = [
        ".map",
        ".js",
        ".mjs",
        ".ts",
        ".d.ts",
        ".jsx",
        ".tsx",
        ".css",
        ".scss",
        ".less",
        ".sass",
        ".soy",
    ]
|
||
|
||
class GoEcosystemConfig(EcosystemConfig):
    """Settings for the "Go" option."""

    ecosystem_option = "Go"

    purldb_resource_extensions = [".go"]
|
||
|
||
class RustEcosystemConfig(EcosystemConfig):
    """Settings for the "Rust" option."""

    ecosystem_option = "Rust"

    purldb_resource_extensions = [".rs"]
|
||
|
||
class RubyEcosystemConfig(EcosystemConfig):
    """Settings for the "Ruby" option: gem packages and Ruby sources."""

    ecosystem_option = "Ruby"

    purldb_package_extensions = [".gem"]

    purldb_resource_extensions = [".rb"]

    # Deployed-side paths matching these patterns do not need review.
    deployed_resource_path_exclusions = [
        "*checksums.yaml.gz*",
        "*metadata.gz*",
    ]
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -22,6 +22,7 @@ | |
|
||
from aboutcode.pipeline import optional_step | ||
from scanpipe import pipes | ||
from scanpipe.config import DefaultEcosystemConfig | ||
from scanpipe.pipelines import Pipeline | ||
from scanpipe.pipes import d2d | ||
from scanpipe.pipes import flag | ||
|
@@ -31,7 +32,7 @@ | |
from scanpipe.pipes import scancode | ||
|
||
|
||
class DeployToDevelop(Pipeline): | ||
class DeployToDevelop(Pipeline, DefaultEcosystemConfig): | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I think we should make this work without the need for the extra mixin. See further suggestions. |
||
""" | ||
Establish relationships between two code trees: deployment and development. | ||
|
||
|
@@ -64,6 +65,8 @@ def steps(cls): | |
cls.flag_empty_files, | ||
cls.flag_whitespace_files, | ||
cls.flag_ignored_resources, | ||
cls.load_ecosystem_config, | ||
cls.load_ecosystem_config_ruby, | ||
cls.map_about_files, | ||
cls.map_checksum, | ||
cls.match_archives_to_purldb, | ||
|
@@ -91,33 +94,6 @@ def steps(cls): | |
cls.create_local_files_packages, | ||
) | ||
|
||
purldb_package_extensions = [".jar", ".war", ".zip"] | ||
purldb_resource_extensions = [ | ||
".map", | ||
".js", | ||
".mjs", | ||
".ts", | ||
".d.ts", | ||
".jsx", | ||
".tsx", | ||
".css", | ||
".scss", | ||
".less", | ||
".sass", | ||
".soy", | ||
".class", | ||
] | ||
doc_extensions = [ | ||
".pdf", | ||
".doc", | ||
".docx", | ||
".ppt", | ||
".pptx", | ||
".tex", | ||
".odt", | ||
".odp", | ||
] | ||
|
||
def get_inputs(self): | ||
"""Locate the ``from`` and ``to`` input files.""" | ||
self.from_files, self.to_files = d2d.get_inputs(self.project) | ||
|
@@ -152,6 +128,15 @@ def flag_whitespace_files(self): | |
"""Flag whitespace files with size less than or equal to 100 byte as ignored.""" | ||
d2d.flag_whitespace_files(project=self.project) | ||
|
||
def load_ecosystem_config(self):
    """
    Load the ecosystem specific d2d configurations for the selected options.

    Delegates to ``d2d.load_ecosystem_config``, passing this pipeline and its
    ``selected_groups`` as the options.
    """
    d2d.load_ecosystem_config(
        pipeline=self,
        options=self.selected_groups,
    )
|
||
@optional_step("Ruby")
def load_ecosystem_config_ruby(self):
    """
    Load Ruby specific configurations for d2d steps.

    The body is intentionally empty: this step performs no work of its own.
    NOTE(review): presumably it exists only so that "Ruby" is registered as a
    selectable option through ``optional_step`` -- confirm.
    """
Comment on lines
+135
to
+138
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Is this a leftover or are we planning to duplicate those methods? |
||
|
||
def map_about_files(self): | ||
"""Map ``from/`` .ABOUT files to their related ``to/`` resources.""" | ||
d2d.map_about_files(project=self.project, logger=self.log) | ||
|
@@ -268,6 +253,7 @@ def flag_mapped_resources_archives_and_ignored_directories(self): | |
def perform_house_keeping_tasks(self): | ||
""" | ||
On deployed side | ||
- Ignore specific files based on ecosystem based configurations. | ||
- PurlDB match files with ``no-java-source`` and empty status, | ||
if no match is found update status to ``requires-review``. | ||
- Update status for uninteresting files. | ||
|
@@ -278,6 +264,11 @@ def perform_house_keeping_tasks(self): | |
""" | ||
d2d.match_resources_with_no_java_source(project=self.project, logger=self.log) | ||
d2d.handle_dangling_deployed_legal_files(project=self.project, logger=self.log) | ||
d2d.ignore_unmapped_resources_from_config( | ||
project=self.project, | ||
patterns_to_ignore=self.deployed_resource_path_exclusions, | ||
logger=self.log, | ||
) | ||
d2d.match_unmapped_resources( | ||
project=self.project, | ||
matched_extensions=self.purldb_resource_extensions, | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -47,6 +47,7 @@ | |
from summarycode.classify import LEGAL_STARTS_ENDS | ||
|
||
from aboutcode.pipeline import LoopProgress | ||
from scanpipe import config | ||
from scanpipe import pipes | ||
from scanpipe.models import CodebaseRelation | ||
from scanpipe.models import CodebaseResource | ||
|
@@ -66,6 +67,16 @@ | |
TO = "to/" | ||
|
||
|
||
# Ecosystem configuration classes known to the d2d pipes. Each entry exposes
# an ``ecosystem_option`` attribute, which is used to look the class up by the
# name of the selected option.
ECOSYSTEM_CONFIGS = [
    config.DefaultEcosystemConfig,
    config.JavaEcosystemConfig,
    config.JavaScriptEcosystemConfig,
    config.RubyEcosystemConfig,
    config.RustEcosystemConfig,
    config.GoEcosystemConfig,
]
|
||
|
||
def get_inputs(project): | ||
""" | ||
Locate the ``from`` and ``to`` input files in project inputs/ directory. | ||
|
@@ -114,6 +125,55 @@ def get_best_path_matches(to_resource, matches): | |
return matches | ||
|
||
|
||
def load_ecosystem_config(pipeline, options):
    """
    Apply ecosystem specific configurations to the ``pipeline``.

    The entries of ``ECOSYSTEM_CONFIGS`` are indexed by their
    ``ecosystem_option``. The "Default" configuration is always applied
    first, then the configuration of each entry of ``options`` is applied in
    iteration order. Entries of ``options`` which do not match any known
    ``ecosystem_option`` are skipped.

    Applying a configuration is delegated to ``add_ecosystem_config``, which
    receives the ``pipeline``, the index of configurations and the option name.

    ``options`` is an iterable of option names. It may be empty or ``None``,
    in which case only the "Default" configuration is applied.
    """
    configs_by_ecosystem = {
        ecosystem.ecosystem_option: ecosystem for ecosystem in ECOSYSTEM_CONFIGS
    }

    # Add default configurations which are common across ecosystems
    add_ecosystem_config(
        pipeline=pipeline,
        configs_by_ecosystem=configs_by_ecosystem,
        selected_option="Default",
    )

    # Add configurations for each selected ecosystem. ``options`` can be
    # ``None`` when no option was selected; treat it as "nothing selected"
    # instead of failing on iteration.
    for selected_option in options or ():
        if selected_option not in configs_by_ecosystem:
            continue

        add_ecosystem_config(
            pipeline=pipeline,
            configs_by_ecosystem=configs_by_ecosystem,
            selected_option=selected_option,
        )
|
||
|
||
def add_ecosystem_config(pipeline, configs_by_ecosystem, selected_option):
    """
    Merge the configuration of the ``selected_option`` ecosystem into the
    ``pipeline`` attributes.

    ``configs_by_ecosystem`` maps an option name to an ecosystem configuration
    object. For each of ``purldb_package_extensions``,
    ``purldb_resource_extensions`` and ``deployed_resource_path_exclusions``,
    a non-empty value on the ecosystem configuration is appended to the
    current value of the same attribute on the ``pipeline``, and the result is
    set on the ``pipeline`` as a new list.

    Values already present on the ``pipeline`` are not added again, so applying
    the same configuration twice has no further effect. Neither the ecosystem
    configuration lists nor the lists previously held by the ``pipeline`` (or
    its class) are modified. Empty ecosystem values leave the ``pipeline``
    attribute untouched.

    Nothing is done when ``selected_option`` is not a key of
    ``configs_by_ecosystem``. Return ``None``.
    """
    d2d_pipeline_configs = [
        "purldb_package_extensions",
        "purldb_resource_extensions",
        "deployed_resource_path_exclusions",
    ]

    ecosystem_config = configs_by_ecosystem.get(selected_option)
    if ecosystem_config is None:
        return

    for pipeline_config in d2d_pipeline_configs:
        config_value = getattr(ecosystem_config, pipeline_config, None)
        if not config_value:
            continue

        # Always build a fresh list: ``list.extend`` works in place and
        # returns ``None``, and the existing lists are typically class-level
        # attributes shared by every instance.
        merged_value = list(getattr(pipeline, pipeline_config, None) or [])
        for value in config_value:
            if value not in merged_value:
                merged_value.append(value)

        setattr(pipeline, pipeline_config, merged_value)
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Not ideal to set values from all the way down here, shouldn't we return those to a higher location that will explicitly set the values? |
||
|
||
|
||
def get_from_files_for_scanning(resources): | ||
""" | ||
Return resources in the "from/" side which has been mapped to the "to/" | ||
|
@@ -1453,6 +1513,20 @@ def match_resources_with_no_java_source(project, logger=None): | |
) | ||
|
||
|
||
def ignore_unmapped_resources_from_config(project, patterns_to_ignore, logger=None):
    """
    Ignore unmapped resources of the ``project`` using ``patterns_to_ignore``.

    The resources returned by
    ``project.codebaseresources.to_codebase().no_status()`` are passed to
    ``flag.flag_ignored_patterns`` together with ``patterns_to_ignore`` and the
    ``flag.IGNORED_FROM_CONFIG`` status.

    When a ``logger`` callable is provided, it is called once with a message
    reporting the count returned by ``flag.flag_ignored_patterns``.
    """
    ignored_resources_count = flag.flag_ignored_patterns(
        codebaseresources=project.codebaseresources.to_codebase().no_status(),
        patterns=patterns_to_ignore,
        status=flag.IGNORED_FROM_CONFIG,
    )
    if logger:
        logger(
            f"Ignoring {ignored_resources_count:,d} to/ resources "
            "from ecosystem specific configurations."
        )
|
||
|
||
def match_unmapped_resources(project, matched_extensions=None, logger=None): | ||
""" | ||
Match resources with empty status to PurlDB, if unmatched | ||
|
Uh oh!
There was an error while loading. Please reload this page.