detect_secrets/core/usage.py

import argparse
from collections import namedtuple

from detect_secrets import VERSION
from detect_secrets.constants import DEFAULT_GHE_INSTANCE


def add_exclude_lines_argument(parser):
    """Register the ``--exclude-lines`` option on ``parser``.

    The option takes one string value, which the help text describes as a
    regex for lines to ignore during a scan.

    :param parser: object exposing ``add_argument`` (an argparse parser
        or argument group).
    """
    option_settings = {
        'type': str,
        'help': 'Pass in regex to specify lines to ignore during scan.',
    }
    parser.add_argument('--exclude-lines', **option_settings)


def add_word_list_argument(parser):
    """Register the ``--word-list`` option on ``parser``.

    The parsed value is stored under ``word_list_file`` rather than the
    name argparse would derive from the flag.

    :param parser: object exposing ``add_argument`` (an argparse parser
        or argument group).
    """
    description = (
        'Text file with a list of words, '
        'if a secret contains a word in the list we ignore it.'
    )
    parser.add_argument(
        '--word-list',
        dest='word_list_file',
        type=str,
        help=description,
    )


def add_use_all_plugins_argument(parser):
    """Register the boolean ``--use-all-plugins`` switch on ``parser``.

    :param parser: object exposing ``add_argument`` (an argparse parser
        or argument group).
    """
    switch = {
        'action': 'store_true',
        'help': 'Use all available plugins to scan files.',
    }
    parser.add_argument('--use-all-plugins', **switch)


def add_no_verify_flag(parser):
    """Register the boolean ``-n`` / ``--no-verify`` switch on ``parser``.

    :param parser: object exposing ``add_argument`` (an argparse parser
        or argument group).
    """
    flag_names = ('-n', '--no-verify')
    parser.add_argument(
        *flag_names,
        help='Disables additional verification of secrets via network call.',
        action='store_true',
    )


def add_no_version_check_flag(parser):
    """Register the boolean ``--no-version-check`` switch on ``parser``.

    :param parser: object exposing ``add_argument`` (an argparse parser
        or argument group).
    """
    help_message = 'Disables detect-secrets up-to-date version check.'
    parser.add_argument('--no-version-check', action='store_true', help=help_message)


def add_output_verified_false_flag(parser):
    """Register the boolean ``--output-verified-false`` switch on ``parser``.

    :param parser: object exposing ``add_argument`` (an argparse parser
        or argument group).
    """
    settings = dict(
        action='store_true',
        help='Output secrets that are verified false.',
    )
    parser.add_argument('--output-verified-false', **settings)


def add_suppress_unscannable_file_warnings(parser):
    """Register the boolean ``--suppress-unscannable-file-warnings`` switch.

    :param parser: object exposing ``add_argument`` (an argparse parser
        or argument group).
    """
    flag = '--suppress-unscannable-file-warnings'
    explanation = 'Suppress warnings that occur when one or more files cannot be scanned.'
    parser.add_argument(flag, help=explanation, action='store_true')


def add_fail_on_file_unscannable(parser):
    """Register the boolean ``--fail-on-file-unscannable`` switch on ``parser``.

    :param parser: object exposing ``add_argument`` (an argparse parser
        or argument group).
    """
    help_parts = (
        'Fail if one or more files could not be scanned. ',
        'Note that binary files will be skipped by this check.',
    )
    parser.add_argument(
        '--fail-on-file-unscannable',
        help=''.join(help_parts),
        action='store_true',
    )


class ParserBuilder(object):
    """Fluent builder around a top-level ``argparse.ArgumentParser``.

    The constructor creates the parser and registers the default arguments.
    Callers then chain either ``add_pre_commit_arguments()`` or
    ``add_console_use_arguments()`` and finish with ``parse_args(argv)``.
    Every ``add_*`` / ``_add_*`` method returns ``self`` so calls can be
    chained.
    """

    def __init__(self):
        self.parser = argparse.ArgumentParser()
        # Set by add_console_use_arguments(); stays None until then.
        self.subparser = None

        self.add_default_arguments()

    def add_default_arguments(self):
        """Register the arguments added to every parser this builder makes.

        :return: ``self``, for chaining (consistent with the other
            ``add_*_arguments`` methods).
        """
        self._add_no_version_check_flag()\
            ._add_verbosity_argument()\
            ._add_version_argument()

        return self

    def add_pre_commit_arguments(self):
        """Register the positional filenames, the hook flags and the plugin options.

        :return: ``self``, for chaining.
        """
        self._add_filenames_argument()\
            ._add_set_baseline_argument()\
            ._add_exclude_lines_argument()\
            ._add_word_list_argument()\
            ._add_use_all_plugins_argument()\
            ._add_no_verify_flag()\
            ._add_output_verified_false_flag()\
            ._add_fail_on_unaudited_flag()\
            ._add_suppress_unscannable_file_warnings()\
            ._add_fail_on_file_unscannable()

        PluginOptions(self.parser).add_arguments()

        return self

    def add_console_use_arguments(self):
        """Register the ``scan`` and ``audit`` sub-commands.

        The chosen sub-command name is stored in ``args.action``.

        :return: ``self``, for chaining.
        """
        self.subparser = self.parser.add_subparsers(
            dest='action',
        )

        for action_parser in (ScanOptions, AuditOptions):
            action_parser(self.subparser).add_arguments()

        return self

    def parse_args(self, argv):
        """Parse ``argv`` and fold the plugin-related fields into ``args.plugins``.

        :type argv: list
        :param argv: command line arguments, without the program name.
        :return: the namespace produced by argparse, after
            ``PluginOptions.consolidate_args`` has been applied to it.
        """
        output = self.parser.parse_args(argv)
        PluginOptions.consolidate_args(output)

        return output

    def _add_version_argument(self):
        self.parser.add_argument(
            '--version',
            action='version',
            version=VERSION,
            help='Display version information.',
        )
        return self

    def _add_verbosity_argument(self):
        # `count` action: each -v increments the value; it stays None when
        # the flag is never given because no default is configured.
        self.parser.add_argument(
            '-v',
            '--verbose',
            action='count',
            help='Verbose mode.',
        )
        return self

    def _add_filenames_argument(self):
        self.parser.add_argument(
            'filenames',
            nargs='*',
            help='Filenames to check.',
        )
        return self

    def _add_set_baseline_argument(self):
        # nargs=1 makes the parsed value a one-element list, so the default
        # is a one-element list as well.
        self.parser.add_argument(
            '--baseline',
            nargs=1,
            default=[''],
            help='Sets a baseline for explicitly ignored secrets, generated by `--scan`.',
        )
        return self

    def _add_exclude_lines_argument(self):
        add_exclude_lines_argument(self.parser)
        return self

    def _add_word_list_argument(self):
        add_word_list_argument(self.parser)
        return self

    def _add_use_all_plugins_argument(self):
        add_use_all_plugins_argument(self.parser)
        return self

    def _add_no_verify_flag(self):
        add_no_verify_flag(self.parser)
        return self

    def _add_no_version_check_flag(self):
        add_no_version_check_flag(self.parser)
        return self

    def _add_output_verified_false_flag(self):
        add_output_verified_false_flag(self.parser)
        return self

    def _add_fail_on_unaudited_flag(self):
        self.parser.add_argument(
            '--fail-on-unaudited',
            action='store_true',
            help='Fail check if there are entries that have not been audited in baseline.',
        )
        return self

    def _add_suppress_unscannable_file_warnings(self):
        add_suppress_unscannable_file_warnings(self.parser)
        return self

    def _add_fail_on_file_unscannable(self):
        add_fail_on_file_unscannable(self.parser)
        return self


class ScanOptions:
    """Registers the ``scan`` sub-command and its arguments.

    :param subparser: the object returned by
        ``ArgumentParser.add_subparsers``; a ``scan`` parser is added to it.
    """

    def __init__(self, subparser):
        self.parser: argparse.ArgumentParser = subparser.add_parser(
            'scan',
        )

    def add_arguments(self):
        """Register every ``scan`` argument, including the plugin options.

        :return: ``self``, for chaining.
        """
        self._add_initialize_baseline_argument()\
            ._add_adhoc_scanning_argument()\
            ._add_output_raw_argument()\
            ._add_suppress_unscannable_file_warnings()

        PluginOptions(self.parser).add_arguments()

        return self

    def _add_initialize_baseline_argument(self):
        # NOTE(review): with nargs='*' argparse appears to hand back the
        # default unchanged when no path is given, so `args.path` would be
        # the string '.' rather than a list -- confirm callers expect that.
        self.parser.add_argument(
            'path',
            nargs='*',
            default='.',
            help=(
                'Scans the entire codebase and outputs a snapshot of '
                'currently identified secrets.'
            ),
        )

        # `--exclude-lines` and `--word-list` are registered for both the
        # pre-commit parser and `scan`, because they apply to both.
        add_exclude_lines_argument(self.parser)
        add_word_list_argument(self.parser)

        # `--exclude-files` is only offered on `scan` because it is only used
        # for the initialization. The pre-commit hook framework already has an
        # `exclude` option that can be used instead.
        self.parser.add_argument(
            '--exclude-files',
            type=str,
            help='Pass in regex to specify ignored paths during initialization scan.',
        )

        # `--update` is only offered on `scan` because it is only used for
        # initialization.
        self.parser.add_argument(
            '--update',
            nargs=1,
            metavar='OLD_BASELINE_FILE',
            help=(
                # Adjacent literals are concatenated verbatim, so the
                # separating space has to be part of the first literal.
                'Update existing baseline by importing settings from it. '
                'New file would be created if old baseline file does not exist.'
            ),
            dest='import_filename',
        )

        # Pairing `--update` with `--use-all-plugins` to overwrite plugins list
        # from baseline
        add_use_all_plugins_argument(self.parser)

        self.parser.add_argument(
            '--all-files',
            action='store_true',
            help='Scan all files recursively (as compared to only scanning git tracked files).',
        )

        add_no_verify_flag(self.parser)
        add_output_verified_false_flag(self.parser)

        return self

    def _add_adhoc_scanning_argument(self):
        # nargs='?' with const=True: `--string` alone yields True,
        # `--string VALUE` yields VALUE, and omitting it yields None.
        self.parser.add_argument(
            '--string',
            nargs='?',
            const=True,
            help=('Scans an individual string, and displays configured ' 'plugins\' verdict.'),
        )
        return self

    def _add_output_raw_argument(self):
        self.parser.add_argument(
            '--output-raw',
            action='store_true',
            help=(
                # Trailing spaces keep the three sentences from running
                # together in the rendered help.
                'Outputs the raw secret in the baseline file. '
                'For development/extension purposes. '
                'Do not use this option in a repo monitoring context.'
            ),
        )
        return self

    def _add_suppress_unscannable_file_warnings(self):
        add_suppress_unscannable_file_warnings(self.parser)
        return self


class AuditOptions:
    """Registers the ``audit`` sub-command and its arguments.

    :param subparser: the object returned by
        ``ArgumentParser.add_subparsers``; an ``audit`` parser is added to it.
    """

    def __init__(self, subparser):
        # Override the default audit parser usage message since the arguments within
        # the _add_report_module group should only be permitted when the --report
        # arg is included. argparse does not have built-in mutual inclusion functionality,
        # so we had to add our own custom validation function, validate_args,
        # in detect-secrets/core/report/report.py.
        # docs: https://docs.python.org/3/library/argparse.html#usage
        #
        # The positional is shown as `filename [filename ...]` because it is
        # registered with nargs='+', i.e. at least one filename is required.
        self.parser: argparse.ArgumentParser = subparser.add_parser(
            'audit',
            usage='%(prog)s [-h] [--diff | --display-results | --report [--fail-on-unaudited]'
            ' [--fail-on-live] [--fail-on-audited-real] [--json | --omit-instructions]]'
            ' filename [filename ...]',
        )

    def _add_report_module(self):
        """Register the ``reporting`` argument group used together with ``--report``."""
        report_parser = self.parser.add_argument_group(
            title='reporting',
            description=(
                'Displays a report with the secrets detected which fail certain conditions. '
                'To be used with the report mode (--report).'
            ),
        )

        report_parser.add_argument(
            '--fail-on-unaudited',
            action='store_true',
            help=(
                'This condition is met when there are potential secrets'
                ' in the baseline file which have not yet been audited.'
                ' To pass this check, run detect-secrets audit <BASELINE_FILE> to'
                ' audit all unaudited secrets.'
            ),
        )

        report_parser.add_argument(
            '--fail-on-live',
            action='store_true',
            help=(
                'This condition is met when a secret has been verified'
                ' to be live. To pass this check, make sure that any'
                ' secrets in the baseline file with a property of'
                ' is_verified: true have been remediated, afterwards re-scan.'
            ),
        )

        report_parser.add_argument(
            '--fail-on-audited-real',
            action='store_true',
            help=(
                'This condition is met when the baseline file contains'
                ' one or more secrets which have been marked as actual'
                ' secrets during the auditing process. Secrets with a'
                ' property of is_secret: true meet this condition.'
                ' To pass this check, remove these secrets from your'
                ' code and re-scan so that they will be removed from your baseline.'
            ),
        )

        # The two output-format switches cannot be combined with each other.
        report_parser_exclusive = report_parser.add_mutually_exclusive_group()

        report_parser_exclusive.add_argument(
            '--json',
            action='store_true',
            help=('Causes the report output to be formatted as JSON.'),
        )

        report_parser_exclusive.add_argument(
            '--omit-instructions',
            action='store_true',
            help=('Omits instructions from the report.'),
        )

    def add_arguments(self):
        """Register the audit modes, the reporting group and the baseline filename(s).

        ``--diff``, ``--display-results`` and ``--report`` are mutually
        exclusive with each other.

        :return: ``self``, for chaining.
        """
        action_parser = self.parser.add_mutually_exclusive_group()

        action_parser.add_argument(
            '--diff',
            action='store_true',
            help=(
                'Allows the comparison of two baseline files, in order to '
                'effectively distinguish the difference between various '
                'plugin configurations.'
            ),
        )

        action_parser.add_argument(
            '--display-results',
            action='store_true',
            help=(
                'Displays the results of an interactive auditing session '
                'which have been saved to a baseline file.'
            ),
        )

        action_parser.add_argument(
            '--report',
            action='store_true',
            help=('Displays a report with the secrets detected'),
        )

        self._add_report_module()

        self.parser.add_argument(
            'filename',
            nargs='+',
            help=(
                'Audit a given baseline file to distinguish the difference '
                'between false and true positives.'
            ),
        )

        return self


class PluginDescriptor(
    namedtuple(
        'PluginDescriptor',
        [
            # Classname of plugin; used for initialization
            'classname',
            # Flag to disable plugin. e.g. `--no-hex-string-scan`
            'flag_text',
            # Description for disable flag.
            'help_text',
            # type: list
            # Allows the bundling of all related command line provided
            # arguments together, under one plugin name.
            # Assumes there is no shared related arg.
            #
            # Furthermore, each related arg can have its own default
            # value (paired together, with a tuple). This allows us to
            # distinguish the difference between a default value, and
            # whether a user has entered the same value as a default value.
            # Therefore, only populate the default value upon consolidation
            # (rather than relying on argparse default).
            'related_args',
            # The name of the plugin file
            'filename',
        ],
    ),
):
    """Immutable record describing one plugin and its command line flags.

    ``related_args`` is optional and is normalised to an empty list when it
    is omitted or falsy; all other fields must be given as keyword arguments.
    """

    def __new__(cls, related_args=None, **kwargs):
        return super(PluginDescriptor, cls).__new__(cls, related_args=related_args or [], **kwargs)

    @classmethod
    def from_plugin_class(cls, plugin, name):
        """Build a descriptor from a plugin class.

        :type plugin: Type[TypeVar('Plugin', bound=BasePlugin)]
        :param plugin: class providing ``default_options``, ``flag_text``
            and a docstring.
        :type name: str
        :param name: stored as the descriptor's ``classname``.
        :rtype: PluginDescriptor
        :raises NotImplementedError: if the plugin has no usable docstring.
        """
        related_args = None
        if plugin.default_options:
            # e.g. {'hex_limit': 3} -> [('--hex-limit', 3)]
            related_args = [
                ('--{}'.format(arg_name.replace('_', '-')), value)
                for arg_name, value in plugin.default_options.items()
            ]

        return cls(
            classname=name,
            flag_text='--{}'.format(plugin.flag_text),
            help_text=cls.get_disabled_help_text(plugin),
            related_args=related_args,
            # `filename` is a required field; omitting it made this
            # constructor raise TypeError. The last component of the
            # defining module is used as the plugin file name.
            # NOTE(review): assumes plugin file name == module name -- confirm.
            filename=plugin.__module__.rsplit('.', 1)[-1],
        )

    @staticmethod
    def get_disabled_help_text(plugin):
        """Derive the help text for a plugin's disable flag from its docstring.

        The first non-blank docstring line is used, with its first character
        lower-cased and prefixed with ``'Disables '``.

        :param plugin: class whose ``__doc__`` is inspected.
        :rtype: str
        :raises NotImplementedError: if the docstring is missing or blank.
        """
        # `__doc__` is None for classes without a docstring; treat that the
        # same as an empty docstring so the documented error is raised.
        docstring = plugin.__doc__ or ''
        for line in docstring.splitlines():
            line = line.strip()
            if line:
                break
        else:
            raise NotImplementedError('Plugins must declare a docstring.')

        line = line[0].lower() + line[1:]
        return 'Disables {}'.format(line)


class PluginOptions:
    """Registers the plugin-related command line options and consolidates them.

    ``opt_out_plugins`` are active unless their ``--no-*`` flag is given;
    ``opt_in_plugins`` are inactive unless their flag is given.
    ``consolidate_args`` folds the parsed values into ``args.plugins``.
    """

    opt_out_plugins = [
        PluginDescriptor(
            classname='HexHighEntropyString',
            flag_text='--no-hex-string-scan',
            help_text='Disables scanning for hex high entropy strings',
            related_args=[
                ('--hex-limit', 3),
            ],
            filename='high_entropy_strings',
        ),
        PluginDescriptor(
            classname='Base64HighEntropyString',
            flag_text='--no-base64-string-scan',
            help_text='Disables scanning for base64 high entropy strings',
            related_args=[
                ('--base64-limit', 4.5),
            ],
            filename='high_entropy_strings',
        ),
        PluginDescriptor(
            classname='PrivateKeyDetector',
            flag_text='--no-private-key-scan',
            help_text='Disables scanning for private keys.',
            filename='private_key',
        ),
        PluginDescriptor(
            classname='BasicAuthDetector',
            flag_text='--no-basic-auth-scan',
            help_text='Disables scanning for Basic Auth formatted URIs.',
            filename='basic_auth',
        ),
        PluginDescriptor(
            classname='KeywordDetector',
            flag_text='--no-keyword-scan',
            help_text='Disables scanning for secret keywords.',
            related_args=[
                ('--keyword-exclude', None),
            ],
            filename='keyword',
        ),
        PluginDescriptor(
            classname='AWSKeyDetector',
            flag_text='--no-aws-key-scan',
            help_text='Disables scanning for AWS keys.',
            filename='aws',
        ),
        PluginDescriptor(
            classname='SlackDetector',
            flag_text='--no-slack-scan',
            help_text='Disables scanning for Slack tokens.',
            filename='slack',
        ),
        PluginDescriptor(
            classname='ArtifactoryDetector',
            flag_text='--no-artifactory-scan',
            help_text='Disable scanning for Artifactory credentials',
            filename='artifactory',
        ),
        PluginDescriptor(
            classname='StripeDetector',
            flag_text='--no-stripe-scan',
            help_text='Disable scanning for Stripe keys',
            filename='stripe',
        ),
        PluginDescriptor(
            classname='MailchimpDetector',
            flag_text='--no-mailchimp-scan',
            help_text='Disable scanning for Mailchimp keys',
            filename='mailchimp',
        ),
        PluginDescriptor(
            classname='JwtTokenDetector',
            flag_text='--no-jwt-scan',
            help_text='Disable scanning for JWTs',
            filename='jwt',
        ),
        PluginDescriptor(
            classname='BoxDetector',
            flag_text='--no-box-scan',
            help_text='Disables scans for Box credentials',
            filename='box',
        ),
        PluginDescriptor(
            classname='CloudantDetector',
            flag_text='--no-cloudant-scan',
            help_text='Disables scans for Cloudant credentials',
            filename='cloudant',
        ),
        PluginDescriptor(
            classname='GheDetector',
            flag_text='--no-ghe-scan',
            help_text='Disables scans for GitHub Enterprise credentials',
            filename='github_enterprise',
            related_args=[
                ('--ghe-instance', DEFAULT_GHE_INSTANCE),
            ],
        ),
        PluginDescriptor(
            classname='SoftlayerDetector',
            flag_text='--no-softlayer-scan',
            help_text='Disables scans for SoftLayer credentials',
            filename='softlayer',
        ),
        PluginDescriptor(
            classname='IbmCloudIamDetector',
            flag_text='--no-ibm-cloud-iam-scan',
            help_text='Disables scans for IBM Cloud IAM credentials',
            filename='ibm_cloud_iam',
        ),
        PluginDescriptor(
            classname='IbmCosHmacDetector',
            flag_text='--no-ibm-cos-hmac-scan',
            help_text='Disables scans for IBM Cloud Object Storage HMAC keys',
            filename='ibm_cos_hmac',
        ),
        PluginDescriptor(
            classname='TwilioKeyDetector',
            flag_text='--no-twilio-key-scan',
            help_text='Disables scans for Twilio API keys.',
            filename='twilio',
        ),
        PluginDescriptor(
            classname='NpmDetector',
            flag_text='--no-npm-scan',
            help_text='Disables scans for NPM keys.',
            filename='npm',
        ),
        PluginDescriptor(
            classname='SquareOAuthDetector',
            flag_text='--no-square-oauth',
            help_text='Disables scans for Square OAuth tokens.',
            filename='square_oauth',
        ),
        PluginDescriptor(
            classname='AzureStorageKeyDetector',
            flag_text='--no-azure-storage-scan',
            help_text='Disables scans for Azure Storage Account access.',
            filename='azure_storage_key',
        ),
        PluginDescriptor(
            classname='GitHubTokenDetector',
            flag_text='--no-github-scan',
            help_text='Disables scans for GitHub credentials',
            filename='github_token',
        ),
    ]
    opt_in_plugins = [
        PluginDescriptor(
            classname='Db2Detector',
            flag_text='--db2-scan',
            help_text='Enable scanning for DB2 Tokens',
            filename='db2',
        ),
    ]
    all_plugins = opt_in_plugins + opt_out_plugins

    def __init__(self, parser):
        """Create the ``plugins`` argument group on ``parser``.

        :param parser: argparse parser that receives the group; the group
            itself is kept as ``self.parser``.
        """
        self.parser = parser.add_argument_group(
            title='plugins',
            description=(
                'Configure settings for each secret scanning '
                'ruleset. By default, all plugins are enabled '
                'unless explicitly disabled.'
            ),
        )

    def add_arguments(self):
        """Register every plugin-related option on the ``plugins`` group.

        :return: ``self``, for chaining.
        """
        self._add_custom_limits()
        self._add_opt_out_options()
        self._add_opt_in_options()
        self._add_keyword_exclude()
        self._add_ghe_instance()

        return self

    @staticmethod
    def get_disabled_plugins(args):
        """Return the classnames of all known plugins missing from ``args.plugins``.

        :param args: namespace on which ``consolidate_args`` has set ``plugins``.
        :rtype: list
        """
        return [
            plugin.classname
            for plugin in PluginOptions.all_plugins
            if plugin.classname not in args.plugins
        ]

    @staticmethod
    def consolidate_args(args):
        """There are many argument fields related to configuring plugins.
        This function consolidates all of them, and saves the consolidated
        information in args.plugins.

        Note that we're deferring initialization of those plugins, because
        plugins may have various initialization values, referenced in
        different places.

        On return ``args`` additionally carries ``plugin_filenames`` (tuple)
        and ``is_using_default_value`` (dict); the per-plugin flag attributes
        that were consumed are removed from ``args``.

        :param args: output of `argparse.ArgumentParser.parse_args`
        """
        # Using `--hex-limit` as a canary to identify whether this
        # consolidation is appropriate.
        if not hasattr(args, 'hex_limit'):
            return

        active_plugins = {}
        is_using_default_value = {}

        for plugin in PluginOptions.all_plugins:
            disable_attr = PluginOptions._convert_flag_text_to_argument_name(
                plugin.flag_text,
            )

            # Remove disabled plugins. A truthy attribute means "disabled"
            # for opt-out and opt-in plugins alike (see _add_opt_in_options).
            # A missing attribute is read as "not disabled", so it must not
            # make the clean-up below fail either.
            is_disabled = getattr(args, disable_attr, False)
            if hasattr(args, disable_attr):
                delattr(args, disable_attr)
            if is_disabled:
                continue

            # Consolidate related args
            related_args = {}
            for flag_name, default_value in plugin.related_args:
                arg_name = PluginOptions._convert_flag_text_to_argument_name(
                    flag_name,
                )

                related_args[arg_name] = getattr(args, arg_name)
                delattr(args, arg_name)

                # Defaults are applied here rather than through argparse so
                # that a user-supplied value equal to the default can still
                # be told apart from an omitted one.
                if default_value and related_args[arg_name] is None:
                    related_args[arg_name] = default_value
                    is_using_default_value[arg_name] = True

            active_plugins[plugin.classname] = related_args

        # One entry per active plugin, in `all_plugins` order; plugins that
        # share a file therefore contribute the same filename more than once.
        active_plugins_filenames = [
            plugin.filename
            for plugin in PluginOptions.all_plugins
            if plugin.classname in active_plugins
        ]

        args.plugins = active_plugins
        args.plugin_filenames = tuple(active_plugins_filenames)
        args.is_using_default_value = is_using_default_value

    def _add_custom_limits(self):
        high_entropy_help_text = (
            'Sets the entropy limit for high entropy strings. '
            'Value must be between 0.0 and 8.0, '
        )

        # No argparse default on purpose: consolidate_args fills in the
        # default when the parsed value is None.
        self.parser.add_argument(
            '--base64-limit',
            type=self._argparse_minmax_type,
            nargs='?',
            help=high_entropy_help_text + 'defaults to 4.5.',
        )
        self.parser.add_argument(
            '--hex-limit',
            type=self._argparse_minmax_type,
            nargs='?',
            help=high_entropy_help_text + 'defaults to 3.0.',
        )

    def _add_opt_out_options(self):
        for plugin in self.opt_out_plugins:
            self.parser.add_argument(
                plugin.flag_text,
                action='store_true',
                help=plugin.help_text,
                default=False,
            )

    def _add_opt_in_options(self):
        # The parsed attribute is read by consolidate_args as "is disabled":
        # it defaults to True (plugin off) and passing the flag stores False
        # (plugin on).
        for plugin in self.opt_in_plugins:
            self.parser.add_argument(
                plugin.flag_text,
                action='store_false',
                help=plugin.help_text,
                default=True,
            )

    def _argparse_minmax_type(self, string):
        """Custom type for argparse to enforce value limits

        :type string: str
        :param string: raw command line value.
        :rtype: float
        :raises argparse.ArgumentTypeError: if the value is not within
            0.0 to 8.0 inclusive (this includes ``nan``).
        :raises ValueError: if ``string`` is not a valid float literal.
        """
        value = float(string)
        # Written as a positive range check so that NaN, for which every
        # comparison is False, is rejected instead of slipping through.
        if not 0 <= value <= 8:
            raise argparse.ArgumentTypeError(
                '%s must be between 0.0 and 8.0' % string,
            )

        return value

    @staticmethod
    def _convert_flag_text_to_argument_name(flag_text):
        """This just emulates argparse's underlying logic.

        :type flag_text: str
        :param flag_text: e.g. `--no-hex-string-scan`
        :return: `no_hex_string_scan`
        """
        return flag_text[2:].replace('-', '_')

    def _add_keyword_exclude(self):
        self.parser.add_argument(
            '--keyword-exclude',
            type=str,
            help='Pass in regex to exclude false positives found by keyword detector.',
        )

    def _add_ghe_instance(self):
        self.parser.add_argument(
            '--ghe-instance',
            type=str,
            help='Instance URL for GHE i.e. github.ibm.com',
        )