diff --git a/detect_secrets/plugins/artifactory.py b/detect_secrets/plugins/artifactory.py index acb15d98e..ce57fe0f9 100644 --- a/detect_secrets/plugins/artifactory.py +++ b/detect_secrets/plugins/artifactory.py @@ -11,7 +11,7 @@ class ArtifactoryDetector(RegexBasedDetector): denylist = [ # artifactory tokens begin with AKC - re.compile(r'(\s|=|:|"|^)AKC\w{10,}'), # api token + re.compile(r'(?:\s|=|:|"|^)AKC\w{10,}'), # api token # artifactory encrypted passwords begin with AP6 - re.compile(r'(\s|=|:|"|^)AP6\w{10,}'), # password + re.compile(r'(?:\s|=|:|"|^)AP6\w{10,}'), # password ] diff --git a/detect_secrets/plugins/base.py b/detect_secrets/plugins/base.py index 6acecf677..6179d7214 100644 --- a/detect_secrets/plugins/base.py +++ b/detect_secrets/plugins/base.py @@ -3,8 +3,9 @@ from abc import abstractmethod from abc import abstractproperty +from .common.constants import ALLOWLIST_REGEXES +from .common.filters import is_false_positive from detect_secrets.core.potential_secret import PotentialSecret -from detect_secrets.plugins.common.constants import ALLOWLIST_REGEXES class BasePlugin(object): @@ -169,4 +170,7 @@ def analyze_string_content(self, string, line_num, filename): def secret_generator(self, string, *args, **kwargs): for regex in self.denylist: for match in regex.findall(string): + if is_false_positive(match): + continue + yield match diff --git a/detect_secrets/plugins/common/filters.py b/detect_secrets/plugins/common/filters.py new file mode 100644 index 000000000..6c2dda9c3 --- /dev/null +++ b/detect_secrets/plugins/common/filters.py @@ -0,0 +1,45 @@ +""" +Heuristic, false positive filters that are shared across all plugin types. +This abstraction allows for development of later ML work, or further +heuristical determinations (e.g. word filter, entropy comparator). +""" +import string + + +def is_false_positive(secret): + for func in [ + is_sequential_string, + ]: + if func(secret): + return True + + return False + + +def is_sequential_string(secret): + """ + Returns true if string is sequential. + """ + sequences = ( + ( + string.ascii_uppercase + + string.ascii_uppercase + + string.digits + + string.ascii_uppercase + + string.ascii_uppercase + + '+/' + ), + + # Capturing any number sequences + '0123456789' * 2, + + string.hexdigits.upper() + string.hexdigits.upper(), + string.ascii_uppercase + '=/', + ) + + uppercase = secret.upper() + for sequential_string in sequences: + if uppercase in sequential_string: + return True + + return False diff --git a/detect_secrets/plugins/high_entropy_strings.py b/detect_secrets/plugins/high_entropy_strings.py index cadf25789..a232f538b 100644 --- a/detect_secrets/plugins/high_entropy_strings.py +++ b/detect_secrets/plugins/high_entropy_strings.py @@ -14,23 +14,12 @@ import yaml from .base import BasePlugin +from .common.filters import is_false_positive +from .common.ini_file_parser import IniFileParser +from .common.yaml_file_parser import YamlFileParser from detect_secrets.core.potential_secret import PotentialSecret -from detect_secrets.plugins.common.ini_file_parser import IniFileParser -from detect_secrets.plugins.common.yaml_file_parser import YamlFileParser - - -IGNORED_SEQUENTIAL_STRINGS = ( - ( - string.ascii_uppercase + - string.ascii_uppercase + - string.digits + - string.ascii_uppercase + - string.ascii_uppercase + - '+/' - ), - string.hexdigits.upper() + string.hexdigits.upper(), - string.ascii_uppercase + '=/', -) + + YAML_EXTENSIONS = ( '.yaml', '.yml', @@ -97,13 +86,6 @@ def calculate_shannon_entropy(self, data): return entropy - def _is_sequential_string(self, string): - uppercased_string = string.upper() - for sequential_string in IGNORED_SEQUENTIAL_STRINGS: - if uppercased_string in sequential_string: - return True - return False - def analyze_string_content(self, string, line_num, filename): """Searches string for custom pattern, and captures all high entropy strings that match self.regex, with a limit defined as self.entropy_limit. @@ -111,8 +93,9 @@ def analyze_string_content(self, string, line_num, filename): output = {} for result in self.secret_generator(string): - if self._is_sequential_string(result): + if is_false_positive(result): continue + secret = PotentialSecret(self.secret_type, filename, result, line_num) output[secret] = secret diff --git a/detect_secrets/plugins/stripe.py b/detect_secrets/plugins/stripe.py index 6cf0f5d5f..9eda09a2d 100644 --- a/detect_secrets/plugins/stripe.py +++ b/detect_secrets/plugins/stripe.py @@ -14,5 +14,5 @@ class StripeDetector(RegexBasedDetector): denylist = ( # stripe standard keys begin with sk_live and restricted with rk_live - re.compile(r'(r|s)k_live_[0-9a-zA-Z]{24}'), + re.compile(r'(?:r|s)k_live_[0-9a-zA-Z]{24}'), ) diff --git a/test_data/config.ini b/test_data/config.ini index 754523300..b38fcb42c 100644 --- a/test_data/config.ini +++ b/test_data/config.ini @@ -1,5 +1,5 @@ [credentials] -password = 12345678901234 +password = 123456789a1234 [parent] [child] @@ -7,7 +7,7 @@ password = 12345678901234 keyB = value1 [aws] -aws_secret_key = 2345678901 +aws_secret_key = 23456789a1 [key with multiple values] keyA = diff --git a/tests/plugins/common/filters_test.py b/tests/plugins/common/filters_test.py new file mode 100644 index 000000000..b718bbd3f --- /dev/null +++ b/tests/plugins/common/filters_test.py @@ -0,0 +1,31 @@ +from __future__ import absolute_import + +import pytest + +from detect_secrets.plugins.common import filters + + +class TestIsSequentialString: + # TODO: More tests should be had. + + @pytest.mark.parametrize( + 'secret', + ( + 'ABCDEF', + + # Number sequences + '0123456789', + '1234567890', + ), + ) + def test_success(self, secret): + assert filters.is_sequential_string(secret) + + @pytest.mark.parametrize( + 'secret', + ( + 'BEEF1234', + ), + ) + def test_failure(self, secret): + assert not filters.is_sequential_string(secret) diff --git a/tests/plugins/high_entropy_strings_test.py b/tests/plugins/high_entropy_strings_test.py index e44035866..6861f13e6 100644 --- a/tests/plugins/high_entropy_strings_test.py +++ b/tests/plugins/high_entropy_strings_test.py @@ -165,7 +165,8 @@ def setup(self): 'Location: test_data/config.ini:10', 'Location: test_data/config.ini:15', 'Location: test_data/config.ini:21', - 'Location: test_data/config.ini:22', ], + 'Location: test_data/config.ini:22', + ], ), ( 'test_data/files/file_with_secrets.py',