Skip to content

Improve Keyword detector accuracy for .cls and .yaml files #133

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Mar 20, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions detect_secrets/core/usage.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,7 @@ def _add_filenames_argument(self):
self.parser.add_argument(
'filenames',
nargs='*',
help='Filenames to check',
help='Filenames to check.',
)
return self

Expand Down Expand Up @@ -427,5 +427,5 @@ def _add_keyword_exclude(self):
self.parser.add_argument(
'--keyword-exclude',
type=str,
help='Pass in regex to exclude false positives found by keyword detector',
help='Pass in regex to exclude false positives found by keyword detector.',
)
24 changes: 17 additions & 7 deletions detect_secrets/plugins/common/filetype.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,12 @@


class FileType(Enum):
JAVASCRIPT = 0
PHP = 1
PYTHON = 2
OTHER = 3
CLS = 0
JAVASCRIPT = 1
PHP = 2
PYTHON = 3
YAML = 4
OTHER = 5


def determine_file_type(filename):
Expand All @@ -14,10 +16,18 @@ def determine_file_type(filename):

:rtype: FileType
"""
if filename.endswith('.js'):
if filename.endswith('.cls'):
return FileType.CLS
elif filename.endswith('.js'):
return FileType.JAVASCRIPT
elif filename.endswith('.py'):
return FileType.PYTHON
elif filename.endswith('.php'):
return FileType.PHP
elif filename.endswith('.py'):
return FileType.PYTHON
elif (
filename.endswith(
('.yaml', '.yml'),
)
):
return FileType.YAML
return FileType.OTHER
21 changes: 13 additions & 8 deletions detect_secrets/plugins/keyword.py
Original file line number Diff line number Diff line change
Expand Up @@ -135,7 +135,7 @@
FOLLOWED_BY_EQUAL_SIGNS_REGEX: 9,
FOLLOWED_BY_QUOTES_AND_SEMICOLON_REGEX: 5,
}
PYTHON_BLACKLIST_REGEX_TO_GROUP = {
QUOTES_REQUIRED_BLACKLIST_REGEX_TO_GROUP = {
FOLLOWED_BY_COLON_QUOTES_REQUIRED_REGEX: 7,
FOLLOWED_BY_EQUAL_SIGNS_QUOTES_REQUIRED_REGEX: 9,
FOLLOWED_BY_QUOTES_AND_SEMICOLON_REGEX: 5,
Expand All @@ -149,13 +149,11 @@ class KeywordDetector(BasePlugin):

secret_type = 'Secret Keyword'

def __init__(
self, keyword_exclude=None,
exclude_lines_regex=None, **kwargs
):
def __init__(self, keyword_exclude=None, exclude_lines_regex=None, **kwargs):
super(KeywordDetector, self).__init__(
exclude_lines_regex,
)

self.keyword_exclude = None
if keyword_exclude:
self.keyword_exclude = re.compile(
Expand Down Expand Up @@ -187,8 +185,11 @@ def analyze_string_content(self, string, line_num, filename):
def secret_generator(self, string, filetype):
lowered_string = string.lower()

if filetype == FileType.PYTHON:
blacklist_regex_to_group = PYTHON_BLACKLIST_REGEX_TO_GROUP
if filetype in (
FileType.CLS,
FileType.PYTHON,
):
blacklist_regex_to_group = QUOTES_REQUIRED_BLACKLIST_REGEX_TO_GROUP
else:
blacklist_regex_to_group = BLACKLIST_REGEX_TO_GROUP

Expand Down Expand Up @@ -217,9 +218,13 @@ def probably_false_positive(lowered_secret, filetype):
or lowered_secret.startswith('fs.read')
or lowered_secret == 'new'
)
) or ( # If it is a .php file, do not report $variables
) or (
filetype == FileType.PHP
and lowered_secret[0] == '$'
) or (
filetype == FileType.YAML
and lowered_secret.startswith('{{')
and lowered_secret.endswith('}}')
)
):
return True
Expand Down
11 changes: 7 additions & 4 deletions tests/core/usage_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,8 @@ class TestPluginOptions(object):
@staticmethod
def parse_args(argument_string=''):
# PluginOptions are added in pre-commit hook
return ParserBuilder().add_pre_commit_arguments()\
return ParserBuilder()\
.add_pre_commit_arguments()\
.parse_args(argument_string.split())

def test_added_by_default(self):
Expand Down Expand Up @@ -61,9 +62,11 @@ def test_custom_limit(self, argument_string, expected_value):
if expected_value is not None:
args = self.parse_args(argument_string)

assert args.plugins['HexHighEntropyString'][
'hex_limit'
] == expected_value
assert (
args.plugins['HexHighEntropyString']['hex_limit']

== expected_value
)
else:
with pytest.raises(SystemExit):
self.parse_args(argument_string)
194 changes: 123 additions & 71 deletions tests/plugins/keyword_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,31 +34,31 @@
]
STANDARD_POSITIVES = {
# FOLLOWED_BY_COLON_RE
"'theapikey': 'h}o)p${e]nob(ody[finds>-_$#thisone'",
'"theapikey": "h}o)p${e]nob(ody[finds>-_$#thisone"',
'apikey: h}o)p${e]nob(ody[finds>-_$#thisone',
'apikey:h}o)p${e]nob(ody[finds>-_$#thisone',
'theapikey:h}o)p${e]nob(ody[finds>-_$#thisone',
'apikey: "h}o)p${e]nob(ody[finds>-_$#thisone"',
"apikey: 'h}o)p${e]nob(ody[finds>-_$#thisone'",
"'theapikey': '{{h}o)p${e]nob(ody[finds>-_$#thisone}}'",
'"theapikey": "{{h}o)p${e]nob(ody[finds>-_$#thisone}}"',
'apikey: {{h}o)p${e]nob(ody[finds>-_$#thisone}}',
'apikey:{{h}o)p${e]nob(ody[finds>-_$#thisone}}',
'theapikey:{{h}o)p${e]nob(ody[finds>-_$#thisone}}',
'apikey: "{{h}o)p${e]nob(ody[finds>-_$#thisone}}"',
"apikey: '{{h}o)p${e]nob(ody[finds>-_$#thisone}}'",
# FOLLOWED_BY_EQUAL_SIGNS_RE
'some_dict["secret"] = "h}o)p${e]nob(ody[finds>-_$#thisone"',
"some_dict['secret'] = h}o)p${e]nob(ody[finds>-_$#thisone",
'my_password=h}o)p${e]nob(ody[finds>-_$#thisone',
'my_password= h}o)p${e]nob(ody[finds>-_$#thisone',
'my_password =h}o)p${e]nob(ody[finds>-_$#thisone',
'my_password = h}o)p${e]nob(ody[finds>-_$#thisone',
'my_password =h}o)p${e]nob(ody[finds>-_$#thisone',
'the_password=h}o)p${e]nob(ody[finds>-_$#thisone\n',
'the_password= "h}o)p${e]nob(ody[finds>-_$#thisone"\n',
'the_password=\'h}o)p${e]nob(ody[finds>-_$#thisone\'\n',
'some_dict["secret"] = "{{h}o)p${e]nob(ody[finds>-_$#thisone}}"',
"some_dict['secret'] = {{h}o)p${e]nob(ody[finds>-_$#thisone}}",
'my_password={{h}o)p${e]nob(ody[finds>-_$#thisone}}',
'my_password= {{h}o)p${e]nob(ody[finds>-_$#thisone}}',
'my_password ={{h}o)p${e]nob(ody[finds>-_$#thisone}}',
'my_password = {{h}o)p${e]nob(ody[finds>-_$#thisone}}',
'my_password ={{h}o)p${e]nob(ody[finds>-_$#thisone}}',
'the_password={{h}o)p${e]nob(ody[finds>-_$#thisone}}\n',
'the_password= "{{h}o)p${e]nob(ody[finds>-_$#thisone}}"\n',
'the_password=\'{{h}o)p${e]nob(ody[finds>-_$#thisone}}\'\n',
# FOLLOWED_BY_QUOTES_AND_SEMICOLON_RE
'apikey "h}o)p${e]nob(ody[finds>-_$#thisone";', # Double-quotes
'fooapikeyfoo "h}o)p${e]nob(ody[finds>-_$#thisone";', # Double-quotes
'fooapikeyfoo"h}o)p${e]nob(ody[finds>-_$#thisone";', # Double-quotes
'private_key \'h}o)p${e]nob(ody[finds>-_$#thisone\';', # Single-quotes
'fooprivate_keyfoo\'h}o)p${e]nob(ody[finds>-_$#thisone\';', # Single-quotes
'fooprivate_key\'h}o)p${e]nob(ody[finds>-_$#thisone\';', # Single-quotes
'apikey "{{h}o)p${e]nob(ody[finds>-_$#thisone}}";', # Double-quotes
'fooapikeyfoo "{{h}o)p${e]nob(ody[finds>-_$#thisone}}";', # Double-quotes
'fooapikeyfoo"{{h}o)p${e]nob(ody[finds>-_$#thisone}}";', # Double-quotes
'private_key \'{{h}o)p${e]nob(ody[finds>-_$#thisone}}\';', # Single-quotes
'fooprivate_keyfoo\'{{h}o)p${e]nob(ody[finds>-_$#thisone}}\';', # Single-quotes
'fooprivate_key\'{{h}o)p${e]nob(ody[finds>-_$#thisone}}\';', # Single-quotes
}


Expand All @@ -77,100 +77,152 @@ def test_analyze_standard_positives(self, file_content):
for potential_secret in output:
assert 'mock_filename' == potential_secret.filename
assert (
potential_secret.secret_hash
== PotentialSecret.hash_secret('h}o)p${e]nob(ody[finds>-_$#thisone')
potential_secret.secret_hash ==
PotentialSecret.hash_secret('{{h}o)p${e]nob(ody[finds>-_$#thisone}}')
)

@pytest.mark.parametrize(
'file_content',
STANDARD_POSITIVES - {
# FOLLOWED_BY_COLON_QUOTES_REQUIRED_RE
'apikey: h}o)p${e]nob(ody[finds>-_$#thisone',
'apikey:h}o)p${e]nob(ody[finds>-_$#thisone',
'theapikey:h}o)p${e]nob(ody[finds>-_$#thisone',
# FOLLOWED_BY_EQUAL_SIGNS_QUOTES_REQUIRED_RE
"some_dict['secret'] = h}o)p${e]nob(ody[finds>-_$#thisone",
'my_password=h}o)p${e]nob(ody[finds>-_$#thisone',
'my_password= h}o)p${e]nob(ody[finds>-_$#thisone',
'my_password =h}o)p${e]nob(ody[finds>-_$#thisone',
'my_password = h}o)p${e]nob(ody[finds>-_$#thisone',
'my_password =h}o)p${e]nob(ody[finds>-_$#thisone',
'the_password=h}o)p${e]nob(ody[finds>-_$#thisone\n',
},
STANDARD_POSITIVES,
)
def test_analyze_with_line_exclude(self, file_content):
logic = KeywordDetector(keyword_exclude='thisone')

f = mock_file_object(file_content)
output = logic.analyze(f, 'mock_filename.foo')
assert len(output) == 0

@pytest.mark.parametrize(
'file_content, file_extension',
(
(positive, file_extension)
for positive in (
STANDARD_POSITIVES - {
# FOLLOWED_BY_COLON_QUOTES_REQUIRED_RE
'apikey: {{h}o)p${e]nob(ody[finds>-_$#thisone}}',
'apikey:{{h}o)p${e]nob(ody[finds>-_$#thisone}}',
'theapikey:{{h}o)p${e]nob(ody[finds>-_$#thisone}}',
# FOLLOWED_BY_EQUAL_SIGNS_QUOTES_REQUIRED_RE
"some_dict['secret'] = {{h}o)p${e]nob(ody[finds>-_$#thisone}}",
'my_password={{h}o)p${e]nob(ody[finds>-_$#thisone}}',
'my_password= {{h}o)p${e]nob(ody[finds>-_$#thisone}}',
'my_password ={{h}o)p${e]nob(ody[finds>-_$#thisone}}',
'my_password = {{h}o)p${e]nob(ody[finds>-_$#thisone}}',
'my_password ={{h}o)p${e]nob(ody[finds>-_$#thisone}}',
'the_password={{h}o)p${e]nob(ody[finds>-_$#thisone}}\n',
}
) for file_extension in (
'.cls',
'.py',
)
),
)
def test_analyze_python_positives(self, file_content):
def test_analyze_quotes_required_positives(self, file_content, file_extension):
logic = KeywordDetector()

f = mock_file_object(file_content)
output = logic.analyze(f, 'mock_filename.py')
mock_filename = 'mock_filename{}'.format(file_extension)
output = logic.analyze(f, mock_filename)
assert len(output) == 1
for potential_secret in output:
assert 'mock_filename.py' == potential_secret.filename
assert mock_filename == potential_secret.filename
assert (
potential_secret.secret_hash
== PotentialSecret.hash_secret('h}o)p${e]nob(ody[finds>-_$#thisone')
potential_secret.secret_hash ==
PotentialSecret.hash_secret('{{h}o)p${e]nob(ody[finds>-_$#thisone}}')
)

@pytest.mark.parametrize(
'negative',
'file_content',
STANDARD_NEGATIVES,
)
def test_analyze_standard_negatives(self, negative):
def test_analyze_standard_negatives(self, file_content):
logic = KeywordDetector()

f = mock_file_object(negative)
f = mock_file_object(file_content)
output = logic.analyze(f, 'mock_filename.foo')
assert len(output) == 0

@pytest.mark.parametrize(
'js_negative',
'file_content',
STANDARD_NEGATIVES + [
# FOLLOWED_BY_COLON_RE
'apiKey: this.apiKey,',
"apiKey: fs.readFileSync('foo',",
],
)
def test_analyze_javascript_negatives(self, js_negative):
def test_analyze_javascript_negatives(self, file_content):
logic = KeywordDetector()

f = mock_file_object(js_negative)
f = mock_file_object(file_content)
output = logic.analyze(f, 'mock_filename.js')
assert len(output) == 0

@pytest.mark.parametrize(
'secret_starting_with_dollar_sign',
'file_content',
STANDARD_NEGATIVES + [
# FOLLOWED_BY_EQUAL_SIGNS_RE
'$password = $input;',
],
)
def test_analyze_php_negatives(self, secret_starting_with_dollar_sign):
def test_analyze_php_negatives(self, file_content):
logic = KeywordDetector()

f = mock_file_object(secret_starting_with_dollar_sign)
f = mock_file_object(file_content)
output = logic.analyze(f, 'mock_filename.php')
assert len(output) == 0

@pytest.mark.parametrize(
'secret_with_no_quote',
STANDARD_NEGATIVES + [
# FOLLOWED_BY_COLON_QUOTES_REQUIRED_RE
'apikey: hope]nobody[finds>-_$#thisone',
'apikey:hope]nobody[finds>-_$#thisone',
'theapikey:hope]nobody[finds>-_$#thisone',
# FOLLOWED_BY_EQUAL_SIGNS_QUOTES_REQUIRED_RE
"some_dict['secret'] = hope]nobody[finds>-_$#thisone",
'my_password=hope]nobody[finds>-_$#thisone',
'my_password= hope]nobody[finds>-_$#thisone',
'my_password =hope]nobody[finds>-_$#thisone',
'my_password = hope]nobody[finds>-_$#thisone',
'my_password =hope]nobody[finds>-_$#thisone',
'the_password=hope]nobody[finds>-_$#thisone\n',
],
'file_content, file_extension',
(
(negative, file_extension)
for negative in (
STANDARD_NEGATIVES + [
# FOLLOWED_BY_COLON_QUOTES_REQUIRED_RE
'apikey: hope]nobody[finds>-_$#thisone',
'apikey:hope]nobody[finds>-_$#thisone',
'theapikey:hope]nobody[finds>-_$#thisone',
# FOLLOWED_BY_EQUAL_SIGNS_QUOTES_REQUIRED_RE
"some_dict['secret'] = hope]nobody[finds>-_$#thisone",
'my_password=hope]nobody[finds>-_$#thisone',
'my_password= hope]nobody[finds>-_$#thisone',
'my_password =hope]nobody[finds>-_$#thisone',
'my_password = hope]nobody[finds>-_$#thisone',
'my_password =hope]nobody[finds>-_$#thisone',
'the_password=hope]nobody[finds>-_$#thisone\n',
]
) for file_extension in (
'.cls',
'.py',
)
),
)
def test_analyze_python_negatives(self, secret_with_no_quote):
def test_analyze_quotes_required_negatives(self, file_content, file_extension):
logic = KeywordDetector()

f = mock_file_object(secret_with_no_quote)
output = logic.analyze(f, 'mock_filename.py')
f = mock_file_object(file_content)
output = logic.analyze(
f,
'mock_filename{}'.format(file_extension),
)
assert len(output) == 0

@pytest.mark.parametrize(
'file_content, file_extension',
(
(yaml_negative, file_extension)
for yaml_negative in STANDARD_POSITIVES
for file_extension in (
'.yaml',
'.yml',
)
),
)
def test_analyze_yaml_negatives(self, file_content, file_extension):
logic = KeywordDetector()

f = mock_file_object(file_content)
output = logic.analyze(
f,
'mock_filename{}'.format(file_extension),
)
assert len(output) == 0