Skip to content

Commit 1afb05f

Browse files
authored
↪️ Merge pull request #133 from Yelp/keyword_detector_accuracy_cls_yaml
Improve Keyword detector accuracy for .cls and .yaml files
2 parents 1fabf92 + af98fc0 commit 1afb05f

File tree

5 files changed

+162
-92
lines changed

5 files changed

+162
-92
lines changed

Diff for: detect_secrets/core/usage.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -81,7 +81,7 @@ def _add_filenames_argument(self):
8181
self.parser.add_argument(
8282
'filenames',
8383
nargs='*',
84-
help='Filenames to check',
84+
help='Filenames to check.',
8585
)
8686
return self
8787

@@ -427,5 +427,5 @@ def _add_keyword_exclude(self):
427427
self.parser.add_argument(
428428
'--keyword-exclude',
429429
type=str,
430-
help='Pass in regex to exclude false positives found by keyword detector',
430+
help='Pass in regex to exclude false positives found by keyword detector.',
431431
)

Diff for: detect_secrets/plugins/common/filetype.py

+17-7
Original file line numberDiff line numberDiff line change
@@ -2,10 +2,12 @@
22

33

44
class FileType(Enum):
5-
JAVASCRIPT = 0
6-
PHP = 1
7-
PYTHON = 2
8-
OTHER = 3
5+
CLS = 0
6+
JAVASCRIPT = 1
7+
PHP = 2
8+
PYTHON = 3
9+
YAML = 4
10+
OTHER = 5
911

1012

1113
def determine_file_type(filename):
@@ -14,10 +16,18 @@ def determine_file_type(filename):
1416
1517
:rtype: FileType
1618
"""
17-
if filename.endswith('.js'):
19+
if filename.endswith('.cls'):
20+
return FileType.CLS
21+
elif filename.endswith('.js'):
1822
return FileType.JAVASCRIPT
19-
elif filename.endswith('.py'):
20-
return FileType.PYTHON
2123
elif filename.endswith('.php'):
2224
return FileType.PHP
25+
elif filename.endswith('.py'):
26+
return FileType.PYTHON
27+
elif (
28+
filename.endswith(
29+
('.yaml', '.yml'),
30+
)
31+
):
32+
return FileType.YAML
2333
return FileType.OTHER

Diff for: detect_secrets/plugins/keyword.py

+13-8
Original file line numberDiff line numberDiff line change
@@ -135,7 +135,7 @@
135135
FOLLOWED_BY_EQUAL_SIGNS_REGEX: 9,
136136
FOLLOWED_BY_QUOTES_AND_SEMICOLON_REGEX: 5,
137137
}
138-
PYTHON_BLACKLIST_REGEX_TO_GROUP = {
138+
QUOTES_REQUIRED_BLACKLIST_REGEX_TO_GROUP = {
139139
FOLLOWED_BY_COLON_QUOTES_REQUIRED_REGEX: 7,
140140
FOLLOWED_BY_EQUAL_SIGNS_QUOTES_REQUIRED_REGEX: 9,
141141
FOLLOWED_BY_QUOTES_AND_SEMICOLON_REGEX: 5,
@@ -149,13 +149,11 @@ class KeywordDetector(BasePlugin):
149149

150150
secret_type = 'Secret Keyword'
151151

152-
def __init__(
153-
self, keyword_exclude=None,
154-
exclude_lines_regex=None, **kwargs
155-
):
152+
def __init__(self, keyword_exclude=None, exclude_lines_regex=None, **kwargs):
156153
super(KeywordDetector, self).__init__(
157154
exclude_lines_regex,
158155
)
156+
159157
self.keyword_exclude = None
160158
if keyword_exclude:
161159
self.keyword_exclude = re.compile(
@@ -187,8 +185,11 @@ def analyze_string_content(self, string, line_num, filename):
187185
def secret_generator(self, string, filetype):
188186
lowered_string = string.lower()
189187

190-
if filetype == FileType.PYTHON:
191-
blacklist_regex_to_group = PYTHON_BLACKLIST_REGEX_TO_GROUP
188+
if filetype in (
189+
FileType.CLS,
190+
FileType.PYTHON,
191+
):
192+
blacklist_regex_to_group = QUOTES_REQUIRED_BLACKLIST_REGEX_TO_GROUP
192193
else:
193194
blacklist_regex_to_group = BLACKLIST_REGEX_TO_GROUP
194195

@@ -217,9 +218,13 @@ def probably_false_positive(lowered_secret, filetype):
217218
or lowered_secret.startswith('fs.read')
218219
or lowered_secret == 'new'
219220
)
220-
) or ( # If it is a .php file, do not report $variables
221+
) or (
221222
filetype == FileType.PHP
222223
and lowered_secret[0] == '$'
224+
) or (
225+
filetype == FileType.YAML
226+
and lowered_secret.startswith('{{')
227+
and lowered_secret.endswith('}}')
223228
)
224229
):
225230
return True

Diff for: tests/core/usage_test.py

+7-4
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,8 @@ class TestPluginOptions(object):
1010
@staticmethod
1111
def parse_args(argument_string=''):
1212
# PluginOptions are added in pre-commit hook
13-
return ParserBuilder().add_pre_commit_arguments()\
13+
return ParserBuilder()\
14+
.add_pre_commit_arguments()\
1415
.parse_args(argument_string.split())
1516

1617
def test_added_by_default(self):
@@ -61,9 +62,11 @@ def test_custom_limit(self, argument_string, expected_value):
6162
if expected_value is not None:
6263
args = self.parse_args(argument_string)
6364

64-
assert args.plugins['HexHighEntropyString'][
65-
'hex_limit'
66-
] == expected_value
65+
assert (
66+
args.plugins['HexHighEntropyString']['hex_limit']
67+
68+
== expected_value
69+
)
6770
else:
6871
with pytest.raises(SystemExit):
6972
self.parse_args(argument_string)

Diff for: tests/plugins/keyword_test.py

+123-71
Original file line numberDiff line numberDiff line change
@@ -34,31 +34,31 @@
3434
]
3535
STANDARD_POSITIVES = {
3636
# FOLLOWED_BY_COLON_RE
37-
"'theapikey': 'h}o)p${e]nob(ody[finds>-_$#thisone'",
38-
'"theapikey": "h}o)p${e]nob(ody[finds>-_$#thisone"',
39-
'apikey: h}o)p${e]nob(ody[finds>-_$#thisone',
40-
'apikey:h}o)p${e]nob(ody[finds>-_$#thisone',
41-
'theapikey:h}o)p${e]nob(ody[finds>-_$#thisone',
42-
'apikey: "h}o)p${e]nob(ody[finds>-_$#thisone"',
43-
"apikey: 'h}o)p${e]nob(ody[finds>-_$#thisone'",
37+
"'theapikey': '{{h}o)p${e]nob(ody[finds>-_$#thisone}}'",
38+
'"theapikey": "{{h}o)p${e]nob(ody[finds>-_$#thisone}}"',
39+
'apikey: {{h}o)p${e]nob(ody[finds>-_$#thisone}}',
40+
'apikey:{{h}o)p${e]nob(ody[finds>-_$#thisone}}',
41+
'theapikey:{{h}o)p${e]nob(ody[finds>-_$#thisone}}',
42+
'apikey: "{{h}o)p${e]nob(ody[finds>-_$#thisone}}"',
43+
"apikey: '{{h}o)p${e]nob(ody[finds>-_$#thisone}}'",
4444
# FOLLOWED_BY_EQUAL_SIGNS_RE
45-
'some_dict["secret"] = "h}o)p${e]nob(ody[finds>-_$#thisone"',
46-
"some_dict['secret'] = h}o)p${e]nob(ody[finds>-_$#thisone",
47-
'my_password=h}o)p${e]nob(ody[finds>-_$#thisone',
48-
'my_password= h}o)p${e]nob(ody[finds>-_$#thisone',
49-
'my_password =h}o)p${e]nob(ody[finds>-_$#thisone',
50-
'my_password = h}o)p${e]nob(ody[finds>-_$#thisone',
51-
'my_password =h}o)p${e]nob(ody[finds>-_$#thisone',
52-
'the_password=h}o)p${e]nob(ody[finds>-_$#thisone\n',
53-
'the_password= "h}o)p${e]nob(ody[finds>-_$#thisone"\n',
54-
'the_password=\'h}o)p${e]nob(ody[finds>-_$#thisone\'\n',
45+
'some_dict["secret"] = "{{h}o)p${e]nob(ody[finds>-_$#thisone}}"',
46+
"some_dict['secret'] = {{h}o)p${e]nob(ody[finds>-_$#thisone}}",
47+
'my_password={{h}o)p${e]nob(ody[finds>-_$#thisone}}',
48+
'my_password= {{h}o)p${e]nob(ody[finds>-_$#thisone}}',
49+
'my_password ={{h}o)p${e]nob(ody[finds>-_$#thisone}}',
50+
'my_password = {{h}o)p${e]nob(ody[finds>-_$#thisone}}',
51+
'my_password ={{h}o)p${e]nob(ody[finds>-_$#thisone}}',
52+
'the_password={{h}o)p${e]nob(ody[finds>-_$#thisone}}\n',
53+
'the_password= "{{h}o)p${e]nob(ody[finds>-_$#thisone}}"\n',
54+
'the_password=\'{{h}o)p${e]nob(ody[finds>-_$#thisone}}\'\n',
5555
# FOLLOWED_BY_QUOTES_AND_SEMICOLON_RE
56-
'apikey "h}o)p${e]nob(ody[finds>-_$#thisone";', # Double-quotes
57-
'fooapikeyfoo "h}o)p${e]nob(ody[finds>-_$#thisone";', # Double-quotes
58-
'fooapikeyfoo"h}o)p${e]nob(ody[finds>-_$#thisone";', # Double-quotes
59-
'private_key \'h}o)p${e]nob(ody[finds>-_$#thisone\';', # Single-quotes
60-
'fooprivate_keyfoo\'h}o)p${e]nob(ody[finds>-_$#thisone\';', # Single-quotes
61-
'fooprivate_key\'h}o)p${e]nob(ody[finds>-_$#thisone\';', # Single-quotes
56+
'apikey "{{h}o)p${e]nob(ody[finds>-_$#thisone}}";', # Double-quotes
57+
'fooapikeyfoo "{{h}o)p${e]nob(ody[finds>-_$#thisone}}";', # Double-quotes
58+
'fooapikeyfoo"{{h}o)p${e]nob(ody[finds>-_$#thisone}}";', # Double-quotes
59+
'private_key \'{{h}o)p${e]nob(ody[finds>-_$#thisone}}\';', # Single-quotes
60+
'fooprivate_keyfoo\'{{h}o)p${e]nob(ody[finds>-_$#thisone}}\';', # Single-quotes
61+
'fooprivate_key\'{{h}o)p${e]nob(ody[finds>-_$#thisone}}\';', # Single-quotes
6262
}
6363

6464

@@ -77,100 +77,152 @@ def test_analyze_standard_positives(self, file_content):
7777
for potential_secret in output:
7878
assert 'mock_filename' == potential_secret.filename
7979
assert (
80-
potential_secret.secret_hash
81-
== PotentialSecret.hash_secret('h}o)p${e]nob(ody[finds>-_$#thisone')
80+
potential_secret.secret_hash ==
81+
PotentialSecret.hash_secret('{{h}o)p${e]nob(ody[finds>-_$#thisone}}')
8282
)
8383

8484
@pytest.mark.parametrize(
8585
'file_content',
86-
STANDARD_POSITIVES - {
87-
# FOLLOWED_BY_COLON_QUOTES_REQUIRED_RE
88-
'apikey: h}o)p${e]nob(ody[finds>-_$#thisone',
89-
'apikey:h}o)p${e]nob(ody[finds>-_$#thisone',
90-
'theapikey:h}o)p${e]nob(ody[finds>-_$#thisone',
91-
# FOLLOWED_BY_EQUAL_SIGNS_QUOTES_REQUIRED_RE
92-
"some_dict['secret'] = h}o)p${e]nob(ody[finds>-_$#thisone",
93-
'my_password=h}o)p${e]nob(ody[finds>-_$#thisone',
94-
'my_password= h}o)p${e]nob(ody[finds>-_$#thisone',
95-
'my_password =h}o)p${e]nob(ody[finds>-_$#thisone',
96-
'my_password = h}o)p${e]nob(ody[finds>-_$#thisone',
97-
'my_password =h}o)p${e]nob(ody[finds>-_$#thisone',
98-
'the_password=h}o)p${e]nob(ody[finds>-_$#thisone\n',
99-
},
86+
STANDARD_POSITIVES,
87+
)
88+
def test_analyze_with_line_exclude(self, file_content):
89+
logic = KeywordDetector(keyword_exclude='thisone')
90+
91+
f = mock_file_object(file_content)
92+
output = logic.analyze(f, 'mock_filename.foo')
93+
assert len(output) == 0
94+
95+
@pytest.mark.parametrize(
96+
'file_content, file_extension',
97+
(
98+
(positive, file_extension)
99+
for positive in (
100+
STANDARD_POSITIVES - {
101+
# FOLLOWED_BY_COLON_QUOTES_REQUIRED_RE
102+
'apikey: {{h}o)p${e]nob(ody[finds>-_$#thisone}}',
103+
'apikey:{{h}o)p${e]nob(ody[finds>-_$#thisone}}',
104+
'theapikey:{{h}o)p${e]nob(ody[finds>-_$#thisone}}',
105+
# FOLLOWED_BY_EQUAL_SIGNS_QUOTES_REQUIRED_RE
106+
"some_dict['secret'] = {{h}o)p${e]nob(ody[finds>-_$#thisone}}",
107+
'my_password={{h}o)p${e]nob(ody[finds>-_$#thisone}}',
108+
'my_password= {{h}o)p${e]nob(ody[finds>-_$#thisone}}',
109+
'my_password ={{h}o)p${e]nob(ody[finds>-_$#thisone}}',
110+
'my_password = {{h}o)p${e]nob(ody[finds>-_$#thisone}}',
111+
'my_password ={{h}o)p${e]nob(ody[finds>-_$#thisone}}',
112+
'the_password={{h}o)p${e]nob(ody[finds>-_$#thisone}}\n',
113+
}
114+
) for file_extension in (
115+
'.cls',
116+
'.py',
117+
)
118+
),
100119
)
101-
def test_analyze_python_positives(self, file_content):
120+
def test_analyze_quotes_required_positives(self, file_content, file_extension):
102121
logic = KeywordDetector()
103122

104123
f = mock_file_object(file_content)
105-
output = logic.analyze(f, 'mock_filename.py')
124+
mock_filename = 'mock_filename{}'.format(file_extension)
125+
output = logic.analyze(f, mock_filename)
106126
assert len(output) == 1
107127
for potential_secret in output:
108-
assert 'mock_filename.py' == potential_secret.filename
128+
assert mock_filename == potential_secret.filename
109129
assert (
110-
potential_secret.secret_hash
111-
== PotentialSecret.hash_secret('h}o)p${e]nob(ody[finds>-_$#thisone')
130+
potential_secret.secret_hash ==
131+
PotentialSecret.hash_secret('{{h}o)p${e]nob(ody[finds>-_$#thisone}}')
112132
)
113133

114134
@pytest.mark.parametrize(
115-
'negative',
135+
'file_content',
116136
STANDARD_NEGATIVES,
117137
)
118-
def test_analyze_standard_negatives(self, negative):
138+
def test_analyze_standard_negatives(self, file_content):
119139
logic = KeywordDetector()
120140

121-
f = mock_file_object(negative)
141+
f = mock_file_object(file_content)
122142
output = logic.analyze(f, 'mock_filename.foo')
123143
assert len(output) == 0
124144

125145
@pytest.mark.parametrize(
126-
'js_negative',
146+
'file_content',
127147
STANDARD_NEGATIVES + [
128148
# FOLLOWED_BY_COLON_RE
129149
'apiKey: this.apiKey,',
130150
"apiKey: fs.readFileSync('foo',",
131151
],
132152
)
133-
def test_analyze_javascript_negatives(self, js_negative):
153+
def test_analyze_javascript_negatives(self, file_content):
134154
logic = KeywordDetector()
135155

136-
f = mock_file_object(js_negative)
156+
f = mock_file_object(file_content)
137157
output = logic.analyze(f, 'mock_filename.js')
138158
assert len(output) == 0
139159

140160
@pytest.mark.parametrize(
141-
'secret_starting_with_dollar_sign',
161+
'file_content',
142162
STANDARD_NEGATIVES + [
143163
# FOLLOWED_BY_EQUAL_SIGNS_RE
144164
'$password = $input;',
145165
],
146166
)
147-
def test_analyze_php_negatives(self, secret_starting_with_dollar_sign):
167+
def test_analyze_php_negatives(self, file_content):
148168
logic = KeywordDetector()
149169

150-
f = mock_file_object(secret_starting_with_dollar_sign)
170+
f = mock_file_object(file_content)
151171
output = logic.analyze(f, 'mock_filename.php')
152172
assert len(output) == 0
153173

154174
@pytest.mark.parametrize(
155-
'secret_with_no_quote',
156-
STANDARD_NEGATIVES + [
157-
# FOLLOWED_BY_COLON_QUOTES_REQUIRED_RE
158-
'apikey: hope]nobody[finds>-_$#thisone',
159-
'apikey:hope]nobody[finds>-_$#thisone',
160-
'theapikey:hope]nobody[finds>-_$#thisone',
161-
# FOLLOWED_BY_EQUAL_SIGNS_QUOTES_REQUIRED_RE
162-
"some_dict['secret'] = hope]nobody[finds>-_$#thisone",
163-
'my_password=hope]nobody[finds>-_$#thisone',
164-
'my_password= hope]nobody[finds>-_$#thisone',
165-
'my_password =hope]nobody[finds>-_$#thisone',
166-
'my_password = hope]nobody[finds>-_$#thisone',
167-
'my_password =hope]nobody[finds>-_$#thisone',
168-
'the_password=hope]nobody[finds>-_$#thisone\n',
169-
],
175+
'file_content, file_extension',
176+
(
177+
(negative, file_extension)
178+
for negative in (
179+
STANDARD_NEGATIVES + [
180+
# FOLLOWED_BY_COLON_QUOTES_REQUIRED_RE
181+
'apikey: hope]nobody[finds>-_$#thisone',
182+
'apikey:hope]nobody[finds>-_$#thisone',
183+
'theapikey:hope]nobody[finds>-_$#thisone',
184+
# FOLLOWED_BY_EQUAL_SIGNS_QUOTES_REQUIRED_RE
185+
"some_dict['secret'] = hope]nobody[finds>-_$#thisone",
186+
'my_password=hope]nobody[finds>-_$#thisone',
187+
'my_password= hope]nobody[finds>-_$#thisone',
188+
'my_password =hope]nobody[finds>-_$#thisone',
189+
'my_password = hope]nobody[finds>-_$#thisone',
190+
'my_password =hope]nobody[finds>-_$#thisone',
191+
'the_password=hope]nobody[finds>-_$#thisone\n',
192+
]
193+
) for file_extension in (
194+
'.cls',
195+
'.py',
196+
)
197+
),
170198
)
171-
def test_analyze_python_negatives(self, secret_with_no_quote):
199+
def test_analyze_quotes_required_negatives(self, file_content, file_extension):
172200
logic = KeywordDetector()
173201

174-
f = mock_file_object(secret_with_no_quote)
175-
output = logic.analyze(f, 'mock_filename.py')
202+
f = mock_file_object(file_content)
203+
output = logic.analyze(
204+
f,
205+
'mock_filename{}'.format(file_extension),
206+
)
207+
assert len(output) == 0
208+
209+
@pytest.mark.parametrize(
210+
'file_content, file_extension',
211+
(
212+
(yaml_negative, file_extension)
213+
for yaml_negative in STANDARD_POSITIVES
214+
for file_extension in (
215+
'.yaml',
216+
'.yml',
217+
)
218+
),
219+
)
220+
def test_analyze_yaml_negatives(self, file_content, file_extension):
221+
logic = KeywordDetector()
222+
223+
f = mock_file_object(file_content)
224+
output = logic.analyze(
225+
f,
226+
'mock_filename{}'.format(file_extension),
227+
)
176228
assert len(output) == 0

0 commit comments

Comments
 (0)