Skip to content

Commit c0bea0b

Browse files
authored
Merge pull request #76 from Yelp/password_plugin
Added a KeywordDetector plugin
2 parents 57b8c26 + d912ced commit c0bea0b

21 files changed

+191
-47
lines changed

Diff for: detect_secrets/core/audit.py

-4
Original file line numberDiff line numberDiff line change
@@ -321,10 +321,6 @@ def _highlight_secret(secret_line, secret, filename, plugin_settings):
321321
plugin.secret_type,
322322
filename,
323323
secret=raw_secret,
324-
325-
# This doesn't matter, because PotentialSecret only uses
326-
# line numbers for logging, and we're not logging it.
327-
lineno=0,
328324
)
329325

330326
# There could be more than two secrets on the same line.

Diff for: detect_secrets/core/potential_secret.py

+8-5
Original file line numberDiff line numberDiff line change
@@ -18,8 +18,8 @@ def __init__(
1818
self,
1919
typ,
2020
filename,
21-
lineno,
2221
secret,
22+
lineno=0,
2323
is_secret=None,
2424
):
2525
"""
@@ -31,13 +31,13 @@ def __init__(
3131
:type filename: str
3232
:param filename: name of file that this secret was found
3333
34+
:type secret: str
35+
:param secret: the actual secret identified
36+
3437
:type lineno: int
3538
:param lineno: location of secret, within filename.
3639
Merely used as a reference for easy triage.
3740
38-
:type secret: str
39-
:param secret: the actual secret identified
40-
4141
:type is_secret: bool|None
4242
:param is_secret: whether or not the secret is a true- or false- positive
4343
"""
@@ -87,7 +87,10 @@ def __ne__(self, other):
8787

8888
def __hash__(self):
8989
return hash(
90-
tuple([getattr(self, x) for x in self.fields_to_compare]),
90+
tuple(
91+
getattr(self, x)
92+
for x in self.fields_to_compare
93+
),
9194
)
9295

9396
def __str__(self): # pragma: no cover

Diff for: detect_secrets/core/secrets_collection.py

+7-7
Original file line numberDiff line numberDiff line change
@@ -88,8 +88,8 @@ def _load_baseline_from_dict(cls, data):
8888
secret = PotentialSecret(
8989
item['type'],
9090
filename,
91-
item['line_number'],
9291
secret='will be replaced',
92+
lineno=item['line_number'],
9393
is_secret=item.get('is_secret'),
9494
)
9595
secret.secret_hash = item['hashed_secret']
@@ -204,7 +204,7 @@ def get_secret(self, filename, secret, type_=None):
204204
if type_:
205205
# Optimized lookup, because we know the type of secret
206206
# (and therefore, its hash)
207-
tmp_secret = PotentialSecret(type_, filename, 0, 'will be overriden')
207+
tmp_secret = PotentialSecret(type_, filename, secret='will be overriden')
208208
tmp_secret.secret_hash = secret
209209

210210
if tmp_secret in self.data[filename]:
@@ -251,18 +251,18 @@ def _results_accumulator(self, filename):
251251
Caller is responsible for updating the dictionary with
252252
results of plugin analysis.
253253
"""
254-
results = {}
254+
file_results = {}
255255

256256
for plugin in self.plugins:
257-
yield results, plugin
257+
yield file_results, plugin
258258

259-
if not results:
259+
if not file_results:
260260
return
261261

262262
if filename not in self.data:
263-
self.data[filename] = results
263+
self.data[filename] = file_results
264264
else:
265-
self.data[filename].update(results)
265+
self.data[filename].update(file_results)
266266

267267
def _extract_secrets_from_file(self, f, filename):
268268
"""Extract secrets from a given file object.

Diff for: detect_secrets/core/usage.py

+5
Original file line numberDiff line numberDiff line change
@@ -225,6 +225,11 @@ class PluginOptions(object):
225225
disable_flag_text='--no-basic-auth-scan',
226226
disable_help_text='Disables scanning for Basic Auth formatted URIs.',
227227
),
228+
PluginDescriptor(
229+
classname='KeywordDetector',
230+
disable_flag_text='--no-keyword-scan',
231+
disable_help_text='Disables scanning for secret keywords.',
232+
),
228233
]
229234

230235
def __init__(self, parser):

Diff for: detect_secrets/plugins/basic_auth.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -22,8 +22,8 @@ def analyze_string(self, string, line_num, filename):
2222
secret = PotentialSecret(
2323
self.secret_type,
2424
filename,
25-
line_num,
2625
result,
26+
line_num,
2727
)
2828
output[secret] = secret
2929

Diff for: detect_secrets/plugins/core/initialize.py

+1
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
from ..basic_auth import BasicAuthDetector # noqa: F401
99
from ..high_entropy_strings import Base64HighEntropyString # noqa: F401
1010
from ..high_entropy_strings import HexHighEntropyString # noqa: F401
11+
from ..keyword import KeywordDetector # noqa: F401
1112
from ..private_key import PrivateKeyDetector # noqa: F401
1213
from detect_secrets.core.log import log
1314

Diff for: detect_secrets/plugins/high_entropy_strings.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -102,7 +102,7 @@ def analyze_string(self, string, line_num, filename):
102102
for result in self.secret_generator(string):
103103
if self.is_sequential_string(result):
104104
continue
105-
secret = PotentialSecret(self.secret_type, filename, line_num, result)
105+
secret = PotentialSecret(self.secret_type, filename, result, line_num)
106106
output[secret] = secret
107107

108108
return output

Diff for: detect_secrets/plugins/keyword.py

+72
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,72 @@
1+
"""
2+
This code was extracted in part from
3+
https://github.com/PyCQA/bandit. Using similar heuristic logic,
4+
we adapted it to fit our plugin infrastructure, to create an organized,
5+
concerted effort in detecting all types of secrets in code.
6+
7+
Copyright (c) 2014 Hewlett-Packard Development Company, L.P.
8+
9+
Permission is hereby granted, free of charge, to any person obtaining a copy
10+
of this software and associated documentation files (the "Software"), to deal
11+
in the Software without restriction, including without limitation the rights
12+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
13+
copies of the Software, and to permit persons to whom the Software is
14+
furnished to do so, subject to the following conditions:
15+
16+
The above copyright notice and this permission notice shall be included in
17+
all copies or substantial portions of the Software.
18+
19+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
20+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
21+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
22+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
23+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
24+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
25+
THE SOFTWARE.
26+
"""
27+
from __future__ import absolute_import
28+
29+
from .base import BasePlugin
30+
from detect_secrets.core.potential_secret import PotentialSecret
31+
from detect_secrets.plugins.core.constants import WHITELIST_REGEX
32+
33+
34+
# Keywords whose presence on a line marks that line as a potential secret.
#
# Every entry must be lowercase: KeywordDetector.analyze_string lowercases the
# line before handing it to secret_generator, which does a plain substring
# test. An uppercase entry (the previous 'PASS =') could therefore never match.
BLACKLIST = (
    'pass =',
    'password',
    'passwd',
    'pwd',
    'secret',
    'secrete',
    'token',
)
43+
44+
45+
class KeywordDetector(BasePlugin):
    """Plugin that flags strings containing any keyword from BLACKLIST.

    The reported "secret" is the matched keyword itself, not the value
    assigned next to it.
    """

    secret_type = 'Password'

    def analyze_string(self, string, line_num, filename):
        """Return a dict of PotentialSecret -> PotentialSecret for `string`.

        :type string: str
        :param string: the text to inspect

        :type line_num: int
        :param line_num: passed through to each PotentialSecret

        :type filename: str
        :param filename: passed through to each PotentialSecret

        :returns: empty dict when WHITELIST_REGEX matches `string` or no
            keyword is found; otherwise one entry per matched keyword.
        """
        # Whitelisted strings are never reported.
        if WHITELIST_REGEX.search(string):
            return {}

        findings = {}
        # Matching is done against the lowercased text.
        for keyword in self.secret_generator(string.lower()):
            candidate = PotentialSecret(
                self.secret_type,
                filename,
                keyword,
                line_num,
            )
            findings[candidate] = candidate

        return findings

    def secret_generator(self, string):
        """Lazily produce each BLACKLIST entry that is a substring of `string`."""
        return (keyword for keyword in BLACKLIST if keyword in string)

Diff for: detect_secrets/plugins/private_key.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
"""
22
This code was extracted in part from
33
https://github.com/pre-commit/pre-commit-hooks. Using similar heuristic logic,
4-
we adapt it to fit our plugin infrastructure, to create an organized,
4+
we adapted it to fit our plugin infrastructure, to create an organized,
55
concerted effort in detecting all types of secrets in code.
66
77
Copyright (c) 2014 pre-commit dev team: Anthony Sottile, Ken Struys
@@ -55,8 +55,8 @@ def analyze_string(self, string, line_num, filename):
5555
secret = PotentialSecret(
5656
self.secret_type,
5757
filename,
58-
line_num,
5958
identifier,
59+
line_num,
6060
)
6161
output[secret] = secret
6262

Diff for: test_data/files/file_with_no_secrets.py

+1-2
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,5 @@
11
#!/usr/bin/python
2-
# Will change this later.
3-
SUPER_SECRET_VALUE = "this is just a long string, like a user facing error message"
2+
REGULAR_VALUE = "this is just a long string, like a user facing error message"
43

54

65
def main():

Diff for: test_data/files/file_with_secrets.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
#!/usr/bin/python
22
# Will change this later.
3-
SUPER_SECRET_VALUE = 'c3VwZXIgbG9uZyBzdHJpbmcgc2hvdWxkIGNhdXNlIGVub3VnaCBlbnRyb3B5'
3+
SUPER_SEECRET_VALUE = 'c3VwZXIgbG9uZyBzdHJpbmcgc2hvdWxkIGNhdXNlIGVub3VnaCBlbnRyb3B5'
44
VERY_SECRET_TOO = 'f6CGV4aMM9zedoh3OUNbSakBymo7yplB' # pragma: whitelist secret
55

66

Diff for: test_data/short_files/first_line.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
secret = 'BEEF0123456789a'
1+
seecret = 'BEEF0123456789a'
22
skipped_sequential_false_positive = '0123456789a'
33
print('second line')
44
var = 'third line'

Diff for: test_data/short_files/last_line.ini

+2-2
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
[some section]
2-
secrets_for_no_one_to_find =
2+
secreets_for_no_one_to_find =
33
hunter2
4-
password123
4+
passsword123
55
BEEF0123456789a

Diff for: test_data/short_files/middle_line.yml

+2-2
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
deploy:
22
user: aaronloo
3-
password:
3+
passhword:
44
secure: thequickbrownfoxjumpsoverthelazydog
55
on:
6-
repo: Yelp/detect-secrets
6+
repo: Yelp/detect-sechrets

Diff for: testing/factories.py

+3-3
Original file line numberDiff line numberDiff line change
@@ -4,11 +4,11 @@
44
from detect_secrets.core.secrets_collection import SecretsCollection
55

66

7-
def potential_secret_factory(type_='type', filename='filename', lineno=1, secret='secret'):
7+
def potential_secret_factory(type_='type', filename='filename', secret='secret', lineno=1):
88
"""This is only marginally better than creating PotentialSecret objects directly,
99
because of default values.
1010
"""
11-
return PotentialSecret(type_, filename, lineno, secret)
11+
return PotentialSecret(type_, filename, secret, lineno)
1212

1313

1414
def secrets_collection_factory(secrets=None, plugins=(), exclude_regex=''):
@@ -51,7 +51,7 @@ def _add_secret(collection, type_='type', secret='secret', filename='filename',
5151
tmp_secret = potential_secret_factory(
5252
type_=type_,
5353
filename=filename,
54-
lineno=lineno,
5554
secret=secret,
55+
lineno=lineno,
5656
)
5757
collection.data[filename][tmp_secret] = tmp_secret

Diff for: tests/core/baseline_test.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -178,7 +178,7 @@ def test_new_secret_line_old_file(self):
178178
results = get_secrets_not_in_baseline(new_findings, baseline)
179179

180180
assert len(results.data['filename']) == 1
181-
secretA = PotentialSecret('type', 'filename', 1, 'secret1')
181+
secretA = PotentialSecret('type', 'filename', 'secret1', 1)
182182
assert results.data['filename'][secretA].secret_hash == \
183183
PotentialSecret.hash_secret('secret1')
184184
assert baseline.data == backup_baseline
@@ -201,7 +201,7 @@ def test_rolled_creds(self):
201201

202202
assert len(results.data['filename']) == 1
203203

204-
secretA = PotentialSecret('type', 'filename', 1, 'secret_new')
204+
secretA = PotentialSecret('type', 'filename', 'secret_new', 1)
205205
assert results.data['filename'][secretA].secret_hash == \
206206
PotentialSecret.hash_secret('secret_new')
207207
assert baseline.data == backup_baseline

Diff for: tests/core/potential_secret_test.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99
class TestPotentialSecret(object):
1010

1111
@pytest.mark.parametrize(
12-
'a,b,is_equal',
12+
'a, b, is_equal',
1313
[
1414
(
1515
potential_secret_factory(lineno=1),

Diff for: tests/core/secrets_collection_test.py

+43-8
Original file line numberDiff line numberDiff line change
@@ -93,6 +93,28 @@ def test_success_multiple_plugins(self):
9393
line_numbers = [entry.lineno for entry in logic.data['filename']]
9494
assert set(line_numbers) == set([1, 2, 3])
9595

96+
def test_reporting_of_password_plugin_secrets_if_reported_already(self):
    """Scan with both mock plugins on top of one pre-seeded secret.

    The collection starts with a single secret on line 3. Each of the two
    mock plugins reports one secret on line 2, and all three entries are
    expected to be present after the scan.
    """
    preexisting = {
        'filename': 'filename',
        'lineno': 3,
    }
    collection = secrets_collection_factory(
        secrets=[preexisting],
        plugins=(
            MockPasswordPluginValue(),
            MockPluginFileValue(),
        ),
    )

    with mock_open('junk text here'):
        collection.scan_file('filename')

    found = collection.data['filename']
    assert len(found) == 3

    # Two findings share line 2; the seeded secret sits on line 3.
    seen_lines = set(entry.lineno for entry in found)
    assert seen_lines == set([2, 3])
117+
96118
def test_unicode_decode_error(self, mock_log):
97119
logic = secrets_collection_factory(
98120
plugins=(MockPluginFileValue(),),
@@ -203,12 +225,14 @@ def test_optional_type(self, filename, secret_hash, expected_value):
203225
)
204226
def test_explicit_type_for_optimization(self, type_, is_none):
205227
with self._mock_secret_hash():
206-
logic = secrets_collection_factory(secrets=[
207-
{
208-
'filename': 'filename',
209-
'type_': 'type',
210-
},
211-
])
228+
logic = secrets_collection_factory(
229+
secrets=[
230+
{
231+
'filename': 'filename',
232+
'type_': 'type',
233+
},
234+
],
235+
)
212236

213237
assert (logic.get_secret('filename', 'secret_hash', type_) is None) == is_none
214238

@@ -343,7 +367,7 @@ class MockPluginFixedValue(MockBasePlugin):
343367
def analyze(self, f, filename):
344368
# We're not testing the plugin's ability to analyze secrets, so
345369
# it doesn't matter what we return
346-
secret = PotentialSecret('mock fixed value type', filename, 1, 'asdf')
370+
secret = PotentialSecret('mock fixed value type', filename, 'asdf', 1)
347371
return {secret: secret}
348372

349373

@@ -354,8 +378,19 @@ class MockPluginFileValue(MockBasePlugin):
354378
def analyze(self, f, filename):
355379
# We're not testing the plugin's ability to analyze secrets, so
356380
# it doesn't matter what we return
357-
secret = PotentialSecret('mock file value type', filename, 2, f.read().strip())
381+
secret = PotentialSecret('mock file value type', filename, f.read().strip(), 2)
358382
return {secret: secret}
359383

360384

385+
class MockPasswordPluginValue(MockBasePlugin):
    """Mock plugin reporting the file's stripped contents as a 'Password'
    secret on line 2.
    """

    secret_type = 'mock_plugin_file_value'

    def analyze(self, f, filename):
        # The whole (stripped) file body is used as the secret value.
        contents = f.read().strip()
        finding = PotentialSecret('Password', filename, contents, 2)
        return {finding: finding}
394+
395+
361396
MockUnicodeDecodeError = UnicodeDecodeError('encoding type', b'subject', 0, 1, 'exception message')

0 commit comments

Comments
 (0)