Skip to content

Commit 2dce336

Browse files
author
Aaron Loo
committed
supporting multiword adhoc string scans
1 parent 79dad49 commit 2dce336

File tree

2 files changed

+37
-5
lines changed

2 files changed

+37
-5
lines changed

Diff for: detect_secrets/plugins/high_entropy_strings.py

+32 -5
Original file line number | Diff line number | Diff line change
@@ -144,7 +144,7 @@ def secret_generator(self, string, *args, **kwargs):
144144
def adhoc_scan(self, string):
145145
# Since it's an individual string, it's just bad UX to require quotes
146146
# around the expected secret.
147-
with self.non_quoted_string_regex():
147+
with self.non_quoted_string_regex(is_exact_match=False):
148148
results = self.analyze_line(
149149
string,
150150
line_num=0,
@@ -153,24 +153,51 @@ def adhoc_scan(self, string):
153153

154154
# NOTE: Trailing space allows for nicer formatting
155155
output = 'False' if not results else 'True '
156-
if self.regex.search(string):
156+
if results:
157+
# We currently assume that there's at most one secret per line.
157158
output += ' ({})'.format(
158-
round(self.calculate_shannon_entropy(string), 3),
159+
round(
160+
self.calculate_shannon_entropy(
161+
list(results.keys())[0].secret_value,
162+
),
163+
3,
164+
),
159165
)
166+
elif ' ' not in string:
167+
# In the case where the string is a single word, and it
168+
# matches the regex, we can show the entropy calculation,
169+
# to assist investigation when it's unclear *why* something
170+
# is not flagged.
171+
#
172+
# Conversely, if there are multiple words in the string,
173+
# the entropy value would be confusing, since it's not clear
174+
# which word the entropy is calculated for.
175+
matches = self.regex.search(string)
176+
if matches and matches.group(1) == string:
177+
output += ' ({})'.format(
178+
round(self.calculate_shannon_entropy(string), 3),
179+
)
160180

161181
return output
162182

163183
@contextmanager
164-
def non_quoted_string_regex(self):
184+
def non_quoted_string_regex(self, is_exact_match=True):
165185
"""For certain file formats, strings need not necessarily follow the
166186
normal convention of being denoted by single or double quotes. In these
167187
cases, we modify the regex accordingly.
168188
169189
Public, because detect_secrets.core.audit needs to reference it.
190+
191+
:param is_exact_match: True if you need to scan the string itself.
192+
However, if the string is a line of text, and you want to see
193+
whether a secret exists in this line, use False.
170194
"""
171195
old_regex = self.regex
172196

173-
regex_alternative = r'^([{}]+)$'.format(re.escape(self.charset))
197+
regex_alternative = r'([{}]+)'.format(re.escape(self.charset))
198+
if is_exact_match:
199+
regex_alternative = r'^' + regex_alternative + r'$'
200+
174201
self.regex = re.compile(regex_alternative)
175202

176203
try:

Diff for: tests/main_test.py

+5
Original file line number | Diff line number | Diff line change
@@ -136,6 +136,11 @@ def test_scan_with_exclude_args(self, mock_baseline_initialize):
136136
'False (2.252)',
137137
'False',
138138
),
139+
(
140+
'key: 012345678ab',
141+
'False',
142+
'True (3.459)',
143+
),
139144
],
140145
)
141146
def test_scan_string_basic(

0 commit comments

Comments
 (0)