Skip to content

Commit 74d3787

Browse files
committed
🎭 Improve the performance of line regexes
This fixes issue #244. The allowlist regexes and the --exclude-lines regex are now checked against a line only if a secret was found on that line, rather than against every line scanned.
1 parent 5ca1d7e commit 74d3787

File tree

3 files changed

+45
-25
lines changed

3 files changed

+45
-25
lines changed

detect_secrets/core/secrets_collection.py

+3
Original file line numberDiff line numberDiff line change
@@ -156,6 +156,8 @@ def scan_diff(
156156
at incremental differences, rather than re-scanning the codebase every time.
157157
This function supports this, and adds information to self.data.
158158
159+
Note that this is only called by detect-secrets-server.
160+
159161
:type diff: str
160162
:param diff: diff string.
161163
e.g. The output of `git diff <fileA> <fileB>`
@@ -338,6 +340,7 @@ def _extract_secrets_from_patch(self, f, plugin, filename):
338340
"""Extract secrets from a given patch file object.
339341
340342
Note that we only want to capture incoming secrets (i.e. added lines).
343+
Note that this is only called by detect-secrets-server.
341344
342345
:type f: unidiff.patch.PatchedFile
343346
:type plugin: detect_secrets.plugins.base.BasePlugin

detect_secrets/plugins/base.py

+26-20
Original file line numberDiff line numberDiff line change
@@ -70,17 +70,15 @@ def __init__(
7070
:param false_positive_heuristics: List of fp-heuristic functions
7171
applicable to this plugin
7272
"""
73-
self.exclude_lines_regex = None
74-
if exclude_lines_regex:
75-
self.exclude_lines_regex = re.compile(exclude_lines_regex)
73+
self.exclude_lines_regex = (
74+
re.compile(exclude_lines_regex)
75+
if exclude_lines_regex
76+
else None
77+
)
7678

7779
self.should_verify = should_verify
7880

79-
self.false_positive_heuristics = (
80-
false_positive_heuristics
81-
if false_positive_heuristics
82-
else []
83-
)
81+
self.false_positive_heuristics = false_positive_heuristics or []
8482

8583
@classproperty
8684
def disable_flag_text(cls):
@@ -101,6 +99,19 @@ def disable_flag_text(cls):
10199
def default_options(cls):
102100
return {}
103101

102+
def _is_excluded_line(self, line):
103+
return (
104+
any(
105+
allowlist_regex.search(line)
106+
for allowlist_regex in ALLOWLIST_REGEXES
107+
)
108+
or
109+
(
110+
self.exclude_lines_regex and
111+
self.exclude_lines_regex.search(line)
112+
)
113+
)
114+
104115
def analyze(self, file, filename):
105116
"""
106117
:param file: The File object itself.
@@ -114,6 +125,13 @@ def analyze(self, file, filename):
114125
file_lines = tuple(file.readlines())
115126
for line_num, line in enumerate(file_lines, start=1):
116127
results = self.analyze_line(line, line_num, filename)
128+
if (
129+
not results
130+
or
131+
self._is_excluded_line(line)
132+
):
133+
continue
134+
117135
if not self.should_verify:
118136
potential_secrets.update(results)
119137
continue
@@ -146,18 +164,6 @@ def analyze_line(self, string, line_num, filename):
146164
147165
NOTE: line_num and filename are used for PotentialSecret creation only.
148166
"""
149-
if (
150-
any(
151-
allowlist_regex.search(string) for allowlist_regex in ALLOWLIST_REGEXES
152-
)
153-
154-
or (
155-
self.exclude_lines_regex and
156-
self.exclude_lines_regex.search(string)
157-
)
158-
):
159-
return {}
160-
161167
return self.analyze_string_content(
162168
string,
163169
line_num,

tests/plugins/high_entropy_strings_test.py

+16-5
Original file line numberDiff line numberDiff line change
@@ -297,18 +297,29 @@ def test_discounts_when_all_numbers(self):
297297
)
298298

299299
# This makes sure discounting works.
300-
assert self.logic.calculate_shannon_entropy('0123456789') < \
300+
assert (
301+
self.logic.calculate_shannon_entropy('0123456789')
302+
<
301303
original_scanner.calculate_shannon_entropy('0123456789')
302-
304+
)
303305
# This is the goal.
304306
assert self.logic.calculate_shannon_entropy('0123456789') < 3
305307

306308
# This makes sure it is length dependent.
307-
assert self.logic.calculate_shannon_entropy('0123456789') < \
309+
assert (
310+
self.logic.calculate_shannon_entropy('0123456789')
311+
<
308312
self.logic.calculate_shannon_entropy('01234567890123456789')
313+
)
309314

310315
# This makes sure it only occurs with numbers.
311-
assert self.logic.calculate_shannon_entropy('12345a') == \
316+
assert (
317+
self.logic.calculate_shannon_entropy('12345a')
318+
==
312319
original_scanner.calculate_shannon_entropy('12345a')
313-
assert self.logic.calculate_shannon_entropy('0') == \
320+
)
321+
assert (
322+
self.logic.calculate_shannon_entropy('0')
323+
==
314324
original_scanner.calculate_shannon_entropy('0')
325+
)

0 commit comments

Comments
 (0)