Skip to content

Commit 0a34639

Browse files
author
Victor Zhou
committed
Refactor secret filtering to be an instance method
1 parent ece342b commit 0a34639

File tree

4 files changed

+60
-36
lines changed

4 files changed

+60
-36
lines changed

detect_secrets/plugins/base.py

Lines changed: 28 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -53,19 +53,33 @@ class BasePlugin(object):
5353
def secret_type(self):
5454
raise NotImplementedError
5555

56-
def __init__(self, exclude_lines_regex=None, should_verify=False, **kwargs):
56+
def __init__(
57+
self,
58+
exclude_lines_regex=None,
59+
should_verify=False,
60+
false_positive_heuristics=None,
61+
**kwargs
62+
):
5763
"""
5864
:type exclude_lines_regex: str|None
5965
:param exclude_lines_regex: optional regex for ignored lines.
6066
6167
:type should_verify: bool
68+
69+
:type false_positive_heuristics: List[Callable]|None
70+
:param false_positive_heuristics: List of fp-heuristic functions
71+
applicable to this plugin
6272
"""
6373
self.exclude_lines_regex = None
6474
if exclude_lines_regex:
6575
self.exclude_lines_regex = re.compile(exclude_lines_regex)
6676

6777
self.should_verify = should_verify
6878

79+
self.false_positive_heuristics = false_positive_heuristics \
80+
if false_positive_heuristics \
81+
else []
82+
6983
@classproperty
7084
def disable_flag_text(cls):
7185
name = cls.__name__
@@ -232,6 +246,19 @@ def verify(self, token, content=''):
232246
"""
233247
return VerifiedResult.UNVERIFIED
234248

249+
def is_secret_false_positive(self, token):
250+
"""
251+
Checks if the input secret is a false-positive according to
252+
this plugin's heuristics.
253+
254+
:type token: str
255+
:param token: secret found by current plugin
256+
"""
257+
return any(
258+
func(token)
259+
for func in self.false_positive_heuristics
260+
) if self.false_positive_heuristics else False
261+
235262
@property
236263
def __dict__(self):
237264
return {

detect_secrets/plugins/common/filters.py

Lines changed: 13 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,19 @@ def is_found_with_aho_corasick(secret, automaton):
3434
return False
3535

3636

37+
def get_aho_corasick_helper(automaton):
38+
"""
39+
Returns a function which determines if a word matches the
40+
input automaton.
41+
42+
:type automaton: ahocorasick.Automaton
43+
"""
44+
def fn(secret):
45+
return is_found_with_aho_corasick(secret, automaton)
46+
47+
return fn
48+
49+
3750
def is_sequential_string(secret, *args):
3851
"""
3952
:type secret: str
@@ -103,12 +116,6 @@ def is_potential_uuid(secret, *args):
103116
return bool(_UUID_REGEX.search(secret))
104117

105118

106-
DEFAULT_FALSE_POSITIVE_HEURISTICS = [
107-
is_found_with_aho_corasick,
108-
is_sequential_string,
109-
]
110-
111-
112119
# NOTE: this doesn't handle multiple key-values on a line properly.
113120
# NOTE: words that end in "id" will be treated as ids
114121
_ID_DETECTOR_REGEX = re.compile(r'id[^a-z0-9]', re.IGNORECASE)
@@ -136,25 +143,6 @@ def is_likely_id_string(secret, line):
136143
]
137144

138145

139-
def is_false_positive(secret, automaton, functions=DEFAULT_FALSE_POSITIVE_HEURISTICS):
140-
"""
141-
:type secret: str
142-
143-
:type automaton: ahocorasick.Automaton|None
144-
:param automaton: optional automaton for ignoring certain words.
145-
146-
:type functions: Iterable[Callable]
147-
:param functions: list of heuristics to use
148-
149-
:rtype: bool
150-
Returns True if any false positive heuristic function returns True.
151-
"""
152-
return any(
153-
func(secret, automaton)
154-
for func in functions
155-
)
156-
157-
158146
def is_false_positive_with_line_context(
159147
secret,
160148
line,

detect_secrets/plugins/high_entropy_strings.py

Lines changed: 10 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -18,10 +18,10 @@
1818
from .base import classproperty
1919
from .common.filetype import determine_file_type
2020
from .common.filetype import FileType
21-
from .common.filters import is_false_positive
21+
from .common.filters import get_aho_corasick_helper
2222
from .common.filters import is_false_positive_with_line_context
2323
from .common.filters import is_potential_uuid
24-
from .common.filters import DEFAULT_FALSE_POSITIVE_HEURISTICS
24+
from .common.filters import is_sequential_string
2525
from .common.ini_file_parser import IniFileParser
2626
from .common.yaml_file_parser import YamlFileParser
2727
from detect_secrets.core.potential_secret import PotentialSecret
@@ -40,11 +40,17 @@ def __init__(self, charset, limit, exclude_lines_regex, automaton, *args):
4040

4141
self.charset = charset
4242
self.entropy_limit = limit
43-
self.automaton = automaton
4443
self.regex = re.compile(r'([\'"])([%s]+)(\1)' % charset)
4544

45+
false_positive_heuristics = [
46+
get_aho_corasick_helper(automaton),
47+
is_sequential_string,
48+
is_potential_uuid,
49+
]
50+
4651
super(HighEntropyStringsPlugin, self).__init__(
4752
exclude_lines_regex=exclude_lines_regex,
53+
false_positive_heuristics=false_positive_heuristics,
4854
)
4955

5056
def analyze(self, file, filename):
@@ -115,11 +121,7 @@ def analyze_string_content(self, string, line_num, filename):
115121
output = {}
116122

117123
for result in self.secret_generator(string):
118-
# py2+py3 compatible way of copying a list
119-
functions = list(DEFAULT_FALSE_POSITIVE_HEURISTICS)
120-
functions.append(is_potential_uuid)
121-
122-
if is_false_positive(result, self.automaton, functions=functions):
124+
if self.is_secret_false_positive(result):
123125
continue
124126

125127
secret = PotentialSecret(self.secret_type, filename, result, line_num)

detect_secrets/plugins/keyword.py

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,8 @@
3232
from .base import classproperty
3333
from .common.filetype import determine_file_type
3434
from .common.filetype import FileType
35-
from .common.filters import is_false_positive
35+
from .common.filters import get_aho_corasick_helper
36+
from .common.filters import is_sequential_string
3637
from detect_secrets.core.potential_secret import PotentialSecret
3738

3839

@@ -273,8 +274,14 @@ def __dict__(self):
273274
return output
274275

275276
def __init__(self, keyword_exclude=None, exclude_lines_regex=None, automaton=None, **kwargs):
277+
false_positive_heuristics = [
278+
get_aho_corasick_helper(automaton),
279+
is_sequential_string,
280+
]
281+
276282
super(KeywordDetector, self).__init__(
277283
exclude_lines_regex=exclude_lines_regex,
284+
false_positive_heuristics=false_positive_heuristics,
278285
**kwargs
279286
)
280287

@@ -298,7 +305,7 @@ def analyze_string_content(self, string, line_num, filename):
298305
string,
299306
filetype=determine_file_type(filename),
300307
):
301-
if is_false_positive(identifier, self.automaton):
308+
if self.is_secret_false_positive(identifier):
302309
continue
303310
secret = PotentialSecret(
304311
self.secret_type,

0 commit comments

Comments
 (0)