Skip to content

Commit 15a3ef5

Browse files
author
Aaron Loo
committed
Merge branch 'pre-v1-extract-pragmas' into pre-v1-launch
2 parents 053f1a3 + 3aa62f4 commit 15a3ef5

File tree

12 files changed

+263
-87
lines changed

12 files changed

+263
-87
lines changed

detect_secrets/audit/common.py

+11-1
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,8 @@
1010
from ..core.potential_secret import PotentialSecret
1111
from ..exceptions import InvalidBaselineError
1212
from ..exceptions import SecretNotFoundOnSpecifiedLineError
13+
from ..util.inject import get_injectable_variables
14+
from ..util.inject import inject_variables_into_function
1315

1416

1517
def get_baseline_from_file(filename: str) -> Dict[str, Any]:
@@ -42,10 +44,18 @@ def get_raw_secret_from_file(secret: PotentialSecret) -> str:
4244
except IndexError:
4345
raise SecretNotFoundOnSpecifiedLineError(secret.line_number)
4446

45-
identified_secrets = plugin.analyze_line(
47+
function = plugin.__class__.analyze_line
48+
if not hasattr(function, 'injectable_variables'):
49+
function.injectable_variables = set(get_injectable_variables(plugin.analyze_line))
50+
function.path = f'{plugin.__class__.__name__}.analyze_line'
51+
52+
identified_secrets = inject_variables_into_function(
53+
function,
54+
self=plugin,
4655
filename=secret.filename,
4756
line=target_line,
4857
line_number=secret.line_number, # TODO: this will be optional
58+
enable_eager_search=True,
4959
)
5060

5161
for identified_secret in identified_secrets:

detect_secrets/core/baseline.py

+3-36
Original file line numberDiff line numberDiff line change
@@ -1,64 +1,31 @@
11
import json
2-
import os
32
import time
43
from typing import Any
54
from typing import Callable
65
from typing import Dict
7-
from typing import List
86
from typing import Union
97

108
from . import upgrades
119
from ..__version__ import VERSION
1210
from ..exceptions import UnableToReadBaselineError
1311
from ..settings import configure_settings_from_baseline
1412
from ..settings import get_settings
15-
from ..util import git
1613
from ..util.importlib import import_modules_from_package
17-
from ..util.path import get_relative_path_if_in_cwd
1814
from ..util.semver import Version
19-
from .log import log
15+
from .scan import get_files_to_scan
2016
from .secrets_collection import SecretsCollection
2117

2218

2319
def create(*paths: str, should_scan_all_files: bool = False) -> SecretsCollection:
2420
"""Scans all the files recursively in path to initialize a baseline."""
2521
secrets = SecretsCollection()
2622

27-
for path in paths:
28-
files = _get_files_to_scan(path, should_scan_all_files)
29-
for filename in files:
30-
secrets.scan_file(filename)
23+
for filename in get_files_to_scan(*paths, should_scan_all_files=should_scan_all_files):
24+
secrets.scan_file(filename)
3125

3226
return secrets
3327

3428

35-
def _get_files_to_scan(root: str, should_scan_all_files: bool) -> List[str]:
36-
output: List[str] = []
37-
if not should_scan_all_files:
38-
valid_paths = git.get_tracked_files(root)
39-
if not valid_paths:
40-
log.warning('Did not detect git repository. Try scanning all files instead.')
41-
return output
42-
43-
for path_root, _, filenames in os.walk(root):
44-
for filename in filenames:
45-
path = get_relative_path_if_in_cwd(os.path.join(path_root, filename))
46-
if not path:
47-
# e.g. symbolic links may be pointing outside the root directory
48-
continue
49-
50-
if (
51-
not should_scan_all_files
52-
and path not in valid_paths
53-
):
54-
# Not a git-tracked file
55-
continue
56-
57-
output.append(path)
58-
59-
return output
60-
61-
6229
def load(baseline: Dict[str, Any], filename: str) -> SecretsCollection:
6330
"""
6431
With a given baseline file, load all settings and discovered secrets from it.

detect_secrets/core/scan.py

+151-37
Original file line numberDiff line numberDiff line change
@@ -1,30 +1,64 @@
1+
import os
2+
import subprocess
13
from functools import lru_cache
24
from importlib import import_module
35
from typing import Generator
46
from typing import IO
7+
from typing import Iterable
58
from typing import List
69
from typing import Optional
710
from typing import Tuple
811

912
from . import plugins
13+
from ..filters.allowlist import is_line_allowlisted
1014
from ..settings import get_settings
1115
from ..transformers import get_transformers
1216
from ..transformers import ParsingError
1317
from ..types import SelfAwareCallable
18+
from ..util import git
1419
from ..util.code_snippet import get_code_snippet
1520
from ..util.inject import get_injectable_variables
1621
from ..util.inject import inject_variables_into_function
22+
from ..util.path import get_relative_path_if_in_cwd
1723
from .log import log
1824
from .plugins import Plugin
1925
from .potential_secret import PotentialSecret
2026

2127

28+
def get_files_to_scan(*paths: str, should_scan_all_files: bool) -> Generator[str, None, None]:
29+
if not should_scan_all_files:
30+
try:
31+
valid_paths = git.get_tracked_files(git.get_root_directory())
32+
except subprocess.CalledProcessError:
33+
log.warning('Did not detect git repository. Try scanning all files instead.')
34+
return []
35+
36+
for path in paths:
37+
iterator = [(os.getcwd(), None, [path])] if os.path.isfile(path) else os.walk(path)
38+
for path_root, _, filenames in iterator:
39+
for filename in filenames:
40+
path = get_relative_path_if_in_cwd(os.path.join(path_root, filename))
41+
if not path:
42+
# e.g. symbolic links may be pointing outside the root directory
43+
continue
44+
45+
if (
46+
not should_scan_all_files
47+
and path not in valid_paths
48+
):
49+
# Not a git-tracked file
50+
continue
51+
52+
yield path
53+
54+
2255
def scan_line(line: str) -> Generator[PotentialSecret, None, None]:
2356
"""Used for adhoc string scanning."""
2457
# Disable this, since it doesn't make sense to run this for adhoc usage.
2558
get_settings().disable_filters(
2659
'detect_secrets.filters.common.is_invalid_file',
2760
)
61+
get_filters.cache_clear()
2862

2963
for plugin in get_plugins():
3064
for secret in _scan_line(
@@ -60,69 +94,149 @@ def scan_file(filename: str) -> Generator[PotentialSecret, None, None]:
6094
return
6195

6296
try:
63-
with open(filename) as f:
64-
log.info(f'Checking file: {filename}')
65-
66-
try:
67-
lines = _get_transformed_file(f)
68-
if not lines:
69-
lines = f.readlines()
70-
except UnicodeDecodeError:
71-
# We flat out ignore binary files.
72-
return
73-
74-
has_secret = False
97+
has_secret = False
98+
for lines in _get_lines_from_file(filename):
7599
for secret in _process_line_based_plugins(
76100
lines=list(enumerate(lines, 1)),
77-
filename=f.name,
101+
filename=filename,
78102
):
79103
has_secret = True
80104
yield secret
81105

82106
if has_secret:
83-
return
84-
85-
# Only if no secrets, then use eager transformers
86-
f.seek(0)
87-
lines = _get_transformed_file(f, use_eager_transformers=True)
88-
if not lines:
89-
return
90-
91-
yield from _process_line_based_plugins(
92-
lines=list(enumerate(lines, 1)),
93-
filename=f.name,
94-
)
107+
break
95108
except IOError:
96109
log.warning(f'Unable to open file: {filename}')
110+
return
97111

98112

99113
def scan_diff(diff: str) -> Generator[PotentialSecret, None, None]:
100114
"""
101115
:raises: ImportError
102116
"""
103-
# Local imports, so that we don't need to require unidiff for versions of
104-
# detect-secrets that don't use it.
105-
from unidiff import PatchSet
117+
if not get_plugins(): # pragma: no cover
118+
log.warning('No plugins to scan with!')
119+
return
120+
121+
for filename, lines in _get_lines_from_diff(diff):
122+
yield from _process_line_based_plugins(lines, filename=filename)
106123

124+
125+
def scan_for_allowlisted_secrets_in_file(filename: str) -> Generator[PotentialSecret, None, None]:
126+
"""
127+
Developers are able to add individual lines to the allowlist using
128+
`detect_secrets.filters.allowlist.is_line_allowlisted`. However, there are
129+
times when we want to verify that no *actual* secrets are added to the codebase
130+
via this feature.
131+
132+
This scans specifically for these lines, and ignores everything else.
133+
"""
107134
if not get_plugins(): # pragma: no cover
108135
log.warning('No plugins to scan with!')
109136
return
110137

138+
if _filter_files(filename):
139+
return
140+
141+
# NOTE: Unlike `scan_file`, we don't ever have to use eager file transformers, since we already
142+
# know which lines we want to scan.
143+
try:
144+
for lines in _get_lines_from_file(filename):
145+
yield from _scan_for_allowlisted_secrets_in_lines(enumerate(lines, 1), filename)
146+
break
147+
except IOError:
148+
log.warning(f'Unable to open file: {filename}')
149+
return
150+
151+
152+
def scan_for_allowlisted_secrets_in_diff(diff: str) -> Generator[PotentialSecret, None, None]:
153+
if not get_plugins(): # pragma: no cover
154+
log.warning('No plugins to scan with!')
155+
return
156+
157+
for filename, lines in _get_lines_from_diff(diff):
158+
yield from _scan_for_allowlisted_secrets_in_lines(lines, filename)
159+
160+
161+
def _scan_for_allowlisted_secrets_in_lines(
162+
lines: Iterable[Tuple[int, str]],
163+
filename: str,
164+
) -> Generator[PotentialSecret, None, None]:
165+
# We control the setting here because it makes more sense than requiring the caller
166+
# to set this setting before calling this function.
167+
get_settings().disable_filters('detect_secrets.filters.allowlist.is_line_allowlisted')
168+
get_filters.cache_clear()
169+
170+
for line_number, line in lines:
171+
line = line.rstrip()
172+
173+
if not is_line_allowlisted(filename, line):
174+
continue
175+
176+
if any([
177+
inject_variables_into_function(filter_fn, filename=filename, line=line)
178+
for filter_fn in get_filters_with_parameter('line')
179+
]):
180+
continue
181+
182+
for plugin in get_plugins():
183+
yield from _scan_line(plugin, filename, line, line_number)
184+
185+
186+
def _get_lines_from_file(filename: str) -> Generator[List[str], None, None]:
187+
"""
188+
This attempts to get lines in a given file. If no more lines are needed, the caller
189+
is responsible for breaking out of this loop.
190+
191+
:raises: IOError
192+
:raises: FileNotFoundError
193+
"""
194+
with open(filename) as f:
195+
log.info(f'Checking file: {filename}')
196+
197+
try:
198+
lines = _get_transformed_file(f)
199+
if not lines:
200+
lines = f.readlines()
201+
except UnicodeDecodeError:
202+
# We flat out ignore binary files
203+
return
204+
205+
yield lines
206+
207+
# If the above lines don't prove to be useful to the caller, try using eager transformers.
208+
f.seek(0)
209+
lines = _get_transformed_file(f, use_eager_transformers=True)
210+
if not lines:
211+
return
212+
213+
yield lines
214+
215+
216+
def _get_lines_from_diff(diff: str) -> Generator[Tuple[str, List[Tuple[int, str]]], None, None]:
217+
"""
218+
:raises: ImportError
219+
"""
220+
# Local imports, so that we don't need to require unidiff for versions of
221+
# detect-secrets that don't use it.
222+
from unidiff import PatchSet
223+
111224
patch_set = PatchSet.from_string(diff)
112225
for patch_file in patch_set:
113226
filename = patch_file.path
114227
if _filter_files(filename):
115228
continue
116229

117-
lines = [
118-
(line.target_line_no, line.value)
119-
for chunk in patch_file
120-
# target_lines refers to incoming (new) changes
121-
for line in chunk.target_lines()
122-
if line.is_added
123-
]
124-
125-
yield from _process_line_based_plugins(lines, filename=filename)
230+
yield (
231+
filename,
232+
[
233+
(line.target_line_no, line.value)
234+
for chunk in patch_file
235+
# target_lines refers to incoming (new) changes
236+
for line in chunk.target_lines()
237+
if line.is_added
238+
],
239+
)
126240

127241

128242
def _filter_files(filename: str) -> bool:

detect_secrets/core/upgrades/v1_0.py

+3
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,9 @@ def _migrate_filters(baseline: Dict[str, Any]) -> None:
2222
contain the default filters used before this version upgrade.
2323
"""
2424
baseline['filters_used'] = [
25+
{
26+
'path': 'detect_secrets.filters.allowlist.is_line_allowlisted',
27+
},
2528
{
2629
'path': 'detect_secrets.filters.heuristic.is_sequential_string',
2730
},

detect_secrets/core/usage/scan.py

+12
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@ def add_scan_action(parent: argparse._SubParsersAction) -> argparse.ArgumentPars
1717
)
1818

1919
_add_adhoc_scanning(parser)
20+
_add_pragma_scanning(parser)
2021
_add_initialize_baseline_options(parser)
2122

2223
return parser
@@ -33,6 +34,17 @@ def _add_adhoc_scanning(parser: argparse.ArgumentParser) -> None:
3334
)
3435

3536

37+
def _add_pragma_scanning(parser: argparse.ArgumentParser) -> None:
38+
parser.add_argument(
39+
'--only-allowlisted',
40+
action='store_true',
41+
help=(
42+
'Only scans the lines that are flagged with `allowlist secret`. This helps '
43+
'verify that individual exceptions are indeed non-secrets.'
44+
),
45+
)
46+
47+
3648
def _add_initialize_baseline_options(parser: argparse.ArgumentParser) -> None:
3749
parser.add_argument(
3850
'path',

0 commit comments

Comments
 (0)