|
| 1 | +import os |
| 2 | +import subprocess |
1 | 3 | from functools import lru_cache
|
2 | 4 | from importlib import import_module
|
3 | 5 | from typing import Generator
|
4 | 6 | from typing import IO
|
| 7 | +from typing import Iterable |
5 | 8 | from typing import List
|
6 | 9 | from typing import Optional
|
7 | 10 | from typing import Tuple
|
8 | 11 |
|
9 | 12 | from . import plugins
|
| 13 | +from ..filters.allowlist import is_line_allowlisted |
10 | 14 | from ..settings import get_settings
|
11 | 15 | from ..transformers import get_transformers
|
12 | 16 | from ..transformers import ParsingError
|
13 | 17 | from ..types import SelfAwareCallable
|
| 18 | +from ..util import git |
14 | 19 | from ..util.code_snippet import get_code_snippet
|
15 | 20 | from ..util.inject import get_injectable_variables
|
16 | 21 | from ..util.inject import inject_variables_into_function
|
| 22 | +from ..util.path import get_relative_path_if_in_cwd |
17 | 23 | from .log import log
|
18 | 24 | from .plugins import Plugin
|
19 | 25 | from .potential_secret import PotentialSecret
|
20 | 26 |
|
21 | 27 |
|
def get_files_to_scan(*paths: str, should_scan_all_files: bool) -> Generator[str, None, None]:
    """
    Yields the files found under the given paths, in the form returned by
    `get_relative_path_if_in_cwd`.

    :param paths: individual files, or directories that are walked recursively.
    :param should_scan_all_files: if False, only files reported by
        `git.get_tracked_files` are yielded. If the git lookup fails, a warning
        is logged and nothing is yielded.
    """
    if not should_scan_all_files:
        try:
            valid_paths = git.get_tracked_files(git.get_root_directory())
        except subprocess.CalledProcessError:
            log.warning('Did not detect git repository. Try scanning all files instead.')
            # Bare return: a value returned from a generator is never seen by
            # callers that simply iterate over it.
            return

    for path in paths:
        # A single file is wrapped in an `os.walk`-shaped triple, so that files and
        # directories share the same loop below.
        iterator = [(os.getcwd(), None, [path])] if os.path.isfile(path) else os.walk(path)
        for path_root, _, filenames in iterator:
            for filename in filenames:
                # Use a separate name, rather than rebinding the outer loop variable.
                relative_path = get_relative_path_if_in_cwd(os.path.join(path_root, filename))
                if not relative_path:
                    # e.g. symbolic links may be pointing outside the root directory
                    continue

                if (
                    not should_scan_all_files
                    and relative_path not in valid_paths
                ):
                    # Not a git-tracked file
                    continue

                yield relative_path
| 53 | + |
| 54 | + |
22 | 55 | def scan_line(line: str) -> Generator[PotentialSecret, None, None]:
|
23 | 56 | """Used for adhoc string scanning."""
|
24 | 57 | # Disable this, since it doesn't make sense to run this for adhoc usage.
|
25 | 58 | get_settings().disable_filters(
|
26 | 59 | 'detect_secrets.filters.common.is_invalid_file',
|
27 | 60 | )
|
| 61 | + get_filters.cache_clear() |
28 | 62 |
|
29 | 63 | for plugin in get_plugins():
|
30 | 64 | for secret in _scan_line(
|
@@ -60,69 +94,149 @@ def scan_file(filename: str) -> Generator[PotentialSecret, None, None]:
|
60 | 94 | return
|
61 | 95 |
|
62 | 96 | try:
|
63 |
| - with open(filename) as f: |
64 |
| - log.info(f'Checking file: {filename}') |
65 |
| - |
66 |
| - try: |
67 |
| - lines = _get_transformed_file(f) |
68 |
| - if not lines: |
69 |
| - lines = f.readlines() |
70 |
| - except UnicodeDecodeError: |
71 |
| - # We flat out ignore binary files. |
72 |
| - return |
73 |
| - |
74 |
| - has_secret = False |
| 97 | + has_secret = False |
| 98 | + for lines in _get_lines_from_file(filename): |
75 | 99 | for secret in _process_line_based_plugins(
|
76 | 100 | lines=list(enumerate(lines, 1)),
|
77 |
| - filename=f.name, |
| 101 | + filename=filename, |
78 | 102 | ):
|
79 | 103 | has_secret = True
|
80 | 104 | yield secret
|
81 | 105 |
|
82 | 106 | if has_secret:
|
83 |
| - return |
84 |
| - |
85 |
| - # Only if no secrets, then use eager transformers |
86 |
| - f.seek(0) |
87 |
| - lines = _get_transformed_file(f, use_eager_transformers=True) |
88 |
| - if not lines: |
89 |
| - return |
90 |
| - |
91 |
| - yield from _process_line_based_plugins( |
92 |
| - lines=list(enumerate(lines, 1)), |
93 |
| - filename=f.name, |
94 |
| - ) |
| 107 | + break |
95 | 108 | except IOError:
|
96 | 109 | log.warning(f'Unable to open file: {filename}')
|
| 110 | + return |
97 | 111 |
|
98 | 112 |
|
def scan_diff(diff: str) -> Generator[PotentialSecret, None, None]:
    """
    Runs the line based plugins over every (filename, lines) pair that
    `_get_lines_from_diff` produces for the given diff.

    :raises: ImportError
    """
    if not get_plugins():  # pragma: no cover
        log.warning('No plugins to scan with!')
        return

    for patched_filename, added_lines in _get_lines_from_diff(diff):
        yield from _process_line_based_plugins(added_lines, filename=patched_filename)
106 | 123 |
|
| 124 | + |
def scan_for_allowlisted_secrets_in_file(filename: str) -> Generator[PotentialSecret, None, None]:
    """
    Developers are able to add individual lines to the allowlist using
    `detect_secrets.filters.allowlist.is_line_allowlisted`. However, there are
    times when we want to verify that no *actual* secrets are added to the codebase
    via this feature.

    This scans specifically for these lines, and ignores everything else.

    If the file cannot be opened, a warning is logged and nothing is yielded.
    """
    if not get_plugins():  # pragma: no cover
        log.warning('No plugins to scan with!')
        return

    if _filter_files(filename):
        return

    try:
        for file_lines in _get_lines_from_file(filename):
            yield from _scan_for_allowlisted_secrets_in_lines(
                enumerate(file_lines, 1),
                filename,
            )

            # NOTE: Unlike `scan_file`, we don't ever have to use eager file transformers,
            # since we already know which lines we want to scan. Stop after the first set
            # of lines.
            break
    except IOError:
        log.warning(f'Unable to open file: {filename}')
| 150 | + |
| 151 | + |
def scan_for_allowlisted_secrets_in_diff(diff: str) -> Generator[PotentialSecret, None, None]:
    """
    Diff counterpart of `scan_for_allowlisted_secrets_in_file`: feeds every
    (filename, lines) pair from `_get_lines_from_diff` to the allowlisted-line scan.

    :raises: ImportError
    """
    if not get_plugins():  # pragma: no cover
        log.warning('No plugins to scan with!')
        return

    for patched_filename, added_lines in _get_lines_from_diff(diff):
        yield from _scan_for_allowlisted_secrets_in_lines(added_lines, patched_filename)
| 159 | + |
| 160 | + |
def _scan_for_allowlisted_secrets_in_lines(
    lines: Iterable[Tuple[int, str]],
    filename: str,
) -> Generator[PotentialSecret, None, None]:
    """
    Scans only the lines for which `is_line_allowlisted` returns a truthy value,
    and skips every other line.

    :param lines: (line number, line content) pairs.
    :param filename: passed through to the allowlist check, the line filters and
        the plugins.
    """
    # We control the setting here because it makes more sense than requiring the caller
    # to set this setting before calling this function.
    # NOTE: Since this is a generator, this only runs once iteration actually starts.
    get_settings().disable_filters('detect_secrets.filters.allowlist.is_line_allowlisted')
    get_filters.cache_clear()

    for line_number, line in lines:
        line = line.rstrip()

        if not is_line_allowlisted(filename, line):
            continue

        # A generator expression (rather than a list) lets `any` stop at the first
        # filter that rejects the line, instead of always running every filter.
        if any(
            inject_variables_into_function(filter_fn, filename=filename, line=line)
            for filter_fn in get_filters_with_parameter('line')
        ):
            continue

        for plugin in get_plugins():
            yield from _scan_line(plugin, filename, line, line_number)
| 184 | + |
| 185 | + |
def _get_lines_from_file(filename: str) -> Generator[List[str], None, None]:
    """
    Yields up to two sets of lines for the given file: first the result of the
    regular transformers (or the raw lines, if that result is empty), and then the
    result of the eager transformers, if there is any.

    The caller is responsible for breaking out of the loop as soon as it does not
    need any more lines. Nothing is yielded when a UnicodeDecodeError is raised
    while reading.

    :raises: IOError
    :raises: FileNotFoundError
    """
    with open(filename) as handle:
        log.info(f'Checking file: {filename}')

        try:
            first_pass = _get_transformed_file(handle) or handle.readlines()
        except UnicodeDecodeError:
            # We flat out ignore binary files
            return

        yield first_pass

        # Only reached if the caller keeps iterating, i.e. the lines above were not
        # useful to it. In that case, try again with the eager transformers.
        handle.seek(0)
        eager_pass = _get_transformed_file(handle, use_eager_transformers=True)
        if eager_pass:
            yield eager_pass
| 214 | + |
| 215 | + |
def _get_lines_from_diff(diff: str) -> Generator[Tuple[str, List[Tuple[int, str]]], None, None]:
    """
    Yields one (filename, lines) pair per file in the diff that `_filter_files` does
    not reject. The lines are (target line number, value) pairs, restricted to the
    lines that the diff adds.

    :raises: ImportError
    """
    # Local imports, so that we don't need to require unidiff for versions of
    # detect-secrets that don't use it.
    from unidiff import PatchSet

    for patch_file in PatchSet.from_string(diff):
        filename = patch_file.path
        if _filter_files(filename):
            continue

        added_lines = [
            (line.target_line_no, line.value)
            for chunk in patch_file
            # target_lines refers to incoming (new) changes
            for line in chunk.target_lines()
            if line.is_added
        ]
        yield filename, added_lines
126 | 240 |
|
127 | 241 |
|
128 | 242 | def _filter_files(filename: str) -> bool:
|
|
0 commit comments