Skip to content

Commit 3364c14

Browse files
authored
Merge pull request #90 from Yelp/update_baseline_version
Update merge results to only propagate is_secret of new secrets
2 parents 80ae87d + 0a840b2 commit 3364c14

File tree

4 files changed

+20
-46
lines changed

4 files changed

+20
-46
lines changed

detect_secrets/core/audit.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -177,7 +177,7 @@ def _get_user_decision(prompt_secret_decision=True, can_step_back=False):
177177
print('Invalid input.')
178178

179179
if 'y' in allowable_user_input:
180-
user_input_string = 'Is this a valid secret? (y)es, (n)o, '
180+
user_input_string = 'Is this a valid secret? i.e. not a false-positive (y)es, (n)o, '
181181
else:
182182
user_input_string = 'What would you like to do? '
183183
if 'b' in allowable_user_input:

detect_secrets/core/baseline.py

+17-40
Original file line numberDiff line numberDiff line change
@@ -157,6 +157,8 @@ def merge_baseline(old_baseline, new_baseline):
157157
to the new baseline, and will only work with baselines created
158158
after v0.9.
159159
160+
Note that the exclude regex is handled separately.
161+
160162
:type old_baseline: dict
161163
:param old_baseline: baseline dict, loaded from previous baseline
162164
@@ -174,14 +176,6 @@ def merge_baseline(old_baseline, new_baseline):
174176
def merge_results(old_results, new_results):
175177
"""Update results in baseline with latest information.
176178
177-
As a rule of thumb, we want to favor the new results, yet at the same
178-
time, transfer non-modified data from the old results set.
179-
180-
Assumptions:
181-
* The list of results in each secret set is in the same order.
182-
This means that new_results cannot have *more* results than
183-
old_results.
184-
185179
:type old_results: dict
186180
:param old_results: results of status quo
187181
@@ -190,42 +184,25 @@ def merge_results(old_results, new_results):
190184
191185
:rtype: dict
192186
"""
193-
for filename, secrets in old_results.items():
187+
for filename, old_secrets in old_results.items():
194188
if filename not in new_results:
195-
new_results[filename] = secrets
196189
continue
197190

198-
if len(secrets) == len(new_results[filename]):
199-
# Assuming that secrets remain in order.
200-
for index, secrets_tuple in enumerate(zip(secrets, new_results[filename])):
201-
old_secret, new_secret = secrets_tuple
202-
if old_secret['hashed_secret'] != new_secret['hashed_secret']:
203-
# We don't join the two secret sets, because if the later
204-
# result set did not discover an old secret, it's probably
205-
# moved.
206-
# If it did discover it, then lengths would be different.
207-
continue
208-
209-
if 'is_secret' in old_secret and 'is_secret' not in new_secret:
210-
# If the new_secret has a label, then go with the later
211-
# version.
212-
new_results[filename][index] = old_secret
191+
old_secrets_mapping = dict()
192+
for old_secret in old_secrets:
193+
old_secrets_mapping[old_secret['hashed_secret']] = old_secret
213194

214-
continue
195+
for new_secret in new_results[filename]:
196+
if new_secret['hashed_secret'] not in old_secrets_mapping:
197+
# We don't join the two secret sets, because if the newer
198+
# result set did not discover an old secret, it probably
199+
# moved.
200+
continue
215201

216-
# Need to figure out starting point. That is, while
217-
# len(new_results) < len(old_results), they may not start at the same
218-
# place.
219-
#
220-
# e.g. old_results = A,B,C,D
221-
# new_results = B,C
222-
first_secret_hash = new_results[filename][0]['hashed_secret']
223-
for index, secret in enumerate(secrets):
224-
if secret['hashed_secret'] == first_secret_hash:
225-
new_results[filename] = secrets[:index] + \
226-
new_results[filename] + \
227-
secrets[index + len(new_results[filename]):]
228-
break
202+
old_secret = old_secrets_mapping[new_secret['hashed_secret']]
203+
# Only propagate 'is_secret' if it's not already there
204+
if 'is_secret' in old_secret and 'is_secret' not in new_secret:
205+
new_secret['is_secret'] = old_secret['is_secret']
229206

230207
return new_results
231208

@@ -246,7 +223,7 @@ def format_baseline_for_output(baseline):
246223
indent=2,
247224
sort_keys=True,
248225
separators=(',', ': '),
249-
)
226+
) + '\n'
250227

251228

252229
def _get_git_tracked_files(rootdir='.'):

tests/core/audit_test.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -500,7 +500,7 @@ def test_get_user_decision_invalid_input(self, mock_printer):
500500
[
501501
(
502502
True,
503-
'Is this a valid secret? (y)es, (n)o, (s)kip, (q)uit: ',
503+
'Is this a valid secret? i.e. not a false-positive (y)es, (n)o, (s)kip, (q)uit: ',
504504
),
505505
(
506506
False,

tests/core/baseline_test.py

+1-4
Original file line numberDiff line numberDiff line change
@@ -422,7 +422,7 @@ def test_new_results_has_nothing(self):
422422
],
423423
}
424424

425-
assert merge_results(old_result, {}) == old_result
425+
assert merge_results(old_result, {}) == {}
426426

427427
def test_old_results_have_subset_of_new_results(self):
428428
secretA = self.get_secret()
@@ -446,7 +446,6 @@ def test_old_results_have_subset_of_new_results(self):
446446
) == {
447447
'filenameA': [
448448
modified_secretA,
449-
secretB,
450449
],
451450
}
452451

@@ -480,10 +479,8 @@ def test_old_results_have_shifted_subset(self):
480479
},
481480
) == {
482481
'filename': [
483-
secretA,
484482
modified_secretB,
485483
modified_secretC,
486-
secretD,
487484
],
488485
}
489486

0 commit comments

Comments
 (0)