Merge pull request #90 from Yelp/update_baseline_version

KevinHock · web-flow · commit 3364c140d271 · 2018-10-25T16:36:11.000-07:00
Update merge results to only propagate is_secret of new secrets
diff --git a/detect_secrets/core/audit.py b/detect_secrets/core/audit.py
@@ -177,7 +177,7 @@ def _get_user_decision(prompt_secret_decision=True, can_step_back=False):
             print('Invalid input.')
 
         if 'y' in allowable_user_input:
-            user_input_string = 'Is this a valid secret? (y)es, (n)o, '
+            user_input_string = 'Is this a valid secret? i.e. not a false-positive (y)es, (n)o, '
         else:
             user_input_string = 'What would you like to do? '
         if 'b' in allowable_user_input:
diff --git a/detect_secrets/core/baseline.py b/detect_secrets/core/baseline.py
@@ -157,6 +157,8 @@ def merge_baseline(old_baseline, new_baseline):
     to the new baseline, and will only work with baselines created
     after v0.9.
 
+    Note that the exclude regex is handled separately.
+
     :type old_baseline: dict
     :param old_baseline: baseline dict, loaded from previous baseline
 
@@ -174,14 +176,6 @@ def merge_baseline(old_baseline, new_baseline):
 def merge_results(old_results, new_results):
     """Update results in baseline with latest information.
 
-    As a rule of thumb, we want to favor the new results, yet at the same
-    time, transfer non-modified data from the old results set.
-
-    Assumptions:
-        * The list of results in each secret set is in the same order.
-          This means that new_results cannot have *more* results than
-          old_results.
-
     :type old_results: dict
     :param old_results: results of status quo
 
@@ -190,42 +184,25 @@ def merge_results(old_results, new_results):
 
     :rtype: dict
     """
-    for filename, secrets in old_results.items():
+    for filename, old_secrets in old_results.items():
         if filename not in new_results:
-            new_results[filename] = secrets
             continue
 
-        if len(secrets) == len(new_results[filename]):
-            # Assuming that secrets remain in order.
-            for index, secrets_tuple in enumerate(zip(secrets, new_results[filename])):
-                old_secret, new_secret = secrets_tuple
-                if old_secret['hashed_secret'] != new_secret['hashed_secret']:
-                    # We don't join the two secret sets, because if the later
-                    # result set did not discover an old secret, it's probably
-                    # moved.
-                    # If it did discover it, then lengths would be different.
-                    continue
-
-                if 'is_secret' in old_secret and 'is_secret' not in new_secret:
-                    # If the new_secret has a label, then go with the later
-                    # version.
-                    new_results[filename][index] = old_secret
+        old_secrets_mapping = dict()
+        for old_secret in old_secrets:
+            old_secrets_mapping[old_secret['hashed_secret']] = old_secret
 
-            continue
+        for new_secret in new_results[filename]:
+            if new_secret['hashed_secret'] not in old_secrets_mapping:
+                # We don't join the two secret sets, because if the newer
+                # result set did not discover an old secret, it probably
+                # moved.
+                continue
 
-        # Need to figure out starting point. That is, while
-        # len(new_results) < len(old_results), they may not start at the same
-        # place.
-        #
-        # e.g. old_results = A,B,C,D
-        #      new_results = B,C
-        first_secret_hash = new_results[filename][0]['hashed_secret']
-        for index, secret in enumerate(secrets):
-            if secret['hashed_secret'] == first_secret_hash:
-                new_results[filename] = secrets[:index] + \
-                    new_results[filename] + \
-                    secrets[index + len(new_results[filename]):]
-                break
+            old_secret = old_secrets_mapping[new_secret['hashed_secret']]
+            # Only propagate 'is_secret' if it's not already there
+            if 'is_secret' in old_secret and 'is_secret' not in new_secret:
+                new_secret['is_secret'] = old_secret['is_secret']
 
     return new_results
 
@@ -246,7 +223,7 @@ def format_baseline_for_output(baseline):
         indent=2,
         sort_keys=True,
         separators=(',', ': '),
-    )
+    ) + '\n'
 
 
 def _get_git_tracked_files(rootdir='.'):
diff --git a/tests/core/audit_test.py b/tests/core/audit_test.py
@@ -500,7 +500,7 @@ def test_get_user_decision_invalid_input(self, mock_printer):
         [
             (
                 True,
-                'Is this a valid secret? (y)es, (n)o, (s)kip, (q)uit: ',
+                'Is this a valid secret? i.e. not a false-positive (y)es, (n)o, (s)kip, (q)uit: ',
             ),
             (
                 False,
diff --git a/tests/core/baseline_test.py b/tests/core/baseline_test.py
@@ -422,7 +422,7 @@ def test_new_results_has_nothing(self):
             ],
         }
 
-        assert merge_results(old_result, {}) == old_result
+        assert merge_results(old_result, {}) == {}
 
     def test_old_results_have_subset_of_new_results(self):
         secretA = self.get_secret()
@@ -446,7 +446,6 @@ def test_old_results_have_subset_of_new_results(self):
         ) == {
             'filenameA': [
                 modified_secretA,
-                secretB,
             ],
         }
 
@@ -480,10 +479,8 @@ def test_old_results_have_shifted_subset(self):
             },
         ) == {
             'filename': [
-                secretA,
                 modified_secretB,
                 modified_secretC,
-                secretD,
             ],
         }
 

Original file line number	Diff line number	Diff line change
`@@ -500,7 +500,7 @@ def test_get_user_decision_invalid_input(self, mock_printer):`
`500`	`500`	`[`
`501`	`501`	`(`
`502`	`502`	`True,`
`503`		`- 'Is this a valid secret? (y)es, (n)o, (s)kip, (q)uit: ',`
	`503`	`+ 'Is this a valid secret? i.e. not a false-positive (y)es, (n)o, (s)kip, (q)uit: ',`
`504`	`504`	`),`
`505`	`505`	`(`
`506`	`506`	`False,`