↪️ Merge pull request #133 from Yelp/keyword_detector_accuracy_cls_yaml

KevinHock · web-flow · commit 1afb05fb6e9c · 2019-03-20T15:34:05.000-07:00
Improve Keyword detector accuracy for .cls and .yaml files
diff --git a/detect_secrets/core/usage.py b/detect_secrets/core/usage.py
@@ -81,7 +81,7 @@ def _add_filenames_argument(self):
         self.parser.add_argument(
             'filenames',
             nargs='*',
-            help='Filenames to check',
+            help='Filenames to check.',
         )
         return self
 
@@ -427,5 +427,5 @@ def _add_keyword_exclude(self):
         self.parser.add_argument(
             '--keyword-exclude',
             type=str,
-            help='Pass in regex to exclude false positives found by keyword detector',
+            help='Pass in regex to exclude false positives found by keyword detector.',
         )
diff --git a/detect_secrets/plugins/common/filetype.py b/detect_secrets/plugins/common/filetype.py
@@ -2,10 +2,12 @@
 
 
 class FileType(Enum):
-    JAVASCRIPT = 0
-    PHP = 1
-    PYTHON = 2
-    OTHER = 3
+    CLS = 0
+    JAVASCRIPT = 1
+    PHP = 2
+    PYTHON = 3
+    YAML = 4
+    OTHER = 5
 
 
 def determine_file_type(filename):
@@ -14,10 +16,18 @@ def determine_file_type(filename):
 
     :rtype: FileType
     """
-    if filename.endswith('.js'):
+    if filename.endswith('.cls'):
+        return FileType.CLS
+    elif filename.endswith('.js'):
         return FileType.JAVASCRIPT
-    elif filename.endswith('.py'):
-        return FileType.PYTHON
     elif filename.endswith('.php'):
         return FileType.PHP
+    elif filename.endswith('.py'):
+        return FileType.PYTHON
+    elif (
+        filename.endswith(
+            ('.yaml', '.yml'),
+        )
+    ):
+        return FileType.YAML
     return FileType.OTHER
diff --git a/detect_secrets/plugins/keyword.py b/detect_secrets/plugins/keyword.py
@@ -135,7 +135,7 @@
     FOLLOWED_BY_EQUAL_SIGNS_REGEX: 9,
     FOLLOWED_BY_QUOTES_AND_SEMICOLON_REGEX: 5,
 }
-PYTHON_BLACKLIST_REGEX_TO_GROUP = {
+QUOTES_REQUIRED_BLACKLIST_REGEX_TO_GROUP = {
     FOLLOWED_BY_COLON_QUOTES_REQUIRED_REGEX: 7,
     FOLLOWED_BY_EQUAL_SIGNS_QUOTES_REQUIRED_REGEX: 9,
     FOLLOWED_BY_QUOTES_AND_SEMICOLON_REGEX: 5,
@@ -149,13 +149,11 @@ class KeywordDetector(BasePlugin):
 
     secret_type = 'Secret Keyword'
 
-    def __init__(
-        self, keyword_exclude=None,
-        exclude_lines_regex=None, **kwargs
-    ):
+    def __init__(self, keyword_exclude=None, exclude_lines_regex=None, **kwargs):
         super(KeywordDetector, self).__init__(
             exclude_lines_regex,
         )
+
         self.keyword_exclude = None
         if keyword_exclude:
             self.keyword_exclude = re.compile(
@@ -187,8 +185,11 @@ def analyze_string_content(self, string, line_num, filename):
     def secret_generator(self, string, filetype):
         lowered_string = string.lower()
 
-        if filetype == FileType.PYTHON:
-            blacklist_regex_to_group = PYTHON_BLACKLIST_REGEX_TO_GROUP
+        if filetype in (
+            FileType.CLS,
+            FileType.PYTHON,
+        ):
+            blacklist_regex_to_group = QUOTES_REQUIRED_BLACKLIST_REGEX_TO_GROUP
         else:
             blacklist_regex_to_group = BLACKLIST_REGEX_TO_GROUP
 
@@ -217,9 +218,13 @@ def probably_false_positive(lowered_secret, filetype):
                 or lowered_secret.startswith('fs.read')
                 or lowered_secret == 'new'
             )
-        ) or (  # If it is a .php file, do not report $variables
+        ) or (
             filetype == FileType.PHP
             and lowered_secret[0] == '$'
+        ) or (
+            filetype == FileType.YAML
+            and lowered_secret.startswith('{{')
+            and lowered_secret.endswith('}}')
         )
     ):
         return True
diff --git a/tests/core/usage_test.py b/tests/core/usage_test.py
@@ -10,7 +10,8 @@ class TestPluginOptions(object):
     @staticmethod
     def parse_args(argument_string=''):
         # PluginOptions are added in pre-commit hook
-        return ParserBuilder().add_pre_commit_arguments()\
+        return ParserBuilder()\
+            .add_pre_commit_arguments()\
             .parse_args(argument_string.split())
 
     def test_added_by_default(self):
@@ -61,9 +62,11 @@ def test_custom_limit(self, argument_string, expected_value):
         if expected_value is not None:
             args = self.parse_args(argument_string)
 
-            assert args.plugins['HexHighEntropyString'][
-                'hex_limit'
-            ] == expected_value
+            assert (
+                args.plugins['HexHighEntropyString']['hex_limit']
+
+                == expected_value
+            )
         else:
             with pytest.raises(SystemExit):
                 self.parse_args(argument_string)
diff --git a/tests/plugins/keyword_test.py b/tests/plugins/keyword_test.py
@@ -34,31 +34,31 @@
 ]
 STANDARD_POSITIVES = {
     # FOLLOWED_BY_COLON_RE
-    "'theapikey': 'h}o)p${e]nob(ody[finds>-_$#thisone'",
-    '"theapikey": "h}o)p${e]nob(ody[finds>-_$#thisone"',
-    'apikey: h}o)p${e]nob(ody[finds>-_$#thisone',
-    'apikey:h}o)p${e]nob(ody[finds>-_$#thisone',
-    'theapikey:h}o)p${e]nob(ody[finds>-_$#thisone',
-    'apikey: "h}o)p${e]nob(ody[finds>-_$#thisone"',
-    "apikey:  'h}o)p${e]nob(ody[finds>-_$#thisone'",
+    "'theapikey': '{{h}o)p${e]nob(ody[finds>-_$#thisone}}'",
+    '"theapikey": "{{h}o)p${e]nob(ody[finds>-_$#thisone}}"',
+    'apikey: {{h}o)p${e]nob(ody[finds>-_$#thisone}}',
+    'apikey:{{h}o)p${e]nob(ody[finds>-_$#thisone}}',
+    'theapikey:{{h}o)p${e]nob(ody[finds>-_$#thisone}}',
+    'apikey: "{{h}o)p${e]nob(ody[finds>-_$#thisone}}"',
+    "apikey:  '{{h}o)p${e]nob(ody[finds>-_$#thisone}}'",
     # FOLLOWED_BY_EQUAL_SIGNS_RE
-    'some_dict["secret"] = "h}o)p${e]nob(ody[finds>-_$#thisone"',
-    "some_dict['secret'] = h}o)p${e]nob(ody[finds>-_$#thisone",
-    'my_password=h}o)p${e]nob(ody[finds>-_$#thisone',
-    'my_password= h}o)p${e]nob(ody[finds>-_$#thisone',
-    'my_password =h}o)p${e]nob(ody[finds>-_$#thisone',
-    'my_password = h}o)p${e]nob(ody[finds>-_$#thisone',
-    'my_password =h}o)p${e]nob(ody[finds>-_$#thisone',
-    'the_password=h}o)p${e]nob(ody[finds>-_$#thisone\n',
-    'the_password= "h}o)p${e]nob(ody[finds>-_$#thisone"\n',
-    'the_password=\'h}o)p${e]nob(ody[finds>-_$#thisone\'\n',
+    'some_dict["secret"] = "{{h}o)p${e]nob(ody[finds>-_$#thisone}}"',
+    "some_dict['secret'] = {{h}o)p${e]nob(ody[finds>-_$#thisone}}",
+    'my_password={{h}o)p${e]nob(ody[finds>-_$#thisone}}',
+    'my_password= {{h}o)p${e]nob(ody[finds>-_$#thisone}}',
+    'my_password ={{h}o)p${e]nob(ody[finds>-_$#thisone}}',
+    'my_password = {{h}o)p${e]nob(ody[finds>-_$#thisone}}',
+    'my_password ={{h}o)p${e]nob(ody[finds>-_$#thisone}}',
+    'the_password={{h}o)p${e]nob(ody[finds>-_$#thisone}}\n',
+    'the_password= "{{h}o)p${e]nob(ody[finds>-_$#thisone}}"\n',
+    'the_password=\'{{h}o)p${e]nob(ody[finds>-_$#thisone}}\'\n',
     # FOLLOWED_BY_QUOTES_AND_SEMICOLON_RE
-    'apikey "h}o)p${e]nob(ody[finds>-_$#thisone";',  # Double-quotes
-    'fooapikeyfoo "h}o)p${e]nob(ody[finds>-_$#thisone";',  # Double-quotes
-    'fooapikeyfoo"h}o)p${e]nob(ody[finds>-_$#thisone";',  # Double-quotes
-    'private_key \'h}o)p${e]nob(ody[finds>-_$#thisone\';',  # Single-quotes
-    'fooprivate_keyfoo\'h}o)p${e]nob(ody[finds>-_$#thisone\';',  # Single-quotes
-    'fooprivate_key\'h}o)p${e]nob(ody[finds>-_$#thisone\';',  # Single-quotes
+    'apikey "{{h}o)p${e]nob(ody[finds>-_$#thisone}}";',  # Double-quotes
+    'fooapikeyfoo "{{h}o)p${e]nob(ody[finds>-_$#thisone}}";',  # Double-quotes
+    'fooapikeyfoo"{{h}o)p${e]nob(ody[finds>-_$#thisone}}";',  # Double-quotes
+    'private_key \'{{h}o)p${e]nob(ody[finds>-_$#thisone}}\';',  # Single-quotes
+    'fooprivate_keyfoo\'{{h}o)p${e]nob(ody[finds>-_$#thisone}}\';',  # Single-quotes
+    'fooprivate_key\'{{h}o)p${e]nob(ody[finds>-_$#thisone}}\';',  # Single-quotes
 }
 
 
@@ -77,100 +77,152 @@ def test_analyze_standard_positives(self, file_content):
         for potential_secret in output:
             assert 'mock_filename' == potential_secret.filename
             assert (
-                potential_secret.secret_hash
-                == PotentialSecret.hash_secret('h}o)p${e]nob(ody[finds>-_$#thisone')
+                potential_secret.secret_hash ==
+                PotentialSecret.hash_secret('{{h}o)p${e]nob(ody[finds>-_$#thisone}}')
             )
 
     @pytest.mark.parametrize(
         'file_content',
-        STANDARD_POSITIVES - {
-            # FOLLOWED_BY_COLON_QUOTES_REQUIRED_RE
-            'apikey: h}o)p${e]nob(ody[finds>-_$#thisone',
-            'apikey:h}o)p${e]nob(ody[finds>-_$#thisone',
-            'theapikey:h}o)p${e]nob(ody[finds>-_$#thisone',
-            # FOLLOWED_BY_EQUAL_SIGNS_QUOTES_REQUIRED_RE
-            "some_dict['secret'] = h}o)p${e]nob(ody[finds>-_$#thisone",
-            'my_password=h}o)p${e]nob(ody[finds>-_$#thisone',
-            'my_password= h}o)p${e]nob(ody[finds>-_$#thisone',
-            'my_password =h}o)p${e]nob(ody[finds>-_$#thisone',
-            'my_password = h}o)p${e]nob(ody[finds>-_$#thisone',
-            'my_password =h}o)p${e]nob(ody[finds>-_$#thisone',
-            'the_password=h}o)p${e]nob(ody[finds>-_$#thisone\n',
-        },
+        STANDARD_POSITIVES,
+    )
+    def test_analyze_with_line_exclude(self, file_content):
+        logic = KeywordDetector(keyword_exclude='thisone')
+
+        f = mock_file_object(file_content)
+        output = logic.analyze(f, 'mock_filename.foo')
+        assert len(output) == 0
+
+    @pytest.mark.parametrize(
+        'file_content, file_extension',
+        (
+            (positive, file_extension)
+            for positive in (
+                STANDARD_POSITIVES - {
+                    # FOLLOWED_BY_COLON_QUOTES_REQUIRED_RE
+                    'apikey: {{h}o)p${e]nob(ody[finds>-_$#thisone}}',
+                    'apikey:{{h}o)p${e]nob(ody[finds>-_$#thisone}}',
+                    'theapikey:{{h}o)p${e]nob(ody[finds>-_$#thisone}}',
+                    # FOLLOWED_BY_EQUAL_SIGNS_QUOTES_REQUIRED_RE
+                    "some_dict['secret'] = {{h}o)p${e]nob(ody[finds>-_$#thisone}}",
+                    'my_password={{h}o)p${e]nob(ody[finds>-_$#thisone}}',
+                    'my_password= {{h}o)p${e]nob(ody[finds>-_$#thisone}}',
+                    'my_password ={{h}o)p${e]nob(ody[finds>-_$#thisone}}',
+                    'my_password = {{h}o)p${e]nob(ody[finds>-_$#thisone}}',
+                    'my_password ={{h}o)p${e]nob(ody[finds>-_$#thisone}}',
+                    'the_password={{h}o)p${e]nob(ody[finds>-_$#thisone}}\n',
+                }
+            ) for file_extension in (
+                '.cls',
+                '.py',
+            )
+        ),
     )
-    def test_analyze_python_positives(self, file_content):
+    def test_analyze_quotes_required_positives(self, file_content, file_extension):
         logic = KeywordDetector()
 
         f = mock_file_object(file_content)
-        output = logic.analyze(f, 'mock_filename.py')
+        mock_filename = 'mock_filename{}'.format(file_extension)
+        output = logic.analyze(f, mock_filename)
         assert len(output) == 1
         for potential_secret in output:
-            assert 'mock_filename.py' == potential_secret.filename
+            assert mock_filename == potential_secret.filename
             assert (
-                potential_secret.secret_hash
-                == PotentialSecret.hash_secret('h}o)p${e]nob(ody[finds>-_$#thisone')
+                potential_secret.secret_hash ==
+                PotentialSecret.hash_secret('{{h}o)p${e]nob(ody[finds>-_$#thisone}}')
             )
 
     @pytest.mark.parametrize(
-        'negative',
+        'file_content',
         STANDARD_NEGATIVES,
     )
-    def test_analyze_standard_negatives(self, negative):
+    def test_analyze_standard_negatives(self, file_content):
         logic = KeywordDetector()
 
-        f = mock_file_object(negative)
+        f = mock_file_object(file_content)
         output = logic.analyze(f, 'mock_filename.foo')
         assert len(output) == 0
 
     @pytest.mark.parametrize(
-        'js_negative',
+        'file_content',
         STANDARD_NEGATIVES + [
             # FOLLOWED_BY_COLON_RE
             'apiKey: this.apiKey,',
             "apiKey: fs.readFileSync('foo',",
         ],
     )
-    def test_analyze_javascript_negatives(self, js_negative):
+    def test_analyze_javascript_negatives(self, file_content):
         logic = KeywordDetector()
 
-        f = mock_file_object(js_negative)
+        f = mock_file_object(file_content)
         output = logic.analyze(f, 'mock_filename.js')
         assert len(output) == 0
 
     @pytest.mark.parametrize(
-        'secret_starting_with_dollar_sign',
+        'file_content',
         STANDARD_NEGATIVES + [
             # FOLLOWED_BY_EQUAL_SIGNS_RE
             '$password = $input;',
         ],
     )
-    def test_analyze_php_negatives(self, secret_starting_with_dollar_sign):
+    def test_analyze_php_negatives(self, file_content):
         logic = KeywordDetector()
 
-        f = mock_file_object(secret_starting_with_dollar_sign)
+        f = mock_file_object(file_content)
         output = logic.analyze(f, 'mock_filename.php')
         assert len(output) == 0
 
     @pytest.mark.parametrize(
-        'secret_with_no_quote',
-        STANDARD_NEGATIVES + [
-            # FOLLOWED_BY_COLON_QUOTES_REQUIRED_RE
-            'apikey: hope]nobody[finds>-_$#thisone',
-            'apikey:hope]nobody[finds>-_$#thisone',
-            'theapikey:hope]nobody[finds>-_$#thisone',
-            # FOLLOWED_BY_EQUAL_SIGNS_QUOTES_REQUIRED_RE
-            "some_dict['secret'] = hope]nobody[finds>-_$#thisone",
-            'my_password=hope]nobody[finds>-_$#thisone',
-            'my_password= hope]nobody[finds>-_$#thisone',
-            'my_password =hope]nobody[finds>-_$#thisone',
-            'my_password = hope]nobody[finds>-_$#thisone',
-            'my_password =hope]nobody[finds>-_$#thisone',
-            'the_password=hope]nobody[finds>-_$#thisone\n',
-        ],
+        'file_content, file_extension',
+        (
+            (negative, file_extension)
+            for negative in (
+                STANDARD_NEGATIVES + [
+                    # FOLLOWED_BY_COLON_QUOTES_REQUIRED_RE
+                    'apikey: hope]nobody[finds>-_$#thisone',
+                    'apikey:hope]nobody[finds>-_$#thisone',
+                    'theapikey:hope]nobody[finds>-_$#thisone',
+                    # FOLLOWED_BY_EQUAL_SIGNS_QUOTES_REQUIRED_RE
+                    "some_dict['secret'] = hope]nobody[finds>-_$#thisone",
+                    'my_password=hope]nobody[finds>-_$#thisone',
+                    'my_password= hope]nobody[finds>-_$#thisone',
+                    'my_password =hope]nobody[finds>-_$#thisone',
+                    'my_password = hope]nobody[finds>-_$#thisone',
+                    'my_password =hope]nobody[finds>-_$#thisone',
+                    'the_password=hope]nobody[finds>-_$#thisone\n',
+                ]
+            ) for file_extension in (
+                '.cls',
+                '.py',
+            )
+        ),
     )
-    def test_analyze_python_negatives(self, secret_with_no_quote):
+    def test_analyze_quotes_required_negatives(self, file_content, file_extension):
         logic = KeywordDetector()
 
-        f = mock_file_object(secret_with_no_quote)
-        output = logic.analyze(f, 'mock_filename.py')
+        f = mock_file_object(file_content)
+        output = logic.analyze(
+            f,
+            'mock_filename{}'.format(file_extension),
+        )
+        assert len(output) == 0
+
+    @pytest.mark.parametrize(
+        'file_content, file_extension',
+        (
+            (yaml_negative, file_extension)
+            for yaml_negative in STANDARD_POSITIVES
+            for file_extension in (
+                '.yaml',
+                '.yml',
+            )
+        ),
+    )
+    def test_analyze_yaml_negatives(self, file_content, file_extension):
+        logic = KeywordDetector()
+
+        f = mock_file_object(file_content)
+        output = logic.analyze(
+            f,
+            'mock_filename{}'.format(file_extension),
+        )
         assert len(output) == 0

Original file line number	Diff line number	Diff line change
`@@ -81,7 +81,7 @@ def _add_filenames_argument(self):`
`81`	`81`	`self.parser.add_argument(`
`82`	`82`	`'filenames',`
`83`	`83`	`nargs='*',`
`84`		`- help='Filenames to check',`
	`84`	`+ help='Filenames to check.',`
`85`	`85`	`)`
`86`	`86`	`return self`
`87`	`87`
`@@ -427,5 +427,5 @@ def _add_keyword_exclude(self):`
`427`	`427`	`self.parser.add_argument(`
`428`	`428`	`'--keyword-exclude',`
`429`	`429`	`type=str,`
`430`		`- help='Pass in regex to exclude false positives found by keyword detector',`
	`430`	`+ help='Pass in regex to exclude false positives found by keyword detector.',`
`431`	`431`	`)`