Yelp
diff --git a/‎detect_secrets/plugins/base.py
+5-5 b/‎detect_secrets/plugins/base.py
+5-5
diff --git a/‎detect_secrets/plugins/core/constants.py
+4 b/‎detect_secrets/plugins/core/constants.py
+4
diff --git a/‎detect_secrets/plugins/core/ini_file_parser.py
+141 b/‎detect_secrets/plugins/core/ini_file_parser.py
+141
diff --git a/‎detect_secrets/plugins/core/yaml_file_parser.py
+133 b/‎detect_secrets/plugins/core/yaml_file_parser.py
+133
@@ -12,7 +12,7 @@ def __init__(self, **kwargs):
         if not self.secret_type:
             raise ValueError('Plugins need to declare a secret_type.')
 
-    def analyze(self, file, filename):  # pragma: no cover
+    def analyze(self, file, filename):
         """
         :param file:     The File object itself.
         :param filename: string; filename of File object, used for creating
@@ -29,7 +29,7 @@ def analyze(self, file, filename):  # pragma: no cover
         return potential_secrets
 
     @abstractmethod
-    def analyze_string(self, string, line_num, filename):   # pragma: no cover
+    def analyze_string(self, string, line_num, filename):
         """
         :param string:    string; the line to analyze
         :param line_num:  integer; line number that is currently being analyzed
@@ -38,17 +38,17 @@ def analyze_string(self, string, line_num, filename):   # pragma: no cover
 
         NOTE: line_num and filename are used for PotentialSecret creation only.
         """
-        pass
+        raise NotImplementedError
 
     @abstractmethod
-    def secret_generator(self, string):  # pragma: no cover
+    def secret_generator(self, string):
         """Flags secrets in a given string, and yields the raw secret value.
         Used in self.analyze_string for PotentialSecret creation.
 
         :type string: str
         :param string: the secret to scan
         """
-        pass
+        raise NotImplementedError
 
     @property
     def __dict__(self):
 
@@ -0,0 +1,4 @@
+import re
+
+# TODO: Update for not just python comments?
+WHITELIST_REGEX = re.compile(r'# ?pragma: ?whitelist[ -]secret')
@@ -0,0 +1,141 @@
+import configparser
+import re
+
+
+class IniFileParser(object):
+
+    def __init__(self, file):
+        self.parser = configparser.ConfigParser()
+        self.parser.optionxform = str
+        self.parser.read_file(file)
+
+        # Hacky way to keep track of line location
+        file.seek(0)
+        self.lines = list(map(lambda x: x.strip(), file.readlines()))
+        self.line_offset = 0
+
+    def iterator(self):
+        if not self.parser.sections():
+            # To prevent cases where it's not an ini file, but the parser
+            # helpfully attempts to parse everything to a DEFAULT section,
+            # when not explicitly provided.
+            raise configparser.Error
+
+        for section_name, _ in self.parser.items():
+            for key, values in self.parser.items(section_name):
+                for value, offset in self._get_value_and_line_offset(
+                    key,
+                    values,
+                ):
+                    yield value, offset
+
+    def _get_value_and_line_offset(self, key, values):
+        """Returns the index of the location of key, value pair in lines.
+
+        :type key: str
+        :param key: key, in config file.
+
+        :type values: str
+        :param values: values for key, in config file. This is plural,
+            because you can have multiple values per key. Eg.
+
+            >>> key =
+            ...     value1
+            ...     value2
+
+        :type lines: list
+        :param lines: a collection of lines-so-far in file
+
+        :rtype: list(tuple)
+        """
+        values_list = self._construct_values_list(values)
+        if not values_list:
+            return []
+
+        current_value_list_index = 0
+        output = []
+        lines_modified = False
+
+        first_line_regex = re.compile(r'^\s*{}[ :=]+{}'.format(
+            re.escape(key),
+            re.escape(values_list[current_value_list_index]),
+        ))
+        comment_regex = re.compile(r'\s*[;#]')
+        for index, line in enumerate(self.lines):
+            if current_value_list_index == 0:
+                if first_line_regex.match(line):
+                    output.append((
+                        values_list[current_value_list_index],
+                        self.line_offset + index + 1,
+                    ))
+
+                    current_value_list_index += 1
+
+                continue
+
+            # Check ignored lines before checking values, because
+            # you can write comments *after* the value.
+
+            # Ignore blank lines
+            if not line.strip():
+                continue
+
+            # Ignore comments
+            if comment_regex.match(line):
+                continue
+
+            if current_value_list_index == len(values_list):
+                if index == 0:
+                    index = 1       # don't want to count the same line again
+
+                self.line_offset += index
+                self.lines = self.lines[index:]
+                lines_modified = True
+
+                break
+            else:
+                output.append((
+                    values_list[current_value_list_index],
+                    self.line_offset + index + 1,
+                ))
+
+                current_value_list_index += 1
+
+        if not lines_modified:
+            # No more lines left, if loop was not explicitly left.
+            self.lines = []
+
+        return output
+
+    @staticmethod
+    def _construct_values_list(values):
+        """
+        This values_list is a strange construction, because of ini format.
+        We need to extract the values with the following supported format:
+
+            >>> key = value0
+            ...     value1
+            ...
+            ...     # comment line here
+            ...     value2
+
+        given that normally, either value0 is supplied, or (value1, value2),
+        but still allowing for all three at once.
+
+        Furthermore, with the configparser, we will get a list of values,
+        and intermediate blank lines, but no comments. This means that we can't
+        merely use the count of values' items to heuristically "skip ahead" lines,
+        because we still have to manually parse through this.
+
+        Therefore, we construct the values_list in the following fashion:
+            1. Keep the first value (in the example, this is `value0`)
+            2. For all other values, ignore blank lines.
+        Then, we can parse through, and look for values only.
+        """
+        values_list = values.splitlines()
+        return values_list[:1] + list(
+            filter(
+                lambda x: x,
+                values_list[1:],
+            ),
+        )
@@ -0,0 +1,133 @@
+import yaml
+
+from detect_secrets.plugins.core.constants import WHITELIST_REGEX
+
+
+class YamlFileParser(object):
+    """
+    Yaml config files are interesting, because they don't necessarily conform
+    to our basic regex for detecting HighEntropyStrings as strings don't
+    need to be quoted.
+
+    This causes interesting issues, because our regex won't catch non-quoted
+    strings, and if we ignore the quoting requirement, then we increase our
+    false positive rate, because any long string would have high entropy.
+
+    Therefore, we take a different approach: intercept the parsing of the yaml
+    file to identify string values. This assumes:
+
+        1. Secrets are strings
+        2. Secrets are not keys
+
+    Then, we calculate the entropy of those string values.
+
+    The difficulty comes from determining the line number which these values
+    come from. To do this, we transform the string into a dictionary of
+    meta-tags, in the following format:
+
+    >>> {
+        'key': {
+            '__value__': value,
+            '__line__': <line_number>,
+        }
+    }
+
+    This way, we can quickly identify the line number for auditing at a later
+    stage.
+
+    This parsing method is inspired by https://stackoverflow.com/a/13319530.
+    """
+
+    def __init__(self, file):
+        self.content = file.read()
+        self.loader = yaml.SafeLoader(self.content)
+
+        self.loader.compose_node = self._compose_node_shim
+
+    def json(self):
+        return self.loader.get_single_data()
+
+    def _compose_node_shim(self, parent, index):
+        line = self.loader.line
+
+        node = yaml.composer.Composer.compose_node(self.loader, parent, index)
+        node.__line__ = line + 1
+
+        if node.tag.endswith(':map'):
+            return self._tag_dict_values(node)
+
+        # TODO: Not sure if need to do :seq
+
+        return node
+
+    def _tag_dict_values(self, map_node):
+        """
+        :type map_node: yaml.nodes.MappingNode
+        :param map_node: It looks like map_node.value contains a list of
+            pair tuples, corresponding to key,value pairs.
+        """
+        new_values = []
+        for key, value in map_node.value:
+            if not value.tag.endswith(':str'):
+                new_values.append((key, value,))
+                continue
+
+            augmented_string = yaml.nodes.MappingNode(
+                tag=map_node.tag,
+                value=[
+                    self._create_key_value_pair_for_mapping_node_value(
+                        '__value__',
+                        value.value,
+                        'tag:yaml.org,2002:str',
+                    ),
+                    self._create_key_value_pair_for_mapping_node_value(
+                        '__line__',
+                        str(value.__line__),
+                        'tag:yaml.org,2002:int',
+                    ),
+                ],
+            )
+
+            new_values.append((key, augmented_string,))
+
+        output = yaml.nodes.MappingNode(
+            tag=map_node.tag,
+            value=new_values,
+            start_mark=map_node.start_mark,
+            end_mark=map_node.end_mark,
+            flow_style=map_node.flow_style,
+        )
+        return output
+
+    @staticmethod
+    def _create_key_value_pair_for_mapping_node_value(key, value, tag):
+        return (
+            yaml.nodes.ScalarNode(
+                tag='tag:yaml.org,2002:str',
+                value=key,
+            ),
+            yaml.nodes.ScalarNode(
+                tag=tag,
+                value=value,
+            ),
+        )
+
+    def get_ignored_lines(self):
+        """
+        Return a set of integers that refer to line numbers that were
+        whitelisted by the user and should be ignored.
+
+        We need to parse the file separately from PyYAML parsing because
+        the parser drops the comments (at least up to version 3.13):
+        https://github.com/yaml/pyyaml/blob/a2d481b8dbd2b352cb001f07091ccf669227290f/lib3/yaml/scanner.py#L749
+
+        :return: set
+        """
+
+        ignored_lines = set()
+
+        for line_number, line in enumerate(self.content.split('\n'), 1):
+            if WHITELIST_REGEX.search(line):
+                ignored_lines.add(line_number)
+
+        return ignored_lines