Skip to content

[Backport maintenance/2.17.x] Ignore quantifiers when splitting comma-separated regexes (#8898) #8901

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions doc/whatsnew/fragments/7229.bugfix
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
When parsing comma-separated lists of regular expressions in the config, ignore
commas that are inside braces, since those belong to quantifiers (e.g. ``{1,3}``)
rather than separating expressions.

Closes #7229
2 changes: 1 addition & 1 deletion pylint/config/argument.py
Original file line number Diff line number Diff line change
Expand Up @@ -122,7 +122,7 @@ def _regex_transformer(value: str) -> Pattern[str]:
def _regexp_csv_transfomer(value: str) -> Sequence[Pattern[str]]:
"""Transforms a comma separated list of regular expressions.

Each item obtained from splitting ``value`` is passed through
``_regex_transformer`` and the resulting compiled patterns are returned
as a list, in the order the items were produced.
"""
patterns: list[Pattern[str]] = []
for pattern in _csv_transformer(value):
for pattern in pylint_utils._check_regexp_csv(value):
patterns.append(_regex_transformer(pattern))
return patterns

Expand Down
2 changes: 2 additions & 0 deletions pylint/utils/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
HAS_ISORT_5,
IsortDriver,
_check_csv,
_check_regexp_csv,
_format_option_value,
_splitstrip,
_unquote,
Expand All @@ -34,6 +35,7 @@
"HAS_ISORT_5",
"IsortDriver",
"_check_csv",
"_check_regexp_csv",
"_format_option_value",
"_splitstrip",
"_unquote",
Expand Down
28 changes: 27 additions & 1 deletion pylint/utils/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,8 @@
import textwrap
import tokenize
import warnings
from collections.abc import Sequence
from collections import deque
from collections.abc import Iterable, Sequence
from io import BufferedReader, BytesIO
from typing import (
TYPE_CHECKING,
Expand Down Expand Up @@ -330,6 +331,31 @@ def _check_csv(value: list[str] | tuple[str] | str) -> Sequence[str]:
return _splitstrip(value)


def _check_regexp_csv(value: list[str] | tuple[str] | str) -> Iterable[str]:
r"""Split a comma-separated list of regexps, taking care to avoid splitting
a regex employing a comma as quantifier, as in `\d{1,2}`."""
if isinstance(value, (list, tuple)):
yield from value
else:
# None is a sentinel value here
regexps: deque[deque[str] | None] = deque([None])
open_braces = False
for char in value:
if char == "{":
open_braces = True
elif char == "}" and open_braces:
open_braces = False

if char == "," and not open_braces:
regexps.append(None)
elif regexps[-1] is None:
regexps.pop()
regexps.append(deque([char]))
else:
regexps[-1].append(char)
yield from ("".join(regexp).strip() for regexp in regexps if regexp is not None)


def _comment(string: str) -> str:
"""Return string as a comment."""
lines = [line.strip() for line in string.splitlines()]
Expand Down
31 changes: 29 additions & 2 deletions tests/config/test_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,10 @@
from __future__ import annotations

import os
import re
from pathlib import Path
from tempfile import TemporaryDirectory
from typing import Any

import pytest
from pytest import CaptureFixture
Expand Down Expand Up @@ -113,6 +115,31 @@ def test_unknown_py_version(capsys: CaptureFixture) -> None:
assert "the-newest has an invalid format, should be a version string." in output.err


# Each case pairs a raw ``--bad-names-rgx`` option value with the list of regex
# source strings it is expected to be split into. The last case checks that the
# comma inside the ``{1,3}`` quantifier is not treated as a separator.
CSV_REGEX_COMMA_CASES = [
("foo", ["foo"]),
("foo,bar", ["foo", "bar"]),
("foo, bar", ["foo", "bar"]),
("foo, bar{1,3}", ["foo", "bar{1,3}"]),
]


@pytest.mark.parametrize("in_string,expected", CSV_REGEX_COMMA_CASES)
def test_csv_regex_comma_in_quantifier(in_string: str, expected: list[str]) -> None:
    """Verify that a comma-separated regex option is split correctly even when
    commas appear inside quantifier expressions.
    """
    # Run the linter on an empty module so that only option parsing matters.
    run = Run(
        [str(EMPTY_MODULE), rf"--bad-names-rgx={in_string}"],
        exit=False,
    )
    parsed: list[re.Pattern[Any]] = run.linter.config.bad_names_rgxs

    compiled_expected = [re.compile(regex) for regex in expected]
    assert parsed == compiled_expected


def test_regex_error(capsys: CaptureFixture) -> None:
"""Check that we correctly error when an an option is passed whose value is an invalid regular expression."""
with pytest.raises(SystemExit):
Expand All @@ -135,12 +162,12 @@ def test_csv_regex_error(capsys: CaptureFixture) -> None:
"""
with pytest.raises(SystemExit):
Run(
[str(EMPTY_MODULE), r"--bad-names-rgx=(foo{1,3})"],
[str(EMPTY_MODULE), r"--bad-names-rgx=(foo{1,}, foo{1,3}})"],
exit=False,
)
output = capsys.readouterr()
assert (
r"Error in provided regular expression: (foo{1 beginning at index 0: missing ), unterminated subpattern"
r"Error in provided regular expression: (foo{1,} beginning at index 0: missing ), unterminated subpattern"
in output.err
)

Expand Down