     "Rb",
     "RB",
 }
+_PAREN_IGNORE_TOKEN_TYPES = (
+    tokenize.NEWLINE,
+    tokenize.NL,
+    tokenize.COMMENT,
+)
 SINGLE_QUOTED_REGEX = re.compile(f"({'|'.join(_PREFIXES)})?'''")
 DOUBLE_QUOTED_REGEX = re.compile(f"({'|'.join(_PREFIXES)})?\"\"\"")
 QUOTE_DELIMITER_REGEX = re.compile(f"({'|'.join(_PREFIXES)})?(\"|')", re.DOTALL)
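To see why `_PAREN_IGNORE_TOKEN_TYPES` lists exactly `NEWLINE`, `NL`, and `COMMENT`: inside parentheses the tokenizer emits `NL` (and possibly `COMMENT`) tokens between the pieces of an implicitly concatenated string, and the token lookups added below need to skip them. A minimal illustration, not part of the patch:

```python
import io
import tokenize

# A parenthesized, implicitly concatenated string with a comment inside.
source = '(  # comment\n    "a"\n    "b"\n)\n'
for tok in tokenize.generate_tokens(io.StringIO(source).readline):
    print(tokenize.tok_name[tok.type], repr(tok.string))
# OP '(' / COMMENT / NL / STRING '"a"' / NL / STRING '"b"' / NL / OP ')' / ...
```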
@@ -710,6 +715,7 @@ def __init__(self, linter: PyLinter) -> None:
             tuple[int, int], tuple[str, tokenize.TokenInfo | None]
         ] = {}
         """Token position -> (token value, next token)."""
+        self._parenthesized_string_tokens: dict[tuple[int, int], bool] = {}

     def process_module(self, node: nodes.Module) -> None:
         self._unicode_literals = "unicode_literals" in node.future_imports
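A hand-worked sketch (not from the patch) of what this new mapping would hold after processing `("a"\n "b")`, under my reading of the helpers added below: only the *initial* string token of a parenthesized group maps to True.

```python
# Hypothetical contents of _parenthesized_string_tokens for the source
# '("a"\n "b")' -- keys are the (line, column) of each STRING token:
parenthesized_string_tokens: dict[tuple[int, int], bool] = {
    (1, 1): True,   # '"a"': prev non-trivia token is "(", next is ")"
    (2, 1): False,  # '"b"': preceded by a STRING token, so not "initial"
}
```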
@@ -738,10 +744,62 @@ def process_tokens(self, tokens: list[tokenize.TokenInfo]) -> None:
                     # to match with astroid `.col_offset`
                     start = (start[0], len(line[: start[1]].encode(encoding)))
                 self.string_tokens[start] = (str_eval(token), next_token)
+                is_parenthesized = self._is_initial_string_token(
+                    i, tokens
+                ) and self._is_parenthesized(i, tokens)
+                self._parenthesized_string_tokens[start] = is_parenthesized

         if self.linter.config.check_quote_consistency:
             self.check_for_consistent_string_delimiters(tokens)

+    def _is_initial_string_token(
+        self, index: int, tokens: Sequence[tokenize.TokenInfo]
+    ) -> bool:
+        # Must NOT be preceded by a string literal
+        prev_token = self._find_prev_token(index, tokens)
+        if prev_token and prev_token.type == tokenize.STRING:
+            return False
+        # Must be followed by a string literal token.
+        next_token = self._find_next_token(index, tokens)
+        return bool(next_token and next_token.type == tokenize.STRING)
+
+    def _is_parenthesized(self, index: int, tokens: list[tokenize.TokenInfo]) -> bool:
+        prev_token = self._find_prev_token(
+            index, tokens, ignore=(*_PAREN_IGNORE_TOKEN_TYPES, tokenize.STRING)
+        )
+        if not prev_token or prev_token.type != tokenize.OP or prev_token[1] != "(":
+            return False
+        next_token = self._find_next_token(
+            index, tokens, ignore=(*_PAREN_IGNORE_TOKEN_TYPES, tokenize.STRING)
+        )
+        return bool(
+            next_token and next_token.type == tokenize.OP and next_token[1] == ")"
+        )
+
+    def _find_prev_token(
+        self,
+        index: int,
+        tokens: Sequence[tokenize.TokenInfo],
+        *,
+        ignore: tuple[int, ...] = _PAREN_IGNORE_TOKEN_TYPES,
+    ) -> tokenize.TokenInfo | None:
+        i = index - 1
+        while i >= 0 and tokens[i].type in ignore:
+            i -= 1
+        return tokens[i] if i >= 0 else None
+
+    def _find_next_token(
+        self,
+        index: int,
+        tokens: Sequence[tokenize.TokenInfo],
+        *,
+        ignore: tuple[int, ...] = _PAREN_IGNORE_TOKEN_TYPES,
+    ) -> tokenize.TokenInfo | None:
+        i = index + 1
+        while i < len(tokens) and tokens[i].type in ignore:
+            i += 1
+        return tokens[i] if i < len(tokens) else None
+
     @only_required_for_messages("implicit-str-concat")
     def visit_call(self, node: nodes.Call) -> None:
         self.check_for_concatenated_strings(node.args, "call")
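The two `_find_*` helpers are plain linear scans that skip "trivia" tokens on either side. A standalone sketch of the same lookup (`find_next` here mirrors `_find_next_token`; it is not imported from pylint) shows a comment and line break between `(` and the string being skipped:

```python
import io
import tokenize

IGNORE = (tokenize.NEWLINE, tokenize.NL, tokenize.COMMENT)

def find_next(
    tokens: list[tokenize.TokenInfo], index: int, ignore: tuple[int, ...] = IGNORE
) -> tokenize.TokenInfo | None:
    # Same loop as _find_next_token above: walk right, skipping trivia tokens.
    i = index + 1
    while i < len(tokens) and tokens[i].type in ignore:
        i += 1
    return tokens[i] if i < len(tokens) else None

source = '(  # note\n    "a"\n)\n'
tokens = list(tokenize.generate_tokens(io.StringIO(source).readline))
# Index 0 is the "(" OP token; the COMMENT and NL tokens after it are skipped.
print(find_next(tokens, 0).string)  # '"a"'
```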
@@ -815,10 +873,18 @@ def check_for_concatenated_strings(
             matching_token, next_token = self.string_tokens[token_index]
             # We detect string concatenation: the AST Const is the
             # combination of 2 string tokens
-            if matching_token != elt.value and next_token is not None:
-                if next_token.type == tokenize.STRING and (
-                    next_token.start[0] == elt.lineno
-                    or self.linter.config.check_str_concat_over_line_jumps
+            if (
+                matching_token != elt.value
+                and next_token is not None
+                and next_token.type == tokenize.STRING
+            ):
+                if next_token.start[0] == elt.lineno or (
+                    self.linter.config.check_str_concat_over_line_jumps
+                    # Allow implicitly concatenated strings in parens.
+                    # See https://github.com/pylint-dev/pylint/issues/8552.
+                    and not self._parenthesized_string_tokens.get(
+                        (elt.lineno, elt.col_offset)
+                    )
                 ):
                     self.add_message(
                         "implicit-str-concat",
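Net effect of the change, sketched below with the `check-str-concat-over-line-jumps` option assumed enabled: deliberate, parenthesized multi-line concatenation stops being reported, while concatenation over a bare line jump, or on a single line, is still flagged.

```python
# With check-str-concat-over-line-jumps enabled:

OK = (
    "deliberately wrapped "
    "in parentheses"  # no longer reported after this change
)

FLAGGED = "concatenated over a line jump " \
    "without parentheses"  # still reported as implicit-str-concat

ALSO_FLAGGED = ("same line " "concatenation")  # same-line case is always reported
```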