Skip to content

Commit 81917bc

Browse files
authored
Allow parenthesized implicitly concatenated strings inside calls (#8590)
Promotes compatibility with Black. See psf/black#2188 (comment)
1 parent 92485a3 commit 81917bc

File tree

6 files changed

+126
-7
lines changed

6 files changed

+126
-7
lines changed

doc/data/messages/i/implicit-str-concat/details.rst

+13
Original file line numberDiff line numberDiff line change
@@ -24,3 +24,16 @@ for string parameters passed on multiple lines in function calls:
2424
DeprecationWarning,
2525
stacklevel=3,
2626
)
27+
28+
No message will be emitted, though, if you clarify the wanted concatenation with parentheses:
29+
30+
.. code-block:: python
31+
32+
warnings.warn(
33+
(
34+
"rotate() is deprecated and will be removed in a future release. "
35+
"Use the rotation() context manager instead."
36+
),
37+
DeprecationWarning,
38+
stacklevel=3,
39+
)

doc/data/messages/i/implicit-str-concat/related.rst

-1
This file was deleted.
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
Allow parenthesized implicitly concatenated strings when `check-str-concat-over-line-jumps` is enabled.
2+
3+
Closes #8552.

pylint/checkers/strings.py

+70-4
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,11 @@
5555
"Rb",
5656
"RB",
5757
}
58+
_PAREN_IGNORE_TOKEN_TYPES = (
59+
tokenize.NEWLINE,
60+
tokenize.NL,
61+
tokenize.COMMENT,
62+
)
5863
SINGLE_QUOTED_REGEX = re.compile(f"({'|'.join(_PREFIXES)})?'''")
5964
DOUBLE_QUOTED_REGEX = re.compile(f"({'|'.join(_PREFIXES)})?\"\"\"")
6065
QUOTE_DELIMITER_REGEX = re.compile(f"({'|'.join(_PREFIXES)})?(\"|')", re.DOTALL)
@@ -710,6 +715,7 @@ def __init__(self, linter: PyLinter) -> None:
710715
tuple[int, int], tuple[str, tokenize.TokenInfo | None]
711716
] = {}
712717
"""Token position -> (token value, next token)."""
718+
self._parenthesized_string_tokens: dict[tuple[int, int], bool] = {}
713719

714720
def process_module(self, node: nodes.Module) -> None:
715721
self._unicode_literals = "unicode_literals" in node.future_imports
@@ -738,10 +744,62 @@ def process_tokens(self, tokens: list[tokenize.TokenInfo]) -> None:
738744
# to match with astroid `.col_offset`
739745
start = (start[0], len(line[: start[1]].encode(encoding)))
740746
self.string_tokens[start] = (str_eval(token), next_token)
747+
is_parenthesized = self._is_initial_string_token(
748+
i, tokens
749+
) and self._is_parenthesized(i, tokens)
750+
self._parenthesized_string_tokens[start] = is_parenthesized
741751

742752
if self.linter.config.check_quote_consistency:
743753
self.check_for_consistent_string_delimiters(tokens)
744754

755+
def _is_initial_string_token(
    self, index: int, tokens: Sequence[tokenize.TokenInfo]
) -> bool:
    """Tell whether ``tokens[index]`` opens a run of adjacent string tokens.

    The neighbouring tokens are looked up with the default ``ignore`` set of
    ``_find_prev_token`` / ``_find_next_token`` (newline and comment tokens
    are skipped), so the strings may sit on separate lines.

    :param index: Position in ``tokens`` of the token being examined.
    :param tokens: The token stream of the module.
    :returns: ``True`` when the nearest following token is a string literal
        and the nearest preceding token is not.
    """
    preceding = self._find_prev_token(index, tokens)
    following = self._find_next_token(index, tokens)
    preceded_by_string = bool(preceding and preceding.type == tokenize.STRING)
    followed_by_string = bool(following and following.type == tokenize.STRING)
    # Only the first literal of a concatenation qualifies: something must
    # follow it, and nothing string-like may come before it.
    return followed_by_string and not preceded_by_string
765+
766+
def _is_parenthesized(
    self, index: int, tokens: Sequence[tokenize.TokenInfo]
) -> bool:
    """Tell whether the string run around ``tokens[index]`` fills a ``(...)`` pair.

    Starting from ``index``, string literals, newlines and comments are
    skipped in both directions; the first remaining token on the left must
    be ``(`` and the first remaining token on the right must be ``)``.

    :param index: Position in ``tokens`` of a string token of the run.
    :param tokens: The token stream of the module.
    :returns: ``True`` when the run is directly enclosed by parentheses.
    """
    # Adjacent string literals belong to the same run, so they are skipped
    # along with the layout-only tokens.
    skipped = (*_PAREN_IGNORE_TOKEN_TYPES, tokenize.STRING)

    opener = self._find_prev_token(index, tokens, ignore=skipped)
    if opener is None or opener.type != tokenize.OP or opener.string != "(":
        return False

    closer = self._find_next_token(index, tokens, ignore=skipped)
    return (
        closer is not None
        and closer.type == tokenize.OP
        and closer.string == ")"
    )
778+
779+
def _find_prev_token(
    self,
    index: int,
    tokens: Sequence[tokenize.TokenInfo],
    *,
    ignore: tuple[int, ...] = _PAREN_IGNORE_TOKEN_TYPES,
) -> tokenize.TokenInfo | None:
    """Return the closest token before ``index`` whose type is not ignored.

    :param index: Position in ``tokens`` to search backwards from (exclusive).
    :param tokens: The token stream to search.
    :param ignore: Token types to step over while searching.
    :returns: The first non-ignored token found walking towards the start of
        ``tokens``, or ``None`` if the start is reached first.
    """
    for position in range(index - 1, -1, -1):
        candidate = tokens[position]
        if candidate.type not in ignore:
            return candidate
    return None
790+
791+
def _find_next_token(
    self,
    index: int,
    tokens: Sequence[tokenize.TokenInfo],
    *,
    ignore: tuple[int, ...] = _PAREN_IGNORE_TOKEN_TYPES,
) -> tokenize.TokenInfo | None:
    """Return the closest token after ``index`` whose type is not ignored.

    :param index: Position in ``tokens`` to search forwards from (exclusive).
    :param tokens: The token stream to search.
    :param ignore: Token types to step over while searching.
    :returns: The first non-ignored token found walking towards the end of
        ``tokens``, or ``None`` if the end is reached first.
    """
    for position in range(index + 1, len(tokens)):
        candidate = tokens[position]
        if candidate.type not in ignore:
            return candidate
    return None
802+
745803
@only_required_for_messages("implicit-str-concat")
746804
def visit_call(self, node: nodes.Call) -> None:
747805
self.check_for_concatenated_strings(node.args, "call")
@@ -815,10 +873,18 @@ def check_for_concatenated_strings(
815873
matching_token, next_token = self.string_tokens[token_index]
816874
# We detect string concatenation: the AST Const is the
817875
# combination of 2 string tokens
818-
if matching_token != elt.value and next_token is not None:
819-
if next_token.type == tokenize.STRING and (
820-
next_token.start[0] == elt.lineno
821-
or self.linter.config.check_str_concat_over_line_jumps
876+
if (
877+
matching_token != elt.value
878+
and next_token is not None
879+
and next_token.type == tokenize.STRING
880+
):
881+
if next_token.start[0] == elt.lineno or (
882+
self.linter.config.check_str_concat_over_line_jumps
883+
# Allow implicitly concatenated strings in parens.
884+
# See https://github.com/pylint-dev/pylint/issues/8552.
885+
and not self._parenthesized_string_tokens.get(
886+
(elt.lineno, elt.col_offset)
887+
)
822888
):
823889
self.add_message(
824890
"implicit-str-concat",

tests/functional/i/implicit/implicit_str_concat_multiline.py

+38-1
Original file line numberDiff line numberDiff line change
@@ -3,9 +3,46 @@
33
TEST_TUPLE = ('a', 'b' # [implicit-str-concat]
44
'c')
55

6+
# See https://github.com/pylint-dev/pylint/issues/8552.
7+
PARENTHESIZED_IS_OK = [
8+
"a",
9+
(
10+
"Lorem ipsum dolor sit amet, consectetur adipiscing elit,"
11+
" sed do eiusmod tempor incididunt ut labore et dolore "
12+
),
13+
]
14+
15+
# Single argument without trailing comma is OK:
616
print(
7-
"Lorem ipsum dolor sit amet, consectetur adipiscing elit," # [implicit-str-concat]
17+
"Lorem ipsum dolor sit amet, consectetur adipiscing elit,"
818
" sed do eiusmod tempor incididunt ut labore et dolore "
919
"magna aliqua. Ut enim ad minim veniam, quis nostrud "
1020
"exercitation ullamco laboris nisi ut aliquip ex ea "
1121
)
22+
23+
# Implicitly concatenated strings on the same line always raise:
24+
print(
25+
"Lorem ipsum dolor sit amet, ""consectetur adipiscing elit," # [implicit-str-concat]
26+
"Lorem ipsum dolor sit amet, consectetur adipiscing elit,"
27+
" sed do eiusmod tempor incididunt ut labore et dolore "
28+
"magna aliqua. Ut enim ad minim veniam, quis nostrud "
29+
"exercitation ullamco laboris nisi ut aliquip ex ea "
30+
)
31+
32+
# Explicitly wrapping in parens with a trailing comma is OK:
33+
print(
34+
(
35+
"Lorem ipsum dolor sit amet, consectetur adipiscing elit,"
36+
" sed do eiusmod tempor incididunt ut labore et dolore "
37+
"magna aliqua. Ut enim ad minim veniam, quis nostrud "
38+
"exercitation ullamco laboris nisi ut aliquip ex ea "
39+
),
40+
)
41+
42+
# But NOT OK when there is a trailing comma and NOT wrapped in parens:
43+
print(
44+
"Lorem ipsum dolor sit amet, consectetur adipiscing elit," # [implicit-str-concat]
45+
" sed do eiusmod tempor incididunt ut labore et dolore "
46+
"magna aliqua. Ut enim ad minim veniam, quis nostrud "
47+
"exercitation ullamco laboris nisi ut aliquip ex ea ",
48+
)
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,3 @@
11
implicit-str-concat:3:0:None:None::Implicit string concatenation found in tuple:HIGH
2-
implicit-str-concat:7:0:None:None::Implicit string concatenation found in call:HIGH
2+
implicit-str-concat:25:0:None:None::Implicit string concatenation found in call:HIGH
3+
implicit-str-concat:44:0:None:None::Implicit string concatenation found in call:HIGH

0 commit comments

Comments
 (0)