Skip to content

Commit 551ede2

Browse files
tusharsadhwani, hauntsaninja, JelleZijlstra
authored
Add PEP 701 support (#3822)
Co-authored-by: Shantanu <[email protected]> Co-authored-by: hauntsaninja <[email protected]> Co-authored-by: Jelle Zijlstra <[email protected]>
1 parent 944b99a commit 551ede2

File tree

16 files changed

+941
-102
lines changed

16 files changed

+941
-102
lines changed

CHANGES.md

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -6,6 +6,8 @@
66

77
<!-- Include any especially major or disruptive changes here -->
88

9+
- Add support for the new Python 3.12 f-string syntax introduced by PEP 701 (#3822)
10+
911
### Stable style
1012

1113
<!-- Changes that affect Black's stable style -->

src/black/__init__.py

Lines changed: 13 additions & 18 deletions
Original file line number | Diff line number | Diff line change
@@ -69,13 +69,7 @@
6969
from black.mode import FUTURE_FLAG_TO_FEATURE, VERSION_TO_FEATURES, Feature
7070
from black.mode import Mode as Mode # re-exported
7171
from black.mode import Preview, TargetVersion, supports_feature
72-
from black.nodes import (
73-
STARS,
74-
is_number_token,
75-
is_simple_decorator_expression,
76-
is_string_token,
77-
syms,
78-
)
72+
from black.nodes import STARS, is_number_token, is_simple_decorator_expression, syms
7973
from black.output import color_diff, diff, dump_to_file, err, ipynb_diff, out
8074
from black.parsing import ( # noqa F401
8175
ASTSafetyError,
@@ -91,7 +85,6 @@
9185
sanitized_lines,
9286
)
9387
from black.report import Changed, NothingChanged, Report
94-
from black.trans import iter_fexpr_spans
9588
from blib2to3.pgen2 import token
9689
from blib2to3.pytree import Leaf, Node
9790

@@ -1265,7 +1258,10 @@ def _format_str_once(
12651258
elt = EmptyLineTracker(mode=mode)
12661259
split_line_features = {
12671260
feature
1268-
for feature in {Feature.TRAILING_COMMA_IN_CALL, Feature.TRAILING_COMMA_IN_DEF}
1261+
for feature in {
1262+
Feature.TRAILING_COMMA_IN_CALL,
1263+
Feature.TRAILING_COMMA_IN_DEF,
1264+
}
12691265
if supports_feature(versions, feature)
12701266
}
12711267
block: Optional[LinesBlock] = None
@@ -1337,15 +1333,14 @@ def get_features_used( # noqa: C901
13371333
}
13381334

13391335
for n in node.pre_order():
1340-
if is_string_token(n):
1341-
value_head = n.value[:2]
1342-
if value_head in {'f"', 'F"', "f'", "F'", "rf", "fr", "RF", "FR"}:
1343-
features.add(Feature.F_STRINGS)
1344-
if Feature.DEBUG_F_STRINGS not in features:
1345-
for span_beg, span_end in iter_fexpr_spans(n.value):
1346-
if n.value[span_beg : span_end - 1].rstrip().endswith("="):
1347-
features.add(Feature.DEBUG_F_STRINGS)
1348-
break
1336+
if n.type == token.FSTRING_START:
1337+
features.add(Feature.F_STRINGS)
1338+
elif (
1339+
n.type == token.RBRACE
1340+
and n.parent is not None
1341+
and any(child.type == token.EQUAL for child in n.parent.children)
1342+
):
1343+
features.add(Feature.DEBUG_F_STRINGS)
13491344

13501345
elif is_number_token(n):
13511346
if "_" in n.value:

src/black/linegen.py

Lines changed: 45 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -502,6 +502,45 @@ def visit_NUMBER(self, leaf: Leaf) -> Iterator[Line]:
502502
normalize_numeric_literal(leaf)
503503
yield from self.visit_default(leaf)
504504

505+
def visit_fstring(self, node: Node) -> Iterator[Line]:
    """Visit a parsed f-string by treating it as one ordinary string.

    The f-string node is collapsed into a single STRING leaf, that leaf
    takes the node's place in the tree, and the rest of the work is
    delegated to ``visit_STRING``.  For now the pieces of an f-string
    are deliberately neither formatted nor split.
    """
    flattened = _fstring_to_string(node)
    # Swap the structured node for the flat leaf before visiting it.
    node.replace(flattened)
    yield from self.visit_STRING(flattened)
510+
511+
# TODO: Uncomment Implementation to format f-string children
512+
# fstring_start = node.children[0]
513+
# fstring_end = node.children[-1]
514+
# assert isinstance(fstring_start, Leaf)
515+
# assert isinstance(fstring_end, Leaf)
516+
517+
# quote_char = fstring_end.value[0]
518+
# quote_idx = fstring_start.value.index(quote_char)
519+
# prefix, quote = (
520+
# fstring_start.value[:quote_idx],
521+
# fstring_start.value[quote_idx:]
522+
# )
523+
524+
# if not is_docstring(node, self.mode):
525+
# prefix = normalize_string_prefix(prefix)
526+
527+
# assert quote == fstring_end.value
528+
529+
# is_raw_fstring = "r" in prefix or "R" in prefix
530+
# middles = [
531+
# leaf
532+
# for leaf in node.leaves()
533+
# if leaf.type == token.FSTRING_MIDDLE
534+
# ]
535+
536+
# if self.mode.string_normalization:
537+
# middles, quote = normalize_fstring_quotes(quote, middles, is_raw_fstring)
538+
539+
# fstring_start.value = prefix + quote
540+
# fstring_end.value = quote
541+
542+
# yield from self.visit_default(node)
543+
505544
def __post_init__(self) -> None:
506545
"""You are in a twisty little maze of passages."""
507546
self.current_line = Line(mode=self.mode)
@@ -535,6 +574,12 @@ def __post_init__(self) -> None:
535574
self.visit_guard = partial(v, keywords=Ø, parens={"if"})
536575

537576

577+
def _fstring_to_string(node: Node) -> Leaf:
    """Convert an fstring node back into a single STRING leaf.

    The leaf's value is the node's full source text with the node's own
    ``prefix`` cut off the front; that same prefix is attached to the
    returned leaf instead.
    """
    leading = node.prefix
    rendered = str(node)
    return Leaf(token.STRING, rendered[len(leading) :], prefix=leading)
581+
582+
538583
def _hugging_power_ops_line_to_string(
539584
line: Line,
540585
features: Collection[Feature],

src/black/lines.py

Lines changed: 6 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -72,7 +72,12 @@ def append(
7272
7373
Inline comments are put aside.
7474
"""
75-
has_value = leaf.type in BRACKETS or bool(leaf.value.strip())
75+
has_value = (
76+
leaf.type in BRACKETS
77+
# empty fstring-middles must not be truncated
78+
or leaf.type == token.FSTRING_MIDDLE
79+
or bool(leaf.value.strip())
80+
)
7681
if not has_value:
7782
return
7883

src/black/mode.py

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -46,6 +46,7 @@ class Feature(Enum):
4646
DEBUG_F_STRINGS = 16
4747
PARENTHESIZED_CONTEXT_MANAGERS = 17
4848
TYPE_PARAMS = 18
49+
FSTRING_PARSING = 19
4950
FORCE_OPTIONAL_PARENTHESES = 50
5051

5152
# __future__ flags
@@ -156,6 +157,7 @@ class Feature(Enum):
156157
Feature.EXCEPT_STAR,
157158
Feature.VARIADIC_GENERICS,
158159
Feature.TYPE_PARAMS,
160+
Feature.FSTRING_PARSING,
159161
},
160162
}
161163

src/black/nodes.py

Lines changed: 28 additions & 18 deletions
Original file line number | Diff line number | Diff line change
@@ -145,7 +145,13 @@
145145
OPENING_BRACKETS: Final = set(BRACKET.keys())
146146
CLOSING_BRACKETS: Final = set(BRACKET.values())
147147
BRACKETS: Final = OPENING_BRACKETS | CLOSING_BRACKETS
148-
ALWAYS_NO_SPACE: Final = CLOSING_BRACKETS | {token.COMMA, STANDALONE_COMMENT}
148+
ALWAYS_NO_SPACE: Final = CLOSING_BRACKETS | {
149+
token.COMMA,
150+
STANDALONE_COMMENT,
151+
token.FSTRING_MIDDLE,
152+
token.FSTRING_END,
153+
token.BANG,
154+
}
149155

150156
RARROW = 55
151157

@@ -211,6 +217,9 @@ def whitespace(leaf: Leaf, *, complex_subscript: bool, mode: Mode) -> str: # no
211217
}:
212218
return NO
213219

220+
if t == token.LBRACE and p.type == syms.fstring_replacement_field:
221+
return NO
222+
214223
prev = leaf.prev_sibling
215224
if not prev:
216225
prevp = preceding_leaf(p)
@@ -272,6 +281,9 @@ def whitespace(leaf: Leaf, *, complex_subscript: bool, mode: Mode) -> str: # no
272281
elif prev.type in OPENING_BRACKETS:
273282
return NO
274283

284+
elif prev.type == token.BANG:
285+
return NO
286+
275287
if p.type in {syms.parameters, syms.arglist}:
276288
# untyped function signatures or calls
277289
if not prev or prev.type != token.COMMA:
@@ -393,6 +405,7 @@ def whitespace(leaf: Leaf, *, complex_subscript: bool, mode: Mode) -> str: # no
393405
elif prevp.type == token.EQUAL and prevp_parent.type == syms.argument:
394406
return NO
395407

408+
# TODO: add fstring here?
396409
elif t in {token.NAME, token.NUMBER, token.STRING}:
397410
return NO
398411

@@ -542,31 +555,32 @@ def is_arith_like(node: LN) -> bool:
542555
}
543556

544557

545-
def is_docstring(leaf: Leaf, mode: Mode) -> bool:
546-
if leaf.type != token.STRING:
547-
return False
558+
def is_docstring(node: NL, mode: Mode) -> bool:
559+
if isinstance(node, Leaf):
560+
if node.type != token.STRING:
561+
return False
548562

549-
prefix = get_string_prefix(leaf.value)
550-
if set(prefix).intersection("bBfF"):
551-
return False
563+
prefix = get_string_prefix(node.value)
564+
if set(prefix).intersection("bBfF"):
565+
return False
552566

553567
if (
554568
Preview.unify_docstring_detection in mode
555-
and leaf.parent
556-
and leaf.parent.type == syms.simple_stmt
557-
and not leaf.parent.prev_sibling
558-
and leaf.parent.parent
559-
and leaf.parent.parent.type == syms.file_input
569+
and node.parent
570+
and node.parent.type == syms.simple_stmt
571+
and not node.parent.prev_sibling
572+
and node.parent.parent
573+
and node.parent.parent.type == syms.file_input
560574
):
561575
return True
562576

563577
if prev_siblings_are(
564-
leaf.parent, [None, token.NEWLINE, token.INDENT, syms.simple_stmt]
578+
node.parent, [None, token.NEWLINE, token.INDENT, syms.simple_stmt]
565579
):
566580
return True
567581

568582
# Multiline docstring on the same line as the `def`.
569-
if prev_siblings_are(leaf.parent, [syms.parameters, token.COLON, syms.simple_stmt]):
583+
if prev_siblings_are(node.parent, [syms.parameters, token.COLON, syms.simple_stmt]):
570584
# `syms.parameters` is only used in funcdefs and async_funcdefs in the Python
571585
# grammar. We're safe to return True without further checks.
572586
return True
@@ -954,10 +968,6 @@ def is_rpar_token(nl: NL) -> TypeGuard[Leaf]:
954968
return nl.type == token.RPAR
955969

956970

957-
def is_string_token(nl: NL) -> TypeGuard[Leaf]:
958-
return nl.type == token.STRING
959-
960-
961971
def is_number_token(nl: NL) -> TypeGuard[Leaf]:
962972
return nl.type == token.NUMBER
963973

src/black/strings.py

Lines changed: 68 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -5,7 +5,7 @@
55
import re
66
import sys
77
from functools import lru_cache
8-
from typing import Final, List, Match, Pattern
8+
from typing import Final, List, Match, Pattern, Tuple
99

1010
from black._width_table import WIDTH_TABLE
1111
from blib2to3.pytree import Leaf
@@ -169,8 +169,7 @@ def _cached_compile(pattern: str) -> Pattern[str]:
169169
def normalize_string_quotes(s: str) -> str:
170170
"""Prefer double quotes but only if it doesn't cause more escaping.
171171
172-
Adds or removes backslashes as appropriate. Doesn't parse and fix
173-
strings nested in f-strings.
172+
Adds or removes backslashes as appropriate.
174173
"""
175174
value = s.lstrip(STRING_PREFIX_CHARS)
176175
if value[:3] == '"""':
@@ -211,6 +210,7 @@ def normalize_string_quotes(s: str) -> str:
211210
s = f"{prefix}{orig_quote}{body}{orig_quote}"
212211
new_body = sub_twice(escaped_orig_quote, rf"\1\2{orig_quote}", new_body)
213212
new_body = sub_twice(unescaped_new_quote, rf"\1\\{new_quote}", new_body)
213+
214214
if "f" in prefix.casefold():
215215
matches = re.findall(
216216
r"""
@@ -240,6 +240,71 @@ def normalize_string_quotes(s: str) -> str:
240240
return f"{prefix}{new_quote}{new_body}{new_quote}"
241241

242242

243+
def normalize_fstring_quotes(
    quote: str,
    middles: List[Leaf],
    is_raw_fstring: bool,
) -> Tuple[List[Leaf], str]:
    """Prefer double quotes but only if it doesn't cause more escaping.

    Adds or removes backslashes as appropriate.

    Parameters:
        quote: the quote sequence currently delimiting the f-string
            (one of ``'``, ``"``, ``'''`` or the triple double quote).
        middles: the FSTRING_MIDDLE leaves of the f-string.  Their
            ``value`` attributes are rewritten in place: unnecessary
            escapes of the *other* quote character are always dropped,
            and the remaining escapes are adjusted when the quote changes.
        is_raw_fstring: whether the f-string has a raw (``r``/``R``) prefix.
            Backslashes are never added or removed in raw f-strings.

    Returns:
        ``(middles, quote_to_use)`` where ``quote_to_use`` is either the
        original ``quote`` (nothing to gain from switching) or its
        double-quoted counterpart of the same length.
    """
    if quote == '"""':
        return middles, quote

    if quote == "'''":
        new_quote = '"""'
    elif quote == '"':
        new_quote = "'"
    else:
        new_quote = '"'

    unescaped_new_quote = _cached_compile(rf"(([^\\]|^)(\\\\)*){new_quote}")
    escaped_new_quote = _cached_compile(rf"([^\\]|^)\\((?:\\\\)*){new_quote}")
    escaped_orig_quote = _cached_compile(rf"([^\\]|^)\\((?:\\\\)*){quote}")

    if is_raw_fstring:
        if quote == '"':
            # Already the preferred style and escapes can't be touched in a
            # raw string, so there is nothing to gain from switching.
            return middles, quote

        if any(unescaped_new_quote.search(middle.value) for middle in middles):
            # There's at least one unescaped new_quote in this raw string
            # so converting is impossible
            return middles, quote

        if new_quote == '"""' and middles and middles[-1].value.endswith('"'):
            # A trailing `"` could run into the closing `"""`, and a raw
            # string gives us no way to escape it; keep the original quote.
            return middles, quote

        # Do not introduce or remove backslashes in raw strings; only switch
        # to the double-quoted delimiter of the same length (`'` -> `"`,
        # `'''` -> `"""`).
        return middles, new_quote

    new_segments = []
    for middle in middles:
        segment = middle.value
        # remove unnecessary escapes
        new_segment = sub_twice(escaped_new_quote, rf"\1\2{new_quote}", segment)
        if segment != new_segment:
            # Consider the string without unnecessary escapes as the original
            middle.value = new_segment

        new_segment = sub_twice(escaped_orig_quote, rf"\1\2{quote}", new_segment)
        new_segment = sub_twice(unescaped_new_quote, rf"\1\\{new_quote}", new_segment)
        new_segments.append(new_segment)

    if new_quote == '"""' and new_segments and new_segments[-1].endswith('"'):
        # edge case: a `"` right before the closing `"""` must be escaped so
        # it isn't read as part of the delimiter.
        new_segments[-1] = new_segments[-1][:-1] + '\\"'

    # Compare escaping over the whole f-string, not segment by segment: one
    # unaffected segment must not veto a switch that lowers total escaping.
    orig_escape_count = sum(middle.value.count("\\") for middle in middles)
    new_escape_count = sum(new_segment.count("\\") for new_segment in new_segments)

    if new_escape_count > orig_escape_count:
        return middles, quote  # Do not introduce more escaping

    if new_escape_count == orig_escape_count and quote == '"':
        return middles, quote  # Prefer double quotes

    for middle, new_segment in zip(middles, new_segments):
        middle.value = new_segment

    return middles, new_quote
306+
307+
243308
def normalize_unicode_escape_sequences(leaf: Leaf) -> None:
244309
"""Replace hex codes in Unicode escape sequences with lowercase representation."""
245310
text = leaf.value

src/blib2to3/Grammar.txt

Lines changed: 6 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -163,7 +163,7 @@ atom: ('(' [yield_expr|testlist_gexp] ')' |
163163
'[' [listmaker] ']' |
164164
'{' [dictsetmaker] '}' |
165165
'`' testlist1 '`' |
166-
NAME | NUMBER | STRING+ | '.' '.' '.')
166+
NAME | NUMBER | (STRING | fstring)+ | '.' '.' '.')
167167
listmaker: (namedexpr_test|star_expr) ( old_comp_for | (',' (namedexpr_test|star_expr))* [','] )
168168
testlist_gexp: (namedexpr_test|star_expr) ( old_comp_for | (',' (namedexpr_test|star_expr))* [','] )
169169
lambdef: 'lambda' [varargslist] ':' test
@@ -254,3 +254,8 @@ case_block: "case" patterns [guard] ':' suite
254254
guard: 'if' namedexpr_test
255255
patterns: pattern (',' pattern)* [',']
256256
pattern: (expr|star_expr) ['as' expr]
257+
258+
fstring: FSTRING_START fstring_middle* FSTRING_END
259+
fstring_middle: fstring_replacement_field | FSTRING_MIDDLE
260+
fstring_replacement_field: '{' (yield_expr | testlist_star_expr) ['='] [ "!" NAME ] [ ':' fstring_format_spec* ] '}'
261+
fstring_format_spec: FSTRING_MIDDLE | fstring_replacement_field

src/blib2to3/pgen2/driver.py

Lines changed: 3 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -167,7 +167,9 @@ def parse_tokens(self, tokens: Iterable[GoodTokenInfo], debug: bool = False) ->
167167
if type in {token.INDENT, token.DEDENT}:
168168
prefix = _prefix
169169
lineno, column = end
170-
if value.endswith("\n"):
170+
# FSTRING_MIDDLE is the only token that can end with a newline, and
171+
# `end` will point to the next line. For that case, don't increment lineno.
172+
if value.endswith("\n") and type != token.FSTRING_MIDDLE:
171173
lineno += 1
172174
column = 0
173175
else:

src/blib2to3/pgen2/grammar.py

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -218,6 +218,7 @@ def report(self) -> None:
218218
//= DOUBLESLASHEQUAL
219219
-> RARROW
220220
:= COLONEQUAL
221+
! BANG
221222
"""
222223

223224
opmap = {}

0 commit comments

Comments
 (0)