Skip to content

Commit cc389ef

Browse files
authored
gh-110259: Fix f-strings with multiline expressions and format specs (#110271)
Signed-off-by: Pablo Galindo <[email protected]>
1 parent af29282 commit cc389ef

File tree

5 files changed

+128
-10
lines changed

5 files changed

+128
-10
lines changed

Lib/ast.py

+8-3
Original file line numberDiff line numberDiff line change
@@ -1270,13 +1270,15 @@ def visit_JoinedStr(self, node):
12701270
quote_type = quote_types[0]
12711271
self.write(f"{quote_type}{value}{quote_type}")
12721272

1273-
def _write_fstring_inner(self, node):
1273+
def _write_fstring_inner(self, node, scape_newlines=False):
12741274
if isinstance(node, JoinedStr):
12751275
# for both the f-string itself, and format_spec
12761276
for value in node.values:
1277-
self._write_fstring_inner(value)
1277+
self._write_fstring_inner(value, scape_newlines=scape_newlines)
12781278
elif isinstance(node, Constant) and isinstance(node.value, str):
12791279
value = node.value.replace("{", "{{").replace("}", "}}")
1280+
if scape_newlines:
1281+
value = value.replace("\n", "\\n")
12801282
self.write(value)
12811283
elif isinstance(node, FormattedValue):
12821284
self.visit_FormattedValue(node)
@@ -1299,7 +1301,10 @@ def unparse_inner(inner):
12991301
self.write(f"!{chr(node.conversion)}")
13001302
if node.format_spec:
13011303
self.write(":")
1302-
self._write_fstring_inner(node.format_spec)
1304+
self._write_fstring_inner(
1305+
node.format_spec,
1306+
scape_newlines=True
1307+
)
13031308

13041309
def visit_Name(self, node):
13051310
self.write(node.id)

Lib/test/test_tokenize.py

+97
Original file line numberDiff line numberDiff line change
@@ -566,6 +566,55 @@ def test_string(self):
566566
OP '=' (3, 0) (3, 1)
567567
OP '}' (3, 1) (3, 2)
568568
FSTRING_END "'''" (3, 2) (3, 5)
569+
""")
570+
self.check_tokenize("""\
571+
f'''__{
572+
x:a
573+
}__'''""", """\
574+
FSTRING_START "f'''" (1, 0) (1, 4)
575+
FSTRING_MIDDLE '__' (1, 4) (1, 6)
576+
OP '{' (1, 6) (1, 7)
577+
NL '\\n' (1, 7) (1, 8)
578+
NAME 'x' (2, 4) (2, 5)
579+
OP ':' (2, 5) (2, 6)
580+
FSTRING_MIDDLE 'a\\n' (2, 6) (3, 0)
581+
OP '}' (3, 0) (3, 1)
582+
FSTRING_MIDDLE '__' (3, 1) (3, 3)
583+
FSTRING_END "'''" (3, 3) (3, 6)
584+
""")
585+
self.check_tokenize("""\
586+
f'''__{
587+
x:a
588+
b
589+
c
590+
d
591+
}__'''""", """\
592+
FSTRING_START "f'''" (1, 0) (1, 4)
593+
FSTRING_MIDDLE '__' (1, 4) (1, 6)
594+
OP '{' (1, 6) (1, 7)
595+
NL '\\n' (1, 7) (1, 8)
596+
NAME 'x' (2, 4) (2, 5)
597+
OP ':' (2, 5) (2, 6)
598+
FSTRING_MIDDLE 'a\\n b\\n c\\n d\\n' (2, 6) (6, 0)
599+
OP '}' (6, 0) (6, 1)
600+
FSTRING_MIDDLE '__' (6, 1) (6, 3)
601+
FSTRING_END "'''" (6, 3) (6, 6)
602+
""")
603+
self.check_tokenize("""\
604+
f'__{
605+
x:d
606+
}__'""", """\
607+
FSTRING_START "f'" (1, 0) (1, 2)
608+
FSTRING_MIDDLE '__' (1, 2) (1, 4)
609+
OP '{' (1, 4) (1, 5)
610+
NL '\\n' (1, 5) (1, 6)
611+
NAME 'x' (2, 4) (2, 5)
612+
OP ':' (2, 5) (2, 6)
613+
FSTRING_MIDDLE 'd' (2, 6) (2, 7)
614+
NL '\\n' (2, 7) (2, 8)
615+
OP '}' (3, 0) (3, 1)
616+
FSTRING_MIDDLE '__' (3, 1) (3, 3)
617+
FSTRING_END "'" (3, 3) (3, 4)
569618
""")
570619

571620
def test_function(self):
@@ -2277,6 +2326,54 @@ def test_string(self):
22772326
FSTRING_START \'f"\' (1, 0) (1, 2)
22782327
FSTRING_MIDDLE 'hola\\\\\\\\\\\\r\\\\ndfgf' (1, 2) (1, 16)
22792328
FSTRING_END \'"\' (1, 16) (1, 17)
2329+
""")
2330+
2331+
self.check_tokenize("""\
2332+
f'''__{
2333+
x:a
2334+
}__'''""", """\
2335+
FSTRING_START "f'''" (1, 0) (1, 4)
2336+
FSTRING_MIDDLE '__' (1, 4) (1, 6)
2337+
LBRACE '{' (1, 6) (1, 7)
2338+
NAME 'x' (2, 4) (2, 5)
2339+
COLON ':' (2, 5) (2, 6)
2340+
FSTRING_MIDDLE 'a\\n' (2, 6) (3, 0)
2341+
RBRACE '}' (3, 0) (3, 1)
2342+
FSTRING_MIDDLE '__' (3, 1) (3, 3)
2343+
FSTRING_END "'''" (3, 3) (3, 6)
2344+
""")
2345+
2346+
self.check_tokenize("""\
2347+
f'''__{
2348+
x:a
2349+
b
2350+
c
2351+
d
2352+
}__'''""", """\
2353+
FSTRING_START "f'''" (1, 0) (1, 4)
2354+
FSTRING_MIDDLE '__' (1, 4) (1, 6)
2355+
LBRACE '{' (1, 6) (1, 7)
2356+
NAME 'x' (2, 4) (2, 5)
2357+
COLON ':' (2, 5) (2, 6)
2358+
FSTRING_MIDDLE 'a\\n b\\n c\\n d\\n' (2, 6) (6, 0)
2359+
RBRACE '}' (6, 0) (6, 1)
2360+
FSTRING_MIDDLE '__' (6, 1) (6, 3)
2361+
FSTRING_END "'''" (6, 3) (6, 6)
2362+
""")
2363+
2364+
self.check_tokenize("""\
2365+
f'__{
2366+
x:d
2367+
}__'""", """\
2368+
FSTRING_START "f'" (1, 0) (1, 2)
2369+
FSTRING_MIDDLE '__' (1, 2) (1, 4)
2370+
LBRACE '{' (1, 4) (1, 5)
2371+
NAME 'x' (2, 4) (2, 5)
2372+
COLON ':' (2, 5) (2, 6)
2373+
FSTRING_MIDDLE 'd' (2, 6) (2, 7)
2374+
RBRACE '}' (3, 0) (3, 1)
2375+
FSTRING_MIDDLE '__' (3, 1) (3, 3)
2376+
FSTRING_END "'" (3, 3) (3, 4)
22802377
""")
22812378

22822379
def test_function(self):

Lib/test/test_unparse.py

+2-1
Original file line numberDiff line numberDiff line change
@@ -730,7 +730,8 @@ class DirectoryTestCase(ASTTestCase):
730730
test_directories = (lib_dir, lib_dir / "test")
731731
run_always_files = {"test_grammar.py", "test_syntax.py", "test_compile.py",
732732
"test_ast.py", "test_asdl_parser.py", "test_fstring.py",
733-
"test_patma.py", "test_type_alias.py", "test_type_params.py"}
733+
"test_patma.py", "test_type_alias.py", "test_type_params.py",
734+
"test_tokenize.py"}
734735

735736
_files_to_test = None
736737

Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
Correctly identify the format spec in f-strings (with single or triple
2+
quotes) that have multiple lines in the expression part and include a
3+
formatting spec. Patch by Pablo Galindo.

Parser/tokenizer.c

+18-6
Original file line numberDiff line numberDiff line change
@@ -2690,11 +2690,28 @@ tok_get_fstring_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct
26902690
if (tok->done == E_ERROR) {
26912691
return MAKE_TOKEN(ERRORTOKEN);
26922692
}
2693-
if (c == EOF || (current_tok->f_string_quote_size == 1 && c == '\n')) {
2693+
int in_format_spec = (
2694+
current_tok->last_expr_end != -1
2695+
&&
2696+
INSIDE_FSTRING_EXPR(current_tok)
2697+
);
2698+
2699+
if (c == EOF || (current_tok->f_string_quote_size == 1 && c == '\n')) {
26942700
if (tok->decoding_erred) {
26952701
return MAKE_TOKEN(ERRORTOKEN);
26962702
}
26972703

2704+
// If we are in a format spec and we found a newline,
2705+
// it means that the format spec ends here and we should
2706+
// return to the regular mode.
2707+
if (in_format_spec && c == '\n') {
2708+
tok_backup(tok, c);
2709+
TOK_GET_MODE(tok)->kind = TOK_REGULAR_MODE;
2710+
p_start = tok->start;
2711+
p_end = tok->cur;
2712+
return MAKE_TOKEN(FSTRING_MIDDLE);
2713+
}
2714+
26982715
assert(tok->multi_line_start != NULL);
26992716
// shift the tok_state's location into
27002717
// the start of string, and report the error
@@ -2726,11 +2743,6 @@ tok_get_fstring_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct
27262743
end_quote_size = 0;
27272744
}
27282745

2729-
int in_format_spec = (
2730-
current_tok->last_expr_end != -1
2731-
&&
2732-
INSIDE_FSTRING_EXPR(current_tok)
2733-
);
27342746
if (c == '{') {
27352747
int peek = tok_nextc(tok);
27362748
if (peek != '{' || in_format_spec) {

0 commit comments

Comments
 (0)