Skip to content

Commit c46d64e

Browse files
authored
gh-121130: Fix f-string format specifiers with debug expressions (#121150)
1 parent 69c68de commit c46d64e

File tree

8 files changed

+75
-31
lines changed

8 files changed

+75
-31
lines changed

Doc/library/ast.rst

+1-3
Original file line numberDiff line numberDiff line change
@@ -316,9 +316,7 @@ Literals
316316
args=[
317317
Name(id='a', ctx=Load())]),
318318
conversion=-1,
319-
format_spec=JoinedStr(
320-
values=[
321-
Constant(value='.3')]))]))
319+
format_spec=Constant(value='.3'))]))
322320

323321

324322
.. class:: List(elts, ctx)

Lib/test/test_ast.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -3638,7 +3638,7 @@ def main():
36383638
('Expression', ('Subscript', (1, 0, 1, 10), ('List', (1, 0, 1, 3), [('Constant', (1, 1, 1, 2), 5, None)], ('Load',)), ('Slice', (1, 4, 1, 9), ('Constant', (1, 4, 1, 5), 1, None), ('Constant', (1, 6, 1, 7), 1, None), ('Constant', (1, 8, 1, 9), 1, None)), ('Load',))),
36393639
('Expression', ('IfExp', (1, 0, 1, 21), ('Name', (1, 9, 1, 10), 'x', ('Load',)), ('Call', (1, 0, 1, 5), ('Name', (1, 0, 1, 3), 'foo', ('Load',)), [], []), ('Call', (1, 16, 1, 21), ('Name', (1, 16, 1, 19), 'bar', ('Load',)), [], []))),
36403640
('Expression', ('JoinedStr', (1, 0, 1, 6), [('FormattedValue', (1, 2, 1, 5), ('Name', (1, 3, 1, 4), 'a', ('Load',)), -1, None)])),
3641-
('Expression', ('JoinedStr', (1, 0, 1, 10), [('FormattedValue', (1, 2, 1, 9), ('Name', (1, 3, 1, 4), 'a', ('Load',)), -1, ('JoinedStr', (1, 4, 1, 8), [('Constant', (1, 5, 1, 8), '.2f', None)]))])),
3641+
('Expression', ('JoinedStr', (1, 0, 1, 10), [('FormattedValue', (1, 2, 1, 9), ('Name', (1, 3, 1, 4), 'a', ('Load',)), -1, ('Constant', (1, 5, 1, 8), '.2f', None))])),
36423642
('Expression', ('JoinedStr', (1, 0, 1, 8), [('FormattedValue', (1, 2, 1, 7), ('Name', (1, 3, 1, 4), 'a', ('Load',)), 114, None)])),
36433643
('Expression', ('JoinedStr', (1, 0, 1, 11), [('Constant', (1, 2, 1, 6), 'foo(', None), ('FormattedValue', (1, 6, 1, 9), ('Name', (1, 7, 1, 8), 'a', ('Load',)), -1, None), ('Constant', (1, 9, 1, 10), ')', None)])),
36443644
]

Lib/test/test_fstring.py

+9
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
# Unicode identifiers in tests is allowed by PEP 3131.
99

1010
import ast
11+
import datetime
1112
import dis
1213
import os
1314
import re
@@ -1601,6 +1602,12 @@ def f(a):
16011602
self.assertEqual(f'{f(a=4)}', '3=')
16021603
self.assertEqual(x, 4)
16031604

1605+
# Check debug expressions in format spec
1606+
y = 20
1607+
self.assertEqual(f"{2:{y=}}", "yyyyyyyyyyyyyyyyyyy2")
1608+
self.assertEqual(f"{datetime.datetime.now():h1{y=}h2{y=}h3{y=}}",
1609+
'h1y=20h2y=20h3y=20')
1610+
16041611
# Make sure __format__ is being called.
16051612
class C:
16061613
def __format__(self, s):
@@ -1614,9 +1621,11 @@ def __repr__(self):
16141621
self.assertEqual(f'{C()=: }', 'C()=FORMAT- ')
16151622
self.assertEqual(f'{C()=:x}', 'C()=FORMAT-x')
16161623
self.assertEqual(f'{C()=!r:*^20}', 'C()=********REPR********')
1624+
self.assertEqual(f"{C():{20=}}", 'FORMAT-20=20')
16171625

16181626
self.assertRaises(SyntaxError, eval, "f'{C=]'")
16191627

1628+
16201629
# Make sure leading and following text works.
16211630
x = 'foo'
16221631
self.assertEqual(f'X{x=}Y', 'Xx='+repr(x)+'Y')
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
Fix f-strings with debug expressions in format specifiers. Patch by Pablo
2+
Galindo.

Parser/action_helpers.c

+41-22
Original file line numberDiff line numberDiff line change
@@ -969,6 +969,8 @@ _PyPegen_check_fstring_conversion(Parser *p, Token* conv_token, expr_ty conv)
969969
return result_token_with_metadata(p, conv, conv_token->metadata);
970970
}
971971

972+
static asdl_expr_seq *
973+
unpack_top_level_joined_strs(Parser *p, asdl_expr_seq *raw_expressions);
972974
ResultTokenWithMetadata *
973975
_PyPegen_setup_full_format_spec(Parser *p, Token *colon, asdl_expr_seq *spec, int lineno, int col_offset,
974976
int end_lineno, int end_col_offset, PyArena *arena)
@@ -1007,8 +1009,15 @@ _PyPegen_setup_full_format_spec(Parser *p, Token *colon, asdl_expr_seq *spec, in
10071009
assert(j == non_empty_count);
10081010
spec = resized_spec;
10091011
}
1010-
expr_ty res = _PyAST_JoinedStr(spec, lineno, col_offset, end_lineno,
1011-
end_col_offset, p->arena);
1012+
expr_ty res;
1013+
if (asdl_seq_LEN(spec) == 0) {
1014+
res = _PyAST_JoinedStr(spec, lineno, col_offset, end_lineno,
1015+
end_col_offset, p->arena);
1016+
} else {
1017+
res = _PyPegen_concatenate_strings(p, spec,
1018+
lineno, col_offset, end_lineno,
1019+
end_col_offset, arena);
1020+
}
10121021
if (!res) {
10131022
return NULL;
10141023
}
@@ -1308,6 +1317,7 @@ unpack_top_level_joined_strs(Parser *p, asdl_expr_seq *raw_expressions)
13081317

13091318
expr_ty
13101319
_PyPegen_joined_str(Parser *p, Token* a, asdl_expr_seq* raw_expressions, Token*b) {
1320+
13111321
asdl_expr_seq *expr = unpack_top_level_joined_strs(p, raw_expressions);
13121322
Py_ssize_t n_items = asdl_seq_LEN(expr);
13131323

@@ -1472,7 +1482,6 @@ expr_ty _PyPegen_formatted_value(Parser *p, expr_ty expression, Token *debug, Re
14721482
debug_end_offset = end_col_offset;
14731483
debug_metadata = closing_brace->metadata;
14741484
}
1475-
14761485
expr_ty debug_text = _PyAST_Constant(debug_metadata, NULL, lineno, col_offset + 1, debug_end_line,
14771486
debug_end_offset - 1, p->arena);
14781487
if (!debug_text) {
@@ -1505,16 +1514,23 @@ _PyPegen_concatenate_strings(Parser *p, asdl_expr_seq *strings,
15051514
Py_ssize_t n_flattened_elements = 0;
15061515
for (i = 0; i < len; i++) {
15071516
expr_ty elem = asdl_seq_GET(strings, i);
1508-
if (elem->kind == Constant_kind) {
1509-
if (PyBytes_CheckExact(elem->v.Constant.value)) {
1510-
bytes_found = 1;
1511-
} else {
1512-
unicode_string_found = 1;
1513-
}
1514-
n_flattened_elements++;
1515-
} else {
1516-
n_flattened_elements += asdl_seq_LEN(elem->v.JoinedStr.values);
1517-
f_string_found = 1;
1517+
switch(elem->kind) {
1518+
case Constant_kind:
1519+
if (PyBytes_CheckExact(elem->v.Constant.value)) {
1520+
bytes_found = 1;
1521+
} else {
1522+
unicode_string_found = 1;
1523+
}
1524+
n_flattened_elements++;
1525+
break;
1526+
case JoinedStr_kind:
1527+
n_flattened_elements += asdl_seq_LEN(elem->v.JoinedStr.values);
1528+
f_string_found = 1;
1529+
break;
1530+
default:
1531+
n_flattened_elements++;
1532+
f_string_found = 1;
1533+
break;
15181534
}
15191535
}
15201536

@@ -1556,16 +1572,19 @@ _PyPegen_concatenate_strings(Parser *p, asdl_expr_seq *strings,
15561572
Py_ssize_t j = 0;
15571573
for (i = 0; i < len; i++) {
15581574
expr_ty elem = asdl_seq_GET(strings, i);
1559-
if (elem->kind == Constant_kind) {
1560-
asdl_seq_SET(flattened, current_pos++, elem);
1561-
} else {
1562-
for (j = 0; j < asdl_seq_LEN(elem->v.JoinedStr.values); j++) {
1563-
expr_ty subvalue = asdl_seq_GET(elem->v.JoinedStr.values, j);
1564-
if (subvalue == NULL) {
1565-
return NULL;
1575+
switch(elem->kind) {
1576+
case JoinedStr_kind:
1577+
for (j = 0; j < asdl_seq_LEN(elem->v.JoinedStr.values); j++) {
1578+
expr_ty subvalue = asdl_seq_GET(elem->v.JoinedStr.values, j);
1579+
if (subvalue == NULL) {
1580+
return NULL;
1581+
}
1582+
asdl_seq_SET(flattened, current_pos++, subvalue);
15661583
}
1567-
asdl_seq_SET(flattened, current_pos++, subvalue);
1568-
}
1584+
break;
1585+
default:
1586+
asdl_seq_SET(flattened, current_pos++, elem);
1587+
break;
15691588
}
15701589
}
15711590

Parser/lexer/lexer.c

+19-5
Original file line numberDiff line numberDiff line change
@@ -989,6 +989,7 @@ tok_get_normal_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct t
989989
the_current_tok->last_expr_buffer = NULL;
990990
the_current_tok->last_expr_size = 0;
991991
the_current_tok->last_expr_end = -1;
992+
the_current_tok->in_format_spec = 0;
992993
the_current_tok->f_string_debug = 0;
993994

994995
switch (*tok->start) {
@@ -1137,15 +1138,20 @@ tok_get_normal_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct t
11371138
* by the `{` case, so for ensuring that we are on the 0th level, we need
11381139
* to adjust it manually */
11391140
int cursor = current_tok->curly_bracket_depth - (c != '{');
1140-
if (cursor == 0 && !_PyLexer_update_fstring_expr(tok, c)) {
1141+
int in_format_spec = current_tok->in_format_spec;
1142+
int cursor_in_format_with_debug =
1143+
cursor == 1 && (current_tok->f_string_debug || in_format_spec);
1144+
int cursor_valid = cursor == 0 || cursor_in_format_with_debug;
1145+
if ((cursor_valid) && !_PyLexer_update_fstring_expr(tok, c)) {
11411146
return MAKE_TOKEN(ENDMARKER);
11421147
}
1143-
if (cursor == 0 && c != '{' && set_fstring_expr(tok, token, c)) {
1148+
if ((cursor_valid) && c != '{' && set_fstring_expr(tok, token, c)) {
11441149
return MAKE_TOKEN(ERRORTOKEN);
11451150
}
11461151

11471152
if (c == ':' && cursor == current_tok->curly_bracket_expr_start_depth) {
11481153
current_tok->kind = TOK_FSTRING_MODE;
1154+
current_tok->in_format_spec = 1;
11491155
p_start = tok->start;
11501156
p_end = tok->cur;
11511157
return MAKE_TOKEN(_PyToken_OneChar(c));
@@ -1235,6 +1241,7 @@ tok_get_normal_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct t
12351241
if (c == '}' && current_tok->curly_bracket_depth == current_tok->curly_bracket_expr_start_depth) {
12361242
current_tok->curly_bracket_expr_start_depth--;
12371243
current_tok->kind = TOK_FSTRING_MODE;
1244+
current_tok->in_format_spec = 0;
12381245
current_tok->f_string_debug = 0;
12391246
}
12401247
}
@@ -1317,11 +1324,11 @@ tok_get_fstring_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct
13171324
tok->multi_line_start = tok->line_start;
13181325
while (end_quote_size != current_tok->f_string_quote_size) {
13191326
int c = tok_nextc(tok);
1320-
if (tok->done == E_ERROR) {
1327+
if (tok->done == E_ERROR || tok->done == E_DECODE) {
13211328
return MAKE_TOKEN(ERRORTOKEN);
13221329
}
13231330
int in_format_spec = (
1324-
current_tok->last_expr_end != -1
1331+
current_tok->in_format_spec
13251332
&&
13261333
INSIDE_FSTRING_EXPR(current_tok)
13271334
);
@@ -1337,6 +1344,7 @@ tok_get_fstring_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct
13371344
if (in_format_spec && c == '\n') {
13381345
tok_backup(tok, c);
13391346
TOK_GET_MODE(tok)->kind = TOK_REGULAR_MODE;
1347+
current_tok->in_format_spec = 0;
13401348
p_start = tok->start;
13411349
p_end = tok->cur;
13421350
return MAKE_TOKEN(FSTRING_MIDDLE);
@@ -1378,6 +1386,9 @@ tok_get_fstring_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct
13781386
}
13791387

13801388
if (c == '{') {
1389+
if (!_PyLexer_update_fstring_expr(tok, c)) {
1390+
return MAKE_TOKEN(ENDMARKER);
1391+
}
13811392
int peek = tok_nextc(tok);
13821393
if (peek != '{' || in_format_spec) {
13831394
tok_backup(tok, peek);
@@ -1387,6 +1398,7 @@ tok_get_fstring_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct
13871398
return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok, "f-string: expressions nested too deeply"));
13881399
}
13891400
TOK_GET_MODE(tok)->kind = TOK_REGULAR_MODE;
1401+
current_tok->in_format_spec = 0;
13901402
p_start = tok->start;
13911403
p_end = tok->cur;
13921404
} else {
@@ -1406,13 +1418,15 @@ tok_get_fstring_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct
14061418
// scanning (indicated by the end of the expression being set) and we are not at the top level
14071419
// of the bracket stack (-1 is the top level). Since format specifiers can't legally use double
14081420
// brackets, we can bypass it here.
1409-
if (peek == '}' && !in_format_spec) {
1421+
int cursor = current_tok->curly_bracket_depth;
1422+
if (peek == '}' && !in_format_spec && cursor == 0) {
14101423
p_start = tok->start;
14111424
p_end = tok->cur - 1;
14121425
} else {
14131426
tok_backup(tok, peek);
14141427
tok_backup(tok, c);
14151428
TOK_GET_MODE(tok)->kind = TOK_REGULAR_MODE;
1429+
current_tok->in_format_spec = 0;
14161430
p_start = tok->start;
14171431
p_end = tok->cur;
14181432
}

Parser/lexer/state.c

+1
Original file line numberDiff line numberDiff line change
@@ -74,6 +74,7 @@ free_fstring_expressions(struct tok_state *tok)
7474
mode->last_expr_buffer = NULL;
7575
mode->last_expr_size = 0;
7676
mode->last_expr_end = -1;
77+
mode->in_format_spec = 0;
7778
}
7879
}
7980
}

Parser/lexer/state.h

+1
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,7 @@ typedef struct _tokenizer_mode {
5858
Py_ssize_t last_expr_end;
5959
char* last_expr_buffer;
6060
int f_string_debug;
61+
int in_format_spec;
6162
} tokenizer_mode;
6263

6364
/* Tokenizer state */

0 commit comments

Comments
 (0)