11
11
#include "tokenizer.h"
12
12
#include "errcode.h"
13
13
14
- #include "unicodeobject.h"
15
- #include "bytesobject.h"
16
- #include "fileobject.h"
17
- #include "abstract.h"
18
-
19
14
/* Alternate tab spacing */
20
15
#define ALTTABSIZE 1
21
16
43
38
tok->lineno++; \
44
39
tok->col_offset = 0;
45
40
41
+ #define INSIDE_FSTRING (tok ) (tok->tok_mode_stack_index > 0)
42
+ #define INSIDE_FSTRING_EXPR (tok ) (tok->curly_bracket_expr_start_depth >= 0)
46
43
#ifdef Py_DEBUG
47
44
static inline tokenizer_mode * TOK_GET_MODE (struct tok_state * tok ) {
48
45
assert (tok -> tok_mode_stack_index >= 0 );
@@ -54,15 +51,9 @@ static inline tokenizer_mode* TOK_NEXT_MODE(struct tok_state* tok) {
54
51
assert (tok -> tok_mode_stack_index < MAXLEVEL );
55
52
return & (tok -> tok_mode_stack [++ tok -> tok_mode_stack_index ]);
56
53
}
57
- static inline int * TOK_GET_BRACKET_MARK (tokenizer_mode * mode ) {
58
- assert (mode -> bracket_mark_index >= 0 );
59
- assert (mode -> bracket_mark_index < MAX_EXPR_NESTING );
60
- return & (mode -> bracket_mark [mode -> bracket_mark_index ]);
61
- }
62
54
#else
63
55
/* Non-Py_DEBUG accessor: yields the address of the entry of
 * tok->tok_mode_stack selected by tok->tok_mode_stack_index.  No range
 * checks are performed here.  The macro name must be immediately followed
 * by '(' so the preprocessor treats it as function-like, and the parameter
 * is parenthesized so any pointer expression (e.g. `&state`) may be passed.
 * `tok` is expanded twice; pass an expression without side effects. */
#define TOK_GET_MODE(tok) (&((tok)->tok_mode_stack[(tok)->tok_mode_stack_index]))
64
56
/* Non-Py_DEBUG variant: pre-increments tok->tok_mode_stack_index and yields
 * the address of the tok->tok_mode_stack entry at the new index.  No range
 * checks are performed here.  The macro name must be immediately followed
 * by '(' so the preprocessor treats it as function-like, and the parameter
 * is parenthesized so any pointer expression may be passed.  `tok` is
 * expanded twice; pass an expression without side effects. */
#define TOK_NEXT_MODE(tok) (&((tok)->tok_mode_stack[++(tok)->tok_mode_stack_index]))
65
- #define TOK_GET_BRACKET_MARK (mode ) (&(mode->bracket_mark[mode->bracket_mark_index]))
66
57
#endif
67
58
68
59
/* Forward */
@@ -398,20 +389,7 @@ update_fstring_expr(struct tok_state *tok, char cur)
398
389
tokenizer_mode * tok_mode = TOK_GET_MODE (tok );
399
390
400
391
switch (cur ) {
401
- case '{' :
402
- if (tok_mode -> last_expr_buffer != NULL ) {
403
- PyMem_Free (tok_mode -> last_expr_buffer );
404
- }
405
- tok_mode -> last_expr_buffer = PyMem_Malloc (size );
406
- if (tok_mode -> last_expr_buffer == NULL ) {
407
- tok -> done = E_NOMEM ;
408
- return 0 ;
409
- }
410
- tok_mode -> last_expr_size = size ;
411
- tok_mode -> last_expr_end = -1 ;
412
- strncpy (tok_mode -> last_expr_buffer , tok -> cur , size );
413
- break ;
414
- case 0 :
392
+ case 0 :
415
393
if (!tok_mode -> last_expr_buffer || tok_mode -> last_expr_end >= 0 ) {
416
394
return 1 ;
417
395
}
@@ -421,23 +399,38 @@ update_fstring_expr(struct tok_state *tok, char cur)
421
399
);
422
400
if (new_buffer == NULL ) {
423
401
PyMem_Free (tok_mode -> last_expr_buffer );
424
- tok -> done = E_NOMEM ;
425
- return 0 ;
402
+ goto error ;
426
403
}
427
404
tok_mode -> last_expr_buffer = new_buffer ;
428
405
strncpy (tok_mode -> last_expr_buffer + tok_mode -> last_expr_size , tok -> cur , size );
429
406
tok_mode -> last_expr_size += size ;
430
407
break ;
408
+ case '{' :
409
+ if (tok_mode -> last_expr_buffer != NULL ) {
410
+ PyMem_Free (tok_mode -> last_expr_buffer );
411
+ }
412
+ tok_mode -> last_expr_buffer = PyMem_Malloc (size );
413
+ if (tok_mode -> last_expr_buffer == NULL ) {
414
+ goto error ;
415
+ }
416
+ tok_mode -> last_expr_size = size ;
417
+ tok_mode -> last_expr_end = -1 ;
418
+ strncpy (tok_mode -> last_expr_buffer , tok -> cur , size );
419
+ break ;
431
420
case '}' :
432
421
case '!' :
433
422
case ':' :
434
423
if (tok_mode -> last_expr_end == -1 ) {
435
424
tok_mode -> last_expr_end = strlen (tok -> start );
436
425
}
437
426
break ;
427
+ default :
428
+ Py_UNREACHABLE ();
438
429
}
439
-
440
430
return 1 ;
431
+ error :
432
+ tok -> done = E_NOMEM ;
433
+ return 0 ;
441
434
}
442
435
443
436
static void
@@ -1766,7 +1759,7 @@ tok_get_normal_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct t
1766
1759
/* Skip comment, unless it's a type comment */
1767
1760
if (c == '#' ) {
1768
1761
1769
- if (tok -> tok_mode_stack_index > 0 ) {
1762
+ if (INSIDE_FSTRING ( tok ) ) {
1770
1763
return MAKE_TOKEN (syntaxerror (tok , "f-string expression part cannot include '#'" ));
1771
1764
}
1772
1765
@@ -2208,32 +2201,31 @@ tok_get_normal_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct t
2208
2201
2209
2202
p_start = tok -> start ;
2210
2203
p_end = tok -> cur ;
2211
- tokenizer_mode * current_tok = TOK_NEXT_MODE (tok );
2212
- current_tok -> kind = TOK_FSTRING_MODE ;
2213
- current_tok -> f_string_quote = quote ;
2214
- current_tok -> f_string_quote_size = quote_size ;
2215
- current_tok -> f_string_start = tok -> start ;
2216
- current_tok -> f_string_multi_line_start = tok -> line_start ;
2217
- current_tok -> last_expr_buffer = NULL ;
2218
- current_tok -> last_expr_size = 0 ;
2219
- current_tok -> last_expr_end = -1 ;
2204
+ tokenizer_mode * the_current_tok = TOK_NEXT_MODE (tok );
2205
+ the_current_tok -> kind = TOK_FSTRING_MODE ;
2206
+ the_current_tok -> f_string_quote = quote ;
2207
+ the_current_tok -> f_string_quote_size = quote_size ;
2208
+ the_current_tok -> f_string_start = tok -> start ;
2209
+ the_current_tok -> f_string_multi_line_start = tok -> line_start ;
2210
+ the_current_tok -> last_expr_buffer = NULL ;
2211
+ the_current_tok -> last_expr_size = 0 ;
2212
+ the_current_tok -> last_expr_end = -1 ;
2220
2213
2221
2214
switch (* tok -> start ) {
2222
2215
case 'F' :
2223
2216
case 'f' :
2224
- current_tok -> f_string_raw = tolower (* (tok -> start + 1 )) == 'r' ;
2217
+ the_current_tok -> f_string_raw = tolower (* (tok -> start + 1 )) == 'r' ;
2225
2218
break ;
2226
2219
case 'R' :
2227
2220
case 'r' :
2228
- current_tok -> f_string_raw = 1 ;
2221
+ the_current_tok -> f_string_raw = 1 ;
2229
2222
break ;
2230
2223
default :
2231
2224
Py_UNREACHABLE ();
2232
2225
}
2233
2226
2234
- current_tok -> bracket_stack = 0 ;
2235
- current_tok -> bracket_mark [0 ] = 0 ;
2236
- current_tok -> bracket_mark_index = -1 ;
2227
+ the_current_tok -> curly_bracket_depth = 0 ;
2228
+ the_current_tok -> curly_bracket_expr_start_depth = -1 ;
2237
2229
return MAKE_TOKEN (FSTRING_START );
2238
2230
}
2239
2231
@@ -2282,15 +2274,15 @@ tok_get_normal_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct t
2282
2274
int start = tok -> lineno ;
2283
2275
tok -> lineno = tok -> first_lineno ;
2284
2276
2285
- if (tok -> tok_mode_stack_index > 0 ) {
2277
+ if (INSIDE_FSTRING ( tok ) ) {
2286
2278
/* When we are in an f-string, before raising the
2287
2279
* unterminated string literal error, check whether
2288
2280
* the initial quote matches the f-string's quotes;
2289
2281
* if it does, this must be a missing '}' token,
2290
2282
* so raise the proper error */
2291
- tokenizer_mode * current_tok = TOK_GET_MODE (tok );
2292
- if (current_tok -> f_string_quote == quote &&
2293
- current_tok -> f_string_quote_size == quote_size ) {
2283
+ tokenizer_mode * the_current_tok = TOK_GET_MODE (tok );
2284
+ if (the_current_tok -> f_string_quote == quote &&
2285
+ the_current_tok -> f_string_quote_size == quote_size ) {
2294
2286
return MAKE_TOKEN (syntaxerror (tok , "f-string: expecting '}'" , start ));
2295
2287
}
2296
2288
}
@@ -2339,18 +2331,17 @@ tok_get_normal_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct t
2339
2331
2340
2332
/* Punctuation character */
2341
2333
int is_punctuation = (c == ':' || c == '}' || c == '!' || c == '{' );
2342
- if (is_punctuation && tok -> tok_mode_stack_index > 0 && current_tok -> bracket_mark_index >= 0 ) {
2343
- int mark = * TOK_GET_BRACKET_MARK (current_tok );
2344
- /* This code block gets executed before the bracket_stack is incremented
2334
+ if (is_punctuation && INSIDE_FSTRING (tok ) && INSIDE_FSTRING_EXPR (current_tok )) {
2335
+ /* This code block gets executed before the curly_bracket_depth is incremented
2345
2336
* by the `{` case, so to make sure we are on the 0th level, we need
2346
2337
* to adjust it manually */
2347
- int cursor = current_tok -> bracket_stack - (c != '{' );
2338
+ int cursor = current_tok -> curly_bracket_depth - (c != '{' );
2348
2339
2349
2340
if (cursor == 0 && !update_fstring_expr (tok , c )) {
2350
2341
return MAKE_TOKEN (ENDMARKER );
2351
2342
}
2352
2343
2353
- if (c == ':' && cursor == mark ) {
2344
+ if (c == ':' && cursor == current_tok -> curly_bracket_expr_start_depth ) {
2354
2345
current_tok -> kind = TOK_FSTRING_MODE ;
2355
2346
p_start = tok -> start ;
2356
2347
p_end = tok -> cur ;
@@ -2390,16 +2381,15 @@ tok_get_normal_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct t
2390
2381
tok -> parenlinenostack [tok -> level ] = tok -> lineno ;
2391
2382
tok -> parencolstack [tok -> level ] = (int )(tok -> start - tok -> line_start );
2392
2383
tok -> level ++ ;
2393
-
2394
- if (tok -> tok_mode_stack_index > 0 ) {
2395
- current_tok -> bracket_stack ++ ;
2384
+ if (INSIDE_FSTRING (tok )) {
2385
+ current_tok -> curly_bracket_depth ++ ;
2396
2386
}
2397
2387
break ;
2398
2388
case ')' :
2399
2389
case ']' :
2400
2390
case '}' :
2401
2391
if (!tok -> level ) {
2402
- if (tok -> tok_mode_stack_index > 0 && !current_tok -> bracket_stack && c == '}' ) {
2392
+ if (INSIDE_FSTRING ( tok ) && !current_tok -> curly_bracket_depth && c == '}' ) {
2403
2393
return MAKE_TOKEN (syntaxerror (tok , "f-string: single '}' is not allowed" ));
2404
2394
}
2405
2395
return MAKE_TOKEN (syntaxerror (tok , "unmatched '%c'" , c ));
@@ -2415,10 +2405,10 @@ tok_get_normal_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct t
2415
2405
nested expression, then instead of matching a different
2416
2406
syntactical construct with it; we'll throw an unmatched
2417
2407
parentheses error. */
2418
- if (tok -> tok_mode_stack_index > 0 && opening == '{' ) {
2419
- assert (current_tok -> bracket_stack >= 0 );
2420
- int previous_bracket = current_tok -> bracket_stack - 1 ;
2421
- if (previous_bracket == * TOK_GET_BRACKET_MARK ( current_tok ) ) {
2408
+ if (INSIDE_FSTRING ( tok ) && opening == '{' ) {
2409
+ assert (current_tok -> curly_bracket_depth >= 0 );
2410
+ int previous_bracket = current_tok -> curly_bracket_depth - 1 ;
2411
+ if (previous_bracket == current_tok -> curly_bracket_expr_start_depth ) {
2422
2412
return MAKE_TOKEN (syntaxerror (tok , "f-string: unmatched '%c'" , c ));
2423
2413
}
2424
2414
}
@@ -2436,14 +2426,16 @@ tok_get_normal_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct t
2436
2426
}
2437
2427
}
2438
2428
2439
- if (tok -> tok_mode_stack_index > 0 ) {
2440
- current_tok -> bracket_stack -- ;
2441
- if (c == '}' && current_tok -> bracket_stack == * TOK_GET_BRACKET_MARK ( current_tok ) ) {
2442
- current_tok -> bracket_mark_index -- ;
2429
+ if (INSIDE_FSTRING ( tok ) ) {
2430
+ current_tok -> curly_bracket_depth -- ;
2431
+ if (c == '}' && current_tok -> curly_bracket_depth == current_tok -> curly_bracket_expr_start_depth ) {
2432
+ current_tok -> curly_bracket_expr_start_depth -- ;
2443
2433
current_tok -> kind = TOK_FSTRING_MODE ;
2444
2434
}
2445
2435
}
2446
2436
break ;
2437
+ default :
2438
+ break ;
2447
2439
}
2448
2440
2449
2441
if (!Py_UNICODE_ISPRINTABLE (c )) {
@@ -2479,11 +2471,10 @@ tok_get_fstring_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct
2479
2471
2480
2472
if ((start_char == '{' && peek1 != '{' ) || (start_char == '}' && peek1 != '}' )) {
2481
2473
if (start_char == '{' ) {
2482
- current_tok -> bracket_mark_index ++ ;
2483
- if (current_tok -> bracket_mark_index >= MAX_EXPR_NESTING ) {
2474
+ current_tok -> curly_bracket_expr_start_depth ++ ;
2475
+ if (current_tok -> curly_bracket_expr_start_depth >= MAX_EXPR_NESTING ) {
2484
2476
return MAKE_TOKEN (syntaxerror (tok , "f-string: expressions nested too deeply" ));
2485
2477
}
2486
- * TOK_GET_BRACKET_MARK (current_tok ) = current_tok -> bracket_stack ;
2487
2478
}
2488
2479
TOK_GET_MODE (tok )-> kind = TOK_REGULAR_MODE ;
2489
2480
return tok_get_normal_mode (tok , current_tok , token );
@@ -2544,17 +2535,20 @@ tok_get_fstring_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct
2544
2535
end_quote_size = 0 ;
2545
2536
}
2546
2537
2547
- int in_format_spec = current_tok -> last_expr_end != -1 && current_tok -> bracket_mark_index >= 0 ;
2538
+ int in_format_spec = (
2539
+ current_tok -> last_expr_end != -1
2540
+ &&
2541
+ INSIDE_FSTRING_EXPR (current_tok )
2542
+ );
2548
2543
if (c == '{' ) {
2549
2544
int peek = tok_nextc (tok );
2550
2545
if (peek != '{' || in_format_spec ) {
2551
2546
tok_backup (tok , peek );
2552
2547
tok_backup (tok , c );
2553
- current_tok -> bracket_mark_index ++ ;
2554
- if (current_tok -> bracket_mark_index >= MAX_EXPR_NESTING ) {
2548
+ current_tok -> curly_bracket_expr_start_depth ++ ;
2549
+ if (current_tok -> curly_bracket_expr_start_depth >= MAX_EXPR_NESTING ) {
2555
2550
return MAKE_TOKEN (syntaxerror (tok , "f-string: expressions nested too deeply" ));
2556
2551
}
2557
- * TOK_GET_BRACKET_MARK (current_tok ) = current_tok -> bracket_stack ;
2558
2552
TOK_GET_MODE (tok )-> kind = TOK_REGULAR_MODE ;
2559
2553
p_start = tok -> start ;
2560
2554
p_end = tok -> cur ;
0 commit comments