1111#include "tokenizer.h"
1212#include "errcode.h"
1313
14- #include "unicodeobject.h"
15- #include "bytesobject.h"
16- #include "fileobject.h"
17- #include "abstract.h"
18-
1914/* Alternate tab spacing */
2015#define ALTTABSIZE 1
2116
4338 tok->lineno++; \
4439 tok->col_offset = 0;
4540
/* Nonzero when at least one mode has been pushed on the tokenizer mode
 * stack (tok_mode_stack_index > 0), i.e. the tokenizer is currently inside
 * an f-string.  `tok` is a pointer to the tokenizer state.
 *
 * The parameter list must follow the macro name with no intervening
 * whitespace; otherwise the preprocessor defines an object-like macro and
 * every use expands to a malformed expression.  The argument is
 * parenthesized so expressions such as `&state` can be passed safely. */
#define INSIDE_FSTRING(tok) ((tok)->tok_mode_stack_index > 0)

/* Nonzero when curly_bracket_expr_start_depth >= 0.  That field is reset to
 * -1 when an f-string mode is entered and incremented when a `{` opens a
 * replacement expression.  NOTE: despite the parameter name, callers pass a
 * pointer to the current tokenizer *mode*, not to the tokenizer state. */
#define INSIDE_FSTRING_EXPR(tok) ((tok)->curly_bracket_expr_start_depth >= 0)
4643#ifdef Py_DEBUG
4744static inline tokenizer_mode * TOK_GET_MODE (struct tok_state * tok ) {
4845 assert (tok -> tok_mode_stack_index >= 0 );
@@ -54,15 +51,9 @@ static inline tokenizer_mode* TOK_NEXT_MODE(struct tok_state* tok) {
5451 assert (tok -> tok_mode_stack_index < MAXLEVEL );
5552 return & (tok -> tok_mode_stack [++ tok -> tok_mode_stack_index ]);
5653}
57- static inline int * TOK_GET_BRACKET_MARK (tokenizer_mode * mode ) {
58- assert (mode -> bracket_mark_index >= 0 );
59- assert (mode -> bracket_mark_index < MAX_EXPR_NESTING );
60- return & (mode -> bracket_mark [mode -> bracket_mark_index ]);
61- }
6254#else
/* Non-debug variants of the mode-stack accessors (the Py_DEBUG build uses
 * inline functions with bounds assertions instead).
 *
 * TOK_GET_MODE yields a pointer to the mode at the current stack index.
 * TOK_NEXT_MODE pre-increments the stack index and yields a pointer to the
 * newly selected slot; no bounds check is performed here.
 *
 * Both macros evaluate `tok` more than once, so the argument must be free of
 * side effects.  The parameter list is attached directly to the macro name
 * (required for a function-like macro) and the argument is parenthesized. */
#define TOK_GET_MODE(tok) (&((tok)->tok_mode_stack[(tok)->tok_mode_stack_index]))
#define TOK_NEXT_MODE(tok) (&((tok)->tok_mode_stack[++(tok)->tok_mode_stack_index]))
65- #define TOK_GET_BRACKET_MARK (mode ) (&(mode->bracket_mark[mode->bracket_mark_index]))
6657#endif
6758
6859/* Forward */
@@ -398,20 +389,7 @@ update_fstring_expr(struct tok_state *tok, char cur)
398389 tokenizer_mode * tok_mode = TOK_GET_MODE (tok );
399390
400391 switch (cur ) {
401- case '{' :
402- if (tok_mode -> last_expr_buffer != NULL ) {
403- PyMem_Free (tok_mode -> last_expr_buffer );
404- }
405- tok_mode -> last_expr_buffer = PyMem_Malloc (size );
406- if (tok_mode -> last_expr_buffer == NULL ) {
407- tok -> done = E_NOMEM ;
408- return 0 ;
409- }
410- tok_mode -> last_expr_size = size ;
411- tok_mode -> last_expr_end = -1 ;
412- strncpy (tok_mode -> last_expr_buffer , tok -> cur , size );
413- break ;
414- case 0 :
392+ case 0 :
415393 if (!tok_mode -> last_expr_buffer || tok_mode -> last_expr_end >= 0 ) {
416394 return 1 ;
417395 }
@@ -421,23 +399,38 @@ update_fstring_expr(struct tok_state *tok, char cur)
421399 );
422400 if (new_buffer == NULL ) {
423401 PyMem_Free (tok_mode -> last_expr_buffer );
424- tok -> done = E_NOMEM ;
425- return 0 ;
402+ goto error ;
426403 }
427404 tok_mode -> last_expr_buffer = new_buffer ;
428405 strncpy (tok_mode -> last_expr_buffer + tok_mode -> last_expr_size , tok -> cur , size );
429406 tok_mode -> last_expr_size += size ;
430407 break ;
408+ case '{' :
409+ if (tok_mode -> last_expr_buffer != NULL ) {
410+ PyMem_Free (tok_mode -> last_expr_buffer );
411+ }
412+ tok_mode -> last_expr_buffer = PyMem_Malloc (size );
413+ if (tok_mode -> last_expr_buffer == NULL ) {
414+ goto error ;
415+ }
416+ tok_mode -> last_expr_size = size ;
417+ tok_mode -> last_expr_end = -1 ;
418+ strncpy (tok_mode -> last_expr_buffer , tok -> cur , size );
419+ break ;
431420 case '}' :
432421 case '!' :
433422 case ':' :
434423 if (tok_mode -> last_expr_end == -1 ) {
435424 tok_mode -> last_expr_end = strlen (tok -> start );
436425 }
437426 break ;
427+ default :
428+ Py_UNREACHABLE ();
438429 }
439-
440430 return 1 ;
431+ error :
432+ tok -> done = E_NOMEM ;
433+ return 0 ;
441434}
442435
443436static void
@@ -1766,7 +1759,7 @@ tok_get_normal_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct t
17661759 /* Skip comment, unless it's a type comment */
17671760 if (c == '#' ) {
17681761
1769- if (tok -> tok_mode_stack_index > 0 ) {
1762+ if (INSIDE_FSTRING ( tok ) ) {
17701763 return MAKE_TOKEN (syntaxerror (tok , "f-string expression part cannot include '#'" ));
17711764 }
17721765
@@ -2208,32 +2201,31 @@ tok_get_normal_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct t
22082201
22092202 p_start = tok -> start ;
22102203 p_end = tok -> cur ;
2211- tokenizer_mode * current_tok = TOK_NEXT_MODE (tok );
2212- current_tok -> kind = TOK_FSTRING_MODE ;
2213- current_tok -> f_string_quote = quote ;
2214- current_tok -> f_string_quote_size = quote_size ;
2215- current_tok -> f_string_start = tok -> start ;
2216- current_tok -> f_string_multi_line_start = tok -> line_start ;
2217- current_tok -> last_expr_buffer = NULL ;
2218- current_tok -> last_expr_size = 0 ;
2219- current_tok -> last_expr_end = -1 ;
2204+ tokenizer_mode * the_current_tok = TOK_NEXT_MODE (tok );
2205+ the_current_tok -> kind = TOK_FSTRING_MODE ;
2206+ the_current_tok -> f_string_quote = quote ;
2207+ the_current_tok -> f_string_quote_size = quote_size ;
2208+ the_current_tok -> f_string_start = tok -> start ;
2209+ the_current_tok -> f_string_multi_line_start = tok -> line_start ;
2210+ the_current_tok -> last_expr_buffer = NULL ;
2211+ the_current_tok -> last_expr_size = 0 ;
2212+ the_current_tok -> last_expr_end = -1 ;
22202213
22212214 switch (* tok -> start ) {
22222215 case 'F' :
22232216 case 'f' :
2224- current_tok -> f_string_raw = tolower (* (tok -> start + 1 )) == 'r' ;
2217+ the_current_tok -> f_string_raw = tolower (* (tok -> start + 1 )) == 'r' ;
22252218 break ;
22262219 case 'R' :
22272220 case 'r' :
2228- current_tok -> f_string_raw = 1 ;
2221+ the_current_tok -> f_string_raw = 1 ;
22292222 break ;
22302223 default :
22312224 Py_UNREACHABLE ();
22322225 }
22332226
2234- current_tok -> bracket_stack = 0 ;
2235- current_tok -> bracket_mark [0 ] = 0 ;
2236- current_tok -> bracket_mark_index = -1 ;
2227+ the_current_tok -> curly_bracket_depth = 0 ;
2228+ the_current_tok -> curly_bracket_expr_start_depth = -1 ;
22372229 return MAKE_TOKEN (FSTRING_START );
22382230 }
22392231
@@ -2282,15 +2274,15 @@ tok_get_normal_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct t
22822274 int start = tok -> lineno ;
22832275 tok -> lineno = tok -> first_lineno ;
22842276
2285- if (tok -> tok_mode_stack_index > 0 ) {
2277+ if (INSIDE_FSTRING ( tok ) ) {
22862278 /* When we are in an f-string, before raising the
22872279 * unterminated string literal error, check whether
22882280 * does the initial quote matches with f-strings quotes
22892281 * and if it is, then this must be a missing '}' token
22902282 * so raise the proper error */
2291- tokenizer_mode * current_tok = TOK_GET_MODE (tok );
2292- if (current_tok -> f_string_quote == quote &&
2293- current_tok -> f_string_quote_size == quote_size ) {
2283+ tokenizer_mode * the_current_tok = TOK_GET_MODE (tok );
2284+ if (the_current_tok -> f_string_quote == quote &&
2285+ the_current_tok -> f_string_quote_size == quote_size ) {
22942286 return MAKE_TOKEN (syntaxerror (tok , "f-string: expecting '}'" , start ));
22952287 }
22962288 }
@@ -2339,18 +2331,17 @@ tok_get_normal_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct t
23392331
23402332 /* Punctuation character */
23412333 int is_punctuation = (c == ':' || c == '}' || c == '!' || c == '{' );
2342- if (is_punctuation && tok -> tok_mode_stack_index > 0 && current_tok -> bracket_mark_index >= 0 ) {
2343- int mark = * TOK_GET_BRACKET_MARK (current_tok );
2344- /* This code block gets executed before the bracket_stack is incremented
2334+ if (is_punctuation && INSIDE_FSTRING (tok ) && INSIDE_FSTRING_EXPR (current_tok )) {
2335+ /* This code block gets executed before the curly_bracket_depth is incremented
23452336 * by the `{` case, so for ensuring that we are on the 0th level, we need
23462337 * to adjust it manually */
2347- int cursor = current_tok -> bracket_stack - (c != '{' );
2338+ int cursor = current_tok -> curly_bracket_depth - (c != '{' );
23482339
23492340 if (cursor == 0 && !update_fstring_expr (tok , c )) {
23502341 return MAKE_TOKEN (ENDMARKER );
23512342 }
23522343
2353- if (c == ':' && cursor == mark ) {
2344+ if (c == ':' && cursor == current_tok -> curly_bracket_expr_start_depth ) {
23542345 current_tok -> kind = TOK_FSTRING_MODE ;
23552346 p_start = tok -> start ;
23562347 p_end = tok -> cur ;
@@ -2390,16 +2381,15 @@ tok_get_normal_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct t
23902381 tok -> parenlinenostack [tok -> level ] = tok -> lineno ;
23912382 tok -> parencolstack [tok -> level ] = (int )(tok -> start - tok -> line_start );
23922383 tok -> level ++ ;
2393-
2394- if (tok -> tok_mode_stack_index > 0 ) {
2395- current_tok -> bracket_stack ++ ;
2384+ if (INSIDE_FSTRING (tok )) {
2385+ current_tok -> curly_bracket_depth ++ ;
23962386 }
23972387 break ;
23982388 case ')' :
23992389 case ']' :
24002390 case '}' :
24012391 if (!tok -> level ) {
2402- if (tok -> tok_mode_stack_index > 0 && !current_tok -> bracket_stack && c == '}' ) {
2392+ if (INSIDE_FSTRING ( tok ) && !current_tok -> curly_bracket_depth && c == '}' ) {
24032393 return MAKE_TOKEN (syntaxerror (tok , "f-string: single '}' is not allowed" ));
24042394 }
24052395 return MAKE_TOKEN (syntaxerror (tok , "unmatched '%c'" , c ));
@@ -2415,10 +2405,10 @@ tok_get_normal_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct t
24152405 nested expression, then instead of matching a different
24162406 syntactical construct with it; we'll throw an unmatched
24172407 parentheses error. */
2418- if (tok -> tok_mode_stack_index > 0 && opening == '{' ) {
2419- assert (current_tok -> bracket_stack >= 0 );
2420- int previous_bracket = current_tok -> bracket_stack - 1 ;
2421- if (previous_bracket == * TOK_GET_BRACKET_MARK ( current_tok ) ) {
2408+ if (INSIDE_FSTRING ( tok ) && opening == '{' ) {
2409+ assert (current_tok -> curly_bracket_depth >= 0 );
2410+ int previous_bracket = current_tok -> curly_bracket_depth - 1 ;
2411+ if (previous_bracket == current_tok -> curly_bracket_expr_start_depth ) {
24222412 return MAKE_TOKEN (syntaxerror (tok , "f-string: unmatched '%c'" , c ));
24232413 }
24242414 }
@@ -2436,14 +2426,16 @@ tok_get_normal_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct t
24362426 }
24372427 }
24382428
2439- if (tok -> tok_mode_stack_index > 0 ) {
2440- current_tok -> bracket_stack -- ;
2441- if (c == '}' && current_tok -> bracket_stack == * TOK_GET_BRACKET_MARK ( current_tok ) ) {
2442- current_tok -> bracket_mark_index -- ;
2429+ if (INSIDE_FSTRING ( tok ) ) {
2430+ current_tok -> curly_bracket_depth -- ;
2431+ if (c == '}' && current_tok -> curly_bracket_depth == current_tok -> curly_bracket_expr_start_depth ) {
2432+ current_tok -> curly_bracket_expr_start_depth -- ;
24432433 current_tok -> kind = TOK_FSTRING_MODE ;
24442434 }
24452435 }
24462436 break ;
2437+ default :
2438+ break ;
24472439 }
24482440
24492441 if (!Py_UNICODE_ISPRINTABLE (c )) {
@@ -2479,11 +2471,10 @@ tok_get_fstring_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct
24792471
24802472 if ((start_char == '{' && peek1 != '{' ) || (start_char == '}' && peek1 != '}' )) {
24812473 if (start_char == '{' ) {
2482- current_tok -> bracket_mark_index ++ ;
2483- if (current_tok -> bracket_mark_index >= MAX_EXPR_NESTING ) {
2474+ current_tok -> curly_bracket_expr_start_depth ++ ;
2475+ if (current_tok -> curly_bracket_expr_start_depth >= MAX_EXPR_NESTING ) {
24842476 return MAKE_TOKEN (syntaxerror (tok , "f-string: expressions nested too deeply" ));
24852477 }
2486- * TOK_GET_BRACKET_MARK (current_tok ) = current_tok -> bracket_stack ;
24872478 }
24882479 TOK_GET_MODE (tok )-> kind = TOK_REGULAR_MODE ;
24892480 return tok_get_normal_mode (tok , current_tok , token );
@@ -2544,17 +2535,20 @@ tok_get_fstring_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct
25442535 end_quote_size = 0 ;
25452536 }
25462537
2547- int in_format_spec = current_tok -> last_expr_end != -1 && current_tok -> bracket_mark_index >= 0 ;
2538+ int in_format_spec = (
2539+ current_tok -> last_expr_end != -1
2540+ &&
2541+ INSIDE_FSTRING_EXPR (current_tok )
2542+ );
25482543 if (c == '{' ) {
25492544 int peek = tok_nextc (tok );
25502545 if (peek != '{' || in_format_spec ) {
25512546 tok_backup (tok , peek );
25522547 tok_backup (tok , c );
2553- current_tok -> bracket_mark_index ++ ;
2554- if (current_tok -> bracket_mark_index >= MAX_EXPR_NESTING ) {
2548+ current_tok -> curly_bracket_expr_start_depth ++ ;
2549+ if (current_tok -> curly_bracket_expr_start_depth >= MAX_EXPR_NESTING ) {
25552550 return MAKE_TOKEN (syntaxerror (tok , "f-string: expressions nested too deeply" ));
25562551 }
2557- * TOK_GET_BRACKET_MARK (current_tok ) = current_tok -> bracket_stack ;
25582552 TOK_GET_MODE (tok )-> kind = TOK_REGULAR_MODE ;
25592553 p_start = tok -> start ;
25602554 p_end = tok -> cur ;
0 commit comments