Skip to content

Commit 9207c87

Browse files
[3.12] gh-88943: Improve syntax error for non-ASCII character that follows a numerical literal (GH-109081) (#109090)
gh-88943: Improve syntax error for non-ASCII character that follows a numerical literal (GH-109081). The error now points to the invalid non-ASCII character, not to the valid numerical literal. (cherry picked from commit b2729e9) Co-authored-by: Serhiy Storchaka <[email protected]>
1 parent a4f186a commit 9207c87

File tree

3 files changed

+8
-1
lines changed

3 files changed

+8
-1
lines changed

Lib/test/test_grammar.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -236,6 +236,10 @@ def check(test, error=False):
236236
check(f"[{num}for x in ()]")
237237
check(f"{num}spam", error=True)
238238

239+
# gh-88943: Invalid non-ASCII character following a numerical literal.
240+
with self.assertRaisesRegex(SyntaxError, r"invalid character '⁄' \(U\+2044\)"):
241+
compile(f"{num}⁄7", "<testcase>", "eval")
242+
239243
with self.assertWarnsRegex(SyntaxWarning, r'invalid \w+ literal'):
240244
compile(f"{num}is x", "<testcase>", "eval")
241245
with warnings.catch_warnings():
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
Improve syntax error for non-ASCII character that follows a numerical
2+
literal. It now points to the invalid non-ASCII character, not to the valid
3+
numerical literal.

Parser/tokenizer.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1648,7 +1648,7 @@ verify_end_of_number(struct tok_state *tok, int c, const char *kind) {
16481648
tok_nextc(tok);
16491649
}
16501650
else /* In future releases, only error will remain. */
1651-
if (is_potential_identifier_char(c)) {
1651+
if (c < 128 && is_potential_identifier_char(c)) {
16521652
tok_backup(tok, c);
16531653
syntaxerror(tok, "invalid %s literal", kind);
16541654
return 0;

0 commit comments

Comments
 (0)