From 0123ccd31a008e00257c36f5a06375525b78f433 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marta=20G=C3=B3mez=20Mac=C3=ADas?= Date: Mon, 22 May 2023 12:44:25 +0200 Subject: [PATCH 1/3] fix(tokenize): Add line number attribute to indentation error exception --- Lib/test/test_tokenize.py | 3 ++- Python/Python-tokenize.c | 8 +++----- 2 files changed, 5 insertions(+), 6 deletions(-) diff --git a/Lib/test/test_tokenize.py b/Lib/test/test_tokenize.py index dda7243bfa19fe..e8c5b5dcdb1e3f 100644 --- a/Lib/test/test_tokenize.py +++ b/Lib/test/test_tokenize.py @@ -92,9 +92,10 @@ def k(x): readline = BytesIO(indent_error_file).readline with self.assertRaisesRegex(IndentationError, "unindent does not match any " - "outer indentation level"): + "outer indentation level") as e: for tok in tokenize(readline): pass + self.assertEqual(e.exception.lineno, 3) def test_int(self): # Ordinary integers and binary operators diff --git a/Python/Python-tokenize.c b/Python/Python-tokenize.c index 43b44be94583ee..1cc673bbc4a6a8 100644 --- a/Python/Python-tokenize.c +++ b/Python/Python-tokenize.c @@ -89,11 +89,9 @@ _tokenizer_error(struct tok_state *tok) } return -1; case E_DEDENT: - PyErr_Format(PyExc_IndentationError, - "unindent does not match any outer indentation level " - "(<tokenize>, line %d)", - tok->lineno); - return -1; + msg = "unindent does not match any outer indentation level"; + errtype = PyExc_IndentationError; + break; case E_INTR: if (!PyErr_Occurred()) { PyErr_SetNone(PyExc_KeyboardInterrupt); From bd9a8a3ceacf9f309774ef3c8a21a995bce3de34 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marta=20G=C3=B3mez=20Mac=C3=ADas?= Date: Mon, 22 May 2023 12:47:02 +0200 Subject: [PATCH 2/3] Fix tabnanny test --- Lib/test/test_tabnanny.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Lib/test/test_tabnanny.py b/Lib/test/test_tabnanny.py index dac47318011d9d..aa700118f735d9 100644 --- a/Lib/test/test_tabnanny.py +++ b/Lib/test/test_tabnanny.py @@ -317,7 +317,7 @@ def 
test_with_errored_file(self): with TemporaryPyFile(SOURCE_CODES["wrong_indented"]) as file_path: stderr = f"{file_path!r}: Token Error: " stderr += ('unindent does not match any outer indentation level' - ' (<tokenize>, line 3)') + ' (<string>, line 3)') self.validate_cmd(file_path, stderr=stderr, expect_failure=True) def test_with_error_free_file(self): From 42d1db81feb92be1826a8641db2c1827aeff741c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marta=20G=C3=B3mez=20Mac=C3=ADas?= Date: Mon, 22 May 2023 13:04:46 +0200 Subject: [PATCH 3/3] Add additional attributes and checks --- Lib/test/test_tokenize.py | 8 ++++++++ Python/Python-tokenize.c | 7 ++++++- 2 files changed, 14 insertions(+), 1 deletion(-) diff --git a/Lib/test/test_tokenize.py b/Lib/test/test_tokenize.py index e8c5b5dcdb1e3f..8e7ab3d4b7b578 100644 --- a/Lib/test/test_tokenize.py +++ b/Lib/test/test_tokenize.py @@ -96,6 +96,14 @@ def k(x): for tok in tokenize(readline): pass self.assertEqual(e.exception.lineno, 3) + self.assertEqual(e.exception.filename, '<string>') + self.assertEqual(e.exception.end_lineno, None) + self.assertEqual(e.exception.end_offset, None) + self.assertEqual( + e.exception.msg, + 'unindent does not match any outer indentation level') + self.assertEqual(e.exception.offset, 9) + self.assertEqual(e.exception.text, ' x += 5\n') def test_int(self): # Ordinary integers and binary operators diff --git a/Python/Python-tokenize.c b/Python/Python-tokenize.c index 1cc673bbc4a6a8..f7e32d3af9a9f7 100644 --- a/Python/Python-tokenize.c +++ b/Python/Python-tokenize.c @@ -129,7 +129,12 @@ _tokenizer_error(struct tok_state *tok) goto exit; } - tmp = Py_BuildValue("(OnnOii)", tok->filename, tok->lineno, 0, error_line, 0, 0); + Py_ssize_t offset = _PyPegen_byte_offset_to_character_offset(error_line, tok->inp - tok->buf); + if (offset == -1) { + result = -1; + goto exit; + } + tmp = Py_BuildValue("(OnnOOO)", tok->filename, tok->lineno, offset, error_line, Py_None, Py_None); if (!tmp) { result = -1; goto exit;