Skip to content

Commit e81810d

Browse files
committed
gh-104825: Remove implicit newline in the line attribute in tokens emitted in the tokenize module
1 parent e561c09 commit e81810d

File tree

4 files changed

+10
-4
lines changed

4 files changed

+10
-4
lines changed

Lib/test/test_tokenize.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -103,7 +103,7 @@ def k(x):
103103
e.exception.msg,
104104
'unindent does not match any outer indentation level')
105105
self.assertEqual(e.exception.offset, 9)
106-
self.assertEqual(e.exception.text, ' x += 5\n')
106+
self.assertEqual(e.exception.text, ' x += 5')
107107

108108
def test_int(self):
109109
# Ordinary integers and binary operators
@@ -1157,7 +1157,7 @@ def readline():
11571157

11581158
# skip the initial encoding token and the end tokens
11591159
tokens = list(_tokenize(readline(), encoding='utf-8'))[:-2]
1160-
expected_tokens = [TokenInfo(3, '"ЉЊЈЁЂ"', (1, 0), (1, 7), '"ЉЊЈЁЂ"\n')]
1160+
expected_tokens = [TokenInfo(3, '"ЉЊЈЁЂ"', (1, 0), (1, 7), '"ЉЊЈЁЂ"')]
11611161
self.assertEqual(tokens, expected_tokens,
11621162
"bytes not decoded with encoding")
11631163

Lib/tokenize.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -518,8 +518,8 @@ def error(message, filename=None, location=None):
518518
if args.exact:
519519
token_type = token.exact_type
520520
token_range = "%d,%d-%d,%d:" % (token.start + token.end)
521-
print("%-20s%-15s%-15r" %
522-
(token_range, tok_name[token_type], token.string))
521+
print("%-20s%-15s%-15r%-15r" %
522+
(token_range, tok_name[token_type], token.string, token.line))
523523
except IndentationError as err:
524524
line, column = err.args[1][1:3]
525525
error(err.args[0], filename, (line, column))
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
Tokens emitted by the :mod:`tokenize` module do not include an implicit
2+
``\n`` character in the ``line`` attribute anymore. Patch by Pablo Galindo.

Python/Python-tokenize.c

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -123,6 +123,8 @@ _tokenizer_error(struct tok_state *tok)
123123
int result = 0;
124124

125125
Py_ssize_t size = tok->inp - tok->buf;
126+
assert(tok->buf[size-1] == '\n');
127+
size -= 1; // Remove the newline character from the end of the line
126128
error_line = PyUnicode_DecodeUTF8(tok->buf, size, "replace");
127129
if (!error_line) {
128130
result = -1;
@@ -193,6 +195,8 @@ tokenizeriter_next(tokenizeriterobject *it)
193195
}
194196

195197
Py_ssize_t size = it->tok->inp - it->tok->buf;
198+
assert(it->tok->buf[size-1] == '\n');
199+
size -= 1; // Remove the newline character from the end of the line
196200
PyObject *line = PyUnicode_DecodeUTF8(it->tok->buf, size, "replace");
197201
if (line == NULL) {
198202
Py_DECREF(str);

0 commit comments

Comments
 (0)