From 739fdceda17212cc3d628d43003a0bea15f87137 Mon Sep 17 00:00:00 2001 From: Lysandros Nikolaou Date: Wed, 10 Jun 2020 02:44:55 +0300 Subject: [PATCH 1/5] bpo-40847: Consider a line with only a LINECONT a blank line A line with only a line continuation character should be considered a blank line at tokenizer level, so that only a single NEWLINE token gets emitted. The old parser was working around the issue, but the new parser threw a `SyntaxError` for valid input. For example, an empty line following a line continuation character was interpreted as a `SyntaxError`. --- Lib/test/test_peg_parser.py | 7 +++++++ Lib/test/test_syntax.py | 14 ++++++++++++++ Parser/tokenizer.c | 3 ++- 3 files changed, 23 insertions(+), 1 deletion(-) diff --git a/Lib/test/test_peg_parser.py b/Lib/test/test_peg_parser.py index 6ccb2573176bb5..fae85e323da044 100644 --- a/Lib/test/test_peg_parser.py +++ b/Lib/test/test_peg_parser.py @@ -153,6 +153,13 @@ def f(): ('dict_comp', '{x:1 for x in a}'), ('dict_comp_if', '{x:1+2 for x in a if b}'), ('dict_empty', '{}'), + ('empty_line_after_linecont', + r''' + pass + \ + + pass + '''), ('for', ''' for i in a: diff --git a/Lib/test/test_syntax.py b/Lib/test/test_syntax.py index f41426a4e9d2da..0c207ec8fc07cc 100644 --- a/Lib/test/test_syntax.py +++ b/Lib/test/test_syntax.py @@ -858,6 +858,20 @@ def test_kwargs_last3(self): "iterable argument unpacking follows " "keyword argument unpacking") + def test_empty_line_after_linecont(self): + # See issue-40847 + s = r"""\ +pass + \ + +pass +""" + try: + compile(s, '', 'exec') + except SyntaxError: + self.fail("Empty line after a line continuation character is valid.") + + def test_main(): support.run_unittest(SyntaxTestCase) from test import test_syntax diff --git a/Parser/tokenizer.c b/Parser/tokenizer.c index cebfadc8e89f30..d461e4e24e721c 100644 --- a/Parser/tokenizer.c +++ b/Parser/tokenizer.c @@ -1203,8 +1203,9 @@ tok_get(struct tok_state *tok, const char **p_start, const char **p_end) } } 
tok_backup(tok, c); - if (c == '#' || c == '\n') { + if (c == '#' || c == '\n' || c == '\\') { /* Lines with only whitespace and/or comments + and/or a line continuation character shouldn't affect the indentation and are not passed to the parser as NEWLINE tokens, except *totally* empty lines in interactive From a5b627cfa107e2df4ed40d45d2d0ac8f3c9b578f Mon Sep 17 00:00:00 2001 From: "blurb-it[bot]" <43283697+blurb-it[bot]@users.noreply.github.com> Date: Tue, 9 Jun 2020 23:52:36 +0000 Subject: [PATCH 2/5] =?UTF-8?q?=F0=9F=93=9C=F0=9F=A4=96=20Added=20by=20blu?= =?UTF-8?q?rb=5Fit.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../Core and Builtins/2020-06-09-23-52-32.bpo-40847.4XAACw.rst | 2 ++ 1 file changed, 2 insertions(+) create mode 100644 Misc/NEWS.d/next/Core and Builtins/2020-06-09-23-52-32.bpo-40847.4XAACw.rst diff --git a/Misc/NEWS.d/next/Core and Builtins/2020-06-09-23-52-32.bpo-40847.4XAACw.rst b/Misc/NEWS.d/next/Core and Builtins/2020-06-09-23-52-32.bpo-40847.4XAACw.rst new file mode 100644 index 00000000000000..9c09979df0631b --- /dev/null +++ b/Misc/NEWS.d/next/Core and Builtins/2020-06-09-23-52-32.bpo-40847.4XAACw.rst @@ -0,0 +1,2 @@ +A line with only a line continuation character should be considered a blank line at tokenizer level, so that only a single NEWLINE token gets +emitted. The old parser was working around the issue, but the new parser threw a SyntaxError for valid input. For example, an empty line following a line continuation character was interpreted as a SyntaxError. 
\ No newline at end of file From 550704ee175a23fed1d647588c3a69b72683cd0d Mon Sep 17 00:00:00 2001 From: Lysandros Nikolaou Date: Wed, 10 Jun 2020 02:59:28 +0300 Subject: [PATCH 3/5] Fix formatting of blurb item --- .../2020-06-09-23-52-32.bpo-40847.4XAACw.rst | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/Misc/NEWS.d/next/Core and Builtins/2020-06-09-23-52-32.bpo-40847.4XAACw.rst b/Misc/NEWS.d/next/Core and Builtins/2020-06-09-23-52-32.bpo-40847.4XAACw.rst index 9c09979df0631b..c75c4dafdac1e4 100644 --- a/Misc/NEWS.d/next/Core and Builtins/2020-06-09-23-52-32.bpo-40847.4XAACw.rst +++ b/Misc/NEWS.d/next/Core and Builtins/2020-06-09-23-52-32.bpo-40847.4XAACw.rst @@ -1,2 +1,3 @@ -A line with only a line continuation character should be considered a blank line at tokenizer level, so that only a single NEWLINE token gets -emitted. The old parser was working around the issue, but the new parser threw a SyntaxError for valid input. For example, an empty line following a line continuation character was interpreted as a SyntaxError. \ No newline at end of file +A line with only a line continuation character should be considered a blank line at tokenizer level, so that only a +single NEWLINE token gets emitted. The old parser was working around the issue, but the new parser threw a SyntaxError +for valid input. For example, an empty line following a line continuation character was interpreted as a SyntaxError. 
\ No newline at end of file From 239da773164584f6e5f0a3b1da5acf97bb40e011 Mon Sep 17 00:00:00 2001 From: Lysandros Nikolaou Date: Wed, 10 Jun 2020 22:43:58 +0300 Subject: [PATCH 4/5] Improve blurb file --- .../2020-06-09-23-52-32.bpo-40847.4XAACw.rst | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/Misc/NEWS.d/next/Core and Builtins/2020-06-09-23-52-32.bpo-40847.4XAACw.rst b/Misc/NEWS.d/next/Core and Builtins/2020-06-09-23-52-32.bpo-40847.4XAACw.rst index c75c4dafdac1e4..52a0dda1f8b6a6 100644 --- a/Misc/NEWS.d/next/Core and Builtins/2020-06-09-23-52-32.bpo-40847.4XAACw.rst +++ b/Misc/NEWS.d/next/Core and Builtins/2020-06-09-23-52-32.bpo-40847.4XAACw.rst @@ -1,3 +1,4 @@ -A line with only a line continuation character should be considered a blank line at tokenizer level, so that only a -single NEWLINE token gets emitted. The old parser was working around the issue, but the new parser threw a SyntaxError -for valid input. For example, an empty line following a line continuation character was interpreted as a SyntaxError. \ No newline at end of file +Fix a bug where a line with only a line continuation character is not considered a blank line at tokenizer level. +In such cases more than a single `NEWLINE` token were emitted. The old parser was working around the issue, +but the new parser threw a :exc:`SyntaxError` for valid input due to this. For example, an empty line following +a line continuation character was interpreted as a :exc:`SyntaxError`. 
\ No newline at end of file From 6e51d0ade2a1d0e9370804daa3f5e6cc8e614f8b Mon Sep 17 00:00:00 2001 From: Lysandros Nikolaou Date: Thu, 11 Jun 2020 01:56:16 +0300 Subject: [PATCH 5/5] Update Misc/NEWS.d/next/Core and Builtins/2020-06-09-23-52-32.bpo-40847.4XAACw.rst Co-authored-by: Pablo Galindo --- .../2020-06-09-23-52-32.bpo-40847.4XAACw.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Misc/NEWS.d/next/Core and Builtins/2020-06-09-23-52-32.bpo-40847.4XAACw.rst b/Misc/NEWS.d/next/Core and Builtins/2020-06-09-23-52-32.bpo-40847.4XAACw.rst index 52a0dda1f8b6a6..0b489f24832159 100644 --- a/Misc/NEWS.d/next/Core and Builtins/2020-06-09-23-52-32.bpo-40847.4XAACw.rst +++ b/Misc/NEWS.d/next/Core and Builtins/2020-06-09-23-52-32.bpo-40847.4XAACw.rst @@ -1,4 +1,4 @@ Fix a bug where a line with only a line continuation character is not considered a blank line at tokenizer level. -In such cases more than a single `NEWLINE` token were emitted. The old parser was working around the issue, +In such cases, more than a single ``NEWLINE`` token was emitted. The old parser was working around the issue, but the new parser threw a :exc:`SyntaxError` for valid input due to this. For example, an empty line following -a line continuation character was interpreted as a :exc:`SyntaxError`. \ No newline at end of file +a line continuation character was interpreted as a :exc:`SyntaxError`.