From 8768bb3e6ef884ddc6aa68f9af7a3d56fbde5d5e Mon Sep 17 00:00:00 2001 From: Waylan Limberg Date: Sat, 10 May 2025 13:36:06 -0400 Subject: [PATCH] gh-86155: Fix data loss after unclosed script or style tag in HTMLParser (GH-22658) When calling .close() the HTMLParser should flush all remaining content, even when that content is in an unclosed script or style tag. (cherry picked from commit 53383e90e4df7029f792b7aa81aa2e4cff348ed0) Co-authored-by: Waylan Limberg --- Lib/html/parser.py | 2 +- Lib/test/test_htmlparser.py | 10 ++++++++++ .../2023-02-13-21-41-34.gh-issue-86155.ppIGSC.rst | 2 ++ 3 files changed, 13 insertions(+), 1 deletion(-) create mode 100644 Misc/NEWS.d/next/Library/2023-02-13-21-41-34.gh-issue-86155.ppIGSC.rst diff --git a/Lib/html/parser.py b/Lib/html/parser.py index 1b8b6ea0e5ab7a..1e30956fe24f83 100644 --- a/Lib/html/parser.py +++ b/Lib/html/parser.py @@ -260,7 +260,7 @@ def goahead(self, end): else: assert 0, "interesting.search() lied" # end while - if end and i < n and not self.cdata_elem: + if end and i < n: if self.convert_charrefs and not self.cdata_elem: self.handle_data(unescape(rawdata[i:n])) else: diff --git a/Lib/test/test_htmlparser.py b/Lib/test/test_htmlparser.py index 68649e9d6d5e9c..61fa24fab574f2 100644 --- a/Lib/test/test_htmlparser.py +++ b/Lib/test/test_htmlparser.py @@ -317,6 +317,16 @@ def get_events(self): ("endtag", element_lower)], collector=Collector(convert_charrefs=False)) + def test_EOF_in_cdata(self): + content = """ ¬-an-entity-ref; +

+ ''""" + s = f'