Skip to content

gh-118350: Add escapable-raw-text mode to html parser #121770

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Draft
wants to merge 10 commits into
base: main
Choose a base branch
from
2 changes: 1 addition & 1 deletion Lib/html/parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -471,4 +471,4 @@ def handle_pi(self, data):
pass

def unknown_decl(self, data):
pass
pass
53 changes: 52 additions & 1 deletion Lib/test/test_htmlparser.py
Original file line number Diff line number Diff line change
Expand Up @@ -295,6 +295,57 @@ def test_cdata_content(self):
("data", content),
("endtag", element_lower)])

def test_raw_text_content(self):
    # Inside raw text (script, style) and escapable raw text (title,
    # textarea) elements, anything that looks like a tag must be reported
    # as character data rather than as a start tag.
    payload = """<h1>tagshould be handled as text"""
    base_names = ("script", "style", "title", "textarea")
    # Check every element in lower, upper and capitalized spelling, in
    # that order: element names are matched case-insensitively and are
    # always reported lowercased.
    spellings = [
        *base_names,
        *(name.upper() for name in base_names),
        *(name.capitalize() for name in base_names),
    ]
    for tag in spellings:
        expected_events = [
            ("starttag", tag.lower(), []),
            ("data", payload),
        ]
        self._run_check(f"<{tag}>{payload}", expected_events)

def test_escapable_raw_text_content(self):
# Character references should still be recognized in escapable raw text content.
class Collector(EventCollector):
pass

content = "Timon &amp; Pumba"
expected = "Timon & Pumba"
elements = [
"title",
"textarea",
"TITLE",
"TEXTAREA",
"Title",
"Textarea",
]
for element in elements:
source = f"<{element}>{content}"
self._run_check(
source, [
("starttag", element.lower(), []),
('data', 'Timon '),
('entityref', 'amp'),
('data', ' Pumba')
],
collector=Collector(convert_charrefs=False),
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Did you mean this test? @serhiy-storchaka

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes. Thanks.

)

def test_cdata_with_closing_tags(self):
# see issue #13358
# make sure that HTMLParser calls handle_data only once for each CDATA.
Expand Down Expand Up @@ -473,7 +524,7 @@ def test_slashes_in_starttag(self):
('starttag', 'a', [('foo', None), ('=', None), ('bar', None)])
]
self._run_check(html, expected)
#see issue #14538
# see issue #14538
html = ('<meta><meta / ><meta // ><meta / / >'
'<meta/><meta /><meta //><meta//>')
expected = [
Expand Down
Loading