Skip to content

Commit 175f677

Browse files
Greg Guthe (g-k)
Greg Guthe
authored and committed
fix bug 1621692
1 parent e0ad450 commit 175f677

File tree

2 files changed

+37
-3
lines changed

2 files changed

+37
-3
lines changed

Diff for: bleach/html5lib_shim.py

+12-1
Original file line number | Diff line number | Diff line change
@@ -534,7 +534,18 @@ def next_possible_entity(text):
534534

535535

536536
class BleachHTMLSerializer(HTMLSerializer):
537-
"""HTMLSerializer that undoes & -> &amp; in attributes"""
537+
"""HTMLSerializer that undoes & -> &amp; in attributes and sets
538+
escape_rcdata to True
539+
"""
540+
541+
# per the HTMLSerializer.__init__ docstring:
542+
#
543+
# Whether to escape characters that need to be
544+
# escaped within normal elements within rcdata elements such as
545+
# style.
546+
#
547+
escape_rcdata = True
548+
538549
def escape_base_amp(self, stoken):
539550
"""Escapes just bare & in HTML attribute values"""
540551
# First, undo escaping of &. We need to do this because html5lib's

Diff for: tests/test_clean.py

+25-2
Original file line number | Diff line number | Diff line change
@@ -7,7 +7,7 @@
77
from bleach import clean
88
from bleach.html5lib_shim import Filter
99
from bleach.sanitizer import Cleaner
10-
10+
from bleach._vendor.html5lib.constants import rcdataElements
1111

1212
def test_clean_idempotent():
1313
"""Make sure that applying the filter twice doesn't change anything."""
@@ -789,7 +789,7 @@ def test_nonexistent_namespace():
789789
(
790790
raw_tag,
791791
"<noscript><%s></noscript><img src=x onerror=alert(1) />" % raw_tag,
792-
"<noscript><%s></noscript>&lt;img src=x onerror=alert(1) /&gt;" % raw_tag,
792+
"<noscript>&lt;%s&gt;</noscript>&lt;img src=x onerror=alert(1) /&gt;" % raw_tag,
793793
)
794794
for raw_tag in _raw_tags
795795
],
@@ -799,6 +799,29 @@ def test_noscript_rawtag_(raw_tag, data, expected):
799799
assert clean(data, tags=["noscript", raw_tag]) == expected
800800

801801

802+
@pytest.mark.parametrize(
803+
"namespace_tag, rc_data_element_tag, data, expected",
804+
[
805+
(
806+
namespace_tag,
807+
rc_data_element_tag,
808+
"<%s><%s><img src=x onerror=alert(1)>" % (namespace_tag, rc_data_element_tag),
809+
"<%s><%s>&lt;img src=x onerror=alert(1)&gt;</%s></%s>" % (namespace_tag, rc_data_element_tag, rc_data_element_tag, namespace_tag),
810+
)
811+
for namespace_tag in ["math", "svg"]
812+
# https://dev.w3.org/html5/html-author/#rcdata-elements
813+
# https://html.spec.whatwg.org/index.html#parsing-html-fragments
814+
# in html5lib: 'style', 'script', 'xmp', 'iframe', 'noembed', 'noframes', and 'noscript'
815+
for rc_data_element_tag in rcdataElements
816+
],
817+
)
818+
def test_namespace_rc_data_element_strip_false(namespace_tag, rc_data_element_tag, data, expected):
819+
# refs: bug 1621692 / GHSA-m6xf-fq7q-8743
820+
#
821+
# browsers will pull the img out of the namespace and rc data tag resulting in XSS
822+
assert clean(data, tags=[namespace_tag, rc_data_element_tag], strip=False) == expected
823+
824+
802825
def get_ids_and_tests():
803826
"""Retrieves regression tests from data/ directory
804827

0 commit comments

Comments
 (0)