Skip to content

Commit cabd665

Browse files
authored
Merge pull request #432 from willkg/431-charencoding
Fix parsing "meta" tag with encoding attribute
2 parents 93a060e + cb156cb commit cabd665

File tree

5 files changed

+78
-13
lines changed

5 files changed

+78
-13
lines changed

CHANGES

+7-1
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
Bleach changes
22
==============
33

4-
Version 3.0.3 (In development)
4+
Version 3.1.0 (In development)
55
------------------------------
66

77
**Security fixes**
@@ -25,6 +25,12 @@ None
2525
* Fix cases where attribute names could have invalid characters in them.
2626
(#419)
2727

28+
* Fix problems with ``LinkifyFilter`` not being able to match links
29+
across ``&``. (#422)
30+
31+
* Fix ``InputStreamWithMemory`` when the ``BleachHTMLParser`` is
32+
parsing ``meta`` tags. (#431)
33+
2834

2935
Version 3.0.2 (October 11th, 2018)
3036
----------------------------------

bleach/__init__.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@
2020
# yyyymmdd
2121
__releasedate__ = ''
2222
# x.y.z or x.y.z.dev0 -- semver
23-
__version__ = '3.0.3.dev0'
23+
__version__ = '3.1.0.dev0'
2424
VERSION = parse_version(__version__)
2525

2626

bleach/html5lib_shim.py

+8
Original file line numberDiff line numberDiff line change
@@ -181,6 +181,14 @@ def __init__(self, inner_stream):
181181
def errors(self):
182182
return self._inner_stream.errors
183183

184+
@property
185+
def charEncoding(self):
186+
return self._inner_stream.charEncoding
187+
188+
@property
189+
def changeEncoding(self):
190+
return self._inner_stream.changeEncoding
191+
184192
def char(self):
185193
c = self._inner_stream.char()
186194
# char() can return None if EOF, so ignore that

tests/test_html5lib_shim.py

+62
Original file line numberDiff line numberDiff line change
@@ -80,3 +80,65 @@ def test_serializer(data, expected):
8080
serialized = serializer.render(walker(dom))
8181

8282
assert serialized == expected
83+
84+
85+
@pytest.mark.parametrize('parser_args, data, expected', [
86+
# Make sure InputStreamWithMemory has charEncoding and changeEncoding
87+
(
88+
{},
89+
'<meta charset="utf-8">',
90+
'<meta charset="utf-8">'
91+
),
92+
# Handle consume entities False--all entities are passed along and then
93+
# escaped when serialized
94+
(
95+
{'consume_entities': False},
96+
'text &amp;&gt;&quot;',
97+
'text &amp;amp;&amp;gt;&amp;quot;'
98+
),
99+
# Handle consume entities True--all entities are consumed and converted
100+
# to their character equivalents and then &, <, and > are escaped when
101+
# serialized
102+
(
103+
{'consume_entities': True},
104+
'text &amp;&gt;&quot;',
105+
'text &amp;&gt;"'
106+
),
107+
# Test that "invalid-character-in-attribute-name" errors in tokenizing
108+
# result in attributes with invalid names getting dropped
109+
(
110+
{},
111+
'<a href="http://example.com"">',
112+
'<a href="http://example.com"></a>'
113+
),
114+
(
115+
{},
116+
'<a href=\'http://example.com\'\'>',
117+
'<a href="http://example.com"></a>'
118+
)
119+
])
120+
def test_bleach_html_parser(parser_args, data, expected):
121+
args = {
122+
'tags': None,
123+
'strip': True,
124+
'consume_entities': True
125+
}
126+
args.update(parser_args)
127+
128+
# Build a parser, walker, and serializer just like we do in clean()
129+
parser = html5lib_shim.BleachHTMLParser(**args)
130+
walker = html5lib_shim.getTreeWalker('etree')
131+
serializer = html5lib_shim.BleachHTMLSerializer(
132+
quote_attr_values='always',
133+
omit_optional_tags=False,
134+
escape_lt_in_attrs=True,
135+
resolve_entities=False,
136+
sanitize=False,
137+
alphabetical_attributes=False,
138+
)
139+
140+
# Parse, walk, and then serialize the output
141+
dom = parser.parseFragment(data)
142+
serialized = serializer.render(walker(dom))
143+
144+
assert serialized == expected

tests/test_linkify.py

-11
Original file line numberDiff line numberDiff line change
@@ -69,17 +69,6 @@ def ft(attrs, new=False):
6969
)
7070

7171

72-
def test_invalid_attribute_names():
73-
"""Test that "invalid-character-in-attribute-name" errors in tokenizing
74-
result in attributes with invalid names get dropped.
75-
76-
"""
77-
assert (
78-
linkify('<a href="http://example.com/"">') ==
79-
'<a href="http://example.com/" rel="nofollow"></a>'
80-
)
81-
82-
8372
@pytest.mark.parametrize('data,parse_email,expected', [
8473
(
8574
'a james@example.com mailto',

0 commit comments

Comments
 (0)