Skip to content

Commit 961b205

Browse files
committed
Avoid cdata_mode outside of HTML blocks.
Fixes Python-Markdown#1036.
1 parent f4d67ec commit 961b205

File tree

2 files changed

+101
-0
lines changed

2 files changed

+101
-0
lines changed

markdown/htmlparser.py

Lines changed: 63 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -131,6 +131,9 @@ def handle_starttag(self, tag, attrs):
131131
self._cache.append(text)
132132
else:
133133
self.cleandoc.append(text)
134+
if tag in self.CDATA_CONTENT_ELEMENTS:
135+
# This is presumably a standalone tag in a code span (see #1036).
136+
self.clear_cdata_mode()
134137

135138
def handle_endtag(self, tag):
136139
text = self.get_endtag_text(tag)
@@ -207,3 +210,63 @@ def handle_pi(self, data):
207210
def unknown_decl(self, data):
    """Forward an unrecognized `<![...` declaration as a block-level empty tag.

    A declaration whose text starts with `CDATA[` is closed with `]]>`;
    anything else is closed with `]>`.
    """
    if data.startswith('CDATA['):
        closer = ']]>'
    else:
        closer = ']>'
    # Rebuild the declaration's source text and hand it on unchanged.
    self.handle_empty_tag('<![' + data + closer, is_block=True)
213+
214+
# The rest has been copied from base class in standard lib to address #1036.
215+
# As __starttag_text is private, all references to it must be in this subclass.
216+
# The last few lines of parse_starttag are reversed so that handle_starttag
217+
# can override cdata_mode in certain situations (in a code span).
218+
# Default value returned by get_starttag_text() until parse_starttag() stores a tag's source text.
__starttag_text = None
219+
220+
def get_starttag_text(self):
    """Return the raw source text (`<...>`) of the last start tag parsed.

    The value is whatever `parse_starttag` stored most recently.
    """
    starttag_source = self.__starttag_text
    return starttag_source
223+
224+
def parse_starttag(self, i):
    """Parse the start tag that begins at index `i` of `self.rawdata`.

    This is a copy of the standard library's `HTMLParser.parse_starttag`
    with one deliberate difference: for tags listed in
    `CDATA_CONTENT_ELEMENTS`, `set_cdata_mode` is called *before*
    `handle_starttag`, so that `handle_starttag` is able to override the
    CDATA mode again (see #1036).

    Returns the index just past the tag, or the negative value reported by
    `check_for_whole_start_tag` when the tag is not complete.
    """
    self.__starttag_text = None
    endpos = self.check_for_whole_start_tag(i)
    if endpos < 0:
        return endpos
    rawdata = self.rawdata
    self.__starttag_text = rawdata[i:endpos]

    # Now parse the data between i+1 and endpos into a tag and attrs
    attrs = []
    match = htmlparser.tagfind_tolerant.match(rawdata, i+1)
    assert match, 'unexpected call to parse_starttag()'
    k = match.end()
    self.lasttag = tag = match.group(1).lower()
    while k < endpos:
        m = htmlparser.attrfind_tolerant.match(rawdata, k)
        if not m:
            break
        attrname, rest, attrvalue = m.group(1, 2, 3)
        if not rest:
            # Attribute given without a value, e.g. `<input disabled>`.
            attrvalue = None
        elif attrvalue[:1] == '\'' == attrvalue[-1:] or \
                attrvalue[:1] == '"' == attrvalue[-1:]:
            # Strip the matching pair of surrounding quotes.
            attrvalue = attrvalue[1:-1]
        if attrvalue:
            attrvalue = htmlparser.unescape(attrvalue)
        attrs.append((attrname.lower(), attrvalue))
        k = m.end()

    end = rawdata[k:endpos].strip()
    if end not in (">", "/>"):
        # Text that could not be parsed as attributes remains before the
        # closing bracket: emit the whole tag source as plain data.
        # (The unused lineno/offset bookkeeping has been dropped here.)
        self.handle_data(rawdata[i:endpos])
        return endpos
    if end.endswith('/>'):
        # XHTML-style empty tag: <span attr="value" />
        self.handle_startendtag(tag, attrs)
    else:
        # *** set cdata_mode first so we can override it in handle_starttag (see #1036) ***
        if tag in self.CDATA_CONTENT_ELEMENTS:
            self.set_cdata_mode(tag)
        self.handle_starttag(tag, attrs)
    return endpos

tests/test_syntax/blocks/test_html_blocks.py

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1363,4 +1363,42 @@ def test_unclosed_script_tag(self):
13631363
Still part of the *script* tag
13641364
"""
13651365
)
1366+
)
1367+
1368+
def test_inline_script_tags(self):
    # Ensure inline script tags don't cause the parser to eat content (see #1036).
    self.assertMarkdownRenders(
        # Input: `<script>` and `</script>` appear only inside code spans,
        # with raw HTML blocks in between.
        self.dedent(
            """
            Text `<script>` more *text*.

            <div>
            *foo*
            </div>

            <div>

            bar

            </div>

            A new paragraph with a closing `</script>` tag.
            """
        ),
        # Expected: the code spans are escaped, the <div> blocks pass
        # through untouched and the surrounding paragraphs are rendered.
        self.dedent(
            """
            <p>Text <code>&lt;script&gt;</code> more <em>text</em>.</p>
            <div>
            *foo*
            </div>

            <div>

            bar

            </div>

            <p>A new paragraph with a closing <code>&lt;/script&gt;</code> tag.</p>
            """
        )
    )

0 commit comments

Comments
 (0)