@@ -131,6 +131,9 @@ def handle_starttag(self, tag, attrs):
131
131
self ._cache .append (text )
132
132
else :
133
133
self .cleandoc .append (text )
134
+ if tag in self .CDATA_CONTENT_ELEMENTS :
135
+ # This is presumably a standalone tag in a code span (see #1036).
136
+ self .clear_cdata_mode ()
134
137
135
138
def handle_endtag (self , tag ):
136
139
text = self .get_endtag_text (tag )
@@ -207,3 +210,63 @@ def handle_pi(self, data):
207
210
def unknown_decl(self, data):
    """Forward an unrecognised `<![...` declaration as an empty block-level tag.

    The declaration text in `data` is re-wrapped with a `<![` prefix and a
    closing delimiter: `]]>` when `data` begins with `CDATA[`, `]>` for
    anything else. The rebuilt markup goes to `handle_empty_tag` with
    `is_block=True`.
    """
    if data.startswith('CDATA['):
        closer = ']]>'
    else:
        closer = ']>'
    markup = '<![{}{}'.format(data, closer)
    self.handle_empty_tag(markup, is_block=True)
213
+
214
+ # The rest has been copied from base class in standard lib to address #1036.
215
+ # As __starttag_text is private, all references to it must be in this subclass.
216
+ # The last few lines of parse_starttag are reversed so that handle_starttag
217
+ # can override cdata_mode in certain situations (in a code span).
218
+ __starttag_text = None
219
+
220
def get_starttag_text(self):
    """Return the raw source of the most recently parsed start tag.

    This is the value last stored by `parse_starttag`: the `'<...>'` slice
    of the input, or `None` if no complete start tag has been recorded.
    """
    starttag_source = self.__starttag_text
    return starttag_source
223
+
224
def parse_starttag(self, i):
    """Parse the start tag that begins at index `i` of `self.rawdata`.

    Copied from the standard library base class to address #1036, with the
    final steps reordered: CDATA mode is switched on *before*
    `handle_starttag` is called, so that `handle_starttag` can switch it
    back off again (e.g. for a standalone tag inside a code span).

    Returns the index just past the end of the tag. If
    `check_for_whole_start_tag` reports a negative value, that value is
    returned unchanged and no handler is called.
    """
    self.__starttag_text = None
    endpos = self.check_for_whole_start_tag(i)
    if endpos < 0:
        return endpos
    rawdata = self.rawdata
    self.__starttag_text = rawdata[i:endpos]

    # Now parse the data between i+1 and endpos into a tag and attrs.
    attrs = []
    match = htmlparser.tagfind_tolerant.match(rawdata, i + 1)
    assert match, 'unexpected call to parse_starttag()'
    k = match.end()
    self.lasttag = tag = match.group(1).lower()
    while k < endpos:
        m = htmlparser.attrfind_tolerant.match(rawdata, k)
        if not m:
            break
        attrname, rest, attrvalue = m.group(1, 2, 3)
        if not rest:
            # Bare attribute with no `=value` part.
            attrvalue = None
        elif attrvalue[:1] == '\'' == attrvalue[-1:] or \
                attrvalue[:1] == '"' == attrvalue[-1:]:
            # Strip the matching pair of surrounding quotes.
            attrvalue = attrvalue[1:-1]
        if attrvalue:
            attrvalue = htmlparser.unescape(attrvalue)
        attrs.append((attrname.lower(), attrvalue))
        k = m.end()

    end = rawdata[k:endpos].strip()
    if end not in (">", "/>"):
        # Malformed tag ending: pass the raw text through as plain data.
        # (The unused line/offset bookkeeping from the stdlib copy was dropped.)
        self.handle_data(rawdata[i:endpos])
        return endpos
    if end.endswith('/>'):
        # XHTML-style empty tag: <span attr="value" />
        self.handle_startendtag(tag, attrs)
    else:
        # *** set cdata_mode first so we can override it in handle_starttag (see #1036) ***
        if tag in self.CDATA_CONTENT_ELEMENTS:
            self.set_cdata_mode(tag)
        self.handle_starttag(tag, attrs)
    return endpos
0 commit comments