Skip to content

Commit de5c696

Browse files
facelessuser, waylan
authored and committed
Feature ancestry (Python-Markdown#598)
Ancestry exclusion for inline patterns. Adds the ability for an inline pattern to define a list of ancestor tag names that should be avoided. If a pattern would create a descendant of one of the listed tag names, the pattern will not match. Fixes Python-Markdown#596.
1 parent 007bd2a commit de5c696

File tree

5 files changed

+107
-13
lines changed

5 files changed

+107
-13
lines changed

.spell-dict

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -103,6 +103,7 @@ traceback
103103
Tredinnick
104104
Treeprocessor
105105
Treeprocessors
106+
tuple
106107
tuples
107108
unordered
108109
untrusted
@@ -122,4 +123,4 @@ wiki
122123
JavaScript
123124
plugin
124125
plugins
125-
configs
126+
configs

docs/extensions/api.txt

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -53,7 +53,7 @@ A pseudo example:
5353
Inline Patterns {: #inlinepatterns }
5454
------------------------------------
5555

56-
Inline Patterns implement the inline HTML element syntax for Markdown such as
56+
Inline Patterns implement the inline HTML element syntax for Markdown such as
5757
`*emphasis*` or `[links](http://example.com)`. Pattern objects should be
5858
instances of classes that inherit from `markdown.inlinepatterns.Pattern` or
5959
one of its children. Each pattern object uses a single regular expression and
@@ -68,6 +68,10 @@ must have the following methods:
6868
Accepts a match object and returns an ElementTree element or a plain
6969
Unicode string.
7070

71+
Also, Inline Patterns can define the property `ANCESTOR_EXCLUDES` with either
72+
a list or tuple of undesirable ancestors. The pattern should not match if it
73+
would cause the content to be a descendant of one of the defined tag names.
74+
7175
Note that any regular expression returned by `getCompiledRegExp` must capture
7276
the whole block. Therefore, they should all start with `r'^(.*?)'` and end
7377
with `r'(.*?)$'`. When using the default `getCompiledRegExp()` method

markdown/inlinepatterns.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -189,6 +189,8 @@ def attributeCallback(match):
189189
class Pattern(object):
190190
"""Base class that inline patterns subclass. """
191191

192+
ANCESTOR_EXCLUDES = tuple()
193+
192194
def __init__(self, pattern, markdown_instance=None):
193195
"""
194196
Create an instance of an inline pattern.

markdown/treeprocessors.py

Lines changed: 46 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,7 @@ def __init__(self, md):
5454
self.__placeholder_re = util.INLINE_PLACEHOLDER_RE
5555
self.markdown = md
5656
self.inlinePatterns = md.inlinePatterns
57+
self.ancestors = []
5758

5859
def __makePlaceholder(self, type):
5960
""" Generate a placeholder """
@@ -138,7 +139,7 @@ def __processElementText(self, node, subnode, isText=True):
138139

139140
childResult.reverse()
140141
for newChild in childResult:
141-
node.insert(pos, newChild)
142+
node.insert(pos, newChild[0])
142143

143144
def __processPlaceholders(self, data, parent, isText=True):
144145
"""
@@ -155,10 +156,10 @@ def __processPlaceholders(self, data, parent, isText=True):
155156
def linkText(text):
156157
if text:
157158
if result:
158-
if result[-1].tail:
159-
result[-1].tail += text
159+
if result[-1][0].tail:
160+
result[-1][0].tail += text
160161
else:
161-
result[-1].tail = text
162+
result[-1][0].tail = text
162163
elif not isText:
163164
if parent.tail:
164165
parent.tail += text
@@ -199,7 +200,7 @@ def linkText(text):
199200
continue
200201

201202
strartIndex = phEndIndex
202-
result.append(node)
203+
result.append((node, self.ancestors[:]))
203204

204205
else: # wrong placeholder
205206
end = index + len(self.__placeholder_prefix)
@@ -230,6 +231,11 @@ def __applyPattern(self, pattern, data, patternIndex, startIndex=0):
230231
Returns: String with placeholders instead of ElementTree elements.
231232
232233
"""
234+
235+
for exclude in pattern.ANCESTOR_EXCLUDES:
236+
if exclude.lower() in self.ancestors:
237+
return data, False, 0
238+
233239
match = pattern.getCompiledRegExp().match(data[startIndex:])
234240
leftData = data[:startIndex]
235241

@@ -247,9 +253,11 @@ def __applyPattern(self, pattern, data, patternIndex, startIndex=0):
247253
for child in [node] + list(node):
248254
if not isString(node):
249255
if child.text:
256+
self.ancestors.append(child.tag.lower())
250257
child.text = self.__handleInline(
251258
child.text, patternIndex + 1
252259
)
260+
self.ancestors.pop()
253261
if child.tail:
254262
child.tail = self.__handleInline(
255263
child.tail, patternIndex
@@ -261,7 +269,17 @@ def __applyPattern(self, pattern, data, patternIndex, startIndex=0):
261269
match.group(1),
262270
placeholder, match.groups()[-1]), True, 0
263271

264-
def run(self, tree):
272+
def __build_ancestors(self, parent, parents):
273+
"""Build the ancestor list."""
274+
ancestors = []
275+
while parent:
276+
if parent:
277+
ancestors.append(parent.tag.lower())
278+
parent = self.parent_map.get(parent)
279+
ancestors.reverse()
280+
parents.extend(ancestors)
281+
282+
def run(self, tree, ancestors=None):
265283
"""Apply inline patterns to a parsed Markdown tree.
266284
267285
Iterate over ElementTree, find elements with inline tag, apply inline
@@ -274,28 +292,42 @@ def run(self, tree):
274292
Arguments:
275293
276294
* tree: ElementTree object, representing Markdown tree.
295+
* ancestors: List of parent tag names that precede the tree node (if needed).
277296
278297
Returns: ElementTree object with applied inline patterns.
279298
280299
"""
281300
self.stashed_nodes = {}
282301

283-
stack = [tree]
302+
# Ensure a valid parent list, but copy passed in lists
303+
# to ensure we don't have the user accidentally change it on us.
304+
tree_parents = [] if ancestors is None else ancestors[:]
305+
306+
self.parent_map = dict((c, p) for p in tree.getiterator() for c in p)
307+
stack = [(tree, tree_parents)]
284308

285309
while stack:
286-
currElement = stack.pop()
310+
currElement, parents = stack.pop()
311+
312+
self.ancestors = parents
313+
self.__build_ancestors(currElement, self.ancestors)
314+
287315
insertQueue = []
288316
for child in currElement:
289317
if child.text and not isinstance(
290318
child.text, util.AtomicString
291319
):
320+
self.ancestors.append(child.tag.lower())
292321
text = child.text
293322
child.text = None
294323
lst = self.__processPlaceholders(
295324
self.__handleInline(text), child
296325
)
326+
for l in lst:
327+
self.parent_map[l[0]] = child
297328
stack += lst
298329
insertQueue.append((child, lst))
330+
self.ancestors.pop()
299331
if child.tail:
300332
tail = self.__handleInline(child.tail)
301333
dumby = util.etree.Element('d')
@@ -306,9 +338,11 @@ def run(self, tree):
306338
pos = list(currElement).index(child) + 1
307339
tailResult.reverse()
308340
for newChild in tailResult:
309-
currElement.insert(pos, newChild)
341+
self.parent_map[newChild[0]] = currElement
342+
currElement.insert(pos, newChild[0])
310343
if len(child):
311-
stack.append(child)
344+
self.parent_map[child] = currElement
345+
stack.append((child, self.ancestors[:]))
312346

313347
for element, lst in insertQueue:
314348
if self.markdown.enable_attributes:
@@ -317,7 +351,8 @@ def run(self, tree):
317351
element.text, element
318352
)
319353
i = 0
320-
for newChild in lst:
354+
for obj in lst:
355+
newChild = obj[0]
321356
if self.markdown.enable_attributes:
322357
# Processing attributes
323358
if newChild.tail and isString(newChild.tail):

tests/test_apis.py

Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -770,3 +770,55 @@ def testAppend(self):
770770
self.assertEqual('|' in md.ESCAPED_CHARS, True)
771771
md2 = markdown.Markdown()
772772
self.assertEqual('|' not in md2.ESCAPED_CHARS, True)
773+
774+
775+
class TestAncestorExclusion(unittest.TestCase):
776+
""" Tests exclusion of tags in ancestor list. """
777+
778+
class AncestorExample(markdown.inlinepatterns.SimpleTagPattern):
779+
""" Ancestor Test. """
780+
781+
ANCESTOR_EXCLUDES = ('a',)
782+
783+
def handleMatch(self, m):
784+
""" Handle match. """
785+
el = markdown.util.etree.Element(self.tag)
786+
el.text = m.group(3)
787+
return el
788+
789+
class AncestorExtension(markdown.Extension):
790+
791+
def __init__(self, *args, **kwargs):
792+
"""Initialize."""
793+
794+
self.config = {}
795+
796+
def extendMarkdown(self, md, md_globals):
797+
"""Modify inline patterns."""
798+
799+
pattern = r'(\+)([^\+]+)\2'
800+
md.inlinePatterns["ancestor-test"] = TestAncestorExclusion.AncestorExample(pattern, 'strong')
801+
802+
def setUp(self):
803+
"""Setup markdown object."""
804+
self.md = markdown.Markdown(extensions=[TestAncestorExclusion.AncestorExtension()])
805+
806+
def test_ancestors(self):
807+
""" Test that an extension can exclude parent tags. """
808+
test = """
809+
Some +test+ and a [+link+](http://test.com)
810+
"""
811+
result = """<p>Some <strong>test</strong> and a <a href="http://test.com">+link+</a></p>"""
812+
813+
self.md.reset()
814+
self.assertEqual(self.md.convert(test), result)
815+
816+
def test_ancestors_tail(self):
817+
""" Test that an extension can exclude parent tags when dealing with a tail. """
818+
test = """
819+
[***+em+*+strong+**](http://test.com)
820+
"""
821+
result = """<p><a href="http://test.com"><strong><em>+em+</em>+strong+</strong></a></p>"""
822+
823+
self.md.reset()
824+
self.assertEqual(self.md.convert(test), result)

0 commit comments

Comments
 (0)