Skip to content

Commit e07d686

Browse files
committed
Merge pull request #30 from OpenScienceFramework/issue_30
Do cleanup on insert/delete/sdt tags
2 parents c8649fc + ef28c4a commit e07d686

File tree

3 files changed

+19
-14
lines changed

pydocx/DocxParser.py

Lines changed: 11 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -576,12 +576,22 @@ def parse_p(self, el, text):
576576
def _should_append_break_tag(self, next_el):
577577
paragraph_like_tags = [
578578
'p',
579-
'sdt',
579+
]
580+
inline_like_tags = [
581+
'smartTag',
582+
'ins',
583+
'delText',
580584
]
581585
if next_el.is_list_item:
582586
return False
583587
if next_el.previous is None:
584588
return False
589+
tag_is_inline_like = any(
590+
next_el.has_descendant_with_tag(tag) for
591+
tag in inline_like_tags
592+
)
593+
if tag_is_inline_like:
594+
return False
585595
if next_el.previous.is_last_list_item_in_root:
586596
return False
587597
if next_el.previous.tag not in paragraph_like_tags:

pydocx/parsers/Docx2Html.py

Lines changed: 2 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -54,8 +54,7 @@ def heading(self, text, heading_value):
5454

5555
def insertion(self, text, author, date):
5656
return (
57-
"<span class='pydocx-insert' author='%(author)s' "
58-
"date='%(date)s'>%(text)s</span>"
57+
"<span class='pydocx-insert'>%(text)s</span>"
5958
) % {
6059
'author': author,
6160
'date': date,
@@ -88,8 +87,7 @@ def image(self, path, x, y):
8887

8988
def deletion(self, text, author, date):
9089
return (
91-
"<span class='pydocx-delete' author='%(author)s' "
92-
"date='%(date)s'>%(text)s</span>"
90+
"<span class='pydocx-delete'>%(text)s</span>"
9391
) % {
9492
'author': author,
9593
'date': date,

pydocx/tests/test_xml.py

Lines changed: 6 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -641,7 +641,7 @@ def test_performance(self):
641641

642642
class NonStandardTextTagsTestCase(_TranslationTestCase):
643643
expected_output = '''
644-
<p><span class='pydocx-insert' author='' date=''>insert </span>
644+
<p><span class='pydocx-insert'>insert </span>
645645
smarttag</p>
646646
'''
647647

@@ -674,8 +674,8 @@ def get_xml(self):
674674
class DeleteTagInList(_TranslationTestCase):
675675
expected_output = '''
676676
<ol list-style-type="decimal">
677-
<li>AAA<br />
678-
<span class='pydocx-delete' author='' date=''>BBB</span>
677+
<li>AAA
678+
<span class='pydocx-delete'>BBB</span>
679679
</li>
680680
<li>CCC</li>
681681
</ol>
@@ -696,8 +696,7 @@ def get_xml(self):
696696
class InsertTagInList(_TranslationTestCase):
697697
expected_output = '''
698698
<ol list-style-type="decimal">
699-
<li>AAA<br />
700-
<span class='pydocx-insert' author='' date=''>BBB</span>
699+
<li>AAA<span class='pydocx-insert'>BBB</span>
701700
</li>
702701
<li>CCC</li>
703702
</ol>
@@ -719,8 +718,7 @@ def get_xml(self):
719718
class SmartTagInList(_TranslationTestCase):
720719
expected_output = '''
721720
<ol list-style-type="decimal">
722-
<li>AAA<br />
723-
BBB
721+
<li>AAABBB
724722
</li>
725723
<li>CCC</li>
726724
</ol>
@@ -875,8 +873,7 @@ def get_xml(self):
875873
class SDTTestCase(_TranslationTestCase):
876874
expected_output = '''
877875
<ol list-style-type="decimal">
878-
<li>AAA<br />
879-
BBB
876+
<li>AAABBB
880877
</li>
881878
<li>CCC</li>
882879
</ol>

0 commit comments

Comments (0)