Skip to content

Commit 856dc6b

Browse files
committed
Merge pull request #28 from OpenScienceFramework/issue_28
Stop using deprecated tags
2 parents 286e7c4 + b5911f8 commit 856dc6b

File tree

4 files changed

+41
-27
lines changed

4 files changed

+41
-27
lines changed

README.md

Lines changed: 14 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -113,8 +113,7 @@ DocxParser includes abstract methods that each parser overwrites to satisfy its
113113

114114
@abstractmethod
115115
def table(self, text):
116-
return text
117-
116+
return text
118117
@abstractmethod
119118
def table_row(self, text):
120119
return text
@@ -161,4 +160,16 @@ OR, let's say FOO is your new favorite markup language. Simply customize your ow
161160

162161
def linebreak(self):
163162
return '!!!!!!!!!!!!' # because linebreaks are denoted by '!!!!!!!!!!!!'
164-
# with the FOO markup langauge :)
163+
# with the FOO markup language :)
164+
165+
# Styles
166+
167+
The base parser `Docx2Html` relies on certain CSS classes being set for certain behaviour to occur. Currently these include:
168+
169+
* class `insert` -> Turns the text green.
170+
* class `delete` -> Turns the text red and draws a line through the text.
171+
* class `center` -> Aligns the text to the center.
172+
* class `right` -> Aligns the text to the right.
173+
* class `left` -> Aligns the text to the left.
174+
* class `comment` -> Turns the text blue.
175+
* class `pydocx-underline` -> Underlines the text.

pydocx/parsers/Docx2Html.py

Lines changed: 6 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -9,9 +9,6 @@ class Docx2Html(DocxParser):
99
@property
1010
def parsed(self):
1111
content = self._parsed
12-
content = content.replace('<p></p><p></p>', '<br />')
13-
content = content.replace('</p><br /><p>', '</p><p>')
14-
content = content.replace('</p><br /><ul>', '</p><ul>')
1512
content = "<html>%(head)s<body>%(content)s</body></html>" % {
1613
'head': self.head(),
1714
'content': content,
@@ -28,6 +25,7 @@ def style(self):
2825
{{color:red; text-decoration:line-through}}.center
2926
{{text-align:center}}.right{{text-align:right}}
3027
.left{{text-align:left}} .comment{{color:blue}}
28+
.pydocx-underline {text-decoration: underline;}
3129
body{{width:%(width)spx; margin:0px auto;
3230
}}</style>''') % {
3331
'width': (self.page_width * (4 / 3)),
@@ -109,13 +107,13 @@ def unordered_list(self, text):
109107
}
110108

111109
def bold(self, text):
112-
return '<b>' + text + '</b>'
110+
return '<strong>' + text + '</strong>'
113111

114112
def italics(self, text):
115-
return '<i>' + text + '</i>'
113+
return '<em>' + text + '</em>'
116114

117115
def underline(self, text):
118-
return '<u>' + text + '</u>'
116+
return '<span class="pydocx-underline">' + text + '</span>'
119117

120118
def tab(self):
121119
# Insert before the text right?? So got the text and just do an insert
@@ -142,7 +140,7 @@ def table_cell(self, text, col='', row=''):
142140
}
143141

144142
def page_break(self):
145-
return '<hr>'
143+
return '<hr />'
146144

147145
def indent(self, text, just='', firstLine='', left='', right=''):
148146
slug = '<div'
@@ -167,4 +165,4 @@ def indent(self, text, just='', firstLine='', left='', right=''):
167165
}
168166

169167
def break_tag(self):
170-
return '<br/>'
168+
return '<br />'

pydocx/tests/test_docx.py

Lines changed: 12 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -143,8 +143,13 @@ def test_inline_tags():
143143
'inline_tags.docx',
144144
)
145145
actual_html = convert(file_path)
146-
assert_html_equal(actual_html, '''
147-
<html><body><p>This sentence has some <b>bold</b>, some <i>italics</i> and some <u>underline</u>, as well as a <a href="http://www.google.com/">hyperlink</a>.</p></body></html>''') # noqa
146+
assert_html_equal(actual_html, (
147+
'<html><body><p>This sentence has some <strong>bold</strong>, '
148+
'some <em>italics</em> and some '
149+
'<span class="pydocx-underline">underline</span>, '
150+
'as well as a <a href="http://www.google.com/">hyperlink</a>'
151+
'.</p></body></html>'
152+
))
148153

149154

150155
def test_unicode():
@@ -639,16 +644,16 @@ def test_shift_enter():
639644
actual_html = convert(file_path)
640645
assert_html_equal(actual_html, '''
641646
<html><body>
642-
<p>AAA<br/>BBB</p>
647+
<p>AAA<br />BBB</p>
643648
<p>CCC</p>
644649
<ol data-list-type="decimal">
645-
<li>DDD<br/>EEE</li>
650+
<li>DDD<br />EEE</li>
646651
<li>FFF</li>
647652
</ol>
648653
<table>
649654
<tr>
650-
<td>GGG<br/>HHH</td>
651-
<td>III<br/>JJJ</td>
655+
<td>GGG<br />HHH</td>
656+
<td>III<br />JJJ</td>
652657
</tr>
653658
<tr>
654659
<td>KKK</td>
@@ -767,7 +772,7 @@ def test_simple_table():
767772
assert_html_equal(actual_html, '''
768773
<html><body>
769774
<table>
770-
<tr><td>Cell1<br/>Cell3</td><td>Cell2<br/>
775+
<tr><td>Cell1<br />Cell3</td><td>Cell2<br />
771776
And I am writing in the table</td></tr>
772777
<tr><td></td><td>Cell4</td></tr>
773778
</table>

pydocx/tests/test_xml.py

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@
1616
class BoldTestCase(_TranslationTestCase):
1717
expected_output = """
1818
<html><body>
19-
<p><b>AAA</b></p>
19+
<p><strong>AAA</strong></p>
2020
<p>BBB</p>
2121
</body></html>
2222
"""
@@ -121,7 +121,7 @@ class HyperlinkWithBreakTestCase(_TranslationTestCase):
121121

122122
expected_output = '''
123123
<html><body>
124-
<p><a href="www.google.com">link<br/></a></p>
124+
<p><a href="www.google.com">link<br /></a></p>
125125
</body></html>
126126
'''
127127

@@ -382,7 +382,7 @@ class TableWithListAndParagraph(_TranslationTestCase):
382382
<li>AAA</li>
383383
<li>BBB</li>
384384
</ol>
385-
CCC<br/>
385+
CCC<br />
386386
DDD
387387
</td>
388388
</tr>
@@ -478,7 +478,7 @@ class ListWithContinuationTestCase(_TranslationTestCase):
478478
expected_output = '''
479479
<html><body>
480480
<ol data-list-type="decimal">
481-
<li>AAA<br/>BBB</li>
481+
<li>AAA<br />BBB</li>
482482
<li>CCC
483483
<table>
484484
<tr>
@@ -722,7 +722,7 @@ class DeleteTagInList(_TranslationTestCase):
722722
expected_output = '''
723723
<html><body>
724724
<ol data-list-type="decimal">
725-
<li>AAA<br/>
725+
<li>AAA<br />
726726
<span class='delete' author='' date=''>BBB</span>
727727
</li>
728728
<li>CCC</li>
@@ -746,7 +746,7 @@ class InsertTagInList(_TranslationTestCase):
746746
expected_output = '''
747747
<html><body>
748748
<ol data-list-type="decimal">
749-
<li>AAA<br/>
749+
<li>AAA<br />
750750
<span class='insert' author='' date=''>BBB</span>
751751
</li>
752752
<li>CCC</li>
@@ -771,7 +771,7 @@ class SmartTagInList(_TranslationTestCase):
771771
expected_output = '''
772772
<html><body>
773773
<ol data-list-type="decimal">
774-
<li>AAA<br/>
774+
<li>AAA<br />
775775
BBB
776776
</li>
777777
<li>CCC</li>
@@ -850,7 +850,7 @@ class MissingIlvl(_TranslationTestCase):
850850
expected_output = '''
851851
<html><body>
852852
<ol data-list-type="decimal">
853-
<li>AAA<br/>
853+
<li>AAA<br />
854854
BBB
855855
</li>
856856
<li>CCC</li>
@@ -923,7 +923,7 @@ class SDTTestCase(_TranslationTestCase):
923923
expected_output = '''
924924
<html><body>
925925
<ol data-list-type="decimal">
926-
<li>AAA<br/>
926+
<li>AAA<br />
927927
BBB
928928
</li>
929929
<li>CCC</li>

0 commit comments

Comments
 (0)