Skip to content

Commit 2656689

Browse files
committed
Merge branch 'develop'
2 parents 47856cd + 13183f9 commit 2656689

File tree

4 files changed

+71
-18
lines changed

4 files changed

+71
-18
lines changed

markdownify/__init__.py

Lines changed: 34 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -106,6 +106,7 @@ def should_remove_whitespace_inside(el):
106106
return el.name in ('p', 'blockquote',
107107
'article', 'div', 'section',
108108
'ol', 'ul', 'li',
109+
'dl', 'dt', 'dd',
109110
'table', 'thead', 'tbody', 'tfoot',
110111
'tr', 'td', 'th')
111112

@@ -442,7 +443,7 @@ def _indent_for_blockquote(match):
442443

443444
def convert_br(self, el, text, parent_tags):
444445
if '_inline' in parent_tags:
445-
return ""
446+
return ' '
446447

447448
if self.options['newline_style'].lower() == BACKSLASH:
448449
return '\\\n'
@@ -489,6 +490,11 @@ def _indent_for_dd(match):
489490

490491
return '%s\n' % text
491492

493+
# definition lists are formatted as follows:
494+
# https://pandoc.org/MANUAL.html#definition-lists
495+
# https://michelf.ca/projects/php-markdown/extra/#def-list
496+
convert_dl = convert_div
497+
492498
def convert_dt(self, el, text, parent_tags):
493499
# remove newlines from term text
494500
text = (text or '').strip()
@@ -501,7 +507,7 @@ def convert_dt(self, el, text, parent_tags):
501507
# TODO - format consecutive <dt> elements as directly adjacent lines:
502508
# https://michelf.ca/projects/php-markdown/extra/#def-list
503509

504-
return '\n%s\n' % text
510+
return '\n\n%s\n' % text
505511

506512
def _convert_hn(self, n, el, text, parent_tags):
507513
""" Method name prefixed with _ to prevent <hn> from calling this """
@@ -538,6 +544,24 @@ def convert_img(self, el, text, parent_tags):
538544

539545
return '![%s](%s%s)' % (alt, src, title_part)
540546

547+
def convert_video(self, el, text, parent_tags):
548+
if ('_inline' in parent_tags
549+
and el.parent.name not in self.options['keep_inline_images_in']):
550+
return text
551+
src = el.attrs.get('src', None) or ''
552+
if not src:
553+
sources = el.find_all('source', attrs={'src': True})
554+
if sources:
555+
src = sources[0].attrs.get('src', None) or ''
556+
poster = el.attrs.get('poster', None) or ''
557+
if src and poster:
558+
return '[![%s](%s)](%s)' % (text, poster, src)
559+
if src:
560+
return '[%s](%s)' % (text, src)
561+
if poster:
562+
return '![%s](%s)' % (text, poster)
563+
return text
564+
541565
def convert_list(self, el, text, parent_tags):
542566

543567
# Converting a list to inline is undefined.
@@ -677,6 +701,12 @@ def convert_tr(self, el, text, parent_tags):
677701
)
678702
overline = ''
679703
underline = ''
704+
full_colspan = 0
705+
for cell in cells:
706+
if 'colspan' in cell.attrs and cell['colspan'].isdigit():
707+
full_colspan += int(cell["colspan"])
708+
else:
709+
full_colspan += 1
680710
if ((is_headrow
681711
or (is_head_row_missing
682712
and self.options['table_infer_header']))
@@ -685,12 +715,6 @@ def convert_tr(self, el, text, parent_tags):
685715
# - is headline or
686716
# - headline is missing and header inference is enabled
687717
# print headline underline
688-
full_colspan = 0
689-
for cell in cells:
690-
if 'colspan' in cell.attrs and cell['colspan'].isdigit():
691-
full_colspan += int(cell["colspan"])
692-
else:
693-
full_colspan += 1
694718
underline += '| ' + ' | '.join(['---'] * full_colspan) + ' |' + '\n'
695719
elif ((is_head_row_missing
696720
and not self.options['table_infer_header'])
@@ -703,8 +727,8 @@ def convert_tr(self, el, text, parent_tags):
703727
# - the parent is table or
704728
# - the parent is tbody at the beginning of a table.
705729
# print empty headline above this row
706-
overline += '| ' + ' | '.join([''] * len(cells)) + ' |' + '\n'
707-
overline += '| ' + ' | '.join(['---'] * len(cells)) + ' |' + '\n'
730+
overline += '| ' + ' | '.join([''] * full_colspan) + ' |' + '\n'
731+
overline += '| ' + ' | '.join(['---'] * full_colspan) + ' |' + '\n'
708732
return overline + '|' + text + '\n' + underline
709733

710734

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
44

55
[project]
66
name = "markdownify"
7-
version = "1.0.0"
7+
version = "1.1.0"
88
authors = [{name = "Matthew Tretter", email = "[email protected]"}]
99
description = "Convert HTML to markdown."
1010
readme = "README.rst"

tests/test_conversions.py

Lines changed: 17 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -79,6 +79,8 @@ def test_blockquote_nested():
7979
def test_br():
8080
assert md('a<br />b<br />c') == 'a \nb \nc'
8181
assert md('a<br />b<br />c', newline_style=BACKSLASH) == 'a\\\nb\\\nc'
82+
assert md('<h1>foo<br />bar</h1>', heading_style=ATX) == '\n\n# foo bar\n\n'
83+
assert md('<td>foo<br />bar</td>', heading_style=ATX) == ' foo bar |'
8284

8385

8486
def test_code():
@@ -102,13 +104,13 @@ def test_code():
102104

103105

104106
def test_dl():
105-
assert md('<dl><dt>term</dt><dd>definition</dd></dl>') == '\nterm\n: definition\n'
106-
assert md('<dl><dt><p>te</p><p>rm</p></dt><dd>definition</dd></dl>') == '\nte rm\n: definition\n'
107-
assert md('<dl><dt>term</dt><dd><p>definition-p1</p><p>definition-p2</p></dd></dl>') == '\nterm\n: definition-p1\n\n definition-p2\n'
108-
assert md('<dl><dt>term</dt><dd><p>definition 1</p></dd><dd><p>definition 2</p></dd></dl>') == '\nterm\n: definition 1\n: definition 2\n'
109-
assert md('<dl><dt>term 1</dt><dd>definition 1</dd><dt>term 2</dt><dd>definition 2</dd></dl>') == '\nterm 1\n: definition 1\nterm 2\n: definition 2\n'
110-
assert md('<dl><dt>term</dt><dd><blockquote><p>line 1</p><p>line 2</p></blockquote></dd></dl>') == '\nterm\n: > line 1\n >\n > line 2\n'
111-
assert md('<dl><dt>term</dt><dd><ol><li><p>1</p><ul><li>2a</li><li>2b</li></ul></li><li><p>3</p></li></ol></dd></dl>') == '\nterm\n: 1. 1\n\n * 2a\n * 2b\n 2. 3\n'
107+
assert md('<dl><dt>term</dt><dd>definition</dd></dl>') == '\n\nterm\n: definition\n\n'
108+
assert md('<dl><dt><p>te</p><p>rm</p></dt><dd>definition</dd></dl>') == '\n\nte rm\n: definition\n\n'
109+
assert md('<dl><dt>term</dt><dd><p>definition-p1</p><p>definition-p2</p></dd></dl>') == '\n\nterm\n: definition-p1\n\n definition-p2\n\n'
110+
assert md('<dl><dt>term</dt><dd><p>definition 1</p></dd><dd><p>definition 2</p></dd></dl>') == '\n\nterm\n: definition 1\n: definition 2\n\n'
111+
assert md('<dl><dt>term 1</dt><dd>definition 1</dd><dt>term 2</dt><dd>definition 2</dd></dl>') == '\n\nterm 1\n: definition 1\n\nterm 2\n: definition 2\n\n'
112+
assert md('<dl><dt>term</dt><dd><blockquote><p>line 1</p><p>line 2</p></blockquote></dd></dl>') == '\n\nterm\n: > line 1\n >\n > line 2\n\n'
113+
assert md('<dl><dt>term</dt><dd><ol><li><p>1</p><ul><li>2a</li><li>2b</li></ul></li><li><p>3</p></li></ol></dd></dl>') == '\n\nterm\n: 1. 1\n\n * 2a\n * 2b\n 2. 3\n\n'
112114

113115

114116
def test_del():
@@ -243,6 +245,14 @@ def test_img():
243245
assert md('<img src="/path/to/img.jpg" alt="Alt text" />') == '![Alt text](/path/to/img.jpg)'
244246

245247

248+
def test_video():
249+
assert md('<video src="/path/to/video.mp4" poster="/path/to/img.jpg">text</video>') == '[![text](/path/to/img.jpg)](/path/to/video.mp4)'
250+
assert md('<video src="/path/to/video.mp4">text</video>') == '[text](/path/to/video.mp4)'
251+
assert md('<video><source src="/path/to/video.mp4"/>text</video>') == '[text](/path/to/video.mp4)'
252+
assert md('<video poster="/path/to/img.jpg">text</video>') == '![text](/path/to/img.jpg)'
253+
assert md('<video>text</video>') == 'text'
254+
255+
246256
def test_kbd():
247257
inline_tests('kbd', '`')
248258

tests/test_tables.py

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -267,6 +267,23 @@
267267
</tr>
268268
</table>"""
269269

270+
table_with_colspan_missing_head = """<table>
271+
<tr>
272+
<td colspan="2">Name</td>
273+
<td>Age</td>
274+
</tr>
275+
<tr>
276+
<td>Jill</td>
277+
<td>Smith</td>
278+
<td>50</td>
279+
</tr>
280+
<tr>
281+
<td>Eve</td>
282+
<td>Jackson</td>
283+
<td>94</td>
284+
</tr>
285+
</table>"""
286+
270287

271288
def test_table():
272289
assert md(table) == '\n\n| Firstname | Lastname | Age |\n| --- | --- | --- |\n| Jill | Smith | 50 |\n| Eve | Jackson | 94 |\n\n'
@@ -283,6 +300,7 @@ def test_table():
283300
assert md(table_with_caption) == 'TEXT\n\nCaption\n\n| | | |\n| --- | --- | --- |\n| Firstname | Lastname | Age |\n\n'
284301
assert md(table_with_colspan) == '\n\n| Name | | Age |\n| --- | --- | --- |\n| Jill | Smith | 50 |\n| Eve | Jackson | 94 |\n\n'
285302
assert md(table_with_undefined_colspan) == '\n\n| Name | Age |\n| --- | --- |\n| Jill | Smith |\n\n'
303+
assert md(table_with_colspan_missing_head) == '\n\n| | | |\n| --- | --- | --- |\n| Name | | Age |\n| Jill | Smith | 50 |\n| Eve | Jackson | 94 |\n\n'
286304

287305

288306
def test_table_infer_header():
@@ -300,3 +318,4 @@ def test_table_infer_header():
300318
assert md(table_with_caption, table_infer_header=True) == 'TEXT\n\nCaption\n\n| Firstname | Lastname | Age |\n| --- | --- | --- |\n\n'
301319
assert md(table_with_colspan, table_infer_header=True) == '\n\n| Name | | Age |\n| --- | --- | --- |\n| Jill | Smith | 50 |\n| Eve | Jackson | 94 |\n\n'
302320
assert md(table_with_undefined_colspan, table_infer_header=True) == '\n\n| Name | Age |\n| --- | --- |\n| Jill | Smith |\n\n'
321+
assert md(table_with_colspan_missing_head, table_infer_header=True) == '\n\n| Name | | Age |\n| --- | --- | --- |\n| Jill | Smith | 50 |\n| Eve | Jackson | 94 |\n\n'

0 commit comments

Comments
 (0)