From 01fe37dc45c661154e36a394406dfca58d139da0 Mon Sep 17 00:00:00 2001
From: chrispy
Date: Sun, 26 Jan 2025 21:27:09 -0500
Subject: [PATCH 1/3] remove superfluous leading/trailing whitespace
Signed-off-by: chrispy
---
markdownify/__init__.py | 12 ++-
tests/test_advanced.py | 2 +-
tests/test_conversions.py | 181 +++++++++++++++++++-------------------
tests/test_lists.py | 28 +++---
tests/test_tables.py | 57 ++++++------
5 files changed, 144 insertions(+), 136 deletions(-)
diff --git a/markdownify/__init__.py b/markdownify/__init__.py
index ef4e7ca..10ff153 100644
--- a/markdownify/__init__.py
+++ b/markdownify/__init__.py
@@ -180,7 +180,10 @@ def process_tag(self, node, convert_as_inline):
return text
def convert__document_(self, el, text, convert_as_inline):
- # for BeautifulSoup objects (where node.name == "[document]"), return content results as-is
+ """Final document-level formatting for BeautifulSoup object (node.name == "[document]")"""
+ # remove all leading newlines
+ text = text.lstrip('\n')
+
return text
def process_text(self, el):
@@ -454,6 +457,7 @@ def _indent_for_li(match):
def convert_p(self, el, text, convert_as_inline):
if convert_as_inline:
return ' ' + text.strip() + ' '
+ text = text.strip()
if self.options['wrap']:
# Preserve newlines (and preceding whitespace) resulting
# from tags. Newlines in the input have already been
@@ -500,13 +504,13 @@ def convert_style(self, el, text, convert_as_inline):
convert_sup = abstract_inline_conversion(lambda self: self.options['sup_symbol'])
def convert_table(self, el, text, convert_as_inline):
- return '\n\n' + text + '\n'
+ return '\n\n' + text.strip() + '\n\n'
def convert_caption(self, el, text, convert_as_inline):
- return text + '\n\n'
+ return text.strip() + '\n\n'
def convert_figcaption(self, el, text, convert_as_inline):
- return '\n\n' + text + '\n\n'
+ return '\n\n' + text.strip() + '\n\n'
def convert_td(self, el, text, convert_as_inline):
colspan = 1
diff --git a/tests/test_advanced.py b/tests/test_advanced.py
index a3a5fda..14bf3cd 100644
--- a/tests/test_advanced.py
+++ b/tests/test_advanced.py
@@ -14,7 +14,7 @@ def test_chomp():
def test_nested():
text = md('This is an example link .
')
- assert text == '\n\nThis is an [example link](http://example.com/).\n\n'
+ assert text == 'This is an [example link](http://example.com/).\n\n'
def test_ignore_comments():
diff --git a/tests/test_conversions.py b/tests/test_conversions.py
index 05c6cd4..4df024e 100644
--- a/tests/test_conversions.py
+++ b/tests/test_conversions.py
@@ -41,7 +41,7 @@ def test_a_no_autolinks():
def test_a_in_code():
assert md('Google
') == '`Google`'
- assert md('Google ') == '\n\n```\nGoogle\n```\n\n'
+ assert md('Google ') == '```\nGoogle\n```\n\n'
def test_b():
@@ -56,22 +56,22 @@ def test_b_spaces():
def test_blockquote():
- assert md('Hello ') == '\n> Hello\n\n'
- assert md('\nHello\n ') == '\n> Hello\n\n'
+ assert md('Hello ') == '> Hello\n\n'
+ assert md('\nHello\n ') == '> Hello\n\n'
def test_blockquote_with_nested_paragraph():
- assert md('Hello
') == '\n> Hello\n\n'
- assert md('Hello
Hello again
') == '\n> Hello\n>\n> Hello again\n\n'
+ assert md('Hello
') == '> Hello\n\n'
+ assert md('Hello
Hello again
') == '> Hello\n>\n> Hello again\n\n'
def test_blockquote_with_paragraph():
- assert md('Hello handsome
') == '\n> Hello\n\nhandsome\n\n'
+ assert md('Hello handsome
') == '> Hello\n\nhandsome\n\n'
def test_blockquote_nested():
text = md('And she was like Hello ')
- assert text == '\n> And she was like\n> > Hello\n\n'
+ assert text == '> And she was like\n> > Hello\n\n'
def test_br():
@@ -79,11 +79,6 @@ def test_br():
assert md('a b c', newline_style=BACKSLASH) == 'a\\\nb\\\nc'
-def test_caption():
- assert md('TEXTCaption SPAN ') == 'TEXT\n\nCaption\n\nSPAN'
- assert md('SPAN Caption TEXT') == 'SPAN\n\nCaption\n\nTEXT'
-
-
def test_code():
inline_tests('code', '`')
assert md('*this_should_not_escape*
') == '`*this_should_not_escape*`'
@@ -105,13 +100,13 @@ def test_code():
def test_dl():
- assert md('term definition ') == '\nterm\n: definition\n'
- assert md('te
rm
definition ') == '\nte rm\n: definition\n'
- assert md('term definition-p1
definition-p2
') == '\nterm\n: definition-p1\n\n definition-p2\n'
- assert md('term definition 1
definition 2
') == '\nterm\n: definition 1\n: definition 2\n'
- assert md('term 1 definition 1 term 2 definition 2 ') == '\nterm 1\n: definition 1\nterm 2\n: definition 2\n'
- assert md('term line 1
line 2
') == '\nterm\n: > line 1\n >\n > line 2\n'
- assert md('term 1
3
') == '\nterm\n: 1. 1\n\n * 2a\n * 2b\n 2. 3\n'
+ assert md('term definition ') == 'term\n: definition\n'
+ assert md('te
rm
definition ') == 'te rm\n: definition\n'
+ assert md('term definition-p1
definition-p2
') == 'term\n: definition-p1\n\n definition-p2\n'
+ assert md('term definition 1
definition 2
') == 'term\n: definition 1\n: definition 2\n'
+ assert md('term 1 definition 1 term 2 definition 2 ') == 'term 1\n: definition 1\nterm 2\n: definition 2\n'
+ assert md('term line 1
line 2
') == 'term\n: > line 1\n >\n > line 2\n'
+ assert md('term 1
3
') == 'term\n: 1. 1\n\n * 2a\n * 2b\n 2. 3\n'
def test_del():
@@ -126,42 +121,47 @@ def test_em():
inline_tests('em', '*')
+def test_figcaption():
+ assert (md("TEXT\nCaption\n SPAN ") == "TEXT\n\nCaption\n\nSPAN")
+ assert (md("SPAN \nCaption\n TEXT") == "SPAN\n\nCaption\n\nTEXT")
+
+
def test_header_with_space():
- assert md('\n\nHello ') == '\n\n### Hello\n\n'
- assert md('Hello\n\n\nWorld ') == '\n\n### Hello World\n\n'
- assert md('\n\nHello ') == '\n\n#### Hello\n\n'
- assert md('\n\nHello ') == '\n\n##### Hello\n\n'
- assert md('\n\nHello\n\n ') == '\n\n##### Hello\n\n'
- assert md('\n\nHello \n\n ') == '\n\n##### Hello\n\n'
+ assert md('\n\nHello ') == '### Hello\n\n'
+ assert md('Hello\n\n\nWorld ') == '### Hello World\n\n'
+ assert md('\n\nHello ') == '#### Hello\n\n'
+ assert md('\n\nHello ') == '##### Hello\n\n'
+ assert md('\n\nHello\n\n ') == '##### Hello\n\n'
+ assert md('\n\nHello \n\n ') == '##### Hello\n\n'
def test_h1():
- assert md('Hello ') == '\n\nHello\n=====\n\n'
+ assert md('Hello ') == 'Hello\n=====\n\n'
def test_h2():
- assert md('Hello ') == '\n\nHello\n-----\n\n'
+ assert md('Hello ') == 'Hello\n-----\n\n'
def test_hn():
- assert md('Hello ') == '\n\n### Hello\n\n'
- assert md('Hello ') == '\n\n#### Hello\n\n'
- assert md('Hello ') == '\n\n##### Hello\n\n'
- assert md('Hello ') == '\n\n###### Hello\n\n'
+ assert md('Hello ') == '### Hello\n\n'
+ assert md('Hello ') == '#### Hello\n\n'
+ assert md('Hello ') == '##### Hello\n\n'
+ assert md('Hello ') == '###### Hello\n\n'
assert md('Hello ') == md('Hello ')
assert md('Hello ') == md('Hello')
def test_hn_chained():
- assert md('First \nSecond \nThird ', heading_style=ATX) == '\n\n# First\n\n## Second\n\n### Third\n\n'
+ assert md('First \nSecond \nThird ', heading_style=ATX) == '# First\n\n## Second\n\n### Third\n\n'
assert md('XFirst ', heading_style=ATX) == 'X\n\n# First\n\n'
assert md('XFirst ', heading_style=ATX_CLOSED) == 'X\n\n# First #\n\n'
assert md('XFirst ') == 'X\n\nFirst\n=====\n\n'
def test_hn_nested_tag_heading_style():
- assert md('A P
C ', heading_style=ATX_CLOSED) == '\n\n# A P C #\n\n'
- assert md('A P
C ', heading_style=ATX) == '\n\n# A P C\n\n'
+ assert md('A P
C ', heading_style=ATX_CLOSED) == '# A P C #\n\n'
+ assert md('A P
C ', heading_style=ATX) == '# A P C\n\n'
def test_hn_nested_simple_tag():
@@ -177,9 +177,9 @@ def test_hn_nested_simple_tag():
]
for tag, markdown in tag_to_markdown:
- assert md('A <' + tag + '>' + tag + '' + tag + '> B ') == '\n\n### A ' + markdown + ' B\n\n'
+ assert md('A <' + tag + '>' + tag + '' + tag + '> B ') == '### A ' + markdown + ' B\n\n'
- assert md('A B ', heading_style=ATX) == '\n\n### A B\n\n'
+ assert md('A B ', heading_style=ATX) == '### A B\n\n'
# Nested lists not supported
# assert md('A ', heading_style=ATX) == '\n### A li1 li2 B\n\n'
@@ -192,23 +192,23 @@ def test_hn_nested_img():
("alt='Alt Text' title='Optional title'", "Alt Text", " \"Optional title\""),
]
for image_attributes, markdown, title in image_attributes_to_markdown:
- assert md('A B ') == '\n\n### A' + (' ' + markdown + ' ' if markdown else ' ') + 'B\n\n'
- assert md('A B ', keep_inline_images_in=['h3']) == '\n\n### A  B\n\n'
+ assert md('A B ') == '### A' + (' ' + markdown + ' ' if markdown else ' ') + 'B\n\n'
+ assert md('A B ', keep_inline_images_in=['h3']) == '### A  B\n\n'
def test_hn_atx_headings():
- assert md('Hello ', heading_style=ATX) == '\n\n# Hello\n\n'
- assert md('Hello ', heading_style=ATX) == '\n\n## Hello\n\n'
+ assert md('Hello ', heading_style=ATX) == '# Hello\n\n'
+ assert md('Hello ', heading_style=ATX) == '## Hello\n\n'
def test_hn_atx_closed_headings():
- assert md('Hello ', heading_style=ATX_CLOSED) == '\n\n# Hello #\n\n'
- assert md('Hello ', heading_style=ATX_CLOSED) == '\n\n## Hello ##\n\n'
+ assert md('Hello ', heading_style=ATX_CLOSED) == '# Hello #\n\n'
+ assert md('Hello ', heading_style=ATX_CLOSED) == '## Hello ##\n\n'
def test_hn_newlines():
- assert md("H1-1 TEXTH2-2 TEXTH1-2 TEXT", heading_style=ATX) == '\n\n# H1-1\n\nTEXT\n\n## H2-2\n\nTEXT\n\n# H1-2\n\nTEXT'
- assert md('H1-1 \nTEXT
\nH2-2 \nTEXT
\nH1-2 \nTEXT
', heading_style=ATX) == '\n\n# H1-1\n\nTEXT\n\n## H2-2\n\nTEXT\n\n# H1-2\n\nTEXT\n\n'
+ assert md("H1-1 TEXTH2-2 TEXTH1-2 TEXT", heading_style=ATX) == '# H1-1\n\nTEXT\n\n## H2-2\n\nTEXT\n\n# H1-2\n\nTEXT'
+ assert md('H1-1 \nTEXT
\nH2-2 \nTEXT
\nH1-2 \nTEXT
', heading_style=ATX) == '# H1-1\n\nTEXT\n\n## H2-2\n\nTEXT\n\n# H1-2\n\nTEXT\n\n'
def test_head():
@@ -218,7 +218,7 @@ def test_head():
def test_hr():
assert md('Hello World') == 'Hello\n\n---\n\nWorld'
assert md('Hello World') == 'Hello\n\n---\n\nWorld'
- assert md('Hello
\n \nWorld
') == '\n\nHello\n\n---\n\nWorld\n\n'
+ assert md('Hello
\n \nWorld
') == 'Hello\n\n---\n\nWorld\n\n'
def test_i():
@@ -235,48 +235,49 @@ def test_kbd():
def test_p():
- assert md('hello
') == '\n\nhello\n\n'
- assert md('123456789 123456789
') == '\n\n123456789 123456789\n\n'
- assert md('123456789\n\n\n123456789
') == '\n\n123456789\n123456789\n\n'
- assert md('123456789\n\n\n123456789
', wrap=True, wrap_width=80) == '\n\n123456789 123456789\n\n'
- assert md('123456789\n\n\n123456789
', wrap=True, wrap_width=None) == '\n\n123456789 123456789\n\n'
- assert md('123456789 123456789
', wrap=True, wrap_width=10) == '\n\n123456789\n123456789\n\n'
- assert md('Some long link
', wrap=True, wrap_width=10) == '\n\n[Some long\nlink](https://example.com)\n\n'
- assert md('12345 67890
', wrap=True, wrap_width=10, newline_style=BACKSLASH) == '\n\n12345\\\n67890\n\n'
- assert md('12345 67890
', wrap=True, wrap_width=50, newline_style=BACKSLASH) == '\n\n12345\\\n67890\n\n'
- assert md('12345 67890
', wrap=True, wrap_width=10, newline_style=SPACES) == '\n\n12345 \n67890\n\n'
- assert md('12345 67890
', wrap=True, wrap_width=50, newline_style=SPACES) == '\n\n12345 \n67890\n\n'
- assert md('12345678901 12345
', wrap=True, wrap_width=10, newline_style=BACKSLASH) == '\n\n12345678901\\\n12345\n\n'
- assert md('12345678901 12345
', wrap=True, wrap_width=50, newline_style=BACKSLASH) == '\n\n12345678901\\\n12345\n\n'
- assert md('12345678901 12345
', wrap=True, wrap_width=10, newline_style=SPACES) == '\n\n12345678901 \n12345\n\n'
- assert md('12345678901 12345
', wrap=True, wrap_width=50, newline_style=SPACES) == '\n\n12345678901 \n12345\n\n'
- assert md('1234 5678 9012 67890
', wrap=True, wrap_width=10, newline_style=BACKSLASH) == '\n\n1234 5678\n9012\\\n67890\n\n'
- assert md('1234 5678 9012 67890
', wrap=True, wrap_width=10, newline_style=SPACES) == '\n\n1234 5678\n9012 \n67890\n\n'
+ assert md('hello
') == 'hello\n\n'
+ assert md("
hello
") == "hello\n\n"
+ assert md('123456789 123456789
') == '123456789 123456789\n\n'
+ assert md('123456789\n\n\n123456789
') == '123456789\n123456789\n\n'
+ assert md('123456789\n\n\n123456789
', wrap=True, wrap_width=80) == '123456789 123456789\n\n'
+ assert md('123456789\n\n\n123456789
', wrap=True, wrap_width=None) == '123456789 123456789\n\n'
+ assert md('123456789 123456789
', wrap=True, wrap_width=10) == '123456789\n123456789\n\n'
+ assert md('Some long link
', wrap=True, wrap_width=10) == '[Some long\nlink](https://example.com)\n\n'
+ assert md('12345 67890
', wrap=True, wrap_width=10, newline_style=BACKSLASH) == '12345\\\n67890\n\n'
+ assert md('12345 67890
', wrap=True, wrap_width=50, newline_style=BACKSLASH) == '12345\\\n67890\n\n'
+ assert md('12345 67890
', wrap=True, wrap_width=10, newline_style=SPACES) == '12345 \n67890\n\n'
+ assert md('12345 67890
', wrap=True, wrap_width=50, newline_style=SPACES) == '12345 \n67890\n\n'
+ assert md('12345678901 12345
', wrap=True, wrap_width=10, newline_style=BACKSLASH) == '12345678901\\\n12345\n\n'
+ assert md('12345678901 12345
', wrap=True, wrap_width=50, newline_style=BACKSLASH) == '12345678901\\\n12345\n\n'
+ assert md('12345678901 12345
', wrap=True, wrap_width=10, newline_style=SPACES) == '12345678901 \n12345\n\n'
+ assert md('12345678901 12345
', wrap=True, wrap_width=50, newline_style=SPACES) == '12345678901 \n12345\n\n'
+ assert md('1234 5678 9012 67890
', wrap=True, wrap_width=10, newline_style=BACKSLASH) == '1234 5678\n9012\\\n67890\n\n'
+ assert md('1234 5678 9012 67890
', wrap=True, wrap_width=10, newline_style=SPACES) == '1234 5678\n9012 \n67890\n\n'
assert md('FirstSecond
Third
Fourth') == 'First\n\nSecond\n\nThird\n\nFourth'
def test_pre():
- assert md('test\n foo\nbar ') == '\n\n```\ntest\n foo\nbar\n```\n\n'
- assert md('test\n foo\nbar
') == '\n\n```\ntest\n foo\nbar\n```\n\n'
- assert md('*this_should_not_escape* ') == '\n\n```\n*this_should_not_escape*\n```\n\n'
- assert md('*this_should_not_escape* ') == '\n\n```\n*this_should_not_escape*\n```\n\n'
- assert md('\t\tthis should\t\tnot normalize ') == '\n\n```\n\t\tthis should\t\tnot normalize\n```\n\n'
- assert md('\t\tthis should\t\tnot normalize ') == '\n\n```\n\t\tthis should\t\tnot normalize\n```\n\n'
- assert md('foo\nbar\n baz ') == '\n\n```\nfoo\nbar\nbaz\n```\n\n'
- assert md('foo\nbar\n baz ') == '\n\n```\nfoo\nbar\nbaz\n```\n\n'
- assert md('foo\nbar \nbaz ') == '\n\n```\nfoo\nbar\nbaz\n```\n\n'
- assert md('foo\n baz ') == '\n\n```\nfoo\nbaz\n```\n\n'
- assert md('foo\nbar\nbaz ') == '\n\n```\nfoo\nbar\nbaz\n```\n\n'
- assert md('foo\nbar\n baz ') == '\n\n```\nfoo\nbar\nbaz\n```\n\n'
- assert md('foo\nbar\n
baz ') == '\n\n```\nfoo\nbar\nbaz\n```\n\n'
- assert md('foo\nbar\n baz ') == '\n\n```\nfoo\nbar\nbaz\n```\n\n'
- assert md('foo\nbar\n baz ') == '\n\n```\nfoo\nbar\nbaz\n```\n\n'
- assert md('foo\nbar\n baz ', sup_symbol='^') == '\n\n```\nfoo\nbar\nbaz\n```\n\n'
- assert md('foo\nbar\n baz ', sub_symbol='^') == '\n\n```\nfoo\nbar\nbaz\n```\n\n'
- assert md('foo\nbar\n baz ', sub_symbol='^') == '\n\n```\nfoo\nbar\nbaz\n```\n\n'
+ assert md('test\n foo\nbar ') == '```\ntest\n foo\nbar\n```\n\n'
+ assert md('test\n foo\nbar
') == '```\ntest\n foo\nbar\n```\n\n'
+ assert md('*this_should_not_escape* ') == '```\n*this_should_not_escape*\n```\n\n'
+ assert md('*this_should_not_escape* ') == '```\n*this_should_not_escape*\n```\n\n'
+ assert md('\t\tthis should\t\tnot normalize ') == '```\n\t\tthis should\t\tnot normalize\n```\n\n'
+ assert md('\t\tthis should\t\tnot normalize ') == '```\n\t\tthis should\t\tnot normalize\n```\n\n'
+ assert md('foo\nbar\n baz ') == '```\nfoo\nbar\nbaz\n```\n\n'
+ assert md('foo\nbar\n baz ') == '```\nfoo\nbar\nbaz\n```\n\n'
+ assert md('foo\nbar \nbaz ') == '```\nfoo\nbar\nbaz\n```\n\n'
+ assert md('foo\n baz ') == '```\nfoo\nbaz\n```\n\n'
+ assert md('foo\nbar\nbaz ') == '```\nfoo\nbar\nbaz\n```\n\n'
+ assert md('foo\nbar\n baz ') == '```\nfoo\nbar\nbaz\n```\n\n'
+ assert md('foo\nbar\n
baz ') == '```\nfoo\nbar\nbaz\n```\n\n'
+ assert md('foo\nbar\n baz ') == '```\nfoo\nbar\nbaz\n```\n\n'
+ assert md('foo\nbar\n baz ') == '```\nfoo\nbar\nbaz\n```\n\n'
+ assert md('foo\nbar\n baz ', sup_symbol='^') == '```\nfoo\nbar\nbaz\n```\n\n'
+ assert md('foo\nbar\n baz ', sub_symbol='^') == '```\nfoo\nbar\nbaz\n```\n\n'
+ assert md('foo\nbar\n baz ', sub_symbol='^') == '```\nfoo\nbar\nbaz\n```\n\n'
assert md('foobar baz', sub_symbol='^') == 'foo\n\n```\nbar\n```\n\nbaz'
- assert md("foo
\nbar \nbaz", sub_symbol="^") == "\n\nfoo\n\n```\nbar\n```\n\nbaz"
+ assert md("foo
\nbar \nbaz", sub_symbol="^") == 'foo\n\n```\nbar\n```\n\nbaz'
def test_script():
@@ -319,24 +320,24 @@ def test_sup():
def test_lang():
- assert md('test\n foo\nbar ', code_language='python') == '\n\n```python\ntest\n foo\nbar\n```\n\n'
- assert md('test\n foo\nbar
', code_language='javascript') == '\n\n```javascript\ntest\n foo\nbar\n```\n\n'
+ assert md('test\n foo\nbar ', code_language='python') == '```python\ntest\n foo\nbar\n```\n\n'
+ assert md('test\n foo\nbar
', code_language='javascript') == '```javascript\ntest\n foo\nbar\n```\n\n'
def test_lang_callback():
def callback(el):
return el['class'][0] if el.has_attr('class') else None
- assert md('test\n foo\nbar ', code_language_callback=callback) == '\n\n```python\ntest\n foo\nbar\n```\n\n'
- assert md('test\n foo\nbar
', code_language_callback=callback) == '\n\n```javascript\ntest\n foo\nbar\n```\n\n'
- assert md('test\n foo\nbar
', code_language_callback=callback) == '\n\n```javascript\ntest\n foo\nbar\n```\n\n'
+ assert md('test\n foo\nbar ', code_language_callback=callback) == '```python\ntest\n foo\nbar\n```\n\n'
+ assert md('test\n foo\nbar
', code_language_callback=callback) == '```javascript\ntest\n foo\nbar\n```\n\n'
+ assert md('test\n foo\nbar
', code_language_callback=callback) == '```javascript\ntest\n foo\nbar\n```\n\n'
def test_spaces():
- assert md(' a b
c d
') == '\n\na b\n\nc d\n\n'
- assert md(' a
') == '\n\n*a*\n\n'
+ assert md(' a b
c d
') == 'a b\n\nc d\n\n'
+ assert md(' a
') == '*a*\n\n'
assert md('test again
') == 'test\n\nagain\n\n'
assert md('test text after') == 'test\n> text\n\nafter'
- assert md(' x y ') == '\n\n1. x\n2. y\n'
- assert md(' x y ') == '\n\n* x\n* y\n'
+ assert md(' x y ') == '1. x\n2. y\n'
+ assert md(' x y ') == '* x\n* y\n'
assert md('test foo bar') == 'test\n\n```\n foo \n```\n\nbar'
diff --git a/tests/test_lists.py b/tests/test_lists.py
index ce54a87..2d7b570 100644
--- a/tests/test_lists.py
+++ b/tests/test_lists.py
@@ -41,21 +41,21 @@
def test_ol():
- assert md('a b ') == '\n\n1. a\n2. b\n'
- assert md('a b ') == '\n\n3. a\n4. b\n'
+ assert md('a b ') == '1. a\n2. b\n'
+ assert md('a b ') == '3. a\n4. b\n'
assert md('fooa b bar') == 'foo\n\n3. a\n4. b\n\nbar'
- assert md('a b ') == '\n\n1. a\n2. b\n'
- assert md('a b ') == '\n\n1. a\n2. b\n'
- assert md('a b ') == '\n\n1. a\n2. b\n'
- assert md('first para
second para
third para
fourth para
') == '\n\n1234. first para\n\n second para\n1235. third para\n\n fourth para\n'
+ assert md('a b ') == '1. a\n2. b\n'
+ assert md('a b ') == '1. a\n2. b\n'
+ assert md('a b ') == '1. a\n2. b\n'
+ assert md('first para
second para
third para
fourth para
') == '1234. first para\n\n second para\n1235. third para\n\n fourth para\n'
def test_nested_ols():
- assert md(nested_ols) == '\n\n1. 1\n 1. a\n 1. I\n 2. II\n 3. III\n 2. b\n 3. c\n2. 2\n3. 3\n'
+ assert md(nested_ols) == '1. 1\n 1. a\n 1. I\n 2. II\n 3. III\n 2. b\n 3. c\n2. 2\n3. 3\n'
def test_ul():
- assert md('') == '\n\n* a\n* b\n'
+ assert md('') == '* a\n* b\n'
assert md("""
a
@@ -63,12 +63,12 @@ def test_ul():
b
c
- """) == '\n\n* a\n* b\n* c\n'
- assert md('first para
second para
third para
fourth para
') == '\n\n* first para\n\n second para\n* third para\n\n fourth para\n'
+ """) == '* a\n* b\n* c\n'
+ assert md('first para
second para
third para
fourth para
') == '* first para\n\n second para\n* third para\n\n fourth para\n'
def test_inline_ul():
- assert md('foo
bar
') == '\n\nfoo\n\n* a\n* b\n\nbar\n\n'
+ assert md('foo
bar
') == 'foo\n\n* a\n* b\n\nbar\n\n'
assert md('foobaz') == 'foo\n\n* bar\n\nbaz'
@@ -77,12 +77,12 @@ def test_nested_uls():
Nested ULs should alternate bullet characters.
"""
- assert md(nested_uls) == '\n\n* 1\n + a\n - I\n - II\n - III\n + b\n + c\n* 2\n* 3\n'
+ assert md(nested_uls) == '* 1\n + a\n - I\n - II\n - III\n + b\n + c\n* 2\n* 3\n'
def test_bullets():
- assert md(nested_uls, bullets='-') == '\n\n- 1\n - a\n - I\n - II\n - III\n - b\n - c\n- 2\n- 3\n'
+ assert md(nested_uls, bullets='-') == '- 1\n - a\n - I\n - II\n - III\n - b\n - c\n- 2\n- 3\n'
def test_li_text():
- assert md('foo bar foo bar foo bar space . ') == '\n\n* foo [bar](#)\n* foo bar\n* foo **bar** *space*.\n'
+ assert md('foo bar foo bar foo bar space . ') == '* foo [bar](#)\n* foo bar\n* foo **bar** *space*.\n'
diff --git a/tests/test_tables.py b/tests/test_tables.py
index da4bf53..dee0960 100644
--- a/tests/test_tables.py
+++ b/tests/test_tables.py
@@ -228,7 +228,10 @@
"""
-table_with_caption = """TEXTCaption
+table_with_caption = """TEXT
+
+ Caption
+
Firstname
Lastname
Age
@@ -266,34 +269,34 @@
def test_table():
- assert md(table) == '\n\n| Firstname | Lastname | Age |\n| --- | --- | --- |\n| Jill | Smith | 50 |\n| Eve | Jackson | 94 |\n\n'
- assert md(table_with_html_content) == '\n\n| Firstname | Lastname | Age |\n| --- | --- | --- |\n| **Jill** | *Smith* | [50](#) |\n| Eve | Jackson | 94 |\n\n'
- assert md(table_with_paragraphs) == '\n\n| Firstname | Lastname | Age |\n| --- | --- | --- |\n| Jill | Smith | 50 |\n| Eve | Jackson | 94 |\n\n'
- assert md(table_with_linebreaks) == '\n\n| Firstname | Lastname | Age |\n| --- | --- | --- |\n| Jill | Smith Jackson | 50 |\n| Eve | Jackson Smith | 94 |\n\n'
- assert md(table_with_header_column) == '\n\n| Firstname | Lastname | Age |\n| --- | --- | --- |\n| Jill | Smith | 50 |\n| Eve | Jackson | 94 |\n\n'
- assert md(table_head_body) == '\n\n| Firstname | Lastname | Age |\n| --- | --- | --- |\n| Jill | Smith | 50 |\n| Eve | Jackson | 94 |\n\n'
- assert md(table_head_body_multiple_head) == '\n\n| | | |\n| --- | --- | --- |\n| Creator | Editor | Server |\n| Operator | Manager | Engineer |\n| Bob | Oliver | Tom |\n| Thomas | Lucas | Ethan |\n\n'
- assert md(table_head_body_missing_head) == '\n\n| Firstname | Lastname | Age |\n| --- | --- | --- |\n| Jill | Smith | 50 |\n| Eve | Jackson | 94 |\n\n'
- assert md(table_missing_text) == '\n\n| | Lastname | Age |\n| --- | --- | --- |\n| Jill | | 50 |\n| Eve | Jackson | 94 |\n\n'
- assert md(table_missing_head) == '\n\n| | | |\n| --- | --- | --- |\n| Firstname | Lastname | Age |\n| Jill | Smith | 50 |\n| Eve | Jackson | 94 |\n\n'
- assert md(table_body) == '\n\n| | | |\n| --- | --- | --- |\n| Firstname | Lastname | Age |\n| Jill | Smith | 50 |\n| Eve | Jackson | 94 |\n\n'
+ assert md(table) == '| Firstname | Lastname | Age |\n| --- | --- | --- |\n| Jill | Smith | 50 |\n| Eve | Jackson | 94 |\n\n'
+ assert md(table_with_html_content) == '| Firstname | Lastname | Age |\n| --- | --- | --- |\n| **Jill** | *Smith* | [50](#) |\n| Eve | Jackson | 94 |\n\n'
+ assert md(table_with_paragraphs) == '| Firstname | Lastname | Age |\n| --- | --- | --- |\n| Jill | Smith | 50 |\n| Eve | Jackson | 94 |\n\n'
+ assert md(table_with_linebreaks) == '| Firstname | Lastname | Age |\n| --- | --- | --- |\n| Jill | Smith Jackson | 50 |\n| Eve | Jackson Smith | 94 |\n\n'
+ assert md(table_with_header_column) == '| Firstname | Lastname | Age |\n| --- | --- | --- |\n| Jill | Smith | 50 |\n| Eve | Jackson | 94 |\n\n'
+ assert md(table_head_body) == '| Firstname | Lastname | Age |\n| --- | --- | --- |\n| Jill | Smith | 50 |\n| Eve | Jackson | 94 |\n\n'
+ assert md(table_head_body_multiple_head) == '| | | |\n| --- | --- | --- |\n| Creator | Editor | Server |\n| Operator | Manager | Engineer |\n| Bob | Oliver | Tom |\n| Thomas | Lucas | Ethan |\n\n'
+ assert md(table_head_body_missing_head) == '| Firstname | Lastname | Age |\n| --- | --- | --- |\n| Jill | Smith | 50 |\n| Eve | Jackson | 94 |\n\n'
+ assert md(table_missing_text) == '| | Lastname | Age |\n| --- | --- | --- |\n| Jill | | 50 |\n| Eve | Jackson | 94 |\n\n'
+ assert md(table_missing_head) == '| | | |\n| --- | --- | --- |\n| Firstname | Lastname | Age |\n| Jill | Smith | 50 |\n| Eve | Jackson | 94 |\n\n'
+ assert md(table_body) == '| | | |\n| --- | --- | --- |\n| Firstname | Lastname | Age |\n| Jill | Smith | 50 |\n| Eve | Jackson | 94 |\n\n'
assert md(table_with_caption) == 'TEXT\n\nCaption\n\n| | | |\n| --- | --- | --- |\n| Firstname | Lastname | Age |\n\n'
- assert md(table_with_colspan) == '\n\n| Name | | Age |\n| --- | --- | --- |\n| Jill | Smith | 50 |\n| Eve | Jackson | 94 |\n\n'
- assert md(table_with_undefined_colspan) == '\n\n| Name | Age |\n| --- | --- |\n| Jill | Smith |\n\n'
+ assert md(table_with_colspan) == '| Name | | Age |\n| --- | --- | --- |\n| Jill | Smith | 50 |\n| Eve | Jackson | 94 |\n\n'
+ assert md(table_with_undefined_colspan) == '| Name | Age |\n| --- | --- |\n| Jill | Smith |\n\n'
def test_table_infer_header():
- assert md(table, table_infer_header=True) == '\n\n| Firstname | Lastname | Age |\n| --- | --- | --- |\n| Jill | Smith | 50 |\n| Eve | Jackson | 94 |\n\n'
- assert md(table_with_html_content, table_infer_header=True) == '\n\n| Firstname | Lastname | Age |\n| --- | --- | --- |\n| **Jill** | *Smith* | [50](#) |\n| Eve | Jackson | 94 |\n\n'
- assert md(table_with_paragraphs, table_infer_header=True) == '\n\n| Firstname | Lastname | Age |\n| --- | --- | --- |\n| Jill | Smith | 50 |\n| Eve | Jackson | 94 |\n\n'
- assert md(table_with_linebreaks, table_infer_header=True) == '\n\n| Firstname | Lastname | Age |\n| --- | --- | --- |\n| Jill | Smith Jackson | 50 |\n| Eve | Jackson Smith | 94 |\n\n'
- assert md(table_with_header_column, table_infer_header=True) == '\n\n| Firstname | Lastname | Age |\n| --- | --- | --- |\n| Jill | Smith | 50 |\n| Eve | Jackson | 94 |\n\n'
- assert md(table_head_body, table_infer_header=True) == '\n\n| Firstname | Lastname | Age |\n| --- | --- | --- |\n| Jill | Smith | 50 |\n| Eve | Jackson | 94 |\n\n'
- assert md(table_head_body_multiple_head, table_infer_header=True) == '\n\n| Creator | Editor | Server |\n| --- | --- | --- |\n| Operator | Manager | Engineer |\n| Bob | Oliver | Tom |\n| Thomas | Lucas | Ethan |\n\n'
- assert md(table_head_body_missing_head, table_infer_header=True) == '\n\n| Firstname | Lastname | Age |\n| --- | --- | --- |\n| Jill | Smith | 50 |\n| Eve | Jackson | 94 |\n\n'
- assert md(table_missing_text, table_infer_header=True) == '\n\n| | Lastname | Age |\n| --- | --- | --- |\n| Jill | | 50 |\n| Eve | Jackson | 94 |\n\n'
- assert md(table_missing_head, table_infer_header=True) == '\n\n| Firstname | Lastname | Age |\n| --- | --- | --- |\n| Jill | Smith | 50 |\n| Eve | Jackson | 94 |\n\n'
- assert md(table_body, table_infer_header=True) == '\n\n| Firstname | Lastname | Age |\n| --- | --- | --- |\n| Jill | Smith | 50 |\n| Eve | Jackson | 94 |\n\n'
+ assert md(table, table_infer_header=True) == '| Firstname | Lastname | Age |\n| --- | --- | --- |\n| Jill | Smith | 50 |\n| Eve | Jackson | 94 |\n\n'
+ assert md(table_with_html_content, table_infer_header=True) == '| Firstname | Lastname | Age |\n| --- | --- | --- |\n| **Jill** | *Smith* | [50](#) |\n| Eve | Jackson | 94 |\n\n'
+ assert md(table_with_paragraphs, table_infer_header=True) == '| Firstname | Lastname | Age |\n| --- | --- | --- |\n| Jill | Smith | 50 |\n| Eve | Jackson | 94 |\n\n'
+ assert md(table_with_linebreaks, table_infer_header=True) == '| Firstname | Lastname | Age |\n| --- | --- | --- |\n| Jill | Smith Jackson | 50 |\n| Eve | Jackson Smith | 94 |\n\n'
+ assert md(table_with_header_column, table_infer_header=True) == '| Firstname | Lastname | Age |\n| --- | --- | --- |\n| Jill | Smith | 50 |\n| Eve | Jackson | 94 |\n\n'
+ assert md(table_head_body, table_infer_header=True) == '| Firstname | Lastname | Age |\n| --- | --- | --- |\n| Jill | Smith | 50 |\n| Eve | Jackson | 94 |\n\n'
+ assert md(table_head_body_multiple_head, table_infer_header=True) == '| Creator | Editor | Server |\n| --- | --- | --- |\n| Operator | Manager | Engineer |\n| Bob | Oliver | Tom |\n| Thomas | Lucas | Ethan |\n\n'
+ assert md(table_head_body_missing_head, table_infer_header=True) == '| Firstname | Lastname | Age |\n| --- | --- | --- |\n| Jill | Smith | 50 |\n| Eve | Jackson | 94 |\n\n'
+ assert md(table_missing_text, table_infer_header=True) == '| | Lastname | Age |\n| --- | --- | --- |\n| Jill | | 50 |\n| Eve | Jackson | 94 |\n\n'
+ assert md(table_missing_head, table_infer_header=True) == '| Firstname | Lastname | Age |\n| --- | --- | --- |\n| Jill | Smith | 50 |\n| Eve | Jackson | 94 |\n\n'
+ assert md(table_body, table_infer_header=True) == '| Firstname | Lastname | Age |\n| --- | --- | --- |\n| Jill | Smith | 50 |\n| Eve | Jackson | 94 |\n\n'
assert md(table_with_caption, table_infer_header=True) == 'TEXT\n\nCaption\n\n| Firstname | Lastname | Age |\n| --- | --- | --- |\n\n'
- assert md(table_with_colspan, table_infer_header=True) == '\n\n| Name | | Age |\n| --- | --- | --- |\n| Jill | Smith | 50 |\n| Eve | Jackson | 94 |\n\n'
- assert md(table_with_undefined_colspan, table_infer_header=True) == '\n\n| Name | Age |\n| --- | --- |\n| Jill | Smith |\n\n'
+ assert md(table_with_colspan, table_infer_header=True) == '| Name | | Age |\n| --- | --- | --- |\n| Jill | Smith | 50 |\n| Eve | Jackson | 94 |\n\n'
+ assert md(table_with_undefined_colspan, table_infer_header=True) == '| Name | Age |\n| --- | --- |\n| Jill | Smith |\n\n'
From dd251c6b8931cfe6fab31e0eaf7be9cd61eff1ce Mon Sep 17 00:00:00 2001
From: chrispy
Date: Mon, 27 Jan 2025 09:04:47 -0500
Subject: [PATCH 2/3] add strip_document option; disable document stripping in unit tests
Signed-off-by: chrispy
---
README.rst | 7 ++
markdownify/__init__.py | 18 +++-
tests/test_advanced.py | 11 ++-
tests/test_args.py | 10 ++-
tests/test_basic.py | 9 +-
tests/test_conversions.py | 181 ++++++++++++++++++++------------------
tests/test_escaping.py | 9 +-
tests/test_lists.py | 37 ++++----
tests/test_tables.py | 61 +++++++------
9 files changed, 207 insertions(+), 136 deletions(-)
diff --git a/README.rst b/README.rst
index 34ed7e0..5c655d2 100644
--- a/README.rst
+++ b/README.rst
@@ -150,6 +150,13 @@ wrap, wrap_width
Use with ``newline_style=BACKSLASH`` to keep line breaks in paragraphs.
A `wrap_width` value of `None` reflows lines to unlimited line length.
+strip_document
+ Controls whether leading and/or trailing separation newlines are removed from
+ the final converted document. Supported values are ``LSTRIP`` (leading),
+ ``RSTRIP`` (trailing), ``STRIP`` (both), and ``None`` (no removal). Newlines
+ within the document are unaffected.
+ Defaults to ``LSTRIP``.
+
Options may be specified as kwargs to the ``markdownify`` function, or as a
nested ``Options`` class in ``MarkdownConverter`` subclasses.
diff --git a/markdownify/__init__.py b/markdownify/__init__.py
index 10ff153..6f77d59 100644
--- a/markdownify/__init__.py
+++ b/markdownify/__init__.py
@@ -26,6 +26,11 @@
ASTERISK = '*'
UNDERSCORE = '_'
+# Document strip styles
+LSTRIP = 'lstrip'
+RSTRIP = 'rstrip'
+STRIP = 'strip'
+
def chomp(text):
"""
@@ -99,6 +104,7 @@ class DefaultOptions:
keep_inline_images_in = []
newline_style = SPACES
strip = None
+ strip_document = LSTRIP
strong_em_symbol = ASTERISK
sub_symbol = ''
sup_symbol = ''
@@ -181,8 +187,16 @@ def process_tag(self, node, convert_as_inline):
def convert__document_(self, el, text, convert_as_inline):
"""Final document-level formatting for BeautifulSoup object (node.name == "[document]")"""
- # remove all leading newlines
- text = text.lstrip('\n')
+ if self.options['strip_document'] == LSTRIP:
+ text = text.lstrip('\n') # remove leading separation newlines
+ elif self.options['strip_document'] == RSTRIP:
+ text = text.rstrip('\n') # remove trailing separation newlines
+ elif self.options['strip_document'] == STRIP:
+ text = text.strip('\n') # remove leading and trailing separation newlines
+ elif self.options['strip_document'] is None:
+ pass # leave leading and trailing separation newlines as-is
+ else:
+ raise ValueError('Invalid value for strip_document: %s' % self.options['strip_document'])
return text
diff --git a/tests/test_advanced.py b/tests/test_advanced.py
index 14bf3cd..286b354 100644
--- a/tests/test_advanced.py
+++ b/tests/test_advanced.py
@@ -1,4 +1,11 @@
-from markdownify import markdownify as md
+from markdownify import MarkdownConverter
+
+
+def md(html, **options):
+ # disable document-level stripping so separation newlines are included in testing
+ options = {**options, "strip_document": None}
+
+ return MarkdownConverter(**options).convert(html)
def test_chomp():
@@ -14,7 +21,7 @@ def test_chomp():
def test_nested():
text = md('This is an example link .
')
- assert text == 'This is an [example link](http://example.com/).\n\n'
+ assert text == '\n\nThis is an [example link](http://example.com/).\n\n'
def test_ignore_comments():
diff --git a/tests/test_args.py b/tests/test_args.py
index ebce4a8..30324ad 100644
--- a/tests/test_args.py
+++ b/tests/test_args.py
@@ -2,7 +2,7 @@
Test whitelisting/blacklisting of specific tags.
"""
-from markdownify import markdownify as md
+from markdownify import markdownify as md, LSTRIP, RSTRIP, STRIP
def test_strip():
@@ -23,3 +23,11 @@ def test_convert():
def test_do_not_convert():
text = md('Some Text ', convert=[])
assert text == 'Some Text'
+
+
+def test_strip_document():
+ assert md("Hello
") == "Hello\n\n" # defaults to LSTRIP
+ assert md("Hello
", strip_document=LSTRIP) == "Hello\n\n"
+ assert md("Hello
", strip_document=RSTRIP) == "\n\nHello"
+ assert md("Hello
", strip_document=STRIP) == "Hello"
+ assert md("Hello
", strip_document=None) == "\n\nHello\n\n"
diff --git a/tests/test_basic.py b/tests/test_basic.py
index 66f8b6c..b91d8ee 100644
--- a/tests/test_basic.py
+++ b/tests/test_basic.py
@@ -1,4 +1,11 @@
-from markdownify import markdownify as md
+from markdownify import MarkdownConverter
+
+
+def md(html, **options):
+ # disable document-level stripping so separation newlines are included in testing
+ options = {**options, "strip_document": None}
+
+ return MarkdownConverter(**options).convert(html)
def test_single_tag():
diff --git a/tests/test_conversions.py b/tests/test_conversions.py
index 4df024e..ed61643 100644
--- a/tests/test_conversions.py
+++ b/tests/test_conversions.py
@@ -1,4 +1,11 @@
-from markdownify import markdownify as md, ATX, ATX_CLOSED, BACKSLASH, SPACES, UNDERSCORE
+from markdownify import MarkdownConverter, ATX, ATX_CLOSED, BACKSLASH, SPACES, UNDERSCORE
+
+
+def md(html, **options):
+ # disable document-level stripping so separation newlines are included in testing
+ options = {**options, "strip_document": None}
+
+ return MarkdownConverter(**options).convert(html)
def inline_tests(tag, markup):
@@ -41,7 +48,7 @@ def test_a_no_autolinks():
def test_a_in_code():
assert md('Google
') == '`Google`'
- assert md('Google ') == '```\nGoogle\n```\n\n'
+ assert md('Google ') == '\n\n```\nGoogle\n```\n\n'
def test_b():
@@ -56,22 +63,22 @@ def test_b_spaces():
def test_blockquote():
- assert md('Hello ') == '> Hello\n\n'
- assert md('\nHello\n ') == '> Hello\n\n'
+ assert md('Hello ') == '\n> Hello\n\n'
+ assert md('\nHello\n ') == '\n> Hello\n\n'
def test_blockquote_with_nested_paragraph():
- assert md('Hello
') == '> Hello\n\n'
- assert md('Hello
Hello again
') == '> Hello\n>\n> Hello again\n\n'
+ assert md('Hello
') == '\n> Hello\n\n'
+ assert md('Hello
Hello again
') == '\n> Hello\n>\n> Hello again\n\n'
def test_blockquote_with_paragraph():
- assert md('Hello handsome
') == '> Hello\n\nhandsome\n\n'
+ assert md('Hello handsome
') == '\n> Hello\n\nhandsome\n\n'
def test_blockquote_nested():
text = md('And she was like Hello ')
- assert text == '> And she was like\n> > Hello\n\n'
+ assert text == '\n> And she was like\n> > Hello\n\n'
def test_br():
@@ -100,13 +107,13 @@ def test_code():
def test_dl():
- assert md('term definition ') == 'term\n: definition\n'
- assert md('te
rm
definition ') == 'te rm\n: definition\n'
- assert md('term definition-p1
definition-p2
') == 'term\n: definition-p1\n\n definition-p2\n'
- assert md('term definition 1
definition 2
') == 'term\n: definition 1\n: definition 2\n'
- assert md('term 1 definition 1 term 2 definition 2 ') == 'term 1\n: definition 1\nterm 2\n: definition 2\n'
- assert md('term line 1
line 2
') == 'term\n: > line 1\n >\n > line 2\n'
- assert md('term 1
3
') == 'term\n: 1. 1\n\n * 2a\n * 2b\n 2. 3\n'
+ assert md('term definition ') == '\nterm\n: definition\n'
+ assert md('te
rm
definition ') == '\nte rm\n: definition\n'
+ assert md('term definition-p1
definition-p2
') == '\nterm\n: definition-p1\n\n definition-p2\n'
+ assert md('term definition 1
definition 2
') == '\nterm\n: definition 1\n: definition 2\n'
+ assert md('term 1 definition 1 term 2 definition 2 ') == '\nterm 1\n: definition 1\nterm 2\n: definition 2\n'
+ assert md('term line 1
line 2
') == '\nterm\n: > line 1\n >\n > line 2\n'
+ assert md('term 1
3
') == '\nterm\n: 1. 1\n\n * 2a\n * 2b\n 2. 3\n'
def test_del():
@@ -127,41 +134,41 @@ def test_figcaption():
def test_header_with_space():
- assert md('\n\nHello ') == '### Hello\n\n'
- assert md('Hello\n\n\nWorld ') == '### Hello World\n\n'
- assert md('\n\nHello ') == '#### Hello\n\n'
- assert md('\n\nHello ') == '##### Hello\n\n'
- assert md('\n\nHello\n\n ') == '##### Hello\n\n'
- assert md('\n\nHello \n\n ') == '##### Hello\n\n'
+ assert md('\n\nHello ') == '\n\n### Hello\n\n'
+ assert md('Hello\n\n\nWorld ') == '\n\n### Hello World\n\n'
+ assert md('\n\nHello ') == '\n\n#### Hello\n\n'
+ assert md('\n\nHello ') == '\n\n##### Hello\n\n'
+ assert md('\n\nHello\n\n ') == '\n\n##### Hello\n\n'
+ assert md('\n\nHello \n\n ') == '\n\n##### Hello\n\n'
def test_h1():
- assert md('Hello ') == 'Hello\n=====\n\n'
+ assert md('Hello ') == '\n\nHello\n=====\n\n'
def test_h2():
- assert md('Hello ') == 'Hello\n-----\n\n'
+ assert md('Hello ') == '\n\nHello\n-----\n\n'
def test_hn():
- assert md('Hello ') == '### Hello\n\n'
- assert md('Hello ') == '#### Hello\n\n'
- assert md('Hello ') == '##### Hello\n\n'
- assert md('Hello ') == '###### Hello\n\n'
+ assert md('Hello ') == '\n\n### Hello\n\n'
+ assert md('Hello ') == '\n\n#### Hello\n\n'
+ assert md('Hello ') == '\n\n##### Hello\n\n'
+ assert md('Hello ') == '\n\n###### Hello\n\n'
assert md('Hello ') == md('Hello ')
assert md('Hello ') == md('Hello')
def test_hn_chained():
- assert md('First \nSecond \nThird ', heading_style=ATX) == '# First\n\n## Second\n\n### Third\n\n'
+ assert md('First \nSecond \nThird ', heading_style=ATX) == '\n\n# First\n\n## Second\n\n### Third\n\n'
assert md('XFirst ', heading_style=ATX) == 'X\n\n# First\n\n'
assert md('XFirst ', heading_style=ATX_CLOSED) == 'X\n\n# First #\n\n'
assert md('XFirst ') == 'X\n\nFirst\n=====\n\n'
def test_hn_nested_tag_heading_style():
- assert md('A P
C ', heading_style=ATX_CLOSED) == '# A P C #\n\n'
- assert md('A P
C ', heading_style=ATX) == '# A P C\n\n'
+ assert md('A P
C ', heading_style=ATX_CLOSED) == '\n\n# A P C #\n\n'
+ assert md('A P
C ', heading_style=ATX) == '\n\n# A P C\n\n'
def test_hn_nested_simple_tag():
@@ -177,9 +184,9 @@ def test_hn_nested_simple_tag():
]
for tag, markdown in tag_to_markdown:
- assert md('A <' + tag + '>' + tag + '' + tag + '> B ') == '### A ' + markdown + ' B\n\n'
+ assert md('A <' + tag + '>' + tag + '' + tag + '> B ') == '\n\n### A ' + markdown + ' B\n\n'
- assert md('A B ', heading_style=ATX) == '### A B\n\n'
+ assert md('A B ', heading_style=ATX) == '\n\n### A B\n\n'
# Nested lists not supported
# assert md('A ', heading_style=ATX) == '\n### A li1 li2 B\n\n'
@@ -192,23 +199,23 @@ def test_hn_nested_img():
("alt='Alt Text' title='Optional title'", "Alt Text", " \"Optional title\""),
]
for image_attributes, markdown, title in image_attributes_to_markdown:
- assert md('A B ') == '### A' + (' ' + markdown + ' ' if markdown else ' ') + 'B\n\n'
- assert md('A B ', keep_inline_images_in=['h3']) == '### A  B\n\n'
+ assert md('A B ') == '\n\n### A' + (' ' + markdown + ' ' if markdown else ' ') + 'B\n\n'
+ assert md('A B ', keep_inline_images_in=['h3']) == '\n\n### A  B\n\n'
def test_hn_atx_headings():
- assert md('Hello ', heading_style=ATX) == '# Hello\n\n'
- assert md('Hello ', heading_style=ATX) == '## Hello\n\n'
+ assert md('Hello ', heading_style=ATX) == '\n\n# Hello\n\n'
+ assert md('Hello ', heading_style=ATX) == '\n\n## Hello\n\n'
def test_hn_atx_closed_headings():
- assert md('Hello ', heading_style=ATX_CLOSED) == '# Hello #\n\n'
- assert md('Hello ', heading_style=ATX_CLOSED) == '## Hello ##\n\n'
+ assert md('Hello ', heading_style=ATX_CLOSED) == '\n\n# Hello #\n\n'
+ assert md('Hello ', heading_style=ATX_CLOSED) == '\n\n## Hello ##\n\n'
def test_hn_newlines():
- assert md("H1-1 TEXTH2-2 TEXTH1-2 TEXT", heading_style=ATX) == '# H1-1\n\nTEXT\n\n## H2-2\n\nTEXT\n\n# H1-2\n\nTEXT'
- assert md('H1-1 \nTEXT
\nH2-2 \nTEXT
\nH1-2 \nTEXT
', heading_style=ATX) == '# H1-1\n\nTEXT\n\n## H2-2\n\nTEXT\n\n# H1-2\n\nTEXT\n\n'
+ assert md("H1-1 TEXTH2-2 TEXTH1-2 TEXT", heading_style=ATX) == '\n\n# H1-1\n\nTEXT\n\n## H2-2\n\nTEXT\n\n# H1-2\n\nTEXT'
+ assert md('H1-1 \nTEXT
\nH2-2 \nTEXT
\nH1-2 \nTEXT
', heading_style=ATX) == '\n\n# H1-1\n\nTEXT\n\n## H2-2\n\nTEXT\n\n# H1-2\n\nTEXT\n\n'
def test_head():
@@ -218,7 +225,7 @@ def test_head():
def test_hr():
assert md('Hello World') == 'Hello\n\n---\n\nWorld'
assert md('Hello World') == 'Hello\n\n---\n\nWorld'
- assert md('Hello
\n \nWorld
') == 'Hello\n\n---\n\nWorld\n\n'
+ assert md('Hello
\n \nWorld
') == '\n\nHello\n\n---\n\nWorld\n\n'
def test_i():
@@ -235,49 +242,49 @@ def test_kbd():
def test_p():
- assert md('hello
') == 'hello\n\n'
- assert md("
hello
") == "hello\n\n"
- assert md('123456789 123456789
') == '123456789 123456789\n\n'
- assert md('123456789\n\n\n123456789
') == '123456789\n123456789\n\n'
- assert md('123456789\n\n\n123456789
', wrap=True, wrap_width=80) == '123456789 123456789\n\n'
- assert md('123456789\n\n\n123456789
', wrap=True, wrap_width=None) == '123456789 123456789\n\n'
- assert md('123456789 123456789
', wrap=True, wrap_width=10) == '123456789\n123456789\n\n'
- assert md('Some long link
', wrap=True, wrap_width=10) == '[Some long\nlink](https://example.com)\n\n'
- assert md('12345 67890
', wrap=True, wrap_width=10, newline_style=BACKSLASH) == '12345\\\n67890\n\n'
- assert md('12345 67890
', wrap=True, wrap_width=50, newline_style=BACKSLASH) == '12345\\\n67890\n\n'
- assert md('12345 67890
', wrap=True, wrap_width=10, newline_style=SPACES) == '12345 \n67890\n\n'
- assert md('12345 67890
', wrap=True, wrap_width=50, newline_style=SPACES) == '12345 \n67890\n\n'
- assert md('12345678901 12345
', wrap=True, wrap_width=10, newline_style=BACKSLASH) == '12345678901\\\n12345\n\n'
- assert md('12345678901 12345
', wrap=True, wrap_width=50, newline_style=BACKSLASH) == '12345678901\\\n12345\n\n'
- assert md('12345678901 12345
', wrap=True, wrap_width=10, newline_style=SPACES) == '12345678901 \n12345\n\n'
- assert md('12345678901 12345
', wrap=True, wrap_width=50, newline_style=SPACES) == '12345678901 \n12345\n\n'
- assert md('1234 5678 9012 67890
', wrap=True, wrap_width=10, newline_style=BACKSLASH) == '1234 5678\n9012\\\n67890\n\n'
- assert md('1234 5678 9012 67890
', wrap=True, wrap_width=10, newline_style=SPACES) == '1234 5678\n9012 \n67890\n\n'
+ assert md('hello
') == '\n\nhello\n\n'
+ assert md("
hello
") == "\n\nhello\n\n"
+ assert md('123456789 123456789
') == '\n\n123456789 123456789\n\n'
+ assert md('123456789\n\n\n123456789
') == '\n\n123456789\n123456789\n\n'
+ assert md('123456789\n\n\n123456789
', wrap=True, wrap_width=80) == '\n\n123456789 123456789\n\n'
+ assert md('123456789\n\n\n123456789
', wrap=True, wrap_width=None) == '\n\n123456789 123456789\n\n'
+ assert md('123456789 123456789
', wrap=True, wrap_width=10) == '\n\n123456789\n123456789\n\n'
+ assert md('Some long link
', wrap=True, wrap_width=10) == '\n\n[Some long\nlink](https://example.com)\n\n'
+ assert md('12345 67890
', wrap=True, wrap_width=10, newline_style=BACKSLASH) == '\n\n12345\\\n67890\n\n'
+ assert md('12345 67890
', wrap=True, wrap_width=50, newline_style=BACKSLASH) == '\n\n12345\\\n67890\n\n'
+ assert md('12345 67890
', wrap=True, wrap_width=10, newline_style=SPACES) == '\n\n12345 \n67890\n\n'
+ assert md('12345 67890
', wrap=True, wrap_width=50, newline_style=SPACES) == '\n\n12345 \n67890\n\n'
+ assert md('12345678901 12345
', wrap=True, wrap_width=10, newline_style=BACKSLASH) == '\n\n12345678901\\\n12345\n\n'
+ assert md('12345678901 12345
', wrap=True, wrap_width=50, newline_style=BACKSLASH) == '\n\n12345678901\\\n12345\n\n'
+ assert md('12345678901 12345
', wrap=True, wrap_width=10, newline_style=SPACES) == '\n\n12345678901 \n12345\n\n'
+ assert md('12345678901 12345
', wrap=True, wrap_width=50, newline_style=SPACES) == '\n\n12345678901 \n12345\n\n'
+ assert md('1234 5678 9012 67890
', wrap=True, wrap_width=10, newline_style=BACKSLASH) == '\n\n1234 5678\n9012\\\n67890\n\n'
+ assert md('1234 5678 9012 67890
', wrap=True, wrap_width=10, newline_style=SPACES) == '\n\n1234 5678\n9012 \n67890\n\n'
assert md('FirstSecond
Third
Fourth') == 'First\n\nSecond\n\nThird\n\nFourth'
def test_pre():
- assert md('test\n foo\nbar ') == '```\ntest\n foo\nbar\n```\n\n'
- assert md('test\n foo\nbar
') == '```\ntest\n foo\nbar\n```\n\n'
- assert md('*this_should_not_escape* ') == '```\n*this_should_not_escape*\n```\n\n'
- assert md('*this_should_not_escape* ') == '```\n*this_should_not_escape*\n```\n\n'
- assert md('\t\tthis should\t\tnot normalize ') == '```\n\t\tthis should\t\tnot normalize\n```\n\n'
- assert md('\t\tthis should\t\tnot normalize ') == '```\n\t\tthis should\t\tnot normalize\n```\n\n'
- assert md('foo\nbar\n baz ') == '```\nfoo\nbar\nbaz\n```\n\n'
- assert md('foo\nbar\n baz ') == '```\nfoo\nbar\nbaz\n```\n\n'
- assert md('foo\nbar \nbaz ') == '```\nfoo\nbar\nbaz\n```\n\n'
- assert md('foo\n baz ') == '```\nfoo\nbaz\n```\n\n'
- assert md('foo\nbar\nbaz ') == '```\nfoo\nbar\nbaz\n```\n\n'
- assert md('foo\nbar\n baz ') == '```\nfoo\nbar\nbaz\n```\n\n'
- assert md('foo\nbar\n
baz ') == '```\nfoo\nbar\nbaz\n```\n\n'
- assert md('foo\nbar\n baz ') == '```\nfoo\nbar\nbaz\n```\n\n'
- assert md('foo\nbar\n baz ') == '```\nfoo\nbar\nbaz\n```\n\n'
- assert md('foo\nbar\n baz ', sup_symbol='^') == '```\nfoo\nbar\nbaz\n```\n\n'
- assert md('foo\nbar\n baz ', sub_symbol='^') == '```\nfoo\nbar\nbaz\n```\n\n'
- assert md('foo\nbar\n baz ', sub_symbol='^') == '```\nfoo\nbar\nbaz\n```\n\n'
+ assert md('test\n foo\nbar ') == '\n\n```\ntest\n foo\nbar\n```\n\n'
+ assert md('test\n foo\nbar
') == '\n\n```\ntest\n foo\nbar\n```\n\n'
+ assert md('*this_should_not_escape* ') == '\n\n```\n*this_should_not_escape*\n```\n\n'
+ assert md('*this_should_not_escape* ') == '\n\n```\n*this_should_not_escape*\n```\n\n'
+ assert md('\t\tthis should\t\tnot normalize ') == '\n\n```\n\t\tthis should\t\tnot normalize\n```\n\n'
+ assert md('\t\tthis should\t\tnot normalize ') == '\n\n```\n\t\tthis should\t\tnot normalize\n```\n\n'
+ assert md('foo\nbar\n baz ') == '\n\n```\nfoo\nbar\nbaz\n```\n\n'
+ assert md('foo\nbar\n baz ') == '\n\n```\nfoo\nbar\nbaz\n```\n\n'
+ assert md('foo\nbar \nbaz ') == '\n\n```\nfoo\nbar\nbaz\n```\n\n'
+ assert md('foo\n baz ') == '\n\n```\nfoo\nbaz\n```\n\n'
+ assert md('foo\nbar\nbaz ') == '\n\n```\nfoo\nbar\nbaz\n```\n\n'
+ assert md('foo\nbar\n baz ') == '\n\n```\nfoo\nbar\nbaz\n```\n\n'
+ assert md('foo\nbar\n
baz ') == '\n\n```\nfoo\nbar\nbaz\n```\n\n'
+ assert md('foo\nbar\n baz ') == '\n\n```\nfoo\nbar\nbaz\n```\n\n'
+ assert md('foo\nbar\n baz ') == '\n\n```\nfoo\nbar\nbaz\n```\n\n'
+ assert md('foo\nbar\n baz ', sup_symbol='^') == '\n\n```\nfoo\nbar\nbaz\n```\n\n'
+ assert md('foo\nbar\n baz ', sub_symbol='^') == '\n\n```\nfoo\nbar\nbaz\n```\n\n'
+ assert md('foo\nbar\n baz ', sub_symbol='^') == '\n\n```\nfoo\nbar\nbaz\n```\n\n'
assert md('foobar baz', sub_symbol='^') == 'foo\n\n```\nbar\n```\n\nbaz'
- assert md("foo
\nbar \nbaz", sub_symbol="^") == 'foo\n\n```\nbar\n```\n\nbaz'
+ assert md("foo
\nbar \nbaz", sub_symbol="^") == "\n\nfoo\n\n```\nbar\n```\n\nbaz"
def test_script():
@@ -320,24 +327,24 @@ def test_sup():
def test_lang():
- assert md('test\n foo\nbar ', code_language='python') == '```python\ntest\n foo\nbar\n```\n\n'
- assert md('test\n foo\nbar
', code_language='javascript') == '```javascript\ntest\n foo\nbar\n```\n\n'
+ assert md('test\n foo\nbar ', code_language='python') == '\n\n```python\ntest\n foo\nbar\n```\n\n'
+ assert md('test\n foo\nbar
', code_language='javascript') == '\n\n```javascript\ntest\n foo\nbar\n```\n\n'
def test_lang_callback():
def callback(el):
return el['class'][0] if el.has_attr('class') else None
- assert md('test\n foo\nbar ', code_language_callback=callback) == '```python\ntest\n foo\nbar\n```\n\n'
- assert md('test\n foo\nbar
', code_language_callback=callback) == '```javascript\ntest\n foo\nbar\n```\n\n'
- assert md('test\n foo\nbar
', code_language_callback=callback) == '```javascript\ntest\n foo\nbar\n```\n\n'
+ assert md('test\n foo\nbar ', code_language_callback=callback) == '\n\n```python\ntest\n foo\nbar\n```\n\n'
+ assert md('test\n foo\nbar
', code_language_callback=callback) == '\n\n```javascript\ntest\n foo\nbar\n```\n\n'
+ assert md('test\n foo\nbar
', code_language_callback=callback) == '\n\n```javascript\ntest\n foo\nbar\n```\n\n'
def test_spaces():
- assert md(' a b
c d
') == 'a b\n\nc d\n\n'
- assert md(' a
') == '*a*\n\n'
+ assert md(' a b
c d
') == '\n\na b\n\nc d\n\n'
+ assert md(' a
') == '\n\n*a*\n\n'
assert md('test again
') == 'test\n\nagain\n\n'
assert md('test text after') == 'test\n> text\n\nafter'
- assert md(' x y ') == '1. x\n2. y\n'
- assert md(' x y ') == '* x\n* y\n'
+ assert md(' x y ') == '\n\n1. x\n2. y\n'
+ assert md(' x y ') == '\n\n* x\n* y\n'
assert md('test foo bar') == 'test\n\n```\n foo \n```\n\nbar'
diff --git a/tests/test_escaping.py b/tests/test_escaping.py
index 878760a..3634b37 100644
--- a/tests/test_escaping.py
+++ b/tests/test_escaping.py
@@ -1,6 +1,13 @@
import warnings
from bs4 import MarkupResemblesLocatorWarning
-from markdownify import markdownify as md
+from markdownify import MarkdownConverter
+
+
+def md(html, **options):
+ # disable document-level stripping so separation newlines are included in testing
+ options = {**options, "strip_document": None}
+
+ return MarkdownConverter(**options).convert(html)
def test_asterisks():
diff --git a/tests/test_lists.py b/tests/test_lists.py
index 2d7b570..21ab541 100644
--- a/tests/test_lists.py
+++ b/tests/test_lists.py
@@ -1,4 +1,11 @@
-from markdownify import markdownify as md
+from markdownify import MarkdownConverter
+
+
+def md(html, **options):
+ # disable document-level stripping so separation newlines are included in testing
+ options = {**options, "strip_document": None}
+
+ return MarkdownConverter(**options).convert(html)
nested_uls = """
@@ -41,21 +48,21 @@
def test_ol():
- assert md('a b ') == '1. a\n2. b\n'
- assert md('a b ') == '3. a\n4. b\n'
+ assert md('a b ') == '\n\n1. a\n2. b\n'
+ assert md('a b ') == '\n\n3. a\n4. b\n'
assert md('fooa b bar') == 'foo\n\n3. a\n4. b\n\nbar'
- assert md('a b ') == '1. a\n2. b\n'
- assert md('a b ') == '1. a\n2. b\n'
- assert md('a b ') == '1. a\n2. b\n'
- assert md('first para
second para
third para
fourth para
') == '1234. first para\n\n second para\n1235. third para\n\n fourth para\n'
+ assert md('a b ') == '\n\n1. a\n2. b\n'
+ assert md('a b ') == '\n\n1. a\n2. b\n'
+ assert md('a b ') == '\n\n1. a\n2. b\n'
+ assert md('first para
second para
third para
fourth para
') == '\n\n1234. first para\n\n second para\n1235. third para\n\n fourth para\n'
def test_nested_ols():
- assert md(nested_ols) == '1. 1\n 1. a\n 1. I\n 2. II\n 3. III\n 2. b\n 3. c\n2. 2\n3. 3\n'
+ assert md(nested_ols) == '\n\n1. 1\n 1. a\n 1. I\n 2. II\n 3. III\n 2. b\n 3. c\n2. 2\n3. 3\n'
def test_ul():
- assert md('') == '* a\n* b\n'
+ assert md('') == '\n\n* a\n* b\n'
assert md("""
a
@@ -63,12 +70,12 @@ def test_ul():
b
c
- """) == '* a\n* b\n* c\n'
- assert md('first para
second para
third para
fourth para
') == '* first para\n\n second para\n* third para\n\n fourth para\n'
+ """) == '\n\n* a\n* b\n* c\n'
+ assert md('first para
second para
third para
fourth para
') == '\n\n* first para\n\n second para\n* third para\n\n fourth para\n'
def test_inline_ul():
- assert md('foo
bar
') == 'foo\n\n* a\n* b\n\nbar\n\n'
+ assert md('foo
bar
') == '\n\nfoo\n\n* a\n* b\n\nbar\n\n'
assert md('foobaz') == 'foo\n\n* bar\n\nbaz'
@@ -77,12 +84,12 @@ def test_nested_uls():
Nested ULs should alternate bullet characters.
"""
- assert md(nested_uls) == '* 1\n + a\n - I\n - II\n - III\n + b\n + c\n* 2\n* 3\n'
+ assert md(nested_uls) == '\n\n* 1\n + a\n - I\n - II\n - III\n + b\n + c\n* 2\n* 3\n'
def test_bullets():
- assert md(nested_uls, bullets='-') == '- 1\n - a\n - I\n - II\n - III\n - b\n - c\n- 2\n- 3\n'
+ assert md(nested_uls, bullets='-') == '\n\n- 1\n - a\n - I\n - II\n - III\n - b\n - c\n- 2\n- 3\n'
def test_li_text():
- assert md('foo bar foo bar foo bar space . ') == '* foo [bar](#)\n* foo bar\n* foo **bar** *space*.\n'
+ assert md('foo bar foo bar foo bar space . ') == '\n\n* foo [bar](#)\n* foo bar\n* foo **bar** *space*.\n'
diff --git a/tests/test_tables.py b/tests/test_tables.py
index dee0960..f0aee56 100644
--- a/tests/test_tables.py
+++ b/tests/test_tables.py
@@ -1,4 +1,11 @@
-from markdownify import markdownify as md
+from markdownify import MarkdownConverter
+
+
+def md(html, **options):
+ # disable document-level stripping so separation newlines are included in testing
+ options = {**options, "strip_document": None}
+
+ return MarkdownConverter(**options).convert(html)
table = """
@@ -269,34 +276,34 @@
def test_table():
- assert md(table) == '| Firstname | Lastname | Age |\n| --- | --- | --- |\n| Jill | Smith | 50 |\n| Eve | Jackson | 94 |\n\n'
- assert md(table_with_html_content) == '| Firstname | Lastname | Age |\n| --- | --- | --- |\n| **Jill** | *Smith* | [50](#) |\n| Eve | Jackson | 94 |\n\n'
- assert md(table_with_paragraphs) == '| Firstname | Lastname | Age |\n| --- | --- | --- |\n| Jill | Smith | 50 |\n| Eve | Jackson | 94 |\n\n'
- assert md(table_with_linebreaks) == '| Firstname | Lastname | Age |\n| --- | --- | --- |\n| Jill | Smith Jackson | 50 |\n| Eve | Jackson Smith | 94 |\n\n'
- assert md(table_with_header_column) == '| Firstname | Lastname | Age |\n| --- | --- | --- |\n| Jill | Smith | 50 |\n| Eve | Jackson | 94 |\n\n'
- assert md(table_head_body) == '| Firstname | Lastname | Age |\n| --- | --- | --- |\n| Jill | Smith | 50 |\n| Eve | Jackson | 94 |\n\n'
- assert md(table_head_body_multiple_head) == '| | | |\n| --- | --- | --- |\n| Creator | Editor | Server |\n| Operator | Manager | Engineer |\n| Bob | Oliver | Tom |\n| Thomas | Lucas | Ethan |\n\n'
- assert md(table_head_body_missing_head) == '| Firstname | Lastname | Age |\n| --- | --- | --- |\n| Jill | Smith | 50 |\n| Eve | Jackson | 94 |\n\n'
- assert md(table_missing_text) == '| | Lastname | Age |\n| --- | --- | --- |\n| Jill | | 50 |\n| Eve | Jackson | 94 |\n\n'
- assert md(table_missing_head) == '| | | |\n| --- | --- | --- |\n| Firstname | Lastname | Age |\n| Jill | Smith | 50 |\n| Eve | Jackson | 94 |\n\n'
- assert md(table_body) == '| | | |\n| --- | --- | --- |\n| Firstname | Lastname | Age |\n| Jill | Smith | 50 |\n| Eve | Jackson | 94 |\n\n'
+ assert md(table) == '\n\n| Firstname | Lastname | Age |\n| --- | --- | --- |\n| Jill | Smith | 50 |\n| Eve | Jackson | 94 |\n\n'
+ assert md(table_with_html_content) == '\n\n| Firstname | Lastname | Age |\n| --- | --- | --- |\n| **Jill** | *Smith* | [50](#) |\n| Eve | Jackson | 94 |\n\n'
+ assert md(table_with_paragraphs) == '\n\n| Firstname | Lastname | Age |\n| --- | --- | --- |\n| Jill | Smith | 50 |\n| Eve | Jackson | 94 |\n\n'
+ assert md(table_with_linebreaks) == '\n\n| Firstname | Lastname | Age |\n| --- | --- | --- |\n| Jill | Smith Jackson | 50 |\n| Eve | Jackson Smith | 94 |\n\n'
+ assert md(table_with_header_column) == '\n\n| Firstname | Lastname | Age |\n| --- | --- | --- |\n| Jill | Smith | 50 |\n| Eve | Jackson | 94 |\n\n'
+ assert md(table_head_body) == '\n\n| Firstname | Lastname | Age |\n| --- | --- | --- |\n| Jill | Smith | 50 |\n| Eve | Jackson | 94 |\n\n'
+ assert md(table_head_body_multiple_head) == '\n\n| | | |\n| --- | --- | --- |\n| Creator | Editor | Server |\n| Operator | Manager | Engineer |\n| Bob | Oliver | Tom |\n| Thomas | Lucas | Ethan |\n\n'
+ assert md(table_head_body_missing_head) == '\n\n| Firstname | Lastname | Age |\n| --- | --- | --- |\n| Jill | Smith | 50 |\n| Eve | Jackson | 94 |\n\n'
+ assert md(table_missing_text) == '\n\n| | Lastname | Age |\n| --- | --- | --- |\n| Jill | | 50 |\n| Eve | Jackson | 94 |\n\n'
+ assert md(table_missing_head) == '\n\n| | | |\n| --- | --- | --- |\n| Firstname | Lastname | Age |\n| Jill | Smith | 50 |\n| Eve | Jackson | 94 |\n\n'
+ assert md(table_body) == '\n\n| | | |\n| --- | --- | --- |\n| Firstname | Lastname | Age |\n| Jill | Smith | 50 |\n| Eve | Jackson | 94 |\n\n'
assert md(table_with_caption) == 'TEXT\n\nCaption\n\n| | | |\n| --- | --- | --- |\n| Firstname | Lastname | Age |\n\n'
- assert md(table_with_colspan) == '| Name | | Age |\n| --- | --- | --- |\n| Jill | Smith | 50 |\n| Eve | Jackson | 94 |\n\n'
- assert md(table_with_undefined_colspan) == '| Name | Age |\n| --- | --- |\n| Jill | Smith |\n\n'
+ assert md(table_with_colspan) == '\n\n| Name | | Age |\n| --- | --- | --- |\n| Jill | Smith | 50 |\n| Eve | Jackson | 94 |\n\n'
+ assert md(table_with_undefined_colspan) == '\n\n| Name | Age |\n| --- | --- |\n| Jill | Smith |\n\n'
def test_table_infer_header():
- assert md(table, table_infer_header=True) == '| Firstname | Lastname | Age |\n| --- | --- | --- |\n| Jill | Smith | 50 |\n| Eve | Jackson | 94 |\n\n'
- assert md(table_with_html_content, table_infer_header=True) == '| Firstname | Lastname | Age |\n| --- | --- | --- |\n| **Jill** | *Smith* | [50](#) |\n| Eve | Jackson | 94 |\n\n'
- assert md(table_with_paragraphs, table_infer_header=True) == '| Firstname | Lastname | Age |\n| --- | --- | --- |\n| Jill | Smith | 50 |\n| Eve | Jackson | 94 |\n\n'
- assert md(table_with_linebreaks, table_infer_header=True) == '| Firstname | Lastname | Age |\n| --- | --- | --- |\n| Jill | Smith Jackson | 50 |\n| Eve | Jackson Smith | 94 |\n\n'
- assert md(table_with_header_column, table_infer_header=True) == '| Firstname | Lastname | Age |\n| --- | --- | --- |\n| Jill | Smith | 50 |\n| Eve | Jackson | 94 |\n\n'
- assert md(table_head_body, table_infer_header=True) == '| Firstname | Lastname | Age |\n| --- | --- | --- |\n| Jill | Smith | 50 |\n| Eve | Jackson | 94 |\n\n'
- assert md(table_head_body_multiple_head, table_infer_header=True) == '| Creator | Editor | Server |\n| --- | --- | --- |\n| Operator | Manager | Engineer |\n| Bob | Oliver | Tom |\n| Thomas | Lucas | Ethan |\n\n'
- assert md(table_head_body_missing_head, table_infer_header=True) == '| Firstname | Lastname | Age |\n| --- | --- | --- |\n| Jill | Smith | 50 |\n| Eve | Jackson | 94 |\n\n'
- assert md(table_missing_text, table_infer_header=True) == '| | Lastname | Age |\n| --- | --- | --- |\n| Jill | | 50 |\n| Eve | Jackson | 94 |\n\n'
- assert md(table_missing_head, table_infer_header=True) == '| Firstname | Lastname | Age |\n| --- | --- | --- |\n| Jill | Smith | 50 |\n| Eve | Jackson | 94 |\n\n'
- assert md(table_body, table_infer_header=True) == '| Firstname | Lastname | Age |\n| --- | --- | --- |\n| Jill | Smith | 50 |\n| Eve | Jackson | 94 |\n\n'
+ assert md(table, table_infer_header=True) == '\n\n| Firstname | Lastname | Age |\n| --- | --- | --- |\n| Jill | Smith | 50 |\n| Eve | Jackson | 94 |\n\n'
+ assert md(table_with_html_content, table_infer_header=True) == '\n\n| Firstname | Lastname | Age |\n| --- | --- | --- |\n| **Jill** | *Smith* | [50](#) |\n| Eve | Jackson | 94 |\n\n'
+ assert md(table_with_paragraphs, table_infer_header=True) == '\n\n| Firstname | Lastname | Age |\n| --- | --- | --- |\n| Jill | Smith | 50 |\n| Eve | Jackson | 94 |\n\n'
+ assert md(table_with_linebreaks, table_infer_header=True) == '\n\n| Firstname | Lastname | Age |\n| --- | --- | --- |\n| Jill | Smith Jackson | 50 |\n| Eve | Jackson Smith | 94 |\n\n'
+ assert md(table_with_header_column, table_infer_header=True) == '\n\n| Firstname | Lastname | Age |\n| --- | --- | --- |\n| Jill | Smith | 50 |\n| Eve | Jackson | 94 |\n\n'
+ assert md(table_head_body, table_infer_header=True) == '\n\n| Firstname | Lastname | Age |\n| --- | --- | --- |\n| Jill | Smith | 50 |\n| Eve | Jackson | 94 |\n\n'
+ assert md(table_head_body_multiple_head, table_infer_header=True) == '\n\n| Creator | Editor | Server |\n| --- | --- | --- |\n| Operator | Manager | Engineer |\n| Bob | Oliver | Tom |\n| Thomas | Lucas | Ethan |\n\n'
+ assert md(table_head_body_missing_head, table_infer_header=True) == '\n\n| Firstname | Lastname | Age |\n| --- | --- | --- |\n| Jill | Smith | 50 |\n| Eve | Jackson | 94 |\n\n'
+ assert md(table_missing_text, table_infer_header=True) == '\n\n| | Lastname | Age |\n| --- | --- | --- |\n| Jill | | 50 |\n| Eve | Jackson | 94 |\n\n'
+ assert md(table_missing_head, table_infer_header=True) == '\n\n| Firstname | Lastname | Age |\n| --- | --- | --- |\n| Jill | Smith | 50 |\n| Eve | Jackson | 94 |\n\n'
+ assert md(table_body, table_infer_header=True) == '\n\n| Firstname | Lastname | Age |\n| --- | --- | --- |\n| Jill | Smith | 50 |\n| Eve | Jackson | 94 |\n\n'
assert md(table_with_caption, table_infer_header=True) == 'TEXT\n\nCaption\n\n| Firstname | Lastname | Age |\n| --- | --- | --- |\n\n'
- assert md(table_with_colspan, table_infer_header=True) == '| Name | | Age |\n| --- | --- | --- |\n| Jill | Smith | 50 |\n| Eve | Jackson | 94 |\n\n'
- assert md(table_with_undefined_colspan, table_infer_header=True) == '| Name | Age |\n| --- | --- |\n| Jill | Smith |\n\n'
+ assert md(table_with_colspan, table_infer_header=True) == '\n\n| Name | | Age |\n| --- | --- | --- |\n| Jill | Smith | 50 |\n| Eve | Jackson | 94 |\n\n'
+ assert md(table_with_undefined_colspan, table_infer_header=True) == '\n\n| Name | Age |\n| --- | --- |\n| Jill | Smith |\n\n'
From 1586f9834fdb6176c79c436b205c9816757502db Mon Sep 17 00:00:00 2001
From: chrispy
Date: Mon, 27 Jan 2025 11:35:30 -0500
Subject: [PATCH 3/3] change default to strip_document=STRIP, move unit-testing
md() to common test utils file
Signed-off-by: chrispy
---
README.rst | 4 ++--
markdownify/__init__.py | 2 +-
tests/test_advanced.py | 9 +--------
tests/test_args.py | 13 +++++++------
tests/test_basic.py | 9 +--------
tests/test_conversions.py | 10 ++--------
tests/test_custom_converter.py | 4 ++--
tests/test_escaping.py | 9 +--------
tests/test_lists.py | 9 +--------
tests/test_tables.py | 9 +--------
tests/utils.py | 9 +++++++++
11 files changed, 28 insertions(+), 59 deletions(-)
create mode 100644 tests/utils.py
diff --git a/README.rst b/README.rst
index 5c655d2..b37a503 100644
--- a/README.rst
+++ b/README.rst
@@ -153,9 +153,9 @@ wrap, wrap_width
strip_document
Controls whether leading and/or trailing separation newlines are removed from
the final converted document. Supported values are ``LSTRIP`` (leading),
- ``RSTRIP`` (trailing), ``STRIP`` (both), and ``None`` (no removal). Newlines
+ ``RSTRIP`` (trailing), ``STRIP`` (both), and ``None`` (neither). Newlines
within the document are unaffected.
- Defaults to ``LSTRIP``.
+ Defaults to ``STRIP``.
Options may be specified as kwargs to the ``markdownify`` function, or as a
nested ``Options`` class in ``MarkdownConverter`` subclasses.
diff --git a/markdownify/__init__.py b/markdownify/__init__.py
index 6f77d59..7d14fe7 100644
--- a/markdownify/__init__.py
+++ b/markdownify/__init__.py
@@ -104,7 +104,7 @@ class DefaultOptions:
keep_inline_images_in = []
newline_style = SPACES
strip = None
- strip_document = LSTRIP
+ strip_document = STRIP
strong_em_symbol = ASTERISK
sub_symbol = ''
sup_symbol = ''
diff --git a/tests/test_advanced.py b/tests/test_advanced.py
index 286b354..6123d8c 100644
--- a/tests/test_advanced.py
+++ b/tests/test_advanced.py
@@ -1,11 +1,4 @@
-from markdownify import MarkdownConverter
-
-
-def md(html, **options):
- # disable document-level stripping so separation newlines are included in testing
- options = {**options, "strip_document": None}
-
- return MarkdownConverter(**options).convert(html)
+from .utils import md
def test_chomp():
diff --git a/tests/test_args.py b/tests/test_args.py
index 30324ad..301c19f 100644
--- a/tests/test_args.py
+++ b/tests/test_args.py
@@ -2,7 +2,8 @@
Test whitelisting/blacklisting of specific tags.
"""
-from markdownify import markdownify as md, LSTRIP, RSTRIP, STRIP
+from markdownify import markdownify, LSTRIP, RSTRIP, STRIP
+from .utils import md
def test_strip():
@@ -26,8 +27,8 @@ def test_do_not_convert():
def test_strip_document():
- assert md("<p>Hello</p>") == "Hello\n\n" # defaults to LSTRIP
- assert md("<p>Hello</p>", strip_document=LSTRIP) == "Hello\n\n"
- assert md("<p>Hello</p>", strip_document=RSTRIP) == "\n\nHello"
- assert md("<p>Hello</p>", strip_document=STRIP) == "Hello"
- assert md("<p>Hello</p>", strip_document=None) == "\n\nHello\n\n"
+ assert markdownify("<p>Hello</p>") == "Hello" # test default of STRIP
+ assert markdownify("<p>Hello</p>", strip_document=LSTRIP) == "Hello\n\n"
+ assert markdownify("<p>Hello</p>", strip_document=RSTRIP) == "\n\nHello"
+ assert markdownify("<p>Hello</p>", strip_document=STRIP) == "Hello"
+ assert markdownify("<p>Hello</p>", strip_document=None) == "\n\nHello\n\n"
diff --git a/tests/test_basic.py b/tests/test_basic.py
index b91d8ee..584adb9 100644
--- a/tests/test_basic.py
+++ b/tests/test_basic.py
@@ -1,11 +1,4 @@
-from markdownify import MarkdownConverter
-
-
-def md(html, **options):
- # disable document-level stripping so separation newlines are included in testing
- options = {**options, "strip_document": None}
-
- return MarkdownConverter(**options).convert(html)
+from .utils import md
def test_single_tag():
diff --git a/tests/test_conversions.py b/tests/test_conversions.py
index ed61643..1367006 100644
--- a/tests/test_conversions.py
+++ b/tests/test_conversions.py
@@ -1,11 +1,5 @@
-from markdownify import MarkdownConverter, ATX, ATX_CLOSED, BACKSLASH, SPACES, UNDERSCORE
-
-
-def md(html, **options):
- # disable document-level stripping so separation newlines are included in testing
- options = {**options, "strip_document": None}
-
- return MarkdownConverter(**options).convert(html)
+from markdownify import ATX, ATX_CLOSED, BACKSLASH, SPACES, UNDERSCORE
+from .utils import md
def inline_tests(tag, markup):
diff --git a/tests/test_custom_converter.py b/tests/test_custom_converter.py
index adc83f7..0d3f6af 100644
--- a/tests/test_custom_converter.py
+++ b/tests/test_custom_converter.py
@@ -20,8 +20,8 @@ def test_custom_conversion_functions():
def md(html, **options):
return UnitTestConverter(**options).convert(html)
- assert md(' ') == '\n\n'
- assert md(' ') == '\n\n'
+ assert md(' text') == '\n\ntext'
+ assert md(' text') == '\n\ntext'
assert md("text ") == "FUNCTION USED: text"
diff --git a/tests/test_escaping.py b/tests/test_escaping.py
index 3634b37..d213675 100644
--- a/tests/test_escaping.py
+++ b/tests/test_escaping.py
@@ -1,13 +1,6 @@
import warnings
from bs4 import MarkupResemblesLocatorWarning
-from markdownify import MarkdownConverter
-
-
-def md(html, **options):
- # disable document-level stripping so separation newlines are included in testing
- options = {**options, "strip_document": None}
-
- return MarkdownConverter(**options).convert(html)
+from .utils import md
def test_asterisks():
diff --git a/tests/test_lists.py b/tests/test_lists.py
index 21ab541..6b320ca 100644
--- a/tests/test_lists.py
+++ b/tests/test_lists.py
@@ -1,11 +1,4 @@
-from markdownify import MarkdownConverter
-
-
-def md(html, **options):
- # disable document-level stripping so separation newlines are included in testing
- options = {**options, "strip_document": None}
-
- return MarkdownConverter(**options).convert(html)
+from .utils import md
nested_uls = """
diff --git a/tests/test_tables.py b/tests/test_tables.py
index f0aee56..e41b389 100644
--- a/tests/test_tables.py
+++ b/tests/test_tables.py
@@ -1,11 +1,4 @@
-from markdownify import MarkdownConverter
-
-
-def md(html, **options):
- # disable document-level stripping so separation newlines are included in testing
- options = {**options, "strip_document": None}
-
- return MarkdownConverter(**options).convert(html)
+from .utils import md
table = """
diff --git a/tests/utils.py b/tests/utils.py
new file mode 100644
index 0000000..0dac580
--- /dev/null
+++ b/tests/utils.py
@@ -0,0 +1,9 @@
+from markdownify import MarkdownConverter
+
+
+# for unit testing, disable document-level stripping by default (callers can
+# still pass strip_document) so that separation newlines appear in the output
+def md(html, **options):
+ options = {"strip_document": None, **options}
+
+ return MarkdownConverter(**options).convert(html)