diff --git a/README.rst b/README.rst index 946c83d..9a0798f 100644 --- a/README.rst +++ b/README.rst @@ -157,6 +157,12 @@ strip_document within the document are unaffected. Defaults to ``STRIP``. +strip_pre + Controls whether leading/trailing blank lines are removed from ``<pre>
`` + tags. Supported values are ``STRIP`` (all leading/trailing blank lines), + ``STRIP_ONE`` (one leading/trailing blank line), and ``None`` (neither). + Defaults to ``STRIP``. + bs4_options Specify additional configuration options for the ``BeautifulSoup`` object used to interpret the HTML markup. String and list values (such as ``lxml`` diff --git a/markdownify/__init__.py b/markdownify/__init__.py index b219ca2..72c5214 100644 --- a/markdownify/__init__.py +++ b/markdownify/__init__.py @@ -11,6 +11,10 @@ re_all_whitespace = re.compile(r'[\t \r\n]+') re_newline_whitespace = re.compile(r'[\t \r\n]*[\r\n][\t \r\n]*') re_html_heading = re.compile(r'h(\d+)') +re_pre_lstrip1 = re.compile(r'^ *\n') +re_pre_rstrip1 = re.compile(r'\n *$') +re_pre_lstrip = re.compile(r'^[ \n]*\n') +re_pre_rstrip = re.compile(r'[ \n]*$') # Pattern for creating convert_function names from tag names re_make_convert_fn_name = re.compile(r'[\[\]:-]') @@ -51,10 +55,25 @@ ASTERISK = '*' UNDERSCORE = '_' -# Document strip styles +# Document/pre strip styles LSTRIP = 'lstrip' RSTRIP = 'rstrip' STRIP = 'strip' +STRIP_ONE = 'strip_one' + + +def strip1_pre(text): + """Strip one leading and trailing newline from a string.""" + text = re_pre_lstrip1.sub('', text) + text = re_pre_rstrip1.sub('', text) + return text + + +def strip_pre(text): + """Strip all leading and trailing newlines from a string.""" + text = re_pre_lstrip.sub('', text) + text = re_pre_rstrip.sub('', text) + return text def chomp(text): @@ -168,6 +187,7 @@ class DefaultOptions: newline_style = SPACES strip = None strip_document = STRIP + strip_pre = STRIP strong_em_symbol = ASTERISK sub_symbol = '' sup_symbol = '' @@ -656,6 +676,15 @@ def convert_pre(self, el, text, parent_tags): if self.options['code_language_callback']: code_language = self.options['code_language_callback'](el) or code_language + if self.options['strip_pre'] == STRIP: + text = strip_pre(text) # remove all leading/trailing newlines + elif self.options['strip_pre'] == 
STRIP_ONE: + text = strip1_pre(text) # remove one leading/trailing newline + elif self.options['strip_pre'] is None: + pass # leave leading and trailing newlines as-is + else: + raise ValueError('Invalid value for strip_pre: %s' % self.options['strip_pre']) + return '\n\n```%s\n%s\n```\n\n' % (code_language, text) def convert_q(self, el, text, parent_tags): diff --git a/tests/test_args.py b/tests/test_args.py index 1ba6482..838ef9d 100644 --- a/tests/test_args.py +++ b/tests/test_args.py @@ -2,7 +2,7 @@ Test whitelisting/blacklisting of specific tags. """ -from markdownify import markdownify, LSTRIP, RSTRIP, STRIP +from markdownify import markdownify, LSTRIP, RSTRIP, STRIP, STRIP_ONE from .utils import md @@ -34,6 +34,13 @@ def test_strip_document(): assert markdownify("Hello
", strip_document=None) == "\n\nHello\n\n" +def test_strip_pre(): + assert markdownify("\n \n Hello \n \n") == "```\n Hello\n```" + assert markdownify("\n \n Hello \n \n", strip_pre=STRIP) == "```\n Hello\n```" + assert markdownify("\n \n Hello \n \n", strip_pre=STRIP_ONE) == "```\n \n Hello \n \n```" + assert markdownify("\n \n Hello \n \n", strip_pre=None) == "```\n \n \n Hello \n \n \n```" + + def bs4_options(): assert markdownify("Hello
", bs4_options="html.parser") == "Hello" assert markdownify("Hello
", bs4_options=["html.parser"]) == "Hello" diff --git a/tests/test_conversions.py b/tests/test_conversions.py index 6145411..825559b 100644 --- a/tests/test_conversions.py +++ b/tests/test_conversions.py @@ -370,4 +370,4 @@ def test_spaces(): assert md('testtextafter') == 'test\n> text\n\nafter' assert md('') == '\n\n1. x\n2. y\n' assert md('
- x
- y
- x
- y
') == '\n\n* x\n* y\n' - assert md('testfoobar') == 'test\n\n```\n foo \n```\n\nbar' + assert md('testfoobar') == 'test\n\n```\n foo\n```\n\nbar'