diff --git a/README.rst b/README.rst
index 946c83d..9a0798f 100644
--- a/README.rst
+++ b/README.rst
@@ -157,6 +157,12 @@ strip_document
   within the document are unaffected.
   Defaults to ``STRIP``.
 
+strip_pre
+  Controls whether leading/trailing blank lines are removed from ``<pre>``
+  tags. Supported values are ``STRIP`` (all leading/trailing blank lines),
+  ``STRIP_ONE`` (one leading/trailing blank line), and ``None`` (neither).
+  Defaults to ``STRIP``.
+
 bs4_options
   Specify additional configuration options for the ``BeautifulSoup`` object
   used to interpret the HTML markup. String and list values (such as ``lxml``
diff --git a/markdownify/__init__.py b/markdownify/__init__.py
index b219ca2..72c5214 100644
--- a/markdownify/__init__.py
+++ b/markdownify/__init__.py
@@ -11,6 +11,10 @@
 re_all_whitespace = re.compile(r'[\t \r\n]+')
 re_newline_whitespace = re.compile(r'[\t \r\n]*[\r\n][\t \r\n]*')
 re_html_heading = re.compile(r'h(\d+)')
+re_pre_lstrip1 = re.compile(r'^ *\n')
+re_pre_rstrip1 = re.compile(r'\n *$')
+re_pre_lstrip = re.compile(r'^[ \n]*\n')
+re_pre_rstrip = re.compile(r'[ \n]*$')
 
 # Pattern for creating convert_ function names from tag names
 re_make_convert_fn_name = re.compile(r'[\[\]:-]')
@@ -51,10 +55,25 @@
 ASTERISK = '*'
 UNDERSCORE = '_'
 
-# Document strip styles
+# Document/pre strip styles
 LSTRIP = 'lstrip'
 RSTRIP = 'rstrip'
 STRIP = 'strip'
+STRIP_ONE = 'strip_one'
+
+
+def strip1_pre(text):
+    """Strip one leading and trailing newline from a <pre> string."""
+    text = re_pre_lstrip1.sub('', text)
+    text = re_pre_rstrip1.sub('', text)
+    return text
+
+
+def strip_pre(text):
+    """Strip all leading and trailing newlines from a <pre> string."""
+    text = re_pre_lstrip.sub('', text)
+    text = re_pre_rstrip.sub('', text)
+    return text
 
 
 def chomp(text):
@@ -168,6 +187,7 @@ class DefaultOptions:
         newline_style = SPACES
         strip = None
         strip_document = STRIP
+        strip_pre = STRIP
         strong_em_symbol = ASTERISK
         sub_symbol = ''
         sup_symbol = ''
@@ -656,6 +676,15 @@ def convert_pre(self, el, text, parent_tags):
         if self.options['code_language_callback']:
             code_language = self.options['code_language_callback'](el) or code_language
 
+        if self.options['strip_pre'] == STRIP:
+            text = strip_pre(text)  # remove all leading/trailing newlines
+        elif self.options['strip_pre'] == STRIP_ONE:
+            text = strip1_pre(text)  # remove one leading/trailing newline
+        elif self.options['strip_pre'] is None:
+            pass  # leave leading and trailing newlines as-is
+        else:
+            raise ValueError('Invalid value for strip_pre: %s' % self.options['strip_pre'])
+
         return '\n\n```%s\n%s\n```\n\n' % (code_language, text)
 
     def convert_q(self, el, text, parent_tags):
diff --git a/tests/test_args.py b/tests/test_args.py
index 1ba6482..838ef9d 100644
--- a/tests/test_args.py
+++ b/tests/test_args.py
@@ -2,7 +2,7 @@
 Test whitelisting/blacklisting of specific tags.
 
 """
-from markdownify import markdownify, LSTRIP, RSTRIP, STRIP
+from markdownify import markdownify, LSTRIP, RSTRIP, STRIP, STRIP_ONE
 from .utils import md
 
 
@@ -34,6 +34,13 @@ def test_strip_document():
     assert markdownify("<p>Hello</p>", strip_document=None) == "\n\nHello\n\n"
 
 
+def test_strip_pre():
+    assert markdownify("<pre>  \n  \n  Hello  \n  \n  </pre>") == "```\n  Hello\n```"
+    assert markdownify("<pre>  \n  \n  Hello  \n  \n  </pre>", strip_pre=STRIP) == "```\n  Hello\n```"
+    assert markdownify("<pre>  \n  \n  Hello  \n  \n  </pre>", strip_pre=STRIP_ONE) == "```\n  \n  Hello  \n  \n```"
+    assert markdownify("<pre>  \n  \n  Hello  \n  \n  </pre>", strip_pre=None) == "```\n  \n  \n  Hello  \n  \n  \n```"
+
+
 def bs4_options():
     assert markdownify("<p>Hello</p>", bs4_options="html.parser") == "Hello"
     assert markdownify("<p>Hello</p>", bs4_options=["html.parser"]) == "Hello"
diff --git a/tests/test_conversions.py b/tests/test_conversions.py
index 6145411..825559b 100644
--- a/tests/test_conversions.py
+++ b/tests/test_conversions.py
@@ -370,4 +370,4 @@ def test_spaces():
     assert md('test <blockquote> text </blockquote> after') == 'test\n> text\n\nafter'
     assert md(' <ol> <li> x </li> <li> y </li> </ol> ') == '\n\n1. x\n2. y\n'
     assert md(' <ul> <li> x </li> <li> y </li> <ul> ') == '\n\n* x\n* y\n'
-    assert md('test <pre> foo </pre> bar') == 'test\n\n```\n foo \n```\n\nbar'
+    assert md('test <pre> foo </pre> bar') == 'test\n\n```\n foo\n```\n\nbar'