From 1223b588c54db1054aeb31baef2930dd171d656e Mon Sep 17 00:00:00 2001
From: divyesh_06 <divyeshlakhotia@gmail.com>
Date: Sat, 21 Dec 2024 12:43:24 +0530
Subject: [PATCH 1/4] Added convert_all func

---
 markdownify/__init__.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/markdownify/__init__.py b/markdownify/__init__.py
index 3272ce5..e057508 100644
--- a/markdownify/__init__.py
+++ b/markdownify/__init__.py
@@ -171,6 +171,9 @@ def process_tag(self, node, convert_as_inline, children_only=False):
                 text = text_strip + newlines + next_text_strip
 
         if not children_only:
+            convert_all = getattr(self, 'convert_all', None)
+            if convert_all:
+                text = convert_all(node, text, convert_as_inline)
             convert_fn = getattr(self, 'convert_%s' % node.name, None)
             if convert_fn and self.should_convert_tag(node.name):
                 text = convert_fn(node, text, convert_as_inline)

From 7ff6da8d799fb2b9278d7bcce5b786e965b0318e Mon Sep 17 00:00:00 2001
From: divyesh_06 <divyeshlakhotia@gmail.com>
Date: Fri, 10 Jan 2025 18:11:32 +0530
Subject: [PATCH 2/4] Added postprocess and preprocess functions

---
 markdownify/__init__.py | 13 ++++++++++---
 1 file changed, 10 insertions(+), 3 deletions(-)

diff --git a/markdownify/__init__.py b/markdownify/__init__.py
index e057508..f6c56b0 100644
--- a/markdownify/__init__.py
+++ b/markdownify/__init__.py
@@ -101,6 +101,8 @@ class DefaultOptions:
         strip = None
         strong_em_symbol = ASTERISK
         sub_symbol = ''
+        postprocess_fn = None
+        preprocess_fn = None
         sup_symbol = ''
         wrap = False
         wrap_width = 80
@@ -114,6 +116,8 @@ def __init__(self, **options):
         self.options = _todict(self.DefaultOptions)
         self.options.update(_todict(self.Options))
         self.options.update(options)
+        self.postprocess_fn = self.options['postprocess_fn']
+        self.preprocess_fn = self.options['preprocess_fn']
         if self.options['strip'] is not None and self.options['convert'] is not None:
             raise ValueError('You may specify either tags to strip or tags to'
                              ' convert, but not both.')
@@ -171,13 +175,16 @@ def process_tag(self, node, convert_as_inline, children_only=False):
                 text = text_strip + newlines + next_text_strip
 
         if not children_only:
-            convert_all = getattr(self, 'convert_all', None)
-            if convert_all:
-                text = convert_all(node, text, convert_as_inline)
+            if self.preprocess_fn and self.should_convert_tag(node.name):
+                text = self.preprocess_fn(node, text, convert_as_inline)
+
             convert_fn = getattr(self, 'convert_%s' % node.name, None)
             if convert_fn and self.should_convert_tag(node.name):
                 text = convert_fn(node, text, convert_as_inline)
 
+            if self.postprocess_fn and self.should_convert_tag(node.name):
+                text = self.postprocess_fn(node, text, convert_as_inline)
+                
         return text
 
     def process_text(self, el):

From 8ecd7aebedd37d84d7b4874d2c4521225abcb762 Mon Sep 17 00:00:00 2001
From: divyesh-vipermetrics <divyesh@vipermetrics.com>
Date: Sat, 31 May 2025 13:41:05 +0530
Subject: [PATCH 3/4] Added tests for preprocess and postprocess

---
 markdownify/__init__.py              |   5 +-
 tests/test_preprocess_postprocess.py | 126 +++++++++++++++++++++++++++
 2 files changed, 128 insertions(+), 3 deletions(-)
 create mode 100644 tests/test_preprocess_postprocess.py

diff --git a/markdownify/__init__.py b/markdownify/__init__.py
index f6c56b0..b078d20 100644
--- a/markdownify/__init__.py
+++ b/markdownify/__init__.py
@@ -101,8 +101,8 @@ class DefaultOptions:
         strip = None
         strong_em_symbol = ASTERISK
         sub_symbol = ''
-        postprocess_fn = None
         preprocess_fn = None
+        postprocess_fn = None
         sup_symbol = ''
         wrap = False
         wrap_width = 80
@@ -116,8 +116,8 @@ def __init__(self, **options):
         self.options = _todict(self.DefaultOptions)
         self.options.update(_todict(self.Options))
         self.options.update(options)
-        self.postprocess_fn = self.options['postprocess_fn']
         self.preprocess_fn = self.options['preprocess_fn']
+        self.postprocess_fn = self.options['postprocess_fn']
         if self.options['strip'] is not None and self.options['convert'] is not None:
             raise ValueError('You may specify either tags to strip or tags to'
                              ' convert, but not both.')
@@ -184,7 +184,6 @@ def process_tag(self, node, convert_as_inline, children_only=False):
 
             if self.postprocess_fn and self.should_convert_tag(node.name):
                 text = self.postprocess_fn(node, text, convert_as_inline)
-                
         return text
 
     def process_text(self, el):
diff --git a/tests/test_preprocess_postprocess.py b/tests/test_preprocess_postprocess.py
new file mode 100644
index 0000000..25e850f
--- /dev/null
+++ b/tests/test_preprocess_postprocess.py
@@ -0,0 +1,126 @@
+from markdownify import markdownify as md
+
+
+def test_preprocess_all_tags():
+
+    def preprocess(node, text, convert_as_inline):
+        # Wrap text in [align=...] when the inline style sets text-align.
+        style = node.attrs.get('style', '')
+        if 'text-align' not in style:
+            return text
+        alignment = style.split("text-align:")[1].split(";")[0].strip()
+        if not alignment:
+            return text
+        return f"[align={alignment}]{text}[/align]"
+
+    assert md(
+        '<p style="text-align: center;">para</p><b style="text-align: left;">bold</b>',
+        preprocess_fn=preprocess) == '\n\n[align=center]para[/align]\n\n**[align=left]bold[/align]**'
+
+
+def test_postprocess_all_tags():
+
+    def postprocess(node, text, convert_as_inline):
+        alignment = ""
+        if 'style' in node.attrs and 'text-align' in node.attrs['style']:
+            style = node.attrs['style']
+            alignment = style.split("text-align:")[1].split(";")[0].strip()
+
+        if alignment:
+            return f"[align={alignment}]{text}[/align]"
+        return text
+
+    # Convert once; the hook wraps each tag's already-converted markdown.
+    result = md(
+        '<p style="text-align: center;">para</p><b style="text-align: left;">bold</b>',
+        postprocess_fn=postprocess)
+    expected = '[align=center]\n\npara\n\n[/align][align=left]**bold**[/align]'
+    assert result == expected
+
+
+def test_preprocess_runs_before_conversion():
+
+    def preprocess(node, text, convert_as_inline):
+        # Only <b> content is tagged; every other node passes through as-is.
+        return f"PRE_{text}_PRE" if node.name == 'b' else text
+
+    # Without the hook '<b>bold</b>' converts to "**bold**"; the markers end
+    # up inside the asterisks because the hook runs before the <b> conversion.
+    converted = md('<b>bold</b>', preprocess_fn=preprocess)
+    assert converted == '**PRE_bold_PRE**'
+
+
+def test_postprocess_runs_after_conversion():
+
+    def postprocess(node, text, convert_as_inline):
+        # Leave everything except <b> untouched.
+        if node.name != 'b':
+            return text
+        return f"POST_{text}_POST"
+
+    # The hook receives the converted "**bold**", so the POST_ markers land
+    # outside the asterisks.
+    converted = md('<b>bold</b>', postprocess_fn=postprocess)
+    assert converted == 'POST_**bold**_POST'
+
+
+def test_preprocess_doesnt_prevent_conversion():
+
+    def preprocess(node, text, convert_as_inline):
+        return text.upper()  # alters the text only; <b> must still convert
+
+    converted = md('<b>bold</b>', preprocess_fn=preprocess)
+    assert converted == '**BOLD**'  # bold markup kept, content uppercased
+
+
+def test_postprocess_doesnt_prevent_conversion():
+
+    def postprocess(node, text, convert_as_inline):
+        return text.upper()  # alters the converted output only
+
+    converted = md('<b>bold</b>', postprocess_fn=postprocess)
+    assert converted == '**BOLD**'  # asterisks are unaffected by upper()
+
+
+def test_combined_pre_and_post_processing():
+
+    def preprocess(node, text, convert_as_inline):
+        return f"({text})"
+
+    def postprocess(node, text, convert_as_inline):
+        return f"[{text}]"
+
+    # <b>bold</b> normally becomes "**bold**"
+    # With preprocessing: "(bold)" -> "**(bold)**"
+    # Then postprocessing: "[**(bold)**]"
+    assert md('<b>bold</b>',
+              preprocess_fn=preprocess,
+              postprocess_fn=postprocess) == '[**(bold)**]'
+
+
+def test_processing_with_multiple_tags():
+
+    def preprocess(node, text, convert_as_inline):
+        # Label <b> and <i> content; leave every other node's text untouched.
+        labels = {'b': 'B:', 'i': 'I:'}
+        if node.name not in labels:
+            return text
+        return f"{labels[node.name]}{text}"
+
+    # <p><b>bold</b> and <i>italic</i></p>
+    # Should become "**B:bold** and *I:italic*"
+    assert md('<p><b>bold</b> and <i>italic</i></p>',
+              preprocess_fn=preprocess) == '\n\n**B:bold** and *I:italic*\n\n'
+
+
+def test_processing_with_nested_tags():
+
+    def postprocess(node, text, convert_as_inline):
+        # Prefix only the paragraph's already-converted output.
+        prefix = 'P:' if node.name == 'p' else ''
+        return f"{prefix}{text}"
+
+    # <p><b>bold</b> text</p> normally becomes "**bold** text"
+    # With postprocessing becomes "P:**bold** text"
+    assert md('<p><b>bold</b> text</p>',
+              postprocess_fn=postprocess) == 'P:\n\n**bold** text\n\n'

From c28a1cf4d38bbcbc83e031f6d029cb39e09e628a Mon Sep 17 00:00:00 2001
From: divyesh-vipermetrics <divyesh@vipermetrics.com>
Date: Sat, 31 May 2025 14:10:53 +0530
Subject: [PATCH 4/4] Fixed failed test cases

---
 markdownify/__init__.py              | 92 ++++++++++++++++++----------
 tests/test_preprocess_postprocess.py | 16 ++---
 2 files changed, 69 insertions(+), 39 deletions(-)

diff --git a/markdownify/__init__.py b/markdownify/__init__.py
index b0d8c46..f23a1b3 100644
--- a/markdownify/__init__.py
+++ b/markdownify/__init__.py
@@ -17,7 +17,9 @@
 
 # Extract (leading_nl, content, trailing_nl) from a string
 # (functionally equivalent to r'^(\n*)(.*?)(\n*)$', but greedy is faster than reluctant here)
-re_extract_newlines = re.compile(r'^(\n*)((?:.*[^\n])?)(\n*)$', flags=re.DOTALL)
+re_extract_newlines = re.compile(
+    r'^(\n*)((?:.*[^\n])?)(\n*)$',
+    flags=re.DOTALL)
 
 # Escape miscellaneous special Markdown characters
 re_escape_misc_chars = re.compile(r'([]\\&<`[>~=+|])')
@@ -78,6 +80,7 @@ def abstract_inline_conversion(markup_fn):
     the text if it looks like an HTML tag. markup_fn is necessary to allow for
     references to self.strong_em_symbol etc.
     """
+
     def implementation(self, el, text, parent_tags):
         markup_prefix = markup_fn(self)
         if markup_prefix.startswith('<') and markup_prefix.endswith('>'):
@@ -89,12 +92,14 @@ def implementation(self, el, text, parent_tags):
         prefix, suffix, text = chomp(text)
         if not text:
             return ''
-        return '%s%s%s%s%s' % (prefix, markup_prefix, text, markup_suffix, suffix)
+        return '%s%s%s%s%s' % (prefix, markup_prefix,
+                               text, markup_suffix, suffix)
     return implementation
 
 
 def _todict(obj):
-    return dict((k, getattr(obj, k)) for k in dir(obj) if not k.startswith('_'))
+    return dict((k, getattr(obj, k))
+                for k in dir(obj) if not k.startswith('_'))
 
 
 def should_remove_whitespace_inside(el):
@@ -209,7 +214,8 @@ def process_element(self, node, parent_tags=None):
             return self.process_tag(node, parent_tags=parent_tags)
 
     def process_tag(self, node, parent_tags=None):
-        # For the top-level element, initialize the parent context with an empty set.
+        # For the top-level element, initialize the parent context with an
+        # empty set.
         if parent_tags is None:
             parent_tags = set()
 
@@ -230,10 +236,12 @@ def _can_ignore(el):
                     # Non-whitespace text nodes are always processed.
                     return False
                 elif should_remove_inside and (not el.previous_sibling or not el.next_sibling):
-                    # Inside block elements (excluding <pre>), ignore adjacent whitespace elements.
+                    # Inside block elements (excluding <pre>), ignore adjacent
+                    # whitespace elements.
                     return True
                 elif should_remove_whitespace_outside(el.previous_sibling) or should_remove_whitespace_outside(el.next_sibling):
-                    # Outside block elements (including <pre>), ignore adjacent whitespace elements.
+                    # Outside block elements (including <pre>), ignore adjacent
+                    # whitespace elements.
                     return True
                 else:
                     return False
@@ -242,22 +250,24 @@ def _can_ignore(el):
             else:
                 raise ValueError('Unexpected element type: %s' % type(el))
 
-
-        children_to_convert = [el for el in node.children if not _can_ignore(el)]
+        children_to_convert = [
+            el for el in node.children if not _can_ignore(el)]
 
         # Create a copy of this tag's parent context, then update it to include this tag
         # to propagate down into the children.
         parent_tags_for_children = set(parent_tags)
         parent_tags_for_children.add(node.name)
 
-        # if this tag is a heading or table cell, add an '_inline' parent pseudo-tag
+        # if this tag is a heading or table cell, add an '_inline' parent
+        # pseudo-tag
         if (
             re_html_heading.match(node.name) is not None  # headings
             or node.name in {'td', 'th'}  # table cells
         ):
             parent_tags_for_children.add('_inline')
 
-        # if this tag is a preformatted element, add a '_noformat' parent pseudo-tag
+        # if this tag is a preformatted element, add a '_noformat' parent
+        # pseudo-tag
         if node.name in {'pre', 'code', 'kbd', 'samp'}:
             parent_tags_for_children.add('_noformat')
 
@@ -279,17 +289,21 @@ def _can_ignore(el):
             updated_child_strings = ['']  # so the first lookback works
             for child_string in child_strings:
                 # Separate the leading/trailing newlines from the content.
-                leading_nl, content, trailing_nl = re_extract_newlines.match(child_string).groups()
+                leading_nl, content, trailing_nl = re_extract_newlines.match(
+                    child_string).groups()
 
                 # If the last child had trailing newlines and this child has leading newlines,
                 # use the larger newline count, limited to 2.
                 if updated_child_strings[-1] and leading_nl:
-                    prev_trailing_nl = updated_child_strings.pop()  # will be replaced by the collapsed value
-                    num_newlines = min(2, max(len(prev_trailing_nl), len(leading_nl)))
+                    # will be replaced by the collapsed value
+                    prev_trailing_nl = updated_child_strings.pop()
+                    num_newlines = min(
+                        2, max(len(prev_trailing_nl), len(leading_nl)))
                     leading_nl = '\n' * num_newlines
 
                 # Add the results to the updated child string list.
-                updated_child_strings.extend([leading_nl, content, trailing_nl])
+                updated_child_strings.extend(
+                    [leading_nl, content, trailing_nl])
 
             child_strings = updated_child_strings
 
@@ -297,7 +311,6 @@ def _can_ignore(el):
         text = ''.join(child_strings)
 
         # apply this tag's final conversion function
-        
 
         if self.preprocess_fn and self.should_convert_tag(node.name):
             text = self.preprocess_fn(node, text, parent_tags=parent_tags)
@@ -307,7 +320,7 @@ def _can_ignore(el):
             text = convert_fn(node, text, parent_tags=parent_tags)
         if self.postprocess_fn and self.should_convert_tag(node.name):
             text = self.postprocess_fn(node, text, parent_tags=parent_tags)
-            
+
         return text
 
     def convert__document_(self, el, text, parent_tags):
@@ -317,16 +330,20 @@ def convert__document_(self, el, text, parent_tags):
         elif self.options['strip_document'] == RSTRIP:
             text = text.rstrip('\n')  # remove trailing separation newlines
         elif self.options['strip_document'] == STRIP:
-            text = text.strip('\n')  # remove leading and trailing separation newlines
+            # remove leading and trailing separation newlines
+            text = text.strip('\n')
         elif self.options['strip_document'] is None:
             pass  # leave leading and trailing separation newlines as-is
         else:
-            raise ValueError('Invalid value for strip_document: %s' % self.options['strip_document'])
+            raise ValueError(
+                'Invalid value for strip_document: %s' %
+                self.options['strip_document'])
 
         return text
 
     def process_text(self, el, parent_tags=None):
-        # For the top-level element, initialize the parent context with an empty set.
+        # For the top-level element, initialize the parent context with an
+        # empty set.
         if parent_tags is None:
             parent_tags = set()
 
@@ -340,7 +357,8 @@ def process_text(self, el, parent_tags=None):
                 text = re_newline_whitespace.sub('\n', text)
                 text = re_whitespace.sub(' ', text)
 
-        # escape special characters if we're not inside a preformatted or code element
+        # escape special characters if we're not inside a preformatted or code
+        # element
         if '_noformat' not in parent_tags:
             text = self.escape(text, parent_tags)
 
@@ -376,7 +394,8 @@ def get_conv_fn(self, tag_name):
             return None
 
         # Look for an explicitly defined conversion function by tag name first
-        convert_fn_name = "convert_%s" % re_make_convert_fn_name.sub("_", tag_name)
+        convert_fn_name = "convert_%s" % re_make_convert_fn_name.sub(
+            "_", tag_name)
         convert_fn = getattr(self, convert_fn_name, None)
         if convert_fn:
             return convert_fn
@@ -385,7 +404,8 @@ def get_conv_fn(self, tag_name):
         match = re_html_heading.match(tag_name)
         if match:
             n = int(match.group(1))  # get value of N from <hN>
-            return lambda el, text, parent_tags: self.convert_hN(n, el, text, parent_tags)
+            return lambda el, text, parent_tags: self.convert_hN(
+                n, el, text, parent_tags)
 
         # No conversion function was found
         return None
@@ -438,9 +458,11 @@ def convert_a(self, el, text, parent_tags):
         if self.options['default_title'] and not title:
             title = href
         title_part = ' "%s"' % title.replace('"', r'\"') if title else ''
-        return '%s[%s](%s%s)%s' % (prefix, text, href, title_part, suffix) if href else text
+        return '%s[%s](%s%s)%s' % (prefix, text, href,
+                                   title_part, suffix) if href else text
 
-    convert_b = abstract_inline_conversion(lambda self: 2 * self.options['strong_em_symbol'])
+    convert_b = abstract_inline_conversion(
+        lambda self: 2 * self.options['strong_em_symbol'])
 
     def convert_blockquote(self, el, text, parent_tags):
         # handle some early-exit scenarios
@@ -485,7 +507,8 @@ def convert_div(self, el, text, parent_tags):
 
     convert_section = convert_div
 
-    convert_em = abstract_inline_conversion(lambda self: self.options['strong_em_symbol'])
+    convert_em = abstract_inline_conversion(
+        lambda self: self.options['strong_em_symbol'])
 
     convert_kbd = convert_code
 
@@ -662,7 +685,8 @@ def convert_pre(self, el, text, parent_tags):
         code_language = self.options['code_language']
 
         if self.options['code_language_callback']:
-            code_language = self.options['code_language_callback'](el) or code_language
+            code_language = self.options['code_language_callback'](
+                el) or code_language
 
         return '\n\n```%s\n%s\n```\n\n' % (code_language, text)
 
@@ -681,9 +705,11 @@ def convert_style(self, el, text, parent_tags):
 
     convert_samp = convert_code
 
-    convert_sub = abstract_inline_conversion(lambda self: self.options['sub_symbol'])
+    convert_sub = abstract_inline_conversion(
+        lambda self: self.options['sub_symbol'])
 
-    convert_sup = abstract_inline_conversion(lambda self: self.options['sup_symbol'])
+    convert_sup = abstract_inline_conversion(
+        lambda self: self.options['sup_symbol'])
 
     def convert_table(self, el, text, parent_tags):
         return '\n\n' + text.strip() + '\n\n'
@@ -716,9 +742,10 @@ def convert_tr(self, el, text, parent_tags):
                 and len(el.parent.find_all('tr')) == 1)
         )
         is_head_row_missing = (
-            (is_first_row and not el.parent.name == 'tbody')
-            or (is_first_row and el.parent.name == 'tbody' and len(el.parent.parent.find_all(['thead'])) < 1)
-        )
+            (is_first_row and not el.parent.name == 'tbody') or (
+                is_first_row and el.parent.name == 'tbody' and len(
+                    el.parent.parent.find_all(
+                        ['thead'])) < 1))
         overline = ''
         underline = ''
         full_colspan = 0
@@ -735,7 +762,8 @@ def convert_tr(self, el, text, parent_tags):
             # - is headline or
             # - headline is missing and header inference is enabled
             # print headline underline
-            underline += '| ' + ' | '.join(['---'] * full_colspan) + ' |' + '\n'
+            underline += '| ' + \
+                ' | '.join(['---'] * full_colspan) + ' |' + '\n'
         elif ((is_head_row_missing
                and not self.options['table_infer_header'])
               or (is_first_row
diff --git a/tests/test_preprocess_postprocess.py b/tests/test_preprocess_postprocess.py
index ffbc08d..c05e4b4 100644
--- a/tests/test_preprocess_postprocess.py
+++ b/tests/test_preprocess_postprocess.py
@@ -15,7 +15,7 @@ def preprocess(node, text, parent_tags):
 
     assert md(
         '<p style="text-align: center;">para</p><b style="text-align: left;">bold</b>',
-        preprocess_fn=preprocess) == '\n\n[align=center]para[/align]\n\n**[align=left]bold[/align]**'
+        preprocess_fn=preprocess) == '[align=center]para[/align]\n\n**[align=left]bold[/align]**'
 
 
 def test_postprocess_all_tags():
@@ -85,17 +85,19 @@ def postprocess(node, text, parent_tags):
 def test_combined_pre_and_post_processing():
 
     def preprocess(node, text, parent_tags):
-        return f"({text})"
+        # Wrap in PRE markers so the call order shows up in the output.
+        return f"PRE:{text}:PRE"
 
     def postprocess(node, text, parent_tags):
-        return f"[{text}]"
+        # Wrap in POST markers so the call order shows up in the output.
+        return f"POST:{text}:POST"
 
-    # <b>bold</b> normally becomes "**bold**"
-    # With preprocessing: "(bold)" -> "**(bold)**"
-    # Then postprocessing: "[**(bold)**]"
-    assert md('<b>bold</b>',
+    # Input is plain text, so each hook wraps the output exactly once.
+    # With preprocessing: "text" -> "PRE:text:PRE"
+    # Then postprocessing: "POST:PRE:text:PRE:POST"
+    assert md('text',
               preprocess_fn=preprocess,
-              postprocess_fn=postprocess) == '[**(bold)**]'
+              postprocess_fn=postprocess) == 'POST:PRE:text:PRE:POST'
 
 
 def test_processing_with_multiple_tags():
@@ -110,7 +112,7 @@ def preprocess(node, text, parent_tags):
     # <p><b>bold</b> and <i>italic</i></p>
     # Should become "**B:bold** and *I:italic*"
     assert md('<p><b>bold</b> and <i>italic</i></p>',
-              preprocess_fn=preprocess) == '\n\n**B:bold** and *I:italic*\n\n'
+              preprocess_fn=preprocess) == '**B:bold** and *I:italic*'
 
 
 def test_processing_with_nested_tags():
@@ -123,4 +125,4 @@ def postprocess(node, text, parent_tags):
     # <p><b>bold</b> text</p> normally becomes "**bold** text"
     # With postprocessing becomes "P:**bold** text"
     assert md('<p><b>bold</b> text</p>',
-              postprocess_fn=postprocess) == 'P:\n\n**bold** text\n\n'
+              postprocess_fn=postprocess) == 'P:\n\n**bold** text'