diff --git a/README.rst b/README.rst index b37a503..c6c6d84 100644 --- a/README.rst +++ b/README.rst @@ -180,7 +180,7 @@ If you have a special usecase that calls for a special conversion, you can always inherit from ``MarkdownConverter`` and override the method you want to change. The function that handles a HTML tag named ``abc`` is called -``convert_abc(self, el, text, convert_as_inline)`` and returns a string +``convert_abc(self, el, text, parent_tags)`` and returns a string containing the converted HTML tag. The ``MarkdownConverter`` object will handle the conversion based on the function names: @@ -193,8 +193,8 @@ function names: """ Create a custom MarkdownConverter that adds two newlines after an image """ - def convert_img(self, el, text, convert_as_inline): - return super().convert_img(el, text, convert_as_inline) + '\n\n' + def convert_img(self, el, text, parent_tags): + return super().convert_img(el, text, parent_tags) + '\n\n' # Create shorthand method for conversion def md(html, **options): @@ -208,7 +208,7 @@ function names: """ Create a custom MarkdownConverter that ignores paragraphs """ - def convert_p(self, el, text, convert_as_inline): + def convert_p(self, el, text, parent_tags): return '' # Create shorthand method for conversion diff --git a/markdownify/__init__.py b/markdownify/__init__.py index e2cacd9..79ba8e7 100644 --- a/markdownify/__init__.py +++ b/markdownify/__init__.py @@ -57,13 +57,13 @@ def abstract_inline_conversion(markup_fn): the text if it looks like an HTML tag. markup_fn is necessary to allow for references to self.strong_em_symbol etc. """ - def implementation(self, el, text, convert_as_inline): + def implementation(self, el, text, parent_tags): markup_prefix = markup_fn(self) if markup_prefix.startswith('<') and markup_prefix.endswith('>'): markup_suffix = ' to call this """ - if convert_as_inline: + if '_inline' in parent_tags: return text # prevent MemoryErrors in case of very large n @@ -478,46 +495,40 @@ def _convert_hn(self, n, el, text, convert_as_inline): return '\n\n%s %s %s\n\n' % (hashes, text, hashes) return '\n\n%s %s\n\n' % (hashes, text) - def convert_hr(self, el, text, convert_as_inline): + def convert_hr(self, el, text, parent_tags): return '\n\n---\n\n' convert_i = convert_em - def convert_img(self, el, text, convert_as_inline): + def convert_img(self, el, text, parent_tags): alt = el.attrs.get('alt', None) or '' src = el.attrs.get('src', None) or '' title = el.attrs.get('title', None) or '' title_part = ' "%s"' % title.replace('"', r'\"') if title else '' - if (convert_as_inline + if ('_inline' in parent_tags and el.parent.name not in self.options['keep_inline_images_in']): return alt return '![%s](%s%s)' % (alt, src, title_part) - def convert_list(self, el, text, convert_as_inline): + def convert_list(self, el, text, parent_tags): # Converting a list to inline is undefined. - # Ignoring convert_to_inline for list. + # Ignoring inline conversion parents for list. - nested = False before_paragraph = False next_sibling = _next_block_content_sibling(el) if next_sibling and next_sibling.name not in ['ul', 'ol']: before_paragraph = True - while el: - if el.name == 'li': - nested = True - break - el = el.parent - if nested: - # remove trailing newline if nested + if 'li' in parent_tags: + # remove trailing newline if we're in a nested list return '\n' + text.rstrip() return '\n\n' + text + ('\n' if before_paragraph else '') convert_ul = convert_list convert_ol = convert_list - def convert_li(self, el, text, convert_as_inline): + def convert_li(self, el, text, parent_tags): # handle some early-exit scenarios text = (text or '').strip() if not text: @@ -554,8 +565,8 @@ def _indent_for_li(match): return '%s\n' % text - def convert_p(self, el, text, convert_as_inline): - if convert_as_inline: + def convert_p(self, el, text, parent_tags): + if '_inline' in parent_tags: return ' ' + text.strip() + ' ' text = text.strip() if self.options['wrap']: @@ -577,7 +588,7 @@ def convert_p(self, el, text, convert_as_inline): text = '\n'.join(new_lines) return '\n\n%s\n\n' % text if text else '' - def convert_pre(self, el, text, convert_as_inline): + def convert_pre(self, el, text, parent_tags): if not text: return '' code_language = self.options['code_language'] @@ -587,10 +598,10 @@ def convert_pre(self, el, text, convert_as_inline): return '\n\n```%s\n%s\n```\n\n' % (code_language, text) - def convert_script(self, el, text, convert_as_inline): + def convert_script(self, el, text, parent_tags): return '' - def convert_style(self, el, text, convert_as_inline): + def convert_style(self, el, text, parent_tags): return '' convert_s = convert_del @@ -603,28 +614,28 @@ def convert_style(self, el, text, convert_as_inline): convert_sup = abstract_inline_conversion(lambda self: self.options['sup_symbol']) - def convert_table(self, el, text, convert_as_inline): + def convert_table(self, el, text, parent_tags): return '\n\n' + text.strip() + '\n\n' - def convert_caption(self, el, text, convert_as_inline): + def convert_caption(self, el, text, parent_tags): return text.strip() + '\n\n' - def convert_figcaption(self, el, text, convert_as_inline): + def convert_figcaption(self, el, text, parent_tags): return '\n\n' + text.strip() + '\n\n' - def convert_td(self, el, text, convert_as_inline): + def convert_td(self, el, text, parent_tags): colspan = 1 if 'colspan' in el.attrs and el['colspan'].isdigit(): colspan = int(el['colspan']) return ' ' + text.strip().replace("\n", " ") + ' |' * colspan - def convert_th(self, el, text, convert_as_inline): + def convert_th(self, el, text, parent_tags): colspan = 1 if 'colspan' in el.attrs and el['colspan'].isdigit(): colspan = int(el['colspan']) return ' ' + text.strip().replace("\n", " ") + ' |' * colspan - def convert_tr(self, el, text, convert_as_inline): + def convert_tr(self, el, text, parent_tags): cells = el.find_all(['td', 'th']) is_first_row = el.find_previous_sibling() is None is_headrow = ( diff --git a/tests/test_custom_converter.py b/tests/test_custom_converter.py index 0d3f6af..f4734c9 100644 --- a/tests/test_custom_converter.py +++ b/tests/test_custom_converter.py @@ -6,11 +6,11 @@ class UnitTestConverter(MarkdownConverter): """ Create a custom MarkdownConverter for unit tests """ - def convert_img(self, el, text, convert_as_inline): + def convert_img(self, el, text, parent_tags): """Add two newlines after an image""" - return super().convert_img(el, text, convert_as_inline) + '\n\n' + return super().convert_img(el, text, parent_tags) + '\n\n' - def convert_custom_tag(self, el, text, convert_as_inline): + def convert_custom_tag(self, el, text, parent_tags): """Ensure conversion function is found for tags with special characters in name""" return "FUNCTION USED: %s" % text