From a308230f2ce9ed49421429d40d70b761023a91fc Mon Sep 17 00:00:00 2001 From: SomeBottle Date: Sat, 7 Dec 2024 14:36:55 +0800 Subject: [PATCH 1/7] Add option for table header fallback --- README.rst | 5 +++++ markdownify/__init__.py | 32 +++++++++++++++++++++----------- 2 files changed, 26 insertions(+), 11 deletions(-) diff --git a/README.rst b/README.rst index af87f51..d1aefa6 100644 --- a/README.rst +++ b/README.rst @@ -139,6 +139,11 @@ keep_inline_images_in that should be allowed to contain inline images, for example ``['td']``. Defaults to an empty list. +table_header_fallback + If set to ``False``, when a table header is missing, it will create an empty + row as the header instead of using the first row of the table. + Defaults to ``True``. + wrap, wrap_width If ``wrap`` is set to ``True``, all text paragraphs are wrapped at ``wrap_width`` characters. Defaults to ``False`` and ``80``. diff --git a/markdownify/__init__.py b/markdownify/__init__.py index ac53077..8eadb01 100644 --- a/markdownify/__init__.py +++ b/markdownify/__init__.py @@ -102,6 +102,7 @@ class DefaultOptions: strong_em_symbol = ASTERISK sub_symbol = '' sup_symbol = '' + table_header_fallback = True wrap = False wrap_width = 80 @@ -516,15 +517,21 @@ def convert_th(self, el, text, convert_as_inline): def convert_tr(self, el, text, convert_as_inline): cells = el.find_all(['td', 'th']) - is_headrow = ( - all([cell.name == 'th' for cell in cells]) - or (not el.previous_sibling and not el.parent.name == 'tbody') + is_headrow = all([cell.name == 'th' for cell in cells]) + is_head_row_missing = ( + (not el.previous_sibling and not el.parent.name == 'tbody') or (not el.previous_sibling and el.parent.name == 'tbody' and len(el.parent.parent.find_all(['thead'])) < 1) ) overline = '' underline = '' - if is_headrow and not el.previous_sibling: - # first row and is headline: print headline underline + if ((is_headrow + or (is_head_row_missing + and self.options['table_header_fallback'])) + and not 
el.previous_sibling): + # first row and: + # - is headline or + # - headline is missing and fallback is enabled + # print headline underline full_colspan = 0 for cell in cells: if 'colspan' in cell.attrs and cell['colspan'].isdigit(): @@ -532,13 +539,16 @@ def convert_tr(self, el, text, convert_as_inline): else: full_colspan += 1 underline += '| ' + ' | '.join(['---'] * full_colspan) + ' |' + '\n' - elif (not el.previous_sibling - and (el.parent.name == 'table' - or (el.parent.name == 'tbody' - and not el.parent.previous_sibling))): + elif ((is_head_row_missing + and not self.options['table_header_fallback']) + or (not el.previous_sibling + and (el.parent.name == 'table' + or (el.parent.name == 'tbody' + and not el.parent.previous_sibling)))): + # headline is missing and fallback is disabled or: # first row, not headline, and: - # - the parent is table or - # - the parent is tbody at the beginning of a table. + # - the parent is table or + # - the parent is tbody at the beginning of a table. 
# print empty headline above this row overline += '| ' + ' | '.join([''] * len(cells)) + ' |' + '\n' overline += '| ' + ' | '.join(['---'] * len(cells)) + ' |' + '\n' From 768fada19f4adaebc612be0d417c1f3d722b2a96 Mon Sep 17 00:00:00 2001 From: SomeBottle Date: Sat, 7 Dec 2024 14:53:35 +0800 Subject: [PATCH 2/7] Add options to argument parser --- markdownify/main.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/markdownify/main.py b/markdownify/main.py index 4e1c874..ee14e9c 100644 --- a/markdownify/main.py +++ b/markdownify/main.py @@ -55,12 +55,19 @@ def main(argv=sys.argv[1:]): parser.add_argument('--no-escape-underscores', dest='escape_underscores', action='store_false', help="Do not escape '_' to '\\_' in text.") + parser.add_argument('--escape-misc', dest='escape_misc', + action='store_true', + help="Escape miscellaneous punctuation characters that sometimes " + "have Markdown significance in text.") parser.add_argument('-i', '--keep-inline-images-in', nargs='*', help="Images are converted to their alt-text when the images are " "located inside headlines or table cells. 
If some inline images " "should be converted to markdown images instead, this option can " "be set to a list of parent tags that should be allowed to " "contain inline images.") + parser.add_argument('--no-table-header-fallback', dest='table_header_fallback', + action='store_false', + help="Create an empty row as the header if a table header is missing.") parser.add_argument('-w', '--wrap', action='store_true', help="Wrap all text paragraphs at --wrap-width characters.") parser.add_argument('--wrap-width', type=int, default=80) From 99da38ee57709b6025029817f68e06f1fadc9dcd Mon Sep 17 00:00:00 2001 From: SomeBottle Date: Sat, 7 Dec 2024 15:45:23 +0800 Subject: [PATCH 3/7] Fix table header fallback option for thead and add tests --- README.rst | 4 ++-- markdownify/__init__.py | 7 ++++++- markdownify/main.py | 2 +- tests/test_tables.py | 36 ++++++++++++++++++++++++++++++++++++ 4 files changed, 45 insertions(+), 4 deletions(-) diff --git a/README.rst b/README.rst index d1aefa6..e670a50 100644 --- a/README.rst +++ b/README.rst @@ -140,8 +140,8 @@ keep_inline_images_in Defaults to an empty list. table_header_fallback - If set to ``False``, when a table header is missing, it will create an empty - row as the header instead of using the first row of the table. + If set to ``False``, when a table header (defined by ``<thead>`` or ``<th>``) is missing, + it will create an empty row as the header instead of using the first row of the table. Defaults to ``True``. 
wrap, wrap_width diff --git a/markdownify/__init__.py b/markdownify/__init__.py index 8eadb01..f9518a7 100644 --- a/markdownify/__init__.py +++ b/markdownify/__init__.py @@ -517,7 +517,12 @@ def convert_th(self, el, text, convert_as_inline): def convert_tr(self, el, text, convert_as_inline): cells = el.find_all(['td', 'th']) - is_headrow = all([cell.name == 'th' for cell in cells]) + is_headrow = ( + all([cell.name == 'th' for cell in cells]) + or (el.parent.name == 'thead' + # avoid multiple tr in thead + and len(el.parent.find_all('tr')) == 1) + ) is_head_row_missing = ( (not el.previous_sibling and not el.parent.name == 'tbody') or (not el.previous_sibling and el.parent.name == 'tbody' and len(el.parent.parent.find_all(['thead'])) < 1) diff --git a/markdownify/main.py b/markdownify/main.py index ee14e9c..51696ec 100644 --- a/markdownify/main.py +++ b/markdownify/main.py @@ -67,7 +67,7 @@ def main(argv=sys.argv[1:]): "contain inline images.") parser.add_argument('--no-table-header-fallback', dest='table_header_fallback', action='store_false', - help="Create an empty row as the header if a table header is missing.") + help="Create an empty row as the header if a table header (defined by '<thead>' or '<th>') is missing.") parser.add_argument('-w', '--wrap', action='store_true', help="Wrap all text paragraphs at --wrap-width characters.") parser.add_argument('--wrap-width', type=int, default=80) diff --git a/tests/test_tables.py b/tests/test_tables.py index dcf9ad7..de80995 100644 --- a/tests/test_tables.py +++ b/tests/test_tables.py @@ -141,6 +141,33 @@ </table>""" +table_head_body_multiple_head = """<table> + <thead>
+ <tr> + <td>Creator</td> + <td>Editor</td> + <td>Server</td> + </tr>
+ <tr> + <td>Operator</td> + <td>Manager</td> + <td>Engineer</td> + </tr> + </thead> + <tbody>
+ <tr> + <td>Bob</td> + <td>Oliver</td> + <td>Tom</td> + </tr>
+ <tr> + <td>Thomas</td> + <td>Lucas</td> + <td>Ethan</td> + </tr> + </tbody> +</table>
""" + table_missing_text = """ @@ -245,6 +272,7 @@ def test_table(): assert md(table_with_linebreaks) == '\n\n| Firstname | Lastname | Age |\n| --- | --- | --- |\n| Jill | Smith Jackson | 50 |\n| Eve | Jackson Smith | 94 |\n\n' assert md(table_with_header_column) == '\n\n| Firstname | Lastname | Age |\n| --- | --- | --- |\n| Jill | Smith | 50 |\n| Eve | Jackson | 94 |\n\n' assert md(table_head_body) == '\n\n| Firstname | Lastname | Age |\n| --- | --- | --- |\n| Jill | Smith | 50 |\n| Eve | Jackson | 94 |\n\n' + assert md(table_head_body_multiple_head) == '\n\n| Creator | Editor | Server |\n| --- | --- | --- |\n| Operator | Manager | Engineer |\n| Bob | Oliver | Tom |\n| Thomas | Lucas | Ethan |\n\n' assert md(table_head_body_missing_head) == '\n\n| Firstname | Lastname | Age |\n| --- | --- | --- |\n| Jill | Smith | 50 |\n| Eve | Jackson | 94 |\n\n' assert md(table_missing_text) == '\n\n| | Lastname | Age |\n| --- | --- | --- |\n| Jill | | 50 |\n| Eve | Jackson | 94 |\n\n' assert md(table_missing_head) == '\n\n| Firstname | Lastname | Age |\n| --- | --- | --- |\n| Jill | Smith | 50 |\n| Eve | Jackson | 94 |\n\n' @@ -252,3 +280,11 @@ def test_table(): assert md(table_with_caption) == 'TEXT\n\nCaption\n\n| Firstname | Lastname | Age |\n| --- | --- | --- |\n\n' assert md(table_with_colspan) == '\n\n| Name | | Age |\n| --- | --- | --- |\n| Jill | Smith | 50 |\n| Eve | Jackson | 94 |\n\n' assert md(table_with_undefined_colspan) == '\n\n| Name | Age |\n| --- | --- |\n| Jill | Smith |\n\n' + + +def test_no_table_header_fallback(): + assert md(table_head_body, table_header_fallback=False) == '\n\n| Firstname | Lastname | Age |\n| --- | --- | --- |\n| Jill | Smith | 50 |\n| Eve | Jackson | 94 |\n\n' + assert md(table_head_body_multiple_head, table_header_fallback=False) == '\n\n| | | |\n| --- | --- | --- |\n| Creator | Editor | Server |\n| Operator | Manager | Engineer |\n| Bob | Oliver | Tom |\n| Thomas | Lucas | Ethan |\n\n' + assert md(table_head_body_missing_head, 
table_header_fallback=False) == '\n\n| Firstname | Lastname | Age |\n| --- | --- | --- |\n| Jill | Smith | 50 |\n| Eve | Jackson | 94 |\n\n' + assert md(table_missing_text, table_header_fallback=False) == '\n\n| | Lastname | Age |\n| --- | --- | --- |\n| Jill | | 50 |\n| Eve | Jackson | 94 |\n\n' + assert md(table_missing_head, table_header_fallback=False) == '\n\n| | | |\n| --- | --- | --- |\n| Firstname | Lastname | Age |\n| Jill | Smith | 50 |\n| Eve | Jackson | 94 |\n\n' From c37fa897fbd39d15c20b766749dc549552e4c5ff Mon Sep 17 00:00:00 2001 From: SomeBottle Date: Sat, 7 Dec 2024 16:25:31 +0800 Subject: [PATCH 4/7] Add more test cases --- tests/test_tables.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/tests/test_tables.py b/tests/test_tables.py index de80995..a696722 100644 --- a/tests/test_tables.py +++ b/tests/test_tables.py @@ -283,8 +283,17 @@ def test_table(): def test_no_table_header_fallback(): + assert md(table, table_header_fallback=False) == '\n\n| Firstname | Lastname | Age |\n| --- | --- | --- |\n| Jill | Smith | 50 |\n| Eve | Jackson | 94 |\n\n' + assert md(table_with_html_content, table_header_fallback=False) == '\n\n| Firstname | Lastname | Age |\n| --- | --- | --- |\n| **Jill** | *Smith* | [50](#) |\n| Eve | Jackson | 94 |\n\n' + assert md(table_with_paragraphs, table_header_fallback=False) == '\n\n| Firstname | Lastname | Age |\n| --- | --- | --- |\n| Jill | Smith | 50 |\n| Eve | Jackson | 94 |\n\n' + assert md(table_with_linebreaks, table_header_fallback=False) == '\n\n| Firstname | Lastname | Age |\n| --- | --- | --- |\n| Jill | Smith Jackson | 50 |\n| Eve | Jackson Smith | 94 |\n\n' + assert md(table_with_header_column, table_header_fallback=False) == '\n\n| Firstname | Lastname | Age |\n| --- | --- | --- |\n| Jill | Smith | 50 |\n| Eve | Jackson | 94 |\n\n' assert md(table_head_body, table_header_fallback=False) == '\n\n| Firstname | Lastname | Age |\n| --- | --- | --- |\n| Jill | Smith | 50 |\n| Eve | Jackson | 94 |\n\n' 
assert md(table_head_body_multiple_head, table_header_fallback=False) == '\n\n| | | |\n| --- | --- | --- |\n| Creator | Editor | Server |\n| Operator | Manager | Engineer |\n| Bob | Oliver | Tom |\n| Thomas | Lucas | Ethan |\n\n' assert md(table_head_body_missing_head, table_header_fallback=False) == '\n\n| Firstname | Lastname | Age |\n| --- | --- | --- |\n| Jill | Smith | 50 |\n| Eve | Jackson | 94 |\n\n' assert md(table_missing_text, table_header_fallback=False) == '\n\n| | Lastname | Age |\n| --- | --- | --- |\n| Jill | | 50 |\n| Eve | Jackson | 94 |\n\n' assert md(table_missing_head, table_header_fallback=False) == '\n\n| | | |\n| --- | --- | --- |\n| Firstname | Lastname | Age |\n| Jill | Smith | 50 |\n| Eve | Jackson | 94 |\n\n' + assert md(table_body, table_header_fallback=False) == '\n\n| | | |\n| --- | --- | --- |\n| Firstname | Lastname | Age |\n| Jill | Smith | 50 |\n| Eve | Jackson | 94 |\n\n' + assert md(table_with_caption, table_header_fallback=False) == 'TEXT\n\nCaption\n| | | |\n| --- | --- | --- |\n| Firstname | Lastname | Age |\n\n' + assert md(table_with_colspan, table_header_fallback=False) == '\n\n| Name | | Age |\n| --- | --- | --- |\n| Jill | Smith | 50 |\n| Eve | Jackson | 94 |\n\n' + assert md(table_with_undefined_colspan, table_header_fallback=False) == '\n\n| Name | Age |\n| --- | --- |\n| Jill | Smith |\n\n' From f44a5c694ba6e8259230e74a982aca1220d558bb Mon Sep 17 00:00:00 2001 From: SomeBottle Date: Thu, 2 Jan 2025 10:14:34 +0800 Subject: [PATCH 5/7] Rename option table_header_fallback to table_infer_header --- README.rst | 2 +- markdownify/__init__.py | 14 +++++++------- markdownify/main.py | 6 +----- tests/test_tables.py | 30 +++++++++++++++--------------- 4 files changed, 24 insertions(+), 28 deletions(-) diff --git a/README.rst b/README.rst index e670a50..b1fc709 100644 --- a/README.rst +++ b/README.rst @@ -139,7 +139,7 @@ keep_inline_images_in that should be allowed to contain inline images, for example ``['td']``. 
Defaults to an empty list. -table_header_fallback +table_infer_header If set to ``False``, when a table header (defined by ``<thead>`` or ``<th>
``) is missing, it will create an empty row as the header instead of using the first row of the table. Defaults to ``True``. diff --git a/markdownify/__init__.py b/markdownify/__init__.py index f9518a7..f202f2d 100644 --- a/markdownify/__init__.py +++ b/markdownify/__init__.py @@ -102,7 +102,7 @@ class DefaultOptions: strong_em_symbol = ASTERISK sub_symbol = '' sup_symbol = '' - table_header_fallback = True + table_infer_header = True wrap = False wrap_width = 80 @@ -531,11 +531,11 @@ def convert_tr(self, el, text, convert_as_inline): underline = '' if ((is_headrow or (is_head_row_missing - and self.options['table_header_fallback'])) + and self.options['table_infer_header'])) and not el.previous_sibling): # first row and: # - is headline or - # - headline is missing and fallback is enabled + # - headline is missing and header inference is enabled # print headline underline full_colspan = 0 for cell in cells: @@ -545,15 +545,15 @@ def convert_tr(self, el, text, convert_as_inline): full_colspan += 1 underline += '| ' + ' | '.join(['---'] * full_colspan) + ' |' + '\n' elif ((is_head_row_missing - and not self.options['table_header_fallback']) + and not self.options['table_infer_header']) or (not el.previous_sibling and (el.parent.name == 'table' or (el.parent.name == 'tbody' and not el.parent.previous_sibling)))): - # headline is missing and fallback is disabled or: + # headline is missing and header inference is disabled or: # first row, not headline, and: - # - the parent is table or - # - the parent is tbody at the beginning of a table. + # - the parent is table or + # - the parent is tbody at the beginning of a table. 
# print empty headline above this row overline += '| ' + ' | '.join([''] * len(cells)) + ' |' + '\n' overline += '| ' + ' | '.join(['---'] * len(cells)) + ' |' + '\n' diff --git a/markdownify/main.py b/markdownify/main.py index 51696ec..4051144 100644 --- a/markdownify/main.py +++ b/markdownify/main.py @@ -55,17 +55,13 @@ def main(argv=sys.argv[1:]): parser.add_argument('--no-escape-underscores', dest='escape_underscores', action='store_false', help="Do not escape '_' to '\\_' in text.") - parser.add_argument('--escape-misc', dest='escape_misc', - action='store_true', - help="Escape miscellaneous punctuation characters that sometimes " - "have Markdown significance in text.") parser.add_argument('-i', '--keep-inline-images-in', nargs='*', help="Images are converted to their alt-text when the images are " "located inside headlines or table cells. If some inline images " "should be converted to markdown images instead, this option can " "be set to a list of parent tags that should be allowed to " "contain inline images.") - parser.add_argument('--no-table-header-fallback', dest='table_header_fallback', + parser.add_argument('--no-table-infer-header', dest='table_infer_header', action='store_false', help="Create an empty row as the header if a table header (defined by '<thead>' or '<th>
') is missing.") parser.add_argument('-w', '--wrap', action='store_true', diff --git a/tests/test_tables.py b/tests/test_tables.py index a696722..5cf576b 100644 --- a/tests/test_tables.py +++ b/tests/test_tables.py @@ -282,18 +282,18 @@ def test_table(): assert md(table_with_undefined_colspan) == '\n\n| Name | Age |\n| --- | --- |\n| Jill | Smith |\n\n' -def test_no_table_header_fallback(): - assert md(table, table_header_fallback=False) == '\n\n| Firstname | Lastname | Age |\n| --- | --- | --- |\n| Jill | Smith | 50 |\n| Eve | Jackson | 94 |\n\n' - assert md(table_with_html_content, table_header_fallback=False) == '\n\n| Firstname | Lastname | Age |\n| --- | --- | --- |\n| **Jill** | *Smith* | [50](#) |\n| Eve | Jackson | 94 |\n\n' - assert md(table_with_paragraphs, table_header_fallback=False) == '\n\n| Firstname | Lastname | Age |\n| --- | --- | --- |\n| Jill | Smith | 50 |\n| Eve | Jackson | 94 |\n\n' - assert md(table_with_linebreaks, table_header_fallback=False) == '\n\n| Firstname | Lastname | Age |\n| --- | --- | --- |\n| Jill | Smith Jackson | 50 |\n| Eve | Jackson Smith | 94 |\n\n' - assert md(table_with_header_column, table_header_fallback=False) == '\n\n| Firstname | Lastname | Age |\n| --- | --- | --- |\n| Jill | Smith | 50 |\n| Eve | Jackson | 94 |\n\n' - assert md(table_head_body, table_header_fallback=False) == '\n\n| Firstname | Lastname | Age |\n| --- | --- | --- |\n| Jill | Smith | 50 |\n| Eve | Jackson | 94 |\n\n' - assert md(table_head_body_multiple_head, table_header_fallback=False) == '\n\n| | | |\n| --- | --- | --- |\n| Creator | Editor | Server |\n| Operator | Manager | Engineer |\n| Bob | Oliver | Tom |\n| Thomas | Lucas | Ethan |\n\n' - assert md(table_head_body_missing_head, table_header_fallback=False) == '\n\n| Firstname | Lastname | Age |\n| --- | --- | --- |\n| Jill | Smith | 50 |\n| Eve | Jackson | 94 |\n\n' - assert md(table_missing_text, table_header_fallback=False) == '\n\n| | Lastname | Age |\n| --- | --- | --- |\n| Jill | | 50 
|\n| Eve | Jackson | 94 |\n\n' - assert md(table_missing_head, table_header_fallback=False) == '\n\n| | | |\n| --- | --- | --- |\n| Firstname | Lastname | Age |\n| Jill | Smith | 50 |\n| Eve | Jackson | 94 |\n\n' - assert md(table_body, table_header_fallback=False) == '\n\n| | | |\n| --- | --- | --- |\n| Firstname | Lastname | Age |\n| Jill | Smith | 50 |\n| Eve | Jackson | 94 |\n\n' - assert md(table_with_caption, table_header_fallback=False) == 'TEXT\n\nCaption\n| | | |\n| --- | --- | --- |\n| Firstname | Lastname | Age |\n\n' - assert md(table_with_colspan, table_header_fallback=False) == '\n\n| Name | | Age |\n| --- | --- | --- |\n| Jill | Smith | 50 |\n| Eve | Jackson | 94 |\n\n' - assert md(table_with_undefined_colspan, table_header_fallback=False) == '\n\n| Name | Age |\n| --- | --- |\n| Jill | Smith |\n\n' +def test_no_table_infer_header(): + assert md(table, table_infer_header=False) == '\n\n| Firstname | Lastname | Age |\n| --- | --- | --- |\n| Jill | Smith | 50 |\n| Eve | Jackson | 94 |\n\n' + assert md(table_with_html_content, table_infer_header=False) == '\n\n| Firstname | Lastname | Age |\n| --- | --- | --- |\n| **Jill** | *Smith* | [50](#) |\n| Eve | Jackson | 94 |\n\n' + assert md(table_with_paragraphs, table_infer_header=False) == '\n\n| Firstname | Lastname | Age |\n| --- | --- | --- |\n| Jill | Smith | 50 |\n| Eve | Jackson | 94 |\n\n' + assert md(table_with_linebreaks, table_infer_header=False) == '\n\n| Firstname | Lastname | Age |\n| --- | --- | --- |\n| Jill | Smith Jackson | 50 |\n| Eve | Jackson Smith | 94 |\n\n' + assert md(table_with_header_column, table_infer_header=False) == '\n\n| Firstname | Lastname | Age |\n| --- | --- | --- |\n| Jill | Smith | 50 |\n| Eve | Jackson | 94 |\n\n' + assert md(table_head_body, table_infer_header=False) == '\n\n| Firstname | Lastname | Age |\n| --- | --- | --- |\n| Jill | Smith | 50 |\n| Eve | Jackson | 94 |\n\n' + assert md(table_head_body_multiple_head, table_infer_header=False) == '\n\n| | | |\n| --- 
| --- | --- |\n| Creator | Editor | Server |\n| Operator | Manager | Engineer |\n| Bob | Oliver | Tom |\n| Thomas | Lucas | Ethan |\n\n' + assert md(table_head_body_missing_head, table_infer_header=False) == '\n\n| Firstname | Lastname | Age |\n| --- | --- | --- |\n| Jill | Smith | 50 |\n| Eve | Jackson | 94 |\n\n' + assert md(table_missing_text, table_infer_header=False) == '\n\n| | Lastname | Age |\n| --- | --- | --- |\n| Jill | | 50 |\n| Eve | Jackson | 94 |\n\n' + assert md(table_missing_head, table_infer_header=False) == '\n\n| | | |\n| --- | --- | --- |\n| Firstname | Lastname | Age |\n| Jill | Smith | 50 |\n| Eve | Jackson | 94 |\n\n' + assert md(table_body, table_infer_header=False) == '\n\n| | | |\n| --- | --- | --- |\n| Firstname | Lastname | Age |\n| Jill | Smith | 50 |\n| Eve | Jackson | 94 |\n\n' + assert md(table_with_caption, table_infer_header=False) == 'TEXT\n\nCaption\n| | | |\n| --- | --- | --- |\n| Firstname | Lastname | Age |\n\n' + assert md(table_with_colspan, table_infer_header=False) == '\n\n| Name | | Age |\n| --- | --- | --- |\n| Jill | Smith | 50 |\n| Eve | Jackson | 94 |\n\n' + assert md(table_with_undefined_colspan, table_infer_header=False) == '\n\n| Name | Age |\n| --- | --- |\n| Jill | Smith |\n\n' From 8feee1a684bc18b01b50a789e988ec5f96312815 Mon Sep 17 00:00:00 2001 From: SomeBottle Date: Thu, 2 Jan 2025 20:13:38 +0800 Subject: [PATCH 6/7] Default table_infer_header to False --- README.rst | 7 ++++--- markdownify/__init__.py | 2 +- markdownify/main.py | 7 ++++--- tests/test_tables.py | 38 +++++++++++++++++++------------------- 4 files changed, 28 insertions(+), 26 deletions(-) diff --git a/README.rst b/README.rst index b1fc709..b15b707 100644 --- a/README.rst +++ b/README.rst @@ -140,9 +140,10 @@ keep_inline_images_in Defaults to an empty list. table_infer_header - If set to ``False``, when a table header (defined by ``
<thead>`` or ``<th>``) is missing, - it will create an empty row as the header instead of using the first row of the table. - Defaults to ``True``. + If set to ``True``, when a table header (defined by ``<thead>`` or ``<th>
``) is missing, + it will use the first row of the body as the header. + Otherwise it will create an empty row as the header. + Defaults to ``False``. wrap, wrap_width If ``wrap`` is set to ``True``, all text paragraphs are wrapped at diff --git a/markdownify/__init__.py b/markdownify/__init__.py index f202f2d..2360210 100644 --- a/markdownify/__init__.py +++ b/markdownify/__init__.py @@ -102,7 +102,7 @@ class DefaultOptions: strong_em_symbol = ASTERISK sub_symbol = '' sup_symbol = '' - table_infer_header = True + table_infer_header = False wrap = False wrap_width = 80 diff --git a/markdownify/main.py b/markdownify/main.py index 4051144..20fbfc5 100644 --- a/markdownify/main.py +++ b/markdownify/main.py @@ -61,9 +61,10 @@ def main(argv=sys.argv[1:]): "should be converted to markdown images instead, this option can " "be set to a list of parent tags that should be allowed to " "contain inline images.") - parser.add_argument('--no-table-infer-header', dest='table_infer_header', - action='store_false', - help="Create an empty row as the header if a table header (defined by '
<thead>' or '<th>') is missing.") + parser.add_argument('--table-infer-header', dest='table_infer_header', + action='store_true', + help="Use the first row of the table body as the header " + "when a table header (defined by '<thead>' or '<th>
') is missing.") parser.add_argument('-w', '--wrap', action='store_true', help="Wrap all text paragraphs at --wrap-width characters.") parser.add_argument('--wrap-width', type=int, default=80) diff --git a/tests/test_tables.py b/tests/test_tables.py index 5cf576b..da4bf53 100644 --- a/tests/test_tables.py +++ b/tests/test_tables.py @@ -272,28 +272,28 @@ def test_table(): assert md(table_with_linebreaks) == '\n\n| Firstname | Lastname | Age |\n| --- | --- | --- |\n| Jill | Smith Jackson | 50 |\n| Eve | Jackson Smith | 94 |\n\n' assert md(table_with_header_column) == '\n\n| Firstname | Lastname | Age |\n| --- | --- | --- |\n| Jill | Smith | 50 |\n| Eve | Jackson | 94 |\n\n' assert md(table_head_body) == '\n\n| Firstname | Lastname | Age |\n| --- | --- | --- |\n| Jill | Smith | 50 |\n| Eve | Jackson | 94 |\n\n' - assert md(table_head_body_multiple_head) == '\n\n| Creator | Editor | Server |\n| --- | --- | --- |\n| Operator | Manager | Engineer |\n| Bob | Oliver | Tom |\n| Thomas | Lucas | Ethan |\n\n' + assert md(table_head_body_multiple_head) == '\n\n| | | |\n| --- | --- | --- |\n| Creator | Editor | Server |\n| Operator | Manager | Engineer |\n| Bob | Oliver | Tom |\n| Thomas | Lucas | Ethan |\n\n' assert md(table_head_body_missing_head) == '\n\n| Firstname | Lastname | Age |\n| --- | --- | --- |\n| Jill | Smith | 50 |\n| Eve | Jackson | 94 |\n\n' assert md(table_missing_text) == '\n\n| | Lastname | Age |\n| --- | --- | --- |\n| Jill | | 50 |\n| Eve | Jackson | 94 |\n\n' - assert md(table_missing_head) == '\n\n| Firstname | Lastname | Age |\n| --- | --- | --- |\n| Jill | Smith | 50 |\n| Eve | Jackson | 94 |\n\n' - assert md(table_body) == '\n\n| Firstname | Lastname | Age |\n| --- | --- | --- |\n| Jill | Smith | 50 |\n| Eve | Jackson | 94 |\n\n' - assert md(table_with_caption) == 'TEXT\n\nCaption\n\n| Firstname | Lastname | Age |\n| --- | --- | --- |\n\n' + assert md(table_missing_head) == '\n\n| | | |\n| --- | --- | --- |\n| Firstname | Lastname | Age |\n| Jill | 
Smith | 50 |\n| Eve | Jackson | 94 |\n\n' + assert md(table_body) == '\n\n| | | |\n| --- | --- | --- |\n| Firstname | Lastname | Age |\n| Jill | Smith | 50 |\n| Eve | Jackson | 94 |\n\n' + assert md(table_with_caption) == 'TEXT\n\nCaption\n\n| | | |\n| --- | --- | --- |\n| Firstname | Lastname | Age |\n\n' assert md(table_with_colspan) == '\n\n| Name | | Age |\n| --- | --- | --- |\n| Jill | Smith | 50 |\n| Eve | Jackson | 94 |\n\n' assert md(table_with_undefined_colspan) == '\n\n| Name | Age |\n| --- | --- |\n| Jill | Smith |\n\n' -def test_no_table_infer_header(): - assert md(table, table_infer_header=False) == '\n\n| Firstname | Lastname | Age |\n| --- | --- | --- |\n| Jill | Smith | 50 |\n| Eve | Jackson | 94 |\n\n' - assert md(table_with_html_content, table_infer_header=False) == '\n\n| Firstname | Lastname | Age |\n| --- | --- | --- |\n| **Jill** | *Smith* | [50](#) |\n| Eve | Jackson | 94 |\n\n' - assert md(table_with_paragraphs, table_infer_header=False) == '\n\n| Firstname | Lastname | Age |\n| --- | --- | --- |\n| Jill | Smith | 50 |\n| Eve | Jackson | 94 |\n\n' - assert md(table_with_linebreaks, table_infer_header=False) == '\n\n| Firstname | Lastname | Age |\n| --- | --- | --- |\n| Jill | Smith Jackson | 50 |\n| Eve | Jackson Smith | 94 |\n\n' - assert md(table_with_header_column, table_infer_header=False) == '\n\n| Firstname | Lastname | Age |\n| --- | --- | --- |\n| Jill | Smith | 50 |\n| Eve | Jackson | 94 |\n\n' - assert md(table_head_body, table_infer_header=False) == '\n\n| Firstname | Lastname | Age |\n| --- | --- | --- |\n| Jill | Smith | 50 |\n| Eve | Jackson | 94 |\n\n' - assert md(table_head_body_multiple_head, table_infer_header=False) == '\n\n| | | |\n| --- | --- | --- |\n| Creator | Editor | Server |\n| Operator | Manager | Engineer |\n| Bob | Oliver | Tom |\n| Thomas | Lucas | Ethan |\n\n' - assert md(table_head_body_missing_head, table_infer_header=False) == '\n\n| Firstname | Lastname | Age |\n| --- | --- | --- |\n| Jill | Smith | 50 
|\n| Eve | Jackson | 94 |\n\n' - assert md(table_missing_text, table_infer_header=False) == '\n\n| | Lastname | Age |\n| --- | --- | --- |\n| Jill | | 50 |\n| Eve | Jackson | 94 |\n\n' - assert md(table_missing_head, table_infer_header=False) == '\n\n| | | |\n| --- | --- | --- |\n| Firstname | Lastname | Age |\n| Jill | Smith | 50 |\n| Eve | Jackson | 94 |\n\n' - assert md(table_body, table_infer_header=False) == '\n\n| | | |\n| --- | --- | --- |\n| Firstname | Lastname | Age |\n| Jill | Smith | 50 |\n| Eve | Jackson | 94 |\n\n' - assert md(table_with_caption, table_infer_header=False) == 'TEXT\n\nCaption\n| | | |\n| --- | --- | --- |\n| Firstname | Lastname | Age |\n\n' - assert md(table_with_colspan, table_infer_header=False) == '\n\n| Name | | Age |\n| --- | --- | --- |\n| Jill | Smith | 50 |\n| Eve | Jackson | 94 |\n\n' - assert md(table_with_undefined_colspan, table_infer_header=False) == '\n\n| Name | Age |\n| --- | --- |\n| Jill | Smith |\n\n' +def test_table_infer_header(): + assert md(table, table_infer_header=True) == '\n\n| Firstname | Lastname | Age |\n| --- | --- | --- |\n| Jill | Smith | 50 |\n| Eve | Jackson | 94 |\n\n' + assert md(table_with_html_content, table_infer_header=True) == '\n\n| Firstname | Lastname | Age |\n| --- | --- | --- |\n| **Jill** | *Smith* | [50](#) |\n| Eve | Jackson | 94 |\n\n' + assert md(table_with_paragraphs, table_infer_header=True) == '\n\n| Firstname | Lastname | Age |\n| --- | --- | --- |\n| Jill | Smith | 50 |\n| Eve | Jackson | 94 |\n\n' + assert md(table_with_linebreaks, table_infer_header=True) == '\n\n| Firstname | Lastname | Age |\n| --- | --- | --- |\n| Jill | Smith Jackson | 50 |\n| Eve | Jackson Smith | 94 |\n\n' + assert md(table_with_header_column, table_infer_header=True) == '\n\n| Firstname | Lastname | Age |\n| --- | --- | --- |\n| Jill | Smith | 50 |\n| Eve | Jackson | 94 |\n\n' + assert md(table_head_body, table_infer_header=True) == '\n\n| Firstname | Lastname | Age |\n| --- | --- | --- |\n| Jill | 
Smith | 50 |\n| Eve | Jackson | 94 |\n\n' + assert md(table_head_body_multiple_head, table_infer_header=True) == '\n\n| Creator | Editor | Server |\n| --- | --- | --- |\n| Operator | Manager | Engineer |\n| Bob | Oliver | Tom |\n| Thomas | Lucas | Ethan |\n\n' + assert md(table_head_body_missing_head, table_infer_header=True) == '\n\n| Firstname | Lastname | Age |\n| --- | --- | --- |\n| Jill | Smith | 50 |\n| Eve | Jackson | 94 |\n\n' + assert md(table_missing_text, table_infer_header=True) == '\n\n| | Lastname | Age |\n| --- | --- | --- |\n| Jill | | 50 |\n| Eve | Jackson | 94 |\n\n' + assert md(table_missing_head, table_infer_header=True) == '\n\n| Firstname | Lastname | Age |\n| --- | --- | --- |\n| Jill | Smith | 50 |\n| Eve | Jackson | 94 |\n\n' + assert md(table_body, table_infer_header=True) == '\n\n| Firstname | Lastname | Age |\n| --- | --- | --- |\n| Jill | Smith | 50 |\n| Eve | Jackson | 94 |\n\n' + assert md(table_with_caption, table_infer_header=True) == 'TEXT\n\nCaption\n\n| Firstname | Lastname | Age |\n| --- | --- | --- |\n\n' + assert md(table_with_colspan, table_infer_header=True) == '\n\n| Name | | Age |\n| --- | --- | --- |\n| Jill | Smith | 50 |\n| Eve | Jackson | 94 |\n\n' + assert md(table_with_undefined_colspan, table_infer_header=True) == '\n\n| Name | Age |\n| --- | --- |\n| Jill | Smith |\n\n' From 123fdaa2b6337410b4ef71133cce5c8d7c9527c8 Mon Sep 17 00:00:00 2001 From: chrispy Date: Fri, 3 Jan 2025 10:12:44 -0500 Subject: [PATCH 7/7] some minor cosmetic changes --- README.rst | 7 +++---- markdownify/main.py | 4 ++-- 2 files changed, 5 insertions(+), 6 deletions(-) diff --git a/README.rst b/README.rst index b15b707..34ed7e0 100644 --- a/README.rst +++ b/README.rst @@ -140,10 +140,9 @@ keep_inline_images_in Defaults to an empty list. table_infer_header - If set to ``True``, when a table header (defined by ``
<thead>`` or ``<th>``) is missing, - it will use the first row of the body as the header. - Otherwise it will create an empty row as the header. - Defaults to ``False``. + Controls handling of tables with no header row (as indicated by ``<thead>`` + or ``<th>
``). When set to ``True``, the first body row is used as the header row. + Defaults to ``False``, which leaves the header row empty. wrap, wrap_width If ``wrap`` is set to ``True``, all text paragraphs are wrapped at diff --git a/markdownify/main.py b/markdownify/main.py index 20fbfc5..432efb5 100644 --- a/markdownify/main.py +++ b/markdownify/main.py @@ -63,8 +63,8 @@ def main(argv=sys.argv[1:]): "contain inline images.") parser.add_argument('--table-infer-header', dest='table_infer_header', action='store_true', - help="Use the first row of the table body as the header " - "when a table header (defined by '
<thead>' or '<th>') is missing.") + help="When a table has no header row (as indicated by '<thead>' " + "or '<th>
'), use the first body row as the header row.") parser.add_argument('-w', '--wrap', action='store_true', help="Wrap all text paragraphs at --wrap-width characters.") parser.add_argument('--wrap-width', type=int, default=80)