From 5e39d3c2dae64a4b37cec3ae6a5e2c82b5a01f7c Mon Sep 17 00:00:00 2001 From: Joseph Myers Date: Wed, 5 Feb 2025 16:44:11 +0000 Subject: [PATCH 1/4] Escape right square brackets As discussed in #148, right square brackets need to be escaped for correct handling in links in some cases. This is an alternative to that PR that is based on current sources and adds the requested testcase, since there was no response to the request in that PR for a testcase to be added to the testsuite. --- markdownify/__init__.py | 2 +- tests/test_escaping.py | 4 +++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/markdownify/__init__.py b/markdownify/__init__.py index 9e4c99f..f3fd80e 100644 --- a/markdownify/__init__.py +++ b/markdownify/__init__.py @@ -303,7 +303,7 @@ def escape(self, text): if not text: return '' if self.options['escape_misc']: - text = re.sub(r'([\\&<`[>~=+|])', r'\\\1', text) + text = re.sub(r'([]\\&<`[>~=+|])', r'\\\1', text) # A sequence of one or more consecutive '-', preceded and # followed by whitespace or start/end of fragment, might # be confused with an underline of a header, or with a diff --git a/tests/test_escaping.py b/tests/test_escaping.py index d213675..d78cd0f 100644 --- a/tests/test_escaping.py +++ b/tests/test_escaping.py @@ -51,7 +51,9 @@ def test_misc(): assert md('-y', escape_misc=True) == '-y' assert md('+ x\n+ y\n', escape_misc=True) == '\\+ x\n\\+ y\n' assert md('`x`', escape_misc=True) == r'\`x\`' - assert md('[text](link)', escape_misc=True) == r'\[text](link)' + assert md('[text](link)', escape_misc=True) == r'\[text\](link)' + assert md('test]', escape_misc=True) == r'[test\]](test)' + assert md('[test]', escape_misc=True) == r'[\[test\]](test)' assert md('1. x', escape_misc=True) == r'1\. x' # assert md('1. x', escape_misc=True) == r'1\. x' assert md('1. x', escape_misc=True) == r'1\. 
x' From 5c7706e5d67c4024ad2bcaad9a9644c9a1e397bd Mon Sep 17 00:00:00 2001 From: chrispy Date: Tue, 18 Feb 2025 17:00:59 -0500 Subject: [PATCH 2/4] use consistent text strings in new unit tests --- tests/test_escaping.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/test_escaping.py b/tests/test_escaping.py index d78cd0f..bab4d11 100644 --- a/tests/test_escaping.py +++ b/tests/test_escaping.py @@ -51,9 +51,9 @@ def test_misc(): assert md('-y', escape_misc=True) == '-y' assert md('+ x\n+ y\n', escape_misc=True) == '\\+ x\n\\+ y\n' assert md('`x`', escape_misc=True) == r'\`x\`' - assert md('[text](link)', escape_misc=True) == r'\[text\](link)' - assert md('test]', escape_misc=True) == r'[test\]](test)' - assert md('[test]', escape_misc=True) == r'[\[test\]](test)' + assert md('[text](notalink)', escape_misc=True) == r'\[text\](notalink)' + assert md('text]', escape_misc=True) == r'[text\]](link)' + assert md('[text]', escape_misc=True) == r'[\[text\]](link)' assert md('1. x', escape_misc=True) == r'1\. x' # assert md('1. x', escape_misc=True) == r'1\. x' assert md('1. x', escape_misc=True) == r'1\. 
x' From 2502925745a8681adde109ad4b138091f7ad797e Mon Sep 17 00:00:00 2001 From: chrispy Date: Tue, 18 Feb 2025 17:06:11 -0500 Subject: [PATCH 3/4] only escape closing square brackets in <a> tags --- markdownify/__init__.py | 11 ++++++++--- tests/test_escaping.py | 2 +- 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/markdownify/__init__.py b/markdownify/__init__.py index 7ebbc6d..e126414 100644 --- a/markdownify/__init__.py +++ b/markdownify/__init__.py @@ -305,7 +305,7 @@ def process_text(self, el, parent_tags=None): # escape special characters if we're not inside a preformatted or code element if '_noformat' not in parent_tags: - text = self.escape(text) + text = self.escape(text, parent_tags) # remove leading whitespace at the start or just after a # block-level element; remove traliing whitespace at the end @@ -347,11 +347,16 @@ def should_convert_tag(self, tag): else: return True - def escape(self, text): + def escape(self, text, parent_tags): if not text: return '' if self.options['escape_misc']: - text = re.sub(r'([]\\&<`[>~=+|])', r'\\\1', text) + text = re.sub(r'([\\&<`[>~=+|])', r'\\\1', text) + + if 'a' in parent_tags: + # inside tags, also escape closing brackets in link text + text = re.sub(r'(])', r'\\\1', text) + # A sequence of one or more consecutive '-', preceded and # followed by whitespace or start/end of fragment, might # be confused with an underline of a header, or with a diff --git a/tests/test_escaping.py b/tests/test_escaping.py index bab4d11..13a4ab0 100644 --- a/tests/test_escaping.py +++ b/tests/test_escaping.py @@ -51,7 +51,7 @@ def test_misc(): assert md('-y', escape_misc=True) == '-y' assert md('+ x\n+ y\n', escape_misc=True) == '\\+ x\n\\+ y\n' assert md('`x`', escape_misc=True) == r'\`x\`' - assert md('[text](notalink)', escape_misc=True) == r'\[text\](notalink)' + assert md('[text](notalink)', escape_misc=True) == r'\[text](notalink)' assert md('text]', escape_misc=True) == r'[text\]](link)' assert md('[text]', 
escape_misc=True) == r'[\[text\]](link)' assert md('1. x', escape_misc=True) == r'1\. x' From f173a42f9eb0350a41a95ba5ada09b6970146786 Mon Sep 17 00:00:00 2001 From: chrispy Date: Wed, 19 Feb 2025 10:01:49 -0500 Subject: [PATCH 4/4] revert to jsm28's original proposed change --- markdownify/__init__.py | 7 +------ tests/test_escaping.py | 2 +- 2 files changed, 2 insertions(+), 7 deletions(-) diff --git a/markdownify/__init__.py b/markdownify/__init__.py index e126414..85e81e2 100644 --- a/markdownify/__init__.py +++ b/markdownify/__init__.py @@ -351,12 +351,7 @@ def escape(self, text, parent_tags): if not text: return '' if self.options['escape_misc']: - text = re.sub(r'([\\&<`[>~=+|])', r'\\\1', text) - - if 'a' in parent_tags: - # inside tags, also escape closing brackets in link text - text = re.sub(r'(])', r'\\\1', text) - + text = re.sub(r'([]\\&<`[>~=+|])', r'\\\1', text) # A sequence of one or more consecutive '-', preceded and # followed by whitespace or start/end of fragment, might # be confused with an underline of a header, or with a diff --git a/tests/test_escaping.py b/tests/test_escaping.py index 13a4ab0..bab4d11 100644 --- a/tests/test_escaping.py +++ b/tests/test_escaping.py @@ -51,7 +51,7 @@ def test_misc(): assert md('-y', escape_misc=True) == '-y' assert md('+ x\n+ y\n', escape_misc=True) == '\\+ x\n\\+ y\n' assert md('`x`', escape_misc=True) == r'\`x\`' - assert md('[text](notalink)', escape_misc=True) == r'\[text](notalink)' + assert md('[text](notalink)', escape_misc=True) == r'\[text\](notalink)' assert md('text]', escape_misc=True) == r'[text\]](link)' assert md('[text]', escape_misc=True) == r'[\[text\]](link)' assert md('1. x', escape_misc=True) == r'1\. x'