Skip to content

Commit 46af45b

Browse files
jsm28 and AlexVonB authored
Escape all characters with Markdown significance (#118)
* Escape all characters with Markdown significance

There are many punctuation characters that sometimes have significance in Markdown; more systematically escape them all (based on a new escape_misc configuration option). A limited attempt is made to restrict the escaping of '.' and ')' to the context where they might have Markdown significance (after a number, where they can indicate an ordered list item); no such attempt is made for the other characters (and even that limiting of '.' and ')' may not be entirely safe in all cases, as it's possible the HTML could have the number outside the block being escaped in one go, e.g. `<span>1</span>.`).

---------

Co-authored-by: AlexVonB <AlexVonB@users.noreply.github.com>
1 parent 2bd0772 commit 46af45b

File tree

3 files changed

+30
-2
lines changed

3 files changed

+30
-2
lines changed

README.rst

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -123,6 +123,11 @@ escape_underscores
123123
If set to ``False``, do not escape ``_`` to ``\_`` in text.
124124
Defaults to ``True``.
125125

126+
escape_misc
127+
If set to ``False``, do not escape miscellaneous punctuation characters
128+
that sometimes have Markdown significance in text.
129+
Defaults to ``True``.
130+
126131
keep_inline_images_in
127132
Images are converted to their alt-text when the images are located inside
128133
headlines or table cells. If some inline images should be converted to

markdownify/__init__.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -71,6 +71,7 @@ class DefaultOptions:
7171
default_title = False
7272
escape_asterisks = True
7373
escape_underscores = True
74+
escape_misc = True
7475
heading_style = UNDERLINED
7576
keep_inline_images_in = []
7677
newline_style = SPACES
@@ -201,6 +202,9 @@ def should_convert_tag(self, tag):
201202
def escape(self, text):
202203
if not text:
203204
return ''
205+
if self.options['escape_misc']:
206+
text = re.sub(r'([\\&<`[>~#=+|-])', r'\\\1', text)
207+
text = re.sub(r'([0-9])([.)])', r'\1\\\2', text)
204208
if self.options['escape_asterisks']:
205209
text = text.replace('*', r'\*')
206210
if self.options['escape_underscores']:

tests/test_escaping.py

Lines changed: 21 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@ def test_underscore():
1212

1313

1414
def test_xml_entities():
15-
assert md('&amp;') == '&'
15+
assert md('&amp;') == r'\&'
1616

1717

1818
def test_named_entities():
@@ -25,4 +25,23 @@ def test_hexadecimal_entities():
2525

2626

2727
def test_single_escaping_entities():
28-
assert md('&amp;amp;') == '&amp;'
28+
assert md('&amp;amp;') == r'\&amp;'
29+
30+
31+
def text_misc():
32+
assert md('\\*') == r'\\\*'
33+
assert md('<foo>') == r'\<foo\>'
34+
assert md('# foo') == r'\# foo'
35+
assert md('> foo') == r'\> foo'
36+
assert md('~~foo~~') == r'\~\~foo\~\~'
37+
assert md('foo\n===\n') == 'foo\n\\=\\=\\=\n'
38+
assert md('---\n') == '\\-\\-\\-\n'
39+
assert md('+ x\n+ y\n') == '\\+ x\n\\+ y\n'
40+
assert md('`x`') == r'\`x\`'
41+
assert md('[text](link)') == r'\[text](link)'
42+
assert md('1. x') == r'1\. x'
43+
assert md('not a number. x') == r'not a number. x'
44+
assert md('1) x') == r'1\) x'
45+
assert md('not a number) x') == r'not a number) x'
46+
assert md('|not table|') == r'\|not table\|'
47+
assert md(r'\ <foo> &amp;amp; | ` `', escape_misc=False) == r'\ <foo> &amp; | ` `'

0 commit comments

Comments
 (0)