Skip to content

Commit f2f854a

Browse files
authored
✨ NEW: Add attrs_plugin (#50)
1 parent 6fbc43f commit f2f854a

File tree

6 files changed

+386
-0
lines changed

6 files changed

+386
-0
lines changed

docs/index.md

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -85,6 +85,12 @@ html_string = md.render("some *Markdown*")
8585
.. autofunction:: mdit_py_plugins.container.container_plugin
8686
```
8787

88+
## Inline Attributes
89+
90+
```{eval-rst}
91+
.. autofunction:: mdit_py_plugins.attrs.attrs_plugin
92+
```
93+
8894
## Math
8995

9096
```{eval-rst}

mdit_py_plugins/attrs/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
from .index import attrs_plugin # noqa: F401

mdit_py_plugins/attrs/index.py

Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,50 @@
1+
from markdown_it import MarkdownIt
2+
from markdown_it.rules_inline import StateInline
3+
4+
from .parse import ParseError, parse
5+
6+
7+
def attrs_plugin(md: MarkdownIt, *, after=("image", "code_inline")):
    """Attach ``{...}`` attribute blocks to the inline element directly before them::

        ![alt](https://image.com){#id .a b=c}

    Syntax accepted inside the curly braces:

    - `.foo` adds ``foo`` as a class; several classes are joined with spaces.
    - `#foo` sets ``foo`` as the identifier; when repeated, the last one wins.
    - `key="value"` or `key=value` sets a key-value attribute.
      A bare (unquoted) value may only contain letters, digits, `_`, `:` or `-`.
      A backslash inside a quoted value stops the following character
      from terminating the value.
    - `%` begins a comment, which runs until the next `%`.

    **Note:** The rule only looks at the last token emitted, so it is limited
    to "self-closing" elements such as images and code spans.
    It does not work with links or emphasis.

    :param md: The MarkdownIt instance to modify.
    :param after: The token types after which attributes may be specified.
    """

    def _attr_inline_rule(state: StateInline, silent: bool):
        # Attributes must touch the element: pending text (even a single
        # space) between the element and the brace disables the rule.
        if state.pending or not state.tokens:
            return False
        target = state.tokens[-1]
        if target.type not in after:
            return False
        remainder = state.src[state.pos :]
        try:
            brace_index, parsed = parse(remainder)
        except ParseError:
            return False
        # Step over everything up to and including the reported position.
        state.pos += brace_index + 1
        if silent:
            return True
        existing = target.attrs
        # Classes accumulate across consecutive blocks instead of overwriting.
        if "class" in parsed and "class" in existing:
            parsed["class"] = f"{existing['class']} {parsed['class']}"
        existing.update(parsed)
        return True

    md.inline.ruler.push("attr", _attr_inline_rule)

mdit_py_plugins/attrs/parse.py

Lines changed: 265 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,265 @@
1+
"""Parser for attributes::
2+
3+
attributes { id = "foo", class = "bar baz",
4+
key1 = "val1", key2 = "val2" }
5+
6+
Adapted from:
7+
https://github.com/jgm/djot/blob/fae7364b86bfce69bc6d5b5eede1f5196d845fd6/djot/attributes.lua#L1
8+
9+
syntax:
10+
11+
attributes <- '{' whitespace* attribute (whitespace attribute)* whitespace* '}'
12+
attribute <- identifier | class | keyval
13+
identifier <- '#' name
14+
class <- '.' name
15+
name <- (nonspace, nonpunctuation other than ':', '_', '-')+
16+
keyval <- key '=' val
17+
key <- (ASCII_ALPHANUM | ':' | '_' | '-')+
18+
val <- bareval | quotedval
19+
bareval <- (ASCII_ALPHANUM | ':' | '_' | '-')+
20+
quotedval <- '"' ([^"] | '\"')* '"'
21+
"""
22+
from __future__ import annotations
23+
24+
from enum import Enum
25+
import re
26+
from typing import Callable
27+
28+
29+
class State(Enum):
    """States of the character-by-character attribute scanner."""

    # Nothing consumed yet; the opening "{" is expected.
    START = 0
    # Inside the braces, between attributes.
    SCANNING = 1
    # Reading the name that follows "#".
    SCANNING_ID = 2
    # Reading the name that follows ".".
    SCANNING_CLASS = 3
    # Reading a key, up to "=".
    SCANNING_KEY = 4
    # Just after "=", deciding between a bare and a quoted value.
    SCANNING_VALUE = 5
    # Reading an unquoted value.
    SCANNING_BARE_VALUE = 6
    # Reading a double-quoted value.
    SCANNING_QUOTED_VALUE = 7
    # Inside a "%" comment.
    SCANNING_COMMENT = 8
    # Just after a backslash inside a quoted value.
    SCANNING_ESCAPED = 9
    # The attribute block has been closed.
    DONE = 10
41+
42+
43+
# A single whitespace character.
REGEX_SPACE = re.compile(r"\s")
# Whitespace or ASCII punctuation that terminates an id/class name.
# ":", "_" and "-" are deliberately absent so that names may contain them.
REGEX_SPACE_PUNCTUATION = re.compile(r"[\s!\"#$%&'()*+,./;<=>?@[\]^`{|}~]")
# A character allowed in keys and bare values: ASCII alphanumerics, "_", ":", "-".
# ``0-9`` is spelled out because ``\d`` also matches non-ASCII Unicode digits
# in ``str`` patterns, which the documented grammar (ASCII_ALPHANUM) excludes.
REGEX_KEY_CHARACTERS = re.compile(r"[a-zA-Z0-9_:-]")
46+
47+
48+
class TokenState:
    """Accumulates ``(start, end, type)`` spans found while scanning attributes."""

    def __init__(self):
        # Offset at which the span currently being scanned began.
        self.start: int = 0
        self._tokens = []

    def set_start(self, start: int) -> None:
        """Remember where the span currently being scanned begins."""
        self.start = start

    def append(self, start: int, end: int, ttype: str):
        """Record a finished span ``string[start:end]`` of type ``ttype``."""
        self._tokens.append((start, end, ttype))

    def compile(self, string: str) -> dict[str, str]:
        """Turn the recorded spans into an attribute dictionary.

        :param string: the text that the recorded offsets index into.
        :returns: attribute names mapped to values; all classes (from ``.name``
            and from ``class=...``) are joined with spaces under ``"class"``,
            which is inserted last.
        """
        collected = {}
        class_names = []
        total = len(self._tokens)
        cursor = 0
        while cursor < total:
            begin, finish, kind = self._tokens[cursor]
            text = string[begin:finish]
            cursor += 1
            if kind == "id":
                # A later id overwrites an earlier one.
                collected["id"] = text
                continue
            if kind == "class":
                class_names.append(text)
                continue
            if kind != "key" or cursor >= total:
                continue
            value_begin, value_finish, value_kind = self._tokens[cursor]
            if value_kind != "value":
                # Not a value: leave it to be handled on the next iteration.
                continue
            cursor += 1
            value = string[value_begin:value_finish]
            if text == "class":
                class_names.append(value)
            else:
                collected[text] = value
        if class_names:
            collected["class"] = " ".join(class_names)
        return collected

    def __str__(self) -> str:
        return str(self._tokens)

    def __repr__(self) -> str:
        return repr(self._tokens)
90+
91+
92+
class ParseError(Exception):
    """Raised when the text cannot be parsed as an attribute block.

    The offending offset is kept on ``pos`` and appended to the message.
    """

    def __init__(self, msg: str, pos: int) -> None:
        message = msg + f" at position {pos}"
        super().__init__(message)
        self.pos = pos
96+
97+
98+
def parse(string: str) -> tuple[int, dict[str, str]]:
    """Parse an attribute block from the start of ``string``.

    Scanning stops at the ``}`` that closes the block; any text after it is
    ignored.

    :param string: text expected to begin with the opening ``{``.
    :returns: (index of the closing ``}`` within ``string``, dict of attributes)
    :raises ParseError: if the text is not a valid attribute block, including
        when it ends before the closing ``}`` is reached.
    """
    state: State = State.START
    tokens = TokenState()
    for pos, char in enumerate(string):
        state = HANDLERS[state](char, pos, tokens)
        if state == State.DONE:
            return pos, tokens.compile(string)

    # The input ran out before the block was closed. Reporting this as a
    # successful parse would make callers skip past the end of the text and
    # apply half-read attributes, so it is rejected instead.
    raise ParseError("Attributes must end with '}'", len(string))
113+
114+
115+
def handle_start(char: str, pos: int, tokens: TokenState) -> State:
    """Consume the first character, which has to be the opening ``{``.

    :raises ParseError: for any other character.
    """
    if char != "{":
        raise ParseError("Attributes must start with '{'", pos)
    return State.SCANNING
120+
121+
122+
def handle_scanning(char: str, pos: int, tokens: TokenState) -> State:
    """Handle a character found between attributes.

    Whitespace is skipped, ``}`` closes the block, and ``#``, ``%``, ``.`` or
    a key character starts the matching item at ``pos``.

    :raises ParseError: for any other character.
    """
    if char == "}":
        return State.DONE
    if char in (" ", "\t", "\n", "\r"):
        return State.SCANNING

    if char == "#":
        following = State.SCANNING_ID
    elif char == "%":
        following = State.SCANNING_COMMENT
    elif char == ".":
        following = State.SCANNING_CLASS
    elif REGEX_KEY_CHARACTERS.fullmatch(char):
        following = State.SCANNING_KEY
    else:
        raise ParseError(f"Unexpected character whilst scanning: {char}", pos)

    # Every item records where it started, so its span can be cut out later.
    tokens.set_start(pos)
    return following
142+
143+
144+
def handle_scanning_comment(char: str, pos: int, tokens: TokenState) -> State:
    """Handle a character inside a ``%`` comment.

    A comment ends at the next ``%`` (scanning of attributes resumes) or at
    the ``}`` that closes the attribute block. Everything else is discarded.
    """
    if char == "%":
        return State.SCANNING
    if char == "}":
        # An unterminated comment runs to the end of the attribute block;
        # without this branch such a block could never be closed.
        return State.DONE
    return State.SCANNING_COMMENT
150+
151+
152+
def handle_scanning_id(char: str, pos: int, tokens: TokenState) -> State:
    """Handle a character while reading the name after ``#``.

    The name continues until whitespace or ``}``; an empty name records
    nothing.

    :raises ParseError: if any other punctuation character is met.
    """
    if REGEX_SPACE_PUNCTUATION.fullmatch(char) is None:
        return State.SCANNING_ID

    closing = char == "}"
    if not closing and REGEX_SPACE.fullmatch(char) is None:
        raise ParseError(f"Unexpected character whilst scanning id: {char}", pos)

    # tokens.start points at the "#", so the name itself starts one later.
    if tokens.start < pos - 1:
        tokens.append(tokens.start + 1, pos, "id")
    return State.DONE if closing else State.SCANNING
168+
169+
170+
def handle_scanning_class(char: str, pos: int, tokens: TokenState) -> State:
    """Handle a character while reading the name after ``.``.

    The name continues until whitespace or ``}``; an empty name records
    nothing.

    :raises ParseError: if any other punctuation character is met.
    """
    if REGEX_SPACE_PUNCTUATION.fullmatch(char) is None:
        return State.SCANNING_CLASS

    closing = char == "}"
    if not closing and REGEX_SPACE.fullmatch(char) is None:
        raise ParseError(f"Unexpected character whilst scanning class: {char}", pos)

    # tokens.start points at the ".", so the name itself starts one later.
    if tokens.start < pos - 1:
        tokens.append(tokens.start + 1, pos, "class")
    return State.DONE if closing else State.SCANNING
186+
187+
188+
def handle_scanning_key(char: str, pos: int, tokens: TokenState) -> State:
    """Handle a character while reading a key.

    ``=`` ends the key and records it; key characters extend it.

    :raises ParseError: for any other character.
    """
    if char != "=":
        if REGEX_KEY_CHARACTERS.fullmatch(char) is None:
            raise ParseError(f"Unexpected character whilst scanning key: {char}", pos)
        return State.SCANNING_KEY

    tokens.append(tokens.start, pos, "key")
    return State.SCANNING_VALUE
198+
199+
200+
def handle_scanning_value(char: str, pos: int, tokens: TokenState) -> State:
    """Handle the first character after ``=``.

    A double quote starts a quoted value, a key character starts a bare one;
    either way the value's start offset is recorded.

    :raises ParseError: for any other character.
    """
    quoted = char == '"'
    if not quoted and REGEX_KEY_CHARACTERS.fullmatch(char) is None:
        raise ParseError(f"Unexpected character whilst scanning value: {char}", pos)

    tokens.set_start(pos)
    return State.SCANNING_QUOTED_VALUE if quoted else State.SCANNING_BARE_VALUE
211+
212+
213+
def handle_scanning_bare_value(char: str, pos: int, tokens: TokenState) -> State:
    """Handle a character while reading an unquoted value.

    Key characters extend the value; whitespace or ``}`` ends and records it.

    :raises ParseError: for any other character.
    """
    if REGEX_KEY_CHARACTERS.fullmatch(char) is not None:
        return State.SCANNING_BARE_VALUE

    if char == "}":
        following = State.DONE
    elif REGEX_SPACE.fullmatch(char) is not None:
        following = State.SCANNING
    else:
        raise ParseError(
            f"Unexpected character whilst scanning bare value: {char}", pos
        )

    tokens.append(tokens.start, pos, "value")
    return following
227+
228+
229+
def handle_scanning_escaped(char: str, pos: int, tokens: TokenState) -> State:
    """Swallow the character following a backslash inside a quoted value.

    The character is not inspected, so an escaped ``"`` does not end the value.
    """
    resumed = State.SCANNING_QUOTED_VALUE
    return resumed
231+
232+
233+
def handle_scanning_quoted_value(char: str, pos: int, tokens: TokenState) -> State:
    """Handle a character while reading a double-quoted value.

    ``"`` ends and records the value, a backslash escapes the next character,
    and everything else except braces extends the value.

    :raises ParseError: if ``{`` or ``}`` appears inside the quotes.
    """
    if char == "\\":
        return State.SCANNING_ESCAPED

    if char in ("{", "}"):
        raise ParseError(
            f"Unexpected character whilst scanning quoted value: {char}", pos
        )

    # tokens.start points at the opening quote, so the text starts one later.
    # NOTE(review): a newline also records a value span and scanning goes on,
    # so a multi-line value yields several spans from the same start -
    # confirm whether keeping only the text before the first newline is intended.
    if char in ('"', "\n"):
        tokens.append(tokens.start + 1, pos, "value")

    return State.SCANNING if char == '"' else State.SCANNING_QUOTED_VALUE
252+
253+
254+
# Dispatch table of the scanner: the handler registered for the current state
# receives (character, offset, token accumulator) and returns the next state.
# State.DONE has no entry.
HANDLERS: dict[State, Callable[[str, int, TokenState], State]] = {
    # entry and between-attribute states
    State.START: handle_start,
    State.SCANNING: handle_scanning,
    State.SCANNING_COMMENT: handle_scanning_comment,
    # "#name" and ".name"
    State.SCANNING_ID: handle_scanning_id,
    State.SCANNING_CLASS: handle_scanning_class,
    # key=value pairs
    State.SCANNING_KEY: handle_scanning_key,
    State.SCANNING_VALUE: handle_scanning_value,
    State.SCANNING_BARE_VALUE: handle_scanning_bare_value,
    State.SCANNING_QUOTED_VALUE: handle_scanning_quoted_value,
    State.SCANNING_ESCAPED: handle_scanning_escaped,
}

tests/fixtures/attrs.md

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,46 @@
1+
simple image
2+
.
3+
![a](b){#id .a b=c}
4+
.
5+
<p><img src="b" alt="a" id="id" b="c" class="a"></p>
6+
.
7+
8+
simple inline code
9+
.
10+
`a`{#id .a b=c}
11+
.
12+
<p><code id="id" b="c" class="a">a</code></p>
13+
.
14+
15+
ignore if space
16+
.
17+
![a](b) {#id key="*"}
18+
.
19+
<p><img src="b" alt="a"> {#id key=&quot;*&quot;}</p>
20+
.
21+
22+
ignore if text
23+
.
24+
![a](b)b{#id key="*"}
25+
.
26+
<p><img src="b" alt="a">b{#id key=&quot;*&quot;}</p>
27+
.
28+
29+
multi-line
30+
.
31+
![a](b){
32+
#id .a
33+
b=c
34+
}
35+
more
36+
.
37+
<p><img src="b" alt="a" id="id" b="c" class="a">
38+
more</p>
39+
.
40+
41+
combined
42+
.
43+
![a](b){#a .a}{.b class=x other=h}{#x class="x g" other=a}
44+
.
45+
<p><img src="b" alt="a" id="x" class="a b x x g" other="a"></p>
46+
.

tests/test_attrs.py

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
from pathlib import Path
2+
3+
from markdown_it import MarkdownIt
4+
from markdown_it.utils import read_fixture_file
5+
import pytest
6+
7+
from mdit_py_plugins.attrs import attrs_plugin
8+
9+
# Fixture file holding the title / input / expected-HTML cases for the plugin.
FIXTURE_PATH = Path(__file__).parent / "fixtures" / "attrs.md"


@pytest.mark.parametrize("line,title,input,expected", read_fixture_file(FIXTURE_PATH))
def test_fixture(line, title, input, expected):
    """Render one fixture case with the attrs plugin and compare the HTML."""
    parser = MarkdownIt("commonmark").use(attrs_plugin)
    parser.options["xhtmlOut"] = False
    rendered = parser.render(input)
    # Printed so that the rendered HTML shows up in pytest's failure output.
    print(rendered)
    assert rendered.rstrip() == expected.rstrip()

0 commit comments

Comments
 (0)