Skip to content

Commit 1709be3

Browse files
DOC-5093 add shortcode iterator class
1 parent a411092 commit 1709be3

File tree

1 file changed

+138
-0
lines changed

1 file changed

+138
-0
lines changed

build/image_report.py

Lines changed: 138 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,138 @@
1+
"""Image report
2+
"""
3+
4+
from enum import Enum
5+
from typing import Iterator, Match
6+
7+
import re
8+
import argparse
9+
10+
# Regular expression pieces used to scan text for Hugo-style shortcodes.
#
# The combined pattern is a top-level alternation: it matches EITHER a single
# newline (so a scanner can count lines) OR one complete shortcode tag
# delimited by "{{<" / "{{%" and ">}}" / "%}}".
#
# Capture groups of the combined pattern:
#   1 - a newline (set only when the newline alternative matched)
#   2 - "/" when the tag is a closing tag, otherwise None
#   3 - the tag name (word characters and hyphens)
#   4 - the raw parameter text, or None when the tag has no parameters
shortcode_re_pattern_start = r"(\n)|\{\{[<%]\s*"
# Raw string: "\w", "\-" and "\s" are regex escapes, not string escapes.
shortcode_re_body = r"(/)?([\w\-]+)\s*(.+?)?"
shortcode_re_pattern_end = r"\s*[>%]\}\}"

shortcode_re_pattern = (
    shortcode_re_pattern_start +
    shortcode_re_body +
    shortcode_re_pattern_end
)
19+
20+
21+
class ShortcodeTagType(Enum):
    """Kind of shortcode tag: one that opens a shortcode or one that closes it."""

    # Opening tag.
    OPEN = 1
    # Closing tag.
    CLOSE = 2
25+
26+
27+
class ShortcodeInfo:
    """Represents the information in a shortcode.

    The constructor stores the tag name, the tag type and the raw parameter
    text, then splits the parameter text into positional and named
    parameters.
    """
    tag_type: "ShortcodeTagType"
    tag: str
    param_text: str
    pos_params: list[str]
    named_params: dict[str, str]

    # One alternative per parameter form. Group numbers used by parse_params:
    #    1      - content of a quoted positional parameter: "value"
    #    4, 5   - name and value of an unquoted named parameter: name=value
    #    7, 8   - name and value of a quoted named parameter: name="value"
    #   10      - a bare (unquoted) positional parameter: value
    # Inside quotes, a double quote preceded by a backslash does not end the
    # value; the backslash is kept in the captured text.
    _PARAM_RE = re.compile(
        r'"(([^"]|(?<=\\)")*)"'
        r'|((\w+)=([^"\s]+))'
        r'|((\w+)="(([^"]|(?<=\\)")*)")'
        r'|([^"=\s]+)'
    )

    def parse_params(self, param_str: str):
        """Parse ``param_str`` and add its parameters to this object.

        Positional parameters are appended to ``pos_params`` in the order
        they appear; named parameters are stored in ``named_params``. The
        existing contents of both containers are kept.

        Args:
            param_str: Raw parameter text of a shortcode tag.
        """
        for match in self._PARAM_RE.finditer(param_str):
            # Compare against None rather than testing truthiness: an empty
            # quoted parameter ("") captures an empty string and must still
            # occupy its positional slot.
            if match[1] is not None:
                self.pos_params.append(match[1])
            elif match[3] is not None:
                self.named_params[match[4]] = match[5]
            elif match[6] is not None:
                self.named_params[match[7]] = match[8]
            elif match[10] is not None:
                self.pos_params.append(match[10])

    def __init__(
        self, tag: str,
        tag_type: "ShortcodeTagType",
        param_text: str = ""
    ):
        """Create the shortcode description and parse its parameters.

        Args:
            tag: Name of the shortcode.
            tag_type: Whether this is an opening or a closing tag.
            param_text: Raw parameter text. ``None`` is tolerated and
                treated like an empty string.
        """
        self.tag = tag
        self.tag_type = tag_type
        # Normalise None (e.g. an unmatched optional regex group) to "".
        self.param_text = param_text or ""
        self.pos_params = []
        self.named_params = {}
        self.parse_params(self.param_text)

    def __str__(self) -> str:
        """Return the tag type and name followed by one line per parameter."""
        if self.tag_type == ShortcodeTagType.OPEN:
            type_text = "OPEN"
        else:
            type_text = "CLOSE"

        pieces = [f"{type_text} {self.tag}"]

        if self.pos_params or self.named_params:
            pieces.append(":")

        pieces.extend(f"\n '{value}'" for value in self.pos_params)
        pieces.extend(
            f"\n {name} = {value}"
            for name, value in self.named_params.items()
        )

        return "".join(pieces)
85+
86+
87+
class ShortcodeIterator:
    """Iterates through all shortcodes in a string.

    Each iteration step yields a ``(ShortcodeInfo, line_number)`` tuple,
    where ``line_number`` is the 1-based line on which the shortcode starts.
    """
    re_iterator: Iterator[Match[str]]
    linenum: int

    def __init__(self, text: str):
        """Start scanning ``text`` from its first line."""
        self.re_iterator = re.finditer(shortcode_re_pattern, text)
        self.linenum = 1

    def __iter__(self):
        return self

    def __next__(self) -> tuple[ShortcodeInfo, int]:
        """Return the next shortcode and the line number where it starts.

        Raises:
            StopIteration: When the text contains no further shortcodes.
        """
        for match in self.re_iterator:
            if match[1]:
                # Standalone newline: only advances the line counter.
                self.linenum += 1
                continue

            start_line = self.linenum
            # The whitespace runs inside a shortcode can span line breaks.
            # Those newlines are consumed by this match and never show up as
            # standalone newline matches, so count them here to keep later
            # line numbers correct.
            self.linenum += match[0].count("\n")

            if match[2]:
                # A leading "/" marks a closing tag; no parameters are passed.
                info = ShortcodeInfo(match[3], ShortcodeTagType.CLOSE)
            else:
                info = ShortcodeInfo(
                    match[3],
                    ShortcodeTagType.OPEN,
                    match[4]
                )

            return (info, start_line)

        raise StopIteration
121+
122+
123+
# Command-line driver: read the file named on the command line and print
# every shortcode found in it together with its line number.
#
# ArgumentParser's first two positional parameters are ``prog`` and
# ``usage``, so the description must be passed by keyword; otherwise it
# replaces the generated usage line in the help output.
parser = argparse.ArgumentParser(
    prog="Image report",
    description="Scans a folder and report all Hugo image shortcodes found"
)

parser.add_argument("pathname", help="Path of the folder to scan")

args = parser.parse_args()

print(f"Scanning '{args.pathname}'")

# NOTE(review): the help text speaks of a folder, but the path is opened as
# a single UTF-8 text file here - confirm which is intended.
with open(args.pathname, encoding="utf_8") as md_file:
    filetext = md_file.read()

for shortcode, line in ShortcodeIterator(filetext):
    print(f"Line: {line}: {shortcode}")

0 commit comments

Comments
 (0)