Skip to content

Commit 85f405a

Browse files
authored
Script to generate release blog post from changelog (#16447)
This is an adaptation of an older script that we used to convert from a Paper doc to HTML. This includes changes by @gvanrossum and @svalentin.
1 parent ec90046 commit 85f405a

File tree

1 file changed

+171
-0
lines changed

1 file changed

+171
-0
lines changed

misc/gen_blog_post_html.py

Lines changed: 171 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,171 @@
1+
"""Converter from CHANGELOG.md (Markdown) to HTML suitable for a mypy blog post.
2+
3+
How to use:
4+
5+
1. Write release notes in CHANGELOG.md.
6+
2. Make sure the heading for the next release is of form `## Mypy X.Y`.
7+
3. Run `misc/gen_blog_post_html.py X.Y > target.html`.
8+
4. Manually inspect and tweak the result.
9+
10+
Notes:
11+
12+
* There are some fragile assumptions. Double check the output.
13+
"""
14+
15+
import argparse
16+
import html
17+
import os
18+
import re
19+
import sys
20+
21+
22+
def format_lists(h: str) -> str:
    """Wrap runs of Markdown bullet lines in ``<p><ul>`` ... ``</ul>``.

    A line counts as a bullet when it starts with one of the recognised
    bullet prefixes. Each bullet becomes an ``<li>`` line (the item is not
    explicitly closed); every other line is passed through unchanged.
    """
    bullet_prefixes = ("- ", "* ", " * ")
    out = []
    inside_list = False
    for line in h.splitlines():
        is_bullet = line.startswith(bullet_prefixes)
        if is_bullet and not inside_list:
            # First bullet of a new run opens the list.
            out.append("<p><ul>")
        elif inside_list and not is_bullet:
            # First non-bullet line after a run closes the list.
            out.append("</ul>")
        inside_list = is_bullet
        if is_bullet:
            # Drop the two leading marker characters, then any leftover
            # whitespace (this also covers the three-character prefix).
            out.append("<li>%s" % line[2:].lstrip())
        else:
            out.append(line)
    if inside_list:
        # Input ended while still inside a list.
        out.append("</ul>")
    return "\n".join(out)
38+
39+
40+
def format_code(h: str) -> str:
    """Convert Markdown code blocks in *h* into HTML ``<pre>`` blocks.

    Two kinds of block are recognised, line by line:

    * a run of consecutive lines that each start with the indent prefix;
    * a fenced block opened (and normally closed) by a line starting with
      three backticks. The fence lines themselves are dropped and each
      content line gets the indent prefix prepended.

    Inside a block, ``&gt;``/``&lt;`` entities are first turned back into
    ``>``/``<`` and the line is then escaped with ``html.escape``. Lines
    outside code blocks are passed through unchanged.

    A fenced block that is never closed runs to the end of the input
    instead of raising ``IndexError``.
    """
    # NOTE(review): the indent prefix is a single space in the text under
    # review; confirm it was not meant to be a wider indent (e.g. four
    # spaces) whose whitespace got collapsed.
    indent_prefix = " "
    fence = "```"

    a = h.splitlines()
    r = []
    i = 0
    while i < len(a):
        if a[i].startswith((indent_prefix, fence)):
            indent = a[i].startswith(indent_prefix)
            if not indent:
                # Skip the opening fence line.
                i += 1
            r.append("<pre>")
            while i < len(a) and (
                (indent and a[i].startswith(indent_prefix))
                or (not indent and not a[i].startswith(fence))
            ):
                # Undo &gt; and &lt; so html.escape does not double-escape them.
                line = a[i].replace("&gt;", ">").replace("&lt;", "<")
                if not indent:
                    line = indent_prefix + line
                r.append(html.escape(line))
                i += 1
            r.append("</pre>")
            # Skip the closing fence, if there is one. The bounds check
            # matters: an unterminated fence leaves i == len(a) here.
            if not indent and i < len(a) and a[i].startswith(fence):
                i += 1
        else:
            r.append(a[i])
            i += 1
    return "\n".join(r)
66+
67+
68+
def convert(src: str) -> str:
    """Translate one release section of the changelog into blog-post HTML.

    The Markdown text is rewritten by a fixed sequence of substitutions.
    The order is significant: later passes operate on the output of earlier
    ones (for example, PR links are stripped before being regenerated).
    Returns a complete HTML document as a string.
    """
    # Replace < and >.
    text = src.replace("<", "&lt;").replace(">", "&gt;")

    # Title
    text = re.sub(
        r"^## (Mypy [0-9.]+)", r"<h1>\1 Released</h1>", text, flags=re.MULTILINE
    )

    # Passes that run before list and code-block formatting.
    structural_passes = (
        # Subheadings
        (r"\n#### ([A-Z`].*)\n", r"\n<h2>\1</h2>\n"),
        # Sub-subheadings
        (r"\n\*\*([A-Z_`].*)\*\*\n", r"\n<h3>\1</h3>\n"),
        (r"\n`\*\*([A-Z_`].*)\*\*\n", r"\n<h3>`\1</h3>\n"),
        # Translate `**`
        (r"`\*\*`", "<tt>**</tt>"),
        # Paragraphs
        (r"\n([A-Z])", r"\n<p>\1"),
    )
    for pattern, replacement in structural_passes:
        text = re.sub(pattern, replacement, text)

    # Bullet lists first, then code blocks.
    text = format_code(format_lists(text))

    # Passes that run after list and code-block formatting.
    inline_passes = (
        # Code fragments
        (r"`([^`]+)`", r"<tt>\1</tt>"),
        # Remove **** noise
        (r"\*\*\*\*", ""),
        # Bold text
        (r"\*\*([A-Za-z].*?)\*\*", r" <b>\1</b>"),
        # Emphasized text
        (r" \*([A-Za-z].*?)\*", r" <i>\1</i>"),
        # Remove redundant PR links to avoid double links (they are
        # regenerated by the "#NNN" pass further down)
        (r"\[(#[0-9]+)\]\(https://github.com/python/mypy/pull/[0-9]+/?\)", r"\1"),
        # Issue and PR links
        (r"\((#[0-9]+)\) +\(([^)]+)\)", r"(\2, \1)"),
        (
            r"fixes #([0-9]+)",
            r'fixes issue <a href="https://github.com/python/mypy/issues/\1">\1</a>',
        ),
        (r"#([0-9]+)", r'PR <a href="https://github.com/python/mypy/pull/\1">\1</a>'),
        (r"\) \(PR", ", PR"),
        # Markdown links
        (r"\[([^]]*)\]\(([^)]*)\)", r'<a href="\2">\1</a>'),
        # Add random links in case they are missing
        (
            r"contributors to typeshed:",
            'contributors to <a href="https://github.com/python/typeshed">typeshed</a>:',
        ),
    )
    for pattern, replacement in inline_passes:
        text = re.sub(pattern, replacement, text)

    # Add missing top-level HTML tags
    header = '<html>\n<meta charset="utf-8" />\n<body>\n'
    footer = "</body>\n</html>"
    return header + text + footer
136+
137+
138+
def extract_version(src: str, version: str) -> str:
    """Return the changelog section for the given release.

    The section starts at the line that, after stripping whitespace, equals
    ``## Mypy <version>`` and runs up to (but not including) the next line
    starting with ``"## "``. If there is no later heading, the section
    extends to the end of *src*.

    Raises:
        RuntimeError: if no heading for *version* is found.
    """
    lines = src.splitlines()
    heading = f"## Mypy {version}"

    for start, line in enumerate(lines):
        if line.strip() == heading:
            break
    else:
        raise RuntimeError(f"Can't find heading {heading!r}")

    end = start + 1
    # The bounds check covers the case where the requested release is the
    # last section, with no following "## " heading to stop at.
    while end < len(lines) and not lines[end].startswith("## "):
        end += 1
    return "\n".join(lines[start:end])
152+
153+
154+
def main() -> None:
    """Command-line entry point: write the HTML blog post to stdout.

    Parses the ``version`` argument, reads ``CHANGELOG.md`` from the parent
    of this script's directory, extracts the section for that version,
    converts it to HTML and writes the result to standard output. Exits
    with an error message if the version is not of the form X.Y or X.Y.Z.
    """
    parser = argparse.ArgumentParser(
        description="Generate HTML release blog post based on CHANGELOG.md and write to stdout."
    )
    parser.add_argument("version", help="mypy version, in form X.Y or X.Y.Z")
    args = parser.parse_args()
    version: str = args.version
    if not re.match(r"[0-9]+(\.[0-9]+)+$", version):
        sys.exit(f"error: Version must be of form X.Y or X.Y.Z, not {version!r}")
    changelog_path = os.path.join(os.path.dirname(__file__), os.path.pardir, "CHANGELOG.md")
    # Read with an explicit encoding (the generated page declares utf-8)
    # rather than the platform default, and close the file deterministically.
    with open(changelog_path, encoding="utf-8") as f:
        src = f.read()
    src = extract_version(src, version)
    dst = convert(src)
    sys.stdout.write(dst)
168+
169+
170+
# Run the converter only when this file is executed as a script, not when
# it is imported as a module.
if __name__ == "__main__":
    main()

0 commit comments

Comments
 (0)