-
-
Notifications
You must be signed in to change notification settings - Fork 2.9k
/
Copy pathgen_blog_post_html.py
171 lines (136 loc) · 4.68 KB
/
gen_blog_post_html.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
"""Converter from CHANGELOG.md (Markdown) to HTML suitable for a mypy blog post.
How to use:
1. Write release notes in CHANGELOG.md.
2. Make sure the heading for the next release is of form `## Mypy X.Y`.
3. Run `misc/gen_blog_post_html.py X.Y > target.html`.
4. Manually inspect and tweak the result.
Notes:
* There are some fragile assumptions. Double check the output.
"""
import argparse
import html
import os
import re
import sys
def format_lists(h: str) -> str:
    """Turn runs of Markdown bullet lines into HTML ``<ul>`` lists.

    A line is a bullet if it starts with ``"- "``, ``"* "`` or ``" * "``.
    Each consecutive run of bullet lines is wrapped in ``<p><ul>`` ... ``</ul>``
    and every bullet becomes an ``<li>`` line (without a closing tag). All
    other lines are passed through unchanged.
    """
    markers = ("- ", "* ", " * ")
    out = []
    in_list = False
    for line in h.splitlines():
        is_item = line.startswith(markers)
        if is_item and not in_list:
            # First bullet of a new run: open the list.
            out.append("<p><ul>")
        elif in_list and not is_item:
            # The run ended on the previous line: close the list.
            out.append("</ul>")
        in_list = is_item
        if is_item:
            # Drop the two-character marker, then any whitespace left over.
            out.append("<li>%s" % line[2:].lstrip())
        else:
            out.append(line)
    if in_list:
        # Input ended while still inside a list.
        out.append("</ul>")
    return "\n".join(out)
def format_code(h: str) -> str:
    """Wrap Markdown code blocks in ``<pre>`` ... ``</pre>``.

    Two kinds of code block are recognized:

    * indented blocks: a run of lines that each start with four spaces;
    * fenced blocks: lines between a pair of lines starting with three
      backticks. The fence lines themselves are dropped and the content is
      indented by four spaces so both kinds render alike.

    Every code line is passed through ``html.escape``. Lines outside code
    blocks are returned unchanged. A fenced block that is never closed runs
    to the end of the input.
    """
    indent_prefix = "    "
    fence = "```"
    a = h.splitlines()
    r = []
    i = 0
    while i < len(a):
        indent = a[i].startswith(indent_prefix)
        if not indent and not a[i].startswith(fence):
            # Ordinary line: pass through untouched.
            r.append(a[i])
            i += 1
            continue
        if not indent:
            # Skip the opening fence line.
            i += 1
        r.append("<pre>")
        while i < len(a) and (
            a[i].startswith(indent_prefix) if indent else not a[i].startswith(fence)
        ):
            # The input may already have angle brackets escaped as entities;
            # undo that so html.escape() below does not double-escape them.
            line = a[i].replace("&gt;", ">").replace("&lt;", "<")
            if not indent:
                line = indent_prefix + line
            r.append(html.escape(line))
            i += 1
        r.append("</pre>")
        if not indent and i < len(a):
            # The inner loop stopped on the closing fence; skip it. The bounds
            # check covers an unterminated fence at the end of the input.
            i += 1
    return "\n".join(r)
def convert(src: str) -> str:
    """Convert one release section of CHANGELOG.md from Markdown to HTML.

    The conversion is a fixed sequence of regular-expression rewrites plus
    the list and code-block passes. The order of the steps matters: later
    steps rely on the output of earlier ones.

    Args:
        src: Markdown text of a single release section.

    Returns:
        A complete HTML document (``<html>``/``<body>`` wrapper included).
    """
    # Escape angle brackets first, before any HTML tags are generated, so that
    # literal "<" and ">" in the release notes are not treated as markup.
    h = src.replace("<", "&lt;").replace(">", "&gt;")

    # Title
    h = re.sub(r"^## (Mypy [0-9.]+)", r"<h1>\1 Released</h1>", h, flags=re.MULTILINE)

    # Subheadings
    h = re.sub(r"\n#### ([A-Z`].*)\n", r"\n<h2>\1</h2>\n", h)

    # Sub-subheadings
    h = re.sub(r"\n\*\*([A-Z_`].*)\*\*\n", r"\n<h3>\1</h3>\n", h)
    h = re.sub(r"\n`\*\*([A-Z_`].*)\*\*\n", r"\n<h3>`\1</h3>\n", h)

    # Translate `**`
    h = re.sub(r"`\*\*`", "<tt>**</tt>", h)

    # Paragraphs
    h = re.sub(r"\n([A-Z])", r"\n<p>\1", h)

    # Bullet lists
    h = format_lists(h)

    # Code blocks
    h = format_code(h)

    # Code fragments
    h = re.sub(r"`([^`]+)`", r"<tt>\1</tt>", h)

    # Remove **** noise
    h = re.sub(r"\*\*\*\*", "", h)

    # Bold text
    h = re.sub(r"\*\*([A-Za-z].*?)\*\*", r" <b>\1</b>", h)

    # Emphasized text
    h = re.sub(r" \*([A-Za-z].*?)\*", r" <i>\1</i>", h)

    # Remove redundant PR links to avoid double links (they will be generated below)
    h = re.sub(r"\[(#[0-9]+)\]\(https://github.com/python/mypy/pull/[0-9]+/?\)", r"\1", h)

    # Issue and PR links
    h = re.sub(r"\((#[0-9]+)\) +\(([^)]+)\)", r"(\2, \1)", h)
    h = re.sub(
        r"fixes #([0-9]+)",
        r'fixes issue <a href="https://github.com/python/mypy/issues/\1">\1</a>',
        h,
    )
    h = re.sub(r"#([0-9]+)", r'PR <a href="https://github.com/python/mypy/pull/\1">\1</a>', h)
    h = re.sub(r"\) \(PR", ", PR", h)

    # Markdown links
    h = re.sub(r"\[([^]]*)\]\(([^)]*)\)", r'<a href="\2">\1</a>', h)

    # Add random links in case they are missing
    h = re.sub(
        r"contributors to typeshed:",
        'contributors to <a href="https://github.com/python/typeshed">typeshed</a>:',
        h,
    )

    # Add missing top-level HTML tags
    h = '<html>\n<meta charset="utf-8" />\n<body>\n' + h + "</body>\n</html>"

    return h
def extract_version(src: str, version: str) -> str:
    """Return the changelog section for the given mypy version.

    The section starts at the line that, after stripping whitespace, equals
    ``## Mypy <version>`` and ends just before the next line starting with
    ``"## "``, or at the end of the text if no later heading exists.

    Args:
        src: Full changelog text.
        version: Version string as it appears in the heading, e.g. ``"1.8"``.

    Returns:
        The lines of the section joined with newlines, heading included.

    Raises:
        RuntimeError: If no heading for ``version`` is found.
    """
    lines = src.splitlines()
    heading = f"## Mypy {version}"
    for start, line in enumerate(lines):
        if line.strip() == heading:
            break
    else:
        raise RuntimeError(f"Can't find heading {heading!r}")
    end = start + 1
    # The bounds check lets the last section in the file run to end of input
    # instead of indexing past the final line.
    while end < len(lines) and not lines[end].startswith("## "):
        end += 1
    return "\n".join(lines[start:end])
def main() -> None:
    """Command-line entry point: write the HTML for one release to stdout.

    Takes a single positional argument, the mypy version (``X.Y`` or
    ``X.Y.Z``), reads ``CHANGELOG.md`` from the parent of this script's
    directory, extracts that release's section and writes the converted HTML
    to standard output. Exits with an error message if the version argument
    is malformed.
    """
    parser = argparse.ArgumentParser(
        description="Generate HTML release blog post based on CHANGELOG.md and write to stdout."
    )
    parser.add_argument("version", help="mypy version, in form X.Y or X.Y.Z")
    args = parser.parse_args()
    version: str = args.version
    if not re.match(r"[0-9]+(\.[0-9]+)+$", version):
        sys.exit(f"error: Version must be of form X.Y or X.Y.Z, not {version!r}")
    changelog_path = os.path.join(os.path.dirname(__file__), os.path.pardir, "CHANGELOG.md")
    # Read with an explicit encoding (the generated page declares UTF-8) and
    # close the file promptly instead of relying on garbage collection.
    with open(changelog_path, encoding="utf-8") as f:
        src = f.read()
    src = extract_version(src, version)
    dst = convert(src)
    sys.stdout.write(dst)
# Run the converter only when this file is executed as a script.
if __name__ == "__main__":
    main()