Skip to content

Commit f5722f5

Browse files
committed
Move the regular expressions into global statics.
This just makes them a little easier to refer to.
1 parent d76b0bc commit f5722f5

File tree

3 files changed

+44
-36
lines changed

3 files changed

+44
-36
lines changed

mdbook-spec/Cargo.lock

Lines changed: 1 addition & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

mdbook-spec/Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ license = "MIT OR Apache-2.0"
88
[dependencies]
99
anyhow = "1.0.79"
1010
mdbook = { version = "0.4.36", default-features = false }
11+
once_cell = "1.19.0"
1112
pathdiff = "0.2.1"
1213
regex = "1.10.3"
1314
semver = "1.0.21"

mdbook-spec/src/main.rs

Lines changed: 42 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@ use mdbook::book::{Book, Chapter};
22
use mdbook::errors::Error;
33
use mdbook::preprocess::{CmdPreprocessor, Preprocessor, PreprocessorContext};
44
use mdbook::BookItem;
5+
use once_cell::sync::Lazy;
56
use regex::{Captures, Regex};
67
use semver::{Version, VersionReq};
78
use std::collections::BTreeMap;
@@ -11,6 +12,40 @@ use std::io::{self, Write as _};
1112
use std::path::PathBuf;
1213
use std::process::{self, Command};
1314

15+
/// The Regex for rules like `r[foo]`.
16+
static RULE_RE: Lazy<Regex> = Lazy::new(|| Regex::new(r"(?m)^r\[([^]]+)]$").unwrap());
17+
18+
/// The Regex for the syntax for blockquotes that have a specific CSS class,
19+
/// like `> [!WARNING]`.
20+
static ADMONITION_RE: Lazy<Regex> = Lazy::new(|| {
21+
Regex::new(r"(?m)^ *> \[!(?<admon>[^]]+)\]\n(?<blockquote>(?: *> .*\n)+)").unwrap()
22+
});
23+
24+
/// A markdown link (without the brackets) that might be a link to
25+
/// the standard library using rustdoc's intra-doc notation.
26+
const STD_LINK: &str = r"(?: [a-z]+@ )?
27+
(?: std|core|alloc|proc_macro|test )
28+
(?: ::[A-Za-z_!:<>{}()\[\]]+ )?";
29+
30+
/// The Regex for a markdown link that might be a link to the standard library.
31+
static STD_LINK_RE: Lazy<Regex> = Lazy::new(|| {
32+
Regex::new(&format!(
33+
r"(?x)
34+
(?:
35+
( \[`[^`]+`\] ) \( ({STD_LINK}) \)
36+
)
37+
| (?:
38+
( \[`{STD_LINK}`\] )
39+
)
40+
"
41+
))
42+
.unwrap()
43+
});
44+
45+
/// The Regex used to extract the std links from the HTML generated by rustdoc.
46+
static STD_LINK_EXTRACT_RE: Lazy<Regex> =
47+
Lazy::new(|| Regex::new(r#"<li><a [^>]*href="(https://doc.rust-lang.org/[^"]+)""#).unwrap());
48+
1449
fn main() {
1550
let mut args = std::env::args().skip(1);
1651
match args.next().as_deref() {
@@ -56,41 +91,15 @@ fn handle_preprocessing(pre: &dyn Preprocessor) -> Result<(), Error> {
5691
}
5792

5893
struct Spec {
94+
/// Whether warnings should be treated as errors (enabled by setting the
95+
/// `SPEC_DENY_WARNINGS` environment variable to `1`).
5996
deny_warnings: bool,
60-
rule_re: Regex,
61-
admonition_re: Regex,
62-
std_link_re: Regex,
63-
std_link_extract_re: Regex,
6497
}
6598

6699
impl Spec {
67100
pub fn new() -> Spec {
68-
// This is roughly a rustdoc intra-doc link definition.
69-
let std_link = r"(?: [a-z]+@ )?
70-
(?: std|core|alloc|proc_macro|test )
71-
(?: ::[A-Za-z_!:<>{}()\[\]]+ )?";
72101
Spec {
73102
deny_warnings: std::env::var("SPEC_DENY_WARNINGS").as_deref() == Ok("1"),
74-
rule_re: Regex::new(r"(?m)^r\[([^]]+)]$").unwrap(),
75-
admonition_re: Regex::new(
76-
r"(?m)^ *> \[!(?<admon>[^]]+)\]\n(?<blockquote>(?: *> .*\n)+)",
77-
)
78-
.unwrap(),
79-
std_link_re: Regex::new(&format!(
80-
r"(?x)
81-
(?:
82-
( \[`[^`]+`\] ) \( ({std_link}) \)
83-
)
84-
| (?:
85-
( \[`{std_link}`\] )
86-
)
87-
"
88-
))
89-
.unwrap(),
90-
std_link_extract_re: Regex::new(
91-
r#"<li><a [^>]*href="(https://doc.rust-lang.org/[^"]+)""#,
92-
)
93-
.unwrap(),
94103
}
95104
}
96105

@@ -103,7 +112,7 @@ impl Spec {
103112
) -> String {
104113
let source_path = chapter.source_path.clone().unwrap_or_default();
105114
let path = chapter.path.clone().unwrap_or_default();
106-
self.rule_re
115+
RULE_RE
107116
.replace_all(&chapter.content, |caps: &Captures| {
108117
let rule_id = &caps[1];
109118
if let Some((old, _)) =
@@ -165,7 +174,7 @@ impl Spec {
165174
/// be a CSS class is valid. The actual styling needs to be added in a CSS
166175
/// file.
167176
fn admonitions(&self, chapter: &Chapter) -> String {
168-
self.admonition_re
177+
ADMONITION_RE
169178
.replace_all(&chapter.content, |caps: &Captures| {
170179
let lower = caps["admon"].to_lowercase();
171180
format!(
@@ -185,8 +194,7 @@ impl Spec {
185194
//
186195
// links are tuples of ("[`std::foo`]", None) for links without dest,
187196
// or ("[`foo`]", "std::foo") with a dest.
188-
let mut links: Vec<_> = self
189-
.std_link_re
197+
let mut links: Vec<_> = STD_LINK_RE
190198
.captures_iter(&chapter.content)
191199
.map(|cap| {
192200
if let Some(no_dest) = cap.get(3) {
@@ -250,8 +258,7 @@ impl Spec {
250258
// Extract the links from the generated html.
251259
let generated =
252260
fs::read_to_string(tmp.path().join("doc/a/index.html")).expect("index.html generated");
253-
let urls: Vec<_> = self
254-
.std_link_extract_re
261+
let urls: Vec<_> = STD_LINK_EXTRACT_RE
255262
.captures_iter(&generated)
256263
.map(|cap| cap.get(1).unwrap().as_str())
257264
.collect();
@@ -267,8 +274,7 @@ impl Spec {
267274
}
268275

269276
// Replace any disambiguated links with just the disambiguation.
270-
let mut output = self
271-
.std_link_re
277+
let mut output = STD_LINK_RE
272278
.replace_all(&chapter.content, |caps: &Captures| {
273279
if let Some(dest) = caps.get(2) {
274280
// Replace destination parenthesis with a link definition (square brackets).

0 commit comments

Comments
 (0)