Skip to content

Commit a2a3c51

Browse files
committed
RustDoc links: store and reuse compiled regexes
1 parent 911f9cb commit a2a3c51

File tree

1 file changed

+72
-54
lines changed

1 file changed

+72
-54
lines changed

bindings_generator/src/class_docs.rs

Lines changed: 72 additions & 54 deletions
Original file line numberDiff line numberDiff line change
@@ -3,9 +3,9 @@ use std::{collections::HashMap, fs};
33
use regex::{Captures, Regex};
44
use roxmltree::Node;
55

6-
#[derive(Debug)]
76
pub struct GodotXmlDocs {
87
class_fn_desc: HashMap<(String, String), String>, // key: (class, method); value: description after `reformat_as_rustdoc`
8+
regexes: Regexes, // compiled once when the docs are constructed and passed to `reformat_as_rustdoc`
99
}
1010

1111
impl GodotXmlDocs {
@@ -17,6 +17,7 @@ impl GodotXmlDocs {
1717

1818
let mut docs = GodotXmlDocs {
1919
class_fn_desc: HashMap::default(),
20+
regexes: Regexes::new(),
2021
};
2122

2223
for entry in entries {
@@ -127,10 +128,11 @@ impl GodotXmlDocs {
127128

128129
self.class_fn_desc.insert(
129130
(class.into(), method.into()),
130-
Self::reformat_as_rustdoc(doc),
131+
Self::reformat_as_rustdoc(&self.regexes, doc),
131132
);
132133
}
133134

135+
// For types that godot-rust names differently than Godot
134136
fn translate_type(godot_type: &str) -> &str {
135137
// Note: there is some code duplication with Ty::from_src() in api.rs
136138
match godot_type {
@@ -153,47 +155,23 @@ impl GodotXmlDocs {
153155
}
154156

155157
/// Takes the Godot documentation markup and transforms it to Rustdoc.
156-
/// Very basic approach with limitations, but already helps readability quite a bit.
157-
fn reformat_as_rustdoc(godot_doc: String) -> String {
158-
let gdscript_note = if godot_doc.contains("[codeblock]") {
159-
"_Sample code is GDScript unless otherwise noted._\n\n"
160-
} else {
161-
""
162-
};
163-
164-
// TODO reuse regex across classes
165-
// Note: there are still a few special cases, such as:
158+
/// Replaces BBCode syntax with Rustdoc/Markdown equivalents and implements working intra-doc links.
159+
fn reformat_as_rustdoc(re: &Regexes, godot_doc: String) -> String {
160+
// Note: there are still a few unsupported cases, such as:
166161
// * OK and ERR_CANT_CREATE (corresponding Result.Ok() and GodotError.ERR_CANT_CREATE)
162+
// * "indexed properties" which are not also exposed as getters, e.g. `gravity_point` in
163+
// https://docs.godotengine.org/en/stable/classes/class_area2d.html#properties.
164+
// This needs to be implemented first: https://github.com/godot-rust/godot-rust/issues/689
167165

168-
// Covers:
169-
// * [url=U]text[/url]
170-
// * [url=U][/url]
171-
let url_regex = Regex::new("\\[url=(.+?)](.*?)\\[/url]").unwrap();
172-
173-
// Covers:
174-
// * [code]C[/code]
175-
// * [signal C]
176-
// Must run before others, as [code] will itself match the link syntax
177-
let no_link_regex = Regex::new("\\[signal ([A-Za-z0-9_]+?)]").unwrap();
178-
179-
// Covers:
180-
// * [C]
181-
// * [enum C]
182-
let type_regex = Regex::new("\\[(enum )?([A-Za-z0-9_]+?)]").unwrap();
183-
184-
// Covers:
185-
// * [member M]
186-
// * [method M]
187-
// * [constant M]
188-
let self_member_regex =
189-
Regex::new("\\[(member|method|constant) ([A-Za-z0-9_]+?)]").unwrap();
190-
191-
// Covers:
192-
// * [member C.M]
193-
// * [method C.M]
194-
// * [constant C.M]
195-
let class_member_regex =
196-
Regex::new("\\[(member|method|constant) ([A-Za-z0-9_]+?)\\.([A-Za-z0-9_]+?)]").unwrap();
166+
// Info for GDScript blocks
167+
let godot_doc = if godot_doc.contains("[codeblock]") {
168+
format!(
169+
"_Sample code is GDScript unless otherwise noted._\n\n{}",
170+
godot_doc
171+
)
172+
} else {
173+
godot_doc
174+
};
197175

198176
// Before any regex replacement, do verbatim replacements
199177
// Note: maybe some can be expressed as regex, but if text-replace does the job reliably enough, it's even faster
@@ -207,15 +185,8 @@ impl GodotXmlDocs {
207185
.replace("[i]", "_")
208186
.replace("[/i]", "_");
209187

210-
// Note: we currently don't use c[1], which would be the "kind" (method/member/constant/...)
211-
// This one could be used to disambiguate the doc-link, e.g. [`{method}`][fn@Self::{method}]
212-
213-
// What currently doesn't work are "indexed properties" which are not also exposed as getters, e.g.
214-
// https://docs.godotengine.org/en/stable/classes/class_area2d.html#properties 'gravity_point'
215-
// This needs to be implemented first: https://github.com/godot-rust/godot-rust/issues/689
216-
217188
// URLs
218-
let godot_doc = url_regex.replace_all(&godot_doc, |c: &Captures| {
189+
let godot_doc = re.url.replace_all(&godot_doc, |c: &Captures| {
219190
let url = &c[1];
220191
let text = &c[2];
221192

@@ -227,7 +198,7 @@ impl GodotXmlDocs {
227198
});
228199

229200
// [Type::member] style
230-
let godot_doc = class_member_regex.replace_all(&godot_doc, |c: &Captures| {
201+
let godot_doc = re.class_member.replace_all(&godot_doc, |c: &Captures| {
231202
let godot_ty = &c[2];
232203
let rust_ty = Self::translate_type(godot_ty);
233204

@@ -240,17 +211,17 @@ impl GodotXmlDocs {
240211
});
241212

242213
// [member] style
243-
let godot_doc = self_member_regex.replace_all(&godot_doc, |c: &Captures| {
214+
let godot_doc = re.self_member.replace_all(&godot_doc, |c: &Captures| {
244215
format!("[`{member}`][Self::{member}]", member = &c[2])
245216
});
246217

247218
// `member` style (no link)
248-
let godot_doc = no_link_regex.replace_all(&godot_doc, |c: &Captures| {
219+
let godot_doc = re.no_link.replace_all(&godot_doc, |c: &Captures| {
249220
format!("`{member}`", member = &c[1])
250221
});
251222

252223
// [Type] style
253-
let godot_doc = type_regex.replace_all(&godot_doc, |c: &Captures| {
224+
let godot_doc = re.class.replace_all(&godot_doc, |c: &Captures| {
254225
let godot_ty = &c[2];
255226
let rust_ty = Self::translate_type(godot_ty);
256227

@@ -261,6 +232,53 @@ impl GodotXmlDocs {
261232
)
262233
});
263234

264-
format!("{}{}", gdscript_note, godot_doc)
235+
godot_doc.to_string()
236+
}
237+
}
238+
239+
// Holds the compiled regexes used by `GodotXmlDocs::reformat_as_rustdoc`, so they are built once and reused across classes.
240+
// The `lazy_regex` crate would be an alternative, but the standard `regex` crate has better IDE support and works well enough.
241+
struct Regexes {
242+
url: Regex, // [url=U]text[/url]
243+
no_link: Regex, // [signal C]
244+
class: Regex, // [C] and [enum C]
245+
self_member: Regex, // [member M], [method M], [constant M]
246+
class_member: Regex, // [member C.M], [method C.M], [constant C.M]
247+
}
248+
249+
impl Regexes {
250+
fn new() -> Self { // compiles every pattern up front; `unwrap()` panics if a pattern fails to compile
251+
Self {
252+
// Covers (group 1 = URL, group 2 = link text, possibly empty):
253+
// * [url=U]text[/url]
254+
// * [url=U][/url]
255+
url: Regex::new("\\[url=(.+?)](.*?)\\[/url]").unwrap(),
256+
257+
// Covers (group 1 = signal name; rendered as inline code without a link):
258+
// * [signal C]
259+
// Not covered by this pattern: [code]C[/code] (the pattern requires the literal `signal ` prefix).
260+
// NOTE(review): this was documented as "must run before others", but `reformat_as_rustdoc` applies it after `url`, `class_member` and `self_member`, and only before `class` -- confirm the intended order.
261+
no_link: Regex::new("\\[signal ([A-Za-z0-9_]+?)]").unwrap(),
262+
263+
// Covers (group 1 = optional `enum ` prefix, group 2 = type name):
264+
// * [C]
265+
// * [enum C]
266+
class: Regex::new("\\[(enum )?([A-Za-z0-9_]+?)]").unwrap(),
267+
268+
// Covers (group 1 = kind, group 2 = member name on the current class):
269+
// * [member M]
270+
// * [method M]
271+
// * [constant M]
272+
self_member: Regex::new("\\[(member|method|constant) ([A-Za-z0-9_]+?)]").unwrap(),
273+
274+
// Covers (group 1 = kind, group 2 = class name, group 3 = member name):
275+
// * [member C.M]
276+
// * [method C.M]
277+
// * [constant C.M]
278+
class_member: Regex::new(
279+
"\\[(member|method|constant) ([A-Za-z0-9_]+?)\\.([A-Za-z0-9_]+?)]",
280+
)
281+
.unwrap(),
282+
}
265283
}
266284
}

0 commit comments

Comments
 (0)