Skip to content

Commit 940a9c4

Browse files
committed
Shrink down lookup tables
1 parent f785988 commit 940a9c4

File tree

3 files changed

+1844
-1848
lines changed

3 files changed

+1844
-1848
lines changed

codegen/src/main.rs

Lines changed: 61 additions & 53 deletions
Original file line numberDiff line numberDiff line change
@@ -4,77 +4,85 @@ use regex::Regex;
44

55
use std::fs::File;
66
use std::io::prelude::*;
7-
use std::io::{BufRead, BufWriter};
7+
use std::io::BufWriter;
88

99
// Generate character mapping tables directly from the specification.
1010
fn main() {
11-
// Input from the RFC.
12-
let reader = include_bytes!("rfc3454.txt");
13-
14-
// Output to a Rust source file.
1511
let out_file = File::create("../src/rfc3454.rs").unwrap();
1612
let mut writer = BufWriter::new(out_file);
1713

18-
// Generate tables.
19-
include_table(&mut writer, &mut &reader[..], "A.1");
20-
include_table(&mut writer, &mut &reader[..], "B.2");
14+
write!(writer, "// AUTOGENERATED CODE - DO NOT EDIT\n\n").unwrap();
15+
16+
lookup_table("A.1", &mut writer);
17+
mapping_table("B.2", &mut writer);
2118
}
2219

23-
// Generate code for the named mapping table.
24-
fn include_table<R: BufRead, W: Write>(writer: &mut W, reader: &mut R, tablename: &str) {
25-
// Scan to start of table.
26-
loop {
27-
let mut line = String::new();
28-
reader.read_line(&mut line).unwrap();
29-
if line.contains("Start Table") && line.contains(tablename) {
30-
break;
20+
fn lookup_table<W: Write>(table: &str, writer: &mut W) {
21+
write!(
22+
writer,
23+
"pub const {}: &'static [(char, char)] = &[\n",
24+
table.replace(".", "_")
25+
).unwrap();
26+
27+
let regex = Regex::new("^([0-9A-F]+)(?:-([0-9A-F]+))?$").unwrap();
28+
table_lines(table, |line| {
29+
let captures = regex.captures(line).unwrap();
30+
let start = captures.get(1).unwrap().as_str();
31+
let end = captures.get(2).map_or(start, |c| c.as_str());
32+
write!(writer, " ('\\u{{{}}}', '\\u{{{}}}'),\n", start, end).unwrap();
33+
});
34+
35+
write!(writer, "];\n\n").unwrap();
36+
}
37+
38+
fn mapping_table<W: Write>(table: &str, writer: &mut W) {
39+
write!(
40+
writer,
41+
"pub const {}: &'static [(char, &'static str)] = &[\n",
42+
table.replace(".", "_"),
43+
).unwrap();
44+
45+
let regex = Regex::new(
46+
"^([0-9A-F]+); ([0-9A-F]+)(?: ([0-9A-F]+))?(?: ([0-9A-F]+))?(?: ([0-9A-F]+))?;",
47+
).unwrap();
48+
table_lines(table, |line| {
49+
let captures = regex.captures(line).unwrap();
50+
let mut it = captures.iter().filter_map(|i| i);
51+
it.next(); // skip whole match
52+
53+
let input = it.next().unwrap().as_str();
54+
write!(writer, " ('\\u{{{}}}', \"", input).unwrap();
55+
56+
for output in it {
57+
write!(writer, "\\u{{{}}}", output.as_str()).unwrap();
3158
}
32-
}
3359

34-
// Output table declaration.
35-
write!(writer, "pub const {}: &[(char, char, &str)] = &[\n", tablename.replace(".", "_")).unwrap();
60+
write!(writer, "\"),\n").unwrap();
61+
});
62+
63+
write!(writer, "];\n\n").unwrap();
64+
}
3665

37-
// For each line:
38-
let target_re = Regex::new(r"([0-9A-F]+)(-([0-9A-F]+))?(; ([0-9A-F]+)( ([0-9A-F]+))?( ([0-9A-F]+))?( ([0-9A-F]+))?;)?").unwrap();
39-
loop {
40-
let mut line = String::new();
41-
reader.read_line(&mut line).unwrap();
66+
fn table_lines<F>(table: &str, mut f: F)
67+
where
68+
F: FnMut(&str),
69+
{
70+
let mut lines = include_str!("rfc3454.txt").split('\n');
4271

43-
// Done when reach the end of the table.
72+
// fast forward to the start of the table
73+
lines.find(|line| line.contains("Start Table") && line.contains(table));
74+
75+
for line in lines {
76+
let line = line.trim();
4477
if line.contains("End Table") {
4578
break;
4679
}
4780

48-
// Skip RFC metadata.
49-
if line.contains("Hoffman & Blanchet") || line.contains("RFC 3454") {
81+
// Skip page headers/footers
82+
if line.is_empty() || line.contains("Hoffman & Blanchet") || line.contains("RFC 3454") {
5083
continue;
5184
}
5285

53-
// Generate an entry for each data line.
54-
if let Some(captures) = target_re.captures(&line) {
55-
// start char
56-
let start = captures.get(1).unwrap().as_str();
57-
58-
// end char (inclusive)
59-
let end = captures.get(3).map_or(start, |m| m.as_str());
60-
61-
// 0-4 character replacement string
62-
let mut replace = String::new();
63-
for &i in [5, 7, 9, 11].iter() {
64-
match captures.get(i) {
65-
None => break,
66-
Some(c) => {
67-
replace.push_str("\\u{");
68-
replace.push_str(c.as_str());
69-
replace.push_str("}");
70-
}
71-
}
72-
}
73-
74-
write!(writer, " ('\\u{{{}}}', '\\u{{{}}}', \"{}\"),\n", start, end, replace).unwrap()
75-
}
86+
f(line);
7687
}
77-
78-
// End table definition.
79-
write!(writer, "];\n\n").unwrap();
8088
}

0 commit comments

Comments
 (0)