@@ -4,77 +4,85 @@ use regex::Regex;
4
4
5
5
use std:: fs:: File ;
6
6
use std:: io:: prelude:: * ;
7
- use std:: io:: { BufRead , BufWriter } ;
7
+ use std:: io:: BufWriter ;
8
8
9
9
// Generate character mapping tables directly from the specification.
10
10
fn main ( ) {
11
- // Input from the RFC.
12
- let reader = include_bytes ! ( "rfc3454.txt" ) ;
13
-
14
- // Output to a Rust source file.
15
11
let out_file = File :: create ( "../src/rfc3454.rs" ) . unwrap ( ) ;
16
12
let mut writer = BufWriter :: new ( out_file) ;
17
13
18
- // Generate tables.
19
- include_table ( & mut writer, & mut & reader[ ..] , "A.1" ) ;
20
- include_table ( & mut writer, & mut & reader[ ..] , "B.2" ) ;
14
+ write ! ( writer, "// AUTOGENERATED CODE - DO NOT EDIT\n \n " ) . unwrap ( ) ;
15
+
16
+ lookup_table ( "A.1" , & mut writer) ;
17
+ mapping_table ( "B.2" , & mut writer) ;
21
18
}
22
19
23
- // Generate code for the named mapping table.
24
- fn include_table < R : BufRead , W : Write > ( writer : & mut W , reader : & mut R , tablename : & str ) {
25
- // Scan to start of table.
26
- loop {
27
- let mut line = String :: new ( ) ;
28
- reader. read_line ( & mut line) . unwrap ( ) ;
29
- if line. contains ( "Start Table" ) && line. contains ( tablename) {
30
- break ;
20
+ fn lookup_table < W : Write > ( table : & str , writer : & mut W ) {
21
+ write ! (
22
+ writer,
23
+ "pub const {}: &'static [(char, char)] = &[\n " ,
24
+ table. replace( "." , "_" )
25
+ ) . unwrap ( ) ;
26
+
27
+ let regex = Regex :: new ( "^([0-9A-F]+)(?:-([0-9A-F]+))?$" ) . unwrap ( ) ;
28
+ table_lines ( table, |line| {
29
+ let captures = regex. captures ( line) . unwrap ( ) ;
30
+ let start = captures. get ( 1 ) . unwrap ( ) . as_str ( ) ;
31
+ let end = captures. get ( 2 ) . map_or ( start, |c| c. as_str ( ) ) ;
32
+ write ! ( writer, " ('\\ u{{{}}}', '\\ u{{{}}}'),\n " , start, end) . unwrap ( ) ;
33
+ } ) ;
34
+
35
+ write ! ( writer, "];\n \n " ) . unwrap ( ) ;
36
+ }
37
+
38
+ fn mapping_table < W : Write > ( table : & str , writer : & mut W ) {
39
+ write ! (
40
+ writer,
41
+ "pub const {}: &'static [(char, &'static str)] = &[\n " ,
42
+ table. replace( "." , "_" ) ,
43
+ ) . unwrap ( ) ;
44
+
45
+ let regex = Regex :: new (
46
+ "^([0-9A-F]+); ([0-9A-F]+)(?: ([0-9A-F]+))?(?: ([0-9A-F]+))?(?: ([0-9A-F]+))?;" ,
47
+ ) . unwrap ( ) ;
48
+ table_lines ( table, |line| {
49
+ let captures = regex. captures ( line) . unwrap ( ) ;
50
+ let mut it = captures. iter ( ) . filter_map ( |i| i) ;
51
+ it. next ( ) ; // skip whole match
52
+
53
+ let input = it. next ( ) . unwrap ( ) . as_str ( ) ;
54
+ write ! ( writer, " ('\\ u{{{}}}', \" " , input) . unwrap ( ) ;
55
+
56
+ for output in it {
57
+ write ! ( writer, "\\ u{{{}}}" , output. as_str( ) ) . unwrap ( ) ;
31
58
}
32
- }
33
59
34
- // Output table declaration.
35
- write ! ( writer, "pub const {}: &[(char, char, &str)] = &[\n " , tablename. replace( "." , "_" ) ) . unwrap ( ) ;
60
+ write ! ( writer, "\" ),\n " ) . unwrap ( ) ;
61
+ } ) ;
62
+
63
+ write ! ( writer, "];\n \n " ) . unwrap ( ) ;
64
+ }
36
65
37
- // For each line:
38
- let target_re = Regex :: new ( r"([0-9A-F]+)(-([0-9A-F]+))?(; ([0-9A-F]+)( ([0-9A-F]+))?( ([0-9A-F]+))?( ([0-9A-F]+))?;)?" ) . unwrap ( ) ;
39
- loop {
40
- let mut line = String :: new ( ) ;
41
- reader . read_line ( & mut line ) . unwrap ( ) ;
66
+ fn table_lines < F > ( table : & str , mut f : F )
67
+ where
68
+ F : FnMut ( & str ) ,
69
+ {
70
+ let mut lines = include_str ! ( "rfc3454.txt" ) . split ( '\n' ) ;
42
71
43
- // Done when reach the end of the table.
72
+ // fast forward to the start of the table
73
+ lines. find ( |line| line. contains ( "Start Table" ) && line. contains ( table) ) ;
74
+
75
+ for line in lines {
76
+ let line = line. trim ( ) ;
44
77
if line. contains ( "End Table" ) {
45
78
break ;
46
79
}
47
80
48
- // Skip RFC metadata.
49
- if line. contains ( "Hoffman & Blanchet" ) || line. contains ( "RFC 3454" ) {
81
+ // Skip page headers/footers
82
+ if line. is_empty ( ) || line . contains ( "Hoffman & Blanchet" ) || line. contains ( "RFC 3454" ) {
50
83
continue ;
51
84
}
52
85
53
- // Generate an entry for each data line.
54
- if let Some ( captures) = target_re. captures ( & line) {
55
- // start char
56
- let start = captures. get ( 1 ) . unwrap ( ) . as_str ( ) ;
57
-
58
- // end char (inclusive)
59
- let end = captures. get ( 3 ) . map_or ( start, |m| m. as_str ( ) ) ;
60
-
61
- // 0-4 character replacement string
62
- let mut replace = String :: new ( ) ;
63
- for & i in [ 5 , 7 , 9 , 11 ] . iter ( ) {
64
- match captures. get ( i) {
65
- None => break ,
66
- Some ( c) => {
67
- replace. push_str ( "\\ u{" ) ;
68
- replace. push_str ( c. as_str ( ) ) ;
69
- replace. push_str ( "}" ) ;
70
- }
71
- }
72
- }
73
-
74
- write ! ( writer, " ('\\ u{{{}}}', '\\ u{{{}}}', \" {}\" ),\n " , start, end, replace) . unwrap ( )
75
- }
86
+ f ( line) ;
76
87
}
77
-
78
- // End table definition.
79
- write ! ( writer, "];\n \n " ) . unwrap ( ) ;
80
88
}
0 commit comments