@@ -47,37 +47,39 @@ def fetch(f):
47
47
sys .stderr .write ("cannot load %s\n " % f )
48
48
exit (1 )
49
49
50
- # load identifier status data
51
- def load_identifier_status ():
52
- f = "IdentifierStatus.txt"
50
+ # Implementation from unicode-segmentation
51
def load_properties(f, interestingprops=None):
    """Parse a Unicode property data file into per-property code point ranges.

    Calls ``fetch(f)`` first (defined elsewhere in this file), then reads the
    file named ``os.path.basename(f)`` relative to the current working
    directory.

    Recognised lines have one of two shapes::

        XXXX       ; Property
        XXXX..YYYY ; Property

    where XXXX / YYYY are upper-case hexadecimal code points.  Lines that
    match neither shape (comments, blanks, ...) are skipped.

    Args:
        f: Name (or path) of the data file; only its basename is opened.
        interestingprops: Optional container of property names to keep.
            When falsy (``None`` or empty) every property is kept.

    Returns:
        A dict mapping each property name to a list of inclusive
        ``(lo, hi)`` integer ranges, in file order.  A single code point
        is stored as ``(cp, cp)``.
    """
    fetch(f)

    # One pattern handles both shapes: the "..YYYY" upper bound is optional
    # and its group is None for a single code point.
    entry = re.compile(r"^ *([0-9A-F]+)(?:\.\.([0-9A-F]+))? *; *(\w+)")

    props = {}
    # A scoped open() replaces the module-global fileinput state, which the
    # previous version never closed.
    with open(os.path.basename(f)) as data:
        for line in data:
            m = entry.match(line)
            if m is None:
                continue
            lo, hi, prop = m.groups()
            if interestingprops and prop not in interestingprops:
                continue
            lo = int(lo, 16)
            hi = lo if hi is None else int(hi, 16)
            props.setdefault(prop, []).append((lo, hi))

    return props
81
83
82
84
def format_table_content (f , content , indent ):
83
85
line = " " * indent
@@ -115,41 +117,57 @@ def emit_table(f, name, t_data, t_type = "&'static [(char, char)]", is_pub=True,
115
117
format_table_content (f , data , 8 )
116
118
f .write ("\n ];\n \n " )
117
119
118
- def emit_identifier_status_module ( f , statuses_table ):
119
- f .write ("pub mod identifier_status {" )
120
def emit_identifier_module(f):
    """Write the Rust ``identifier`` module to ``f``.

    The module consists of the ``identifier_status_allowed`` lookup function
    followed by a private ``identifier_status_table`` built from the
    ``'Allowed'`` entries that ``load_properties`` returns for
    ``IdentifierStatus.txt``.

    Args:
        f: Writable text stream receiving the generated Rust source.
    """
    def format_range(bounds):
        # Render one (lo, hi) range as a Rust tuple of escaped chars.
        return "(%s,%s)" % (escape_char(bounds[0]), escape_char(bounds[1]))

    f.write("pub mod identifier {")
    f.write("""
    #[inline]
    pub fn identifier_status_allowed(c: char) -> bool {
        // FIXME: do we want to special case ASCII here?
        match c as usize {
            _ => super::util::bsearch_range_table(c, identifier_status_table)
        }
    }
""")

    f.write("    // Identifier status table:\n")
    # The data file is loaded only after the header comment has been written,
    # matching the original statement order.
    allowed_ranges = load_properties("IdentifierStatus.txt")['Allowed']
    emit_table(f, "identifier_status_table", allowed_ranges,
               "&'static [(char, char)]", is_pub=False, pfun=format_range)
    f.write("}\n\n")
122
137
138
def emit_util_mod(f):
    """Write the Rust ``util`` module to ``f``.

    The emitted module holds two binary-search helpers over sorted range
    tables: ``bsearch_range_table`` (membership test returning ``bool``) and
    ``bsearch_range_value_table`` (returns the value attached to the matching
    range as ``Option<T>``).

    Args:
        f: Writable text stream receiving the generated Rust source.
    """
    # The whole module is a fixed template; nothing is interpolated.
    util_module_source = """
pub mod util {
    use core::result::Result::{Ok, Err};
    #[inline]
    pub fn bsearch_range_table(c: char, r: &'static [(char,char)]) -> bool {
        use core::cmp::Ordering::{Equal, Less, Greater};
        r.binary_search_by(|&(lo,hi)| {
            if lo <= c && c <= hi { Equal }
            else if hi < c { Less }
            else { Greater }
        }).is_ok()
    }

    pub fn bsearch_range_value_table<T: Copy>(c: char, r: &'static [(char, char, T)]) -> Option<T> {
        use core::cmp::Ordering::{Equal, Less, Greater};
        match r.binary_search_by(|&(lo, hi, _)| {
            if lo <= c && c <= hi { Equal }
            else if hi < c { Less }
            else { Greater }
        }) {
            Ok(idx) => {
                let (_, _, cat) = r[idx];
                Some(cat)
            }
            Err(_) => None
        }
    }

}

"""
    f.write(util_module_source)
147
170
148
- f .write (" // identifier status table.\n " )
149
- emit_table (f , "identifier_status_table" , statuses_table , "&'static [(char, char)]" , is_pub = False ,
150
- pfun = lambda x : "(%s,%s)" % (escape_char (x [0 ]), escape_char (x [1 ])))
151
- f .write ("}\n \n " )
152
-
153
171
if __name__ == "__main__" :
154
172
r = "tables.rs"
155
173
if os .path .exists (r ):
@@ -164,6 +182,7 @@ def emit_identifier_status_module(f, statuses_table):
164
182
pub const UNICODE_VERSION: (u64, u64, u64) = (%s, %s, %s);
165
183
166
184
""" % UNICODE_VERSION )
167
- ### identifier status module
168
- identifier_status_table = load_identifier_status ()
169
- emit_identifier_status_module (rf , identifier_status_table )
185
+
186
+ emit_util_mod (rf )
187
+ ### identifier module
188
+ emit_identifier_module (rf )
0 commit comments