Skip to content

Commit 14ed9b1

Browse files
committed
Add IdentifierType tables
1 parent 0df76e8 commit 14ed9b1

File tree

2 files changed

+1529
-4
lines changed

2 files changed

+1529
-4
lines changed

scripts/unicode.py

Lines changed: 40 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -120,19 +120,57 @@ def emit_table(f, name, t_data, t_type = "&'static [(char, char)]", is_pub=True,
120120
def emit_identifier_module(f):
    """Write the generated Rust `identifier` module to `f`.

    The emitted module contains, in order:

    * the `IdentifierType` enum (UTS #39 identifier types),
    * `identifier_status_allowed(c)`, a lookup into `IDENTIFIER_STATUS`,
    * `identifier_type(c)`, a lookup into `IDENTIFIER_TYPE`,
    * the private `IDENTIFIER_STATUS` range table, built from the
      `'Allowed'` entry of `IdentifierStatus.txt`,
    * the private `IDENTIFIER_TYPE` table of `(lo, hi, type)` triples, built
      from every type listed in `IdentifierType.txt`.

    `f` only needs a `write(str)` method. The data files are read through
    `load_properties` and the tables are written through `emit_table`.
    """
    f.write("pub mod identifier {")
    f.write("""

    #[derive(Copy, Clone, Hash, Eq, PartialEq, Ord, PartialOrd, Debug)]
    #[allow(non_camel_case_types)]
    /// https://www.unicode.org/reports/tr39/#Identifier_Status_and_Type
    pub enum IdentifierType {
        // Restricted
        Not_Character,
        Deprecated,
        Default_Ignorable,
        Not_NFKC,
        Not_XID,
        Exclusion,
        Obsolete,
        Technical,
        Uncommon_Use,
        Limited_Use,

        // Allowed
        Inclusion,
        Recommended
    }
    #[inline]
    pub fn identifier_status_allowed(c: char) -> bool {
        // FIXME: do we want to special case ASCII here?
        match c as usize {
            _ => super::util::bsearch_range_table(c, IDENTIFIER_STATUS)
        }
    }

    #[inline]
    pub fn identifier_type(c: char) -> Option<IdentifierType> {
        // FIXME: do we want to special case ASCII here?
        match c as usize {
            _ => super::util::bsearch_range_value_table(c, IDENTIFIER_TYPE)
        }
    }
""")

    f.write(" // Identifier status table:\n")
    identifier_status_table = load_properties("IdentifierStatus.txt")
    # Only the 'Allowed' ranges are emitted; the Rust lookup treats membership
    # in this table as "allowed".
    emit_table(f, "IDENTIFIER_STATUS", identifier_status_table['Allowed'], "&'static [(char, char)]", is_pub=False,
               pfun=lambda x: "(%s,%s)" % (escape_char(x[0]), escape_char(x[1])))

    identifier_type = load_properties("IdentifierType.txt")
    # Flatten {type: [(lo, hi), ...]} into one list of (lo, hi, type) triples.
    type_table = []
    for ty in identifier_type:
        type_table.extend((x, y, ty) for (x, y) in identifier_type[ty])

    # Order by range start; presumably required by the Rust-side
    # bsearch_range_value_table helper -- confirm against emit_util_mod.
    type_table.sort(key=lambda w: w[0])

    emit_table(f, "IDENTIFIER_TYPE", type_table, "&'static [(char, char, IdentifierType)]", is_pub=False,
               pfun=lambda x: "(%s,%s, IdentifierType::%s)" % (escape_char(x[0]), escape_char(x[1]), x[2]))
    f.write("}\n\n")
137175

138176
def emit_util_mod(f):

0 commit comments

Comments
 (0)