Skip to content

Commit 2d7bfd1

Browse files
committed
Use perfect hashing for decomposition also
1 parent db57ffc commit 2d7bfd1

File tree

3 files changed

+11516
-5768
lines changed

3 files changed

+11516
-5768
lines changed

scripts/unicode.py

Lines changed: 13 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -316,23 +316,20 @@ def gen_composition_table(canon_comp, out):
316316
def gen_decomposition_tables(canon_decomp, compat_decomp, out):
317317
tables = [(canon_decomp, 'canonical'), (compat_decomp, 'compatibility')]
318318
for table, name in tables:
319-
out.write("#[inline]\n")
319+
(salt, keys) = minimal_perfect_hash(table)
320+
out.write("const {}_DECOMPOSED_KV: &[(u32, &'static [char])] = &[\n".format(name.upper()))
321+
for char in keys:
322+
d = ", ".join("'\\u{%s}'" % hexify(c) for c in table[char])
323+
out.write(" (0x{:x}, &[{}]),\n".format(char, d))
324+
out.write("];\n")
320325
out.write("pub fn %s_fully_decomposed(c: char) -> Option<&'static [char]> {\n" % name)
321-
# The "Some" constructor is around the match statement here, because
322-
# putting it into the individual arms would make the item_bodies
323-
# checking of rustc take almost twice as long, and it's already pretty
324-
# slow because of the huge number of match arms and the fact that there
325-
# is a borrow inside each arm
326-
out.write(" Some(match c {\n")
327-
328-
for char, chars in sorted(table.items()):
329-
d = ", ".join("'\\u{%s}'" % hexify(c) for c in chars)
330-
out.write(" '\\u{%s}' => &[%s],\n" % (hexify(char), d))
331-
332-
out.write(" _ => return None,\n")
333-
out.write(" })\n")
326+
out.write(" mph_lookup(c.into(), &[\n")
327+
for s in salt:
328+
out.write(" 0x{:x},\n".format(s))
329+
out.write(" ],\n")
330+
out.write(" {}_DECOMPOSED_KV,\n".format(name.upper()))
331+
out.write(" pair_lookup_fk, pair_lookup_fv_opt, None)\n")
334332
out.write("}\n")
335-
out.write("\n")
336333

337334
def gen_qc_match(prop_table, out):
338335
out.write(" match c {\n")
@@ -374,7 +371,6 @@ def gen_nfkd_qc(prop_tables, out):
374371
out.write("}\n")
375372

376373
def gen_combining_mark(general_category_mark, out):
377-
out.write("#[inline]\n")
378374
(salt, keys) = minimal_perfect_hash(general_category_mark)
379375
out.write("pub fn is_combining_mark(c: char) -> bool {\n")
380376
out.write(" mph_lookup(c.into(), &[\n")
@@ -388,8 +384,8 @@ def gen_combining_mark(general_category_mark, out):
388384
out.write(" bool_lookup_fk, bool_lookup_fv, false)\n")
389385
out.write("}\n")
390386

391-
392387
def gen_stream_safe(leading, trailing, out):
388+
# This could be done as a hash but the table is very small.
393389
out.write("#[inline]\n")
394390
out.write("pub fn stream_safe_leading_nonstarters(c: char) -> usize {\n")
395391
out.write(" match c {\n")

src/perfect_hash.rs

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -56,3 +56,16 @@ pub(crate) fn bool_lookup_fk(kv: u32) -> u32 {
5656
pub(crate) fn bool_lookup_fv(_kv: u32) -> bool {
5757
true
5858
}
59+
60+
/// Extract the key in a pair.
61+
#[inline]
62+
pub(crate) fn pair_lookup_fk<T>(kv: (u32, T)) -> u32 {
63+
kv.0
64+
}
65+
66+
/// Extract the value in a pair, returning an option.
67+
#[inline]
68+
pub(crate) fn pair_lookup_fv_opt<T>(kv: (u32, T)) -> Option<T> {
69+
Some(kv.1)
70+
}
71+

0 commit comments

Comments
 (0)