Skip to content

Commit f64d47c

Browse files
committed
Use perfect hashing for compose lookup
1 parent 2d7bfd1 commit f64d47c

File tree

3 files changed

+1906
-947
lines changed

3 files changed

+1906
-947
lines changed

scripts/unicode.py

Lines changed: 21 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -302,14 +302,31 @@ def gen_combining_class(combining_classes, out):
302302
out.write("}\n")
303303

304304
def gen_composition_table(canon_comp, out):
    """Write the Rust `composition_table` function to `out`.

    `canon_comp` maps a `(c1, c2)` pair of code points to the code point
    `c3` they compose to. Pairs where both code points are below 0x10000
    are packed into a single 32-bit key, `(c1 << 16) | c2`, and emitted as
    the salt and key/value arrays handed to `mph_lookup`. Every other pair
    is emitted as an arm of a plain `match` in the generated `else` branch.

    `minimal_perfect_hash` and `hexify` are helpers defined elsewhere in
    this script.
    """
    bmp_table = {}
    fallback = []
    for (c1, c2), c3 in canon_comp.items():
        if c1 < 0x10000 and c2 < 0x10000:
            bmp_table[(c1 << 16) | c2] = c3
        else:
            # The generated Rust takes the `else` branch whenever *either*
            # code point is >= 0x10000, so mixed pairs must land here too;
            # otherwise they would appear in neither lookup and silently
            # compose to None.
            fallback.append((c1, c2, c3))
    # Keep the match arms in a stable, sorted order in the generated file.
    fallback.sort()

    (salt, keys) = minimal_perfect_hash(bmp_table)

    out.write("pub fn composition_table(c1: char, c2: char) -> Option<char> {\n")
    out.write(" if c1 < '\\u{10000}' && c2 < '\\u{10000}' {\n")
    out.write(" mph_lookup((c1 as u32) << 16 | (c2 as u32), &[\n")
    for s in salt:
        out.write(" 0x{:x},\n".format(s))
    out.write(" ],\n")
    out.write(" &[\n")
    for k in keys:
        out.write(" (0x%s, '\\u{%s}'),\n" % (hexify(k), hexify(bmp_table[k])))
    out.write(" ],\n")
    out.write(" pair_lookup_fk, pair_lookup_fv_opt, None)\n")
    out.write(" } else {\n")
    out.write(" match (c1, c2) {\n")

    for c1, c2, c3 in fallback:
        out.write(" ('\\u{%s}', '\\u{%s}') => Some('\\u{%s}'),\n" % (hexify(c1), hexify(c2), hexify(c3)))

    out.write(" _ => None,\n")
    out.write(" }\n")
    out.write(" }\n")
    out.write("}\n")
315332

src/perfect_hash.rs

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,13 @@ fn my_hash(key: u32, salt: u32, n: usize) -> usize {
1919
(((y as u64) * (n as u64)) >> 32) as usize
2020
}
2121

22+
/// Do a lookup using minimal perfect hashing.
23+
///
24+
/// The table is stored as a sequence of "salt" values, then a sequence of
25+
/// values that contain packed key/value pairs. The strategy is to hash twice.
26+
/// The first hash retrieves a salt value that makes the second hash unique.
27+
/// The hash function doesn't have to be very good, just good enough that the
28+
/// resulting mapping is collision-free (every key lands in its own slot).
2229
#[inline]
2330
pub(crate) fn mph_lookup<KV, V, FK, FV>(x: u32, salt: &[u16], kv: &[KV], fk: FK, fv: FV,
2431
default: V) -> V

0 commit comments

Comments
 (0)