Skip to content

Commit 94e836c

Browse files
author
Markus Westerlind
committed
perf: Merge ranges which only consist of a single character
By making `MAPPING_TABLE` consist of slices, we merge all runs of single-character ranges into a single range and subtract the start of the range from the actual codepoint to figure out which `Mapping` each of those individual characters has. This reduces the size of the table that is binary searched to about 1/4, which does not impact runtime performance very much but does remove about (6000 * 8 - 1500 * 16 =) 24kb from the resulting binary (with more improvements to follow).
1 parent bb83d99 commit 94e836c

File tree

3 files changed

+1821
-13862
lines changed

3 files changed

+1821
-13862
lines changed

idna/src/make_uts46_mapping_table.py

Lines changed: 37 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
# You can get the latest idna table from
1111
# http://www.unicode.org/Public/idna/latest/IdnaMappingTable.txt
1212

13+
from __future__ import print_function
1314
import collections
1415
import itertools
1516

@@ -82,6 +83,7 @@ def rust_slice(s):
8283

8384
def mergeable_key(r):
8485
mapping = r[2]
86+
8587
# These types have associated data, so we should not merge them.
8688
if mapping in ('Mapped', 'Deviation', 'DisallowedStd3Mapped'):
8789
return r
@@ -121,23 +123,49 @@ def mergeable_key(r):
121123
unicode_str = group[0][3]
122124
optimized_ranges.append((first, last, mapping, unicode_str))
123125

126+
import sys
127+
def merge_single_char_ranges(ranges):
128+
current = []
129+
for r in ranges:
130+
mapping = r[2]
131+
132+
if not current or current[-1][0] == current[-1][1] and r[0] == r[1]:
133+
current.append(r)
134+
continue
135+
if len(current) != 0:
136+
ret = current
137+
current = [r]
138+
yield ret
139+
continue
140+
current.append(r)
141+
ret = current
142+
current = []
143+
yield ret
144+
145+
optimized_ranges = list(merge_single_char_ranges(optimized_ranges))
146+
124147

125148
print("static TABLE: &'static [Range] = &[")
126149

127-
for (first, last, mapping, unicode_str) in optimized_ranges:
128-
if unicode_str is not None:
129-
mapping += rust_slice(strtab_slice(unicode_str))
150+
for ranges in optimized_ranges:
151+
first = ranges[0][0]
152+
last = ranges[-1][1]
130153
print(" Range { from: '%s', to: '%s', }," % (escape_char(char(first)),
131-
escape_char(char(last))))
154+
escape_char(char(last))))
132155

133156
print("];\n")
134157

135-
print("static MAPPING_TABLE: &'static [Mapping] = &[")
158+
print("static MAPPING_TABLE: &'static [&[Mapping]] = &[")
159+
160+
for ranges in optimized_ranges:
161+
print("&[", end='')
162+
163+
for (first, last, mapping, unicode_str) in ranges:
164+
if unicode_str is not None:
165+
mapping += rust_slice(strtab_slice(unicode_str))
166+
print("%s, " % mapping, end='')
136167

137-
for (first, last, mapping, unicode_str) in optimized_ranges:
138-
if unicode_str is not None:
139-
mapping += rust_slice(strtab_slice(unicode_str))
140-
print(" %s," % mapping)
168+
print("],")
141169

142170
print("];\n")
143171

idna/src/uts46.rs

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -67,7 +67,14 @@ fn find_char(codepoint: char) -> &'static Mapping {
6767
Equal
6868
}
6969
});
70-
r.ok().map(|i| &MAPPING_TABLE[i]).unwrap()
70+
r.ok().map(|i| {
71+
let xs = &MAPPING_TABLE[i];
72+
if xs.len() == 1 {
73+
&xs[0]
74+
} else {
75+
&xs[codepoint as usize - TABLE[i].from as usize]
76+
}
77+
}).unwrap()
7178
}
7279

7380
fn map_char(codepoint: char, flags: Flags, output: &mut String, errors: &mut Vec<Error>) {

0 commit comments

Comments
 (0)