Skip to content

Commit 9267aea

Browse files
authored
Merge branch 'master' into smallvec
2 parents 1f3bd9a + 921cab3 commit 9267aea

File tree

6 files changed

+6804
-5750
lines changed

6 files changed

+6804
-5750
lines changed

Cargo.toml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@ Decomposition and Recomposition, as described in
1818
Unicode Standard Annex #15.
1919
"""
2020

21-
exclude = [ "target/*", "Cargo.lock", "scripts/tmp", "*.txt" ]
21+
exclude = [ "target/*", "Cargo.lock", "scripts/tmp", "*.txt", "src/normalization_tests.rs", "src/test.rs" ]
2222

2323
[dependencies]
24-
smallvec = "0.6"
24+
smallvec = "0.6"

scripts/unicode.py

Lines changed: 27 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -315,14 +315,19 @@ def gen_decomposition_tables(canon_decomp, compat_decomp, out):
315315
for table, name in tables:
316316
out.write("#[inline]\n")
317317
out.write("pub fn %s_fully_decomposed(c: char) -> Option<&'static [char]> {\n" % name)
318-
out.write(" match c {\n")
318+
# The "Some" constructor is around the match statement here, because
319+
# putting it into the individual arms would make the item_bodies
320+
# checking of rustc take almost twice as long, and it's already pretty
321+
# slow because of the huge number of match arms and the fact that there
322+
# is a borrow inside each arm
323+
out.write(" Some(match c {\n")
319324

320325
for char, chars in sorted(table.items()):
321326
d = ", ".join("'\u{%s}'" % hexify(c) for c in chars)
322-
out.write(" '\u{%s}' => Some(&[%s]),\n" % (hexify(char), d))
327+
out.write(" '\u{%s}' => &[%s],\n" % (hexify(char), d))
323328

324-
out.write(" _ => None,\n")
325-
out.write(" }\n")
329+
out.write(" _ => return None,\n")
330+
out.write(" })\n")
326331
out.write("}\n")
327332
out.write("\n")
328333

@@ -347,12 +352,24 @@ def gen_nfc_qc(prop_tables, out):
347352
gen_qc_match(prop_tables['NFC_QC'], out)
348353
out.write("}\n")
349354

355+
def gen_nfkc_qc(prop_tables, out):
356+
out.write("#[inline]\n")
357+
out.write("pub fn qc_nfkc(c: char) -> IsNormalized {\n")
358+
gen_qc_match(prop_tables['NFKC_QC'], out)
359+
out.write("}\n")
360+
350361
def gen_nfd_qc(prop_tables, out):
351362
out.write("#[inline]\n")
352363
out.write("pub fn qc_nfd(c: char) -> IsNormalized {\n")
353364
gen_qc_match(prop_tables['NFD_QC'], out)
354365
out.write("}\n")
355366

367+
def gen_nfkd_qc(prop_tables, out):
368+
out.write("#[inline]\n")
369+
out.write("pub fn qc_nfkd(c: char) -> IsNormalized {\n")
370+
gen_qc_match(prop_tables['NFKD_QC'], out)
371+
out.write("}\n")
372+
356373
def gen_combining_mark(general_category_mark, out):
357374
out.write("#[inline]\n")
358375
out.write("pub fn is_combining_mark(c: char) -> bool {\n")
@@ -441,9 +458,15 @@ def gen_tests(tests, out):
441458
gen_nfc_qc(data.norm_props, out)
442459
out.write("\n")
443460

461+
gen_nfkc_qc(data.norm_props, out)
462+
out.write("\n")
463+
444464
gen_nfd_qc(data.norm_props, out)
445465
out.write("\n")
446466

467+
gen_nfkd_qc(data.norm_props, out)
468+
out.write("\n")
469+
447470
gen_stream_safe(data.ss_leading, data.ss_trailing, out)
448471
out.write("\n")
449472

src/lib.rs

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -49,10 +49,14 @@ pub use quick_check::{
4949
IsNormalized,
5050
is_nfc,
5151
is_nfc_quick,
52+
is_nfkc,
53+
is_nfkc_quick,
5254
is_nfc_stream_safe,
5355
is_nfc_stream_safe_quick,
5456
is_nfd,
5557
is_nfd_quick,
58+
is_nfkd,
59+
is_nfkd_quick,
5660
is_nfd_stream_safe,
5761
is_nfd_stream_safe_quick,
5862
};

src/quick_check.rs

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -70,12 +70,25 @@ pub fn is_nfc_quick<I: Iterator<Item=char>>(s: I) -> IsNormalized {
7070
quick_check(s, tables::qc_nfc, false)
7171
}
7272

73+
74+
/// Quickly check if a string is in NFKC.
75+
#[inline]
76+
pub fn is_nfkc_quick<I: Iterator<Item=char>>(s: I) -> IsNormalized {
77+
quick_check(s, tables::qc_nfkc, false)
78+
}
79+
7380
/// Quickly check if a string is in NFD.
7481
#[inline]
7582
pub fn is_nfd_quick<I: Iterator<Item=char>>(s: I) -> IsNormalized {
7683
quick_check(s, tables::qc_nfd, false)
7784
}
7885

86+
/// Quickly check if a string is in NFKD.
87+
#[inline]
88+
pub fn is_nfkd_quick<I: Iterator<Item=char>>(s: I) -> IsNormalized {
89+
quick_check(s, tables::qc_nfkd, false)
90+
}
91+
7992
/// Quickly check if a string is Stream-Safe NFC.
8093
#[inline]
8194
pub fn is_nfc_stream_safe_quick<I: Iterator<Item=char>>(s: I) -> IsNormalized {
@@ -98,6 +111,16 @@ pub fn is_nfc(s: &str) -> bool {
98111
}
99112
}
100113

114+
/// Authoritatively check if a string is in NFKC.
115+
#[inline]
116+
pub fn is_nfkc(s: &str) -> bool {
117+
match is_nfkc_quick(s.chars()) {
118+
IsNormalized::Yes => true,
119+
IsNormalized::No => false,
120+
IsNormalized::Maybe => s.chars().eq(s.chars().nfkc()),
121+
}
122+
}
123+
101124
/// Authoritatively check if a string is in NFD.
102125
#[inline]
103126
pub fn is_nfd(s: &str) -> bool {
@@ -108,6 +131,16 @@ pub fn is_nfd(s: &str) -> bool {
108131
}
109132
}
110133

134+
/// Authoritatively check if a string is in NFKD.
135+
#[inline]
136+
pub fn is_nfkd(s: &str) -> bool {
137+
match is_nfkd_quick(s.chars()) {
138+
IsNormalized::Yes => true,
139+
IsNormalized::No => false,
140+
IsNormalized::Maybe => s.chars().eq(s.chars().nfkd()),
141+
}
142+
}
143+
111144
/// Authoritatively check if a string is Stream-Safe NFC.
112145
#[inline]
113146
pub fn is_nfc_stream_safe(s: &str) -> bool {

0 commit comments

Comments
 (0)