Skip to content

Commit 23b97ea

Browse files
committed
Don't allocate a new string in case mapping
1 parent 940a9c4 commit 23b97ea

File tree

2 files changed

+26
-17
lines changed

2 files changed

+26
-17
lines changed

src/lib.rs

Lines changed: 2 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -128,9 +128,7 @@ pub fn nameprep<'a>(s: &'a str) -> Result<Cow<'a, str>, Error> {
128128
// 3. Mapping
129129
let mapped = s.chars()
130130
.filter(|&c| !tables::commonly_mapped_to_nothing(c))
131-
.collect::<String>();
132-
133-
let mapped = tables::case_fold_for_nfkc(&mapped);
131+
.flat_map(tables::case_fold_for_nfkc);
134132

135133
// 4. Normalization
136134
let normalized = mapped.nfkc().collect::<String>();
@@ -174,9 +172,7 @@ pub fn nodeprep<'a>(s: &'a str) -> Result<Cow<'a, str>, Error> {
174172
// A.3. Mapping
175173
let mapped = s.chars()
176174
.filter(|&c| !tables::commonly_mapped_to_nothing(c))
177-
.collect::<String>();
178-
179-
let mapped = tables::case_fold_for_nfkc(&mapped);
175+
.flat_map(tables::case_fold_for_nfkc);
180176

181177
// A.4. Normalization
182178
let normalized = mapped.nfkc().collect::<String>();

src/tables.rs

Lines changed: 24 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,7 @@
11
//! Character Tables
22
use unicode_bidi::{bidi_class, BidiClass};
33
use std::cmp::Ordering;
4+
use std::str::Chars;
45

56
use super::rfc3454;
67

@@ -30,19 +31,31 @@ pub fn commonly_mapped_to_nothing(c: char) -> bool {
3031
}
3132

3233
/// B.2 Mapping for case-folding used with NFKC.
33-
pub fn case_fold_for_nfkc(s: &str) -> String {
34-
let mut result = String::new();
35-
36-
// Each character either maps to a sequence of replacement characters,
37-
// or is passed through as-is.
38-
for c in s.chars() {
39-
match rfc3454::B_2.binary_search_by_key(&c, |e| e.0) {
40-
Ok(idx) => result.push_str(rfc3454::B_2[idx].1),
41-
Err(_) => result.push(c),
34+
pub fn case_fold_for_nfkc(c: char) -> CaseFoldForNfkc {
35+
let inner = match rfc3454::B_2.binary_search_by_key(&c, |e| e.0) {
36+
Ok(idx) => FoldInner::Chars(rfc3454::B_2[idx].1.chars()),
37+
Err(_) => FoldInner::Char(Some(c)),
38+
};
39+
CaseFoldForNfkc(inner)
40+
}
41+
42+
enum FoldInner {
43+
Chars(Chars<'static>),
44+
Char(Option<char>),
45+
}
46+
47+
/// The iterator returned by `case_fold_for_nfkc`.
48+
pub struct CaseFoldForNfkc(FoldInner);
49+
50+
impl Iterator for CaseFoldForNfkc {
51+
type Item = char;
52+
53+
fn next(&mut self) -> Option<char> {
54+
match self.0 {
55+
FoldInner::Chars(ref mut it) => it.next(),
56+
FoldInner::Char(ref mut ch) => ch.take(),
4257
}
4358
}
44-
45-
result
4659
}
4760

4861
/// C.1.1 ASCII space characters

0 commit comments

Comments
 (0)