Skip to content

Commit 2af0c65

Browse files
author
David Judd
committed
Use SmallVec in place of Vec in decompose & avoid remove(0)
1 parent 038cf74 commit 2af0c65

File tree

1 file changed

+49
-32
lines changed

1 file changed

+49
-32
lines changed

src/decompose.rs

Lines changed: 49 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -7,40 +7,41 @@
77
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
88
// option. This file may not be copied, modified, or distributed
99
// except according to those terms.
10+
use smallvec::SmallVec;
1011
use std::fmt::{self, Write};
12+
use std::ops::Range;
1113

1214
#[derive(Clone)]
1315
enum DecompositionType {
1416
Canonical,
15-
Compatible
17+
Compatible,
1618
}
1719

1820
/// External iterator for a string decomposition's characters.
1921
#[derive(Clone)]
2022
pub struct Decompositions<I> {
2123
kind: DecompositionType,
2224
iter: I,
23-
done: bool,
2425

2526
// This buffer stores pairs of (canonical combining class, character),
2627
// pushed onto the end in text order.
2728
//
28-
// It's split into two contiguous regions by the `ready` offset. The first
29-
// `ready` pairs are sorted and ready to emit on demand. The "pending"
30-
// suffix afterwards still needs more characters for us to be able to sort
31-
// in canonical order and is not safe to emit.
32-
buffer: Vec<(u8, char)>,
33-
ready: usize,
29+
// It's divided into up to three sections:
30+
// 1) A prefix that is free space;
31+
// 2) "Ready" characters which are sorted and ready to emit on demand;
32+
// 3) A "pending" block which still needs more characters for us to be able
33+
// to sort in canonical order and is not safe to emit.
34+
buffer: SmallVec<[(u8, char); 4]>,
35+
ready: Range<usize>,
3436
}
3537

3638
#[inline]
3739
pub fn new_canonical<I: Iterator<Item=char>>(iter: I) -> Decompositions<I> {
3840
Decompositions {
3941
kind: self::DecompositionType::Canonical,
4042
iter: iter,
41-
done: false,
42-
buffer: Vec::new(),
43-
ready: 0,
43+
buffer: SmallVec::new(),
44+
ready: 0..0,
4445
}
4546
}
4647

@@ -49,41 +50,50 @@ pub fn new_compatible<I: Iterator<Item=char>>(iter: I) -> Decompositions<I> {
4950
Decompositions {
5051
kind: self::DecompositionType::Compatible,
5152
iter: iter,
52-
done: false,
53-
buffer: Vec::new(),
54-
ready: 0,
53+
buffer: SmallVec::new(),
54+
ready: 0..0,
5555
}
5656
}
5757

5858
impl<I> Decompositions<I> {
5959
#[inline]
6060
fn push_back(&mut self, ch: char) {
6161
let class = super::char::canonical_combining_class(ch);
62+
6263
if class == 0 {
6364
self.sort_pending();
6465
}
66+
6567
self.buffer.push((class, ch));
6668
}
6769

6870
#[inline]
6971
fn sort_pending(&mut self) {
70-
if self.ready == 0 && self.buffer.is_empty() {
71-
return;
72-
}
73-
7472
// NB: `sort_by_key` is stable, so it will preserve the original text's
7573
// order within a combining class.
76-
self.buffer[self.ready..].sort_by_key(|k| k.0);
77-
self.ready = self.buffer.len();
74+
self.buffer[self.ready.end..].sort_by_key(|k| k.0);
75+
self.ready.end = self.buffer.len();
7876
}
7977

8078
#[inline]
81-
fn pop_front(&mut self) -> Option<char> {
82-
if self.ready == 0 {
83-
None
79+
fn reset_buffer(&mut self) {
80+
// Equivalent to `self.buffer.drain(0..self.ready.end)` (if SmallVec
81+
// supported this API)
82+
let pending = self.buffer.len() - self.ready.end;
83+
for i in 0..pending {
84+
self.buffer[i] = self.buffer[i + self.ready.end];
85+
}
86+
self.buffer.truncate(pending);
87+
self.ready = 0..0;
88+
}
89+
90+
#[inline]
91+
fn increment_next_ready(&mut self) {
92+
let next = self.ready.start + 1;
93+
if next == self.ready.end {
94+
self.reset_buffer();
8495
} else {
85-
self.ready -= 1;
86-
Some(self.buffer.remove(0).1)
96+
self.ready.start = next;
8797
}
8898
}
8999
}
@@ -93,21 +103,28 @@ impl<I: Iterator<Item=char>> Iterator for Decompositions<I> {
93103

94104
#[inline]
95105
fn next(&mut self) -> Option<char> {
96-
while self.ready == 0 && !self.done {
106+
while self.ready.end == 0 {
97107
match (self.iter.next(), &self.kind) {
98108
(Some(ch), &DecompositionType::Canonical) => {
99109
super::char::decompose_canonical(ch, |d| self.push_back(d));
100-
},
110+
}
101111
(Some(ch), &DecompositionType::Compatible) => {
102112
super::char::decompose_compatible(ch, |d| self.push_back(d));
103-
},
113+
}
104114
(None, _) => {
105-
self.sort_pending();
106-
self.done = true;
107-
},
115+
if self.buffer.is_empty() {
116+
return None;
117+
} else {
118+
self.sort_pending();
119+
break;
120+
}
121+
}
108122
}
109123
}
110-
self.pop_front()
124+
125+
let (_, ch) = self.buffer[self.ready.start];
126+
self.increment_next_ready();
127+
Some(ch)
111128
}
112129

113130
fn size_hint(&self) -> (usize, Option<usize>) {

0 commit comments

Comments
 (0)