7
7
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
8
8
// option. This file may not be copied, modified, or distributed
9
9
// except according to those terms.
10
+ use smallvec:: SmallVec ;
10
11
use std:: fmt:: { self , Write } ;
12
+ use std:: ops:: Range ;
11
13
12
14
#[ derive( Clone ) ]
13
15
enum DecompositionType {
14
16
Canonical ,
15
- Compatible
17
+ Compatible ,
16
18
}
17
19
18
20
/// External iterator for a string decomposition's characters.
19
21
#[ derive( Clone ) ]
20
22
pub struct Decompositions < I > {
21
23
kind : DecompositionType ,
22
24
iter : I ,
23
- done : bool ,
24
25
25
26
// This buffer stores pairs of (canonical combining class, character),
26
27
// pushed onto the end in text order.
27
28
//
28
- // It's split into two contiguous regions by the `ready` offset. The first
29
- // `ready` pairs are sorted and ready to emit on demand. The "pending"
30
- // suffix afterwards still needs more characters for us to be able to sort
31
- // in canonical order and is not safe to emit.
32
- buffer : Vec < ( u8 , char ) > ,
33
- ready : usize ,
29
+ // It's divided into up to three sections:
30
+ // 1) A prefix that is free space;
31
+ // 2) "Ready" characters which are sorted and ready to emit on demand;
32
+ // 3) A "pending" block which still needs more characters for us to be able
33
+ // to sort in canonical order and is not safe to emit.
34
+ buffer : SmallVec < [ ( u8 , char ) ; 4 ] > ,
35
+ ready : Range < usize > ,
34
36
}
35
37
36
38
#[ inline]
37
39
pub fn new_canonical < I : Iterator < Item =char > > ( iter : I ) -> Decompositions < I > {
38
40
Decompositions {
39
41
kind : self :: DecompositionType :: Canonical ,
40
42
iter : iter,
41
- done : false ,
42
- buffer : Vec :: new ( ) ,
43
- ready : 0 ,
43
+ buffer : SmallVec :: new ( ) ,
44
+ ready : 0 ..0 ,
44
45
}
45
46
}
46
47
@@ -49,41 +50,50 @@ pub fn new_compatible<I: Iterator<Item=char>>(iter: I) -> Decompositions<I> {
49
50
Decompositions {
50
51
kind : self :: DecompositionType :: Compatible ,
51
52
iter : iter,
52
- done : false ,
53
- buffer : Vec :: new ( ) ,
54
- ready : 0 ,
53
+ buffer : SmallVec :: new ( ) ,
54
+ ready : 0 ..0 ,
55
55
}
56
56
}
57
57
58
58
impl < I > Decompositions < I > {
59
59
#[ inline]
60
60
fn push_back ( & mut self , ch : char ) {
61
61
let class = super :: char:: canonical_combining_class ( ch) ;
62
+
62
63
if class == 0 {
63
64
self . sort_pending ( ) ;
64
65
}
66
+
65
67
self . buffer . push ( ( class, ch) ) ;
66
68
}
67
69
68
70
#[ inline]
69
71
fn sort_pending ( & mut self ) {
70
- if self . ready == 0 && self . buffer . is_empty ( ) {
71
- return ;
72
- }
73
-
74
72
// NB: `sort_by_key` is stable, so it will preserve the original text's
75
73
// order within a combining class.
76
- self . buffer [ self . ready ..] . sort_by_key ( |k| k. 0 ) ;
77
- self . ready = self . buffer . len ( ) ;
74
+ self . buffer [ self . ready . end . .] . sort_by_key ( |k| k. 0 ) ;
75
+ self . ready . end = self . buffer . len ( ) ;
78
76
}
79
77
80
78
#[ inline]
81
- fn pop_front ( & mut self ) -> Option < char > {
82
- if self . ready == 0 {
83
- None
79
+ fn reset_buffer ( & mut self ) {
80
+ // Equivalent to `self.buffer.drain(0..self.ready.end)` (if SmallVec
81
+ // supported this API)
82
+ let pending = self . buffer . len ( ) - self . ready . end ;
83
+ for i in 0 ..pending {
84
+ self . buffer [ i] = self . buffer [ i + self . ready . end ] ;
85
+ }
86
+ self . buffer . truncate ( pending) ;
87
+ self . ready = 0 ..0 ;
88
+ }
89
+
90
+ #[ inline]
91
+ fn increment_next_ready ( & mut self ) {
92
+ let next = self . ready . start + 1 ;
93
+ if next == self . ready . end {
94
+ self . reset_buffer ( ) ;
84
95
} else {
85
- self . ready -= 1 ;
86
- Some ( self . buffer . remove ( 0 ) . 1 )
96
+ self . ready . start = next;
87
97
}
88
98
}
89
99
}
@@ -93,21 +103,28 @@ impl<I: Iterator<Item=char>> Iterator for Decompositions<I> {
93
103
94
104
#[ inline]
95
105
fn next ( & mut self ) -> Option < char > {
96
- while self . ready == 0 && ! self . done {
106
+ while self . ready . end == 0 {
97
107
match ( self . iter . next ( ) , & self . kind ) {
98
108
( Some ( ch) , & DecompositionType :: Canonical ) => {
99
109
super :: char:: decompose_canonical ( ch, |d| self . push_back ( d) ) ;
100
- } ,
110
+ }
101
111
( Some ( ch) , & DecompositionType :: Compatible ) => {
102
112
super :: char:: decompose_compatible ( ch, |d| self . push_back ( d) ) ;
103
- } ,
113
+ }
104
114
( None , _) => {
105
- self . sort_pending ( ) ;
106
- self . done = true ;
107
- } ,
115
+ if self . buffer . is_empty ( ) {
116
+ return None ;
117
+ } else {
118
+ self . sort_pending ( ) ;
119
+ break ;
120
+ }
121
+ }
108
122
}
109
123
}
110
- self . pop_front ( )
124
+
125
+ let ( _, ch) = self . buffer [ self . ready . start ] ;
126
+ self . increment_next_ready ( ) ;
127
+ Some ( ch)
111
128
}
112
129
113
130
fn size_hint ( & self ) -> ( usize , Option < usize > ) {
0 commit comments