@@ -8,11 +8,10 @@ use std::sync::Arc;
8
8
9
9
/// Integer rank stored for a byte sequence in the `ranks` maps (`HashMap<Vec<u8>, Rank>`).
pub type Rank = u32 ;
10
10
11
- fn _byte_pair_merge < T > (
12
- piece : & [ u8 ] ,
11
+ fn _byte_pair_merge (
13
12
ranks : & HashMap < Vec < u8 > , Rank > ,
14
- f : impl Fn ( std :: ops :: Range < usize > ) -> T ,
15
- ) -> Vec < T > {
13
+ piece : & [ u8 ] ,
14
+ ) -> Vec < ( usize , Rank ) > {
16
15
// This is a vector of (start, rank).
17
16
// The rank is of the byte pair starting at position start.
18
17
// The rank of the last item in the vector is not a valid value.
@@ -87,25 +86,24 @@ fn _byte_pair_merge<T>(
87
86
break ;
88
87
}
89
88
}
90
- let mut out: Vec < T > = Vec :: with_capacity ( parts. len ( ) - 1 ) ;
91
- for i in 0 ..parts. len ( ) - 1 {
92
- out. push ( f ( parts[ i] . 0 ..parts[ i + 1 ] . 0 ) ) ;
93
- }
94
- out
89
+
90
+ parts
95
91
}
96
92
97
93
pub fn byte_pair_encode ( piece : & [ u8 ] , ranks : & HashMap < Vec < u8 > , Rank > ) -> Vec < Rank > {
98
- if piece. len ( ) == 1 {
99
- return vec ! [ ranks[ piece] ] ;
100
- }
101
- _byte_pair_merge ( piece, ranks, |p| ranks[ & piece[ p. start ..p. end ] ] )
94
+ assert ! ( piece. len( ) > 1 ) ;
95
+ _byte_pair_merge ( & ranks, & piece)
96
+ . windows ( 2 )
97
+ . map ( |part| ranks[ & piece[ part[ 0 ] . 0 ..part[ 1 ] . 0 ] ] )
98
+ . collect ( )
102
99
}
103
100
104
101
pub fn byte_pair_split < ' a > ( piece : & ' a [ u8 ] , ranks : & HashMap < Vec < u8 > , Rank > ) -> Vec < & ' a [ u8 ] > {
105
- if piece. len ( ) == 1 {
106
- return vec ! [ piece] ;
107
- }
108
- _byte_pair_merge ( piece, ranks, |p| & piece[ p. start ..p. end ] )
102
+ assert ! ( piece. len( ) > 1 ) ;
103
+ _byte_pair_merge ( & ranks, & piece)
104
+ . windows ( 2 )
105
+ . map ( |part| & piece[ part[ 0 ] . 0 ..part[ 1 ] . 0 ] )
106
+ . collect ( )
109
107
}
110
108
111
109
// Various performance notes:
0 commit comments