@@ -36,12 +36,16 @@ fn clamp_01(f: f32) -> f32 {
36
36
}
37
37
38
38
fn normalize_cosine_distance ( f : f32 ) -> f32 {
39
- clamp_01 ( ( f- 1.0 ) / -2.0 )
39
+ clamp_01 ( ( f - 1.0 ) / -2.0 )
40
40
}
41
41
42
-
43
42
pub fn normalized_cosine_distance_cpu ( left : & Embedding , right : & Embedding ) -> f32 {
44
- normalize_cosine_distance ( left. iter ( ) . zip ( right. iter ( ) ) . map ( |( l, r) |l* r) . sum :: < f32 > ( ) )
43
+ normalize_cosine_distance (
44
+ left. iter ( )
45
+ . zip ( right. iter ( ) )
46
+ . map ( |( l, r) | l * r)
47
+ . sum :: < f32 > ( ) ,
48
+ )
45
49
}
46
50
47
51
#[ cfg( feature = "simd" ) ]
@@ -57,7 +61,7 @@ pub fn normalized_cosine_distance_simd(left: &Embedding, right: &Embedding) -> f
57
61
pub fn normalize_vec_cpu ( vec : & mut Embedding ) {
58
62
let mut sum = 0.0 ;
59
63
for f in vec. iter ( ) {
60
- sum += f* f;
64
+ sum += f * f;
61
65
}
62
66
let magnitude = sum. sqrt ( ) ;
63
67
//eprintln!("cpu magnitude: {}", magnitude);
@@ -89,9 +93,9 @@ pub fn normalize_vec(vec: &mut Embedding) {
89
93
90
94
#[ cfg( feature = "simd" ) ]
91
95
pub mod simd {
92
- use packed_simd:: f32x16;
93
- use aligned_box:: AlignedBox ;
94
96
use super :: * ;
97
+ use aligned_box:: AlignedBox ;
98
+ use packed_simd:: f32x16;
95
99
96
100
pub fn aligned_box ( e : Embedding ) -> AlignedBox < Embedding > {
97
101
AlignedBox :: new ( std:: mem:: align_of :: < f32x16 > ( ) , e) . unwrap ( )
@@ -115,12 +119,15 @@ pub mod simd {
115
119
}
116
120
}
117
121
118
- pub unsafe fn normalized_cosine_distance_simd_aligned_unchecked ( left : & Embedding , right : & Embedding ) -> f32 {
122
+ pub unsafe fn normalized_cosine_distance_simd_aligned_unchecked (
123
+ left : & Embedding ,
124
+ right : & Embedding ,
125
+ ) -> f32 {
119
126
//eprintln!("using {} ({} lanes)", stringify!(f32x16), 16);
120
127
let mut sum = <f32x16 >:: splat ( 0. ) ;
121
- for x in 0 ..left. len ( ) / 16 {
122
- let l = <f32x16 >:: from_slice_aligned_unchecked ( & left[ x* 16 ..( x+ 1 ) * 16 ] ) ;
123
- let r = <f32x16 >:: from_slice_aligned_unchecked ( & right[ x* 16 ..( x+ 1 ) * 16 ] ) ;
128
+ for x in 0 ..left. len ( ) / 16 {
129
+ let l = <f32x16 >:: from_slice_aligned_unchecked ( & left[ x * 16 ..( x + 1 ) * 16 ] ) ;
130
+ let r = <f32x16 >:: from_slice_aligned_unchecked ( & right[ x * 16 ..( x + 1 ) * 16 ] ) ;
124
131
sum += l * r;
125
132
}
126
133
normalize_cosine_distance ( sum. sum ( ) )
@@ -130,26 +137,27 @@ pub mod simd {
130
137
//eprintln!("using {} ({} lanes)", stringify!(f32x16), 16);
131
138
let mut sum = <f32x16 >:: splat ( 0. ) ;
132
139
let exp = <f32x16 >:: splat ( 2. ) ;
133
- for x in 0 ..vec. len ( ) / 16 {
134
- let part = <f32x16 >:: from_slice_aligned_unchecked ( & vec[ x* 16 ..( x+ 1 ) * 16 ] ) ;
135
- sum += part* part;
140
+ for x in 0 ..vec. len ( ) / 16 {
141
+ let part = <f32x16 >:: from_slice_aligned_unchecked ( & vec[ x * 16 ..( x + 1 ) * 16 ] ) ;
142
+ sum += part * part;
136
143
}
137
144
let magnitude = sum. sum ( ) . sqrt ( ) ;
138
145
//eprintln!("simd magnitude: {}", magnitude);
139
146
let magnitude = <f32x16 >:: splat ( magnitude) ;
140
147
141
- for x in 0 ..vec. len ( ) /16 {
142
- let scaled = <f32x16 >:: from_slice_aligned_unchecked ( & vec[ x* 16 ..( x+1 ) * 16 ] ) / magnitude;
143
- scaled. write_to_slice_aligned_unchecked ( & mut vec[ x* 16 ..( x+1 ) * 16 ] ) ;
148
+ for x in 0 ..vec. len ( ) / 16 {
149
+ let scaled =
150
+ <f32x16 >:: from_slice_aligned_unchecked ( & vec[ x * 16 ..( x + 1 ) * 16 ] ) / magnitude;
151
+ scaled. write_to_slice_aligned_unchecked ( & mut vec[ x * 16 ..( x + 1 ) * 16 ] ) ;
144
152
}
145
153
}
146
154
147
155
pub fn normalized_cosine_distance_simd_unaligned ( left : & Embedding , right : & Embedding ) -> f32 {
148
156
//eprintln!("using {} ({} lanes, unaligned)", stringify!(f32x16), 16);
149
157
let mut sum = <f32x16 >:: splat ( 0. ) ;
150
- for x in 0 ..left. len ( ) / 16 {
151
- let l = <f32x16 >:: from_slice_unaligned ( & left[ x* 16 ..( x+ 1 ) * 16 ] ) ;
152
- let r = <f32x16 >:: from_slice_unaligned ( & right[ x* 16 ..( x+ 1 ) * 16 ] ) ;
158
+ for x in 0 ..left. len ( ) / 16 {
159
+ let l = <f32x16 >:: from_slice_unaligned ( & left[ x * 16 ..( x + 1 ) * 16 ] ) ;
160
+ let r = <f32x16 >:: from_slice_unaligned ( & right[ x * 16 ..( x + 1 ) * 16 ] ) ;
153
161
sum += l * r;
154
162
}
155
163
normalize_cosine_distance ( sum. sum ( ) )
@@ -159,17 +167,17 @@ pub mod simd {
159
167
//eprintln!("using {} ({} lanes, unaligned)", stringify!(f32x16), 16);
160
168
let mut sum = <f32x16 >:: splat ( 0. ) ;
161
169
//let exp = <f32x16>::splat(2.);
162
- for x in 0 ..vec. len ( ) / 16 {
163
- let part = <f32x16 >:: from_slice_unaligned ( & vec[ x* 16 ..( x+ 1 ) * 16 ] ) ;
164
- sum += part* part;
170
+ for x in 0 ..vec. len ( ) / 16 {
171
+ let part = <f32x16 >:: from_slice_unaligned ( & vec[ x * 16 ..( x + 1 ) * 16 ] ) ;
172
+ sum += part * part;
165
173
}
166
174
let magnitude = sum. sum ( ) . sqrt ( ) ;
167
175
//eprintln!("simd magnitude: {}", magnitude);
168
176
let magnitude = <f32x16 >:: splat ( magnitude) ;
169
177
170
- for x in 0 ..vec. len ( ) / 16 {
171
- let scaled = <f32x16 >:: from_slice_unaligned ( & vec[ x* 16 ..( x+ 1 ) * 16 ] ) / magnitude;
172
- scaled. write_to_slice_unaligned ( & mut vec[ x* 16 ..( x+ 1 ) * 16 ] ) ;
178
+ for x in 0 ..vec. len ( ) / 16 {
179
+ let scaled = <f32x16 >:: from_slice_unaligned ( & vec[ x * 16 ..( x + 1 ) * 16 ] ) / magnitude;
180
+ scaled. write_to_slice_unaligned ( & mut vec[ x * 16 ..( x + 1 ) * 16 ] ) ;
173
181
}
174
182
}
175
183
}
@@ -178,7 +186,9 @@ pub mod simd {
178
186
mod tests {
179
187
use rand:: { rngs:: StdRng , SeedableRng } ;
180
188
181
- use crate :: vecmath:: simd:: { normalize_vec_simd_unaligned, normalized_cosine_distance_simd_unaligned} ;
189
+ use crate :: vecmath:: simd:: {
190
+ normalize_vec_simd_unaligned, normalized_cosine_distance_simd_unaligned,
191
+ } ;
182
192
183
193
use super :: * ;
184
194
#[ test]
@@ -194,14 +204,29 @@ mod tests {
194
204
normalize_vec_cpu ( & mut e1) ;
195
205
normalize_vec_simd_unaligned ( & mut e2) ;
196
206
197
- eprintln ! ( "distance (cpu): {}" , normalized_cosine_distance_cpu( & e1, & e2) ) ;
198
- eprintln ! ( "distance (simd): {}" , normalized_cosine_distance_simd_unaligned( & e1, & e2) ) ;
199
- eprintln ! ( "distance (simd same): {}" , normalized_cosine_distance_simd_unaligned( & e1, & e1) ) ;
207
+ eprintln ! (
208
+ "distance (cpu): {}" ,
209
+ normalized_cosine_distance_cpu( & e1, & e2)
210
+ ) ;
211
+ eprintln ! (
212
+ "distance (simd): {}" ,
213
+ normalized_cosine_distance_simd_unaligned( & e1, & e2)
214
+ ) ;
215
+ eprintln ! (
216
+ "distance (simd same): {}" ,
217
+ normalized_cosine_distance_simd_unaligned( & e1, & e1)
218
+ ) ;
200
219
201
220
let mut e3 = random_embedding ( & mut rng) ;
202
221
normalize_vec_cpu ( & mut e3) ;
203
- eprintln ! ( "distance (cpu): {}" , normalized_cosine_distance_cpu( & e1, & e3) ) ;
204
- eprintln ! ( "distance (simd): {}" , normalized_cosine_distance_simd_unaligned( & e1, & e3) ) ;
222
+ eprintln ! (
223
+ "distance (cpu): {}" ,
224
+ normalized_cosine_distance_cpu( & e1, & e3)
225
+ ) ;
226
+ eprintln ! (
227
+ "distance (simd): {}" ,
228
+ normalized_cosine_distance_simd_unaligned( & e1, & e3)
229
+ ) ;
205
230
206
231
assert_eq ! ( e1, e2) ;
207
232
}
0 commit comments