Skip to content

Commit 1d186dc

Browse files
authored
feat: add vector_norm, vector_dims and normalize for all vector types (#485)
fix: remove linking hacks in CI

Signed-off-by: usamoi <usamoi@outlook.com>
1 parent 887a851 commit 1d186dc

31 files changed

+608
-486
lines changed

.github/workflows/psql.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -95,7 +95,7 @@ jobs:
9595
echo PGRX_TARGET_INFO_PATH_PG$VERSION=$HOME/.pgrx_binding >> "$GITHUB_ENV"
9696
- name: Build Release
9797
run: |
98-
cargo build --no-default-features --features "pg$VERSION" --release
98+
cargo build --lib --no-default-features --features "pg$VERSION" --release
9999
./tools/schema.sh --no-default-features --features "pg$VERSION" --release | expand -t 4 > ./target/vectors--$SEMVER.sql
100100
- name: Set up PostgreSQL
101101
run: |

.github/workflows/release.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -92,7 +92,7 @@ jobs:
9292
echo PGRX_TARGET_INFO_PATH_PG$VERSION=$HOME/.pgrx_binding >> "$GITHUB_ENV"
9393
- name: Build
9494
run: |
95-
cargo build --no-default-features --features pg$VERSION --release --target $ARCH-unknown-linux-gnu
95+
cargo build --lib --no-default-features --features pg$VERSION --release --target $ARCH-unknown-linux-gnu
9696
./tools/schema.sh --no-default-features --features pg$VERSION --release --target $ARCH-unknown-linux-gnu | expand -t 4 > ./target/vectors--$SEMVER.sql
9797
- name: Package
9898
run: |

.github/workflows/rust.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -100,7 +100,7 @@ jobs:
100100
- name: Clippy
101101
run: cargo clippy --no-default-features --features "pg$VERSION" --target $ARCH-unknown-linux-gnu
102102
- name: Build
103-
run: cargo build --no-default-features --features "pg$VERSION" --target $ARCH-unknown-linux-gnu
103+
run: cargo build --lib --no-default-features --features "pg$VERSION" --target $ARCH-unknown-linux-gnu
104104
- name: Post Set up Cache
105105
uses: actions/cache/save@v4
106106
if: ${{ !steps.cache.outputs.cache-hit }}

crates/base/src/scalar/f32.rs

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
use super::ScalarLike;
2+
use num_traits::Zero;
23
use serde::{Deserialize, Serialize};
34
use std::cmp::Ordering;
45
use std::fmt::{Debug, Display};
@@ -65,7 +66,7 @@ unsafe impl bytemuck::Zeroable for F32 {}
6566

6667
unsafe impl bytemuck::Pod for F32 {}
6768

68-
impl num_traits::Zero for F32 {
69+
impl Zero for F32 {
6970
fn zero() -> Self {
7071
Self(f32::zero())
7172
}
@@ -468,7 +469,7 @@ impl AddAssign<F32> for F32 {
468469

469470
impl Sum for F32 {
470471
fn sum<I: Iterator<Item = Self>>(iter: I) -> Self {
471-
iter.fold(F32(0.0), Add::add)
472+
iter.fold(F32::zero(), Add::add)
472473
}
473474
}
474475

crates/base/src/scalar/half_f16.rs

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,11 @@
11
use super::ScalarLike;
22
use crate::scalar::F32;
33
use half::f16;
4+
use num_traits::Zero;
45
use serde::{Deserialize, Serialize};
56
use std::cmp::Ordering;
67
use std::fmt::{Debug, Display};
8+
use std::iter::Sum;
79
use std::num::ParseFloatError;
810
use std::ops::*;
911
use std::str::FromStr;
@@ -51,7 +53,7 @@ unsafe impl bytemuck::Zeroable for F16 {}
5153

5254
unsafe impl bytemuck::Pod for F16 {}
5355

54-
impl num_traits::Zero for F16 {
56+
impl Zero for F16 {
5557
fn zero() -> Self {
5658
Self(f16::zero())
5759
}
@@ -452,6 +454,12 @@ impl AddAssign<F16> for F16 {
452454
}
453455
}
454456

457+
impl Sum for F16 {
458+
fn sum<I: Iterator<Item = Self>>(iter: I) -> Self {
459+
iter.fold(F16::zero(), Add::add)
460+
}
461+
}
462+
455463
impl Sub<F16> for F16 {
456464
type Output = F16;
457465

crates/base/src/vector/bvecf32.rs

Lines changed: 29 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@ impl BVecf32Owned {
1717
pub fn new(dims: u16, data: Vec<usize>) -> Self {
1818
Self::new_checked(dims, data).unwrap()
1919
}
20+
2021
#[inline(always)]
2122
pub fn new_checked(dims: u16, data: Vec<usize>) -> Option<Self> {
2223
if dims == 0 {
@@ -31,6 +32,7 @@ impl BVecf32Owned {
3132
}
3233
unsafe { Some(Self::new_unchecked(dims, data)) }
3334
}
35+
3436
/// # Safety
3537
///
3638
/// * `dims` must be in `1..=65535`.
@@ -50,24 +52,6 @@ impl BVecf32Owned {
5052
data: vec![0; size],
5153
}
5254
}
53-
54-
#[inline(always)]
55-
pub fn set(&mut self, index: usize, value: bool) {
56-
assert!(index < self.dims as usize);
57-
if value {
58-
self.data[index / BVEC_WIDTH] |= 1 << (index % BVEC_WIDTH);
59-
} else {
60-
self.data[index / BVEC_WIDTH] &= !(1 << (index % BVEC_WIDTH));
61-
}
62-
}
63-
64-
/// # Safety
65-
///
66-
/// The caller must ensure that it won't modify the padding bits
67-
#[inline(always)]
68-
pub unsafe fn data_mut(&mut self) -> &mut [usize] {
69-
&mut self.data
70-
}
7155
}
7256

7357
impl VectorOwned for BVecf32Owned {
@@ -81,13 +65,15 @@ impl VectorOwned for BVecf32Owned {
8165
self.dims as u32
8266
}
8367

68+
#[inline(always)]
8469
fn for_borrow(&self) -> BVecf32Borrowed<'_> {
8570
BVecf32Borrowed {
8671
dims: self.dims,
8772
data: &self.data,
8873
}
8974
}
9075

76+
#[inline(always)]
9177
fn to_vec(&self) -> Vec<F32> {
9278
self.for_borrow().to_vec()
9379
}
@@ -104,6 +90,7 @@ impl<'a> BVecf32Borrowed<'a> {
10490
pub fn new(dims: u16, data: &'a [usize]) -> Self {
10591
Self::new_checked(dims, data).unwrap()
10692
}
93+
10794
#[inline(always)]
10895
pub fn new_checked(dims: u16, data: &'a [usize]) -> Option<Self> {
10996
if dims == 0 {
@@ -118,6 +105,7 @@ impl<'a> BVecf32Borrowed<'a> {
118105
}
119106
unsafe { Some(Self::new_unchecked(dims, data)) }
120107
}
108+
121109
/// # Safety
122110
///
123111
/// * `dims` must be in `1..=65535`.
@@ -128,6 +116,18 @@ impl<'a> BVecf32Borrowed<'a> {
128116
Self { dims, data }
129117
}
130118

119+
#[inline(always)]
120+
pub fn data(&self) -> &'a [usize] {
121+
self.data
122+
}
123+
124+
#[inline(always)]
125+
pub fn get(&self, index: usize) -> bool {
126+
assert!(index < self.dims as usize);
127+
self.data[index / BVEC_WIDTH] & (1 << (index % BVEC_WIDTH)) != 0
128+
}
129+
130+
#[inline(always)]
131131
pub fn iter(self) -> impl Iterator<Item = bool> + 'a {
132132
let mut index = 0;
133133
std::iter::from_fn(move || {
@@ -140,16 +140,6 @@ impl<'a> BVecf32Borrowed<'a> {
140140
}
141141
})
142142
}
143-
144-
pub fn get(&self, index: usize) -> bool {
145-
assert!(index < self.dims as usize);
146-
self.data[index / BVEC_WIDTH] & (1 << (index % BVEC_WIDTH)) != 0
147-
}
148-
149-
#[inline(always)]
150-
pub fn data(&self) -> &'a [usize] {
151-
self.data
152-
}
153143
}
154144

155145
impl<'a> VectorBorrowed for BVecf32Borrowed<'a> {
@@ -168,9 +158,20 @@ impl<'a> VectorBorrowed for BVecf32Borrowed<'a> {
168158
}
169159
}
170160

161+
#[inline(always)]
171162
fn to_vec(&self) -> Vec<F32> {
172163
self.iter().map(|i| F32(i as u32 as f32)).collect()
173164
}
165+
166+
#[inline(always)]
167+
fn length(&self) -> F32 {
168+
length(*self)
169+
}
170+
171+
#[inline(always)]
172+
fn normalize(&self) -> BVecf32Owned {
173+
unimplemented!()
174+
}
174175
}
175176

176177
impl<'a> Ord for BVecf32Borrowed<'a> {

crates/base/src/vector/mod.rs

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@ pub use vecf32::{Vecf32Borrowed, Vecf32Owned};
1111
pub use veci8::{Veci8Borrowed, Veci8Owned};
1212

1313
use crate::scalar::ScalarLike;
14+
use crate::scalar::F32;
1415
use serde::{Deserialize, Serialize};
1516

1617
#[repr(u8)]
@@ -45,6 +46,10 @@ pub trait VectorBorrowed: Copy {
4546
fn dims(&self) -> u32;
4647

4748
fn to_vec(&self) -> Vec<Self::Scalar>;
49+
50+
fn length(&self) -> F32;
51+
52+
fn normalize(&self) -> Self::Owned;
4853
}
4954

5055
#[derive(Debug, Clone, Serialize, Deserialize)]

crates/base/src/vector/svecf32.rs

Lines changed: 26 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@ impl SVecf32Owned {
1515
pub fn new(dims: u32, indexes: Vec<u32>, values: Vec<F32>) -> Self {
1616
Self::new_checked(dims, indexes, values).unwrap()
1717
}
18+
1819
#[inline(always)]
1920
pub fn new_checked(dims: u32, indexes: Vec<u32>, values: Vec<F32>) -> Option<Self> {
2021
if !(1..=1_048_575).contains(&dims) {
@@ -39,6 +40,7 @@ impl SVecf32Owned {
3940
}
4041
unsafe { Some(Self::new_unchecked(dims, indexes, values)) }
4142
}
43+
4244
/// # Safety
4345
///
4446
/// * `dims` must be in `1..=1_048_575`.
@@ -53,10 +55,12 @@ impl SVecf32Owned {
5355
values,
5456
}
5557
}
58+
5659
#[inline(always)]
5760
pub fn indexes(&self) -> &[u32] {
5861
&self.indexes
5962
}
63+
6064
#[inline(always)]
6165
pub fn values(&self) -> &[F32] {
6266
&self.values
@@ -74,6 +78,7 @@ impl VectorOwned for SVecf32Owned {
7478
self.dims
7579
}
7680

81+
#[inline(always)]
7782
fn for_borrow(&self) -> SVecf32Borrowed<'_> {
7883
SVecf32Borrowed {
7984
dims: self.dims,
@@ -82,6 +87,7 @@ impl VectorOwned for SVecf32Owned {
8287
}
8388
}
8489

90+
#[inline(always)]
8591
fn to_vec(&self) -> Vec<F32> {
8692
let mut dense = vec![F32::zero(); self.dims as usize];
8793
for (&index, &value) in self.indexes.iter().zip(self.values.iter()) {
@@ -103,6 +109,7 @@ impl<'a> SVecf32Borrowed<'a> {
103109
pub fn new(dims: u32, indexes: &'a [u32], values: &'a [F32]) -> Self {
104110
Self::new_checked(dims, indexes, values).unwrap()
105111
}
112+
106113
#[inline(always)]
107114
pub fn new_checked(dims: u32, indexes: &'a [u32], values: &'a [F32]) -> Option<Self> {
108115
if !(1..=1_048_575).contains(&dims) {
@@ -127,6 +134,7 @@ impl<'a> SVecf32Borrowed<'a> {
127134
}
128135
unsafe { Some(Self::new_unchecked(dims, indexes, values)) }
129136
}
137+
130138
/// # Safety
131139
///
132140
/// * `dims` must be in `1..=1_048_575`.
@@ -141,14 +149,21 @@ impl<'a> SVecf32Borrowed<'a> {
141149
values,
142150
}
143151
}
152+
144153
#[inline(always)]
145154
pub fn indexes(&self) -> &[u32] {
146155
self.indexes
147156
}
157+
148158
#[inline(always)]
149159
pub fn values(&self) -> &[F32] {
150160
self.values
151161
}
162+
163+
#[inline(always)]
164+
pub fn len(&self) -> u32 {
165+
self.indexes.len().try_into().unwrap()
166+
}
152167
}
153168

154169
impl<'a> VectorBorrowed for SVecf32Borrowed<'a> {
@@ -160,6 +175,7 @@ impl<'a> VectorBorrowed for SVecf32Borrowed<'a> {
160175
self.dims
161176
}
162177

178+
#[inline(always)]
163179
fn for_own(&self) -> SVecf32Owned {
164180
SVecf32Owned {
165181
dims: self.dims,
@@ -168,19 +184,25 @@ impl<'a> VectorBorrowed for SVecf32Borrowed<'a> {
168184
}
169185
}
170186

187+
#[inline(always)]
171188
fn to_vec(&self) -> Vec<F32> {
172189
let mut dense = vec![F32::zero(); self.dims as usize];
173190
for (&index, &value) in self.indexes.iter().zip(self.values.iter()) {
174191
dense[index as usize] = value;
175192
}
176193
dense
177194
}
178-
}
179195

180-
impl<'a> SVecf32Borrowed<'a> {
181196
#[inline(always)]
182-
pub fn len(&self) -> u32 {
183-
self.indexes.len().try_into().unwrap()
197+
fn length(&self) -> F32 {
198+
length(*self)
199+
}
200+
201+
#[inline(always)]
202+
fn normalize(&self) -> SVecf32Owned {
203+
let mut own = self.for_own();
204+
l2_normalize(&mut own);
205+
own
184206
}
185207
}
186208

0 commit comments

Comments
 (0)