Skip to content

Commit cbf9900

Browse files
committed
refactor: use sparse struct to parse
Signed-off-by: cutecutecat <junyuchen@tensorchord.ai>
1 parent 2d6c196 commit cbf9900

File tree

2 files changed: 16 additions (+16) and 24 deletions (-24).

src/datatype/text_svecf32.rs

Lines changed: 4 additions & 14 deletions
Original file line number | Diff line number | Diff line change
@@ -16,24 +16,14 @@ fn _vectors_svecf32_in(input: &CStr, _oid: Oid, typmod: i32) -> SVecf32Output {
1616
.dims()
1717
.map(|x| x.get())
1818
.unwrap_or(0);
19-
let v = parse_pgvector_svector(input.to_bytes(), reserve as usize, |s| {
20-
s.parse::<F32>().ok()
21-
});
19+
let v = parse_pgvector_svector(input.to_bytes(), |s| s.parse::<F32>().ok());
2220
match v {
2321
Err(e) => {
2422
bad_literal(&e.to_string());
2523
}
26-
Ok(vector) => {
27-
check_value_dims_1048575(vector.len());
28-
let mut indexes = Vec::<u32>::new();
29-
let mut values = Vec::<F32>::new();
30-
for (i, &x) in vector.iter().enumerate() {
31-
if !x.is_zero() {
32-
indexes.push(i as u32);
33-
values.push(x);
34-
}
35-
}
36-
SVecf32Output::new(SVecf32Borrowed::new(vector.len() as u32, &indexes, &values))
24+
Ok((indexes, values, dims)) => {
25+
check_value_dims_1048575(dims);
26+
SVecf32Output::new(SVecf32Borrowed::new(dims as u32, &indexes, &values))
3727
}
3828
}
3929
}

src/utils/parse.rs

Lines changed: 12 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -88,16 +88,16 @@ where
8888
#[inline(always)]
8989
pub fn parse_pgvector_svector<T: Zero + Clone, F>(
9090
input: &[u8],
91-
reserve: usize,
9291
f: F,
93-
) -> Result<Vec<T>, ParseVectorError>
92+
) -> Result<(Vec<u32>, Vec<T>, usize), ParseVectorError>
9493
where
9594
F: Fn(&str) -> Option<T>,
9695
{
9796
use arrayvec::ArrayVec;
9897
if input.is_empty() {
9998
return Err(ParseVectorError::EmptyString {});
10099
}
100+
let mut dims: usize = 0;
101101
let left = 'a: {
102102
for position in 0..input.len() - 1 {
103103
match input[position] {
@@ -109,7 +109,6 @@ where
109109
return Err(ParseVectorError::BadParentheses { character: '{' });
110110
};
111111
let mut token: ArrayVec<u8, 48> = ArrayVec::new();
112-
let mut capacity = reserve;
113112
let right = 'a: {
114113
for position in (1..input.len()).rev() {
115114
match input[position] {
@@ -121,7 +120,7 @@ where
121120
b'/' => {
122121
token.reverse();
123122
let s = unsafe { std::str::from_utf8_unchecked(&token[..]) };
124-
capacity = s
123+
dims = s
125124
.parse::<usize>()
126125
.map_err(|_| ParseVectorError::BadParsing { position })?;
127126
}
@@ -135,8 +134,9 @@ where
135134
}
136135
return Err(ParseVectorError::BadParentheses { character: '}' });
137136
};
138-
let mut vector = vec![T::zero(); capacity];
139-
let mut index: usize = 0;
137+
let mut indexes = Vec::<u32>::new();
138+
let mut values = Vec::<T>::new();
139+
let mut index: u32 = 0;
140140
for position in left + 1..right {
141141
let c = input[position];
142142
match c {
@@ -153,7 +153,8 @@ where
153153
// Safety: all bytes in `token` are ascii characters
154154
let s = unsafe { std::str::from_utf8_unchecked(&token[1..]) };
155155
let num = f(s).ok_or(ParseVectorError::BadParsing { position })?;
156-
vector[index] = num;
156+
indexes.push(index);
157+
values.push(num);
157158
token.clear();
158159
} else {
159160
return Err(ParseVectorError::TooShortNumber { position });
@@ -164,7 +165,7 @@ where
164165
// Safety: all bytes in `token` are ascii characters
165166
let s = unsafe { std::str::from_utf8_unchecked(&token[1..]) };
166167
index = s
167-
.parse::<usize>()
168+
.parse::<u32>()
168169
.map_err(|_| ParseVectorError::BadParsing { position })?;
169170
token.clear();
170171
} else {
@@ -180,8 +181,9 @@ where
180181
// Safety: all bytes in `token` are ascii characters
181182
let s = unsafe { std::str::from_utf8_unchecked(&token[1..]) };
182183
let num = f(s).ok_or(ParseVectorError::BadParsing { position })?;
183-
vector[index] = num;
184+
indexes.push(index);
185+
values.push(num);
184186
token.clear();
185187
}
186-
Ok(vector)
188+
Ok((indexes, values, dims))
187189
}

0 commit comments

Comments
 (0)