Skip to content

Commit e2814a4

Browse files
Improve recall, abstract hnsw, add loader
1 parent 683d1bf commit e2814a4

File tree

3 files changed

+11
-12
lines changed

3 files changed

+11
-12
lines changed

src/indexer.rs

Lines changed: 3 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -16,8 +16,8 @@ use std::{
1616
};
1717
use urlencoding::{decode, encode};
1818

19-
pub type HnswIndex = Hnsw<OpenAI, Point, Lcg128Xsl64, 12, 24>;
20-
pub type HnswStorageIndex = Hnsw<OpenAI, IndexPoint, Lcg128Xsl64, 12, 24>;
19+
pub type HnswIndex = Hnsw<OpenAI, Point, Lcg128Xsl64, 24, 48>;
20+
pub type HnswStorageIndex = Hnsw<OpenAI, IndexPoint, Lcg128Xsl64, 24, 48>;
2121

2222
#[derive(Clone, Debug, PartialEq)]
2323
pub enum Point {
@@ -183,11 +183,7 @@ impl PointQuery {
183183
}
184184
}
185185

186-
pub fn search(
187-
p: &Point,
188-
num: usize,
189-
hnsw: &Hnsw<OpenAI, Point, Lcg128Xsl64, 12, 24>,
190-
) -> Result<Vec<PointQuery>, SearchError> {
186+
pub fn search(p: &Point, num: usize, hnsw: &HnswIndex) -> Result<Vec<PointQuery>, SearchError> {
191187
let mut output: Vec<_> = iter::repeat(Neighbor {
192188
index: !0,
193189
distance: !0,

src/main.rs

Lines changed: 7 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -4,6 +4,7 @@ use std::path::Path;
44

55
use clap::{Parser, Subcommand, ValueEnum};
66
use hnsw::Hnsw;
7+
use indexer::serialize_index;
78
use indexer::start_indexing_from_operations;
89
use indexer::Point;
910
use indexer::{operations_to_point_operations, OpenAI};
@@ -47,7 +48,7 @@ enum Commands {
4748
key: String,
4849
#[arg(short, long)]
4950
commit: String,
50-
#[arg(short, long)]
51+
#[arg(long)]
5152
domain: String,
5253
#[arg(short, long)]
5354
directory: String,
@@ -118,7 +119,7 @@ async fn main() -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
118119
println!(
119120
"same? {}, distance: {}",
120121
p1 == p2,
121-
OpenAI.distance(&p1, &p2)
122+
f32::from_bits(OpenAI.distance(&p1, &p2))
122123
);
123124
}
124125
Commands::Compare2 {
@@ -168,12 +169,12 @@ async fn main() -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
168169
let mut hnsw: HnswIndex = Hnsw::new(OpenAI);
169170
let store = VectorStore::new(dirpath, size);
170171

171-
let f = File::options().read(true).create(true).open(path)?;
172+
let f = File::options().read(true).open(path)?;
172173

173174
let lines = io::BufReader::new(f).lines();
174-
175175
let opstream = &lines
176176
.map(|l| {
177+
dbg!(&l);
177178
let ro: io::Result<Operation> = serde_json::from_str(&l.unwrap())
178179
.map_err(|e| std::io::Error::new(ErrorKind::Other, e));
179180
ro
@@ -186,6 +187,8 @@ async fn main() -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
186187
operations_to_point_operations(&domain.clone(), &store, structs, &key).await;
187188
hnsw = start_indexing_from_operations(hnsw, new_ops).unwrap();
188189
}
190+
let index_id = create_index_name(&domain, &commit);
191+
serialize_index(dirpath.to_path_buf(), &index_id, hnsw.clone()).unwrap();
189192
}
190193
}
191194

src/server.rs

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -333,7 +333,7 @@ impl Service {
333333
commit: String,
334334
previous: Option<String>,
335335
index_id: &str,
336-
) -> (String, Hnsw<OpenAI, Point, rand_pcg::Lcg128Xsl64, 12, 24>) {
336+
) -> (String, HnswIndex) {
337337
let id = create_index_name(&domain, &commit);
338338
let mut hnsw = self
339339
.load_hnsw_for_indexing(IndexIdentifier {

0 commit comments

Comments
 (0)