Skip to content

Commit 97ce096

Browse files
feat: stand alone test (#448)
* add stand alone test
  Signed-off-by: whateveraname <12011319@mail.sustech.edu.cn>
* update .gitignore
  Signed-off-by: whateveraname <12011319@mail.sustech.edu.cn>
* change data path
  Signed-off-by: whateveraname <12011319@mail.sustech.edu.cn>
* update data path
  Signed-off-by: whateveraname <12011319@mail.sustech.edu.cn>
* cargo fmt
  Signed-off-by: whateveraname <12011319@mail.sustech.edu.cn>
* clean code
  Signed-off-by: whateveraname <12011319@mail.sustech.edu.cn>
* taplo fmt
  Signed-off-by: whateveraname <12011319@mail.sustech.edu.cn>
* gate code with cargo feature
  Signed-off-by: whateveraname <12011319@mail.sustech.edu.cn>
* clean Cargo.toml and add CLI for stand-alone-test
  Signed-off-by: whateveraname <12011319@mail.sustech.edu.cn>
* remove feature from package stand-alone-test
  Signed-off-by: whateveraname <12011319@mail.sustech.edu.cn>
* cleanup
  Signed-off-by: usamoi <usamoi@outlook.com>

---------

Signed-off-by: whateveraname <12011319@mail.sustech.edu.cn>
Signed-off-by: usamoi <usamoi@outlook.com>
Co-authored-by: usamoi <usamoi@outlook.com>
1 parent 6fc8708 commit 97ce096

File tree

13 files changed

+1087
-131
lines changed

13 files changed

+1087
-131
lines changed

Cargo.lock

Lines changed: 68 additions & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

crates/base/src/vector/bvecf32.rs

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -530,7 +530,7 @@ pub fn l2_normalize<'a>(vector: BVecf32Borrowed<'a>) -> Vecf32Owned {
530530
#[cfg(all(target_arch = "x86_64", test))]
531531
fn random_bvector() -> BVecf32Owned {
532532
let mut x = vec![0; 126];
533-
x.fill_with(|| rand::random());
533+
x.fill_with(rand::random);
534534
x[125] &= 1;
535535
BVecf32Owned::new(8001, x)
536536
}

crates/hnsw/Cargo.toml

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -3,6 +3,9 @@ name = "hnsw"
33
version.workspace = true
44
edition.workspace = true
55

6+
[features]
7+
stand-alone-test = []
8+
69
[dependencies]
710
bytemuck.workspace = true
811
parking_lot.workspace = true

crates/hnsw/src/lib.rs

Lines changed: 61 additions & 128 deletions
Original file line number | Diff line number | Diff line change
@@ -1,14 +1,16 @@
11
#![feature(trait_alias)]
22
#![allow(clippy::len_without_is_empty)]
33

4+
pub mod visited;
5+
46
use base::index::*;
57
use base::operator::*;
68
use base::scalar::F32;
79
use base::search::*;
810
use bytemuck::{Pod, Zeroable};
911
use common::dir_ops::sync_dir;
1012
use common::mmap_array::MmapArray;
11-
use parking_lot::{Mutex, RwLock, RwLockWriteGuard};
13+
use parking_lot::{RwLock, RwLockWriteGuard};
1214
use quantization::operator::OperatorQuantization;
1315
use quantization::Quantization;
1416
use rayon::iter::{IntoParallelIterator, ParallelIterator};
@@ -20,6 +22,7 @@ use std::path::Path;
2022
use std::sync::Arc;
2123
use storage::operator::OperatorStorage;
2224
use storage::StorageCollection;
25+
use visited::{VisitedGuard, VisitedPool};
2326

2427
pub trait OperatorHnsw = Operator + OperatorQuantization + OperatorStorage;
2528

@@ -28,6 +31,11 @@ pub struct Hnsw<O: OperatorHnsw> {
2831
}
2932

3033
impl<O: OperatorHnsw> Hnsw<O> {
34+
#[cfg(feature = "stand-alone-test")]
35+
pub fn new(mmap: HnswMmap<O>) -> Self {
36+
Self { mmap }
37+
}
38+
3139
pub fn create<S: Source<O>>(path: &Path, options: IndexOptions, source: &S) -> Self {
3240
create_dir(path).unwrap();
3341
let ram = make(path, options, source);
@@ -86,22 +94,41 @@ pub struct HnswRam<O: OperatorHnsw> {
8694
visited: VisitedPool,
8795
}
8896

89-
struct HnswRamGraph {
90-
vertexs: Vec<HnswRamVertex>,
97+
impl<O: OperatorHnsw> HnswRam<O> {
98+
#[cfg(feature = "stand-alone-test")]
99+
pub fn new(
100+
storage: Arc<StorageCollection<O>>,
101+
quantization: Quantization<O, StorageCollection<O>>,
102+
m: u32,
103+
graph: HnswRamGraph,
104+
visited: VisitedPool,
105+
) -> Self {
106+
Self {
107+
storage,
108+
quantization,
109+
m,
110+
graph,
111+
visited,
112+
}
113+
}
114+
}
115+
116+
pub struct HnswRamGraph {
117+
pub vertexs: Vec<HnswRamVertex>,
91118
}
92119

93-
struct HnswRamVertex {
94-
layers: Vec<RwLock<HnswRamLayer>>,
120+
pub struct HnswRamVertex {
121+
pub layers: Vec<RwLock<HnswRamLayer>>,
95122
}
96123

97124
impl HnswRamVertex {
98-
fn levels(&self) -> u8 {
125+
pub fn levels(&self) -> u8 {
99126
self.layers.len() as u8 - 1
100127
}
101128
}
102129

103-
struct HnswRamLayer {
104-
edges: Vec<(F32, u32)>,
130+
pub struct HnswRamLayer {
131+
pub edges: Vec<(F32, u32)>,
105132
}
106133

107134
pub struct HnswMmap<O: OperatorHnsw> {
@@ -117,8 +144,31 @@ pub struct HnswMmap<O: OperatorHnsw> {
117144
visited: VisitedPool,
118145
}
119146

147+
impl<O: OperatorHnsw> HnswMmap<O> {
148+
#[cfg(feature = "stand-alone-test")]
149+
pub fn new(
150+
storage: Arc<StorageCollection<O>>,
151+
quantization: Quantization<O, StorageCollection<O>>,
152+
m: u32,
153+
edges: MmapArray<HnswMmapEdge>,
154+
by_layer_id: MmapArray<usize>,
155+
by_vertex_id: MmapArray<usize>,
156+
visited: VisitedPool,
157+
) -> Self {
158+
Self {
159+
storage,
160+
quantization,
161+
m,
162+
edges,
163+
by_layer_id,
164+
by_vertex_id,
165+
visited,
166+
}
167+
}
168+
}
169+
120170
#[derive(Debug, Clone, Copy, Default)]
121-
struct HnswMmapEdge(#[allow(dead_code)] F32, u32);
171+
pub struct HnswMmapEdge(#[allow(dead_code)] F32, u32);
122172
// we may convert a memory-mapped graph to a memory graph
123173
// so that it speeds merging sealed segments
124174

@@ -574,7 +624,7 @@ pub fn local_search_vbase<'a, O: OperatorHnsw>(
574624
})
575625
}
576626

577-
fn count_layers_of_a_vertex(m: u32, i: u32) -> u8 {
627+
pub fn count_layers_of_a_vertex(m: u32, i: u32) -> u8 {
578628
let mut x = i + 1;
579629
let mut ans = 1;
580630
while x % m == 0 {
@@ -584,7 +634,7 @@ fn count_layers_of_a_vertex(m: u32, i: u32) -> u8 {
584634
ans
585635
}
586636

587-
fn count_max_edges_of_a_layer(m: u32, j: u8) -> u32 {
637+
pub fn count_max_edges_of_a_layer(m: u32, j: u8) -> u32 {
588638
if j == 0 {
589639
m * 2
590640
} else {
@@ -610,123 +660,6 @@ fn find_edges<O: OperatorHnsw>(mmap: &HnswMmap<O>, u: u32, level: u8) -> &[HnswM
610660
&mmap.edges[index]
611661
}
612662

613-
struct VisitedPool {
614-
n: u32,
615-
locked_buffers: Mutex<Vec<VisitedBuffer>>,
616-
}
617-
618-
impl VisitedPool {
619-
pub fn new(n: u32) -> Self {
620-
Self {
621-
n,
622-
locked_buffers: Mutex::new(Vec::new()),
623-
}
624-
}
625-
pub fn fetch(&self) -> VisitedGuard {
626-
let buffer = self
627-
.locked_buffers
628-
.lock()
629-
.pop()
630-
.unwrap_or_else(|| VisitedBuffer::new(self.n as _));
631-
VisitedGuard { buffer, pool: self }
632-
}
633-
634-
fn fetch2(&self) -> VisitedGuardChecker {
635-
let mut buffer = self
636-
.locked_buffers
637-
.lock()
638-
.pop()
639-
.unwrap_or_else(|| VisitedBuffer::new(self.n as _));
640-
{
641-
buffer.version = buffer.version.wrapping_add(1);
642-
if buffer.version == 0 {
643-
buffer.data.fill(0);
644-
}
645-
}
646-
VisitedGuardChecker { buffer, pool: self }
647-
}
648-
}
649-
650-
struct VisitedGuard<'a> {
651-
buffer: VisitedBuffer,
652-
pool: &'a VisitedPool,
653-
}
654-
655-
impl<'a> VisitedGuard<'a> {
656-
fn fetch(&mut self) -> VisitedChecker<'_> {
657-
self.buffer.version = self.buffer.version.wrapping_add(1);
658-
if self.buffer.version == 0 {
659-
self.buffer.data.fill(0);
660-
}
661-
VisitedChecker {
662-
buffer: &mut self.buffer,
663-
}
664-
}
665-
}
666-
667-
impl<'a> Drop for VisitedGuard<'a> {
668-
fn drop(&mut self) {
669-
let src = VisitedBuffer {
670-
version: 0,
671-
data: Vec::new(),
672-
};
673-
let buffer = std::mem::replace(&mut self.buffer, src);
674-
self.pool.locked_buffers.lock().push(buffer);
675-
}
676-
}
677-
678-
struct VisitedChecker<'a> {
679-
buffer: &'a mut VisitedBuffer,
680-
}
681-
682-
impl<'a> VisitedChecker<'a> {
683-
fn check(&mut self, i: u32) -> bool {
684-
self.buffer.data[i as usize] != self.buffer.version
685-
}
686-
fn mark(&mut self, i: u32) {
687-
self.buffer.data[i as usize] = self.buffer.version;
688-
}
689-
}
690-
691-
struct VisitedGuardChecker<'a> {
692-
buffer: VisitedBuffer,
693-
pool: &'a VisitedPool,
694-
}
695-
696-
impl<'a> VisitedGuardChecker<'a> {
697-
fn check(&mut self, i: u32) -> bool {
698-
self.buffer.data[i as usize] != self.buffer.version
699-
}
700-
fn mark(&mut self, i: u32) {
701-
self.buffer.data[i as usize] = self.buffer.version;
702-
}
703-
}
704-
705-
impl<'a> Drop for VisitedGuardChecker<'a> {
706-
fn drop(&mut self) {
707-
let src = VisitedBuffer {
708-
version: 0,
709-
data: Vec::new(),
710-
};
711-
let buffer = std::mem::replace(&mut self.buffer, src);
712-
self.pool.locked_buffers.lock().push(buffer);
713-
}
714-
}
715-
716-
struct VisitedBuffer {
717-
version: usize,
718-
data: Vec<usize>,
719-
}
720-
721-
impl VisitedBuffer {
722-
fn new(capacity: usize) -> Self {
723-
Self {
724-
version: 0,
725-
data: bytemuck::zeroed_vec(capacity),
726-
}
727-
}
728-
}
729-
730663
pub struct ElementHeap {
731664
binary_heap: BinaryHeap<Element>,
732665
k: usize,

0 commit comments

Comments
 (0)