1
1
use std:: io;
2
- use std:: path:: Path ;
2
+ use std:: path:: { Path , PathBuf } ;
3
3
4
4
use mmap_bitvec:: combinatorial:: rank;
5
5
use serde:: de:: DeserializeOwned ;
@@ -12,37 +12,42 @@ pub struct BField<T> {
12
12
read_only : bool ,
13
13
}
14
14
15
- impl < ' a , T : Clone + DeserializeOwned + Serialize > BField < T > {
15
+ // This is safe in theory, as the mmap is send+sync
16
+ unsafe impl < T > Send for BField < T > { }
17
+ unsafe impl < T > Sync for BField < T > { }
18
+
19
+ impl < T : Clone + DeserializeOwned + Serialize > BField < T > {
16
20
#[ allow( clippy:: too_many_arguments) ]
17
21
pub fn create < P > (
18
- filename : P ,
22
+ directory : P ,
23
+ filename : & str ,
19
24
size : usize ,
20
25
n_hashes : u8 , // k
21
26
marker_width : u8 , // nu
22
27
n_marker_bits : u8 , // kappa
23
28
secondary_scaledown : f64 , // beta
24
29
max_scaledown : f64 ,
25
30
n_secondaries : u8 ,
31
+ in_memory : bool ,
26
32
other_params : T ,
27
33
) -> Result < Self , io:: Error >
28
34
where
29
35
P : AsRef < Path > ,
30
36
{
37
+ debug_assert ! ( !filename. is_empty( ) ) ;
31
38
let mut cur_size = size;
32
39
let mut members = Vec :: new ( ) ;
40
+
33
41
for n in 0 ..n_secondaries {
34
- // panics if filename == ''
35
- let file = filename. as_ref ( ) . with_file_name ( Path :: with_extension (
36
- Path :: file_stem ( filename. as_ref ( ) ) . unwrap ( ) . as_ref ( ) ,
37
- format ! ( "{}.bfd" , n) ,
38
- ) ) ;
42
+ let file = directory. as_ref ( ) . join ( format ! ( "{}.{}.bfd" , filename, n) ) ;
39
43
let params = if n == 0 {
40
44
Some ( other_params. clone ( ) )
41
45
} else {
42
46
None
43
47
} ;
44
48
let member = BFieldMember :: create (
45
49
file,
50
+ in_memory,
46
51
cur_size,
47
52
n_hashes,
48
53
marker_width,
@@ -66,33 +71,58 @@ impl<'a, T: Clone + DeserializeOwned + Serialize> BField<T> {
66
71
} )
67
72
}
68
73
69
- pub fn from_file < P > ( filename : P , read_only : bool ) -> Result < Self , io:: Error >
70
- where
71
- P : AsRef < Path > ,
72
- {
74
+ pub fn load < P : AsRef < Path > > ( main_db_path : P , read_only : bool ) -> Result < Self , io:: Error > {
73
75
let mut members = Vec :: new ( ) ;
74
76
let mut n = 0 ;
77
+
78
+ let main_db_filename = match main_db_path. as_ref ( ) . file_name ( ) {
79
+ Some ( p) => p. to_string_lossy ( ) ,
80
+ None => {
81
+ return Err ( io:: Error :: new (
82
+ io:: ErrorKind :: NotFound ,
83
+ format ! ( "Couldn't get filename from {:?}" , main_db_path. as_ref( ) ) ,
84
+ ) ) ;
85
+ }
86
+ } ;
87
+ assert ! ( main_db_path. as_ref( ) . parent( ) . is_some( ) ) ;
88
+ assert ! ( main_db_filename. ends_with( "0.bfd" ) ) ;
89
+
75
90
loop {
76
- let member_filename = filename. as_ref ( ) . with_file_name ( Path :: with_extension (
77
- Path :: file_stem ( filename. as_ref ( ) ) . unwrap ( ) . as_ref ( ) ,
78
- format ! ( "{}.bfd" , n) ,
79
- ) ) ;
80
- if !member_filename. exists ( ) {
91
+ let member_filename =
92
+ PathBuf :: from ( & main_db_filename. replace ( "0.bfd" , & format ! ( "{n}.bfd" ) ) ) ;
93
+ let member_path = main_db_path
94
+ . as_ref ( )
95
+ . parent ( )
96
+ . unwrap ( )
97
+ . join ( member_filename) ;
98
+ if !member_path. exists ( ) {
81
99
break ;
82
100
}
83
- let member = BFieldMember :: open ( & member_filename , read_only) ?;
101
+ let member = BFieldMember :: open ( & member_path , read_only) ?;
84
102
members. push ( member) ;
85
103
n += 1 ;
86
104
}
105
+
87
106
if members. is_empty ( ) {
88
107
return Err ( io:: Error :: new (
89
108
io:: ErrorKind :: NotFound ,
90
- format ! ( "No Bfield found at {:?}" , filename . as_ref( ) ) ,
109
+ format ! ( "No Bfield found at {:?}" , main_db_path . as_ref( ) ) ,
91
110
) ) ;
92
111
}
93
112
Ok ( BField { members, read_only } )
94
113
}
95
114
115
+ pub fn persist_to_disk ( self ) -> Result < Self , io:: Error > {
116
+ let mut members = Vec :: with_capacity ( self . members . len ( ) ) ;
117
+ for m in self . members {
118
+ members. push ( m. persist_to_disk ( ) ?) ;
119
+ }
120
+ Ok ( Self {
121
+ members,
122
+ read_only : self . read_only ,
123
+ } )
124
+ }
125
+
96
126
pub fn build_params ( & self ) -> ( u8 , u8 , u8 , Vec < usize > ) {
97
127
let ( _, n_hashes, marker_width, n_marker_bits) = self . members [ 0 ] . info ( ) ;
98
128
let sizes = self . members . iter ( ) . map ( |i| i. info ( ) . 0 ) . collect ( ) ;
@@ -117,36 +147,36 @@ impl<'a, T: Clone + DeserializeOwned + Serialize> BField<T> {
117
147
/// of the b-field by making them indeterminate (which will make them fall
118
148
/// back to the secondaries where they don't exist and thus it'll appear
119
149
/// as if they were never inserted to begin with)
120
- pub fn force_insert ( & mut self , key : & [ u8 ] , value : BFieldVal ) {
150
+ pub fn force_insert ( & self , key : & [ u8 ] , value : BFieldVal ) {
121
151
debug_assert ! ( !self . read_only, "Can't insert into read_only bfields" ) ;
122
- for secondary in self . members . iter_mut ( ) {
123
- if secondary. mask_or_insert ( & key, value) {
152
+ for secondary in & self . members {
153
+ if secondary. mask_or_insert ( key, value) {
124
154
break ;
125
155
}
126
156
}
127
157
}
128
158
129
- pub fn insert ( & mut self , key : & [ u8 ] , value : BFieldVal , pass : usize ) -> bool {
159
+ pub fn insert ( & self , key : & [ u8 ] , value : BFieldVal , pass : usize ) -> bool {
130
160
debug_assert ! ( !self . read_only, "Can't insert into read_only bfields" ) ;
131
161
debug_assert ! (
132
162
pass < self . members. len( ) ,
133
163
"Can't have more passes than bfield members"
134
164
) ;
135
165
if pass > 0 {
136
166
for secondary in self . members [ ..pass] . iter ( ) {
137
- match secondary. get ( & key) {
167
+ match secondary. get ( key) {
138
168
BFieldLookup :: Indeterminate => continue ,
139
169
_ => return false ,
140
170
}
141
171
}
142
172
}
143
- self . members [ pass] . insert ( & key, value) ;
173
+ self . members [ pass] . insert ( key, value) ;
144
174
true
145
175
}
146
176
147
177
pub fn get ( & self , key : & [ u8 ] ) -> Option < BFieldVal > {
148
178
for secondary in self . members . iter ( ) {
149
- match secondary. get ( & key) {
179
+ match secondary. get ( key) {
150
180
BFieldLookup :: Indeterminate => continue ,
151
181
BFieldLookup :: Some ( value) => return Some ( value) ,
152
182
BFieldLookup :: None => return None ,
@@ -161,3 +191,90 @@ impl<'a, T: Clone + DeserializeOwned + Serialize> BField<T> {
161
191
self . members . iter ( ) . map ( |m| m. info ( ) ) . collect ( )
162
192
}
163
193
}
194
+
195
#[cfg(test)]
mod tests {
    use super::*;

    /// Number of keys written by the identity-database helpers below.
    const MAX_VALUE: u32 = 10_000;

    /// Inserts every `i` in `0..MAX_VALUE` under the key `i.to_be_bytes()`, once per pass.
    fn fill_identity(bfield: &BField<String>, n_secondaries: u8) {
        for pass in 0..n_secondaries {
            for i in 0..MAX_VALUE {
                bfield.insert(&i.to_be_bytes(), i, usize::from(pass));
            }
        }
    }

    /// Asserts that every key written by `fill_identity` maps back to itself.
    fn assert_identity(bfield: &BField<String>) {
        for i in 0..MAX_VALUE {
            let val = bfield.get(&i.to_be_bytes()).unwrap();
            assert_eq!(i, val);
        }
    }

    #[test]
    fn can_build_and_query_file_bfield() {
        let tmp_dir = tempfile::tempdir().unwrap();
        let n_secondaries = 4;
        let bfield = BField::create(
            tmp_dir.path(),
            "bfield",
            1_000_000,
            10,
            39,
            4,
            0.1,
            0.025,
            n_secondaries,
            false,
            String::new(),
        )
        .expect("to build");

        // Identity database
        fill_identity(&bfield, n_secondaries);
        assert_identity(&bfield);
        drop(bfield);

        // and we can load them
        let bfield = BField::<String>::load(tmp_dir.path().join("bfield.0.bfd"), true).unwrap();
        assert_identity(&bfield);
    }

    #[test]
    fn can_build_and_query_in_memory_bfield() {
        let tmp_dir = tempfile::tempdir().unwrap();
        let n_secondaries = 4;
        let bfield = BField::create(
            tmp_dir.path(),
            "bfield",
            1_000_000,
            10,
            39,
            4,
            0.1,
            0.025,
            n_secondaries,
            true,
            String::new(),
        )
        .expect("to build");

        // Identity database
        fill_identity(&bfield, n_secondaries);
        assert_identity(&bfield);

        // `persist_to_disk` takes `self` by value, so keep using the b-field it returns.
        let bfield = bfield.persist_to_disk().unwrap();
        for m in &bfield.members {
            assert!(m.filename.exists());
        }
        assert_identity(&bfield);
    }
}
0 commit comments