Skip to content

Commit 712c331

Browse files
author
Roderick Bovee
committed
Move combinatorial fns to mmap-bitvec and assume markers are 128 bits
1 parent 3908024 commit 712c331

File tree

5 files changed

+20
-173
lines changed

5 files changed

+20
-173
lines changed

Cargo.toml

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,19 +1,18 @@
11
[package]
22
name = "bfield"
3-
version = "0.1.3"
3+
version = "0.2.0"
44
authors = ["Roderick Bovee <roderick@onecodex.com>"]
55
edition = "2018"
66

77
[dependencies]
88
bincode = "0.9.2"
9-
mmap-bitvec = { git="ssh://git@github.com/onecodex/mmap-bitvec.git", tag="v0.1.1" }
9+
mmap-bitvec = { git="ssh://git@github.com/onecodex/mmap-bitvec.git", tag="v0.3.0" }
1010
murmurhash3 = "0.0.5"
1111
serde = "1.0.15"
1212
serde_derive = "1.0.15"
1313
serde_json = { version = "1.0.4", optional = true}
1414

1515
[features]
16-
marker_lookup = []
16+
marker_lookup = ["mmap-bitvec/rank_lookup"]
1717
prefetching = []
1818
legacy = ["mmap-bitvec/backward_bytes", "serde_json"]
19-
wide_markers = ["mmap-bitvec/u128"]

src/bfield.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,11 @@
11
use std::io;
22
use std::path::Path;
33

4+
use mmap_bitvec::combinatorial::rank;
45
use serde::de::DeserializeOwned;
56
use serde::Serialize;
67

78
use crate::bfield_member::{BFieldLookup, BFieldMember, BFieldVal};
8-
use crate::marker::to_marker;
99

1010
pub struct BField<T> {
1111
members: Vec<BFieldMember<T>>,
@@ -59,7 +59,7 @@ impl<'a, T: Clone + DeserializeOwned + Serialize> BField<T> {
5959

6060
// Initialize our marker table, so we don't
6161
// have any race conditions across threads
62-
let _ = to_marker(0, n_marker_bits);
62+
let _ = rank(0, n_marker_bits);
6363

6464
Ok(BField {
6565
members,

src/bfield_member.rs

Lines changed: 14 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -7,15 +7,14 @@ use std::io;
77
use std::path::Path;
88

99
use bincode::{deserialize, serialize, Infinite};
10-
use mmap_bitvec::{BitVecSlice, BitVector, MmapBitVec};
10+
use mmap_bitvec::combinatorial::{rank, unrank};
11+
use mmap_bitvec::{BitVector, MmapBitVec};
1112
use murmurhash3::murmurhash3_x64_128;
1213
use serde::de::DeserializeOwned;
1314
use serde::Serialize;
1415
#[cfg(feature = "legacy")]
1516
use serde_json;
1617

17-
use crate::marker::{from_marker, to_marker};
18-
1918
#[derive(Debug, Deserialize, Serialize)]
2019
pub(crate) struct BFieldParams<T> {
2120
n_hashes: u8, // k
@@ -59,7 +58,7 @@ impl<T: Clone + DeserializeOwned + Serialize> BFieldMember<T> {
5958
};
6059

6160
let header: Vec<u8> = serialize(&bf_params, Infinite).unwrap();
62-
let bv = MmapBitVec::create(filename, size, &BF_MAGIC, &header)?;
61+
let bv = MmapBitVec::create(filename, size, BF_MAGIC, &header)?;
6362

6463
Ok(BFieldMember {
6564
bitvec: bv,
@@ -136,11 +135,11 @@ impl<T: Clone + DeserializeOwned + Serialize> BFieldMember<T> {
136135
// TODO: need to do a check that `value` < allowable range based on
137136
// self.params.marker_width and self.params.n_marker_bits
138137
let k = self.params.n_marker_bits;
139-
self.insert_raw(key, to_marker(value, k));
138+
self.insert_raw(key, rank(value as usize, k));
140139
}
141140

142141
#[inline]
143-
fn insert_raw(&mut self, key: &[u8], marker: BitVecSlice) {
142+
fn insert_raw(&mut self, key: &[u8], marker: u128) {
144143
let marker_width = self.params.marker_width as usize;
145144
let hash = murmurhash3_x64_128(key, 0);
146145
let aligned_marker = align_bits(marker, marker_width);
@@ -160,7 +159,7 @@ impl<T: Clone + DeserializeOwned + Serialize> BFieldMember<T> {
160159
/// the correct value; `false` if masking occurred or if it was already
161160
/// indeterminate.
162161
pub fn mask_or_insert(&mut self, key: &[u8], value: BFieldVal) -> bool {
163-
let correct_marker = to_marker(value, self.params.n_marker_bits);
162+
let correct_marker = rank(value as usize, self.params.n_marker_bits);
164163
let k = u32::from(self.params.n_marker_bits);
165164
let existing_marker = self.get_raw(key, k);
166165

@@ -198,16 +197,16 @@ impl<T: Clone + DeserializeOwned + Serialize> BFieldMember<T> {
198197
let putative_marker = self.get_raw(key, k);
199198
match putative_marker.count_ones().cmp(&k) {
200199
Ordering::Greater => BFieldLookup::Indeterminate,
201-
Ordering::Equal => BFieldLookup::Some(from_marker(putative_marker)),
200+
Ordering::Equal => BFieldLookup::Some(unrank(putative_marker) as u32),
202201
Ordering::Less => BFieldLookup::None,
203202
}
204203
}
205204

206205
#[inline]
207-
fn get_raw(&self, key: &[u8], k: u32) -> BitVecSlice {
206+
fn get_raw(&self, key: &[u8], k: u32) -> u128 {
208207
let marker_width = self.params.marker_width as usize;
209208
let hash = murmurhash3_x64_128(key, 0);
210-
let mut merged_marker = BitVecSlice::max_value();
209+
let mut merged_marker = u128::max_value();
211210
let mut positions: [usize; 16] = [0; 16]; // support up to 16 hashes
212211
for marker_ix in 0usize..self.params.n_hashes as usize {
213212
let pos = marker_pos(hash, marker_ix, self.bitvec.size(), marker_width);
@@ -217,7 +216,8 @@ impl<T: Clone + DeserializeOwned + Serialize> BFieldMember<T> {
217216
if cfg!(feature = "prefetching") {
218217
unsafe {
219218
let byte_idx_st = (pos >> 3) as usize;
220-
let ptr: *const u8 = self.bitvec.mmap.as_ptr().offset(byte_idx_st as isize);
219+
#[allow(unused_variables)]
220+
let ptr: *const u8 = self.bitvec.mmap.as_ptr().add(byte_idx_st);
221221
#[cfg(feature = "prefetching")]
222222
intrinsics::prefetch_read_data(ptr, 3);
223223
}
@@ -247,17 +247,17 @@ impl<T: Clone + DeserializeOwned + Serialize> BFieldMember<T> {
247247

248248
#[cfg(not(feature = "legacy"))]
249249
#[inline]
250-
fn align_bits(b: BitVecSlice, _: usize) -> BitVecSlice {
250+
fn align_bits(b: u128, _: usize) -> u128 {
251251
// everything is normal if we're not in legacy mode (this is a noop)
252252
b
253253
}
254254

255255
#[cfg(feature = "legacy")]
256256
#[inline]
257-
fn align_bits(b: BitVecSlice, len: usize) -> BitVecSlice {
257+
fn align_bits(b: u128, len: usize) -> u128 {
258258
// we need to reverse the bits (everything is backwards at both the byte
259259
// and the marker level in the existing nim implementation)
260-
let mut new_b = 0 as BitVecSlice;
260+
let mut new_b = 0u128;
261261
for i in 0..len {
262262
new_b |= (b & (1 << (len - i - 1))) >> (len - i - 1) << i;
263263
}

src/lib.rs

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,8 +6,7 @@ extern crate serde_derive;
66

77
mod bfield;
88
mod bfield_member;
9-
mod marker;
109

11-
pub use crate::marker::choose;
1210
pub use crate::bfield::BField;
1311
pub use crate::bfield_member::BFieldVal;
12+
pub use mmap_bitvec::combinatorial::choose;

src/marker.rs

Lines changed: 0 additions & 151 deletions
This file was deleted.

0 commit comments

Comments
 (0)