Skip to content

Commit a7243fb

Browse files
Keats authored and boydgreenfield committed
Remove marker_lookup feature and make it on by default
1 parent d00dc4c commit a7243fb

File tree

4 files changed

+38
-79
lines changed

4 files changed

+38
-79
lines changed

.github/workflows/ci.yml

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -25,5 +25,3 @@ jobs:
2525
run: rustc --version
2626
- name: Run tests
2727
run: cargo test
28-
- name: Run tests with features
29-
run: RUST_TEST_THREADS=1 cargo test --features marker_lookup

Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,10 +8,10 @@ edition = "2018"
88
[dependencies]
99
memmap = "0.7.0"
1010
murmurhash3 = "0.0.5"
11+
once_cell = "1.3.1"
1112

1213
[features]
1314
backward_bytes = []
14-
rank_lookup = []
1515

1616
[dev-dependencies]
1717
criterion = "0.3"

benches/benchmark.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ use std::mem::transmute;
77
use std::ops::Range;
88

99
use memmap::{MmapMut, MmapOptions};
10-
use mmap_bitvec::{BitVector, MmapBitVec};
10+
use mmap_bitvec::{combinatorial::rank, BitVector, MmapBitVec};
1111

1212
use criterion::{criterion_group, criterion_main, Criterion};
1313

src/combinatorial.rs

Lines changed: 36 additions & 75 deletions
Original file line numberDiff line numberDiff line change
@@ -1,86 +1,44 @@
1+
use once_cell::sync::Lazy;
2+
use std::collections::HashMap;
13
use std::convert::TryFrom;
24

3-
#[cfg(feature = "rank_lookup")]
4-
const MARKER_TABLE_SIZE: usize = 200000;
5-
#[cfg(feature = "rank_lookup")]
6-
static mut MARKER_TABLE: [u128; MARKER_TABLE_SIZE] = [0; MARKER_TABLE_SIZE];
7-
#[cfg(feature = "rank_lookup")]
8-
static mut MARKER_BITS: u8 = 0;
9-
10-
#[cfg(feature = "rank_lookup")]
11-
pub fn rank(value: usize, k: u8) -> u128 {
12-
// note that you can only test this feature with
13-
// `RUST_TEST_THREADS=1 cargo test` or else you'll get tons of
14-
// errors because of data races between threads with different k's
15-
16-
unsafe {
17-
// clear out the lookup table if we have a new k and fill
18-
// it with values for the new k
19-
if MARKER_BITS != k {
20-
let mut table_size = MARKER_TABLE_SIZE;
21-
if k == 1 {
22-
table_size = 128;
23-
} else if k == 2 {
24-
table_size = 8128;
25-
}
26-
MARKER_TABLE = [0; MARKER_TABLE_SIZE];
27-
MARKER_TABLE[0] = (1 << k) - 1;
28-
for i in 1..table_size {
29-
MARKER_TABLE[i] = next_rank(MARKER_TABLE[i - 1]);
30-
}
31-
MARKER_BITS = k;
5+
const MARKER_TABLE_SIZE: usize = 200_000;
6+
7+
// TODO: replace with const fn when it is possible
8+
// (for and if are not allowed in const fn on current stable)
9+
static MARKER_TABLES: Lazy<HashMap<u8, Vec<u128>>> = Lazy::new(|| {
10+
let mut m = HashMap::new();
11+
for k in 1..10u8 {
12+
let mut table = vec![0u128; MARKER_TABLE_SIZE];
13+
let mut table_size = table.len();
14+
if k == 1 {
15+
table_size = 128;
16+
} else if k == 2 {
17+
table_size = 8128;
3218
}
33-
// it's possible this may overflow if value > (128 choose k) or return
34-
// a bad value (0) if value > (128 choose k) and k == 1 or 2
35-
if value as usize >= MARKER_TABLE_SIZE {
36-
let mut marker = MARKER_TABLE[MARKER_TABLE_SIZE - 1];
37-
for _ in 0..(value as usize - MARKER_TABLE_SIZE) {
38-
marker = next_rank(marker);
39-
}
40-
marker
41-
} else {
42-
MARKER_TABLE[value as usize]
19+
20+
table[0] = ((1 << k) - 1) as u128;
21+
for i in 1..table_size {
22+
table[i] = next_rank(table[i - 1]);
4323
}
24+
m.insert(k, table);
4425
}
45-
}
26+
m
27+
});
4628

47-
#[cfg(not(feature = "rank_lookup"))]
4829
pub fn rank(value: usize, k: u8) -> u128 {
49-
// set the appropriate number of bits in the marker
50-
let mut marker = (1 << k) - 1 as u128;
51-
// just step through `value` times until we find the marker we want
52-
// (this could be speed up *a lot* with some kind of lookup table)
53-
for _ in 0..value {
54-
marker = next_rank(marker)
30+
assert!(k > 0 && k < 10, "kappa needs to be less than 10");
31+
// it's possible this may overflow if value > (128 choose k) or return
32+
// a bad value (0) if value > (128 choose k) and k == 1 or 2
33+
if value as usize >= MARKER_TABLE_SIZE {
34+
let mut marker = MARKER_TABLES[&k][MARKER_TABLE_SIZE - 1];
35+
for _ in 0..(value - MARKER_TABLE_SIZE) {
36+
marker = next_rank(marker);
37+
}
38+
marker
39+
} else {
40+
MARKER_TABLES[&k][value]
5541
}
56-
marker
57-
// note that we could potentially calculate this much faster for larger ranks
58-
// by analytically solving for the bit positions (see following for an example)
59-
//
60-
// this requires solving the equation `choose(x, n_bits) == rank` to find the
61-
// first bit position (x) and then updating rank and n_bits and solving again
62-
63-
// from scipy.special import comb
64-
//
65-
// def rank(rank: int, bits: int) -> int:
66-
// cur_rank = rank
67-
// marker = 0
68-
// for i in range(bits, 0, -1):
69-
// # there's probably a better way to do this by skipping more values?
70-
// # maybe something with b < log_i(factorial(i) * cur_rank) + i + 1 ?
71-
// # (i think i inverted the binomial properly, but please check)
72-
//
73-
// # (right now we use the fact that bit n has to be in at least the nth position)
74-
// b = bits
75-
// while comb(b, i) <= cur_rank:
76-
// b += 1
77-
// b -= 1
78-
// marker += 2**b
79-
// cur_rank -= comb(b, bits)
80-
// return marker
81-
//
82-
// assert rank(70, 2) == 4112 # 1000000010000
83-
// assert rank(70, 3) == 304 # 100110000
8442
}
8543

8644
pub fn unrank(marker: u128) -> usize {
@@ -143,6 +101,9 @@ pub fn choose(n: u64, k: u8) -> u64 {
143101

144102
#[inline]
145103
fn next_rank(marker: u128) -> u128 {
104+
if marker == 0 {
105+
panic!("WOOPS");
106+
}
146107
let t = marker | (marker - 1);
147108
(t + 1) | (((!t & (t + 1)) - 1) >> (marker.trailing_zeros() + 1))
148109
}

0 commit comments

Comments (0)