Skip to content

Commit 10fd3d0

Browse files
author
Roderick Bovee
committed
Cargo fmt & clippy & setup CI
1 parent 4ccfe2e commit 10fd3d0

File tree

9 files changed

+158
-109
lines changed

9 files changed

+158
-109
lines changed

.circleci/config.yml

Lines changed: 40 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,40 @@
1+
version: 2
2+
jobs:
3+
build:
4+
docker:
5+
- image: circleci/rust:1.34
6+
steps:
7+
- checkout
8+
- run:
9+
name: Version information
10+
command: rustc --version; cargo --version; rustup --version
11+
- run:
12+
name: Calculate dependencies
13+
command: cargo generate-lockfile
14+
- restore_cache:
15+
keys:
16+
- v4-cargo-cache-{{ arch }}-{{ checksum "Cargo.lock" }}
17+
- run:
18+
name: Build all targets
19+
command: cargo build --all --all-targets
20+
- save_cache:
21+
paths:
22+
- ~/.cargo/registry
23+
- ~/.cargo/bin/cargo-tarpaulin
24+
- target/debug/.fingerprint
25+
- target/debug/build
26+
- target/debug/deps
27+
key: v4-cargo-cache-{{ arch }}-{{ checksum "Cargo.lock" }}
28+
- run:
29+
name: Format
30+
command: |
31+
rustup component add rustfmt
32+
cargo fmt -- --check
33+
- run:
34+
name: Clippy
35+
command: |
36+
rustup component add clippy
37+
cargo clippy --all --
38+
- run:
39+
name: Run all tests
40+
command: cargo test --all

Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,6 @@
11
[package]
22
name = "mmap-bitvec"
3-
version = "0.1.1"
3+
version = "0.2.0"
44
authors = ["Roderick Bovee <roderick@onecodex.com>"]
55

66
[dependencies]

README.md

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,7 @@
11
# mmap-bitvec #
22

3+
[![CircleCI](https://circleci.com/gh/onecodex/mmap-bitvec.svg?style=svg&circle-token=dcb1850cbbec3e55d28cec4cb5082bb30199cf97)](https://circleci.com/gh/onecodex/mmap-bitvec)
4+
35
mmap-bitvec is a library for working with mmap-backed bit-vectors and some simple
46
data structures derived from bit-vectors.
57

benches/benchmark.rs

Lines changed: 27 additions & 23 deletions
Original file line number | Diff line number | Diff line change
@@ -8,15 +8,13 @@ use std::mem::transmute;
88
use std::ops::Range;
99

1010
use bencher::Bencher;
11-
use memmap::{MmapOptions, MmapMut};
12-
use mmap_bitvec::{MmapBitVec, BitVector};
13-
11+
use memmap::{MmapMut, MmapOptions};
12+
use mmap_bitvec::{BitVector, MmapBitVec};
1413

1514
type BitVecSlice = u64;
1615
const BIT_VEC_SLICE_SIZE: u8 = 64;
1716
const FILENAME: &str = "/Users/roderick/Documents/mgo_data/targeted_loci/bfield.mmap";
1817

19-
2018
// we could use an RNG, but I want to make sure everything is
2119
// as comparable as possible
2220
fn next_random(n: usize) -> usize {
@@ -36,9 +34,7 @@ fn get_range_simplified(mmap: &MmapMut, size: usize, l: usize) -> BitVecSlice {
3634
let ptr: *const u8 = mmap.as_ptr();
3735

3836
// read the last byte first
39-
let end = unsafe {
40-
*ptr.offset(byte_idx_en as isize)
41-
};
37+
let end = unsafe { *ptr.offset(byte_idx_en as isize) };
4238
// align the end of the data with the end of the u64/u128
4339
let mut v = BitVecSlice::from(end);
4440
v >>= 7 - ((l + 63) & 7);
@@ -55,8 +51,8 @@ fn get_range_simplified(mmap: &MmapMut, size: usize, l: usize) -> BitVecSlice {
5551
let bit_offset = new_size + (l & 7) as u8;
5652
for (new_idx, old_idx) in (byte_idx_st..byte_idx_en).enumerate() {
5753
unsafe {
58-
v |= BitVecSlice::from(*ptr.offset(old_idx as isize)) <<
59-
(bit_offset - 8u8 * (new_idx as u8 + 1));
54+
v |= BitVecSlice::from(*ptr.offset(old_idx as isize))
55+
<< (bit_offset - 8u8 * (new_idx as u8 + 1));
6056
}
6157
}
6258
}
@@ -85,7 +81,6 @@ fn get_range(mmap: &MmapMut, size: usize, r: Range<usize>) -> BitVecSlice {
8581
// align the end of the data with the end of the u64/u128
8682
v >>= 7 - ((r.end - 1) & 7);
8783

88-
8984
if r.start < size - BIT_VEC_SLICE_SIZE as usize {
9085
// really nasty/unsafe, but we're just reading a u64/u128 out instead of doing it
9186
// byte-wise --- also does not work with legacy mode!!!
@@ -98,8 +93,8 @@ fn get_range(mmap: &MmapMut, size: usize, r: Range<usize>) -> BitVecSlice {
9893
let bit_offset = new_size + (r.start & 7) as u8;
9994
for (new_idx, old_idx) in (byte_idx_st..byte_idx_en).enumerate() {
10095
unsafe {
101-
v |= BitVecSlice::from(*ptr.offset(old_idx as isize)) <<
102-
(bit_offset - 8u8 * (new_idx as u8 + 1));
96+
v |= BitVecSlice::from(*ptr.offset(old_idx as isize))
97+
<< (bit_offset - 8u8 * (new_idx as u8 + 1));
10398
}
10499
}
105100
}
@@ -109,11 +104,13 @@ fn get_range(mmap: &MmapMut, size: usize, r: Range<usize>) -> BitVecSlice {
109104
}
110105

111106
fn bench_get_range_simplified(bench: &mut Bencher) {
112-
let file = OpenOptions::new().read(true).write(true).open(FILENAME).unwrap();
107+
let file = OpenOptions::new()
108+
.read(true)
109+
.write(true)
110+
.open(FILENAME)
111+
.unwrap();
113112
let size = file.metadata().unwrap().len() as usize;
114-
let mmap = unsafe {
115-
MmapOptions::new().map_mut(&file).unwrap()
116-
};
113+
let mmap = unsafe { MmapOptions::new().map_mut(&file).unwrap() };
117114

118115
let mut r = 0;
119116
let mut i = 1;
@@ -127,18 +124,20 @@ fn bench_get_range_simplified(bench: &mut Bencher) {
127124
}
128125

129126
fn bench_get_range(bench: &mut Bencher) {
130-
let file = OpenOptions::new().read(true).write(true).open(FILENAME).unwrap();
127+
let file = OpenOptions::new()
128+
.read(true)
129+
.write(true)
130+
.open(FILENAME)
131+
.unwrap();
131132
let size = file.metadata().unwrap().len() as usize;
132-
let mmap = unsafe {
133-
MmapOptions::new().map_mut(&file).unwrap()
134-
};
133+
let mmap = unsafe { MmapOptions::new().map_mut(&file).unwrap() };
135134

136135
let mut r = 0;
137136
let mut i = 1;
138137
bench.iter(|| {
139138
for _ in 0..100000 {
140139
let l = i % (size - 64);
141-
r += get_range(&mmap, size, l..l+64).count_ones();
140+
r += get_range(&mmap, size, l..l + 64).count_ones();
142141
i = next_random(i);
143142
}
144143
})
@@ -151,12 +150,17 @@ fn bench_get_range_actual(bench: &mut Bencher) {
151150
bench.iter(|| {
152151
for _ in 0..100000 {
153152
let l = i % (bitvec.size() - 64);
154-
r += bitvec.get_range(l..l+64).count_ones();
153+
r += bitvec.get_range(l..l + 64).count_ones();
155154
i = next_random(i);
156155
}
157156
})
158157
}
159158

160-
benchmark_group!(get_fns, bench_get_range, bench_get_range_simplified, bench_get_range_actual);
159+
benchmark_group!(
160+
get_fns,
161+
bench_get_range,
162+
bench_get_range_simplified,
163+
bench_get_range_actual
164+
);
161165

162166
benchmark_main!(get_fns);

src/bin/test_get_range.rs

Lines changed: 7 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -2,7 +2,7 @@ extern crate mmap_bitvec;
22

33
use std::env::args;
44

5-
use mmap_bitvec::{MmapBitVec, BitVector};
5+
use mmap_bitvec::{BitVector, MmapBitVec};
66

77
// we could use an RNG, but I want to make sure everything is
88
// as comparable as possible
@@ -17,14 +17,18 @@ fn next_random(n: usize) -> usize {
1717

1818
fn main() {
1919
let filename = args().nth(1).expect("need [filename] [n_samples]");
20-
let n_samples = args().nth(2).expect("need [n_samples]").parse::<usize>().expect("n_samples must be an integer");
20+
let n_samples = args()
21+
.nth(2)
22+
.expect("need [n_samples]")
23+
.parse::<usize>()
24+
.expect("n_samples must be an integer");
2125

2226
let bitvec = MmapBitVec::open_no_header(filename, 0).unwrap();
2327
let mut r = 0;
2428
let mut i = 1;
2529
for _ in 0..n_samples {
2630
let l = i % (bitvec.size() - 64);
27-
r += bitvec.get_range(l..l+64).count_ones();
31+
r += bitvec.get_range(l..l + 64).count_ones();
2832
i = next_random(i);
2933
}
3034
println!("{}", r);

src/bin/test_get_range_fast.rs

Lines changed: 14 additions & 14 deletions
Original file line number | Diff line number | Diff line change
@@ -1,13 +1,12 @@
1-
extern crate mmap_bitvec;
21
extern crate memmap;
2+
extern crate mmap_bitvec;
33

4-
use std::fs::OpenOptions;
54
use std::env::args;
65
use std::mem::transmute;
76
use std::ops::Range;
87

9-
use mmap_bitvec::{MmapBitVec, BitVector};
10-
use memmap::{MmapOptions, MmapMut};
8+
use memmap::MmapMut;
9+
use mmap_bitvec::{BitVector, MmapBitVec};
1110

1211
// we could use an RNG, but I want to make sure everything is
1312
// as comparable as possible
@@ -20,11 +19,9 @@ fn next_random(n: usize) -> usize {
2019
x as usize
2120
}
2221

23-
2422
type BitVecSlice = u64;
2523
const BIT_VEC_SLICE_SIZE: u8 = 64;
2624

27-
2825
fn get_range(mmap: &MmapMut, size: usize, r: Range<usize>) -> BitVecSlice {
2926
if r.end - r.start > BIT_VEC_SLICE_SIZE as usize {
3027
panic!(format!("Range too large (>{})", BIT_VEC_SLICE_SIZE))
@@ -40,26 +37,26 @@ fn get_range(mmap: &MmapMut, size: usize, r: Range<usize>) -> BitVecSlice {
4037

4138
// read the last byte first
4239
unsafe {
43-
v = BitVecSlice::from(*ptr.offset(byte_idx_en as isize));
40+
v = BitVecSlice::from(*ptr.add(byte_idx_en));
4441
}
4542
// align the end of the data with the end of the u64/u128
4643
v >>= 7 - ((r.end - 1) & 7);
4744

48-
4945
if r.start < size - BIT_VEC_SLICE_SIZE as usize {
5046
// really nasty/unsafe, but we're just reading a u64/u128 out instead of doing it
5147
// byte-wise --- also does not work with legacy mode!!!
5248
unsafe {
53-
let lg_ptr: *const BitVecSlice = transmute(ptr.offset(byte_idx_st as isize));
49+
#[allow(clippy::transmute_ptr_to_ptr)]
50+
let lg_ptr: *const BitVecSlice = transmute(ptr.add(byte_idx_st));
5451
v |= (*lg_ptr).to_be() << (r.start & 7) >> (BIT_VEC_SLICE_SIZE - new_size);
5552
}
5653
} else {
5754
// special case if we can't get a whole u64 out without running outside the buffer
5855
let bit_offset = new_size + (r.start & 7) as u8;
5956
for (new_idx, old_idx) in (byte_idx_st..byte_idx_en).enumerate() {
6057
unsafe {
61-
v |= BitVecSlice::from(*ptr.offset(old_idx as isize)) <<
62-
(bit_offset - 8u8 * (new_idx as u8 + 1));
58+
v |= BitVecSlice::from(*ptr.add(old_idx))
59+
<< (bit_offset - 8u8 * (new_idx as u8 + 1));
6360
}
6461
}
6562
}
@@ -68,10 +65,13 @@ fn get_range(mmap: &MmapMut, size: usize, r: Range<usize>) -> BitVecSlice {
6865
v & (BitVecSlice::max_value() >> (BIT_VEC_SLICE_SIZE - new_size))
6966
}
7067

71-
7268
fn main() {
7369
let filename = args().nth(1).expect("need [filename] [n_samples]");
74-
let n_samples = args().nth(2).expect("need [n_samples]").parse::<usize>().expect("n_samples must be an integer");
70+
let n_samples = args()
71+
.nth(2)
72+
.expect("need [n_samples]")
73+
.parse::<usize>()
74+
.expect("n_samples must be an integer");
7575

7676
// let file = OpenOptions::new().read(true).write(true).open(filename).unwrap();
7777
// let size = (8 * file.metadata().unwrap().len()) as usize;
@@ -86,7 +86,7 @@ fn main() {
8686
let mut i = 1;
8787
for _ in 0..n_samples {
8888
let l = i % (size - 64);
89-
r += get_range(&bitvec.mmap, size, l..l+64).count_ones();
89+
r += get_range(&bitvec.mmap, size, l..l + 64).count_ones();
9090
i = next_random(i);
9191
}
9292
println!("{}", r);

src/bloom.rs

Lines changed: 6 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -4,11 +4,12 @@ use std::path::Path;
44

55
use murmurhash3::murmurhash3_x64_128;
66

7-
use mmap_bitvec::MmapBitVec;
87
use bitvec::BitVector;
8+
use mmap_bitvec::MmapBitVec;
99

1010
// we don't want to use murmurhash3::Murmur3Hasher b/c it makes copies of the
1111
// bytes to be hashed with every single `hash` call
12+
#[derive(Default)]
1213
pub struct MurmurHasher(u64, u64);
1314

1415
impl MurmurHasher {
@@ -33,7 +34,6 @@ impl Hasher for MurmurHasher {
3334
}
3435
}
3536

36-
3737
/// A simple implementation of a Bloom filter backed by `BitVec`
3838
pub struct BloomFilter {
3939
bit_vec: MmapBitVec,
@@ -53,9 +53,9 @@ impl BloomFilter {
5353
let bitvec = match filename {
5454
Some(filename) => {
5555
if Path::exists(filename.as_ref()) {
56-
MmapBitVec::open(&filename, Some(b"!!"), false)?
56+
MmapBitVec::open(&filename, Some(b"!!"))?
5757
} else {
58-
MmapBitVec::create(&filename, bits, b"!!", &header)?
58+
MmapBitVec::create(&filename, bits, *b"!!", &header)?
5959
}
6060
}
6161
None => MmapBitVec::from_memory(bits)?,
@@ -100,6 +100,8 @@ impl BloomFilter {
100100

101101
#[cfg(test)]
102102
mod test {
103+
use super::BloomFilter;
104+
103105
#[test]
104106
fn test_bloom_filter() {
105107
use std::fs::remove_file;

src/lib.rs

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -11,8 +11,8 @@ pub mod bloom;
1111
pub mod mmap_bitvec;
1212

1313
#[doc(inline)]
14-
pub use bitvec::{BitVector, BitVecSlice, BIT_VEC_SLICE_SIZE};
15-
#[doc(inline)]
16-
pub use mmap_bitvec::MmapBitVec;
14+
pub use bitvec::{BitVecSlice, BitVector, BIT_VEC_SLICE_SIZE};
1715
#[doc(inline)]
1816
pub use bloom::BloomFilter;
17+
#[doc(inline)]
18+
pub use mmap_bitvec::MmapBitVec;

0 commit comments

Comments
 (0)