Skip to content

Commit 3bbeb3e

Browse files
authored
Updates (#12)
* wip * Clippy * Allow upgrading from in memory mmap to on disk mmap
1 parent 9015cb3 commit 3bbeb3e

File tree

6 files changed

+63
-23
lines changed

6 files changed

+63
-23
lines changed

.github/workflows/ci.yml

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -10,13 +10,13 @@ jobs:
1010
build: [pinned, stable]
1111
include:
1212
- build: pinned
13-
os: ubuntu-18.04
14-
rust: 1.40.0
13+
os: ubuntu-22.04
14+
rust: 1.60.0
1515
- build: stable
16-
os: ubuntu-18.04
16+
os: ubuntu-22.04
1717
rust: stable
1818
steps:
19-
- uses: actions/checkout@v1
19+
- uses: actions/checkout@v3
2020
- name: Install Rust
2121
uses: hecrj/setup-rust-action@v1
2222
with:

Cargo.toml

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,17 +1,18 @@
11
[package]
22
name = "mmap-bitvec"
3-
version = "0.3.1"
3+
version = "0.4.0"
44
authors = ["Roderick Bovee <roderick@onecodex.com>"]
55
autobenches = false
66
edition = "2018"
77

88
[dependencies]
9-
memmap = "0.7.0"
9+
memmap2 = "0.5"
1010
murmurhash3 = "0.0.5"
1111
once_cell = "1.3.1"
1212

1313
[dev-dependencies]
1414
criterion = "0.3"
15+
tempfile = "3.3.0"
1516

1617
[[bench]]
1718
name = "benchmark"

benches/benchmark.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,12 @@
11
extern crate criterion;
2-
extern crate memmap;
2+
extern crate memmap2;
33
extern crate mmap_bitvec;
44

55
use std::fs::OpenOptions;
66
use std::mem::transmute;
77
use std::ops::Range;
88

9-
use memmap::{MmapMut, MmapOptions};
9+
use memmap2::{MmapMut, MmapOptions};
1010
use mmap_bitvec::{combinatorial::rank, BitVector, MmapBitVec};
1111

1212
use criterion::{criterion_group, criterion_main, Criterion};

src/bitvec.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@ pub trait BitVector {
2525
}
2626

2727
fn get_range(&self, r: Range<usize>) -> u128 {
28-
if r.end - r.start > 128usize {
28+
if r.end - r.start > 128 {
2929
panic!("Range too large (>128)")
3030
} else if r.end > self.size() {
3131
panic!("Range ends outside of BitVec")
@@ -90,7 +90,7 @@ impl_bitvector!(u128, 128);
9090

9191
impl BitVector for &[u8] {
9292
fn get(&self, i: usize) -> bool {
93-
if i / 8 >= self.len() {
93+
if i / 8 >= self.size() {
9494
panic!("Invalid bit vector index");
9595
}
9696
self[i / 8] >> (8 - i % 8) & 1 == 1

src/combinatorial.rs

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ const MARKER_TABLE_SIZE: usize = 200_000;
66

77
// TODO: replace with const fn when it is possible
88
// (for and if are not allowed in const fn on current stable)
9+
// https://github.com/rust-lang/rust/issues/87575
910
static MARKER_TABLES: Lazy<HashMap<u8, Vec<u128>>> = Lazy::new(|| {
1011
let mut m = HashMap::new();
1112
for k in 1..10u8 {
@@ -102,7 +103,7 @@ pub fn choose(n: u64, k: u8) -> u64 {
102103
#[inline]
103104
fn next_rank(marker: u128) -> u128 {
104105
if marker == 0 {
105-
panic!("WOOPS");
106+
unreachable!("WOOPS");
106107
}
107108
let t = marker | (marker - 1);
108109
(t + 1) | (((!t & (t + 1)) - 1) >> (marker.trailing_zeros() + 1))

src/mmap_bitvec.rs

Lines changed: 50 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@ use std::mem::transmute;
55
use std::ops::Range;
66
use std::path::Path;
77

8-
use memmap::{Mmap, MmapMut, MmapOptions};
8+
use memmap2::{Mmap, MmapMut, MmapOptions};
99

1010
use crate::bitvec::BitVector;
1111

@@ -59,22 +59,20 @@ pub struct MmapBitVec {
5959
pub mmap: CommonMmap,
6060
pub size: usize,
6161
header: Box<[u8]>,
62+
is_anon: bool,
6263
}
6364

6465
impl MmapBitVec {
6566
/// Creates a new `MmapBitVec` file
6667
///
6768
/// The overall size of the bit vector (in bits) and a fixed-size header must
6869
/// also be provided (although the header can be 0-length).
69-
pub fn create<P>(
70+
pub fn create<P: AsRef<Path>>(
7071
filename: P,
7172
size: usize,
7273
magic: [u8; 2],
7374
header: &[u8],
74-
) -> Result<Self, io::Error>
75-
where
76-
P: AsRef<Path>,
77-
{
75+
) -> Result<Self, io::Error> {
7876
assert!(
7977
header.len() < 65_536,
8078
"Headers longer than 65636 bytes not supported"
@@ -94,17 +92,18 @@ impl MmapBitVec {
9492
// file.seek(io::SeekFrom::Start(0))?;
9593

9694
file.write_all(&magic)?;
97-
let serialized_header_size: [u8; 2] = unsafe { transmute((header.len() as u16).to_be()) };
95+
let serialized_header_size: [u8; 2] = (header.len() as u16).to_be_bytes();
9896
file.write_all(&serialized_header_size)?;
9997
file.write_all(header)?;
100-
let serialized_size: [u8; 8] = unsafe { transmute((size as u64).to_be()) };
98+
let serialized_size: [u8; 8] = (size as u64).to_be_bytes();
10199
file.write_all(&serialized_size)?;
102100

103101
let mmap = unsafe { MmapOptions::new().offset(total_header_size).map_mut(&file) }?;
104102
Ok(MmapBitVec {
105103
mmap: CommonMmap::MmapMut(mmap),
106104
size,
107105
header: header.to_vec().into_boxed_slice(),
106+
is_anon: false,
108107
})
109108
}
110109

@@ -182,6 +181,7 @@ impl MmapBitVec {
182181
mmap,
183182
size: size as usize,
184183
header: header.into_boxed_slice(),
184+
is_anon: false,
185185
})
186186
}
187187

@@ -201,6 +201,7 @@ impl MmapBitVec {
201201
mmap: CommonMmap::Mmap(mmap),
202202
size: byte_size * 8,
203203
header: Box::new([]),
204+
is_anon: false,
204205
})
205206
}
206207

@@ -215,9 +216,33 @@ impl MmapBitVec {
215216
mmap: CommonMmap::MmapMut(mmap),
216217
size,
217218
header: vec![].into_boxed_slice(),
219+
is_anon: true,
218220
})
219221
}
220222

223+
/// Converts an in-memory mmap bitvector to a file-backed one created at `filename` with the given `magic` and `header`.
224+
/// This is a no-op if `self.is_anon` is false: `self` is returned unchanged and `filename`, `magic` and `header` are ignored.
225+
/// Returns the new file-backed bitvector after flushing it; any `io::Error` from creating or flushing the file is propagated.
226+
pub fn into_mmap_file<P: AsRef<Path>>(
227+
self,
228+
filename: P,
229+
magic: [u8; 2],
230+
header: &[u8],
231+
) -> Result<Self, io::Error> {
232+
if !self.is_anon {
233+
return Ok(self);
234+
}
235+
let mut file_mmap = MmapBitVec::create(filename, self.size, magic, header)?;
236+
237+
// Not super efficient: copies one bit at a time through `get`/`set` for all `self.size` bits
238+
for i in 0..self.size {
239+
file_mmap.set(i, self.get(i));
240+
}
241+
file_mmap.mmap.flush()?;
242+
243+
Ok(file_mmap)
244+
}
245+
221246
/// Returns the header
222247
pub fn header(&self) -> &[u8] {
223248
&self.header
@@ -533,10 +558,7 @@ impl BitVector for MmapBitVec {
533558
// u64 is stored in the "right" order in memory
534559
let main_chunk = (x << (128 - size_main)).to_be();
535560

536-
let bytes: [u8; 16];
537-
unsafe {
538-
bytes = transmute(main_chunk);
539-
}
561+
let bytes: [u8; 16] = main_chunk.to_le_bytes();
540562
for (byte_idx, byte) in ((byte_idx_st + 1)..byte_idx_en).zip(bytes.iter()) {
541563
unsafe {
542564
*mmap.add(byte_idx) |= *byte;
@@ -783,4 +805,20 @@ mod test {
783805
assert_eq!(b.select(1, 0), Some(7));
784806
assert_eq!(b.select(3, 0), Some(127));
785807
}
808+
809+
// Sets three bits on an in-memory bitvector, converts it with `into_mmap_file`, and checks the same bits read back (plus one unset bit).
#[test]
810+
fn can_convert_memory_to_file() {
811+
let mut b = MmapBitVec::from_memory(128).unwrap();
812+
b.set(7, true);
813+
b.set(56, true);
814+
b.set(127, true);
815+
let dir = tempfile::tempdir().unwrap();
816+
let f = b
817+
.into_mmap_file(dir.path().join("test"), *b"!!", &[])
818+
.unwrap();
819+
assert_eq!(f.get(7), true);
820+
assert_eq!(f.get(56), true);
821+
assert_eq!(f.get(127), true);
822+
assert_eq!(f.get(10), false);
823+
}
786824
}

0 commit comments

Comments
 (0)