Skip to content

Commit 2bbeecd

Browse files
authored
Faster in-memory mmap save to disk (#14)
1 parent 45b97f5 commit 2bbeecd

File tree

4 files changed

+64
-40
lines changed

4 files changed

+64
-40
lines changed

.gitignore

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,4 +2,5 @@ target
22
Cargo.lock
33
.DS_Store
44
.idea/
5-
bfield.mmap
5+
bfield.mmap
6+
hello.tmp

benches/benchmark.rs

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -150,12 +150,23 @@ fn bench_get_range_actual() {
150150
}
151151
}
152152

153+
fn bench_save_to_disk(bv: &MmapBitVec) {
154+
bv.save_to_disk("hello.tmp", [0, 1], &[]).unwrap();
155+
}
156+
153157
fn criterion_benchmark(c: &mut Criterion) {
154158
c.bench_function("get_range_actual", |b| b.iter(|| bench_get_range_actual()));
155159
c.bench_function("get_range_simplified", |b| {
156160
b.iter(|| bench_get_range_simplified())
157161
});
158162
c.bench_function("get_range", |b| b.iter(|| bench_get_range()));
163+
c.bench_function("save_to_disk", |b| {
164+
let mut bitvec = MmapBitVec::from_memory(1_000_000_000).unwrap();
165+
for i in 0..1_000_000_000 {
166+
bitvec.set(i, true);
167+
}
168+
b.iter(|| bench_save_to_disk(&bitvec))
169+
});
159170
}
160171

161172
criterion_group!(benches, criterion_benchmark);

src/combinatorial.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,7 @@ pub fn rank(value: usize, k: u8) -> u128 {
3636
if value as usize >= MARKER_TABLE_SIZE {
3737
let mut marker = MARKER_TABLES[&k][MARKER_TABLE_SIZE - 1];
3838
for _ in 0..(value - MARKER_TABLE_SIZE) {
39-
// next_rank would underflow if we pass 0, we return it instead
39+
// next_rank would overflow if we passed it 0, so we return the marker instead
4040
if marker == 0 {
4141
return marker;
4242
}

src/mmap_bitvec.rs

Lines changed: 50 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,15 @@ impl CommonMmap {
4747
CommonMmap::Mmap(_) => Ok(()),
4848
}
4949
}
50+
51+
/// Returns the mapped bytes as an immutable byte slice
52+
#[inline]
53+
pub fn as_slice(&self) -> &[u8] {
54+
match self {
55+
CommonMmap::MmapMut(x) => x.as_ref(),
56+
CommonMmap::Mmap(x) => x.as_ref(),
57+
}
58+
}
5059
}
5160

5261
/// Bit vector backed by a mmap-ed file
@@ -69,6 +78,32 @@ pub struct MmapBitVec {
6978
is_anon: bool,
7079
}
7180

81+
fn create_bitvec_file(
82+
filename: &Path,
83+
size: usize,
84+
magic: [u8; 2],
85+
header: &[u8],
86+
) -> Result<(std::fs::File, u64), io::Error> {
87+
let byte_size = ((size - 1) >> 3) as u64 + 1;
88+
let mut file = OpenOptions::new()
89+
.read(true)
90+
.write(true)
91+
.create(true)
92+
.open(filename)?;
93+
// two magic bytes, u16 header length, header, u64 bitvec length, bitvec
94+
let total_header_size = (2 + 2 + header.len() + 8) as u64;
95+
file.set_len(total_header_size + byte_size)?;
96+
97+
file.write_all(&magic)?;
98+
let serialized_header_size: [u8; 2] = (header.len() as u16).to_be_bytes();
99+
file.write_all(&serialized_header_size)?;
100+
file.write_all(header)?;
101+
let serialized_size: [u8; 8] = (size as u64).to_be_bytes();
102+
file.write_all(&serialized_size)?;
103+
104+
Ok((file, total_header_size))
105+
}
106+
72107
impl MmapBitVec {
73108
/// Creates a new `MmapBitVec` file
74109
///
@@ -85,26 +120,7 @@ impl MmapBitVec {
85120
"Headers longer than 65636 bytes not supported"
86121
);
87122

88-
let byte_size = ((size - 1) >> 3) as u64 + 1;
89-
// if we're creating the file, we need to make sure it's bug enough for our
90-
// purposes (memmap doesn't automatically size the file)
91-
let mut file = OpenOptions::new()
92-
.read(true)
93-
.write(true)
94-
.create(true)
95-
.open(filename)?;
96-
// two magic bytes, u16 header length, header, u64 bitvec length, bitvec
97-
let total_header_size = (2 + 2 + header.len() + 8) as u64;
98-
file.set_len(total_header_size + byte_size)?;
99-
// file.seek(io::SeekFrom::Start(0))?;
100-
101-
file.write_all(&magic)?;
102-
let serialized_header_size: [u8; 2] = (header.len() as u16).to_be_bytes();
103-
file.write_all(&serialized_header_size)?;
104-
file.write_all(header)?;
105-
let serialized_size: [u8; 8] = (size as u64).to_be_bytes();
106-
file.write_all(&serialized_size)?;
107-
123+
let (file, total_header_size) = create_bitvec_file(filename.as_ref(), size, magic, header)?;
108124
let mmap = unsafe { MmapOptions::new().offset(total_header_size).map_mut(&file) }?;
109125
Ok(MmapBitVec {
110126
mmap: CommonMmap::MmapMut(mmap),
@@ -227,27 +243,21 @@ impl MmapBitVec {
227243
})
228244
}
229245

230-
/// Converts an in-memory mmap bitvector to a file-backed one.
246+
/// Saves an in-memory mmap bitvector to disk.
231247
/// This is a no-op if the mmap is already file-backed.
232-
/// Returns the new mmap after flushing.
233-
pub fn into_mmap_file<P: AsRef<Path>>(
234-
self,
248+
pub fn save_to_disk<P: AsRef<Path>>(
249+
&self,
235250
filename: P,
236251
magic: [u8; 2],
237252
header: &[u8],
238-
) -> Result<Self, io::Error> {
253+
) -> Result<(), io::Error> {
239254
if !self.is_anon {
240-
return Ok(self);
241-
}
242-
let mut file_mmap = MmapBitVec::create(filename, self.size, magic, header)?;
243-
244-
// Not super efficient
245-
for i in 0..self.size {
246-
file_mmap.set(i, self.get(i));
255+
return Ok(());
247256
}
248-
file_mmap.mmap.flush()?;
249-
250-
Ok(file_mmap)
257+
let (mut file, _) = create_bitvec_file(filename.as_ref(), self.size, magic, header)?;
258+
// create_bitvec_file has just written the full header sequentially, so the file cursor already sits at the first byte of the bitvec content
259+
file.write_all(self.mmap.as_slice())?;
260+
Ok(())
251261
}
252262

253263
/// Returns the header
@@ -814,15 +824,17 @@ mod test {
814824
}
815825

816826
#[test]
817-
fn can_convert_memory_to_file() {
827+
fn can_write_anon_mmap_to_disk() {
818828
let mut b = MmapBitVec::from_memory(128).unwrap();
829+
b.set(0, true);
819830
b.set(7, true);
820831
b.set(56, true);
821832
b.set(127, true);
822833
let dir = tempfile::tempdir().unwrap();
823-
let f = b
824-
.into_mmap_file(dir.path().join("test"), *b"!!", &[])
834+
b.save_to_disk(dir.path().join("test"), *b"!!", &[])
825835
.unwrap();
836+
let f = MmapBitVec::open(dir.path().join("test"), Some(b"!!"), false).unwrap();
837+
assert_eq!(f.get(0), true);
826838
assert_eq!(f.get(7), true);
827839
assert_eq!(f.get(56), true);
828840
assert_eq!(f.get(127), true);

0 commit comments

Comments
 (0)