Skip to content

Commit 457e7c9

Browse files
authored
Merge pull request #125 from Dr-Emann/roaring64
Bindings CRoaring 3.0, including 64 bit bitmaps
2 parents a61067e + 1db901c commit 457e7c9

31 files changed

+16706
-6867
lines changed

croaring-sys/CRoaring/bindgen_bundled_version.rs

Lines changed: 479 additions & 40 deletions
Large diffs are not rendered by default.

croaring-sys/CRoaring/roaring.c

Lines changed: 10843 additions & 6132 deletions
Large diffs are not rendered by default.

croaring-sys/CRoaring/roaring.h

Lines changed: 1222 additions & 334 deletions
Large diffs are not rendered by default.

croaring-sys/CRoaring/roaring.hh

Lines changed: 333 additions & 280 deletions
Large diffs are not rendered by default.

croaring-sys/Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
[package]
22
name = "croaring-sys"
3-
version = "1.1.0"
3+
version = "2.0.0"
44
edition = "2021"
55
authors = ["croaring-rs developers"]
66
license = "Apache-2.0"

croaring-sys/build.rs

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,4 @@
11
use std::env;
2-
use std::path::PathBuf;
32

43
fn main() {
54
println!("cargo:rerun-if-changed=CRoaring");

croaring/Cargo.toml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
[package]
22
name = "croaring"
3-
version = "1.0.1"
3+
version = "1.1.0"
44
edition = "2021"
55
authors = ["croaring-rs developers"]
66
license = "Apache-2.0"
@@ -22,7 +22,7 @@ roaring = "0.10"
2222
criterion = { version = "0.5", features = ["html_reports"] }
2323

2424
[dependencies]
25-
ffi = { package = "croaring-sys", path = "../croaring-sys", version = "1.1.0" }
25+
ffi = { package = "croaring-sys", path = "../croaring-sys", version = "2.0.0" }
2626
byteorder = "1.4.3"
2727

2828
[[bench]]

croaring/benches/benches.rs

Lines changed: 134 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,9 @@
11
use criterion::{
22
black_box, criterion_group, criterion_main, BatchSize, BenchmarkId, Criterion, Throughput,
33
};
4+
use std::ops::ControlFlow;
45

5-
use croaring::{Bitmap, Portable};
6+
use croaring::{Bitmap, Bitmap64, Portable};
67

78
fn new(c: &mut Criterion) {
89
c.bench_function("new", |b| b.iter(Bitmap::new));
@@ -130,10 +131,25 @@ fn flip(c: &mut Criterion) {
130131
}
131132

132133
fn to_vec(c: &mut Criterion) {
133-
c.bench_function("to_vec", |b| {
134-
let bitmap = Bitmap::of(&[1, 2, 3]);
134+
const N: usize = 100_000;
135+
let bitmap: Bitmap = random_iter().take(N).collect();
136+
let mut g = c.benchmark_group("collect");
137+
g.bench_function("to_vec", |b| {
135138
b.iter(|| bitmap.to_vec());
136139
});
140+
g.bench_function("via_iter", |b| {
141+
b.iter(|| bitmap.iter().collect::<Vec<_>>());
142+
});
143+
g.bench_function("foreach", |b| {
144+
b.iter(|| {
145+
let mut vec = Vec::with_capacity(bitmap.cardinality() as usize);
146+
bitmap.for_each(|item| -> ControlFlow<()> {
147+
vec.push(item);
148+
ControlFlow::Continue(())
149+
});
150+
vec
151+
});
152+
});
137153
}
138154

139155
fn get_serialized_size_in_bytes(c: &mut Criterion) {
@@ -213,24 +229,35 @@ fn bulk_new(c: &mut Criterion) {
213229
group.finish();
214230
}
215231

216-
fn random_iter(c: &mut Criterion) {
232+
#[derive(Clone)]
233+
struct RandomIter {
234+
x: u32,
235+
}
236+
237+
impl Iterator for RandomIter {
238+
type Item = u32;
239+
240+
fn next(&mut self) -> Option<u32> {
241+
const MULTIPLIER: u32 = 742938285;
242+
const MODULUS: u32 = (1 << 31) - 1;
243+
self.x = (MULTIPLIER.wrapping_mul(self.x)) % MODULUS;
244+
Some(self.x)
245+
}
246+
}
247+
248+
fn random_iter() -> RandomIter {
249+
RandomIter { x: 20170705 }
250+
}
251+
252+
fn create_random(c: &mut Criterion) {
217253
const N: u32 = 5_000;
218254
// Clamp values so we get some re-use of containers
219255
const MAX: u32 = 8 * (u16::MAX as u32 + 1);
220256

221257
let mut group = c.benchmark_group("random_iter");
222258
group.throughput(Throughput::Elements(N.into()));
223259

224-
let rand_iter = {
225-
const MULTIPLIER: u32 = 742938285;
226-
const MODULUS: u32 = (1 << 31) - 1;
227-
// Super simple LCG iterator
228-
let mut z = 20170705; // seed
229-
std::iter::from_fn(move || {
230-
z = (MULTIPLIER * z) % MODULUS;
231-
Some(z % MAX)
232-
})
233-
};
260+
let rand_iter = random_iter();
234261

235262
group.bench_function("random_adds", |b| {
236263
b.iter(|| {
@@ -252,6 +279,96 @@ fn random_iter(c: &mut Criterion) {
252279
});
253280
}
254281

282+
fn collect_bitmap64_to_vec(c: &mut Criterion) {
283+
const N: u64 = 1_000_000;
284+
285+
let mut group = c.benchmark_group("collect_bitmap64_to_vec");
286+
group.throughput(Throughput::Elements(N.into()));
287+
let bitmap = Bitmap64::from_range(0..N);
288+
group.bench_function("to_vec", |b| {
289+
b.iter_batched(|| (), |()| bitmap.to_vec(), BatchSize::LargeInput);
290+
});
291+
group.bench_function("foreach", |b| {
292+
b.iter_batched(
293+
|| (),
294+
|()| {
295+
let mut vec = Vec::with_capacity(bitmap.cardinality() as usize);
296+
bitmap.for_each(|item| -> ControlFlow<()> {
297+
vec.push(item);
298+
ControlFlow::Continue(())
299+
});
300+
vec
301+
},
302+
BatchSize::LargeInput,
303+
);
304+
});
305+
group.bench_function("iter", |b| {
306+
b.iter_batched(
307+
|| (),
308+
|()| {
309+
let mut vec = Vec::with_capacity(bitmap.cardinality() as usize);
310+
vec.extend(bitmap.iter());
311+
vec
312+
},
313+
BatchSize::LargeInput,
314+
);
315+
});
316+
group.bench_function("iter_many", |b| {
317+
b.iter_batched(
318+
|| (),
319+
|()| {
320+
let mut vec = vec![0; bitmap.cardinality() as usize];
321+
let mut iter = bitmap.cursor();
322+
assert_eq!(iter.read_many(&mut vec), vec.len());
323+
vec
324+
},
325+
BatchSize::LargeInput,
326+
);
327+
});
328+
329+
group.finish();
330+
}
331+
332+
fn iterate_bitmap64(c: &mut Criterion) {
333+
const N: u64 = 1_000_000;
334+
const END_ITER: u64 = N - 100;
335+
336+
let mut group = c.benchmark_group("bitmap64_iterate");
337+
group.throughput(Throughput::Elements(N.into()));
338+
let bitmap = Bitmap64::from_range(0..N);
339+
group.bench_function("iter", |b| {
340+
b.iter(|| {
341+
for x in bitmap.iter() {
342+
if x == END_ITER {
343+
break;
344+
}
345+
}
346+
})
347+
});
348+
group.bench_function("cursor", |b| {
349+
b.iter(|| {
350+
let mut cursor = bitmap.cursor();
351+
while let Some(x) = cursor.next() {
352+
if x == END_ITER {
353+
break;
354+
}
355+
}
356+
})
357+
});
358+
group.bench_function("for_each", |b| {
359+
b.iter(|| {
360+
bitmap.for_each(|x| -> ControlFlow<()> {
361+
if x == END_ITER {
362+
return ControlFlow::Break(());
363+
}
364+
ControlFlow::Continue(())
365+
})
366+
})
367+
});
368+
369+
group.finish();
370+
}
371+
255372
criterion_group!(
256373
benches,
257374
new,
@@ -269,6 +386,8 @@ criterion_group!(
269386
serialize,
270387
deserialize,
271388
bulk_new,
272-
random_iter,
389+
create_random,
390+
collect_bitmap64_to_vec,
391+
iterate_bitmap64,
273392
);
274393
criterion_main!(benches);

croaring/src/bitmap/imp.rs

Lines changed: 77 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,9 @@
1+
use crate::callback::CallbackWrapper;
12
use crate::Bitset;
23
use ffi::roaring_bitmap_t;
3-
use std::convert::TryInto;
44
use std::ffi::{c_void, CStr};
5-
use std::ops::{Bound, RangeBounds};
6-
use std::{mem, ptr};
5+
use std::ops::{Bound, ControlFlow, RangeBounds};
6+
use std::{mem, panic, ptr};
77

88
use super::serialization::{Deserializer, Serializer};
99
use super::{Bitmap, Statistics};
@@ -21,7 +21,7 @@ impl Bitmap {
2121
// (it can be moved safely), and can be freed with `free`, without freeing the underlying
2222
// containers and auxiliary data. Ensure this is still valid every time we update
2323
// the version of croaring.
24-
const _: () = assert!(ffi::ROARING_VERSION_MAJOR == 2 && ffi::ROARING_VERSION_MINOR == 0);
24+
const _: () = assert!(ffi::ROARING_VERSION_MAJOR == 3 && ffi::ROARING_VERSION_MINOR == 0);
2525
ffi::roaring_free(p.cast::<c_void>());
2626
result
2727
}
@@ -279,6 +279,29 @@ impl Bitmap {
279279
unsafe { ffi::roaring_bitmap_remove_checked(&mut self.bitmap, element) }
280280
}
281281

282+
/// Remove many values from the bitmap
283+
///
284+
/// This should be faster than calling `remove` multiple times.
285+
///
286+
/// In order to exploit this optimization, the caller should attempt to keep values with the same high 16 bits of
287+
/// the value next to each other in `elements`
288+
///
289+
/// # Examples
290+
///
291+
/// ```
292+
/// use croaring::Bitmap;
293+
/// let mut bitmap = Bitmap::of(&[1, 2, 3, 4, 5, 6, 7, 8, 9]);
294+
/// bitmap.remove_many(&[1, 2, 3, 4, 5, 6, 7, 8]);
295+
/// assert_eq!(bitmap.to_vec(), vec![9]);
296+
/// ```
297+
#[inline]
298+
#[doc(alias = "roaring_bitmap_remove_many")]
299+
pub fn remove_many(&mut self, elements: &[u32]) {
300+
unsafe {
301+
ffi::roaring_bitmap_remove_many(&mut self.bitmap, elements.len(), elements.as_ptr())
302+
}
303+
}
304+
282305
/// Contains returns true if the integer element is contained in the bitmap
283306
///
284307
/// # Examples
@@ -721,6 +744,49 @@ impl Bitmap {
721744
unsafe { ffi::roaring_bitmap_flip_inplace(&mut self.bitmap, start, end) }
722745
}
723746

747+
/// Iterate over the values in the bitmap in sorted order
748+
///
749+
/// If `f` returns `Break`, iteration will stop and that `Break` value will be returned.
750+
/// Otherwise, iteration continues. If `f` never returns `Break`, `ControlFlow::Continue(())` is returned after all values are visited.
751+
///
752+
/// # Examples
753+
///
754+
/// ```
755+
/// use croaring::Bitmap;
756+
/// use std::ops::ControlFlow;
757+
///
758+
/// let bitmap = Bitmap::of(&[1, 2, 3, 14, 20, 21, 100]);
759+
/// let mut even_nums_under_50 = vec![];
760+
///
761+
/// let first_over_50 = bitmap.for_each(|value| {
762+
/// if value > 50 {
763+
/// return ControlFlow::Break(value);
764+
/// }
765+
/// if value % 2 == 0 {
766+
/// even_nums_under_50.push(value);
767+
/// }
768+
/// ControlFlow::Continue(())
769+
/// });
770+
///
771+
/// assert_eq!(even_nums_under_50, vec![2, 14, 20]);
772+
/// assert_eq!(first_over_50, ControlFlow::Break(100));
773+
/// ```
774+
#[inline]
775+
pub fn for_each<F, O>(&self, f: F) -> ControlFlow<O>
776+
where
777+
F: FnMut(u32) -> ControlFlow<O>,
778+
{
779+
let mut callback_wrapper = CallbackWrapper::new(f);
780+
let (callback, context) = callback_wrapper.callback_and_ctx();
781+
unsafe {
782+
ffi::roaring_iterate(&self.bitmap, Some(callback), context);
783+
}
784+
match callback_wrapper.result() {
785+
Ok(cf) => cf,
786+
Err(e) => panic::resume_unwind(e),
787+
}
788+
}
789+
724790
/// Returns a vector containing all of the integers stored in the Bitmap
725791
/// in sorted order.
726792
///
@@ -922,6 +988,13 @@ impl Bitmap {
922988
/// // Exclusive ranges still step from the start, but do not include it
923989
/// let bitmap = Bitmap::from_range_with_step((Bound::Excluded(10), Bound::Included(30)), 10);
924990
/// assert_eq!(bitmap.to_vec(), [20, 30]);
991+
///
992+
/// // Ranges including max value
993+
/// let bitmap = Bitmap::from_range_with_step((u32::MAX - 1)..=u32::MAX, 1);
994+
/// assert_eq!(bitmap.to_vec(), vec![u32::MAX - 1, u32::MAX]);
995+
///
996+
/// let bitmap = Bitmap::from_range_with_step((u32::MAX - 1)..=u32::MAX, 3);
997+
/// assert_eq!(bitmap.to_vec(), vec![u32::MAX - 1]);
925998
/// ```
926999
#[inline]
9271000
#[doc(alias = "roaring_bitmap_from_range")]

0 commit comments

Comments
 (0)