Skip to content

Commit 88700eb

Browse files
committed
replace RoaringBitmap with plain old HashSet
I think we could use RoaringBimtmap to maintain the set of snapshots that references a segment, but not sure about which crate to use, and if they meet the performance assumptions and implemented correctly. let's re-enable RoaringBitmap later(after evaluations).
1 parent 56a92ce commit 88700eb

File tree

5 files changed

+26
-44
lines changed

5 files changed

+26
-44
lines changed

Cargo.lock

Lines changed: 0 additions & 12 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

src/query/storages/fuse/fuse/Cargo.toml

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,6 @@ futures-util = "0.3.24"
4242
itertools = "0.10.5"
4343
metrics = "0.20.1"
4444
opendal = "0.22"
45-
roaring = "0.10.1"
4645
serde = { workspace = true }
4746
serde_json = { workspace = true }
4847
tracing = "0.1.36"

src/query/storages/fuse/fuse/src/io/mod.rs

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,6 @@ pub use read::TableSnapshotReader;
3131
pub use segments::try_join_futures;
3232
pub use segments::SegmentsIO;
3333
pub use snapshots::ListSnapshotLiteOption;
34-
pub use snapshots::PositionTagged;
3534
pub use snapshots::SnapshotLiteListExtended;
3635
pub use snapshots::SnapshotsIO;
3736
pub use write::write_block;

src/query/storages/fuse/fuse/src/io/snapshots.rs

Lines changed: 12 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -26,14 +26,14 @@ use common_catalog::table_context::TableContext;
2626
use common_exception::ErrorCode;
2727
use common_exception::Result;
2828
use common_storages_table_meta::meta::Location;
29+
use common_storages_table_meta::meta::SnapshotId;
2930
use common_storages_table_meta::meta::TableSnapshot;
3031
use common_storages_table_meta::meta::TableSnapshotLite;
3132
use futures::stream::StreamExt;
3233
use futures_util::future;
3334
use futures_util::TryStreamExt;
3435
use opendal::ObjectMode;
3536
use opendal::Operator;
36-
use roaring::RoaringBitmap;
3737
use tracing::info;
3838
use tracing::warn;
3939
use tracing::Instrument;
@@ -49,11 +49,10 @@ pub struct SnapshotsIO {
4949
format_version: u64,
5050
}
5151

52-
pub type PositionTagged<T> = (T, usize);
5352
pub struct SnapshotLiteListExtended {
5453
pub chained_snapshot_lites: Vec<TableSnapshotLite>,
55-
pub segment_locations: HashMap<Location, RoaringBitmap>,
56-
pub orphan_snapshot_lites: Vec<PositionTagged<TableSnapshotLite>>,
54+
pub segment_locations: HashMap<Location, HashSet<SnapshotId>>,
55+
pub orphan_snapshot_lites: Vec<TableSnapshotLite>,
5756
}
5857

5958
pub enum ListSnapshotLiteOption<'a> {
@@ -173,7 +172,8 @@ impl SnapshotsIO {
173172
// List all the snapshot file paths
174173
// note that snapshot file paths of ongoing txs might be included
175174
let mut snapshot_files = vec![];
176-
let mut segment_location_with_index: HashMap<Location, RoaringBitmap> = HashMap::new();
175+
let mut segment_location_with_index: HashMap<Location, HashSet<SnapshotId>> =
176+
HashMap::new();
177177
if let Some(prefix) = Self::get_s3_prefix_from_file(&root_snapshot_file) {
178178
snapshot_files = self.list_files(&prefix, limit, None).await?;
179179
}
@@ -198,11 +198,9 @@ impl SnapshotsIO {
198198
continue;
199199
}
200200
let snapshot_lite = TableSnapshotLite::from(snapshot.as_ref());
201+
let snapshot_id = snapshot_lite.snapshot_id;
201202
snapshot_lites.push(snapshot_lite);
202203

203-
// since we use u32 RoaringBitmap to index the snapshots
204-
// just in case, here we check if number of snapshot is within upper bound
205-
let idx = u32::try_from(snapshot_lites.len() - 1)?;
206204
if let ListSnapshotLiteOption::NeedSegmentsWithExclusion(filter) = list_options {
207205
// collects segments, and the snapshots that reference them.
208206
for segment_location in &snapshot.segments {
@@ -214,9 +212,9 @@ impl SnapshotsIO {
214212
segment_location_with_index
215213
.entry(segment_location.clone())
216214
.and_modify(|v| {
217-
v.insert(idx);
215+
v.insert(snapshot_id);
218216
})
219-
.or_insert_with(|| RoaringBitmap::from_iter(vec![idx]));
217+
.or_insert_with(|| HashSet::from_iter(vec![snapshot_id]));
220218
}
221219
}
222220
}
@@ -255,11 +253,11 @@ impl SnapshotsIO {
255253
fn chain_snapshots(
256254
snapshot_lites: Vec<TableSnapshotLite>,
257255
root_snapshot: &TableSnapshot,
258-
) -> (Vec<TableSnapshotLite>, Vec<(TableSnapshotLite, usize)>) {
256+
) -> (Vec<TableSnapshotLite>, Vec<TableSnapshotLite>) {
259257
let mut snapshot_map = HashMap::new();
260258
let mut chained_snapshot_lites = vec![];
261-
for (idx, snapshot_lite) in snapshot_lites.into_iter().enumerate() {
262-
snapshot_map.insert(snapshot_lite.snapshot_id, (snapshot_lite, idx));
259+
for snapshot_lite in snapshot_lites.into_iter() {
260+
snapshot_map.insert(snapshot_lite.snapshot_id, snapshot_lite);
263261
}
264262
let root_snapshot_lite = TableSnapshotLite::from(root_snapshot);
265263
let mut prev_snapshot_id_tuple = root_snapshot_lite.prev_snapshot_id;
@@ -270,7 +268,7 @@ impl SnapshotsIO {
270268
None => {
271269
break;
272270
}
273-
Some((prev_snapshot, _idx)) => {
271+
Some(prev_snapshot) => {
274272
prev_snapshot_id_tuple = prev_snapshot.prev_snapshot_id;
275273
chained_snapshot_lites.push(prev_snapshot);
276274
}

src/query/storages/fuse/fuse/src/operations/gc.rs

Lines changed: 14 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -28,13 +28,11 @@ use common_storages_table_meta::caches::CacheManager;
2828
use common_storages_table_meta::meta::Location;
2929
use common_storages_table_meta::meta::SnapshotId;
3030
use common_storages_table_meta::meta::TableSnapshotLite;
31-
use roaring::RoaringBitmap;
3231
use tracing::info;
3332
use tracing::warn;
3433

3534
use crate::io::Files;
3635
use crate::io::ListSnapshotLiteOption;
37-
use crate::io::PositionTagged;
3836
use crate::io::SegmentsIO;
3937
use crate::io::SnapshotsIO;
4038
use crate::FuseTable;
@@ -131,16 +129,13 @@ impl FuseTable {
131129
partitioned_snapshots
132130
.within_retention
133131
.into_iter()
134-
.map(|(_, index)| index as u32),
132+
.map(|snapshot| snapshot.snapshot_id)
133+
.collect(),
135134
snapshot_lites_extended.segment_locations,
136135
);
137136

138137
// orphan_snapshots that beyond retention period are allowed to be collected
139-
orphan_snapshots = partitioned_snapshots
140-
.beyond_retention
141-
.into_iter()
142-
.map(|(v, _)| v)
143-
.collect();
138+
orphan_snapshots = partitioned_snapshots.beyond_retention;
144139

145140
// FIXME: we do not need to write last snapshot hint here(since last snapshot never changed
146141
// during gc). introduce a dedicated stmt to refresh the hint file instead pls.
@@ -360,14 +355,14 @@ impl FuseTable {
360355
fn apply_retention_rule(
361356
ctx: &dyn TableContext,
362357
base_timestamp: Option<DateTime<Utc>>,
363-
snapshot_lites: Vec<(TableSnapshotLite, usize)>,
358+
snapshot_lites: Vec<TableSnapshotLite>,
364359
) -> Result<RetentionPartition> {
365360
// let retention_interval = Duration::hours(DEFAULT_RETENTION_PERIOD_HOURS as i64);
366361
let retention_interval = Duration::hours(ctx.get_settings().get_retention_period()? as i64);
367362
let retention_point = base_timestamp.map(|s| s - retention_interval);
368363
let (beyond_retention, within_retention) = snapshot_lites
369364
.into_iter()
370-
.partition(|(lite, _idx)| lite.timestamp < retention_point);
365+
.partition(|lite| lite.timestamp < retention_point);
371366
Ok(RetentionPartition {
372367
beyond_retention,
373368
within_retention,
@@ -377,12 +372,15 @@ impl FuseTable {
377372
// filter out segments that are referenced by orphan snapshots
378373
// which are within retention period
379374
fn filter_out_segments_within_retention(
380-
orphan_snapshot_index: impl IntoIterator<Item = u32>,
381-
mut segment_with_refer_index: HashMap<Location, RoaringBitmap>,
375+
// orphan_snapshot_index: impl IntoIterator<Item = SnapshotId>,
376+
orphan_snapshot_index: HashSet<SnapshotId>,
377+
mut segment_with_refer_index: HashMap<Location, HashSet<SnapshotId>>,
382378
) -> HashSet<Location> {
383-
let orphan_snapshot_index_bitmap = RoaringBitmap::from_iter(orphan_snapshot_index);
379+
// let orphan_snapshot_index_bitmap = RoaringBitmap::from_iter(orphan_snapshot_index);
380+
// segment_with_refer_index
381+
// .retain(|_location, refer_map| orphan_snapshot_index_bitmap.is_disjoint(refer_map));
384382
segment_with_refer_index
385-
.retain(|_location, refer_map| orphan_snapshot_index_bitmap.is_disjoint(refer_map));
383+
.retain(|_location, refer_map| orphan_snapshot_index.is_disjoint(refer_map));
386384
segment_with_refer_index.into_keys().collect()
387385
}
388386

@@ -452,6 +450,6 @@ impl FuseTable {
452450
}
453451

454452
struct RetentionPartition {
455-
beyond_retention: Vec<PositionTagged<TableSnapshotLite>>,
456-
within_retention: Vec<PositionTagged<TableSnapshotLite>>,
453+
beyond_retention: Vec<TableSnapshotLite>,
454+
within_retention: Vec<TableSnapshotLite>,
457455
}

0 commit comments

Comments
 (0)