feat(sqlite): Improve throughput of SqliteEventCacheStore::load_all_chunks_metadata by 1140% #5411

Open · wants to merge 2 commits into main
Changes from all commits
68 changes: 44 additions & 24 deletions benchmarks/benches/linked_chunk.rs
@@ -187,11 +187,14 @@ fn reading(c: &mut Criterion) {

while events.peek().is_some() {
let events_chunk = events.by_ref().take(80).collect::<Vec<_>>();

if events_chunk.is_empty() {
break;
}

lc.push_items_back(events_chunk);
lc.push_gap_back(Gap { prev_token: format!("gap{num_gaps}") });

num_gaps += 1;
}
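An aside on the seeding loop above: `events.by_ref().take(80)` drains up to 80 items from the same underlying iterator on each pass, and the `peek` check decides when to stop. A minimal standalone sketch of the same pattern, with toy integers in place of the benchmark's event factory:

```rust
// Standalone sketch of the chunking pattern used above: `by_ref` lets
// `take(80)` drain the same underlying iterator, so each pass pulls at
// most the next 80 items.
fn main() {
    let mut items = (0..200).peekable();
    let mut num_gaps = 0;

    while items.peek().is_some() {
        let chunk = items.by_ref().take(80).collect::<Vec<_>>();
        if chunk.is_empty() {
            break;
        }
        // In the benchmark, this is where the chunk and a trailing gap are
        // pushed onto the linked chunk.
        println!("chunk of {} items, then gap{num_gaps}", chunk.len());
        num_gaps += 1;
    }

    // 200 items split as 80 + 80 + 40.
    assert_eq!(num_gaps, 3);
}
```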

@@ -205,30 +208,47 @@ fn reading(c: &mut Criterion) {
// Define the throughput.
group.throughput(Throughput::Elements(num_events));

// Get a bencher.
group.bench_function(BenchmarkId::new(store_name, num_events), |bencher| {
// Bench the routine.
bencher.to_async(&runtime).iter(|| async {
// Load the last chunk first,
let (last_chunk, chunk_id_gen) =
store.load_last_chunk(linked_chunk_id).await.unwrap();

let mut lc =
lazy_loader::from_last_chunk::<128, _, _>(last_chunk, chunk_id_gen)
.expect("no error when reconstructing the linked chunk")
.expect("there is a linked chunk in the store");

// Then load until the start of the linked chunk.
let mut cur_chunk_id = lc.chunks().next().unwrap().identifier();
while let Some(prev) =
store.load_previous_chunk(linked_chunk_id, cur_chunk_id).await.unwrap()
{
cur_chunk_id = prev.identifier;
lazy_loader::insert_new_first_chunk(&mut lc, prev)
.expect("no error when linking the previous lazy-loaded chunk");
}
})
});
// Bench the lazy loader.
group.bench_function(
BenchmarkId::new(format!("lazy_loader/{store_name}"), num_events),
|bencher| {
// Bench the routine.
bencher.to_async(&runtime).iter(|| async {
// Load the last chunk first,
let (last_chunk, chunk_id_gen) =
store.load_last_chunk(linked_chunk_id).await.unwrap();

let mut lc =
lazy_loader::from_last_chunk::<128, _, _>(last_chunk, chunk_id_gen)
.expect("no error when reconstructing the linked chunk")
.expect("there is a linked chunk in the store");

// Then load until the start of the linked chunk.
let mut cur_chunk_id = lc.chunks().next().unwrap().identifier();
while let Some(prev) =
store.load_previous_chunk(linked_chunk_id, cur_chunk_id).await.unwrap()
{
cur_chunk_id = prev.identifier;
lazy_loader::insert_new_first_chunk(&mut lc, prev)
.expect("no error when linking the previous lazy-loaded chunk");
}
})
},
);

// Bench the metadata loader.
group.bench_function(
BenchmarkId::new(format!("metadata/{store_name}"), num_events),
|bencher| {
// Bench the routine.
bencher.to_async(&runtime).iter(|| async {
let _metadata = store
.load_all_chunks_metadata(linked_chunk_id)
.await
.expect("metadata must load");
})
},
);

{
let _guard = runtime.enter();
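For context on the new `metadata` benchmark above: it only drops the result, while a real caller would typically fold the per-chunk counts that `load_all_chunks_metadata` returns. A minimal sketch of that use; the `ChunkMetadata` struct below is a hypothetical stand-in whose field names are assumptions for illustration, not the SDK's actual definition:

```rust
// Hypothetical stand-in for the rows `load_all_chunks_metadata` produces:
// one entry per chunk, with `num_items` already resolved by the SQL query
// (and therefore 0 for gaps). Field names are assumptions for illustration.
struct ChunkMetadata {
    identifier: u64,
    previous: Option<u64>,
    next: Option<u64>,
    num_items: usize,
}

/// Total number of events in the linked chunk, without loading any event.
fn total_events(metadata: &[ChunkMetadata]) -> usize {
    metadata.iter().map(|chunk| chunk.num_items).sum()
}

fn main() {
    let metadata = vec![
        ChunkMetadata { identifier: 0, previous: None, next: Some(1), num_items: 80 },
        ChunkMetadata { identifier: 1, previous: Some(0), next: Some(2), num_items: 0 }, // a gap
        ChunkMetadata { identifier: 2, previous: Some(1), next: None, num_items: 80 },
    ];
    assert_eq!(total_events(&metadata), 160);
}
```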
@@ -41,7 +41,7 @@ CREATE TABLE "event_chunks" (
-- Primary key is the event ID.
PRIMARY KEY (event_id),

-- We need a uniqueness constraint over the linked chunk id, `chunk_id` and
-- We need a uniqueness constraint over the `linked_chunk_id`, `chunk_id` and
-- `position` tuple because (i) they must be unique, (ii) it dramatically
-- improves the performance.
UNIQUE (linked_chunk_id, chunk_id, position),
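As a quick illustration of the comment above, the sketch below builds a scratch table with the same `UNIQUE (linked_chunk_id, chunk_id, position)` clause and shows a duplicate tuple being rejected. It uses the `rusqlite` crate and a simplified table definition, not the real migration:

```rust
// Hedged sketch: exercise the UNIQUE constraint on a scratch copy of the
// table (simplified to the columns named in the migration above).
use rusqlite::Connection;

fn main() -> rusqlite::Result<()> {
    let conn = Connection::open_in_memory()?;
    conn.execute_batch(
        r#"
        CREATE TABLE event_chunks (
            event_id TEXT NOT NULL,
            linked_chunk_id TEXT NOT NULL,
            chunk_id INTEGER NOT NULL,
            position INTEGER NOT NULL,
            PRIMARY KEY (event_id),
            UNIQUE (linked_chunk_id, chunk_id, position)
        );
        "#,
    )?;

    conn.execute("INSERT INTO event_chunks VALUES ('$a', 'room', 0, 0)", ())?;

    // Two different events cannot occupy the same position of the same
    // chunk: the unique index backing the constraint rejects the insert.
    let err = conn
        .execute("INSERT INTO event_chunks VALUES ('$b', 'room', 0, 0)", ())
        .unwrap_err();
    println!("duplicate position rejected: {err}");

    Ok(())
}
```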
40 changes: 23 additions & 17 deletions crates/matrix-sdk-sqlite/src/event_cache_store.rs
@@ -882,31 +882,37 @@ impl EventCacheStore for SqliteEventCacheStore {
self.read()
.await?
.with_transaction(move |txn| -> Result<_> {
// I'm not a DB analyst, so for my own future sanity: this query joins the
// linked_chunks and events_chunks tables together, with a few specificities:
//
// - the `GROUP BY` clause will regroup the joined item lines by chunk.
// - the `COUNT(ec.event_id)` counts the number of unique non-NULL lines from
//   the events_chunks table, aka the number of events in the chunk.
// - using a `LEFT JOIN` makes it so that if there's a chunk that has no events
//   (because it's a gap, or an empty events chunk), there will still be a
//   result for that chunk, and the count will be `0` (because the joined lines
//   would be `NULL`).
//
// Overall, this query will return what we want:
// - for a gap or an empty item chunk: a count of 0,
// - otherwise, the number of related lines in `event_chunks` for that chunk,
//   i.e. the number of events in that chunk.
// This query will visit all chunks of a linked chunk with ID
// `hashed_linked_chunk_id`. For each chunk, it collects its ID
// (`ChunkIdentifier`), previous chunk, next chunk, and number of events
// (`num_events`). If it's a gap, `num_events` is equal to 0; otherwise it
// counts the number of events in `event_chunks` where `event_chunks.chunk_id =
// linked_chunks.id`.
//
// Why not use a `(LEFT) JOIN` + `COUNT`? Because for gaps, the entire
// `event_chunks` table would be traversed every time, which is extremely
// inefficient. To speed that up, we could use an `INDEX`, but it would consume
// more storage space. This solution is a nice trade-off and offers great
// performance.
//
// Also, use `ORDER BY id` to get a deterministic ordering for testing purposes.

txn.prepare(
r#"
SELECT lc.id, lc.previous, lc.next, COUNT(ec.event_id)
SELECT
lc.id,
lc.previous,
lc.next,
CASE lc.type
WHEN 'E' THEN (
SELECT COUNT(ec.event_id)
FROM event_chunks as ec
WHERE ec.chunk_id = lc.id
)
ELSE
0
END as num_events
FROM linked_chunks as lc
LEFT JOIN event_chunks as ec ON ec.chunk_id = lc.id
WHERE lc.linked_chunk_id = ?
GROUP BY lc.id
ORDER BY lc.id"#,
)?
.query_map((&hashed_linked_chunk_id,), |row| {
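To make the old-versus-new trade-off concrete, here is a self-contained sketch that runs the new `CASE` + correlated-subquery form and checks that gaps report zero events. It uses the `rusqlite` crate and a simplified toy schema; column and table names mirror the diff, but everything else (types, data) is made up:

```rust
// A hedged sketch of the new query shape on a toy schema, not the real
// matrix-sdk-sqlite one.
use rusqlite::Connection;

fn main() -> rusqlite::Result<()> {
    let conn = Connection::open_in_memory()?;

    conn.execute_batch(
        r#"
        CREATE TABLE linked_chunks (
            id INTEGER PRIMARY KEY,
            linked_chunk_id TEXT NOT NULL,
            previous INTEGER,
            next INTEGER,
            type TEXT NOT NULL -- 'E' for an events chunk, 'G' for a gap
        );
        CREATE TABLE event_chunks (
            event_id TEXT PRIMARY KEY,
            linked_chunk_id TEXT NOT NULL,
            chunk_id INTEGER NOT NULL,
            position INTEGER NOT NULL,
            UNIQUE (linked_chunk_id, chunk_id, position)
        );
        INSERT INTO linked_chunks VALUES
            (0, 'room', NULL, 1, 'E'),
            (1, 'room', 0, 2, 'G'),
            (2, 'room', 1, NULL, 'E');
        INSERT INTO event_chunks VALUES
            ('$a', 'room', 0, 0),
            ('$b', 'room', 0, 1),
            ('$c', 'room', 2, 0);
        "#,
    )?;

    // Same shape as the new query: the correlated subquery only runs for
    // 'E' chunks, so gaps never touch `event_chunks` at all.
    let mut stmt = conn.prepare(
        r#"
        SELECT
            lc.id,
            CASE lc.type
                WHEN 'E' THEN (
                    SELECT COUNT(ec.event_id)
                    FROM event_chunks AS ec
                    WHERE ec.chunk_id = lc.id
                )
                ELSE 0
            END AS num_events
        FROM linked_chunks AS lc
        WHERE lc.linked_chunk_id = ?
        ORDER BY lc.id
        "#,
    )?;

    let counts = stmt
        .query_map(("room",), |row| {
            Ok((row.get::<_, i64>(0)?, row.get::<_, i64>(1)?))
        })?
        .collect::<rusqlite::Result<Vec<_>>>()?;

    // Chunk 0 holds two events, chunk 1 is a gap, chunk 2 holds one event.
    assert_eq!(counts, [(0, 2), (1, 0), (2, 1)]);
    Ok(())
}
```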