diff --git a/benchmarks/benches/linked_chunk.rs b/benchmarks/benches/linked_chunk.rs
index 15aa8d9843f..deed9645be6 100644
--- a/benchmarks/benches/linked_chunk.rs
+++ b/benchmarks/benches/linked_chunk.rs
@@ -187,11 +187,14 @@ fn reading(c: &mut Criterion) {
         while events.peek().is_some() {
             let events_chunk = events.by_ref().take(80).collect::<Vec<_>>();
+            if events_chunk.is_empty() { break; }
+
             lc.push_items_back(events_chunk);
             lc.push_gap_back(Gap { prev_token: format!("gap{num_gaps}") });
+            num_gaps += 1;
         }
@@ -205,30 +208,47 @@ fn reading(c: &mut Criterion) {
         // Define the throughput.
         group.throughput(Throughput::Elements(num_events));
 
-        // Get a bencher.
-        group.bench_function(BenchmarkId::new(store_name, num_events), |bencher| {
-            // Bench the routine.
-            bencher.to_async(&runtime).iter(|| async {
-                // Load the last chunk first,
-                let (last_chunk, chunk_id_gen) =
-                    store.load_last_chunk(linked_chunk_id).await.unwrap();
-
-                let mut lc =
-                    lazy_loader::from_last_chunk::<128, _, _>(last_chunk, chunk_id_gen)
-                        .expect("no error when reconstructing the linked chunk")
-                        .expect("there is a linked chunk in the store");
-
-                // Then load until the start of the linked chunk.
-                let mut cur_chunk_id = lc.chunks().next().unwrap().identifier();
-                while let Some(prev) =
-                    store.load_previous_chunk(linked_chunk_id, cur_chunk_id).await.unwrap()
-                {
-                    cur_chunk_id = prev.identifier;
-                    lazy_loader::insert_new_first_chunk(&mut lc, prev)
-                        .expect("no error when linking the previous lazy-loaded chunk");
-                }
-            })
-        });
+        // Bench the lazy loader.
+        group.bench_function(
+            BenchmarkId::new(format!("lazy_loader/{store_name}"), num_events),
+            |bencher| {
+                // Bench the routine.
+                bencher.to_async(&runtime).iter(|| async {
+                    // Load the last chunk first,
+                    let (last_chunk, chunk_id_gen) =
+                        store.load_last_chunk(linked_chunk_id).await.unwrap();
+
+                    let mut lc =
+                        lazy_loader::from_last_chunk::<128, _, _>(last_chunk, chunk_id_gen)
+                            .expect("no error when reconstructing the linked chunk")
+                            .expect("there is a linked chunk in the store");
+
+                    // Then load until the start of the linked chunk.
+                    let mut cur_chunk_id = lc.chunks().next().unwrap().identifier();
+                    while let Some(prev) =
+                        store.load_previous_chunk(linked_chunk_id, cur_chunk_id).await.unwrap()
+                    {
+                        cur_chunk_id = prev.identifier;
+                        lazy_loader::insert_new_first_chunk(&mut lc, prev)
+                            .expect("no error when linking the previous lazy-loaded chunk");
+                    }
+                })
+            },
+        );
+
+        // Bench the metadata loader.
+        group.bench_function(
+            BenchmarkId::new(format!("metadata/{store_name}"), num_events),
+            |bencher| {
+                // Bench the routine.
+                bencher.to_async(&runtime).iter(|| async {
+                    let _metadata = store
+                        .load_all_chunks_metadata(linked_chunk_id)
+                        .await
+                        .expect("metadata must load");
+                })
+            },
+        );
 
         {
             let _guard = runtime.enter();
diff --git a/crates/matrix-sdk-sqlite/migrations/event_cache_store/008_linked_chunk_id.sql b/crates/matrix-sdk-sqlite/migrations/event_cache_store/008_linked_chunk_id.sql
index b7e18f6a81f..c15d737fea7 100644
--- a/crates/matrix-sdk-sqlite/migrations/event_cache_store/008_linked_chunk_id.sql
+++ b/crates/matrix-sdk-sqlite/migrations/event_cache_store/008_linked_chunk_id.sql
@@ -41,7 +41,7 @@ CREATE TABLE "event_chunks" (
     -- Primary key is the event ID.
    PRIMARY KEY (event_id),
 
-    -- We need a uniqueness constraint over the linked chunk id, `chunk_id` and
+    -- We need a uniqueness constraint over the `linked_chunk_id`, `chunk_id` and
     -- `position` tuple because (i) they must be unique, (ii) it dramatically
     -- improves the performance.
     UNIQUE (linked_chunk_id, chunk_id, position),
diff --git a/crates/matrix-sdk-sqlite/src/event_cache_store.rs b/crates/matrix-sdk-sqlite/src/event_cache_store.rs
index 6f5fc8797fb..0bf4242529a 100644
--- a/crates/matrix-sdk-sqlite/src/event_cache_store.rs
+++ b/crates/matrix-sdk-sqlite/src/event_cache_store.rs
@@ -882,31 +882,37 @@ impl EventCacheStore for SqliteEventCacheStore {
         self.read()
             .await?
             .with_transaction(move |txn| -> Result<_> {
-                // I'm not a DB analyst, so for my own future sanity: this query joins the
-                // linked_chunks and events_chunks tables together, with a few specificities:
+                // This query visits all chunks of the linked chunk with ID
+                // `hashed_linked_chunk_id`. For each chunk, it collects its ID
+                // (`ChunkIdentifier`), previous chunk, next chunk, and number of events
+                // (`num_events`). If the chunk is a gap, `num_events` is 0; otherwise, it
+                // counts the events in `event_chunks` where `event_chunks.chunk_id =
+                // linked_chunks.id`.
                 //
-                // - the `GROUP BY` clause will regroup the joined item lines by chunk.
-                // - the `COUNT(ec.event_id)` counts the number of unique non-NULL lines from
-                //   the events_chunks table, aka the number of events in the chunk.
-                // - using a `LEFT JOIN` makes it so that if there's a chunk that has no events
-                //   (because it's a gap, or an empty events chunk), there will still be a
-                //   result for that chunk, and the count will be `0` (because the joined lines
-                //   would be `NULL`).
-                //
-                // Overall, this query will return what we want:
-                // - for a gap or an empty item chunk: a count of 0,
-                // - otherwise, the number of related lines in `event_chunks` for that chunk,
-                //   i.e. the number of events in that chunk.
+                // Why not use a `(LEFT) JOIN` + `COUNT`? Because for gaps, the entire
+                // `event_chunks` table would be traversed every time, which is extremely
+                // inefficient. An `INDEX` could speed that up but would consume more storage
+                // space. This solution is a nice trade-off and offers great performance.
                 //
                 // Also, use `ORDER BY id` to get a deterministic ordering for testing purposes.
                 txn.prepare(
                     r#"
-                        SELECT lc.id, lc.previous, lc.next, COUNT(ec.event_id)
+                        SELECT
+                            lc.id,
+                            lc.previous,
+                            lc.next,
+                            CASE lc.type
+                                WHEN 'E' THEN (
+                                    SELECT COUNT(ec.event_id)
+                                    FROM event_chunks as ec
+                                    WHERE ec.chunk_id = lc.id
+                                )
+                                ELSE
+                                    0
+                            END as num_events
                         FROM linked_chunks as lc
-                        LEFT JOIN event_chunks as ec ON ec.chunk_id = lc.id
                         WHERE lc.linked_chunk_id = ?
-                        GROUP BY lc.id
                        ORDER BY lc.id"#,
                 )?
                 .query_map((&hashed_linked_chunk_id,), |row| {
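
A minimal, self-contained sketch of the counting strategy above, for readers who want to experiment with it outside the SDK. It uses the `rusqlite` crate directly; the toy schema is a stripped-down stand-in for the real tables in `008_linked_chunk_id.sql`, and the `'room'` linked chunk ID and sample rows are invented for illustration (the real store works with hashed linked chunk IDs and more columns and constraints):

    // Illustrative sketch only: a toy schema and made-up data, not the SDK's
    // real tables. Requires the `rusqlite` crate.
    use rusqlite::{params, Connection, Result};

    fn main() -> Result<()> {
        let conn = Connection::open_in_memory()?;

        conn.execute_batch(
            "CREATE TABLE linked_chunks (
                 id INTEGER PRIMARY KEY,
                 linked_chunk_id TEXT NOT NULL,
                 previous INTEGER,
                 next INTEGER,
                 type TEXT NOT NULL -- 'E' for an events chunk, 'G' for a gap
             );
             CREATE TABLE event_chunks (
                 event_id TEXT PRIMARY KEY,
                 linked_chunk_id TEXT NOT NULL,
                 chunk_id INTEGER NOT NULL,
                 position INTEGER NOT NULL,
                 UNIQUE (linked_chunk_id, chunk_id, position)
             );
             -- One events chunk holding two events, followed by one gap.
             INSERT INTO linked_chunks VALUES (0, 'room', NULL, 1, 'E');
             INSERT INTO linked_chunks VALUES (1, 'room', 0, NULL, 'G');
             INSERT INTO event_chunks VALUES ('$ev0', 'room', 0, 0);
             INSERT INTO event_chunks VALUES ('$ev1', 'room', 0, 1);",
        )?;

        // Same shape as the new query: the correlated subquery under `CASE`
        // runs only for 'E' chunks, so gaps never probe `event_chunks`.
        let mut stmt = conn.prepare(
            "SELECT
                 lc.id,
                 lc.previous,
                 lc.next,
                 CASE lc.type
                     WHEN 'E' THEN (
                         SELECT COUNT(ec.event_id)
                         FROM event_chunks as ec
                         WHERE ec.chunk_id = lc.id
                     )
                     ELSE 0
                 END as num_events
             FROM linked_chunks as lc
             WHERE lc.linked_chunk_id = ?
             ORDER BY lc.id",
        )?;

        let rows = stmt.query_map(params!["room"], |row| {
            Ok((
                row.get::<_, i64>(0)?,         // chunk id
                row.get::<_, Option<i64>>(1)?, // previous chunk id
                row.get::<_, Option<i64>>(2)?, // next chunk id
                row.get::<_, i64>(3)?,         // num_events
            ))
        })?;

        // Prints chunk 0 with num_events=2, and chunk 1 (the gap) with
        // num_events=0.
        for row in rows {
            let (id, previous, next, num_events) = row?;
            println!("chunk {id}: previous={previous:?}, next={next:?}, num_events={num_events}");
        }

        Ok(())
    }

The point to observe is that the gap row reaches the `ELSE 0` arm without ever touching `event_chunks`, which is exactly the traversal the old `LEFT JOIN` + `GROUP BY` version could not avoid without an extra index.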