Skip to content

Commit 407e425

Browse files
authored
feat: hybrid column data cache (#17771)
* feat: hybrid disk cache * cleanup * fix typos * tweak ut * refine doc * refactor: allow in-mem cache or hybrid cache to be none * update ut test golden file
1 parent f384932 commit 407e425

File tree

22 files changed

+320
-178
lines changed

22 files changed

+320
-178
lines changed

Cargo.lock

Lines changed: 1 addition & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

src/query/config/src/config.rs

Lines changed: 21 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -3039,6 +3039,14 @@ pub struct CacheConfig {
30393039
)]
30403040
pub table_data_cache_population_queue_size: u32,
30413041

3042+
/// Bytes of data cache in-memory
3043+
#[clap(
3044+
long = "cache-data-cache-in-memory-bytes",
3045+
value_name = "VALUE",
3046+
default_value = "0"
3047+
)]
3048+
pub data_cache_in_memory_bytes: u64,
3049+
30423050
/// Storage that hold the data caches
30433051
#[clap(flatten)]
30443052
#[serde(rename = "disk")]
@@ -3144,10 +3152,17 @@ pub struct DiskCacheConfig {
31443152
)]
31453153
pub path: String,
31463154

3147-
/// Whether sync data after write.
3148-
/// If the query node's memory is managed by cgroup (at least cgroup v1),
3149-
/// it's recommended to set this to true to prevent the container from
3150-
/// being killed due to high dirty page memory usage.
3155+
/// Controls whether to synchronize data to disk after write operations.
3156+
///
3157+
/// When enabled, this option forces written data to be flushed to physical storage,
3158+
/// reducing the amount of dirty pages in memory. This is particularly important in
3159+
/// containerized environments where:
3160+
///
3161+
/// 1. Memory limits are enforced by cgroups (v1, maybe v2 as well, though accounting differs)
3162+
/// 2. Dirty pages are counted against the memory limit, which increases the possibility of triggering OOM kills
3163+
///
3164+
/// Setting this to true improves stability in memory-constrained / containerized environments at the cost
3165+
/// of potentially reduced write performance.
31513166
#[clap(
31523167
long = "cache-disk-sync-data",
31533168
value_name = "VALUE",
@@ -3300,6 +3315,7 @@ mod cache_config_converters {
33003315
data_cache_storage: value.data_cache_storage.try_into()?,
33013316
table_data_cache_population_queue_size: value
33023317
.table_data_cache_population_queue_size,
3318+
data_cache_in_memory_bytes: value.data_cache_in_memory_bytes,
33033319
disk_cache_config: value.disk_cache_config.try_into()?,
33043320
data_cache_key_reload_policy: value.data_cache_key_reload_policy.try_into()?,
33053321
table_data_deserialized_data_bytes: value.table_data_deserialized_data_bytes,
@@ -3335,6 +3351,7 @@ mod cache_config_converters {
33353351
data_cache_key_reload_policy: value.data_cache_key_reload_policy.into(),
33363352
table_data_cache_population_queue_size: value
33373353
.table_data_cache_population_queue_size,
3354+
data_cache_in_memory_bytes: value.data_cache_in_memory_bytes,
33383355
disk_cache_config: value.disk_cache_config.into(),
33393356
table_data_deserialized_data_bytes: value.table_data_deserialized_data_bytes,
33403357
table_data_deserialized_memory_ratio: value.table_data_deserialized_memory_ratio,

src/query/config/src/inner.rs

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -615,6 +615,9 @@ pub struct CacheConfig {
615615
/// increase this value.
616616
pub table_data_cache_population_queue_size: u32,
617617

618+
/// Bytes of data cache in-memory
619+
pub data_cache_in_memory_bytes: u64,
620+
618621
/// Storage that hold the raw data caches
619622
pub disk_cache_config: DiskCacheConfig,
620623

@@ -732,6 +735,7 @@ impl Default for CacheConfig {
732735
table_prune_partitions_count: 256,
733736
data_cache_storage: Default::default(),
734737
table_data_cache_population_queue_size: 0,
738+
data_cache_in_memory_bytes: 0,
735739
disk_cache_config: Default::default(),
736740
data_cache_key_reload_policy: Default::default(),
737741
table_data_deserialized_data_bytes: 0,

src/query/service/tests/it/storages/testdata/configs_table_basic.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ DB.Table: 'system'.'configs', Table: configs-table_id:1, ver:0, Engine: SystemCo
55
| Column 0 | Column 1 | Column 2 | Column 3 |
66
+-----------+-------------------------------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+----------+
77
| 'cache' | 'block_meta_count' | '0' | '' |
8+
| 'cache' | 'data_cache_in_memory_bytes' | '0' | '' |
89
| 'cache' | 'data_cache_key_reload_policy' | 'reset' | '' |
910
| 'cache' | 'data_cache_storage' | 'none' | '' |
1011
| 'cache' | 'disk.max_bytes' | '21474836480' | '' |
Lines changed: 67 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,67 @@
1+
// Copyright 2021 Datafuse Labs
2+
//
3+
// Licensed under the Apache License, Version 2.0 (the "License");
4+
// you may not use this file except in compliance with the License.
5+
// You may obtain a copy of the License at
6+
//
7+
// http://www.apache.org/licenses/LICENSE-2.0
8+
//
9+
// Unless required by applicable law or agreed to in writing, software
10+
// distributed under the License is distributed on an "AS IS" BASIS,
11+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
// See the License for the specific language governing permissions and
13+
// limitations under the License.
14+
15+
use bytes::Bytes;
16+
pub use databend_common_catalog::plan::PartStatistics;
17+
pub use databend_common_catalog::plan::Partitions;
18+
pub use databend_common_catalog::table::Table;
19+
use databend_common_exception::ErrorCode;
20+
pub use databend_storages_common_index::filters::Xor8Filter;
21+
pub use databend_storages_common_index::BloomIndexMeta;
22+
pub use databend_storages_common_index::InvertedIndexFile;
23+
pub use databend_storages_common_index::InvertedIndexMeta;
24+
pub use databend_storages_common_table_meta::meta::column_oriented_segment::ColumnOrientedSegment;
25+
pub use databend_storages_common_table_meta::meta::BlockMeta;
26+
pub use databend_storages_common_table_meta::meta::CompactSegmentInfo;
27+
pub use databend_storages_common_table_meta::meta::SegmentInfo;
28+
pub use databend_storages_common_table_meta::meta::TableSnapshot;
29+
pub use databend_storages_common_table_meta::meta::TableSnapshotStatistics;
30+
pub use parquet::file::metadata::ParquetMetaData;
31+
32+
use crate::HybridCache;
33+
34+
pub struct ColumnData(Bytes);
35+
36+
impl ColumnData {
37+
pub fn from_merge_io_read_result(bytes: Bytes) -> Self {
38+
// Bytes are from merge read result, may refer to a large chunk of memory;
39+
// Caching this large buffer wastes memory, so we need to copy it out.
40+
ColumnData(bytes.to_vec().into())
41+
}
42+
43+
pub fn bytes(&self) -> Bytes {
44+
self.0.clone()
45+
}
46+
47+
pub fn size(&self) -> usize {
48+
self.0.len()
49+
}
50+
}
51+
52+
pub type ColumnDataCache = HybridCache<ColumnData>;
53+
impl TryFrom<&ColumnData> for Vec<u8> {
54+
type Error = ErrorCode;
55+
fn try_from(value: &ColumnData) -> Result<Self, Self::Error> {
56+
Ok(value.0.to_vec())
57+
}
58+
}
59+
60+
// copying should be avoided
61+
impl TryFrom<Bytes> for ColumnData {
62+
type Error = ErrorCode;
63+
64+
fn try_from(value: Bytes) -> Result<Self, Self::Error> {
65+
Ok(ColumnData(value))
66+
}
67+
}

src/query/storages/common/cache/src/caches.rs

Lines changed: 10 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -18,21 +18,8 @@ use std::time::Instant;
1818

1919
use arrow::array::ArrayRef;
2020
use databend_common_cache::MemSized;
21-
use databend_common_catalog::plan::PartStatistics;
22-
use databend_common_catalog::plan::Partitions;
23-
use databend_common_catalog::table::Table;
24-
use databend_storages_common_index::filters::Xor8Filter;
25-
use databend_storages_common_index::BloomIndexMeta;
26-
use databend_storages_common_index::InvertedIndexFile;
27-
use databend_storages_common_index::InvertedIndexMeta;
28-
use databend_storages_common_table_meta::meta::column_oriented_segment::ColumnOrientedSegment;
29-
use databend_storages_common_table_meta::meta::BlockMeta;
30-
use databend_storages_common_table_meta::meta::CompactSegmentInfo;
31-
use databend_storages_common_table_meta::meta::SegmentInfo;
32-
use databend_storages_common_table_meta::meta::TableSnapshot;
33-
use databend_storages_common_table_meta::meta::TableSnapshotStatistics;
34-
use parquet::file::metadata::ParquetMetaData;
3521

22+
pub use crate::cache_items::*;
3623
use crate::manager::CacheManager;
3724
use crate::providers::HybridCache;
3825
use crate::CacheAccessor;
@@ -266,6 +253,15 @@ impl From<BloomIndexMeta> for CacheValue<BloomIndexMeta> {
266253
}
267254
}
268255

256+
impl From<ColumnData> for CacheValue<ColumnData> {
257+
fn from(value: ColumnData) -> Self {
258+
CacheValue {
259+
mem_bytes: value.size(),
260+
inner: Arc::new(value),
261+
}
262+
}
263+
}
264+
269265
impl From<InvertedIndexMeta> for CacheValue<InvertedIndexMeta> {
270266
fn from(value: InvertedIndexMeta) -> Self {
271267
CacheValue {

src/query/storages/common/cache/src/lib.rs

Lines changed: 4 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -17,19 +17,15 @@
1717
#![feature(assert_matches)]
1818

1919
mod cache;
20+
mod cache_items;
2021
mod caches;
2122
mod manager;
2223
mod providers;
2324
mod read;
2425
mod temp_dir;
2526

26-
pub use cache::CacheAccessor;
27-
pub use cache::Unit;
28-
pub use caches::BlockMetaCache;
29-
pub use caches::CacheValue;
30-
pub use caches::CachedObject;
31-
pub use caches::SegmentBlockMetasCache;
32-
pub use caches::SizedColumnArray;
27+
pub use cache::*;
28+
pub use caches::*;
3329
pub use manager::CacheManager;
3430
// Unfortunately, criterion benchmarks need it to be pub
3531
pub use providers::disk_cache::disk_cache_lru::read_cache_content;
@@ -44,11 +40,5 @@ pub use providers::LruDiskCache;
4440
pub use providers::LruDiskCacheBuilder;
4541
pub use providers::LruDiskCacheHolder;
4642
pub use providers::TableDataCacheKey;
47-
pub use read::CacheKey;
48-
pub use read::CachedReader;
49-
pub use read::HybridCacheReader;
50-
pub use read::InMemoryCacheReader;
51-
pub use read::InMemoryCacheTTLReader;
52-
pub use read::LoadParams;
53-
pub use read::Loader;
43+
pub use read::*;
5444
pub use temp_dir::*;

0 commit comments

Comments
 (0)