Skip to content

Commit 6a89aa1

Browse files
dantengsky and BohuTANG authored
feat: fuzzy table data disk cache key reload (#15566)
* feat: fuzzy table data disk cache key reload

  During query node restart, if the config item `data_cache_key_reload_policy` is set to "fuzzy", disk cache keys will be reloaded from the cache directory instead of directly removing previous cache data. This means that the cache data existing before the restart will not be deleted.

  Note that during the reloading of cache keys, cache capacity will NOT be checked. Therefore, if `cache.disk.max_bytes` is decreased between restarts, no cached items on disk will be removed immediately. Instead, items will be removed when the first new item is put into the cache.

  New config item introduced:

  ~~~
  [cache]
  # Policy of data cache key reloading:
  # - Available options: [reset|fuzzy]
  # - "reset": remove previous data cache during restart
  # - "fuzzy": reload cache keys from cache dir, retaining the cache data that existed before the restart
  data_cache_key_reload_policy = "reset"
  ~~~

* Update src/query/storages/common/cache/src/providers/disk_cache.rs

  Co-authored-by: Bohu <overred.shuttler@gmail.com>

* cargo fmt

* parallel deletion

* cleanup

---------

Co-authored-by: Bohu <overred.shuttler@gmail.com>
1 parent ef4fe8e commit 6a89aa1

File tree

10 files changed

+290
-26
lines changed

10 files changed

+290
-26
lines changed

Cargo.lock

Lines changed: 1 addition & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

src/query/config/src/config.rs

Lines changed: 46 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -71,8 +71,6 @@ use super::inner::QueryConfig as InnerQueryConfig;
7171
use crate::background_config::BackgroundConfig;
7272
use crate::DATABEND_COMMIT_VERSION;
7373

74-
// FIXME: too much boilerplate here
75-
7674
const CATALOG_HIVE: &str = "hive";
7775

7876
/// Config for `query`.
@@ -2806,6 +2804,15 @@ pub struct CacheConfig {
28062804
)]
28072805
pub data_cache_storage: CacheStorageTypeConfig,
28082806

2807+
/// Policy of disk cache restart
2808+
#[clap(
2809+
long = "cache-data-cache-key-reload-policy",
2810+
value_name = "VALUE",
2811+
value_enum,
2812+
default_value_t
2813+
)]
2814+
pub data_cache_key_reload_policy: DiskCacheKeyReloadPolicy,
2815+
28092816
/// Max size of external cache population queue length
28102817
///
28112818
/// the items being queued reference table column raw data, which are
@@ -2890,6 +2897,22 @@ impl Default for CacheStorageTypeConfig {
28902897
}
28912898
}
28922899

2900+
#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize, ValueEnum)]
2901+
#[serde(rename_all = "lowercase")]
2902+
pub enum DiskCacheKeyReloadPolicy {
2903+
// remove all the disk cache during restart
2904+
Reset,
2905+
// recover the cache keys during restart,
2906+
// but cache capacity will not be checked
2907+
Fuzzy,
2908+
}
2909+
2910+
impl Default for DiskCacheKeyReloadPolicy {
2911+
fn default() -> Self {
2912+
Self::Reset
2913+
}
2914+
}
2915+
28932916
#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize, Args, Default)]
28942917
#[serde(default, deny_unknown_fields)]
28952918
pub struct DiskCacheConfig {
@@ -2991,6 +3014,7 @@ mod cache_config_converters {
29913014
table_data_cache_population_queue_size: value
29923015
.table_data_cache_population_queue_size,
29933016
disk_cache_config: value.disk_cache_config.try_into()?,
3017+
data_cache_key_reload_policy: value.data_cache_key_reload_policy.try_into()?,
29943018
table_data_deserialized_data_bytes: value.table_data_deserialized_data_bytes,
29953019
table_data_deserialized_memory_ratio: value.table_data_deserialized_memory_ratio,
29963020
})
@@ -3013,6 +3037,7 @@ mod cache_config_converters {
30133037
inverted_index_filter_memory_ratio: value.inverted_index_filter_memory_ratio,
30143038
table_prune_partitions_count: value.table_prune_partitions_count,
30153039
data_cache_storage: value.data_cache_storage.into(),
3040+
data_cache_key_reload_policy: value.data_cache_key_reload_policy.into(),
30163041
table_data_cache_population_queue_size: value
30173042
.table_data_cache_population_queue_size,
30183043
disk_cache_config: value.disk_cache_config.into(),
@@ -3060,4 +3085,23 @@ mod cache_config_converters {
30603085
}
30613086
}
30623087
}
3088+
3089+
impl TryFrom<DiskCacheKeyReloadPolicy> for inner::DiskCacheKeyReloadPolicy {
3090+
type Error = ErrorCode;
3091+
fn try_from(value: DiskCacheKeyReloadPolicy) -> std::result::Result<Self, Self::Error> {
3092+
Ok(match value {
3093+
DiskCacheKeyReloadPolicy::Reset => inner::DiskCacheKeyReloadPolicy::Reset,
3094+
DiskCacheKeyReloadPolicy::Fuzzy => inner::DiskCacheKeyReloadPolicy::Fuzzy,
3095+
})
3096+
}
3097+
}
3098+
3099+
impl From<inner::DiskCacheKeyReloadPolicy> for DiskCacheKeyReloadPolicy {
3100+
fn from(value: inner::DiskCacheKeyReloadPolicy) -> Self {
3101+
match value {
3102+
inner::DiskCacheKeyReloadPolicy::Reset => DiskCacheKeyReloadPolicy::Reset,
3103+
inner::DiskCacheKeyReloadPolicy::Fuzzy => DiskCacheKeyReloadPolicy::Fuzzy,
3104+
}
3105+
}
3106+
}
30633107
}

src/query/config/src/inner.rs

Lines changed: 27 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -568,6 +568,9 @@ pub struct CacheConfig {
568568
/// Storage that hold the raw data caches
569569
pub disk_cache_config: DiskCacheConfig,
570570

571+
/// Policy of reloading disk cache keys
572+
pub data_cache_key_reload_policy: DiskCacheKeyReloadPolicy,
573+
571574
/// Max size of in memory table column object cache. By default it is 0 (disabled)
572575
///
573576
/// CAUTION: The cache items are deserialized table column objects, may take a lot of memory.
@@ -589,7 +592,6 @@ pub struct CacheConfig {
589592
pub enum CacheStorageTypeConfig {
590593
None,
591594
Disk,
592-
// Redis,
593595
}
594596

595597
impl Default for CacheStorageTypeConfig {
@@ -598,6 +600,20 @@ impl Default for CacheStorageTypeConfig {
598600
}
599601
}
600602

603+
#[derive(Clone, Debug, PartialEq, Eq)]
604+
pub enum DiskCacheKeyReloadPolicy {
605+
// remove all the disk cache during restart
606+
Reset,
607+
// recover the cache keys during restart,
608+
// but cache capacity will not be checked
609+
Fuzzy,
610+
}
611+
impl Default for DiskCacheKeyReloadPolicy {
612+
fn default() -> Self {
613+
Self::Reset
614+
}
615+
}
616+
601617
impl ToString for CacheStorageTypeConfig {
602618
fn to_string(&self) -> String {
603619
match self {
@@ -607,6 +623,15 @@ impl ToString for CacheStorageTypeConfig {
607623
}
608624
}
609625

626+
impl ToString for DiskCacheKeyReloadPolicy {
627+
fn to_string(&self) -> String {
628+
match self {
629+
DiskCacheKeyReloadPolicy::Reset => "reset".to_string(),
630+
DiskCacheKeyReloadPolicy::Fuzzy => "fuzzy".to_string(),
631+
}
632+
}
633+
}
634+
610635
#[derive(Clone, Debug, PartialEq, Eq)]
611636
pub struct DiskCacheConfig {
612637
/// Max bytes of cached raw table data. Default 20GB, set it to 0 to disable it.
@@ -643,6 +668,7 @@ impl Default for CacheConfig {
643668
data_cache_storage: Default::default(),
644669
table_data_cache_population_queue_size: 0,
645670
disk_cache_config: Default::default(),
671+
data_cache_key_reload_policy: Default::default(),
646672
table_data_deserialized_data_bytes: 0,
647673
table_data_deserialized_memory_ratio: 0,
648674
}

src/query/config/src/lib.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,7 @@ pub use inner::CacheConfig;
4646
pub use inner::CacheStorageTypeConfig as CacheStorageTypeInnerConfig;
4747
pub use inner::CatalogConfig;
4848
pub use inner::CatalogHiveConfig;
49+
pub use inner::DiskCacheKeyReloadPolicy;
4950
pub use inner::InnerConfig;
5051
pub use inner::ThriftProtocol;
5152
pub use version::DATABEND_COMMIT_VERSION;

src/query/service/tests/it/storages/testdata/configs_table_basic.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ DB.Table: 'system'.'configs', Table: configs-table_id:1, ver:0, Engine: SystemCo
44
+-----------+--------------------------------------------+----------------------------------------------------------------+----------+
55
| Column 0 | Column 1 | Column 2 | Column 3 |
66
+-----------+--------------------------------------------+----------------------------------------------------------------+----------+
7+
| 'cache' | 'data_cache_key_reload_policy' | 'reset' | '' |
78
| 'cache' | 'data_cache_storage' | 'none' | '' |
89
| 'cache' | 'disk.max_bytes' | '21474836480' | '' |
910
| 'cache' | 'disk.path' | './.databend/_cache' | '' |

src/query/storages/common/cache/Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@ crossbeam-channel = "0.5.6"
2525
hex = "0.4.3"
2626
log = { workspace = true }
2727
parking_lot = { workspace = true }
28+
rayon = "1.9.0"
2829
siphasher = "0.3.10"
2930

3031
[dev-dependencies]

0 commit comments

Comments
 (0)