Skip to content

Commit 11d9980

Browse files
authored
feat: new settings data_retention_num_snapshots_to_keep (#17893)
feat: new setting `data_retention_num_snapshots_to_keep`, which specifies how many snapshots to retain during vacuum operations. - Overrides the setting `data_retention_time_in_days`. - If set to 0, this setting is ignored. - Adds a logic test.
1 parent 46316c7 commit 11d9980

File tree

4 files changed

+131
-5
lines changed

4 files changed

+131
-5
lines changed

src/query/settings/src/settings_default.rs

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -193,6 +193,13 @@ impl DefaultSettings {
193193
scope: SettingScope::Both,
194194
range: Some(SettingRange::Numeric(0..=data_retention_time_in_days_max)),
195195
}),
196+
("data_retention_num_snapshots_to_keep", DefaultSettingValue {
197+
value: UserSettingValue::UInt64(0),
198+
desc: "Specifies how many snapshots to retain during vacuum operations. Overrides 'data_retention_time_in_days'. If set to 0, this setting will be ignored.",
199+
mode: SettingMode::Both,
200+
scope: SettingScope::Both,
201+
range: Some(SettingRange::Numeric(0..=500)),
202+
}),
196203
("max_spill_io_requests", DefaultSettingValue {
197204
value: UserSettingValue::UInt64(default_max_spill_io_requests),
198205
desc: "Sets the maximum number of concurrent spill I/O requests.",

src/query/settings/src/settings_getter_setter.rs

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -234,6 +234,10 @@ impl Settings {
234234
self.try_get_u64("data_retention_time_in_days")
235235
}
236236

237+
pub fn get_data_retention_num_snapshots_to_keep(&self) -> Result<u64> {
238+
self.try_get_u64("data_retention_num_snapshots_to_keep")
239+
}
240+
237241
pub fn get_max_storage_io_requests(&self) -> Result<u64> {
238242
self.try_get_u64("max_storage_io_requests")
239243
}

src/query/storages/fuse/src/fuse_table.rs

Lines changed: 47 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -463,21 +463,63 @@ impl FuseTable {
463463
}
464464
}
465465

466+
/// Returns the data retention policy for this table.
467+
/// Policy is determined in the following priority order:
468+
/// 1. Number of snapshots to keep (from table option or setting)
469+
/// 2. Time-based retention period (if snapshot count is not specified)
466470
pub fn get_data_retention_policy(&self, ctx: &dyn TableContext) -> Result<RetentionPolicy> {
467-
let table_options = &self.table_info.meta.options;
468-
469471
let policy =
470-
if let Some(v) = table_options.get(FUSE_OPT_KEY_DATA_RETENTION_NUM_SNAPSHOTS_TO_KEEP) {
471-
let num_snapshot_keep = v.parse::<usize>()?;
472-
RetentionPolicy::ByNumOfSnapshotsToKeep(num_snapshot_keep)
472+
// Try to get number of snapshots to keep
473+
if let Some(num_snapshots) = self.try_get_policy_by_num_snapshots_to_keep(ctx)? {
474+
RetentionPolicy::ByNumOfSnapshotsToKeep(num_snapshots as usize)
473475
} else {
476+
// Fall back to time-based retention policy
474477
let duration = self.get_data_retention_period(ctx)?;
475478
RetentionPolicy::ByTimePeriod(duration)
476479
};
477480

478481
Ok(policy)
479482
}
480483

484+
/// Tries to retrieve the number of snapshots to keep for the retention policy.
485+
/// Priority order:
486+
/// 1. Table option (if set to a positive value)
487+
/// 2. Global setting (if set to a positive value and table option is not applicable)
488+
///
489+
/// Returns Some(value) if a valid positive value is found, None otherwise.
490+
fn try_get_policy_by_num_snapshots_to_keep(
491+
&self,
492+
ctx: &dyn TableContext,
493+
) -> Result<Option<u64>> {
494+
// Check table option first (highest priority).
495+
//
496+
// A positive value means we use this many snapshots for retention.
497+
// If this table option is not set, or is set to 0, we fall back to the corresponding setting instead.
498+
if let Some(tbl_opt) = self
499+
.table_info
500+
.meta
501+
.options
502+
.get(FUSE_OPT_KEY_DATA_RETENTION_NUM_SNAPSHOTS_TO_KEEP)
503+
{
504+
let num_snapshots = tbl_opt.parse::<u64>()?;
505+
if num_snapshots > 0 {
506+
return Ok(Some(num_snapshots));
507+
}
508+
}
509+
510+
// Check if there is a valid setting of num snapshots to keep:
511+
// Only positive value of setting counts.
512+
let settings_value = ctx
513+
.get_settings()
514+
.get_data_retention_num_snapshots_to_keep()?;
515+
if settings_value > 0 {
516+
return Ok(Some(settings_value));
517+
}
518+
519+
// No valid num_snapshots_to_keep found
520+
Ok(None)
521+
}
522+
481523
pub fn get_data_retention_period(&self, ctx: &dyn TableContext) -> Result<Duration> {
482524
let retention_period = if let Some(v) = self
483525
.table_info
Lines changed: 73 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,73 @@
1+
## Copyright 2023 Databend Cloud
2+
##
3+
## Licensed under the Elastic License, Version 2.0 (the "License");
4+
## you may not use this file except in compliance with the License.
5+
## You may obtain a copy of the License at
6+
##
7+
## https://www.elastic.co/licensing/elastic-license
8+
##
9+
## Unless required by applicable law or agreed to in writing, software
10+
## distributed under the License is distributed on an "AS IS" BASIS,
11+
## WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
## See the License for the specific language governing permissions and
13+
## limitations under the License.
14+
15+
statement ok
16+
create or replace database setting_num_snapshot_to_keep;
17+
18+
statement ok
19+
use setting_num_snapshot_to_keep;
20+
21+
statement ok
22+
create or replace table t (c int) 'fs:///tmp/setting_num_snapshot_to_keep/';
23+
24+
statement ok
25+
create or replace stage stage_av url = 'fs:///tmp/setting_num_snapshot_to_keep/';
26+
27+
# Enable auto vacuum
28+
statement ok
29+
set enable_auto_vacuum = 1;
30+
31+
# CASE 1: Setting `data_retention_num_snapshots_to_keep` overrides `data_retention_time_in_days`
32+
33+
statement ok
34+
set data_retention_time_in_days = 2;
35+
36+
statement ok
37+
set data_retention_num_snapshots_to_keep = 1;
38+
39+
statement ok
40+
insert into t values(1);
41+
42+
statement ok
43+
insert into t values(2);
44+
45+
onlyif mysql
46+
query I
47+
select count() from list_stage(location=> '@stage_av') where name like '%_ss%';
48+
----
49+
1
50+
51+
# CASE 2: Setting `data_retention_num_snapshots_to_keep` to 0 disables it (retention falls back to the time-based policy)
52+
53+
statement ok
54+
set data_retention_num_snapshots_to_keep = 0;
55+
56+
statement ok
57+
insert into t values(1);
58+
59+
statement ok
60+
insert into t values(2);
61+
62+
onlyif mysql
63+
query I
64+
select count() from list_stage(location=> '@stage_av') where name like '%_ss%';
65+
----
66+
3
67+
68+
statement ok
69+
remove @stage_av;
70+
71+
statement ok
72+
drop stage stage_av;
73+

0 commit comments

Comments
 (0)