Skip to content

Commit 614bd25

Browse files
authored
chore: add a setting to decide if table sample is deterministic (#16275)
1 parent 6282d5f commit 614bd25

File tree

6 files changed

+61
-37
lines changed

6 files changed

+61
-37
lines changed

src/query/expression/src/function.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -117,6 +117,7 @@ pub struct FunctionContext {
117117
pub parse_datetime_ignore_remainder: bool,
118118
pub enable_dst_hour_fix: bool,
119119
pub enable_strict_datetime_parser: bool,
120+
pub random_function_seed: bool,
120121
}
121122

122123
impl Default for FunctionContext {
@@ -139,6 +140,7 @@ impl Default for FunctionContext {
139140
parse_datetime_ignore_remainder: false,
140141
enable_dst_hour_fix: false,
141142
enable_strict_datetime_parser: true,
143+
random_function_seed: false,
142144
}
143145
}
144146
}

src/query/functions/src/scalars/other.rs

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -144,7 +144,11 @@ pub fn register(registry: &mut FunctionRegistry) {
144144
})
145145
},
146146
|ctx| {
147-
let mut rng = rand::rngs::SmallRng::from_entropy();
147+
let mut rng = if ctx.func_ctx.random_function_seed {
148+
rand::rngs::SmallRng::seed_from_u64(1)
149+
} else {
150+
rand::rngs::SmallRng::from_entropy()
151+
};
148152
let rand_nums = (0..ctx.num_rows)
149153
.map(|_| rng.gen::<F64>())
150154
.collect::<Vec<_>>();

src/query/service/src/sessions/query_ctx.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -708,6 +708,7 @@ impl TableContext for QueryContext {
708708
let enable_dst_hour_fix = settings.get_enable_dst_hour_fix()?;
709709
let enable_strict_datetime_parser = settings.get_enable_strict_datetime_parser()?;
710710
let query_config = &GlobalConfig::instance().query;
711+
let random_function_seed = settings.get_random_function_seed()?;
711712

712713
Ok(FunctionContext {
713714
tz,
@@ -729,6 +730,7 @@ impl TableContext for QueryContext {
729730
parse_datetime_ignore_remainder,
730731
enable_dst_hour_fix,
731732
enable_strict_datetime_parser,
733+
random_function_seed,
732734
})
733735
}
734736

src/query/settings/src/settings_default.rs

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -824,6 +824,12 @@ impl DefaultSettings {
824824
desc: "Format NULL as str in query api response",
825825
mode: SettingMode::Both,
826826
range: Some(SettingRange::Numeric(0..=1)),
827+
}),
828+
("random_function_seed", DefaultSettingValue {
829+
value: UserSettingValue::UInt64(0),
830+
desc: "Seed for random function",
831+
mode: SettingMode::Both,
832+
range: Some(SettingRange::Numeric(0..=1)),
827833
})
828834
]);
829835

src/query/settings/src/settings_getter_setter.rs

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -685,4 +685,8 @@ impl Settings {
685685
pub fn get_max_data_retention_period_in_days() -> u64 {
686686
DefaultSettings::data_retention_time_in_days_max()
687687
}
688+
689+
/// Returns whether the `random_function_seed` setting is switched on.
///
/// The setting is stored as a `u64`; this getter reports `true` only when the
/// stored value equals `1`, so it acts as an on/off flag rather than carrying
/// an actual seed value.
///
/// # Errors
///
/// Propagates any error returned by `try_get_u64` when reading the setting.
pub fn get_random_function_seed(&self) -> Result<bool> {
690+
Ok(self.try_get_u64("random_function_seed")? == 1)
691+
}
688692
}

tests/sqllogictests/suites/mode/standalone/explain/table_sample.test

Lines changed: 42 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -1,42 +1,45 @@
11
statement ok
22
create or replace table t as select number as a from numbers(1000);
33

4-
## query T
5-
## explain select * from t sample row (10 rows);
6-
## ----
7-
## Filter
8-
## ├── output columns: [t.a (#0)]
9-
## ├── filters: [rand() <= 0.01]
10-
## ├── estimated rows: 200.00
11-
## └── TableScan
12-
## ├── table: default.default.t
13-
## ├── output columns: [a (#0)]
14-
## ├── read rows: 1000
15-
## ├── read size: 1.40 KiB
16-
## ├── partitions total: 1
17-
## ├── partitions scanned: 1
18-
## ├── pruning stats: [segments: <range pruning: 1 to 1>, blocks: <range pruning: 1 to 1>]
19-
## ├── push downs: [filters: [rand() <= 0.01], limit: NONE]
20-
## └── estimated rows: 1000.00
21-
##
22-
## query T
23-
## explain select * from t sample row (99.1);
24-
## ----
25-
## Filter
26-
## ├── output columns: [t.a (#0)]
27-
## ├── filters: [rand() <= 0.991]
28-
## ├── estimated rows: 200.00
29-
## └── TableScan
30-
## ├── table: default.default.t
31-
## ├── output columns: [a (#0)]
32-
## ├── read rows: 1000
33-
## ├── read size: 1.40 KiB
34-
## ├── partitions total: 1
35-
## ├── partitions scanned: 1
36-
## ├── pruning stats: [segments: <range pruning: 1 to 1>, blocks: <range pruning: 1 to 1>]
37-
## ├── push downs: [filters: [rand() <= 0.991], limit: NONE]
38-
## └── estimated rows: 1000.00
39-
##
4+
statement ok
5+
set random_function_seed = 1;
6+
7+
query T
8+
explain select * from t sample row (10 rows);
9+
----
10+
Filter
11+
├── output columns: [t.a (#0)]
12+
├── filters: [rand() <= 0.01]
13+
├── estimated rows: 200.00
14+
└── TableScan
15+
├── table: default.default.t
16+
├── output columns: [a (#0)]
17+
├── read rows: 1000
18+
├── read size: 1.40 KiB
19+
├── partitions total: 1
20+
├── partitions scanned: 1
21+
├── pruning stats: [segments: <range pruning: 1 to 1>, blocks: <range pruning: 1 to 1>]
22+
├── push downs: [filters: [rand() <= 0.01], limit: NONE]
23+
└── estimated rows: 1000.00
24+
25+
query T
26+
explain select * from t sample row (99.1);
27+
----
28+
Filter
29+
├── output columns: [t.a (#0)]
30+
├── filters: [rand() <= 0.991]
31+
├── estimated rows: 200.00
32+
└── TableScan
33+
├── table: default.default.t
34+
├── output columns: [a (#0)]
35+
├── read rows: 1000
36+
├── read size: 1.40 KiB
37+
├── partitions total: 1
38+
├── partitions scanned: 1
39+
├── pruning stats: [segments: <range pruning: 1 to 1>, blocks: <range pruning: 1 to 1>]
40+
├── push downs: [filters: [rand() <= 0.991], limit: NONE]
41+
└── estimated rows: 1000.00
42+
4043

4144
statement ok
4245
drop table t;
@@ -92,3 +95,6 @@ query
9295
select count(distinct number) < 10000000 from numbers(10000000) sample block(50);
9396
----
9497
1
98+
99+
statement ok
100+
set random_function_seed = 0;

0 commit comments

Comments
 (0)