Skip to content

Commit 3d294c7

Browse files
authored
feat: support sample for table scan (#16216)
* feat: support sample for table scan * add tests * fix tests * fix tests * fix tests
1 parent 752c3b4 commit 3d294c7

File tree

29 files changed

+796
-47
lines changed

29 files changed

+796
-47
lines changed

src/query/ast/src/ast/expr.rs

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -789,6 +789,23 @@ pub enum Literal {
789789
Null,
790790
}
791791

792+
impl Literal {
793+
pub fn as_double(&self) -> Result<f64> {
794+
match self {
795+
Literal::UInt64(val) => Ok(*val as f64),
796+
Literal::Float64(val) => Ok(*val),
797+
Literal::Decimal256 { value, scale, .. } => {
798+
let div = 10_f64.powi(*scale as i32);
799+
Ok(value.as_f64() / div)
800+
}
801+
_ => Err(ParseError(
802+
None,
803+
format!("Cannot convert {:?} to double", self),
804+
)),
805+
}
806+
}
807+
}
808+
792809
impl Display for Literal {
793810
fn fmt(&self, f: &mut Formatter) -> std::fmt::Result {
794811
match self {

src/query/ast/src/ast/format/ast_format.rs

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3327,6 +3327,7 @@ impl<'ast> Visitor<'ast> for AstFormatVisitor {
33273327
consume,
33283328
pivot,
33293329
unpivot,
3330+
sample,
33303331
} => {
33313332
let mut name = String::new();
33323333
name.push_str("TableIdentifier ");
@@ -3354,6 +3355,11 @@ impl<'ast> Visitor<'ast> for AstFormatVisitor {
33543355
name.push_str(&unpivot.to_string());
33553356
}
33563357

3358+
if let Some(sample) = sample {
3359+
name.push(' ');
3360+
name.push_str(&sample.to_string());
3361+
}
3362+
33573363
let mut children = Vec::new();
33583364

33593365
if let Some(temporal) = temporal {

src/query/ast/src/ast/format/syntax/query.rs

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -322,6 +322,7 @@ pub(crate) fn pretty_table(table: TableReference) -> RcDoc<'static> {
322322
consume,
323323
pivot,
324324
unpivot,
325+
sample,
325326
} => if let Some(catalog) = catalog {
326327
RcDoc::text(catalog.to_string()).append(RcDoc::text("."))
327328
} else {
@@ -353,6 +354,11 @@ pub(crate) fn pretty_table(table: TableReference) -> RcDoc<'static> {
353354
} else {
354355
RcDoc::nil()
355356
})
357+
.append(if let Some(sample) = sample {
358+
RcDoc::text(format!(" {sample}"))
359+
} else {
360+
RcDoc::nil()
361+
})
356362
.append(if let Some(alias) = alias {
357363
RcDoc::text(format!(" AS {alias}"))
358364
} else {

src/query/ast/src/ast/query.rs

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ use derive_visitor::Drive;
1919
use derive_visitor::DriveMut;
2020

2121
use super::Lambda;
22+
use super::Literal;
2223
use crate::ast::write_comma_separated_list;
2324
use crate::ast::write_dot_separated_list;
2425
use crate::ast::Expr;
@@ -608,6 +609,39 @@ impl Display for TemporalClause {
608609
}
609610
}
610611

612+
#[derive(Debug, Clone, PartialEq, Drive, DriveMut)]
613+
pub enum SampleLevel {
614+
ROW,
615+
BLOCK,
616+
}
617+
618+
#[derive(Debug, Clone, PartialEq, Drive, DriveMut)]
619+
pub enum SampleConfig {
620+
Probability(Literal),
621+
RowsNum(Literal),
622+
}
623+
624+
#[derive(Debug, Clone, PartialEq, Drive, DriveMut)]
625+
pub struct Sample {
626+
pub sample_level: SampleLevel,
627+
pub sample_conf: SampleConfig,
628+
}
629+
630+
impl Display for Sample {
631+
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
632+
write!(f, "SAMPLE ")?;
633+
match self.sample_level {
634+
SampleLevel::ROW => write!(f, "ROW ")?,
635+
SampleLevel::BLOCK => write!(f, "BLOCK ")?,
636+
}
637+
match &self.sample_conf {
638+
SampleConfig::Probability(prob) => write!(f, "({})", prob)?,
639+
SampleConfig::RowsNum(rows) => write!(f, "({} ROWS)", rows)?,
640+
}
641+
Ok(())
642+
}
643+
}
644+
611645
/// A table name or a parenthesized subquery with an optional alias
612646
#[derive(Debug, Clone, PartialEq, Drive, DriveMut)]
613647
pub enum TableReference {
@@ -623,6 +657,7 @@ pub enum TableReference {
623657
consume: bool,
624658
pivot: Option<Box<Pivot>>,
625659
unpivot: Option<Box<Unpivot>>,
660+
sample: Option<Sample>,
626661
},
627662
// `TABLE(expr)[ AS alias ]`
628663
TableFunction {
@@ -697,6 +732,7 @@ impl Display for TableReference {
697732
consume,
698733
pivot,
699734
unpivot,
735+
sample,
700736
} => {
701737
write_dot_separated_list(
702738
f,
@@ -721,6 +757,10 @@ impl Display for TableReference {
721757
if let Some(unpivot) = unpivot {
722758
write!(f, " {unpivot}")?;
723759
}
760+
761+
if let Some(sample) = sample {
762+
write!(f, " {sample}")?;
763+
}
724764
}
725765
TableReference::TableFunction {
726766
span: _,

src/query/ast/src/ast/statements/merge_into.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -218,6 +218,7 @@ impl MergeSource {
218218
consume: false,
219219
pivot: None,
220220
unpivot: None,
221+
sample: None,
221222
},
222223
}
223224
}

src/query/ast/src/parser/query.rs

Lines changed: 40 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -685,6 +685,7 @@ pub enum TableReferenceElement {
685685
consume: bool,
686686
pivot: Option<Box<Pivot>>,
687687
unpivot: Option<Box<Unpivot>>,
688+
sample: Option<Sample>,
688689
},
689690
// `TABLE(expr)[ AS alias ]`
690691
TableFunction {
@@ -741,9 +742,43 @@ pub fn table_reference_element(i: Input) -> IResult<WithSpan<TableReferenceEleme
741742
);
742743
let aliased_table = map(
743744
rule! {
744-
#dot_separated_idents_1_to_3 ~ #temporal_clause? ~ (WITH ~ CONSUME)? ~ #table_alias? ~ #pivot? ~ #unpivot?
745+
#dot_separated_idents_1_to_3 ~ #temporal_clause? ~ (WITH ~ CONSUME)? ~ #table_alias? ~ #pivot? ~ #unpivot? ~ SAMPLE? ~ (ROW | BLOCK)? ~ ("(" ~ #expr ~ ROWS? ~ ")")?
745746
},
746-
|((catalog, database, table), temporal, opt_consume, alias, pivot, unpivot)| {
747+
|(
748+
(catalog, database, table),
749+
temporal,
750+
opt_consume,
751+
alias,
752+
pivot,
753+
unpivot,
754+
sample,
755+
level,
756+
sample_conf,
757+
)| {
758+
let mut table_sample = None;
759+
if sample.is_some() {
760+
let sample_level = match level {
761+
// If the sample level is not specified, it defaults to ROW
762+
Some(level) => match level.kind {
763+
ROW => SampleLevel::ROW,
764+
BLOCK => SampleLevel::BLOCK,
765+
_ => unreachable!(),
766+
},
767+
None => SampleLevel::ROW,
768+
};
769+
let mut default_sample_conf = SampleConfig::Probability(Literal::Float64(100.0));
770+
if let Some((_, Expr::Literal { value, .. }, rows, _)) = sample_conf {
771+
default_sample_conf = if rows.is_some() {
772+
SampleConfig::RowsNum(value)
773+
} else {
774+
SampleConfig::Probability(value)
775+
};
776+
}
777+
table_sample = Some(Sample {
778+
sample_level,
779+
sample_conf: default_sample_conf,
780+
})
781+
};
747782
TableReferenceElement::Table {
748783
catalog,
749784
database,
@@ -753,6 +788,7 @@ pub fn table_reference_element(i: Input) -> IResult<WithSpan<TableReferenceEleme
753788
consume: opt_consume.is_some(),
754789
pivot: pivot.map(Box::new),
755790
unpivot: unpivot.map(Box::new),
791+
sample: table_sample,
756792
}
757793
},
758794
);
@@ -864,6 +900,7 @@ impl<'a, I: Iterator<Item = WithSpan<'a, TableReferenceElement>>> PrattParser<I>
864900
consume,
865901
pivot,
866902
unpivot,
903+
sample,
867904
} => TableReference::Table {
868905
span: transform_span(input.span.tokens),
869906
catalog,
@@ -874,6 +911,7 @@ impl<'a, I: Iterator<Item = WithSpan<'a, TableReferenceElement>>> PrattParser<I>
874911
consume,
875912
pivot,
876913
unpivot,
914+
sample,
877915
},
878916
TableReferenceElement::TableFunction {
879917
lateral,

src/query/ast/src/parser/statement.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4149,6 +4149,7 @@ pub fn table_reference_with_alias(i: Input) -> IResult<TableReference> {
41494149
consume: false,
41504150
pivot: None,
41514151
unpivot: None,
4152+
sample: None,
41524153
},
41534154
)(i)
41544155
}
@@ -4168,6 +4169,7 @@ pub fn table_reference_only(i: Input) -> IResult<TableReference> {
41684169
consume: false,
41694170
pivot: None,
41704171
unpivot: None,
4172+
sample: None,
41714173
},
41724174
)(i)
41734175
}

src/query/ast/src/parser/token.rs

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -405,6 +405,8 @@ pub enum TokenKind {
405405
BROTLI,
406406
#[token("BZ2", ignore(ascii_case))]
407407
BZ2,
408+
#[token("BLOCK", ignore(ascii_case))]
409+
BLOCK,
408410
#[token("CALL", ignore(ascii_case))]
409411
CALL,
410412
#[token("CASE", ignore(ascii_case))]
@@ -924,6 +926,8 @@ pub enum TokenKind {
924926
RETURN_FAILED_ONLY,
925927
#[token("REVERSE", ignore(ascii_case))]
926928
REVERSE,
929+
#[token("SAMPLE", ignore(ascii_case))]
930+
SAMPLE,
927931
#[token("MERGE", ignore(ascii_case))]
928932
MERGE,
929933
#[token("MATCHED", ignore(ascii_case))]
@@ -1567,6 +1571,7 @@ impl TokenKind {
15671571
// | TokenKind::AUTHORIZATION
15681572
// | TokenKind::BINARY
15691573
| TokenKind::BOTH
1574+
| TokenKind::BLOCK
15701575
| TokenKind::CASE
15711576
| TokenKind::CAST
15721577
// | TokenKind::CHECK
@@ -1624,6 +1629,7 @@ impl TokenKind {
16241629
// | TokenKind::SIMILAR
16251630
| TokenKind::SOME
16261631
| TokenKind::SEMI
1632+
| TokenKind::SAMPLE
16271633
// | TokenKind::SYMMETRIC
16281634
// | TokenKind::TABLESAMPLE
16291635
| TokenKind::THEN

src/query/ast/tests/it/parser.rs

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -229,6 +229,10 @@ fn test_statement() {
229229
r#"select * FROM t where ((a));"#,
230230
r#"select * FROM t where ((select 1) > 1);"#,
231231
r#"select ((t1.a)>=(((((t2.b)<=(t3.c))) IS NOT NULL)::INTEGER));"#,
232+
r#"select * from t sample row (99);"#,
233+
r#"select * from t sample block (99);"#,
234+
r#"select * from t sample row (10 rows);"#,
235+
r#"select * from t sample block (10 rows);"#,
232236
r#"insert into t (c1, c2) values (1, 2), (3, 4);"#,
233237
r#"insert into t (c1, c2) values (1, 2);"#,
234238
r#"insert into table t select * from t2;"#,

0 commit comments

Comments
 (0)