Skip to content

Commit 2b987e7

Browse files
authored
chore: optimize q17 join order (#14624)
* chore: optimized q17 join order * add runtime filtert * fix test * fix test * fix * fix test * rebase * fix test * fix test * fix test * try to fix test * refactor
1 parent 6f47727 commit 2b987e7

File tree

18 files changed

+533
-386
lines changed

18 files changed

+533
-386
lines changed

Cargo.lock

Lines changed: 1 addition & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

src/query/ast/Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@ doctest = false
1313
# Workspace dependencies
1414
databend-common-base = { path = "../../common/base" }
1515
databend-common-exception = { path = "../../common/exception" }
16+
databend-common-functions = { path = "../functions" }
1617
databend-common-io = { path = "../../common/io" }
1718
databend-common-meta-app = { path = "../../meta/app" }
1819

src/query/ast/src/ast/expr.rs

Lines changed: 88 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ use std::fmt::Formatter;
1818
use databend_common_exception::ErrorCode;
1919
use databend_common_exception::Result;
2020
use databend_common_exception::Span;
21+
use databend_common_functions::aggregates::AggregateFunctionFactory;
2122
use databend_common_io::display_decimal_256;
2223
use databend_common_io::escape_string_with_quote;
2324
use enum_as_inner::EnumAsInner;
@@ -1346,3 +1347,90 @@ pub fn split_equivalent_predicate_expr(expr: &Expr) -> Option<(Expr, Expr)> {
13461347
_ => None,
13471348
}
13481349
}
1350+
1351+
// If contain agg function in Expr
1352+
pub fn contain_agg_func(expr: &Expr) -> bool {
1353+
match expr {
1354+
Expr::ColumnRef { .. } => false,
1355+
Expr::IsNull { expr, .. } => contain_agg_func(expr),
1356+
Expr::IsDistinctFrom { left, right, .. } => {
1357+
contain_agg_func(left) || contain_agg_func(right)
1358+
}
1359+
Expr::InList { expr, list, .. } => {
1360+
contain_agg_func(expr) || list.iter().any(contain_agg_func)
1361+
}
1362+
Expr::InSubquery { expr, .. } => contain_agg_func(expr),
1363+
Expr::Between {
1364+
expr, low, high, ..
1365+
} => contain_agg_func(expr) || contain_agg_func(low) || contain_agg_func(high),
1366+
Expr::BinaryOp { left, right, .. } => contain_agg_func(left) || contain_agg_func(right),
1367+
Expr::JsonOp { left, right, .. } => contain_agg_func(left) || contain_agg_func(right),
1368+
Expr::UnaryOp { expr, .. } => contain_agg_func(expr),
1369+
Expr::Cast { expr, .. } => contain_agg_func(expr),
1370+
Expr::TryCast { expr, .. } => contain_agg_func(expr),
1371+
Expr::Extract { expr, .. } => contain_agg_func(expr),
1372+
Expr::DatePart { expr, .. } => contain_agg_func(expr),
1373+
Expr::Position {
1374+
substr_expr,
1375+
str_expr,
1376+
..
1377+
} => contain_agg_func(substr_expr) || contain_agg_func(str_expr),
1378+
Expr::Substring {
1379+
expr,
1380+
substring_for,
1381+
substring_from,
1382+
..
1383+
} => {
1384+
if let Some(substring_for) = substring_for {
1385+
contain_agg_func(expr) || contain_agg_func(substring_for)
1386+
} else {
1387+
contain_agg_func(expr) || contain_agg_func(substring_from)
1388+
}
1389+
}
1390+
Expr::Trim { expr, .. } => contain_agg_func(expr),
1391+
Expr::Literal { .. } => false,
1392+
Expr::CountAll { .. } => false,
1393+
Expr::Tuple { exprs, .. } => exprs.iter().any(contain_agg_func),
1394+
Expr::FunctionCall { name, .. } => {
1395+
AggregateFunctionFactory::instance().contains(name.to_string())
1396+
}
1397+
Expr::Case {
1398+
operand,
1399+
conditions,
1400+
results,
1401+
else_result,
1402+
..
1403+
} => {
1404+
if let Some(operand) = operand {
1405+
if contain_agg_func(operand) {
1406+
return true;
1407+
}
1408+
}
1409+
if conditions.iter().any(contain_agg_func) {
1410+
return true;
1411+
}
1412+
if results.iter().any(contain_agg_func) {
1413+
return true;
1414+
}
1415+
if let Some(else_result) = else_result {
1416+
if contain_agg_func(else_result) {
1417+
return true;
1418+
}
1419+
}
1420+
false
1421+
}
1422+
Expr::Exists { .. } => false,
1423+
Expr::Subquery { .. } => false,
1424+
Expr::MapAccess { expr, .. } => contain_agg_func(expr),
1425+
Expr::Array { exprs, .. } => exprs.iter().any(contain_agg_func),
1426+
Expr::Map { kvs, .. } => kvs.iter().any(|(_, v)| contain_agg_func(v)),
1427+
Expr::Interval { expr, .. } => contain_agg_func(expr),
1428+
Expr::DateAdd { interval, date, .. } => {
1429+
contain_agg_func(interval) || contain_agg_func(date)
1430+
}
1431+
Expr::DateSub { interval, date, .. } => {
1432+
contain_agg_func(interval) || contain_agg_func(date)
1433+
}
1434+
Expr::DateTrunc { date, .. } => contain_agg_func(date),
1435+
}
1436+
}

src/query/sql/src/planner/binder/project.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -130,6 +130,7 @@ impl Binder {
130130
projection_index: Some(column_binding.index),
131131
data_type,
132132
outer_columns,
133+
contain_agg: None,
133134
})
134135
} else {
135136
item.scalar.clone()

src/query/sql/src/planner/optimizer/decorrelate/decorrelate.rs

Lines changed: 17 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -267,11 +267,25 @@ impl SubqueryRewriter {
267267
&mut right_conditions,
268268
&mut left_conditions,
269269
)?;
270+
271+
let mut join_type = JoinType::LeftSingle;
272+
if subquery.contain_agg.unwrap() {
273+
let rel_expr = RelExpr::with_s_expr(&subquery.subquery);
274+
let has_precise_cardinality = rel_expr
275+
.derive_cardinality()?
276+
.statistics
277+
.precise_cardinality
278+
.is_some();
279+
if has_precise_cardinality {
280+
join_type = JoinType::Left;
281+
}
282+
}
283+
270284
let join_plan = Join {
271285
left_conditions,
272286
right_conditions,
273287
non_equi_conditions: vec![],
274-
join_type: JoinType::LeftSingle,
288+
join_type,
275289
marker_index: None,
276290
from_correlated_subquery: true,
277291
need_hold_hash_table: false,
@@ -423,6 +437,7 @@ impl SubqueryRewriter {
423437
Box::from(column_entry.data_type()),
424438
Visibility::Visible,
425439
)
440+
.table_index(column_entry.table_index())
426441
.build(),
427442
});
428443
let derive_column = self.derived_columns.get(correlated_column).unwrap();
@@ -435,6 +450,7 @@ impl SubqueryRewriter {
435450
Box::from(column_entry.data_type()),
436451
Visibility::Visible,
437452
)
453+
.table_index(column_entry.table_index())
438454
.build(),
439455
});
440456
left_conditions.push(left_column);

src/query/sql/src/planner/optimizer/hyper_dp/dphyp.rs

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -178,6 +178,12 @@ impl DPhpy {
178178
}
179179
// Add join conditions
180180
for condition_pair in op.left_conditions.iter().zip(op.right_conditions.iter()) {
181+
let left_used_tables = condition_pair.0.used_tables()?;
182+
let right_used_tables = condition_pair.1.used_tables()?;
183+
if left_used_tables.is_empty() || right_used_tables.is_empty() {
184+
is_inner_join = false;
185+
break;
186+
}
181187
join_conditions.push((condition_pair.0.clone(), condition_pair.1.clone()));
182188
}
183189
if !op.non_equi_conditions.is_empty() {

src/query/sql/src/planner/plans/aggregate.rs

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -268,12 +268,13 @@ impl Operator for Aggregate {
268268
f64::min(res, cardinality)
269269
};
270270

271-
let precise_cardinality =
272-
if self.group_items.is_empty() && self.mode == AggregateMode::Final {
273-
Some(1)
274-
} else {
275-
None
276-
};
271+
let precise_cardinality = if self.group_items.is_empty()
272+
&& matches!(self.mode, AggregateMode::Final | AggregateMode::Initial)
273+
{
274+
Some(1)
275+
} else {
276+
None
277+
};
277278
Ok(Arc::new(StatInfo {
278279
cardinality,
279280
statistics: Statistics {

src/query/sql/src/planner/plans/scalar_expr.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -566,6 +566,8 @@ pub struct SubqueryExpr {
566566
pub(crate) data_type: Box<DataType>,
567567
#[educe(Hash(method = "hash_column_set"))]
568568
pub outer_columns: ColumnSet,
569+
// If contain aggregation function in scalar subquery output
570+
pub contain_agg: Option<bool>,
569571
}
570572

571573
impl SubqueryExpr {

src/query/sql/src/planner/semantic/type_check.rs

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@ use std::collections::VecDeque;
1717
use std::sync::Arc;
1818
use std::vec;
1919

20+
use databend_common_ast::ast::contain_agg_func;
2021
use databend_common_ast::ast::BinaryOperator;
2122
use databend_common_ast::ast::ColumnID;
2223
use databend_common_ast::ast::Expr;
@@ -26,6 +27,8 @@ use databend_common_ast::ast::Lambda;
2627
use databend_common_ast::ast::Literal;
2728
use databend_common_ast::ast::MapAccessor;
2829
use databend_common_ast::ast::Query;
30+
use databend_common_ast::ast::SelectTarget;
31+
use databend_common_ast::ast::SetExpr;
2932
use databend_common_ast::ast::SubqueryModifier;
3033
use databend_common_ast::ast::TrimWhere;
3134
use databend_common_ast::ast::TypeName;
@@ -2337,6 +2340,17 @@ impl<'a> TypeChecker<'a> {
23372340
)));
23382341
}
23392342

2343+
let mut contain_agg = None;
2344+
if let SetExpr::Select(select_stmt) = &subquery.body {
2345+
if typ == SubqueryType::Scalar {
2346+
let select = &select_stmt.select_list[0];
2347+
if let SelectTarget::AliasedExpr { expr, .. } = select {
2348+
// Check if contain aggregation function
2349+
contain_agg = Some(contain_agg_func(expr));
2350+
}
2351+
}
2352+
}
2353+
23402354
let mut data_type = output_context.columns[0].data_type.clone();
23412355

23422356
let rel_expr = RelExpr::with_s_expr(&s_expr);
@@ -2362,6 +2376,7 @@ impl<'a> TypeChecker<'a> {
23622376
data_type: data_type.clone(),
23632377
typ,
23642378
outer_columns: rel_prop.outer_columns.clone(),
2379+
contain_agg,
23652380
};
23662381

23672382
let data_type = subquery_expr.data_type();
@@ -3429,6 +3444,7 @@ impl<'a> TypeChecker<'a> {
34293444
data_type: data_type.clone(),
34303445
typ: SubqueryType::Any,
34313446
outer_columns: rel_prop.outer_columns.clone(),
3447+
contain_agg: None,
34323448
};
34333449
let data_type = subquery_expr.data_type();
34343450
Ok(Box::new((subquery_expr.into(), data_type)))

tests/sqllogictests/suites/mode/standalone/explain/explain.test

Lines changed: 17 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -1169,49 +1169,49 @@ from numbers(10) where number > 5
11691169
) b on a.number=b.number order by a.number) where pt = register_at;
11701170
----
11711171
Sort
1172-
├── output columns: [register_at (#3), numbers.number (#0), pt (#1)]
1172+
├── output columns: [numbers.number (#0), pt (#1), register_at (#3)]
11731173
├── sort keys: [number ASC NULLS LAST]
11741174
├── estimated rows: 0.00
11751175
└── HashJoin
1176-
├── output columns: [register_at (#3), numbers.number (#0), pt (#1)]
1176+
├── output columns: [numbers.number (#0), pt (#1), register_at (#3)]
11771177
├── join type: INNER
1178-
├── build keys: [a.number (#0), a.pt (#1)]
1179-
├── probe keys: [b.number (#2), b.register_at (#3)]
1178+
├── build keys: [b.number (#2), b.register_at (#3)]
1179+
├── probe keys: [a.number (#0), a.pt (#1)]
11801180
├── filters: []
11811181
├── estimated rows: 0.00
11821182
├── EvalScalar(Build)
1183-
│ ├── output columns: [numbers.number (#0), pt (#1)]
1184-
│ ├── expressions: [to_yyyymmdd(to_timestamp(to_int64(numbers.number (#0))))]
1183+
│ ├── output columns: [numbers.number (#2), register_at (#3)]
1184+
│ ├── expressions: [to_yyyymmdd(to_timestamp(to_int64(numbers.number (#2))))]
11851185
│ ├── estimated rows: 0.00
11861186
│ └── Filter
1187-
│ ├── output columns: [numbers.number (#0)]
1188-
│ ├── filters: [numbers.number (#0) > 5]
1187+
│ ├── output columns: [numbers.number (#2)]
1188+
│ ├── filters: [numbers.number (#2) > 5]
11891189
│ ├── estimated rows: 0.00
11901190
│ └── TableScan
11911191
│ ├── table: default.system.numbers
1192-
│ ├── output columns: [number (#0)]
1192+
│ ├── output columns: [number (#2)]
11931193
│ ├── read rows: 10
11941194
│ ├── read bytes: 80
11951195
│ ├── partitions total: 1
11961196
│ ├── partitions scanned: 1
1197-
│ ├── push downs: [filters: [numbers.number (#0) > 5], limit: NONE]
1197+
│ ├── push downs: [filters: [numbers.number (#2) > 5], limit: NONE]
11981198
│ └── estimated rows: 10.00
11991199
└── EvalScalar(Probe)
1200-
├── output columns: [numbers.number (#2), register_at (#3)]
1201-
├── expressions: [to_yyyymmdd(to_timestamp(to_int64(numbers.number (#2))))]
1200+
├── output columns: [numbers.number (#0), pt (#1)]
1201+
├── expressions: [to_yyyymmdd(to_timestamp(to_int64(numbers.number (#0))))]
12021202
├── estimated rows: 0.00
12031203
└── Filter
1204-
├── output columns: [numbers.number (#2)]
1205-
├── filters: [numbers.number (#2) > 5]
1204+
├── output columns: [numbers.number (#0)]
1205+
├── filters: [numbers.number (#0) > 5]
12061206
├── estimated rows: 0.00
12071207
└── TableScan
12081208
├── table: default.system.numbers
1209-
├── output columns: [number (#2)]
1209+
├── output columns: [number (#0)]
12101210
├── read rows: 10
12111211
├── read bytes: 80
12121212
├── partitions total: 1
12131213
├── partitions scanned: 1
1214-
├── push downs: [filters: [numbers.number (#2) > 5], limit: NONE]
1214+
├── push downs: [filters: [numbers.number (#0) > 5], limit: NONE]
12151215
└── estimated rows: 10.00
12161216

12171217

@@ -1485,7 +1485,7 @@ FROM numbers(500);
14851485
query T
14861486
explain join SELECT customer_name, segment, (SELECT SUM(net_paid) FROM sales WHERE customer_id IN (SELECT customer_id FROM customers WHERE segment = c.segment AND active = true)) FROM customers c WHERE c.customer_id IN (SELECT customer_id FROM sales WHERE net_paid > 100) LIMIT 10;
14871487
----
1488-
HashJoin: LEFT SINGLE
1488+
HashJoin: LEFT OUTER
14891489
├── Build
14901490
│ └── HashJoin: RIGHT SEMI
14911491
│ ├── Build

0 commit comments

Comments
 (0)