Skip to content

Commit 2108f85

Browse files
committed
sql/planner/optimizer/cascades/mod.rs -> sql/planner/optimizer/cascades/cascade.rs
1 parent e5c4c84 commit 2108f85

File tree

2 files changed

+231
-215
lines changed

2 files changed

+231
-215
lines changed
Lines changed: 229 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,229 @@
1+
// Copyright 2022 Datafuse Labs.
2+
//
3+
// Licensed under the Apache License, Version 2.0 (the "License");
4+
// you may not use this file except in compliance with the License.
5+
// You may obtain a copy of the License at
6+
//
7+
// http://www.apache.org/licenses/LICENSE-2.0
8+
//
9+
// Unless required by applicable law or agreed to in writing, software
10+
// distributed under the License is distributed on an "AS IS" BASIS,
11+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
// See the License for the specific language governing permissions and
13+
// limitations under the License.
14+
15+
use std::collections::hash_map::Entry;
16+
use std::collections::HashMap;
17+
use std::sync::Arc;
18+
19+
use common_catalog::table_context::TableContext;
20+
use common_exception::ErrorCode;
21+
use common_exception::Result;
22+
use common_planner::IndexType;
23+
24+
use crate::sql::optimizer::cascades::explore_rules::get_explore_rule_set;
25+
use crate::sql::optimizer::cascades::implement_rules::get_implement_rule_set;
26+
use crate::sql::optimizer::cost::Cost;
27+
use crate::sql::optimizer::cost::CostContext;
28+
use crate::sql::optimizer::cost::CostModel;
29+
use crate::sql::optimizer::cost::DefaultCostModel;
30+
use crate::sql::optimizer::format::display_memo;
31+
use crate::sql::optimizer::m_expr::MExpr;
32+
use crate::sql::optimizer::memo::Memo;
33+
use crate::sql::optimizer::rule::RuleSet;
34+
use crate::sql::optimizer::rule::TransformState;
35+
use crate::sql::optimizer::SExpr;
36+
use crate::sql::plans::Operator;
37+
38+
/// A cascades-style search engine to enumerate possible alternations of a relational expression and
39+
/// find the optimal one.
40+
pub struct CascadesOptimizer {
41+
memo: Memo,
42+
explore_rules: RuleSet,
43+
implement_rules: RuleSet,
44+
45+
cost_model: Box<dyn CostModel>,
46+
47+
/// group index -> best cost context
48+
best_cost_map: HashMap<IndexType, CostContext>,
49+
_ctx: Arc<dyn TableContext>,
50+
}
51+
52+
impl CascadesOptimizer {
53+
pub fn create(ctx: Arc<dyn TableContext>) -> Result<Self> {
54+
let explore_rules = if ctx.get_settings().get_enable_cbo()? {
55+
get_explore_rule_set()
56+
} else {
57+
RuleSet::create_with_ids(vec![]).unwrap()
58+
};
59+
Ok(CascadesOptimizer {
60+
memo: Memo::create(),
61+
explore_rules,
62+
implement_rules: get_implement_rule_set(),
63+
cost_model: Box::new(DefaultCostModel),
64+
best_cost_map: HashMap::new(),
65+
_ctx: ctx,
66+
})
67+
}
68+
69+
fn init(&mut self, expression: SExpr) -> Result<()> {
70+
self.memo.init(expression)?;
71+
72+
Ok(())
73+
}
74+
75+
pub fn optimize(mut self, s_expr: SExpr) -> Result<SExpr> {
76+
self.init(s_expr)?;
77+
78+
let root_index = self
79+
.memo
80+
.root()
81+
.ok_or_else(|| {
82+
ErrorCode::LogicalError("Root group cannot be None after initialization")
83+
})?
84+
.group_index;
85+
86+
self.explore_group(root_index)?;
87+
88+
self.implement_group(root_index)?;
89+
90+
self.optimize_group(root_index)?;
91+
92+
tracing::debug!("Memo: \n{}", display_memo(&self.memo));
93+
94+
self.find_optimal_plan(root_index)
95+
}
96+
97+
fn explore_group(&mut self, group_index: IndexType) -> Result<()> {
98+
let group = self.memo.group(group_index)?;
99+
for m_expr in group.m_exprs.clone() {
100+
self.explore_expr(&m_expr)?;
101+
}
102+
103+
Ok(())
104+
}
105+
106+
fn explore_expr(&mut self, m_expr: &MExpr) -> Result<()> {
107+
for child in m_expr.children.iter() {
108+
self.explore_group(*child)?;
109+
}
110+
111+
let mut state = TransformState::new();
112+
for rule in self.explore_rules.iter() {
113+
m_expr.apply_rule(&self.memo, rule, &mut state)?;
114+
}
115+
self.insert_from_transform_state(m_expr.group_index, state)?;
116+
117+
Ok(())
118+
}
119+
120+
fn implement_group(&mut self, group_index: IndexType) -> Result<()> {
121+
let group = self.memo.group(group_index)?;
122+
for m_expr in group.m_exprs.clone() {
123+
self.implement_expr(&m_expr)?;
124+
}
125+
126+
Ok(())
127+
}
128+
129+
fn implement_expr(&mut self, m_expr: &MExpr) -> Result<()> {
130+
for child in m_expr.children.iter() {
131+
self.implement_group(*child)?;
132+
}
133+
134+
let mut state = TransformState::new();
135+
for rule in self.implement_rules.iter() {
136+
m_expr.apply_rule(&self.memo, rule, &mut state)?;
137+
}
138+
self.insert_from_transform_state(m_expr.group_index, state)?;
139+
140+
Ok(())
141+
}
142+
143+
fn insert_from_transform_state(
144+
&mut self,
145+
group_index: IndexType,
146+
state: TransformState,
147+
) -> Result<()> {
148+
for result in state.results() {
149+
self.insert_expression(group_index, result)?;
150+
}
151+
152+
Ok(())
153+
}
154+
155+
fn insert_expression(&mut self, group_index: IndexType, expression: &SExpr) -> Result<()> {
156+
self.memo.insert(Some(group_index), expression.clone())?;
157+
158+
Ok(())
159+
}
160+
161+
fn find_optimal_plan(&self, group_index: IndexType) -> Result<SExpr> {
162+
let group = self.memo.group(group_index)?;
163+
let cost_context = self.best_cost_map.get(&group_index).ok_or_else(|| {
164+
ErrorCode::LogicalError(format!("Cannot find CostContext of group: {group_index}"))
165+
})?;
166+
167+
let m_expr = group.m_exprs.get(cost_context.expr_index).ok_or_else(|| {
168+
ErrorCode::LogicalError(format!(
169+
"Cannot find best expression of group: {group_index}"
170+
))
171+
})?;
172+
173+
let children = m_expr
174+
.children
175+
.iter()
176+
.map(|index| self.find_optimal_plan(*index))
177+
.collect::<Result<Vec<_>>>()?;
178+
179+
let result = SExpr::create(m_expr.plan.clone(), children, None, None);
180+
181+
Ok(result)
182+
}
183+
184+
fn optimize_group(&mut self, group_index: IndexType) -> Result<()> {
185+
let group = self.memo.group(group_index)?.clone();
186+
for m_expr in group.m_exprs.iter() {
187+
if m_expr.plan.is_physical() {
188+
self.optimize_m_expr(m_expr)?;
189+
}
190+
}
191+
192+
Ok(())
193+
}
194+
195+
fn optimize_m_expr(&mut self, m_expr: &MExpr) -> Result<()> {
196+
let mut cost = Cost::from(0);
197+
for child in m_expr.children.iter() {
198+
self.optimize_group(*child)?;
199+
let cost_context = self.best_cost_map.get(child).ok_or_else(|| {
200+
ErrorCode::LogicalError(format!("Cannot find CostContext of group: {child}"))
201+
})?;
202+
203+
cost = cost + cost_context.cost;
204+
}
205+
206+
let op_cost = self.cost_model.compute_cost(&self.memo, m_expr)?;
207+
cost = cost + op_cost;
208+
209+
let cost_context = CostContext {
210+
cost,
211+
group_index: m_expr.group_index,
212+
expr_index: m_expr.index,
213+
};
214+
215+
match self.best_cost_map.entry(m_expr.group_index) {
216+
Entry::Vacant(entry) => {
217+
entry.insert(cost_context);
218+
}
219+
Entry::Occupied(mut entry) => {
220+
// Replace the cost context of the group if current context is lower
221+
if cost < entry.get().cost {
222+
entry.insert(cost_context);
223+
}
224+
}
225+
}
226+
227+
Ok(())
228+
}
229+
}

0 commit comments

Comments
 (0)