-
Notifications
You must be signed in to change notification settings - Fork 1.5k
Fix wildcard expansion for HAVING
clause
#12046
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 12 commits
c443264
fdb850d
950472f
7bf37d7
ffedb96
2392092
9284e87
39aa319
020ee7d
ccc2507
ab68aa6
6cb23da
1879329
69109ad
15cb062
17ebb04
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -16,6 +16,7 @@ | |
// under the License. | ||
|
||
use std::collections::HashSet; | ||
use std::ops::Deref; | ||
use std::sync::Arc; | ||
|
||
use crate::planner::{ | ||
|
@@ -34,9 +35,7 @@ use datafusion_expr::expr_rewriter::{ | |
normalize_col, normalize_col_with_schemas_and_ambiguity_check, normalize_cols, | ||
}; | ||
use datafusion_expr::logical_plan::tree_node::unwrap_arc; | ||
use datafusion_expr::utils::{ | ||
expr_as_column_expr, expr_to_columns, find_aggregate_exprs, find_window_exprs, | ||
}; | ||
use datafusion_expr::utils::{expr_as_column_expr, expr_to_columns, exprlist_to_fields, find_aggregate_exprs, find_window_exprs}; | ||
use datafusion_expr::{ | ||
qualified_wildcard_with_options, wildcard_with_options, Aggregate, Expr, Filter, | ||
GroupingSet, LogicalPlan, LogicalPlanBuilder, Partitioning, | ||
|
@@ -214,7 +213,7 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { | |
|
||
let plan = if let Some(having_expr_post_aggr) = having_expr_post_aggr { | ||
LogicalPlanBuilder::from(plan) | ||
.filter(having_expr_post_aggr)? | ||
.having(having_expr_post_aggr)? | ||
.build()? | ||
} else { | ||
plan | ||
|
@@ -749,11 +748,15 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { | |
.map(|expr| rebase_expr(expr, &aggr_projection_exprs, input)) | ||
.collect::<Result<Vec<Expr>>>()?; | ||
|
||
let wildcard_exprs = select_exprs_post_aggr.iter().filter(|expr| matches!(expr, Expr::Wildcard { .. })).collect::<Vec<_>>(); | ||
let wildcard_fields = exprlist_to_fields(wildcard_exprs, input)?; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I found that there are too many cases to handle, making it difficult to manage them cleanly. This could complicate the codebase. Currently, since we've expanded the wildcard for the schema, we can retrieve what we need based on the expanded schema. |
||
|
||
// finally, we have some validation that the re-written projection can be resolved | ||
// from the aggregate output columns | ||
check_columns_satisfy_exprs( | ||
&column_exprs_post_aggr, | ||
&select_exprs_post_aggr, | ||
&wildcard_fields, | ||
"Projection references non-aggregate values", | ||
)?; | ||
|
||
|
@@ -766,9 +769,9 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { | |
check_columns_satisfy_exprs( | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. We can have There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think the function can be shared. The difference is that the check of |
||
&column_exprs_post_aggr, | ||
&[having_expr_post_aggr.clone()], | ||
&wildcard_fields, | ||
"HAVING clause references non-aggregate values", | ||
)?; | ||
|
||
Some(having_expr_post_aggr) | ||
} else { | ||
None | ||
|
@@ -778,6 +781,15 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { | |
} | ||
} | ||
|
||
fn check_contain_scalar_only(exprs: &[Expr]) -> bool { | ||
exprs.iter().all(|expr| match expr { | ||
Expr::ScalarFunction(_) => true, | ||
Expr::Literal(_) => true, | ||
Expr::Alias(alias) => check_contain_scalar_only(&[alias.expr.deref().clone()]), | ||
_ => false, | ||
}) | ||
} | ||
|
||
// If there are any multiple-defined windows, we raise an error. | ||
fn check_conflicting_windows(window_defs: &[NamedWindowDefinition]) -> Result<()> { | ||
for (i, window_def_i) in window_defs.iter().enumerate() { | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -18,19 +18,20 @@ | |
//! SQL Utility Functions | ||
|
||
use std::collections::HashMap; | ||
|
||
use arrow_schema::{ | ||
DataType, DECIMAL128_MAX_PRECISION, DECIMAL256_MAX_PRECISION, DECIMAL_DEFAULT_SCALE, | ||
}; | ||
use std::sync::Arc; | ||
use arrow_schema::{DataType, Field, DECIMAL128_MAX_PRECISION, DECIMAL256_MAX_PRECISION, DECIMAL_DEFAULT_SCALE}; | ||
use datafusion_common::tree_node::{ | ||
Transformed, TransformedResult, TreeNode, TreeNodeRecursion, | ||
}; | ||
use datafusion_common::{ | ||
exec_err, internal_err, plan_err, Column, DataFusionError, Result, ScalarValue, | ||
TableReference, | ||
}; | ||
use datafusion_expr::builder::get_unnested_columns; | ||
use datafusion_expr::expr::{Alias, GroupingSet, Unnest, WindowFunction}; | ||
use datafusion_expr::utils::{expr_as_column_expr, find_column_exprs}; | ||
use datafusion_expr::utils::{ | ||
expr_as_column_expr, find_column_exprs, | ||
}; | ||
use datafusion_expr::{expr_vec_fmt, Expr, ExprSchemable, LogicalPlan}; | ||
use sqlparser::ast::{Ident, Value}; | ||
|
||
|
@@ -90,6 +91,7 @@ pub(crate) fn rebase_expr( | |
pub(crate) fn check_columns_satisfy_exprs( | ||
columns: &[Expr], | ||
exprs: &[Expr], | ||
wildcard_fields: &[(Option<TableReference>, Arc<Field>)], | ||
message_prefix: &str, | ||
) -> Result<()> { | ||
columns.iter().try_for_each(|c| match c { | ||
|
@@ -119,9 +121,34 @@ pub(crate) fn check_columns_satisfy_exprs( | |
_ => check_column_satisfies_expr(columns, e, message_prefix)?, | ||
} | ||
} | ||
let column_names = columns | ||
.iter() | ||
.map(|c| format!("{}", c.schema_name())) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. eq to |
||
.collect::<Vec<_>>(); | ||
|
||
wildcard_fields.into_iter().try_for_each(|(table, field)| { | ||
let column_name = qualified_name(table, field.name()); | ||
if !column_names.iter().any(|c| c == &column_name) { | ||
This comment was marked as outdated.
Sorry, something went wrong. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think there are two valid cases In group by clause
Part of an aggregate function I think we just need to check group by expressions and both select + having expressions. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. hmm ok. I added back the checking for the rules. I tried to add more comments to explain why we need those checks. |
||
plan_err!( | ||
"{}: Wildcard column {} could not be resolved from available columns: {}", | ||
message_prefix, | ||
column_name, | ||
expr_vec_fmt!(columns) | ||
) | ||
} else { | ||
Ok(()) | ||
} | ||
})?; | ||
Ok(()) | ||
} | ||
|
||
fn qualified_name(qualifier: &Option<TableReference>, name: &str) -> String { | ||
match qualifier { | ||
Some(q) => format!("{}.{}", q, name), | ||
None => name.to_string(), | ||
} | ||
} | ||
|
||
fn check_column_satisfies_expr( | ||
columns: &[Expr], | ||
expr: &Expr, | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It's important to mention the filter is used for the
HAVING
clause because the wildcard expansion behavior is different.