-
Notifications
You must be signed in to change notification settings - Fork 1.5k
Fix wildcard expansion for HAVING
clause
#12046
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 5 commits
c443264
fdb850d
950472f
7bf37d7
ffedb96
2392092
9284e87
39aa319
020ee7d
ccc2507
ab68aa6
6cb23da
1879329
69109ad
15cb062
17ebb04
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -160,13 +160,14 @@ fn replace_columns( | |
mod tests { | ||
use arrow::datatypes::{DataType, Field, Schema}; | ||
|
||
use crate::test::{assert_analyzed_plan_eq_display_indent, test_table_scan}; | ||
use crate::Analyzer; | ||
use datafusion_common::{JoinType, TableReference}; | ||
use datafusion_expr::{ | ||
col, in_subquery, qualified_wildcard, table_scan, wildcard, LogicalPlanBuilder, | ||
col, ident, in_subquery, lit, qualified_wildcard, table_scan, wildcard, | ||
LogicalPlanBuilder, | ||
}; | ||
|
||
use crate::test::{assert_analyzed_plan_eq_display_indent, test_table_scan}; | ||
use crate::Analyzer; | ||
use datafusion_functions_aggregate::expr_fn::max; | ||
|
||
use super::*; | ||
|
||
|
@@ -301,4 +302,32 @@ mod tests { | |
|
||
Ok(()) | ||
} | ||
|
||
#[test] | ||
fn plan_having_wildcard_projection() -> Result<()> { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. We could test this kind of test in slt with |
||
let aggregate = | ||
table_scan(Some("t1"), &employee_schema(), Some(vec![0, 1, 2, 3, 4]))? | ||
.aggregate( | ||
vec![ | ||
col("t1.id"), | ||
col("t1.first_name"), | ||
col("t1.last_name"), | ||
col("t1.state"), | ||
col("t1.salary"), | ||
], | ||
vec![max(col("t1.salary"))], | ||
)? | ||
.build()?; | ||
let plan = LogicalPlanBuilder::from(aggregate) | ||
.having(ident("max(t1.salary)").gt(lit(100)))? | ||
.project(vec![wildcard()])? | ||
.build()?; | ||
|
||
let expected = "Projection: t1.id, t1.first_name, t1.last_name, t1.state, t1.salary [id:Int32, first_name:Utf8, last_name:Utf8, state:Utf8, salary:Int32]\ | ||
\n Filter: max(t1.salary) > Int32(100) [id:Int32, first_name:Utf8, last_name:Utf8, state:Utf8, salary:Int32, max(t1.salary):Int32;N]\ | ||
\n Aggregate: groupBy=[[t1.id, t1.first_name, t1.last_name, t1.state, t1.salary]], aggr=[[max(t1.salary)]] [id:Int32, first_name:Utf8, last_name:Utf8, state:Utf8, salary:Int32, max(t1.salary):Int32;N]\ | ||
\n TableScan: t1 projection=[id, first_name, last_name, state, salary] [id:Int32, first_name:Utf8, last_name:Utf8, state:Utf8, salary:Int32]"; | ||
|
||
assert_plan_eq(plan, expected) | ||
} | ||
} |
Original file line number | Diff line number | Diff line change | ||||||||||||||||||
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
|
@@ -35,7 +35,8 @@ use datafusion_expr::expr_rewriter::{ | |||||||||||||||||||
}; | ||||||||||||||||||||
use datafusion_expr::logical_plan::tree_node::unwrap_arc; | ||||||||||||||||||||
use datafusion_expr::utils::{ | ||||||||||||||||||||
expr_as_column_expr, expr_to_columns, find_aggregate_exprs, find_window_exprs, | ||||||||||||||||||||
expand_qualified_wildcard, expand_wildcard, expr_as_column_expr, expr_to_columns, | ||||||||||||||||||||
find_aggregate_exprs, find_window_exprs, | ||||||||||||||||||||
}; | ||||||||||||||||||||
use datafusion_expr::{ | ||||||||||||||||||||
qualified_wildcard_with_options, wildcard_with_options, Aggregate, Expr, Filter, | ||||||||||||||||||||
|
@@ -214,7 +215,7 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { | |||||||||||||||||||
|
||||||||||||||||||||
let plan = if let Some(having_expr_post_aggr) = having_expr_post_aggr { | ||||||||||||||||||||
LogicalPlanBuilder::from(plan) | ||||||||||||||||||||
.filter(having_expr_post_aggr)? | ||||||||||||||||||||
.having(having_expr_post_aggr)? | ||||||||||||||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It's important to mention the filter is used for the |
||||||||||||||||||||
.build()? | ||||||||||||||||||||
} else { | ||||||||||||||||||||
plan | ||||||||||||||||||||
|
@@ -749,6 +750,37 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { | |||||||||||||||||||
.map(|expr| rebase_expr(expr, &aggr_projection_exprs, input)) | ||||||||||||||||||||
.collect::<Result<Vec<Expr>>>()?; | ||||||||||||||||||||
|
||||||||||||||||||||
// If the having expression is present and the group by expression is not present, | ||||||||||||||||||||
// we can ensure this is an invalid query. Expand the wildcard expression here to | ||||||||||||||||||||
// get a better error message. | ||||||||||||||||||||
let select_exprs_post_aggr = if having_expr_opt.is_some() | ||||||||||||||||||||
&& group_by_exprs.is_empty() | ||||||||||||||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Expand the wildcard only when we ensure the query isn't valid.
This comment was marked as outdated.
Sorry, something went wrong.
This comment was marked as outdated.
Sorry, something went wrong. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Instead of expanding wildcard, does it make sense to return the error if neither column is in group by nor is it part of the aggregate function?
In this case, if we see wildcard, we can just return the error without actually expanding the actual columns. Maybe modify There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
I think it makes sense for the
Instead of modifying datafusion/datafusion/sql/src/select.rs Lines 762 to 770 in a91be04
|
||||||||||||||||||||
{ | ||||||||||||||||||||
select_exprs_post_aggr | ||||||||||||||||||||
.into_iter() | ||||||||||||||||||||
.map(|expr| { | ||||||||||||||||||||
if let Expr::Wildcard { qualifier, options } = expr { | ||||||||||||||||||||
if let Some(qualifier) = qualifier { | ||||||||||||||||||||
Ok::<_, DataFusionError>(expand_qualified_wildcard( | ||||||||||||||||||||
&qualifier, | ||||||||||||||||||||
input.schema(), | ||||||||||||||||||||
Some(&options), | ||||||||||||||||||||
)?) | ||||||||||||||||||||
} else { | ||||||||||||||||||||
Ok(expand_wildcard(input.schema(), input, Some(&options))?) | ||||||||||||||||||||
} | ||||||||||||||||||||
} else { | ||||||||||||||||||||
Ok(vec![expr]) | ||||||||||||||||||||
} | ||||||||||||||||||||
}) | ||||||||||||||||||||
.collect::<Result<Vec<_>>>()? | ||||||||||||||||||||
.into_iter() | ||||||||||||||||||||
.flatten() | ||||||||||||||||||||
.collect() | ||||||||||||||||||||
} else { | ||||||||||||||||||||
select_exprs_post_aggr | ||||||||||||||||||||
}; | ||||||||||||||||||||
|
||||||||||||||||||||
// finally, we have some validation that the re-written projection can be resolved | ||||||||||||||||||||
// from the aggregate output columns | ||||||||||||||||||||
check_columns_satisfy_exprs( | ||||||||||||||||||||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -5643,3 +5643,24 @@ query I??III?T | |
select count(null), min(null), max(null), bit_and(NULL), bit_or(NULL), bit_xor(NULL), nth_value(NULL, 1), string_agg(NULL, ','); | ||
---- | ||
0 NULL NULL NULL NULL NULL NULL NULL | ||
|
||
statement ok | ||
create table having_test(v1 int, v2 int) | ||
|
||
statement ok | ||
insert into having_test values (1, 2), (2, 3), (3, 4) | ||
|
||
query II | ||
select * from having_test group by v1, v2 having max(v1) = 3 | ||
---- | ||
3 4 | ||
|
||
query error DataFusion error: Error during planning: Projection references non-aggregate values: Expression having_test\.v1 could not be resolved from available columns: max\(having_test\.v1\) | ||
select * from having_test having max(v1) = 3 | ||
|
||
# because v2 is not in the group by clause, the sql is invalid | ||
query error | ||
select * from having_test group by v1 having max(v1) = 3 | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This error won't be detected when planning the aggregation because we won't expand the wildcard there. This error will be thrown when |
||
|
||
statement ok | ||
drop table having_test |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
nit: try_new_with_having (?