From 284f05c3e8a6c89ec8e2926546a66439ef70341f Mon Sep 17 00:00:00 2001 From: Nishi Kantamneni Date: Tue, 2 Jul 2024 17:59:51 -0700 Subject: [PATCH 1/4] Replace println! with assert! if possible in DataFusion examples --- datafusion-examples/examples/rewrite_expr.rs | 26 +++++++++++--------- datafusion-examples/examples/sql_analysis.rs | 8 +++--- 2 files changed, 20 insertions(+), 14 deletions(-) diff --git a/datafusion-examples/examples/rewrite_expr.rs b/datafusion-examples/examples/rewrite_expr.rs index 06286d5d66ed..d9780caa74a3 100644 --- a/datafusion-examples/examples/rewrite_expr.rs +++ b/datafusion-examples/examples/rewrite_expr.rs @@ -42,9 +42,11 @@ pub fn main() -> Result<()> { let context_provider = MyContextProvider::default(); let sql_to_rel = SqlToRel::new(&context_provider); let logical_plan = sql_to_rel.sql_statement_to_plan(statements[0].clone())?; - println!( - "Unoptimized Logical Plan:\n\n{}\n", - logical_plan.display_indent() + assert_eq!( + logical_plan.display_indent().to_string(), + "Projection: person.name\ + \n Filter: person.age BETWEEN Int64(21) AND Int64(32)\ + \n TableScan: person" ); // run the analyzer with our custom rule @@ -52,18 +54,20 @@ pub fn main() -> Result<()> { let analyzer = Analyzer::with_rules(vec![Arc::new(MyAnalyzerRule {})]); let analyzed_plan = analyzer.execute_and_check(logical_plan, config.options(), |_, _| {})?; - println!( - "Analyzed Logical Plan:\n\n{}\n", - analyzed_plan.display_indent() - ); + assert_eq!( + analyzed_plan.display_indent().to_string(), + "Projection: person.name\ + \n Filter: CAST(person.age AS Int64) BETWEEN Int64(21) AND Int64(32)\ + \n TableScan: person", + ); // then run the optimizer with our custom rule let optimizer = Optimizer::with_rules(vec![Arc::new(MyOptimizerRule {})]); let optimized_plan = optimizer.optimize(analyzed_plan, &config, observe)?; - println!( - "Optimized Logical Plan:\n\n{}\n", - optimized_plan.display_indent() - ); + assert_eq!( + optimized_plan.display_indent().to_string(), + "TableScan: person projection=[name], full_filters=[person.age >= UInt8(21), person.age <= UInt8(32)]" + ); Ok(()) } diff --git a/datafusion-examples/examples/sql_analysis.rs b/datafusion-examples/examples/sql_analysis.rs index 3995988751c7..3ec54abe3587 100644 --- a/datafusion-examples/examples/sql_analysis.rs +++ b/datafusion-examples/examples/sql_analysis.rs @@ -280,9 +280,11 @@ from // We can create a LogicalPlan from a SQL query like this let logical_plan = ctx.sql(tpcds_query_88).await?.into_optimized_plan()?; - println!( - "Optimized Logical Plan:\n\n{}\n", - logical_plan.display_indent() + assert_eq!( + logical_plan.display_indent().to_string(), + "Projection: person.name\ + \n Filter: person.age BETWEEN Int64(21) AND Int64(32)\ + \n TableScan: person" ); // we can get the total count (query 88 has 31 joins: 7 CROSS joins and 24 INNER joins => 40 input relations) let total_join_count = total_join_count(&logical_plan); From a662d384f0adc784822d7d0e78c1f5ba49a3833c Mon Sep 17 00:00:00 2001 From: Nishi Kantamneni Date: Wed, 3 Jul 2024 14:17:05 -0700 Subject: [PATCH 2/4] Replace println! with assert! if possible in DataFusion examples --- datafusion-examples/examples/rewrite_expr.rs | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/datafusion-examples/examples/rewrite_expr.rs b/datafusion-examples/examples/rewrite_expr.rs index d9780caa74a3..66a21c1eddf8 100644 --- a/datafusion-examples/examples/rewrite_expr.rs +++ b/datafusion-examples/examples/rewrite_expr.rs @@ -54,12 +54,12 @@ pub fn main() -> Result<()> { let analyzer = Analyzer::with_rules(vec![Arc::new(MyAnalyzerRule {})]); let analyzed_plan = analyzer.execute_and_check(logical_plan, config.options(), |_, _| {})?; - assert_eq!( - analyzed_plan.display_indent().to_string(), - "Projection: person.name\ + assert_eq!( + analyzed_plan.display_indent().to_string(), + "Projection: person.name\ \n Filter: CAST(person.age AS Int64) BETWEEN Int64(21) AND Int64(32)\ \n TableScan: person", - ); + ); // then run the optimizer with our custom rule let optimizer = Optimizer::with_rules(vec![Arc::new(MyOptimizerRule {})]); From 36d0b144eeb1be5a4e31a872ded38c56a6078b97 Mon Sep 17 00:00:00 2001 From: Nishi <46855953+Nishi46@users.noreply.github.com> Date: Wed, 3 Jul 2024 15:07:15 -0700 Subject: [PATCH 3/4] Update rewrite_expr.rs --- datafusion-examples/examples/rewrite_expr.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/datafusion-examples/examples/rewrite_expr.rs b/datafusion-examples/examples/rewrite_expr.rs index 66a21c1eddf8..08f063bd7392 100644 --- a/datafusion-examples/examples/rewrite_expr.rs +++ b/datafusion-examples/examples/rewrite_expr.rs @@ -56,9 +56,9 @@ pub fn main() -> Result<()> { analyzer.execute_and_check(logical_plan, config.options(), |_, _| {})?; assert_eq!( analyzed_plan.display_indent().to_string(), - "Projection: person.name\ - \n Filter: CAST(person.age AS Int64) BETWEEN Int64(21) AND Int64(32)\ - \n TableScan: person", + "Projection: person.name, person.age\ + \n Filter: person.age BETWEEN Int64(21) AND Int64(32)\ + \n TableScan: person" ); // then run the optimizer with our custom rule From b6401edafb0446d9d2c7ddd361bc5ceadf2871fa Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Fri, 5 Jul 2024 08:11:28 -0400 Subject: [PATCH 4/4] port changes to other examples --- .../examples/optimizer_rule.rs | 57 +++++++++++-------- datafusion-examples/examples/sql_analysis.rs | 8 +-- 2 files changed, 35 insertions(+), 30 deletions(-) diff --git a/datafusion-examples/examples/optimizer_rule.rs b/datafusion-examples/examples/optimizer_rule.rs index 057852946341..b4663b345f64 100644 --- a/datafusion-examples/examples/optimizer_rule.rs +++ b/datafusion-examples/examples/optimizer_rule.rs @@ -19,7 +19,7 @@ use arrow::array::{ArrayRef, Int32Array, RecordBatch, StringArray}; use arrow_schema::DataType; use datafusion::prelude::SessionContext; use datafusion_common::tree_node::{Transformed, TreeNode}; -use datafusion_common::{Result, ScalarValue}; +use datafusion_common::{assert_batches_eq, Result, ScalarValue}; use datafusion_expr::{ BinaryExpr, ColumnarValue, Expr, LogicalPlan, Operator, ScalarUDF, ScalarUDFImpl, Signature, Volatility, @@ -54,39 +54,46 @@ pub async fn main() -> Result<()> { // We can see the effect of our rewrite on the output plan that the filter // has been rewritten to `my_eq` - // - // Filter: my_eq(person.age, Int32(22)) - // TableScan: person projection=[name, age] - println!("Logical Plan:\n\n{}\n", plan.display_indent()); + assert_eq!( + plan.display_indent().to_string(), + "Filter: my_eq(person.age, Int32(22))\ + \n TableScan: person projection=[name, age]" + ); // The query below doesn't respect a filter `where age = 22` because // the plan has been rewritten using UDF which returns always true // // And the output verifies the predicates have been changed (as the my_eq // function always returns true) - // - // +--------+-----+ - // | name | age | - // +--------+-----+ - // | Andy | 11 | - // | Andrew | 22 | - // | Oleks | 33 | - // +--------+-----+ - ctx.sql(sql).await?.show().await?; + assert_batches_eq!( + [ + "+--------+-----+", + "| name | age |", + "+--------+-----+", + "| Andy | 11 |", + "| Andrew | 22 |", + "| Oleks | 33 |", + "+--------+-----+", + ], + &ctx.sql(sql).await?.collect().await? + ); // however we can see the rule doesn't trigger for queries with predicates // other than `=` - // - // +-------+-----+ - // | name | age | - // +-------+-----+ - // | Andy | 11 | - // | Oleks | 33 | - // +-------+-----+ - ctx.sql("SELECT * FROM person WHERE age <> 22") - .await? - .show() - .await?; + assert_batches_eq!( + [ + "+-------+-----+", + "| name | age |", + "+-------+-----+", + "| Andy | 11 |", + "| Oleks | 33 |", + "+-------+-----+", + ], + &ctx.sql("SELECT * FROM person WHERE age <> 22") + .await? + .collect() + .await? + ); Ok(()) } diff --git a/datafusion-examples/examples/sql_analysis.rs b/datafusion-examples/examples/sql_analysis.rs index 3ec54abe3587..3995988751c7 100644 --- a/datafusion-examples/examples/sql_analysis.rs +++ b/datafusion-examples/examples/sql_analysis.rs @@ -280,11 +280,9 @@ from // We can create a LogicalPlan from a SQL query like this let logical_plan = ctx.sql(tpcds_query_88).await?.into_optimized_plan()?; - assert_eq!( - logical_plan.display_indent().to_string(), - "Projection: person.name\ - \n Filter: person.age BETWEEN Int64(21) AND Int64(32)\ - \n TableScan: person" + println!( + "Optimized Logical Plan:\n\n{}\n", + logical_plan.display_indent() ); // we can get the total count (query 88 has 31 joins: 7 CROSS joins and 24 INNER joins => 40 input relations) let total_join_count = total_join_count(&logical_plan);