From 8f09e0ef7f6813f2e612118c76d5b1ff0ad134e0 Mon Sep 17 00:00:00 2001 From: Benjamin O Date: Fri, 23 Feb 2024 17:09:03 -0500 Subject: [PATCH 01/61] Add recursive logic, and add filter/sort transpose logic --- optd-datafusion-repr/src/rules.rs | 2 + .../src/rules/filter_pushdown.rs | 101 ++++++++++++++++++ 2 files changed, 103 insertions(+) create mode 100644 optd-datafusion-repr/src/rules/filter_pushdown.rs diff --git a/optd-datafusion-repr/src/rules.rs b/optd-datafusion-repr/src/rules.rs index 2e49af82..107bdf63 100644 --- a/optd-datafusion-repr/src/rules.rs +++ b/optd-datafusion-repr/src/rules.rs @@ -2,6 +2,7 @@ mod eliminate_duplicated_expr; mod eliminate_filter; mod eliminate_limit; +mod filter_pushdown; mod joins; mod macros; mod physical; @@ -12,6 +13,7 @@ pub use eliminate_duplicated_expr::{ }; pub use eliminate_filter::EliminateFilterRule; pub use eliminate_limit::EliminateLimitRule; +pub use filter_pushdown::FilterPushdownRule; pub use joins::{ EliminateJoinRule, HashJoinRule, JoinAssocRule, JoinCommuteRule, ProjectionPullUpJoin, }; diff --git a/optd-datafusion-repr/src/rules/filter_pushdown.rs b/optd-datafusion-repr/src/rules/filter_pushdown.rs new file mode 100644 index 00000000..0aba0bc6 --- /dev/null +++ b/optd-datafusion-repr/src/rules/filter_pushdown.rs @@ -0,0 +1,101 @@ +//! This rule is designed to be applied heuristically (read: all the time, blindly). +//! However, pushing a filter is not *always* better (but it usually is). If cost is +//! to be taken into account, each transposition step can be done separately +//! (and are thus all in independent functions). +//! One can even implement each of these helper functions as their own transpose rule, +//! like Calcite does. +//! +//! At a high level, filter pushdown is responsible for pushing the filter node +//! further down the query plan whenever it is possible to do so. 
+ +use std::collections::HashMap; +use std::result; + +use optd_core::rules::{Rule, RuleMatcher}; +use optd_core::{optimizer::Optimizer, rel_node::RelNode}; + +use crate::plan_nodes::{ + ConstantType, Expr, LogicalEmptyRelation, LogicalFilter, LogicalSort, OptRelNode, OptRelNodeTyp, +}; + +use super::macros::define_rule; + +define_rule!( + FilterPushdownRule, + apply_filter_pushdown, + (Filter, child, [cond]) +); + +/// Projection and filter are commutable when TODO +fn filter_project_transpose( + child: RelNode, + cond: RelNode, +) -> Vec> { + vec![] +} + +/// Filter and sort should always be commutable. +fn filter_sort_transpose( + child: RelNode, + cond: RelNode, +) -> Vec> { + let old_sort = LogicalSort::from_rel_node(child.into()).unwrap(); + let cond_as_expr = Expr::from_rel_node(cond.into()).unwrap(); + let new_filter_node = LogicalFilter::new(old_sort.child(), cond_as_expr); + // Exprs should be the same, no projections have occurred here. + let new_sort = LogicalSort::new(new_filter_node.into_plan_node(), old_sort.exprs()); + vec![new_sort.into_rel_node().as_ref().clone()] +} + +fn apply_filter_pushdown( + _optimizer: &impl Optimizer, + FilterPushdownRulePicks { child, cond }: FilterPushdownRulePicks, +) -> Vec> { + // Push filter down one node + let result_from_this_step = match child.typ { + OptRelNodeTyp::Projection => filter_project_transpose(child, cond), + OptRelNodeTyp::Filter => todo!(), // @todo filter merge rule? Should we do that here? 
+ OptRelNodeTyp::Scan => todo!(), + OptRelNodeTyp::Join(_) => todo!(), + OptRelNodeTyp::Sort => filter_sort_transpose(child, cond), + OptRelNodeTyp::Agg => todo!(), + OptRelNodeTyp::Apply(_) => todo!(), + OptRelNodeTyp::EmptyRelation => todo!(), + OptRelNodeTyp::Limit => todo!(), + _ => vec![], + }; + + // Apply rule recursively + if let Some(new_node) = result_from_this_step.first() { + // For all the children in our result, + new_node.children = new_node + .children + .into_iter() + .map(|child| { + if child.typ == OptRelNodeTyp::Filter { + // If this node is a filter, apply the rule again to this node! + let child_as_filter = LogicalFilter::from_rel_node(child.clone()).unwrap(); + let childs_child = child_as_filter.child().into_rel_node().as_ref().clone(); + let childs_cond = child_as_filter.cond().into_rel_node().as_ref().clone(); + // TODO: make this iterative? + let result = apply_filter_pushdown( + _optimizer, + FilterPushdownRulePicks { + child: childs_child, + cond: childs_cond, + }, + ); + // If we got a result, that is the replacement for this child + if let Some(&new_child) = result.first() { + return new_child.into(); + } + } + // Otherwise, if there was no result from rule application or this is not a filter, + // return the old child unmodified. 
+ child + }) + .collect(); + } + + result_from_this_step +} From e84a576be83cb2c50b385445f28ac028ce730345 Mon Sep 17 00:00:00 2001 From: Benjamin O Date: Fri, 23 Feb 2024 17:49:10 -0500 Subject: [PATCH 02/61] change a couple notes --- optd-datafusion-repr/src/rules/filter_pushdown.rs | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/optd-datafusion-repr/src/rules/filter_pushdown.rs b/optd-datafusion-repr/src/rules/filter_pushdown.rs index 0aba0bc6..024b3382 100644 --- a/optd-datafusion-repr/src/rules/filter_pushdown.rs +++ b/optd-datafusion-repr/src/rules/filter_pushdown.rs @@ -14,9 +14,7 @@ use std::result; use optd_core::rules::{Rule, RuleMatcher}; use optd_core::{optimizer::Optimizer, rel_node::RelNode}; -use crate::plan_nodes::{ - ConstantType, Expr, LogicalEmptyRelation, LogicalFilter, LogicalSort, OptRelNode, OptRelNodeTyp, -}; +use crate::plan_nodes::{Expr, LogicalFilter, LogicalSort, OptRelNode, OptRelNodeTyp}; use super::macros::define_rule; @@ -55,7 +53,7 @@ fn apply_filter_pushdown( let result_from_this_step = match child.typ { OptRelNodeTyp::Projection => filter_project_transpose(child, cond), OptRelNodeTyp::Filter => todo!(), // @todo filter merge rule? Should we do that here? - OptRelNodeTyp::Scan => todo!(), + OptRelNodeTyp::Scan => todo!(), // @todo Why doesn't our sort node have a predicate field? OptRelNodeTyp::Join(_) => todo!(), OptRelNodeTyp::Sort => filter_sort_transpose(child, cond), OptRelNodeTyp::Agg => todo!(), @@ -77,7 +75,7 @@ fn apply_filter_pushdown( let child_as_filter = LogicalFilter::from_rel_node(child.clone()).unwrap(); let childs_child = child_as_filter.child().into_rel_node().as_ref().clone(); let childs_cond = child_as_filter.cond().into_rel_node().as_ref().clone(); - // TODO: make this iterative? + // @todo: make this iterative? 
let result = apply_filter_pushdown( _optimizer, FilterPushdownRulePicks { From 097029a463d25fc06d320ea4ee8a3cef4e3ccb1d Mon Sep 17 00:00:00 2001 From: Benjamin O Date: Tue, 12 Mar 2024 16:16:09 -0400 Subject: [PATCH 03/61] Modularize projection rewriting code from ProjectionPullUpJoin rule (and fix compiler warnings --- .../src/plan_nodes/projection.rs | 86 +++++++++++++++- .../src/rules/filter_pushdown.rs | 63 ++++++------ optd-datafusion-repr/src/rules/joins.rs | 97 ++----------------- 3 files changed, 122 insertions(+), 124 deletions(-) diff --git a/optd-datafusion-repr/src/plan_nodes/projection.rs b/optd-datafusion-repr/src/plan_nodes/projection.rs index 3898733d..a2ff8dca 100644 --- a/optd-datafusion-repr/src/plan_nodes/projection.rs +++ b/optd-datafusion-repr/src/plan_nodes/projection.rs @@ -1,7 +1,9 @@ +use optd_core::rel_node::RelNode; + use super::expr::ExprList; use super::macros::define_plan_node; -use super::{OptRelNode, OptRelNodeRef, OptRelNodeTyp, PlanNode}; +use super::{ColumnRefExpr, Expr, OptRelNode, OptRelNodeRef, OptRelNodeTyp, PlanNode}; #[derive(Clone, Debug)] pub struct LogicalProjection(pub PlanNode); @@ -26,3 +28,85 @@ define_plan_node!( { 1, exprs: ExprList } ] ); + +pub struct ProjectionMapping { + forward: Vec, + _backward: Vec>, +} + +impl ProjectionMapping { + pub fn build(mapping: Vec) -> Option { + let mut backward = vec![]; + for (i, &x) in mapping.iter().enumerate() { + if x >= backward.len() { + backward.resize(x + 1, None); + } + backward[x] = Some(i); + } + Some(Self { + forward: mapping, + _backward: backward, + }) + } + + pub fn projection_col_refers_to(&self, col: usize) -> usize { + self.forward[col] + } + + pub fn _original_col_maps_to(&self, col: usize) -> Option { + self._backward[col] + } + + pub fn rewrite_condition( + &self, + cond: Expr, + left_schema_size: usize, + projection_schema_size: usize, + ) -> Expr { + if cond.typ() == OptRelNodeTyp::ColumnRef { + let col = 
ColumnRefExpr::from_rel_node(cond.into_rel_node()).unwrap(); + let idx = col.index(); + if idx < projection_schema_size { + let col = self.projection_col_refers_to(col.index()); + return ColumnRefExpr::new(col).into_expr(); + } else { + let col = col.index(); + return ColumnRefExpr::new(col - projection_schema_size + left_schema_size) + .into_expr(); + } + } + let expr = cond.into_rel_node(); + let mut children = Vec::with_capacity(expr.children.len()); + for child in &expr.children { + children.push( + self.rewrite_condition( + Expr::from_rel_node(child.clone()).unwrap(), + left_schema_size, + projection_schema_size, + ) + .into_rel_node(), + ); + } + + Expr::from_rel_node( + RelNode { + typ: expr.typ.clone(), + children, + data: expr.data.clone(), + } + .into(), + ) + .unwrap() + } +} + +impl LogicalProjection { + pub fn compute_column_mapping(&self) -> Option { + let mut mapping = vec![]; + for expr in self.exprs().to_vec() { + let col_expr = ColumnRefExpr::from_rel_node(expr.into_rel_node())?; + mapping.push(col_expr.index()); + } + ProjectionMapping::build(mapping) + } +} diff --git a/optd-datafusion-repr/src/rules/filter_pushdown.rs b/optd-datafusion-repr/src/rules/filter_pushdown.rs index 024b3382..f4f2dd35 100644 --- a/optd-datafusion-repr/src/rules/filter_pushdown.rs +++ b/optd-datafusion-repr/src/rules/filter_pushdown.rs @@ -9,7 +9,6 @@ //! further down the query plan whenever it is possible to do so. use std::collections::HashMap; -use std::result; use optd_core::rules::{Rule, RuleMatcher}; use optd_core::{optimizer::Optimizer, rel_node::RelNode}; @@ -24,11 +23,15 @@ define_rule!( (Filter, child, [cond]) ); -/// Projection and filter are commutable when TODO +/// Datafusion only pushes filter past project when the project does not contain +/// volatile (i.e. 
non-deterministic) expressions that are present in the filter +/// Calcite only checks if the projection contains a windowing calculation +/// We are checking neither of those things here right now fn filter_project_transpose( - child: RelNode, - cond: RelNode, + _child: RelNode, + _cond: RelNode, ) -> Vec> { + // let old_proj = LogicalProjection::from_rel_node(child.into()).unwrap(); vec![] } @@ -50,7 +53,7 @@ fn apply_filter_pushdown( FilterPushdownRulePicks { child, cond }: FilterPushdownRulePicks, ) -> Vec> { // Push filter down one node - let result_from_this_step = match child.typ { + let mut result_from_this_step = match child.typ { OptRelNodeTyp::Projection => filter_project_transpose(child, cond), OptRelNodeTyp::Filter => todo!(), // @todo filter merge rule? Should we do that here? OptRelNodeTyp::Scan => todo!(), // @todo Why doesn't our sort node have a predicate field? @@ -58,41 +61,33 @@ fn apply_filter_pushdown( OptRelNodeTyp::Sort => filter_sort_transpose(child, cond), OptRelNodeTyp::Agg => todo!(), OptRelNodeTyp::Apply(_) => todo!(), - OptRelNodeTyp::EmptyRelation => todo!(), - OptRelNodeTyp::Limit => todo!(), _ => vec![], }; // Apply rule recursively - if let Some(new_node) = result_from_this_step.first() { + if let Some(new_node) = result_from_this_step.first_mut() { // For all the children in our result, - new_node.children = new_node - .children - .into_iter() - .map(|child| { - if child.typ == OptRelNodeTyp::Filter { - // If this node is a filter, apply the rule again to this node! - let child_as_filter = LogicalFilter::from_rel_node(child.clone()).unwrap(); - let childs_child = child_as_filter.child().into_rel_node().as_ref().clone(); - let childs_cond = child_as_filter.cond().into_rel_node().as_ref().clone(); - // @todo: make this iterative? 
- let result = apply_filter_pushdown( - _optimizer, - FilterPushdownRulePicks { - child: childs_child, - cond: childs_cond, - }, - ); - // If we got a result, that is the replacement for this child - if let Some(&new_child) = result.first() { - return new_child.into(); - } + for child in new_node.children.iter_mut() { + if child.typ == OptRelNodeTyp::Filter { + // If this node is a filter, apply the rule again to this node! + let child_as_filter = LogicalFilter::from_rel_node(child.clone()).unwrap(); + let childs_child = child_as_filter.child().into_rel_node().as_ref().clone(); + let childs_cond = child_as_filter.cond().into_rel_node().as_ref().clone(); + // @todo: make this iterative? + let result = apply_filter_pushdown( + _optimizer, + FilterPushdownRulePicks { + child: childs_child, + cond: childs_cond, + }, + ); + // If we got a result, that is the replacement for this child + if let Some(&new_child) = result.first().as_ref() { + *child = new_child.to_owned().into(); } - // Otherwise, if there was no result from rule application or this is not a filter, - // return the old child unmodified. 
- child - }) - .collect(); + } + // Otherwise, if there was no result from rule application or this is not a filter, do not modify the child + } } result_from_this_step diff --git a/optd-datafusion-repr/src/rules/joins.rs b/optd-datafusion-repr/src/rules/joins.rs index a6453e3c..2ee458c3 100644 --- a/optd-datafusion-repr/src/rules/joins.rs +++ b/optd-datafusion-repr/src/rules/joins.rs @@ -256,35 +256,6 @@ define_rule!( ) ); -struct ProjectionMapping { - forward: Vec, - _backward: Vec>, -} - -impl ProjectionMapping { - pub fn build(mapping: Vec) -> Option { - let mut backward = vec![]; - for (i, &x) in mapping.iter().enumerate() { - if x >= backward.len() { - backward.resize(x + 1, None); - } - backward[x] = Some(i); - } - Some(Self { - forward: mapping, - _backward: backward, - }) - } - - pub fn projection_col_refers_to(&self, col: usize) -> usize { - self.forward[col] - } - - pub fn _original_col_maps_to(&self, col: usize) -> Option { - self._backward[col] - } -} - fn apply_projection_pull_up_join( optimizer: &impl Optimizer, ProjectionPullUpJoinPicks { @@ -294,72 +265,21 @@ fn apply_projection_pull_up_join( cond, }: ProjectionPullUpJoinPicks, ) -> Vec> { + let left = Arc::new(left.clone()); + let right = Arc::new(right.clone()); + let list = ExprList::from_rel_node(Arc::new(list)).unwrap(); - fn compute_column_mapping(list: ExprList) -> Option { - let mut mapping = vec![]; - for expr in list.to_vec() { - let col_expr = ColumnRefExpr::from_rel_node(expr.into_rel_node())?; - mapping.push(col_expr.index()); - } - ProjectionMapping::build(mapping) - } + let projection = LogicalProjection::new(PlanNode::from_group(left.clone()), list.clone()); - let Some(mapping) = compute_column_mapping(list.clone()) else { + let Some(mapping) = projection.compute_column_mapping() else { return vec![]; }; - fn rewrite_condition( - cond: Expr, - mapping: &ProjectionMapping, - left_schema_size: usize, - projection_schema_size: usize, - ) -> Expr { - if cond.typ() == 
OptRelNodeTyp::ColumnRef { - let col = ColumnRefExpr::from_rel_node(cond.into_rel_node()).unwrap(); - let idx = col.index(); - if idx < projection_schema_size { - let col = mapping.projection_col_refers_to(col.index()); - return ColumnRefExpr::new(col).into_expr(); - } else { - let col = col.index(); - return ColumnRefExpr::new(col - projection_schema_size + left_schema_size) - .into_expr(); - } - } - let expr = cond.into_rel_node(); - let mut children = Vec::with_capacity(expr.children.len()); - for child in &expr.children { - children.push( - rewrite_condition( - Expr::from_rel_node(child.clone()).unwrap(), - mapping, - left_schema_size, - projection_schema_size, - ) - .into_rel_node(), - ); - } - - Expr::from_rel_node( - RelNode { - typ: expr.typ.clone(), - children, - data: expr.data.clone(), - } - .into(), - ) - .unwrap() - } - - let left = Arc::new(left.clone()); - let right = Arc::new(right.clone()); - // TODO(chi): support capture projection node. - let projection = - LogicalProjection::new(PlanNode::from_group(left.clone()), list.clone()).into_rel_node(); let left_schema = optimizer.get_property::(left.clone(), 0); - let projection_schema = optimizer.get_property::(projection.clone(), 0); + let projection_schema = + optimizer.get_property::(projection.into_rel_node().clone(), 0); let right_schema = optimizer.get_property::(right.clone(), 0); let mut new_projection_exprs = list.to_vec(); for i in 0..right_schema.len() { @@ -370,9 +290,8 @@ fn apply_projection_pull_up_join( LogicalJoin::new( PlanNode::from_group(left), PlanNode::from_group(right), - rewrite_condition( + mapping.rewrite_condition( Expr::from_rel_node(Arc::new(cond)).unwrap(), - &mapping, left_schema.len(), projection_schema.len(), ), From b5e881fa6b5643a22ca0ba137f4f17b78e0f76ec Mon Sep 17 00:00:00 2001 From: Benjamin O Date: Sun, 17 Mar 2024 00:14:42 -0400 Subject: [PATCH 04/61] Hardcoded filter/sort test --- .../src/plan_nodes/projection.rs | 9 +-- .../src/rules/filter_pushdown.rs | 58 
++++++++++++++++--- 2 files changed, 55 insertions(+), 12 deletions(-) diff --git a/optd-datafusion-repr/src/plan_nodes/projection.rs b/optd-datafusion-repr/src/plan_nodes/projection.rs index a2ff8dca..461efc7c 100644 --- a/optd-datafusion-repr/src/plan_nodes/projection.rs +++ b/optd-datafusion-repr/src/plan_nodes/projection.rs @@ -57,10 +57,12 @@ impl ProjectionMapping { self._backward[col] } + /// Recursively rewrites all ColumnRefs in an Expr to what the projection + /// node is rewriting. E.g. if Projection is A -> B, B will be rewritten as A pub fn rewrite_condition( &self, cond: Expr, - left_schema_size: usize, + schema_size: usize, projection_schema_size: usize, ) -> Expr { if cond.typ() == OptRelNodeTyp::ColumnRef { @@ -71,8 +73,7 @@ impl ProjectionMapping { return ColumnRefExpr::new(col).into_expr(); } else { let col = col.index(); - return ColumnRefExpr::new(col - projection_schema_size + left_schema_size) - .into_expr(); + return ColumnRefExpr::new(col - projection_schema_size + schema_size).into_expr(); } } let expr = cond.into_rel_node(); @@ -81,7 +82,7 @@ impl ProjectionMapping { children.push( self.rewrite_condition( Expr::from_rel_node(child.clone()).unwrap(), - left_schema_size, + schema_size, projection_schema_size, ) .into_rel_node(), diff --git a/optd-datafusion-repr/src/rules/filter_pushdown.rs b/optd-datafusion-repr/src/rules/filter_pushdown.rs index f4f2dd35..ceaaf796 100644 --- a/optd-datafusion-repr/src/rules/filter_pushdown.rs +++ b/optd-datafusion-repr/src/rules/filter_pushdown.rs @@ -13,7 +13,9 @@ use std::collections::HashMap; use optd_core::rules::{Rule, RuleMatcher}; use optd_core::{optimizer::Optimizer, rel_node::RelNode}; -use crate::plan_nodes::{Expr, LogicalFilter, LogicalSort, OptRelNode, OptRelNodeTyp}; +use crate::plan_nodes::{ + Expr, LogicalFilter, LogicalProjection, LogicalSort, OptRelNode, OptRelNodeTyp, +}; use super::macros::define_rule; @@ -26,12 +28,11 @@ define_rule!( /// Datafusion only pushes filter past project when 
the project does not contain /// volatile (i.e. non-deterministic) expressions that are present in the filter /// Calcite only checks if the projection contains a windowing calculation -/// We are checking neither of those things here right now fn filter_project_transpose( - _child: RelNode, - _cond: RelNode, + child: RelNode, + cond: RelNode, ) -> Vec> { - // let old_proj = LogicalProjection::from_rel_node(child.into()).unwrap(); + let old_proj = LogicalProjection::from_rel_node(child.into()).unwrap(); vec![] } @@ -56,11 +57,9 @@ fn apply_filter_pushdown( let mut result_from_this_step = match child.typ { OptRelNodeTyp::Projection => filter_project_transpose(child, cond), OptRelNodeTyp::Filter => todo!(), // @todo filter merge rule? Should we do that here? - OptRelNodeTyp::Scan => todo!(), // @todo Why doesn't our sort node have a predicate field? + // OptRelNodeTyp::Scan => todo!(), // TODO: Add predicate field to scan node OptRelNodeTyp::Join(_) => todo!(), OptRelNodeTyp::Sort => filter_sort_transpose(child, cond), - OptRelNodeTyp::Agg => todo!(), - OptRelNodeTyp::Apply(_) => todo!(), _ => vec![], }; @@ -92,3 +91,46 @@ fn apply_filter_pushdown( result_from_this_step } + +#[cfg(test)] +mod tests { + use std::{any::Any, sync::Arc}; + + use optd_core::heuristics::{ApplyOrder, HeuristicsOptimizer}; + + use crate::plan_nodes::{ + BinOpExpr, BinOpType, ColumnRefExpr, ConstantExpr, ExprList, LogicalFilter, LogicalScan, + LogicalSort, OptRelNode, OptRelNodeTyp, + }; + + use super::apply_filter_pushdown; + + #[test] + fn filter_before_sort() { + let dummy_optimizer = HeuristicsOptimizer::new_with_rules(vec![], ApplyOrder::TopDown); + + let scan = LogicalScan::new("".into()); + let sort = LogicalSort::new(scan.into_plan_node(), ExprList::new(vec![])); + + let filter_expr = BinOpExpr::new( + ColumnRefExpr::new(0).into_expr(), + ConstantExpr::int32(5).into_expr(), + BinOpType::Eq, + ) + .into_expr(); + let filter = LogicalFilter::new(sort.clone().into_plan_node(), 
filter_expr.clone()); + + let plan = apply_filter_pushdown( + &dummy_optimizer, + super::FilterPushdownRulePicks { + child: Arc::unwrap_or_clone(sort.into_rel_node()), + cond: Arc::unwrap_or_clone(filter_expr.into_rel_node()), + }, + ); + + let plan = plan.first().unwrap(); + + assert!(matches!(plan.typ, OptRelNodeTyp::Sort)); + assert!(matches!(plan.child(0).typ, OptRelNodeTyp::Filter)); + } +} From 67e963236152c31653417770aacc4aa76d6cd91f Mon Sep 17 00:00:00 2001 From: Benjamin O Date: Sun, 17 Mar 2024 02:49:56 -0400 Subject: [PATCH 05/61] push past projection w/ broken test --- .../src/rules/filter_pushdown.rs | 69 ++++++++++++++++--- 1 file changed, 59 insertions(+), 10 deletions(-) diff --git a/optd-datafusion-repr/src/rules/filter_pushdown.rs b/optd-datafusion-repr/src/rules/filter_pushdown.rs index ceaaf796..08b49199 100644 --- a/optd-datafusion-repr/src/rules/filter_pushdown.rs +++ b/optd-datafusion-repr/src/rules/filter_pushdown.rs @@ -16,6 +16,7 @@ use optd_core::{optimizer::Optimizer, rel_node::RelNode}; use crate::plan_nodes::{ Expr, LogicalFilter, LogicalProjection, LogicalSort, OptRelNode, OptRelNodeTyp, }; +use crate::properties::schema::SchemaPropertyBuilder; use super::macros::define_rule; @@ -29,15 +30,35 @@ define_rule!( /// volatile (i.e. 
non-deterministic) expressions that are present in the filter /// Calcite only checks if the projection contains a windowing calculation fn filter_project_transpose( + optimizer: &impl Optimizer, child: RelNode, cond: RelNode, ) -> Vec> { let old_proj = LogicalProjection::from_rel_node(child.into()).unwrap(); - vec![] + let cond_as_expr = Expr::from_rel_node(cond.into()).unwrap(); + + let projection_schema_len = optimizer + .get_property::(old_proj.clone().into_rel_node(), 0) + .len(); + let child_schema_len = optimizer + .get_property::(old_proj.clone().into_rel_node(), 0) + .len(); + + let proj_col_map = old_proj.compute_column_mapping().unwrap(); + proj_col_map.rewrite_condition( + cond_as_expr.clone(), + projection_schema_len, + child_schema_len, + ); + + let new_filter_node = LogicalFilter::new(old_proj.child(), cond_as_expr); + let new_proj = LogicalProjection::new(new_filter_node.into_plan_node(), old_proj.exprs()); + vec![new_proj.into_rel_node().as_ref().clone()] } /// Filter and sort should always be commutable. fn filter_sort_transpose( + _optimizer: &impl Optimizer, child: RelNode, cond: RelNode, ) -> Vec> { @@ -50,16 +71,16 @@ fn filter_sort_transpose( } fn apply_filter_pushdown( - _optimizer: &impl Optimizer, + optimizer: &impl Optimizer, FilterPushdownRulePicks { child, cond }: FilterPushdownRulePicks, ) -> Vec> { // Push filter down one node let mut result_from_this_step = match child.typ { - OptRelNodeTyp::Projection => filter_project_transpose(child, cond), + OptRelNodeTyp::Projection => filter_project_transpose(optimizer, child, cond), OptRelNodeTyp::Filter => todo!(), // @todo filter merge rule? Should we do that here? 
// OptRelNodeTyp::Scan => todo!(), // TODO: Add predicate field to scan node OptRelNodeTyp::Join(_) => todo!(), - OptRelNodeTyp::Sort => filter_sort_transpose(child, cond), + OptRelNodeTyp::Sort => filter_sort_transpose(optimizer, child, cond), _ => vec![], }; @@ -74,7 +95,7 @@ fn apply_filter_pushdown( let childs_cond = child_as_filter.cond().into_rel_node().as_ref().clone(); // @todo: make this iterative? let result = apply_filter_pushdown( - _optimizer, + optimizer, FilterPushdownRulePicks { child: childs_child, cond: childs_cond, @@ -94,19 +115,19 @@ fn apply_filter_pushdown( #[cfg(test)] mod tests { - use std::{any::Any, sync::Arc}; + use std::sync::Arc; use optd_core::heuristics::{ApplyOrder, HeuristicsOptimizer}; use crate::plan_nodes::{ - BinOpExpr, BinOpType, ColumnRefExpr, ConstantExpr, ExprList, LogicalFilter, LogicalScan, - LogicalSort, OptRelNode, OptRelNodeTyp, + BinOpExpr, BinOpType, ColumnRefExpr, ConstantExpr, ExprList, LogicalProjection, + LogicalScan, LogicalSort, OptRelNode, OptRelNodeTyp, }; use super::apply_filter_pushdown; #[test] - fn filter_before_sort() { + fn push_past_sort() { let dummy_optimizer = HeuristicsOptimizer::new_with_rules(vec![], ApplyOrder::TopDown); let scan = LogicalScan::new("".into()); @@ -118,7 +139,6 @@ mod tests { BinOpType::Eq, ) .into_expr(); - let filter = LogicalFilter::new(sort.clone().into_plan_node(), filter_expr.clone()); let plan = apply_filter_pushdown( &dummy_optimizer, @@ -133,4 +153,33 @@ mod tests { assert!(matches!(plan.typ, OptRelNodeTyp::Sort)); assert!(matches!(plan.child(0).typ, OptRelNodeTyp::Filter)); } + + #[test] + fn push_past_proj_basic() { + // TODO: write advanced proj with more complex exprs + let dummy_optimizer = HeuristicsOptimizer::new_with_rules(vec![], ApplyOrder::TopDown); + + let scan = LogicalScan::new("".into()); + let proj = LogicalProjection::new(scan.into_plan_node(), ExprList::new(vec![])); + + let filter_expr = BinOpExpr::new( + ColumnRefExpr::new(0).into_expr(), + 
ConstantExpr::int32(5).into_expr(), + BinOpType::Eq, + ) + .into_expr(); + + let plan = apply_filter_pushdown( + &dummy_optimizer, + super::FilterPushdownRulePicks { + child: Arc::unwrap_or_clone(proj.into_rel_node()), + cond: Arc::unwrap_or_clone(filter_expr.into_rel_node()), + }, + ); + + let plan = plan.first().unwrap(); + + assert!(matches!(plan.typ, OptRelNodeTyp::Projection)); + assert!(matches!(plan.child(0).typ, OptRelNodeTyp::Filter)); + } } From 330cdac20afadb35319bbf9ff58a1c5ce00fb1fb Mon Sep 17 00:00:00 2001 From: Benjamin O Date: Sun, 17 Mar 2024 14:03:10 -0400 Subject: [PATCH 06/61] Implement filter merge --- .../src/rules/filter_pushdown.rs | 102 +++++++++++++++++- 1 file changed, 97 insertions(+), 5 deletions(-) diff --git a/optd-datafusion-repr/src/rules/filter_pushdown.rs b/optd-datafusion-repr/src/rules/filter_pushdown.rs index 08b49199..25d4f997 100644 --- a/optd-datafusion-repr/src/rules/filter_pushdown.rs +++ b/optd-datafusion-repr/src/rules/filter_pushdown.rs @@ -14,7 +14,8 @@ use optd_core::rules::{Rule, RuleMatcher}; use optd_core::{optimizer::Optimizer, rel_node::RelNode}; use crate::plan_nodes::{ - Expr, LogicalFilter, LogicalProjection, LogicalSort, OptRelNode, OptRelNodeTyp, + BinOpExpr, BinOpType, Expr, LogicalFilter, LogicalProjection, LogicalSort, OptRelNode, + OptRelNodeTyp, }; use crate::properties::schema::SchemaPropertyBuilder; @@ -26,9 +27,23 @@ define_rule!( (Filter, child, [cond]) ); +fn filter_merge( + _optimizer: &impl Optimizer, + child: RelNode, + cond: RelNode, +) -> Vec> { + let child_filter = LogicalFilter::from_rel_node(child.into()).unwrap(); + let child_filter_cond = child_filter.cond().clone(); + let curr_cond = Expr::from_rel_node(cond.into()).unwrap(); + let merged_cond = BinOpExpr::new(curr_cond, child_filter_cond, BinOpType::And).into_expr(); + let new_filter = LogicalFilter::new(child_filter.child(), merged_cond); + vec![new_filter.into_rel_node().as_ref().clone()] +} + /// Datafusion only pushes filter past 
project when the project does not contain /// volatile (i.e. non-deterministic) expressions that are present in the filter /// Calcite only checks if the projection contains a windowing calculation +/// We check neither of those things and do it always (which may be wrong) fn filter_project_transpose( optimizer: &impl Optimizer, child: RelNode, @@ -37,6 +52,7 @@ fn filter_project_transpose( let old_proj = LogicalProjection::from_rel_node(child.into()).unwrap(); let cond_as_expr = Expr::from_rel_node(cond.into()).unwrap(); + // TODO: Implement get_property in heuristics optimizer let projection_schema_len = optimizer .get_property::(old_proj.clone().into_rel_node(), 0) .len(); @@ -77,7 +93,7 @@ fn apply_filter_pushdown( // Push filter down one node let mut result_from_this_step = match child.typ { OptRelNodeTyp::Projection => filter_project_transpose(optimizer, child, cond), - OptRelNodeTyp::Filter => todo!(), // @todo filter merge rule? Should we do that here? + OptRelNodeTyp::Filter => filter_merge(optimizer, child, cond), // OptRelNodeTyp::Scan => todo!(), // TODO: Add predicate field to scan node OptRelNodeTyp::Join(_) => todo!(), OptRelNodeTyp::Sort => filter_sort_transpose(optimizer, child, cond), @@ -117,11 +133,12 @@ fn apply_filter_pushdown( mod tests { use std::sync::Arc; + use datafusion::arrow::compute::kernels::filter; use optd_core::heuristics::{ApplyOrder, HeuristicsOptimizer}; use crate::plan_nodes::{ - BinOpExpr, BinOpType, ColumnRefExpr, ConstantExpr, ExprList, LogicalProjection, - LogicalScan, LogicalSort, OptRelNode, OptRelNodeTyp, + BinOpExpr, BinOpType, ColumnRefExpr, ConstantExpr, ExprList, LogicalFilter, + LogicalProjection, LogicalScan, LogicalSort, OptRelNode, OptRelNodeTyp, }; use super::apply_filter_pushdown; @@ -154,9 +171,84 @@ mod tests { assert!(matches!(plan.child(0).typ, OptRelNodeTyp::Filter)); } + #[test] + fn filter_merge() { + // TODO: write advanced proj with more expr that need to be transformed + let dummy_optimizer = 
HeuristicsOptimizer::new_with_rules(vec![], ApplyOrder::TopDown); + + let scan = LogicalScan::new("".into()); + let filter_ch_expr = BinOpExpr::new( + ColumnRefExpr::new(0).into_expr(), + ConstantExpr::int32(1).into_expr(), + BinOpType::Eq, + ) + .into_expr(); + let filter_ch = LogicalFilter::new(scan.into_plan_node(), filter_ch_expr); + + let filter_expr = BinOpExpr::new( + ColumnRefExpr::new(1).into_expr(), + ConstantExpr::int32(6).into_expr(), + BinOpType::Eq, + ) + .into_expr(); + + let plan = apply_filter_pushdown( + &dummy_optimizer, + super::FilterPushdownRulePicks { + child: Arc::unwrap_or_clone(filter_ch.into_rel_node()), + cond: Arc::unwrap_or_clone(filter_expr.into_rel_node()), + }, + ); + + let plan = plan.first().unwrap(); + + assert!(matches!(plan.typ, OptRelNodeTyp::Filter)); + let bin_op_cond = BinOpExpr::from_rel_node( + LogicalFilter::from_rel_node((plan.clone()).into()) + .unwrap() + .cond() + .into_rel_node(), + ) + .unwrap(); + assert!(matches!(bin_op_cond.op_type(), BinOpType::And)); + let bin_op_left = + BinOpExpr::from_rel_node(bin_op_cond.left_child().into_rel_node()).unwrap(); + assert!(matches!(bin_op_left.op_type(), BinOpType::Eq)); + assert_eq!( + ColumnRefExpr::from_rel_node(bin_op_left.left_child().into_rel_node()) + .unwrap() + .index(), + 1 + ); + assert_eq!( + ConstantExpr::from_rel_node(bin_op_left.right_child().into_rel_node()) + .unwrap() + .value() + .as_i32(), + 6 + ); + let bin_op_right = + BinOpExpr::from_rel_node(bin_op_cond.right_child().into_rel_node()).unwrap(); + assert!(matches!(bin_op_right.op_type(), BinOpType::Eq)); + assert_eq!( + ColumnRefExpr::from_rel_node(bin_op_right.left_child().into_rel_node()) + .unwrap() + .index(), + 0 + ); + assert_eq!( + ConstantExpr::from_rel_node(bin_op_right.right_child().into_rel_node()) + .unwrap() + .value() + .as_i32(), + 1 + ); + assert!(matches!(plan.child(0).typ, OptRelNodeTyp::Scan)); + } + #[test] fn push_past_proj_basic() { - // TODO: write advanced proj with more complex 
exprs + // TODO: write advanced proj with more expr that need to be transformed let dummy_optimizer = HeuristicsOptimizer::new_with_rules(vec![], ApplyOrder::TopDown); let scan = LogicalScan::new("".into()); From 64d55728f9993e947472928b1a605b8317f019d3 Mon Sep 17 00:00:00 2001 From: Benjamin O Date: Sun, 17 Mar 2024 20:54:07 -0400 Subject: [PATCH 07/61] More prog on join --- optd-datafusion-repr/src/plan_nodes/join.rs | 21 +++++++++ .../src/rules/filter_pushdown.rs | 47 +++++++++++++------ optd-datafusion-repr/src/rules/joins.rs | 1 - 3 files changed, 54 insertions(+), 15 deletions(-) diff --git a/optd-datafusion-repr/src/plan_nodes/join.rs b/optd-datafusion-repr/src/plan_nodes/join.rs index c2dfc710..c1d36c6d 100644 --- a/optd-datafusion-repr/src/plan_nodes/join.rs +++ b/optd-datafusion-repr/src/plan_nodes/join.rs @@ -62,3 +62,24 @@ define_plan_node!( { 3, right_keys: ExprList } ], { join_type: JoinType } ); + +pub enum MappedColRef { + Left(usize), + Right(usize), +} + +impl LogicalJoin { + pub fn map_through_join( + &self, + index: usize, + left_schema_size: usize, + right_schema_size: usize, + ) -> MappedColRef { + assert!(index < left_schema_size + right_schema_size); + if (index < left_schema_size) { + MappedColRef::Left(index) + } else { + MappedColRef::Right(index - left_schema_size) + } + } +} diff --git a/optd-datafusion-repr/src/rules/filter_pushdown.rs b/optd-datafusion-repr/src/rules/filter_pushdown.rs index 25d4f997..5b9dc6fb 100644 --- a/optd-datafusion-repr/src/rules/filter_pushdown.rs +++ b/optd-datafusion-repr/src/rules/filter_pushdown.rs @@ -14,8 +14,8 @@ use optd_core::rules::{Rule, RuleMatcher}; use optd_core::{optimizer::Optimizer, rel_node::RelNode}; use crate::plan_nodes::{ - BinOpExpr, BinOpType, Expr, LogicalFilter, LogicalProjection, LogicalSort, OptRelNode, - OptRelNodeTyp, + BinOpExpr, BinOpType, Expr, LogicalFilter, LogicalJoin, LogicalProjection, LogicalSort, + OptRelNode, OptRelNodeTyp, }; use 
crate::properties::schema::SchemaPropertyBuilder; @@ -27,17 +27,8 @@ define_rule!( (Filter, child, [cond]) ); -fn filter_merge( - _optimizer: &impl Optimizer, - child: RelNode, - cond: RelNode, -) -> Vec> { - let child_filter = LogicalFilter::from_rel_node(child.into()).unwrap(); - let child_filter_cond = child_filter.cond().clone(); - let curr_cond = Expr::from_rel_node(cond.into()).unwrap(); - let merged_cond = BinOpExpr::new(curr_cond, child_filter_cond, BinOpType::And).into_expr(); - let new_filter = LogicalFilter::new(child_filter.child(), merged_cond); - vec![new_filter.into_rel_node().as_ref().clone()] +fn merge_conds(first: Expr, second: Expr) -> Expr { + BinOpExpr::new(first, second, BinOpType::And).into_expr() } /// Datafusion only pushes filter past project when the project does not contain @@ -72,6 +63,34 @@ fn filter_project_transpose( vec![new_proj.into_rel_node().as_ref().clone()] } +fn filter_merge( + _optimizer: &impl Optimizer, + child: RelNode, + cond: RelNode, +) -> Vec> { + let child_filter = LogicalFilter::from_rel_node(child.into()).unwrap(); + let child_filter_cond = child_filter.cond().clone(); + let curr_cond = Expr::from_rel_node(cond.into()).unwrap(); + let merged_cond = merge_conds(curr_cond, child_filter_cond); + let new_filter = LogicalFilter::new(child_filter.child(), merged_cond); + vec![new_filter.into_rel_node().as_ref().clone()] +} + +/// Cases: +/// - Push down to the left child (only involves keys from the left child) +/// - Push down to the right child (only involves keys from the right child) +/// - Push into the join condition (involves keys from both children) +fn filter_join_transpose( + _optimizer: &impl Optimizer, + child: RelNode, + cond: RelNode, +) -> Vec> { + let _old_join = LogicalJoin::from_rel_node(child.into()).unwrap(); + let _cond_as_expr = Expr::from_rel_node(cond.into()).unwrap(); + + vec![] +} + /// Filter and sort should always be commutable. 
fn filter_sort_transpose( _optimizer: &impl Optimizer, @@ -95,7 +114,7 @@ fn apply_filter_pushdown( OptRelNodeTyp::Projection => filter_project_transpose(optimizer, child, cond), OptRelNodeTyp::Filter => filter_merge(optimizer, child, cond), // OptRelNodeTyp::Scan => todo!(), // TODO: Add predicate field to scan node - OptRelNodeTyp::Join(_) => todo!(), + OptRelNodeTyp::Join(_) => filter_join_transpose(optimizer, child, cond), OptRelNodeTyp::Sort => filter_sort_transpose(optimizer, child, cond), _ => vec![], }; diff --git a/optd-datafusion-repr/src/rules/joins.rs b/optd-datafusion-repr/src/rules/joins.rs index 2ee458c3..224dbc91 100644 --- a/optd-datafusion-repr/src/rules/joins.rs +++ b/optd-datafusion-repr/src/rules/joins.rs @@ -88,7 +88,6 @@ define_rule!( /// Eliminate logical join with constant predicates /// True predicates becomes CrossJoin (not yet implemented) -/// False predicates become EmptyRelation (not yet implemented) #[allow(unused_variables)] fn apply_eliminate_join( optimizer: &impl Optimizer, From bc100ed6dcc999b66470ce0cba2de6a2b1f32bbf Mon Sep 17 00:00:00 2001 From: Benjamin O Date: Sun, 17 Mar 2024 21:44:35 -0400 Subject: [PATCH 08/61] Publicize flattened_nested_logical --- optd-datafusion-bridge/src/into_optd.rs | 32 ++------ optd-datafusion-repr/src/plan_nodes/expr.rs | 23 ++++++ optd-datafusion-repr/src/plan_nodes/join.rs | 2 +- .../src/rules/filter_pushdown.rs | 78 +++++++++---------- 4 files changed, 68 insertions(+), 67 deletions(-) diff --git a/optd-datafusion-bridge/src/into_optd.rs b/optd-datafusion-bridge/src/into_optd.rs index 96e1c779..59a8e3f0 100644 --- a/optd-datafusion-bridge/src/into_optd.rs +++ b/optd-datafusion-bridge/src/into_optd.rs @@ -15,28 +15,6 @@ use optd_datafusion_repr::plan_nodes::{ use crate::OptdPlanContext; -// flatten_nested_logical is a helper function to flatten nested logical operators with same op type -// eg. 
(a AND (b AND c)) => ExprList([a, b, c]) -// (a OR (b OR c)) => ExprList([a, b, c]) -// It assume the children of the input expr_list are already flattened -// and can only be used in bottom up manner -fn flatten_nested_logical(op: LogOpType, expr_list: ExprList) -> ExprList { - // conv_into_optd_expr is building the children bottom up so there is no need to - // call flatten_nested_logical recursively - let mut new_expr_list = Vec::new(); - for child in expr_list.to_vec() { - if let OptRelNodeTyp::LogOp(child_op) = child.typ() { - if child_op == op { - let child_log_op_expr = LogOpExpr::from_rel_node(child.into_rel_node()).unwrap(); - new_expr_list.extend(child_log_op_expr.children().to_vec()); - continue; - } - } - new_expr_list.push(child.clone()); - } - ExprList::new(new_expr_list) -} - impl OptdPlanContext<'_> { fn conv_into_optd_table_scan(&mut self, node: &logical_plan::TableScan) -> Result { let table_name = node.table_name.to_string(); @@ -73,14 +51,16 @@ impl OptdPlanContext<'_> { Operator::And => { let op = LogOpType::And; let expr_list = ExprList::new(vec![left, right]); - let expr_list = flatten_nested_logical(op, expr_list); - return Ok(LogOpExpr::new(op, expr_list).into_expr()); + return Ok( + LogOpExpr::new_flattened_nested_logical(op, expr_list).into_expr() + ); } Operator::Or => { let op = LogOpType::Or; let expr_list = ExprList::new(vec![left, right]); - let expr_list = flatten_nested_logical(op, expr_list); - return Ok(LogOpExpr::new(op, expr_list).into_expr()); + return Ok( + LogOpExpr::new_flattened_nested_logical(op, expr_list).into_expr() + ); } _ => {} } diff --git a/optd-datafusion-repr/src/plan_nodes/expr.rs b/optd-datafusion-repr/src/plan_nodes/expr.rs index d8f54a07..5de15206 100644 --- a/optd-datafusion-repr/src/plan_nodes/expr.rs +++ b/optd-datafusion-repr/src/plan_nodes/expr.rs @@ -595,6 +595,29 @@ impl LogOpExpr { )) } + /// flatten_nested_logical is a helper function to flatten nested logical operators with same op type + /// eg. 
(a AND (b AND c)) => ExprList([a, b, c]) + /// (a OR (b OR c)) => ExprList([a, b, c]) + /// It assume the children of the input expr_list are already flattened + /// and can only be used in bottom up manner + pub fn new_flattened_nested_logical(op: LogOpType, expr_list: ExprList) -> Self { + // Since we assume that we are building the children bottom up, + // there is no need to call flatten_nested_logical recursively + let mut new_expr_list = Vec::new(); + for child in expr_list.to_vec() { + if let OptRelNodeTyp::LogOp(child_op) = child.typ() { + if child_op == op { + let child_log_op_expr = + LogOpExpr::from_rel_node(child.into_rel_node()).unwrap(); + new_expr_list.extend(child_log_op_expr.children().to_vec()); + continue; + } + } + new_expr_list.push(child.clone()); + } + LogOpExpr::new(op, ExprList::new(new_expr_list)) + } + pub fn children(&self) -> Vec { self.0 .0 diff --git a/optd-datafusion-repr/src/plan_nodes/join.rs b/optd-datafusion-repr/src/plan_nodes/join.rs index c1d36c6d..79f6237c 100644 --- a/optd-datafusion-repr/src/plan_nodes/join.rs +++ b/optd-datafusion-repr/src/plan_nodes/join.rs @@ -76,7 +76,7 @@ impl LogicalJoin { right_schema_size: usize, ) -> MappedColRef { assert!(index < left_schema_size + right_schema_size); - if (index < left_schema_size) { + if index < left_schema_size { MappedColRef::Left(index) } else { MappedColRef::Right(index - left_schema_size) diff --git a/optd-datafusion-repr/src/rules/filter_pushdown.rs b/optd-datafusion-repr/src/rules/filter_pushdown.rs index 5b9dc6fb..a898429c 100644 --- a/optd-datafusion-repr/src/rules/filter_pushdown.rs +++ b/optd-datafusion-repr/src/rules/filter_pushdown.rs @@ -12,10 +12,11 @@ use std::collections::HashMap; use optd_core::rules::{Rule, RuleMatcher}; use optd_core::{optimizer::Optimizer, rel_node::RelNode}; +use tracing_subscriber::filter::combinator::And; use crate::plan_nodes::{ - BinOpExpr, BinOpType, Expr, LogicalFilter, LogicalJoin, LogicalProjection, LogicalSort, - OptRelNode, 
OptRelNodeTyp, + BinOpExpr, BinOpType, Expr, ExprList, LogOpExpr, LogOpType, LogicalFilter, LogicalJoin, + LogicalProjection, LogicalSort, OptRelNode, OptRelNodeTyp, }; use crate::properties::schema::SchemaPropertyBuilder; @@ -28,7 +29,9 @@ define_rule!( ); fn merge_conds(first: Expr, second: Expr) -> Expr { - BinOpExpr::new(first, second, BinOpType::And).into_expr() + let new_expr_list = ExprList::new(vec![first, second]); + // Flatten nested logical expressions if possible + LogOpExpr::new_flattened_nested_logical(LogOpType::And, new_expr_list).into_expr() } /// Datafusion only pushes filter past project when the project does not contain @@ -156,8 +159,8 @@ mod tests { use optd_core::heuristics::{ApplyOrder, HeuristicsOptimizer}; use crate::plan_nodes::{ - BinOpExpr, BinOpType, ColumnRefExpr, ConstantExpr, ExprList, LogicalFilter, - LogicalProjection, LogicalScan, LogicalSort, OptRelNode, OptRelNodeTyp, + BinOpExpr, BinOpType, ColumnRefExpr, ConstantExpr, ExprList, LogOpExpr, LogOpType, + LogicalFilter, LogicalProjection, LogicalScan, LogicalSort, OptRelNode, OptRelNodeTyp, }; use super::apply_filter_pushdown; @@ -222,47 +225,42 @@ mod tests { let plan = plan.first().unwrap(); assert!(matches!(plan.typ, OptRelNodeTyp::Filter)); - let bin_op_cond = BinOpExpr::from_rel_node( + let cond_log_op = LogOpExpr::from_rel_node( LogicalFilter::from_rel_node((plan.clone()).into()) .unwrap() .cond() .into_rel_node(), ) .unwrap(); - assert!(matches!(bin_op_cond.op_type(), BinOpType::And)); - let bin_op_left = - BinOpExpr::from_rel_node(bin_op_cond.left_child().into_rel_node()).unwrap(); - assert!(matches!(bin_op_left.op_type(), BinOpType::Eq)); - assert_eq!( - ColumnRefExpr::from_rel_node(bin_op_left.left_child().into_rel_node()) - .unwrap() - .index(), - 1 - ); - assert_eq!( - ConstantExpr::from_rel_node(bin_op_left.right_child().into_rel_node()) - .unwrap() - .value() - .as_i32(), - 6 - ); - let bin_op_right = - 
BinOpExpr::from_rel_node(bin_op_cond.right_child().into_rel_node()).unwrap(); - assert!(matches!(bin_op_right.op_type(), BinOpType::Eq)); - assert_eq!( - ColumnRefExpr::from_rel_node(bin_op_right.left_child().into_rel_node()) - .unwrap() - .index(), - 0 - ); - assert_eq!( - ConstantExpr::from_rel_node(bin_op_right.right_child().into_rel_node()) - .unwrap() - .value() - .as_i32(), - 1 - ); - assert!(matches!(plan.child(0).typ, OptRelNodeTyp::Scan)); + assert!(matches!(cond_log_op.op_type(), LogOpType::And)); + + assert!(matches!( + cond_log_op.child(0).typ(), + OptRelNodeTyp::ColumnRef + )); + let col_rel_0 = ColumnRefExpr::from_rel_node(cond_log_op.child(0).into_rel_node()).unwrap(); + assert_eq!(col_rel_0.index(), 0); + + assert!(matches!( + cond_log_op.child(1).typ(), + OptRelNodeTyp::Constant(_) + )); + let col_rel_1 = ConstantExpr::from_rel_node(cond_log_op.child(1).into_rel_node()).unwrap(); + assert_eq!(col_rel_1.value().as_i32(), 1); + + assert!(matches!( + cond_log_op.child(2).typ(), + OptRelNodeTyp::ColumnRef + )); + let col_rel_2 = ColumnRefExpr::from_rel_node(cond_log_op.child(2).into_rel_node()).unwrap(); + assert_eq!(col_rel_2.index(), 1); + + assert!(matches!( + cond_log_op.child(3).typ(), + OptRelNodeTyp::Constant(_) + )); + let col_rel_3 = ConstantExpr::from_rel_node(cond_log_op.child(3).into_rel_node()).unwrap(); + assert_eq!(col_rel_3.value().as_i32(), 6); } #[test] From abbd6b73b73f09d0c65352451c1153ab32807f2a Mon Sep 17 00:00:00 2001 From: Benjamin O Date: Sun, 17 Mar 2024 22:14:19 -0400 Subject: [PATCH 09/61] Fix filter merge --- .../src/rules/filter_pushdown.rs | 45 ++++++++----------- 1 file changed, 18 insertions(+), 27 deletions(-) diff --git a/optd-datafusion-repr/src/rules/filter_pushdown.rs b/optd-datafusion-repr/src/rules/filter_pushdown.rs index a898429c..d23f9e1d 100644 --- a/optd-datafusion-repr/src/rules/filter_pushdown.rs +++ b/optd-datafusion-repr/src/rules/filter_pushdown.rs @@ -234,33 +234,24 @@ mod tests { .unwrap(); 
assert!(matches!(cond_log_op.op_type(), LogOpType::And)); - assert!(matches!( - cond_log_op.child(0).typ(), - OptRelNodeTyp::ColumnRef - )); - let col_rel_0 = ColumnRefExpr::from_rel_node(cond_log_op.child(0).into_rel_node()).unwrap(); - assert_eq!(col_rel_0.index(), 0); - - assert!(matches!( - cond_log_op.child(1).typ(), - OptRelNodeTyp::Constant(_) - )); - let col_rel_1 = ConstantExpr::from_rel_node(cond_log_op.child(1).into_rel_node()).unwrap(); - assert_eq!(col_rel_1.value().as_i32(), 1); - - assert!(matches!( - cond_log_op.child(2).typ(), - OptRelNodeTyp::ColumnRef - )); - let col_rel_2 = ColumnRefExpr::from_rel_node(cond_log_op.child(2).into_rel_node()).unwrap(); - assert_eq!(col_rel_2.index(), 1); - - assert!(matches!( - cond_log_op.child(3).typ(), - OptRelNodeTyp::Constant(_) - )); - let col_rel_3 = ConstantExpr::from_rel_node(cond_log_op.child(3).into_rel_node()).unwrap(); - assert_eq!(col_rel_3.value().as_i32(), 6); + let cond_exprs = cond_log_op.children(); + assert_eq!(cond_exprs.len(), 2); + let expr_1 = BinOpExpr::from_rel_node(cond_exprs[0].clone().into_rel_node()).unwrap(); + let expr_2 = BinOpExpr::from_rel_node(cond_exprs[1].clone().into_rel_node()).unwrap(); + assert!(matches!(expr_1.op_type(), BinOpType::Eq)); + assert!(matches!(expr_2.op_type(), BinOpType::Eq)); + let col_1 = + ColumnRefExpr::from_rel_node(expr_1.left_child().clone().into_rel_node()).unwrap(); + let col_2 = + ConstantExpr::from_rel_node(expr_1.right_child().clone().into_rel_node()).unwrap(); + assert_eq!(col_1.index(), 1); + assert_eq!(col_2.value().as_i32(), 6); + let col_3 = + ColumnRefExpr::from_rel_node(expr_2.left_child().clone().into_rel_node()).unwrap(); + let col_4 = + ConstantExpr::from_rel_node(expr_2.right_child().clone().into_rel_node()).unwrap(); + assert_eq!(col_3.index(), 0); + assert_eq!(col_4.value().as_i32(), 1); } #[test] From f78f37b1b1b29b0f93a7a3a464f77e221c41f79c Mon Sep 17 00:00:00 2001 From: Benjamin O Date: Sun, 17 Mar 2024 23:42:56 -0400 Subject: 
[PATCH 10/61] Join progress --- optd-datafusion-repr/src/plan_nodes.rs | 2 +- optd-datafusion-repr/src/plan_nodes/join.rs | 1 - .../src/rules/filter_pushdown.rs | 171 +++++++++++++++++- 3 files changed, 164 insertions(+), 10 deletions(-) diff --git a/optd-datafusion-repr/src/plan_nodes.rs b/optd-datafusion-repr/src/plan_nodes.rs index c0456e75..f9107f16 100644 --- a/optd-datafusion-repr/src/plan_nodes.rs +++ b/optd-datafusion-repr/src/plan_nodes.rs @@ -30,7 +30,7 @@ pub use expr::{ SortOrderExpr, SortOrderType, UnOpExpr, UnOpType, }; pub use filter::{LogicalFilter, PhysicalFilter}; -pub use join::{JoinType, LogicalJoin, PhysicalHashJoin, PhysicalNestedLoopJoin}; +pub use join::{JoinType, LogicalJoin, MappedColRef, PhysicalHashJoin, PhysicalNestedLoopJoin}; pub use limit::{LogicalLimit, PhysicalLimit}; use pretty_xmlish::{Pretty, PrettyConfig}; pub use projection::{LogicalProjection, PhysicalProjection}; diff --git a/optd-datafusion-repr/src/plan_nodes/join.rs b/optd-datafusion-repr/src/plan_nodes/join.rs index 79f6237c..e4da1588 100644 --- a/optd-datafusion-repr/src/plan_nodes/join.rs +++ b/optd-datafusion-repr/src/plan_nodes/join.rs @@ -70,7 +70,6 @@ pub enum MappedColRef { impl LogicalJoin { pub fn map_through_join( - &self, index: usize, left_schema_size: usize, right_schema_size: usize, diff --git a/optd-datafusion-repr/src/rules/filter_pushdown.rs b/optd-datafusion-repr/src/rules/filter_pushdown.rs index d23f9e1d..ab5190c0 100644 --- a/optd-datafusion-repr/src/rules/filter_pushdown.rs +++ b/optd-datafusion-repr/src/rules/filter_pushdown.rs @@ -8,15 +8,15 @@ //! At a high level, filter pushdown is responsible for pushing the filter node //! further down the query plan whenever it is possible to do so. 
+use core::panic; use std::collections::HashMap; use optd_core::rules::{Rule, RuleMatcher}; use optd_core::{optimizer::Optimizer, rel_node::RelNode}; -use tracing_subscriber::filter::combinator::And; use crate::plan_nodes::{ - BinOpExpr, BinOpType, Expr, ExprList, LogOpExpr, LogOpType, LogicalFilter, LogicalJoin, - LogicalProjection, LogicalSort, OptRelNode, OptRelNodeTyp, + BinOpExpr, ColumnRefExpr, Expr, ExprList, JoinType, LogOpExpr, LogOpType, LogicalFilter, + LogicalJoin, LogicalProjection, LogicalSort, MappedColRef, OptRelNode, OptRelNodeTyp, }; use crate::properties::schema::SchemaPropertyBuilder; @@ -34,6 +34,95 @@ fn merge_conds(first: Expr, second: Expr) -> Expr { LogOpExpr::new_flattened_nested_logical(LogOpType::And, new_expr_list).into_expr() } +// Recursively search through all predicates in the join condition (LogExprs and BinOps), +// separating them into those that only involve the left child, those that only involve the +// right child, and those that involve both children. Constant expressions involve neither +// child. +fn separate_join_conds( + cond: LogOpExpr, + left_schema_size: usize, + right_schema_size: usize, +) -> (Vec, Vec, Vec, Vec) { + let mut left_conds = vec![]; + let mut right_conds = vec![]; + let mut join_conds = vec![]; + let mut keep_conds = vec![]; + + // For each child, if it is a LogOpExpr, recursively call this function + // If it is a BinOpExpr, check both children and add to the appropriate list + // If this is an AND logopexpr, then each of the conditions can be separated. + // If this is an OR logopexpr, then we have to check if that entire logopexpr + // can be separated. 
+ for child in cond.children() { + match child.typ() { + OptRelNodeTyp::LogOp(LogOpType::And) => { + let log_expr = LogOpExpr::from_rel_node(child.into_rel_node()).unwrap(); + // Recurse + let (left, right, join, keep) = + separate_join_conds(log_expr.clone(), left_schema_size, right_schema_size); + left_conds.extend(left); + right_conds.extend(right); + join_conds.extend(join); + keep_conds.extend(keep); + } + OptRelNodeTyp::LogOp(LogOpType::Or) => { + todo!("LogOpTyp::Or not yet implemented---God help us all") + } + OptRelNodeTyp::BinOp(_) => { + let bin_expr = BinOpExpr::from_rel_node(child.into_rel_node()).unwrap(); + // Check if the left and right children are column refs + let left_col = bin_expr.left_child(); + let right_col = bin_expr.right_child(); + let left_col = match left_col.typ() { + OptRelNodeTyp::ColumnRef => Some(LogicalJoin::map_through_join( + ColumnRefExpr::from_rel_node(left_col.into_rel_node()) + .unwrap() + .index(), + left_schema_size, + right_schema_size, + )), + _ => None, + }; + let right_col = match right_col.typ() { + OptRelNodeTyp::ColumnRef => Some(LogicalJoin::map_through_join( + ColumnRefExpr::from_rel_node(right_col.into_rel_node()) + .unwrap() + .index(), + left_schema_size, + right_schema_size, + )), + _ => None, + }; + // Check if cols list contains only left, only right, a mix, or is empty + // Note that the left col and right col can both be on the right side or left side + // of the join, so we need to check both + match (left_col, right_col) { + (Some(MappedColRef::Left(_)), Some(MappedColRef::Left(_))) => { + left_conds.push(bin_expr.clone().into_expr()); + } + (Some(MappedColRef::Right(_)), Some(MappedColRef::Right(_))) => { + right_conds.push(bin_expr.clone().into_expr()); + } + (Some(MappedColRef::Left(_)), Some(MappedColRef::Right(_))) + | (Some(MappedColRef::Right(_)), Some(MappedColRef::Left(_))) => { + join_conds.push(bin_expr.clone().into_expr()); + } + _ => { + // If †his is a constant expression, another rule 
should + // handle it. We won't push it down. + keep_conds.push(bin_expr.clone().into_expr()); + } + } + } + _ => { + panic!("Expression type {} not yet implemented", child.typ()) + } + } + } + + (left_conds, right_conds, join_conds, keep_conds) +} + /// Datafusion only pushes filter past project when the project does not contain /// volatile (i.e. non-deterministic) expressions that are present in the filter /// Calcite only checks if the projection contains a windowing calculation @@ -84,14 +173,81 @@ fn filter_merge( /// - Push down to the right child (only involves keys from the right child) /// - Push into the join condition (involves keys from both children) fn filter_join_transpose( - _optimizer: &impl Optimizer, + optimizer: &impl Optimizer, child: RelNode, cond: RelNode, ) -> Vec> { - let _old_join = LogicalJoin::from_rel_node(child.into()).unwrap(); - let _cond_as_expr = Expr::from_rel_node(cond.into()).unwrap(); + let old_join = LogicalJoin::from_rel_node(child.into()).unwrap(); + let cond_as_logexpr = LogOpExpr::from_rel_node(cond.into()).unwrap(); + + let left_schema_size = optimizer + .get_property::(old_join.left().into_rel_node(), 0) + .len(); + let right_schema_size = optimizer + .get_property::(old_join.right().into_rel_node(), 0) + .len(); + + let (left_conds, right_conds, join_conds, keep_conds) = + separate_join_conds(cond_as_logexpr, left_schema_size, right_schema_size); + + let new_left = if !left_conds.is_empty() { + let new_filter_node = LogicalFilter::new( + old_join.left(), + LogOpExpr::new(LogOpType::And, ExprList::new(left_conds)).into_expr(), + ); + new_filter_node.into_plan_node() + } else { + old_join.left() + }; + + let new_right = if !right_conds.is_empty() { + let new_filter_node = LogicalFilter::new( + old_join.right(), + LogOpExpr::new(LogOpType::And, ExprList::new(right_conds)).into_expr(), + ); + new_filter_node.into_plan_node() + } else { + old_join.right() + }; + + let new_join = match old_join.join_type() { + 
JoinType::Inner => { + let old_cond = old_join.cond(); + let new_conds = merge_conds( + LogOpExpr::new(LogOpType::And, ExprList::new(join_conds)).into_expr(), + old_cond, + ); + LogicalJoin::new(new_left, new_right, new_conds, JoinType::Inner) + } + JoinType::Cross => { + if !join_conds.is_empty() { + LogicalJoin::new( + new_left, + new_right, + LogOpExpr::new(LogOpType::And, ExprList::new(join_conds)).into_expr(), + JoinType::Inner, + ) + } else { + LogicalJoin::new(new_left, new_right, old_join.cond(), JoinType::Cross) + } + } + _ => { + // We don't support modifying the join condition for other join types + LogicalJoin::new(new_left, new_right, old_join.cond(), old_join.join_type()) + } + }; + + let new_node = if !keep_conds.is_empty() { + let new_filter_node = LogicalFilter::new( + new_join.into_plan_node(), + LogOpExpr::new(LogOpType::And, ExprList::new(keep_conds)).into_expr(), + ); + new_filter_node.into_rel_node().as_ref().clone() + } else { + new_join.into_rel_node().as_ref().clone() + }; - vec![] + vec![new_node] } /// Filter and sort should always be commutable. 
@@ -155,7 +311,6 @@ fn apply_filter_pushdown( mod tests { use std::sync::Arc; - use datafusion::arrow::compute::kernels::filter; use optd_core::heuristics::{ApplyOrder, HeuristicsOptimizer}; use crate::plan_nodes::{ From 9c9c5155ad1a051ee146a8c82642da63a9abdd34 Mon Sep 17 00:00:00 2001 From: Benjamin O Date: Thu, 21 Mar 2024 11:33:30 -0400 Subject: [PATCH 11/61] Add dummy cost model --- optd-datafusion-repr/src/cost.rs | 2 ++ optd-datafusion-repr/src/cost/dummy_cost.rs | 36 +++++++++++++++++++ .../src/rules/filter_pushdown.rs | 23 ++++++++---- 3 files changed, 55 insertions(+), 6 deletions(-) create mode 100644 optd-datafusion-repr/src/cost/dummy_cost.rs diff --git a/optd-datafusion-repr/src/cost.rs b/optd-datafusion-repr/src/cost.rs index 212b3331..89765f90 100644 --- a/optd-datafusion-repr/src/cost.rs +++ b/optd-datafusion-repr/src/cost.rs @@ -1,11 +1,13 @@ mod adaptive_cost; mod base_cost; +mod dummy_cost; mod stats; pub use adaptive_cost::{AdaptiveCostModel, RuntimeAdaptionStorage, DEFAULT_DECAY}; pub use base_cost::{ BaseTableStats, OptCostModel, PerColumnStats, PerTableStats, COMPUTE_COST, IO_COST, ROW_COUNT, }; +pub use dummy_cost::DummyCostModel; pub trait WithRuntimeStatistics { fn get_runtime_statistics(&self) -> RuntimeAdaptionStorage; diff --git a/optd-datafusion-repr/src/cost/dummy_cost.rs b/optd-datafusion-repr/src/cost/dummy_cost.rs new file mode 100644 index 00000000..ec184eb2 --- /dev/null +++ b/optd-datafusion-repr/src/cost/dummy_cost.rs @@ -0,0 +1,36 @@ +use crate::plan_nodes::OptRelNodeTyp; +use optd_core::{ + cascades::{CascadesOptimizer, RelNodeContext}, + cost::{Cost, CostModel}, + rel_node::{RelNode, Value}, +}; + +/// Dummy cost model that returns a 0 cost in all cases. Intended for testing. 
+pub struct DummyCostModel; + +impl CostModel for DummyCostModel { + fn compute_cost( + &self, + node: &OptRelNodeTyp, + data: &Option, + children: &[Cost], + context: Option, + optimizer: Option<&CascadesOptimizer>, + ) -> Cost { + Cost(vec![0.0]) + } + + fn compute_plan_node_cost(&self, node: &RelNode) -> Cost { + Cost(vec![0.0]) + } + + fn explain(&self, node: &Cost) -> String { + "Dummy cost".to_string() + } + + fn accumulate(&self, total_cost: &mut Cost, cost: &Cost) {} + + fn zero(&self) -> Cost { + Cost(vec![0.0]) + } +} diff --git a/optd-datafusion-repr/src/rules/filter_pushdown.rs b/optd-datafusion-repr/src/rules/filter_pushdown.rs index ab5190c0..d99aa1c2 100644 --- a/optd-datafusion-repr/src/rules/filter_pushdown.rs +++ b/optd-datafusion-repr/src/rules/filter_pushdown.rs @@ -311,15 +311,21 @@ fn apply_filter_pushdown( mod tests { use std::sync::Arc; - use optd_core::heuristics::{ApplyOrder, HeuristicsOptimizer}; + use optd_core::{ + cascades::CascadesOptimizer, + heuristics::{ApplyOrder, HeuristicsOptimizer}, + }; - use crate::plan_nodes::{ - BinOpExpr, BinOpType, ColumnRefExpr, ConstantExpr, ExprList, LogOpExpr, LogOpType, - LogicalFilter, LogicalProjection, LogicalScan, LogicalSort, OptRelNode, OptRelNodeTyp, + use crate::{ + plan_nodes::{ + BinOpExpr, BinOpType, ColumnRefExpr, ConstantExpr, ExprList, LogOpExpr, LogOpType, + LogicalFilter, LogicalProjection, LogicalScan, LogicalSort, OptRelNode, OptRelNodeTyp, + }, + properties::schema::{Catalog, SchemaPropertyBuilder}, }; use super::apply_filter_pushdown; - + use crate::cost::DummyCostModel; #[test] fn push_past_sort() { let dummy_optimizer = HeuristicsOptimizer::new_with_rules(vec![], ApplyOrder::TopDown); @@ -351,7 +357,12 @@ mod tests { #[test] fn filter_merge() { // TODO: write advanced proj with more expr that need to be transformed - let dummy_optimizer = HeuristicsOptimizer::new_with_rules(vec![], ApplyOrder::TopDown); + let dummy_catalog = Catalog::default(); + let dummy_optimizer = 
CascadesOptimizer::new( + vec![], + DummyCostModel, + vec![Box::new(SchemaPropertyBuilder::new(dummy_catalog))], + ); let scan = LogicalScan::new("".into()); let filter_ch_expr = BinOpExpr::new( From f4a85066d8cda1aefcf6c5160842ce1b886c5b25 Mon Sep 17 00:00:00 2001 From: Benjamin O Date: Thu, 21 Mar 2024 17:10:48 -0400 Subject: [PATCH 12/61] dummmy catalog + dummy optimizer method --- optd-datafusion-repr/src/cost.rs | 2 - optd-datafusion-repr/src/lib.rs | 1 + .../src/rules/filter_pushdown.rs | 26 ++-- optd-datafusion-repr/src/testing.rs | 23 ++++ .../src/{cost => testing}/dummy_cost.rs | 16 +-- .../src/testing/tpch_catalog.rs | 120 ++++++++++++++++++ 6 files changed, 160 insertions(+), 28 deletions(-) create mode 100644 optd-datafusion-repr/src/testing.rs rename optd-datafusion-repr/src/{cost => testing}/dummy_cost.rs (58%) create mode 100644 optd-datafusion-repr/src/testing/tpch_catalog.rs diff --git a/optd-datafusion-repr/src/cost.rs b/optd-datafusion-repr/src/cost.rs index 89765f90..212b3331 100644 --- a/optd-datafusion-repr/src/cost.rs +++ b/optd-datafusion-repr/src/cost.rs @@ -1,13 +1,11 @@ mod adaptive_cost; mod base_cost; -mod dummy_cost; mod stats; pub use adaptive_cost::{AdaptiveCostModel, RuntimeAdaptionStorage, DEFAULT_DECAY}; pub use base_cost::{ BaseTableStats, OptCostModel, PerColumnStats, PerTableStats, COMPUTE_COST, IO_COST, ROW_COUNT, }; -pub use dummy_cost::DummyCostModel; pub trait WithRuntimeStatistics { fn get_runtime_statistics(&self) -> RuntimeAdaptionStorage; diff --git a/optd-datafusion-repr/src/lib.rs b/optd-datafusion-repr/src/lib.rs index 2e297690..d5f8f737 100644 --- a/optd-datafusion-repr/src/lib.rs +++ b/optd-datafusion-repr/src/lib.rs @@ -28,6 +28,7 @@ mod explain; pub mod plan_nodes; pub mod properties; pub mod rules; +mod testing; pub struct DatafusionOptimizer { optimizer: CascadesOptimizer, diff --git a/optd-datafusion-repr/src/rules/filter_pushdown.rs b/optd-datafusion-repr/src/rules/filter_pushdown.rs index d99aa1c2..60ee05ee 
100644 --- a/optd-datafusion-repr/src/rules/filter_pushdown.rs +++ b/optd-datafusion-repr/src/rules/filter_pushdown.rs @@ -311,26 +311,21 @@ fn apply_filter_pushdown( mod tests { use std::sync::Arc; - use optd_core::{ - cascades::CascadesOptimizer, - heuristics::{ApplyOrder, HeuristicsOptimizer}, - }; - use crate::{ plan_nodes::{ BinOpExpr, BinOpType, ColumnRefExpr, ConstantExpr, ExprList, LogOpExpr, LogOpType, LogicalFilter, LogicalProjection, LogicalScan, LogicalSort, OptRelNode, OptRelNodeTyp, }, - properties::schema::{Catalog, SchemaPropertyBuilder}, + testing::new_dummy_optimizer, }; use super::apply_filter_pushdown; - use crate::cost::DummyCostModel; + #[test] fn push_past_sort() { - let dummy_optimizer = HeuristicsOptimizer::new_with_rules(vec![], ApplyOrder::TopDown); + let dummy_optimizer = new_dummy_optimizer(); - let scan = LogicalScan::new("".into()); + let scan = LogicalScan::new("customer".into()); let sort = LogicalSort::new(scan.into_plan_node(), ExprList::new(vec![])); let filter_expr = BinOpExpr::new( @@ -357,14 +352,9 @@ mod tests { #[test] fn filter_merge() { // TODO: write advanced proj with more expr that need to be transformed - let dummy_catalog = Catalog::default(); - let dummy_optimizer = CascadesOptimizer::new( - vec![], - DummyCostModel, - vec![Box::new(SchemaPropertyBuilder::new(dummy_catalog))], - ); + let dummy_optimizer = new_dummy_optimizer(); - let scan = LogicalScan::new("".into()); + let scan = LogicalScan::new("customer".into()); let filter_ch_expr = BinOpExpr::new( ColumnRefExpr::new(0).into_expr(), ConstantExpr::int32(1).into_expr(), @@ -423,9 +413,9 @@ mod tests { #[test] fn push_past_proj_basic() { // TODO: write advanced proj with more expr that need to be transformed - let dummy_optimizer = HeuristicsOptimizer::new_with_rules(vec![], ApplyOrder::TopDown); + let dummy_optimizer = new_dummy_optimizer(); - let scan = LogicalScan::new("".into()); + let scan = LogicalScan::new("customer".into()); let proj = 
LogicalProjection::new(scan.into_plan_node(), ExprList::new(vec![])); let filter_expr = BinOpExpr::new( diff --git a/optd-datafusion-repr/src/testing.rs b/optd-datafusion-repr/src/testing.rs new file mode 100644 index 00000000..81bf7d60 --- /dev/null +++ b/optd-datafusion-repr/src/testing.rs @@ -0,0 +1,23 @@ +mod dummy_cost; +mod tpch_catalog; + +use std::sync::Arc; + +pub use dummy_cost::DummyCostModel; +use optd_core::{cascades::CascadesOptimizer, optimizer::Optimizer}; +pub use tpch_catalog::TpchCatalog; + +use crate::{plan_nodes::OptRelNodeTyp, properties::schema::SchemaPropertyBuilder}; + +/// Create a "dummy" optimizer preloaded with the TPC-H catalog for testing +/// Note: Only provides the schema property currently +pub fn new_dummy_optimizer() -> impl Optimizer { + let dummy_catalog = Arc::new(TpchCatalog); + let dummy_optimizer = CascadesOptimizer::new( + vec![], + Box::new(DummyCostModel), + vec![Box::new(SchemaPropertyBuilder::new(dummy_catalog))], + ); + + dummy_optimizer +} diff --git a/optd-datafusion-repr/src/cost/dummy_cost.rs b/optd-datafusion-repr/src/testing/dummy_cost.rs similarity index 58% rename from optd-datafusion-repr/src/cost/dummy_cost.rs rename to optd-datafusion-repr/src/testing/dummy_cost.rs index ec184eb2..7f5de512 100644 --- a/optd-datafusion-repr/src/cost/dummy_cost.rs +++ b/optd-datafusion-repr/src/testing/dummy_cost.rs @@ -11,24 +11,24 @@ pub struct DummyCostModel; impl CostModel for DummyCostModel { fn compute_cost( &self, - node: &OptRelNodeTyp, - data: &Option, - children: &[Cost], - context: Option, - optimizer: Option<&CascadesOptimizer>, + _node: &OptRelNodeTyp, + _data: &Option, + _children: &[Cost], + _context: Option, + _optimizer: Option<&CascadesOptimizer>, ) -> Cost { Cost(vec![0.0]) } - fn compute_plan_node_cost(&self, node: &RelNode) -> Cost { + fn compute_plan_node_cost(&self, _node: &RelNode) -> Cost { Cost(vec![0.0]) } - fn explain(&self, node: &Cost) -> String { + fn explain(&self, _node: &Cost) -> String { 
"Dummy cost".to_string() } - fn accumulate(&self, total_cost: &mut Cost, cost: &Cost) {} + fn accumulate(&self, _total_cost: &mut Cost, _cost: &Cost) {} fn zero(&self) -> Cost { Cost(vec![0.0]) diff --git a/optd-datafusion-repr/src/testing/tpch_catalog.rs b/optd-datafusion-repr/src/testing/tpch_catalog.rs new file mode 100644 index 00000000..a1a10a2a --- /dev/null +++ b/optd-datafusion-repr/src/testing/tpch_catalog.rs @@ -0,0 +1,120 @@ +use core::panic; + +use crate::{ + plan_nodes::ConstantType, + properties::schema::{Catalog, Field, Schema}, +}; + +pub struct TpchCatalog; + +impl Catalog for TpchCatalog { + fn get(&self, name: &str) -> Schema { + match name { + "customer" => { + // Define the schema for the "customer" table + let schema = Schema { + fields: vec![ + Field { + name: "custkey".to_string(), + typ: ConstantType::Int32, + nullable: false, + }, + Field { + name: "name".to_string(), + typ: ConstantType::Utf8String, + nullable: false, + }, + Field { + name: "address".to_string(), + typ: ConstantType::Utf8String, + nullable: false, + }, + Field { + name: "nationkey".to_string(), + typ: ConstantType::Int32, + nullable: false, + }, + Field { + name: "phone".to_string(), + typ: ConstantType::Utf8String, + nullable: false, + }, + Field { + name: "acctbal".to_string(), + typ: ConstantType::Float64, + nullable: false, + }, + Field { + name: "mktsegment".to_string(), + typ: ConstantType::Utf8String, + nullable: false, + }, + Field { + name: "comment".to_string(), + typ: ConstantType::Utf8String, + nullable: false, + }, + ], + }; + schema + } + "orders" => { + // Define the schema for the "orders" table + let schema = Schema { + fields: vec![ + Field { + name: "orderkey".to_string(), + typ: ConstantType::Int32, + nullable: false, + }, + Field { + name: "custkey".to_string(), + typ: ConstantType::Int32, + nullable: false, + }, + Field { + name: "orderstatus".to_string(), + typ: ConstantType::Utf8String, + nullable: false, + }, + Field { + name: 
"totalprice".to_string(), + typ: ConstantType::Float64, + nullable: false, + }, + Field { + name: "orderdate".to_string(), + typ: ConstantType::Date, + nullable: false, + }, + Field { + name: "orderpriority".to_string(), + typ: ConstantType::Utf8String, + nullable: false, + }, + Field { + name: "clerk".to_string(), + typ: ConstantType::Utf8String, + nullable: false, + }, + Field { + name: "shippriority".to_string(), + typ: ConstantType::Int32, + nullable: false, + }, + Field { + name: "comment".to_string(), + typ: ConstantType::Utf8String, + nullable: false, + }, + ], + }; + schema + } + // Add more cases for other tables as needed + _ => { + panic!("Unknown table: {}", name); + } + } + } +} From e7d4c351f150cb3338d0a8b6721933c7cf4acd18 Mon Sep 17 00:00:00 2001 From: Benjamin O Date: Fri, 22 Mar 2024 02:35:57 -0400 Subject: [PATCH 13/61] Join test start --- .../src/rules/filter_pushdown.rs | 272 ++++++++++++++---- 1 file changed, 214 insertions(+), 58 deletions(-) diff --git a/optd-datafusion-repr/src/rules/filter_pushdown.rs b/optd-datafusion-repr/src/rules/filter_pushdown.rs index 60ee05ee..ce5de630 100644 --- a/optd-datafusion-repr/src/rules/filter_pushdown.rs +++ b/optd-datafusion-repr/src/rules/filter_pushdown.rs @@ -34,10 +34,48 @@ fn merge_conds(first: Expr, second: Expr) -> Expr { LogOpExpr::new_flattened_nested_logical(LogOpType::And, new_expr_list).into_expr() } +enum JoinCondDependency { + Left, + Right, + Both, + None, +} + +fn determine_join_cond_dep( + children: Vec, + left_schema_size: usize, + right_schema_size: usize, +) -> JoinCondDependency { + let mut left_col = false; + let mut right_col = false; + for child in children { + match child.typ() { + OptRelNodeTyp::ColumnRef => { + let col_ref = ColumnRefExpr::from_rel_node(child.into_rel_node()).unwrap(); + let index = col_ref.index(); + if index < left_schema_size { + left_col = true; + } else if index >= left_schema_size && index < left_schema_size + right_schema_size + { + right_col = true; + } 
+ } + _ => {} + } + } + match (left_col, right_col) { + (true, true) => JoinCondDependency::Both, + (true, false) => JoinCondDependency::Left, + (false, true) => JoinCondDependency::Right, + (false, false) => JoinCondDependency::None, + } +} + // Recursively search through all predicates in the join condition (LogExprs and BinOps), // separating them into those that only involve the left child, those that only involve the // right child, and those that involve both children. Constant expressions involve neither // child. +// pre-condition: the cond is an AND LogOpExpr fn separate_join_conds( cond: LogOpExpr, left_schema_size: usize, @@ -48,75 +86,44 @@ fn separate_join_conds( let mut join_conds = vec![]; let mut keep_conds = vec![]; - // For each child, if it is a LogOpExpr, recursively call this function + // For each child, if it is a LogOpExpr with and, recursively call this function // If it is a BinOpExpr, check both children and add to the appropriate list // If this is an AND logopexpr, then each of the conditions can be separated. // If this is an OR logopexpr, then we have to check if that entire logopexpr // can be separated. for child in cond.children() { - match child.typ() { + let location = match child.typ() { OptRelNodeTyp::LogOp(LogOpType::And) => { - let log_expr = LogOpExpr::from_rel_node(child.into_rel_node()).unwrap(); - // Recurse - let (left, right, join, keep) = - separate_join_conds(log_expr.clone(), left_schema_size, right_schema_size); - left_conds.extend(left); - right_conds.extend(right); - join_conds.extend(join); - keep_conds.extend(keep); + // In theory, we could recursively call the function to handle this + // case. However, it should not be possible to have nested LogOpExpr + // ANDs. 
So, panic so we can detect a bug + panic!("Nested AND LogOpExprs detected in filter pushdown!"); } OptRelNodeTyp::LogOp(LogOpType::Or) => { - todo!("LogOpTyp::Or not yet implemented---God help us all") + let log_expr = LogOpExpr::from_rel_node(child.clone().into_rel_node()).unwrap(); + determine_join_cond_dep(log_expr.children(), left_schema_size, right_schema_size) } OptRelNodeTyp::BinOp(_) => { - let bin_expr = BinOpExpr::from_rel_node(child.into_rel_node()).unwrap(); - // Check if the left and right children are column refs - let left_col = bin_expr.left_child(); - let right_col = bin_expr.right_child(); - let left_col = match left_col.typ() { - OptRelNodeTyp::ColumnRef => Some(LogicalJoin::map_through_join( - ColumnRefExpr::from_rel_node(left_col.into_rel_node()) - .unwrap() - .index(), - left_schema_size, - right_schema_size, - )), - _ => None, - }; - let right_col = match right_col.typ() { - OptRelNodeTyp::ColumnRef => Some(LogicalJoin::map_through_join( - ColumnRefExpr::from_rel_node(right_col.into_rel_node()) - .unwrap() - .index(), - left_schema_size, - right_schema_size, - )), - _ => None, - }; - // Check if cols list contains only left, only right, a mix, or is empty - // Note that the left col and right col can both be on the right side or left side - // of the join, so we need to check both - match (left_col, right_col) { - (Some(MappedColRef::Left(_)), Some(MappedColRef::Left(_))) => { - left_conds.push(bin_expr.clone().into_expr()); - } - (Some(MappedColRef::Right(_)), Some(MappedColRef::Right(_))) => { - right_conds.push(bin_expr.clone().into_expr()); - } - (Some(MappedColRef::Left(_)), Some(MappedColRef::Right(_))) - | (Some(MappedColRef::Right(_)), Some(MappedColRef::Left(_))) => { - join_conds.push(bin_expr.clone().into_expr()); - } - _ => { - // If †his is a constant expression, another rule should - // handle it. We won't push it down. 
- keep_conds.push(bin_expr.clone().into_expr()); - } - } + let bin_expr = BinOpExpr::from_rel_node(child.clone().into_rel_node()).unwrap(); + determine_join_cond_dep( + vec![bin_expr.left_child(), bin_expr.right_child()], + left_schema_size, + right_schema_size, + ) } _ => { - panic!("Expression type {} not yet implemented", child.typ()) + panic!( + "Expression type {} not yet implemented for separate_join_conds", + child.typ() + ) } + }; + + match location { + JoinCondDependency::Left => left_conds.push(child), + JoinCondDependency::Right => right_conds.push(child), + JoinCondDependency::Both => join_conds.push(child), + JoinCondDependency::None => keep_conds.push(child), } } @@ -177,6 +184,7 @@ fn filter_join_transpose( child: RelNode, cond: RelNode, ) -> Vec> { + // TODO: Push existing join conditions down as well let old_join = LogicalJoin::from_rel_node(child.into()).unwrap(); let cond_as_logexpr = LogOpExpr::from_rel_node(cond.into()).unwrap(); @@ -232,7 +240,7 @@ fn filter_join_transpose( } } _ => { - // We don't support modifying the join condition for other join types + // We don't support modifying the join condition for other join types yet LogicalJoin::new(new_left, new_right, old_join.cond(), old_join.join_type()) } }; @@ -311,10 +319,13 @@ fn apply_filter_pushdown( mod tests { use std::sync::Arc; + use datafusion_expr::JoinType; + use crate::{ plan_nodes::{ BinOpExpr, BinOpType, ColumnRefExpr, ConstantExpr, ExprList, LogOpExpr, LogOpType, - LogicalFilter, LogicalProjection, LogicalScan, LogicalSort, OptRelNode, OptRelNodeTyp, + LogicalFilter, LogicalJoin, LogicalProjection, LogicalScan, LogicalSort, OptRelNode, + OptRelNodeTyp, }, testing::new_dummy_optimizer, }; @@ -438,4 +449,149 @@ mod tests { assert!(matches!(plan.typ, OptRelNodeTyp::Projection)); assert!(matches!(plan.child(0).typ, OptRelNodeTyp::Filter)); } + + #[test] + fn push_past_join_conjunction() { + // Test pushing a complex filter past a join, where one clause can + // be pushed to the 
left child, one to the right child, one gets incorporated + // into the (now inner) join condition, and a constant one remains in the + // original filter. + let dummy_optimizer = new_dummy_optimizer(); + + let scan1 = LogicalScan::new("customer".into()); + + let scan2 = LogicalScan::new("orders".into()); + + let join = LogicalJoin::new( + scan1.into_plan_node(), + scan2.into_plan_node(), + LogOpExpr::new( + LogOpType::And, + ExprList::new(vec![BinOpExpr::new( + ColumnRefExpr::new(0).into_expr(), + ConstantExpr::int32(1).into_expr(), + BinOpType::Eq, + ) + .into_expr()]), + ) + .into_expr(), + super::JoinType::Inner, + ); + + let filter_expr = LogOpExpr::new( + LogOpType::And, + ExprList::new(vec![ + BinOpExpr::new( + // This one should be pushed to the left child + ColumnRefExpr::new(0).into_expr(), + ConstantExpr::int32(5).into_expr(), + BinOpType::Eq, + ) + .into_expr(), + BinOpExpr::new( + // This one should be pushed to the right child + ColumnRefExpr::new(11).into_expr(), + ConstantExpr::int32(6).into_expr(), + BinOpType::Eq, + ) + .into_expr(), + BinOpExpr::new( + // This one should be pushed to the join condition + ColumnRefExpr::new(2).into_expr(), + ColumnRefExpr::new(3).into_expr(), + BinOpType::Eq, + ) + .into_expr(), + BinOpExpr::new( + // always true, should be removed by other rules + ConstantExpr::int32(2).into_expr(), + ConstantExpr::int32(7).into_expr(), + BinOpType::Eq, + ) + .into_expr(), + ]), + ); + + let plan = apply_filter_pushdown( + &dummy_optimizer, + super::FilterPushdownRulePicks { + child: Arc::unwrap_or_clone(join.into_rel_node()), + cond: Arc::unwrap_or_clone(filter_expr.into_rel_node()), + }, + ); + + let plan = plan.first().unwrap(); + + // Examine original filter + condition + let top_level_filter = LogicalFilter::from_rel_node(plan.clone().into()).unwrap(); + let top_level_filter_cond = + LogOpExpr::from_rel_node(top_level_filter.cond().into_rel_node()).unwrap(); + assert!(matches!(top_level_filter_cond.op_type(), 
LogOpType::And)); + assert!(matches!(top_level_filter_cond.children().len(), 1)); + let bin_op_0 = + BinOpExpr::from_rel_node(top_level_filter_cond.children()[0].clone().into_rel_node()) + .unwrap(); + assert!(matches!(bin_op_0.op_type(), BinOpType::Eq)); + let col_0 = + ColumnRefExpr::from_rel_node(bin_op_0.left_child().clone().into_rel_node()).unwrap(); + let col_1 = + ConstantExpr::from_rel_node(bin_op_0.right_child().clone().into_rel_node()).unwrap(); + assert_eq!(col_0.index(), 2); + assert_eq!(col_1.value().as_i32(), 3); + + // Examine join node + condition + let join_node = + LogicalJoin::from_rel_node(top_level_filter.child().clone().into_rel_node()).unwrap(); + let join_conds = LogOpExpr::from_rel_node(join_node.cond().into_rel_node()).unwrap(); + assert!(matches!(join_conds.op_type(), LogOpType::And)); + assert!(matches!(join_conds.children().len(), 2)); + let bin_op_1 = + BinOpExpr::from_rel_node(join_conds.children()[0].clone().into_rel_node()).unwrap(); + let bin_op_2 = + BinOpExpr::from_rel_node(join_conds.children()[1].clone().into_rel_node()).unwrap(); + assert!(matches!(bin_op_1.op_type(), BinOpType::Eq)); + assert!(matches!(bin_op_2.op_type(), BinOpType::Eq)); + let col_2 = + ColumnRefExpr::from_rel_node(bin_op_1.left_child().clone().into_rel_node()).unwrap(); + let col_3 = + ColumnRefExpr::from_rel_node(bin_op_1.right_child().clone().into_rel_node()).unwrap(); + let col_4 = + ColumnRefExpr::from_rel_node(bin_op_2.left_child().clone().into_rel_node()).unwrap(); + let col_5 = + ConstantExpr::from_rel_node(bin_op_2.right_child().clone().into_rel_node()).unwrap(); + assert_eq!(col_2.index(), 2); + assert_eq!(col_3.index(), 3); + assert_eq!(col_4.index(), 0); + assert_eq!(col_5.value().as_i32(), 1); + + // Examine left child filter + condition + let filter_1 = LogicalFilter::from_rel_node(join_node.left().into_rel_node()).unwrap(); + let filter_1_cond = LogOpExpr::from_rel_node(filter_1.cond().into_rel_node()).unwrap(); + 
assert!(matches!(filter_1_cond.children().len(), 1)); + assert!(matches!(filter_1_cond.op_type(), LogOpType::And)); + let bin_op_3 = + BinOpExpr::from_rel_node(filter_1_cond.children()[0].clone().into_rel_node()).unwrap(); + assert!(matches!(bin_op_3.op_type(), BinOpType::Eq)); + let col_6 = + ColumnRefExpr::from_rel_node(bin_op_3.left_child().clone().into_rel_node()).unwrap(); + let col_7 = + ConstantExpr::from_rel_node(bin_op_3.right_child().clone().into_rel_node()).unwrap(); + assert_eq!(col_6.index(), 0); + assert_eq!(col_7.value().as_i32(), 5); + + // Examine right child filter + condition + let filter_2 = LogicalFilter::from_rel_node(join_node.right().into_rel_node()).unwrap(); + let filter_2_cond = LogOpExpr::from_rel_node(filter_2.cond().into_rel_node()).unwrap(); + assert!(matches!(filter_2_cond.op_type(), LogOpType::And)); + assert!(matches!(filter_2_cond.children().len(), 1)); + let bin_op_4 = + BinOpExpr::from_rel_node(filter_2_cond.children()[0].clone().into_rel_node()).unwrap(); + assert!(matches!(bin_op_4.op_type(), BinOpType::Eq)); + let col_8 = + ColumnRefExpr::from_rel_node(bin_op_4.left_child().clone().into_rel_node()).unwrap(); + let col_9 = + ConstantExpr::from_rel_node(bin_op_4.right_child().clone().into_rel_node()).unwrap(); + assert_eq!(col_8.index(), 11); + assert_eq!(col_9.value().as_i32(), 6); + } } From 609233d4697f2e951ef98c585801ac699a9d6a52 Mon Sep 17 00:00:00 2001 From: Benjamin O Date: Sat, 23 Mar 2024 08:49:56 -0400 Subject: [PATCH 14/61] Filter pushdown is beginning to work --- optd-datafusion-repr/src/lib.rs | 6 ++-- optd-datafusion-repr/src/plan_nodes.rs | 33 ++++++++++++++++++- optd-datafusion-repr/src/plan_nodes/join.rs | 11 ++----- .../src/rules/filter_pushdown.rs | 11 ++++--- optd-datafusion-repr/src/rules/joins.rs | 3 ++ 5 files changed, 48 insertions(+), 16 deletions(-) diff --git a/optd-datafusion-repr/src/lib.rs b/optd-datafusion-repr/src/lib.rs index d5f8f737..32e96f0a 100644 --- a/optd-datafusion-repr/src/lib.rs +++ 
b/optd-datafusion-repr/src/lib.rs @@ -17,8 +17,8 @@ use properties::{ }; use rules::{ EliminateDuplicatedAggExprRule, EliminateDuplicatedSortExprRule, EliminateFilterRule, - EliminateJoinRule, EliminateLimitRule, HashJoinRule, JoinAssocRule, JoinCommuteRule, - PhysicalConversionRule, ProjectionPullUpJoin, SimplifyFilterRule, + EliminateJoinRule, EliminateLimitRule, FilterPushdownRule, HashJoinRule, JoinAssocRule, + JoinCommuteRule, PhysicalConversionRule, ProjectionPullUpJoin, SimplifyFilterRule, }; pub use optd_core::rel_node::Value; @@ -28,6 +28,7 @@ mod explain; pub mod plan_nodes; pub mod properties; pub mod rules; +#[cfg(test)] mod testing; pub struct DatafusionOptimizer { @@ -58,6 +59,7 @@ impl DatafusionOptimizer { let rules = PhysicalConversionRule::all_conversions(); let mut rule_wrappers = vec![ RuleWrapper::new_heuristic(Arc::new(SimplifyFilterRule::new())), + RuleWrapper::new_heuristic(Arc::new(FilterPushdownRule::new())), RuleWrapper::new_heuristic(Arc::new(EliminateFilterRule::new())), RuleWrapper::new_heuristic(Arc::new(EliminateJoinRule::new())), RuleWrapper::new_heuristic(Arc::new(EliminateLimitRule::new())), diff --git a/optd-datafusion-repr/src/plan_nodes.rs b/optd-datafusion-repr/src/plan_nodes.rs index f9107f16..2ee22023 100644 --- a/optd-datafusion-repr/src/plan_nodes.rs +++ b/optd-datafusion-repr/src/plan_nodes.rs @@ -30,7 +30,7 @@ pub use expr::{ SortOrderExpr, SortOrderType, UnOpExpr, UnOpType, }; pub use filter::{LogicalFilter, PhysicalFilter}; -pub use join::{JoinType, LogicalJoin, MappedColRef, PhysicalHashJoin, PhysicalNestedLoopJoin}; +pub use join::{JoinType, LogicalJoin, PhysicalHashJoin, PhysicalNestedLoopJoin}; pub use limit::{LogicalLimit, PhysicalLimit}; use pretty_xmlish::{Pretty, PrettyConfig}; pub use projection::{LogicalProjection, PhysicalProjection}; @@ -271,6 +271,37 @@ impl Expr { pub fn child(&self, idx: usize) -> OptRelNodeRef { self.0.child(idx) } + + /// Recursively rewrite all column references in the 
expression.using a provided + /// function that replaces a column index. + pub fn rewrite_column_refs(&self, rewrite_fn: &impl Fn(usize) -> usize) -> Self { + assert!(self.typ().is_expression()); + if let OptRelNodeTyp::ColumnRef = self.typ() { + let col_ref = ColumnRefExpr::from_rel_node(self.0.clone()).unwrap(); + let new_col_ref = ColumnRefExpr::new(rewrite_fn(col_ref.index())); + return Self(new_col_ref.into_rel_node()); + } + + let children = self.0.children.clone(); + let children = children + .into_iter() + .map(|child| { + Expr::from_rel_node(child.clone()) + .unwrap() + .rewrite_column_refs(rewrite_fn) + .into_rel_node() + }) + .collect(); + Expr::from_rel_node( + RelNode { + typ: self.typ(), + children, + data: self.0.data.clone(), + } + .into(), + ) + .unwrap() + } } impl OptRelNode for Expr { diff --git a/optd-datafusion-repr/src/plan_nodes/join.rs b/optd-datafusion-repr/src/plan_nodes/join.rs index e4da1588..a6203c10 100644 --- a/optd-datafusion-repr/src/plan_nodes/join.rs +++ b/optd-datafusion-repr/src/plan_nodes/join.rs @@ -63,22 +63,17 @@ define_plan_node!( ], { join_type: JoinType } ); -pub enum MappedColRef { - Left(usize), - Right(usize), -} - impl LogicalJoin { pub fn map_through_join( index: usize, left_schema_size: usize, right_schema_size: usize, - ) -> MappedColRef { + ) -> usize { assert!(index < left_schema_size + right_schema_size); if index < left_schema_size { - MappedColRef::Left(index) + index } else { - MappedColRef::Right(index - left_schema_size) + index - left_schema_size } } } diff --git a/optd-datafusion-repr/src/rules/filter_pushdown.rs b/optd-datafusion-repr/src/rules/filter_pushdown.rs index ce5de630..1f97d7ee 100644 --- a/optd-datafusion-repr/src/rules/filter_pushdown.rs +++ b/optd-datafusion-repr/src/rules/filter_pushdown.rs @@ -16,7 +16,7 @@ use optd_core::{optimizer::Optimizer, rel_node::RelNode}; use crate::plan_nodes::{ BinOpExpr, ColumnRefExpr, Expr, ExprList, JoinType, LogOpExpr, LogOpType, LogicalFilter, - 
LogicalJoin, LogicalProjection, LogicalSort, MappedColRef, OptRelNode, OptRelNodeTyp, + LogicalJoin, LogicalProjection, LogicalSort, OptRelNode, OptRelNodeTyp, }; use crate::properties::schema::SchemaPropertyBuilder; @@ -25,7 +25,7 @@ use super::macros::define_rule; define_rule!( FilterPushdownRule, apply_filter_pushdown, - (Filter, child, [cond]) + (Filter, [child], [cond]) ); fn merge_conds(first: Expr, second: Expr) -> Expr { @@ -121,7 +121,9 @@ fn separate_join_conds( match location { JoinCondDependency::Left => left_conds.push(child), - JoinCondDependency::Right => right_conds.push(child), + JoinCondDependency::Right => right_conds.push(child.rewrite_column_refs(&|idx| { + LogicalJoin::map_through_join(idx, left_schema_size, right_schema_size) + })), JoinCondDependency::Both => join_conds.push(child), JoinCondDependency::None => keep_conds.push(child), } @@ -276,6 +278,7 @@ fn apply_filter_pushdown( optimizer: &impl Optimizer, FilterPushdownRulePicks { child, cond }: FilterPushdownRulePicks, ) -> Vec> { + dbg!("Reached apply_filter_pushdown with", child.typ.clone()); // Push filter down one node let mut result_from_this_step = match child.typ { OptRelNodeTyp::Projection => filter_project_transpose(optimizer, child, cond), @@ -319,8 +322,6 @@ fn apply_filter_pushdown( mod tests { use std::sync::Arc; - use datafusion_expr::JoinType; - use crate::{ plan_nodes::{ BinOpExpr, BinOpType, ColumnRefExpr, ConstantExpr, ExprList, LogOpExpr, LogOpType, diff --git a/optd-datafusion-repr/src/rules/joins.rs b/optd-datafusion-repr/src/rules/joins.rs index 224dbc91..3b5df47e 100644 --- a/optd-datafusion-repr/src/rules/joins.rs +++ b/optd-datafusion-repr/src/rules/joins.rs @@ -26,6 +26,7 @@ fn apply_join_commute( optimizer: &impl Optimizer, JoinCommuteRulePicks { left, right, cond }: JoinCommuteRulePicks, ) -> Vec> { + // TODO: migrate to new rewrite_column_refs helper fn rewrite_column_refs(expr: Expr, left_size: usize, right_size: usize) -> Expr { let expr = 
expr.into_rel_node(); if let Some(expr) = ColumnRefExpr::from_rel_node(expr.clone()) { @@ -139,6 +140,7 @@ fn apply_join_assoc( cond2, }: JoinAssocRulePicks, ) -> Vec> { + // TODO: migrate to new rewrite_column_refs helper fn rewrite_column_refs(expr: Expr, a_size: usize) -> Option { let expr = expr.into_rel_node(); if let Some(expr) = ColumnRefExpr::from_rel_node(expr.clone()) { @@ -289,6 +291,7 @@ fn apply_projection_pull_up_join( LogicalJoin::new( PlanNode::from_group(left), PlanNode::from_group(right), + // TODO: possibly migrate to new rewrite_column_refs helper mapping.rewrite_condition( Expr::from_rel_node(Arc::new(cond)).unwrap(), left_schema.len(), From bb3d1a2f1f900c0b801826ccef09bc026cba991f Mon Sep 17 00:00:00 2001 From: Benjamin O Date: Sun, 24 Mar 2024 21:46:41 -0400 Subject: [PATCH 15/61] Working testing infra + all tests working! --- .../src/rules/filter_pushdown.rs | 56 ++++++++++++------- optd-datafusion-repr/src/testing.rs | 4 +- 2 files changed, 38 insertions(+), 22 deletions(-) diff --git a/optd-datafusion-repr/src/rules/filter_pushdown.rs b/optd-datafusion-repr/src/rules/filter_pushdown.rs index 1f97d7ee..93ff621f 100644 --- a/optd-datafusion-repr/src/rules/filter_pushdown.rs +++ b/optd-datafusion-repr/src/rules/filter_pushdown.rs @@ -34,6 +34,7 @@ fn merge_conds(first: Expr, second: Expr) -> Expr { LogOpExpr::new_flattened_nested_logical(LogOpType::And, new_expr_list).into_expr() } +#[derive(Debug, Clone, Copy)] enum JoinCondDependency { Left, Right, @@ -92,11 +93,12 @@ fn separate_join_conds( // If this is an OR logopexpr, then we have to check if that entire logopexpr // can be separated. for child in cond.children() { + dbg!("Working on child: ", child.clone()); let location = match child.typ() { OptRelNodeTyp::LogOp(LogOpType::And) => { // In theory, we could recursively call the function to handle this // case. However, it should not be possible to have nested LogOpExpr - // ANDs. So, panic so we can detect a bug + // ANDs. 
We panic so that we can notice the bug. panic!("Nested AND LogOpExprs detected in filter pushdown!"); } OptRelNodeTyp::LogOp(LogOpType::Or) => { @@ -119,6 +121,7 @@ fn separate_join_conds( } }; + println!("Location: {:?}", location.clone()); match location { JoinCondDependency::Left => left_conds.push(child), JoinCondDependency::Right => right_conds.push(child.rewrite_column_refs(&|idx| { @@ -186,6 +189,7 @@ fn filter_join_transpose( child: RelNode, cond: RelNode, ) -> Vec> { + println!("Filter join transpose hit with type {:?}", child.typ); // TODO: Push existing join conditions down as well let old_join = LogicalJoin::from_rel_node(child.into()).unwrap(); let cond_as_logexpr = LogOpExpr::from_rel_node(cond.into()).unwrap(); @@ -278,7 +282,6 @@ fn apply_filter_pushdown( optimizer: &impl Optimizer, FilterPushdownRulePicks { child, cond }: FilterPushdownRulePicks, ) -> Vec> { - dbg!("Reached apply_filter_pushdown with", child.typ.clone()); // Push filter down one node let mut result_from_this_step = match child.typ { OptRelNodeTyp::Projection => filter_project_transpose(optimizer, child, cond), @@ -425,7 +428,7 @@ mod tests { #[test] fn push_past_proj_basic() { // TODO: write advanced proj with more expr that need to be transformed - let dummy_optimizer = new_dummy_optimizer(); + let mut dummy_optimizer = new_dummy_optimizer(); let scan = LogicalScan::new("customer".into()); let proj = LogicalProjection::new(scan.into_plan_node(), ExprList::new(vec![])); @@ -437,6 +440,15 @@ mod tests { ) .into_expr(); + // Initialize groups + dummy_optimizer + .step_optimize_rel(proj.clone().into_rel_node()) + .unwrap(); + + dummy_optimizer + .step_optimize_rel(proj.clone().child().into_rel_node()) + .unwrap(); + let plan = apply_filter_pushdown( &dummy_optimizer, super::FilterPushdownRulePicks { @@ -457,7 +469,7 @@ mod tests { // be pushed to the left child, one to the right child, one gets incorporated // into the (now inner) join condition, and a constant one remains in the // 
original filter. - let dummy_optimizer = new_dummy_optimizer(); + let mut dummy_optimizer = new_dummy_optimizer(); let scan1 = LogicalScan::new("customer".into()); @@ -499,7 +511,7 @@ mod tests { BinOpExpr::new( // This one should be pushed to the join condition ColumnRefExpr::new(2).into_expr(), - ColumnRefExpr::new(3).into_expr(), + ColumnRefExpr::new(8).into_expr(), BinOpType::Eq, ) .into_expr(), @@ -513,6 +525,19 @@ mod tests { ]), ); + // Initialize groups + dummy_optimizer + .step_optimize_rel(join.clone().into_rel_node()) + .unwrap(); + + dummy_optimizer + .step_optimize_rel(join.clone().left().into_rel_node()) + .unwrap(); + + dummy_optimizer + .step_optimize_rel(join.clone().right().into_rel_node()) + .unwrap(); + let plan = apply_filter_pushdown( &dummy_optimizer, super::FilterPushdownRulePicks { @@ -534,36 +559,27 @@ mod tests { .unwrap(); assert!(matches!(bin_op_0.op_type(), BinOpType::Eq)); let col_0 = - ColumnRefExpr::from_rel_node(bin_op_0.left_child().clone().into_rel_node()).unwrap(); + ConstantExpr::from_rel_node(bin_op_0.left_child().clone().into_rel_node()).unwrap(); let col_1 = ConstantExpr::from_rel_node(bin_op_0.right_child().clone().into_rel_node()).unwrap(); - assert_eq!(col_0.index(), 2); - assert_eq!(col_1.value().as_i32(), 3); + assert_eq!(col_0.value().as_i32(), 2); + assert_eq!(col_1.value().as_i32(), 7); // Examine join node + condition let join_node = LogicalJoin::from_rel_node(top_level_filter.child().clone().into_rel_node()).unwrap(); let join_conds = LogOpExpr::from_rel_node(join_node.cond().into_rel_node()).unwrap(); assert!(matches!(join_conds.op_type(), LogOpType::And)); - assert!(matches!(join_conds.children().len(), 2)); + assert_eq!(join_conds.children().len(), 2); let bin_op_1 = BinOpExpr::from_rel_node(join_conds.children()[0].clone().into_rel_node()).unwrap(); - let bin_op_2 = - BinOpExpr::from_rel_node(join_conds.children()[1].clone().into_rel_node()).unwrap(); assert!(matches!(bin_op_1.op_type(), BinOpType::Eq)); - 
assert!(matches!(bin_op_2.op_type(), BinOpType::Eq)); let col_2 = ColumnRefExpr::from_rel_node(bin_op_1.left_child().clone().into_rel_node()).unwrap(); let col_3 = ColumnRefExpr::from_rel_node(bin_op_1.right_child().clone().into_rel_node()).unwrap(); - let col_4 = - ColumnRefExpr::from_rel_node(bin_op_2.left_child().clone().into_rel_node()).unwrap(); - let col_5 = - ConstantExpr::from_rel_node(bin_op_2.right_child().clone().into_rel_node()).unwrap(); assert_eq!(col_2.index(), 2); - assert_eq!(col_3.index(), 3); - assert_eq!(col_4.index(), 0); - assert_eq!(col_5.value().as_i32(), 1); + assert_eq!(col_3.index(), 8); // Examine left child filter + condition let filter_1 = LogicalFilter::from_rel_node(join_node.left().into_rel_node()).unwrap(); @@ -592,7 +608,7 @@ mod tests { ColumnRefExpr::from_rel_node(bin_op_4.left_child().clone().into_rel_node()).unwrap(); let col_9 = ConstantExpr::from_rel_node(bin_op_4.right_child().clone().into_rel_node()).unwrap(); - assert_eq!(col_8.index(), 11); + assert_eq!(col_8.index(), 3); assert_eq!(col_9.value().as_i32(), 6); } } diff --git a/optd-datafusion-repr/src/testing.rs b/optd-datafusion-repr/src/testing.rs index 81bf7d60..ca43f2eb 100644 --- a/optd-datafusion-repr/src/testing.rs +++ b/optd-datafusion-repr/src/testing.rs @@ -4,14 +4,14 @@ mod tpch_catalog; use std::sync::Arc; pub use dummy_cost::DummyCostModel; -use optd_core::{cascades::CascadesOptimizer, optimizer::Optimizer}; +use optd_core::cascades::CascadesOptimizer; pub use tpch_catalog::TpchCatalog; use crate::{plan_nodes::OptRelNodeTyp, properties::schema::SchemaPropertyBuilder}; /// Create a "dummy" optimizer preloaded with the TPC-H catalog for testing /// Note: Only provides the schema property currently -pub fn new_dummy_optimizer() -> impl Optimizer { +pub fn new_dummy_optimizer() -> CascadesOptimizer { let dummy_catalog = Arc::new(TpchCatalog); let dummy_optimizer = CascadesOptimizer::new( vec![], From 9cb0894eb5d374c514b65c1df2038c035d3d37c9 Mon Sep 17 00:00:00 
2001 From: Benjamin O Date: Sun, 24 Mar 2024 22:15:42 -0400 Subject: [PATCH 16/61] Fix projection mistake --- .../src/rules/filter_pushdown.rs | 77 ++++++++++++++++++- 1 file changed, 74 insertions(+), 3 deletions(-) diff --git a/optd-datafusion-repr/src/rules/filter_pushdown.rs b/optd-datafusion-repr/src/rules/filter_pushdown.rs index 93ff621f..50021434 100644 --- a/optd-datafusion-repr/src/rules/filter_pushdown.rs +++ b/optd-datafusion-repr/src/rules/filter_pushdown.rs @@ -156,13 +156,13 @@ fn filter_project_transpose( .len(); let proj_col_map = old_proj.compute_column_mapping().unwrap(); - proj_col_map.rewrite_condition( + let rewritten_cond = proj_col_map.rewrite_condition( cond_as_expr.clone(), projection_schema_len, child_schema_len, ); - let new_filter_node = LogicalFilter::new(old_proj.child(), cond_as_expr); + let new_filter_node = LogicalFilter::new(old_proj.child(), rewritten_cond); let new_proj = LogicalProjection::new(new_filter_node.into_plan_node(), old_proj.exprs()); vec![new_proj.into_rel_node().as_ref().clone()] } @@ -427,7 +427,6 @@ mod tests { #[test] fn push_past_proj_basic() { - // TODO: write advanced proj with more expr that need to be transformed let mut dummy_optimizer = new_dummy_optimizer(); let scan = LogicalScan::new("customer".into()); @@ -463,6 +462,78 @@ mod tests { assert!(matches!(plan.child(0).typ, OptRelNodeTyp::Filter)); } + #[test] + fn push_past_proj_adv() { + let mut dummy_optimizer = new_dummy_optimizer(); + + let scan = LogicalScan::new("customer".into()); + let proj = LogicalProjection::new( + scan.into_plan_node(), + ExprList::new(vec![ + ColumnRefExpr::new(0).into_expr(), + ColumnRefExpr::new(4).into_expr(), + ColumnRefExpr::new(5).into_expr(), + ColumnRefExpr::new(7).into_expr(), + ]), + ); + + let filter_expr = LogOpExpr::new( + LogOpType::And, + ExprList::new(vec![ + BinOpExpr::new( + // This one should be pushed to the left child + ColumnRefExpr::new(1).into_expr(), + ConstantExpr::int32(5).into_expr(), + 
BinOpType::Eq, + ) + .into_expr(), + BinOpExpr::new( + // This one should be pushed to the right child + ColumnRefExpr::new(3).into_expr(), + ConstantExpr::int32(6).into_expr(), + BinOpType::Eq, + ) + .into_expr(), + ]), + ); + + // Initialize groups + dummy_optimizer + .step_optimize_rel(proj.clone().into_rel_node()) + .unwrap(); + + dummy_optimizer + .step_optimize_rel(proj.clone().child().into_rel_node()) + .unwrap(); + + let plan = apply_filter_pushdown( + &dummy_optimizer, + super::FilterPushdownRulePicks { + child: Arc::unwrap_or_clone(proj.into_rel_node()), + cond: Arc::unwrap_or_clone(filter_expr.into_rel_node()), + }, + ); + + let plan = plan.first().unwrap(); + + assert!(matches!(plan.typ, OptRelNodeTyp::Projection)); + let plan_filter = LogicalFilter::from_rel_node(plan.child(0)).unwrap(); + assert!(matches!(plan_filter.0.typ(), OptRelNodeTyp::Filter)); + let plan_filter_expr = + LogOpExpr::from_rel_node(plan_filter.cond().into_rel_node()).unwrap(); + assert!(matches!(plan_filter_expr.op_type(), LogOpType::And)); + let op_0 = BinOpExpr::from_rel_node(plan_filter_expr.children()[0].clone().into_rel_node()) + .unwrap(); + let col_0 = + ColumnRefExpr::from_rel_node(op_0.left_child().clone().into_rel_node()).unwrap(); + assert_eq!(col_0.index(), 4); + let op_1 = BinOpExpr::from_rel_node(plan_filter_expr.children()[1].clone().into_rel_node()) + .unwrap(); + let col_1 = + ColumnRefExpr::from_rel_node(op_1.left_child().clone().into_rel_node()).unwrap(); + assert_eq!(col_1.index(), 7); + } + #[test] fn push_past_join_conjunction() { // Test pushing a complex filter past a join, where one clause can From 038db19120a7abb55588a45a43dc165614c4649a Mon Sep 17 00:00:00 2001 From: Benjamin O Date: Sun, 24 Mar 2024 23:43:22 -0400 Subject: [PATCH 17/61] restructure predicate traversal --- .../src/rules/filter_pushdown.rs | 135 ++++++++++-------- 1 file changed, 73 insertions(+), 62 deletions(-) diff --git a/optd-datafusion-repr/src/rules/filter_pushdown.rs 
b/optd-datafusion-repr/src/rules/filter_pushdown.rs index 50021434..76696cca 100644 --- a/optd-datafusion-repr/src/rules/filter_pushdown.rs +++ b/optd-datafusion-repr/src/rules/filter_pushdown.rs @@ -10,6 +10,7 @@ use core::panic; use std::collections::HashMap; +use std::vec; use optd_core::rules::{Rule, RuleMatcher}; use optd_core::{optimizer::Optimizer, rel_node::RelNode}; @@ -43,7 +44,7 @@ enum JoinCondDependency { } fn determine_join_cond_dep( - children: Vec, + children: &Vec, left_schema_size: usize, right_schema_size: usize, ) -> JoinCondDependency { @@ -52,7 +53,7 @@ fn determine_join_cond_dep( for child in children { match child.typ() { OptRelNodeTyp::ColumnRef => { - let col_ref = ColumnRefExpr::from_rel_node(child.into_rel_node()).unwrap(); + let col_ref = ColumnRefExpr::from_rel_node(child.clone().into_rel_node()).unwrap(); let index = col_ref.index(); if index < left_schema_size { left_col = true; @@ -72,67 +73,49 @@ fn determine_join_cond_dep( } } -// Recursively search through all predicates in the join condition (LogExprs and BinOps), -// separating them into those that only involve the left child, those that only involve the -// right child, and those that involve both children. Constant expressions involve neither -// child. -// pre-condition: the cond is an AND LogOpExpr -fn separate_join_conds( - cond: LogOpExpr, - left_schema_size: usize, - right_schema_size: usize, -) -> (Vec, Vec, Vec, Vec) { - let mut left_conds = vec![]; - let mut right_conds = vec![]; - let mut join_conds = vec![]; - let mut keep_conds = vec![]; - - // For each child, if it is a LogOpExpr with and, recursively call this function - // If it is a BinOpExpr, check both children and add to the appropriate list - // If this is an AND logopexpr, then each of the conditions can be separated. - // If this is an OR logopexpr, then we have to check if that entire logopexpr - // can be separated. 
- for child in cond.children() { - dbg!("Working on child: ", child.clone()); - let location = match child.typ() { - OptRelNodeTyp::LogOp(LogOpType::And) => { - // In theory, we could recursively call the function to handle this - // case. However, it should not be possible to have nested LogOpExpr - // ANDs. We panic so that we can notice the bug. - panic!("Nested AND LogOpExprs detected in filter pushdown!"); - } - OptRelNodeTyp::LogOp(LogOpType::Or) => { - let log_expr = LogOpExpr::from_rel_node(child.clone().into_rel_node()).unwrap(); - determine_join_cond_dep(log_expr.children(), left_schema_size, right_schema_size) - } - OptRelNodeTyp::BinOp(_) => { - let bin_expr = BinOpExpr::from_rel_node(child.clone().into_rel_node()).unwrap(); - determine_join_cond_dep( - vec![bin_expr.left_child(), bin_expr.right_child()], - left_schema_size, - right_schema_size, - ) - } - _ => { - panic!( - "Expression type {} not yet implemented for separate_join_conds", - child.typ() - ) +/// Do not call directly +fn categorize_conds_helper(cond: Expr, bottom_level_children: &mut Vec) { + match cond.typ() { + OptRelNodeTyp::LogOp(_) | OptRelNodeTyp::BinOp(_) | OptRelNodeTyp::UnOp(_) => { + for child in &cond.into_rel_node().children { + categorize_conds_helper( + Expr::from_rel_node(child.clone()).unwrap(), + bottom_level_children, + ); } - }; - - println!("Location: {:?}", location.clone()); - match location { - JoinCondDependency::Left => left_conds.push(child), - JoinCondDependency::Right => right_conds.push(child.rewrite_column_refs(&|idx| { - LogicalJoin::map_through_join(idx, left_schema_size, right_schema_size) - })), - JoinCondDependency::Both => join_conds.push(child), - JoinCondDependency::None => keep_conds.push(child), } + OptRelNodeTyp::ColumnRef | OptRelNodeTyp::Constant(_) => bottom_level_children.push(cond), + _ => panic!( + "Encountered unhandled expr of type {:?} in categorize_conds_helper", + cond.typ() + ), } +} - (left_conds, right_conds, join_conds, keep_conds) 
+/// This function recurses/loops to the bottom-level of the expression tree, +/// building a list of bottom-level exprs for each separable expr +/// +/// # Arguments +/// * `categorization_fn` - Function, called with a list of each bottom-level +/// expression, along with the top-level expression node that will be +/// categorized. +/// * `cond` - The top-level expression node to begin separating +fn categorize_conds(mut categorization_fn: impl FnMut(Expr, &Vec), cond: Expr) { + let mut categorize_indep_expr = |cond: Expr| { + let bottom_level_children = &mut vec![]; + categorize_conds_helper(cond.clone(), bottom_level_children); + categorization_fn(cond, bottom_level_children); + }; + match cond.typ() { + OptRelNodeTyp::LogOp(LogOpType::And) => { + for child in &cond.into_rel_node().children { + categorize_indep_expr(Expr::from_rel_node(child.clone()).unwrap()); + } + } + _ => { + categorize_indep_expr(cond); + } + } } /// Datafusion only pushes filter past project when the project does not contain @@ -192,7 +175,6 @@ fn filter_join_transpose( println!("Filter join transpose hit with type {:?}", child.typ); // TODO: Push existing join conditions down as well let old_join = LogicalJoin::from_rel_node(child.into()).unwrap(); - let cond_as_logexpr = LogOpExpr::from_rel_node(cond.into()).unwrap(); let left_schema_size = optimizer .get_property::(old_join.left().into_rel_node(), 0) @@ -201,8 +183,23 @@ fn filter_join_transpose( .get_property::(old_join.right().into_rel_node(), 0) .len(); - let (left_conds, right_conds, join_conds, keep_conds) = - separate_join_conds(cond_as_logexpr, left_schema_size, right_schema_size); + let mut left_conds = vec![]; + let mut right_conds = vec![]; + let mut join_conds = vec![]; + let mut keep_conds = vec![]; + + let categorization_fn = |expr: Expr, children: &Vec| { + let location = determine_join_cond_dep(children, left_schema_size, right_schema_size); + match location { + JoinCondDependency::Left => left_conds.push(expr), + 
JoinCondDependency::Right => right_conds.push(expr.rewrite_column_refs(&|idx| { + LogicalJoin::map_through_join(idx, left_schema_size, right_schema_size) + })), + JoinCondDependency::Both => join_conds.push(expr), + JoinCondDependency::None => keep_conds.push(expr), + } + }; + categorize_conds(categorization_fn, Expr::from_rel_node(cond.into()).unwrap()); let new_left = if !left_conds.is_empty() { let new_filter_node = LogicalFilter::new( @@ -278,6 +275,19 @@ fn filter_sort_transpose( vec![new_sort.into_rel_node().as_ref().clone()] } +/// Filter is commutable past aggregations when the filter condition only +/// involves the group by columns. +fn filter_agg_transpose( + _optimizer: &impl Optimizer, + child: RelNode, + cond: RelNode, +) -> Vec> { + // let mut keep_conds = vec![]; + // let mut push_conds = vec![]; + + vec![] +} + fn apply_filter_pushdown( optimizer: &impl Optimizer, FilterPushdownRulePicks { child, cond }: FilterPushdownRulePicks, @@ -289,6 +299,7 @@ fn apply_filter_pushdown( // OptRelNodeTyp::Scan => todo!(), // TODO: Add predicate field to scan node OptRelNodeTyp::Join(_) => filter_join_transpose(optimizer, child, cond), OptRelNodeTyp::Sort => filter_sort_transpose(optimizer, child, cond), + OptRelNodeTyp::Agg => filter_sort_transpose(optimizer, child, cond), _ => vec![], }; From 5023c2a1fabe38e1cbfc66d2c29d177f81d20c66 Mon Sep 17 00:00:00 2001 From: Benjamin O Date: Mon, 25 Mar 2024 00:13:28 -0400 Subject: [PATCH 18/61] Push filter past agg --- .../src/rules/filter_pushdown.rs | 175 +++++++++++++++++- 1 file changed, 165 insertions(+), 10 deletions(-) diff --git a/optd-datafusion-repr/src/rules/filter_pushdown.rs b/optd-datafusion-repr/src/rules/filter_pushdown.rs index 76696cca..566dbf57 100644 --- a/optd-datafusion-repr/src/rules/filter_pushdown.rs +++ b/optd-datafusion-repr/src/rules/filter_pushdown.rs @@ -9,14 +9,14 @@ //! further down the query plan whenever it is possible to do so. 
use core::panic; -use std::collections::HashMap; +use std::collections::{HashMap, HashSet}; use std::vec; use optd_core::rules::{Rule, RuleMatcher}; use optd_core::{optimizer::Optimizer, rel_node::RelNode}; use crate::plan_nodes::{ - BinOpExpr, ColumnRefExpr, Expr, ExprList, JoinType, LogOpExpr, LogOpType, LogicalFilter, + ColumnRefExpr, Expr, ExprList, JoinType, LogOpExpr, LogOpType, LogicalAgg, LogicalFilter, LogicalJoin, LogicalProjection, LogicalSort, OptRelNode, OptRelNodeTyp, }; use crate::properties::schema::SchemaPropertyBuilder; @@ -248,7 +248,7 @@ fn filter_join_transpose( } }; - let new_node = if !keep_conds.is_empty() { + let new_filter = if !keep_conds.is_empty() { let new_filter_node = LogicalFilter::new( new_join.into_plan_node(), LogOpExpr::new(LogOpType::And, ExprList::new(keep_conds)).into_expr(), @@ -258,7 +258,7 @@ fn filter_join_transpose( new_join.into_rel_node().as_ref().clone() }; - vec![new_node] + vec![new_filter] } /// Filter and sort should always be commutable. @@ -282,10 +282,71 @@ fn filter_agg_transpose( child: RelNode, cond: RelNode, ) -> Vec> { - // let mut keep_conds = vec![]; - // let mut push_conds = vec![]; + let old_agg = LogicalAgg::from_rel_node(child.into()).unwrap(); + let group_exprs = old_agg.groups(); + + // Get top-level group-by columns. Does not cover cases where group-by exprs + // are more complex than a top-level column reference. + let group_cols = group_exprs + .into_rel_node() + .children + .iter() + .filter_map(|expr| match expr.typ { + OptRelNodeTyp::ColumnRef => { + Some(ColumnRefExpr::from_rel_node(expr.clone()).unwrap().index()) + } + _ => None, + }) + .collect::>(); + + // Categorize predicates that only use our group-by columns as push-able. 
+ let mut keep_conds = vec![]; + let mut push_conds = vec![]; + + let categorization_fn = |expr: Expr, children: &Vec| { + let mut group_by_cols_only = true; + for child in children { + match child.typ() { + OptRelNodeTyp::ColumnRef => { + let col_ref = + ColumnRefExpr::from_rel_node(child.clone().into_rel_node()).unwrap(); + if !group_cols.contains(&col_ref.index()) { + group_by_cols_only = false; + break; + } + } + _ => {} + } + } + if group_by_cols_only { + push_conds.push(expr); + } else { + keep_conds.push(expr); + } + }; + categorize_conds(categorization_fn, Expr::from_rel_node(cond.into()).unwrap()); + + let new_child = if !push_conds.is_empty() { + LogicalFilter::new( + old_agg.child(), + LogOpExpr::new(LogOpType::And, ExprList::new(push_conds)).into_expr(), + ) + .into_plan_node() + } else { + old_agg.child().into_plan_node() + }; + + let new_agg = LogicalAgg::new(new_child, old_agg.exprs(), old_agg.groups()); + + let new_filter = LogicalFilter::new( + new_agg.into_plan_node(), + LogOpExpr::new(LogOpType::And, ExprList::new(keep_conds)).into_expr(), + ) + .into_rel_node() + .as_ref() + .clone(); - vec![] + vec![new_filter] } fn apply_filter_pushdown( @@ -299,7 +360,7 @@ fn apply_filter_pushdown( // OptRelNodeTyp::Scan => todo!(), // TODO: Add predicate field to scan node OptRelNodeTyp::Join(_) => filter_join_transpose(optimizer, child, cond), OptRelNodeTyp::Sort => filter_sort_transpose(optimizer, child, cond), - OptRelNodeTyp::Agg => filter_sort_transpose(optimizer, child, cond), + OptRelNodeTyp::Agg => filter_agg_transpose(optimizer, child, cond), _ => vec![], }; @@ -339,8 +400,8 @@ mod tests { use crate::{ plan_nodes::{ BinOpExpr, BinOpType, ColumnRefExpr, ConstantExpr, ExprList, LogOpExpr, LogOpType, - LogicalFilter, LogicalJoin, LogicalProjection, LogicalScan, LogicalSort, OptRelNode, - OptRelNodeTyp, + LogicalAgg, LogicalFilter, LogicalJoin, LogicalProjection, LogicalScan, LogicalSort, + OptRelNode, OptRelNodeTyp, }, testing::new_dummy_optimizer, }; 
@@ -693,4 +754,98 @@ mod tests { assert_eq!(col_8.index(), 3); assert_eq!(col_9.value().as_i32(), 6); } + + #[test] + fn push_past_agg() { + // Test pushing a filter past an aggregation node, where the filter + // condition has one clause that can be pushed down to the child and + // one that must remain in the filter. + let mut dummy_optimizer = new_dummy_optimizer(); + + let scan = LogicalScan::new("customer".into()); + + let agg = LogicalAgg::new( + scan.clone().into_plan_node(), + ExprList::new(vec![]), + ExprList::new(vec![ColumnRefExpr::new(0).into_expr()]), + ); + + let filter_expr = LogOpExpr::new( + LogOpType::And, + ExprList::new(vec![ + BinOpExpr::new( + // This one should be pushed to the child + ColumnRefExpr::new(0).into_expr(), + ConstantExpr::int32(5).into_expr(), + BinOpType::Eq, + ) + .into_expr(), + BinOpExpr::new( + // This one should remain in the filter + ColumnRefExpr::new(1).into_expr(), + ConstantExpr::int32(6).into_expr(), + BinOpType::Eq, + ) + .into_expr(), + ]), + ); + + // Initialize groups + dummy_optimizer + .step_optimize_rel(agg.clone().into_rel_node()) + .unwrap(); + dummy_optimizer + .step_optimize_rel(scan.into_rel_node()) + .unwrap(); + + let plan = apply_filter_pushdown( + &dummy_optimizer, + super::FilterPushdownRulePicks { + child: Arc::unwrap_or_clone(agg.into_rel_node()), + cond: Arc::unwrap_or_clone(filter_expr.into_rel_node()), + }, + ); + + let plan = plan.first().unwrap(); + let plan_filter = LogicalFilter::from_rel_node(plan.clone().into()).unwrap(); + assert!(matches!(plan_filter.0.typ(), OptRelNodeTyp::Filter)); + let plan_filter_expr = + LogOpExpr::from_rel_node(plan_filter.cond().into_rel_node()).unwrap(); + assert!(matches!(plan_filter_expr.op_type(), LogOpType::And)); + assert_eq!(plan_filter_expr.children().len(), 1); + let op_0 = BinOpExpr::from_rel_node(plan_filter_expr.children()[0].clone().into_rel_node()) + .unwrap(); + let col_0 = + 
ColumnRefExpr::from_rel_node(op_0.left_child().clone().into_rel_node()).unwrap(); + assert_eq!(col_0.index(), 1); + let col_1 = + ConstantExpr::from_rel_node(op_0.right_child().clone().into_rel_node()).unwrap(); + assert_eq!(col_1.value().as_i32(), 6); + + let plan_agg = LogicalAgg::from_rel_node(plan.child(0)).unwrap(); + let plan_agg_groups = plan_agg.groups(); + assert_eq!(plan_agg_groups.len(), 1); + let group_col = ColumnRefExpr::from_rel_node(plan_agg_groups.child(0).into_rel_node()) + .unwrap() + .index(); + assert_eq!(group_col, 0); + + let plan_agg_child_filter = + LogicalFilter::from_rel_node(plan_agg.child().into_rel_node()).unwrap(); + let plan_agg_child_filter_expr = + LogOpExpr::from_rel_node(plan_agg_child_filter.cond().into_rel_node()).unwrap(); + assert!(matches!( + plan_agg_child_filter_expr.op_type(), + LogOpType::And + )); + assert_eq!(plan_agg_child_filter_expr.children().len(), 1); + let op_1 = + BinOpExpr::from_rel_node(plan_agg_child_filter_expr.child(0).into_rel_node()).unwrap(); + let col_2 = + ColumnRefExpr::from_rel_node(op_1.left_child().clone().into_rel_node()).unwrap(); + assert_eq!(col_2.index(), 0); + let col_3 = + ConstantExpr::from_rel_node(op_1.right_child().clone().into_rel_node()).unwrap(); + assert_eq!(col_3.value().as_i32(), 5); + } } From 924649b62d9cb251f851a120a49b878b25c3d4f1 Mon Sep 17 00:00:00 2001 From: Benjamin O Date: Mon, 25 Mar 2024 00:22:05 -0400 Subject: [PATCH 19/61] Remove print --- optd-datafusion-repr/src/rules/filter_pushdown.rs | 1 - 1 file changed, 1 deletion(-) diff --git a/optd-datafusion-repr/src/rules/filter_pushdown.rs b/optd-datafusion-repr/src/rules/filter_pushdown.rs index 566dbf57..41dfee79 100644 --- a/optd-datafusion-repr/src/rules/filter_pushdown.rs +++ b/optd-datafusion-repr/src/rules/filter_pushdown.rs @@ -172,7 +172,6 @@ fn filter_join_transpose( child: RelNode, cond: RelNode, ) -> Vec> { - println!("Filter join transpose hit with type {:?}", child.typ); // TODO: Push existing join 
conditions down as well let old_join = LogicalJoin::from_rel_node(child.into()).unwrap(); From 16650a345f5a6c662bd768424cc241e907218c2e Mon Sep 17 00:00:00 2001 From: AveryQi115 Date: Mon, 25 Mar 2024 21:48:14 -0400 Subject: [PATCH 20/61] two stage heuristic Signed-off-by: AveryQi115 --- optd-core/src/heuristics/optimizer.rs | 69 ++++++++++-- optd-datafusion-bridge/src/lib.rs | 11 +- optd-datafusion-repr/src/bin/test_optimize.rs | 1 + optd-datafusion-repr/src/lib.rs | 101 +++++++++++++----- 4 files changed, 144 insertions(+), 38 deletions(-) diff --git a/optd-core/src/heuristics/optimizer.rs b/optd-core/src/heuristics/optimizer.rs index 2c1b1bf8..a5ab31c9 100644 --- a/optd-core/src/heuristics/optimizer.rs +++ b/optd-core/src/heuristics/optimizer.rs @@ -1,9 +1,12 @@ use std::{collections::HashMap, sync::Arc}; use anyhow::Result; +use itertools::Itertools; +use std::any::Any; use crate::{ optimizer::Optimizer, + property::PropertyBuilderAny, rel_node::{RelNode, RelNodeRef, RelNodeTyp}, rules::{Rule, RuleMatcher}, }; @@ -16,6 +19,8 @@ pub enum ApplyOrder { pub struct HeuristicsOptimizer { rules: Arc<[Arc>]>, apply_order: ApplyOrder, + property_builders: Arc<[Box>]>, + properties: HashMap, Arc<[Box]>>, } fn match_node( @@ -102,10 +107,16 @@ fn match_and_pick( } impl HeuristicsOptimizer { - pub fn new_with_rules(rules: Vec>>, apply_order: ApplyOrder) -> Self { + pub fn new_with_rules( + rules: Vec>>, + apply_order: ApplyOrder, + property_builders: Arc<[Box>]>, + ) -> Self { Self { rules: rules.into(), apply_order, + property_builders, + properties: HashMap::new(), } } @@ -122,8 +133,10 @@ impl HeuristicsOptimizer { let matcher = rule.matcher(); if let Some(picks) = match_and_pick(matcher, root_rel.clone()) { let mut results = rule.apply(self, picks); - assert_eq!(results.len(), 1); - root_rel = results.remove(0).into(); + assert!(results.len() <= 1); + if !results.is_empty() { + root_rel = results.remove(0).into(); + } } } Ok(root_rel) @@ -141,20 +154,60 @@ impl 
HeuristicsOptimizer { } .into(), )?; + self.infer_properties(root_rel.clone()); + self.properties.insert( + node.clone(), + self.properties.get(&root_rel.clone()).unwrap().clone(), + ); Ok(node) } ApplyOrder::TopDown => { let root_rel = self.apply_rules(root_rel)?; let optimized_children = self.optimize_inputs(&root_rel.children)?; - Ok(RelNode { + let node: Arc> = RelNode { typ: root_rel.typ.clone(), children: optimized_children, data: root_rel.data.clone(), } - .into()) + .into(); + self.infer_properties(root_rel.clone()); + self.properties.insert( + node.clone(), + self.properties.get(&root_rel.clone()).unwrap().clone(), + ); + Ok(node) } } } + + fn infer_properties(&mut self, root_rel: RelNodeRef) { + if self.properties.contains_key(&root_rel) { + return; + } + + let child_properties = root_rel + .children + .iter() + .map(|child| { + self.infer_properties((*child).clone()); + self.properties.get(child).unwrap().clone() + }) + .collect_vec(); + let mut props = Vec::with_capacity(self.property_builders.len()); + for (id, builder) in self.property_builders.iter().enumerate() { + let child_properties = child_properties + .iter() + .map(|x| x[id].as_ref() as &dyn std::any::Any) + .collect::>(); + let prop = builder.derive_any( + root_rel.typ.clone(), + root_rel.data.clone(), + child_properties.as_slice(), + ); + props.push(prop); + } + self.properties.insert(root_rel.clone(), props.into()); + } } impl Optimizer for HeuristicsOptimizer { @@ -167,8 +220,8 @@ impl Optimizer for HeuristicsOptimizer { root_rel: RelNodeRef, idx: usize, ) -> P::Prop { - let _ = root_rel; - let _ = idx; - unimplemented!() + let props = self.properties.get(&root_rel).unwrap(); + let prop = props[idx].as_ref(); + prop.downcast_ref::().unwrap().clone() } } diff --git a/optd-datafusion-bridge/src/lib.rs b/optd-datafusion-bridge/src/lib.rs index 6bd447ca..4bea295d 100644 --- a/optd-datafusion-bridge/src/lib.rs +++ b/optd-datafusion-bridge/src/lib.rs @@ -218,7 +218,7 @@ impl OptdQueryPlanner { 
optimizer_name: "datafusion".to_string(), })); } - let optd_rel = ctx.conv_into_optd(logical_plan)?; + let mut optd_rel = ctx.conv_into_optd(logical_plan)?; if let Some(explains) = &mut explains { explains.push(StringifiedPlan::new( PlanType::OptimizedLogicalPlan { @@ -230,7 +230,12 @@ impl OptdQueryPlanner { )); } let mut optimizer = self.optimizer.lock().unwrap().take().unwrap(); - let (group_id, optimized_rel, meta) = optimizer.optimize(optd_rel)?; + + if optimizer.is_heuristic_enabled() { + optd_rel = optimizer.heuristic_optimize(optd_rel); + } + + let (group_id, optimized_rel, meta) = optimizer.cascades_optimize(optd_rel)?; if let Some(explains) = &mut explains { explains.push(StringifiedPlan::new( @@ -253,7 +258,7 @@ impl OptdQueryPlanner { }, )); let bindings = optimizer - .optd_optimizer() + .optd_cascades_optimizer() .get_all_group_bindings(group_id, true); let mut join_orders = BTreeSet::new(); let mut logical_join_orders = BTreeSet::new(); diff --git a/optd-datafusion-repr/src/bin/test_optimize.rs b/optd-datafusion-repr/src/bin/test_optimize.rs index 6eb5deb3..eb7a80a1 100644 --- a/optd-datafusion-repr/src/bin/test_optimize.rs +++ b/optd-datafusion-repr/src/bin/test_optimize.rs @@ -92,6 +92,7 @@ pub fn main() { Arc::new(HashJoinRule::new()), ], optd_core::heuristics::ApplyOrder::BottomUp, + Arc::new([]), ); let node = optimizer.optimize(fnal.0.into_rel_node()).unwrap(); println!( diff --git a/optd-datafusion-repr/src/lib.rs b/optd-datafusion-repr/src/lib.rs index fa19b56c..131015e2 100644 --- a/optd-datafusion-repr/src/lib.rs +++ b/optd-datafusion-repr/src/lib.rs @@ -9,8 +9,11 @@ use cost::{ }; use optd_core::{ cascades::{CascadesOptimizer, GroupId, OptimizerProperties}, + heuristics::{ApplyOrder, HeuristicsOptimizer}, + optimizer::Optimizer, + property::PropertyBuilderAny, rel_node::RelNodeMetaMap, - rules::RuleWrapper, + rules::{Rule, RuleWrapper}, }; use plan_nodes::{OptRelNodeRef, OptRelNodeTyp}; @@ -33,9 +36,11 @@ pub mod properties; pub mod rules; 
pub struct DatafusionOptimizer { - optimizer: CascadesOptimizer, + hueristic_optimizer: HeuristicsOptimizer, + cascades_optimizer: CascadesOptimizer, pub runtime_statistics: RuntimeAdaptionStorage, enable_adaptive: bool, + enable_heuristic: bool, } impl DatafusionOptimizer { @@ -47,26 +52,43 @@ impl DatafusionOptimizer { self.enable_adaptive } - pub fn optd_optimizer(&self) -> &CascadesOptimizer { - &self.optimizer + pub fn enable_heuristic(&mut self, enable: bool) { + self.enable_heuristic = enable; + } + + pub fn is_heuristic_enabled(&self) -> bool { + self.enable_heuristic + } + + pub fn optd_cascades_optimizer(&self) -> &CascadesOptimizer { + &self.cascades_optimizer + } + + pub fn optd_hueristic_optimizer(&self) -> &HeuristicsOptimizer { + &self.hueristic_optimizer } pub fn optd_optimizer_mut(&mut self) -> &mut CascadesOptimizer { - &mut self.optimizer + &mut self.cascades_optimizer } - pub fn default_rules() -> Vec>>> - { + pub fn default_heuristic_rules( + ) -> Vec>>> { + vec![ + Arc::new(SimplifyFilterRule::new()), + Arc::new(SimplifyJoinCondRule::new()), + Arc::new(EliminateFilterRule::new()), + Arc::new(EliminateJoinRule::new()), + Arc::new(EliminateLimitRule::new()), + Arc::new(EliminateDuplicatedSortExprRule::new()), + Arc::new(EliminateDuplicatedAggExprRule::new()), + ] + } + + pub fn default_cascades_rules( + ) -> Vec>>> { let rules = PhysicalConversionRule::all_conversions(); - let mut rule_wrappers = vec![ - RuleWrapper::new_heuristic(Arc::new(SimplifyFilterRule::new())), - RuleWrapper::new_heuristic(Arc::new(SimplifyJoinCondRule::new())), - RuleWrapper::new_heuristic(Arc::new(EliminateFilterRule::new())), - RuleWrapper::new_heuristic(Arc::new(EliminateJoinRule::new())), - RuleWrapper::new_heuristic(Arc::new(EliminateLimitRule::new())), - RuleWrapper::new_heuristic(Arc::new(EliminateDuplicatedSortExprRule::new())), - RuleWrapper::new_heuristic(Arc::new(EliminateDuplicatedAggExprRule::new())), - ]; + let mut rule_wrappers = vec![]; for rule in rules 
{ rule_wrappers.push(RuleWrapper::new_cascades(rule)); } @@ -86,23 +108,34 @@ impl DatafusionOptimizer { stats: DataFusionBaseTableStats, enable_adaptive: bool, ) -> Self { - let rules = Self::default_rules(); + let cascades_rules = Self::default_cascades_rules(); + let heuristic_rules = Self::default_heuristic_rules(); + let property_builders: Arc<[Box>]> = Arc::new([ + Box::new(SchemaPropertyBuilder::new(catalog.clone())), + Box::new(ColumnRefPropertyBuilder::new(catalog.clone())), + ]); let cost_model = AdaptiveCostModel::new(DEFAULT_DECAY, stats); Self { runtime_statistics: cost_model.get_runtime_map(), - optimizer: CascadesOptimizer::new_with_prop( - rules, + cascades_optimizer: CascadesOptimizer::new_with_prop( + cascades_rules, Box::new(cost_model), vec![ Box::new(SchemaPropertyBuilder::new(catalog.clone())), - Box::new(ColumnRefPropertyBuilder::new(catalog)), + Box::new(ColumnRefPropertyBuilder::new(catalog.clone())), ], OptimizerProperties { partial_explore_iter: Some(1 << 20), partial_explore_space: Some(1 << 10), }, ), + hueristic_optimizer: HeuristicsOptimizer::new_with_rules( + heuristic_rules, + ApplyOrder::BottomUp, + property_builders.clone(), + ), enable_adaptive, + enable_heuristic: true, } } @@ -140,31 +173,45 @@ impl DatafusionOptimizer { ); Self { runtime_statistics, - optimizer, + cascades_optimizer: optimizer, enable_adaptive: true, + enable_heuristic: false, + hueristic_optimizer: HeuristicsOptimizer::new_with_rules( + vec![], + ApplyOrder::BottomUp, + Arc::new([]), + ), } } - pub fn optimize( + pub fn heuristic_optimize(&mut self, root_rel: OptRelNodeRef) -> OptRelNodeRef { + self.hueristic_optimizer + .optimize(root_rel) + .expect("heuristics returns error") + } + + pub fn cascades_optimize( &mut self, root_rel: OptRelNodeRef, ) -> Result<(GroupId, OptRelNodeRef, RelNodeMetaMap)> { if self.enable_adaptive { self.runtime_statistics.lock().unwrap().iter_cnt += 1; - self.optimizer.step_clear_winner(); + 
self.cascades_optimizer.step_clear_winner(); } else { - self.optimizer.step_clear(); + self.cascades_optimizer.step_clear(); } - let group_id = self.optimizer.step_optimize_rel(root_rel)?; + let group_id = self.cascades_optimizer.step_optimize_rel(root_rel)?; let mut meta = Some(HashMap::new()); - let optimized_rel = self.optimizer.step_get_optimize_rel(group_id, &mut meta)?; + let optimized_rel = self + .cascades_optimizer + .step_get_optimize_rel(group_id, &mut meta)?; Ok((group_id, optimized_rel, meta.unwrap())) } pub fn dump(&self, group_id: Option) { - self.optimizer.dump(group_id) + self.cascades_optimizer.dump(group_id) } } From 5d71f676fa7208cc3b5da15cdc8955eac91e8549 Mon Sep 17 00:00:00 2001 From: Benjamin O Date: Tue, 26 Mar 2024 00:54:57 -0400 Subject: [PATCH 21/61] Revise testing infra and port filter pushdown to it --- .../src/rules/filter_pushdown.rs | 83 ++++++------------- optd-datafusion-repr/src/testing.rs | 22 ++++- 2 files changed, 44 insertions(+), 61 deletions(-) diff --git a/optd-datafusion-repr/src/rules/filter_pushdown.rs b/optd-datafusion-repr/src/rules/filter_pushdown.rs index e851b945..ef965aaf 100644 --- a/optd-datafusion-repr/src/rules/filter_pushdown.rs +++ b/optd-datafusion-repr/src/rules/filter_pushdown.rs @@ -396,20 +396,23 @@ fn apply_filter_pushdown( mod tests { use std::sync::Arc; + use optd_core::optimizer::Optimizer; + use crate::{ plan_nodes::{ BinOpExpr, BinOpType, ColumnRefExpr, ConstantExpr, ExprList, LogOpExpr, LogOpType, LogicalAgg, LogicalFilter, LogicalJoin, LogicalProjection, LogicalScan, LogicalSort, OptRelNode, OptRelNodeTyp, }, - testing::new_dummy_optimizer, + rules::FilterPushdownRule, + testing::new_test_optimizer, }; use super::apply_filter_pushdown; #[test] fn push_past_sort() { - let dummy_optimizer = new_dummy_optimizer(); + let mut test_optimizer = new_test_optimizer(Arc::new(FilterPushdownRule::new())); let scan = LogicalScan::new("customer".into()); let sort = LogicalSort::new(scan.into_plan_node(), 
ExprList::new(vec![])); @@ -421,15 +424,9 @@ mod tests { ) .into_expr(); - let plan = apply_filter_pushdown( - &dummy_optimizer, - super::FilterPushdownRulePicks { - child: Arc::unwrap_or_clone(sort.into_rel_node()), - cond: Arc::unwrap_or_clone(filter_expr.into_rel_node()), - }, - ); + let filter = LogicalFilter::new(sort.into_plan_node(), filter_expr); - let plan = plan.first().unwrap(); + let plan = test_optimizer.optimize(filter.into_rel_node()).unwrap(); assert!(matches!(plan.typ, OptRelNodeTyp::Sort)); assert!(matches!(plan.child(0).typ, OptRelNodeTyp::Filter)); @@ -438,7 +435,7 @@ mod tests { #[test] fn filter_merge() { // TODO: write advanced proj with more expr that need to be transformed - let dummy_optimizer = new_dummy_optimizer(); + let mut test_optimizer = new_test_optimizer(Arc::new(FilterPushdownRule::new())); let scan = LogicalScan::new("customer".into()); let filter_ch_expr = BinOpExpr::new( @@ -456,15 +453,9 @@ mod tests { ) .into_expr(); - let plan = apply_filter_pushdown( - &dummy_optimizer, - super::FilterPushdownRulePicks { - child: Arc::unwrap_or_clone(filter_ch.into_rel_node()), - cond: Arc::unwrap_or_clone(filter_expr.into_rel_node()), - }, - ); + let filter = LogicalFilter::new(filter_ch.into_plan_node(), filter_expr); - let plan = plan.first().unwrap(); + let plan = test_optimizer.optimize(filter.into_rel_node()).unwrap(); assert!(matches!(plan.typ, OptRelNodeTyp::Filter)); let cond_log_op = LogOpExpr::from_rel_node( @@ -498,7 +489,7 @@ mod tests { #[test] fn push_past_proj_basic() { - let dummy_optimizer = new_dummy_optimizer(); + let mut test_optimizer = new_test_optimizer(Arc::new(FilterPushdownRule::new())); let scan = LogicalScan::new("customer".into()); let proj = LogicalProjection::new(scan.into_plan_node(), ExprList::new(vec![])); @@ -510,23 +501,18 @@ mod tests { ) .into_expr(); - let plan = apply_filter_pushdown( - &dummy_optimizer, - super::FilterPushdownRulePicks { - child: Arc::unwrap_or_clone(proj.into_rel_node()), - cond: 
Arc::unwrap_or_clone(filter_expr.into_rel_node()), - }, - ); - - let plan = plan.first().unwrap(); + let filter = LogicalFilter::new(proj.into_plan_node(), filter_expr); + let plan = test_optimizer + .optimize(filter.into_rel_node().into()) + .unwrap(); - assert!(matches!(plan.typ, OptRelNodeTyp::Projection)); + assert_eq!(plan.typ, OptRelNodeTyp::Projection); assert!(matches!(plan.child(0).typ, OptRelNodeTyp::Filter)); } #[test] fn push_past_proj_adv() { - let dummy_optimizer = new_dummy_optimizer(); + let mut test_optimizer = new_test_optimizer(Arc::new(FilterPushdownRule::new())); let scan = LogicalScan::new("customer".into()); let proj = LogicalProjection::new( @@ -559,15 +545,9 @@ mod tests { ]), ); - let plan = apply_filter_pushdown( - &dummy_optimizer, - super::FilterPushdownRulePicks { - child: Arc::unwrap_or_clone(proj.into_rel_node()), - cond: Arc::unwrap_or_clone(filter_expr.into_rel_node()), - }, - ); + let filter = LogicalFilter::new(proj.into_plan_node(), filter_expr.into_expr()); - let plan = plan.first().unwrap(); + let plan = test_optimizer.optimize(filter.into_rel_node()).unwrap(); assert!(matches!(plan.typ, OptRelNodeTyp::Projection)); let plan_filter = LogicalFilter::from_rel_node(plan.child(0)).unwrap(); @@ -593,7 +573,7 @@ mod tests { // be pushed to the left child, one to the right child, one gets incorporated // into the (now inner) join condition, and a constant one remains in the // original filter. 
- let dummy_optimizer = new_dummy_optimizer(); + let mut test_optimizer = new_test_optimizer(Arc::new(FilterPushdownRule::new())); let scan1 = LogicalScan::new("customer".into()); @@ -649,15 +629,9 @@ mod tests { ]), ); - let plan = apply_filter_pushdown( - &dummy_optimizer, - super::FilterPushdownRulePicks { - child: Arc::unwrap_or_clone(join.into_rel_node()), - cond: Arc::unwrap_or_clone(filter_expr.into_rel_node()), - }, - ); + let filter = LogicalFilter::new(join.into_plan_node(), filter_expr.into_expr()); - let plan = plan.first().unwrap(); + let plan = test_optimizer.optimize(filter.into_rel_node()).unwrap(); // Examine original filter + condition let top_level_filter = LogicalFilter::from_rel_node(plan.clone().into()).unwrap(); @@ -728,7 +702,7 @@ mod tests { // Test pushing a filter past an aggregation node, where the filter // condition has one clause that can be pushed down to the child and // one that must remain in the filter. - let dummy_optimizer = new_dummy_optimizer(); + let mut test_optimizer = new_test_optimizer(Arc::new(FilterPushdownRule::new())); let scan = LogicalScan::new("customer".into()); @@ -758,15 +732,10 @@ mod tests { ]), ); - let plan = apply_filter_pushdown( - &dummy_optimizer, - super::FilterPushdownRulePicks { - child: Arc::unwrap_or_clone(agg.into_rel_node()), - cond: Arc::unwrap_or_clone(filter_expr.into_rel_node()), - }, - ); + let filter = LogicalFilter::new(agg.into_plan_node(), filter_expr.into_expr()); + + let plan = test_optimizer.optimize(filter.into_rel_node()).unwrap(); - let plan = plan.first().unwrap(); let plan_filter = LogicalFilter::from_rel_node(plan.clone().into()).unwrap(); assert!(matches!(plan_filter.0.typ(), OptRelNodeTyp::Filter)); let plan_filter_expr = diff --git a/optd-datafusion-repr/src/testing.rs b/optd-datafusion-repr/src/testing.rs index d93fbede..eae886f4 100644 --- a/optd-datafusion-repr/src/testing.rs +++ b/optd-datafusion-repr/src/testing.rs @@ -1,13 +1,27 @@ mod dummy_cost; mod tpch_catalog; -use 
optd_core::heuristics::{ApplyOrder, HeuristicsOptimizer}; +use std::sync::Arc; -use crate::plan_nodes::OptRelNodeTyp; +use optd_core::{ + heuristics::{ApplyOrder, HeuristicsOptimizer}, + rules::Rule, +}; + +use crate::{plan_nodes::OptRelNodeTyp, properties::schema::SchemaPropertyBuilder}; + +use self::tpch_catalog::TpchCatalog; /// Create a "dummy" optimizer preloaded with the TPC-H catalog for testing /// Note: Only provides the schema property currently -pub fn new_dummy_optimizer() -> HeuristicsOptimizer { - let dummy_optimizer = HeuristicsOptimizer::new_with_rules(vec![], ApplyOrder::TopDown); +pub fn new_test_optimizer( + rule: Arc>>, +) -> HeuristicsOptimizer { + let dummy_catalog = Arc::new(TpchCatalog); + let dummy_optimizer = HeuristicsOptimizer::new_with_rules( + vec![rule], + ApplyOrder::TopDown, + Arc::new([Box::new(SchemaPropertyBuilder::new(dummy_catalog))]), + ); dummy_optimizer } From 927717f7c1ce2d2bb532801c15826bdf3afeef6a Mon Sep 17 00:00:00 2001 From: Benjamin O Date: Tue, 26 Mar 2024 01:06:54 -0400 Subject: [PATCH 22/61] Make categorize_conds_helper more flexible --- optd-datafusion-repr/src/rules/filter_pushdown.rs | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/optd-datafusion-repr/src/rules/filter_pushdown.rs b/optd-datafusion-repr/src/rules/filter_pushdown.rs index ef965aaf..1b803580 100644 --- a/optd-datafusion-repr/src/rules/filter_pushdown.rs +++ b/optd-datafusion-repr/src/rules/filter_pushdown.rs @@ -8,7 +8,6 @@ //! At a high level, filter pushdown is responsible for pushing the filter node //! further down the query plan whenever it is possible to do so. 
-use core::panic; use std::collections::{HashMap, HashSet}; use std::vec; @@ -75,8 +74,10 @@ fn determine_join_cond_dep( /// Do not call directly fn categorize_conds_helper(cond: Expr, bottom_level_children: &mut Vec) { + assert!(cond.typ().is_expression()); match cond.typ() { - OptRelNodeTyp::LogOp(_) | OptRelNodeTyp::BinOp(_) | OptRelNodeTyp::UnOp(_) => { + OptRelNodeTyp::ColumnRef | OptRelNodeTyp::Constant(_) => bottom_level_children.push(cond), + _ => { for child in &cond.into_rel_node().children { categorize_conds_helper( Expr::from_rel_node(child.clone()).unwrap(), @@ -84,11 +85,6 @@ fn categorize_conds_helper(cond: Expr, bottom_level_children: &mut Vec) { ); } } - OptRelNodeTyp::ColumnRef | OptRelNodeTyp::Constant(_) => bottom_level_children.push(cond), - _ => panic!( - "Encountered unhandled expr of type {:?} in categorize_conds_helper", - cond.typ() - ), } } From 2e906c873d9597deeb3659012c365e52ebe4dfdf Mon Sep 17 00:00:00 2001 From: Benjamin O Date: Tue, 26 Mar 2024 01:34:26 -0400 Subject: [PATCH 23/61] Fix crashes related to List --- optd-core/src/heuristics/optimizer.rs | 1 + optd-datafusion-repr/src/plan_nodes.rs | 4 ++ .../src/rules/filter_pushdown.rs | 13 ++++-- optd-datafusion-repr/src/rules/joins.rs | 46 ++++--------------- 4 files changed, 25 insertions(+), 39 deletions(-) diff --git a/optd-core/src/heuristics/optimizer.rs b/optd-core/src/heuristics/optimizer.rs index a5ab31c9..533c7051 100644 --- a/optd-core/src/heuristics/optimizer.rs +++ b/optd-core/src/heuristics/optimizer.rs @@ -162,6 +162,7 @@ impl HeuristicsOptimizer { Ok(node) } ApplyOrder::TopDown => { + self.infer_properties(root_rel.clone()); let root_rel = self.apply_rules(root_rel)?; let optimized_children = self.optimize_inputs(&root_rel.children)?; let node: Arc> = RelNode { diff --git a/optd-datafusion-repr/src/plan_nodes.rs b/optd-datafusion-repr/src/plan_nodes.rs index ad547385..14918ed8 100644 --- a/optd-datafusion-repr/src/plan_nodes.rs +++ 
b/optd-datafusion-repr/src/plan_nodes.rs @@ -297,6 +297,10 @@ impl Expr { let children = children .into_iter() .map(|child| { + if child.typ == OptRelNodeTyp::List { + // TODO: What should we do with List? + return child; + } Expr::from_rel_node(child.clone()) .unwrap() .rewrite_column_refs(rewrite_fn) diff --git a/optd-datafusion-repr/src/rules/filter_pushdown.rs b/optd-datafusion-repr/src/rules/filter_pushdown.rs index 1b803580..632d0abd 100644 --- a/optd-datafusion-repr/src/rules/filter_pushdown.rs +++ b/optd-datafusion-repr/src/rules/filter_pushdown.rs @@ -78,7 +78,16 @@ fn categorize_conds_helper(cond: Expr, bottom_level_children: &mut Vec) { match cond.typ() { OptRelNodeTyp::ColumnRef | OptRelNodeTyp::Constant(_) => bottom_level_children.push(cond), _ => { - for child in &cond.into_rel_node().children { + for child in &cond.clone().into_rel_node().children { + println!( + "Helper encountered child of node type: {:?} of type {:?}", + cond.typ(), + child.typ + ); + if child.typ == OptRelNodeTyp::List { + // TODO: What should we do when we encounter a List? 
+ continue; + } categorize_conds_helper( Expr::from_rel_node(child.clone()).unwrap(), bottom_level_children, @@ -404,8 +413,6 @@ mod tests { testing::new_test_optimizer, }; - use super::apply_filter_pushdown; - #[test] fn push_past_sort() { let mut test_optimizer = new_test_optimizer(Arc::new(FilterPushdownRule::new())); diff --git a/optd-datafusion-repr/src/rules/joins.rs b/optd-datafusion-repr/src/rules/joins.rs index bdd453e9..4531bc96 100644 --- a/optd-datafusion-repr/src/rules/joins.rs +++ b/optd-datafusion-repr/src/rules/joins.rs @@ -26,43 +26,17 @@ fn apply_join_commute( optimizer: &impl Optimizer, JoinCommuteRulePicks { left, right, cond }: JoinCommuteRulePicks, ) -> Vec> { - // TODO: migrate to new rewrite_column_refs helper - fn rewrite_column_refs(expr: Expr, left_size: usize, right_size: usize) -> Expr { - let expr = expr.into_rel_node(); - if let Some(expr) = ColumnRefExpr::from_rel_node(expr.clone()) { - let index = expr.index(); - if index < left_size { - return ColumnRefExpr::new(index + right_size).into_expr(); - } else { - return ColumnRefExpr::new(index - left_size).into_expr(); - } - } - let children = expr.children.clone(); - let children = children - .into_iter() - .map(|x| { - rewrite_column_refs(Expr::from_rel_node(x).unwrap(), left_size, right_size) - .into_rel_node() - }) - .collect_vec(); - Expr::from_rel_node( - RelNode { - typ: expr.typ.clone(), - children, - data: expr.data.clone(), - } - .into(), - ) - .unwrap() - } - let left_schema = optimizer.get_property::(Arc::new(left.clone()), 0); let right_schema = optimizer.get_property::(Arc::new(right.clone()), 0); - let cond = rewrite_column_refs( - Expr::from_rel_node(cond.into()).unwrap(), - left_schema.len(), - right_schema.len(), - ); + let cond = Expr::from_rel_node(cond.into()) + .unwrap() + .rewrite_column_refs(&|idx| { + if idx < left_schema.len() { + idx + right_schema.len() + } else { + idx - left_schema.len() + } + }); let node = LogicalJoin::new( 
PlanNode::from_group(right.into()), PlanNode::from_group(left.into()), @@ -150,7 +124,7 @@ fn apply_join_assoc( cond2, }: JoinAssocRulePicks, ) -> Vec> { - // TODO: migrate to new rewrite_column_refs helper + // TODO(bowad): migrate to new rewrite_column_refs helper fn rewrite_column_refs(expr: Expr, a_size: usize) -> Option { let expr = expr.into_rel_node(); if let Some(expr) = ColumnRefExpr::from_rel_node(expr.clone()) { From 88397f4971cd57a730a9b4b9af5b74e949a0cd51 Mon Sep 17 00:00:00 2001 From: Benjamin O Date: Tue, 26 Mar 2024 01:49:06 -0400 Subject: [PATCH 24/61] Improve generation of singleton expressions --- .../src/rules/filter_pushdown.rs | 40 +++++++++---------- 1 file changed, 18 insertions(+), 22 deletions(-) diff --git a/optd-datafusion-repr/src/rules/filter_pushdown.rs b/optd-datafusion-repr/src/rules/filter_pushdown.rs index 632d0abd..d758cee5 100644 --- a/optd-datafusion-repr/src/rules/filter_pushdown.rs +++ b/optd-datafusion-repr/src/rules/filter_pushdown.rs @@ -28,6 +28,16 @@ define_rule!( (Filter, [child], [cond]) ); +/// Emits a LogOpExpr AND if the list has more than one element +/// Otherwise, returns the single element +fn and_expr_list_to_expr(exprs: Vec) -> Expr { + if exprs.len() == 1 { + exprs.first().unwrap().clone() + } else { + LogOpExpr::new(LogOpType::And, ExprList::new(exprs)).into_expr() + } +} + fn merge_conds(first: Expr, second: Expr) -> Expr { let new_expr_list = ExprList::new(vec![first, second]); // Flatten nested logical expressions if possible @@ -79,11 +89,6 @@ fn categorize_conds_helper(cond: Expr, bottom_level_children: &mut Vec) { OptRelNodeTyp::ColumnRef | OptRelNodeTyp::Constant(_) => bottom_level_children.push(cond), _ => { for child in &cond.clone().into_rel_node().children { - println!( - "Helper encountered child of node type: {:?} of type {:?}", - cond.typ(), - child.typ - ); if child.typ == OptRelNodeTyp::List { // TODO: What should we do when we encounter a List? 
continue; @@ -206,20 +211,16 @@ fn filter_join_transpose( categorize_conds(categorization_fn, Expr::from_rel_node(cond.into()).unwrap()); let new_left = if !left_conds.is_empty() { - let new_filter_node = LogicalFilter::new( - old_join.left(), - LogOpExpr::new(LogOpType::And, ExprList::new(left_conds)).into_expr(), - ); + let new_filter_node = + LogicalFilter::new(old_join.left(), and_expr_list_to_expr(left_conds)); new_filter_node.into_plan_node() } else { old_join.left() }; let new_right = if !right_conds.is_empty() { - let new_filter_node = LogicalFilter::new( - old_join.right(), - LogOpExpr::new(LogOpType::And, ExprList::new(right_conds)).into_expr(), - ); + let new_filter_node = + LogicalFilter::new(old_join.right(), and_expr_list_to_expr(right_conds)); new_filter_node.into_plan_node() } else { old_join.right() @@ -228,10 +229,7 @@ fn filter_join_transpose( let new_join = match old_join.join_type() { JoinType::Inner => { let old_cond = old_join.cond(); - let new_conds = merge_conds( - LogOpExpr::new(LogOpType::And, ExprList::new(join_conds)).into_expr(), - old_cond, - ); + let new_conds = merge_conds(and_expr_list_to_expr(join_conds), old_cond); LogicalJoin::new(new_left, new_right, new_conds, JoinType::Inner) } JoinType::Cross => { @@ -239,7 +237,7 @@ fn filter_join_transpose( LogicalJoin::new( new_left, new_right, - LogOpExpr::new(LogOpType::And, ExprList::new(join_conds)).into_expr(), + and_expr_list_to_expr(join_conds), JoinType::Inner, ) } else { @@ -253,10 +251,8 @@ fn filter_join_transpose( }; let new_filter = if !keep_conds.is_empty() { - let new_filter_node = LogicalFilter::new( - new_join.into_plan_node(), - LogOpExpr::new(LogOpType::And, ExprList::new(keep_conds)).into_expr(), - ); + let new_filter_node = + LogicalFilter::new(new_join.into_plan_node(), and_expr_list_to_expr(keep_conds)); new_filter_node.into_rel_node().as_ref().clone() } else { new_join.into_rel_node().as_ref().clone() From 7f2f942d88f8271f6353570439478c688bae45bd Mon Sep 17 
00:00:00 2001 From: Benjamin O Date: Tue, 26 Mar 2024 01:49:23 -0400 Subject: [PATCH 25/61] Update tpc-h sqlplannertest with filter pushdown run --- optd-sqlplannertest/tests/tpch.planner.sql | 528 +++++++++------------ 1 file changed, 212 insertions(+), 316 deletions(-) diff --git a/optd-sqlplannertest/tests/tpch.planner.sql b/optd-sqlplannertest/tests/tpch.planner.sql index 99e00812..3da6dfa3 100644 --- a/optd-sqlplannertest/tests/tpch.planner.sql +++ b/optd-sqlplannertest/tests/tpch.planner.sql @@ -374,13 +374,13 @@ PhysicalLimit { skip: 0, fetch: 100 } │ │ │ ├── PhysicalProjection { exprs: [ #0, #1, #3, #4 ] } │ │ │ │ └── PhysicalHashJoin { join_type: Inner, left_keys: [ #0 ], right_keys: [ #0 ] } │ │ │ │ ├── PhysicalProjection { exprs: [ #0, #1 ] } - │ │ │ │ │ └── PhysicalFilter - │ │ │ │ │ ├── cond:And - │ │ │ │ │ │ ├── Eq - │ │ │ │ │ │ │ ├── #3 - │ │ │ │ │ │ │ └── 4 - │ │ │ │ │ │ └── Like { expr: #2, pattern: "%TIN", negated: false, case_insensitive: false } - │ │ │ │ │ └── PhysicalProjection { exprs: [ #0, #2, #4, #5 ] } + │ │ │ │ │ └── PhysicalProjection { exprs: [ #0, #2, #4, #5 ] } + │ │ │ │ │ └── PhysicalFilter + │ │ │ │ │ ├── cond:And + │ │ │ │ │ │ ├── Eq + │ │ │ │ │ │ │ ├── #5 + │ │ │ │ │ │ │ └── 4 + │ │ │ │ │ │ └── Like { expr: #4, pattern: "%TIN", negated: false, case_insensitive: false } │ │ │ │ │ └── PhysicalScan { table: part } │ │ │ │ └── PhysicalProjection { exprs: [ #0, #1, #3 ] } │ │ │ │ └── PhysicalScan { table: partsupp } @@ -389,11 +389,11 @@ PhysicalLimit { skip: 0, fetch: 100 } │ │ └── PhysicalProjection { exprs: [ #0, #1, #2 ] } │ │ └── PhysicalScan { table: nation } │ └── PhysicalProjection { exprs: [ #0 ] } - │ └── PhysicalFilter - │ ├── cond:Eq - │ │ ├── #1 - │ │ └── "AFRICA" - │ └── PhysicalProjection { exprs: [ #0, #1 ] } + │ └── PhysicalProjection { exprs: [ #0, #1 ] } + │ └── PhysicalFilter + │ ├── cond:Eq + │ │ ├── #1 + │ │ └── "AFRICA" │ └── PhysicalScan { table: region } └── PhysicalProjection { exprs: [ #1, #0 ] } └── 
PhysicalAgg @@ -413,11 +413,11 @@ PhysicalLimit { skip: 0, fetch: 100 } │ └── PhysicalProjection { exprs: [ #0, #2 ] } │ └── PhysicalScan { table: nation } └── PhysicalProjection { exprs: [ #0 ] } - └── PhysicalFilter - ├── cond:Eq - │ ├── #1 - │ └── "AFRICA" - └── PhysicalProjection { exprs: [ #0, #1 ] } + └── PhysicalProjection { exprs: [ #0, #1 ] } + └── PhysicalFilter + ├── cond:Eq + │ ├── #1 + │ └── "AFRICA" └── PhysicalScan { table: region } */ @@ -515,24 +515,24 @@ PhysicalLimit { skip: 0, fetch: 10 } ├── PhysicalProjection { exprs: [ #1, #3, #4 ] } │ └── PhysicalHashJoin { join_type: Inner, left_keys: [ #0 ], right_keys: [ #1 ] } │ ├── PhysicalProjection { exprs: [ #0 ] } - │ │ └── PhysicalFilter - │ │ ├── cond:Eq - │ │ │ ├── #1 - │ │ │ └── "FURNITURE" - │ │ └── PhysicalProjection { exprs: [ #0, #6 ] } + │ │ └── PhysicalProjection { exprs: [ #0, #6 ] } + │ │ └── PhysicalFilter + │ │ ├── cond:Eq + │ │ │ ├── #6 + │ │ │ └── "FURNITURE" │ │ └── PhysicalScan { table: customer } - │ └── PhysicalFilter - │ ├── cond:Lt - │ │ ├── #2 - │ │ └── 9218 - │ └── PhysicalProjection { exprs: [ #0, #1, #4, #7 ] } + │ └── PhysicalProjection { exprs: [ #0, #1, #4, #7 ] } + │ └── PhysicalFilter + │ ├── cond:Lt + │ │ ├── #4 + │ │ └── 9218 │ └── PhysicalScan { table: orders } └── PhysicalProjection { exprs: [ #0, #1, #2 ] } - └── PhysicalFilter - ├── cond:Gt - │ ├── #3 - │ └── 9218 - └── PhysicalProjection { exprs: [ #0, #5, #6, #10 ] } + └── PhysicalProjection { exprs: [ #0, #5, #6, #10 ] } + └── PhysicalFilter + ├── cond:Gt + │ ├── #10 + │ └── 9218 └── PhysicalScan { table: lineitem } */ @@ -627,46 +627,29 @@ PhysicalSort │ ├── Cast { cast_to: Decimal128(20, 0), expr: 1 } │ └── #23 ├── groups: [ #41 ] - └── PhysicalFilter - ├── cond:And - │ ├── Eq - │ │ ├── #0 - │ │ └── #9 - │ ├── Eq - │ │ ├── #17 - │ │ └── #8 - │ ├── Eq - │ │ ├── #19 - │ │ └── #33 - │ ├── Eq - │ │ ├── #3 - │ │ └── #36 - │ ├── Eq - │ │ ├── #36 - │ │ └── #40 - │ ├── Eq - │ │ ├── #42 - │ │ └── #44 - │ ├── Eq - │ │ 
├── #45 - │ │ └── "Asia" - │ ├── Geq - │ │ ├── #12 - │ │ └── Cast { cast_to: Date32, expr: "2023-01-01" } - │ └── Lt - │ ├── #12 - │ └── Cast { cast_to: Date32, expr: "2024-01-01" } - └── PhysicalNestedLoopJoin { join_type: Cross, cond: true } - ├── PhysicalNestedLoopJoin { join_type: Cross, cond: true } - │ ├── PhysicalNestedLoopJoin { join_type: Cross, cond: true } - │ │ ├── PhysicalNestedLoopJoin { join_type: Cross, cond: true } - │ │ │ ├── PhysicalNestedLoopJoin { join_type: Cross, cond: true } - │ │ │ │ ├── PhysicalScan { table: customer } - │ │ │ │ └── PhysicalScan { table: orders } - │ │ │ └── PhysicalScan { table: lineitem } - │ │ └── PhysicalScan { table: supplier } - │ └── PhysicalScan { table: nation } - └── PhysicalScan { table: region } + └── PhysicalHashJoin { join_type: Inner, left_keys: [ #19, #3 ], right_keys: [ #0, #3 ] } + ├── PhysicalHashJoin { join_type: Inner, left_keys: [ #0 ], right_keys: [ #1 ] } + │ ├── PhysicalScan { table: customer } + │ └── PhysicalHashJoin { join_type: Inner, left_keys: [ #0 ], right_keys: [ #0 ] } + │ ├── PhysicalFilter + │ │ ├── cond:And + │ │ │ ├── Geq + │ │ │ │ ├── #4 + │ │ │ │ └── Cast { cast_to: Date32, expr: "2023-01-01" } + │ │ │ └── Lt + │ │ │ ├── #4 + │ │ │ └── Cast { cast_to: Date32, expr: "2024-01-01" } + │ │ └── PhysicalScan { table: orders } + │ └── PhysicalScan { table: lineitem } + └── PhysicalHashJoin { join_type: Inner, left_keys: [ #3 ], right_keys: [ #0 ] } + ├── PhysicalScan { table: supplier } + └── PhysicalHashJoin { join_type: Inner, left_keys: [ #2 ], right_keys: [ #0 ] } + ├── PhysicalScan { table: nation } + └── PhysicalFilter + ├── cond:Eq + │ ├── #1 + │ └── "Asia" + └── PhysicalScan { table: region } */ -- TPC-H Q6 @@ -857,50 +840,38 @@ PhysicalSort │ └── Sub │ ├── Cast { cast_to: Decimal128(20, 0), expr: 1 } │ └── #13 - └── PhysicalFilter + └── PhysicalNestedLoopJoin + ├── join_type: Inner ├── cond:And │ ├── Eq - │ │ ├── #0 - │ │ └── #9 - │ ├── Eq - │ │ ├── #23 - │ │ └── #7 - │ ├── Eq - │ 
│ ├── #32 - │ │ └── #24 - │ ├── Eq - │ │ ├── #3 - │ │ └── #40 - │ ├── Eq │ │ ├── #35 │ │ └── #44 - │ ├── Or - │ │ ├── And - │ │ │ ├── Eq - │ │ │ │ ├── #41 - │ │ │ │ └── "FRANCE" - │ │ │ └── Eq - │ │ │ ├── #45 - │ │ │ └── "GERMANY" - │ │ └── And - │ │ ├── Eq - │ │ │ ├── #41 - │ │ │ └── "GERMANY" - │ │ └── Eq - │ │ ├── #45 - │ │ └── "FRANCE" - │ └── Between { expr: #17, lower: Cast { cast_to: Date32, expr: "1995-01-01" }, upper: Cast { cast_to: Date32, expr: "1996-12-31" } } - └── PhysicalNestedLoopJoin { join_type: Cross, cond: true } - ├── PhysicalNestedLoopJoin { join_type: Cross, cond: true } - │ ├── PhysicalNestedLoopJoin { join_type: Cross, cond: true } - │ │ ├── PhysicalNestedLoopJoin { join_type: Cross, cond: true } - │ │ │ ├── PhysicalNestedLoopJoin { join_type: Cross, cond: true } - │ │ │ │ ├── PhysicalScan { table: supplier } - │ │ │ │ └── PhysicalScan { table: lineitem } - │ │ │ └── PhysicalScan { table: orders } - │ │ └── PhysicalScan { table: customer } - │ └── PhysicalScan { table: nation } - └── PhysicalScan { table: nation } + │ └── Or + │ ├── And + │ │ ├── Eq + │ │ │ ├── #41 + │ │ │ └── "FRANCE" + │ │ └── Eq + │ │ ├── #45 + │ │ └── "GERMANY" + │ └── And + │ ├── Eq + │ │ ├── #41 + │ │ └── "GERMANY" + │ └── Eq + │ ├── #45 + │ └── "FRANCE" + ├── PhysicalHashJoin { join_type: Inner, left_keys: [ #3 ], right_keys: [ #0 ] } + │ ├── PhysicalHashJoin { join_type: Inner, left_keys: [ #0 ], right_keys: [ #2 ] } + │ │ ├── PhysicalScan { table: supplier } + │ │ └── PhysicalHashJoin { join_type: Inner, left_keys: [ #0 ], right_keys: [ #0 ] } + │ │ ├── PhysicalFilter { cond: Between { expr: #10, lower: Cast { cast_to: Date32, expr: "1995-01-01" }, upper: Cast { cast_to: Date32, expr: "1996-12-31" } } } + │ │ │ └── PhysicalScan { table: lineitem } + │ │ └── PhysicalHashJoin { join_type: Inner, left_keys: [ #1 ], right_keys: [ #0 ] } + │ │ ├── PhysicalScan { table: orders } + │ │ └── PhysicalScan { table: customer } + │ └── PhysicalScan { table: nation } + └── 
PhysicalScan { table: nation } */ -- TPC-H Q8 without top-most limit node @@ -1052,50 +1023,29 @@ PhysicalSort │ │ ├── Cast { cast_to: Decimal128(20, 0), expr: 1 } │ │ └── #22 │ └── #54 - └── PhysicalFilter - ├── cond:And - │ ├── Eq - │ │ ├── #0 - │ │ └── #17 - │ ├── Eq - │ │ ├── #9 - │ │ └── #18 - │ ├── Eq - │ │ ├── #16 - │ │ └── #32 - │ ├── Eq - │ │ ├── #33 - │ │ └── #41 - │ ├── Eq - │ │ ├── #44 - │ │ └── #49 - │ ├── Eq - │ │ ├── #51 - │ │ └── #57 - │ ├── Eq - │ │ ├── #58 - │ │ └── "AMERICA" - │ ├── Eq - │ │ ├── #12 - │ │ └── #53 - │ ├── Between { expr: #36, lower: Cast { cast_to: Date32, expr: "1995-01-01" }, upper: Cast { cast_to: Date32, expr: "1996-12-31" } } - │ └── Eq - │ ├── #4 - │ └── "ECONOMY ANODIZED STEEL" - └── PhysicalNestedLoopJoin { join_type: Cross, cond: true } - ├── PhysicalNestedLoopJoin { join_type: Cross, cond: true } - │ ├── PhysicalNestedLoopJoin { join_type: Cross, cond: true } - │ │ ├── PhysicalNestedLoopJoin { join_type: Cross, cond: true } - │ │ │ ├── PhysicalNestedLoopJoin { join_type: Cross, cond: true } - │ │ │ │ ├── PhysicalNestedLoopJoin { join_type: Cross, cond: true } - │ │ │ │ │ ├── PhysicalNestedLoopJoin { join_type: Cross, cond: true } - │ │ │ │ │ │ ├── PhysicalScan { table: part } - │ │ │ │ │ │ └── PhysicalScan { table: supplier } - │ │ │ │ │ └── PhysicalScan { table: lineitem } - │ │ │ │ └── PhysicalScan { table: orders } - │ │ │ └── PhysicalScan { table: customer } - │ │ └── PhysicalScan { table: nation } - │ └── PhysicalScan { table: nation } + └── PhysicalHashJoin { join_type: Inner, left_keys: [ #51 ], right_keys: [ #0 ] } + ├── PhysicalHashJoin { join_type: Inner, left_keys: [ #12 ], right_keys: [ #0 ] } + │ ├── PhysicalHashJoin { join_type: Inner, left_keys: [ #0, #9 ], right_keys: [ #1, #2 ] } + │ │ ├── PhysicalNestedLoopJoin { join_type: Cross, cond: true } + │ │ │ ├── PhysicalFilter + │ │ │ │ ├── cond:Eq + │ │ │ │ │ ├── #4 + │ │ │ │ │ └── "ECONOMY ANODIZED STEEL" + │ │ │ │ └── PhysicalScan { table: part } + │ │ │ 
└── PhysicalScan { table: supplier } + │ │ └── PhysicalHashJoin { join_type: Inner, left_keys: [ #0 ], right_keys: [ #0 ] } + │ │ ├── PhysicalScan { table: lineitem } + │ │ └── PhysicalHashJoin { join_type: Inner, left_keys: [ #1 ], right_keys: [ #0 ] } + │ │ ├── PhysicalFilter { cond: Between { expr: #4, lower: Cast { cast_to: Date32, expr: "1995-01-01" }, upper: Cast { cast_to: Date32, expr: "1996-12-31" } } } + │ │ │ └── PhysicalScan { table: orders } + │ │ └── PhysicalHashJoin { join_type: Inner, left_keys: [ #3 ], right_keys: [ #0 ] } + │ │ ├── PhysicalScan { table: customer } + │ │ └── PhysicalScan { table: nation } + │ └── PhysicalScan { table: nation } + └── PhysicalFilter + ├── cond:Eq + │ ├── #1 + │ └── "AMERICA" └── PhysicalScan { table: region } */ @@ -1216,38 +1166,18 @@ PhysicalSort │ └── Mul │ ├── #35 │ └── #20 - └── PhysicalFilter - ├── cond:And - │ ├── Eq - │ │ ├── #9 - │ │ └── #18 - │ ├── Eq - │ │ ├── #33 - │ │ └── #18 - │ ├── Eq - │ │ ├── #32 - │ │ └── #17 - │ ├── Eq - │ │ ├── #0 - │ │ └── #17 - │ ├── Eq - │ │ ├── #37 - │ │ └── #16 - │ ├── Eq - │ │ ├── #12 - │ │ └── #46 - │ └── Like { expr: #1, pattern: "%green%", negated: false, case_insensitive: false } - └── PhysicalNestedLoopJoin { join_type: Cross, cond: true } - ├── PhysicalNestedLoopJoin { join_type: Cross, cond: true } - │ ├── PhysicalNestedLoopJoin { join_type: Cross, cond: true } - │ │ ├── PhysicalNestedLoopJoin { join_type: Cross, cond: true } - │ │ │ ├── PhysicalNestedLoopJoin { join_type: Cross, cond: true } - │ │ │ │ ├── PhysicalScan { table: part } - │ │ │ │ └── PhysicalScan { table: supplier } - │ │ │ └── PhysicalScan { table: lineitem } - │ │ └── PhysicalScan { table: partsupp } - │ └── PhysicalScan { table: orders } - └── PhysicalScan { table: nation } + └── PhysicalHashJoin { join_type: Inner, left_keys: [ #12 ], right_keys: [ #0 ] } + ├── PhysicalHashJoin { join_type: Inner, left_keys: [ #9, #0 ], right_keys: [ #2, #1 ] } + │ ├── PhysicalNestedLoopJoin { join_type: Cross, 
cond: true } + │ │ ├── PhysicalFilter { cond: Like { expr: #1, pattern: "%green%", negated: false, case_insensitive: false } } + │ │ │ └── PhysicalScan { table: part } + │ │ └── PhysicalScan { table: supplier } + │ └── PhysicalHashJoin { join_type: Inner, left_keys: [ #0 ], right_keys: [ #0 ] } + │ ├── PhysicalHashJoin { join_type: Inner, left_keys: [ #2, #1 ], right_keys: [ #1, #0 ] } + │ │ ├── PhysicalScan { table: lineitem } + │ │ └── PhysicalScan { table: partsupp } + │ └── PhysicalScan { table: orders } + └── PhysicalScan { table: nation } */ -- TPC-H Q9 @@ -1367,38 +1297,18 @@ PhysicalSort │ └── Mul │ ├── #35 │ └── #20 - └── PhysicalFilter - ├── cond:And - │ ├── Eq - │ │ ├── #9 - │ │ └── #18 - │ ├── Eq - │ │ ├── #33 - │ │ └── #18 - │ ├── Eq - │ │ ├── #32 - │ │ └── #17 - │ ├── Eq - │ │ ├── #0 - │ │ └── #17 - │ ├── Eq - │ │ ├── #37 - │ │ └── #16 - │ ├── Eq - │ │ ├── #12 - │ │ └── #46 - │ └── Like { expr: #1, pattern: "%green%", negated: false, case_insensitive: false } - └── PhysicalNestedLoopJoin { join_type: Cross, cond: true } - ├── PhysicalNestedLoopJoin { join_type: Cross, cond: true } - │ ├── PhysicalNestedLoopJoin { join_type: Cross, cond: true } - │ │ ├── PhysicalNestedLoopJoin { join_type: Cross, cond: true } - │ │ │ ├── PhysicalNestedLoopJoin { join_type: Cross, cond: true } - │ │ │ │ ├── PhysicalScan { table: part } - │ │ │ │ └── PhysicalScan { table: supplier } - │ │ │ └── PhysicalScan { table: lineitem } - │ │ └── PhysicalScan { table: partsupp } - │ └── PhysicalScan { table: orders } - └── PhysicalScan { table: nation } + └── PhysicalHashJoin { join_type: Inner, left_keys: [ #12 ], right_keys: [ #0 ] } + ├── PhysicalHashJoin { join_type: Inner, left_keys: [ #9, #0 ], right_keys: [ #2, #1 ] } + │ ├── PhysicalNestedLoopJoin { join_type: Cross, cond: true } + │ │ ├── PhysicalFilter { cond: Like { expr: #1, pattern: "%green%", negated: false, case_insensitive: false } } + │ │ │ └── PhysicalScan { table: part } + │ │ └── PhysicalScan { table: supplier 
} + │ └── PhysicalHashJoin { join_type: Inner, left_keys: [ #0 ], right_keys: [ #0 ] } + │ ├── PhysicalHashJoin { join_type: Inner, left_keys: [ #2, #1 ], right_keys: [ #1, #0 ] } + │ │ ├── PhysicalScan { table: lineitem } + │ │ └── PhysicalScan { table: partsupp } + │ └── PhysicalScan { table: orders } + └── PhysicalScan { table: nation } */ -- TPC-H Q10 @@ -1491,35 +1401,27 @@ PhysicalLimit { skip: 0, fetch: 20 } │ ├── Cast { cast_to: Decimal128(20, 0), expr: 1 } │ └── #23 ├── groups: [ #0, #1, #5, #4, #34, #2, #7 ] - └── PhysicalFilter - ├── cond:And - │ ├── Eq - │ │ ├── #0 - │ │ └── #9 - │ ├── Eq - │ │ ├── #17 - │ │ └── #8 - │ ├── Geq - │ │ ├── #12 - │ │ └── Cast { cast_to: Date32, expr: "1993-07-01" } - │ ├── Lt - │ │ ├── #12 - │ │ └── Add - │ │ ├── Cast { cast_to: Date32, expr: "1993-07-01" } - │ │ └── INTERVAL_MONTH_DAY_NANO (3, 0, 0) - │ ├── Eq - │ │ ├── #25 - │ │ └── "R" - │ └── Eq - │ ├── #3 - │ └── #33 - └── PhysicalNestedLoopJoin { join_type: Cross, cond: true } - ├── PhysicalNestedLoopJoin { join_type: Cross, cond: true } - │ ├── PhysicalNestedLoopJoin { join_type: Cross, cond: true } - │ │ ├── PhysicalScan { table: customer } - │ │ └── PhysicalScan { table: orders } - │ └── PhysicalScan { table: lineitem } - └── PhysicalScan { table: nation } + └── PhysicalHashJoin { join_type: Inner, left_keys: [ #3 ], right_keys: [ #0 ] } + ├── PhysicalHashJoin { join_type: Inner, left_keys: [ #0 ], right_keys: [ #1 ] } + │ ├── PhysicalScan { table: customer } + │ └── PhysicalHashJoin { join_type: Inner, left_keys: [ #0 ], right_keys: [ #0 ] } + │ ├── PhysicalFilter + │ │ ├── cond:And + │ │ │ ├── Geq + │ │ │ │ ├── #4 + │ │ │ │ └── Cast { cast_to: Date32, expr: "1993-07-01" } + │ │ │ └── Lt + │ │ │ ├── #4 + │ │ │ └── Add + │ │ │ ├── Cast { cast_to: Date32, expr: "1993-07-01" } + │ │ │ └── INTERVAL_MONTH_DAY_NANO (3, 0, 0) + │ │ └── PhysicalScan { table: orders } + │ └── PhysicalFilter + │ ├── cond:Eq + │ │ ├── #8 + │ │ └── "R" + │ └── PhysicalScan { table: lineitem } 
+ └── PhysicalScan { table: nation } */ -- TPC-H Q12 @@ -1630,26 +1532,23 @@ PhysicalSort │ ├── 1 │ └── 0 ├── groups: [ #23 ] - └── PhysicalFilter - ├── cond:And - │ ├── Eq - │ │ ├── #0 - │ │ └── #9 - │ ├── InList { expr: #23, list: [ "MAIL", "SHIP" ], negated: false } - │ ├── Lt - │ │ ├── #20 - │ │ └── #21 - │ ├── Lt - │ │ ├── #19 - │ │ └── #20 - │ ├── Geq - │ │ ├── #21 - │ │ └── Cast { cast_to: Date32, expr: "1994-01-01" } - │ └── Lt - │ ├── #21 - │ └── Cast { cast_to: Date32, expr: "1995-01-01" } - └── PhysicalNestedLoopJoin { join_type: Cross, cond: true } - ├── PhysicalScan { table: orders } + └── PhysicalHashJoin { join_type: Inner, left_keys: [ #0 ], right_keys: [ #0 ] } + ├── PhysicalScan { table: orders } + └── PhysicalFilter + ├── cond:And + │ ├── InList { expr: #14, list: [ "MAIL", "SHIP" ], negated: false } + │ ├── Lt + │ │ ├── #11 + │ │ └── #12 + │ ├── Lt + │ │ ├── #10 + │ │ └── #11 + │ ├── Geq + │ │ ├── #12 + │ │ └── Cast { cast_to: Date32, expr: "1994-01-01" } + │ └── Lt + │ ├── #12 + │ └── Cast { cast_to: Date32, expr: "1995-01-01" } └── PhysicalScan { table: lineitem } */ @@ -1733,22 +1632,19 @@ PhysicalProjection │ ├── Cast { cast_to: Decimal128(20, 0), expr: 1 } │ └── #6 ├── groups: [] - └── PhysicalFilter - ├── cond:And - │ ├── Eq - │ │ ├── #1 - │ │ └── #16 - │ ├── Geq - │ │ ├── #10 - │ │ └── Cast { cast_to: Date32, expr: "1995-09-01" } - │ └── Lt - │ ├── #10 - │ └── Add - │ ├── Cast { cast_to: Date32, expr: "1995-09-01" } - │ └── INTERVAL_MONTH_DAY_NANO (1, 0, 0) - └── PhysicalNestedLoopJoin { join_type: Cross, cond: true } - ├── PhysicalScan { table: lineitem } - └── PhysicalScan { table: part } + └── PhysicalHashJoin { join_type: Inner, left_keys: [ #1 ], right_keys: [ #0 ] } + ├── PhysicalFilter + │ ├── cond:And + │ │ ├── Geq + │ │ │ ├── #10 + │ │ │ └── Cast { cast_to: Date32, expr: "1995-09-01" } + │ │ └── Lt + │ │ ├── #10 + │ │ └── Add + │ │ ├── Cast { cast_to: Date32, expr: "1995-09-01" } + │ │ └── INTERVAL_MONTH_DAY_NANO (1, 0, 0) + │ 
└── PhysicalScan { table: lineitem } + └── PhysicalScan { table: part } */ -- TPC-H Q15 @@ -1867,15 +1763,15 @@ PhysicalSort │ │ └── #2 │ ├── groups: [ #0 ] │ └── PhysicalProjection { exprs: [ #0, #1, #2 ] } - │ └── PhysicalFilter - │ ├── cond:And - │ │ ├── Geq - │ │ │ ├── #3 - │ │ │ └── 8401 - │ │ └── Lt - │ │ ├── #3 - │ │ └── 8491 - │ └── PhysicalProjection { exprs: [ #2, #5, #6, #10 ] } + │ └── PhysicalProjection { exprs: [ #2, #5, #6, #10 ] } + │ └── PhysicalFilter + │ ├── cond:And + │ │ ├── Geq + │ │ │ ├── #10 + │ │ │ └── 8401 + │ │ └── Lt + │ │ ├── #10 + │ │ └── 8491 │ └── PhysicalScan { table: lineitem } └── PhysicalAgg ├── aggrs:Agg(Max) @@ -1891,15 +1787,15 @@ PhysicalSort │ └── #2 ├── groups: [ #0 ] └── PhysicalProjection { exprs: [ #0, #1, #2 ] } - └── PhysicalFilter - ├── cond:And - │ ├── Geq - │ │ ├── #3 - │ │ └── 8401 - │ └── Lt - │ ├── #3 - │ └── 8491 - └── PhysicalProjection { exprs: [ #2, #5, #6, #10 ] } + └── PhysicalProjection { exprs: [ #2, #5, #6, #10 ] } + └── PhysicalFilter + ├── cond:And + │ ├── Geq + │ │ ├── #10 + │ │ └── 8401 + │ └── Lt + │ ├── #10 + │ └── 8491 └── PhysicalScan { table: lineitem } */ @@ -2004,15 +1900,15 @@ PhysicalProjection │ ├── PhysicalProjection { exprs: [ #1, #4, #5 ] } │ │ └── PhysicalScan { table: lineitem } │ └── PhysicalProjection { exprs: [ #0 ] } - │ └── PhysicalFilter - │ ├── cond:And - │ │ ├── Eq - │ │ │ ├── #1 - │ │ │ └── "Brand#13" - │ │ └── Eq - │ │ ├── #2 - │ │ └── "JUMBO PKG" - │ └── PhysicalProjection { exprs: [ #0, #3, #6 ] } + │ └── PhysicalProjection { exprs: [ #0, #3, #6 ] } + │ └── PhysicalFilter + │ ├── cond:And + │ │ ├── Eq + │ │ │ ├── #3 + │ │ │ └── "Brand#13" + │ │ └── Eq + │ │ ├── #6 + │ │ └── "JUMBO PKG" │ └── PhysicalScan { table: part } └── PhysicalProjection ├── exprs: @@ -2145,7 +2041,8 @@ PhysicalProjection { exprs: [ #0 ] } │ ├── Cast { cast_to: Decimal128(20, 0), expr: 1 } │ └── #6 ├── groups: [] - └── PhysicalFilter + └── PhysicalNestedLoopJoin + ├── join_type: Inner ├── cond:Or │ ├── 
And │ │ ├── Eq @@ -2204,8 +2101,7 @@ PhysicalProjection { exprs: [ #0 ] } │ └── Eq │ ├── #13 │ └── "DELIVER IN PERSON" - └── PhysicalNestedLoopJoin { join_type: Cross, cond: true } - ├── PhysicalScan { table: lineitem } - └── PhysicalScan { table: part } + ├── PhysicalScan { table: lineitem } + └── PhysicalScan { table: part } */ From c701b1414bf83a1defd7f727d8c0eaa8f8867ab8 Mon Sep 17 00:00:00 2001 From: Benjamin O Date: Tue, 26 Mar 2024 20:02:12 -0400 Subject: [PATCH 26/61] More polish + rewrite a join helper as rewrite_column_refs --- optd-datafusion-repr/src/plan_nodes.rs | 39 +++++++++------ optd-datafusion-repr/src/plan_nodes/join.rs | 2 + .../src/plan_nodes/projection.rs | 1 + .../src/rules/filter_pushdown.rs | 32 +++++------- optd-datafusion-repr/src/rules/joins.rs | 49 +++++-------------- optd-datafusion-repr/src/testing.rs | 4 +- .../src/testing/dummy_cost.rs | 3 +- 7 files changed, 58 insertions(+), 72 deletions(-) diff --git a/optd-datafusion-repr/src/plan_nodes.rs b/optd-datafusion-repr/src/plan_nodes.rs index 14918ed8..e32f9ceb 100644 --- a/optd-datafusion-repr/src/plan_nodes.rs +++ b/optd-datafusion-repr/src/plan_nodes.rs @@ -16,6 +16,7 @@ use std::fmt::Debug; use std::sync::Arc; use arrow_schema::DataType; +use itertools::Itertools; use optd_core::{ cascades::{CascadesOptimizer, GroupId}, rel_node::{RelNode, RelNodeMeta, RelNodeMetaMap, RelNodeRef, RelNodeTyp}, @@ -285,12 +286,20 @@ impl Expr { /// Recursively rewrite all column references in the expression.using a provided /// function that replaces a column index. 
- pub fn rewrite_column_refs(&self, rewrite_fn: &impl Fn(usize) -> usize) -> Self { + pub fn rewrite_column_refs( + &self, + rewrite_fn: &impl Fn(usize) -> Option, + ) -> Option { assert!(self.typ().is_expression()); if let OptRelNodeTyp::ColumnRef = self.typ() { let col_ref = ColumnRefExpr::from_rel_node(self.0.clone()).unwrap(); - let new_col_ref = ColumnRefExpr::new(rewrite_fn(col_ref.index())); - return Self(new_col_ref.into_rel_node()); + let rewritten = rewrite_fn(col_ref.index()); + return if let Some(rewritten_idx) = rewritten { + let new_col_ref = ColumnRefExpr::new(rewritten_idx); + Some(Self(new_col_ref.into_rel_node())) + } else { + None + }; } let children = self.0.children.clone(); @@ -299,23 +308,25 @@ impl Expr { .map(|child| { if child.typ == OptRelNodeTyp::List { // TODO: What should we do with List? - return child; + return Some(child); } Expr::from_rel_node(child.clone()) .unwrap() .rewrite_column_refs(rewrite_fn) - .into_rel_node() + .map(|x| x.into_rel_node()) }) - .collect(); - Expr::from_rel_node( - RelNode { - typ: self.typ(), - children, - data: self.0.data.clone(), - } - .into(), + .collect::>>()?; + Some( + Expr::from_rel_node( + RelNode { + typ: self.0.typ.clone(), + children: children.into_iter().collect_vec(), + data: self.0.data.clone(), + } + .into(), + ) + .unwrap(), ) - .unwrap() } } diff --git a/optd-datafusion-repr/src/plan_nodes/join.rs b/optd-datafusion-repr/src/plan_nodes/join.rs index a6203c10..1bc692b8 100644 --- a/optd-datafusion-repr/src/plan_nodes/join.rs +++ b/optd-datafusion-repr/src/plan_nodes/join.rs @@ -64,6 +64,8 @@ define_plan_node!( ); impl LogicalJoin { + /// Takes in left/right schema sizes, and maps an index to be as if it were + /// pushed down to the left or right side of a join accordingly. 
pub fn map_through_join( index: usize, left_schema_size: usize, diff --git a/optd-datafusion-repr/src/plan_nodes/projection.rs b/optd-datafusion-repr/src/plan_nodes/projection.rs index 461efc7c..09e6ad2d 100644 --- a/optd-datafusion-repr/src/plan_nodes/projection.rs +++ b/optd-datafusion-repr/src/plan_nodes/projection.rs @@ -65,6 +65,7 @@ impl ProjectionMapping { schema_size: usize, projection_schema_size: usize, ) -> Expr { + // TODO(bowad): rewrite to use rewritecolumnrefs if cond.typ() == OptRelNodeTyp::ColumnRef { let col = ColumnRefExpr::from_rel_node(cond.into_rel_node()).unwrap(); let idx = col.index(); diff --git a/optd-datafusion-repr/src/rules/filter_pushdown.rs b/optd-datafusion-repr/src/rules/filter_pushdown.rs index d758cee5..3efc3c39 100644 --- a/optd-datafusion-repr/src/rules/filter_pushdown.rs +++ b/optd-datafusion-repr/src/rules/filter_pushdown.rs @@ -201,9 +201,16 @@ fn filter_join_transpose( let location = determine_join_cond_dep(children, left_schema_size, right_schema_size); match location { JoinCondDependency::Left => left_conds.push(expr), - JoinCondDependency::Right => right_conds.push(expr.rewrite_column_refs(&|idx| { - LogicalJoin::map_through_join(idx, left_schema_size, right_schema_size) - })), + JoinCondDependency::Right => right_conds.push( + expr.rewrite_column_refs(&|idx| { + Some(LogicalJoin::map_through_join( + idx, + left_schema_size, + right_schema_size, + )) + }) + .unwrap(), + ), JoinCondDependency::Both => join_conds.push(expr), JoinCondDependency::None => keep_conds.push(expr), } @@ -634,13 +641,8 @@ mod tests { // Examine original filter + condition let top_level_filter = LogicalFilter::from_rel_node(plan.clone().into()).unwrap(); - let top_level_filter_cond = - LogOpExpr::from_rel_node(top_level_filter.cond().into_rel_node()).unwrap(); - assert!(matches!(top_level_filter_cond.op_type(), LogOpType::And)); - assert!(matches!(top_level_filter_cond.children().len(), 1)); let bin_op_0 = - 
BinOpExpr::from_rel_node(top_level_filter_cond.children()[0].clone().into_rel_node()) - .unwrap(); + BinOpExpr::from_rel_node(top_level_filter.cond().clone().into_rel_node()).unwrap(); assert!(matches!(bin_op_0.op_type(), BinOpType::Eq)); let col_0 = ConstantExpr::from_rel_node(bin_op_0.left_child().clone().into_rel_node()).unwrap(); @@ -667,11 +669,7 @@ mod tests { // Examine left child filter + condition let filter_1 = LogicalFilter::from_rel_node(join_node.left().into_rel_node()).unwrap(); - let filter_1_cond = LogOpExpr::from_rel_node(filter_1.cond().into_rel_node()).unwrap(); - assert!(matches!(filter_1_cond.children().len(), 1)); - assert!(matches!(filter_1_cond.op_type(), LogOpType::And)); - let bin_op_3 = - BinOpExpr::from_rel_node(filter_1_cond.children()[0].clone().into_rel_node()).unwrap(); + let bin_op_3 = BinOpExpr::from_rel_node(filter_1.cond().clone().into_rel_node()).unwrap(); assert!(matches!(bin_op_3.op_type(), BinOpType::Eq)); let col_6 = ColumnRefExpr::from_rel_node(bin_op_3.left_child().clone().into_rel_node()).unwrap(); @@ -682,11 +680,7 @@ mod tests { // Examine right child filter + condition let filter_2 = LogicalFilter::from_rel_node(join_node.right().into_rel_node()).unwrap(); - let filter_2_cond = LogOpExpr::from_rel_node(filter_2.cond().into_rel_node()).unwrap(); - assert!(matches!(filter_2_cond.op_type(), LogOpType::And)); - assert!(matches!(filter_2_cond.children().len(), 1)); - let bin_op_4 = - BinOpExpr::from_rel_node(filter_2_cond.children()[0].clone().into_rel_node()).unwrap(); + let bin_op_4 = BinOpExpr::from_rel_node(filter_2.cond().clone().into_rel_node()).unwrap(); assert!(matches!(bin_op_4.op_type(), BinOpType::Eq)); let col_8 = ColumnRefExpr::from_rel_node(bin_op_4.left_child().clone().into_rel_node()).unwrap(); diff --git a/optd-datafusion-repr/src/rules/joins.rs b/optd-datafusion-repr/src/rules/joins.rs index 4531bc96..777dd435 100644 --- a/optd-datafusion-repr/src/rules/joins.rs +++ 
b/optd-datafusion-repr/src/rules/joins.rs @@ -2,7 +2,6 @@ use std::collections::HashMap; use std::sync::Arc; use std::vec; -use itertools::Itertools; use optd_core::optimizer::Optimizer; use optd_core::rel_node::RelNode; use optd_core::rules::{Rule, RuleMatcher}; @@ -31,16 +30,16 @@ fn apply_join_commute( let cond = Expr::from_rel_node(cond.into()) .unwrap() .rewrite_column_refs(&|idx| { - if idx < left_schema.len() { + Some(if idx < left_schema.len() { idx + right_schema.len() } else { idx - left_schema.len() - } + }) }); let node = LogicalJoin::new( PlanNode::from_group(right.into()), PlanNode::from_group(left.into()), - cond, + cond.unwrap(), JoinType::Inner, ); let mut proj_expr = Vec::with_capacity(left_schema.len() + right_schema.len()); @@ -124,44 +123,22 @@ fn apply_join_assoc( cond2, }: JoinAssocRulePicks, ) -> Vec> { - // TODO(bowad): migrate to new rewrite_column_refs helper - fn rewrite_column_refs(expr: Expr, a_size: usize) -> Option { - let expr = expr.into_rel_node(); - if let Some(expr) = ColumnRefExpr::from_rel_node(expr.clone()) { - let index = expr.index(); - if index < a_size { - return None; - } else { - return Some(ColumnRefExpr::new(index - a_size).into_expr()); - } - } - let children = expr.children.clone(); - let children = children - .into_iter() - .map(|x| rewrite_column_refs(Expr::from_rel_node(x).unwrap(), a_size)) - .collect::>>()?; - Some( - Expr::from_rel_node( - RelNode { - typ: expr.typ.clone(), - children: children - .into_iter() - .map(|x| x.into_rel_node()) - .collect_vec(), - data: expr.data.clone(), - } - .into(), - ) - .unwrap(), - ) - } let a_schema = optimizer.get_property::(Arc::new(a.clone()), 0); let _b_schema = optimizer.get_property::(Arc::new(b.clone()), 0); let _c_schema = optimizer.get_property::(Arc::new(c.clone()), 0); + let cond2 = Expr::from_rel_node(cond2.into()).unwrap(); - let Some(cond2) = rewrite_column_refs(cond2, a_schema.len()) else { + + let Some(cond2) = cond2.rewrite_column_refs(&|idx| { + if idx < 
a_schema.len() { + None + } else { + Some(idx - a_schema.len()) + } + }) else { return vec![]; }; + let node = RelNode { typ: OptRelNodeTyp::Join(JoinType::Inner), children: vec![ diff --git a/optd-datafusion-repr/src/testing.rs b/optd-datafusion-repr/src/testing.rs index eae886f4..b090655e 100644 --- a/optd-datafusion-repr/src/testing.rs +++ b/optd-datafusion-repr/src/testing.rs @@ -18,10 +18,10 @@ pub fn new_test_optimizer( rule: Arc>>, ) -> HeuristicsOptimizer { let dummy_catalog = Arc::new(TpchCatalog); - let dummy_optimizer = HeuristicsOptimizer::new_with_rules( + let test_optimizer = HeuristicsOptimizer::new_with_rules( vec![rule], ApplyOrder::TopDown, Arc::new([Box::new(SchemaPropertyBuilder::new(dummy_catalog))]), ); - dummy_optimizer + test_optimizer } diff --git a/optd-datafusion-repr/src/testing/dummy_cost.rs b/optd-datafusion-repr/src/testing/dummy_cost.rs index 7f5de512..ea00fcb1 100644 --- a/optd-datafusion-repr/src/testing/dummy_cost.rs +++ b/optd-datafusion-repr/src/testing/dummy_cost.rs @@ -5,7 +5,8 @@ use optd_core::{ rel_node::{RelNode, Value}, }; -/// Dummy cost model that returns a 0 cost in all cases. Intended for testing. +/// Dummy cost model that returns a 0 cost in all cases. +/// Intended for testing with the cascades optimizer. 
pub struct DummyCostModel; impl CostModel for DummyCostModel { From 73e4e234ea85dd0d7b6e3fead89d815a98faeb80 Mon Sep 17 00:00:00 2001 From: Benjamin O Date: Tue, 26 Mar 2024 20:05:37 -0400 Subject: [PATCH 27/61] rewrite one more thing as rewrite_column_refs --- .../src/plan_nodes/projection.rs | 38 +++---------------- optd-datafusion-repr/src/rules/joins.rs | 1 - 2 files changed, 6 insertions(+), 33 deletions(-) diff --git a/optd-datafusion-repr/src/plan_nodes/projection.rs b/optd-datafusion-repr/src/plan_nodes/projection.rs index 09e6ad2d..b34bd874 100644 --- a/optd-datafusion-repr/src/plan_nodes/projection.rs +++ b/optd-datafusion-repr/src/plan_nodes/projection.rs @@ -65,39 +65,13 @@ impl ProjectionMapping { schema_size: usize, projection_schema_size: usize, ) -> Expr { - // TODO(bowad): rewrite to use rewritecolumnrefs - if cond.typ() == OptRelNodeTyp::ColumnRef { - let col = ColumnRefExpr::from_rel_node(cond.into_rel_node()).unwrap(); - let idx = col.index(); - if idx < projection_schema_size { - let col = self.projection_col_refers_to(col.index()); - return ColumnRefExpr::new(col).into_expr(); + cond.rewrite_column_refs(&|idx| { + Some(if idx < projection_schema_size { + self.projection_col_refers_to(idx) } else { - let col = col.index(); - return ColumnRefExpr::new(col - projection_schema_size + schema_size).into_expr(); - } - } - let expr = cond.into_rel_node(); - let mut children = Vec::with_capacity(expr.children.len()); - for child in &expr.children { - children.push( - self.rewrite_condition( - Expr::from_rel_node(child.clone()).unwrap(), - schema_size, - projection_schema_size, - ) - .into_rel_node(), - ); - } - - Expr::from_rel_node( - RelNode { - typ: expr.typ.clone(), - children, - data: expr.data.clone(), - } - .into(), - ) + idx - projection_schema_size + schema_size + }) + }) .unwrap() } } diff --git a/optd-datafusion-repr/src/rules/joins.rs b/optd-datafusion-repr/src/rules/joins.rs index 777dd435..42da7041 100644 --- 
a/optd-datafusion-repr/src/rules/joins.rs +++ b/optd-datafusion-repr/src/rules/joins.rs @@ -356,7 +356,6 @@ fn apply_projection_pull_up_join( LogicalJoin::new( PlanNode::from_group(left), PlanNode::from_group(right), - // TODO: possibly migrate to new rewrite_column_refs helper mapping.rewrite_condition( Expr::from_rel_node(Arc::new(cond)).unwrap(), left_schema.len(), From 4b4322ac816a72b73adeb261ac79a08728f09ffa Mon Sep 17 00:00:00 2001 From: Benjamin O Date: Tue, 26 Mar 2024 20:57:24 -0400 Subject: [PATCH 28/61] clippy fix --- optd-datafusion-repr/src/plan_nodes/projection.rs | 2 +- optd-datafusion-repr/src/rules/filter_pushdown.rs | 8 ++++---- optd-datafusion-repr/src/testing.rs | 6 +++--- optd-datafusion-repr/src/testing/tpch_catalog.rs | 12 ++++++------ 4 files changed, 14 insertions(+), 14 deletions(-) diff --git a/optd-datafusion-repr/src/plan_nodes/projection.rs b/optd-datafusion-repr/src/plan_nodes/projection.rs index b34bd874..a52782b1 100644 --- a/optd-datafusion-repr/src/plan_nodes/projection.rs +++ b/optd-datafusion-repr/src/plan_nodes/projection.rs @@ -1,4 +1,4 @@ -use optd_core::rel_node::RelNode; + use super::expr::ExprList; use super::macros::define_plan_node; diff --git a/optd-datafusion-repr/src/rules/filter_pushdown.rs b/optd-datafusion-repr/src/rules/filter_pushdown.rs index 3efc3c39..18c5c15a 100644 --- a/optd-datafusion-repr/src/rules/filter_pushdown.rs +++ b/optd-datafusion-repr/src/rules/filter_pushdown.rs @@ -465,7 +465,7 @@ mod tests { assert!(matches!(plan.typ, OptRelNodeTyp::Filter)); let cond_log_op = LogOpExpr::from_rel_node( - LogicalFilter::from_rel_node((plan.clone()).into()) + LogicalFilter::from_rel_node(plan.clone()) .unwrap() .cond() .into_rel_node(), @@ -509,7 +509,7 @@ mod tests { let filter = LogicalFilter::new(proj.into_plan_node(), filter_expr); let plan = test_optimizer - .optimize(filter.into_rel_node().into()) + .optimize(filter.into_rel_node()) .unwrap(); assert_eq!(plan.typ, OptRelNodeTyp::Projection); @@ -640,7 
+640,7 @@ mod tests { let plan = test_optimizer.optimize(filter.into_rel_node()).unwrap(); // Examine original filter + condition - let top_level_filter = LogicalFilter::from_rel_node(plan.clone().into()).unwrap(); + let top_level_filter = LogicalFilter::from_rel_node(plan.clone()).unwrap(); let bin_op_0 = BinOpExpr::from_rel_node(top_level_filter.cond().clone().into_rel_node()).unwrap(); assert!(matches!(bin_op_0.op_type(), BinOpType::Eq)); @@ -729,7 +729,7 @@ mod tests { let plan = test_optimizer.optimize(filter.into_rel_node()).unwrap(); - let plan_filter = LogicalFilter::from_rel_node(plan.clone().into()).unwrap(); + let plan_filter = LogicalFilter::from_rel_node(plan.clone()).unwrap(); assert!(matches!(plan_filter.0.typ(), OptRelNodeTyp::Filter)); let plan_filter_expr = LogOpExpr::from_rel_node(plan_filter.cond().into_rel_node()).unwrap(); diff --git a/optd-datafusion-repr/src/testing.rs b/optd-datafusion-repr/src/testing.rs index b090655e..4f7ed659 100644 --- a/optd-datafusion-repr/src/testing.rs +++ b/optd-datafusion-repr/src/testing.rs @@ -18,10 +18,10 @@ pub fn new_test_optimizer( rule: Arc>>, ) -> HeuristicsOptimizer { let dummy_catalog = Arc::new(TpchCatalog); - let test_optimizer = HeuristicsOptimizer::new_with_rules( + + HeuristicsOptimizer::new_with_rules( vec![rule], ApplyOrder::TopDown, Arc::new([Box::new(SchemaPropertyBuilder::new(dummy_catalog))]), - ); - test_optimizer + ) } diff --git a/optd-datafusion-repr/src/testing/tpch_catalog.rs b/optd-datafusion-repr/src/testing/tpch_catalog.rs index a1a10a2a..be311f46 100644 --- a/optd-datafusion-repr/src/testing/tpch_catalog.rs +++ b/optd-datafusion-repr/src/testing/tpch_catalog.rs @@ -12,7 +12,8 @@ impl Catalog for TpchCatalog { match name { "customer" => { // Define the schema for the "customer" table - let schema = Schema { + + Schema { fields: vec![ Field { name: "custkey".to_string(), @@ -55,12 +56,12 @@ impl Catalog for TpchCatalog { nullable: false, }, ], - }; - schema + } } "orders" => { // 
Define the schema for the "orders" table - let schema = Schema { + + Schema { fields: vec![ Field { name: "orderkey".to_string(), @@ -108,8 +109,7 @@ impl Catalog for TpchCatalog { nullable: false, }, ], - }; - schema + } } // Add more cases for other tables as needed _ => { From b407d8ac490e45f8c22e074e2bc2b2aadf0e7ee1 Mon Sep 17 00:00:00 2001 From: Benjamin O Date: Tue, 26 Mar 2024 20:59:39 -0400 Subject: [PATCH 29/61] fmt fix --- optd-datafusion-repr/src/plan_nodes/projection.rs | 2 -- optd-datafusion-repr/src/rules/filter_pushdown.rs | 4 +--- optd-datafusion-repr/src/testing.rs | 2 +- optd-datafusion-repr/src/testing/tpch_catalog.rs | 4 ++-- 4 files changed, 4 insertions(+), 8 deletions(-) diff --git a/optd-datafusion-repr/src/plan_nodes/projection.rs b/optd-datafusion-repr/src/plan_nodes/projection.rs index a52782b1..42602a1a 100644 --- a/optd-datafusion-repr/src/plan_nodes/projection.rs +++ b/optd-datafusion-repr/src/plan_nodes/projection.rs @@ -1,5 +1,3 @@ - - use super::expr::ExprList; use super::macros::define_plan_node; diff --git a/optd-datafusion-repr/src/rules/filter_pushdown.rs b/optd-datafusion-repr/src/rules/filter_pushdown.rs index 18c5c15a..8d9d34f0 100644 --- a/optd-datafusion-repr/src/rules/filter_pushdown.rs +++ b/optd-datafusion-repr/src/rules/filter_pushdown.rs @@ -508,9 +508,7 @@ mod tests { .into_expr(); let filter = LogicalFilter::new(proj.into_plan_node(), filter_expr); - let plan = test_optimizer - .optimize(filter.into_rel_node()) - .unwrap(); + let plan = test_optimizer.optimize(filter.into_rel_node()).unwrap(); assert_eq!(plan.typ, OptRelNodeTyp::Projection); assert!(matches!(plan.child(0).typ, OptRelNodeTyp::Filter)); diff --git a/optd-datafusion-repr/src/testing.rs b/optd-datafusion-repr/src/testing.rs index 4f7ed659..9ce555ac 100644 --- a/optd-datafusion-repr/src/testing.rs +++ b/optd-datafusion-repr/src/testing.rs @@ -18,7 +18,7 @@ pub fn new_test_optimizer( rule: Arc>>, ) -> HeuristicsOptimizer { let dummy_catalog = 
Arc::new(TpchCatalog); - + HeuristicsOptimizer::new_with_rules( vec![rule], ApplyOrder::TopDown, diff --git a/optd-datafusion-repr/src/testing/tpch_catalog.rs b/optd-datafusion-repr/src/testing/tpch_catalog.rs index be311f46..cc5c1389 100644 --- a/optd-datafusion-repr/src/testing/tpch_catalog.rs +++ b/optd-datafusion-repr/src/testing/tpch_catalog.rs @@ -12,7 +12,7 @@ impl Catalog for TpchCatalog { match name { "customer" => { // Define the schema for the "customer" table - + Schema { fields: vec![ Field { @@ -60,7 +60,7 @@ impl Catalog for TpchCatalog { } "orders" => { // Define the schema for the "orders" table - + Schema { fields: vec![ Field { From db56f2711ccf0b67e2d821ef02477e4fef1d5f47 Mon Sep 17 00:00:00 2001 From: Benjamin O Date: Tue, 26 Mar 2024 21:05:09 -0400 Subject: [PATCH 30/61] Fmt fix again --- .../src/rules/filter_pushdown.rs | 32 +++++++------------ 1 file changed, 12 insertions(+), 20 deletions(-) diff --git a/optd-datafusion-repr/src/rules/filter_pushdown.rs b/optd-datafusion-repr/src/rules/filter_pushdown.rs index 8d9d34f0..bd898b8d 100644 --- a/optd-datafusion-repr/src/rules/filter_pushdown.rs +++ b/optd-datafusion-repr/src/rules/filter_pushdown.rs @@ -60,18 +60,14 @@ fn determine_join_cond_dep( let mut left_col = false; let mut right_col = false; for child in children { - match child.typ() { - OptRelNodeTyp::ColumnRef => { - let col_ref = ColumnRefExpr::from_rel_node(child.clone().into_rel_node()).unwrap(); - let index = col_ref.index(); - if index < left_schema_size { - left_col = true; - } else if index >= left_schema_size && index < left_schema_size + right_schema_size - { - right_col = true; - } + if child.typ() == OptRelNodeTyp::ColumnRef { + let col_ref = ColumnRefExpr::from_rel_node(child.clone().into_rel_node()).unwrap(); + let index = col_ref.index(); + if index < left_schema_size { + left_col = true; + } else if index >= left_schema_size && index < left_schema_size + right_schema_size { + right_col = true; } - _ => {} } } match 
(left_col, right_col) { @@ -313,16 +309,12 @@ fn filter_agg_transpose( let categorization_fn = |expr: Expr, children: &Vec| { let mut group_by_cols_only = true; for child in children { - match child.typ() { - OptRelNodeTyp::ColumnRef => { - let col_ref = - ColumnRefExpr::from_rel_node(child.clone().into_rel_node()).unwrap(); - if !group_cols.contains(&col_ref.index()) { - group_by_cols_only = false; - break; - } + if child.typ() == OptRelNodeTyp::ColumnRef { + let col_ref = ColumnRefExpr::from_rel_node(child.clone().into_rel_node()).unwrap(); + if !group_cols.contains(&col_ref.index()) { + group_by_cols_only = false; + break; } - _ => {} } } if group_by_cols_only { From e60e3ebea86d255ff75752c387bebca9ea054c37 Mon Sep 17 00:00:00 2001 From: Benjamin O Date: Tue, 26 Mar 2024 21:35:14 -0400 Subject: [PATCH 31/61] Additional documentation for rewrite_column_refs --- optd-datafusion-repr/src/plan_nodes.rs | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/optd-datafusion-repr/src/plan_nodes.rs b/optd-datafusion-repr/src/plan_nodes.rs index e32f9ceb..a74f072b 100644 --- a/optd-datafusion-repr/src/plan_nodes.rs +++ b/optd-datafusion-repr/src/plan_nodes.rs @@ -286,6 +286,11 @@ impl Expr { /// Recursively rewrite all column references in the expression.using a provided /// function that replaces a column index. + /// The provided function will, given a ColumnRefExpr's index, + /// return either Some(usize) or None. + /// - If it is Some, the column index can be rewritten with the value. + /// - If any of the columns is None, we will return None all the way up + /// the call stack, and no expression will be returned. 
pub fn rewrite_column_refs( &self, rewrite_fn: &impl Fn(usize) -> Option, From 5cae5b3d1e7035581c79161ea21a1e04c1d785e2 Mon Sep 17 00:00:00 2001 From: Benjamin Owad Date: Tue, 26 Mar 2024 23:03:05 -0400 Subject: [PATCH 32/61] Remove old TODOs in filter_pushdown.rs --- optd-datafusion-repr/src/rules/filter_pushdown.rs | 4 ---- 1 file changed, 4 deletions(-) diff --git a/optd-datafusion-repr/src/rules/filter_pushdown.rs b/optd-datafusion-repr/src/rules/filter_pushdown.rs index bd898b8d..90c44027 100644 --- a/optd-datafusion-repr/src/rules/filter_pushdown.rs +++ b/optd-datafusion-repr/src/rules/filter_pushdown.rs @@ -136,7 +136,6 @@ fn filter_project_transpose( let old_proj = LogicalProjection::from_rel_node(child.into()).unwrap(); let cond_as_expr = Expr::from_rel_node(cond.into()).unwrap(); - // TODO: Implement get_property in heuristics optimizer let projection_schema_len = optimizer .get_property::(old_proj.clone().into_rel_node(), 0) .len(); @@ -356,7 +355,6 @@ fn apply_filter_pushdown( let mut result_from_this_step = match child.typ { OptRelNodeTyp::Projection => filter_project_transpose(optimizer, child, cond), OptRelNodeTyp::Filter => filter_merge(optimizer, child, cond), - // OptRelNodeTyp::Scan => todo!(), // TODO: Add predicate field to scan node OptRelNodeTyp::Join(_) => filter_join_transpose(optimizer, child, cond), OptRelNodeTyp::Sort => filter_sort_transpose(optimizer, child, cond), OptRelNodeTyp::Agg => filter_agg_transpose(optimizer, child, cond), @@ -372,7 +370,6 @@ fn apply_filter_pushdown( let child_as_filter = LogicalFilter::from_rel_node(child.clone()).unwrap(); let childs_child = child_as_filter.child().into_rel_node().as_ref().clone(); let childs_cond = child_as_filter.cond().into_rel_node().as_ref().clone(); - // @todo: make this iterative? 
let result = apply_filter_pushdown( optimizer, FilterPushdownRulePicks { @@ -432,7 +429,6 @@ mod tests { #[test] fn filter_merge() { - // TODO: write advanced proj with more expr that need to be transformed let mut test_optimizer = new_test_optimizer(Arc::new(FilterPushdownRule::new())); let scan = LogicalScan::new("customer".into()); From 9fdaa257f6e7fb7685dea0ac5e36ad1d619089b1 Mon Sep 17 00:00:00 2001 From: Benjamin O Date: Thu, 28 Mar 2024 15:31:04 -0400 Subject: [PATCH 33/61] Separate filter pushdown rules (includes helper modifications) --- optd-datafusion-repr/src/lib.rs | 17 +- .../src/plan_nodes/projection.rs | 16 +- optd-datafusion-repr/src/rules.rs | 5 +- .../src/rules/filter_pushdown.rs | 277 ++++++++++-------- optd-datafusion-repr/src/rules/joins.rs | 5 +- 5 files changed, 183 insertions(+), 137 deletions(-) diff --git a/optd-datafusion-repr/src/lib.rs b/optd-datafusion-repr/src/lib.rs index 72d8c92d..2e52a871 100644 --- a/optd-datafusion-repr/src/lib.rs +++ b/optd-datafusion-repr/src/lib.rs @@ -23,13 +23,17 @@ use properties::{ }; use rules::{ EliminateDuplicatedAggExprRule, EliminateDuplicatedSortExprRule, EliminateFilterRule, - EliminateJoinRule, EliminateLimitRule, FilterPushdownRule, HashJoinRule, JoinAssocRule, - JoinCommuteRule, PhysicalConversionRule, ProjectionPullUpJoin, SimplifyFilterRule, - SimplifyJoinCondRule, + EliminateJoinRule, EliminateLimitRule, HashJoinRule, JoinAssocRule, JoinCommuteRule, + PhysicalConversionRule, ProjectionPullUpJoin, SimplifyFilterRule, SimplifyJoinCondRule, }; pub use optd_core::rel_node::Value; +use crate::rules::{ + FilterAggTransposeRule, FilterCrossJoinTransposeRule, FilterInnerJoinTransposeRule, + FilterMergeRule, FilterProjectTransposeRule, FilterSortTransposeRule, +}; + pub mod cost; mod explain; pub mod plan_nodes; @@ -80,7 +84,12 @@ impl DatafusionOptimizer { vec![ Arc::new(SimplifyFilterRule::new()), Arc::new(SimplifyJoinCondRule::new()), - Arc::new(FilterPushdownRule::new()), + 
Arc::new(FilterProjectTransposeRule::new()), + Arc::new(FilterMergeRule::new()), + Arc::new(FilterCrossJoinTransposeRule::new()), + Arc::new(FilterInnerJoinTransposeRule::new()), + Arc::new(FilterSortTransposeRule::new()), + Arc::new(FilterAggTransposeRule::new()), Arc::new(EliminateFilterRule::new()), Arc::new(EliminateJoinRule::new()), Arc::new(EliminateLimitRule::new()), diff --git a/optd-datafusion-repr/src/plan_nodes/projection.rs b/optd-datafusion-repr/src/plan_nodes/projection.rs index 42602a1a..a7bb872d 100644 --- a/optd-datafusion-repr/src/plan_nodes/projection.rs +++ b/optd-datafusion-repr/src/plan_nodes/projection.rs @@ -57,17 +57,13 @@ impl ProjectionMapping { /// Recursively rewrites all ColumnRefs in an Expr to what the projection /// node is rewriting. E.g. if Projection is A -> B, B will be rewritten as A - pub fn rewrite_condition( - &self, - cond: Expr, - schema_size: usize, - projection_schema_size: usize, - ) -> Expr { + pub fn rewrite_condition(&self, cond: Expr, child_schema_len: usize) -> Expr { + let proj_schema_size = self.forward.len(); cond.rewrite_column_refs(&|idx| { - Some(if idx < projection_schema_size { + Some(if idx < proj_schema_size { self.projection_col_refers_to(idx) } else { - idx - projection_schema_size + schema_size + idx - proj_schema_size + child_schema_len }) }) .unwrap() @@ -75,9 +71,9 @@ impl ProjectionMapping { } impl LogicalProjection { - pub fn compute_column_mapping(&self) -> Option { + pub fn compute_column_mapping(exprs: &ExprList) -> Option { let mut mapping = vec![]; - for expr in self.exprs().to_vec() { + for expr in exprs.to_vec() { let col_expr = ColumnRefExpr::from_rel_node(expr.into_rel_node())?; mapping.push(col_expr.index()); } diff --git a/optd-datafusion-repr/src/rules.rs b/optd-datafusion-repr/src/rules.rs index af6e2fcb..e02337ca 100644 --- a/optd-datafusion-repr/src/rules.rs +++ b/optd-datafusion-repr/src/rules.rs @@ -13,7 +13,10 @@ pub use eliminate_duplicated_expr::{ }; pub use 
eliminate_limit::EliminateLimitRule; pub use filter::{EliminateFilterRule, SimplifyFilterRule, SimplifyJoinCondRule}; -pub use filter_pushdown::FilterPushdownRule; +pub use filter_pushdown::{ + FilterAggTransposeRule, FilterCrossJoinTransposeRule, FilterInnerJoinTransposeRule, + FilterMergeRule, FilterProjectTransposeRule, FilterSortTransposeRule, +}; pub use joins::{ EliminateJoinRule, HashJoinRule, JoinAssocRule, JoinCommuteRule, ProjectionPullUpJoin, }; diff --git a/optd-datafusion-repr/src/rules/filter_pushdown.rs b/optd-datafusion-repr/src/rules/filter_pushdown.rs index bd898b8d..0b4b4744 100644 --- a/optd-datafusion-repr/src/rules/filter_pushdown.rs +++ b/optd-datafusion-repr/src/rules/filter_pushdown.rs @@ -16,18 +16,12 @@ use optd_core::{optimizer::Optimizer, rel_node::RelNode}; use crate::plan_nodes::{ ColumnRefExpr, Expr, ExprList, JoinType, LogOpExpr, LogOpType, LogicalAgg, LogicalFilter, - LogicalJoin, LogicalProjection, LogicalSort, OptRelNode, OptRelNodeTyp, + LogicalJoin, LogicalProjection, LogicalSort, OptRelNode, OptRelNodeTyp, PlanNode, }; use crate::properties::schema::SchemaPropertyBuilder; use super::macros::define_rule; -define_rule!( - FilterPushdownRule, - apply_filter_pushdown, - (Filter, [child], [cond]) -); - /// Emits a LogOpExpr AND if the list has more than one element /// Otherwise, returns the single element fn and_expr_list_to_expr(exprs: Vec) -> Expr { @@ -124,70 +118,141 @@ fn categorize_conds(mut categorization_fn: impl FnMut(Expr, &Vec), cond: E } } +define_rule!( + FilterProjectTransposeRule, + apply_filter_project_transpose, + (Filter, (Projection, child, [exprs]), [cond]) +); + /// Datafusion only pushes filter past project when the project does not contain /// volatile (i.e. 
non-deterministic) expressions that are present in the filter /// Calcite only checks if the projection contains a windowing calculation /// We check neither of those things and do it always (which may be wrong) -fn filter_project_transpose( +fn apply_filter_project_transpose( optimizer: &impl Optimizer, - child: RelNode, - cond: RelNode, + FilterProjectTransposeRulePicks { child, exprs, cond }: FilterProjectTransposeRulePicks, ) -> Vec> { - let old_proj = LogicalProjection::from_rel_node(child.into()).unwrap(); - let cond_as_expr = Expr::from_rel_node(cond.into()).unwrap(); - - // TODO: Implement get_property in heuristics optimizer - let projection_schema_len = optimizer - .get_property::(old_proj.clone().into_rel_node(), 0) - .len(); let child_schema_len = optimizer - .get_property::(old_proj.clone().into_rel_node(), 0) + .get_property::(child.clone().into(), 0) .len(); - let proj_col_map = old_proj.compute_column_mapping().unwrap(); - let rewritten_cond = proj_col_map.rewrite_condition( - cond_as_expr.clone(), - projection_schema_len, - child_schema_len, - ); + let child = PlanNode::from_rel_node(child.into()).unwrap(); + let cond_as_expr = Expr::from_rel_node(cond.into()).unwrap(); + let exprs = ExprList::from_rel_node(exprs.into()).unwrap(); + + let proj_col_map = LogicalProjection::compute_column_mapping(&exprs).unwrap(); + let rewritten_cond = proj_col_map.rewrite_condition(cond_as_expr.clone(), child_schema_len); - let new_filter_node = LogicalFilter::new(old_proj.child(), rewritten_cond); - let new_proj = LogicalProjection::new(new_filter_node.into_plan_node(), old_proj.exprs()); + let new_filter_node = LogicalFilter::new(child, rewritten_cond); + let new_proj = LogicalProjection::new(new_filter_node.into_plan_node(), exprs); vec![new_proj.into_rel_node().as_ref().clone()] } -fn filter_merge( +define_rule!( + FilterMergeRule, + apply_filter_merge, + (Filter, (Filter, child, [cond1]), [cond]) +); + +fn apply_filter_merge( _optimizer: &impl Optimizer, - 
child: RelNode, - cond: RelNode, + FilterMergeRulePicks { child, cond1, cond }: FilterMergeRulePicks, ) -> Vec> { - let child_filter = LogicalFilter::from_rel_node(child.into()).unwrap(); - let child_filter_cond = child_filter.cond().clone(); + let child = PlanNode::from_rel_node(child.into()).unwrap(); let curr_cond = Expr::from_rel_node(cond.into()).unwrap(); - let merged_cond = merge_conds(curr_cond, child_filter_cond); - let new_filter = LogicalFilter::new(child_filter.child(), merged_cond); + let child_cond = Expr::from_rel_node(cond1.into()).unwrap(); + + let merged_cond = merge_conds(curr_cond, child_cond); + + let new_filter = LogicalFilter::new(child, merged_cond); vec![new_filter.into_rel_node().as_ref().clone()] } +// TODO: define_rule! should be able to match on any join type, ideally... + +define_rule!( + FilterCrossJoinTransposeRule, + apply_filter_cross_join_transpose, + ( + Filter, + (Join(JoinType::Cross), child_a, child_b, [join_cond]), + [cond] + ) +); + +fn apply_filter_cross_join_transpose( + optimizer: &impl Optimizer, + FilterCrossJoinTransposeRulePicks { + child_a, + child_b, + join_cond, + cond, + }: FilterCrossJoinTransposeRulePicks, +) -> Vec> { + filter_join_transpose( + optimizer, + JoinType::Cross, + child_a, + child_b, + join_cond, + cond, + ) +} + +define_rule!( + FilterInnerJoinTransposeRule, + apply_filter_inner_join_transpose, + ( + Filter, + (Join(JoinType::Inner), child_a, child_b, [join_cond]), + [cond] + ) +); + +fn apply_filter_inner_join_transpose( + optimizer: &impl Optimizer, + FilterInnerJoinTransposeRulePicks { + child_a, + child_b, + join_cond, + cond, + }: FilterInnerJoinTransposeRulePicks, +) -> Vec> { + filter_join_transpose( + optimizer, + JoinType::Inner, + child_a, + child_b, + join_cond, + cond, + ) +} + /// Cases: /// - Push down to the left child (only involves keys from the left child) /// - Push down to the right child (only involves keys from the right child) /// - Push into the join condition (involves keys 
from both children) fn filter_join_transpose( optimizer: &impl Optimizer, - child: RelNode, - cond: RelNode, + join_typ: JoinType, + join_child_a: RelNode, + join_child_b: RelNode, + join_cond: RelNode, + filter_cond: RelNode, ) -> Vec> { - // TODO: Push existing join conditions down as well - let old_join = LogicalJoin::from_rel_node(child.into()).unwrap(); - let left_schema_size = optimizer - .get_property::(old_join.left().into_rel_node(), 0) + .get_property::(join_child_a.clone().into(), 0) .len(); let right_schema_size = optimizer - .get_property::(old_join.right().into_rel_node(), 0) + .get_property::(join_child_b.clone().into(), 0) .len(); + let join_child_a = PlanNode::from_rel_node(join_child_a.into()).unwrap(); + let join_child_b = PlanNode::from_rel_node(join_child_b.into()).unwrap(); + let join_cond = Expr::from_rel_node(join_cond.into()).unwrap(); + let filter_cond = Expr::from_rel_node(filter_cond.into()).unwrap(); + // TODO: Push existing join conditions down as well + let mut left_conds = vec![]; let mut right_conds = vec![]; let mut join_conds = vec![]; @@ -211,27 +276,25 @@ fn filter_join_transpose( JoinCondDependency::None => keep_conds.push(expr), } }; - categorize_conds(categorization_fn, Expr::from_rel_node(cond.into()).unwrap()); + categorize_conds(categorization_fn, filter_cond); let new_left = if !left_conds.is_empty() { - let new_filter_node = - LogicalFilter::new(old_join.left(), and_expr_list_to_expr(left_conds)); + let new_filter_node = LogicalFilter::new(join_child_a, and_expr_list_to_expr(left_conds)); new_filter_node.into_plan_node() } else { - old_join.left() + join_child_a }; let new_right = if !right_conds.is_empty() { - let new_filter_node = - LogicalFilter::new(old_join.right(), and_expr_list_to_expr(right_conds)); + let new_filter_node = LogicalFilter::new(join_child_b, and_expr_list_to_expr(right_conds)); new_filter_node.into_plan_node() } else { - old_join.right() + join_child_b }; - let new_join = match old_join.join_type() 
{ + let new_join = match join_typ { JoinType::Inner => { - let old_cond = old_join.cond(); + let old_cond = join_cond; let new_conds = merge_conds(and_expr_list_to_expr(join_conds), old_cond); LogicalJoin::new(new_left, new_right, new_conds, JoinType::Inner) } @@ -244,12 +307,12 @@ fn filter_join_transpose( JoinType::Inner, ) } else { - LogicalJoin::new(new_left, new_right, old_join.cond(), JoinType::Cross) + LogicalJoin::new(new_left, new_right, join_cond, JoinType::Cross) } } _ => { // We don't support modifying the join condition for other join types yet - LogicalJoin::new(new_left, new_right, old_join.cond(), old_join.join_type()) + LogicalJoin::new(new_left, new_right, join_cond, join_typ) } }; @@ -264,33 +327,52 @@ fn filter_join_transpose( vec![new_filter] } +define_rule!( + FilterSortTransposeRule, + apply_filter_sort_transpose, + (Filter, (Sort, child, [exprs]), [cond]) +); + /// Filter and sort should always be commutable. -fn filter_sort_transpose( +fn apply_filter_sort_transpose( _optimizer: &impl Optimizer, - child: RelNode, - cond: RelNode, + FilterSortTransposeRulePicks { child, exprs, cond }: FilterSortTransposeRulePicks, ) -> Vec> { - let old_sort = LogicalSort::from_rel_node(child.into()).unwrap(); + let child = PlanNode::from_rel_node(child.into()).unwrap(); + let exprs = ExprList::from_rel_node(exprs.into()).unwrap(); + let cond_as_expr = Expr::from_rel_node(cond.into()).unwrap(); - let new_filter_node = LogicalFilter::new(old_sort.child(), cond_as_expr); + let new_filter_node = LogicalFilter::new(child, cond_as_expr); // Exprs should be the same, no projections have occurred here. 
- let new_sort = LogicalSort::new(new_filter_node.into_plan_node(), old_sort.exprs()); + let new_sort = LogicalSort::new(new_filter_node.into_plan_node(), exprs); vec![new_sort.into_rel_node().as_ref().clone()] } +define_rule!( + FilterAggTransposeRule, + apply_filter_agg_transpose, + (Filter, (Agg, child, [exprs], [groups]), [cond]) +); + /// Filter is commutable past aggregations when the filter condition only /// involves the group by columns. -fn filter_agg_transpose( +fn apply_filter_agg_transpose( _optimizer: &impl Optimizer, - child: RelNode, - cond: RelNode, + FilterAggTransposeRulePicks { + child, + exprs, + groups, + cond, + }: FilterAggTransposeRulePicks, ) -> Vec> { - let old_agg = LogicalAgg::from_rel_node(child.into()).unwrap(); - let group_exprs = old_agg.groups(); + let exprs = ExprList::from_rel_node(exprs.into()).unwrap(); + let groups = ExprList::from_rel_node(groups.into()).unwrap(); + let child = PlanNode::from_rel_node(child.into()).unwrap(); // Get top-level group-by columns. Does not cover cases where group-by exprs // are more complex than a top-level column reference. 
- let group_cols = group_exprs + let group_cols = groups + .clone() .into_rel_node() .children .iter() @@ -327,15 +409,15 @@ fn filter_agg_transpose( let new_child = if !push_conds.is_empty() { LogicalFilter::new( - old_agg.child(), + child, LogOpExpr::new(LogOpType::And, ExprList::new(push_conds)).into_expr(), ) .into_plan_node() } else { - old_agg.child().into_plan_node() + child }; - let new_agg = LogicalAgg::new(new_child, old_agg.exprs(), old_agg.groups()); + let new_agg = LogicalAgg::new(new_child, exprs, groups); let new_filter = LogicalFilter::new( new_agg.into_plan_node(), @@ -348,50 +430,6 @@ fn filter_agg_transpose( vec![new_filter] } -fn apply_filter_pushdown( - optimizer: &impl Optimizer, - FilterPushdownRulePicks { child, cond }: FilterPushdownRulePicks, -) -> Vec> { - // Push filter down one node - let mut result_from_this_step = match child.typ { - OptRelNodeTyp::Projection => filter_project_transpose(optimizer, child, cond), - OptRelNodeTyp::Filter => filter_merge(optimizer, child, cond), - // OptRelNodeTyp::Scan => todo!(), // TODO: Add predicate field to scan node - OptRelNodeTyp::Join(_) => filter_join_transpose(optimizer, child, cond), - OptRelNodeTyp::Sort => filter_sort_transpose(optimizer, child, cond), - OptRelNodeTyp::Agg => filter_agg_transpose(optimizer, child, cond), - _ => vec![], - }; - - // Apply rule recursively - if let Some(new_node) = result_from_this_step.first_mut() { - // For all the children in our result, - for child in new_node.children.iter_mut() { - if child.typ == OptRelNodeTyp::Filter { - // If this node is a filter, apply the rule again to this node! - let child_as_filter = LogicalFilter::from_rel_node(child.clone()).unwrap(); - let childs_child = child_as_filter.child().into_rel_node().as_ref().clone(); - let childs_cond = child_as_filter.cond().into_rel_node().as_ref().clone(); - // @todo: make this iterative? 
- let result = apply_filter_pushdown( - optimizer, - FilterPushdownRulePicks { - child: childs_child, - cond: childs_cond, - }, - ); - // If we got a result, that is the replacement for this child - if let Some(&new_child) = result.first().as_ref() { - *child = new_child.to_owned().into(); - } - } - // Otherwise, if there was no result from rule application or this is not a filter, do not modify the child - } - } - - result_from_this_step -} - #[cfg(test)] mod tests { use std::sync::Arc; @@ -404,13 +442,16 @@ mod tests { LogicalAgg, LogicalFilter, LogicalJoin, LogicalProjection, LogicalScan, LogicalSort, OptRelNode, OptRelNodeTyp, }, - rules::FilterPushdownRule, + rules::{ + FilterAggTransposeRule, FilterInnerJoinTransposeRule, FilterMergeRule, + FilterProjectTransposeRule, FilterSortTransposeRule, + }, testing::new_test_optimizer, }; #[test] fn push_past_sort() { - let mut test_optimizer = new_test_optimizer(Arc::new(FilterPushdownRule::new())); + let mut test_optimizer = new_test_optimizer(Arc::new(FilterSortTransposeRule::new())); let scan = LogicalScan::new("customer".into()); let sort = LogicalSort::new(scan.into_plan_node(), ExprList::new(vec![])); @@ -433,7 +474,7 @@ mod tests { #[test] fn filter_merge() { // TODO: write advanced proj with more expr that need to be transformed - let mut test_optimizer = new_test_optimizer(Arc::new(FilterPushdownRule::new())); + let mut test_optimizer = new_test_optimizer(Arc::new(FilterMergeRule::new())); let scan = LogicalScan::new("customer".into()); let filter_ch_expr = BinOpExpr::new( @@ -487,7 +528,7 @@ mod tests { #[test] fn push_past_proj_basic() { - let mut test_optimizer = new_test_optimizer(Arc::new(FilterPushdownRule::new())); + let mut test_optimizer = new_test_optimizer(Arc::new(FilterProjectTransposeRule::new())); let scan = LogicalScan::new("customer".into()); let proj = LogicalProjection::new(scan.into_plan_node(), ExprList::new(vec![])); @@ -508,7 +549,7 @@ mod tests { #[test] fn push_past_proj_adv() { - let 
mut test_optimizer = new_test_optimizer(Arc::new(FilterPushdownRule::new())); + let mut test_optimizer = new_test_optimizer(Arc::new(FilterProjectTransposeRule::new())); let scan = LogicalScan::new("customer".into()); let proj = LogicalProjection::new( @@ -567,9 +608,9 @@ mod tests { fn push_past_join_conjunction() { // Test pushing a complex filter past a join, where one clause can // be pushed to the left child, one to the right child, one gets incorporated - // into the (now inner) join condition, and a constant one remains in the + // into the join condition, and a constant one remains in the // original filter. - let mut test_optimizer = new_test_optimizer(Arc::new(FilterPushdownRule::new())); + let mut test_optimizer = new_test_optimizer(Arc::new(FilterInnerJoinTransposeRule::new())); let scan1 = LogicalScan::new("customer".into()); @@ -685,7 +726,7 @@ mod tests { // Test pushing a filter past an aggregation node, where the filter // condition has one clause that can be pushed down to the child and // one that must remain in the filter. - let mut test_optimizer = new_test_optimizer(Arc::new(FilterPushdownRule::new())); + let mut test_optimizer = new_test_optimizer(Arc::new(FilterAggTransposeRule::new())); let scan = LogicalScan::new("customer".into()); diff --git a/optd-datafusion-repr/src/rules/joins.rs b/optd-datafusion-repr/src/rules/joins.rs index 42da7041..d76083ba 100644 --- a/optd-datafusion-repr/src/rules/joins.rs +++ b/optd-datafusion-repr/src/rules/joins.rs @@ -338,14 +338,12 @@ fn apply_projection_pull_up_join( let projection = LogicalProjection::new(PlanNode::from_group(left.clone()), list.clone()); - let Some(mapping) = projection.compute_column_mapping() else { + let Some(mapping) = LogicalProjection::compute_column_mapping(&projection.exprs()) else { return vec![]; }; // TODO(chi): support capture projection node. 
let left_schema = optimizer.get_property::(left.clone(), 0); - let projection_schema = - optimizer.get_property::(projection.into_rel_node().clone(), 0); let right_schema = optimizer.get_property::(right.clone(), 0); let mut new_projection_exprs = list.to_vec(); for i in 0..right_schema.len() { @@ -359,7 +357,6 @@ fn apply_projection_pull_up_join( mapping.rewrite_condition( Expr::from_rel_node(Arc::new(cond)).unwrap(), left_schema.len(), - projection_schema.len(), ), JoinType::Inner, ) From 6119dbbc3e2963074c636d17e50721355f9b6593 Mon Sep 17 00:00:00 2001 From: Benjamin O Date: Thu, 28 Mar 2024 15:43:49 -0400 Subject: [PATCH 34/61] Get children from groups, now --- optd-datafusion-repr/src/rules/filter_pushdown.rs | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/optd-datafusion-repr/src/rules/filter_pushdown.rs b/optd-datafusion-repr/src/rules/filter_pushdown.rs index 0b4b4744..b6830531 100644 --- a/optd-datafusion-repr/src/rules/filter_pushdown.rs +++ b/optd-datafusion-repr/src/rules/filter_pushdown.rs @@ -158,7 +158,7 @@ fn apply_filter_merge( _optimizer: &impl Optimizer, FilterMergeRulePicks { child, cond1, cond }: FilterMergeRulePicks, ) -> Vec> { - let child = PlanNode::from_rel_node(child.into()).unwrap(); + let child = PlanNode::from_group(child.into()); let curr_cond = Expr::from_rel_node(cond.into()).unwrap(); let child_cond = Expr::from_rel_node(cond1.into()).unwrap(); @@ -247,8 +247,8 @@ fn filter_join_transpose( .get_property::(join_child_b.clone().into(), 0) .len(); - let join_child_a = PlanNode::from_rel_node(join_child_a.into()).unwrap(); - let join_child_b = PlanNode::from_rel_node(join_child_b.into()).unwrap(); + let join_child_a = PlanNode::from_group(join_child_a.into()); + let join_child_b = PlanNode::from_group(join_child_b.into()); let join_cond = Expr::from_rel_node(join_cond.into()).unwrap(); let filter_cond = Expr::from_rel_node(filter_cond.into()).unwrap(); // TODO: Push existing join conditions down as well @@ 
-338,7 +338,7 @@ fn apply_filter_sort_transpose( _optimizer: &impl Optimizer, FilterSortTransposeRulePicks { child, exprs, cond }: FilterSortTransposeRulePicks, ) -> Vec> { - let child = PlanNode::from_rel_node(child.into()).unwrap(); + let child = PlanNode::from_group(child.into()); let exprs = ExprList::from_rel_node(exprs.into()).unwrap(); let cond_as_expr = Expr::from_rel_node(cond.into()).unwrap(); @@ -367,7 +367,7 @@ fn apply_filter_agg_transpose( ) -> Vec> { let exprs = ExprList::from_rel_node(exprs.into()).unwrap(); let groups = ExprList::from_rel_node(groups.into()).unwrap(); - let child = PlanNode::from_rel_node(child.into()).unwrap(); + let child = PlanNode::from_group(child.into()); // Get top-level group-by columns. Does not cover cases where group-by exprs // are more complex than a top-level column reference. From 53098e6d9e1b9e33158eed979f5a3e518c350a9d Mon Sep 17 00:00:00 2001 From: Benjamin O Date: Thu, 28 Mar 2024 15:45:03 -0400 Subject: [PATCH 35/61] Change rules to cost-based --- optd-datafusion-repr/src/lib.rs | 23 +++++++++++++++++------ 1 file changed, 17 insertions(+), 6 deletions(-) diff --git a/optd-datafusion-repr/src/lib.rs b/optd-datafusion-repr/src/lib.rs index 2e52a871..c0670e62 100644 --- a/optd-datafusion-repr/src/lib.rs +++ b/optd-datafusion-repr/src/lib.rs @@ -84,12 +84,6 @@ impl DatafusionOptimizer { vec![ Arc::new(SimplifyFilterRule::new()), Arc::new(SimplifyJoinCondRule::new()), - Arc::new(FilterProjectTransposeRule::new()), - Arc::new(FilterMergeRule::new()), - Arc::new(FilterCrossJoinTransposeRule::new()), - Arc::new(FilterInnerJoinTransposeRule::new()), - Arc::new(FilterSortTransposeRule::new()), - Arc::new(FilterAggTransposeRule::new()), Arc::new(EliminateFilterRule::new()), Arc::new(EliminateJoinRule::new()), Arc::new(EliminateLimitRule::new()), @@ -105,6 +99,23 @@ impl DatafusionOptimizer { for rule in rules { rule_wrappers.push(RuleWrapper::new_cascades(rule)); } + // add all filter pushdown rules as heuristic rules + 
rule_wrappers.push(RuleWrapper::new_heuristic(Arc::new( + FilterProjectTransposeRule::new(), + ))); + rule_wrappers.push(RuleWrapper::new_heuristic(Arc::new(FilterMergeRule::new()))); + rule_wrappers.push(RuleWrapper::new_heuristic(Arc::new( + FilterCrossJoinTransposeRule::new(), + ))); + rule_wrappers.push(RuleWrapper::new_heuristic(Arc::new( + FilterInnerJoinTransposeRule::new(), + ))); + rule_wrappers.push(RuleWrapper::new_heuristic(Arc::new( + FilterSortTransposeRule::new(), + ))); + rule_wrappers.push(RuleWrapper::new_heuristic(Arc::new( + FilterAggTransposeRule::new(), + ))); rule_wrappers.push(RuleWrapper::new_cascades(Arc::new(HashJoinRule::new()))); // 17 rule_wrappers.push(RuleWrapper::new_cascades(Arc::new(JoinCommuteRule::new()))); // 18 rule_wrappers.push(RuleWrapper::new_cascades(Arc::new(JoinAssocRule::new()))); From d3b1882d308923446f9a1150f4bed4406f513195 Mon Sep 17 00:00:00 2001 From: Benjamin O Date: Thu, 28 Mar 2024 15:46:22 -0400 Subject: [PATCH 36/61] Missed a spot with group conversions --- optd-datafusion-repr/src/rules/filter_pushdown.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/optd-datafusion-repr/src/rules/filter_pushdown.rs b/optd-datafusion-repr/src/rules/filter_pushdown.rs index b6830531..a2f3400e 100644 --- a/optd-datafusion-repr/src/rules/filter_pushdown.rs +++ b/optd-datafusion-repr/src/rules/filter_pushdown.rs @@ -136,7 +136,7 @@ fn apply_filter_project_transpose( .get_property::(child.clone().into(), 0) .len(); - let child = PlanNode::from_rel_node(child.into()).unwrap(); + let child = PlanNode::from_group(child.into()); let cond_as_expr = Expr::from_rel_node(cond.into()).unwrap(); let exprs = ExprList::from_rel_node(exprs.into()).unwrap(); From a4e06009e63993c512d7f853c4d5d7ce430e0d1c Mon Sep 17 00:00:00 2001 From: Sweetsuro Date: Thu, 28 Mar 2024 23:06:08 -0400 Subject: [PATCH 37/61] add rules to lib and rules files --- optd-datafusion-repr/src/lib.rs | 5 ++++- optd-datafusion-repr/src/rules.rs | 4 ++++ 
2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/optd-datafusion-repr/src/lib.rs b/optd-datafusion-repr/src/lib.rs index 2e297690..cbbc45cb 100644 --- a/optd-datafusion-repr/src/lib.rs +++ b/optd-datafusion-repr/src/lib.rs @@ -18,7 +18,8 @@ use properties::{ use rules::{ EliminateDuplicatedAggExprRule, EliminateDuplicatedSortExprRule, EliminateFilterRule, EliminateJoinRule, EliminateLimitRule, HashJoinRule, JoinAssocRule, JoinCommuteRule, - PhysicalConversionRule, ProjectionPullUpJoin, SimplifyFilterRule, + PhysicalConversionRule, ProjectionPullUpJoin, SimplifyFilterRule, ProjectRemove, + ProjectScanPushDown, }; pub use optd_core::rel_node::Value; @@ -60,6 +61,8 @@ impl DatafusionOptimizer { RuleWrapper::new_heuristic(Arc::new(EliminateFilterRule::new())), RuleWrapper::new_heuristic(Arc::new(EliminateJoinRule::new())), RuleWrapper::new_heuristic(Arc::new(EliminateLimitRule::new())), + RuleWrapper::new_heuristic(Arc::new(ProjectRemove::new())), + RuleWrapper::new_heuristic(Arc::new(ProjectScanPushDown::new())), RuleWrapper::new_heuristic(Arc::new(EliminateDuplicatedSortExprRule::new())), RuleWrapper::new_heuristic(Arc::new(EliminateDuplicatedAggExprRule::new())), ]; diff --git a/optd-datafusion-repr/src/rules.rs b/optd-datafusion-repr/src/rules.rs index 3548c617..bf657864 100644 --- a/optd-datafusion-repr/src/rules.rs +++ b/optd-datafusion-repr/src/rules.rs @@ -1,4 +1,5 @@ // mod filter_join; +mod project_remove; mod eliminate_duplicated_expr; mod eliminate_limit; mod filter; @@ -7,6 +8,9 @@ mod macros; mod physical; // pub use filter_join::FilterJoinPullUpRule; +pub use project_remove::{ + ProjectRemove, ProjectScanPushDown, +}; pub use eliminate_duplicated_expr::{ EliminateDuplicatedAggExprRule, EliminateDuplicatedSortExprRule, }; From 427fc9bb7a709661b4ed0db4a1dc607d8ab9af38 Mon Sep 17 00:00:00 2001 From: Sweetsuro Date: Fri, 29 Mar 2024 00:00:28 -0400 Subject: [PATCH 38/61] project remove rule --- optd-datafusion-repr/src/lib.rs | 3 + 
.../src/plan_nodes/projection.rs | 13 ++ optd-datafusion-repr/src/rules.rs | 4 +- .../src/rules/project_remove.rs | 131 ++++++++++++++++++ 4 files changed, 148 insertions(+), 3 deletions(-) create mode 100644 optd-datafusion-repr/src/rules/project_remove.rs diff --git a/optd-datafusion-repr/src/lib.rs b/optd-datafusion-repr/src/lib.rs index 2351eaa8..bb4c9bb4 100644 --- a/optd-datafusion-repr/src/lib.rs +++ b/optd-datafusion-repr/src/lib.rs @@ -100,6 +100,9 @@ impl DatafusionOptimizer { for rule in rules { rule_wrappers.push(RuleWrapper::new_cascades(rule)); } + rule_wrappers.push(RuleWrapper::new_heuristic(Arc::new( + ProjectRemove::new(), + ))); // add all filter pushdown rules as heuristic rules rule_wrappers.push(RuleWrapper::new_heuristic(Arc::new( FilterProjectTransposeRule::new(), diff --git a/optd-datafusion-repr/src/plan_nodes/projection.rs b/optd-datafusion-repr/src/plan_nodes/projection.rs index a7bb872d..52699afa 100644 --- a/optd-datafusion-repr/src/plan_nodes/projection.rs +++ b/optd-datafusion-repr/src/plan_nodes/projection.rs @@ -68,6 +68,19 @@ impl ProjectionMapping { }) .unwrap() } + + /// Rewrites all ColumnRefs in an ExprList to what the projection + /// node is rewriting. E.g. 
if Projection is A -> B, B will be + /// rewritten as A + pub fn rewrite_projection(&self, exprs: &ExprList) -> ExprList { + let mut new_projection_exprs = Vec::new(); + let exprs = exprs.to_vec(); + for i in &self.forward { + let col: Expr = exprs[*i].clone(); + new_projection_exprs.push(col); + }; + ExprList::new(new_projection_exprs) + } } impl LogicalProjection { diff --git a/optd-datafusion-repr/src/rules.rs b/optd-datafusion-repr/src/rules.rs index d4c2ad59..7046b8e7 100644 --- a/optd-datafusion-repr/src/rules.rs +++ b/optd-datafusion-repr/src/rules.rs @@ -9,9 +9,7 @@ mod macros; mod physical; // pub use filter_join::FilterJoinPullUpRule; -pub use project_remove::{ - ProjectRemove, ProjectScanPushDown, -}; +pub use project_remove::ProjectRemove; pub use eliminate_duplicated_expr::{ EliminateDuplicatedAggExprRule, EliminateDuplicatedSortExprRule, }; diff --git a/optd-datafusion-repr/src/rules/project_remove.rs b/optd-datafusion-repr/src/rules/project_remove.rs new file mode 100644 index 00000000..1c048baa --- /dev/null +++ b/optd-datafusion-repr/src/rules/project_remove.rs @@ -0,0 +1,131 @@ +use std::collections::HashMap; +use std::sync::Arc; + +use optd_core::rules::{Rule, RuleMatcher}; +use optd_core::{optimizer::Optimizer, rel_node::RelNode}; + +use crate::plan_nodes::{ + BetweenExpr, ColumnRefExpr, ExprList, LikeExpr, LogOpExpr, LogicalProjection, OptRelNode, OptRelNodeTyp, PlanNode +}; +use crate::properties::column_ref::ColumnRef; + +use super::macros::define_rule; + +// projects away aggregate calls that are not used +// TODO +define_rule!( + ProjectAggregatePushDown, + apply_projection_agg_pushdown, + ( + Projection, + (Agg, child, [agg_exprs], [agg_groups]), + [exprs] + ) +); + +fn apply_projection_agg_pushdown( + _optimizer: &impl Optimizer, + ProjectAggregatePushDownPicks { child, agg_exprs, agg_groups, exprs }: ProjectAggregatePushDownPicks, +) -> Vec> { + + + + vec![] +} + +// pushes projections through filters +// adds a projection node after a 
filter node +// only keeping necessary columns (proj node exprs + filter col exprs)) +// TODO +define_rule!( + ProjectFilterPushDown, + apply_projection_filter_pushdown, + ( + Projection, + (Filter, child, [cond]), + [exprs] + ) +); + +fn apply_projection_filter_pushdown( + _optimizer: &impl Optimizer, + ProjectFilterPushDownPicks { child, cond, exprs }: ProjectFilterPushDownPicks, +) -> Vec> { + // get columns out of cond + let cond_cols: Vec = match cond.typ { + OptRelNodeTyp::LogOp(_) => { + // make a queue of some kind + + vec![] + }, + OptRelNodeTyp::Between => { + let between_expr = BetweenExpr::from_rel_node(Arc::new(cond)).unwrap(); + let expr = between_expr.child(); + if expr.typ() != OptRelNodeTyp::ColumnRef { + vec![] + } else { + let col = ColumnRefExpr::from_rel_node(expr.into_rel_node()).unwrap(); + vec![col] + } + }, + OptRelNodeTyp::Like => { + let like_expr = LikeExpr::from_rel_node(Arc::new(cond)).unwrap(); + let expr = like_expr.child(); + if expr.typ() != OptRelNodeTyp::ColumnRef { + vec![] + } else { + let col = ColumnRefExpr::from_rel_node(expr.into_rel_node()).unwrap(); + vec![col] + } + }, + _ => vec![] + }; + if cond_cols.is_empty() { + return vec![]; + } + + // have column ref expressions of cond cols + // bottom-most projection will have proj cols + filter cols as a set + vec![] +} + + +// test case for project remove +// create table t1 (v1 int, v2 int); +// explain select v1,v2 from (select v1,v2 from t1); + +// create table t3 (v1 int, v2 int, v3 int); +// explain select v1,v2,v3 from (select v1,v3,v2 from t3); + +// Proj (Proj A) -> Proj A +// merges/removes projections +define_rule!( + ProjectRemove, + apply_projection_remove, + ( + Projection, + (Projection, child, [exprs2]), + [exprs1] + ) +); + +fn apply_projection_remove( + _optimizer: &impl Optimizer, + ProjectRemovePicks { child, exprs1, exprs2 }: ProjectRemovePicks, +) -> Vec> { + let child = PlanNode::from_group(child.into()); + let exprs1 = 
ExprList::from_rel_node(exprs1.into()).unwrap(); + let exprs2 = ExprList::from_rel_node(exprs2.into()).unwrap(); + + let Some(mapping) = LogicalProjection::compute_column_mapping(&exprs1) else { + return vec![]; + }; + + let res_exprs = mapping.rewrite_projection(&exprs2); + + let node: LogicalProjection = LogicalProjection::new( + child, + res_exprs, + ); + vec![node.into_rel_node().as_ref().clone()] +} From 6078cc5e9ef386766b2f8e2d2912e4cd977a2006 Mon Sep 17 00:00:00 2001 From: Sweetsuro Date: Fri, 29 Mar 2024 00:07:18 -0400 Subject: [PATCH 39/61] rename some stuff --- optd-datafusion-repr/src/lib.rs | 4 ++-- optd-datafusion-repr/src/rules.rs | 2 +- .../rules/{project_remove.rs => projection_pushdown.rs} | 9 +++++---- 3 files changed, 8 insertions(+), 7 deletions(-) rename optd-datafusion-repr/src/rules/{project_remove.rs => projection_pushdown.rs} (94%) diff --git a/optd-datafusion-repr/src/lib.rs b/optd-datafusion-repr/src/lib.rs index bb4c9bb4..994bc54c 100644 --- a/optd-datafusion-repr/src/lib.rs +++ b/optd-datafusion-repr/src/lib.rs @@ -25,7 +25,7 @@ use rules::{ EliminateDuplicatedAggExprRule, EliminateDuplicatedSortExprRule, EliminateFilterRule, EliminateJoinRule, EliminateLimitRule, HashJoinRule, JoinAssocRule, JoinCommuteRule, PhysicalConversionRule, ProjectionPullUpJoin, SimplifyFilterRule, SimplifyJoinCondRule, - ProjectRemove, + ProjectMergeRule, }; pub use optd_core::rel_node::Value; @@ -101,7 +101,7 @@ impl DatafusionOptimizer { rule_wrappers.push(RuleWrapper::new_cascades(rule)); } rule_wrappers.push(RuleWrapper::new_heuristic(Arc::new( - ProjectRemove::new(), + ProjectMergeRule::new(), ))); // add all filter pushdown rules as heuristic rules rule_wrappers.push(RuleWrapper::new_heuristic(Arc::new( diff --git a/optd-datafusion-repr/src/rules.rs b/optd-datafusion-repr/src/rules.rs index 7046b8e7..ce7621b3 100644 --- a/optd-datafusion-repr/src/rules.rs +++ b/optd-datafusion-repr/src/rules.rs @@ -9,7 +9,7 @@ mod macros; mod physical; // pub use 
filter_join::FilterJoinPullUpRule; -pub use project_remove::ProjectRemove; +pub use project_remove::ProjectMergeRule; pub use eliminate_duplicated_expr::{ EliminateDuplicatedAggExprRule, EliminateDuplicatedSortExprRule, }; diff --git a/optd-datafusion-repr/src/rules/project_remove.rs b/optd-datafusion-repr/src/rules/projection_pushdown.rs similarity index 94% rename from optd-datafusion-repr/src/rules/project_remove.rs rename to optd-datafusion-repr/src/rules/projection_pushdown.rs index 1c048baa..3dc259f1 100644 --- a/optd-datafusion-repr/src/rules/project_remove.rs +++ b/optd-datafusion-repr/src/rules/projection_pushdown.rs @@ -95,13 +95,14 @@ fn apply_projection_filter_pushdown( // explain select v1,v2 from (select v1,v2 from t1); // create table t3 (v1 int, v2 int, v3 int); +// explain select v2,v3 from (select v1,v3,v2 from t3); // explain select v1,v2,v3 from (select v1,v3,v2 from t3); // Proj (Proj A) -> Proj A // merges/removes projections define_rule!( - ProjectRemove, - apply_projection_remove, + ProjectMergeRule, + apply_projection_merge, ( Projection, (Projection, child, [exprs2]), @@ -109,9 +110,9 @@ define_rule!( ) ); -fn apply_projection_remove( +fn apply_projection_merge( _optimizer: &impl Optimizer, - ProjectRemovePicks { child, exprs1, exprs2 }: ProjectRemovePicks, + ProjectMergeRulePicks { child, exprs1, exprs2 }: ProjectMergeRulePicks, ) -> Vec> { let child = PlanNode::from_group(child.into()); let exprs1 = ExprList::from_rel_node(exprs1.into()).unwrap(); From bea15b9d0681dffd5404032041d7df546fadbd56 Mon Sep 17 00:00:00 2001 From: Sweetsuro Date: Fri, 29 Mar 2024 00:10:06 -0400 Subject: [PATCH 40/61] rename some other stuff --- optd-datafusion-repr/src/rules.rs | 4 ++-- optd-datafusion-repr/src/rules/projection_pushdown.rs | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/optd-datafusion-repr/src/rules.rs b/optd-datafusion-repr/src/rules.rs index ce7621b3..f899dfe7 100644 --- a/optd-datafusion-repr/src/rules.rs +++ 
b/optd-datafusion-repr/src/rules.rs @@ -1,5 +1,5 @@ // mod filter_join; -mod project_remove; +mod projection_pushdown; mod eliminate_duplicated_expr; mod eliminate_limit; mod filter; @@ -9,7 +9,7 @@ mod macros; mod physical; // pub use filter_join::FilterJoinPullUpRule; -pub use project_remove::ProjectMergeRule; +pub use projection_pushdown::ProjectMergeRule; pub use eliminate_duplicated_expr::{ EliminateDuplicatedAggExprRule, EliminateDuplicatedSortExprRule, }; diff --git a/optd-datafusion-repr/src/rules/projection_pushdown.rs b/optd-datafusion-repr/src/rules/projection_pushdown.rs index 3dc259f1..84a1de41 100644 --- a/optd-datafusion-repr/src/rules/projection_pushdown.rs +++ b/optd-datafusion-repr/src/rules/projection_pushdown.rs @@ -90,7 +90,7 @@ fn apply_projection_filter_pushdown( } -// test case for project remove +// test cases for project merge // create table t1 (v1 int, v2 int); // explain select v1,v2 from (select v1,v2 from t1); From 81191583120dfb07adc95b729ce9e75f89b2fe8c Mon Sep 17 00:00:00 2001 From: Sweetsuro Date: Fri, 29 Mar 2024 01:13:03 -0400 Subject: [PATCH 41/61] broken project filter transpose --- optd-datafusion-repr/src/lib.rs | 5 +- optd-datafusion-repr/src/plan_nodes.rs | 29 +++++++ optd-datafusion-repr/src/rules.rs | 4 +- .../src/rules/projection_pushdown.rs | 79 +++++++++++-------- 4 files changed, 80 insertions(+), 37 deletions(-) diff --git a/optd-datafusion-repr/src/lib.rs b/optd-datafusion-repr/src/lib.rs index 994bc54c..a20c2d89 100644 --- a/optd-datafusion-repr/src/lib.rs +++ b/optd-datafusion-repr/src/lib.rs @@ -25,7 +25,7 @@ use rules::{ EliminateDuplicatedAggExprRule, EliminateDuplicatedSortExprRule, EliminateFilterRule, EliminateJoinRule, EliminateLimitRule, HashJoinRule, JoinAssocRule, JoinCommuteRule, PhysicalConversionRule, ProjectionPullUpJoin, SimplifyFilterRule, SimplifyJoinCondRule, - ProjectMergeRule, + ProjectMergeRule, ProjectFilterTransposeRule, }; pub use optd_core::rel_node::Value; @@ -103,6 +103,9 @@ impl 
DatafusionOptimizer { rule_wrappers.push(RuleWrapper::new_heuristic(Arc::new( ProjectMergeRule::new(), ))); + rule_wrappers.push(RuleWrapper::new_heuristic(Arc::new( + ProjectFilterTransposeRule::new(), + ))); // add all filter pushdown rules as heuristic rules rule_wrappers.push(RuleWrapper::new_heuristic(Arc::new( FilterProjectTransposeRule::new(), diff --git a/optd-datafusion-repr/src/plan_nodes.rs b/optd-datafusion-repr/src/plan_nodes.rs index a74f072b..54f1e61c 100644 --- a/optd-datafusion-repr/src/plan_nodes.rs +++ b/optd-datafusion-repr/src/plan_nodes.rs @@ -333,6 +333,35 @@ impl Expr { .unwrap(), ) } + + /// Recursively retrieves all column references in the expression + /// using a provided function. + /// The provided function will, given a ColumnRefExpr's index, + /// return a Vec including the expr in col ref. + pub fn get_column_refs( + &self + ) -> Vec { + assert!(self.typ().is_expression()); + if let OptRelNodeTyp::ColumnRef = self.typ() { + let col_ref = Expr::from_rel_node(self.0.clone()).unwrap(); + return vec![col_ref]; + } + + let children = self.0.children.clone(); + let children = children + .into_iter() + .map(|child| { + if child.typ == OptRelNodeTyp::List { + // TODO: What should we do with List? 
+ return vec![]; + } + Expr::from_rel_node(child.clone()) + .unwrap() + .get_column_refs() + }); + children.collect_vec().concat() + } + } impl OptRelNode for Expr { diff --git a/optd-datafusion-repr/src/rules.rs b/optd-datafusion-repr/src/rules.rs index f899dfe7..fe8b3ce5 100644 --- a/optd-datafusion-repr/src/rules.rs +++ b/optd-datafusion-repr/src/rules.rs @@ -9,7 +9,9 @@ mod macros; mod physical; // pub use filter_join::FilterJoinPullUpRule; -pub use projection_pushdown::ProjectMergeRule; +pub use projection_pushdown::{ + ProjectMergeRule, ProjectFilterTransposeRule +}; pub use eliminate_duplicated_expr::{ EliminateDuplicatedAggExprRule, EliminateDuplicatedSortExprRule, }; diff --git a/optd-datafusion-repr/src/rules/projection_pushdown.rs b/optd-datafusion-repr/src/rules/projection_pushdown.rs index 84a1de41..d4ac0b29 100644 --- a/optd-datafusion-repr/src/rules/projection_pushdown.rs +++ b/optd-datafusion-repr/src/rules/projection_pushdown.rs @@ -5,12 +5,19 @@ use optd_core::rules::{Rule, RuleMatcher}; use optd_core::{optimizer::Optimizer, rel_node::RelNode}; use crate::plan_nodes::{ - BetweenExpr, ColumnRefExpr, ExprList, LikeExpr, LogOpExpr, LogicalProjection, OptRelNode, OptRelNodeTyp, PlanNode + BetweenExpr, ColumnRefExpr, Expr, ExprList, LikeExpr, LogOpExpr, LogicalFilter, LogicalProjection, OptRelNode, OptRelNodeTyp, PlanNode }; use crate::properties::column_ref::ColumnRef; +use crate::properties::schema::SchemaPropertyBuilder; use super::macros::define_rule; +fn merge_exprs(first: ExprList, second: ExprList) -> ExprList { + let mut res_vec = first.to_vec(); + res_vec.extend(second.to_vec()); + ExprList::new(res_vec) +} + // projects away aggregate calls that are not used // TODO define_rule!( @@ -38,8 +45,8 @@ fn apply_projection_agg_pushdown( // only keeping necessary columns (proj node exprs + filter col exprs)) // TODO define_rule!( - ProjectFilterPushDown, - apply_projection_filter_pushdown, + ProjectFilterTransposeRule, + 
apply_projection_filter_transpose, ( Projection, (Filter, child, [cond]), @@ -47,46 +54,48 @@ define_rule!( ) ); -fn apply_projection_filter_pushdown( +fn apply_projection_filter_transpose( _optimizer: &impl Optimizer, - ProjectFilterPushDownPicks { child, cond, exprs }: ProjectFilterPushDownPicks, + ProjectFilterTransposeRulePicks { child, cond, exprs }: ProjectFilterTransposeRulePicks, ) -> Vec> { // get columns out of cond - let cond_cols: Vec = match cond.typ { - OptRelNodeTyp::LogOp(_) => { - // make a queue of some kind - - vec![] - }, - OptRelNodeTyp::Between => { - let between_expr = BetweenExpr::from_rel_node(Arc::new(cond)).unwrap(); - let expr = between_expr.child(); - if expr.typ() != OptRelNodeTyp::ColumnRef { - vec![] - } else { - let col = ColumnRefExpr::from_rel_node(expr.into_rel_node()).unwrap(); - vec![col] - } - }, - OptRelNodeTyp::Like => { - let like_expr = LikeExpr::from_rel_node(Arc::new(cond)).unwrap(); - let expr = like_expr.child(); - if expr.typ() != OptRelNodeTyp::ColumnRef { - vec![] - } else { - let col = ColumnRefExpr::from_rel_node(expr.into_rel_node()).unwrap(); - vec![col] - } - }, - _ => vec![] + let exprs = ExprList::from_rel_node(exprs.into()).unwrap(); + let exprs_vec = exprs.clone().to_vec(); + let cond_as_expr = Expr::from_rel_node(cond.into()).unwrap(); + let cond_col_refs = cond_as_expr.get_column_refs(); + let mut dedup_cond_col_refs = Vec::new(); + + for i in 0..cond_col_refs.len() { + if !exprs_vec.contains(&cond_col_refs[i]) { + dedup_cond_col_refs.push(cond_col_refs[i].clone()); + }; }; - if cond_cols.is_empty() { + + let dedup_cond_col_refs = ExprList::new(dedup_cond_col_refs); + + let bottom_proj_exprs: ExprList = merge_exprs(exprs.clone(), dedup_cond_col_refs.clone()); + let Some(mapping) = LogicalProjection::compute_column_mapping(&bottom_proj_exprs) else { return vec![]; - } + }; + let child_schema_len = _optimizer + .get_property::(child.clone().into(), 0) + .len(); + let child = 
PlanNode::from_group(child.into()); + let new_filter_cond: Expr = mapping.rewrite_condition(cond_as_expr.clone(), child_schema_len); + let bottom_proj_node = LogicalProjection::new(child, bottom_proj_exprs); + let new_filter_node = LogicalFilter::new(bottom_proj_node.into_plan_node(), new_filter_cond); + + if dedup_cond_col_refs.is_empty() { + // can push proj past filter and remove top proj node + return vec![new_filter_node.into_rel_node().as_ref().clone()]; + } + // have column ref expressions of cond cols // bottom-most projection will have proj cols + filter cols as a set - vec![] + let top_proj_exprs = mapping.rewrite_projection(&exprs); + let top_proj_node = LogicalProjection::new(new_filter_node.into_plan_node(), top_proj_exprs); + vec![top_proj_node.into_rel_node().as_ref().clone()] } From 8a28b2c438abd9ad50f43a8a543ecc1d7f9990d6 Mon Sep 17 00:00:00 2001 From: Sweetsuro Date: Fri, 29 Mar 2024 14:20:58 -0400 Subject: [PATCH 42/61] working project filter transpose rule --- optd-datafusion-repr/src/lib.rs | 4 ++-- optd-datafusion-repr/src/plan_nodes/projection.rs | 13 +++++++++++++ .../src/rules/projection_pushdown.rs | 2 +- 3 files changed, 16 insertions(+), 3 deletions(-) diff --git a/optd-datafusion-repr/src/lib.rs b/optd-datafusion-repr/src/lib.rs index a20c2d89..265a2973 100644 --- a/optd-datafusion-repr/src/lib.rs +++ b/optd-datafusion-repr/src/lib.rs @@ -100,10 +100,10 @@ impl DatafusionOptimizer { for rule in rules { rule_wrappers.push(RuleWrapper::new_cascades(rule)); } - rule_wrappers.push(RuleWrapper::new_heuristic(Arc::new( + rule_wrappers.push(RuleWrapper::new_cascades(Arc::new( ProjectMergeRule::new(), ))); - rule_wrappers.push(RuleWrapper::new_heuristic(Arc::new( + rule_wrappers.push(RuleWrapper::new_cascades(Arc::new( ProjectFilterTransposeRule::new(), ))); // add all filter pushdown rules as heuristic rules diff --git a/optd-datafusion-repr/src/plan_nodes/projection.rs b/optd-datafusion-repr/src/plan_nodes/projection.rs index 
52699afa..c31c74c5 100644 --- a/optd-datafusion-repr/src/plan_nodes/projection.rs +++ b/optd-datafusion-repr/src/plan_nodes/projection.rs @@ -81,6 +81,19 @@ impl ProjectionMapping { }; ExprList::new(new_projection_exprs) } + + /// Reverse rewrites all ColumnRefs in an ExprList to what the projection + /// node is rewriting. E.g. if Projection is A -> B, B will be + /// rewritten as A + pub fn reverse_rewrite_projection(&self, exprs: &ExprList) -> ExprList { + let mut new_projection_exprs = Vec::new(); + let exprs = exprs.to_vec(); + for i in 0..exprs.len() { + let col: Expr = ColumnRefExpr::new(self.projection_col_refers_to(i).clone()).into_expr(); + new_projection_exprs.push(col); + }; + ExprList::new(new_projection_exprs) + } } impl LogicalProjection { diff --git a/optd-datafusion-repr/src/rules/projection_pushdown.rs b/optd-datafusion-repr/src/rules/projection_pushdown.rs index d4ac0b29..78476050 100644 --- a/optd-datafusion-repr/src/rules/projection_pushdown.rs +++ b/optd-datafusion-repr/src/rules/projection_pushdown.rs @@ -93,7 +93,7 @@ fn apply_projection_filter_transpose( // have column ref expressions of cond cols // bottom-most projection will have proj cols + filter cols as a set - let top_proj_exprs = mapping.rewrite_projection(&exprs); + let top_proj_exprs = mapping.reverse_rewrite_projection(&exprs); let top_proj_node = LogicalProjection::new(new_filter_node.into_plan_node(), top_proj_exprs); vec![top_proj_node.into_rel_node().as_ref().clone()] } From 5e9b482ad8a0ab6dd08b68bb61eed9d77c408c7e Mon Sep 17 00:00:00 2001 From: Benjamin O Date: Fri, 29 Mar 2024 21:06:25 -0400 Subject: [PATCH 43/61] Address most comments --- optd-datafusion-repr/src/lib.rs | 11 +++----- optd-datafusion-repr/src/plan_nodes/join.rs | 14 +++++------ .../src/plan_nodes/projection.rs | 21 ++++++++++++++-- .../src/rules/filter_pushdown.rs | 25 +++++++++++++------ 4 files changed, 47 insertions(+), 24 deletions(-) diff --git a/optd-datafusion-repr/src/lib.rs 
b/optd-datafusion-repr/src/lib.rs index c0670e62..3a149bc3 100644 --- a/optd-datafusion-repr/src/lib.rs +++ b/optd-datafusion-repr/src/lib.rs @@ -23,17 +23,14 @@ use properties::{ }; use rules::{ EliminateDuplicatedAggExprRule, EliminateDuplicatedSortExprRule, EliminateFilterRule, - EliminateJoinRule, EliminateLimitRule, HashJoinRule, JoinAssocRule, JoinCommuteRule, - PhysicalConversionRule, ProjectionPullUpJoin, SimplifyFilterRule, SimplifyJoinCondRule, + EliminateJoinRule, EliminateLimitRule, FilterAggTransposeRule, FilterCrossJoinTransposeRule, + FilterInnerJoinTransposeRule, FilterMergeRule, FilterProjectTransposeRule, + FilterSortTransposeRule, HashJoinRule, JoinAssocRule, JoinCommuteRule, PhysicalConversionRule, + ProjectionPullUpJoin, SimplifyFilterRule, SimplifyJoinCondRule, }; pub use optd_core::rel_node::Value; -use crate::rules::{ - FilterAggTransposeRule, FilterCrossJoinTransposeRule, FilterInnerJoinTransposeRule, - FilterMergeRule, FilterProjectTransposeRule, FilterSortTransposeRule, -}; - pub mod cost; mod explain; pub mod plan_nodes; diff --git a/optd-datafusion-repr/src/plan_nodes/join.rs b/optd-datafusion-repr/src/plan_nodes/join.rs index 1bc692b8..28ed439b 100644 --- a/optd-datafusion-repr/src/plan_nodes/join.rs +++ b/optd-datafusion-repr/src/plan_nodes/join.rs @@ -64,18 +64,18 @@ define_plan_node!( ); impl LogicalJoin { - /// Takes in left/right schema sizes, and maps an index to be as if it were - /// pushed down to the left or right side of a join accordingly. + /// Takes in left/right schema sizes, and maps a column index to be as if it + /// were pushed down to the left or right side of a join accordingly. 
pub fn map_through_join( - index: usize, + col_idx: usize, left_schema_size: usize, right_schema_size: usize, ) -> usize { - assert!(index < left_schema_size + right_schema_size); - if index < left_schema_size { - index + assert!(col_idx < left_schema_size + right_schema_size); + if col_idx < left_schema_size { + col_idx } else { - index - left_schema_size + col_idx - left_schema_size } } } diff --git a/optd-datafusion-repr/src/plan_nodes/projection.rs b/optd-datafusion-repr/src/plan_nodes/projection.rs index a7bb872d..61168db0 100644 --- a/optd-datafusion-repr/src/plan_nodes/projection.rs +++ b/optd-datafusion-repr/src/plan_nodes/projection.rs @@ -27,6 +27,18 @@ define_plan_node!( ] ); +/// This struct holds the mapping from original columns to projected columns. +/// +/// # Example +/// With the following plan: +/// | Filter (#0 < 5) +/// | +/// |-| Projection [#2, #3] +/// |- Scan [#0, #1, #2, #3] +/// +/// The computed projection mapping is: +/// #2 -> #0 +/// #3 -> #1 pub struct ProjectionMapping { forward: Vec, _backward: Vec>, @@ -55,8 +67,13 @@ impl ProjectionMapping { self._backward[col] } - /// Recursively rewrites all ColumnRefs in an Expr to what the projection - /// node is rewriting. E.g. if Projection is A -> B, B will be rewritten as A + /// Recursively rewrites all ColumnRefs in an Expr to *undo* the projection + /// condition. You might want to do this if you are pushing something + /// through a projection, or pulling a projection up. + /// + /// # Example + /// If we have a projection node, mapping column A to column B (A -> B) + /// All B's in `cond` will be rewritten as A. 
pub fn rewrite_condition(&self, cond: Expr, child_schema_len: usize) -> Expr { let proj_schema_size = self.forward.len(); cond.rewrite_column_refs(&|idx| { diff --git a/optd-datafusion-repr/src/rules/filter_pushdown.rs b/optd-datafusion-repr/src/rules/filter_pushdown.rs index a2f3400e..6189d2d7 100644 --- a/optd-datafusion-repr/src/rules/filter_pushdown.rs +++ b/optd-datafusion-repr/src/rules/filter_pushdown.rs @@ -46,6 +46,9 @@ enum JoinCondDependency { None, } +/// Given a list of expressions (presumably a flattened tree), determine +/// if the expression is dependent on the left child, the right child, both, +/// or neither, by analyzing which columnrefs are used in the expressions. fn determine_join_cond_dep( children: &Vec, left_schema_size: usize, @@ -410,7 +413,8 @@ fn apply_filter_agg_transpose( let new_child = if !push_conds.is_empty() { LogicalFilter::new( child, - LogOpExpr::new(LogOpType::And, ExprList::new(push_conds)).into_expr(), + LogOpExpr::new_flattened_nested_logical(LogOpType::And, ExprList::new(push_conds)) + .into_expr(), ) .into_plan_node() } else { @@ -419,13 +423,18 @@ fn apply_filter_agg_transpose( let new_agg = LogicalAgg::new(new_child, exprs, groups); - let new_filter = LogicalFilter::new( - new_agg.into_plan_node(), - LogOpExpr::new(LogOpType::And, ExprList::new(keep_conds)).into_expr(), - ) - .into_rel_node() - .as_ref() - .clone(); + let new_filter = if !keep_conds.is_empty() { + LogicalFilter::new( + new_agg.into_plan_node(), + LogOpExpr::new_flattened_nested_logical(LogOpType::And, ExprList::new(keep_conds)) + .into_expr(), + ) + .into_rel_node() + .as_ref() + .clone() + } else { + new_agg.into_rel_node().as_ref().clone() + }; vec![new_filter] } From f04e4cb1ad053dc5d5a24bbef5780f5922d43afe Mon Sep 17 00:00:00 2001 From: AveryQi115 Date: Mon, 1 Apr 2024 21:00:54 -0400 Subject: [PATCH 44/61] fix for heuristic rule wrapper Signed-off-by: AveryQi115 --- optd-core/src/cascades/memo.rs | 16 +++++++++++----- 
optd-core/src/cascades/tasks/apply_rule.rs | 10 +++++----- 2 files changed, 16 insertions(+), 10 deletions(-) diff --git a/optd-core/src/cascades/memo.rs b/optd-core/src/cascades/memo.rs index 6c858a32..1fdf344a 100644 --- a/optd-core/src/cascades/memo.rs +++ b/optd-core/src/cascades/memo.rs @@ -285,13 +285,19 @@ impl Memo { }; // if the new expr already in the memo table, merge the group and remove old expr - if let Some(&expr_id) = self.expr_node_to_expr_id.get(&memo_node) { - let group_id = self.get_group_id_of_expr_id(expr_id); + if let Some(&new_expr_id) = self.expr_node_to_expr_id.get(&memo_node) { + if new_expr_id == expr_id { + // This is not acceptable, as it means the expr returned by a heuristic rule is exactly + // the same as the original expr, which should not happen + // TODO: we can silently ignore this case without marking the original one as a deadend + // But the rule creators should follow the definition of the heuristic rule + // and return an empty vec if their rule does not do the real transformation + unreachable!("replace_group_expr: you're replacing the old expr with the same expr, please check your rules registered as heuristic + and make sure if it does not do any transformation, it should return an empty vec!"); + } + let group_id = self.get_group_id_of_expr_id(new_expr_id); let group_id = self.get_reduced_group_id(group_id); self.merge_group_inner(replace_group_id, group_id); - - // TODO: instead of remove this expr from the old group, - // we mark the expr as all rules have been fired to make it a dead end return false; } diff --git a/optd-core/src/cascades/tasks/apply_rule.rs b/optd-core/src/cascades/tasks/apply_rule.rs index 1483378b..d73e0bdc 100644 --- a/optd-core/src/cascades/tasks/apply_rule.rs +++ b/optd-core/src/cascades/tasks/apply_rule.rs @@ -233,11 +233,11 @@ impl Task for ApplyRuleTask { trace!(event = "apply_rule replace", expr_id = %self.expr_id, rule_id = %self.rule_id); - // rules registed as heuristics are always 
logical, exploring its children - tasks.push( - Box::new(OptimizeExpressionTask::new(self.expr_id, self.exploring)) - as Box>, - ); + // the expr returned by a heuristic rule is a brand new one, + // so there's no OptimizeExpressionTask for it in the original task list; + // we set exploring to false to invoke both transform rules and impl rules for it + tasks.push(Box::new(OptimizeExpressionTask::new(self.expr_id, false)) + as Box>); } continue; } From c057b1eb8f26a1b6fa786909addf3b16947a65e6 Mon Sep 17 00:00:00 2001 From: Sweetsuro Date: Tue, 2 Apr 2024 22:24:23 -0400 Subject: [PATCH 45/61] working project filter transpose --- optd-datafusion-repr/src/lib.rs | 6 +- .../src/plan_nodes/projection.rs | 22 +- .../src/rules/projection_pushdown.rs | 14 +- optd-sqlplannertest/tests/join_enumerate.yml | 2 +- .../tests/old_tpch.planner.sql | 2106 +++++++++++++++++ optd-sqlplannertest/tests/tpch.planner.sql | 142 +- 6 files changed, 2210 insertions(+), 82 deletions(-) create mode 100644 optd-sqlplannertest/tests/old_tpch.planner.sql diff --git a/optd-datafusion-repr/src/lib.rs b/optd-datafusion-repr/src/lib.rs index 265a2973..bac05835 100644 --- a/optd-datafusion-repr/src/lib.rs +++ b/optd-datafusion-repr/src/lib.rs @@ -100,9 +100,9 @@ impl DatafusionOptimizer { for rule in rules { rule_wrappers.push(RuleWrapper::new_cascades(rule)); } - rule_wrappers.push(RuleWrapper::new_cascades(Arc::new( - ProjectMergeRule::new(), - ))); + // rule_wrappers.push(RuleWrapper::new_cascades(Arc::new( + // ProjectMergeRule::new(), + // ))); rule_wrappers.push(RuleWrapper::new_cascades(Arc::new( ProjectFilterTransposeRule::new(), ))); diff --git a/optd-datafusion-repr/src/plan_nodes/projection.rs b/optd-datafusion-repr/src/plan_nodes/projection.rs index c31c74c5..1273775e 100644 --- a/optd-datafusion-repr/src/plan_nodes/projection.rs +++ b/optd-datafusion-repr/src/plan_nodes/projection.rs @@ -27,6 +27,7 @@ define_plan_node!( ] ); +#[derive(Clone, Debug)] pub struct ProjectionMapping { 
forward: Vec, _backward: Vec>, @@ -69,17 +70,34 @@ impl ProjectionMapping { .unwrap() } + /// Recursively rewrites all ColumnRefs in an Expr to what the projection + /// node is rewriting. E.g. if Projection is A -> B, B will be rewritten as A + pub fn reverse_rewrite_condition(&self, cond: Expr) -> Expr { + let proj_schema_size = self._backward.len(); + cond.rewrite_column_refs(&|idx| { + Some(if idx < proj_schema_size { + self._original_col_maps_to(idx).unwrap() + } else { + panic!("exprs do not map to projection"); + }) + }) + .unwrap() + } + /// Rewrites all ColumnRefs in an ExprList to what the projection /// node is rewriting. E.g. if Projection is A -> B, B will be /// rewritten as A - pub fn rewrite_projection(&self, exprs: &ExprList) -> ExprList { + pub fn rewrite_projection(&self, exprs: &ExprList) -> Option { + if exprs.len() == 0 { + return None; + } let mut new_projection_exprs = Vec::new(); let exprs = exprs.to_vec(); for i in &self.forward { let col: Expr = exprs[*i].clone(); new_projection_exprs.push(col); }; - ExprList::new(new_projection_exprs) + Some(ExprList::new(new_projection_exprs)) } /// Reverse rewrites all ColumnRefs in an ExprList to what the projection diff --git a/optd-datafusion-repr/src/rules/projection_pushdown.rs b/optd-datafusion-repr/src/rules/projection_pushdown.rs index 78476050..5857f1e7 100644 --- a/optd-datafusion-repr/src/rules/projection_pushdown.rs +++ b/optd-datafusion-repr/src/rules/projection_pushdown.rs @@ -78,11 +78,8 @@ fn apply_projection_filter_transpose( return vec![]; }; - let child_schema_len = _optimizer - .get_property::(child.clone().into(), 0) - .len(); let child = PlanNode::from_group(child.into()); - let new_filter_cond: Expr = mapping.rewrite_condition(cond_as_expr.clone(), child_schema_len); + let new_filter_cond: Expr = mapping.reverse_rewrite_condition(cond_as_expr.clone()); let bottom_proj_node = LogicalProjection::new(child, bottom_proj_exprs); let new_filter_node = 
LogicalFilter::new(bottom_proj_node.into_plan_node(), new_filter_cond); @@ -131,7 +128,14 @@ fn apply_projection_merge( return vec![]; }; - let res_exprs = mapping.rewrite_projection(&exprs2); + let Some(res_exprs) = mapping.rewrite_projection(&exprs2) else { + let node: LogicalProjection = LogicalProjection::new( + child, + exprs1, + ); + println!("reached something that should never happen!!!!"); + return vec![node.into_rel_node().as_ref().clone()]; + }; let node: LogicalProjection = LogicalProjection::new( child, diff --git a/optd-sqlplannertest/tests/join_enumerate.yml b/optd-sqlplannertest/tests/join_enumerate.yml index 133c5828..96d36e5d 100644 --- a/optd-sqlplannertest/tests/join_enumerate.yml +++ b/optd-sqlplannertest/tests/join_enumerate.yml @@ -11,7 +11,7 @@ select * from t2, t1, t3 where t1v1 = t2v1 and t1v2 = t3v2; desc: Test whether the optimizer enumerates all join orders. tasks: - - explain[with_logcial]:logical_join_orders + - explain[with_logical]:logical_join_orders - execute[with_logical] - sql: | select * from t1, t2, t3 where t1v1 = t2v1 and t1v2 = t3v2; diff --git a/optd-sqlplannertest/tests/old_tpch.planner.sql b/optd-sqlplannertest/tests/old_tpch.planner.sql new file mode 100644 index 00000000..4f3c88ae --- /dev/null +++ b/optd-sqlplannertest/tests/old_tpch.planner.sql @@ -0,0 +1,2106 @@ +-- TPC-H schema +CREATE TABLE NATION ( + N_NATIONKEY INT NOT NULL, + N_NAME CHAR(25) NOT NULL, + N_REGIONKEY INT NOT NULL, + N_COMMENT VARCHAR(152) +); + +CREATE TABLE REGION ( + R_REGIONKEY INT NOT NULL, + R_NAME CHAR(25) NOT NULL, + R_COMMENT VARCHAR(152) +); + +CREATE TABLE PART ( + P_PARTKEY INT NOT NULL, + P_NAME VARCHAR(55) NOT NULL, + P_MFGR CHAR(25) NOT NULL, + P_BRAND CHAR(10) NOT NULL, + P_TYPE VARCHAR(25) NOT NULL, + P_SIZE INT NOT NULL, + P_CONTAINER CHAR(10) NOT NULL, + P_RETAILPRICE DECIMAL(15,2) NOT NULL, + P_COMMENT VARCHAR(23) NOT NULL +); + +CREATE TABLE SUPPLIER ( + S_SUPPKEY INT NOT NULL, + S_NAME CHAR(25) NOT NULL, + S_ADDRESS 
VARCHAR(40) NOT NULL, + S_NATIONKEY INT NOT NULL, + S_PHONE CHAR(15) NOT NULL, + S_ACCTBAL DECIMAL(15,2) NOT NULL, + S_COMMENT VARCHAR(101) NOT NULL +); + +CREATE TABLE PARTSUPP ( + PS_PARTKEY INT NOT NULL, + PS_SUPPKEY INT NOT NULL, + PS_AVAILQTY INT NOT NULL, + PS_SUPPLYCOST DECIMAL(15,2) NOT NULL, + PS_COMMENT VARCHAR(199) NOT NULL +); + +CREATE TABLE CUSTOMER ( + C_CUSTKEY INT NOT NULL, + C_NAME VARCHAR(25) NOT NULL, + C_ADDRESS VARCHAR(40) NOT NULL, + C_NATIONKEY INT NOT NULL, + C_PHONE CHAR(15) NOT NULL, + C_ACCTBAL DECIMAL(15,2) NOT NULL, + C_MKTSEGMENT CHAR(10) NOT NULL, + C_COMMENT VARCHAR(117) NOT NULL +); + +CREATE TABLE ORDERS ( + O_ORDERKEY INT NOT NULL, + O_CUSTKEY INT NOT NULL, + O_ORDERSTATUS CHAR(1) NOT NULL, + O_TOTALPRICE DECIMAL(15,2) NOT NULL, + O_ORDERDATE DATE NOT NULL, + O_ORDERPRIORITY CHAR(15) NOT NULL, + O_CLERK CHAR(15) NOT NULL, + O_SHIPPRIORITY INT NOT NULL, + O_COMMENT VARCHAR(79) NOT NULL +); + +CREATE TABLE LINEITEM ( + L_ORDERKEY INT NOT NULL, + L_PARTKEY INT NOT NULL, + L_SUPPKEY INT NOT NULL, + L_LINENUMBER INT NOT NULL, + L_QUANTITY DECIMAL(15,2) NOT NULL, + L_EXTENDEDPRICE DECIMAL(15,2) NOT NULL, + L_DISCOUNT DECIMAL(15,2) NOT NULL, + L_TAX DECIMAL(15,2) NOT NULL, + L_RETURNFLAG CHAR(1) NOT NULL, + L_LINESTATUS CHAR(1) NOT NULL, + L_SHIPDATE DATE NOT NULL, + L_COMMITDATE DATE NOT NULL, + L_RECEIPTDATE DATE NOT NULL, + L_SHIPINSTRUCT CHAR(25) NOT NULL, + L_SHIPMODE CHAR(10) NOT NULL, + L_COMMENT VARCHAR(44) NOT NULL +); + +/* + +*/ + +-- TPC-H Q1 +SELECT + l_returnflag, + l_linestatus, + sum(l_quantity) as sum_qty, + sum(l_extendedprice) as sum_base_price, + sum(l_extendedprice * (1 - l_discount)) as sum_disc_price, + sum(l_extendedprice * (1 - l_discount) * (1 + l_tax)) as sum_charge, + avg(l_quantity) as avg_qty, + avg(l_extendedprice) as avg_price, + avg(l_discount) as avg_disc, + count(*) as count_order +FROM + lineitem +WHERE + l_shipdate <= date '1998-12-01' - interval '90' day +GROUP BY + l_returnflag, l_linestatus +ORDER 
BY + l_returnflag, l_linestatus; + +/* +LogicalSort +├── exprs: +│ ┌── SortOrder { order: Asc } +│ │ └── #0 +│ └── SortOrder { order: Asc } +│ └── #1 +└── LogicalProjection { exprs: [ #0, #1, #2, #3, #4, #5, #6, #7, #8, #9 ] } + └── LogicalAgg + ├── exprs: + │ ┌── Agg(Sum) + │ │ └── [ #4 ] + │ ├── Agg(Sum) + │ │ └── [ #5 ] + │ ├── Agg(Sum) + │ │ └── Mul + │ │ ├── #5 + │ │ └── Sub + │ │ ├── Cast { cast_to: Decimal128(20, 0), expr: 1 } + │ │ └── #6 + │ ├── Agg(Sum) + │ │ └── Mul + │ │ ├── Mul + │ │ │ ├── #5 + │ │ │ └── Sub + │ │ │ ├── Cast { cast_to: Decimal128(20, 0), expr: 1 } + │ │ │ └── #6 + │ │ └── Add + │ │ ├── Cast { cast_to: Decimal128(20, 0), expr: 1 } + │ │ └── #7 + │ ├── Agg(Avg) + │ │ └── [ #4 ] + │ ├── Agg(Avg) + │ │ └── [ #5 ] + │ ├── Agg(Avg) + │ │ └── [ #6 ] + │ └── Agg(Count) + │ └── [ 1 ] + ├── groups: [ #8, #9 ] + └── LogicalFilter + ├── cond:Leq + │ ├── #10 + │ └── Sub + │ ├── Cast { cast_to: Date32, expr: "1998-12-01" } + │ └── INTERVAL_MONTH_DAY_NANO (0, 90, 0) + └── LogicalScan { table: lineitem } +PhysicalSort +├── exprs: +│ ┌── SortOrder { order: Asc } +│ │ └── #0 +│ └── SortOrder { order: Asc } +│ └── #1 +└── PhysicalProjection { exprs: [ #0, #1, #2, #3, #4, #5, #6, #7, #8, #9 ] } + └── PhysicalAgg + ├── aggrs: + │ ┌── Agg(Sum) + │ │ └── [ #4 ] + │ ├── Agg(Sum) + │ │ └── [ #5 ] + │ ├── Agg(Sum) + │ │ └── Mul + │ │ ├── #5 + │ │ └── Sub + │ │ ├── Cast { cast_to: Decimal128(20, 0), expr: 1 } + │ │ └── #6 + │ ├── Agg(Sum) + │ │ └── Mul + │ │ ├── Mul + │ │ │ ├── #5 + │ │ │ └── Sub + │ │ │ ├── Cast { cast_to: Decimal128(20, 0), expr: 1 } + │ │ │ └── #6 + │ │ └── Add + │ │ ├── Cast { cast_to: Decimal128(20, 0), expr: 1 } + │ │ └── #7 + │ ├── Agg(Avg) + │ │ └── [ #4 ] + │ ├── Agg(Avg) + │ │ └── [ #5 ] + │ ├── Agg(Avg) + │ │ └── [ #6 ] + │ └── Agg(Count) + │ └── [ 1 ] + ├── groups: [ #8, #9 ] + └── PhysicalFilter + ├── cond:Leq + │ ├── #10 + │ └── Sub + │ ├── Cast { cast_to: Date32, expr: "1998-12-01" } + │ └── INTERVAL_MONTH_DAY_NANO (0, 90, 0) + 
└── PhysicalScan { table: lineitem } +*/ + +-- TPC-H Q2 +select + s_acctbal, + s_name, + n_name, + p_partkey, + p_mfgr, + s_address, + s_phone, + s_comment +from + part, + supplier, + partsupp, + nation, + region +where + p_partkey = ps_partkey + and s_suppkey = ps_suppkey +and p_size = 4 +and p_type like '%TIN' + and s_nationkey = n_nationkey + and n_regionkey = r_regionkey + and r_name = 'AFRICA' + and ps_supplycost = ( + select + min(ps_supplycost) + from + partsupp, + supplier, + nation, + region + where + p_partkey = ps_partkey + and s_suppkey = ps_suppkey + and s_nationkey = n_nationkey + and n_regionkey = r_regionkey + and r_name = 'AFRICA' + ) +order by + s_acctbal desc, + n_name, + s_name, + p_partkey +limit 100; + +/* +LogicalLimit { skip: 0, fetch: 100 } +└── LogicalSort + ├── exprs: + │ ┌── SortOrder { order: Desc } + │ │ └── #0 + │ ├── SortOrder { order: Asc } + │ │ └── #2 + │ ├── SortOrder { order: Asc } + │ │ └── #1 + │ └── SortOrder { order: Asc } + │ └── #3 + └── LogicalProjection { exprs: [ #5, #2, #8, #0, #1, #3, #4, #6 ] } + └── LogicalJoin + ├── join_type: Inner + ├── cond:And + │ ├── Eq + │ │ ├── #0 + │ │ └── #10 + │ └── Eq + │ ├── #7 + │ └── #9 + ├── LogicalProjection { exprs: [ #0, #1, #2, #3, #4, #5, #6, #7, #8 ] } + │ └── LogicalJoin + │ ├── join_type: Inner + │ ├── cond:Eq + │ │ ├── #9 + │ │ └── #10 + │ ├── LogicalProjection { exprs: [ #0, #1, #2, #3, #5, #6, #7, #8, #10, #11 ] } + │ │ └── LogicalJoin + │ │ ├── join_type: Inner + │ │ ├── cond:Eq + │ │ │ ├── #4 + │ │ │ └── #9 + │ │ ├── LogicalProjection { exprs: [ #0, #1, #5, #6, #7, #8, #9, #10, #3 ] } + │ │ │ └── LogicalJoin + │ │ │ ├── join_type: Inner + │ │ │ ├── cond:Eq + │ │ │ │ ├── #2 + │ │ │ │ └── #4 + │ │ │ ├── LogicalProjection { exprs: [ #0, #1, #3, #4 ] } + │ │ │ │ └── LogicalJoin + │ │ │ │ ├── join_type: Inner + │ │ │ │ ├── cond:Eq + │ │ │ │ │ ├── #0 + │ │ │ │ │ └── #2 + │ │ │ │ ├── LogicalProjection { exprs: [ #0, #1 ] } + │ │ │ │ │ └── LogicalFilter + │ │ │ │ │ ├── cond:And 
+ │ │ │ │ │ │ ├── Eq + │ │ │ │ │ │ │ ├── #3 + │ │ │ │ │ │ │ └── 4 + │ │ │ │ │ │ └── Like { expr: #2, pattern: "%TIN", negated: false, case_insensitive: false } + │ │ │ │ │ └── LogicalProjection { exprs: [ #0, #2, #4, #5 ] } + │ │ │ │ │ └── LogicalScan { table: part } + │ │ │ │ └── LogicalProjection { exprs: [ #0, #1, #3 ] } + │ │ │ │ └── LogicalScan { table: partsupp } + │ │ │ └── LogicalProjection { exprs: [ #0, #1, #2, #3, #4, #5, #6 ] } + │ │ │ └── LogicalScan { table: supplier } + │ │ └── LogicalProjection { exprs: [ #0, #1, #2 ] } + │ │ └── LogicalScan { table: nation } + │ └── LogicalProjection { exprs: [ #0 ] } + │ └── LogicalFilter + │ ├── cond:Eq + │ │ ├── #1 + │ │ └── "AFRICA" + │ └── LogicalProjection { exprs: [ #0, #1 ] } + │ └── LogicalScan { table: region } + └── LogicalProjection { exprs: [ #1, #0 ] } + └── LogicalAgg + ├── exprs:Agg(Min) + │ └── [ #1 ] + ├── groups: [ #0 ] + └── LogicalProjection { exprs: [ #0, #1 ] } + └── LogicalJoin + ├── join_type: Inner + ├── cond:Eq + │ ├── #2 + │ └── #3 + ├── LogicalProjection { exprs: [ #0, #1, #4 ] } + │ └── LogicalJoin + │ ├── join_type: Inner + │ ├── cond:Eq + │ │ ├── #2 + │ │ └── #3 + │ ├── LogicalProjection { exprs: [ #0, #2, #4 ] } + │ │ └── LogicalJoin + │ │ ├── join_type: Inner + │ │ ├── cond:Eq + │ │ │ ├── #1 + │ │ │ └── #3 + │ │ ├── LogicalProjection { exprs: [ #0, #1, #3 ] } + │ │ │ └── LogicalScan { table: partsupp } + │ │ └── LogicalProjection { exprs: [ #0, #3 ] } + │ │ └── LogicalScan { table: supplier } + │ └── LogicalProjection { exprs: [ #0, #2 ] } + │ └── LogicalScan { table: nation } + └── LogicalProjection { exprs: [ #0 ] } + └── LogicalFilter + ├── cond:Eq + │ ├── #1 + │ └── "AFRICA" + └── LogicalProjection { exprs: [ #0, #1 ] } + └── LogicalScan { table: region } +PhysicalLimit { skip: 0, fetch: 100 } +└── PhysicalSort + ├── exprs: + │ ┌── SortOrder { order: Desc } + │ │ └── #0 + │ ├── SortOrder { order: Asc } + │ │ └── #2 + │ ├── SortOrder { order: Asc } + │ │ └── #1 + │ └── SortOrder 
{ order: Asc } + │ └── #3 + └── PhysicalProjection { exprs: [ #5, #2, #8, #0, #1, #3, #4, #6 ] } + └── PhysicalHashJoin { join_type: Inner, left_keys: [ #0, #7 ], right_keys: [ #1, #0 ] } + ├── PhysicalProjection { exprs: [ #0, #1, #2, #3, #4, #5, #6, #7, #8 ] } + │ └── PhysicalHashJoin { join_type: Inner, left_keys: [ #9 ], right_keys: [ #0 ] } + │ ├── PhysicalProjection { exprs: [ #0, #1, #2, #3, #5, #6, #7, #8, #10, #11 ] } + │ │ └── PhysicalHashJoin { join_type: Inner, left_keys: [ #4 ], right_keys: [ #0 ] } + │ │ ├── PhysicalProjection { exprs: [ #0, #1, #5, #6, #7, #8, #9, #10, #3 ] } + │ │ │ └── PhysicalHashJoin { join_type: Inner, left_keys: [ #2 ], right_keys: [ #0 ] } + │ │ │ ├── PhysicalProjection { exprs: [ #0, #1, #3, #4 ] } + │ │ │ │ └── PhysicalHashJoin { join_type: Inner, left_keys: [ #0 ], right_keys: [ #0 ] } + │ │ │ │ ├── PhysicalProjection { exprs: [ #0, #1 ] } + │ │ │ │ │ └── PhysicalProjection { exprs: [ #0, #2, #4, #5 ] } + │ │ │ │ │ └── PhysicalFilter + │ │ │ │ │ ├── cond:And + │ │ │ │ │ │ ├── Eq + │ │ │ │ │ │ │ ├── #5 + │ │ │ │ │ │ │ └── 4 + │ │ │ │ │ │ └── Like { expr: #4, pattern: "%TIN", negated: false, case_insensitive: false } + │ │ │ │ │ └── PhysicalScan { table: part } + │ │ │ │ └── PhysicalProjection { exprs: [ #0, #1, #3 ] } + │ │ │ │ └── PhysicalScan { table: partsupp } + │ │ │ └── PhysicalProjection { exprs: [ #0, #1, #2, #3, #4, #5, #6 ] } + │ │ │ └── PhysicalScan { table: supplier } + │ │ └── PhysicalProjection { exprs: [ #0, #1, #2 ] } + │ │ └── PhysicalScan { table: nation } + │ └── PhysicalProjection { exprs: [ #0 ] } + │ └── PhysicalProjection { exprs: [ #0, #1 ] } + │ └── PhysicalFilter + │ ├── cond:Eq + │ │ ├── #1 + │ │ └── "AFRICA" + │ └── PhysicalScan { table: region } + └── PhysicalProjection { exprs: [ #1, #0 ] } + └── PhysicalAgg + ├── aggrs:Agg(Min) + │ └── [ #1 ] + ├── groups: [ #0 ] + └── PhysicalProjection { exprs: [ #0, #1 ] } + └── PhysicalHashJoin { join_type: Inner, left_keys: [ #2 ], right_keys: [ #0 ] } + 
├── PhysicalProjection { exprs: [ #0, #1, #4 ] } + │ └── PhysicalHashJoin { join_type: Inner, left_keys: [ #2 ], right_keys: [ #0 ] } + │ ├── PhysicalProjection { exprs: [ #0, #2, #4 ] } + │ │ └── PhysicalHashJoin { join_type: Inner, left_keys: [ #1 ], right_keys: [ #0 ] } + │ │ ├── PhysicalProjection { exprs: [ #0, #1, #3 ] } + │ │ │ └── PhysicalScan { table: partsupp } + │ │ └── PhysicalProjection { exprs: [ #0, #3 ] } + │ │ └── PhysicalScan { table: supplier } + │ └── PhysicalProjection { exprs: [ #0, #2 ] } + │ └── PhysicalScan { table: nation } + └── PhysicalProjection { exprs: [ #0 ] } + └── PhysicalProjection { exprs: [ #0, #1 ] } + └── PhysicalFilter + ├── cond:Eq + │ ├── #1 + │ └── "AFRICA" + └── PhysicalScan { table: region } +*/ + +-- TPC-H Q3 +SELECT + l_orderkey, + SUM(l_extendedprice * (1 - l_discount)) AS revenue, + o_orderdate, + o_shippriority +FROM + customer, + orders, + lineitem +WHERE + c_mktsegment = 'FURNITURE' + AND c_custkey = o_custkey + AND l_orderkey = o_orderkey + AND o_orderdate < DATE '1995-03-29' + AND l_shipdate > DATE '1995-03-29' +GROUP BY + l_orderkey, + o_orderdate, + o_shippriority +ORDER BY + revenue DESC, + o_orderdate LIMIT 10; + +/* +LogicalLimit { skip: 0, fetch: 10 } +└── LogicalSort + ├── exprs: + │ ┌── SortOrder { order: Desc } + │ │ └── #1 + │ └── SortOrder { order: Asc } + │ └── #2 + └── LogicalProjection { exprs: [ #0, #3, #1, #2 ] } + └── LogicalAgg + ├── exprs:Agg(Sum) + │ └── Mul + │ ├── #3 + │ └── Sub + │ ├── 1 + │ └── #4 + ├── groups: [ #2, #0, #1 ] + └── LogicalProjection { exprs: [ #1, #2, #3, #4, #5 ] } + └── LogicalJoin + ├── join_type: Inner + ├── cond:Eq + │ ├── #0 + │ └── #3 + ├── LogicalProjection { exprs: [ #1, #3, #4 ] } + │ └── LogicalJoin + │ ├── join_type: Inner + │ ├── cond:Eq + │ │ ├── #0 + │ │ └── #2 + │ ├── LogicalProjection { exprs: [ #0 ] } + │ │ └── LogicalFilter + │ │ ├── cond:Eq + │ │ │ ├── #1 + │ │ │ └── "FURNITURE" + │ │ └── LogicalProjection { exprs: [ #0, #6 ] } + │ │ └── LogicalScan { 
table: customer } + │ └── LogicalFilter + │ ├── cond:Lt + │ │ ├── #2 + │ │ └── 9218 + │ └── LogicalProjection { exprs: [ #0, #1, #4, #7 ] } + │ └── LogicalScan { table: orders } + └── LogicalProjection { exprs: [ #0, #1, #2 ] } + └── LogicalFilter + ├── cond:Gt + │ ├── #3 + │ └── 9218 + └── LogicalProjection { exprs: [ #0, #5, #6, #10 ] } + └── LogicalScan { table: lineitem } +PhysicalLimit { skip: 0, fetch: 10 } +└── PhysicalSort + ├── exprs: + │ ┌── SortOrder { order: Desc } + │ │ └── #1 + │ └── SortOrder { order: Asc } + │ └── #2 + └── PhysicalProjection { exprs: [ #0, #3, #1, #2 ] } + └── PhysicalAgg + ├── aggrs:Agg(Sum) + │ └── Mul + │ ├── #3 + │ └── Sub + │ ├── 1 + │ └── #4 + ├── groups: [ #2, #0, #1 ] + └── PhysicalProjection { exprs: [ #1, #2, #3, #4, #5 ] } + └── PhysicalHashJoin { join_type: Inner, left_keys: [ #0 ], right_keys: [ #0 ] } + ├── PhysicalProjection { exprs: [ #1, #3, #4 ] } + │ └── PhysicalHashJoin { join_type: Inner, left_keys: [ #0 ], right_keys: [ #1 ] } + │ ├── PhysicalProjection { exprs: [ #0 ] } + │ │ └── PhysicalProjection { exprs: [ #0, #6 ] } + │ │ └── PhysicalFilter + │ │ ├── cond:Eq + │ │ │ ├── #6 + │ │ │ └── "FURNITURE" + │ │ └── PhysicalScan { table: customer } + │ └── PhysicalProjection { exprs: [ #0, #1, #4, #7 ] } + │ └── PhysicalFilter + │ ├── cond:Lt + │ │ ├── #4 + │ │ └── 9218 + │ └── PhysicalScan { table: orders } + └── PhysicalProjection { exprs: [ #0, #1, #2 ] } + └── PhysicalProjection { exprs: [ #0, #5, #6, #10 ] } + └── PhysicalFilter + ├── cond:Gt + │ ├── #10 + │ └── 9218 + └── PhysicalScan { table: lineitem } +*/ + +-- TPC-H Q5 +SELECT + n_name AS nation, + SUM(l_extendedprice * (1 - l_discount)) AS revenue +FROM + customer, + orders, + lineitem, + supplier, + nation, + region +WHERE + c_custkey = o_custkey + AND l_orderkey = o_orderkey + AND l_suppkey = s_suppkey + AND c_nationkey = s_nationkey + AND s_nationkey = n_nationkey + AND n_regionkey = r_regionkey + AND r_name = 'Asia' -- Specified region + AND 
o_orderdate >= DATE '2023-01-01' + AND o_orderdate < DATE '2024-01-01' +GROUP BY + n_name +ORDER BY + revenue DESC; + +/* +LogicalSort +├── exprs:SortOrder { order: Desc } +│ └── #1 +└── LogicalProjection { exprs: [ #0, #1 ] } + └── LogicalAgg + ├── exprs:Agg(Sum) + │ └── Mul + │ ├── #22 + │ └── Sub + │ ├── Cast { cast_to: Decimal128(20, 0), expr: 1 } + │ └── #23 + ├── groups: [ #41 ] + └── LogicalFilter + ├── cond:And + │ ├── Eq + │ │ ├── #0 + │ │ └── #9 + │ ├── Eq + │ │ ├── #17 + │ │ └── #8 + │ ├── Eq + │ │ ├── #19 + │ │ └── #33 + │ ├── Eq + │ │ ├── #3 + │ │ └── #36 + │ ├── Eq + │ │ ├── #36 + │ │ └── #40 + │ ├── Eq + │ │ ├── #42 + │ │ └── #44 + │ ├── Eq + │ │ ├── #45 + │ │ └── "Asia" + │ ├── Geq + │ │ ├── #12 + │ │ └── Cast { cast_to: Date32, expr: "2023-01-01" } + │ └── Lt + │ ├── #12 + │ └── Cast { cast_to: Date32, expr: "2024-01-01" } + └── LogicalJoin { join_type: Cross, cond: true } + ├── LogicalJoin { join_type: Cross, cond: true } + │ ├── LogicalJoin { join_type: Cross, cond: true } + │ │ ├── LogicalJoin { join_type: Cross, cond: true } + │ │ │ ├── LogicalJoin { join_type: Cross, cond: true } + │ │ │ │ ├── LogicalScan { table: customer } + │ │ │ │ └── LogicalScan { table: orders } + │ │ │ └── LogicalScan { table: lineitem } + │ │ └── LogicalScan { table: supplier } + │ └── LogicalScan { table: nation } + └── LogicalScan { table: region } +PhysicalSort +├── exprs:SortOrder { order: Desc } +│ └── #1 +└── PhysicalProjection { exprs: [ #0, #1 ] } + └── PhysicalAgg + ├── aggrs:Agg(Sum) + │ └── Mul + │ ├── #22 + │ └── Sub + │ ├── Cast { cast_to: Decimal128(20, 0), expr: 1 } + │ └── #23 + ├── groups: [ #41 ] + └── PhysicalHashJoin { join_type: Inner, left_keys: [ #19, #3 ], right_keys: [ #0, #3 ] } + ├── PhysicalHashJoin { join_type: Inner, left_keys: [ #0 ], right_keys: [ #1 ] } + │ ├── PhysicalScan { table: customer } + │ └── PhysicalHashJoin { join_type: Inner, left_keys: [ #0 ], right_keys: [ #0 ] } + │ ├── PhysicalFilter + │ │ ├── cond:And + │ │ │ ├── Geq + 
│ │ │ │ ├── #4 + │ │ │ │ └── Cast { cast_to: Date32, expr: "2023-01-01" } + │ │ │ └── Lt + │ │ │ ├── #4 + │ │ │ └── Cast { cast_to: Date32, expr: "2024-01-01" } + │ │ └── PhysicalScan { table: orders } + │ └── PhysicalScan { table: lineitem } + └── PhysicalHashJoin { join_type: Inner, left_keys: [ #3 ], right_keys: [ #0 ] } + ├── PhysicalScan { table: supplier } + └── PhysicalHashJoin { join_type: Inner, left_keys: [ #2 ], right_keys: [ #0 ] } + ├── PhysicalScan { table: nation } + └── PhysicalFilter + ├── cond:Eq + │ ├── #1 + │ └── "Asia" + └── PhysicalScan { table: region } +*/ + +-- TPC-H Q6 +SELECT + SUM(l_extendedprice * l_discount) AS revenue_loss +FROM + lineitem +WHERE + l_shipdate >= DATE '2023-01-01' + AND l_shipdate < DATE '2024-01-01' + AND l_discount BETWEEN 0.05 AND 0.07 + AND l_quantity < 24; + +/* +LogicalProjection { exprs: [ #0 ] } +└── LogicalAgg + ├── exprs:Agg(Sum) + │ └── Mul + │ ├── #5 + │ └── #6 + ├── groups: [] + └── LogicalFilter + ├── cond:And + │ ├── Geq + │ │ ├── #10 + │ │ └── Cast { cast_to: Date32, expr: "2023-01-01" } + │ ├── Lt + │ │ ├── #10 + │ │ └── Cast { cast_to: Date32, expr: "2024-01-01" } + │ ├── Between { expr: Cast { cast_to: Decimal128(30, 15), expr: #6 }, lower: Cast { cast_to: Decimal128(30, 15), expr: 0.05 }, upper: Cast { cast_to: Decimal128(30, 15), expr: 0.07 } } + │ └── Lt + │ ├── Cast { cast_to: Decimal128(22, 2), expr: #4 } + │ └── Cast { cast_to: Decimal128(22, 2), expr: 24 } + └── LogicalScan { table: lineitem } +PhysicalProjection { exprs: [ #0 ] } +└── PhysicalAgg + ├── aggrs:Agg(Sum) + │ └── Mul + │ ├── #5 + │ └── #6 + ├── groups: [] + └── PhysicalFilter + ├── cond:And + │ ├── Geq + │ │ ├── #10 + │ │ └── Cast { cast_to: Date32, expr: "2023-01-01" } + │ ├── Lt + │ │ ├── #10 + │ │ └── Cast { cast_to: Date32, expr: "2024-01-01" } + │ ├── Between { expr: Cast { cast_to: Decimal128(30, 15), expr: #6 }, lower: Cast { cast_to: Decimal128(30, 15), expr: 0.05 }, upper: Cast { cast_to: Decimal128(30, 15), expr: 0.07 } 
} + │ └── Lt + │ ├── Cast { cast_to: Decimal128(22, 2), expr: #4 } + │ └── Cast { cast_to: Decimal128(22, 2), expr: 24 } + └── PhysicalScan { table: lineitem } +*/ + +-- TPC-H Q7 +SELECT + supp_nation, + cust_nation, + l_year, + SUM(volume) AS revenue +FROM + ( + SELECT + n1.n_name AS supp_nation, + n2.n_name AS cust_nation, + EXTRACT(YEAR FROM l_shipdate) AS l_year, + l_extendedprice * (1 - l_discount) AS volume + FROM + supplier, + lineitem, + orders, + customer, + nation n1, + nation n2 + WHERE + s_suppkey = l_suppkey + AND o_orderkey = l_orderkey + AND c_custkey = o_custkey + AND s_nationkey = n1.n_nationkey + AND c_nationkey = n2.n_nationkey + AND ( + (n1.n_name = 'FRANCE' AND n2.n_name = 'GERMANY') + OR (n1.n_name = 'GERMANY' AND n2.n_name = 'FRANCE') + ) + AND l_shipdate BETWEEN DATE '1995-01-01' AND DATE '1996-12-31' + ) AS shipping +GROUP BY + supp_nation, + cust_nation, + l_year +ORDER BY + supp_nation, + cust_nation, + l_year; + +/* +LogicalSort +├── exprs: +│ ┌── SortOrder { order: Asc } +│ │ └── #0 +│ ├── SortOrder { order: Asc } +│ │ └── #1 +│ └── SortOrder { order: Asc } +│ └── #2 +└── LogicalProjection { exprs: [ #0, #1, #2, #3 ] } + └── LogicalAgg + ├── exprs:Agg(Sum) + │ └── [ #3 ] + ├── groups: [ #0, #1, #2 ] + └── LogicalProjection + ├── exprs: + │ ┌── #41 + │ ├── #45 + │ ├── Scalar(DatePart) + │ │ └── [ "YEAR", #17 ] + │ └── Mul + │ ├── #12 + │ └── Sub + │ ├── Cast { cast_to: Decimal128(20, 0), expr: 1 } + │ └── #13 + └── LogicalFilter + ├── cond:And + │ ├── Eq + │ │ ├── #0 + │ │ └── #9 + │ ├── Eq + │ │ ├── #23 + │ │ └── #7 + │ ├── Eq + │ │ ├── #32 + │ │ └── #24 + │ ├── Eq + │ │ ├── #3 + │ │ └── #40 + │ ├── Eq + │ │ ├── #35 + │ │ └── #44 + │ ├── Or + │ │ ├── And + │ │ │ ├── Eq + │ │ │ │ ├── #41 + │ │ │ │ └── "FRANCE" + │ │ │ └── Eq + │ │ │ ├── #45 + │ │ │ └── "GERMANY" + │ │ └── And + │ │ ├── Eq + │ │ │ ├── #41 + │ │ │ └── "GERMANY" + │ │ └── Eq + │ │ ├── #45 + │ │ └── "FRANCE" + │ └── Between { expr: #17, lower: Cast { cast_to: Date32, expr: 
"1995-01-01" }, upper: Cast { cast_to: Date32, expr: "1996-12-31" } } + └── LogicalJoin { join_type: Cross, cond: true } + ├── LogicalJoin { join_type: Cross, cond: true } + │ ├── LogicalJoin { join_type: Cross, cond: true } + │ │ ├── LogicalJoin { join_type: Cross, cond: true } + │ │ │ ├── LogicalJoin { join_type: Cross, cond: true } + │ │ │ │ ├── LogicalScan { table: supplier } + │ │ │ │ └── LogicalScan { table: lineitem } + │ │ │ └── LogicalScan { table: orders } + │ │ └── LogicalScan { table: customer } + │ └── LogicalScan { table: nation } + └── LogicalScan { table: nation } +PhysicalSort +├── exprs: +│ ┌── SortOrder { order: Asc } +│ │ └── #0 +│ ├── SortOrder { order: Asc } +│ │ └── #1 +│ └── SortOrder { order: Asc } +│ └── #2 +└── PhysicalProjection { exprs: [ #0, #1, #2, #3 ] } + └── PhysicalAgg + ├── aggrs:Agg(Sum) + │ └── [ #3 ] + ├── groups: [ #0, #1, #2 ] + └── PhysicalProjection + ├── exprs: + │ ┌── #41 + │ ├── #45 + │ ├── Scalar(DatePart) + │ │ └── [ "YEAR", #17 ] + │ └── Mul + │ ├── #12 + │ └── Sub + │ ├── Cast { cast_to: Decimal128(20, 0), expr: 1 } + │ └── #13 + └── PhysicalNestedLoopJoin + ├── join_type: Inner + ├── cond:And + │ ├── Eq + │ │ ├── #35 + │ │ └── #44 + │ └── Or + │ ├── And + │ │ ├── Eq + │ │ │ ├── #41 + │ │ │ └── "FRANCE" + │ │ └── Eq + │ │ ├── #45 + │ │ └── "GERMANY" + │ └── And + │ ├── Eq + │ │ ├── #41 + │ │ └── "GERMANY" + │ └── Eq + │ ├── #45 + │ └── "FRANCE" + ├── PhysicalHashJoin { join_type: Inner, left_keys: [ #3 ], right_keys: [ #0 ] } + │ ├── PhysicalHashJoin { join_type: Inner, left_keys: [ #0 ], right_keys: [ #2 ] } + │ │ ├── PhysicalScan { table: supplier } + │ │ └── PhysicalHashJoin { join_type: Inner, left_keys: [ #0 ], right_keys: [ #0 ] } + │ │ ├── PhysicalFilter { cond: Between { expr: #10, lower: Cast { cast_to: Date32, expr: "1995-01-01" }, upper: Cast { cast_to: Date32, expr: "1996-12-31" } } } + │ │ │ └── PhysicalScan { table: lineitem } + │ │ └── PhysicalHashJoin { join_type: Inner, left_keys: [ #1 ], 
right_keys: [ #0 ] } + │ │ ├── PhysicalScan { table: orders } + │ │ └── PhysicalScan { table: customer } + │ └── PhysicalScan { table: nation } + └── PhysicalScan { table: nation } +*/ + +-- TPC-H Q8 without top-most limit node +select + o_year, + sum(case + when nation = 'IRAQ' then volume + else 0 + end) / sum(volume) as mkt_share +from + ( + select + extract(year from o_orderdate) as o_year, + l_extendedprice * (1 - l_discount) as volume, + n2.n_name as nation + from + part, + supplier, + lineitem, + orders, + customer, + nation n1, + nation n2, + region + where + p_partkey = l_partkey + and s_suppkey = l_suppkey + and l_orderkey = o_orderkey + and o_custkey = c_custkey + and c_nationkey = n1.n_nationkey + and n1.n_regionkey = r_regionkey + and r_name = 'AMERICA' + and s_nationkey = n2.n_nationkey + and o_orderdate between date '1995-01-01' and date '1996-12-31' + and p_type = 'ECONOMY ANODIZED STEEL' + ) as all_nations +group by + o_year +order by + o_year; + +/* +LogicalSort +├── exprs:SortOrder { order: Asc } +│ └── #0 +└── LogicalProjection + ├── exprs: + │ ┌── #0 + │ └── Div + │ ├── #1 + │ └── #2 + └── LogicalAgg + ├── exprs: + │ ┌── Agg(Sum) + │ │ └── Case + │ │ └── + │ │ ┌── Eq + │ │ │ ├── #2 + │ │ │ └── "IRAQ" + │ │ ├── #1 + │ │ └── Cast { cast_to: Decimal128(38, 4), expr: 0 } + │ └── Agg(Sum) + │ └── [ #1 ] + ├── groups: [ #0 ] + └── LogicalProjection + ├── exprs: + │ ┌── Scalar(DatePart) + │ │ └── [ "YEAR", #36 ] + │ ├── Mul + │ │ ├── #21 + │ │ └── Sub + │ │ ├── Cast { cast_to: Decimal128(20, 0), expr: 1 } + │ │ └── #22 + │ └── #54 + └── LogicalFilter + ├── cond:And + │ ├── Eq + │ │ ├── #0 + │ │ └── #17 + │ ├── Eq + │ │ ├── #9 + │ │ └── #18 + │ ├── Eq + │ │ ├── #16 + │ │ └── #32 + │ ├── Eq + │ │ ├── #33 + │ │ └── #41 + │ ├── Eq + │ │ ├── #44 + │ │ └── #49 + │ ├── Eq + │ │ ├── #51 + │ │ └── #57 + │ ├── Eq + │ │ ├── #58 + │ │ └── "AMERICA" + │ ├── Eq + │ │ ├── #12 + │ │ └── #53 + │ ├── Between { expr: #36, lower: Cast { cast_to: Date32, expr: 
"1995-01-01" }, upper: Cast { cast_to: Date32, expr: "1996-12-31" } } + │ └── Eq + │ ├── #4 + │ └── "ECONOMY ANODIZED STEEL" + └── LogicalJoin { join_type: Cross, cond: true } + ├── LogicalJoin { join_type: Cross, cond: true } + │ ├── LogicalJoin { join_type: Cross, cond: true } + │ │ ├── LogicalJoin { join_type: Cross, cond: true } + │ │ │ ├── LogicalJoin { join_type: Cross, cond: true } + │ │ │ │ ├── LogicalJoin { join_type: Cross, cond: true } + │ │ │ │ │ ├── LogicalJoin { join_type: Cross, cond: true } + │ │ │ │ │ │ ├── LogicalScan { table: part } + │ │ │ │ │ │ └── LogicalScan { table: supplier } + │ │ │ │ │ └── LogicalScan { table: lineitem } + │ │ │ │ └── LogicalScan { table: orders } + │ │ │ └── LogicalScan { table: customer } + │ │ └── LogicalScan { table: nation } + │ └── LogicalScan { table: nation } + └── LogicalScan { table: region } +PhysicalSort +├── exprs:SortOrder { order: Asc } +│ └── #0 +└── PhysicalProjection + ├── exprs: + │ ┌── #0 + │ └── Div + │ ├── #1 + │ └── #2 + └── PhysicalAgg + ├── aggrs: + │ ┌── Agg(Sum) + │ │ └── Case + │ │ └── + │ │ ┌── Eq + │ │ │ ├── #2 + │ │ │ └── "IRAQ" + │ │ ├── #1 + │ │ └── Cast { cast_to: Decimal128(38, 4), expr: 0 } + │ └── Agg(Sum) + │ └── [ #1 ] + ├── groups: [ #0 ] + └── PhysicalProjection + ├── exprs: + │ ┌── Scalar(DatePart) + │ │ └── [ "YEAR", #36 ] + │ ├── Mul + │ │ ├── #21 + │ │ └── Sub + │ │ ├── Cast { cast_to: Decimal128(20, 0), expr: 1 } + │ │ └── #22 + │ └── #54 + └── PhysicalHashJoin { join_type: Inner, left_keys: [ #51 ], right_keys: [ #0 ] } + ├── PhysicalHashJoin { join_type: Inner, left_keys: [ #12 ], right_keys: [ #0 ] } + │ ├── PhysicalHashJoin { join_type: Inner, left_keys: [ #0, #9 ], right_keys: [ #1, #2 ] } + │ │ ├── PhysicalNestedLoopJoin { join_type: Cross, cond: true } + │ │ │ ├── PhysicalFilter + │ │ │ │ ├── cond:Eq + │ │ │ │ │ ├── #4 + │ │ │ │ │ └── "ECONOMY ANODIZED STEEL" + │ │ │ │ └── PhysicalScan { table: part } + │ │ │ └── PhysicalScan { table: supplier } + │ │ └── 
PhysicalHashJoin { join_type: Inner, left_keys: [ #0 ], right_keys: [ #0 ] } + │ │ ├── PhysicalScan { table: lineitem } + │ │ └── PhysicalHashJoin { join_type: Inner, left_keys: [ #1 ], right_keys: [ #0 ] } + │ │ ├── PhysicalFilter { cond: Between { expr: #4, lower: Cast { cast_to: Date32, expr: "1995-01-01" }, upper: Cast { cast_to: Date32, expr: "1996-12-31" } } } + │ │ │ └── PhysicalScan { table: orders } + │ │ └── PhysicalHashJoin { join_type: Inner, left_keys: [ #3 ], right_keys: [ #0 ] } + │ │ ├── PhysicalScan { table: customer } + │ │ └── PhysicalScan { table: nation } + │ └── PhysicalScan { table: nation } + └── PhysicalFilter + ├── cond:Eq + │ ├── #1 + │ └── "AMERICA" + └── PhysicalScan { table: region } +*/ + +-- TPC-H Q9 +SELECT + nation, + o_year, + SUM(amount) AS sum_profit +FROM + ( + SELECT + n_name AS nation, + EXTRACT(YEAR FROM o_orderdate) AS o_year, + l_extendedprice * (1 - l_discount) - ps_supplycost * l_quantity AS amount + FROM + part, + supplier, + lineitem, + partsupp, + orders, + nation + WHERE + s_suppkey = l_suppkey + AND ps_suppkey = l_suppkey + AND ps_partkey = l_partkey + AND p_partkey = l_partkey + AND o_orderkey = l_orderkey + AND s_nationkey = n_nationkey + AND p_name LIKE '%green%' + ) AS profit +GROUP BY + nation, + o_year +ORDER BY + nation, + o_year DESC; + +/* +LogicalSort +├── exprs: +│ ┌── SortOrder { order: Asc } +│ │ └── #0 +│ └── SortOrder { order: Desc } +│ └── #1 +└── LogicalProjection { exprs: [ #0, #1, #2 ] } + └── LogicalAgg + ├── exprs:Agg(Sum) + │ └── [ #2 ] + ├── groups: [ #0, #1 ] + └── LogicalProjection + ├── exprs: + │ ┌── #47 + │ ├── Scalar(DatePart) + │ │ └── [ "YEAR", #41 ] + │ └── Sub + │ ├── Mul + │ │ ├── #21 + │ │ └── Sub + │ │ ├── Cast { cast_to: Decimal128(20, 0), expr: 1 } + │ │ └── #22 + │ └── Mul + │ ├── #35 + │ └── #20 + └── LogicalFilter + ├── cond:And + │ ├── Eq + │ │ ├── #9 + │ │ └── #18 + │ ├── Eq + │ │ ├── #33 + │ │ └── #18 + │ ├── Eq + │ │ ├── #32 + │ │ └── #17 + │ ├── Eq + │ │ ├── #0 + │ │ └── 
#17 + │ ├── Eq + │ │ ├── #37 + │ │ └── #16 + │ ├── Eq + │ │ ├── #12 + │ │ └── #46 + │ └── Like { expr: #1, pattern: "%green%", negated: false, case_insensitive: false } + └── LogicalJoin { join_type: Cross, cond: true } + ├── LogicalJoin { join_type: Cross, cond: true } + │ ├── LogicalJoin { join_type: Cross, cond: true } + │ │ ├── LogicalJoin { join_type: Cross, cond: true } + │ │ │ ├── LogicalJoin { join_type: Cross, cond: true } + │ │ │ │ ├── LogicalScan { table: part } + │ │ │ │ └── LogicalScan { table: supplier } + │ │ │ └── LogicalScan { table: lineitem } + │ │ └── LogicalScan { table: partsupp } + │ └── LogicalScan { table: orders } + └── LogicalScan { table: nation } +PhysicalSort +├── exprs: +│ ┌── SortOrder { order: Asc } +│ │ └── #0 +│ └── SortOrder { order: Desc } +│ └── #1 +└── PhysicalProjection { exprs: [ #0, #1, #2 ] } + └── PhysicalAgg + ├── aggrs:Agg(Sum) + │ └── [ #2 ] + ├── groups: [ #0, #1 ] + └── PhysicalProjection + ├── exprs: + │ ┌── #47 + │ ├── Scalar(DatePart) + │ │ └── [ "YEAR", #41 ] + │ └── Sub + │ ├── Mul + │ │ ├── #21 + │ │ └── Sub + │ │ ├── Cast { cast_to: Decimal128(20, 0), expr: 1 } + │ │ └── #22 + │ └── Mul + │ ├── #35 + │ └── #20 + └── PhysicalHashJoin { join_type: Inner, left_keys: [ #12 ], right_keys: [ #0 ] } + ├── PhysicalHashJoin { join_type: Inner, left_keys: [ #9, #0 ], right_keys: [ #2, #1 ] } + │ ├── PhysicalNestedLoopJoin { join_type: Cross, cond: true } + │ │ ├── PhysicalFilter { cond: Like { expr: #1, pattern: "%green%", negated: false, case_insensitive: false } } + │ │ │ └── PhysicalScan { table: part } + │ │ └── PhysicalScan { table: supplier } + │ └── PhysicalHashJoin { join_type: Inner, left_keys: [ #0 ], right_keys: [ #0 ] } + │ ├── PhysicalHashJoin { join_type: Inner, left_keys: [ #2, #1 ], right_keys: [ #1, #0 ] } + │ │ ├── PhysicalScan { table: lineitem } + │ │ └── PhysicalScan { table: partsupp } + │ └── PhysicalScan { table: orders } + └── PhysicalScan { table: nation } +*/ + +-- TPC-H Q9 +SELECT + nation, + 
o_year, + SUM(amount) AS sum_profit +FROM + ( + SELECT + n_name AS nation, + EXTRACT(YEAR FROM o_orderdate) AS o_year, + l_extendedprice * (1 - l_discount) - ps_supplycost * l_quantity AS amount + FROM + part, + supplier, + lineitem, + partsupp, + orders, + nation + WHERE + s_suppkey = l_suppkey + AND ps_suppkey = l_suppkey + AND ps_partkey = l_partkey + AND p_partkey = l_partkey + AND o_orderkey = l_orderkey + AND s_nationkey = n_nationkey + AND p_name LIKE '%green%' + ) AS profit +GROUP BY + nation, + o_year +ORDER BY + nation, + o_year DESC; + +/* +LogicalSort +├── exprs: +│ ┌── SortOrder { order: Asc } +│ │ └── #0 +│ └── SortOrder { order: Desc } +│ └── #1 +└── LogicalProjection { exprs: [ #0, #1, #2 ] } + └── LogicalAgg + ├── exprs:Agg(Sum) + │ └── [ #2 ] + ├── groups: [ #0, #1 ] + └── LogicalProjection + ├── exprs: + │ ┌── #47 + │ ├── Scalar(DatePart) + │ │ └── [ "YEAR", #41 ] + │ └── Sub + │ ├── Mul + │ │ ├── #21 + │ │ └── Sub + │ │ ├── Cast { cast_to: Decimal128(20, 0), expr: 1 } + │ │ └── #22 + │ └── Mul + │ ├── #35 + │ └── #20 + └── LogicalFilter + ├── cond:And + │ ├── Eq + │ │ ├── #9 + │ │ └── #18 + │ ├── Eq + │ │ ├── #33 + │ │ └── #18 + │ ├── Eq + │ │ ├── #32 + │ │ └── #17 + │ ├── Eq + │ │ ├── #0 + │ │ └── #17 + │ ├── Eq + │ │ ├── #37 + │ │ └── #16 + │ ├── Eq + │ │ ├── #12 + │ │ └── #46 + │ └── Like { expr: #1, pattern: "%green%", negated: false, case_insensitive: false } + └── LogicalJoin { join_type: Cross, cond: true } + ├── LogicalJoin { join_type: Cross, cond: true } + │ ├── LogicalJoin { join_type: Cross, cond: true } + │ │ ├── LogicalJoin { join_type: Cross, cond: true } + │ │ │ ├── LogicalJoin { join_type: Cross, cond: true } + │ │ │ │ ├── LogicalScan { table: part } + │ │ │ │ └── LogicalScan { table: supplier } + │ │ │ └── LogicalScan { table: lineitem } + │ │ └── LogicalScan { table: partsupp } + │ └── LogicalScan { table: orders } + └── LogicalScan { table: nation } +PhysicalSort +├── exprs: +│ ┌── SortOrder { order: Asc } +│ │ └── #0 +│ └── 
SortOrder { order: Desc } +│ └── #1 +└── PhysicalProjection { exprs: [ #0, #1, #2 ] } + └── PhysicalAgg + ├── aggrs:Agg(Sum) + │ └── [ #2 ] + ├── groups: [ #0, #1 ] + └── PhysicalProjection + ├── exprs: + │ ┌── #47 + │ ├── Scalar(DatePart) + │ │ └── [ "YEAR", #41 ] + │ └── Sub + │ ├── Mul + │ │ ├── #21 + │ │ └── Sub + │ │ ├── Cast { cast_to: Decimal128(20, 0), expr: 1 } + │ │ └── #22 + │ └── Mul + │ ├── #35 + │ └── #20 + └── PhysicalHashJoin { join_type: Inner, left_keys: [ #12 ], right_keys: [ #0 ] } + ├── PhysicalHashJoin { join_type: Inner, left_keys: [ #9, #0 ], right_keys: [ #2, #1 ] } + │ ├── PhysicalNestedLoopJoin { join_type: Cross, cond: true } + │ │ ├── PhysicalFilter { cond: Like { expr: #1, pattern: "%green%", negated: false, case_insensitive: false } } + │ │ │ └── PhysicalScan { table: part } + │ │ └── PhysicalScan { table: supplier } + │ └── PhysicalHashJoin { join_type: Inner, left_keys: [ #0 ], right_keys: [ #0 ] } + │ ├── PhysicalHashJoin { join_type: Inner, left_keys: [ #2, #1 ], right_keys: [ #1, #0 ] } + │ │ ├── PhysicalScan { table: lineitem } + │ │ └── PhysicalScan { table: partsupp } + │ └── PhysicalScan { table: orders } + └── PhysicalScan { table: nation } +*/ + +-- TPC-H Q10 +SELECT + c_custkey, + c_name, + sum(l_extendedprice * (1 - l_discount)) as revenue, + c_acctbal, + n_name, + c_address, + c_phone, + c_comment +FROM + customer, + orders, + lineitem, + nation +WHERE + c_custkey = o_custkey + AND l_orderkey = o_orderkey + AND o_orderdate >= DATE '1993-07-01' + AND o_orderdate < DATE '1993-07-01' + INTERVAL '3' MONTH + AND l_returnflag = 'R' + AND c_nationkey = n_nationkey +GROUP BY + c_custkey, + c_name, + c_acctbal, + c_phone, + n_name, + c_address, + c_comment +ORDER BY + revenue DESC +LIMIT 20; + +/* +LogicalLimit { skip: 0, fetch: 20 } +└── LogicalSort + ├── exprs:SortOrder { order: Desc } + │ └── #2 + └── LogicalProjection { exprs: [ #0, #1, #7, #2, #4, #5, #3, #6 ] } + └── LogicalAgg + ├── exprs:Agg(Sum) + │ └── Mul + │ ├── #22 + 
│ └── Sub + │ ├── Cast { cast_to: Decimal128(20, 0), expr: 1 } + │ └── #23 + ├── groups: [ #0, #1, #5, #4, #34, #2, #7 ] + └── LogicalFilter + ├── cond:And + │ ├── Eq + │ │ ├── #0 + │ │ └── #9 + │ ├── Eq + │ │ ├── #17 + │ │ └── #8 + │ ├── Geq + │ │ ├── #12 + │ │ └── Cast { cast_to: Date32, expr: "1993-07-01" } + │ ├── Lt + │ │ ├── #12 + │ │ └── Add + │ │ ├── Cast { cast_to: Date32, expr: "1993-07-01" } + │ │ └── INTERVAL_MONTH_DAY_NANO (3, 0, 0) + │ ├── Eq + │ │ ├── #25 + │ │ └── "R" + │ └── Eq + │ ├── #3 + │ └── #33 + └── LogicalJoin { join_type: Cross, cond: true } + ├── LogicalJoin { join_type: Cross, cond: true } + │ ├── LogicalJoin { join_type: Cross, cond: true } + │ │ ├── LogicalScan { table: customer } + │ │ └── LogicalScan { table: orders } + │ └── LogicalScan { table: lineitem } + └── LogicalScan { table: nation } +PhysicalLimit { skip: 0, fetch: 20 } +└── PhysicalSort + ├── exprs:SortOrder { order: Desc } + │ └── #2 + └── PhysicalProjection { exprs: [ #0, #1, #7, #2, #4, #5, #3, #6 ] } + └── PhysicalAgg + ├── aggrs:Agg(Sum) + │ └── Mul + │ ├── #22 + │ └── Sub + │ ├── Cast { cast_to: Decimal128(20, 0), expr: 1 } + │ └── #23 + ├── groups: [ #0, #1, #5, #4, #34, #2, #7 ] + └── PhysicalHashJoin { join_type: Inner, left_keys: [ #3 ], right_keys: [ #0 ] } + ├── PhysicalHashJoin { join_type: Inner, left_keys: [ #0 ], right_keys: [ #1 ] } + │ ├── PhysicalScan { table: customer } + │ └── PhysicalHashJoin { join_type: Inner, left_keys: [ #0 ], right_keys: [ #0 ] } + │ ├── PhysicalFilter + │ │ ├── cond:And + │ │ │ ├── Geq + │ │ │ │ ├── #4 + │ │ │ │ └── Cast { cast_to: Date32, expr: "1993-07-01" } + │ │ │ └── Lt + │ │ │ ├── #4 + │ │ │ └── Add + │ │ │ ├── Cast { cast_to: Date32, expr: "1993-07-01" } + │ │ │ └── INTERVAL_MONTH_DAY_NANO (3, 0, 0) + │ │ └── PhysicalScan { table: orders } + │ └── PhysicalFilter + │ ├── cond:Eq + │ │ ├── #8 + │ │ └── "R" + │ └── PhysicalScan { table: lineitem } + └── PhysicalScan { table: nation } +*/ + +-- TPC-H Q12 +SELECT + l_shipmode, 
+ sum(case when o_orderpriority = '1-URGENT' + or o_orderpriority = '2-HIGH' + then 1 else 0 end) as high_priority_orders, + sum(case when o_orderpriority <> '1-URGENT' + and o_orderpriority <> '2-HIGH' + then 1 else 0 end) as low_priority_orders +FROM + orders, + lineitem +WHERE + o_orderkey = l_orderkey + AND l_shipmode in ('MAIL', 'SHIP') + AND l_commitdate < l_receiptdate + AND l_shipdate < l_commitdate + AND l_receiptdate >= DATE '1994-01-01' + AND l_receiptdate < DATE '1995-01-01' +GROUP BY + l_shipmode +ORDER BY + l_shipmode; + +/* +LogicalSort +├── exprs:SortOrder { order: Asc } +│ └── #0 +└── LogicalProjection { exprs: [ #0, #1, #2 ] } + └── LogicalAgg + ├── exprs: + │ ┌── Agg(Sum) + │ │ └── Case + │ │ └── + │ │ ┌── Or + │ │ │ ├── Eq + │ │ │ │ ├── #5 + │ │ │ │ └── "1-URGENT" + │ │ │ └── Eq + │ │ │ ├── #5 + │ │ │ └── "2-HIGH" + │ │ ├── 1 + │ │ └── 0 + │ └── Agg(Sum) + │ └── Case + │ └── + │ ┌── And + │ │ ├── Neq + │ │ │ ├── #5 + │ │ │ └── "1-URGENT" + │ │ └── Neq + │ │ ├── #5 + │ │ └── "2-HIGH" + │ ├── 1 + │ └── 0 + ├── groups: [ #23 ] + └── LogicalFilter + ├── cond:And + │ ├── Eq + │ │ ├── #0 + │ │ └── #9 + │ ├── InList { expr: #23, list: [ "MAIL", "SHIP" ], negated: false } + │ ├── Lt + │ │ ├── #20 + │ │ └── #21 + │ ├── Lt + │ │ ├── #19 + │ │ └── #20 + │ ├── Geq + │ │ ├── #21 + │ │ └── Cast { cast_to: Date32, expr: "1994-01-01" } + │ └── Lt + │ ├── #21 + │ └── Cast { cast_to: Date32, expr: "1995-01-01" } + └── LogicalJoin { join_type: Cross, cond: true } + ├── LogicalScan { table: orders } + └── LogicalScan { table: lineitem } +PhysicalSort +├── exprs:SortOrder { order: Asc } +│ └── #0 +└── PhysicalProjection { exprs: [ #0, #1, #2 ] } + └── PhysicalAgg + ├── aggrs: + │ ┌── Agg(Sum) + │ │ └── Case + │ │ └── + │ │ ┌── Or + │ │ │ ├── Eq + │ │ │ │ ├── #5 + │ │ │ │ └── "1-URGENT" + │ │ │ └── Eq + │ │ │ ├── #5 + │ │ │ └── "2-HIGH" + │ │ ├── 1 + │ │ └── 0 + │ └── Agg(Sum) + │ └── Case + │ └── + │ ┌── And + │ │ ├── Neq + │ │ │ ├── #5 + │ │ │ └── "1-URGENT" + │ │ 
└── Neq + │ │ ├── #5 + │ │ └── "2-HIGH" + │ ├── 1 + │ └── 0 + ├── groups: [ #23 ] + └── PhysicalHashJoin { join_type: Inner, left_keys: [ #0 ], right_keys: [ #0 ] } + ├── PhysicalScan { table: orders } + └── PhysicalFilter + ├── cond:And + │ ├── InList { expr: #14, list: [ "MAIL", "SHIP" ], negated: false } + │ ├── Lt + │ │ ├── #11 + │ │ └── #12 + │ ├── Lt + │ │ ├── #10 + │ │ └── #11 + │ ├── Geq + │ │ ├── #12 + │ │ └── Cast { cast_to: Date32, expr: "1994-01-01" } + │ └── Lt + │ ├── #12 + │ └── Cast { cast_to: Date32, expr: "1995-01-01" } + └── PhysicalScan { table: lineitem } +*/ + +-- TPC-H Q14 +SELECT + 100.00 * sum(case when p_type like 'PROMO%' + then l_extendedprice * (1 - l_discount) + else 0 end) / sum(l_extendedprice * (1 - l_discount)) as promo_revenue +FROM + lineitem, + part +WHERE + l_partkey = p_partkey + AND l_shipdate >= DATE '1995-09-01' + AND l_shipdate < DATE '1995-09-01' + INTERVAL '1' MONTH; + +/* +LogicalProjection +├── exprs:Div +│ ├── Mul +│ │ ├── 100 +│ │ └── Cast { cast_to: Float64, expr: #0 } +│ └── Cast { cast_to: Float64, expr: #1 } +└── LogicalAgg + ├── exprs: + │ ┌── Agg(Sum) + │ │ └── Case + │ │ └── + │ │ ┌── Like { expr: #20, pattern: "PROMO%", negated: false, case_insensitive: false } + │ │ ├── Mul + │ │ │ ├── #5 + │ │ │ └── Sub + │ │ │ ├── Cast { cast_to: Decimal128(20, 0), expr: 1 } + │ │ │ └── #6 + │ │ └── Cast { cast_to: Decimal128(38, 4), expr: 0 } + │ └── Agg(Sum) + │ └── Mul + │ ├── #5 + │ └── Sub + │ ├── Cast { cast_to: Decimal128(20, 0), expr: 1 } + │ └── #6 + ├── groups: [] + └── LogicalFilter + ├── cond:And + │ ├── Eq + │ │ ├── #1 + │ │ └── #16 + │ ├── Geq + │ │ ├── #10 + │ │ └── Cast { cast_to: Date32, expr: "1995-09-01" } + │ └── Lt + │ ├── #10 + │ └── Add + │ ├── Cast { cast_to: Date32, expr: "1995-09-01" } + │ └── INTERVAL_MONTH_DAY_NANO (1, 0, 0) + └── LogicalJoin { join_type: Cross, cond: true } + ├── LogicalScan { table: lineitem } + └── LogicalScan { table: part } +PhysicalProjection +├── exprs:Div +│ ├── Mul +│ │ 
├── 100 +│ │ └── Cast { cast_to: Float64, expr: #0 } +│ └── Cast { cast_to: Float64, expr: #1 } +└── PhysicalAgg + ├── aggrs: + │ ┌── Agg(Sum) + │ │ └── Case + │ │ └── + │ │ ┌── Like { expr: #20, pattern: "PROMO%", negated: false, case_insensitive: false } + │ │ ├── Mul + │ │ │ ├── #5 + │ │ │ └── Sub + │ │ │ ├── Cast { cast_to: Decimal128(20, 0), expr: 1 } + │ │ │ └── #6 + │ │ └── Cast { cast_to: Decimal128(38, 4), expr: 0 } + │ └── Agg(Sum) + │ └── Mul + │ ├── #5 + │ └── Sub + │ ├── Cast { cast_to: Decimal128(20, 0), expr: 1 } + │ └── #6 + ├── groups: [] + └── PhysicalHashJoin { join_type: Inner, left_keys: [ #1 ], right_keys: [ #0 ] } + ├── PhysicalFilter + │ ├── cond:And + │ │ ├── Geq + │ │ │ ├── #10 + │ │ │ └── Cast { cast_to: Date32, expr: "1995-09-01" } + │ │ └── Lt + │ │ ├── #10 + │ │ └── Add + │ │ ├── Cast { cast_to: Date32, expr: "1995-09-01" } + │ │ └── INTERVAL_MONTH_DAY_NANO (1, 0, 0) + │ └── PhysicalScan { table: lineitem } + └── PhysicalScan { table: part } +*/ + +-- TPC-H Q15 +WITH revenue0 (supplier_no, total_revenue) AS +( + SELECT + l_suppkey, + SUM(l_extendedprice * (1 - l_discount)) + FROM + lineitem + WHERE + l_shipdate >= DATE '1993-01-01' + AND l_shipdate < DATE '1993-01-01' + INTERVAL '3' MONTH + GROUP BY + l_suppkey +) +SELECT + s_suppkey, + s_name, + s_address, + s_phone, + total_revenue +FROM + supplier, + revenue0 +WHERE + s_suppkey = supplier_no + AND total_revenue = + ( + SELECT + MAX(total_revenue) + FROM + revenue0 + ) +ORDER BY + s_suppkey; + +/* +LogicalSort +├── exprs:SortOrder { order: Asc } +│ └── #0 +└── LogicalProjection { exprs: [ #0, #1, #2, #3, #4 ] } + └── LogicalJoin + ├── join_type: Inner + ├── cond:Eq + │ ├── #4 + │ └── #5 + ├── LogicalProjection { exprs: [ #0, #1, #2, #3, #5 ] } + │ └── LogicalJoin + │ ├── join_type: Inner + │ ├── cond:Eq + │ │ ├── #0 + │ │ └── #4 + │ ├── LogicalProjection { exprs: [ #0, #1, #2, #4 ] } + │ │ └── LogicalScan { table: supplier } + │ └── LogicalProjection { exprs: [ #0, #1 ] } + │ └── 
LogicalAgg + │ ├── exprs:Agg(Sum) + │ │ └── Mul + │ │ ├── #1 + │ │ └── Sub + │ │ ├── 1 + │ │ └── #2 + │ ├── groups: [ #0 ] + │ └── LogicalProjection { exprs: [ #0, #1, #2 ] } + │ └── LogicalFilter + │ ├── cond:And + │ │ ├── Geq + │ │ │ ├── #3 + │ │ │ └── 8401 + │ │ └── Lt + │ │ ├── #3 + │ │ └── 8491 + │ └── LogicalProjection { exprs: [ #2, #5, #6, #10 ] } + │ └── LogicalScan { table: lineitem } + └── LogicalAgg + ├── exprs:Agg(Max) + │ └── [ #0 ] + ├── groups: [] + └── LogicalProjection { exprs: [ #1 ] } + └── LogicalAgg + ├── exprs:Agg(Sum) + │ └── Mul + │ ├── #1 + │ └── Sub + │ ├── 1 + │ └── #2 + ├── groups: [ #0 ] + └── LogicalProjection { exprs: [ #0, #1, #2 ] } + └── LogicalFilter + ├── cond:And + │ ├── Geq + │ │ ├── #3 + │ │ └── 8401 + │ └── Lt + │ ├── #3 + │ └── 8491 + └── LogicalProjection { exprs: [ #2, #5, #6, #10 ] } + └── LogicalScan { table: lineitem } +PhysicalSort +├── exprs:SortOrder { order: Asc } +│ └── #0 +└── PhysicalProjection { exprs: [ #0, #1, #2, #3, #4 ] } + └── PhysicalHashJoin { join_type: Inner, left_keys: [ #4 ], right_keys: [ #0 ] } + ├── PhysicalProjection { exprs: [ #0, #1, #2, #3, #5 ] } + │ └── PhysicalHashJoin { join_type: Inner, left_keys: [ #0 ], right_keys: [ #0 ] } + │ ├── PhysicalProjection { exprs: [ #0, #1, #2, #4 ] } + │ │ └── PhysicalScan { table: supplier } + │ └── PhysicalProjection { exprs: [ #0, #1 ] } + │ └── PhysicalAgg + │ ├── aggrs:Agg(Sum) + │ │ └── Mul + │ │ ├── #1 + │ │ └── Sub + │ │ ├── 1 + │ │ └── #2 + │ ├── groups: [ #0 ] + │ └── PhysicalProjection { exprs: [ #0, #1, #2 ] } + │ └── PhysicalProjection { exprs: [ #2, #5, #6, #10 ] } + │ └── PhysicalFilter + │ ├── cond:And + │ │ ├── Geq + │ │ │ ├── #10 + │ │ │ └── 8401 + │ │ └── Lt + │ │ ├── #10 + │ │ └── 8491 + │ └── PhysicalScan { table: lineitem } + └── PhysicalAgg + ├── aggrs:Agg(Max) + │ └── [ #0 ] + ├── groups: [] + └── PhysicalProjection { exprs: [ #1 ] } + └── PhysicalAgg + ├── aggrs:Agg(Sum) + │ └── Mul + │ ├── #1 + │ └── Sub + │ ├── 1 + │ └── #2 + ├── 
groups: [ #0 ] + └── PhysicalProjection { exprs: [ #0, #1, #2 ] } + └── PhysicalProjection { exprs: [ #2, #5, #6, #10 ] } + └── PhysicalFilter + ├── cond:And + │ ├── Geq + │ │ ├── #10 + │ │ └── 8401 + │ └── Lt + │ ├── #10 + │ └── 8491 + └── PhysicalScan { table: lineitem } +*/ + +-- TPC-H Q17 +SELECT + ROUND(SUM(l_extendedprice) / 7.0, 16) AS avg_yearly +FROM + lineitem, + part +WHERE + p_partkey = l_partkey + AND p_brand = 'Brand#13' + AND p_container = 'JUMBO PKG' + AND l_quantity < ( + SELECT + 0.2 * AVG(l_quantity) + FROM + lineitem + WHERE + l_partkey = p_partkey + ); + +/* +LogicalProjection +├── exprs:Scalar(Round) +│ └── +│ ┌── Div +│ │ ├── Cast { cast_to: Float64, expr: #0 } +│ │ └── 7 +│ └── 16 +└── LogicalAgg + ├── exprs:Agg(Sum) + │ └── [ #0 ] + ├── groups: [] + └── LogicalProjection { exprs: [ #1 ] } + └── LogicalJoin + ├── join_type: Inner + ├── cond:And + │ ├── Eq + │ │ ├── #2 + │ │ └── #4 + │ └── Lt + │ ├── Cast { cast_to: Decimal128(30, 15), expr: #0 } + │ └── #3 + ├── LogicalProjection { exprs: [ #1, #2, #3 ] } + │ └── LogicalJoin + │ ├── join_type: Inner + │ ├── cond:Eq + │ │ ├── #0 + │ │ └── #3 + │ ├── LogicalProjection { exprs: [ #1, #4, #5 ] } + │ │ └── LogicalScan { table: lineitem } + │ └── LogicalProjection { exprs: [ #0 ] } + │ └── LogicalFilter + │ ├── cond:And + │ │ ├── Eq + │ │ │ ├── #1 + │ │ │ └── "Brand#13" + │ │ └── Eq + │ │ ├── #2 + │ │ └── "JUMBO PKG" + │ └── LogicalProjection { exprs: [ #0, #3, #6 ] } + │ └── LogicalScan { table: part } + └── LogicalProjection + ├── exprs: + │ ┌── Cast + │ │ ├── cast_to: Decimal128(30, 15) + │ │ ├── expr:Mul + │ │ │ ├── 0.2 + │ │ │ └── Cast { cast_to: Float64, expr: #1 } + + │ └── #0 + └── LogicalAgg + ├── exprs:Agg(Avg) + │ └── [ #1 ] + ├── groups: [ #0 ] + └── LogicalProjection { exprs: [ #1, #4 ] } + └── LogicalScan { table: lineitem } +PhysicalProjection +├── exprs:Scalar(Round) +│ └── +│ ┌── Div +│ │ ├── Cast { cast_to: Float64, expr: #0 } +│ │ └── 7 +│ └── 16 +└── PhysicalAgg + ├── 
aggrs:Agg(Sum) + │ └── [ #0 ] + ├── groups: [] + └── PhysicalProjection { exprs: [ #1 ] } + └── PhysicalNestedLoopJoin + ├── join_type: Inner + ├── cond:And + │ ├── Eq + │ │ ├── #2 + │ │ └── #0 + │ └── Lt + │ ├── Cast { cast_to: Decimal128(30, 15), expr: #0 } + │ └── #3 + ├── PhysicalProjection { exprs: [ #1, #2, #3 ] } + │ └── PhysicalHashJoin { join_type: Inner, left_keys: [ #0 ], right_keys: [ #0 ] } + │ ├── PhysicalProjection { exprs: [ #1, #4, #5 ] } + │ │ └── PhysicalScan { table: lineitem } + │ └── PhysicalProjection { exprs: [ #0 ] } + │ └── PhysicalProjection { exprs: [ #0, #3, #6 ] } + │ └── PhysicalFilter + │ ├── cond:And + │ │ ├── Eq + │ │ │ ├── #3 + │ │ │ └── "Brand#13" + │ │ └── Eq + │ │ ├── #6 + │ │ └── "JUMBO PKG" + │ └── PhysicalScan { table: part } + └── PhysicalProjection + ├── exprs: + │ ┌── Cast + │ │ ├── cast_to: Decimal128(30, 15) + │ │ ├── expr:Mul + │ │ │ ├── 0.2 + │ │ │ └── Cast { cast_to: Float64, expr: #1 } + + │ └── #0 + └── PhysicalAgg + ├── aggrs:Agg(Avg) + │ └── [ #1 ] + ├── groups: [ #0 ] + └── PhysicalProjection { exprs: [ #1, #4 ] } + └── PhysicalScan { table: lineitem } +*/ + +-- TPC-H Q19 +SELECT + sum(l_extendedprice* (1 - l_discount)) as revenue +FROM + lineitem, + part +WHERE + ( + p_partkey = l_partkey + AND p_brand = 'Brand#12' + AND p_container IN ('SM CASE', 'SM BOX', 'SM PACK', 'SM PKG') + AND l_quantity >= 1 AND l_quantity <= 11 + AND p_size BETWEEN 1 AND 5 + AND l_shipmode IN ('AIR', 'AIR REG') + AND l_shipinstruct = 'DELIVER IN PERSON' + ) OR ( + p_partkey = l_partkey + AND p_brand = 'Brand#23' + AND p_container IN ('MED BAG', 'MED BOX', 'MED PKG', 'MED PACK') + AND l_quantity >= 10 AND l_quantity <= 20 + AND p_size BETWEEN 1 AND 10 + AND l_shipmode IN ('AIR', 'AIR REG') + AND l_shipinstruct = 'DELIVER IN PERSON' + ) OR ( + p_partkey = l_partkey + AND p_brand = 'Brand#34' + AND p_container IN ('LG CASE', 'LG BOX', 'LG PACK', 'LG PKG') + AND l_quantity >= 20 AND l_quantity <= 30 + AND p_size BETWEEN 1 AND 15 + AND 
l_shipmode IN ('AIR', 'AIR REG') + AND l_shipinstruct = 'DELIVER IN PERSON' + ) + +/* +LogicalProjection { exprs: [ #0 ] } +└── LogicalAgg + ├── exprs:Agg(Sum) + │ └── Mul + │ ├── #5 + │ └── Sub + │ ├── Cast { cast_to: Decimal128(20, 0), expr: 1 } + │ └── #6 + ├── groups: [] + └── LogicalFilter + ├── cond:Or + │ ├── And + │ │ ├── Eq + │ │ │ ├── #16 + │ │ │ └── #1 + │ │ ├── Eq + │ │ │ ├── #19 + │ │ │ └── "Brand#12" + │ │ ├── InList { expr: #22, list: [ "SM CASE", "SM BOX", "SM PACK", "SM PKG" ], negated: false } + │ │ ├── Geq + │ │ │ ├── Cast { cast_to: Decimal128(22, 2), expr: #4 } + │ │ │ └── Cast { cast_to: Decimal128(22, 2), expr: 1 } + │ │ ├── Leq + │ │ │ ├── Cast { cast_to: Decimal128(22, 2), expr: #4 } + │ │ │ └── Cast { cast_to: Decimal128(22, 2), expr: 11 } + │ │ ├── Between { expr: Cast { cast_to: Int64, expr: #21 }, lower: 1, upper: 5 } + │ │ ├── InList { expr: #14, list: [ "AIR", "AIR REG" ], negated: false } + │ │ └── Eq + │ │ ├── #13 + │ │ └── "DELIVER IN PERSON" + │ ├── And + │ │ ├── Eq + │ │ │ ├── #16 + │ │ │ └── #1 + │ │ ├── Eq + │ │ │ ├── #19 + │ │ │ └── "Brand#23" + │ │ ├── InList { expr: #22, list: [ "MED BAG", "MED BOX", "MED PKG", "MED PACK" ], negated: false } + │ │ ├── Geq + │ │ │ ├── Cast { cast_to: Decimal128(22, 2), expr: #4 } + │ │ │ └── Cast { cast_to: Decimal128(22, 2), expr: 10 } + │ │ ├── Leq + │ │ │ ├── Cast { cast_to: Decimal128(22, 2), expr: #4 } + │ │ │ └── Cast { cast_to: Decimal128(22, 2), expr: 20 } + │ │ ├── Between { expr: Cast { cast_to: Int64, expr: #21 }, lower: 1, upper: 10 } + │ │ ├── InList { expr: #14, list: [ "AIR", "AIR REG" ], negated: false } + │ │ └── Eq + │ │ ├── #13 + │ │ └── "DELIVER IN PERSON" + │ └── And + │ ├── Eq + │ │ ├── #16 + │ │ └── #1 + │ ├── Eq + │ │ ├── #19 + │ │ └── "Brand#34" + │ ├── InList { expr: #22, list: [ "LG CASE", "LG BOX", "LG PACK", "LG PKG" ], negated: false } + │ ├── Geq + │ │ ├── Cast { cast_to: Decimal128(22, 2), expr: #4 } + │ │ └── Cast { cast_to: Decimal128(22, 2), expr: 20 } + │ 
├── Leq + │ │ ├── Cast { cast_to: Decimal128(22, 2), expr: #4 } + │ │ └── Cast { cast_to: Decimal128(22, 2), expr: 30 } + │ ├── Between { expr: Cast { cast_to: Int64, expr: #21 }, lower: 1, upper: 15 } + │ ├── InList { expr: #14, list: [ "AIR", "AIR REG" ], negated: false } + │ └── Eq + │ ├── #13 + │ └── "DELIVER IN PERSON" + └── LogicalJoin { join_type: Cross, cond: true } + ├── LogicalScan { table: lineitem } + └── LogicalScan { table: part } +PhysicalProjection { exprs: [ #0 ] } +└── PhysicalAgg + ├── aggrs:Agg(Sum) + │ └── Mul + │ ├── #5 + │ └── Sub + │ ├── Cast { cast_to: Decimal128(20, 0), expr: 1 } + │ └── #6 + ├── groups: [] + └── PhysicalNestedLoopJoin + ├── join_type: Inner + ├── cond:Or + │ ├── And + │ │ ├── Eq + │ │ │ ├── #16 + │ │ │ └── #1 + │ │ ├── Eq + │ │ │ ├── #19 + │ │ │ └── "Brand#12" + │ │ ├── InList { expr: #22, list: [ "SM CASE", "SM BOX", "SM PACK", "SM PKG" ], negated: false } + │ │ ├── Geq + │ │ │ ├── Cast { cast_to: Decimal128(22, 2), expr: #4 } + │ │ │ └── Cast { cast_to: Decimal128(22, 2), expr: 1 } + │ │ ├── Leq + │ │ │ ├── Cast { cast_to: Decimal128(22, 2), expr: #4 } + │ │ │ └── Cast { cast_to: Decimal128(22, 2), expr: 11 } + │ │ ├── Between { expr: Cast { cast_to: Int64, expr: #21 }, lower: 1, upper: 5 } + │ │ ├── InList { expr: #14, list: [ "AIR", "AIR REG" ], negated: false } + │ │ └── Eq + │ │ ├── #13 + │ │ └── "DELIVER IN PERSON" + │ ├── And + │ │ ├── Eq + │ │ │ ├── #16 + │ │ │ └── #1 + │ │ ├── Eq + │ │ │ ├── #19 + │ │ │ └── "Brand#23" + │ │ ├── InList { expr: #22, list: [ "MED BAG", "MED BOX", "MED PKG", "MED PACK" ], negated: false } + │ │ ├── Geq + │ │ │ ├── Cast { cast_to: Decimal128(22, 2), expr: #4 } + │ │ │ └── Cast { cast_to: Decimal128(22, 2), expr: 10 } + │ │ ├── Leq + │ │ │ ├── Cast { cast_to: Decimal128(22, 2), expr: #4 } + │ │ │ └── Cast { cast_to: Decimal128(22, 2), expr: 20 } + │ │ ├── Between { expr: Cast { cast_to: Int64, expr: #21 }, lower: 1, upper: 10 } + │ │ ├── InList { expr: #14, list: [ "AIR", "AIR REG" ], 
negated: false } + │ │ └── Eq + │ │ ├── #13 + │ │ └── "DELIVER IN PERSON" + │ └── And + │ ├── Eq + │ │ ├── #16 + │ │ └── #1 + │ ├── Eq + │ │ ├── #19 + │ │ └── "Brand#34" + │ ├── InList { expr: #22, list: [ "LG CASE", "LG BOX", "LG PACK", "LG PKG" ], negated: false } + │ ├── Geq + │ │ ├── Cast { cast_to: Decimal128(22, 2), expr: #4 } + │ │ └── Cast { cast_to: Decimal128(22, 2), expr: 20 } + │ ├── Leq + │ │ ├── Cast { cast_to: Decimal128(22, 2), expr: #4 } + │ │ └── Cast { cast_to: Decimal128(22, 2), expr: 30 } + │ ├── Between { expr: Cast { cast_to: Int64, expr: #21 }, lower: 1, upper: 15 } + │ ├── InList { expr: #14, list: [ "AIR", "AIR REG" ], negated: false } + │ └── Eq + │ ├── #13 + │ └── "DELIVER IN PERSON" + ├── PhysicalScan { table: lineitem } + └── PhysicalScan { table: part } +*/ diff --git a/optd-sqlplannertest/tests/tpch.planner.sql b/optd-sqlplannertest/tests/tpch.planner.sql index 3da6dfa3..8497c26d 100644 --- a/optd-sqlplannertest/tests/tpch.planner.sql +++ b/optd-sqlplannertest/tests/tpch.planner.sql @@ -641,15 +641,15 @@ PhysicalSort │ │ │ └── Cast { cast_to: Date32, expr: "2024-01-01" } │ │ └── PhysicalScan { table: orders } │ └── PhysicalScan { table: lineitem } - └── PhysicalHashJoin { join_type: Inner, left_keys: [ #3 ], right_keys: [ #0 ] } - ├── PhysicalScan { table: supplier } - └── PhysicalHashJoin { join_type: Inner, left_keys: [ #2 ], right_keys: [ #0 ] } - ├── PhysicalScan { table: nation } - └── PhysicalFilter - ├── cond:Eq - │ ├── #1 - │ └── "Asia" - └── PhysicalScan { table: region } + └── PhysicalHashJoin { join_type: Inner, left_keys: [ #9 ], right_keys: [ #0 ] } + ├── PhysicalHashJoin { join_type: Inner, left_keys: [ #3 ], right_keys: [ #0 ] } + │ ├── PhysicalScan { table: supplier } + │ └── PhysicalScan { table: nation } + └── PhysicalFilter + ├── cond:Eq + │ ├── #1 + │ └── "Asia" + └── PhysicalScan { table: region } */ -- TPC-H Q6 @@ -864,12 +864,12 @@ PhysicalSort ├── PhysicalHashJoin { join_type: Inner, left_keys: [ #3 ], 
right_keys: [ #0 ] } │ ├── PhysicalHashJoin { join_type: Inner, left_keys: [ #0 ], right_keys: [ #2 ] } │ │ ├── PhysicalScan { table: supplier } - │ │ └── PhysicalHashJoin { join_type: Inner, left_keys: [ #0 ], right_keys: [ #0 ] } - │ │ ├── PhysicalFilter { cond: Between { expr: #10, lower: Cast { cast_to: Date32, expr: "1995-01-01" }, upper: Cast { cast_to: Date32, expr: "1996-12-31" } } } - │ │ │ └── PhysicalScan { table: lineitem } - │ │ └── PhysicalHashJoin { join_type: Inner, left_keys: [ #1 ], right_keys: [ #0 ] } - │ │ ├── PhysicalScan { table: orders } - │ │ └── PhysicalScan { table: customer } + │ │ └── PhysicalHashJoin { join_type: Inner, left_keys: [ #17 ], right_keys: [ #0 ] } + │ │ ├── PhysicalHashJoin { join_type: Inner, left_keys: [ #0 ], right_keys: [ #0 ] } + │ │ │ ├── PhysicalFilter { cond: Between { expr: #10, lower: Cast { cast_to: Date32, expr: "1995-01-01" }, upper: Cast { cast_to: Date32, expr: "1996-12-31" } } } + │ │ │ │ └── PhysicalScan { table: lineitem } + │ │ │ └── PhysicalScan { table: orders } + │ │ └── PhysicalScan { table: customer } │ └── PhysicalScan { table: nation } └── PhysicalScan { table: nation } */ @@ -1033,14 +1033,14 @@ PhysicalSort │ │ │ │ │ └── "ECONOMY ANODIZED STEEL" │ │ │ │ └── PhysicalScan { table: part } │ │ │ └── PhysicalScan { table: supplier } - │ │ └── PhysicalHashJoin { join_type: Inner, left_keys: [ #0 ], right_keys: [ #0 ] } - │ │ ├── PhysicalScan { table: lineitem } - │ │ └── PhysicalHashJoin { join_type: Inner, left_keys: [ #1 ], right_keys: [ #0 ] } - │ │ ├── PhysicalFilter { cond: Between { expr: #4, lower: Cast { cast_to: Date32, expr: "1995-01-01" }, upper: Cast { cast_to: Date32, expr: "1996-12-31" } } } - │ │ │ └── PhysicalScan { table: orders } - │ │ └── PhysicalHashJoin { join_type: Inner, left_keys: [ #3 ], right_keys: [ #0 ] } - │ │ ├── PhysicalScan { table: customer } - │ │ └── PhysicalScan { table: nation } + │ │ └── PhysicalHashJoin { join_type: Inner, left_keys: [ #17 ], right_keys: [ #0 ] } 
+ │ │ ├── PhysicalHashJoin { join_type: Inner, left_keys: [ #0 ], right_keys: [ #0 ] } + │ │ │ ├── PhysicalScan { table: lineitem } + │ │ │ └── PhysicalFilter { cond: Between { expr: #4, lower: Cast { cast_to: Date32, expr: "1995-01-01" }, upper: Cast { cast_to: Date32, expr: "1996-12-31" } } } + │ │ │ └── PhysicalScan { table: orders } + │ │ └── PhysicalHashJoin { join_type: Inner, left_keys: [ #3 ], right_keys: [ #0 ] } + │ │ ├── PhysicalScan { table: customer } + │ │ └── PhysicalScan { table: nation } │ └── PhysicalScan { table: nation } └── PhysicalFilter ├── cond:Eq @@ -1748,55 +1748,55 @@ PhysicalSort ├── exprs:SortOrder { order: Asc } │ └── #0 └── PhysicalProjection { exprs: [ #0, #1, #2, #3, #4 ] } - └── PhysicalHashJoin { join_type: Inner, left_keys: [ #4 ], right_keys: [ #0 ] } - ├── PhysicalProjection { exprs: [ #0, #1, #2, #3, #5 ] } - │ └── PhysicalHashJoin { join_type: Inner, left_keys: [ #0 ], right_keys: [ #0 ] } - │ ├── PhysicalProjection { exprs: [ #0, #1, #2, #4 ] } - │ │ └── PhysicalScan { table: supplier } - │ └── PhysicalProjection { exprs: [ #0, #1 ] } - │ └── PhysicalAgg - │ ├── aggrs:Agg(Sum) - │ │ └── Mul - │ │ ├── #1 - │ │ └── Sub - │ │ ├── 1 - │ │ └── #2 - │ ├── groups: [ #0 ] - │ └── PhysicalProjection { exprs: [ #0, #1, #2 ] } - │ └── PhysicalProjection { exprs: [ #2, #5, #6, #10 ] } - │ └── PhysicalFilter - │ ├── cond:And - │ │ ├── Geq - │ │ │ ├── #10 - │ │ │ └── 8401 - │ │ └── Lt - │ │ ├── #10 - │ │ └── 8491 - │ └── PhysicalScan { table: lineitem } - └── PhysicalAgg - ├── aggrs:Agg(Max) - │ └── [ #0 ] - ├── groups: [] - └── PhysicalProjection { exprs: [ #1 ] } + └── PhysicalProjection { exprs: [ #0, #1, #2, #3, #5 ] } + └── PhysicalHashJoin { join_type: Inner, left_keys: [ #0 ], right_keys: [ #0 ] } + ├── PhysicalProjection { exprs: [ #0, #1, #2, #4 ] } + │ └── PhysicalScan { table: supplier } + └── PhysicalHashJoin { join_type: Inner, left_keys: [ #1 ], right_keys: [ #0 ] } + ├── PhysicalProjection { exprs: [ #0, #1 ] } + │ └── 
PhysicalAgg + │ ├── aggrs:Agg(Sum) + │ │ └── Mul + │ │ ├── #1 + │ │ └── Sub + │ │ ├── 1 + │ │ └── #2 + │ ├── groups: [ #0 ] + │ └── PhysicalProjection { exprs: [ #0, #1, #2 ] } + │ └── PhysicalProjection { exprs: [ #2, #5, #6, #10 ] } + │ └── PhysicalFilter + │ ├── cond:And + │ │ ├── Geq + │ │ │ ├── #10 + │ │ │ └── 8401 + │ │ └── Lt + │ │ ├── #10 + │ │ └── 8491 + │ └── PhysicalScan { table: lineitem } └── PhysicalAgg - ├── aggrs:Agg(Sum) - │ └── Mul - │ ├── #1 - │ └── Sub - │ ├── 1 - │ └── #2 - ├── groups: [ #0 ] - └── PhysicalProjection { exprs: [ #0, #1, #2 ] } - └── PhysicalProjection { exprs: [ #2, #5, #6, #10 ] } - └── PhysicalFilter - ├── cond:And - │ ├── Geq - │ │ ├── #10 - │ │ └── 8401 - │ └── Lt - │ ├── #10 - │ └── 8491 - └── PhysicalScan { table: lineitem } + ├── aggrs:Agg(Max) + │ └── [ #0 ] + ├── groups: [] + └── PhysicalProjection { exprs: [ #1 ] } + └── PhysicalAgg + ├── aggrs:Agg(Sum) + │ └── Mul + │ ├── #1 + │ └── Sub + │ ├── 1 + │ └── #2 + ├── groups: [ #0 ] + └── PhysicalProjection { exprs: [ #0, #1, #2 ] } + └── PhysicalProjection { exprs: [ #2, #5, #6, #10 ] } + └── PhysicalFilter + ├── cond:And + │ ├── Geq + │ │ ├── #10 + │ │ └── 8401 + │ └── Lt + │ ├── #10 + │ └── 8491 + └── PhysicalScan { table: lineitem } */ -- TPC-H Q17 From 7fe1fd0e003e202c8ea90121b0584e92715a5d0e Mon Sep 17 00:00:00 2001 From: AveryQi115 Date: Sun, 7 Apr 2024 21:03:12 -0400 Subject: [PATCH 46/61] heuristic rule wrapper fix Signed-off-by: AveryQi115 --- optd-core/src/cascades/optimizer.rs | 16 ++++++++++------ optd-core/src/cascades/tasks/apply_rule.rs | 5 +---- 2 files changed, 11 insertions(+), 10 deletions(-) diff --git a/optd-core/src/cascades/optimizer.rs b/optd-core/src/cascades/optimizer.rs index d24eec70..36853334 100644 --- a/optd-core/src/cascades/optimizer.rs +++ b/optd-core/src/cascades/optimizer.rs @@ -294,15 +294,19 @@ impl CascadesOptimizer { // the old expr is replaced, so we clear the fired rules for old expr self.fired_rules .entry(expr_id) - 
.and_modify(|fired_rules| fired_rules.clear()); + .or_default().clear(); return; } - // new expr merged with old expr, we mark old expr as a dead end - self.fired_rules.entry(expr_id).and_modify(|fired_rules| { - for i in 0..self.rules.len() { - fired_rules.insert(i); + + // We can mark the expr as a deadend + // However, even some of the exprs cannot be the winner for the group + // We still need the physical form of those expr to start the optimizeInput task + // So we don't mark the impl rules as fired + for i in 0..self.rules.len() { + if !self.rules[i].rule().is_impl_rule(){ + self.fired_rules.entry(expr_id).or_default().insert(i); } - }); + } } pub(super) fn get_group_info(&self, group_id: GroupId) -> GroupInfo { diff --git a/optd-core/src/cascades/tasks/apply_rule.rs b/optd-core/src/cascades/tasks/apply_rule.rs index d73e0bdc..a115a079 100644 --- a/optd-core/src/cascades/tasks/apply_rule.rs +++ b/optd-core/src/cascades/tasks/apply_rule.rs @@ -233,10 +233,7 @@ impl Task for ApplyRuleTask { trace!(event = "apply_rule replace", expr_id = %self.expr_id, rule_id = %self.rule_id); - // the expr returned by heuristic rule is a brand new one - // so there's no optimizeExpressionTask for it in the original task list - // we should set exploring as false to both envoke tranform rule and impl rule for it - tasks.push(Box::new(OptimizeExpressionTask::new(self.expr_id, false)) + tasks.push(Box::new(OptimizeExpressionTask::new(self.expr_id, self.exploring)) as Box>); } continue; From 487f1e2fc30ac699883a8b2a13e5d8e138e0822a Mon Sep 17 00:00:00 2001 From: AveryQi115 Date: Sun, 7 Apr 2024 21:37:28 -0400 Subject: [PATCH 47/61] still merge group Signed-off-by: AveryQi115 --- optd-core/src/cascades/memo.rs | 14 +++----------- 1 file changed, 3 insertions(+), 11 deletions(-) diff --git a/optd-core/src/cascades/memo.rs b/optd-core/src/cascades/memo.rs index 1fdf344a..d1567dc9 100644 --- a/optd-core/src/cascades/memo.rs +++ b/optd-core/src/cascades/memo.rs @@ -285,18 +285,10 @@ 
impl Memo { }; // if the new expr already in the memo table, merge the group and remove old expr - if let Some(&new_expr_id) = self.expr_node_to_expr_id.get(&memo_node) { - if new_expr_id == expr_id { - // This is not acceptable, as it means the expr returned by a heuristic rule is exactly - // the same as the original expr, which should not happen - // TODO: we can silently ignore this case without marking the original one as a deadend - // But the rule creators should follow the definition of the heuristic rule - // and return an empty vec if their rule does not do the real transformation - unreachable!("replace_group_expr: you're replacing the old expr with the same expr, please check your rules registered as heuristic - and make sure if it does not do any transformation, it should return an empty vec!"); - } - let group_id = self.get_group_id_of_expr_id(new_expr_id); + if let Some(&expr_id) = self.expr_node_to_expr_id.get(&memo_node) { + let group_id = self.get_group_id_of_expr_id(expr_id); let group_id = self.get_reduced_group_id(group_id); + self.merge_group_inner(replace_group_id, group_id); return false; } From 1d4cd5b4bc3265c60ef9f848498727e1c61b71e8 Mon Sep 17 00:00:00 2001 From: AveryQi115 Date: Mon, 8 Apr 2024 14:35:53 -0400 Subject: [PATCH 48/61] fmt Signed-off-by: AveryQi115 --- optd-core/src/cascades/optimizer.rs | 8 +++----- optd-core/src/cascades/tasks/apply_rule.rs | 6 ++++-- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/optd-core/src/cascades/optimizer.rs b/optd-core/src/cascades/optimizer.rs index 36853334..c312075f 100644 --- a/optd-core/src/cascades/optimizer.rs +++ b/optd-core/src/cascades/optimizer.rs @@ -292,18 +292,16 @@ impl CascadesOptimizer { let replaced = self.memo.replace_group_expr(expr_id, group_id, expr); if replaced { // the old expr is replaced, so we clear the fired rules for old expr - self.fired_rules - .entry(expr_id) - .or_default().clear(); + self.fired_rules.entry(expr_id).or_default().clear(); return; } - 
+ // We can mark the expr as a deadend // However, even some of the exprs cannot be the winner for the group // We still need the physical form of those expr to start the optimizeInput task // So we don't mark the impl rules as fired for i in 0..self.rules.len() { - if !self.rules[i].rule().is_impl_rule(){ + if !self.rules[i].rule().is_impl_rule() { self.fired_rules.entry(expr_id).or_default().insert(i); } } diff --git a/optd-core/src/cascades/tasks/apply_rule.rs b/optd-core/src/cascades/tasks/apply_rule.rs index a115a079..8926fc78 100644 --- a/optd-core/src/cascades/tasks/apply_rule.rs +++ b/optd-core/src/cascades/tasks/apply_rule.rs @@ -233,8 +233,10 @@ impl Task for ApplyRuleTask { trace!(event = "apply_rule replace", expr_id = %self.expr_id, rule_id = %self.rule_id); - tasks.push(Box::new(OptimizeExpressionTask::new(self.expr_id, self.exploring)) - as Box>); + tasks.push( + Box::new(OptimizeExpressionTask::new(self.expr_id, self.exploring)) + as Box>, + ); } continue; } From 1814b84ed2f6e13360f38593a9628557d9019f56 Mon Sep 17 00:00:00 2001 From: AveryQi115 Date: Mon, 15 Apr 2024 16:23:58 -0400 Subject: [PATCH 49/61] tpc-h result changed due to search space limit after the fix Signed-off-by: AveryQi115 --- optd-sqlplannertest/tests/tpch.planner.sql | 46 +++++++++++----------- 1 file changed, 23 insertions(+), 23 deletions(-) diff --git a/optd-sqlplannertest/tests/tpch.planner.sql b/optd-sqlplannertest/tests/tpch.planner.sql index c9730a42..2e1a6256 100644 --- a/optd-sqlplannertest/tests/tpch.planner.sql +++ b/optd-sqlplannertest/tests/tpch.planner.sql @@ -627,29 +627,29 @@ PhysicalSort │ ├── Cast { cast_to: Decimal128(20, 0), expr: 1 } │ └── #23 ├── groups: [ #41 ] - └── PhysicalHashJoin { join_type: Inner, left_keys: [ #36 ], right_keys: [ #0 ] } - ├── PhysicalHashJoin { join_type: Inner, left_keys: [ #19, #3 ], right_keys: [ #0, #3 ] } - │ ├── PhysicalHashJoin { join_type: Inner, left_keys: [ #0 ], right_keys: [ #1 ] } - │ │ ├── PhysicalScan { table: customer 
} - │ │ └── PhysicalHashJoin { join_type: Inner, left_keys: [ #0 ], right_keys: [ #0 ] } - │ │ ├── PhysicalFilter - │ │ │ ├── cond:And - │ │ │ │ ├── Geq - │ │ │ │ │ ├── #4 - │ │ │ │ │ └── Cast { cast_to: Date32, expr: "2023-01-01" } - │ │ │ │ └── Lt - │ │ │ │ ├── #4 - │ │ │ │ └── Cast { cast_to: Date32, expr: "2024-01-01" } - │ │ │ └── PhysicalScan { table: orders } - │ │ └── PhysicalScan { table: lineitem } - │ └── PhysicalScan { table: supplier } - └── PhysicalHashJoin { join_type: Inner, left_keys: [ #2 ], right_keys: [ #0 ] } - ├── PhysicalScan { table: nation } - └── PhysicalFilter - ├── cond:Eq - │ ├── #1 - │ └── "Asia" - └── PhysicalScan { table: region } + └── PhysicalHashJoin { join_type: Inner, left_keys: [ #19, #3 ], right_keys: [ #0, #3 ] } + ├── PhysicalHashJoin { join_type: Inner, left_keys: [ #0 ], right_keys: [ #1 ] } + │ ├── PhysicalScan { table: customer } + │ └── PhysicalHashJoin { join_type: Inner, left_keys: [ #0 ], right_keys: [ #0 ] } + │ ├── PhysicalFilter + │ │ ├── cond:And + │ │ │ ├── Geq + │ │ │ │ ├── #4 + │ │ │ │ └── Cast { cast_to: Date32, expr: "2023-01-01" } + │ │ │ └── Lt + │ │ │ ├── #4 + │ │ │ └── Cast { cast_to: Date32, expr: "2024-01-01" } + │ │ └── PhysicalScan { table: orders } + │ └── PhysicalScan { table: lineitem } + └── PhysicalHashJoin { join_type: Inner, left_keys: [ #3 ], right_keys: [ #0 ] } + ├── PhysicalScan { table: supplier } + └── PhysicalHashJoin { join_type: Inner, left_keys: [ #2 ], right_keys: [ #0 ] } + ├── PhysicalScan { table: nation } + └── PhysicalFilter + ├── cond:Eq + │ ├── #1 + │ └── "Asia" + └── PhysicalScan { table: region } */ -- TPC-H Q6 From 8b10c94510c6b367a6804e395f61c665106c7463 Mon Sep 17 00:00:00 2001 From: Sweetsuro Date: Mon, 15 Apr 2024 17:42:58 -0400 Subject: [PATCH 50/61] massive refactor --- optd-datafusion-repr/src/lib.rs | 3 +- .../src/plan_nodes/projection.rs | 122 +----------- optd-datafusion-repr/src/rules.rs | 13 +- .../src/rules/filter_pushdown.rs | 188 ++++++++---------- 
optd-datafusion-repr/src/rules/joins.rs | 58 +----- .../src/rules/project_transpose.rs | 5 + .../filter_project_transpose.rs | 38 ++++ .../project_agg_transpose.rs | 21 ++ .../project_filter_transpose.rs | 73 +++++++ .../project_join_transpose.rs | 73 +++++++ .../rules/project_transpose/project_merge.rs | 54 +++++ .../project_transpose_common.rs | 119 +++++++++++ .../src/rules/projection_pushdown.rs | 145 -------------- 13 files changed, 475 insertions(+), 437 deletions(-) create mode 100644 optd-datafusion-repr/src/rules/project_transpose.rs create mode 100644 optd-datafusion-repr/src/rules/project_transpose/filter_project_transpose.rs create mode 100644 optd-datafusion-repr/src/rules/project_transpose/project_agg_transpose.rs create mode 100644 optd-datafusion-repr/src/rules/project_transpose/project_filter_transpose.rs create mode 100644 optd-datafusion-repr/src/rules/project_transpose/project_join_transpose.rs create mode 100644 optd-datafusion-repr/src/rules/project_transpose/project_merge.rs create mode 100644 optd-datafusion-repr/src/rules/project_transpose/project_transpose_common.rs delete mode 100644 optd-datafusion-repr/src/rules/projection_pushdown.rs diff --git a/optd-datafusion-repr/src/lib.rs b/optd-datafusion-repr/src/lib.rs index 3a82069e..983619a0 100644 --- a/optd-datafusion-repr/src/lib.rs +++ b/optd-datafusion-repr/src/lib.rs @@ -87,6 +87,7 @@ impl DatafusionOptimizer { Arc::new(EliminateLimitRule::new()), Arc::new(EliminateDuplicatedSortExprRule::new()), Arc::new(EliminateDuplicatedAggExprRule::new()), + Arc::new(ProjectMergeRule::new()), ] } @@ -97,7 +98,7 @@ impl DatafusionOptimizer { for rule in rules { rule_wrappers.push(RuleWrapper::new_cascades(rule)); } - // rule_wrappers.push(RuleWrapper::new_cascades(Arc::new( + // rule_wrappers.push(RuleWrapper::new_heuristic(Arc::new( // ProjectMergeRule::new(), // ))); rule_wrappers.push(RuleWrapper::new_cascades(Arc::new( diff --git a/optd-datafusion-repr/src/plan_nodes/projection.rs 
b/optd-datafusion-repr/src/plan_nodes/projection.rs index c52db4ba..418a5b2d 100644 --- a/optd-datafusion-repr/src/plan_nodes/projection.rs +++ b/optd-datafusion-repr/src/plan_nodes/projection.rs @@ -1,7 +1,7 @@ use super::expr::ExprList; use super::macros::define_plan_node; -use super::{ColumnRefExpr, Expr, OptRelNode, OptRelNodeRef, OptRelNodeTyp, PlanNode}; +use super::{OptRelNode, OptRelNodeRef, OptRelNodeTyp, PlanNode}; #[derive(Clone, Debug)] pub struct LogicalProjection(pub PlanNode); @@ -25,122 +25,4 @@ define_plan_node!( ], [ { 1, exprs: ExprList } ] -); - -/// This struct holds the mapping from original columns to projected columns. -/// -/// # Example -/// With the following plan: -/// | Filter (#0 < 5) -/// | -/// |-| Projection [#2, #3] -/// |- Scan [#0, #1, #2, #3] -/// -/// The computed projection mapping is: -/// #2 -> #0 -/// #3 -> #1 -#[derive(Clone, Debug)] -pub struct ProjectionMapping { - forward: Vec, - _backward: Vec>, -} - -impl ProjectionMapping { - pub fn build(mapping: Vec) -> Option { - let mut backward = vec![]; - for (i, &x) in mapping.iter().enumerate() { - if x >= backward.len() { - backward.resize(x + 1, None); - } - backward[x] = Some(i); - } - Some(Self { - forward: mapping, - _backward: backward, - }) - } - - pub fn projection_col_refers_to(&self, col: usize) -> usize { - self.forward[col] - } - - pub fn _original_col_maps_to(&self, col: usize) -> Option { - self._backward[col] - } - - /// Recursively rewrites all ColumnRefs in an Expr to *undo* the projection - /// condition. You might want to do this if you are pushing something - /// through a projection, or pulling a projection up. - /// - /// # Example - /// If we have a projection node, mapping column A to column B (A -> B) - /// All B's in `cond` will be rewritten as A. 
- pub fn rewrite_condition(&self, cond: Expr, child_schema_len: usize) -> Expr { - let proj_schema_size = self.forward.len(); - cond.rewrite_column_refs(&|idx| { - Some(if idx < proj_schema_size { - self.projection_col_refers_to(idx) - } else { - idx - proj_schema_size + child_schema_len - }) - }) - .unwrap() - } - - /// Recursively rewrites all ColumnRefs in an Expr to what the projection - /// node is rewriting. E.g. if Projection is A -> B, B will be rewritten as A - pub fn reverse_rewrite_condition(&self, cond: Expr) -> Expr { - let proj_schema_size = self._backward.len(); - cond.rewrite_column_refs(&|idx| { - Some(if idx < proj_schema_size { - self._original_col_maps_to(idx).unwrap() - } else { - panic!("exprs do not map to projection"); - }) - }) - .unwrap() - } - - /// Rewrites all ColumnRefs in an ExprList to what the projection - /// node is rewriting. E.g. if Projection is A -> B, B will be - /// rewritten as A - pub fn rewrite_projection(&self, exprs: &ExprList) -> Option { - if exprs.len() == 0 { - return None; - } - let mut new_projection_exprs = Vec::new(); - let exprs = exprs.to_vec(); - for i in &self.forward { - let col: Expr = exprs[*i].clone(); - new_projection_exprs.push(col); - }; - Some(ExprList::new(new_projection_exprs)) - } - - /// rewrites the input exprs based on the mapped col refs - /// intended use: - /// Projection { exprs: [#1, #0] } - /// Projection { exprs: [#0, #2] } - /// remove bottom projection by converting nodes to: - /// Projection { exprs: [#2, #0] } - pub fn reverse_rewrite_projection(&self, exprs: &ExprList) -> ExprList { - let mut new_projection_exprs = Vec::new(); - let exprs = exprs.to_vec(); - for i in 0..exprs.len() { - let col: Expr = ColumnRefExpr::new(self.projection_col_refers_to(i).clone()).into_expr(); - new_projection_exprs.push(col); - }; - ExprList::new(new_projection_exprs) - } -} - -impl LogicalProjection { - pub fn compute_column_mapping(exprs: &ExprList) -> Option { - let mut mapping = vec![]; - for expr 
in exprs.to_vec() { - let col_expr = ColumnRefExpr::from_rel_node(expr.into_rel_node())?; - mapping.push(col_expr.index()); - } - ProjectionMapping::build(mapping) - } -} +); \ No newline at end of file diff --git a/optd-datafusion-repr/src/rules.rs b/optd-datafusion-repr/src/rules.rs index fe8b3ce5..9d620e8a 100644 --- a/optd-datafusion-repr/src/rules.rs +++ b/optd-datafusion-repr/src/rules.rs @@ -1,5 +1,4 @@ // mod filter_join; -mod projection_pushdown; mod eliminate_duplicated_expr; mod eliminate_limit; mod filter; @@ -7,10 +6,14 @@ mod filter_pushdown; mod joins; mod macros; mod physical; +mod project_transpose; // pub use filter_join::FilterJoinPullUpRule; -pub use projection_pushdown::{ - ProjectMergeRule, ProjectFilterTransposeRule +pub use project_transpose::{ + project_merge::ProjectMergeRule, + project_filter_transpose::ProjectFilterTransposeRule, + project_join_transpose::ProjectionPullUpJoin, + filter_project_transpose::FilterProjectTransposeRule, }; pub use eliminate_duplicated_expr::{ EliminateDuplicatedAggExprRule, EliminateDuplicatedSortExprRule, @@ -19,9 +22,9 @@ pub use eliminate_limit::EliminateLimitRule; pub use filter::{EliminateFilterRule, SimplifyFilterRule, SimplifyJoinCondRule}; pub use filter_pushdown::{ FilterAggTransposeRule, FilterCrossJoinTransposeRule, FilterInnerJoinTransposeRule, - FilterMergeRule, FilterProjectTransposeRule, FilterSortTransposeRule, + FilterMergeRule, FilterSortTransposeRule, }; pub use joins::{ - EliminateJoinRule, HashJoinRule, JoinAssocRule, JoinCommuteRule, ProjectionPullUpJoin, + EliminateJoinRule, HashJoinRule, JoinAssocRule, JoinCommuteRule, }; pub use physical::PhysicalConversionRule; diff --git a/optd-datafusion-repr/src/rules/filter_pushdown.rs b/optd-datafusion-repr/src/rules/filter_pushdown.rs index 6189d2d7..9b949a96 100644 --- a/optd-datafusion-repr/src/rules/filter_pushdown.rs +++ b/optd-datafusion-repr/src/rules/filter_pushdown.rs @@ -16,7 +16,7 @@ use optd_core::{optimizer::Optimizer, 
rel_node::RelNode}; use crate::plan_nodes::{ ColumnRefExpr, Expr, ExprList, JoinType, LogOpExpr, LogOpType, LogicalAgg, LogicalFilter, - LogicalJoin, LogicalProjection, LogicalSort, OptRelNode, OptRelNodeTyp, PlanNode, + LogicalJoin, LogicalSort, OptRelNode, OptRelNodeTyp, PlanNode, }; use crate::properties::schema::SchemaPropertyBuilder; @@ -121,36 +121,6 @@ fn categorize_conds(mut categorization_fn: impl FnMut(Expr, &Vec), cond: E } } -define_rule!( - FilterProjectTransposeRule, - apply_filter_project_transpose, - (Filter, (Projection, child, [exprs]), [cond]) -); - -/// Datafusion only pushes filter past project when the project does not contain -/// volatile (i.e. non-deterministic) expressions that are present in the filter -/// Calcite only checks if the projection contains a windowing calculation -/// We check neither of those things and do it always (which may be wrong) -fn apply_filter_project_transpose( - optimizer: &impl Optimizer, - FilterProjectTransposeRulePicks { child, exprs, cond }: FilterProjectTransposeRulePicks, -) -> Vec> { - let child_schema_len = optimizer - .get_property::(child.clone().into(), 0) - .len(); - - let child = PlanNode::from_group(child.into()); - let cond_as_expr = Expr::from_rel_node(cond.into()).unwrap(); - let exprs = ExprList::from_rel_node(exprs.into()).unwrap(); - - let proj_col_map = LogicalProjection::compute_column_mapping(&exprs).unwrap(); - let rewritten_cond = proj_col_map.rewrite_condition(cond_as_expr.clone(), child_schema_len); - - let new_filter_node = LogicalFilter::new(child, rewritten_cond); - let new_proj = LogicalProjection::new(new_filter_node.into_plan_node(), exprs); - vec![new_proj.into_rel_node().as_ref().clone()] -} - define_rule!( FilterMergeRule, apply_filter_merge, @@ -453,7 +423,7 @@ mod tests { }, rules::{ FilterAggTransposeRule, FilterInnerJoinTransposeRule, FilterMergeRule, - FilterProjectTransposeRule, FilterSortTransposeRule, + FilterSortTransposeRule, }, testing::new_test_optimizer, }; @@ 
-535,83 +505,83 @@ mod tests { assert_eq!(col_4.value().as_i32(), 1); } - #[test] - fn push_past_proj_basic() { - let mut test_optimizer = new_test_optimizer(Arc::new(FilterProjectTransposeRule::new())); - - let scan = LogicalScan::new("customer".into()); - let proj = LogicalProjection::new(scan.into_plan_node(), ExprList::new(vec![])); - - let filter_expr = BinOpExpr::new( - ColumnRefExpr::new(0).into_expr(), - ConstantExpr::int32(5).into_expr(), - BinOpType::Eq, - ) - .into_expr(); - - let filter = LogicalFilter::new(proj.into_plan_node(), filter_expr); - let plan = test_optimizer.optimize(filter.into_rel_node()).unwrap(); - - assert_eq!(plan.typ, OptRelNodeTyp::Projection); - assert!(matches!(plan.child(0).typ, OptRelNodeTyp::Filter)); - } - - #[test] - fn push_past_proj_adv() { - let mut test_optimizer = new_test_optimizer(Arc::new(FilterProjectTransposeRule::new())); - - let scan = LogicalScan::new("customer".into()); - let proj = LogicalProjection::new( - scan.into_plan_node(), - ExprList::new(vec![ - ColumnRefExpr::new(0).into_expr(), - ColumnRefExpr::new(4).into_expr(), - ColumnRefExpr::new(5).into_expr(), - ColumnRefExpr::new(7).into_expr(), - ]), - ); - - let filter_expr = LogOpExpr::new( - LogOpType::And, - ExprList::new(vec![ - BinOpExpr::new( - // This one should be pushed to the left child - ColumnRefExpr::new(1).into_expr(), - ConstantExpr::int32(5).into_expr(), - BinOpType::Eq, - ) - .into_expr(), - BinOpExpr::new( - // This one should be pushed to the right child - ColumnRefExpr::new(3).into_expr(), - ConstantExpr::int32(6).into_expr(), - BinOpType::Eq, - ) - .into_expr(), - ]), - ); - - let filter = LogicalFilter::new(proj.into_plan_node(), filter_expr.into_expr()); - - let plan = test_optimizer.optimize(filter.into_rel_node()).unwrap(); - - assert!(matches!(plan.typ, OptRelNodeTyp::Projection)); - let plan_filter = LogicalFilter::from_rel_node(plan.child(0)).unwrap(); - assert!(matches!(plan_filter.0.typ(), OptRelNodeTyp::Filter)); - let 
plan_filter_expr = - LogOpExpr::from_rel_node(plan_filter.cond().into_rel_node()).unwrap(); - assert!(matches!(plan_filter_expr.op_type(), LogOpType::And)); - let op_0 = BinOpExpr::from_rel_node(plan_filter_expr.children()[0].clone().into_rel_node()) - .unwrap(); - let col_0 = - ColumnRefExpr::from_rel_node(op_0.left_child().clone().into_rel_node()).unwrap(); - assert_eq!(col_0.index(), 4); - let op_1 = BinOpExpr::from_rel_node(plan_filter_expr.children()[1].clone().into_rel_node()) - .unwrap(); - let col_1 = - ColumnRefExpr::from_rel_node(op_1.left_child().clone().into_rel_node()).unwrap(); - assert_eq!(col_1.index(), 7); - } + // #[test] + // fn push_past_proj_basic() { + // let mut test_optimizer = new_test_optimizer(Arc::new(FilterProjectTransposeRule::new())); + + // let scan = LogicalScan::new("customer".into()); + // let proj = LogicalProjection::new(scan.into_plan_node(), ExprList::new(vec![])); + + // let filter_expr = BinOpExpr::new( + // ColumnRefExpr::new(0).into_expr(), + // ConstantExpr::int32(5).into_expr(), + // BinOpType::Eq, + // ) + // .into_expr(); + + // let filter = LogicalFilter::new(proj.into_plan_node(), filter_expr); + // let plan = test_optimizer.optimize(filter.into_rel_node()).unwrap(); + + // assert_eq!(plan.typ, OptRelNodeTyp::Projection); + // assert!(matches!(plan.child(0).typ, OptRelNodeTyp::Filter)); + // } + + // #[test] + // fn push_past_proj_adv() { + // let mut test_optimizer = new_test_optimizer(Arc::new(FilterProjectTransposeRule::new())); + + // let scan = LogicalScan::new("customer".into()); + // let proj = LogicalProjection::new( + // scan.into_plan_node(), + // ExprList::new(vec![ + // ColumnRefExpr::new(0).into_expr(), + // ColumnRefExpr::new(4).into_expr(), + // ColumnRefExpr::new(5).into_expr(), + // ColumnRefExpr::new(7).into_expr(), + // ]), + // ); + + // let filter_expr = LogOpExpr::new( + // LogOpType::And, + // ExprList::new(vec![ + // BinOpExpr::new( + // // This one should be pushed to the left child + // 
ColumnRefExpr::new(1).into_expr(), + // ConstantExpr::int32(5).into_expr(), + // BinOpType::Eq, + // ) + // .into_expr(), + // BinOpExpr::new( + // // This one should be pushed to the right child + // ColumnRefExpr::new(3).into_expr(), + // ConstantExpr::int32(6).into_expr(), + // BinOpType::Eq, + // ) + // .into_expr(), + // ]), + // ); + + // let filter = LogicalFilter::new(proj.into_plan_node(), filter_expr.into_expr()); + + // let plan = test_optimizer.optimize(filter.into_rel_node()).unwrap(); + + // assert!(matches!(plan.typ, OptRelNodeTyp::Projection)); + // let plan_filter = LogicalFilter::from_rel_node(plan.child(0)).unwrap(); + // assert!(matches!(plan_filter.0.typ(), OptRelNodeTyp::Filter)); + // let plan_filter_expr = + // LogOpExpr::from_rel_node(plan_filter.cond().into_rel_node()).unwrap(); + // assert!(matches!(plan_filter_expr.op_type(), LogOpType::And)); + // let op_0 = BinOpExpr::from_rel_node(plan_filter_expr.children()[0].clone().into_rel_node()) + // .unwrap(); + // let col_0 = + // ColumnRefExpr::from_rel_node(op_0.left_child().clone().into_rel_node()).unwrap(); + // assert_eq!(col_0.index(), 4); + // let op_1 = BinOpExpr::from_rel_node(plan_filter_expr.children()[1].clone().into_rel_node()) + // .unwrap(); + // let col_1 = + // ColumnRefExpr::from_rel_node(op_1.left_child().clone().into_rel_node()).unwrap(); + // assert_eq!(col_1.index(), 7); + // } #[test] fn push_past_join_conjunction() { diff --git a/optd-datafusion-repr/src/rules/joins.rs b/optd-datafusion-repr/src/rules/joins.rs index d76083ba..13896a67 100644 --- a/optd-datafusion-repr/src/rules/joins.rs +++ b/optd-datafusion-repr/src/rules/joins.rs @@ -308,60 +308,4 @@ fn apply_hash_join( } } vec![] -} - -// (Proj A) join B -> (Proj (A join B)) -define_rule!( - ProjectionPullUpJoin, - apply_projection_pull_up_join, - ( - Join(JoinType::Inner), - (Projection, left, [list]), - right, - [cond] - ) -); - -fn apply_projection_pull_up_join( - optimizer: &impl Optimizer, - 
ProjectionPullUpJoinPicks { - left, - right, - list, - cond, - }: ProjectionPullUpJoinPicks, -) -> Vec> { - let left = Arc::new(left.clone()); - let right = Arc::new(right.clone()); - - let list = ExprList::from_rel_node(Arc::new(list)).unwrap(); - - let projection = LogicalProjection::new(PlanNode::from_group(left.clone()), list.clone()); - - let Some(mapping) = LogicalProjection::compute_column_mapping(&projection.exprs()) else { - return vec![]; - }; - - // TODO(chi): support capture projection node. - let left_schema = optimizer.get_property::(left.clone(), 0); - let right_schema = optimizer.get_property::(right.clone(), 0); - let mut new_projection_exprs = list.to_vec(); - for i in 0..right_schema.len() { - let col: Expr = ColumnRefExpr::new(i + left_schema.len()).into_expr(); - new_projection_exprs.push(col); - } - let node = LogicalProjection::new( - LogicalJoin::new( - PlanNode::from_group(left), - PlanNode::from_group(right), - mapping.rewrite_condition( - Expr::from_rel_node(Arc::new(cond)).unwrap(), - left_schema.len(), - ), - JoinType::Inner, - ) - .into_plan_node(), - ExprList::new(new_projection_exprs), - ); - vec![node.into_rel_node().as_ref().clone()] -} +} \ No newline at end of file diff --git a/optd-datafusion-repr/src/rules/project_transpose.rs b/optd-datafusion-repr/src/rules/project_transpose.rs new file mode 100644 index 00000000..06a9f509 --- /dev/null +++ b/optd-datafusion-repr/src/rules/project_transpose.rs @@ -0,0 +1,5 @@ +pub mod project_transpose_common; +pub mod project_merge; +pub mod project_filter_transpose; +pub mod project_join_transpose; +pub mod filter_project_transpose; \ No newline at end of file diff --git a/optd-datafusion-repr/src/rules/project_transpose/filter_project_transpose.rs b/optd-datafusion-repr/src/rules/project_transpose/filter_project_transpose.rs new file mode 100644 index 00000000..0d4bf760 --- /dev/null +++ b/optd-datafusion-repr/src/rules/project_transpose/filter_project_transpose.rs @@ -0,0 +1,38 @@ +use 
std::collections::HashMap; +use std::vec; + +use optd_core::rules::{Rule, RuleMatcher}; +use optd_core::{optimizer::Optimizer, rel_node::RelNode}; + +use crate::plan_nodes::{ + Expr, ExprList, LogicalFilter, LogicalProjection, OptRelNode, OptRelNodeTyp, PlanNode +}; + +use crate::rules::macros::define_rule; +use crate::rules::project_transpose::project_transpose_common::ProjectionMapping; + +define_rule!( + FilterProjectTransposeRule, + apply_filter_project_transpose, + (Filter, (Projection, child, [exprs]), [cond]) +); + +/// Datafusion only pushes filter past project when the project does not contain +/// volatile (i.e. non-deterministic) expressions that are present in the filter +/// Calcite only checks if the projection contains a windowing calculation +/// We check neither of those things and do it always (which may be wrong) +fn apply_filter_project_transpose( + optimizer: &impl Optimizer, + FilterProjectTransposeRulePicks { child, exprs, cond }: FilterProjectTransposeRulePicks, +) -> Vec> { + let child = PlanNode::from_group(child.into()); + let cond_as_expr = Expr::from_rel_node(cond.into()).unwrap(); + let exprs = ExprList::from_rel_node(exprs.into()).unwrap(); + + let proj_col_map = ProjectionMapping::build(&exprs).unwrap(); + let rewritten_cond = proj_col_map.rewrite_filter_cond(cond_as_expr.clone(), false); + + let new_filter_node = LogicalFilter::new(child, rewritten_cond); + let new_proj = LogicalProjection::new(new_filter_node.into_plan_node(), exprs); + vec![new_proj.into_rel_node().as_ref().clone()] +} \ No newline at end of file diff --git a/optd-datafusion-repr/src/rules/project_transpose/project_agg_transpose.rs b/optd-datafusion-repr/src/rules/project_transpose/project_agg_transpose.rs new file mode 100644 index 00000000..277bc0c7 --- /dev/null +++ b/optd-datafusion-repr/src/rules/project_transpose/project_agg_transpose.rs @@ -0,0 +1,21 @@ +// projects away aggregate calls that are not used +// TODO +define_rule!( + ProjectAggregatePushDown, + 
apply_projection_agg_pushdown, + ( + Projection, + (Agg, child, [agg_exprs], [agg_groups]), + [exprs] + ) +); + +fn apply_projection_agg_pushdown( + _optimizer: &impl Optimizer, + ProjectAggregatePushDownPicks { child, agg_exprs, agg_groups, exprs }: ProjectAggregatePushDownPicks, +) -> Vec> { + + + + vec![] +} \ No newline at end of file diff --git a/optd-datafusion-repr/src/rules/project_transpose/project_filter_transpose.rs b/optd-datafusion-repr/src/rules/project_transpose/project_filter_transpose.rs new file mode 100644 index 00000000..9f550aa3 --- /dev/null +++ b/optd-datafusion-repr/src/rules/project_transpose/project_filter_transpose.rs @@ -0,0 +1,73 @@ +use std::collections::HashMap; + +use optd_core::rules::{Rule, RuleMatcher}; +use optd_core::{optimizer::Optimizer, rel_node::RelNode}; + +use crate::plan_nodes::{ + Expr, ExprList, LogicalFilter, LogicalProjection, OptRelNode, OptRelNodeTyp, PlanNode +}; +use crate::rules::macros::define_rule; +use super::project_transpose_common::ProjectionMapping; + +fn merge_exprs(first: ExprList, second: ExprList) -> ExprList { + let mut res_vec = first.to_vec(); + res_vec.extend(second.to_vec()); + ExprList::new(res_vec) +} + +// pushes projections through filters +// adds a projection node after a filter node +// only keeping necessary columns (proj node exprs + filter col exprs)) +// TODO +define_rule!( + ProjectFilterTransposeRule, + apply_projection_filter_transpose, + ( + Projection, + (Filter, child, [cond]), + [exprs] + ) +); + +fn apply_projection_filter_transpose( + _optimizer: &impl Optimizer, + ProjectFilterTransposeRulePicks { child, cond, exprs }: ProjectFilterTransposeRulePicks, +) -> Vec> { + // get columns out of cond + let exprs = ExprList::from_rel_node(exprs.into()).unwrap(); + let exprs_vec = exprs.clone().to_vec(); + let cond_as_expr = Expr::from_rel_node(cond.into()).unwrap(); + let cond_col_refs = cond_as_expr.get_column_refs(); + let mut dedup_cond_col_refs = Vec::new(); + + for i in 
0..cond_col_refs.len() { + if !exprs_vec.contains(&cond_col_refs[i]) { + dedup_cond_col_refs.push(cond_col_refs[i].clone()); + }; + }; + + let dedup_cond_col_refs = ExprList::new(dedup_cond_col_refs); + + let bottom_proj_exprs: ExprList = merge_exprs(exprs.clone(), dedup_cond_col_refs.clone()); + let Some(mapping) = ProjectionMapping::build(&bottom_proj_exprs) else { + return vec![]; + }; + + let child = PlanNode::from_group(child.into()); + let new_filter_cond: Expr = mapping.rewrite_filter_cond(cond_as_expr.clone(), true); + let bottom_proj_node = LogicalProjection::new(child, bottom_proj_exprs); + let new_filter_node = LogicalFilter::new(bottom_proj_node.into_plan_node(), new_filter_cond); + + if dedup_cond_col_refs.is_empty() { + // can push proj past filter and remove top proj node + return vec![new_filter_node.into_rel_node().as_ref().clone()]; + } + + // have column ref expressions of cond cols + // bottom-most projection will have proj cols + filter cols as a set + let Some(top_proj_exprs) = mapping.rewrite_projection(&exprs, false) else { + return vec![]; + }; + let top_proj_node = LogicalProjection::new(new_filter_node.into_plan_node(), top_proj_exprs); + vec![top_proj_node.into_rel_node().as_ref().clone()] +} \ No newline at end of file diff --git a/optd-datafusion-repr/src/rules/project_transpose/project_join_transpose.rs b/optd-datafusion-repr/src/rules/project_transpose/project_join_transpose.rs new file mode 100644 index 00000000..19aedd4e --- /dev/null +++ b/optd-datafusion-repr/src/rules/project_transpose/project_join_transpose.rs @@ -0,0 +1,73 @@ +use crate::Rule; +use crate::HashMap; + +use optd_core::rules::RuleMatcher; +use std::sync::Arc; +use std::vec; + +use optd_core::optimizer::Optimizer; +use optd_core::rel_node::RelNode; +use crate::rules::macros::define_rule; + +use crate::plan_nodes::{ + ColumnRefExpr, Expr, ExprList, JoinType, LogicalJoin, LogicalProjection, + OptRelNode, OptRelNodeTyp, PlanNode, +}; +use 
crate::properties::schema::SchemaPropertyBuilder; +use super::project_transpose_common::ProjectionMapping; + +// (Proj A) join B -> (Proj (A join B)) +define_rule!( + ProjectionPullUpJoin, + apply_projection_pull_up_join, + ( + Join(JoinType::Inner), + (Projection, left, [list]), + right, + [cond] + ) +); + +fn apply_projection_pull_up_join( + optimizer: &impl Optimizer, + ProjectionPullUpJoinPicks { + left, + right, + list, + cond, + }: ProjectionPullUpJoinPicks, +) -> Vec> { + let left = Arc::new(left.clone()); + let right = Arc::new(right.clone()); + + let list = ExprList::from_rel_node(Arc::new(list)).unwrap(); + + let projection = LogicalProjection::new(PlanNode::from_group(left.clone()), list.clone()); + + let Some(mapping) = ProjectionMapping::build(&projection.exprs()) else { + return vec![]; + }; + + // TODO(chi): support capture projection node. + let left_schema = optimizer.get_property::(left.clone(), 0); + let right_schema = optimizer.get_property::(right.clone(), 0); + let mut new_projection_exprs = list.to_vec(); + for i in 0..right_schema.len() { + let col: Expr = ColumnRefExpr::new(i + left_schema.len()).into_expr(); + new_projection_exprs.push(col); + } + let node = LogicalProjection::new( + LogicalJoin::new( + PlanNode::from_group(left), + PlanNode::from_group(right), + mapping.rewrite_join_cond( + Expr::from_rel_node(Arc::new(cond)).unwrap(), + left_schema.len(), + ), + JoinType::Inner, + ) + .into_plan_node(), + ExprList::new(new_projection_exprs), + ); + vec![node.into_rel_node().as_ref().clone()] +} diff --git a/optd-datafusion-repr/src/rules/project_transpose/project_merge.rs b/optd-datafusion-repr/src/rules/project_transpose/project_merge.rs new file mode 100644 index 00000000..c77d674a --- /dev/null +++ b/optd-datafusion-repr/src/rules/project_transpose/project_merge.rs @@ -0,0 +1,54 @@ +use std::collections::HashMap; + +use optd_core::rules::{Rule, RuleMatcher}; +use optd_core::{optimizer::Optimizer, rel_node::RelNode}; + +use 
crate::plan_nodes::{ + ExprList, LogicalProjection, OptRelNode, OptRelNodeTyp, PlanNode +}; +use crate::rules::macros::define_rule; + +use super::project_transpose_common::ProjectionMapping; +// test cases for project merge +// create table t1 (v1 int, v2 int); +// explain select v1,v2 from (select v1,v2 from t1); + +// create table t3 (v1 int, v2 int, v3 int); +// explain select v2,v3 from (select v1,v3,v2 from t3); +// explain select v1,v2,v3 from (select v1,v3,v2 from t3); + +// Proj (Proj A) -> Proj A +// merges/removes projections +define_rule!( + ProjectMergeRule, + apply_projection_merge, + ( + Projection, + (Projection, child, [exprs2]), + [exprs1] + ) +); + +fn apply_projection_merge( + _optimizer: &impl Optimizer, + ProjectMergeRulePicks { child, exprs1, exprs2 }: ProjectMergeRulePicks, +) -> Vec> { + let child = PlanNode::from_group(child.into()); + let exprs1 = ExprList::from_rel_node(exprs1.into()).unwrap(); + let exprs2 = ExprList::from_rel_node(exprs2.into()).unwrap(); + + let Some(mapping) = ProjectionMapping::build(&exprs1) else { + return vec![]; + }; + + let Some(res_exprs) = mapping.rewrite_projection(&exprs2, true) else { + println!("reached something that should never happen!!!!"); + return vec![]; + }; + + let node: LogicalProjection = LogicalProjection::new( + child, + res_exprs, + ); + vec![node.into_rel_node().as_ref().clone()] +} diff --git a/optd-datafusion-repr/src/rules/project_transpose/project_transpose_common.rs b/optd-datafusion-repr/src/rules/project_transpose/project_transpose_common.rs new file mode 100644 index 00000000..f4627210 --- /dev/null +++ b/optd-datafusion-repr/src/rules/project_transpose/project_transpose_common.rs @@ -0,0 +1,119 @@ +use crate::plan_nodes::{ColumnRefExpr, Expr, ExprList, OptRelNode}; + +/// This struct holds the mapping from original columns to projected columns. 
+/// +/// # Example +/// With the following plan: +/// | Filter (#0 < 5) +/// | +/// |-| Projection [#2, #3] +/// |- Scan [#0, #1, #2, #3] +/// +/// The computed projection mapping is: +/// #2 -> #0 +/// #3 -> #1 +#[derive(Clone, Debug)] +pub struct ProjectionMapping { + forward: Vec, + backward: Vec>, +} + +impl ProjectionMapping { + // forward vec is mapped output schema -> col refs + // backward vec is mapped col refs -> output schema + pub fn build(exprs: &ExprList) -> Option { + let mut forward = vec![]; + let mut backward = vec![]; + for (i, expr) in exprs.to_vec().iter().enumerate() { + let col_expr = ColumnRefExpr::from_rel_node(expr.clone().into_rel_node())?; + let col_idx = col_expr.index(); + forward.push(col_idx); + if col_idx >= backward.len() { + backward.resize(col_idx+1, None); + } + backward[col_idx] = Some(i); + } + Some(Self { forward, backward }) + } + + pub fn projection_col_maps_to(&self, col_idx: usize) -> Option { + self.forward.get(col_idx).copied() + } + + pub fn original_col_maps_to(&self, col_idx: usize) -> Option { + self.backward.get(col_idx).copied().flatten() + } + + /// Remaps all column refs in the join condition based on a + /// removed bottom projection node + /// + /// removed node: + /// Join { cond: #0=#5 } + /// Projection { exprs: [#1, #0, #3, #5, #4] } --> has mapping + /// ----> + /// Join { cond: #1=#4 } + pub fn rewrite_join_cond(&self, cond: Expr, child_schema_len: usize) -> Expr { + let schema_size = self.forward.len(); + cond.rewrite_column_refs(&|col_idx| { + if col_idx < schema_size { + self.projection_col_maps_to(col_idx) + } else { + Some(col_idx - schema_size + child_schema_len) + } + }).unwrap() + } + + /// Remaps all column refs in the filter condition based on an added or + /// removed bottom projection node + /// + /// added node: + /// Filter { cond: #1=0 and #4=1 } + /// ----> + /// Filter { cond: #0=0 and #5=1 } + /// Projection { exprs: [#1, #0, #3, #5, #4] } --> has mapping + /// + /// removed node: + 
/// Filter { cond: #0=0 and #5=1 } + /// Projection { exprs: [#1, #0, #3, #5, #4] } --> has mapping + /// ----> + /// Filter { cond: #1=0 and #4=1 } + pub fn rewrite_filter_cond(&self, cond: Expr, is_added: bool) -> Expr { + cond.rewrite_column_refs(&|col_idx| { + if is_added { + self.original_col_maps_to(col_idx) + } else { + self.projection_col_maps_to(col_idx) + } + }).unwrap() + } + + /// If the top projection node is mapped, rewrites the bottom projection's + /// exprs based on the top projection's mapped col refs. + /// + /// If the bottom projection node is mapped, rewrites the top projection's + /// exprs based on the bottom projection's mapped col refs. + /// + /// Projection { exprs: [#1, #0] } + /// Projection { exprs: [#0, #2] } + /// ----> + /// Projection { exprs: [#2, #0] } + pub fn rewrite_projection(&self, exprs: &ExprList, is_top_mapped: bool) -> Option { + if exprs.len() == 0 { + return None; + } + let mut new_projection_exprs = Vec::new(); + if is_top_mapped { + let exprs = exprs.to_vec(); + for i in &self.forward { + new_projection_exprs.push(exprs[*i].clone()); + }; + } else { + for i in 0..exprs.len() { + let col_idx = self.projection_col_maps_to(i).unwrap(); + let col: Expr = ColumnRefExpr::new(col_idx).into_expr(); + new_projection_exprs.push(col); + }; + } + Some(ExprList::new(new_projection_exprs)) + } +} diff --git a/optd-datafusion-repr/src/rules/projection_pushdown.rs b/optd-datafusion-repr/src/rules/projection_pushdown.rs deleted file mode 100644 index 5857f1e7..00000000 --- a/optd-datafusion-repr/src/rules/projection_pushdown.rs +++ /dev/null @@ -1,145 +0,0 @@ -use std::collections::HashMap; -use std::sync::Arc; - -use optd_core::rules::{Rule, RuleMatcher}; -use optd_core::{optimizer::Optimizer, rel_node::RelNode}; - -use crate::plan_nodes::{ - BetweenExpr, ColumnRefExpr, Expr, ExprList, LikeExpr, LogOpExpr, LogicalFilter, LogicalProjection, OptRelNode, OptRelNodeTyp, PlanNode -}; -use crate::properties::column_ref::ColumnRef; -use 
crate::properties::schema::SchemaPropertyBuilder; - -use super::macros::define_rule; - -fn merge_exprs(first: ExprList, second: ExprList) -> ExprList { - let mut res_vec = first.to_vec(); - res_vec.extend(second.to_vec()); - ExprList::new(res_vec) -} - -// projects away aggregate calls that are not used -// TODO -define_rule!( - ProjectAggregatePushDown, - apply_projection_agg_pushdown, - ( - Projection, - (Agg, child, [agg_exprs], [agg_groups]), - [exprs] - ) -); - -fn apply_projection_agg_pushdown( - _optimizer: &impl Optimizer, - ProjectAggregatePushDownPicks { child, agg_exprs, agg_groups, exprs }: ProjectAggregatePushDownPicks, -) -> Vec> { - - - - vec![] -} - -// pushes projections through filters -// adds a projection node after a filter node -// only keeping necessary columns (proj node exprs + filter col exprs)) -// TODO -define_rule!( - ProjectFilterTransposeRule, - apply_projection_filter_transpose, - ( - Projection, - (Filter, child, [cond]), - [exprs] - ) -); - -fn apply_projection_filter_transpose( - _optimizer: &impl Optimizer, - ProjectFilterTransposeRulePicks { child, cond, exprs }: ProjectFilterTransposeRulePicks, -) -> Vec> { - // get columns out of cond - let exprs = ExprList::from_rel_node(exprs.into()).unwrap(); - let exprs_vec = exprs.clone().to_vec(); - let cond_as_expr = Expr::from_rel_node(cond.into()).unwrap(); - let cond_col_refs = cond_as_expr.get_column_refs(); - let mut dedup_cond_col_refs = Vec::new(); - - for i in 0..cond_col_refs.len() { - if !exprs_vec.contains(&cond_col_refs[i]) { - dedup_cond_col_refs.push(cond_col_refs[i].clone()); - }; - }; - - let dedup_cond_col_refs = ExprList::new(dedup_cond_col_refs); - - let bottom_proj_exprs: ExprList = merge_exprs(exprs.clone(), dedup_cond_col_refs.clone()); - let Some(mapping) = LogicalProjection::compute_column_mapping(&bottom_proj_exprs) else { - return vec![]; - }; - - let child = PlanNode::from_group(child.into()); - let new_filter_cond: Expr = 
mapping.reverse_rewrite_condition(cond_as_expr.clone()); - let bottom_proj_node = LogicalProjection::new(child, bottom_proj_exprs); - let new_filter_node = LogicalFilter::new(bottom_proj_node.into_plan_node(), new_filter_cond); - - if dedup_cond_col_refs.is_empty() { - // can push proj past filter and remove top proj node - return vec![new_filter_node.into_rel_node().as_ref().clone()]; - } - - // have column ref expressions of cond cols - // bottom-most projection will have proj cols + filter cols as a set - let top_proj_exprs = mapping.reverse_rewrite_projection(&exprs); - let top_proj_node = LogicalProjection::new(new_filter_node.into_plan_node(), top_proj_exprs); - vec![top_proj_node.into_rel_node().as_ref().clone()] -} - - -// test cases for project merge -// create table t1 (v1 int, v2 int); -// explain select v1,v2 from (select v1,v2 from t1); - -// create table t3 (v1 int, v2 int, v3 int); -// explain select v2,v3 from (select v1,v3,v2 from t3); -// explain select v1,v2,v3 from (select v1,v3,v2 from t3); - -// Proj (Proj A) -> Proj A -// merges/removes projections -define_rule!( - ProjectMergeRule, - apply_projection_merge, - ( - Projection, - (Projection, child, [exprs2]), - [exprs1] - ) -); - -fn apply_projection_merge( - _optimizer: &impl Optimizer, - ProjectMergeRulePicks { child, exprs1, exprs2 }: ProjectMergeRulePicks, -) -> Vec> { - let child = PlanNode::from_group(child.into()); - let exprs1 = ExprList::from_rel_node(exprs1.into()).unwrap(); - let exprs2 = ExprList::from_rel_node(exprs2.into()).unwrap(); - - let Some(mapping) = LogicalProjection::compute_column_mapping(&exprs1) else { - return vec![]; - }; - - let Some(res_exprs) = mapping.rewrite_projection(&exprs2) else { - let node: LogicalProjection = LogicalProjection::new( - child, - exprs1, - ); - println!("reached something that should never happen!!!!"); - return vec![node.into_rel_node().as_ref().clone()]; - }; - - let node: LogicalProjection = LogicalProjection::new( - child, - res_exprs, 
- ); - vec![node.into_rel_node().as_ref().clone()] -} From fc18944ead8874929846e66687609f794d73ee24 Mon Sep 17 00:00:00 2001 From: Sweetsuro Date: Mon, 15 Apr 2024 23:31:30 -0400 Subject: [PATCH 51/61] project merge as default hueristic --- optd-datafusion-repr/src/lib.rs | 6 +- .../rules/project_transpose/project_merge.rs | 4 +- .../project_transpose_common.rs | 3 + optd-sqlplannertest/tests/tpch.planner.sql | 220 +++++++++++++++--- 4 files changed, 201 insertions(+), 32 deletions(-) diff --git a/optd-datafusion-repr/src/lib.rs b/optd-datafusion-repr/src/lib.rs index bcf2f6bb..27cf4787 100644 --- a/optd-datafusion-repr/src/lib.rs +++ b/optd-datafusion-repr/src/lib.rs @@ -88,6 +88,7 @@ impl DatafusionOptimizer { Arc::new(EliminateDuplicatedSortExprRule::new()), Arc::new(EliminateDuplicatedAggExprRule::new()), Arc::new(ProjectMergeRule::new()), + Arc::new(FilterMergeRule::new()), ] } @@ -98,9 +99,7 @@ impl DatafusionOptimizer { for rule in rules { rule_wrappers.push(RuleWrapper::new_cascades(rule)); } - // rule_wrappers.push(RuleWrapper::new_heuristic(Arc::new( - // ProjectMergeRule::new(), - // ))); + // project transpose rules rule_wrappers.push(RuleWrapper::new_cascades(Arc::new( ProjectFilterTransposeRule::new(), ))); @@ -108,7 +107,6 @@ impl DatafusionOptimizer { rule_wrappers.push(RuleWrapper::new_heuristic(Arc::new( FilterProjectTransposeRule::new(), ))); - rule_wrappers.push(RuleWrapper::new_heuristic(Arc::new(FilterMergeRule::new()))); rule_wrappers.push(RuleWrapper::new_heuristic(Arc::new( FilterCrossJoinTransposeRule::new(), ))); diff --git a/optd-datafusion-repr/src/rules/project_transpose/project_merge.rs b/optd-datafusion-repr/src/rules/project_transpose/project_merge.rs index c77d674a..5254522f 100644 --- a/optd-datafusion-repr/src/rules/project_transpose/project_merge.rs +++ b/optd-datafusion-repr/src/rules/project_transpose/project_merge.rs @@ -42,13 +42,15 @@ fn apply_projection_merge( }; let Some(res_exprs) = mapping.rewrite_projection(&exprs2, 
true) else { - println!("reached something that should never happen!!!!"); return vec![]; }; + // println!("res_exprs: {:?}\n exprs1: {:?}\n child: {:?}\n exprs2: {:?}\n", res_exprs, exprs1, child, exprs2); + let node: LogicalProjection = LogicalProjection::new( child, res_exprs, ); + vec![node.into_rel_node().as_ref().clone()] } diff --git a/optd-datafusion-repr/src/rules/project_transpose/project_transpose_common.rs b/optd-datafusion-repr/src/rules/project_transpose/project_transpose_common.rs index f4627210..7236a2ad 100644 --- a/optd-datafusion-repr/src/rules/project_transpose/project_transpose_common.rs +++ b/optd-datafusion-repr/src/rules/project_transpose/project_transpose_common.rs @@ -103,6 +103,9 @@ impl ProjectionMapping { } let mut new_projection_exprs = Vec::new(); if is_top_mapped { + if exprs.len() > self.forward.len() { + return None; + } let exprs = exprs.to_vec(); for i in &self.forward { new_projection_exprs.push(exprs[*i].clone()); diff --git a/optd-sqlplannertest/tests/tpch.planner.sql b/optd-sqlplannertest/tests/tpch.planner.sql index 8497c26d..3cb76238 100644 --- a/optd-sqlplannertest/tests/tpch.planner.sql +++ b/optd-sqlplannertest/tests/tpch.planner.sql @@ -1424,6 +1424,171 @@ PhysicalLimit { skip: 0, fetch: 20 } └── PhysicalScan { table: nation } */ +-- TPC-H Q11 +select + ps_partkey, + sum(ps_supplycost * ps_availqty) as value +from + partsupp, + supplier, + nation +where + ps_suppkey = s_suppkey + and s_nationkey = n_nationkey + and n_name = 'CHINA' +group by + ps_partkey having + sum(ps_supplycost * ps_availqty) > ( + select + sum(ps_supplycost * ps_availqty) * 0.0001000000 + from + partsupp, + supplier, + nation + where + ps_suppkey = s_suppkey + and s_nationkey = n_nationkey + and n_name = 'CHINA' + ) +order by + value desc; + +/* +LogicalSort +├── exprs:SortOrder { order: Desc } +│ └── #1 +└── LogicalProjection { exprs: [ #0, #1 ] } + └── LogicalJoin + ├── join_type: Inner + ├── cond:Gt + │ ├── Cast { cast_to: Decimal128(38, 15), 
expr: #1 } + │ └── #2 + ├── LogicalAgg + │ ├── exprs:Agg(Sum) + │ │ └── Mul + │ │ ├── #2 + │ │ └── Cast { cast_to: Decimal128(10, 0), expr: #1 } + │ ├── groups: [ #0 ] + │ └── LogicalProjection { exprs: [ #0, #1, #2 ] } + │ └── LogicalJoin + │ ├── join_type: Inner + │ ├── cond:Eq + │ │ ├── #3 + │ │ └── #4 + │ ├── LogicalProjection { exprs: [ #0, #2, #3, #5 ] } + │ │ └── LogicalJoin + │ │ ├── join_type: Inner + │ │ ├── cond:Eq + │ │ │ ├── #1 + │ │ │ └── #4 + │ │ ├── LogicalProjection { exprs: [ #0, #1, #2, #3 ] } + │ │ │ └── LogicalScan { table: partsupp } + │ │ └── LogicalProjection { exprs: [ #0, #3 ] } + │ │ └── LogicalScan { table: supplier } + │ └── LogicalProjection { exprs: [ #0 ] } + │ └── LogicalFilter + │ ├── cond:Eq + │ │ ├── #1 + │ │ └── "CHINA" + │ └── LogicalProjection { exprs: [ #0, #1 ] } + │ └── LogicalScan { table: nation } + └── LogicalProjection + ├── exprs:Cast + │ ├── cast_to: Decimal128(38, 15) + │ ├── expr:Mul + │ │ ├── Cast { cast_to: Float64, expr: #0 } + │ │ └── 0.0001 + + └── LogicalAgg + ├── exprs:Agg(Sum) + │ └── Mul + │ ├── #1 + │ └── Cast { cast_to: Decimal128(10, 0), expr: #0 } + ├── groups: [] + └── LogicalProjection { exprs: [ #0, #1 ] } + └── LogicalJoin + ├── join_type: Inner + ├── cond:Eq + │ ├── #2 + │ └── #3 + ├── LogicalProjection { exprs: [ #1, #2, #4 ] } + │ └── LogicalJoin + │ ├── join_type: Inner + │ ├── cond:Eq + │ │ ├── #0 + │ │ └── #3 + │ ├── LogicalProjection { exprs: [ #1, #2, #3 ] } + │ │ └── LogicalScan { table: partsupp } + │ └── LogicalProjection { exprs: [ #0, #3 ] } + │ └── LogicalScan { table: supplier } + └── LogicalProjection { exprs: [ #0 ] } + └── LogicalFilter + ├── cond:Eq + │ ├── #1 + │ └── "CHINA" + └── LogicalProjection { exprs: [ #0, #1 ] } + └── LogicalScan { table: nation } +PhysicalSort +├── exprs:SortOrder { order: Desc } +│ └── #1 +└── PhysicalProjection { exprs: [ #0, #1 ] } + └── PhysicalProjection { exprs: [ #0, #1 ] } + └── PhysicalNestedLoopJoin + ├── join_type: Inner + ├── cond:Gt + │ ├── 
Cast { cast_to: Decimal128(38, 15), expr: #1 } + │ └── #0 + ├── PhysicalProjection + │ ├── exprs:Cast + │ │ ├── cast_to: Decimal128(38, 15) + │ │ ├── expr:Mul + │ │ │ ├── Cast { cast_to: Float64, expr: #0 } + │ │ │ └── 0.0001 + + │ └── PhysicalAgg + │ ├── aggrs:Agg(Sum) + │ │ └── Mul + │ │ ├── #1 + │ │ └── Cast { cast_to: Decimal128(10, 0), expr: #0 } + │ ├── groups: [] + │ └── PhysicalProjection { exprs: [ #0, #1 ] } + │ └── PhysicalHashJoin { join_type: Inner, left_keys: [ #2 ], right_keys: [ #0 ] } + │ ├── PhysicalProjection { exprs: [ #1, #2, #4 ] } + │ │ └── PhysicalHashJoin { join_type: Inner, left_keys: [ #0 ], right_keys: [ #0 ] } + │ │ ├── PhysicalProjection { exprs: [ #1, #2, #3 ] } + │ │ │ └── PhysicalScan { table: partsupp } + │ │ └── PhysicalProjection { exprs: [ #0, #3 ] } + │ │ └── PhysicalScan { table: supplier } + │ └── PhysicalProjection { exprs: [ #0 ] } + │ └── PhysicalProjection { exprs: [ #0, #1 ] } + │ └── PhysicalFilter + │ ├── cond:Eq + │ │ ├── #1 + │ │ └── "CHINA" + │ └── PhysicalScan { table: nation } + └── PhysicalAgg + ├── aggrs:Agg(Sum) + │ └── Mul + │ ├── #2 + │ └── Cast { cast_to: Decimal128(10, 0), expr: #1 } + ├── groups: [ #0 ] + └── PhysicalProjection { exprs: [ #0, #1, #2 ] } + └── PhysicalHashJoin { join_type: Inner, left_keys: [ #3 ], right_keys: [ #0 ] } + ├── PhysicalProjection { exprs: [ #0, #2, #3, #5 ] } + │ └── PhysicalHashJoin { join_type: Inner, left_keys: [ #1 ], right_keys: [ #0 ] } + │ ├── PhysicalProjection { exprs: [ #0, #1, #2, #3 ] } + │ │ └── PhysicalScan { table: partsupp } + │ └── PhysicalProjection { exprs: [ #0, #3 ] } + │ └── PhysicalScan { table: supplier } + └── PhysicalProjection { exprs: [ #0 ] } + └── PhysicalProjection { exprs: [ #0, #1 ] } + └── PhysicalFilter + ├── cond:Eq + │ ├── #1 + │ └── "CHINA" + └── PhysicalScan { table: nation } +*/ + -- TPC-H Q12 SELECT l_shipmode, @@ -1748,36 +1913,37 @@ PhysicalSort ├── exprs:SortOrder { order: Asc } │ └── #0 └── PhysicalProjection { exprs: [ #0, #1, #2, 
#3, #4 ] } - └── PhysicalProjection { exprs: [ #0, #1, #2, #3, #5 ] } + └── PhysicalProjection { exprs: [ #0, #1, #2, #3, #5, #6 ] } └── PhysicalHashJoin { join_type: Inner, left_keys: [ #0 ], right_keys: [ #0 ] } ├── PhysicalProjection { exprs: [ #0, #1, #2, #4 ] } │ └── PhysicalScan { table: supplier } - └── PhysicalHashJoin { join_type: Inner, left_keys: [ #1 ], right_keys: [ #0 ] } - ├── PhysicalProjection { exprs: [ #0, #1 ] } - │ └── PhysicalAgg - │ ├── aggrs:Agg(Sum) - │ │ └── Mul - │ │ ├── #1 - │ │ └── Sub - │ │ ├── 1 - │ │ └── #2 - │ ├── groups: [ #0 ] - │ └── PhysicalProjection { exprs: [ #0, #1, #2 ] } - │ └── PhysicalProjection { exprs: [ #2, #5, #6, #10 ] } - │ └── PhysicalFilter - │ ├── cond:And - │ │ ├── Geq - │ │ │ ├── #10 - │ │ │ └── 8401 - │ │ └── Lt - │ │ ├── #10 - │ │ └── 8491 - │ └── PhysicalScan { table: lineitem } - └── PhysicalAgg - ├── aggrs:Agg(Max) - │ └── [ #0 ] - ├── groups: [] - └── PhysicalProjection { exprs: [ #1 ] } + └── PhysicalProjection { exprs: [ #0, #1, #2 ] } + └── PhysicalProjection { exprs: [ #1, #2, #0 ] } + └── PhysicalHashJoin { join_type: Inner, left_keys: [ #0 ], right_keys: [ #1 ] } + ├── PhysicalAgg + │ ├── aggrs:Agg(Max) + │ │ └── [ #0 ] + │ ├── groups: [] + │ └── PhysicalProjection { exprs: [ #1 ] } + │ └── PhysicalAgg + │ ├── aggrs:Agg(Sum) + │ │ └── Mul + │ │ ├── #1 + │ │ └── Sub + │ │ ├── 1 + │ │ └── #2 + │ ├── groups: [ #0 ] + │ └── PhysicalProjection { exprs: [ #0, #1, #2 ] } + │ └── PhysicalProjection { exprs: [ #2, #5, #6, #10 ] } + │ └── PhysicalFilter + │ ├── cond:And + │ │ ├── Geq + │ │ │ ├── #10 + │ │ │ └── 8401 + │ │ └── Lt + │ │ ├── #10 + │ │ └── 8491 + │ └── PhysicalScan { table: lineitem } └── PhysicalAgg ├── aggrs:Agg(Sum) │ └── Mul From e92271d4d197b55bba37fa6729d4eeb212a004c7 Mon Sep 17 00:00:00 2001 From: Sweetsuro Date: Mon, 15 Apr 2024 23:52:56 -0400 Subject: [PATCH 52/61] move filter_project test cases --- .../src/rules/filter_pushdown.rs | 80 +-------------- .../filter_project_transpose.rs | 
97 ++++++++++++++++++- .../project_agg_transpose.rs | 3 - .../project_filter_transpose.rs | 3 +- .../rules/project_transpose/project_merge.rs | 9 -- 5 files changed, 98 insertions(+), 94 deletions(-) diff --git a/optd-datafusion-repr/src/rules/filter_pushdown.rs b/optd-datafusion-repr/src/rules/filter_pushdown.rs index 4d0d6c49..ee5ec6ad 100644 --- a/optd-datafusion-repr/src/rules/filter_pushdown.rs +++ b/optd-datafusion-repr/src/rules/filter_pushdown.rs @@ -421,7 +421,7 @@ mod tests { use crate::{ plan_nodes::{ BinOpExpr, BinOpType, ColumnRefExpr, ConstantExpr, ExprList, LogOpExpr, LogOpType, - LogicalAgg, LogicalFilter, LogicalJoin, LogicalProjection, LogicalScan, LogicalSort, + LogicalAgg, LogicalFilter, LogicalJoin, LogicalScan, LogicalSort, OptRelNode, OptRelNodeTyp, }, rules::{ @@ -508,84 +508,6 @@ mod tests { assert_eq!(col_4.value().as_i32(), 1); } - // #[test] - // fn push_past_proj_basic() { - // let mut test_optimizer = new_test_optimizer(Arc::new(FilterProjectTransposeRule::new())); - - // let scan = LogicalScan::new("customer".into()); - // let proj = LogicalProjection::new(scan.into_plan_node(), ExprList::new(vec![])); - - // let filter_expr = BinOpExpr::new( - // ColumnRefExpr::new(0).into_expr(), - // ConstantExpr::int32(5).into_expr(), - // BinOpType::Eq, - // ) - // .into_expr(); - - // let filter = LogicalFilter::new(proj.into_plan_node(), filter_expr); - // let plan = test_optimizer.optimize(filter.into_rel_node()).unwrap(); - - // assert_eq!(plan.typ, OptRelNodeTyp::Projection); - // assert!(matches!(plan.child(0).typ, OptRelNodeTyp::Filter)); - // } - - // #[test] - // fn push_past_proj_adv() { - // let mut test_optimizer = new_test_optimizer(Arc::new(FilterProjectTransposeRule::new())); - - // let scan = LogicalScan::new("customer".into()); - // let proj = LogicalProjection::new( - // scan.into_plan_node(), - // ExprList::new(vec![ - // ColumnRefExpr::new(0).into_expr(), - // ColumnRefExpr::new(4).into_expr(), - // 
ColumnRefExpr::new(5).into_expr(), - // ColumnRefExpr::new(7).into_expr(), - // ]), - // ); - - // let filter_expr = LogOpExpr::new( - // LogOpType::And, - // ExprList::new(vec![ - // BinOpExpr::new( - // // This one should be pushed to the left child - // ColumnRefExpr::new(1).into_expr(), - // ConstantExpr::int32(5).into_expr(), - // BinOpType::Eq, - // ) - // .into_expr(), - // BinOpExpr::new( - // // This one should be pushed to the right child - // ColumnRefExpr::new(3).into_expr(), - // ConstantExpr::int32(6).into_expr(), - // BinOpType::Eq, - // ) - // .into_expr(), - // ]), - // ); - - // let filter = LogicalFilter::new(proj.into_plan_node(), filter_expr.into_expr()); - - // let plan = test_optimizer.optimize(filter.into_rel_node()).unwrap(); - - // assert!(matches!(plan.typ, OptRelNodeTyp::Projection)); - // let plan_filter = LogicalFilter::from_rel_node(plan.child(0)).unwrap(); - // assert!(matches!(plan_filter.0.typ(), OptRelNodeTyp::Filter)); - // let plan_filter_expr = - // LogOpExpr::from_rel_node(plan_filter.cond().into_rel_node()).unwrap(); - // assert!(matches!(plan_filter_expr.op_type(), LogOpType::And)); - // let op_0 = BinOpExpr::from_rel_node(plan_filter_expr.children()[0].clone().into_rel_node()) - // .unwrap(); - // let col_0 = - // ColumnRefExpr::from_rel_node(op_0.left_child().clone().into_rel_node()).unwrap(); - // assert_eq!(col_0.index(), 4); - // let op_1 = BinOpExpr::from_rel_node(plan_filter_expr.children()[1].clone().into_rel_node()) - // .unwrap(); - // let col_1 = - // ColumnRefExpr::from_rel_node(op_1.left_child().clone().into_rel_node()).unwrap(); - // assert_eq!(col_1.index(), 7); - // } - #[test] fn push_past_join_conjunction() { // Test pushing a complex filter past a join, where one clause can diff --git a/optd-datafusion-repr/src/rules/project_transpose/filter_project_transpose.rs b/optd-datafusion-repr/src/rules/project_transpose/filter_project_transpose.rs index 0d4bf760..f29525f8 100644 --- 
a/optd-datafusion-repr/src/rules/project_transpose/filter_project_transpose.rs +++ b/optd-datafusion-repr/src/rules/project_transpose/filter_project_transpose.rs @@ -22,7 +22,7 @@ define_rule!( /// Calcite only checks if the projection contains a windowing calculation /// We check neither of those things and do it always (which may be wrong) fn apply_filter_project_transpose( - optimizer: &impl Optimizer, + _optimizer: &impl Optimizer, FilterProjectTransposeRulePicks { child, exprs, cond }: FilterProjectTransposeRulePicks, ) -> Vec> { let child = PlanNode::from_group(child.into()); @@ -35,4 +35,99 @@ fn apply_filter_project_transpose( let new_filter_node = LogicalFilter::new(child, rewritten_cond); let new_proj = LogicalProjection::new(new_filter_node.into_plan_node(), exprs); vec![new_proj.into_rel_node().as_ref().clone()] +} + +#[cfg(test)] +mod tests { + use std::sync::Arc; + + use optd_core::optimizer::Optimizer; + + use crate::{ + plan_nodes::{ + BinOpExpr, BinOpType, ColumnRefExpr, ConstantExpr, ExprList, LogOpExpr, LogOpType, + LogicalFilter, LogicalProjection, LogicalScan, OptRelNode, OptRelNodeTyp + }, + rules::FilterProjectTransposeRule, + testing::new_test_optimizer, + }; + + #[test] + fn push_past_proj_basic() { + let mut test_optimizer = new_test_optimizer(Arc::new(FilterProjectTransposeRule::new())); + + let scan = LogicalScan::new("customer".into()); + let proj = LogicalProjection::new(scan.into_plan_node(), ExprList::new(vec![])); + + let filter_expr = BinOpExpr::new( + ColumnRefExpr::new(0).into_expr(), + ConstantExpr::int32(5).into_expr(), + BinOpType::Eq, + ) + .into_expr(); + + let filter = LogicalFilter::new(proj.into_plan_node(), filter_expr); + let plan = test_optimizer.optimize(filter.into_rel_node()).unwrap(); + + assert_eq!(plan.typ, OptRelNodeTyp::Projection); + assert!(matches!(plan.child(0).typ, OptRelNodeTyp::Filter)); + } + + #[test] + fn push_past_proj_adv() { + let mut test_optimizer = 
new_test_optimizer(Arc::new(FilterProjectTransposeRule::new())); + + let scan = LogicalScan::new("customer".into()); + let proj = LogicalProjection::new( + scan.into_plan_node(), + ExprList::new(vec![ + ColumnRefExpr::new(0).into_expr(), + ColumnRefExpr::new(4).into_expr(), + ColumnRefExpr::new(5).into_expr(), + ColumnRefExpr::new(7).into_expr(), + ]), + ); + + let filter_expr = LogOpExpr::new( + LogOpType::And, + ExprList::new(vec![ + BinOpExpr::new( + // This one should be pushed to the left child + ColumnRefExpr::new(1).into_expr(), + ConstantExpr::int32(5).into_expr(), + BinOpType::Eq, + ) + .into_expr(), + BinOpExpr::new( + // This one should be pushed to the right child + ColumnRefExpr::new(3).into_expr(), + ConstantExpr::int32(6).into_expr(), + BinOpType::Eq, + ) + .into_expr(), + ]), + ); + + let filter = LogicalFilter::new(proj.into_plan_node(), filter_expr.into_expr()); + + let plan = test_optimizer.optimize(filter.into_rel_node()).unwrap(); + + assert!(matches!(plan.typ, OptRelNodeTyp::Projection)); + let plan_filter = LogicalFilter::from_rel_node(plan.child(0)).unwrap(); + assert!(matches!(plan_filter.0.typ(), OptRelNodeTyp::Filter)); + let plan_filter_expr = + LogOpExpr::from_rel_node(plan_filter.cond().into_rel_node()).unwrap(); + assert!(matches!(plan_filter_expr.op_type(), LogOpType::And)); + let op_0 = BinOpExpr::from_rel_node(plan_filter_expr.children()[0].clone().into_rel_node()) + .unwrap(); + let col_0 = + ColumnRefExpr::from_rel_node(op_0.left_child().clone().into_rel_node()).unwrap(); + assert_eq!(col_0.index(), 4); + let op_1 = BinOpExpr::from_rel_node(plan_filter_expr.children()[1].clone().into_rel_node()) + .unwrap(); + let col_1 = + ColumnRefExpr::from_rel_node(op_1.left_child().clone().into_rel_node()).unwrap(); + assert_eq!(col_1.index(), 7); + } + } \ No newline at end of file diff --git a/optd-datafusion-repr/src/rules/project_transpose/project_agg_transpose.rs 
b/optd-datafusion-repr/src/rules/project_transpose/project_agg_transpose.rs index 277bc0c7..14fd690a 100644 --- a/optd-datafusion-repr/src/rules/project_transpose/project_agg_transpose.rs +++ b/optd-datafusion-repr/src/rules/project_transpose/project_agg_transpose.rs @@ -14,8 +14,5 @@ fn apply_projection_agg_pushdown( _optimizer: &impl Optimizer, ProjectAggregatePushDownPicks { child, agg_exprs, agg_groups, exprs }: ProjectAggregatePushDownPicks, ) -> Vec> { - - - vec![] } \ No newline at end of file diff --git a/optd-datafusion-repr/src/rules/project_transpose/project_filter_transpose.rs b/optd-datafusion-repr/src/rules/project_transpose/project_filter_transpose.rs index 9f550aa3..8a2aa758 100644 --- a/optd-datafusion-repr/src/rules/project_transpose/project_filter_transpose.rs +++ b/optd-datafusion-repr/src/rules/project_transpose/project_filter_transpose.rs @@ -17,8 +17,7 @@ fn merge_exprs(first: ExprList, second: ExprList) -> ExprList { // pushes projections through filters // adds a projection node after a filter node -// only keeping necessary columns (proj node exprs + filter col exprs)) -// TODO +// only keeping necessary columns (proj node exprs + filter col exprs) define_rule!( ProjectFilterTransposeRule, apply_projection_filter_transpose, diff --git a/optd-datafusion-repr/src/rules/project_transpose/project_merge.rs b/optd-datafusion-repr/src/rules/project_transpose/project_merge.rs index 5254522f..dc109a8e 100644 --- a/optd-datafusion-repr/src/rules/project_transpose/project_merge.rs +++ b/optd-datafusion-repr/src/rules/project_transpose/project_merge.rs @@ -9,13 +9,6 @@ use crate::plan_nodes::{ use crate::rules::macros::define_rule; use super::project_transpose_common::ProjectionMapping; -// test cases for project merge -// create table t1 (v1 int, v2 int); -// explain select v1,v2 from (select v1,v2 from t1); - -// create table t3 (v1 int, v2 int, v3 int); -// explain select v2,v3 from (select v1,v3,v2 from t3); -// explain select v1,v2,v3 from 
(select v1,v3,v2 from t3); // Proj (Proj A) -> Proj A // merges/removes projections @@ -45,8 +38,6 @@ fn apply_projection_merge( return vec![]; }; - // println!("res_exprs: {:?}\n exprs1: {:?}\n child: {:?}\n exprs2: {:?}\n", res_exprs, exprs1, child, exprs2); - let node: LogicalProjection = LogicalProjection::new( child, res_exprs, From 37bfc37fe401b995ff1640751adcf5c7aa09a0d2 Mon Sep 17 00:00:00 2001 From: Sweetsuro Date: Wed, 17 Apr 2024 14:03:29 -0400 Subject: [PATCH 53/61] project filter unit tests --- .../src/plan_nodes/projection.rs | 2 +- .../filter_project_transpose.rs | 2 +- .../project_filter_transpose.rs | 266 +++++++++++++++++- .../project_transpose_common.rs | 8 +- .../src/testing/tpch_catalog.rs | 21 ++ 5 files changed, 293 insertions(+), 6 deletions(-) diff --git a/optd-datafusion-repr/src/plan_nodes/projection.rs b/optd-datafusion-repr/src/plan_nodes/projection.rs index 1f6b6279..3898733d 100644 --- a/optd-datafusion-repr/src/plan_nodes/projection.rs +++ b/optd-datafusion-repr/src/plan_nodes/projection.rs @@ -1,7 +1,7 @@ use super::expr::ExprList; use super::macros::define_plan_node; -use super::{ColumnRefExpr, Expr, OptRelNode, OptRelNodeRef, OptRelNodeTyp, PlanNode}; +use super::{OptRelNode, OptRelNodeRef, OptRelNodeTyp, PlanNode}; #[derive(Clone, Debug)] pub struct LogicalProjection(pub PlanNode); diff --git a/optd-datafusion-repr/src/rules/project_transpose/filter_project_transpose.rs b/optd-datafusion-repr/src/rules/project_transpose/filter_project_transpose.rs index f29525f8..27a3498c 100644 --- a/optd-datafusion-repr/src/rules/project_transpose/filter_project_transpose.rs +++ b/optd-datafusion-repr/src/rules/project_transpose/filter_project_transpose.rs @@ -57,7 +57,7 @@ mod tests { let mut test_optimizer = new_test_optimizer(Arc::new(FilterProjectTransposeRule::new())); let scan = LogicalScan::new("customer".into()); - let proj = LogicalProjection::new(scan.into_plan_node(), ExprList::new(vec![])); + let proj = 
LogicalProjection::new(scan.into_plan_node(), ExprList::new(vec![ColumnRefExpr::new(0).into_expr()])); let filter_expr = BinOpExpr::new( ColumnRefExpr::new(0).into_expr(), diff --git a/optd-datafusion-repr/src/rules/project_transpose/project_filter_transpose.rs b/optd-datafusion-repr/src/rules/project_transpose/project_filter_transpose.rs index 8a2aa758..978a4c03 100644 --- a/optd-datafusion-repr/src/rules/project_transpose/project_filter_transpose.rs +++ b/optd-datafusion-repr/src/rules/project_transpose/project_filter_transpose.rs @@ -52,7 +52,7 @@ fn apply_projection_filter_transpose( return vec![]; }; - let child = PlanNode::from_group(child.into()); + let child: PlanNode = PlanNode::from_group(child.into()); let new_filter_cond: Expr = mapping.rewrite_filter_cond(cond_as_expr.clone(), true); let bottom_proj_node = LogicalProjection::new(child, bottom_proj_exprs); let new_filter_node = LogicalFilter::new(bottom_proj_node.into_plan_node(), new_filter_cond); @@ -69,4 +69,268 @@ fn apply_projection_filter_transpose( }; let top_proj_node = LogicalProjection::new(new_filter_node.into_plan_node(), top_proj_exprs); vec![top_proj_node.into_rel_node().as_ref().clone()] +} + +#[cfg(test)] +mod tests { + use std::sync::Arc; + + use optd_core::optimizer::Optimizer; + + use crate::{ + plan_nodes::{ + BinOpExpr, BinOpType, ColumnRefExpr, ConstantExpr, ExprList, LogOpExpr, LogOpType, + LogicalFilter, LogicalProjection, LogicalScan, OptRelNode, OptRelNodeTyp + }, + rules::ProjectFilterTransposeRule, + testing::new_test_optimizer, + }; + + #[test] + fn push_proj_past_filter_basic_1() { + // convert proj -> filter -> scan to filter -> proj -> scan + // happens when all filter expr col refs are in proj exprs + let mut test_optimizer = new_test_optimizer(Arc::new(ProjectFilterTransposeRule::new())); + + let scan = LogicalScan::new("customer".into()); + + let filter_expr = BinOpExpr::new( + ColumnRefExpr::new(0).into_expr(), + ConstantExpr::int32(5).into_expr(), + BinOpType::Eq, + 
) + .into_expr(); + + let filter = LogicalFilter::new(scan.into_plan_node(), filter_expr); + + let proj_exprs = ExprList::new( + vec![ + ColumnRefExpr::new(2).into_expr(), + ColumnRefExpr::new(0).into_expr() + ] + ); + + let proj = LogicalProjection::new(filter.into_plan_node(), proj_exprs.clone()); + + let plan = test_optimizer.optimize(proj.into_rel_node()).unwrap(); + + let res_filter_expr = BinOpExpr::new( + ColumnRefExpr::new(1).into_expr(), + ConstantExpr::int32(5).into_expr(), + BinOpType::Eq, + ) + .into_expr().into_rel_node(); + + assert_eq!(plan.child(1), res_filter_expr); + assert_eq!(plan.typ, OptRelNodeTyp::Filter); + assert!(matches!(plan.child(0).typ, OptRelNodeTyp::Projection)); + assert_eq!(plan.child(0).child(1), proj_exprs.into_rel_node()); + assert!(matches!(plan.child(0).child(0).typ, OptRelNodeTyp::Scan)); + } + + #[test] + fn push_proj_past_filter_basic_2() { + // convert proj -> filter -> scan to filter -> proj -> scan + // happens when all filter expr col refs are NOT in proj exprs + + let mut test_optimizer = new_test_optimizer(Arc::new(ProjectFilterTransposeRule::new())); + + let scan = LogicalScan::new("region".into()); + + let filter_expr = BinOpExpr::new( + ColumnRefExpr::new(2).into_expr(), + ConstantExpr::int32(5).into_expr(), + BinOpType::Eq, + ) + .into_expr(); + + let filter = LogicalFilter::new(scan.into_plan_node(), filter_expr); + + let proj_exprs = ExprList::new( + vec![ + ColumnRefExpr::new(1).into_expr(), + ] + ); + + let res_filter_expr: Arc> = BinOpExpr::new( + ColumnRefExpr::new(1).into_expr(), + ConstantExpr::int32(5).into_expr(), + BinOpType::Eq, + ) + .into_expr().into_rel_node(); + + let res_top_proj_exprs: Arc> = ExprList::new( + vec![ + ColumnRefExpr::new(0).into_expr(), + ] + ).into_rel_node(); + + let res_bot_proj_exprs = ExprList::new( + vec![ + ColumnRefExpr::new(1).into_expr(), + ColumnRefExpr::new(2).into_expr(), + ] + ).into_rel_node(); + + let proj = LogicalProjection::new(filter.into_plan_node(), 
proj_exprs); + + let plan = test_optimizer.optimize(proj.into_rel_node()).unwrap(); + + assert_eq!(plan.typ, OptRelNodeTyp::Projection); + assert_eq!(plan.child(1), res_top_proj_exprs); + + assert!(matches!(plan.child(0).typ, OptRelNodeTyp::Filter)); + assert_eq!(plan.child(0).child(1), res_filter_expr); + + assert!(matches!(plan.child(0).child(0).typ, OptRelNodeTyp::Projection)); + assert_eq!(plan.child(0).child(0).child(1), res_bot_proj_exprs); + + assert!(matches!(plan.child(0).child(0).child(0).typ, OptRelNodeTyp::Scan)); + } + + #[test] + fn push_proj_past_filter_adv_1() { + let mut test_optimizer: optd_core::heuristics::HeuristicsOptimizer = new_test_optimizer(Arc::new(ProjectFilterTransposeRule::new())); + + let scan = LogicalScan::new("customer".into()); + + let filter_expr = LogOpExpr::new( + LogOpType::And, + ExprList::new(vec![ + BinOpExpr::new( + ColumnRefExpr::new(5).into_expr(), + ConstantExpr::int32(3).into_expr(), + BinOpType::Eq, + ) + .into_expr(), + BinOpExpr::new( + ConstantExpr::int32(6).into_expr(), + ColumnRefExpr::new(0).into_expr(), + BinOpType::Eq, + ) + .into_expr(), + ]), + ).into_expr(); + + let filter = LogicalFilter::new(scan.into_plan_node(), filter_expr); + let proj_exprs = ExprList::new(vec![ + ColumnRefExpr::new(0).into_expr(), + ColumnRefExpr::new(4).into_expr(), + ColumnRefExpr::new(5).into_expr(), + ColumnRefExpr::new(7).into_expr(), + ]); + + let proj = LogicalProjection::new( + filter.into_plan_node(), + proj_exprs.clone(), + ).into_rel_node(); + + let plan = test_optimizer.optimize(proj).unwrap(); + + let res_filter_expr = LogOpExpr::new( + LogOpType::And, + ExprList::new(vec![ + BinOpExpr::new( + ColumnRefExpr::new(2).into_expr(), + ConstantExpr::int32(3).into_expr(), + BinOpType::Eq, + ) + .into_expr(), + BinOpExpr::new( + ConstantExpr::int32(6).into_expr(), + ColumnRefExpr::new(0).into_expr(), + BinOpType::Eq, + ) + .into_expr(), + ]), + ).into_expr().into_rel_node(); + + assert!(matches!(plan.typ, 
OptRelNodeTyp::Filter)); + assert_eq!(plan.child(1), res_filter_expr); + + assert!(matches!(plan.child(0).typ, OptRelNodeTyp::Projection)); + assert_eq!(plan.child(0).child(1), proj_exprs.into_rel_node()); + } + + #[test] + fn push_proj_past_filter_adv_2() { + let mut test_optimizer: optd_core::heuristics::HeuristicsOptimizer = new_test_optimizer(Arc::new(ProjectFilterTransposeRule::new())); + + let scan = LogicalScan::new("customer".into()); + + let filter_expr = LogOpExpr::new( + LogOpType::And, + ExprList::new(vec![ + BinOpExpr::new( + ColumnRefExpr::new(5).into_expr(), + ConstantExpr::int32(3).into_expr(), + BinOpType::Eq, + ) + .into_expr(), + BinOpExpr::new( + ConstantExpr::int32(6).into_expr(), + ColumnRefExpr::new(2).into_expr(), + BinOpType::Eq, + ) + .into_expr(), + ]), + ).into_expr(); + + let filter = LogicalFilter::new(scan.into_plan_node(), filter_expr); + let proj_exprs = ExprList::new(vec![ + ColumnRefExpr::new(0).into_expr(), + ColumnRefExpr::new(4).into_expr(), + ColumnRefExpr::new(5).into_expr(), + ColumnRefExpr::new(7).into_expr(), + ]); + + let proj = LogicalProjection::new( + filter.into_plan_node(), + proj_exprs.clone(), + ).into_rel_node(); + + let plan = test_optimizer.optimize(proj).unwrap(); + + let res_filter_expr = LogOpExpr::new( + LogOpType::And, + ExprList::new(vec![ + BinOpExpr::new( + ColumnRefExpr::new(2).into_expr(), + ConstantExpr::int32(3).into_expr(), + BinOpType::Eq, + ) + .into_expr(), + BinOpExpr::new( + ConstantExpr::int32(6).into_expr(), + ColumnRefExpr::new(4).into_expr(), + BinOpType::Eq, + ) + .into_expr(), + ]), + ).into_expr().into_rel_node(); + + let top_proj_exprs = ExprList::new(vec![ + ColumnRefExpr::new(0).into_expr(), + ColumnRefExpr::new(1).into_expr(), + ColumnRefExpr::new(2).into_expr(), + ColumnRefExpr::new(3).into_expr(), + ]).into_rel_node(); + + let bot_proj_exprs = ExprList::new(vec![ + ColumnRefExpr::new(0).into_expr(), + ColumnRefExpr::new(4).into_expr(), + ColumnRefExpr::new(5).into_expr(), + 
ColumnRefExpr::new(7).into_expr(), + ColumnRefExpr::new(2).into_expr(), + ]).into_rel_node(); + + assert!(matches!(plan.typ, OptRelNodeTyp::Projection)); + assert_eq!(plan.child(1), top_proj_exprs); + + assert!(matches!(plan.child(0).typ, OptRelNodeTyp::Filter)); + assert_eq!(plan.child(0).child(1), res_filter_expr); + + assert!(matches!(plan.child(0).child(0).typ, OptRelNodeTyp::Projection)); + assert_eq!(plan.child(0).child(0).child(1), bot_proj_exprs); + } } \ No newline at end of file diff --git a/optd-datafusion-repr/src/rules/project_transpose/project_transpose_common.rs b/optd-datafusion-repr/src/rules/project_transpose/project_transpose_common.rs index 7236a2ad..6ebcfba5 100644 --- a/optd-datafusion-repr/src/rules/project_transpose/project_transpose_common.rs +++ b/optd-datafusion-repr/src/rules/project_transpose/project_transpose_common.rs @@ -108,11 +108,13 @@ impl ProjectionMapping { } let exprs = exprs.to_vec(); for i in &self.forward { - new_projection_exprs.push(exprs[*i].clone()); + let col_idx = self.projection_col_maps_to(*i).unwrap(); + new_projection_exprs.push(exprs[col_idx].clone()); }; } else { - for i in 0..exprs.len() { - let col_idx = self.projection_col_maps_to(i).unwrap(); + for i in exprs.to_vec() { + let col_ref = ColumnRefExpr::from_rel_node(i.into_rel_node()).unwrap(); + let col_idx = self.original_col_maps_to(col_ref.index()).unwrap(); let col: Expr = ColumnRefExpr::new(col_idx).into_expr(); new_projection_exprs.push(col); }; diff --git a/optd-datafusion-repr/src/testing/tpch_catalog.rs b/optd-datafusion-repr/src/testing/tpch_catalog.rs index cc5c1389..be99c394 100644 --- a/optd-datafusion-repr/src/testing/tpch_catalog.rs +++ b/optd-datafusion-repr/src/testing/tpch_catalog.rs @@ -10,6 +10,27 @@ pub struct TpchCatalog; impl Catalog for TpchCatalog { fn get(&self, name: &str) -> Schema { match name { + "region" => { + Schema { + fields: vec![ + Field { + name: "regionkey".to_string(), + typ: ConstantType::Int32, + nullable: false, + }, 
+ Field { + name: "name".to_string(), + typ: ConstantType::Utf8String, + nullable: false, + }, + Field { + name: "comment".to_string(), + typ: ConstantType::Utf8String, + nullable: false, + }, + ], + } + } "customer" => { // Define the schema for the "customer" table From 804ef963f722202fbb3bb24467b98edc1fb0a21f Mon Sep 17 00:00:00 2001 From: Sweetsuro Date: Wed, 17 Apr 2024 15:06:45 -0400 Subject: [PATCH 54/61] project merge tests --- .../rules/project_transpose/project_merge.rs | 172 +++++++++++++++++- .../rules/project_transpose/project_remove.rs | 2 + .../project_transpose_common.rs | 7 +- 3 files changed, 175 insertions(+), 6 deletions(-) create mode 100644 optd-datafusion-repr/src/rules/project_transpose/project_remove.rs diff --git a/optd-datafusion-repr/src/rules/project_transpose/project_merge.rs b/optd-datafusion-repr/src/rules/project_transpose/project_merge.rs index dc109a8e..4e01e654 100644 --- a/optd-datafusion-repr/src/rules/project_transpose/project_merge.rs +++ b/optd-datafusion-repr/src/rules/project_transpose/project_merge.rs @@ -11,7 +11,7 @@ use crate::rules::macros::define_rule; use super::project_transpose_common::ProjectionMapping; // Proj (Proj A) -> Proj A -// merges/removes projections +// merges projections define_rule!( ProjectMergeRule, apply_projection_merge, @@ -45,3 +45,173 @@ fn apply_projection_merge( vec![node.into_rel_node().as_ref().clone()] } + +#[cfg(test)] +mod tests { + use std::sync::Arc; + + use optd_core::optimizer::Optimizer; + + use crate::{ + plan_nodes::{ + ColumnRefExpr, ExprList, LogicalProjection, LogicalScan, OptRelNode, OptRelNodeTyp + }, + rules::ProjectMergeRule, + testing::new_test_optimizer, + }; + + #[test] + fn proj_merge_basic() { + // convert proj -> proj -> scan to proj -> scan + let mut test_optimizer = new_test_optimizer(Arc::new(ProjectMergeRule::new())); + + let scan = LogicalScan::new("customer".into()); + + let top_proj_exprs = ExprList::new( + vec![ + ColumnRefExpr::new(2).into_expr(), + 
ColumnRefExpr::new(0).into_expr() + ] + ); + + let bot_proj_exprs = ExprList::new( + vec![ + ColumnRefExpr::new(2).into_expr(), + ColumnRefExpr::new(0).into_expr(), + ColumnRefExpr::new(4).into_expr() + ] + ); + + let bot_proj = LogicalProjection::new(scan.into_plan_node(), bot_proj_exprs); + let top_proj = LogicalProjection::new(bot_proj.into_plan_node(), top_proj_exprs); + + let plan = test_optimizer.optimize(top_proj.into_rel_node()).unwrap(); + + let res_proj_exprs = ExprList::new( + vec![ + ColumnRefExpr::new(4).into_expr(), + ColumnRefExpr::new(2).into_expr(), + ] + ).into_rel_node(); + + assert_eq!(plan.typ, OptRelNodeTyp::Projection); + assert_eq!(plan.child(1), res_proj_exprs); + assert!(matches!(plan.child(0).typ, OptRelNodeTyp::Scan)); + } + + #[test] + fn proj_merge_adv() { + // convert proj -> proj -> proj -> scan to proj -> scan + let mut test_optimizer = new_test_optimizer(Arc::new(ProjectMergeRule::new())); + + let scan = LogicalScan::new("customer".into()); + + let proj_exprs_1 = ExprList::new( + vec![ + ColumnRefExpr::new(2).into_expr(), + ColumnRefExpr::new(0).into_expr(), + ColumnRefExpr::new(4).into_expr(), + ColumnRefExpr::new(3).into_expr() + ] + ); + + let proj_exprs_2 = ExprList::new( + vec![ + ColumnRefExpr::new(1).into_expr(), + ColumnRefExpr::new(0).into_expr(), + ColumnRefExpr::new(3).into_expr() + ] + ); + + let proj_exprs_3 = ExprList::new( + vec![ + ColumnRefExpr::new(1).into_expr(), + ColumnRefExpr::new(0).into_expr(), + ColumnRefExpr::new(2).into_expr() + ] + ); + + let proj_1 = LogicalProjection::new(scan.into_plan_node(), proj_exprs_1); + let proj_2 = LogicalProjection::new(proj_1.into_plan_node(), proj_exprs_2); + let proj_3 = LogicalProjection::new(proj_2.into_plan_node(), proj_exprs_3); + + // needs to be called twice + let plan = test_optimizer.optimize(proj_3.into_rel_node()).unwrap(); + let plan = test_optimizer.optimize(plan).unwrap(); + + let res_proj_exprs = ExprList::new( + vec![ + ColumnRefExpr::new(2).into_expr(), + 
ColumnRefExpr::new(0).into_expr(), + ColumnRefExpr::new(3).into_expr(), + ] + ).into_rel_node(); + + assert_eq!(plan.typ, OptRelNodeTyp::Projection); + assert_eq!(plan.child(1), res_proj_exprs); + assert!(matches!(plan.child(0).typ, OptRelNodeTyp::Scan)); + } + + #[test] + fn proj_merge_adv_2() { + // convert proj -> proj -> proj -> proj -> scan to proj -> scan + let mut test_optimizer = new_test_optimizer(Arc::new(ProjectMergeRule::new())); + + let scan = LogicalScan::new("customer".into()); + + let proj_exprs_1 = ExprList::new( + vec![ + ColumnRefExpr::new(2).into_expr(), + ColumnRefExpr::new(0).into_expr(), + ColumnRefExpr::new(4).into_expr(), + ColumnRefExpr::new(3).into_expr() + ] + ); + + let proj_exprs_2 = ExprList::new( + vec![ + ColumnRefExpr::new(1).into_expr(), + ColumnRefExpr::new(0).into_expr(), + ColumnRefExpr::new(3).into_expr() + ] + ); + + let proj_exprs_3 = ExprList::new( + vec![ + ColumnRefExpr::new(1).into_expr(), + ColumnRefExpr::new(0).into_expr(), + ColumnRefExpr::new(2).into_expr() + ] + ); + + let proj_exprs_4 = ExprList::new( + vec![ + ColumnRefExpr::new(0).into_expr(), + ColumnRefExpr::new(1).into_expr(), + ColumnRefExpr::new(2).into_expr() + ] + ); + + let proj_1 = LogicalProjection::new(scan.into_plan_node(), proj_exprs_1); + let proj_2 = LogicalProjection::new(proj_1.into_plan_node(), proj_exprs_2); + let proj_3 = LogicalProjection::new(proj_2.into_plan_node(), proj_exprs_3); + let proj_4 = LogicalProjection::new(proj_3.into_plan_node(), proj_exprs_4); + + // needs to be called three times + let plan = test_optimizer.optimize(proj_4.into_rel_node()).unwrap(); + let plan = test_optimizer.optimize(plan).unwrap(); + let plan = test_optimizer.optimize(plan).unwrap(); + + let res_proj_exprs = ExprList::new( + vec![ + ColumnRefExpr::new(2).into_expr(), + ColumnRefExpr::new(0).into_expr(), + ColumnRefExpr::new(3).into_expr(), + ] + ).into_rel_node(); + + assert_eq!(plan.typ, OptRelNodeTyp::Projection); + assert_eq!(plan.child(1), 
res_proj_exprs); + assert!(matches!(plan.child(0).typ, OptRelNodeTyp::Scan)); + } +} \ No newline at end of file diff --git a/optd-datafusion-repr/src/rules/project_transpose/project_remove.rs b/optd-datafusion-repr/src/rules/project_transpose/project_remove.rs new file mode 100644 index 00000000..5490307d --- /dev/null +++ b/optd-datafusion-repr/src/rules/project_transpose/project_remove.rs @@ -0,0 +1,2 @@ +// intended to remove a projection that outputs the same num of cols +// that are in scan node \ No newline at end of file diff --git a/optd-datafusion-repr/src/rules/project_transpose/project_transpose_common.rs b/optd-datafusion-repr/src/rules/project_transpose/project_transpose_common.rs index 6ebcfba5..affdbc27 100644 --- a/optd-datafusion-repr/src/rules/project_transpose/project_transpose_common.rs +++ b/optd-datafusion-repr/src/rules/project_transpose/project_transpose_common.rs @@ -103,12 +103,9 @@ impl ProjectionMapping { } let mut new_projection_exprs = Vec::new(); if is_top_mapped { - if exprs.len() > self.forward.len() { - return None; - } let exprs = exprs.to_vec(); - for i in &self.forward { - let col_idx = self.projection_col_maps_to(*i).unwrap(); + for i in 0..self.forward.len() { + let col_idx = self.projection_col_maps_to(i).unwrap(); new_projection_exprs.push(exprs[col_idx].clone()); }; } else { From facd914bd9a7f5042f05105b1c35de0b38d911f0 Mon Sep 17 00:00:00 2001 From: Sweetsuro Date: Wed, 17 Apr 2024 16:13:24 -0400 Subject: [PATCH 55/61] move filter project into project filter file --- optd-datafusion-repr/src/rules.rs | 3 +- .../src/rules/project_transpose.rs | 3 +- .../filter_project_transpose.rs | 133 ------------------ .../project_filter_transpose.rs | 115 ++++++++++++++- 4 files changed, 113 insertions(+), 141 deletions(-) delete mode 100644 optd-datafusion-repr/src/rules/project_transpose/filter_project_transpose.rs diff --git a/optd-datafusion-repr/src/rules.rs b/optd-datafusion-repr/src/rules.rs index 9d620e8a..d72463ab 100644 --- 
a/optd-datafusion-repr/src/rules.rs +++ b/optd-datafusion-repr/src/rules.rs @@ -11,9 +11,8 @@ mod project_transpose; // pub use filter_join::FilterJoinPullUpRule; pub use project_transpose::{ project_merge::ProjectMergeRule, - project_filter_transpose::ProjectFilterTransposeRule, + project_filter_transpose::{ProjectFilterTransposeRule, FilterProjectTransposeRule}, project_join_transpose::ProjectionPullUpJoin, - filter_project_transpose::FilterProjectTransposeRule, }; pub use eliminate_duplicated_expr::{ EliminateDuplicatedAggExprRule, EliminateDuplicatedSortExprRule, diff --git a/optd-datafusion-repr/src/rules/project_transpose.rs b/optd-datafusion-repr/src/rules/project_transpose.rs index 06a9f509..5a923743 100644 --- a/optd-datafusion-repr/src/rules/project_transpose.rs +++ b/optd-datafusion-repr/src/rules/project_transpose.rs @@ -1,5 +1,4 @@ pub mod project_transpose_common; pub mod project_merge; pub mod project_filter_transpose; -pub mod project_join_transpose; -pub mod filter_project_transpose; \ No newline at end of file +pub mod project_join_transpose; \ No newline at end of file diff --git a/optd-datafusion-repr/src/rules/project_transpose/filter_project_transpose.rs b/optd-datafusion-repr/src/rules/project_transpose/filter_project_transpose.rs deleted file mode 100644 index 27a3498c..00000000 --- a/optd-datafusion-repr/src/rules/project_transpose/filter_project_transpose.rs +++ /dev/null @@ -1,133 +0,0 @@ -use std::collections::HashMap; -use std::vec; - -use optd_core::rules::{Rule, RuleMatcher}; -use optd_core::{optimizer::Optimizer, rel_node::RelNode}; - -use crate::plan_nodes::{ - Expr, ExprList, LogicalFilter, LogicalProjection, OptRelNode, OptRelNodeTyp, PlanNode -}; - -use crate::rules::macros::define_rule; -use crate::rules::project_transpose::project_transpose_common::ProjectionMapping; - -define_rule!( - FilterProjectTransposeRule, - apply_filter_project_transpose, - (Filter, (Projection, child, [exprs]), [cond]) -); - -/// Datafusion only pushes 
filter past project when the project does not contain -/// volatile (i.e. non-deterministic) expressions that are present in the filter -/// Calcite only checks if the projection contains a windowing calculation -/// We check neither of those things and do it always (which may be wrong) -fn apply_filter_project_transpose( - _optimizer: &impl Optimizer, - FilterProjectTransposeRulePicks { child, exprs, cond }: FilterProjectTransposeRulePicks, -) -> Vec> { - let child = PlanNode::from_group(child.into()); - let cond_as_expr = Expr::from_rel_node(cond.into()).unwrap(); - let exprs = ExprList::from_rel_node(exprs.into()).unwrap(); - - let proj_col_map = ProjectionMapping::build(&exprs).unwrap(); - let rewritten_cond = proj_col_map.rewrite_filter_cond(cond_as_expr.clone(), false); - - let new_filter_node = LogicalFilter::new(child, rewritten_cond); - let new_proj = LogicalProjection::new(new_filter_node.into_plan_node(), exprs); - vec![new_proj.into_rel_node().as_ref().clone()] -} - -#[cfg(test)] -mod tests { - use std::sync::Arc; - - use optd_core::optimizer::Optimizer; - - use crate::{ - plan_nodes::{ - BinOpExpr, BinOpType, ColumnRefExpr, ConstantExpr, ExprList, LogOpExpr, LogOpType, - LogicalFilter, LogicalProjection, LogicalScan, OptRelNode, OptRelNodeTyp - }, - rules::FilterProjectTransposeRule, - testing::new_test_optimizer, - }; - - #[test] - fn push_past_proj_basic() { - let mut test_optimizer = new_test_optimizer(Arc::new(FilterProjectTransposeRule::new())); - - let scan = LogicalScan::new("customer".into()); - let proj = LogicalProjection::new(scan.into_plan_node(), ExprList::new(vec![ColumnRefExpr::new(0).into_expr()])); - - let filter_expr = BinOpExpr::new( - ColumnRefExpr::new(0).into_expr(), - ConstantExpr::int32(5).into_expr(), - BinOpType::Eq, - ) - .into_expr(); - - let filter = LogicalFilter::new(proj.into_plan_node(), filter_expr); - let plan = test_optimizer.optimize(filter.into_rel_node()).unwrap(); - - assert_eq!(plan.typ, 
OptRelNodeTyp::Projection); - assert!(matches!(plan.child(0).typ, OptRelNodeTyp::Filter)); - } - - #[test] - fn push_past_proj_adv() { - let mut test_optimizer = new_test_optimizer(Arc::new(FilterProjectTransposeRule::new())); - - let scan = LogicalScan::new("customer".into()); - let proj = LogicalProjection::new( - scan.into_plan_node(), - ExprList::new(vec![ - ColumnRefExpr::new(0).into_expr(), - ColumnRefExpr::new(4).into_expr(), - ColumnRefExpr::new(5).into_expr(), - ColumnRefExpr::new(7).into_expr(), - ]), - ); - - let filter_expr = LogOpExpr::new( - LogOpType::And, - ExprList::new(vec![ - BinOpExpr::new( - // This one should be pushed to the left child - ColumnRefExpr::new(1).into_expr(), - ConstantExpr::int32(5).into_expr(), - BinOpType::Eq, - ) - .into_expr(), - BinOpExpr::new( - // This one should be pushed to the right child - ColumnRefExpr::new(3).into_expr(), - ConstantExpr::int32(6).into_expr(), - BinOpType::Eq, - ) - .into_expr(), - ]), - ); - - let filter = LogicalFilter::new(proj.into_plan_node(), filter_expr.into_expr()); - - let plan = test_optimizer.optimize(filter.into_rel_node()).unwrap(); - - assert!(matches!(plan.typ, OptRelNodeTyp::Projection)); - let plan_filter = LogicalFilter::from_rel_node(plan.child(0)).unwrap(); - assert!(matches!(plan_filter.0.typ(), OptRelNodeTyp::Filter)); - let plan_filter_expr = - LogOpExpr::from_rel_node(plan_filter.cond().into_rel_node()).unwrap(); - assert!(matches!(plan_filter_expr.op_type(), LogOpType::And)); - let op_0 = BinOpExpr::from_rel_node(plan_filter_expr.children()[0].clone().into_rel_node()) - .unwrap(); - let col_0 = - ColumnRefExpr::from_rel_node(op_0.left_child().clone().into_rel_node()).unwrap(); - assert_eq!(col_0.index(), 4); - let op_1 = BinOpExpr::from_rel_node(plan_filter_expr.children()[1].clone().into_rel_node()) - .unwrap(); - let col_1 = - ColumnRefExpr::from_rel_node(op_1.left_child().clone().into_rel_node()).unwrap(); - assert_eq!(col_1.index(), 7); - } - -} \ No newline at end of 
file diff --git a/optd-datafusion-repr/src/rules/project_transpose/project_filter_transpose.rs b/optd-datafusion-repr/src/rules/project_transpose/project_filter_transpose.rs index 978a4c03..8d07ee39 100644 --- a/optd-datafusion-repr/src/rules/project_transpose/project_filter_transpose.rs +++ b/optd-datafusion-repr/src/rules/project_transpose/project_filter_transpose.rs @@ -1,4 +1,5 @@ use std::collections::HashMap; +use std::vec; use optd_core::rules::{Rule, RuleMatcher}; use optd_core::{optimizer::Optimizer, rel_node::RelNode}; @@ -15,9 +16,6 @@ fn merge_exprs(first: ExprList, second: ExprList) -> ExprList { ExprList::new(res_vec) } -// pushes projections through filters -// adds a projection node after a filter node -// only keeping necessary columns (proj node exprs + filter col exprs) define_rule!( ProjectFilterTransposeRule, apply_projection_filter_transpose, @@ -28,6 +26,9 @@ define_rule!( ) ); +/// pushes projections through filters +/// adds a projection node after a filter node +/// only keeping necessary columns (proj node exprs + filter col exprs) fn apply_projection_filter_transpose( _optimizer: &impl Optimizer, ProjectFilterTransposeRulePicks { child, cond, exprs }: ProjectFilterTransposeRulePicks, @@ -71,6 +72,32 @@ fn apply_projection_filter_transpose( vec![top_proj_node.into_rel_node().as_ref().clone()] } +define_rule!( + FilterProjectTransposeRule, + apply_filter_project_transpose, + (Filter, (Projection, child, [exprs]), [cond]) +); + +/// Datafusion only pushes filter past project when the project does not contain +/// volatile (i.e. 
non-deterministic) expressions that are present in the filter +/// Calcite only checks if the projection contains a windowing calculation +/// We check neither of those things and do it always (which may be wrong) +fn apply_filter_project_transpose( + _optimizer: &impl Optimizer, + FilterProjectTransposeRulePicks { child, exprs, cond }: FilterProjectTransposeRulePicks, +) -> Vec> { + let child = PlanNode::from_group(child.into()); + let cond_as_expr = Expr::from_rel_node(cond.into()).unwrap(); + let exprs = ExprList::from_rel_node(exprs.into()).unwrap(); + + let proj_col_map = ProjectionMapping::build(&exprs).unwrap(); + let rewritten_cond = proj_col_map.rewrite_filter_cond(cond_as_expr.clone(), false); + + let new_filter_node = LogicalFilter::new(child, rewritten_cond); + let new_proj = LogicalProjection::new(new_filter_node.into_plan_node(), exprs); + vec![new_proj.into_rel_node().as_ref().clone()] +} + #[cfg(test)] mod tests { use std::sync::Arc; @@ -82,10 +109,11 @@ mod tests { BinOpExpr, BinOpType, ColumnRefExpr, ConstantExpr, ExprList, LogOpExpr, LogOpType, LogicalFilter, LogicalProjection, LogicalScan, OptRelNode, OptRelNodeTyp }, - rules::ProjectFilterTransposeRule, + rules::{ProjectFilterTransposeRule, FilterProjectTransposeRule}, testing::new_test_optimizer, }; + // ProjectFilterTransposeRule Tests #[test] fn push_proj_past_filter_basic_1() { // convert proj -> filter -> scan to filter -> proj -> scan @@ -333,4 +361,83 @@ mod tests { assert!(matches!(plan.child(0).child(0).typ, OptRelNodeTyp::Projection)); assert_eq!(plan.child(0).child(0).child(1), bot_proj_exprs); } + + // FilterProjectTransposeRule Tests + #[test] + fn push_filter_past_proj_basic() { + let mut test_optimizer = new_test_optimizer(Arc::new(FilterProjectTransposeRule::new())); + + let scan = LogicalScan::new("customer".into()); + let proj = LogicalProjection::new(scan.into_plan_node(), ExprList::new(vec![ColumnRefExpr::new(0).into_expr()])); + + let filter_expr = BinOpExpr::new( + 
ColumnRefExpr::new(0).into_expr(), + ConstantExpr::int32(5).into_expr(), + BinOpType::Eq, + ) + .into_expr(); + + let filter = LogicalFilter::new(proj.into_plan_node(), filter_expr); + let plan = test_optimizer.optimize(filter.into_rel_node()).unwrap(); + + assert_eq!(plan.typ, OptRelNodeTyp::Projection); + assert!(matches!(plan.child(0).typ, OptRelNodeTyp::Filter)); + } + + #[test] + fn push_filter_past_proj_adv() { + let mut test_optimizer = new_test_optimizer(Arc::new(FilterProjectTransposeRule::new())); + + let scan = LogicalScan::new("customer".into()); + let proj = LogicalProjection::new( + scan.into_plan_node(), + ExprList::new(vec![ + ColumnRefExpr::new(0).into_expr(), + ColumnRefExpr::new(4).into_expr(), + ColumnRefExpr::new(5).into_expr(), + ColumnRefExpr::new(7).into_expr(), + ]), + ); + + let filter_expr = LogOpExpr::new( + LogOpType::And, + ExprList::new(vec![ + BinOpExpr::new( + // This one should be pushed to the left child + ColumnRefExpr::new(1).into_expr(), + ConstantExpr::int32(5).into_expr(), + BinOpType::Eq, + ) + .into_expr(), + BinOpExpr::new( + // This one should be pushed to the right child + ColumnRefExpr::new(3).into_expr(), + ConstantExpr::int32(6).into_expr(), + BinOpType::Eq, + ) + .into_expr(), + ]), + ); + + let filter = LogicalFilter::new(proj.into_plan_node(), filter_expr.into_expr()); + + let plan = test_optimizer.optimize(filter.into_rel_node()).unwrap(); + + assert!(matches!(plan.typ, OptRelNodeTyp::Projection)); + let plan_filter = LogicalFilter::from_rel_node(plan.child(0)).unwrap(); + assert!(matches!(plan_filter.0.typ(), OptRelNodeTyp::Filter)); + let plan_filter_expr = + LogOpExpr::from_rel_node(plan_filter.cond().into_rel_node()).unwrap(); + assert!(matches!(plan_filter_expr.op_type(), LogOpType::And)); + let op_0 = BinOpExpr::from_rel_node(plan_filter_expr.children()[0].clone().into_rel_node()) + .unwrap(); + let col_0 = + ColumnRefExpr::from_rel_node(op_0.left_child().clone().into_rel_node()).unwrap(); + 
assert_eq!(col_0.index(), 4); + let op_1 = BinOpExpr::from_rel_node(plan_filter_expr.children()[1].clone().into_rel_node()) + .unwrap(); + let col_1 = + ColumnRefExpr::from_rel_node(op_1.left_child().clone().into_rel_node()).unwrap(); + assert_eq!(col_1.index(), 7); + } } \ No newline at end of file From 42bb85bc6d5a1d9b5ada59d61622fab1d39cfb35 Mon Sep 17 00:00:00 2001 From: Sweetsuro Date: Wed, 17 Apr 2024 16:17:06 -0400 Subject: [PATCH 56/61] fix fmt and clippy --- optd-datafusion-repr/src/lib.rs | 4 +- optd-datafusion-repr/src/plan_nodes.rs | 27 +-- optd-datafusion-repr/src/rules.rs | 14 +- .../src/rules/filter_pushdown.rs | 4 +- .../src/rules/project_transpose.rs | 6 +- .../project_filter_transpose.rs | 132 ++++++------ .../project_join_transpose.rs | 10 +- .../rules/project_transpose/project_merge.rs | 194 ++++++++---------- .../project_transpose_common.rs | 34 +-- .../src/testing/tpch_catalog.rs | 40 ++-- 10 files changed, 219 insertions(+), 246 deletions(-) diff --git a/optd-datafusion-repr/src/lib.rs b/optd-datafusion-repr/src/lib.rs index 27cf4787..c6e961a6 100644 --- a/optd-datafusion-repr/src/lib.rs +++ b/optd-datafusion-repr/src/lib.rs @@ -26,8 +26,8 @@ use rules::{ EliminateJoinRule, EliminateLimitRule, FilterAggTransposeRule, FilterCrossJoinTransposeRule, FilterInnerJoinTransposeRule, FilterMergeRule, FilterProjectTransposeRule, FilterSortTransposeRule, HashJoinRule, JoinAssocRule, JoinCommuteRule, PhysicalConversionRule, - ProjectionPullUpJoin, SimplifyFilterRule, SimplifyJoinCondRule, - ProjectMergeRule, ProjectFilterTransposeRule, + ProjectFilterTransposeRule, ProjectMergeRule, ProjectionPullUpJoin, SimplifyFilterRule, + SimplifyJoinCondRule, }; pub use optd_core::rel_node::Value; diff --git a/optd-datafusion-repr/src/plan_nodes.rs b/optd-datafusion-repr/src/plan_nodes.rs index fea21e2d..0dca376d 100644 --- a/optd-datafusion-repr/src/plan_nodes.rs +++ b/optd-datafusion-repr/src/plan_nodes.rs @@ -336,13 +336,11 @@ impl Expr { ) } - /// Recursively 
retrieves all column references in the expression + /// Recursively retrieves all column references in the expression /// using a provided function. /// The provided function will, given a ColumnRefExpr's index, /// return a Vec including the expr in col ref. - pub fn get_column_refs( - &self - ) -> Vec { + pub fn get_column_refs(&self) -> Vec { assert!(self.typ().is_expression()); if let OptRelNodeTyp::ColumnRef = self.typ() { let col_ref = Expr::from_rel_node(self.0.clone()).unwrap(); @@ -350,20 +348,17 @@ impl Expr { } let children = self.0.children.clone(); - let children = children - .into_iter() - .map(|child| { - if child.typ == OptRelNodeTyp::List { - // TODO: What should we do with List? - return vec![]; - } - Expr::from_rel_node(child.clone()) - .unwrap() - .get_column_refs() - }); + let children = children.into_iter().map(|child| { + if child.typ == OptRelNodeTyp::List { + // TODO: What should we do with List? + return vec![]; + } + Expr::from_rel_node(child.clone()) + .unwrap() + .get_column_refs() + }); children.collect_vec().concat() } - } impl OptRelNode for Expr { diff --git a/optd-datafusion-repr/src/rules.rs b/optd-datafusion-repr/src/rules.rs index d72463ab..aad6a908 100644 --- a/optd-datafusion-repr/src/rules.rs +++ b/optd-datafusion-repr/src/rules.rs @@ -9,11 +9,6 @@ mod physical; mod project_transpose; // pub use filter_join::FilterJoinPullUpRule; -pub use project_transpose::{ - project_merge::ProjectMergeRule, - project_filter_transpose::{ProjectFilterTransposeRule, FilterProjectTransposeRule}, - project_join_transpose::ProjectionPullUpJoin, -}; pub use eliminate_duplicated_expr::{ EliminateDuplicatedAggExprRule, EliminateDuplicatedSortExprRule, }; @@ -23,7 +18,10 @@ pub use filter_pushdown::{ FilterAggTransposeRule, FilterCrossJoinTransposeRule, FilterInnerJoinTransposeRule, FilterMergeRule, FilterSortTransposeRule, }; -pub use joins::{ - EliminateJoinRule, HashJoinRule, JoinAssocRule, JoinCommuteRule, -}; +pub use joins::{EliminateJoinRule, 
HashJoinRule, JoinAssocRule, JoinCommuteRule}; pub use physical::PhysicalConversionRule; +pub use project_transpose::{ + project_filter_transpose::{FilterProjectTransposeRule, ProjectFilterTransposeRule}, + project_join_transpose::ProjectionPullUpJoin, + project_merge::ProjectMergeRule, +}; diff --git a/optd-datafusion-repr/src/rules/filter_pushdown.rs b/optd-datafusion-repr/src/rules/filter_pushdown.rs index ee5ec6ad..6143fc79 100644 --- a/optd-datafusion-repr/src/rules/filter_pushdown.rs +++ b/optd-datafusion-repr/src/rules/filter_pushdown.rs @@ -421,8 +421,8 @@ mod tests { use crate::{ plan_nodes::{ BinOpExpr, BinOpType, ColumnRefExpr, ConstantExpr, ExprList, LogOpExpr, LogOpType, - LogicalAgg, LogicalFilter, LogicalJoin, LogicalScan, LogicalSort, - OptRelNode, OptRelNodeTyp, + LogicalAgg, LogicalFilter, LogicalJoin, LogicalScan, LogicalSort, OptRelNode, + OptRelNodeTyp, }, rules::{ FilterAggTransposeRule, FilterInnerJoinTransposeRule, FilterMergeRule, diff --git a/optd-datafusion-repr/src/rules/project_transpose.rs b/optd-datafusion-repr/src/rules/project_transpose.rs index 5a923743..5c4f45bb 100644 --- a/optd-datafusion-repr/src/rules/project_transpose.rs +++ b/optd-datafusion-repr/src/rules/project_transpose.rs @@ -1,4 +1,4 @@ -pub mod project_transpose_common; -pub mod project_merge; pub mod project_filter_transpose; -pub mod project_join_transpose; \ No newline at end of file +pub mod project_join_transpose; +pub mod project_merge; +pub mod project_transpose_common; diff --git a/optd-datafusion-repr/src/rules/project_transpose/project_filter_transpose.rs b/optd-datafusion-repr/src/rules/project_transpose/project_filter_transpose.rs index 8d07ee39..004ce0cf 100644 --- a/optd-datafusion-repr/src/rules/project_transpose/project_filter_transpose.rs +++ b/optd-datafusion-repr/src/rules/project_transpose/project_filter_transpose.rs @@ -4,11 +4,11 @@ use std::vec; use optd_core::rules::{Rule, RuleMatcher}; use optd_core::{optimizer::Optimizer, rel_node::RelNode}; 
+use super::project_transpose_common::ProjectionMapping; use crate::plan_nodes::{ - Expr, ExprList, LogicalFilter, LogicalProjection, OptRelNode, OptRelNodeTyp, PlanNode + Expr, ExprList, LogicalFilter, LogicalProjection, OptRelNode, OptRelNodeTyp, PlanNode, }; use crate::rules::macros::define_rule; -use super::project_transpose_common::ProjectionMapping; fn merge_exprs(first: ExprList, second: ExprList) -> ExprList { let mut res_vec = first.to_vec(); @@ -19,15 +19,11 @@ fn merge_exprs(first: ExprList, second: ExprList) -> ExprList { define_rule!( ProjectFilterTransposeRule, apply_projection_filter_transpose, - ( - Projection, - (Filter, child, [cond]), - [exprs] - ) + (Projection, (Filter, child, [cond]), [exprs]) ); /// pushes projections through filters -/// adds a projection node after a filter node +/// adds a projection node after a filter node /// only keeping necessary columns (proj node exprs + filter col exprs) fn apply_projection_filter_transpose( _optimizer: &impl Optimizer, @@ -40,11 +36,11 @@ fn apply_projection_filter_transpose( let cond_col_refs = cond_as_expr.get_column_refs(); let mut dedup_cond_col_refs = Vec::new(); - for i in 0..cond_col_refs.len() { - if !exprs_vec.contains(&cond_col_refs[i]) { - dedup_cond_col_refs.push(cond_col_refs[i].clone()); + for col_ref in &cond_col_refs { + if !exprs_vec.contains(col_ref) { + dedup_cond_col_refs.push(col_ref.clone()); }; - }; + } let dedup_cond_col_refs = ExprList::new(dedup_cond_col_refs); @@ -61,8 +57,8 @@ fn apply_projection_filter_transpose( if dedup_cond_col_refs.is_empty() { // can push proj past filter and remove top proj node return vec![new_filter_node.into_rel_node().as_ref().clone()]; - } - + } + // have column ref expressions of cond cols // bottom-most projection will have proj cols + filter cols as a set let Some(top_proj_exprs) = mapping.rewrite_projection(&exprs, false) else { @@ -106,10 +102,10 @@ mod tests { use crate::{ plan_nodes::{ - BinOpExpr, BinOpType, ColumnRefExpr, 
ConstantExpr, ExprList, LogOpExpr, LogOpType, - LogicalFilter, LogicalProjection, LogicalScan, OptRelNode, OptRelNodeTyp + BinOpExpr, BinOpType, ColumnRefExpr, ConstantExpr, ExprList, LogOpExpr, LogOpType, + LogicalFilter, LogicalProjection, LogicalScan, OptRelNode, OptRelNodeTyp, }, - rules::{ProjectFilterTransposeRule, FilterProjectTransposeRule}, + rules::{FilterProjectTransposeRule, ProjectFilterTransposeRule}, testing::new_test_optimizer, }; @@ -131,12 +127,10 @@ mod tests { let filter = LogicalFilter::new(scan.into_plan_node(), filter_expr); - let proj_exprs = ExprList::new( - vec![ - ColumnRefExpr::new(2).into_expr(), - ColumnRefExpr::new(0).into_expr() - ] - ); + let proj_exprs = ExprList::new(vec![ + ColumnRefExpr::new(2).into_expr(), + ColumnRefExpr::new(0).into_expr(), + ]); let proj = LogicalProjection::new(filter.into_plan_node(), proj_exprs.clone()); @@ -147,7 +141,8 @@ mod tests { ConstantExpr::int32(5).into_expr(), BinOpType::Eq, ) - .into_expr().into_rel_node(); + .into_expr() + .into_rel_node(); assert_eq!(plan.child(1), res_filter_expr); assert_eq!(plan.typ, OptRelNodeTyp::Filter); @@ -174,31 +169,24 @@ mod tests { let filter = LogicalFilter::new(scan.into_plan_node(), filter_expr); - let proj_exprs = ExprList::new( - vec![ - ColumnRefExpr::new(1).into_expr(), - ] - ); + let proj_exprs = ExprList::new(vec![ColumnRefExpr::new(1).into_expr()]); let res_filter_expr: Arc> = BinOpExpr::new( ColumnRefExpr::new(1).into_expr(), ConstantExpr::int32(5).into_expr(), BinOpType::Eq, ) - .into_expr().into_rel_node(); + .into_expr() + .into_rel_node(); - let res_top_proj_exprs: Arc> = ExprList::new( - vec![ - ColumnRefExpr::new(0).into_expr(), - ] - ).into_rel_node(); + let res_top_proj_exprs: Arc> = + ExprList::new(vec![ColumnRefExpr::new(0).into_expr()]).into_rel_node(); - let res_bot_proj_exprs = ExprList::new( - vec![ - ColumnRefExpr::new(1).into_expr(), - ColumnRefExpr::new(2).into_expr(), - ] - ).into_rel_node(); + let res_bot_proj_exprs = 
ExprList::new(vec![ + ColumnRefExpr::new(1).into_expr(), + ColumnRefExpr::new(2).into_expr(), + ]) + .into_rel_node(); let proj = LogicalProjection::new(filter.into_plan_node(), proj_exprs); @@ -210,15 +198,22 @@ mod tests { assert!(matches!(plan.child(0).typ, OptRelNodeTyp::Filter)); assert_eq!(plan.child(0).child(1), res_filter_expr); - assert!(matches!(plan.child(0).child(0).typ, OptRelNodeTyp::Projection)); + assert!(matches!( + plan.child(0).child(0).typ, + OptRelNodeTyp::Projection + )); assert_eq!(plan.child(0).child(0).child(1), res_bot_proj_exprs); - assert!(matches!(plan.child(0).child(0).child(0).typ, OptRelNodeTyp::Scan)); + assert!(matches!( + plan.child(0).child(0).child(0).typ, + OptRelNodeTyp::Scan + )); } #[test] fn push_proj_past_filter_adv_1() { - let mut test_optimizer: optd_core::heuristics::HeuristicsOptimizer = new_test_optimizer(Arc::new(ProjectFilterTransposeRule::new())); + let mut test_optimizer: optd_core::heuristics::HeuristicsOptimizer = + new_test_optimizer(Arc::new(ProjectFilterTransposeRule::new())); let scan = LogicalScan::new("customer".into()); @@ -238,7 +233,8 @@ mod tests { ) .into_expr(), ]), - ).into_expr(); + ) + .into_expr(); let filter = LogicalFilter::new(scan.into_plan_node(), filter_expr); let proj_exprs = ExprList::new(vec![ @@ -248,10 +244,8 @@ mod tests { ColumnRefExpr::new(7).into_expr(), ]); - let proj = LogicalProjection::new( - filter.into_plan_node(), - proj_exprs.clone(), - ).into_rel_node(); + let proj = + LogicalProjection::new(filter.into_plan_node(), proj_exprs.clone()).into_rel_node(); let plan = test_optimizer.optimize(proj).unwrap(); @@ -271,7 +265,9 @@ mod tests { ) .into_expr(), ]), - ).into_expr().into_rel_node(); + ) + .into_expr() + .into_rel_node(); assert!(matches!(plan.typ, OptRelNodeTyp::Filter)); assert_eq!(plan.child(1), res_filter_expr); @@ -279,10 +275,11 @@ mod tests { assert!(matches!(plan.child(0).typ, OptRelNodeTyp::Projection)); assert_eq!(plan.child(0).child(1), 
proj_exprs.into_rel_node()); } - + #[test] fn push_proj_past_filter_adv_2() { - let mut test_optimizer: optd_core::heuristics::HeuristicsOptimizer = new_test_optimizer(Arc::new(ProjectFilterTransposeRule::new())); + let mut test_optimizer: optd_core::heuristics::HeuristicsOptimizer = + new_test_optimizer(Arc::new(ProjectFilterTransposeRule::new())); let scan = LogicalScan::new("customer".into()); @@ -302,7 +299,8 @@ mod tests { ) .into_expr(), ]), - ).into_expr(); + ) + .into_expr(); let filter = LogicalFilter::new(scan.into_plan_node(), filter_expr); let proj_exprs = ExprList::new(vec![ @@ -312,10 +310,8 @@ mod tests { ColumnRefExpr::new(7).into_expr(), ]); - let proj = LogicalProjection::new( - filter.into_plan_node(), - proj_exprs.clone(), - ).into_rel_node(); + let proj = + LogicalProjection::new(filter.into_plan_node(), proj_exprs.clone()).into_rel_node(); let plan = test_optimizer.optimize(proj).unwrap(); @@ -335,14 +331,17 @@ mod tests { ) .into_expr(), ]), - ).into_expr().into_rel_node(); + ) + .into_expr() + .into_rel_node(); let top_proj_exprs = ExprList::new(vec![ ColumnRefExpr::new(0).into_expr(), ColumnRefExpr::new(1).into_expr(), ColumnRefExpr::new(2).into_expr(), ColumnRefExpr::new(3).into_expr(), - ]).into_rel_node(); + ]) + .into_rel_node(); let bot_proj_exprs = ExprList::new(vec![ ColumnRefExpr::new(0).into_expr(), @@ -350,7 +349,8 @@ mod tests { ColumnRefExpr::new(5).into_expr(), ColumnRefExpr::new(7).into_expr(), ColumnRefExpr::new(2).into_expr(), - ]).into_rel_node(); + ]) + .into_rel_node(); assert!(matches!(plan.typ, OptRelNodeTyp::Projection)); assert_eq!(plan.child(1), top_proj_exprs); @@ -358,7 +358,10 @@ mod tests { assert!(matches!(plan.child(0).typ, OptRelNodeTyp::Filter)); assert_eq!(plan.child(0).child(1), res_filter_expr); - assert!(matches!(plan.child(0).child(0).typ, OptRelNodeTyp::Projection)); + assert!(matches!( + plan.child(0).child(0).typ, + OptRelNodeTyp::Projection + )); assert_eq!(plan.child(0).child(0).child(1), 
bot_proj_exprs); } @@ -368,7 +371,10 @@ mod tests { let mut test_optimizer = new_test_optimizer(Arc::new(FilterProjectTransposeRule::new())); let scan = LogicalScan::new("customer".into()); - let proj = LogicalProjection::new(scan.into_plan_node(), ExprList::new(vec![ColumnRefExpr::new(0).into_expr()])); + let proj = LogicalProjection::new( + scan.into_plan_node(), + ExprList::new(vec![ColumnRefExpr::new(0).into_expr()]), + ); let filter_expr = BinOpExpr::new( ColumnRefExpr::new(0).into_expr(), @@ -440,4 +446,4 @@ mod tests { ColumnRefExpr::from_rel_node(op_1.left_child().clone().into_rel_node()).unwrap(); assert_eq!(col_1.index(), 7); } -} \ No newline at end of file +} diff --git a/optd-datafusion-repr/src/rules/project_transpose/project_join_transpose.rs b/optd-datafusion-repr/src/rules/project_transpose/project_join_transpose.rs index 19aedd4e..52465292 100644 --- a/optd-datafusion-repr/src/rules/project_transpose/project_join_transpose.rs +++ b/optd-datafusion-repr/src/rules/project_transpose/project_join_transpose.rs @@ -1,20 +1,20 @@ -use crate::Rule; use crate::HashMap; +use crate::Rule; use optd_core::rules::RuleMatcher; use std::sync::Arc; use std::vec; +use crate::rules::macros::define_rule; use optd_core::optimizer::Optimizer; use optd_core::rel_node::RelNode; -use crate::rules::macros::define_rule; +use super::project_transpose_common::ProjectionMapping; use crate::plan_nodes::{ - ColumnRefExpr, Expr, ExprList, JoinType, LogicalJoin, LogicalProjection, - OptRelNode, OptRelNodeTyp, PlanNode, + ColumnRefExpr, Expr, ExprList, JoinType, LogicalJoin, LogicalProjection, OptRelNode, + OptRelNodeTyp, PlanNode, }; use crate::properties::schema::SchemaPropertyBuilder; -use super::project_transpose_common::ProjectionMapping; // (Proj A) join B -> (Proj (A join B)) define_rule!( diff --git a/optd-datafusion-repr/src/rules/project_transpose/project_merge.rs b/optd-datafusion-repr/src/rules/project_transpose/project_merge.rs index 4e01e654..84983d53 100644 --- 
a/optd-datafusion-repr/src/rules/project_transpose/project_merge.rs +++ b/optd-datafusion-repr/src/rules/project_transpose/project_merge.rs @@ -3,9 +3,7 @@ use std::collections::HashMap; use optd_core::rules::{Rule, RuleMatcher}; use optd_core::{optimizer::Optimizer, rel_node::RelNode}; -use crate::plan_nodes::{ - ExprList, LogicalProjection, OptRelNode, OptRelNodeTyp, PlanNode -}; +use crate::plan_nodes::{ExprList, LogicalProjection, OptRelNode, OptRelNodeTyp, PlanNode}; use crate::rules::macros::define_rule; use super::project_transpose_common::ProjectionMapping; @@ -15,16 +13,16 @@ use super::project_transpose_common::ProjectionMapping; define_rule!( ProjectMergeRule, apply_projection_merge, - ( - Projection, - (Projection, child, [exprs2]), - [exprs1] - ) + (Projection, (Projection, child, [exprs2]), [exprs1]) ); fn apply_projection_merge( _optimizer: &impl Optimizer, - ProjectMergeRulePicks { child, exprs1, exprs2 }: ProjectMergeRulePicks, + ProjectMergeRulePicks { + child, + exprs1, + exprs2, + }: ProjectMergeRulePicks, ) -> Vec> { let child = PlanNode::from_group(child.into()); let exprs1 = ExprList::from_rel_node(exprs1.into()).unwrap(); @@ -38,10 +36,7 @@ fn apply_projection_merge( return vec![]; }; - let node: LogicalProjection = LogicalProjection::new( - child, - res_exprs, - ); + let node: LogicalProjection = LogicalProjection::new(child, res_exprs); vec![node.into_rel_node().as_ref().clone()] } @@ -54,7 +49,7 @@ mod tests { use crate::{ plan_nodes::{ - ColumnRefExpr, ExprList, LogicalProjection, LogicalScan, OptRelNode, OptRelNodeTyp + ColumnRefExpr, ExprList, LogicalProjection, LogicalScan, OptRelNode, OptRelNodeTyp, }, rules::ProjectMergeRule, testing::new_test_optimizer, @@ -67,32 +62,27 @@ mod tests { let scan = LogicalScan::new("customer".into()); - let top_proj_exprs = ExprList::new( - vec![ - ColumnRefExpr::new(2).into_expr(), - ColumnRefExpr::new(0).into_expr() - ] - ); - - let bot_proj_exprs = ExprList::new( - vec![ - 
ColumnRefExpr::new(2).into_expr(), - ColumnRefExpr::new(0).into_expr(), - ColumnRefExpr::new(4).into_expr() - ] - ); + let top_proj_exprs = ExprList::new(vec![ + ColumnRefExpr::new(2).into_expr(), + ColumnRefExpr::new(0).into_expr(), + ]); + + let bot_proj_exprs = ExprList::new(vec![ + ColumnRefExpr::new(2).into_expr(), + ColumnRefExpr::new(0).into_expr(), + ColumnRefExpr::new(4).into_expr(), + ]); let bot_proj = LogicalProjection::new(scan.into_plan_node(), bot_proj_exprs); let top_proj = LogicalProjection::new(bot_proj.into_plan_node(), top_proj_exprs); let plan = test_optimizer.optimize(top_proj.into_rel_node()).unwrap(); - - let res_proj_exprs = ExprList::new( - vec![ - ColumnRefExpr::new(4).into_expr(), - ColumnRefExpr::new(2).into_expr(), - ] - ).into_rel_node(); + + let res_proj_exprs = ExprList::new(vec![ + ColumnRefExpr::new(4).into_expr(), + ColumnRefExpr::new(2).into_expr(), + ]) + .into_rel_node(); assert_eq!(plan.typ, OptRelNodeTyp::Projection); assert_eq!(plan.child(1), res_proj_exprs); @@ -106,30 +96,24 @@ mod tests { let scan = LogicalScan::new("customer".into()); - let proj_exprs_1 = ExprList::new( - vec![ - ColumnRefExpr::new(2).into_expr(), - ColumnRefExpr::new(0).into_expr(), - ColumnRefExpr::new(4).into_expr(), - ColumnRefExpr::new(3).into_expr() - ] - ); - - let proj_exprs_2 = ExprList::new( - vec![ - ColumnRefExpr::new(1).into_expr(), - ColumnRefExpr::new(0).into_expr(), - ColumnRefExpr::new(3).into_expr() - ] - ); - - let proj_exprs_3 = ExprList::new( - vec![ - ColumnRefExpr::new(1).into_expr(), - ColumnRefExpr::new(0).into_expr(), - ColumnRefExpr::new(2).into_expr() - ] - ); + let proj_exprs_1 = ExprList::new(vec![ + ColumnRefExpr::new(2).into_expr(), + ColumnRefExpr::new(0).into_expr(), + ColumnRefExpr::new(4).into_expr(), + ColumnRefExpr::new(3).into_expr(), + ]); + + let proj_exprs_2 = ExprList::new(vec![ + ColumnRefExpr::new(1).into_expr(), + ColumnRefExpr::new(0).into_expr(), + ColumnRefExpr::new(3).into_expr(), + ]); + + let 
proj_exprs_3 = ExprList::new(vec![ + ColumnRefExpr::new(1).into_expr(), + ColumnRefExpr::new(0).into_expr(), + ColumnRefExpr::new(2).into_expr(), + ]); let proj_1 = LogicalProjection::new(scan.into_plan_node(), proj_exprs_1); let proj_2 = LogicalProjection::new(proj_1.into_plan_node(), proj_exprs_2); @@ -138,20 +122,19 @@ mod tests { // needs to be called twice let plan = test_optimizer.optimize(proj_3.into_rel_node()).unwrap(); let plan = test_optimizer.optimize(plan).unwrap(); - - let res_proj_exprs = ExprList::new( - vec![ - ColumnRefExpr::new(2).into_expr(), - ColumnRefExpr::new(0).into_expr(), - ColumnRefExpr::new(3).into_expr(), - ] - ).into_rel_node(); + + let res_proj_exprs = ExprList::new(vec![ + ColumnRefExpr::new(2).into_expr(), + ColumnRefExpr::new(0).into_expr(), + ColumnRefExpr::new(3).into_expr(), + ]) + .into_rel_node(); assert_eq!(plan.typ, OptRelNodeTyp::Projection); assert_eq!(plan.child(1), res_proj_exprs); assert!(matches!(plan.child(0).typ, OptRelNodeTyp::Scan)); } - + #[test] fn proj_merge_adv_2() { // convert proj -> proj -> proj -> proj -> scan to proj -> scan @@ -159,38 +142,30 @@ mod tests { let scan = LogicalScan::new("customer".into()); - let proj_exprs_1 = ExprList::new( - vec![ - ColumnRefExpr::new(2).into_expr(), - ColumnRefExpr::new(0).into_expr(), - ColumnRefExpr::new(4).into_expr(), - ColumnRefExpr::new(3).into_expr() - ] - ); - - let proj_exprs_2 = ExprList::new( - vec![ - ColumnRefExpr::new(1).into_expr(), - ColumnRefExpr::new(0).into_expr(), - ColumnRefExpr::new(3).into_expr() - ] - ); - - let proj_exprs_3 = ExprList::new( - vec![ - ColumnRefExpr::new(1).into_expr(), - ColumnRefExpr::new(0).into_expr(), - ColumnRefExpr::new(2).into_expr() - ] - ); - - let proj_exprs_4 = ExprList::new( - vec![ - ColumnRefExpr::new(0).into_expr(), - ColumnRefExpr::new(1).into_expr(), - ColumnRefExpr::new(2).into_expr() - ] - ); + let proj_exprs_1 = ExprList::new(vec![ + ColumnRefExpr::new(2).into_expr(), + ColumnRefExpr::new(0).into_expr(), + 
ColumnRefExpr::new(4).into_expr(), + ColumnRefExpr::new(3).into_expr(), + ]); + + let proj_exprs_2 = ExprList::new(vec![ + ColumnRefExpr::new(1).into_expr(), + ColumnRefExpr::new(0).into_expr(), + ColumnRefExpr::new(3).into_expr(), + ]); + + let proj_exprs_3 = ExprList::new(vec![ + ColumnRefExpr::new(1).into_expr(), + ColumnRefExpr::new(0).into_expr(), + ColumnRefExpr::new(2).into_expr(), + ]); + + let proj_exprs_4 = ExprList::new(vec![ + ColumnRefExpr::new(0).into_expr(), + ColumnRefExpr::new(1).into_expr(), + ColumnRefExpr::new(2).into_expr(), + ]); let proj_1 = LogicalProjection::new(scan.into_plan_node(), proj_exprs_1); let proj_2 = LogicalProjection::new(proj_1.into_plan_node(), proj_exprs_2); @@ -201,17 +176,16 @@ mod tests { let plan = test_optimizer.optimize(proj_4.into_rel_node()).unwrap(); let plan = test_optimizer.optimize(plan).unwrap(); let plan = test_optimizer.optimize(plan).unwrap(); - - let res_proj_exprs = ExprList::new( - vec![ - ColumnRefExpr::new(2).into_expr(), - ColumnRefExpr::new(0).into_expr(), - ColumnRefExpr::new(3).into_expr(), - ] - ).into_rel_node(); + + let res_proj_exprs = ExprList::new(vec![ + ColumnRefExpr::new(2).into_expr(), + ColumnRefExpr::new(0).into_expr(), + ColumnRefExpr::new(3).into_expr(), + ]) + .into_rel_node(); assert_eq!(plan.typ, OptRelNodeTyp::Projection); assert_eq!(plan.child(1), res_proj_exprs); assert!(matches!(plan.child(0).typ, OptRelNodeTyp::Scan)); - } -} \ No newline at end of file + } +} diff --git a/optd-datafusion-repr/src/rules/project_transpose/project_transpose_common.rs b/optd-datafusion-repr/src/rules/project_transpose/project_transpose_common.rs index affdbc27..8baca0f0 100644 --- a/optd-datafusion-repr/src/rules/project_transpose/project_transpose_common.rs +++ b/optd-datafusion-repr/src/rules/project_transpose/project_transpose_common.rs @@ -29,7 +29,7 @@ impl ProjectionMapping { let col_idx = col_expr.index(); forward.push(col_idx); if col_idx >= backward.len() { - backward.resize(col_idx+1, 
None); + backward.resize(col_idx + 1, None); } backward[col_idx] = Some(i); } @@ -44,9 +44,9 @@ impl ProjectionMapping { self.backward.get(col_idx).copied().flatten() } - /// Remaps all column refs in the join condition based on a + /// Remaps all column refs in the join condition based on a /// removed bottom projection node - /// + /// /// removed node: /// Join { cond: #0=#5 } /// Projection { exprs: [#1, #0, #3, #5, #4] } --> has mapping @@ -60,18 +60,19 @@ impl ProjectionMapping { } else { Some(col_idx - schema_size + child_schema_len) } - }).unwrap() - } + }) + .unwrap() + } - /// Remaps all column refs in the filter condition based on an added or + /// Remaps all column refs in the filter condition based on an added or /// removed bottom projection node - /// + /// /// added node: /// Filter { cond: #1=0 and #4=1 } /// ----> /// Filter { cond: #0=0 and #5=1 } /// Projection { exprs: [#1, #0, #3, #5, #4] } --> has mapping - /// + /// /// removed node: /// Filter { cond: #0=0 and #5=1 } /// Projection { exprs: [#1, #0, #3, #5, #4] } --> has mapping @@ -84,21 +85,22 @@ impl ProjectionMapping { } else { self.projection_col_maps_to(col_idx) } - }).unwrap() + }) + .unwrap() } - /// If the top projection node is mapped, rewrites the bottom projection's - /// exprs based on the top projection's mapped col refs. - /// + /// If the top projection node is mapped, rewrites the bottom projection's + /// exprs based on the top projection's mapped col refs. + /// /// If the bottom projection node is mapped, rewrites the top projection's /// exprs based on the bottom projection's mapped col refs. 
- /// + /// /// Projection { exprs: [#1, #0] } /// Projection { exprs: [#0, #2] } /// ----> /// Projection { exprs: [#2, #0] } pub fn rewrite_projection(&self, exprs: &ExprList, is_top_mapped: bool) -> Option { - if exprs.len() == 0 { + if exprs.is_empty() { return None; } let mut new_projection_exprs = Vec::new(); @@ -107,14 +109,14 @@ impl ProjectionMapping { for i in 0..self.forward.len() { let col_idx = self.projection_col_maps_to(i).unwrap(); new_projection_exprs.push(exprs[col_idx].clone()); - }; + } } else { for i in exprs.to_vec() { let col_ref = ColumnRefExpr::from_rel_node(i.into_rel_node()).unwrap(); let col_idx = self.original_col_maps_to(col_ref.index()).unwrap(); let col: Expr = ColumnRefExpr::new(col_idx).into_expr(); new_projection_exprs.push(col); - }; + } } Some(ExprList::new(new_projection_exprs)) } diff --git a/optd-datafusion-repr/src/testing/tpch_catalog.rs b/optd-datafusion-repr/src/testing/tpch_catalog.rs index be99c394..873a23c2 100644 --- a/optd-datafusion-repr/src/testing/tpch_catalog.rs +++ b/optd-datafusion-repr/src/testing/tpch_catalog.rs @@ -10,27 +10,25 @@ pub struct TpchCatalog; impl Catalog for TpchCatalog { fn get(&self, name: &str) -> Schema { match name { - "region" => { - Schema { - fields: vec![ - Field { - name: "regionkey".to_string(), - typ: ConstantType::Int32, - nullable: false, - }, - Field { - name: "name".to_string(), - typ: ConstantType::Utf8String, - nullable: false, - }, - Field { - name: "comment".to_string(), - typ: ConstantType::Utf8String, - nullable: false, - }, - ], - } - } + "region" => Schema { + fields: vec![ + Field { + name: "regionkey".to_string(), + typ: ConstantType::Int32, + nullable: false, + }, + Field { + name: "name".to_string(), + typ: ConstantType::Utf8String, + nullable: false, + }, + Field { + name: "comment".to_string(), + typ: ConstantType::Utf8String, + nullable: false, + }, + ], + }, "customer" => { // Define the schema for the "customer" table From bdc9b877596b27eac2d5ce2b75ce2f128fc9018d 
Mon Sep 17 00:00:00 2001 From: Sweetsuro Date: Wed, 17 Apr 2024 18:25:05 -0400 Subject: [PATCH 57/61] base impl for project push down join --- optd-datafusion-repr/src/lib.rs | 10 +- optd-datafusion-repr/src/rules.rs | 2 +- .../project_filter_transpose.rs | 8 +- .../project_join_transpose.rs | 115 +++++++++++++++++- .../project_transpose_common.rs | 61 ++++++++-- 5 files changed, 167 insertions(+), 29 deletions(-) diff --git a/optd-datafusion-repr/src/lib.rs b/optd-datafusion-repr/src/lib.rs index d2869af5..50101c5a 100644 --- a/optd-datafusion-repr/src/lib.rs +++ b/optd-datafusion-repr/src/lib.rs @@ -22,12 +22,7 @@ use properties::{ schema::{Catalog, SchemaPropertyBuilder}, }; use rules::{ - EliminateDuplicatedAggExprRule, EliminateDuplicatedSortExprRule, EliminateFilterRule, - EliminateJoinRule, EliminateLimitRule, FilterAggTransposeRule, FilterCrossJoinTransposeRule, - FilterInnerJoinTransposeRule, FilterMergeRule, FilterProjectTransposeRule, - FilterSortTransposeRule, HashJoinRule, JoinAssocRule, JoinCommuteRule, PhysicalConversionRule, - ProjectFilterTransposeRule, ProjectMergeRule, ProjectionPullUpJoin, SimplifyFilterRule, - SimplifyJoinCondRule, + EliminateDuplicatedAggExprRule, EliminateDuplicatedSortExprRule, EliminateFilterRule, EliminateJoinRule, EliminateLimitRule, FilterAggTransposeRule, FilterCrossJoinTransposeRule, FilterInnerJoinTransposeRule, FilterMergeRule, FilterProjectTransposeRule, FilterSortTransposeRule, HashJoinRule, JoinAssocRule, JoinCommuteRule, PhysicalConversionRule, ProjectFilterTransposeRule, ProjectMergeRule, ProjectionPullUpJoin, ProjectionPushDownJoin, SimplifyFilterRule, SimplifyJoinCondRule }; pub use optd_core::rel_node::Value; @@ -104,6 +99,9 @@ impl DatafusionOptimizer { rule_wrappers.push(RuleWrapper::new_cascades(Arc::new( ProjectFilterTransposeRule::new(), ))); + rule_wrappers.push(RuleWrapper::new_cascades(Arc::new( + ProjectionPushDownJoin::new(), + ))); // add all filter pushdown rules as heuristic rules 
rule_wrappers.push(RuleWrapper::new_heuristic(Arc::new( FilterProjectTransposeRule::new(), diff --git a/optd-datafusion-repr/src/rules.rs b/optd-datafusion-repr/src/rules.rs index aad6a908..9e5c0f09 100644 --- a/optd-datafusion-repr/src/rules.rs +++ b/optd-datafusion-repr/src/rules.rs @@ -22,6 +22,6 @@ pub use joins::{EliminateJoinRule, HashJoinRule, JoinAssocRule, JoinCommuteRule} pub use physical::PhysicalConversionRule; pub use project_transpose::{ project_filter_transpose::{FilterProjectTransposeRule, ProjectFilterTransposeRule}, - project_join_transpose::ProjectionPullUpJoin, + project_join_transpose::{ProjectionPullUpJoin, ProjectionPushDownJoin}, project_merge::ProjectMergeRule, }; diff --git a/optd-datafusion-repr/src/rules/project_transpose/project_filter_transpose.rs b/optd-datafusion-repr/src/rules/project_transpose/project_filter_transpose.rs index 004ce0cf..58851ffa 100644 --- a/optd-datafusion-repr/src/rules/project_transpose/project_filter_transpose.rs +++ b/optd-datafusion-repr/src/rules/project_transpose/project_filter_transpose.rs @@ -4,18 +4,12 @@ use std::vec; use optd_core::rules::{Rule, RuleMatcher}; use optd_core::{optimizer::Optimizer, rel_node::RelNode}; -use super::project_transpose_common::ProjectionMapping; +use super::project_transpose_common::{ProjectionMapping, merge_exprs}; use crate::plan_nodes::{ Expr, ExprList, LogicalFilter, LogicalProjection, OptRelNode, OptRelNodeTyp, PlanNode, }; use crate::rules::macros::define_rule; -fn merge_exprs(first: ExprList, second: ExprList) -> ExprList { - let mut res_vec = first.to_vec(); - res_vec.extend(second.to_vec()); - ExprList::new(res_vec) -} - define_rule!( ProjectFilterTransposeRule, apply_projection_filter_transpose, diff --git a/optd-datafusion-repr/src/rules/project_transpose/project_join_transpose.rs b/optd-datafusion-repr/src/rules/project_transpose/project_join_transpose.rs index 52465292..02cc0395 100644 --- 
a/optd-datafusion-repr/src/rules/project_transpose/project_join_transpose.rs +++ b/optd-datafusion-repr/src/rules/project_transpose/project_join_transpose.rs @@ -9,10 +9,10 @@ use crate::rules::macros::define_rule; use optd_core::optimizer::Optimizer; use optd_core::rel_node::RelNode; -use super::project_transpose_common::ProjectionMapping; +use super::project_transpose_common::{ProjectionMapping, merge_exprs, split_exprs}; use crate::plan_nodes::{ - ColumnRefExpr, Expr, ExprList, JoinType, LogicalJoin, LogicalProjection, OptRelNode, - OptRelNodeTyp, PlanNode, + ColumnRefExpr, Expr, ExprList, JoinType, LogicalJoin, LogicalProjection, + OptRelNode, OptRelNodeTyp, PlanNode, }; use crate::properties::schema::SchemaPropertyBuilder; @@ -63,11 +63,118 @@ fn apply_projection_pull_up_join( mapping.rewrite_join_cond( Expr::from_rel_node(Arc::new(cond)).unwrap(), left_schema.len(), + false, + true, ), JoinType::Inner, ) .into_plan_node(), ExprList::new(new_projection_exprs), ); - vec![node.into_rel_node().as_ref().clone()] + vec![node.into_rel_node().as_ref().clone()] } + +// most general: (Proj (A join B) -> Proj ((Proj A) join (Proj B)) +// ideal: (Proj (A join B) -> (Proj A) join (Proj B) +define_rule!( + ProjectionPushDownJoin, + apply_projection_push_down_join, + ( + Projection, + (Join(JoinType::Inner), left, right, [cond]), + [exprs] + ) +); + +fn apply_projection_push_down_join( + optimizer: &impl Optimizer, + ProjectionPushDownJoinPicks { + left, + right, + cond, + exprs, + }: ProjectionPushDownJoinPicks, +) -> Vec> { + let left = Arc::new(left.clone()); + let right = Arc::new(right.clone()); + + let exprs = ExprList::from_rel_node(Arc::new(exprs)).unwrap(); + let exprs_vec = exprs.clone().to_vec(); + let cond_as_expr = Expr::from_rel_node(cond.into()).unwrap(); + let cond_col_refs = cond_as_expr.get_column_refs(); + let mut dedup_cond_col_refs = Vec::new(); + + for col_ref in &cond_col_refs { + if !exprs_vec.contains(col_ref) { + 
dedup_cond_col_refs.push(col_ref.clone()); + }; + } + + let dedup_cond_col_refs = ExprList::new(dedup_cond_col_refs); + let tot_exprs = merge_exprs(exprs.clone(), dedup_cond_col_refs.clone()); + + // split exprs into exprs based on left + right children + let left_schema = optimizer.get_property::(left.clone(), 0); + let left_schema_len = left_schema.len(); + + let (left_exprs, right_exprs) = split_exprs(tot_exprs, left_schema_len); + + let Some(left_exprs_mapping) = ProjectionMapping::build(&left_exprs) else { + return vec![]; + }; + + let Some(right_exprs_mapping) = ProjectionMapping::build(&right_exprs) else { + return vec![]; + }; + + // update join cond based on new left + right child projection nodes + let new_join_cond: Expr = left_exprs_mapping.rewrite_join_cond(cond_as_expr.clone(), left_schema_len, true, true); + let new_join_cond: Expr = right_exprs_mapping.rewrite_join_cond(new_join_cond.clone(), left_schema_len, true, false); + + let new_left_child = LogicalProjection::new( + PlanNode::from_group(left), + left_exprs + ) + .into_plan_node(); + + let new_right_child = LogicalProjection::new( + PlanNode::from_group(right), + right_exprs + ) + .into_plan_node(); + + let new_join_node = LogicalJoin::new( + new_left_child, + new_right_child, + new_join_cond, + JoinType::Inner, + ) + .into_plan_node(); + + if dedup_cond_col_refs.is_empty() { + // don't need top projection node + return vec![new_join_node.into_rel_node().as_ref().clone()]; + } + + // update top projection node based on new left + right child projection nodes + let mut top_proj_exprs = vec![]; + let mut left_col_idx = 0; + let mut right_col_idx = left_schema_len; + for i in 0..exprs.len() { + let old_col_ref = ColumnRefExpr::from_rel_node(exprs_vec[i].clone().into_rel_node()).unwrap(); + if old_col_ref.index() < left_schema_len { + top_proj_exprs.push(ColumnRefExpr::new(left_col_idx).into_expr()); + left_col_idx += 1; + } else { + 
top_proj_exprs.push(ColumnRefExpr::new(right_col_idx).into_expr()); + right_col_idx += 1; + } + } + let top_proj_exprs = ExprList::new(top_proj_exprs); + + let new_top_node = LogicalProjection::new( + new_join_node, + top_proj_exprs, + ); + vec![new_top_node.into_rel_node().as_ref().clone()] +} \ No newline at end of file diff --git a/optd-datafusion-repr/src/rules/project_transpose/project_transpose_common.rs b/optd-datafusion-repr/src/rules/project_transpose/project_transpose_common.rs index 8baca0f0..a83f0d6f 100644 --- a/optd-datafusion-repr/src/rules/project_transpose/project_transpose_common.rs +++ b/optd-datafusion-repr/src/rules/project_transpose/project_transpose_common.rs @@ -1,5 +1,27 @@ use crate::plan_nodes::{ColumnRefExpr, Expr, ExprList, OptRelNode}; +pub fn merge_exprs(first: ExprList, second: ExprList) -> ExprList { + let mut res_vec = first.to_vec(); + res_vec.extend(second.to_vec()); + ExprList::new(res_vec) +} + +pub fn split_exprs(exprs: ExprList, left_schema_len: usize) -> (ExprList, ExprList) { + let mut left_vec = vec![]; + let mut right_vec = vec![]; + for expr in exprs.to_vec() { + let col_ref = ColumnRefExpr::from_rel_node(expr.into_rel_node()).unwrap(); + if col_ref.index() < left_schema_len { + // left expr + left_vec.push(col_ref.into_expr()); + } else { + // right expr + right_vec.push(col_ref.into_expr()); + } + } + (ExprList::new(left_vec), ExprList::new(right_vec)) +} + /// This struct holds the mapping from original columns to projected columns. 
/// /// # Example @@ -45,23 +67,40 @@ impl ProjectionMapping { } /// Remaps all column refs in the join condition based on a - /// removed bottom projection node + /// removed bottom projection node on the left child /// /// removed node: /// Join { cond: #0=#5 } /// Projection { exprs: [#1, #0, #3, #5, #4] } --> has mapping + /// Scan + /// Scan /// ----> /// Join { cond: #1=#4 } - pub fn rewrite_join_cond(&self, cond: Expr, child_schema_len: usize) -> Expr { - let schema_size = self.forward.len(); - cond.rewrite_column_refs(&|col_idx| { - if col_idx < schema_size { - self.projection_col_maps_to(col_idx) - } else { - Some(col_idx - schema_size + child_schema_len) - } - }) - .unwrap() + /// Scan + /// Scan + pub fn rewrite_join_cond(&self, cond: Expr, left_child_schema_len: usize, is_added: bool, is_left_child: bool) -> Expr { + if is_added { + cond.rewrite_column_refs(&|col_idx| { + if is_left_child && col_idx < left_child_schema_len { + self.original_col_maps_to(col_idx) + } else if !is_left_child && col_idx >= left_child_schema_len { + self.original_col_maps_to(col_idx - left_child_schema_len) + } else { + Some(col_idx) + } + }) + .unwrap() + } else { + let schema_size = self.forward.len(); + cond.rewrite_column_refs(&|col_idx| { + if col_idx < schema_size { + self.projection_col_maps_to(col_idx) + } else { + Some(col_idx - schema_size + left_child_schema_len) + } + }) + .unwrap() + } } /// Remaps all column refs in the filter condition based on an added or From 68ca68306fb8d23e36b0f55ee6fff2fc9e33bdf9 Mon Sep 17 00:00:00 2001 From: Sweetsuro Date: Thu, 18 Apr 2024 08:57:45 -0400 Subject: [PATCH 58/61] removed unecessecary file --- .../tests/old_tpch.planner.sql | 2106 ----------------- 1 file changed, 2106 deletions(-) delete mode 100644 optd-sqlplannertest/tests/old_tpch.planner.sql diff --git a/optd-sqlplannertest/tests/old_tpch.planner.sql b/optd-sqlplannertest/tests/old_tpch.planner.sql deleted file mode 100644 index 4f3c88ae..00000000 --- 
a/optd-sqlplannertest/tests/old_tpch.planner.sql +++ /dev/null @@ -1,2106 +0,0 @@ --- TPC-H schema -CREATE TABLE NATION ( - N_NATIONKEY INT NOT NULL, - N_NAME CHAR(25) NOT NULL, - N_REGIONKEY INT NOT NULL, - N_COMMENT VARCHAR(152) -); - -CREATE TABLE REGION ( - R_REGIONKEY INT NOT NULL, - R_NAME CHAR(25) NOT NULL, - R_COMMENT VARCHAR(152) -); - -CREATE TABLE PART ( - P_PARTKEY INT NOT NULL, - P_NAME VARCHAR(55) NOT NULL, - P_MFGR CHAR(25) NOT NULL, - P_BRAND CHAR(10) NOT NULL, - P_TYPE VARCHAR(25) NOT NULL, - P_SIZE INT NOT NULL, - P_CONTAINER CHAR(10) NOT NULL, - P_RETAILPRICE DECIMAL(15,2) NOT NULL, - P_COMMENT VARCHAR(23) NOT NULL -); - -CREATE TABLE SUPPLIER ( - S_SUPPKEY INT NOT NULL, - S_NAME CHAR(25) NOT NULL, - S_ADDRESS VARCHAR(40) NOT NULL, - S_NATIONKEY INT NOT NULL, - S_PHONE CHAR(15) NOT NULL, - S_ACCTBAL DECIMAL(15,2) NOT NULL, - S_COMMENT VARCHAR(101) NOT NULL -); - -CREATE TABLE PARTSUPP ( - PS_PARTKEY INT NOT NULL, - PS_SUPPKEY INT NOT NULL, - PS_AVAILQTY INT NOT NULL, - PS_SUPPLYCOST DECIMAL(15,2) NOT NULL, - PS_COMMENT VARCHAR(199) NOT NULL -); - -CREATE TABLE CUSTOMER ( - C_CUSTKEY INT NOT NULL, - C_NAME VARCHAR(25) NOT NULL, - C_ADDRESS VARCHAR(40) NOT NULL, - C_NATIONKEY INT NOT NULL, - C_PHONE CHAR(15) NOT NULL, - C_ACCTBAL DECIMAL(15,2) NOT NULL, - C_MKTSEGMENT CHAR(10) NOT NULL, - C_COMMENT VARCHAR(117) NOT NULL -); - -CREATE TABLE ORDERS ( - O_ORDERKEY INT NOT NULL, - O_CUSTKEY INT NOT NULL, - O_ORDERSTATUS CHAR(1) NOT NULL, - O_TOTALPRICE DECIMAL(15,2) NOT NULL, - O_ORDERDATE DATE NOT NULL, - O_ORDERPRIORITY CHAR(15) NOT NULL, - O_CLERK CHAR(15) NOT NULL, - O_SHIPPRIORITY INT NOT NULL, - O_COMMENT VARCHAR(79) NOT NULL -); - -CREATE TABLE LINEITEM ( - L_ORDERKEY INT NOT NULL, - L_PARTKEY INT NOT NULL, - L_SUPPKEY INT NOT NULL, - L_LINENUMBER INT NOT NULL, - L_QUANTITY DECIMAL(15,2) NOT NULL, - L_EXTENDEDPRICE DECIMAL(15,2) NOT NULL, - L_DISCOUNT DECIMAL(15,2) NOT NULL, - L_TAX DECIMAL(15,2) NOT NULL, - L_RETURNFLAG CHAR(1) NOT NULL, - 
L_LINESTATUS CHAR(1) NOT NULL, - L_SHIPDATE DATE NOT NULL, - L_COMMITDATE DATE NOT NULL, - L_RECEIPTDATE DATE NOT NULL, - L_SHIPINSTRUCT CHAR(25) NOT NULL, - L_SHIPMODE CHAR(10) NOT NULL, - L_COMMENT VARCHAR(44) NOT NULL -); - -/* - -*/ - --- TPC-H Q1 -SELECT - l_returnflag, - l_linestatus, - sum(l_quantity) as sum_qty, - sum(l_extendedprice) as sum_base_price, - sum(l_extendedprice * (1 - l_discount)) as sum_disc_price, - sum(l_extendedprice * (1 - l_discount) * (1 + l_tax)) as sum_charge, - avg(l_quantity) as avg_qty, - avg(l_extendedprice) as avg_price, - avg(l_discount) as avg_disc, - count(*) as count_order -FROM - lineitem -WHERE - l_shipdate <= date '1998-12-01' - interval '90' day -GROUP BY - l_returnflag, l_linestatus -ORDER BY - l_returnflag, l_linestatus; - -/* -LogicalSort -├── exprs: -│ ┌── SortOrder { order: Asc } -│ │ └── #0 -│ └── SortOrder { order: Asc } -│ └── #1 -└── LogicalProjection { exprs: [ #0, #1, #2, #3, #4, #5, #6, #7, #8, #9 ] } - └── LogicalAgg - ├── exprs: - │ ┌── Agg(Sum) - │ │ └── [ #4 ] - │ ├── Agg(Sum) - │ │ └── [ #5 ] - │ ├── Agg(Sum) - │ │ └── Mul - │ │ ├── #5 - │ │ └── Sub - │ │ ├── Cast { cast_to: Decimal128(20, 0), expr: 1 } - │ │ └── #6 - │ ├── Agg(Sum) - │ │ └── Mul - │ │ ├── Mul - │ │ │ ├── #5 - │ │ │ └── Sub - │ │ │ ├── Cast { cast_to: Decimal128(20, 0), expr: 1 } - │ │ │ └── #6 - │ │ └── Add - │ │ ├── Cast { cast_to: Decimal128(20, 0), expr: 1 } - │ │ └── #7 - │ ├── Agg(Avg) - │ │ └── [ #4 ] - │ ├── Agg(Avg) - │ │ └── [ #5 ] - │ ├── Agg(Avg) - │ │ └── [ #6 ] - │ └── Agg(Count) - │ └── [ 1 ] - ├── groups: [ #8, #9 ] - └── LogicalFilter - ├── cond:Leq - │ ├── #10 - │ └── Sub - │ ├── Cast { cast_to: Date32, expr: "1998-12-01" } - │ └── INTERVAL_MONTH_DAY_NANO (0, 90, 0) - └── LogicalScan { table: lineitem } -PhysicalSort -├── exprs: -│ ┌── SortOrder { order: Asc } -│ │ └── #0 -│ └── SortOrder { order: Asc } -│ └── #1 -└── PhysicalProjection { exprs: [ #0, #1, #2, #3, #4, #5, #6, #7, #8, #9 ] } - └── PhysicalAgg - ├── aggrs: 
- │ ┌── Agg(Sum) - │ │ └── [ #4 ] - │ ├── Agg(Sum) - │ │ └── [ #5 ] - │ ├── Agg(Sum) - │ │ └── Mul - │ │ ├── #5 - │ │ └── Sub - │ │ ├── Cast { cast_to: Decimal128(20, 0), expr: 1 } - │ │ └── #6 - │ ├── Agg(Sum) - │ │ └── Mul - │ │ ├── Mul - │ │ │ ├── #5 - │ │ │ └── Sub - │ │ │ ├── Cast { cast_to: Decimal128(20, 0), expr: 1 } - │ │ │ └── #6 - │ │ └── Add - │ │ ├── Cast { cast_to: Decimal128(20, 0), expr: 1 } - │ │ └── #7 - │ ├── Agg(Avg) - │ │ └── [ #4 ] - │ ├── Agg(Avg) - │ │ └── [ #5 ] - │ ├── Agg(Avg) - │ │ └── [ #6 ] - │ └── Agg(Count) - │ └── [ 1 ] - ├── groups: [ #8, #9 ] - └── PhysicalFilter - ├── cond:Leq - │ ├── #10 - │ └── Sub - │ ├── Cast { cast_to: Date32, expr: "1998-12-01" } - │ └── INTERVAL_MONTH_DAY_NANO (0, 90, 0) - └── PhysicalScan { table: lineitem } -*/ - --- TPC-H Q2 -select - s_acctbal, - s_name, - n_name, - p_partkey, - p_mfgr, - s_address, - s_phone, - s_comment -from - part, - supplier, - partsupp, - nation, - region -where - p_partkey = ps_partkey - and s_suppkey = ps_suppkey -and p_size = 4 -and p_type like '%TIN' - and s_nationkey = n_nationkey - and n_regionkey = r_regionkey - and r_name = 'AFRICA' - and ps_supplycost = ( - select - min(ps_supplycost) - from - partsupp, - supplier, - nation, - region - where - p_partkey = ps_partkey - and s_suppkey = ps_suppkey - and s_nationkey = n_nationkey - and n_regionkey = r_regionkey - and r_name = 'AFRICA' - ) -order by - s_acctbal desc, - n_name, - s_name, - p_partkey -limit 100; - -/* -LogicalLimit { skip: 0, fetch: 100 } -└── LogicalSort - ├── exprs: - │ ┌── SortOrder { order: Desc } - │ │ └── #0 - │ ├── SortOrder { order: Asc } - │ │ └── #2 - │ ├── SortOrder { order: Asc } - │ │ └── #1 - │ └── SortOrder { order: Asc } - │ └── #3 - └── LogicalProjection { exprs: [ #5, #2, #8, #0, #1, #3, #4, #6 ] } - └── LogicalJoin - ├── join_type: Inner - ├── cond:And - │ ├── Eq - │ │ ├── #0 - │ │ └── #10 - │ └── Eq - │ ├── #7 - │ └── #9 - ├── LogicalProjection { exprs: [ #0, #1, #2, #3, #4, #5, #6, #7, #8 ] 
} - │ └── LogicalJoin - │ ├── join_type: Inner - │ ├── cond:Eq - │ │ ├── #9 - │ │ └── #10 - │ ├── LogicalProjection { exprs: [ #0, #1, #2, #3, #5, #6, #7, #8, #10, #11 ] } - │ │ └── LogicalJoin - │ │ ├── join_type: Inner - │ │ ├── cond:Eq - │ │ │ ├── #4 - │ │ │ └── #9 - │ │ ├── LogicalProjection { exprs: [ #0, #1, #5, #6, #7, #8, #9, #10, #3 ] } - │ │ │ └── LogicalJoin - │ │ │ ├── join_type: Inner - │ │ │ ├── cond:Eq - │ │ │ │ ├── #2 - │ │ │ │ └── #4 - │ │ │ ├── LogicalProjection { exprs: [ #0, #1, #3, #4 ] } - │ │ │ │ └── LogicalJoin - │ │ │ │ ├── join_type: Inner - │ │ │ │ ├── cond:Eq - │ │ │ │ │ ├── #0 - │ │ │ │ │ └── #2 - │ │ │ │ ├── LogicalProjection { exprs: [ #0, #1 ] } - │ │ │ │ │ └── LogicalFilter - │ │ │ │ │ ├── cond:And - │ │ │ │ │ │ ├── Eq - │ │ │ │ │ │ │ ├── #3 - │ │ │ │ │ │ │ └── 4 - │ │ │ │ │ │ └── Like { expr: #2, pattern: "%TIN", negated: false, case_insensitive: false } - │ │ │ │ │ └── LogicalProjection { exprs: [ #0, #2, #4, #5 ] } - │ │ │ │ │ └── LogicalScan { table: part } - │ │ │ │ └── LogicalProjection { exprs: [ #0, #1, #3 ] } - │ │ │ │ └── LogicalScan { table: partsupp } - │ │ │ └── LogicalProjection { exprs: [ #0, #1, #2, #3, #4, #5, #6 ] } - │ │ │ └── LogicalScan { table: supplier } - │ │ └── LogicalProjection { exprs: [ #0, #1, #2 ] } - │ │ └── LogicalScan { table: nation } - │ └── LogicalProjection { exprs: [ #0 ] } - │ └── LogicalFilter - │ ├── cond:Eq - │ │ ├── #1 - │ │ └── "AFRICA" - │ └── LogicalProjection { exprs: [ #0, #1 ] } - │ └── LogicalScan { table: region } - └── LogicalProjection { exprs: [ #1, #0 ] } - └── LogicalAgg - ├── exprs:Agg(Min) - │ └── [ #1 ] - ├── groups: [ #0 ] - └── LogicalProjection { exprs: [ #0, #1 ] } - └── LogicalJoin - ├── join_type: Inner - ├── cond:Eq - │ ├── #2 - │ └── #3 - ├── LogicalProjection { exprs: [ #0, #1, #4 ] } - │ └── LogicalJoin - │ ├── join_type: Inner - │ ├── cond:Eq - │ │ ├── #2 - │ │ └── #3 - │ ├── LogicalProjection { exprs: [ #0, #2, #4 ] } - │ │ └── LogicalJoin - │ │ ├── join_type: 
Inner - │ │ ├── cond:Eq - │ │ │ ├── #1 - │ │ │ └── #3 - │ │ ├── LogicalProjection { exprs: [ #0, #1, #3 ] } - │ │ │ └── LogicalScan { table: partsupp } - │ │ └── LogicalProjection { exprs: [ #0, #3 ] } - │ │ └── LogicalScan { table: supplier } - │ └── LogicalProjection { exprs: [ #0, #2 ] } - │ └── LogicalScan { table: nation } - └── LogicalProjection { exprs: [ #0 ] } - └── LogicalFilter - ├── cond:Eq - │ ├── #1 - │ └── "AFRICA" - └── LogicalProjection { exprs: [ #0, #1 ] } - └── LogicalScan { table: region } -PhysicalLimit { skip: 0, fetch: 100 } -└── PhysicalSort - ├── exprs: - │ ┌── SortOrder { order: Desc } - │ │ └── #0 - │ ├── SortOrder { order: Asc } - │ │ └── #2 - │ ├── SortOrder { order: Asc } - │ │ └── #1 - │ └── SortOrder { order: Asc } - │ └── #3 - └── PhysicalProjection { exprs: [ #5, #2, #8, #0, #1, #3, #4, #6 ] } - └── PhysicalHashJoin { join_type: Inner, left_keys: [ #0, #7 ], right_keys: [ #1, #0 ] } - ├── PhysicalProjection { exprs: [ #0, #1, #2, #3, #4, #5, #6, #7, #8 ] } - │ └── PhysicalHashJoin { join_type: Inner, left_keys: [ #9 ], right_keys: [ #0 ] } - │ ├── PhysicalProjection { exprs: [ #0, #1, #2, #3, #5, #6, #7, #8, #10, #11 ] } - │ │ └── PhysicalHashJoin { join_type: Inner, left_keys: [ #4 ], right_keys: [ #0 ] } - │ │ ├── PhysicalProjection { exprs: [ #0, #1, #5, #6, #7, #8, #9, #10, #3 ] } - │ │ │ └── PhysicalHashJoin { join_type: Inner, left_keys: [ #2 ], right_keys: [ #0 ] } - │ │ │ ├── PhysicalProjection { exprs: [ #0, #1, #3, #4 ] } - │ │ │ │ └── PhysicalHashJoin { join_type: Inner, left_keys: [ #0 ], right_keys: [ #0 ] } - │ │ │ │ ├── PhysicalProjection { exprs: [ #0, #1 ] } - │ │ │ │ │ └── PhysicalProjection { exprs: [ #0, #2, #4, #5 ] } - │ │ │ │ │ └── PhysicalFilter - │ │ │ │ │ ├── cond:And - │ │ │ │ │ │ ├── Eq - │ │ │ │ │ │ │ ├── #5 - │ │ │ │ │ │ │ └── 4 - │ │ │ │ │ │ └── Like { expr: #4, pattern: "%TIN", negated: false, case_insensitive: false } - │ │ │ │ │ └── PhysicalScan { table: part } - │ │ │ │ └── PhysicalProjection { 
exprs: [ #0, #1, #3 ] } - │ │ │ │ └── PhysicalScan { table: partsupp } - │ │ │ └── PhysicalProjection { exprs: [ #0, #1, #2, #3, #4, #5, #6 ] } - │ │ │ └── PhysicalScan { table: supplier } - │ │ └── PhysicalProjection { exprs: [ #0, #1, #2 ] } - │ │ └── PhysicalScan { table: nation } - │ └── PhysicalProjection { exprs: [ #0 ] } - │ └── PhysicalProjection { exprs: [ #0, #1 ] } - │ └── PhysicalFilter - │ ├── cond:Eq - │ │ ├── #1 - │ │ └── "AFRICA" - │ └── PhysicalScan { table: region } - └── PhysicalProjection { exprs: [ #1, #0 ] } - └── PhysicalAgg - ├── aggrs:Agg(Min) - │ └── [ #1 ] - ├── groups: [ #0 ] - └── PhysicalProjection { exprs: [ #0, #1 ] } - └── PhysicalHashJoin { join_type: Inner, left_keys: [ #2 ], right_keys: [ #0 ] } - ├── PhysicalProjection { exprs: [ #0, #1, #4 ] } - │ └── PhysicalHashJoin { join_type: Inner, left_keys: [ #2 ], right_keys: [ #0 ] } - │ ├── PhysicalProjection { exprs: [ #0, #2, #4 ] } - │ │ └── PhysicalHashJoin { join_type: Inner, left_keys: [ #1 ], right_keys: [ #0 ] } - │ │ ├── PhysicalProjection { exprs: [ #0, #1, #3 ] } - │ │ │ └── PhysicalScan { table: partsupp } - │ │ └── PhysicalProjection { exprs: [ #0, #3 ] } - │ │ └── PhysicalScan { table: supplier } - │ └── PhysicalProjection { exprs: [ #0, #2 ] } - │ └── PhysicalScan { table: nation } - └── PhysicalProjection { exprs: [ #0 ] } - └── PhysicalProjection { exprs: [ #0, #1 ] } - └── PhysicalFilter - ├── cond:Eq - │ ├── #1 - │ └── "AFRICA" - └── PhysicalScan { table: region } -*/ - --- TPC-H Q3 -SELECT - l_orderkey, - SUM(l_extendedprice * (1 - l_discount)) AS revenue, - o_orderdate, - o_shippriority -FROM - customer, - orders, - lineitem -WHERE - c_mktsegment = 'FURNITURE' - AND c_custkey = o_custkey - AND l_orderkey = o_orderkey - AND o_orderdate < DATE '1995-03-29' - AND l_shipdate > DATE '1995-03-29' -GROUP BY - l_orderkey, - o_orderdate, - o_shippriority -ORDER BY - revenue DESC, - o_orderdate LIMIT 10; - -/* -LogicalLimit { skip: 0, fetch: 10 } -└── LogicalSort - ├── 
exprs: - │ ┌── SortOrder { order: Desc } - │ │ └── #1 - │ └── SortOrder { order: Asc } - │ └── #2 - └── LogicalProjection { exprs: [ #0, #3, #1, #2 ] } - └── LogicalAgg - ├── exprs:Agg(Sum) - │ └── Mul - │ ├── #3 - │ └── Sub - │ ├── 1 - │ └── #4 - ├── groups: [ #2, #0, #1 ] - └── LogicalProjection { exprs: [ #1, #2, #3, #4, #5 ] } - └── LogicalJoin - ├── join_type: Inner - ├── cond:Eq - │ ├── #0 - │ └── #3 - ├── LogicalProjection { exprs: [ #1, #3, #4 ] } - │ └── LogicalJoin - │ ├── join_type: Inner - │ ├── cond:Eq - │ │ ├── #0 - │ │ └── #2 - │ ├── LogicalProjection { exprs: [ #0 ] } - │ │ └── LogicalFilter - │ │ ├── cond:Eq - │ │ │ ├── #1 - │ │ │ └── "FURNITURE" - │ │ └── LogicalProjection { exprs: [ #0, #6 ] } - │ │ └── LogicalScan { table: customer } - │ └── LogicalFilter - │ ├── cond:Lt - │ │ ├── #2 - │ │ └── 9218 - │ └── LogicalProjection { exprs: [ #0, #1, #4, #7 ] } - │ └── LogicalScan { table: orders } - └── LogicalProjection { exprs: [ #0, #1, #2 ] } - └── LogicalFilter - ├── cond:Gt - │ ├── #3 - │ └── 9218 - └── LogicalProjection { exprs: [ #0, #5, #6, #10 ] } - └── LogicalScan { table: lineitem } -PhysicalLimit { skip: 0, fetch: 10 } -└── PhysicalSort - ├── exprs: - │ ┌── SortOrder { order: Desc } - │ │ └── #1 - │ └── SortOrder { order: Asc } - │ └── #2 - └── PhysicalProjection { exprs: [ #0, #3, #1, #2 ] } - └── PhysicalAgg - ├── aggrs:Agg(Sum) - │ └── Mul - │ ├── #3 - │ └── Sub - │ ├── 1 - │ └── #4 - ├── groups: [ #2, #0, #1 ] - └── PhysicalProjection { exprs: [ #1, #2, #3, #4, #5 ] } - └── PhysicalHashJoin { join_type: Inner, left_keys: [ #0 ], right_keys: [ #0 ] } - ├── PhysicalProjection { exprs: [ #1, #3, #4 ] } - │ └── PhysicalHashJoin { join_type: Inner, left_keys: [ #0 ], right_keys: [ #1 ] } - │ ├── PhysicalProjection { exprs: [ #0 ] } - │ │ └── PhysicalProjection { exprs: [ #0, #6 ] } - │ │ └── PhysicalFilter - │ │ ├── cond:Eq - │ │ │ ├── #6 - │ │ │ └── "FURNITURE" - │ │ └── PhysicalScan { table: customer } - │ └── PhysicalProjection { exprs: 
[ #0, #1, #4, #7 ] } - │ └── PhysicalFilter - │ ├── cond:Lt - │ │ ├── #4 - │ │ └── 9218 - │ └── PhysicalScan { table: orders } - └── PhysicalProjection { exprs: [ #0, #1, #2 ] } - └── PhysicalProjection { exprs: [ #0, #5, #6, #10 ] } - └── PhysicalFilter - ├── cond:Gt - │ ├── #10 - │ └── 9218 - └── PhysicalScan { table: lineitem } -*/ - --- TPC-H Q5 -SELECT - n_name AS nation, - SUM(l_extendedprice * (1 - l_discount)) AS revenue -FROM - customer, - orders, - lineitem, - supplier, - nation, - region -WHERE - c_custkey = o_custkey - AND l_orderkey = o_orderkey - AND l_suppkey = s_suppkey - AND c_nationkey = s_nationkey - AND s_nationkey = n_nationkey - AND n_regionkey = r_regionkey - AND r_name = 'Asia' -- Specified region - AND o_orderdate >= DATE '2023-01-01' - AND o_orderdate < DATE '2024-01-01' -GROUP BY - n_name -ORDER BY - revenue DESC; - -/* -LogicalSort -├── exprs:SortOrder { order: Desc } -│ └── #1 -└── LogicalProjection { exprs: [ #0, #1 ] } - └── LogicalAgg - ├── exprs:Agg(Sum) - │ └── Mul - │ ├── #22 - │ └── Sub - │ ├── Cast { cast_to: Decimal128(20, 0), expr: 1 } - │ └── #23 - ├── groups: [ #41 ] - └── LogicalFilter - ├── cond:And - │ ├── Eq - │ │ ├── #0 - │ │ └── #9 - │ ├── Eq - │ │ ├── #17 - │ │ └── #8 - │ ├── Eq - │ │ ├── #19 - │ │ └── #33 - │ ├── Eq - │ │ ├── #3 - │ │ └── #36 - │ ├── Eq - │ │ ├── #36 - │ │ └── #40 - │ ├── Eq - │ │ ├── #42 - │ │ └── #44 - │ ├── Eq - │ │ ├── #45 - │ │ └── "Asia" - │ ├── Geq - │ │ ├── #12 - │ │ └── Cast { cast_to: Date32, expr: "2023-01-01" } - │ └── Lt - │ ├── #12 - │ └── Cast { cast_to: Date32, expr: "2024-01-01" } - └── LogicalJoin { join_type: Cross, cond: true } - ├── LogicalJoin { join_type: Cross, cond: true } - │ ├── LogicalJoin { join_type: Cross, cond: true } - │ │ ├── LogicalJoin { join_type: Cross, cond: true } - │ │ │ ├── LogicalJoin { join_type: Cross, cond: true } - │ │ │ │ ├── LogicalScan { table: customer } - │ │ │ │ └── LogicalScan { table: orders } - │ │ │ └── LogicalScan { table: lineitem } - │ │ └── 
LogicalScan { table: supplier } - │ └── LogicalScan { table: nation } - └── LogicalScan { table: region } -PhysicalSort -├── exprs:SortOrder { order: Desc } -│ └── #1 -└── PhysicalProjection { exprs: [ #0, #1 ] } - └── PhysicalAgg - ├── aggrs:Agg(Sum) - │ └── Mul - │ ├── #22 - │ └── Sub - │ ├── Cast { cast_to: Decimal128(20, 0), expr: 1 } - │ └── #23 - ├── groups: [ #41 ] - └── PhysicalHashJoin { join_type: Inner, left_keys: [ #19, #3 ], right_keys: [ #0, #3 ] } - ├── PhysicalHashJoin { join_type: Inner, left_keys: [ #0 ], right_keys: [ #1 ] } - │ ├── PhysicalScan { table: customer } - │ └── PhysicalHashJoin { join_type: Inner, left_keys: [ #0 ], right_keys: [ #0 ] } - │ ├── PhysicalFilter - │ │ ├── cond:And - │ │ │ ├── Geq - │ │ │ │ ├── #4 - │ │ │ │ └── Cast { cast_to: Date32, expr: "2023-01-01" } - │ │ │ └── Lt - │ │ │ ├── #4 - │ │ │ └── Cast { cast_to: Date32, expr: "2024-01-01" } - │ │ └── PhysicalScan { table: orders } - │ └── PhysicalScan { table: lineitem } - └── PhysicalHashJoin { join_type: Inner, left_keys: [ #3 ], right_keys: [ #0 ] } - ├── PhysicalScan { table: supplier } - └── PhysicalHashJoin { join_type: Inner, left_keys: [ #2 ], right_keys: [ #0 ] } - ├── PhysicalScan { table: nation } - └── PhysicalFilter - ├── cond:Eq - │ ├── #1 - │ └── "Asia" - └── PhysicalScan { table: region } -*/ - --- TPC-H Q6 -SELECT - SUM(l_extendedprice * l_discount) AS revenue_loss -FROM - lineitem -WHERE - l_shipdate >= DATE '2023-01-01' - AND l_shipdate < DATE '2024-01-01' - AND l_discount BETWEEN 0.05 AND 0.07 - AND l_quantity < 24; - -/* -LogicalProjection { exprs: [ #0 ] } -└── LogicalAgg - ├── exprs:Agg(Sum) - │ └── Mul - │ ├── #5 - │ └── #6 - ├── groups: [] - └── LogicalFilter - ├── cond:And - │ ├── Geq - │ │ ├── #10 - │ │ └── Cast { cast_to: Date32, expr: "2023-01-01" } - │ ├── Lt - │ │ ├── #10 - │ │ └── Cast { cast_to: Date32, expr: "2024-01-01" } - │ ├── Between { expr: Cast { cast_to: Decimal128(30, 15), expr: #6 }, lower: Cast { cast_to: Decimal128(30, 15), 
expr: 0.05 }, upper: Cast { cast_to: Decimal128(30, 15), expr: 0.07 } } - │ └── Lt - │ ├── Cast { cast_to: Decimal128(22, 2), expr: #4 } - │ └── Cast { cast_to: Decimal128(22, 2), expr: 24 } - └── LogicalScan { table: lineitem } -PhysicalProjection { exprs: [ #0 ] } -└── PhysicalAgg - ├── aggrs:Agg(Sum) - │ └── Mul - │ ├── #5 - │ └── #6 - ├── groups: [] - └── PhysicalFilter - ├── cond:And - │ ├── Geq - │ │ ├── #10 - │ │ └── Cast { cast_to: Date32, expr: "2023-01-01" } - │ ├── Lt - │ │ ├── #10 - │ │ └── Cast { cast_to: Date32, expr: "2024-01-01" } - │ ├── Between { expr: Cast { cast_to: Decimal128(30, 15), expr: #6 }, lower: Cast { cast_to: Decimal128(30, 15), expr: 0.05 }, upper: Cast { cast_to: Decimal128(30, 15), expr: 0.07 } } - │ └── Lt - │ ├── Cast { cast_to: Decimal128(22, 2), expr: #4 } - │ └── Cast { cast_to: Decimal128(22, 2), expr: 24 } - └── PhysicalScan { table: lineitem } -*/ - --- TPC-H Q7 -SELECT - supp_nation, - cust_nation, - l_year, - SUM(volume) AS revenue -FROM - ( - SELECT - n1.n_name AS supp_nation, - n2.n_name AS cust_nation, - EXTRACT(YEAR FROM l_shipdate) AS l_year, - l_extendedprice * (1 - l_discount) AS volume - FROM - supplier, - lineitem, - orders, - customer, - nation n1, - nation n2 - WHERE - s_suppkey = l_suppkey - AND o_orderkey = l_orderkey - AND c_custkey = o_custkey - AND s_nationkey = n1.n_nationkey - AND c_nationkey = n2.n_nationkey - AND ( - (n1.n_name = 'FRANCE' AND n2.n_name = 'GERMANY') - OR (n1.n_name = 'GERMANY' AND n2.n_name = 'FRANCE') - ) - AND l_shipdate BETWEEN DATE '1995-01-01' AND DATE '1996-12-31' - ) AS shipping -GROUP BY - supp_nation, - cust_nation, - l_year -ORDER BY - supp_nation, - cust_nation, - l_year; - -/* -LogicalSort -├── exprs: -│ ┌── SortOrder { order: Asc } -│ │ └── #0 -│ ├── SortOrder { order: Asc } -│ │ └── #1 -│ └── SortOrder { order: Asc } -│ └── #2 -└── LogicalProjection { exprs: [ #0, #1, #2, #3 ] } - └── LogicalAgg - ├── exprs:Agg(Sum) - │ └── [ #3 ] - ├── groups: [ #0, #1, #2 ] - └── 
LogicalProjection - ├── exprs: - │ ┌── #41 - │ ├── #45 - │ ├── Scalar(DatePart) - │ │ └── [ "YEAR", #17 ] - │ └── Mul - │ ├── #12 - │ └── Sub - │ ├── Cast { cast_to: Decimal128(20, 0), expr: 1 } - │ └── #13 - └── LogicalFilter - ├── cond:And - │ ├── Eq - │ │ ├── #0 - │ │ └── #9 - │ ├── Eq - │ │ ├── #23 - │ │ └── #7 - │ ├── Eq - │ │ ├── #32 - │ │ └── #24 - │ ├── Eq - │ │ ├── #3 - │ │ └── #40 - │ ├── Eq - │ │ ├── #35 - │ │ └── #44 - │ ├── Or - │ │ ├── And - │ │ │ ├── Eq - │ │ │ │ ├── #41 - │ │ │ │ └── "FRANCE" - │ │ │ └── Eq - │ │ │ ├── #45 - │ │ │ └── "GERMANY" - │ │ └── And - │ │ ├── Eq - │ │ │ ├── #41 - │ │ │ └── "GERMANY" - │ │ └── Eq - │ │ ├── #45 - │ │ └── "FRANCE" - │ └── Between { expr: #17, lower: Cast { cast_to: Date32, expr: "1995-01-01" }, upper: Cast { cast_to: Date32, expr: "1996-12-31" } } - └── LogicalJoin { join_type: Cross, cond: true } - ├── LogicalJoin { join_type: Cross, cond: true } - │ ├── LogicalJoin { join_type: Cross, cond: true } - │ │ ├── LogicalJoin { join_type: Cross, cond: true } - │ │ │ ├── LogicalJoin { join_type: Cross, cond: true } - │ │ │ │ ├── LogicalScan { table: supplier } - │ │ │ │ └── LogicalScan { table: lineitem } - │ │ │ └── LogicalScan { table: orders } - │ │ └── LogicalScan { table: customer } - │ └── LogicalScan { table: nation } - └── LogicalScan { table: nation } -PhysicalSort -├── exprs: -│ ┌── SortOrder { order: Asc } -│ │ └── #0 -│ ├── SortOrder { order: Asc } -│ │ └── #1 -│ └── SortOrder { order: Asc } -│ └── #2 -└── PhysicalProjection { exprs: [ #0, #1, #2, #3 ] } - └── PhysicalAgg - ├── aggrs:Agg(Sum) - │ └── [ #3 ] - ├── groups: [ #0, #1, #2 ] - └── PhysicalProjection - ├── exprs: - │ ┌── #41 - │ ├── #45 - │ ├── Scalar(DatePart) - │ │ └── [ "YEAR", #17 ] - │ └── Mul - │ ├── #12 - │ └── Sub - │ ├── Cast { cast_to: Decimal128(20, 0), expr: 1 } - │ └── #13 - └── PhysicalNestedLoopJoin - ├── join_type: Inner - ├── cond:And - │ ├── Eq - │ │ ├── #35 - │ │ └── #44 - │ └── Or - │ ├── And - │ │ ├── Eq - │ │ │ ├── #41 - │ 
│ │ └── "FRANCE" - │ │ └── Eq - │ │ ├── #45 - │ │ └── "GERMANY" - │ └── And - │ ├── Eq - │ │ ├── #41 - │ │ └── "GERMANY" - │ └── Eq - │ ├── #45 - │ └── "FRANCE" - ├── PhysicalHashJoin { join_type: Inner, left_keys: [ #3 ], right_keys: [ #0 ] } - │ ├── PhysicalHashJoin { join_type: Inner, left_keys: [ #0 ], right_keys: [ #2 ] } - │ │ ├── PhysicalScan { table: supplier } - │ │ └── PhysicalHashJoin { join_type: Inner, left_keys: [ #0 ], right_keys: [ #0 ] } - │ │ ├── PhysicalFilter { cond: Between { expr: #10, lower: Cast { cast_to: Date32, expr: "1995-01-01" }, upper: Cast { cast_to: Date32, expr: "1996-12-31" } } } - │ │ │ └── PhysicalScan { table: lineitem } - │ │ └── PhysicalHashJoin { join_type: Inner, left_keys: [ #1 ], right_keys: [ #0 ] } - │ │ ├── PhysicalScan { table: orders } - │ │ └── PhysicalScan { table: customer } - │ └── PhysicalScan { table: nation } - └── PhysicalScan { table: nation } -*/ - --- TPC-H Q8 without top-most limit node -select - o_year, - sum(case - when nation = 'IRAQ' then volume - else 0 - end) / sum(volume) as mkt_share -from - ( - select - extract(year from o_orderdate) as o_year, - l_extendedprice * (1 - l_discount) as volume, - n2.n_name as nation - from - part, - supplier, - lineitem, - orders, - customer, - nation n1, - nation n2, - region - where - p_partkey = l_partkey - and s_suppkey = l_suppkey - and l_orderkey = o_orderkey - and o_custkey = c_custkey - and c_nationkey = n1.n_nationkey - and n1.n_regionkey = r_regionkey - and r_name = 'AMERICA' - and s_nationkey = n2.n_nationkey - and o_orderdate between date '1995-01-01' and date '1996-12-31' - and p_type = 'ECONOMY ANODIZED STEEL' - ) as all_nations -group by - o_year -order by - o_year; - -/* -LogicalSort -├── exprs:SortOrder { order: Asc } -│ └── #0 -└── LogicalProjection - ├── exprs: - │ ┌── #0 - │ └── Div - │ ├── #1 - │ └── #2 - └── LogicalAgg - ├── exprs: - │ ┌── Agg(Sum) - │ │ └── Case - │ │ └── - │ │ ┌── Eq - │ │ │ ├── #2 - │ │ │ └── "IRAQ" - │ │ ├── #1 - │ │ └── 
Cast { cast_to: Decimal128(38, 4), expr: 0 } - │ └── Agg(Sum) - │ └── [ #1 ] - ├── groups: [ #0 ] - └── LogicalProjection - ├── exprs: - │ ┌── Scalar(DatePart) - │ │ └── [ "YEAR", #36 ] - │ ├── Mul - │ │ ├── #21 - │ │ └── Sub - │ │ ├── Cast { cast_to: Decimal128(20, 0), expr: 1 } - │ │ └── #22 - │ └── #54 - └── LogicalFilter - ├── cond:And - │ ├── Eq - │ │ ├── #0 - │ │ └── #17 - │ ├── Eq - │ │ ├── #9 - │ │ └── #18 - │ ├── Eq - │ │ ├── #16 - │ │ └── #32 - │ ├── Eq - │ │ ├── #33 - │ │ └── #41 - │ ├── Eq - │ │ ├── #44 - │ │ └── #49 - │ ├── Eq - │ │ ├── #51 - │ │ └── #57 - │ ├── Eq - │ │ ├── #58 - │ │ └── "AMERICA" - │ ├── Eq - │ │ ├── #12 - │ │ └── #53 - │ ├── Between { expr: #36, lower: Cast { cast_to: Date32, expr: "1995-01-01" }, upper: Cast { cast_to: Date32, expr: "1996-12-31" } } - │ └── Eq - │ ├── #4 - │ └── "ECONOMY ANODIZED STEEL" - └── LogicalJoin { join_type: Cross, cond: true } - ├── LogicalJoin { join_type: Cross, cond: true } - │ ├── LogicalJoin { join_type: Cross, cond: true } - │ │ ├── LogicalJoin { join_type: Cross, cond: true } - │ │ │ ├── LogicalJoin { join_type: Cross, cond: true } - │ │ │ │ ├── LogicalJoin { join_type: Cross, cond: true } - │ │ │ │ │ ├── LogicalJoin { join_type: Cross, cond: true } - │ │ │ │ │ │ ├── LogicalScan { table: part } - │ │ │ │ │ │ └── LogicalScan { table: supplier } - │ │ │ │ │ └── LogicalScan { table: lineitem } - │ │ │ │ └── LogicalScan { table: orders } - │ │ │ └── LogicalScan { table: customer } - │ │ └── LogicalScan { table: nation } - │ └── LogicalScan { table: nation } - └── LogicalScan { table: region } -PhysicalSort -├── exprs:SortOrder { order: Asc } -│ └── #0 -└── PhysicalProjection - ├── exprs: - │ ┌── #0 - │ └── Div - │ ├── #1 - │ └── #2 - └── PhysicalAgg - ├── aggrs: - │ ┌── Agg(Sum) - │ │ └── Case - │ │ └── - │ │ ┌── Eq - │ │ │ ├── #2 - │ │ │ └── "IRAQ" - │ │ ├── #1 - │ │ └── Cast { cast_to: Decimal128(38, 4), expr: 0 } - │ └── Agg(Sum) - │ └── [ #1 ] - ├── groups: [ #0 ] - └── PhysicalProjection - ├── 
exprs: - │ ┌── Scalar(DatePart) - │ │ └── [ "YEAR", #36 ] - │ ├── Mul - │ │ ├── #21 - │ │ └── Sub - │ │ ├── Cast { cast_to: Decimal128(20, 0), expr: 1 } - │ │ └── #22 - │ └── #54 - └── PhysicalHashJoin { join_type: Inner, left_keys: [ #51 ], right_keys: [ #0 ] } - ├── PhysicalHashJoin { join_type: Inner, left_keys: [ #12 ], right_keys: [ #0 ] } - │ ├── PhysicalHashJoin { join_type: Inner, left_keys: [ #0, #9 ], right_keys: [ #1, #2 ] } - │ │ ├── PhysicalNestedLoopJoin { join_type: Cross, cond: true } - │ │ │ ├── PhysicalFilter - │ │ │ │ ├── cond:Eq - │ │ │ │ │ ├── #4 - │ │ │ │ │ └── "ECONOMY ANODIZED STEEL" - │ │ │ │ └── PhysicalScan { table: part } - │ │ │ └── PhysicalScan { table: supplier } - │ │ └── PhysicalHashJoin { join_type: Inner, left_keys: [ #0 ], right_keys: [ #0 ] } - │ │ ├── PhysicalScan { table: lineitem } - │ │ └── PhysicalHashJoin { join_type: Inner, left_keys: [ #1 ], right_keys: [ #0 ] } - │ │ ├── PhysicalFilter { cond: Between { expr: #4, lower: Cast { cast_to: Date32, expr: "1995-01-01" }, upper: Cast { cast_to: Date32, expr: "1996-12-31" } } } - │ │ │ └── PhysicalScan { table: orders } - │ │ └── PhysicalHashJoin { join_type: Inner, left_keys: [ #3 ], right_keys: [ #0 ] } - │ │ ├── PhysicalScan { table: customer } - │ │ └── PhysicalScan { table: nation } - │ └── PhysicalScan { table: nation } - └── PhysicalFilter - ├── cond:Eq - │ ├── #1 - │ └── "AMERICA" - └── PhysicalScan { table: region } -*/ - --- TPC-H Q9 -SELECT - nation, - o_year, - SUM(amount) AS sum_profit -FROM - ( - SELECT - n_name AS nation, - EXTRACT(YEAR FROM o_orderdate) AS o_year, - l_extendedprice * (1 - l_discount) - ps_supplycost * l_quantity AS amount - FROM - part, - supplier, - lineitem, - partsupp, - orders, - nation - WHERE - s_suppkey = l_suppkey - AND ps_suppkey = l_suppkey - AND ps_partkey = l_partkey - AND p_partkey = l_partkey - AND o_orderkey = l_orderkey - AND s_nationkey = n_nationkey - AND p_name LIKE '%green%' - ) AS profit -GROUP BY - nation, - o_year -ORDER 
BY - nation, - o_year DESC; - -/* -LogicalSort -├── exprs: -│ ┌── SortOrder { order: Asc } -│ │ └── #0 -│ └── SortOrder { order: Desc } -│ └── #1 -└── LogicalProjection { exprs: [ #0, #1, #2 ] } - └── LogicalAgg - ├── exprs:Agg(Sum) - │ └── [ #2 ] - ├── groups: [ #0, #1 ] - └── LogicalProjection - ├── exprs: - │ ┌── #47 - │ ├── Scalar(DatePart) - │ │ └── [ "YEAR", #41 ] - │ └── Sub - │ ├── Mul - │ │ ├── #21 - │ │ └── Sub - │ │ ├── Cast { cast_to: Decimal128(20, 0), expr: 1 } - │ │ └── #22 - │ └── Mul - │ ├── #35 - │ └── #20 - └── LogicalFilter - ├── cond:And - │ ├── Eq - │ │ ├── #9 - │ │ └── #18 - │ ├── Eq - │ │ ├── #33 - │ │ └── #18 - │ ├── Eq - │ │ ├── #32 - │ │ └── #17 - │ ├── Eq - │ │ ├── #0 - │ │ └── #17 - │ ├── Eq - │ │ ├── #37 - │ │ └── #16 - │ ├── Eq - │ │ ├── #12 - │ │ └── #46 - │ └── Like { expr: #1, pattern: "%green%", negated: false, case_insensitive: false } - └── LogicalJoin { join_type: Cross, cond: true } - ├── LogicalJoin { join_type: Cross, cond: true } - │ ├── LogicalJoin { join_type: Cross, cond: true } - │ │ ├── LogicalJoin { join_type: Cross, cond: true } - │ │ │ ├── LogicalJoin { join_type: Cross, cond: true } - │ │ │ │ ├── LogicalScan { table: part } - │ │ │ │ └── LogicalScan { table: supplier } - │ │ │ └── LogicalScan { table: lineitem } - │ │ └── LogicalScan { table: partsupp } - │ └── LogicalScan { table: orders } - └── LogicalScan { table: nation } -PhysicalSort -├── exprs: -│ ┌── SortOrder { order: Asc } -│ │ └── #0 -│ └── SortOrder { order: Desc } -│ └── #1 -└── PhysicalProjection { exprs: [ #0, #1, #2 ] } - └── PhysicalAgg - ├── aggrs:Agg(Sum) - │ └── [ #2 ] - ├── groups: [ #0, #1 ] - └── PhysicalProjection - ├── exprs: - │ ┌── #47 - │ ├── Scalar(DatePart) - │ │ └── [ "YEAR", #41 ] - │ └── Sub - │ ├── Mul - │ │ ├── #21 - │ │ └── Sub - │ │ ├── Cast { cast_to: Decimal128(20, 0), expr: 1 } - │ │ └── #22 - │ └── Mul - │ ├── #35 - │ └── #20 - └── PhysicalHashJoin { join_type: Inner, left_keys: [ #12 ], right_keys: [ #0 ] } - ├── 
PhysicalHashJoin { join_type: Inner, left_keys: [ #9, #0 ], right_keys: [ #2, #1 ] } - │ ├── PhysicalNestedLoopJoin { join_type: Cross, cond: true } - │ │ ├── PhysicalFilter { cond: Like { expr: #1, pattern: "%green%", negated: false, case_insensitive: false } } - │ │ │ └── PhysicalScan { table: part } - │ │ └── PhysicalScan { table: supplier } - │ └── PhysicalHashJoin { join_type: Inner, left_keys: [ #0 ], right_keys: [ #0 ] } - │ ├── PhysicalHashJoin { join_type: Inner, left_keys: [ #2, #1 ], right_keys: [ #1, #0 ] } - │ │ ├── PhysicalScan { table: lineitem } - │ │ └── PhysicalScan { table: partsupp } - │ └── PhysicalScan { table: orders } - └── PhysicalScan { table: nation } -*/ - --- TPC-H Q9 -SELECT - nation, - o_year, - SUM(amount) AS sum_profit -FROM - ( - SELECT - n_name AS nation, - EXTRACT(YEAR FROM o_orderdate) AS o_year, - l_extendedprice * (1 - l_discount) - ps_supplycost * l_quantity AS amount - FROM - part, - supplier, - lineitem, - partsupp, - orders, - nation - WHERE - s_suppkey = l_suppkey - AND ps_suppkey = l_suppkey - AND ps_partkey = l_partkey - AND p_partkey = l_partkey - AND o_orderkey = l_orderkey - AND s_nationkey = n_nationkey - AND p_name LIKE '%green%' - ) AS profit -GROUP BY - nation, - o_year -ORDER BY - nation, - o_year DESC; - -/* -LogicalSort -├── exprs: -│ ┌── SortOrder { order: Asc } -│ │ └── #0 -│ └── SortOrder { order: Desc } -│ └── #1 -└── LogicalProjection { exprs: [ #0, #1, #2 ] } - └── LogicalAgg - ├── exprs:Agg(Sum) - │ └── [ #2 ] - ├── groups: [ #0, #1 ] - └── LogicalProjection - ├── exprs: - │ ┌── #47 - │ ├── Scalar(DatePart) - │ │ └── [ "YEAR", #41 ] - │ └── Sub - │ ├── Mul - │ │ ├── #21 - │ │ └── Sub - │ │ ├── Cast { cast_to: Decimal128(20, 0), expr: 1 } - │ │ └── #22 - │ └── Mul - │ ├── #35 - │ └── #20 - └── LogicalFilter - ├── cond:And - │ ├── Eq - │ │ ├── #9 - │ │ └── #18 - │ ├── Eq - │ │ ├── #33 - │ │ └── #18 - │ ├── Eq - │ │ ├── #32 - │ │ └── #17 - │ ├── Eq - │ │ ├── #0 - │ │ └── #17 - │ ├── Eq - │ │ ├── #37 - │ │ 
└── #16 - │ ├── Eq - │ │ ├── #12 - │ │ └── #46 - │ └── Like { expr: #1, pattern: "%green%", negated: false, case_insensitive: false } - └── LogicalJoin { join_type: Cross, cond: true } - ├── LogicalJoin { join_type: Cross, cond: true } - │ ├── LogicalJoin { join_type: Cross, cond: true } - │ │ ├── LogicalJoin { join_type: Cross, cond: true } - │ │ │ ├── LogicalJoin { join_type: Cross, cond: true } - │ │ │ │ ├── LogicalScan { table: part } - │ │ │ │ └── LogicalScan { table: supplier } - │ │ │ └── LogicalScan { table: lineitem } - │ │ └── LogicalScan { table: partsupp } - │ └── LogicalScan { table: orders } - └── LogicalScan { table: nation } -PhysicalSort -├── exprs: -│ ┌── SortOrder { order: Asc } -│ │ └── #0 -│ └── SortOrder { order: Desc } -│ └── #1 -└── PhysicalProjection { exprs: [ #0, #1, #2 ] } - └── PhysicalAgg - ├── aggrs:Agg(Sum) - │ └── [ #2 ] - ├── groups: [ #0, #1 ] - └── PhysicalProjection - ├── exprs: - │ ┌── #47 - │ ├── Scalar(DatePart) - │ │ └── [ "YEAR", #41 ] - │ └── Sub - │ ├── Mul - │ │ ├── #21 - │ │ └── Sub - │ │ ├── Cast { cast_to: Decimal128(20, 0), expr: 1 } - │ │ └── #22 - │ └── Mul - │ ├── #35 - │ └── #20 - └── PhysicalHashJoin { join_type: Inner, left_keys: [ #12 ], right_keys: [ #0 ] } - ├── PhysicalHashJoin { join_type: Inner, left_keys: [ #9, #0 ], right_keys: [ #2, #1 ] } - │ ├── PhysicalNestedLoopJoin { join_type: Cross, cond: true } - │ │ ├── PhysicalFilter { cond: Like { expr: #1, pattern: "%green%", negated: false, case_insensitive: false } } - │ │ │ └── PhysicalScan { table: part } - │ │ └── PhysicalScan { table: supplier } - │ └── PhysicalHashJoin { join_type: Inner, left_keys: [ #0 ], right_keys: [ #0 ] } - │ ├── PhysicalHashJoin { join_type: Inner, left_keys: [ #2, #1 ], right_keys: [ #1, #0 ] } - │ │ ├── PhysicalScan { table: lineitem } - │ │ └── PhysicalScan { table: partsupp } - │ └── PhysicalScan { table: orders } - └── PhysicalScan { table: nation } -*/ - --- TPC-H Q10 -SELECT - c_custkey, - c_name, - sum(l_extendedprice 
* (1 - l_discount)) as revenue, - c_acctbal, - n_name, - c_address, - c_phone, - c_comment -FROM - customer, - orders, - lineitem, - nation -WHERE - c_custkey = o_custkey - AND l_orderkey = o_orderkey - AND o_orderdate >= DATE '1993-07-01' - AND o_orderdate < DATE '1993-07-01' + INTERVAL '3' MONTH - AND l_returnflag = 'R' - AND c_nationkey = n_nationkey -GROUP BY - c_custkey, - c_name, - c_acctbal, - c_phone, - n_name, - c_address, - c_comment -ORDER BY - revenue DESC -LIMIT 20; - -/* -LogicalLimit { skip: 0, fetch: 20 } -└── LogicalSort - ├── exprs:SortOrder { order: Desc } - │ └── #2 - └── LogicalProjection { exprs: [ #0, #1, #7, #2, #4, #5, #3, #6 ] } - └── LogicalAgg - ├── exprs:Agg(Sum) - │ └── Mul - │ ├── #22 - │ └── Sub - │ ├── Cast { cast_to: Decimal128(20, 0), expr: 1 } - │ └── #23 - ├── groups: [ #0, #1, #5, #4, #34, #2, #7 ] - └── LogicalFilter - ├── cond:And - │ ├── Eq - │ │ ├── #0 - │ │ └── #9 - │ ├── Eq - │ │ ├── #17 - │ │ └── #8 - │ ├── Geq - │ │ ├── #12 - │ │ └── Cast { cast_to: Date32, expr: "1993-07-01" } - │ ├── Lt - │ │ ├── #12 - │ │ └── Add - │ │ ├── Cast { cast_to: Date32, expr: "1993-07-01" } - │ │ └── INTERVAL_MONTH_DAY_NANO (3, 0, 0) - │ ├── Eq - │ │ ├── #25 - │ │ └── "R" - │ └── Eq - │ ├── #3 - │ └── #33 - └── LogicalJoin { join_type: Cross, cond: true } - ├── LogicalJoin { join_type: Cross, cond: true } - │ ├── LogicalJoin { join_type: Cross, cond: true } - │ │ ├── LogicalScan { table: customer } - │ │ └── LogicalScan { table: orders } - │ └── LogicalScan { table: lineitem } - └── LogicalScan { table: nation } -PhysicalLimit { skip: 0, fetch: 20 } -└── PhysicalSort - ├── exprs:SortOrder { order: Desc } - │ └── #2 - └── PhysicalProjection { exprs: [ #0, #1, #7, #2, #4, #5, #3, #6 ] } - └── PhysicalAgg - ├── aggrs:Agg(Sum) - │ └── Mul - │ ├── #22 - │ └── Sub - │ ├── Cast { cast_to: Decimal128(20, 0), expr: 1 } - │ └── #23 - ├── groups: [ #0, #1, #5, #4, #34, #2, #7 ] - └── PhysicalHashJoin { join_type: Inner, left_keys: [ #3 ], right_keys: 
[ #0 ] } - ├── PhysicalHashJoin { join_type: Inner, left_keys: [ #0 ], right_keys: [ #1 ] } - │ ├── PhysicalScan { table: customer } - │ └── PhysicalHashJoin { join_type: Inner, left_keys: [ #0 ], right_keys: [ #0 ] } - │ ├── PhysicalFilter - │ │ ├── cond:And - │ │ │ ├── Geq - │ │ │ │ ├── #4 - │ │ │ │ └── Cast { cast_to: Date32, expr: "1993-07-01" } - │ │ │ └── Lt - │ │ │ ├── #4 - │ │ │ └── Add - │ │ │ ├── Cast { cast_to: Date32, expr: "1993-07-01" } - │ │ │ └── INTERVAL_MONTH_DAY_NANO (3, 0, 0) - │ │ └── PhysicalScan { table: orders } - │ └── PhysicalFilter - │ ├── cond:Eq - │ │ ├── #8 - │ │ └── "R" - │ └── PhysicalScan { table: lineitem } - └── PhysicalScan { table: nation } -*/ - --- TPC-H Q12 -SELECT - l_shipmode, - sum(case when o_orderpriority = '1-URGENT' - or o_orderpriority = '2-HIGH' - then 1 else 0 end) as high_priority_orders, - sum(case when o_orderpriority <> '1-URGENT' - and o_orderpriority <> '2-HIGH' - then 1 else 0 end) as low_priority_orders -FROM - orders, - lineitem -WHERE - o_orderkey = l_orderkey - AND l_shipmode in ('MAIL', 'SHIP') - AND l_commitdate < l_receiptdate - AND l_shipdate < l_commitdate - AND l_receiptdate >= DATE '1994-01-01' - AND l_receiptdate < DATE '1995-01-01' -GROUP BY - l_shipmode -ORDER BY - l_shipmode; - -/* -LogicalSort -├── exprs:SortOrder { order: Asc } -│ └── #0 -└── LogicalProjection { exprs: [ #0, #1, #2 ] } - └── LogicalAgg - ├── exprs: - │ ┌── Agg(Sum) - │ │ └── Case - │ │ └── - │ │ ┌── Or - │ │ │ ├── Eq - │ │ │ │ ├── #5 - │ │ │ │ └── "1-URGENT" - │ │ │ └── Eq - │ │ │ ├── #5 - │ │ │ └── "2-HIGH" - │ │ ├── 1 - │ │ └── 0 - │ └── Agg(Sum) - │ └── Case - │ └── - │ ┌── And - │ │ ├── Neq - │ │ │ ├── #5 - │ │ │ └── "1-URGENT" - │ │ └── Neq - │ │ ├── #5 - │ │ └── "2-HIGH" - │ ├── 1 - │ └── 0 - ├── groups: [ #23 ] - └── LogicalFilter - ├── cond:And - │ ├── Eq - │ │ ├── #0 - │ │ └── #9 - │ ├── InList { expr: #23, list: [ "MAIL", "SHIP" ], negated: false } - │ ├── Lt - │ │ ├── #20 - │ │ └── #21 - │ ├── Lt - │ │ ├── #19 - │ 
│ └── #20 - │ ├── Geq - │ │ ├── #21 - │ │ └── Cast { cast_to: Date32, expr: "1994-01-01" } - │ └── Lt - │ ├── #21 - │ └── Cast { cast_to: Date32, expr: "1995-01-01" } - └── LogicalJoin { join_type: Cross, cond: true } - ├── LogicalScan { table: orders } - └── LogicalScan { table: lineitem } -PhysicalSort -├── exprs:SortOrder { order: Asc } -│ └── #0 -└── PhysicalProjection { exprs: [ #0, #1, #2 ] } - └── PhysicalAgg - ├── aggrs: - │ ┌── Agg(Sum) - │ │ └── Case - │ │ └── - │ │ ┌── Or - │ │ │ ├── Eq - │ │ │ │ ├── #5 - │ │ │ │ └── "1-URGENT" - │ │ │ └── Eq - │ │ │ ├── #5 - │ │ │ └── "2-HIGH" - │ │ ├── 1 - │ │ └── 0 - │ └── Agg(Sum) - │ └── Case - │ └── - │ ┌── And - │ │ ├── Neq - │ │ │ ├── #5 - │ │ │ └── "1-URGENT" - │ │ └── Neq - │ │ ├── #5 - │ │ └── "2-HIGH" - │ ├── 1 - │ └── 0 - ├── groups: [ #23 ] - └── PhysicalHashJoin { join_type: Inner, left_keys: [ #0 ], right_keys: [ #0 ] } - ├── PhysicalScan { table: orders } - └── PhysicalFilter - ├── cond:And - │ ├── InList { expr: #14, list: [ "MAIL", "SHIP" ], negated: false } - │ ├── Lt - │ │ ├── #11 - │ │ └── #12 - │ ├── Lt - │ │ ├── #10 - │ │ └── #11 - │ ├── Geq - │ │ ├── #12 - │ │ └── Cast { cast_to: Date32, expr: "1994-01-01" } - │ └── Lt - │ ├── #12 - │ └── Cast { cast_to: Date32, expr: "1995-01-01" } - └── PhysicalScan { table: lineitem } -*/ - --- TPC-H Q14 -SELECT - 100.00 * sum(case when p_type like 'PROMO%' - then l_extendedprice * (1 - l_discount) - else 0 end) / sum(l_extendedprice * (1 - l_discount)) as promo_revenue -FROM - lineitem, - part -WHERE - l_partkey = p_partkey - AND l_shipdate >= DATE '1995-09-01' - AND l_shipdate < DATE '1995-09-01' + INTERVAL '1' MONTH; - -/* -LogicalProjection -├── exprs:Div -│ ├── Mul -│ │ ├── 100 -│ │ └── Cast { cast_to: Float64, expr: #0 } -│ └── Cast { cast_to: Float64, expr: #1 } -└── LogicalAgg - ├── exprs: - │ ┌── Agg(Sum) - │ │ └── Case - │ │ └── - │ │ ┌── Like { expr: #20, pattern: "PROMO%", negated: false, case_insensitive: false } - │ │ ├── Mul - │ │ │ ├── #5 - │ │ 
│ └── Sub - │ │ │ ├── Cast { cast_to: Decimal128(20, 0), expr: 1 } - │ │ │ └── #6 - │ │ └── Cast { cast_to: Decimal128(38, 4), expr: 0 } - │ └── Agg(Sum) - │ └── Mul - │ ├── #5 - │ └── Sub - │ ├── Cast { cast_to: Decimal128(20, 0), expr: 1 } - │ └── #6 - ├── groups: [] - └── LogicalFilter - ├── cond:And - │ ├── Eq - │ │ ├── #1 - │ │ └── #16 - │ ├── Geq - │ │ ├── #10 - │ │ └── Cast { cast_to: Date32, expr: "1995-09-01" } - │ └── Lt - │ ├── #10 - │ └── Add - │ ├── Cast { cast_to: Date32, expr: "1995-09-01" } - │ └── INTERVAL_MONTH_DAY_NANO (1, 0, 0) - └── LogicalJoin { join_type: Cross, cond: true } - ├── LogicalScan { table: lineitem } - └── LogicalScan { table: part } -PhysicalProjection -├── exprs:Div -│ ├── Mul -│ │ ├── 100 -│ │ └── Cast { cast_to: Float64, expr: #0 } -│ └── Cast { cast_to: Float64, expr: #1 } -└── PhysicalAgg - ├── aggrs: - │ ┌── Agg(Sum) - │ │ └── Case - │ │ └── - │ │ ┌── Like { expr: #20, pattern: "PROMO%", negated: false, case_insensitive: false } - │ │ ├── Mul - │ │ │ ├── #5 - │ │ │ └── Sub - │ │ │ ├── Cast { cast_to: Decimal128(20, 0), expr: 1 } - │ │ │ └── #6 - │ │ └── Cast { cast_to: Decimal128(38, 4), expr: 0 } - │ └── Agg(Sum) - │ └── Mul - │ ├── #5 - │ └── Sub - │ ├── Cast { cast_to: Decimal128(20, 0), expr: 1 } - │ └── #6 - ├── groups: [] - └── PhysicalHashJoin { join_type: Inner, left_keys: [ #1 ], right_keys: [ #0 ] } - ├── PhysicalFilter - │ ├── cond:And - │ │ ├── Geq - │ │ │ ├── #10 - │ │ │ └── Cast { cast_to: Date32, expr: "1995-09-01" } - │ │ └── Lt - │ │ ├── #10 - │ │ └── Add - │ │ ├── Cast { cast_to: Date32, expr: "1995-09-01" } - │ │ └── INTERVAL_MONTH_DAY_NANO (1, 0, 0) - │ └── PhysicalScan { table: lineitem } - └── PhysicalScan { table: part } -*/ - --- TPC-H Q15 -WITH revenue0 (supplier_no, total_revenue) AS -( - SELECT - l_suppkey, - SUM(l_extendedprice * (1 - l_discount)) - FROM - lineitem - WHERE - l_shipdate >= DATE '1993-01-01' - AND l_shipdate < DATE '1993-01-01' + INTERVAL '3' MONTH - GROUP BY - l_suppkey -) -SELECT 
- s_suppkey, - s_name, - s_address, - s_phone, - total_revenue -FROM - supplier, - revenue0 -WHERE - s_suppkey = supplier_no - AND total_revenue = - ( - SELECT - MAX(total_revenue) - FROM - revenue0 - ) -ORDER BY - s_suppkey; - -/* -LogicalSort -├── exprs:SortOrder { order: Asc } -│ └── #0 -└── LogicalProjection { exprs: [ #0, #1, #2, #3, #4 ] } - └── LogicalJoin - ├── join_type: Inner - ├── cond:Eq - │ ├── #4 - │ └── #5 - ├── LogicalProjection { exprs: [ #0, #1, #2, #3, #5 ] } - │ └── LogicalJoin - │ ├── join_type: Inner - │ ├── cond:Eq - │ │ ├── #0 - │ │ └── #4 - │ ├── LogicalProjection { exprs: [ #0, #1, #2, #4 ] } - │ │ └── LogicalScan { table: supplier } - │ └── LogicalProjection { exprs: [ #0, #1 ] } - │ └── LogicalAgg - │ ├── exprs:Agg(Sum) - │ │ └── Mul - │ │ ├── #1 - │ │ └── Sub - │ │ ├── 1 - │ │ └── #2 - │ ├── groups: [ #0 ] - │ └── LogicalProjection { exprs: [ #0, #1, #2 ] } - │ └── LogicalFilter - │ ├── cond:And - │ │ ├── Geq - │ │ │ ├── #3 - │ │ │ └── 8401 - │ │ └── Lt - │ │ ├── #3 - │ │ └── 8491 - │ └── LogicalProjection { exprs: [ #2, #5, #6, #10 ] } - │ └── LogicalScan { table: lineitem } - └── LogicalAgg - ├── exprs:Agg(Max) - │ └── [ #0 ] - ├── groups: [] - └── LogicalProjection { exprs: [ #1 ] } - └── LogicalAgg - ├── exprs:Agg(Sum) - │ └── Mul - │ ├── #1 - │ └── Sub - │ ├── 1 - │ └── #2 - ├── groups: [ #0 ] - └── LogicalProjection { exprs: [ #0, #1, #2 ] } - └── LogicalFilter - ├── cond:And - │ ├── Geq - │ │ ├── #3 - │ │ └── 8401 - │ └── Lt - │ ├── #3 - │ └── 8491 - └── LogicalProjection { exprs: [ #2, #5, #6, #10 ] } - └── LogicalScan { table: lineitem } -PhysicalSort -├── exprs:SortOrder { order: Asc } -│ └── #0 -└── PhysicalProjection { exprs: [ #0, #1, #2, #3, #4 ] } - └── PhysicalHashJoin { join_type: Inner, left_keys: [ #4 ], right_keys: [ #0 ] } - ├── PhysicalProjection { exprs: [ #0, #1, #2, #3, #5 ] } - │ └── PhysicalHashJoin { join_type: Inner, left_keys: [ #0 ], right_keys: [ #0 ] } - │ ├── PhysicalProjection { exprs: [ #0, #1, #2, #4 
] } - │ │ └── PhysicalScan { table: supplier } - │ └── PhysicalProjection { exprs: [ #0, #1 ] } - │ └── PhysicalAgg - │ ├── aggrs:Agg(Sum) - │ │ └── Mul - │ │ ├── #1 - │ │ └── Sub - │ │ ├── 1 - │ │ └── #2 - │ ├── groups: [ #0 ] - │ └── PhysicalProjection { exprs: [ #0, #1, #2 ] } - │ └── PhysicalProjection { exprs: [ #2, #5, #6, #10 ] } - │ └── PhysicalFilter - │ ├── cond:And - │ │ ├── Geq - │ │ │ ├── #10 - │ │ │ └── 8401 - │ │ └── Lt - │ │ ├── #10 - │ │ └── 8491 - │ └── PhysicalScan { table: lineitem } - └── PhysicalAgg - ├── aggrs:Agg(Max) - │ └── [ #0 ] - ├── groups: [] - └── PhysicalProjection { exprs: [ #1 ] } - └── PhysicalAgg - ├── aggrs:Agg(Sum) - │ └── Mul - │ ├── #1 - │ └── Sub - │ ├── 1 - │ └── #2 - ├── groups: [ #0 ] - └── PhysicalProjection { exprs: [ #0, #1, #2 ] } - └── PhysicalProjection { exprs: [ #2, #5, #6, #10 ] } - └── PhysicalFilter - ├── cond:And - │ ├── Geq - │ │ ├── #10 - │ │ └── 8401 - │ └── Lt - │ ├── #10 - │ └── 8491 - └── PhysicalScan { table: lineitem } -*/ - --- TPC-H Q17 -SELECT - ROUND(SUM(l_extendedprice) / 7.0, 16) AS avg_yearly -FROM - lineitem, - part -WHERE - p_partkey = l_partkey - AND p_brand = 'Brand#13' - AND p_container = 'JUMBO PKG' - AND l_quantity < ( - SELECT - 0.2 * AVG(l_quantity) - FROM - lineitem - WHERE - l_partkey = p_partkey - ); - -/* -LogicalProjection -├── exprs:Scalar(Round) -│ └── -│ ┌── Div -│ │ ├── Cast { cast_to: Float64, expr: #0 } -│ │ └── 7 -│ └── 16 -└── LogicalAgg - ├── exprs:Agg(Sum) - │ └── [ #0 ] - ├── groups: [] - └── LogicalProjection { exprs: [ #1 ] } - └── LogicalJoin - ├── join_type: Inner - ├── cond:And - │ ├── Eq - │ │ ├── #2 - │ │ └── #4 - │ └── Lt - │ ├── Cast { cast_to: Decimal128(30, 15), expr: #0 } - │ └── #3 - ├── LogicalProjection { exprs: [ #1, #2, #3 ] } - │ └── LogicalJoin - │ ├── join_type: Inner - │ ├── cond:Eq - │ │ ├── #0 - │ │ └── #3 - │ ├── LogicalProjection { exprs: [ #1, #4, #5 ] } - │ │ └── LogicalScan { table: lineitem } - │ └── LogicalProjection { exprs: [ #0 ] } - │ 
└── LogicalFilter - │ ├── cond:And - │ │ ├── Eq - │ │ │ ├── #1 - │ │ │ └── "Brand#13" - │ │ └── Eq - │ │ ├── #2 - │ │ └── "JUMBO PKG" - │ └── LogicalProjection { exprs: [ #0, #3, #6 ] } - │ └── LogicalScan { table: part } - └── LogicalProjection - ├── exprs: - │ ┌── Cast - │ │ ├── cast_to: Decimal128(30, 15) - │ │ ├── expr:Mul - │ │ │ ├── 0.2 - │ │ │ └── Cast { cast_to: Float64, expr: #1 } - - │ └── #0 - └── LogicalAgg - ├── exprs:Agg(Avg) - │ └── [ #1 ] - ├── groups: [ #0 ] - └── LogicalProjection { exprs: [ #1, #4 ] } - └── LogicalScan { table: lineitem } -PhysicalProjection -├── exprs:Scalar(Round) -│ └── -│ ┌── Div -│ │ ├── Cast { cast_to: Float64, expr: #0 } -│ │ └── 7 -│ └── 16 -└── PhysicalAgg - ├── aggrs:Agg(Sum) - │ └── [ #0 ] - ├── groups: [] - └── PhysicalProjection { exprs: [ #1 ] } - └── PhysicalNestedLoopJoin - ├── join_type: Inner - ├── cond:And - │ ├── Eq - │ │ ├── #2 - │ │ └── #0 - │ └── Lt - │ ├── Cast { cast_to: Decimal128(30, 15), expr: #0 } - │ └── #3 - ├── PhysicalProjection { exprs: [ #1, #2, #3 ] } - │ └── PhysicalHashJoin { join_type: Inner, left_keys: [ #0 ], right_keys: [ #0 ] } - │ ├── PhysicalProjection { exprs: [ #1, #4, #5 ] } - │ │ └── PhysicalScan { table: lineitem } - │ └── PhysicalProjection { exprs: [ #0 ] } - │ └── PhysicalProjection { exprs: [ #0, #3, #6 ] } - │ └── PhysicalFilter - │ ├── cond:And - │ │ ├── Eq - │ │ │ ├── #3 - │ │ │ └── "Brand#13" - │ │ └── Eq - │ │ ├── #6 - │ │ └── "JUMBO PKG" - │ └── PhysicalScan { table: part } - └── PhysicalProjection - ├── exprs: - │ ┌── Cast - │ │ ├── cast_to: Decimal128(30, 15) - │ │ ├── expr:Mul - │ │ │ ├── 0.2 - │ │ │ └── Cast { cast_to: Float64, expr: #1 } - - │ └── #0 - └── PhysicalAgg - ├── aggrs:Agg(Avg) - │ └── [ #1 ] - ├── groups: [ #0 ] - └── PhysicalProjection { exprs: [ #1, #4 ] } - └── PhysicalScan { table: lineitem } -*/ - --- TPC-H Q19 -SELECT - sum(l_extendedprice* (1 - l_discount)) as revenue -FROM - lineitem, - part -WHERE - ( - p_partkey = l_partkey - AND p_brand = 
'Brand#12' - AND p_container IN ('SM CASE', 'SM BOX', 'SM PACK', 'SM PKG') - AND l_quantity >= 1 AND l_quantity <= 11 - AND p_size BETWEEN 1 AND 5 - AND l_shipmode IN ('AIR', 'AIR REG') - AND l_shipinstruct = 'DELIVER IN PERSON' - ) OR ( - p_partkey = l_partkey - AND p_brand = 'Brand#23' - AND p_container IN ('MED BAG', 'MED BOX', 'MED PKG', 'MED PACK') - AND l_quantity >= 10 AND l_quantity <= 20 - AND p_size BETWEEN 1 AND 10 - AND l_shipmode IN ('AIR', 'AIR REG') - AND l_shipinstruct = 'DELIVER IN PERSON' - ) OR ( - p_partkey = l_partkey - AND p_brand = 'Brand#34' - AND p_container IN ('LG CASE', 'LG BOX', 'LG PACK', 'LG PKG') - AND l_quantity >= 20 AND l_quantity <= 30 - AND p_size BETWEEN 1 AND 15 - AND l_shipmode IN ('AIR', 'AIR REG') - AND l_shipinstruct = 'DELIVER IN PERSON' - ) - -/* -LogicalProjection { exprs: [ #0 ] } -└── LogicalAgg - ├── exprs:Agg(Sum) - │ └── Mul - │ ├── #5 - │ └── Sub - │ ├── Cast { cast_to: Decimal128(20, 0), expr: 1 } - │ └── #6 - ├── groups: [] - └── LogicalFilter - ├── cond:Or - │ ├── And - │ │ ├── Eq - │ │ │ ├── #16 - │ │ │ └── #1 - │ │ ├── Eq - │ │ │ ├── #19 - │ │ │ └── "Brand#12" - │ │ ├── InList { expr: #22, list: [ "SM CASE", "SM BOX", "SM PACK", "SM PKG" ], negated: false } - │ │ ├── Geq - │ │ │ ├── Cast { cast_to: Decimal128(22, 2), expr: #4 } - │ │ │ └── Cast { cast_to: Decimal128(22, 2), expr: 1 } - │ │ ├── Leq - │ │ │ ├── Cast { cast_to: Decimal128(22, 2), expr: #4 } - │ │ │ └── Cast { cast_to: Decimal128(22, 2), expr: 11 } - │ │ ├── Between { expr: Cast { cast_to: Int64, expr: #21 }, lower: 1, upper: 5 } - │ │ ├── InList { expr: #14, list: [ "AIR", "AIR REG" ], negated: false } - │ │ └── Eq - │ │ ├── #13 - │ │ └── "DELIVER IN PERSON" - │ ├── And - │ │ ├── Eq - │ │ │ ├── #16 - │ │ │ └── #1 - │ │ ├── Eq - │ │ │ ├── #19 - │ │ │ └── "Brand#23" - │ │ ├── InList { expr: #22, list: [ "MED BAG", "MED BOX", "MED PKG", "MED PACK" ], negated: false } - │ │ ├── Geq - │ │ │ ├── Cast { cast_to: Decimal128(22, 2), expr: #4 } - │ │ │ 
└── Cast { cast_to: Decimal128(22, 2), expr: 10 } - │ │ ├── Leq - │ │ │ ├── Cast { cast_to: Decimal128(22, 2), expr: #4 } - │ │ │ └── Cast { cast_to: Decimal128(22, 2), expr: 20 } - │ │ ├── Between { expr: Cast { cast_to: Int64, expr: #21 }, lower: 1, upper: 10 } - │ │ ├── InList { expr: #14, list: [ "AIR", "AIR REG" ], negated: false } - │ │ └── Eq - │ │ ├── #13 - │ │ └── "DELIVER IN PERSON" - │ └── And - │ ├── Eq - │ │ ├── #16 - │ │ └── #1 - │ ├── Eq - │ │ ├── #19 - │ │ └── "Brand#34" - │ ├── InList { expr: #22, list: [ "LG CASE", "LG BOX", "LG PACK", "LG PKG" ], negated: false } - │ ├── Geq - │ │ ├── Cast { cast_to: Decimal128(22, 2), expr: #4 } - │ │ └── Cast { cast_to: Decimal128(22, 2), expr: 20 } - │ ├── Leq - │ │ ├── Cast { cast_to: Decimal128(22, 2), expr: #4 } - │ │ └── Cast { cast_to: Decimal128(22, 2), expr: 30 } - │ ├── Between { expr: Cast { cast_to: Int64, expr: #21 }, lower: 1, upper: 15 } - │ ├── InList { expr: #14, list: [ "AIR", "AIR REG" ], negated: false } - │ └── Eq - │ ├── #13 - │ └── "DELIVER IN PERSON" - └── LogicalJoin { join_type: Cross, cond: true } - ├── LogicalScan { table: lineitem } - └── LogicalScan { table: part } -PhysicalProjection { exprs: [ #0 ] } -└── PhysicalAgg - ├── aggrs:Agg(Sum) - │ └── Mul - │ ├── #5 - │ └── Sub - │ ├── Cast { cast_to: Decimal128(20, 0), expr: 1 } - │ └── #6 - ├── groups: [] - └── PhysicalNestedLoopJoin - ├── join_type: Inner - ├── cond:Or - │ ├── And - │ │ ├── Eq - │ │ │ ├── #16 - │ │ │ └── #1 - │ │ ├── Eq - │ │ │ ├── #19 - │ │ │ └── "Brand#12" - │ │ ├── InList { expr: #22, list: [ "SM CASE", "SM BOX", "SM PACK", "SM PKG" ], negated: false } - │ │ ├── Geq - │ │ │ ├── Cast { cast_to: Decimal128(22, 2), expr: #4 } - │ │ │ └── Cast { cast_to: Decimal128(22, 2), expr: 1 } - │ │ ├── Leq - │ │ │ ├── Cast { cast_to: Decimal128(22, 2), expr: #4 } - │ │ │ └── Cast { cast_to: Decimal128(22, 2), expr: 11 } - │ │ ├── Between { expr: Cast { cast_to: Int64, expr: #21 }, lower: 1, upper: 5 } - │ │ ├── InList { expr: 
#14, list: [ "AIR", "AIR REG" ], negated: false } - │ │ └── Eq - │ │ ├── #13 - │ │ └── "DELIVER IN PERSON" - │ ├── And - │ │ ├── Eq - │ │ │ ├── #16 - │ │ │ └── #1 - │ │ ├── Eq - │ │ │ ├── #19 - │ │ │ └── "Brand#23" - │ │ ├── InList { expr: #22, list: [ "MED BAG", "MED BOX", "MED PKG", "MED PACK" ], negated: false } - │ │ ├── Geq - │ │ │ ├── Cast { cast_to: Decimal128(22, 2), expr: #4 } - │ │ │ └── Cast { cast_to: Decimal128(22, 2), expr: 10 } - │ │ ├── Leq - │ │ │ ├── Cast { cast_to: Decimal128(22, 2), expr: #4 } - │ │ │ └── Cast { cast_to: Decimal128(22, 2), expr: 20 } - │ │ ├── Between { expr: Cast { cast_to: Int64, expr: #21 }, lower: 1, upper: 10 } - │ │ ├── InList { expr: #14, list: [ "AIR", "AIR REG" ], negated: false } - │ │ └── Eq - │ │ ├── #13 - │ │ └── "DELIVER IN PERSON" - │ └── And - │ ├── Eq - │ │ ├── #16 - │ │ └── #1 - │ ├── Eq - │ │ ├── #19 - │ │ └── "Brand#34" - │ ├── InList { expr: #22, list: [ "LG CASE", "LG BOX", "LG PACK", "LG PKG" ], negated: false } - │ ├── Geq - │ │ ├── Cast { cast_to: Decimal128(22, 2), expr: #4 } - │ │ └── Cast { cast_to: Decimal128(22, 2), expr: 20 } - │ ├── Leq - │ │ ├── Cast { cast_to: Decimal128(22, 2), expr: #4 } - │ │ └── Cast { cast_to: Decimal128(22, 2), expr: 30 } - │ ├── Between { expr: Cast { cast_to: Int64, expr: #21 }, lower: 1, upper: 15 } - │ ├── InList { expr: #14, list: [ "AIR", "AIR REG" ], negated: false } - │ └── Eq - │ ├── #13 - │ └── "DELIVER IN PERSON" - ├── PhysicalScan { table: lineitem } - └── PhysicalScan { table: part } -*/ From bbac778c85ea96dc4be9a42ce0203415033adfe0 Mon Sep 17 00:00:00 2001 From: Sweetsuro Date: Thu, 18 Apr 2024 11:05:15 -0400 Subject: [PATCH 59/61] project join passing unit tests --- .../project_join_transpose.rs | 294 +++++++++++++++++- .../project_transpose_common.rs | 23 +- 2 files changed, 304 insertions(+), 13 deletions(-) diff --git a/optd-datafusion-repr/src/rules/project_transpose/project_join_transpose.rs 
b/optd-datafusion-repr/src/rules/project_transpose/project_join_transpose.rs index 02cc0395..cd4bc7ad 100644 --- a/optd-datafusion-repr/src/rules/project_transpose/project_join_transpose.rs +++ b/optd-datafusion-repr/src/rules/project_transpose/project_join_transpose.rs @@ -65,6 +65,7 @@ fn apply_projection_pull_up_join( left_schema.len(), false, true, + 0 ), JoinType::Inner, ) @@ -117,7 +118,8 @@ fn apply_projection_push_down_join( let left_schema = optimizer.get_property::(left.clone(), 0); let left_schema_len = left_schema.len(); - let (left_exprs, right_exprs) = split_exprs(tot_exprs, left_schema_len); + let (left_exprs, right_exprs, is_left_right_ordered) = split_exprs(tot_exprs, left_schema_len); + let new_left_schema_len = left_exprs.len(); let Some(left_exprs_mapping) = ProjectionMapping::build(&left_exprs) else { return vec![]; @@ -128,8 +130,8 @@ fn apply_projection_push_down_join( }; // update join cond based on new left + right child projection nodes - let new_join_cond: Expr = left_exprs_mapping.rewrite_join_cond(cond_as_expr.clone(), left_schema_len, true, true); - let new_join_cond: Expr = right_exprs_mapping.rewrite_join_cond(new_join_cond.clone(), left_schema_len, true, false); + let new_join_cond: Expr = left_exprs_mapping.rewrite_join_cond(cond_as_expr.clone(), left_schema_len, true, true, 0); + let new_join_cond: Expr = right_exprs_mapping.rewrite_join_cond(new_join_cond.clone(), left_schema_len, true, false, new_left_schema_len); let new_left_child = LogicalProjection::new( PlanNode::from_group(left), @@ -151,7 +153,7 @@ fn apply_projection_push_down_join( ) .into_plan_node(); - if dedup_cond_col_refs.is_empty() { + if dedup_cond_col_refs.is_empty() && is_left_right_ordered { // don't need top projection node return vec![new_join_node.into_rel_node().as_ref().clone()]; } @@ -159,7 +161,7 @@ fn apply_projection_push_down_join( // update top projection node based on new left + right child projection nodes let mut top_proj_exprs = vec![]; let mut 
left_col_idx = 0; - let mut right_col_idx = left_schema_len; + let mut right_col_idx = new_left_schema_len; for i in 0..exprs.len() { let old_col_ref = ColumnRefExpr::from_rel_node(exprs_vec[i].clone().into_rel_node()).unwrap(); if old_col_ref.index() < left_schema_len { @@ -177,4 +179,286 @@ fn apply_projection_push_down_join( top_proj_exprs, ); vec![new_top_node.into_rel_node().as_ref().clone()] +} + +#[cfg(test)] +mod tests { + use std::sync::Arc; + + use optd_core::optimizer::Optimizer; + + use crate::{ + plan_nodes::{ + BinOpExpr, BinOpType, ColumnRefExpr, ExprList, JoinType, LogicalJoin, LogicalProjection, LogicalScan, OptRelNode, OptRelNodeTyp + }, + rules::ProjectionPushDownJoin, + testing::new_test_optimizer, + }; + + #[test] + fn proj_pushdown_join_adv_1() { + // convert proj -> join -> 2xscan to join -> 2xproj -> 2xscan + let mut test_optimizer = new_test_optimizer(Arc::new(ProjectionPushDownJoin::new())); + + let left_scan = LogicalScan::new("region".into()); + let right_scan = LogicalScan::new("customer".into()); + + let join_cond = BinOpExpr::new( + ColumnRefExpr::new(2).into_expr(), + ColumnRefExpr::new(7).into_expr(), + BinOpType::Eq, + ) + .into_expr(); + + let proj_exprs = ExprList::new(vec![ + ColumnRefExpr::new(2).into_expr(), + ColumnRefExpr::new(0).into_expr(), + ColumnRefExpr::new(4).into_expr(), + ColumnRefExpr::new(7).into_expr(), + ColumnRefExpr::new(3).into_expr(), + ]); + + let join_node = LogicalJoin::new(left_scan.into_plan_node(), right_scan.into_plan_node(), join_cond, JoinType::Inner); + let proj = LogicalProjection::new(join_node.into_plan_node(), proj_exprs); + + let plan = test_optimizer.optimize(proj.into_rel_node()).unwrap(); + + let left_proj_exprs = ExprList::new(vec![ + ColumnRefExpr::new(2).into_expr(), + ColumnRefExpr::new(0).into_expr(), + ]) + .into_rel_node(); + + let right_proj_exprs = ExprList::new(vec![ + ColumnRefExpr::new(1).into_expr(), + ColumnRefExpr::new(4).into_expr(), + ColumnRefExpr::new(0).into_expr(), + ]) 
+ .into_rel_node(); + + let new_join_cond = BinOpExpr::new( + ColumnRefExpr::new(0).into_expr(), + ColumnRefExpr::new(3).into_expr(), + BinOpType::Eq, + ) + .into_expr().into_rel_node(); + + assert_eq!(plan.typ, OptRelNodeTyp::Join(JoinType::Inner)); + assert_eq!(plan.child(2), new_join_cond); + + assert_eq!(plan.child(0).typ, OptRelNodeTyp::Projection); + assert_eq!(plan.child(1).typ, OptRelNodeTyp::Projection); + assert_eq!(plan.child(0).child(1), left_proj_exprs); + assert_eq!(plan.child(1).child(1), right_proj_exprs); + + assert_eq!(plan.child(0).child(0).typ, OptRelNodeTyp::Scan); + assert_eq!(plan.child(1).child(0).typ, OptRelNodeTyp::Scan); + } + + #[test] + fn proj_pushdown_join_adv_2() { + // convert proj -> join -> 2xscan to proj -> join -> 2xproj -> 2xscan (out of order left + right cols) + let mut test_optimizer = new_test_optimizer(Arc::new(ProjectionPushDownJoin::new())); + + let left_scan = LogicalScan::new("region".into()); + let right_scan = LogicalScan::new("customer".into()); + + let join_cond = BinOpExpr::new( + ColumnRefExpr::new(2).into_expr(), + ColumnRefExpr::new(7).into_expr(), + BinOpType::Eq, + ) + .into_expr(); + + let proj_exprs = ExprList::new(vec![ + ColumnRefExpr::new(2).into_expr(), + ColumnRefExpr::new(4).into_expr(), + ColumnRefExpr::new(7).into_expr(), + ColumnRefExpr::new(0).into_expr(), + ColumnRefExpr::new(3).into_expr(), + ]); + + let join_node = LogicalJoin::new(left_scan.into_plan_node(), right_scan.into_plan_node(), join_cond, JoinType::Inner); + let proj = LogicalProjection::new(join_node.into_plan_node(), proj_exprs); + + let plan = test_optimizer.optimize(proj.into_rel_node()).unwrap(); + + let left_proj_exprs = ExprList::new(vec![ + ColumnRefExpr::new(2).into_expr(), + ColumnRefExpr::new(0).into_expr(), + ]) + .into_rel_node(); + + let right_proj_exprs = ExprList::new(vec![ + ColumnRefExpr::new(1).into_expr(), + ColumnRefExpr::new(4).into_expr(), + ColumnRefExpr::new(0).into_expr(), + ]) + .into_rel_node(); + + let 
new_join_cond = BinOpExpr::new( + ColumnRefExpr::new(0).into_expr(), + ColumnRefExpr::new(3).into_expr(), + BinOpType::Eq, + ) + .into_expr().into_rel_node(); + + let top_proj_exprs = ExprList::new(vec![ + ColumnRefExpr::new(0).into_expr(), + ColumnRefExpr::new(2).into_expr(), + ColumnRefExpr::new(3).into_expr(), + ColumnRefExpr::new(1).into_expr(), + ColumnRefExpr::new(4).into_expr(), + ]) + .into_rel_node(); + + assert_eq!(plan.typ, OptRelNodeTyp::Projection); + assert_eq!(plan.child(1), top_proj_exprs); + + assert_eq!(plan.child(0).typ, OptRelNodeTyp::Join(JoinType::Inner)); + assert_eq!(plan.child(0).child(2), new_join_cond); + + assert_eq!(plan.child(0).child(0).typ, OptRelNodeTyp::Projection); + assert_eq!(plan.child(0).child(1).typ, OptRelNodeTyp::Projection); + assert_eq!(plan.child(0).child(0).child(1), left_proj_exprs); + assert_eq!(plan.child(0).child(1).child(1), right_proj_exprs); + + assert_eq!(plan.child(0).child(0).child(0).typ, OptRelNodeTyp::Scan); + assert_eq!(plan.child(0).child(1).child(0).typ, OptRelNodeTyp::Scan); + } + + #[test] + fn proj_pushdown_join_adv_3() { + // convert proj -> join -> 2xscan to proj -> join -> 2xproj -> 2xscan (join cols not in proj cols) + let mut test_optimizer = new_test_optimizer(Arc::new(ProjectionPushDownJoin::new())); + + let left_scan = LogicalScan::new("region".into()); + let right_scan = LogicalScan::new("customer".into()); + + let join_cond = BinOpExpr::new( + ColumnRefExpr::new(2).into_expr(), + ColumnRefExpr::new(7).into_expr(), + BinOpType::Eq, + ) + .into_expr(); + + let proj_exprs = ExprList::new(vec![ + ColumnRefExpr::new(4).into_expr(), + ColumnRefExpr::new(0).into_expr(), + ColumnRefExpr::new(3).into_expr(), + ]); + + let join_node = LogicalJoin::new(left_scan.into_plan_node(), right_scan.into_plan_node(), join_cond, JoinType::Inner); + let proj = LogicalProjection::new(join_node.into_plan_node(), proj_exprs); + + let plan = test_optimizer.optimize(proj.into_rel_node()).unwrap(); + + let 
left_proj_exprs = ExprList::new(vec![ + ColumnRefExpr::new(0).into_expr(), + ColumnRefExpr::new(2).into_expr(), + ]) + .into_rel_node(); + + let right_proj_exprs = ExprList::new(vec![ + ColumnRefExpr::new(1).into_expr(), + ColumnRefExpr::new(0).into_expr(), + ColumnRefExpr::new(4).into_expr(), + ]) + .into_rel_node(); + + let new_join_cond = BinOpExpr::new( + ColumnRefExpr::new(1).into_expr(), + ColumnRefExpr::new(4).into_expr(), + BinOpType::Eq, + ) + .into_expr().into_rel_node(); + + let top_proj_exprs = ExprList::new(vec![ + ColumnRefExpr::new(2).into_expr(), + ColumnRefExpr::new(0).into_expr(), + ColumnRefExpr::new(3).into_expr(), + ]) + .into_rel_node(); + + assert_eq!(plan.typ, OptRelNodeTyp::Projection); + assert_eq!(plan.child(1), top_proj_exprs); + + assert_eq!(plan.child(0).typ, OptRelNodeTyp::Join(JoinType::Inner)); + assert_eq!(plan.child(0).child(2), new_join_cond); + + assert_eq!(plan.child(0).child(0).typ, OptRelNodeTyp::Projection); + assert_eq!(plan.child(0).child(1).typ, OptRelNodeTyp::Projection); + assert_eq!(plan.child(0).child(0).child(1), left_proj_exprs); + assert_eq!(plan.child(0).child(1).child(1), right_proj_exprs); + + assert_eq!(plan.child(0).child(0).child(0).typ, OptRelNodeTyp::Scan); + assert_eq!(plan.child(0).child(1).child(0).typ, OptRelNodeTyp::Scan); + } + + #[test] + fn proj_pushdown_join_adv_4() { + // convert proj -> join -> 2xscan to proj -> join -> 2xproj -> 2xscan (partial join cols not in proj cols) + let mut test_optimizer = new_test_optimizer(Arc::new(ProjectionPushDownJoin::new())); + + let left_scan = LogicalScan::new("region".into()); + let right_scan = LogicalScan::new("customer".into()); + + let join_cond = BinOpExpr::new( + ColumnRefExpr::new(3).into_expr(), + ColumnRefExpr::new(1).into_expr(), + BinOpType::Eq, + ) + .into_expr(); + + let proj_exprs = ExprList::new(vec![ + ColumnRefExpr::new(0).into_expr(), + ColumnRefExpr::new(4).into_expr(), + ColumnRefExpr::new(3).into_expr(), + ]); + + let join_node = 
LogicalJoin::new(left_scan.into_plan_node(), right_scan.into_plan_node(), join_cond, JoinType::Inner); + let proj = LogicalProjection::new(join_node.into_plan_node(), proj_exprs); + + let plan = test_optimizer.optimize(proj.into_rel_node()).unwrap(); + + let left_proj_exprs = ExprList::new(vec![ + ColumnRefExpr::new(0).into_expr(), + ColumnRefExpr::new(1).into_expr(), + ]) + .into_rel_node(); + + let right_proj_exprs = ExprList::new(vec![ + ColumnRefExpr::new(1).into_expr(), + ColumnRefExpr::new(0).into_expr(), + ]) + .into_rel_node(); + + let new_join_cond = BinOpExpr::new( + ColumnRefExpr::new(3).into_expr(), + ColumnRefExpr::new(1).into_expr(), + BinOpType::Eq, + ) + .into_expr().into_rel_node(); + + let top_proj_exprs = ExprList::new(vec![ + ColumnRefExpr::new(0).into_expr(), + ColumnRefExpr::new(2).into_expr(), + ColumnRefExpr::new(3).into_expr(), + ]) + .into_rel_node(); + + assert_eq!(plan.typ, OptRelNodeTyp::Projection); + assert_eq!(plan.child(1), top_proj_exprs); + + assert_eq!(plan.child(0).typ, OptRelNodeTyp::Join(JoinType::Inner)); + assert_eq!(plan.child(0).child(2), new_join_cond); + + assert_eq!(plan.child(0).child(0).typ, OptRelNodeTyp::Projection); + assert_eq!(plan.child(0).child(1).typ, OptRelNodeTyp::Projection); + assert_eq!(plan.child(0).child(0).child(1), left_proj_exprs); + assert_eq!(plan.child(0).child(1).child(1), right_proj_exprs); + + assert_eq!(plan.child(0).child(0).child(0).typ, OptRelNodeTyp::Scan); + assert_eq!(plan.child(0).child(1).child(0).typ, OptRelNodeTyp::Scan); + } } \ No newline at end of file diff --git a/optd-datafusion-repr/src/rules/project_transpose/project_transpose_common.rs b/optd-datafusion-repr/src/rules/project_transpose/project_transpose_common.rs index a83f0d6f..a5167f3c 100644 --- a/optd-datafusion-repr/src/rules/project_transpose/project_transpose_common.rs +++ b/optd-datafusion-repr/src/rules/project_transpose/project_transpose_common.rs @@ -6,20 +6,27 @@ pub fn merge_exprs(first: ExprList, second: 
ExprList) -> ExprList { ExprList::new(res_vec) } -pub fn split_exprs(exprs: ExprList, left_schema_len: usize) -> (ExprList, ExprList) { +pub fn split_exprs(exprs: ExprList, left_schema_len: usize) -> (ExprList, ExprList, bool) { let mut left_vec = vec![]; let mut right_vec = vec![]; + let mut reached_right = false; + let mut is_left_right_ordered = true; for expr in exprs.to_vec() { let col_ref = ColumnRefExpr::from_rel_node(expr.into_rel_node()).unwrap(); if col_ref.index() < left_schema_len { // left expr left_vec.push(col_ref.into_expr()); + if reached_right { + is_left_right_ordered = false; + } } else { // right expr - right_vec.push(col_ref.into_expr()); + let right_col_ref = ColumnRefExpr::new(col_ref.index() - left_schema_len); + right_vec.push(right_col_ref.into_expr()); + reached_right = true; } } - (ExprList::new(left_vec), ExprList::new(right_vec)) + (ExprList::new(left_vec), ExprList::new(right_vec), is_left_right_ordered) } /// This struct holds the mapping from original columns to projected columns. 
@@ -78,13 +85,13 @@ impl ProjectionMapping { /// Join { cond: #1=#4 } /// Scan /// Scan - pub fn rewrite_join_cond(&self, cond: Expr, left_child_schema_len: usize, is_added: bool, is_left_child: bool) -> Expr { + pub fn rewrite_join_cond(&self, cond: Expr, left_schema_len: usize, is_added: bool, is_left_child: bool, new_left_schema_len: usize) -> Expr { if is_added { cond.rewrite_column_refs(&|col_idx| { - if is_left_child && col_idx < left_child_schema_len { + if is_left_child && col_idx < left_schema_len { self.original_col_maps_to(col_idx) - } else if !is_left_child && col_idx >= left_child_schema_len { - self.original_col_maps_to(col_idx - left_child_schema_len) + } else if !is_left_child && col_idx >= left_schema_len { + Some(self.original_col_maps_to(col_idx - left_schema_len).unwrap() + new_left_schema_len) } else { Some(col_idx) } @@ -96,7 +103,7 @@ impl ProjectionMapping { if col_idx < schema_size { self.projection_col_maps_to(col_idx) } else { - Some(col_idx - schema_size + left_child_schema_len) + Some(col_idx - schema_size + left_schema_len) } }) .unwrap() From 2a12661bde2d0d00f7dbd6f6b4499a9716fd3b58 Mon Sep 17 00:00:00 2001 From: Sweetsuro Date: Wed, 1 May 2024 10:40:10 -0400 Subject: [PATCH 60/61] comment out project join push down rule until core is fixed --- optd-datafusion-repr/src/lib.rs | 6 +++--- .../src/rules/project_transpose/project_join_transpose.rs | 4 ++++ 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/optd-datafusion-repr/src/lib.rs b/optd-datafusion-repr/src/lib.rs index 50101c5a..2c821bf6 100644 --- a/optd-datafusion-repr/src/lib.rs +++ b/optd-datafusion-repr/src/lib.rs @@ -99,9 +99,9 @@ impl DatafusionOptimizer { rule_wrappers.push(RuleWrapper::new_cascades(Arc::new( ProjectFilterTransposeRule::new(), ))); - rule_wrappers.push(RuleWrapper::new_cascades(Arc::new( - ProjectionPushDownJoin::new(), - ))); + // rule_wrappers.push(RuleWrapper::new_cascades(Arc::new( + // ProjectionPushDownJoin::new(), + // ))); // add all 
filter pushdown rules as heuristic rules rule_wrappers.push(RuleWrapper::new_heuristic(Arc::new( FilterProjectTransposeRule::new(), diff --git a/optd-datafusion-repr/src/rules/project_transpose/project_join_transpose.rs b/optd-datafusion-repr/src/rules/project_transpose/project_join_transpose.rs index cd4bc7ad..8b89d7d4 100644 --- a/optd-datafusion-repr/src/rules/project_transpose/project_join_transpose.rs +++ b/optd-datafusion-repr/src/rules/project_transpose/project_join_transpose.rs @@ -17,6 +17,10 @@ use crate::plan_nodes::{ use crate::properties::schema::SchemaPropertyBuilder; // (Proj A) join B -> (Proj (A join B)) +// TODO: rule currently doesn't work under current +// cascades + heuristic rule wrapper infrastructure +// correctness verified using unit tests in current file +// and via comparisons against CockroachDB & Calcite define_rule!( ProjectionPullUpJoin, apply_projection_pull_up_join, From c9706485e5d228b51c46cccc08fa62959775876c Mon Sep 17 00:00:00 2001 From: Sweetsuro Date: Wed, 1 May 2024 13:46:26 -0400 Subject: [PATCH 61/61] proj remove implmented (hueristic pass ONLY) --- optd-datafusion-repr/src/lib.rs | 3 +- optd-datafusion-repr/src/rules.rs | 1 + .../src/rules/project_transpose.rs | 1 + .../rules/project_transpose/project_remove.rs | 83 ++++++++++++++++++- .../tests/basic_nodes.planner.sql | 3 +- .../eliminate_duplicated_expr.planner.sql | 6 +- optd-sqlplannertest/tests/filter.planner.sql | 3 +- optd-sqlplannertest/tests/tpch.planner.sql | 3 +- optd-sqlplannertest/tests/verbose.planner.sql | 6 +- 9 files changed, 93 insertions(+), 16 deletions(-) diff --git a/optd-datafusion-repr/src/lib.rs b/optd-datafusion-repr/src/lib.rs index 2c821bf6..3e86bfde 100644 --- a/optd-datafusion-repr/src/lib.rs +++ b/optd-datafusion-repr/src/lib.rs @@ -22,7 +22,7 @@ use properties::{ schema::{Catalog, SchemaPropertyBuilder}, }; use rules::{ - EliminateDuplicatedAggExprRule, EliminateDuplicatedSortExprRule, EliminateFilterRule, EliminateJoinRule, 
EliminateLimitRule, FilterAggTransposeRule, FilterCrossJoinTransposeRule, FilterInnerJoinTransposeRule, FilterMergeRule, FilterProjectTransposeRule, FilterSortTransposeRule, HashJoinRule, JoinAssocRule, JoinCommuteRule, PhysicalConversionRule, ProjectFilterTransposeRule, ProjectMergeRule, ProjectionPullUpJoin, ProjectionPushDownJoin, SimplifyFilterRule, SimplifyJoinCondRule + EliminateDuplicatedAggExprRule, EliminateDuplicatedSortExprRule, EliminateFilterRule, EliminateJoinRule, EliminateLimitRule, FilterAggTransposeRule, FilterCrossJoinTransposeRule, FilterInnerJoinTransposeRule, FilterMergeRule, FilterProjectTransposeRule, FilterSortTransposeRule, HashJoinRule, JoinAssocRule, JoinCommuteRule, PhysicalConversionRule, ProjectFilterTransposeRule, ProjectMergeRule, ProjectRemoveRule, ProjectionPullUpJoin, ProjectionPushDownJoin, SimplifyFilterRule, SimplifyJoinCondRule }; pub use optd_core::rel_node::Value; @@ -85,6 +85,7 @@ impl DatafusionOptimizer { Arc::new(EliminateDuplicatedAggExprRule::new()), Arc::new(ProjectMergeRule::new()), Arc::new(FilterMergeRule::new()), + Arc::new(ProjectRemoveRule::new()), ] } diff --git a/optd-datafusion-repr/src/rules.rs b/optd-datafusion-repr/src/rules.rs index 9e5c0f09..4e3aee48 100644 --- a/optd-datafusion-repr/src/rules.rs +++ b/optd-datafusion-repr/src/rules.rs @@ -24,4 +24,5 @@ pub use project_transpose::{ project_filter_transpose::{FilterProjectTransposeRule, ProjectFilterTransposeRule}, project_join_transpose::{ProjectionPullUpJoin, ProjectionPushDownJoin}, project_merge::ProjectMergeRule, + project_remove::ProjectRemoveRule, }; diff --git a/optd-datafusion-repr/src/rules/project_transpose.rs b/optd-datafusion-repr/src/rules/project_transpose.rs index 5c4f45bb..27c251c1 100644 --- a/optd-datafusion-repr/src/rules/project_transpose.rs +++ b/optd-datafusion-repr/src/rules/project_transpose.rs @@ -1,4 +1,5 @@ pub mod project_filter_transpose; pub mod project_join_transpose; pub mod project_merge; +pub mod project_remove; pub mod 
project_transpose_common; diff --git a/optd-datafusion-repr/src/rules/project_transpose/project_remove.rs b/optd-datafusion-repr/src/rules/project_transpose/project_remove.rs index 5490307d..16231ff9 100644 --- a/optd-datafusion-repr/src/rules/project_transpose/project_remove.rs +++ b/optd-datafusion-repr/src/rules/project_transpose/project_remove.rs @@ -1,2 +1,83 @@ // intended to remove a projection that outputs the same num of cols -// that are in scan node \ No newline at end of file +// that are in scan node +use std::collections::HashMap; + +use optd_core::rules::{Rule, RuleMatcher}; +use optd_core::{optimizer::Optimizer, rel_node::RelNode}; + +use crate::plan_nodes::{ColumnRefExpr, ExprList, OptRelNode, OptRelNodeTyp, PlanNode}; +use crate::properties::schema::SchemaPropertyBuilder; +use crate::rules::macros::define_rule; + +// Proj (Scan A) -> Scan A +// removes projections +// TODO: need to somehow match on just scan node instead +// only works in hueristic optimizer (which may be ok) +// ideally include a pass after for physical proj -> physical scan +define_rule!( + ProjectRemoveRule, + apply_projection_remove, + (Projection, child, [exprs]) +); + +fn apply_projection_remove( + optimizer: &impl Optimizer, + ProjectRemoveRulePicks { + child, + exprs + }: ProjectRemoveRulePicks, +) -> Vec> { + let child_schema = optimizer.get_property::(child.clone().into(), 0); + let child = PlanNode::from_group(child.into()); + if child.typ() != OptRelNodeTyp::Scan { + return vec![]; + } + let exprs = ExprList::from_rel_node(exprs.into()).unwrap().to_vec(); + if exprs.len() != child_schema.len() { + return vec![]; + } + let mut exp_col_idx: usize = 0; + for expr in exprs { + let col_ref = ColumnRefExpr::from_rel_node(expr.into_rel_node()).unwrap(); + let col_idx = col_ref.index(); + if exp_col_idx != col_idx { + return vec![]; + } + exp_col_idx += 1; + } + vec![child.into_rel_node().as_ref().clone()] +} + +#[cfg(test)] +mod tests { + use std::sync::Arc; + + use 
optd_core::optimizer::Optimizer; + + use crate::{ + plan_nodes::{ + ColumnRefExpr, ExprList, LogicalProjection, LogicalScan, OptRelNode, OptRelNodeTyp, + }, + rules::ProjectRemoveRule, + testing::new_test_optimizer, + }; + + #[test] + fn proj_scan_basic() { + // convert proj -> scan to scan + let mut test_optimizer = new_test_optimizer(Arc::new(ProjectRemoveRule::new())); + + let scan = LogicalScan::new("region".into()); + + let proj_exprs = ExprList::new(vec![ + ColumnRefExpr::new(0).into_expr(), + ColumnRefExpr::new(1).into_expr(), + ColumnRefExpr::new(2).into_expr(), + ]); + + let proj_node: LogicalProjection = LogicalProjection::new(scan.into_plan_node(), proj_exprs); + let plan = test_optimizer.optimize(proj_node.into_rel_node()).unwrap(); + + assert_eq!(plan.typ, OptRelNodeTyp::Scan); + } +} diff --git a/optd-sqlplannertest/tests/basic_nodes.planner.sql b/optd-sqlplannertest/tests/basic_nodes.planner.sql index 301f300e..d9d880c5 100644 --- a/optd-sqlplannertest/tests/basic_nodes.planner.sql +++ b/optd-sqlplannertest/tests/basic_nodes.planner.sql @@ -19,8 +19,7 @@ LogicalLimit { skip: 0(u64), fetch: 1(u64) } └── LogicalProjection { exprs: [ #0, #1 ] } └── LogicalScan { table: t1 } PhysicalLimit { skip: 0(u64), fetch: 1(u64) } -└── PhysicalProjection { exprs: [ #0, #1 ] } - └── PhysicalScan { table: t1 } +└── PhysicalScan { table: t1 } 0 0 0 0 1 1 diff --git a/optd-sqlplannertest/tests/eliminate_duplicated_expr.planner.sql b/optd-sqlplannertest/tests/eliminate_duplicated_expr.planner.sql index b31e774f..8c595a05 100644 --- a/optd-sqlplannertest/tests/eliminate_duplicated_expr.planner.sql +++ b/optd-sqlplannertest/tests/eliminate_duplicated_expr.planner.sql @@ -12,8 +12,7 @@ select * from t1; /* LogicalProjection { exprs: [ #0, #1 ] } └── LogicalScan { table: t1 } -PhysicalProjection { exprs: [ #0, #1 ] } -└── PhysicalScan { table: t1 } +PhysicalScan { table: t1 } 0 0 1 1 5 2 @@ -45,8 +44,7 @@ PhysicalSort │ │ └── #0 │ └── SortOrder { order: Asc } │ └── #1 -└── 
PhysicalProjection { exprs: [ #0, #1 ] } - └── PhysicalScan { table: t1 } +└── PhysicalScan { table: t1 } 0 0 0 2 1 1 diff --git a/optd-sqlplannertest/tests/filter.planner.sql b/optd-sqlplannertest/tests/filter.planner.sql index 8ba252fa..f2ad8484 100644 --- a/optd-sqlplannertest/tests/filter.planner.sql +++ b/optd-sqlplannertest/tests/filter.planner.sql @@ -27,8 +27,7 @@ select * from t1 where true; LogicalProjection { exprs: [ #0, #1 ] } └── LogicalFilter { cond: true } └── LogicalScan { table: t1 } -PhysicalProjection { exprs: [ #0, #1 ] } -└── PhysicalScan { table: t1 } +PhysicalScan { table: t1 } 0 0 1 1 2 2 diff --git a/optd-sqlplannertest/tests/tpch.planner.sql b/optd-sqlplannertest/tests/tpch.planner.sql index 8bf88051..710e4a9a 100644 --- a/optd-sqlplannertest/tests/tpch.planner.sql +++ b/optd-sqlplannertest/tests/tpch.planner.sql @@ -384,8 +384,7 @@ PhysicalLimit { skip: 0(u64), fetch: 100(u64) } │ │ │ │ │ └── PhysicalScan { table: part } │ │ │ │ └── PhysicalProjection { exprs: [ #0, #1, #3 ] } │ │ │ │ └── PhysicalScan { table: partsupp } - │ │ │ └── PhysicalProjection { exprs: [ #0, #1, #2, #3, #4, #5, #6 ] } - │ │ │ └── PhysicalScan { table: supplier } + │ │ │ └── PhysicalScan { table: supplier } │ │ └── PhysicalProjection { exprs: [ #0, #1, #2 ] } │ │ └── PhysicalScan { table: nation } │ └── PhysicalProjection { exprs: [ #0 ] } diff --git a/optd-sqlplannertest/tests/verbose.planner.sql b/optd-sqlplannertest/tests/verbose.planner.sql index 910a1663..391306be 100644 --- a/optd-sqlplannertest/tests/verbose.planner.sql +++ b/optd-sqlplannertest/tests/verbose.planner.sql @@ -10,16 +10,14 @@ insert into t1 values (0), (1), (2), (3); select * from t1; /* -PhysicalProjection { exprs: [ #0 ] } -└── PhysicalScan { table: t1 } +PhysicalScan { table: t1 } */ -- Test verbose explain select * from t1; /* -PhysicalProjection { exprs: [ #0 ], cost: weighted=1.06,row_cnt=1.00,compute=0.06,io=1.00 } -└── PhysicalScan { table: t1, cost: 
weighted=1.00,row_cnt=1.00,compute=0.00,io=1.00 } +PhysicalScan { table: t1, cost: weighted=1.00,row_cnt=1.00,compute=0.00,io=1.00 } */ -- Test verbose explain with aggregation