diff --git a/src/query/catalog/src/table.rs b/src/query/catalog/src/table.rs index 6ea8209c97c13..98c9288e56da3 100644 --- a/src/query/catalog/src/table.rs +++ b/src/query/catalog/src/table.rs @@ -224,7 +224,7 @@ pub enum NavigationPoint { TimePoint(DateTime), } -#[derive(Debug)] +#[derive(Debug, Copy, Clone)] pub struct TableStatistics { pub num_rows: Option, pub data_size: Option, diff --git a/src/query/datavalues/src/data_value.rs b/src/query/datavalues/src/data_value.rs index 054d66584f24e..8f6247e52b727 100644 --- a/src/query/datavalues/src/data_value.rs +++ b/src/query/datavalues/src/data_value.rs @@ -17,6 +17,7 @@ use std::cmp::Ordering; use std::fmt; +use std::hash::Hash; use std::sync::Arc; use common_exception::ErrorCode; @@ -367,6 +368,24 @@ impl Ord for DataValue { } } +#[allow(clippy::derive_hash_xor_eq)] +impl Hash for DataValue { + fn hash(&self, state: &mut H) { + std::mem::discriminant(self).hash(state); + match self { + DataValue::Null => {} + DataValue::Boolean(v) => v.hash(state), + DataValue::UInt64(v) => v.hash(state), + DataValue::Int64(v) => v.hash(state), + DataValue::Float64(v) => v.to_bits().hash(state), + DataValue::String(v) => v.hash(state), + DataValue::Array(v) => v.hash(state), + DataValue::Struct(v) => v.hash(state), + DataValue::Variant(v) => v.hash(state), + } + } +} + // Did not use std::convert:TryFrom // Because we do not need custom type error. 
pub trait DFTryFrom: Sized { diff --git a/src/query/datavalues/src/types/data_type.rs b/src/query/datavalues/src/types/data_type.rs index 5e76dfb4b3bbd..629f5484db561 100644 --- a/src/query/datavalues/src/types/data_type.rs +++ b/src/query/datavalues/src/types/data_type.rs @@ -48,7 +48,8 @@ use crate::serializations::ConstSerializer; pub const ARROW_EXTENSION_NAME: &str = "ARROW:extension:databend_name"; pub const ARROW_EXTENSION_META: &str = "ARROW:extension:databend_metadata"; -#[derive(Clone, Debug, serde::Deserialize, serde::Serialize)] +#[derive(Clone, Debug, Hash, serde::Deserialize, serde::Serialize)] +#[allow(clippy::derive_hash_xor_eq)] #[serde(tag = "type")] #[enum_dispatch(DataType)] pub enum DataTypeImpl { diff --git a/src/query/datavalues/src/types/type_array.rs b/src/query/datavalues/src/types/type_array.rs index 3f4d1ba31aded..4ac32f793e7df 100644 --- a/src/query/datavalues/src/types/type_array.rs +++ b/src/query/datavalues/src/types/type_array.rs @@ -26,7 +26,7 @@ use crate::prelude::*; use crate::serializations::ArraySerializer; use crate::serializations::TypeSerializerImpl; -#[derive(Clone, serde::Deserialize, serde::Serialize)] +#[derive(Clone, Hash, serde::Deserialize, serde::Serialize)] pub struct ArrayType { inner: Box, } diff --git a/src/query/datavalues/src/types/type_boolean.rs b/src/query/datavalues/src/types/type_boolean.rs index b7e4ba70d2dfa..c7d1a7f8303bf 100644 --- a/src/query/datavalues/src/types/type_boolean.rs +++ b/src/query/datavalues/src/types/type_boolean.rs @@ -24,7 +24,7 @@ pub use crate::prelude::*; use crate::serializations::BooleanSerializer; use crate::serializations::TypeSerializerImpl; -#[derive(Default, Clone, serde::Deserialize, serde::Serialize)] +#[derive(Default, Clone, Hash, serde::Deserialize, serde::Serialize)] pub struct BooleanType {} impl BooleanType { diff --git a/src/query/datavalues/src/types/type_date.rs b/src/query/datavalues/src/types/type_date.rs index 04c8a12197f6d..37e541fce8bce 100644 --- 
a/src/query/datavalues/src/types/type_date.rs +++ b/src/query/datavalues/src/types/type_date.rs @@ -41,7 +41,7 @@ pub fn check_date(days: i32) -> Result<()> { )) } -#[derive(Default, Clone, serde::Deserialize, serde::Serialize)] +#[derive(Default, Clone, Hash, serde::Deserialize, serde::Serialize)] pub struct DateType {} impl DateType { diff --git a/src/query/datavalues/src/types/type_interval.rs b/src/query/datavalues/src/types/type_interval.rs index bd7c790f50720..93453f0379cac 100644 --- a/src/query/datavalues/src/types/type_interval.rs +++ b/src/query/datavalues/src/types/type_interval.rs @@ -25,7 +25,7 @@ use crate::prelude::*; use crate::serializations::DateSerializer; use crate::serializations::TypeSerializerImpl; -#[derive(Clone, serde::Deserialize, serde::Serialize)] +#[derive(Clone, Hash, serde::Deserialize, serde::Serialize)] pub struct IntervalType { kind: IntervalKind, } diff --git a/src/query/datavalues/src/types/type_null.rs b/src/query/datavalues/src/types/type_null.rs index 2834179aebe87..bc631d6ef84cd 100644 --- a/src/query/datavalues/src/types/type_null.rs +++ b/src/query/datavalues/src/types/type_null.rs @@ -22,7 +22,7 @@ use crate::prelude::*; use crate::serializations::NullSerializer; use crate::serializations::TypeSerializerImpl; -#[derive(Default, Clone, serde::Deserialize, serde::Serialize)] +#[derive(Default, Clone, Hash, serde::Deserialize, serde::Serialize)] pub struct NullType {} impl NullType { diff --git a/src/query/datavalues/src/types/type_nullable.rs b/src/query/datavalues/src/types/type_nullable.rs index fdd686170eb8e..5ef2e9e020b37 100644 --- a/src/query/datavalues/src/types/type_nullable.rs +++ b/src/query/datavalues/src/types/type_nullable.rs @@ -28,7 +28,7 @@ use crate::prelude::*; use crate::serializations::NullableSerializer; use crate::serializations::TypeSerializerImpl; -#[derive(Clone, serde::Deserialize, serde::Serialize)] +#[derive(Clone, Hash, serde::Deserialize, serde::Serialize)] pub struct NullableType { inner: Box, 
} diff --git a/src/query/datavalues/src/types/type_primitive.rs b/src/query/datavalues/src/types/type_primitive.rs index d180dfefe36e8..fed6d4c115922 100644 --- a/src/query/datavalues/src/types/type_primitive.rs +++ b/src/query/datavalues/src/types/type_primitive.rs @@ -144,6 +144,12 @@ macro_rules! impl_numeric { write!(f, "{}", self.name()) } } + + impl std::hash::Hash for PrimitiveDataType<$ty> { + fn hash(&self, state: &mut H) { + self.data_type_id().hash(state); + } + } }; } // diff --git a/src/query/datavalues/src/types/type_string.rs b/src/query/datavalues/src/types/type_string.rs index b252ba2dfaa06..76104e7019f19 100644 --- a/src/query/datavalues/src/types/type_string.rs +++ b/src/query/datavalues/src/types/type_string.rs @@ -24,7 +24,7 @@ use crate::prelude::*; use crate::serializations::StringSerializer; use crate::serializations::TypeSerializerImpl; -#[derive(Default, Clone, serde::Deserialize, serde::Serialize)] +#[derive(Default, Clone, Hash, serde::Deserialize, serde::Serialize)] pub struct StringType {} impl StringType { diff --git a/src/query/datavalues/src/types/type_struct.rs b/src/query/datavalues/src/types/type_struct.rs index 8b7c1eb5db942..b075b9b306b24 100644 --- a/src/query/datavalues/src/types/type_struct.rs +++ b/src/query/datavalues/src/types/type_struct.rs @@ -27,7 +27,7 @@ use crate::prelude::*; use crate::serializations::StructSerializer; use crate::serializations::TypeSerializerImpl; -#[derive(Default, Clone, serde::Deserialize, serde::Serialize)] +#[derive(Default, Clone, Hash, serde::Deserialize, serde::Serialize)] pub struct StructType { names: Option>, types: Vec, diff --git a/src/query/datavalues/src/types/type_timestamp.rs b/src/query/datavalues/src/types/type_timestamp.rs index 043d895e4a360..b33ff87645d5c 100644 --- a/src/query/datavalues/src/types/type_timestamp.rs +++ b/src/query/datavalues/src/types/type_timestamp.rs @@ -47,7 +47,7 @@ pub fn check_timestamp(micros: i64) -> Result<()> { } /// Timestamp type only stores UTC 
time in microseconds -#[derive(Default, Clone, serde::Deserialize, serde::Serialize)] +#[derive(Default, Clone, Hash, serde::Deserialize, serde::Serialize)] pub struct TimestampType { /// Typically are used - 0 (seconds) 3 (milliseconds), 6 (microseconds) precision: usize, diff --git a/src/query/datavalues/src/types/type_variant.rs b/src/query/datavalues/src/types/type_variant.rs index 3161740826e39..7f7f1df6ec4f8 100644 --- a/src/query/datavalues/src/types/type_variant.rs +++ b/src/query/datavalues/src/types/type_variant.rs @@ -25,7 +25,7 @@ use crate::prelude::*; use crate::serializations::TypeSerializerImpl; use crate::serializations::VariantSerializer; -#[derive(Default, Clone, serde::Deserialize, serde::Serialize)] +#[derive(Default, Clone, Hash, serde::Deserialize, serde::Serialize)] pub struct VariantType {} impl VariantType { diff --git a/src/query/datavalues/src/types/type_variant_array.rs b/src/query/datavalues/src/types/type_variant_array.rs index 004e76fefa20d..7c54f3fb92599 100644 --- a/src/query/datavalues/src/types/type_variant_array.rs +++ b/src/query/datavalues/src/types/type_variant_array.rs @@ -25,7 +25,7 @@ use crate::prelude::*; use crate::serializations::TypeSerializerImpl; use crate::serializations::VariantSerializer; -#[derive(Default, Clone, serde::Deserialize, serde::Serialize)] +#[derive(Default, Clone, Hash, serde::Deserialize, serde::Serialize)] pub struct VariantArrayType {} impl VariantArrayType { diff --git a/src/query/datavalues/src/types/type_variant_object.rs b/src/query/datavalues/src/types/type_variant_object.rs index 09d0c94886615..6682f4515566a 100644 --- a/src/query/datavalues/src/types/type_variant_object.rs +++ b/src/query/datavalues/src/types/type_variant_object.rs @@ -25,7 +25,7 @@ use crate::prelude::*; use crate::serializations::TypeSerializerImpl; use crate::serializations::VariantSerializer; -#[derive(Default, Clone, serde::Deserialize, serde::Serialize)] +#[derive(Default, Clone, Hash, serde::Deserialize, 
serde::Serialize)] pub struct VariantObjectType {} impl VariantObjectType { diff --git a/src/query/datavalues/src/variant_value.rs b/src/query/datavalues/src/variant_value.rs index ceddb75464911..ece7c712d539f 100644 --- a/src/query/datavalues/src/variant_value.rs +++ b/src/query/datavalues/src/variant_value.rs @@ -16,6 +16,7 @@ use core::str::FromStr; use std::cmp::Ordering; use std::fmt::Display; use std::fmt::Formatter; +use std::hash::Hash; use std::ops::Deref; use common_exception::ErrorCode; @@ -219,6 +220,15 @@ impl PartialOrd for VariantValue { } } +#[allow(clippy::derive_hash_xor_eq)] +impl Hash for VariantValue { + fn hash(&self, state: &mut H) { + let v = self.as_ref().to_string(); + let u = v.as_bytes(); + Hash::hash(&u, state); + } +} + impl Display for VariantValue { fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { write!(f, "{}", self.as_ref()) diff --git a/src/query/functions/src/scalars/hashes/hash_base.rs b/src/query/functions/src/scalars/hashes/hash_base.rs index 9df9ae5a3def4..cc8633d9c13f7 100644 --- a/src/query/functions/src/scalars/hashes/hash_base.rs +++ b/src/query/functions/src/scalars/hashes/hash_base.rs @@ -227,13 +227,13 @@ impl DFHash for DataValue { } DataValue::Array(vals) => { for v in vals { - v.hash(state); + DFHash::hash(v, state); Hash::hash(&',', state); } } DataValue::Struct(vals) => { for v in vals { - v.hash(state); + DFHash::hash(v, state); Hash::hash(&',', state); } } diff --git a/src/query/service/src/interpreters/interpreter_copy_v2.rs b/src/query/service/src/interpreters/interpreter_copy_v2.rs index 0648c5e76267d..1d525c18fd818 100644 --- a/src/query/service/src/interpreters/interpreter_copy_v2.rs +++ b/src/query/service/src/interpreters/interpreter_copy_v2.rs @@ -186,7 +186,7 @@ impl CopyInterpreterV2 { let select_interpreter = SelectInterpreterV2::try_create( self.ctx.clone(), *(bind_context.clone()), - s_expr.clone(), + *s_expr.clone(), metadata.clone(), )?; diff --git 
a/src/query/service/src/interpreters/interpreter_explain_v2.rs b/src/query/service/src/interpreters/interpreter_explain_v2.rs index a2cfd2ac2dce5..09d6663556512 100644 --- a/src/query/service/src/interpreters/interpreter_explain_v2.rs +++ b/src/query/service/src/interpreters/interpreter_explain_v2.rs @@ -56,7 +56,7 @@ impl Interpreter for ExplainInterpreterV2 { Plan::Query { s_expr, metadata, .. } => { - self.explain_pipeline(s_expr.clone(), metadata.clone()) + self.explain_pipeline(*s_expr.clone(), metadata.clone()) .await? } _ => { @@ -67,7 +67,7 @@ impl Interpreter for ExplainInterpreterV2 { Plan::Query { s_expr, metadata, .. } => { - self.explain_fragments(s_expr.clone(), metadata.clone()) + self.explain_fragments(*s_expr.clone(), metadata.clone()) .await? } _ => { diff --git a/src/query/service/src/interpreters/interpreter_factory_v2.rs b/src/query/service/src/interpreters/interpreter_factory_v2.rs index ef39c12961ec0..db2e53fae2ee5 100644 --- a/src/query/service/src/interpreters/interpreter_factory_v2.rs +++ b/src/query/service/src/interpreters/interpreter_factory_v2.rs @@ -65,7 +65,7 @@ impl InterpreterFactoryV2 { } => Ok(Arc::new(SelectInterpreterV2::try_create( ctx, *bind_context.clone(), - s_expr.clone(), + *s_expr.clone(), metadata.clone(), )?)), Plan::Explain { kind, plan } => Ok(Arc::new(ExplainInterpreterV2::try_create( diff --git a/src/query/service/src/interpreters/interpreter_insert_v2.rs b/src/query/service/src/interpreters/interpreter_insert_v2.rs index 5a28d044f2447..83df1a95e2f2a 100644 --- a/src/query/service/src/interpreters/interpreter_insert_v2.rs +++ b/src/query/service/src/interpreters/interpreter_insert_v2.rs @@ -109,7 +109,7 @@ impl InsertInterpreterV2 { } => SelectInterpreterV2::try_create( self.ctx.clone(), *bind_context.clone(), - s_expr.clone(), + *s_expr.clone(), metadata.clone(), ), _ => unreachable!(), diff --git a/src/query/service/src/sql/optimizer/cascades/explore_rules.rs 
b/src/query/service/src/sql/optimizer/cascades/explore_rules.rs index 407b2c30ef61b..f68dcdac9a8be 100644 --- a/src/query/service/src/sql/optimizer/cascades/explore_rules.rs +++ b/src/query/service/src/sql/optimizer/cascades/explore_rules.rs @@ -12,10 +12,11 @@ // See the License for the specific language governing permissions and // limitations under the License. +use crate::sql::optimizer::RuleID; use crate::sql::optimizer::RuleSet; pub fn get_explore_rule_set() -> RuleSet { - RuleSet::create_with_ids(vec![]).unwrap() + RuleSet::create_with_ids(vec![RuleID::CommuteJoin]).unwrap() } #[cfg(test)] diff --git a/src/query/service/src/sql/optimizer/cascades/implement_rules.rs b/src/query/service/src/sql/optimizer/cascades/implement_rules.rs index 62354fbfa8ec7..170e568e0091a 100644 --- a/src/query/service/src/sql/optimizer/cascades/implement_rules.rs +++ b/src/query/service/src/sql/optimizer/cascades/implement_rules.rs @@ -16,7 +16,7 @@ use crate::sql::optimizer::RuleID; use crate::sql::optimizer::RuleSet; pub fn get_implement_rule_set() -> RuleSet { - RuleSet::create_with_ids(vec![RuleID::ImplementGet]).unwrap() + RuleSet::create_with_ids(vec![RuleID::ImplementGet, RuleID::ImplementHashJoin]).unwrap() } #[cfg(test)] diff --git a/src/query/service/src/sql/optimizer/cascades/mod.rs b/src/query/service/src/sql/optimizer/cascades/mod.rs index a9d47539c387c..be1bba4adabd8 100644 --- a/src/query/service/src/sql/optimizer/cascades/mod.rs +++ b/src/query/service/src/sql/optimizer/cascades/mod.rs @@ -15,76 +15,90 @@ mod explore_rules; mod implement_rules; +use std::collections::hash_map::Entry; +use std::collections::HashMap; + use common_exception::ErrorCode; use common_exception::Result; +use super::cost::Cost; +use super::cost::CostContext; +use super::cost::CostModel; +use super::cost::DefaultCostModel; use crate::sql::optimizer::cascades::explore_rules::get_explore_rule_set; use crate::sql::optimizer::cascades::implement_rules::get_implement_rule_set; -use 
crate::sql::optimizer::group::Group; +use crate::sql::optimizer::format::display_memo; use crate::sql::optimizer::m_expr::MExpr; use crate::sql::optimizer::memo::Memo; -use crate::sql::optimizer::rule::RulePtr; use crate::sql::optimizer::rule::RuleSet; use crate::sql::optimizer::rule::TransformState; -use crate::sql::optimizer::RequiredProperty; use crate::sql::optimizer::SExpr; use crate::sql::plans::Operator; use crate::sql::IndexType; /// A cascades-style search engine to enumerate possible alternations of a relational expression and /// find the optimal one. -/// -/// NOTICE: we don't support cost-based optimization and lower bound searching for now. -#[allow(dead_code)] pub struct CascadesOptimizer { - // optimize_context: OptimizeContext, memo: Memo, explore_rules: RuleSet, implement_rules: RuleSet, + + cost_model: Box, + + /// group index -> best cost context + best_cost_map: HashMap, } impl CascadesOptimizer { - #[allow(dead_code)] pub fn create() -> Self { CascadesOptimizer { memo: Memo::create(), explore_rules: get_explore_rule_set(), implement_rules: get_implement_rule_set(), + cost_model: Box::new(DefaultCostModel), + best_cost_map: HashMap::new(), } } - #[allow(dead_code)] fn init(&mut self, expression: SExpr) -> Result<()> { self.memo.init(expression)?; Ok(()) } - #[allow(dead_code)] - pub fn optimize(&mut self, expression: SExpr) -> Result { - self.init(expression)?; + pub fn optimize(mut self, s_expr: SExpr) -> Result { + self.init(s_expr)?; + + let root_index = self + .memo + .root() + .ok_or_else(|| { + ErrorCode::LogicalError("Root group cannot be None after initialization") + })? 
+ .group_index; + + self.explore_group(root_index)?; - self.explore_group(self.memo.root().unwrap().group_index())?; + self.implement_group(root_index)?; - self.implement_group(self.memo.root().unwrap().group_index())?; + self.optimize_group(root_index)?; - self.find_optimal_plan() + tracing::debug!("Memo: \n{}", display_memo(&self.memo)); + + self.find_optimal_plan(root_index) } - #[allow(dead_code)] fn explore_group(&mut self, group_index: IndexType) -> Result<()> { - let group = self.memo.group(group_index); - let expressions: Vec = group.iter().cloned().collect(); - for m_expr in expressions { - self.explore_expr(m_expr)?; + let group = self.memo.group(group_index)?; + for m_expr in group.m_exprs.clone() { + self.explore_expr(&m_expr)?; } Ok(()) } - #[allow(dead_code)] - fn explore_expr(&mut self, m_expr: MExpr) -> Result<()> { - for child in m_expr.children() { + fn explore_expr(&mut self, m_expr: &MExpr) -> Result<()> { + for child in m_expr.children.iter() { self.explore_group(*child)?; } @@ -92,25 +106,22 @@ impl CascadesOptimizer { for rule in self.explore_rules.iter() { m_expr.apply_rule(&self.memo, rule, &mut state)?; } - self.insert_from_transform_state(m_expr.group_index(), state)?; + self.insert_from_transform_state(m_expr.group_index, state)?; Ok(()) } - #[allow(dead_code)] fn implement_group(&mut self, group_index: IndexType) -> Result<()> { - let group = self.memo.group(group_index); - let expressions: Vec = group.iter().cloned().collect(); - for m_expr in expressions { - self.implement_expr(m_expr)?; + let group = self.memo.group(group_index)?; + for m_expr in group.m_exprs.clone() { + self.implement_expr(&m_expr)?; } Ok(()) } - #[allow(dead_code)] - fn implement_expr(&mut self, m_expr: MExpr) -> Result<()> { - for child in m_expr.children() { + fn implement_expr(&mut self, m_expr: &MExpr) -> Result<()> { + for child in m_expr.children.iter() { self.implement_group(*child)?; } @@ -118,12 +129,11 @@ impl CascadesOptimizer { for rule in 
self.implement_rules.iter() { m_expr.apply_rule(&self.memo, rule, &mut state)?; } - self.insert_from_transform_state(m_expr.group_index(), state)?; + self.insert_from_transform_state(m_expr.group_index, state)?; Ok(()) } - #[allow(dead_code)] fn insert_from_transform_state( &mut self, group_index: IndexType, @@ -136,68 +146,78 @@ impl CascadesOptimizer { Ok(()) } - #[allow(dead_code)] fn insert_expression(&mut self, group_index: IndexType, expression: &SExpr) -> Result<()> { self.memo.insert(Some(group_index), expression.clone())?; Ok(()) } - #[allow(dead_code)] - fn apply_rule(&self, m_expr: &MExpr, rule: &RulePtr, state: &mut TransformState) -> Result<()> { - m_expr.apply_rule(&self.memo, rule, state)?; + fn find_optimal_plan(&self, group_index: IndexType) -> Result { + let group = self.memo.group(group_index)?; + let cost_context = self.best_cost_map.get(&group_index).ok_or_else(|| { + ErrorCode::LogicalError(format!("Cannot find CostContext of group: {group_index}")) + })?; - Ok(()) - } + let m_expr = group.m_exprs.get(cost_context.expr_index).ok_or_else(|| { + ErrorCode::LogicalError(format!( + "Cannot find best expression of group: {group_index}" + )) + })?; - #[allow(dead_code)] - fn find_optimal_plan(&self) -> Result { - let root_group = self.memo.root().unwrap(); + let children = m_expr + .children + .iter() + .map(|index| self.find_optimal_plan(*index)) + .collect::>>()?; - let required_prop = RequiredProperty::default(); + let result = SExpr::create(m_expr.plan.clone(), children, None); - self.optimize_group(root_group, &required_prop) + Ok(result) } - /// We don't have cost mechanism for evaluate cost of plans, so we just extract - /// first physical plan in a group that satisfies given RequiredProperty. 
- #[allow(dead_code)] - fn optimize_group(&self, group: &Group, required_prop: &RequiredProperty) -> Result { - for m_expr in group.iter() { - if m_expr.plan().is_physical() { - let plan = m_expr.plan(); - - // TODO: Check properties - // let physical = plan.as_physical_plan().unwrap(); - // let relational_prop = group.relational_prop().unwrap(); - // let dummy_physical_prop = PhysicalProperty::default(); - // let required_prop = physical.compute_required_prop(required_prop); - // - // if !required_prop.provided_by(relational_prop, &dummy_physical_prop) { - // continue; - // } - - let children = self.optimize_m_expr(m_expr, required_prop)?; - let result = SExpr::create(plan.clone(), children, None); - return Ok(result); + fn optimize_group(&mut self, group_index: IndexType) -> Result<()> { + let group = self.memo.group(group_index)?.clone(); + for m_expr in group.m_exprs.iter() { + if m_expr.plan.is_physical() { + self.optimize_m_expr(m_expr)?; } } - Err(ErrorCode::LogicalError("Cannot find an appropriate plan")) + Ok(()) } - #[allow(dead_code)] - fn optimize_m_expr( - &self, - m_expr: &MExpr, - required_prop: &RequiredProperty, - ) -> Result> { - let mut children = vec![]; - for child in m_expr.children() { - let group = self.memo.group(*child); - children.push(self.optimize_group(group, required_prop)?); + fn optimize_m_expr(&mut self, m_expr: &MExpr) -> Result<()> { + let mut cost = Cost::from(0); + for child in m_expr.children.iter() { + self.optimize_group(*child)?; + let cost_context = self.best_cost_map.get(child).ok_or_else(|| { + ErrorCode::LogicalError(format!("Cannot find CostContext of group: {child}")) + })?; + + cost = cost + cost_context.cost; + } + + let op_cost = self.cost_model.compute_cost(&self.memo, m_expr)?; + cost = cost + op_cost; + + let cost_context = CostContext { + cost, + group_index: m_expr.group_index, + expr_index: m_expr.index, + }; + + match self.best_cost_map.entry(m_expr.group_index) { + Entry::Vacant(entry) => { + 
entry.insert(cost_context); + } + Entry::Occupied(mut entry) => { + // Replace the cost context of the group if current context is lower + if cost < entry.get().cost { + entry.insert(cost_context); + } + } } - Ok(children) + Ok(()) } } diff --git a/src/query/service/src/sql/optimizer/cost/cost_model.rs b/src/query/service/src/sql/optimizer/cost/cost_model.rs new file mode 100644 index 0000000000000..f01e909f7c069 --- /dev/null +++ b/src/query/service/src/sql/optimizer/cost/cost_model.rs @@ -0,0 +1,94 @@ +// Copyright 2022 Datafuse Labs. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +use common_exception::ErrorCode; +use common_exception::Result; + +use super::Cost; +use super::CostModel; +use crate::sql::optimizer::MExpr; +use crate::sql::optimizer::Memo; +use crate::sql::plans::PhysicalHashJoin; +use crate::sql::plans::PhysicalScan; +use crate::sql::plans::RelOperator; + +static COST_FACTOR_COMPUTE_PER_ROW: f64 = 1.0; +static COST_FACTOR_HASH_TABLE_PER_ROW: f64 = 10.0; + +#[derive(Default)] +pub struct DefaultCostModel; + +impl CostModel for DefaultCostModel { + fn compute_cost(&self, memo: &Memo, m_expr: &MExpr) -> Result { + compute_cost_impl(memo, m_expr) + } +} + +fn compute_cost_impl(memo: &Memo, m_expr: &MExpr) -> Result { + match &m_expr.plan { + RelOperator::PhysicalScan(plan) => compute_cost_physical_scan(memo, m_expr, plan), + RelOperator::PhysicalHashJoin(plan) => compute_cost_hash_join(memo, m_expr, plan), + RelOperator::UnionAll(_) => compute_cost_union_all(memo, m_expr), + + RelOperator::Project(_) + | RelOperator::EvalScalar(_) + | RelOperator::Filter(_) + | RelOperator::Aggregate(_) + | RelOperator::Sort(_) + | RelOperator::Limit(_) => compute_cost_unary_common_operator(memo, m_expr), + + _ => Err(ErrorCode::LogicalError( + "Cannot compute cost from logical plan", + )), + } +} + +fn compute_cost_physical_scan(memo: &Memo, m_expr: &MExpr, _plan: &PhysicalScan) -> Result { + // Since we don't have alternations(e.g. index scan) for table scan for now, we just ignore + // the I/O cost and treat `PhysicalScan` as normal computation. 
+ let group = memo.group(m_expr.group_index)?; + let prop = &group.relational_prop; + let cost = prop.cardinality * COST_FACTOR_COMPUTE_PER_ROW; + Ok(Cost(cost)) +} + +fn compute_cost_hash_join(memo: &Memo, m_expr: &MExpr, _plan: &PhysicalHashJoin) -> Result { + let build_group = m_expr.child_group(memo, 1)?; + let probe_group = m_expr.child_group(memo, 0)?; + let build_card = build_group.relational_prop.cardinality; + let probe_card = probe_group.relational_prop.cardinality; + + let cost = + build_card * COST_FACTOR_HASH_TABLE_PER_ROW + probe_card * COST_FACTOR_COMPUTE_PER_ROW; + Ok(Cost(cost)) +} + +/// Compute cost for the unary operators that perform simple computation(e.g. `Project`, `Filter`, `EvalScalar`). +/// +/// TODO(leiysky): Since we don't have alternation for `Aggregate` for now, we just +/// treat `Aggregate` as normal computation. +fn compute_cost_unary_common_operator(memo: &Memo, m_expr: &MExpr) -> Result { + let group = m_expr.child_group(memo, 0)?; + let card = group.relational_prop.cardinality; + let cost = card * COST_FACTOR_COMPUTE_PER_ROW; + Ok(Cost(cost)) +} + +fn compute_cost_union_all(memo: &Memo, m_expr: &MExpr) -> Result { + let left_group = m_expr.child_group(memo, 0)?; + let right_group = m_expr.child_group(memo, 0)?; + let card = left_group.relational_prop.cardinality + right_group.relational_prop.cardinality; + let cost = card * COST_FACTOR_COMPUTE_PER_ROW; + Ok(Cost(cost)) +} diff --git a/src/query/service/src/sql/optimizer/cost/mod.rs b/src/query/service/src/sql/optimizer/cost/mod.rs new file mode 100644 index 0000000000000..5c1b65890a5e9 --- /dev/null +++ b/src/query/service/src/sql/optimizer/cost/mod.rs @@ -0,0 +1,56 @@ +// Copyright 2022 Datafuse Labs. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +mod cost_model; + +use std::ops::Add; + +use common_exception::Result; +pub use cost_model::DefaultCostModel; + +use super::MExpr; +use super::Memo; +use crate::sql::IndexType; + +#[derive(Debug, Clone, Copy, PartialEq, PartialOrd)] +pub struct Cost(pub f64); + +impl From for Cost +where T: Into +{ + fn from(t: T) -> Self { + Cost(t.into()) + } +} + +impl Add for Cost { + type Output = Self; + + fn add(self, rhs: Self) -> Self::Output { + Cost(self.0 + rhs.0) + } +} + +pub trait CostModel { + /// Compute cost of given `MExpr`(children are not encapsulated). + fn compute_cost(&self, memo: &Memo, m_expr: &MExpr) -> Result; +} + +/// Context of best cost within a group. +#[derive(Debug, Clone)] +pub struct CostContext { + pub group_index: IndexType, + pub expr_index: IndexType, + pub cost: Cost, +} diff --git a/src/query/service/src/sql/optimizer/distributed/mod.rs b/src/query/service/src/sql/optimizer/distributed/mod.rs new file mode 100644 index 0000000000000..b9f0dcaa91a9f --- /dev/null +++ b/src/query/service/src/sql/optimizer/distributed/mod.rs @@ -0,0 +1,40 @@ +// Copyright 2022 Datafuse Labs. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use common_exception::Result; + +use super::property::require_property; +use super::Distribution; +use super::RelExpr; +use super::RequiredProperty; +use super::SExpr; +use crate::sql::plans::Exchange; + +pub fn optimize_distributed_query(s_expr: &SExpr) -> Result { + let required = RequiredProperty { + distribution: Distribution::Any, + }; + let mut result = require_property(&required, s_expr)?; + let rel_expr = RelExpr::with_s_expr(&result); + let physical_prop = rel_expr.derive_physical_prop()?; + let root_required = RequiredProperty { + distribution: Distribution::Serial, + }; + if !root_required.satisfied_by(&physical_prop) { + // Manually enforce serial distribution. + result = SExpr::create_unary(Exchange::Merge.into(), result); + } + + Ok(result) +} diff --git a/src/query/service/src/sql/optimizer/format/mod.rs b/src/query/service/src/sql/optimizer/format/mod.rs new file mode 100644 index 0000000000000..cad7253554d66 --- /dev/null +++ b/src/query/service/src/sql/optimizer/format/mod.rs @@ -0,0 +1,70 @@ +// Copyright 2022 Datafuse Labs. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +use super::group::Group; +use super::MExpr; +use super::Memo; +use crate::sql::plans::RelOperator; + +pub fn display_memo(memo: &Memo) -> String { + memo.groups + .iter() + .map(display_group) + .collect::>() + .join("\n") +} + +pub fn display_group(group: &Group) -> String { + format!( + "Group #{}: [{}]", + group.group_index, + group + .m_exprs + .iter() + .map(display_m_expr) + .collect::>() + .join(",\n") + ) +} + +pub fn display_m_expr(m_expr: &MExpr) -> String { + format!( + "{} [{}]", + display_rel_op(&m_expr.plan), + m_expr + .children + .iter() + .map(|child| format!("#{child}")) + .collect::>() + .join(", ") + ) +} + +pub fn display_rel_op(rel_op: &RelOperator) -> String { + match rel_op { + RelOperator::LogicalGet(_) => "LogicalGet".to_string(), + RelOperator::LogicalInnerJoin(_) => "LogicalInnerJoin".to_string(), + RelOperator::PhysicalScan(_) => "PhysicalScan".to_string(), + RelOperator::PhysicalHashJoin(_) => "PhysicalHashJoin".to_string(), + RelOperator::Project(_) => "Project".to_string(), + RelOperator::EvalScalar(_) => "EvalScalar".to_string(), + RelOperator::Filter(_) => "Filter".to_string(), + RelOperator::Aggregate(_) => "Aggregate".to_string(), + RelOperator::Sort(_) => "Sort".to_string(), + RelOperator::Limit(_) => "Limit".to_string(), + RelOperator::UnionAll(_) => "UnionAll".to_string(), + RelOperator::Exchange(_) => "Exchange".to_string(), + RelOperator::Pattern(_) => "Pattern".to_string(), + } +} diff --git a/src/query/service/src/sql/optimizer/group.rs b/src/query/service/src/sql/optimizer/group.rs index 99aab2d8fe01a..f2d823d2329d7 100644 --- a/src/query/service/src/sql/optimizer/group.rs +++ b/src/query/service/src/sql/optimizer/group.rs @@ -12,8 +12,6 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-use std::iter::Iterator; - use common_exception::Result; use crate::sql::optimizer::m_expr::MExpr; @@ -23,19 +21,19 @@ use crate::sql::IndexType; /// `Group` is a set of logically equivalent relational expressions represented with `MExpr`. #[derive(Clone)] pub struct Group { - group_index: IndexType, - expressions: Vec, + pub group_index: IndexType, + pub m_exprs: Vec, /// Relational property shared by expressions in a same `Group` - relational_prop: Option, + pub relational_prop: RelationalProperty, } impl Group { - pub fn create(index: IndexType) -> Self { + pub fn create(index: IndexType, relational_prop: RelationalProperty) -> Self { Group { group_index: index, - expressions: vec![], - relational_prop: None, + m_exprs: vec![], + relational_prop, } } @@ -43,20 +41,12 @@ impl Group { self.group_index } - pub fn iter(&self) -> impl Iterator { - self.expressions.iter() + pub fn num_exprs(&self) -> usize { + self.m_exprs.len() } - pub fn insert(&mut self, group_expression: MExpr) -> Result<()> { - self.expressions.push(group_expression); + pub fn insert(&mut self, m_expr: MExpr) -> Result<()> { + self.m_exprs.push(m_expr); Ok(()) } - - pub fn set_relational_prop(&mut self, relational_prop: RelationalProperty) { - self.relational_prop = Some(relational_prop); - } - - pub fn relational_prop(&self) -> Option<&RelationalProperty> { - self.relational_prop.as_ref() - } } diff --git a/src/query/service/src/sql/optimizer/heuristic/decorrelate.rs b/src/query/service/src/sql/optimizer/heuristic/decorrelate.rs index 7ffb04f0fcb6b..4bf76dd945036 100644 --- a/src/query/service/src/sql/optimizer/heuristic/decorrelate.rs +++ b/src/query/service/src/sql/optimizer/heuristic/decorrelate.rs @@ -457,6 +457,7 @@ impl SubqueryRewriter { push_down_predicates: None, limit: None, order_by: None, + statistics: None, } .into(), ); diff --git a/src/query/service/src/sql/optimizer/heuristic/mod.rs b/src/query/service/src/sql/optimizer/heuristic/mod.rs index 7ae5b038b6f63..87d2bb6f57087 100644 
--- a/src/query/service/src/sql/optimizer/heuristic/mod.rs +++ b/src/query/service/src/sql/optimizer/heuristic/mod.rs @@ -24,19 +24,13 @@ use common_exception::Result; use once_cell::sync::Lazy; use super::rule::RuleID; -use super::util::validate_distributed_query; use super::ColumnSet; use crate::sessions::QueryContext; use crate::sql::optimizer::heuristic::decorrelate::decorrelate_subquery; use crate::sql::optimizer::heuristic::implement::HeuristicImplementor; pub use crate::sql::optimizer::heuristic::rule_list::RuleList; -use crate::sql::optimizer::property::require_property; use crate::sql::optimizer::rule::TransformState; -use crate::sql::optimizer::Distribution; -use crate::sql::optimizer::RelExpr; -use crate::sql::optimizer::RequiredProperty; use crate::sql::optimizer::SExpr; -use crate::sql::plans::Exchange; use crate::sql::BindContext; use crate::sql::MetadataRef; @@ -72,8 +66,6 @@ pub struct HeuristicOptimizer { _ctx: Arc, bind_context: Box, metadata: MetadataRef, - - enable_distributed_optimization: bool, } impl HeuristicOptimizer { @@ -82,7 +74,6 @@ impl HeuristicOptimizer { bind_context: Box, metadata: MetadataRef, rules: RuleList, - enable_distributed_optimization: bool, ) -> Self { HeuristicOptimizer { rules, @@ -91,7 +82,6 @@ impl HeuristicOptimizer { _ctx: ctx, bind_context, metadata, - enable_distributed_optimization, } } @@ -111,25 +101,9 @@ impl HeuristicOptimizer { let pre_optimized = self.pre_optimize(s_expr)?; let optimized = self.optimize_expression(&pre_optimized)?; let post_optimized = self.post_optimize(optimized)?; - let mut result = self.implement_expression(&post_optimized)?; - - if self.enable_distributed_optimization && validate_distributed_query(&result) { - let required = RequiredProperty { - distribution: Distribution::Any, - }; - result = require_property(&required, &result)?; - let rel_expr = RelExpr::with_s_expr(&result); - let physical_prop = rel_expr.derive_physical_prop()?; - let root_required = RequiredProperty { - 
distribution: Distribution::Serial, - }; - if !root_required.satisfied_by(&physical_prop) { - // Manually enforce serial distribution. - result = SExpr::create_unary(Exchange::Merge.into(), result); - } - } + // let mut result = self.implement_expression(&post_optimized)?; - Ok(result) + Ok(post_optimized) } fn optimize_expression(&self, s_expr: &SExpr) -> Result { @@ -143,6 +117,7 @@ impl HeuristicOptimizer { Ok(result) } + #[allow(dead_code)] fn implement_expression(&self, s_expr: &SExpr) -> Result { let mut implemented_children = Vec::with_capacity(s_expr.arity()); for expr in s_expr.children() { diff --git a/src/query/service/src/sql/optimizer/heuristic/prune_columns.rs b/src/query/service/src/sql/optimizer/heuristic/prune_columns.rs index ad89863fd9252..20415c37d9849 100644 --- a/src/query/service/src/sql/optimizer/heuristic/prune_columns.rs +++ b/src/query/service/src/sql/optimizer/heuristic/prune_columns.rs @@ -89,6 +89,7 @@ impl ColumnPruner { push_down_predicates: p.push_down_predicates.clone(), limit: p.limit, order_by: p.order_by.clone(), + statistics: p.statistics, }))) } RelOperator::LogicalInnerJoin(p) => { diff --git a/src/query/service/src/sql/optimizer/m_expr.rs b/src/query/service/src/sql/optimizer/m_expr.rs index 7b4486374c157..20606254ecf00 100644 --- a/src/query/service/src/sql/optimizer/m_expr.rs +++ b/src/query/service/src/sql/optimizer/m_expr.rs @@ -12,8 +12,10 @@ // See the License for the specific language governing permissions and // limitations under the License. +use common_exception::ErrorCode; use common_exception::Result; +use super::group::Group; use crate::sql::optimizer::memo::Memo; use crate::sql::optimizer::pattern_extractor::PatternExtractor; use crate::sql::optimizer::rule::RulePtr; @@ -27,17 +29,27 @@ use crate::sql::IndexType; /// expressions inside `Memo`. 
#[derive(Clone)] pub struct MExpr { - group_index: IndexType, - plan: RelOperator, - children: Vec, + // index of current `Group` + pub group_index: IndexType, + // index of current `MExpr` within a `Group` + pub index: IndexType, + + pub plan: RelOperator, + pub children: Vec, } impl MExpr { - pub fn create(group_index: IndexType, plan: RelOperator, children: Vec) -> Self { + pub fn create( + group_index: IndexType, + index: IndexType, + plan: RelOperator, + children: Vec, + ) -> Self { MExpr { group_index, plan, children, + index, } } @@ -45,16 +57,15 @@ impl MExpr { self.children.len() } - pub fn group_index(&self) -> IndexType { - self.group_index - } - - pub fn plan(&self) -> &RelOperator { - &self.plan - } - - pub fn children(&self) -> &Vec { - &self.children + pub fn child_group<'a>(&'a self, memo: &'a Memo, child_index: usize) -> Result<&'a Group> { + let group_index = self.children.get(child_index).ok_or_else(|| { + ErrorCode::LogicalError(format!( + "child_index {} is out of bound {}", + child_index, + self.children.len() + )) + })?; + memo.group(*group_index) } /// Doesn't check if children are matched @@ -77,7 +88,7 @@ impl MExpr { transform_state: &mut TransformState, ) -> Result<()> { let mut extractor = PatternExtractor::create(); - let exprs = extractor.extract(memo, self, rule.pattern()); + let exprs = extractor.extract(memo, self, rule.pattern())?; for expr in exprs.iter() { rule.apply(expr, transform_state)?; diff --git a/src/query/service/src/sql/optimizer/memo.rs b/src/query/service/src/sql/optimizer/memo.rs index f1e1c81fc0727..82506ccc68097 100644 --- a/src/query/service/src/sql/optimizer/memo.rs +++ b/src/query/service/src/sql/optimizer/memo.rs @@ -12,11 +12,17 @@ // See the License for the specific language governing permissions and // limitations under the License. 
+use std::collections::HashSet; + +use common_exception::ErrorCode; use common_exception::Result; +use super::RelExpr; +use super::RelationalProperty; use crate::sql::optimizer::group::Group; use crate::sql::optimizer::m_expr::MExpr; use crate::sql::optimizer::s_expr::SExpr; +use crate::sql::plans::RelOperator; use crate::sql::IndexType; /// `Memo` is a search space which memoize possible plans of a query. @@ -24,8 +30,12 @@ use crate::sql::IndexType; /// Each `Group` is a set of logically equivalent relational expressions represented with `MExpr`. #[derive(Clone)] pub struct Memo { - groups: Vec, - root: Option, + pub groups: Vec, + pub root: Option, + + /// Hash table for detecting duplicated expressions. + /// The entry is `(group_index, plan, children)`. + pub m_expr_lookup_table: HashSet<(IndexType, RelOperator, Vec)>, } impl Memo { @@ -33,6 +43,7 @@ impl Memo { Memo { groups: vec![], root: None, + m_expr_lookup_table: HashSet::new(), } } @@ -45,70 +56,74 @@ impl Memo { } // Initialize memo with given expression - pub fn init(&mut self, expression: SExpr) -> Result<()> { - let root = self.insert(None, expression)?; + pub fn init(&mut self, s_expr: SExpr) -> Result<()> { + let root = self.insert(None, s_expr)?; self.set_root(root); Ok(()) } - pub fn insert( - &mut self, - target_group: Option, - expression: SExpr, - ) -> Result { + pub fn insert(&mut self, target_group: Option, s_expr: SExpr) -> Result { let mut children_group = vec![]; - for expr in expression.children() { + for expr in s_expr.children() { // Insert children expressions recursively and collect their group indices let group = self.insert(None, expr.clone())?; children_group.push(group); } - if let Some(group_index) = expression.original_group() { + if let Some(group_index) = s_expr.original_group() { // The expression is extracted by PatternExtractor, no need to reinsert. 
return Ok(group_index); } - let mut _new_group = false; - // Create new group if not specified let group_index = match target_group { Some(index) => index, _ => { - _new_group = true; - self.add_group() + let rel_expr = RelExpr::with_s_expr(&s_expr); + let relational_prop = rel_expr.derive_relational_prop()?; + self.add_group(relational_prop) } }; - // if new_group { - // let group = self.group_mut(group_index); - // let relational_prop = expression.compute_relational_prop(); - // group.set_relational_prop(relational_prop); - // } + let plan = s_expr.plan(); - let plan = expression.plan(); - - let group_expression = MExpr::create(group_index, plan.clone(), children_group); - self.insert_m_expr(group_index, group_expression)?; + let m_expr = MExpr::create( + group_index, + self.group(group_index)?.num_exprs(), + plan.clone(), + children_group, + ); + self.insert_m_expr(group_index, m_expr)?; Ok(group_index) } - pub fn group(&self, index: IndexType) -> &Group { - &self.groups[index] + pub fn group(&self, index: IndexType) -> Result<&Group> { + self.groups + .get(index) + .ok_or_else(|| ErrorCode::LogicalError(format!("Group index {} not found", index))) } - pub fn insert_m_expr(&mut self, group_index: IndexType, expression: MExpr) -> Result<()> { - self.group_mut(group_index).insert(expression) + pub fn insert_m_expr(&mut self, group_index: IndexType, m_expr: MExpr) -> Result<()> { + if self.m_expr_lookup_table.insert(( + m_expr.group_index, + m_expr.plan.clone(), + m_expr.children.clone(), + )) { + self.group_mut(group_index).insert(m_expr) + } else { + Ok(()) + } } fn group_mut(&mut self, index: IndexType) -> &mut Group { &mut self.groups[index] } - fn add_group(&mut self) -> IndexType { + fn add_group(&mut self, relational_prop: RelationalProperty) -> IndexType { let group_index = self.groups.len(); - let group = Group::create(group_index); + let group = Group::create(group_index, relational_prop); self.groups.push(group); group_index } diff --git 
a/src/query/service/src/sql/optimizer/mod.rs b/src/query/service/src/sql/optimizer/mod.rs index 400bd98d5582a..52219c4deee61 100644 --- a/src/query/service/src/sql/optimizer/mod.rs +++ b/src/query/service/src/sql/optimizer/mod.rs @@ -13,6 +13,9 @@ // limitations under the License. mod cascades; +mod cost; +mod distributed; +mod format; mod group; mod heuristic; mod m_expr; @@ -41,7 +44,10 @@ pub use property::RequiredProperty; pub use rule::RuleFactory; pub use s_expr::SExpr; +use self::cascades::CascadesOptimizer; +use self::distributed::optimize_distributed_query; use self::util::contains_local_table_scan; +use self::util::validate_distributed_query; use super::plans::Plan; use super::BindContext; use crate::sessions::QueryContext; @@ -79,7 +85,13 @@ pub fn optimize( metadata, rewrite_kind, } => Ok(Plan::Query { - s_expr: optimize_query(ctx, opt_ctx, metadata.clone(), bind_context.clone(), s_expr)?, + s_expr: Box::new(optimize_query( + ctx, + opt_ctx, + metadata.clone(), + bind_context.clone(), + *s_expr, + )?), bind_context, metadata, rewrite_kind, @@ -124,17 +136,21 @@ pub fn optimize_query( ) -> Result { let rules = RuleList::create(DEFAULT_REWRITE_RULES.clone())?; + let contains_local_table_scan = contains_local_table_scan(&s_expr, &metadata); + + let mut heuristic = HeuristicOptimizer::new(ctx, bind_context, metadata, rules); + let mut result = heuristic.optimize(s_expr)?; + + let cascades = CascadesOptimizer::create(); + result = cascades.optimize(result)?; + // So far, we don't have ability to execute distributed query // with reading data from local tales(e.g. system tables). 
- let enable_distributed_query = opt_ctx.config.enable_distributed_optimization - && !contains_local_table_scan(&s_expr, &metadata); - - let mut heuristic = - HeuristicOptimizer::new(ctx, bind_context, metadata, rules, enable_distributed_query); - let optimized = heuristic.optimize(s_expr)?; - // TODO: enable cascades optimizer - // let mut cascades = CascadesOptimizer::create(ctx); - // cascades.optimize(s_expr) + let enable_distributed_query = + opt_ctx.config.enable_distributed_optimization && !contains_local_table_scan; + if enable_distributed_query && validate_distributed_query(&result) { + result = optimize_distributed_query(&result)?; + } - Ok(optimized) + Ok(result) } diff --git a/src/query/service/src/sql/optimizer/pattern_extractor.rs b/src/query/service/src/sql/optimizer/pattern_extractor.rs index 7e4a04d71d196..d7a174115287b 100644 --- a/src/query/service/src/sql/optimizer/pattern_extractor.rs +++ b/src/query/service/src/sql/optimizer/pattern_extractor.rs @@ -12,6 +12,8 @@ // See the License for the specific language governing permissions and // limitations under the License. +use common_exception::Result; + use crate::sql::optimizer::group::Group; use crate::sql::optimizer::m_expr::MExpr; use crate::sql::optimizer::memo::Memo; @@ -25,45 +27,48 @@ impl PatternExtractor { PatternExtractor {} } - pub fn extract(&mut self, memo: &Memo, m_expr: &MExpr, pattern: &SExpr) -> Vec { + pub fn extract(&mut self, memo: &Memo, m_expr: &MExpr, pattern: &SExpr) -> Result> { if !m_expr.match_pattern(memo, pattern) { - return vec![]; + return Ok(vec![]); } if pattern.is_pattern() { // Pattern operator is `Pattern`, we can return current operator. 
- return vec![SExpr::create( - m_expr.plan().clone(), + return Ok(vec![SExpr::create( + m_expr.plan.clone(), vec![], - Some(m_expr.group_index()), - )]; + Some(m_expr.group_index), + )]); } let pattern_children = pattern.children(); if m_expr.arity() != pattern_children.len() { - return vec![]; + return Ok(vec![]); } let mut children_results = vec![]; - for (i, child) in m_expr.children().iter().enumerate().take(m_expr.arity()) { + for (i, child) in m_expr.children.iter().enumerate().take(m_expr.arity()) { let pattern = &pattern_children[i]; - let child_group = memo.group(*child); - let result = self.extract_group(memo, child_group, pattern); + let child_group = memo.group(*child)?; + let result = self.extract_group(memo, child_group, pattern)?; children_results.push(result); } - Self::generate_expression_with_children(m_expr, children_results) + Ok(Self::generate_expression_with_children( + m_expr, + children_results, + )) } - fn extract_group(&mut self, memo: &Memo, group: &Group, pattern: &SExpr) -> Vec { + fn extract_group(&mut self, memo: &Memo, group: &Group, pattern: &SExpr) -> Result> { let mut results = vec![]; - for group_expression in group.iter() { - let mut result = self.extract(memo, group_expression, pattern); - results.append(&mut result); + for m_expr in group.m_exprs.iter() { + let result = self.extract(memo, m_expr, pattern)?; + results.extend(result.into_iter()); } - results + Ok(results) } fn generate_expression_with_children( @@ -84,9 +89,9 @@ impl PatternExtractor { if cursors.is_empty() { results.push(SExpr::create( - m_expr.plan().clone(), + m_expr.plan.clone(), vec![], - Some(m_expr.group_index()), + Some(m_expr.group_index), )); return results; } @@ -97,9 +102,9 @@ impl PatternExtractor { children.push(candidates[index][*cursor].clone()); } results.push(SExpr::create( - m_expr.plan().clone(), + m_expr.plan.clone(), children, - Some(m_expr.group_index()), + Some(m_expr.group_index), )); let mut shifted = false; diff --git 
a/src/query/service/src/sql/optimizer/property/builder.rs b/src/query/service/src/sql/optimizer/property/builder.rs index 7891b13cf82c4..d978169e5d39f 100644 --- a/src/query/service/src/sql/optimizer/property/builder.rs +++ b/src/query/service/src/sql/optimizer/property/builder.rs @@ -42,7 +42,7 @@ impl<'a> RelExpr<'a> { pub fn derive_relational_prop(&self) -> Result { let plan = match self { RelExpr::SExpr { expr } => expr.plan(), - RelExpr::MExpr { expr, .. } => expr.plan(), + RelExpr::MExpr { expr, .. } => &expr.plan, }; if let Some(logical) = plan.as_logical() { @@ -62,22 +62,16 @@ impl<'a> RelExpr<'a> { let rel_expr = RelExpr::with_s_expr(child); rel_expr.derive_relational_prop() } - RelExpr::MExpr { expr, memo } => memo - .group(expr.group_index()) - .relational_prop() - .cloned() - .ok_or_else(|| { - ErrorCode::LogicalError( - "Relational property should have been filled".to_string(), - ) - }), + RelExpr::MExpr { expr, memo } => { + Ok(memo.group(expr.group_index)?.relational_prop.clone()) + } } } pub fn derive_physical_prop(&self) -> Result { let plan = match self { RelExpr::SExpr { expr } => expr.plan(), - RelExpr::MExpr { expr, .. } => expr.plan(), + RelExpr::MExpr { expr, .. } => &expr.plan, }; if let Some(physical) = plan.as_physical() { @@ -110,7 +104,7 @@ impl<'a> RelExpr<'a> { ) -> Result { let plan = match self { RelExpr::SExpr { expr } => expr.plan(), - RelExpr::MExpr { expr, .. } => expr.plan(), + RelExpr::MExpr { expr, .. 
} => &expr.plan, }; if let Some(physical) = plan.as_physical() { diff --git a/src/query/service/src/sql/optimizer/property/mod.rs b/src/query/service/src/sql/optimizer/property/mod.rs index 84b24276fe598..486dc5f40dac0 100644 --- a/src/query/service/src/sql/optimizer/property/mod.rs +++ b/src/query/service/src/sql/optimizer/property/mod.rs @@ -14,6 +14,7 @@ mod builder; mod enforcer; +mod stat; use std::collections::HashSet; @@ -40,6 +41,10 @@ impl RequiredProperty { pub struct RelationalProperty { pub output_columns: ColumnSet, pub outer_columns: ColumnSet, + + // TODO(leiysky): introduce upper bound of cardinality to + // reduce error in estimation. + pub cardinality: f64, } #[derive(Default, Clone)] @@ -47,7 +52,7 @@ pub struct PhysicalProperty { pub distribution: Distribution, } -#[derive(Debug, Clone, PartialEq)] +#[derive(Debug, Clone, PartialEq, Eq)] pub enum Distribution { Any, Random, diff --git a/src/query/service/tests/it/sql/optimizer/mod.rs b/src/query/service/src/sql/optimizer/property/stat.rs similarity index 88% rename from src/query/service/tests/it/sql/optimizer/mod.rs rename to src/query/service/src/sql/optimizer/property/stat.rs index 0d0bc27de3a8f..ea0ed57e60edf 100644 --- a/src/query/service/tests/it/sql/optimizer/mod.rs +++ b/src/query/service/src/sql/optimizer/property/stat.rs @@ -1,4 +1,4 @@ -// Copyright 2021 Datafuse Labs. +// Copyright 2022 Datafuse Labs. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -11,6 +11,3 @@ // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. 
- -mod heuristic; -mod pattern_extractor; diff --git a/src/query/service/src/sql/optimizer/rule/factory.rs b/src/query/service/src/sql/optimizer/rule/factory.rs index c030a07f0319b..5fa57b78c8452 100644 --- a/src/query/service/src/sql/optimizer/rule/factory.rs +++ b/src/query/service/src/sql/optimizer/rule/factory.rs @@ -20,6 +20,7 @@ use super::rewrite::RuleNormalizeScalarFilter; use super::rewrite::RulePushDownFilterEvalScalar; use super::rewrite::RulePushDownFilterJoin; use super::rewrite::RulePushDownFilterProject; +use super::transform::RuleCommuteJoin; use crate::sql::optimizer::rule::rewrite::RuleEliminateFilter; use crate::sql::optimizer::rule::rewrite::RuleEliminateProject; use crate::sql::optimizer::rule::rewrite::RuleMergeEvalScalar; @@ -68,6 +69,7 @@ impl RuleFactory { RuleID::NormalizeDisjunctiveFilter => { Ok(Box::new(RuleNormalizeDisjunctiveFilter::new())) } + RuleID::CommuteJoin => Ok(Box::new(RuleCommuteJoin::new())), } } } diff --git a/src/query/service/src/sql/optimizer/rule/mod.rs b/src/query/service/src/sql/optimizer/rule/mod.rs index d3ca8bcfc48c1..36f1966ba6268 100644 --- a/src/query/service/src/sql/optimizer/rule/mod.rs +++ b/src/query/service/src/sql/optimizer/rule/mod.rs @@ -24,6 +24,7 @@ mod rewrite; mod rule_implement_get; mod rule_implement_hash_join; mod rule_set; +mod transform; mod transform_state; pub use factory::RuleFactory; @@ -63,6 +64,9 @@ pub enum RuleID { MergeFilter, SplitAggregate, + // Exploration rules + CommuteJoin, + // Implementation rules ImplementGet, ImplementHashJoin, @@ -71,8 +75,6 @@ pub enum RuleID { impl Display for RuleID { fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { match self { - RuleID::ImplementGet => write!(f, "ImplementGet"), - RuleID::ImplementHashJoin => write!(f, "ImplementHashJoin"), RuleID::PushDownFilterProject => write!(f, "PushDownFilterProject"), RuleID::PushDownFilterEvalScalar => write!(f, "PushDownFilterEvalScalar"), RuleID::PushDownFilterJoin => write!(f, 
"PushDownFilterJoin"), @@ -91,6 +93,11 @@ impl Display for RuleID { RuleID::NormalizeScalarFilter => write!(f, "NormalizeScalarFilter"), RuleID::SplitAggregate => write!(f, "SplitAggregate"), RuleID::NormalizeDisjunctiveFilter => write!(f, "NormalizeDisjunctiveFilter"), + + RuleID::CommuteJoin => write!(f, "CommuteJoin"), + + RuleID::ImplementGet => write!(f, "ImplementGet"), + RuleID::ImplementHashJoin => write!(f, "ImplementHashJoin"), } } } diff --git a/src/query/service/src/sql/optimizer/rule/rule_implement_get.rs b/src/query/service/src/sql/optimizer/rule/rule_implement_get.rs index 7b6b69069094d..fb1d174adc851 100644 --- a/src/query/service/src/sql/optimizer/rule/rule_implement_get.rs +++ b/src/query/service/src/sql/optimizer/rule/rule_implement_get.rs @@ -47,8 +47,8 @@ impl Rule for RuleImplementGet { self.id } - fn apply(&self, expression: &SExpr, state: &mut TransformState) -> Result<()> { - let plan = expression.plan().clone(); + fn apply(&self, s_expr: &SExpr, state: &mut TransformState) -> Result<()> { + let plan = s_expr.plan().clone(); let logical_get: LogicalGet = plan.try_into()?; let result = SExpr::create_leaf( diff --git a/src/query/service/src/sql/optimizer/rule/rule_implement_hash_join.rs b/src/query/service/src/sql/optimizer/rule/rule_implement_hash_join.rs index afc0e771a6087..d462aa3ec2205 100644 --- a/src/query/service/src/sql/optimizer/rule/rule_implement_hash_join.rs +++ b/src/query/service/src/sql/optimizer/rule/rule_implement_hash_join.rs @@ -59,8 +59,8 @@ impl Rule for RuleImplementHashJoin { self.id } - fn apply(&self, expression: &SExpr, state: &mut TransformState) -> Result<()> { - let plan = expression.plan().clone(); + fn apply(&self, s_expr: &SExpr, state: &mut TransformState) -> Result<()> { + let plan = s_expr.plan().clone(); let logical_join: LogicalInnerJoin = plan.try_into()?; let result = SExpr::create( @@ -73,8 +73,8 @@ impl Rule for RuleImplementHashJoin { from_correlated_subquery: logical_join.from_correlated_subquery, } 
.into(), - expression.children().to_vec(), - expression.original_group(), + s_expr.children().to_vec(), + None, ); state.add_result(result); diff --git a/src/query/service/src/sql/optimizer/rule/transform/mod.rs b/src/query/service/src/sql/optimizer/rule/transform/mod.rs new file mode 100644 index 0000000000000..f346792bc613d --- /dev/null +++ b/src/query/service/src/sql/optimizer/rule/transform/mod.rs @@ -0,0 +1,17 @@ +// Copyright 2022 Datafuse Labs. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +mod rule_commute_join; + +pub use rule_commute_join::RuleCommuteJoin; diff --git a/src/query/service/src/sql/optimizer/rule/transform/rule_commute_join.rs b/src/query/service/src/sql/optimizer/rule/transform/rule_commute_join.rs new file mode 100644 index 0000000000000..3ff64aad7042c --- /dev/null +++ b/src/query/service/src/sql/optimizer/rule/transform/rule_commute_join.rs @@ -0,0 +1,85 @@ +// Copyright 2022 Datafuse Labs. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +use common_exception::Result; + +use crate::sql::optimizer::rule::Rule; +use crate::sql::optimizer::rule::TransformState; +use crate::sql::optimizer::RuleID; +use crate::sql::optimizer::SExpr; +use crate::sql::plans::JoinType; +use crate::sql::plans::LogicalInnerJoin; +use crate::sql::plans::PatternPlan; +use crate::sql::plans::RelOp; + +/// Rule to apply commutativity of join operator. +/// Since we will always use the right child as build side, this +/// rule will help us measure which child is the better one. +/// +/// TODO(leiysky): currently, we only support commutation for inner/cross join. +/// Other join types will be added as soon as we implement them in Processor. +pub struct RuleCommuteJoin { + id: RuleID, + pattern: SExpr, +} + +impl RuleCommuteJoin { + pub fn new() -> Self { + Self { + id: RuleID::CommuteJoin, + + // LogicalJoin + // | \ + // * * + pattern: SExpr::create_binary( + PatternPlan { + plan_type: RelOp::LogicalInnerJoin, + } + .into(), + SExpr::create_pattern_leaf(), + SExpr::create_pattern_leaf(), + ), + } + } +} + +impl Rule for RuleCommuteJoin { + fn id(&self) -> RuleID { + self.id + } + + fn apply(&self, s_expr: &SExpr, state: &mut TransformState) -> Result<()> { + let mut join: LogicalInnerJoin = s_expr.plan().clone().try_into()?; + let left_child = s_expr.child(0)?; + let right_child = s_expr.child(1)?; + + match join.join_type { + JoinType::Inner | JoinType::Cross => { + // Swap the join conditions side + (join.left_conditions, join.right_conditions) = + (join.right_conditions, join.left_conditions); + let result = + SExpr::create_binary(join.into(), right_child.clone(), left_child.clone()); + state.add_result(result); + } + _ => {} + } + + Ok(()) + } + + fn pattern(&self) -> &SExpr { + &self.pattern + } +} diff --git a/src/query/service/src/sql/optimizer/s_expr.rs b/src/query/service/src/sql/optimizer/s_expr.rs index 
d52c2c0d0673b..d65e6ca6f3d53 100644 --- a/src/query/service/src/sql/optimizer/s_expr.rs +++ b/src/query/service/src/sql/optimizer/s_expr.rs @@ -18,6 +18,7 @@ use common_exception::Result; use crate::sql::optimizer::rule::AppliedRules; use crate::sql::optimizer::rule::RuleID; use crate::sql::plans::Operator; +use crate::sql::plans::PatternPlan; use crate::sql::plans::RelOp; use crate::sql::plans::RelOperator; use crate::sql::IndexType; @@ -25,10 +26,10 @@ use crate::sql::IndexType; /// `SExpr` is abbreviation of single expression, which is a tree of relational operators. #[derive(Clone, Debug)] pub struct SExpr { - plan: RelOperator, - children: Vec, + pub(super) plan: RelOperator, + pub(super) children: Vec, - original_group: Option, + pub(super) original_group: Option, /// A bitmap to record applied rules on current SExpr, to prevent /// redundant transformations. @@ -62,6 +63,17 @@ impl SExpr { Self::create(plan, vec![], None) } + pub fn create_pattern_leaf() -> Self { + Self::create( + PatternPlan { + plan_type: RelOp::Pattern, + } + .into(), + vec![], + None, + ) + } + pub fn plan(&self) -> &RelOperator { &self.plan } diff --git a/src/query/service/src/sql/planner/binder/aggregate.rs b/src/query/service/src/sql/planner/binder/aggregate.rs index 76e2890c6ebd1..2b9e3c11f99ec 100644 --- a/src/query/service/src/sql/planner/binder/aggregate.rs +++ b/src/query/service/src/sql/planner/binder/aggregate.rs @@ -44,7 +44,7 @@ use crate::sql::plans::ScalarExpr; use crate::sql::plans::ScalarItem; use crate::sql::BindContext; -#[derive(Default, Clone, PartialEq, Debug)] +#[derive(Default, Clone, PartialEq, Eq, Debug)] pub struct AggregateInfo { /// Aggregation functions pub aggregate_functions: Vec, diff --git a/src/query/service/src/sql/planner/binder/bind_context.rs b/src/query/service/src/sql/planner/binder/bind_context.rs index eabea1b410a63..4a2af59bfb67f 100644 --- a/src/query/service/src/sql/planner/binder/bind_context.rs +++ 
b/src/query/service/src/sql/planner/binder/bind_context.rs @@ -13,6 +13,7 @@ // limitations under the License. use std::collections::HashMap; +use std::hash::Hash; use std::sync::Arc; use common_ast::ast::TableAlias; @@ -33,7 +34,7 @@ use crate::sql::optimizer::SExpr; use crate::sql::plans::Scalar; use crate::sql::NameResolutionContext; -#[derive(Clone, PartialEq, Eq, Debug)] +#[derive(Clone, Debug)] pub struct ColumnBinding { /// Database name of this `ColumnBinding` in current context pub database_name: Option, @@ -52,6 +53,20 @@ pub struct ColumnBinding { pub visible_in_unqualified_wildcard: bool, } +impl PartialEq for ColumnBinding { + fn eq(&self, other: &Self) -> bool { + self.index == other.index + } +} + +impl Eq for ColumnBinding {} + +impl Hash for ColumnBinding { + fn hash(&self, state: &mut H) { + self.index.hash(state); + } +} + #[derive(Debug, Clone)] pub enum NameResolutionResult { Column(ColumnBinding), diff --git a/src/query/service/src/sql/planner/binder/mod.rs b/src/query/service/src/sql/planner/binder/mod.rs index fffaa6eb40ddc..41fb02854decd 100644 --- a/src/query/service/src/sql/planner/binder/mod.rs +++ b/src/query/service/src/sql/planner/binder/mod.rs @@ -111,7 +111,7 @@ impl<'a> Binder { Statement::Query(query) => { let (s_expr, bind_context) = self.bind_query(bind_context, query).await?; Plan::Query { - s_expr, + s_expr: Box::new(s_expr), metadata: self.metadata.clone(), bind_context: Box::new(bind_context), rewrite_kind: None, diff --git a/src/query/service/src/sql/planner/binder/table.rs b/src/query/service/src/sql/planner/binder/table.rs index 84a2917e6ddc1..17ba0eeece739 100644 --- a/src/query/service/src/sql/planner/binder/table.rs +++ b/src/query/service/src/sql/planner/binder/table.rs @@ -80,6 +80,7 @@ impl<'a> Binder { ); self.bind_base_table(bind_context, database, table_index) + .await } pub(super) async fn bind_table_reference( @@ -154,8 +155,9 @@ impl<'a> Binder { .write() .add_table(catalog, database.clone(), table_meta); - 
let (s_expr, mut bind_context) = - self.bind_base_table(bind_context, database.as_str(), table_index)?; + let (s_expr, mut bind_context) = self + .bind_base_table(bind_context, database.as_str(), table_index) + .await?; if let Some(alias) = alias { bind_context.apply_table_alias(alias, &self.name_resolution_ctx)?; } @@ -216,8 +218,9 @@ impl<'a> Binder { table.clone(), ); - let (s_expr, mut bind_context) = - self.bind_base_table(bind_context, "system", table_index)?; + let (s_expr, mut bind_context) = self + .bind_base_table(bind_context, "system", table_index) + .await?; if let Some(alias) = alias { bind_context.apply_table_alias(alias, &self.name_resolution_ctx)?; } @@ -279,16 +282,15 @@ impl<'a> Binder { Ok((cte_info.s_expr.clone(), new_bind_context)) } - fn bind_base_table( + async fn bind_base_table( &mut self, bind_context: &BindContext, database_name: &str, table_index: IndexType, ) -> Result<(SExpr, BindContext)> { let mut bind_context = BindContext::with_parent(Box::new(bind_context.clone())); - let metadata = self.metadata.read(); - let columns = metadata.columns_by_table_index(table_index); - let table = metadata.table(table_index); + let columns = self.metadata.read().columns_by_table_index(table_index); + let table = self.metadata.read().table(table_index).clone(); for column in columns.iter() { let visible_in_unqualified_wildcard = column.path_indices.is_none(); let column_binding = ColumnBinding { @@ -301,6 +303,7 @@ impl<'a> Binder { }; bind_context.add_column_binding(column_binding); } + let stat = table.table.statistics(self.ctx.clone()).await?; Ok(( SExpr::create_leaf( LogicalGet { @@ -309,6 +312,7 @@ impl<'a> Binder { push_down_predicates: None, limit: None, order_by: None, + statistics: stat, } .into(), ), diff --git a/src/query/service/src/sql/planner/plans/aggregate.rs b/src/query/service/src/sql/planner/plans/aggregate.rs index 9e4ade3222131..d451ef55bcd55 100644 --- a/src/query/service/src/sql/planner/plans/aggregate.rs +++ 
b/src/query/service/src/sql/planner/plans/aggregate.rs @@ -26,7 +26,7 @@ use crate::sql::plans::PhysicalOperator; use crate::sql::plans::RelOp; use crate::sql::plans::ScalarItem; -#[derive(Clone, Debug, PartialEq, Eq, Copy)] +#[derive(Clone, Debug, PartialEq, Eq, Hash, Copy)] pub enum AggregateMode { Partial, Final, @@ -36,7 +36,7 @@ pub enum AggregateMode { Initial, } -#[derive(Clone, Debug)] +#[derive(Clone, Debug, PartialEq, Eq, Hash)] pub struct Aggregate { pub mode: AggregateMode, // group by scalar expressions, such as: group by col1 asc, col2 desc; @@ -144,9 +144,19 @@ impl LogicalOperator for Aggregate { .cloned() .collect(); + // Derive cardinality. We can not estimate the cardinality of an aggregate with group by, until + // we have information about distribution of group keys. So we pass through the cardinality. + let cardinality = if self.group_items.is_empty() { + // Scalar aggregation + 1.0 + } else { + input_prop.cardinality + }; + Ok(RelationalProperty { output_columns, outer_columns, + cardinality, }) } } diff --git a/src/query/service/src/sql/planner/plans/eval_scalar.rs b/src/query/service/src/sql/planner/plans/eval_scalar.rs index 8e061b9ca16a2..77fae5185a6a6 100644 --- a/src/query/service/src/sql/planner/plans/eval_scalar.rs +++ b/src/query/service/src/sql/planner/plans/eval_scalar.rs @@ -28,12 +28,12 @@ use crate::sql::plans::ScalarExpr; use crate::sql::IndexType; /// Evaluate scalar expression -#[derive(Clone, Debug)] +#[derive(Clone, Debug, PartialEq, Eq, Hash)] pub struct EvalScalar { pub items: Vec, } -#[derive(Clone, Debug, PartialEq)] +#[derive(Clone, Debug, PartialEq, Eq, Hash)] pub struct ScalarItem { pub scalar: Scalar, pub index: IndexType, @@ -98,9 +98,13 @@ impl LogicalOperator for EvalScalar { } outer_columns = outer_columns.difference(&output_columns).cloned().collect(); + // Derive cardinality + let cardinality = input_prop.cardinality; + Ok(RelationalProperty { output_columns, outer_columns, + cardinality, }) } } diff --git 
a/src/query/service/src/sql/planner/plans/exchange.rs b/src/query/service/src/sql/planner/plans/exchange.rs index 7dd0c134fccbe..1d4e32534064c 100644 --- a/src/query/service/src/sql/planner/plans/exchange.rs +++ b/src/query/service/src/sql/planner/plans/exchange.rs @@ -24,7 +24,7 @@ use crate::sql::plans::PhysicalOperator; use crate::sql::plans::RelOp; use crate::sql::plans::Scalar; -#[derive(Clone, Debug)] +#[derive(Clone, Debug, PartialEq, Eq, Hash)] pub enum Exchange { Hash(Vec), Broadcast, diff --git a/src/query/service/src/sql/planner/plans/filter.rs b/src/query/service/src/sql/planner/plans/filter.rs index 4eb5a8f3dfccf..32fb43bc9ecca 100644 --- a/src/query/service/src/sql/planner/plans/filter.rs +++ b/src/query/service/src/sql/planner/plans/filter.rs @@ -26,7 +26,7 @@ use crate::sql::plans::RelOp; use crate::sql::plans::Scalar; use crate::sql::plans::ScalarExpr; -#[derive(Clone, Debug)] +#[derive(Clone, Debug, PartialEq, Eq, Hash)] pub struct Filter { pub predicates: Vec, // True if the plan represents having, else the plan represents where @@ -87,9 +87,14 @@ impl LogicalOperator for Filter { } outer_columns = outer_columns.difference(&output_columns).cloned().collect(); + // Derive cardinality. We can not estimate the cardinality of the filter until we have + // NDV(Number of Distinct Values), so we pass it through. 
+ let cardinality = input_prop.cardinality; + Ok(RelationalProperty { output_columns, outer_columns, + cardinality, }) } } diff --git a/src/query/service/src/sql/planner/plans/hash_join.rs b/src/query/service/src/sql/planner/plans/hash_join.rs index 68bebe572573f..2d1c4699b6d90 100644 --- a/src/query/service/src/sql/planner/plans/hash_join.rs +++ b/src/query/service/src/sql/planner/plans/hash_join.rs @@ -26,7 +26,7 @@ use crate::sql::plans::RelOp; use crate::sql::plans::Scalar; use crate::sql::IndexType; -#[derive(Clone, Debug)] +#[derive(Clone, Debug, PartialEq, Eq, Hash)] pub struct PhysicalHashJoin { pub build_keys: Vec, pub probe_keys: Vec, diff --git a/src/query/service/src/sql/planner/plans/limit.rs b/src/query/service/src/sql/planner/plans/limit.rs index f8527e530ba6e..0166038b00850 100644 --- a/src/query/service/src/sql/planner/plans/limit.rs +++ b/src/query/service/src/sql/planner/plans/limit.rs @@ -24,7 +24,7 @@ use crate::sql::plans::Operator; use crate::sql::plans::PhysicalOperator; use crate::sql::plans::RelOp; -#[derive(Clone, Debug)] +#[derive(Clone, Debug, PartialEq, Eq, Hash)] pub struct Limit { pub limit: Option, pub offset: usize, @@ -71,6 +71,15 @@ impl PhysicalOperator for Limit { impl LogicalOperator for Limit { fn derive_relational_prop<'a>(&self, rel_expr: &RelExpr<'a>) -> Result { - rel_expr.derive_relational_prop_child(0) + let input_prop = rel_expr.derive_relational_prop_child(0)?; + + Ok(RelationalProperty { + output_columns: input_prop.output_columns, + outer_columns: input_prop.outer_columns, + cardinality: match self.limit { + Some(limit) if (limit as f64) < input_prop.cardinality => limit as f64, + _ => input_prop.cardinality, + }, + }) } } diff --git a/src/query/service/src/sql/planner/plans/logical_get.rs b/src/query/service/src/sql/planner/plans/logical_get.rs index 9e48c8990b50e..aea1004964f0f 100644 --- a/src/query/service/src/sql/planner/plans/logical_get.rs +++ b/src/query/service/src/sql/planner/plans/logical_get.rs @@ -12,7 
+12,9 @@ // See the License for the specific language governing permissions and // limitations under the License. +use common_catalog::table::TableStatistics; use common_exception::Result; +use itertools::Itertools; use crate::sql::optimizer::ColumnSet; use crate::sql::optimizer::RelExpr; @@ -32,6 +34,29 @@ pub struct LogicalGet { pub push_down_predicates: Option>, pub limit: Option, pub order_by: Option>, + + // statistics will be ignored in comparison and hashing + pub statistics: Option, +} + +impl PartialEq for LogicalGet { + fn eq(&self, other: &Self) -> bool { + self.table_index == other.table_index + && self.columns == other.columns + && self.push_down_predicates == other.push_down_predicates + } +} + +impl Eq for LogicalGet {} + +impl std::hash::Hash for LogicalGet { + fn hash(&self, state: &mut H) { + self.table_index.hash(state); + for column in self.columns.iter().sorted() { + column.hash(state); + } + self.push_down_predicates.hash(state); + } } impl Operator for LogicalGet { @@ -61,6 +86,10 @@ impl LogicalOperator for LogicalGet { Ok(RelationalProperty { output_columns: self.columns.clone(), outer_columns: Default::default(), + cardinality: self + .statistics + .as_ref() + .map_or(0.0, |stat| stat.num_rows.map_or(0.0, |num| num as f64)), }) } } diff --git a/src/query/service/src/sql/planner/plans/logical_join.rs b/src/query/service/src/sql/planner/plans/logical_join.rs index 62f3ab8354e86..6eb7380ac443a 100644 --- a/src/query/service/src/sql/planner/plans/logical_join.rs +++ b/src/query/service/src/sql/planner/plans/logical_join.rs @@ -27,7 +27,7 @@ use crate::sql::plans::RelOp; use crate::sql::plans::Scalar; use crate::sql::IndexType; -#[derive(Clone, Debug, Eq, PartialEq, serde::Serialize, serde::Deserialize)] +#[derive(Clone, Debug, Eq, PartialEq, Hash, serde::Serialize, serde::Deserialize)] pub enum JoinType { Inner, Left, @@ -76,7 +76,7 @@ impl Display for JoinType { } } -#[derive(Clone, Debug)] +#[derive(Clone, Debug, PartialEq, Eq, Hash)] pub 
struct LogicalInnerJoin { pub left_conditions: Vec, pub right_conditions: Vec, @@ -141,9 +141,24 @@ impl LogicalOperator for LogicalInnerJoin { } outer_columns = outer_columns.difference(&output_columns).cloned().collect(); + // Derive cardinality. We can not estimate the cardinality of inner join until we have + // distribution information of join keys, so we set it to the maximum value. + let cardinality = match self.join_type { + JoinType::Inner + | JoinType::Left + | JoinType::Right + | JoinType::Full + | JoinType::Cross => left_prop.cardinality * right_prop.cardinality, + + JoinType::Semi | JoinType::Anti | JoinType::Mark | JoinType::Single => { + left_prop.cardinality + } + }; + Ok(RelationalProperty { output_columns, outer_columns, + cardinality, }) } } diff --git a/src/query/service/src/sql/planner/plans/mod.rs b/src/query/service/src/sql/planner/plans/mod.rs index b9c0db46da89a..f177c0ed71679 100644 --- a/src/query/service/src/sql/planner/plans/mod.rs +++ b/src/query/service/src/sql/planner/plans/mod.rs @@ -119,7 +119,7 @@ use crate::sql::optimizer::SExpr; pub enum Plan { // `SELECT` statement Query { - s_expr: SExpr, + s_expr: Box, metadata: MetadataRef, bind_context: Box, rewrite_kind: Option, diff --git a/src/query/service/src/sql/planner/plans/operator.rs b/src/query/service/src/sql/planner/plans/operator.rs index 587ccca875721..dfbbbdf814a44 100644 --- a/src/query/service/src/sql/planner/plans/operator.rs +++ b/src/query/service/src/sql/planner/plans/operator.rs @@ -65,7 +65,7 @@ pub trait PhysicalOperator { } /// Relational operator -#[derive(Clone, PartialEq, Eq, Debug)] +#[derive(Clone, Debug, PartialEq, Eq, Hash)] pub enum RelOp { // Logical operators LogicalGet, @@ -90,7 +90,7 @@ pub enum RelOp { } /// Relational operators -#[derive(Clone, Debug)] +#[derive(Clone, Debug, PartialEq, Eq, Hash)] pub enum RelOperator { LogicalGet(LogicalGet), LogicalInnerJoin(LogicalInnerJoin), diff --git a/src/query/service/src/sql/planner/plans/pattern.rs 
b/src/query/service/src/sql/planner/plans/pattern.rs index dbc0cd40280a5..e2fcb13c2a185 100644 --- a/src/query/service/src/sql/planner/plans/pattern.rs +++ b/src/query/service/src/sql/planner/plans/pattern.rs @@ -17,7 +17,7 @@ use crate::sql::plans::Operator; use crate::sql::plans::PhysicalOperator; use crate::sql::plans::RelOp; -#[derive(Clone, Debug)] +#[derive(Clone, Debug, PartialEq, Eq, Hash)] pub struct PatternPlan { pub plan_type: RelOp, } diff --git a/src/query/service/src/sql/planner/plans/physical_scan.rs b/src/query/service/src/sql/planner/plans/physical_scan.rs index 5d7e4fed12b89..84fb0106506b6 100644 --- a/src/query/service/src/sql/planner/plans/physical_scan.rs +++ b/src/query/service/src/sql/planner/plans/physical_scan.rs @@ -12,7 +12,10 @@ // See the License for the specific language governing permissions and // limitations under the License. +use std::hash::Hash; + use common_exception::Result; +use itertools::Itertools; use crate::sql::optimizer::ColumnSet; use crate::sql::optimizer::Distribution; @@ -27,7 +30,7 @@ use crate::sql::plans::Scalar; use crate::sql::plans::SortItem; use crate::sql::IndexType; -#[derive(Clone, Debug)] +#[derive(Clone, Debug, PartialEq, Eq)] pub struct PhysicalScan { pub table_index: IndexType, pub columns: ColumnSet, @@ -36,6 +39,17 @@ pub struct PhysicalScan { pub order_by: Option>, } +#[allow(clippy::derive_hash_xor_eq)] +impl Hash for PhysicalScan { + fn hash(&self, state: &mut H) { + self.table_index.hash(state); + for column in self.columns.iter().sorted() { + column.hash(state); + } + self.push_down_predicates.hash(state); + } +} + impl Operator for PhysicalScan { fn rel_op(&self) -> RelOp { RelOp::PhysicalScan diff --git a/src/query/service/src/sql/planner/plans/project.rs b/src/query/service/src/sql/planner/plans/project.rs index adb34868474ce..23b20b8d3ab27 100644 --- a/src/query/service/src/sql/planner/plans/project.rs +++ b/src/query/service/src/sql/planner/plans/project.rs @@ -13,6 +13,7 @@ // limitations 
under the License. use common_exception::Result; +use itertools::Itertools; use crate::sql::optimizer::ColumnSet; use crate::sql::optimizer::PhysicalProperty; @@ -24,11 +25,20 @@ use crate::sql::plans::Operator; use crate::sql::plans::PhysicalOperator; use crate::sql::plans::RelOp; -#[derive(Clone, Debug)] +#[derive(Clone, Debug, PartialEq, Eq)] pub struct Project { pub columns: ColumnSet, } +#[allow(clippy::derive_hash_xor_eq)] +impl std::hash::Hash for Project { + fn hash(&self, state: &mut H) { + for column in self.columns.iter().sorted() { + column.hash(state); + } + } +} + impl Operator for Project { fn rel_op(&self) -> RelOp { RelOp::Project @@ -72,6 +82,7 @@ impl LogicalOperator for Project { Ok(RelationalProperty { output_columns: self.columns.clone(), outer_columns: input_prop.outer_columns, + cardinality: input_prop.cardinality, }) } } diff --git a/src/query/service/src/sql/planner/plans/scalar.rs b/src/query/service/src/sql/planner/plans/scalar.rs index 37d5d7675c41a..54325060a6871 100644 --- a/src/query/service/src/sql/planner/plans/scalar.rs +++ b/src/query/service/src/sql/planner/plans/scalar.rs @@ -12,6 +12,8 @@ // See the License for the specific language governing permissions and // limitations under the License. 
+use std::hash::Hash; + use common_ast::ast::BinaryOperator; use common_datavalues::BooleanType; use common_datavalues::DataTypeImpl; @@ -41,7 +43,7 @@ pub trait ScalarExpr { // fn contains_subquery(&self) -> bool; } -#[derive(Clone, PartialEq, Debug)] +#[derive(Clone, Debug, PartialEq, Eq, Hash)] pub enum Scalar { BoundColumnRef(BoundColumnRef), ConstantExpr(ConstantExpr), @@ -267,7 +269,7 @@ impl TryFrom for SubqueryExpr { } } -#[derive(Clone, PartialEq, Eq, Debug)] +#[derive(Clone, PartialEq, Eq, Hash, Debug)] pub struct BoundColumnRef { pub column: ColumnBinding, } @@ -286,7 +288,7 @@ impl ScalarExpr for BoundColumnRef { } } -#[derive(Clone, PartialEq, Eq, Debug)] +#[derive(Clone, PartialEq, Eq, Hash, Debug)] pub struct ConstantExpr { pub value: DataValue, @@ -307,7 +309,7 @@ impl ScalarExpr for ConstantExpr { } } -#[derive(Clone, PartialEq, Debug)] +#[derive(Clone, PartialEq, Eq, Hash, Debug)] pub struct AndExpr { pub left: Box, pub right: Box, @@ -330,7 +332,7 @@ impl ScalarExpr for AndExpr { } } -#[derive(Clone, PartialEq, Debug)] +#[derive(Clone, PartialEq, Eq, Hash, Debug)] pub struct OrExpr { pub left: Box, pub right: Box, @@ -353,7 +355,7 @@ impl ScalarExpr for OrExpr { } } -#[derive(Clone, PartialEq, Eq, Debug)] +#[derive(Clone, PartialEq, Eq, Hash, Debug)] pub enum ComparisonOp { Equal, NotEqual, @@ -404,7 +406,7 @@ impl<'a> TryFrom<&'a BinaryOperator> for ComparisonOp { } } -#[derive(Clone, PartialEq, Debug)] +#[derive(Clone, PartialEq, Eq, Hash, Debug)] pub struct ComparisonExpr { pub op: ComparisonOp, pub left: Box, @@ -433,7 +435,7 @@ impl ScalarExpr for ComparisonExpr { } } -#[derive(Clone, PartialEq, Debug)] +#[derive(Clone, PartialEq, Eq, Hash, Debug)] pub struct AggregateFunction { pub display_name: String, @@ -462,7 +464,7 @@ impl ScalarExpr for AggregateFunction { } } -#[derive(Clone, PartialEq, Debug)] +#[derive(Clone, PartialEq, Eq, Hash, Debug)] pub struct FunctionCall { pub arguments: Vec, @@ -492,7 +494,7 @@ impl ScalarExpr for 
FunctionCall { } } -#[derive(Clone, PartialEq, Debug)] +#[derive(Clone, PartialEq, Eq, Hash, Debug)] pub struct CastExpr { pub argument: Box, pub from_type: Box, @@ -513,7 +515,7 @@ impl ScalarExpr for CastExpr { } } -#[derive(Clone, Debug, Eq, PartialEq)] +#[derive(Clone, Debug, Eq, PartialEq, Hash)] pub enum SubqueryType { Any, All, @@ -536,6 +538,20 @@ pub struct SubqueryExpr { pub outer_columns: ColumnSet, } +impl PartialEq for SubqueryExpr { + fn eq(&self, _other: &Self) -> bool { + false + } +} + +impl Eq for SubqueryExpr {} + +impl Hash for SubqueryExpr { + fn hash(&self, _state: &mut H) { + unreachable!() + } +} + impl ScalarExpr for SubqueryExpr { fn data_type(&self) -> DataTypeImpl { match &self.typ { @@ -556,9 +572,3 @@ impl ScalarExpr for SubqueryExpr { false } } - -impl PartialEq for SubqueryExpr { - fn eq(&self, _other: &Self) -> bool { - false - } -} diff --git a/src/query/service/src/sql/planner/plans/sort.rs b/src/query/service/src/sql/planner/plans/sort.rs index ccfb746487467..eb0b38b2dac1c 100644 --- a/src/query/service/src/sql/planner/plans/sort.rs +++ b/src/query/service/src/sql/planner/plans/sort.rs @@ -25,13 +25,13 @@ use crate::sql::plans::PhysicalOperator; use crate::sql::plans::RelOp; use crate::sql::IndexType; -#[derive(Clone, Debug)] +#[derive(Clone, Debug, PartialEq, Eq, Hash)] pub struct Sort { pub items: Vec, pub limit: Option, } -#[derive(Clone, Debug)] +#[derive(Clone, Debug, PartialEq, Eq, Hash)] pub struct SortItem { pub index: IndexType, pub asc: bool, diff --git a/src/query/service/src/sql/planner/plans/union_all.rs b/src/query/service/src/sql/planner/plans/union_all.rs index ec45bcafc49e4..ef4cd75fd89e8 100644 --- a/src/query/service/src/sql/planner/plans/union_all.rs +++ b/src/query/service/src/sql/planner/plans/union_all.rs @@ -24,7 +24,7 @@ use crate::sql::plans::Operator; use crate::sql::plans::PhysicalOperator; use crate::sql::plans::RelOp; -#[derive(Clone, Debug)] +#[derive(Clone, Debug, PartialEq, Eq, Hash)] pub struct 
UnionAll; impl Operator for UnionAll { @@ -68,9 +68,12 @@ impl LogicalOperator for UnionAll { .cloned() .collect(); + let cardinality = left_prop.cardinality + right_prop.cardinality; + Ok(RelationalProperty { output_columns, outer_columns, + cardinality, }) } } diff --git a/src/query/service/tests/it/sql/mod.rs b/src/query/service/tests/it/sql/mod.rs index bc97ecc2d82cd..1022aa00b6ac6 100644 --- a/src/query/service/tests/it/sql/mod.rs +++ b/src/query/service/tests/it/sql/mod.rs @@ -14,7 +14,6 @@ mod exec; mod expr_parser; -mod optimizer; mod planner; mod sql_common; mod sql_parser; diff --git a/src/query/service/tests/it/sql/optimizer/heuristic/exchange.rs b/src/query/service/tests/it/sql/optimizer/heuristic/exchange.rs deleted file mode 100644 index 989fe37d2ab62..0000000000000 --- a/src/query/service/tests/it/sql/optimizer/heuristic/exchange.rs +++ /dev/null @@ -1,106 +0,0 @@ -// Copyright 2022 Datafuse Labs. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -use std::sync::Arc; - -use common_ast::parser::parse_sql; -use common_ast::parser::tokenize_sql; -use common_ast::Backtrace; -use common_ast::Dialect; -use common_base::base::tokio; -use common_catalog::table_context::TableContext; -use common_exception::ErrorCode; -use common_exception::Result; -use databend_query::sessions::QueryContext; -use databend_query::sql::optimizer::HeuristicOptimizer; -use databend_query::sql::optimizer::RuleList; -use databend_query::sql::optimizer::DEFAULT_REWRITE_RULES; -use databend_query::sql::plans::Plan; -use databend_query::sql::Binder; -use databend_query::sql::Metadata; -use databend_query::sql::NameResolutionContext; -use goldenfile::Mint; -use parking_lot::RwLock; - -use crate::sql::optimizer::heuristic::run_suites; -use crate::sql::optimizer::heuristic::Suite; -use crate::tests::create_query_context; - -async fn run_cluster_test(ctx: Arc, suite: &Suite) -> Result { - let tokens = tokenize_sql(&suite.query)?; - let bt = Backtrace::new(); - let (stmt, _) = parse_sql(&tokens, Dialect::PostgreSQL, &bt)?; - let binder = Binder::new( - ctx.clone(), - ctx.get_catalog_manager()?, - NameResolutionContext::default(), - Arc::new(RwLock::new(Metadata::create())), - ); - let plan = binder.bind(&stmt).await?; - - let result = match plan { - Plan::Query { - s_expr, - metadata, - bind_context, - .. 
- } => { - let mut heuristic_opt = HeuristicOptimizer::new( - ctx.clone(), - bind_context, - metadata.clone(), - RuleList::create(suite.rules.clone())?, - true, - ); - let optimized = heuristic_opt.optimize(s_expr)?; - optimized.to_format_tree(&metadata).format_indent() - } - _ => Err(ErrorCode::LogicalError("Unsupported non-query statement")), - }?; - - Ok(result) -} - -#[tokio::test] -pub async fn test_heuristic_optimizer_exchange() -> Result<()> { - let mut mint = Mint::new("tests/it/sql/optimizer/heuristic/testdata/"); - let mut file = mint.new_goldenfile("exchange.test")?; - - let (_guard, ctx) = create_query_context().await?; - - let suites = vec![ - Suite { - comment: "".to_string(), - query: "select * from numbers(1) t, numbers(2) t1 where t.number = t1.number".to_string(), - rules: DEFAULT_REWRITE_RULES.clone(), - }, - Suite { - comment: "# Result of t1 join t is distributed on t.number".to_string(), - query: "select * from numbers(1) t, numbers(2) t1, numbers(3) t2 where t.number = t1.number and t.number = t2.number".to_string(), - rules: DEFAULT_REWRITE_RULES.clone(), - }, - Suite { - comment: "".to_string(), - query: "select * from (select number as a, number+1 as b from numbers(1)) t, numbers(2) t1, numbers(3) t2 where a = t1.number and b = t2.number".to_string(), - rules: DEFAULT_REWRITE_RULES.clone(), - }, - Suite { - comment: "".to_string(), - query: "select * from (select sum(number) as number from numbers(1) group by number) t, numbers(2) t1 where t.number = t1.number".to_string(), - rules: DEFAULT_REWRITE_RULES.clone(), - }, - ]; - - run_suites(ctx, &mut file, &suites, run_cluster_test).await -} diff --git a/src/query/service/tests/it/sql/optimizer/heuristic/join.rs b/src/query/service/tests/it/sql/optimizer/heuristic/join.rs deleted file mode 100644 index 7eb0811520071..0000000000000 --- a/src/query/service/tests/it/sql/optimizer/heuristic/join.rs +++ /dev/null @@ -1,67 +0,0 @@ -// Copyright 2022 Datafuse Labs. 
-// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -use common_base::base::tokio; -use common_exception::Result; -use databend_query::sql::optimizer::DEFAULT_REWRITE_RULES; -use goldenfile::Mint; - -use super::run_suites; -use super::Suite; -use crate::sql::optimizer::heuristic::run_test; -use crate::tests::create_query_context; - -#[tokio::test] -pub async fn test_heuristic_optimizer_join() -> Result<()> { - let mut mint = Mint::new("tests/it/sql/optimizer/heuristic/testdata/"); - let mut file = mint.new_goldenfile("join.test")?; - - let (_guard, ctx) = create_query_context().await?; - - let suites = vec![ - Suite { - comment: "# Transform cross join into inner join".to_string(), - query: "select t.number from numbers(1) as t, numbers(1) as t1 where t.number = t1.number" - .to_string(), - rules: DEFAULT_REWRITE_RULES.clone(), - }, Suite { - comment: "".to_string(), - query: "select t.number from numbers(1) as t, numbers(1) as t1 where t.number = t1.number and t.number = t1.number + 1" - .to_string(), - rules: DEFAULT_REWRITE_RULES.clone(), - }, Suite { - comment: "# Push predicates down through join".to_string(), - query: "select t.number from numbers(1) as t, numbers(1) as t1 where t.number > 1 and 1 < t1.number" - .to_string(), - rules: DEFAULT_REWRITE_RULES.clone(), - }, Suite { - comment: "".to_string(), - query: "select t.number from numbers(1) as t, numbers(1) as t1 where t.number + t1.number = 1" - .to_string(), - rules: 
DEFAULT_REWRITE_RULES.clone(), - }, Suite { - comment: "# Incompatible join keys, cannot push into join".to_string(), - query: "select t.number from numbers(1) as t, numbers(1) as t1 where t.number = cast(t1.number as string)" - .to_string(), - rules: DEFAULT_REWRITE_RULES.clone(), - }, Suite { - comment: "# Join multiple tables".to_string(), - query: "select t.number from numbers(1) as t, numbers(1) as t1, numbers(1) as t2 where t1.number = t2.number and t.number = 1" - .to_string(), - rules: DEFAULT_REWRITE_RULES.clone(), - }, - ]; - - run_suites(ctx, &mut file, &suites, run_test).await -} diff --git a/src/query/service/tests/it/sql/optimizer/heuristic/mod.rs b/src/query/service/tests/it/sql/optimizer/heuristic/mod.rs deleted file mode 100644 index 61f4cc995dcbc..0000000000000 --- a/src/query/service/tests/it/sql/optimizer/heuristic/mod.rs +++ /dev/null @@ -1,102 +0,0 @@ -// Copyright 2022 Datafuse Labs. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -mod exchange; -mod join; -mod prune_columns; -mod select; -mod subquery; - -use std::future::Future; -use std::io::Write; -use std::sync::Arc; - -use common_ast::parser::parse_sql; -use common_ast::parser::tokenize_sql; -use common_ast::Backtrace; -use common_ast::Dialect; -use common_catalog::table_context::TableContext; -use common_exception::ErrorCode; -use common_exception::Result; -use databend_query::sessions::QueryContext; -use databend_query::sql::optimizer::HeuristicOptimizer; -use databend_query::sql::optimizer::RuleID; -use databend_query::sql::optimizer::RuleList; -use databend_query::sql::plans::Plan; -use databend_query::sql::Binder; -use databend_query::sql::Metadata; -use databend_query::sql::NameResolutionContext; -use parking_lot::RwLock; - -pub(super) struct Suite { - pub comment: String, - pub query: String, - pub rules: Vec, -} - -async fn run_test(ctx: Arc, suite: &Suite) -> Result { - let tokens = tokenize_sql(&suite.query)?; - let bt = Backtrace::new(); - let (stmt, _) = parse_sql(&tokens, Dialect::PostgreSQL, &bt)?; - let binder = Binder::new( - ctx.clone(), - ctx.get_catalog_manager()?, - NameResolutionContext::default(), - Arc::new(RwLock::new(Metadata::create())), - ); - let plan = binder.bind(&stmt).await?; - - let result = match plan { - Plan::Query { - s_expr, - metadata, - bind_context, - .. 
- } => { - let mut heuristic_opt = HeuristicOptimizer::new( - ctx.clone(), - bind_context, - metadata.clone(), - RuleList::create(suite.rules.clone())?, - false, - ); - let optimized = heuristic_opt.optimize(s_expr)?; - optimized.to_format_tree(&metadata).format_indent() - } - _ => Err(ErrorCode::LogicalError("Unsupported non-query statement")), - }?; - - Ok(result) -} - -pub(super) async fn run_suites<'a, Fut: Future>>( - ctx: Arc, - file: &mut std::fs::File, - suites: &'a [Suite], - test_func: impl Fn(Arc, &'a Suite) -> Fut, -) -> Result<()> { - for suite in suites { - let result = test_func(ctx.clone(), suite).await?; - - if !suite.comment.is_empty() { - writeln!(file, "{}", &suite.comment)?; - } - writeln!(file, "{}", &suite.query)?; - writeln!(file, "----")?; - writeln!(file, "{result}")?; - writeln!(file)?; - } - - Ok(()) -} diff --git a/src/query/service/tests/it/sql/optimizer/heuristic/prune_columns.rs b/src/query/service/tests/it/sql/optimizer/heuristic/prune_columns.rs deleted file mode 100644 index 5509c1b313d67..0000000000000 --- a/src/query/service/tests/it/sql/optimizer/heuristic/prune_columns.rs +++ /dev/null @@ -1,71 +0,0 @@ -// Copyright 2022 Datafuse Labs. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -use common_base::base::tokio; -use common_exception::Result; -use goldenfile::Mint; - -use super::run_suites; -use super::run_test; -use super::Suite; -use crate::tests::create_query_context; - -#[tokio::test] -pub async fn test_heuristic_optimizer_prune_columns() -> Result<()> { - let mut mint = Mint::new("tests/it/sql/optimizer/heuristic/testdata/"); - let mut file = mint.new_goldenfile("prune_columns.test")?; - - let (_guard, ctx) = create_query_context().await?; - - let suites = vec![ - Suite { - comment: "# Prune unused columns from Project".to_string(), - query: "select * from (select a from (select number as a, number + 1 as b from numbers(1)))" - .to_string(), - rules: vec![], - }, - Suite { - comment: "# Prune unused columns from Aggregate".to_string(), - query: "select a from (select number as a, count(*) as b from numbers(1) group by a)".to_string(), - rules: vec![], - }, - Suite { - comment: "# Prune unused columns for simple plan nodes (Project, Filter, Aggregate...)".to_string(), - query: "select a from (select number as a, number b, sum(number) as c, number as d, number as e from numbers(1) group by a, b, d, e) where b > 1 order by d limit 1".to_string(), - rules: vec![], - }, - Suite { - comment: "# Prune unused columns for join plan nodes (LogicalInnerJoin ...)".to_string(), - query: "select * from (select t1.a from (select number + 1 as a, number + 1 as b, number + 1 as c, number + 1 as d from numbers(1)) as t1, (select number + 1 as a, number + 1 as b, number + 1 as c from numbers(1)) as t2 where t1.b = t2.b and t1.c = 1)".to_string(), - rules: vec![], - }, - Suite { - comment: "# Prune unused columns for correlated query".to_string(), - query: "select t1.a from (select number + 1 as a, number + 1 as b from numbers(1)) as t1 where t1.a = (select count(*) from (select t2.a, t3.a from (select number + 1 as a, number + 1 as b, number + 1 as c, number + 1 as d from numbers(1)) as t2, (select number + 1 as a, number + 1 as b, number + 1 as c from 
numbers(1)) as t3 where t2.b = t3.b and t2.c = 1))".to_string(), - rules: vec![], - }, - Suite { - comment: "# Prune unused columns with order by".to_string(), - query: "select name from system.functions order by example".to_string(), - rules: vec![], - }, - Suite { - comment: "# Prune unused columns with cross join".to_string(), - query: "select t.number from numbers(10) t where exists(select * from numbers(10))".to_string(), - rules: vec![], - }, - ]; - - run_suites(ctx, &mut file, &suites, run_test).await -} diff --git a/src/query/service/tests/it/sql/optimizer/heuristic/select.rs b/src/query/service/tests/it/sql/optimizer/heuristic/select.rs deleted file mode 100644 index 554ab4709f545..0000000000000 --- a/src/query/service/tests/it/sql/optimizer/heuristic/select.rs +++ /dev/null @@ -1,91 +0,0 @@ -// Copyright 2022 Datafuse Labs. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -use common_base::base::tokio; -use common_exception::Result; -use databend_query::sql::optimizer::DEFAULT_REWRITE_RULES; -use goldenfile::Mint; - -use super::run_suites; -use super::Suite; -use crate::sql::optimizer::heuristic::run_test; -use crate::tests::create_query_context; - -#[tokio::test] -pub async fn test_heuristic_optimizer_select() -> Result<()> { - let mut mint = Mint::new("tests/it/sql/optimizer/heuristic/testdata/"); - let mut file = mint.new_goldenfile("select.test")?; - - let (_guard, ctx) = create_query_context().await?; - - let suites = vec![ - Suite { - comment: "".to_string(), - query: "select * from numbers(1)".to_string(), - rules: DEFAULT_REWRITE_RULES.clone(), - }, - Suite { - comment: "".to_string(), - query: "select * from (select * from numbers(1)) as t1 where number = 1".to_string(), - rules: DEFAULT_REWRITE_RULES.clone(), - }, - Suite { - comment: r#"# `b = 1` can not be pushed down"#.to_string(), - query: "select * from (select number as a, number + 1 as b from numbers(1)) as t1 where a = 1 and b = 1".to_string(), - rules: DEFAULT_REWRITE_RULES.clone(), - }, - Suite { - comment: "".to_string(), - query: "select * from (select number as a, number + 1 as b from numbers(1)) as t1 where a = 1".to_string(), - rules: DEFAULT_REWRITE_RULES.clone(), - }, - Suite { - comment: "".to_string(), - query: "select * from numbers(1) where number = pow(1, 1 + 1)".to_string(), - rules: DEFAULT_REWRITE_RULES.clone(), - }, - Suite { - comment: "".to_string(), - query: "select * from numbers(1) where TRUE and 1 = 1".to_string(), - rules: DEFAULT_REWRITE_RULES.clone(), - }, - Suite { - comment: "".to_string(), - query: "select * from numbers(1) where number = 0 and false".to_string(), - rules: DEFAULT_REWRITE_RULES.clone(), - }, - Suite { - comment: "".to_string(), - query: "select * from numbers(1) where number = 0 and null".to_string(), - rules: DEFAULT_REWRITE_RULES.clone(), - }, - Suite { - comment: "# If there is only one conjunction and the value 
is null, then we won't rewrite it".to_string(), - query: "select * from numbers(1) where null".to_string(), - rules: DEFAULT_REWRITE_RULES.clone(), - }, - Suite { - comment: "".to_string(), - query: "select a from (select number as a, number as b from numbers(1))".to_string(), - rules: DEFAULT_REWRITE_RULES.clone(), - }, - Suite { - comment: "".to_string(), - query: "select a from (select number as a, number+1 as b from numbers(1))".to_string(), - rules: DEFAULT_REWRITE_RULES.clone(), - }, - ]; - - run_suites(ctx, &mut file, &suites, run_test).await -} diff --git a/src/query/service/tests/it/sql/optimizer/heuristic/subquery.rs b/src/query/service/tests/it/sql/optimizer/heuristic/subquery.rs deleted file mode 100644 index 45d1b16a0c6b3..0000000000000 --- a/src/query/service/tests/it/sql/optimizer/heuristic/subquery.rs +++ /dev/null @@ -1,105 +0,0 @@ -// Copyright 2022 Datafuse Labs. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
-use common_base::base::tokio; -use common_exception::Result; -use databend_query::sql::optimizer::DEFAULT_REWRITE_RULES; -use goldenfile::Mint; - -use super::run_suites; -use super::Suite; -use crate::sql::optimizer::heuristic::run_test; -use crate::tests::create_query_context; - -#[tokio::test] -pub async fn test_heuristic_optimizer_subquery() -> Result<()> { - let mut mint = Mint::new("tests/it/sql/optimizer/heuristic/testdata/"); - let mut file = mint.new_goldenfile("subquery.test")?; - - let (_guard, ctx) = create_query_context().await?; - - let suites = vec![ - Suite { - comment: "# Correlated subquery with joins".to_string(), - query: "select t.number from numbers(1) as t, numbers(1) as t1 where t.number = (select count(*) from numbers(1) as t2, numbers(1) as t3 where t.number = t2.number)" - .to_string(), - rules: DEFAULT_REWRITE_RULES.clone(), - }, - Suite { - comment: "# Exists correlated subquery with joins".to_string(), - query: "select t.number from numbers(1) as t where exists (select t1.number from numbers(1) as t1 where t.number = t1.number) or t.number > 1" - .to_string(), - rules: DEFAULT_REWRITE_RULES.clone(), - }, - Suite { - comment: "# Uncorrelated subquery".to_string(), - query: "select t.number from numbers(1) as t where exists (select * from numbers(1) where number = 0)" - .to_string(), - rules: DEFAULT_REWRITE_RULES.clone(), - }, - Suite { - comment: "# Uncorrelated subquery".to_string(), - query: "select t.number from numbers(1) as t where number = (select * from numbers(1) where number = 0)" - .to_string(), - rules: DEFAULT_REWRITE_RULES.clone(), - }, - Suite { - comment: "# Correlated subquery can be translated to SemiJoin".to_string(), - query: "select t.number from numbers(1) as t where exists (select * from numbers(1) where number = t.number)" - .to_string(), - rules: DEFAULT_REWRITE_RULES.clone(), - }, - Suite { - comment: "# Correlated subquery can be translated to AntiJoin".to_string(), - query: "select t.number from numbers(1) as 
t where not exists (select * from numbers(1) where number = t.number)" - .to_string(), - rules: DEFAULT_REWRITE_RULES.clone(), - }, - Suite { - comment: "".to_string(), - query: "select * from numbers(1) as t where exists (select number as a from numbers(1) where number = t.number)" - .to_string(), - rules: DEFAULT_REWRITE_RULES.clone(), - }, - Suite { - comment: "# Exists with different kinds of predicate".to_string(), - query: "select t.number from numbers(1) as t where exists (select * from numbers(1) where number = t.number and number = 0 and t.number < 10)" - .to_string(), - rules: DEFAULT_REWRITE_RULES.clone(), - }, - Suite { - comment: "# Exists with non-equi predicate".to_string(), - query: "select t.number from numbers(1) as t where exists (select * from numbers(1) where number = t.number and t.number < number)" - .to_string(), - rules: DEFAULT_REWRITE_RULES.clone(), - }, - Suite { - comment: "# Exists project required columns".to_string(), - query: "select t.number from numbers(1) as t where exists (select number as a, number as b, number as c from numbers(1) where number = t.number)".to_string(), - rules: DEFAULT_REWRITE_RULES.clone(), - }, - Suite { - comment: "# Push down filter through CrossApply".to_string(), - query: "select t.number from numbers(1) as t, numbers(1) as t1 where (select count(*) = 1 from numbers(1) where t.number = number) and t.number = t1.number" - .to_string(), - rules: DEFAULT_REWRITE_RULES.clone(), - }, - Suite { - comment: "# Semi join with other conditions".to_string(), - query: "select t.number from numbers(1) as t where exists(select * from numbers(1) as t1 where t.number > t1.number) and not exists(select * from numbers(1) as t1 where t.number < t1.number)".to_string(), - rules: DEFAULT_REWRITE_RULES.clone(), - }, - ]; - - run_suites(ctx, &mut file, &suites, run_test).await -} diff --git a/src/query/service/tests/it/sql/optimizer/heuristic/testdata/exchange.test 
b/src/query/service/tests/it/sql/optimizer/heuristic/testdata/exchange.test deleted file mode 100644 index cc741ee7762d6..0000000000000 --- a/src/query/service/tests/it/sql/optimizer/heuristic/testdata/exchange.test +++ /dev/null @@ -1,54 +0,0 @@ -select * from numbers(1) t, numbers(2) t1 where t.number = t1.number ----- -Exchange(Merge) - HashJoin: INNER, build keys: [t1.number (#1)], probe keys: [t.number (#0)], join filters: [] - Exchange(Hash): keys: [t.number (#0)] - Scan: default.system.numbers, filters: [], Sort: [none], limit: [none] - Exchange(Hash): keys: [t1.number (#1)] - Scan: default.system.numbers, filters: [], Sort: [none], limit: [none] - - -# Result of t1 join t is distributed on t.number -select * from numbers(1) t, numbers(2) t1, numbers(3) t2 where t.number = t1.number and t.number = t2.number ----- -Exchange(Merge) - HashJoin: INNER, build keys: [t2.number (#2)], probe keys: [t.number (#0)], join filters: [] - HashJoin: INNER, build keys: [t1.number (#1)], probe keys: [t.number (#0)], join filters: [] - Exchange(Hash): keys: [t.number (#0)] - Scan: default.system.numbers, filters: [], Sort: [none], limit: [none] - Exchange(Hash): keys: [t1.number (#1)] - Scan: default.system.numbers, filters: [], Sort: [none], limit: [none] - Exchange(Hash): keys: [t2.number (#2)] - Scan: default.system.numbers, filters: [], Sort: [none], limit: [none] - - -select * from (select number as a, number+1 as b from numbers(1)) t, numbers(2) t1, numbers(3) t2 where a = t1.number and b = t2.number ----- -Exchange(Merge) - HashJoin: INNER, build keys: [t2.number (#4)], probe keys: [t.b (#1)], join filters: [] - Exchange(Hash): keys: [t.b (#1)] - HashJoin: INNER, build keys: [t1.number (#3)], probe keys: [t.a (#0)], join filters: [] - Exchange(Hash): keys: [t.a (#0)] - EvalScalar: [+(numbers.number (#0), 1)] - Scan: default.system.numbers, filters: [], Sort: [none], limit: [none] - Exchange(Hash): keys: [t1.number (#3)] - Scan: default.system.numbers, filters: [], 
Sort: [none], limit: [none] - Exchange(Hash): keys: [t2.number (#4)] - Scan: default.system.numbers, filters: [], Sort: [none], limit: [none] - - -select * from (select sum(number) as number from numbers(1) group by number) t, numbers(2) t1 where t.number = t1.number ----- -Exchange(Merge) - HashJoin: INNER, build keys: [t1.number (#4)], probe keys: [t.number (#1)], join filters: [] - Exchange(Hash): keys: [t.number (#1)] - Project: [number (#1)] - EvalScalar: [sum(number) (#3)] - Aggregate(Final): group items: [numbers.number (#0)], aggregate functions: [sum(number)] - Aggregate(Partial): group items: [numbers.number (#0)], aggregate functions: [sum(number)] - Exchange(Hash): keys: [numbers.number (#0)] - Scan: default.system.numbers, filters: [], Sort: [none], limit: [none] - Exchange(Hash): keys: [t1.number (#4)] - Scan: default.system.numbers, filters: [], Sort: [none], limit: [none] - - diff --git a/src/query/service/tests/it/sql/optimizer/heuristic/testdata/join.test b/src/query/service/tests/it/sql/optimizer/heuristic/testdata/join.test deleted file mode 100644 index b1a46104f65b1..0000000000000 --- a/src/query/service/tests/it/sql/optimizer/heuristic/testdata/join.test +++ /dev/null @@ -1,58 +0,0 @@ -# Transform cross join into inner join -select t.number from numbers(1) as t, numbers(1) as t1 where t.number = t1.number ----- -Project: [number (#0)] - HashJoin: INNER, build keys: [t1.number (#1)], probe keys: [t.number (#0)], join filters: [] - Scan: default.system.numbers, filters: [], Sort: [none], limit: [none] - Scan: default.system.numbers, filters: [], Sort: [none], limit: [none] - - -select t.number from numbers(1) as t, numbers(1) as t1 where t.number = t1.number and t.number = t1.number + 1 ----- -Project: [number (#0)] - HashJoin: INNER, build keys: [t1.number (#1), +(t1.number (#1), 1)], probe keys: [t.number (#0), t.number (#0)], join filters: [] - Scan: default.system.numbers, filters: [], Sort: [none], limit: [none] - Scan: 
default.system.numbers, filters: [], Sort: [none], limit: [none] - - -# Push predicates down through join -select t.number from numbers(1) as t, numbers(1) as t1 where t.number > 1 and 1 < t1.number ----- -Project: [number (#0)] - CrossJoin - Filter: [t.number (#0) > 1] - Scan: default.system.numbers, filters: [t.number (#0) > 1], Sort: [none], limit: [none] - Filter: [1 < t1.number (#1)] - Scan: default.system.numbers, filters: [1 < t1.number (#1)], Sort: [none], limit: [none] - - -select t.number from numbers(1) as t, numbers(1) as t1 where t.number + t1.number = 1 ----- -Project: [number (#0)] - Filter: [+(t.number (#0), t1.number (#1)) = 1] - CrossJoin - Scan: default.system.numbers, filters: [], Sort: [none], limit: [none] - Scan: default.system.numbers, filters: [], Sort: [none], limit: [none] - - -# Incompatible join keys, cannot push into join -select t.number from numbers(1) as t, numbers(1) as t1 where t.number = cast(t1.number as string) ----- -Project: [number (#0)] - HashJoin: INNER, build keys: [CAST(CAST(t1.number (#1) AS VARCHAR) AS DOUBLE)], probe keys: [CAST(t.number (#0) AS DOUBLE)], join filters: [] - Scan: default.system.numbers, filters: [], Sort: [none], limit: [none] - Scan: default.system.numbers, filters: [], Sort: [none], limit: [none] - - -# Join multiple tables -select t.number from numbers(1) as t, numbers(1) as t1, numbers(1) as t2 where t1.number = t2.number and t.number = 1 ----- -Project: [number (#0)] - HashJoin: INNER, build keys: [t2.number (#2)], probe keys: [t1.number (#1)], join filters: [] - CrossJoin - Filter: [t.number (#0) = 1] - Scan: default.system.numbers, filters: [t.number (#0) = 1], Sort: [none], limit: [none] - Scan: default.system.numbers, filters: [], Sort: [none], limit: [none] - Scan: default.system.numbers, filters: [], Sort: [none], limit: [none] - - diff --git a/src/query/service/tests/it/sql/optimizer/heuristic/testdata/prune_columns.test 
b/src/query/service/tests/it/sql/optimizer/heuristic/testdata/prune_columns.test deleted file mode 100644 index c39149ee6a676..0000000000000 --- a/src/query/service/tests/it/sql/optimizer/heuristic/testdata/prune_columns.test +++ /dev/null @@ -1,107 +0,0 @@ -# Prune unused columns from Project -select * from (select a from (select number as a, number + 1 as b from numbers(1))) ----- -Project: [number (#0)] - EvalScalar: [numbers.a (#0)] - Project: [number (#0)] - EvalScalar: [numbers.a (#0)] - Project: [number (#0)] - EvalScalar: [numbers.number (#0)] - Scan: default.system.numbers, filters: [], Sort: [none], limit: [none] - - -# Prune unused columns from Aggregate -select a from (select number as a, count(*) as b from numbers(1) group by a) ----- -Project: [number (#0)] - EvalScalar: [numbers.a (#0)] - Project: [number (#0)] - EvalScalar: [group_item (#0)] - Aggregate(Initial): group items: [numbers.number (#0)], aggregate functions: [] - EvalScalar: [numbers.number (#0)] - Scan: default.system.numbers, filters: [], Sort: [none], limit: [none] - - -# Prune unused columns for simple plan nodes (Project, Filter, Aggregate...) -select a from (select number as a, number b, sum(number) as c, number as d, number as e from numbers(1) group by a, b, d, e) where b > 1 order by d limit 1 ----- -Limit: [1], Offset: [0] - Project: [number (#0)] - EvalScalar: [numbers.a (#0)] - Sort: [number (#0) ASC], limit: [none] - Filter: [numbers.b (#0) > 1] - Project: [number (#0)] - EvalScalar: [group_item (#0)] - Aggregate(Initial): group items: [numbers.number (#0), numbers.number (#0), numbers.number (#0), numbers.number (#0)], aggregate functions: [] - EvalScalar: [numbers.number (#0), numbers.number (#0), numbers.number (#0), numbers.number (#0), numbers.number (#0)] - Scan: default.system.numbers, filters: [], Sort: [none], limit: [none] - - -# Prune unused columns for join plan nodes (LogicalInnerJoin ...) 
-select * from (select t1.a from (select number + 1 as a, number + 1 as b, number + 1 as c, number + 1 as d from numbers(1)) as t1, (select number + 1 as a, number + 1 as b, number + 1 as c from numbers(1)) as t2 where t1.b = t2.b and t1.c = 1) ----- -Project: [a (#1)] - EvalScalar: [t1.a (#1)] - Project: [a (#1)] - EvalScalar: [t1.a (#1)] - Filter: [t1.b (#2) = t2.b (#11), t1.c (#3) = 1] - CrossJoin - Project: [a (#1),b (#2),c (#3)] - EvalScalar: [+(numbers.number (#0), 1), +(numbers.number (#0), 1), +(numbers.number (#0), 1)] - Scan: default.system.numbers, filters: [], Sort: [none], limit: [none] - Project: [b (#11)] - EvalScalar: [+(numbers.number (#9), 1)] - Scan: default.system.numbers, filters: [], Sort: [none], limit: [none] - - -# Prune unused columns for correlated query -select t1.a from (select number + 1 as a, number + 1 as b from numbers(1)) as t1 where t1.a = (select count(*) from (select t2.a, t3.a from (select number + 1 as a, number + 1 as b, number + 1 as c, number + 1 as d from numbers(1)) as t2, (select number + 1 as a, number + 1 as b, number + 1 as c from numbers(1)) as t3 where t2.b = t3.b and t2.c = 1)) ----- -Project: [a (#1)] - EvalScalar: [t1.a (#1)] - Filter: [t1.a (#1) = scalar_subquery_21 (#21)] - HashJoin: SINGLE, build keys: [], probe keys: [], join filters: [] - Project: [a (#1)] - EvalScalar: [+(numbers.number (#0), 1)] - Scan: default.system.numbers, filters: [], Sort: [none], limit: [none] - Project: [COUNT(*) (#21)] - EvalScalar: [COUNT(*) (#22)] - Aggregate(Initial): group items: [], aggregate functions: [COUNT(*)] - Project: [a (#6)] - EvalScalar: [t2.a (#6)] - Filter: [t2.b (#7) = t3.b (#16), t2.c (#8) = 1] - CrossJoin - Project: [a (#6),b (#7),c (#8)] - EvalScalar: [+(numbers.number (#5), 1), +(numbers.number (#5), 1), +(numbers.number (#5), 1)] - Scan: default.system.numbers, filters: [], Sort: [none], limit: [none] - Project: [b (#16)] - EvalScalar: [+(numbers.number (#14), 1)] - Scan: default.system.numbers, filters: [], 
Sort: [none], limit: [none] - - -# Prune unused columns with order by -select name from system.functions order by example ----- -Project: [name (#0)] - EvalScalar: [functions.name (#0)] - Sort: [example (#7) ASC], limit: [none] - Scan: default.system.functions, filters: [], Sort: [none], limit: [none] - - -# Prune unused columns with cross join -select t.number from numbers(10) t where exists(select * from numbers(10)) ----- -Project: [number (#0)] - EvalScalar: [t.number (#0)] - Filter: [subquery_3 (#3)] - CrossJoin - Scan: default.system.numbers, filters: [], Sort: [none], limit: [none] - Project: [subquery (#3)] - EvalScalar: [count(*) (#2) = 1] - Aggregate(Initial): group items: [], aggregate functions: [count(*)] - Limit: [1], Offset: [0] - Project: [number (#1)] - EvalScalar: [numbers.number (#1)] - Scan: default.system.numbers, filters: [], Sort: [none], limit: [none] - - diff --git a/src/query/service/tests/it/sql/optimizer/heuristic/testdata/select.test b/src/query/service/tests/it/sql/optimizer/heuristic/testdata/select.test deleted file mode 100644 index 81fb21fefd296..0000000000000 --- a/src/query/service/tests/it/sql/optimizer/heuristic/testdata/select.test +++ /dev/null @@ -1,67 +0,0 @@ -select * from numbers(1) ----- -Scan: default.system.numbers, filters: [], Sort: [none], limit: [none] - - -select * from (select * from numbers(1)) as t1 where number = 1 ----- -Filter: [t1.number (#0) = 1] - Scan: default.system.numbers, filters: [t1.number (#0) = 1], Sort: [none], limit: [none] - - -# `b = 1` can not be pushed down -select * from (select number as a, number + 1 as b from numbers(1)) as t1 where a = 1 and b = 1 ----- -Filter: [t1.a (#0) = 1, t1.b (#1) = 1] - EvalScalar: [+(numbers.number (#0), 1)] - Scan: default.system.numbers, filters: [], Sort: [none], limit: [none] - - -select * from (select number as a, number + 1 as b from numbers(1)) as t1 where a = 1 ----- -EvalScalar: [+(numbers.number (#0), 1)] - Filter: [t1.a (#0) = 1] - Scan: 
default.system.numbers, filters: [t1.a (#0) = 1], Sort: [none], limit: [none] - - -select * from numbers(1) where number = pow(1, 1 + 1) ----- -Filter: [numbers.number (#0) = 1] - Scan: default.system.numbers, filters: [numbers.number (#0) = 1], Sort: [none], limit: [none] - - -select * from numbers(1) where TRUE and 1 = 1 ----- -Scan: default.system.numbers, filters: [], Sort: [none], limit: [none] - - -select * from numbers(1) where number = 0 and false ----- -Filter: [false] - Scan: default.system.numbers, filters: [false], Sort: [none], limit: [none] - - -select * from numbers(1) where number = 0 and null ----- -Filter: [false] - Scan: default.system.numbers, filters: [false], Sort: [none], limit: [none] - - -# If there is only one conjunction and the value is null, then we won't rewrite it -select * from numbers(1) where null ----- -Filter: [NULL] - Scan: default.system.numbers, filters: [NULL], Sort: [none], limit: [none] - - -select a from (select number as a, number as b from numbers(1)) ----- -Scan: default.system.numbers, filters: [], Sort: [none], limit: [none] - - -select a from (select number as a, number+1 as b from numbers(1)) ----- -Project: [number (#0)] - Scan: default.system.numbers, filters: [], Sort: [none], limit: [none] - - diff --git a/src/query/service/tests/it/sql/optimizer/heuristic/testdata/subquery.test b/src/query/service/tests/it/sql/optimizer/heuristic/testdata/subquery.test deleted file mode 100644 index c534375ca9da9..0000000000000 --- a/src/query/service/tests/it/sql/optimizer/heuristic/testdata/subquery.test +++ /dev/null @@ -1,150 +0,0 @@ -# Correlated subquery with joins -select t.number from numbers(1) as t, numbers(1) as t1 where t.number = (select count(*) from numbers(1) as t2, numbers(1) as t3 where t.number = t2.number) ----- -Project: [number (#0)] - Filter: [t.number (#0) = CAST(if(is_null(scalar_subquery_4 (#4)), 0, scalar_subquery_4 (#4)) AS BIGINT UNSIGNED)] - HashJoin: SINGLE, build keys: [subquery_6 (#6)], probe 
keys: [subquery_0 (#0)], join filters: [] - CrossJoin - Scan: default.system.numbers, filters: [], Sort: [none], limit: [none] - Scan: default.system.numbers, filters: [], Sort: [none], limit: [none] - Project: [COUNT(*) (#4),number (#6)] - EvalScalar: [COUNT(*) (#5)] - Aggregate(Final): group items: [subquery_6 (#6)], aggregate functions: [COUNT(*)] - Aggregate(Partial): group items: [subquery_6 (#6)], aggregate functions: [COUNT(*)] - HashJoin: INNER, build keys: [t2.number (#2)], probe keys: [subquery_6 (#6)], join filters: [] - Scan: default.system.numbers, filters: [], Sort: [none], limit: [none] - CrossJoin - Scan: default.system.numbers, filters: [], Sort: [none], limit: [none] - Scan: default.system.numbers, filters: [], Sort: [none], limit: [none] - - -# Exists correlated subquery with joins -select t.number from numbers(1) as t where exists (select t1.number from numbers(1) as t1 where t.number = t1.number) or t.number > 1 ----- -Project: [number (#0)] - Filter: [(3 (#3)) OR (t.number (#0) > 1)] - HashJoin: MARK, build keys: [subquery_0 (#0)], probe keys: [subquery_2 (#2)], join filters: [] - HashJoin: INNER, build keys: [t1.number (#1)], probe keys: [subquery_2 (#2)], join filters: [] - Scan: default.system.numbers, filters: [], Sort: [none], limit: [none] - Scan: default.system.numbers, filters: [], Sort: [none], limit: [none] - Scan: default.system.numbers, filters: [], Sort: [none], limit: [none] - - -# Uncorrelated subquery -select t.number from numbers(1) as t where exists (select * from numbers(1) where number = 0) ----- -Project: [number (#0)] - CrossJoin - Scan: default.system.numbers, filters: [], Sort: [none], limit: [none] - Project: [subquery (#3)] - Filter: [subquery_3 (#3)] - EvalScalar: [count(*) (#2) = 1] - Aggregate(Final): group items: [], aggregate functions: [count(*)] - Aggregate(Partial): group items: [], aggregate functions: [count(*)] - Limit: [1], Offset: [0] - Filter: [numbers.number (#1) = 0] - Scan: default.system.numbers, 
filters: [numbers.number (#1) = 0], Sort: [none], limit: [none] - - -# Uncorrelated subquery -select t.number from numbers(1) as t where number = (select * from numbers(1) where number = 0) ----- -Project: [number (#0)] - Filter: [t.number (#0) = scalar_subquery_1 (#1)] - HashJoin: SINGLE, build keys: [], probe keys: [], join filters: [] - Scan: default.system.numbers, filters: [], Sort: [none], limit: [none] - Filter: [numbers.number (#1) = 0] - Scan: default.system.numbers, filters: [numbers.number (#1) = 0], Sort: [none], limit: [none] - - -# Correlated subquery can be translated to SemiJoin -select t.number from numbers(1) as t where exists (select * from numbers(1) where number = t.number) ----- -Project: [number (#0)] - HashJoin: SEMI, build keys: [numbers.number (#1)], probe keys: [t.number (#0)], join filters: [] - Scan: default.system.numbers, filters: [], Sort: [none], limit: [none] - Scan: default.system.numbers, filters: [], Sort: [none], limit: [none] - - -# Correlated subquery can be translated to AntiJoin -select t.number from numbers(1) as t where not exists (select * from numbers(1) where number = t.number) ----- -Project: [number (#0)] - Filter: [not(3 (#3))] - HashJoin: MARK, build keys: [subquery_0 (#0)], probe keys: [subquery_2 (#2)], join filters: [] - HashJoin: INNER, build keys: [numbers.number (#1)], probe keys: [subquery_2 (#2)], join filters: [] - Scan: default.system.numbers, filters: [], Sort: [none], limit: [none] - Scan: default.system.numbers, filters: [], Sort: [none], limit: [none] - Scan: default.system.numbers, filters: [], Sort: [none], limit: [none] - - -select * from numbers(1) as t where exists (select number as a from numbers(1) where number = t.number) ----- -Project: [number (#0)] - HashJoin: SEMI, build keys: [numbers.number (#1)], probe keys: [t.number (#0)], join filters: [] - Scan: default.system.numbers, filters: [], Sort: [none], limit: [none] - Scan: default.system.numbers, filters: [], Sort: [none], limit: [none] - 
- -# Exists with different kinds of predicate -select t.number from numbers(1) as t where exists (select * from numbers(1) where number = t.number and number = 0 and t.number < 10) ----- -Project: [number (#0)] - HashJoin: SEMI, build keys: [numbers.number (#1)], probe keys: [t.number (#0)], join filters: [] - Filter: [t.number (#0) < 10] - Scan: default.system.numbers, filters: [t.number (#0) < 10], Sort: [none], limit: [none] - Filter: [numbers.number (#1) = 0] - Scan: default.system.numbers, filters: [numbers.number (#1) = 0], Sort: [none], limit: [none] - - -# Exists with non-equi predicate -select t.number from numbers(1) as t where exists (select * from numbers(1) where number = t.number and t.number < number) ----- -Project: [number (#0)] - HashJoin: SEMI, build keys: [numbers.number (#1)], probe keys: [t.number (#0)], join filters: [t.number (#0) < numbers.number (#1)] - Scan: default.system.numbers, filters: [], Sort: [none], limit: [none] - Scan: default.system.numbers, filters: [], Sort: [none], limit: [none] - - -# Exists project required columns -select t.number from numbers(1) as t where exists (select number as a, number as b, number as c from numbers(1) where number = t.number) ----- -Project: [number (#0)] - HashJoin: SEMI, build keys: [numbers.number (#1)], probe keys: [t.number (#0)], join filters: [] - Scan: default.system.numbers, filters: [], Sort: [none], limit: [none] - Scan: default.system.numbers, filters: [], Sort: [none], limit: [none] - - -# Push down filter through CrossApply -select t.number from numbers(1) as t, numbers(1) as t1 where (select count(*) = 1 from numbers(1) where t.number = number) and t.number = t1.number ----- -Project: [number (#0)] - Filter: [CAST(if(is_null(scalar_subquery_3 (#3)), 0, scalar_subquery_3 (#3)) AS BIGINT UNSIGNED)] - HashJoin: SINGLE, build keys: [subquery_5 (#5)], probe keys: [subquery_0 (#0)], join filters: [] - HashJoin: INNER, build keys: [t1.number (#1)], probe keys: [t.number (#0)], join 
filters: [] - Scan: default.system.numbers, filters: [], Sort: [none], limit: [none] - Scan: default.system.numbers, filters: [], Sort: [none], limit: [none] - Project: [COUNT(*) = 1 (#3),number (#5)] - EvalScalar: [COUNT(*) (#4) = 1] - Aggregate(Final): group items: [subquery_5 (#5)], aggregate functions: [COUNT(*)] - Aggregate(Partial): group items: [subquery_5 (#5)], aggregate functions: [COUNT(*)] - HashJoin: INNER, build keys: [numbers.number (#2)], probe keys: [subquery_5 (#5)], join filters: [] - Scan: default.system.numbers, filters: [], Sort: [none], limit: [none] - Scan: default.system.numbers, filters: [], Sort: [none], limit: [none] - - -# Semi join with other conditions -select t.number from numbers(1) as t where exists(select * from numbers(1) as t1 where t.number > t1.number) and not exists(select * from numbers(1) as t1 where t.number < t1.number) ----- -Project: [number (#0)] - Filter: [not(4 (#4))] - HashJoin: MARK, build keys: [subquery_0 (#0)], probe keys: [subquery_3 (#3)], join filters: [] - Filter: [subquery_3 (#3) < t1.number (#2)] - CrossJoin - Scan: default.system.numbers, filters: [], Sort: [none], limit: [none] - Scan: default.system.numbers, filters: [], Sort: [none], limit: [none] - HashJoin: SEMI, build keys: [], probe keys: [], join filters: [t.number (#0) > t1.number (#1)] - Scan: default.system.numbers, filters: [], Sort: [none], limit: [none] - Scan: default.system.numbers, filters: [], Sort: [none], limit: [none] - - diff --git a/src/query/service/tests/it/sql/optimizer/pattern_extractor.rs b/src/query/service/tests/it/sql/optimizer/pattern_extractor.rs deleted file mode 100644 index 2447f3fded3ea..0000000000000 --- a/src/query/service/tests/it/sql/optimizer/pattern_extractor.rs +++ /dev/null @@ -1,173 +0,0 @@ -// Copyright 2021 Datafuse Labs. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -use databend_query::sql::optimizer::MExpr; -use databend_query::sql::optimizer::Memo; -use databend_query::sql::optimizer::PatternExtractor; -use databend_query::sql::optimizer::SExpr; -use databend_query::sql::plans::Operator; -use databend_query::sql::plans::PatternPlan; -use databend_query::sql::plans::RelOp; - -fn compare_s_expr(lhs: &SExpr, rhs: &SExpr) -> bool { - // Compare children first - if lhs.arity() != rhs.arity() { - return false; - } - - for (l_child, r_child) in lhs.children().iter().zip(rhs.children().iter()) { - if !compare_s_expr(l_child, r_child) { - return false; - } - } - - lhs.plan().rel_op() == rhs.plan().rel_op() -} - -#[test] -fn test_unary_expression() { - // Project - // \ - // LogicalGet - let expr = SExpr::create_unary( - PatternPlan { - plan_type: RelOp::Project, - } - .into(), - SExpr::create_leaf( - PatternPlan { - plan_type: RelOp::LogicalGet, - } - .into(), - ), - ); - - // Project - // \ - // Pattern - let pattern = SExpr::create_unary( - From::from(PatternPlan { - plan_type: RelOp::Project, - }), - SExpr::create_leaf(From::from(PatternPlan { - plan_type: RelOp::Pattern, - })), - ); - - let mut pattern_extractor = PatternExtractor::create(); - let mut memo = Memo::create(); - memo.init(expr).unwrap(); - - let group_expression = memo - .root() - .unwrap() - .iter() - .take(1) - .cloned() - .collect::>()[0] - .clone(); - let result = pattern_extractor.extract(&memo, &group_expression, &pattern); - - let expected = vec![SExpr::create( - From::from(PatternPlan { - plan_type: RelOp::Project, - }), - 
vec![SExpr::create( - From::from(PatternPlan { - plan_type: RelOp::LogicalGet, - }), - vec![], - Some(0), - )], - Some(1), - )]; - assert!(compare_s_expr(&result[0], &expected[0])); -} - -#[test] -fn test_multiple_expression() { - // Project - // \ - // LogicalGet - let expr = SExpr::create_unary( - From::from(PatternPlan { - plan_type: RelOp::Project, - }), - SExpr::create_leaf(From::from(PatternPlan { - plan_type: RelOp::LogicalGet, - })), - ); - - // Project - // \ - // LogicalGet - let pattern = SExpr::create_unary( - PatternPlan { - plan_type: RelOp::Project, - } - .into(), - SExpr::create_leaf( - PatternPlan { - plan_type: RelOp::LogicalGet, - } - .into(), - ), - ); - - let mut pattern_extractor = PatternExtractor::create(); - let mut memo = Memo::create(); - memo.init(expr).unwrap(); - - memo.insert_m_expr( - 0, - MExpr::create( - 0, - PatternPlan { - plan_type: RelOp::LogicalGet, - } - .into(), - vec![], - ), - ) - .unwrap(); - - let group_expression = memo - .root() - .unwrap() - .iter() - .take(1) - .cloned() - .collect::>()[0] - .clone(); - let result = pattern_extractor.extract(&memo, &group_expression, &pattern); - - let expected_expr = SExpr::create( - PatternPlan { - plan_type: RelOp::Project, - } - .into(), - vec![SExpr::create( - PatternPlan { - plan_type: RelOp::LogicalGet, - } - .into(), - vec![], - Some(0), - )], - Some(1), - ); - - let expected = vec![expected_expr.clone(), expected_expr]; - assert!(compare_s_expr(&result[0], &expected[0])); -} diff --git a/tests/logictest/suites/base/15_query/cte.test b/tests/logictest/suites/base/15_query/cte.test index 725b096e8051c..a528dd79cd138 100644 --- a/tests/logictest/suites/base/15_query/cte.test +++ b/tests/logictest/suites/base/15_query/cte.test @@ -103,7 +103,7 @@ WITH test1 AS (SELECT i + 1, j + 1 FROM test1) SELECT * FROM (SELECT * FROM test 4 5 statement query III -SELECT * FROM (WITH t1 AS (SELECT to_int32(number) i FROM numbers(5)) SELECT * FROM t1) l INNER JOIN test1 r on l.i = r.i order by 
l.i, r.j; +SELECT * FROM (WITH t1 AS (SELECT i FROM test1) SELECT * FROM t1) l INNER JOIN test1 r on l.i = r.i order by l.i, r.j; ---- 1 1 2 diff --git a/tests/logictest/suites/crdb/natual_join b/tests/logictest/suites/crdb/natural_join similarity index 96% rename from tests/logictest/suites/crdb/natual_join rename to tests/logictest/suites/crdb/natural_join index ebc8399fafe13..03c6ba94c9362 100644 --- a/tests/logictest/suites/crdb/natual_join +++ b/tests/logictest/suites/crdb/natural_join @@ -17,7 +17,7 @@ statement ok INSERT INTO t2 VALUES (0, 5), (1, 3), (1, 4), (3, 2), (3, 3), (4, 6); statement query IIII -SELECT k, v, x, y FROM t1 NATURAL JOIN t2; +SELECT k, v, x, y FROM t1 NATURAL JOIN t2 ORDER BY k, v, x, y; ---- -1 -1 0 5 diff --git a/tests/logictest/suites/mode/cluster/exchange.test b/tests/logictest/suites/mode/cluster/exchange.test new file mode 100644 index 0000000000000..0e50b86282155 --- /dev/null +++ b/tests/logictest/suites/mode/cluster/exchange.test @@ -0,0 +1,57 @@ +statement query T +explain select * from numbers(1) t, numbers(2) t1 where t.number = t1.number; + +---- +Exchange(Merge) +└── HashJoin: INNER, build keys: [t1.number (#1)], probe keys: [t.number (#0)], join filters: [] + ├── Exchange(Hash): keys: [t.number (#0)] + │ └── Scan: default.system.numbers, filters: [], Sort: [none], limit: [none] + └── Exchange(Hash): keys: [t1.number (#1)] + └── Scan: default.system.numbers, filters: [], Sort: [none], limit: [none] + +statement query T +explain select * from numbers(1) t, numbers(2) t1, numbers(3) t2 where t.number = t1.number and t.number = t2.number; + +---- +Exchange(Merge) +└── HashJoin: INNER, build keys: [t2.number (#2)], probe keys: [t.number (#0)], join filters: [] + ├── HashJoin: INNER, build keys: [t1.number (#1)], probe keys: [t.number (#0)], join filters: [] + │ ├── Exchange(Hash): keys: [t.number (#0)] + │ │ └── Scan: default.system.numbers, filters: [], Sort: [none], limit: [none] + │ └── Exchange(Hash): keys: [t1.number 
(#1)] + │ └── Scan: default.system.numbers, filters: [], Sort: [none], limit: [none] + └── Exchange(Hash): keys: [t2.number (#2)] + └── Scan: default.system.numbers, filters: [], Sort: [none], limit: [none] + +statement query T +explain select * from (select number as a, number+1 as b from numbers(1)) t, numbers(2) t1, numbers(3) t2 where a = t1.number and b = t2.number; + +---- +Exchange(Merge) +└── HashJoin: INNER, build keys: [t2.number (#4)], probe keys: [t.b (#1)], join filters: [] + ├── Exchange(Hash): keys: [t.b (#1)] + │ └── HashJoin: INNER, build keys: [t1.number (#3)], probe keys: [t.a (#0)], join filters: [] + │ ├── Exchange(Hash): keys: [t.a (#0)] + │ │ └── EvalScalar: [+(numbers.number (#0), 1)] + │ │ └── Scan: default.system.numbers, filters: [], Sort: [none], limit: [none] + │ └── Exchange(Hash): keys: [t1.number (#3)] + │ └── Scan: default.system.numbers, filters: [], Sort: [none], limit: [none] + └── Exchange(Hash): keys: [t2.number (#4)] + └── Scan: default.system.numbers, filters: [], Sort: [none], limit: [none] + +statement query T +explain select * from (select sum(number) as number from numbers(1) group by number) t, numbers(2) t1 where t.number = t1.number; + +---- +Exchange(Merge) +└── HashJoin: INNER, build keys: [t1.number (#4)], probe keys: [t.number (#1)], join filters: [] + ├── Exchange(Hash): keys: [t.number (#1)] + │ └── Project: [number (#1)] + │ └── EvalScalar: [sum(number) (#3)] + │ └── Aggregate(Final): group items: [numbers.number (#0)], aggregate functions: [sum(number)] + │ └── Aggregate(Partial): group items: [numbers.number (#0)], aggregate functions: [sum(number)] + │ └── Exchange(Hash): keys: [numbers.number (#0)] + │ └── Scan: default.system.numbers, filters: [], Sort: [none], limit: [none] + └── Exchange(Hash): keys: [t1.number (#4)] + └── Scan: default.system.numbers, filters: [], Sort: [none], limit: [none] + diff --git a/tests/logictest/suites/mode/standalone/04_0000_explain 
b/tests/logictest/suites/mode/standalone/04_0000_explain deleted file mode 100644 index 452608a2905ad..0000000000000 --- a/tests/logictest/suites/mode/standalone/04_0000_explain +++ /dev/null @@ -1,19 +0,0 @@ -statement ok -set enable_planner_v2 = 0; - -statement ok -set max_threads = 16; - -statement query T -explain select sum(number+1)+2 as sumx from numbers_mt(80000) where (number+1)=4 limit 1; - ----- -Limit: 1 - Projection: (sum((number + 1)) + 2) as sumx:UInt64 - Expression: (sum((number + 1)) + 2):UInt64 (Before Projection) - AggregatorFinal: groupBy=[[]], aggr=[[sum((number + 1))]] - AggregatorPartial: groupBy=[[]], aggr=[[sum((number + 1))]] - Expression: (number + 1):UInt64 (Before GroupBy) - Filter: ((number + 1) = 4) - ReadDataSource: scan schema: [number:UInt64], statistics: [read_rows: 80000, read_bytes: 640000, partitions_scanned: 9, partitions_total: 9], push_downs: [projections: [0], filters: [((number + 1) = 4)]] - diff --git a/tests/logictest/suites/mode/standalone/04_0002_explain_v2 b/tests/logictest/suites/mode/standalone/explain/explain.test similarity index 94% rename from tests/logictest/suites/mode/standalone/04_0002_explain_v2 rename to tests/logictest/suites/mode/standalone/explain/explain.test index 1a24f833a998e..cb620ac2ad285 100644 --- a/tests/logictest/suites/mode/standalone/04_0002_explain_v2 +++ b/tests/logictest/suites/mode/standalone/explain/explain.test @@ -1,22 +1,15 @@ --- TODO(need fix) - -onlyif mysql statement ok drop table if exists t1 all; -onlyif mysql statement ok drop table if exists t2 all; -onlyif mysql statement ok create table t1(a int, b int); -onlyif mysql statement ok create table t2(a int, b int); -onlyif mysql statement query T explain select t1.a from t1 where a > 0; @@ -25,7 +18,6 @@ Project: [a (#0)] └── Filter: [t1.a (#0) > 0] └── Scan: default.default.t1, filters: [t1.a (#0) > 0], Sort: [none], limit: [none] -onlyif mysql statement query T explain select * from t1, t2 where (t1.a = t2.a and t1.a > 3) or 
(t1.a = t2.a and t2.a > 5 and t1.a > 1); @@ -35,7 +27,6 @@ Filter: [(t1.a (#0) > 3) OR ((t2.a (#2) > 5) AND (t1.a (#0) > 1))] ├── Scan: default.default.t1, filters: [], Sort: [none], limit: [none] └── Scan: default.default.t2, filters: [], Sort: [none], limit: [none] -onlyif mysql statement query T explain select * from t1, t2 where (t1.a = t2.a and t1.a > 3) or (t1.a = t2.a); @@ -44,7 +35,6 @@ HashJoin: INNER, build keys: [t2.a (#2)], probe keys: [t1.a (#0)], join filters: ├── Scan: default.default.t1, filters: [], Sort: [none], limit: [none] └── Scan: default.default.t2, filters: [], Sort: [none], limit: [none] -onlyif mysql statement query T explain raw select * from t1, t2 where (t1.a = t2.a and t1.a > 3) or (t1.a = t2.a); @@ -56,7 +46,6 @@ Project: [a (#0),b (#1),a (#2),b (#3)] ├── LogicalGet: default.default.t1, Sort: [none], limit: [none] └── LogicalGet: default.default.t2, Sort: [none], limit: [none] -onlyif mysql statement query T explain raw select * from t1 inner join t2 on t1.a = t2.a and t1.b = t2.b and t1.a > 2; @@ -67,7 +56,6 @@ Project: [a (#0),b (#1),a (#2),b (#3)] ├── LogicalGet: default.default.t1, Sort: [none], limit: [none] └── LogicalGet: default.default.t2, Sort: [none], limit: [none] -onlyif mysql statement query T explain syntax select 1, 'ab', [1,2,3], (1, 'a'); @@ -78,7 +66,6 @@ SELECT [1, 2, 3], (1, 'a') -onlyif mysql statement query T explain syntax select a, sum(b) as sum from t1 where a in (1, 2) and b > 0 and b < 100 group by a order by a; @@ -95,7 +82,6 @@ WHERE GROUP BY a ORDER BY a -onlyif mysql statement query T explain syntax select * from t1 inner join t2 on t1.a = t2.a and t1.b = t2.b and t1.a > 2; @@ -107,7 +93,6 @@ FROM AND t1.b = t2.b AND t1.a > 2 -onlyif mysql statement query T explain syntax delete from t1 where a > 100 and b > 1 and b < 10; @@ -119,7 +104,6 @@ WHERE AND b > 1 AND b < 10 -onlyif mysql statement query T explain syntax copy into t1 from 's3://mybucket/data.csv' file_format = ( type = 'CSV' field_delimiter = 
',' record_delimiter = '\n' skip_header = 1) size_limit=10; @@ -135,7 +119,6 @@ FILE_FORMAT = ( ) SIZE_LIMIT = 10 -onlyif mysql statement query T explain syntax copy into 's3://mybucket/data.csv' from t1 file_format = ( type = 'CSV' field_delimiter = ',' record_delimiter = '\n' skip_header = 1) size_limit=10; @@ -151,7 +134,6 @@ FILE_FORMAT = ( ) SIZE_LIMIT = 10 -onlyif mysql statement query T explain syntax create table t3(a int64, b uint64, c float64, d string, e array(int32), f tuple(f1 bool, f2 string)) engine=fuse cluster by (a, b, c) comment='test' compression='LZ4'; @@ -172,7 +154,6 @@ CLUSTER BY ( comment = 'test', compression = 'LZ4' -onlyif mysql statement query T explain syntax create view v as select number % 3 as a from numbers(100) where number > 10; @@ -185,10 +166,9 @@ AS WHERE number > 10 -onlyif mysql statement ok drop table t1; -onlyif mysql statement ok drop table t2; + diff --git a/tests/logictest/suites/mode/standalone/explain/join.test b/tests/logictest/suites/mode/standalone/explain/join.test new file mode 100644 index 0000000000000..b1ef0cf0ec783 --- /dev/null +++ b/tests/logictest/suites/mode/standalone/explain/join.test @@ -0,0 +1,60 @@ +statement query T +explain select t.number from numbers(1) as t, numbers(1) as t1 where t.number = t1.number; + +---- +Project: [number (#0)] +└── HashJoin: INNER, build keys: [t1.number (#1)], probe keys: [t.number (#0)], join filters: [] + ├── Scan: default.system.numbers, filters: [], Sort: [none], limit: [none] + └── Scan: default.system.numbers, filters: [], Sort: [none], limit: [none] + +statement query T +explain select t.number from numbers(1) as t, numbers(1) as t1 where t.number = t1.number and t.number = t1.number + 1; + +---- +Project: [number (#0)] +└── HashJoin: INNER, build keys: [t1.number (#1), +(t1.number (#1), 1)], probe keys: [t.number (#0), t.number (#0)], join filters: [] + ├── Scan: default.system.numbers, filters: [], Sort: [none], limit: [none] + └── Scan: default.system.numbers, 
filters: [], Sort: [none], limit: [none] + +statement query T +explain select t.number from numbers(1) as t, numbers(1) as t1 where t.number > 1 and 1 < t1.number; + +---- +Project: [number (#0)] +└── CrossJoin + ├── Filter: [t.number (#0) > 1] + │ └── Scan: default.system.numbers, filters: [t.number (#0) > 1], Sort: [none], limit: [none] + └── Filter: [1 < t1.number (#1)] + └── Scan: default.system.numbers, filters: [1 < t1.number (#1)], Sort: [none], limit: [none] + +statement query T +explain select t.number from numbers(1) as t, numbers(1) as t1 where t.number + t1.number = 1; + +---- +Project: [number (#0)] +└── Filter: [+(t.number (#0), t1.number (#1)) = 1] + └── CrossJoin + ├── Scan: default.system.numbers, filters: [], Sort: [none], limit: [none] + └── Scan: default.system.numbers, filters: [], Sort: [none], limit: [none] + +statement query T +explain select t.number from numbers(1) as t, numbers(1) as t1 where t.number = cast(t1.number as string); + +---- +Project: [number (#0)] +└── HashJoin: INNER, build keys: [CAST(CAST(t1.number (#1) AS VARCHAR) AS DOUBLE)], probe keys: [CAST(t.number (#0) AS DOUBLE)], join filters: [] + ├── Scan: default.system.numbers, filters: [], Sort: [none], limit: [none] + └── Scan: default.system.numbers, filters: [], Sort: [none], limit: [none] + +statement query T +explain select t.number from numbers(1) as t, numbers(1) as t1, numbers(1) as t2 where t1.number = t2.number and t.number = 1; + +---- +Project: [number (#0)] +└── HashJoin: INNER, build keys: [t2.number (#2)], probe keys: [t1.number (#1)], join filters: [] + ├── CrossJoin + │ ├── Filter: [t.number (#0) = 1] + │ │ └── Scan: default.system.numbers, filters: [t.number (#0) = 1], Sort: [none], limit: [none] + │ └── Scan: default.system.numbers, filters: [], Sort: [none], limit: [none] + └── Scan: default.system.numbers, filters: [], Sort: [none], limit: [none] + diff --git a/tests/logictest/suites/mode/standalone/explain/limit.test 
b/tests/logictest/suites/mode/standalone/explain/limit.test index 6291f5c1274d9..9a0d13471bdff 100644 --- a/tests/logictest/suites/mode/standalone/explain/limit.test +++ b/tests/logictest/suites/mode/standalone/explain/limit.test @@ -64,3 +64,4 @@ Limit: [1], Offset: [0] └── Aggregate(Final): group items: [t.number (#4)], aggregate functions: [count(t.number)] └── Aggregate(Partial): group items: [t.number (#4)], aggregate functions: [count(t.number)] └── Scan: default.system.numbers, filters: [], Sort: [none], limit: [none] + diff --git a/tests/logictest/suites/mode/standalone/explain/prune_column.test b/tests/logictest/suites/mode/standalone/explain/prune_column.test new file mode 100644 index 0000000000000..94eca42e1c13e --- /dev/null +++ b/tests/logictest/suites/mode/standalone/explain/prune_column.test @@ -0,0 +1,90 @@ +statement query T +explain select * from (select a from (select number as a, number + 1 as b from numbers(1))); + +---- +Project: [number (#0)] +└── Scan: default.system.numbers, filters: [], Sort: [none], limit: [none] + +statement query T +explain select a from (select number as a, count(*) as b from numbers(1) group by a); + +---- +Project: [number (#0)] +└── Aggregate(Final): group items: [numbers.number (#0)], aggregate functions: [] + └── Aggregate(Partial): group items: [numbers.number (#0)], aggregate functions: [] + └── Scan: default.system.numbers, filters: [], Sort: [none], limit: [none] + +statement query T +explain select a from (select number as a, number b, sum(number) as c, number as d, number as e from numbers(1) group by a, b, d, e) where b > 1 order by d limit 1; + +---- +Project: [number (#0)] +└── Limit: [1], Offset: [0] + └── Sort: [number (#0) ASC], limit: [1] + └── Project: [number (#0)] + └── Filter: [numbers.b (#0) > 1] + └── Aggregate(Final): group items: [numbers.number (#0), numbers.number (#0), numbers.number (#0), numbers.number (#0)], aggregate functions: [] + └── Aggregate(Partial): group items: [numbers.number 
(#0), numbers.number (#0), numbers.number (#0), numbers.number (#0)], aggregate functions: [] + └── Scan: default.system.numbers, filters: [], Sort: [none], limit: [none] + +statement query T +explain select * from (select t1.a from (select number + 1 as a, number + 1 as b, number + 1 as c, number + 1 as d from numbers(1)) as t1, (select number + 1 as a, number + 1 as b, number + 1 as c from numbers(1)) as t2 where t1.b = t2.b and t1.c = 1); + +---- +Project: [a (#1)] +└── HashJoin: INNER, build keys: [t2.b (#11)], probe keys: [t1.b (#2)], join filters: [] + ├── Project: [a (#1),b (#2)] + │ └── Filter: [t1.c (#3) = 1] + │ └── EvalScalar: [+(numbers.number (#0), 1), +(numbers.number (#0), 1), +(numbers.number (#0), 1)] + │ └── Scan: default.system.numbers, filters: [], Sort: [none], limit: [none] + └── Project: [b (#11)] + └── EvalScalar: [+(numbers.number (#9), 1)] + └── Scan: default.system.numbers, filters: [], Sort: [none], limit: [none] + +statement query T +explain select t1.a from (select number + 1 as a, number + 1 as b from numbers(1)) as t1 where t1.a = (select count(*) from (select t2.a, t3.a from (select number + 1 as a, number + 1 as b, number + 1 as c, number + 1 as d from numbers(1)) as t2, (select number + 1 as a, number + 1 as b, number + 1 as c from numbers(1)) as t3 where t2.b = t3.b and t2.c = 1)); + +---- +Project: [a (#1)] +└── Filter: [t1.a (#1) = scalar_subquery_21 (#21)] + └── HashJoin: SINGLE, build keys: [], probe keys: [], join filters: [] + ├── Project: [a (#1)] + │ └── EvalScalar: [+(numbers.number (#0), 1)] + │ └── Scan: default.system.numbers, filters: [], Sort: [none], limit: [none] + └── Project: [COUNT(*) (#21)] + └── EvalScalar: [COUNT(*) (#22)] + └── Aggregate(Final): group items: [], aggregate functions: [COUNT(*)] + └── Aggregate(Partial): group items: [], aggregate functions: [COUNT(*)] + └── Project: [a (#6)] + └── HashJoin: INNER, build keys: [t3.b (#16)], probe keys: [t2.b (#7)], join filters: [] + ├── Project: [a (#6),b 
(#7)] + │ └── Filter: [t2.c (#8) = 1] + │ └── EvalScalar: [+(numbers.number (#5), 1), +(numbers.number (#5), 1), +(numbers.number (#5), 1)] + │ └── Scan: default.system.numbers, filters: [], Sort: [none], limit: [none] + └── Project: [b (#16)] + └── EvalScalar: [+(numbers.number (#14), 1)] + └── Scan: default.system.numbers, filters: [], Sort: [none], limit: [none] + +statement query T +explain select name from system.functions order by example; + +---- +Project: [name (#0)] +└── Sort: [example (#7) ASC], limit: [none] + └── Scan: default.system.functions, filters: [], Sort: [example (#7) ASC], limit: [none] + +statement query T +explain select t.number from numbers(10) t where exists(select * from numbers(10)); + +---- +Project: [number (#0)] +└── CrossJoin + ├── Project: [subquery (#3)] + │ └── Filter: [subquery_3 (#3)] + │ └── EvalScalar: [count(*) (#2) = 1] + │ └── Aggregate(Final): group items: [], aggregate functions: [count(*)] + │ └── Aggregate(Partial): group items: [], aggregate functions: [count(*)] + │ └── Limit: [1], Offset: [0] + │ └── Scan: default.system.numbers, filters: [], Sort: [none], limit: [1] + └── Scan: default.system.numbers, filters: [], Sort: [none], limit: [none] + diff --git a/tests/logictest/suites/mode/standalone/explain/select.test b/tests/logictest/suites/mode/standalone/explain/select.test new file mode 100644 index 0000000000000..8c808b98aa4ef --- /dev/null +++ b/tests/logictest/suites/mode/standalone/explain/select.test @@ -0,0 +1,76 @@ +statement query T +explain select * from numbers(1); + +---- +Scan: default.system.numbers, filters: [], Sort: [none], limit: [none] + +statement query T +explain select * from (select * from numbers(1)) as t1 where number = 1; + +---- +Filter: [t1.number (#0) = 1] +└── Scan: default.system.numbers, filters: [t1.number (#0) = 1], Sort: [none], limit: [none] + +statement query T +explain select * from (select number as a, number + 1 as b from numbers(1)) as t1 where a = 1 and b = 1; + +---- 
+Filter: [t1.a (#0) = 1, t1.b (#1) = 1] +└── EvalScalar: [+(numbers.number (#0), 1)] + └── Scan: default.system.numbers, filters: [], Sort: [none], limit: [none] + +statement query T +explain select * from (select number as a, number + 1 as b from numbers(1)) as t1 where a = 1; + +---- +EvalScalar: [+(numbers.number (#0), 1)] +└── Filter: [t1.a (#0) = 1] + └── Scan: default.system.numbers, filters: [t1.a (#0) = 1], Sort: [none], limit: [none] + +statement query T +explain select * from numbers(1) where number = pow(1, 1 + 1); + +---- +Filter: [numbers.number (#0) = 1] +└── Scan: default.system.numbers, filters: [numbers.number (#0) = 1], Sort: [none], limit: [none] + +statement query T +explain select * from numbers(1) where TRUE and 1 = 1; + +---- +Scan: default.system.numbers, filters: [], Sort: [none], limit: [none] + +statement query T +explain select * from numbers(1) where number = 0 and false; + +---- +Filter: [false] +└── Scan: default.system.numbers, filters: [false], Sort: [none], limit: [none] + +statement query T +explain select * from numbers(1) where number = 0 and null; + +---- +Filter: [false] +└── Scan: default.system.numbers, filters: [false], Sort: [none], limit: [none] + +statement query T +explain select * from numbers(1) where null; + +---- +Filter: [NULL] +└── Scan: default.system.numbers, filters: [NULL], Sort: [none], limit: [none] + +statement query T +explain select a from (select number as a, number as b from numbers(1)); + +---- +Scan: default.system.numbers, filters: [], Sort: [none], limit: [none] + +statement query T +explain select a from (select number as a, number+1 as b from numbers(1)); + +---- +Project: [number (#0)] +└── Scan: default.system.numbers, filters: [], Sort: [none], limit: [none] + diff --git a/tests/logictest/suites/mode/standalone/explain/subquery.test b/tests/logictest/suites/mode/standalone/explain/subquery.test new file mode 100644 index 0000000000000..842cb70dda457 --- /dev/null +++ 
b/tests/logictest/suites/mode/standalone/explain/subquery.test @@ -0,0 +1,151 @@ +statement query T +explain select t.number from numbers(1) as t, numbers(1) as t1 where t.number = (select count(*) from numbers(1) as t2, numbers(1) as t3 where t.number = t2.number); + +---- +Project: [number (#0)] +└── Filter: [t.number (#0) = CAST(if(is_null(scalar_subquery_4 (#4)), 0, scalar_subquery_4 (#4)) AS BIGINT UNSIGNED)] + └── HashJoin: SINGLE, build keys: [subquery_6 (#6)], probe keys: [subquery_0 (#0)], join filters: [] + ├── CrossJoin + │ ├── Scan: default.system.numbers, filters: [], Sort: [none], limit: [none] + │ └── Scan: default.system.numbers, filters: [], Sort: [none], limit: [none] + └── Project: [COUNT(*) (#4),number (#6)] + └── EvalScalar: [COUNT(*) (#5)] + └── Aggregate(Final): group items: [subquery_6 (#6)], aggregate functions: [COUNT(*)] + └── Aggregate(Partial): group items: [subquery_6 (#6)], aggregate functions: [COUNT(*)] + └── HashJoin: INNER, build keys: [t2.number (#2)], probe keys: [subquery_6 (#6)], join filters: [] + ├── Scan: default.system.numbers, filters: [], Sort: [none], limit: [none] + └── CrossJoin + ├── Scan: default.system.numbers, filters: [], Sort: [none], limit: [none] + └── Scan: default.system.numbers, filters: [], Sort: [none], limit: [none] + +statement query T +explain select t.number from numbers(1) as t where exists (select t1.number from numbers(1) as t1 where t.number = t1.number) or t.number > 1; + +---- +Project: [number (#0)] +└── Filter: [(3 (#3)) OR (t.number (#0) > 1)] + └── HashJoin: MARK, build keys: [subquery_0 (#0)], probe keys: [subquery_2 (#2)], join filters: [] + ├── HashJoin: INNER, build keys: [t1.number (#1)], probe keys: [subquery_2 (#2)], join filters: [] + │ ├── Scan: default.system.numbers, filters: [], Sort: [none], limit: [none] + │ └── Scan: default.system.numbers, filters: [], Sort: [none], limit: [none] + └── Scan: default.system.numbers, filters: [], Sort: [none], limit: [none] + +statement query T 
+explain select t.number from numbers(1) as t where exists (select * from numbers(1) where number = 0); + +---- +Project: [number (#0)] +└── CrossJoin + ├── Project: [subquery (#3)] + │ └── Filter: [subquery_3 (#3)] + │ └── EvalScalar: [count(*) (#2) = 1] + │ └── Aggregate(Final): group items: [], aggregate functions: [count(*)] + │ └── Aggregate(Partial): group items: [], aggregate functions: [count(*)] + │ └── Limit: [1], Offset: [0] + │ └── Filter: [numbers.number (#1) = 0] + │ └── Scan: default.system.numbers, filters: [numbers.number (#1) = 0], Sort: [none], limit: [none] + └── Scan: default.system.numbers, filters: [], Sort: [none], limit: [none] + +statement query T +explain select t.number from numbers(1) as t where number = (select * from numbers(1) where number = 0); + +---- +Project: [number (#0)] +└── Filter: [t.number (#0) = scalar_subquery_1 (#1)] + └── HashJoin: SINGLE, build keys: [], probe keys: [], join filters: [] + ├── Scan: default.system.numbers, filters: [], Sort: [none], limit: [none] + └── Filter: [numbers.number (#1) = 0] + └── Scan: default.system.numbers, filters: [numbers.number (#1) = 0], Sort: [none], limit: [none] + +statement query T +explain select t.number from numbers(1) as t where exists (select * from numbers(1) where number = t.number); + +---- +Project: [number (#0)] +└── HashJoin: SEMI, build keys: [numbers.number (#1)], probe keys: [t.number (#0)], join filters: [] + ├── Scan: default.system.numbers, filters: [], Sort: [none], limit: [none] + └── Scan: default.system.numbers, filters: [], Sort: [none], limit: [none] + +statement query T +explain select t.number from numbers(1) as t where not exists (select * from numbers(1) where number = t.number); + +---- +Project: [number (#0)] +└── Filter: [not(3 (#3))] + └── HashJoin: MARK, build keys: [subquery_0 (#0)], probe keys: [subquery_2 (#2)], join filters: [] + ├── HashJoin: INNER, build keys: [numbers.number (#1)], probe keys: [subquery_2 (#2)], join filters: [] + │ ├── Scan: 
default.system.numbers, filters: [], Sort: [none], limit: [none] + │ └── Scan: default.system.numbers, filters: [], Sort: [none], limit: [none] + └── Scan: default.system.numbers, filters: [], Sort: [none], limit: [none] + +statement query T +explain select * from numbers(1) as t where exists (select number as a from numbers(1) where number = t.number); + +---- +Project: [number (#0)] +└── HashJoin: SEMI, build keys: [numbers.number (#1)], probe keys: [t.number (#0)], join filters: [] + ├── Scan: default.system.numbers, filters: [], Sort: [none], limit: [none] + └── Scan: default.system.numbers, filters: [], Sort: [none], limit: [none] + +statement query T +explain select t.number from numbers(1) as t where exists (select * from numbers(1) where number = t.number and number = 0 and t.number < 10); + +---- +Project: [number (#0)] +└── HashJoin: SEMI, build keys: [numbers.number (#1)], probe keys: [t.number (#0)], join filters: [] + ├── Filter: [t.number (#0) < 10] + │ └── Scan: default.system.numbers, filters: [t.number (#0) < 10], Sort: [none], limit: [none] + └── Filter: [numbers.number (#1) = 0] + └── Scan: default.system.numbers, filters: [numbers.number (#1) = 0], Sort: [none], limit: [none] + +statement query T +explain select t.number from numbers(1) as t where exists (select * from numbers(1) where number = t.number and t.number < number); + +---- +Project: [number (#0)] +└── HashJoin: SEMI, build keys: [numbers.number (#1)], probe keys: [t.number (#0)], join filters: [t.number (#0) < numbers.number (#1)] + ├── Scan: default.system.numbers, filters: [], Sort: [none], limit: [none] + └── Scan: default.system.numbers, filters: [], Sort: [none], limit: [none] + +statement query T +explain select t.number from numbers(1) as t where exists (select number as a, number as b, number as c from numbers(1) where number = t.number); + +---- +Project: [number (#0)] +└── HashJoin: SEMI, build keys: [numbers.number (#1)], probe keys: [t.number (#0)], join filters: [] + ├── 
Scan: default.system.numbers, filters: [], Sort: [none], limit: [none] + └── Scan: default.system.numbers, filters: [], Sort: [none], limit: [none] + +statement query T +explain select t.number from numbers(1) as t, numbers(1) as t1 where (select count(*) = 1 from numbers(1) where t.number = number) and t.number = t1.number; + +---- +Project: [number (#0)] +└── Filter: [CAST(if(is_null(scalar_subquery_3 (#3)), 0, scalar_subquery_3 (#3)) AS BIGINT UNSIGNED)] + └── HashJoin: SINGLE, build keys: [subquery_5 (#5)], probe keys: [subquery_0 (#0)], join filters: [] + ├── HashJoin: INNER, build keys: [t1.number (#1)], probe keys: [t.number (#0)], join filters: [] + │ ├── Scan: default.system.numbers, filters: [], Sort: [none], limit: [none] + │ └── Scan: default.system.numbers, filters: [], Sort: [none], limit: [none] + └── Project: [COUNT(*) = 1 (#3),number (#5)] + └── EvalScalar: [COUNT(*) (#4) = 1] + └── Aggregate(Final): group items: [subquery_5 (#5)], aggregate functions: [COUNT(*)] + └── Aggregate(Partial): group items: [subquery_5 (#5)], aggregate functions: [COUNT(*)] + └── HashJoin: INNER, build keys: [numbers.number (#2)], probe keys: [subquery_5 (#5)], join filters: [] + ├── Scan: default.system.numbers, filters: [], Sort: [none], limit: [none] + └── Scan: default.system.numbers, filters: [], Sort: [none], limit: [none] + +statement query T +explain select t.number from numbers(1) as t where exists(select * from numbers(1) as t1 where t.number > t1.number) and not exists(select * from numbers(1) as t1 where t.number < t1.number); + +---- +Project: [number (#0)] +└── Filter: [not(4 (#4))] + └── HashJoin: MARK, build keys: [subquery_0 (#0)], probe keys: [subquery_3 (#3)], join filters: [] + ├── Filter: [subquery_3 (#3) < t1.number (#2)] + │ └── CrossJoin + │ ├── Scan: default.system.numbers, filters: [], Sort: [none], limit: [none] + │ └── Scan: default.system.numbers, filters: [], Sort: [none], limit: [none] + └── HashJoin: SEMI, build keys: [], probe keys: [], 
join filters: [t.number (#0) > t1.number (#1)] + ├── Scan: default.system.numbers, filters: [], Sort: [none], limit: [none] + └── Scan: default.system.numbers, filters: [], Sort: [none], limit: [none] +