|
| 1 | +// Copyright 2022 Datafuse Labs. |
| 2 | +// |
| 3 | +// Licensed under the Apache License, Version 2.0 (the "License"); |
| 4 | +// you may not use this file except in compliance with the License. |
| 5 | +// You may obtain a copy of the License at |
| 6 | +// |
| 7 | +// http://www.apache.org/licenses/LICENSE-2.0 |
| 8 | +// |
| 9 | +// Unless required by applicable law or agreed to in writing, software |
| 10 | +// distributed under the License is distributed on an "AS IS" BASIS, |
| 11 | +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 12 | +// See the License for the specific language governing permissions and |
| 13 | +// limitations under the License. |
| 14 | + |
| 15 | +use common_exception::ErrorCode; |
| 16 | +use common_exception::Result; |
| 17 | + |
| 18 | +use crate::sql::find_smallest_column; |
| 19 | +use crate::sql::optimizer::ColumnSet; |
| 20 | +use crate::sql::optimizer::SExpr; |
| 21 | +use crate::sql::plans::Aggregate; |
| 22 | +use crate::sql::plans::EvalScalar; |
| 23 | +use crate::sql::plans::LogicalGet; |
| 24 | +use crate::sql::plans::Project; |
| 25 | +use crate::sql::plans::RelOperator; |
| 26 | +use crate::sql::MetadataRef; |
| 27 | +use crate::sql::ScalarExpr; |
| 28 | + |
| 29 | +pub struct ColumnPruner { |
| 30 | + metadata: MetadataRef, |
| 31 | +} |
| 32 | + |
| 33 | +impl ColumnPruner { |
| 34 | + pub fn new(metadata: MetadataRef) -> Self { |
| 35 | + Self { metadata } |
| 36 | + } |
| 37 | + |
| 38 | + pub fn prune_columns(&self, expr: &SExpr, require_columns: ColumnSet) -> Result<SExpr> { |
| 39 | + match expr.plan() { |
| 40 | + // For project and aggregate, collect required columns for its child |
| 41 | + RelOperator::Project(p) => Ok(SExpr::create_unary( |
| 42 | + RelOperator::Project(p.clone()), |
| 43 | + self.keep_required_columns(expr.child(0)?, p.columns.clone())?, |
| 44 | + )), |
| 45 | + RelOperator::Aggregate(p) => { |
| 46 | + let mut used = p.group_items.iter().fold(ColumnSet::new(), |acc, v| { |
| 47 | + acc.union(&v.scalar.used_columns()).cloned().collect() |
| 48 | + }); |
| 49 | + used = p.aggregate_functions.iter().fold(used, |acc, v| { |
| 50 | + acc.union(&v.scalar.used_columns()).cloned().collect() |
| 51 | + }); |
| 52 | + Ok(SExpr::create_unary( |
| 53 | + RelOperator::Aggregate(p.clone()), |
| 54 | + self.keep_required_columns(expr.child(0)?, used)?, |
| 55 | + )) |
| 56 | + } |
| 57 | + // For the other plan nodes, keep searching for Project node with required columns |
| 58 | + p => { |
| 59 | + let children = expr |
| 60 | + .children() |
| 61 | + .iter() |
| 62 | + .map(|expr| self.prune_columns(expr, require_columns.clone())) |
| 63 | + .collect::<Result<Vec<_>>>()?; |
| 64 | + Ok(SExpr::create(p.clone(), children, None)) |
| 65 | + } |
| 66 | + } |
| 67 | + } |
| 68 | + |
| 69 | + /// Keep columns referenced by parent plan node. |
| 70 | + /// `required` contains columns referenced by its ancestors. When a node has multiple children, |
| 71 | + /// the required columns for each child could be different and we may include columns not needed |
| 72 | + /// by a specific child. Columns should be skipped once we found it not exist in the subtree as we |
| 73 | + /// visit a plan node. |
| 74 | + fn keep_required_columns(&self, expr: &SExpr, mut required: ColumnSet) -> Result<SExpr> { |
| 75 | + match expr.plan() { |
| 76 | + RelOperator::LogicalGet(p) => { |
| 77 | + let mut used: ColumnSet = required.intersection(&p.columns).cloned().collect(); |
| 78 | + if used.is_empty() { |
| 79 | + let columns = self.metadata.read().columns_by_table_index(p.table_index); |
| 80 | + let smallest_index = find_smallest_column(&columns); |
| 81 | + used.insert(smallest_index); |
| 82 | + } |
| 83 | + |
| 84 | + Ok(SExpr::create_leaf(RelOperator::LogicalGet(LogicalGet { |
| 85 | + table_index: p.table_index, |
| 86 | + columns: used, |
| 87 | + push_down_predicates: p.push_down_predicates.clone(), |
| 88 | + }))) |
| 89 | + } |
| 90 | + RelOperator::LogicalInnerJoin(p) => { |
| 91 | + // Include columns referenced in left conditions |
| 92 | + let left = p.left_conditions.iter().fold(required.clone(), |acc, v| { |
| 93 | + acc.union(&v.used_columns()).cloned().collect() |
| 94 | + }); |
| 95 | + // Include columns referenced in left conditions |
| 96 | + let right = p.right_conditions.iter().fold(required.clone(), |acc, v| { |
| 97 | + acc.union(&v.used_columns()).cloned().collect() |
| 98 | + }); |
| 99 | + |
| 100 | + let others = p.other_conditions.iter().fold(required, |acc, v| { |
| 101 | + acc.union(&v.used_columns()).cloned().collect() |
| 102 | + }); |
| 103 | + |
| 104 | + Ok(SExpr::create_binary( |
| 105 | + RelOperator::LogicalInnerJoin(p.clone()), |
| 106 | + self.keep_required_columns( |
| 107 | + expr.child(0)?, |
| 108 | + left.union(&others).cloned().collect(), |
| 109 | + )?, |
| 110 | + self.keep_required_columns( |
| 111 | + expr.child(1)?, |
| 112 | + right.union(&others).cloned().collect(), |
| 113 | + )?, |
| 114 | + )) |
| 115 | + } |
| 116 | + RelOperator::Project(p) => { |
| 117 | + let used: ColumnSet = p.columns.intersection(&required).cloned().collect(); |
| 118 | + Ok(SExpr::create_unary( |
| 119 | + RelOperator::Project(Project { |
| 120 | + columns: used.clone(), |
| 121 | + }), |
| 122 | + self.keep_required_columns(expr.child(0)?, used)?, |
| 123 | + )) |
| 124 | + } |
| 125 | + RelOperator::EvalScalar(p) => { |
| 126 | + let mut used = vec![]; |
| 127 | + // Only keep columns needed by parent plan. |
| 128 | + p.items.iter().for_each(|s| { |
| 129 | + if !required.contains(&s.index) { |
| 130 | + return; |
| 131 | + } |
| 132 | + used.push(s.clone()); |
| 133 | + s.scalar.used_columns().iter().for_each(|c| { |
| 134 | + required.insert(*c); |
| 135 | + }) |
| 136 | + }); |
| 137 | + Ok(SExpr::create_unary( |
| 138 | + RelOperator::EvalScalar(EvalScalar { items: used }), |
| 139 | + self.keep_required_columns(expr.child(0)?, required)?, |
| 140 | + )) |
| 141 | + } |
| 142 | + RelOperator::Filter(p) => { |
| 143 | + let used = p.predicates.iter().fold(required, |acc, v| { |
| 144 | + acc.union(&v.used_columns()).cloned().collect() |
| 145 | + }); |
| 146 | + Ok(SExpr::create_unary( |
| 147 | + RelOperator::Filter(p.clone()), |
| 148 | + self.keep_required_columns(expr.child(0)?, used)?, |
| 149 | + )) |
| 150 | + } |
| 151 | + RelOperator::Aggregate(p) => { |
| 152 | + let mut used = vec![]; |
| 153 | + for item in &p.aggregate_functions { |
| 154 | + if required.contains(&item.index) { |
| 155 | + for c in item.scalar.used_columns() { |
| 156 | + required.insert(c); |
| 157 | + } |
| 158 | + used.push(item.clone()); |
| 159 | + } |
| 160 | + } |
| 161 | + p.group_items.iter().for_each(|i| { |
| 162 | + // If the group item comes from a complex expression, we only include the final |
| 163 | + // column index here. The used columns will be included in its EvalScalar child. |
| 164 | + required.insert(i.index); |
| 165 | + }); |
| 166 | + Ok(SExpr::create_unary( |
| 167 | + RelOperator::Aggregate(Aggregate { |
| 168 | + group_items: p.group_items.clone(), |
| 169 | + aggregate_functions: used, |
| 170 | + from_distinct: p.from_distinct, |
| 171 | + mode: p.mode, |
| 172 | + }), |
| 173 | + self.keep_required_columns(expr.child(0)?, required)?, |
| 174 | + )) |
| 175 | + } |
| 176 | + RelOperator::Sort(p) => { |
| 177 | + p.items.iter().for_each(|s| { |
| 178 | + required.insert(s.index); |
| 179 | + }); |
| 180 | + Ok(SExpr::create_unary( |
| 181 | + RelOperator::Sort(p.clone()), |
| 182 | + self.keep_required_columns(expr.child(0)?, required)?, |
| 183 | + )) |
| 184 | + } |
| 185 | + RelOperator::Limit(p) => Ok(SExpr::create_unary( |
| 186 | + RelOperator::Limit(p.clone()), |
| 187 | + self.keep_required_columns(expr.child(0)?, required)?, |
| 188 | + )), |
| 189 | + |
| 190 | + _ => Err(ErrorCode::LogicalError( |
| 191 | + "Attempting to prune columns of a physical plan is not allowed", |
| 192 | + )), |
| 193 | + } |
| 194 | + } |
| 195 | +} |
0 commit comments