Skip to content

Commit d1dd8ad

Browse files
committed
implement cost-based optimization
1 parent 8f8220a commit d1dd8ad

Some content is hidden

Large commits have some content hidden by default. Use the search box below for content that may be hidden.

87 files changed

+1252
-1418
lines changed

src/query/catalog/src/table.rs

+1-1
Original file line number | Diff line number | Diff line change
@@ -224,7 +224,7 @@ pub enum NavigationPoint {
224224
TimePoint(DateTime<Utc>),
225225
}
226226

227-
#[derive(Debug)]
227+
#[derive(Debug, Clone)]
228228
pub struct TableStatistics {
229229
pub num_rows: Option<u64>,
230230
pub data_size: Option<u64>,

src/query/datavalues/src/data_value.rs

+18
Original file line number | Diff line number | Diff line change
@@ -17,6 +17,7 @@
1717

1818
use std::cmp::Ordering;
1919
use std::fmt;
20+
use std::hash::Hash;
2021
use std::sync::Arc;
2122

2223
use common_exception::ErrorCode;
@@ -367,6 +368,23 @@ impl Ord for DataValue {
367368
}
368369
}
369370

371+
#[allow(clippy::derive_hash_xor_eq)]
372+
impl Hash for DataValue {
373+
fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
374+
match self {
375+
DataValue::Null => std::mem::discriminant(self).hash(state),
376+
DataValue::Boolean(v) => v.hash(state),
377+
DataValue::UInt64(v) => v.hash(state),
378+
DataValue::Int64(v) => v.hash(state),
379+
DataValue::Float64(v) => v.to_bits().hash(state),
380+
DataValue::String(v) => v.hash(state),
381+
DataValue::Array(v) => v.hash(state),
382+
DataValue::Struct(v) => v.hash(state),
383+
DataValue::Variant(v) => v.hash(state),
384+
}
385+
}
386+
}
387+
370388
// Did not use std::convert::TryFrom
371389
// Because we do not need a custom error type.
372390
pub trait DFTryFrom<T>: Sized {

src/query/datavalues/src/types/data_type.rs

+2-1
Original file line number | Diff line number | Diff line change
@@ -46,7 +46,8 @@ use crate::serializations::ConstSerializer;
4646
pub const ARROW_EXTENSION_NAME: &str = "ARROW:extension:databend_name";
4747
pub const ARROW_EXTENSION_META: &str = "ARROW:extension:databend_metadata";
4848

49-
#[derive(Clone, Debug, serde::Deserialize, serde::Serialize)]
49+
#[derive(Clone, Debug, Hash, serde::Deserialize, serde::Serialize)]
50+
#[allow(clippy::derive_hash_xor_eq)]
5051
#[serde(tag = "type")]
5152
#[enum_dispatch(DataType)]
5253
pub enum DataTypeImpl {

src/query/datavalues/src/types/type_array.rs

+1-1
Original file line number | Diff line number | Diff line change
@@ -26,7 +26,7 @@ use crate::prelude::*;
2626
use crate::serializations::ArraySerializer;
2727
use crate::serializations::TypeSerializerImpl;
2828

29-
#[derive(Clone, serde::Deserialize, serde::Serialize)]
29+
#[derive(Clone, Hash, serde::Deserialize, serde::Serialize)]
3030
pub struct ArrayType {
3131
inner: Box<DataTypeImpl>,
3232
}

src/query/datavalues/src/types/type_boolean.rs

+1-1
Original file line number | Diff line number | Diff line change
@@ -24,7 +24,7 @@ pub use crate::prelude::*;
2424
use crate::serializations::BooleanSerializer;
2525
use crate::serializations::TypeSerializerImpl;
2626

27-
#[derive(Default, Clone, serde::Deserialize, serde::Serialize)]
27+
#[derive(Default, Clone, Hash, serde::Deserialize, serde::Serialize)]
2828
pub struct BooleanType {}
2929

3030
impl BooleanType {

src/query/datavalues/src/types/type_date.rs

+1-1
Original file line number | Diff line number | Diff line change
@@ -41,7 +41,7 @@ pub fn check_date(days: i32) -> Result<()> {
4141
))
4242
}
4343

44-
#[derive(Default, Clone, serde::Deserialize, serde::Serialize)]
44+
#[derive(Default, Clone, Hash, serde::Deserialize, serde::Serialize)]
4545
pub struct DateType {}
4646

4747
impl DateType {

src/query/datavalues/src/types/type_interval.rs

+1-1
Original file line number | Diff line number | Diff line change
@@ -25,7 +25,7 @@ use crate::prelude::*;
2525
use crate::serializations::DateSerializer;
2626
use crate::serializations::TypeSerializerImpl;
2727

28-
#[derive(Clone, serde::Deserialize, serde::Serialize)]
28+
#[derive(Clone, Hash, serde::Deserialize, serde::Serialize)]
2929
pub struct IntervalType {
3030
kind: IntervalKind,
3131
}

src/query/datavalues/src/types/type_null.rs

+1-1
Original file line number | Diff line number | Diff line change
@@ -22,7 +22,7 @@ use crate::prelude::*;
2222
use crate::serializations::NullSerializer;
2323
use crate::serializations::TypeSerializerImpl;
2424

25-
#[derive(Default, Clone, serde::Deserialize, serde::Serialize)]
25+
#[derive(Default, Clone, Hash, serde::Deserialize, serde::Serialize)]
2626
pub struct NullType {}
2727

2828
impl NullType {

src/query/datavalues/src/types/type_nullable.rs

+1-1
Original file line number | Diff line number | Diff line change
@@ -28,7 +28,7 @@ use crate::prelude::*;
2828
use crate::serializations::NullableSerializer;
2929
use crate::serializations::TypeSerializerImpl;
3030

31-
#[derive(Clone, serde::Deserialize, serde::Serialize)]
31+
#[derive(Clone, Hash, serde::Deserialize, serde::Serialize)]
3232
pub struct NullableType {
3333
inner: Box<DataTypeImpl>,
3434
}

src/query/datavalues/src/types/type_primitive.rs

+6
Original file line number | Diff line number | Diff line change
@@ -144,6 +144,12 @@ macro_rules! impl_numeric {
144144
write!(f, "{}", self.name())
145145
}
146146
}
147+
148+
impl std::hash::Hash for PrimitiveDataType<$ty> {
149+
fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
150+
self.data_type_id().hash(state);
151+
}
152+
}
147153
};
148154
}
149155
//

src/query/datavalues/src/types/type_string.rs

+1-1
Original file line number | Diff line number | Diff line change
@@ -24,7 +24,7 @@ use crate::prelude::*;
2424
use crate::serializations::StringSerializer;
2525
use crate::serializations::TypeSerializerImpl;
2626

27-
#[derive(Default, Clone, serde::Deserialize, serde::Serialize)]
27+
#[derive(Default, Clone, Hash, serde::Deserialize, serde::Serialize)]
2828
pub struct StringType {}
2929

3030
impl StringType {

src/query/datavalues/src/types/type_struct.rs

+1-1
Original file line number | Diff line number | Diff line change
@@ -27,7 +27,7 @@ use crate::prelude::*;
2727
use crate::serializations::StructSerializer;
2828
use crate::serializations::TypeSerializerImpl;
2929

30-
#[derive(Default, Clone, serde::Deserialize, serde::Serialize)]
30+
#[derive(Default, Clone, Hash, serde::Deserialize, serde::Serialize)]
3131
pub struct StructType {
3232
names: Option<Vec<String>>,
3333
types: Vec<DataTypeImpl>,

src/query/datavalues/src/types/type_timestamp.rs

+1-1
Original file line number | Diff line number | Diff line change
@@ -47,7 +47,7 @@ pub fn check_timestamp(micros: i64) -> Result<()> {
4747
}
4848

4949
/// Timestamp type only stores UTC time in microseconds
50-
#[derive(Default, Clone, serde::Deserialize, serde::Serialize)]
50+
#[derive(Default, Clone, Hash, serde::Deserialize, serde::Serialize)]
5151
pub struct TimestampType {
5252
/// Typical values are 0 (seconds), 3 (milliseconds), and 6 (microseconds)
5353
precision: usize,

src/query/datavalues/src/types/type_variant.rs

+1-1
Original file line number | Diff line number | Diff line change
@@ -25,7 +25,7 @@ use crate::prelude::*;
2525
use crate::serializations::TypeSerializerImpl;
2626
use crate::serializations::VariantSerializer;
2727

28-
#[derive(Default, Clone, serde::Deserialize, serde::Serialize)]
28+
#[derive(Default, Clone, Hash, serde::Deserialize, serde::Serialize)]
2929
pub struct VariantType {}
3030

3131
impl VariantType {

src/query/datavalues/src/types/type_variant_array.rs

+1-1
Original file line number | Diff line number | Diff line change
@@ -25,7 +25,7 @@ use crate::prelude::*;
2525
use crate::serializations::TypeSerializerImpl;
2626
use crate::serializations::VariantSerializer;
2727

28-
#[derive(Default, Clone, serde::Deserialize, serde::Serialize)]
28+
#[derive(Default, Clone, Hash, serde::Deserialize, serde::Serialize)]
2929
pub struct VariantArrayType {}
3030

3131
impl VariantArrayType {

src/query/datavalues/src/types/type_variant_object.rs

+1-1
Original file line number | Diff line number | Diff line change
@@ -25,7 +25,7 @@ use crate::prelude::*;
2525
use crate::serializations::TypeSerializerImpl;
2626
use crate::serializations::VariantSerializer;
2727

28-
#[derive(Default, Clone, serde::Deserialize, serde::Serialize)]
28+
#[derive(Default, Clone, Hash, serde::Deserialize, serde::Serialize)]
2929
pub struct VariantObjectType {}
3030

3131
impl VariantObjectType {

src/query/datavalues/src/variant_value.rs

+10
Original file line number | Diff line number | Diff line change
@@ -16,6 +16,7 @@ use core::str::FromStr;
1616
use std::cmp::Ordering;
1717
use std::fmt::Display;
1818
use std::fmt::Formatter;
19+
use std::hash::Hash;
1920
use std::ops::Deref;
2021

2122
use common_exception::ErrorCode;
@@ -219,6 +220,15 @@ impl PartialOrd for VariantValue {
219220
}
220221
}
221222

223+
#[allow(clippy::derive_hash_xor_eq)]
224+
impl Hash for VariantValue {
225+
fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
226+
let v = self.as_ref().to_string();
227+
let u = v.as_bytes();
228+
Hash::hash(&u, state);
229+
}
230+
}
231+
222232
impl Display for VariantValue {
223233
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
224234
write!(f, "{}", self.as_ref())

src/query/functions/src/scalars/hashes/hash_base.rs

+2-2
Original file line number | Diff line number | Diff line change
@@ -227,13 +227,13 @@ impl DFHash for DataValue {
227227
}
228228
DataValue::Array(vals) => {
229229
for v in vals {
230-
v.hash(state);
230+
DFHash::hash(v, state);
231231
Hash::hash(&',', state);
232232
}
233233
}
234234
DataValue::Struct(vals) => {
235235
for v in vals {
236-
v.hash(state);
236+
DFHash::hash(v, state);
237237
Hash::hash(&',', state);
238238
}
239239
}

src/query/service/src/interpreters/interpreter_copy_v2.rs

+1-1
Original file line number | Diff line number | Diff line change
@@ -186,7 +186,7 @@ impl CopyInterpreterV2 {
186186
let select_interpreter = SelectInterpreterV2::try_create(
187187
self.ctx.clone(),
188188
*(bind_context.clone()),
189-
s_expr.clone(),
189+
*s_expr.clone(),
190190
metadata.clone(),
191191
)?;
192192

src/query/service/src/interpreters/interpreter_explain_v2.rs

+2-2
Original file line number | Diff line number | Diff line change
@@ -56,7 +56,7 @@ impl Interpreter for ExplainInterpreterV2 {
5656
Plan::Query {
5757
s_expr, metadata, ..
5858
} => {
59-
self.explain_pipeline(s_expr.clone(), metadata.clone())
59+
self.explain_pipeline(*s_expr.clone(), metadata.clone())
6060
.await?
6161
}
6262
_ => {
@@ -67,7 +67,7 @@ impl Interpreter for ExplainInterpreterV2 {
6767
Plan::Query {
6868
s_expr, metadata, ..
6969
} => {
70-
self.explain_fragments(s_expr.clone(), metadata.clone())
70+
self.explain_fragments(*s_expr.clone(), metadata.clone())
7171
.await?
7272
}
7373
_ => {

src/query/service/src/interpreters/interpreter_factory_v2.rs

+1-1
Original file line number | Diff line number | Diff line change
@@ -65,7 +65,7 @@ impl InterpreterFactoryV2 {
6565
} => Ok(Arc::new(SelectInterpreterV2::try_create(
6666
ctx,
6767
*bind_context.clone(),
68-
s_expr.clone(),
68+
*s_expr.clone(),
6969
metadata.clone(),
7070
)?)),
7171
Plan::Explain { kind, plan } => Ok(Arc::new(ExplainInterpreterV2::try_create(

src/query/service/src/interpreters/interpreter_insert_v2.rs

+1-1
Original file line number | Diff line number | Diff line change
@@ -109,7 +109,7 @@ impl InsertInterpreterV2 {
109109
} => SelectInterpreterV2::try_create(
110110
self.ctx.clone(),
111111
*bind_context.clone(),
112-
s_expr.clone(),
112+
*s_expr.clone(),
113113
metadata.clone(),
114114
),
115115
_ => unreachable!(),

src/query/service/src/sql/optimizer/cascades/explore_rules.rs

+2-1
Original file line number | Diff line number | Diff line change
@@ -12,10 +12,11 @@
1212
// See the License for the specific language governing permissions and
1313
// limitations under the License.
1414

15+
use crate::sql::optimizer::RuleID;
1516
use crate::sql::optimizer::RuleSet;
1617

1718
pub fn get_explore_rule_set() -> RuleSet {
18-
RuleSet::create_with_ids(vec![]).unwrap()
19+
RuleSet::create_with_ids(vec![RuleID::CommuteJoin]).unwrap()
1920
}
2021

2122
#[cfg(test)]

src/query/service/src/sql/optimizer/cascades/implement_rules.rs

+1-1
Original file line number | Diff line number | Diff line change
@@ -16,7 +16,7 @@ use crate::sql::optimizer::RuleID;
1616
use crate::sql::optimizer::RuleSet;
1717

1818
pub fn get_implement_rule_set() -> RuleSet {
19-
RuleSet::create_with_ids(vec![RuleID::ImplementGet]).unwrap()
19+
RuleSet::create_with_ids(vec![RuleID::ImplementGet, RuleID::ImplementHashJoin]).unwrap()
2020
}
2121

2222
#[cfg(test)]

0 commit comments

Comments
 (0)