Skip to content

Commit d1f17fc

Browse files
authored
dq: add cbo cost functions (#7617)
1 parent 77fc854 commit d1f17fc

File tree

2 files changed

+78
-10
lines changed

2 files changed

+78
-10
lines changed

ydb/library/yql/providers/dq/opt/logical_optimize.cpp

+69-1
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,74 @@ bool IsStreamLookup(const TCoEquiJoinTuple& joinTuple) {
3838

3939
}
4040

41+
/**
42+
* DQ Specific cost function and join applicability cost function
43+
*/
44+
struct TDqCBOProviderContext : public NYql::TBaseProviderContext {
45+
TDqCBOProviderContext(TTypeAnnotationContext& typeCtx, const TDqConfiguration::TPtr& config)
46+
: NYql::TBaseProviderContext()
47+
, Config(config)
48+
, TypesCtx(typeCtx) {}
49+
50+
virtual bool IsJoinApplicable(const std::shared_ptr<NYql::IBaseOptimizerNode>& left,
51+
const std::shared_ptr<NYql::IBaseOptimizerNode>& right,
52+
const std::set<std::pair<NYql::NDq::TJoinColumn, NYql::NDq::TJoinColumn>>& joinConditions,
53+
const TVector<TString>& leftJoinKeys, const TVector<TString>& rightJoinKeys,
54+
NYql::EJoinAlgoType joinAlgo, NYql::EJoinKind joinKind) override;
55+
56+
virtual double ComputeJoinCost(const NYql::TOptimizerStatistics& leftStats, const NYql::TOptimizerStatistics& rightStats, const double outputRows, const double outputByteSize, NYql::EJoinAlgoType joinAlgo) const override;
57+
58+
TDqConfiguration::TPtr Config;
59+
TTypeAnnotationContext& TypesCtx;
60+
};
61+
62+
63+
/**
 * Decides whether `joinAlgo` may be used for a join of kind `joinKind`.
 *
 * Only `joinAlgo`, `joinKind` and the HashJoinMode configuration setting are
 * consulted; the join inputs, conditions and key lists are ignored.
 *
 *   - GraceJoin: always applicable.
 *   - MapJoin:   not applicable to OuterJoin or Exclusion joins; otherwise
 *                applicable only when HashJoinMode (Off when unset) is Off or Map.
 *   - any other algorithm: not applicable.
 */
bool TDqCBOProviderContext::IsJoinApplicable(
    const std::shared_ptr<NYql::IBaseOptimizerNode>& left,
    const std::shared_ptr<NYql::IBaseOptimizerNode>& right,
    const std::set<std::pair<NYql::NDq::TJoinColumn, NYql::NDq::TJoinColumn>>& joinConditions,
    const TVector<TString>& leftJoinKeys,
    const TVector<TString>& rightJoinKeys,
    NYql::EJoinAlgoType joinAlgo,
    NYql::EJoinKind joinKind)
{
    Y_UNUSED(left);
    Y_UNUSED(right);
    Y_UNUSED(joinConditions);
    Y_UNUSED(leftJoinKeys);
    Y_UNUSED(rightJoinKeys);

    if (joinAlgo == EJoinAlgoType::GraceJoin) {
        return true;
    }

    if (joinAlgo != EJoinAlgoType::MapJoin) {
        return false;
    }

    // From here on only MapJoin is being considered.
    const bool unsupportedKind = (joinKind == EJoinKind::OuterJoin) || (joinKind == EJoinKind::Exclusion);
    if (unsupportedKind) {
        return false;
    }

    // The setting is read only after the join kind check, as before.
    const auto hashJoinMode = Config->HashJoinMode.Get().GetOrElse(EHashJoinMode::Off);
    return hashJoinMode == EHashJoinMode::Off || hashJoinMode == EHashJoinMode::Map;
}
92+
93+
94+
double TDqCBOProviderContext::ComputeJoinCost(const TOptimizerStatistics& leftStats, const TOptimizerStatistics& rightStats, const double outputRows, const double outputByteSize, EJoinAlgoType joinAlgo) const {
95+
Y_UNUSED(outputByteSize);
96+
97+
switch(joinAlgo) {
98+
case EJoinAlgoType::MapJoin:
99+
return 1.5 * (leftStats.Nrows + 1.8 * rightStats.Nrows + outputRows);
100+
case EJoinAlgoType::GraceJoin:
101+
return 1.5 * (leftStats.Nrows + 2.0 * rightStats.Nrows + outputRows);
102+
default:
103+
Y_ENSURE(false, "Illegal join type encountered");
104+
return 0;
105+
}
106+
}
107+
108+
41109
class TDqsLogicalOptProposalTransformer : public TOptimizeTransformerBase {
42110
public:
43111
TDqsLogicalOptProposalTransformer(TTypeAnnotationContext* typeCtx, const TDqConfiguration::TPtr& config)
@@ -206,7 +274,7 @@ class TDqsLogicalOptProposalTransformer : public TOptimizeTransformerBase {
206274
};
207275

208276
std::unique_ptr<IOptimizerNew> opt;
209-
TBaseProviderContext pctx;
277+
TDqCBOProviderContext pctx(TypesCtx, Config);
210278

211279
switch (TypesCtx.CostBasedOptimizer) {
212280
case ECostBasedOptimizerType::Native:

ydb/library/yql/tests/sql/dq_file/part17/canondata/result.json

+9-9
Original file line numberDiff line numberDiff line change
@@ -755,23 +755,23 @@
755755
"test.test[dq-blacklisted_pragmas--Results]": [],
756756
"test.test[dq-join_cbo_native_3_tables--Analyze]": [
757757
{
758-
"checksum": "94e6af2e865eab35e76cc9963452ad0d",
759-
"size": 13889,
760-
"uri": "https://{canondata_backend}/1924537/25b076e867ef14e000e544c6dba85dd8ef6e3620/resource.tar.gz#test.test_dq-join_cbo_native_3_tables--Analyze_/plan.txt"
758+
"checksum": "90555f07378f801872485e6ac96dfd73",
759+
"size": 12314,
760+
"uri": "https://{canondata_backend}/1942278/d3f67196e7e0096e289743f5dbfd5dc2f990f9e6/resource.tar.gz#test.test_dq-join_cbo_native_3_tables--Analyze_/plan.txt"
761761
}
762762
],
763763
"test.test[dq-join_cbo_native_3_tables--Debug]": [
764764
{
765-
"checksum": "fd20054511c7328de8f8c6c45539b48b",
766-
"size": 5339,
767-
"uri": "https://{canondata_backend}/1936273/7a32049e7d34640d0891b0eccadb21c671bd9ed5/resource.tar.gz#test.test_dq-join_cbo_native_3_tables--Debug_/opt.yql_patched"
765+
"checksum": "91570a2f667516ba1f3f28642698441f",
766+
"size": 4802,
767+
"uri": "https://{canondata_backend}/1942278/d3f67196e7e0096e289743f5dbfd5dc2f990f9e6/resource.tar.gz#test.test_dq-join_cbo_native_3_tables--Debug_/opt.yql_patched"
768768
}
769769
],
770770
"test.test[dq-join_cbo_native_3_tables--Plan]": [
771771
{
772-
"checksum": "94e6af2e865eab35e76cc9963452ad0d",
773-
"size": 13889,
774-
"uri": "https://{canondata_backend}/1924537/25b076e867ef14e000e544c6dba85dd8ef6e3620/resource.tar.gz#test.test_dq-join_cbo_native_3_tables--Plan_/plan.txt"
772+
"checksum": "90555f07378f801872485e6ac96dfd73",
773+
"size": 12314,
774+
"uri": "https://{canondata_backend}/1942278/d3f67196e7e0096e289743f5dbfd5dc2f990f9e6/resource.tar.gz#test.test_dq-join_cbo_native_3_tables--Plan_/plan.txt"
775775
}
776776
],
777777
"test.test[dq-join_cbo_native_3_tables--Results]": [

0 commit comments

Comments
 (0)