Skip to content

Commit c1d313f

Browse files
Merge ae6e17a into c4ac4da
2 parents c4ac4da + ae6e17a commit c1d313f

File tree

8 files changed

+269
-79
lines changed

8 files changed

+269
-79
lines changed

ydb/core/kqp/opt/logical/kqp_opt_cbo.cpp

+7-7
Original file line number | Diff line number | Diff line change
@@ -90,9 +90,12 @@ bool IsLookupJoinApplicableDetailed(const std::shared_ptr<NYql::TRelOptimizerNod
9090
bool IsLookupJoinApplicable(std::shared_ptr<IBaseOptimizerNode> left,
9191
std::shared_ptr<IBaseOptimizerNode> right,
9292
const std::set<std::pair<TJoinColumn, TJoinColumn>>& joinConditions,
93+
const TVector<TString>& leftJoinKeys,
94+
const TVector<TString>& rightJoinKeys,
9395
TKqpProviderContext& ctx) {
9496

9597
Y_UNUSED(left);
98+
Y_UNUSED(leftJoinKeys);
9699

97100
auto rightStats = right->Stats;
98101

@@ -114,27 +117,24 @@ bool IsLookupJoinApplicable(std::shared_ptr<IBaseOptimizerNode> left,
114117
}
115118
}
116119

117-
TVector<TString> joinKeys;
118-
for( auto [leftJc, rightJc] : joinConditions ) {
119-
joinKeys.emplace_back( rightJc.AttributeName);
120-
}
121-
122-
return IsLookupJoinApplicableDetailed(std::static_pointer_cast<TRelOptimizerNode>(right), joinKeys, ctx);
120+
return IsLookupJoinApplicableDetailed(std::static_pointer_cast<TRelOptimizerNode>(right), rightJoinKeys, ctx);
123121
}
124122

125123
}
126124

127125
bool TKqpProviderContext::IsJoinApplicable(const std::shared_ptr<IBaseOptimizerNode>& left,
128126
const std::shared_ptr<IBaseOptimizerNode>& right,
129127
const std::set<std::pair<NDq::TJoinColumn, NDq::TJoinColumn>>& joinConditions,
128+
const TVector<TString>& leftJoinKeys,
129+
const TVector<TString>& rightJoinKeys,
130130
EJoinAlgoType joinAlgo) {
131131

132132
switch( joinAlgo ) {
133133
case EJoinAlgoType::LookupJoin:
134134
if (OptLevel==2 && left->Stats->Nrows > 10e3) {
135135
return false;
136136
}
137-
return IsLookupJoinApplicable(left, right, joinConditions, *this);
137+
return IsLookupJoinApplicable(left, right, joinConditions, leftJoinKeys, rightJoinKeys, *this);
138138

139139
case EJoinAlgoType::DictJoin:
140140
return right->Stats->Nrows < 10e5;

ydb/core/kqp/opt/logical/kqp_opt_cbo.h

+1
Original file line number | Diff line number | Diff line change
@@ -26,6 +26,7 @@ struct TKqpProviderContext : public NYql::IProviderContext {
2626
virtual bool IsJoinApplicable(const std::shared_ptr<NYql::IBaseOptimizerNode>& left,
2727
const std::shared_ptr<NYql::IBaseOptimizerNode>& right,
2828
const std::set<std::pair<NYql::NDq::TJoinColumn, NYql::NDq::TJoinColumn>>& joinConditions,
29+
const TVector<TString>& leftJoinKeys, const TVector<TString>& rightJoinKeys,
2930
NYql::EJoinAlgoType joinAlgo) override;
3031

3132
virtual double ComputeJoinCost(const NYql::TOptimizerStatistics& leftStats, const NYql::TOptimizerStatistics& rightStats, NYql::EJoinAlgoType joinAlgo) const override;

ydb/library/yql/core/cbo/cbo_optimizer_new.cpp

+8-2
Original file line number | Diff line number | Diff line change
@@ -67,10 +67,14 @@ TJoinOptimizerNode::TJoinOptimizerNode(const std::shared_ptr<IBaseOptimizerNode>
6767
IBaseOptimizerNode(JoinNodeType),
6868
LeftArg(left),
6969
RightArg(right),
70-
JoinConditions(joinConditions),
70+
JoinConditions(joinConditions),
7171
JoinType(joinType),
7272
JoinAlgo(joinAlgo) {
7373
IsReorderable = (JoinType==EJoinKind::InnerJoin) && (nonReorderable==false);
74+
for (auto [l,r] : joinConditions ) {
75+
LeftJoinKeys.push_back(l.AttributeName);
76+
RightJoinKeys.push_back(r.AttributeName);
77+
}
7478
}
7579

7680
TVector<TString> TJoinOptimizerNode::Labels() {
@@ -97,7 +101,9 @@ void TJoinOptimizerNode::Print(std::stringstream& stream, int ntabs) {
97101
stream << "\t";
98102
}
99103

100-
stream << *Stats << "\n";
104+
if (Stats) {
105+
stream << *Stats << "\n";
106+
}
101107

102108
LeftArg->Print(stream, ntabs+1);
103109
RightArg->Print(stream, ntabs+1);

ydb/library/yql/core/cbo/cbo_optimizer_new.h

+15-3
Original file line number | Diff line number | Diff line change
@@ -92,6 +92,8 @@ struct IProviderContext {
9292
virtual bool IsJoinApplicable(const std::shared_ptr<IBaseOptimizerNode>& left,
9393
const std::shared_ptr<IBaseOptimizerNode>& right,
9494
const std::set<std::pair<NDq::TJoinColumn, NDq::TJoinColumn>>& joinConditions,
95+
const TVector<TString>& leftJoinKeys,
96+
const TVector<TString>& rightJoinKeys,
9597
EJoinAlgoType joinAlgo) = 0;
9698

9799
};
@@ -111,11 +113,15 @@ struct TDummyProviderContext : public IProviderContext {
111113
bool IsJoinApplicable(const std::shared_ptr<IBaseOptimizerNode>& left,
112114
const std::shared_ptr<IBaseOptimizerNode>& right,
113115
const std::set<std::pair<NDq::TJoinColumn, NDq::TJoinColumn>>& joinConditions,
116+
const TVector<TString>& leftJoinKeys,
117+
const TVector<TString>& rightJoinKeys,
114118
EJoinAlgoType joinAlgo) override {
115119

116120
Y_UNUSED(left);
117121
Y_UNUSED(right);
118122
Y_UNUSED(joinConditions);
123+
Y_UNUSED(leftJoinKeys);
124+
Y_UNUSED(rightJoinKeys);
119125
Y_UNUSED(joinAlgo);
120126

121127
return true;
@@ -137,13 +143,19 @@ struct TDummyProviderContext : public IProviderContext {
137143
struct TJoinOptimizerNode : public IBaseOptimizerNode {
138144
std::shared_ptr<IBaseOptimizerNode> LeftArg;
139145
std::shared_ptr<IBaseOptimizerNode> RightArg;
140-
std::set<std::pair<NDq::TJoinColumn, NDq::TJoinColumn>> JoinConditions;
146+
const std::set<std::pair<NDq::TJoinColumn, NDq::TJoinColumn>> JoinConditions;
147+
TVector<TString> LeftJoinKeys;
148+
TVector<TString> RightJoinKeys;
141149
EJoinKind JoinType;
142150
EJoinAlgoType JoinAlgo;
143151
bool IsReorderable;
144152

145-
TJoinOptimizerNode(const std::shared_ptr<IBaseOptimizerNode>& left, const std::shared_ptr<IBaseOptimizerNode>& right,
146-
const std::set<std::pair<NDq::TJoinColumn, NDq::TJoinColumn>>& joinConditions, const EJoinKind joinType, const EJoinAlgoType joinAlgo, bool nonReorderable=false);
153+
TJoinOptimizerNode(const std::shared_ptr<IBaseOptimizerNode>& left,
154+
const std::shared_ptr<IBaseOptimizerNode>& right,
155+
const std::set<std::pair<NDq::TJoinColumn, NDq::TJoinColumn>>& joinConditions,
156+
const EJoinKind joinType,
157+
const EJoinAlgoType joinAlgo,
158+
bool nonReorderable=false);
147159
virtual ~TJoinOptimizerNode() {}
148160
virtual TVector<TString> Labels();
149161
virtual void Print(std::stringstream& stream, int ntabs=0);

ydb/library/yql/core/yql_cost_function.cpp

+9-4
Original file line number | Diff line number | Diff line change
@@ -43,11 +43,13 @@ TOptimizerStatistics NYql::ComputeJoinStats(const TOptimizerStatistics& leftStat
4343

4444
double newCard;
4545
EStatisticsType outputType;
46-
TVector<TString> joinedTableKeys;
46+
bool leftKeyColumns = false;
47+
bool rightKeyColumns = false;
48+
4749

4850
if (IsPKJoin(rightStats,rightJoinKeys)) {
4951
newCard = leftStats.Nrows;
50-
joinedTableKeys = leftStats.KeyColumns;
52+
leftKeyColumns = true;
5153
if (leftStats.Type == EStatisticsType::BaseTable){
5254
outputType = EStatisticsType::FilteredFactTable;
5355
} else {
@@ -56,7 +58,7 @@ TOptimizerStatistics NYql::ComputeJoinStats(const TOptimizerStatistics& leftStat
5658
}
5759
else if (IsPKJoin(leftStats,leftJoinKeys)) {
5860
newCard = rightStats.Nrows;
59-
joinedTableKeys = rightStats.KeyColumns;
61+
rightKeyColumns = true;
6062
if (rightStats.Type == EStatisticsType::BaseTable){
6163
outputType = EStatisticsType::FilteredFactTable;
6264
} else {
@@ -74,9 +76,11 @@ TOptimizerStatistics NYql::ComputeJoinStats(const TOptimizerStatistics& leftStat
7476
+ newCard
7577
+ leftStats.Cost + rightStats.Cost;
7678

77-
return TOptimizerStatistics(outputType, newCard, newNCols, cost, joinedTableKeys);
79+
return TOptimizerStatistics(outputType, newCard, newNCols, cost,
80+
leftKeyColumns ? leftStats.KeyColumns : ( rightKeyColumns ? rightStats.KeyColumns : TOptimizerStatistics::EmptyColumns));
7881
}
7982

83+
8084
TOptimizerStatistics NYql::ComputeJoinStats(const TOptimizerStatistics& leftStats, const TOptimizerStatistics& rightStats,
8185
const std::set<std::pair<NDq::TJoinColumn, NDq::TJoinColumn>>& joinConditions, EJoinAlgoType joinAlgo, const IProviderContext& ctx) {
8286

@@ -90,3 +94,4 @@ TOptimizerStatistics NYql::ComputeJoinStats(const TOptimizerStatistics& leftStat
9094

9195
return ComputeJoinStats(leftStats, rightStats, leftJoinKeys, rightJoinKeys, joinAlgo, ctx);
9296
}
97+

ydb/library/yql/core/yql_statistics.cpp

+2
Original file line number | Diff line number | Diff line change
@@ -17,3 +17,5 @@ TOptimizerStatistics& TOptimizerStatistics::operator+=(const TOptimizerStatistic
1717
Cost += other.Cost;
1818
return *this;
1919
}
20+
21+
const TVector<TString>& TOptimizerStatistics::EmptyColumns = TVector<TString>();

ydb/library/yql/core/yql_statistics.h

+8-10
Original file line number | Diff line number | Diff line change
@@ -25,21 +25,19 @@ struct TOptimizerStatistics {
2525
double Nrows = 0;
2626
int Ncols = 0;
2727
double Cost;
28-
TVector<TString> KeyColumns;
29-
30-
TString Descr;
31-
32-
TOptimizerStatistics() {}
33-
TOptimizerStatistics(double nrows, int ncols): Nrows(nrows), Ncols(ncols) {}
34-
TOptimizerStatistics(double nrows, int ncols, double cost): Nrows(nrows), Ncols(ncols), Cost(cost) {}
35-
TOptimizerStatistics(EStatisticsType type, double nrows, int ncols, double cost): Type(type), Nrows(nrows), Ncols(ncols), Cost(cost) {}
36-
TOptimizerStatistics(EStatisticsType type, double nrows, int ncols, double cost, TVector<TString> keyColumns): Type(type), Nrows(nrows), Ncols(ncols), Cost(cost), KeyColumns(keyColumns) {}
37-
TOptimizerStatistics(double nrows,int ncols, double cost, TString descr): Nrows(nrows), Ncols(ncols), Cost(cost), Descr(descr) {}
28+
const TVector<TString>& KeyColumns;
3829

30+
TOptimizerStatistics() : KeyColumns(EmptyColumns) {}
31+
TOptimizerStatistics(double nrows, int ncols): Nrows(nrows), Ncols(ncols), KeyColumns(EmptyColumns) {}
32+
TOptimizerStatistics(double nrows, int ncols, double cost): Nrows(nrows), Ncols(ncols), Cost(cost), KeyColumns(EmptyColumns) {}
33+
TOptimizerStatistics(EStatisticsType type, double nrows, int ncols, double cost): Type(type), Nrows(nrows), Ncols(ncols), Cost(cost), KeyColumns(EmptyColumns) {}
34+
TOptimizerStatistics(EStatisticsType type, double nrows, int ncols, double cost, const TVector<TString>& keyColumns): Type(type), Nrows(nrows), Ncols(ncols), Cost(cost), KeyColumns(keyColumns) {}
3935

4036
TOptimizerStatistics& operator+=(const TOptimizerStatistics& other);
4137
bool Empty() const;
4238

4339
friend std::ostream& operator<<(std::ostream& os, const TOptimizerStatistics& s);
40+
41+
static const TVector<TString>& EmptyColumns;
4442
};
4543
}

0 commit comments

Comments
 (0)