Skip to content

Commit d9ff14c

Browse files
authored
[CBO] Join order hints added (#8106)
1 parent d468e6d commit d9ff14c

19 files changed

+323
-107
lines changed

ydb/core/kqp/opt/kqp_opt.h

+12
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@ struct TKqpOptimizeContext : public TSimpleRefCount<TKqpOptimizeContext> {
3333
std::shared_ptr<NJson::TJsonValue> OverrideStatistics{};
3434
std::shared_ptr<NYql::TCardinalityHints> CardinalityHints{};
3535
std::shared_ptr<NYql::TJoinAlgoHints> JoinAlgoHints{};
36+
std::shared_ptr<NYql::TJoinOrderHints> JoinOrderHints{};
3637

3738
std::shared_ptr<NJson::TJsonValue> GetOverrideStatistics() {
3839
if (Config->OptOverrideStatistics.Get()) {
@@ -70,6 +71,17 @@ struct TKqpOptimizeContext : public TSimpleRefCount<TKqpOptimizeContext> {
7071
}
7172
}
7273

74+
/*
 * Returns the join order hints taken from the OptJoinOrderHints setting.
 * If the setting holds no value, a default-constructed (empty) hints object
 * is returned. Otherwise the hints are built from the setting's string on the
 * first call, cached in JoinOrderHints, and a copy of the cached object is
 * returned.
 */
NYql::TJoinOrderHints GetJoinOrderHints() {
    if (!Config->OptJoinOrderHints.Get()) {
        return NYql::TJoinOrderHints();
    }

    // Build the hints only once; later calls reuse the cached instance.
    if (!JoinOrderHints) {
        JoinOrderHints = std::make_shared<NYql::TJoinOrderHints>(*Config->OptJoinOrderHints.Get());
    }
    return *JoinOrderHints;
}
84+
7385
bool IsDataQuery() const {
7486
return QueryCtx->Type == NYql::EKikimrQueryType::Dml;
7587
}

ydb/core/kqp/opt/logical/kqp_opt_log.cpp

+6-2
Original file line numberDiff line numberDiff line change
@@ -155,8 +155,12 @@ class TKqpLogicalOptTransformer : public TOptimizeTransformerBase {
155155
rels.emplace_back(std::make_shared<TKqpRelOptimizerNode>(TString(label), stat, node));
156156
},
157157
KqpCtx.EquiJoinsCount,
158-
KqpCtx.GetCardinalityHints(),
159-
KqpCtx.GetJoinAlgoHints());
158+
TOptimizerHints{
159+
.CardinalityHints = KqpCtx.GetCardinalityHints(),
160+
.JoinAlgoHints = KqpCtx.GetJoinAlgoHints(),
161+
.JoinOrderHints = KqpCtx.GetJoinOrderHints()
162+
}
163+
);
160164
DumpAppliedRule("OptimizeEquiJoinWithCosts", node.Ptr(), output.Ptr(), ctx);
161165
return output;
162166
}

ydb/core/kqp/provider/yql_kikimr_settings.cpp

+1
Original file line numberDiff line numberDiff line change
@@ -84,6 +84,7 @@ TKikimrConfiguration::TKikimrConfiguration() {
8484
REGISTER_SETTING(*this, OptOverrideStatistics);
8585
REGISTER_SETTING(*this, OptCardinalityHints);
8686
REGISTER_SETTING(*this, OptJoinAlgoHints);
87+
REGISTER_SETTING(*this, OptJoinOrderHints);
8788
REGISTER_SETTING(*this, OverridePlanner);
8889
REGISTER_SETTING(*this, UseGraceJoinCoreForMap);
8990

ydb/core/kqp/provider/yql_kikimr_settings.h

+1
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,7 @@ struct TKikimrSettings {
5757
NCommon::TConfSetting<TString, false> OptOverrideStatistics;
5858
NCommon::TConfSetting<TString, false> OptCardinalityHints;
5959
NCommon::TConfSetting<TString, false> OptJoinAlgoHints;
60+
NCommon::TConfSetting<TString, false> OptJoinOrderHints;
6061

6162
/* Disable optimizer rules */
6263
NCommon::TConfSetting<bool, false> OptDisableTopSort;

ydb/core/kqp/ut/common/kqp_ut_common.cpp

+28-5
Original file line numberDiff line numberDiff line change
@@ -1402,7 +1402,7 @@ bool JoinOrderAndAlgosMatch(const TString& optimized, const TString& reference){
14021402
}
14031403

14041404
/* Temporary solution to canonize tests */
1405-
NJson::TJsonValue CanonizeJoinOrderImpl(const NJson::TJsonValue& opt) {
1405+
NJson::TJsonValue GetDetailedJoinOrderImpl(const NJson::TJsonValue& opt) {
14061406
NJson::TJsonValue res;
14071407

14081408
auto op = opt.GetMapSafe().at("Operators").GetArraySafe()[0];
@@ -1416,18 +1416,41 @@ NJson::TJsonValue CanonizeJoinOrderImpl(const NJson::TJsonValue& opt) {
14161416

14171417
auto subplans = opt.GetMapSafe().at("Plans").GetArraySafe();
14181418
for (size_t i = 0; i< subplans.size(); ++i) {
1419-
res["args"].AppendValue(CanonizeJoinOrderImpl(subplans[i]));
1419+
res["args"].AppendValue(GetDetailedJoinOrderImpl(subplans[i]));
14201420
}
14211421
return res;
14221422
}
14231423

1424-
/* Temporary solution to canonize tests */
1425-
NJson::TJsonValue CanonizeJoinOrder(const TString& deserializedPlan) {
1424+
/*
 * Parses the serialized plan, takes its "SimplifiedPlan" entry and converts it
 * with GetDetailedJoinOrderImpl. SimplifyPlan is applied twice, exactly as the
 * surrounding helpers do.
 */
NJson::TJsonValue GetDetailedJoinOrder(const TString& deserializedPlan) {
    NJson::TJsonValue planJson;
    NJson::ReadJsonTree(deserializedPlan, &planJson, true);

    // First pass works on the "SimplifiedPlan" subtree of the parsed plan.
    NJson::TJsonValue simplified = SimplifyPlan(planJson.GetMapSafe().at("SimplifiedPlan"));

    // Second pass runs on the result of the first one.
    return GetDetailedJoinOrderImpl(SimplifyPlan(simplified));
}
1430+
1431+
/*
 * Recursively converts a plan node into a nested JSON array of table names.
 *
 * A node without a "Plans" entry is treated as a leaf: the result is the
 * "Table" string of its first operator. Any other node yields an array with
 * one element per sub-plan, in the order the sub-plans appear.
 *
 * Throws (via the *Safe accessors) if the node does not have the expected shape.
 */
NJson::TJsonValue GetJoinOrderImpl(const NJson::TJsonValue& opt) {
    // Bind by reference: copying here would deep-copy the whole subtree
    // at every level of the recursion.
    const auto& node = opt.GetMapSafe();

    if (!node.contains("Plans")) {
        const auto& op = node.at("Operators").GetArraySafe()[0];
        return op.GetMapSafe().at("Table").GetStringSafe();
    }

    NJson::TJsonValue res;
    for (const auto& subplan : node.at("Plans").GetArraySafe()) {
        res.AppendValue(GetJoinOrderImpl(subplan));
    }
    return res;
}
1446+
1447+
NJson::TJsonValue GetJoinOrder(const TString& deserializedPlan) {
14261448
NJson::TJsonValue optRoot;
14271449
NJson::ReadJsonTree(deserializedPlan, &optRoot, true);
14281450
optRoot = SimplifyPlan(optRoot.GetMapSafe().at("SimplifiedPlan"));
1429-
return CanonizeJoinOrderImpl(SimplifyPlan(optRoot));
1451+
return GetJoinOrderImpl(SimplifyPlan(optRoot));
14301452
}
14311453

1454+
14321455
} // namespace NKqp
14331456
} // namespace NKikimr

ydb/core/kqp/ut/common/kqp_ut_common.h

+5-2
Original file line numberDiff line numberDiff line change
@@ -364,8 +364,11 @@ void WaitForZeroSessions(const NKqp::TKqpCounters& counters);
364364

365365
bool JoinOrderAndAlgosMatch(const TString& optimized, const TString& reference);
366366

367-
/* Temporary solution to canonize tests */
368-
NJson::TJsonValue CanonizeJoinOrder(const TString& deserializedPlan);
367+
/* Gets the join order with details: join algorithm, join type and scan type. */
368+
NJson::TJsonValue GetDetailedJoinOrder(const TString& deserializedPlan);
369+
370+
/* Gets the join order of tables only, without any additional details. */
371+
NJson::TJsonValue GetJoinOrder(const TString& deserializedPlan);
369372

370373
} // namespace NKqp
371374
} // namespace NKikimr
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
PRAGMA TablePathPrefix='/Root';
2+
3+
PRAGMA ydb.OptJoinOrderHints='[ ["R", "S"], ["T", "U"] ]';
4+
PRAGMA ydb.OptCardinalityHints =
5+
'[
6+
{"labels":["R"], "op":"#", "value":10e8},
7+
{"labels":["T"], "op":"#", "value":1},
8+
{"labels":["R", "T"], "op":"#", "value":1},
9+
{"labels":["R", "S"], "op":"#", "value":10e8},
10+
{"labels":["T", "U"], "op":"#", "value":10e8},
11+
{"labels":["V"], "op":"#", "value":1}
12+
]';
13+
14+
SELECT * FROM
15+
R INNER JOIN S on R.id = S.id
16+
INNER JOIN T on R.id = T.id
17+
INNER JOIN U on T.id = U.id
18+
INNER JOIN V on U.id = V.id;
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
PRAGMA TablePathPrefix='/Root';
2+
3+
PRAGMA ydb.OptCardinalityHints =
4+
'[
5+
{"labels":["R"], "op":"#", "value":10e8},
6+
{"labels":["T"], "op":"#", "value":1},
7+
{"labels":["S"], "op":"#", "value":10e8},
8+
{"labels":["R", "T"], "op":"#", "value":1},
9+
{"labels":["R", "S"], "op":"#", "value":10e8}
10+
]';
11+
PRAGMA ydb.OptJoinOrderHints='[ "T", ["R", "S"] ]';
12+
13+
SELECT * FROM
14+
R INNER JOIN S on R.id = S.id
15+
INNER JOIN T on R.id = T.id

ydb/core/kqp/ut/join/kqp_join_order_ut.cpp

+16-5
Original file line numberDiff line numberDiff line change
@@ -168,7 +168,6 @@ void ExplainJoinOrderTestDataQuery(const TString& queryPath, bool useStreamLooku
168168
NJson::TJsonValue plan;
169169
NJson::ReadJsonTree(result.GetPlan(), &plan, true);
170170
Cerr << result.GetPlan() << Endl;
171-
Cerr << CanonizeJoinOrder(result.GetPlan()) << Endl;
172171
}
173172
}
174173

@@ -194,7 +193,7 @@ Y_UNIT_TEST_SUITE(KqpJoinOrder) {
194193
TChainTester(65).Test();
195194
}
196195

197-
void ExecuteJoinOrderTestDataQueryWithStats(const TString& queryPath, const TString& statsPath, bool useStreamLookupJoin, bool useColumnStore) {
196+
TString ExecuteJoinOrderTestDataQueryWithStats(const TString& queryPath, const TString& statsPath, bool useStreamLookupJoin, bool useColumnStore) {
198197
auto kikimr = GetKikimrWithJoinSettings(useStreamLookupJoin, GetStatic(statsPath));
199198
auto db = kikimr.GetTableClient();
200199
auto session = db.CreateSession().GetValueSync().GetSession();
@@ -208,7 +207,9 @@ Y_UNIT_TEST_SUITE(KqpJoinOrder) {
208207
auto execRes = db.StreamExecuteScanQuery(query, TStreamExecScanQuerySettings().Explain(true)).ExtractValueSync();
209208
execRes.GetIssues().PrintTo(Cerr);
210209
UNIT_ASSERT_VALUES_EQUAL(execRes.GetStatus(), EStatus::SUCCESS);
211-
Cerr << CollectStreamResult(execRes).PlanJson;
210+
auto plan = CollectStreamResult(execRes).PlanJson;
211+
Cerr << plan.GetRef();
212+
return plan.GetRef();
212213
}
213214
}
214215

@@ -256,7 +257,7 @@ Y_UNIT_TEST_SUITE(KqpJoinOrder) {
256257
);
257258
}
258259

259-
Y_UNIT_TEST_XOR_OR_BOTH_FALSE(FiveWayJoinWithPreds, StreamLookupJoin, ColumnStore) {
260+
Y_UNIT_TEST_XOR_OR_BOTH_FALSE(FiveWayJoinWithPreds, StreamLookupJoin, ColumnStore) {
260261
ExecuteJoinOrderTestDataQueryWithStats(
261262
"queries/five_way_join_with_preds.sql", "stats/basic.json", StreamLookupJoin, ColumnStore
262263
);
@@ -362,6 +363,16 @@ Y_UNIT_TEST_SUITE(KqpJoinOrder) {
362363
ExecuteJoinOrderTestDataQueryWithStats("queries/tpcds96.sql", "stats/tpcds1000s.json", StreamLookupJoin, ColumnStore);
363364
}
364365

366+
/* Runs the simple join order hints query and checks the resulting table join order. */
Y_UNIT_TEST_XOR_OR_BOTH_FALSE(TestJoinOrderHintsSimple, StreamLookupJoin, ColumnStore) {
    auto plan = ExecuteJoinOrderTestDataQueryWithStats(
        "queries/join_order_hints_simple.sql",
        "stats/basic.json",
        StreamLookupJoin,
        ColumnStore
    );

    UNIT_ASSERT_VALUES_EQUAL(GetJoinOrder(plan).GetStringRobust(), R"(["T",["R","S"]])");
}
370+
371+
/* Runs the complex join order hints query and checks the resulting table join order. */
Y_UNIT_TEST_XOR_OR_BOTH_FALSE(TestJoinOrderHintsComplex, StreamLookupJoin, ColumnStore) {
    auto plan = ExecuteJoinOrderTestDataQueryWithStats(
        "queries/join_order_hints_complex.sql",
        "stats/basic.json",
        StreamLookupJoin,
        ColumnStore
    );

    UNIT_ASSERT_VALUES_EQUAL(GetJoinOrder(plan).GetStringRobust(), R"([[["R","S"],["T","U"]],"V"])");
}
375+
365376
void JoinOrderTestWithOverridenStats(const TString& queryPath, const TString& statsPath, TString correctJoinOrderPath, bool useStreamLookupJoin, bool useColumnStore
366377
) {
367378
auto kikimr = GetKikimrWithJoinSettings(useStreamLookupJoin, GetStatic(statsPath));
@@ -386,7 +397,7 @@ Y_UNIT_TEST_SUITE(KqpJoinOrder) {
386397
correctJoinOrderPath = correctJoinOrderPath.substr(0, correctJoinOrderPath.find(".json")) + "_column_store.json";
387398
}
388399

389-
auto currentJoinOrder = CanonizeJoinOrder(result.GetPlan());
400+
auto currentJoinOrder = GetDetailedJoinOrder(result.GetPlan());
390401
Cerr << currentJoinOrder << Endl;
391402
/* to canonize the tests use --test-param CANONIZE_JOIN_ORDER_TESTS=TRUE */
392403
TString canonize = GetTestParam("CANONIZE_JOIN_ORDER_TESTS"); canonize.to_lower();

ydb/library/yql/core/cbo/cbo_optimizer_new.cpp

+30-2
Original file line numberDiff line numberDiff line change
@@ -6,8 +6,7 @@
66
#include <util/generic/hash.h>
77
#include <util/generic/hash_set.h>
88
#include <util/string/cast.h>
9-
10-
#include <library/cpp/disjoint_sets/disjoint_sets.h>
9+
#include <util/string/printf.h>
1110

1211
const TString& ToString(NYql::EJoinKind);
1312
const TString& ToString(NYql::EJoinAlgoType);
@@ -307,6 +306,35 @@ TCardinalityHints::TCardinalityHints(const TString& json) {
307306
}
308307
}
309308

309+
/*
 * Builds a join tree from the JSON form of the join order hints.
 *
 * A JSON array must contain exactly two elements and becomes a join node whose
 * children are built recursively from those elements. A JSON string becomes a
 * relation node labelled with that string. Anything else is rejected.
 *
 * Throws via Y_ENSURE when an array does not have exactly two elements or when
 * a leaf is not a string.
 */
std::shared_ptr<IBaseOptimizerNode> MakeJoinTreeFromJson(const NJson::TJsonValue& jsonTree) {
    if (jsonTree.IsArray()) {
        // Reference, not a copy: the array belongs to jsonTree, which outlives this call.
        const auto& children = jsonTree.GetArraySafe();
        // size() is a size_t, so the matching printf conversion is %zu.
        Y_ENSURE(children.size() == 2, Sprintf("Expected 2 inputs for JoinOrder hints, got: %zu", children.size()));

        // NOTE(review): the meaning of the trailing `true` is defined by
        // TJoinOptimizerNode's constructor, which is not visible here.
        auto joinNode = TJoinOptimizerNode(
            MakeJoinTreeFromJson(children[0]),
            MakeJoinTreeFromJson(children[1]),
            {},
            EJoinKind::Cross, // just a stub
            EJoinAlgoType::Undefined,
            true
        );
        return std::make_shared<TJoinOptimizerNode>(std::move(joinNode));
    }

    Y_ENSURE(
        jsonTree.IsString(),
        Sprintf("A relation must be a string for JoinOrder hints! Got %s, expected a string.", jsonTree.GetStringRobust().c_str())
    );
    // Leaf: a relation identified only by its label; the second argument is left null.
    return std::make_shared<TRelOptimizerNode>(jsonTree.GetStringSafe(), nullptr);
}
331+
332+
/*
 * Parses the JSON text of the join order hints and stores the resulting tree
 * in HintsTree. Tree construction is delegated to MakeJoinTreeFromJson.
 */
TJoinOrderHints::TJoinOrderHints(const TString& json) {
    auto parsedHints = NJson::TJsonValue();
    NJson::ReadJsonTree(json, &parsedHints, true);
    HintsTree = MakeJoinTreeFromJson(parsedHints);
}
337+
310338
TJoinAlgoHints::TJoinAlgoHints(const TString& json) {
311339
auto jsonValue = NJson::TJsonValue();
312340
NJson::ReadJsonTree(json, &jsonValue, true);

ydb/library/yql/core/cbo/cbo_optimizer_new.h

+15-2
Original file line numberDiff line numberDiff line change
@@ -108,6 +108,19 @@ struct TJoinAlgoHints {
108108
TJoinAlgoHints(const TString& json);
109109
};
110110

111+
struct TJoinOrderHints {
112+
std::shared_ptr<IBaseOptimizerNode> HintsTree;
113+
114+
TJoinOrderHints() {}
115+
TJoinOrderHints(const TString& json);
116+
};
117+
118+
struct TOptimizerHints {
119+
TCardinalityHints CardinalityHints;
120+
TJoinAlgoHints JoinAlgoHints;
121+
TJoinOrderHints JoinOrderHints;
122+
};
123+
111124
/**
112125
* This is a temporary structure for KQP provider
113126
* We will soon be supporting multiple providers and we will need to design
@@ -235,8 +248,8 @@ struct IOptimizerNew {
235248
virtual ~IOptimizerNew() = default;
236249
virtual std::shared_ptr<TJoinOptimizerNode> JoinSearch(
237250
const std::shared_ptr<TJoinOptimizerNode>& joinTree,
238-
TCardinalityHints hints = {},
239-
TJoinAlgoHints joinHints = {}) = 0;
251+
const TOptimizerHints& hints = {}
252+
) = 0;
240253
};
241254

242255
} // namespace NYql

ydb/library/yql/dq/opt/bitset.h

+1
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@ inline bool Overlaps(const TNodeSet& lhs, const TNodeSet& rhs) {
1515
return (lhs & rhs) != 0;
1616
}
1717

18+
/* Checks whether lhs is a subset of rhs. */
1819
template <typename TNodeSet>
1920
inline bool IsSubset(const TNodeSet& lhs, const TNodeSet& rhs) {
2021
return (lhs & rhs) == lhs;

ydb/library/yql/dq/opt/dq_opt_dphyp_solver.h

+6-6
Original file line numberDiff line numberDiff line change
@@ -39,18 +39,18 @@ class TDPHypSolver {
3939
TDPHypSolver(
4040
TJoinHypergraph<TNodeSet>& graph,
4141
IProviderContext& ctx,
42-
TCardinalityHints hints,
43-
TJoinAlgoHints joinHints
42+
const TCardinalityHints& hints,
43+
const TJoinAlgoHints& joinHints
4444
)
4545
: Graph_(graph)
4646
, NNodes_(graph.GetNodes().size())
4747
, Pctx_(ctx)
4848
{
49-
for (auto h : hints.Hints) {
49+
for (const auto& h : hints.Hints) {
5050
TNodeSet hintSet = Graph_.GetNodesByRelNames(h.JoinLabels);
5151
CardHintsTable_[hintSet] = h;
5252
}
53-
for (auto h : joinHints.Hints) {
53+
for (const auto& h : joinHints.Hints) {
5454
TNodeSet hintSet = Graph_.GetNodesByRelNames(h.JoinLabels);
5555
JoinAlgoHintsTable_[hintSet] = h;
5656
}
@@ -485,8 +485,8 @@ template<typename TNodeSet> void TDPHypSolver<TNodeSet>::EmitCsgCmp(const TNodeS
485485

486486
TNodeSet joined = s1 | s2;
487487

488-
auto maybeCardHint = CardHintsTable_.contains(joined) ? & CardHintsTable_.at(joined) : nullptr;
489-
auto maybeJoinAlgoHint = JoinAlgoHintsTable_.contains(joined) ? & JoinAlgoHintsTable_.at(joined) : nullptr;
488+
auto maybeCardHint = CardHintsTable_.contains(joined) ? & CardHintsTable_[joined] : nullptr;
489+
auto maybeJoinAlgoHint = JoinAlgoHintsTable_.contains(joined) ? & JoinAlgoHintsTable_[joined] : nullptr;
490490

491491
auto bestJoin = PickBestJoin(
492492
leftNodes,

0 commit comments

Comments
 (0)