Skip to content

Commit 73b4f84

Browse files
authored
Implement converter yt join tree -> optimizer join tree YQL-17437 (#1671)
1 parent a5a4ea2 commit 73b4f84

File tree

3 files changed

+149
-0
lines changed

3 files changed

+149
-0
lines changed

ydb/library/yql/providers/yt/provider/ut/yql_yt_cbo_ut.cpp

+49
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
#include <library/cpp/testing/unittest/registar.h>
22

33
#include <ydb/library/yql/providers/yt/provider/yql_yt_join_impl.h>
4+
#include <ydb/library/yql/core/cbo/cbo_optimizer_new.h>
45

56
namespace NYql {
67

@@ -70,6 +71,54 @@ Y_UNIT_TEST(OrderJoinsDoesNothingWhenCBODisabled) {
7071
UNIT_ASSERT_VALUES_EQUAL(tree, optimizedTree);
7172
}
7273

74+
// Converts a join of two leaves ("c" and "n") with BuildOptimizerJoinTree and checks
// the shape of the result: a join node at the root whose two children are relation
// nodes carrying the expected labels and row counts.
// NOTE(review): MakeOp/MakeLeaf are defined outside this view; the expected Nrows
// values below match the third MakeLeaf argument.
Y_UNIT_TEST(BuildOptimizerTree2Tables) {
    TExprContext exprCtx;
    auto tree = MakeOp({"c", "c_nationkey"}, {"n", "n_nationkey"}, {"c", "n"}, exprCtx);
    tree->Left = MakeLeaf({"c"}, {"c"}, 100000, 12333, exprCtx);
    tree->Right = MakeLeaf({"n"}, {"n"}, 1000, 1233, exprCtx);

    std::shared_ptr<IBaseOptimizerNode> resultTree;
    std::shared_ptr<IProviderContext> resultCtx;
    BuildOptimizerJoinTree(resultTree, resultCtx, tree);

    // Both outputs must be populated; checking before any dereference makes a
    // regression show up as a failed assertion rather than a crash of the test binary.
    UNIT_ASSERT(resultTree != nullptr);
    UNIT_ASSERT(resultCtx != nullptr);

    UNIT_ASSERT(resultTree->Kind == JoinNodeType);
    auto root = std::static_pointer_cast<TJoinOptimizerNode>(resultTree);
    UNIT_ASSERT(root->LeftArg != nullptr);
    UNIT_ASSERT(root->RightArg != nullptr);
    UNIT_ASSERT(root->LeftArg->Kind == RelNodeType);
    UNIT_ASSERT(root->RightArg->Kind == RelNodeType);

    // The downcasts are only valid because Kind was verified just above.
    auto left = std::static_pointer_cast<TRelOptimizerNode>(root->LeftArg);
    auto right = std::static_pointer_cast<TRelOptimizerNode>(root->RightArg);

    UNIT_ASSERT_VALUES_EQUAL(left->Label, "c");
    UNIT_ASSERT_VALUES_EQUAL(right->Label, "n");
    UNIT_ASSERT(left->Stats != nullptr);
    UNIT_ASSERT(right->Stats != nullptr);
    UNIT_ASSERT_VALUES_EQUAL(left->Stats->Nrows, 100000);
    UNIT_ASSERT_VALUES_EQUAL(right->Stats->Nrows, 1000);
}
97+
98+
// Same scenario as the plain two-table case, except that the join and the right leaf
// are created with an extra "e" entry in their last list argument. The converted right
// relation is still expected to be labelled "n".
// NOTE(review): MakeOp/MakeLeaf are defined outside this view; the meaning of their
// list arguments should be confirmed there.
Y_UNIT_TEST(BuildOptimizerTree2TablesComplexLabel) {
    TExprContext exprCtx;
    auto tree = MakeOp({"c", "c_nationkey"}, {"n", "n_nationkey"}, {"c", "n", "e"}, exprCtx);
    tree->Left = MakeLeaf({"c"}, {"c"}, 1000000, 1233333, exprCtx);
    tree->Right = MakeLeaf({"n"}, {"n", "e"}, 10000, 12333, exprCtx);

    std::shared_ptr<IBaseOptimizerNode> resultTree;
    std::shared_ptr<IProviderContext> resultCtx;
    BuildOptimizerJoinTree(resultTree, resultCtx, tree);

    // Both outputs must be populated; checking before any dereference makes a
    // regression show up as a failed assertion rather than a crash of the test binary.
    UNIT_ASSERT(resultTree != nullptr);
    UNIT_ASSERT(resultCtx != nullptr);

    UNIT_ASSERT(resultTree->Kind == JoinNodeType);
    auto root = std::static_pointer_cast<TJoinOptimizerNode>(resultTree);
    UNIT_ASSERT(root->LeftArg != nullptr);
    UNIT_ASSERT(root->RightArg != nullptr);
    UNIT_ASSERT(root->LeftArg->Kind == RelNodeType);
    UNIT_ASSERT(root->RightArg->Kind == RelNodeType);

    // The downcasts are only valid because Kind was verified just above.
    auto left = std::static_pointer_cast<TRelOptimizerNode>(root->LeftArg);
    auto right = std::static_pointer_cast<TRelOptimizerNode>(root->RightArg);

    UNIT_ASSERT_VALUES_EQUAL(left->Label, "c");
    UNIT_ASSERT_VALUES_EQUAL(right->Label, "n");
    UNIT_ASSERT(left->Stats != nullptr);
    UNIT_ASSERT(right->Stats != nullptr);
    UNIT_ASSERT_VALUES_EQUAL(left->Stats->Nrows, 1000000);
    UNIT_ASSERT_VALUES_EQUAL(right->Stats->Nrows, 10000);
}
121+
73122
#define ADD_TEST(Name) \
74123
Y_UNIT_TEST(Name ## _PG) { \
75124
Name(ECostBasedOptimizerType::PG); \

ydb/library/yql/providers/yt/provider/yql_yt_join_impl.h

+5
Original file line numberDiff line numberDiff line change
@@ -66,4 +66,9 @@ IGraphTransformer::TStatus RewriteYtEquiJoin(TYtEquiJoin equiJoin, TYtJoinNodeOp
6666
TMaybeNode<TExprBase> ExportYtEquiJoin(TYtEquiJoin equiJoin, const TYtJoinNodeOp& op, TExprContext& ctx, const TYtState::TPtr& state);
6767
TYtJoinNodeOp::TPtr OrderJoins(TYtJoinNodeOp::TPtr op, const TYtState::TPtr& state, TExprContext& ctx, bool debug = false);
6868

69+
struct IBaseOptimizerNode;
70+
struct IProviderContext;
71+
72+
// Converts the YT join tree rooted at `op` into an optimizer join tree.
// Both `tree` and `ctx` are output parameters: on return `tree` holds the converted
// root node and `ctx` holds a newly created provider context; any previous values
// are overwritten.
void BuildOptimizerJoinTree(std::shared_ptr<IBaseOptimizerNode>& tree, std::shared_ptr<IProviderContext>& ctx, TYtJoinNodeOp::TPtr op);
73+
6974
}

ydb/library/yql/providers/yt/provider/yql_yt_join_reorder.cpp

+95
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
#include <ydb/library/yql/parser/pg_wrapper/interface/optimizer.h>
55
#include <ydb/library/yql/providers/common/provider/yql_provider.h>
66
#include <ydb/library/yql/utils/log/log.h>
7+
#include <ydb/library/yql/core/cbo/cbo_optimizer_new.h>
78

89
#include <ydb/library/yql/dq/opt/dq_opt_log.h>
910

@@ -408,8 +409,102 @@ class TJoinReorderer {
408409
IOptimizer::TOutput Result;
409410
};
410411

412+
class TOptimizerTreeBuilder
413+
{
414+
public:
415+
TOptimizerTreeBuilder(std::shared_ptr<IBaseOptimizerNode>& tree, std::shared_ptr<IProviderContext>& ctx, TYtJoinNodeOp::TPtr inputTree)
416+
: Tree(tree)
417+
, Ctx(ctx)
418+
, InputTree(inputTree)
419+
{ }
420+
421+
void Do() {
422+
Ctx = std::make_shared<TDummyProviderContext>();
423+
Tree = ProcessNode(InputTree);
424+
}
425+
426+
private:
427+
std::shared_ptr<IBaseOptimizerNode> ProcessNode(TYtJoinNode::TPtr node) {
428+
if (auto* op = dynamic_cast<TYtJoinNodeOp*>(node.Get())) {
429+
return OnOp(op);
430+
} else if (auto* leaf = dynamic_cast<TYtJoinNodeLeaf*>(node.Get())) {
431+
return OnLeaf(leaf);
432+
} else {
433+
YQL_ENSURE("Unknown node type");
434+
return nullptr;
435+
}
436+
}
437+
438+
std::shared_ptr<IBaseOptimizerNode> OnOp(TYtJoinNodeOp* op) {
439+
auto joinKind = ConvertToJoinKind(TString(op->JoinKind->Content()));
440+
auto left = ProcessNode(op->Left);
441+
auto right = ProcessNode(op->Right);
442+
YQL_ENSURE(op->LeftLabel->ChildrenSize() == op->RightLabel->ChildrenSize());
443+
std::set<std::pair<NDq::TJoinColumn, NDq::TJoinColumn>> joinConditions;
444+
for (ui32 i = 0; i < op->LeftLabel->ChildrenSize(); i += 2) {
445+
auto ltable = op->LeftLabel->Child(i)->Content();
446+
auto lcolumn = op->LeftLabel->Child(i + 1)->Content();
447+
auto rtable = op->RightLabel->Child(i)->Content();
448+
auto rcolumn = op->RightLabel->Child(i + 1)->Content();
449+
NDq::TJoinColumn lcol{TString(ltable), TString(lcolumn)};
450+
NDq::TJoinColumn rcol{TString(rtable), TString(rcolumn)};
451+
joinConditions.insert({lcol, rcol});
452+
}
453+
454+
return std::make_shared<TJoinOptimizerNode>(
455+
left, right, joinConditions, joinKind, EJoinAlgoType::GraceJoin
456+
);
457+
}
458+
459+
std::shared_ptr<IBaseOptimizerNode> OnLeaf(TYtJoinNodeLeaf* leaf) {
460+
TString label;
461+
if (leaf->Label->ChildrenSize() == 0) {
462+
label = leaf->Label->Content();
463+
} else {
464+
for (ui32 i = 0; i < leaf->Label->ChildrenSize(); ++i) {
465+
label += leaf->Label->Child(i)->Content();
466+
if (i+1 != leaf->Label->ChildrenSize()) {
467+
label += ",";
468+
}
469+
}
470+
}
471+
472+
TYtSection section{leaf->Section};
473+
auto stat = std::make_shared<TOptimizerStatistics>();
474+
if (Y_UNLIKELY(!section.Settings().Empty()) && Y_UNLIKELY(section.Settings().Item(0).Name() == "Test")) {
475+
for (const auto& setting : section.Settings()) {
476+
if (setting.Name() == "Rows") {
477+
stat->Nrows += FromString<ui64>(setting.Value().Ref().Content());
478+
} else if (setting.Name() == "Size") {
479+
stat->Cost += FromString<ui64>(setting.Value().Ref().Content());
480+
}
481+
}
482+
} else {
483+
for (auto path: section.Paths()) {
484+
auto tableStat = TYtTableBaseInfo::GetStat(path.Table());
485+
stat->Cost += tableStat->DataSize;
486+
stat->Nrows += tableStat->RecordsCount;
487+
}
488+
}
489+
490+
return std::make_shared<TRelOptimizerNode>(
491+
std::move(label), std::move(stat)
492+
);
493+
}
494+
495+
std::shared_ptr<IBaseOptimizerNode>& Tree;
496+
std::shared_ptr<IProviderContext>& Ctx;
497+
498+
TYtJoinNodeOp::TPtr InputTree;
499+
};
500+
411501
} // namespace
412502

503+
void BuildOptimizerJoinTree(std::shared_ptr<IBaseOptimizerNode>& tree, std::shared_ptr<IProviderContext>& ctx, TYtJoinNodeOp::TPtr op)
504+
{
505+
TOptimizerTreeBuilder(tree, ctx, op).Do();
506+
}
507+
413508
TYtJoinNodeOp::TPtr OrderJoins(TYtJoinNodeOp::TPtr op, const TYtState::TPtr& state, TExprContext& ctx, bool debug)
414509
{
415510
if (state->Types->CostBasedOptimizer == ECostBasedOptimizerType::Disable) {

0 commit comments

Comments
 (0)