|
| 1 | +#include <library/cpp/testing/unittest/registar.h> |
| 2 | +#include <library/cpp/testing/hook/hook.h> |
| 3 | +#include <ydb/library/yql/core/yql_type_annotation.h> |
| 4 | +#include <ydb/library/yql/providers/common/provider/yql_provider.h> |
| 5 | +#include <ydb/library/yql/parser/pg_wrapper/interface/optimizer.h> |
| 6 | + |
| 7 | +#include <ydb/library/yql/dq/opt/dq_opt_log.h> |
| 8 | +#include <ydb/library/yql/dq/opt/dq_opt_join.h> |
| 9 | + |
| 10 | +using namespace NYql; |
| 11 | +using namespace NNodes; |
| 12 | +using namespace NYql::NDq; |
| 13 | + |
| 14 | +namespace { |
| 15 | + |
| 16 | +TExprNode::TPtr MakeLabel(TExprContext& ctx, const std::vector<TStringBuf>& vars) { |
| 17 | + TVector<TExprNodePtr> label; label.reserve(vars.size()); |
| 18 | + |
| 19 | + auto pos = ctx.AppendPosition({}); |
| 20 | + for (auto var : vars) { |
| 21 | + label.emplace_back(ctx.NewAtom(pos, var)); |
| 22 | + } |
| 23 | + |
| 24 | + return Build<TCoAtomList>(ctx, pos) |
| 25 | + .Add(label) |
| 26 | + .Done() |
| 27 | + .Ptr(); |
| 28 | +} |
| 29 | + |
| 30 | +} // namespace |
| 31 | + |
| 32 | +Y_UNIT_TEST_SUITE(DQCBO) { |
| 33 | + |
| 34 | +Y_UNIT_TEST(Empty) { |
| 35 | + TBaseProviderContext pctx; |
| 36 | + TExprContext dummyCtx; |
| 37 | + std::unique_ptr<IOptimizerNew> optimizer = std::unique_ptr<IOptimizerNew>(MakeNativeOptimizerNew(pctx, 100000, dummyCtx)); |
| 38 | +} |
| 39 | + |
| 40 | +Y_UNIT_TEST(JoinSearch2Rels) { |
| 41 | + TBaseProviderContext pctx; |
| 42 | + TExprContext dummyCtx; |
| 43 | + std::unique_ptr<IOptimizerNew> optimizer = std::unique_ptr<IOptimizerNew>(MakeNativeOptimizerNew(pctx, 100000, dummyCtx)); |
| 44 | + |
| 45 | + auto rel1 = std::make_shared<TRelOptimizerNode>( |
| 46 | + "a", |
| 47 | + TOptimizerStatistics(BaseTable, 100000, 1, 0, 1000000) |
| 48 | + ); |
| 49 | + auto rel2 = std::make_shared<TRelOptimizerNode>( |
| 50 | + "b", |
| 51 | + TOptimizerStatistics(BaseTable, 1000000, 1, 0, 9000009) |
| 52 | + ); |
| 53 | + |
| 54 | + TVector<NDq::TJoinColumn> leftKeys = {NDq::TJoinColumn("a", "1")}; |
| 55 | + TVector<NDq::TJoinColumn> rightKeys ={NDq::TJoinColumn("b", "1")}; |
| 56 | + |
| 57 | + auto op = std::make_shared<TJoinOptimizerNode>( |
| 58 | + std::static_pointer_cast<IBaseOptimizerNode>(rel1), |
| 59 | + std::static_pointer_cast<IBaseOptimizerNode>(rel2), |
| 60 | + leftKeys, |
| 61 | + rightKeys, |
| 62 | + InnerJoin, |
| 63 | + EJoinAlgoType::GraceJoin, |
| 64 | + true, |
| 65 | + false |
| 66 | + ); |
| 67 | + |
| 68 | + auto res = optimizer->JoinSearch(op); |
| 69 | + std::stringstream ss; |
| 70 | + res->Print(ss); |
| 71 | + Cout << ss.str() << '\n'; |
| 72 | + TString expected = R"__(Join: (InnerJoin,MapJoin,RightAny) b.1=a.1, |
| 73 | +Type: ManyManyJoin, Nrows: 2e+10, Ncols: 2, ByteSize: 0, Cost: 2.00112e+10, Sel: 1, Storage: NA |
| 74 | + Rel: b |
| 75 | + Type: BaseTable, Nrows: 1e+06, Ncols: 1, ByteSize: 0, Cost: 9.00001e+06, Sel: 1, Storage: NA |
| 76 | + Rel: a |
| 77 | + Type: BaseTable, Nrows: 100000, Ncols: 1, ByteSize: 0, Cost: 1e+06, Sel: 1, Storage: NA |
| 78 | +)__"; |
| 79 | + |
| 80 | + UNIT_ASSERT_STRINGS_EQUAL(expected, ss.str()); |
| 81 | +} |
| 82 | + |
| 83 | +Y_UNIT_TEST(JoinSearch3Rels) { |
| 84 | + TBaseProviderContext pctx; |
| 85 | + TExprContext dummyCtx; |
| 86 | + std::unique_ptr<IOptimizerNew> optimizer = std::unique_ptr<IOptimizerNew>(MakeNativeOptimizerNew(pctx, 100000, dummyCtx)); |
| 87 | + |
| 88 | + auto rel1 = std::make_shared<TRelOptimizerNode>("a", |
| 89 | + TOptimizerStatistics(BaseTable, 100000, 1, 0, 1000000)); |
| 90 | + auto rel2 = std::make_shared<TRelOptimizerNode>("b", |
| 91 | + TOptimizerStatistics(BaseTable, 1000000, 1, 0, 9000009)); |
| 92 | + auto rel3 = std::make_shared<TRelOptimizerNode>("c", |
| 93 | + TOptimizerStatistics(BaseTable, 10000, 1, 0, 9009)); |
| 94 | + |
| 95 | + TVector<NDq::TJoinColumn> leftKeys = {NDq::TJoinColumn("a", "1")}; |
| 96 | + TVector<NDq::TJoinColumn> rightKeys ={NDq::TJoinColumn("b", "1")}; |
| 97 | + |
| 98 | + auto op1 = std::make_shared<TJoinOptimizerNode>( |
| 99 | + std::static_pointer_cast<IBaseOptimizerNode>(rel1), |
| 100 | + std::static_pointer_cast<IBaseOptimizerNode>(rel2), |
| 101 | + leftKeys, |
| 102 | + rightKeys, |
| 103 | + InnerJoin, |
| 104 | + EJoinAlgoType::GraceJoin, |
| 105 | + false, |
| 106 | + false |
| 107 | + ); |
| 108 | + |
| 109 | + leftKeys.push_back(NDq::TJoinColumn("a", "1")); |
| 110 | + rightKeys.push_back(NDq::TJoinColumn("c", "1")); |
| 111 | + |
| 112 | + auto op2 = std::make_shared<TJoinOptimizerNode>( |
| 113 | + std::static_pointer_cast<IBaseOptimizerNode>(op1), |
| 114 | + std::static_pointer_cast<IBaseOptimizerNode>(rel3), |
| 115 | + leftKeys, |
| 116 | + rightKeys, |
| 117 | + InnerJoin, |
| 118 | + EJoinAlgoType::GraceJoin, |
| 119 | + true, |
| 120 | + false |
| 121 | + ); |
| 122 | + |
| 123 | + auto res = optimizer->JoinSearch(op2); |
| 124 | + std::stringstream ss; |
| 125 | + res->Print(ss); |
| 126 | + Cout << ss.str() << '\n'; |
| 127 | + |
| 128 | + TString expected = R"__(Join: (InnerJoin,MapJoin,LeftAny) a.1=b.1, |
| 129 | +Type: ManyManyJoin, Nrows: 4e+13, Ncols: 3, ByteSize: 0, Cost: 4.004e+13, Sel: 1, Storage: NA |
| 130 | + Join: (InnerJoin,MapJoin) b.1=a.1, |
| 131 | + Type: ManyManyJoin, Nrows: 2e+10, Ncols: 2, ByteSize: 0, Cost: 2.00112e+10, Sel: 1, Storage: NA |
| 132 | + Rel: b |
| 133 | + Type: BaseTable, Nrows: 1e+06, Ncols: 1, ByteSize: 0, Cost: 9.00001e+06, Sel: 1, Storage: NA |
| 134 | + Rel: a |
| 135 | + Type: BaseTable, Nrows: 100000, Ncols: 1, ByteSize: 0, Cost: 1e+06, Sel: 1, Storage: NA |
| 136 | + Rel: c |
| 137 | + Type: BaseTable, Nrows: 10000, Ncols: 1, ByteSize: 0, Cost: 9009, Sel: 1, Storage: NA |
| 138 | +)__"; |
| 139 | + |
| 140 | + UNIT_ASSERT_STRINGS_EQUAL(expected, ss.str()); |
| 141 | +} |
| 142 | + |
| 143 | +Y_UNIT_TEST(RelCollector) { |
| 144 | + TExprContext ctx; |
| 145 | + auto pos = ctx.AppendPosition({}); |
| 146 | + TVector<TExprBase> joinArgs; |
| 147 | + TVector<TExprBase> tables; |
| 148 | + tables.emplace_back(Build<TCoEquiJoinInput>(ctx, pos).List(Build<TCoAtomList>(ctx, pos).Done().Ptr()).Scope(ctx.NewAtom(pos, "orders")).Done()); |
| 149 | + tables.emplace_back(Build<TCoEquiJoinInput>(ctx, pos).List(Build<TCoAtomList>(ctx, pos).Done().Ptr()).Scope(ctx.NewAtom(pos, "customer")).Done()); |
| 150 | + tables.emplace_back(Build<TCoEquiJoinInput>(ctx, pos).List(Build<TCoAtomList>(ctx, pos).Done().Ptr()).Scope(ctx.NewAtom(pos, "nation")).Done()); |
| 151 | + |
| 152 | + auto joinTree = Build<TCoAtomList>(ctx, pos).Done().Ptr(); |
| 153 | + auto settings = Build<TCoAtomList>(ctx, pos).Done().Ptr(); |
| 154 | + |
| 155 | + joinArgs.insert(joinArgs.end(), tables.begin(), tables.end()); |
| 156 | + joinArgs.emplace_back(joinTree); |
| 157 | + joinArgs.emplace_back(settings); |
| 158 | + |
| 159 | + TCoEquiJoin equiJoin = Build<TCoEquiJoin>(ctx, pos) |
| 160 | + .Add(joinArgs) |
| 161 | + .Done(); |
| 162 | + |
| 163 | + TTypeAnnotationContext typeCtx; |
| 164 | + TVector<std::shared_ptr<TRelOptimizerNode>> rels; |
| 165 | + UNIT_ASSERT(DqCollectJoinRelationsWithStats(rels, typeCtx, equiJoin, [&](auto, auto, auto, auto) {}) == false); |
| 166 | + |
| 167 | + typeCtx.SetStats(tables[1].Ptr()->Child(0), std::make_shared<TOptimizerStatistics>(BaseTable, 1, 1, 1)); |
| 168 | + UNIT_ASSERT(DqCollectJoinRelationsWithStats(rels, typeCtx, equiJoin, [&](auto, auto, auto, auto) {}) == false); |
| 169 | + |
| 170 | + typeCtx.SetStats(tables[0].Ptr()->Child(0), std::make_shared<TOptimizerStatistics>(BaseTable, 1, 1, 1)); |
| 171 | + typeCtx.SetStats(tables[2].Ptr()->Child(0), std::make_shared<TOptimizerStatistics>(BaseTable, 1, 1, 1)); |
| 172 | + |
| 173 | + TVector<TString> labels; |
| 174 | + UNIT_ASSERT(DqCollectJoinRelationsWithStats(rels, typeCtx, equiJoin, [&](auto, auto label, auto, auto) { labels.emplace_back(label); }) == true); |
| 175 | + UNIT_ASSERT(labels.size() == 3); |
| 176 | + UNIT_ASSERT_STRINGS_EQUAL(labels[0], "orders"); |
| 177 | + UNIT_ASSERT_STRINGS_EQUAL(labels[1], "customer"); |
| 178 | + UNIT_ASSERT_STRINGS_EQUAL(labels[2], "nation"); |
| 179 | +} |
| 180 | + |
| 181 | +Y_UNIT_TEST(RelCollectorBrokenEquiJoin) { |
| 182 | + TExprContext ctx; |
| 183 | + auto pos = ctx.AppendPosition({}); |
| 184 | + TVector<TExprBase> joinArgs; |
| 185 | + auto joinTree = Build<TCoAtomList>(ctx, pos).Done().Ptr(); |
| 186 | + auto settings = Build<TCoAtomList>(ctx, pos).Done().Ptr(); |
| 187 | + TCoEquiJoin equiJoin = Build<TCoEquiJoin>(ctx, pos) |
| 188 | + .Add(joinArgs) |
| 189 | + .Done(); |
| 190 | + |
| 191 | + TTypeAnnotationContext typeCtx; |
| 192 | + TVector<std::shared_ptr<TRelOptimizerNode>> rels; |
| 193 | + UNIT_ASSERT(DqCollectJoinRelationsWithStats(rels, typeCtx, equiJoin, [&](auto, auto, auto, auto) {}) == false); |
| 194 | +} |
| 195 | + |
| 196 | +void _DqOptimizeEquiJoinWithCosts(const std::function<IOptimizerNew*()>& optFactory, TExprContext& ctx) { |
| 197 | + TTypeAnnotationContext typeCtx; |
| 198 | + auto pos = ctx.AppendPosition({}); |
| 199 | + TVector<TExprBase> joinArgs; |
| 200 | + TVector<TExprBase> tables; |
| 201 | + tables.emplace_back(Build<TCoEquiJoinInput>(ctx, pos).List(Build<TCoAtomList>(ctx, pos).Done().Ptr()).Scope(ctx.NewAtom(pos, "orders")).Done()); |
| 202 | + tables.emplace_back(Build<TCoEquiJoinInput>(ctx, pos).List(Build<TCoAtomList>(ctx, pos).Done().Ptr()).Scope(ctx.NewAtom(pos, "customer")).Done()); |
| 203 | + |
| 204 | + auto settings = Build<TCoAtomList>(ctx, pos).Done().Ptr(); |
| 205 | + |
| 206 | + auto joinTree = Build<TCoEquiJoinTuple>(ctx, pos) |
| 207 | + .Type(ctx.NewAtom(pos, "Inner")) |
| 208 | + .LeftScope(ctx.NewAtom(pos, "orders")) |
| 209 | + .RightScope(ctx.NewAtom(pos, "customer")) |
| 210 | + .LeftKeys(MakeLabel(ctx, {"orders", "a"})) |
| 211 | + .RightKeys(MakeLabel(ctx, {"customer", "b"})) |
| 212 | + .Options(settings) |
| 213 | + .Done().Ptr(); |
| 214 | + |
| 215 | + joinArgs.insert(joinArgs.end(), tables.begin(), tables.end()); |
| 216 | + joinArgs.emplace_back(joinTree); |
| 217 | + joinArgs.emplace_back(settings); |
| 218 | + |
| 219 | + typeCtx.SetStats(tables[0].Ptr()->Child(0), std::make_shared<TOptimizerStatistics>(BaseTable, 1, 1, 1)); |
| 220 | + typeCtx.SetStats(tables[1].Ptr()->Child(0), std::make_shared<TOptimizerStatistics>(BaseTable, 1, 1, 1)); |
| 221 | + |
| 222 | + TCoEquiJoin equiJoin = Build<TCoEquiJoin>(ctx, pos) |
| 223 | + .Add(joinArgs) |
| 224 | + .Done(); |
| 225 | + |
| 226 | + auto opt = std::unique_ptr<IOptimizerNew>(optFactory()); |
| 227 | + std::function<void(TVector<std::shared_ptr<TRelOptimizerNode>>&, TStringBuf, const TExprNode::TPtr, const std::shared_ptr<TOptimizerStatistics>&)> providerCollect = [](auto& rels, auto label, auto node, auto stats) { |
| 228 | + Y_UNUSED(node); |
| 229 | + auto rel = std::make_shared<TRelOptimizerNode>(TString(label), *stats); |
| 230 | + rels.push_back(rel); |
| 231 | + }; |
| 232 | + auto res = DqOptimizeEquiJoinWithCosts(equiJoin, ctx, typeCtx, 2, *opt, providerCollect); |
| 233 | + UNIT_ASSERT(equiJoin.Ptr() != res.Ptr()); |
| 234 | + UNIT_ASSERT(equiJoin.Ptr()->ChildrenSize() == res.Ptr()->ChildrenSize()); |
| 235 | + UNIT_ASSERT(equiJoin.Maybe<TCoEquiJoin>()); |
| 236 | + auto resStr = NCommon::ExprToPrettyString(ctx, *res.Ptr()); |
| 237 | + auto expected = R"__(( |
| 238 | +(let $1 '('"Inner" '"orders" '"customer" '('"orders" '"a") '('"customer" '"b") '('('join_algo 'MapJoin)))) |
| 239 | +(return (EquiJoin '('() '"orders") '('() '"customer") $1 '())) |
| 240 | +) |
| 241 | +)__"; |
| 242 | + UNIT_ASSERT_STRINGS_EQUAL(expected, resStr); |
| 243 | +} |
| 244 | + |
| 245 | +Y_UNIT_TEST(DqOptimizeEquiJoinWithCostsNative) { |
| 246 | + TExprContext ctx; |
| 247 | + TBaseProviderContext pctx; |
| 248 | + std::function<IOptimizerNew*()> optFactory = [&]() { |
| 249 | + TExprContext dummyCtx; |
| 250 | + return MakeNativeOptimizerNew(pctx, 100000, dummyCtx); |
| 251 | + }; |
| 252 | + _DqOptimizeEquiJoinWithCosts(optFactory, ctx); |
| 253 | +} |
| 254 | + |
| 255 | +Y_UNIT_TEST(DqOptimizeEquiJoinWithCostsPG) { |
| 256 | + TExprContext ctx; |
| 257 | + TBaseProviderContext pctx; |
| 258 | + std::function<void(const TString&)> log = [&](auto str) { |
| 259 | + Cerr << str; |
| 260 | + }; |
| 261 | + std::function<IOptimizerNew*()> optFactory = [&]() { |
| 262 | + return MakePgOptimizerNew(pctx, ctx, log); |
| 263 | + }; |
| 264 | + _DqOptimizeEquiJoinWithCosts(optFactory, ctx); |
| 265 | +} |
| 266 | + |
| 267 | +} // DQCBO |
0 commit comments