From 0c3e11866af7421d829df9d37c34df79bd4262be Mon Sep 17 00:00:00 2001 From: yumkam Date: Wed, 14 Aug 2024 20:00:43 +0300 Subject: [PATCH 1/7] dq: add cbo cost functions (#7617) --- .../yql/providers/dq/opt/logical_optimize.cpp | 70 ++++++++++++++++++- .../sql/dq_file/part17/canondata/result.json | 18 ++--- 2 files changed, 78 insertions(+), 10 deletions(-) diff --git a/ydb/library/yql/providers/dq/opt/logical_optimize.cpp b/ydb/library/yql/providers/dq/opt/logical_optimize.cpp index a02369aadb4f..5c603c998803 100644 --- a/ydb/library/yql/providers/dq/opt/logical_optimize.cpp +++ b/ydb/library/yql/providers/dq/opt/logical_optimize.cpp @@ -38,6 +38,74 @@ bool IsStreamLookup(const TCoEquiJoinTuple& joinTuple) { } +/** + * DQ Specific cost function and join applicability cost function +*/ +struct TDqCBOProviderContext : public NYql::TBaseProviderContext { + TDqCBOProviderContext(TTypeAnnotationContext& typeCtx, const TDqConfiguration::TPtr& config) + : NYql::TBaseProviderContext() + , Config(config) + , TypesCtx(typeCtx) {} + + virtual bool IsJoinApplicable(const std::shared_ptr& left, + const std::shared_ptr& right, + const std::set>& joinConditions, + const TVector& leftJoinKeys, const TVector& rightJoinKeys, + NYql::EJoinAlgoType joinAlgo, NYql::EJoinKind joinKind) override; + + virtual double ComputeJoinCost(const NYql::TOptimizerStatistics& leftStats, const NYql::TOptimizerStatistics& rightStats, const double outputRows, const double outputByteSize, NYql::EJoinAlgoType joinAlgo) const override; + + TDqConfiguration::TPtr Config; + TTypeAnnotationContext& TypesCtx; +}; + + +bool TDqCBOProviderContext::IsJoinApplicable(const std::shared_ptr& left, + const std::shared_ptr& right, + const std::set>& joinConditions, + const TVector& leftJoinKeys, const TVector& rightJoinKeys, + NYql::EJoinAlgoType joinAlgo, NYql::EJoinKind joinKind) { + Y_UNUSED(left); + Y_UNUSED(right); + Y_UNUSED(joinConditions); + Y_UNUSED(leftJoinKeys); + Y_UNUSED(rightJoinKeys); + + 
switch(joinAlgo) { + + case EJoinAlgoType::MapJoin: + if (joinKind == EJoinKind::OuterJoin || joinKind == EJoinKind::Exclusion) + return false; + if (auto hashJoinMode = Config->HashJoinMode.Get().GetOrElse(EHashJoinMode::Off); + hashJoinMode == EHashJoinMode::Off || hashJoinMode == EHashJoinMode::Map) + return true; + break; + + case EJoinAlgoType::GraceJoin: + return true; + + default: + break; + } + return false; +} + + +double TDqCBOProviderContext::ComputeJoinCost(const TOptimizerStatistics& leftStats, const TOptimizerStatistics& rightStats, const double outputRows, const double outputByteSize, EJoinAlgoType joinAlgo) const { + Y_UNUSED(outputByteSize); + + switch(joinAlgo) { + case EJoinAlgoType::MapJoin: + return 1.5 * (leftStats.Nrows + 1.8 * rightStats.Nrows + outputRows); + case EJoinAlgoType::GraceJoin: + return 1.5 * (leftStats.Nrows + 2.0 * rightStats.Nrows + outputRows); + default: + Y_ENSURE(false, "Illegal join type encountered"); + return 0; + } +} + + class TDqsLogicalOptProposalTransformer : public TOptimizeTransformerBase { public: TDqsLogicalOptProposalTransformer(TTypeAnnotationContext* typeCtx, const TDqConfiguration::TPtr& config) @@ -207,7 +275,7 @@ class TDqsLogicalOptProposalTransformer : public TOptimizeTransformerBase { }; std::unique_ptr opt; - TBaseProviderContext pctx; + TDqCBOProviderContext pctx(TypesCtx, Config); switch (TypesCtx.CostBasedOptimizer) { case ECostBasedOptimizerType::Native: diff --git a/ydb/library/yql/tests/sql/dq_file/part17/canondata/result.json b/ydb/library/yql/tests/sql/dq_file/part17/canondata/result.json index 74a9b4702330..da50af5c1f5b 100644 --- a/ydb/library/yql/tests/sql/dq_file/part17/canondata/result.json +++ b/ydb/library/yql/tests/sql/dq_file/part17/canondata/result.json @@ -755,23 +755,23 @@ "test.test[dq-blacklisted_pragmas--Results]": [], "test.test[dq-join_cbo_native_3_tables--Analyze]": [ { - "checksum": "94e6af2e865eab35e76cc9963452ad0d", - "size": 13889, - "uri": 
"https://{canondata_backend}/1924537/25b076e867ef14e000e544c6dba85dd8ef6e3620/resource.tar.gz#test.test_dq-join_cbo_native_3_tables--Analyze_/plan.txt" + "checksum": "90555f07378f801872485e6ac96dfd73", + "size": 12314, + "uri": "https://{canondata_backend}/1942278/d3f67196e7e0096e289743f5dbfd5dc2f990f9e6/resource.tar.gz#test.test_dq-join_cbo_native_3_tables--Analyze_/plan.txt" } ], "test.test[dq-join_cbo_native_3_tables--Debug]": [ { - "checksum": "fd20054511c7328de8f8c6c45539b48b", - "size": 5339, - "uri": "https://{canondata_backend}/1936273/7a32049e7d34640d0891b0eccadb21c671bd9ed5/resource.tar.gz#test.test_dq-join_cbo_native_3_tables--Debug_/opt.yql_patched" + "checksum": "91570a2f667516ba1f3f28642698441f", + "size": 4802, + "uri": "https://{canondata_backend}/1942278/d3f67196e7e0096e289743f5dbfd5dc2f990f9e6/resource.tar.gz#test.test_dq-join_cbo_native_3_tables--Debug_/opt.yql_patched" } ], "test.test[dq-join_cbo_native_3_tables--Plan]": [ { - "checksum": "94e6af2e865eab35e76cc9963452ad0d", - "size": 13889, - "uri": "https://{canondata_backend}/1924537/25b076e867ef14e000e544c6dba85dd8ef6e3620/resource.tar.gz#test.test_dq-join_cbo_native_3_tables--Plan_/plan.txt" + "checksum": "90555f07378f801872485e6ac96dfd73", + "size": 12314, + "uri": "https://{canondata_backend}/1942278/d3f67196e7e0096e289743f5dbfd5dc2f990f9e6/resource.tar.gz#test.test_dq-join_cbo_native_3_tables--Plan_/plan.txt" } ], "test.test[dq-join_cbo_native_3_tables--Results]": [ From 7a4a18f37b5d655041b42860864a48ef4ead76e8 Mon Sep 17 00:00:00 2001 From: Pavel Velikhov Date: Mon, 14 Oct 2024 14:09:35 +0300 Subject: [PATCH 2/7] Refactored join conditions in CBO (#10366) --- ydb/core/kqp/opt/logical/kqp_opt_cbo.cpp | 28 ++--- ydb/core/kqp/opt/logical/kqp_opt_cbo.h | 3 +- .../yql/core/cbo/cbo_optimizer_new.cpp | 58 +++------ ydb/library/yql/core/cbo/cbo_optimizer_new.h | 49 +++----- ydb/library/yql/core/yql_cost_function.h | 9 +- ydb/library/yql/dq/opt/dq_cbo_ut.cpp | 31 +++-- 
ydb/library/yql/dq/opt/dq_opt_dphyp_solver.h | 24 ++-- .../yql/dq/opt/dq_opt_hypergraph_ut.cpp | 24 ++-- .../yql/dq/opt/dq_opt_join_cost_based.cpp | 23 ++-- .../yql/dq/opt/dq_opt_join_hypergraph.h | 115 ++++++++++++------ .../yql/dq/opt/dq_opt_join_tree_node.cpp | 9 +- .../yql/dq/opt/dq_opt_join_tree_node.h | 16 +-- .../yql/dq/opt/dq_opt_make_join_hypergraph.h | 12 +- ydb/library/yql/dq/opt/dq_opt_stat.cpp | 35 ++++-- .../yql/providers/dq/opt/logical_optimize.cpp | 7 +- .../yt/provider/ut/yql_yt_cbo_ut.cpp | 7 +- .../yt/provider/yql_yt_join_reorder.cpp | 22 ++-- ydb/library/yql/sql/pg/optimizer.cpp | 25 ++-- .../sql/dq_file/part17/canondata/result.json | 6 +- 19 files changed, 260 insertions(+), 243 deletions(-) diff --git a/ydb/core/kqp/opt/logical/kqp_opt_cbo.cpp b/ydb/core/kqp/opt/logical/kqp_opt_cbo.cpp index 240f38fbaffd..ebd541bb9889 100644 --- a/ydb/core/kqp/opt/logical/kqp_opt_cbo.cpp +++ b/ydb/core/kqp/opt/logical/kqp_opt_cbo.cpp @@ -36,7 +36,7 @@ TMaybeNode GetRightTableKeyPrefix(const TKqlKeyRange& range) { /** * KQP specific rule to check if a LookupJoin is applicable */ -bool IsLookupJoinApplicableDetailed(const std::shared_ptr& node, const TVector& joinColumns, const TKqpProviderContext& ctx) { +bool IsLookupJoinApplicableDetailed(const std::shared_ptr& node, const TVector& joinColumns, const TKqpProviderContext& ctx) { auto rel = std::static_pointer_cast(node); auto expr = TExprBase(rel->Node); @@ -45,7 +45,7 @@ bool IsLookupJoinApplicableDetailed(const std::shared_ptrStats->KeyColumns->Data[0] == s;}) != joinColumns.end()) { + if (std::find_if(joinColumns.begin(), joinColumns.end(), [&] (const TJoinColumn& c) { return node->Stats->KeyColumns->Data[0] == c.AttributeName;}) != joinColumns.end()) { return true; } @@ -97,8 +97,8 @@ bool IsLookupJoinApplicableDetailed(const std::shared_ptrStats->KeyColumns->Data.size() && (find_if(joinColumns.begin(), joinColumns.end(), [&] (const TString& s) { - return node->Stats->KeyColumns->Data[prefixSize] == s; + 
if (prefixSize < node->Stats->KeyColumns->Data.size() && (std::find_if(joinColumns.begin(), joinColumns.end(), [&] (const TJoinColumn& c) { + return node->Stats->KeyColumns->Data[prefixSize] == c.AttributeName; }) == joinColumns.end())){ return false; } @@ -108,12 +108,11 @@ bool IsLookupJoinApplicableDetailed(const std::shared_ptr left, std::shared_ptr right, - const std::set>& joinConditions, - const TVector& leftJoinKeys, - const TVector& rightJoinKeys, + const TVector& leftJoinKeys, + const TVector& rightJoinKeys, TKqpProviderContext& ctx ) { - Y_UNUSED(left, joinConditions, leftJoinKeys); + Y_UNUSED(left, leftJoinKeys); if (!(right->Stats->StorageType == EStorageType::RowStorage)) { return false; @@ -130,7 +129,7 @@ bool IsLookupJoinApplicable(std::shared_ptr left, } for (auto rightCol : rightJoinKeys) { - if (std::find(rightStats->KeyColumns->Data.begin(), rightStats->KeyColumns->Data.end(), rightCol) == rightStats->KeyColumns->Data.end()) { + if (find(rightStats->KeyColumns->Data.begin(), rightStats->KeyColumns->Data.end(), rightCol.AttributeName) == rightStats->KeyColumns->Data.end()) { return false; } } @@ -142,18 +141,17 @@ bool IsLookupJoinApplicable(std::shared_ptr left, bool TKqpProviderContext::IsJoinApplicable(const std::shared_ptr& left, const std::shared_ptr& right, - const std::set>& joinConditions, - const TVector& leftJoinKeys, - const TVector& rightJoinKeys, + const TVector& leftJoinKeys, + const TVector& rightJoinKeys, EJoinAlgoType joinAlgo, - EJoinKind joinKind) { + EJoinKind joinKind) { switch( joinAlgo ) { case EJoinAlgoType::LookupJoin: if ((OptLevel != 3) && (left->Stats->Nrows > 1000)) { return false; } - return IsLookupJoinApplicable(left, right, joinConditions, leftJoinKeys, rightJoinKeys, *this); + return IsLookupJoinApplicable(left, right, leftJoinKeys, rightJoinKeys, *this); case EJoinAlgoType::LookupJoinReverse: if (joinKind != EJoinKind::LeftSemi) { @@ -162,7 +160,7 @@ bool TKqpProviderContext::IsJoinApplicable(const 
std::shared_ptrStats->Nrows > 1000)) { return false; } - return IsLookupJoinApplicable(right, left, joinConditions, rightJoinKeys, leftJoinKeys, *this); + return IsLookupJoinApplicable(right, left, rightJoinKeys, leftJoinKeys, *this); case EJoinAlgoType::MapJoin: return joinKind != EJoinKind::OuterJoin && joinKind != EJoinKind::Exclusion && right->Stats->ByteSize < 1e6; diff --git a/ydb/core/kqp/opt/logical/kqp_opt_cbo.h b/ydb/core/kqp/opt/logical/kqp_opt_cbo.h index 9df809aaacb7..52aa93ef4143 100644 --- a/ydb/core/kqp/opt/logical/kqp_opt_cbo.h +++ b/ydb/core/kqp/opt/logical/kqp_opt_cbo.h @@ -25,8 +25,7 @@ struct TKqpProviderContext : public NYql::TBaseProviderContext { virtual bool IsJoinApplicable(const std::shared_ptr& left, const std::shared_ptr& right, - const std::set>& joinConditions, - const TVector& leftJoinKeys, const TVector& rightJoinKeys, + const TVector& leftJoinKeys, const TVector& rightJoinKeys, NYql::EJoinAlgoType joinAlgo, NYql::EJoinKind joinKind) override; virtual double ComputeJoinCost(const NYql::TOptimizerStatistics& leftStats, const NYql::TOptimizerStatistics& rightStats, const double outputRows, const double outputByteSize, NYql::EJoinAlgoType joinAlgo) const override; diff --git a/ydb/library/yql/core/cbo/cbo_optimizer_new.cpp b/ydb/library/yql/core/cbo/cbo_optimizer_new.cpp index 02eeabc4784e..048ef307cb76 100644 --- a/ydb/library/yql/core/cbo/cbo_optimizer_new.cpp +++ b/ydb/library/yql/core/cbo/cbo_optimizer_new.cpp @@ -77,7 +77,8 @@ void TRelOptimizerNode::Print(std::stringstream& stream, int ntabs) { TJoinOptimizerNode::TJoinOptimizerNode( const std::shared_ptr& left, const std::shared_ptr& right, - const std::set>& joinConditions, + TVector leftKeys, + TVector rightKeys, const EJoinKind joinType, const EJoinAlgoType joinAlgo, bool leftAny, @@ -86,18 +87,14 @@ TJoinOptimizerNode::TJoinOptimizerNode( ) : IBaseOptimizerNode(JoinNodeType) , LeftArg(left) , RightArg(right) - , JoinConditions(joinConditions) + , LeftJoinKeys(leftKeys) + , 
RightJoinKeys(rightKeys) , JoinType(joinType) , JoinAlgo(joinAlgo) , LeftAny(leftAny) , RightAny(rightAny) , IsReorderable(!nonReorderable) -{ - for (const auto& [l,r] : joinConditions ) { - LeftJoinKeys.push_back(l.AttributeName); - RightJoinKeys.push_back(r.AttributeName); - } -} +{} TVector TJoinOptimizerNode::Labels() { auto res = LeftArg->Labels(); @@ -120,10 +117,10 @@ void TJoinOptimizerNode::Print(std::stringstream& stream, int ntabs) { } stream << ") "; - for (auto c : JoinConditions){ - stream << c.first.RelName << "." << c.first.AttributeName - << "=" << c.second.RelName << "." - << c.second.AttributeName << ","; + for (size_t i=0; iPrint(stream, ntabs+1); } -bool IsPKJoin(const TOptimizerStatistics& stats, const TVector& joinKeys) { +bool IsPKJoin(const TOptimizerStatistics& stats, const TVector& joinKeys) { if (!stats.KeyColumns) { return false; } for(size_t i = 0; i < stats.KeyColumns->Data.size(); i++){ - if (std::find(joinKeys.begin(), joinKeys.end(), stats.KeyColumns->Data[i]) == joinKeys.end()) { + if (std::find_if(joinKeys.begin(), joinKeys.end(), + [&] (const TJoinColumn& c) { return c.AttributeName == stats.KeyColumns->Data[i];}) == joinKeys.end()) { return false; } } @@ -154,15 +152,13 @@ bool IsPKJoin(const TOptimizerStatistics& stats, const TVector& joinKey bool TBaseProviderContext::IsJoinApplicable(const std::shared_ptr& left, const std::shared_ptr& right, - const std::set>& joinConditions, - const TVector& leftJoinKeys, - const TVector& rightJoinKeys, + const TVector& leftJoinKeys, + const TVector& rightJoinKeys, EJoinAlgoType joinAlgo, EJoinKind joinKind) { Y_UNUSED(left); Y_UNUSED(right); - Y_UNUSED(joinConditions); Y_UNUSED(leftJoinKeys); Y_UNUSED(rightJoinKeys); Y_UNUSED(joinKind); @@ -183,30 +179,12 @@ double TBaseProviderContext::ComputeJoinCost(const TOptimizerStatistics& leftSta * * The build is on the right side, so we make the build side a bit more expensive than the probe */ -TOptimizerStatistics 
TBaseProviderContext::ComputeJoinStats( - const TOptimizerStatistics& leftStats, - const TOptimizerStatistics& rightStats, - const std::set>& joinConditions, - EJoinAlgoType joinAlgo, - EJoinKind joinKind, - TCardinalityHints::TCardinalityHint* maybeHint) const -{ - TVector leftJoinKeys; - TVector rightJoinKeys; - - for (auto c : joinConditions) { - leftJoinKeys.emplace_back(c.first.AttributeName); - rightJoinKeys.emplace_back(c.second.AttributeName); - } - - return ComputeJoinStats(leftStats, rightStats, leftJoinKeys, rightJoinKeys, joinAlgo, joinKind, maybeHint); -} TOptimizerStatistics TBaseProviderContext::ComputeJoinStats( const TOptimizerStatistics& leftStats, const TOptimizerStatistics& rightStats, - const TVector& leftJoinKeys, - const TVector& rightJoinKeys, + const TVector& leftJoinKeys, + const TVector& rightJoinKeys, EJoinAlgoType joinAlgo, EJoinKind joinKind, TCardinalityHints::TCardinalityHint* maybeHint) const @@ -266,9 +244,9 @@ TOptimizerStatistics TBaseProviderContext::ComputeJoinStats( std::optional lhsUniqueVals; std::optional rhsUniqueVals; if (leftStats.ColumnStatistics && rightStats.ColumnStatistics && !leftJoinKeys.empty() && !rightJoinKeys.empty()) { - auto lhs = leftJoinKeys[0]; + auto lhs = leftJoinKeys[0].AttributeName; lhsUniqueVals = leftStats.ColumnStatistics->Data[lhs].NumUniqueVals; - auto rhs = rightJoinKeys[0]; + auto rhs = rightJoinKeys[0].AttributeName; rightStats.ColumnStatistics->Data[rhs]; rhsUniqueVals = leftStats.ColumnStatistics->Data[lhs].NumUniqueVals; } diff --git a/ydb/library/yql/core/cbo/cbo_optimizer_new.h b/ydb/library/yql/core/cbo/cbo_optimizer_new.h index 0a564e4c3595..af3b94529027 100644 --- a/ydb/library/yql/core/cbo/cbo_optimizer_new.h +++ b/ydb/library/yql/core/cbo/cbo_optimizer_new.h @@ -201,27 +201,18 @@ struct IProviderContext { virtual TOptimizerStatistics ComputeJoinStats( const TOptimizerStatistics& leftStats, const TOptimizerStatistics& rightStats, - const std::set>& joinConditions, - EJoinAlgoType 
joinAlgo, - EJoinKind joinKind, - TCardinalityHints::TCardinalityHint* maybeHint = nullptr) const = 0; - - virtual TOptimizerStatistics ComputeJoinStats( - const TOptimizerStatistics& leftStats, - const TOptimizerStatistics& rightStats, - const TVector& leftJoinKeys, - const TVector& rightJoinKeys, + const TVector& leftJoinKeys, + const TVector& rightJoinKeys, EJoinAlgoType joinAlgo, EJoinKind joinKind, TCardinalityHints::TCardinalityHint* maybeHint = nullptr) const = 0; virtual bool IsJoinApplicable(const std::shared_ptr& left, const std::shared_ptr& right, - const std::set>& joinConditions, - const TVector& leftJoinKeys, - const TVector& rightJoinKeys, + const TVector& leftJoinKeys, + const TVector& rightJoinKeys, EJoinAlgoType joinAlgo, - EJoinKind joinKind) = 0; + EJoinKind joinKin) = 0; }; /** @@ -233,27 +224,19 @@ struct TBaseProviderContext : public IProviderContext { double ComputeJoinCost(const TOptimizerStatistics& leftStats, const TOptimizerStatistics& rightStats, const double outputRows, const double outputByteSize, EJoinAlgoType joinAlgo) const override; - bool IsJoinApplicable(const std::shared_ptr& left, - const std::shared_ptr& right, - const std::set>& joinConditions, - const TVector& leftJoinKeys, - const TVector& rightJoinKeys, + bool IsJoinApplicable( + const std::shared_ptr& leftStats, + const std::shared_ptr& rightStats, + const TVector& leftJoinKeys, + const TVector& rightJoinKeys, EJoinAlgoType joinAlgo, EJoinKind joinKind) override; virtual TOptimizerStatistics ComputeJoinStats( const TOptimizerStatistics& leftStats, const TOptimizerStatistics& rightStats, - const TVector& leftJoinKeys, - const TVector& rightJoinKeys, - EJoinAlgoType joinAlgo, - EJoinKind joinKind, - TCardinalityHints::TCardinalityHint* maybeHint = nullptr) const override; - - virtual TOptimizerStatistics ComputeJoinStats( - const TOptimizerStatistics& leftStats, - const TOptimizerStatistics& rightStats, - const std::set>& joinConditions, + const TVector& leftJoinKeys, + 
const TVector& rightJoinKeys, EJoinAlgoType joinAlgo, EJoinKind joinKind, TCardinalityHints::TCardinalityHint* maybeHint = nullptr) const override; @@ -290,9 +273,8 @@ struct TRelOptimizerNode : public IBaseOptimizerNode { struct TJoinOptimizerNode : public IBaseOptimizerNode { std::shared_ptr LeftArg; std::shared_ptr RightArg; - const std::set> JoinConditions; - TVector LeftJoinKeys; - TVector RightJoinKeys; + TVector LeftJoinKeys; + TVector RightJoinKeys; EJoinKind JoinType; EJoinAlgoType JoinAlgo; /////////////////// 'ANY' flag means leaving only one row from the join side. @@ -303,7 +285,8 @@ struct TJoinOptimizerNode : public IBaseOptimizerNode { TJoinOptimizerNode(const std::shared_ptr& left, const std::shared_ptr& right, - const std::set>& joinConditions, + TVector leftKeys, + TVector rightKeys, const EJoinKind joinType, const EJoinAlgoType joinAlgo, bool leftAny, diff --git a/ydb/library/yql/core/yql_cost_function.h b/ydb/library/yql/core/yql_cost_function.h index cb12f37238b4..b69c5941db65 100644 --- a/ydb/library/yql/core/yql_cost_function.h +++ b/ydb/library/yql/core/yql_cost_function.h @@ -38,9 +38,14 @@ namespace NDq { struct TJoinColumn { TString RelName; TString AttributeName; + TString AttributeNameWithAliases; + ui32 EquivalenceClass = 0; + bool IsConstant = false; - TJoinColumn(TString relName, TString attributeName) : RelName(relName), - AttributeName(std::move(attributeName)) {} + TJoinColumn(TString relName, TString attributeName) : + RelName(relName), + AttributeName(attributeName), + AttributeNameWithAliases(attributeName) {} bool operator == (const TJoinColumn& other) const { return RelName == other.RelName && AttributeName == other.AttributeName; diff --git a/ydb/library/yql/dq/opt/dq_cbo_ut.cpp b/ydb/library/yql/dq/opt/dq_cbo_ut.cpp index 28ca2d234e70..cd6a8026d2ce 100644 --- a/ydb/library/yql/dq/opt/dq_cbo_ut.cpp +++ b/ydb/library/yql/dq/opt/dq_cbo_ut.cpp @@ -45,15 +45,14 @@ Y_UNIT_TEST(JoinSearch2Rels) { auto rel2 = std::make_shared("b", 
std::make_shared(BaseTable, 1000000, 1, 0, 9000009)); - std::set> joinConditions; - joinConditions.insert({ - NDq::TJoinColumn("a", "1"), - NDq::TJoinColumn("b", "1") - }); + TVector leftKeys = {NDq::TJoinColumn("a", "1")}; + TVector rightKeys ={NDq::TJoinColumn("b", "1")}; + auto op = std::make_shared( std::static_pointer_cast(rel1), std::static_pointer_cast(rel2), - joinConditions, + leftKeys, + rightKeys, InnerJoin, EJoinAlgoType::GraceJoin, true, @@ -86,30 +85,28 @@ Y_UNIT_TEST(JoinSearch3Rels) { auto rel3 = std::make_shared("c", std::make_shared(BaseTable, 10000, 1, 0, 9009)); - std::set> joinConditions; - joinConditions.insert({ - NDq::TJoinColumn("a", "1"), - NDq::TJoinColumn("b", "1") - }); + TVector leftKeys = {NDq::TJoinColumn("a", "1")}; + TVector rightKeys ={NDq::TJoinColumn("b", "1")}; + auto op1 = std::make_shared( std::static_pointer_cast(rel1), std::static_pointer_cast(rel2), - joinConditions, + leftKeys, + rightKeys, InnerJoin, EJoinAlgoType::GraceJoin, false, false ); - joinConditions.insert({ - NDq::TJoinColumn("a", "1"), - NDq::TJoinColumn("c", "1") - }); + leftKeys.push_back(NDq::TJoinColumn("a", "1")); + rightKeys.push_back(NDq::TJoinColumn("c", "1")); auto op2 = std::make_shared( std::static_pointer_cast(op1), std::static_pointer_cast(rel3), - joinConditions, + leftKeys, + rightKeys, InnerJoin, EJoinAlgoType::GraceJoin, true, diff --git a/ydb/library/yql/dq/opt/dq_opt_dphyp_solver.h b/ydb/library/yql/dq/opt/dq_opt_dphyp_solver.h index 32ae0fb96fb6..09b3a676ffc1 100644 --- a/ydb/library/yql/dq/opt/dq_opt_dphyp_solver.h +++ b/ydb/library/yql/dq/opt/dq_opt_dphyp_solver.h @@ -84,10 +84,8 @@ class TDPHypSolver { bool leftAny, bool rightAny, bool isCommutative, - const std::set>& joinConditions, - const std::set>& reversedJoinConditions, - const TVector& leftJoinKeys, - const TVector& rightJoinKeys, + const TVector& leftJoinKeys, + const TVector& rightJoinKeys, IProviderContext& ctx, TCardinalityHints::TCardinalityHint* maybeCardHint, 
TJoinAlgoHints::TJoinAlgoHint* maybeJoinHint @@ -414,17 +412,15 @@ template std::shared_ptr TDPHypS bool leftAny, bool rightAny, bool isCommutative, - const std::set>& joinConditions, - const std::set>& reversedJoinConditions, - const TVector& leftJoinKeys, - const TVector& rightJoinKeys, + const TVector& leftJoinKeys, + const TVector& rightJoinKeys, IProviderContext& ctx, TCardinalityHints::TCardinalityHint* maybeCardHint, TJoinAlgoHints::TJoinAlgoHint* maybeJoinAlgoHint ) { if (maybeJoinAlgoHint) { maybeJoinAlgoHint->Applied = true; - return MakeJoinInternal(left, right, joinConditions, leftJoinKeys, rightJoinKeys, joinKind, maybeJoinAlgoHint->Algo, leftAny, rightAny, ctx, maybeCardHint); + return MakeJoinInternal(left, right, leftJoinKeys, rightJoinKeys, joinKind, maybeJoinAlgoHint->Algo, leftAny, rightAny, ctx, maybeCardHint); } double bestCost = std::numeric_limits::infinity(); @@ -432,7 +428,7 @@ template std::shared_ptr TDPHypS bool bestJoinIsReversed = false; for (auto joinAlgo : AllJoinAlgos) { - if (ctx.IsJoinApplicable(left, right, joinConditions, leftJoinKeys, rightJoinKeys, joinAlgo, joinKind)){ + if (ctx.IsJoinApplicable(left, right, leftJoinKeys, rightJoinKeys, joinAlgo, joinKind)){ auto cost = ctx.ComputeJoinStats(*left->Stats, *right->Stats, leftJoinKeys, rightJoinKeys, joinAlgo, joinKind, maybeCardHint).Cost; if (cost < bestCost) { bestCost = cost; @@ -442,7 +438,7 @@ template std::shared_ptr TDPHypS } if (isCommutative) { - if (ctx.IsJoinApplicable(right, left, reversedJoinConditions, rightJoinKeys, leftJoinKeys, joinAlgo, joinKind)){ + if (ctx.IsJoinApplicable(right, left, rightJoinKeys, leftJoinKeys, joinAlgo, joinKind)){ auto cost = ctx.ComputeJoinStats(*right->Stats, *left->Stats, rightJoinKeys, leftJoinKeys, joinAlgo, joinKind, maybeCardHint).Cost; if (cost < bestCost) { bestCost = cost; @@ -456,10 +452,10 @@ template std::shared_ptr TDPHypS Y_ENSURE(bestAlgo != EJoinAlgoType::Undefined, "No join was chosen!"); if (bestJoinIsReversed) { - 
return MakeJoinInternal(right, left, reversedJoinConditions, rightJoinKeys, leftJoinKeys, joinKind, bestAlgo, rightAny, leftAny, ctx, maybeCardHint); + return MakeJoinInternal(right, left, rightJoinKeys, leftJoinKeys, joinKind, bestAlgo, rightAny, leftAny, ctx, maybeCardHint); } - return MakeJoinInternal(left, right, joinConditions, leftJoinKeys, rightJoinKeys, joinKind, bestAlgo, leftAny, rightAny, ctx, maybeCardHint); + return MakeJoinInternal(left, right, leftJoinKeys, rightJoinKeys, joinKind, bestAlgo, leftAny, rightAny, ctx, maybeCardHint); } /* @@ -493,8 +489,6 @@ template void TDPHypSolver::EmitCsgCmp(const TNodeS csgCmpEdge->LeftAny, csgCmpEdge->RightAny, csgCmpEdge->IsCommutative, - csgCmpEdge->JoinConditions, - reversedEdge->JoinConditions, csgCmpEdge->LeftJoinKeys, csgCmpEdge->RightJoinKeys, Pctx_, diff --git a/ydb/library/yql/dq/opt/dq_opt_hypergraph_ut.cpp b/ydb/library/yql/dq/opt/dq_opt_hypergraph_ut.cpp index 02e43b3bbbe3..6a80394d6625 100644 --- a/ydb/library/yql/dq/opt/dq_opt_hypergraph_ut.cpp +++ b/ydb/library/yql/dq/opt/dq_opt_hypergraph_ut.cpp @@ -24,11 +24,11 @@ std::shared_ptr CreateChain(size_t size, TString onAttribute auto ei = std::make_shared(eiStr, std::make_shared()); ei->Stats->Labels = std::make_shared>(TVector{eiStr}); - std::set> joinConditions; - joinConditions.insert({TJoinColumn(eiPrevStr, onAttribute), TJoinColumn(eiStr, onAttribute)}); + TVector leftKeys = {TJoinColumn(eiPrevStr, onAttribute)}; + TVector rightKeys = {TJoinColumn(eiStr, onAttribute)}; root = std::make_shared( - root, ei, joinConditions, EJoinKind::InnerJoin, EJoinAlgoType::Undefined, false, false + root, ei, leftKeys, rightKeys, EJoinKind::InnerJoin, EJoinAlgoType::Undefined, false, false ); } @@ -105,23 +105,26 @@ Y_UNIT_TEST_SUITE(HypergraphBuild) { auto lhs = CreateChain(3, "228", "a"); auto rhs = CreateChain(2, "1337", "b"); - std::set> joinConditions; - joinConditions.insert({TJoinColumn("a3", "1337"), TJoinColumn("b1", "1337")}); + TVector leftKeys = 
{TJoinColumn("a3", "1337")}; + TVector rightKeys = {TJoinColumn("b1", "1337")}; // a1 --228-- a2 --228-- a3 --1337-- b1 --1337-- b2 auto root = std::make_shared( - lhs, rhs, joinConditions, EJoinKind::InnerJoin, EJoinAlgoType::Undefined, false, false + lhs, rhs, leftKeys, rightKeys, EJoinKind::InnerJoin, EJoinAlgoType::Undefined, false, false ); - joinConditions.clear(); + leftKeys.clear(); + rightKeys.clear(); + + leftKeys.push_back(TJoinColumn("c2", "123")); + rightKeys.push_back(TJoinColumn("b2", "123")); - joinConditions.insert({TJoinColumn("c2", "123"), TJoinColumn("b2", "123")}); rhs = CreateChain(2, "228", "c"); // a1 --228-- a2 --228-- a3 --1337-- b1 --1337-- b2 --123-- c1 --228-- c2 // ^ we don't want to have transitive closure between c and a root = std::make_shared( - root, rhs, joinConditions, EJoinKind::InnerJoin, EJoinAlgoType::Undefined, false, false + root, rhs, leftKeys, rightKeys, EJoinKind::InnerJoin, EJoinAlgoType::Undefined, false, false ); auto graph = MakeJoinHypergraph(root); @@ -184,7 +187,8 @@ Y_UNIT_TEST_SUITE(HypergraphBuild) { TJoinOptimizerNode( GetJoinArg(lhsArg), GetJoinArg(rhsArg), - {{TJoinColumn(lhsCond.c_str(), col), TJoinColumn(rhsCond.c_str(), col)}}, + {TJoinColumn(lhsCond.c_str(), col)}, + {TJoinColumn(rhsCond.c_str(), col)}, EJoinKind::InnerJoin, EJoinAlgoType::Undefined, false, diff --git a/ydb/library/yql/dq/opt/dq_opt_join_cost_based.cpp b/ydb/library/yql/dq/opt/dq_opt_join_cost_based.cpp index 7e4182884ef5..0e3581be8bc6 100644 --- a/ydb/library/yql/dq/opt/dq_opt_join_cost_based.cpp +++ b/ydb/library/yql/dq/opt/dq_opt_join_cost_based.cpp @@ -83,7 +83,8 @@ std::shared_ptr ConvertToJoinTree( right = *it; } - std::set> joinConds; + TVector leftKeys; + TVector rightKeys; size_t joinKeysCount = joinTuple.LeftKeys().Size() / 2; for (size_t i = 0; i < joinKeysCount; ++i) { @@ -91,15 +92,15 @@ std::shared_ptr ConvertToJoinTree( auto leftScope = joinTuple.LeftKeys().Item(keyIndex).StringValue(); auto leftColumn = 
joinTuple.LeftKeys().Item(keyIndex + 1).StringValue(); + leftKeys.push_back(TJoinColumn(leftScope, leftColumn)); + auto rightScope = joinTuple.RightKeys().Item(keyIndex).StringValue(); auto rightColumn = joinTuple.RightKeys().Item(keyIndex + 1).StringValue(); - - joinConds.insert( std::make_pair( TJoinColumn(leftScope, leftColumn), - TJoinColumn(rightScope, rightColumn))); + rightKeys.push_back(TJoinColumn(rightScope, rightColumn)); } const auto linkSettings = GetEquiJoinLinkSettings(joinTuple.Options().Ref()); - return std::make_shared(left, right, joinConds, ConvertToJoinKind(joinTuple.Type().StringValue()), EJoinAlgoType::Undefined, + return std::make_shared(left, right, leftKeys, rightKeys, ConvertToJoinKind(joinTuple.Type().StringValue()), EJoinAlgoType::Undefined, linkSettings.LeftHints.contains("any"), linkSettings.RightHints.contains("any")); } @@ -139,11 +140,13 @@ TExprBase BuildTree(TExprContext& ctx, const TCoEquiJoin& equiJoin, TVector rightJoinColumns; // Build join conditions - for( auto pair : reorderResult->JoinConditions) { - leftJoinColumns.push_back(BuildAtom(pair.first.RelName, equiJoin.Pos(), ctx)); - leftJoinColumns.push_back(BuildAtom(pair.first.AttributeName, equiJoin.Pos(), ctx)); - rightJoinColumns.push_back(BuildAtom(pair.second.RelName, equiJoin.Pos(), ctx)); - rightJoinColumns.push_back(BuildAtom(pair.second.AttributeName, equiJoin.Pos(), ctx)); + for( auto leftKey : reorderResult->LeftJoinKeys) { + leftJoinColumns.push_back(BuildAtom(leftKey.RelName, equiJoin.Pos(), ctx)); + leftJoinColumns.push_back(BuildAtom(leftKey.AttributeNameWithAliases, equiJoin.Pos(), ctx)); + } + for( auto rightKey : reorderResult->RightJoinKeys) { + rightJoinColumns.push_back(BuildAtom(rightKey.RelName, equiJoin.Pos(), ctx)); + rightJoinColumns.push_back(BuildAtom(rightKey.AttributeNameWithAliases, equiJoin.Pos(), ctx)); } TExprNode::TListType options(1U, diff --git a/ydb/library/yql/dq/opt/dq_opt_join_hypergraph.h 
b/ydb/library/yql/dq/opt/dq_opt_join_hypergraph.h index c6f5be64fb2e..6a73c7149a65 100644 --- a/ydb/library/yql/dq/opt/dq_opt_join_hypergraph.h +++ b/ydb/library/yql/dq/opt/dq_opt_join_hypergraph.h @@ -31,7 +31,8 @@ class TJoinHypergraph { bool leftAny, bool rightAny, bool isCommutative, - const std::set>& joinConditions + TVector& leftJoinKeys, + TVector& rightJoinKeys ) : Left(left) , Right(right) @@ -39,14 +40,24 @@ class TJoinHypergraph { , LeftAny(leftAny) , RightAny(rightAny) , IsCommutative(isCommutative) - , JoinConditions(joinConditions) + , LeftJoinKeys(leftJoinKeys) + , RightJoinKeys(rightJoinKeys) , IsReversed(false) { - BuildCondVectors(); + RemoveAttributeAliases(); } bool AreCondVectorEqual() const { - return LeftJoinKeys == RightJoinKeys; + TVector leftAttrNames; + TVector rightAttrNames; + for (auto & l : LeftJoinKeys) { + leftAttrNames.push_back(l.AttributeName); + } + for (auto & r : RightJoinKeys) { + rightAttrNames.push_back(r.AttributeName); + } + + return leftAttrNames == rightAttrNames; } inline bool IsSimple() const { @@ -58,32 +69,25 @@ class TJoinHypergraph { EJoinKind JoinKind; bool LeftAny, RightAny; bool IsCommutative; - std::set> JoinConditions; - TVector LeftJoinKeys; - TVector RightJoinKeys; + TVector LeftJoinKeys; + TVector RightJoinKeys; // JoinKind may not be commutative, so we need to know which edge is original and which is reversed. 
bool IsReversed; int64_t ReversedEdgeId = -1; - void BuildCondVectors() { - LeftJoinKeys.clear(); - RightJoinKeys.clear(); + void RemoveAttributeAliases() { - for (const auto& [left, right] : JoinConditions) { - auto leftKey = left.AttributeName; - auto rightKey = right.AttributeName; - - if (auto idx = leftKey.find_last_of('.'); idx != TString::npos) { - leftKey = leftKey.substr(idx+1); + for (auto& leftKey : LeftJoinKeys ) { + if (auto idx = leftKey.AttributeName.find_last_of('.'); idx != TString::npos) { + leftKey.AttributeName = leftKey.AttributeName.substr(idx+1); } + } - if (auto idx = rightKey.find_last_of('.'); idx != TString::npos) { - rightKey = rightKey.substr(idx+1); + for (auto& rightKey : RightJoinKeys ) { + if (auto idx = rightKey.AttributeName.find_last_of('.'); idx != TString::npos) { + rightKey.AttributeName = rightKey.AttributeName.substr(idx+1); } - - LeftJoinKeys.emplace_back(leftKey); - RightJoinKeys.emplace_back(rightKey); } } }; @@ -133,10 +137,30 @@ class TJoinHypergraph { }; for (const auto& edge: Edges_) { + TString leftKeyStr; + TString rightKeyStr; + + for (auto& l: edge.LeftJoinKeys) { + leftKeyStr.append(l.RelName); + leftKeyStr.append("."); + leftKeyStr.append(l.AttributeName); + leftKeyStr.append(","); + } + + for (auto& r: edge.RightJoinKeys) { + rightKeyStr.append(r.RelName); + rightKeyStr.append("."); + rightKeyStr.append(r.AttributeName); + rightKeyStr.append(","); + } res .append(edgeSideToString(edge.Left)) .append(" -> ") .append(edgeSideToString(edge.Right)) + .append(" on ") + .append(leftKeyStr) + .append("==") + .append(rightKeyStr) .append("\n"); } @@ -164,17 +188,12 @@ class TJoinHypergraph { AddEdgeImpl(edge); - std::set> reversedJoinConditions; - for (const auto& [lhs, rhs]: edge.JoinConditions) { - reversedJoinConditions.insert({rhs, lhs}); - } - TEdge reversedEdge = std::move(edge); std::swap(reversedEdge.Left, reversedEdge.Right); - reversedEdge.JoinConditions = std::move(reversedJoinConditions); + 
std::swap(reversedEdge.LeftJoinKeys, reversedEdge.RightJoinKeys); reversedEdge.IsReversed = true; reversedEdge.ReversedEdgeId = edgeId; - reversedEdge.BuildCondVectors(); + reversedEdge.RemoveAttributeAliases(); AddEdgeImpl(reversedEdge); } @@ -404,8 +423,15 @@ class TTransitiveClosureConstructor { edges.begin(), edges.end(), [](const THyperedge& lhs, const THyperedge& rhs) { - auto lhsAttributeNames = lhs.LeftJoinKeys; - auto rhsAttributeNames = rhs.LeftJoinKeys; + TVector lhsAttributeNames; + TVector rhsAttributeNames; + + for (auto & l : lhs.LeftJoinKeys ) { + lhsAttributeNames.push_back(l.AttributeName); + } + for (auto & r : rhs.LeftJoinKeys ) { + rhsAttributeNames.push_back(r.AttributeName); + } std::sort(lhsAttributeNames.begin(), lhsAttributeNames.end()); std::sort(rhsAttributeNames.begin(), rhsAttributeNames.end()); @@ -439,9 +465,12 @@ class TTransitiveClosureConstructor { bool isJoinCommutative = edges[groupBegin].IsCommutative; TVector groupConditionUsedAttributes; - for (const auto& [lhs, rhs]: edges[groupBegin].JoinConditions) { + for (const auto& lhs: edges[groupBegin].LeftJoinKeys) { groupConditionUsedAttributes.push_back(lhs.AttributeName); } + for (const auto& rhs: edges[groupBegin].RightJoinKeys) { + groupConditionUsedAttributes.push_back(rhs.AttributeName); + } TDisjointSets connectedComponents(nodeSetSize); for (size_t edgeId = groupBegin; edgeId < groupEnd; ++edgeId) { @@ -464,15 +493,15 @@ class TTransitiveClosureConstructor { TString lhsRelName = nodes[i].RelationOptimizerNode->Labels()[0]; TString rhsRelName = nodes[j].RelationOptimizerNode->Labels()[0]; - std::set> joinConditions; + TVector leftKeys; + TVector rightKeys; + for (const auto& attributeName: groupConditionUsedAttributes){ - joinConditions.insert({ - TJoinColumn(lhsRelName, attributeName), - TJoinColumn(rhsRelName, attributeName) - }); + leftKeys.push_back(TJoinColumn(lhsRelName, attributeName)); + rightKeys.push_back(TJoinColumn(rhsRelName, attributeName)); } - auto e = 
THyperedge(lhs, rhs, groupJoinKind, false, false, isJoinCommutative, joinConditions); + auto e = THyperedge(lhs, rhs, groupJoinKind, false, false, isJoinCommutative, leftKeys, rightKeys); Graph_.AddEdge(std::move(e)); } } @@ -480,8 +509,16 @@ class TTransitiveClosureConstructor { } bool HasOneGroup(const THyperedge& lhs, const THyperedge& rhs) { - auto lhsAttributeNames = lhs.LeftJoinKeys; - auto rhsAttributeNames = rhs.LeftJoinKeys; + TVector lhsAttributeNames; + TVector rhsAttributeNames; + + for (auto & l : lhs.LeftJoinKeys) { + lhsAttributeNames.push_back(l.AttributeName); + } + + for (auto & r : rhs.LeftJoinKeys) { + rhsAttributeNames.push_back(r.AttributeName); + } std::sort(lhsAttributeNames.begin(), lhsAttributeNames.end()); std::sort(rhsAttributeNames.begin(), rhsAttributeNames.end()); diff --git a/ydb/library/yql/dq/opt/dq_opt_join_tree_node.cpp b/ydb/library/yql/dq/opt/dq_opt_join_tree_node.cpp index 5b13ee7cbd69..d54b793009b3 100644 --- a/ydb/library/yql/dq/opt/dq_opt_join_tree_node.cpp +++ b/ydb/library/yql/dq/opt/dq_opt_join_tree_node.cpp @@ -5,9 +5,8 @@ namespace NYql::NDq { std::shared_ptr MakeJoinInternal( std::shared_ptr left, std::shared_ptr right, - const std::set>& joinConditions, - const TVector& leftJoinKeys, - const TVector& rightJoinKeys, + const TVector& leftJoinKeys, + const TVector& rightJoinKeys, EJoinKind joinKind, EJoinAlgoType joinAlgo, bool leftAny, @@ -15,7 +14,7 @@ std::shared_ptr MakeJoinInternal( IProviderContext& ctx, TCardinalityHints::TCardinalityHint* maybeHint) { - auto res = std::make_shared(left, right, joinConditions, leftJoinKeys, rightJoinKeys, joinKind, joinAlgo, leftAny, rightAny); + auto res = std::make_shared(left, right, leftJoinKeys, rightJoinKeys, joinKind, joinAlgo, leftAny, rightAny); res->Stats = std::make_shared(ctx.ComputeJoinStats(*left->Stats, *right->Stats, leftJoinKeys, rightJoinKeys, joinAlgo, joinKind, maybeHint)); return res; } @@ -39,7 +38,7 @@ std::shared_ptr ConvertFromInternal(const 
std::shared_ptr(left, right, join->JoinConditions, join->JoinType, join->JoinAlgo, join->LeftAny, join->RightAny); + auto newJoin = std::make_shared(left, right, join->LeftJoinKeys, join->RightJoinKeys, join->JoinType, join->JoinAlgo, join->LeftAny, join->RightAny); newJoin->Stats = join->Stats; return newJoin; } diff --git a/ydb/library/yql/dq/opt/dq_opt_join_tree_node.h b/ydb/library/yql/dq/opt/dq_opt_join_tree_node.h index 9e626bc356bc..f8e50f3b3364 100644 --- a/ydb/library/yql/dq/opt/dq_opt_join_tree_node.h +++ b/ydb/library/yql/dq/opt/dq_opt_join_tree_node.h @@ -18,9 +18,8 @@ struct TJoinOptimizerNodeInternal : public IBaseOptimizerNode { TJoinOptimizerNodeInternal( const std::shared_ptr& left, const std::shared_ptr& right, - const std::set>& joinConditions, - const TVector& leftJoinKeys, - const TVector& rightJoinKeys, + const TVector& leftJoinKeys, + const TVector& rightJoinKeys, const EJoinKind joinType, const EJoinAlgoType joinAlgo, const bool leftAny, @@ -29,7 +28,6 @@ struct TJoinOptimizerNodeInternal : public IBaseOptimizerNode { : IBaseOptimizerNode(JoinNodeType) , LeftArg(left) , RightArg(right) - , JoinConditions(joinConditions) , LeftJoinKeys(leftJoinKeys) , RightJoinKeys(rightJoinKeys) , JoinType(joinType) @@ -51,9 +49,8 @@ struct TJoinOptimizerNodeInternal : public IBaseOptimizerNode { std::shared_ptr LeftArg; std::shared_ptr RightArg; - const std::set>& JoinConditions; - const TVector& LeftJoinKeys; - const TVector& RightJoinKeys; + const TVector& LeftJoinKeys; + const TVector& RightJoinKeys; EJoinKind JoinType; EJoinAlgoType JoinAlgo; const bool LeftAny; @@ -66,9 +63,8 @@ struct TJoinOptimizerNodeInternal : public IBaseOptimizerNode { std::shared_ptr MakeJoinInternal( std::shared_ptr left, std::shared_ptr right, - const std::set>& joinConditions, - const TVector& leftJoinKeys, - const TVector& rightJoinKeys, + const TVector& leftJoinKeys, + const TVector& rightJoinKeys, EJoinKind joinKind, EJoinAlgoType joinAlgo, bool leftAny, diff --git 
a/ydb/library/yql/dq/opt/dq_opt_make_join_hypergraph.h b/ydb/library/yql/dq/opt/dq_opt_make_join_hypergraph.h index 4e347ab59737..9d3443621e40 100644 --- a/ydb/library/yql/dq/opt/dq_opt_make_join_hypergraph.h +++ b/ydb/library/yql/dq/opt/dq_opt_make_join_hypergraph.h @@ -21,11 +21,13 @@ namespace NYql::NDq { inline TVector GetConditionUsedRelationNames(const std::shared_ptr& joinNode) { TVector res; - res.reserve(joinNode->JoinConditions.size()); + res.reserve(joinNode->LeftJoinKeys.size()); - for (const auto& [lhsTable, rhsTable]: joinNode->JoinConditions) { - res.push_back(lhsTable.RelName); - res.push_back(rhsTable.RelName); + for (const auto& lhs : joinNode->LeftJoinKeys ) { + res.push_back(lhs.RelName); + } + for (const auto& rhs : joinNode->RightJoinKeys ) { + res.push_back(rhs.RelName); } return res; @@ -57,7 +59,7 @@ typename TJoinHypergraph::TEdge MakeHyperedge( TNodeSet right = TES & subtreeNodes[joinNode->RightArg]; bool isCommutative = OperatorIsCommutative(joinNode->JoinType) && (joinNode->IsReorderable); - return typename TJoinHypergraph::TEdge(left, right, joinNode->JoinType, joinNode->LeftAny, joinNode->RightAny, isCommutative, joinNode->JoinConditions); + return typename TJoinHypergraph::TEdge(left, right, joinNode->JoinType, joinNode->LeftAny, joinNode->RightAny, isCommutative, joinNode->LeftJoinKeys, joinNode->RightJoinKeys); } template diff --git a/ydb/library/yql/dq/opt/dq_opt_stat.cpp b/ydb/library/yql/dq/opt/dq_opt_stat.cpp index 149f72f79b66..a4b2c1299c37 100644 --- a/ydb/library/yql/dq/opt/dq_opt_stat.cpp +++ b/ydb/library/yql/dq/opt/dq_opt_stat.cpp @@ -30,6 +30,17 @@ namespace { return attributeName; } + TString ExtractAlias(TString attributeName) { + if (auto idx = attributeName.find_last_of('.'); idx != TString::npos) { + auto substr = attributeName.substr(0, idx); + if (auto idx2 = substr.find_last_of('.'); idx != TString::npos) { + substr = substr.substr(idx2+1); + } + return substr; + } + return TString(); + } + TVector 
InferLabels(std::shared_ptr& stats, TCoAtomList joinColumns) { if(stats->Labels) { return *stats->Labels; @@ -261,14 +272,18 @@ void InferStatisticsForMapJoin(const TExprNode::TPtr& input, TTypeAnnotationCont leftStats = ApplyCardinalityHints(leftStats, leftLabels, hints); rightStats = ApplyCardinalityHints(rightStats, rightLabels, hints); - TVector leftJoinKeys; - TVector rightJoinKeys; + TVector leftJoinKeys; + TVector rightJoinKeys; for (size_t i=0; i leftJoinKeys; - TVector rightJoinKeys; + TVector leftJoinKeys; + TVector rightJoinKeys; for (size_t i=0; i& left, const std::shared_ptr& right, - const std::set>& joinConditions, - const TVector& leftJoinKeys, const TVector& rightJoinKeys, + const TVector& leftJoinKeys, const TVector& rightJoinKeys, NYql::EJoinAlgoType joinAlgo, NYql::EJoinKind joinKind) override; virtual double ComputeJoinCost(const NYql::TOptimizerStatistics& leftStats, const NYql::TOptimizerStatistics& rightStats, const double outputRows, const double outputByteSize, NYql::EJoinAlgoType joinAlgo) const override; @@ -62,12 +61,10 @@ struct TDqCBOProviderContext : public NYql::TBaseProviderContext { bool TDqCBOProviderContext::IsJoinApplicable(const std::shared_ptr& left, const std::shared_ptr& right, - const std::set>& joinConditions, - const TVector& leftJoinKeys, const TVector& rightJoinKeys, + const TVector& leftJoinKeys, const TVector& rightJoinKeys, NYql::EJoinAlgoType joinAlgo, NYql::EJoinKind joinKind) { Y_UNUSED(left); Y_UNUSED(right); - Y_UNUSED(joinConditions); Y_UNUSED(leftJoinKeys); Y_UNUSED(rightJoinKeys); diff --git a/ydb/library/yql/providers/yt/provider/ut/yql_yt_cbo_ut.cpp b/ydb/library/yql/providers/yt/provider/ut/yql_yt_cbo_ut.cpp index 26297dca57d3..3ffda125c06c 100644 --- a/ydb/library/yql/providers/yt/provider/ut/yql_yt_cbo_ut.cpp +++ b/ydb/library/yql/providers/yt/provider/ut/yql_yt_cbo_ut.cpp @@ -77,10 +77,11 @@ Y_UNIT_TEST(NonReordable) { auto left = std::make_shared("a", stat); auto right = std::make_shared("a", stat); - 
std::set> joinConditions; - joinConditions.insert({NDq::TJoinColumn{"a", "b"}, NDq::TJoinColumn{"a","c"}}); + TVector leftKeys = {NDq::TJoinColumn{"a", "b"}}; + TVector rightKeys = {NDq::TJoinColumn{"a","c"}}; + auto root = std::make_shared( - left, right, joinConditions, EJoinKind::InnerJoin, EJoinAlgoType::GraceJoin, false, false, true); + left, right, leftKeys, rightKeys, EJoinKind::InnerJoin, EJoinAlgoType::GraceJoin, false, false, true); TBaseProviderContext optCtx; std::unique_ptr opt = std::unique_ptr(NDq::MakeNativeOptimizerNew(optCtx, 1024)); auto result = opt->JoinSearch(root); diff --git a/ydb/library/yql/providers/yt/provider/yql_yt_join_reorder.cpp b/ydb/library/yql/providers/yt/provider/yql_yt_join_reorder.cpp index 620963d41ca6..cb4da0b600ea 100644 --- a/ydb/library/yql/providers/yt/provider/yql_yt_join_reorder.cpp +++ b/ydb/library/yql/providers/yt/provider/yql_yt_join_reorder.cpp @@ -155,11 +155,12 @@ class TYtJoinOptimizerNode: public TJoinOptimizerNode { public: TYtJoinOptimizerNode(const std::shared_ptr& left, const std::shared_ptr& right, - const std::set>& joinConditions, + const TVector& leftKeys, + const TVector& rightKeys, const EJoinKind joinType, const EJoinAlgoType joinAlgo, TYtJoinNodeOp* originalOp) - : TJoinOptimizerNode(left, right, joinConditions, joinType, joinAlgo, + : TJoinOptimizerNode(left, right, leftKeys, rightKeys, joinType, joinAlgo, originalOp ? originalOp->LinkSettings.LeftHints.contains("any") : false, originalOp ? 
originalOp->LinkSettings.RightHints.contains("any") : false, originalOp != nullptr) @@ -201,7 +202,8 @@ class TOptimizerTreeBuilder auto left = ProcessNode(op->Left); auto right = ProcessNode(op->Right); YQL_ENSURE(op->LeftLabel->ChildrenSize() == op->RightLabel->ChildrenSize()); - std::set> joinConditions; + TVector leftKeys; + TVector rightKeys; for (ui32 i = 0; i < op->LeftLabel->ChildrenSize(); i += 2) { auto ltable = op->LeftLabel->Child(i)->Content(); auto lcolumn = op->LeftLabel->Child(i + 1)->Content(); @@ -209,14 +211,15 @@ class TOptimizerTreeBuilder auto rcolumn = op->RightLabel->Child(i + 1)->Content(); NDq::TJoinColumn lcol{TString(ltable), TString(lcolumn)}; NDq::TJoinColumn rcol{TString(rtable), TString(rcolumn)}; - joinConditions.insert({lcol, rcol}); + leftKeys.push_back(lcol); + rightKeys.push_back(rcol); } bool nonReorderable = op->LinkSettings.ForceSortedMerge; Ctx->HasForceSortedMerge = Ctx->HasForceSortedMerge || op->LinkSettings.ForceSortedMerge; Ctx->HasHints = Ctx->HasHints || !op->LinkSettings.LeftHints.empty() || !op->LinkSettings.RightHints.empty(); return std::make_shared( - left, right, joinConditions, joinKind, EJoinAlgoType::GraceJoin, nonReorderable ? op : nullptr + left, right, leftKeys, rightKeys, joinKind, EJoinAlgoType::GraceJoin, nonReorderable ? 
op : nullptr ); } @@ -278,12 +281,13 @@ TYtJoinNode::TPtr BuildYtJoinTree(std::shared_ptr node, TVec ret = MakeIntrusive(); ret->JoinKind = ctx.NewAtom(pos, ConvertToJoinString(op->JoinType)); TVector leftLabel, rightLabel; - leftLabel.reserve(op->JoinConditions.size() * 2); - rightLabel.reserve(op->JoinConditions.size() * 2); - for (auto& [left, right] : op->JoinConditions) { + leftLabel.reserve(op->LeftJoinKeys.size() * 2); + rightLabel.reserve(op->RightJoinKeys.size() * 2); + for (auto& left : op->LeftJoinKeys) { leftLabel.emplace_back(ctx.NewAtom(pos, left.RelName)); leftLabel.emplace_back(ctx.NewAtom(pos, left.AttributeName)); - + } + for (auto& right : op->RightJoinKeys) { rightLabel.emplace_back(ctx.NewAtom(pos, right.RelName)); rightLabel.emplace_back(ctx.NewAtom(pos, right.AttributeName)); } diff --git a/ydb/library/yql/sql/pg/optimizer.cpp b/ydb/library/yql/sql/pg/optimizer.cpp index a60464d168a8..47e46d53a3fa 100644 --- a/ydb/library/yql/sql/pg/optimizer.cpp +++ b/ydb/library/yql/sql/pg/optimizer.cpp @@ -491,11 +491,11 @@ struct TPgOptimizerImpl std::vector>& rightVars, const std::shared_ptr& op) { - for (auto& [l, r]: op->JoinConditions) { - auto& ltable = l.RelName; - auto& lcol = l.AttributeName; - auto& rtable = r.RelName; - auto& rcol = r.AttributeName; + for (size_t i=0; iLeftJoinKeys.size(); i++ ) { + auto& ltable = op->LeftJoinKeys[i].RelName; + auto& lcol = op->LeftJoinKeys[i].AttributeName; + auto& rtable = op->RightJoinKeys[i].RelName; + auto& rcol = op->RightJoinKeys[i].AttributeName; const auto& lrelIds = Table2RelIds[ltable]; YQL_ENSURE(!lrelIds.empty()); @@ -562,7 +562,7 @@ struct TPgOptimizerImpl MakeEqClasses(EqClasses, leftVars, rightVars); } else if (op->JoinType == LeftJoin || op->JoinType == RightJoin) { - CHECK(op->JoinConditions.size() == 1, "Only 1 var per join supported"); + CHECK(op->LeftJoinKeys.size() == 1 && op->RightJoinKeys.size() == 1, "Only 1 var per join supported"); std::vector> leftVars, rightVars; 
ExtractVars(leftVars, rightVars, op); @@ -637,22 +637,23 @@ struct TPgOptimizerImpl YQL_ENSURE(node->LeftVars.size() == node->RightVars.size()); - std::set> joinConditions; + TVector leftJoinKeys; + TVector rightJoinKeys; + for (size_t i = 0; i < node->LeftVars.size(); i++) { auto [lrelId, lvarId] = node->LeftVars[i]; auto [rrelId, rvarId] = node->RightVars[i]; auto [ltable, lcolumn] = Var2TableCol[lrelId - 1][lvarId - 1]; auto [rtable, rcolumn] = Var2TableCol[rrelId - 1][rvarId - 1]; - joinConditions.insert({ - NDq::TJoinColumn{TString(ltable), TString(lcolumn)}, - NDq::TJoinColumn{TString(rtable), TString(rcolumn)} - }); + leftJoinKeys.push_back(NDq::TJoinColumn(TString(ltable), TString(lcolumn))); + rightJoinKeys.push_back(NDq::TJoinColumn(TString(rtable), TString(rcolumn))); } return std::make_shared( left, right, - joinConditions, + leftJoinKeys, + rightJoinKeys, joinKind, EJoinAlgoType::MapJoin, false, diff --git a/ydb/library/yql/tests/sql/dq_file/part17/canondata/result.json b/ydb/library/yql/tests/sql/dq_file/part17/canondata/result.json index da50af5c1f5b..abc4f5033453 100644 --- a/ydb/library/yql/tests/sql/dq_file/part17/canondata/result.json +++ b/ydb/library/yql/tests/sql/dq_file/part17/canondata/result.json @@ -762,9 +762,9 @@ ], "test.test[dq-join_cbo_native_3_tables--Debug]": [ { - "checksum": "91570a2f667516ba1f3f28642698441f", - "size": 4802, - "uri": "https://{canondata_backend}/1942278/d3f67196e7e0096e289743f5dbfd5dc2f990f9e6/resource.tar.gz#test.test_dq-join_cbo_native_3_tables--Debug_/opt.yql_patched" + "checksum": "bc4f0d3c80bc05fdb553d9d07ed58fd2", + "size": 4846, + "uri": "https://{canondata_backend}/1597364/aa2251cc1cffd9f5ef1d8d1793ee54509ab8cdfc/resource.tar.gz#test.test_dq-join_cbo_native_3_tables--Debug_/opt.yql_patched" } ], "test.test[dq-join_cbo_native_3_tables--Plan]": [ From 4d1257e64141c9bc60787779f0634e4cee714499 Mon Sep 17 00:00:00 2001 From: pilik Date: Wed, 16 Oct 2024 17:57:03 +0300 Subject: [PATCH 3/7] [CBO] Improve 
Transitive Closure + Cycle processing added (#10121) --- ydb/core/kqp/ut/common/kqp_ut_common.cpp | 18 + ydb/core/kqp/ut/common/kqp_ut_common.h | 2 + .../ut/join/data/join_order/lookupbug.json | 1 + .../kqp/ut/join/data/join_order/tpcc.json | 1 + .../join/data/join_order/tpcds64_1000s.json | 281 +++++++-------- .../tpcds64_1000s_column_store.json | 281 +++++++-------- .../data/join_order/tpcds64_small_1000s.json | 1 + .../tpcds64_small_1000s_column_store.json | 1 + .../join/data/join_order/tpcds78_1000s.json | 1 + .../tpcds78_1000s_column_store.json | 1 + .../ut/join/data/join_order/tpch2_1000s.json | 39 ++- .../join_order/tpch2_1000s_column_store.json | 41 +-- .../ut/join/data/join_order/tpch9_1000s.json | 39 ++- .../join_order/tpch9_1000s_column_store.json | 39 ++- ydb/core/kqp/ut/join/kqp_join_order_ut.cpp | 22 +- ydb/library/yql/core/yql_cost_function.h | 10 +- ydb/library/yql/dq/opt/dq_cbo_ut.cpp | 2 +- .../dq/opt/dq_opt_conflict_rules_collector.h | 2 +- ydb/library/yql/dq/opt/dq_opt_dphyp_solver.h | 21 +- .../yql/dq/opt/dq_opt_hypergraph_ut.cpp | 325 +++++++++++++++--- .../yql/dq/opt/dq_opt_join_cost_based.cpp | 33 +- .../yql/dq/opt/dq_opt_join_hypergraph.h | 276 +++++++-------- .../yql/dq/opt/dq_opt_make_join_hypergraph.h | 69 +++- .../sql/dq_file/part17/canondata/result.json | 6 +- 24 files changed, 929 insertions(+), 583 deletions(-) diff --git a/ydb/core/kqp/ut/common/kqp_ut_common.cpp b/ydb/core/kqp/ut/common/kqp_ut_common.cpp index 98f95d79dd20..fd2ee35ffc47 100644 --- a/ydb/core/kqp/ut/common/kqp_ut_common.cpp +++ b/ydb/core/kqp/ut/common/kqp_ut_common.cpp @@ -1463,6 +1463,24 @@ NJson::TJsonValue GetJoinOrder(const TString& deserializedPlan) { return GetJoinOrderImpl(optRoot); } +NJson::TJsonValue GetJoinOrderFromDetailedJoinOrderImpl(const NJson::TJsonValue& opt) { + if (!opt.GetMapSafe().contains("table")) { + NJson::TJsonValue res; + auto args = opt.GetMapSafe().at("args").GetArraySafe(); + for (size_t i = 0; i < args.size(); ++i) { + 
res.AppendValue(GetJoinOrderFromDetailedJoinOrderImpl(args[i])); + } + return res; + } + + return opt.GetMapSafe().at("table"); +} + +NJson::TJsonValue GetJoinOrderFromDetailedJoinOrder(const TString& deserializedDetailedJoinOrder) { + NJson::TJsonValue optRoot; + NJson::ReadJsonTree(deserializedDetailedJoinOrder, &optRoot, true); + return GetJoinOrderFromDetailedJoinOrderImpl(optRoot); +} } // namspace NKqp } // namespace NKikimr diff --git a/ydb/core/kqp/ut/common/kqp_ut_common.h b/ydb/core/kqp/ut/common/kqp_ut_common.h index b91d5a195e5d..1070cd253ec3 100644 --- a/ydb/core/kqp/ut/common/kqp_ut_common.h +++ b/ydb/core/kqp/ut/common/kqp_ut_common.h @@ -350,5 +350,7 @@ NJson::TJsonValue GetDetailedJoinOrder(const TString& deserializedPlan, const TG /* Gets tables join order without details : only tables. */ NJson::TJsonValue GetJoinOrder(const TString& deserializedPlan); +NJson::TJsonValue GetJoinOrderFromDetailedJoinOrder(const TString& deserializedDetailedJoinOrder); + } // namespace NKqp } // namespace NKikimr diff --git a/ydb/core/kqp/ut/join/data/join_order/lookupbug.json b/ydb/core/kqp/ut/join/data/join_order/lookupbug.json index b57351147ecf..54a446f6c1db 100644 --- a/ydb/core/kqp/ut/join/data/join_order/lookupbug.json +++ b/ydb/core/kqp/ut/join/data/join_order/lookupbug.json @@ -60,3 +60,4 @@ } ] } + diff --git a/ydb/core/kqp/ut/join/data/join_order/tpcc.json b/ydb/core/kqp/ut/join/data/join_order/tpcc.json index 54ab7ad3c989..0ec6395ac475 100644 --- a/ydb/core/kqp/ut/join/data/join_order/tpcc.json +++ b/ydb/core/kqp/ut/join/data/join_order/tpcc.json @@ -24,3 +24,4 @@ } ] } + diff --git a/ydb/core/kqp/ut/join/data/join_order/tpcds64_1000s.json b/ydb/core/kqp/ut/join/data/join_order/tpcds64_1000s.json index 7f6a30060565..cbc1461ddba9 100644 --- a/ydb/core/kqp/ut/join/data/join_order/tpcds64_1000s.json +++ b/ydb/core/kqp/ut/join/data/join_order/tpcds64_1000s.json @@ -6,6 +6,30 @@ "op_name":"InnerJoin (Grace)", "args": [ + { + "op_name":"InnerJoin (Grace)", + 
"args": + [ + { + "op_name":"InnerJoin (Grace)", + "args": + [ + { + "op_name":"TableFullScan", + "table":"test\/ds\/catalog_sales" + }, + { + "op_name":"TableFullScan", + "table":"test\/ds\/catalog_returns" + } + ] + }, + { + "op_name":"TableFullScan", + "table":"test\/ds\/store_returns" + } + ] + }, { "op_name":"InnerJoin (MapJoin)", "args": @@ -23,7 +47,7 @@ "args": [ { - "op_name":"InnerJoin (Grace)", + "op_name":"InnerJoin (MapJoin)", "args": [ { @@ -43,34 +67,64 @@ "args": [ { - "op_name":"TableFullScan", - "table":"test\/ds\/store_sales" - }, - { - "op_name":"InnerJoin (Grace)", + "op_name":"InnerJoin (MapJoin)", "args": [ { - "op_name":"TableFullScan", - "table":"test\/ds\/catalog_sales" + "op_name":"InnerJoin (Grace)", + "args": + [ + { + "op_name":"TableFullScan", + "table":"test\/ds\/customer" + }, + { + "op_name":"InnerJoin (Grace)", + "args": + [ + { + "op_name":"InnerJoin (MapJoin)", + "args": + [ + { + "op_name":"TableLookup", + "table":"test\/ds\/store_sales" + }, + { + "op_name":"TableFullScan", + "table":"test\/ds\/item" + } + ] + }, + { + "op_name":"TableFullScan", + "table":"test\/ds\/customer_address" + } + ] + } + ] }, { "op_name":"TableFullScan", - "table":"test\/ds\/catalog_returns" + "table":"test\/ds\/date_dim" } ] + }, + { + "op_name":"TableFullScan", + "table":"test\/ds\/customer_demographics" } ] }, { "op_name":"TableFullScan", - "table":"test\/ds\/item" + "table":"test\/ds\/store" } ] }, { "op_name":"TableFullScan", - "table":"test\/ds\/store_returns" + "table":"test\/ds\/customer_demographics" } ] }, @@ -82,7 +136,7 @@ }, { "op_name":"TableFullScan", - "table":"test\/ds\/customer_demographics" + "table":"test\/ds\/date_dim" } ] }, @@ -94,77 +148,23 @@ }, { "op_name":"TableFullScan", - "table":"test\/ds\/store" + "table":"test\/ds\/promotion" } ] }, - { - "op_name":"TableFullScan", - "table":"test\/ds\/promotion" - } - ] - }, - { - "op_name":"InnerJoin (MapJoin)", - "args": - [ - { - "op_name":"TableFullScan", - 
"table":"test\/ds\/household_demographics" - }, - { - "op_name":"TableFullScan", - "table":"test\/ds\/income_band" - } - ] - } - ] - }, - { - "op_name":"InnerJoin (MapJoin)", - "args": - [ - { - "op_name":"InnerJoin (MapJoin)", - "args": - [ { "op_name":"InnerJoin (MapJoin)", "args": [ { - "op_name":"InnerJoin (Grace)", - "args": - [ - { - "op_name":"InnerJoin (Grace)", - "args": - [ - { - "op_name":"TableFullScan", - "table":"test\/ds\/customer" - }, - { - "op_name":"TableFullScan", - "table":"test\/ds\/customer_demographics" - } - ] - }, - { - "op_name":"TableFullScan", - "table":"test\/ds\/customer_address" - } - ] + "op_name":"TableFullScan", + "table":"test\/ds\/household_demographics" }, { "op_name":"TableFullScan", - "table":"test\/ds\/date_dim" + "table":"test\/ds\/income_band" } ] - }, - { - "op_name":"TableFullScan", - "table":"test\/ds\/date_dim" } ] }, @@ -190,6 +190,30 @@ "op_name":"InnerJoin (Grace)", "args": [ + { + "op_name":"InnerJoin (Grace)", + "args": + [ + { + "op_name":"InnerJoin (Grace)", + "args": + [ + { + "op_name":"TableFullScan", + "table":"test\/ds\/catalog_sales" + }, + { + "op_name":"TableFullScan", + "table":"test\/ds\/catalog_returns" + } + ] + }, + { + "op_name":"TableFullScan", + "table":"test\/ds\/store_returns" + } + ] + }, { "op_name":"InnerJoin (MapJoin)", "args": @@ -207,7 +231,7 @@ "args": [ { - "op_name":"InnerJoin (Grace)", + "op_name":"InnerJoin (MapJoin)", "args": [ { @@ -227,34 +251,64 @@ "args": [ { - "op_name":"TableFullScan", - "table":"test\/ds\/store_sales" - }, - { - "op_name":"InnerJoin (Grace)", + "op_name":"InnerJoin (MapJoin)", "args": [ { - "op_name":"TableFullScan", - "table":"test\/ds\/catalog_sales" + "op_name":"InnerJoin (Grace)", + "args": + [ + { + "op_name":"TableFullScan", + "table":"test\/ds\/customer" + }, + { + "op_name":"InnerJoin (Grace)", + "args": + [ + { + "op_name":"InnerJoin (MapJoin)", + "args": + [ + { + "op_name":"TableLookup", + "table":"test\/ds\/store_sales" + }, + { + 
"op_name":"TableFullScan", + "table":"test\/ds\/item" + } + ] + }, + { + "op_name":"TableFullScan", + "table":"test\/ds\/customer_address" + } + ] + } + ] }, { "op_name":"TableFullScan", - "table":"test\/ds\/catalog_returns" + "table":"test\/ds\/date_dim" } ] + }, + { + "op_name":"TableFullScan", + "table":"test\/ds\/customer_demographics" } ] }, { "op_name":"TableFullScan", - "table":"test\/ds\/item" + "table":"test\/ds\/store" } ] }, { "op_name":"TableFullScan", - "table":"test\/ds\/store_returns" + "table":"test\/ds\/customer_demographics" } ] }, @@ -266,7 +320,7 @@ }, { "op_name":"TableFullScan", - "table":"test\/ds\/customer_demographics" + "table":"test\/ds\/date_dim" } ] }, @@ -278,77 +332,23 @@ }, { "op_name":"TableFullScan", - "table":"test\/ds\/store" + "table":"test\/ds\/promotion" } ] }, - { - "op_name":"TableFullScan", - "table":"test\/ds\/promotion" - } - ] - }, - { - "op_name":"InnerJoin (MapJoin)", - "args": - [ - { - "op_name":"TableFullScan", - "table":"test\/ds\/household_demographics" - }, - { - "op_name":"TableFullScan", - "table":"test\/ds\/income_band" - } - ] - } - ] - }, - { - "op_name":"InnerJoin (MapJoin)", - "args": - [ - { - "op_name":"InnerJoin (MapJoin)", - "args": - [ { "op_name":"InnerJoin (MapJoin)", "args": [ { - "op_name":"InnerJoin (Grace)", - "args": - [ - { - "op_name":"InnerJoin (Grace)", - "args": - [ - { - "op_name":"TableFullScan", - "table":"test\/ds\/customer" - }, - { - "op_name":"TableFullScan", - "table":"test\/ds\/customer_demographics" - } - ] - }, - { - "op_name":"TableFullScan", - "table":"test\/ds\/customer_address" - } - ] + "op_name":"TableFullScan", + "table":"test\/ds\/household_demographics" }, { "op_name":"TableFullScan", - "table":"test\/ds\/date_dim" + "table":"test\/ds\/income_band" } ] - }, - { - "op_name":"TableFullScan", - "table":"test\/ds\/date_dim" } ] }, @@ -372,3 +372,4 @@ } ] } + diff --git a/ydb/core/kqp/ut/join/data/join_order/tpcds64_1000s_column_store.json 
b/ydb/core/kqp/ut/join/data/join_order/tpcds64_1000s_column_store.json index 7f6a30060565..ad5442c66b85 100644 --- a/ydb/core/kqp/ut/join/data/join_order/tpcds64_1000s_column_store.json +++ b/ydb/core/kqp/ut/join/data/join_order/tpcds64_1000s_column_store.json @@ -6,6 +6,30 @@ "op_name":"InnerJoin (Grace)", "args": [ + { + "op_name":"InnerJoin (Grace)", + "args": + [ + { + "op_name":"InnerJoin (Grace)", + "args": + [ + { + "op_name":"TableFullScan", + "table":"test\/ds\/catalog_sales" + }, + { + "op_name":"TableFullScan", + "table":"test\/ds\/catalog_returns" + } + ] + }, + { + "op_name":"TableFullScan", + "table":"test\/ds\/store_returns" + } + ] + }, { "op_name":"InnerJoin (MapJoin)", "args": @@ -23,7 +47,7 @@ "args": [ { - "op_name":"InnerJoin (Grace)", + "op_name":"InnerJoin (MapJoin)", "args": [ { @@ -39,38 +63,68 @@ "args": [ { - "op_name":"InnerJoin (Grace)", + "op_name":"InnerJoin (MapJoin)", "args": [ - { - "op_name":"TableFullScan", - "table":"test\/ds\/store_sales" - }, { "op_name":"InnerJoin (Grace)", "args": [ { "op_name":"TableFullScan", - "table":"test\/ds\/catalog_sales" + "table":"test\/ds\/customer" }, { - "op_name":"TableFullScan", - "table":"test\/ds\/catalog_returns" + "op_name":"InnerJoin (Grace)", + "args": + [ + { + "op_name":"InnerJoin (Grace)", + "args": + [ + { + "op_name":"InnerJoin (MapJoin)", + "args": + [ + { + "op_name":"TableFullScan", + "table":"test\/ds\/store_sales" + }, + { + "op_name":"TableFullScan", + "table":"test\/ds\/item" + } + ] + }, + { + "op_name":"TableFullScan", + "table":"test\/ds\/customer_address" + } + ] + }, + { + "op_name":"TableFullScan", + "table":"test\/ds\/customer_demographics" + } + ] } ] + }, + { + "op_name":"TableFullScan", + "table":"test\/ds\/date_dim" } ] }, { "op_name":"TableFullScan", - "table":"test\/ds\/item" + "table":"test\/ds\/store" } ] }, { "op_name":"TableFullScan", - "table":"test\/ds\/store_returns" + "table":"test\/ds\/customer_demographics" } ] }, @@ -82,7 +136,7 @@ }, { 
"op_name":"TableFullScan", - "table":"test\/ds\/customer_demographics" + "table":"test\/ds\/date_dim" } ] }, @@ -94,77 +148,23 @@ }, { "op_name":"TableFullScan", - "table":"test\/ds\/store" + "table":"test\/ds\/promotion" } ] }, - { - "op_name":"TableFullScan", - "table":"test\/ds\/promotion" - } - ] - }, - { - "op_name":"InnerJoin (MapJoin)", - "args": - [ - { - "op_name":"TableFullScan", - "table":"test\/ds\/household_demographics" - }, - { - "op_name":"TableFullScan", - "table":"test\/ds\/income_band" - } - ] - } - ] - }, - { - "op_name":"InnerJoin (MapJoin)", - "args": - [ - { - "op_name":"InnerJoin (MapJoin)", - "args": - [ { "op_name":"InnerJoin (MapJoin)", "args": [ { - "op_name":"InnerJoin (Grace)", - "args": - [ - { - "op_name":"InnerJoin (Grace)", - "args": - [ - { - "op_name":"TableFullScan", - "table":"test\/ds\/customer" - }, - { - "op_name":"TableFullScan", - "table":"test\/ds\/customer_demographics" - } - ] - }, - { - "op_name":"TableFullScan", - "table":"test\/ds\/customer_address" - } - ] + "op_name":"TableFullScan", + "table":"test\/ds\/household_demographics" }, { "op_name":"TableFullScan", - "table":"test\/ds\/date_dim" + "table":"test\/ds\/income_band" } ] - }, - { - "op_name":"TableFullScan", - "table":"test\/ds\/date_dim" } ] }, @@ -190,6 +190,30 @@ "op_name":"InnerJoin (Grace)", "args": [ + { + "op_name":"InnerJoin (Grace)", + "args": + [ + { + "op_name":"InnerJoin (Grace)", + "args": + [ + { + "op_name":"TableFullScan", + "table":"test\/ds\/catalog_sales" + }, + { + "op_name":"TableFullScan", + "table":"test\/ds\/catalog_returns" + } + ] + }, + { + "op_name":"TableFullScan", + "table":"test\/ds\/store_returns" + } + ] + }, { "op_name":"InnerJoin (MapJoin)", "args": @@ -207,7 +231,7 @@ "args": [ { - "op_name":"InnerJoin (Grace)", + "op_name":"InnerJoin (MapJoin)", "args": [ { @@ -223,38 +247,68 @@ "args": [ { - "op_name":"InnerJoin (Grace)", + "op_name":"InnerJoin (MapJoin)", "args": [ - { - "op_name":"TableFullScan", - 
"table":"test\/ds\/store_sales" - }, { "op_name":"InnerJoin (Grace)", "args": [ { "op_name":"TableFullScan", - "table":"test\/ds\/catalog_sales" + "table":"test\/ds\/customer" }, { - "op_name":"TableFullScan", - "table":"test\/ds\/catalog_returns" + "op_name":"InnerJoin (Grace)", + "args": + [ + { + "op_name":"InnerJoin (Grace)", + "args": + [ + { + "op_name":"InnerJoin (MapJoin)", + "args": + [ + { + "op_name":"TableFullScan", + "table":"test\/ds\/store_sales" + }, + { + "op_name":"TableFullScan", + "table":"test\/ds\/item" + } + ] + }, + { + "op_name":"TableFullScan", + "table":"test\/ds\/customer_address" + } + ] + }, + { + "op_name":"TableFullScan", + "table":"test\/ds\/customer_demographics" + } + ] } ] + }, + { + "op_name":"TableFullScan", + "table":"test\/ds\/date_dim" } ] }, { "op_name":"TableFullScan", - "table":"test\/ds\/item" + "table":"test\/ds\/store" } ] }, { "op_name":"TableFullScan", - "table":"test\/ds\/store_returns" + "table":"test\/ds\/customer_demographics" } ] }, @@ -266,7 +320,7 @@ }, { "op_name":"TableFullScan", - "table":"test\/ds\/customer_demographics" + "table":"test\/ds\/date_dim" } ] }, @@ -278,77 +332,23 @@ }, { "op_name":"TableFullScan", - "table":"test\/ds\/store" + "table":"test\/ds\/promotion" } ] }, - { - "op_name":"TableFullScan", - "table":"test\/ds\/promotion" - } - ] - }, - { - "op_name":"InnerJoin (MapJoin)", - "args": - [ - { - "op_name":"TableFullScan", - "table":"test\/ds\/household_demographics" - }, - { - "op_name":"TableFullScan", - "table":"test\/ds\/income_band" - } - ] - } - ] - }, - { - "op_name":"InnerJoin (MapJoin)", - "args": - [ - { - "op_name":"InnerJoin (MapJoin)", - "args": - [ { "op_name":"InnerJoin (MapJoin)", "args": [ { - "op_name":"InnerJoin (Grace)", - "args": - [ - { - "op_name":"InnerJoin (Grace)", - "args": - [ - { - "op_name":"TableFullScan", - "table":"test\/ds\/customer" - }, - { - "op_name":"TableFullScan", - "table":"test\/ds\/customer_demographics" - } - ] - }, - { - 
"op_name":"TableFullScan", - "table":"test\/ds\/customer_address" - } - ] + "op_name":"TableFullScan", + "table":"test\/ds\/household_demographics" }, { "op_name":"TableFullScan", - "table":"test\/ds\/date_dim" + "table":"test\/ds\/income_band" } ] - }, - { - "op_name":"TableFullScan", - "table":"test\/ds\/date_dim" } ] }, @@ -372,3 +372,4 @@ } ] } + diff --git a/ydb/core/kqp/ut/join/data/join_order/tpcds64_small_1000s.json b/ydb/core/kqp/ut/join/data/join_order/tpcds64_small_1000s.json index 5ce49b9af11a..12594bc92954 100644 --- a/ydb/core/kqp/ut/join/data/join_order/tpcds64_small_1000s.json +++ b/ydb/core/kqp/ut/join/data/join_order/tpcds64_small_1000s.json @@ -52,3 +52,4 @@ } ] } + diff --git a/ydb/core/kqp/ut/join/data/join_order/tpcds64_small_1000s_column_store.json b/ydb/core/kqp/ut/join/data/join_order/tpcds64_small_1000s_column_store.json index 5ce49b9af11a..12594bc92954 100644 --- a/ydb/core/kqp/ut/join/data/join_order/tpcds64_small_1000s_column_store.json +++ b/ydb/core/kqp/ut/join/data/join_order/tpcds64_small_1000s_column_store.json @@ -52,3 +52,4 @@ } ] } + diff --git a/ydb/core/kqp/ut/join/data/join_order/tpcds78_1000s.json b/ydb/core/kqp/ut/join/data/join_order/tpcds78_1000s.json index 0b7c1e407f39..cf514b8165a0 100644 --- a/ydb/core/kqp/ut/join/data/join_order/tpcds78_1000s.json +++ b/ydb/core/kqp/ut/join/data/join_order/tpcds78_1000s.json @@ -82,3 +82,4 @@ } ] } + diff --git a/ydb/core/kqp/ut/join/data/join_order/tpcds78_1000s_column_store.json b/ydb/core/kqp/ut/join/data/join_order/tpcds78_1000s_column_store.json index 0b7c1e407f39..cf514b8165a0 100644 --- a/ydb/core/kqp/ut/join/data/join_order/tpcds78_1000s_column_store.json +++ b/ydb/core/kqp/ut/join/data/join_order/tpcds78_1000s_column_store.json @@ -82,3 +82,4 @@ } ] } + diff --git a/ydb/core/kqp/ut/join/data/join_order/tpch2_1000s.json b/ydb/core/kqp/ut/join/data/join_order/tpch2_1000s.json index 04ca4d8b671c..c6fe57f61e98 100644 --- a/ydb/core/kqp/ut/join/data/join_order/tpch2_1000s.json +++ 
b/ydb/core/kqp/ut/join/data/join_order/tpch2_1000s.json @@ -87,10 +87,45 @@ ] }, { - "op_name":"TableFullScan", - "table":"part" + "op_name":"InnerJoin (Grace)", + "args": + [ + { + "op_name":"TableFullScan", + "table":"partsupp" + }, + { + "op_name":"InnerJoin (MapJoin)", + "args": + [ + { + "op_name":"TableFullScan", + "table":"supplier" + }, + { + "op_name":"InnerJoin (MapJoin)", + "args": + [ + { + "op_name":"TableLookup", + "table":"region" + }, + { + "op_name":"TableFullScan", + "table":"nation" + } + ] + } + ] + } + ] } ] + }, + { + "op_name":"TableFullScan", + "table":"part" } ] } + diff --git a/ydb/core/kqp/ut/join/data/join_order/tpch2_1000s_column_store.json b/ydb/core/kqp/ut/join/data/join_order/tpch2_1000s_column_store.json index e9b2154c2720..1a0edf0bd330 100644 --- a/ydb/core/kqp/ut/join/data/join_order/tpch2_1000s_column_store.json +++ b/ydb/core/kqp/ut/join/data/join_order/tpch2_1000s_column_store.json @@ -7,16 +7,12 @@ "args": [ { - "op_name":"TableFullScan", - "table":"partsupp" - }, - { - "op_name":"InnerJoin (MapJoin)", + "op_name":"InnerJoin (Grace)", "args": [ { "op_name":"TableFullScan", - "table":"supplier" + "table":"partsupp" }, { "op_name":"InnerJoin (MapJoin)", @@ -24,22 +20,26 @@ [ { "op_name":"TableFullScan", - "table":"nation" + "table":"supplier" }, { - "op_name":"TableFullScan", - "table":"region" + "op_name":"InnerJoin (MapJoin)", + "args": + [ + { + "op_name":"TableFullScan", + "table":"nation" + }, + { + "op_name":"TableFullScan", + "table":"region" + } + ] } ] } ] - } - ] - }, - { - "op_name":"InnerJoin (Grace)", - "args": - [ + }, { "op_name":"InnerJoin (Grace)", "args": @@ -73,12 +73,13 @@ ] } ] - }, - { - "op_name":"TableFullScan", - "table":"part" } ] + }, + { + "op_name":"TableFullScan", + "table":"part" } ] } + diff --git a/ydb/core/kqp/ut/join/data/join_order/tpch9_1000s.json b/ydb/core/kqp/ut/join/data/join_order/tpch9_1000s.json index 606c72699f4f..2239d496bf83 100644 --- 
a/ydb/core/kqp/ut/join/data/join_order/tpch9_1000s.json +++ b/ydb/core/kqp/ut/join/data/join_order/tpch9_1000s.json @@ -2,22 +2,22 @@ "op_name":"InnerJoin (Grace)", "args": [ + { + "op_name":"TableFullScan", + "table":"orders" + }, { "op_name":"InnerJoin (Grace)", "args": [ { "op_name":"TableFullScan", - "table":"orders" + "table":"lineitem" }, { "op_name":"InnerJoin (Grace)", "args": [ - { - "op_name":"TableFullScan", - "table":"lineitem" - }, { "op_name":"InnerJoin (Grace)", "args": @@ -31,24 +31,25 @@ "table":"part" } ] + }, + { + "op_name":"InnerJoin (MapJoin)", + "args": + [ + { + "op_name":"TableFullScan", + "table":"supplier" + }, + { + "op_name":"TableFullScan", + "table":"nation" + } + ] } ] } ] - }, - { - "op_name":"InnerJoin (MapJoin)", - "args": - [ - { - "op_name":"TableFullScan", - "table":"supplier" - }, - { - "op_name":"TableFullScan", - "table":"nation" - } - ] } ] } + diff --git a/ydb/core/kqp/ut/join/data/join_order/tpch9_1000s_column_store.json b/ydb/core/kqp/ut/join/data/join_order/tpch9_1000s_column_store.json index 606c72699f4f..2239d496bf83 100644 --- a/ydb/core/kqp/ut/join/data/join_order/tpch9_1000s_column_store.json +++ b/ydb/core/kqp/ut/join/data/join_order/tpch9_1000s_column_store.json @@ -2,22 +2,22 @@ "op_name":"InnerJoin (Grace)", "args": [ + { + "op_name":"TableFullScan", + "table":"orders" + }, { "op_name":"InnerJoin (Grace)", "args": [ { "op_name":"TableFullScan", - "table":"orders" + "table":"lineitem" }, { "op_name":"InnerJoin (Grace)", "args": [ - { - "op_name":"TableFullScan", - "table":"lineitem" - }, { "op_name":"InnerJoin (Grace)", "args": @@ -31,24 +31,25 @@ "table":"part" } ] + }, + { + "op_name":"InnerJoin (MapJoin)", + "args": + [ + { + "op_name":"TableFullScan", + "table":"supplier" + }, + { + "op_name":"TableFullScan", + "table":"nation" + } + ] } ] } ] - }, - { - "op_name":"InnerJoin (MapJoin)", - "args": - [ - { - "op_name":"TableFullScan", - "table":"supplier" - }, - { - "op_name":"TableFullScan", - 
"table":"nation" - } - ] } ] } + diff --git a/ydb/core/kqp/ut/join/kqp_join_order_ut.cpp b/ydb/core/kqp/ut/join/kqp_join_order_ut.cpp index 706c8f73ccd1..29a95d24f186 100644 --- a/ydb/core/kqp/ut/join/kqp_join_order_ut.cpp +++ b/ydb/core/kqp/ut/join/kqp_join_order_ut.cpp @@ -55,6 +55,16 @@ void CreateTables(TSession session, const TString& schemaPath, bool useColumnSto res.GetIssues().PrintTo(Cerr); UNIT_ASSERT(res.IsSuccess()); } + +TString GetPrettyJSON(const NJson::TJsonValue& json) { + TStringStream ss; + NJsonWriter::TBuf writer; + writer.SetIndentSpaces(2); + writer.WriteJsonValue(&json); + writer.FlushTo(&ss); ss << Endl; + return ss.Str(); +} + /* * A basic join order test. We define 5 tables sharing the same * key attribute and construct various full clique join queries @@ -552,21 +562,19 @@ Y_UNIT_TEST_SUITE(KqpJoinOrder) { correctJoinOrderPath = correctJoinOrderPath.substr(0, correctJoinOrderPath.find(".json")) + "_column_store.json"; } - auto currentJoinOrder = GetDetailedJoinOrder(result.GetPlan()); - Cerr << currentJoinOrder << Endl; + auto currentJoinOrder = GetPrettyJSON(GetDetailedJoinOrder(result.GetPlan())); + /* to canonize the tests use --test-param CANONIZE_JOIN_ORDER_TESTS=TRUE */ TString canonize = GetTestParam("CANONIZE_JOIN_ORDER_TESTS"); canonize.to_lower(); if (canonize.equal("true")) { Cerr << "--------------------CANONIZING THE TESTS--------------------"; TOFStream stream(SRC_("data/" + correctJoinOrderPath)); - NJsonWriter::TBuf writer; - writer.SetIndentSpaces(2); - writer.WriteJsonValue(¤tJoinOrder); - writer.FlushTo(&stream); - stream << Endl; + stream << currentJoinOrder << Endl; } TString ref = GetStatic(correctJoinOrderPath); + Cout << "actual\n" << GetJoinOrder(result.GetPlan()).GetStringRobust() << Endl; + Cout << "expected\n" << GetJoinOrderFromDetailedJoinOrder(ref).GetStringRobust() << Endl; UNIT_ASSERT(JoinOrderAndAlgosMatch(result.GetPlan(), ref)); } } diff --git a/ydb/library/yql/core/yql_cost_function.h 
b/ydb/library/yql/core/yql_cost_function.h index b69c5941db65..51479f1fb12a 100644 --- a/ydb/library/yql/core/yql_cost_function.h +++ b/ydb/library/yql/core/yql_cost_function.h @@ -36,10 +36,10 @@ namespace NDq { * attribute name, used in join conditions */ struct TJoinColumn { - TString RelName; - TString AttributeName; - TString AttributeNameWithAliases; - ui32 EquivalenceClass = 0; + TString RelName{}; + TString AttributeName{}; + TString AttributeNameWithAliases{}; + std::optional EquivalenceClass{}; bool IsConstant = false; TJoinColumn(TString relName, TString attributeName) : @@ -51,7 +51,7 @@ struct TJoinColumn { return RelName == other.RelName && AttributeName == other.AttributeName; } - struct HashFunction + struct THashFunction { size_t operator()(const TJoinColumn& c) const { diff --git a/ydb/library/yql/dq/opt/dq_cbo_ut.cpp b/ydb/library/yql/dq/opt/dq_cbo_ut.cpp index cd6a8026d2ce..96b666b64ffe 100644 --- a/ydb/library/yql/dq/opt/dq_cbo_ut.cpp +++ b/ydb/library/yql/dq/opt/dq_cbo_ut.cpp @@ -118,7 +118,7 @@ Y_UNIT_TEST(JoinSearch3Rels) { res->Print(ss); Cout << ss.str() << '\n'; - TString expected = R"__(Join: (InnerJoin,MapJoin,LeftAny) a.1=b.1,a.1=c.1, + TString expected = R"__(Join: (InnerJoin,MapJoin,LeftAny) a.1=b.1, Type: ManyManyJoin, Nrows: 4e+13, Ncols: 3, ByteSize: 0, Cost: 4.004e+13, Sel: 1, Storage: NA Join: (InnerJoin,MapJoin) b.1=a.1, Type: ManyManyJoin, Nrows: 2e+10, Ncols: 2, ByteSize: 0, Cost: 2.00112e+10, Sel: 1, Storage: NA diff --git a/ydb/library/yql/dq/opt/dq_opt_conflict_rules_collector.h b/ydb/library/yql/dq/opt/dq_opt_conflict_rules_collector.h index 4c4c20a9c71d..5d25d9cdc57b 100644 --- a/ydb/library/yql/dq/opt/dq_opt_conflict_rules_collector.h +++ b/ydb/library/yql/dq/opt/dq_opt_conflict_rules_collector.h @@ -68,7 +68,7 @@ class TConflictRulesCollector { ConflictRules_.emplace_back( SubtreeNodes_[child->LeftArg], SubtreeNodes_[child->RightArg] - ); + ); } }; diff --git a/ydb/library/yql/dq/opt/dq_opt_dphyp_solver.h 
b/ydb/library/yql/dq/opt/dq_opt_dphyp_solver.h index 09b3a676ffc1..c54ff327efd4 100644 --- a/ydb/library/yql/dq/opt/dq_opt_dphyp_solver.h +++ b/ydb/library/yql/dq/opt/dq_opt_dphyp_solver.h @@ -32,6 +32,9 @@ namespace NYql::NDq { * * This class is templated by std::bitset with the largest number of joins we can process * or std::bitset<64>, which has a more efficient implementation of enumerating subsets of set. + * + * Also, it has a bool ProcessCycles template parameter, which makes algorithm consider all edges + * between csg-cmp. It makes dphyp slower, but without it we can miss a condition in case of cycles */ template class TDPHypSolver { @@ -58,7 +61,7 @@ class TDPHypSolver { void EnumerateCmpRec(const TNodeSet& s1, const TNodeSet& s2, const TNodeSet& x); - void EmitCsgCmp(const TNodeSet& s1, const TNodeSet& s2, const typename TJoinHypergraph::TEdge* csgCmpEdge); + void EmitCsgCmp(const TNodeSet& s1, const TNodeSet& s2, const typename TJoinHypergraph::TEdge* csgCmpEdge, const typename TJoinHypergraph::TEdge* reversedCsgCmpEdge); private: // Create an exclusion set that contains all the nodes of the graph that are smaller or equal to @@ -325,7 +328,7 @@ template void TDPHypSolver::EmitCsg(const TNodeSet s2[i] = 1; if (auto* edge = Graph_.FindEdgeBetween(s1, s2)) { - EmitCsgCmp(s1, s2, edge); + EmitCsgCmp(s1, s2, edge, &Graph_.GetEdge(edge->ReversedEdgeId)); } EnumerateCmpRec(s1, s2, x | MakeB(neighs, GetLowestSetBit(s2))); @@ -354,7 +357,7 @@ template void TDPHypSolver::EnumerateCmpRec(const if (DpTable_.contains(s2 | next)) { if (auto* edge = Graph_.FindEdgeBetween(s1, s2 | next)) { - EmitCsgCmp(s1, s2 | next, edge); + EmitCsgCmp(s1, s2 | next, edge, &Graph_.GetEdge(edge->ReversedEdgeId)); } } @@ -461,19 +464,23 @@ template std::shared_ptr TDPHypS /* * Emit a single CSG + CMP pair */ -template void TDPHypSolver::EmitCsgCmp(const TNodeSet& s1, const TNodeSet& s2, const typename TJoinHypergraph::TEdge* csgCmpEdge) { +template void TDPHypSolver::EmitCsgCmp( + 
const TNodeSet& s1, + const TNodeSet& s2, + const typename TJoinHypergraph::TEdge* csgCmpEdge, + const typename TJoinHypergraph::TEdge* reversedCsgCmpEdge +) { // Here we actually build the join and choose and compare the // new plan to what's in the dpTable, if it there Y_ENSURE(DpTable_.contains(s1), "DP Table does not contain S1"); Y_ENSURE(DpTable_.contains(s2), "DP Table does not conaint S2"); - const auto* reversedEdge = &Graph_.GetEdge(csgCmpEdge->ReversedEdgeId); auto leftNodes = DpTable_[s1]; auto rightNodes = DpTable_[s2]; if (csgCmpEdge->IsReversed) { - std::swap(csgCmpEdge, reversedEdge); + std::swap(csgCmpEdge, reversedCsgCmpEdge); std::swap(leftNodes, rightNodes); } @@ -503,7 +510,7 @@ template void TDPHypSolver::EmitCsgCmp(const TNodeS #ifndef NDEBUG auto pair = std::make_pair(s1, s2); Y_ENSURE (!CheckTable_.contains(pair), "Check table already contains pair S1|S2"); - CheckTable_[ std::pair(s1, s2) ] = true; + CheckTable_[pair] = true; #endif } diff --git a/ydb/library/yql/dq/opt/dq_opt_hypergraph_ut.cpp b/ydb/library/yql/dq/opt/dq_opt_hypergraph_ut.cpp index 6a80394d6625..2ba71afe5d40 100644 --- a/ydb/library/yql/dq/opt/dq_opt_hypergraph_ut.cpp +++ b/ydb/library/yql/dq/opt/dq_opt_hypergraph_ut.cpp @@ -36,36 +36,67 @@ std::shared_ptr CreateChain(size_t size, TString onAttribute } template -std::shared_ptr Enumerate(const std::shared_ptr& root) { +std::shared_ptr Enumerate(const std::shared_ptr& root, const TOptimizerHints& hints = {}) { auto ctx = TProviderContext(); auto optimizer = std::unique_ptr(MakeNativeOptimizerNew(ctx, std::numeric_limits::max())); Y_ENSURE(root->Kind == EOptimizerNodeKind::JoinNodeType); - auto res = optimizer->JoinSearch(std::static_pointer_cast(root)); + auto res = optimizer->JoinSearch(std::static_pointer_cast(root), hints); Cout << "Optimized Tree:" << Endl; std::stringstream ss; res->Print(ss); Cout << ss.str() << Endl; return res; } +TVector CollectConditions(const std::shared_ptr& node) { + if (node->Kind != 
EOptimizerNodeKind::JoinNodeType) { + return {}; + } + + auto joinNode = std::static_pointer_cast(node); + auto lhsConds = CollectConditions(joinNode->LeftArg); + auto rhsConds = CollectConditions(joinNode->RightArg); + lhsConds.insert(lhsConds.end(), rhsConds.begin(), rhsConds.end()); + for (const auto& [lhsCond, rhsCond]: Zip(joinNode->LeftJoinKeys, joinNode->RightJoinKeys)) { + lhsConds.push_back(lhsCond); + lhsConds.push_back(rhsCond); + } + + return lhsConds; +} + +bool HaveSameConditions(const std::shared_ptr& actual, std::shared_ptr expected) { + auto actualConds = CollectConditions(actual); + auto expectedConds = CollectConditions(expected); + + return + std::unordered_set(actualConds.begin(), actualConds.end()) == + std::unordered_set(expectedConds.begin(), expectedConds.end()); +} + +bool HaveSameConditionCount(const std::shared_ptr& actual, std::shared_ptr expected) { + auto actualConds = CollectConditions(actual); + auto expectedConds = CollectConditions(expected); + + return actualConds.size() == expectedConds.size() && + std::unordered_set(actualConds.begin(), actualConds.end()).size() == + std::unordered_set(expectedConds.begin(), expectedConds.end()).size(); +} + Y_UNIT_TEST_SUITE(HypergraphBuild) { - using TNodeSet = std::bitset<64>; + using TNodeSet64 = std::bitset<64>; + using TNodeSet128 = std::bitset<256>; - void CheckClique(const TJoinHypergraph& graph) { + void CheckClique(const TJoinHypergraph& graph) { size_t nodeCount = graph.GetNodes().size(); for (size_t i = 0; i < nodeCount; ++i) { for (size_t j = 0; j < nodeCount; ++j) { - if (i == j) { - continue; - } - - TNodeSet lhs; - lhs[i] = 1; - TNodeSet rhs; - rhs[j] = 1; + if (i == j) continue; + TNodeSet64 lhs; lhs[i] = 1; + TNodeSet64 rhs; rhs[j] = 1; UNIT_ASSERT(graph.FindEdgeBetween(lhs, rhs)); } } @@ -73,7 +104,7 @@ Y_UNIT_TEST_SUITE(HypergraphBuild) { Y_UNIT_TEST(SimpleChain3NodesTransitiveClosure) { auto root = CreateChain(3, "Konstantin Vedernikov sidit na zp"); - auto graph = 
MakeJoinHypergraph(root); + auto graph = MakeJoinHypergraph(root); UNIT_ASSERT(graph.GetEdges().size() == 6); @@ -83,7 +114,7 @@ Y_UNIT_TEST_SUITE(HypergraphBuild) { Y_UNIT_TEST(SimpleChain4NodesTransitiveClosure) { auto root = CreateChain(4, "Ya hochu pitsu"); - auto graph = MakeJoinHypergraph(root); + auto graph = MakeJoinHypergraph(root); UNIT_ASSERT(graph.GetEdges().size() == 12); @@ -93,7 +124,7 @@ Y_UNIT_TEST_SUITE(HypergraphBuild) { Y_UNIT_TEST(SimpleChain5NodesTransitiveClosure) { auto root = CreateChain(5, "Dota2"); - auto graph = MakeJoinHypergraph(root); + auto graph = MakeJoinHypergraph(root); UNIT_ASSERT(graph.GetEdges().size() == 20); @@ -127,7 +158,8 @@ Y_UNIT_TEST_SUITE(HypergraphBuild) { root, rhs, leftKeys, rightKeys, EJoinKind::InnerJoin, EJoinAlgoType::Undefined, false, false ); - auto graph = MakeJoinHypergraph(root); + auto graph = MakeJoinHypergraph(root); + Cout << graph.String() << Endl; auto a1 = graph.GetNodesByRelNames({"a1"}); auto a2 = graph.GetNodesByRelNames({"a2"}); @@ -151,45 +183,72 @@ Y_UNIT_TEST_SUITE(HypergraphBuild) { UNIT_ASSERT(!graph.FindEdgeBetween(c2, a1)); UNIT_ASSERT(!graph.FindEdgeBetween(c2, a2)); UNIT_ASSERT(!graph.FindEdgeBetween(c2, a3)); + + Enumerate(root); } template std::shared_ptr GetJoinArg(const TJoinArg& joinArg) { if constexpr (std::is_same_v>) { return joinArg; - } else if (std::is_convertible_v) { + } else if constexpr (std::is_convertible_v) { std::shared_ptr root = std::make_shared(joinArg, std::make_shared()); + root->Stats->Nrows = rand() % 50'000 + 1; return root; } else { - static_assert(std::is_convertible_v || - std::is_same_v>, - "Args of join must be either Join or TString, for example: Join(Join('A', 'B'), 'C')"); + static_assert( + std::is_convertible_v || std::is_same_v>, + "Args of join must be either Join or TString, for example: Join(Join('A', 'B'), 'C')" + ); } Y_UNREACHABLE(); } + /* Example of usage: Join("A", "B", "A.id=B.id,A.kek=B.kek,A=B") (only equijoin supported)*/ template - 
std::shared_ptr Join(const TLhsArg& lhsArg, const TRhsArg& rhsArg, TString on="", TString onAttr="") { + std::shared_ptr Join(const TLhsArg& lhsArg, const TRhsArg& rhsArg, TString on="", EJoinKind kind = EJoinKind::InnerJoin) { if constexpr (std::is_convertible_v && std::is_convertible_v) { - on = Sprintf("%s,%s", lhsArg, rhsArg); + if (on.Empty()) { + on = Sprintf("%s=%s", lhsArg, rhsArg); + } } if (on.empty()) { throw std::invalid_argument("Bad argument."); } - - std::string lhsCond, rhsCond; - Split(on, ",", lhsCond, rhsCond); - auto col = onAttr.empty()? ToString(rand()): onAttr; + + TVector conds; + Split(on, ",", conds); + TVector leftJoinCond; + TVector rightJoinCond; + for (const TString& cond: conds) { + std::string lhsCond, rhsCond; // "A.id B.id" + Split(cond, "=", lhsCond, rhsCond); + + if (lhsCond.contains(".") && rhsCond.contains(".")) { + std::string lhsTable, lhsAttr; + Split(lhsCond, ".", lhsTable, lhsAttr); + std::string rhsTable, rhsAttr; + Split(rhsCond, ".", rhsTable, rhsAttr); + leftJoinCond.push_back(TJoinColumn(std::move(lhsTable), std::move(lhsAttr))); + rightJoinCond.push_back(TJoinColumn(std::move(rhsTable), std::move(rhsAttr))); + } else { + TString attr = ToString(rand()); + leftJoinCond.push_back(TJoinColumn(std::move(lhsCond), attr)); + rightJoinCond.push_back(TJoinColumn(std::move(rhsCond), attr)); + + } + } + std::shared_ptr root = std::make_shared( TJoinOptimizerNode( GetJoinArg(lhsArg), GetJoinArg(rhsArg), - {TJoinColumn(lhsCond.c_str(), col)}, - {TJoinColumn(rhsCond.c_str(), col)}, - EJoinKind::InnerJoin, + leftJoinCond, + rightJoinCond, + kind, EJoinAlgoType::Undefined, false, false @@ -198,11 +257,26 @@ Y_UNIT_TEST_SUITE(HypergraphBuild) { return root; } + template + std::shared_ptr FullJoin(const TLhsArg& lhsArg, const TRhsArg& rhsArg, TString on="") { + return Join(lhsArg, rhsArg, on, EJoinKind::OuterJoin); + } + + template + std::shared_ptr LeftJoin(const TLhsArg& lhsArg, const TRhsArg& rhsArg, TString on="") { + return 
Join(lhsArg, rhsArg, on, EJoinKind::LeftJoin); + } + + template + std::shared_ptr CrossJoin(const TLhsArg& lhsArg, const TRhsArg& rhsArg, TString on="") { + return Join(lhsArg, rhsArg, on, EJoinKind::Cross); + } + Y_UNIT_TEST(AnyJoinWithTransitiveClosure) { - auto root = Join("A", Join("B", Join("C", "D", "C,D", "id"), "B,C", "id"), "A,B", "id"); + auto root = Join("A", Join("B", Join("C", "D", "C.id=D.id"), "B.id=C.id"), "A.id=B.id"); std::static_pointer_cast(root)->LeftAny = true; - auto graph = MakeJoinHypergraph(root); + auto graph = MakeJoinHypergraph(root); Cout << graph.String() << Endl; auto A = graph.GetNodesByRelNames({"A"}); @@ -213,14 +287,16 @@ Y_UNIT_TEST_SUITE(HypergraphBuild) { UNIT_ASSERT(graph.FindEdgeBetween(B, D)); UNIT_ASSERT(!graph.FindEdgeBetween(A, D)); UNIT_ASSERT(!graph.FindEdgeBetween(A, C)); + + Enumerate(root); } Y_UNIT_TEST(AnyJoinConstraints1) { - auto anyJoin = Join(Join("A", "B"), "C", /*on=*/ "B,C"); + auto anyJoin = Join(Join("A", "B"), "C", /*on=*/ "B=C"); std::static_pointer_cast(anyJoin)->LeftAny = true; - auto join = Join(anyJoin, "D", /*on=*/"A,D"); + auto join = Join(anyJoin, "D", /*on=*/"A=D"); - auto graph = MakeJoinHypergraph(join); + auto graph = MakeJoinHypergraph(join); Cout << graph.String() << Endl; UNIT_ASSERT(graph.GetEdges().size() != graph.GetSimpleEdges().size()); @@ -229,11 +305,11 @@ Y_UNIT_TEST_SUITE(HypergraphBuild) { Y_UNIT_TEST(AnyJoinConstraints2) { - auto anyJoin = Join(Join(Join("A", "B"), "C", /*on=*/ "B,C"), "D", "C,D"); + auto anyJoin = Join(Join(Join("A", "B"), "C", /*on=*/ "B=C"), "D", "C=D"); std::static_pointer_cast(anyJoin)->LeftAny = true; - auto join = Join(anyJoin, "E", /*on=*/ "A,E"); + auto join = Join(anyJoin, "E", /*on=*/ "A=E"); - auto graph = MakeJoinHypergraph(join); + auto graph = MakeJoinHypergraph(join); Cout << graph.String() << Endl; UNIT_ASSERT(graph.GetEdges().size() != graph.GetSimpleEdges().size()); @@ -241,11 +317,11 @@ Y_UNIT_TEST_SUITE(HypergraphBuild) { } 
Y_UNIT_TEST(AnyJoinConstraints3) { - auto anyJoin = Join(Join("A", "B"), Join("C", "D"), /*on=*/"B,C"); + auto anyJoin = Join(Join("A", "B"), Join("C", "D"), /*on=*/"B=C"); std::static_pointer_cast(anyJoin)->RightAny = true; - auto join = Join(anyJoin, "E", /*on=*/ "C,E"); + auto join = Join(anyJoin, "E", /*on=*/ "C=E"); - auto graph = MakeJoinHypergraph(join); + auto graph = MakeJoinHypergraph(join); Cout << graph.String() << Endl; UNIT_ASSERT(graph.GetEdges().size() != graph.GetSimpleEdges().size()); @@ -253,14 +329,177 @@ Y_UNIT_TEST_SUITE(HypergraphBuild) { } Y_UNIT_TEST(IsReorderableConstraint) { - auto nonReorderable = Join(Join(Join("A", "B"), "C", /*on=*/ "B,C"), "D", "C,D"); + auto nonReorderable = Join(Join(Join("A", "B"), "C", /*on=*/ "B=C"), "D", "C=D"); std::static_pointer_cast(nonReorderable)->IsReorderable = false; - auto join = Join(nonReorderable, "E", /*on=*/ "A,E"); + auto join = Join(nonReorderable, "E", /*on=*/ "A=E"); - auto graph = MakeJoinHypergraph(join); + auto graph = MakeJoinHypergraph(join); Cout << graph.String() << Endl; UNIT_ASSERT(graph.GetEdges().size() != graph.GetSimpleEdges().size()); Enumerate(join); } + + Y_UNIT_TEST(JoinKindConflictSimple) { + auto join = Join(FullJoin("A", "B"), "C", "B=C"); + + auto graph = MakeJoinHypergraph(join); + Cout << graph.String() << Endl; + + UNIT_ASSERT(graph.GetEdges().size() == 4); + + auto A = graph.GetNodesByRelNames({"A"}); + auto B = graph.GetNodesByRelNames({"B"}); + auto C = graph.GetNodesByRelNames({"C"}); + UNIT_ASSERT(graph.FindEdgeBetween(A, B)); + UNIT_ASSERT(graph.FindEdgeBetween(B, A)); + UNIT_ASSERT(!graph.FindEdgeBetween(A | B, C)->IsSimple()); + UNIT_ASSERT(!graph.FindEdgeBetween(C, A | B)->IsSimple()); + + Enumerate(join); + } + + Y_UNIT_TEST(SimpleCycle) { + auto join = Join("A", Join("B", "C"), "A=B,A=C"); + + auto graph = MakeJoinHypergraph(join); + Cout << graph.String() << Endl; + for (const auto& e: graph.GetEdges()) { + UNIT_ASSERT(e.IsSimple()); + } + + auto 
optimizedJoin = Enumerate(join); + UNIT_ASSERT(HaveSameConditions(optimizedJoin, join)); + } + + /* We shouldn't have complex edges in inner equijoins */ + Y_UNIT_TEST(TransitiveClosurePlusCycle) { + auto join = Join("A", Join("B", Join("C", "D", "C.c0=D.d"), "B.b=C.c,B.b0=D.d1"), "A.a=B.b"); + + auto graph = MakeJoinHypergraph(join); + Cout << graph.String() << Endl; + for (const auto& e: graph.GetEdges()) { + UNIT_ASSERT(e.IsSimple()); + } + + auto A = graph.GetNodesByRelNames({"A"}); + auto C = graph.GetNodesByRelNames({"C"}); + UNIT_ASSERT(graph.FindEdgeBetween(A, C)); + + auto optimizedJoin = Enumerate(join); + UNIT_ASSERT(HaveSameConditionCount(optimizedJoin, join)); + } + + Y_UNIT_TEST(CondsThatMayCauseATransitiveClosureButTheyMustNot) { + auto join = Join("A", "B", "A.DOTA=B.LOL,A.LOL=B.LOL"); + + auto graph = MakeJoinHypergraph(join); + Cout << graph.String() << Endl; + for (const auto& e: graph.GetEdges()) { + UNIT_ASSERT(e.IsSimple()); + } + + auto optimizedJoin = Enumerate(join); + UNIT_ASSERT(HaveSameConditions(optimizedJoin, join)); + } + + Y_UNIT_TEST(TransitiveClosureManyCondsBetweenJoin) { + auto join = FullJoin(Join(Join("A", "B", "A.ID=B.ID,A.LOL=B.LOL"), "C", "A.ID=C.ID,A.KEK=C.KEK"), "D", "A.ID=D.ID"); + + auto graph = MakeJoinHypergraph(join); + Cout << graph.String() << Endl; + + auto B = graph.GetNodesByRelNames({"B"}); + auto C = graph.GetNodesByRelNames({"C"}); + UNIT_ASSERT(graph.FindEdgeBetween(B, C)); + + { + auto optimizedJoin = Enumerate(join, TOptimizerHints::Parse("Rows(B C # 0)")); + UNIT_ASSERT(HaveSameConditionCount(optimizedJoin, join)); + } + { + auto optimizedJoin = Enumerate(join, TOptimizerHints::Parse("JoinOrder((A B) C)")); + UNIT_ASSERT(HaveSameConditions(optimizedJoin, join)); + } + } + + auto MakeClique(size_t size) { + std::shared_ptr root = Join("R0", "R1", "R0.id=R1.id"); + + for (size_t i = 2; i < size; ++i) { + TString attr = ToString(rand()); + TString on = Sprintf("R%ld.id=R%ld.id", i - 1, i); + root = Join(root, 
Sprintf("R%ld", i), on); + } + + auto graph = MakeJoinHypergraph(root); + Cout << graph.String() << Endl; + CheckClique(graph); + + return root; + } + + auto MakeChain(size_t size) { + std::shared_ptr root = Join("R0", "R1"); + + for (size_t i = 2; i < size; ++i) { + TString attr = ToString(rand()); + TString on = Sprintf("R%ld.%s=R%ld.%s", i - 1, attr.c_str(), i, attr.c_str()); + root = Join(root, Sprintf("R%ld", i), on); + } + + auto graph = MakeJoinHypergraph>(root); + Cout << graph.String() << Endl; + return root; + } + + auto MakeStar(size_t size) { + std::shared_ptr root = Join("R0", "R1"); + + for (size_t i = 2; i < size; ++i) { + TString attr = ToString(rand()); + TString on = Sprintf("R0.%s=R%ld.%s", attr.c_str(), i, attr.c_str()); + root = Join(root, Sprintf("R%ld", i), on); + } + + auto graph = MakeJoinHypergraph(root); + Cout << graph.String() << Endl; + return root; + } + + Y_UNIT_TEST(JoinTopologiesBenchmark) { + #ifndef NDEBUG + enum { CliqueSize = 11, ChainSize = 71, StarSize = 15 }; + #else + enum { CliqueSize = 15, ChainSize = 165, StarSize = 20 }; + #endif + + { + size_t cliqueSize = CliqueSize; + auto startClique = std::chrono::high_resolution_clock::now(); + Enumerate(MakeClique(cliqueSize)); + auto endClique = std::chrono::high_resolution_clock::now(); + std::chrono::duration durationClique = endClique - startClique; + std::cerr << Sprintf("Time for Enumerate(MakeClique(%ld)): %f seconds", cliqueSize, durationClique.count()) << std::endl; + } + + { + size_t starSize = StarSize; + auto startStar = std::chrono::high_resolution_clock::now(); + Enumerate(MakeStar(starSize)); + auto endStar = std::chrono::high_resolution_clock::now(); + std::chrono::duration durationStar = endStar - startStar; + std::cerr << Sprintf("Time for Enumerate(MakeStar(%ld)): %f seconds", starSize, durationStar.count()) << std::endl; + } + + { + size_t chainSize = ChainSize; + auto startChain = std::chrono::high_resolution_clock::now(); + Enumerate(MakeChain(chainSize)); + 
auto endChain = std::chrono::high_resolution_clock::now(); + std::chrono::duration durationChain = endChain - startChain; + std::cerr << Sprintf("Time for Enumerate(MakeChain(%ld)): %f seconds", chainSize, durationChain.count()) << std::endl; + } + } + } diff --git a/ydb/library/yql/dq/opt/dq_opt_join_cost_based.cpp b/ydb/library/yql/dq/opt/dq_opt_join_cost_based.cpp index 0e3581be8bc6..2d26b2b0fc50 100644 --- a/ydb/library/yql/dq/opt/dq_opt_join_cost_based.cpp +++ b/ydb/library/yql/dq/opt/dq_opt_join_cost_based.cpp @@ -250,10 +250,10 @@ class TOptimizerNativeNew: public IOptimizerNew { if (relsCount <= 64) { // The algorithm is more efficient. return JoinSearchImpl(joinTree, hints); - } - - if (64 < relsCount && relsCount <= 128) { + } else if (64 < relsCount && relsCount <= 128) { return JoinSearchImpl(joinTree, hints); + } else if (128 < relsCount && relsCount <= 192) { + return JoinSearchImpl(joinTree, hints); } ComputeStatistics(joinTree, this->Pctx); @@ -263,6 +263,7 @@ class TOptimizerNativeNew: public IOptimizerNew { private: using TNodeSet64 = std::bitset<64>; using TNodeSet128 = std::bitset<128>; + using TNodeSet192 = std::bitset<192>; template std::shared_ptr JoinSearchImpl( @@ -273,14 +274,36 @@ class TOptimizerNativeNew: public IOptimizerNew { TDPHypSolver solver(hypergraph, this->Pctx); if (solver.CountCC(MaxDPhypTableSize_) >= MaxDPhypTableSize_) { - YQL_CLOG(TRACE, CoreDq) << "Maximum DPhyp threshold exceeded\n"; + YQL_CLOG(TRACE, CoreDq) << "Maximum DPhyp threshold exceeded"; ComputeStatistics(joinTree, this->Pctx); return joinTree; } auto bestJoinOrder = solver.Solve(hints); - return ConvertFromInternal(bestJoinOrder); + auto resTree = ConvertFromInternal(bestJoinOrder); + AddMissingConditions(hypergraph, resTree); + return resTree; + } + + /* Due to cycles we can miss some conditions in edges, because DPHyp enumerates trees */ + template + void AddMissingConditions( + TJoinHypergraph& hypergraph, + const std::shared_ptr& node + ) { + if 
(node->Kind != EOptimizerNodeKind::JoinNodeType) { + return; + } + + auto joinNode = std::static_pointer_cast(node); + AddMissingConditions(hypergraph, joinNode->LeftArg); + AddMissingConditions(hypergraph, joinNode->RightArg); + TNodeSet lhs = hypergraph.GetNodesByRelNames(joinNode->LeftArg->Labels()); + TNodeSet rhs = hypergraph.GetNodesByRelNames(joinNode->RightArg->Labels()); + + hypergraph.FindAllConditionsBetween(lhs, rhs, joinNode->LeftJoinKeys, joinNode->RightJoinKeys); } + private: ui32 MaxDPhypTableSize_; }; diff --git a/ydb/library/yql/dq/opt/dq_opt_join_hypergraph.h b/ydb/library/yql/dq/opt/dq_opt_join_hypergraph.h index 6a73c7149a65..b91d7f6af614 100644 --- a/ydb/library/yql/dq/opt/dq_opt_join_hypergraph.h +++ b/ydb/library/yql/dq/opt/dq_opt_join_hypergraph.h @@ -1,13 +1,16 @@ #pragma once - #include +#include +#include + #include #include #include "bitset.h" #include #include +#include #include @@ -31,8 +34,8 @@ class TJoinHypergraph { bool leftAny, bool rightAny, bool isCommutative, - TVector& leftJoinKeys, - TVector& rightJoinKeys + const TVector& leftJoinKeys, + const TVector& rightJoinKeys ) : Left(left) , Right(right) @@ -44,20 +47,22 @@ class TJoinHypergraph { , RightJoinKeys(rightJoinKeys) , IsReversed(false) { + Y_ASSERT(LeftJoinKeys.size() == RightJoinKeys.size()); RemoveAttributeAliases(); } - bool AreCondVectorEqual() const { - TVector leftAttrNames; - TVector rightAttrNames; - for (auto & l : LeftJoinKeys) { - leftAttrNames.push_back(l.AttributeName); - } - for (auto & r : RightJoinKeys) { - rightAttrNames.push_back(r.AttributeName); + void RemoveAttributeAliases() { + for (auto& leftKey : LeftJoinKeys) { + if (auto idx = leftKey.AttributeName.find_last_of('.'); idx != TString::npos) { + leftKey.AttributeName = leftKey.AttributeName.substr(idx + 1); + } } - return leftAttrNames == rightAttrNames; + for (auto& rightKey : RightJoinKeys) { + if (auto idx = rightKey.AttributeName.find_last_of('.'); idx != TString::npos) { + 
rightKey.AttributeName = rightKey.AttributeName.substr(idx + 1); + } + } } inline bool IsSimple() const { @@ -76,19 +81,10 @@ class TJoinHypergraph { bool IsReversed; int64_t ReversedEdgeId = -1; - void RemoveAttributeAliases() { - - for (auto& leftKey : LeftJoinKeys ) { - if (auto idx = leftKey.AttributeName.find_last_of('.'); idx != TString::npos) { - leftKey.AttributeName = leftKey.AttributeName.substr(idx+1); - } - } - - for (auto& rightKey : RightJoinKeys ) { - if (auto idx = rightKey.AttributeName.find_last_of('.'); idx != TString::npos) { - rightKey.AttributeName = rightKey.AttributeName.substr(idx+1); - } - } + TEdge CreateReversed(int64_t reversedEdgeId) const { + auto reversedEdge = TEdge(Right, Left, JoinKind, RightAny, LeftAny, IsCommutative, RightJoinKeys, LeftJoinKeys); + reversedEdge.IsReversed = true; reversedEdge.ReversedEdgeId = reversedEdgeId; + return reversedEdge; } }; @@ -113,7 +109,7 @@ class TJoinHypergraph { res.append(Sprintf("%ld: %s\n", idx, relNameByNodeId[idx].c_str())); } - res.append("Edges: ").append("\n"); + res.append(Sprintf("Edges(%ld): ", Edges_.size())).append("\n"); auto edgeSideToString = [&relNameByNodeId](const TNodeSet& edgeSide) { @@ -137,30 +133,21 @@ class TJoinHypergraph { }; for (const auto& edge: Edges_) { - TString leftKeyStr; - TString rightKeyStr; - - for (auto& l: edge.LeftJoinKeys) { - leftKeyStr.append(l.RelName); - leftKeyStr.append("."); - leftKeyStr.append(l.AttributeName); - leftKeyStr.append(","); + TVector conds; + for (const auto& [lhsCond, rhsCond]: Zip(edge.LeftJoinKeys, edge.RightJoinKeys)) { + TString cond = Sprintf( + "%s.%s = %s.%s", + lhsCond.RelName.c_str(), lhsCond.AttributeName.c_str(), rhsCond.RelName.c_str(), rhsCond.AttributeName.c_str() + ); + + conds.push_back(std::move(cond)); } - for (auto& r: edge.RightJoinKeys) { - rightKeyStr.append(r.RelName); - rightKeyStr.append("."); - rightKeyStr.append(r.AttributeName); - rightKeyStr.append(","); - } res .append(edgeSideToString(edge.Left)) 
.append(" -> ") .append(edgeSideToString(edge.Right)) - .append(" on ") - .append(leftKeyStr) - .append("==") - .append(rightKeyStr) + .append("\t").append(JoinSeq(", ", conds)) .append("\n"); } @@ -188,13 +175,7 @@ class TJoinHypergraph { AddEdgeImpl(edge); - TEdge reversedEdge = std::move(edge); - std::swap(reversedEdge.Left, reversedEdge.Right); - std::swap(reversedEdge.LeftJoinKeys, reversedEdge.RightJoinKeys); - reversedEdge.IsReversed = true; - reversedEdge.ReversedEdgeId = edgeId; - reversedEdge.RemoveAttributeAliases(); - + TEdge reversedEdge = edge.CreateReversed(edgeId); AddEdgeImpl(reversedEdge); } @@ -241,10 +222,11 @@ class TJoinHypergraph { return Nodes_; } - inline const TVector& GetEdges() const { + inline TVector& GetEdges() { return Edges_; } + /* Find any edge between lhs and rhs. (It can skip conditions and generate invalid plan in case of cycles) */ const TEdge* FindEdgeBetween(const TNodeSet& lhs, const TNodeSet& rhs) const { for (const auto& edge: Edges_) { if ( @@ -260,6 +242,41 @@ class TJoinHypergraph { return nullptr; } + /* + * This function returns all conditions without redundancy between lhs and rhs + * Many conditions can occur in a graph with cycles, but transitive closure conditions in one eq. class + * will be redundant, so we consider only one condition from each eq. class. 
+ */ + void FindAllConditionsBetween( + const TNodeSet& lhs, + const TNodeSet& rhs, + TVector& resLeftJoinKeys, + TVector& resRightJoinKeys + ) { + for (const auto& edge: Edges_) { + if ( + IsSubset(edge.Left, lhs) && + !Overlaps(edge.Left, rhs) && + IsSubset(edge.Right, rhs) && + !Overlaps(edge.Right, lhs) + ) { + for (const auto& [lhsEdgeCond, rhsEdgeCond]: Zip(edge.LeftJoinKeys, edge.RightJoinKeys)) { + bool hasSameEquivClass = false; + for (const auto& lhsResJoinKey: resLeftJoinKeys) { + if (lhsEdgeCond.EquivalenceClass.has_value() && lhsEdgeCond.EquivalenceClass == lhsResJoinKey.EquivalenceClass || lhsEdgeCond == lhsResJoinKey) { + hasSameEquivClass = true; break; + } + } + + if (!hasSameEquivClass) { + resLeftJoinKeys.push_back(lhsEdgeCond); + resRightJoinKeys.push_back(rhsEdgeCond); + } + } + } + } + } + void UpdateEdgeSides(size_t idx, TNodeSet newLeft, TNodeSet newRight) { auto& edge = Edges_[idx]; @@ -386,15 +403,12 @@ class TJoinOrderHintsApplier { /* * This class construct transitive closure between nodes in hypergraph. * Transitive closure means that if we have an edge from (1,2) with join - * condition R.A = S.A and we have an edge from (2,3) with join condition - * S.A = T.A, we will find out that the join conditions form an equivalence set - * and add an edge (1,3) with join condition R.A = T.A. + * condition R.Z = S.A and we have an edge from (2,3) with join condition + * S.A = T.V, we will find out that the join conditions form an equivalence set + * and add an edge (1,3) with join condition R.Z = T.V. * Algorithm works as follows: - * 1) We leave only edges that do not conflict with themselves and - * in join condition equality attributes on left and right side must be equal by name. - * (e.g. a.id = b.id && a.kek = b.kek) - * 2) We group edges by attribute names in equality and joinKind - * 3) In each group we build connected components and in each components we add missing edges. 
+ * 1) We leave only inner-join simple edges + * 2) We build connected components (by join conditions) and in each component we add missing edges. */ template class TTransitiveClosureConstructor { @@ -411,128 +425,72 @@ class TTransitiveClosureConstructor { EraseIf( edges, - [this](const THyperedge& edge) { - return - edge.IsReversed || - !(IsJoinTransitiveClosureSupported(edge.JoinKind) && edge.AreCondVectorEqual()) || - edge.LeftAny || edge.RightAny; + [](const THyperedge& edge) { + return edge.IsReversed || !edge.IsSimple() || edge.JoinKind != InnerJoin || edge.LeftAny || edge.RightAny; } ); - std::sort( - edges.begin(), - edges.end(), - [](const THyperedge& lhs, const THyperedge& rhs) { - TVector lhsAttributeNames; - TVector rhsAttributeNames; - - for (auto & l : lhs.LeftJoinKeys ) { - lhsAttributeNames.push_back(l.AttributeName); - } - for (auto & r : rhs.LeftJoinKeys ) { - rhsAttributeNames.push_back(r.AttributeName); - } - - std::sort(lhsAttributeNames.begin(), lhsAttributeNames.end()); - std::sort(rhsAttributeNames.begin(), rhsAttributeNames.end()); - - return - std::tie(lhsAttributeNames, lhs.JoinKind) < - std::tie(rhsAttributeNames, rhs.JoinKind); - } - ); - - size_t groupBegin = 0; - for (size_t groupEnd = 0; groupEnd < edges.size();) { - while (groupEnd < edges.size() && HasOneGroup(edges[groupBegin], edges[groupEnd])) { - ++groupEnd; - } + ConstructImpl(edges); + } - if (groupEnd - groupBegin >= 2) { - ComputeTransitiveClosureInGroup(edges, groupBegin, groupEnd); +private: + void ConstructImpl(const TVector& edges) { + std::vector joinCondById; + for (const auto& edge: edges) { + for (const auto& [lhs, rhs]: Zip(edge.LeftJoinKeys, edge.RightJoinKeys)) { + joinCondById.push_back(lhs); + joinCondById.push_back(rhs); } + } + std::sort(joinCondById.begin(), joinCondById.end()); + joinCondById.erase(std::unique(joinCondById.begin(), joinCondById.end()), joinCondById.end()); - groupBegin = groupEnd; + THashMap idByJoinCond; + for (size_t i = 0; i < 
joinCondById.size(); ++i) { + idByJoinCond[joinCondById[i]] = i; } - } -private: - void ComputeTransitiveClosureInGroup(const TVector& edges, size_t groupBegin, size_t groupEnd) { - size_t nodeSetSize = TNodeSet{}.size(); - const auto& nodes = Graph_.GetNodes(); + TDisjointSets connectedComponents(joinCondById.size()); + for (const auto& edge: edges) { + for (const auto& [lhs, rhs]: Zip(edge.LeftJoinKeys, edge.RightJoinKeys)) { + connectedComponents.UnionSets(idByJoinCond[lhs], idByJoinCond[rhs]); + } + } - EJoinKind groupJoinKind = edges[groupBegin].JoinKind; - bool isJoinCommutative = edges[groupBegin].IsCommutative; + for (auto& edge: Graph_.GetEdges()) { + for (auto& lhs : edge.LeftJoinKeys) { + if (idByJoinCond.contains(lhs)) { + lhs.EquivalenceClass = connectedComponents.CanonicSetElement(idByJoinCond[lhs]); + } + } - TVector groupConditionUsedAttributes; - for (const auto& lhs: edges[groupBegin].LeftJoinKeys) { - groupConditionUsedAttributes.push_back(lhs.AttributeName); - } - for (const auto& rhs: edges[groupBegin].RightJoinKeys) { - groupConditionUsedAttributes.push_back(rhs.AttributeName); + for (auto& rhs : edge.RightJoinKeys) { + if (idByJoinCond.contains(rhs)) { + rhs.EquivalenceClass = connectedComponents.CanonicSetElement(idByJoinCond[rhs]); + } + } } - TDisjointSets connectedComponents(nodeSetSize); - for (size_t edgeId = groupBegin; edgeId < groupEnd; ++edgeId) { - const auto& edge = edges[edgeId]; - connectedComponents.UnionSets(GetLowestSetBit(edge.Left), GetLowestSetBit(edge.Right)); + for (size_t i = 0; i < joinCondById.size(); ++i) { + joinCondById[i].EquivalenceClass = connectedComponents.CanonicSetElement(i); } - for (size_t i = 0; i < nodeSetSize; ++i) { + for (size_t i = 0; i < joinCondById.size(); ++i) { for (size_t j = 0; j < i; ++j) { - auto iGroup = connectedComponents.CanonicSetElement(i); - auto jGroup = connectedComponents.CanonicSetElement(j); - if (iGroup == jGroup) { - TNodeSet lhs; lhs[i] = 1; - TNodeSet rhs; rhs[j] = 1; - - 
const auto* edge = Graph_.FindEdgeBetween(lhs, rhs); - if (edge != nullptr) { - continue; - } + if (joinCondById[i].EquivalenceClass == joinCondById[j].EquivalenceClass && joinCondById[i].RelName != joinCondById[j].RelName) { + auto iNode = Graph_.GetNodesByRelNames({joinCondById[i].RelName}); + auto jNode = Graph_.GetNodesByRelNames({joinCondById[j].RelName}); - TString lhsRelName = nodes[i].RelationOptimizerNode->Labels()[0]; - TString rhsRelName = nodes[j].RelationOptimizerNode->Labels()[0]; - TVector leftKeys; - TVector rightKeys; - - for (const auto& attributeName: groupConditionUsedAttributes){ - leftKeys.push_back(TJoinColumn(lhsRelName, attributeName)); - rightKeys.push_back(TJoinColumn(rhsRelName, attributeName)); + if (Graph_.FindEdgeBetween(iNode, jNode)) { + continue; } - auto e = THyperedge(lhs, rhs, groupJoinKind, false, false, isJoinCommutative, leftKeys, rightKeys); - Graph_.AddEdge(std::move(e)); + Graph_.AddEdge(THyperedge(iNode, jNode, InnerJoin, false, false, true, {joinCondById[i]}, {joinCondById[j]})); } } } } - bool HasOneGroup(const THyperedge& lhs, const THyperedge& rhs) { - TVector lhsAttributeNames; - TVector rhsAttributeNames; - - for (auto & l : lhs.LeftJoinKeys) { - lhsAttributeNames.push_back(l.AttributeName); - } - - for (auto & r : rhs.LeftJoinKeys) { - rhsAttributeNames.push_back(r.AttributeName); - } - - std::sort(lhsAttributeNames.begin(), lhsAttributeNames.end()); - std::sort(rhsAttributeNames.begin(), rhsAttributeNames.end()); - - return lhsAttributeNames == rhsAttributeNames && lhs.JoinKind == rhs.JoinKind; - } - - bool IsJoinTransitiveClosureSupported(EJoinKind joinKind) { - return - OperatorsAreAssociative(joinKind, joinKind) && - OperatorsAreLeftAsscom(joinKind, joinKind) && - OperatorsAreRightAsscom(joinKind, joinKind); - } - private: TJoinHypergraph& Graph_; }; diff --git a/ydb/library/yql/dq/opt/dq_opt_make_join_hypergraph.h b/ydb/library/yql/dq/opt/dq_opt_make_join_hypergraph.h index 9d3443621e40..44e4ef468062 100644 
--- a/ydb/library/yql/dq/opt/dq_opt_make_join_hypergraph.h +++ b/ydb/library/yql/dq/opt/dq_opt_make_join_hypergraph.h @@ -19,25 +19,31 @@ namespace NYql::NDq { -inline TVector GetConditionUsedRelationNames(const std::shared_ptr& joinNode) { +inline TVector GetConditionUsedRelationNames(const TVector& lhs, const TVector& rhs) { TVector res; - res.reserve(joinNode->LeftJoinKeys.size()); + res.reserve(lhs.size()); - for (const auto& lhs : joinNode->LeftJoinKeys ) { - res.push_back(lhs.RelName); - } - for (const auto& rhs : joinNode->RightJoinKeys ) { - res.push_back(rhs.RelName); + for (const auto& [lhsTable, rhsTable]: Zip(lhs, rhs)) { + res.push_back(lhsTable.RelName); + res.push_back(rhsTable.RelName); } return res; } +inline bool AllJoinsAreInner(const std::shared_ptr& joinTree) { + if (joinTree->Kind == RelNodeType) { return true; } + auto joinNode = std::static_pointer_cast(joinTree); + return (joinNode->JoinType == EJoinKind::InnerJoin) && AllJoinsAreInner(joinNode->LeftArg) && AllJoinsAreInner(joinNode->RightArg); +} + template typename TJoinHypergraph::TEdge MakeHyperedge( const std::shared_ptr& joinNode, const TNodeSet& conditionUsedRels, - std::unordered_map, TNodeSet>& subtreeNodes + std::unordered_map, TNodeSet>& subtreeNodes, + const TVector& leftJoinKeys, + const TVector& rightJoinKeys ) { auto conflictRulesCollector = TConflictRulesCollector(joinNode, subtreeNodes); auto conflictRules = conflictRulesCollector.CollectConflicts(); @@ -59,7 +65,41 @@ typename TJoinHypergraph::TEdge MakeHyperedge( TNodeSet right = TES & subtreeNodes[joinNode->RightArg]; bool isCommutative = OperatorIsCommutative(joinNode->JoinType) && (joinNode->IsReorderable); - return typename TJoinHypergraph::TEdge(left, right, joinNode->JoinType, joinNode->LeftAny, joinNode->RightAny, isCommutative, joinNode->LeftJoinKeys, joinNode->RightJoinKeys); + return typename TJoinHypergraph::TEdge(left, right, joinNode->JoinType, joinNode->LeftAny, joinNode->RightAny, isCommutative, 
leftJoinKeys, rightJoinKeys); +} + +/* + * In this routine we decompose AND condition for equijoin into many edges, instead of one hyperedge. + * We group conditions with same relations into one (for example A.id = B.id, A.z = B.z). + */ +template +void AddCycle( + TJoinHypergraph& graph, + const std::shared_ptr& joinNode, + std::unordered_map, TNodeSet>& subtreeNodes +) { + auto zip = Zip(joinNode->LeftJoinKeys, joinNode->RightJoinKeys); + using TJoinCondition = std::pair; + std::vector joinConds{zip.begin(), zip.end()}; + std::sort(joinConds.begin(), joinConds.end()); + + auto isOneGroup = [](const TJoinCondition& lhs, const TJoinCondition& rhs) -> bool { + return lhs.first.RelName == rhs.first.RelName && lhs.second.RelName == rhs.second.RelName; + }; + + for (size_t i = 0; i < joinConds.size();) { + size_t groupBegin = i; + TVector curGroupLhsJoinKeys, curGroupRhsJoinKeys; + while (i < joinConds.size() && isOneGroup(joinConds[groupBegin], joinConds[i])) { + curGroupLhsJoinKeys.push_back(joinConds[i].first); + curGroupRhsJoinKeys.push_back(joinConds[i].second); + ++i; + } + + TNodeSet conditionUsedRels{}; + conditionUsedRels = graph.GetNodesByRelNames(GetConditionUsedRelationNames(curGroupLhsJoinKeys, curGroupRhsJoinKeys)); + graph.AddEdge(MakeHyperedge(joinNode, conditionUsedRels,subtreeNodes, curGroupLhsJoinKeys, curGroupRhsJoinKeys)); + } } template @@ -82,10 +122,15 @@ void MakeJoinHypergraphRec( subtreeNodes[joinTree] = subtreeNodes[joinNode->LeftArg] | subtreeNodes[joinNode->RightArg]; - TNodeSet conditionUsedRels{}; - conditionUsedRels = graph.GetNodesByRelNames(GetConditionUsedRelationNames(joinNode)); + /* In case of inner equi-innerjoins we create a cycle, not a hyperedge */ + if (joinNode->LeftJoinKeys.size() > 1 && AllJoinsAreInner(joinTree)) { + AddCycle(graph, joinNode, subtreeNodes); + return; + } - graph.AddEdge(MakeHyperedge(joinNode, conditionUsedRels, subtreeNodes)); + TNodeSet conditionUsedRels{}; + conditionUsedRels = 
graph.GetNodesByRelNames(GetConditionUsedRelationNames(joinNode->LeftJoinKeys, joinNode->RightJoinKeys)); + graph.AddEdge(MakeHyperedge(joinNode, conditionUsedRels, subtreeNodes, joinNode->LeftJoinKeys, joinNode->RightJoinKeys)); } template diff --git a/ydb/library/yql/tests/sql/dq_file/part17/canondata/result.json b/ydb/library/yql/tests/sql/dq_file/part17/canondata/result.json index abc4f5033453..6f68ac37d3d2 100644 --- a/ydb/library/yql/tests/sql/dq_file/part17/canondata/result.json +++ b/ydb/library/yql/tests/sql/dq_file/part17/canondata/result.json @@ -762,9 +762,9 @@ ], "test.test[dq-join_cbo_native_3_tables--Debug]": [ { - "checksum": "bc4f0d3c80bc05fdb553d9d07ed58fd2", - "size": 4846, - "uri": "https://{canondata_backend}/1597364/aa2251cc1cffd9f5ef1d8d1793ee54509ab8cdfc/resource.tar.gz#test.test_dq-join_cbo_native_3_tables--Debug_/opt.yql_patched" + "checksum": "91570a2f667516ba1f3f28642698441f", + "size": 4802, + "uri": "https://{canondata_backend}/1847551/6ea0f0d238a8a57c98cf719da4e87036e3ffdde6/resource.tar.gz#test.test_dq-join_cbo_native_3_tables--Debug_/opt.yql_patched" } ], "test.test[dq-join_cbo_native_3_tables--Plan]": [ From 8ba114b874145e75f3f7f4b37bab5fabc341dca2 Mon Sep 17 00:00:00 2001 From: pilik Date: Thu, 17 Oct 2024 16:56:04 +0300 Subject: [PATCH 4/7] [CBO] Transitive closure adding conditions to existed edges (#10528) --- ydb/library/yql/dq/opt/dq_opt_hypergraph_ut.cpp | 16 ++++++++++++++++ ydb/library/yql/dq/opt/dq_opt_join_hypergraph.h | 17 +++++++++++++++-- 2 files changed, 31 insertions(+), 2 deletions(-) diff --git a/ydb/library/yql/dq/opt/dq_opt_hypergraph_ut.cpp b/ydb/library/yql/dq/opt/dq_opt_hypergraph_ut.cpp index 2ba71afe5d40..9a70bd4ba251 100644 --- a/ydb/library/yql/dq/opt/dq_opt_hypergraph_ut.cpp +++ b/ydb/library/yql/dq/opt/dq_opt_hypergraph_ut.cpp @@ -423,6 +423,22 @@ Y_UNIT_TEST_SUITE(HypergraphBuild) { } } + Y_UNIT_TEST(ManyCondsBetweenJoinForTransitiveClosure) { + auto join = Join(Join("A", "B", 
"A.PUDGE=B.PUDGE,A.DOTA=B.DOTA"), "C", "A.PUDGE=C.PUDGE,A.DOTA=C.DOTA"); + + auto graph = MakeJoinHypergraph(join); + Cout << graph.String() << Endl; + + auto B = graph.GetNodesByRelNames({"B"}); + auto C = graph.GetNodesByRelNames({"C"}); + UNIT_ASSERT(graph.FindEdgeBetween(B, C)); + + { + auto optimizedJoin = Enumerate(join, TOptimizerHints::Parse("Rows(B C # 0)")); + UNIT_ASSERT(HaveSameConditionCount(optimizedJoin, join)); + } + } + auto MakeClique(size_t size) { std::shared_ptr root = Join("R0", "R1", "R0.id=R1.id"); diff --git a/ydb/library/yql/dq/opt/dq_opt_join_hypergraph.h b/ydb/library/yql/dq/opt/dq_opt_join_hypergraph.h index b91d7f6af614..98ec18bfa98d 100644 --- a/ydb/library/yql/dq/opt/dq_opt_join_hypergraph.h +++ b/ydb/library/yql/dq/opt/dq_opt_join_hypergraph.h @@ -481,8 +481,21 @@ class TTransitiveClosureConstructor { auto iNode = Graph_.GetNodesByRelNames({joinCondById[i].RelName}); auto jNode = Graph_.GetNodesByRelNames({joinCondById[j].RelName}); - if (Graph_.FindEdgeBetween(iNode, jNode)) { - continue; + if (auto* maybeEdge = Graph_.FindEdgeBetween(iNode, jNode)) { + auto addUniqueKey = [](auto& vector, const auto& key) { + if (std::find(vector.begin(), vector.end(), key) == vector.end()) { + vector.push_back(key); + } + }; + + auto& revEdge = Graph_.GetEdge(maybeEdge->ReversedEdgeId); + addUniqueKey(revEdge.LeftJoinKeys, joinCondById[j]); + addUniqueKey(revEdge.RightJoinKeys, joinCondById[i]); + + auto& edge = Graph_.GetEdge(revEdge.ReversedEdgeId); + addUniqueKey(edge.LeftJoinKeys, joinCondById[i]); + addUniqueKey(edge.RightJoinKeys, joinCondById[j]); + continue; } Graph_.AddEdge(THyperedge(iNode, jNode, InnerJoin, false, false, true, {joinCondById[i]}, {joinCondById[j]})); From ccafaac92753aa0e85e9eb2a175dfb4e3b8d1e60 Mon Sep 17 00:00:00 2001 From: pilik Date: Thu, 17 Oct 2024 22:39:03 +0300 Subject: [PATCH 5/7] [CBO] No join was chosen bug fix (#10576) Co-authored-by: Pavel Ivanov --- ydb/library/yql/dq/opt/dq_opt_hypergraph_ut.cpp | 2 +- 1 
file changed, 1 insertion(+), 1 deletion(-) diff --git a/ydb/library/yql/dq/opt/dq_opt_hypergraph_ut.cpp b/ydb/library/yql/dq/opt/dq_opt_hypergraph_ut.cpp index 9a70bd4ba251..ff63e92436ad 100644 --- a/ydb/library/yql/dq/opt/dq_opt_hypergraph_ut.cpp +++ b/ydb/library/yql/dq/opt/dq_opt_hypergraph_ut.cpp @@ -193,7 +193,7 @@ Y_UNIT_TEST_SUITE(HypergraphBuild) { return joinArg; } else if constexpr (std::is_convertible_v) { std::shared_ptr root = std::make_shared(joinArg, std::make_shared()); - root->Stats->Nrows = rand() % 50'000 + 1; + root->Stats->Nrows = rand() % 100 + 1; return root; } else { static_assert( From 45f128ad0e845a777433b1e1e804e955ede43ab6 Mon Sep 17 00:00:00 2001 From: pilik Date: Fri, 18 Oct 2024 16:23:40 +0300 Subject: [PATCH 6/7] [CBO] TEST ASAN FIX (#10610) --- ydb/library/yql/dq/opt/dq_opt_hypergraph_ut.cpp | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/ydb/library/yql/dq/opt/dq_opt_hypergraph_ut.cpp b/ydb/library/yql/dq/opt/dq_opt_hypergraph_ut.cpp index ff63e92436ad..8ebb67b9d59a 100644 --- a/ydb/library/yql/dq/opt/dq_opt_hypergraph_ut.cpp +++ b/ydb/library/yql/dq/opt/dq_opt_hypergraph_ut.cpp @@ -484,7 +484,11 @@ Y_UNIT_TEST_SUITE(HypergraphBuild) { } Y_UNIT_TEST(JoinTopologiesBenchmark) { - #ifndef NDEBUG + #if defined(_asan_enabled_) + enum { CliqueSize = 0, ChainSize = 0, StarSize = 0 }; + std::cerr << "test is not running for ASAN!" 
<< std::endl; + return; + #elif !defined(NDEBUG) enum { CliqueSize = 11, ChainSize = 71, StarSize = 15 }; #else enum { CliqueSize = 15, ChainSize = 165, StarSize = 20 }; From de3cdd9defb8269fe90d48b2f828b99df5fff0fc Mon Sep 17 00:00:00 2001 From: Pavel Ivanov Date: Mon, 21 Oct 2024 15:45:57 +0300 Subject: [PATCH 7/7] [] fix --- .../join/data/join_order/tpcds64_1000s.json | 32 ++++++++++-- .../ut/join/data/join_order/tpch2_1000s.json | 50 +++---------------- 2 files changed, 36 insertions(+), 46 deletions(-) diff --git a/ydb/core/kqp/ut/join/data/join_order/tpcds64_1000s.json b/ydb/core/kqp/ut/join/data/join_order/tpcds64_1000s.json index cbc1461ddba9..1953e92eaee3 100644 --- a/ydb/core/kqp/ut/join/data/join_order/tpcds64_1000s.json +++ b/ydb/core/kqp/ut/join/data/join_order/tpcds64_1000s.json @@ -87,8 +87,20 @@ "args": [ { - "op_name":"TableLookup", - "table":"test\/ds\/store_sales" + "op_name":"TableRangeScan", + "args": + [ + { + "op_name":"Filter", + "args": + [ + { + "op_name":"TableFullScan", + "table":"test\/ds\/item" + } + ] + } + ] }, { "op_name":"TableFullScan", @@ -271,8 +283,20 @@ "args": [ { - "op_name":"TableLookup", - "table":"test\/ds\/store_sales" + "op_name":"TableRangeScan", + "args": + [ + { + "op_name":"Filter", + "args": + [ + { + "op_name":"TableFullScan", + "table":"test\/ds\/item" + } + ] + } + ] }, { "op_name":"TableFullScan", diff --git a/ydb/core/kqp/ut/join/data/join_order/tpch2_1000s.json b/ydb/core/kqp/ut/join/data/join_order/tpch2_1000s.json index c6fe57f61e98..ed5b0f7b181c 100644 --- a/ydb/core/kqp/ut/join/data/join_order/tpch2_1000s.json +++ b/ydb/core/kqp/ut/join/data/join_order/tpch2_1000s.json @@ -2,46 +2,6 @@ "op_name":"InnerJoin (Grace)", "args": [ - { - "op_name":"InnerJoin (Grace)", - "args": - [ - { - "op_name":"TableFullScan", - "table":"partsupp" - }, - { - "op_name":"InnerJoin (MapJoin)", - "args": - [ - { - "op_name":"TableFullScan", - "table":"supplier" - }, - { - "op_name":"InnerJoin (MapJoin)", - "args": - [ - { - 
"op_name":"TablePointLookup", - "args": - [ - { - "op_name":"TableFullScan", - "table":"nation" - } - ] - }, - { - "op_name":"TableFullScan", - "table":"nation" - } - ] - } - ] - } - ] - }, { "op_name":"InnerJoin (Grace)", "args": @@ -107,8 +67,14 @@ "args": [ { - "op_name":"TableLookup", - "table":"region" + "op_name":"TablePointLookup", + "args": + [ + { + "op_name":"TableFullScan", + "table":"nation" + } + ] }, { "op_name":"TableFullScan",