diff --git a/ydb/core/kqp/opt/logical/kqp_opt_cbo.cpp b/ydb/core/kqp/opt/logical/kqp_opt_cbo.cpp index 079898bf9d4e..0e0bf93ce980 100644 --- a/ydb/core/kqp/opt/logical/kqp_opt_cbo.cpp +++ b/ydb/core/kqp/opt/logical/kqp_opt_cbo.cpp @@ -188,9 +188,9 @@ double TKqpProviderContext::ComputeJoinCost(const TOptimizerStatistics& leftStat return rightStats.Nrows + outputRows; case EJoinAlgoType::MapJoin: - return leftStats.Nrows + 1.8 * rightStats.Nrows + outputRows; + return 1.5 * (leftStats.Nrows + 1.8 * rightStats.Nrows + outputRows); case EJoinAlgoType::GraceJoin: - return leftStats.Nrows + 2.0 * rightStats.Nrows + outputRows; + return 1.5 * (leftStats.Nrows + 2.0 * rightStats.Nrows + outputRows); default: Y_ENSURE(false, "Illegal join type encountered"); return 0; diff --git a/ydb/core/kqp/ut/join/data/join_order/lookupbug.json b/ydb/core/kqp/ut/join/data/join_order/lookupbug.json new file mode 100644 index 000000000000..f2150054be84 --- /dev/null +++ b/ydb/core/kqp/ut/join/data/join_order/lookupbug.json @@ -0,0 +1,31 @@ +{ + "op_name": "LeftJoin (MapJoin)", + "args": [ + { + "op_name": "LeftJoin (MapJoin)", + "args": [ + { + "op_name": "LeftJoin (MapJoin)", + "args": [ + { + "op_name": "TableFullScan", + "table": "quotas_browsers_relation" + }, + { + "op_name": "TableLookup", + "table": "browsers" + } + ] + }, + { + "op_name": "TableLookup", + "table": "browser_groups" + } + ] + }, + { + "op_name": "TableFullScan", + "table": "quota" + } + ] +} \ No newline at end of file diff --git a/ydb/core/kqp/ut/join/data/queries/lookupbug.sql b/ydb/core/kqp/ut/join/data/queries/lookupbug.sql new file mode 100644 index 000000000000..ec88e372986a --- /dev/null +++ b/ydb/core/kqp/ut/join/data/queries/lookupbug.sql @@ -0,0 +1,38 @@ +DECLARE $quotaName as Utf8?; +DECLARE $browserGroup as Utf8?; +DECLARE $limit as Uint32; +DECLARE $offset as Uint32; +PRAGMA TablePathPrefix ="/Root/"; + +$browsers = ( + SELECT + b.id as id, + q.name AS quota_name, + b.name AS name, + b.version AS version, + b.group AS group, + b.description AS description, + bg.browser_platform AS platform, + MAX_OF(qb.created_at, b.created_at) AS created_at, + qb.deleted_at AS deleted_at + FROM + quotas_browsers_relation AS qb + LEFT JOIN + browsers AS b + ON qb.browser_id = b.id + LEFT JOIN + browser_groups AS bg + ON + b.group = bg.name + LEFT JOIN + quota as q + ON + qb.quota_id = q.id + WHERE + ( + ($quotaName IS NOT NULL AND q.name = $quotaName) OR + $quotaName IS NULL ) AND + ( ($browserGroup IS NOT NULL AND b.group = $browserGroup) OR $browserGroup IS NULL + ) AND ( group IS NOT NULL )); + + SELECT * FROM $browsers ORDER BY created_at LIMIT $limit OFFSET $offset; \ No newline at end of file diff --git a/ydb/core/kqp/ut/join/data/schema/lookupbug.sql b/ydb/core/kqp/ut/join/data/schema/lookupbug.sql new file mode 100644 index 000000000000..d4d218289cd6 --- /dev/null +++ b/ydb/core/kqp/ut/join/data/schema/lookupbug.sql @@ -0,0 +1,46 @@ +CREATE TABLE `/Root/quotas_browsers_relation` ( + quota_id Utf8, + browser_id Utf8, + id Utf8, + created_at Timestamp, + deleted_at Timestamp, + primary key (quota_id, browser_id) +); + +CREATE TABLE `/Root/browsers` ( + id Utf8, + name Utf8, + version Utf8, + group Utf8, + created_at Timestamp, + deleted_at Timestamp, + description Utf8, + primary key (id) +); + +CREATE TABLE `/Root/browser_groups` ( + name Utf8, + platform Utf8, + sessions_per_agent_limit Uint32 , + cpu_cores_per_session Double , + ramdrive_gb_per_session Double , + ram_gb_per_session Double , + ramdrive_size_gb Double , + session_request_timeout_ms Uint32 , + browser_platform Utf8 , + service_startup_timeout_ms Uint32 , + session_attempt_timeout_ms Uint32, + primary key (name) +); + +CREATE TABLE `/Root/quota` ( + name Utf8, + created_at Timestamp , + owner Utf8 , + agents_max_limit Uint32 , + agent_kill_timeout_ms Uint32 , + agent_queue_time_limit_ms Uint32 , + agent_secret_id Utf8 , + id Utf8 , + primary key(name) +); \ No newline at end of file diff --git a/ydb/core/kqp/ut/join/data/stats/lookupbug.json b/ydb/core/kqp/ut/join/data/stats/lookupbug.json new file mode 100644 index 000000000000..9857d867f2fd --- /dev/null +++ b/ydb/core/kqp/ut/join/data/stats/lookupbug.json @@ -0,0 +1,18 @@ +{ + "/Root/quotas_browsers_relation": { + "n_rows": 222, + "byte_size": 28140 + }, + "/Root/browsers": { + "n_rows": 87, + "byte_size": 26719 + }, + "/Root/browser_groups": { + "n_rows": 17, + "byte_size": 1905 + }, + "/Root/quota": { + "n_rows": 55, + "byte_size": 9241 + } +} \ No newline at end of file diff --git a/ydb/core/kqp/ut/join/kqp_flip_join_ut.cpp b/ydb/core/kqp/ut/join/kqp_flip_join_ut.cpp index 9094f5fa31f0..e42719e5d8fe 100644 --- a/ydb/core/kqp/ut/join/kqp_flip_join_ut.cpp +++ b/ydb/core/kqp/ut/join/kqp_flip_join_ut.cpp @@ -203,7 +203,7 @@ Y_UNIT_TEST_SUITE(KqpFlipJoin) { auto result = ExecQueryAndTestResult(session, query, NoParams, R"([[[1];["Value11"]];[[2];["Value12"]]])"); - AssertTableReads(result, "/Root/FJ_Table_1", 2); + AssertTableReads(result, "/Root/FJ_Table_1", 3); AssertTableReads(result, "/Root/FJ_Table_2", 2); AssertTableReads(result, "/Root/FJ_Table_3", 4); } diff --git a/ydb/core/kqp/ut/join/kqp_join_order_ut.cpp b/ydb/core/kqp/ut/join/kqp_join_order_ut.cpp index c1920201e175..6840d471c7a2 100644 --- a/ydb/core/kqp/ut/join/kqp_join_order_ut.cpp +++ b/ydb/core/kqp/ut/join/kqp_join_order_ut.cpp @@ -48,6 +48,8 @@ static void CreateSampleTable(TSession session) { UNIT_ASSERT(session.ExecuteSchemeQuery(GetStatic("schema/tpcc.sql")).GetValueSync().IsSuccess()); + UNIT_ASSERT(session.ExecuteSchemeQuery(GetStatic("schema/lookupbug.sql")).GetValueSync().IsSuccess()); + } static TKikimrRunner GetKikimrWithJoinSettings(bool useStreamLookupJoin = false, TString stats = ""){ @@ -148,7 +150,8 @@ void ExplainJoinOrderTestDataQuery(const TString& queryPath, bool useStreamLooku NJson::TJsonValue plan; NJson::ReadJsonTree(result.GetPlan(), &plan, true); - Cout << result.GetPlan(); + Cout << result.GetPlan() << Endl; + Cout << CanonizeJoinOrder(result.GetPlan()) << Endl; } } @@ -330,6 +333,12 @@ Y_UNIT_TEST_SUITE(KqpJoinOrder) { "queries/tpcc.sql", "stats/tpcc.json", "join_order/tpcc.json", false); } + Y_UNIT_TEST(LookupBug) { + JoinOrderTestWithOverridenStats( + "queries/lookupbug.sql", "stats/lookupbug.json", "join_order/lookupbug.json", false); + } + + } } } diff --git a/ydb/library/yql/dq/opt/dq_opt_predicate_selectivity.cpp b/ydb/library/yql/dq/opt/dq_opt_predicate_selectivity.cpp index f6489c34bd64..8a7ba8f5390a 100644 --- a/ydb/library/yql/dq/opt/dq_opt_predicate_selectivity.cpp +++ b/ydb/library/yql/dq/opt/dq_opt_predicate_selectivity.cpp @@ -126,7 +126,7 @@ namespace { TString attributeName; - if (IsAttribute(right, attributeName) && IsConstantExpr(left.Ptr())) { + if (IsAttribute(right, attributeName) && IsConstantExprWithParams(left.Ptr())) { std::swap(left, right); } @@ -139,8 +139,8 @@ namespace { // In case the right side is a constant that can be extracted, compute the selectivity using statistics // Currently, with the basic statistics we just return 1/nRows - else if (IsConstantExpr(right.Ptr())) { - if (stats->ColumnStatistics == nullptr) { + else if (IsConstantExprWithParams(right.Ptr())) { + if (!IsConstantExpr(right.Ptr()) || stats->ColumnStatistics == nullptr) { return DefaultSelectivity(stats, attributeName); } @@ -165,7 +165,7 @@ namespace { Y_UNUSED(stats); TString attributeName; - if (IsAttribute(right, attributeName) && IsConstantExpr(left.Ptr())) { + if (IsAttribute(right, attributeName) && IsConstantExprWithParams(left.Ptr())) { std::swap(left, right); } @@ -176,7 +176,7 @@ namespace { } // In case the right side is a constant that can be extracted, compute the selectivity using statistics // Currently, with the basic statistics we just return 0.5 - else if (IsConstantExpr(right.Ptr())) { + else if (IsConstantExprWithParams(right.Ptr())) { return 0.5; } } diff --git a/ydb/library/yql/dq/opt/dq_opt_stat.cpp b/ydb/library/yql/dq/opt/dq_opt_stat.cpp index 4e3385fdc4c5..46e2a2bb9e06 100644 --- a/ydb/library/yql/dq/opt/dq_opt_stat.cpp +++ b/ydb/library/yql/dq/opt/dq_opt_stat.cpp @@ -133,6 +133,37 @@ bool IsConstantExpr(const TExprNode::TPtr& input) { return false; } +bool IsConstantExprWithParams(const TExprNode::TPtr& input) { + if (input->IsCallable("Parameter")) { + return true; + } + + if (input->GetTypeAnn()->GetKind() == ETypeAnnotationKind::Pg) { + return IsConstantExprPg(input); + } + + if (!IsDataOrOptionalOfData(input->GetTypeAnn())) { + return false; + } + + if (!NeedCalc(TExprBase(input))) { + return true; + } + + else if (input->IsCallable(constantFoldingWhiteList)) { + for (size_t i = 0; i < input->ChildrenSize(); i++) { + auto callableInput = input->Child(i); + if (callableInput->GetTypeAnn()->GetKind() != ETypeAnnotationKind::Type && !IsConstantExprWithParams(callableInput)) { + return false; + } + } + return true; + } + + return false; +} + + /** * Compute statistics for map join * FIX: Currently we treat all join the same from the cost perspective, need to refine cost function diff --git a/ydb/library/yql/dq/opt/dq_opt_stat.h b/ydb/library/yql/dq/opt/dq_opt_stat.h index dd5d628c44af..68cc21b02475 100644 --- a/ydb/library/yql/dq/opt/dq_opt_stat.h +++ b/ydb/library/yql/dq/opt/dq_opt_stat.h @@ -24,5 +24,6 @@ void InferStatisticsForListParam(const TExprNode::TPtr& input, TTypeAnnotationCo double ComputePredicateSelectivity(const NNodes::TExprBase& input, const std::shared_ptr& stats); bool NeedCalc(NNodes::TExprBase node); bool IsConstantExpr(const TExprNode::TPtr& input); +bool IsConstantExprWithParams(const TExprNode::TPtr& input); } // namespace NYql::NDq {