Skip to content

Added parameter selectivity and boosted LookupJoin #6874

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Jul 19, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions ydb/core/kqp/opt/logical/kqp_opt_cbo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -188,9 +188,9 @@ double TKqpProviderContext::ComputeJoinCost(const TOptimizerStatistics& leftStat
return rightStats.Nrows + outputRows;

case EJoinAlgoType::MapJoin:
return leftStats.Nrows + 1.8 * rightStats.Nrows + outputRows;
return 1.5 * (leftStats.Nrows + 1.8 * rightStats.Nrows + outputRows);
case EJoinAlgoType::GraceJoin:
return leftStats.Nrows + 2.0 * rightStats.Nrows + outputRows;
return 1.5 * (leftStats.Nrows + 2.0 * rightStats.Nrows + outputRows);
default:
Y_ENSURE(false, "Illegal join type encountered");
return 0;
Expand Down
31 changes: 31 additions & 0 deletions ydb/core/kqp/ut/join/data/join_order/lookupbug.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
{
"op_name": "LeftJoin (MapJoin)",
"args": [
{
"op_name": "LeftJoin (MapJoin)",
"args": [
{
"op_name": "LeftJoin (MapJoin)",
"args": [
{
"op_name": "TableFullScan",
"table": "quotas_browsers_relation"
},
{
"op_name": "TableLookup",
"table": "browsers"
}
]
},
{
"op_name": "TableLookup",
"table": "browser_groups"
}
]
},
{
"op_name": "TableFullScan",
"table": "quota"
}
]
}
38 changes: 38 additions & 0 deletions ydb/core/kqp/ut/join/data/queries/lookupbug.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
-- Query parameters. For the two optional filters, a NULL value makes the
-- corresponding predicate in the WHERE clause below always true.
DECLARE $quotaName AS Utf8?;
DECLARE $browserGroup AS Utf8?;
DECLARE $limit AS Uint32;
DECLARE $offset AS Uint32;
PRAGMA TablePathPrefix = "/Root/";

-- Rows of quotas_browsers_relation LEFT JOINed to browsers, browser_groups
-- and quota, restricted by the optional quota-name and browser-group filters.
$browsers = (
    SELECT
        b.id AS id,
        q.name AS quota_name,
        b.name AS name,
        b.version AS version,
        b.group AS group,
        b.description AS description,
        bg.browser_platform AS platform,
        MAX_OF(qb.created_at, b.created_at) AS created_at,
        qb.deleted_at AS deleted_at
    FROM quotas_browsers_relation AS qb
    LEFT JOIN browsers AS b
        ON qb.browser_id = b.id
    LEFT JOIN browser_groups AS bg
        ON b.group = bg.name
    LEFT JOIN quota AS q
        ON qb.quota_id = q.id
    WHERE
        (($quotaName IS NOT NULL AND q.name = $quotaName) OR $quotaName IS NULL)
        AND (($browserGroup IS NOT NULL AND b.group = $browserGroup) OR $browserGroup IS NULL)
        AND (group IS NOT NULL)
);

-- Page through the result ordered by creation time.
SELECT * FROM $browsers ORDER BY created_at LIMIT $limit OFFSET $offset;
46 changes: 46 additions & 0 deletions ydb/core/kqp/ut/join/data/schema/lookupbug.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
-- Relation rows keyed by the (quota_id, browser_id) pair.
CREATE TABLE `/Root/quotas_browsers_relation` (
    quota_id   Utf8,
    browser_id Utf8,
    id         Utf8,
    created_at Timestamp,
    deleted_at Timestamp,
    PRIMARY KEY (quota_id, browser_id)
);

-- Browser records keyed by id.
CREATE TABLE `/Root/browsers` (
    id          Utf8,
    name        Utf8,
    version     Utf8,
    group       Utf8,
    created_at  Timestamp,
    deleted_at  Timestamp,
    description Utf8,
    PRIMARY KEY (id)
);

-- Browser group settings keyed by group name.
CREATE TABLE `/Root/browser_groups` (
    name                       Utf8,
    platform                   Utf8,
    sessions_per_agent_limit   Uint32,
    cpu_cores_per_session      Double,
    ramdrive_gb_per_session    Double,
    ram_gb_per_session         Double,
    ramdrive_size_gb           Double,
    session_request_timeout_ms Uint32,
    browser_platform           Utf8,
    service_startup_timeout_ms Uint32,
    session_attempt_timeout_ms Uint32,
    PRIMARY KEY (name)
);

-- Quota records; the primary key is name, while id is an ordinary column.
CREATE TABLE `/Root/quota` (
    name                      Utf8,
    created_at                Timestamp,
    owner                     Utf8,
    agents_max_limit          Uint32,
    agent_kill_timeout_ms     Uint32,
    agent_queue_time_limit_ms Uint32,
    agent_secret_id           Utf8,
    id                        Utf8,
    PRIMARY KEY (name)
);
18 changes: 18 additions & 0 deletions ydb/core/kqp/ut/join/data/stats/lookupbug.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
{
"/Root/quotas_browsers_relation": {
"n_rows": 222,
"byte_size": 28140
},
"/Root/browsers": {
"n_rows": 87,
"byte_size": 26719
},
"/Root/browser_groups": {
"n_rows": 17,
"byte_size": 1905
},
"/Root/quota": {
"n_rows": 55,
"byte_size": 9241
}
}
2 changes: 1 addition & 1 deletion ydb/core/kqp/ut/join/kqp_flip_join_ut.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -203,7 +203,7 @@ Y_UNIT_TEST_SUITE(KqpFlipJoin) {

auto result = ExecQueryAndTestResult(session, query, NoParams, R"([[[1];["Value11"]];[[2];["Value12"]]])");

AssertTableReads(result, "/Root/FJ_Table_1", 2);
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

А почему тут поменялось количество чтений?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Стало больше, но тут приграничная история, был 1 lookup и 1 map, так и осталось, просто таблицы другие

AssertTableReads(result, "/Root/FJ_Table_1", 3);
AssertTableReads(result, "/Root/FJ_Table_2", 2);
AssertTableReads(result, "/Root/FJ_Table_3", 4);
}
Expand Down
11 changes: 10 additions & 1 deletion ydb/core/kqp/ut/join/kqp_join_order_ut.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,8 @@ static void CreateSampleTable(TSession session) {

UNIT_ASSERT(session.ExecuteSchemeQuery(GetStatic("schema/tpcc.sql")).GetValueSync().IsSuccess());

UNIT_ASSERT(session.ExecuteSchemeQuery(GetStatic("schema/lookupbug.sql")).GetValueSync().IsSuccess());

}

static TKikimrRunner GetKikimrWithJoinSettings(bool useStreamLookupJoin = false, TString stats = ""){
Expand Down Expand Up @@ -148,7 +150,8 @@ void ExplainJoinOrderTestDataQuery(const TString& queryPath, bool useStreamLooku

NJson::TJsonValue plan;
NJson::ReadJsonTree(result.GetPlan(), &plan, true);
Cout << result.GetPlan();
Cout << result.GetPlan() << Endl;
Cout << CanonizeJoinOrder(result.GetPlan()) << Endl;
}
}

Expand Down Expand Up @@ -330,6 +333,12 @@ Y_UNIT_TEST_SUITE(KqpJoinOrder) {
"queries/tpcc.sql", "stats/tpcc.json", "join_order/tpcc.json", false);
}

// Join-order test for the LEFT JOIN query in queries/lookupbug.sql, run with the
// table statistics overridden from stats/lookupbug.json; join_order/lookupbug.json
// is presumably the expected join tree the resulting plan is compared against.
// NOTE(review): the meaning of the trailing `false` is not visible here — it looks
// like a "use stream lookup join" switch; confirm against the helper's signature.
Y_UNIT_TEST(LookupBug) {
JoinOrderTestWithOverridenStats(
"queries/lookupbug.sql", "stats/lookupbug.json", "join_order/lookupbug.json", false);
}


}
}
}
10 changes: 5 additions & 5 deletions ydb/library/yql/dq/opt/dq_opt_predicate_selectivity.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -126,7 +126,7 @@ namespace {

TString attributeName;

if (IsAttribute(right, attributeName) && IsConstantExpr(left.Ptr())) {
if (IsAttribute(right, attributeName) && IsConstantExprWithParams(left.Ptr())) {
std::swap(left, right);
}

Expand All @@ -139,8 +139,8 @@ namespace {
// In case the right side is a constant that can be extracted, compute the selectivity using statistics
// Currently, with the basic statistics we just return 1/nRows

else if (IsConstantExpr(right.Ptr())) {
if (stats->ColumnStatistics == nullptr) {
else if (IsConstantExprWithParams(right.Ptr())) {
if (!IsConstantExpr(right.Ptr()) || stats->ColumnStatistics == nullptr) {
return DefaultSelectivity(stats, attributeName);
}

Expand All @@ -165,7 +165,7 @@ namespace {
Y_UNUSED(stats);

TString attributeName;
if (IsAttribute(right, attributeName) && IsConstantExpr(left.Ptr())) {
if (IsAttribute(right, attributeName) && IsConstantExprWithParams(left.Ptr())) {
std::swap(left, right);
}

Expand All @@ -176,7 +176,7 @@ namespace {
}
// In case the right side is a constant that can be extracted, compute the selectivity using statistics
// Currently, with the basic statistics we just return 0.5
else if (IsConstantExpr(right.Ptr())) {
else if (IsConstantExprWithParams(right.Ptr())) {
return 0.5;
}
}
Expand Down
31 changes: 31 additions & 0 deletions ydb/library/yql/dq/opt/dq_opt_stat.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -133,6 +133,37 @@ bool IsConstantExpr(const TExprNode::TPtr& input) {
return false;
}

/**
 * Check whether an expression can be treated as a constant when query
 * parameters are counted as constants too.
 *
 * The result is true for:
 *   - a `Parameter` callable;
 *   - a data (or optional data) expression for which NeedCalc() is false;
 *   - a callable from constantFoldingWhiteList whose arguments all pass this
 *     same check (arguments annotated with kind Type are not inspected).
 * Expressions with a Pg type annotation are delegated to IsConstantExprPg().
 * Everything else yields false.
 */
bool IsConstantExprWithParams(const TExprNode::TPtr& input) {
    if (input->IsCallable("Parameter")) {
        return true;
    }

    const auto* typeAnn = input->GetTypeAnn();

    if (typeAnn->GetKind() == ETypeAnnotationKind::Pg) {
        return IsConstantExprPg(input);
    }

    if (!IsDataOrOptionalOfData(typeAnn)) {
        return false;
    }

    // Nothing left to calculate: the node already is a constant.
    if (!NeedCalc(TExprBase(input))) {
        return true;
    }

    // Only white-listed callables may be folded from constant arguments.
    if (!input->IsCallable(constantFoldingWhiteList)) {
        return false;
    }

    for (size_t idx = 0, argCount = input->ChildrenSize(); idx < argCount; ++idx) {
        auto arg = input->Child(idx);
        const bool isTypeArg = arg->GetTypeAnn()->GetKind() == ETypeAnnotationKind::Type;
        if (!isTypeArg && !IsConstantExprWithParams(arg)) {
            return false;
        }
    }

    return true;
}


/**
* Compute statistics for map join
* FIX: Currently we treat all joins the same from the cost perspective; the cost function needs to be refined
Expand Down
1 change: 1 addition & 0 deletions ydb/library/yql/dq/opt/dq_opt_stat.h
Original file line number Diff line number Diff line change
Expand Up @@ -24,5 +24,6 @@ void InferStatisticsForListParam(const TExprNode::TPtr& input, TTypeAnnotationCo
double ComputePredicateSelectivity(const NNodes::TExprBase& input, const std::shared_ptr<TOptimizerStatistics>& stats);
bool NeedCalc(NNodes::TExprBase node);
bool IsConstantExpr(const TExprNode::TPtr& input);
bool IsConstantExprWithParams(const TExprNode::TPtr& input);

} // namespace NYql::NDq {
Loading