Skip to content

Commit 315c7f3

Browse files
Added parameter selectivity and boosted LookupJoin (#6874)
1 parent bc17e65 commit 315c7f3

File tree

10 files changed

+183
-9
lines changed

10 files changed

+183
-9
lines changed

ydb/core/kqp/opt/logical/kqp_opt_cbo.cpp

+2-2
Original file line numberDiff line numberDiff line change
@@ -188,9 +188,9 @@ double TKqpProviderContext::ComputeJoinCost(const TOptimizerStatistics& leftStat
188188
return rightStats.Nrows + outputRows;
189189

190190
case EJoinAlgoType::MapJoin:
191-
return leftStats.Nrows + 1.8 * rightStats.Nrows + outputRows;
191+
return 1.5 * (leftStats.Nrows + 1.8 * rightStats.Nrows + outputRows);
192192
case EJoinAlgoType::GraceJoin:
193-
return leftStats.Nrows + 2.0 * rightStats.Nrows + outputRows;
193+
return 1.5 * (leftStats.Nrows + 2.0 * rightStats.Nrows + outputRows);
194194
default:
195195
Y_ENSURE(false, "Illegal join type encountered");
196196
return 0;
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
{
2+
"op_name": "LeftJoin (MapJoin)",
3+
"args": [
4+
{
5+
"op_name": "LeftJoin (MapJoin)",
6+
"args": [
7+
{
8+
"op_name": "LeftJoin (MapJoin)",
9+
"args": [
10+
{
11+
"op_name": "TableFullScan",
12+
"table": "quotas_browsers_relation"
13+
},
14+
{
15+
"op_name": "TableLookup",
16+
"table": "browsers"
17+
}
18+
]
19+
},
20+
{
21+
"op_name": "TableLookup",
22+
"table": "browser_groups"
23+
}
24+
]
25+
},
26+
{
27+
"op_name": "TableFullScan",
28+
"table": "quota"
29+
}
30+
]
31+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
DECLARE $quotaName as Utf8?;
2+
DECLARE $browserGroup as Utf8?;
3+
DECLARE $limit as Uint32;
4+
DECLARE $offset as Uint32;
5+
PRAGMA TablePathPrefix ="/Root/";
6+
7+
$browsers = (
8+
SELECT
9+
b.id as id,
10+
q.name AS quota_name,
11+
b.name AS name,
12+
b.version AS version,
13+
b.group AS group,
14+
b.description AS description,
15+
bg.browser_platform AS platform,
16+
MAX_OF(qb.created_at, b.created_at) AS created_at,
17+
qb.deleted_at AS deleted_at
18+
FROM
19+
quotas_browsers_relation AS qb
20+
LEFT JOIN
21+
browsers AS b
22+
ON qb.browser_id = b.id
23+
LEFT JOIN
24+
browser_groups AS bg
25+
ON
26+
b.group = bg.name
27+
LEFT JOIN
28+
quota as q
29+
ON
30+
qb.quota_id = q.id
31+
WHERE
32+
(
33+
($quotaName IS NOT NULL AND q.name = $quotaName) OR
34+
$quotaName IS NULL ) AND
35+
( ($browserGroup IS NOT NULL AND b.group = $browserGroup) OR $browserGroup IS NULL
36+
) AND ( group IS NOT NULL ));
37+
38+
SELECT * FROM $browsers ORDER BY created_at LIMIT $limit OFFSET $offset;
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,46 @@
1+
CREATE TABLE `/Root/quotas_browsers_relation` (
2+
quota_id Utf8,
3+
browser_id Utf8,
4+
id Utf8,
5+
created_at Timestamp,
6+
deleted_at Timestamp,
7+
primary key (quota_id, browser_id)
8+
);
9+
10+
CREATE TABLE `/Root/browsers` (
11+
id Utf8,
12+
name Utf8,
13+
version Utf8,
14+
group Utf8,
15+
created_at Timestamp,
16+
deleted_at Timestamp,
17+
description Utf8,
18+
primary key (id)
19+
);
20+
21+
CREATE TABLE `/Root/browser_groups` (
22+
name Utf8,
23+
platform Utf8,
24+
sessions_per_agent_limit Uint32 ,
25+
cpu_cores_per_session Double ,
26+
ramdrive_gb_per_session Double ,
27+
ram_gb_per_session Double ,
28+
ramdrive_size_gb Double ,
29+
session_request_timeout_ms Uint32 ,
30+
browser_platform Utf8 ,
31+
service_startup_timeout_ms Uint32 ,
32+
session_attempt_timeout_ms Uint32,
33+
primary key (name)
34+
);
35+
36+
CREATE TABLE `/Root/quota` (
37+
name Utf8,
38+
created_at Timestamp ,
39+
owner Utf8 ,
40+
agents_max_limit Uint32 ,
41+
agent_kill_timeout_ms Uint32 ,
42+
agent_queue_time_limit_ms Uint32 ,
43+
agent_secret_id Utf8 ,
44+
id Utf8 ,
45+
primary key(name)
46+
);
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
{
2+
"/Root/quotas_browsers_relation": {
3+
"n_rows": 222,
4+
"byte_size": 28140
5+
},
6+
"/Root/browsers": {
7+
"n_rows": 87,
8+
"byte_size": 26719
9+
},
10+
"/Root/browser_groups": {
11+
"n_rows": 17,
12+
"byte_size": 1905
13+
},
14+
"/Root/quota": {
15+
"n_rows": 55,
16+
"byte_size": 9241
17+
}
18+
}

ydb/core/kqp/ut/join/kqp_flip_join_ut.cpp

+1-1
Original file line numberDiff line numberDiff line change
@@ -203,7 +203,7 @@ Y_UNIT_TEST_SUITE(KqpFlipJoin) {
203203

204204
auto result = ExecQueryAndTestResult(session, query, NoParams, R"([[[1];["Value11"]];[[2];["Value12"]]])");
205205

206-
AssertTableReads(result, "/Root/FJ_Table_1", 2);
206+
AssertTableReads(result, "/Root/FJ_Table_1", 3);
207207
AssertTableReads(result, "/Root/FJ_Table_2", 2);
208208
AssertTableReads(result, "/Root/FJ_Table_3", 4);
209209
}

ydb/core/kqp/ut/join/kqp_join_order_ut.cpp

+10-1
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,8 @@ static void CreateSampleTable(TSession session) {
4848

4949
UNIT_ASSERT(session.ExecuteSchemeQuery(GetStatic("schema/tpcc.sql")).GetValueSync().IsSuccess());
5050

51+
UNIT_ASSERT(session.ExecuteSchemeQuery(GetStatic("schema/lookupbug.sql")).GetValueSync().IsSuccess());
52+
5153
}
5254

5355
static TKikimrRunner GetKikimrWithJoinSettings(bool useStreamLookupJoin = false, TString stats = ""){
@@ -148,7 +150,8 @@ void ExplainJoinOrderTestDataQuery(const TString& queryPath, bool useStreamLooku
148150

149151
NJson::TJsonValue plan;
150152
NJson::ReadJsonTree(result.GetPlan(), &plan, true);
151-
Cout << result.GetPlan();
153+
Cout << result.GetPlan() << Endl;
154+
Cout << CanonizeJoinOrder(result.GetPlan()) << Endl;
152155
}
153156
}
154157

@@ -330,6 +333,12 @@ Y_UNIT_TEST_SUITE(KqpJoinOrder) {
330333
"queries/tpcc.sql", "stats/tpcc.json", "join_order/tpcc.json", false);
331334
}
332335

336+
// Join-order test case for the "lookupbug" fixtures: passes the query
// queries/lookupbug.sql, the statistics file stats/lookupbug.json and the
// join_order/lookupbug.json file to JoinOrderTestWithOverridenStats.
// NOTE(review): the trailing `false` is presumably the useStreamLookupJoin
// flag, by analogy with the other helpers in this file - confirm against the
// helper's signature.
Y_UNIT_TEST(LookupBug) {
337+
JoinOrderTestWithOverridenStats(
338+
"queries/lookupbug.sql", "stats/lookupbug.json", "join_order/lookupbug.json", false);
339+
}
340+
341+
333342
}
334343
}
335344
}

ydb/library/yql/dq/opt/dq_opt_predicate_selectivity.cpp

+5-5
Original file line numberDiff line numberDiff line change
@@ -126,7 +126,7 @@ namespace {
126126

127127
TString attributeName;
128128

129-
if (IsAttribute(right, attributeName) && IsConstantExpr(left.Ptr())) {
129+
if (IsAttribute(right, attributeName) && IsConstantExprWithParams(left.Ptr())) {
130130
std::swap(left, right);
131131
}
132132

@@ -139,8 +139,8 @@ namespace {
139139
// In case the right side is a constant that can be extracted, compute the selectivity using statistics
140140
// Currently, with the basic statistics we just return 1/nRows
141141

142-
else if (IsConstantExpr(right.Ptr())) {
143-
if (stats->ColumnStatistics == nullptr) {
142+
else if (IsConstantExprWithParams(right.Ptr())) {
143+
if (!IsConstantExpr(right.Ptr()) || stats->ColumnStatistics == nullptr) {
144144
return DefaultSelectivity(stats, attributeName);
145145
}
146146

@@ -165,7 +165,7 @@ namespace {
165165
Y_UNUSED(stats);
166166

167167
TString attributeName;
168-
if (IsAttribute(right, attributeName) && IsConstantExpr(left.Ptr())) {
168+
if (IsAttribute(right, attributeName) && IsConstantExprWithParams(left.Ptr())) {
169169
std::swap(left, right);
170170
}
171171

@@ -176,7 +176,7 @@ namespace {
176176
}
177177
// In case the right side is a constant that can be extracted, compute the selectivity using statistics
178178
// Currently, with the basic statistics we just return 0.5
179-
else if (IsConstantExpr(right.Ptr())) {
179+
else if (IsConstantExprWithParams(right.Ptr())) {
180180
return 0.5;
181181
}
182182
}

ydb/library/yql/dq/opt/dq_opt_stat.cpp

+31
Original file line numberDiff line numberDiff line change
@@ -133,6 +133,37 @@ bool IsConstantExpr(const TExprNode::TPtr& input) {
133133
return false;
134134
}
135135

136+
/**
 * Check whether an expression can be treated as a constant when query
 * parameters are also accepted as constants.
 *
 * Returns true when:
 *  - the node is a "Parameter" callable, or
 *  - the node has a Pg type and IsConstantExprPg accepts it, or
 *  - the node is data / optional-of-data and either does not need calculation
 *    (per NeedCalc) or is a callable from constantFoldingWhiteList whose
 *    children are all either type arguments or constant-with-params
 *    expressions themselves.
 * Returns false in every other case.
 */
bool IsConstantExprWithParams(const TExprNode::TPtr& input) {
    // A query parameter is accepted as a constant by this predicate.
    if (input->IsCallable("Parameter")) {
        return true;
    }

    const auto typeAnn = input->GetTypeAnn();

    // Pg-typed expressions are delegated to the Pg-specific check.
    if (typeAnn->GetKind() == ETypeAnnotationKind::Pg) {
        return IsConstantExprPg(input);
    }

    // Only data and optional-of-data values are considered further.
    if (!IsDataOrOptionalOfData(typeAnn)) {
        return false;
    }

    // Nothing to calculate: the expression is accepted as constant.
    if (!NeedCalc(TExprBase(input))) {
        return true;
    }

    // Only white-listed callables may be built from constant arguments.
    if (!input->IsCallable(constantFoldingWhiteList)) {
        return false;
    }

    for (size_t childIdx = 0; childIdx < input->ChildrenSize(); ++childIdx) {
        auto argument = input->Child(childIdx);

        // Type arguments are skipped without recursing into them.
        const bool isTypeArgument = argument->GetTypeAnn()->GetKind() == ETypeAnnotationKind::Type;
        if (!isTypeArgument && !IsConstantExprWithParams(argument)) {
            return false;
        }
    }

    return true;
}
165+
166+
136167
/**
137168
* Compute statistics for map join
138169
* FIX: Currently we treat all joins the same from the cost perspective; the cost function needs to be refined

ydb/library/yql/dq/opt/dq_opt_stat.h

+1
Original file line numberDiff line numberDiff line change
@@ -24,5 +24,6 @@ void InferStatisticsForListParam(const TExprNode::TPtr& input, TTypeAnnotationCo
2424
double ComputePredicateSelectivity(const NNodes::TExprBase& input, const std::shared_ptr<TOptimizerStatistics>& stats);
2525
bool NeedCalc(NNodes::TExprBase node);
2626
bool IsConstantExpr(const TExprNode::TPtr& input);
27+
bool IsConstantExprWithParams(const TExprNode::TPtr& input);
2728

2829
} // namespace NYql::NDq {

0 commit comments

Comments
 (0)