Skip to content

Commit 4743435

Browse files
Don't run CBO if statistics are not available (#7089)
1 parent ae7146c commit 4743435

File tree

12 files changed

+470
-390
lines changed

12 files changed

+470
-390
lines changed

ydb/core/kqp/gateway/kqp_metadata_loader.cpp

+1
Original file line numberDiff line numberDiff line change
@@ -959,6 +959,7 @@ NThreading::TFuture<TTableMetadataResult> TKqpTableMetadataLoader::LoadTableMeta
959959
auto s = resp.Simple;
960960
result.Metadata->RecordsCount = s.RowCount;
961961
result.Metadata->DataSize = s.BytesSize;
962+
result.Metadata->StatsLoaded = response.Success;
962963
promise.SetValue(result);
963964
});
964965

ydb/core/kqp/opt/kqp_statistics_transformer.cpp

+14
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,10 @@ void InferStatisticsForReadTable(const TExprNode::TPtr& input, TTypeAnnotationCo
4444
Y_ENSURE(false, "Invalid node type for InferStatisticsForReadTable");
4545
}
4646

47+
if (!inputStats) {
48+
return;
49+
}
50+
4751
auto keyColumns = inputStats->KeyColumns;
4852
if (auto indexRead = inputNode.Maybe<TKqlReadTableIndex>()) {
4953
const auto& tableData = kqpCtx.Tables->ExistingTable(kqpCtx.Cluster, indexRead.Cast().Table().Path().Value());
@@ -93,6 +97,10 @@ void InferStatisticsForKqpTable(const TExprNode::TPtr& input, TTypeAnnotationCon
9397
auto path = readTable.Path();
9498

9599
const auto& tableData = kqpCtx.Tables->ExistingTable(kqpCtx.Cluster, path.Value());
100+
if (!tableData.Metadata->StatsLoaded && !kqpCtx.Config->OverrideStatistics.Get()) {
101+
return;
102+
}
103+
96104
double nRows = tableData.Metadata->RecordsCount;
97105
double byteSize = tableData.Metadata->DataSize;
98106
int nAttrs = tableData.Metadata->Columns.size();
@@ -128,6 +136,9 @@ void InferStatisticsForSteamLookup(const TExprNode::TPtr& input, TTypeAnnotation
128136

129137
int nAttrs = streamLookup.Columns().Size();
130138
auto inputStats = typeCtx->GetStats(streamLookup.Table().Raw());
139+
if (!inputStats) {
140+
return;
141+
}
131142
auto byteSize = inputStats->ByteSize * (nAttrs / (double) inputStats->Ncols);
132143

133144
typeCtx->SetStats(input.Get(), std::make_shared<TOptimizerStatistics>(
@@ -155,6 +166,9 @@ void InferStatisticsForLookupTable(const TExprNode::TPtr& input, TTypeAnnotation
155166
double byteSize = 0;
156167

157168
auto inputStats = typeCtx->GetStats(lookupTable.Table().Raw());
169+
if (!inputStats) {
170+
return;
171+
}
158172

159173
if (lookupTable.LookupKeys().Maybe<TCoIterator>()) {
160174
if (inputStats) {

ydb/core/kqp/opt/logical/kqp_opt_log_join.cpp

+13-3
Original file line numberDiff line numberDiff line change
@@ -918,14 +918,24 @@ TMaybeNode<TExprBase> KqpJoinToIndexLookupImpl(const TDqJoin& join, TExprContext
918918

919919
TExprBase KqpJoinToIndexLookup(const TExprBase& node, TExprContext& ctx, const TKqpOptimizeContext& kqpCtx, bool useCBO)
920920
{
921-
if ((!useCBO && kqpCtx.IsScanQuery() && !kqpCtx.Config->EnableKqpScanQueryStreamIdxLookupJoin) || !node.Maybe<TDqJoin>()) {
921+
if (!node.Maybe<TDqJoin>()) {
922922
return node;
923923
}
924+
924925
auto join = node.Cast<TDqJoin>();
926+
auto algo = FromString<EJoinAlgoType>(join.JoinAlgo().StringValue());
927+
928+
if (algo == EJoinAlgoType::Undefined) {
929+
useCBO = false;
930+
}
931+
932+
if (!useCBO && kqpCtx.IsScanQuery() && !kqpCtx.Config->EnableKqpScanQueryStreamIdxLookupJoin) {
933+
return node;
934+
}
925935

926936
if (useCBO){
927-
auto algo = FromString<EJoinAlgoType>(join.JoinAlgo().StringValue());
928-
if (algo != EJoinAlgoType::LookupJoin && algo != EJoinAlgoType::LookupJoinReverse && algo != EJoinAlgoType::Undefined) {
937+
938+
if (algo != EJoinAlgoType::LookupJoin && algo != EJoinAlgoType::LookupJoinReverse) {
929939
return node;
930940
}
931941
}

ydb/core/kqp/provider/yql_kikimr_gateway.h

+3
Original file line numberDiff line numberDiff line change
@@ -464,6 +464,7 @@ struct TKikimrTableMetadata : public TThrRefBase {
464464
ui64 DataSize = 0;
465465
ui64 MemorySize = 0;
466466
ui32 ShardsCount = 0;
467+
bool StatsLoaded = false;
467468

468469
TInstant LastAccessTime;
469470
TInstant LastUpdateTime;
@@ -500,6 +501,7 @@ struct TKikimrTableMetadata : public TThrRefBase {
500501
, Kind(static_cast<EKikimrTableKind>(message->GetKind()))
501502
, RecordsCount(message->GetRecordsCount())
502503
, DataSize(message->GetDataSize())
504+
, StatsLoaded(message->GetStatsLoaded())
503505
, KeyColumnNames(message->GetKeyColunmNames().begin(), message->GetKeyColunmNames().end())
504506

505507
{
@@ -565,6 +567,7 @@ struct TKikimrTableMetadata : public TThrRefBase {
565567
PathId.ToMessage(message->MutablePathId());
566568
message->SetSchemaVersion(SchemaVersion);
567569
message->SetKind(static_cast<ui32>(Kind));
570+
message->SetStatsLoaded(StatsLoaded);
568571
message->SetRecordsCount(RecordsCount);
569572
message->SetDataSize(DataSize);
570573
for(auto& [key, value] : Attributes) {

ydb/core/kqp/ut/perf/kqp_query_perf_ut.cpp

+1-1
Original file line numberDiff line numberDiff line change
@@ -639,7 +639,7 @@ Y_UNIT_TEST_SUITE(KqpQueryPerf) {
639639
if (settings.AppConfig.GetTableServiceConfig().GetEnableKqpDataQueryStreamIdxLookupJoin()) {
640640
UNIT_ASSERT_VALUES_EQUAL(stats.query_phases().size(), 1);
641641
} else if (settings.AppConfig.GetTableServiceConfig().GetEnableKqpDataQueryStreamLookup()) {
642-
UNIT_ASSERT_VALUES_EQUAL(stats.query_phases().size(), 2);
642+
UNIT_ASSERT_VALUES_EQUAL(stats.query_phases().size(), 3);
643643
} else {
644644
UNIT_ASSERT_VALUES_EQUAL(stats.query_phases().size(), 5);
645645
}

ydb/core/kqp/ut/query/kqp_explain_ut.cpp

+1-1
Original file line numberDiff line numberDiff line change
@@ -591,7 +591,7 @@ Y_UNIT_TEST_SUITE(KqpExplain) {
591591
NJson::ReadJsonTree(*res.PlanJson, &plan, true);
592592
UNIT_ASSERT(ValidatePlanNodeIds(plan));
593593

594-
auto join = FindPlanNodeByKv(plan, "Node Type", "FullJoin (Grace)");
594+
auto join = FindPlanNodeByKv(plan, "Node Type", "FullJoin (JoinDict)");
595595
UNIT_ASSERT(join.IsDefined());
596596
auto left = FindPlanNodeByKv(join, "Table", "EightShard");
597597
UNIT_ASSERT(left.IsDefined());

ydb/core/kqp/ut/spilling/kqp_scan_spilling_ut.cpp

+1
Original file line numberDiff line numberDiff line change
@@ -90,6 +90,7 @@ Y_UNIT_TEST_TWIN(SpillingInRuntimeNodes, EnabledSpilling) {
9090
auto query = R"(
9191
--!syntax_v1
9292
PRAGMA ydb.EnableSpillingNodes="GraceJoin";
93+
PRAGMA ydb.OverrideStatistics='{"/Root/KeyValue" : {"n_rows":10e9, "byte_size":10e9}}';
9394
select t1.Key, t1.Value, t2.Key, t2.Value
9495
from `/Root/KeyValue` as t1 full join `/Root/KeyValue` as t2 on t1.Value = t2.Value
9596
order by t1.Value

ydb/core/protos/kqp.proto

+1
Original file line numberDiff line numberDiff line change
@@ -186,6 +186,7 @@ message TKqpTableMetadataProto {
186186
repeated TKqpTableMetadataProto SecondaryGlobalIndexMetadata = 12;
187187
optional uint64 RecordsCount = 13;
188188
optional uint64 DataSize = 14;
189+
optional bool StatsLoaded = 15;
189190
}
190191

191192
message TRlPath {

ydb/tests/functional/suite_tests/canondata/test_postgres.TestPGSQL.test_sql_suite_plan-jointest_join0.test_/query_1.plan

+46-43
Original file line numberDiff line numberDiff line change
@@ -4,8 +4,8 @@
44
"PlanNodeType": "Query",
55
"Plans": [
66
{
7-
"Node Type": "ResultSet_2",
8-
"PlanNodeId": 11,
7+
"Node Type": "ResultSet",
8+
"PlanNodeId": 12,
99
"PlanNodeType": "ResultSet",
1010
"Plans": [
1111
{
@@ -14,18 +14,18 @@
1414
{
1515
"Inputs": [
1616
{
17-
"ExternalPlanNodeId": 9
17+
"ExternalPlanNodeId": 10
1818
}
1919
],
2020
"Limit": "1001",
2121
"Name": "Limit"
2222
}
2323
],
24-
"PlanNodeId": 10,
24+
"PlanNodeId": 11,
2525
"Plans": [
2626
{
2727
"Node Type": "Merge",
28-
"PlanNodeId": 9,
28+
"PlanNodeId": 10,
2929
"PlanNodeType": "Connection",
3030
"Plans": [
3131
{
@@ -45,83 +45,82 @@
4545
"Condition": "q2 = i2_1.q2",
4646
"Inputs": [
4747
{
48-
"ExternalPlanNodeId": 7
48+
"ExternalPlanNodeId": 8
4949
},
5050
{
51-
"ExternalPlanNodeId": 4
51+
"ExternalPlanNodeId": 5
5252
}
5353
],
5454
"Name": "LeftJoin (MapJoin)"
5555
}
5656
],
57-
"PlanNodeId": 8,
57+
"PlanNodeId": 9,
5858
"Plans": [
5959
{
6060
"Node Type": "Broadcast",
61-
"PlanNodeId": 4,
61+
"PlanNodeId": 5,
6262
"PlanNodeType": "Connection",
6363
"Plans": [
6464
{
6565
"Node Type": "Collect",
66-
"PlanNodeId": 3,
66+
"PlanNodeId": 4,
6767
"Plans": [
6868
{
6969
"Node Type": "UnionAll",
70-
"PlanNodeId": 2,
70+
"PlanNodeId": 3,
7171
"PlanNodeType": "Connection",
7272
"Plans": [
7373
{
74-
"CTE Name": "precompute_1_0",
75-
"Node Type": "InnerJoin (MapJoin)-ConstantExpr-Filter-TableRangeScan-ConstantExpr",
74+
"Node Type": "InnerJoin (MapJoin)-Filter",
7675
"Operators": [
7776
{
7877
"Condition": "q1 = x",
7978
"Inputs": [
8079
{
81-
"InternalOperatorId": 2
80+
"InternalOperatorId": 1
8281
},
8382
{
84-
"InternalOperatorId": 1
83+
"Other": "ConstantExpression"
8584
}
8685
],
8786
"Name": "InnerJoin (MapJoin)"
8887
},
89-
{
90-
"Inputs": [],
91-
"Name": "ToFlow",
92-
"ToFlow": "precompute_0_0"
93-
},
9488
{
9589
"Inputs": [
9690
{
97-
"InternalOperatorId": 3
91+
"ExternalPlanNodeId": 1
9892
}
9993
],
10094
"Name": "Filter",
10195
"Predicate": "Exist(item.q1)"
102-
},
96+
}
97+
],
98+
"PlanNodeId": 2,
99+
"Plans": [
103100
{
104-
"Inputs": [
101+
"Node Type": "TableFullScan",
102+
"Operators": [
105103
{
106-
"InternalOperatorId": 4
104+
"Inputs": [],
105+
"Name": "TableFullScan",
106+
"ReadColumns": [
107+
"q1",
108+
"q2"
109+
],
110+
"ReadRanges": [
111+
"q1 (-\u221e, +\u221e)",
112+
"q2 (-\u221e, +\u221e)"
113+
],
114+
"ReadRangesPointPrefixLen": "0",
115+
"Scan": "Parallel",
116+
"Table": "postgres_jointest/join0.test_plan/int8_tbl"
107117
}
108118
],
109-
"Name": "TableRangeScan",
110-
"ReadColumns": [
111-
"q1",
112-
"q2"
113-
],
114-
"Table": "postgres_jointest/join0.test_plan/int8_tbl"
115-
},
116-
{
117-
"Inputs": [],
118-
"Iterator": "precompute_1_0",
119-
"Name": "Iterator"
119+
"PlanNodeId": 1,
120+
"Tables": [
121+
"postgres_jointest/join0.test_plan/int8_tbl"
122+
]
120123
}
121-
],
122-
"PlanNodeId": 1,
123-
"Tables": [
124-
"postgres_jointest/join0.test_plan/int8_tbl"
125124
]
126125
}
127126
]
@@ -132,12 +131,12 @@
132131
},
133132
{
134133
"Node Type": "Map",
135-
"PlanNodeId": 7,
134+
"PlanNodeId": 8,
136135
"PlanNodeType": "Connection",
137136
"Plans": [
138137
{
139138
"Node Type": "Collect",
140-
"PlanNodeId": 6,
139+
"PlanNodeId": 7,
141140
"Plans": [
142141
{
143142
"Node Type": "TableFullScan",
@@ -158,7 +157,7 @@
158157
"Table": "postgres_jointest/join0.test_plan/int8_tbl"
159158
}
160159
],
161-
"PlanNodeId": 5,
160+
"PlanNodeId": 6,
162161
"Tables": [
163162
"postgres_jointest/join0.test_plan/int8_tbl"
164163
]
@@ -205,7 +204,11 @@
205204
"q1",
206205
"q2"
207206
],
208-
"type": "Scan"
207+
"scan_by": [
208+
"q1 (-\u221e, +\u221e)",
209+
"q2 (-\u221e, +\u221e)"
210+
],
211+
"type": "FullScan"
209212
}
210213
]
211214
}

0 commit comments

Comments
 (0)