Skip to content

Commit ea061ca

Browse files
Fixed a problem with cardinality estimation for PK joins (#907)
1 parent 6f3e5d0 commit ea061ca

File tree

3 files changed

+7
-4
lines changed

3 files changed

+7
-4
lines changed

ydb/core/kqp/opt/kqp_statistics_transformer.cpp

+1 -1
Original file line number | Diff line number | Diff line change
@@ -55,7 +55,7 @@ void InferStatisticsForKqpTable(const TExprNode::TPtr& input, TTypeAnnotationCon
5555
const auto& tableData = kqpCtx.Tables->ExistingTable(kqpCtx.Cluster, path.Value());
5656
double nRows = tableData.Metadata->RecordsCount;
5757
int nAttrs = tableData.Metadata->Columns.size();
58-
YQL_CLOG(TRACE, CoreDq) << "Infer statistics for table: " << path.Value() << ", nrows: " << nRows << ", nattrs: " << nAttrs;
58+
YQL_CLOG(TRACE, CoreDq) << "Infer statistics for table: " << path.Value() << ", nrows: " << nRows << ", nattrs: " << nAttrs << ", nKeyColumns: " << tableData.Metadata->KeyColumnNames.size();
5959

6060
auto outputStats = TOptimizerStatistics(EStatisticsType::BaseTable, nRows, nAttrs, 0.0, tableData.Metadata->KeyColumnNames);
6161
typeCtx->SetStats(input.Get(), std::make_shared<TOptimizerStatistics>(outputStats));

ydb/library/yql/core/yql_cost_function.cpp

+4 -1
Original file line number | Diff line number | Diff line change
@@ -41,9 +41,11 @@ TOptimizerStatistics NYql::ComputeJoinStats(const TOptimizerStatistics& leftStat
4141

4242
double newCard;
4343
EStatisticsType outputType;
44+
TVector<TString> joinedTableKeys;
4445

4546
if (IsPKJoin(rightStats,rightJoinKeys)) {
4647
newCard = std::max(leftStats.Nrows,rightStats.Nrows);
48+
joinedTableKeys = leftStats.KeyColumns;
4749
if (leftStats.Type == EStatisticsType::BaseTable){
4850
outputType = EStatisticsType::FilteredFactTable;
4951
} else {
@@ -52,6 +54,7 @@ TOptimizerStatistics NYql::ComputeJoinStats(const TOptimizerStatistics& leftStat
5254
}
5355
else if (IsPKJoin(leftStats,leftJoinKeys)) {
5456
newCard = std::max(leftStats.Nrows,rightStats.Nrows);
57+
joinedTableKeys = rightStats.KeyColumns;
5558
if (rightStats.Type == EStatisticsType::BaseTable){
5659
outputType = EStatisticsType::FilteredFactTable;
5760
} else {
@@ -69,7 +72,7 @@ TOptimizerStatistics NYql::ComputeJoinStats(const TOptimizerStatistics& leftStat
6972
+ newCard
7073
+ leftStats.Cost + rightStats.Cost;
7174

72-
return TOptimizerStatistics(outputType, newCard, newNCols, cost);
75+
return TOptimizerStatistics(outputType, newCard, newNCols, cost, joinedTableKeys);
7376
}
7477

7578
TOptimizerStatistics NYql::ComputeJoinStats(const TOptimizerStatistics& leftStats, const TOptimizerStatistics& rightStats,

ydb/library/yql/dq/opt/dq_opt_stat.cpp

+2 -2
Original file line number | Diff line number | Diff line change
@@ -194,7 +194,7 @@ void InferStatisticsForFlatMap(const TExprNode::TPtr& input, TTypeAnnotationCont
194194

195195
double selectivity = ComputePredicateSelectivity(flatmap.Lambda().Body(), inputStats);
196196

197-
auto outputStats = TOptimizerStatistics(inputStats->Type, inputStats->Nrows * selectivity, inputStats->Ncols, inputStats->Cost );
197+
auto outputStats = TOptimizerStatistics(inputStats->Type, inputStats->Nrows * selectivity, inputStats->Ncols, inputStats->Cost, inputStats->KeyColumns );
198198

199199
typeCtx->SetStats(input.Get(), std::make_shared<TOptimizerStatistics>(outputStats) );
200200
}
@@ -235,7 +235,7 @@ void InferStatisticsForFilter(const TExprNode::TPtr& input, TTypeAnnotationConte
235235

236236
double selectivity = ComputePredicateSelectivity(filterBody, inputStats);
237237

238-
auto outputStats = TOptimizerStatistics(inputStats->Type, inputStats->Nrows * selectivity, inputStats->Ncols, inputStats->Cost);
238+
auto outputStats = TOptimizerStatistics(inputStats->Type, inputStats->Nrows * selectivity, inputStats->Ncols, inputStats->Cost, inputStats->KeyColumns);
239239

240240
typeCtx->SetStats(input.Get(), std::make_shared<TOptimizerStatistics>(outputStats) );
241241
}

0 commit comments

Comments
 (0)