Skip to content

Commit 05ab73d

Browse files
Merge fd7f82b into edf8f5c
2 parents edf8f5c + fd7f82b commit 05ab73d

31 files changed

+1011
-141
lines changed

ydb/core/kqp/host/kqp_runner.cpp

+9-3
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
#include <ydb/core/kqp/opt/logical/kqp_opt_log.h>
77
#include <ydb/core/kqp/opt/kqp_statistics_transformer.h>
88
#include <ydb/core/kqp/opt/kqp_constant_folding_transformer.h>
9+
#include <ydb/core/kqp/opt/logical/kqp_opt_cbo.h>
910

1011

1112
#include <ydb/core/kqp/opt/physical/kqp_opt_phy.h>
@@ -20,6 +21,8 @@
2021
#include <ydb/library/yql/core/services/yql_transform_pipeline.h>
2122
#include <ydb/library/yql/core/yql_opt_proposed_by_data.h>
2223

24+
#include <ydb/library/yql/providers/dq/common/yql_dq_settings.h>
25+
2326
#include <util/generic/is_in.h>
2427

2528
namespace NKikimr {
@@ -143,6 +146,7 @@ class TKqpRunner : public IKqpRunner {
143146
, OptimizeCtx(MakeIntrusive<TKqpOptimizeContext>(cluster, Config, sessionCtx->QueryPtr(),
144147
sessionCtx->TablesPtr()))
145148
, BuildQueryCtx(MakeIntrusive<TKqpBuildQueryContext>())
149+
, Pctx(TKqpProviderContext(*OptimizeCtx, Config->CostBasedOptimizationLevel.Get().GetOrElse(TDqSettings::TDefault::CostBasedOptimizationLevel)))
146150
{
147151
CreateGraphTransformer(typesCtx, sessionCtx, funcRegistry);
148152
}
@@ -259,8 +263,8 @@ class TKqpRunner : public IKqpRunner {
259263
.AddPostTypeAnnotation(/* forSubgraph */ true)
260264
.AddCommonOptimization()
261265
.Add(CreateKqpConstantFoldingTransformer(OptimizeCtx, *typesCtx, Config), "ConstantFolding")
262-
.Add(CreateKqpStatisticsTransformer(OptimizeCtx, *typesCtx, Config), "Statistics")
263-
.Add(CreateKqpLogOptTransformer(OptimizeCtx, *typesCtx, Config), "LogicalOptimize")
266+
.Add(CreateKqpStatisticsTransformer(OptimizeCtx, *typesCtx, Config, Pctx), "Statistics")
267+
.Add(CreateKqpLogOptTransformer(OptimizeCtx, *typesCtx, Config, Pctx), "LogicalOptimize")
264268
.Add(CreateLogicalDataProposalsInspector(*typesCtx), "ProvidersLogicalOptimize")
265269
.Add(CreateKqpPhyOptTransformer(OptimizeCtx, *typesCtx), "KqpPhysicalOptimize")
266270
.Add(CreatePhysicalDataProposalsInspector(*typesCtx), "ProvidersPhysicalOptimize")
@@ -293,7 +297,7 @@ class TKqpRunner : public IKqpRunner {
293297
.AddTypeAnnotationTransformer(CreateKqpTypeAnnotationTransformer(Cluster, sessionCtx->TablesPtr(), *typesCtx, Config))
294298
.AddPostTypeAnnotation()
295299
.Add(CreateKqpBuildPhysicalQueryTransformer(OptimizeCtx, BuildQueryCtx), "BuildPhysicalQuery")
296-
.Add(CreateKqpStatisticsTransformer(OptimizeCtx, *typesCtx, Config), "Statistics")
300+
.Add(CreateKqpStatisticsTransformer(OptimizeCtx, *typesCtx, Config, Pctx), "Statistics")
297301
.Build(false);
298302

299303
auto physicalPeepholeTransformer = TTransformationPipeline(typesCtx)
@@ -355,6 +359,8 @@ class TKqpRunner : public IKqpRunner {
355359
TIntrusivePtr<TKqpOptimizeContext> OptimizeCtx;
356360
TIntrusivePtr<TKqpBuildQueryContext> BuildQueryCtx;
357361

362+
TKqpProviderContext Pctx;
363+
358364
TAutoPtr<IGraphTransformer> Transformer;
359365
};
360366

ydb/core/kqp/opt/kqp_query_plan.cpp

+2-1
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
#include <ydb/library/yql/dq/type_ann/dq_type_ann.h>
1515
#include <ydb/library/yql/dq/tasks/dq_tasks_graph.h>
1616
#include <ydb/library/yql/utils/plan/plan_utils.h>
17+
#include <ydb/library/yql/providers/dq/common/yql_dq_settings.h>
1718

1819
#include <library/cpp/json/writer/json.h>
1920
#include <library/cpp/json/json_reader.h>
@@ -1357,7 +1358,7 @@ class TxPlanSerializer {
13571358
}
13581359

13591360
void AddOptimizerEstimates(TOperator& op, const TExprBase& expr) {
1360-
if (!SerializerCtx.Config->HasOptEnableCostBasedOptimization()) {
1361+
if (SerializerCtx.Config->CostBasedOptimizationLevel.Get().GetOrElse(TDqSettings::TDefault::CostBasedOptimizationLevel)==0) {
13611362
return;
13621363
}
13631364

ydb/core/kqp/opt/kqp_statistics_transformer.cpp

+6-3
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,9 @@
33
#include <ydb/library/yql/dq/opt/dq_opt_stat.h>
44
#include <ydb/library/yql/core/yql_cost_function.h>
55

6+
#include <ydb/library/yql/providers/dq/common/yql_dq_settings.h>
7+
8+
69
#include <charconv>
710

811
using namespace NYql;
@@ -187,7 +190,7 @@ IGraphTransformer::TStatus TKqpStatisticsTransformer::DoTransform(TExprNode::TPt
187190
TExprNode::TPtr& output, TExprContext& ctx) {
188191

189192
output = input;
190-
if (!Config->HasOptEnableCostBasedOptimization()) {
193+
if (Config->CostBasedOptimizationLevel.Get().GetOrElse(TDqSettings::TDefault::CostBasedOptimizationLevel) == 0) {
191194
return IGraphTransformer::TStatus::Ok;
192195
}
193196

@@ -238,6 +241,6 @@ bool TKqpStatisticsTransformer::AfterLambdasSpecific(const TExprNode::TPtr& inpu
238241
}
239242

240243
TAutoPtr<IGraphTransformer> NKikimr::NKqp::CreateKqpStatisticsTransformer(const TIntrusivePtr<TKqpOptimizeContext>& kqpCtx,
241-
TTypeAnnotationContext& typeCtx, const TKikimrConfiguration::TPtr& config) {
242-
return THolder<IGraphTransformer>(new TKqpStatisticsTransformer(kqpCtx, typeCtx, config));
244+
TTypeAnnotationContext& typeCtx, const TKikimrConfiguration::TPtr& config, const TKqpProviderContext& pctx) {
245+
return THolder<IGraphTransformer>(new TKqpStatisticsTransformer(kqpCtx, typeCtx, config, pctx));
243246
}

ydb/core/kqp/opt/kqp_statistics_transformer.h

+4-3
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
#include <ydb/library/yql/core/yql_statistics.h>
66

77
#include <ydb/core/kqp/common/kqp_yql.h>
8+
#include <ydb/core/kqp/opt/logical/kqp_opt_cbo.h>
89
#include <ydb/library/yql/core/yql_graph_transformer.h>
910
#include <ydb/library/yql/core/yql_expr_optimize.h>
1011
#include <ydb/library/yql/core/yql_expr_type_annotation.h>
@@ -33,8 +34,8 @@ class TKqpStatisticsTransformer : public NYql::NDq::TDqStatisticsTransformerBase
3334

3435
public:
3536
TKqpStatisticsTransformer(const TIntrusivePtr<TKqpOptimizeContext>& kqpCtx, TTypeAnnotationContext& typeCtx,
36-
const TKikimrConfiguration::TPtr& config) :
37-
TDqStatisticsTransformerBase(&typeCtx),
37+
const TKikimrConfiguration::TPtr& config, const TKqpProviderContext& pctx) :
38+
TDqStatisticsTransformerBase(&typeCtx, pctx),
3839
Config(config),
3940
KqpCtx(*kqpCtx) {}
4041

@@ -47,6 +48,6 @@ class TKqpStatisticsTransformer : public NYql::NDq::TDqStatisticsTransformerBase
4748
};
4849

4950
TAutoPtr<IGraphTransformer> CreateKqpStatisticsTransformer(const TIntrusivePtr<TKqpOptimizeContext>& kqpCtx,
50-
TTypeAnnotationContext& typeCtx, const TKikimrConfiguration::TPtr& config);
51+
TTypeAnnotationContext& typeCtx, const TKikimrConfiguration::TPtr& config, const TKqpProviderContext& pctx);
5152
}
5253
}
+164
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,164 @@
1+
#include "kqp_opt_cbo.h"
2+
#include "kqp_opt_log_impl.h"
3+
4+
#include <ydb/library/yql/core/yql_opt_utils.h>
5+
#include <ydb/library/yql/utils/log/log.h>
6+
7+
8+
namespace NKikimr::NKqp::NOpt {
9+
10+
using namespace NYql;
11+
using namespace NYql::NCommon;
12+
using namespace NYql::NDq;
13+
using namespace NYql::NNodes;
14+
15+
namespace {
16+
17+
/**
18+
* KQP specific rule to check if a LookupJoin is applicable
19+
*/
20+
bool IsLookupJoinApplicableDetailed(const std::shared_ptr<NYql::TRelOptimizerNode>& node, const TVector<TString>& joinColumns, const TKqpProviderContext& ctx) {
21+
22+
auto rel = std::static_pointer_cast<TKqpRelOptimizerNode>(node);
23+
auto expr = TExprBase(rel->Node);
24+
25+
if (ctx.KqpCtx.IsScanQuery() && !ctx.KqpCtx.Config->EnableKqpScanQueryStreamIdxLookupJoin) {
26+
return false;
27+
}
28+
29+
if (find_if(joinColumns.begin(), joinColumns.end(), [&] (const TString& s) { return node->Stats->KeyColumns[0] == s;})) {
30+
return true;
31+
}
32+
33+
auto readMatch = MatchRead<TKqlReadTable>(expr);
34+
TMaybeNode<TKqlKeyInc> maybeTablePrefix;
35+
size_t prefixSize;
36+
37+
if (readMatch) {
38+
if (readMatch->FlatMap && !IsPassthroughFlatMap(readMatch->FlatMap.Cast(), nullptr)){
39+
return false;
40+
}
41+
auto read = readMatch->Read.Cast<TKqlReadTable>();
42+
maybeTablePrefix = GetRightTableKeyPrefix(read.Range());
43+
44+
if (!maybeTablePrefix) {
45+
return false;
46+
}
47+
48+
prefixSize = maybeTablePrefix.Cast().ArgCount();
49+
50+
if (!prefixSize) {
51+
return true;
52+
}
53+
}
54+
else {
55+
readMatch = MatchRead<TKqlReadTableRangesBase>(expr);
56+
if (readMatch) {
57+
if (readMatch->FlatMap && !IsPassthroughFlatMap(readMatch->FlatMap.Cast(), nullptr)){
58+
return false;
59+
}
60+
auto read = readMatch->Read.Cast<TKqlReadTableRangesBase>();
61+
if (TCoVoid::Match(read.Ranges().Raw())) {
62+
return true;
63+
} else {
64+
auto prompt = TKqpReadTableExplainPrompt::Parse(read);
65+
66+
if (prompt.PointPrefixLen != prompt.UsedKeyColumns.size()) {
67+
return false;
68+
}
69+
70+
if (prompt.ExpectedMaxRanges != TMaybe<ui64>(1)) {
71+
return false;
72+
}
73+
prefixSize = prompt.PointPrefixLen;
74+
}
75+
}
76+
}
77+
if (! readMatch) {
78+
return false;
79+
}
80+
81+
if (prefixSize < node->Stats->KeyColumns.size() && !(find_if(joinColumns.begin(), joinColumns.end(), [&] (const TString& s) {
82+
return node->Stats->KeyColumns[prefixSize] == s;
83+
}))){
84+
return false;
85+
}
86+
87+
return true;
88+
}
89+
90+
bool IsLookupJoinApplicable(std::shared_ptr<IBaseOptimizerNode> left,
91+
std::shared_ptr<IBaseOptimizerNode> right,
92+
const std::set<std::pair<TJoinColumn, TJoinColumn>>& joinConditions,
93+
TKqpProviderContext& ctx) {
94+
95+
Y_UNUSED(left);
96+
97+
auto rightStats = right->Stats;
98+
99+
if (rightStats->Type != EStatisticsType::BaseTable) {
100+
return false;
101+
}
102+
if (joinConditions.size() > rightStats->KeyColumns.size()) {
103+
return false;
104+
}
105+
106+
for (auto [leftCol, rightCol] : joinConditions) {
107+
if (! find_if(rightStats->KeyColumns.begin(), rightStats->KeyColumns.end(),
108+
[rightCol] (const TString& s) {
109+
return rightCol.AttributeName == s;
110+
} )) {
111+
return false;
112+
}
113+
}
114+
115+
TVector<TString> joinKeys;
116+
for( auto [leftJc, rightJc] : joinConditions ) {
117+
joinKeys.emplace_back( rightJc.AttributeName);
118+
}
119+
120+
return IsLookupJoinApplicableDetailed(std::static_pointer_cast<TRelOptimizerNode>(right), joinKeys, ctx);
121+
}
122+
123+
}
124+
125+
bool TKqpProviderContext::IsJoinApplicable(const std::shared_ptr<IBaseOptimizerNode>& left,
126+
const std::shared_ptr<IBaseOptimizerNode>& right,
127+
const std::set<std::pair<NDq::TJoinColumn, NDq::TJoinColumn>>& joinConditions,
128+
EJoinAlgoType joinAlgo) {
129+
130+
switch( joinAlgo ) {
131+
case EJoinAlgoType::LookupJoin:
132+
if (OptLevel==2 && left->Stats->Nrows > 10e3) {
133+
return false;
134+
}
135+
return IsLookupJoinApplicable(left, right, joinConditions, *this);
136+
137+
case EJoinAlgoType::DictJoin:
138+
return right->Stats->Nrows < 10e5;
139+
case EJoinAlgoType::MapJoin:
140+
return right->Stats->Nrows < 10e6;
141+
case EJoinAlgoType::GraceJoin:
142+
return true;
143+
}
144+
}
145+
146+
double TKqpProviderContext::ComputeJoinCost(const TOptimizerStatistics& leftStats, const TOptimizerStatistics& rightStats, EJoinAlgoType joinAlgo) const {
147+
148+
switch(joinAlgo) {
149+
case EJoinAlgoType::LookupJoin:
150+
if (OptLevel==1) {
151+
return -1;
152+
}
153+
return leftStats.Nrows;
154+
case EJoinAlgoType::DictJoin:
155+
return leftStats.Nrows + 1.7 * rightStats.Nrows;
156+
case EJoinAlgoType::MapJoin:
157+
return leftStats.Nrows + 1.8 * rightStats.Nrows;
158+
case EJoinAlgoType::GraceJoin:
159+
return leftStats.Nrows + 2.0 * rightStats.Nrows;
160+
}
161+
}
162+
163+
164+
}
+37
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
#pragma once
2+
3+
#include <ydb/library/yql/ast/yql_expr.h>
4+
#include <ydb/library/yql/core/cbo/cbo_optimizer_new.h>
5+
6+
#include <ydb/core/kqp/opt/kqp_opt.h>
7+
8+
namespace NKikimr::NKqp::NOpt {
9+
10+
/**
11+
* KQP specific Rel node, includes a pointer to ExprNode
12+
*/
13+
struct TKqpRelOptimizerNode : public NYql::TRelOptimizerNode {
14+
const NYql::TExprNode::TPtr Node;
15+
16+
TKqpRelOptimizerNode(TString label, std::shared_ptr<NYql::TOptimizerStatistics> stats, const NYql::TExprNode::TPtr node) :
17+
TRelOptimizerNode(label, stats), Node(node) { }
18+
};
19+
20+
/**
21+
* KQP Specific cost function and join applicability cost function
22+
*/
23+
struct TKqpProviderContext : public NYql::IProviderContext {
24+
TKqpProviderContext(const TKqpOptimizeContext& kqpCtx, const int optLevel) : KqpCtx(kqpCtx), OptLevel(optLevel) {}
25+
26+
virtual bool IsJoinApplicable(const std::shared_ptr<NYql::IBaseOptimizerNode>& left,
27+
const std::shared_ptr<NYql::IBaseOptimizerNode>& right,
28+
const std::set<std::pair<NYql::NDq::TJoinColumn, NYql::NDq::TJoinColumn>>& joinConditions,
29+
NYql::EJoinAlgoType joinAlgo) override;
30+
31+
virtual double ComputeJoinCost(const NYql::TOptimizerStatistics& leftStats, const NYql::TOptimizerStatistics& rightStats, NYql::EJoinAlgoType joinAlgo) const override;
32+
33+
const TKqpOptimizeContext& KqpCtx;
34+
int OptLevel;
35+
};
36+
37+
}

ydb/core/kqp/opt/logical/kqp_opt_log.cpp

+11-4
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
#include "kqp_opt_log_rules.h"
2+
#include "kqp_opt_cbo.h"
23

34
#include <ydb/core/kqp/common/kqp_yql.h>
45
#include <ydb/core/kqp/opt/kqp_opt_impl.h>
@@ -21,11 +22,12 @@ using namespace NYql::NNodes;
2122
class TKqpLogicalOptTransformer : public TOptimizeTransformerBase {
2223
public:
2324
TKqpLogicalOptTransformer(TTypeAnnotationContext& typesCtx, const TIntrusivePtr<TKqpOptimizeContext>& kqpCtx,
24-
const TKikimrConfiguration::TPtr& config)
25+
const TKikimrConfiguration::TPtr& config, TKqpProviderContext& pctx)
2526
: TOptimizeTransformerBase(nullptr, NYql::NLog::EComponent::ProviderKqp, {})
2627
, TypesCtx(typesCtx)
2728
, KqpCtx(*kqpCtx)
2829
, Config(config)
30+
, Pctx(pctx)
2931
{
3032
#define HNDL(name) "KqpLogical-"#name, Hndl(&TKqpLogicalOptTransformer::name)
3133
AddHandler(0, &TCoFlatMapBase::Match, HNDL(PushPredicateToReadTable));
@@ -134,7 +136,10 @@ class TKqpLogicalOptTransformer : public TOptimizeTransformerBase {
134136

135137
TMaybeNode<TExprBase> OptimizeEquiJoinWithCosts(TExprBase node, TExprContext& ctx) {
136138
auto maxDPccpDPTableSize = Config->MaxDPccpDPTableSize.Get().GetOrElse(TDqSettings::TDefault::MaxDPccpDPTableSize);
137-
TExprBase output = DqOptimizeEquiJoinWithCosts(node, ctx, TypesCtx, Config->HasOptEnableCostBasedOptimization(), maxDPccpDPTableSize);
139+
TExprBase output = DqOptimizeEquiJoinWithCosts(node, ctx, TypesCtx, Config->CostBasedOptimizationLevel.Get().GetOrElse(TDqSettings::TDefault::CostBasedOptimizationLevel),
140+
maxDPccpDPTableSize, Pctx, [](auto& rels, auto label, auto node, auto stat) {
141+
rels.emplace_back(std::make_shared<TKqpRelOptimizerNode>(TString(label), stat, node));
142+
});
138143
DumpAppliedRule("OptimizeEquiJoinWithCosts", node.Ptr(), output.Ptr(), ctx);
139144
return output;
140145
}
@@ -269,12 +274,14 @@ class TKqpLogicalOptTransformer : public TOptimizeTransformerBase {
269274
TTypeAnnotationContext& TypesCtx;
270275
const TKqpOptimizeContext& KqpCtx;
271276
const TKikimrConfiguration::TPtr& Config;
277+
TKqpProviderContext& Pctx;
272278
};
273279

274280
TAutoPtr<IGraphTransformer> CreateKqpLogOptTransformer(const TIntrusivePtr<TKqpOptimizeContext>& kqpCtx,
275-
TTypeAnnotationContext& typesCtx, const TKikimrConfiguration::TPtr& config)
281+
TTypeAnnotationContext& typesCtx, const TKikimrConfiguration::TPtr& config,
282+
TKqpProviderContext& pctx)
276283
{
277-
return THolder<IGraphTransformer>(new TKqpLogicalOptTransformer(typesCtx, kqpCtx, config));
284+
return THolder<IGraphTransformer>(new TKqpLogicalOptTransformer(typesCtx, kqpCtx, config, pctx));
278285
}
279286

280287
} // namespace NKikimr::NKqp::NOpt
+3-1
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,14 @@
11
#pragma once
22

33
#include <ydb/core/kqp/opt/kqp_opt.h>
4+
#include <ydb/core/kqp/opt/logical/kqp_opt_cbo.h>
45

56
namespace NKikimr::NKqp::NOpt {
67

78
struct TKqpOptimizeContext;
89

910
TAutoPtr<NYql::IGraphTransformer> CreateKqpLogOptTransformer(const TIntrusivePtr<TKqpOptimizeContext>& kqpCtx,
10-
NYql::TTypeAnnotationContext& typesCtx, const NYql::TKikimrConfiguration::TPtr& config);
11+
NYql::TTypeAnnotationContext& typesCtx, const NYql::TKikimrConfiguration::TPtr& config,
12+
NKikimr::NKqp::NOpt::TKqpProviderContext& pctx);
1113

1214
} // namespace NKikimr::NKqp::NOpt

0 commit comments

Comments
 (0)