1
+ #include " kqp_opt_cbo.h"
2
+ #include " kqp_opt_log_impl.h"
3
+
4
+ #include < ydb/library/yql/core/yql_opt_utils.h>
5
+ #include < ydb/library/yql/utils/log/log.h>
6
+
7
+
8
+ namespace NKikimr ::NKqp::NOpt {
9
+
10
+ using namespace NYql ;
11
+ using namespace NYql ::NCommon;
12
+ using namespace NYql ::NDq;
13
+ using namespace NYql ::NNodes;
14
+
15
+ namespace {
16
+
17
+ /* *
18
+ * KQP specific rule to check if a LookupJoin is applicable
19
+ */
20
+ bool IsLookupJoinApplicableDetailed (const std::shared_ptr<NYql::TRelOptimizerNode>& node, const TVector<TString>& joinColumns, const TKqpProviderContext& ctx) {
21
+
22
+ auto rel = std::static_pointer_cast<TKqpRelOptimizerNode>(node);
23
+ auto expr = TExprBase (rel->Node );
24
+
25
+ if (ctx.KqpCtx .IsScanQuery () && !ctx.KqpCtx .Config ->EnableKqpScanQueryStreamIdxLookupJoin ) {
26
+ return false ;
27
+ }
28
+
29
+ if (find_if (joinColumns.begin (), joinColumns.end (), [&] (const TString& s) { return node->Stats ->KeyColumns [0 ] == s;})) {
30
+ return true ;
31
+ }
32
+
33
+ auto readMatch = MatchRead<TKqlReadTable>(expr);
34
+ TMaybeNode<TKqlKeyInc> maybeTablePrefix;
35
+ size_t prefixSize;
36
+
37
+ if (readMatch) {
38
+ if (readMatch->FlatMap && !IsPassthroughFlatMap (readMatch->FlatMap .Cast (), nullptr )){
39
+ return false ;
40
+ }
41
+ auto read = readMatch->Read .Cast <TKqlReadTable>();
42
+ maybeTablePrefix = GetRightTableKeyPrefix (read .Range ());
43
+
44
+ if (!maybeTablePrefix) {
45
+ return false ;
46
+ }
47
+
48
+ prefixSize = maybeTablePrefix.Cast ().ArgCount ();
49
+
50
+ if (!prefixSize) {
51
+ return true ;
52
+ }
53
+ }
54
+ else {
55
+ readMatch = MatchRead<TKqlReadTableRangesBase>(expr);
56
+ if (readMatch) {
57
+ if (readMatch->FlatMap && !IsPassthroughFlatMap (readMatch->FlatMap .Cast (), nullptr )){
58
+ return false ;
59
+ }
60
+ auto read = readMatch->Read .Cast <TKqlReadTableRangesBase>();
61
+ if (TCoVoid::Match (read .Ranges ().Raw ())) {
62
+ return true ;
63
+ } else {
64
+ auto prompt = TKqpReadTableExplainPrompt::Parse (read );
65
+
66
+ if (prompt.PointPrefixLen != prompt.UsedKeyColumns .size ()) {
67
+ return false ;
68
+ }
69
+
70
+ if (prompt.ExpectedMaxRanges != TMaybe<ui64>(1 )) {
71
+ return false ;
72
+ }
73
+ prefixSize = prompt.PointPrefixLen ;
74
+ }
75
+ }
76
+ }
77
+ if (! readMatch) {
78
+ return false ;
79
+ }
80
+
81
+ if (prefixSize < node->Stats ->KeyColumns .size () && !(find_if (joinColumns.begin (), joinColumns.end (), [&] (const TString& s) {
82
+ return node->Stats ->KeyColumns [prefixSize] == s;
83
+ }))){
84
+ return false ;
85
+ }
86
+
87
+ return true ;
88
+ }
89
+
90
+ bool IsLookupJoinApplicable (std::shared_ptr<IBaseOptimizerNode> left,
91
+ std::shared_ptr<IBaseOptimizerNode> right,
92
+ const std::set<std::pair<TJoinColumn, TJoinColumn>>& joinConditions,
93
+ TKqpProviderContext& ctx) {
94
+
95
+ Y_UNUSED (left);
96
+
97
+ auto rightStats = right->Stats ;
98
+
99
+ if (rightStats->Type != EStatisticsType::BaseTable) {
100
+ return false ;
101
+ }
102
+ if (joinConditions.size () > rightStats->KeyColumns .size ()) {
103
+ return false ;
104
+ }
105
+
106
+ for (auto [leftCol, rightCol] : joinConditions) {
107
+ if (! find_if (rightStats->KeyColumns .begin (), rightStats->KeyColumns .end (),
108
+ [rightCol] (const TString& s) {
109
+ return rightCol.AttributeName == s;
110
+ } )) {
111
+ return false ;
112
+ }
113
+ }
114
+
115
+ TVector<TString> joinKeys;
116
+ for ( auto [leftJc, rightJc] : joinConditions ) {
117
+ joinKeys.emplace_back ( rightJc.AttributeName );
118
+ }
119
+
120
+ return IsLookupJoinApplicableDetailed (std::static_pointer_cast<TRelOptimizerNode>(right), joinKeys, ctx);
121
+ }
122
+
123
+ }
124
+
125
+ bool TKqpProviderContext::IsJoinApplicable (const std::shared_ptr<IBaseOptimizerNode>& left,
126
+ const std::shared_ptr<IBaseOptimizerNode>& right,
127
+ const std::set<std::pair<NDq::TJoinColumn, NDq::TJoinColumn>>& joinConditions,
128
+ EJoinAlgoType joinAlgo) {
129
+
130
+ switch ( joinAlgo ) {
131
+ case EJoinAlgoType::LookupJoin:
132
+ if (OptLevel==2 && left->Stats ->Nrows > 10e3 ) {
133
+ return false ;
134
+ }
135
+ return IsLookupJoinApplicable (left, right, joinConditions, *this );
136
+
137
+ case EJoinAlgoType::DictJoin:
138
+ return right->Stats ->Nrows < 10e5 ;
139
+ case EJoinAlgoType::MapJoin:
140
+ return right->Stats ->Nrows < 10e6 ;
141
+ case EJoinAlgoType::GraceJoin:
142
+ return true ;
143
+ }
144
+ }
145
+
146
+ double TKqpProviderContext::ComputeJoinCost (const TOptimizerStatistics& leftStats, const TOptimizerStatistics& rightStats, EJoinAlgoType joinAlgo) const {
147
+
148
+ switch (joinAlgo) {
149
+ case EJoinAlgoType::LookupJoin:
150
+ if (OptLevel==1 ) {
151
+ return -1 ;
152
+ }
153
+ return leftStats.Nrows ;
154
+ case EJoinAlgoType::DictJoin:
155
+ return leftStats.Nrows + 1.7 * rightStats.Nrows ;
156
+ case EJoinAlgoType::MapJoin:
157
+ return leftStats.Nrows + 1.8 * rightStats.Nrows ;
158
+ case EJoinAlgoType::GraceJoin:
159
+ return leftStats.Nrows + 2.0 * rightStats.Nrows ;
160
+ }
161
+ }
162
+
163
+
164
+ }
0 commit comments