1
1
#include " yql_pq_provider_impl.h"
2
2
3
3
#include < ydb/library/yql/core/expr_nodes/yql_expr_nodes.h>
4
+ #include < ydb/library/yql/core/yql_opt_utils.h>
4
5
#include < ydb/library/yql/core/yql_type_helpers.h>
5
6
#include < ydb/library/yql/providers/common/provider/yql_data_provider_impl.h>
6
7
#include < ydb/library/yql/providers/common/provider/yql_provider_names.h>
7
8
#include < ydb/library/yql/providers/common/provider/yql_provider.h>
8
9
#include < ydb/library/yql/providers/common/transform/yql_optimize.h>
9
10
#include < ydb/library/yql/providers/dq/expr_nodes/dqs_expr_nodes.h>
10
11
#include < ydb/library/yql/providers/pq/common/pq_meta_fields.h>
12
+ #include < ydb/library/yql/providers/pq/common/yql_names.h>
11
13
#include < ydb/library/yql/providers/pq/expr_nodes/yql_pq_expr_nodes.h>
12
14
#include < ydb/library/yql/utils/log/log.h>
13
15
#include < ydb/library/yql/utils/plan/plan_utils.h>
@@ -30,22 +32,20 @@ namespace {
30
32
}
31
33
};
32
34
33
- std::unordered_set<TString> GetUsedMetadataFields (const TCoExtractMembers& extract) {
34
- std::unordered_set<TString> usedMetadataFields;
35
- for (const auto extractMember : extract.Members ()) {
36
- if (FindPqMetaFieldDescriptorBySysColumn (extractMember.StringValue ())) {
37
- usedMetadataFields.emplace (extractMember.StringValue ());
38
- }
35
+ std::unordered_set<TString> GetUsedColumnNames (const TCoExtractMembers& extractMembers) {
36
+ std::unordered_set<TString> usedColumnNames;
37
+ for (const auto & member : extractMembers.Members ()) {
38
+ usedColumnNames.emplace (member.StringValue ());
39
39
}
40
40
41
- return usedMetadataFields ;
41
+ return usedColumnNames ;
42
42
}
43
43
44
- TVector<TCoNameValueTuple> DropUnusedMetadata (const TPqTopic& pqTopic, const std::unordered_set<TString>& usedMetadataFields ) {
44
+ TVector<TCoNameValueTuple> DropUnusedMetadata (const TPqTopic& pqTopic, const std::unordered_set<TString>& usedColumnNames ) {
45
45
TVector<TCoNameValueTuple> newSourceMetadata;
46
46
for (auto metadataItem : pqTopic.Metadata ()) {
47
47
auto metadataName = metadataItem.Cast <TCoNameValueTuple>().Value ().Maybe <TCoAtom>().Cast ().StringValue ();
48
- if (usedMetadataFields .contains (metadataName)) {
48
+ if (FindPqMetaFieldDescriptorBySysColumn (metadataName) && usedColumnNames .contains (metadataName)) {
49
49
newSourceMetadata.push_back (metadataItem);
50
50
}
51
51
}
@@ -88,18 +88,18 @@ TCoNameValueTupleList DropUnusedMetadataFromDqWrapSettings(
88
88
.Done ();
89
89
}
90
90
91
- TExprNode::TPtr DropUnusedMetadataFieldsFromRowType (
91
+ TExprNode::TPtr DropUnusedRowItems (
92
92
TPositionHandle position,
93
93
const TStructExprType* oldRowType,
94
- const std::unordered_set<TString>& usedMetadataFields ,
94
+ const std::unordered_set<TString>& usedColumnNames ,
95
95
TExprContext& ctx)
96
96
{
97
97
TVector<const TItemExprType*> newFields;
98
98
newFields.reserve (oldRowType->GetSize ());
99
99
100
100
for (auto itemExprType : oldRowType->GetItems ()) {
101
101
const auto columnName = TString (itemExprType->GetName ());
102
- if (FindPqMetaFieldDescriptorBySysColumn (columnName) && !usedMetadataFields .contains (columnName)) {
102
+ if (!usedColumnNames .contains (columnName)) {
103
103
continue ;
104
104
}
105
105
@@ -109,14 +109,14 @@ TExprNode::TPtr DropUnusedMetadataFieldsFromRowType(
109
109
return ExpandType (position, *ctx.MakeType <TStructExprType>(newFields), ctx);
110
110
}
111
111
112
- TExprNode::TPtr DropUnusedMetadataFieldsFromColumns (
112
+ TExprNode::TPtr DropUnusedColumns (
113
113
TExprBase oldColumns,
114
- const std::unordered_set<TString>& usedMetadataFields ,
114
+ const std::unordered_set<TString>& usedColumnNames ,
115
115
TExprContext& ctx)
116
116
{
117
117
TExprNode::TListType res;
118
118
for (const auto & column : oldColumns.Cast <TCoAtomList>()) {
119
- if (FindPqMetaFieldDescriptorBySysColumn (column. StringValue ()) && !usedMetadataFields .contains (column.StringValue ())) {
119
+ if (!usedColumnNames .contains (column.StringValue ())) {
120
120
continue ;
121
121
}
122
122
@@ -160,57 +160,68 @@ class TPqLogicalOptProposalTransformer : public TOptimizeTransformerBase {
160
160
}*/
161
161
162
162
// Pushes the column selection of an ExtractMembers node down into the PQ topic
// source it reads from, so that unused columns and metadata are dropped there.
//
// `node` is returned untouched when:
//   * the ExtractMembers input is not a TDqSourceWrap;
//   * the wrap's data source category is not PqProviderName;
//   * the wrapped input is not a TDqPqTopicSource;
//   * the number of columns to keep already equals the size of the topic row type.
//
// Otherwise a rebuilt TDqSourceWrap (with pruned topic metadata, row spec,
// columns, settings and row type) is produced. It is returned directly when its
// column set matches the ExtractMembers output; if an extra column had to be
// kept, the ExtractMembers node is rebuilt on top of the new wrap instead.
TMaybeNode<TExprBase> ExtractMembersOverDqWrap(TExprBase node, TExprContext& ctx) const {
    const auto extract = node.Cast<TCoExtractMembers>();
    const auto extractInput = extract.Input();

    const auto maybeWrap = extractInput.Maybe<TDqSourceWrap>();
    if (!maybeWrap) {
        return node;
    }

    const auto sourceWrap = maybeWrap.Cast();
    if (sourceWrap.DataSource().Category() != PqProviderName) {
        return node;
    }

    const auto maybeTopicSource = sourceWrap.Input().Maybe<TDqPqTopicSource>();
    if (!maybeTopicSource) {
        return node;
    }

    const auto topicSource = maybeTopicSource.Cast();
    const auto topic = topicSource.Topic();

    auto keptColumns = GetUsedColumnNames(extract);

    // Struct type described by the topic's row spec.
    const auto* rowSpecAnn = topic.RowSpec().Ref().GetTypeAnn()->Cast<TTypeExprType>();
    const TStructExprType* rowSpecType = rowSpecAnn->GetType()->Cast<TStructExprType>();

    // Struct type of the rows produced by the ExtractMembers node itself.
    const auto* resultListAnn = node.Ref().GetTypeAnn()->Cast<TListExprType>();
    const TStructExprType* resultRowType = resultListAnn->GetItemType()->Cast<TStructExprType>();

    // Nothing is selected, yet the row spec is not empty: keep the single column
    // chosen by GetLightColumn rather than leaving the source with no columns.
    const bool mustKeepOneColumn = resultRowType->GetSize() == 0 && rowSpecType->GetSize() > 0;
    if (mustKeepOneColumn) {
        auto item = GetLightColumn(*rowSpecType);
        YQL_ENSURE(item);
        YQL_ENSURE(usedColumnNames.insert(TString(item->GetName())).second);
    }

    // Row type of the topic node as currently annotated.
    const auto* topicListAnn = topic.Ref().GetTypeAnn()->Cast<TListExprType>();
    const auto topicRowType = topicListAnn->GetItemType()->Cast<TStructExprType>();
    if (topicRowType->GetSize() == keptColumns.size()) {
        // Same number of columns as before: there is nothing to prune.
        return node;
    }

    const auto keptMetadata = DropUnusedMetadata(topic, keptColumns);

    const TExprNode::TPtr prunedTopicSource =
        Build<TDqPqTopicSource>(ctx, topicSource.Pos())
            .InitFrom(topicSource)
            .Topic<TPqTopic>()
                .InitFrom(topic)
                .Metadata()
                    .Add(keptMetadata)
                    .Build()
                .RowSpec(DropUnusedRowItems(topic.RowSpec().Pos(), rowSpecType, keptColumns, ctx))
                .Build()
            .Columns(DropUnusedColumns(topicSource.Columns(), keptColumns, ctx))
            .Done()
            .Ptr();

    const TExprNode::TPtr prunedSourceWrap =
        Build<TDqSourceWrap>(ctx, sourceWrap.Pos())
            .InitFrom(sourceWrap)
            .Input(prunedTopicSource)
            .Settings(DropUnusedMetadataFromDqWrapSettings(sourceWrap, keptMetadata, ctx))
            .RowType(DropUnusedRowItems(sourceWrap.RowType().Pos(), topicRowType, keptColumns, ctx))
            .Done()
            .Ptr();

    // The new wrap already yields exactly the requested columns, so the
    // ExtractMembers node on top of it is no longer needed.
    if (resultRowType->GetSize() == keptColumns.size()) {
        return prunedSourceWrap;
    }

    return Build<TCoExtractMembers>(ctx, node.Pos())
        .InitFrom(extract)
        .Input(ctx.ReplaceNode(extractInput.Ptr(), sourceWrap.Ref(), prunedSourceWrap))
        .Done();
}
216
227
0 commit comments