1
+ #include < ydb/library/yql/providers/common/schema/parser/yql_type_parser.h>
1
2
#include < ydb/library/yql/public/udf/udf_version.h>
2
3
#include < ydb/library/yql/public/purecalc/purecalc.h>
3
4
#include < ydb/library/yql/public/purecalc/io_specs/mkql/spec.h>
10
11
#include < ydb/core/fq/libs/common/util.h>
11
12
#include < ydb/core/fq/libs/row_dispatcher/json_filter.h>
12
13
14
+ #include < cxxabi.h>
15
+
13
16
14
17
namespace {
15
18
@@ -23,6 +26,12 @@ NYT::TNode CreateTypeNode(const TString& fieldType) {
23
26
.Add (fieldType);
24
27
}
25
28
29
+ NYT::TNode CreateOptionalTypeNode (const TString& fieldType) {
30
+ return NYT::TNode::CreateList ()
31
+ .Add (" OptionalType" )
32
+ .Add (CreateTypeNode (fieldType));
33
+ }
34
+
26
35
void AddField (NYT::TNode& node, const TString& fieldName, const TString& fieldType) {
27
36
node.Add (
28
37
NYT::TNode::CreateList ()
@@ -31,18 +40,29 @@ void AddField(NYT::TNode& node, const TString& fieldName, const TString& fieldTy
31
40
);
32
41
}
33
42
34
- void AddOptionalField (NYT::TNode& node, const TString& fieldName, const TString& fieldType) {
35
- node.Add (NYT::TNode::CreateList ()
36
- .Add (fieldName)
37
- .Add (NYT::TNode::CreateList ().Add (" OptionalType" ).Add (CreateTypeNode (fieldType)))
43
+ void AddTypedField (NYT::TNode& node, const TString& fieldName, const TString& fieldTypeYson) {
44
+ NYT::TNode parsedType;
45
+ Y_ENSURE (NYql::NCommon::ParseYson (parsedType, fieldTypeYson, Cerr), " Invalid field type" );
46
+
47
+ // TODO: remove this when the re-parsing is removed from pq read actor
48
+ if (parsedType == CreateTypeNode (" Json" )) {
49
+ parsedType = CreateTypeNode (" String" );
50
+ } else if (parsedType == CreateOptionalTypeNode (" Json" )) {
51
+ parsedType = CreateOptionalTypeNode (" String" );
52
+ }
53
+
54
+ node.Add (
55
+ NYT::TNode::CreateList ()
56
+ .Add (fieldName)
57
+ .Add (parsedType)
38
58
);
39
59
}
40
60
41
- NYT::TNode MakeInputSchema (const TVector<TString>& columns) {
61
+ NYT::TNode MakeInputSchema (const TVector<TString>& columns, const TVector<TString>& types ) {
42
62
auto structMembers = NYT::TNode::CreateList ();
43
63
AddField (structMembers, OffsetFieldName, " Uint64" );
44
- for (const auto & col : columns) {
45
- AddOptionalField (structMembers, col, " String " );
64
+ for (size_t i = 0 ; i < columns. size (); ++i ) {
65
+ AddTypedField (structMembers, columns[i], types[i] );
46
66
}
47
67
return NYT::TNode::CreateList ().Add (" StructType" ).Add (std::move (structMembers));
48
68
}
@@ -68,7 +88,7 @@ class TFilterInputSpec : public NYql::NPureCalc::TInputSpecBase {
68
88
TVector<NYT::TNode> Schemas;
69
89
};
70
90
71
- class TFilterInputConsumer : public NYql ::NPureCalc::IConsumer<std::pair<const TVector<ui64>&, const TVector<TVector<std::string_view> >&>> {
91
+ class TFilterInputConsumer : public NYql ::NPureCalc::IConsumer<std::pair<const TVector<ui64>&, const TVector<const NKikimr::NMiniKQL::TUnboxedValueVector* >&>> {
72
92
public:
73
93
TFilterInputConsumer (
74
94
const TFilterInputSpec& spec,
@@ -106,15 +126,15 @@ class TFilterInputConsumer : public NYql::NPureCalc::IConsumer<std::pair<const T
106
126
}
107
127
}
108
128
109
- void OnObject (std::pair<const TVector<ui64>&, const TVector<TVector<std::string_view> >&> values) override {
129
+ void OnObject (std::pair<const TVector<ui64>&, const TVector<const NKikimr::NMiniKQL::TUnboxedValueVector* >&> values) override {
110
130
Y_ENSURE (FieldsPositions.size () == values.second .size ());
111
131
112
132
NKikimr::NMiniKQL::TThrowingBindTerminator bind;
113
133
with_lock (Worker->GetScopedAlloc ()) {
114
134
auto & holderFactory = Worker->GetGraph ().GetHolderFactory ();
115
135
116
136
// TODO: use blocks here
117
- for (size_t rowId = 0 ; rowId < values.second .front (). size (); ++rowId) {
137
+ for (size_t rowId = 0 ; rowId < values.second .front ()-> size (); ++rowId) {
118
138
NYql::NUdf::TUnboxedValue* items = nullptr ;
119
139
120
140
NYql::NUdf::TUnboxedValue result = Cache.NewArray (
@@ -126,13 +146,16 @@ class TFilterInputConsumer : public NYql::NPureCalc::IConsumer<std::pair<const T
126
146
127
147
size_t fieldId = 0 ;
128
148
for (const auto & column : values.second ) {
129
- items[FieldsPositions[fieldId++]] = column[rowId].data () // Check that std::string_view was initialized in json_parser
130
- ? NKikimr::NMiniKQL::MakeString (column[rowId]).MakeOptional ()
131
- : NKikimr::NUdf::TUnboxedValuePod ();
149
+ items[FieldsPositions[fieldId++]] = column->at (rowId);
132
150
}
133
151
134
152
Worker->Push (std::move (result));
135
153
}
154
+
155
+ // Clear cache after each object because
156
+ // values allocated on another allocator and should be released
157
+ Cache.Clear ();
158
+ Worker->GetGraph ().Invalidate ();
136
159
}
137
160
}
138
161
@@ -216,7 +239,7 @@ struct NYql::NPureCalc::TInputSpecTraits<TFilterInputSpec> {
216
239
static constexpr bool IsPartial = false ;
217
240
static constexpr bool SupportPushStreamMode = true ;
218
241
219
- using TConsumerType = THolder<NYql::NPureCalc::IConsumer<std::pair<const TVector<ui64>&, const TVector<TVector<std::string_view> >&>>>;
242
+ using TConsumerType = THolder<NYql::NPureCalc::IConsumer<std::pair<const TVector<ui64>&, const TVector<const NKikimr::NMiniKQL::TUnboxedValueVector* >&>>>;
220
243
221
244
static TConsumerType MakeConsumer (
222
245
const TFilterInputSpec& spec,
@@ -244,12 +267,15 @@ class TJsonFilter::TImpl {
244
267
const TVector<TString>& types,
245
268
const TString& whereFilter,
246
269
TCallback callback)
247
- : Sql(GenerateSql(columns, types, whereFilter)) {
270
+ : Sql(GenerateSql(whereFilter)) {
271
+ Y_ENSURE (columns.size () == types.size (), " Number of columns and types should by equal" );
248
272
auto factory = NYql::NPureCalc::MakeProgramFactory (NYql::NPureCalc::TProgramFactoryOptions ());
249
273
274
+ // Program should be stateless because input values
275
+ // allocated on another allocator and should be released
250
276
LOG_ROW_DISPATCHER_DEBUG (" Creating program..." );
251
277
Program = factory->MakePushStreamProgram (
252
- TFilterInputSpec (MakeInputSchema (columns)),
278
+ TFilterInputSpec (MakeInputSchema (columns, types )),
253
279
TFilterOutputSpec (MakeOutputSchema ()),
254
280
Sql,
255
281
NYql::NPureCalc::ETranslationMode::SQL
@@ -258,7 +284,7 @@ class TJsonFilter::TImpl {
258
284
LOG_ROW_DISPATCHER_DEBUG (" Program created" );
259
285
}
260
286
261
- void Push (const TVector<ui64>& offsets, const TVector<TVector<std::string_view> >& values) {
287
+ void Push (const TVector<ui64>& offsets, const TVector<const NKikimr::NMiniKQL::TUnboxedValueVector* >& values) {
262
288
Y_ENSURE (values, " Expected non empty schema" );
263
289
InputConsumer->OnObject (std::make_pair (offsets, values));
264
290
}
@@ -268,29 +294,9 @@ class TJsonFilter::TImpl {
268
294
}
269
295
270
296
private:
271
- TString GenerateSql (const TVector<TString>& columnNames, const TVector<TString>& columnTypes, const TString& whereFilter) {
297
+ TString GenerateSql (const TString& whereFilter) {
272
298
TStringStream str;
273
- str << " $fields = SELECT " ;
274
- Y_ABORT_UNLESS (columnNames.size () == columnTypes.size ());
275
- str << OffsetFieldName << " , " ;
276
- for (size_t i = 0 ; i < columnNames.size (); ++i) {
277
- TString columnType = columnTypes[i];
278
- TString columnName = NFq::EncloseAndEscapeString (columnNames[i], ' `' );
279
- if (columnType == " Json" ) {
280
- columnType = " String" ;
281
- } else if (columnType == " Optional<Json>" ) {
282
- columnType = " Optional<String>" ;
283
- }
284
-
285
- if (columnType.StartsWith (" Optional" )) {
286
- str << " IF(" << columnName << " IS NOT NULL, Unwrap(CAST(" << columnName << " as " << columnType << " )), NULL)" ;
287
- } else {
288
- str << " Unwrap(CAST(" << columnName << " as " << columnType << " ))" ;
289
- }
290
- str << " as " << columnName << ((i != columnNames.size () - 1 ) ? " ," : " " );
291
- }
292
- str << " FROM Input;\n " ;
293
- str << " $filtered = SELECT * FROM $fields " << whereFilter << " ;\n " ;
299
+ str << " $filtered = SELECT * FROM Input " << whereFilter << " ;\n " ;
294
300
295
301
str << " SELECT " << OffsetFieldName << " , Unwrap(Json::SerializeJson(Yson::From(RemoveMembers(TableRow(), [\" " << OffsetFieldName;
296
302
str << " \" ])))) as data FROM $filtered" ;
@@ -300,7 +306,7 @@ class TJsonFilter::TImpl {
300
306
301
307
private:
302
308
THolder<NYql::NPureCalc::TPushStreamProgram<TFilterInputSpec, TFilterOutputSpec>> Program;
303
- THolder<NYql::NPureCalc::IConsumer<std::pair<const TVector<ui64>&, const TVector<TVector<std::string_view> >&>>> InputConsumer;
309
+ THolder<NYql::NPureCalc::IConsumer<std::pair<const TVector<ui64>&, const TVector<const NKikimr::NMiniKQL::TUnboxedValueVector* >&>>> InputConsumer;
304
310
const TString Sql;
305
311
};
306
312
@@ -315,7 +321,7 @@ TJsonFilter::TJsonFilter(
315
321
// Out-of-line destructor with nothing to do beyond destroying the members.
// NOTE(review): presumably kept in this file because the Impl type is only
// complete here -- confirm against the header.
TJsonFilter::~TJsonFilter() = default;
317
323
318
- void TJsonFilter::Push (const TVector<ui64>& offsets, const TVector<TVector<std::string_view> >& values) {
324
+ void TJsonFilter::Push (const TVector<ui64>& offsets, const TVector<const NKikimr::NMiniKQL::TUnboxedValueVector* >& values) {
319
325
Impl->Push (offsets, values);
320
326
}
321
327
0 commit comments