@@ -230,8 +230,9 @@ class TJsonParser::TImpl {
230
230
public:
231
231
TImpl (
232
232
const TVector<TString>& columns,
233
+ const TVector<TString>& types,
233
234
TCallback callback)
234
- : Sql(GenerateSql(columns)) {
235
+ : Sql(GenerateSql(columns, types )) {
235
236
auto options = NYql::NPureCalc::TProgramFactoryOptions ();
236
237
auto factory = NYql::NPureCalc::MakeProgramFactory (options);
237
238
@@ -240,7 +241,7 @@ class TJsonParser::TImpl {
240
241
TParserInputSpec (),
241
242
TParserOutputSpec (MakeOutputSchema (columns)),
242
243
Sql,
243
- NYql::NPureCalc::ETranslationMode::SQL
244
+ NYql::NPureCalc::ETranslationMode::SExpr
244
245
);
245
246
LOG_ROW_DISPATCHER_DEBUG (" Program created" );
246
247
InputConsumer = Program->Apply (MakeHolder<TParserOutputConsumer>(callback));
@@ -257,19 +258,51 @@ class TJsonParser::TImpl {
257
258
}
258
259
259
260
private:
260
- TString GenerateSql (const TVector<TString>& columns) {
261
- TStringStream str;
262
- str << " $json = SELECT CAST(data AS Json) as `Json`, " << OffsetFieldName << " FROM Input;" ;
263
- str << " \n SELECT " << OffsetFieldName << " , " ;
264
- for (auto it = columns.begin (); it != columns.end (); ++it) {
265
- str << R"( CAST(Unwrap(JSON_VALUE(`Json`, "$.)" << *it << " \" )) as String) as "
266
- << *it << ((it != columns.end () - 1 ) ? " ," : " " );
261
// Builds and returns the text of the program that parses incoming rows.
// Despite the name, the generated text is an s-expression program rather than SQL:
// the constructor passes the result to the program factory with ETranslationMode::SExpr.
//
// columnNames - names of the columns to extract, one struct member per name.
// columnTypes - type name for each column, positionally matched with columnNames.
// Returns the complete program text.
// Aborts the process if the two vectors differ in size.
+ TString GenerateSql (const TVector<TString>& columnNames, const TVector<TString>& columnTypes) {
262
+ Y_ABORT_UNLESS (columnNames.size () == columnTypes.size (), " Unexpected column types size" );
263
+
264
// Two per-column fragments are accumulated in parallel:
//   udfOutputType - struct member declarations spliced into $output_type below;
//   resultType    - members of the final struct, each one a SafeCast of the parsed value to $string_type.
+ TStringStream udfOutputType;
265
+ TStringStream resultType;
266
+ for (size_t i = 0 ; i < columnNames.size (); ++i) {
267
// NOTE(review): both arms of this conditional read identically in this view (possibly an
// extraction artifact); presumably the last column gets an empty separator - confirm against the original file.
+ const TString& lastSymbol = i + 1 == columnNames.size () ? " " : " " ;
268
+ const TString& column = columnNames[i];
269
// The column type is passed through SkipOptional before being used as a DataType name.
+ const TString& type = SkipOptional (columnTypes[i]);
270
+
271
+ udfOutputType << " '('" << column << " (DataType '" << type << " ))" << lastSymbol;
272
+ resultType << " '('" << column << " (SafeCast (Member $parsed '" << column << " ) $string_type))" << lastSymbol;
267
273
}
268
- str << " FROM $json;" ;
274
+
275
// Program template. It declares the input tuple type (String, Uint64) and the output tuple type
// (struct of the requested columns, Uint64), instantiates the 'ClickHouseClient.ParseFormat UDF
// with the 'json_each_row format, feeds it (data, OffsetFieldName) pairs taken from input '0,
// and maps every UDF output into a struct holding the OffsetFieldName member plus the columns
// collected in resultType.
// Column names and type names are spliced into the program text verbatim, without escaping.
+ TStringStream str;
276
+ str << R"(
277
+ (
278
+ (let $string_type (DataType 'String))
279
+
280
+ (let $input_type (TupleType $string_type (DataType 'Uint64)))
281
+ (let $output_type (TupleType (StructType )" << udfOutputType.Str () << R"( ) (DataType 'Uint64)))
282
+ (let $udf_argument_type (TupleType $input_type (StructType) $output_type))
283
+ (let $udf_callable_type (CallableType '('1) '((StreamType $output_type)) '((StreamType $input_type)) '((OptionalType (DataType 'Utf8)))))
284
+ (let $udf (Udf 'ClickHouseClient.ParseFormat (Void) $udf_argument_type 'json_each_row $udf_callable_type (VoidType) '"" '()))
285
+
286
+ (return (Map (Apply $udf (Map (Self '0) (lambda '($input) (block '(
287
+ (return '((Member $input 'data) (Member $input ')" << OffsetFieldName << R"( )))
288
+ ))))) (lambda '($output) (block '(
289
+ (let $parsed (Nth $output '0))
290
+ (return (AsStruct '(')" << OffsetFieldName << R"( (Nth $output '1)) )" << resultType.Str () << R"( ))
291
+ )))))
292
+ )
293
+ )" ;
269
294
// The generated program is logged at debug level before being returned.
LOG_ROW_DISPATCHER_DEBUG (" GenerateSql " << str.Str ());
270
295
return str.Str ();
271
296
}
272
297
298
+ static TString SkipOptional (TStringBuf type) {
299
+ if (type.StartsWith (" Optional" )) {
300
+ Y_ABORT_UNLESS (type.SkipPrefix (" Optional<" ));
301
+ Y_ABORT_UNLESS (type.ChopSuffix (" >" ));
302
+ }
303
+ return TString (type);
304
+ }
305
+
273
306
private:
274
307
THolder<NYql::NPureCalc::TPushStreamProgram<TParserInputSpec, TParserOutputSpec>> Program;
275
308
THolder<NYql::NPureCalc::IConsumer<TInputConsumerArg>> InputConsumer;
@@ -278,8 +311,9 @@ class TJsonParser::TImpl {
278
311
279
312
TJsonParser::TJsonParser (
280
313
const TVector<TString>& columns,
314
+ const TVector<TString>& types,
281
315
TCallback callback)
282
- : Impl(std::make_unique<TJsonParser::TImpl>(columns, callback)) {
316
+ : Impl(std::make_unique<TJsonParser::TImpl>(columns, types, callback)) {
283
317
}
284
318
285
319
TJsonParser::~TJsonParser () {
@@ -295,8 +329,9 @@ TString TJsonParser::GetSql() {
295
329
296
330
std::unique_ptr<TJsonParser> NewJsonParser (
297
331
const TVector<TString>& columns,
332
+ const TVector<TString>& types,
298
333
TCallback callback) {
299
- return std::unique_ptr<TJsonParser>(new TJsonParser (columns, callback));
334
+ return std::unique_ptr<TJsonParser>(new TJsonParser (columns, types, callback));
300
335
}
301
336
302
337
} // namespace NFq
0 commit comments