Skip to content

Commit 3682034

Browse files
committed
yson fix
1 parent 4f60da1 commit 3682034

File tree

5 files changed

+99
-28
lines changed

5 files changed

+99
-28
lines changed

ydb/library/yql/minikql/computation/mkql_block_reader.cpp

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -184,7 +184,7 @@ struct TConverterTraits {
184184
using TTuple = TTupleBlockItemConverter<Nullable>;
185185
template <typename T, bool Nullable>
186186
using TFixedSize = TFixedSizeBlockItemConverter<T, Nullable>;
187-
template <typename TStringType, bool Nullable, NUdf::EPgStringType PgString = NUdf::EPgStringType::None>
187+
template <typename TStringType, bool Nullable, NUdf::EDataSlot OriginalT = NUdf::EDataSlot::String, NUdf::EPgStringType PgString = NUdf::EPgStringType::None>
188188
using TStrings = TStringBlockItemConverter<TStringType, Nullable, PgString>;
189189
using TExtOptional = TExternalOptionalBlockItemConverter;
190190

@@ -193,15 +193,15 @@ struct TConverterTraits {
193193
return std::make_unique<TFixedSize<ui64, true>>();
194194
} else {
195195
if (desc.Typelen == -1) {
196-
auto ret = std::make_unique<TStrings<arrow::BinaryType, true, NUdf::EPgStringType::Text>>();
196+
auto ret = std::make_unique<TStrings<arrow::BinaryType, true, NUdf::EDataSlot::String, NUdf::EPgStringType::Text>>();
197197
ret->SetPgBuilder(pgBuilder, desc.TypeId, desc.Typelen);
198198
return ret;
199199
} else if (desc.Typelen == -2) {
200-
auto ret = std::make_unique<TStrings<arrow::BinaryType, true, NUdf::EPgStringType::CString>>();
200+
auto ret = std::make_unique<TStrings<arrow::BinaryType, true, NUdf::EDataSlot::String, NUdf::EPgStringType::CString>>();
201201
ret->SetPgBuilder(pgBuilder, desc.TypeId, desc.Typelen);
202202
return ret;
203203
} else {
204-
auto ret = std::make_unique<TStrings<arrow::BinaryType, true, NUdf::EPgStringType::Fixed>>();
204+
auto ret = std::make_unique<TStrings<arrow::BinaryType, true, NUdf::EDataSlot::String, NUdf::EPgStringType::Fixed>>();
205205
ret->SetPgBuilder(pgBuilder, desc.TypeId, desc.Typelen);
206206
return ret;
207207
}

ydb/library/yql/minikql/computation/mkql_block_transport.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -500,7 +500,7 @@ struct TSerializerTraits {
500500
using TTuple = TTupleBlockSerializer<Nullable>;
501501
template <typename T, bool Nullable>
502502
using TFixedSize = TFixedSizeBlockSerializer<sizeof(T), Nullable>;
503-
template <typename TStringType, bool Nullable>
503+
template <typename TStringType, bool Nullable, NUdf::EDataSlot OriginalT = NUdf::EDataSlot::String>
504504
using TStrings = TStringBlockSerializer<TStringType, Nullable>;
505505
using TExtOptional = TExtOptionalBlockSerializer;
506506

@@ -519,7 +519,7 @@ struct TDeserializerTraits {
519519
using TTuple = TTupleBlockDeserializer<Nullable>;
520520
template <typename T, bool Nullable>
521521
using TFixedSize = TFixedSizeBlockDeserializer<sizeof(T), Nullable>;
522-
template <typename TStringType, bool Nullable>
522+
template <typename TStringType, bool Nullable, NUdf::EDataSlot OriginalT = NUdf::EDataSlot::String>
523523
using TStrings = TStringBlockDeserializer<TStringType, Nullable>;
524524
using TExtOptional = TExtOptionalBlockDeserializer;
525525

ydb/library/yql/minikql/mkql_type_builder.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2425,7 +2425,7 @@ struct TComparatorTraits {
24252425
using TTuple = NUdf::TTupleBlockItemComparator<Nullable>;
24262426
template <typename T, bool Nullable>
24272427
using TFixedSize = NUdf::TFixedSizeBlockItemComparator<T, Nullable>;
2428-
template <typename TStringType, bool Nullable>
2428+
template <typename TStringType, bool Nullable, NUdf::EDataSlot OriginalT = NUdf::EDataSlot::String>
24292429
using TStrings = NUdf::TStringBlockItemComparator<TStringType, Nullable>;
24302430
using TExtOptional = NUdf::TExternalOptionalBlockItemComparator;
24312431

@@ -2441,7 +2441,7 @@ struct THasherTraits {
24412441
using TTuple = NUdf::TTupleBlockItemHasher<Nullable>;
24422442
template <typename T, bool Nullable>
24432443
using TFixedSize = NUdf::TFixedSizeBlockItemHasher<T, Nullable>;
2444-
template <typename TStringType, bool Nullable>
2444+
template <typename TStringType, bool Nullable, NUdf::EDataSlot OriginalT = NUdf::EDataSlot::String>
24452445
using TStrings = NUdf::TStringBlockItemHasher<TStringType, Nullable>;
24462446
using TExtOptional = NUdf::TExternalOptionalBlockItemHasher;
24472447

ydb/library/yql/providers/yt/comp_nodes/dq/arrow_converter.cpp

Lines changed: 79 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -205,6 +205,69 @@ class TYsonReaderDetails {
205205
size_t Available_;
206206
};
207207

208+
namespace {
209+
void SkipYson(TYsonReaderDetails& buf) {
210+
switch (buf.Current()) {
211+
case BeginListSymbol: {
212+
buf.Next();
213+
for (;;) {
214+
SkipYson(buf);
215+
if (buf.Current() == ListItemSeparatorSymbol) {
216+
buf.Next();
217+
}
218+
if (buf.Current() == EndListSymbol) {
219+
break;
220+
}
221+
}
222+
buf.Next();
223+
break;
224+
}
225+
case BeginMapSymbol: {
226+
buf.Next();
227+
for (;;) {
228+
SkipYson(buf);
229+
YQL_ENSURE(buf.Current() == KeyValueSeparatorSymbol);
230+
buf.Next();
231+
SkipYson(buf);
232+
if (buf.Current() == KeyedItemSeparatorSymbol) {
233+
buf.Next();
234+
}
235+
if (buf.Current() == EndMapSymbol) {
236+
break;
237+
}
238+
}
239+
buf.Next();
240+
break;
241+
}
242+
case StringMarker:
243+
buf.Next();
244+
buf.Skip(buf.ReadVarI32());
245+
break;
246+
case Uint64Marker:
247+
case Int64Marker:
248+
buf.Next();
249+
Y_UNUSED(buf.ReadVarI64());
250+
break;
251+
case TrueMarker:
252+
case FalseMarker:
253+
buf.Next();
254+
break;
255+
case DoubleMarker:
256+
buf.Next();
257+
Y_UNUSED(buf.NextDouble());
258+
break;
259+
default:
260+
YQL_ENSURE(false, "Unexpected char: " + std::string{buf.Current()});
261+
}
262+
}
263+
264+
NUdf::TBlockItem ReadYson(TYsonReaderDetails& buf) {
265+
const char* beg = buf.Data();
266+
SkipYson(buf);
267+
return NUdf::TBlockItem(std::string_view(beg, buf.Data() - beg));
268+
}
269+
};
270+
208271
class IYsonBlockReader {
209272
public:
210273
virtual NUdf::TBlockItem GetItem(TYsonReaderDetails& buf) = 0;
@@ -272,7 +335,7 @@ class TYsonTupleBlockReader final : public IYsonBlockReaderWithNativeFlag<Native
272335
TVector<NUdf::TBlockItem> Items_;
273336
};
274337

275-
template<typename T, bool Nullable, bool Native>
338+
template<typename T, bool Nullable, NKikimr::NUdf::EDataSlot OriginalT, bool Native>
276339
class TYsonStringBlockReader final : public IYsonBlockReaderWithNativeFlag<Native> {
277340
public:
278341
NUdf::TBlockItem GetItem(TYsonReaderDetails& buf) override final {
@@ -281,13 +344,18 @@ class TYsonStringBlockReader final : public IYsonBlockReaderWithNativeFlag<Nativ
281344
}
282345
return GetNotNull(buf);
283346
}
347+
284348
NUdf::TBlockItem GetNotNull(TYsonReaderDetails& buf) override final {
285-
YQL_ENSURE(buf.Current() == StringMarker);
286-
buf.Next();
287-
const i32 length = buf.ReadVarI32();
288-
auto res = NUdf::TBlockItem(NUdf::TStringRef(buf.Data(), length));
289-
buf.Skip(length);
290-
return res;
349+
if constexpr (NUdf::EDataSlot::Yson != OriginalT) {
350+
YQL_ENSURE(buf.Current() == StringMarker);
351+
buf.Next();
352+
const i32 length = buf.ReadVarI32();
353+
auto res = NUdf::TBlockItem(NUdf::TStringRef(buf.Data(), length));
354+
buf.Skip(length);
355+
return res;
356+
} else {
357+
return ReadYson(buf);
358+
}
291359
}
292360
private:
293361
const TVector<std::unique_ptr<IYsonBlockReader>> Children_;
@@ -300,7 +368,7 @@ struct TYtColumnConverterSettings {
300368
NKikimr::NMiniKQL::TType* Type;
301369
const NUdf::IPgBuilder* PgBuilder;
302370
arrow::MemoryPool& Pool;
303-
bool IsNative;
371+
const bool IsNative;
304372
bool IsTopOptional = false;
305373
std::shared_ptr<arrow::DataType> ArrowType;
306374
std::unique_ptr<NKikimr::NUdf::IArrayBuilder> Builder;
@@ -397,16 +465,16 @@ struct TYsonBlockReaderTraits {
397465
using TTuple = TYsonTupleBlockReader<Nullable, Native>;
398466
template <typename T, bool Nullable>
399467
using TFixedSize = TYsonFixedSizeBlockReader<T, Nullable, Native>;
400-
template <typename TStringType, bool Nullable>
401-
using TStrings = TYsonStringBlockReader<TStringType, Nullable, Native>;
468+
template <typename TStringType, bool Nullable, NKikimr::NUdf::EDataSlot OriginalT>
469+
using TStrings = TYsonStringBlockReader<TStringType, Nullable, OriginalT, Native>;
402470
using TExtOptional = TYsonExternalOptBlockReader<Native>;
403471

404472
static std::unique_ptr<TResult> MakePg(const NUdf::TPgTypeDescription& desc, const NUdf::IPgBuilder* pgBuilder) {
405473
Y_UNUSED(pgBuilder);
406474
if (desc.PassByValue) {
407475
return std::make_unique<TFixedSize<ui64, true>>();
408476
} else {
409-
return std::make_unique<TStrings<arrow::BinaryType, true>>();
477+
return std::make_unique<TStrings<arrow::BinaryType, true, NKikimr::NUdf::EDataSlot::String>>();
410478
}
411479
}
412480

ydb/library/yql/public/udf/arrow/block_reader.h

Lines changed: 12 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -97,7 +97,7 @@ class TFixedSizeBlockReader final : public IBlockReader {
9797
}
9898
};
9999

100-
template<typename TStringType, bool Nullable>
100+
template<typename TStringType, bool Nullable, NKikimr::NUdf::EDataSlot OriginalT = NKikimr::NUdf::EDataSlot::String>
101101
class TStringBlockReader final : public IBlockReader {
102102
public:
103103
using TOffset = typename TStringType::offset_type;
@@ -364,16 +364,16 @@ struct TReaderTraits {
364364
using TTuple = TTupleBlockReader<Nullable>;
365365
template <typename T, bool Nullable>
366366
using TFixedSize = TFixedSizeBlockReader<T, Nullable>;
367-
template <typename TStringType, bool Nullable>
368-
using TStrings = TStringBlockReader<TStringType, Nullable>;
367+
template <typename TStringType, bool Nullable, NKikimr::NUdf::EDataSlot OriginalT>
368+
using TStrings = TStringBlockReader<TStringType, Nullable, OriginalT>;
369369
using TExtOptional = TExternalOptionalBlockReader;
370370

371371
static std::unique_ptr<TResult> MakePg(const TPgTypeDescription& desc, const IPgBuilder* pgBuilder) {
372372
Y_UNUSED(pgBuilder);
373373
if (desc.PassByValue) {
374374
return std::make_unique<TFixedSize<ui64, true>>();
375375
} else {
376-
return std::make_unique<TStrings<arrow::BinaryType, true>>();
376+
return std::make_unique<TStrings<arrow::BinaryType, true, NKikimr::NUdf::EDataSlot::String>>();
377377
}
378378
}
379379
};
@@ -396,12 +396,12 @@ std::unique_ptr<typename TTraits::TResult> MakeFixedSizeBlockReaderImpl(bool isO
396396
}
397397
}
398398

399-
template <typename TTraits, typename T>
399+
template <typename TTraits, typename T, NKikimr::NUdf::EDataSlot OriginalT>
400400
std::unique_ptr<typename TTraits::TResult> MakeStringBlockReaderImpl(bool isOptional) {
401401
if (isOptional) {
402-
return std::make_unique<typename TTraits::template TStrings<T, true>>();
402+
return std::make_unique<typename TTraits::template TStrings<T, true, OriginalT>>();
403403
} else {
404-
return std::make_unique<typename TTraits::template TStrings<T, false>>();
404+
return std::make_unique<typename TTraits::template TStrings<T, false, OriginalT>>();
405405
}
406406
}
407407

@@ -489,12 +489,15 @@ std::unique_ptr<typename TTraits::TResult> MakeBlockReaderImpl(const ITypeInfoHe
489489
case NUdf::EDataSlot::Double:
490490
return MakeFixedSizeBlockReaderImpl<TTraits, double>(isOptional);
491491
case NUdf::EDataSlot::String:
492+
return MakeStringBlockReaderImpl<TTraits, arrow::BinaryType, NUdf::EDataSlot::String>(isOptional);
492493
case NUdf::EDataSlot::Yson:
494+
return MakeStringBlockReaderImpl<TTraits, arrow::BinaryType, NUdf::EDataSlot::Yson>(isOptional);
493495
case NUdf::EDataSlot::JsonDocument:
494-
return MakeStringBlockReaderImpl<TTraits, arrow::BinaryType>(isOptional);
496+
return MakeStringBlockReaderImpl<TTraits, arrow::BinaryType, NUdf::EDataSlot::JsonDocument>(isOptional);
495497
case NUdf::EDataSlot::Utf8:
498+
return MakeStringBlockReaderImpl<TTraits, arrow::StringType, NUdf::EDataSlot::Utf8>(isOptional);
496499
case NUdf::EDataSlot::Json:
497-
return MakeStringBlockReaderImpl<TTraits, arrow::StringType>(isOptional);
500+
return MakeStringBlockReaderImpl<TTraits, arrow::StringType, NUdf::EDataSlot::Json>(isOptional);
498501
default:
499502
Y_ENSURE(false, "Unsupported data slot");
500503
}

0 commit comments

Comments
 (0)