diff --git a/ydb/library/yql/minikql/comp_nodes/ut/mkql_blocks_ut.cpp b/ydb/library/yql/minikql/comp_nodes/ut/mkql_blocks_ut.cpp index ca4bb2e546c5..03b759de4f1c 100644 --- a/ydb/library/yql/minikql/comp_nodes/ut/mkql_blocks_ut.cpp +++ b/ydb/library/yql/minikql/comp_nodes/ut/mkql_blocks_ut.cpp @@ -16,7 +16,8 @@ BEGIN_SIMPLE_ARROW_UDF(TInc, i32(i32)) { struct TIncKernelExec : public NYql::NUdf::TUnaryKernelExec { template - static void Process(NYql::NUdf::TBlockItem arg, const TSink& sink) { + static void Process(const NYql::NUdf::IValueBuilder* valueBuilder, NYql::NUdf::TBlockItem arg, const TSink& sink) { + Y_UNUSED(valueBuilder); sink(NYql::NUdf::TBlockItem(arg.As() + 1)); } }; diff --git a/ydb/library/yql/public/udf/arrow/block_reader.h b/ydb/library/yql/public/udf/arrow/block_reader.h index bac76df5afee..ad0c39dbe61c 100644 --- a/ydb/library/yql/public/udf/arrow/block_reader.h +++ b/ydb/library/yql/public/udf/arrow/block_reader.h @@ -40,6 +40,8 @@ class TFixedSizeBlockReaderBase : public IBlockReader { if (IsNull(data, index)) { return {}; } + } else { + Y_DEBUG_ABORT_UNLESS(!data.MayHaveNulls()); } return static_cast(this)->MakeBlockItem(data.GetValues(1)[index]); } @@ -129,6 +131,8 @@ class TStringBlockReader final : public IBlockReader { if (IsNull(data, index)) { return {}; } + } else { + Y_DEBUG_ABORT_UNLESS(!data.MayHaveNulls()); } const TOffset* offsets = data.GetValues(1); @@ -210,6 +214,8 @@ class TTupleBlockReaderBase : public IBlockReader { if constexpr (Nullable) { if (IsNull(data, index)) { return {}; } + } else { + Y_DEBUG_ABORT_UNLESS(!data.MayHaveNulls()); } return static_cast(this)->GetChildrenItems(data, index); diff --git a/ydb/library/yql/public/udf/arrow/udf_arrow_helpers.h b/ydb/library/yql/public/udf/arrow/udf_arrow_helpers.h index 5b75c5196020..9b98b4ff3dae 100644 --- a/ydb/library/yql/public/udf/arrow/udf_arrow_helpers.h +++ b/ydb/library/yql/public/udf/arrow/udf_arrow_helpers.h @@ -27,12 +27,12 @@ using TExec = 
arrow::Status(*)(arrow::compute::KernelContext*, const arrow::comp class TUdfKernelState : public arrow::compute::KernelState { public: - TUdfKernelState(const TVector& argTypes, const TType* outputType, bool onlyScalars, const ITypeInfoHelper* typeInfoHelper, const IPgBuilder& pgBuilder) + TUdfKernelState(const TVector& argTypes, const TType* outputType, bool onlyScalars, const ITypeInfoHelper* typeInfoHelper, const IValueBuilder* valueBuilder) : ArgTypes_(argTypes) , OutputType_(outputType) , OnlyScalars_(onlyScalars) , TypeInfoHelper_(typeInfoHelper) - , PgBuilder_(pgBuilder) + , ValueBuilder_(valueBuilder) { Readers_.resize(ArgTypes_.size()); } @@ -48,7 +48,7 @@ class TUdfKernelState : public arrow::compute::KernelState { IArrayBuilder& GetArrayBuilder() { Y_ENSURE(!OnlyScalars_); if (!ArrayBuilder_) { - ArrayBuilder_ = MakeArrayBuilder(*TypeInfoHelper_, OutputType_, *GetYqlMemoryPool(), TypeInfoHelper_->GetMaxBlockLength(OutputType_), &PgBuilder_); + ArrayBuilder_ = MakeArrayBuilder(*TypeInfoHelper_, OutputType_, *GetYqlMemoryPool(), TypeInfoHelper_->GetMaxBlockLength(OutputType_), &ValueBuilder_->GetPgBuilder()); } return *ArrayBuilder_; @@ -62,13 +62,18 @@ class TUdfKernelState : public arrow::compute::KernelState { return *ScalarBuilder_; } + + const IValueBuilder& GetValueBuilder() { + Y_ENSURE(ValueBuilder_); + return *ValueBuilder_; + } private: const TVector ArgTypes_; const TType* OutputType_; const bool OnlyScalars_; const ITypeInfoHelper* TypeInfoHelper_; - const IPgBuilder& PgBuilder_; + const IValueBuilder* ValueBuilder_; TVector> Readers_; std::unique_ptr ArrayBuilder_; std::unique_ptr ScalarBuilder_; @@ -157,7 +162,7 @@ class TSimpleArrowUdfImpl : public TBoxedValue { } } - TUdfKernelState kernelState(ArgTypes_, OutputType_, OnlyScalars_, TypeInfoHelper_.Get(), valueBuilder->GetPgBuilder()); + TUdfKernelState kernelState(ArgTypes_, OutputType_, OnlyScalars_, TypeInfoHelper_.Get(), valueBuilder); arrow::compute::ExecContext 
execContext(GetYqlMemoryPool()); arrow::compute::KernelContext kernelContext(&execContext); kernelContext.SetState(&kernelState); @@ -351,6 +356,7 @@ TReader* CastToBlockReaderImpl(IBlockReader& reader) { template struct TUnaryKernelExec { + static arrow::Status Do(arrow::compute::KernelContext* ctx, const arrow::compute::ExecBatch& batch, arrow::Datum* res) { auto& state = dynamic_cast(*ctx->state()); auto& reader = state.GetReader(0); @@ -362,7 +368,7 @@ struct TUnaryKernelExec { auto* builderImpl = CastToScalarBuilderImpl(builder); auto item = readerImpl->GetScalarItem(*arg.scalar()); - TDerived::Process(item, [&](TBlockItem out) { + TDerived::Process(&state.GetValueBuilder(), item, [&](TBlockItem out) { *res = builderImpl->Build(out); }); } @@ -377,7 +383,7 @@ struct TUnaryKernelExec { for (int64_t i = 0; i < array.length;) { for (size_t j = 0; j < maxBlockLength && i < array.length; ++j, ++i) { auto item = readerImpl->GetItem(array, i); - TDerived::Process(item, [&](TBlockItem out) { + TDerived::Process(&state.GetValueBuilder(), item, [&](TBlockItem out) { builderImpl->Add(out); }); } @@ -411,7 +417,8 @@ struct TBinaryKernelExec { auto item1 = reader1Impl->GetScalarItem(*arg1.scalar()); auto item2 = reader2Impl->GetScalarItem(*arg2.scalar()); - TDerived::Process(item1, item2, [&](TBlockItem out) { + + TDerived::Process(&state.GetValueBuilder(), item1, item2, [&](TBlockItem out) { *res = builderImpl->Build(out); }); } @@ -427,7 +434,7 @@ struct TBinaryKernelExec { for (int64_t i = 0; i < array2.length;) { for (size_t j = 0; j < maxBlockLength && i < array2.length; ++j, ++i) { auto item2 = reader2Impl->GetItem(array2, i); - TDerived::Process(item1, item2, [&](TBlockItem out) { + TDerived::Process(&state.GetValueBuilder(), item1, item2, [&](TBlockItem out) { builderImpl->Add(out); }); } @@ -448,7 +455,7 @@ struct TBinaryKernelExec { for (int64_t i = 0; i < array1.length;) { for (size_t j = 0; j < maxBlockLength && i < array1.length; ++j, ++i) { auto item1 = 
reader1Impl->GetItem(array1, i); - TDerived::Process(item1, item2, [&](TBlockItem out) { + TDerived::Process(&state.GetValueBuilder(), item1, item2, [&](TBlockItem out) { builderImpl->Add(out); }); } @@ -470,9 +477,9 @@ struct TBinaryKernelExec { Y_ENSURE(array1.length == array2.length); for (int64_t i = 0; i < array1.length;) { for (size_t j = 0; j < maxBlockLength && i < array1.length; ++j, ++i) { - auto item1 = reader1.GetItem(array1, i); - auto item2 = reader2.GetItem(array2, i); - TDerived::Process(item1, item2, [&](TBlockItem out) { + auto item1 = reader1Impl->GetItem(array1, i); + auto item2 = reader2Impl->GetItem(array2, i); + TDerived::Process(&state.GetValueBuilder(), item1, item2, [&](TBlockItem out) { builderImpl->Add(out); }); } @@ -529,7 +536,7 @@ struct TGenericKernelExec { auto& reader = state.GetReader(k); args[k] = reader.GetScalarItem(*batch[k].scalar()); } - TDerived::Process(items, [&](TBlockItem out) { + TDerived::Process(&state.GetValueBuilder(), items, [&](TBlockItem out) { *res = builderImpl->Build(out); }); } else { @@ -559,7 +566,7 @@ struct TGenericKernelExec { args[k] = reader.GetItem(*batch[k].array(), i); } - TDerived::Process(items, [&](TBlockItem out) { + TDerived::Process(&state.GetValueBuilder(), items, [&](TBlockItem out) { builderImpl->Add(out); }); } diff --git a/ydb/library/yql/udfs/common/string/string_udf.cpp b/ydb/library/yql/udfs/common/string/string_udf.cpp index 6ee4bf145605..c99f0c258d81 100644 --- a/ydb/library/yql/udfs/common/string/string_udf.cpp +++ b/ydb/library/yql/udfs/common/string/string_udf.cpp @@ -31,59 +31,59 @@ using namespace NUdf; namespace { -#define STRING_UDF(udfName, function) \ - BEGIN_SIMPLE_STRICT_ARROW_UDF(T##udfName, char*(TAutoMap)) { \ - const TString input(args[0].AsStringRef()); \ - const auto& result = function(input); \ - return valueBuilder->NewString(result); \ - } \ - \ - struct T##udfName##KernelExec \ - : public TUnaryKernelExec \ - { \ - template \ - static void Process(TBlockItem 
arg1, const TSink& sink) { \ - const TString input(arg1.AsStringRef()); \ - const auto& result = function(input); \ - sink(TBlockItem(result)); \ - } \ - }; \ - \ - END_SIMPLE_ARROW_UDF(T##udfName, T##udfName##KernelExec::Do) +#define STRING_UDF(udfName, function) \ + BEGIN_SIMPLE_STRICT_ARROW_UDF(T##udfName, char*(TAutoMap)) { \ + const TString input(args[0].AsStringRef()); \ + const auto& result = function(input); \ + return valueBuilder->NewString(result); \ + } \ + \ + struct T##udfName##KernelExec \ + : public TUnaryKernelExec \ + { \ + template \ + static void Process(const IValueBuilder*, TBlockItem arg1, const TSink& sink) { \ + const TString input(arg1.AsStringRef()); \ + const auto& result = function(input); \ + sink(TBlockItem(result)); \ + } \ + }; \ + \ + END_SIMPLE_ARROW_UDF(T##udfName, T##udfName##KernelExec::Do) // 'unsafe' udf is actually strict - it returns null on any exception -#define STRING_UNSAFE_UDF(udfName, function) \ - BEGIN_SIMPLE_STRICT_ARROW_UDF(T##udfName, TOptional(TOptional)) { \ - EMPTY_RESULT_ON_EMPTY_ARG(0); \ - const TString input(args[0].AsStringRef()); \ - try { \ - const auto& result = function(input); \ - return valueBuilder->NewString(result); \ - } catch (yexception&) { \ - return TUnboxedValue(); \ - } \ - } \ - \ - struct T##udfName##KernelExec \ - : public TUnaryKernelExec \ - { \ - template \ - static void Process(TBlockItem arg1, const TSink& sink) { \ - if (!arg1) { \ - return sink(TBlockItem()); \ - } \ - \ - const TString input(arg1.AsStringRef()); \ - try { \ - const auto& result = function(input); \ - sink(TBlockItem(result)); \ - } catch (yexception&) { \ - return sink(TBlockItem()); \ - } \ - } \ - }; \ - \ +#define STRING_UNSAFE_UDF(udfName, function) \ + BEGIN_SIMPLE_STRICT_ARROW_UDF(T##udfName, TOptional(TOptional)) { \ + EMPTY_RESULT_ON_EMPTY_ARG(0); \ + const TString input(args[0].AsStringRef()); \ + try { \ + const auto& result = function(input); \ + return valueBuilder->NewString(result); \ + } catch 
(yexception&) { \ + return TUnboxedValue(); \ + } \ + } \ + \ + struct T##udfName##KernelExec \ + : public TUnaryKernelExec \ + { \ + template \ + static void Process(const IValueBuilder*, TBlockItem arg1, const TSink& sink) { \ + if (!arg1) { \ + return sink(TBlockItem()); \ + } \ + \ + const TString input(arg1.AsStringRef()); \ + try { \ + const auto& result = function(input); \ + sink(TBlockItem(result)); \ + } catch (yexception&) { \ + return sink(TBlockItem()); \ + } \ + } \ + }; \ + \ END_SIMPLE_ARROW_UDF(T##udfName, T##udfName##KernelExec::Do) #define STROKA_UDF(udfName, function) \ @@ -112,30 +112,30 @@ namespace { } \ } -#define STROKA_ASCII_CASE_UDF(udfName, function) \ - BEGIN_SIMPLE_STRICT_ARROW_UDF(T##udfName, char*(TAutoMap)) { \ - TString input(args[0].AsStringRef()); \ - if (input.function()) { \ - return valueBuilder->NewString(input); \ - } else { \ - return args[0]; \ - } \ - } \ - \ - struct T##udfName##KernelExec \ - : public TUnaryKernelExec \ - { \ - template \ - static void Process(TBlockItem arg1, const TSink& sink) { \ - TString input(arg1.AsStringRef()); \ - if (input.function()) { \ - sink(TBlockItem(input)); \ - } else { \ - sink(arg1); \ - } \ - } \ - }; \ - \ +#define STROKA_ASCII_CASE_UDF(udfName, function) \ + BEGIN_SIMPLE_STRICT_ARROW_UDF(T##udfName, char*(TAutoMap)) { \ + TString input(args[0].AsStringRef()); \ + if (input.function()) { \ + return valueBuilder->NewString(input); \ + } else { \ + return args[0]; \ + } \ + } \ + \ + struct T##udfName##KernelExec \ + : public TUnaryKernelExec \ + { \ + template \ + static void Process(const IValueBuilder*, TBlockItem arg1, const TSink& sink) { \ + TString input(arg1.AsStringRef()); \ + if (input.function()) { \ + sink(TBlockItem(input)); \ + } else { \ + sink(arg1); \ + } \ + } \ + }; \ + \ END_SIMPLE_ARROW_UDF(T##udfName, T##udfName##KernelExec::Do) @@ -163,45 +163,45 @@ namespace { } \ } -#define IS_ASCII_UDF(function) \ - BEGIN_SIMPLE_STRICT_ARROW_UDF(T##function, bool(TOptional)) 
{ \ - Y_UNUSED(valueBuilder); \ - if (args[0]) { \ - const TStringBuf input(args[0].AsStringRef()); \ - bool result = true; \ - for (auto c : input) { \ - if (!function(c)) { \ - result = false; \ - break; \ - } \ - } \ - return TUnboxedValuePod(result); \ - } else { \ - return TUnboxedValuePod(false); \ - } \ - } \ - \ - struct T##function##KernelExec \ - : public TUnaryKernelExec \ - { \ - template \ - static void Process(TBlockItem arg1, const TSink& sink) { \ - if (arg1) { \ - const TStringBuf input(arg1.AsStringRef()); \ - bool result = true; \ - for (auto c : input) { \ - if (!function(c)) { \ - result = false; \ - break; \ - } \ - } \ - sink(TBlockItem(result)); \ - } else { \ - sink(TBlockItem(false)); \ - } \ - } \ - }; \ - \ +#define IS_ASCII_UDF(function) \ + BEGIN_SIMPLE_STRICT_ARROW_UDF(T##function, bool(TOptional)) { \ + Y_UNUSED(valueBuilder); \ + if (args[0]) { \ + const TStringBuf input(args[0].AsStringRef()); \ + bool result = true; \ + for (auto c : input) { \ + if (!function(c)) { \ + result = false; \ + break; \ + } \ + } \ + return TUnboxedValuePod(result); \ + } else { \ + return TUnboxedValuePod(false); \ + } \ + } \ + \ + struct T##function##KernelExec \ + : public TUnaryKernelExec \ + { \ + template \ + static void Process(const IValueBuilder*, TBlockItem arg1, const TSink& sink) { \ + if (arg1) { \ + const TStringBuf input(arg1.AsStringRef()); \ + bool result = true; \ + for (auto c : input) { \ + if (!function(c)) { \ + result = false; \ + break; \ + } \ + } \ + sink(TBlockItem(result)); \ + } else { \ + sink(TBlockItem(false)); \ + } \ + } \ + }; \ + \ END_SIMPLE_ARROW_UDF(T##function, T##function##KernelExec::Do) @@ -231,7 +231,7 @@ namespace { : public TGenericKernelExec \ { \ template \ - static void Process(TBlockItem args, const TSink& sink) { \ + static void Process(const IValueBuilder*, TBlockItem args, const TSink& sink) { \ TStringStream result; \ const TStringBuf input(args.GetElement(0).AsStringRef()); \ char paddingSymbol = 
' '; \ @@ -253,67 +253,67 @@ namespace { \ END_SIMPLE_ARROW_UDF(T##function, T##function##KernelExec::Do) -#define STRING_STREAM_NUM_FORMATTER_UDF(function, argType) \ - BEGIN_SIMPLE_STRICT_ARROW_UDF(T##function, char*(TAutoMap)) { \ - TStringStream result; \ - result << function(args[0].Get()); \ - return valueBuilder->NewString(TStringRef(result.Data(), result.Size())); \ - } \ - \ - struct T##function##KernelExec \ - : public TUnaryKernelExec \ - { \ - template \ - static void Process(TBlockItem arg1, const TSink& sink) { \ - TStringStream result; \ - result << function(arg1.Get()); \ - sink(TBlockItem(TStringRef(result.Data(), result.Size()))); \ - } \ - }; \ - \ +#define STRING_STREAM_NUM_FORMATTER_UDF(function, argType) \ + BEGIN_SIMPLE_STRICT_ARROW_UDF(T##function, char*(TAutoMap)) { \ + TStringStream result; \ + result << function(args[0].Get()); \ + return valueBuilder->NewString(TStringRef(result.Data(), result.Size())); \ + } \ + \ + struct T##function##KernelExec \ + : public TUnaryKernelExec \ + { \ + template \ + static void Process(const IValueBuilder*, TBlockItem arg1, const TSink& sink) { \ + TStringStream result; \ + result << function(arg1.Get()); \ + sink(TBlockItem(TStringRef(result.Data(), result.Size()))); \ + } \ + }; \ + \ END_SIMPLE_ARROW_UDF(T##function, T##function##KernelExec::Do) -#define STRING_STREAM_TEXT_FORMATTER_UDF(function) \ - BEGIN_SIMPLE_STRICT_ARROW_UDF(T##function, char*(TAutoMap)) { \ - TStringStream result; \ - const TStringBuf input(args[0].AsStringRef()); \ - result << function(input); \ - return valueBuilder->NewString(TStringRef(result.Data(), result.Size())); \ - } \ - \ - struct T##function##KernelExec \ - : public TUnaryKernelExec \ - { \ - template \ - static void Process(TBlockItem arg1, const TSink& sink) { \ - TStringStream result; \ - const TStringBuf input(arg1.AsStringRef()); \ - result << function(input); \ - sink(TBlockItem(TStringRef(result.Data(), result.Size()))); \ - } \ - }; \ - \ +#define 
STRING_STREAM_TEXT_FORMATTER_UDF(function) \ + BEGIN_SIMPLE_STRICT_ARROW_UDF(T##function, char*(TAutoMap)) { \ + TStringStream result; \ + const TStringBuf input(args[0].AsStringRef()); \ + result << function(input); \ + return valueBuilder->NewString(TStringRef(result.Data(), result.Size())); \ + } \ + \ + struct T##function##KernelExec \ + : public TUnaryKernelExec \ + { \ + template \ + static void Process(const IValueBuilder*, TBlockItem arg1, const TSink& sink) { \ + TStringStream result; \ + const TStringBuf input(arg1.AsStringRef()); \ + result << function(input); \ + sink(TBlockItem(TStringRef(result.Data(), result.Size()))); \ + } \ + }; \ + \ END_SIMPLE_ARROW_UDF(T##function, T##function##KernelExec::Do) -#define STRING_STREAM_HRSZ_FORMATTER_UDF(udfName, hrSize) \ - BEGIN_SIMPLE_STRICT_ARROW_UDF(T##udfName, char*(TAutoMap)) { \ - TStringStream result; \ - result << HumanReadableSize(args[0].Get(), hrSize); \ - return valueBuilder->NewString(TStringRef(result.Data(), result.Size())); \ - } \ - \ - struct T##udfName##KernelExec \ - : public TUnaryKernelExec \ - { \ - template \ - static void Process(TBlockItem arg1, const TSink& sink) { \ - TStringStream result; \ - result << HumanReadableSize(arg1.Get(), hrSize); \ - sink(TBlockItem(TStringRef(result.Data(), result.Size()))); \ - } \ - }; \ - \ +#define STRING_STREAM_HRSZ_FORMATTER_UDF(udfName, hrSize) \ + BEGIN_SIMPLE_STRICT_ARROW_UDF(T##udfName, char*(TAutoMap)) { \ + TStringStream result; \ + result << HumanReadableSize(args[0].Get(), hrSize); \ + return valueBuilder->NewString(TStringRef(result.Data(), result.Size())); \ + } \ + \ + struct T##udfName##KernelExec \ + : public TUnaryKernelExec \ + { \ + template \ + static void Process(const IValueBuilder*, TBlockItem arg1, const TSink& sink) { \ + TStringStream result; \ + result << HumanReadableSize(arg1.Get(), hrSize); \ + sink(TBlockItem(TStringRef(result.Data(), result.Size()))); \ + } \ + }; \ + \ END_SIMPLE_ARROW_UDF(T##udfName, 
T##udfName##KernelExec::Do) #define STRING_UDF_MAP(XX) \ @@ -414,7 +414,7 @@ namespace { : public TBinaryKernelExec { template - static void Process(TBlockItem arg1, TBlockItem arg2, const TSink& sink) { + static void Process(const IValueBuilder*, TBlockItem arg1, TBlockItem arg2, const TSink& sink) { TString input(arg1.AsStringRef()); ui64 maxLength = arg2.Get(); CollapseText(input, maxLength); @@ -437,7 +437,7 @@ namespace { struct TContainsKernelExec : public TBinaryKernelExec { template - static void Process(TBlockItem arg1, TBlockItem arg2, const TSink& sink) { + static void Process(const IValueBuilder*, TBlockItem arg1, TBlockItem arg2, const TSink& sink) { if (!arg1) return sink(TBlockItem(false)); @@ -461,7 +461,7 @@ namespace { : public TGenericKernelExec { template - static void Process(TBlockItem args, const TSink& sink) { + static void Process(const IValueBuilder*, TBlockItem args, const TSink& sink) { TString result(args.GetElement(0).AsStringRef()); const TStringBuf what(args.GetElement(1).AsStringRef()); const TStringBuf with(args.GetElement(2).AsStringRef()); @@ -490,7 +490,7 @@ namespace { : public TGenericKernelExec { template - static void Process(TBlockItem args, const TSink& sink) { + static void Process(const IValueBuilder*, TBlockItem args, const TSink& sink) { std::string result(args.GetElement(0).AsStringRef()); const std::string_view what(args.GetElement(1).AsStringRef()); const std::string_view with(args.GetElement(2).AsStringRef()); @@ -519,7 +519,7 @@ namespace { : public TGenericKernelExec { template - static void Process(TBlockItem args, const TSink& sink) { + static void Process(const IValueBuilder*, TBlockItem args, const TSink& sink) { std::string result(args.GetElement(0).AsStringRef()); const std::string_view what(args.GetElement(1).AsStringRef()); const std::string_view with(args.GetElement(2).AsStringRef()); @@ -558,7 +558,7 @@ namespace { : public TBinaryKernelExec { template - static void Process(TBlockItem arg1, TBlockItem 
arg2, const TSink& sink) { + static void Process(const IValueBuilder*, TBlockItem arg1, TBlockItem arg2, const TSink& sink) { std::string input(arg1.AsStringRef()); const std::string_view remove(arg2.AsStringRef()); std::array chars{}; @@ -602,7 +602,7 @@ namespace { : public TBinaryKernelExec { template - static void Process(TBlockItem arg1, TBlockItem arg2, const TSink& sink) { + static void Process(const IValueBuilder*, TBlockItem arg1, TBlockItem arg2, const TSink& sink) { std::string input(arg1.AsStringRef()); const std::string_view remove(arg2.AsStringRef()); std::array chars{}; @@ -642,7 +642,7 @@ namespace { : public TBinaryKernelExec { template - static void Process(TBlockItem arg1, TBlockItem arg2, const TSink& sink) { + static void Process(const IValueBuilder*, TBlockItem arg1, TBlockItem arg2, const TSink& sink) { std::string input(arg1.AsStringRef()); const std::string_view remove(arg2.AsStringRef()); std::array chars{}; @@ -789,7 +789,7 @@ namespace { struct TLevensteinDistanceKernelExec : public TBinaryKernelExec { template - static void Process(TBlockItem arg1, TBlockItem arg2, const TSink& sink) { + static void Process(const IValueBuilder*, TBlockItem arg1, TBlockItem arg2, const TSink& sink) { const std::string_view left(arg1.AsStringRef()); const std::string_view right(arg2.AsStringRef()); const ui64 result = NLevenshtein::Distance(left, right); @@ -811,7 +811,7 @@ namespace { : public TUnaryKernelExec { template - static void Process(TBlockItem arg1, const TSink& sink) { + static void Process(const IValueBuilder*, TBlockItem arg1, const TSink& sink) { TStringStream result; result << HumanReadable(TDuration::MicroSeconds(arg1.Get())); sink(TBlockItem(TStringRef(result.Data(), result.Size()))); @@ -829,7 +829,7 @@ namespace { struct TPrecKernelExec : public TBinaryKernelExec { template - static void Process(TBlockItem arg1, TBlockItem arg2, const TSink& sink) { + static void Process(const IValueBuilder*, TBlockItem arg1, TBlockItem arg2, const 
TSink& sink) { TStringStream result; result << Prec(arg1.Get(), arg2.Get()); sink(TBlockItem(TStringRef(result.Data(), result.Size()))); diff --git a/ydb/library/yql/udfs/common/url_base/lib/url_base_udf.h b/ydb/library/yql/udfs/common/url_base/lib/url_base_udf.h index d80bc065f86e..482550fa0a55 100644 --- a/ydb/library/yql/udfs/common/url_base/lib/url_base_udf.h +++ b/ydb/library/yql/udfs/common/url_base/lib/url_base_udf.h @@ -36,7 +36,7 @@ inline bool PrepareUrl(const std::string_view& keyStr, TUri& parser) { } \ struct udfName##KernelExec : public TUnaryKernelExec { \ template \ - static void Process(TBlockItem arg, const TSink& sink) { \ + static void Process(const IValueBuilder*, TBlockItem arg, const TSink& sink) { \ if (!arg) { \ return sink(TBlockItem()); \ } \ @@ -60,7 +60,7 @@ BEGIN_SIMPLE_ARROW_UDF(TNormalize, TOptional(TOptional)) { } struct TNormalizeKernelExec : public TUnaryKernelExec { template - static void Process(TBlockItem arg, const TSink& sink) { + static void Process(const IValueBuilder*, TBlockItem arg, const TSink& sink) { if (!arg) { return sink(TBlockItem()); } @@ -81,7 +81,7 @@ BEGIN_SIMPLE_STRICT_ARROW_UDF(TGetScheme, char*(TAutoMap)) { } struct TGetSchemeKernelExec : public TUnaryKernelExec { template - static void Process(TBlockItem arg, const TSink& sink) { + static void Process(const IValueBuilder*, TBlockItem arg, const TSink& sink) { const std::string_view url(arg.AsStringRef()); const std::string_view prefix(GetSchemePrefix(url)); const std::string_view scheme = url.substr(std::distance(url.begin(), prefix.begin()), prefix.size()); @@ -124,7 +124,7 @@ BEGIN_SIMPLE_ARROW_UDF(TGetPort, TOptional(TOptional)) { } struct TGetPortKernelExec : public TUnaryKernelExec { template - static void Process(TBlockItem arg, const TSink& sink) { + static void Process(const IValueBuilder*, TBlockItem arg, const TSink& sink) { if (!arg) { return sink(TBlockItem()); } @@ -152,7 +152,7 @@ BEGIN_SIMPLE_ARROW_UDF(TGetTail, TOptional(TOptional)) { } 
struct TGetTailKernelExec : public TUnaryKernelExec { template - static void Process(TBlockItem arg, const TSink& sink) { + static void Process(const IValueBuilder*, TBlockItem arg, const TSink& sink) { if (!arg) { return sink(TBlockItem()); } @@ -186,7 +186,7 @@ BEGIN_SIMPLE_ARROW_UDF(TGetPath, TOptional(TOptional)) { } struct TGetPathKernelExec : public TUnaryKernelExec { template - static void Process(TBlockItem arg, const TSink& sink) { + static void Process(const IValueBuilder*, TBlockItem arg, const TSink& sink) { if (!arg) { return sink(TBlockItem()); } @@ -216,7 +216,7 @@ BEGIN_SIMPLE_ARROW_UDF(TGetFragment, TOptional(TOptional)) { } struct TGetFragmentKernelExec : public TUnaryKernelExec { template - static void Process(TBlockItem arg, const TSink& sink) { + static void Process(const IValueBuilder*, TBlockItem arg, const TSink& sink) { if (!arg) { return sink(TBlockItem()); } @@ -256,7 +256,7 @@ BEGIN_SIMPLE_ARROW_UDF(TGetDomain, TOptional(TOptional, ui8)) { } struct TGetDomainKernelExec : public TBinaryKernelExec { template - static void Process(TBlockItem arg1, TBlockItem arg2, const TSink& sink) { + static void Process(const IValueBuilder*, TBlockItem arg1, TBlockItem arg2, const TSink& sink) { if (!arg1) { return sink(TBlockItem()); } @@ -276,7 +276,7 @@ BEGIN_SIMPLE_ARROW_UDF(TGetTLD, char*(TAutoMap)) { } struct TGetTLDKernelExec : public TUnaryKernelExec { template - static void Process(TBlockItem arg, const TSink& sink) { + static void Process(const IValueBuilder*, TBlockItem arg, const TSink& sink) { const TStringBuf url(arg.AsStringRef()); return sink(TBlockItem(GetZone(GetOnlyHost(url)))); } @@ -291,7 +291,7 @@ BEGIN_SIMPLE_ARROW_UDF(TGetDomainLevel, ui64(TAutoMap)) { } struct TGetDomainLevelKernelExec : public TUnaryKernelExec { template - static void Process(TBlockItem arg, const TSink& sink) { + static void Process(const IValueBuilder*, TBlockItem arg, const TSink& sink) { std::vector parts; 
StringSplitter(GetOnlyHost(arg.AsStringRef())).Split('.').AddTo(&parts); return sink(TBlockItem(ui64(parts.size()))); @@ -360,7 +360,7 @@ BEGIN_SIMPLE_ARROW_UDF(TGetCGIParam, TOptional(TOptional, char*)) } struct TGetCGIParamKernelExec : public TBinaryKernelExec { template - static void Process(TBlockItem arg1, TBlockItem arg2, const TSink& sink) { + static void Process(const IValueBuilder*, TBlockItem arg1, TBlockItem arg2, const TSink& sink) { if (!arg1) { return sink(TBlockItem()); } @@ -387,7 +387,7 @@ BEGIN_SIMPLE_ARROW_UDF(TCutQueryStringAndFragment, char*(TAutoMap)) { } struct TCutQueryStringAndFragmentKernelExec : public TUnaryKernelExec { template - static void Process(TBlockItem arg, const TSink& sink) { + static void Process(const IValueBuilder*, TBlockItem arg, const TSink& sink) { const std::string_view input(arg.AsStringRef()); const auto cut = input.find_first_of("?#"); sink(TBlockItem(arg.AsStringRef().Substring(0U, cut))); @@ -407,7 +407,7 @@ BEGIN_SIMPLE_ARROW_UDF(TEncode, TOptional(TOptional)) { } struct TEncodeKernelExec : public TUnaryKernelExec { template - static void Process(TBlockItem arg, const TSink& sink) { + static void Process(const IValueBuilder*, TBlockItem arg, const TSink& sink) { if (!arg) { return sink(TBlockItem()); } @@ -435,7 +435,7 @@ BEGIN_SIMPLE_ARROW_UDF(TDecode, TOptional(TOptional)) { } struct TDecodeKernelExec : public TUnaryKernelExec { template - static void Process(TBlockItem arg, const TSink& sink) { + static void Process(const IValueBuilder*, TBlockItem arg, const TSink& sink) { if (!arg) { return sink(TBlockItem()); } @@ -457,7 +457,7 @@ BEGIN_SIMPLE_ARROW_UDF(TIsKnownTLD, bool(TAutoMap)) { } struct TIsKnownTLDKernelExec : public TUnaryKernelExec { template - static void Process(TBlockItem arg, const TSink& sink) { + static void Process(const IValueBuilder*, TBlockItem arg, const TSink& sink) { sink(TBlockItem(static_cast(IsTld(arg.AsStringRef())))); } }; @@ -469,7 +469,7 @@ BEGIN_SIMPLE_ARROW_UDF(TIsWellKnownTLD, 
bool(TAutoMap)) { } struct TIsWellKnownTLDKernelExec : public TUnaryKernelExec { template - static void Process(TBlockItem arg, const TSink& sink) { + static void Process(const IValueBuilder*, TBlockItem arg, const TSink& sink) { sink(TBlockItem(static_cast(IsVeryGoodTld(arg.AsStringRef())))); } }; @@ -483,7 +483,7 @@ BEGIN_SIMPLE_ARROW_UDF(THostNameToPunycode, TOptional(TAutoMap)) t } struct THostNameToPunycodeKernelExec : public TUnaryKernelExec { template - static void Process(TBlockItem arg, const TSink& sink) try { + static void Process(const IValueBuilder*, TBlockItem arg, const TSink& sink) try { const TUtf16String& input = UTF8ToWide(arg.AsStringRef()); return sink(TBlockItem(TStringRef(HostNameToPunycode(input)))); } catch (TPunycodeError&) { @@ -498,7 +498,7 @@ BEGIN_SIMPLE_ARROW_UDF(TForceHostNameToPunycode, char*(TAutoMap)) { } struct TForceHostNameToPunycodeKernelExec : public TUnaryKernelExec { template - static void Process(TBlockItem arg, const TSink& sink) { + static void Process(const IValueBuilder*, TBlockItem arg, const TSink& sink) { const TUtf16String& input = UTF8ToWide(arg.AsStringRef()); sink(TBlockItem(TStringRef(ForceHostNameToPunycode(input)))); } @@ -514,7 +514,7 @@ BEGIN_SIMPLE_ARROW_UDF(TPunycodeToHostName, TOptional(TAutoMap)) t } struct TPunycodeToHostNameKernelExec : public TUnaryKernelExec { template - static void Process(TBlockItem arg, const TSink& sink) try { + static void Process(const IValueBuilder*, TBlockItem arg, const TSink& sink) try { const TStringRef& input = arg.AsStringRef(); const auto& result = WideToUTF8(PunycodeToHostName(input)); return sink(TBlockItem(TStringRef(result))); @@ -531,7 +531,7 @@ BEGIN_SIMPLE_ARROW_UDF(TForcePunycodeToHostName, char*(TAutoMap)) { } struct TForcePunycodeToHostNameKernelExec : public TUnaryKernelExec { template - static void Process(TBlockItem arg, const TSink& sink) { + static void Process(const IValueBuilder*, TBlockItem arg, const TSink& sink) { const TStringRef& input = 
arg.AsStringRef(); const auto& result = WideToUTF8(ForcePunycodeToHostName(input)); sink(TBlockItem(TStringRef(result))); @@ -545,7 +545,7 @@ BEGIN_SIMPLE_ARROW_UDF(TCanBePunycodeHostName, bool(TAutoMap)) { } struct TCanBePunycodeHostNameKernelExec : public TUnaryKernelExec { template - static void Process(TBlockItem arg, const TSink& sink) { + static void Process(const IValueBuilder*, TBlockItem arg, const TSink& sink) { sink(TBlockItem(static_cast(CanBePunycodeHostName(arg.AsStringRef())))); } };