From c8c10c0c40dc0789646a789efaa6f83fdba509ef Mon Sep 17 00:00:00 2001 From: Fiodar Miron Date: Mon, 20 May 2024 17:38:02 +0300 Subject: [PATCH 01/18] 2 --- .../yql/public/udf/arrow/block_builder.h | 1 + ydb/library/yql/public/udf/arrow/block_item.h | 13 + .../yql/public/udf/arrow/block_reader.h | 6 + .../yql/public/udf/arrow/udf_arrow_helpers.h | 36 +- .../udfs/common/datetime2/datetime_udf.cpp | 178 +++-- .../datetime2/test/canondata/result.json | 10 + .../test.test_BlockStartOf_/results.txt | 314 +++++++++ .../test.test_BlockTmGet_/results.txt | 628 ++++++++++++++++++ .../datetime2/test/cases/BlockStartOf.attr | 0 .../datetime2/test/cases/BlockStartOf.in | 15 + .../datetime2/test/cases/BlockStartOf.in.attr | 17 + .../datetime2/test/cases/BlockStartOf.sql | 30 + .../common/datetime2/test/cases/BlockTmGet.in | 10 + .../datetime2/test/cases/BlockTmGet.in.attr | 17 + .../datetime2/test/cases/BlockTmGet.sql | 27 + .../common/datetime2/test/cases/StartOf.sql | 3 +- 16 files changed, 1257 insertions(+), 48 deletions(-) create mode 100644 ydb/library/yql/udfs/common/datetime2/test/canondata/test.test_BlockStartOf_/results.txt create mode 100644 ydb/library/yql/udfs/common/datetime2/test/canondata/test.test_BlockTmGet_/results.txt create mode 100644 ydb/library/yql/udfs/common/datetime2/test/cases/BlockStartOf.attr create mode 100644 ydb/library/yql/udfs/common/datetime2/test/cases/BlockStartOf.in create mode 100644 ydb/library/yql/udfs/common/datetime2/test/cases/BlockStartOf.in.attr create mode 100644 ydb/library/yql/udfs/common/datetime2/test/cases/BlockStartOf.sql create mode 100644 ydb/library/yql/udfs/common/datetime2/test/cases/BlockTmGet.in create mode 100644 ydb/library/yql/udfs/common/datetime2/test/cases/BlockTmGet.in.attr create mode 100644 ydb/library/yql/udfs/common/datetime2/test/cases/BlockTmGet.sql diff --git a/ydb/library/yql/public/udf/arrow/block_builder.h b/ydb/library/yql/public/udf/arrow/block_builder.h index 2f46532de716..f63d0601741b 100644 --- a/ydb/library/yql/public/udf/arrow/block_builder.h +++ b/ydb/library/yql/public/udf/arrow/block_builder.h @@ -55,6 +55,7 @@ class IScalarBuilder { public: virtual ~IScalarBuilder() = default; virtual arrow::Datum Build(TBlockItem value) const = 0; + virtual arrow::Datum Build(NUdf::TUnboxedValuePod value) const = 0; }; inline std::shared_ptr GetArrowType(const ITypeInfoHelper& typeInfoHelper, const TType* type) { diff --git a/ydb/library/yql/public/udf/arrow/block_item.h b/ydb/library/yql/public/udf/arrow/block_item.h index f04a25666b12..c0e9a018a886 100644 --- a/ydb/library/yql/public/udf/arrow/block_item.h +++ b/ydb/library/yql/public/udf/arrow/block_item.h @@ -59,6 +59,19 @@ class TBlockItem { Raw.Halfs[1] = high; } + inline static TBlockItem Embedded(const TStringRef& value) { + UDF_VERIFY(value.Size() <= sizeof(TRawEmbeddedValue::Buffer)); + + TBlockItem v; + v.Raw.Embedded.Size = value.Size(); + v.Raw.Embedded.Meta = static_cast(EMarkers::Embedded); + if (v.Raw.Embedded.Size) { + std::memcpy(v.Raw.Embedded.Buffer, value.Data(), v.Raw.Embedded.Size); + } + + return v; + } + inline ui64 Low() const { return Raw.Halfs[0]; } diff --git a/ydb/library/yql/public/udf/arrow/block_reader.h b/ydb/library/yql/public/udf/arrow/block_reader.h index bac76df5afee..ad0c39dbe61c 100644 --- a/ydb/library/yql/public/udf/arrow/block_reader.h +++ b/ydb/library/yql/public/udf/arrow/block_reader.h @@ -40,6 +40,8 @@ class TFixedSizeBlockReaderBase : public IBlockReader { if (IsNull(data, index)) { return {}; } + } else { + Y_DEBUG_ABORT_UNLESS(!data.MayHaveNulls()); } return static_cast(this)->MakeBlockItem(data.GetValues(1)[index]); } @@ -129,6 +131,8 @@ class TStringBlockReader final : public IBlockReader { if (IsNull(data, index)) { return {}; } + } else { + Y_DEBUG_ABORT_UNLESS(!data.MayHaveNulls()); } const TOffset* offsets = data.GetValues(1); @@ -210,6 +214,8 @@ class TTupleBlockReaderBase : public IBlockReader { if constexpr (Nullable) { if (IsNull(data, index)) { return {}; + } else { + Y_DEBUG_ABORT_UNLESS(!data.MayHaveNulls()); } } return static_cast(this)->GetChildrenItems(data, index); diff --git a/ydb/library/yql/public/udf/arrow/udf_arrow_helpers.h b/ydb/library/yql/public/udf/arrow/udf_arrow_helpers.h index 5b75c5196020..f3f869b3a08e 100644 --- a/ydb/library/yql/public/udf/arrow/udf_arrow_helpers.h +++ b/ydb/library/yql/public/udf/arrow/udf_arrow_helpers.h @@ -27,12 +27,12 @@ using TExec = arrow::Status(*)(arrow::compute::KernelContext*, const arrow::comp class TUdfKernelState : public arrow::compute::KernelState { public: - TUdfKernelState(const TVector& argTypes, const TType* outputType, bool onlyScalars, const ITypeInfoHelper* typeInfoHelper, const IPgBuilder& pgBuilder) + TUdfKernelState(const TVector& argTypes, const TType* outputType, bool onlyScalars, const ITypeInfoHelper* typeInfoHelper, const IValueBuilder* valueBuilder) : ArgTypes_(argTypes) , OutputType_(outputType) , OnlyScalars_(onlyScalars) , TypeInfoHelper_(typeInfoHelper) - , PgBuilder_(pgBuilder) + , ValueBuilder_(valueBuilder) { Readers_.resize(ArgTypes_.size()); } @@ -48,7 +48,7 @@ class TUdfKernelState : public arrow::compute::KernelState { IArrayBuilder& GetArrayBuilder() { Y_ENSURE(!OnlyScalars_); if (!ArrayBuilder_) { - ArrayBuilder_ = MakeArrayBuilder(*TypeInfoHelper_, OutputType_, *GetYqlMemoryPool(), TypeInfoHelper_->GetMaxBlockLength(OutputType_), &PgBuilder_); + ArrayBuilder_ = MakeArrayBuilder(*TypeInfoHelper_, OutputType_, *GetYqlMemoryPool(), TypeInfoHelper_->GetMaxBlockLength(OutputType_), &ValueBuilder_->GetPgBuilder()); } return *ArrayBuilder_; @@ -62,13 +62,18 @@ class TUdfKernelState : public arrow::compute::KernelState { return *ScalarBuilder_; } + + const IValueBuilder& GetValueBuilder() { + Y_ENSURE(ValueBuilder_); + return *ValueBuilder_; + } private: const TVector ArgTypes_; const TType* OutputType_; const bool OnlyScalars_; const ITypeInfoHelper* TypeInfoHelper_; - const IPgBuilder& PgBuilder_; + const IValueBuilder* ValueBuilder_; TVector> Readers_; std::unique_ptr ArrayBuilder_; std::unique_ptr ScalarBuilder_; @@ -157,7 +162,7 @@ class TSimpleArrowUdfImpl : public TBoxedValue { } } - TUdfKernelState kernelState(ArgTypes_, OutputType_, OnlyScalars_, TypeInfoHelper_.Get(), valueBuilder->GetPgBuilder()); + TUdfKernelState kernelState(ArgTypes_, OutputType_, OnlyScalars_, TypeInfoHelper_.Get(), valueBuilder); arrow::compute::ExecContext execContext(GetYqlMemoryPool()); arrow::compute::KernelContext kernelContext(&execContext); kernelContext.SetState(&kernelState); @@ -351,7 +356,17 @@ TReader* CastToBlockReaderImpl(IBlockReader& reader) { template struct TUnaryKernelExec { - static arrow::Status Do(arrow::compute::KernelContext* ctx, const arrow::compute::ExecBatch& batch, arrow::Datum* res) { + +template +static void Process(const TBlockItem& arg, TUdfKernelState& state, const TSink& sink) { + if constexpr (std::is_invocable_v), TBlockItem, TUdfKernelState&, TSink&>) { + TDerived::Process(arg, state, sink); + } else { + TDerived::Process(arg, sink); + } +} + +static arrow::Status Do(arrow::compute::KernelContext* ctx, const arrow::compute::ExecBatch& batch, arrow::Datum* res) { auto& state = dynamic_cast(*ctx->state()); auto& reader = state.GetReader(0); auto* readerImpl = CastToBlockReaderImpl(reader); @@ -394,6 +409,15 @@ struct TUnaryKernelExec { template struct TBinaryKernelExec { + template + static void Process(const TBlockItem& arg1, const TBlockItem& arg2, TUdfKernelState& state, const TSink& sink) { + if constexpr (std::is_invocable_v), TBlockItem, TBlockItem, TUdfKernelState&, TSink&>) { + TDerived::Process(arg1, arg2, state, sink); + } else { + TDerived::Process(arg1, arg2, sink); + } + } + static arrow::Status Do(arrow::compute::KernelContext* ctx, const arrow::compute::ExecBatch& batch, arrow::Datum* res) { auto& state = dynamic_cast(*ctx->state()); diff --git a/ydb/library/yql/udfs/common/datetime2/datetime_udf.cpp b/ydb/library/yql/udfs/common/datetime2/datetime_udf.cpp index 90e87a99660f..18a1e6c1dd12 100644 --- a/ydb/library/yql/udfs/common/datetime2/datetime_udf.cpp +++ b/ydb/library/yql/udfs/common/datetime2/datetime_udf.cpp @@ -389,28 +389,38 @@ TTMStorage& Reference(NUdf::TUnboxedValuePod& value) { return *reinterpret_cast(value.GetRawPtr()); } -NUdf::TUnboxedValuePod DoAddMonths(const NUdf::TUnboxedValuePod& date, i64 months, const NUdf::IDateBuilder& builder) { +template +TValue DoAddMonths(const TValue& date, i64 months, const NUdf::IDateBuilder& builder) { auto result = date; auto& storage = Reference(result); if (!NYql::DateTime::DoAddMonths(storage, months, builder)) { - return NUdf::TUnboxedValuePod{}; + return TValue{}; } return result; } -NUdf::TUnboxedValuePod DoAddYears(const NUdf::TUnboxedValuePod& date, i64 years, const NUdf::IDateBuilder& builder) { + +template +TValue DoAddQuarters(const TValue& date, i64 quarters, const NUdf::IDateBuilder& builder) { + return DoAddMonths(date, quarters * 3ll, builder); +} + +template +TValue DoAddYears(const TValue& date, i64 years, const NUdf::IDateBuilder& builder) { auto result = date; auto& storage = Reference(result); if (!NYql::DateTime::DoAddYears(storage, years, builder)) { - return NUdf::TUnboxedValuePod{}; + return TValue{}; } return result; } #define ACCESSORS(field, type) \ - inline type Get##field(const TUnboxedValuePod& tm) { \ + template \ + inline type Get##field(const TValue& tm) { \ return (type)Reference(tm).field; \ } \ - Y_DECLARE_UNUSED inline void Set##field(TUnboxedValuePod& tm, type value) { \ + template \ + Y_DECLARE_UNUSED inline void Set##field(TValue& tm, type value) { \ Reference(tm).field = value; \ } @@ -552,6 +562,24 @@ NUdf::TUnboxedValuePod DoAddYears(const NUdf::TUnboxedValuePod& date, i64 years, IFunctionTypeInfoBuilder& builder, bool typesOnly) { + const auto typeInfoHelper = builder.TypeInfoHelper(); + + TTupleTypeInspector tuple(*typeInfoHelper, userType); + Y_ENSURE(tuple); + Y_ENSURE(tuple.GetElementsCount() > 0); + TTupleTypeInspector argsTuple(*typeInfoHelper, tuple.GetElementType(0)); + Y_ENSURE(argsTuple); + + if (argsTuple.GetElementsCount() != 1) { + builder.SetError("Expected one argument"); + return true; + } + auto argType = argsTuple.GetElementType(0); + TVector argBlockTypes; + argBlockTypes.push_back(argType); + + TBlockTypeInspector block(*typeInfoHelper, argType); + builder.UserType(userType); builder.Args()->Add().Flags(ICallablePayload::TArgumentFlags::AutoMap); builder.Returns(builder.Resource(TMResourceName)); @@ -724,68 +752,124 @@ NUdf::TUnboxedValuePod DoAddYears(const NUdf::TUnboxedValuePod& date, i64 years, // Get* + #define GET_METHOD(field, type) \ - SIMPLE_STRICT_UDF(TGet##field, type(TAutoMap>)) { \ + struct TGet##field##KernelExec : TUnaryKernelExec, TFixedSizeArrayBuilder> { \ + template \ + static void Process(TBlockItem item, TSink& sink) { \ + sink(TBlockItem(Get##field(item))); \ + } \ + }; \ + BEGIN_SIMPLE_STRICT_ARROW_UDF(TGet##field, type(TAutoMap>)) { \ Y_UNUSED(valueBuilder); \ return TUnboxedValuePod(Get##field(args[0])); \ - } + } \ + END_SIMPLE_ARROW_UDF_WITH_NULL_HANDLING(TGet##field, TGet##field##KernelExec::Do, arrow::compute::NullHandling::INTERSECTION); GET_METHOD(Year, ui16) GET_METHOD(DayOfYear, ui16) GET_METHOD(Month, ui8) + + template + TValue GetMonthNameValue(size_t idx) { + static const std::array monthNames = {{ + TValue::Embedded(TStringRef::Of("January")), + TValue::Embedded(TStringRef::Of("February")), + TValue::Embedded(TStringRef::Of("March")), + TValue::Embedded(TStringRef::Of("April")), + TValue::Embedded(TStringRef::Of("May")), + TValue::Embedded(TStringRef::Of("June")), + TValue::Embedded(TStringRef::Of("July")), + TValue::Embedded(TStringRef::Of("August")), + TValue::Embedded(TStringRef::Of("September")), + TValue::Embedded(TStringRef::Of("October")), + TValue::Embedded(TStringRef::Of("November")), + TValue::Embedded(TStringRef::Of("December")) + }}; + return monthNames.at(idx); + } + + struct TGetMonthNameKernelExec : TUnaryKernelExec, TStringArrayBuilder> { + template + static void Process(TBlockItem item, TSink& sink) { + sink(GetMonthNameValue(GetMonth(item) - 1U)); + } + }; - SIMPLE_STRICT_UDF(TGetMonthName, char*(TAutoMap>)) { + BEGIN_SIMPLE_STRICT_ARROW_UDF(TGetMonthName, char*(TAutoMap>)) { Y_UNUSED(valueBuilder); - static const std::array monthNames = {{ - TUnboxedValuePod::Embedded(TStringRef::Of("January")), - TUnboxedValuePod::Embedded(TStringRef::Of("February")), - TUnboxedValuePod::Embedded(TStringRef::Of("March")), - TUnboxedValuePod::Embedded(TStringRef::Of("April")), - TUnboxedValuePod::Embedded(TStringRef::Of("May")), - TUnboxedValuePod::Embedded(TStringRef::Of("June")), - TUnboxedValuePod::Embedded(TStringRef::Of("July")), - TUnboxedValuePod::Embedded(TStringRef::Of("August")), - TUnboxedValuePod::Embedded(TStringRef::Of("September")), - TUnboxedValuePod::Embedded(TStringRef::Of("October")), - TUnboxedValuePod::Embedded(TStringRef::Of("November")), - TUnboxedValuePod::Embedded(TStringRef::Of("December")) - }}; - return monthNames.at(GetMonth(*args) - 1U); + return GetMonthNameValue(GetMonth(*args) - 1U); } + END_SIMPLE_ARROW_UDF_WITH_NULL_HANDLING(TGetMonthName, TGetMonthNameKernelExec::Do, arrow::compute::NullHandling::INTERSECTION); GET_METHOD(WeekOfYear, ui8) GET_METHOD(WeekOfYearIso8601, ui8) - SIMPLE_STRICT_UDF(TGetDayOfMonth, ui8(TAutoMap>)) { + struct TGetDayOfMonthKernelExec : TUnaryKernelExec, TFixedSizeArrayBuilder> { + template + static void Process(TBlockItem item, TSink& sink) { + sink(GetDay(item)); + } + }; + + BEGIN_SIMPLE_STRICT_ARROW_UDF(TGetDayOfMonth, ui8(TAutoMap>)) { Y_UNUSED(valueBuilder); return TUnboxedValuePod(GetDay(args[0])); } + END_SIMPLE_ARROW_UDF_WITH_NULL_HANDLING(TGetDayOfMonth, TGetDayOfMonthKernelExec::Do, arrow::compute::NullHandling::INTERSECTION); GET_METHOD(DayOfWeek, ui8) - SIMPLE_STRICT_UDF(TGetDayOfWeekName, char*(TAutoMap>)) { - Y_UNUSED(valueBuilder); - static const std::array dayNames = {{ - TUnboxedValuePod::Embedded(TStringRef::Of("Monday")), - TUnboxedValuePod::Embedded(TStringRef::Of("Tuesday")), - TUnboxedValuePod::Embedded(TStringRef::Of("Wednesday")), - TUnboxedValuePod::Embedded(TStringRef::Of("Thursday")), - TUnboxedValuePod::Embedded(TStringRef::Of("Friday")), - TUnboxedValuePod::Embedded(TStringRef::Of("Saturday")), - TUnboxedValuePod::Embedded(TStringRef::Of("Sunday")) + template + TValue GetDayNameValue(size_t idx) { + static const std::array dayNames = {{ + TValue::Embedded(TStringRef::Of("Monday")), + TValue::Embedded(TStringRef::Of("Tuesday")), + TValue::Embedded(TStringRef::Of("Wednesday")), + TValue::Embedded(TStringRef::Of("Thursday")), + TValue::Embedded(TStringRef::Of("Friday")), + TValue::Embedded(TStringRef::Of("Saturday")), + TValue::Embedded(TStringRef::Of("Sunday")) }}; - return dayNames.at(GetDayOfWeek(*args) - 1U); + return dayNames.at(idx); } + struct TGetDayOfWeekNameKernelExec : TUnaryKernelExec, TStringArrayBuilder> { + template + static void Process(TBlockItem item, TSink& sink) { + sink(GetDayNameValue(GetDayOfWeek(item) - 1U)); + } + }; + + BEGIN_SIMPLE_STRICT_ARROW_UDF(TGetDayOfWeekName, char*(TAutoMap>)) { + Y_UNUSED(valueBuilder); + return GetDayNameValue(GetDayOfWeek(*args) - 1U); + } + END_SIMPLE_ARROW_UDF_WITH_NULL_HANDLING(TGetDayOfWeekName, TGetDayOfWeekNameKernelExec::Do, arrow::compute::NullHandling::INTERSECTION); + GET_METHOD(TimezoneId, ui16) - SIMPLE_STRICT_UDF(TGetTimezoneName, char*(TAutoMap>)) { + struct TTGetTimezoneNameKernelExec : TUnaryKernelExec, TStringArrayBuilder> { + template + static void Process(TBlockItem item, TUdfKernelState& state, TSink& sink) { + auto timezoneId = GetTimezoneId(item); + if (timezoneId >= NUdf::GetTimezones().size()) { + sink(TBlockItem{}); + } else { + auto str = state.GetValueBuilder().NewString(NUdf::GetTimezones()[timezoneId]); + sink(str); + } + } + }; + + BEGIN_SIMPLE_STRICT_ARROW_UDF(TGetTimezoneName, char*(TAutoMap>)) { auto timezoneId = GetTimezoneId(args[0]); if (timezoneId >= NUdf::GetTimezones().size()) { return TUnboxedValuePod(); } return valueBuilder->NewString(NUdf::GetTimezones()[timezoneId]); } + END_SIMPLE_ARROW_UDF(TGetTimezoneName, TTGetTimezoneNameKernelExec::Do); // Update @@ -1157,20 +1241,32 @@ NUdf::TUnboxedValuePod DoAddYears(const NUdf::TUnboxedValuePod& date, i64 years, auto& storage = Reference(args[0]); return TUnboxedValuePod((i64)storage.ToTimeOfDay()); } + END_SIMPLE_ARROW_UDF(TTimeOfDay, timeOfDayKernelExecDo); // Add ... - SIMPLE_STRICT_UDF(TShiftYears, TOptional>(TAutoMap>, i32)) { + template + struct TAddKernelExec : TBinaryKernelExec> { + template + static void Process(TBlockItem date, TBlockItem arg, TUdfKernelState& state, TSink& sink) { + sink(Core(date, arg.Get(), state.GetValueBuilder().GetDateBuilder())); + } + }; + + BEGIN_SIMPLE_STRICT_ARROW_UDF(TShiftYears, TOptional>(TAutoMap>, i32)) { return DoAddYears(args[0], args[1].Get(), valueBuilder->GetDateBuilder()); } + END_SIMPLE_ARROW_UDF(TShiftYears, TAddKernelExec>::Do); - SIMPLE_STRICT_UDF(TShiftQuarters, TOptional>(TAutoMap>, i32)) { - return DoAddMonths(args[0], 3ll * args[1].Get(), valueBuilder->GetDateBuilder()); + BEGIN_SIMPLE_STRICT_ARROW_UDF(TShiftQuarters, TOptional>(TAutoMap>, i32)) { + return DoAddQuarters(args[0], args[1].Get(), valueBuilder->GetDateBuilder()); } + END_SIMPLE_ARROW_UDF(TShiftQuarters, TAddKernelExec>::Do); - SIMPLE_STRICT_UDF(TShiftMonths, TOptional>(TAutoMap>, i32)) { + BEGIN_SIMPLE_STRICT_ARROW_UDF(TShiftMonths, TOptional>(TAutoMap>, i32)) { return DoAddMonths(args[0], args[1].Get(), valueBuilder->GetDateBuilder()); } + END_SIMPLE_ARROW_UDF(TShiftMonths, TAddKernelExec>::Do); template struct PrintNDigits; diff --git a/ydb/library/yql/udfs/common/datetime2/test/canondata/result.json b/ydb/library/yql/udfs/common/datetime2/test/canondata/result.json index c8b2559c8b02..eaa37e5f488c 100644 --- a/ydb/library/yql/udfs/common/datetime2/test/canondata/result.json +++ b/ydb/library/yql/udfs/common/datetime2/test/canondata/result.json @@ -9,6 +9,16 @@ "uri": "file://test.test_BlockGet_/results.txt" } ], + "test.test[BlockStartOf]": [ + { + "uri": "file://test.test_BlockStartOf_/results.txt" + } + ], + "test.test[BlockTmGet]": [ + { + "uri": "file://test.test_BlockTmGet_/results.txt" + } + ], "test.test[BlockTo]": [ { "uri": "file://test.test_BlockTo_/results.txt" diff --git a/ydb/library/yql/udfs/common/datetime2/test/canondata/test.test_BlockStartOf_/results.txt b/ydb/library/yql/udfs/common/datetime2/test/canondata/test.test_BlockStartOf_/results.txt new file mode 100644 index 000000000000..20890003833a --- /dev/null +++ b/ydb/library/yql/udfs/common/datetime2/test/canondata/test.test_BlockStartOf_/results.txt @@ -0,0 +1,314 @@ +[ + { + "Write" = [ + { + "Type" = [ + "ListType"; + [ + "StructType"; + [ + [ + "column0"; + [ + "OptionalType"; + [ + "DataType"; + "TzTimestamp" + ] + ] + ]; + [ + "column1"; + [ + "OptionalType"; + [ + "DataType"; + "TzTimestamp" + ] + ] + ]; + [ + "column2"; + [ + "OptionalType"; + [ + "DataType"; + "TzTimestamp" + ] + ] + ]; + [ + "column3"; + [ + "OptionalType"; + [ + "DataType"; + "TzTimestamp" + ] + ] + ]; + [ + "column4"; + [ + "OptionalType"; + [ + "DataType"; + "TzTimestamp" + ] + ] + ]; + [ + "column5"; + [ + "OptionalType"; + [ + "DataType"; + "TzTimestamp" + ] + ] + ]; + [ + "column6"; + [ + "OptionalType"; + [ + "DataType"; + "TzTimestamp" + ] + ] + ]; + [ + "column7"; + [ + "OptionalType"; + [ + "DataType"; + "TzTimestamp" + ] + ] + ]; + [ + "column8"; + [ + "OptionalType"; + [ + "DataType"; + "TzTimestamp" + ] + ] + ]; + [ + "column9"; + [ + "OptionalType"; + [ + "DataType"; + "TzTimestamp" + ] + ] + ]; + [ + "column10"; + [ + "OptionalType"; + [ + "DataType"; + "Interval" + ] + ] + ]; + [ + "column11"; + [ + "OptionalType"; + [ + "DataType"; + "TzTimestamp" + ] + ] + ] + ] + ] + ]; + "Data" = [ + [ + #; + #; + #; + #; + #; + #; + [ + "1970-01-01T04:00:00,Europe/Moscow" + ]; + [ + "1970-01-01T05:00:00,Europe/Moscow" + ]; + [ + "1970-01-01T05:00:00,Europe/Moscow" + ]; + [ + "1970-01-01T04:59:57,Europe/Moscow" + ]; + [ + "18000000000" + ]; + [ + "1970-01-31T00:00:00,Europe/Moscow" + ] + ]; + [ + [ + "2018-01-01T00:00:00,Europe/Moscow" + ]; + [ + "2018-10-01T00:00:00,Europe/Moscow" + ]; + [ + "2018-12-01T00:00:00,Europe/Moscow" + ]; + [ + "2018-12-10T00:00:00,Europe/Moscow" + ]; + [ + "2018-12-15T00:00:00,Europe/Moscow" + ]; + [ + "2018-12-15T00:00:00,Europe/Moscow" + ]; + [ + "2018-12-15T00:00:00,Europe/Moscow" + ]; + [ + "2018-12-15T01:00:00,Europe/Moscow" + ]; + [ + "2018-12-15T01:02:00,Europe/Moscow" + ]; + [ + "2018-12-15T01:01:57,Europe/Moscow" + ]; + [ + "3723456789" + ]; + [ + "2018-12-31T00:00:00,Europe/Moscow" + ] + ]; + [ + [ + "2105-01-01T00:00:00,GMT" + ]; + [ + "2105-10-01T00:00:00,GMT" + ]; + [ + "2105-12-01T00:00:00,GMT" + ]; + [ + "2105-12-28T00:00:00,GMT" + ]; + [ + "2105-12-31T00:00:00,GMT" + ]; + [ + "2105-12-31T13:00:00,GMT" + ]; + [ + "2105-12-31T16:00:00,GMT" + ]; + [ + "2105-12-31T16:15:00,GMT" + ]; + [ + "2105-12-31T16:23:40,GMT" + ]; + [ + "2105-12-31T16:23:44,GMT" + ]; + [ + "59025000000" + ]; + [ + "2105-12-31T00:00:00,GMT" + ] + ]; + [ + [ + "2106-01-01T00:00:00,Europe/Moscow" + ]; + [ + "2106-01-01T00:00:00,Europe/Moscow" + ]; + [ + "2106-01-01T00:00:00,Europe/Moscow" + ]; + [ + "2105-12-28T00:00:00,Europe/Moscow" + ]; + [ + "2106-01-01T00:00:00,Europe/Moscow" + ]; + [ + "2106-01-01T00:00:00,Europe/Moscow" + ]; + [ + "2106-01-01T00:00:00,Europe/Moscow" + ]; + [ + "2106-01-01T01:00:00,Europe/Moscow" + ]; + [ + "2106-01-01T01:00:00,Europe/Moscow" + ]; + [ + "2106-01-01T00:59:58,Europe/Moscow" + ]; + [ + "3600000000" + ]; + # + ]; + [ + [ + "2019-01-01T00:00:00,Europe/Moscow" + ]; + [ + "2019-07-01T00:00:00,Europe/Moscow" + ]; + [ + "2019-07-01T00:00:00,Europe/Moscow" + ]; + [ + "2019-07-22T00:00:00,Europe/Moscow" + ]; + [ + "2019-07-24T00:00:00,Europe/Moscow" + ]; + [ + "2019-07-24T00:00:00,Europe/Moscow" + ]; + [ + "2019-07-24T12:00:00,Europe/Moscow" + ]; + [ + "2019-07-24T12:00:00,Europe/Moscow" + ]; + [ + "2019-07-24T12:00:00,Europe/Moscow" + ]; + [ + "2019-07-24T11:59:57,Europe/Moscow" + ]; + [ + "43200000000" + ]; + [ + "2019-07-31T00:00:00,Europe/Moscow" + ] + ] + ] + } + ] + } +] \ No newline at end of file diff --git a/ydb/library/yql/udfs/common/datetime2/test/canondata/test.test_BlockTmGet_/results.txt b/ydb/library/yql/udfs/common/datetime2/test/canondata/test.test_BlockTmGet_/results.txt new file mode 100644 index 000000000000..262c45b59713 --- /dev/null +++ b/ydb/library/yql/udfs/common/datetime2/test/canondata/test.test_BlockTmGet_/results.txt @@ -0,0 +1,628 @@ +[ + { + "Write" = [ + { + "Type" = [ + "ListType"; + [ + "StructType"; + [ + [ + "ryear"; + [ + "OptionalType"; + [ + "DataType"; + "Uint16" + ] + ] + ]; + [ + "rdayofyear"; + [ + "OptionalType"; + [ + "DataType"; + "Uint16" + ] + ] + ]; + [ + "rmonth"; + [ + "OptionalType"; + [ + "DataType"; + "Uint8" + ] + ] + ]; + [ + "rmonthname"; + [ + "OptionalType"; + [ + "DataType"; + "String" + ] + ] + ]; + [ + "rweekofyear"; + [ + "OptionalType"; + [ + "DataType"; + "Uint8" + ] + ] + ]; + [ + "rweekofyeariso8601"; + [ + "OptionalType"; + [ + "DataType"; + "Uint8" + ] + ] + ]; + [ + "rdayofmonth"; + [ + "OptionalType"; + [ + "DataType"; + "Uint8" + ] + ] + ]; + [ + "rdayofweek"; + [ + "OptionalType"; + [ + "DataType"; + "Uint8" + ] + ] + ]; + [ + "rdayofweekname"; + [ + "OptionalType"; + [ + "DataType"; + "String" + ] + ] + ]; + [ + "rhour"; + [ + "OptionalType"; + [ + "DataType"; + "Uint8" + ] + ] + ]; + [ + "rminute"; + [ + "OptionalType"; + [ + "DataType"; + "Uint8" + ] + ] + ]; + [ + "rsecond"; + [ + "OptionalType"; + [ + "DataType"; + "Uint8" + ] + ] + ]; + [ + "rmsec"; + [ + "OptionalType"; + [ + "DataType"; + "Uint32" + ] + ] + ]; + [ + "rusec"; + [ + "OptionalType"; + [ + "DataType"; + "Uint32" + ] + ] + ]; + [ + "rtz"; + [ + "OptionalType"; + [ + "DataType"; + "Uint16" + ] + ] + ]; + [ + "rtzname"; + [ + "OptionalType"; + [ + "DataType"; + "String" + ] + ] + ] + ] + ] + ]; + "Data" = [ + [ + [ + "1970" + ]; + [ + "1" + ]; + [ + "1" + ]; + [ + "January" + ]; + [ + "1" + ]; + [ + "1" + ]; + [ + "1" + ]; + [ + "4" + ]; + [ + "Thursday" + ]; + [ + "11" + ]; + [ + "14" + ]; + [ + "0" + ]; + [ + "0" + ]; + [ + "0" + ]; + [ + "0" + ]; + [ + "GMT" + ] + ]; + [ + [ + "1970" + ]; + [ + "2" + ]; + [ + "1" + ]; + [ + "January" + ]; + [ + "1" + ]; + [ + "1" + ]; + [ + "2" + ]; + [ + "5" + ]; + [ + "Friday" + ]; + [ + "14" + ]; + [ + "8" + ]; + [ + "0" + ]; + [ + "0" + ]; + [ + "0" + ]; + [ + "0" + ]; + [ + "GMT" + ] + ]; + [ + [ + "1970" + ]; + [ + "32" + ]; + [ + "2" + ]; + [ + "February" + ]; + [ + "5" + ]; + [ + "5" + ]; + [ + "1" + ]; + [ + "7" + ]; + [ + "Sunday" + ]; + [ + "17" + ]; + [ + "3" + ]; + [ + "0" + ]; + [ + "0" + ]; + [ + "0" + ]; + [ + "0" + ]; + [ + "GMT" + ] + ]; + [ + [ + "1970" + ]; + [ + "246" + ]; + [ + "9" + ]; + [ + "September" + ]; + [ + "36" + ]; + [ + "36" + ]; + [ + "3" + ]; + [ + "4" + ]; + [ + "Thursday" + ]; + [ + "7" + ]; + [ + "22" + ]; + [ + "0" + ]; + [ + "0" + ]; + [ + "0" + ]; + [ + "0" + ]; + [ + "GMT" + ] + ]; + [ + [ + "1970" + ]; + [ + "365" + ]; + [ + "12" + ]; + [ + "December" + ]; + [ + "53" + ]; + [ + "53" + ]; + [ + "31" + ]; + [ + "4" + ]; + [ + "Thursday" + ]; + [ + "23" + ]; + [ + "59" + ]; + [ + "59" + ]; + [ + "999" + ]; + [ + "999999" + ]; + [ + "0" + ]; + [ + "GMT" + ] + ]; + [ + [ + "1971" + ]; + [ + "1" + ]; + [ + "1" + ]; + [ + "January" + ]; + [ + "1" + ]; + [ + "53" + ]; + [ + "1" + ]; + [ + "5" + ]; + [ + "Friday" + ]; + [ + "0" + ]; + [ + "0" + ]; + [ + "0" + ]; + [ + "0" + ]; + [ + "0" + ]; + [ + "0" + ]; + [ + "GMT" + ] + ]; + [ + [ + "1971" + ]; + [ + "14" + ]; + [ + "1" + ]; + [ + "January" + ]; + [ + "3" + ]; + [ + "2" + ]; + [ + "14" + ]; + [ + "4" + ]; + [ + "Thursday" + ]; + [ + "0" + ]; + [ + "0" + ]; + [ + "0" + ]; + [ + "0" + ]; + [ + "0" + ]; + [ + "0" + ]; + [ + "GMT" + ] + ]; + [ + [ + "1978" + ]; + [ + "25" + ]; + [ + "1" + ]; + [ + "January" + ]; + [ + "5" + ]; + [ + "4" + ]; + [ + "25" + ]; + [ + "3" + ]; + [ + "Wednesday" + ]; + [ + "16" + ]; + [ + "15" + ]; + [ + "0" + ]; + [ + "0" + ]; + [ + "0" + ]; + [ + "477" + ]; + [ + "Europe/Uzhgorod" + ] + ]; + [ + [ + "2018" + ]; + [ + "335" + ]; + [ + "12" + ]; + [ + "December" + ]; + [ + "48" + ]; + [ + "48" + ]; + [ + "1" + ]; + [ + "6" + ]; + [ + "Saturday" + ]; + [ + "1" + ]; + [ + "2" + ]; + [ + "3" + ]; + [ + "456" + ]; + [ + "456789" + ]; + [ + "1" + ]; + [ + "Europe/Moscow" + ] + ] + ] + } + ] + } +] \ No newline at end of file diff --git a/ydb/library/yql/udfs/common/datetime2/test/cases/BlockStartOf.attr b/ydb/library/yql/udfs/common/datetime2/test/cases/BlockStartOf.attr new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/ydb/library/yql/udfs/common/datetime2/test/cases/BlockStartOf.in b/ydb/library/yql/udfs/common/datetime2/test/cases/BlockStartOf.in new file mode 100644 index 000000000000..f482585e7204 --- /dev/null +++ b/ydb/library/yql/udfs/common/datetime2/test/cases/BlockStartOf.in @@ -0,0 +1,15 @@ +{ + "ftztimestamp"="1970-01-01T05:00:00.000000,Europe/Moscow" +}; +{ + "ftztimestamp"="2018-12-15T01:02:03.456789,Europe/Moscow" +}; +{ + "ftztimestamp"="2105-12-31T16:23:45.000000,GMT" +}; +{ + "ftztimestamp"="2106-01-01T01:00:00.000000,Europe/Moscow" +}; +{ + "ftztimestamp"="2019-07-24T12:00:00,Europe/Moscow" +}; diff --git a/ydb/library/yql/udfs/common/datetime2/test/cases/BlockStartOf.in.attr b/ydb/library/yql/udfs/common/datetime2/test/cases/BlockStartOf.in.attr new file mode 100644 index 000000000000..3915337be3c3 --- /dev/null +++ b/ydb/library/yql/udfs/common/datetime2/test/cases/BlockStartOf.in.attr @@ -0,0 +1,17 @@ +{ + "_yql_row_spec" = { + "Type" = [ + "StructType"; + [ + [ + "ftztimestamp"; + [ + "DataType"; + "String" + ] + ]; + ] + ] + } +} + diff --git a/ydb/library/yql/udfs/common/datetime2/test/cases/BlockStartOf.sql b/ydb/library/yql/udfs/common/datetime2/test/cases/BlockStartOf.sql new file mode 100644 index 000000000000..e531d6f1c8d6 --- /dev/null +++ b/ydb/library/yql/udfs/common/datetime2/test/cases/BlockStartOf.sql @@ -0,0 +1,30 @@ +/* syntax version 1 */ +pragma UseBlocks; +insert into @t + select + cast(ftztimestamp as TzTimestamp) as `tztimestamp`, + from Input; + +commit; + +select + DateTime::StartOfYear(`tztimestamp`), + + DateTime::StartOfQuarter(`tztimestamp`), + + DateTime::StartOfMonth(`tztimestamp`), + + DateTime::StartOfWeek(`tztimestamp`), + + DateTime::StartOfDay(`tztimestamp`), + + DateTime::StartOf(`tztimestamp`, Interval("PT13H")), + + DateTime::StartOf(`tztimestamp`, Interval("PT4H")), + DateTime::StartOf(`tztimestamp`, Interval("PT15M")), + DateTime::StartOf(`tztimestamp`, Interval("PT20S")), + DateTime::StartOf(`tztimestamp`, Interval("PT7S")), + DateTime::TimeOfDay(`tztimestamp`), + + DateTime::EndOfMonth(`tztimestamp`), +from @t; diff --git a/ydb/library/yql/udfs/common/datetime2/test/cases/BlockTmGet.in b/ydb/library/yql/udfs/common/datetime2/test/cases/BlockTmGet.in new file mode 100644 index 000000000000..06d60295808c --- /dev/null +++ b/ydb/library/yql/udfs/common/datetime2/test/cases/BlockTmGet.in @@ -0,0 +1,10 @@ +{"ftztimestamp"="1970-01-01T11:14:00.000000,GMT"}; +{"ftztimestamp"="1970-01-02T14:08:00.000000,GMT"}; +{"ftztimestamp"="1970-02-01T17:03:00.000000,GMT"}; +{"ftztimestamp"="1970-09-03T07:22:00.000000,GMT"}; +{"ftztimestamp"="1970-12-31T23:59:59.999999,GMT"}; +{"ftztimestamp"="1971-01-01T00:00:00.000000,GMT"}; +{"ftztimestamp"="1971-01-14T00:00:00.000000,GMT"}; +{"ftztimestamp"="1978-01-25T16:15:00.000000,Europe/Uzhgorod"}; +{"ftztimestamp"="2018-12-01T01:02:03.456789,Europe/Moscow"}; + diff --git a/ydb/library/yql/udfs/common/datetime2/test/cases/BlockTmGet.in.attr b/ydb/library/yql/udfs/common/datetime2/test/cases/BlockTmGet.in.attr new file mode 100644 index 000000000000..3915337be3c3 --- /dev/null +++ b/ydb/library/yql/udfs/common/datetime2/test/cases/BlockTmGet.in.attr @@ -0,0 +1,17 @@ +{ + "_yql_row_spec" = { + "Type" = [ + "StructType"; + [ + [ + "ftztimestamp"; + [ + "DataType"; + "String" + ] + ]; + ] + ] + } +} + diff --git a/ydb/library/yql/udfs/common/datetime2/test/cases/BlockTmGet.sql b/ydb/library/yql/udfs/common/datetime2/test/cases/BlockTmGet.sql new file mode 100644 index 000000000000..3087d4e78d4d --- /dev/null +++ b/ydb/library/yql/udfs/common/datetime2/test/cases/BlockTmGet.sql @@ -0,0 +1,27 @@ +/* syntax version 1 */ +pragma UseBlocks; +insert into @t + select + cast(ftztimestamp as TzTimestamp) as `tm`, + from Input; + +commit; + +SELECT + DateTime::GetYear(tm) as ryear, + DateTime::GetDayOfYear(tm) as rdayofyear, + DateTime::GetMonth(tm) as rmonth, + DateTime::GetMonthName(tm) as rmonthname, + DateTime::GetWeekOfYear(tm) as rweekofyear, + DateTime::GetWeekOfYearIso8601(tm) as rweekofyeariso8601, + DateTime::GetDayOfMonth(tm) as rdayofmonth, + DateTime::GetDayOfWeek(tm) as rdayofweek, + DateTime::GetDayOfWeekName(tm) as rdayofweekname, + DateTime::GetHour(tm) as rhour, + DateTime::GetMinute(tm) as rminute, + DateTime::GetSecond(tm) as rsecond, + DateTime::GetMillisecondOfSecond(tm) as rmsec, + DateTime::GetMicrosecondOfSecond(tm) as rusec, + DateTime::GetTimezoneId(tm) as rtz, + DateTime::GetTimezoneName(tm) as rtzname +FROM @t; diff --git a/ydb/library/yql/udfs/common/datetime2/test/cases/StartOf.sql b/ydb/library/yql/udfs/common/datetime2/test/cases/StartOf.sql index 81fad1263285..201db382300c 100644 --- a/ydb/library/yql/udfs/common/datetime2/test/cases/StartOf.sql +++ b/ydb/library/yql/udfs/common/datetime2/test/cases/StartOf.sql @@ -12,7 +12,8 @@ select $format(DateTime::StartOf(`tztimestamp`, Interval("PT15M"))), $format(DateTime::StartOf(`tztimestamp`, Interval("PT20S"))), $format(DateTime::StartOf(`tztimestamp`, Interval("PT7S"))), - DateTime::TimeOfDay(`tztimestamp`) + DateTime::TimeOfDay(`tztimestamp`), + $format(DateTime::EndOfMonth(`tztimestamp`)), from ( select cast(ftztimestamp as TzTimestamp) as `tztimestamp` From ea9ad9239ca984e9c4929af264dffd5c38be5863 Mon Sep 17 00:00:00 2001 From: Fiodar Miron Date: Mon, 1 Jul 2024 18:25:44 +0300 Subject: [PATCH 02/18] remove tests --- .../datetime2/test/canondata/result.json | 10 - .../test.test_BlockStartOf_/results.txt | 314 --------- .../test.test_BlockTmGet_/results.txt | 628 ------------------ .../datetime2/test/cases/BlockStartOf.attr | 0 .../datetime2/test/cases/BlockStartOf.in | 15 - .../datetime2/test/cases/BlockStartOf.in.attr | 17 - .../datetime2/test/cases/BlockStartOf.sql | 30 - .../common/datetime2/test/cases/BlockTmGet.in | 10 - .../datetime2/test/cases/BlockTmGet.in.attr | 17 - .../datetime2/test/cases/BlockTmGet.sql | 27 - .../common/datetime2/test/cases/StartOf.sql | 3 +- 11 files changed, 1 insertion(+), 1070 deletions(-) delete mode 100644 ydb/library/yql/udfs/common/datetime2/test/canondata/test.test_BlockStartOf_/results.txt delete mode 100644 ydb/library/yql/udfs/common/datetime2/test/canondata/test.test_BlockTmGet_/results.txt delete mode 100644 ydb/library/yql/udfs/common/datetime2/test/cases/BlockStartOf.attr delete mode 100644 ydb/library/yql/udfs/common/datetime2/test/cases/BlockStartOf.in delete mode 100644 ydb/library/yql/udfs/common/datetime2/test/cases/BlockStartOf.in.attr delete mode 100644 ydb/library/yql/udfs/common/datetime2/test/cases/BlockStartOf.sql delete mode 100644 ydb/library/yql/udfs/common/datetime2/test/cases/BlockTmGet.in delete mode 100644 ydb/library/yql/udfs/common/datetime2/test/cases/BlockTmGet.in.attr delete mode 100644 ydb/library/yql/udfs/common/datetime2/test/cases/BlockTmGet.sql diff --git a/ydb/library/yql/udfs/common/datetime2/test/canondata/result.json b/ydb/library/yql/udfs/common/datetime2/test/canondata/result.json index eaa37e5f488c..c8b2559c8b02 100644 --- a/ydb/library/yql/udfs/common/datetime2/test/canondata/result.json +++ b/ydb/library/yql/udfs/common/datetime2/test/canondata/result.json @@ -9,16 +9,6 @@ "uri": "file://test.test_BlockGet_/results.txt" } ], - "test.test[BlockStartOf]": [ - { - "uri": "file://test.test_BlockStartOf_/results.txt" - } - ], - "test.test[BlockTmGet]": [ - { - "uri": "file://test.test_BlockTmGet_/results.txt" - } - ], "test.test[BlockTo]": [ { "uri": "file://test.test_BlockTo_/results.txt" diff --git a/ydb/library/yql/udfs/common/datetime2/test/canondata/test.test_BlockStartOf_/results.txt b/ydb/library/yql/udfs/common/datetime2/test/canondata/test.test_BlockStartOf_/results.txt deleted file mode 100644 index 20890003833a..000000000000 --- a/ydb/library/yql/udfs/common/datetime2/test/canondata/test.test_BlockStartOf_/results.txt +++ /dev/null @@ -1,314 +0,0 @@ -[ - { - "Write" = [ - { - "Type" = [ - "ListType"; - [ - "StructType"; - [ - [ - "column0"; - [ - "OptionalType"; - [ - "DataType"; - "TzTimestamp" - ] - ] - ]; - [ - "column1"; - [ - "OptionalType"; - [ - "DataType"; - "TzTimestamp" - ] - ] - ]; - [ - "column2"; - [ - "OptionalType"; - [ - "DataType"; - "TzTimestamp" - ] - ] - ]; - [ - "column3"; - [ - "OptionalType"; - [ - "DataType"; - "TzTimestamp" - ] - ] - ]; - [ - "column4"; - [ - "OptionalType"; - [ - "DataType"; - "TzTimestamp" - ] - ] - ]; - [ - "column5"; - [ - "OptionalType"; - [ - "DataType"; - "TzTimestamp" - ] - ] - ]; - [ - "column6"; - [ - "OptionalType"; - [ - "DataType"; - "TzTimestamp" - ] - ] - ]; - [ - "column7"; - [ - "OptionalType"; - [ - "DataType"; - "TzTimestamp" - ] - ] - ]; - [ - "column8"; - [ - "OptionalType"; - [ - "DataType"; - "TzTimestamp" - ] - ] - ]; - [ - "column9"; - [ - "OptionalType"; - [ - "DataType"; - "TzTimestamp" - ] - ] - ]; - [ - "column10"; - [ - "OptionalType"; - [ - "DataType"; - "Interval" - ] - ] - ]; - [ - "column11"; - [ - "OptionalType"; - [ - "DataType"; - "TzTimestamp" - ] - ] - ] - ] - ] - ]; - "Data" = [ - [ - #; - #; - #; - #; - #; - #; - [ - "1970-01-01T04:00:00,Europe/Moscow" - ]; - [ - "1970-01-01T05:00:00,Europe/Moscow" - ]; - [ - "1970-01-01T05:00:00,Europe/Moscow" - ]; - [ - "1970-01-01T04:59:57,Europe/Moscow" - ]; - [ - "18000000000" - ]; - [ - "1970-01-31T00:00:00,Europe/Moscow" - ] - ]; - [ - [ - "2018-01-01T00:00:00,Europe/Moscow" - ]; - [ - "2018-10-01T00:00:00,Europe/Moscow" - ]; - [ - "2018-12-01T00:00:00,Europe/Moscow" - ]; - [ - "2018-12-10T00:00:00,Europe/Moscow" - ]; - [ - "2018-12-15T00:00:00,Europe/Moscow" - ]; - [ - "2018-12-15T00:00:00,Europe/Moscow" - ]; - [ - "2018-12-15T00:00:00,Europe/Moscow" - ]; - [ - "2018-12-15T01:00:00,Europe/Moscow" - ]; - [ - "2018-12-15T01:02:00,Europe/Moscow" - ]; - [ - "2018-12-15T01:01:57,Europe/Moscow" - ]; - [ - "3723456789" - ]; - [ - "2018-12-31T00:00:00,Europe/Moscow" - ] - ]; - [ - [ - "2105-01-01T00:00:00,GMT" - ]; - [ - "2105-10-01T00:00:00,GMT" - ]; - [ - "2105-12-01T00:00:00,GMT" - ]; - [ - "2105-12-28T00:00:00,GMT" - ]; - [ - "2105-12-31T00:00:00,GMT" - ]; - [ - "2105-12-31T13:00:00,GMT" - ]; - [ - "2105-12-31T16:00:00,GMT" - ]; - [ - "2105-12-31T16:15:00,GMT" - ]; - [ - "2105-12-31T16:23:40,GMT" - ]; - [ - "2105-12-31T16:23:44,GMT" - ]; - [ - "59025000000" - ]; - [ - "2105-12-31T00:00:00,GMT" - ] - ]; - [ - [ - "2106-01-01T00:00:00,Europe/Moscow" - ]; - [ - "2106-01-01T00:00:00,Europe/Moscow" - ]; - [ - "2106-01-01T00:00:00,Europe/Moscow" - ]; - [ - "2105-12-28T00:00:00,Europe/Moscow" - ]; - [ - "2106-01-01T00:00:00,Europe/Moscow" - ]; - [ - "2106-01-01T00:00:00,Europe/Moscow" - ]; - [ - "2106-01-01T00:00:00,Europe/Moscow" - ]; - [ - "2106-01-01T01:00:00,Europe/Moscow" - ]; - [ - "2106-01-01T01:00:00,Europe/Moscow" - ]; - [ - "2106-01-01T00:59:58,Europe/Moscow" - ]; - [ - "3600000000" - ]; - # - ]; - [ - [ - "2019-01-01T00:00:00,Europe/Moscow" - ]; - [ - "2019-07-01T00:00:00,Europe/Moscow" - ]; - [ - "2019-07-01T00:00:00,Europe/Moscow" - ]; - [ - "2019-07-22T00:00:00,Europe/Moscow" - ]; - [ - "2019-07-24T00:00:00,Europe/Moscow" - ]; - [ - "2019-07-24T00:00:00,Europe/Moscow" - ]; - [ - "2019-07-24T12:00:00,Europe/Moscow" - ]; - [ - "2019-07-24T12:00:00,Europe/Moscow" - ]; - [ - "2019-07-24T12:00:00,Europe/Moscow" - ]; - [ - "2019-07-24T11:59:57,Europe/Moscow" - ]; - [ - "43200000000" - ]; - [ - "2019-07-31T00:00:00,Europe/Moscow" - ] - ] - ] - } - ] - } -] \ No newline at end of file diff --git a/ydb/library/yql/udfs/common/datetime2/test/canondata/test.test_BlockTmGet_/results.txt b/ydb/library/yql/udfs/common/datetime2/test/canondata/test.test_BlockTmGet_/results.txt deleted file mode 100644 index 262c45b59713..000000000000 --- a/ydb/library/yql/udfs/common/datetime2/test/canondata/test.test_BlockTmGet_/results.txt +++ /dev/null @@ -1,628 +0,0 @@ -[ - { - "Write" = [ - { - "Type" = [ - "ListType"; - [ - "StructType"; - [ - [ - "ryear"; - [ - "OptionalType"; - [ - "DataType"; - "Uint16" - ] - ] - ]; - [ - "rdayofyear"; - [ - "OptionalType"; - [ - "DataType"; - "Uint16" - ] - ] - ]; - [ - "rmonth"; - [ - "OptionalType"; - [ - "DataType"; - "Uint8" - ] - ] - ]; - [ - "rmonthname"; - [ - "OptionalType"; - [ - "DataType"; - "String" - ] - ] - ]; - [ - "rweekofyear"; - [ - "OptionalType"; - [ - "DataType"; - "Uint8" - ] - ] - ]; - [ - "rweekofyeariso8601"; - [ - "OptionalType"; - [ - "DataType"; - "Uint8" - ] - ] - ]; - [ - "rdayofmonth"; - [ - "OptionalType"; - [ - "DataType"; - "Uint8" - ] - ] - ]; - [ - "rdayofweek"; - [ - "OptionalType"; - [ - "DataType"; - "Uint8" - ] - ] - ]; - [ - "rdayofweekname"; - [ - "OptionalType"; - [ - "DataType"; - "String" - ] - ] - ]; - [ - "rhour"; - [ - "OptionalType"; - [ - "DataType"; - "Uint8" - ] - ] - ]; - [ - "rminute"; - [ - "OptionalType"; - [ - "DataType"; - "Uint8" - ] - ] - ]; - [ - "rsecond"; - [ - "OptionalType"; - [ - "DataType"; - "Uint8" - ] - ] - ]; - [ - "rmsec"; - [ - "OptionalType"; - [ - "DataType"; - "Uint32" - ] - ] - ]; - [ - "rusec"; - [ - "OptionalType"; - [ - "DataType"; - "Uint32" - ] - ] - ]; - [ - "rtz"; - [ - "OptionalType"; - [ - "DataType"; - "Uint16" - ] - ] - ]; - [ - "rtzname"; - [ - "OptionalType"; - [ - "DataType"; - "String" - ] - ] - ] - ] - ] - ]; - "Data" = [ - [ - [ - "1970" - ]; - [ - "1" - ]; - [ - "1" - ]; - [ - "January" - ]; - [ - "1" - ]; - [ - "1" - ]; - [ - "1" - ]; - [ - "4" - ]; - [ - "Thursday" - ]; - [ - "11" - ]; - [ - "14" - ]; - [ - "0" - ]; - [ - "0" - ]; - [ - "0" - ]; - [ - "0" - ]; - [ - "GMT" - ] - ]; - [ - [ - "1970" - ]; - [ - "2" - ]; - [ - "1" - ]; - [ - "January" - ]; - [ - "1" - ]; - [ - "1" - ]; - [ - "2" - ]; - [ - "5" - ]; - [ - "Friday" - ]; - [ - "14" - ]; - [ - "8" - ]; - [ - "0" - ]; - [ - "0" - ]; - [ - "0" - ]; - [ - "0" - ]; - [ - "GMT" - ] - ]; - [ - [ - "1970" - ]; - [ - "32" - ]; - [ - "2" - ]; - [ - "February" - ]; - [ - "5" - ]; - [ - "5" - ]; - [ - "1" - ]; - [ - "7" - ]; - [ - "Sunday" - ]; - [ - "17" - ]; - [ - "3" - ]; - [ - "0" - ]; - [ - "0" - ]; - [ - "0" - ]; - [ - "0" - ]; - [ - "GMT" - ] - ]; - [ - [ - "1970" - ]; - [ - "246" - ]; - [ - "9" - ]; - [ - "September" - ]; - [ - "36" - ]; - [ - "36" - ]; - [ - "3" - ]; - [ - "4" - ]; - [ - "Thursday" - ]; - [ - "7" - ]; - [ - "22" - ]; - [ - "0" - ]; - [ - "0" - ]; - [ - "0" - ]; - [ - "0" - ]; - [ - "GMT" - ] - ]; - [ - [ - "1970" - ]; - [ - "365" - ]; - [ - "12" - ]; - [ - "December" - ]; - [ - "53" - ]; - [ - "53" - ]; - [ - "31" - ]; - [ - "4" - ]; - [ - "Thursday" - ]; - [ - "23" - ]; - [ - "59" - ]; - [ - "59" - ]; - [ - "999" - ]; - [ - "999999" - ]; - [ - "0" - ]; - [ - "GMT" - ] - ]; - [ - [ - "1971" - ]; - [ - "1" - ]; - [ - "1" - ]; - [ - "January" - ]; - [ - "1" - ]; - [ - "53" - ]; - [ - "1" - ]; - [ - "5" - ]; - [ - "Friday" - ]; - [ - "0" - ]; - [ - "0" - ]; - [ - "0" - ]; - [ - "0" - ]; - [ - "0" - ]; - [ - "0" - ]; - [ - "GMT" - ] - ]; - [ - [ - "1971" - ]; - [ - "14" - ]; - [ - "1" - ]; - [ - "January" - ]; - [ - "3" - ]; - [ - "2" - ]; - [ - "14" - ]; - [ - "4" - ]; - [ - "Thursday" - ]; - [ - "0" - ]; - [ - "0" - ]; - [ - "0" - ]; - [ - "0" - ]; - [ - "0" - ]; - [ - "0" - ]; - [ - "GMT" - ] - ]; - [ - [ - "1978" - ]; - [ - "25" - ]; - [ - "1" - ]; - [ - "January" - ]; - [ - "5" - ]; - [ - "4" - ]; - [ - "25" - ]; - [ - "3" - ]; - [ - "Wednesday" - ]; - [ - "16" - ]; - [ - "15" - ]; - [ - "0" - ]; - [ - "0" - ]; - [ - "0" - ]; - [ - "477" - ]; - [ - "Europe/Uzhgorod" - ] - ]; - [ - [ - "2018" - ]; - [ - "335" - ]; - [ - "12" - ]; - [ - "December" - ]; - [ - "48" - ]; - [ - "48" - ]; - [ - "1" - ]; - [ - "6" - ]; - [ - "Saturday" - ]; - [ - "1" - ]; - [ - "2" - ]; - [ - "3" - ]; - [ - "456" - ]; - [ - "456789" - ]; - [ - "1" - ]; - [ - "Europe/Moscow" - ] - ] - ] - } - ] - } -] \ No newline at end of file diff --git a/ydb/library/yql/udfs/common/datetime2/test/cases/BlockStartOf.attr b/ydb/library/yql/udfs/common/datetime2/test/cases/BlockStartOf.attr deleted file mode 100644 index e69de29bb2d1..000000000000 diff --git a/ydb/library/yql/udfs/common/datetime2/test/cases/BlockStartOf.in b/ydb/library/yql/udfs/common/datetime2/test/cases/BlockStartOf.in deleted file mode 100644 index f482585e7204..000000000000 --- a/ydb/library/yql/udfs/common/datetime2/test/cases/BlockStartOf.in +++ /dev/null @@ -1,15 +0,0 @@ -{ - "ftztimestamp"="1970-01-01T05:00:00.000000,Europe/Moscow" -}; -{ - "ftztimestamp"="2018-12-15T01:02:03.456789,Europe/Moscow" -}; -{ - "ftztimestamp"="2105-12-31T16:23:45.000000,GMT" -}; -{ - "ftztimestamp"="2106-01-01T01:00:00.000000,Europe/Moscow" -}; -{ - "ftztimestamp"="2019-07-24T12:00:00,Europe/Moscow" -}; diff --git a/ydb/library/yql/udfs/common/datetime2/test/cases/BlockStartOf.in.attr b/ydb/library/yql/udfs/common/datetime2/test/cases/BlockStartOf.in.attr deleted file mode 100644 index 3915337be3c3..000000000000 --- a/ydb/library/yql/udfs/common/datetime2/test/cases/BlockStartOf.in.attr +++ /dev/null @@ -1,17 +0,0 @@ -{ - "_yql_row_spec" = { - "Type" = [ - "StructType"; - [ - [ - "ftztimestamp"; - [ - "DataType"; - "String" - ] - ]; - ] - ] - } -} - diff --git a/ydb/library/yql/udfs/common/datetime2/test/cases/BlockStartOf.sql b/ydb/library/yql/udfs/common/datetime2/test/cases/BlockStartOf.sql deleted file mode 100644 index e531d6f1c8d6..000000000000 --- a/ydb/library/yql/udfs/common/datetime2/test/cases/BlockStartOf.sql +++ /dev/null @@ -1,30 +0,0 @@ -/* syntax version 1 */ -pragma UseBlocks; -insert into @t - select - cast(ftztimestamp as TzTimestamp) as `tztimestamp`, - from Input; - -commit; - -select - DateTime::StartOfYear(`tztimestamp`), - - DateTime::StartOfQuarter(`tztimestamp`), - - DateTime::StartOfMonth(`tztimestamp`), - - DateTime::StartOfWeek(`tztimestamp`), - - DateTime::StartOfDay(`tztimestamp`), - - DateTime::StartOf(`tztimestamp`, Interval("PT13H")), - - DateTime::StartOf(`tztimestamp`, Interval("PT4H")), - DateTime::StartOf(`tztimestamp`, Interval("PT15M")), - DateTime::StartOf(`tztimestamp`, Interval("PT20S")), - DateTime::StartOf(`tztimestamp`, Interval("PT7S")), - DateTime::TimeOfDay(`tztimestamp`), - - DateTime::EndOfMonth(`tztimestamp`), -from @t; diff --git a/ydb/library/yql/udfs/common/datetime2/test/cases/BlockTmGet.in b/ydb/library/yql/udfs/common/datetime2/test/cases/BlockTmGet.in deleted file mode 100644 index 06d60295808c..000000000000 --- a/ydb/library/yql/udfs/common/datetime2/test/cases/BlockTmGet.in +++ /dev/null @@ -1,10 +0,0 @@ -{"ftztimestamp"="1970-01-01T11:14:00.000000,GMT"}; -{"ftztimestamp"="1970-01-02T14:08:00.000000,GMT"}; -{"ftztimestamp"="1970-02-01T17:03:00.000000,GMT"}; -{"ftztimestamp"="1970-09-03T07:22:00.000000,GMT"}; -{"ftztimestamp"="1970-12-31T23:59:59.999999,GMT"}; -{"ftztimestamp"="1971-01-01T00:00:00.000000,GMT"}; -{"ftztimestamp"="1971-01-14T00:00:00.000000,GMT"}; -{"ftztimestamp"="1978-01-25T16:15:00.000000,Europe/Uzhgorod"}; -{"ftztimestamp"="2018-12-01T01:02:03.456789,Europe/Moscow"}; - diff --git a/ydb/library/yql/udfs/common/datetime2/test/cases/BlockTmGet.in.attr b/ydb/library/yql/udfs/common/datetime2/test/cases/BlockTmGet.in.attr deleted file mode 100644 index 3915337be3c3..000000000000 --- a/ydb/library/yql/udfs/common/datetime2/test/cases/BlockTmGet.in.attr +++ /dev/null @@ -1,17 +0,0 @@ -{ - "_yql_row_spec" = { - "Type" = [ - "StructType"; - [ - [ - "ftztimestamp"; - [ - "DataType"; - "String" - ] - ]; - ] - ] - } -} - diff --git a/ydb/library/yql/udfs/common/datetime2/test/cases/BlockTmGet.sql b/ydb/library/yql/udfs/common/datetime2/test/cases/BlockTmGet.sql deleted file mode 100644 index 3087d4e78d4d..000000000000 --- a/ydb/library/yql/udfs/common/datetime2/test/cases/BlockTmGet.sql +++ /dev/null @@ -1,27 +0,0 @@ -/* syntax version 1 */ -pragma UseBlocks; -insert into @t - select - cast(ftztimestamp as TzTimestamp) as `tm`, - from Input; - -commit; - -SELECT - DateTime::GetYear(tm) as ryear, - DateTime::GetDayOfYear(tm) as rdayofyear, - DateTime::GetMonth(tm) as rmonth, - DateTime::GetMonthName(tm) as rmonthname, - DateTime::GetWeekOfYear(tm) as rweekofyear, - DateTime::GetWeekOfYearIso8601(tm) as rweekofyeariso8601, - DateTime::GetDayOfMonth(tm) as rdayofmonth, - DateTime::GetDayOfWeek(tm) as rdayofweek, - DateTime::GetDayOfWeekName(tm) as rdayofweekname, - DateTime::GetHour(tm) as rhour, - DateTime::GetMinute(tm) as rminute, - DateTime::GetSecond(tm) as rsecond, - DateTime::GetMillisecondOfSecond(tm) as rmsec, - DateTime::GetMicrosecondOfSecond(tm) as rusec, - DateTime::GetTimezoneId(tm) as rtz, - DateTime::GetTimezoneName(tm) as rtzname -FROM @t; diff --git a/ydb/library/yql/udfs/common/datetime2/test/cases/StartOf.sql b/ydb/library/yql/udfs/common/datetime2/test/cases/StartOf.sql index 201db382300c..81fad1263285 100644 --- a/ydb/library/yql/udfs/common/datetime2/test/cases/StartOf.sql +++ b/ydb/library/yql/udfs/common/datetime2/test/cases/StartOf.sql @@ -12,8 +12,7 @@ select $format(DateTime::StartOf(`tztimestamp`, Interval("PT15M"))), $format(DateTime::StartOf(`tztimestamp`, Interval("PT20S"))), $format(DateTime::StartOf(`tztimestamp`, Interval("PT7S"))), - DateTime::TimeOfDay(`tztimestamp`), - $format(DateTime::EndOfMonth(`tztimestamp`)), + DateTime::TimeOfDay(`tztimestamp`) from ( select cast(ftztimestamp as TzTimestamp) as `tztimestamp` From 01995381eba39261b83273f4c5e5649f1771a30d Mon Sep 17 00:00:00 2001 From: Fiodar Miron Date: Wed, 3 Jul 2024 08:35:30 +0300 Subject: [PATCH 03/18] value builder --- .../yql/public/udf/arrow/udf_arrow_helpers.h | 39 +++------- .../yql/udfs/common/string/string_udf.cpp | 72 +++++++++---------- .../udfs/common/url_base/lib/url_base_udf.h | 42 +++++------ 3 files changed, 68 insertions(+), 85 deletions(-) diff --git a/ydb/library/yql/public/udf/arrow/udf_arrow_helpers.h b/ydb/library/yql/public/udf/arrow/udf_arrow_helpers.h index f3f869b3a08e..cace7cf945e8 100644 --- a/ydb/library/yql/public/udf/arrow/udf_arrow_helpers.h +++ b/ydb/library/yql/public/udf/arrow/udf_arrow_helpers.h @@ -357,15 +357,6 @@ TReader* CastToBlockReaderImpl(IBlockReader& reader) { template struct TUnaryKernelExec { -template -static void Process(const TBlockItem& arg, TUdfKernelState& state, const TSink& sink) { - if constexpr (std::is_invocable_v), TBlockItem, TUdfKernelState&, TSink&>) { - TDerived::Process(arg, state, sink); - } else { - TDerived::Process(arg, sink); - } -} - static arrow::Status Do(arrow::compute::KernelContext* ctx, const arrow::compute::ExecBatch& batch, arrow::Datum* res) { auto& state = dynamic_cast(*ctx->state()); auto& reader = state.GetReader(0); @@ -377,7 +368,7 @@ static arrow::Status Do(arrow::compute::KernelContext* ctx, const arrow::compute auto* builderImpl = CastToScalarBuilderImpl(builder); auto item = readerImpl->GetScalarItem(*arg.scalar()); - TDerived::Process(item, [&](TBlockItem out) { + TDerived::Process(item, state.GetValueBuilder(), [&](auto out) { *res = builderImpl->Build(out); }); } @@ -392,7 +383,7 @@ static arrow::Status Do(arrow::compute::KernelContext* ctx, const arrow::compute for (int64_t i = 0; i < array.length;) { for (size_t j = 0; j < maxBlockLength && i < array.length; ++j, ++i) { auto item = readerImpl->GetItem(array, i); - TDerived::Process(item, [&](TBlockItem out) { + TDerived::Process(item, state.GetValueBuilder(), [&](auto out) { builderImpl->Add(out); }); } @@ -409,15 +400,6 @@ static arrow::Status Do(arrow::compute::KernelContext* ctx, const arrow::compute template struct TBinaryKernelExec { - template - static void Process(const TBlockItem& arg1, const TBlockItem& arg2, TUdfKernelState& state, const TSink& sink) { - if constexpr (std::is_invocable_v), TBlockItem, TBlockItem, TUdfKernelState&, TSink&>) { - TDerived::Process(arg1, arg2, state, sink); - } else { - TDerived::Process(arg1, arg2, sink); - } - } - static arrow::Status Do(arrow::compute::KernelContext* ctx, const arrow::compute::ExecBatch& batch, arrow::Datum* res) { auto& state = dynamic_cast(*ctx->state()); @@ -435,7 +417,8 @@ struct TBinaryKernelExec { auto item1 = reader1Impl->GetScalarItem(*arg1.scalar()); auto item2 = reader2Impl->GetScalarItem(*arg2.scalar()); - TDerived::Process(item1, item2, [&](TBlockItem out) { + + TDerived::Process(item1, item2, state.GetValueBuilder(), [&](TBlockItem out) { *res = builderImpl->Build(out); }); } @@ -451,7 +434,7 @@ struct TBinaryKernelExec { for (int64_t i = 0; i < array2.length;) { for (size_t j = 0; j < maxBlockLength && i < array2.length; ++j, ++i) { auto item2 = reader2Impl->GetItem(array2, i); - TDerived::Process(item1, item2, [&](TBlockItem out) { + TDerived::Process(item1, item2, state.GetValueBuilder(), [&](TBlockItem out) { builderImpl->Add(out); }); } @@ -472,7 +455,7 @@ struct TBinaryKernelExec { for (int64_t i = 0; i < array1.length;) { for (size_t j = 0; j < maxBlockLength && i < array1.length; ++j, ++i) { auto item1 = reader1Impl->GetItem(array1, i); - TDerived::Process(item1, item2, [&](TBlockItem out) { + TDerived::Process(item1, item2, state.GetValueBuilder(), [&](TBlockItem out) { builderImpl->Add(out); }); } @@ -494,9 +477,9 @@ struct TBinaryKernelExec { Y_ENSURE(array1.length == array2.length); for (int64_t i = 0; i < array1.length;) { for (size_t j = 0; j < maxBlockLength && i < array1.length; ++j, ++i) { - auto item1 = reader1.GetItem(array1, i); - auto item2 = reader2.GetItem(array2, i); - TDerived::Process(item1, item2, [&](TBlockItem out) { + auto item1 = reader1Impl->GetItem(array1, i); + auto item2 = reader2Impl->GetItem(array2, i); + TDerived::Process(item1, item2, state.GetValueBuilder(), [&](TBlockItem out) { builderImpl->Add(out); }); } @@ -553,7 +536,7 @@ struct TGenericKernelExec { auto& reader = state.GetReader(k); args[k] = reader.GetScalarItem(*batch[k].scalar()); } - TDerived::Process(items, [&](TBlockItem out) { + TDerived::Process(items, state.GetValueBuilder(), [&](TBlockItem out) { *res = builderImpl->Build(out); }); } else { @@ -583,7 +566,7 @@ struct TGenericKernelExec { args[k] = reader.GetItem(*batch[k].array(), i); } - TDerived::Process(items, [&](TBlockItem out) { + TDerived::Process(items, state.GetValueBuilder(), [&](TBlockItem out) { builderImpl->Add(out); }); } diff --git a/ydb/library/yql/udfs/common/string/string_udf.cpp b/ydb/library/yql/udfs/common/string/string_udf.cpp index 6ee4bf145605..3b1b95ca0757 100644 --- a/ydb/library/yql/udfs/common/string/string_udf.cpp +++ b/ydb/library/yql/udfs/common/string/string_udf.cpp @@ -31,24 +31,24 @@ using namespace NUdf; namespace { -#define STRING_UDF(udfName, function) \ - BEGIN_SIMPLE_STRICT_ARROW_UDF(T##udfName, char*(TAutoMap)) { \ - const TString input(args[0].AsStringRef()); \ - const auto& result = function(input); \ - return valueBuilder->NewString(result); \ - } \ - \ - struct T##udfName##KernelExec \ - : public TUnaryKernelExec \ - { \ - template \ - static void Process(TBlockItem arg1, const TSink& sink) { \ - const TString input(arg1.AsStringRef()); \ - const auto& result = function(input); \ - sink(TBlockItem(result)); \ - } \ - }; \ - \ +#define STRING_UDF(udfName, function) \ + BEGIN_SIMPLE_STRICT_ARROW_UDF(T##udfName, char*(TAutoMap)) { \ + const TString input(args[0].AsStringRef()); \ + const auto& result = function(input); \ + return valueBuilder->NewString(result); \ + } \ + \ + struct T##udfName##KernelExec \ + : public TUnaryKernelExec \ + { \ + template \ + static void Process(TBlockItem arg1, const IValueBuilder&, const TSink& sink) { \ + const TString input(arg1.AsStringRef()); \ + const auto& result = function(input); \ + sink(TBlockItem(result)); \ + } \ + }; \ + \ END_SIMPLE_ARROW_UDF(T##udfName, T##udfName##KernelExec::Do) @@ -69,7 +69,7 @@ namespace { : public TUnaryKernelExec \ { \ template \ - static void Process(TBlockItem arg1, const TSink& sink) { \ + static void Process(TBlockItem arg1, const IValueBuilder&, const TSink& sink) { \ if (!arg1) { \ return sink(TBlockItem()); \ } \ @@ -126,7 +126,7 @@ namespace { : public TUnaryKernelExec \ { \ template \ - static void Process(TBlockItem arg1, const TSink& sink) { \ + static void Process(TBlockItem arg1, const IValueBuilder&, const TSink& sink) { \ TString input(arg1.AsStringRef()); \ if (input.function()) { \ sink(TBlockItem(input)); \ @@ -185,7 +185,7 @@ namespace { : public TUnaryKernelExec \ { \ template \ - static void Process(TBlockItem arg1, const TSink& sink) { \ + static void Process(TBlockItem arg1, const IValueBuilder&, const TSink& sink) { \ if (arg1) { \ const TStringBuf input(arg1.AsStringRef()); \ bool result = true; \ @@ -231,7 +231,7 @@ namespace { : public TGenericKernelExec \ { \ template \ - static void Process(TBlockItem args, const TSink& sink) { \ + static void Process(TBlockItem args, const IValueBuilder&, const TSink& sink) { \ TStringStream result; \ const TStringBuf input(args.GetElement(0).AsStringRef()); \ char paddingSymbol = ' '; \ @@ -264,7 +264,7 @@ namespace { : public TUnaryKernelExec \ { \ template \ - static void Process(TBlockItem arg1, const TSink& sink) { \ + static void Process(TBlockItem arg1, const IValueBuilder&, const TSink& sink) { \ TStringStream result; \ result << function(arg1.Get()); \ sink(TBlockItem(TStringRef(result.Data(), result.Size()))); \ @@ -285,7 +285,7 @@ namespace { : public TUnaryKernelExec \ { \ template \ - static void Process(TBlockItem arg1, const TSink& sink) { \ + static void Process(TBlockItem arg1, const IValueBuilder&, const TSink& sink) { \ TStringStream result; \ const TStringBuf input(arg1.AsStringRef()); \ result << function(input); \ @@ -307,7 +307,7 @@ namespace { : public TUnaryKernelExec \ { \ template \ - static void Process(TBlockItem arg1, const TSink& sink) { \ + static void Process(TBlockItem arg1, const IValueBuilder&, const TSink& sink) { \ TStringStream result; \ result << HumanReadableSize(arg1.Get(), hrSize); \ sink(TBlockItem(TStringRef(result.Data(), result.Size()))); \ @@ -414,7 +414,7 @@ namespace { : public TBinaryKernelExec { template - static void Process(TBlockItem arg1, TBlockItem arg2, const TSink& sink) { + static void Process(TBlockItem arg1, TBlockItem arg2, const IValueBuilder&, const TSink& sink) { TString input(arg1.AsStringRef()); ui64 maxLength = arg2.Get(); CollapseText(input, maxLength); @@ -437,7 +437,7 @@ namespace { struct TContainsKernelExec : public TBinaryKernelExec { template - static void Process(TBlockItem arg1, TBlockItem arg2, const TSink& sink) { + static void Process(TBlockItem arg1, TBlockItem arg2, const IValueBuilder&, const TSink& sink) { if (!arg1) return sink(TBlockItem(false)); @@ -461,7 +461,7 @@ namespace { : public TGenericKernelExec { template - static void Process(TBlockItem args, const TSink& sink) { + static void Process(TBlockItem args, const IValueBuilder&, const TSink& sink) { TString result(args.GetElement(0).AsStringRef()); const TStringBuf what(args.GetElement(1).AsStringRef()); const TStringBuf with(args.GetElement(2).AsStringRef()); @@ -490,7 +490,7 @@ namespace { : public TGenericKernelExec { template - static void Process(TBlockItem args, const TSink& sink) { + static void Process(TBlockItem args, const IValueBuilder&, const TSink& sink) { std::string result(args.GetElement(0).AsStringRef()); const std::string_view what(args.GetElement(1).AsStringRef()); const std::string_view with(args.GetElement(2).AsStringRef()); @@ -519,7 +519,7 @@ namespace { : public TGenericKernelExec { template - static void Process(TBlockItem args, const TSink& sink) { + static void Process(TBlockItem args, const IValueBuilder&, const TSink& sink) { std::string result(args.GetElement(0).AsStringRef()); const std::string_view what(args.GetElement(1).AsStringRef()); const std::string_view with(args.GetElement(2).AsStringRef()); @@ -558,7 +558,7 @@ namespace { : public TBinaryKernelExec { template - static void Process(TBlockItem arg1, TBlockItem arg2, const TSink& sink) { + static void Process(TBlockItem arg1, TBlockItem arg2, const IValueBuilder&, const TSink& sink) { std::string input(arg1.AsStringRef()); const std::string_view remove(arg2.AsStringRef()); std::array chars{}; @@ -602,7 +602,7 @@ namespace { : public TBinaryKernelExec { template - static void Process(TBlockItem arg1, TBlockItem arg2, const TSink& sink) { + static void Process(TBlockItem arg1, TBlockItem arg2, const IValueBuilder&, const TSink& sink) { std::string input(arg1.AsStringRef()); const std::string_view remove(arg2.AsStringRef()); std::array chars{}; @@ -642,7 +642,7 @@ namespace { : public TBinaryKernelExec { template - static void Process(TBlockItem arg1, TBlockItem arg2, const TSink& sink) { + static void Process(TBlockItem arg1, TBlockItem arg2, const IValueBuilder&, const TSink& sink) { std::string input(arg1.AsStringRef()); const std::string_view remove(arg2.AsStringRef()); std::array chars{}; @@ -789,7 +789,7 @@ namespace { struct TLevensteinDistanceKernelExec : public TBinaryKernelExec { template - static void Process(TBlockItem arg1, TBlockItem arg2, const TSink& sink) { + static void Process(TBlockItem arg1, TBlockItem arg2, const IValueBuilder&, const TSink& sink) { const std::string_view left(arg1.AsStringRef()); const std::string_view right(arg2.AsStringRef()); const ui64 result = NLevenshtein::Distance(left, right); @@ -811,7 +811,7 @@ namespace { : public TUnaryKernelExec { template - static void Process(TBlockItem arg1, const TSink& sink) { + static void Process(TBlockItem arg1, const IValueBuilder&, const TSink& sink) { TStringStream result; result << HumanReadable(TDuration::MicroSeconds(arg1.Get())); sink(TBlockItem(TStringRef(result.Data(), result.Size()))); @@ -829,7 +829,7 @@ namespace { struct TPrecKernelExec : public TBinaryKernelExec { template - static void Process(TBlockItem arg1, TBlockItem arg2, const TSink& sink) { + static void Process(TBlockItem arg1, TBlockItem arg2, const IValueBuilder&, const TSink& sink) { TStringStream result; result << Prec(arg1.Get(), arg2.Get()); sink(TBlockItem(TStringRef(result.Data(), result.Size()))); diff --git a/ydb/library/yql/udfs/common/url_base/lib/url_base_udf.h b/ydb/library/yql/udfs/common/url_base/lib/url_base_udf.h index d80bc065f86e..89d673ff82b8 100644 --- a/ydb/library/yql/udfs/common/url_base/lib/url_base_udf.h +++ b/ydb/library/yql/udfs/common/url_base/lib/url_base_udf.h @@ -36,7 +36,7 @@ inline bool PrepareUrl(const std::string_view& keyStr, TUri& parser) { } \ struct udfName##KernelExec : public TUnaryKernelExec { \ template \ - static void Process(TBlockItem arg, const TSink& sink) { \ + static void Process(TBlockItem arg, const IValueBuilder&, const TSink& sink) { \ if (!arg) { \ return sink(TBlockItem()); \ } \ @@ -60,7 +60,7 @@ BEGIN_SIMPLE_ARROW_UDF(TNormalize, TOptional(TOptional)) { } struct TNormalizeKernelExec : public TUnaryKernelExec { template - static void Process(TBlockItem arg, const TSink& sink) { + static void Process(TBlockItem arg, const IValueBuilder&, const TSink& sink) { if (!arg) { return sink(TBlockItem()); } @@ -81,7 +81,7 @@ BEGIN_SIMPLE_STRICT_ARROW_UDF(TGetScheme, char*(TAutoMap)) { } struct TGetSchemeKernelExec : public TUnaryKernelExec { template - static void Process(TBlockItem arg, const TSink& sink) { + static void Process(TBlockItem arg, const IValueBuilder&, const TSink& sink) { const std::string_view url(arg.AsStringRef()); const std::string_view prefix(GetSchemePrefix(url)); const std::string_view scheme = url.substr(std::distance(url.begin(), prefix.begin()), prefix.size()); @@ -124,7 +124,7 @@ BEGIN_SIMPLE_ARROW_UDF(TGetPort, TOptional(TOptional)) { } struct TGetPortKernelExec : public TUnaryKernelExec { template - static void Process(TBlockItem arg, const TSink& sink) { + static void Process(TBlockItem arg, const IValueBuilder&, const TSink& sink) { if (!arg) { return sink(TBlockItem()); } @@ -152,7 +152,7 @@ BEGIN_SIMPLE_ARROW_UDF(TGetTail, TOptional(TOptional)) { } struct TGetTailKernelExec : public TUnaryKernelExec { template - static void Process(TBlockItem arg, const TSink& sink) { + static void Process(TBlockItem arg, const IValueBuilder&, const TSink& sink) { if (!arg) { return sink(TBlockItem()); } @@ -186,7 +186,7 @@ BEGIN_SIMPLE_ARROW_UDF(TGetPath, TOptional(TOptional)) { } struct TGetPathKernelExec : public TUnaryKernelExec { template - static void Process(TBlockItem arg, const TSink& sink) { + static void Process(TBlockItem arg, const IValueBuilder&, const TSink& sink) { if (!arg) { return sink(TBlockItem()); } @@ -216,7 +216,7 @@ BEGIN_SIMPLE_ARROW_UDF(TGetFragment, TOptional(TOptional)) { } struct TGetFragmentKernelExec : public TUnaryKernelExec { template - static void Process(TBlockItem arg, const TSink& sink) { + static void Process(TBlockItem arg, const IValueBuilder&, const TSink& sink) { if (!arg) { return sink(TBlockItem()); } @@ -256,7 +256,7 @@ BEGIN_SIMPLE_ARROW_UDF(TGetDomain, TOptional(TOptional, ui8)) { } struct TGetDomainKernelExec : public TBinaryKernelExec { template - static void Process(TBlockItem arg1, TBlockItem arg2, const TSink& sink) { + static void Process(TBlockItem arg1, TBlockItem arg2, const IValueBuilder&, const TSink& sink) { if (!arg1) { return sink(TBlockItem()); } @@ -276,7 +276,7 @@ BEGIN_SIMPLE_ARROW_UDF(TGetTLD, char*(TAutoMap)) { } struct TGetTLDKernelExec : public TUnaryKernelExec { template - static void Process(TBlockItem arg, const TSink& sink) { + static void Process(TBlockItem arg, const IValueBuilder&, const TSink& sink) { const TStringBuf url(arg.AsStringRef()); return sink(TBlockItem(GetZone(GetOnlyHost(url)))); } @@ -291,7 +291,7 @@ BEGIN_SIMPLE_ARROW_UDF(TGetDomainLevel, ui64(TAutoMap)) { } struct TGetDomainLevelKernelExec : public TUnaryKernelExec { template - static void Process(TBlockItem arg, const TSink& sink) { + static void Process(TBlockItem arg, const IValueBuilder&, const TSink& sink) { std::vector parts; StringSplitter(GetOnlyHost(arg.AsStringRef())).Split('.').AddTo(&parts); return sink(TBlockItem(ui64(parts.size()))); @@ -360,7 +360,7 @@ BEGIN_SIMPLE_ARROW_UDF(TGetCGIParam, TOptional(TOptional, char*)) } struct TGetCGIParamKernelExec : public TBinaryKernelExec { template - static void Process(TBlockItem arg1, TBlockItem arg2, const TSink& sink) { + static void Process(TBlockItem arg1, TBlockItem arg2, const IValueBuilder&, const TSink& sink) { if (!arg1) { return sink(TBlockItem()); } @@ -387,7 +387,7 @@ BEGIN_SIMPLE_ARROW_UDF(TCutQueryStringAndFragment, char*(TAutoMap)) { } struct TCutQueryStringAndFragmentKernelExec : public TUnaryKernelExec { template - static void Process(TBlockItem arg, const TSink& sink) { + static void Process(TBlockItem arg, const IValueBuilder&, const TSink& sink) { const std::string_view input(arg.AsStringRef()); const auto cut = input.find_first_of("?#"); sink(TBlockItem(arg.AsStringRef().Substring(0U, cut))); @@ -407,7 +407,7 @@ BEGIN_SIMPLE_ARROW_UDF(TEncode, TOptional(TOptional)) { } struct TEncodeKernelExec : public TUnaryKernelExec { template - static void Process(TBlockItem arg, const TSink& sink) { + static void Process(TBlockItem arg, const IValueBuilder&, const TSink& sink) { if (!arg) { return sink(TBlockItem()); } @@ -435,7 +435,7 @@ BEGIN_SIMPLE_ARROW_UDF(TDecode, TOptional(TOptional)) { } struct TDecodeKernelExec : public TUnaryKernelExec { template - static void Process(TBlockItem arg, const TSink& sink) { + static void Process(TBlockItem arg, const IValueBuilder&, const TSink& sink) { if (!arg) { return sink(TBlockItem()); } @@ -457,7 +457,7 @@ BEGIN_SIMPLE_ARROW_UDF(TIsKnownTLD, bool(TAutoMap)) { } struct TIsKnownTLDKernelExec : public TUnaryKernelExec { template - static void Process(TBlockItem arg, const TSink& sink) { + static void Process(TBlockItem arg, const IValueBuilder&, const TSink& sink) { sink(TBlockItem(static_cast(IsTld(arg.AsStringRef())))); } }; @@ -469,7 +469,7 @@ BEGIN_SIMPLE_ARROW_UDF(TIsWellKnownTLD, bool(TAutoMap)) { } struct TIsWellKnownTLDKernelExec : public TUnaryKernelExec { template - static void Process(TBlockItem arg, const TSink& sink) { + static void Process(TBlockItem arg, const IValueBuilder&, const TSink& sink) { sink(TBlockItem(static_cast(IsVeryGoodTld(arg.AsStringRef())))); } }; @@ -483,7 +483,7 @@ BEGIN_SIMPLE_ARROW_UDF(THostNameToPunycode, TOptional(TAutoMap)) t } struct THostNameToPunycodeKernelExec : public TUnaryKernelExec { template - static void Process(TBlockItem arg, const TSink& sink) try { + static void Process(TBlockItem arg, const IValueBuilder&, const TSink& sink) try { const TUtf16String& input = UTF8ToWide(arg.AsStringRef()); return sink(TBlockItem(TStringRef(HostNameToPunycode(input)))); } catch (TPunycodeError&) { @@ -498,7 +498,7 @@ BEGIN_SIMPLE_ARROW_UDF(TForceHostNameToPunycode, char*(TAutoMap)) { } struct TForceHostNameToPunycodeKernelExec : public TUnaryKernelExec { template - static void Process(TBlockItem arg, const TSink& sink) { + static void Process(TBlockItem arg, const IValueBuilder&, const TSink& sink) { const TUtf16String& input = UTF8ToWide(arg.AsStringRef()); sink(TBlockItem(TStringRef(ForceHostNameToPunycode(input)))); } @@ -514,7 +514,7 @@ BEGIN_SIMPLE_ARROW_UDF(TPunycodeToHostName, TOptional(TAutoMap)) t } struct TPunycodeToHostNameKernelExec : public TUnaryKernelExec { template - static void Process(TBlockItem arg, const TSink& sink) try { + static void Process(TBlockItem arg, const IValueBuilder&, const TSink& sink) try { const TStringRef& input = arg.AsStringRef(); const auto& result = WideToUTF8(PunycodeToHostName(input)); return sink(TBlockItem(TStringRef(result))); @@ -531,7 +531,7 @@ BEGIN_SIMPLE_ARROW_UDF(TForcePunycodeToHostName, char*(TAutoMap)) { } struct TForcePunycodeToHostNameKernelExec : public TUnaryKernelExec { template - static void Process(TBlockItem arg, const TSink& sink) { + static void Process(TBlockItem arg, const IValueBuilder&, const TSink& sink) { const TStringRef& input = arg.AsStringRef(); const auto& result = WideToUTF8(ForcePunycodeToHostName(input)); sink(TBlockItem(TStringRef(result))); @@ -545,7 +545,7 @@ BEGIN_SIMPLE_ARROW_UDF(TCanBePunycodeHostName, bool(TAutoMap)) { } struct TCanBePunycodeHostNameKernelExec : public TUnaryKernelExec { template - static void Process(TBlockItem arg, const TSink& sink) { + static void Process(TBlockItem arg, const IValueBuilder&, const TSink& sink) { sink(TBlockItem(static_cast(CanBePunycodeHostName(arg.AsStringRef())))); } }; From e25069e53ed89f6d811478f93e03dad7b1770f71 Mon Sep 17 00:00:00 2001 From: Fiodar Miron Date: Wed, 3 Jul 2024 10:38:30 +0300 Subject: [PATCH 04/18] remove datetime modifications --- .../yql/public/udf/arrow/udf_arrow_helpers.h | 2 +- .../udfs/common/datetime2/datetime_udf.cpp | 178 ++++-------------- 2 files changed, 42 insertions(+), 138 deletions(-) diff --git a/ydb/library/yql/public/udf/arrow/udf_arrow_helpers.h b/ydb/library/yql/public/udf/arrow/udf_arrow_helpers.h index cace7cf945e8..4bef86700eab 100644 --- a/ydb/library/yql/public/udf/arrow/udf_arrow_helpers.h +++ b/ydb/library/yql/public/udf/arrow/udf_arrow_helpers.h @@ -384,7 +384,7 @@ static arrow::Status Do(arrow::compute::KernelContext* ctx, const arrow::compute for (size_t j = 0; j < maxBlockLength && i < array.length; ++j, ++i) { auto item = readerImpl->GetItem(array, i); TDerived::Process(item, state.GetValueBuilder(), [&](auto out) { - builderImpl->Add(out); + builder.Add(out); }); } auto outputDatum = builderImpl->Build(false); diff --git a/ydb/library/yql/udfs/common/datetime2/datetime_udf.cpp b/ydb/library/yql/udfs/common/datetime2/datetime_udf.cpp index 18a1e6c1dd12..90e87a99660f 100644 --- a/ydb/library/yql/udfs/common/datetime2/datetime_udf.cpp +++ b/ydb/library/yql/udfs/common/datetime2/datetime_udf.cpp @@ -389,38 +389,28 @@ TTMStorage& Reference(NUdf::TUnboxedValuePod& value) { return *reinterpret_cast(value.GetRawPtr()); } -template -TValue DoAddMonths(const TValue& date, i64 months, const NUdf::IDateBuilder& builder) { +NUdf::TUnboxedValuePod DoAddMonths(const NUdf::TUnboxedValuePod& date, i64 months, const NUdf::IDateBuilder& builder) { auto result = date; auto& storage = Reference(result); if (!NYql::DateTime::DoAddMonths(storage, months, builder)) { - return TValue{}; + return NUdf::TUnboxedValuePod{}; } return result; } - -template -TValue DoAddQuarters(const TValue& date, i64 quarters, const NUdf::IDateBuilder& builder) { - return DoAddMonths(date, quarters * 3ll, builder); -} - -template -TValue DoAddYears(const TValue& date, i64 years, const NUdf::IDateBuilder& builder) { +NUdf::TUnboxedValuePod DoAddYears(const NUdf::TUnboxedValuePod& date, i64 years, const NUdf::IDateBuilder& builder) { auto result = date; auto& storage = Reference(result); if (!NYql::DateTime::DoAddYears(storage, years, builder)) { - return TValue{}; + return NUdf::TUnboxedValuePod{}; } return result; } #define ACCESSORS(field, type) \ - template \ - inline type Get##field(const TValue& tm) { \ + inline type Get##field(const TUnboxedValuePod& tm) { \ return (type)Reference(tm).field; \ } \ - template \ - Y_DECLARE_UNUSED inline void Set##field(TValue& tm, type value) { \ + Y_DECLARE_UNUSED inline void Set##field(TUnboxedValuePod& tm, type value) { \ Reference(tm).field = value; \ } @@ -562,24 +552,6 @@ TValue DoAddYears(const TValue& date, i64 years, const NUdf::IDateBuilder& build IFunctionTypeInfoBuilder& builder, bool typesOnly) { - const auto typeInfoHelper = builder.TypeInfoHelper(); - - TTupleTypeInspector tuple(*typeInfoHelper, userType); - Y_ENSURE(tuple); - Y_ENSURE(tuple.GetElementsCount() > 0); - TTupleTypeInspector argsTuple(*typeInfoHelper, tuple.GetElementType(0)); - Y_ENSURE(argsTuple); - - if (argsTuple.GetElementsCount() != 1) { - builder.SetError("Expected one argument"); - return true; - } - auto argType = argsTuple.GetElementType(0); - TVector argBlockTypes; - argBlockTypes.push_back(argType); - - TBlockTypeInspector block(*typeInfoHelper, argType); - builder.UserType(userType); builder.Args()->Add().Flags(ICallablePayload::TArgumentFlags::AutoMap); builder.Returns(builder.Resource(TMResourceName)); @@ -752,124 +724,68 @@ TValue DoAddYears(const TValue& date, i64 years, const NUdf::IDateBuilder& build // Get* - #define GET_METHOD(field, type) \ - struct TGet##field##KernelExec : TUnaryKernelExec, TFixedSizeArrayBuilder> { \ - template \ - static void Process(TBlockItem item, TSink& sink) { \ - sink(TBlockItem(Get##field(item))); \ - } \ - }; \ - BEGIN_SIMPLE_STRICT_ARROW_UDF(TGet##field, type(TAutoMap>)) { \ + SIMPLE_STRICT_UDF(TGet##field, type(TAutoMap>)) { \ Y_UNUSED(valueBuilder); \ return TUnboxedValuePod(Get##field(args[0])); \ - } \ - END_SIMPLE_ARROW_UDF_WITH_NULL_HANDLING(TGet##field, TGet##field##KernelExec::Do, arrow::compute::NullHandling::INTERSECTION); + } GET_METHOD(Year, ui16) GET_METHOD(DayOfYear, ui16) GET_METHOD(Month, ui8) - - template - TValue GetMonthNameValue(size_t idx) { - static const std::array monthNames = {{ - TValue::Embedded(TStringRef::Of("January")), - TValue::Embedded(TStringRef::Of("February")), - TValue::Embedded(TStringRef::Of("March")), - TValue::Embedded(TStringRef::Of("April")), - TValue::Embedded(TStringRef::Of("May")), - TValue::Embedded(TStringRef::Of("June")), - TValue::Embedded(TStringRef::Of("July")), - TValue::Embedded(TStringRef::Of("August")), - TValue::Embedded(TStringRef::Of("September")), - TValue::Embedded(TStringRef::Of("October")), - TValue::Embedded(TStringRef::Of("November")), - TValue::Embedded(TStringRef::Of("December")) - }}; - return monthNames.at(idx); - } - - struct TGetMonthNameKernelExec : TUnaryKernelExec, TStringArrayBuilder> { - template - static void Process(TBlockItem item, TSink& sink) { - sink(GetMonthNameValue(GetMonth(item) - 1U)); - } - }; - BEGIN_SIMPLE_STRICT_ARROW_UDF(TGetMonthName, char*(TAutoMap>)) { + SIMPLE_STRICT_UDF(TGetMonthName, char*(TAutoMap>)) { Y_UNUSED(valueBuilder); - return GetMonthNameValue(GetMonth(*args) - 1U); + static const std::array monthNames = {{ + TUnboxedValuePod::Embedded(TStringRef::Of("January")), + TUnboxedValuePod::Embedded(TStringRef::Of("February")), + TUnboxedValuePod::Embedded(TStringRef::Of("March")), + TUnboxedValuePod::Embedded(TStringRef::Of("April")), + TUnboxedValuePod::Embedded(TStringRef::Of("May")), + TUnboxedValuePod::Embedded(TStringRef::Of("June")), + TUnboxedValuePod::Embedded(TStringRef::Of("July")), + TUnboxedValuePod::Embedded(TStringRef::Of("August")), + TUnboxedValuePod::Embedded(TStringRef::Of("September")), + TUnboxedValuePod::Embedded(TStringRef::Of("October")), + TUnboxedValuePod::Embedded(TStringRef::Of("November")), + TUnboxedValuePod::Embedded(TStringRef::Of("December")) + }}; + return monthNames.at(GetMonth(*args) - 1U); } - END_SIMPLE_ARROW_UDF_WITH_NULL_HANDLING(TGetMonthName, TGetMonthNameKernelExec::Do, arrow::compute::NullHandling::INTERSECTION); GET_METHOD(WeekOfYear, ui8) GET_METHOD(WeekOfYearIso8601, ui8) - struct TGetDayOfMonthKernelExec : TUnaryKernelExec, TFixedSizeArrayBuilder> { - template - static void Process(TBlockItem item, TSink& sink) { - sink(GetDay(item)); - } - }; - - BEGIN_SIMPLE_STRICT_ARROW_UDF(TGetDayOfMonth, ui8(TAutoMap>)) { + SIMPLE_STRICT_UDF(TGetDayOfMonth, ui8(TAutoMap>)) { Y_UNUSED(valueBuilder); return TUnboxedValuePod(GetDay(args[0])); } - END_SIMPLE_ARROW_UDF_WITH_NULL_HANDLING(TGetDayOfMonth, TGetDayOfMonthKernelExec::Do, arrow::compute::NullHandling::INTERSECTION); GET_METHOD(DayOfWeek, ui8) - template - TValue GetDayNameValue(size_t idx) { - static const std::array dayNames = {{ - TValue::Embedded(TStringRef::Of("Monday")), - TValue::Embedded(TStringRef::Of("Tuesday")), - TValue::Embedded(TStringRef::Of("Wednesday")), - TValue::Embedded(TStringRef::Of("Thursday")), - TValue::Embedded(TStringRef::Of("Friday")), - TValue::Embedded(TStringRef::Of("Saturday")), - TValue::Embedded(TStringRef::Of("Sunday")) - }}; - return dayNames.at(idx); - } - - struct TGetDayOfWeekNameKernelExec : TUnaryKernelExec, TStringArrayBuilder> { - template - static void Process(TBlockItem item, TSink& sink) { - sink(GetDayNameValue(GetDayOfWeek(item) - 1U)); - } - }; - - BEGIN_SIMPLE_STRICT_ARROW_UDF(TGetDayOfWeekName, char*(TAutoMap>)) { + SIMPLE_STRICT_UDF(TGetDayOfWeekName, char*(TAutoMap>)) { Y_UNUSED(valueBuilder); - return GetDayNameValue(GetDayOfWeek(*args) - 1U); + static const std::array dayNames = {{ + TUnboxedValuePod::Embedded(TStringRef::Of("Monday")), + TUnboxedValuePod::Embedded(TStringRef::Of("Tuesday")), + TUnboxedValuePod::Embedded(TStringRef::Of("Wednesday")), + TUnboxedValuePod::Embedded(TStringRef::Of("Thursday")), + TUnboxedValuePod::Embedded(TStringRef::Of("Friday")), + TUnboxedValuePod::Embedded(TStringRef::Of("Saturday")), + TUnboxedValuePod::Embedded(TStringRef::Of("Sunday")) + }}; + return dayNames.at(GetDayOfWeek(*args) - 1U); } - END_SIMPLE_ARROW_UDF_WITH_NULL_HANDLING(TGetDayOfWeekName, TGetDayOfWeekNameKernelExec::Do, arrow::compute::NullHandling::INTERSECTION); GET_METHOD(TimezoneId, ui16) - struct TTGetTimezoneNameKernelExec : TUnaryKernelExec, TStringArrayBuilder> { - template - static void Process(TBlockItem item, TUdfKernelState& state, TSink& sink) { - auto timezoneId = GetTimezoneId(item); - if (timezoneId >= NUdf::GetTimezones().size()) { - sink(TBlockItem{}); - } else { - auto str = state.GetValueBuilder().NewString(NUdf::GetTimezones()[timezoneId]); - sink(str); - } - } - }; - - BEGIN_SIMPLE_STRICT_ARROW_UDF(TGetTimezoneName, char*(TAutoMap>)) { + SIMPLE_STRICT_UDF(TGetTimezoneName, char*(TAutoMap>)) { auto timezoneId = GetTimezoneId(args[0]); if (timezoneId >= NUdf::GetTimezones().size()) { return TUnboxedValuePod(); } return valueBuilder->NewString(NUdf::GetTimezones()[timezoneId]); } - END_SIMPLE_ARROW_UDF(TGetTimezoneName, TTGetTimezoneNameKernelExec::Do); // Update @@ -1241,32 +1157,20 @@ TValue DoAddYears(const TValue& date, i64 years, const NUdf::IDateBuilder& build auto& storage = Reference(args[0]); return TUnboxedValuePod((i64)storage.ToTimeOfDay()); } - END_SIMPLE_ARROW_UDF(TTimeOfDay, timeOfDayKernelExecDo); // Add ... - template - struct TAddKernelExec : TBinaryKernelExec> { - template - static void Process(TBlockItem date, TBlockItem arg, TUdfKernelState& state, TSink& sink) { - sink(Core(date, arg.Get(), state.GetValueBuilder().GetDateBuilder())); - } - }; - - BEGIN_SIMPLE_STRICT_ARROW_UDF(TShiftYears, TOptional>(TAutoMap>, i32)) { + SIMPLE_STRICT_UDF(TShiftYears, TOptional>(TAutoMap>, i32)) { return DoAddYears(args[0], args[1].Get(), valueBuilder->GetDateBuilder()); } - END_SIMPLE_ARROW_UDF(TShiftYears, TAddKernelExec>::Do); - BEGIN_SIMPLE_STRICT_ARROW_UDF(TShiftQuarters, TOptional>(TAutoMap>, i32)) { - return DoAddQuarters(args[0], args[1].Get(), valueBuilder->GetDateBuilder()); + SIMPLE_STRICT_UDF(TShiftQuarters, TOptional>(TAutoMap>, i32)) { + return DoAddMonths(args[0], 3ll * args[1].Get(), valueBuilder->GetDateBuilder()); } - END_SIMPLE_ARROW_UDF(TShiftQuarters, TAddKernelExec>::Do); - BEGIN_SIMPLE_STRICT_ARROW_UDF(TShiftMonths, TOptional>(TAutoMap>, i32)) { + SIMPLE_STRICT_UDF(TShiftMonths, TOptional>(TAutoMap>, i32)) { return DoAddMonths(args[0], args[1].Get(), valueBuilder->GetDateBuilder()); } - END_SIMPLE_ARROW_UDF(TShiftMonths, TAddKernelExec>::Do); template struct PrintNDigits; From b36b77c28753140a147e791dc8678195db43cdbd Mon Sep 17 00:00:00 2001 From: Fiodar Miron Date: Fri, 5 Jul 2024 16:26:09 +0300 Subject: [PATCH 05/18] fix build --- ydb/library/yql/minikql/comp_nodes/ut/mkql_blocks_ut.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ydb/library/yql/minikql/comp_nodes/ut/mkql_blocks_ut.cpp b/ydb/library/yql/minikql/comp_nodes/ut/mkql_blocks_ut.cpp index ca4bb2e546c5..94b8dd33ba85 100644 --- a/ydb/library/yql/minikql/comp_nodes/ut/mkql_blocks_ut.cpp +++ b/ydb/library/yql/minikql/comp_nodes/ut/mkql_blocks_ut.cpp @@ -16,7 +16,7 @@ BEGIN_SIMPLE_ARROW_UDF(TInc, i32(i32)) { struct TIncKernelExec : public NYql::NUdf::TUnaryKernelExec { template - static void Process(NYql::NUdf::TBlockItem arg, const TSink& sink) { + static void Process(NYql::NUdf::TBlockItem arg, const IValueBuilder& valueBuilder, const TSink& sink) { sink(NYql::NUdf::TBlockItem(arg.As() + 1)); } }; From e8c31b0bc6c02ef092efc3820661db662818b14d Mon Sep 17 00:00:00 2001 From: Fiodar Miron Date: Mon, 8 Jul 2024 13:45:19 +0300 Subject: [PATCH 06/18] add sink as lvalue --- .../yql/public/udf/arrow/udf_arrow_helpers.h | 47 ++++++++++++------- 1 file changed, 30 insertions(+), 17 deletions(-) diff --git a/ydb/library/yql/public/udf/arrow/udf_arrow_helpers.h b/ydb/library/yql/public/udf/arrow/udf_arrow_helpers.h index 4bef86700eab..384f86d99c8e 100644 --- a/ydb/library/yql/public/udf/arrow/udf_arrow_helpers.h +++ b/ydb/library/yql/public/udf/arrow/udf_arrow_helpers.h @@ -368,9 +368,10 @@ static arrow::Status Do(arrow::compute::KernelContext* ctx, const arrow::compute auto* builderImpl = CastToScalarBuilderImpl(builder); auto item = readerImpl->GetScalarItem(*arg.scalar()); - TDerived::Process(item, state.GetValueBuilder(), [&](auto out) { + auto sink = [&](auto out) { *res = builderImpl->Build(out); - }); + }; + TDerived::Process(item, state.GetValueBuilder(), sink); } else { auto& array = *arg.array(); @@ -383,9 +384,10 @@ static arrow::Status Do(arrow::compute::KernelContext* ctx, const arrow::compute for (int64_t i = 0; i < array.length;) { for (size_t j = 0; j < maxBlockLength && i < array.length; ++j, ++i) { auto item = readerImpl->GetItem(array, i); - TDerived::Process(item, state.GetValueBuilder(), [&](auto out) { - builder.Add(out); - }); + auto sink = [&](auto out) { + builderImpl->Add(out); + }; + TDerived::Process(item, state.GetValueBuilder(), sink); } auto outputDatum = builderImpl->Build(false); ForEachArrayData(outputDatum, [&](const auto& arr) { outputArrays.push_back(arr); }); @@ -418,9 +420,10 @@ struct TBinaryKernelExec { auto item1 = reader1Impl->GetScalarItem(*arg1.scalar()); auto item2 = reader2Impl->GetScalarItem(*arg2.scalar()); - TDerived::Process(item1, item2, state.GetValueBuilder(), [&](TBlockItem out) { + auto sink = [&](TBlockItem out) { *res = builderImpl->Build(out); - }); + }; + TDerived::Process(item1, item2, state.GetValueBuilder(), sink); } else if (arg1.is_scalar() && arg2.is_array()) { auto item1 = reader1Impl->GetScalarItem(*arg1.scalar()); @@ -434,9 +437,11 @@ struct TBinaryKernelExec { for (int64_t i = 0; i < array2.length;) { for (size_t j = 0; j < maxBlockLength && i < array2.length; ++j, ++i) { auto item2 = reader2Impl->GetItem(array2, i); - TDerived::Process(item1, item2, state.GetValueBuilder(), [&](TBlockItem out) { + + auto sink = [&](TBlockItem out) { builderImpl->Add(out); - }); + }; + TDerived::Process(item1, item2, state.GetValueBuilder(), sink); } auto outputDatum = builder.Build(false); ForEachArrayData(outputDatum, [&](const auto& arr) { outputArrays.push_back(arr); }); @@ -455,9 +460,11 @@ struct TBinaryKernelExec { for (int64_t i = 0; i < array1.length;) { for (size_t j = 0; j < maxBlockLength && i < array1.length; ++j, ++i) { auto item1 = reader1Impl->GetItem(array1, i); - TDerived::Process(item1, item2, state.GetValueBuilder(), [&](TBlockItem out) { + + auto sink = [&](TBlockItem out) { builderImpl->Add(out); - }); + }; + TDerived::Process(item1, item2, state.GetValueBuilder(), sink); } auto outputDatum = builder.Build(false); ForEachArrayData(outputDatum, [&](const auto& arr) { outputArrays.push_back(arr); }); @@ -479,9 +486,11 @@ struct TBinaryKernelExec { for (size_t j = 0; j < maxBlockLength && i < array1.length; ++j, ++i) { auto item1 = reader1Impl->GetItem(array1, i); auto item2 = reader2Impl->GetItem(array2, i); - TDerived::Process(item1, item2, state.GetValueBuilder(), [&](TBlockItem out) { + + auto sink = [&](TBlockItem out) { builderImpl->Add(out); - }); + }; + TDerived::Process(item1, item2, state.GetValueBuilder(), sink); } auto outputDatum = builder.Build(false); ForEachArrayData(outputDatum, [&](const auto& arr) { outputArrays.push_back(arr); }); @@ -536,9 +545,11 @@ struct TGenericKernelExec { auto& reader = state.GetReader(k); args[k] = reader.GetScalarItem(*batch[k].scalar()); } - TDerived::Process(items, state.GetValueBuilder(), [&](TBlockItem out) { + + auto sink = [&](TBlockItem out) { *res = builderImpl->Build(out); - }); + }; + TDerived::Process(items, state.GetValueBuilder(), sink); } else { auto& builder = state.GetArrayBuilder(); auto* builderImpl = CastToArrayBuilderImpl(builder); @@ -566,9 +577,11 @@ struct TGenericKernelExec { args[k] = reader.GetItem(*batch[k].array(), i); } - TDerived::Process(items, state.GetValueBuilder(), [&](TBlockItem out) { + + auto sink = [&](TBlockItem out) { builderImpl->Add(out); - }); + }; + TDerived::Process(items, state.GetValueBuilder(), sink); } auto outputDatum = builderImpl->Build(false); ForEachArrayData(outputDatum, [&](const auto& arr) { outputArrays.push_back(arr); }); From 079c11b8d90113ae1c190df16b946de57717e783 Mon Sep 17 00:00:00 2001 From: Fiodar Miron Date: Mon, 8 Jul 2024 15:58:54 +0300 Subject: [PATCH 07/18] fix build --- ydb/library/yql/minikql/comp_nodes/ut/mkql_blocks_ut.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ydb/library/yql/minikql/comp_nodes/ut/mkql_blocks_ut.cpp b/ydb/library/yql/minikql/comp_nodes/ut/mkql_blocks_ut.cpp index 94b8dd33ba85..4f89e9b6fb11 100644 --- a/ydb/library/yql/minikql/comp_nodes/ut/mkql_blocks_ut.cpp +++ b/ydb/library/yql/minikql/comp_nodes/ut/mkql_blocks_ut.cpp @@ -16,7 +16,7 @@ BEGIN_SIMPLE_ARROW_UDF(TInc, i32(i32)) { struct TIncKernelExec : public NYql::NUdf::TUnaryKernelExec { template - static void Process(NYql::NUdf::TBlockItem arg, const IValueBuilder& valueBuilder, const TSink& sink) { + static void Process(NYql::NUdf::TBlockItem arg, const NYql::NUdf::IValueBuilder& valueBuilder, const TSink& sink) { sink(NYql::NUdf::TBlockItem(arg.As() + 1)); } }; From 2f5a4ad0c70c3e22e00d83db60bc316b434c2715 Mon Sep 17 00:00:00 2001 From: Fiodar Miron Date: Mon, 8 Jul 2024 17:23:33 +0300 Subject: [PATCH 08/18] fix build --- ydb/library/yql/minikql/comp_nodes/ut/mkql_blocks_ut.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/ydb/library/yql/minikql/comp_nodes/ut/mkql_blocks_ut.cpp b/ydb/library/yql/minikql/comp_nodes/ut/mkql_blocks_ut.cpp index 4f89e9b6fb11..fa3374864c21 100644 --- a/ydb/library/yql/minikql/comp_nodes/ut/mkql_blocks_ut.cpp +++ b/ydb/library/yql/minikql/comp_nodes/ut/mkql_blocks_ut.cpp @@ -17,6 +17,7 @@ BEGIN_SIMPLE_ARROW_UDF(TInc, i32(i32)) { struct TIncKernelExec : public NYql::NUdf::TUnaryKernelExec { template static void Process(NYql::NUdf::TBlockItem arg, const NYql::NUdf::IValueBuilder& valueBuilder, const TSink& sink) { + Y_UNUSED(valueBuilder); sink(NYql::NUdf::TBlockItem(arg.As() + 1)); } }; From bdd9c882ca5512cb5cdf36c39cbb942d20a640eb Mon Sep 17 00:00:00 2001 From: Fiodar Miron Date: Tue, 9 Jul 2024 16:04:33 +0300 Subject: [PATCH 09/18] Revert "add sink as lvalue" This reverts commit e8c31b0bc6c02ef092efc3820661db662818b14d. --- .../yql/public/udf/arrow/udf_arrow_helpers.h | 47 +++++++------------ 1 file changed, 17 insertions(+), 30 deletions(-) diff --git a/ydb/library/yql/public/udf/arrow/udf_arrow_helpers.h b/ydb/library/yql/public/udf/arrow/udf_arrow_helpers.h index 384f86d99c8e..4bef86700eab 100644 --- a/ydb/library/yql/public/udf/arrow/udf_arrow_helpers.h +++ b/ydb/library/yql/public/udf/arrow/udf_arrow_helpers.h @@ -368,10 +368,9 @@ static arrow::Status Do(arrow::compute::KernelContext* ctx, const arrow::compute auto* builderImpl = CastToScalarBuilderImpl(builder); auto item = readerImpl->GetScalarItem(*arg.scalar()); - auto sink = [&](auto out) { + TDerived::Process(item, state.GetValueBuilder(), [&](auto out) { *res = builderImpl->Build(out); - }; - TDerived::Process(item, state.GetValueBuilder(), sink); + }); } else { auto& array = *arg.array(); @@ -384,10 +383,9 @@ static arrow::Status Do(arrow::compute::KernelContext* ctx, const arrow::compute for (int64_t i = 0; i < array.length;) { for (size_t j = 0; j < maxBlockLength && i < array.length; ++j, ++i) { auto item = readerImpl->GetItem(array, i); - auto sink = [&](auto out) { - builderImpl->Add(out); - }; - TDerived::Process(item, state.GetValueBuilder(), sink); + TDerived::Process(item, state.GetValueBuilder(), [&](auto out) { + builder.Add(out); + }); } auto outputDatum = builderImpl->Build(false); ForEachArrayData(outputDatum, [&](const auto& arr) { outputArrays.push_back(arr); }); @@ -420,10 +418,9 @@ struct TBinaryKernelExec { auto item1 = reader1Impl->GetScalarItem(*arg1.scalar()); auto item2 = reader2Impl->GetScalarItem(*arg2.scalar()); - auto sink = [&](TBlockItem out) { + TDerived::Process(item1, item2, state.GetValueBuilder(), [&](TBlockItem out) { *res = builderImpl->Build(out); - }; - TDerived::Process(item1, item2, state.GetValueBuilder(), sink); + }); } else if (arg1.is_scalar() && arg2.is_array()) { auto item1 = reader1Impl->GetScalarItem(*arg1.scalar()); @@ -437,11 +434,9 @@ struct TBinaryKernelExec { for (int64_t i = 0; i < array2.length;) { for (size_t j = 0; j < maxBlockLength && i < array2.length; ++j, ++i) { auto item2 = reader2Impl->GetItem(array2, i); - - auto sink = [&](TBlockItem out) { + TDerived::Process(item1, item2, state.GetValueBuilder(), [&](TBlockItem out) { builderImpl->Add(out); - }; - TDerived::Process(item1, item2, state.GetValueBuilder(), sink); + }); } auto outputDatum = builder.Build(false); ForEachArrayData(outputDatum, [&](const auto& arr) { outputArrays.push_back(arr); }); @@ -460,11 +455,9 @@ struct TBinaryKernelExec { for (int64_t i = 0; i < array1.length;) { for (size_t j = 0; j < maxBlockLength && i < array1.length; ++j, ++i) { auto item1 = reader1Impl->GetItem(array1, i); - - auto sink = [&](TBlockItem out) { + TDerived::Process(item1, item2, state.GetValueBuilder(), [&](TBlockItem out) { builderImpl->Add(out); - }; - TDerived::Process(item1, item2, state.GetValueBuilder(), sink); + }); } auto outputDatum = builder.Build(false); ForEachArrayData(outputDatum, [&](const auto& arr) { outputArrays.push_back(arr); }); @@ -486,11 +479,9 @@ struct TBinaryKernelExec { for (size_t j = 0; j < maxBlockLength && i < array1.length; ++j, ++i) { auto item1 = reader1Impl->GetItem(array1, i); auto item2 = reader2Impl->GetItem(array2, i); - - auto sink = [&](TBlockItem out) { + TDerived::Process(item1, item2, state.GetValueBuilder(), [&](TBlockItem out) { builderImpl->Add(out); - }; - TDerived::Process(item1, item2, state.GetValueBuilder(), sink); + }); } auto outputDatum = builder.Build(false); ForEachArrayData(outputDatum, [&](const auto& arr) { outputArrays.push_back(arr); }); @@ -545,11 +536,9 @@ struct TGenericKernelExec { auto& reader = state.GetReader(k); args[k] = reader.GetScalarItem(*batch[k].scalar()); } - - auto sink = [&](TBlockItem out) { + TDerived::Process(items, state.GetValueBuilder(), [&](TBlockItem out) { *res = builderImpl->Build(out); - }; - TDerived::Process(items, state.GetValueBuilder(), sink); + }); } else { auto& builder = state.GetArrayBuilder(); auto* builderImpl = CastToArrayBuilderImpl(builder); @@ -577,11 +566,9 @@ struct TGenericKernelExec { args[k] = reader.GetItem(*batch[k].array(), i); } - - auto sink = [&](TBlockItem out) { + TDerived::Process(items, state.GetValueBuilder(), [&](TBlockItem out) { builderImpl->Add(out); - }; - TDerived::Process(items, state.GetValueBuilder(), sink); + }); } auto outputDatum = builderImpl->Build(false); ForEachArrayData(outputDatum, [&](const auto& arr) { outputArrays.push_back(arr); }); From 7e4fadde6c332c8b12d74b0a5e4fb9af47b5e59e Mon Sep 17 00:00:00 2001 From: Fiodar Miron Date: Tue, 9 Jul 2024 17:38:50 +0300 Subject: [PATCH 10/18] builder->buiilderImpl --- ydb/library/yql/public/udf/arrow/udf_arrow_helpers.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ydb/library/yql/public/udf/arrow/udf_arrow_helpers.h b/ydb/library/yql/public/udf/arrow/udf_arrow_helpers.h index 4bef86700eab..cace7cf945e8 100644 --- a/ydb/library/yql/public/udf/arrow/udf_arrow_helpers.h +++ b/ydb/library/yql/public/udf/arrow/udf_arrow_helpers.h @@ -384,7 +384,7 @@ static arrow::Status Do(arrow::compute::KernelContext* ctx, const arrow::compute for (size_t j = 0; j < maxBlockLength && i < array.length; ++j, ++i) { auto item = readerImpl->GetItem(array, i); TDerived::Process(item, state.GetValueBuilder(), [&](auto out) { - builder.Add(out); + builderImpl->Add(out); }); } auto outputDatum = builderImpl->Build(false); From e50193d1088abec0e59fcf614b68d8abb51b557f Mon Sep 17 00:00:00 2001 From: Fiodar Miron Date: Tue, 9 Jul 2024 20:46:35 +0300 Subject: [PATCH 11/18] fix style in string udf --- .../yql/udfs/common/string/string_udf.cpp | 336 +++++++++--------- 1 file changed, 168 insertions(+), 168 deletions(-) diff --git a/ydb/library/yql/udfs/common/string/string_udf.cpp b/ydb/library/yql/udfs/common/string/string_udf.cpp index 3b1b95ca0757..04274d39a131 100644 --- a/ydb/library/yql/udfs/common/string/string_udf.cpp +++ b/ydb/library/yql/udfs/common/string/string_udf.cpp @@ -31,59 +31,59 @@ using namespace NUdf; namespace { -#define STRING_UDF(udfName, function) \ - BEGIN_SIMPLE_STRICT_ARROW_UDF(T##udfName, char*(TAutoMap)) { \ - const TString input(args[0].AsStringRef()); \ - const auto& result = function(input); \ - return valueBuilder->NewString(result); \ - } \ - \ - struct T##udfName##KernelExec \ - : public TUnaryKernelExec \ - { \ - template \ - static void Process(TBlockItem arg1, const IValueBuilder&, const TSink& sink) { \ - const TString input(arg1.AsStringRef()); \ - const auto& result = function(input); \ - sink(TBlockItem(result)); \ - } \ - }; \ - \ +#define STRING_UDF(udfName, function) \ + BEGIN_SIMPLE_STRICT_ARROW_UDF(T##udfName, char*(TAutoMap)) { \ + const TString input(args[0].AsStringRef()); \ + const auto& result = function(input); \ + return valueBuilder->NewString(result); \ + } \ + \ + struct T##udfName##KernelExec \ + : public TUnaryKernelExec \ + { \ + template \ + static void Process(TBlockItem arg1, const IValueBuilder&, const TSink& sink) {\ + const TString input(arg1.AsStringRef()); \ + const auto& result = function(input); \ + sink(TBlockItem(result)); \ + } \ + }; \ + \ END_SIMPLE_ARROW_UDF(T##udfName, T##udfName##KernelExec::Do) // 'unsafe' udf is actually strict - it returns null on any exception -#define STRING_UNSAFE_UDF(udfName, function) \ - BEGIN_SIMPLE_STRICT_ARROW_UDF(T##udfName, TOptional(TOptional)) { \ - EMPTY_RESULT_ON_EMPTY_ARG(0); \ - const TString input(args[0].AsStringRef()); \ - try { \ - const auto& result = function(input); \ - return valueBuilder->NewString(result); \ - } catch (yexception&) { \ - return TUnboxedValue(); \ - } \ - } \ - \ - struct T##udfName##KernelExec \ - : public TUnaryKernelExec \ - { \ - template \ - static void Process(TBlockItem arg1, const IValueBuilder&, const TSink& sink) { \ - if (!arg1) { \ - return sink(TBlockItem()); \ - } \ - \ - const TString input(arg1.AsStringRef()); \ - try { \ - const auto& result = function(input); \ - sink(TBlockItem(result)); \ - } catch (yexception&) { \ - return sink(TBlockItem()); \ - } \ - } \ - }; \ - \ +#define STRING_UNSAFE_UDF(udfName, function) \ + BEGIN_SIMPLE_STRICT_ARROW_UDF(T##udfName, TOptional(TOptional)) { \ + EMPTY_RESULT_ON_EMPTY_ARG(0); \ + const TString input(args[0].AsStringRef()); \ + try { \ + const auto& result = function(input); \ + return valueBuilder->NewString(result); \ + } catch (yexception&) { \ + return TUnboxedValue(); \ + } \ + } \ + \ + struct T##udfName##KernelExec \ + : public TUnaryKernelExec \ + { \ + template \ + static void Process(TBlockItem arg1, const IValueBuilder&, const TSink& sink) {\ + if (!arg1) { \ + return sink(TBlockItem()); \ + } \ + \ + const TString input(arg1.AsStringRef()); \ + try { \ + const auto& result = function(input); \ + sink(TBlockItem(result)); \ + } catch (yexception&) { \ + return sink(TBlockItem()); \ + } \ + } \ + }; \ + \ END_SIMPLE_ARROW_UDF(T##udfName, T##udfName##KernelExec::Do) #define STROKA_UDF(udfName, function) \ @@ -112,30 +112,30 @@ namespace { } \ } -#define STROKA_ASCII_CASE_UDF(udfName, function) \ - BEGIN_SIMPLE_STRICT_ARROW_UDF(T##udfName, char*(TAutoMap)) { \ - TString input(args[0].AsStringRef()); \ - if (input.function()) { \ - return valueBuilder->NewString(input); \ - } else { \ - return args[0]; \ - } \ - } \ - \ - struct T##udfName##KernelExec \ - : public TUnaryKernelExec \ - { \ - template \ - static void Process(TBlockItem arg1, const IValueBuilder&, const TSink& sink) { \ - TString input(arg1.AsStringRef()); \ - if (input.function()) { \ - sink(TBlockItem(input)); \ - } else { \ - sink(arg1); \ - } \ - } \ - }; \ - \ +#define STROKA_ASCII_CASE_UDF(udfName, function) \ + BEGIN_SIMPLE_STRICT_ARROW_UDF(T##udfName, char*(TAutoMap)) { \ + TString input(args[0].AsStringRef()); \ + if (input.function()) { \ + return valueBuilder->NewString(input); \ + } else { \ + return args[0]; \ + } \ + } \ + \ + struct T##udfName##KernelExec \ + : public TUnaryKernelExec \ + { \ + template \ + static void Process(TBlockItem arg1, const IValueBuilder&, const TSink& sink) {\ + TString input(arg1.AsStringRef()); \ + if (input.function()) { \ + sink(TBlockItem(input)); \ + } else { \ + sink(arg1); \ + } \ + } \ + }; \ + \ END_SIMPLE_ARROW_UDF(T##udfName, T##udfName##KernelExec::Do) @@ -164,44 +164,44 @@ namespace { } #define IS_ASCII_UDF(function) \ - BEGIN_SIMPLE_STRICT_ARROW_UDF(T##function, bool(TOptional)) { \ - Y_UNUSED(valueBuilder); \ - if (args[0]) { \ - const TStringBuf input(args[0].AsStringRef()); \ - bool result = true; \ - for (auto c : input) { \ - if (!function(c)) { \ - result = false; \ - break; \ - } \ - } \ - return TUnboxedValuePod(result); \ - } else { \ - return TUnboxedValuePod(false); \ - } \ - } \ - \ - struct T##function##KernelExec \ - : public TUnaryKernelExec \ - { \ - template \ - static void Process(TBlockItem arg1, const IValueBuilder&, const TSink& sink) { \ - if (arg1) { \ - const TStringBuf input(arg1.AsStringRef()); \ - bool result = true; \ - for (auto c : input) { \ - if (!function(c)) { \ - result = false; \ - break; \ - } \ - } \ - sink(TBlockItem(result)); \ - } else { \ - sink(TBlockItem(false)); \ - } \ - } \ - }; \ - \ + BEGIN_SIMPLE_STRICT_ARROW_UDF(T##function, bool(TOptional)) { \ + Y_UNUSED(valueBuilder); \ + if (args[0]) { \ + const TStringBuf input(args[0].AsStringRef()); \ + bool result = true; \ + for (auto c : input) { \ + if (!function(c)) { \ + result = false; \ + break; \ + } \ + } \ + return TUnboxedValuePod(result); \ + } else { \ + return TUnboxedValuePod(false); \ + } \ + } \ + \ + struct T##function##KernelExec \ + : public TUnaryKernelExec \ + { \ + template \ + static void Process(TBlockItem arg1, const IValueBuilder&, const TSink& sink) {\ + if (arg1) { \ + const TStringBuf input(arg1.AsStringRef()); \ + bool result = true; \ + for (auto c : input) { \ + if (!function(c)) { \ + result = false; \ + break; \ + } \ + } \ + sink(TBlockItem(result)); \ + } else { \ + sink(TBlockItem(false)); \ + } \ + } \ + }; \ + \ END_SIMPLE_ARROW_UDF(T##function, T##function##KernelExec::Do) @@ -231,7 +231,7 @@ namespace { : public TGenericKernelExec \ { \ template \ - static void Process(TBlockItem args, const IValueBuilder&, const TSink& sink) { \ + static void Process(TBlockItem args, const IValueBuilder&, const TSink& sink) { \ TStringStream result; \ const TStringBuf input(args.GetElement(0).AsStringRef()); \ char paddingSymbol = ' '; \ @@ -253,67 +253,67 @@ namespace { \ END_SIMPLE_ARROW_UDF(T##function, T##function##KernelExec::Do) -#define STRING_STREAM_NUM_FORMATTER_UDF(function, argType) \ - BEGIN_SIMPLE_STRICT_ARROW_UDF(T##function, char*(TAutoMap)) { \ - TStringStream result; \ - result << function(args[0].Get()); \ - return valueBuilder->NewString(TStringRef(result.Data(), result.Size())); \ - } \ - \ - struct T##function##KernelExec \ - : public TUnaryKernelExec \ - { \ - template \ - static void Process(TBlockItem arg1, const IValueBuilder&, const TSink& sink) { \ - TStringStream result; \ - result << function(arg1.Get()); \ - sink(TBlockItem(TStringRef(result.Data(), result.Size()))); \ - } \ - }; \ - \ +#define STRING_STREAM_NUM_FORMATTER_UDF(function, argType) \ + BEGIN_SIMPLE_STRICT_ARROW_UDF(T##function, char*(TAutoMap)) { \ + TStringStream result; \ + result << function(args[0].Get()); \ + return valueBuilder->NewString(TStringRef(result.Data(), result.Size())); \ + } \ + \ + struct T##function##KernelExec \ + : public TUnaryKernelExec \ + { \ + template \ + static void Process(TBlockItem arg1, const IValueBuilder&, const TSink& sink) {\ + TStringStream result; \ + result << function(arg1.Get()); \ + sink(TBlockItem(TStringRef(result.Data(), result.Size()))); \ + } \ + }; \ + \ END_SIMPLE_ARROW_UDF(T##function, T##function##KernelExec::Do) -#define STRING_STREAM_TEXT_FORMATTER_UDF(function) \ - BEGIN_SIMPLE_STRICT_ARROW_UDF(T##function, char*(TAutoMap)) { \ - TStringStream result; \ - const TStringBuf input(args[0].AsStringRef()); \ - result << function(input); \ - return valueBuilder->NewString(TStringRef(result.Data(), result.Size())); \ - } \ - \ - struct T##function##KernelExec \ - : public TUnaryKernelExec \ - { \ - template \ - static void Process(TBlockItem arg1, const IValueBuilder&, const TSink& sink) { \ - TStringStream result; \ - const TStringBuf input(arg1.AsStringRef()); \ - result << function(input); \ - sink(TBlockItem(TStringRef(result.Data(), result.Size()))); \ - } \ - }; \ - \ +#define STRING_STREAM_TEXT_FORMATTER_UDF(function) \ + BEGIN_SIMPLE_STRICT_ARROW_UDF(T##function, char*(TAutoMap)) { \ + TStringStream result; \ + const TStringBuf input(args[0].AsStringRef()); \ + result << function(input); \ + return valueBuilder->NewString(TStringRef(result.Data(), result.Size())); \ + } \ + \ + struct T##function##KernelExec \ + : public TUnaryKernelExec \ + { \ + template \ + static void Process(TBlockItem arg1, const IValueBuilder&, const TSink& sink) {\ + TStringStream result; \ + const TStringBuf input(arg1.AsStringRef()); \ + result << function(input); \ + sink(TBlockItem(TStringRef(result.Data(), result.Size()))); \ + } \ + }; \ + \ END_SIMPLE_ARROW_UDF(T##function, T##function##KernelExec::Do) -#define STRING_STREAM_HRSZ_FORMATTER_UDF(udfName, hrSize) \ - BEGIN_SIMPLE_STRICT_ARROW_UDF(T##udfName, char*(TAutoMap)) { \ - TStringStream result; \ - result << HumanReadableSize(args[0].Get(), hrSize); \ - return valueBuilder->NewString(TStringRef(result.Data(), result.Size())); \ - } \ - \ - struct T##udfName##KernelExec \ - : public TUnaryKernelExec \ - { \ - template \ - static void Process(TBlockItem arg1, const IValueBuilder&, const TSink& sink) { \ - TStringStream result; \ - result << HumanReadableSize(arg1.Get(), hrSize); \ - sink(TBlockItem(TStringRef(result.Data(), result.Size()))); \ - } \ - }; \ - \ +#define STRING_STREAM_HRSZ_FORMATTER_UDF(udfName, hrSize) \ + BEGIN_SIMPLE_STRICT_ARROW_UDF(T##udfName, char*(TAutoMap)) { \ + TStringStream result; \ + result << HumanReadableSize(args[0].Get(), hrSize); \ + return valueBuilder->NewString(TStringRef(result.Data(), result.Size())); \ + } \ + \ + struct T##udfName##KernelExec \ + : public TUnaryKernelExec \ + { \ + template \ + static void Process(TBlockItem arg1, const IValueBuilder&, const TSink& sink) {\ + TStringStream result; \ + result << HumanReadableSize(arg1.Get(), hrSize); \ + sink(TBlockItem(TStringRef(result.Data(), result.Size()))); \ + } \ + }; \ + \ END_SIMPLE_ARROW_UDF(T##udfName, T##udfName##KernelExec::Do) #define STRING_UDF_MAP(XX) \ From 786cbddc321c418a1c49be118b0c92de35ddd53f Mon Sep 17 00:00:00 2001 From: Fiodar Miron Date: Tue, 9 Jul 2024 21:04:07 +0300 Subject: [PATCH 12/18] fix style --- ydb/library/yql/public/udf/arrow/udf_arrow_helpers.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ydb/library/yql/public/udf/arrow/udf_arrow_helpers.h b/ydb/library/yql/public/udf/arrow/udf_arrow_helpers.h index cace7cf945e8..45f8434c0f45 100644 --- a/ydb/library/yql/public/udf/arrow/udf_arrow_helpers.h +++ b/ydb/library/yql/public/udf/arrow/udf_arrow_helpers.h @@ -357,7 +357,7 @@ TReader* CastToBlockReaderImpl(IBlockReader& reader) { template struct TUnaryKernelExec { -static arrow::Status Do(arrow::compute::KernelContext* ctx, const arrow::compute::ExecBatch& batch, arrow::Datum* res) { + static arrow::Status Do(arrow::compute::KernelContext* ctx, const arrow::compute::ExecBatch& batch, arrow::Datum* res) { auto& state = dynamic_cast(*ctx->state()); auto& reader = state.GetReader(0); auto* readerImpl = CastToBlockReaderImpl(reader); From c0fa8f7ea19d06392a90ca4b931979e16e977556 Mon Sep 17 00:00:00 2001 From: Fiodar Miron Date: Tue, 9 Jul 2024 21:08:19 +0300 Subject: [PATCH 13/18] auto->TBlockItem --- ydb/library/yql/public/udf/arrow/udf_arrow_helpers.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ydb/library/yql/public/udf/arrow/udf_arrow_helpers.h b/ydb/library/yql/public/udf/arrow/udf_arrow_helpers.h index 45f8434c0f45..9136909775ac 100644 --- a/ydb/library/yql/public/udf/arrow/udf_arrow_helpers.h +++ b/ydb/library/yql/public/udf/arrow/udf_arrow_helpers.h @@ -368,7 +368,7 @@ struct TUnaryKernelExec { auto* builderImpl = CastToScalarBuilderImpl(builder); auto item = readerImpl->GetScalarItem(*arg.scalar()); - TDerived::Process(item, state.GetValueBuilder(), [&](auto out) { + TDerived::Process(item, state.GetValueBuilder(), [&](TBlockItem out) { *res = builderImpl->Build(out); }); } @@ -383,7 +383,7 @@ struct TUnaryKernelExec { for (int64_t i = 0; i < array.length;) { for (size_t j = 0; j < maxBlockLength && i < array.length; ++j, ++i) { auto item = readerImpl->GetItem(array, i); - TDerived::Process(item, state.GetValueBuilder(), [&](auto out) { + TDerived::Process(item, state.GetValueBuilder(), [&](TBlockItem out) { builderImpl->Add(out); }); } From 39e94701a24f24dbbf41faa510f729c86af363c9 Mon Sep 17 00:00:00 2001 From: Fiodar Miron Date: Tue, 9 Jul 2024 22:37:24 +0300 Subject: [PATCH 14/18] remove embedded --- ydb/library/yql/public/udf/arrow/block_item.h | 13 ------------- 1 file changed, 13 deletions(-) diff --git a/ydb/library/yql/public/udf/arrow/block_item.h b/ydb/library/yql/public/udf/arrow/block_item.h index c0e9a018a886..f04a25666b12 100644 --- a/ydb/library/yql/public/udf/arrow/block_item.h +++ b/ydb/library/yql/public/udf/arrow/block_item.h @@ -59,19 +59,6 @@ class TBlockItem { Raw.Halfs[1] = high; } - inline static TBlockItem Embedded(const TStringRef& value) { - UDF_VERIFY(value.Size() <= sizeof(TRawEmbeddedValue::Buffer)); - - TBlockItem v; - v.Raw.Embedded.Size = value.Size(); - v.Raw.Embedded.Meta = static_cast(EMarkers::Embedded); - if (v.Raw.Embedded.Size) { - std::memcpy(v.Raw.Embedded.Buffer, value.Data(), v.Raw.Embedded.Size); - } - - return v; - } - inline ui64 Low() const { return Raw.Halfs[0]; } From 4884058a4945beeee88e885bf81784a77239b48f Mon Sep 17 00:00:00 2001 From: Fiodar Miron Date: Wed, 10 Jul 2024 12:21:26 +0300 Subject: [PATCH 15/18] remove unboxed value build from scalar builder --- ydb/library/yql/public/udf/arrow/block_builder.h | 1 - 1 file changed, 1 deletion(-) diff --git a/ydb/library/yql/public/udf/arrow/block_builder.h b/ydb/library/yql/public/udf/arrow/block_builder.h index f63d0601741b..2f46532de716 100644 --- a/ydb/library/yql/public/udf/arrow/block_builder.h +++ b/ydb/library/yql/public/udf/arrow/block_builder.h @@ -55,7 +55,6 @@ class IScalarBuilder { public: virtual ~IScalarBuilder() = default; virtual arrow::Datum Build(TBlockItem value) const = 0; - virtual arrow::Datum Build(NUdf::TUnboxedValuePod value) const = 0; }; inline std::shared_ptr GetArrowType(const ITypeInfoHelper& typeInfoHelper, const TType* type) { From 7b86f22f1bbcb7200a047ed5f1ccd41ca2acb266 Mon Sep 17 00:00:00 2001 From: Fiodar Miron Date: Wed, 10 Jul 2024 18:21:31 +0300 Subject: [PATCH 16/18] move value builder to second argument --- .../yql/public/udf/arrow/udf_arrow_helpers.h | 16 +++---- .../yql/udfs/common/string/string_udf.cpp | 38 ++++++++--------- .../udfs/common/url_base/lib/url_base_udf.h | 42 +++++++++---------- 3 files changed, 48 insertions(+), 48 deletions(-) diff --git a/ydb/library/yql/public/udf/arrow/udf_arrow_helpers.h b/ydb/library/yql/public/udf/arrow/udf_arrow_helpers.h index 9136909775ac..9b98b4ff3dae 100644 --- a/ydb/library/yql/public/udf/arrow/udf_arrow_helpers.h +++ b/ydb/library/yql/public/udf/arrow/udf_arrow_helpers.h @@ -368,7 +368,7 @@ struct TUnaryKernelExec { auto* builderImpl = CastToScalarBuilderImpl(builder); auto item = readerImpl->GetScalarItem(*arg.scalar()); - TDerived::Process(item, state.GetValueBuilder(), [&](TBlockItem out) { + TDerived::Process(&state.GetValueBuilder(), item, [&](TBlockItem out) { *res = builderImpl->Build(out); }); } @@ -383,7 +383,7 @@ struct TUnaryKernelExec { for (int64_t i = 0; i < array.length;) { for (size_t j = 0; j < maxBlockLength && i < array.length; ++j, ++i) { auto item = readerImpl->GetItem(array, i); - TDerived::Process(item, state.GetValueBuilder(), [&](TBlockItem out) { + TDerived::Process(&state.GetValueBuilder(), item, [&](TBlockItem out) { builderImpl->Add(out); }); } @@ -418,7 +418,7 @@ struct TBinaryKernelExec { auto item1 = reader1Impl->GetScalarItem(*arg1.scalar()); auto item2 = reader2Impl->GetScalarItem(*arg2.scalar()); - TDerived::Process(item1, item2, state.GetValueBuilder(), [&](TBlockItem out) { + TDerived::Process(&state.GetValueBuilder(), item1, item2, [&](TBlockItem out) { *res = builderImpl->Build(out); }); } @@ -434,7 +434,7 @@ struct TBinaryKernelExec { for (int64_t i = 0; i < array2.length;) { for (size_t j = 0; j < maxBlockLength && i < array2.length; ++j, ++i) { auto item2 = reader2Impl->GetItem(array2, i); - TDerived::Process(item1, item2, state.GetValueBuilder(), [&](TBlockItem out) { + TDerived::Process(&state.GetValueBuilder(), item1, item2, [&](TBlockItem out) { builderImpl->Add(out); }); } @@ -455,7 +455,7 @@ struct TBinaryKernelExec { for (int64_t i = 0; i < array1.length;) { for (size_t j = 0; j < maxBlockLength && i < array1.length; ++j, ++i) { auto item1 = reader1Impl->GetItem(array1, i); - TDerived::Process(item1, item2, state.GetValueBuilder(), [&](TBlockItem out) { + TDerived::Process(&state.GetValueBuilder(), item1, item2, [&](TBlockItem out) { builderImpl->Add(out); }); } @@ -479,7 +479,7 @@ struct TBinaryKernelExec { for (size_t j = 0; j < maxBlockLength && i < array1.length; ++j, ++i) { auto item1 = reader1Impl->GetItem(array1, i); auto item2 = reader2Impl->GetItem(array2, i); - TDerived::Process(item1, item2, state.GetValueBuilder(), [&](TBlockItem out) { + TDerived::Process(&state.GetValueBuilder(), item1, item2, [&](TBlockItem out) { builderImpl->Add(out); }); } @@ -536,7 +536,7 @@ struct TGenericKernelExec { auto& reader = state.GetReader(k); args[k] = reader.GetScalarItem(*batch[k].scalar()); } - TDerived::Process(items, state.GetValueBuilder(), [&](TBlockItem out) { + TDerived::Process(&state.GetValueBuilder(), items, [&](TBlockItem out) { *res = builderImpl->Build(out); }); } else { @@ -566,7 +566,7 @@ struct TGenericKernelExec { args[k] = reader.GetItem(*batch[k].array(), i); } - TDerived::Process(items, state.GetValueBuilder(), [&](TBlockItem out) { + TDerived::Process(&state.GetValueBuilder(), items, [&](TBlockItem out) { builderImpl->Add(out); }); } diff --git a/ydb/library/yql/udfs/common/string/string_udf.cpp b/ydb/library/yql/udfs/common/string/string_udf.cpp index 04274d39a131..c8234277d6fd 100644 --- a/ydb/library/yql/udfs/common/string/string_udf.cpp +++ b/ydb/library/yql/udfs/common/string/string_udf.cpp @@ -42,7 +42,7 @@ namespace { : public TUnaryKernelExec \ { \ template \ - static void Process(TBlockItem arg1, const IValueBuilder&, const TSink& sink) {\ + static void Process(const IValueBuilder*, TBlockItem arg1, const TSink& sink) {\ const TString input(arg1.AsStringRef()); \ const auto& result = function(input); \ sink(TBlockItem(result)); \ @@ -69,7 +69,7 @@ namespace { : public TUnaryKernelExec \ { \ template \ - static void Process(TBlockItem arg1, const IValueBuilder&, const TSink& sink) {\ + static void Process(const IValueBuilder*, TBlockItem arg1, const TSink& sink) {\ if (!arg1) { \ return sink(TBlockItem()); \ } \ @@ -126,7 +126,7 @@ namespace { : public TUnaryKernelExec \ { \ template \ - static void Process(TBlockItem arg1, const IValueBuilder&, const TSink& sink) {\ + static void Process(const IValueBuilder*, TBlockItem arg1, const TSink& sink) {\ TString input(arg1.AsStringRef()); \ if (input.function()) { \ sink(TBlockItem(input)); \ @@ -185,7 +185,7 @@ namespace { : public TUnaryKernelExec \ { \ template \ - static void Process(TBlockItem arg1, const IValueBuilder&, const TSink& sink) {\ + static void Process(const IValueBuilder*, TBlockItem arg1, const TSink& sink) {\ if (arg1) { \ const TStringBuf input(arg1.AsStringRef()); \ bool result = true; \ @@ -231,7 +231,7 @@ namespace { : public TGenericKernelExec \ { \ template \ - static void Process(TBlockItem args, const IValueBuilder&, const TSink& sink) { \ + static void Process(const IValueBuilder*, TBlockItem args, const TSink& sink) { \ TStringStream result; \ const TStringBuf input(args.GetElement(0).AsStringRef()); \ char paddingSymbol = ' '; \ @@ -264,7 +264,7 @@ namespace { : public TUnaryKernelExec \ { \ template \ - static void Process(TBlockItem arg1, const IValueBuilder&, const TSink& sink) {\ + static void Process(const IValueBuilder*, TBlockItem arg1, const TSink& sink) {\ TStringStream result; \ result << function(arg1.Get()); \ sink(TBlockItem(TStringRef(result.Data(), result.Size()))); \ @@ -285,7 +285,7 @@ namespace { : public TUnaryKernelExec \ { \ template \ - static void Process(TBlockItem arg1, const IValueBuilder&, const TSink& sink) {\ + static void Process(const IValueBuilder*, TBlockItem arg1, const TSink& sink) {\ TStringStream result; \ const TStringBuf input(arg1.AsStringRef()); \ result << function(input); \ @@ -307,7 +307,7 @@ namespace { : public TUnaryKernelExec \ { \ template \ - static void Process(TBlockItem arg1, const IValueBuilder&, const TSink& sink) {\ + static void Process(const IValueBuilder*, TBlockItem arg1, const TSink& sink) {\ TStringStream result; \ result << HumanReadableSize(arg1.Get(), hrSize); \ sink(TBlockItem(TStringRef(result.Data(), result.Size()))); \ @@ -414,7 +414,7 @@ namespace { : public TBinaryKernelExec { template - static void Process(TBlockItem arg1, TBlockItem arg2, const IValueBuilder&, const TSink& sink) { + static void Process(const IValueBuilder*, TBlockItem arg1, TBlockItem arg2, const TSink& sink) { TString input(arg1.AsStringRef()); ui64 maxLength = arg2.Get(); CollapseText(input, maxLength); @@ -437,7 +437,7 @@ namespace { struct TContainsKernelExec : public TBinaryKernelExec { template - static void Process(TBlockItem arg1, TBlockItem arg2, const IValueBuilder&, const TSink& sink) { + static void Process(const IValueBuilder*, TBlockItem arg1, TBlockItem arg2, const TSink& sink) { if (!arg1) return sink(TBlockItem(false)); @@ -461,7 +461,7 @@ namespace { : public TGenericKernelExec { template - static void Process(TBlockItem args, const IValueBuilder&, const TSink& sink) { + static void Process(const IValueBuilder*, TBlockItem args, const TSink& sink) { TString result(args.GetElement(0).AsStringRef()); const TStringBuf what(args.GetElement(1).AsStringRef()); const TStringBuf with(args.GetElement(2).AsStringRef()); @@ -490,7 +490,7 @@ namespace { : public TGenericKernelExec { template - static void Process(TBlockItem args, const IValueBuilder&, const TSink& sink) { + static void Process(const IValueBuilder*, TBlockItem args, const TSink& sink) { std::string result(args.GetElement(0).AsStringRef()); const std::string_view what(args.GetElement(1).AsStringRef()); const std::string_view with(args.GetElement(2).AsStringRef()); @@ -519,7 +519,7 @@ namespace { : public TGenericKernelExec { template - static void Process(TBlockItem args, const IValueBuilder&, const TSink& sink) { + static void Process(const IValueBuilder*, TBlockItem args, const TSink& sink) { std::string result(args.GetElement(0).AsStringRef()); const std::string_view what(args.GetElement(1).AsStringRef()); const std::string_view with(args.GetElement(2).AsStringRef()); @@ -558,7 +558,7 @@ namespace { : public TBinaryKernelExec { template - static void Process(TBlockItem arg1, TBlockItem arg2, const IValueBuilder&, const TSink& sink) { + static void Process(const IValueBuilder*, TBlockItem arg1, TBlockItem arg2, const TSink& sink) { std::string input(arg1.AsStringRef()); const std::string_view remove(arg2.AsStringRef()); std::array chars{}; @@ -602,7 +602,7 @@ namespace { : public TBinaryKernelExec { template - static void Process(TBlockItem arg1, TBlockItem arg2, const IValueBuilder&, const TSink& sink) { + static void Process(const IValueBuilder*, TBlockItem arg1, TBlockItem arg2, const TSink& sink) { std::string input(arg1.AsStringRef()); const std::string_view remove(arg2.AsStringRef()); std::array chars{}; @@ -642,7 +642,7 @@ namespace { : public TBinaryKernelExec { template - static void Process(TBlockItem arg1, TBlockItem arg2, const IValueBuilder&, const TSink& sink) { + static void Process(const IValueBuilder*, TBlockItem arg1, TBlockItem arg2, const TSink& sink) { std::string input(arg1.AsStringRef()); const std::string_view remove(arg2.AsStringRef()); std::array chars{}; @@ -789,7 +789,7 @@ namespace { struct TLevensteinDistanceKernelExec : public TBinaryKernelExec { template - static void Process(TBlockItem arg1, TBlockItem arg2, const IValueBuilder&, const TSink& sink) { + static void Process(const IValueBuilder*, TBlockItem arg1, TBlockItem arg2, const TSink& sink) { const std::string_view left(arg1.AsStringRef()); const std::string_view right(arg2.AsStringRef()); const ui64 result = NLevenshtein::Distance(left, right); @@ -811,7 +811,7 @@ namespace { : public TUnaryKernelExec { template - static void Process(TBlockItem arg1, const IValueBuilder&, const TSink& sink) { + static void Process(const IValueBuilder*, TBlockItem arg1, const TSink& sink) { TStringStream result; result << HumanReadable(TDuration::MicroSeconds(arg1.Get())); sink(TBlockItem(TStringRef(result.Data(), result.Size()))); @@ -829,7 +829,7 @@ namespace { struct TPrecKernelExec : public TBinaryKernelExec { template - static void Process(TBlockItem arg1, TBlockItem arg2, const IValueBuilder&, const TSink& sink) { + static void Process(const IValueBuilder*, TBlockItem arg1, TBlockItem arg2, const TSink& sink) { TStringStream result; result << Prec(arg1.Get(), arg2.Get()); sink(TBlockItem(TStringRef(result.Data(), result.Size()))); diff --git a/ydb/library/yql/udfs/common/url_base/lib/url_base_udf.h b/ydb/library/yql/udfs/common/url_base/lib/url_base_udf.h index 89d673ff82b8..482550fa0a55 100644 --- a/ydb/library/yql/udfs/common/url_base/lib/url_base_udf.h +++ b/ydb/library/yql/udfs/common/url_base/lib/url_base_udf.h @@ -36,7 +36,7 @@ inline bool PrepareUrl(const std::string_view& keyStr, TUri& parser) { } \ struct udfName##KernelExec : public TUnaryKernelExec { \ template \ - static void Process(TBlockItem arg, const IValueBuilder&, const TSink& sink) { \ + static void Process(const IValueBuilder*, TBlockItem arg, const TSink& sink) { \ if (!arg) { \ return sink(TBlockItem()); \ } \ @@ -60,7 +60,7 @@ BEGIN_SIMPLE_ARROW_UDF(TNormalize, TOptional(TOptional)) { } struct TNormalizeKernelExec : public TUnaryKernelExec { template - static void Process(TBlockItem arg, const IValueBuilder&, const TSink& sink) { + static void Process(const IValueBuilder*, TBlockItem arg, const TSink& sink) { if (!arg) { return sink(TBlockItem()); } @@ -81,7 +81,7 @@ BEGIN_SIMPLE_STRICT_ARROW_UDF(TGetScheme, char*(TAutoMap)) { } struct TGetSchemeKernelExec : public TUnaryKernelExec { template - static void Process(TBlockItem arg, const IValueBuilder&, const TSink& sink) { + static void Process(const IValueBuilder*, TBlockItem arg, const TSink& sink) { const std::string_view url(arg.AsStringRef()); const std::string_view prefix(GetSchemePrefix(url)); const std::string_view scheme = url.substr(std::distance(url.begin(), prefix.begin()), prefix.size()); @@ -124,7 +124,7 @@ BEGIN_SIMPLE_ARROW_UDF(TGetPort, TOptional(TOptional)) { } struct TGetPortKernelExec : public TUnaryKernelExec { template - static void Process(TBlockItem arg, const IValueBuilder&, const TSink& sink) { + static void Process(const IValueBuilder*, TBlockItem arg, const TSink& sink) { if (!arg) { return sink(TBlockItem()); } @@ -152,7 +152,7 @@ BEGIN_SIMPLE_ARROW_UDF(TGetTail, TOptional(TOptional)) { } struct TGetTailKernelExec : public TUnaryKernelExec { template - static void Process(TBlockItem arg, const IValueBuilder&, const TSink& sink) { + static void Process(const IValueBuilder*, TBlockItem arg, const TSink& sink) { if (!arg) { return sink(TBlockItem()); } @@ -186,7 +186,7 @@ BEGIN_SIMPLE_ARROW_UDF(TGetPath, TOptional(TOptional)) { } struct TGetPathKernelExec : public TUnaryKernelExec { template - static void Process(TBlockItem arg, const IValueBuilder&, const TSink& sink) { + static void Process(const IValueBuilder*, TBlockItem arg, const TSink& sink) { if (!arg) { return sink(TBlockItem()); } @@ -216,7 +216,7 @@ BEGIN_SIMPLE_ARROW_UDF(TGetFragment, TOptional(TOptional)) { } struct TGetFragmentKernelExec : public TUnaryKernelExec { template - static void Process(TBlockItem arg, const IValueBuilder&, const TSink& sink) { + static void Process(const IValueBuilder*, TBlockItem arg, const TSink& sink) { if (!arg) { return sink(TBlockItem()); } @@ -256,7 +256,7 @@ BEGIN_SIMPLE_ARROW_UDF(TGetDomain, TOptional(TOptional, ui8)) { } struct TGetDomainKernelExec : public TBinaryKernelExec { template - static void Process(TBlockItem arg1, TBlockItem arg2, const IValueBuilder&, const TSink& sink) { + static void Process(const IValueBuilder*, TBlockItem arg1, TBlockItem arg2, const TSink& sink) { if (!arg1) { return sink(TBlockItem()); } @@ -276,7 +276,7 @@ BEGIN_SIMPLE_ARROW_UDF(TGetTLD, char*(TAutoMap)) { } struct TGetTLDKernelExec : public TUnaryKernelExec { template - static void Process(TBlockItem arg, const IValueBuilder&, const TSink& sink) { + static void Process(const IValueBuilder*, TBlockItem arg, const TSink& sink) { const TStringBuf url(arg.AsStringRef()); return sink(TBlockItem(GetZone(GetOnlyHost(url)))); } @@ -291,7 +291,7 @@ BEGIN_SIMPLE_ARROW_UDF(TGetDomainLevel, ui64(TAutoMap)) { } struct TGetDomainLevelKernelExec : public TUnaryKernelExec { template - static void Process(TBlockItem arg, const IValueBuilder&, const TSink& sink) { + static void Process(const IValueBuilder*, TBlockItem arg, const TSink& sink) { std::vector parts; StringSplitter(GetOnlyHost(arg.AsStringRef())).Split('.').AddTo(&parts); return sink(TBlockItem(ui64(parts.size()))); @@ -360,7 +360,7 @@ BEGIN_SIMPLE_ARROW_UDF(TGetCGIParam, TOptional(TOptional, char*)) } struct TGetCGIParamKernelExec : public TBinaryKernelExec { template - static void Process(TBlockItem arg1, TBlockItem arg2, const IValueBuilder&, const TSink& sink) { + static void Process(const IValueBuilder*, TBlockItem arg1, TBlockItem arg2, const TSink& sink) { if (!arg1) { return sink(TBlockItem()); } @@ -387,7 +387,7 @@ BEGIN_SIMPLE_ARROW_UDF(TCutQueryStringAndFragment, char*(TAutoMap)) { } struct TCutQueryStringAndFragmentKernelExec : public TUnaryKernelExec { template - static void Process(TBlockItem arg, const IValueBuilder&, const TSink& sink) { + static void Process(const IValueBuilder*, TBlockItem arg, const TSink& sink) { const std::string_view input(arg.AsStringRef()); const auto cut = input.find_first_of("?#"); sink(TBlockItem(arg.AsStringRef().Substring(0U, cut))); @@ -407,7 +407,7 @@ BEGIN_SIMPLE_ARROW_UDF(TEncode, TOptional(TOptional)) { } struct TEncodeKernelExec : public TUnaryKernelExec { template - static void Process(TBlockItem arg, const IValueBuilder&, const TSink& sink) { + static void Process(const IValueBuilder*, TBlockItem arg, const TSink& sink) { if (!arg) { return sink(TBlockItem()); } @@ -435,7 +435,7 @@ BEGIN_SIMPLE_ARROW_UDF(TDecode, TOptional(TOptional)) { } struct TDecodeKernelExec : public TUnaryKernelExec { template - static void Process(TBlockItem arg, const IValueBuilder&, const TSink& sink) { + static void Process(const IValueBuilder*, TBlockItem arg, const TSink& sink) { if (!arg) { return sink(TBlockItem()); } @@ -457,7 +457,7 @@ BEGIN_SIMPLE_ARROW_UDF(TIsKnownTLD, bool(TAutoMap)) { } struct TIsKnownTLDKernelExec : public TUnaryKernelExec { template - static void Process(TBlockItem arg, const IValueBuilder&, const TSink& sink) { + static void Process(const IValueBuilder*, TBlockItem arg, const TSink& sink) { sink(TBlockItem(static_cast(IsTld(arg.AsStringRef())))); } }; @@ -469,7 +469,7 @@ BEGIN_SIMPLE_ARROW_UDF(TIsWellKnownTLD, bool(TAutoMap)) { } struct TIsWellKnownTLDKernelExec : public TUnaryKernelExec { template - static void Process(TBlockItem arg, const IValueBuilder&, const TSink& sink) { + static void Process(const IValueBuilder*, TBlockItem arg, const TSink& sink) { sink(TBlockItem(static_cast(IsVeryGoodTld(arg.AsStringRef())))); } }; @@ -483,7 +483,7 @@ BEGIN_SIMPLE_ARROW_UDF(THostNameToPunycode, TOptional(TAutoMap)) t } struct THostNameToPunycodeKernelExec : public TUnaryKernelExec { template - static void Process(TBlockItem arg, const IValueBuilder&, const TSink& sink) try { + static void Process(const IValueBuilder*, TBlockItem arg, const TSink& sink) try { const TUtf16String& input = UTF8ToWide(arg.AsStringRef()); return sink(TBlockItem(TStringRef(HostNameToPunycode(input)))); } catch (TPunycodeError&) { @@ -498,7 +498,7 @@ BEGIN_SIMPLE_ARROW_UDF(TForceHostNameToPunycode, char*(TAutoMap)) { } struct TForceHostNameToPunycodeKernelExec : public TUnaryKernelExec { template - static void Process(TBlockItem arg, const IValueBuilder&, const TSink& sink) { + static void Process(const IValueBuilder*, TBlockItem arg, const TSink& sink) { const TUtf16String& input = UTF8ToWide(arg.AsStringRef()); sink(TBlockItem(TStringRef(ForceHostNameToPunycode(input)))); } @@ -514,7 +514,7 @@ BEGIN_SIMPLE_ARROW_UDF(TPunycodeToHostName, TOptional(TAutoMap)) t } struct TPunycodeToHostNameKernelExec : public TUnaryKernelExec { template - static void Process(TBlockItem arg, const IValueBuilder&, const TSink& sink) try { + static void Process(const IValueBuilder*, TBlockItem arg, const TSink& sink) try { const TStringRef& input = arg.AsStringRef(); const auto& result = WideToUTF8(PunycodeToHostName(input)); return sink(TBlockItem(TStringRef(result))); @@ -531,7 +531,7 @@ BEGIN_SIMPLE_ARROW_UDF(TForcePunycodeToHostName, char*(TAutoMap)) { } struct TForcePunycodeToHostNameKernelExec : public TUnaryKernelExec { template - static void Process(TBlockItem arg, const IValueBuilder&, const TSink& sink) { + static void Process(const IValueBuilder*, TBlockItem arg, const TSink& sink) { const TStringRef& input = arg.AsStringRef(); const auto& result = WideToUTF8(ForcePunycodeToHostName(input)); sink(TBlockItem(TStringRef(result))); @@ -545,7 +545,7 @@ BEGIN_SIMPLE_ARROW_UDF(TCanBePunycodeHostName, bool(TAutoMap)) { } struct TCanBePunycodeHostNameKernelExec : public TUnaryKernelExec { template - static void Process(TBlockItem arg, const IValueBuilder&, const TSink& sink) { + static void Process(const IValueBuilder*, TBlockItem arg, const TSink& sink) { sink(TBlockItem(static_cast(CanBePunycodeHostName(arg.AsStringRef())))); } }; From abb66e3b18dcbfc2eb7c8d6789c869bea6a95893 Mon Sep 17 00:00:00 2001 From: Fiodar Miron Date: Wed, 10 Jul 2024 18:32:12 +0300 Subject: [PATCH 17/18] fix style --- .../yql/udfs/common/string/string_udf.cpp | 338 +++++++++--------- 1 file changed, 169 insertions(+), 169 deletions(-) diff --git a/ydb/library/yql/udfs/common/string/string_udf.cpp b/ydb/library/yql/udfs/common/string/string_udf.cpp index c8234277d6fd..c99f0c258d81 100644 --- a/ydb/library/yql/udfs/common/string/string_udf.cpp +++ b/ydb/library/yql/udfs/common/string/string_udf.cpp @@ -31,59 +31,59 @@ using namespace NUdf; namespace { -#define STRING_UDF(udfName, function) \ - BEGIN_SIMPLE_STRICT_ARROW_UDF(T##udfName, char*(TAutoMap)) { \ - const TString input(args[0].AsStringRef()); \ - const auto& result = function(input); \ - return valueBuilder->NewString(result); \ - } \ - \ - struct T##udfName##KernelExec \ - : public TUnaryKernelExec \ - { \ - template \ - static void Process(const IValueBuilder*, TBlockItem arg1, const TSink& sink) {\ - const TString input(arg1.AsStringRef()); \ - const auto& result = function(input); \ - sink(TBlockItem(result)); \ - } \ - }; \ - \ - END_SIMPLE_ARROW_UDF(T##udfName, T##udfName##KernelExec::Do) +#define STRING_UDF(udfName, function) \ + BEGIN_SIMPLE_STRICT_ARROW_UDF(T##udfName, char*(TAutoMap)) { \ + const TString input(args[0].AsStringRef()); \ + const auto& result = function(input); \ + return valueBuilder->NewString(result); \ + } \ + \ + struct T##udfName##KernelExec \ + : public TUnaryKernelExec \ + { \ + template \ + static void Process(const IValueBuilder*, TBlockItem arg1, const TSink& sink) { \ + const TString input(arg1.AsStringRef()); \ + const auto& result = function(input); \ + sink(TBlockItem(result)); \ + } \ + }; \ + \ + END_SIMPLE_ARROW_UDF(T##udfName, T##udfName##KernelExec::Do) \ // 'unsafe' udf is actually strict - it returns null on any exception -#define STRING_UNSAFE_UDF(udfName, function) \ - BEGIN_SIMPLE_STRICT_ARROW_UDF(T##udfName, TOptional(TOptional)) { \ - EMPTY_RESULT_ON_EMPTY_ARG(0); \ - const TString input(args[0].AsStringRef()); \ - try { \ - const auto& result = function(input); \ - return valueBuilder->NewString(result); \ - } catch (yexception&) { \ - return TUnboxedValue(); \ - } \ - } \ - \ - struct T##udfName##KernelExec \ - : public TUnaryKernelExec \ - { \ - template \ - static void Process(const IValueBuilder*, TBlockItem arg1, const TSink& sink) {\ - if (!arg1) { \ - return sink(TBlockItem()); \ - } \ - \ - const TString input(arg1.AsStringRef()); \ - try { \ - const auto& result = function(input); \ - sink(TBlockItem(result)); \ - } catch (yexception&) { \ - return sink(TBlockItem()); \ - } \ - } \ - }; \ - \ +#define STRING_UNSAFE_UDF(udfName, function) \ + BEGIN_SIMPLE_STRICT_ARROW_UDF(T##udfName, TOptional(TOptional)) { \ + EMPTY_RESULT_ON_EMPTY_ARG(0); \ + const TString input(args[0].AsStringRef()); \ + try { \ + const auto& result = function(input); \ + return valueBuilder->NewString(result); \ + } catch (yexception&) { \ + return TUnboxedValue(); \ + } \ + } \ + \ + struct T##udfName##KernelExec \ + : public TUnaryKernelExec \ + { \ + template \ + static void Process(const IValueBuilder*, TBlockItem arg1, const TSink& sink) { \ + if (!arg1) { \ + return sink(TBlockItem()); \ + } \ + \ + const TString input(arg1.AsStringRef()); \ + try { \ + const auto& result = function(input); \ + sink(TBlockItem(result)); \ + } catch (yexception&) { \ + return sink(TBlockItem()); \ + } \ + } \ + }; \ + \ END_SIMPLE_ARROW_UDF(T##udfName, T##udfName##KernelExec::Do) #define STROKA_UDF(udfName, function) \ @@ -112,30 +112,30 @@ namespace { } \ } -#define STROKA_ASCII_CASE_UDF(udfName, function) \ - BEGIN_SIMPLE_STRICT_ARROW_UDF(T##udfName, char*(TAutoMap)) { \ - TString input(args[0].AsStringRef()); \ - if (input.function()) { \ - return valueBuilder->NewString(input); \ - } else { \ - return args[0]; \ - } \ - } \ - \ - struct T##udfName##KernelExec \ - : public TUnaryKernelExec \ - { \ - template \ - static void Process(const IValueBuilder*, TBlockItem arg1, const TSink& sink) {\ - TString input(arg1.AsStringRef()); \ - if (input.function()) { \ - sink(TBlockItem(input)); \ - } else { \ - sink(arg1); \ - } \ - } \ - }; \ - \ +#define STROKA_ASCII_CASE_UDF(udfName, function) \ + BEGIN_SIMPLE_STRICT_ARROW_UDF(T##udfName, char*(TAutoMap)) { \ + TString input(args[0].AsStringRef()); \ + if (input.function()) { \ + return valueBuilder->NewString(input); \ + } else { \ + return args[0]; \ + } \ + } \ + \ + struct T##udfName##KernelExec \ + : public TUnaryKernelExec \ + { \ + template \ + static void Process(const IValueBuilder*, TBlockItem arg1, const TSink& sink) { \ + TString input(arg1.AsStringRef()); \ + if (input.function()) { \ + sink(TBlockItem(input)); \ + } else { \ + sink(arg1); \ + } \ + } \ + }; \ + \ END_SIMPLE_ARROW_UDF(T##udfName, T##udfName##KernelExec::Do) @@ -163,45 +163,45 @@ namespace { } \ } -#define IS_ASCII_UDF(function) \ - BEGIN_SIMPLE_STRICT_ARROW_UDF(T##function, bool(TOptional)) { \ - Y_UNUSED(valueBuilder); \ - if (args[0]) { \ - const TStringBuf input(args[0].AsStringRef()); \ - bool result = true; \ - for (auto c : input) { \ - if (!function(c)) { \ - result = false; \ - break; \ - } \ - } \ - return TUnboxedValuePod(result); \ - } else { \ - return TUnboxedValuePod(false); \ - } \ - } \ - \ - struct T##function##KernelExec \ - : public TUnaryKernelExec \ - { \ - template \ - static void Process(const IValueBuilder*, TBlockItem arg1, const TSink& sink) {\ - if (arg1) { \ - const TStringBuf input(arg1.AsStringRef()); \ - bool result = true; \ - for (auto c : input) { \ - if (!function(c)) { \ - result = false; \ - break; \ - } \ - } \ - sink(TBlockItem(result)); \ - } else { \ - sink(TBlockItem(false)); \ - } \ - } \ - }; \ - \ +#define IS_ASCII_UDF(function) \ + BEGIN_SIMPLE_STRICT_ARROW_UDF(T##function, bool(TOptional)) { \ + Y_UNUSED(valueBuilder); \ + if (args[0]) { \ + const TStringBuf input(args[0].AsStringRef()); \ + bool result = true; \ + for (auto c : input) { \ + if (!function(c)) { \ + result = false; \ + break; \ + } \ + } \ + return TUnboxedValuePod(result); \ + } else { \ + return TUnboxedValuePod(false); \ + } \ + } \ + \ + struct T##function##KernelExec \ + : public TUnaryKernelExec \ + { \ + template \ + static void Process(const IValueBuilder*, TBlockItem arg1, const TSink& sink) { \ + if (arg1) { \ + const TStringBuf input(arg1.AsStringRef()); \ + bool result = true; \ + for (auto c : input) { \ + if (!function(c)) { \ + result = false; \ + break; \ + } \ + } \ + sink(TBlockItem(result)); \ + } else { \ + sink(TBlockItem(false)); \ + } \ + } \ + }; \ + \ END_SIMPLE_ARROW_UDF(T##function, T##function##KernelExec::Do) @@ -253,67 +253,67 @@ namespace { \ END_SIMPLE_ARROW_UDF(T##function, T##function##KernelExec::Do) -#define STRING_STREAM_NUM_FORMATTER_UDF(function, argType) \ - BEGIN_SIMPLE_STRICT_ARROW_UDF(T##function, char*(TAutoMap)) { \ - TStringStream result; \ - result << function(args[0].Get()); \ - return valueBuilder->NewString(TStringRef(result.Data(), result.Size())); \ - } \ - \ - struct T##function##KernelExec \ - : public TUnaryKernelExec \ - { \ - template \ - static void Process(const IValueBuilder*, TBlockItem arg1, const TSink& sink) {\ - TStringStream result; \ - result << function(arg1.Get()); \ - sink(TBlockItem(TStringRef(result.Data(), result.Size()))); \ - } \ - }; \ - \ +#define STRING_STREAM_NUM_FORMATTER_UDF(function, argType) \ + BEGIN_SIMPLE_STRICT_ARROW_UDF(T##function, char*(TAutoMap)) { \ + TStringStream result; \ + result << function(args[0].Get()); \ + return valueBuilder->NewString(TStringRef(result.Data(), result.Size())); \ + } \ + \ + struct T##function##KernelExec \ + : public TUnaryKernelExec \ + { \ + template \ + static void Process(const IValueBuilder*, TBlockItem arg1, const TSink& sink) { \ + TStringStream result; \ + result << function(arg1.Get()); \ + sink(TBlockItem(TStringRef(result.Data(), result.Size()))); \ + } \ + }; \ + \ END_SIMPLE_ARROW_UDF(T##function, T##function##KernelExec::Do) -#define STRING_STREAM_TEXT_FORMATTER_UDF(function) \ - BEGIN_SIMPLE_STRICT_ARROW_UDF(T##function, char*(TAutoMap)) { \ - TStringStream result; \ - const TStringBuf input(args[0].AsStringRef()); \ - result << function(input); \ - return valueBuilder->NewString(TStringRef(result.Data(), result.Size())); \ - } \ - \ - struct T##function##KernelExec \ - : public TUnaryKernelExec \ - { \ - template \ - static void Process(const IValueBuilder*, TBlockItem arg1, const TSink& sink) {\ - TStringStream result; \ - const TStringBuf input(arg1.AsStringRef()); \ - result << function(input); \ - sink(TBlockItem(TStringRef(result.Data(), result.Size()))); \ - } \ - }; \ - \ +#define STRING_STREAM_TEXT_FORMATTER_UDF(function) \ + BEGIN_SIMPLE_STRICT_ARROW_UDF(T##function, char*(TAutoMap)) { \ + TStringStream result; \ + const TStringBuf input(args[0].AsStringRef()); \ + result << function(input); \ + return valueBuilder->NewString(TStringRef(result.Data(), result.Size())); \ + } \ + \ + struct T##function##KernelExec \ + : public TUnaryKernelExec \ + { \ + template \ + static void Process(const IValueBuilder*, TBlockItem arg1, const TSink& sink) { \ + TStringStream result; \ + const TStringBuf input(arg1.AsStringRef()); \ + result << function(input); \ + sink(TBlockItem(TStringRef(result.Data(), result.Size()))); \ + } \ + }; \ + \ END_SIMPLE_ARROW_UDF(T##function, T##function##KernelExec::Do) -#define STRING_STREAM_HRSZ_FORMATTER_UDF(udfName, hrSize) \ - BEGIN_SIMPLE_STRICT_ARROW_UDF(T##udfName, char*(TAutoMap)) { \ - TStringStream result; \ - result << HumanReadableSize(args[0].Get(), hrSize); \ - return valueBuilder->NewString(TStringRef(result.Data(), result.Size())); \ - } \ - \ - struct T##udfName##KernelExec \ - : public TUnaryKernelExec \ - { \ - template \ - static void Process(const IValueBuilder*, TBlockItem arg1, const TSink& sink) {\ - TStringStream result; \ - result << HumanReadableSize(arg1.Get(), hrSize); \ - sink(TBlockItem(TStringRef(result.Data(), result.Size()))); \ - } \ - }; \ - \ +#define STRING_STREAM_HRSZ_FORMATTER_UDF(udfName, hrSize) \ + BEGIN_SIMPLE_STRICT_ARROW_UDF(T##udfName, char*(TAutoMap)) { \ + TStringStream result; \ + result << HumanReadableSize(args[0].Get(), hrSize); \ + return valueBuilder->NewString(TStringRef(result.Data(), result.Size())); \ + } \ + \ + struct T##udfName##KernelExec \ + : public TUnaryKernelExec \ + { \ + template \ + static void Process(const IValueBuilder*, TBlockItem arg1, const TSink& sink) { \ + TStringStream result; \ + result << HumanReadableSize(arg1.Get(), hrSize); \ + sink(TBlockItem(TStringRef(result.Data(), result.Size()))); \ + } \ + }; \ + \ END_SIMPLE_ARROW_UDF(T##udfName, T##udfName##KernelExec::Do) #define STRING_UDF_MAP(XX) \ From 36cdbb4ecb00e88d3554f66d6c289be599e33504 Mon Sep 17 00:00:00 2001 From: Fiodar Miron Date: Wed, 10 Jul 2024 20:06:07 +0300 Subject: [PATCH 18/18] fix build in blocks_ut --- ydb/library/yql/minikql/comp_nodes/ut/mkql_blocks_ut.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ydb/library/yql/minikql/comp_nodes/ut/mkql_blocks_ut.cpp b/ydb/library/yql/minikql/comp_nodes/ut/mkql_blocks_ut.cpp index fa3374864c21..03b759de4f1c 100644 --- a/ydb/library/yql/minikql/comp_nodes/ut/mkql_blocks_ut.cpp +++ b/ydb/library/yql/minikql/comp_nodes/ut/mkql_blocks_ut.cpp @@ -16,7 +16,7 @@ BEGIN_SIMPLE_ARROW_UDF(TInc, i32(i32)) { struct TIncKernelExec : public NYql::NUdf::TUnaryKernelExec { template - static void Process(NYql::NUdf::TBlockItem arg, const NYql::NUdf::IValueBuilder& valueBuilder, const TSink& sink) { + static void Process(const NYql::NUdf::IValueBuilder* valueBuilder, NYql::NUdf::TBlockItem arg, const TSink& sink) { Y_UNUSED(valueBuilder); sink(NYql::NUdf::TBlockItem(arg.As() + 1)); }