From d7266522eb5220999e915ccab5b2480400d3ba8e Mon Sep 17 00:00:00 2001 From: ivanmorozov333 Date: Tue, 9 Jan 2024 19:52:32 +0300 Subject: [PATCH 1/2] fix trivial detector --- ydb/core/formats/arrow/arrow_helpers.cpp | 7 +++++-- ydb/core/formats/arrow/permutations.cpp | 4 ++-- ydb/core/formats/arrow/permutations.h | 3 +-- 3 files changed, 8 insertions(+), 6 deletions(-) diff --git a/ydb/core/formats/arrow/arrow_helpers.cpp b/ydb/core/formats/arrow/arrow_helpers.cpp index c66abcf7a566..eac6a6670e00 100644 --- a/ydb/core/formats/arrow/arrow_helpers.cpp +++ b/ydb/core/formats/arrow/arrow_helpers.cpp @@ -363,7 +363,10 @@ std::vector> SliceSortedBatches(const std::v } // Check if the permutation doesn't reorder anything -bool IsNoOp(const arrow::UInt64Array& permutation) { +bool IsTrivial(const arrow::UInt64Array& permutation, const ui64 originalLength) { + if ((ui64)permutation.length() != originalLength) { + return false; + } for (i64 i = 0; i < permutation.length(); ++i) { if (permutation.Value(i) != (ui64)i) { return false; @@ -376,7 +379,7 @@ std::shared_ptr Reorder(const std::shared_ptr& permutation, const bool canRemove) { Y_ABORT_UNLESS(permutation->length() == batch->num_rows() || canRemove); - auto res = IsNoOp(*permutation) ? batch : arrow::compute::Take(batch, permutation); + auto res = IsTrivial(*permutation, batch->num_rows()) ? batch : arrow::compute::Take(batch, permutation); Y_ABORT_UNLESS(res.ok()); return (*res).record_batch(); } diff --git a/ydb/core/formats/arrow/permutations.cpp b/ydb/core/formats/arrow/permutations.cpp index a2f13517f71b..a348cdfdd991 100644 --- a/ydb/core/formats/arrow/permutations.cpp +++ b/ydb/core/formats/arrow/permutations.cpp @@ -46,9 +46,9 @@ std::shared_ptr MakePermutation(const int size, const bool r return out; } -std::shared_ptr MakeSortPermutation(const std::shared_ptr& batch, - const std::shared_ptr& sortingKey, const bool andUnique) { +std::shared_ptr MakeSortPermutation(const std::shared_ptr& batch, const std::vector& sortingKey, const bool andUnique) { auto keyBatch = ExtractColumns(batch, sortingKey); + AFL_VERIFY(!!keyBatch)("problem", "cannot_find_columns")("schema", batch->schema()->ToString())("columns", sortingKey); auto keyColumns = std::make_shared(keyBatch->columns()); std::vector points; points.reserve(keyBatch->num_rows()); diff --git a/ydb/core/formats/arrow/permutations.h b/ydb/core/formats/arrow/permutations.h index b451aea7884a..2635f26dc2a5 100644 --- a/ydb/core/formats/arrow/permutations.h +++ b/ydb/core/formats/arrow/permutations.h @@ -140,8 +140,7 @@ class TShardingSplitIndex { std::shared_ptr MakePermutation(const int size, const bool reverse = false); std::shared_ptr MakeFilterPermutation(const std::vector& indexes); std::shared_ptr MakeFilterPermutation(const std::vector& indexes); -std::shared_ptr MakeSortPermutation(const std::shared_ptr& batch, - const std::shared_ptr& sortingKey, const bool andUnique); +std::shared_ptr MakeSortPermutation(const std::shared_ptr& batch, const std::vector& sortingKey, const bool andUnique); std::shared_ptr ReverseRecords(const std::shared_ptr& batch); std::shared_ptr CopyRecords(const std::shared_ptr& source, const std::vector& indexes); From 7cc6f14a511d101316d0087afa71e52e1f1f7998 Mon Sep 17 00:00:00 2001 From: ivanmorozov333 Date: Tue, 9 Jan 2024 20:37:15 +0300 Subject: [PATCH 2/2] fix --- ydb/core/formats/arrow/permutations.cpp | 8 +++++--- ydb/core/formats/arrow/permutations.h | 2 +- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/ydb/core/formats/arrow/permutations.cpp b/ydb/core/formats/arrow/permutations.cpp index a348cdfdd991..6f38f9fb0050 100644 --- a/ydb/core/formats/arrow/permutations.cpp +++ b/ydb/core/formats/arrow/permutations.cpp @@ -46,9 +46,11 @@ std::shared_ptr MakePermutation(const int size, const bool r return out; } -std::shared_ptr MakeSortPermutation(const std::shared_ptr& batch, const std::vector& sortingKey, const bool andUnique) { - auto keyBatch = ExtractColumns(batch, sortingKey); - AFL_VERIFY(!!keyBatch)("problem", "cannot_find_columns")("schema", batch->schema()->ToString())("columns", sortingKey); +std::shared_ptr MakeSortPermutation(const std::shared_ptr& batch, const std::shared_ptr& sortingKey, const bool andUnique) { + auto keyBatch = ExtractColumns(batch, sortingKey, false); + AFL_VERIFY(batch); + AFL_VERIFY(sortingKey); + AFL_VERIFY(!!keyBatch)("problem", "cannot_find_columns")("schema", batch->schema()->ToString())("columns", sortingKey->ToString()); auto keyColumns = std::make_shared(keyBatch->columns()); std::vector points; points.reserve(keyBatch->num_rows()); diff --git a/ydb/core/formats/arrow/permutations.h b/ydb/core/formats/arrow/permutations.h index 2635f26dc2a5..584db8350888 100644 --- a/ydb/core/formats/arrow/permutations.h +++ b/ydb/core/formats/arrow/permutations.h @@ -140,7 +140,7 @@ class TShardingSplitIndex { std::shared_ptr MakePermutation(const int size, const bool reverse = false); std::shared_ptr MakeFilterPermutation(const std::vector& indexes); std::shared_ptr MakeFilterPermutation(const std::vector& indexes); -std::shared_ptr MakeSortPermutation(const std::shared_ptr& batch, const std::vector& sortingKey, const bool andUnique); +std::shared_ptr MakeSortPermutation(const std::shared_ptr& batch, const std::shared_ptr& sortingKey, const bool andUnique); std::shared_ptr ReverseRecords(const std::shared_ptr& batch); std::shared_ptr CopyRecords(const std::shared_ptr& source, const std::vector& indexes);