Skip to content

Commit 24e9824

Browse files
fix trivial batch modification detector (#896)
1 parent 3175dd3 commit 24e9824

File tree

3 files changed

+11
-7
lines changed

3 files changed

+11
-7
lines changed

ydb/core/formats/arrow/arrow_helpers.cpp

+5-2
Original file line numberDiff line numberDiff line change
@@ -363,7 +363,10 @@ std::vector<std::shared_ptr<arrow::RecordBatch>> SliceSortedBatches(const std::v
363363
}
364364

365365
// Check if the permutation is trivial: it keeps every row of the original batch and doesn't reorder anything
366-
bool IsNoOp(const arrow::UInt64Array& permutation) {
366+
bool IsTrivial(const arrow::UInt64Array& permutation, const ui64 originalLength) {
367+
if ((ui64)permutation.length() != originalLength) {
368+
return false;
369+
}
367370
for (i64 i = 0; i < permutation.length(); ++i) {
368371
if (permutation.Value(i) != (ui64)i) {
369372
return false;
@@ -376,7 +379,7 @@ std::shared_ptr<arrow::RecordBatch> Reorder(const std::shared_ptr<arrow::RecordB
376379
const std::shared_ptr<arrow::UInt64Array>& permutation, const bool canRemove) {
377380
Y_ABORT_UNLESS(permutation->length() == batch->num_rows() || canRemove);
378381

379-
auto res = IsNoOp(*permutation) ? batch : arrow::compute::Take(batch, permutation);
382+
auto res = IsTrivial(*permutation, batch->num_rows()) ? batch : arrow::compute::Take(batch, permutation);
380383
Y_ABORT_UNLESS(res.ok());
381384
return (*res).record_batch();
382385
}

ydb/core/formats/arrow/permutations.cpp

+5-3
Original file line numberDiff line numberDiff line change
@@ -46,9 +46,11 @@ std::shared_ptr<arrow::UInt64Array> MakePermutation(const int size, const bool r
4646
return out;
4747
}
4848

49-
std::shared_ptr<arrow::UInt64Array> MakeSortPermutation(const std::shared_ptr<arrow::RecordBatch>& batch,
50-
const std::shared_ptr<arrow::Schema>& sortingKey, const bool andUnique) {
51-
auto keyBatch = ExtractColumns(batch, sortingKey);
49+
std::shared_ptr<arrow::UInt64Array> MakeSortPermutation(const std::shared_ptr<arrow::RecordBatch>& batch, const std::shared_ptr<arrow::Schema>& sortingKey, const bool andUnique) {
50+
auto keyBatch = ExtractColumns(batch, sortingKey, false);
51+
AFL_VERIFY(batch);
52+
AFL_VERIFY(sortingKey);
53+
AFL_VERIFY(!!keyBatch)("problem", "cannot_find_columns")("schema", batch->schema()->ToString())("columns", sortingKey->ToString());
5254
auto keyColumns = std::make_shared<TArrayVec>(keyBatch->columns());
5355
std::vector<TRawReplaceKey> points;
5456
points.reserve(keyBatch->num_rows());

ydb/core/formats/arrow/permutations.h

+1-2
Original file line numberDiff line numberDiff line change
@@ -140,8 +140,7 @@ class TShardingSplitIndex {
140140
std::shared_ptr<arrow::UInt64Array> MakePermutation(const int size, const bool reverse = false);
141141
std::shared_ptr<arrow::UInt64Array> MakeFilterPermutation(const std::vector<ui64>& indexes);
142142
std::shared_ptr<arrow::UInt64Array> MakeFilterPermutation(const std::vector<ui32>& indexes);
143-
std::shared_ptr<arrow::UInt64Array> MakeSortPermutation(const std::shared_ptr<arrow::RecordBatch>& batch,
144-
const std::shared_ptr<arrow::Schema>& sortingKey, const bool andUnique);
143+
std::shared_ptr<arrow::UInt64Array> MakeSortPermutation(const std::shared_ptr<arrow::RecordBatch>& batch, const std::shared_ptr<arrow::Schema>& sortingKey, const bool andUnique);
145144
std::shared_ptr<arrow::RecordBatch> ReverseRecords(const std::shared_ptr<arrow::RecordBatch>& batch);
146145

147146
std::shared_ptr<arrow::Array> CopyRecords(const std::shared_ptr<arrow::Array>& source, const std::vector<ui64>& indexes);

0 commit comments

Comments
 (0)