|
5 | 5 | #include <ydb/library/actors/core/log.h>
|
6 | 6 | #include <ydb/core/formats/arrow/permutations.h>
|
7 | 7 | #include <ydb/core/formats/arrow/arrow_helpers.h>
|
| 8 | +#include <ydb/core/formats/arrow/splitter/simple.h> |
| 9 | +#include <ydb/core/formats/arrow/save_load/saver.h> |
8 | 10 |
|
9 | 11 | namespace NKikimr::NArrow::NAccessor {
|
10 | 12 |
|
@@ -72,64 +74,32 @@ const std::partial_ordering IChunkedArray::TAddress::Compare(const TAddress& ite
|
72 | 74 | return TComparator::TypedCompare<true>(*Array, Position, *item.Array, item.Position);
|
73 | 75 | }
|
74 | 76 |
|
75 |
| -namespace { |
76 |
| -class TChunkAccessor { |
77 |
| -private: |
78 |
| - std::shared_ptr<arrow::ChunkedArray> ChunkedArray; |
79 |
| -public: |
80 |
| - TChunkAccessor(const std::shared_ptr<arrow::ChunkedArray>& chunkedArray) |
81 |
| - : ChunkedArray(chunkedArray) |
82 |
| - { |
83 |
| - |
84 |
| - } |
85 |
| - ui64 GetChunksCount() const { |
86 |
| - return (ui64)ChunkedArray->num_chunks(); |
87 |
| - } |
88 |
| - ui64 GetChunkLength(const ui32 idx) const { |
89 |
| - return (ui64)ChunkedArray->chunk(idx)->length(); |
90 |
| - } |
91 |
| - std::shared_ptr<arrow::Array> GetArray(const ui32 idx) const { |
92 |
| - return ChunkedArray->chunk(idx); |
93 |
| - } |
94 |
| -}; |
95 |
| - |
96 |
| -} |
97 |
| - |
98 |
| -std::optional<ui64> TTrivialArray::DoGetRawSize() const { |
99 |
| - return NArrow::GetArrayDataSize(Array); |
| 77 | + TChunkedArraySerialized::TChunkedArraySerialized(const std::shared_ptr<IChunkedArray>& array, const TString& serializedData) |
| 78 | + : Array(array) |
| 79 | + , SerializedData(serializedData) { |
| 80 | + AFL_VERIFY(serializedData); |
| 81 | + AFL_VERIFY(Array); |
| 82 | + AFL_VERIFY(Array->GetRecordsCount()); |
100 | 83 | }
|
101 | 84 |
|
102 | 85 | std::partial_ordering IChunkedArray::TCurrentChunkAddress::Compare(const ui64 position, const TCurrentChunkAddress& item, const ui64 itemPosition) const {
|
103 |
| - AFL_VERIFY(StartPosition <= position); |
104 |
| - AFL_VERIFY(position < FinishPosition); |
105 |
| - AFL_VERIFY(item.StartPosition <= itemPosition); |
106 |
| - AFL_VERIFY(itemPosition < item.FinishPosition); |
107 |
| - return TComparator::TypedCompare<true>(*Array, position - StartPosition, *item.Array, itemPosition - item.StartPosition); |
| 86 | + AFL_VERIFY(GetStartPosition() <= position)("pos", position)("start", GetStartPosition()); |
| 87 | + AFL_VERIFY(position < GetFinishPosition())("pos", position)("finish", GetFinishPosition()); |
| 88 | + AFL_VERIFY(item.GetStartPosition() <= itemPosition)("start", item.GetStartPosition())("item", itemPosition); |
| 89 | + AFL_VERIFY(itemPosition < item.GetFinishPosition())("item", itemPosition)("finish", item.GetFinishPosition()); |
| 90 | + return TComparator::TypedCompare<true>(*Array, position - GetStartPosition(), *item.Array, itemPosition - item.GetStartPosition()); |
108 | 91 | }
|
109 | 92 |
|
110 | 93 | std::shared_ptr<arrow::Array> IChunkedArray::TCurrentChunkAddress::CopyRecord(const ui64 recordIndex) const {
|
111 |
| - AFL_VERIFY(StartPosition <= recordIndex); |
112 |
| - AFL_VERIFY(recordIndex < FinishPosition); |
113 |
| - return NArrow::CopyRecords(Array, { recordIndex - StartPosition }); |
| 94 | + AFL_VERIFY(GetStartPosition() <= recordIndex); |
| 95 | + AFL_VERIFY(recordIndex < GetFinishPosition()); |
| 96 | + return NArrow::CopyRecords(Array, { recordIndex - GetStartPosition() }); |
114 | 97 | }
|
115 | 98 |
|
116 | 99 | TString IChunkedArray::TCurrentChunkAddress::DebugString(const ui64 position) const {
|
117 |
| - AFL_VERIFY(position < FinishPosition); |
118 |
| - AFL_VERIFY(StartPosition <= position); |
119 |
| - return NArrow::DebugString(Array, position - StartPosition); |
120 |
| -} |
121 |
| - |
122 |
| -IChunkedArray::TCurrentChunkAddress TTrivialChunkedArray::DoGetChunk(const std::optional<TCurrentChunkAddress>& chunkCurrent, const ui64 position) const { |
123 |
| - TChunkAccessor accessor(Array); |
124 |
| - return SelectChunk(chunkCurrent, position, accessor); |
125 |
| -} |
126 |
| - |
127 |
| -std::optional<ui64> TTrivialChunkedArray::DoGetRawSize() const { |
128 |
| - ui64 result = 0; |
129 |
| - for (auto&& i : Array->chunks()) { |
130 |
| - result += NArrow::GetArrayDataSize(i); |
131 |
| - } |
132 |
| - return result; |
| 100 | + AFL_VERIFY(position < GetFinishPosition()); |
| 101 | + AFL_VERIFY(GetStartPosition() <= position); |
| 102 | + return NArrow::DebugString(Array, position - GetStartPosition()); |
133 | 103 | }
|
134 | 104 |
|
135 | 105 | }
|
0 commit comments