|
4 | 4 | #include <ydb/core/formats/arrow/arrow_helpers.h>
|
5 | 5 |
|
6 | 6 | #include <ydb/library/accessor/accessor.h>
|
| 7 | +#include <ydb/library/accessor/validator.h> |
7 | 8 | #include <ydb/library/actors/core/log.h>
|
8 | 9 |
|
9 | 10 | #include <contrib/libs/apache/arrow/cpp/src/arrow/array/array_base.h>
|
@@ -34,65 +35,68 @@ class IChunkedArray {
|
34 | 35 | SparsedArray
|
35 | 36 | };
|
36 | 37 |
|
37 |
| - class TCurrentArrayAddress { |
| 38 | + class TCommonChunkAddress { |
38 | 39 | private:
|
39 |
| - YDB_READONLY_DEF(std::shared_ptr<IChunkedArray>, Array); |
40 | 40 | YDB_READONLY(ui64, StartPosition, 0);
|
41 | 41 | YDB_READONLY(ui64, FinishPosition, 0);
|
42 | 42 | YDB_READONLY(ui64, ChunkIndex, 0);
|
43 | 43 |
|
44 | 44 | public:
|
45 |
| - TCurrentArrayAddress(const std::shared_ptr<IChunkedArray>& arr, const ui32 pos, const ui32 idx) |
46 |
| - : Array(arr) |
47 |
| - , StartPosition(pos) |
48 |
| - , FinishPosition(pos + arr->GetRecordsCount()) |
49 |
| - , ChunkIndex(idx) { |
50 |
| - AFL_VERIFY(arr); |
51 |
| - AFL_VERIFY(arr->GetRecordsCount()); |
52 |
| - } |
53 |
| - |
54 | 45 | TString DebugString() const {
|
55 | 46 | return TStringBuilder() << "start=" << StartPosition << ";"
|
56 | 47 | << "chunk_index=" << ChunkIndex << ";"
|
57 |
| - << "length=" << Array->GetRecordsCount() << ";"; |
| 48 | + << "finish=" << FinishPosition << ";" |
| 49 | + << "size=" << FinishPosition - StartPosition << ";" |
| 50 | + ; |
58 | 51 | }
|
59 |
| - }; |
60 |
| - |
61 |
| - class TCurrentChunkAddress { |
62 |
| - private: |
63 |
| - YDB_READONLY_DEF(std::shared_ptr<arrow::Array>, Array); |
64 |
| - YDB_READONLY(ui64, StartPosition, 0); |
65 |
| - YDB_READONLY(ui64, FinishPosition, 0); |
66 |
| - YDB_READONLY(ui64, ChunkIndex, 0); |
67 |
| - |
68 |
| - public: |
69 |
| - TString DebugString(const ui64 position) const; |
70 | 52 |
|
71 | 53 | ui64 GetLength() const {
|
72 |
| - return Array->length(); |
| 54 | + return FinishPosition - StartPosition; |
73 | 55 | }
|
74 | 56 |
|
75 | 57 | bool Contains(const ui64 position) const {
|
76 | 58 | return position >= StartPosition && position < FinishPosition;
|
77 | 59 | }
|
78 | 60 |
|
| 61 | + TCommonChunkAddress(const ui64 start, const ui64 finish, const ui64 index) |
| 62 | + : StartPosition(start) |
| 63 | + , FinishPosition(finish) |
| 64 | + , ChunkIndex(index) { |
| 65 | + AFL_VERIFY(FinishPosition > StartPosition); |
| 66 | + } |
| 67 | + }; |
| 68 | + |
| 69 | + class TCurrentArrayAddress: public TCommonChunkAddress { |
| 70 | + private: |
| 71 | + YDB_READONLY_DEF(std::shared_ptr<IChunkedArray>, Array); |
| 72 | + |
| 73 | + public: |
| 74 | + TCurrentArrayAddress(const std::shared_ptr<IChunkedArray>& arr, const ui32 pos, const ui32 idx) |
| 75 | + : TCommonChunkAddress(pos, pos + TValidator::CheckNotNull(arr)->GetRecordsCount(), idx) |
| 76 | + , Array(arr) { |
| 77 | + AFL_VERIFY(Array); |
| 78 | + AFL_VERIFY(Array->GetRecordsCount()); |
| 79 | + } |
| 80 | + }; |
| 81 | + |
| 82 | + class TCurrentChunkAddress: public TCommonChunkAddress { |
| 83 | + private: |
| 84 | + using TBase = TCommonChunkAddress; |
| 85 | + YDB_READONLY_DEF(std::shared_ptr<arrow::Array>, Array); |
| 86 | + |
| 87 | + public: |
| 88 | + using TBase::DebugString; |
| 89 | + TString DebugString(const ui64 position) const; |
| 90 | + |
79 | 91 | std::shared_ptr<arrow::Array> CopyRecord(const ui64 recordIndex) const;
|
80 | 92 |
|
81 | 93 | std::partial_ordering Compare(const ui64 position, const TCurrentChunkAddress& item, const ui64 itemPosition) const;
|
82 | 94 |
|
83 | 95 | TCurrentChunkAddress(const std::shared_ptr<arrow::Array>& arr, const ui64 pos, const ui32 chunkIdx)
|
84 |
| - : Array(arr) |
85 |
| - , StartPosition(pos) |
86 |
| - , ChunkIndex(chunkIdx) { |
87 |
| - AFL_VERIFY(arr); |
88 |
| - AFL_VERIFY(arr->length()); |
89 |
| - FinishPosition = StartPosition + arr->length(); |
90 |
| - } |
91 |
| - |
92 |
| - TString DebugString() const { |
93 |
| - return TStringBuilder() << "start=" << StartPosition << ";" |
94 |
| - << "chunk_index=" << ChunkIndex << ";" |
95 |
| - << "length=" << Array->length() << ";"; |
| 96 | + : TCommonChunkAddress(pos, pos + TValidator::CheckNotNull(arr)->length(), chunkIdx) |
| 97 | + , Array(arr) { |
| 98 | + AFL_VERIFY(Array); |
| 99 | + AFL_VERIFY(Array->length()); |
96 | 100 | }
|
97 | 101 | };
|
98 | 102 |
|
@@ -143,7 +147,8 @@ class IChunkedArray {
|
143 | 147 | ui64 idx = 0;
|
144 | 148 | if (chunkCurrent) {
|
145 | 149 | if (position < chunkCurrent->GetFinishPosition()) {
|
146 |
| - return accessor.OnArray(chunkCurrent->GetChunkIndex(), chunkCurrent->GetStartPosition(), position - chunkCurrent->GetStartPosition()); |
| 150 | + return accessor.OnArray( |
| 151 | + chunkCurrent->GetChunkIndex(), chunkCurrent->GetStartPosition(), position - chunkCurrent->GetStartPosition()); |
147 | 152 | }
|
148 | 153 | AFL_VERIFY(chunkCurrent->GetChunkIndex() < accessor.GetChunksCount());
|
149 | 154 | startIndex = chunkCurrent->GetChunkIndex();
|
@@ -239,7 +244,8 @@ class IChunkedArray {
|
239 | 244 |
|
240 | 245 | std::shared_ptr<arrow::ChunkedArray> Slice(const ui32 offset, const ui32 count) const;
|
241 | 246 |
|
242 |
| - TCurrentArrayAddress GetArray(const std::optional<TCurrentArrayAddress>& chunkCurrent, const ui64 position, const std::shared_ptr<IChunkedArray>& selfPtr) const { |
| 247 | + TCurrentArrayAddress GetArray( |
| 248 | + const std::optional<TCurrentArrayAddress>& chunkCurrent, const ui64 position, const std::shared_ptr<IChunkedArray>& selfPtr) const { |
243 | 249 | AFL_VERIFY(position < GetRecordsCount());
|
244 | 250 | return DoGetArray(chunkCurrent, position, selfPtr);
|
245 | 251 | }
|
|
0 commit comments