|
10 | 10 |
|
11 | 11 | namespace NKikimr::NOlap::NReader::NCommon {
|
12 | 12 |
|
| 13 | +class IKernelFetchLogic { |
| 14 | +private: |
| 15 | + YDB_READONLY(ui32, ColumnId, 0); |
| 16 | + |
| 17 | + virtual void DoStart(const std::shared_ptr<NArrow::NAccessor::TAccessorsCollection>& resources, TReadActionsCollection& nextRead) = 0; |
| 18 | + virtual void DoOnDataReceived(TReadActionsCollection& nextRead, NBlobOperations::NRead::TCompositeReadBlobs& blobs) = 0; |
| 19 | + virtual void DoOnDataCollected(const std::shared_ptr<NArrow::NAccessor::TAccessorsCollection>& resources) = 0; |
| 20 | + |
| 21 | +protected: |
| 22 | + const std::shared_ptr<IDataSource> Source; |
| 23 | + |
| 24 | +public: |
| 25 | + using TFactory = NObjectFactory::TParametrizedObjectFactory<IKernelFetchLogic, TString, ui32, const std::shared_ptr<IDataSource>&>; |
| 26 | + |
| 27 | + virtual ~IKernelFetchLogic() = default; |
| 28 | + |
| 29 | + IKernelFetchLogic(const ui32 columnId, const std::shared_ptr<IDataSource>& source) |
| 30 | + : ColumnId(columnId) |
| 31 | + , Source(source) { |
| 32 | + } |
| 33 | + |
| 34 | + void Start(const std::shared_ptr<NArrow::NAccessor::TAccessorsCollection>& resources, TReadActionsCollection& nextRead) { |
| 35 | + DoStart(resources, nextRead); |
| 36 | + } |
| 37 | + void OnDataReceived(TReadActionsCollection& nextRead, NBlobOperations::NRead::TCompositeReadBlobs& blobs) { |
| 38 | + DoOnDataReceived(nextRead, blobs); |
| 39 | + } |
| 40 | + void OnDataCollected(const std::shared_ptr<NArrow::NAccessor::TAccessorsCollection>& resources) { |
| 41 | + DoOnDataCollected(resources); |
| 42 | + } |
| 43 | +}; |
| 44 | + |
| 45 | +class TChunkRestoreInfo { |
| 46 | +private: |
| 47 | + std::optional<TBlobRange> BlobRange; |
| 48 | + std::optional<TPortionDataAccessor::TAssembleBlobInfo> Data; |
| 49 | + const ui32 RecordsCount; |
| 50 | + |
| 51 | +public: |
| 52 | + TChunkRestoreInfo(const ui32 recordsCount, const TBlobRange& range) |
| 53 | + : BlobRange(range) |
| 54 | + , RecordsCount(recordsCount) |
| 55 | + { |
| 56 | + } |
| 57 | + |
| 58 | + const std::optional<TBlobRange>& GetBlobRangeOptional() const { |
| 59 | + return BlobRange; |
| 60 | + } |
| 61 | + |
| 62 | + TChunkRestoreInfo(const ui32 recordsCount, const TPortionDataAccessor::TAssembleBlobInfo& defaultData) |
| 63 | + : Data(defaultData) |
| 64 | + , RecordsCount(recordsCount) |
| 65 | + { |
| 66 | + } |
| 67 | + |
| 68 | + TPortionDataAccessor::TAssembleBlobInfo ExtractDataVerified() { |
| 69 | + AFL_VERIFY(!!Data); |
| 70 | + Data->SetExpectedRecordsCount(RecordsCount); |
| 71 | + return std::move(*Data); |
| 72 | + } |
| 73 | + |
| 74 | + void SetBlobData(const TString& data) { |
| 75 | + AFL_VERIFY(!Data); |
| 76 | + Data.emplace(data); |
| 77 | + } |
| 78 | +}; |
| 79 | + |
| 80 | +class TDefaultFetchLogic: public IKernelFetchLogic { |
| 81 | +private: |
| 82 | + using TBase = IKernelFetchLogic; |
| 83 | + static const inline auto Registrator = TFactory::TRegistrator<TDefaultFetchLogic>("default"); |
| 84 | + |
| 85 | + std::vector<TChunkRestoreInfo> ColumnChunks; |
| 86 | + std::optional<TString> StorageId; |
| 87 | + virtual void DoOnDataCollected(const std::shared_ptr<NArrow::NAccessor::TAccessorsCollection>& resources) override { |
| 88 | + AFL_VERIFY(!IIndexInfo::IsSpecialColumn(GetColumnId())); |
| 89 | + std::vector<TPortionDataAccessor::TAssembleBlobInfo> chunks; |
| 90 | + for (auto&& i : ColumnChunks) { |
| 91 | + chunks.emplace_back(i.ExtractDataVerified()); |
| 92 | + } |
| 93 | + |
| 94 | + TPortionDataAccessor::TPreparedColumn column(std::move(chunks), Source->GetSourceSchema()->GetColumnLoaderVerified(GetColumnId())); |
| 95 | + resources->AddVerified(GetColumnId(), column.AssembleAccessor().DetachResult(), true); |
| 96 | + } |
| 97 | + |
| 98 | + virtual void DoOnDataReceived(TReadActionsCollection& /*nextRead*/, NBlobOperations::NRead::TCompositeReadBlobs& blobs) override { |
| 99 | + if (ColumnChunks.empty()) { |
| 100 | + return; |
| 101 | + } |
| 102 | + for (auto&& i : ColumnChunks) { |
| 103 | + if (!i.GetBlobRangeOptional()) { |
| 104 | + continue; |
| 105 | + } |
| 106 | + AFL_VERIFY(!!StorageId); |
| 107 | + i.SetBlobData(blobs.Extract(*StorageId, *i.GetBlobRangeOptional())); |
| 108 | + } |
| 109 | + } |
| 110 | + |
| 111 | + virtual void DoStart(const std::shared_ptr<NArrow::NAccessor::TAccessorsCollection>& resources, TReadActionsCollection& nextRead) override { |
| 112 | + if (resources->HasColumn(GetColumnId())) { |
| 113 | + return; |
| 114 | + } |
| 115 | + auto columnChunks = Source->GetStageData().GetPortionAccessor().GetColumnChunksPointers(GetColumnId()); |
| 116 | + if (columnChunks.empty()) { |
| 117 | + ColumnChunks.emplace_back( |
| 118 | + Source->GetRecordsCount(), TPortionDataAccessor::TAssembleBlobInfo(Source->GetRecordsCount(), |
| 119 | + Source->GetSourceSchema()->GetExternalDefaultValueVerified(GetColumnId()))); |
| 120 | + return; |
| 121 | + } |
| 122 | + StorageId = Source->GetColumnStorageId(GetColumnId()); |
| 123 | + TBlobsAction blobsAction(Source->GetContext()->GetCommonContext()->GetStoragesManager(), NBlobOperations::EConsumer::SCAN); |
| 124 | + auto reading = blobsAction.GetReading(*StorageId); |
| 125 | + auto filterPtr = Source->GetStageData().GetAppliedFilter(); |
| 126 | + const NArrow::TColumnFilter& cFilter = filterPtr ? *filterPtr : NArrow::TColumnFilter::BuildAllowFilter(); |
| 127 | + auto itFilter = cFilter.GetIterator(false, Source->GetRecordsCount()); |
| 128 | + bool itFinished = false; |
| 129 | + for (auto&& c : columnChunks) { |
| 130 | + AFL_VERIFY(!itFinished); |
| 131 | + if (!itFilter.IsBatchForSkip(c->GetMeta().GetRecordsCount())) { |
| 132 | + reading->SetIsBackgroundProcess(false); |
| 133 | + reading->AddRange(Source->RestoreBlobRange(c->BlobRange)); |
| 134 | + ColumnChunks.emplace_back(c->GetMeta().GetRecordsCount(), Source->RestoreBlobRange(c->BlobRange)); |
| 135 | + } else { |
| 136 | + ColumnChunks.emplace_back(c->GetMeta().GetRecordsCount(), TPortionDataAccessor::TAssembleBlobInfo( |
| 137 | + c->GetMeta().GetRecordsCount(), Source->GetSourceSchema()->GetExternalDefaultValueVerified(c->GetColumnId()))); |
| 138 | + } |
| 139 | + itFinished = !itFilter.Next(c->GetMeta().GetRecordsCount()); |
| 140 | + } |
| 141 | + AFL_VERIFY(itFinished)("filter", itFilter.DebugString())("count", Source->GetRecordsCount()); |
| 142 | + for (auto&& i : blobsAction.GetReadingActions()) { |
| 143 | + nextRead.Add(i); |
| 144 | + } |
| 145 | + } |
| 146 | + |
| 147 | +public: |
| 148 | + TDefaultFetchLogic(const ui32 columnId, const std::shared_ptr<IDataSource>& source) |
| 149 | + : TBase(columnId, source) { |
| 150 | + } |
| 151 | +}; |
| 152 | + |
| 153 | +class TColumnsFetcherTask: public NBlobOperations::NRead::ITask, public NColumnShard::TMonitoringObjectsCounter<TColumnsFetcherTask> { |
| 154 | +private: |
| 155 | + using TBase = NBlobOperations::NRead::ITask; |
| 156 | + std::shared_ptr<IDataSource> Source; |
| 157 | + THashMap<ui32, std::shared_ptr<IKernelFetchLogic>> DataFetchers; |
| 158 | + TFetchingScriptCursor Cursor; |
| 159 | + NBlobOperations::NRead::TCompositeReadBlobs ProvidedBlobs; |
| 160 | + const NColumnShard::TCounterGuard Guard; |
| 161 | + virtual void DoOnDataReady(const std::shared_ptr<NResourceBroker::NSubscribe::TResourcesGuard>& resourcesGuard) override; |
| 162 | + virtual bool DoOnError(const TString& storageId, const TBlobRange& range, const IBlobsReadingAction::TErrorStatus& status) override { |
| 163 | + AFL_ERROR(NKikimrServices::TX_COLUMNSHARD_SCAN)("error_on_blob_reading", range.ToString())( |
| 164 | + "scan_actor_id", Source->GetContext()->GetCommonContext()->GetScanActorId())("status", status.GetErrorMessage())( |
| 165 | + "status_code", status.GetStatus())("storage_id", storageId); |
| 166 | + NActors::TActorContext::AsActorContext().Send(Source->GetContext()->GetCommonContext()->GetScanActorId(), |
| 167 | + std::make_unique<NColumnShard::TEvPrivate::TEvTaskProcessedResult>( |
| 168 | + TConclusionStatus::Fail("cannot read blob range " + range.ToString()))); |
| 169 | + return false; |
| 170 | + } |
| 171 | + |
| 172 | +public: |
| 173 | + TColumnsFetcherTask(TReadActionsCollection&& actions, const THashMap<ui32, std::shared_ptr<IKernelFetchLogic>>& fetchers, |
| 174 | + const std::shared_ptr<IDataSource>& source, const TFetchingScriptCursor& cursor, const TString& taskCustomer, |
| 175 | + const TString& externalTaskId = "") |
| 176 | + : TBase(actions, taskCustomer, externalTaskId) |
| 177 | + , Source(source) |
| 178 | + , DataFetchers(fetchers) |
| 179 | + , Cursor(cursor) |
| 180 | + , Guard(Source->GetContext()->GetCommonContext()->GetCounters().GetFetchBlobsGuard()) |
| 181 | + { |
| 182 | + } |
| 183 | +}; |
| 184 | + |
13 | 185 | class TBlobsFetcherTask: public NBlobOperations::NRead::ITask, public NColumnShard::TMonitoringObjectsCounter<TBlobsFetcherTask> {
|
14 | 186 | private:
|
15 | 187 | using TBase = NBlobOperations::NRead::ITask;
|
|
0 commit comments