Skip to content

Commit 5219891

Browse files
ivanmorozov333 authored and zverevgeny committed
don't create delete flag column in indexation (#7082)
1 parent c67e3c2 commit 5219891

File tree

8 files changed

+27
-14
lines changed

8 files changed

+27
-14
lines changed

ydb/core/kqp/ut/olap/helpers/typed_local.h

+1-1
Original file line numberDiff line numberDiff line change
@@ -74,7 +74,7 @@ class TTypedLocalHelper: public Tests::NCS::THelper {
7474
void GetCount(ui64& count);
7575

7676
template <class TFiller>
77-
void FillTable(const TFiller& fillPolicy, const ui32 pkKff = 0, const ui32 numRows = 800000) const {
77+
void FillTable(const TFiller& fillPolicy, const double pkKff = 0, const ui32 numRows = 800000) const {
7878
std::vector<NArrow::NConstruction::IArrayBuilder::TPtr> builders;
7979
builders.emplace_back(NArrow::NConstruction::TSimpleArrayConstructor<NArrow::NConstruction::TIntSeqFiller<arrow::Int64Type>>::BuildNotNullable("pk_int", numRows * pkKff));
8080
builders.emplace_back(std::make_shared<NArrow::NConstruction::TSimpleArrayConstructor<TFiller>>("field", fillPolicy));

ydb/core/kqp/ut/olap/kqp_olap_stats_ut.cpp

+1-1
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@ using namespace NYdb::NTable;
1212
Y_UNIT_TEST_SUITE(KqpOlapStats) {
1313
constexpr size_t inserted_rows = 1000;
1414
constexpr size_t tables_in_store = 1000;
15-
constexpr size_t size_single_table = 13352;
15+
constexpr size_t size_single_table = 13152;
1616

1717
const TVector<TTestHelper::TColumnSchema> schema = {
1818
TTestHelper::TColumnSchema().SetName("id").SetType(NScheme::NTypeIds::Int32).SetNullable(false),

ydb/core/kqp/ut/olap/sys_view_ut.cpp

+7-4
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
#include "helpers/get_value.h"
66

77
#include <library/cpp/testing/unittest/registar.h>
8+
#include <ydb/core/tx/columnshard/engines/scheme/abstract/index_info.h>
89
#include <ydb/core/tx/columnshard/hooks/testing/controller.h>
910
#include <ydb/core/tx/columnshard/test_helper/controllers.h>
1011

@@ -229,7 +230,9 @@ Y_UNIT_TEST_SUITE(KqpOlapSysView) {
229230
helper.CreateTestOlapTable();
230231
NArrow::NConstruction::TStringPoolFiller sPool(3, 52);
231232
helper.FillTable(sPool, 0, 800000);
232-
csController->WaitCompactions(TDuration::Seconds(10));
233+
csController->WaitCompactions(TDuration::Seconds(5));
234+
helper.FillTable(sPool, 0.5, 800000);
235+
csController->WaitCompactions(TDuration::Seconds(5));
233236

234237
helper.GetVolumes(rawBytes1, bytes1, false, {"new_column_ui64"});
235238
AFL_VERIFY(rawBytes1 == 0);
@@ -241,9 +244,9 @@ Y_UNIT_TEST_SUITE(KqpOlapSysView) {
241244
csController->WaitActualization(TDuration::Seconds(10));
242245
ui64 rawBytes2;
243246
ui64 bytes2;
244-
helper.GetVolumes(rawBytes2, bytes2, false, {"new_column_ui64"});
245-
AFL_VERIFY(rawBytes2 == 6500023)("real", rawBytes2);
246-
AFL_VERIFY(bytes2 == 38880)("b", bytes2);
247+
helper.GetVolumes(rawBytes2, bytes2, false, { "new_column_ui64", NOlap::IIndexInfo::SPEC_COL_DELETE_FLAG });
248+
AFL_VERIFY(rawBytes2 == 0)("real", rawBytes2);
249+
AFL_VERIFY(bytes2 == 0)("b", bytes2);
247250
}
248251
}
249252

ydb/core/tx/columnshard/engines/changes/compaction/merger.cpp

+5-2
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,10 @@ std::vector<NKikimr::NOlap::TWritePortionInfoWithBlobsResult> TMerger::Execute(c
2020
arrow::FieldVector indexFields;
2121
indexFields.emplace_back(IColumnMerger::PortionIdField);
2222
indexFields.emplace_back(IColumnMerger::PortionRecordIndexField);
23-
IIndexInfo::AddSpecialFields(indexFields);
23+
if (resultFiltered->HasColumnId((ui32)IIndexInfo::ESpecialColumn::DELETE_FLAG)) {
24+
IIndexInfo::AddDeleteFields(indexFields);
25+
}
26+
IIndexInfo::AddSnapshotFields(indexFields);
2427
auto dataSchema = std::make_shared<arrow::Schema>(indexFields);
2528
NArrow::NMerger::TMergePartialStream mergeStream(
2629
resultFiltered->GetIndexInfo().GetReplaceKey(), dataSchema, false, IIndexInfo::GetSnapshotColumnNames());
@@ -137,7 +140,7 @@ std::vector<NKikimr::NOlap::TWritePortionInfoWithBlobsResult> TMerger::Execute(c
137140
TGeneralSerializedSlice slice(dataWithSecondary.GetExternalData(), schemaDetails, Context.Counters.SplitterCounters);
138141

139142
auto b = batchResult->Slice(recordIdx, slice.GetRecordsCount());
140-
const ui32 deletionsCount = IIndexInfo::CalcDeletions(b, true);
143+
const ui32 deletionsCount = IIndexInfo::CalcDeletions(b, false);
141144
auto constructor = TWritePortionInfoWithBlobsConstructor::BuildByBlobs(slice.GroupChunksByBlobs(groups),
142145
dataWithSecondary.GetSecondaryInplaceData(), pathId, resultFiltered->GetVersion(), resultFiltered->GetSnapshot(),
143146
SaverContext.GetStoragesManager());

ydb/core/tx/columnshard/engines/changes/indexation.cpp

+8-3
Original file line numberDiff line numberDiff line change
@@ -158,8 +158,11 @@ TConclusionStatus TInsertColumnEngineChanges::DoConstructBlobs(TConstructionCont
158158

159159
for (auto& inserted : DataToIndex) {
160160
auto blobSchema = context.SchemaVersions.GetSchemaVerified(inserted.GetSchemaVersion());
161-
std::vector<ui32> filteredIds = inserted.GetMeta().GetSchemaSubset().Apply(blobSchema->GetIndexInfo().GetColumnIds(true));
161+
std::vector<ui32> filteredIds = inserted.GetMeta().GetSchemaSubset().Apply(blobSchema->GetIndexInfo().GetColumnIds(false));
162162
usageColumnIds.insert(filteredIds.begin(), filteredIds.end());
163+
if (inserted.GetMeta().GetModificationType() == NEvWrite::EModificationType::Delete) {
164+
usageColumnIds.emplace((ui32)IIndexInfo::ESpecialColumn::DELETE_FLAG);
165+
}
163166
if (usageColumnIds.size() == resultSchema->GetIndexInfo().GetColumnIds(true).size()) {
164167
break;
165168
}
@@ -179,8 +182,10 @@ TConclusionStatus TInsertColumnEngineChanges::DoConstructBlobs(TConstructionCont
179182
}
180183

181184
IIndexInfo::AddSnapshotColumns(*batch, inserted.GetSnapshot());
182-
IIndexInfo::AddDeleteFlagsColumn(*batch, inserted.GetMeta().GetModificationType() == NEvWrite::EModificationType::Delete);
183-
usageColumnIds.insert(IIndexInfo::GetSystemColumnIds().begin(), IIndexInfo::GetSystemColumnIds().end());
185+
if (usageColumnIds.contains((ui32)IIndexInfo::ESpecialColumn::DELETE_FLAG)) {
186+
IIndexInfo::AddDeleteFlagsColumn(*batch, inserted.GetMeta().GetModificationType() == NEvWrite::EModificationType::Delete);
187+
}
188+
usageColumnIds.insert(IIndexInfo::GetSnapshotColumnIds().begin(), IIndexInfo::GetSnapshotColumnIds().end());
184189

185190
batch = resultSchema->NormalizeBatch(*blobSchema, batch, usageColumnIds).DetachResult();
186191
pathBatches.Add(inserted, shardingFilterCommit, batch);

ydb/core/tx/columnshard/engines/portions/read_with_blobs.cpp

-2
Original file line numberDiff line numberDiff line change
@@ -104,8 +104,6 @@ std::optional<TWritePortionInfoWithBlobsResult> TReadPortionInfoWithBlobs::SyncP
104104
std::vector<std::shared_ptr<IPortionDataChunk>> newChunks;
105105
if (it != columnChunks.end()) {
106106
newChunks = to->GetIndexInfo().ActualizeColumnData(it->second, from->GetIndexInfo(), i);
107-
} else {
108-
newChunks = to->GetIndexInfo().MakeEmptyChunks(i, pageSizes, to->GetIndexInfo().GetColumnFeaturesVerified(i));
109107
}
110108
AFL_VERIFY(entityChunksNew.emplace(i, std::move(newChunks)).second);
111109
}

ydb/core/tx/columnshard/engines/scheme/abstract/index_info.h

+4
Original file line numberDiff line numberDiff line change
@@ -77,6 +77,10 @@ class IIndexInfo {
7777
fields.push_back(arrow::field(SPEC_COL_TX_ID, arrow::uint64()));
7878
}
7979

80+
static void AddDeleteFields(std::vector<std::shared_ptr<arrow::Field>>& fields) {
81+
fields.push_back(arrow::field(SPEC_COL_DELETE_FLAG, arrow::boolean()));
82+
}
83+
8084
static const std::set<ui32>& GetSnapshotColumnIdsSet() {
8185
static const std::set<ui32> result = { (ui32)ESpecialColumn::PLAN_STEP, (ui32)ESpecialColumn::TX_ID };
8286
return result;

ydb/core/tx/columnshard/engines/ut/ut_logs_engine.cpp

+1-1
Original file line numberDiff line numberDiff line change
@@ -500,7 +500,7 @@ Y_UNIT_TEST_SUITE(TColumnEngineTestLogs) {
500500
ui64 txId = 1;
501501
auto selectInfo = engine.Select(paths[0], TSnapshot(planStep, txId), NOlap::TPKRangesFilter(false));
502502
UNIT_ASSERT_VALUES_EQUAL(selectInfo->PortionsOrderedPK.size(), 1);
503-
UNIT_ASSERT_VALUES_EQUAL(selectInfo->PortionsOrderedPK[0]->NumChunks(), columnIds.size() + TIndexInfo::GetSystemColumnNames().size());
503+
UNIT_ASSERT_VALUES_EQUAL(selectInfo->PortionsOrderedPK[0]->NumChunks(), columnIds.size() + TIndexInfo::GetSnapshotColumnIdsSet().size());
504504
}
505505

506506
{ // select another pathId

0 commit comments

Comments
 (0)