Skip to content

Commit 7d062e7

Browse files
authored
Merge cf5260b into bf4f55b
2 parents bf4f55b + cf5260b commit 7d062e7

File tree

7 files changed

+215
-115
lines changed

7 files changed

+215
-115
lines changed

ydb/core/kqp/executer_actor/kqp_executer_impl.h

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1622,13 +1622,14 @@ class TKqpExecuterBase : public TActor<TDerived> {
16221622
THashMap<ui64, ui64> assignedShardsCount;
16231623
auto& stage = stageInfo.Meta.GetStage(stageInfo.Id);
16241624

1625+
auto& columnShardHashV1Params = stageInfo.Meta.ColumnShardHashV1Params;
16251626
if (enableShuffleElimination && stageInfo.Meta.ColumnTableInfoPtr) {
16261627
const auto& tableDesc = stageInfo.Meta.ColumnTableInfoPtr->Description;
1627-
stageInfo.Meta.SourceShardCount = tableDesc.GetColumnShardCount();
1628-
stageInfo.Meta.SourceTableKeyColumnTypes = std::make_shared<TVector<NScheme::TTypeInfo>>();
1628+
columnShardHashV1Params.SourceShardCount = tableDesc.GetColumnShardCount();
1629+
columnShardHashV1Params.SourceTableKeyColumnTypes = std::make_shared<TVector<NScheme::TTypeInfo>>();
16291630
for (const auto& column: tableDesc.GetSharding().GetHashSharding().GetColumns()) {
16301631
auto columnType = stageInfo.Meta.TableConstInfo->Columns.at(column).Type;
1631-
stageInfo.Meta.SourceTableKeyColumnTypes->push_back(columnType);
1632+
columnShardHashV1Params.SourceTableKeyColumnTypes->push_back(columnType);
16321633
}
16331634
}
16341635

@@ -1688,8 +1689,8 @@ class TKqpExecuterBase : public TActor<TDerived> {
16881689

16891690
} else if (enableShuffleElimination /* save partitioning for shuffle elimination */) {
16901691
std::size_t stageInternalTaskId = 0;
1691-
stageInfo.Meta.TaskIdByHash = std::make_shared<TVector<ui64>>();
1692-
stageInfo.Meta.TaskIdByHash->resize(stageInfo.Meta.SourceShardCount);
1692+
columnShardHashV1Params.TaskIdByHash = std::make_shared<TVector<ui64>>();
1693+
columnShardHashV1Params.TaskIdByHash->resize(columnShardHashV1Params.SourceShardCount);
16931694

16941695
for (auto&& pair : nodeShards) {
16951696
const auto nodeId = pair.first;
@@ -1739,7 +1740,7 @@ class TKqpExecuterBase : public TActor<TDerived> {
17391740

17401741
for (const auto& readInfo: *task.Meta.Reads) {
17411742
Y_ENSURE(hashByShardId.contains(readInfo.ShardId));
1742-
(*stageInfo.Meta.TaskIdByHash)[hashByShardId[readInfo.ShardId]] = stageInternalTaskId;
1743+
(*columnShardHashV1Params.TaskIdByHash)[hashByShardId[readInfo.ShardId]] = stageInternalTaskId;
17431744
}
17441745

17451746
}

ydb/core/kqp/executer_actor/kqp_tasks_graph.cpp

Lines changed: 88 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,16 @@ using namespace NYql::NDq;
2121
using namespace NYql::NNodes;
2222

2323

24+
// Renders a list of key column types as a bracketed, comma-separated string
// for use in log and error messages.
//
// keyColumnTypes: the column types to render; each element's GetTypeId() is
//                 converted to text with NYql::NProto::TypeIds_Name.
// Returns: "[" followed by the type names joined with "," followed by "]".
TString KeyTypesToString(const TVector<NScheme::TTypeInfo>& keyColumnTypes) {
    TVector<TString> typeNames;
    // One name is produced per input type, so the final size is known up front.
    typeNames.reserve(keyColumnTypes.size());
    for (const auto& keyColumnType : keyColumnTypes) {
        typeNames.push_back(NYql::NProto::TypeIds_Name(keyColumnType.GetTypeId()));
    }
    return "[" + JoinSeq(",", typeNames) + "]";
}
32+
33+
2434
void LogStage(const NActors::TActorContext& ctx, const TStageInfo& stageInfo) {
2535
LOG_DEBUG_S(ctx, NKikimrServices::KQP_EXECUTER, stageInfo.DebugString());
2636
}
@@ -473,17 +483,15 @@ void BuildKqpStageChannels(TKqpTasksGraph& tasksGraph, TStageInfo& stageInfo,
473483
<< (spilling ? " with spilling" : " without spilling"));
474484
};
475485

476-
477486
bool hasMap = false;
478-
bool isFusedStage = (stageInfo.Meta.TaskIdByHash != nullptr);
487+
auto& columnShardHashV1Params = stageInfo.Meta.ColumnShardHashV1Params;
488+
bool isFusedStage = (columnShardHashV1Params.TaskIdByHash != nullptr);
479489
if (enableShuffleElimination && !isFusedStage) { // taskIdHash can be already set if it is a fused stage, so hashpartition will derive columnv1 parameters from there
480490
for (ui32 inputIndex = 0; inputIndex < stage.InputsSize(); ++inputIndex) {
481491
const auto& input = stage.GetInputs(inputIndex);
482-
auto& originStageInfo = tasksGraph.GetStageInfo(NYql::NDq::TStageId(stageInfo.Id.TxId, input.GetStageIndex()));
483-
stageInfo.Meta.TaskIdByHash = originStageInfo.Meta.TaskIdByHash;
484-
stageInfo.Meta.SourceShardCount = originStageInfo.Meta.SourceShardCount;
485-
stageInfo.Meta.SourceTableKeyColumnTypes = originStageInfo.Meta.SourceTableKeyColumnTypes;
486-
if (input.GetTypeCase() == NKqpProto::TKqpPhyConnection::kMap) {
492+
auto& originStageInfo = tasksGraph.GetStageInfo(NYql::NDq::TStageId(stageInfo.Id.TxId, input.GetStageIndex()));;
493+
columnShardHashV1Params = originStageInfo.Meta.ColumnShardHashV1Params;
494+
if (input.GetTypeCase() == NKqpProto::TKqpPhyConnection::kMap) {
487495
// We want to enforce sourceShardCount from the map connection, because there can be at most one map connection
488496
// and ColumnShardHash in Shuffle will use this parameter to shuffle on this map (same with taskIdByHash mapping)
489497
hasMap = true;
@@ -494,10 +502,10 @@ void BuildKqpStageChannels(TKqpTasksGraph& tasksGraph, TStageInfo& stageInfo,
494502

495503
// if it is stage, where we don't inherit parallelism.
496504
if (enableShuffleElimination && !hasMap && !isFusedStage && stageInfo.Tasks.size() > 0 && stage.InputsSize() > 0) {
497-
stageInfo.Meta.SourceShardCount = stageInfo.Tasks.size();
498-
stageInfo.Meta.TaskIdByHash = std::make_shared<TVector<ui64>>(stageInfo.Meta.SourceShardCount);
499-
for (std::size_t i = 0; i < stageInfo.Meta.SourceShardCount; ++i) {
500-
(*stageInfo.Meta.TaskIdByHash)[i] = i;
505+
columnShardHashV1Params.SourceShardCount = stageInfo.Tasks.size();
506+
columnShardHashV1Params.TaskIdByHash = std::make_shared<TVector<ui64>>(columnShardHashV1Params.SourceShardCount);
507+
for (std::size_t i = 0; i < columnShardHashV1Params.SourceShardCount; ++i) {
508+
(*columnShardHashV1Params.TaskIdByHash)[i] = i;
501509
}
502510

503511
for (auto& input : stage.GetInputs()) {
@@ -510,17 +518,17 @@ void BuildKqpStageChannels(TKqpTasksGraph& tasksGraph, TStageInfo& stageInfo,
510518
continue;
511519
}
512520

513-
Y_ENSURE(enableShuffleElimination, "OptShuffleElimination wasn't turned on, but ColumnShardHashV1 detected!");
514-
// ^ if the flag if false, and kColumnShardHashV1 detected - then the data which would be returned - would be incorrect,
521+
Y_ENSURE(enableShuffleElimination, "OptShuffleElimination wasn't turned on, but ColumnShardHashV1 detected!");
522+
// ^ if the flag is false and kColumnShardHashV1 is detected, then the data that would be returned would be incorrect,
515523
// because we didn't save partitioning in the BuildScanTasksFromShards.
516524

517525
auto columnShardHashV1 = hashShuffle.GetColumnShardHashV1();
518-
stageInfo.Meta.SourceTableKeyColumnTypes = std::make_shared<TVector<NScheme::TTypeInfo>>();
519-
stageInfo.Meta.SourceTableKeyColumnTypes->reserve(columnShardHashV1.KeyColumnTypesSize());
526+
columnShardHashV1Params.SourceTableKeyColumnTypes = std::make_shared<TVector<NScheme::TTypeInfo>>();
527+
columnShardHashV1Params.SourceTableKeyColumnTypes->reserve(columnShardHashV1.KeyColumnTypesSize());
520528
for (const auto& keyColumnType: columnShardHashV1.GetKeyColumnTypes()) {
521529
auto typeId = static_cast<NScheme::TTypeId>(keyColumnType);
522530
auto typeInfo = NScheme::TTypeInfo{typeId};
523-
stageInfo.Meta.SourceTableKeyColumnTypes->push_back(typeInfo);
531+
columnShardHashV1Params.SourceTableKeyColumnTypes->push_back(typeInfo);
524532
}
525533
break;
526534
}
@@ -544,18 +552,49 @@ void BuildKqpStageChannels(TKqpTasksGraph& tasksGraph, TStageInfo& stageInfo,
544552
}
545553
case NKqpProto::TKqpPhyCnHashShuffle::kColumnShardHashV1: {
546554
Y_ENSURE(enableShuffleElimination, "OptShuffleElimination wasn't turned on, but ColumnShardHashV1 detected!");
547-
inputStageInfo.Meta.TaskIdByHash = stageInfo.Meta.TaskIdByHash;
548-
inputStageInfo.Meta.SourceShardCount = stageInfo.Meta.SourceShardCount;
549-
inputStageInfo.Meta.SourceTableKeyColumnTypes = stageInfo.Meta.SourceTableKeyColumnTypes;
555+
556+
LOG_DEBUG_S(
557+
*TlsActivationContext,
558+
NKikimrServices::KQP_EXECUTER,
559+
"Propogating columnhashv1 pararms to stage: "
560+
<< "[" << inputStageInfo.Id.TxId << ":" << inputStageInfo.Id.StageId << "]" << " "
561+
<< KeyTypesToString(*columnShardHashV1Params.SourceTableKeyColumnTypes) << " "
562+
<< "[" << JoinSeq(",", input.GetHashShuffle().GetKeyColumns()) << "]";
563+
);
564+
565+
Y_ENSURE(
566+
columnShardHashV1Params.SourceTableKeyColumnTypes->size() == input.GetHashShuffle().KeyColumnsSize(),
567+
TStringBuilder{}
568+
<< "Hashshuffle keycolumns and keytypes args count mismatch during executer stage, types: "
569+
<< KeyTypesToString(*columnShardHashV1Params.SourceTableKeyColumnTypes) << " for the columns: "
570+
<< "[" << JoinSeq(",", input.GetHashShuffle().GetKeyColumns()) << "]"
571+
);
572+
573+
for (auto& originTaskId : inputStageInfo.Tasks) {
574+
auto& originTask = tasksGraph.GetTask(originTaskId);
575+
auto& taskOutput = originTask.Outputs[outputIdx];
576+
taskOutput.Meta.ColumnShardHashV1Params = columnShardHashV1Params;
577+
}
578+
579+
inputStageInfo.Meta.ColumnShardHashV1Params = columnShardHashV1Params;
550580
hashKind = NHashKind::EColumnShardHashV1;
551581
break;
552582
}
553583
default: {
554584
Y_ENSURE(false, "undefined type of hash for shuffle");
555585
}
556586
}
557-
BuildHashShuffleChannels(tasksGraph, stageInfo, inputIdx, inputStageInfo, outputIdx,
558-
input.GetHashShuffle().GetKeyColumns(), enableSpilling, log, hashKind);
587+
BuildHashShuffleChannels(
588+
tasksGraph,
589+
stageInfo,
590+
inputIdx,
591+
inputStageInfo,
592+
outputIdx,
593+
input.GetHashShuffle().GetKeyColumns(),
594+
enableSpilling,
595+
log,
596+
hashKind
597+
);
559598
break;
560599
}
561600
case NKqpProto::TKqpPhyConnection::kBroadcast:
@@ -1036,7 +1075,7 @@ void FillTaskMeta(const TStageInfo& stageInfo, const TTask& task, NYql::NDqProto
10361075
private:
10371076
const TTableConstInfo& TableInfo;
10381077
public:
1039-
TResolverTable(const TTableConstInfo& tableInfo)
1078+
TResolverTable(const TTableConstInfo& tableInfo)
10401079
: TableInfo(tableInfo) {
10411080

10421081
}
@@ -1118,9 +1157,9 @@ void FillTaskMeta(const TStageInfo& stageInfo, const TTask& task, NYql::NDqProto
11181157
}
11191158

11201159
void FillOutputDesc(
1121-
const TKqpTasksGraph& tasksGraph,
1122-
NYql::NDqProto::TTaskOutput& outputDesc,
1123-
const TTaskOutput& output,
1160+
const TKqpTasksGraph& tasksGraph,
1161+
NYql::NDqProto::TTaskOutput& outputDesc,
1162+
const TTaskOutput& output,
11241163
bool enableSpilling,
11251164
const TStageInfo& stageInfo
11261165
) {
@@ -1143,19 +1182,37 @@ void FillOutputDesc(
11431182
break;
11441183
}
11451184
case NHashKind::EColumnShardHashV1: {
1146-
Y_ENSURE(stageInfo.Meta.SourceShardCount != 0, "ShardCount for ColumnShardHashV1 Shuffle can't be equal to 0");
1147-
Y_ENSURE(stageInfo.Meta.TaskIdByHash != nullptr, "TaskIdByHash for ColumnShardHashV1 wasn't propogated to this stage");
1148-
Y_ENSURE(stageInfo.Meta.SourceTableKeyColumnTypes != nullptr, "SourceTableKeyColumnTypes for ColumnShardHashV1 wasn't propogated to this stage");
1185+
auto& columnShardHashV1Params = output.Meta.ColumnShardHashV1Params;
1186+
LOG_DEBUG_S(
1187+
*TlsActivationContext,
1188+
NKikimrServices::KQP_EXECUTER,
1189+
"Filling columnshardhashv1 params for sending it to runtime: "
1190+
<< "[" << stageInfo.Id.TxId << ":" << stageInfo.Id.StageId << "]"
1191+
<< " " << KeyTypesToString(*columnShardHashV1Params.SourceTableKeyColumnTypes)
1192+
<< " for the columns: " << "[" << JoinSeq(",", output.KeyColumns) << "]"
1193+
);
1194+
Y_ENSURE(columnShardHashV1Params.SourceShardCount != 0, "ShardCount for ColumnShardHashV1 Shuffle can't be equal to 0");
1195+
Y_ENSURE(columnShardHashV1Params.TaskIdByHash != nullptr, "TaskIdByHash for ColumnShardHashV1 wasn't propogated to this stage");
1196+
Y_ENSURE(columnShardHashV1Params.SourceTableKeyColumnTypes != nullptr, "SourceTableKeyColumnTypes for ColumnShardHashV1 wasn't propogated to this stage");
1197+
1198+
Y_ENSURE(
1199+
columnShardHashV1Params.SourceTableKeyColumnTypes->size() == output.KeyColumns.size(),
1200+
TStringBuilder{}
1201+
<< "Hashshuffle keycolumns and keytypes args count mismatch during executer stage, types: "
1202+
<< KeyTypesToString(*columnShardHashV1Params.SourceTableKeyColumnTypes) << " for the columns: "
1203+
<< "[" << JoinSeq(",", output.KeyColumns) << "]"
1204+
);
1205+
11491206
auto& columnShardHashV1 = *hashPartitionDesc.MutableColumnShardHashV1();
1150-
columnShardHashV1.SetShardCount(stageInfo.Meta.SourceShardCount);
1207+
columnShardHashV1.SetShardCount(columnShardHashV1Params.SourceShardCount);
11511208

11521209
auto* columnTypes = columnShardHashV1.MutableKeyColumnTypes();
1153-
for (const auto& type: *stageInfo.Meta.SourceTableKeyColumnTypes) {
1210+
for (const auto& type: *columnShardHashV1Params.SourceTableKeyColumnTypes) {
11541211
columnTypes->Add(type.GetTypeId());
11551212
}
11561213

11571214
auto* taskIdByHash = columnShardHashV1.MutableTaskIdByHash();
1158-
for (std::size_t taskID: *stageInfo.Meta.TaskIdByHash) {
1215+
for (std::size_t taskID: *columnShardHashV1Params.TaskIdByHash) {
11591216
taskIdByHash->Add(taskID);
11601217
}
11611218
break;

ydb/core/kqp/executer_actor/kqp_tasks_graph.h

Lines changed: 8 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,12 @@ struct TTransaction : private TMoveOnly {
2929
, Params(std::move(params)) {}
3030
};
3131

32+
struct TColumnShardHashV1Params {
33+
ui64 SourceShardCount = 0;
34+
std::shared_ptr<TVector<NScheme::TTypeInfo>> SourceTableKeyColumnTypes = nullptr;
35+
std::shared_ptr<TVector<ui64>> TaskIdByHash = nullptr; // hash belongs [0; ShardCount]
36+
};
37+
3238
struct TStageInfoMeta {
3339
const IKqpGateway::TPhysicalTxData& Tx;
3440

@@ -44,11 +50,7 @@ struct TStageInfoMeta {
4450
THolder<TKeyDesc> ShardKey;
4551
NSchemeCache::TSchemeCacheRequest::EKind ShardKind = NSchemeCache::TSchemeCacheRequest::EKind::KindUnknown;
4652

47-
// used for ColumnV1Hashing
48-
ui64 SourceShardCount = 0;
49-
std::shared_ptr<TVector<NScheme::TTypeInfo>> SourceTableKeyColumnTypes = nullptr;
50-
std::shared_ptr<TVector<ui64>> TaskIdByHash = nullptr; // hash belongs [0; ShardCount]
51-
//
53+
TColumnShardHashV1Params ColumnShardHashV1Params{};
5254

5355
const NKqpProto::TKqpPhyStage& GetStage(const size_t idx) const {
5456
auto& txBody = Tx.Body;
@@ -148,6 +150,7 @@ struct TTaskInputMeta {
148150
struct TTaskOutputMeta {
149151
NKikimrKqp::TKqpTableSinkSettings* SinkSettings = nullptr;
150152
THashMap<ui64, const TKeyDesc::TPartitionInfo*> ShardPartitions;
153+
TColumnShardHashV1Params ColumnShardHashV1Params;
151154
};
152155

153156
struct TShardKeyRanges {
Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
#!/bin/bash
#
# Prepends the line
#     PRAGMA ydb.OptShuffleElimination = 'true';
# to every regular file directly inside DIRECTORY.
#
# The script skips its own file and any file whose first line is already the
# pragma, so running it more than once does not stack duplicate pragma lines
# (and it can no longer push its own shebang off line 1).

DIRECTORY="./" # Replace with the path to your directory
PRAGMA_LINE="PRAGMA ydb.OptShuffleElimination = 'true';"
SELF_NAME="$(basename "$0")"

for FILE in "$DIRECTORY"/*; do
    # Only regular files are processed.
    [ -f "$FILE" ] || continue

    # Never rewrite this script itself.
    [ "$(basename "$FILE")" = "$SELF_NAME" ] && continue

    # Skip files that already start with the pragma.
    if [ "$(head -n 1 "$FILE")" = "$PRAGMA_LINE" ]; then
        continue
    fi

    # Write pragma + original content to a temp file, then replace the original
    # only if the write succeeded.
    { echo "$PRAGMA_LINE"; cat "$FILE"; } > "$FILE.tmp" && mv "$FILE.tmp" "$FILE"
done

0 commit comments

Comments
 (0)