@@ -489,17 +489,27 @@ void BuildKqpStageChannels(TKqpTasksGraph& tasksGraph, TStageInfo& stageInfo,
489
489
<< (spilling ? " with spilling" : " without spilling" ));
490
490
};
491
491
492
-
493
492
bool hasMap = false ;
494
- bool isFusedStage = (stageInfo.Meta .TaskIdByHash != nullptr );
495
- if (enableShuffleElimination && !isFusedStage) { // taskIdHash can be already set if it is a fused stage, so hashpartition will derive columnv1 parameters from there
493
+ auto & columnShardHashV1Params = stageInfo.Meta .ColumnShardHashV1Params ;
494
+ bool isFusedWithScanStage = (stageInfo.Meta .TableConstInfo != nullptr );
495
+ if (enableShuffleElimination && !isFusedWithScanStage) { // taskIdHash can be already set if it is a fused stage, so hashpartition will derive columnv1 parameters from there
496
496
for (ui32 inputIndex = 0 ; inputIndex < stage.InputsSize (); ++inputIndex) {
497
497
const auto & input = stage.GetInputs (inputIndex);
498
498
auto & originStageInfo = tasksGraph.GetStageInfo (NYql::NDq::TStageId (stageInfo.Id .TxId , input.GetStageIndex ()));
499
- stageInfo.Meta .TaskIdByHash = originStageInfo.Meta .TaskIdByHash ;
500
- stageInfo.Meta .SourceShardCount = originStageInfo.Meta .SourceShardCount ;
501
- stageInfo.Meta .SourceTableKeyColumnTypes = originStageInfo.Meta .SourceTableKeyColumnTypes ;
502
- if (input.GetTypeCase () == NKqpProto::TKqpPhyConnection::kMap ) {
499
+ ui32 outputIdx = input.GetOutputIndex ();
500
+ columnShardHashV1Params = originStageInfo.Meta .GetColumnShardHashV1Params (outputIdx);
501
+ if (input.GetTypeCase () == NKqpProto::TKqpPhyConnection::kMap || inputIndex == stage.InputsSize () - 1 ) { // this branch is only for logging purposes
502
+ LOG_DEBUG_S (
503
+ *TlsActivationContext,
504
+ NKikimrServices::KQP_EXECUTER,
505
+ " Chosed "
506
+ << " [" << originStageInfo.Id .TxId << " :" << originStageInfo.Id .StageId << " ]"
507
+ << " outputIdx: " << outputIdx << " to propogate through inputs stages of the stage "
508
+ << " [" << stageInfo.Id .TxId << " :" << stageInfo.Id .StageId << " ]" << " : "
509
+ << columnShardHashV1Params.KeyTypesToString ();
510
+ );
511
+ }
512
+ if (input.GetTypeCase () == NKqpProto::TKqpPhyConnection::kMap ) {
503
513
// We want to enforce sourceShardCount from map connection, cause it can be at most one map connection
504
514
// and ColumnShardHash in Shuffle will use this parameter to shuffle on this map (same with taskIdByHash mapping)
505
515
hasMap = true ;
@@ -509,11 +519,11 @@ void BuildKqpStageChannels(TKqpTasksGraph& tasksGraph, TStageInfo& stageInfo,
509
519
}
510
520
511
521
// if it is stage, where we don't inherit parallelism.
512
- if (enableShuffleElimination && !hasMap && !isFusedStage && stageInfo.Tasks .size () > 0 && stage.InputsSize () > 0 ) {
513
- stageInfo. Meta .SourceShardCount = stageInfo.Tasks .size ();
514
- stageInfo. Meta . TaskIdByHash = std::make_shared<TVector<ui64>>(stageInfo. Meta .SourceShardCount );
515
- for (std::size_t i = 0 ; i < stageInfo. Meta .SourceShardCount ; ++i) {
516
- (*stageInfo. Meta .TaskIdByHash )[i] = i;
522
+ if (enableShuffleElimination && !hasMap && !isFusedWithScanStage && stageInfo.Tasks .size () > 0 && stage.InputsSize () > 0 ) {
523
+ columnShardHashV1Params .SourceShardCount = stageInfo.Tasks .size ();
524
+ columnShardHashV1Params. TaskIdByHash = std::make_shared<TVector<ui64>>(columnShardHashV1Params .SourceShardCount );
525
+ for (std::size_t i = 0 ; i < columnShardHashV1Params .SourceShardCount ; ++i) {
526
+ (*columnShardHashV1Params .TaskIdByHash )[i] = i;
517
527
}
518
528
519
529
for (auto & input : stage.GetInputs ()) {
@@ -526,17 +536,17 @@ void BuildKqpStageChannels(TKqpTasksGraph& tasksGraph, TStageInfo& stageInfo,
526
536
continue ;
527
537
}
528
538
529
- Y_ENSURE (enableShuffleElimination, " OptShuffleElimination wasn't turned on, but ColumnShardHashV1 detected!" );
530
- // ^ if the flag if false, and kColumnShardHashV1 detected - then the data which would be returned - would be incorrect,
539
+ Y_ENSURE (enableShuffleElimination, " OptShuffleElimination wasn't turned on, but ColumnShardHashV1 detected!" );
540
+ // ^ if the flag is false and kColumnShardHashV1 is detected, then the data which would be returned would be incorrect,
531
541
// because we didn't save partitioning in the BuildScanTasksFromShards.
532
542
533
543
auto columnShardHashV1 = hashShuffle.GetColumnShardHashV1 ();
534
- stageInfo. Meta .SourceTableKeyColumnTypes = std::make_shared<TVector<NScheme::TTypeInfo>>();
535
- stageInfo. Meta .SourceTableKeyColumnTypes ->reserve (columnShardHashV1.KeyColumnTypesSize ());
544
+ columnShardHashV1Params .SourceTableKeyColumnTypes = std::make_shared<TVector<NScheme::TTypeInfo>>();
545
+ columnShardHashV1Params .SourceTableKeyColumnTypes ->reserve (columnShardHashV1.KeyColumnTypesSize ());
536
546
for (const auto & keyColumnType: columnShardHashV1.GetKeyColumnTypes ()) {
537
547
auto typeId = static_cast <NScheme::TTypeId>(keyColumnType);
538
548
auto typeInfo = NScheme::TTypeInfo{typeId};
539
- stageInfo. Meta .SourceTableKeyColumnTypes ->push_back (typeInfo);
549
+ columnShardHashV1Params .SourceTableKeyColumnTypes ->push_back (typeInfo);
540
550
}
541
551
break ;
542
552
}
@@ -560,18 +570,44 @@ void BuildKqpStageChannels(TKqpTasksGraph& tasksGraph, TStageInfo& stageInfo,
560
570
}
561
571
case NKqpProto::TKqpPhyCnHashShuffle::kColumnShardHashV1 : {
562
572
Y_ENSURE (enableShuffleElimination, " OptShuffleElimination wasn't turned on, but ColumnShardHashV1 detected!" );
563
- inputStageInfo.Meta .TaskIdByHash = stageInfo.Meta .TaskIdByHash ;
564
- inputStageInfo.Meta .SourceShardCount = stageInfo.Meta .SourceShardCount ;
565
- inputStageInfo.Meta .SourceTableKeyColumnTypes = stageInfo.Meta .SourceTableKeyColumnTypes ;
573
+
574
+ LOG_DEBUG_S (
575
+ *TlsActivationContext,
576
+ NKikimrServices::KQP_EXECUTER,
577
+ " Propogating columnhashv1 pararms to stage"
578
+ << " [" << inputStageInfo.Id .TxId << " :" << inputStageInfo.Id .StageId << " ]" << " which is input of stage "
579
+ << " [" << stageInfo.Id .TxId << " :" << stageInfo.Id .StageId << " ]" << " : "
580
+ << columnShardHashV1Params.KeyTypesToString () << " "
581
+ << " [" << JoinSeq (" ," , input.GetHashShuffle ().GetKeyColumns ()) << " ]" ;
582
+ );
583
+
584
+ Y_ENSURE (
585
+ columnShardHashV1Params.SourceTableKeyColumnTypes ->size () == input.GetHashShuffle ().KeyColumnsSize (),
586
+ TStringBuilder{}
587
+ << " Hashshuffle keycolumns and keytypes args count mismatch during executer stage, types: "
588
+ << columnShardHashV1Params.KeyTypesToString () << " for the columns: "
589
+ << " [" << JoinSeq (" ," , input.GetHashShuffle ().GetKeyColumns ()) << " ]"
590
+ );
591
+
592
+ inputStageInfo.Meta .HashParamsByOutput [outputIdx] = columnShardHashV1Params;
566
593
hashKind = NHashKind::EColumnShardHashV1;
567
594
break ;
568
595
}
569
596
default : {
570
597
Y_ENSURE (false , " undefined type of hash for shuffle" );
571
598
}
572
599
}
573
- BuildHashShuffleChannels (tasksGraph, stageInfo, inputIdx, inputStageInfo, outputIdx,
574
- input.GetHashShuffle ().GetKeyColumns (), enableSpilling, log, hashKind);
600
+ BuildHashShuffleChannels (
601
+ tasksGraph,
602
+ stageInfo,
603
+ inputIdx,
604
+ inputStageInfo,
605
+ outputIdx,
606
+ input.GetHashShuffle ().GetKeyColumns (),
607
+ enableSpilling,
608
+ log,
609
+ hashKind
610
+ );
575
611
break ;
576
612
}
577
613
case NKqpProto::TKqpPhyConnection::kBroadcast :
@@ -1052,7 +1088,7 @@ void FillTaskMeta(const TStageInfo& stageInfo, const TTask& task, NYql::NDqProto
1052
1088
private:
1053
1089
const TTableConstInfo& TableInfo;
1054
1090
public:
1055
- TResolverTable (const TTableConstInfo& tableInfo)
1091
+ TResolverTable (const TTableConstInfo& tableInfo)
1056
1092
: TableInfo(tableInfo) {
1057
1093
1058
1094
}
@@ -1134,9 +1170,10 @@ void FillTaskMeta(const TStageInfo& stageInfo, const TTask& task, NYql::NDqProto
1134
1170
}
1135
1171
1136
1172
void FillOutputDesc (
1137
- const TKqpTasksGraph& tasksGraph,
1138
- NYql::NDqProto::TTaskOutput& outputDesc,
1139
- const TTaskOutput& output,
1173
+ const TKqpTasksGraph& tasksGraph,
1174
+ NYql::NDqProto::TTaskOutput& outputDesc,
1175
+ const TTaskOutput& output,
1176
+ ui32 outputIdx,
1140
1177
bool enableSpilling,
1141
1178
const TStageInfo& stageInfo
1142
1179
) {
@@ -1159,19 +1196,37 @@ void FillOutputDesc(
1159
1196
break ;
1160
1197
}
1161
1198
case NHashKind::EColumnShardHashV1: {
1162
- Y_ENSURE (stageInfo.Meta .SourceShardCount != 0 , " ShardCount for ColumnShardHashV1 Shuffle can't be equal to 0" );
1163
- Y_ENSURE (stageInfo.Meta .TaskIdByHash != nullptr , " TaskIdByHash for ColumnShardHashV1 wasn't propogated to this stage" );
1164
- Y_ENSURE (stageInfo.Meta .SourceTableKeyColumnTypes != nullptr , " SourceTableKeyColumnTypes for ColumnShardHashV1 wasn't propogated to this stage" );
1199
+ auto & columnShardHashV1Params = stageInfo.Meta .GetColumnShardHashV1Params (outputIdx);
1200
+ LOG_DEBUG_S (
1201
+ *TlsActivationContext,
1202
+ NKikimrServices::KQP_EXECUTER,
1203
+ " Filling columnshardhashv1 params for sending it to runtime "
1204
+ << " [" << stageInfo.Id .TxId << " :" << stageInfo.Id .StageId << " ]"
1205
+ << " : " << columnShardHashV1Params.KeyTypesToString ()
1206
+ << " for the columns: " << " [" << JoinSeq (" ," , output.KeyColumns ) << " ]"
1207
+ );
1208
+ Y_ENSURE (columnShardHashV1Params.SourceShardCount != 0 , " ShardCount for ColumnShardHashV1 Shuffle can't be equal to 0" );
1209
+ Y_ENSURE (columnShardHashV1Params.TaskIdByHash != nullptr , " TaskIdByHash for ColumnShardHashV1 wasn't propogated to this stage" );
1210
+ Y_ENSURE (columnShardHashV1Params.SourceTableKeyColumnTypes != nullptr , " SourceTableKeyColumnTypes for ColumnShardHashV1 wasn't propogated to this stage" );
1211
+
1212
+ Y_ENSURE (
1213
+ columnShardHashV1Params.SourceTableKeyColumnTypes ->size () == output.KeyColumns .size (),
1214
+ TStringBuilder{}
1215
+ << " Hashshuffle keycolumns and keytypes args count mismatch during executer FillOutputDesc stage, types: "
1216
+ << columnShardHashV1Params.KeyTypesToString () << " for the columns: "
1217
+ << " [" << JoinSeq (" ," , output.KeyColumns ) << " ]"
1218
+ );
1219
+
1165
1220
auto & columnShardHashV1 = *hashPartitionDesc.MutableColumnShardHashV1 ();
1166
- columnShardHashV1.SetShardCount (stageInfo. Meta .SourceShardCount );
1221
+ columnShardHashV1.SetShardCount (columnShardHashV1Params .SourceShardCount );
1167
1222
1168
1223
auto * columnTypes = columnShardHashV1.MutableKeyColumnTypes ();
1169
- for (const auto & type: *stageInfo. Meta .SourceTableKeyColumnTypes ) {
1224
+ for (const auto & type: *columnShardHashV1Params .SourceTableKeyColumnTypes ) {
1170
1225
columnTypes->Add (type.GetTypeId ());
1171
1226
}
1172
1227
1173
1228
auto * taskIdByHash = columnShardHashV1.MutableTaskIdByHash ();
1174
- for (std::size_t taskID: *stageInfo. Meta .TaskIdByHash ) {
1229
+ for (std::size_t taskID: *columnShardHashV1Params .TaskIdByHash ) {
1175
1230
taskIdByHash->Add (taskID);
1176
1231
}
1177
1232
break ;
@@ -1346,8 +1401,9 @@ void SerializeTaskToProto(
1346
1401
if (task.Outputs .size () > 1 ) {
1347
1402
enableSpilling = tasksGraph.GetMeta ().AllowWithSpilling ;
1348
1403
}
1349
- for (const auto & output : task.Outputs ) {
1350
- FillOutputDesc (tasksGraph, *result->AddOutputs (), output, enableSpilling, stageInfo);
1404
+ for (ui32 outputIdx = 0 ; outputIdx < task.Outputs .size (); ++outputIdx) {
1405
+ const auto & output = task.Outputs [outputIdx];
1406
+ FillOutputDesc (tasksGraph, *result->AddOutputs (), output, outputIdx, enableSpilling, stageInfo);
1351
1407
}
1352
1408
1353
1409
const NKqpProto::TKqpPhyStage& stage = stageInfo.Meta .GetStage (stageInfo.Id );
0 commit comments