@@ -21,6 +21,16 @@ using namespace NYql::NDq;
21
21
using namespace NYql ::NNodes;
22
22
23
23
24
// Builds one display string from the given key column types: for every entry the name returned by
// NYql::NProto::TypeIds_Name for its GetTypeId() is collected, the names are joined with JoinSeq
// and the result is wrapped in square brackets.
// NOTE(review): this text is a rendered diff (gutter numbers and +/- markers are interleaved with
// the code), so the whitespace inside the string literals below may be a rendering artifact;
// confirm against the real file before relying on the exact output format.
+ TString KeyTypesToString (const TVector<NScheme::TTypeInfo>& keyColumnTypes) {
25
+ TVector<TString> stringNames;
26
// One name is produced per key column, so the final size is known up front.
+ stringNames.reserve (keyColumnTypes.size ());
27
+ for (const auto & keyColumnType: keyColumnTypes) {
28
+ stringNames.push_back (NYql::NProto::TypeIds_Name (keyColumnType.GetTypeId ()));
29
+ }
30
+ return " [" + JoinSeq (" ," , stringNames) + " ]" ;
31
// NOTE(review): the semicolon after the closing brace is an empty declaration and can be dropped.
+ };
32
+
33
+
24
34
// Emits stageInfo.DebugString() through LOG_DEBUG_S for the NKikimrServices::KQP_EXECUTER
// component, using the supplied actor context.
void LogStage (const NActors::TActorContext& ctx, const TStageInfo& stageInfo) {
25
35
LOG_DEBUG_S (ctx, NKikimrServices::KQP_EXECUTER, stageInfo.DebugString ());
26
36
}
@@ -473,17 +483,16 @@ void BuildKqpStageChannels(TKqpTasksGraph& tasksGraph, TStageInfo& stageInfo,
473
483
<< (spilling ? " with spilling" : " without spilling" ));
474
484
};
475
485
476
-
477
486
bool hasMap = false ;
478
- bool isFusedStage = (stageInfo.Meta .TaskIdByHash != nullptr );
479
- if (enableShuffleElimination && !isFusedStage) { // taskIdHash can be already set if it is a fused stage, so hashpartition will derive columnv1 parameters from there
487
+ auto & columnShardHashV1Params = stageInfo.Meta .ColumnShardHashV1Params ;
488
+ bool isFusedWithScanStage = (stageInfo.Meta .TableConstInfo != nullptr );
489
+ if (enableShuffleElimination && !isFusedWithScanStage) { // taskIdHash can be already set if it is a fused stage, so hashpartition will derive columnv1 parameters from there
480
490
for (ui32 inputIndex = 0 ; inputIndex < stage.InputsSize (); ++inputIndex) {
481
491
const auto & input = stage.GetInputs (inputIndex);
482
492
auto & originStageInfo = tasksGraph.GetStageInfo (NYql::NDq::TStageId (stageInfo.Id .TxId , input.GetStageIndex ()));
483
- stageInfo.Meta .TaskIdByHash = originStageInfo.Meta .TaskIdByHash ;
484
- stageInfo.Meta .SourceShardCount = originStageInfo.Meta .SourceShardCount ;
485
- stageInfo.Meta .SourceTableKeyColumnTypes = originStageInfo.Meta .SourceTableKeyColumnTypes ;
486
- if (input.GetTypeCase () == NKqpProto::TKqpPhyConnection::kMap ) {
493
+ ui32 outputIdx = input.GetOutputIndex ();
494
+ columnShardHashV1Params = originStageInfo.Meta .GetColumnShardHashV1Params (outputIdx);
495
+ if (input.GetTypeCase () == NKqpProto::TKqpPhyConnection::kMap ) {
487
496
// We want to enforce sourceShardCount from the map connection, because there can be at most one map connection
488
497
// and ColumnShardHash in Shuffle will use this parameter to shuffle on this map (same with taskIdByHash mapping)
489
498
hasMap = true ;
@@ -493,11 +502,11 @@ void BuildKqpStageChannels(TKqpTasksGraph& tasksGraph, TStageInfo& stageInfo,
493
502
}
494
503
495
504
// If this is a stage where we don't inherit parallelism.
496
- if (enableShuffleElimination && !hasMap && !isFusedStage && stageInfo.Tasks .size () > 0 && stage.InputsSize () > 0 ) {
497
- stageInfo. Meta .SourceShardCount = stageInfo.Tasks .size ();
498
- stageInfo. Meta . TaskIdByHash = std::make_shared<TVector<ui64>>(stageInfo. Meta .SourceShardCount );
499
- for (std::size_t i = 0 ; i < stageInfo. Meta .SourceShardCount ; ++i) {
500
- (*stageInfo. Meta .TaskIdByHash )[i] = i;
505
+ if (enableShuffleElimination && !hasMap && !isFusedWithScanStage && stageInfo.Tasks .size () > 0 && stage.InputsSize () > 0 ) {
506
+ columnShardHashV1Params .SourceShardCount = stageInfo.Tasks .size ();
507
+ columnShardHashV1Params. TaskIdByHash = std::make_shared<TVector<ui64>>(columnShardHashV1Params .SourceShardCount );
508
+ for (std::size_t i = 0 ; i < columnShardHashV1Params .SourceShardCount ; ++i) {
509
+ (*columnShardHashV1Params .TaskIdByHash )[i] = i;
501
510
}
502
511
503
512
for (auto & input : stage.GetInputs ()) {
@@ -510,17 +519,17 @@ void BuildKqpStageChannels(TKqpTasksGraph& tasksGraph, TStageInfo& stageInfo,
510
519
continue ;
511
520
}
512
521
513
- Y_ENSURE (enableShuffleElimination, " OptShuffleElimination wasn't turned on, but ColumnShardHashV1 detected!" );
514
- // ^ if the flag if false, and kColumnShardHashV1 detected - then the data which would be returned - would be incorrect,
522
+ Y_ENSURE (enableShuffleElimination, " OptShuffleElimination wasn't turned on, but ColumnShardHashV1 detected!" );
523
+ // ^ if the flag is false and kColumnShardHashV1 is detected, the data which would be returned would be incorrect,
515
524
// because we didn't save partitioning in the BuildScanTasksFromShards.
516
525
517
526
auto columnShardHashV1 = hashShuffle.GetColumnShardHashV1 ();
518
- stageInfo. Meta .SourceTableKeyColumnTypes = std::make_shared<TVector<NScheme::TTypeInfo>>();
519
- stageInfo. Meta .SourceTableKeyColumnTypes ->reserve (columnShardHashV1.KeyColumnTypesSize ());
527
+ columnShardHashV1Params .SourceTableKeyColumnTypes = std::make_shared<TVector<NScheme::TTypeInfo>>();
528
+ columnShardHashV1Params .SourceTableKeyColumnTypes ->reserve (columnShardHashV1.KeyColumnTypesSize ());
520
529
for (const auto & keyColumnType: columnShardHashV1.GetKeyColumnTypes ()) {
521
530
auto typeId = static_cast <NScheme::TTypeId>(keyColumnType);
522
531
auto typeInfo = NScheme::TTypeInfo{typeId};
523
- stageInfo. Meta .SourceTableKeyColumnTypes ->push_back (typeInfo);
532
+ columnShardHashV1Params .SourceTableKeyColumnTypes ->push_back (typeInfo);
524
533
}
525
534
break ;
526
535
}
@@ -544,18 +553,43 @@ void BuildKqpStageChannels(TKqpTasksGraph& tasksGraph, TStageInfo& stageInfo,
544
553
}
545
554
case NKqpProto::TKqpPhyCnHashShuffle::kColumnShardHashV1 : {
546
555
Y_ENSURE (enableShuffleElimination, " OptShuffleElimination wasn't turned on, but ColumnShardHashV1 detected!" );
547
- inputStageInfo.Meta .TaskIdByHash = stageInfo.Meta .TaskIdByHash ;
548
- inputStageInfo.Meta .SourceShardCount = stageInfo.Meta .SourceShardCount ;
549
- inputStageInfo.Meta .SourceTableKeyColumnTypes = stageInfo.Meta .SourceTableKeyColumnTypes ;
556
+
557
+ LOG_DEBUG_S (
558
+ *TlsActivationContext,
559
+ NKikimrServices::KQP_EXECUTER,
560
+ " Propogating columnhashv1 pararms to stage"
561
+ << " [" << inputStageInfo.Id .TxId << " :" << inputStageInfo.Id .StageId << " ]" << " : "
562
+ << KeyTypesToString (*columnShardHashV1Params.SourceTableKeyColumnTypes ) << " "
563
+ << " [" << JoinSeq (" ," , input.GetHashShuffle ().GetKeyColumns ()) << " ]" ;
564
+ );
565
+
566
+ Y_ENSURE (
567
+ columnShardHashV1Params.SourceTableKeyColumnTypes ->size () == input.GetHashShuffle ().KeyColumnsSize (),
568
+ TStringBuilder{}
569
+ << " Hashshuffle keycolumns and keytypes args count mismatch during executer stage, types: "
570
+ << KeyTypesToString (*columnShardHashV1Params.SourceTableKeyColumnTypes ) << " for the columns: "
571
+ << " [" << JoinSeq (" ," , input.GetHashShuffle ().GetKeyColumns ()) << " ]"
572
+ );
573
+
574
+ inputStageInfo.Meta .HashParamsByOutput [outputIdx] = columnShardHashV1Params;
550
575
hashKind = NHashKind::EColumnShardHashV1;
551
576
break ;
552
577
}
553
578
default : {
554
579
Y_ENSURE (false , " undefined type of hash for shuffle" );
555
580
}
556
581
}
557
- BuildHashShuffleChannels (tasksGraph, stageInfo, inputIdx, inputStageInfo, outputIdx,
558
- input.GetHashShuffle ().GetKeyColumns (), enableSpilling, log , hashKind);
582
+ BuildHashShuffleChannels (
583
+ tasksGraph,
584
+ stageInfo,
585
+ inputIdx,
586
+ inputStageInfo,
587
+ outputIdx,
588
+ input.GetHashShuffle ().GetKeyColumns (),
589
+ enableSpilling,
590
+ log ,
591
+ hashKind
592
+ );
559
593
break ;
560
594
}
561
595
case NKqpProto::TKqpPhyConnection::kBroadcast :
@@ -1036,7 +1070,7 @@ void FillTaskMeta(const TStageInfo& stageInfo, const TTask& task, NYql::NDqProto
1036
1070
private:
1037
1071
const TTableConstInfo& TableInfo;
1038
1072
public:
1039
- TResolverTable (const TTableConstInfo& tableInfo)
1073
// Binds this object to tableInfo. Only a reference is kept in the TableInfo member, so the
// referenced object has to stay alive for as long as this object is used.
+ TResolverTable (const TTableConstInfo& tableInfo)
1040
1074
: TableInfo(tableInfo) {
1041
1075
1042
1076
}
@@ -1118,9 +1152,10 @@ void FillTaskMeta(const TStageInfo& stageInfo, const TTask& task, NYql::NDqProto
1118
1152
}
1119
1153
1120
1154
void FillOutputDesc (
1121
- const TKqpTasksGraph& tasksGraph,
1122
- NYql::NDqProto::TTaskOutput& outputDesc,
1123
- const TTaskOutput& output,
1155
+ const TKqpTasksGraph& tasksGraph,
1156
+ NYql::NDqProto::TTaskOutput& outputDesc,
1157
+ const TTaskOutput& output,
1158
+ ui32 outputIdx,
1124
1159
bool enableSpilling,
1125
1160
const TStageInfo& stageInfo
1126
1161
) {
@@ -1143,19 +1178,37 @@ void FillOutputDesc(
1143
1178
break ;
1144
1179
}
1145
1180
case NHashKind::EColumnShardHashV1: {
1146
- Y_ENSURE (stageInfo.Meta .SourceShardCount != 0 , " ShardCount for ColumnShardHashV1 Shuffle can't be equal to 0" );
1147
- Y_ENSURE (stageInfo.Meta .TaskIdByHash != nullptr , " TaskIdByHash for ColumnShardHashV1 wasn't propogated to this stage" );
1148
- Y_ENSURE (stageInfo.Meta .SourceTableKeyColumnTypes != nullptr , " SourceTableKeyColumnTypes for ColumnShardHashV1 wasn't propogated to this stage" );
1181
+ auto & columnShardHashV1Params = stageInfo.Meta .GetColumnShardHashV1Params (outputIdx);
1182
+ LOG_DEBUG_S (
1183
+ *TlsActivationContext,
1184
+ NKikimrServices::KQP_EXECUTER,
1185
+ " Filling columnshardhashv1 params for sending it to runtime "
1186
+ << " [" << stageInfo.Id .TxId << " :" << stageInfo.Id .StageId << " ]"
1187
+ << " : " << KeyTypesToString (*columnShardHashV1Params.SourceTableKeyColumnTypes )
1188
+ << " for the columns: " << " [" << JoinSeq (" ," , output.KeyColumns ) << " ]"
1189
+ );
1190
+ Y_ENSURE (columnShardHashV1Params.SourceShardCount != 0 , " ShardCount for ColumnShardHashV1 Shuffle can't be equal to 0" );
1191
+ Y_ENSURE (columnShardHashV1Params.TaskIdByHash != nullptr , " TaskIdByHash for ColumnShardHashV1 wasn't propogated to this stage" );
1192
+ Y_ENSURE (columnShardHashV1Params.SourceTableKeyColumnTypes != nullptr , " SourceTableKeyColumnTypes for ColumnShardHashV1 wasn't propogated to this stage" );
1193
+
1194
+ Y_ENSURE (
1195
+ columnShardHashV1Params.SourceTableKeyColumnTypes ->size () == output.KeyColumns .size (),
1196
+ TStringBuilder{}
1197
+ << " Hashshuffle keycolumns and keytypes args count mismatch during executer stage, types: "
1198
+ << KeyTypesToString (*columnShardHashV1Params.SourceTableKeyColumnTypes ) << " for the columns: "
1199
+ << " [" << JoinSeq (" ," , output.KeyColumns ) << " ]"
1200
+ );
1201
+
1149
1202
auto & columnShardHashV1 = *hashPartitionDesc.MutableColumnShardHashV1 ();
1150
- columnShardHashV1.SetShardCount (stageInfo. Meta .SourceShardCount );
1203
+ columnShardHashV1.SetShardCount (columnShardHashV1Params .SourceShardCount );
1151
1204
1152
1205
auto * columnTypes = columnShardHashV1.MutableKeyColumnTypes ();
1153
- for (const auto & type: *stageInfo. Meta .SourceTableKeyColumnTypes ) {
1206
+ for (const auto & type: *columnShardHashV1Params .SourceTableKeyColumnTypes ) {
1154
1207
columnTypes->Add (type.GetTypeId ());
1155
1208
}
1156
1209
1157
1210
auto * taskIdByHash = columnShardHashV1.MutableTaskIdByHash ();
1158
- for (std::size_t taskID: *stageInfo. Meta .TaskIdByHash ) {
1211
+ for (std::size_t taskID: *columnShardHashV1Params .TaskIdByHash ) {
1159
1212
taskIdByHash->Add (taskID);
1160
1213
}
1161
1214
break ;
@@ -1330,8 +1383,9 @@ void SerializeTaskToProto(
1330
1383
if (task.Outputs .size () > 1 ) {
1331
1384
enableSpilling = tasksGraph.GetMeta ().AllowWithSpilling ;
1332
1385
}
1333
- for (const auto & output : task.Outputs ) {
1334
- FillOutputDesc (tasksGraph, *result->AddOutputs (), output, enableSpilling, stageInfo);
1386
+ for (ui32 outputIdx = 0 ; outputIdx < task.Outputs .size (); ++outputIdx) {
1387
+ const auto & output = task.Outputs [outputIdx];
1388
+ FillOutputDesc (tasksGraph, *result->AddOutputs (), output, outputIdx, enableSpilling, stageInfo);
1335
1389
}
1336
1390
1337
1391
const NKqpProto::TKqpPhyStage& stage = stageInfo.Meta .GetStage (stageInfo.Id );
0 commit comments