@@ -12,6 +12,33 @@ ui64 NonZeroMin(ui64 a, ui64 b) {
12
12
return (b == 0 ) ? a : ((a == 0 || a > b) ? b : a);
13
13
}
14
14
15
+ ui64 ExportMinStats (std::vector<ui64>& data);
16
+ ui64 ExportMaxStats (std::vector<ui64>& data);
17
+
18
+ void TMinStats::Resize (ui32 count) {
19
+ Values.resize (count);
20
+ }
21
+
22
+ void TMinStats::Set (ui32 index, ui64 value) {
23
+ Y_ASSERT (index < Values.size ());
24
+ auto maybeMin = Values[index ] == MinValue;
25
+ Values[index ] = value;
26
+ if (maybeMin) {
27
+ MinValue = ExportMinStats (Values);
28
+ }
29
+ }
30
+
31
+ void TMaxStats::Resize (ui32 count) {
32
+ Values.resize (count);
33
+ }
34
+
35
+ void TMaxStats::Set (ui32 index, ui64 value) {
36
+ Y_ASSERT (index < Values.size ());
37
+ auto isMonotonic = value >= Values[index ];
38
+ Values[index ] = value;
39
+ MaxValue = isMonotonic ? (value > MaxValue ? value : MaxValue) : ExportMaxStats (Values);
40
+ }
41
+
15
42
// Exports aggregated statistics over Values into `stats`.
void TTimeSeriesStats::ExportAggStats(NYql::NDqProto::TDqStatsAggr& stats) {
    // Fully qualified on purpose: an unqualified call inside this member
    // would resolve to the member function itself.
    NKikimr::NKqp::ExportAggStats(Values, stats);
}
@@ -272,6 +299,8 @@ void TStageExecutionStats::Resize(ui32 taskCount) {
272
299
273
300
WaitInputTimeUs.Resize (taskCount);
274
301
WaitOutputTimeUs.Resize (taskCount);
302
+ CurrentWaitInputTimeUs.Resize (taskCount);
303
+ CurrentWaitOutputTimeUs.Resize (taskCount);
275
304
276
305
SpillingComputeBytes.Resize (taskCount);
277
306
SpillingChannelBytes.Resize (taskCount);
@@ -456,6 +485,8 @@ ui64 TStageExecutionStats::UpdateStats(const NYql::NDqProto::TDqTaskStats& taskS
456
485
SetNonZero (DurationUs, index , durationUs);
457
486
WaitInputTimeUs.SetNonZero (index , taskStats.GetWaitInputTimeUs ());
458
487
WaitOutputTimeUs.SetNonZero (index , taskStats.GetWaitOutputTimeUs ());
488
+ CurrentWaitInputTimeUs.Set (index , taskStats.GetCurrentWaitInputTimeUs ());
489
+ CurrentWaitOutputTimeUs.Set (index , taskStats.GetCurrentWaitOutputTimeUs ());
459
490
460
491
SpillingComputeBytes.SetNonZero (index , taskStats.GetSpillingComputeWriteBytes ());
461
492
SpillingChannelBytes.SetNonZero (index , taskStats.GetSpillingChannelWriteBytes ());
@@ -572,6 +603,23 @@ ui64 TStageExecutionStats::UpdateStats(const NYql::NDqProto::TDqTaskStats& taskS
572
603
return baseTimeMs;
573
604
}
574
605
606
+ bool TStageExecutionStats::IsDeadlocked (ui64 deadline) {
607
+ if (CurrentWaitInputTimeUs.MinValue < deadline || InputStages.empty ()) {
608
+ return false ;
609
+ }
610
+
611
+ for (auto stat : InputStages) {
612
+ if (stat->CurrentWaitOutputTimeUs .MinValue < deadline && !stat->IsFinished ()) {
613
+ return false ;
614
+ }
615
+ }
616
+ return true ;
617
+ }
618
+
619
+ bool TStageExecutionStats::IsFinished () {
620
+ return FinishedCount == Task2Index.size ();
621
+ }
622
+
575
623
namespace {
576
624
577
625
TTableStat operator - (const TTableStat& l, const TTableStat& r) {
@@ -739,6 +787,37 @@ bool CollectProfileStats(Ydb::Table::QueryStatsCollection::Mode statsMode) {
739
787
return statsMode >= Ydb::Table::QueryStatsCollection::STATS_COLLECTION_PROFILE;
740
788
}
741
789
790
+ void TQueryExecutionStats::Prepare () {
791
+ if (CollectFullStats (StatsMode)) {
792
+ // stages
793
+ for (auto & [stageId, info] : TasksGraph->GetStagesInfo ()) {
794
+ auto [it, inserted] = StageStats.try_emplace (stageId);
795
+ Y_ENSURE (inserted);
796
+ it->second .StageId = stageId;
797
+ }
798
+ // connections
799
+ for (auto & [_, stageStats] : StageStats) {
800
+ auto & info = TasksGraph->GetStageInfo (stageStats.StageId );
801
+ auto & stage = info.Meta .GetStage (info.Id );
802
+ for (const auto & input : stage.GetInputs ()) {
803
+ auto & peerStageStats = StageStats[NYql::NDq::TStageId (stageStats.StageId .TxId , input.GetStageIndex ())];
804
+ stageStats.InputStages .push_back (&peerStageStats);
805
+ peerStageStats.OutputStages .push_back (&stageStats);
806
+ }
807
+ }
808
+ // tasks
809
+ for (auto & task : TasksGraph->GetTasks ()) {
810
+ auto & stageStats = StageStats[task.StageId ];
811
+ stageStats.Task2Index .emplace (task.Id , stageStats.Task2Index .size ());
812
+ }
813
+ for (auto & [_, stageStats] : StageStats) {
814
+ stageStats.TaskCount = (stageStats.Task2Index .size () + 3 ) & ~3 ;
815
+ stageStats.Resize (stageStats.TaskCount );
816
+ }
817
+ }
818
+ }
819
+
820
+
742
821
void TQueryExecutionStats::FillStageDurationUs (NYql::NDqProto::TDqStageStats& stats) {
743
822
if (stats.HasStartTimeMs () && stats.HasFinishTimeMs ()) {
744
823
auto startTimeMs = stats.GetStartTimeMs ().GetMin ();
@@ -1165,17 +1244,67 @@ void TQueryExecutionStats::UpdateTaskStats(ui64 taskId, const NYql::NDqProto::TD
1165
1244
Y_ASSERT (stats.GetTasks ().size () == 1 );
1166
1245
const NYql::NDqProto::TDqTaskStats& taskStats = stats.GetTasks (0 );
1167
1246
Y_ASSERT (taskStats.GetTaskId () == taskId);
1168
- auto stageId = taskStats. GetStageId () ;
1247
+ auto stageId = TasksGraph-> GetTask (taskId). StageId ;
1169
1248
auto [it, inserted] = StageStats.try_emplace (stageId);
1170
1249
if (inserted) {
1171
- it->second .StageId = TasksGraph-> GetTask (taskStats. GetTaskId ()). StageId ;
1250
+ it->second .StageId = stageId ;
1172
1251
it->second .SetHistorySampleCount (HistorySampleCount);
1173
1252
}
1174
1253
BaseTimeMs = NonZeroMin (BaseTimeMs, it->second .UpdateStats (taskStats, state, stats.GetMaxMemoryUsage (), stats.GetDurationUs ()));
1254
+
1255
+ constexpr ui64 deadline = 60'000'000 ; // 60s
1256
+ if (it->second .CurrentWaitOutputTimeUs .MinValue > deadline) {
1257
+ for (auto stat : it->second .OutputStages ) {
1258
+ if (stat->IsDeadlocked (deadline)) {
1259
+ DeadlockedStageId = stat->StageId .StageId ;
1260
+ break ;
1261
+ }
1262
+ }
1263
+ } else if (it->second .IsDeadlocked (deadline)) {
1264
+ DeadlockedStageId = it->second .StageId .StageId ;
1265
+ }
1175
1266
}
1176
1267
1177
1268
// SIMD-friendly aggregations are below. Compiler is able to vectorize sum/count, but needs help with min/max
1178
1269
1270
+ ui64 ExportMinStats (std::vector<ui64>& data) {
1271
+
1272
+ Y_DEBUG_ABORT_UNLESS ((data.size () & 3 ) == 0 );
1273
+
1274
+ ui64 min4[4 ] = {0 , 0 , 0 , 0 };
1275
+
1276
+ for (auto it = data.begin (); it < data.end (); it += 4 ) {
1277
+ min4[0 ] = min4[0 ] ? (it[0 ] ? (min4[0 ] < it[0 ] ? min4[0 ] : it[0 ]) : min4[0 ]) : it[0 ];
1278
+ min4[1 ] = min4[1 ] ? (it[1 ] ? (min4[1 ] < it[1 ] ? min4[1 ] : it[1 ]) : min4[1 ]) : it[1 ];
1279
+ min4[2 ] = min4[2 ] ? (it[2 ] ? (min4[2 ] < it[2 ] ? min4[2 ] : it[2 ]) : min4[2 ]) : it[2 ];
1280
+ min4[3 ] = min4[3 ] ? (it[3 ] ? (min4[3 ] < it[3 ] ? min4[3 ] : it[3 ]) : min4[3 ]) : it[3 ];
1281
+ }
1282
+
1283
+ ui64 min01 = min4[0 ] ? (min4[1 ] ? (min4[0 ] < min4[1 ] ? min4[0 ] : min4[1 ]) : min4[0 ]) : min4[1 ];
1284
+ ui64 min23 = min4[2 ] ? (min4[3 ] ? (min4[2 ] < min4[3 ] ? min4[2 ] : min4[3 ]) : min4[2 ]) : min4[3 ];
1285
+
1286
+ return min01 ? (min23 ? (min01 < min23 ? min01 : min23) : min01) : min23;
1287
+ }
1288
+
1289
+ ui64 ExportMaxStats (std::vector<ui64>& data) {
1290
+
1291
+ Y_DEBUG_ABORT_UNLESS ((data.size () & 3 ) == 0 );
1292
+
1293
+ ui64 max4[4 ] = {0 , 0 , 0 , 0 };
1294
+
1295
+ for (auto it = data.begin (); it < data.end (); it += 4 ) {
1296
+ max4[0 ] = max4[0 ] > it[0 ] ? max4[0 ] : it[0 ];
1297
+ max4[1 ] = max4[1 ] > it[1 ] ? max4[1 ] : it[1 ];
1298
+ max4[2 ] = max4[2 ] > it[2 ] ? max4[2 ] : it[2 ];
1299
+ max4[3 ] = max4[3 ] > it[3 ] ? max4[3 ] : it[3 ];
1300
+ }
1301
+
1302
+ ui64 max01 = max4[0 ] > max4[1 ] ? max4[0 ] : max4[1 ];
1303
+ ui64 max23 = max4[2 ] > max4[3 ] ? max4[2 ] : max4[3 ];
1304
+
1305
+ return max01 > max23 ? max01 : max23;
1306
+ }
1307
+
1179
1308
void ExportAggStats (std::vector<ui64>& data, NYql::NDqProto::TDqStatsMinMax& stats) {
1180
1309
1181
1310
Y_DEBUG_ABORT_UNLESS ((data.size () & 3 ) == 0 );
@@ -1562,7 +1691,7 @@ void TQueryExecutionStats::Finish() {
1562
1691
}
1563
1692
1564
1693
AdjustBaseTime (stageStats);
1565
- auto it = StageStats.find (stageId. StageId );
1694
+ auto it = StageStats.find (stageId);
1566
1695
if (it != StageStats.end ()) {
1567
1696
it->second .ExportHistory (BaseTimeMs, *stageStats);
1568
1697
}
0 commit comments