@@ -1535,15 +1535,17 @@ ur_result_t waitForDependencies(ur_exp_command_buffer_handle_t CommandBuffer,
1535
1535
* @param CommandList The command-list to append the QueryKernelTimestamps
1536
1536
* command to.
1537
1537
* @param SignalEvent The event that must be signaled after the profiling is
1538
- * finished. This event will contain the profiling information.
1538
+ * finished.
1539
1539
* @param WaitEvent The event that must be waited on before starting the
1540
1540
* profiling.
1541
+ * @param ProfilingEvent The event that will contain the profiling data.
1541
1542
* @return UR_RESULT_SUCCESS or an error code on failure.
1542
1543
*/
1543
1544
ur_result_t appendProfilingQueries (ur_exp_command_buffer_handle_t CommandBuffer,
1544
1545
ze_command_list_handle_t CommandList,
1545
1546
ur_event_handle_t SignalEvent,
1546
- ur_event_handle_t WaitEvent) {
1547
+ ur_event_handle_t WaitEvent,
1548
+ ur_event_handle_t ProfilingEvent) {
1547
1549
// Multiple submissions of a command buffer implies that we need to save
1548
1550
// the event timestamps before resubmitting the command buffer. We
1549
1551
// therefore copy these timestamps in a dedicated USM memory section
@@ -1556,12 +1558,17 @@ ur_result_t appendProfilingQueries(ur_exp_command_buffer_handle_t CommandBuffer,
1556
1558
Profiling->Timestamps =
1557
1559
new ze_kernel_timestamp_result_t [Profiling->NumEvents ];
1558
1560
1561
+ uint32_t NumWaitEvents = WaitEvent ? 1 : 0 ;
1562
+ ze_event_handle_t *ZeWaitEventList =
1563
+ WaitEvent ? &(WaitEvent->ZeEvent ) : nullptr ;
1564
+ ze_event_handle_t ZeSignalEvent =
1565
+ SignalEvent ? SignalEvent->ZeEvent : nullptr ;
1559
1566
ZE2UR_CALL (zeCommandListAppendQueryKernelTimestamps,
1560
1567
(CommandList, CommandBuffer->ZeEventsList .size (),
1561
1568
CommandBuffer->ZeEventsList .data (), (void *)Profiling->Timestamps ,
1562
- 0 , SignalEvent-> ZeEvent , 1 , &(WaitEvent-> ZeEvent ) ));
1569
+ 0 , ZeSignalEvent, NumWaitEvents, ZeWaitEventList ));
1563
1570
1564
- SignalEvent ->CommandData = static_cast <void *>(Profiling);
1571
+ ProfilingEvent ->CommandData = static_cast <void *>(Profiling);
1565
1572
1566
1573
return UR_RESULT_SUCCESS;
1567
1574
}
@@ -1615,8 +1622,8 @@ ur_result_t enqueueImmediateAppendPath(
1615
1622
1616
1623
if (DoProfiling) {
1617
1624
UR_CALL (appendProfilingQueries (CommandBuffer, CommandListHelper->first ,
1618
- *Event,
1619
- CommandBuffer-> ComputeFinishedEvent ));
1625
+ *Event, CommandBuffer-> ComputeFinishedEvent ,
1626
+ *Event ));
1620
1627
}
1621
1628
1622
1629
// When the current execution is finished, signal ExecutionFinishedEvent to
@@ -1694,25 +1701,28 @@ ur_result_t enqueueWaitEventPath(ur_exp_command_buffer_handle_t CommandBuffer,
1694
1701
(ZeCopyCommandQueue, 1 , &CommandBuffer->ZeCopyCommandList , nullptr ));
1695
1702
}
1696
1703
1704
+ ZE2UR_CALL (zeCommandListAppendBarrier,
1705
+ (SignalCommandList->first , nullptr , 1 ,
1706
+ &(CommandBuffer->ExecutionFinishedEvent ->ZeEvent )));
1707
+
1697
1708
// Reset the wait-event for the UR command-buffer that is signaled when its
1698
1709
// submission dependencies have been satisfied.
1699
1710
ZE2UR_CALL (zeCommandListAppendEventReset,
1700
1711
(SignalCommandList->first , CommandBuffer->WaitEvent ->ZeEvent ));
1712
+
1701
1713
// Reset the all-reset-event for the UR command-buffer that is signaled when
1702
1714
// all events of the main command-list have been reset.
1703
1715
ZE2UR_CALL (zeCommandListAppendEventReset,
1704
1716
(SignalCommandList->first , CommandBuffer->AllResetEvent ->ZeEvent ));
1705
1717
1706
1718
if (DoProfiling) {
1707
1719
UR_CALL (appendProfilingQueries (CommandBuffer, SignalCommandList->first ,
1708
- *Event,
1709
- CommandBuffer->ExecutionFinishedEvent ));
1710
- } else {
1711
- ZE2UR_CALL (zeCommandListAppendBarrier,
1712
- (SignalCommandList->first , (*Event)->ZeEvent , 1 ,
1713
- &(CommandBuffer->ExecutionFinishedEvent ->ZeEvent )));
1720
+ nullptr , nullptr , *Event));
1714
1721
}
1715
1722
1723
+ ZE2UR_CALL (zeCommandListAppendBarrier,
1724
+ (SignalCommandList->first , (*Event)->ZeEvent , 0 , nullptr ));
1725
+
1716
1726
UR_CALL (Queue->executeCommandList (SignalCommandList, false /* IsBlocking*/ ,
1717
1727
false /* OKToBatchCommand*/ ));
1718
1728
0 commit comments