14
14
#include " event_pool.hpp"
15
15
#include " event_provider.hpp"
16
16
17
/// Construct an event wrapper around a Level Zero event borrowed from an
/// event cache.
///
/// @param eventAllocation borrowed ze event handle (ownership moved in; it is
///        returned to the cache when this object is destroyed/freed).
/// @param pool the v2::event_pool this event belongs to; used for flag
///        queries, the device lookup, and eventual release via pool->free().
///
/// All timestamp fields start at zero, meaning "not recorded yet"; see
/// isTimestamped(), which treats a non-zero start timestamp as the marker
/// for a timestamp-recording event.
///
/// NOTE(review): getDevice() dereferences the `pool` member, so this relies
/// on `pool` being declared before zeTimerResolution/timestampMaxValue in the
/// class definition (members initialize in declaration order, not init-list
/// order) — confirm the member order in the header.
ur_event_handle_t_::ur_event_handle_t_(
    v2::raii::cache_borrowed_event eventAllocation, v2::event_pool *pool)
    : zeEvent(std::move(eventAllocation)), pool(pool),
      adjustedEventStartTimestamp(0), recordEventEndTimestamp(0),
      adjustedEventEndTimestamp(0),
      zeTimerResolution(getDevice()->ZeDeviceProperties->timerResolution),
      timestampMaxValue(getDevice()->getTimestampMask()) {}
21
26
22
27
void ur_event_handle_t_::reset () {
23
28
// consider make an abstraction for regular/counter based
24
29
// events if there's more of this type of conditions
25
- if (type == v2::event_type::EVENT_REGULAR ) {
30
+ if (pool-> getFlags () & v2::EVENT_FLAGS_COUNTER ) {
26
31
zeEventHostReset (zeEvent.get ());
27
32
}
28
33
}
/// Drop one reference; when the count reaches zero, return the event to its
/// pool — unless a device-side end-timestamp write is still pending.
///
/// @return UR_RESULT_SUCCESS always (early-outs are not errors).
ur_result_t ur_event_handle_t_::release() {
  // Only the last release actually frees the event.
  if (!RefCount.decrementAndTest())
    return UR_RESULT_SUCCESS;

  if (isTimestamped() && adjustedEventEndTimestamp == 0) {
    // L0 will write end timestamp to this event some time in the future,
    // so we can't release it yet.
    // NOTE(review): the event is intentionally not freed here, which leaks
    // it until the TODO below is implemented — confirm this is acceptable.
    // TODO: delay releasing until the end timestamp is written.
    return UR_RESULT_SUCCESS;
  }

  pool->free(this);
  return UR_RESULT_SUCCESS;
}
47
59
60
+ bool ur_event_handle_t_::isTimestamped () const {
61
+ // If we are recording, the start time of the event will be non-zero.
62
+ return adjustedEventStartTimestamp != 0 ;
63
+ }
64
+
65
+ bool ur_event_handle_t_::isProfilingEnabled () const {
66
+ return pool->getFlags () & v2::EVENT_FLAGS_PROFILING_ENABLED;
67
+ }
68
+
69
+ ur_device_handle_t ur_event_handle_t_::getDevice () const {
70
+ return pool->getProvider ()->device ();
71
+ }
72
+
73
+ uint64_t ur_event_handle_t_::getEventStartTimestmap () const {
74
+ return adjustedEventStartTimestamp;
75
+ }
76
+
77
/// Convert a raw device end timestamp into the adjusted time domain.
///
/// @param adjustedStartTimestamp start time, already adjusted (same units as
///        the return value) — used only to detect counter wrap-around.
/// @param endTimestamp raw end timestamp as reported by the device.
/// @param timestampMaxValue mask of the valid timestamp bits.
/// @param timerResolution multiplier converting device ticks to the adjusted
///        time domain.
/// @return masked, resolution-scaled end time, bumped by one full counter
///         period if it appears to precede the start (single wrap-around).
static uint64_t adjustEndEventTimestamp(uint64_t adjustedStartTimestamp,
                                        uint64_t endTimestamp,
                                        uint64_t timestampMaxValue,
                                        uint64_t timerResolution) {
  // Keep only the valid bits, then scale to the adjusted time domain.
  const uint64_t maskedEnd = endTimestamp & timestampMaxValue;
  uint64_t adjusted = maskedEnd * timerResolution;

  // The underlying HW counter is narrower than 64 bits; if the end looks
  // earlier than the start, assume exactly one wrap-around occurred.
  // (Multiple wrap-arounds are not recoverable here; the longer term plan
  // is to enlarge the capacity of the HW timestamps.)
  const bool wrapped = adjusted < adjustedStartTimestamp;
  if (wrapped) {
    adjusted += timestampMaxValue * timerResolution;
  }

  return adjusted;
}
94
+
95
/// Return the adjusted end timestamp for a timestamp-recording event,
/// computing and caching it on first successful read.
///
/// @return the adjusted end timestamp, or 0 if the device has not written
///         the raw end timestamp yet.
uint64_t ur_event_handle_t_::getEventEndTimestamp() {
  // Serialize with recordStartTimestamp() and concurrent readers.
  std::scoped_lock<ur_shared_mutex> lock(this->Mutex);

  // If adjustedEventEndTimestamp on the event is non-zero it means it has
  // collected the result of the queue already. In that case it has been
  // adjusted and is ready for immediate return.
  if (adjustedEventEndTimestamp)
    return adjustedEventEndTimestamp;

  // If the result is 0, we have not yet gotten results back and so we just
  // return it (i.e. 0, signalling "not available yet" to the caller).
  if (recordEventEndTimestamp == 0)
    return recordEventEndTimestamp;

  // Now that we have the result, there is no need to keep it in the queue
  // anymore, so we cache it on the event and evict the record from the
  // queue.
  adjustedEventEndTimestamp =
      adjustEndEventTimestamp(getEventStartTimestmap(), recordEventEndTimestamp,
                              timestampMaxValue, zeTimerResolution);
  return adjustedEventEndTimestamp;
}
117
+
118
+ void ur_event_handle_t_::recordStartTimestamp () {
119
+ uint64_t deviceStartTimestamp = 0 ;
120
+ UR_CALL_THROWS (ur::level_zero::urDeviceGetGlobalTimestamps (
121
+ getDevice (), &deviceStartTimestamp, nullptr ));
122
+
123
+ std::scoped_lock<ur_shared_mutex> lock (this ->Mutex );
124
+
125
+ adjustedEventStartTimestamp = deviceStartTimestamp;
126
+ }
127
+
128
+ uint64_t *ur_event_handle_t_::getEventEndTimestampPtr () {
129
+ return &recordEventEndTimestamp;
130
+ }
131
+
48
132
namespace ur ::level_zero {
49
133
/// Increment the event's reference count.
ur_result_t urEventRetain(ur_event_handle_t hEvent) {
  // Delegate to the handle's own refcounting.
  return hEvent->retain();
}
50
134
@@ -88,4 +172,82 @@ ur_result_t urEventGetInfo(ur_event_handle_t hEvent, ur_event_info_t propName,
88
172
89
173
return UR_RESULT_SUCCESS;
90
174
}
175
+
176
+ ur_result_t urEventGetProfilingInfo (
177
+ ur_event_handle_t hEvent, // /< [in] handle of the event object
178
+ ur_profiling_info_t
179
+ propName, // /< [in] the name of the profiling property to query
180
+ size_t
181
+ propValueSize, // /< [in] size in bytes of the profiling property value
182
+ void *pPropValue, // /< [out][optional] value of the profiling property
183
+ size_t *pPropValueSizeRet // /< [out][optional] pointer to the actual size in
184
+ // /< bytes returned in propValue
185
+ ) {
186
+ // The event must either have profiling enabled or be recording timestamps.
187
+ bool isTimestampedEvent = hEvent->isTimestamped ();
188
+ if (!hEvent->isProfilingEnabled () && !isTimestampedEvent) {
189
+ return UR_RESULT_ERROR_PROFILING_INFO_NOT_AVAILABLE;
190
+ }
191
+
192
+ UrReturnHelper returnValue (propValueSize, pPropValue, pPropValueSizeRet);
193
+
194
+ // For timestamped events we have the timestamps ready directly on the event
195
+ // handle, so we short-circuit the return.
196
+ if (isTimestampedEvent) {
197
+ uint64_t contextStartTime = hEvent->getEventStartTimestmap ();
198
+ switch (propName) {
199
+ case UR_PROFILING_INFO_COMMAND_QUEUED:
200
+ case UR_PROFILING_INFO_COMMAND_SUBMIT:
201
+ return returnValue (contextStartTime);
202
+ case UR_PROFILING_INFO_COMMAND_END:
203
+ case UR_PROFILING_INFO_COMMAND_START:
204
+ case UR_PROFILING_INFO_COMMAND_COMPLETE: {
205
+ return returnValue (hEvent->getEventEndTimestamp ());
206
+ }
207
+ default :
208
+ logger::error (" urEventGetProfilingInfo: not supported ParamName" );
209
+ return UR_RESULT_ERROR_INVALID_VALUE;
210
+ }
211
+ }
212
+
213
+ ze_kernel_timestamp_result_t tsResult;
214
+
215
+ auto zeTimerResolution =
216
+ hEvent->getDevice ()->ZeDeviceProperties ->timerResolution ;
217
+ auto timestampMaxValue = hEvent->getDevice ()->getTimestampMask ();
218
+
219
+ switch (propName) {
220
+ case UR_PROFILING_INFO_COMMAND_START: {
221
+ ZE2UR_CALL (zeEventQueryKernelTimestamp, (hEvent->getZeEvent (), &tsResult));
222
+ uint64_t contextStartTime =
223
+ (tsResult.global .kernelStart & timestampMaxValue) * zeTimerResolution;
224
+ return returnValue (contextStartTime);
225
+ }
226
+ case UR_PROFILING_INFO_COMMAND_END:
227
+ case UR_PROFILING_INFO_COMMAND_COMPLETE: {
228
+ ZE2UR_CALL (zeEventQueryKernelTimestamp, (hEvent->getZeEvent (), &tsResult));
229
+
230
+ uint64_t contextStartTime =
231
+ (tsResult.global .kernelStart & timestampMaxValue);
232
+
233
+ auto adjustedEndTime =
234
+ adjustEndEventTimestamp (contextStartTime, tsResult.global .kernelEnd ,
235
+ timestampMaxValue, zeTimerResolution);
236
+ return returnValue (adjustedEndTime);
237
+ }
238
+ case UR_PROFILING_INFO_COMMAND_QUEUED:
239
+ case UR_PROFILING_INFO_COMMAND_SUBMIT:
240
+ // Note: No users for this case
241
+ // The "command_submit" time is implemented by recording submission
242
+ // timestamp with a call to urDeviceGetGlobalTimestamps before command
243
+ // enqueue.
244
+ //
245
+ return returnValue (uint64_t {0 });
246
+ default :
247
+ logger::error (" urEventGetProfilingInfo: not supported ParamName" );
248
+ return UR_RESULT_ERROR_INVALID_VALUE;
249
+ }
250
+
251
+ return UR_RESULT_SUCCESS;
252
+ }
91
253
} // namespace ur::level_zero
0 commit comments