Skip to content

Commit 097d21c

Browse files
[SYCL] Add a partial profiling workaround for acc devices (intel#9062)
With the recent fixes to command submit profiling info, queue profiling is no longer supported on devices with OpenCL version < 2.1. This primarily affects applications on accelerator devices. The full solution would be to provide a fallback implementation of profiling information that is less precise but does not require querying device time. As a temporary workaround, this patch allows to create an accelerator queue with profiling despite the lack of queue_profiling aspect and instead throw an error if submit profiling info is requested.
1 parent 84cc1e1 commit 097d21c

File tree

6 files changed

+189
-11
lines changed

6 files changed

+189
-11
lines changed

sycl/source/detail/device_info.hpp

Lines changed: 9 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -227,15 +227,19 @@ struct get_device_info_impl<std::vector<info::fp_config>,
227227
}
228228
};
229229

230+
inline bool checkNativeQueueProfiling(RT::PiDevice Dev, const plugin &Plugin) {
231+
pi_queue_properties Properties;
232+
Plugin.call<PiApiKind::piDeviceGetInfo>(
233+
Dev, PiInfoCode<info::device::queue_profiling>::value, sizeof(Properties),
234+
&Properties, nullptr);
235+
return Properties & PI_QUEUE_FLAG_PROFILING_ENABLE;
236+
}
237+
230238
// Specialization for queue_profiling. In addition to pi_queue level profiling,
231239
// piGetDeviceAndHostTimer support is needed for command_submit query support.
232240
template <> struct get_device_info_impl<bool, info::device::queue_profiling> {
233241
static bool get(RT::PiDevice Dev, const plugin &Plugin) {
234-
pi_queue_properties Properties;
235-
Plugin.call<PiApiKind::piDeviceGetInfo>(
236-
Dev, PiInfoCode<info::device::queue_profiling>::value,
237-
sizeof(Properties), &Properties, nullptr);
238-
if (!(Properties & PI_QUEUE_FLAG_PROFILING_ENABLE))
242+
if (!checkNativeQueueProfiling(Dev, Plugin))
239243
return false;
240244
RT::PiResult Result =
241245
Plugin.call_nocheck<detail::PiApiKind::piGetDeviceAndHostTimer>(

sycl/source/detail/event_impl.cpp

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -147,7 +147,8 @@ event_impl::event_impl(RT::PiEvent Event, const context &SyclContext)
147147

148148
event_impl::event_impl(const QueueImplPtr &Queue)
149149
: MQueue{Queue},
150-
MIsProfilingEnabled{Queue->is_host() || Queue->MIsProfilingEnabled} {
150+
MIsProfilingEnabled{Queue->is_host() || Queue->MIsProfilingEnabled},
151+
MLimitedProfiling{MIsProfilingEnabled && Queue->isProfilingLimited()} {
151152
this->setContextImpl(Queue->getContextImplPtr());
152153

153154
if (Queue->is_host()) {
@@ -265,6 +266,13 @@ template <>
265266
uint64_t
266267
event_impl::get_profiling_info<info::event_profiling::command_submit>() {
267268
checkProfilingPreconditions();
269+
if (MLimitedProfiling)
270+
throw sycl::exception(
271+
make_error_code(sycl::errc::invalid),
272+
"Submit profiling information is temporarily unsupported on this "
273+
"device. This is indicated by the lack of queue_profiling aspect, but, "
274+
"as a temporary workaround, profiling can still be enabled to use "
275+
"command_start and command_end profiling info.");
268276
return MSubmitTime;
269277
}
270278

@@ -416,7 +424,7 @@ void event_impl::cleanDepEventsThroughOneLevel() {
416424
}
417425

418426
void event_impl::setSubmissionTime() {
419-
if (!MIsProfilingEnabled)
427+
if (!MIsProfilingEnabled || MLimitedProfiling)
420428
return;
421429
if (QueueImplPtr Queue = MQueue.lock()) {
422430
try {

sycl/source/detail/event_impl.hpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -274,6 +274,7 @@ class event_impl {
274274
void *MCommand = nullptr;
275275
std::weak_ptr<queue_impl> MQueue;
276276
const bool MIsProfilingEnabled = false;
277+
const bool MLimitedProfiling = false;
277278

278279
std::weak_ptr<queue_impl> MWorkerQueue;
279280
std::weak_ptr<queue_impl> MSubmittedQueue;

sycl/source/detail/queue_impl.hpp

Lines changed: 21 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -140,10 +140,19 @@ class queue_impl {
140140
throw sycl::exception(make_error_code(errc::invalid),
141141
"Queue cannot be constructed with both of "
142142
"discard_events and enable_profiling.");
143-
if (!MDevice->has(aspect::queue_profiling))
144-
throw sycl::exception(make_error_code(errc::feature_not_supported),
145-
"Cannot enable profiling, the associated device "
146-
"does not have the queue_profiling aspect");
143+
if (!MDevice->has(aspect::queue_profiling)) {
144+
// TODO temporary workaround, see MLimitedProfiling
145+
if (MDevice->is_accelerator() &&
146+
checkNativeQueueProfiling(MDevice->getHandleRef(),
147+
Context->getPlugin())) {
148+
MLimitedProfiling = true;
149+
} else {
150+
throw sycl::exception(
151+
make_error_code(errc::feature_not_supported),
152+
"Cannot enable profiling, the associated device "
153+
"does not have the queue_profiling aspect");
154+
}
155+
}
147156
}
148157
if (has_property<ext::intel::property::queue::compute_index>()) {
149158
int Idx = get_property<ext::intel::property::queue::compute_index>()
@@ -648,6 +657,8 @@ class queue_impl {
648657
size_t Offset,
649658
const std::vector<event> &DepEvents);
650659

660+
bool isProfilingLimited() { return MLimitedProfiling; }
661+
651662
protected:
652663
// template is needed for proper unit testing
653664
template <typename HandlerType = handler>
@@ -810,6 +821,12 @@ class queue_impl {
810821
/// The instance ID of the trace event for queue object
811822
uint64_t MInstanceID = 0;
812823

824+
// TODO this is a temporary workaround to allow use of start & end info
825+
// on FPGA OpenCL 1.2 (current implementation of profiling does not
826+
// support submit time stamps on this OpenCL version). Remove once
827+
// the fallback implementation of profiling info is in place.
828+
bool MLimitedProfiling = false;
829+
813830
public:
814831
// Queue constructed with the discard_events property
815832
const bool MDiscardEvents;
Lines changed: 93 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,93 @@
1+
// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out
2+
//
3+
// RUN: %ACC_RUN_PLACEHOLDER %t.out
4+
//==----------------- event_profiling_workaround.cpp -----------------------==//
5+
//
6+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
7+
// See https://llvm.org/LICENSE.txt for license information.
8+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
9+
//
10+
//===----------------------------------------------------------------------===//
11+
12+
#include <cassert>
13+
#include <iostream>
14+
#include <sycl/sycl.hpp>
15+
16+
using namespace sycl;
17+
18+
bool verifyProfiling(event Event) {
19+
const std::string ExpectedError =
20+
"Submit profiling information is temporarily unsupported on this device. "
21+
"This is indicated by the lack of queue_profiling aspect, but, as a "
22+
"temporary workaround, profiling can still be enabled to use "
23+
"command_start and command_end profiling info.";
24+
bool CaughtException = false;
25+
try {
26+
auto Submit =
27+
Event.get_profiling_info<sycl::info::event_profiling::command_submit>();
28+
} catch (sycl::exception &e) {
29+
CaughtException =
30+
std::string(e.what()).find(ExpectedError) != std::string::npos;
31+
}
32+
assert(CaughtException);
33+
34+
auto Start =
35+
Event.get_profiling_info<sycl::info::event_profiling::command_start>();
36+
auto End =
37+
Event.get_profiling_info<sycl::info::event_profiling::command_end>();
38+
39+
assert(Start <= End);
40+
41+
bool Pass = sycl::info::event_command_status::complete ==
42+
Event.get_info<sycl::info::event::command_execution_status>();
43+
44+
return Pass;
45+
}
46+
47+
// The test checks the workaround for partial profiling support on FPGA
48+
// devices.
49+
int main() {
50+
device Dev;
51+
if (Dev.has(aspect::queue_profiling)) {
52+
std::cout << "Profiling is fully supported, skipping the test" << std::endl;
53+
return 0;
54+
}
55+
56+
const size_t Size = 10000;
57+
int Data[Size] = {0};
58+
for (size_t I = 0; I < Size; ++I) {
59+
Data[I] = I;
60+
}
61+
int Values[Size] = {0};
62+
63+
{
64+
buffer<int, 1> BufferFrom(Data, range<1>(Size));
65+
buffer<int, 1> BufferTo(Values, range<1>(Size));
66+
67+
// buffer copy
68+
queue copyQueue{Dev, sycl::property::queue::enable_profiling()};
69+
event copyEvent = copyQueue.submit([&](sycl::handler &Cgh) {
70+
accessor<int, 1, access::mode::read, access::target::device> AccessorFrom(
71+
BufferFrom, Cgh, range<1>(Size));
72+
accessor<int, 1, access::mode::write, access::target::device> AccessorTo(
73+
BufferTo, Cgh, range<1>(Size));
74+
Cgh.copy(AccessorFrom, AccessorTo);
75+
});
76+
77+
// kernel launch
78+
queue kernelQueue{Dev, sycl::property::queue::enable_profiling()};
79+
event kernelEvent = kernelQueue.submit([&](sycl::handler &CGH) {
80+
CGH.single_task<class EmptyKernel>([=]() {});
81+
});
82+
copyEvent.wait();
83+
kernelEvent.wait();
84+
85+
assert(verifyProfiling(copyEvent) && verifyProfiling(kernelEvent));
86+
}
87+
88+
for (size_t I = 0; I < Size; ++I) {
89+
assert(Data[I] == Values[I]);
90+
}
91+
92+
return 0;
93+
}

sycl/unittests/queue/GetProfilingInfo.cpp

Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -389,3 +389,58 @@ TEST(GetProfilingInfo, unsupported_device_host_time) {
389389
"does not have the queue_profiling aspect");
390390
}
391391
}
392+
393+
static pi_result redefinedDeviceGetInfoAcc(pi_device device,
394+
pi_device_info param_name,
395+
size_t param_value_size,
396+
void *param_value,
397+
size_t *param_value_size_ret) {
398+
if (param_name == PI_DEVICE_INFO_TYPE) {
399+
auto *Result = reinterpret_cast<_pi_device_type *>(param_value);
400+
*Result = PI_DEVICE_TYPE_ACC;
401+
}
402+
return PI_SUCCESS;
403+
}
404+
405+
TEST(GetProfilingInfo, partial_profiling_workaround) {
406+
sycl::unittest::PiMock Mock;
407+
sycl::platform Plt = Mock.getPlatform();
408+
Mock.redefine<sycl::detail::PiApiKind::piGetDeviceAndHostTimer>(
409+
redefinedFailedPiGetDeviceAndHostTimer);
410+
Mock.redefineAfter<sycl::detail::PiApiKind::piDeviceGetInfo>(
411+
redefinedDeviceGetInfoAcc);
412+
413+
const sycl::device Dev = Plt.get_devices()[0];
414+
sycl::context Ctx{Dev};
415+
416+
ASSERT_FALSE(Dev.has(sycl::aspect::queue_profiling));
417+
418+
static sycl::unittest::PiImage DevImage_1 =
419+
generateTestImage<InfoTestKernel>();
420+
static sycl::unittest::PiImageArray<1> DevImageArray = {&DevImage_1};
421+
auto KernelID_1 = sycl::get_kernel_id<InfoTestKernel>();
422+
sycl::queue Queue{
423+
Ctx, Dev, sycl::property_list{sycl::property::queue::enable_profiling{}}};
424+
auto KernelBundle = sycl::get_kernel_bundle<sycl::bundle_state::input>(
425+
Ctx, {Dev}, {KernelID_1});
426+
427+
const int globalWIs{512};
428+
auto event = Queue.submit([&](sycl::handler &cgh) {
429+
cgh.parallel_for<InfoTestKernel>(globalWIs, [=](sycl::id<1> idx) {});
430+
});
431+
event.wait();
432+
try {
433+
event.get_profiling_info<sycl::info::event_profiling::command_submit>();
434+
FAIL() << "No exception was thrown";
435+
} catch (sycl::exception &e) {
436+
EXPECT_EQ(e.code(), sycl::errc::invalid);
437+
EXPECT_STREQ(
438+
e.what(),
439+
"Submit profiling information is temporarily unsupported on this "
440+
"device. This is indicated by the lack of queue_profiling aspect, but, "
441+
"as a temporary workaround, profiling can still be enabled to use "
442+
"command_start and command_end profiling info.");
443+
}
444+
event.get_profiling_info<sycl::info::event_profiling::command_start>();
445+
event.get_profiling_info<sycl::info::event_profiling::command_end>();
446+
}

0 commit comments

Comments
 (0)