Skip to content

[SYCL] Add a partial profiling workaround for acc devices #9062

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Apr 14, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 9 additions & 5 deletions sycl/source/detail/device_info.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -227,15 +227,19 @@ struct get_device_info_impl<std::vector<info::fp_config>,
}
};

inline bool checkNativeQueueProfiling(RT::PiDevice Dev, const plugin &Plugin) {
pi_queue_properties Properties;
Plugin.call<PiApiKind::piDeviceGetInfo>(
Dev, PiInfoCode<info::device::queue_profiling>::value, sizeof(Properties),
&Properties, nullptr);
return Properties & PI_QUEUE_FLAG_PROFILING_ENABLE;
}

// Specialization for queue_profiling. In addition to pi_queue level profiling,
// piGetDeviceAndHostTimer support is needed for command_submit query support.
template <> struct get_device_info_impl<bool, info::device::queue_profiling> {
static bool get(RT::PiDevice Dev, const plugin &Plugin) {
pi_queue_properties Properties;
Plugin.call<PiApiKind::piDeviceGetInfo>(
Dev, PiInfoCode<info::device::queue_profiling>::value,
sizeof(Properties), &Properties, nullptr);
if (!(Properties & PI_QUEUE_FLAG_PROFILING_ENABLE))
if (!checkNativeQueueProfiling(Dev, Plugin))
return false;
RT::PiResult Result =
Plugin.call_nocheck<detail::PiApiKind::piGetDeviceAndHostTimer>(
Expand Down
12 changes: 10 additions & 2 deletions sycl/source/detail/event_impl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -147,7 +147,8 @@ event_impl::event_impl(RT::PiEvent Event, const context &SyclContext)

event_impl::event_impl(const QueueImplPtr &Queue)
: MQueue{Queue},
MIsProfilingEnabled{Queue->is_host() || Queue->MIsProfilingEnabled} {
MIsProfilingEnabled{Queue->is_host() || Queue->MIsProfilingEnabled},
MLimitedProfiling{MIsProfilingEnabled && Queue->isProfilingLimited()} {
this->setContextImpl(Queue->getContextImplPtr());

if (Queue->is_host()) {
Expand Down Expand Up @@ -265,6 +266,13 @@ template <>
uint64_t
event_impl::get_profiling_info<info::event_profiling::command_submit>() {
checkProfilingPreconditions();
if (MLimitedProfiling)
throw sycl::exception(
make_error_code(sycl::errc::invalid),
"Submit profiling information is temporarily unsupported on this "
"device. This is indicated by the lack of queue_profiling aspect, but, "
"as a temporary workaround, profiling can still be enabled to use "
"command_start and command_end profiling info.");
return MSubmitTime;
}

Expand Down Expand Up @@ -416,7 +424,7 @@ void event_impl::cleanDepEventsThroughOneLevel() {
}

void event_impl::setSubmissionTime() {
if (!MIsProfilingEnabled)
if (!MIsProfilingEnabled || MLimitedProfiling)
return;
if (QueueImplPtr Queue = MQueue.lock()) {
try {
Expand Down
1 change: 1 addition & 0 deletions sycl/source/detail/event_impl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -274,6 +274,7 @@ class event_impl {
void *MCommand = nullptr;
std::weak_ptr<queue_impl> MQueue;
const bool MIsProfilingEnabled = false;
const bool MLimitedProfiling = false;

std::weak_ptr<queue_impl> MWorkerQueue;
std::weak_ptr<queue_impl> MSubmittedQueue;
Expand Down
25 changes: 21 additions & 4 deletions sycl/source/detail/queue_impl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -140,10 +140,19 @@ class queue_impl {
throw sycl::exception(make_error_code(errc::invalid),
"Queue cannot be constructed with both of "
"discard_events and enable_profiling.");
if (!MDevice->has(aspect::queue_profiling))
throw sycl::exception(make_error_code(errc::feature_not_supported),
"Cannot enable profiling, the associated device "
"does not have the queue_profiling aspect");
if (!MDevice->has(aspect::queue_profiling)) {
// TODO temporary workaround, see MLimitedProfiling
if (MDevice->is_accelerator() &&
checkNativeQueueProfiling(MDevice->getHandleRef(),
Context->getPlugin())) {
MLimitedProfiling = true;
} else {
throw sycl::exception(
make_error_code(errc::feature_not_supported),
"Cannot enable profiling, the associated device "
"does not have the queue_profiling aspect");
}
}
}
if (has_property<ext::intel::property::queue::compute_index>()) {
int Idx = get_property<ext::intel::property::queue::compute_index>()
Expand Down Expand Up @@ -648,6 +657,8 @@ class queue_impl {
size_t Offset,
const std::vector<event> &DepEvents);

bool isProfilingLimited() { return MLimitedProfiling; }

protected:
// template is needed for proper unit testing
template <typename HandlerType = handler>
Expand Down Expand Up @@ -810,6 +821,12 @@ class queue_impl {
/// The instance ID of the trace event for queue object
uint64_t MInstanceID = 0;

// TODO this is a temporary workaround to allow use of start & end info
// on FPGA OpenCL 1.2 (current implementation of profiling does not
// support submit time stamps on this OpenCL version). Remove once
// the fallback implementation of profiling info is in place.
bool MLimitedProfiling = false;

public:
// Queue constructed with the discard_events property
const bool MDiscardEvents;
Expand Down
93 changes: 93 additions & 0 deletions sycl/test-e2e/Basic/event_profiling_workaround.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@
// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out
//
// RUN: %ACC_RUN_PLACEHOLDER %t.out
//==----------------- event_profiling_workaround.cpp -----------------------==//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include <cassert>
#include <iostream>
#include <sycl/sycl.hpp>

using namespace sycl;

bool verifyProfiling(event Event) {
const std::string ExpectedError =
"Submit profiling information is temporarily unsupported on this device. "
"This is indicated by the lack of queue_profiling aspect, but, as a "
"temporary workaround, profiling can still be enabled to use "
"command_start and command_end profiling info.";
bool CaughtException = false;
try {
auto Submit =
Event.get_profiling_info<sycl::info::event_profiling::command_submit>();
} catch (sycl::exception &e) {
CaughtException =
std::string(e.what()).find(ExpectedError) != std::string::npos;
}
assert(CaughtException);

auto Start =
Event.get_profiling_info<sycl::info::event_profiling::command_start>();
auto End =
Event.get_profiling_info<sycl::info::event_profiling::command_end>();

assert(Start <= End);

bool Pass = sycl::info::event_command_status::complete ==
Event.get_info<sycl::info::event::command_execution_status>();

return Pass;
}

// The test checks the workaround for partial profiling support on FPGA
// devices.
int main() {
device Dev;
if (Dev.has(aspect::queue_profiling)) {
std::cout << "Profiling is fully supported, skipping the test" << std::endl;
return 0;
}

const size_t Size = 10000;
int Data[Size] = {0};
for (size_t I = 0; I < Size; ++I) {
Data[I] = I;
}
int Values[Size] = {0};

{
buffer<int, 1> BufferFrom(Data, range<1>(Size));
buffer<int, 1> BufferTo(Values, range<1>(Size));

// buffer copy
queue copyQueue{Dev, sycl::property::queue::enable_profiling()};
event copyEvent = copyQueue.submit([&](sycl::handler &Cgh) {
accessor<int, 1, access::mode::read, access::target::device> AccessorFrom(
BufferFrom, Cgh, range<1>(Size));
accessor<int, 1, access::mode::write, access::target::device> AccessorTo(
BufferTo, Cgh, range<1>(Size));
Cgh.copy(AccessorFrom, AccessorTo);
});

// kernel launch
queue kernelQueue{Dev, sycl::property::queue::enable_profiling()};
event kernelEvent = kernelQueue.submit([&](sycl::handler &CGH) {
CGH.single_task<class EmptyKernel>([=]() {});
});
copyEvent.wait();
kernelEvent.wait();

assert(verifyProfiling(copyEvent) && verifyProfiling(kernelEvent));
}

for (size_t I = 0; I < Size; ++I) {
assert(Data[I] == Values[I]);
}

return 0;
}
55 changes: 55 additions & 0 deletions sycl/unittests/queue/GetProfilingInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -389,3 +389,58 @@ TEST(GetProfilingInfo, unsupported_device_host_time) {
"does not have the queue_profiling aspect");
}
}

static pi_result redefinedDeviceGetInfoAcc(pi_device device,
pi_device_info param_name,
size_t param_value_size,
void *param_value,
size_t *param_value_size_ret) {
if (param_name == PI_DEVICE_INFO_TYPE) {
auto *Result = reinterpret_cast<_pi_device_type *>(param_value);
*Result = PI_DEVICE_TYPE_ACC;
}
return PI_SUCCESS;
}

TEST(GetProfilingInfo, partial_profiling_workaround) {
sycl::unittest::PiMock Mock;
sycl::platform Plt = Mock.getPlatform();
Mock.redefine<sycl::detail::PiApiKind::piGetDeviceAndHostTimer>(
redefinedFailedPiGetDeviceAndHostTimer);
Mock.redefineAfter<sycl::detail::PiApiKind::piDeviceGetInfo>(
redefinedDeviceGetInfoAcc);

const sycl::device Dev = Plt.get_devices()[0];
sycl::context Ctx{Dev};

ASSERT_FALSE(Dev.has(sycl::aspect::queue_profiling));

static sycl::unittest::PiImage DevImage_1 =
generateTestImage<InfoTestKernel>();
static sycl::unittest::PiImageArray<1> DevImageArray = {&DevImage_1};
auto KernelID_1 = sycl::get_kernel_id<InfoTestKernel>();
sycl::queue Queue{
Ctx, Dev, sycl::property_list{sycl::property::queue::enable_profiling{}}};
auto KernelBundle = sycl::get_kernel_bundle<sycl::bundle_state::input>(
Ctx, {Dev}, {KernelID_1});

const int globalWIs{512};
auto event = Queue.submit([&](sycl::handler &cgh) {
cgh.parallel_for<InfoTestKernel>(globalWIs, [=](sycl::id<1> idx) {});
});
event.wait();
try {
event.get_profiling_info<sycl::info::event_profiling::command_submit>();
FAIL() << "No exception was thrown";
} catch (sycl::exception &e) {
EXPECT_EQ(e.code(), sycl::errc::invalid);
EXPECT_STREQ(
e.what(),
"Submit profiling information is temporarily unsupported on this "
"device. This is indicated by the lack of queue_profiling aspect, but, "
"as a temporary workaround, profiling can still be enabled to use "
"command_start and command_end profiling info.");
}
event.get_profiling_info<sycl::info::event_profiling::command_start>();
event.get_profiling_info<sycl::info::event_profiling::command_end>();
}