Skip to content

[SYCL][CUDA][HIP] Report every device in its own platform #4571

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Sep 15, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
31 changes: 23 additions & 8 deletions sycl/plugins/cuda/pi_cuda.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -678,13 +678,16 @@ pi_result cuda_piDeviceGetInfo(pi_device device, pi_device_info param_name,
/// Triggers the CUDA Driver initialization (cuInit) the first time, so this
/// must be the first PI API called.
///
/// However, because multiple devices in a context are not currently supported,
/// each device is placed in a separate platform.
///
pi_result cuda_piPlatformsGet(pi_uint32 num_entries, pi_platform *platforms,
pi_uint32 *num_platforms) {

try {
static std::once_flag initFlag;
static pi_uint32 numPlatforms = 1;
static _pi_platform platformId;
static std::vector<_pi_platform> platformIds;

if (num_entries == 0 && platforms != nullptr) {
return PI_INVALID_VALUE;
Expand All @@ -709,14 +712,18 @@ pi_result cuda_piPlatformsGet(pi_uint32 num_entries, pi_platform *platforms,
return;
}
try {
platformId.devices_.reserve(numDevices);
// make one platform per device
numPlatforms = numDevices;
platformIds.resize(numDevices);

for (int i = 0; i < numDevices; ++i) {
CUdevice device;
err = PI_CHECK_ERROR(cuDeviceGet(&device, i));
platformId.devices_.emplace_back(
new _pi_device{device, &platformId});
platformIds[i].devices_.emplace_back(
new _pi_device{device, &platformIds[i]});

{
const auto &dev = platformId.devices_.back().get();
const auto &dev = platformIds[i].devices_.back().get();
size_t maxWorkGroupSize = 0u;
size_t maxThreadsPerBlock[3] = {};
pi_result retError = cuda_piDeviceGetInfo(
Expand All @@ -737,11 +744,17 @@ pi_result cuda_piPlatformsGet(pi_uint32 num_entries, pi_platform *platforms,
}
} catch (const std::bad_alloc &) {
// Signal out-of-memory situation
platformId.devices_.clear();
for (int i = 0; i < numDevices; ++i) {
platformIds[i].devices_.clear();
}
platformIds.clear();
err = PI_OUT_OF_HOST_MEMORY;
} catch (...) {
// Clear and rethrow to allow retry
platformId.devices_.clear();
for (int i = 0; i < numDevices; ++i) {
platformIds[i].devices_.clear();
}
platformIds.clear();
throw;
}
},
Expand All @@ -752,7 +765,9 @@ pi_result cuda_piPlatformsGet(pi_uint32 num_entries, pi_platform *platforms,
}

if (platforms != nullptr) {
*platforms = &platformId;
for (unsigned i = 0; i < std::min(num_entries, numPlatforms); ++i) {
platforms[i] = &platformIds[i];
}
}

return err;
Expand Down
27 changes: 20 additions & 7 deletions sycl/plugins/hip/pi_hip.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -671,13 +671,16 @@ extern "C" {
/// Triggers the HIP Driver initialization (hipInit) the first time, so this
/// must be the first PI API called.
///
/// However, because multiple devices in a context are not currently supported,
/// each device is placed in a separate platform.
///
pi_result hip_piPlatformsGet(pi_uint32 num_entries, pi_platform *platforms,
pi_uint32 *num_platforms) {

try {
static std::once_flag initFlag;
static pi_uint32 numPlatforms = 1;
static _pi_platform platformId;
static std::vector<_pi_platform> platformIds;

if (num_entries == 0 and platforms != nullptr) {
return PI_INVALID_VALUE;
Expand Down Expand Up @@ -707,20 +710,28 @@ pi_result hip_piPlatformsGet(pi_uint32 num_entries, pi_platform *platforms,
return;
}
try {
platformId.devices_.reserve(numDevices);
numPlatforms = numDevices;
platformIds.resize(numDevices);

for (int i = 0; i < numDevices; ++i) {
hipDevice_t device;
err = PI_CHECK_ERROR(hipDeviceGet(&device, i));
platformId.devices_.emplace_back(
new _pi_device{device, &platformId});
platformIds[i].devices_.emplace_back(
new _pi_device{device, &platformIds[i]});
}
} catch (const std::bad_alloc &) {
// Signal out-of-memory situation
platformId.devices_.clear();
for (int i = 0; i < numDevices; ++i) {
platformIds[i].devices_.clear();
}
platformIds.clear();
err = PI_OUT_OF_HOST_MEMORY;
} catch (...) {
// Clear and rethrow to allow retry
platformId.devices_.clear();
for (int i = 0; i < numDevices; ++i) {
platformIds[i].devices_.clear();
}
platformIds.clear();
throw;
}
},
Expand All @@ -731,7 +742,9 @@ pi_result hip_piPlatformsGet(pi_uint32 num_entries, pi_platform *platforms,
}

if (platforms != nullptr) {
*platforms = &platformId;
for (unsigned i = 0; i < std::min(num_entries, numPlatforms); ++i) {
platforms[i] = &platformIds[i];
}
}

return err;
Expand Down