-
Notifications
You must be signed in to change notification settings - Fork 13.6k
[Offload] Fix handling of 'bare' mode when environment missing #136794
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
@llvm/pr-subscribers-flang-openmp Author: Joseph Huber (jhuber6) ChangesSummary: Full diff: https://github.com/llvm/llvm-project/pull/136794.diff 6 Files Affected:
diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPDeviceConstants.h b/llvm/include/llvm/Frontend/OpenMP/OMPDeviceConstants.h
index ccf8e727c4045..3ae447b14f320 100644
--- a/llvm/include/llvm/Frontend/OpenMP/OMPDeviceConstants.h
+++ b/llvm/include/llvm/Frontend/OpenMP/OMPDeviceConstants.h
@@ -19,6 +19,7 @@ namespace llvm {
namespace omp {
enum OMPTgtExecModeFlags : unsigned char {
+ OMP_TGT_EXEC_MODE_BARE = 0,
OMP_TGT_EXEC_MODE_GENERIC = 1 << 0,
OMP_TGT_EXEC_MODE_SPMD = 1 << 1,
OMP_TGT_EXEC_MODE_GENERIC_SPMD =
diff --git a/offload/DeviceRTL/src/Kernel.cpp b/offload/DeviceRTL/src/Kernel.cpp
index eac68a15538c4..467e44a65276c 100644
--- a/offload/DeviceRTL/src/Kernel.cpp
+++ b/offload/DeviceRTL/src/Kernel.cpp
@@ -26,6 +26,7 @@ using namespace ompx;
// These flags are copied from "llvm/Frontend/OpenMP/OMPDeviceConstants.h" and
// must be kept in-sync.
enum OMPTgtExecModeFlags : unsigned char {
+ OMP_TGT_EXEC_MODE_BARE = 0,
OMP_TGT_EXEC_MODE_GENERIC = 1 << 0,
OMP_TGT_EXEC_MODE_SPMD = 1 << 1,
OMP_TGT_EXEC_MODE_GENERIC_SPMD =
diff --git a/offload/plugins-nextgen/common/include/PluginInterface.h b/offload/plugins-nextgen/common/include/PluginInterface.h
index a30589e039468..e54a8afdd3f4f 100644
--- a/offload/plugins-nextgen/common/include/PluginInterface.h
+++ b/offload/plugins-nextgen/common/include/PluginInterface.h
@@ -297,6 +297,7 @@ struct GenericKernelTy {
/// Indicate whether an execution mode is valid.
static bool isValidExecutionMode(OMPTgtExecModeFlags ExecutionMode) {
switch (ExecutionMode) {
+ case OMP_TGT_EXEC_MODE_BARE:
case OMP_TGT_EXEC_MODE_SPMD:
case OMP_TGT_EXEC_MODE_GENERIC:
case OMP_TGT_EXEC_MODE_GENERIC_SPMD:
@@ -309,6 +310,8 @@ struct GenericKernelTy {
/// Get the execution mode name of the kernel.
const char *getExecutionModeName() const {
switch (KernelEnvironment.Configuration.ExecMode) {
+ case OMP_TGT_EXEC_MODE_BARE:
+ return "BARE";
case OMP_TGT_EXEC_MODE_SPMD:
return "SPMD";
case OMP_TGT_EXEC_MODE_GENERIC:
@@ -364,6 +367,9 @@ struct GenericKernelTy {
bool isSPMDMode() const {
return KernelEnvironment.Configuration.ExecMode == OMP_TGT_EXEC_MODE_SPMD;
}
+ bool isBareMode() const {
+ return KernelEnvironment.Configuration.ExecMode == OMP_TGT_EXEC_MODE_BARE;
+ }
/// The kernel name.
const char *Name;
diff --git a/offload/plugins-nextgen/common/src/PluginInterface.cpp b/offload/plugins-nextgen/common/src/PluginInterface.cpp
index 4d2ebcbc7be8e..9938a0e914cc9 100644
--- a/offload/plugins-nextgen/common/src/PluginInterface.cpp
+++ b/offload/plugins-nextgen/common/src/PluginInterface.cpp
@@ -440,18 +440,19 @@ Error GenericKernelTy::init(GenericDeviceTy &GenericDevice,
ImagePtr = &Image;
// Retrieve kernel environment object for the kernel.
- GlobalTy KernelEnv(std::string(Name) + "_kernel_environment",
- sizeof(KernelEnvironment), &KernelEnvironment);
+ std::string EnvironmentName = std::string(Name) + "_kernel_environment";
GenericGlobalHandlerTy &GHandler = GenericDevice.Plugin.getGlobalHandler();
- if (auto Err =
- GHandler.readGlobalFromImage(GenericDevice, *ImagePtr, KernelEnv)) {
- [[maybe_unused]] std::string ErrStr = toString(std::move(Err));
- DP("Failed to read kernel environment for '%s': %s\n"
- "Using default SPMD (2) execution mode\n",
- Name, ErrStr.data());
- assert(KernelEnvironment.Configuration.ReductionDataSize == 0 &&
- "Default initialization failed.");
- IsBareKernel = true;
+ if (GHandler.isSymbolInImage(GenericDevice, Image, EnvironmentName)) {
+ GlobalTy KernelEnv(EnvironmentName, sizeof(KernelEnvironment),
+ &KernelEnvironment);
+ if (auto Err =
+ GHandler.readGlobalFromImage(GenericDevice, *ImagePtr, KernelEnv))
+ return Err;
+ } else {
+ KernelEnvironment = KernelEnvironmentTy{};
+ DP("Failed to read kernel environment for '%s' Using default Bare (0) "
+ "execution mode\n",
+ Name);
}
// Max = Config.Max > 0 ? min(Config.Max, Device.Max) : Device.Max;
@@ -573,7 +574,7 @@ Error GenericKernelTy::launch(GenericDeviceTy &GenericDevice, void **ArgPtrs,
KernelArgs.ThreadLimit[2]};
uint32_t NumBlocks[3] = {KernelArgs.NumTeams[0], KernelArgs.NumTeams[1],
KernelArgs.NumTeams[2]};
- if (!IsBareKernel) {
+ if (!isBareMode()) {
NumThreads[0] = getNumThreads(GenericDevice, NumThreads);
NumBlocks[0] = getNumBlocks(GenericDevice, NumBlocks, KernelArgs.Tripcount,
NumThreads[0], KernelArgs.ThreadLimit[0] > 0);
@@ -627,7 +628,7 @@ KernelLaunchParamsTy GenericKernelTy::prepareArgs(
uint32_t GenericKernelTy::getNumThreads(GenericDeviceTy &GenericDevice,
uint32_t ThreadLimitClause[3]) const {
- assert(!IsBareKernel && "bare kernel should not call this function");
+ assert(!isBareMode() && "bare kernel should not call this function");
assert(ThreadLimitClause[1] == 1 && ThreadLimitClause[2] == 1 &&
"Multi dimensional launch not supported yet.");
@@ -645,7 +646,7 @@ uint32_t GenericKernelTy::getNumBlocks(GenericDeviceTy &GenericDevice,
uint64_t LoopTripCount,
uint32_t &NumThreads,
bool IsNumThreadsFromUser) const {
- assert(!IsBareKernel && "bare kernel should not call this function");
+ assert(!isBareMode() && "bare kernel should not call this function");
assert(NumTeamsClause[1] == 1 && NumTeamsClause[2] == 1 &&
"Multi dimensional launch not supported yet.");
diff --git a/offload/test/offloading/ompx_bare.c b/offload/test/offloading/ompx_bare.c
index 6a6ada9617cf5..9c8addf03c4dc 100644
--- a/offload/test/offloading/ompx_bare.c
+++ b/offload/test/offloading/ompx_bare.c
@@ -15,7 +15,7 @@ int main(int argc, char *argv[]) {
const int N = num_blocks * block_size;
int *data = (int *)malloc(N * sizeof(int));
- // CHECK: "PluginInterface" device 0 info: Launching kernel __omp_offloading_{{.*}} with [64,1,1] blocks and [64,1,1] threads in SPMD mode
+ // CHECK: "PluginInterface" device 0 info: Launching kernel __omp_offloading_{{.*}} with [64,1,1] blocks and [64,1,1] threads in BARE mode
#pragma omp target teams ompx_bare num_teams(num_blocks) thread_limit(block_size) map(from: data[0:N])
{
diff --git a/offload/test/offloading/ompx_bare_multi_dim.cpp b/offload/test/offloading/ompx_bare_multi_dim.cpp
index d37278525fdb0..3a726f89f7dfb 100644
--- a/offload/test/offloading/ompx_bare_multi_dim.cpp
+++ b/offload/test/offloading/ompx_bare_multi_dim.cpp
@@ -7,7 +7,7 @@
#include <cassert>
#include <vector>
-// CHECK: "PluginInterface" device 0 info: Launching kernel __omp_offloading_{{.*}} with [2,4,6] blocks and [32,4,2] threads in SPMD mode
+// CHECK: "PluginInterface" device 0 info: Launching kernel __omp_offloading_{{.*}} with [2,4,6] blocks and [32,4,2] threads in BARE mode
int main(int argc, char *argv[]) {
int bs[3] = {32u, 4u, 2u};
|
@llvm/pr-subscribers-offload Author: Joseph Huber (jhuber6) ChangesSummary: Full diff: https://github.com/llvm/llvm-project/pull/136794.diff 6 Files Affected:
diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPDeviceConstants.h b/llvm/include/llvm/Frontend/OpenMP/OMPDeviceConstants.h
index ccf8e727c4045..3ae447b14f320 100644
--- a/llvm/include/llvm/Frontend/OpenMP/OMPDeviceConstants.h
+++ b/llvm/include/llvm/Frontend/OpenMP/OMPDeviceConstants.h
@@ -19,6 +19,7 @@ namespace llvm {
namespace omp {
enum OMPTgtExecModeFlags : unsigned char {
+ OMP_TGT_EXEC_MODE_BARE = 0,
OMP_TGT_EXEC_MODE_GENERIC = 1 << 0,
OMP_TGT_EXEC_MODE_SPMD = 1 << 1,
OMP_TGT_EXEC_MODE_GENERIC_SPMD =
diff --git a/offload/DeviceRTL/src/Kernel.cpp b/offload/DeviceRTL/src/Kernel.cpp
index eac68a15538c4..467e44a65276c 100644
--- a/offload/DeviceRTL/src/Kernel.cpp
+++ b/offload/DeviceRTL/src/Kernel.cpp
@@ -26,6 +26,7 @@ using namespace ompx;
// These flags are copied from "llvm/Frontend/OpenMP/OMPDeviceConstants.h" and
// must be kept in-sync.
enum OMPTgtExecModeFlags : unsigned char {
+ OMP_TGT_EXEC_MODE_BARE = 0,
OMP_TGT_EXEC_MODE_GENERIC = 1 << 0,
OMP_TGT_EXEC_MODE_SPMD = 1 << 1,
OMP_TGT_EXEC_MODE_GENERIC_SPMD =
diff --git a/offload/plugins-nextgen/common/include/PluginInterface.h b/offload/plugins-nextgen/common/include/PluginInterface.h
index a30589e039468..e54a8afdd3f4f 100644
--- a/offload/plugins-nextgen/common/include/PluginInterface.h
+++ b/offload/plugins-nextgen/common/include/PluginInterface.h
@@ -297,6 +297,7 @@ struct GenericKernelTy {
/// Indicate whether an execution mode is valid.
static bool isValidExecutionMode(OMPTgtExecModeFlags ExecutionMode) {
switch (ExecutionMode) {
+ case OMP_TGT_EXEC_MODE_BARE:
case OMP_TGT_EXEC_MODE_SPMD:
case OMP_TGT_EXEC_MODE_GENERIC:
case OMP_TGT_EXEC_MODE_GENERIC_SPMD:
@@ -309,6 +310,8 @@ struct GenericKernelTy {
/// Get the execution mode name of the kernel.
const char *getExecutionModeName() const {
switch (KernelEnvironment.Configuration.ExecMode) {
+ case OMP_TGT_EXEC_MODE_BARE:
+ return "BARE";
case OMP_TGT_EXEC_MODE_SPMD:
return "SPMD";
case OMP_TGT_EXEC_MODE_GENERIC:
@@ -364,6 +367,9 @@ struct GenericKernelTy {
bool isSPMDMode() const {
return KernelEnvironment.Configuration.ExecMode == OMP_TGT_EXEC_MODE_SPMD;
}
+ bool isBareMode() const {
+ return KernelEnvironment.Configuration.ExecMode == OMP_TGT_EXEC_MODE_BARE;
+ }
/// The kernel name.
const char *Name;
diff --git a/offload/plugins-nextgen/common/src/PluginInterface.cpp b/offload/plugins-nextgen/common/src/PluginInterface.cpp
index 4d2ebcbc7be8e..9938a0e914cc9 100644
--- a/offload/plugins-nextgen/common/src/PluginInterface.cpp
+++ b/offload/plugins-nextgen/common/src/PluginInterface.cpp
@@ -440,18 +440,19 @@ Error GenericKernelTy::init(GenericDeviceTy &GenericDevice,
ImagePtr = &Image;
// Retrieve kernel environment object for the kernel.
- GlobalTy KernelEnv(std::string(Name) + "_kernel_environment",
- sizeof(KernelEnvironment), &KernelEnvironment);
+ std::string EnvironmentName = std::string(Name) + "_kernel_environment";
GenericGlobalHandlerTy &GHandler = GenericDevice.Plugin.getGlobalHandler();
- if (auto Err =
- GHandler.readGlobalFromImage(GenericDevice, *ImagePtr, KernelEnv)) {
- [[maybe_unused]] std::string ErrStr = toString(std::move(Err));
- DP("Failed to read kernel environment for '%s': %s\n"
- "Using default SPMD (2) execution mode\n",
- Name, ErrStr.data());
- assert(KernelEnvironment.Configuration.ReductionDataSize == 0 &&
- "Default initialization failed.");
- IsBareKernel = true;
+ if (GHandler.isSymbolInImage(GenericDevice, Image, EnvironmentName)) {
+ GlobalTy KernelEnv(EnvironmentName, sizeof(KernelEnvironment),
+ &KernelEnvironment);
+ if (auto Err =
+ GHandler.readGlobalFromImage(GenericDevice, *ImagePtr, KernelEnv))
+ return Err;
+ } else {
+ KernelEnvironment = KernelEnvironmentTy{};
+ DP("Failed to read kernel environment for '%s' Using default Bare (0) "
+ "execution mode\n",
+ Name);
}
// Max = Config.Max > 0 ? min(Config.Max, Device.Max) : Device.Max;
@@ -573,7 +574,7 @@ Error GenericKernelTy::launch(GenericDeviceTy &GenericDevice, void **ArgPtrs,
KernelArgs.ThreadLimit[2]};
uint32_t NumBlocks[3] = {KernelArgs.NumTeams[0], KernelArgs.NumTeams[1],
KernelArgs.NumTeams[2]};
- if (!IsBareKernel) {
+ if (!isBareMode()) {
NumThreads[0] = getNumThreads(GenericDevice, NumThreads);
NumBlocks[0] = getNumBlocks(GenericDevice, NumBlocks, KernelArgs.Tripcount,
NumThreads[0], KernelArgs.ThreadLimit[0] > 0);
@@ -627,7 +628,7 @@ KernelLaunchParamsTy GenericKernelTy::prepareArgs(
uint32_t GenericKernelTy::getNumThreads(GenericDeviceTy &GenericDevice,
uint32_t ThreadLimitClause[3]) const {
- assert(!IsBareKernel && "bare kernel should not call this function");
+ assert(!isBareMode() && "bare kernel should not call this function");
assert(ThreadLimitClause[1] == 1 && ThreadLimitClause[2] == 1 &&
"Multi dimensional launch not supported yet.");
@@ -645,7 +646,7 @@ uint32_t GenericKernelTy::getNumBlocks(GenericDeviceTy &GenericDevice,
uint64_t LoopTripCount,
uint32_t &NumThreads,
bool IsNumThreadsFromUser) const {
- assert(!IsBareKernel && "bare kernel should not call this function");
+ assert(!isBareMode() && "bare kernel should not call this function");
assert(NumTeamsClause[1] == 1 && NumTeamsClause[2] == 1 &&
"Multi dimensional launch not supported yet.");
diff --git a/offload/test/offloading/ompx_bare.c b/offload/test/offloading/ompx_bare.c
index 6a6ada9617cf5..9c8addf03c4dc 100644
--- a/offload/test/offloading/ompx_bare.c
+++ b/offload/test/offloading/ompx_bare.c
@@ -15,7 +15,7 @@ int main(int argc, char *argv[]) {
const int N = num_blocks * block_size;
int *data = (int *)malloc(N * sizeof(int));
- // CHECK: "PluginInterface" device 0 info: Launching kernel __omp_offloading_{{.*}} with [64,1,1] blocks and [64,1,1] threads in SPMD mode
+ // CHECK: "PluginInterface" device 0 info: Launching kernel __omp_offloading_{{.*}} with [64,1,1] blocks and [64,1,1] threads in BARE mode
#pragma omp target teams ompx_bare num_teams(num_blocks) thread_limit(block_size) map(from: data[0:N])
{
diff --git a/offload/test/offloading/ompx_bare_multi_dim.cpp b/offload/test/offloading/ompx_bare_multi_dim.cpp
index d37278525fdb0..3a726f89f7dfb 100644
--- a/offload/test/offloading/ompx_bare_multi_dim.cpp
+++ b/offload/test/offloading/ompx_bare_multi_dim.cpp
@@ -7,7 +7,7 @@
#include <cassert>
#include <vector>
-// CHECK: "PluginInterface" device 0 info: Launching kernel __omp_offloading_{{.*}} with [2,4,6] blocks and [32,4,2] threads in SPMD mode
+// CHECK: "PluginInterface" device 0 info: Launching kernel __omp_offloading_{{.*}} with [2,4,6] blocks and [32,4,2] threads in BARE mode
int main(int argc, char *argv[]) {
int bs[3] = {32u, 4u, 2u};
|
You can test this locally with the following command:git-clang-format --diff HEAD~1 HEAD --extensions c,cpp,h -- llvm/include/llvm/Frontend/OpenMP/OMPDeviceConstants.h offload/DeviceRTL/src/Kernel.cpp offload/plugins-nextgen/common/include/PluginInterface.h offload/plugins-nextgen/common/src/PluginInterface.cpp offload/test/offloading/ompx_bare.c offload/test/offloading/ompx_bare_multi_dim.cpp View the diff from clang-format here.diff --git a/offload/test/offloading/ompx_bare.c b/offload/test/offloading/ompx_bare.c
index 9c8addf03..bd5efbc05 100644
--- a/offload/test/offloading/ompx_bare.c
+++ b/offload/test/offloading/ompx_bare.c
@@ -15,7 +15,9 @@ int main(int argc, char *argv[]) {
const int N = num_blocks * block_size;
int *data = (int *)malloc(N * sizeof(int));
- // CHECK: "PluginInterface" device 0 info: Launching kernel __omp_offloading_{{.*}} with [64,1,1] blocks and [64,1,1] threads in BARE mode
+ // CHECK: "PluginInterface" device 0 info: Launching kernel
+ // __omp_offloading_{{.*}} with [64,1,1] blocks and [64,1,1] threads in BARE
+ // mode
#pragma omp target teams ompx_bare num_teams(num_blocks) thread_limit(block_size) map(from: data[0:N])
{
diff --git a/offload/test/offloading/ompx_bare_multi_dim.cpp b/offload/test/offloading/ompx_bare_multi_dim.cpp
index 3a726f89f..c3358428a 100644
--- a/offload/test/offloading/ompx_bare_multi_dim.cpp
+++ b/offload/test/offloading/ompx_bare_multi_dim.cpp
@@ -7,7 +7,8 @@
#include <cassert>
#include <vector>
-// CHECK: "PluginInterface" device 0 info: Launching kernel __omp_offloading_{{.*}} with [2,4,6] blocks and [32,4,2] threads in BARE mode
+// CHECK: "PluginInterface" device 0 info: Launching kernel
+// __omp_offloading_{{.*}} with [2,4,6] blocks and [32,4,2] threads in BARE mode
int main(int argc, char *argv[]) {
int bs[3] = {32u, 4u, 2u};
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM, thanks for the clean up.
Summary: We treated the missing kernel environment as a unique mode, but it was kind of this random bool that was doing the same thing and it explicitly expects the kernel environment to be zero. It broke after the previous change since it used to default to SPMD and didn't handle zero in any of the other cases despite being used. This fixes that and queries for it without needing to consume an error.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM
…136794) Summary: We treated the missing kernel environment as a unique mode, but it was kind of this random bool that was doing the same thing and it explicitly expects the kernel environment to be zero. It broke after the previous change since it used to default to SPMD and didn't handle zero in any of the other cases despite being used. This fixes that and queries for it without needing to consume an error.
…136794) Summary: We treated the missing kernel environment as a unique mode, but it was kind of this random bool that was doing the same thing and it explicitly expects the kernel environment to be zero. It broke after the previous change since it used to default to SPMD and didn't handle zero in any of the other cases despite being used. This fixes that and queries for it without needing to consume an error.
…136794) Summary: We treated the missing kernel environment as a unique mode, but it was kind of this random bool that was doing the same thing and it explicitly expects the kernel environment to be zero. It broke after the previous change since it used to default to SPMD and didn't handle zero in any of the other cases despite being used. This fixes that and queries for it without needing to consume an error.
Summary:
We treated the missing kernel environment as a unique mode, but it was
kind of this random bool that was doing the same thing and it explicitly
expects the kernel environment to be zero. It broke after the previous
change since it used to default to SPMD and didn't handle zero in any of
the other cases despite being used. This fixes that and queries for it
without needing to consume an error.