Skip to content

Commit 72a6032

Browse files
committed
Don't remove llvm.compiler.used for NVPTX/AMDGCN
These are removed automatically during lowering.
1 parent 56a6ae2 commit 72a6032

File tree

3 files changed

+61
-1
lines changed

3 files changed

+61
-1
lines changed

llvm/tools/sycl-post-link/sycl-post-link.cpp

+4-1
Original file line numberDiff line numberDiff line change
@@ -788,7 +788,10 @@ processInputModule(std::unique_ptr<Module> M) {
788788
// its purpose, these device_global variables can be removed. If they are not
789789
// used inside the device code after they have been removed from
790790
// "llvm.compiler.used" they can be erased safely.
791-
Modified |= removeDeviceGlobalFromCompilerUsed(*M.get());
791+
if (auto Triple = M->getTargetTriple();
792+
Triple.find("nvptx") != std::string::npos &&
793+
Triple.find("amdgcn") != std::string::npos)
794+
Modified |= removeDeviceGlobalFromCompilerUsed(*M.get());
792795

793796
// Instrument each image scope device globals if the module has been
794797
// instrumented by sanitizer pass.
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
// RUN: %{build} -o %t.out
2+
// RUN: %{run} %t.out
3+
//
4+
// The OpenCL GPU backends do not currently support device_global backend
5+
// calls.
6+
// UNSUPPORTED: opencl && gpu
7+
//
8+
// Tests static device_global access through device kernels.
9+
10+
#include "common.hpp"
11+
12+
static device_global<int[4], TestProperties> DeviceGlobalVar;
13+
14+
int main() {
15+
queue Q;
16+
17+
Q.single_task([=]() { DeviceGlobalVar.get()[0] = 42; });
18+
// Make sure that the write happens before subsequent read
19+
Q.wait();
20+
21+
int OutVal = 0;
22+
{
23+
buffer<int, 1> OutBuf(&OutVal, 1);
24+
Q.submit([&](handler &CGH) {
25+
auto OutAcc = OutBuf.get_access<access::mode::write>(CGH);
26+
CGH.single_task([=]() { OutAcc[0] = DeviceGlobalVar.get()[0]; });
27+
});
28+
}
29+
assert(OutVal == 42 && "Read value does not match.");
30+
return 0;
31+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
// Tests that the llvm.compiler.used symbol, which is used to implement static
2+
// device globals, is removed at some point in compilation. For SPIR-V this
3+
// symbol is removed at sycl-post-link and for NVPTX/AMDGCN it is removed at
4+
// lowering.
5+
6+
// UNSUPPORTED: windows
7+
8+
// RUN: %clangxx -fsycl %s -o %t
9+
// RUN: strings %t | not grep "llvm.compiler.used"
10+
11+
// RUN: %if cuda %{ %clangxx -fsycl -fsycl-targets=nvptx64-nvidia-cuda %s -o %t %}
12+
// RUN: %if cuda %{ strings %t | not grep "llvm.compiler.used" %}
13+
14+
// RUN: %if hip_amd %{ %clangxx -fsycl -fsycl-targets=amd_gpu_gfx906 %s -o %t %}
15+
// RUN: %if hip_amd %{ strings %t | not grep "llvm.compiler.used" %}
16+
17+
#include <sycl/sycl.hpp>
18+
19+
using namespace sycl;
20+
using namespace sycl::ext::oneapi::experimental;
21+
22+
static device_global<int> DeviceGlobalVar;
23+
24+
int main() {
25+
sycl::queue{}.single_task([=] { volatile int ReadVal = DeviceGlobalVar; });
26+
}

0 commit comments

Comments
 (0)