Skip to content

Commit a8c60db

Browse files
authored
[sycl-post-link] Don't remove llvm.compiler.used for NVPTX (#15224)
`llvm.compiler.used` is a global symbol which tells the compiler not to touch some other global symbols until backend lowering. The `llvm.compiler.used` symbol itself is thus removed automatically during lowering for NVPTX and AMDGCN. Removal at `sycl-post-link`, prior to lowering, was causing issues for these backends, where symbols protected by `llvm.compiler.used` were getting removed after `sycl-post-link` and before lowering. We retain the current behaviour for SPIR-V, as SPIR-V generation is handled in `llvm-spirv` anyway, not in the LLVM SPIR-V backend. Also adds tests to make sure static device_globals are handled properly for NVPTX/AMDGCN.
1 parent 9ff35b8 commit a8c60db

File tree

5 files changed

+108
-5
lines changed

5 files changed

+108
-5
lines changed
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
;; This test verifies llc on AMDGCN will delete the llvm.compiler.used symbol
2+
;; while keeping the symbol in the outputted ASM.
3+
4+
; RUN: llc < %s -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 | FileCheck %s
5+
; RUN: llc < %s -mtriple=amdgcn-amd-amdhsa -mcpu=gfx906 | FileCheck %s
6+
; RUN: llc < %s -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a | FileCheck %s
7+
8+
@keep_this = internal global i32 2, align 4
9+
@llvm.compiler.used = appending global [1 x ptr] [ptr @keep_this], section "llvm.metadata"
10+
11+
; CHECK-NOT: llvm.metadata
12+
; CHECK-NOT: llvm{{.*}}used
13+
; CHECK-NOT: llvm{{.*}}compiler{{.*}}used
14+
15+
; CHECK: .type keep_this,@object ;
16+
17+
; CHECK-NOT: llvm.metadata
18+
; CHECK-NOT: llvm{{.*}}used
19+
; CHECK-NOT: llvm{{.*}}compiler{{.*}}used
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
;; This test verifies llc on NVPTX will delete the llvm.compiler.used symbol
2+
;; while keeping the symbol in the outputted ASM.
3+
4+
; RUN: llc < %s -march=nvptx64 | FileCheck %s
5+
6+
@keep_this = internal global i32 2, align 4
7+
@llvm.compiler.used = appending global [1 x ptr] [ptr @keep_this], section "llvm.metadata"
8+
9+
; CHECK-NOT: llvm.metadata
10+
; CHECK-NOT: llvm{{.*}}used
11+
; CHECK-NOT: llvm{{.*}}compiler{{.*}}used
12+
13+
; CHECK: .global .align 4 .u32 keep_this
14+
15+
; CHECK-NOT: llvm.metadata
16+
; CHECK-NOT: llvm{{.*}}used
17+
; CHECK-NOT: llvm{{.*}}compiler{{.*}}used

llvm/tools/sycl-post-link/sycl-post-link.cpp

+5-5
Original file line numberDiff line numberDiff line change
@@ -785,11 +785,11 @@ processInputModule(std::unique_ptr<Module> M) {
785785
Modified |= removeSYCLKernelsConstRefArray(*M.get());
786786

787787
// There may be device_global variables kept alive in "llvm.compiler.used"
788-
// to keep the optimizer from wrongfully removing them. Since it has served
789-
// its purpose, these device_global variables can be removed. If they are not
790-
// used inside the device code after they have been removed from
791-
// "llvm.compiler.used" they can be erased safely.
792-
Modified |= removeDeviceGlobalFromCompilerUsed(*M.get());
788+
// to keep the optimizer from wrongfully removing them. llvm.compiler.used
789+
// symbols are usually removed at backend lowering, but this is handled here
790+
// for SPIR-V since SYCL compilation uses llvm-spirv, not the SPIR-V backend.
791+
if (auto Triple = M->getTargetTriple().find("spir") != std::string::npos)
792+
Modified |= removeDeviceGlobalFromCompilerUsed(*M.get());
793793

794794
// Instrument each image scope device globals if the module has been
795795
// instrumented by sanitizer pass.
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
// RUN: %{build} -o %t.out
2+
// RUN: %{run} %t.out
3+
//
4+
// The OpenCL GPU backends do not currently support device_global backend
5+
// calls.
6+
// UNSUPPORTED: opencl && gpu
7+
//
8+
// Tests static device_global access through device kernels.
9+
10+
#include "common.hpp"
11+
12+
static device_global<int[4], TestProperties> DeviceGlobalVar;
13+
14+
int main() {
15+
queue Q;
16+
17+
Q.single_task([=]() { DeviceGlobalVar.get()[0] = 42; });
18+
// Make sure that the write happens before subsequent read
19+
Q.wait();
20+
21+
int OutVal = 0;
22+
{
23+
buffer<int, 1> OutBuf(&OutVal, 1);
24+
Q.submit([&](handler &CGH) {
25+
auto OutAcc = OutBuf.get_access<access::mode::write>(CGH);
26+
CGH.single_task([=]() { OutAcc[0] = DeviceGlobalVar.get()[0]; });
27+
});
28+
}
29+
assert(OutVal == 42 && "Read value does not match.");
30+
return 0;
31+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
// Tests that the llvm.compiler.used symbol, which is used to implement static
2+
// device globals, is removed at some point in compilation. For SPIR-V this
3+
// symbol is removed at sycl-post-link and for NVPTX/AMDGCN it is removed at
4+
// lowering.
5+
//
6+
// It also checks that the symbol can be found in an object file for a given
7+
// triple, thus validating that `llvm-strings` can successfully be used to
8+
// check for the presence of the symbol.
9+
10+
// UNSUPPORTED: windows
11+
12+
// RUN: %clangxx -fsycl -fsycl-device-only %s -o %t
13+
// RUN: llvm-strings %t | grep "llvm.compiler.used"
14+
// RUN: %clangxx -fsycl %s -o %t
15+
// RUN: llvm-strings %t | not grep "llvm.compiler.used"
16+
17+
// RUN: %if cuda %{ %clangxx -fsycl -fsycl-device-only -fsycl-targets=nvptx64-nvidia-cuda %s -o %t %}
18+
// RUN: %if cuda %{ llvm-strings %t | grep "llvm.compiler.used" %}
19+
// RUN: %if cuda %{ %clangxx -fsycl -fsycl-targets=nvptx64-nvidia-cuda %s -o %t %}
20+
// RUN: %if cuda %{ llvm-strings %t | not grep "llvm.compiler.used" %}
21+
22+
// RUN: %if hip_amd %{ %clangxx -fsycl -fsycl-device-only -fsycl-targets=amd_gpu_gfx906 %s -o %t %}
23+
// RUN: %if hip_amd %{ llvm-strings %t | grep "llvm.compiler.used" %}
24+
// RUN: %if hip_amd %{ %clangxx -fsycl -fsycl-targets=amd_gpu_gfx906 %s -o %t %}
25+
// RUN: %if hip_amd %{ llvm-strings %t | not grep "llvm.compiler.used" %}
26+
27+
#include <sycl/sycl.hpp>
28+
29+
using namespace sycl;
30+
using namespace sycl::ext::oneapi::experimental;
31+
32+
static device_global<int> DeviceGlobalVar;
33+
34+
int main() {
35+
sycl::queue{}.single_task([=] { volatile int ReadVal = DeviceGlobalVar; });
36+
}

0 commit comments

Comments
 (0)