File tree 3 files changed +59
-1
lines changed
llvm/tools/sycl-post-link
3 files changed +59
-1
lines changed Original file line number Diff line number Diff line change @@ -788,7 +788,10 @@ processInputModule(std::unique_ptr<Module> M) {
788
788
// its purpose, these device_global variables can be removed. If they are not
789
789
// used inside the device code after they have been removed from
790
790
// "llvm.compiler.used" they can be erased safely.
791
- Modified |= removeDeviceGlobalFromCompilerUsed (*M.get ());
791
+ // Skip the removal for NVPTX and AMDGCN targets: run it only when the
+ // target triple names neither of them. Both substring searches must
+ // report "not found"; requiring both to be found can never hold for a
+ // single triple and would disable the removal for every target.
+ if (const auto &Triple = M->getTargetTriple ();
792
+ Triple.find ("nvptx") == std::string::npos &&
793
+ Triple.find ("amdgcn") == std::string::npos)
794
+ Modified |= removeDeviceGlobalFromCompilerUsed (*M.get ());
792
795
793
796
// Instrument each image scope device globals if the module has been
794
797
// instrumented by sanitizer pass.
Original file line number Diff line number Diff line change
1
+ // RUN: %{build} -o %t.out
2
+ // RUN: %{run} %t.out
3
+ //
4
+ // The OpenCL GPU backends do not currently support device_global backend
5
+ // calls.
6
+ // UNSUPPORTED: opencl && gpu
7
+ //
8
+ // Tests static device_global access through device kernels.
9
+
10
+ #include " common.hpp"
11
+
12
+ static device_global<int [4 ], TestProperties> DeviceGlobalVar;
13
+
14
+ int main () { // Writes element 0 of DeviceGlobalVar in one kernel, reads it back in a second kernel, and checks the value on the host.
15
+ queue Q;
16
+
17
+ Q.single_task ([=]() { DeviceGlobalVar.get ()[0 ] = 42 ; }); // First kernel: store 42 into element 0.
18
+ // Wait for the write kernel to finish before submitting the kernel that reads the value back.
19
+ Q.wait ();
20
+
21
+ int OutVal = 0 ;
22
+ { // Scope OutBuf so that it is destroyed before OutVal is checked below.
23
+ buffer<int , 1 > OutBuf (&OutVal, 1 );
24
+ Q.submit ([&](handler &CGH) {
25
+ auto OutAcc = OutBuf.get_access <access ::mode::write >(CGH);
26
+ CGH.single_task ([=]() { OutAcc[0 ] = DeviceGlobalVar.get ()[0 ]; }); // Second kernel: copy element 0 out through the accessor.
27
+ });
28
+ }
29
+ assert (OutVal == 42 && " Read value does not match." );
30
+ return 0 ;
31
+ }
Original file line number Diff line number Diff line change
1
+ // UNSUPPORTED: windows
2
+
3
+ // RUN: %clangxx -fsycl %s -o %t
4
+ // RUN: strings %t | not grep "llvm.compiler.used"
5
+
6
+ // RUN: %if cuda %{ %clangxx -fsycl -fsycl-targets=nvptx64-nvidia-cuda %s -o %t %}
7
+ // RUN: %if cuda %{ strings %t | not grep "llvm.compiler.used" %}
8
+
9
+ // RUN: %if hip_amd %{ %clangxx -fsycl -fsycl-targets=amd_gpu_gfx906 %s -o %t %}
10
+ // RUN: %if hip_amd %{ strings %t | not grep "llvm.compiler.used" %}
11
+
12
+ // Tests that "llvm.compiler.used" is absent from the built binary for a
13
+ // program whose kernel reads a static device_global.
14
+
15
+ #include < sycl/sycl.hpp>
16
+
17
+ using namespace sycl ;
18
+ using namespace sycl ::ext::oneapi::experimental;
19
+
20
+ static device_global<int > DeviceGlobalVar;
21
+
22
+ int main () { // Submits a single kernel whose only work is reading DeviceGlobalVar into a volatile local.
23
+ sycl::queue{}.single_task ([=] { volatile int ReadVal = DeviceGlobalVar; });
24
+ }
You can’t perform that action at this time.
0 commit comments