[SYCL][E2E] Fix some tests in multi-device mode

frasercrmck · frasercrmck · commit 56d6e0334d3e · 2024-04-25T15:29:37.000+01:00
The atomic tests can be run by multiple SYCL devices, in which case the
compiler needs to be told which device the `--cuda-gpu-arch` parameter
applies to.

The changes to the bfloat16 tests try and restore earlier behaviour,
where the `aspect-ext_oneapi_bfloat16_math_functions` feature was
preventing other devices from running. It also removes RUN lines from a
shared header file (which we don't consider a valid test file) which may
mislead people into thinking it itself is a runnable test.
diff --git a/sycl/test-e2e/BFloat16/bfloat16_builtins.hpp b/sycl/test-e2e/BFloat16/bfloat16_builtins.hpp
@@ -4,21 +4,6 @@
 // + sm_80 and above uses some native bfloat16 math instructions
 // + below sm_80 always uses generic impls
 
-// DEFINE: %{mathflags} = %if cl_options %{/clang:-fno-fast-math%} %else %{-fno-fast-math%}
-// REQUIRES: aspect-ext_oneapi_bfloat16_math_functions
-// RUN: %clangxx -fsycl -fsycl-targets=%{sycl_triple} %if any-device-is-cuda %{ -Xsycl-target-backend --cuda-gpu-arch=sm_80 %} %s -o %t.out %{mathflags}
-// RUN: %{run} %t.out
-
-// Test "new" (ABI breaking) for all platforms ( sm_80/native if CUDA )
-// RUN:  %if preview-breaking-changes-supported %{  %clangxx -fsycl -fpreview-breaking-changes -fsycl-targets=%{sycl_triple} %if any-device-is-cuda %{ -Xsycl-target-backend --cuda-gpu-arch=sm_80 %} %s -o %t2.out %{mathflags} %}
-// RUN:  %if preview-breaking-changes-supported %{  %{run} %t2.out  %}
-
-// If CUDA, test "new" again for sm_75/generic
-// RUN:  %if any-device-is-cuda %{ %if preview-breaking-changes-supported %{  %clangxx -fsycl -fpreview-breaking-changes -fsycl-targets=%{sycl_triple}  -Xsycl-target-backend --cuda-gpu-arch=sm_75  %s -o %t3.out %{mathflags} %} %}
-// RUN:  %if any-device-is-cuda %{ %if preview-breaking-changes-supported %{  %{run} %t3.out  %} %}
-
-// Currently the feature isn't supported on FPGA.
-// UNSUPPORTED: accelerator
 #include <sycl/sycl.hpp>
 
 #include <cmath>
diff --git a/sycl/test-e2e/BFloat16/bfloat16_builtins_cuda_generic.cpp b/sycl/test-e2e/BFloat16/bfloat16_builtins_cuda_generic.cpp
@@ -5,6 +5,7 @@
 // + below sm_80 always uses generic impls
 
 // DEFINE: %{mathflags} = %if cl_options %{/clang:-fno-fast-math%} %else %{-fno-fast-math%}
+// REQUIRES: aspect-ext_oneapi_bfloat16_math_functions
 
 // If CUDA, test "new" again for sm_75/generic
 // RUN:  %if any-device-is-cuda %{ %if preview-breaking-changes-supported %{  %clangxx -fsycl -fpreview-breaking-changes -fsycl-targets=%{sycl_triple}  -Xsycl-target-backend --cuda-gpu-arch=sm_75  %s -o %t3.out %{mathflags} %} %}
diff --git a/sycl/test-e2e/syclcompat/atomic/atomic_class.cpp b/sycl/test-e2e/syclcompat/atomic/atomic_class.cpp
@@ -32,7 +32,7 @@
 
 // UNSUPPORTED: hip || (windows && level_zero)
 
-// RUN: %clangxx -std=c++20 -fsycl -fsycl-targets=%{sycl_triple} %if any-device-is-cuda %{ -Xsycl-target-backend --cuda-gpu-arch=sm_70 %} %s -o %t.out
+// RUN: %clangxx -std=c++20 -fsycl -fsycl-targets=%{sycl_triple} %if any-device-is-cuda %{ -Xsycl-target-backend=nvptx64-nvidia-cuda --cuda-gpu-arch=sm_70 %} %s -o %t.out
 // RUN: %{run} %t.out
 
 #include <sycl/sycl.hpp>
diff --git a/sycl/test-e2e/syclcompat/atomic/atomic_memory_acq_rel.cpp b/sycl/test-e2e/syclcompat/atomic/atomic_memory_acq_rel.cpp
@@ -32,7 +32,7 @@
 
 // UNSUPPORTED: hip
 
-// RUN: %clangxx -fsycl -fsycl-targets=%{sycl_triple} %if any-device-is-cuda %{ -Xsycl-target-backend --cuda-gpu-arch=sm_70 %} %s -o %t.out
+// RUN: %clangxx -fsycl -fsycl-targets=%{sycl_triple} %if any-device-is-cuda %{ -Xsycl-target-backend=nvptx64-nvidia-cuda --cuda-gpu-arch=sm_70 %} %s -o %t.out
 // RUN: %{run} %t.out
 
 #include <iostream>