Skip to content

Commit 1de77c3

Browse files
authored
[SYCL][New offload model] Add SYCL E2E tests for --offload-new-driver option and fix failing tests (#14730)
This PR adds a set of SYCL E2E tests that run with --offload-new-driver enabled. It also resolves some failures related to offloading to third-party hardware. --------- Signed-off-by: Arvind Sudarsanam <[email protected]>
1 parent 1f2ea6d commit 1de77c3

15 files changed

+471
-15
lines changed

clang/lib/Driver/ToolChains/Clang.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11066,7 +11066,8 @@ void LinkerWrapper::ConstructJob(Compilation &C, const JobAction &JA,
1106611066
ArgStringList CmdArgs;
1106711067

1106811068
// Pass the CUDA path to the linker wrapper tool.
11069-
for (Action::OffloadKind Kind : {Action::OFK_Cuda, Action::OFK_OpenMP}) {
11069+
for (Action::OffloadKind Kind :
11070+
{Action::OFK_Cuda, Action::OFK_OpenMP, Action::OFK_SYCL}) {
1107011071
auto TCRange = C.getOffloadToolChains(Kind);
1107111072
for (auto &I : llvm::make_range(TCRange.first, TCRange.second)) {
1107211073
const ToolChain *TC = I.second;

clang/test/Driver/linker-wrapper-sycl-win.cpp

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -90,10 +90,11 @@
9090
// CHK-CMDS-AOT-NV-NEXT: "{{.*}}llvm-link.exe" -only-needed [[FIRSTLLVMLINKOUT]].bc {{.*}}.bc -o [[SECONDLLVMLINKOUT:.*]].bc --suppress-warnings
9191
// CHK-CMDS-AOT-NV-NEXT: "{{.*}}sycl-post-link.exe"{{.*}} SYCL_POST_LINK_OPTIONS -o [[SYCLPOSTLINKOUT:.*]].table [[SECONDLLVMLINKOUT]].bc
9292
// CHK-CMDS-AOT-NV-NEXT: "{{.*}}clang.exe"{{.*}} -o [[CLANGOUT:.*]] --target=nvptx64-nvidia-cuda -march={{.*}}
93-
// CHK-CMDS-AOT-NV-NEXT: offload-wrapper: input: {{.*}}, output: [[WRAPPEROUT:.*]].bc
93+
// CHK-CMDS-AOT-NV-NEXT: "{{.*}}ptxas"{{.*}} --output-file [[PTXASOUT:.*]] [[CLANGOUT]]
94+
// CHK-CMDS-AOT-NV-NEXT: "{{.*}}fatbinary"{{.*}} --create [[FATBINOUT:.*]] --image=profile={{.*}},file=[[CLANGOUT]] --image=profile={{.*}},file=[[PTXASOUT]]
95+
// CHK-CMDS-AOT-NV-NEXT: offload-wrapper: input: [[FATBINOUT]], output: [[WRAPPEROUT:.*]].bc
9496
// CHK-CMDS-AOT-NV-NEXT: "{{.*}}llc.exe" -filetype=obj -o [[LLCOUT:.*]].o [[WRAPPEROUT]].bc
9597
// CHK-CMDS-AOT-NV-NEXT: "{{.*}}ld" -- HOST_LINKER_FLAGS -dynamic-linker HOST_DYN_LIB -o a.out [[LLCOUT]].o HOST_LIB_PATH HOST_STAT_LIB {{.*}}.o
96-
9798
/// Check for list of commands for standalone clang-linker-wrapper run for sycl (AOT for AMD)
9899
// -------
99100
// Generate .o file as linker wrapper input.
@@ -107,6 +108,7 @@
107108
// CHK-CMDS-AOT-AMD-NEXT: "{{.*}}llvm-link.exe" [[FIRSTLLVMLINKIN:.*]].bc -o [[FIRSTLLVMLINKOUT:.*]].bc --suppress-warnings
108109
// CHK-CMDS-AOT-AMD-NEXT: "{{.*}}sycl-post-link.exe"{{.*}} SYCL_POST_LINK_OPTIONS -o [[SYCLPOSTLINKOUT:.*]].table [[FIRSTLLVMLINKOUT]].bc
109110
// CHK-CMDS-AOT-AMD-NEXT: "{{.*}}clang.exe"{{.*}} -o [[CLANGOUT:.*]] --target=amdgcn-amd-amdhsa -mcpu={{.*}}
110-
// CHK-CMDS-AOT-AMD-NEXT: offload-wrapper: input: {{.*}}, output: [[WRAPPEROUT:.*]].bc
111+
// CHK-CMDS-AOT-AMD-NEXT: "{{.*}}clang-offload-bundler.exe"{{.*}} -input=[[CLANGOUT]] -output=[[BUNDLEROUT:.*]]
112+
// CHK-CMDS-AOT-AMD-NEXT: offload-wrapper: input: [[BUNDLEROUT]], output: [[WRAPPEROUT:.*]].bc
111113
// CHK-CMDS-AOT-AMD-NEXT: "{{.*}}llc.exe" -filetype=obj -o [[LLCOUT:.*]].o [[WRAPPEROUT]].bc
112114
// CHK-CMDS-AOT-AMD-NEXT: "{{.*}}ld" -- HOST_LINKER_FLAGS -dynamic-linker HOST_DYN_LIB -o a.out [[LLCOUT]].o HOST_LIB_PATH HOST_STAT_LIB {{.*}}.o

clang/test/Driver/linker-wrapper-sycl.cpp

Lines changed: 10 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -108,7 +108,9 @@
108108
// CHK-CMDS-AOT-NV-NEXT: "{{.*}}llvm-link" -only-needed [[FIRSTLLVMLINKOUT]].bc {{.*}}.bc -o [[SECONDLLVMLINKOUT:.*]].bc --suppress-warnings
109109
// CHK-CMDS-AOT-NV-NEXT: "{{.*}}sycl-post-link"{{.*}} SYCL_POST_LINK_OPTIONS -o [[SYCLPOSTLINKOUT:.*]].table [[SECONDLLVMLINKOUT]].bc
110110
// CHK-CMDS-AOT-NV-NEXT: "{{.*}}clang"{{.*}} -o [[CLANGOUT:.*]] --target=nvptx64-nvidia-cuda -march={{.*}}
111-
// CHK-CMDS-AOT-NV-NEXT: offload-wrapper: input: [[WRAPPERIN:.*]], output: [[WRAPPEROUT:.*]]
111+
// CHK-CMDS-AOT-NV-NEXT: "{{.*}}ptxas"{{.*}} --output-file [[PTXASOUT:.*]] [[CLANGOUT]]
112+
// CHK-CMDS-AOT-NV-NEXT: "{{.*}}fatbinary"{{.*}} --create [[FATBINOUT:.*]] --image=profile={{.*}},file=[[CLANGOUT]] --image=profile={{.*}},file=[[PTXASOUT]]
113+
// CHK-CMDS-AOT-NV-NEXT: offload-wrapper: input: [[FATBINOUT]], output: [[WRAPPEROUT:.*]]
112114
// CHK-CMDS-AOT-NV-NEXT: "{{.*}}llc" -filetype=obj -o [[LLCOUT:.*]] [[WRAPPEROUT]]
113115
// CHK-CMDS-AOT-NV-NEXT: "{{.*}}ld" -- HOST_LINKER_FLAGS -dynamic-linker HOST_DYN_LIB -o a.out [[LLCOUT]] HOST_LIB_PATH HOST_STAT_LIB {{.*}}.o
114116

@@ -125,7 +127,8 @@
125127
// CHK-CMDS-AOT-AMD-NEXT: "{{.*}}llvm-link" [[FIRSTLLVMLINKIN]].bc -o [[FIRSTLLVMLINKOUT:.*]].bc --suppress-warnings
126128
// CHK-CMDS-AOT-AMD-NEXT: "{{.*}}sycl-post-link"{{.*}} SYCL_POST_LINK_OPTIONS -o [[SYCLPOSTLINKOUT:.*]].table [[FIRSTLLVMLINKOUT]].bc
127129
// CHK-CMDS-AOT-AMD-NEXT: "{{.*}}clang"{{.*}} -o [[CLANGOUT:.*]] --target=amdgcn-amd-amdhsa -mcpu={{.*}}
128-
// CHK-CMDS-AOT-AMD-NEXT: offload-wrapper: input: [[WRAPPERIN:.*]], output: [[WRAPPEROUT:.*]]
130+
// CHK-CMDS-AOT-AMD-NEXT: "{{.*}}clang-offload-bundler"{{.*}} -targets=host-x86_64-unknown-linux,hip-amdgcn-amd-amdhsa--gfx803 -input=/dev/null -input=[[CLANGOUT]] -output=[[BUNDLEROUT:.*]]
131+
// CHK-CMDS-AOT-AMD-NEXT: offload-wrapper: input: [[BUNDLEROUT]], output: [[WRAPPEROUT:.*]]
129132
// CHK-CMDS-AOT-AMD-NEXT: "{{.*}}llc" -filetype=obj -o [[LLCOUT:.*]] [[WRAPPEROUT]]
130133
// CHK-CMDS-AOT-AMD-NEXT: "{{.*}}ld" -- HOST_LINKER_FLAGS -dynamic-linker HOST_DYN_LIB -o a.out [[LLCOUT]] HOST_LIB_PATH HOST_STAT_LIB {{.*}}.o
131134

@@ -150,7 +153,9 @@
150153
// CHK-CMDS-AOT-NV-EMBED-IR-NEXT: offload-wrapper: input: {{.*}}.bc, output: [[WRAPPEROUT1:.*]]
151154
// CHK-CMDS-AOT-NV-EMBED-IR-NEXT: "{{.*}}llc" -filetype=obj -o [[LLCOUT1:.*]] [[WRAPPEROUT1]]
152155
// CHK-CMDS-AOT-NV-EMBED-IR-NEXT: "{{.*}}clang"{{.*}} -o [[CLANGOUT:.*]] --target=nvptx64-nvidia-cuda -march={{.*}}
153-
// CHK-CMDS-AOT-NV-EMBED-IR-NEXT: offload-wrapper: input: [[WRAPPERIN:.*]], output: [[WRAPPEROUT:.*]]
156+
// CHK-CMDS-AOT-NV-EMBED-IR-NEXT: "{{.*}}ptxas"{{.*}} --output-file [[PTXASOUT:.*]] [[CLANGOUT]]
157+
// CHK-CMDS-AOT-NV-EMBED-IR-NEXT: "{{.*}}fatbinary"{{.*}} --create [[FATBINOUT:.*]] --image=profile={{.*}},file=[[CLANGOUT]] --image=profile={{.*}},file=[[PTXASOUT]]
158+
// CHK-CMDS-AOT-NV-EMBED-IR-NEXT: offload-wrapper: input: [[FATBINOUT]], output: [[WRAPPEROUT:.*]]
154159
// CHK-CMDS-AOT-NV-EMBED-IR-NEXT: "{{.*}}llc" -filetype=obj -o [[LLCOUT2:.*]] [[WRAPPEROUT]]
155160
// CHK-CMDS-AOT-NV-EMBED-IR-NEXT: "{{.*}}ld" -- HOST_LINKER_FLAGS -dynamic-linker HOST_DYN_LIB -o a.out [[LLCOUT1]] [[LLCOUT2]] HOST_LIB_PATH HOST_STAT_LIB {{.*}}.o
156161

@@ -169,6 +174,7 @@
169174
// CHK-CMDS-AOT-AMD-EMBED-IR-NEXT: offload-wrapper: input: {{.*}}.bc, output: [[WRAPPEROUT1:.*]]
170175
// CHK-CMDS-AOT-AMD-EMBED-IR-NEXT: "{{.*}}llc" -filetype=obj -o [[LLCOUT1:.*]] [[WRAPPEROUT1]]
171176
// CHK-CMDS-AOT-AMD-EMBED-IR-NEXT: "{{.*}}clang"{{.*}} -o [[CLANGOUT:.*]] --target=amdgcn-amd-amdhsa -mcpu={{.*}}
172-
// CHK-CMDS-AOT-AMD-EMBED-IR-NEXT: offload-wrapper: input: [[WRAPPERIN:.*]], output: [[WRAPPEROUT2:.*]]
177+
// CHK-CMDS-AOT-AMD-EMBED-IR-NEXT: "{{.*}}clang-offload-bundler"{{.*}} -input=[[CLANGOUT]] -output=[[BUNDLEROUT:.*]]
178+
// CHK-CMDS-AOT-AMD-EMBED-IR-NEXT: offload-wrapper: input: [[BUNDLEROUT]], output: [[WRAPPEROUT2:.*]]
173179
// CHK-CMDS-AOT-AMD-EMBED-IR-NEXT: "{{.*}}llc" -filetype=obj -o [[LLCOUT2:.*]] [[WRAPPEROUT2]]
174180
// CHK-CMDS-AOT-AMD-EMBED-IR-NEXT: "{{.*}}ld" -- HOST_LINKER_FLAGS -dynamic-linker HOST_DYN_LIB -o a.out [[LLCOUT1]] [[LLCOUT2]] HOST_LIB_PATH HOST_STAT_LIB {{.*}}.o

clang/test/Driver/sycl-offload-new-driver.c

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -188,3 +188,10 @@
188188
// RUN: -Xsycl-target-backend=spir64_gen "-device pvc,bdw" %s 2>&1 \
189189
// RUN: | FileCheck -check-prefix COMMA_FILE %s
190190
// COMMA_FILE: clang-offload-packager{{.*}} "--image=file={{.*}}pvc@bdw{{.*}},triple=spir64_gen-unknown-unknown,arch=pvc,bdw,kind=sycl"
191+
192+
/// Verify that --cuda-path is passed to clang-linker-wrapper for SYCL offload
193+
// RUN: %clangxx -fsycl -### -fsycl-targets=nvptx64-nvidia-cuda \
194+
// RUN: --cuda-gpu-arch=sm_20 --cuda-path=%S/Inputs/CUDA_80/usr/local/cuda %s \
195+
// RUN: --offload-new-driver 2>&1 \
196+
// RUN: | FileCheck -check-prefix NVPTX_CUDA_PATH %s
197+
// NVPTX_CUDA_PATH: clang-linker-wrapper{{.*}} "--cuda-path={{.*}}Inputs/CUDA_80/usr/local/cuda"

clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp

Lines changed: 77 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
//
1515
//===---------------------------------------------------------------------===//
1616

17+
#include "clang/Basic/Cuda.h"
1718
#include "clang/Basic/Version.h"
1819
#include "llvm/ADT/MapVector.h"
1920
#include "llvm/BinaryFormat/Magic.h"
@@ -409,6 +410,46 @@ fatbinary(ArrayRef<std::pair<StringRef, StringRef>> InputFiles,
409410

410411
return *TempFileOrErr;
411412
}
413+
414+
// ptxas binary
415+
Expected<StringRef> ptxas(StringRef InputFile, const ArgList &Args,
416+
StringRef Arch) {
417+
llvm::TimeTraceScope TimeScope("NVPTX ptxas");
418+
// NVPTX uses the ptxas program to process assembly files.
419+
Expected<std::string> PtxasPath =
420+
findProgram("ptxas", {CudaBinaryPath + "/bin"});
421+
if (!PtxasPath)
422+
return PtxasPath.takeError();
423+
424+
llvm::Triple Triple(
425+
Args.getLastArgValue(OPT_host_triple_EQ, sys::getDefaultTargetTriple()));
426+
427+
// Create a new file to write the output to.
428+
auto TempFileOrErr =
429+
createOutputFile(sys::path::filename(ExecutableName), "cubin");
430+
if (!TempFileOrErr)
431+
return TempFileOrErr.takeError();
432+
433+
SmallVector<StringRef, 16> CmdArgs;
434+
CmdArgs.push_back(*PtxasPath);
435+
CmdArgs.push_back(Triple.isArch64Bit() ? "-m64" : "-m32");
436+
// Pass -v to ptxas if it was passed to the driver.
437+
if (Args.hasArg(OPT_verbose))
438+
CmdArgs.push_back("-v");
439+
StringRef OptLevel = Args.getLastArgValue(OPT_opt_level, "O2");
440+
if (Args.hasArg(OPT_debug))
441+
CmdArgs.push_back("-g");
442+
else
443+
CmdArgs.push_back(Args.MakeArgString("-" + OptLevel));
444+
CmdArgs.push_back("--gpu-name");
445+
CmdArgs.push_back(Arch);
446+
CmdArgs.push_back("--output-file");
447+
CmdArgs.push_back(*TempFileOrErr);
448+
CmdArgs.push_back(InputFile);
449+
if (Error Err = executeCommands(*PtxasPath, CmdArgs))
450+
return std::move(Err);
451+
return *TempFileOrErr;
452+
}
412453
} // namespace nvptx
413454

414455
namespace amdgcn {
@@ -1240,7 +1281,8 @@ static Expected<StringRef> linkDevice(ArrayRef<StringRef> InputFiles,
12401281
} // namespace sycl
12411282

12421283
namespace generic {
1243-
Expected<StringRef> clang(ArrayRef<StringRef> InputFiles, const ArgList &Args) {
1284+
Expected<StringRef> clang(ArrayRef<StringRef> InputFiles, const ArgList &Args,
1285+
bool IsSYCLKind = false) {
12441286
llvm::TimeTraceScope TimeScope("Clang");
12451287
// Use `clang` to invoke the appropriate device tools.
12461288
Expected<std::string> ClangPath =
@@ -1276,6 +1318,8 @@ Expected<StringRef> clang(ArrayRef<StringRef> InputFiles, const ArgList &Args) {
12761318
if (!Triple.isNVPTX())
12771319
CmdArgs.push_back("-Wl,--no-undefined");
12781320

1321+
if (IsSYCLKind && Triple.isNVPTX())
1322+
CmdArgs.push_back("-S");
12791323
for (StringRef InputFile : InputFiles)
12801324
CmdArgs.push_back(InputFile);
12811325

@@ -1369,7 +1413,7 @@ Expected<StringRef> linkDevice(ArrayRef<StringRef> InputFiles,
13691413
case Triple::ppc64:
13701414
case Triple::ppc64le:
13711415
case Triple::systemz:
1372-
return generic::clang(InputFiles, Args);
1416+
return generic::clang(InputFiles, Args, IsSYCLKind);
13731417
case Triple::spirv32:
13741418
case Triple::spirv64:
13751419
case Triple::spir:
@@ -2078,14 +2122,40 @@ Expected<SmallVector<StringRef>> linkAndWrapDeviceFiles(
20782122
return OutputFile.takeError();
20792123
WrappedOutput.push_back(*OutputFile);
20802124
}
2081-
20822125
for (size_t I = 0, E = SplitModules.size(); I != E; ++I) {
20832126
SmallVector<StringRef> Files = {SplitModules[I].ModuleFilePath};
2084-
auto LinkedFileFinalOrErr =
2127+
StringRef Arch = LinkerArgs.getLastArgValue(OPT_arch_EQ);
2128+
if (Arch.empty())
2129+
Arch = "native";
2130+
SmallVector<std::pair<StringRef, StringRef>, 4> BundlerInputFiles;
2131+
auto ClangOutputOrErr =
20852132
linkDevice(Files, LinkerArgs, true /* IsSYCLKind */);
2086-
if (!LinkedFileFinalOrErr)
2087-
return LinkedFileFinalOrErr.takeError();
2088-
SplitModules[I].ModuleFilePath = *LinkedFileFinalOrErr;
2133+
if (!ClangOutputOrErr)
2134+
return ClangOutputOrErr.takeError();
2135+
if (Triple.isNVPTX()) {
2136+
auto VirtualArch = StringRef(clang::CudaArchToVirtualArchString(
2137+
clang::StringToCudaArch(Arch)));
2138+
auto PtxasOutputOrErr =
2139+
nvptx::ptxas(*ClangOutputOrErr, LinkerArgs, Arch);
2140+
if (!PtxasOutputOrErr)
2141+
return PtxasOutputOrErr.takeError();
2142+
BundlerInputFiles.emplace_back(*ClangOutputOrErr, VirtualArch);
2143+
BundlerInputFiles.emplace_back(*PtxasOutputOrErr, Arch);
2144+
auto BundledFileOrErr =
2145+
nvptx::fatbinary(BundlerInputFiles, LinkerArgs);
2146+
if (!BundledFileOrErr)
2147+
return BundledFileOrErr.takeError();
2148+
SplitModules[I].ModuleFilePath = *BundledFileOrErr;
2149+
} else if (Triple.isAMDGCN()) {
2150+
BundlerInputFiles.emplace_back(*ClangOutputOrErr, Arch);
2151+
auto BundledFileOrErr =
2152+
amdgcn::fatbinary(BundlerInputFiles, LinkerArgs);
2153+
if (!BundledFileOrErr)
2154+
return BundledFileOrErr.takeError();
2155+
SplitModules[I].ModuleFilePath = *BundledFileOrErr;
2156+
} else {
2157+
SplitModules[I].ModuleFilePath = *ClangOutputOrErr;
2158+
}
20892159
}
20902160
// TODO(NOM7): Remove this call and use community flow for bundle/wrap
20912161
auto OutputFile = sycl::runWrapperAndCompile(SplitModules, LinkerArgs);
Lines changed: 71 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,71 @@
1+
//==--- aot.cpp - Simple vector addition (AOT compilation example) --------==//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===---------------------------------------------------------------------===//
8+
9+
#include <sycl/detail/core.hpp>
10+
11+
#include <array>
12+
#include <iostream>
13+
14+
constexpr sycl::access::mode sycl_read = sycl::access::mode::read;
15+
constexpr sycl::access::mode sycl_write = sycl::access::mode::write;
16+
17+
template <typename T> class Vadd;
18+
19+
template <typename T, size_t N>
20+
void vadd(const std::array<T, N> &A, const std::array<T, N> &B,
21+
std::array<T, N> &C) {
22+
sycl::queue Queue([](sycl::exception_list ExceptionList) {
23+
for (std::exception_ptr ExceptionPtr : ExceptionList) {
24+
try {
25+
std::rethrow_exception(ExceptionPtr);
26+
} catch (sycl::exception &E) {
27+
std::cerr << E.what();
28+
} catch (...) {
29+
std::cerr << "Unknown async exception was caught." << std::endl;
30+
}
31+
}
32+
});
33+
34+
sycl::range<1> numOfItems{N};
35+
sycl::buffer bufA(A.data(), numOfItems);
36+
sycl::buffer bufB(B.data(), numOfItems);
37+
sycl::buffer bufC(C.data(), numOfItems);
38+
39+
Queue.submit([&](sycl::handler &cgh) {
40+
sycl::accessor accA{bufA, cgh, sycl::read_only};
41+
sycl::accessor accB{bufB, cgh, sycl::read_only};
42+
sycl::accessor accC{bufC, cgh, sycl::write_only};
43+
44+
cgh.parallel_for<Vadd<T>>(numOfItems, [=](sycl::id<1> wiID) {
45+
accC[wiID] = accA[wiID] + accB[wiID];
46+
});
47+
});
48+
49+
Queue.wait_and_throw();
50+
}
51+
52+
/// Runs `vadd` once on int data and once on float data, then checks every
/// element of both results against the sum computed on the host.
///
/// \returns 0 when all elements match, 1 on the first mismatch.
int main() {
  const size_t array_size = 4;
  std::array<int, array_size> A = {{1, 2, 3, 4}}, B = {{1, 2, 3, 4}}, C;
  std::array<float, array_size> D = {{1.f, 2.f, 3.f, 4.f}},
                                E = {{1.f, 2.f, 3.f, 4.f}}, F;
  vadd(A, B, C);
  vadd(D, E, F);
  for (size_t i = 0; i < array_size; i++) {
    if (C[i] != A[i] + B[i]) {
      // The message opens a parenthesis before "element"; close it so the
      // diagnostic is well-formed.
      std::cout << "Incorrect result (element " << i << " is " << C[i]
                << ")!\n";
      return 1;
    }
    if (F[i] != D[i] + E[i]) {
      std::cout << "Incorrect result (element " << i << " is " << F[i]
                << ")!\n";
      return 1;
    }
  }
  std::cout << "Correct result!\n";
  return 0;
}
Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
#include "split-per-source.h"
2+
3+
void runKernelsFromFile2() {
4+
sycl::queue Q;
5+
int Data = 0;
6+
{
7+
sycl::buffer<int, 1> Buf(&Data, sycl::range<1>(1));
8+
auto KernelID1 = sycl::get_kernel_id<File2Kern1>();
9+
auto KB = sycl::get_kernel_bundle<sycl::bundle_state::executable>(
10+
Q.get_context(), {KernelID1});
11+
auto Krn = KB.get_kernel(KernelID1);
12+
13+
std::vector<sycl::kernel_id> KernelIDStorage = KB.get_kernel_ids();
14+
assert(KernelIDStorage.size() == 1);
15+
assert(KernelIDStorage[0] == KernelID1);
16+
17+
Q.submit([&](sycl::handler &Cgh) {
18+
auto Acc = Buf.get_access<sycl::access::mode::read_write>(Cgh);
19+
Cgh.single_task<File2Kern1>(Krn, [=]() { Acc[0] = 3; });
20+
});
21+
}
22+
assert(Data == 3);
23+
}
Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
#include <sycl/detail/core.hpp>
2+
3+
class File1Kern1;
4+
class File1Kern2;
5+
class File2Kern1;
6+
7+
void runKernelsFromFile2();
Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
// REQUIRES: opencl-aot, cpu
2+
3+
// Test with `--offload-new-driver`
4+
// RUN: %clangxx -fsycl -fsycl-device-code-split=per_source -fsycl-targets=spir64_x86_64 -I %S/Inputs -o %t.out %S/split-per-source-main.cpp %S/Inputs/split-per-source-second-file.cpp \
5+
// RUN: -fsycl-dead-args-optimization --offload-new-driver
6+
// RUN: %{run} %t.out
Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
// REQUIRES: ocloc, gpu
2+
// UNSUPPORTED: cuda || hip
3+
// CUDA supports neither device code splitting nor SPIR.
4+
// Test with `--offload-new-driver`
5+
//
6+
// RUN: %clangxx -fsycl -fsycl-device-code-split=per_source \
7+
// RUN: -fsycl-targets=spir64_gen \
8+
// RUN: -Xsycl-target-backend=spir64_gen \
9+
// RUN: "-device tgllp" -I %S/Inputs -o %t.out \
10+
// RUN: %S/split-per-source-main.cpp \
11+
// RUN: %S/Inputs/split-per-source-second-file.cpp \
12+
// RUN: -fsycl-dead-args-optimization --offload-new-driver
13+
// RUN: %{run} %t.out
Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
//==--- cpu.cpp - AOT compilation for cpu devices using opencl-aot --------==//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===---------------------------------------------------------------------===//
8+
9+
// REQUIRES: opencl-aot, cpu
10+
11+
// Test with `--offload-new-driver`
12+
// RUN: %clangxx -fsycl -fsycl-targets=spir64_x86_64 --offload-new-driver %S/Inputs/aot.cpp -o %t.out
13+
// RUN: %{run} %t.out
14+
15+
// Test that opencl-aot can handle multiple build options.
16+
// RUN: %clangxx -fsycl -fsycl-targets=spir64_x86_64 --offload-new-driver %S/Inputs/aot.cpp -Xsycl-target-backend "--bo=-g" -Xsycl-target-backend "--bo=-cl-opt-disable" -o %t2.out
17+
18+
// Test that opencl-aot can handle march option.
19+
// RUN: %clangxx -fsycl -fsycl-targets=spir64_x86_64 --offload-new-driver %S/Inputs/aot.cpp -Xsycl-target-backend "--march=avx512"
20+
// RUN: %clangxx -fsycl -fsycl-targets=spir64_x86_64 --offload-new-driver %S/Inputs/aot.cpp -Xsycl-target-backend "--march=wsm"

0 commit comments

Comments
 (0)