Skip to content

Commit 326e54e

Browse files
[SYCL] [Driver] Add offload-arch support for SYCL offloading (#15624)
Implement the `--offload-arch` option to enable SYCL offloading to `Intel CPUs`, `Intel GPUs`, `NVIDIA GPUs` and `AMD GPUs`. The `--offload-arch` implementation is currently supported only in the new driver model (i.e. with the `--offload-new-driver` option).

**Example usage:**
```
clang++ --offload-new-driver -fsycl --offload-arch=bdw        // Offload SYCL code to Intel GPU
clang++ --offload-new-driver -fsycl --offload-arch=broadwell  // Offload SYCL code to Intel CPU
clang++ --offload-new-driver -fsycl --offload-arch=sm_80      // Offload SYCL code to NVIDIA GPU
clang++ --offload-new-driver -fsycl --offload-arch=gfx700     // Offload SYCL code to AMD GPU
```
1 parent f03fc04 commit 326e54e

File tree

8 files changed

+724
-1
lines changed

8 files changed

+724
-1
lines changed

clang/include/clang/Basic/DiagnosticDriverKinds.td

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -398,6 +398,10 @@ def err_drv_sycl_missing_amdgpu_arch : Error<
398398
"missing AMDGPU architecture for SYCL offloading; specify it with '-Xsycl-target-backend%select{|=%1}0 --offload-arch=<arch-name>'">;
399399
def err_drv_sycl_thinlto_split_off: Error<
400400
"'%0' is not supported when '%1' is set with '-fsycl'">;
401+
def err_drv_sycl_offload_arch_new_driver: Error<
402+
"'--offload-arch' is supported when '-fsycl' is set with '--offload-new-driver'">;
403+
def err_drv_sycl_offload_arch_missing_value : Error<
404+
"must pass in an explicit cpu or gpu architecture to '--offload-arch'">;
401405
def warn_drv_sycl_offload_target_duplicate : Warning<
402406
"SYCL offloading target '%0' is similar to target '%1' already specified; "
403407
"will be ignored">, InGroup<SyclTarget>;

clang/lib/Driver/Driver.cpp

Lines changed: 105 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1191,12 +1191,13 @@ void Driver::CreateOffloadingDeviceToolChains(Compilation &C,
11911191
llvm::StringMap<llvm::DenseSet<StringRef>> DerivedArchs;
11921192
llvm::StringMap<StringRef> FoundNormalizedTriples;
11931193
llvm::SmallVector<llvm::Triple, 4> UniqueSYCLTriplesVec;
1194+
// StringSet to contain SYCL target triples.
1195+
llvm::StringSet<> SYCLTriples;
11941196
if (HasSYCLTargetsOption) {
11951197
// At this point, we know we have a valid combination
11961198
// of -fsycl*target options passed
11971199
Arg *SYCLTargetsValues = SYCLTargets;
11981200
if (SYCLTargetsValues) {
1199-
llvm::StringSet<> SYCLTriples;
12001201
if (SYCLTargetsValues->getNumValues()) {
12011202

12021203
// Multiple targets are currently not supported when using
@@ -1296,6 +1297,109 @@ void Driver::CreateOffloadingDeviceToolChains(Compilation &C,
12961297
Diag(clang::diag::warn_drv_empty_joined_argument)
12971298
<< SYCLTargetsValues->getAsString(C.getInputArgs());
12981299
}
1300+
}
1301+
// If the user specified --offload-arch, deduce the offloading
1302+
// target triple(s) from the set of architecture(s).
1303+
// Create a toolchain for each valid triple.
1304+
// We do not support SYCL offloading if any of the inputs is a
1305+
// .cu (for CUDA type) or .hip (for HIP type) file.
1306+
else if (HasValidSYCLRuntime &&
1307+
C.getInputArgs().hasArg(options::OPT_offload_arch_EQ) && !IsHIP &&
1308+
!IsCuda) {
1309+
// SYCL offloading to AOT Targets with '--offload-arch'
1310+
// is currently enabled only with '--offload-new-driver' option.
1311+
// Emit a diagnostic if '--offload-arch' is invoked without
1312+
// '--offload-new-driver' option.
1313+
if (!C.getInputArgs().hasFlag(options::OPT_offload_new_driver,
1314+
options::OPT_no_offload_new_driver, false)) {
1315+
Diag(clang::diag::err_drv_sycl_offload_arch_new_driver);
1316+
return;
1317+
}
1318+
const ToolChain *HostTC = C.getSingleOffloadToolChain<Action::OFK_Host>();
1319+
auto AMDTriple = getHIPOffloadTargetTriple(*this, C.getInputArgs());
1320+
auto NVPTXTriple = getNVIDIAOffloadTargetTriple(*this, C.getInputArgs(),
1321+
HostTC->getTriple());
1322+
1323+
// Attempt to deduce the offloading triple from the set of architectures.
1324+
// We need to temporarily create these toolchains so that we can access
1325+
// tools for inferring architectures.
1326+
llvm::DenseSet<StringRef> Archs;
1327+
if (NVPTXTriple) {
1328+
auto TempTC = std::make_unique<toolchains::CudaToolChain>(
1329+
*this, *NVPTXTriple, *HostTC, C.getInputArgs(), Action::OFK_None);
1330+
for (StringRef Arch :
1331+
getOffloadArchs(C, C.getArgs(), Action::OFK_SYCL, &*TempTC, true))
1332+
Archs.insert(Arch);
1333+
}
1334+
if (AMDTriple) {
1335+
auto TempTC = std::make_unique<toolchains::AMDGPUOpenMPToolChain>(
1336+
*this, *AMDTriple, *HostTC, C.getInputArgs());
1337+
for (StringRef Arch :
1338+
getOffloadArchs(C, C.getArgs(), Action::OFK_SYCL, &*TempTC, true))
1339+
Archs.insert(Arch);
1340+
}
1341+
if (!AMDTriple && !NVPTXTriple) {
1342+
for (StringRef Arch :
1343+
getOffloadArchs(C, C.getArgs(), Action::OFK_SYCL, nullptr, true))
1344+
Archs.insert(Arch);
1345+
}
1346+
for (StringRef Arch : Archs) {
1347+
if (NVPTXTriple && IsSYCLSupportedNVidiaGPUArch(StringToOffloadArch(
1348+
getProcessorFromTargetID(*NVPTXTriple, Arch)))) {
1349+
DerivedArchs[NVPTXTriple->getTriple()].insert(Arch);
1350+
} else if (AMDTriple &&
1351+
IsSYCLSupportedAMDGPUArch(StringToOffloadArch(
1352+
getProcessorFromTargetID(*AMDTriple, Arch)))) {
1353+
DerivedArchs[AMDTriple->getTriple()].insert(Arch);
1354+
} else if (IsSYCLSupportedIntelCPUArch(StringToOffloadArchSYCL(Arch))) {
1355+
DerivedArchs[MakeSYCLDeviceTriple("spir64_x86_64").getTriple()].insert(
1356+
Arch);
1357+
} else if (IsSYCLSupportedIntelGPUArch(StringToOffloadArchSYCL(Arch))) {
1358+
StringRef IntelGPUArch;
1359+
// For Intel Graphics AOT target, valid values for '--offload-arch'
1360+
// are mapped to valid device names accepted by OCLOC (the Intel GPU AOT
1361+
// compiler) via the '-device' option. The mapIntelGPUArchName
1362+
// function maps the accepted values for '--offload-arch' to enable SYCL
1363+
// offloading to Intel GPUs and the corresponding '-device' value passed
1364+
// to OCLOC.
1365+
IntelGPUArch = mapIntelGPUArchName(Arch).data();
1366+
DerivedArchs[MakeSYCLDeviceTriple("spir64_gen").getTriple()].insert(
1367+
IntelGPUArch);
1368+
} else {
1369+
Diag(clang::diag::err_drv_invalid_sycl_target) << Arch;
1370+
return;
1371+
}
1372+
}
1373+
// Emit an error if architecture value is not provided
1374+
// to --offload-arch.
1375+
if (Archs.empty()) {
1376+
Diag(clang::diag::err_drv_sycl_offload_arch_missing_value);
1377+
return;
1378+
}
1379+
1380+
for (const auto &TripleAndArchs : DerivedArchs)
1381+
SYCLTriples.insert(TripleAndArchs.first());
1382+
1383+
for (const auto &Val : SYCLTriples) {
1384+
llvm::Triple SYCLTargetTriple(MakeSYCLDeviceTriple(Val.getKey()));
1385+
std::string NormalizedName = SYCLTargetTriple.normalize();
1386+
1387+
// Make sure we don't have a duplicate triple.
1388+
auto Duplicate = FoundNormalizedTriples.find(NormalizedName);
1389+
if (Duplicate != FoundNormalizedTriples.end()) {
1390+
Diag(clang::diag::warn_drv_sycl_offload_target_duplicate)
1391+
<< Val.getKey() << Duplicate->second;
1392+
continue;
1393+
}
1394+
1395+
// Store the current triple so that we can check for duplicates in the
1396+
// following iterations.
1397+
FoundNormalizedTriples[NormalizedName] = Val.getKey();
1398+
UniqueSYCLTriplesVec.push_back(SYCLTargetTriple);
1399+
}
1400+
1401+
addSYCLDefaultTriple(C, UniqueSYCLTriplesVec);
1402+
12991403
} else {
13001404
// If -fsycl is supplied without -fsycl-targets we will assume SPIR-V.
13011405
// For -fsycl-device-only, we also setup the implied triple as needed.

clang/lib/Driver/ToolChains/SYCL.cpp

Lines changed: 119 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,125 @@ using namespace clang::driver::tools;
2727
using namespace clang;
2828
using namespace llvm::opt;
2929

30+
// Struct that relates an AOT target value with
31+
// Intel CPUs and Intel GPUs.
32+
struct StringToOffloadArchSYCLMap {
33+
const char *ArchName;
34+
SYCLSupportedIntelArchs IntelArch;
35+
};
36+
37+
// Mapping of supported SYCL offloading architectures.
38+
static const StringToOffloadArchSYCLMap StringToArchNamesMap[] = {
39+
// Intel CPU mapping.
40+
{"skylake-avx512", SYCLSupportedIntelArchs::SKYLAKEAVX512},
41+
{"core-avx2", SYCLSupportedIntelArchs::COREAVX2},
42+
{"corei7-avx", SYCLSupportedIntelArchs::COREI7AVX},
43+
{"corei7", SYCLSupportedIntelArchs::COREI7},
44+
{"westmere", SYCLSupportedIntelArchs::WESTMERE},
45+
{"sandybridge", SYCLSupportedIntelArchs::SANDYBRIDGE},
46+
{"ivybridge", SYCLSupportedIntelArchs::IVYBRIDGE},
47+
{"broadwell", SYCLSupportedIntelArchs::BROADWELL},
48+
{"coffeelake", SYCLSupportedIntelArchs::COFFEELAKE},
49+
{"alderlake", SYCLSupportedIntelArchs::ALDERLAKE},
50+
{"skylake", SYCLSupportedIntelArchs::SKYLAKE},
51+
{"skx", SYCLSupportedIntelArchs::SKX},
52+
{"cascadelake", SYCLSupportedIntelArchs::CASCADELAKE},
53+
{"icelake-client", SYCLSupportedIntelArchs::ICELAKECLIENT},
54+
{"icelake-server", SYCLSupportedIntelArchs::ICELAKESERVER},
55+
{"sapphirerapids", SYCLSupportedIntelArchs::SAPPHIRERAPIDS},
56+
{"graniterapids", SYCLSupportedIntelArchs::GRANITERAPIDS},
57+
// Intel GPU mapping.
58+
{"bdw", SYCLSupportedIntelArchs::BDW},
59+
{"skl", SYCLSupportedIntelArchs::SKL},
60+
{"kbl", SYCLSupportedIntelArchs::KBL},
61+
{"cfl", SYCLSupportedIntelArchs::CFL},
62+
{"apl", SYCLSupportedIntelArchs::APL},
63+
{"bxt", SYCLSupportedIntelArchs::BXT},
64+
{"glk", SYCLSupportedIntelArchs::GLK},
65+
{"whl", SYCLSupportedIntelArchs::WHL},
66+
{"aml", SYCLSupportedIntelArchs::AML},
67+
{"cml", SYCLSupportedIntelArchs::CML},
68+
{"icllp", SYCLSupportedIntelArchs::ICLLP},
69+
{"icl", SYCLSupportedIntelArchs::ICL},
70+
{"ehl", SYCLSupportedIntelArchs::EHL},
71+
{"jsl", SYCLSupportedIntelArchs::JSL},
72+
{"tgllp", SYCLSupportedIntelArchs::TGLLP},
73+
{"tgl", SYCLSupportedIntelArchs::TGL},
74+
{"rkl", SYCLSupportedIntelArchs::RKL},
75+
{"adl_s", SYCLSupportedIntelArchs::ADL_S},
76+
{"rpl_s", SYCLSupportedIntelArchs::RPL_S},
77+
{"adl_p", SYCLSupportedIntelArchs::ADL_P},
78+
{"adl_n", SYCLSupportedIntelArchs::ADL_N},
79+
{"dg1", SYCLSupportedIntelArchs::DG1},
80+
{"acm_g10", SYCLSupportedIntelArchs::ACM_G10},
81+
{"dg2_g10", SYCLSupportedIntelArchs::DG2_G10},
82+
{"acm_g11", SYCLSupportedIntelArchs::ACM_G11},
83+
{"dg2_g10", SYCLSupportedIntelArchs::DG2_G10},
84+
{"dg2_g11", SYCLSupportedIntelArchs::DG2_G11},
85+
{"acm_g12", SYCLSupportedIntelArchs::ACM_G12},
86+
{"dg2_g12", SYCLSupportedIntelArchs::DG2_G12},
87+
{"pvc", SYCLSupportedIntelArchs::PVC},
88+
{"pvc_vg", SYCLSupportedIntelArchs::PVC_VG},
89+
{"mtl_u", SYCLSupportedIntelArchs::MTL_U},
90+
{"mtl_s", SYCLSupportedIntelArchs::MTL_S},
91+
{"arl_u", SYCLSupportedIntelArchs::ARL_U},
92+
{"arl_s", SYCLSupportedIntelArchs::ARL_S},
93+
{"mtl_h", SYCLSupportedIntelArchs::MTL_H},
94+
{"arl_h", SYCLSupportedIntelArchs::ARL_H},
95+
{"bmg_g21", SYCLSupportedIntelArchs::BMG_G21},
96+
{"lnl_m", SYCLSupportedIntelArchs::LNL_M}};
97+
98+
// Check if the user provided value for --offload-arch is a valid
99+
// SYCL supported Intel AOT target.
100+
SYCLSupportedIntelArchs
101+
clang::driver::StringToOffloadArchSYCL(llvm::StringRef ArchNameAsString) {
102+
auto result = std::find_if(
103+
std::begin(StringToArchNamesMap), std::end(StringToArchNamesMap),
104+
[ArchNameAsString](const StringToOffloadArchSYCLMap &map) {
105+
return ArchNameAsString == map.ArchName;
106+
});
107+
if (result == std::end(StringToArchNamesMap))
108+
return SYCLSupportedIntelArchs::UNKNOWN;
109+
return result->IntelArch;
110+
}
111+
112+
// This is a mapping between the user provided --offload-arch value for Intel
113+
// GPU targets and the spir64_gen device name accepted by OCLOC (the Intel GPU
114+
// AOT compiler).
115+
StringRef clang::driver::mapIntelGPUArchName(StringRef ArchName) {
116+
StringRef Arch;
117+
Arch = llvm::StringSwitch<StringRef>(ArchName)
118+
.Case("bdw", "bdw")
119+
.Case("skl", "skl")
120+
.Case("kbl", "kbl")
121+
.Case("cfl", "cfl")
122+
.Cases("apl", "bxt", "apl")
123+
.Case("glk", "glk")
124+
.Case("whl", "whl")
125+
.Case("aml", "aml")
126+
.Case("cml", "cml")
127+
.Cases("icllp", "icl", "icllp")
128+
.Cases("ehl", "jsl", "ehl")
129+
.Cases("tgllp", "tgl", "tgllp")
130+
.Case("rkl", "rkl")
131+
.Cases("adl_s", "rpl_s", "adl_s")
132+
.Case("adl_p", "adl_p")
133+
.Case("adl_n", "adl_n")
134+
.Case("dg1", "dg1")
135+
.Cases("acm_g10", "dg2_g10", "acm_g10")
136+
.Cases("acm_g11", "dg2_g11", "acm_g11")
137+
.Cases("acm_g12", "dg2_g12", "acm_g12")
138+
.Case("pvc", "pvc")
139+
.Case("pvc_vg", "pvc_vg")
140+
.Cases("mtl_u", "mtl_s", "arl_u", "arl_s", "mtl_u")
141+
.Case("mtl_h", "mtl_h")
142+
.Case("arl_h", "arl_h")
143+
.Case("bmg_g21", "bmg_g21")
144+
.Case("lnl_m", "lnl_m")
145+
.Default("");
146+
return Arch;
147+
}
148+
30149
SYCLInstallationDetector::SYCLInstallationDetector(const Driver &D)
31150
: D(D), InstallationCandidates() {
32151
InstallationCandidates.emplace_back(D.Dir + "/..");

clang/lib/Driver/ToolChains/SYCL.h

Lines changed: 109 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,13 +9,122 @@
99
#ifndef LLVM_CLANG_LIB_DRIVER_TOOLCHAINS_SYCL_H
1010
#define LLVM_CLANG_LIB_DRIVER_TOOLCHAINS_SYCL_H
1111

12+
#include "clang/Basic/Cuda.h"
1213
#include "clang/Driver/Options.h"
1314
#include "clang/Driver/Tool.h"
1415
#include "clang/Driver/ToolChain.h"
1516

1617
namespace clang {
1718
namespace driver {
1819

20+
// List of architectures (Intel CPUs and Intel GPUs) that support SYCL
// offloading.
//
// The enumerator order is significant: IsSYCLSupportedIntelCPUArch and
// IsSYCLSupportedIntelGPUArch classify a value by checking that it lies
// between the first and last enumerator of the respective group. New CPUs
// must therefore be added between SKYLAKEAVX512 and GRANITERAPIDS, and new
// GPUs between BDW and LNL_M (updating the bounds in those helpers when the
// first or last enumerator of a group changes).
enum class SYCLSupportedIntelArchs {
  // Sentinel for a name that is not a supported Intel AOT target.
  UNKNOWN,
  // Intel CPUs
  SKYLAKEAVX512,
  COREAVX2,
  COREI7AVX,
  COREI7,
  WESTMERE,
  SANDYBRIDGE,
  IVYBRIDGE,
  BROADWELL,
  COFFEELAKE,
  ALDERLAKE,
  SKYLAKE,
  SKX,
  CASCADELAKE,
  ICELAKECLIENT,
  ICELAKESERVER,
  SAPPHIRERAPIDS,
  GRANITERAPIDS,
  // Intel GPUs
  BDW,
  SKL,
  KBL,
  CFL,
  APL,
  BXT,
  GLK,
  WHL,
  AML,
  CML,
  ICLLP,
  ICL,
  EHL,
  JSL,
  TGLLP,
  TGL,
  RKL,
  ADL_S,
  RPL_S,
  ADL_P,
  ADL_N,
  DG1,
  ACM_G10,
  DG2_G10,
  ACM_G11,
  DG2_G11,
  ACM_G12,
  DG2_G12,
  PVC,
  PVC_VG,
  MTL_U,
  MTL_S,
  ARL_U,
  ARL_S,
  MTL_H,
  ARL_H,
  BMG_G21,
  LNL_M,
};

// Guard the layout the range-based classification helpers depend on: the
// sentinel comes first, and the CPU group ends before the GPU group begins.
static_assert(SYCLSupportedIntelArchs::UNKNOWN <
                      SYCLSupportedIntelArchs::SKYLAKEAVX512 &&
                  SYCLSupportedIntelArchs::GRANITERAPIDS <
                      SYCLSupportedIntelArchs::BDW &&
                  SYCLSupportedIntelArchs::BDW <=
                      SYCLSupportedIntelArchs::LNL_M,
              "SYCLSupportedIntelArchs must list UNKNOWN, then all Intel "
              "CPUs, then all Intel GPUs");
82+
83+
// Check if the given Arch value is a Generic AMD GPU.
84+
// Currently GFX*_GENERIC AMD GPUs do not support SYCL offloading.
85+
// This list is used to filter out GFX*_GENERIC AMD GPUs in
86+
// `IsSYCLSupportedAMDGPUArch`.
87+
static inline bool IsAMDGenericGPUArch(OffloadArch Arch) {
88+
return Arch == OffloadArch::GFX9_GENERIC ||
89+
Arch == OffloadArch::GFX10_1_GENERIC ||
90+
Arch == OffloadArch::GFX10_3_GENERIC ||
91+
Arch == OffloadArch::GFX11_GENERIC ||
92+
Arch == OffloadArch::GFX12_GENERIC;
93+
}
94+
95+
// Check if the given Arch value is a valid SYCL supported AMD GPU.
96+
static inline bool IsSYCLSupportedAMDGPUArch(OffloadArch Arch) {
97+
return Arch >= OffloadArch::GFX700 && Arch < OffloadArch::AMDGCNSPIRV &&
98+
!IsAMDGenericGPUArch(Arch);
99+
}
100+
101+
// Check if the given Arch value is a valid SYCL supported NVidia GPU.
102+
static inline bool IsSYCLSupportedNVidiaGPUArch(OffloadArch Arch) {
103+
return Arch >= OffloadArch::SM_50 && Arch <= OffloadArch::SM_90a;
104+
}
105+
106+
// Check if the given Arch value is a valid SYCL supported Intel CPU.
107+
static inline bool IsSYCLSupportedIntelCPUArch(SYCLSupportedIntelArchs Arch) {
108+
return Arch >= SYCLSupportedIntelArchs::SKYLAKEAVX512 &&
109+
Arch <= SYCLSupportedIntelArchs::GRANITERAPIDS;
110+
}
111+
112+
// Check if the given Arch value is a valid SYCL supported Intel GPU.
113+
static inline bool IsSYCLSupportedIntelGPUArch(SYCLSupportedIntelArchs Arch) {
114+
return Arch >= SYCLSupportedIntelArchs::BDW &&
115+
Arch <= SYCLSupportedIntelArchs::LNL_M;
116+
}
117+
118+
// Check if the user provided value for --offload-arch is a valid
119+
// SYCL supported Intel AOT target.
120+
SYCLSupportedIntelArchs
121+
StringToOffloadArchSYCL(llvm::StringRef ArchNameAsString);
122+
123+
// This is a mapping between the user provided --offload-arch value for Intel
124+
// GPU targets and the spir64_gen device name accepted by OCLOC (the Intel GPU
125+
// AOT compiler).
126+
StringRef mapIntelGPUArchName(StringRef ArchName);
127+
19128
class SYCLInstallationDetector {
20129
public:
21130
SYCLInstallationDetector(const Driver &D);

0 commit comments

Comments
 (0)