Skip to content

Commit e67c1ac

Browse files
author
iclsrc
committed
Merge from 'sycl' to 'sycl-web'
2 parents 25cb138 + 52101af commit e67c1ac

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

43 files changed

+1352
-565
lines changed

clang/include/clang/Basic/AddressSpaces.h

+34
Original file line numberDiff line numberDiff line change
@@ -87,6 +87,40 @@ inline bool isPtrSizeAddressSpace(LangAS AS) {
8787
AS == LangAS::ptr64);
8888
}
8989

90+
/// Translate an OpenCL language address space into the SYCL address space of
/// the same kind.
///
/// The OpenCL global, global_device, global_host, local and private address
/// spaces map to their sycl_* counterparts. Any other value of \p AS is
/// returned unchanged.
inline LangAS asSYCLLangAS(LangAS AS) {
91+
switch (AS) {
92+
case LangAS::opencl_global:
93+
return LangAS::sycl_global;
94+
case LangAS::opencl_global_device:
95+
return LangAS::sycl_global_device;
96+
case LangAS::opencl_global_host:
97+
return LangAS::sycl_global_host;
98+
case LangAS::opencl_local:
99+
return LangAS::sycl_local;
100+
case LangAS::opencl_private:
101+
return LangAS::sycl_private;
102+
default:
103+
return AS;
104+
}
105+
}
106+
107+
/// Translate a SYCL language address space into the OpenCL address space of
/// the same kind.
///
/// The SYCL global, global_device, global_host, local and private address
/// spaces map to their opencl_* counterparts. Any other value of \p AS is
/// returned unchanged.
inline LangAS asOpenCLLangAS(LangAS AS) {
108+
switch (AS) {
109+
case LangAS::sycl_global:
110+
return LangAS::opencl_global;
111+
case LangAS::sycl_global_device:
112+
return LangAS::opencl_global_device;
113+
case LangAS::sycl_global_host:
114+
return LangAS::opencl_global_host;
115+
case LangAS::sycl_local:
116+
return LangAS::opencl_local;
117+
case LangAS::sycl_private:
118+
return LangAS::opencl_private;
119+
default:
120+
return AS;
121+
}
122+
}
123+
90124
} // namespace clang
91125

92126
#endif // LLVM_CLANG_BASIC_ADDRESSSPACES_H

clang/include/clang/Basic/DiagnosticDriverKinds.td

+2
Original file line numberDiff line numberDiff line change
@@ -303,6 +303,8 @@ def err_drv_expecting_fsycl_with_sycl_opt : Error<
303303
"'%0' must be used in conjunction with '-fsycl' to enable offloading">;
304304
def err_drv_fsycl_with_c_type : Error<
305305
"'%0' must not be used in conjunction with '-fsycl', which expects C++ source">;
306+
def err_drv_sycl_missing_amdgpu_arch : Error<
307+
"missing AMDGPU architecture for SYCL offloading; specify it with '-Xsycl-target-backend --offload-arch'">;
306308
def warn_drv_sycl_offload_target_duplicate : Warning<
307309
"SYCL offloading target '%0' is similar to target '%1' already specified; "
308310
"will be ignored">, InGroup<SyclTarget>;

clang/lib/AST/MicrosoftMangle.cpp

+15
Original file line numberDiff line numberDiff line change
@@ -2184,6 +2184,21 @@ void MicrosoftCXXNameMangler::mangleAddressSpaceType(QualType T,
21842184
case LangAS::cuda_device:
21852185
Extra.mangleSourceName("_ASCUdevice");
21862186
break;
2187+
case LangAS::sycl_global:
2188+
Extra.mangleSourceName("_ASSYglobal");
2189+
break;
2190+
case LangAS::sycl_global_device:
2191+
Extra.mangleSourceName("_ASSYdevice");
2192+
break;
2193+
case LangAS::sycl_global_host:
2194+
Extra.mangleSourceName("_ASSYhost");
2195+
break;
2196+
case LangAS::sycl_local:
2197+
Extra.mangleSourceName("_ASSYlocal");
2198+
break;
2199+
case LangAS::sycl_private:
2200+
Extra.mangleSourceName("_ASSYprivate");
2201+
break;
21872202
case LangAS::cuda_constant:
21882203
Extra.mangleSourceName("_ASCUconstant");
21892204
break;

clang/lib/CodeGen/CGBuiltin.cpp

+6-10
Original file line numberDiff line numberDiff line change
@@ -18344,22 +18344,18 @@ RValue CodeGenFunction::EmitIntelFPGARegBuiltin(const CallExpr *E,
1834418344
ReturnValueSlot ReturnValue) {
1834518345
const Expr *PtrArg = E->getArg(0);
1834618346
QualType ArgType = PtrArg->getType();
18347-
llvm::Value *V = nullptr;
1834818347
StringRef AnnotStr = "__builtin_intel_fpga_reg";
1834918348

18350-
if (ArgType->isStructureOrClassType() || ArgType->isUnionType()) {
18351-
RValue RV = EmitAnyExpr(PtrArg);
18352-
Address A = EmitIntelFPGAFieldAnnotations(E->getExprLoc(),
18353-
RV.getAggregateAddress(),
18354-
AnnotStr);
18355-
llvm::Type *VTy = ReturnValue.getValue().getPointer()->getType();
18356-
uint64_t SizeVal = CGM.getDataLayout().getTypeAllocSize(VTy);
18357-
Builder.CreateMemCpy(ReturnValue.getValue(), A, SizeVal, false);
18349+
if (ArgType->isRecordType()) {
18350+
Address DstAddr = ReturnValue.getValue();
18351+
EmitAnyExprToMem(PtrArg, DstAddr, ArgType.getQualifiers(), true);
18352+
Address A =
18353+
EmitIntelFPGAFieldAnnotations(E->getExprLoc(), DstAddr, AnnotStr);
1835818354
return RValue::getAggregate(A);
1835918355
}
1836018356

1836118357
// if scalar type
18362-
V = EmitScalarExpr(PtrArg);
18358+
llvm::Value *V = EmitScalarExpr(PtrArg);
1836318359

1836418360
// llvm.annotation does not accept anything but integer types.
1836518361
llvm::Type *OrigVType = V->getType();

clang/lib/Driver/Driver.cpp

+81-21
Original file line numberDiff line numberDiff line change
@@ -3945,8 +3945,9 @@ class OffloadingActionBuilder final {
39453945
/// List of static archives to extract FPGA dependency info from
39463946
ActionList FPGAArchiveInputs;
39473947

3948-
/// List of CUDA architectures to use in this compilation with NVPTX targets.
3949-
SmallVector<CudaArch, 8> GpuArchList;
3948+
/// List of GPU architectures to use in this compilation with NVPTX/AMDGCN
3949+
/// targets.
3950+
SmallVector<std::pair<llvm::Triple, std::string>, 8> GpuArchList;
39503951

39513952
/// Build the last steps for CUDA after all BC files have been linked.
39523953
JobAction *finalizeNVPTXDependences(Action *Input, const llvm::Triple &TT) {
@@ -3983,13 +3984,17 @@ class OffloadingActionBuilder final {
39833984
const Driver::InputList &Inputs)
39843985
: DeviceActionBuilder(C, Args, Inputs, Action::OFK_SYCL) {}
39853986

3986-
void withBoundArchForToolChain(const ToolChain* TC,
3987+
void withBoundArchForToolChain(const ToolChain *TC,
39873988
llvm::function_ref<void(const char *)> Op) {
3988-
if (TC->getTriple().isNVPTX())
3989-
for (CudaArch A : GpuArchList)
3990-
Op(CudaArchToString(A));
3991-
else
3992-
Op(nullptr);
3989+
for (auto &A : GpuArchList) {
3990+
if (TC->getTriple() == A.first) {
3991+
Op(Args.MakeArgString(A.second.c_str()));
3992+
return;
3993+
}
3994+
}
3995+
3996+
// no bound arch for this toolchain
3997+
Op(nullptr);
39933998
}
39943999

39954000
ActionBuilderReturnCode
@@ -4043,8 +4048,8 @@ class OffloadingActionBuilder final {
40434048
}
40444049
const auto *TC = ToolChains.front();
40454050
const char *BoundArch = nullptr;
4046-
if (TC->getTriple().isNVPTX())
4047-
BoundArch = CudaArchToString(GpuArchList.front());
4051+
if (TC->getTriple().isNVPTX() || TC->getTriple().isAMDGCN())
4052+
BoundArch = GpuArchList.front().second.c_str();
40484053
DA.add(*DeviceCompilerInput, *TC, BoundArch, Action::OFK_SYCL);
40494054
// Clear the input file, it is already a dependence to a host
40504055
// action.
@@ -4627,39 +4632,94 @@ class OffloadingActionBuilder final {
46274632
}
46284633
}
46294634

4630-
/// Initialize the GPU architecture list from arguments - this populates `GpuArchList` from
4631-
/// `--cuda-gpu-arch` flags. Only relevant if compiling to CUDA. Return true if any
4632-
/// initialization errors are found.
4635+
/// Initialize the GPU architecture list from arguments - this populates
4636+
/// `GpuArchList` from `--offload-arch` flags. Only relevant if compiling to
4637+
/// CUDA or AMDGCN. Return true if any initialization errors are found.
4638+
// FIXME: "offload-arch" and the BoundArch mechanism should also be
4639+
// used in the SYCLToolChain for SPIR-V AOT to track the offload
4640+
// architecture instead of the Triple sub-arch it currently uses.
46334641
bool initializeGpuArchMap() {
46344642
const OptTable &Opts = C.getDriver().getOpts();
46354643
for (auto *A : Args) {
46364644
unsigned Index;
4645+
llvm::Triple *TargetBE = nullptr;
46374646

4638-
if (A->getOption().matches(options::OPT_Xsycl_backend_EQ))
4647+
auto GetTripleIt = [&, this](llvm::StringRef Triple) {
4648+
llvm::Triple TargetTriple{Triple};
4649+
auto TripleIt = llvm::find_if(SYCLTripleList, [&](auto &SYCLTriple) {
4650+
return SYCLTriple == TargetTriple;
4651+
});
4652+
return TripleIt != SYCLTripleList.end() ? &*TripleIt : nullptr;
4653+
};
4654+
4655+
if (A->getOption().matches(options::OPT_Xsycl_backend_EQ)) {
4656+
TargetBE = GetTripleIt(A->getValue(0));
46394657
// Passing device args: -Xsycl-target-backend=<triple> -opt=val.
4640-
if (llvm::Triple(A->getValue(0)).isNVPTX())
4658+
if (TargetBE)
46414659
Index = Args.getBaseArgs().MakeIndex(A->getValue(1));
46424660
else
46434661
continue;
4644-
else if (A->getOption().matches(options::OPT_Xsycl_backend))
4662+
} else if (A->getOption().matches(options::OPT_Xsycl_backend)) {
4663+
if (SYCLTripleList.size() > 1) {
4664+
C.getDriver().Diag(diag::err_drv_Xsycl_target_missing_triple)
4665+
<< A->getSpelling();
4666+
continue;
4667+
}
46454668
// Passing device args: -Xsycl-target-backend -opt=val.
4669+
TargetBE = &SYCLTripleList.front();
46464670
Index = Args.getBaseArgs().MakeIndex(A->getValue(0));
4647-
else
4671+
} else
46484672
continue;
46494673

46504674
A->claim();
46514675
auto ParsedArg = Opts.ParseOneArg(Args, Index);
4676+
46524677
// TODO: Support --no-cuda-gpu-arch, --{,no-}cuda-gpu-arch=all.
46534678
if (ParsedArg &&
46544679
ParsedArg->getOption().matches(options::OPT_offload_arch_EQ)) {
4680+
llvm::StringRef ArchStr = ParsedArg->getValue(0);
4681+
if (TargetBE->isNVPTX()) {
4682+
// CudaArch also enumerates AMDGCN architectures, so only accept values
// that name an NVIDIA GPU.
4683+
CudaArch Arch = StringToCudaArch(ArchStr);
4684+
if (Arch == CudaArch::UNKNOWN || !IsNVIDIAGpuArch(Arch)) {
4685+
C.getDriver().Diag(clang::diag::err_drv_cuda_bad_gpu_arch)
4686+
<< ArchStr;
4687+
continue;
4688+
}
4689+
ArchStr = CudaArchToString(Arch);
4690+
} else if (TargetBE->isAMDGCN()) {
4691+
llvm::StringMap<bool> Features;
4692+
auto Arch =
4693+
parseTargetID(getHIPOffloadTargetTriple(), ArchStr, &Features);
4694+
if (!Arch) {
4695+
C.getDriver().Diag(clang::diag::err_drv_bad_target_id) << ArchStr;
4696+
continue;
4697+
}
4698+
auto CanId = getCanonicalTargetID(Arch.getValue(), Features);
4699+
ArchStr = Args.MakeArgStringRef(CanId);
4700+
}
46554701
ParsedArg->claim();
4656-
GpuArchList.push_back(StringToCudaArch(ParsedArg->getValue(0)));
4702+
GpuArchList.emplace_back(*TargetBE, ArchStr);
46574703
}
46584704
}
46594705

4660-
// If there are no CUDA architectures provided then default to SM_50.
4661-
if (GpuArchList.empty()) {
4662-
GpuArchList.push_back(CudaArch::SM_50);
4706+
// Handle default architectures
4707+
for (auto &Triple : SYCLTripleList) {
4708+
// For NVIDIA use SM_50 as a default
4709+
if (Triple.isNVPTX() && llvm::none_of(GpuArchList, [&](auto &P) {
4710+
return P.first.isNVPTX();
4711+
})) {
4712+
llvm::StringRef DefaultArch = CudaArchToString(CudaArch::SM_50);
4713+
GpuArchList.emplace_back(Triple, DefaultArch);
4714+
}
4715+
4716+
// For AMD require the architecture to be set by the user
4717+
if (Triple.isAMDGCN() && llvm::none_of(GpuArchList, [&](auto &P) {
4718+
return P.first.isAMDGCN();
4719+
})) {
4720+
C.getDriver().Diag(clang::diag::err_drv_sycl_missing_amdgpu_arch);
4721+
return true;
4722+
}
46634723
}
46644724

46654725
return false;

clang/lib/Driver/ToolChain.cpp

+3-2
Original file line numberDiff line numberDiff line change
@@ -1174,8 +1174,9 @@ llvm::opt::DerivedArgList *ToolChain::TranslateOffloadTargetArgs(
11741174
// at all, target and host share a toolchain.
11751175
if (A->getOption().matches(options::OPT_m_Group)) {
11761176
// AMD GPU is a special case, as -mcpu is required for the device
1177-
// compilation.
1178-
if (SameTripleAsHost || getTriple().getArch() == llvm::Triple::amdgcn)
1177+
// compilation, except for SYCL which uses --offload-arch.
1178+
if (SameTripleAsHost || (getTriple().getArch() == llvm::Triple::amdgcn &&
1179+
DeviceOffloadKind != Action::OFK_SYCL))
11791180
DAL->append(A);
11801181
else
11811182
Modified = true;

clang/lib/Sema/SemaSYCL.cpp

+9-2
Original file line numberDiff line numberDiff line change
@@ -5302,10 +5302,17 @@ bool Util::isSyclFunction(const FunctionDecl *FD, StringRef Name) {
53025302
if (DC->isTranslationUnit())
53035303
return false;
53045304

5305-
std::array<DeclContextDesc, 2> Scopes = {
5305+
std::array<DeclContextDesc, 2> ScopesSycl = {
53065306
Util::MakeDeclContextDesc(Decl::Kind::Namespace, "cl"),
53075307
Util::MakeDeclContextDesc(Decl::Kind::Namespace, "sycl")};
5308-
return matchContext(DC, Scopes);
5308+
std::array<DeclContextDesc, 5> ScopesOneapiExp = {
5309+
Util::MakeDeclContextDesc(Decl::Kind::Namespace, "cl"),
5310+
Util::MakeDeclContextDesc(Decl::Kind::Namespace, "sycl"),
5311+
Util::MakeDeclContextDesc(Decl::Kind::Namespace, "ext"),
5312+
Util::MakeDeclContextDesc(Decl::Kind::Namespace, "oneapi"),
5313+
Util::MakeDeclContextDesc(Decl::Kind::Namespace, "experimental")};
5314+
5315+
return matchContext(DC, ScopesSycl) || matchContext(DC, ScopesOneapiExp);
53095316
}
53105317

53115318
bool Util::isAccessorPropertyListType(QualType Ty) {
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
// RUN: %clang_cc1 %s -x cl -fdeclare-spirv-builtins -fsyntax-only -emit-llvm -o - -O0 | FileCheck %s
2+
//
3+
// Check that SPIR-V builtins are declared with OpenCL address spaces rather
4+
// than SYCL address spaces when using them with OpenCL. OpenCL address spaces
5+
// are mangled with the CL prefix and SYCL address spaces are mangled with the
6+
// SY prefix.
7+
8+
// CHECK: __spirv_ocl_modf{{.*}}CLglobal
9+
void modf_global(float a, global float *ptr) { __spirv_ocl_modf(a, ptr); }
10+
11+
// CHECK: __spirv_ocl_modf{{.*}}CLlocal
12+
void modf_local(float a, local float *ptr) { __spirv_ocl_modf(a, ptr); }
13+
14+
// CHECK: __spirv_ocl_modf{{.*}}CLprivate
15+
void modf_private(float a) {
16+
float *ptr;
17+
__spirv_ocl_modf(a, ptr);
18+
}

clang/test/CodeGenSYCL/Inputs/sycl.hpp

+14-7
Original file line numberDiff line numberDiff line change
@@ -120,13 +120,6 @@ struct no_alias {
120120
} // namespace oneapi
121121
} // namespace ext
122122

123-
namespace ext {
124-
namespace oneapi {
125-
template <typename... properties>
126-
class accessor_property_list {};
127-
} // namespace oneapi
128-
} // namespace ext
129-
130123
template <int dim>
131124
struct id {
132125
template <typename... T>
@@ -146,6 +139,20 @@ template <int dim> struct item {
146139
int Data;
147140
};
148141

142+
namespace ext {
143+
namespace oneapi {
144+
template <typename... properties>
145+
class accessor_property_list {};
146+
namespace experimental {
147+
template <int Dims> item<Dims>
148+
this_item() { return item<Dims>{}; }
149+
150+
template <int Dims> id<Dims>
151+
this_id() { return id<Dims>{}; }
152+
} // namespace experimental
153+
} // namespace oneapi
154+
} // namespace ext
155+
149156
template <int Dims> item<Dims>
150157
this_item() { return item<Dims>{}; }
151158

0 commit comments

Comments
 (0)