Skip to content

Commit bc98cc4

Browse files
committed
[SYCL] Fix issue with half and -fsycl-unnamed-lambda
When `-fsycl-unnamed-lambda` is present, mapping from SYCL Kernel function to a corresponding OpenCL kernel name is done via `__unique_stable_name` built-in. It is used by device compiler to generate integration header and it is used by host compiler to find kernels information in there. The problem is that we might get different results for the same SYCL Kernel function when compiling for host and device: the issue appears if kernel uses `half` data type which is represented as: - `cl::sycl::detail::half_impl::half` on host - `_Float16` on device Actually, similar issue exists even without `-fsycl-unnamed-lambda`, but for that case we have a work-around in form of `#define _Float16 cl::sycl::detail::half_impl::half` in `kernel_desc.hpp` to turn device half representation into a host one. The same trick doesn't apply here and the problem is fixed by doing the following: - for `UniqueStableMangler`, we mangle `cl::sycl::detail::half_impl::half` in the same way as `_Float16`, i.e. `FD16_` - for `UniqueStableMandlger`, `cl::sycl::detail::half_impl::half` is marked as non-substitutable to avoid other differences in mangled name Signed-off-by: Alexey Sachkov <[email protected]>
1 parent ccbe51e commit bc98cc4

File tree

3 files changed

+182
-0
lines changed

3 files changed

+182
-0
lines changed

clang/lib/AST/ItaniumMangle.cpp

+71
Original file line numberDiff line numberDiff line change
@@ -2445,6 +2445,67 @@ static bool isTypeSubstitutable(Qualifiers Quals, const Type *Ty,
24452445
return true;
24462446
}
24472447

2448+
namespace {
2449+
struct DeclContextDesc {
2450+
Decl::Kind DeclKind;
2451+
StringRef Name;
2452+
};
2453+
} // namespace
2454+
2455+
// For Scopes argument, the only supported Decl::Kind values are:
2456+
// - Namespace
2457+
// - CXXRecord
2458+
// - ClassTemplateSpecialization
2459+
static bool matchQualifiedTypeName(const QualType &Ty,
2460+
ArrayRef<DeclContextDesc> Scopes) {
2461+
// The idea: check the declaration context chain starting from the type
2462+
// itself. At each step check the context is of expected kind
2463+
// (namespace) and name.
2464+
const CXXRecordDecl *RecTy = Ty->getAsCXXRecordDecl();
2465+
2466+
if (!RecTy)
2467+
return false; // only classes/structs supported
2468+
const auto *Ctx = dyn_cast<DeclContext>(RecTy);
2469+
2470+
for (const auto &Scope : llvm::reverse(Scopes)) {
2471+
Decl::Kind DK = Ctx->getDeclKind();
2472+
StringRef Name = "";
2473+
2474+
if (DK != Scope.DeclKind)
2475+
return false;
2476+
2477+
switch (DK) {
2478+
case Decl::Kind::ClassTemplateSpecialization:
2479+
// ClassTemplateSpecializationDecl inherits from CXXRecordDecl
2480+
case Decl::Kind::CXXRecord:
2481+
Name = cast<CXXRecordDecl>(Ctx)->getName();
2482+
break;
2483+
case Decl::Kind::Namespace:
2484+
Name = cast<NamespaceDecl>(Ctx)->getName();
2485+
break;
2486+
default:
2487+
return false;
2488+
}
2489+
if (Name != Scope.Name)
2490+
return false;
2491+
Ctx = Ctx->getParent();
2492+
}
2493+
return Ctx->isTranslationUnit();
2494+
}
2495+
2496+
static bool isSYCLHostHalfType(const Type *Ty) {
2497+
// FIXME: this is not really portable, since the bunch of namespace below
2498+
// is not specified by the SYCL standard and highly depends on particular
2499+
// implementation
2500+
static const std::array<DeclContextDesc, 5> Scopes = {
2501+
DeclContextDesc{Decl::Kind::Namespace, "cl"},
2502+
DeclContextDesc{Decl::Kind::Namespace, "sycl"},
2503+
DeclContextDesc{Decl::Kind::Namespace, "detail"},
2504+
DeclContextDesc{Decl::Kind::Namespace, "half_impl"},
2505+
DeclContextDesc{Decl::Kind::CXXRecord, "half"}};
2506+
return matchQualifiedTypeName(QualType(Ty, 0), Scopes);
2507+
}
2508+
24482509
void CXXNameMangler::mangleType(QualType T) {
24492510
// If our type is instantiation-dependent but not dependent, we mangle
24502511
// it as it was written in the source, removing any top-level sugar.
@@ -2504,6 +2565,11 @@ void CXXNameMangler::mangleType(QualType T) {
25042565

25052566
bool isSubstitutable =
25062567
isTypeSubstitutable(quals, ty, Context.getASTContext());
2568+
if (Context.isUniqueNameMangler() && isSYCLHostHalfType(ty)) {
2569+
// Set isSubstitutable to false for cl::sycl::detail::half_impl::half
2570+
// to achieve the same mangling for other components
2571+
isSubstitutable = false;
2572+
}
25072573
if (isSubstitutable && mangleSubstitution(T))
25082574
return;
25092575

@@ -2980,6 +3046,11 @@ void CXXNameMangler::mangleType(const RecordType *T) {
29803046
mangleType(static_cast<const TagType*>(T));
29813047
}
29823048
void CXXNameMangler::mangleType(const TagType *T) {
3049+
if (Context.isUniqueNameMangler() && isSYCLHostHalfType(T)) {
3050+
// Mangle cl::sycl::detail::half_imple::half as _Float16
3051+
mangleType(Context.getASTContext().Float16Ty);
3052+
return;
3053+
}
29833054
mangleName(T->getDecl());
29843055
}
29853056

Original file line numberDiff line numberDiff line change
@@ -0,0 +1,68 @@
1+
// RUN: %clangxx -fsycl-device-only -fsycl-unnamed-lambda -emit-llvm %s -o %t1.bc
2+
// RUN: llvm-dis %t1.bc -o - | FileCheck %s
3+
// RUN: %clangxx -fsycl-device-only -fsycl-unnamed-lambda -emit-llvm %s -DUSE_WRAPPER=1 -o %t2.bc
4+
// RUN: llvm-dis %t2.bc -o - | FileCheck %s
5+
6+
// Mangling of kernel lambda must be the same for both versions of half
7+
// CHECK: __unique_stable_name{{.*}} = private unnamed_addr constant [52 x i8] c"_ZTSN2cl4sycl6bufferINS0_4pairIDF16_NS0_5dummyEEEEE\00"
8+
9+
// Helper function to get string returned by __unique_stable_name in LLVM IR
10+
template <typename T>
11+
void print() {
12+
auto temp = __unique_stable_name(T);
13+
}
14+
15+
// Helper function to get "print" emitted in device code
16+
template<typename T, typename F>
17+
__attribute__((sycl_kernel)) void helper(F f) {
18+
print<T>();
19+
f();
20+
}
21+
22+
// Half wrapper, as it defined in SYCL headers
23+
namespace cl {
24+
namespace sycl {
25+
namespace detail {
26+
namespace half_impl {
27+
class half {
28+
public:
29+
half operator=(int) {return *this;}
30+
};
31+
} // namespace half_impl
32+
} // namespace detail
33+
} // namespace sycl
34+
} // namespace cl
35+
36+
#ifndef USE_WRAPPER
37+
using half = _Float16;
38+
#else
39+
using half = cl::sycl::detail::half_impl::half;
40+
#endif
41+
42+
// A few more fake data types to complicate the mangling
43+
namespace cl {
44+
namespace sycl {
45+
struct dummy {
46+
int a;
47+
};
48+
template<typename T1, typename T2>
49+
struct pair {
50+
T1 a;
51+
T2 b;
52+
};
53+
template <typename T>
54+
class buffer {
55+
public:
56+
T &operator[](int) const { return value; }
57+
mutable T value;
58+
};
59+
} // namespace sycl
60+
} // namespace cl
61+
62+
int main() {
63+
cl::sycl::buffer<cl::sycl::pair<half, cl::sycl::dummy>> B1;
64+
65+
helper<decltype(B1)>([](){});
66+
67+
return 0;
68+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,43 @@
1+
// RUN: %clangxx -fsycl -fsycl-unnamed-lambda %s -o %t.out
2+
// RUN: %GPU_RUN_PLACEHOLDER %t.out
3+
#include <CL/sycl.hpp>
4+
5+
#include <iostream>
6+
7+
int main() {
8+
auto AsyncHandler = [](cl::sycl::exception_list EL) {
9+
for (std::exception_ptr const &P : EL) {
10+
try {
11+
std::rethrow_exception(P);
12+
} catch (std::exception const &E) {
13+
std::cerr << "Caught async SYCL exception: " << E.what() << std::endl;
14+
}
15+
}
16+
};
17+
18+
cl::sycl::queue Q(AsyncHandler);
19+
20+
cl::sycl::device D = Q.get_device();
21+
if (!D.has_extension("cl_khr_fp16"))
22+
return 0; // Skip the test if halfs are not supported
23+
24+
cl::sycl::buffer<cl::sycl::cl_half> Buf(1);
25+
26+
Q.submit([&](cl::sycl::handler &CGH) {
27+
auto Acc = Buf.get_access<cl::sycl::access::mode::write>(CGH);
28+
CGH.single_task([=]() {
29+
Acc[0] = 1;
30+
});
31+
});
32+
33+
Q.wait_and_throw();
34+
35+
auto Acc = Buf.get_access<cl::sycl::access::mode::read>();
36+
if (1 != Acc[0]) {
37+
std::cerr << "Incorrect result, got: " << Acc[0]
38+
<< ", expected: 1" << std::endl;
39+
return 1;
40+
}
41+
42+
return 0;
43+
}

0 commit comments

Comments
 (0)