Skip to content

[NFCI][SYCL] Split vec's unary ops into individual mixins #16946

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Feb 11, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
152 changes: 71 additions & 81 deletions sycl/include/sycl/detail/vector_arith.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -59,11 +59,39 @@ struct UnaryPlus {
}
};

struct VecOperators {
// Tag to map/templatize the mixin for prefix/postfix inc/dec operators.
struct IncDec {};

template <typename SelfOperandTy> struct IncDecImpl {
using element_type = typename from_incomplete<SelfOperandTy>::element_type;
using vec_t = simplify_if_swizzle_t<std::remove_const_t<SelfOperandTy>>;

public:
friend SelfOperandTy &operator++(SelfOperandTy &x) {
x += element_type{1};
return x;
}
friend SelfOperandTy &operator--(SelfOperandTy &x) {
x -= element_type{1};
return x;
}
friend auto operator++(SelfOperandTy &x, int) {
vec_t tmp{x};
x += element_type{1};
return tmp;
}
friend auto operator--(SelfOperandTy &x, int) {
vec_t tmp{x};
x -= element_type{1};
return tmp;
}
};

template <typename Self> struct VecOperators {
static_assert(is_vec_v<Self>);

template <typename OpTy, typename... ArgTys>
static constexpr auto apply(const ArgTys &...Args) {
using Self = nth_type_t<0, ArgTys...>;
static_assert(is_vec_v<Self>);
static_assert(((std::is_same_v<Self, ArgTys> && ...)));

using element_type = typename Self::element_type;
Expand Down Expand Up @@ -163,6 +191,41 @@ struct VecOperators {
res[i] = Op(Args[i]...);
return res;
}

// Uglier than possible due to
// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=85282.
template <typename Op, typename = void> struct OpMixin;

template <typename Op>
struct OpMixin<Op, std::enable_if_t<std::is_same_v<Op, IncDec>>>
: public IncDecImpl<Self> {};

#define __SYCL_VEC_UOP_MIXIN(OP, OPERATOR) \
template <typename Op> \
struct OpMixin<Op, std::enable_if_t<std::is_same_v<Op, OP>>> { \
friend auto operator OPERATOR(const Self &v) { return apply<OP>(v); } \
};

__SYCL_VEC_UOP_MIXIN(std::negate<void>, -)
__SYCL_VEC_UOP_MIXIN(std::logical_not<void>, !)
__SYCL_VEC_UOP_MIXIN(UnaryPlus, +)

template <typename Op>
struct OpMixin<Op, std::enable_if_t<std::is_same_v<Op, std::bit_not<void>>>> {
template <typename T = typename from_incomplete<Self>::element_type>
friend std::enable_if_t<!is_vgenfloat_v<T>, Self> operator~(const Self &v) {
return apply<std::bit_not<void>>(v);
}
};

#undef __SYCL_VEC_UOP_MIXIN

template <typename... Op>
struct __SYCL_EBO CombineImpl : public OpMixin<Op>... {};

struct Combined
: public CombineImpl<std::negate<void>, std::logical_not<void>,
std::bit_not<void>, UnaryPlus, IncDec> {};
};

// Macros to populate binary operation on sycl::vec.
Expand All @@ -174,7 +237,7 @@ struct VecOperators {
template <typename T = DataT> \
friend std::enable_if_t<(COND), vec_t> operator BINOP(const vec_t & Lhs, \
const vec_t & Rhs) { \
return VecOperators::apply<FUNCTOR>(Lhs, Rhs); \
return VecOperators<vec_t>::template apply<FUNCTOR>(Lhs, Rhs); \
} \
\
template <typename T = DataT> \
Expand All @@ -200,65 +263,11 @@ struct VecOperators {
return Lhs; \
}

/****************************************************************
* vec_arith_common
* / | \
* / | \
* vec_arith<int> vec_arith<float> ... vec_arith<byte>
* \ | /
* \ | /
* sycl::vec<T>
*
* vec_arith_common is the base class for vec_arith. It contains
* the common math operators of sycl::vec for all types.
* vec_arith is the derived class that contains the math operators
* specialized for certain types. sycl::vec inherits from vec_arith.
* *************************************************************/
template <typename DataT, int NumElements> class vec_arith_common;
template <typename DataT> struct vec_helper;

template <typename DataT, int NumElements>
class vec_arith : public vec_arith_common<DataT, NumElements> {
class vec_arith : public VecOperators<vec<DataT, NumElements>>::Combined {
protected:
using vec_t = vec<DataT, NumElements>;
using ocl_t = detail::fixed_width_signed<sizeof(DataT)>;
template <typename T> using vec_data = vec_helper<T>;

// operator!.
friend vec<ocl_t, NumElements> operator!(const vec_t &Rhs) {
return VecOperators::apply<std::logical_not<void>>(Rhs);
}

// operator +.
friend vec_t operator+(const vec_t &Lhs) {
return VecOperators::apply<UnaryPlus>(Lhs);
}

// operator -.
friend vec_t operator-(const vec_t &Lhs) {
return VecOperators::apply<std::negate<void>>(Lhs);
}

// Unary operations on sycl::vec
// FIXME: Don't allow Unary operators on vec<bool> after
// https://github.com/KhronosGroup/SYCL-CTS/issues/896 gets fixed.
#ifdef __SYCL_UOP
#error "Undefine __SYCL_UOP macro"
#endif
#define __SYCL_UOP(UOP, OPASSIGN) \
friend vec_t &operator UOP(vec_t & Rhs) { \
Rhs OPASSIGN DataT{1}; \
return Rhs; \
} \
friend vec_t operator UOP(vec_t &Lhs, int) { \
vec_t Ret(Lhs); \
Lhs OPASSIGN DataT{1}; \
return Ret; \
}

__SYCL_UOP(++, +=)
__SYCL_UOP(--, -=)
#undef __SYCL_UOP

// The logical operations on scalar types results in 0/1, while for vec<>,
// logical operations should result in 0 and -1 (similar to OpenCL vectors).
Expand All @@ -272,7 +281,7 @@ class vec_arith : public vec_arith_common<DataT, NumElements> {
template <typename T = DataT> \
friend std::enable_if_t<(COND), vec<ocl_t, NumElements>> operator RELLOGOP( \
const vec_t & Lhs, const vec_t & Rhs) { \
return VecOperators::apply<FUNCTOR>(Lhs, Rhs); \
return VecOperators<vec_t>::template apply<FUNCTOR>(Lhs, Rhs); \
} \
\
template <typename T = DataT> \
Expand Down Expand Up @@ -325,13 +334,13 @@ class vec_arith : public vec_arith_common<DataT, NumElements> {
#if (!defined(_HAS_STD_BYTE) || _HAS_STD_BYTE != 0)
template <int NumElements>
class vec_arith<std::byte, NumElements>
: public vec_arith_common<std::byte, NumElements> {
: public VecOperators<vec<std::byte, NumElements>>::template OpMixin<
std::bit_not<void>> {
protected:
// NumElements can never be zero. Still using the redundant check to avoid
// incomplete type errors.
using DataT = typename std::conditional_t<NumElements == 0, int, std::byte>;
using vec_t = vec<DataT, NumElements>;
template <typename T> using vec_data = vec_helper<T>;

// Special <<, >> operators for std::byte.
// std::byte is not an arithmetic type and it only supports the following
Expand Down Expand Up @@ -376,25 +385,6 @@ class vec_arith<std::byte, NumElements>
};
#endif // (!defined(_HAS_STD_BYTE) || _HAS_STD_BYTE != 0)

template <typename DataT, int NumElements> class vec_arith_common {
protected:
using vec_t = vec<DataT, NumElements>;

static constexpr bool IsBfloat16 =
std::is_same_v<DataT, sycl::ext::oneapi::bfloat16>;

// operator~() available only when: dataT != float && dataT != double
// && dataT != half
template <typename T = DataT>
friend std::enable_if_t<!detail::is_vgenfloat_v<T>, vec_t>
operator~(const vec_t &Rhs) {
return VecOperators::apply<std::bit_not<void>>(Rhs);
}

// friends
template <typename T1, int T2> friend class __SYCL_EBO vec;
};

#undef __SYCL_BINOP

} // namespace detail
Expand Down
1 change: 0 additions & 1 deletion sycl/include/sycl/vector.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -435,7 +435,6 @@ class __SYCL_EBO vec
template <typename T1, int T2> friend class __SYCL_EBO vec;
// To allow arithmetic operators access private members of vec.
template <typename T1, int T2> friend class detail::vec_arith;
template <typename T1, int T2> friend class detail::vec_arith_common;
};
///////////////////////// class sycl::vec /////////////////////////

Expand Down
24 changes: 12 additions & 12 deletions sycl/test/check_device_code/vector/vector_bf16_builtins.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@ SYCL_EXTERNAL auto TestFMin(vec<bfloat16, 2> a, vec<bfloat16, 2> b) {
}

// CHECK-LABEL: define dso_local spir_func void @_Z8TestFMaxN4sycl3_V13vecINS0_3ext6oneapi8bfloat16ELi3EEES5_(
// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.6") align 8 initializes((0, 8)) [[AGG_RESULT:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.6") align 8 [[A:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.6") align 8 [[B:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META24:![0-9]+]] !sycl_fixed_targets [[META7]] {
// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.14") align 8 initializes((0, 8)) [[AGG_RESULT:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.14") align 8 [[A:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.14") align 8 [[B:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META24:![0-9]+]] !sycl_fixed_targets [[META7]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[VEC_ADDR_I_I_I_I12_I:%.*]] = alloca <3 x float>, align 16
// CHECK-NEXT: [[DST_I_I_I_I13_I:%.*]] = alloca [4 x %"class.sycl::_V1::ext::oneapi::bfloat16"], align 2
Expand All @@ -87,7 +87,7 @@ SYCL_EXTERNAL auto TestFMin(vec<bfloat16, 2> a, vec<bfloat16, 2> b) {
// CHECK-NEXT: [[EXTRACTVEC_I_I_I_I_I:%.*]] = shufflevector <4 x i16> [[TMP0]], <4 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
// CHECK-NEXT: store <4 x i16> [[EXTRACTVEC_I_I_I_I_I]], ptr [[VEC_ADDR_I_I_I_I_I]], align 8, !tbaa [[TBAA14]], !noalias [[META28]]
// CHECK-NEXT: call spir_func void @__devicelib_ConvertBF16ToFINTELVec3(ptr addrspace(4) noundef [[VEC_ADDR_ASCAST_I_I_I_I_I]], ptr addrspace(4) noundef [[DST_ASCAST_I_I_I_I_I]]) #[[ATTR5]], !noalias [[META28]]
// CHECK-NEXT: [[LOADVEC4_I_I_I_I_I_I:%.*]] = load <4 x float>, ptr [[DST_I_I_I_I_I]], align 4, !noalias [[META28]]
// CHECK-NEXT: [[LOADVECN_I_I_I_I_I_I:%.*]] = load <4 x float>, ptr [[DST_I_I_I_I_I]], align 4, !noalias [[META28]]
// CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 8, ptr nonnull [[VEC_ADDR_I_I_I_I_I]]), !noalias [[META28]]
// CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 16, ptr nonnull [[DST_I_I_I_I_I]]), !noalias [[META28]]
// CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 8, ptr nonnull [[VEC_ADDR_I_I_I_I2_I]]), !noalias [[META31:![0-9]+]]
Expand All @@ -97,11 +97,11 @@ SYCL_EXTERNAL auto TestFMin(vec<bfloat16, 2> a, vec<bfloat16, 2> b) {
// CHECK-NEXT: [[EXTRACTVEC_I_I_I_I7_I:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
// CHECK-NEXT: store <4 x i16> [[EXTRACTVEC_I_I_I_I7_I]], ptr [[VEC_ADDR_I_I_I_I2_I]], align 8, !tbaa [[TBAA14]], !noalias [[META31]]
// CHECK-NEXT: call spir_func void @__devicelib_ConvertBF16ToFINTELVec3(ptr addrspace(4) noundef [[VEC_ADDR_ASCAST_I_I_I_I5_I]], ptr addrspace(4) noundef [[DST_ASCAST_I_I_I_I6_I]]) #[[ATTR5]], !noalias [[META31]]
// CHECK-NEXT: [[LOADVEC4_I_I_I_I_I8_I:%.*]] = load <4 x float>, ptr [[DST_I_I_I_I3_I]], align 4, !noalias [[META31]]
// CHECK-NEXT: [[LOADVECN_I_I_I_I_I8_I:%.*]] = load <4 x float>, ptr [[DST_I_I_I_I3_I]], align 4, !noalias [[META31]]
// CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 8, ptr nonnull [[VEC_ADDR_I_I_I_I2_I]]), !noalias [[META31]]
// CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 16, ptr nonnull [[DST_I_I_I_I3_I]]), !noalias [[META31]]
// CHECK-NEXT: [[EXTRACTVEC_I_I_I_I:%.*]] = shufflevector <4 x float> [[LOADVEC4_I_I_I_I_I_I]], <4 x float> poison, <3 x i32> <i32 0, i32 1, i32 2>
// CHECK-NEXT: [[EXTRACTVEC_I_I4_I_I:%.*]] = shufflevector <4 x float> [[LOADVEC4_I_I_I_I_I8_I]], <4 x float> poison, <3 x i32> <i32 0, i32 1, i32 2>
// CHECK-NEXT: [[EXTRACTVEC_I_I_I_I:%.*]] = shufflevector <4 x float> [[LOADVECN_I_I_I_I_I_I]], <4 x float> poison, <3 x i32> <i32 0, i32 1, i32 2>
// CHECK-NEXT: [[EXTRACTVEC_I_I4_I_I:%.*]] = shufflevector <4 x float> [[LOADVECN_I_I_I_I_I8_I]], <4 x float> poison, <3 x i32> <i32 0, i32 1, i32 2>
// CHECK-NEXT: [[CALL2_I_I:%.*]] = call spir_func noundef <3 x float> @_Z16__spirv_ocl_fmaxDv3_fS_(<3 x float> noundef [[EXTRACTVEC_I_I_I_I]], <3 x float> noundef [[EXTRACTVEC_I_I4_I_I]]) #[[ATTR6]]
// CHECK-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META34:![0-9]+]])
// CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 16, ptr nonnull [[VEC_ADDR_I_I_I_I12_I]]), !noalias [[META37:![0-9]+]]
Expand All @@ -111,19 +111,19 @@ SYCL_EXTERNAL auto TestFMin(vec<bfloat16, 2> a, vec<bfloat16, 2> b) {
// CHECK-NEXT: [[EXTRACTVEC_I_I_I_I17_I:%.*]] = shufflevector <3 x float> [[CALL2_I_I]], <3 x float> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
// CHECK-NEXT: store <4 x float> [[EXTRACTVEC_I_I_I_I17_I]], ptr [[VEC_ADDR_I_I_I_I12_I]], align 16, !tbaa [[TBAA14]], !noalias [[META37]]
// CHECK-NEXT: call spir_func void @__devicelib_ConvertFToBF16INTELVec3(ptr addrspace(4) noundef [[VEC_ADDR_ASCAST_I_I_I_I15_I]], ptr addrspace(4) noundef [[DST_ASCAST_I_I_I_I16_I]]) #[[ATTR5]], !noalias [[META37]]
// CHECK-NEXT: [[LOADVEC4_I_I_I_I_I18_I:%.*]] = load <4 x i16>, ptr [[DST_I_I_I_I13_I]], align 2, !noalias [[META37]]
// CHECK-NEXT: [[LOADVECN_I_I_I_I_I18_I:%.*]] = load <4 x i16>, ptr [[DST_I_I_I_I13_I]], align 2, !noalias [[META37]]
// CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 16, ptr nonnull [[VEC_ADDR_I_I_I_I12_I]]), !noalias [[META37]]
// CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 8, ptr nonnull [[DST_I_I_I_I13_I]]), !noalias [[META37]]
// CHECK-NEXT: [[EXTRACTVEC4_I19_I:%.*]] = shufflevector <4 x i16> [[LOADVEC4_I_I_I_I_I18_I]], <4 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
// CHECK-NEXT: store <4 x i16> [[EXTRACTVEC4_I19_I]], ptr addrspace(4) [[AGG_RESULT]], align 8, !alias.scope [[META37]]
// CHECK-NEXT: [[EXTRACTVEC_I19_I:%.*]] = shufflevector <4 x i16> [[LOADVECN_I_I_I_I_I18_I]], <4 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
// CHECK-NEXT: store <4 x i16> [[EXTRACTVEC_I19_I]], ptr addrspace(4) [[AGG_RESULT]], align 8, !alias.scope [[META37]]
// CHECK-NEXT: ret void
//
SYCL_EXTERNAL auto TestFMax(vec<bfloat16, 3> a, vec<bfloat16, 3> b) {
return experimental::fmax(a, b);
}

// CHECK-LABEL: define dso_local spir_func void @_Z9TestIsNanN4sycl3_V13vecINS0_3ext6oneapi8bfloat16ELi4EEE(
// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.18") align 8 initializes((0, 8)) [[AGG_RESULT:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.24") align 8 [[A:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META38:![0-9]+]] !sycl_fixed_targets [[META7]] {
// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.34") align 8 initializes((0, 8)) [[AGG_RESULT:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.44") align 8 [[A:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META38:![0-9]+]] !sycl_fixed_targets [[META7]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[VEC_ADDR_I_I_I_I_I:%.*]] = alloca <4 x i16>, align 8
// CHECK-NEXT: [[DST_I_I_I_I_I:%.*]] = alloca [4 x float], align 4
Expand All @@ -149,7 +149,7 @@ SYCL_EXTERNAL auto TestIsNan(vec<bfloat16, 4> a) {
}

// CHECK-LABEL: define dso_local spir_func void @_Z8TestFabsN4sycl3_V13vecINS0_3ext6oneapi8bfloat16ELi8EEE(
// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.46") align 16 initializes((0, 16)) [[AGG_RESULT:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.46") align 16 [[A:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META48:![0-9]+]] !sycl_fixed_targets [[META7]] {
// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.82") align 16 initializes((0, 16)) [[AGG_RESULT:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.82") align 16 [[A:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META48:![0-9]+]] !sycl_fixed_targets [[META7]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[VEC_ADDR_I_I_I_I2_I:%.*]] = alloca <8 x float>, align 32
// CHECK-NEXT: [[DST_I_I_I_I3_I:%.*]] = alloca [8 x %"class.sycl::_V1::ext::oneapi::bfloat16"], align 2
Expand Down Expand Up @@ -185,7 +185,7 @@ SYCL_EXTERNAL auto TestFabs(vec<bfloat16, 8> a) {
}

// CHECK-LABEL: define dso_local spir_func void @_Z8TestCeilN4sycl3_V13vecINS0_3ext6oneapi8bfloat16ELi8EEE(
// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.46") align 16 initializes((0, 16)) [[AGG_RESULT:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.46") align 16 [[A:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META59:![0-9]+]] !sycl_fixed_targets [[META7]] {
// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.82") align 16 initializes((0, 16)) [[AGG_RESULT:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.82") align 16 [[A:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META59:![0-9]+]] !sycl_fixed_targets [[META7]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[VEC_ADDR_I_I_I_I2_I:%.*]] = alloca <8 x float>, align 32
// CHECK-NEXT: [[DST_I_I_I_I3_I:%.*]] = alloca [8 x %"class.sycl::_V1::ext::oneapi::bfloat16"], align 2
Expand Down Expand Up @@ -221,7 +221,7 @@ SYCL_EXTERNAL auto TestCeil(vec<bfloat16, 8> a) {
}

// CHECK-LABEL: define dso_local spir_func void @_Z7TestFMAN4sycl3_V13vecINS0_3ext6oneapi8bfloat16ELi16EEES5_S5_(
// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.58") align 32 initializes((0, 32)) [[AGG_RESULT:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.58") align 32 [[A:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.58") align 32 [[B:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.58") align 32 [[C:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META70:![0-9]+]] !sycl_fixed_targets [[META7]] {
// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.102") align 32 initializes((0, 32)) [[AGG_RESULT:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.102") align 32 [[A:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.102") align 32 [[B:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.102") align 32 [[C:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META70:![0-9]+]] !sycl_fixed_targets [[META7]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[VEC_ADDR_I_I_I_I14_I:%.*]] = alloca <16 x float>, align 64
// CHECK-NEXT: [[DST_I_I_I_I15_I:%.*]] = alloca [16 x %"class.sycl::_V1::ext::oneapi::bfloat16"], align 2
Expand Down
Loading
Loading