diff --git a/sycl/include/sycl/detail/named_swizzles_mixin.hpp b/sycl/include/sycl/detail/named_swizzles_mixin.hpp index f35656b2e6381..2700a0adf49cf 100644 --- a/sycl/include/sycl/detail/named_swizzles_mixin.hpp +++ b/sycl/include/sycl/detail/named_swizzles_mixin.hpp @@ -18,6 +18,9 @@ namespace sycl { inline namespace _V1 { namespace detail { +// Will be defined in another header. +template struct from_incomplete; + #ifndef SYCL_SIMPLE_SWIZZLES #define __SYCL_SWIZZLE_MIXIN_SIMPLE_SWIZZLES #else @@ -785,7 +788,8 @@ namespace detail { return (*static_cast(this))[INDEX]; \ } -template struct NamedSwizzlesMixinConst { +template ::size()> +struct NamedSwizzlesMixinConst { #define __SYCL_SWIZZLE_MIXIN_METHOD(COND, NAME, ...) \ __SYCL_SWIZZLE_MIXIN_METHOD_CONST(COND, NAME, __VA_ARGS__) @@ -798,7 +802,8 @@ template struct NamedSwizzlesMixinConst { #undef __SYCL_SWIZZLE_MIXIN_METHOD }; -template struct NamedSwizzlesMixinBoth { +template ::size()> +struct NamedSwizzlesMixinBoth { #define __SYCL_SWIZZLE_MIXIN_METHOD(COND, NAME, ...) \ __SYCL_SWIZZLE_MIXIN_METHOD_NON_CONST(COND, NAME, __VA_ARGS__) \ __SYCL_SWIZZLE_MIXIN_METHOD_CONST(COND, NAME, __VA_ARGS__) diff --git a/sycl/include/sycl/detail/vector_arith.hpp b/sycl/include/sycl/detail/vector_arith.hpp index ac80bd916591d..e628ebb1ae260 100644 --- a/sycl/include/sycl/detail/vector_arith.hpp +++ b/sycl/include/sycl/detail/vector_arith.hpp @@ -21,6 +21,19 @@ template class __SYCL_EBO vec; namespace detail { +template struct from_incomplete; +template +struct from_incomplete : public from_incomplete {}; + +template +struct from_incomplete> { + using element_type = DataT; + static constexpr size_t size() { return NumElements; } +}; + +template struct ApplyIf {}; +template struct ApplyIf : Mixin {}; + // We use std::plus and similar to "map" template parameter to an // overloaded operator. These three below are missing from ``. struct ShiftLeft { diff --git a/sycl/include/sycl/vector.hpp b/sycl/include/sycl/vector.hpp index bb50fcdb5d754..67d00bd9ea7de 100644 --- a/sycl/include/sycl/vector.hpp +++ b/sycl/include/sycl/vector.hpp @@ -113,12 +113,11 @@ template class GetOp { // // must go throw `v.x()` returning a swizzle, then its `operator==` returning // vec and we want that code to compile. -template -struct ScalarConversionOperatorMixIn {}; +template class ScalarConversionOperatorMixIn { + using T = typename from_incomplete::element_type; -template -struct ScalarConversionOperatorMixIn> { - operator T() const { return (*static_cast(this))[0]; } +public: + operator T() const { return (*static_cast(this))[0]; } }; template @@ -134,10 +133,10 @@ inline constexpr bool is_fundamental_or_half_or_bfloat16 = template class __SYCL_EBO vec : public detail::vec_arith, - public detail::ScalarConversionOperatorMixIn, - DataT, NumElements>, - public detail::NamedSwizzlesMixinBoth, - NumElements> { + public detail::ApplyIf< + NumElements == 1, + detail::ScalarConversionOperatorMixIn>>, + public detail::NamedSwizzlesMixinBoth> { static_assert(std::is_same_v>, "DataT must be cv-unqualified"); @@ -177,6 +176,24 @@ class __SYCL_EBO vec element_type_for_vector_t __attribute__(( ext_vector_type(NumElements)))>; + // Make it a template to avoid ambiguity with `vec(const DataT &)` when + // `vector_t` is the same as `DataT`. Not that the other ctor isn't a template + // so we don't even need a smart `enable_if` condition here, the mere fact of + // this being a template makes the other ctor preferred. + template < + typename vector_t_ = vector_t, + typename = typename std::enable_if_t>> + constexpr vec(vector_t_ openclVector) { + m_Data = sycl::bit_cast(openclVector); + } + + /* @SYCL2020 + * Available only when: compiled for the device. + * Converts this SYCL vec instance to the underlying backend-native vector + * type defined by vector_t. + */ + operator vector_t() const { return sycl::bit_cast(m_Data); } + private: #endif // __SYCL_DEVICE_ONLY__ @@ -299,26 +316,6 @@ class __SYCL_EBO vec return *this; } -#ifdef __SYCL_DEVICE_ONLY__ - // Make it a template to avoid ambiguity with `vec(const DataT &)` when - // `vector_t` is the same as `DataT`. Not that the other ctor isn't a template - // so we don't even need a smart `enable_if` condition here, the mere fact of - // this being a template makes the other ctor preferred. - template < - typename vector_t_ = vector_t, - typename = typename std::enable_if_t>> - constexpr vec(vector_t_ openclVector) { - m_Data = sycl::bit_cast(openclVector); - } - - /* @SYCL2020 - * Available only when: compiled for the device. - * Converts this SYCL vec instance to the underlying backend-native vector - * type defined by vector_t. - */ - operator vector_t() const { return sycl::bit_cast(m_Data); } -#endif // __SYCL_DEVICE_ONLY__ - __SYCL2020_DEPRECATED("get_count() is deprecated, please use size() instead") static constexpr size_t get_count() { return size(); } static constexpr size_t size() noexcept { return NumElements; } diff --git a/sycl/test/check_device_code/vector/vector_convert_bfloat.cpp b/sycl/test/check_device_code/vector/vector_convert_bfloat.cpp index 6e5562b182f1b..11fe56b0b54c3 100644 --- a/sycl/test/check_device_code/vector/vector_convert_bfloat.cpp +++ b/sycl/test/check_device_code/vector/vector_convert_bfloat.cpp @@ -18,19 +18,19 @@ using bfloat16 = sycl::ext::oneapi::bfloat16; // CHECK-NEXT: [[VEC_ADDR_I_I_I_I:%.*]] = alloca <3 x i16>, align 8 // CHECK-NEXT: [[DST_I_I_I_I:%.*]] = alloca [4 x float], align 4 // CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META8:![0-9]+]]) -// CHECK-NEXT: [[LOADVEC4_I_I:%.*]] = load <4 x i16>, ptr addrspace(4) [[INP]], align 8, !noalias [[META8]] +// CHECK-NEXT: [[LOADVECN_I_I:%.*]] = load <4 x i16>, ptr addrspace(4) [[INP]], align 8, !noalias [[META8]] // CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 8, ptr nonnull [[VEC_ADDR_I_I_I_I]]), !noalias [[META8]] // CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 16, ptr nonnull [[DST_I_I_I_I]]), !noalias [[META8]] // CHECK-NEXT: [[VEC_ADDR_ASCAST_I_I_I_I:%.*]] = addrspacecast ptr [[VEC_ADDR_I_I_I_I]] to ptr addrspace(4) // CHECK-NEXT: [[DST_ASCAST_I_I_I_I:%.*]] = addrspacecast ptr [[DST_I_I_I_I]] to ptr addrspace(4) -// CHECK-NEXT: [[EXTRACTVEC_I_I_I_I:%.*]] = shufflevector <4 x i16> [[LOADVEC4_I_I]], <4 x i16> poison, <4 x i32> +// CHECK-NEXT: [[EXTRACTVEC_I_I_I_I:%.*]] = shufflevector <4 x i16> [[LOADVECN_I_I]], <4 x i16> poison, <4 x i32> // CHECK-NEXT: store <4 x i16> [[EXTRACTVEC_I_I_I_I]], ptr [[VEC_ADDR_I_I_I_I]], align 8, !tbaa [[TBAA11:![0-9]+]], !noalias [[META8]] // CHECK-NEXT: call spir_func void @__devicelib_ConvertBF16ToFINTELVec3(ptr addrspace(4) noundef [[VEC_ADDR_ASCAST_I_I_I_I]], ptr addrspace(4) noundef [[DST_ASCAST_I_I_I_I]]) #[[ATTR4:[0-9]+]], !noalias [[META8]] -// CHECK-NEXT: [[LOADVEC4_I_I_I_I_I:%.*]] = load <4 x float>, ptr [[DST_I_I_I_I]], align 4, !noalias [[META8]] +// CHECK-NEXT: [[LOADVECN_I_I_I_I_I:%.*]] = load <4 x float>, ptr [[DST_I_I_I_I]], align 4, !noalias [[META8]] // CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 8, ptr nonnull [[VEC_ADDR_I_I_I_I]]), !noalias [[META8]] // CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 16, ptr nonnull [[DST_I_I_I_I]]), !noalias [[META8]] -// CHECK-NEXT: [[EXTRACTVEC4_I:%.*]] = shufflevector <4 x float> [[LOADVEC4_I_I_I_I_I]], <4 x float> poison, <4 x i32> -// CHECK-NEXT: store <4 x float> [[EXTRACTVEC4_I]], ptr addrspace(4) [[AGG_RESULT]], align 16, !alias.scope [[META8]] +// CHECK-NEXT: [[EXTRACTVEC_I:%.*]] = shufflevector <4 x float> [[LOADVECN_I_I_I_I_I]], <4 x float> poison, <4 x i32> +// CHECK-NEXT: store <4 x float> [[EXTRACTVEC_I]], ptr addrspace(4) [[AGG_RESULT]], align 16, !alias.scope [[META8]] // CHECK-NEXT: ret void // SYCL_EXTERNAL auto TestBFtoFDeviceRNE(vec &inp) { @@ -43,19 +43,19 @@ SYCL_EXTERNAL auto TestBFtoFDeviceRNE(vec &inp) { // CHECK-NEXT: [[VEC_ADDR_I_I_I_I:%.*]] = alloca <3 x i16>, align 8 // CHECK-NEXT: [[DST_I_I_I_I:%.*]] = alloca [4 x float], align 4 // CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META15:![0-9]+]]) -// CHECK-NEXT: [[LOADVEC4_I_I:%.*]] = load <4 x i16>, ptr addrspace(4) [[INP]], align 8, !noalias [[META15]] +// CHECK-NEXT: [[LOADVECN_I_I:%.*]] = load <4 x i16>, ptr addrspace(4) [[INP]], align 8, !noalias [[META15]] // CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 8, ptr nonnull [[VEC_ADDR_I_I_I_I]]), !noalias [[META15]] // CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 16, ptr nonnull [[DST_I_I_I_I]]), !noalias [[META15]] // CHECK-NEXT: [[VEC_ADDR_ASCAST_I_I_I_I:%.*]] = addrspacecast ptr [[VEC_ADDR_I_I_I_I]] to ptr addrspace(4) // CHECK-NEXT: [[DST_ASCAST_I_I_I_I:%.*]] = addrspacecast ptr [[DST_I_I_I_I]] to ptr addrspace(4) -// CHECK-NEXT: [[EXTRACTVEC_I_I_I_I:%.*]] = shufflevector <4 x i16> [[LOADVEC4_I_I]], <4 x i16> poison, <4 x i32> +// CHECK-NEXT: [[EXTRACTVEC_I_I_I_I:%.*]] = shufflevector <4 x i16> [[LOADVECN_I_I]], <4 x i16> poison, <4 x i32> // CHECK-NEXT: store <4 x i16> [[EXTRACTVEC_I_I_I_I]], ptr [[VEC_ADDR_I_I_I_I]], align 8, !tbaa [[TBAA11]], !noalias [[META15]] // CHECK-NEXT: call spir_func void @__devicelib_ConvertBF16ToFINTELVec3(ptr addrspace(4) noundef [[VEC_ADDR_ASCAST_I_I_I_I]], ptr addrspace(4) noundef [[DST_ASCAST_I_I_I_I]]) #[[ATTR4]], !noalias [[META15]] -// CHECK-NEXT: [[LOADVEC4_I_I_I_I_I:%.*]] = load <4 x float>, ptr [[DST_I_I_I_I]], align 4, !noalias [[META15]] +// CHECK-NEXT: [[LOADVECN_I_I_I_I_I:%.*]] = load <4 x float>, ptr [[DST_I_I_I_I]], align 4, !noalias [[META15]] // CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 8, ptr nonnull [[VEC_ADDR_I_I_I_I]]), !noalias [[META15]] // CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 16, ptr nonnull [[DST_I_I_I_I]]), !noalias [[META15]] -// CHECK-NEXT: [[EXTRACTVEC4_I:%.*]] = shufflevector <4 x float> [[LOADVEC4_I_I_I_I_I]], <4 x float> poison, <4 x i32> -// CHECK-NEXT: store <4 x float> [[EXTRACTVEC4_I]], ptr addrspace(4) [[AGG_RESULT]], align 16, !alias.scope [[META15]] +// CHECK-NEXT: [[EXTRACTVEC_I:%.*]] = shufflevector <4 x float> [[LOADVECN_I_I_I_I_I]], <4 x float> poison, <4 x i32> +// CHECK-NEXT: store <4 x float> [[EXTRACTVEC_I]], ptr addrspace(4) [[AGG_RESULT]], align 16, !alias.scope [[META15]] // CHECK-NEXT: ret void // SYCL_EXTERNAL auto TestBFtoFDeviceRZ(vec &inp) { @@ -66,23 +66,23 @@ SYCL_EXTERNAL auto TestBFtoFDeviceRZ(vec &inp) { // CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.6") align 16 [[AGG_RESULT:%.*]], ptr addrspace(4) nocapture noundef readonly align 8 dereferenceable(8) [[INP:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META18:![0-9]+]] !sycl_fixed_targets [[META7]] { // CHECK-NEXT: entry: // CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META19:![0-9]+]]) -// CHECK-NEXT: [[LOADVEC4_I_I:%.*]] = load <4 x i16>, ptr addrspace(4) [[INP]], align 8, !noalias [[META19]] -// CHECK-NEXT: [[EXTRACTVEC_I_I:%.*]] = shufflevector <4 x i16> [[LOADVEC4_I_I]], <4 x i16> poison, <3 x i32> +// CHECK-NEXT: [[LOADVECN_I_I:%.*]] = load <4 x i16>, ptr addrspace(4) [[INP]], align 8, !noalias [[META19]] +// CHECK-NEXT: [[EXTRACTVEC_I_I:%.*]] = shufflevector <4 x i16> [[LOADVECN_I_I]], <4 x i16> poison, <3 x i32> // CHECK-NEXT: br label [[FOR_COND_I_I_I:%.*]] // CHECK: for.cond.i.i.i: -// CHECK-NEXT: [[RETVAL1_SROA_0_0_I_I_I:%.*]] = phi <3 x i32> [ undef, [[ENTRY:%.*]] ], [ [[RETVAL1_SROA_0_0_VECBLEND_I_I_I:%.*]], [[FOR_BODY_I_I_I:%.*]] ] +// CHECK-NEXT: [[RETVAL1_SROA_0_0_I_I_I:%.*]] = phi <3 x i32> [ undef, [[ENTRY:%.*]] ], [ [[VECINS_I_I_I:%.*]], [[FOR_BODY_I_I_I:%.*]] ] // CHECK-NEXT: [[I_0_I_I_I:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[INC_I_I_I:%.*]], [[FOR_BODY_I_I_I]] ] // CHECK-NEXT: [[CMP_I_I_I:%.*]] = icmp samesign ult i32 [[I_0_I_I_I]], 3 // CHECK-NEXT: br i1 [[CMP_I_I_I]], label [[FOR_BODY_I_I_I]], label [[_ZNK4SYCL3_V13VECINS0_3EXT6ONEAPI8BFLOAT16ELI3EE7CONVERTIILNS_13ROUNDING_MODEE2EEENS1_IT_LI3EEEV_EXIT:%.*]] // CHECK: for.body.i.i.i: // CHECK-NEXT: [[VECEXT_I_I_I:%.*]] = extractelement <3 x i16> [[EXTRACTVEC_I_I]], i32 [[I_0_I_I_I]] // CHECK-NEXT: [[CALL_I_I_I_I:%.*]] = tail call spir_func noundef i32 @__imf_bfloat162int_rz(i16 noundef zeroext [[VECEXT_I_I_I]]) #[[ATTR4]], !noalias [[META19]] -// CHECK-NEXT: [[VECINS_I_I_I:%.*]] = insertelement <3 x i32> [[RETVAL1_SROA_0_0_I_I_I]], i32 [[CALL_I_I_I_I]], i32 [[I_0_I_I_I]] +// CHECK-NEXT: [[VECINS_I_I_I]] = insertelement <3 x i32> [[RETVAL1_SROA_0_0_I_I_I]], i32 [[CALL_I_I_I_I]], i32 [[I_0_I_I_I]] // CHECK-NEXT: [[INC_I_I_I]] = add nuw nsw i32 [[I_0_I_I_I]], 1 // CHECK-NEXT: br label [[FOR_COND_I_I_I]], !llvm.loop [[LOOP22:![0-9]+]] // CHECK: _ZNK4sycl3_V13vecINS0_3ext6oneapi8bfloat16ELi3EE7convertIiLNS_13rounding_modeE2EEENS1_IT_Li3EEEv.exit: -// CHECK-NEXT: [[EXTRACTVEC4_I:%.*]] = shufflevector <3 x i32> [[RETVAL1_SROA_0_0_I_I_I]], <3 x i32> poison, <4 x i32> -// CHECK-NEXT: store <4 x i32> [[EXTRACTVEC4_I]], ptr addrspace(4) [[AGG_RESULT]], align 16, !alias.scope [[META19]] +// CHECK-NEXT: [[EXTRACTVEC_I:%.*]] = shufflevector <3 x i32> [[RETVAL1_SROA_0_0_I_I_I]], <3 x i32> poison, <4 x i32> +// CHECK-NEXT: store <4 x i32> [[EXTRACTVEC_I]], ptr addrspace(4) [[AGG_RESULT]], align 16, !alias.scope [[META19]] // CHECK-NEXT: ret void // SYCL_EXTERNAL auto TestBFtointDeviceRZ(vec &inp) { @@ -108,19 +108,19 @@ SYCL_EXTERNAL auto TestBFtointDeviceRNE(vec &inp) { // CHECK-NEXT: [[VEC_ADDR_I_I_I_I:%.*]] = alloca <3 x float>, align 16 // CHECK-NEXT: [[DST_I_I_I_I:%.*]] = alloca [4 x %"class.sycl::_V1::ext::oneapi::bfloat16"], align 2 // CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META29:![0-9]+]]) -// CHECK-NEXT: [[LOADVEC4_I_I:%.*]] = load <4 x float>, ptr addrspace(4) [[INP]], align 16, !noalias [[META29]] +// CHECK-NEXT: [[LOADVECN_I_I:%.*]] = load <4 x float>, ptr addrspace(4) [[INP]], align 16, !noalias [[META29]] // CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 16, ptr nonnull [[VEC_ADDR_I_I_I_I]]), !noalias [[META29]] // CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 8, ptr nonnull [[DST_I_I_I_I]]), !noalias [[META29]] // CHECK-NEXT: [[VEC_ADDR_ASCAST_I_I_I_I:%.*]] = addrspacecast ptr [[VEC_ADDR_I_I_I_I]] to ptr addrspace(4) // CHECK-NEXT: [[DST_ASCAST_I_I_I_I:%.*]] = addrspacecast ptr [[DST_I_I_I_I]] to ptr addrspace(4) -// CHECK-NEXT: [[EXTRACTVEC_I_I_I_I:%.*]] = shufflevector <4 x float> [[LOADVEC4_I_I]], <4 x float> poison, <4 x i32> +// CHECK-NEXT: [[EXTRACTVEC_I_I_I_I:%.*]] = shufflevector <4 x float> [[LOADVECN_I_I]], <4 x float> poison, <4 x i32> // CHECK-NEXT: store <4 x float> [[EXTRACTVEC_I_I_I_I]], ptr [[VEC_ADDR_I_I_I_I]], align 16, !tbaa [[TBAA11]], !noalias [[META29]] // CHECK-NEXT: call spir_func void @__devicelib_ConvertFToBF16INTELVec3(ptr addrspace(4) noundef [[VEC_ADDR_ASCAST_I_I_I_I]], ptr addrspace(4) noundef [[DST_ASCAST_I_I_I_I]]) #[[ATTR4]], !noalias [[META29]] -// CHECK-NEXT: [[LOADVEC4_I_I_I_I_I:%.*]] = load <4 x i16>, ptr [[DST_I_I_I_I]], align 2, !noalias [[META29]] +// CHECK-NEXT: [[LOADVECN_I_I_I_I_I:%.*]] = load <4 x i16>, ptr [[DST_I_I_I_I]], align 2, !noalias [[META29]] // CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 16, ptr nonnull [[VEC_ADDR_I_I_I_I]]), !noalias [[META29]] // CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 8, ptr nonnull [[DST_I_I_I_I]]), !noalias [[META29]] -// CHECK-NEXT: [[EXTRACTVEC4_I:%.*]] = shufflevector <4 x i16> [[LOADVEC4_I_I_I_I_I]], <4 x i16> poison, <4 x i32> -// CHECK-NEXT: store <4 x i16> [[EXTRACTVEC4_I]], ptr addrspace(4) [[AGG_RESULT]], align 8, !alias.scope [[META29]] +// CHECK-NEXT: [[EXTRACTVEC_I:%.*]] = shufflevector <4 x i16> [[LOADVECN_I_I_I_I_I]], <4 x i16> poison, <4 x i32> +// CHECK-NEXT: store <4 x i16> [[EXTRACTVEC_I]], ptr addrspace(4) [[AGG_RESULT]], align 8, !alias.scope [[META29]] // CHECK-NEXT: ret void // SYCL_EXTERNAL auto TestFtoBFDeviceRNE(vec &inp) { @@ -131,23 +131,23 @@ SYCL_EXTERNAL auto TestFtoBFDeviceRNE(vec &inp) { // CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.0") align 8 [[AGG_RESULT:%.*]], ptr addrspace(4) nocapture noundef readonly align 16 dereferenceable(16) [[INP:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META32:![0-9]+]] !sycl_fixed_targets [[META7]] { // CHECK-NEXT: entry: // CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META33:![0-9]+]]) -// CHECK-NEXT: [[LOADVEC4_I_I:%.*]] = load <4 x float>, ptr addrspace(4) [[INP]], align 16, !noalias [[META33]] -// CHECK-NEXT: [[EXTRACTVEC_I_I:%.*]] = shufflevector <4 x float> [[LOADVEC4_I_I]], <4 x float> poison, <3 x i32> +// CHECK-NEXT: [[LOADVECN_I_I:%.*]] = load <4 x float>, ptr addrspace(4) [[INP]], align 16, !noalias [[META33]] +// CHECK-NEXT: [[EXTRACTVEC_I_I:%.*]] = shufflevector <4 x float> [[LOADVECN_I_I]], <4 x float> poison, <3 x i32> // CHECK-NEXT: br label [[FOR_COND_I_I_I:%.*]] // CHECK: for.cond.i.i.i: -// CHECK-NEXT: [[RETVAL1_SROA_0_0_I_I_I:%.*]] = phi <3 x i16> [ undef, [[ENTRY:%.*]] ], [ [[RETVAL1_SROA_0_0_VECBLEND_I_I_I:%.*]], [[FOR_BODY_I_I_I:%.*]] ] +// CHECK-NEXT: [[RETVAL1_SROA_0_0_I_I_I:%.*]] = phi <3 x i16> [ undef, [[ENTRY:%.*]] ], [ [[VECINS_I_I_I:%.*]], [[FOR_BODY_I_I_I:%.*]] ] // CHECK-NEXT: [[I_0_I_I_I:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[INC_I_I_I:%.*]], [[FOR_BODY_I_I_I]] ] // CHECK-NEXT: [[CMP_I_I_I:%.*]] = icmp samesign ult i32 [[I_0_I_I_I]], 3 // CHECK-NEXT: br i1 [[CMP_I_I_I]], label [[FOR_BODY_I_I_I]], label [[_ZNK4SYCL3_V13VECIFLI3EE7CONVERTINS0_3EXT6ONEAPI8BFLOAT16ELNS_13ROUNDING_MODEE2EEENS1_IT_LI3EEEV_EXIT:%.*]] // CHECK: for.body.i.i.i: // CHECK-NEXT: [[VECEXT_I_I_I:%.*]] = extractelement <3 x float> [[EXTRACTVEC_I_I]], i32 [[I_0_I_I_I]] // CHECK-NEXT: [[CALL_I_I_I_I:%.*]] = tail call spir_func noundef zeroext i16 @__imf_float2bfloat16_rz(float noundef [[VECEXT_I_I_I]]) #[[ATTR4]], !noalias [[META33]] -// CHECK-NEXT: [[VECINS_I_I_I:%.*]] = insertelement <3 x i16> [[RETVAL1_SROA_0_0_I_I_I]], i16 [[CALL_I_I_I_I]], i32 [[I_0_I_I_I]] +// CHECK-NEXT: [[VECINS_I_I_I]] = insertelement <3 x i16> [[RETVAL1_SROA_0_0_I_I_I]], i16 [[CALL_I_I_I_I]], i32 [[I_0_I_I_I]] // CHECK-NEXT: [[INC_I_I_I]] = add nuw nsw i32 [[I_0_I_I_I]], 1 // CHECK-NEXT: br label [[FOR_COND_I_I_I]], !llvm.loop [[LOOP36:![0-9]+]] // CHECK: _ZNK4sycl3_V13vecIfLi3EE7convertINS0_3ext6oneapi8bfloat16ELNS_13rounding_modeE2EEENS1_IT_Li3EEEv.exit: -// CHECK-NEXT: [[EXTRACTVEC4_I:%.*]] = shufflevector <3 x i16> [[RETVAL1_SROA_0_0_I_I_I]], <3 x i16> poison, <4 x i32> -// CHECK-NEXT: store <4 x i16> [[EXTRACTVEC4_I]], ptr addrspace(4) [[AGG_RESULT]], align 8, !alias.scope [[META33]] +// CHECK-NEXT: [[EXTRACTVEC_I:%.*]] = shufflevector <3 x i16> [[RETVAL1_SROA_0_0_I_I_I]], <3 x i16> poison, <4 x i32> +// CHECK-NEXT: store <4 x i16> [[EXTRACTVEC_I]], ptr addrspace(4) [[AGG_RESULT]], align 8, !alias.scope [[META33]] // CHECK-NEXT: ret void // SYCL_EXTERNAL auto TestFtoBFDeviceRZ(vec &inp) { @@ -158,23 +158,23 @@ SYCL_EXTERNAL auto TestFtoBFDeviceRZ(vec &inp) { // CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.0") align 8 [[AGG_RESULT:%.*]], ptr addrspace(4) nocapture noundef readonly align 16 dereferenceable(16) [[INP:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META37:![0-9]+]] !sycl_fixed_targets [[META7]] { // CHECK-NEXT: entry: // CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META38:![0-9]+]]) -// CHECK-NEXT: [[LOADVEC4_I_I:%.*]] = load <4 x i32>, ptr addrspace(4) [[INP]], align 16, !noalias [[META38]] -// CHECK-NEXT: [[EXTRACTVEC_I_I:%.*]] = shufflevector <4 x i32> [[LOADVEC4_I_I]], <4 x i32> poison, <3 x i32> +// CHECK-NEXT: [[LOADVECN_I_I:%.*]] = load <4 x i32>, ptr addrspace(4) [[INP]], align 16, !noalias [[META38]] +// CHECK-NEXT: [[EXTRACTVEC_I_I:%.*]] = shufflevector <4 x i32> [[LOADVECN_I_I]], <4 x i32> poison, <3 x i32> // CHECK-NEXT: br label [[FOR_COND_I_I_I:%.*]] // CHECK: for.cond.i.i.i: -// CHECK-NEXT: [[RETVAL1_SROA_0_0_I_I_I:%.*]] = phi <3 x i16> [ undef, [[ENTRY:%.*]] ], [ [[RETVAL1_SROA_0_0_VECBLEND_I_I_I:%.*]], [[FOR_BODY_I_I_I:%.*]] ] +// CHECK-NEXT: [[RETVAL1_SROA_0_0_I_I_I:%.*]] = phi <3 x i16> [ undef, [[ENTRY:%.*]] ], [ [[VECINS_I_I_I:%.*]], [[FOR_BODY_I_I_I:%.*]] ] // CHECK-NEXT: [[I_0_I_I_I:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[INC_I_I_I:%.*]], [[FOR_BODY_I_I_I]] ] // CHECK-NEXT: [[CMP_I_I_I:%.*]] = icmp samesign ult i32 [[I_0_I_I_I]], 3 // CHECK-NEXT: br i1 [[CMP_I_I_I]], label [[FOR_BODY_I_I_I]], label [[_ZNK4SYCL3_V13VECIILI3EE7CONVERTINS0_3EXT6ONEAPI8BFLOAT16ELNS_13ROUNDING_MODEE2EEENS1_IT_LI3EEEV_EXIT:%.*]] // CHECK: for.body.i.i.i: // CHECK-NEXT: [[VECEXT_I_I_I:%.*]] = extractelement <3 x i32> [[EXTRACTVEC_I_I]], i32 [[I_0_I_I_I]] // CHECK-NEXT: [[CALL_I_I_I_I:%.*]] = tail call spir_func noundef zeroext i16 @__imf_int2bfloat16_rz(i32 noundef [[VECEXT_I_I_I]]) #[[ATTR4]], !noalias [[META38]] -// CHECK-NEXT: [[VECINS_I_I_I:%.*]] = insertelement <3 x i16> [[RETVAL1_SROA_0_0_I_I_I]], i16 [[CALL_I_I_I_I]], i32 [[I_0_I_I_I]] +// CHECK-NEXT: [[VECINS_I_I_I]] = insertelement <3 x i16> [[RETVAL1_SROA_0_0_I_I_I]], i16 [[CALL_I_I_I_I]], i32 [[I_0_I_I_I]] // CHECK-NEXT: [[INC_I_I_I]] = add nuw nsw i32 [[I_0_I_I_I]], 1 // CHECK-NEXT: br label [[FOR_COND_I_I_I]], !llvm.loop [[LOOP41:![0-9]+]] // CHECK: _ZNK4sycl3_V13vecIiLi3EE7convertINS0_3ext6oneapi8bfloat16ELNS_13rounding_modeE2EEENS1_IT_Li3EEEv.exit: -// CHECK-NEXT: [[EXTRACTVEC4_I:%.*]] = shufflevector <3 x i16> [[RETVAL1_SROA_0_0_I_I_I]], <3 x i16> poison, <4 x i32> -// CHECK-NEXT: store <4 x i16> [[EXTRACTVEC4_I]], ptr addrspace(4) [[AGG_RESULT]], align 8, !alias.scope [[META38]] +// CHECK-NEXT: [[EXTRACTVEC_I:%.*]] = shufflevector <3 x i16> [[RETVAL1_SROA_0_0_I_I_I]], <3 x i16> poison, <4 x i32> +// CHECK-NEXT: store <4 x i16> [[EXTRACTVEC_I]], ptr addrspace(4) [[AGG_RESULT]], align 8, !alias.scope [[META38]] // CHECK-NEXT: ret void // SYCL_EXTERNAL auto TestInttoBFDeviceRZ(vec &inp) { @@ -195,7 +195,7 @@ SYCL_EXTERNAL auto TestLLtoBFDeviceRTP(vec &inp) { } // CHECK-LABEL: define dso_local spir_func void @_Z22TestShorttoBFDeviceRTNRN4sycl3_V13vecIsLi2EEE( -// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.30") align 4 [[AGG_RESULT:%.*]], ptr addrspace(4) nocapture noundef readonly align 4 dereferenceable(4) [[INP:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META48:![0-9]+]] !sycl_fixed_targets [[META7]] { +// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.32") align 4 [[AGG_RESULT:%.*]], ptr addrspace(4) nocapture noundef readonly align 4 dereferenceable(4) [[INP:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META48:![0-9]+]] !sycl_fixed_targets [[META7]] { // CHECK-NEXT: entry: // CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META49:![0-9]+]]) // CHECK-NEXT: [[TMP0:%.*]] = load <2 x i16>, ptr addrspace(4) [[INP]], align 4, !tbaa [[TBAA11]], !noalias [[META49]]