Skip to content

Commit 7f5da80

Browse files
authored
[SYCL] Fix type traits used for shuffle operations (#17055)
* Pointer type was incorrectly treated as arithmetic type, so the case when user passed, for example, `int*` variable to `sycl::select_from_group` ended up mapping to non-existent built-in `SubgroupShuffleINTEL(i32*, ...)`. This is fixed in this PR and now it is mapped to `SubgroupShuffleINTEL(i64, ...)` * Rename type trait `VecTypeIsProhibitedForShuffleEmulation`->`VecTypeIsProhibitedForNativeShuffle`. Old name was confusing. Basically trait is used to determine vec types for which we cannot use native built-ins. * `enable_if`s for shuffle overloads were not completely mutually exclusive leading to compilation error with ambiguous definitions for some types. For example, `marray<bfloat16, 4>` used to satisfy both `NonScalar` and `Bitcast` traits. So, refactoring was done to make them mutually exclusive and more clear.
1 parent 5c965f9 commit 7f5da80

File tree

2 files changed

+199
-23
lines changed

2 files changed

+199
-23
lines changed

sycl/include/sycl/detail/spirv.hpp

Lines changed: 34 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -797,29 +797,36 @@ AtomicMax(multi_ptr<T, AddressSpace, IsDecorated> MPtr, memory_scope Scope,
797797
#ifndef __NVPTX__
798798

799799
template <typename T>
800-
struct TypeIsProhibitedForShuffleEmulation
800+
struct VecTypeIsProhibitedForNativeShuffle
801801
: std::bool_constant<
802+
(detail::get_vec_size<T>::size > 1) &&
802803
check_type_in_v<vector_element_t<T>, double, long, long long,
803804
unsigned long, unsigned long long, half>> {};
804805

806+
// Native shuffle is supported if T is scalar arithmetic type or vector type
807+
// with arithmetic element type which is not in prohibited list.
808+
// detail::is_arithmetic looks through pointer, marray and vec into the element
809+
// type, so we have to exclude marray and pointer types.
805810
template <typename T>
806-
struct VecTypeIsProhibitedForShuffleEmulation
807-
: std::bool_constant<
808-
(detail::get_vec_size<T>::size > 1) &&
809-
TypeIsProhibitedForShuffleEmulation<vector_element_t<T>>::value> {};
811+
struct NativeShuffle
812+
: std::bool_constant<detail::is_arithmetic<T>::value &&
813+
!VecTypeIsProhibitedForNativeShuffle<T>::value &&
814+
!detail::is_marray_v<T> && !detail::is_pointer_v<T>> {
815+
};
810816

817+
// Non-scalar shuffle (emulation via loop + scalar shuffle) is used if we have a
818+
// vector type for which native shuffle is not supported or for marray type.
811819
template <typename T>
812-
using EnableIfNativeShuffle =
813-
std::enable_if_t<detail::is_arithmetic<T>::value &&
814-
!VecTypeIsProhibitedForShuffleEmulation<T>::value &&
815-
!detail::is_marray_v<T>,
816-
T>;
820+
struct NonScalarShuffle
821+
: std::bool_constant<!NativeShuffle<T>::value &&
822+
(detail::is_vec_v<T> || detail::is_marray_v<T>)> {};
823+
824+
template <typename T>
825+
using EnableIfNativeShuffle = std::enable_if_t<NativeShuffle<T>::value, T>;
817826

818827
template <typename T>
819828
using EnableIfNonScalarShuffle =
820-
std::enable_if_t<VecTypeIsProhibitedForShuffleEmulation<T>::value ||
821-
detail::is_marray_v<T>,
822-
T>;
829+
std::enable_if_t<NonScalarShuffle<T>::value, T>;
823830

824831
#else // ifndef __NVPTX__
825832

@@ -835,13 +842,19 @@ using EnableIfNonScalarShuffle =
835842
// Bitcast shuffles can be implemented using a single SubgroupShuffle
836843
// intrinsic, but require type-punning via an appropriate integer type
837844
#ifndef __NVPTX__
845+
846+
// Use bitcast shuffle for trivially copyable types satisfying size requirements
847+
// that are not handled by native shuffle and non-scalar shuffle.
838848
template <typename T>
839-
using EnableIfBitcastShuffle =
840-
std::enable_if_t<!detail::is_arithmetic<T>::value &&
841-
(std::is_trivially_copyable_v<T> &&
842-
(sizeof(T) == 1 || sizeof(T) == 2 || sizeof(T) == 4 ||
843-
sizeof(T) == 8)),
844-
T>;
849+
struct BitcastShuffle : std::bool_constant<!NativeShuffle<T>::value &&
850+
!NonScalarShuffle<T>::value &&
851+
std::is_trivially_copyable_v<T> &&
852+
(sizeof(T) == 1 || sizeof(T) == 2 ||
853+
sizeof(T) == 4 || sizeof(T) == 8)> {
854+
};
855+
856+
template <typename T>
857+
using EnableIfBitcastShuffle = std::enable_if_t<BitcastShuffle<T>::value, T>;
845858
#else
846859
template <typename T>
847860
using EnableIfBitcastShuffle =
@@ -860,10 +873,8 @@ using EnableIfBitcastShuffle =
860873
#ifndef __NVPTX__
861874
template <typename T>
862875
using EnableIfGenericShuffle =
863-
std::enable_if_t<!detail::is_arithmetic<T>::value &&
864-
!(std::is_trivially_copyable_v<T> &&
865-
(sizeof(T) == 1 || sizeof(T) == 2 ||
866-
sizeof(T) == 4 || sizeof(T) == 8)),
876+
std::enable_if_t<!NativeShuffle<T>::value && !NonScalarShuffle<T>::value &&
877+
!BitcastShuffle<T>::value,
867878
T>;
868879
#else
869880
template <typename T>
Lines changed: 165 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,165 @@
1+
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
2+
// RUN: %clangxx -O3 -fsycl -fsycl-device-only -fno-discard-value-names -S -emit-llvm -fno-sycl-instrument-device-code -o - %s | FileCheck %s
3+
// REQUIRES: linux
4+
5+
#include <sycl/sycl.hpp>
6+
7+
using namespace sycl;
8+
using namespace sycl::ext::oneapi;
9+
10+
// CHECK-LABEL: @_Z13test_shuffle1RN4sycl3_V19sub_groupEPNS0_3vecINS0_3ext6oneapi8bfloat16ELi4EEEm(
11+
// CHECK-NEXT: entry:
12+
// CHECK-NEXT: [[AGG_TMP14_I:%.*]] = alloca %"class.sycl::_V1::vec", align 8
13+
// CHECK-NEXT: [[REF_TMP:%.*]] = alloca %"class.sycl::_V1::vec", align 8
14+
// CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw %"class.sycl::_V1::vec", ptr addrspace(4) [[BUF:%.*]], i64 [[ID:%.*]]
15+
// CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr addrspace(4) [[ARRAYIDX]], align 8
16+
// CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 8, ptr nonnull [[REF_TMP]]) #[[ATTR5:[0-9]+]]
17+
// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META8:![0-9]+]])
18+
// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META11:![0-9]+]])
19+
// CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 8, ptr nonnull [[AGG_TMP14_I]]), !noalias [[META8]]
20+
// CHECK-NEXT: store i64 [[TMP0]], ptr [[AGG_TMP14_I]], align 1, !noalias [[META8]]
21+
// CHECK-NEXT: br label [[FOR_COND_I_I:%.*]]
22+
// CHECK: for.cond.i.i:
23+
// CHECK-NEXT: [[S_0_I_I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC_I_I:%.*]], [[FOR_BODY_I_I:%.*]] ]
24+
// CHECK-NEXT: [[CMP_I_I:%.*]] = icmp samesign ult i32 [[S_0_I_I]], 4
25+
// CHECK-NEXT: br i1 [[CMP_I_I]], label [[FOR_BODY_I_I]], label [[_ZN4SYCL3_V117SELECT_FROM_GROUPINS0_9SUB_GROUPENS0_3VECINS0_3EXT6ONEAPI8BFLOAT16ELI4EEEEENST9ENABLE_IFIXAAOOSR3STDE9IS_SAME_VINST5DECAYIT_E4TYPEES2_ESR4SYCL3EXT6ONEAPI12EXPERIMENTALE27IS_USER_CONSTRUCTED_GROUP_VISC_EOOSR3STDE23IS_TRIVIALLY_COPYABLE_VIT0_ESR6DETAIL6IS_VECISD_EE5VALUEESD_E4TYPEESA_SD_NSA_7ID_TYPEE_EXIT:%.*]]
26+
// CHECK: for.body.i.i:
27+
// CHECK-NEXT: [[CONV_I_I_I:%.*]] = zext nneg i32 [[S_0_I_I]] to i64
28+
// CHECK-NEXT: [[ARRAYIDX_I_I_I_I_I:%.*]] = getelementptr inbounds [4 x %"class.sycl::_V1::ext::oneapi::bfloat16"], ptr [[AGG_TMP14_I]], i64 0, i64 [[CONV_I_I_I]]
29+
// CHECK-NEXT: [[TMP1:%.*]] = load i16, ptr [[ARRAYIDX_I_I_I_I_I]], align 2, !tbaa [[TBAA14:![0-9]+]], !noalias [[META18:![0-9]+]]
30+
// CHECK-NEXT: [[CALL4_I_I_I_I:%.*]] = tail call spir_func noundef zeroext i16 @_Z28__spirv_SubgroupShuffleINTELItET_S0_j(i16 noundef zeroext [[TMP1]], i32 noundef 1) #[[ATTR6:[0-9]+]], !noalias [[META19:![0-9]+]]
31+
// CHECK-NEXT: [[ARRAYIDX_I_I_I12_I_I:%.*]] = getelementptr inbounds [4 x %"class.sycl::_V1::ext::oneapi::bfloat16"], ptr [[REF_TMP]], i64 0, i64 [[CONV_I_I_I]]
32+
// CHECK-NEXT: store i16 [[CALL4_I_I_I_I]], ptr [[ARRAYIDX_I_I_I12_I_I]], align 2, !tbaa [[TBAA14]], !alias.scope [[META18]]
33+
// CHECK-NEXT: [[INC_I_I]] = add nuw nsw i32 [[S_0_I_I]], 1
34+
// CHECK-NEXT: br label [[FOR_COND_I_I]], !llvm.loop [[LOOP22:![0-9]+]]
35+
// CHECK: _ZN4sycl3_V117select_from_groupINS0_9sub_groupENS0_3vecINS0_3ext6oneapi8bfloat16ELi4EEEEENSt9enable_ifIXaaoosr3stdE9is_same_vINSt5decayIT_E4typeES2_Esr4sycl3ext6oneapi12experimentalE27is_user_constructed_group_vISC_Eoosr3stdE23is_trivially_copyable_vIT0_Esr6detail6is_vecISD_EE5valueESD_E4typeESA_SD_NSA_7id_typeE.exit:
36+
// CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 8, ptr nonnull [[AGG_TMP14_I]]), !noalias [[META8]]
37+
// CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr [[REF_TMP]], align 8
38+
// CHECK-NEXT: store i64 [[TMP2]], ptr addrspace(4) [[ARRAYIDX]], align 8
39+
// CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 8, ptr nonnull [[REF_TMP]]) #[[ATTR5]]
40+
// CHECK-NEXT: ret void
41+
//
42+
SYCL_EXTERNAL void test_shuffle1(sycl::sub_group &sg, vec<bfloat16, 4> *buf,
43+
size_t id) {
44+
vec<bfloat16, 4> ItemVal = buf[id];
45+
buf[id] = sycl::select_from_group(sg, ItemVal, 1);
46+
}
47+
48+
// CHECK-LABEL: @_Z13test_shuffle2RN4sycl3_V19sub_groupEPNS0_6marrayINS0_3ext6oneapi8bfloat16ELm4EEEm(
49+
// CHECK-NEXT: entry:
50+
// CHECK-NEXT: [[AGG_TMP14_I:%.*]] = alloca %"class.sycl::_V1::marray", align 8
51+
// CHECK-NEXT: [[REF_TMP:%.*]] = alloca %"class.sycl::_V1::marray", align 2
52+
// CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw %"class.sycl::_V1::marray", ptr addrspace(4) [[BUF:%.*]], i64 [[ID:%.*]]
53+
// CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr addrspace(4) [[ARRAYIDX]], align 2, !tbaa [[TBAA25:![0-9]+]]
54+
// CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 8, ptr nonnull [[REF_TMP]]) #[[ATTR5]]
55+
// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META26:![0-9]+]])
56+
// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META29:![0-9]+]])
57+
// CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 8, ptr nonnull [[AGG_TMP14_I]]), !noalias [[META26]]
58+
// CHECK-NEXT: store i64 [[TMP0]], ptr [[AGG_TMP14_I]], align 1, !noalias [[META26]]
59+
// CHECK-NEXT: br label [[ARRAYINIT_BODY_I_I_I:%.*]]
60+
// CHECK: arrayinit.body.i.i.i:
61+
// CHECK-NEXT: [[ARRAYINIT_CUR_IDX_I_I_I:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[ARRAYINIT_CUR_ADD_I_I_I:%.*]], [[ARRAYINIT_BODY_I_I_I]] ]
62+
// CHECK-NEXT: [[ARRAYINIT_CUR_PTR_I_I_I:%.*]] = getelementptr inbounds i8, ptr [[REF_TMP]], i64 [[ARRAYINIT_CUR_IDX_I_I_I]]
63+
// CHECK-NEXT: store i16 0, ptr [[ARRAYINIT_CUR_PTR_I_I_I]], align 2, !alias.scope [[META32:![0-9]+]]
64+
// CHECK-NEXT: [[ARRAYINIT_CUR_ADD_I_I_I]] = add nuw nsw i64 [[ARRAYINIT_CUR_IDX_I_I_I]], 2
65+
// CHECK-NEXT: [[ARRAYINIT_DONE_I_I_I:%.*]] = icmp eq i64 [[ARRAYINIT_CUR_ADD_I_I_I]], 8
66+
// CHECK-NEXT: br i1 [[ARRAYINIT_DONE_I_I_I]], label [[FOR_COND_I_I:%.*]], label [[ARRAYINIT_BODY_I_I_I]]
67+
// CHECK: for.cond.i.i:
68+
// CHECK-NEXT: [[S_0_I_I:%.*]] = phi i32 [ [[INC_I_I:%.*]], [[FOR_BODY_I_I:%.*]] ], [ 0, [[ARRAYINIT_BODY_I_I_I]] ]
69+
// CHECK-NEXT: [[CMP_I_I:%.*]] = icmp samesign ult i32 [[S_0_I_I]], 4
70+
// CHECK-NEXT: br i1 [[CMP_I_I]], label [[FOR_BODY_I_I]], label [[_ZN4SYCL3_V117SELECT_FROM_GROUPINS0_9SUB_GROUPENS0_6MARRAYINS0_3EXT6ONEAPI8BFLOAT16ELM4EEEEENST9ENABLE_IFIXAAOOSR3STDE9IS_SAME_VINST5DECAYIT_E4TYPEES2_ESR4SYCL3EXT6ONEAPI12EXPERIMENTALE27IS_USER_CONSTRUCTED_GROUP_VISC_EOOSR3STDE23IS_TRIVIALLY_COPYABLE_VIT0_ESR6DETAIL6IS_VECISD_EE5VALUEESD_E4TYPEESA_SD_NSA_7ID_TYPEE_EXIT:%.*]]
71+
// CHECK: for.body.i.i:
72+
// CHECK-NEXT: [[CONV_I_I:%.*]] = zext nneg i32 [[S_0_I_I]] to i64
73+
// CHECK-NEXT: [[ARRAYIDX_I_I_I:%.*]] = getelementptr inbounds [4 x %"class.sycl::_V1::ext::oneapi::bfloat16"], ptr [[AGG_TMP14_I]], i64 0, i64 [[CONV_I_I]]
74+
// CHECK-NEXT: [[TMP1:%.*]] = load i16, ptr [[ARRAYIDX_I_I_I]], align 2, !tbaa [[TBAA14]], !noalias [[META32]]
75+
// CHECK-NEXT: [[CALL4_I_I_I_I:%.*]] = tail call spir_func noundef zeroext i16 @_Z28__spirv_SubgroupShuffleINTELItET_S0_j(i16 noundef zeroext [[TMP1]], i32 noundef 1) #[[ATTR6]], !noalias [[META33:![0-9]+]]
76+
// CHECK-NEXT: [[ARRAYIDX_I13_I_I:%.*]] = getelementptr inbounds [4 x %"class.sycl::_V1::ext::oneapi::bfloat16"], ptr [[REF_TMP]], i64 0, i64 [[CONV_I_I]]
77+
// CHECK-NEXT: store i16 [[CALL4_I_I_I_I]], ptr [[ARRAYIDX_I13_I_I]], align 2, !tbaa [[TBAA14]], !alias.scope [[META32]]
78+
// CHECK-NEXT: [[INC_I_I]] = add nuw nsw i32 [[S_0_I_I]], 1
79+
// CHECK-NEXT: br label [[FOR_COND_I_I]], !llvm.loop [[LOOP36:![0-9]+]]
80+
// CHECK: _ZN4sycl3_V117select_from_groupINS0_9sub_groupENS0_6marrayINS0_3ext6oneapi8bfloat16ELm4EEEEENSt9enable_ifIXaaoosr3stdE9is_same_vINSt5decayIT_E4typeES2_Esr4sycl3ext6oneapi12experimentalE27is_user_constructed_group_vISC_Eoosr3stdE23is_trivially_copyable_vIT0_Esr6detail6is_vecISD_EE5valueESD_E4typeESA_SD_NSA_7id_typeE.exit:
81+
// CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 8, ptr nonnull [[AGG_TMP14_I]]), !noalias [[META26]]
82+
// CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr [[REF_TMP]], align 2, !tbaa [[TBAA25]]
83+
// CHECK-NEXT: store i64 [[TMP2]], ptr addrspace(4) [[ARRAYIDX]], align 2, !tbaa [[TBAA25]]
84+
// CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 8, ptr nonnull [[REF_TMP]]) #[[ATTR5]]
85+
// CHECK-NEXT: ret void
86+
//
87+
SYCL_EXTERNAL void test_shuffle2(sycl::sub_group &sg, marray<bfloat16, 4> *buf,
88+
size_t id) {
89+
marray<bfloat16, 4> ItemVal = buf[id];
90+
buf[id] = sycl::select_from_group(sg, ItemVal, 1);
91+
}
92+
93+
// CHECK-LABEL: @_Z13test_shuffle3RN4sycl3_V19sub_groupEPNS0_6marrayINS0_3ext6oneapi8bfloat16ELm5EEEm(
94+
// CHECK-NEXT: entry:
95+
// CHECK-NEXT: [[AGG_TMP14_I:%.*]] = alloca %"class.sycl::_V1::marray.32", align 8
96+
// CHECK-NEXT: [[REF_TMP:%.*]] = alloca %"class.sycl::_V1::marray.32", align 2
97+
// CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw %"class.sycl::_V1::marray.32", ptr addrspace(4) [[BUF:%.*]], i64 [[ID:%.*]]
98+
// CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 10, ptr nonnull [[REF_TMP]]) #[[ATTR5]]
99+
// CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 10, ptr nonnull [[AGG_TMP14_I]]), !noalias [[META38:![0-9]+]]
100+
// CHECK-NEXT: call void @llvm.memcpy.p0.p4.i64(ptr noundef nonnull align 8 dereferenceable(10) [[AGG_TMP14_I]], ptr addrspace(4) noundef align 2 dereferenceable(10) [[ARRAYIDX]], i64 10, i1 false)
101+
// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META38]])
102+
// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META41:![0-9]+]])
103+
// CHECK-NEXT: br label [[ARRAYINIT_BODY_I_I_I:%.*]]
104+
// CHECK: arrayinit.body.i.i.i:
105+
// CHECK-NEXT: [[ARRAYINIT_CUR_IDX_I_I_I:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[ARRAYINIT_CUR_ADD_I_I_I:%.*]], [[ARRAYINIT_BODY_I_I_I]] ]
106+
// CHECK-NEXT: [[ARRAYINIT_CUR_PTR_I_I_I:%.*]] = getelementptr inbounds i8, ptr [[REF_TMP]], i64 [[ARRAYINIT_CUR_IDX_I_I_I]]
107+
// CHECK-NEXT: store i16 0, ptr [[ARRAYINIT_CUR_PTR_I_I_I]], align 2, !alias.scope [[META44:![0-9]+]]
108+
// CHECK-NEXT: [[ARRAYINIT_CUR_ADD_I_I_I]] = add nuw nsw i64 [[ARRAYINIT_CUR_IDX_I_I_I]], 2
109+
// CHECK-NEXT: [[ARRAYINIT_DONE_I_I_I:%.*]] = icmp eq i64 [[ARRAYINIT_CUR_ADD_I_I_I]], 10
110+
// CHECK-NEXT: br i1 [[ARRAYINIT_DONE_I_I_I]], label [[FOR_COND_I_I:%.*]], label [[ARRAYINIT_BODY_I_I_I]]
111+
// CHECK: for.cond.i.i:
112+
// CHECK-NEXT: [[S_0_I_I:%.*]] = phi i32 [ [[INC_I_I:%.*]], [[FOR_BODY_I_I:%.*]] ], [ 0, [[ARRAYINIT_BODY_I_I_I]] ]
113+
// CHECK-NEXT: [[CMP_I_I:%.*]] = icmp samesign ult i32 [[S_0_I_I]], 5
114+
// CHECK-NEXT: br i1 [[CMP_I_I]], label [[FOR_BODY_I_I]], label [[_ZN4SYCL3_V117SELECT_FROM_GROUPINS0_9SUB_GROUPENS0_6MARRAYINS0_3EXT6ONEAPI8BFLOAT16ELM5EEEEENST9ENABLE_IFIXAAOOSR3STDE9IS_SAME_VINST5DECAYIT_E4TYPEES2_ESR4SYCL3EXT6ONEAPI12EXPERIMENTALE27IS_USER_CONSTRUCTED_GROUP_VISC_EOOSR3STDE23IS_TRIVIALLY_COPYABLE_VIT0_ESR6DETAIL6IS_VECISD_EE5VALUEESD_E4TYPEESA_SD_NSA_7ID_TYPEE_EXIT:%.*]]
115+
// CHECK: for.body.i.i:
116+
// CHECK-NEXT: [[CONV_I_I:%.*]] = zext nneg i32 [[S_0_I_I]] to i64
117+
// CHECK-NEXT: [[ARRAYIDX_I_I_I:%.*]] = getelementptr inbounds [5 x %"class.sycl::_V1::ext::oneapi::bfloat16"], ptr [[AGG_TMP14_I]], i64 0, i64 [[CONV_I_I]]
118+
// CHECK-NEXT: [[TMP0:%.*]] = load i16, ptr [[ARRAYIDX_I_I_I]], align 2, !tbaa [[TBAA14]], !noalias [[META44]]
119+
// CHECK-NEXT: [[CALL4_I_I_I_I:%.*]] = tail call spir_func noundef zeroext i16 @_Z28__spirv_SubgroupShuffleINTELItET_S0_j(i16 noundef zeroext [[TMP0]], i32 noundef 1) #[[ATTR6]], !noalias [[META45:![0-9]+]]
120+
// CHECK-NEXT: [[ARRAYIDX_I13_I_I:%.*]] = getelementptr inbounds [5 x %"class.sycl::_V1::ext::oneapi::bfloat16"], ptr [[REF_TMP]], i64 0, i64 [[CONV_I_I]]
121+
// CHECK-NEXT: store i16 [[CALL4_I_I_I_I]], ptr [[ARRAYIDX_I13_I_I]], align 2, !tbaa [[TBAA14]], !alias.scope [[META44]]
122+
// CHECK-NEXT: [[INC_I_I]] = add nuw nsw i32 [[S_0_I_I]], 1
123+
// CHECK-NEXT: br label [[FOR_COND_I_I]], !llvm.loop [[LOOP48:![0-9]+]]
124+
// CHECK: _ZN4sycl3_V117select_from_groupINS0_9sub_groupENS0_6marrayINS0_3ext6oneapi8bfloat16ELm5EEEEENSt9enable_ifIXaaoosr3stdE9is_same_vINSt5decayIT_E4typeES2_Esr4sycl3ext6oneapi12experimentalE27is_user_constructed_group_vISC_Eoosr3stdE23is_trivially_copyable_vIT0_Esr6detail6is_vecISD_EE5valueESD_E4typeESA_SD_NSA_7id_typeE.exit:
125+
// CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 10, ptr nonnull [[AGG_TMP14_I]]), !noalias [[META38]]
126+
// CHECK-NEXT: call void @llvm.memcpy.p4.p0.i64(ptr addrspace(4) align 2 [[ARRAYIDX]], ptr align 2 [[REF_TMP]], i64 10, i1 false), !tbaa.struct [[TBAA_STRUCT49:![0-9]+]]
127+
// CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 10, ptr nonnull [[REF_TMP]]) #[[ATTR5]]
128+
// CHECK-NEXT: ret void
129+
//
130+
SYCL_EXTERNAL void test_shuffle3(sycl::sub_group &sg, marray<bfloat16, 5> *buf,
131+
size_t id) {
132+
marray<bfloat16, 5> ItemVal = buf[id];
133+
buf[id] = sycl::select_from_group(sg, ItemVal, 1);
134+
}
135+
136+
// CHECK-LABEL: @_Z13test_shuffle4RN4sycl3_V19sub_groupEPPim(
137+
// CHECK-NEXT: entry:
138+
// CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw ptr addrspace(4), ptr addrspace(4) [[BUF:%.*]], i64 [[ID:%.*]]
139+
// CHECK-NEXT: [[TMP0:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[ARRAYIDX]], align 8, !tbaa [[TBAA51:![0-9]+]]
140+
// CHECK-NEXT: [[TMP1:%.*]] = ptrtoint ptr addrspace(4) [[TMP0]] to i64
141+
// CHECK-NEXT: [[CALL4_I_I_I:%.*]] = tail call spir_func noundef i64 @_Z28__spirv_SubgroupShuffleINTELImET_S0_j(i64 noundef [[TMP1]], i32 noundef 1) #[[ATTR6]]
142+
// CHECK-NEXT: [[TMP2:%.*]] = inttoptr i64 [[CALL4_I_I_I]] to ptr addrspace(4)
143+
// CHECK-NEXT: store ptr addrspace(4) [[TMP2]], ptr addrspace(4) [[ARRAYIDX]], align 8, !tbaa [[TBAA51]]
144+
// CHECK-NEXT: ret void
145+
//
146+
SYCL_EXTERNAL void test_shuffle4(sycl::sub_group &sg, int **buf, size_t id) {
147+
int *ItemVal = buf[id];
148+
buf[id] = sycl::select_from_group(sg, ItemVal, 1);
149+
}
150+
151+
// CHECK-LABEL: @_Z13test_shuffle5RN4sycl3_V19sub_groupEPPVim(
152+
// CHECK-NEXT: entry:
153+
// CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw ptr addrspace(4), ptr addrspace(4) [[BUF:%.*]], i64 [[ID:%.*]]
154+
// CHECK-NEXT: [[TMP0:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[ARRAYIDX]], align 8, !tbaa [[TBAA51]]
155+
// CHECK-NEXT: [[TMP1:%.*]] = ptrtoint ptr addrspace(4) [[TMP0]] to i64
156+
// CHECK-NEXT: [[CALL4_I_I_I:%.*]] = tail call spir_func noundef i64 @_Z28__spirv_SubgroupShuffleINTELImET_S0_j(i64 noundef [[TMP1]], i32 noundef 1) #[[ATTR6]]
157+
// CHECK-NEXT: [[TMP2:%.*]] = inttoptr i64 [[CALL4_I_I_I]] to ptr addrspace(4)
158+
// CHECK-NEXT: store ptr addrspace(4) [[TMP2]], ptr addrspace(4) [[ARRAYIDX]], align 8, !tbaa [[TBAA51]]
159+
// CHECK-NEXT: ret void
160+
//
161+
SYCL_EXTERNAL void test_shuffle5(sycl::sub_group &sg, volatile int **buf,
162+
size_t id) {
163+
volatile int *ItemVal = buf[id];
164+
buf[id] = sycl::select_from_group(sg, ItemVal, 1);
165+
}

0 commit comments

Comments
 (0)