@@ -14,7 +14,7 @@ $"_ZTSZZ4mainENK3$_0clERN2cl4sycl7handlerEE4Test" = comdat any
14
14
@0 = dso_local global %"class._ZTSN2cl4sycl5INTEL3gpu4simdIiLi16EEE.cl::sycl::INTEL::gpu::simd" zeroinitializer , align 64 #0
15
15
16
16
; Function Attrs: norecurse
17
- define weak_odr dso_local spir_kernel void @"_ZTSZZ4mainENK3$_0clERN2cl4sycl7handlerEE4Test" (ptr addrspace (1 ) %_arg_ ) local_unnamed_addr #1 comdat !kernel_arg_addr_space !8 !kernel_arg_access_qual !9 !kernel_arg_type !10 !kernel_arg_base_type !10 !kernel_arg_type_qual !11 !sycl_explicit_simd !12 !intel_reqd_sub_group_size !8 {
17
+ define weak_odr dso_local spir_kernel void @"_ZTSZZ4mainENK3$_0clERN2cl4sycl7handlerEE4Test" (i32 addrspace (1 )* %_arg_ ) local_unnamed_addr #1 comdat !kernel_arg_addr_space !8 !kernel_arg_access_qual !9 !kernel_arg_type !10 !kernel_arg_base_type !10 !kernel_arg_type_qual !11 !sycl_explicit_simd !12 !intel_reqd_sub_group_size !8 {
18
18
entry:
19
19
%vc.i = alloca %"class._ZTSN2cl4sycl5INTEL3gpu4simdIiLi16EEE.cl::sycl::INTEL::gpu::simd" , align 64
20
20
%agg.tmp.i = alloca %"class._ZTSN2cl4sycl5INTEL3gpu4simdIiLi16EEE.cl::sycl::INTEL::gpu::simd" , align 64
@@ -36,57 +36,63 @@ entry:
36
36
%group.id.x.cast.ty.i.i.i.i.i = zext i32 %group.id.x.i.i.i.i.i to i64
37
37
%mul.i4.i.i.i.i = mul nuw i64 %group.id.x.cast.ty.i.i.i.i.i , %wgsize.x.cast.ty.i.i.i.i.i
38
38
%add.i5.i.i.i.i = add i64 %mul.i4.i.i.i.i , %local_id.x.cast.ty.i.i.i.i.i
39
- call void @llvm.lifetime.start.p0 (i64 64 , ptr nonnull %agg.tmp.i )
40
- call void @llvm.lifetime.start.p0 (i64 64 , ptr nonnull %vc.i ) #5
39
+ %0 = bitcast %"class._ZTSN2cl4sycl5INTEL3gpu4simdIiLi16EEE.cl::sycl::INTEL::gpu::simd" * %agg.tmp.i to i8*
40
+ call void @llvm.lifetime.start.p0i8 (i64 64 , i8* nonnull %0 )
41
+ %1 = bitcast %"class._ZTSN2cl4sycl5INTEL3gpu4simdIiLi16EEE.cl::sycl::INTEL::gpu::simd" * %vc.i to i8*
42
+ call void @llvm.lifetime.start.p0i8 (i64 64 , i8* nonnull %1 ) #5
41
43
%conv.i = trunc i64 %add.i5.i.i.i.i to i32
42
- %0 = addrspacecast ptr %vc.i to ptr addrspace (4 )
44
+ %2 = addrspacecast % "class._ZTSN2cl4sycl5INTEL3gpu4simdIiLi16EEE.cl::sycl::INTEL::gpu::simd" * %vc.i to % "class._ZTSN2cl4sycl5INTEL3gpu4simdIiLi16EEE.cl::sycl::INTEL::gpu::simd" addrspace (4 )*
43
45
%splat.splatinsert.i.i = insertelement <16 x i32 > undef , i32 %conv.i , i32 0
44
46
%splat.splat.i.i = shufflevector <16 x i32 > %splat.splatinsert.i.i , <16 x i32 > undef , <16 x i32 > zeroinitializer
45
- store <16 x i32 > %splat.splat.i.i , ptr addrspace (4 ) %0 , align 64 , !tbaa !13
47
+ %M_data.i13.i = getelementptr inbounds %"class._ZTSN2cl4sycl5INTEL3gpu4simdIiLi16EEE.cl::sycl::INTEL::gpu::simd" , %"class._ZTSN2cl4sycl5INTEL3gpu4simdIiLi16EEE.cl::sycl::INTEL::gpu::simd" addrspace (4 )* %2 , i64 0 , i32 0
48
+ store <16 x i32 > %splat.splat.i.i , <16 x i32 > addrspace (4 )* %M_data.i13.i , align 64 , !tbaa !13
46
49
%conv3.i = trunc i64 %add.i.i.i.i.i to i32
47
50
%splat.splatinsert.i20.i = insertelement <8 x i32 > undef , i32 %conv3.i , i32 0
48
51
%splat.splat.i21.i = shufflevector <8 x i32 > %splat.splatinsert.i20.i , <8 x i32 > undef , <8 x i32 > zeroinitializer
49
- %call.esimd.i.i.i.i.i2 = call <16 x i32 > @llvm.genx.vload.v16i32.p4 ( ptr addrspace (4 ) %0 ) #5
52
+ %call.esimd.i.i.i.i.i2 = call <16 x i32 > @llvm.genx.vload.v16i32.p4v16i32 (< 16 x i32 > addrspace (4 )* %M_data.i13.i ) #5
50
53
%call4.esimd.i.i.i.i = call <16 x i32 > @llvm.genx.wrregioni.v16i32.v8i32.i16.v8i1 (<16 x i32 > %call.esimd.i.i.i.i.i2 , <8 x i32 > %splat.splat.i21.i , i32 0 , i32 8 , i32 1 , i16 0 , i32 0 , <8 x i1 > <i1 true , i1 true , i1 true , i1 true , i1 true , i1 true , i1 true , i1 true >) #5
51
- call void @llvm.genx.vstore.v16i32.p4 (<16 x i32 > %call4.esimd.i.i.i.i , ptr addrspace (4 ) %0 ) #5
54
+ call void @llvm.genx.vstore.v16i32.p4v16i32 (<16 x i32 > %call4.esimd.i.i.i.i , < 16 x i32 > addrspace (4 )* %M_data.i13.i ) #5
52
55
%cmp.i = icmp eq i64 %add.i.i.i.i.i , 0
53
56
%..i = select i1 %cmp.i , i64 %add.i5.i.i.i.i , i64 %add.i.i.i.i.i
54
57
%conv9.i = trunc i64 %..i to i32
55
- ; CHECK: store <16 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>, ptr addrspace(4) addrspacecast (ptr [[NEWGLOBAL]] to ptr addrspace(4)), align 64, !tbaa.struct !16
56
- store <16 x i32 > <i32 1 , i32 1 , i32 1 , i32 1 , i32 1 , i32 1 , i32 1 , i32 1 , i32 1 , i32 1 , i32 1 , i32 1 , i32 1 , i32 1 , i32 1 , i32 1 >, ptr addrspace (4 ) addrspacecast (ptr @ 0 to ptr addrspace (4 )), align 64 , !tbaa.struct !16
58
+ ; CHECK: store <16 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>, <16 x i32> addrspace(4)* addrspacecast (<16 x i32>* getelementptr inbounds ({{.+}}, {{.+}}* bitcast (<16 x i32>* [[NEWGLOBAL]] to {{.+}}*), i64 0, i32 0) to <16 x i32> addrspace(4)* ), align 64, !tbaa.struct !16
59
+ store <16 x i32 > <i32 1 , i32 1 , i32 1 , i32 1 , i32 1 , i32 1 , i32 1 , i32 1 , i32 1 , i32 1 , i32 1 , i32 1 , i32 1 , i32 1 , i32 1 , i32 1 >, < 16 x i32 > addrspace (4 )* addrspacecast (< 16 x i32 >* getelementptr inbounds (% "class._ZTSN2cl4sycl5INTEL3gpu4simdIiLi16EEE.cl::sycl::INTEL::gpu::simd" , % "class._ZTSN2cl4sycl5INTEL3gpu4simdIiLi16EEE.cl::sycl::INTEL::gpu::simd" * @ 0 , i64 0 , i32 0 ) to < 16 x i32 > addrspace (4 )* ), align 64 , !tbaa.struct !16
57
60
%mul.i = shl nsw i32 %conv9.i , 4
58
61
%idx.ext.i = sext i32 %mul.i to i64
59
- %add.ptr.i16 = getelementptr inbounds i32 , ptr addrspace (1 ) %_arg_ , i64 %idx.ext.i
60
- %add.ptr.i = addrspacecast ptr addrspace (1 ) %add.ptr.i16 to ptr addrspace (4 )
61
- %1 = addrspacecast ptr %agg.tmp.i to ptr addrspace (4 )
62
- %call.esimd.i.i.i = call <16 x i32 > @llvm.genx.vload.v16i32.p4 (ptr addrspace (4 ) %0 ) #5
63
- call void @llvm.genx.vstore.v16i32.p4 (<16 x i32 > %call.esimd.i.i.i , ptr addrspace (4 ) %1 ) #5
64
- call spir_func void @_Z3fooPiN2cl4sycl5INTEL3gpu4simdIiLi16EEE (ptr addrspace (4 ) %add.ptr.i , ptr nonnull %agg.tmp.i ) #5
65
- store <16 x i32 > <i32 2 , i32 2 , i32 2 , i32 2 , i32 2 , i32 2 , i32 2 , i32 2 , i32 2 , i32 2 , i32 2 , i32 2 , i32 2 , i32 2 , i32 2 , i32 2 >, ptr addrspace (4 ) addrspacecast (ptr @0 to ptr addrspace (4 )), align 64 , !tbaa.struct !16
66
- call void @llvm.lifetime.end.p0 (i64 64 , ptr nonnull %vc.i ) #5
67
- call void @llvm.lifetime.end.p0 (i64 64 , ptr nonnull %agg.tmp.i )
62
+ %add.ptr.i16 = getelementptr inbounds i32 , i32 addrspace (1 )* %_arg_ , i64 %idx.ext.i
63
+ %add.ptr.i = addrspacecast i32 addrspace (1 )* %add.ptr.i16 to i32 addrspace (4 )*
64
+ %3 = addrspacecast %"class._ZTSN2cl4sycl5INTEL3gpu4simdIiLi16EEE.cl::sycl::INTEL::gpu::simd" * %agg.tmp.i to %"class._ZTSN2cl4sycl5INTEL3gpu4simdIiLi16EEE.cl::sycl::INTEL::gpu::simd" addrspace (4 )*
65
+ %call.esimd.i.i.i = call <16 x i32 > @llvm.genx.vload.v16i32.p4v16i32 (<16 x i32 > addrspace (4 )* %M_data.i13.i ) #5
66
+ %M_data.i2.i.i = getelementptr inbounds %"class._ZTSN2cl4sycl5INTEL3gpu4simdIiLi16EEE.cl::sycl::INTEL::gpu::simd" , %"class._ZTSN2cl4sycl5INTEL3gpu4simdIiLi16EEE.cl::sycl::INTEL::gpu::simd" addrspace (4 )* %3 , i64 0 , i32 0
67
+ call void @llvm.genx.vstore.v16i32.p4v16i32 (<16 x i32 > %call.esimd.i.i.i , <16 x i32 > addrspace (4 )* %M_data.i2.i.i ) #5
68
+ call spir_func void @_Z3fooPiN2cl4sycl5INTEL3gpu4simdIiLi16EEE (i32 addrspace (4 )* %add.ptr.i , %"class._ZTSN2cl4sycl5INTEL3gpu4simdIiLi16EEE.cl::sycl::INTEL::gpu::simd" * nonnull %agg.tmp.i ) #5
69
+ store <16 x i32 > <i32 2 , i32 2 , i32 2 , i32 2 , i32 2 , i32 2 , i32 2 , i32 2 , i32 2 , i32 2 , i32 2 , i32 2 , i32 2 , i32 2 , i32 2 , i32 2 >, <16 x i32 > addrspace (4 )* addrspacecast (<16 x i32 >* getelementptr inbounds (%"class._ZTSN2cl4sycl5INTEL3gpu4simdIiLi16EEE.cl::sycl::INTEL::gpu::simd" , %"class._ZTSN2cl4sycl5INTEL3gpu4simdIiLi16EEE.cl::sycl::INTEL::gpu::simd" * @0 , i64 0 , i32 0 ) to <16 x i32 > addrspace (4 )*), align 64 , !tbaa.struct !16
70
+ call void @llvm.lifetime.end.p0i8 (i64 64 , i8* nonnull %1 ) #5
71
+ call void @llvm.lifetime.end.p0i8 (i64 64 , i8* nonnull %0 )
68
72
ret void
69
73
}
70
74
71
75
; Function Attrs: argmemonly nounwind willreturn
72
- declare void @llvm.lifetime.start.p0 (i64 immarg %agg.tmp.i , ptr nocapture %vc.i ) #2
76
+ declare void @llvm.lifetime.start.p0i8 (i64 immarg %0 , i8* nocapture %1 ) #2
73
77
74
78
; Function Attrs: argmemonly nounwind willreturn
75
- declare void @llvm.lifetime.end.p0 (i64 immarg %agg.tmp.i , ptr nocapture %vc.i ) #2
79
+ declare void @llvm.lifetime.end.p0i8 (i64 immarg %0 , i8* nocapture %1 ) #2
76
80
77
81
; Function Attrs: noinline norecurse nounwind
78
- define dso_local spir_func void @_Z3fooPiN2cl4sycl5INTEL3gpu4simdIiLi16EEE (ptr addrspace (4 ) %C , ptr %v ) local_unnamed_addr #3 {
82
+ define dso_local spir_func void @_Z3fooPiN2cl4sycl5INTEL3gpu4simdIiLi16EEE (i32 addrspace (4 )* %C , % "class._ZTSN2cl4sycl5INTEL3gpu4simdIiLi16EEE.cl::sycl::INTEL::gpu::simd" * %v ) local_unnamed_addr #3 {
79
83
entry:
80
84
%agg.tmp = alloca %"class._ZTSN2cl4sycl5INTEL3gpu4simdIiLi16EEE.cl::sycl::INTEL::gpu::simd" , align 64
81
- %0 = addrspacecast ptr %v to ptr addrspace (4 )
82
- %1 = addrspacecast ptr %agg.tmp to ptr addrspace (4 )
83
- %call.esimd.i.i = call <16 x i32 > @llvm.genx.vload.v16i32.p4 (ptr addrspace (4 ) %0 ), !noalias !17
84
- ; CHECK: {{.+}} = call <16 x i32> @llvm.genx.vload.v16i32.p4(ptr addrspace(4) addrspacecast (ptr [[NEWGLOBAL]] to ptr addrspace(4))), !noalias !17
85
- %call.esimd.i8.i = call <16 x i32 > @llvm.genx.vload.v16i32.p4 (ptr addrspace (4 ) addrspacecast (ptr @0 to ptr addrspace (4 ))), !noalias !17
85
+ %0 = addrspacecast %"class._ZTSN2cl4sycl5INTEL3gpu4simdIiLi16EEE.cl::sycl::INTEL::gpu::simd" * %v to %"class._ZTSN2cl4sycl5INTEL3gpu4simdIiLi16EEE.cl::sycl::INTEL::gpu::simd" addrspace (4 )*
86
+ %1 = addrspacecast %"class._ZTSN2cl4sycl5INTEL3gpu4simdIiLi16EEE.cl::sycl::INTEL::gpu::simd" * %agg.tmp to %"class._ZTSN2cl4sycl5INTEL3gpu4simdIiLi16EEE.cl::sycl::INTEL::gpu::simd" addrspace (4 )*
87
+ %M_data.i.i = getelementptr inbounds %"class._ZTSN2cl4sycl5INTEL3gpu4simdIiLi16EEE.cl::sycl::INTEL::gpu::simd" , %"class._ZTSN2cl4sycl5INTEL3gpu4simdIiLi16EEE.cl::sycl::INTEL::gpu::simd" addrspace (4 )* %0 , i64 0 , i32 0
88
+ %call.esimd.i.i = call <16 x i32 > @llvm.genx.vload.v16i32.p4v16i32 (<16 x i32 > addrspace (4 )* %M_data.i.i ), !noalias !17
89
+ ; CHECK: {{.+}} = call <16 x i32> @llvm.genx.vload.v16i32.p4v16i32(<16 x i32> addrspace(4)* getelementptr ({{.+}}, {{.+}} addrspace(4)* addrspacecast ({{.+}}* bitcast (<16 x i32>* [[NEWGLOBAL]] to {{.+}}*) to {{.+}} addrspace(4)*), i64 0, i32 0)), !noalias !17
90
+ %call.esimd.i8.i = call <16 x i32 > @llvm.genx.vload.v16i32.p4v16i32 (<16 x i32 > addrspace (4 )* getelementptr (%"class._ZTSN2cl4sycl5INTEL3gpu4simdIiLi16EEE.cl::sycl::INTEL::gpu::simd" , %"class._ZTSN2cl4sycl5INTEL3gpu4simdIiLi16EEE.cl::sycl::INTEL::gpu::simd" addrspace (4 )* addrspacecast (%"class._ZTSN2cl4sycl5INTEL3gpu4simdIiLi16EEE.cl::sycl::INTEL::gpu::simd" * @0 to %"class._ZTSN2cl4sycl5INTEL3gpu4simdIiLi16EEE.cl::sycl::INTEL::gpu::simd" addrspace (4 )*), i64 0 , i32 0 )), !noalias !17
86
91
%add.i = add <16 x i32 > %call.esimd.i8.i , %call.esimd.i.i
87
- call void @llvm.genx.vstore.v16i32.p4 (<16 x i32 > %add.i , ptr addrspace (4 ) %1 )
88
- %2 = ptrtoint ptr addrspace (4 ) %C to i64
89
- %call.esimd.i.i2 = call <16 x i32 > @llvm.genx.vload.v16i32.p4 (ptr addrspace (4 ) %1 )
92
+ %M_data.i.i.i = getelementptr inbounds %"class._ZTSN2cl4sycl5INTEL3gpu4simdIiLi16EEE.cl::sycl::INTEL::gpu::simd" , %"class._ZTSN2cl4sycl5INTEL3gpu4simdIiLi16EEE.cl::sycl::INTEL::gpu::simd" addrspace (4 )* %1 , i64 0 , i32 0
93
+ call void @llvm.genx.vstore.v16i32.p4v16i32 (<16 x i32 > %add.i , <16 x i32 > addrspace (4 )* %M_data.i.i.i )
94
+ %2 = ptrtoint i32 addrspace (4 )* %C to i64
95
+ %call.esimd.i.i2 = call <16 x i32 > @llvm.genx.vload.v16i32.p4v16i32 (<16 x i32 > addrspace (4 )* %M_data.i.i.i )
90
96
call void @llvm.genx.svm.block.st.v16i32 (i64 %2 , <16 x i32 > %call.esimd.i.i2 )
91
97
ret void
92
98
}
@@ -95,10 +101,10 @@ entry:
95
101
declare !genx_intrinsic_id !20 <16 x i32 > @llvm.genx.wrregioni.v16i32.v8i32.i16.v8i1 (<16 x i32 > %0 , <8 x i32 > %1 , i32 %2 , i32 %3 , i32 %4 , i16 %5 , i32 %6 , <8 x i1 > %7 ) #4
96
102
97
103
; Function Attrs: nounwind
98
- declare !genx_intrinsic_id !21 <16 x i32 > @llvm.genx.vload.v16i32.p4 ( ptr addrspace (4 ) %0 ) #5
104
+ declare !genx_intrinsic_id !21 <16 x i32 > @llvm.genx.vload.v16i32.p4v16i32 (< 16 x i32 > addrspace (4 )* %0 ) #5
99
105
100
106
; Function Attrs: nounwind
101
- declare !genx_intrinsic_id !22 void @llvm.genx.vstore.v16i32.p4 (<16 x i32 > %0 , ptr addrspace (4 ) %1 ) #5
107
+ declare !genx_intrinsic_id !22 void @llvm.genx.vstore.v16i32.p4v16i32 (<16 x i32 > %0 , < 16 x i32 > addrspace (4 )* %1 ) #5
102
108
103
109
; Function Attrs: nounwind
104
110
declare !genx_intrinsic_id !23 void @llvm.genx.svm.block.st.v16i32 (i64 %0 , <16 x i32 > %1 ) #5
@@ -134,7 +140,7 @@ attributes #5 = { nounwind }
134
140
!2 = !{i32 1 , i32 2 }
135
141
!3 = !{i32 6 , i32 100000 }
136
142
!4 = !{!"clang version 11.0.0" }
137
- !5 = !{ptr @"_ZTSZZ4mainENK3$_0clERN2cl4sycl7handlerEE4Test" , !"_ZTSZZ4mainENK3$_0clERN2cl4sycl7handlerEE4Test" , !6 , i32 0 , i32 0 , !6 , !7 , i32 0 , i32 0 }
143
+ !5 = !{void ( i32 addrspace ( 1 )*)* @"_ZTSZZ4mainENK3$_0clERN2cl4sycl7handlerEE4Test" , !"_ZTSZZ4mainENK3$_0clERN2cl4sycl7handlerEE4Test" , !6 , i32 0 , i32 0 , !6 , !7 , i32 0 , i32 0 }
138
144
!6 = !{i32 0 }
139
145
!7 = !{!"svmptr_t" }
140
146
!8 = !{i32 1 }
0 commit comments