@@ -4895,10 +4895,233 @@ define void @splat_ptr_scatter(i32* %ptr, <4 x i1> %mask, <4 x i32> %val) {
4895
4895
ret void
4896
4896
}
4897
4897
4898
- %struct.foo = type { i8* , i64 , i16 , i16 , i32 }
4898
+ ;
4899
+ ; PR13310
4900
+ ; FIXME: Failure to fold scaled-index into gather/scatter scale operand.
4901
+ ;
4902
+
4903
; Gather through the AVX2 intrinsic with a pre-scaled index.
; The IR multiplies every lane of %index by 4 and passes the product to
; @llvm.x86.avx2.gather.d.ps.256 with a final (scale) argument of 1; %imask is
; bitcast to <8 x float> to form the mask operand.
; The CHECK lines for all four prefixes record the current output: the
; multiply stays a separate `vpslld $2` and the gather address carries no
; scale factor, i.e. the multiply is not merged into the scale operand.
+ define <8 x float > @scaleidx_x86gather (float * %base , <8 x i32 > %index , <8 x i32 > %imask ) nounwind {
4904
+ ; KNL_64-LABEL: scaleidx_x86gather:
4905
+ ; KNL_64: # %bb.0:
4906
+ ; KNL_64-NEXT: vpslld $2, %ymm0, %ymm2
4907
+ ; KNL_64-NEXT: vpxor %xmm0, %xmm0, %xmm0
4908
+ ; KNL_64-NEXT: vgatherdps %ymm1, (%rdi,%ymm2), %ymm0
4909
+ ; KNL_64-NEXT: retq
4910
+ ;
4911
+ ; KNL_32-LABEL: scaleidx_x86gather:
4912
+ ; KNL_32: # %bb.0:
4913
+ ; KNL_32-NEXT: movl {{[0-9]+}}(%esp), %eax
4914
+ ; KNL_32-NEXT: vpslld $2, %ymm0, %ymm2
4915
+ ; KNL_32-NEXT: vpxor %xmm0, %xmm0, %xmm0
4916
+ ; KNL_32-NEXT: vgatherdps %ymm1, (%eax,%ymm2), %ymm0
4917
+ ; KNL_32-NEXT: retl
4918
+ ;
4919
+ ; SKX-LABEL: scaleidx_x86gather:
4920
+ ; SKX: # %bb.0:
4921
+ ; SKX-NEXT: vpslld $2, %ymm0, %ymm2
4922
+ ; SKX-NEXT: vpxor %xmm0, %xmm0, %xmm0
4923
+ ; SKX-NEXT: vgatherdps %ymm1, (%rdi,%ymm2), %ymm0
4924
+ ; SKX-NEXT: retq
4925
+ ;
4926
+ ; SKX_32-LABEL: scaleidx_x86gather:
4927
+ ; SKX_32: # %bb.0:
4928
+ ; SKX_32-NEXT: movl {{[0-9]+}}(%esp), %eax
4929
+ ; SKX_32-NEXT: vpslld $2, %ymm0, %ymm2
4930
+ ; SKX_32-NEXT: vpxor %xmm0, %xmm0, %xmm0
4931
+ ; SKX_32-NEXT: vgatherdps %ymm1, (%eax,%ymm2), %ymm0
4932
+ ; SKX_32-NEXT: retl
4933
+ %ptr = bitcast float * %base to i8*
4934
+ %mask = bitcast <8 x i32 > %imask to <8 x float >
4935
+ %scaledindex = mul <8 x i32 > %index , <i32 4 , i32 4 , i32 4 , i32 4 , i32 4 , i32 4 , i32 4 , i32 4 >
4936
+ %v = call <8 x float > @llvm.x86.avx2.gather.d.ps.256 (<8 x float > undef , i8* %ptr , <8 x i32 > %scaledindex , <8 x float > %mask , i8 1 ) nounwind
4937
+ ret <8 x float > %v
4938
+ }
4939
+
4940
; Variant of the AVX2 gather test in which the intrinsic already uses a scale
; argument of 4 and the index is again multiplied by 4 in every lane.
; Merging the multiply into the scale would need an effective scale of 16.
; NOTE(review): presumably "outofrange" refers to x86 addressing only encoding
; scales 1/2/4/8 - confirm.
; The CHECK lines expect `vpslld $2` to remain and the gather address to keep
; scale 4 on all four prefixes.
+ define <8 x float > @scaleidx_x86gather_outofrange (float * %base , <8 x i32 > %index , <8 x i32 > %imask ) nounwind {
4941
+ ; KNL_64-LABEL: scaleidx_x86gather_outofrange:
4942
+ ; KNL_64: # %bb.0:
4943
+ ; KNL_64-NEXT: vpslld $2, %ymm0, %ymm2
4944
+ ; KNL_64-NEXT: vpxor %xmm0, %xmm0, %xmm0
4945
+ ; KNL_64-NEXT: vgatherdps %ymm1, (%rdi,%ymm2,4), %ymm0
4946
+ ; KNL_64-NEXT: retq
4947
+ ;
4948
+ ; KNL_32-LABEL: scaleidx_x86gather_outofrange:
4949
+ ; KNL_32: # %bb.0:
4950
+ ; KNL_32-NEXT: movl {{[0-9]+}}(%esp), %eax
4951
+ ; KNL_32-NEXT: vpslld $2, %ymm0, %ymm2
4952
+ ; KNL_32-NEXT: vpxor %xmm0, %xmm0, %xmm0
4953
+ ; KNL_32-NEXT: vgatherdps %ymm1, (%eax,%ymm2,4), %ymm0
4954
+ ; KNL_32-NEXT: retl
4955
+ ;
4956
+ ; SKX-LABEL: scaleidx_x86gather_outofrange:
4957
+ ; SKX: # %bb.0:
4958
+ ; SKX-NEXT: vpslld $2, %ymm0, %ymm2
4959
+ ; SKX-NEXT: vpxor %xmm0, %xmm0, %xmm0
4960
+ ; SKX-NEXT: vgatherdps %ymm1, (%rdi,%ymm2,4), %ymm0
4961
+ ; SKX-NEXT: retq
4962
+ ;
4963
+ ; SKX_32-LABEL: scaleidx_x86gather_outofrange:
4964
+ ; SKX_32: # %bb.0:
4965
+ ; SKX_32-NEXT: movl {{[0-9]+}}(%esp), %eax
4966
+ ; SKX_32-NEXT: vpslld $2, %ymm0, %ymm2
4967
+ ; SKX_32-NEXT: vpxor %xmm0, %xmm0, %xmm0
4968
+ ; SKX_32-NEXT: vgatherdps %ymm1, (%eax,%ymm2,4), %ymm0
4969
+ ; SKX_32-NEXT: retl
4970
+ %ptr = bitcast float * %base to i8*
4971
+ %mask = bitcast <8 x i32 > %imask to <8 x float >
4972
+ %scaledindex = mul <8 x i32 > %index , <i32 4 , i32 4 , i32 4 , i32 4 , i32 4 , i32 4 , i32 4 , i32 4 >
4973
+ %v = call <8 x float > @llvm.x86.avx2.gather.d.ps.256 (<8 x float > undef , i8* %ptr , <8 x i32 > %scaledindex , <8 x float > %mask , i8 4 ) nounwind
4974
+ ret <8 x float > %v
4975
+ }
4976
; AVX2 gather intrinsic called by the scaleidx_x86gather* tests. Operands in
; call order: first vector operand (undef in these tests), i8* base pointer,
; index vector, mask vector, i8 scale.
+ declare <8 x float > @llvm.x86.avx2.gather.d.ps.256 (<8 x float >, i8* , <8 x i32 >, <8 x float >, i8 ) nounwind readonly
4977
+
4978
; Scatter through the AVX-512 intrinsic with a pre-scaled index.
; Every lane of %index is shifted left by 1 and the result is passed to
; @llvm.x86.avx512.mask.scatter.dps.512 with a final (scale) argument of 2;
; the i16 %imask is bitcast to the <16 x i1> mask operand.
; The CHECK lines record the current output on all four prefixes: the
; doubling stays a separate `vpaddd %zmm1, %zmm1, %zmm1` and the scatter
; address uses scale 2.
+ define void @scaleidx_x86scatter (<16 x float > %value , float * %base , <16 x i32 > %index , i16 %imask ) nounwind {
4979
+ ; KNL_64-LABEL: scaleidx_x86scatter:
4980
+ ; KNL_64: # %bb.0:
4981
+ ; KNL_64-NEXT: kmovw %esi, %k1
4982
+ ; KNL_64-NEXT: vpaddd %zmm1, %zmm1, %zmm1
4983
+ ; KNL_64-NEXT: vscatterdps %zmm0, (%rdi,%zmm1,2) {%k1}
4984
+ ; KNL_64-NEXT: vzeroupper
4985
+ ; KNL_64-NEXT: retq
4986
+ ;
4987
+ ; KNL_32-LABEL: scaleidx_x86scatter:
4988
+ ; KNL_32: # %bb.0:
4989
+ ; KNL_32-NEXT: movl {{[0-9]+}}(%esp), %eax
4990
+ ; KNL_32-NEXT: kmovw {{[0-9]+}}(%esp), %k1
4991
+ ; KNL_32-NEXT: vpaddd %zmm1, %zmm1, %zmm1
4992
+ ; KNL_32-NEXT: vscatterdps %zmm0, (%eax,%zmm1,2) {%k1}
4993
+ ; KNL_32-NEXT: vzeroupper
4994
+ ; KNL_32-NEXT: retl
4995
+ ;
4996
+ ; SKX-LABEL: scaleidx_x86scatter:
4997
+ ; SKX: # %bb.0:
4998
+ ; SKX-NEXT: kmovw %esi, %k1
4999
+ ; SKX-NEXT: vpaddd %zmm1, %zmm1, %zmm1
5000
+ ; SKX-NEXT: vscatterdps %zmm0, (%rdi,%zmm1,2) {%k1}
5001
+ ; SKX-NEXT: vzeroupper
5002
+ ; SKX-NEXT: retq
5003
+ ;
5004
+ ; SKX_32-LABEL: scaleidx_x86scatter:
5005
+ ; SKX_32: # %bb.0:
5006
+ ; SKX_32-NEXT: movl {{[0-9]+}}(%esp), %eax
5007
+ ; SKX_32-NEXT: kmovw {{[0-9]+}}(%esp), %k1
5008
+ ; SKX_32-NEXT: vpaddd %zmm1, %zmm1, %zmm1
5009
+ ; SKX_32-NEXT: vscatterdps %zmm0, (%eax,%zmm1,2) {%k1}
5010
+ ; SKX_32-NEXT: vzeroupper
5011
+ ; SKX_32-NEXT: retl
5012
+ %ptr = bitcast float * %base to i8*
5013
+ %mask = bitcast i16 %imask to <16 x i1 >
5014
+ %scaledindex = shl <16 x i32 > %index , <i32 1 , i32 1 , i32 1 , i32 1 , i32 1 , i32 1 , i32 1 , i32 1 , i32 1 , i32 1 , i32 1 , i32 1 , i32 1 , i32 1 , i32 1 , i32 1 >
5015
+ call void @llvm.x86.avx512.mask.scatter.dps.512 (i8* %ptr , <16 x i1 > %mask , <16 x i32 > %scaledindex , <16 x float > %value , i32 2 )
5016
+ ret void
5017
+ }
5018
; AVX-512 scatter intrinsic called by scaleidx_x86scatter. Operands in call
; order: i8* base pointer, <16 x i1> mask, index vector, value vector,
; i32 scale.
+ declare void @llvm.x86.avx512.mask.scatter.dps.512 (i8* , <16 x i1 >, <16 x i32 >, <16 x float >, i32 )
5019
+
5020
; Generic masked scatter with a pre-scaled index.
; Every lane of %index is multiplied by 2, the product is used as the vector
; index of a `getelementptr float`, and %value is stored through
; @llvm.masked.scatter.v8f32.v8p0f32 with i32 1 as its third (immarg) operand;
; the i8 %imask is bitcast to the <8 x i1> mask.
; The CHECK lines record the current output: the doubling stays a separate
; `vpaddd %ymm1, %ymm1, %ymm1` and the scatter address uses scale 4.
; The KNL prefixes expect zmm operands and a kshiftlw/kshiftrw $8 pair on the
; mask (shifting left then right by 8 clears the upper 8 bits of the 16-bit
; mask); the SKX prefixes expect ymm operands with the mask moved straight
; into %k1.
+ define void @scaleidx_scatter (<8 x float > %value , float * %base , <8 x i32 > %index , i8 %imask ) nounwind {
5021
+ ; KNL_64-LABEL: scaleidx_scatter:
5022
+ ; KNL_64: # %bb.0:
5023
+ ; KNL_64-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
5024
+ ; KNL_64-NEXT: vpaddd %ymm1, %ymm1, %ymm1
5025
+ ; KNL_64-NEXT: kmovw %esi, %k0
5026
+ ; KNL_64-NEXT: kshiftlw $8, %k0, %k0
5027
+ ; KNL_64-NEXT: kshiftrw $8, %k0, %k1
5028
+ ; KNL_64-NEXT: vscatterdps %zmm0, (%rdi,%zmm1,4) {%k1}
5029
+ ; KNL_64-NEXT: vzeroupper
5030
+ ; KNL_64-NEXT: retq
5031
+ ;
5032
+ ; KNL_32-LABEL: scaleidx_scatter:
5033
+ ; KNL_32: # %bb.0:
5034
+ ; KNL_32-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
5035
+ ; KNL_32-NEXT: movl {{[0-9]+}}(%esp), %eax
5036
+ ; KNL_32-NEXT: vpaddd %ymm1, %ymm1, %ymm1
5037
+ ; KNL_32-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
5038
+ ; KNL_32-NEXT: kmovw %ecx, %k0
5039
+ ; KNL_32-NEXT: kshiftlw $8, %k0, %k0
5040
+ ; KNL_32-NEXT: kshiftrw $8, %k0, %k1
5041
+ ; KNL_32-NEXT: vscatterdps %zmm0, (%eax,%zmm1,4) {%k1}
5042
+ ; KNL_32-NEXT: vzeroupper
5043
+ ; KNL_32-NEXT: retl
5044
+ ;
5045
+ ; SKX-LABEL: scaleidx_scatter:
5046
+ ; SKX: # %bb.0:
5047
+ ; SKX-NEXT: vpaddd %ymm1, %ymm1, %ymm1
5048
+ ; SKX-NEXT: kmovw %esi, %k1
5049
+ ; SKX-NEXT: vscatterdps %ymm0, (%rdi,%ymm1,4) {%k1}
5050
+ ; SKX-NEXT: vzeroupper
5051
+ ; SKX-NEXT: retq
5052
+ ;
5053
+ ; SKX_32-LABEL: scaleidx_scatter:
5054
+ ; SKX_32: # %bb.0:
5055
+ ; SKX_32-NEXT: movl {{[0-9]+}}(%esp), %eax
5056
+ ; SKX_32-NEXT: vpaddd %ymm1, %ymm1, %ymm1
5057
+ ; SKX_32-NEXT: kmovb {{[0-9]+}}(%esp), %k1
5058
+ ; SKX_32-NEXT: vscatterdps %ymm0, (%eax,%ymm1,4) {%k1}
5059
+ ; SKX_32-NEXT: vzeroupper
5060
+ ; SKX_32-NEXT: retl
5061
+ %scaledindex = mul <8 x i32 > %index , <i32 2 , i32 2 , i32 2 , i32 2 , i32 2 , i32 2 , i32 2 , i32 2 >
5062
+ %ptrs = getelementptr float , float * %base , <8 x i32 > %scaledindex
5063
+ %mask = bitcast i8 %imask to <8 x i1 >
5064
+ call void @llvm.masked.scatter.v8f32.v8p0f32 (<8 x float > %value , <8 x float *> %ptrs , i32 1 , <8 x i1 > %mask )
5065
+ ret void
5066
+ }
5067
+
5068
; Variant of the generic masked-scatter test in which every lane of %index is
; multiplied by 4 before being used as the vector index of a
; `getelementptr float`; the scatter call passes i32 2 as its third (immarg)
; operand.
; The CHECK lines expect the multiply to remain as `vpslld $2` and the scatter
; address to keep scale 4 on all four prefixes.
; NOTE(review): presumably "outofrange" because merging the multiply with the
; existing scale of 4 would need scale 16, beyond the 1/2/4/8 that x86
; addressing encodes - confirm.
+ define void @scaleidx_scatter_outofrange (<8 x float > %value , float * %base , <8 x i32 > %index , i8 %imask ) nounwind {
5069
+ ; KNL_64-LABEL: scaleidx_scatter_outofrange:
5070
+ ; KNL_64: # %bb.0:
5071
+ ; KNL_64-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
5072
+ ; KNL_64-NEXT: vpslld $2, %ymm1, %ymm1
5073
+ ; KNL_64-NEXT: kmovw %esi, %k0
5074
+ ; KNL_64-NEXT: kshiftlw $8, %k0, %k0
5075
+ ; KNL_64-NEXT: kshiftrw $8, %k0, %k1
5076
+ ; KNL_64-NEXT: vscatterdps %zmm0, (%rdi,%zmm1,4) {%k1}
5077
+ ; KNL_64-NEXT: vzeroupper
5078
+ ; KNL_64-NEXT: retq
5079
+ ;
5080
+ ; KNL_32-LABEL: scaleidx_scatter_outofrange:
5081
+ ; KNL_32: # %bb.0:
5082
+ ; KNL_32-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
5083
+ ; KNL_32-NEXT: movl {{[0-9]+}}(%esp), %eax
5084
+ ; KNL_32-NEXT: vpslld $2, %ymm1, %ymm1
5085
+ ; KNL_32-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
5086
+ ; KNL_32-NEXT: kmovw %ecx, %k0
5087
+ ; KNL_32-NEXT: kshiftlw $8, %k0, %k0
5088
+ ; KNL_32-NEXT: kshiftrw $8, %k0, %k1
5089
+ ; KNL_32-NEXT: vscatterdps %zmm0, (%eax,%zmm1,4) {%k1}
5090
+ ; KNL_32-NEXT: vzeroupper
5091
+ ; KNL_32-NEXT: retl
5092
+ ;
5093
+ ; SKX-LABEL: scaleidx_scatter_outofrange:
5094
+ ; SKX: # %bb.0:
5095
+ ; SKX-NEXT: vpslld $2, %ymm1, %ymm1
5096
+ ; SKX-NEXT: kmovw %esi, %k1
5097
+ ; SKX-NEXT: vscatterdps %ymm0, (%rdi,%ymm1,4) {%k1}
5098
+ ; SKX-NEXT: vzeroupper
5099
+ ; SKX-NEXT: retq
5100
+ ;
5101
+ ; SKX_32-LABEL: scaleidx_scatter_outofrange:
5102
+ ; SKX_32: # %bb.0:
5103
+ ; SKX_32-NEXT: movl {{[0-9]+}}(%esp), %eax
5104
+ ; SKX_32-NEXT: vpslld $2, %ymm1, %ymm1
5105
+ ; SKX_32-NEXT: kmovb {{[0-9]+}}(%esp), %k1
5106
+ ; SKX_32-NEXT: vscatterdps %ymm0, (%eax,%ymm1,4) {%k1}
5107
+ ; SKX_32-NEXT: vzeroupper
5108
+ ; SKX_32-NEXT: retl
5109
+ %scaledindex = mul <8 x i32 > %index , <i32 4 , i32 4 , i32 4 , i32 4 , i32 4 , i32 4 , i32 4 , i32 4 >
5110
+ %ptrs = getelementptr float , float * %base , <8 x i32 > %scaledindex
5111
+ %mask = bitcast i8 %imask to <8 x i1 >
5112
+ call void @llvm.masked.scatter.v8f32.v8p0f32 (<8 x float > %value , <8 x float *> %ptrs , i32 2 , <8 x i1 > %mask )
5113
+ ret void
5114
+ }
5115
; Generic masked-scatter intrinsic called by the scaleidx_scatter* tests.
; Operands in call order: value vector, vector of pointers, i32 immediate
; (NOTE(review): the alignment operand per the LLVM LangRef - confirm),
; <8 x i1> mask.
+ declare void @llvm.masked.scatter.v8f32.v8p0f32 (<8 x float >, <8 x float *>, i32 immarg, <8 x i1 >)
4899
5116
5117
+ ;
5118
+ ; PR45906
4900
5119
; This used to cause fast-isel to generate bad copy instructions that would
4901
5120
; cause an error in copyPhysReg.
5121
+ ;
5122
+
5123
+ %struct.foo = type { i8* , i64 , i16 , i16 , i32 }
5124
+
4902
5125
define <8 x i64 > @pr45906 (<8 x %struct.foo* > %ptr ) {
4903
5126
; KNL_64-LABEL: pr45906:
4904
5127
; KNL_64: # %bb.0: # %bb
0 commit comments