Skip to content

Commit 4554b5b

Browse files
committed
[X86][AVX] Add PR13310 test coverage
Show failure to fold scaled-index into gather/scatter scale operands
1 parent d024a01 commit 4554b5b

File tree

1 file changed

+224
-1
lines changed

1 file changed

+224
-1
lines changed

llvm/test/CodeGen/X86/masked_gather_scatter.ll

Lines changed: 224 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4895,10 +4895,233 @@ define void @splat_ptr_scatter(i32* %ptr, <4 x i1> %mask, <4 x i32> %val) {
48954895
ret void
48964896
}
48974897

4898-
%struct.foo = type { i8*, i64, i16, i16, i32 }
4898+
;
4899+
; PR13310
4900+
; FIXME: Failure to fold scaled-index into gather/scatter scale operand.
4901+
;
4902+
4903+
; Calls the AVX2 gather intrinsic with the index vector pre-multiplied by 4
; and an immediate scale operand of 1. The assertions below record the current
; codegen: the multiply stays a separate vpslld $2 and the gather address
; carries no scale factor, i.e. the multiply is not folded into the scale
; operand (see the PR13310 FIXME above).
define <8 x float> @scaleidx_x86gather(float* %base, <8 x i32> %index, <8 x i32> %imask) nounwind {
4904+
; KNL_64-LABEL: scaleidx_x86gather:
4905+
; KNL_64: # %bb.0:
4906+
; KNL_64-NEXT: vpslld $2, %ymm0, %ymm2
4907+
; KNL_64-NEXT: vpxor %xmm0, %xmm0, %xmm0
4908+
; KNL_64-NEXT: vgatherdps %ymm1, (%rdi,%ymm2), %ymm0
4909+
; KNL_64-NEXT: retq
4910+
;
4911+
; KNL_32-LABEL: scaleidx_x86gather:
4912+
; KNL_32: # %bb.0:
4913+
; KNL_32-NEXT: movl {{[0-9]+}}(%esp), %eax
4914+
; KNL_32-NEXT: vpslld $2, %ymm0, %ymm2
4915+
; KNL_32-NEXT: vpxor %xmm0, %xmm0, %xmm0
4916+
; KNL_32-NEXT: vgatherdps %ymm1, (%eax,%ymm2), %ymm0
4917+
; KNL_32-NEXT: retl
4918+
;
4919+
; SKX-LABEL: scaleidx_x86gather:
4920+
; SKX: # %bb.0:
4921+
; SKX-NEXT: vpslld $2, %ymm0, %ymm2
4922+
; SKX-NEXT: vpxor %xmm0, %xmm0, %xmm0
4923+
; SKX-NEXT: vgatherdps %ymm1, (%rdi,%ymm2), %ymm0
4924+
; SKX-NEXT: retq
4925+
;
4926+
; SKX_32-LABEL: scaleidx_x86gather:
4927+
; SKX_32: # %bb.0:
4928+
; SKX_32-NEXT: movl {{[0-9]+}}(%esp), %eax
4929+
; SKX_32-NEXT: vpslld $2, %ymm0, %ymm2
4930+
; SKX_32-NEXT: vpxor %xmm0, %xmm0, %xmm0
4931+
; SKX_32-NEXT: vgatherdps %ymm1, (%eax,%ymm2), %ymm0
4932+
; SKX_32-NEXT: retl
4933+
; The intrinsic takes an i8* base pointer, so the float* argument is bitcast.
%ptr = bitcast float* %base to i8*
4934+
; The intrinsic's mask operand is typed <8 x float>; reinterpret the integer mask.
%mask = bitcast <8 x i32> %imask to <8 x float>
4935+
; index * 4 (splat constant): the scaling this test would like to see folded away.
%scaledindex = mul <8 x i32> %index, <i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4>
4936+
; Source operand is undef; the trailing i8 1 is the scale operand.
%v = call <8 x float> @llvm.x86.avx2.gather.d.ps.256(<8 x float> undef, i8* %ptr, <8 x i32> %scaledindex, <8 x float> %mask, i8 1) nounwind
4937+
ret <8 x float> %v
4938+
}
4939+
4940+
; Same shape as scaleidx_x86gather, but the intrinsic's scale operand is
; already 4 while the index is also multiplied by 4, so the combined factor
; would be 16. The assertions below record a separate vpslld $2 plus a scale
; of 4 in the gather address.
; NOTE(review): presumably named "outofrange" because 16 is not an encodable
; x86 address scale (1, 2, 4 or 8) - confirm against the PR13310 discussion.
define <8 x float> @scaleidx_x86gather_outofrange(float* %base, <8 x i32> %index, <8 x i32> %imask) nounwind {
4941+
; KNL_64-LABEL: scaleidx_x86gather_outofrange:
4942+
; KNL_64: # %bb.0:
4943+
; KNL_64-NEXT: vpslld $2, %ymm0, %ymm2
4944+
; KNL_64-NEXT: vpxor %xmm0, %xmm0, %xmm0
4945+
; KNL_64-NEXT: vgatherdps %ymm1, (%rdi,%ymm2,4), %ymm0
4946+
; KNL_64-NEXT: retq
4947+
;
4948+
; KNL_32-LABEL: scaleidx_x86gather_outofrange:
4949+
; KNL_32: # %bb.0:
4950+
; KNL_32-NEXT: movl {{[0-9]+}}(%esp), %eax
4951+
; KNL_32-NEXT: vpslld $2, %ymm0, %ymm2
4952+
; KNL_32-NEXT: vpxor %xmm0, %xmm0, %xmm0
4953+
; KNL_32-NEXT: vgatherdps %ymm1, (%eax,%ymm2,4), %ymm0
4954+
; KNL_32-NEXT: retl
4955+
;
4956+
; SKX-LABEL: scaleidx_x86gather_outofrange:
4957+
; SKX: # %bb.0:
4958+
; SKX-NEXT: vpslld $2, %ymm0, %ymm2
4959+
; SKX-NEXT: vpxor %xmm0, %xmm0, %xmm0
4960+
; SKX-NEXT: vgatherdps %ymm1, (%rdi,%ymm2,4), %ymm0
4961+
; SKX-NEXT: retq
4962+
;
4963+
; SKX_32-LABEL: scaleidx_x86gather_outofrange:
4964+
; SKX_32: # %bb.0:
4965+
; SKX_32-NEXT: movl {{[0-9]+}}(%esp), %eax
4966+
; SKX_32-NEXT: vpslld $2, %ymm0, %ymm2
4967+
; SKX_32-NEXT: vpxor %xmm0, %xmm0, %xmm0
4968+
; SKX_32-NEXT: vgatherdps %ymm1, (%eax,%ymm2,4), %ymm0
4969+
; SKX_32-NEXT: retl
4970+
; The intrinsic takes an i8* base pointer, so the float* argument is bitcast.
%ptr = bitcast float* %base to i8*
4971+
; The intrinsic's mask operand is typed <8 x float>; reinterpret the integer mask.
%mask = bitcast <8 x i32> %imask to <8 x float>
4972+
; index * 4 (splat constant), on top of the scale operand of 4 passed below.
%scaledindex = mul <8 x i32> %index, <i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4>
4973+
; Source operand is undef; the trailing i8 4 is the scale operand.
%v = call <8 x float> @llvm.x86.avx2.gather.d.ps.256(<8 x float> undef, i8* %ptr, <8 x i32> %scaledindex, <8 x float> %mask, i8 4) nounwind
4974+
ret <8 x float> %v
4975+
}
4976+
; AVX2 gather intrinsic called by the two scaleidx_x86gather tests above.
; Operands as used at those call sites: source vector (undef there), i8* base
; pointer, <8 x i32> index vector, <8 x float> mask, and an i8 immediate
; (1 or 4) that appears as the address scale in the asserted assembly.
declare <8 x float> @llvm.x86.avx2.gather.d.ps.256(<8 x float>, i8*, <8 x i32>, <8 x float>, i8) nounwind readonly
4977+
4978+
; Calls the AVX-512 masked scatter intrinsic with the index vector shifted
; left by 1 (i.e. doubled) and an immediate scale operand of 2. The assertions
; below record the current codegen: the doubling stays a separate vpaddd of
; the index with itself and the scatter address keeps a scale of 2, i.e. the
; shift is not folded into the scale operand (see the PR13310 FIXME above).
define void @scaleidx_x86scatter(<16 x float> %value, float* %base, <16 x i32> %index, i16 %imask) nounwind {
4979+
; KNL_64-LABEL: scaleidx_x86scatter:
4980+
; KNL_64: # %bb.0:
4981+
; KNL_64-NEXT: kmovw %esi, %k1
4982+
; KNL_64-NEXT: vpaddd %zmm1, %zmm1, %zmm1
4983+
; KNL_64-NEXT: vscatterdps %zmm0, (%rdi,%zmm1,2) {%k1}
4984+
; KNL_64-NEXT: vzeroupper
4985+
; KNL_64-NEXT: retq
4986+
;
4987+
; KNL_32-LABEL: scaleidx_x86scatter:
4988+
; KNL_32: # %bb.0:
4989+
; KNL_32-NEXT: movl {{[0-9]+}}(%esp), %eax
4990+
; KNL_32-NEXT: kmovw {{[0-9]+}}(%esp), %k1
4991+
; KNL_32-NEXT: vpaddd %zmm1, %zmm1, %zmm1
4992+
; KNL_32-NEXT: vscatterdps %zmm0, (%eax,%zmm1,2) {%k1}
4993+
; KNL_32-NEXT: vzeroupper
4994+
; KNL_32-NEXT: retl
4995+
;
4996+
; SKX-LABEL: scaleidx_x86scatter:
4997+
; SKX: # %bb.0:
4998+
; SKX-NEXT: kmovw %esi, %k1
4999+
; SKX-NEXT: vpaddd %zmm1, %zmm1, %zmm1
5000+
; SKX-NEXT: vscatterdps %zmm0, (%rdi,%zmm1,2) {%k1}
5001+
; SKX-NEXT: vzeroupper
5002+
; SKX-NEXT: retq
5003+
;
5004+
; SKX_32-LABEL: scaleidx_x86scatter:
5005+
; SKX_32: # %bb.0:
5006+
; SKX_32-NEXT: movl {{[0-9]+}}(%esp), %eax
5007+
; SKX_32-NEXT: kmovw {{[0-9]+}}(%esp), %k1
5008+
; SKX_32-NEXT: vpaddd %zmm1, %zmm1, %zmm1
5009+
; SKX_32-NEXT: vscatterdps %zmm0, (%eax,%zmm1,2) {%k1}
5010+
; SKX_32-NEXT: vzeroupper
5011+
; SKX_32-NEXT: retl
5012+
; The intrinsic takes an i8* base pointer, so the float* argument is bitcast.
%ptr = bitcast float* %base to i8*
5013+
; Reinterpret the i16 mask argument as sixteen i1 lanes.
%mask = bitcast i16 %imask to <16 x i1>
5014+
; index << 1 (splat constant), expressed as a shift rather than a multiply.
%scaledindex = shl <16 x i32> %index, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
5015+
; The trailing i32 2 is the scale operand.
call void @llvm.x86.avx512.mask.scatter.dps.512(i8* %ptr, <16 x i1> %mask, <16 x i32> %scaledindex, <16 x float> %value, i32 2)
5016+
ret void
5017+
}
5018+
; AVX-512 masked scatter intrinsic called by scaleidx_x86scatter above.
; Operands as used at that call site: i8* base pointer, <16 x i1> mask,
; <16 x i32> index vector, <16 x float> value to store, and an i32 immediate
; (2 there) that appears as the address scale in the asserted assembly.
declare void @llvm.x86.avx512.mask.scatter.dps.512(i8*, <16 x i1>, <16 x i32>, <16 x float>, i32)
5019+
5020+
; Generic (target-independent) masked scatter through a vector getelementptr
; over float, with the index vector pre-multiplied by 2. The assertions below
; record the current codegen: the doubling stays a separate vpaddd of the
; index with itself and the scatter address uses a scale of 4, i.e. the
; multiply is not folded into the scale (see the PR13310 FIXME above).
; NOTE(review): the scale of 4 presumably comes from the 4-byte float element
; stride of the getelementptr - confirm.
define void @scaleidx_scatter(<8 x float> %value, float* %base, <8 x i32> %index, i8 %imask) nounwind {
5021+
; KNL_64-LABEL: scaleidx_scatter:
5022+
; KNL_64: # %bb.0:
5023+
; KNL_64-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
5024+
; KNL_64-NEXT: vpaddd %ymm1, %ymm1, %ymm1
5025+
; KNL_64-NEXT: kmovw %esi, %k0
5026+
; KNL_64-NEXT: kshiftlw $8, %k0, %k0
5027+
; KNL_64-NEXT: kshiftrw $8, %k0, %k1
5028+
; KNL_64-NEXT: vscatterdps %zmm0, (%rdi,%zmm1,4) {%k1}
5029+
; KNL_64-NEXT: vzeroupper
5030+
; KNL_64-NEXT: retq
5031+
;
5032+
; KNL_32-LABEL: scaleidx_scatter:
5033+
; KNL_32: # %bb.0:
5034+
; KNL_32-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
5035+
; KNL_32-NEXT: movl {{[0-9]+}}(%esp), %eax
5036+
; KNL_32-NEXT: vpaddd %ymm1, %ymm1, %ymm1
5037+
; KNL_32-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
5038+
; KNL_32-NEXT: kmovw %ecx, %k0
5039+
; KNL_32-NEXT: kshiftlw $8, %k0, %k0
5040+
; KNL_32-NEXT: kshiftrw $8, %k0, %k1
5041+
; KNL_32-NEXT: vscatterdps %zmm0, (%eax,%zmm1,4) {%k1}
5042+
; KNL_32-NEXT: vzeroupper
5043+
; KNL_32-NEXT: retl
5044+
;
5045+
; SKX-LABEL: scaleidx_scatter:
5046+
; SKX: # %bb.0:
5047+
; SKX-NEXT: vpaddd %ymm1, %ymm1, %ymm1
5048+
; SKX-NEXT: kmovw %esi, %k1
5049+
; SKX-NEXT: vscatterdps %ymm0, (%rdi,%ymm1,4) {%k1}
5050+
; SKX-NEXT: vzeroupper
5051+
; SKX-NEXT: retq
5052+
;
5053+
; SKX_32-LABEL: scaleidx_scatter:
5054+
; SKX_32: # %bb.0:
5055+
; SKX_32-NEXT: movl {{[0-9]+}}(%esp), %eax
5056+
; SKX_32-NEXT: vpaddd %ymm1, %ymm1, %ymm1
5057+
; SKX_32-NEXT: kmovb {{[0-9]+}}(%esp), %k1
5058+
; SKX_32-NEXT: vscatterdps %ymm0, (%eax,%ymm1,4) {%k1}
5059+
; SKX_32-NEXT: vzeroupper
5060+
; SKX_32-NEXT: retl
5061+
; index * 2 (splat constant), applied before the address computation.
%scaledindex = mul <8 x i32> %index, <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
5062+
; Vector GEP: one float* per lane, base + scaledindex elements.
%ptrs = getelementptr float, float* %base, <8 x i32> %scaledindex
5063+
; Reinterpret the i8 mask argument as eight i1 lanes.
%mask = bitcast i8 %imask to <8 x i1>
5064+
; The i32 immediate operand is 1 here.
call void @llvm.masked.scatter.v8f32.v8p0f32(<8 x float> %value, <8 x float*> %ptrs, i32 1, <8 x i1> %mask)
5065+
ret void
5066+
}
5067+
5068+
; Same shape as scaleidx_scatter, but the index is multiplied by 4 before the
; float getelementptr. The assertions below record a separate vpslld $2 plus a
; scale of 4 in the scatter address, so the combined factor would be 16.
; NOTE(review): presumably named "outofrange" because 16 is not an encodable
; x86 address scale (1, 2, 4 or 8) - confirm against the PR13310 discussion.
define void @scaleidx_scatter_outofrange(<8 x float> %value, float* %base, <8 x i32> %index, i8 %imask) nounwind {
5069+
; KNL_64-LABEL: scaleidx_scatter_outofrange:
5070+
; KNL_64: # %bb.0:
5071+
; KNL_64-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
5072+
; KNL_64-NEXT: vpslld $2, %ymm1, %ymm1
5073+
; KNL_64-NEXT: kmovw %esi, %k0
5074+
; KNL_64-NEXT: kshiftlw $8, %k0, %k0
5075+
; KNL_64-NEXT: kshiftrw $8, %k0, %k1
5076+
; KNL_64-NEXT: vscatterdps %zmm0, (%rdi,%zmm1,4) {%k1}
5077+
; KNL_64-NEXT: vzeroupper
5078+
; KNL_64-NEXT: retq
5079+
;
5080+
; KNL_32-LABEL: scaleidx_scatter_outofrange:
5081+
; KNL_32: # %bb.0:
5082+
; KNL_32-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
5083+
; KNL_32-NEXT: movl {{[0-9]+}}(%esp), %eax
5084+
; KNL_32-NEXT: vpslld $2, %ymm1, %ymm1
5085+
; KNL_32-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
5086+
; KNL_32-NEXT: kmovw %ecx, %k0
5087+
; KNL_32-NEXT: kshiftlw $8, %k0, %k0
5088+
; KNL_32-NEXT: kshiftrw $8, %k0, %k1
5089+
; KNL_32-NEXT: vscatterdps %zmm0, (%eax,%zmm1,4) {%k1}
5090+
; KNL_32-NEXT: vzeroupper
5091+
; KNL_32-NEXT: retl
5092+
;
5093+
; SKX-LABEL: scaleidx_scatter_outofrange:
5094+
; SKX: # %bb.0:
5095+
; SKX-NEXT: vpslld $2, %ymm1, %ymm1
5096+
; SKX-NEXT: kmovw %esi, %k1
5097+
; SKX-NEXT: vscatterdps %ymm0, (%rdi,%ymm1,4) {%k1}
5098+
; SKX-NEXT: vzeroupper
5099+
; SKX-NEXT: retq
5100+
;
5101+
; SKX_32-LABEL: scaleidx_scatter_outofrange:
5102+
; SKX_32: # %bb.0:
5103+
; SKX_32-NEXT: movl {{[0-9]+}}(%esp), %eax
5104+
; SKX_32-NEXT: vpslld $2, %ymm1, %ymm1
5105+
; SKX_32-NEXT: kmovb {{[0-9]+}}(%esp), %k1
5106+
; SKX_32-NEXT: vscatterdps %ymm0, (%eax,%ymm1,4) {%k1}
5107+
; SKX_32-NEXT: vzeroupper
5108+
; SKX_32-NEXT: retl
5109+
; index * 4 (splat constant), applied before the address computation.
%scaledindex = mul <8 x i32> %index, <i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4>
5110+
; Vector GEP: one float* per lane, base + scaledindex elements.
%ptrs = getelementptr float, float* %base, <8 x i32> %scaledindex
5111+
; Reinterpret the i8 mask argument as eight i1 lanes.
%mask = bitcast i8 %imask to <8 x i1>
5112+
; The i32 immediate operand is 2 here (it is 1 in scaleidx_scatter).
call void @llvm.masked.scatter.v8f32.v8p0f32(<8 x float> %value, <8 x float*> %ptrs, i32 2, <8 x i1> %mask)
5113+
ret void
5114+
}
5115+
; Generic masked scatter intrinsic called by scaleidx_scatter and
; scaleidx_scatter_outofrange above. Operands as used at those call sites:
; <8 x float> value to store, <8 x float*> per-lane destination pointers, an
; i32 immediate (1 and 2 there), and an <8 x i1> mask.
; NOTE(review): the i32 immediate is presumably the alignment operand
; described in the LLVM LangRef - confirm.
declare void @llvm.masked.scatter.v8f32.v8p0f32(<8 x float>, <8 x float*>, i32 immarg, <8 x i1>)
48995116

5117+
;
5118+
; PR45906
49005119
; This used to cause fast-isel to generate bad copy instructions that would
49015120
; cause an error in copyPhysReg.
5121+
;
5122+
5123+
; Struct type referenced by the pr45906 test below, whose argument is a
; vector of eight %struct.foo pointers.
%struct.foo = type { i8*, i64, i16, i16, i32 }
5124+
49025125
define <8 x i64> @pr45906(<8 x %struct.foo*> %ptr) {
49035126
; KNL_64-LABEL: pr45906:
49045127
; KNL_64: # %bb.0: # %bb

0 commit comments

Comments
 (0)