Skip to content

Commit 8359dbc

Browse files
authored
[X86] combineEXTRACT_SUBVECTOR - fold extract_subvector(subv_broadcast_load(ptr),0) -> load(ptr) (#126523)
This fold is typically handled by SimplifyDemandedVectorElts, but that fails when the subv_broadcast_load node has multiple uses. However, if there is just one use of the load result (and the remaining uses are of the memory chain), we can still replace it with a regular load and update the chain accordingly. Noticed on #126517.
1 parent 20506a0 commit 8359dbc

File tree

3 files changed

+38
-26
lines changed

3 files changed

+38
-26
lines changed

llvm/lib/Target/X86/X86ISelLowering.cpp

+20-4
Original file line numberDiff line numberDiff line change
@@ -58485,10 +58485,26 @@ static SDValue combineEXTRACT_SUBVECTOR(SDNode *N, SelectionDAG &DAG,
5848558485
DAG.isSplatValue(InVec, /*AllowUndefs*/ false)))
5848658486
return extractSubVector(InVec, 0, DAG, DL, SizeInBits);
5848758487

58488-
// If we're extracting a broadcasted subvector, just use the lowest subvector.
58489-
if (IdxVal != 0 && InVec.getOpcode() == X86ISD::SUBV_BROADCAST_LOAD &&
58490-
cast<MemIntrinsicSDNode>(InVec)->getMemoryVT() == VT)
58491-
return extractSubVector(InVec, 0, DAG, DL, SizeInBits);
58488+
// Check if we're extracting a whole broadcasted subvector.
58489+
if (InVec.getOpcode() == X86ISD::SUBV_BROADCAST_LOAD) {
58490+
auto *MemIntr = cast<MemIntrinsicSDNode>(InVec);
58491+
EVT MemVT = MemIntr->getMemoryVT();
58492+
if (MemVT == VT) {
58493+
// Just use the lowest subvector.
58494+
if (IdxVal != 0)
58495+
return extractSubVector(InVec, 0, DAG, DL, SizeInBits);
58496+
// If this is the only use, we can replace with a regular load (this may
58497+
// have been missed by SimplifyDemandedVectorElts due to extra uses of the
58498+
// memory chain).
58499+
if (InVec.hasOneUse()) {
58500+
SDValue Ld =
58501+
DAG.getLoad(MemVT, DL, MemIntr->getChain(), MemIntr->getBasePtr(),
58502+
MemIntr->getMemOperand());
58503+
DAG.makeEquivalentMemoryOrdering(SDValue(MemIntr, 1), Ld.getValue(1));
58504+
return Ld;
58505+
}
58506+
}
58507+
}
5849258508

5849358509
// Attempt to extract from the source of a shuffle vector.
5849458510
if ((InSizeInBits % SizeInBits) == 0 && (IdxVal % NumSubElts) == 0) {

llvm/test/CodeGen/X86/any_extend_vector_inreg_of_broadcast_from_memory.ll

+8-10
Original file line numberDiff line numberDiff line change
@@ -3634,19 +3634,18 @@ define void @vec384_i32_widen_to_i128_factor4_broadcast_to_v3i128_factor3(ptr %i
36343634
;
36353635
; AVX-LABEL: vec384_i32_widen_to_i128_factor4_broadcast_to_v3i128_factor3:
36363636
; AVX: # %bb.0:
3637-
; AVX-NEXT: vbroadcastf128 {{.*#+}} ymm0 = mem[0,1,0,1]
3638-
; AVX-NEXT: vblendps {{.*#+}} xmm1 = xmm0[0],mem[1,2,3]
3637+
; AVX-NEXT: vmovdqa (%rdi), %xmm0
3638+
; AVX-NEXT: vpblendw {{.*#+}} xmm1 = xmm0[0,1],mem[2,3,4,5,6,7]
36393639
; AVX-NEXT: vpaddb (%rsi), %xmm1, %xmm1
36403640
; AVX-NEXT: vmovdqa (%rdi), %xmm2
36413641
; AVX-NEXT: vmovdqa 16(%rdi), %xmm3
36423642
; AVX-NEXT: vpaddb 48(%rsi), %xmm3, %xmm3
3643-
; AVX-NEXT: vpaddb 16(%rsi), %xmm0, %xmm0
36443643
; AVX-NEXT: vpaddb 32(%rsi), %xmm2, %xmm2
3645-
; AVX-NEXT: vmovdqa %xmm2, 32(%rdx)
3644+
; AVX-NEXT: vpaddb 16(%rsi), %xmm0, %xmm0
36463645
; AVX-NEXT: vmovdqa %xmm0, 16(%rdx)
3646+
; AVX-NEXT: vmovdqa %xmm2, 32(%rdx)
36473647
; AVX-NEXT: vmovdqa %xmm3, 48(%rdx)
36483648
; AVX-NEXT: vmovdqa %xmm1, (%rdx)
3649-
; AVX-NEXT: vzeroupper
36503649
; AVX-NEXT: retq
36513650
;
36523651
; AVX2-LABEL: vec384_i32_widen_to_i128_factor4_broadcast_to_v3i128_factor3:
@@ -3820,19 +3819,18 @@ define void @vec384_i64_widen_to_i128_factor2_broadcast_to_v3i128_factor3(ptr %i
38203819
;
38213820
; AVX-LABEL: vec384_i64_widen_to_i128_factor2_broadcast_to_v3i128_factor3:
38223821
; AVX: # %bb.0:
3823-
; AVX-NEXT: vbroadcastf128 {{.*#+}} ymm0 = mem[0,1,0,1]
3824-
; AVX-NEXT: vblendps {{.*#+}} xmm1 = xmm0[0,1],mem[2,3]
3822+
; AVX-NEXT: vmovdqa (%rdi), %xmm0
3823+
; AVX-NEXT: vpblendw {{.*#+}} xmm1 = xmm0[0,1,2,3],mem[4,5,6,7]
38253824
; AVX-NEXT: vpaddb (%rsi), %xmm1, %xmm1
38263825
; AVX-NEXT: vmovdqa (%rdi), %xmm2
38273826
; AVX-NEXT: vmovdqa 16(%rdi), %xmm3
38283827
; AVX-NEXT: vpaddb 48(%rsi), %xmm3, %xmm3
3829-
; AVX-NEXT: vpaddb 16(%rsi), %xmm0, %xmm0
38303828
; AVX-NEXT: vpaddb 32(%rsi), %xmm2, %xmm2
3831-
; AVX-NEXT: vmovdqa %xmm2, 32(%rdx)
3829+
; AVX-NEXT: vpaddb 16(%rsi), %xmm0, %xmm0
38323830
; AVX-NEXT: vmovdqa %xmm0, 16(%rdx)
3831+
; AVX-NEXT: vmovdqa %xmm2, 32(%rdx)
38333832
; AVX-NEXT: vmovdqa %xmm3, 48(%rdx)
38343833
; AVX-NEXT: vmovdqa %xmm1, (%rdx)
3835-
; AVX-NEXT: vzeroupper
38363834
; AVX-NEXT: retq
38373835
;
38383836
; AVX2-LABEL: vec384_i64_widen_to_i128_factor2_broadcast_to_v3i128_factor3:

llvm/test/CodeGen/X86/zero_extend_vector_inreg_of_broadcast_from_memory.ll

+10-12
Original file line numberDiff line numberDiff line change
@@ -4044,18 +4044,17 @@ define void @vec384_i32_widen_to_i128_factor4_broadcast_to_v3i128_factor3(ptr %i
40444044
;
40454045
; AVX-LABEL: vec384_i32_widen_to_i128_factor4_broadcast_to_v3i128_factor3:
40464046
; AVX: # %bb.0:
4047-
; AVX-NEXT: vbroadcastf128 {{.*#+}} ymm0 = mem[0,1,0,1]
4048-
; AVX-NEXT: vblendps {{.*#+}} xmm1 = xmm0[0],mem[1,2,3]
4047+
; AVX-NEXT: vmovdqa (%rdi), %xmm0
4048+
; AVX-NEXT: vpblendw {{.*#+}} xmm1 = xmm0[0,1],mem[2,3,4,5,6,7]
40494049
; AVX-NEXT: vmovd {{.*#+}} xmm2 = mem[0],zero,zero,zero
4050-
; AVX-NEXT: vxorps %xmm3, %xmm3, %xmm3
4051-
; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm3[1,2,3]
4052-
; AVX-NEXT: vpaddb 16(%rsi), %xmm0, %xmm0
40534050
; AVX-NEXT: vpaddb (%rsi), %xmm1, %xmm1
4051+
; AVX-NEXT: vpxor %xmm3, %xmm3, %xmm3
4052+
; AVX-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm3[2,3,4,5,6,7]
4053+
; AVX-NEXT: vpaddb 16(%rsi), %xmm0, %xmm0
40544054
; AVX-NEXT: vpaddb 32(%rsi), %xmm2, %xmm2
40554055
; AVX-NEXT: vmovdqa %xmm2, 32(%rdx)
4056-
; AVX-NEXT: vmovdqa %xmm1, (%rdx)
40574056
; AVX-NEXT: vmovdqa %xmm0, 16(%rdx)
4058-
; AVX-NEXT: vzeroupper
4057+
; AVX-NEXT: vmovdqa %xmm1, (%rdx)
40594058
; AVX-NEXT: retq
40604059
;
40614060
; AVX2-LABEL: vec384_i32_widen_to_i128_factor4_broadcast_to_v3i128_factor3:
@@ -4263,17 +4262,16 @@ define void @vec384_i64_widen_to_i128_factor2_broadcast_to_v3i128_factor3(ptr %i
42634262
;
42644263
; AVX-LABEL: vec384_i64_widen_to_i128_factor2_broadcast_to_v3i128_factor3:
42654264
; AVX: # %bb.0:
4266-
; AVX-NEXT: vbroadcastf128 {{.*#+}} ymm0 = mem[0,1,0,1]
4267-
; AVX-NEXT: vblendps {{.*#+}} xmm1 = xmm0[0,1],mem[2,3]
4265+
; AVX-NEXT: vmovdqa (%rdi), %xmm0
4266+
; AVX-NEXT: vpblendw {{.*#+}} xmm1 = xmm0[0,1,2,3],mem[4,5,6,7]
42684267
; AVX-NEXT: vmovq {{.*#+}} xmm2 = mem[0],zero
4268+
; AVX-NEXT: vpaddb (%rsi), %xmm1, %xmm1
42694269
; AVX-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
42704270
; AVX-NEXT: vpaddb 16(%rsi), %xmm0, %xmm0
4271-
; AVX-NEXT: vpaddb (%rsi), %xmm1, %xmm1
42724271
; AVX-NEXT: vpaddb 32(%rsi), %xmm2, %xmm2
42734272
; AVX-NEXT: vmovdqa %xmm2, 32(%rdx)
4274-
; AVX-NEXT: vmovdqa %xmm1, (%rdx)
42754273
; AVX-NEXT: vmovdqa %xmm0, 16(%rdx)
4276-
; AVX-NEXT: vzeroupper
4274+
; AVX-NEXT: vmovdqa %xmm1, (%rdx)
42774275
; AVX-NEXT: retq
42784276
;
42794277
; AVX2-LABEL: vec384_i64_widen_to_i128_factor2_broadcast_to_v3i128_factor3:

0 commit comments

Comments
 (0)