Skip to content

Commit befa925

Browse files
author
Rin
authored
[MachineLICM][AArch64] Hoist COPY instructions with other uses in the loop (#71403)
When a loop contains a COPY instruction whose result has other uses inside the loop, we want to hoist the COPY, which in turn allows its users to be hoisted as well. Co-authored-by: David Green <[email protected]>
1 parent f7b5c25 commit befa925

27 files changed

+3270
-3571
lines changed

llvm/lib/CodeGen/MachineLICM.cpp

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1262,6 +1262,18 @@ bool MachineLICMBase::IsProfitableToHoist(MachineInstr &MI,
12621262
return false;
12631263
}
12641264

1265+
// If we have a COPY with other uses in the loop, hoist to allow the users to
1266+
// also be hoisted.
1267+
if (MI.isCopy() && MI.getOperand(0).isReg() &&
1268+
MI.getOperand(0).getReg().isVirtual() && MI.getOperand(1).isReg() &&
1269+
MI.getOperand(1).getReg().isVirtual() &&
1270+
IsLoopInvariantInst(MI, CurLoop) &&
1271+
any_of(MRI->use_nodbg_instructions(MI.getOperand(0).getReg()),
1272+
[&CurLoop](MachineInstr &UseMI) {
1273+
return CurLoop->contains(&UseMI);
1274+
}))
1275+
return true;
1276+
12651277
// High register pressure situation, only hoist if the instruction is going
12661278
// to be remat'ed.
12671279
if (!isTriviallyReMaterializable(MI) &&

llvm/test/CodeGen/AArch64/tbl-loops.ll

Lines changed: 63 additions & 63 deletions
Original file line numberDiff line numberDiff line change
@@ -52,19 +52,19 @@ define void @loop1(ptr noalias nocapture noundef writeonly %dst, ptr nocapture n
5252
; CHECK-NEXT: b.eq .LBB0_8
5353
; CHECK-NEXT: .LBB0_6: // %for.body.preheader1
5454
; CHECK-NEXT: movi d0, #0000000000000000
55-
; CHECK-NEXT: sub w10, w2, w10
5655
; CHECK-NEXT: mov w11, #1132396544 // =0x437f0000
56+
; CHECK-NEXT: sub w10, w2, w10
57+
; CHECK-NEXT: fmov s1, w11
5758
; CHECK-NEXT: .LBB0_7: // %for.body
5859
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
59-
; CHECK-NEXT: fmov s2, w11
60-
; CHECK-NEXT: ldr s1, [x8], #4
61-
; CHECK-NEXT: fcmp s1, s2
62-
; CHECK-NEXT: fcsel s2, s2, s1, gt
63-
; CHECK-NEXT: fcmp s1, #0.0
64-
; CHECK-NEXT: fcsel s1, s0, s2, mi
60+
; CHECK-NEXT: ldr s2, [x8], #4
61+
; CHECK-NEXT: fcmp s2, s1
62+
; CHECK-NEXT: fcsel s3, s1, s2, gt
63+
; CHECK-NEXT: fcmp s2, #0.0
64+
; CHECK-NEXT: fcsel s2, s0, s3, mi
6565
; CHECK-NEXT: subs w10, w10, #1
66-
; CHECK-NEXT: fcvtzs w12, s1
67-
; CHECK-NEXT: strb w12, [x9], #1
66+
; CHECK-NEXT: fcvtzs w11, s2
67+
; CHECK-NEXT: strb w11, [x9], #1
6868
; CHECK-NEXT: b.ne .LBB0_7
6969
; CHECK-NEXT: .LBB0_8: // %for.cond.cleanup
7070
; CHECK-NEXT: ret
@@ -165,25 +165,25 @@ define void @loop2(ptr noalias nocapture noundef writeonly %dst, ptr nocapture n
165165
; CHECK-NEXT: mov x9, x0
166166
; CHECK-NEXT: .LBB1_5: // %for.body.preheader1
167167
; CHECK-NEXT: movi d0, #0000000000000000
168-
; CHECK-NEXT: sub w10, w2, w10
169168
; CHECK-NEXT: mov w11, #1132396544 // =0x437f0000
169+
; CHECK-NEXT: sub w10, w2, w10
170+
; CHECK-NEXT: fmov s1, w11
170171
; CHECK-NEXT: .LBB1_6: // %for.body
171172
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
172-
; CHECK-NEXT: ldp s1, s3, [x8], #8
173-
; CHECK-NEXT: fmov s2, w11
174-
; CHECK-NEXT: fcmp s1, s2
175-
; CHECK-NEXT: fcsel s4, s2, s1, gt
176-
; CHECK-NEXT: fcmp s1, #0.0
177-
; CHECK-NEXT: fcsel s1, s0, s4, mi
178-
; CHECK-NEXT: fcmp s3, s2
179-
; CHECK-NEXT: fcsel s2, s2, s3, gt
173+
; CHECK-NEXT: ldp s2, s3, [x8], #8
174+
; CHECK-NEXT: fcmp s2, s1
175+
; CHECK-NEXT: fcsel s4, s1, s2, gt
176+
; CHECK-NEXT: fcmp s2, #0.0
177+
; CHECK-NEXT: fcsel s2, s0, s4, mi
178+
; CHECK-NEXT: fcmp s3, s1
179+
; CHECK-NEXT: fcsel s4, s1, s3, gt
180180
; CHECK-NEXT: fcmp s3, #0.0
181-
; CHECK-NEXT: fcvtzs w12, s1
182-
; CHECK-NEXT: fcsel s2, s0, s2, mi
181+
; CHECK-NEXT: fcvtzs w11, s2
182+
; CHECK-NEXT: fcsel s3, s0, s4, mi
183183
; CHECK-NEXT: subs w10, w10, #1
184-
; CHECK-NEXT: strb w12, [x9]
185-
; CHECK-NEXT: fcvtzs w13, s2
186-
; CHECK-NEXT: strb w13, [x9, #1]
184+
; CHECK-NEXT: strb w11, [x9]
185+
; CHECK-NEXT: fcvtzs w12, s3
186+
; CHECK-NEXT: strb w12, [x9, #1]
187187
; CHECK-NEXT: add x9, x9, #2
188188
; CHECK-NEXT: b.ne .LBB1_6
189189
; CHECK-NEXT: .LBB1_7: // %for.cond.cleanup
@@ -380,33 +380,33 @@ define void @loop3(ptr noalias nocapture noundef writeonly %dst, ptr nocapture n
380380
; CHECK-NEXT: mov x9, x0
381381
; CHECK-NEXT: .LBB2_7: // %for.body.preheader1
382382
; CHECK-NEXT: movi d0, #0000000000000000
383-
; CHECK-NEXT: sub w10, w2, w10
384383
; CHECK-NEXT: mov w11, #1132396544 // =0x437f0000
384+
; CHECK-NEXT: sub w10, w2, w10
385+
; CHECK-NEXT: fmov s1, w11
385386
; CHECK-NEXT: .LBB2_8: // %for.body
386387
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
387-
; CHECK-NEXT: ldp s1, s3, [x8]
388-
; CHECK-NEXT: fmov s2, w11
389-
; CHECK-NEXT: fcmp s1, s2
390-
; CHECK-NEXT: fcsel s4, s2, s1, gt
391-
; CHECK-NEXT: fcmp s1, #0.0
392-
; CHECK-NEXT: fcsel s1, s0, s4, mi
393-
; CHECK-NEXT: fcmp s3, s2
394-
; CHECK-NEXT: fcsel s4, s2, s3, gt
388+
; CHECK-NEXT: ldp s2, s3, [x8]
389+
; CHECK-NEXT: fcmp s2, s1
390+
; CHECK-NEXT: fcsel s4, s1, s2, gt
391+
; CHECK-NEXT: fcmp s2, #0.0
392+
; CHECK-NEXT: fcsel s2, s0, s4, mi
393+
; CHECK-NEXT: fcmp s3, s1
394+
; CHECK-NEXT: fcsel s4, s1, s3, gt
395395
; CHECK-NEXT: fcmp s3, #0.0
396396
; CHECK-NEXT: ldr s3, [x8, #8]
397-
; CHECK-NEXT: fcvtzs w12, s1
397+
; CHECK-NEXT: fcvtzs w11, s2
398398
; CHECK-NEXT: add x8, x8, #12
399399
; CHECK-NEXT: fcsel s4, s0, s4, mi
400-
; CHECK-NEXT: fcmp s3, s2
401-
; CHECK-NEXT: strb w12, [x9]
402-
; CHECK-NEXT: fcsel s2, s2, s3, gt
400+
; CHECK-NEXT: fcmp s3, s1
401+
; CHECK-NEXT: strb w11, [x9]
402+
; CHECK-NEXT: fcsel s5, s1, s3, gt
403403
; CHECK-NEXT: fcmp s3, #0.0
404-
; CHECK-NEXT: fcvtzs w13, s4
405-
; CHECK-NEXT: fcsel s2, s0, s2, mi
404+
; CHECK-NEXT: fcvtzs w12, s4
405+
; CHECK-NEXT: fcsel s3, s0, s5, mi
406406
; CHECK-NEXT: subs w10, w10, #1
407-
; CHECK-NEXT: strb w13, [x9, #1]
408-
; CHECK-NEXT: fcvtzs w14, s2
409-
; CHECK-NEXT: strb w14, [x9, #2]
407+
; CHECK-NEXT: strb w12, [x9, #1]
408+
; CHECK-NEXT: fcvtzs w13, s3
409+
; CHECK-NEXT: strb w13, [x9, #2]
410410
; CHECK-NEXT: add x9, x9, #3
411411
; CHECK-NEXT: b.ne .LBB2_8
412412
; CHECK-NEXT: .LBB2_9: // %for.cond.cleanup
@@ -549,39 +549,39 @@ define void @loop4(ptr noalias nocapture noundef writeonly %dst, ptr nocapture n
549549
; CHECK-NEXT: mov x9, x0
550550
; CHECK-NEXT: .LBB3_5: // %for.body.preheader1
551551
; CHECK-NEXT: movi d0, #0000000000000000
552-
; CHECK-NEXT: sub w10, w2, w10
553552
; CHECK-NEXT: mov w11, #1132396544 // =0x437f0000
553+
; CHECK-NEXT: sub w10, w2, w10
554+
; CHECK-NEXT: fmov s1, w11
554555
; CHECK-NEXT: .LBB3_6: // %for.body
555556
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
556-
; CHECK-NEXT: ldp s1, s3, [x8]
557-
; CHECK-NEXT: fmov s2, w11
558-
; CHECK-NEXT: fcmp s1, s2
559-
; CHECK-NEXT: fcsel s4, s2, s1, gt
560-
; CHECK-NEXT: fcmp s1, #0.0
561-
; CHECK-NEXT: fcsel s1, s0, s4, mi
562-
; CHECK-NEXT: fcmp s3, s2
563-
; CHECK-NEXT: fcsel s4, s2, s3, gt
557+
; CHECK-NEXT: ldp s2, s3, [x8]
558+
; CHECK-NEXT: fcmp s2, s1
559+
; CHECK-NEXT: fcsel s4, s1, s2, gt
560+
; CHECK-NEXT: fcmp s2, #0.0
561+
; CHECK-NEXT: fcsel s2, s0, s4, mi
562+
; CHECK-NEXT: fcmp s3, s1
563+
; CHECK-NEXT: fcsel s4, s1, s3, gt
564564
; CHECK-NEXT: fcmp s3, #0.0
565565
; CHECK-NEXT: ldp s3, s5, [x8, #8]
566-
; CHECK-NEXT: fcvtzs w12, s1
566+
; CHECK-NEXT: fcvtzs w11, s2
567567
; CHECK-NEXT: add x8, x8, #16
568568
; CHECK-NEXT: fcsel s4, s0, s4, mi
569-
; CHECK-NEXT: fcmp s3, s2
570-
; CHECK-NEXT: strb w12, [x9]
571-
; CHECK-NEXT: fcsel s6, s2, s3, gt
569+
; CHECK-NEXT: fcmp s3, s1
570+
; CHECK-NEXT: strb w11, [x9]
571+
; CHECK-NEXT: fcsel s6, s1, s3, gt
572572
; CHECK-NEXT: fcmp s3, #0.0
573-
; CHECK-NEXT: fcvtzs w13, s4
573+
; CHECK-NEXT: fcvtzs w12, s4
574574
; CHECK-NEXT: fcsel s3, s0, s6, mi
575-
; CHECK-NEXT: fcmp s5, s2
576-
; CHECK-NEXT: strb w13, [x9, #1]
577-
; CHECK-NEXT: fcsel s2, s2, s5, gt
575+
; CHECK-NEXT: fcmp s5, s1
576+
; CHECK-NEXT: strb w12, [x9, #1]
577+
; CHECK-NEXT: fcsel s6, s1, s5, gt
578578
; CHECK-NEXT: fcmp s5, #0.0
579-
; CHECK-NEXT: fcvtzs w14, s3
580-
; CHECK-NEXT: fcsel s2, s0, s2, mi
579+
; CHECK-NEXT: fcvtzs w13, s3
580+
; CHECK-NEXT: fcsel s5, s0, s6, mi
581581
; CHECK-NEXT: subs w10, w10, #1
582-
; CHECK-NEXT: strb w14, [x9, #2]
583-
; CHECK-NEXT: fcvtzs w15, s2
584-
; CHECK-NEXT: strb w15, [x9, #3]
582+
; CHECK-NEXT: strb w13, [x9, #2]
583+
; CHECK-NEXT: fcvtzs w14, s5
584+
; CHECK-NEXT: strb w14, [x9, #3]
585585
; CHECK-NEXT: add x9, x9, #4
586586
; CHECK-NEXT: b.ne .LBB3_6
587587
; CHECK-NEXT: .LBB3_7: // %for.cond.cleanup

llvm/test/CodeGen/AArch64/zext-to-tbl.ll

Lines changed: 18 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -2756,37 +2756,39 @@ exit:
27562756
define i32 @test_pr62620_widening_instr(ptr %p1, ptr %p2, i64 %lx, i32 %h) {
27572757
; CHECK-LABEL: test_pr62620_widening_instr:
27582758
; CHECK: ; %bb.0: ; %entry
2759-
; CHECK-NEXT: lsl x8, x2, #4
2760-
; CHECK-NEXT: ldr q0, [x0, x8]
2761-
; CHECK-NEXT: ldr q1, [x1, x8]
2759+
; CHECK-NEXT: lsl x9, x2, #4
2760+
; CHECK-NEXT: mov x8, x0
27622761
; CHECK-NEXT: mov w0, wzr
2762+
; CHECK-NEXT: ldr q0, [x8, x9]
2763+
; CHECK-NEXT: ldr q1, [x1, x9]
2764+
; CHECK-NEXT: uabdl.8h v2, v0, v1
2765+
; CHECK-NEXT: uabal2.8h v2, v0, v1
2766+
; CHECK-NEXT: uaddlv.8h s0, v2
2767+
; CHECK-NEXT: fmov w8, s0
27632768
; CHECK-NEXT: LBB23_1: ; %loop
27642769
; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1
2765-
; CHECK-NEXT: uabdl.8h v2, v0, v1
27662770
; CHECK-NEXT: subs w3, w3, #1
2767-
; CHECK-NEXT: uabal2.8h v2, v0, v1
2768-
; CHECK-NEXT: uaddlv.8h s2, v2
2769-
; CHECK-NEXT: fmov w8, s2
27702771
; CHECK-NEXT: add w0, w8, w0
27712772
; CHECK-NEXT: b.ne LBB23_1
27722773
; CHECK-NEXT: ; %bb.2: ; %exit
27732774
; CHECK-NEXT: ret
27742775
;
27752776
; CHECK-BE-LABEL: test_pr62620_widening_instr:
27762777
; CHECK-BE: // %bb.0: // %entry
2777-
; CHECK-BE-NEXT: lsl x8, x2, #4
2778-
; CHECK-BE-NEXT: add x9, x0, x8
2779-
; CHECK-BE-NEXT: add x8, x1, x8
2778+
; CHECK-BE-NEXT: lsl x9, x2, #4
2779+
; CHECK-BE-NEXT: mov x8, x0
27802780
; CHECK-BE-NEXT: mov w0, wzr
2781-
; CHECK-BE-NEXT: ld1 { v0.16b }, [x9]
2782-
; CHECK-BE-NEXT: ld1 { v1.16b }, [x8]
2781+
; CHECK-BE-NEXT: add x8, x8, x9
2782+
; CHECK-BE-NEXT: add x9, x1, x9
2783+
; CHECK-BE-NEXT: ld1 { v0.16b }, [x8]
2784+
; CHECK-BE-NEXT: ld1 { v1.16b }, [x9]
2785+
; CHECK-BE-NEXT: uabdl v2.8h, v0.8b, v1.8b
2786+
; CHECK-BE-NEXT: uabal2 v2.8h, v0.16b, v1.16b
2787+
; CHECK-BE-NEXT: uaddlv s0, v2.8h
2788+
; CHECK-BE-NEXT: fmov w8, s0
27832789
; CHECK-BE-NEXT: .LBB23_1: // %loop
27842790
; CHECK-BE-NEXT: // =>This Inner Loop Header: Depth=1
2785-
; CHECK-BE-NEXT: uabdl v2.8h, v0.8b, v1.8b
27862791
; CHECK-BE-NEXT: subs w3, w3, #1
2787-
; CHECK-BE-NEXT: uabal2 v2.8h, v0.16b, v1.16b
2788-
; CHECK-BE-NEXT: uaddlv s2, v2.8h
2789-
; CHECK-BE-NEXT: fmov w8, s2
27902792
; CHECK-BE-NEXT: add w0, w8, w0
27912793
; CHECK-BE-NEXT: b.ne .LBB23_1
27922794
; CHECK-BE-NEXT: // %bb.2: // %exit

0 commit comments

Comments
 (0)