Skip to content

Commit d8d0588

Browse files
authored
[TwoAddressInstruction] Update LiveIntervals after INSERT_SUBREG with undef read (#66211)
Update LiveIntervals after rewriting:
    %reg = INSERT_SUBREG undef %reg, %subreg, subidx
to:
    undef %reg:subidx = COPY %subreg

D113044 implemented this for the non-undef case.
1 parent dd6dde1 commit d8d0588

File tree

4 files changed

+1019
-503
lines changed

4 files changed

+1019
-503
lines changed

llvm/lib/CodeGen/TwoAddressInstructionPass.cpp

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1868,12 +1868,16 @@ bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &Func) {
18681868
// %reg.subidx.
18691869
LaneBitmask LaneMask =
18701870
TRI->getSubRegIndexLaneMask(mi->getOperand(0).getSubReg());
1871-
SlotIndex Idx = LIS->getInstructionIndex(*mi);
1871+
SlotIndex Idx = LIS->getInstructionIndex(*mi).getRegSlot();
18721872
for (auto &S : LI.subranges()) {
18731873
if ((S.LaneMask & LaneMask).none()) {
1874-
LiveRange::iterator UseSeg = S.FindSegmentContaining(Idx);
1875-
LiveRange::iterator DefSeg = std::next(UseSeg);
1876-
S.MergeValueNumberInto(DefSeg->valno, UseSeg->valno);
1874+
LiveRange::iterator DefSeg = S.FindSegmentContaining(Idx);
1875+
if (mi->getOperand(0).isUndef()) {
1876+
S.removeValNo(DefSeg->valno);
1877+
} else {
1878+
LiveRange::iterator UseSeg = std::prev(DefSeg);
1879+
S.MergeValueNumberInto(DefSeg->valno, UseSeg->valno);
1880+
}
18771881
}
18781882
}
18791883

llvm/test/CodeGen/Thumb2/mve-fmath.ll

Lines changed: 35 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
11
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2-
; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve,+fullfp16 -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=CHECK
3-
; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve.fp -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=CHECK
2+
; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve,+fullfp16 -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-LV
3+
; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve.fp -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-LV
4+
; RUN: llc -early-live-intervals -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve,+fullfp16 -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-LIS
5+
; RUN: llc -early-live-intervals -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve.fp -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-LIS
46

57
define arm_aapcs_vfpcc <4 x float> @sqrt_float32_t(<4 x float> %src) {
68
; CHECK-LABEL: sqrt_float32_t:
@@ -1085,21 +1087,37 @@ entry:
10851087
}
10861088

10871089
define arm_aapcs_vfpcc <2 x double> @copysign_float64_t(<2 x double> %src1, <2 x double> %src2) {
1088-
; CHECK-LABEL: copysign_float64_t:
1089-
; CHECK: @ %bb.0: @ %entry
1090-
; CHECK-NEXT: .save {r7, lr}
1091-
; CHECK-NEXT: push {r7, lr}
1092-
; CHECK-NEXT: vmov r0, r1, d3
1093-
; CHECK-NEXT: vmov r0, lr, d2
1094-
; CHECK-NEXT: vmov r0, r3, d1
1095-
; CHECK-NEXT: vmov r12, r2, d0
1096-
; CHECK-NEXT: lsrs r1, r1, #31
1097-
; CHECK-NEXT: bfi r3, r1, #31, #1
1098-
; CHECK-NEXT: lsr.w r1, lr, #31
1099-
; CHECK-NEXT: bfi r2, r1, #31, #1
1100-
; CHECK-NEXT: vmov d1, r0, r3
1101-
; CHECK-NEXT: vmov d0, r12, r2
1102-
; CHECK-NEXT: pop {r7, pc}
1090+
; CHECK-LV-LABEL: copysign_float64_t:
1091+
; CHECK-LV: @ %bb.0: @ %entry
1092+
; CHECK-LV-NEXT: .save {r7, lr}
1093+
; CHECK-LV-NEXT: push {r7, lr}
1094+
; CHECK-LV-NEXT: vmov r0, r1, d3
1095+
; CHECK-LV-NEXT: vmov r0, lr, d2
1096+
; CHECK-LV-NEXT: vmov r0, r3, d1
1097+
; CHECK-LV-NEXT: vmov r12, r2, d0
1098+
; CHECK-LV-NEXT: lsrs r1, r1, #31
1099+
; CHECK-LV-NEXT: bfi r3, r1, #31, #1
1100+
; CHECK-LV-NEXT: lsr.w r1, lr, #31
1101+
; CHECK-LV-NEXT: bfi r2, r1, #31, #1
1102+
; CHECK-LV-NEXT: vmov d1, r0, r3
1103+
; CHECK-LV-NEXT: vmov d0, r12, r2
1104+
; CHECK-LV-NEXT: pop {r7, pc}
1105+
;
1106+
; CHECK-LIS-LABEL: copysign_float64_t:
1107+
; CHECK-LIS: @ %bb.0: @ %entry
1108+
; CHECK-LIS-NEXT: .save {r4, lr}
1109+
; CHECK-LIS-NEXT: push {r4, lr}
1110+
; CHECK-LIS-NEXT: vmov r0, r12, d3
1111+
; CHECK-LIS-NEXT: vmov r0, lr, d2
1112+
; CHECK-LIS-NEXT: vmov r4, r3, d1
1113+
; CHECK-LIS-NEXT: vmov r1, r2, d0
1114+
; CHECK-LIS-NEXT: lsr.w r0, r12, #31
1115+
; CHECK-LIS-NEXT: bfi r3, r0, #31, #1
1116+
; CHECK-LIS-NEXT: lsr.w r0, lr, #31
1117+
; CHECK-LIS-NEXT: bfi r2, r0, #31, #1
1118+
; CHECK-LIS-NEXT: vmov d1, r4, r3
1119+
; CHECK-LIS-NEXT: vmov d0, r1, r2
1120+
; CHECK-LIS-NEXT: pop {r4, pc}
11031121
entry:
11041122
%0 = call fast <2 x double> @llvm.copysign.v2f64(<2 x double> %src1, <2 x double> %src2)
11051123
ret <2 x double> %0

llvm/test/CodeGen/Thumb2/mve-shuffle.ll

Lines changed: 127 additions & 62 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
11
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2-
; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve,+fullfp16 -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=CHECK
3-
; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve.fp -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECKFP
2+
; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve,+fullfp16 -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-LV
3+
; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve.fp -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-LV,CHECKFP
4+
; RUN: llc -early-live-intervals -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve,+fullfp16 -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-LIS
5+
; RUN: llc -early-live-intervals -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve.fp -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-LIS,CHECKFP
46

57
define arm_aapcs_vfpcc <4 x i32> @shuffle1_i32(<4 x i32> %src) {
68
; CHECK-LABEL: shuffle1_i32:
@@ -221,18 +223,31 @@ entry:
221223
}
222224

223225
define arm_aapcs_vfpcc <8 x i16> @shuffle3_i16(<8 x i16> %src) {
224-
; CHECK-LABEL: shuffle3_i16:
225-
; CHECK: @ %bb.0: @ %entry
226-
; CHECK-NEXT: vmov q1, q0
227-
; CHECK-NEXT: vmovx.f16 s2, s5
228-
; CHECK-NEXT: vmovx.f16 s0, s4
229-
; CHECK-NEXT: vins.f16 s5, s4
230-
; CHECK-NEXT: vins.f16 s2, s0
231-
; CHECK-NEXT: vmov.f32 s3, s5
232-
; CHECK-NEXT: vmovx.f16 s1, s7
233-
; CHECK-NEXT: vmov.f32 s0, s6
234-
; CHECK-NEXT: vins.f16 s1, s7
235-
; CHECK-NEXT: bx lr
226+
; CHECK-LV-LABEL: shuffle3_i16:
227+
; CHECK-LV: @ %bb.0: @ %entry
228+
; CHECK-LV-NEXT: vmov q1, q0
229+
; CHECK-LV-NEXT: vmovx.f16 s2, s5
230+
; CHECK-LV-NEXT: vmovx.f16 s0, s4
231+
; CHECK-LV-NEXT: vins.f16 s5, s4
232+
; CHECK-LV-NEXT: vins.f16 s2, s0
233+
; CHECK-LV-NEXT: vmov.f32 s3, s5
234+
; CHECK-LV-NEXT: vmovx.f16 s1, s7
235+
; CHECK-LV-NEXT: vmov.f32 s0, s6
236+
; CHECK-LV-NEXT: vins.f16 s1, s7
237+
; CHECK-LV-NEXT: bx lr
238+
;
239+
; CHECK-LIS-LABEL: shuffle3_i16:
240+
; CHECK-LIS: @ %bb.0: @ %entry
241+
; CHECK-LIS-NEXT: vmovx.f16 s5, s3
242+
; CHECK-LIS-NEXT: vmovx.f16 s6, s1
243+
; CHECK-LIS-NEXT: vmovx.f16 s4, s0
244+
; CHECK-LIS-NEXT: vins.f16 s1, s0
245+
; CHECK-LIS-NEXT: vins.f16 s6, s4
246+
; CHECK-LIS-NEXT: vins.f16 s5, s3
247+
; CHECK-LIS-NEXT: vmov.f32 s7, s1
248+
; CHECK-LIS-NEXT: vmov.f32 s4, s2
249+
; CHECK-LIS-NEXT: vmov q0, q1
250+
; CHECK-LIS-NEXT: bx lr
236251
entry:
237252
%out = shufflevector <8 x i16> %src, <8 x i16> undef, <8 x i32> <i32 4, i32 5, i32 7, i32 6, i32 3, i32 1, i32 2, i32 0>
238253
ret <8 x i16> %out
@@ -476,42 +491,79 @@ entry:
476491
}
477492

478493
define arm_aapcs_vfpcc <16 x i8> @shuffle3_i8(<16 x i8> %src) {
479-
; CHECK-LABEL: shuffle3_i8:
480-
; CHECK: @ %bb.0: @ %entry
481-
; CHECK-NEXT: vmov q1, q0
482-
; CHECK-NEXT: vmov.u8 r0, q0[4]
483-
; CHECK-NEXT: vmov.8 q0[0], r0
484-
; CHECK-NEXT: vmov.u8 r0, q1[5]
485-
; CHECK-NEXT: vmov.8 q0[1], r0
486-
; CHECK-NEXT: vmov.u8 r0, q1[15]
487-
; CHECK-NEXT: vmov.8 q0[2], r0
488-
; CHECK-NEXT: vmov.u8 r0, q1[7]
489-
; CHECK-NEXT: vmov.8 q0[3], r0
490-
; CHECK-NEXT: vmov.u8 r0, q1[14]
491-
; CHECK-NEXT: vmov.8 q0[4], r0
492-
; CHECK-NEXT: vmov.u8 r0, q1[9]
493-
; CHECK-NEXT: vmov.8 q0[5], r0
494-
; CHECK-NEXT: vmov.u8 r0, q1[6]
495-
; CHECK-NEXT: vmov.8 q0[6], r0
496-
; CHECK-NEXT: vmov.u8 r0, q1[3]
497-
; CHECK-NEXT: vmov.8 q0[7], r0
498-
; CHECK-NEXT: vmov.u8 r0, q1[10]
499-
; CHECK-NEXT: vmov.8 q0[8], r0
500-
; CHECK-NEXT: vmov.u8 r0, q1[12]
501-
; CHECK-NEXT: vmov.8 q0[9], r0
502-
; CHECK-NEXT: vmov.u8 r0, q1[1]
503-
; CHECK-NEXT: vmov.8 q0[10], r0
504-
; CHECK-NEXT: vmov.u8 r0, q1[13]
505-
; CHECK-NEXT: vmov.8 q0[11], r0
506-
; CHECK-NEXT: vmov.u8 r0, q1[2]
507-
; CHECK-NEXT: vmov.8 q0[12], r0
508-
; CHECK-NEXT: vmov.u8 r0, q1[8]
509-
; CHECK-NEXT: vmov.8 q0[13], r0
510-
; CHECK-NEXT: vmov.u8 r0, q1[0]
511-
; CHECK-NEXT: vmov.8 q0[14], r0
512-
; CHECK-NEXT: vmov.u8 r0, q1[11]
513-
; CHECK-NEXT: vmov.8 q0[15], r0
514-
; CHECK-NEXT: bx lr
494+
; CHECK-LV-LABEL: shuffle3_i8:
495+
; CHECK-LV: @ %bb.0: @ %entry
496+
; CHECK-LV-NEXT: vmov q1, q0
497+
; CHECK-LV-NEXT: vmov.u8 r0, q0[4]
498+
; CHECK-LV-NEXT: vmov.8 q0[0], r0
499+
; CHECK-LV-NEXT: vmov.u8 r0, q1[5]
500+
; CHECK-LV-NEXT: vmov.8 q0[1], r0
501+
; CHECK-LV-NEXT: vmov.u8 r0, q1[15]
502+
; CHECK-LV-NEXT: vmov.8 q0[2], r0
503+
; CHECK-LV-NEXT: vmov.u8 r0, q1[7]
504+
; CHECK-LV-NEXT: vmov.8 q0[3], r0
505+
; CHECK-LV-NEXT: vmov.u8 r0, q1[14]
506+
; CHECK-LV-NEXT: vmov.8 q0[4], r0
507+
; CHECK-LV-NEXT: vmov.u8 r0, q1[9]
508+
; CHECK-LV-NEXT: vmov.8 q0[5], r0
509+
; CHECK-LV-NEXT: vmov.u8 r0, q1[6]
510+
; CHECK-LV-NEXT: vmov.8 q0[6], r0
511+
; CHECK-LV-NEXT: vmov.u8 r0, q1[3]
512+
; CHECK-LV-NEXT: vmov.8 q0[7], r0
513+
; CHECK-LV-NEXT: vmov.u8 r0, q1[10]
514+
; CHECK-LV-NEXT: vmov.8 q0[8], r0
515+
; CHECK-LV-NEXT: vmov.u8 r0, q1[12]
516+
; CHECK-LV-NEXT: vmov.8 q0[9], r0
517+
; CHECK-LV-NEXT: vmov.u8 r0, q1[1]
518+
; CHECK-LV-NEXT: vmov.8 q0[10], r0
519+
; CHECK-LV-NEXT: vmov.u8 r0, q1[13]
520+
; CHECK-LV-NEXT: vmov.8 q0[11], r0
521+
; CHECK-LV-NEXT: vmov.u8 r0, q1[2]
522+
; CHECK-LV-NEXT: vmov.8 q0[12], r0
523+
; CHECK-LV-NEXT: vmov.u8 r0, q1[8]
524+
; CHECK-LV-NEXT: vmov.8 q0[13], r0
525+
; CHECK-LV-NEXT: vmov.u8 r0, q1[0]
526+
; CHECK-LV-NEXT: vmov.8 q0[14], r0
527+
; CHECK-LV-NEXT: vmov.u8 r0, q1[11]
528+
; CHECK-LV-NEXT: vmov.8 q0[15], r0
529+
; CHECK-LV-NEXT: bx lr
530+
;
531+
; CHECK-LIS-LABEL: shuffle3_i8:
532+
; CHECK-LIS: @ %bb.0: @ %entry
533+
; CHECK-LIS-NEXT: vmov.u8 r0, q0[4]
534+
; CHECK-LIS-NEXT: vmov.8 q1[0], r0
535+
; CHECK-LIS-NEXT: vmov.u8 r0, q0[5]
536+
; CHECK-LIS-NEXT: vmov.8 q1[1], r0
537+
; CHECK-LIS-NEXT: vmov.u8 r0, q0[15]
538+
; CHECK-LIS-NEXT: vmov.8 q1[2], r0
539+
; CHECK-LIS-NEXT: vmov.u8 r0, q0[7]
540+
; CHECK-LIS-NEXT: vmov.8 q1[3], r0
541+
; CHECK-LIS-NEXT: vmov.u8 r0, q0[14]
542+
; CHECK-LIS-NEXT: vmov.8 q1[4], r0
543+
; CHECK-LIS-NEXT: vmov.u8 r0, q0[9]
544+
; CHECK-LIS-NEXT: vmov.8 q1[5], r0
545+
; CHECK-LIS-NEXT: vmov.u8 r0, q0[6]
546+
; CHECK-LIS-NEXT: vmov.8 q1[6], r0
547+
; CHECK-LIS-NEXT: vmov.u8 r0, q0[3]
548+
; CHECK-LIS-NEXT: vmov.8 q1[7], r0
549+
; CHECK-LIS-NEXT: vmov.u8 r0, q0[10]
550+
; CHECK-LIS-NEXT: vmov.8 q1[8], r0
551+
; CHECK-LIS-NEXT: vmov.u8 r0, q0[12]
552+
; CHECK-LIS-NEXT: vmov.8 q1[9], r0
553+
; CHECK-LIS-NEXT: vmov.u8 r0, q0[1]
554+
; CHECK-LIS-NEXT: vmov.8 q1[10], r0
555+
; CHECK-LIS-NEXT: vmov.u8 r0, q0[13]
556+
; CHECK-LIS-NEXT: vmov.8 q1[11], r0
557+
; CHECK-LIS-NEXT: vmov.u8 r0, q0[2]
558+
; CHECK-LIS-NEXT: vmov.8 q1[12], r0
559+
; CHECK-LIS-NEXT: vmov.u8 r0, q0[8]
560+
; CHECK-LIS-NEXT: vmov.8 q1[13], r0
561+
; CHECK-LIS-NEXT: vmov.u8 r0, q0[0]
562+
; CHECK-LIS-NEXT: vmov.8 q1[14], r0
563+
; CHECK-LIS-NEXT: vmov.u8 r0, q0[11]
564+
; CHECK-LIS-NEXT: vmov.8 q1[15], r0
565+
; CHECK-LIS-NEXT: vmov q0, q1
566+
; CHECK-LIS-NEXT: bx lr
515567
entry:
516568
%out = shufflevector <16 x i8> %src, <16 x i8> undef, <16 x i32> <i32 4, i32 5, i32 15, i32 7, i32 14, i32 9, i32 6, i32 3, i32 10, i32 12, i32 1, i32 13, i32 2, i32 8, i32 0, i32 11>
517569
ret <16 x i8> %out
@@ -1143,18 +1195,31 @@ entry:
11431195
}
11441196

11451197
define arm_aapcs_vfpcc <8 x half> @shuffle3_f16(<8 x half> %src) {
1146-
; CHECK-LABEL: shuffle3_f16:
1147-
; CHECK: @ %bb.0: @ %entry
1148-
; CHECK-NEXT: vmov q1, q0
1149-
; CHECK-NEXT: vmovx.f16 s2, s5
1150-
; CHECK-NEXT: vmovx.f16 s0, s4
1151-
; CHECK-NEXT: vins.f16 s5, s4
1152-
; CHECK-NEXT: vins.f16 s2, s0
1153-
; CHECK-NEXT: vmov.f32 s3, s5
1154-
; CHECK-NEXT: vmovx.f16 s1, s7
1155-
; CHECK-NEXT: vmov.f32 s0, s6
1156-
; CHECK-NEXT: vins.f16 s1, s7
1157-
; CHECK-NEXT: bx lr
1198+
; CHECK-LV-LABEL: shuffle3_f16:
1199+
; CHECK-LV: @ %bb.0: @ %entry
1200+
; CHECK-LV-NEXT: vmov q1, q0
1201+
; CHECK-LV-NEXT: vmovx.f16 s2, s5
1202+
; CHECK-LV-NEXT: vmovx.f16 s0, s4
1203+
; CHECK-LV-NEXT: vins.f16 s5, s4
1204+
; CHECK-LV-NEXT: vins.f16 s2, s0
1205+
; CHECK-LV-NEXT: vmov.f32 s3, s5
1206+
; CHECK-LV-NEXT: vmovx.f16 s1, s7
1207+
; CHECK-LV-NEXT: vmov.f32 s0, s6
1208+
; CHECK-LV-NEXT: vins.f16 s1, s7
1209+
; CHECK-LV-NEXT: bx lr
1210+
;
1211+
; CHECK-LIS-LABEL: shuffle3_f16:
1212+
; CHECK-LIS: @ %bb.0: @ %entry
1213+
; CHECK-LIS-NEXT: vmovx.f16 s5, s3
1214+
; CHECK-LIS-NEXT: vmovx.f16 s6, s1
1215+
; CHECK-LIS-NEXT: vmovx.f16 s4, s0
1216+
; CHECK-LIS-NEXT: vins.f16 s1, s0
1217+
; CHECK-LIS-NEXT: vins.f16 s6, s4
1218+
; CHECK-LIS-NEXT: vins.f16 s5, s3
1219+
; CHECK-LIS-NEXT: vmov.f32 s7, s1
1220+
; CHECK-LIS-NEXT: vmov.f32 s4, s2
1221+
; CHECK-LIS-NEXT: vmov q0, q1
1222+
; CHECK-LIS-NEXT: bx lr
11581223
entry:
11591224
%out = shufflevector <8 x half> %src, <8 x half> undef, <8 x i32> <i32 4, i32 5, i32 7, i32 6, i32 3, i32 1, i32 2, i32 0>
11601225
ret <8 x half> %out

0 commit comments

Comments
 (0)