@@ -3602,6 +3602,26 @@ MachineBasicBlock *SITargetLowering::EmitInstrWithCustomInserter(
3602
3602
}
3603
3603
3604
3604
switch (MI.getOpcode()) {
3605
+ case AMDGPU::S_UADDO_PSEUDO:
3606
+ case AMDGPU::S_USUBO_PSEUDO: {
3607
+ const DebugLoc &DL = MI.getDebugLoc();
3608
+ MachineOperand &Dest0 = MI.getOperand(0);
3609
+ MachineOperand &Dest1 = MI.getOperand(1);
3610
+ MachineOperand &Src0 = MI.getOperand(2);
3611
+ MachineOperand &Src1 = MI.getOperand(3);
3612
+
3613
+ unsigned Opc = (MI.getOpcode() == AMDGPU::S_UADDO_PSEUDO)
3614
+ ? AMDGPU::S_ADD_I32
3615
+ : AMDGPU::S_SUB_I32;
3616
+ BuildMI(*BB, MI, DL, TII->get(Opc), Dest0.getReg()).add(Src0).add(Src1);
3617
+
3618
+ BuildMI(*BB, MI, DL, TII->get(AMDGPU::S_CSELECT_B64), Dest1.getReg())
3619
+ .addImm(1)
3620
+ .addImm(0);
3621
+
3622
+ MI.eraseFromParent();
3623
+ return BB;
3624
+ }
3605
3625
case AMDGPU::S_ADD_U64_PSEUDO:
3606
3626
case AMDGPU::S_SUB_U64_PSEUDO: {
3607
3627
MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
@@ -3617,35 +3637,146 @@ MachineBasicBlock *SITargetLowering::EmitInstrWithCustomInserter(
3617
3637
Register DestSub0 = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
3618
3638
Register DestSub1 = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
3619
3639
3620
- MachineOperand Src0Sub0 = TII->buildExtractSubRegOrImm(MI, MRI,
3621
- Src0, BoolRC, AMDGPU::sub0,
3622
- &AMDGPU::SReg_32RegClass);
3623
- MachineOperand Src0Sub1 = TII->buildExtractSubRegOrImm(MI, MRI,
3624
- Src0, BoolRC, AMDGPU::sub1,
3625
- &AMDGPU::SReg_32RegClass);
3640
+ MachineOperand Src0Sub0 = TII->buildExtractSubRegOrImm(
3641
+ MI, MRI, Src0, BoolRC, AMDGPU::sub0, &AMDGPU::SReg_32RegClass);
3642
+ MachineOperand Src0Sub1 = TII->buildExtractSubRegOrImm(
3643
+ MI, MRI, Src0, BoolRC, AMDGPU::sub1, &AMDGPU::SReg_32RegClass);
3626
3644
3627
- MachineOperand Src1Sub0 = TII->buildExtractSubRegOrImm(MI, MRI,
3628
- Src1, BoolRC, AMDGPU::sub0,
3629
- &AMDGPU::SReg_32RegClass);
3630
- MachineOperand Src1Sub1 = TII->buildExtractSubRegOrImm(MI, MRI,
3631
- Src1, BoolRC, AMDGPU::sub1,
3632
- &AMDGPU::SReg_32RegClass);
3645
+ MachineOperand Src1Sub0 = TII->buildExtractSubRegOrImm(
3646
+ MI, MRI, Src1, BoolRC, AMDGPU::sub0, &AMDGPU::SReg_32RegClass);
3647
+ MachineOperand Src1Sub1 = TII->buildExtractSubRegOrImm(
3648
+ MI, MRI, Src1, BoolRC, AMDGPU::sub1, &AMDGPU::SReg_32RegClass);
3633
3649
3634
3650
bool IsAdd = (MI.getOpcode() == AMDGPU::S_ADD_U64_PSEUDO);
3635
3651
3636
3652
unsigned LoOpc = IsAdd ? AMDGPU::S_ADD_U32 : AMDGPU::S_SUB_U32;
3637
3653
unsigned HiOpc = IsAdd ? AMDGPU::S_ADDC_U32 : AMDGPU::S_SUBB_U32;
3638
- BuildMI(*BB, MI, DL, TII->get(LoOpc), DestSub0)
3639
- .add(Src0Sub0)
3640
- .add(Src1Sub0);
3641
- BuildMI(*BB, MI, DL, TII->get(HiOpc), DestSub1)
3642
- .add(Src0Sub1)
3643
- .add(Src1Sub1);
3654
+ BuildMI(*BB, MI, DL, TII->get(LoOpc), DestSub0).add(Src0Sub0).add(Src1Sub0);
3655
+ BuildMI(*BB, MI, DL, TII->get(HiOpc), DestSub1).add(Src0Sub1).add(Src1Sub1);
3644
3656
BuildMI(*BB, MI, DL, TII->get(TargetOpcode::REG_SEQUENCE), Dest.getReg())
3645
- .addReg(DestSub0)
3646
- .addImm(AMDGPU::sub0)
3647
- .addReg(DestSub1)
3648
- .addImm(AMDGPU::sub1);
3657
+ .addReg(DestSub0)
3658
+ .addImm(AMDGPU::sub0)
3659
+ .addReg(DestSub1)
3660
+ .addImm(AMDGPU::sub1);
3661
+ MI.eraseFromParent();
3662
+ return BB;
3663
+ }
3664
+ case AMDGPU::V_ADD_U64_PSEUDO:
3665
+ case AMDGPU::V_SUB_U64_PSEUDO: {
3666
+ MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
3667
+ const GCNSubtarget &ST = MF->getSubtarget<GCNSubtarget>();
3668
+ const SIRegisterInfo *TRI = ST.getRegisterInfo();
3669
+ const DebugLoc &DL = MI.getDebugLoc();
3670
+
3671
+ bool IsAdd = (MI.getOpcode() == AMDGPU::V_ADD_U64_PSEUDO);
3672
+
3673
+ const auto *CarryRC = TRI->getRegClass(AMDGPU::SReg_1_XEXECRegClassID);
3674
+
3675
+ Register DestSub0 = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
3676
+ Register DestSub1 = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
3677
+
3678
+ Register CarryReg = MRI.createVirtualRegister(CarryRC);
3679
+ Register DeadCarryReg = MRI.createVirtualRegister(CarryRC);
3680
+
3681
+ MachineOperand &Dest = MI.getOperand(0);
3682
+ MachineOperand &Src0 = MI.getOperand(1);
3683
+ MachineOperand &Src1 = MI.getOperand(2);
3684
+
3685
+ const TargetRegisterClass *Src0RC = Src0.isReg()
3686
+ ? MRI.getRegClass(Src0.getReg())
3687
+ : &AMDGPU::VReg_64RegClass;
3688
+ const TargetRegisterClass *Src1RC = Src1.isReg()
3689
+ ? MRI.getRegClass(Src1.getReg())
3690
+ : &AMDGPU::VReg_64RegClass;
3691
+
3692
+ const TargetRegisterClass *Src0SubRC =
3693
+ TRI->getSubRegClass(Src0RC, AMDGPU::sub0);
3694
+ const TargetRegisterClass *Src1SubRC =
3695
+ TRI->getSubRegClass(Src1RC, AMDGPU::sub1);
3696
+
3697
+ MachineOperand SrcReg0Sub0 = TII->buildExtractSubRegOrImm(
3698
+ MI, MRI, Src0, Src0RC, AMDGPU::sub0, Src0SubRC);
3699
+ MachineOperand SrcReg1Sub0 = TII->buildExtractSubRegOrImm(
3700
+ MI, MRI, Src1, Src1RC, AMDGPU::sub0, Src1SubRC);
3701
+
3702
+ MachineOperand SrcReg0Sub1 = TII->buildExtractSubRegOrImm(
3703
+ MI, MRI, Src0, Src0RC, AMDGPU::sub1, Src0SubRC);
3704
+ MachineOperand SrcReg1Sub1 = TII->buildExtractSubRegOrImm(
3705
+ MI, MRI, Src1, Src1RC, AMDGPU::sub1, Src1SubRC);
3706
+
3707
+ unsigned LoOpc = IsAdd ? AMDGPU::V_ADD_I32_e64 : AMDGPU::V_SUB_I32_e64;
3708
+ MachineInstr *LoHalf = BuildMI(*BB, MI, DL, TII->get(LoOpc), DestSub0)
3709
+ .addReg(CarryReg, RegState::Define)
3710
+ .add(SrcReg0Sub0)
3711
+ .add(SrcReg1Sub0)
3712
+ .addImm(0); // clamp bit
3713
+
3714
+ unsigned HiOpc = IsAdd ? AMDGPU::V_ADDC_U32_e64 : AMDGPU::V_SUBB_U32_e64;
3715
+ MachineInstr *HiHalf =
3716
+ BuildMI(*BB, MI, DL, TII->get(HiOpc), DestSub1)
3717
+ .addReg(DeadCarryReg, RegState::Define | RegState::Dead)
3718
+ .add(SrcReg0Sub1)
3719
+ .add(SrcReg1Sub1)
3720
+ .addReg(CarryReg, RegState::Kill)
3721
+ .addImm(0); // clamp bit
3722
+
3723
+ BuildMI(*BB, MI, DL, TII->get(TargetOpcode::REG_SEQUENCE), Dest.getReg())
3724
+ .addReg(DestSub0)
3725
+ .addImm(AMDGPU::sub0)
3726
+ .addReg(DestSub1)
3727
+ .addImm(AMDGPU::sub1);
3728
+ TII->legalizeOperands(*LoHalf);
3729
+ TII->legalizeOperands(*HiHalf);
3730
+ MI.eraseFromParent();
3731
+ return BB;
3732
+ }
3733
+ case AMDGPU::S_ADD_CO_PSEUDO:
3734
+ case AMDGPU::S_SUB_CO_PSEUDO: {
3735
+ // This pseudo has a chance to be selected
3736
+ // only from a uniform add/subcarry node. All the VGPR operands
3737
+ // are therefore assumed to be splat vectors.
3738
+ MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
3739
+ const GCNSubtarget &ST = MF->getSubtarget<GCNSubtarget>();
3740
+ const SIRegisterInfo *TRI = ST.getRegisterInfo();
3741
+ MachineBasicBlock::iterator MII = MI;
3742
+ const DebugLoc &DL = MI.getDebugLoc();
3743
+ MachineOperand &Dest = MI.getOperand(0);
3744
+ MachineOperand &Src0 = MI.getOperand(2);
3745
+ MachineOperand &Src1 = MI.getOperand(3);
3746
+ MachineOperand &Src2 = MI.getOperand(4);
3747
+ unsigned Opc = (MI.getOpcode() == AMDGPU::S_ADD_CO_PSEUDO)
3748
+ ? AMDGPU::S_ADDC_U32
3749
+ : AMDGPU::S_SUBB_U32;
3750
+ if (Src0.isReg() && TRI->isVectorRegister(MRI, Src0.getReg())) {
3751
+ Register RegOp0 = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
3752
+ BuildMI(*BB, MII, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32), RegOp0)
3753
+ .addReg(Src0.getReg());
3754
+ Src0.setReg(RegOp0);
3755
+ }
3756
+ if (Src1.isReg() && TRI->isVectorRegister(MRI, Src1.getReg())) {
3757
+ Register RegOp1 = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
3758
+ BuildMI(*BB, MII, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32), RegOp1)
3759
+ .addReg(Src1.getReg());
3760
+ Src1.setReg(RegOp1);
3761
+ }
3762
+ Register RegOp2 = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
3763
+ if (TRI->isVectorRegister(MRI, Src2.getReg())) {
3764
+ BuildMI(*BB, MII, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32), RegOp2)
3765
+ .addReg(Src2.getReg());
3766
+ Src2.setReg(RegOp2);
3767
+ }
3768
+
3769
+ if (TRI->getRegSizeInBits(*MRI.getRegClass(Src2.getReg())) == 64) {
3770
+ BuildMI(*BB, MII, DL, TII->get(AMDGPU::S_CMP_LG_U64))
3771
+ .addReg(Src2.getReg())
3772
+ .addImm(0);
3773
+ } else {
3774
+ BuildMI(*BB, MII, DL, TII->get(AMDGPU::S_CMPK_LG_U32))
3775
+ .addReg(Src2.getReg())
3776
+ .addImm(0);
3777
+ }
3778
+
3779
+ BuildMI(*BB, MII, DL, TII->get(Opc), Dest.getReg()).add(Src0).add(Src1);
3649
3780
MI.eraseFromParent();
3650
3781
return BB;
3651
3782
}
0 commit comments