diff --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
index a082a1ebe95bf..81523adeefcee 100644
--- a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
@@ -1634,6 +1634,9 @@ static bool IsSVECalleeSave(MachineBasicBlock::iterator I) {
   case AArch64::STR_PXI:
   case AArch64::LDR_ZXI:
   case AArch64::LDR_PXI:
+  case AArch64::PTRUE_B:
+  case AArch64::CPY_ZPzI_B:
+  case AArch64::CMPNE_PPzZI_B:
     return I->getFlag(MachineInstr::FrameSetup) ||
            I->getFlag(MachineInstr::FrameDestroy);
   }
@@ -3265,7 +3268,8 @@ bool AArch64FrameLowering::spillCalleeSavedRegisters(
       StrOpc = RPI.isPaired() ? AArch64::ST1B_2Z_IMM : AArch64::STR_ZXI;
       break;
     case RegPairInfo::PPR:
-      StrOpc = AArch64::STR_PXI;
+      StrOpc =
+          Size == 16 ? AArch64::SPILL_PPR_TO_ZPR_SLOT_PSEUDO : AArch64::STR_PXI;
       break;
     case RegPairInfo::VG:
       StrOpc = AArch64::STRXui;
@@ -3494,7 +3498,8 @@ bool AArch64FrameLowering::restoreCalleeSavedRegisters(
       LdrOpc = RPI.isPaired() ? AArch64::LD1B_2Z_IMM : AArch64::LDR_ZXI;
       break;
     case RegPairInfo::PPR:
-      LdrOpc = AArch64::LDR_PXI;
+      LdrOpc = Size == 16 ? AArch64::FILL_PPR_FROM_ZPR_SLOT_PSEUDO
+                          : AArch64::LDR_PXI;
       break;
     case RegPairInfo::VG:
       continue;
@@ -3720,6 +3725,14 @@ void AArch64FrameLowering::determineCalleeSaves(MachineFunction &MF,
       continue;
     }
 
+    // Always save P4 when PPR spills are ZPR-sized and a predicate in the
+    // range p8-p15 is spilled. If all of p0-p3 are used as return values, p4
+    // must be free to reload p8-p15.
+    if (RegInfo->getSpillSize(AArch64::PPRRegClass) == 16 &&
+        AArch64::PPR_p8to15RegClass.contains(Reg)) {
+      SavedRegs.set(AArch64::P4);
+    }
+
     // MachO's compact unwind format relies on all registers being stored in
     // pairs.
     // FIXME: the usual format is actually better if unwinding isn't needed.
@@ -4159,8 +4172,295 @@ int64_t AArch64FrameLowering::assignSVEStackObjectOffsets(
                                         true);
 }
 
+/// Attempts to scavenge a register from \p ScavengeableRegs given the used
+/// registers in \p UsedRegs.
+static Register tryScavengeRegister(LiveRegUnits const &UsedRegs,
+                                    BitVector const &ScavengeableRegs) {
+  for (auto Reg : ScavengeableRegs.set_bits()) {
+    if (UsedRegs.available(Reg))
+      return Reg;
+  }
+  return AArch64::NoRegister;
+}
+
+/// Propagates frame-setup/destroy flags from \p SourceMI to all instructions
+/// in \p MachineInstrs.
+static void propagateFrameFlags(MachineInstr &SourceMI,
+                                ArrayRef<MachineInstr *> MachineInstrs) {
+  for (MachineInstr *MI : MachineInstrs) {
+    if (SourceMI.getFlag(MachineInstr::FrameSetup))
+      MI->setFlag(MachineInstr::FrameSetup);
+    if (SourceMI.getFlag(MachineInstr::FrameDestroy))
+      MI->setFlag(MachineInstr::FrameDestroy);
+  }
+}
+
+/// RAII helper class for scavenging or spilling a register. On construction,
+/// attempts to find a free register of class \p RC (given \p UsedRegs and
+/// \p AllocatableRegs); if no register can be found, spills \p SpillCandidate
+/// to \p MaybeSpillFI to free a register. The freed register is returned via
+/// freeRegister(). On destruction, if there was a spill, the previous value of
+/// the spilled register is reloaded. The spilling and scavenging are only
+/// valid at the insertion point \p MBBI; this class should _not_ be used in
+/// places that create or manipulate basic blocks, moving the expected
+/// insertion point.
+struct ScopedScavengeOrSpill {
+  ScopedScavengeOrSpill(const ScopedScavengeOrSpill &) = delete;
+  ScopedScavengeOrSpill(ScopedScavengeOrSpill &&) = delete;
+
+  ScopedScavengeOrSpill(MachineFunction &MF, MachineBasicBlock &MBB,
+                        MachineBasicBlock::iterator MBBI,
+                        Register SpillCandidate, const TargetRegisterClass &RC,
+                        LiveRegUnits const &UsedRegs,
+                        BitVector const &AllocatableRegs,
+                        std::optional<int> *MaybeSpillFI)
+      : MBB(MBB), MBBI(MBBI), RC(RC), TII(static_cast<const AArch64InstrInfo &>(
+                                          *MF.getSubtarget().getInstrInfo())),
+        TRI(*MF.getSubtarget().getRegisterInfo()) {
+    FreeReg = tryScavengeRegister(UsedRegs, AllocatableRegs);
+    if (FreeReg != AArch64::NoRegister)
+      return;
+    assert(MaybeSpillFI && "Expected emergency spill slot FI information "
+                           "(attempted to spill in prologue/epilogue?)");
+    if (!MaybeSpillFI->has_value()) {
+      MachineFrameInfo &MFI = MF.getFrameInfo();
+      *MaybeSpillFI = MFI.CreateSpillStackObject(TRI.getSpillSize(RC),
+                                                 TRI.getSpillAlign(RC));
+    }
+    FreeReg = SpillCandidate;
+    SpillFI = MaybeSpillFI->value();
+    TII.storeRegToStackSlot(MBB, MBBI, FreeReg, false, *SpillFI, &RC, &TRI,
+                            Register());
+  }
+
+  bool hasSpilled() const { return SpillFI.has_value(); }
+
+  /// Returns the free register (found from scavenging or spilling a register).
+  Register freeRegister() const { return FreeReg; }
+
+  Register operator*() const { return freeRegister(); }
+
+  ~ScopedScavengeOrSpill() {
+    if (hasSpilled())
+      TII.loadRegFromStackSlot(MBB, MBBI, FreeReg, *SpillFI, &RC, &TRI,
+                               Register());
+  }
+
+private:
+  MachineBasicBlock &MBB;
+  MachineBasicBlock::iterator MBBI;
+  const TargetRegisterClass &RC;
+  const AArch64InstrInfo &TII;
+  const TargetRegisterInfo &TRI;
+  Register FreeReg = AArch64::NoRegister;
+  std::optional<int> SpillFI;
+};
+
+/// Emergency stack slots for expanding SPILL_PPR_TO_ZPR_SLOT_PSEUDO and
+/// FILL_PPR_FROM_ZPR_SLOT_PSEUDO.
+struct EmergencyStackSlots {
+  std::optional<int> ZPRSpillFI;
+  std::optional<int> PPRSpillFI;
+  std::optional<int> GPRSpillFI;
+};
+
+/// Registers available for scavenging (ZPR, PPR3b, GPR).
+struct ScavengeableRegs {
+  BitVector ZPRRegs;
+  BitVector PPR3bRegs;
+  BitVector GPRRegs;
+};
+
+static bool isInPrologueOrEpilogue(const MachineInstr &MI) {
+  return MI.getFlag(MachineInstr::FrameSetup) ||
+         MI.getFlag(MachineInstr::FrameDestroy);
+}
+
+/// Expands:
+/// ```
+/// SPILL_PPR_TO_ZPR_SLOT_PSEUDO $p0, %stack.0, 0
+/// ```
+/// To:
+/// ```
+/// $z0 = CPY_ZPzI_B $p0, 1, 0
+/// STR_ZXI $z0, %stack.0, 0
+/// ```
+/// While ensuring a ZPR ($z0 in this example) is free for the predicate
+/// (spilling one if necessary).
+static void expandSpillPPRToZPRSlotPseudo(MachineBasicBlock &MBB,
+                                          MachineInstr &MI,
+                                          const TargetRegisterInfo &TRI,
+                                          LiveRegUnits const &UsedRegs,
+                                          ScavengeableRegs const &SR,
+                                          EmergencyStackSlots &SpillSlots) {
+  MachineFunction &MF = *MBB.getParent();
+  auto *TII =
+      static_cast<const AArch64InstrInfo *>(MF.getSubtarget().getInstrInfo());
+
+  ScopedScavengeOrSpill ZPredReg(
+      MF, MBB, MI, AArch64::Z0, AArch64::ZPRRegClass, UsedRegs, SR.ZPRRegs,
+      isInPrologueOrEpilogue(MI) ? nullptr : &SpillSlots.ZPRSpillFI);
+
+  SmallVector<MachineInstr *, 2> MachineInstrs;
+  const DebugLoc &DL = MI.getDebugLoc();
+  MachineInstrs.push_back(BuildMI(MBB, MI, DL, TII->get(AArch64::CPY_ZPzI_B))
+                              .addReg(*ZPredReg, RegState::Define)
+                              .add(MI.getOperand(0))
+                              .addImm(1)
+                              .addImm(0)
+                              .getInstr());
+  MachineInstrs.push_back(BuildMI(MBB, MI, DL, TII->get(AArch64::STR_ZXI))
+                              .addReg(*ZPredReg)
+                              .add(MI.getOperand(1))
+                              .addImm(MI.getOperand(2).getImm())
+                              .setMemRefs(MI.memoperands())
+                              .getInstr());
+  propagateFrameFlags(MI, MachineInstrs);
+}
+
+/// Expands:
+/// ```
+/// $p0 = FILL_PPR_FROM_ZPR_SLOT_PSEUDO %stack.0, 0
+/// ```
+/// To:
+/// ```
+/// $z0 = LDR_ZXI %stack.0, 0
+/// $p0 = PTRUE_B 31, implicit $vg
+/// $p0 = CMPNE_PPzZI_B $p0, $z0, 0, implicit-def $nzcv, implicit-def $nzcv
+/// ```
+/// While ensuring a ZPR ($z0 in this example) is free for the predicate
+/// (spilling one if necessary). If the status flags are in use at the point of
+/// expansion, they are preserved (by moving them to/from a GPR). This may
+/// cause an additional spill if no GPR is free at the expansion point.
+static bool expandFillPPRFromZPRSlotPseudo(MachineBasicBlock &MBB,
+                                           MachineInstr &MI,
+                                           const TargetRegisterInfo &TRI,
+                                           LiveRegUnits const &UsedRegs,
+                                           ScavengeableRegs const &SR,
+                                           EmergencyStackSlots &SpillSlots) {
+  MachineFunction &MF = *MBB.getParent();
+  auto *TII =
+      static_cast<const AArch64InstrInfo *>(MF.getSubtarget().getInstrInfo());
+
+  ScopedScavengeOrSpill ZPredReg(
+      MF, MBB, MI, AArch64::Z0, AArch64::ZPRRegClass, UsedRegs, SR.ZPRRegs,
+      isInPrologueOrEpilogue(MI) ? nullptr : &SpillSlots.ZPRSpillFI);
+
+  ScopedScavengeOrSpill PredReg(
+      MF, MBB, MI, AArch64::P0, AArch64::PPR_3bRegClass, UsedRegs, SR.PPR3bRegs,
+      isInPrologueOrEpilogue(MI) ? nullptr : &SpillSlots.PPRSpillFI);
+
+  // Elide NZCV spills if we know it is not used.
+  bool IsNZCVUsed = !UsedRegs.available(AArch64::NZCV);
+  std::optional<ScopedScavengeOrSpill> NZCVSaveReg;
+  if (IsNZCVUsed)
+    NZCVSaveReg.emplace(
+        MF, MBB, MI, AArch64::X0, AArch64::GPR64RegClass, UsedRegs, SR.GPRRegs,
+        isInPrologueOrEpilogue(MI) ? nullptr : &SpillSlots.GPRSpillFI);
+  SmallVector<MachineInstr *, 2> MachineInstrs;
+  const DebugLoc &DL = MI.getDebugLoc();
+  MachineInstrs.push_back(BuildMI(MBB, MI, DL, TII->get(AArch64::LDR_ZXI))
+                              .addReg(*ZPredReg, RegState::Define)
+                              .add(MI.getOperand(1))
+                              .addImm(MI.getOperand(2).getImm())
+                              .setMemRefs(MI.memoperands())
+                              .getInstr());
+  if (IsNZCVUsed)
+    MachineInstrs.push_back(
+        BuildMI(MBB, MI, DL, TII->get(AArch64::MRS))
+            .addReg(NZCVSaveReg->freeRegister(), RegState::Define)
+            .addImm(AArch64SysReg::NZCV)
+            .addReg(AArch64::NZCV, RegState::Implicit)
+            .getInstr());
+  MachineInstrs.push_back(BuildMI(MBB, MI, DL, TII->get(AArch64::PTRUE_B))
+                              .addReg(*PredReg, RegState::Define)
+                              .addImm(31));
+  MachineInstrs.push_back(
+      BuildMI(MBB, MI, DL, TII->get(AArch64::CMPNE_PPzZI_B))
+          .addReg(MI.getOperand(0).getReg(), RegState::Define)
+          .addReg(*PredReg)
+          .addReg(*ZPredReg)
+          .addImm(0)
+          .addReg(AArch64::NZCV, RegState::ImplicitDefine)
+          .getInstr());
+  if (IsNZCVUsed)
+    MachineInstrs.push_back(BuildMI(MBB, MI, DL, TII->get(AArch64::MSR))
+                                .addImm(AArch64SysReg::NZCV)
+                                .addReg(NZCVSaveReg->freeRegister())
+                                .addReg(AArch64::NZCV, RegState::ImplicitDefine)
+                                .getInstr());
+
+  propagateFrameFlags(MI, MachineInstrs);
+  return PredReg.hasSpilled();
+}
+
+/// Expands all FILL_PPR_FROM_ZPR_SLOT_PSEUDO and SPILL_PPR_TO_ZPR_SLOT_PSEUDO
+/// operations within the MachineBasicBlock \p MBB.
+static bool expandSMEPPRToZPRSpillPseudos(MachineBasicBlock &MBB,
+                                          const TargetRegisterInfo &TRI,
+                                          ScavengeableRegs const &SR,
+                                          EmergencyStackSlots &SpillSlots) {
+  LiveRegUnits UsedRegs(TRI);
+  UsedRegs.addLiveOuts(MBB);
+  bool HasPPRSpills = false;
+  for (MachineInstr &MI : make_early_inc_range(reverse(MBB))) {
+    UsedRegs.stepBackward(MI);
+    switch (MI.getOpcode()) {
+    case AArch64::FILL_PPR_FROM_ZPR_SLOT_PSEUDO:
+      HasPPRSpills |= expandFillPPRFromZPRSlotPseudo(MBB, MI, TRI, UsedRegs, SR,
+                                                     SpillSlots);
+      MI.eraseFromParent();
+      break;
+    case AArch64::SPILL_PPR_TO_ZPR_SLOT_PSEUDO:
+      expandSpillPPRToZPRSlotPseudo(MBB, MI, TRI, UsedRegs, SR, SpillSlots);
+      MI.eraseFromParent();
+      break;
+    default:
+      break;
+    }
+  }
+
+  return HasPPRSpills;
+}
+
 void AArch64FrameLowering::processFunctionBeforeFrameFinalized(
     MachineFunction &MF, RegScavenger *RS) const {
+
+  AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
+  const TargetSubtargetInfo &TSI = MF.getSubtarget();
+  const TargetRegisterInfo &TRI = *TSI.getRegisterInfo();
+
+  // If predicate spills are 16 bytes, we may need to expand
+  // SPILL_PPR_TO_ZPR_SLOT_PSEUDO/FILL_PPR_FROM_ZPR_SLOT_PSEUDO.
+  if (AFI->hasStackFrame() && TRI.getSpillSize(AArch64::PPRRegClass) == 16) {
+    auto ComputeScavengeableRegisters = [&](unsigned RegClassID) {
+      BitVector Regs = TRI.getAllocatableSet(MF, TRI.getRegClass(RegClassID));
+      assert(Regs.count() > 0 && "Expected scavengeable registers");
+      return Regs;
+    };
+
+    ScavengeableRegs SR{};
+    SR.ZPRRegs = ComputeScavengeableRegisters(AArch64::ZPRRegClassID);
+    // Only p0-7 are possible as the second operand of cmpne (needed for fills).
+    SR.PPR3bRegs = ComputeScavengeableRegisters(AArch64::PPR_3bRegClassID);
+    SR.GPRRegs = ComputeScavengeableRegisters(AArch64::GPR64RegClassID);
+
+    EmergencyStackSlots SpillSlots;
+    for (MachineBasicBlock &MBB : MF) {
+      // In the case where we had to spill a predicate (in the range p0-p7) to
+      // reload a predicate (>= p8), additional spill/fill pseudos will be
+      // created. These need an additional expansion pass. Note: There will
+      // only be at most two expansion passes, as spilling/filling a predicate
+      // in the range p0-p7 never requires spilling another predicate.
+      for (int Pass = 0; Pass < 2; Pass++) {
+        bool HasPPRSpills =
+            expandSMEPPRToZPRSpillPseudos(MBB, TRI, SR, SpillSlots);
+        assert((Pass == 0 || !HasPPRSpills) && "Did not expect PPR spills");
+        if (!HasPPRSpills)
+          break;
+      }
+    }
+  }
+
   MachineFrameInfo &MFI = MF.getFrameInfo();
 
   assert(getStackGrowthDirection() == TargetFrameLowering::StackGrowsDown &&
@@ -4170,7 +4470,6 @@ void AArch64FrameLowering::processFunctionBeforeFrameFinalized(
   int64_t SVEStackSize =
       assignSVEStackObjectOffsets(MFI, MinCSFrameIndex, MaxCSFrameIndex);
 
-  AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
   AFI->setStackSizeSVE(alignTo(SVEStackSize, 16U));
   AFI->setMinMaxSVECSFrameIndex(MinCSFrameIndex, MaxCSFrameIndex);
 
@@ -5204,9 +5503,13 @@ void AArch64FrameLowering::emitRemarks(
 
     unsigned RegTy = StackAccess::AccessType::GPR;
     if (MFI.getStackID(FrameIdx) == TargetStackID::ScalableVector) {
-      if (AArch64::PPRRegClass.contains(MI.getOperand(0).getReg()))
+      // SPILL_PPR_TO_ZPR_SLOT_PSEUDO and FILL_PPR_FROM_ZPR_SLOT_PSEUDO
+      // spill/fill the predicate as a data vector (so count as an FPR access).
+ if (MI.getOpcode() != AArch64::SPILL_PPR_TO_ZPR_SLOT_PSEUDO && + MI.getOpcode() != AArch64::FILL_PPR_FROM_ZPR_SLOT_PSEUDO && + AArch64::PPRRegClass.contains(MI.getOperand(0).getReg())) { RegTy = StackAccess::PPR; - else + } else RegTy = StackAccess::FPR; } else if (AArch64InstrInfo::isFpOrNEON(MI)) { RegTy = StackAccess::FPR; diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp index 17dd8a073eff0..0f2b969fba35c 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp @@ -81,7 +81,7 @@ static cl::opt AArch64InstrInfo::AArch64InstrInfo(const AArch64Subtarget &STI) : AArch64GenInstrInfo(AArch64::ADJCALLSTACKDOWN, AArch64::ADJCALLSTACKUP, AArch64::CATCHRET), - RI(STI.getTargetTriple()), Subtarget(STI) {} + RI(STI.getTargetTriple(), STI.getHwMode()), Subtarget(STI) {} /// GetInstSize - Return the number of bytes of code the specified /// instruction may be. This returns the maximum number of bytes. @@ -2438,6 +2438,8 @@ unsigned AArch64InstrInfo::getLoadStoreImmIdx(unsigned Opc) { case AArch64::STZ2Gi: case AArch64::STZGi: case AArch64::TAGPstack: + case AArch64::SPILL_PPR_TO_ZPR_SLOT_PSEUDO: + case AArch64::FILL_PPR_FROM_ZPR_SLOT_PSEUDO: return 2; case AArch64::LD1B_D_IMM: case AArch64::LD1B_H_IMM: @@ -4223,6 +4225,8 @@ bool AArch64InstrInfo::getMemOpInfo(unsigned Opcode, TypeSize &Scale, MinOffset = -256; MaxOffset = 254; break; + case AArch64::SPILL_PPR_TO_ZPR_SLOT_PSEUDO: + case AArch64::FILL_PPR_FROM_ZPR_SLOT_PSEUDO: case AArch64::LDR_ZXI: case AArch64::STR_ZXI: Scale = TypeSize::getScalable(16); @@ -5355,6 +5359,11 @@ void AArch64InstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB, "Unexpected register store without SVE store instructions"); Opc = AArch64::STR_ZXI; StackID = TargetStackID::ScalableVector; + } else if (AArch64::PPRRegClass.hasSubClassEq(RC)) { + assert(Subtarget.isSVEorStreamingSVEAvailable() && + "Unexpected predicate store without SVE store instructions"); + Opc = AArch64::SPILL_PPR_TO_ZPR_SLOT_PSEUDO; + StackID = TargetStackID::ScalableVector; } break; case 24: @@ -5527,6 +5536,11 @@ void AArch64InstrInfo::loadRegFromStackSlot( "Unexpected register load without SVE load instructions"); Opc = AArch64::LDR_ZXI; StackID = TargetStackID::ScalableVector; + } else if (AArch64::PPRRegClass.hasSubClassEq(RC)) { + assert(Subtarget.isSVEorStreamingSVEAvailable() && + "Unexpected predicate load without SVE load instructions"); + Opc = AArch64::FILL_PPR_FROM_ZPR_SLOT_PSEUDO; + StackID = TargetStackID::ScalableVector; } break; case 24: diff --git a/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp b/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp index 49f6860346fa1..8fd34325bb00d 100644 --- a/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp @@ -39,8 +39,8 @@ using namespace llvm; #define GET_REGINFO_TARGET_DESC #include "AArch64GenRegisterInfo.inc" -AArch64RegisterInfo::AArch64RegisterInfo(const Triple &TT) - : AArch64GenRegisterInfo(AArch64::LR), TT(TT) { +AArch64RegisterInfo::AArch64RegisterInfo(const Triple &TT, unsigned HwMode) + : AArch64GenRegisterInfo(AArch64::LR, 0, 0, 0, HwMode), TT(TT) { AArch64_MC::initLLVMToCVRegMapping(this); } diff --git a/llvm/lib/Target/AArch64/AArch64RegisterInfo.h b/llvm/lib/Target/AArch64/AArch64RegisterInfo.h index 11da624af4881..898a509f75908 100644 --- a/llvm/lib/Target/AArch64/AArch64RegisterInfo.h +++ b/llvm/lib/Target/AArch64/AArch64RegisterInfo.h @@ -27,7 +27,7 @@ class 
AArch64RegisterInfo final : public AArch64GenRegisterInfo {
   const Triple &TT;
 
 public:
-  AArch64RegisterInfo(const Triple &TT);
+  AArch64RegisterInfo(const Triple &TT, unsigned HwMode);
 
   // FIXME: This should be tablegen'd like getDwarfRegNum is
   int getSEHRegNum(unsigned i) const {
diff --git a/llvm/lib/Target/AArch64/AArch64RegisterInfo.td b/llvm/lib/Target/AArch64/AArch64RegisterInfo.td
index dd4f2549929f8..fed9b7b173e9c 100644
--- a/llvm/lib/Target/AArch64/AArch64RegisterInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64RegisterInfo.td
@@ -979,10 +979,19 @@ class ZPRRegOp
 ]>;
+
+def PPRSpillFillRI : RegInfoByHwMode<
+      [DefaultMode, SMEWithZPRPredicateSpills],
+      [RegInfo<16,16,16>, RegInfo<16,128,128>]>;
+
 class PPRClass<int firstreg, int lastreg, int step = 1> : RegisterClass<
                   "AArch64",
                   [ nxv16i1, nxv8i1, nxv4i1, nxv2i1, nxv1i1 ], 16,
                   (sequence "P%u", firstreg, lastreg, step)> {
-  let Size = 16;
+  let RegInfos = PPRSpillFillRI;
 }
 
 def PPR : PPRClass<0, 15> {
diff --git a/llvm/lib/Target/AArch64/AArch64Subtarget.cpp b/llvm/lib/Target/AArch64/AArch64Subtarget.cpp
index bc921f07e1dbf..68c386585a79a 100644
--- a/llvm/lib/Target/AArch64/AArch64Subtarget.cpp
+++ b/llvm/lib/Target/AArch64/AArch64Subtarget.cpp
@@ -86,6 +86,11 @@ static cl::alias AArch64StreamingStackHazardSize(
     cl::desc("alias for -aarch64-streaming-hazard-size"),
     cl::aliasopt(AArch64StreamingHazardSize));
 
+static cl::opt<bool> EnableZPRPredicateSpills(
+    "aarch64-enable-zpr-predicate-spills", cl::init(false), cl::Hidden,
+    cl::desc(
+        "Enables spilling/reloading SVE predicates as data vectors (ZPRs)"));
+
 // Subreg liveness tracking is disabled by default for now until all issues
 // are ironed out. This option allows the feature to be used in tests.
 static cl::opt<bool>
@@ -400,6 +405,20 @@ AArch64Subtarget::AArch64Subtarget(const Triple &TT, StringRef CPU,
   EnableSubregLiveness = EnableSubregLivenessTracking.getValue();
 }
 
+unsigned AArch64Subtarget::getHwModeSet() const {
+  AArch64HwModeBits Modes = AArch64HwModeBits::DefaultMode;
+
+  // Use a special hardware mode in streaming[-compatible] functions with
+  // aarch64-enable-zpr-predicate-spills. This changes the spill size (and
+  // alignment) for the predicate register class.
+ if (EnableZPRPredicateSpills.getValue() && + (isStreaming() || isStreamingCompatible())) { + Modes |= AArch64HwModeBits::SMEWithZPRPredicateSpills; + } + + return to_underlying(Modes); +} + const CallLowering *AArch64Subtarget::getCallLowering() const { return CallLoweringInfo.get(); } diff --git a/llvm/lib/Target/AArch64/AArch64Subtarget.h b/llvm/lib/Target/AArch64/AArch64Subtarget.h index d22991224d496..e7757907a6643 100644 --- a/llvm/lib/Target/AArch64/AArch64Subtarget.h +++ b/llvm/lib/Target/AArch64/AArch64Subtarget.h @@ -130,6 +130,8 @@ class AArch64Subtarget final : public AArch64GenSubtargetInfo { bool IsStreaming = false, bool IsStreamingCompatible = false, bool HasMinSize = false); + virtual unsigned getHwModeSet() const override; + // Getters for SubtargetFeatures defined in tablegen #define GET_SUBTARGETINFO_MACRO(ATTRIBUTE, DEFAULT, GETTER) \ bool GETTER() const { return ATTRIBUTE; } diff --git a/llvm/lib/Target/AArch64/SMEInstrFormats.td b/llvm/lib/Target/AArch64/SMEInstrFormats.td index a01d59d0e5c43..0ac131e48c4f8 100644 --- a/llvm/lib/Target/AArch64/SMEInstrFormats.td +++ b/llvm/lib/Target/AArch64/SMEInstrFormats.td @@ -59,6 +59,20 @@ def FORM_TRANSPOSED_REG_TUPLE_X4_PSEUDO : let hasPostISelHook = 1; } +def SPILL_PPR_TO_ZPR_SLOT_PSEUDO : + Pseudo<(outs), (ins PPRorPNRAny:$Pt, GPR64sp:$Rn, simm9:$imm9), []>, Sched<[]> +{ + let mayStore = 1; + let hasSideEffects = 0; +} + +def FILL_PPR_FROM_ZPR_SLOT_PSEUDO : + Pseudo<(outs PPRorPNRAny:$Pt), (ins GPR64sp:$Rn, simm9:$imm9), []>, Sched<[]> +{ + let mayLoad = 1; + let hasSideEffects = 0; +} + def SDTZALoadStore : SDTypeProfile<0, 3, [SDTCisInt<0>, SDTCisPtrTy<1>, SDTCisInt<2>]>; def AArch64SMELdr : SDNode<"AArch64ISD::SME_ZA_LDR", SDTZALoadStore, [SDNPHasChain, SDNPSideEffect, SDNPMayLoad]>; diff --git a/llvm/test/CodeGen/AArch64/spill-fill-zpr-predicates.mir b/llvm/test/CodeGen/AArch64/spill-fill-zpr-predicates.mir new file mode 100644 index 0000000000000..b58f91ac68a93 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/spill-fill-zpr-predicates.mir @@ -0,0 +1,1070 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5 +# RUN: llc -mtriple=aarch64-linux-gnu -aarch64-enable-zpr-predicate-spills -run-pass=greedy %s -o - | FileCheck %s +# RUN: llc -mtriple=aarch64-linux-gnu -aarch64-enable-zpr-predicate-spills -start-before=greedy -stop-after=aarch64-expand-pseudo -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=EXPAND +--- | + source_filename = "" + target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" + target triple = "aarch64--linux-gnu" + + define aarch64_sve_vector_pcs void @zpr_predicate_spill() #0 { entry: unreachable } + + define aarch64_sve_vector_pcs void @zpr_predicate_spill__save_restore_nzcv() #0 { entry: unreachable } + + define aarch64_sve_vector_pcs void @zpr_predicate_spill__save_restore_nzcv__scavenge_csr_gpr() #0 { entry: unreachable } + + define aarch64_sve_vector_pcs void @zpr_predicate_spill__spill_zpr() #0 { entry: unreachable } + + define aarch64_sve_vector_pcs void @zpr_predicate_spill_above_p7() #0 { entry: unreachable } + + define aarch64_sve_vector_pcs void @zpr_predicate_spill_p4_saved() #0 { entry: unreachable } + + attributes #0 = {nounwind "target-features"="+sme,+sve" "aarch64_pstate_sm_compatible"} +... 
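+# The CHECK prefix (first RUN line) stops after the greedy register allocator,
+# so predicate spills/reloads still appear as SPILL_PPR_TO_ZPR_SLOT_PSEUDO /
+# FILL_PPR_FROM_ZPR_SLOT_PSEUDO against 16-byte scalable spill slots. The
+# EXPAND prefix (second RUN line) continues to aarch64-expand-pseudo and shows
+# the expanded sequences: CPY_ZPzI_B + STR_ZXI to store a predicate, and
+# LDR_ZXI + PTRUE_B + CMPNE_PPzZI_B to reload it.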
+--- +name: zpr_predicate_spill +tracksRegLiveness: true +stack: +liveins: + - { reg: '$p0' } +body: | + bb.0.entry: + liveins: $p0 + + ; CHECK-LABEL: name: zpr_predicate_spill + ; CHECK: stack: + ; CHECK: - { id: 0, name: '', type: spill-slot, offset: 0, size: 16, alignment: 16, + ; CHECK-NEXT: stack-id: scalable-vector, callee-saved-register: + ; CHECK: liveins: $p0 + ; CHECK-NEXT: {{ $}} + ; + ; CHECK-NEXT: SPILL_PPR_TO_ZPR_SLOT_PSEUDO $p0, %stack.0, 0 :: (store (s128) into %stack.0) + ; + ; CHECK-NEXT: $p0 = IMPLICIT_DEF + ; CHECK-NEXT: $p1 = IMPLICIT_DEF + ; CHECK-NEXT: $p2 = IMPLICIT_DEF + ; CHECK-NEXT: $p3 = IMPLICIT_DEF + ; CHECK-NEXT: $p4 = IMPLICIT_DEF + ; CHECK-NEXT: $p5 = IMPLICIT_DEF + ; CHECK-NEXT: $p6 = IMPLICIT_DEF + ; CHECK-NEXT: $p7 = IMPLICIT_DEF + ; CHECK-NEXT: $p8 = IMPLICIT_DEF + ; CHECK-NEXT: $p9 = IMPLICIT_DEF + ; CHECK-NEXT: $p10 = IMPLICIT_DEF + ; CHECK-NEXT: $p11 = IMPLICIT_DEF + ; CHECK-NEXT: $p12 = IMPLICIT_DEF + ; CHECK-NEXT: $p13 = IMPLICIT_DEF + ; CHECK-NEXT: $p14 = IMPLICIT_DEF + ; CHECK-NEXT: $p15 = IMPLICIT_DEF + ; + ; CHECK-NEXT: $p0 = FILL_PPR_FROM_ZPR_SLOT_PSEUDO %stack.0, 0 :: (load (s128) from %stack.0) + ; + ; CHECK-NEXT: RET_ReallyLR implicit $p0 + + ; EXPAND-LABEL: name: zpr_predicate_spill + ; EXPAND: liveins: $p0, $fp, $p15, $p14, $p13, $p12, $p11, $p10, $p9, $p8, $p7, $p6, $p5, $p4 + ; EXPAND-NEXT: {{ $}} + ; + ; EXPAND-NEXT: $sp = frame-setup SUBXri $sp, 1040, 0 + ; EXPAND-NEXT: frame-setup STRXui killed $fp, $sp, 128 :: (store (s64) into %stack.14) + ; EXPAND-NEXT: $sp = frame-setup ADDVL_XXI $sp, -12, implicit $vg + ; EXPAND-NEXT: $z0 = frame-setup CPY_ZPzI_B killed $p15, 1, 0 + ; EXPAND-NEXT: frame-setup STR_ZXI $z0, $sp, 0 :: (store (s128) into %stack.13) + ; EXPAND-NEXT: $z0 = frame-setup CPY_ZPzI_B killed $p14, 1, 0 + ; EXPAND-NEXT: frame-setup STR_ZXI $z0, $sp, 1 :: (store (s128) into %stack.12) + ; EXPAND-NEXT: $z0 = frame-setup CPY_ZPzI_B killed $p13, 1, 0 + ; EXPAND-NEXT: frame-setup STR_ZXI $z0, $sp, 2 :: (store (s128) into %stack.11) + ; EXPAND-NEXT: $z0 = frame-setup CPY_ZPzI_B killed $p12, 1, 0 + ; EXPAND-NEXT: frame-setup STR_ZXI $z0, $sp, 3 :: (store (s128) into %stack.10) + ; EXPAND-NEXT: $z0 = frame-setup CPY_ZPzI_B killed $p11, 1, 0 + ; EXPAND-NEXT: frame-setup STR_ZXI $z0, $sp, 4 :: (store (s128) into %stack.9) + ; EXPAND-NEXT: $z0 = frame-setup CPY_ZPzI_B killed $p10, 1, 0 + ; EXPAND-NEXT: frame-setup STR_ZXI $z0, $sp, 5 :: (store (s128) into %stack.8) + ; EXPAND-NEXT: $z0 = frame-setup CPY_ZPzI_B killed $p9, 1, 0 + ; EXPAND-NEXT: frame-setup STR_ZXI $z0, $sp, 6 :: (store (s128) into %stack.7) + ; EXPAND-NEXT: $z0 = frame-setup CPY_ZPzI_B killed $p8, 1, 0 + ; EXPAND-NEXT: frame-setup STR_ZXI $z0, $sp, 7 :: (store (s128) into %stack.6) + ; EXPAND-NEXT: $z0 = frame-setup CPY_ZPzI_B killed $p7, 1, 0 + ; EXPAND-NEXT: frame-setup STR_ZXI $z0, $sp, 8 :: (store (s128) into %stack.5) + ; EXPAND-NEXT: $z0 = frame-setup CPY_ZPzI_B killed $p6, 1, 0 + ; EXPAND-NEXT: frame-setup STR_ZXI $z0, $sp, 9 :: (store (s128) into %stack.4) + ; EXPAND-NEXT: $z0 = frame-setup CPY_ZPzI_B killed $p5, 1, 0 + ; EXPAND-NEXT: frame-setup STR_ZXI $z0, $sp, 10 :: (store (s128) into %stack.3) + ; EXPAND-NEXT: $z0 = frame-setup CPY_ZPzI_B killed $p4, 1, 0 + ; EXPAND-NEXT: frame-setup STR_ZXI $z0, $sp, 11 :: (store (s128) into %stack.2) + ; EXPAND-NEXT: $sp = frame-setup SUBXri $sp, 1024, 0 + ; EXPAND-NEXT: $sp = frame-setup ADDVL_XXI $sp, -1, implicit $vg + ; + ; EXPAND-NEXT: $z0 = CPY_ZPzI_B $p0, 1, 0 + ; EXPAND-NEXT: $x8 = ADDXri $sp, 1024, 0 + ; 
EXPAND-NEXT: STR_ZXI $z0, $x8, 0 :: (store (s128) into %stack.0) + ; + ; EXPAND-NEXT: $p0 = IMPLICIT_DEF + ; EXPAND-NEXT: $p1 = IMPLICIT_DEF + ; EXPAND-NEXT: $p2 = IMPLICIT_DEF + ; EXPAND-NEXT: $p3 = IMPLICIT_DEF + ; EXPAND-NEXT: $p4 = IMPLICIT_DEF + ; EXPAND-NEXT: $p5 = IMPLICIT_DEF + ; EXPAND-NEXT: $p6 = IMPLICIT_DEF + ; EXPAND-NEXT: $p7 = IMPLICIT_DEF + ; EXPAND-NEXT: $p8 = IMPLICIT_DEF + ; EXPAND-NEXT: $p9 = IMPLICIT_DEF + ; EXPAND-NEXT: $p10 = IMPLICIT_DEF + ; EXPAND-NEXT: $p11 = IMPLICIT_DEF + ; EXPAND-NEXT: $p12 = IMPLICIT_DEF + ; EXPAND-NEXT: $p13 = IMPLICIT_DEF + ; EXPAND-NEXT: $p14 = IMPLICIT_DEF + ; EXPAND-NEXT: $p15 = IMPLICIT_DEF + ; + ; EXPAND-NEXT: $z0 = LDR_ZXI killed $x8, 0 :: (load (s128) from %stack.0) + ; EXPAND-NEXT: $p0 = PTRUE_B 31, implicit $vg + ; EXPAND-NEXT: $p0 = CMPNE_PPzZI_B $p0, $z0, 0, implicit-def $nzcv, implicit-def $nzcv + ; + ; EXPAND-NEXT: $sp = frame-destroy ADDXri $sp, 1024, 0 + ; EXPAND-NEXT: $sp = frame-destroy ADDVL_XXI $sp, 1, implicit $vg + ; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 0 :: (load (s128) from %stack.13) + ; EXPAND-NEXT: $p1 = frame-destroy PTRUE_B 31, implicit $vg + ; EXPAND-NEXT: $p15 = frame-destroy CMPNE_PPzZI_B $p1, $z0, 0, implicit-def $nzcv, implicit-def $nzcv + ; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 1 :: (load (s128) from %stack.12) + ; EXPAND-NEXT: $p1 = frame-destroy PTRUE_B 31, implicit $vg + ; EXPAND-NEXT: $p14 = frame-destroy CMPNE_PPzZI_B $p1, $z0, 0, implicit-def $nzcv, implicit-def $nzcv + ; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 2 :: (load (s128) from %stack.11) + ; EXPAND-NEXT: $p1 = frame-destroy PTRUE_B 31, implicit $vg + ; EXPAND-NEXT: $p13 = frame-destroy CMPNE_PPzZI_B $p1, $z0, 0, implicit-def $nzcv, implicit-def $nzcv + ; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 3 :: (load (s128) from %stack.10) + ; EXPAND-NEXT: $p1 = frame-destroy PTRUE_B 31, implicit $vg + ; EXPAND-NEXT: $p12 = frame-destroy CMPNE_PPzZI_B $p1, $z0, 0, implicit-def $nzcv, implicit-def $nzcv + ; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 4 :: (load (s128) from %stack.9) + ; EXPAND-NEXT: $p1 = frame-destroy PTRUE_B 31, implicit $vg + ; EXPAND-NEXT: $p11 = frame-destroy CMPNE_PPzZI_B $p1, $z0, 0, implicit-def $nzcv, implicit-def $nzcv + ; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 5 :: (load (s128) from %stack.8) + ; EXPAND-NEXT: $p1 = frame-destroy PTRUE_B 31, implicit $vg + ; EXPAND-NEXT: $p10 = frame-destroy CMPNE_PPzZI_B $p1, $z0, 0, implicit-def $nzcv, implicit-def $nzcv + ; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 6 :: (load (s128) from %stack.7) + ; EXPAND-NEXT: $p1 = frame-destroy PTRUE_B 31, implicit $vg + ; EXPAND-NEXT: $p9 = frame-destroy CMPNE_PPzZI_B $p1, $z0, 0, implicit-def $nzcv, implicit-def $nzcv + ; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 7 :: (load (s128) from %stack.6) + ; EXPAND-NEXT: $p1 = frame-destroy PTRUE_B 31, implicit $vg + ; EXPAND-NEXT: $p8 = frame-destroy CMPNE_PPzZI_B $p1, $z0, 0, implicit-def $nzcv, implicit-def $nzcv + ; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 8 :: (load (s128) from %stack.5) + ; EXPAND-NEXT: $p1 = frame-destroy PTRUE_B 31, implicit $vg + ; EXPAND-NEXT: $p7 = frame-destroy CMPNE_PPzZI_B $p1, $z0, 0, implicit-def $nzcv, implicit-def $nzcv + ; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 9 :: (load (s128) from %stack.4) + ; EXPAND-NEXT: $p1 = frame-destroy PTRUE_B 31, implicit $vg + ; EXPAND-NEXT: $p6 = frame-destroy CMPNE_PPzZI_B $p1, $z0, 0, implicit-def $nzcv, implicit-def $nzcv + ; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 10 :: (load 
(s128) from %stack.3) + ; EXPAND-NEXT: $p1 = frame-destroy PTRUE_B 31, implicit $vg + ; EXPAND-NEXT: $p5 = frame-destroy CMPNE_PPzZI_B $p1, $z0, 0, implicit-def $nzcv, implicit-def $nzcv + ; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 11 :: (load (s128) from %stack.2) + ; EXPAND-NEXT: $p1 = frame-destroy PTRUE_B 31, implicit $vg + ; EXPAND-NEXT: $p4 = frame-destroy CMPNE_PPzZI_B $p1, $z0, 0, implicit-def $nzcv, implicit-def $nzcv + ; EXPAND-NEXT: $sp = frame-destroy ADDVL_XXI $sp, 12, implicit $vg + ; EXPAND-NEXT: $fp = frame-destroy LDRXui $sp, 128 :: (load (s64) from %stack.14) + ; EXPAND-NEXT: $sp = frame-destroy ADDXri $sp, 1040, 0 + ; EXPAND-NEXT: RET undef $lr, implicit $p0 + %1:ppr = COPY $p0 + + $p0 = IMPLICIT_DEF + $p1 = IMPLICIT_DEF + $p2 = IMPLICIT_DEF + $p3 = IMPLICIT_DEF + $p4 = IMPLICIT_DEF + $p5 = IMPLICIT_DEF + $p6 = IMPLICIT_DEF + $p7 = IMPLICIT_DEF + $p8 = IMPLICIT_DEF + $p9 = IMPLICIT_DEF + $p10 = IMPLICIT_DEF + $p11 = IMPLICIT_DEF + $p12 = IMPLICIT_DEF + $p13 = IMPLICIT_DEF + $p14 = IMPLICIT_DEF + $p15 = IMPLICIT_DEF + + $p0 = COPY %1 + + RET_ReallyLR implicit $p0 +... +--- +name: zpr_predicate_spill__save_restore_nzcv +tracksRegLiveness: true +stack: +liveins: + - { reg: '$p0' } +body: | + bb.0.entry: + liveins: $p0 + + ; CHECK-LABEL: name: zpr_predicate_spill__save_restore_nzcv + ; CHECK: stack: + ; CHECK: - { id: 0, name: '', type: spill-slot, offset: 0, size: 16, alignment: 16, + ; CHECK-NEXT: stack-id: scalable-vector, callee-saved-register: + ; CHECK: liveins: $p0 + ; CHECK-NEXT: {{ $}} + ; + ; CHECK-NEXT: $nzcv = IMPLICIT_DEF + ; + ; CHECK-NEXT: SPILL_PPR_TO_ZPR_SLOT_PSEUDO $p0, %stack.0, 0 :: (store (s128) into %stack.0) + ; + ; CHECK-NEXT: $p0 = IMPLICIT_DEF + ; CHECK-NEXT: $p1 = IMPLICIT_DEF + ; CHECK-NEXT: $p2 = IMPLICIT_DEF + ; CHECK-NEXT: $p3 = IMPLICIT_DEF + ; CHECK-NEXT: $p4 = IMPLICIT_DEF + ; CHECK-NEXT: $p5 = IMPLICIT_DEF + ; CHECK-NEXT: $p6 = IMPLICIT_DEF + ; CHECK-NEXT: $p7 = IMPLICIT_DEF + ; CHECK-NEXT: $p8 = IMPLICIT_DEF + ; CHECK-NEXT: $p9 = IMPLICIT_DEF + ; CHECK-NEXT: $p10 = IMPLICIT_DEF + ; CHECK-NEXT: $p11 = IMPLICIT_DEF + ; CHECK-NEXT: $p12 = IMPLICIT_DEF + ; CHECK-NEXT: $p13 = IMPLICIT_DEF + ; CHECK-NEXT: $p14 = IMPLICIT_DEF + ; CHECK-NEXT: $p15 = IMPLICIT_DEF + ; + ; CHECK-NEXT: $p0 = FILL_PPR_FROM_ZPR_SLOT_PSEUDO %stack.0, 0 :: (load (s128) from %stack.0) + ; + ; CHECK-NEXT: FAKE_USE implicit $nzcv + ; + ; CHECK-NEXT: RET_ReallyLR implicit $p0 + + ; EXPAND-LABEL: name: zpr_predicate_spill__save_restore_nzcv + ; EXPAND: liveins: $p0, $fp, $p15, $p14, $p13, $p12, $p11, $p10, $p9, $p8, $p7, $p6, $p5, $p4 + ; EXPAND-NEXT: {{ $}} + ; + ; EXPAND-NEXT: $sp = frame-setup SUBXri $sp, 1040, 0 + ; EXPAND-NEXT: frame-setup STRXui killed $fp, $sp, 128 :: (store (s64) into %stack.14) + ; EXPAND-NEXT: $sp = frame-setup ADDVL_XXI $sp, -12, implicit $vg + ; EXPAND-NEXT: $z0 = frame-setup CPY_ZPzI_B killed $p15, 1, 0 + ; EXPAND-NEXT: frame-setup STR_ZXI $z0, $sp, 0 :: (store (s128) into %stack.13) + ; EXPAND-NEXT: $z0 = frame-setup CPY_ZPzI_B killed $p14, 1, 0 + ; EXPAND-NEXT: frame-setup STR_ZXI $z0, $sp, 1 :: (store (s128) into %stack.12) + ; EXPAND-NEXT: $z0 = frame-setup CPY_ZPzI_B killed $p13, 1, 0 + ; EXPAND-NEXT: frame-setup STR_ZXI $z0, $sp, 2 :: (store (s128) into %stack.11) + ; EXPAND-NEXT: $z0 = frame-setup CPY_ZPzI_B killed $p12, 1, 0 + ; EXPAND-NEXT: frame-setup STR_ZXI $z0, $sp, 3 :: (store (s128) into %stack.10) + ; EXPAND-NEXT: $z0 = frame-setup CPY_ZPzI_B killed $p11, 1, 0 + ; EXPAND-NEXT: frame-setup STR_ZXI $z0, $sp, 4 :: (store (s128) 
into %stack.9) + ; EXPAND-NEXT: $z0 = frame-setup CPY_ZPzI_B killed $p10, 1, 0 + ; EXPAND-NEXT: frame-setup STR_ZXI $z0, $sp, 5 :: (store (s128) into %stack.8) + ; EXPAND-NEXT: $z0 = frame-setup CPY_ZPzI_B killed $p9, 1, 0 + ; EXPAND-NEXT: frame-setup STR_ZXI $z0, $sp, 6 :: (store (s128) into %stack.7) + ; EXPAND-NEXT: $z0 = frame-setup CPY_ZPzI_B killed $p8, 1, 0 + ; EXPAND-NEXT: frame-setup STR_ZXI $z0, $sp, 7 :: (store (s128) into %stack.6) + ; EXPAND-NEXT: $z0 = frame-setup CPY_ZPzI_B killed $p7, 1, 0 + ; EXPAND-NEXT: frame-setup STR_ZXI $z0, $sp, 8 :: (store (s128) into %stack.5) + ; EXPAND-NEXT: $z0 = frame-setup CPY_ZPzI_B killed $p6, 1, 0 + ; EXPAND-NEXT: frame-setup STR_ZXI $z0, $sp, 9 :: (store (s128) into %stack.4) + ; EXPAND-NEXT: $z0 = frame-setup CPY_ZPzI_B killed $p5, 1, 0 + ; EXPAND-NEXT: frame-setup STR_ZXI $z0, $sp, 10 :: (store (s128) into %stack.3) + ; EXPAND-NEXT: $z0 = frame-setup CPY_ZPzI_B killed $p4, 1, 0 + ; EXPAND-NEXT: frame-setup STR_ZXI $z0, $sp, 11 :: (store (s128) into %stack.2) + ; EXPAND-NEXT: $sp = frame-setup SUBXri $sp, 1024, 0 + ; EXPAND-NEXT: $sp = frame-setup ADDVL_XXI $sp, -1, implicit $vg + ; + ; EXPAND-NEXT: $nzcv = IMPLICIT_DEF + ; + ; EXPAND-NEXT: $z0 = CPY_ZPzI_B $p0, 1, 0 + ; EXPAND-NEXT: $x8 = ADDXri $sp, 1024, 0 + ; EXPAND-NEXT: STR_ZXI $z0, $x8, 0 :: (store (s128) into %stack.0) + ; + ; EXPAND-NEXT: $p0 = IMPLICIT_DEF + ; EXPAND-NEXT: $p1 = IMPLICIT_DEF + ; EXPAND-NEXT: $p2 = IMPLICIT_DEF + ; EXPAND-NEXT: $p3 = IMPLICIT_DEF + ; EXPAND-NEXT: $p4 = IMPLICIT_DEF + ; EXPAND-NEXT: $p5 = IMPLICIT_DEF + ; EXPAND-NEXT: $p6 = IMPLICIT_DEF + ; EXPAND-NEXT: $p7 = IMPLICIT_DEF + ; EXPAND-NEXT: $p8 = IMPLICIT_DEF + ; EXPAND-NEXT: $p9 = IMPLICIT_DEF + ; EXPAND-NEXT: $p10 = IMPLICIT_DEF + ; EXPAND-NEXT: $p11 = IMPLICIT_DEF + ; EXPAND-NEXT: $p12 = IMPLICIT_DEF + ; EXPAND-NEXT: $p13 = IMPLICIT_DEF + ; EXPAND-NEXT: $p14 = IMPLICIT_DEF + ; EXPAND-NEXT: $p15 = IMPLICIT_DEF + ; + ; EXPAND-NEXT: $z0 = LDR_ZXI killed $x8, 0 :: (load (s128) from %stack.0) + ; EXPAND-NEXT: $fp = MRS 55824, implicit-def $nzcv, implicit $nzcv + ; EXPAND-NEXT: $p0 = PTRUE_B 31, implicit $vg + ; EXPAND-NEXT: $p0 = CMPNE_PPzZI_B $p0, $z0, 0, implicit-def $nzcv, implicit-def $nzcv + ; EXPAND-NEXT: MSR 55824, $fp, implicit-def $nzcv + ; + ; EXPAND-NEXT: FAKE_USE implicit $nzcv + ; + ; EXPAND-NEXT: $sp = frame-destroy ADDXri $sp, 1024, 0 + ; EXPAND-NEXT: $sp = frame-destroy ADDVL_XXI $sp, 1, implicit $vg + ; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 0 :: (load (s128) from %stack.13) + ; EXPAND-NEXT: $p1 = frame-destroy PTRUE_B 31, implicit $vg + ; EXPAND-NEXT: $p15 = frame-destroy CMPNE_PPzZI_B $p1, $z0, 0, implicit-def $nzcv, implicit-def $nzcv + ; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 1 :: (load (s128) from %stack.12) + ; EXPAND-NEXT: $p1 = frame-destroy PTRUE_B 31, implicit $vg + ; EXPAND-NEXT: $p14 = frame-destroy CMPNE_PPzZI_B $p1, $z0, 0, implicit-def $nzcv, implicit-def $nzcv + ; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 2 :: (load (s128) from %stack.11) + ; EXPAND-NEXT: $p1 = frame-destroy PTRUE_B 31, implicit $vg + ; EXPAND-NEXT: $p13 = frame-destroy CMPNE_PPzZI_B $p1, $z0, 0, implicit-def $nzcv, implicit-def $nzcv + ; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 3 :: (load (s128) from %stack.10) + ; EXPAND-NEXT: $p1 = frame-destroy PTRUE_B 31, implicit $vg + ; EXPAND-NEXT: $p12 = frame-destroy CMPNE_PPzZI_B $p1, $z0, 0, implicit-def $nzcv, implicit-def $nzcv + ; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 4 :: (load (s128) from %stack.9) + ; EXPAND-NEXT: $p1 = 
frame-destroy PTRUE_B 31, implicit $vg + ; EXPAND-NEXT: $p11 = frame-destroy CMPNE_PPzZI_B $p1, $z0, 0, implicit-def $nzcv, implicit-def $nzcv + ; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 5 :: (load (s128) from %stack.8) + ; EXPAND-NEXT: $p1 = frame-destroy PTRUE_B 31, implicit $vg + ; EXPAND-NEXT: $p10 = frame-destroy CMPNE_PPzZI_B $p1, $z0, 0, implicit-def $nzcv, implicit-def $nzcv + ; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 6 :: (load (s128) from %stack.7) + ; EXPAND-NEXT: $p1 = frame-destroy PTRUE_B 31, implicit $vg + ; EXPAND-NEXT: $p9 = frame-destroy CMPNE_PPzZI_B $p1, $z0, 0, implicit-def $nzcv, implicit-def $nzcv + ; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 7 :: (load (s128) from %stack.6) + ; EXPAND-NEXT: $p1 = frame-destroy PTRUE_B 31, implicit $vg + ; EXPAND-NEXT: $p8 = frame-destroy CMPNE_PPzZI_B $p1, $z0, 0, implicit-def $nzcv, implicit-def $nzcv + ; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 8 :: (load (s128) from %stack.5) + ; EXPAND-NEXT: $p1 = frame-destroy PTRUE_B 31, implicit $vg + ; EXPAND-NEXT: $p7 = frame-destroy CMPNE_PPzZI_B $p1, $z0, 0, implicit-def $nzcv, implicit-def $nzcv + ; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 9 :: (load (s128) from %stack.4) + ; EXPAND-NEXT: $p1 = frame-destroy PTRUE_B 31, implicit $vg + ; EXPAND-NEXT: $p6 = frame-destroy CMPNE_PPzZI_B $p1, $z0, 0, implicit-def $nzcv, implicit-def $nzcv + ; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 10 :: (load (s128) from %stack.3) + ; EXPAND-NEXT: $p1 = frame-destroy PTRUE_B 31, implicit $vg + ; EXPAND-NEXT: $p5 = frame-destroy CMPNE_PPzZI_B $p1, $z0, 0, implicit-def $nzcv, implicit-def $nzcv + ; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 11 :: (load (s128) from %stack.2) + ; EXPAND-NEXT: $p1 = frame-destroy PTRUE_B 31, implicit $vg + ; EXPAND-NEXT: $p4 = frame-destroy CMPNE_PPzZI_B $p1, $z0, 0, implicit-def $nzcv, implicit-def $nzcv + ; EXPAND-NEXT: $sp = frame-destroy ADDVL_XXI $sp, 12, implicit $vg + ; EXPAND-NEXT: $fp = frame-destroy LDRXui $sp, 128 :: (load (s64) from %stack.14) + ; EXPAND-NEXT: $sp = frame-destroy ADDXri $sp, 1040, 0 + ; EXPAND-NEXT: RET undef $lr, implicit $p0 + $nzcv = IMPLICIT_DEF + + %1:ppr = COPY $p0 + + $p0 = IMPLICIT_DEF + $p1 = IMPLICIT_DEF + $p2 = IMPLICIT_DEF + $p3 = IMPLICIT_DEF + $p4 = IMPLICIT_DEF + $p5 = IMPLICIT_DEF + $p6 = IMPLICIT_DEF + $p7 = IMPLICIT_DEF + $p8 = IMPLICIT_DEF + $p9 = IMPLICIT_DEF + $p10 = IMPLICIT_DEF + $p11 = IMPLICIT_DEF + $p12 = IMPLICIT_DEF + $p13 = IMPLICIT_DEF + $p14 = IMPLICIT_DEF + $p15 = IMPLICIT_DEF + + $p0 = COPY %1 + + FAKE_USE implicit $nzcv + + RET_ReallyLR implicit $p0 +... 
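+# This test keeps NZCV and all of x0-x18 live across the predicate reload, so
+# no caller-saved GPR is free at the expansion point; the EXPAND output below
+# reuses the already-saved frame pointer ($fp) both for the spill-slot address
+# and for the MRS/MSR copy that preserves NZCV.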
+--- +name: zpr_predicate_spill__save_restore_nzcv__scavenge_csr_gpr +tracksRegLiveness: true +stack: +liveins: + - { reg: '$p0' } + - { reg: '$x0' } + - { reg: '$x1' } + - { reg: '$x2' } + - { reg: '$x3' } + - { reg: '$x4' } + - { reg: '$x5' } + - { reg: '$x6' } + - { reg: '$x7' } +body: | + bb.0.entry: + liveins: $p0, $x0, $x1, $x2, $x3, $x4, $x5, $x6, $x7 + + ; CHECK-LABEL: name: zpr_predicate_spill__save_restore_nzcv__scavenge_csr_gpr + ; CHECK: stack: + ; CHECK: - { id: 0, name: '', type: spill-slot, offset: 0, size: 16, alignment: 16, + ; CHECK-NEXT: stack-id: scalable-vector, callee-saved-register: + ; CHECK: liveins: $p0, $x0, $x1, $x2, $x3, $x4, $x5, $x6, $x7 + ; CHECK-NEXT: {{ $}} + ; + ; CHECK-NEXT: $nzcv = IMPLICIT_DEF + ; + ; CHECK-NEXT: $x8 = IMPLICIT_DEF + ; CHECK-NEXT: $x9 = IMPLICIT_DEF + ; CHECK-NEXT: $x10 = IMPLICIT_DEF + ; CHECK-NEXT: $x11 = IMPLICIT_DEF + ; CHECK-NEXT: $x12 = IMPLICIT_DEF + ; CHECK-NEXT: $x13 = IMPLICIT_DEF + ; CHECK-NEXT: $x14 = IMPLICIT_DEF + ; CHECK-NEXT: $x15 = IMPLICIT_DEF + ; CHECK-NEXT: $x16 = IMPLICIT_DEF + ; CHECK-NEXT: $x17 = IMPLICIT_DEF + ; CHECK-NEXT: $x18 = IMPLICIT_DEF + ; + ; CHECK-NEXT: SPILL_PPR_TO_ZPR_SLOT_PSEUDO $p0, %stack.0, 0 :: (store (s128) into %stack.0) + ; + ; CHECK-NEXT: $p0 = IMPLICIT_DEF + ; CHECK-NEXT: $p1 = IMPLICIT_DEF + ; CHECK-NEXT: $p2 = IMPLICIT_DEF + ; CHECK-NEXT: $p3 = IMPLICIT_DEF + ; CHECK-NEXT: $p4 = IMPLICIT_DEF + ; CHECK-NEXT: $p5 = IMPLICIT_DEF + ; CHECK-NEXT: $p6 = IMPLICIT_DEF + ; CHECK-NEXT: $p7 = IMPLICIT_DEF + ; CHECK-NEXT: $p8 = IMPLICIT_DEF + ; CHECK-NEXT: $p9 = IMPLICIT_DEF + ; CHECK-NEXT: $p10 = IMPLICIT_DEF + ; CHECK-NEXT: $p11 = IMPLICIT_DEF + ; CHECK-NEXT: $p12 = IMPLICIT_DEF + ; CHECK-NEXT: $p13 = IMPLICIT_DEF + ; CHECK-NEXT: $p14 = IMPLICIT_DEF + ; CHECK-NEXT: $p15 = IMPLICIT_DEF + ; + ; CHECK-NEXT: $p0 = FILL_PPR_FROM_ZPR_SLOT_PSEUDO %stack.0, 0 :: (load (s128) from %stack.0) + ; + ; CHECK-NEXT: FAKE_USE implicit $nzcv, implicit $x8, implicit $x9, implicit $x10, implicit $x11, implicit $x12, implicit $x13, implicit $x14, implicit $x15, implicit $x16, implicit $x17, implicit $x18 + ; + ; CHECK-NEXT: RET_ReallyLR implicit $p0, implicit $x0, implicit $x1, implicit $x2, implicit $x3, implicit $x4, implicit $x5, implicit $x6, implicit $x7, implicit $x8, implicit $x9, implicit $x10, implicit $x11, implicit $x12, implicit $x13, implicit $x14, implicit $x15, implicit $x16, implicit $x17, implicit $x18 + + ; EXPAND-LABEL: name: zpr_predicate_spill__save_restore_nzcv__scavenge_csr_gpr + ; EXPAND: liveins: $p0, $x0, $x1, $x2, $x3, $x4, $x5, $x6, $x7, $fp, $p15, $p14, $p13, $p12, $p11, $p10, $p9, $p8, $p7, $p6, $p5, $p4 + ; EXPAND-NEXT: {{ $}} + ; + ; EXPAND-NEXT: $sp = frame-setup SUBXri $sp, 1040, 0 + ; EXPAND-NEXT: frame-setup STRXui killed $fp, $sp, 128 :: (store (s64) into %stack.14) + ; EXPAND-NEXT: $sp = frame-setup ADDVL_XXI $sp, -12, implicit $vg + ; EXPAND-NEXT: $z0 = frame-setup CPY_ZPzI_B killed $p15, 1, 0 + ; EXPAND-NEXT: frame-setup STR_ZXI $z0, $sp, 0 :: (store (s128) into %stack.13) + ; EXPAND-NEXT: $z0 = frame-setup CPY_ZPzI_B killed $p14, 1, 0 + ; EXPAND-NEXT: frame-setup STR_ZXI $z0, $sp, 1 :: (store (s128) into %stack.12) + ; EXPAND-NEXT: $z0 = frame-setup CPY_ZPzI_B killed $p13, 1, 0 + ; EXPAND-NEXT: frame-setup STR_ZXI $z0, $sp, 2 :: (store (s128) into %stack.11) + ; EXPAND-NEXT: $z0 = frame-setup CPY_ZPzI_B killed $p12, 1, 0 + ; EXPAND-NEXT: frame-setup STR_ZXI $z0, $sp, 3 :: (store (s128) into %stack.10) + ; EXPAND-NEXT: $z0 = frame-setup CPY_ZPzI_B killed $p11, 1, 0 + ; 
EXPAND-NEXT: frame-setup STR_ZXI $z0, $sp, 4 :: (store (s128) into %stack.9) + ; EXPAND-NEXT: $z0 = frame-setup CPY_ZPzI_B killed $p10, 1, 0 + ; EXPAND-NEXT: frame-setup STR_ZXI $z0, $sp, 5 :: (store (s128) into %stack.8) + ; EXPAND-NEXT: $z0 = frame-setup CPY_ZPzI_B killed $p9, 1, 0 + ; EXPAND-NEXT: frame-setup STR_ZXI $z0, $sp, 6 :: (store (s128) into %stack.7) + ; EXPAND-NEXT: $z0 = frame-setup CPY_ZPzI_B killed $p8, 1, 0 + ; EXPAND-NEXT: frame-setup STR_ZXI $z0, $sp, 7 :: (store (s128) into %stack.6) + ; EXPAND-NEXT: $z0 = frame-setup CPY_ZPzI_B killed $p7, 1, 0 + ; EXPAND-NEXT: frame-setup STR_ZXI $z0, $sp, 8 :: (store (s128) into %stack.5) + ; EXPAND-NEXT: $z0 = frame-setup CPY_ZPzI_B killed $p6, 1, 0 + ; EXPAND-NEXT: frame-setup STR_ZXI $z0, $sp, 9 :: (store (s128) into %stack.4) + ; EXPAND-NEXT: $z0 = frame-setup CPY_ZPzI_B killed $p5, 1, 0 + ; EXPAND-NEXT: frame-setup STR_ZXI $z0, $sp, 10 :: (store (s128) into %stack.3) + ; EXPAND-NEXT: $z0 = frame-setup CPY_ZPzI_B killed $p4, 1, 0 + ; EXPAND-NEXT: frame-setup STR_ZXI $z0, $sp, 11 :: (store (s128) into %stack.2) + ; EXPAND-NEXT: $sp = frame-setup SUBXri $sp, 1024, 0 + ; EXPAND-NEXT: $sp = frame-setup ADDVL_XXI $sp, -1, implicit $vg + ; + ; EXPAND-NEXT: $nzcv = IMPLICIT_DEF + ; + ; EXPAND-NEXT: $x8 = IMPLICIT_DEF + ; EXPAND-NEXT: $x9 = IMPLICIT_DEF + ; EXPAND-NEXT: $x10 = IMPLICIT_DEF + ; EXPAND-NEXT: $x11 = IMPLICIT_DEF + ; EXPAND-NEXT: $x12 = IMPLICIT_DEF + ; EXPAND-NEXT: $x13 = IMPLICIT_DEF + ; EXPAND-NEXT: $x14 = IMPLICIT_DEF + ; EXPAND-NEXT: $x15 = IMPLICIT_DEF + ; EXPAND-NEXT: $x16 = IMPLICIT_DEF + ; EXPAND-NEXT: $x17 = IMPLICIT_DEF + ; EXPAND-NEXT: $x18 = IMPLICIT_DEF + ; + ; EXPAND-NEXT: $z0 = CPY_ZPzI_B $p0, 1, 0 + ; EXPAND-NEXT: $fp = ADDXri $sp, 1024, 0 + ; EXPAND-NEXT: STR_ZXI $z0, $fp, 0 :: (store (s128) into %stack.0) + ; + ; EXPAND-NEXT: $p0 = IMPLICIT_DEF + ; EXPAND-NEXT: $p1 = IMPLICIT_DEF + ; EXPAND-NEXT: $p2 = IMPLICIT_DEF + ; EXPAND-NEXT: $p3 = IMPLICIT_DEF + ; EXPAND-NEXT: $p4 = IMPLICIT_DEF + ; EXPAND-NEXT: $p5 = IMPLICIT_DEF + ; EXPAND-NEXT: $p6 = IMPLICIT_DEF + ; EXPAND-NEXT: $p7 = IMPLICIT_DEF + ; EXPAND-NEXT: $p8 = IMPLICIT_DEF + ; EXPAND-NEXT: $p9 = IMPLICIT_DEF + ; EXPAND-NEXT: $p10 = IMPLICIT_DEF + ; EXPAND-NEXT: $p11 = IMPLICIT_DEF + ; EXPAND-NEXT: $p12 = IMPLICIT_DEF + ; EXPAND-NEXT: $p13 = IMPLICIT_DEF + ; EXPAND-NEXT: $p14 = IMPLICIT_DEF + ; EXPAND-NEXT: $p15 = IMPLICIT_DEF + ; + ; EXPAND-NEXT: $z0 = LDR_ZXI killed $fp, 0 :: (load (s128) from %stack.0) + ; EXPAND-NEXT: $fp = MRS 55824, implicit-def $nzcv, implicit $nzcv + ; EXPAND-NEXT: $p0 = PTRUE_B 31, implicit $vg + ; EXPAND-NEXT: $p0 = CMPNE_PPzZI_B $p0, $z0, 0, implicit-def $nzcv, implicit-def $nzcv + ; EXPAND-NEXT: MSR 55824, $fp, implicit-def $nzcv + ; + ; EXPAND-NEXT: FAKE_USE implicit $nzcv, implicit $x8, implicit $x9, implicit $x10, implicit $x11, implicit $x12, implicit $x13, implicit $x14, implicit $x15, implicit $x16, implicit $x17, implicit $x18 + ; + ; EXPAND-NEXT: $sp = frame-destroy ADDXri $sp, 1024, 0 + ; EXPAND-NEXT: $sp = frame-destroy ADDVL_XXI $sp, 1, implicit $vg + ; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 0 :: (load (s128) from %stack.13) + ; EXPAND-NEXT: $p1 = frame-destroy PTRUE_B 31, implicit $vg + ; EXPAND-NEXT: $p15 = frame-destroy CMPNE_PPzZI_B $p1, $z0, 0, implicit-def $nzcv, implicit-def $nzcv + ; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 1 :: (load (s128) from %stack.12) + ; EXPAND-NEXT: $p1 = frame-destroy PTRUE_B 31, implicit $vg + ; EXPAND-NEXT: $p14 = frame-destroy CMPNE_PPzZI_B $p1, $z0, 0, 
implicit-def $nzcv, implicit-def $nzcv + ; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 2 :: (load (s128) from %stack.11) + ; EXPAND-NEXT: $p1 = frame-destroy PTRUE_B 31, implicit $vg + ; EXPAND-NEXT: $p13 = frame-destroy CMPNE_PPzZI_B $p1, $z0, 0, implicit-def $nzcv, implicit-def $nzcv + ; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 3 :: (load (s128) from %stack.10) + ; EXPAND-NEXT: $p1 = frame-destroy PTRUE_B 31, implicit $vg + ; EXPAND-NEXT: $p12 = frame-destroy CMPNE_PPzZI_B $p1, $z0, 0, implicit-def $nzcv, implicit-def $nzcv + ; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 4 :: (load (s128) from %stack.9) + ; EXPAND-NEXT: $p1 = frame-destroy PTRUE_B 31, implicit $vg + ; EXPAND-NEXT: $p11 = frame-destroy CMPNE_PPzZI_B $p1, $z0, 0, implicit-def $nzcv, implicit-def $nzcv + ; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 5 :: (load (s128) from %stack.8) + ; EXPAND-NEXT: $p1 = frame-destroy PTRUE_B 31, implicit $vg + ; EXPAND-NEXT: $p10 = frame-destroy CMPNE_PPzZI_B $p1, $z0, 0, implicit-def $nzcv, implicit-def $nzcv + ; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 6 :: (load (s128) from %stack.7) + ; EXPAND-NEXT: $p1 = frame-destroy PTRUE_B 31, implicit $vg + ; EXPAND-NEXT: $p9 = frame-destroy CMPNE_PPzZI_B $p1, $z0, 0, implicit-def $nzcv, implicit-def $nzcv + ; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 7 :: (load (s128) from %stack.6) + ; EXPAND-NEXT: $p1 = frame-destroy PTRUE_B 31, implicit $vg + ; EXPAND-NEXT: $p8 = frame-destroy CMPNE_PPzZI_B $p1, $z0, 0, implicit-def $nzcv, implicit-def $nzcv + ; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 8 :: (load (s128) from %stack.5) + ; EXPAND-NEXT: $p1 = frame-destroy PTRUE_B 31, implicit $vg + ; EXPAND-NEXT: $p7 = frame-destroy CMPNE_PPzZI_B $p1, $z0, 0, implicit-def $nzcv, implicit-def $nzcv + ; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 9 :: (load (s128) from %stack.4) + ; EXPAND-NEXT: $p1 = frame-destroy PTRUE_B 31, implicit $vg + ; EXPAND-NEXT: $p6 = frame-destroy CMPNE_PPzZI_B $p1, $z0, 0, implicit-def $nzcv, implicit-def $nzcv + ; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 10 :: (load (s128) from %stack.3) + ; EXPAND-NEXT: $p1 = frame-destroy PTRUE_B 31, implicit $vg + ; EXPAND-NEXT: $p5 = frame-destroy CMPNE_PPzZI_B $p1, $z0, 0, implicit-def $nzcv, implicit-def $nzcv + ; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 11 :: (load (s128) from %stack.2) + ; EXPAND-NEXT: $p1 = frame-destroy PTRUE_B 31, implicit $vg + ; EXPAND-NEXT: $p4 = frame-destroy CMPNE_PPzZI_B $p1, $z0, 0, implicit-def $nzcv, implicit-def $nzcv + ; EXPAND-NEXT: $sp = frame-destroy ADDVL_XXI $sp, 12, implicit $vg + ; EXPAND-NEXT: $fp = frame-destroy LDRXui $sp, 128 :: (load (s64) from %stack.14) + ; EXPAND-NEXT: $sp = frame-destroy ADDXri $sp, 1040, 0 + ; EXPAND-NEXT: RET undef $lr, implicit $p0, implicit $x0, implicit $x1, implicit $x2, implicit $x3, implicit $x4, implicit $x5, implicit $x6, implicit $x7, implicit $x8, implicit $x9, implicit $x10, implicit $x11, implicit $x12, implicit $x13, implicit $x14, implicit $x15, implicit $x16, implicit $x17, implicit $x18 + $nzcv = IMPLICIT_DEF + $x8 = IMPLICIT_DEF + $x9 = IMPLICIT_DEF + $x10 = IMPLICIT_DEF + $x11 = IMPLICIT_DEF + $x12 = IMPLICIT_DEF + $x13 = IMPLICIT_DEF + $x14 = IMPLICIT_DEF + $x15 = IMPLICIT_DEF + $x16 = IMPLICIT_DEF + $x17 = IMPLICIT_DEF + $x18 = IMPLICIT_DEF + + %1:ppr = COPY $p0 + + $p0 = IMPLICIT_DEF + $p1 = IMPLICIT_DEF + $p2 = IMPLICIT_DEF + $p3 = IMPLICIT_DEF + $p4 = IMPLICIT_DEF + $p5 = IMPLICIT_DEF + $p6 = IMPLICIT_DEF + $p7 = IMPLICIT_DEF + $p8 = IMPLICIT_DEF + $p9 = IMPLICIT_DEF 
+ $p10 = IMPLICIT_DEF + $p11 = IMPLICIT_DEF + $p12 = IMPLICIT_DEF + $p13 = IMPLICIT_DEF + $p14 = IMPLICIT_DEF + $p15 = IMPLICIT_DEF + + $p0 = COPY %1 + + FAKE_USE implicit $nzcv, implicit $x8, implicit $x9, implicit $x10, implicit $x11, implicit $x12, implicit $x13, implicit $x14, implicit $x15, implicit $x16, implicit $x17, implicit $x18 + + RET_ReallyLR implicit $p0, implicit $x0, implicit $x1, implicit $x2, implicit $x3, implicit $x4, implicit $x5, implicit $x6, implicit $x7, implicit $x8, implicit $x9, implicit $x10, implicit $x11, implicit $x12, implicit $x13, implicit $x14, implicit $x15, implicit $x16, implicit $x17, implicit $x18 +... +--- +name: zpr_predicate_spill__spill_zpr +tracksRegLiveness: true +stack: +liveins: + - { reg: '$p0' } + - { reg: '$z0' } + - { reg: '$z1' } + - { reg: '$z2' } + - { reg: '$z3' } + - { reg: '$z4' } + - { reg: '$z5' } + - { reg: '$z6' } + - { reg: '$z7' } +body: | + bb.0.entry: + liveins: $p0, $z0, $z1, $z2, $z3, $z4, $z5, $z6, $z7 + + ; CHECK-LABEL: name: zpr_predicate_spill__spill_zpr + ; CHECK: stack: + ; CHECK: - { id: 0, name: '', type: spill-slot, offset: 0, size: 16, alignment: 16, + ; CHECK-NEXT: stack-id: scalable-vector, callee-saved-register: + ; CHECK: liveins: $p0, $z0, $z1, $z2, $z3, $z4, $z5, $z6, $z7 + ; CHECK-NEXT: {{ $}} + ; + ; CHECK-NEXT: $z16 = IMPLICIT_DEF + ; CHECK-NEXT: $z17 = IMPLICIT_DEF + ; CHECK-NEXT: $z18 = IMPLICIT_DEF + ; CHECK-NEXT: $z19 = IMPLICIT_DEF + ; CHECK-NEXT: $z20 = IMPLICIT_DEF + ; CHECK-NEXT: $z21 = IMPLICIT_DEF + ; CHECK-NEXT: $z22 = IMPLICIT_DEF + ; CHECK-NEXT: $z23 = IMPLICIT_DEF + ; CHECK-NEXT: $z24 = IMPLICIT_DEF + ; CHECK-NEXT: $z25 = IMPLICIT_DEF + ; CHECK-NEXT: $z26 = IMPLICIT_DEF + ; CHECK-NEXT: $z27 = IMPLICIT_DEF + ; CHECK-NEXT: $z28 = IMPLICIT_DEF + ; CHECK-NEXT: $z29 = IMPLICIT_DEF + ; CHECK-NEXT: $z30 = IMPLICIT_DEF + ; CHECK-NEXT: $z31 = IMPLICIT_DEF + ; + ; CHECK-NEXT: SPILL_PPR_TO_ZPR_SLOT_PSEUDO $p0, %stack.0, 0 :: (store (s128) into %stack.0) + ; + ; CHECK-NEXT: $p0 = IMPLICIT_DEF + ; CHECK-NEXT: $p1 = IMPLICIT_DEF + ; CHECK-NEXT: $p2 = IMPLICIT_DEF + ; CHECK-NEXT: $p3 = IMPLICIT_DEF + ; CHECK-NEXT: $p4 = IMPLICIT_DEF + ; CHECK-NEXT: $p5 = IMPLICIT_DEF + ; CHECK-NEXT: $p6 = IMPLICIT_DEF + ; CHECK-NEXT: $p7 = IMPLICIT_DEF + ; CHECK-NEXT: $p8 = IMPLICIT_DEF + ; CHECK-NEXT: $p9 = IMPLICIT_DEF + ; CHECK-NEXT: $p10 = IMPLICIT_DEF + ; CHECK-NEXT: $p11 = IMPLICIT_DEF + ; CHECK-NEXT: $p12 = IMPLICIT_DEF + ; CHECK-NEXT: $p13 = IMPLICIT_DEF + ; CHECK-NEXT: $p14 = IMPLICIT_DEF + ; CHECK-NEXT: $p15 = IMPLICIT_DEF + ; + ; CHECK-NEXT: $p0 = FILL_PPR_FROM_ZPR_SLOT_PSEUDO %stack.0, 0 :: (load (s128) from %stack.0) + ; + ; CHECK-NEXT: FAKE_USE implicit $z16, implicit $z17, implicit $z18, implicit $z19, implicit $z20, implicit $z21, implicit $z22, implicit $z23, implicit $z24, implicit $z25, implicit $z26, implicit $z27, implicit $z28, implicit $z29, implicit $z30, implicit $z31 + ; + ; CHECK-NEXT: RET_ReallyLR implicit $p0, implicit $z0, implicit $z1, implicit $z2, implicit $z3, implicit $z4, implicit $z5, implicit $z6, implicit $z7 + + ; EXPAND-LABEL: name: zpr_predicate_spill__spill_zpr + ; EXPAND: liveins: $p0, $z0, $z1, $z2, $z3, $z4, $z5, $z6, $z7, $fp, $p15, $p14, $p13, $p12, $p11, $p10, $p9, $p8, $p7, $p6, $p5, $p4, $z23, $z22, $z21, $z20, $z19, $z18, $z17, $z16 + ; EXPAND-NEXT: {{ $}} + ; + ; EXPAND-NEXT: $sp = frame-setup SUBXri $sp, 1040, 0 + ; EXPAND-NEXT: frame-setup STRXui killed $fp, $sp, 128 :: (store (s64) into %stack.22) + ; EXPAND-NEXT: $sp = frame-setup ADDVL_XXI $sp, -20, implicit $vg + 
; EXPAND-NEXT: $z24 = frame-setup CPY_ZPzI_B killed $p15, 1, 0 + ; EXPAND-NEXT: frame-setup STR_ZXI $z24, $sp, 0 :: (store (s128) into %stack.21) + ; EXPAND-NEXT: $z24 = frame-setup CPY_ZPzI_B killed $p14, 1, 0 + ; EXPAND-NEXT: frame-setup STR_ZXI $z24, $sp, 1 :: (store (s128) into %stack.20) + ; EXPAND-NEXT: $z24 = frame-setup CPY_ZPzI_B killed $p13, 1, 0 + ; EXPAND-NEXT: frame-setup STR_ZXI $z24, $sp, 2 :: (store (s128) into %stack.19) + ; EXPAND-NEXT: $z24 = frame-setup CPY_ZPzI_B killed $p12, 1, 0 + ; EXPAND-NEXT: frame-setup STR_ZXI $z24, $sp, 3 :: (store (s128) into %stack.18) + ; EXPAND-NEXT: $z24 = frame-setup CPY_ZPzI_B killed $p11, 1, 0 + ; EXPAND-NEXT: frame-setup STR_ZXI $z24, $sp, 4 :: (store (s128) into %stack.17) + ; EXPAND-NEXT: $z24 = frame-setup CPY_ZPzI_B killed $p10, 1, 0 + ; EXPAND-NEXT: frame-setup STR_ZXI $z24, $sp, 5 :: (store (s128) into %stack.16) + ; EXPAND-NEXT: $z24 = frame-setup CPY_ZPzI_B killed $p9, 1, 0 + ; EXPAND-NEXT: frame-setup STR_ZXI $z24, $sp, 6 :: (store (s128) into %stack.15) + ; EXPAND-NEXT: $z24 = frame-setup CPY_ZPzI_B killed $p8, 1, 0 + ; EXPAND-NEXT: frame-setup STR_ZXI $z24, $sp, 7 :: (store (s128) into %stack.14) + ; EXPAND-NEXT: $z24 = frame-setup CPY_ZPzI_B killed $p7, 1, 0 + ; EXPAND-NEXT: frame-setup STR_ZXI $z24, $sp, 8 :: (store (s128) into %stack.13) + ; EXPAND-NEXT: $z24 = frame-setup CPY_ZPzI_B killed $p6, 1, 0 + ; EXPAND-NEXT: frame-setup STR_ZXI $z24, $sp, 9 :: (store (s128) into %stack.12) + ; EXPAND-NEXT: $z24 = frame-setup CPY_ZPzI_B killed $p5, 1, 0 + ; EXPAND-NEXT: frame-setup STR_ZXI $z24, $sp, 10 :: (store (s128) into %stack.11) + ; EXPAND-NEXT: $z24 = frame-setup CPY_ZPzI_B killed $p4, 1, 0 + ; EXPAND-NEXT: frame-setup STR_ZXI $z24, $sp, 11 :: (store (s128) into %stack.10) + ; EXPAND-NEXT: frame-setup STR_ZXI killed $z23, $sp, 12 :: (store (s128) into %stack.9) + ; EXPAND-NEXT: frame-setup STR_ZXI killed $z22, $sp, 13 :: (store (s128) into %stack.8) + ; EXPAND-NEXT: frame-setup STR_ZXI killed $z21, $sp, 14 :: (store (s128) into %stack.7) + ; EXPAND-NEXT: frame-setup STR_ZXI killed $z20, $sp, 15 :: (store (s128) into %stack.6) + ; EXPAND-NEXT: frame-setup STR_ZXI killed $z19, $sp, 16 :: (store (s128) into %stack.5) + ; EXPAND-NEXT: frame-setup STR_ZXI killed $z18, $sp, 17 :: (store (s128) into %stack.4) + ; EXPAND-NEXT: frame-setup STR_ZXI killed $z17, $sp, 18 :: (store (s128) into %stack.3) + ; EXPAND-NEXT: frame-setup STR_ZXI killed $z16, $sp, 19 :: (store (s128) into %stack.2) + ; EXPAND-NEXT: $sp = frame-setup SUBXri $sp, 1024, 0 + ; EXPAND-NEXT: $sp = frame-setup ADDVL_XXI $sp, -2, implicit $vg + ; + ; EXPAND-NEXT: $z16 = IMPLICIT_DEF + ; EXPAND-NEXT: $z17 = IMPLICIT_DEF + ; EXPAND-NEXT: $z18 = IMPLICIT_DEF + ; EXPAND-NEXT: $z19 = IMPLICIT_DEF + ; EXPAND-NEXT: $z20 = IMPLICIT_DEF + ; EXPAND-NEXT: $z21 = IMPLICIT_DEF + ; EXPAND-NEXT: $z22 = IMPLICIT_DEF + ; EXPAND-NEXT: $z23 = IMPLICIT_DEF + ; EXPAND-NEXT: $z24 = IMPLICIT_DEF + ; EXPAND-NEXT: $z25 = IMPLICIT_DEF + ; EXPAND-NEXT: $z26 = IMPLICIT_DEF + ; EXPAND-NEXT: $z27 = IMPLICIT_DEF + ; EXPAND-NEXT: $z28 = IMPLICIT_DEF + ; EXPAND-NEXT: $z29 = IMPLICIT_DEF + ; EXPAND-NEXT: $z30 = IMPLICIT_DEF + ; EXPAND-NEXT: $z31 = IMPLICIT_DEF + ; + ; EXPAND-NEXT: $x8 = ADDXri $sp, 1024, 0 + ; EXPAND-NEXT: STR_ZXI $z0, $x8, 0 :: (store (s128) into %stack.24) + ; EXPAND-NEXT: $z0 = CPY_ZPzI_B $p0, 1, 0 + ; EXPAND-NEXT: STR_ZXI $z0, $x8, 1 :: (store (s128) into %stack.0) + ; EXPAND-NEXT: $z0 = LDR_ZXI $x8, 0 :: (load (s128) from %stack.24) + ; + ; EXPAND-NEXT: $p0 = IMPLICIT_DEF + ; 
EXPAND-NEXT: $p1 = IMPLICIT_DEF + ; EXPAND-NEXT: $p2 = IMPLICIT_DEF + ; EXPAND-NEXT: $p3 = IMPLICIT_DEF + ; EXPAND-NEXT: $p4 = IMPLICIT_DEF + ; EXPAND-NEXT: $p5 = IMPLICIT_DEF + ; EXPAND-NEXT: $p6 = IMPLICIT_DEF + ; EXPAND-NEXT: $p7 = IMPLICIT_DEF + ; EXPAND-NEXT: $p8 = IMPLICIT_DEF + ; EXPAND-NEXT: $p9 = IMPLICIT_DEF + ; EXPAND-NEXT: $p10 = IMPLICIT_DEF + ; EXPAND-NEXT: $p11 = IMPLICIT_DEF + ; EXPAND-NEXT: $p12 = IMPLICIT_DEF + ; EXPAND-NEXT: $p13 = IMPLICIT_DEF + ; EXPAND-NEXT: $p14 = IMPLICIT_DEF + ; EXPAND-NEXT: $p15 = IMPLICIT_DEF + ; + ; EXPAND-NEXT: STR_ZXI $z0, $x8, 0 :: (store (s128) into %stack.24) + ; EXPAND-NEXT: $z0 = LDR_ZXI $x8, 1 :: (load (s128) from %stack.0) + ; EXPAND-NEXT: $p0 = PTRUE_B 31, implicit $vg + ; EXPAND-NEXT: $p0 = CMPNE_PPzZI_B $p0, $z0, 0, implicit-def $nzcv, implicit-def $nzcv + ; EXPAND-NEXT: $z0 = LDR_ZXI killed $x8, 0 :: (load (s128) from %stack.24) + ; + ; EXPAND-NEXT: FAKE_USE implicit $z16, implicit $z17, implicit $z18, implicit $z19, implicit $z20, implicit $z21, implicit $z22, implicit $z23, implicit $z24, implicit $z25, implicit $z26, implicit $z27, implicit $z28, implicit $z29, implicit $z30, implicit $z31 + ; + ; EXPAND-NEXT: $sp = frame-destroy ADDXri $sp, 1024, 0 + ; EXPAND-NEXT: $sp = frame-destroy ADDVL_XXI $sp, 2, implicit $vg + ; EXPAND-NEXT: $z23 = frame-destroy LDR_ZXI $sp, 12 :: (load (s128) from %stack.9) + ; EXPAND-NEXT: $z22 = frame-destroy LDR_ZXI $sp, 13 :: (load (s128) from %stack.8) + ; EXPAND-NEXT: $z21 = frame-destroy LDR_ZXI $sp, 14 :: (load (s128) from %stack.7) + ; EXPAND-NEXT: $z20 = frame-destroy LDR_ZXI $sp, 15 :: (load (s128) from %stack.6) + ; EXPAND-NEXT: $z19 = frame-destroy LDR_ZXI $sp, 16 :: (load (s128) from %stack.5) + ; EXPAND-NEXT: $z18 = frame-destroy LDR_ZXI $sp, 17 :: (load (s128) from %stack.4) + ; EXPAND-NEXT: $z17 = frame-destroy LDR_ZXI $sp, 18 :: (load (s128) from %stack.3) + ; EXPAND-NEXT: $z16 = frame-destroy LDR_ZXI $sp, 19 :: (load (s128) from %stack.2) + ; EXPAND-NEXT: $z24 = frame-destroy LDR_ZXI $sp, 0 :: (load (s128) from %stack.21) + ; EXPAND-NEXT: $p1 = frame-destroy PTRUE_B 31, implicit $vg + ; EXPAND-NEXT: $p15 = frame-destroy CMPNE_PPzZI_B $p1, $z24, 0, implicit-def $nzcv, implicit-def $nzcv + ; EXPAND-NEXT: $z24 = frame-destroy LDR_ZXI $sp, 1 :: (load (s128) from %stack.20) + ; EXPAND-NEXT: $p1 = frame-destroy PTRUE_B 31, implicit $vg + ; EXPAND-NEXT: $p14 = frame-destroy CMPNE_PPzZI_B $p1, $z24, 0, implicit-def $nzcv, implicit-def $nzcv + ; EXPAND-NEXT: $z24 = frame-destroy LDR_ZXI $sp, 2 :: (load (s128) from %stack.19) + ; EXPAND-NEXT: $p1 = frame-destroy PTRUE_B 31, implicit $vg + ; EXPAND-NEXT: $p13 = frame-destroy CMPNE_PPzZI_B $p1, $z24, 0, implicit-def $nzcv, implicit-def $nzcv + ; EXPAND-NEXT: $z24 = frame-destroy LDR_ZXI $sp, 3 :: (load (s128) from %stack.18) + ; EXPAND-NEXT: $p1 = frame-destroy PTRUE_B 31, implicit $vg + ; EXPAND-NEXT: $p12 = frame-destroy CMPNE_PPzZI_B $p1, $z24, 0, implicit-def $nzcv, implicit-def $nzcv + ; EXPAND-NEXT: $z24 = frame-destroy LDR_ZXI $sp, 4 :: (load (s128) from %stack.17) + ; EXPAND-NEXT: $p1 = frame-destroy PTRUE_B 31, implicit $vg + ; EXPAND-NEXT: $p11 = frame-destroy CMPNE_PPzZI_B $p1, $z24, 0, implicit-def $nzcv, implicit-def $nzcv + ; EXPAND-NEXT: $z24 = frame-destroy LDR_ZXI $sp, 5 :: (load (s128) from %stack.16) + ; EXPAND-NEXT: $p1 = frame-destroy PTRUE_B 31, implicit $vg + ; EXPAND-NEXT: $p10 = frame-destroy CMPNE_PPzZI_B $p1, $z24, 0, implicit-def $nzcv, implicit-def $nzcv + ; EXPAND-NEXT: $z24 = frame-destroy LDR_ZXI $sp, 6 :: (load 
(s128) from %stack.15) + ; EXPAND-NEXT: $p1 = frame-destroy PTRUE_B 31, implicit $vg + ; EXPAND-NEXT: $p9 = frame-destroy CMPNE_PPzZI_B $p1, $z24, 0, implicit-def $nzcv, implicit-def $nzcv + ; EXPAND-NEXT: $z24 = frame-destroy LDR_ZXI $sp, 7 :: (load (s128) from %stack.14) + ; EXPAND-NEXT: $p1 = frame-destroy PTRUE_B 31, implicit $vg + ; EXPAND-NEXT: $p8 = frame-destroy CMPNE_PPzZI_B $p1, $z24, 0, implicit-def $nzcv, implicit-def $nzcv + ; EXPAND-NEXT: $z24 = frame-destroy LDR_ZXI $sp, 8 :: (load (s128) from %stack.13) + ; EXPAND-NEXT: $p1 = frame-destroy PTRUE_B 31, implicit $vg + ; EXPAND-NEXT: $p7 = frame-destroy CMPNE_PPzZI_B $p1, $z24, 0, implicit-def $nzcv, implicit-def $nzcv + ; EXPAND-NEXT: $z24 = frame-destroy LDR_ZXI $sp, 9 :: (load (s128) from %stack.12) + ; EXPAND-NEXT: $p1 = frame-destroy PTRUE_B 31, implicit $vg + ; EXPAND-NEXT: $p6 = frame-destroy CMPNE_PPzZI_B $p1, $z24, 0, implicit-def $nzcv, implicit-def $nzcv + ; EXPAND-NEXT: $z24 = frame-destroy LDR_ZXI $sp, 10 :: (load (s128) from %stack.11) + ; EXPAND-NEXT: $p1 = frame-destroy PTRUE_B 31, implicit $vg + ; EXPAND-NEXT: $p5 = frame-destroy CMPNE_PPzZI_B $p1, $z24, 0, implicit-def $nzcv, implicit-def $nzcv + ; EXPAND-NEXT: $z24 = frame-destroy LDR_ZXI $sp, 11 :: (load (s128) from %stack.10) + ; EXPAND-NEXT: $p1 = frame-destroy PTRUE_B 31, implicit $vg + ; EXPAND-NEXT: $p4 = frame-destroy CMPNE_PPzZI_B $p1, $z24, 0, implicit-def $nzcv, implicit-def $nzcv + ; EXPAND-NEXT: $sp = frame-destroy ADDVL_XXI $sp, 20, implicit $vg + ; EXPAND-NEXT: $fp = frame-destroy LDRXui $sp, 128 :: (load (s64) from %stack.22) + ; EXPAND-NEXT: $sp = frame-destroy ADDXri $sp, 1040, 0 + ; EXPAND-NEXT: RET undef $lr, implicit $p0, implicit $z0, implicit $z1, implicit $z2, implicit $z3, implicit $z4, implicit $z5, implicit $z6, implicit $z7 + $z16 = IMPLICIT_DEF + $z17 = IMPLICIT_DEF + $z18 = IMPLICIT_DEF + $z19 = IMPLICIT_DEF + $z20 = IMPLICIT_DEF + $z21 = IMPLICIT_DEF + $z22 = IMPLICIT_DEF + $z23 = IMPLICIT_DEF + $z24 = IMPLICIT_DEF + $z25 = IMPLICIT_DEF + $z26 = IMPLICIT_DEF + $z27 = IMPLICIT_DEF + $z28 = IMPLICIT_DEF + $z29 = IMPLICIT_DEF + $z30 = IMPLICIT_DEF + $z31 = IMPLICIT_DEF + + %1:ppr = COPY $p0 + + $p0 = IMPLICIT_DEF + $p1 = IMPLICIT_DEF + $p2 = IMPLICIT_DEF + $p3 = IMPLICIT_DEF + $p4 = IMPLICIT_DEF + $p5 = IMPLICIT_DEF + $p6 = IMPLICIT_DEF + $p7 = IMPLICIT_DEF + $p8 = IMPLICIT_DEF + $p9 = IMPLICIT_DEF + $p10 = IMPLICIT_DEF + $p11 = IMPLICIT_DEF + $p12 = IMPLICIT_DEF + $p13 = IMPLICIT_DEF + $p14 = IMPLICIT_DEF + $p15 = IMPLICIT_DEF + + $p0 = COPY %1 + + FAKE_USE implicit $z16, implicit $z17, implicit $z18, implicit $z19, implicit $z20, implicit $z21, implicit $z22, implicit $z23, implicit $z24, implicit $z25, implicit $z26, implicit $z27, implicit $z28, implicit $z29, implicit $z30, implicit $z31 + + RET_ReallyLR implicit $p0, implicit $z0, implicit $z1, implicit $z2, implicit $z3, implicit $z4, implicit $z5, implicit $z6, implicit $z7 +... 
+--- +name: zpr_predicate_spill_above_p7 +tracksRegLiveness: true +stack: +liveins: + - { reg: '$p0' } + - { reg: '$p1' } + - { reg: '$p2' } + - { reg: '$p3' } +body: | + bb.0.entry: + liveins: $p0, $p1, $p2, $p3 + + ; CHECK-LABEL: name: zpr_predicate_spill_above_p7 + ; CHECK: stack: + ; CHECK: - { id: 0, name: '', type: spill-slot, offset: 0, size: 16, alignment: 16, + ; CHECK-NEXT: stack-id: scalable-vector, callee-saved-register: + ; CHECK: liveins: $p0, $p1, $p2, $p3 + ; CHECK-NEXT: {{ $}} + ; + ; CHECK-NEXT: $p15 = IMPLICIT_DEF + ; + ; CHECK-NEXT: SPILL_PPR_TO_ZPR_SLOT_PSEUDO $p15, %stack.0, 0 :: (store (s128) into %stack.0) + ; + ; CHECK-NEXT: $p0 = IMPLICIT_DEF + ; CHECK-NEXT: $p1 = IMPLICIT_DEF + ; CHECK-NEXT: $p2 = IMPLICIT_DEF + ; CHECK-NEXT: $p3 = IMPLICIT_DEF + ; CHECK-NEXT: $p4 = IMPLICIT_DEF + ; CHECK-NEXT: $p5 = IMPLICIT_DEF + ; CHECK-NEXT: $p6 = IMPLICIT_DEF + ; CHECK-NEXT: $p7 = IMPLICIT_DEF + ; CHECK-NEXT: $p8 = IMPLICIT_DEF + ; CHECK-NEXT: $p9 = IMPLICIT_DEF + ; CHECK-NEXT: $p10 = IMPLICIT_DEF + ; CHECK-NEXT: $p11 = IMPLICIT_DEF + ; CHECK-NEXT: $p12 = IMPLICIT_DEF + ; CHECK-NEXT: $p13 = IMPLICIT_DEF + ; CHECK-NEXT: $p14 = IMPLICIT_DEF + ; CHECK-NEXT: $p15 = IMPLICIT_DEF + ; + ; CHECK-NEXT: $p15 = FILL_PPR_FROM_ZPR_SLOT_PSEUDO %stack.0, 0 :: (load (s128) from %stack.0) + ; + ; CHECK-NEXT: FAKE_USE implicit $p4, implicit $p5, implicit $p6, implicit $p7 + ; + ; CHECK-NEXT: RET_ReallyLR implicit $p0, implicit $p1, implicit $p2, implicit $p3 + + ; EXPAND-LABEL: name: zpr_predicate_spill_above_p7 + ; EXPAND: liveins: $p0, $p1, $p2, $p3, $fp, $p15, $p14, $p13, $p12, $p11, $p10, $p9, $p8, $p7, $p6, $p5, $p4 + ; EXPAND-NEXT: {{ $}} + ; + ; EXPAND-NEXT: $sp = frame-setup SUBXri $sp, 1040, 0 + ; EXPAND-NEXT: frame-setup STRXui killed $fp, $sp, 128 :: (store (s64) into %stack.14) + ; EXPAND-NEXT: $sp = frame-setup ADDVL_XXI $sp, -12, implicit $vg + ; EXPAND-NEXT: $z0 = frame-setup CPY_ZPzI_B killed $p15, 1, 0 + ; EXPAND-NEXT: frame-setup STR_ZXI $z0, $sp, 0 :: (store (s128) into %stack.13) + ; EXPAND-NEXT: $z0 = frame-setup CPY_ZPzI_B killed $p14, 1, 0 + ; EXPAND-NEXT: frame-setup STR_ZXI $z0, $sp, 1 :: (store (s128) into %stack.12) + ; EXPAND-NEXT: $z0 = frame-setup CPY_ZPzI_B killed $p13, 1, 0 + ; EXPAND-NEXT: frame-setup STR_ZXI $z0, $sp, 2 :: (store (s128) into %stack.11) + ; EXPAND-NEXT: $z0 = frame-setup CPY_ZPzI_B killed $p12, 1, 0 + ; EXPAND-NEXT: frame-setup STR_ZXI $z0, $sp, 3 :: (store (s128) into %stack.10) + ; EXPAND-NEXT: $z0 = frame-setup CPY_ZPzI_B killed $p11, 1, 0 + ; EXPAND-NEXT: frame-setup STR_ZXI $z0, $sp, 4 :: (store (s128) into %stack.9) + ; EXPAND-NEXT: $z0 = frame-setup CPY_ZPzI_B killed $p10, 1, 0 + ; EXPAND-NEXT: frame-setup STR_ZXI $z0, $sp, 5 :: (store (s128) into %stack.8) + ; EXPAND-NEXT: $z0 = frame-setup CPY_ZPzI_B killed $p9, 1, 0 + ; EXPAND-NEXT: frame-setup STR_ZXI $z0, $sp, 6 :: (store (s128) into %stack.7) + ; EXPAND-NEXT: $z0 = frame-setup CPY_ZPzI_B killed $p8, 1, 0 + ; EXPAND-NEXT: frame-setup STR_ZXI $z0, $sp, 7 :: (store (s128) into %stack.6) + ; EXPAND-NEXT: $z0 = frame-setup CPY_ZPzI_B killed $p7, 1, 0 + ; EXPAND-NEXT: frame-setup STR_ZXI $z0, $sp, 8 :: (store (s128) into %stack.5) + ; EXPAND-NEXT: $z0 = frame-setup CPY_ZPzI_B killed $p6, 1, 0 + ; EXPAND-NEXT: frame-setup STR_ZXI $z0, $sp, 9 :: (store (s128) into %stack.4) + ; EXPAND-NEXT: $z0 = frame-setup CPY_ZPzI_B killed $p5, 1, 0 + ; EXPAND-NEXT: frame-setup STR_ZXI $z0, $sp, 10 :: (store (s128) into %stack.3) + ; EXPAND-NEXT: $z0 = frame-setup CPY_ZPzI_B killed $p4, 1, 0 + ; 
EXPAND-NEXT: frame-setup STR_ZXI $z0, $sp, 11 :: (store (s128) into %stack.2) + ; EXPAND-NEXT: $sp = frame-setup SUBXri $sp, 1024, 0 + ; EXPAND-NEXT: $sp = frame-setup ADDVL_XXI $sp, -2, implicit $vg + ; + ; EXPAND-NEXT: $p15 = IMPLICIT_DEF + ; + ; EXPAND-NEXT: $z0 = CPY_ZPzI_B $p15, 1, 0 + ; EXPAND-NEXT: $x8 = ADDXri $sp, 1024, 0 + ; EXPAND-NEXT: STR_ZXI $z0, $x8, 1 :: (store (s128) into %stack.0) + ; + ; EXPAND-NEXT: $p0 = IMPLICIT_DEF + ; EXPAND-NEXT: $p1 = IMPLICIT_DEF + ; EXPAND-NEXT: $p2 = IMPLICIT_DEF + ; EXPAND-NEXT: $p3 = IMPLICIT_DEF + ; EXPAND-NEXT: $p4 = IMPLICIT_DEF + ; EXPAND-NEXT: $p5 = IMPLICIT_DEF + ; EXPAND-NEXT: $p6 = IMPLICIT_DEF + ; EXPAND-NEXT: $p7 = IMPLICIT_DEF + ; EXPAND-NEXT: $p8 = IMPLICIT_DEF + ; EXPAND-NEXT: $p9 = IMPLICIT_DEF + ; EXPAND-NEXT: $p10 = IMPLICIT_DEF + ; EXPAND-NEXT: $p11 = IMPLICIT_DEF + ; EXPAND-NEXT: $p12 = IMPLICIT_DEF + ; EXPAND-NEXT: $p13 = IMPLICIT_DEF + ; EXPAND-NEXT: $p14 = IMPLICIT_DEF + ; EXPAND-NEXT: $p15 = IMPLICIT_DEF + ; + ; EXPAND-NEXT: $z0 = CPY_ZPzI_B $p0, 1, 0 + ; EXPAND-NEXT: STR_ZXI $z0, $x8, 0 :: (store (s128) into %stack.16) + ; EXPAND-NEXT: $z0 = LDR_ZXI $x8, 1 :: (load (s128) from %stack.0) + ; EXPAND-NEXT: $p0 = PTRUE_B 31, implicit $vg + ; EXPAND-NEXT: $p15 = CMPNE_PPzZI_B $p0, $z0, 0, implicit-def $nzcv, implicit-def $nzcv + ; EXPAND-NEXT: $z0 = LDR_ZXI killed $x8, 0 :: (load (s128) from %stack.16) + ; EXPAND-NEXT: $p0 = PTRUE_B 31, implicit $vg + ; EXPAND-NEXT: $p0 = CMPNE_PPzZI_B $p0, $z0, 0, implicit-def $nzcv, implicit-def $nzcv + ; + ; EXPAND-NEXT: FAKE_USE implicit $p4, implicit $p5, implicit $p6, implicit $p7 + ; + ; EXPAND-NEXT: $sp = frame-destroy ADDXri $sp, 1024, 0 + ; EXPAND-NEXT: $sp = frame-destroy ADDVL_XXI $sp, 2, implicit $vg + ; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 0 :: (load (s128) from %stack.13) + ; EXPAND-NEXT: $p4 = frame-destroy PTRUE_B 31, implicit $vg + ; EXPAND-NEXT: $p15 = frame-destroy CMPNE_PPzZI_B $p4, $z0, 0, implicit-def $nzcv, implicit-def $nzcv + ; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 1 :: (load (s128) from %stack.12) + ; EXPAND-NEXT: $p4 = frame-destroy PTRUE_B 31, implicit $vg + ; EXPAND-NEXT: $p14 = frame-destroy CMPNE_PPzZI_B $p4, $z0, 0, implicit-def $nzcv, implicit-def $nzcv + ; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 2 :: (load (s128) from %stack.11) + ; EXPAND-NEXT: $p4 = frame-destroy PTRUE_B 31, implicit $vg + ; EXPAND-NEXT: $p13 = frame-destroy CMPNE_PPzZI_B $p4, $z0, 0, implicit-def $nzcv, implicit-def $nzcv + ; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 3 :: (load (s128) from %stack.10) + ; EXPAND-NEXT: $p4 = frame-destroy PTRUE_B 31, implicit $vg + ; EXPAND-NEXT: $p12 = frame-destroy CMPNE_PPzZI_B $p4, $z0, 0, implicit-def $nzcv, implicit-def $nzcv + ; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 4 :: (load (s128) from %stack.9) + ; EXPAND-NEXT: $p4 = frame-destroy PTRUE_B 31, implicit $vg + ; EXPAND-NEXT: $p11 = frame-destroy CMPNE_PPzZI_B $p4, $z0, 0, implicit-def $nzcv, implicit-def $nzcv + ; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 5 :: (load (s128) from %stack.8) + ; EXPAND-NEXT: $p4 = frame-destroy PTRUE_B 31, implicit $vg + ; EXPAND-NEXT: $p10 = frame-destroy CMPNE_PPzZI_B $p4, $z0, 0, implicit-def $nzcv, implicit-def $nzcv + ; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 6 :: (load (s128) from %stack.7) + ; EXPAND-NEXT: $p4 = frame-destroy PTRUE_B 31, implicit $vg + ; EXPAND-NEXT: $p9 = frame-destroy CMPNE_PPzZI_B $p4, $z0, 0, implicit-def $nzcv, implicit-def $nzcv + ; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 7 :: (load (s128) 
from %stack.6) + ; EXPAND-NEXT: $p4 = frame-destroy PTRUE_B 31, implicit $vg + ; EXPAND-NEXT: $p8 = frame-destroy CMPNE_PPzZI_B $p4, $z0, 0, implicit-def $nzcv, implicit-def $nzcv + ; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 8 :: (load (s128) from %stack.5) + ; EXPAND-NEXT: $p4 = frame-destroy PTRUE_B 31, implicit $vg + ; EXPAND-NEXT: $p7 = frame-destroy CMPNE_PPzZI_B $p4, $z0, 0, implicit-def $nzcv, implicit-def $nzcv + ; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 9 :: (load (s128) from %stack.4) + ; EXPAND-NEXT: $p4 = frame-destroy PTRUE_B 31, implicit $vg + ; EXPAND-NEXT: $p6 = frame-destroy CMPNE_PPzZI_B $p4, $z0, 0, implicit-def $nzcv, implicit-def $nzcv + ; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 10 :: (load (s128) from %stack.3) + ; EXPAND-NEXT: $p4 = frame-destroy PTRUE_B 31, implicit $vg + ; EXPAND-NEXT: $p5 = frame-destroy CMPNE_PPzZI_B $p4, $z0, 0, implicit-def $nzcv, implicit-def $nzcv + ; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 11 :: (load (s128) from %stack.2) + ; EXPAND-NEXT: $p4 = frame-destroy PTRUE_B 31, implicit $vg + ; EXPAND-NEXT: $p4 = frame-destroy CMPNE_PPzZI_B $p4, $z0, 0, implicit-def $nzcv, implicit-def $nzcv + ; EXPAND-NEXT: $sp = frame-destroy ADDVL_XXI $sp, 12, implicit $vg + ; EXPAND-NEXT: $fp = frame-destroy LDRXui $sp, 128 :: (load (s64) from %stack.14) + ; EXPAND-NEXT: $sp = frame-destroy ADDXri $sp, 1040, 0 + ; EXPAND-NEXT: RET undef $lr, implicit $p0, implicit $p1, implicit $p2, implicit $p3 + $p15 = IMPLICIT_DEF + %1:ppr = COPY $p15 + + $p0 = IMPLICIT_DEF + $p1 = IMPLICIT_DEF + $p2 = IMPLICIT_DEF + $p3 = IMPLICIT_DEF + $p4 = IMPLICIT_DEF + $p5 = IMPLICIT_DEF + $p6 = IMPLICIT_DEF + $p7 = IMPLICIT_DEF + $p8 = IMPLICIT_DEF + $p9 = IMPLICIT_DEF + $p10 = IMPLICIT_DEF + $p11 = IMPLICIT_DEF + $p12 = IMPLICIT_DEF + $p13 = IMPLICIT_DEF + $p14 = IMPLICIT_DEF + $p15 = IMPLICIT_DEF + + $p15 = COPY %1 + + FAKE_USE implicit $p4, implicit $p5, implicit $p6, implicit $p7 + + RET_ReallyLR implicit $p0, implicit $p1, implicit $p2, implicit $p3 +... 
+--- +name: zpr_predicate_spill_p4_saved +tracksRegLiveness: true +stack: +liveins: + - { reg: '$p0' } + - { reg: '$p1' } + - { reg: '$p2' } + - { reg: '$p3' } +body: | + bb.0.entry: + liveins: $p0, $p1, $p2, $p3 + + ; CHECK-LABEL: name: zpr_predicate_spill_p4_saved + ; CHECK: liveins: $p0, $p1, $p2, $p3 + ; CHECK-NEXT: {{ $}} + ; + ; CHECK-NEXT: $p8 = IMPLICIT_DEF + ; + ; CHECK-NEXT: RET_ReallyLR implicit $p0, implicit $p1, implicit $p2, implicit $p3 + + ; EXPAND-LABEL: name: zpr_predicate_spill_p4_saved + ; EXPAND: liveins: $p0, $p1, $p2, $p3, $fp, $p8, $p4 + ; EXPAND-NEXT: {{ $}} + ; EXPAND-NEXT: $sp = frame-setup SUBXri $sp, 1040, 0 + ; EXPAND-NEXT: frame-setup STRXui killed $fp, $sp, 128 :: (store (s64) into %stack.3) + ; EXPAND-NEXT: $sp = frame-setup ADDVL_XXI $sp, -2, implicit $vg + ; EXPAND-NEXT: $z0 = frame-setup CPY_ZPzI_B killed $p8, 1, 0 + ; EXPAND-NEXT: frame-setup STR_ZXI $z0, $sp, 0 :: (store (s128) into %stack.2) + ; EXPAND-NEXT: $z0 = frame-setup CPY_ZPzI_B killed $p4, 1, 0 + ; EXPAND-NEXT: frame-setup STR_ZXI $z0, $sp, 1 :: (store (s128) into %stack.1) + ; EXPAND-NEXT: $sp = frame-setup SUBXri $sp, 1024, 0 + ; + ; EXPAND-NEXT: $p8 = IMPLICIT_DEF + ; + ; EXPAND-NEXT: $sp = frame-destroy ADDXri $sp, 1024, 0 + ; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 0 :: (load (s128) from %stack.2) + ; EXPAND-NEXT: $p4 = frame-destroy PTRUE_B 31, implicit $vg + ; EXPAND-NEXT: $p8 = frame-destroy CMPNE_PPzZI_B $p4, $z0, 0, implicit-def $nzcv, implicit-def $nzcv + ; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 1 :: (load (s128) from %stack.1) + ; EXPAND-NEXT: $p4 = frame-destroy PTRUE_B 31, implicit $vg + ; EXPAND-NEXT: $p4 = frame-destroy CMPNE_PPzZI_B $p4, $z0, 0, implicit-def $nzcv, implicit-def $nzcv + ; EXPAND-NEXT: $sp = frame-destroy ADDVL_XXI $sp, 2, implicit $vg + ; EXPAND-NEXT: $fp = frame-destroy LDRXui $sp, 128 :: (load (s64) from %stack.3) + ; EXPAND-NEXT: $sp = frame-destroy ADDXri $sp, 1040, 0 + ; EXPAND-NEXT: RET undef $lr, implicit $p0, implicit $p1, implicit $p2, implicit $p3 + + ; If we spill a register above p8, p4 must also be saved, so we can guarantee + ; there will be a free register (in the range p0-p7) for the cmpne reload. + $p8 = IMPLICIT_DEF + + RET_ReallyLR implicit $p0, implicit $p1, implicit $p2, implicit $p3 +... diff --git a/llvm/test/CodeGen/AArch64/ssve-stack-hazard-remarks.ll b/llvm/test/CodeGen/AArch64/ssve-stack-hazard-remarks.ll index 0b6bf3892a0c2..c67d91952c618 100644 --- a/llvm/test/CodeGen/AArch64/ssve-stack-hazard-remarks.ll +++ b/llvm/test/CodeGen/AArch64/ssve-stack-hazard-remarks.ll @@ -1,5 +1,7 @@ ; RUN: llc < %s -mtriple=aarch64 -mattr=+sve2 -pass-remarks-analysis=sme -aarch64-stack-hazard-remark-size=64 -o /dev/null < %s 2>&1 | FileCheck %s --check-prefixes=CHECK ; RUN: llc < %s -mtriple=aarch64 -mattr=+sve2 -pass-remarks-analysis=sme -aarch64-stack-hazard-size=1024 -o /dev/null < %s 2>&1 | FileCheck %s --check-prefixes=CHECK-PADDING +; RUN: llc < %s -mtriple=aarch64 -mattr=+sve2 -pass-remarks-analysis=sme -aarch64-enable-zpr-predicate-spills -aarch64-stack-hazard-remark-size=64 -o /dev/null < %s 2>&1 | FileCheck %s --check-prefixes=CHECK-ZPR-PRED-SPILLS +; RUN: llc < %s -mtriple=aarch64 -mattr=+sve2 -pass-remarks-analysis=sme -aarch64-enable-zpr-predicate-spills -aarch64-stack-hazard-size=1024 -o /dev/null < %s 2>&1 | FileCheck %s --check-prefixes=CHECK-ZPR-PRED-SPILLS-WITH-PADDING ; Don't emit remarks for non-streaming functions.
define float @csr_x20_stackargs_notsc(float %a, float %b, float %c, float %d, float %e, float %f, float %g, float %h, float %i) { @@ -66,13 +68,18 @@ entry: } ; SVE calling conventions -; Predicate register spills end up in FP region, currently. +; Predicate register spills end up in FP region, currently. This can be +; mitigated with the -aarch64-enable-zpr-predicate-spills option. define i32 @svecc_call(<4 x i16> %P0, ptr %P1, i32 %P2, <vscale x 16 x i1> %P3, i16 %P4) #2 { ; CHECK: remark: <unknown>:0:0: stack hazard in 'svecc_call': PPR stack object at [SP-48-258 * vscale] is too close to FPR stack object at [SP-48-256 * vscale] ; CHECK: remark: <unknown>:0:0: stack hazard in 'svecc_call': FPR stack object at [SP-48-16 * vscale] is too close to GPR stack object at [SP-48] ; CHECK-PADDING: remark: <unknown>:0:0: stack hazard in 'svecc_call': PPR stack object at [SP-1072-258 * vscale] is too close to FPR stack object at [SP-1072-256 * vscale] ; CHECK-PADDING-NOT: remark: <unknown>:0:0: stack hazard in 'svecc_call': +; CHECK-ZPR-PRED-SPILLS-NOT: <unknown>:0:0: stack hazard in 'svecc_call': PPR stack object at {{.*}} is too close to FPR stack object +; CHECK-ZPR-PRED-SPILLS: <unknown>:0:0: stack hazard in 'svecc_call': FPR stack object at [SP-48-16 * vscale] is too close to GPR stack object at [SP-48] +; CHECK-ZPR-PRED-SPILLS-WITH-PADDING-NOT: <unknown>:0:0: stack hazard in 'svecc_call': PPR stack object at {{.*}} is too close to FPR stack object +; CHECK-ZPR-PRED-SPILLS-WITH-PADDING-NOT: <unknown>:0:0: stack hazard in 'svecc_call': FPR stack object at {{.*}} is too close to GPR stack object entry: tail call void asm sideeffect "", "~{x0},~{x28},~{x27},~{x3}"() #2 %call = call ptr @memset(ptr noundef nonnull %P1, i32 noundef 45, i32 noundef 37) @@ -84,6 +91,10 @@ define i32 @svecc_alloca_call(<4 x i16> %P0, ptr %P1, i32 %P2, <vscale x 16 x i1 ; CHECK: remark: <unknown>:0:0: stack hazard in 'svecc_alloca_call': FPR stack object at [SP-48-16 * vscale] is too close to GPR stack object at [SP-48] ; CHECK-PADDING: remark: <unknown>:0:0: stack hazard in 'svecc_alloca_call': PPR stack object at [SP-1072-258 * vscale] is too close to FPR stack object at [SP-1072-256 * vscale] ; CHECK-PADDING-NOT: remark: <unknown>:0:0: stack hazard in 'svecc_alloca_call': +; CHECK-ZPR-PRED-SPILLS-NOT: <unknown>:0:0: stack hazard in 'svecc_alloca_call': PPR stack object at {{.*}} is too close to FPR stack object +; CHECK-ZPR-PRED-SPILLS: <unknown>:0:0: stack hazard in 'svecc_alloca_call': FPR stack object at [SP-48-16 * vscale] is too close to GPR stack object at [SP-48] +; CHECK-ZPR-PRED-SPILLS-WITH-PADDING-NOT: <unknown>:0:0: stack hazard in 'svecc_alloca_call': PPR stack object at {{.*}} is too close to FPR stack object +; CHECK-ZPR-PRED-SPILLS-WITH-PADDING-NOT: <unknown>:0:0: stack hazard in 'svecc_alloca_call': FPR stack object at {{.*}} is too close to GPR stack object entry: tail call void asm sideeffect "", "~{x0},~{x28},~{x27},~{x3}"() #2 %0 = alloca [37 x i8], align 16 diff --git a/llvm/utils/TableGen/SubtargetEmitter.cpp b/llvm/utils/TableGen/SubtargetEmitter.cpp index 3db3ae65cc555..49362ff5ef655 100644 --- a/llvm/utils/TableGen/SubtargetEmitter.cpp +++ b/llvm/utils/TableGen/SubtargetEmitter.cpp @@ -2082,6 +2082,7 @@ void SubtargetEmitter::run(raw_ostream &OS) { OS << "\n#ifdef GET_SUBTARGETINFO_TARGET_DESC\n"; OS << "#undef GET_SUBTARGETINFO_TARGET_DESC\n\n"; + OS << "#include \"llvm/ADT/BitmaskEnum.h\"\n"; OS << "#include \"llvm/Support/Debug.h\"\n"; OS << "#include \"llvm/Support/raw_ostream.h\"\n\n"; if (Target == "AArch64") @@ -2113,7 +2114,26 @@ void SubtargetEmitter::run(raw_ostream &OS) { << " unsigned CPUID) const override;\n" << " DFAPacketizer *createDFAPacketizer(const
InstrItineraryData *IID)" << " const;\n"; - if (TGT.getHwModes().getNumModeIds() > 1) { + + const CodeGenHwModes &CGH = TGT.getHwModes(); + if (CGH.getNumModeIds() > 1) { + OS << " enum class " << Target << "HwModeBits : unsigned {\n"; + for (unsigned M = 0, NumModes = CGH.getNumModeIds(); M != NumModes; ++M) { + StringRef ModeName = CGH.getModeName(M, /*IncludeDefault=*/true); + OS << " " << ModeName << " = "; + if (M == 0) + OS << "0"; + else + OS << "(1 << " << (M - 1) << ")"; + OS << ",\n"; + if (M == NumModes - 1) { + OS << "\n"; + OS << " LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/" << ModeName + << "),\n"; + } + } + OS << " };\n"; + OS << " unsigned getHwModeSet() const override;\n"; OS << " unsigned getHwMode(enum HwModeType type = HwMode_Default) const " "override;\n";
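For illustration, the loop above streams a per-target bitmask enum into the generated subtarget class declaration: the default mode is 0 and mode M gets the bit (1 << (M - 1)), with the last enumerator carrying LLVM_MARK_AS_BITMASK_ENUM (from llvm/ADT/BitmaskEnum.h, which this change includes). A minimal sketch of the emitted declarations, assuming a hypothetical target named Foo with two user-defined HW modes ModeA and ModeB (these names and the target are made up for the example; the real output depends on the target's HwMode definitions):

// Sketch of what TableGen would emit into the generated FooGenSubtargetInfo
// class declaration for a hypothetical target "Foo" with modes ModeA, ModeB.
enum class FooHwModeBits : unsigned {
  DefaultMode = 0,
  ModeA = (1 << 0),
  ModeB = (1 << 1),

  LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/ModeB),
};
unsigned getHwModeSet() const override;
unsigned getHwMode(enum HwModeType type = HwMode_Default) const override;

Callers could then test individual modes in the mask returned by getHwModeSet() with ordinary bit operations, e.g. Set & unsigned(FooGenSubtargetInfo::FooHwModeBits::ModeA).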