diff --git a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
index 9b62d6067be39..3a30d18ec5b32 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
@@ -20,6 +20,7 @@
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
+#include "llvm/CodeGen/GlobalISel/Utils.h"
 #include "llvm/CodeGen/Register.h"
 #include "llvm/CodeGenTypes/LowLevelType.h"
 #include "llvm/IR/InstrTypes.h"
@@ -299,6 +300,10 @@ class CombinerHelper {
   ///      $whatever = COPY $addr
   bool tryCombineMemCpyFamily(MachineInstr &MI, unsigned MaxLen = 0);
 
+  bool matchICmp(const MachineInstr &MI, BuildFnTy &MatchInfo);
+  /// Try hard to fold icmp with zero RHS because this is a common case.
+  bool matchCmpOfZero(const MachineInstr &MI, BuildFnTy &MatchInfo);
+
   bool matchPtrAddImmedChain(MachineInstr &MI, PtrAddChain &MatchInfo);
   void applyPtrAddImmedChain(MachineInstr &MI, PtrAddChain &MatchInfo);
 
@@ -1017,6 +1022,9 @@ class CombinerHelper {
   bool tryFoldLogicOfFCmps(GLogicalBinOp *Logic, BuildFnTy &MatchInfo);
 
   bool isCastFree(unsigned Opcode, LLT ToTy, LLT FromTy) const;
+
+  bool constantFoldICmp(const GICmp &ICmp, const GIConstant &LHS,
+                        const GIConstant &RHS, BuildFnTy &MatchInfo);
 };
 } // namespace llvm
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/GenericMachineInstrs.h b/llvm/include/llvm/CodeGen/GlobalISel/GenericMachineInstrs.h
index ef1171d9f1f64..427b5a86b6e0c 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/GenericMachineInstrs.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/GenericMachineInstrs.h
@@ -950,6 +950,30 @@ class GExtOrTruncOp : public GCastOp {
   };
 };
 
+/// Represents a splat vector.
+class GSplatVector : public GenericMachineInstr {
+public:
+  Register getValueReg() const { return getOperand(1).getReg(); }
+
+  static bool classof(const MachineInstr *MI) {
+    return MI->getOpcode() == TargetOpcode::G_SPLAT_VECTOR;
+  };
+};
+
+/// Represents an integer-like extending operation.
+class GZextOrSextOp : public GCastOp {
+public:
+  static bool classof(const MachineInstr *MI) {
+    switch (MI->getOpcode()) {
+    case TargetOpcode::G_SEXT:
+    case TargetOpcode::G_ZEXT:
+      return true;
+    default:
+      return false;
+    }
+  };
+};
+
 } // namespace llvm
 
 #endif // LLVM_CODEGEN_GLOBALISEL_GENERICMACHINEINSTRS_H
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/Utils.h b/llvm/include/llvm/CodeGen/GlobalISel/Utils.h
index cf5fd6d6f288b..a8bf2e722881a 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/Utils.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/Utils.h
@@ -593,5 +593,31 @@ bool isGuaranteedNotToBeUndef(Register Reg, const MachineRegisterInfo &MRI,
 /// estimate of the type.
 Type *getTypeForLLT(LLT Ty, LLVMContext &C);
 
+enum class GIConstantKind { Scalar, FixedVector, ScalableVector };
+
+/// An integer-like constant.
+class GIConstant {
+  GIConstantKind Kind;
+  SmallVector<APInt> Values;
+  APInt Value;
+
+public:
+  GIConstant(ArrayRef<APInt> Values)
+      : Kind(GIConstantKind::FixedVector), Values(Values) {};
+  GIConstant(const APInt &Value, GIConstantKind Kind)
+      : Kind(Kind), Value(Value) {};
+
+  GIConstantKind getKind() const { return Kind; }
+
+  APInt getScalarValue() const;
+
+  static std::optional<GIConstant> getConstant(Register Const,
+                                               const MachineRegisterInfo &MRI);
+};
+
+/// Return true if the given value is known to be non-zero when defined.
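+/// This is the GlobalISel counterpart of llvm::isKnownNonZero from
+/// ValueTracking. It currently analyzes scalar virtual registers only and
+/// gives up once MaxAnalysisRecursionDepth is reached.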
+bool isKnownNonZero(Register Reg, const MachineRegisterInfo &MRI,
+                    GISelKnownBits *KB, unsigned Depth = 0);
+
 } // End namespace llvm.
 #endif
diff --git a/llvm/include/llvm/Target/GlobalISel/Combine.td b/llvm/include/llvm/Target/GlobalISel/Combine.td
index 525cc815e73ce..8c0585e4f70ad 100644
--- a/llvm/include/llvm/Target/GlobalISel/Combine.td
+++ b/llvm/include/llvm/Target/GlobalISel/Combine.td
@@ -1007,9 +1007,6 @@ def double_icmp_zero_or_combine: GICombineRule<
    (G_ICMP $root, $p, $ordst, 0))
 >;
 
-def double_icmp_zero_and_or_combine : GICombineGroup<[double_icmp_zero_and_combine,
-                                                      double_icmp_zero_or_combine]>;
-
 def and_or_disjoint_mask : GICombineRule<
   (defs root:$root, build_fn_matchinfo:$info),
   (match (wip_match_opcode G_AND):$root,
@@ -1884,6 +1881,28 @@ def cast_combines: GICombineGroup<[
   buildvector_of_truncate
 ]>;
 
+def prepare_icmp : GICombineRule<
+  (defs root:$root, build_fn_matchinfo:$matchinfo),
+  (match (G_ICMP $root, $pred, $lhs, $rhs):$cmp,
+         [{ return Helper.matchICmp(*${cmp}, ${matchinfo}); }]),
+  (apply [{ Helper.applyBuildFn(*${cmp}, ${matchinfo}); }])>;
+
+def icmp_of_zero : GICombineRule<
+  (defs root:$root, build_fn_matchinfo:$matchinfo),
+  (match (G_CONSTANT $zero, 0),
+         (G_ICMP $root, $pred, $lhs, $zero):$cmp,
+         [{ return Helper.matchCmpOfZero(*${cmp}, ${matchinfo}); }]),
+  (apply [{ Helper.applyBuildFn(*${cmp}, ${matchinfo}); }])>;
+
+def icmp_combines: GICombineGroup<[
+  prepare_icmp,
+  icmp_of_zero,
+  icmp_to_true_false_known_bits,
+  icmp_to_lhs_known_bits,
+  double_icmp_zero_and_combine,
+  double_icmp_zero_or_combine,
+  redundant_binop_in_equality
+]>;
+
 // FIXME: These should use the custom predicate feature once it lands.
 def undef_combines : GICombineGroup<[undef_to_fp_zero, undef_to_int_zero,
@@ -1917,7 +1936,7 @@ def const_combines : GICombineGroup<[constant_fold_fp_ops, const_ptradd_to_i2p,
 
 def known_bits_simplifications : GICombineGroup<[
   redundant_and, redundant_sext_inreg, redundant_or, urem_pow2_to_mask,
-  zext_trunc_fold, icmp_to_true_false_known_bits, icmp_to_lhs_known_bits,
+  zext_trunc_fold,
   sext_inreg_to_zext_inreg]>;
 
 def width_reduction_combines : GICombineGroup<[reduce_shl_of_extend,
@@ -1944,8 +1963,8 @@ def constant_fold_binops : GICombineGroup<[constant_fold_binop,
 
 def prefer_sign_combines : GICombineGroup<[nneg_zext]>;
 
-def all_combines : GICombineGroup<[integer_reassoc_combines, trivial_combines,
-    vector_ops_combines, freeze_combines, cast_combines,
+def all_combines : GICombineGroup<[icmp_combines, integer_reassoc_combines,
+    trivial_combines, vector_ops_combines, freeze_combines, cast_combines,
     insert_vec_elt_combines, extract_vec_elt_combines, combines_for_extload,
     combine_extracted_vector_load,
     undef_combines, identity_combines, phi_combines,
@@ -1964,9 +1983,9 @@ def all_combines : GICombineGroup<[integer_reassoc_combines, trivial_combines,
     constant_fold_cast_op, fabs_fneg_fold,
     intdiv_combines, mulh_combines, redundant_neg_operands,
     and_or_disjoint_mask, fma_combines, fold_binop_into_select,
-    sub_add_reg, select_to_minmax, redundant_binop_in_equality,
+    sub_add_reg, select_to_minmax,
     fsub_to_fneg, commute_constant_to_rhs, match_ands, match_ors,
-    combine_concat_vector, double_icmp_zero_and_or_combine, match_addos,
+    combine_concat_vector, match_addos,
     sext_trunc, zext_trunc, prefer_sign_combines, combine_shuffle_concat]>;
 
 // A combine group used to for prelegalizer combiners at -O0. The combines in
diff --git a/llvm/lib/CodeGen/GlobalISel/CMakeLists.txt b/llvm/lib/CodeGen/GlobalISel/CMakeLists.txt
index a15b76440364b..af1717dbf76f3 100644
--- a/llvm/lib/CodeGen/GlobalISel/CMakeLists.txt
+++ b/llvm/lib/CodeGen/GlobalISel/CMakeLists.txt
@@ -7,6 +7,7 @@ add_llvm_component_library(LLVMGlobalISel
   Combiner.cpp
   CombinerHelper.cpp
   CombinerHelperCasts.cpp
+  CombinerHelperCompares.cpp
   CombinerHelperVectorOps.cpp
   GIMatchTableExecutor.cpp
   GISelChangeObserver.cpp
diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelperCompares.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelperCompares.cpp
new file mode 100644
index 0000000000000..0e88525c47171
--- /dev/null
+++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelperCompares.cpp
@@ -0,0 +1,167 @@
+//===- CombinerHelperCompares.cpp -----------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements CombinerHelper for G_ICMP.
+//
+//===----------------------------------------------------------------------===//
+#include "llvm/CodeGen/GlobalISel/CombinerHelper.h"
+#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
+#include "llvm/CodeGen/GlobalISel/LegalizerHelper.h"
+#include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"
+#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
+#include "llvm/CodeGen/GlobalISel/Utils.h"
+#include "llvm/CodeGen/LowLevelTypeUtils.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/TargetOpcodes.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/ErrorHandling.h"
+#include <cstdlib>
+
+#define DEBUG_TYPE "gi-combiner"
+
+using namespace llvm;
+
+bool CombinerHelper::constantFoldICmp(const GICmp &ICmp,
+                                      const GIConstant &LHSCst,
+                                      const GIConstant &RHSCst,
+                                      BuildFnTy &MatchInfo) {
+  if (LHSCst.getKind() != GIConstantKind::Scalar)
+    return false;
+
+  Register Dst = ICmp.getReg(0);
+  LLT DstTy = MRI.getType(Dst);
+
+  if (!isConstantLegalOrBeforeLegalizer(DstTy))
+    return false;
+
+  CmpInst::Predicate Pred = ICmp.getCond();
+  APInt LHS = LHSCst.getScalarValue();
+  APInt RHS = RHSCst.getScalarValue();
+
+  bool Result = ICmpInst::compare(LHS, RHS, Pred);
+
+  MatchInfo = [=](MachineIRBuilder &B) {
+    if (Result)
+      B.buildConstant(Dst, getICmpTrueVal(getTargetLowering(),
+                                          /*IsVector=*/DstTy.isVector(),
+                                          /*IsFP=*/false));
+    else
+      B.buildConstant(Dst, 0);
+  };
+
+  return true;
+}
+
+bool CombinerHelper::matchICmp(const MachineInstr &MI, BuildFnTy &MatchInfo) {
+  const GICmp *Cmp = cast<GICmp>(&MI);
+
+  Register Dst = Cmp->getReg(0);
+  Register LHS = Cmp->getLHSReg();
+  Register RHS = Cmp->getRHSReg();
+
+  CmpInst::Predicate Pred = Cmp->getCond();
+  assert(CmpInst::isIntPredicate(Pred) && "Not an integer compare!");
+  if (auto CLHS = GIConstant::getConstant(LHS, MRI)) {
+    if (auto CRHS = GIConstant::getConstant(RHS, MRI))
+      return constantFoldICmp(*Cmp, *CLHS, *CRHS, MatchInfo);
+
+    // If we have a constant, make sure it is on the RHS.
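+    // Canonicalizing the constant onto the RHS (e.g., G_ICMP slt 5, %x
+    // becomes G_ICMP sgt %x, 5) means later combines such as icmp_of_zero
+    // only have to inspect the RHS.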
+    std::swap(LHS, RHS);
+    Pred = CmpInst::getSwappedPredicate(Pred);
+
+    MatchInfo = [=](MachineIRBuilder &B) { B.buildICmp(Pred, Dst, LHS, RHS); };
+    return true;
+  }
+
+  return false;
+}
+
+bool CombinerHelper::matchCmpOfZero(const MachineInstr &MI,
+                                    BuildFnTy &MatchInfo) {
+  const GICmp *Cmp = cast<GICmp>(&MI);
+
+  Register Dst = Cmp->getReg(0);
+  LLT DstTy = MRI.getType(Dst);
+  Register LHS = Cmp->getLHSReg();
+  CmpInst::Predicate Pred = Cmp->getCond();
+
+  if (!isConstantLegalOrBeforeLegalizer(DstTy))
+    return false;
+
+  std::optional<bool> Result;
+
+  switch (Pred) {
+  default:
+    llvm_unreachable("Unknown ICmp predicate!");
+  case ICmpInst::ICMP_ULT:
+    Result = false;
+    break;
+  case ICmpInst::ICMP_UGE:
+    Result = true;
+    break;
+  case ICmpInst::ICMP_EQ:
+  case ICmpInst::ICMP_ULE:
+    if (isKnownNonZero(LHS, MRI, KB))
+      Result = false;
+    break;
+  case ICmpInst::ICMP_NE:
+  case ICmpInst::ICMP_UGT:
+    if (isKnownNonZero(LHS, MRI, KB))
+      Result = true;
+    break;
+  case ICmpInst::ICMP_SLT: {
+    KnownBits LHSKnown = KB->getKnownBits(LHS);
+    if (LHSKnown.isNegative())
+      Result = true;
+    if (LHSKnown.isNonNegative())
+      Result = false;
+    break;
+  }
+  case ICmpInst::ICMP_SLE: {
+    KnownBits LHSKnown = KB->getKnownBits(LHS);
+    if (LHSKnown.isNegative())
+      Result = true;
+    if (LHSKnown.isNonNegative() && isKnownNonZero(LHS, MRI, KB))
+      Result = false;
+    break;
+  }
+  case ICmpInst::ICMP_SGE: {
+    KnownBits LHSKnown = KB->getKnownBits(LHS);
+    if (LHSKnown.isNegative())
+      Result = false;
+    if (LHSKnown.isNonNegative())
+      Result = true;
+    break;
+  }
+  case ICmpInst::ICMP_SGT: {
+    KnownBits LHSKnown = KB->getKnownBits(LHS);
+    if (LHSKnown.isNegative())
+      Result = false;
+    if (LHSKnown.isNonNegative() && isKnownNonZero(LHS, MRI, KB))
+      Result = true;
+    break;
+  }
+  }
+
+  if (!Result)
+    return false;
+
+  MatchInfo = [=](MachineIRBuilder &B) {
+    if (*Result)
+      B.buildConstant(Dst, getICmpTrueVal(getTargetLowering(),
+                                          /*IsVector=*/DstTy.isVector(),
+                                          /*IsFP=*/false));
+    else
+      B.buildConstant(Dst, 0);
+  };
+
+  return true;
+}
diff --git a/llvm/lib/CodeGen/GlobalISel/Utils.cpp b/llvm/lib/CodeGen/GlobalISel/Utils.cpp
index 1713a582d5cfe..6f7bc97b55dc8 100644
--- a/llvm/lib/CodeGen/GlobalISel/Utils.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/Utils.cpp
@@ -1968,3 +1968,311 @@ Type *llvm::getTypeForLLT(LLT Ty, LLVMContext &C) {
                            Ty.getElementCount());
   return IntegerType::get(C, Ty.getSizeInBits());
 }
+
+APInt llvm::GIConstant::getScalarValue() const {
+  assert(Kind == GIConstantKind::Scalar && "Expected scalar constant");
+
+  return Value;
+}
+
+std::optional<GIConstant>
+llvm::GIConstant::getConstant(Register Const, const MachineRegisterInfo &MRI) {
+  MachineInstr *Constant = getDefIgnoringCopies(Const, MRI);
+
+  if (GSplatVector *Splat = dyn_cast<GSplatVector>(Constant)) {
+    std::optional<ValueAndVReg> MayBeConstant =
+        getIConstantVRegValWithLookThrough(Splat->getValueReg(), MRI);
+    if (!MayBeConstant)
+      return std::nullopt;
+    return GIConstant(MayBeConstant->Value, GIConstantKind::ScalableVector);
+  }
+
+  if (GBuildVector *Build = dyn_cast<GBuildVector>(Constant)) {
+    SmallVector<APInt> Values;
+    unsigned NumSources = Build->getNumSources();
+    for (unsigned I = 0; I < NumSources; ++I) {
+      Register SrcReg = Build->getSourceReg(I);
+      std::optional<ValueAndVReg> MayBeConstant =
+          getIConstantVRegValWithLookThrough(SrcReg, MRI);
+      if (!MayBeConstant)
+        return std::nullopt;
+      Values.push_back(MayBeConstant->Value);
+    }
+    return GIConstant(Values);
+  }
+
+  std::optional<ValueAndVReg> MayBeConstant =
+      getIConstantVRegValWithLookThrough(Const, MRI);
+  if (!MayBeConstant)
+    return std::nullopt;
+
+  return GIConstant(MayBeConstant->Value, GIConstantKind::Scalar);
+}
+
+static bool isKnownNonZero(Register Reg, const MachineRegisterInfo &MRI,
+                           GISelKnownBits *KB, unsigned Depth);
+
+bool llvm::isKnownNonZero(Register Reg, const MachineRegisterInfo &MRI,
+                          GISelKnownBits *KB, unsigned Depth) {
+  if (!Reg.isVirtual())
+    return false;
+
+  LLT Ty = MRI.getType(Reg);
+  if (!Ty.isValid())
+    return false;
+
+  if (Ty.isPointer())
+    return false;
+
+  assert(Ty.isScalar() && "Expected a scalar value");
+  return ::isKnownNonZero(Reg, MRI, KB, Depth);
+}
+
+static bool matchOpWithOpEqZero(Register Op0, Register Op1,
+                                const MachineRegisterInfo &MRI) {
+  auto MatchIt = [&MRI](const Register Reg0, const Register Reg1) {
+    MachineInstr *MI = MRI.getVRegDef(Reg0);
+    if (GZextOrSextOp *ZS = dyn_cast<GZextOrSextOp>(MI)) {
+      MachineInstr *SrcMI = MRI.getVRegDef(ZS->getSrcReg());
+      if (GICmp *Cmp = dyn_cast<GICmp>(SrcMI)) {
+        std::optional<ValueAndVReg> MayBeConstant =
+            getIConstantVRegValWithLookThrough(Cmp->getRHSReg(), MRI);
+        return MayBeConstant && (MayBeConstant->Value == 0) &&
+               (Cmp->getLHSReg() == Reg1) &&
+               (Cmp->getCond() == ICmpInst::ICMP_EQ);
+      }
+    }
+    return false;
+  };
+
+  return MatchIt(Op0, Op1) || MatchIt(Op1, Op0);
+}
+
+static bool isNonZeroAdd(const GBinOp &Add, const MachineRegisterInfo &MRI,
+                         GISelKnownBits *KB, unsigned Depth,
+                         unsigned BitWidth) {
+  bool NSW = Add.getFlag(MachineInstr::MIFlag::NoSWrap);
+  bool NUW = Add.getFlag(MachineInstr::MIFlag::NoUWrap);
+  Register LHS = Add.getLHSReg();
+  Register RHS = Add.getRHSReg();
+
+  // (X + (X != 0)) is non zero.
+  if (matchOpWithOpEqZero(LHS, RHS, MRI))
+    return true;
+
+  if (NUW)
+    return ::isKnownNonZero(RHS, MRI, KB, Depth) ||
+           ::isKnownNonZero(LHS, MRI, KB, Depth);
+
+  KnownBits LHSKnown = KB->getKnownBits(LHS);
+  KnownBits RHSKnown = KB->getKnownBits(RHS);
+
+  // If LHS and RHS are both non-negative (as signed values) then their sum is
+  // not zero unless both LHS and RHS are zero.
+  if (LHSKnown.isNonNegative() && RHSKnown.isNonNegative())
+    if (::isKnownNonZero(LHS, MRI, KB, Depth) ||
+        ::isKnownNonZero(RHS, MRI, KB, Depth))
+      return true;
+
+  // If LHS and RHS are both negative (as signed values) then their sum is not
+  // zero unless both LHS and RHS equal INT_MIN.
+  if (LHSKnown.isNegative() && RHSKnown.isNegative()) {
+    APInt Mask = APInt::getSignedMaxValue(BitWidth);
+    // The sign bit of LHS is set. If some other bit is set then LHS is not
+    // equal to INT_MIN.
+    if (LHSKnown.One.intersects(Mask))
+      return true;
+    // The sign bit of RHS is set. If some other bit is set then RHS is not
+    // equal to INT_MIN.
+    if (RHSKnown.One.intersects(Mask))
+      return true;
+  }
+
+  // The sum of a non-negative number and a power of two is not zero.
+  if (LHSKnown.isNonNegative() && ::isKnownToBeAPowerOfTwo(RHS, MRI, KB))
+    return true;
+  if (RHSKnown.isNonNegative() && ::isKnownToBeAPowerOfTwo(LHS, MRI, KB))
+    return true;
+
+  return KnownBits::add(LHSKnown, RHSKnown, NSW, NUW).isNonZero();
+}
+
+static bool isKnownNonZeroBinOp(const GBinOp &BinOp,
+                                const MachineRegisterInfo &MRI,
+                                GISelKnownBits *KB, unsigned Depth) {
+  unsigned BitWidth = MRI.getType(BinOp.getReg(0)).getScalarSizeInBits();
+  switch (BinOp.getOpcode()) {
+  case TargetOpcode::G_XOR:
+    // (X ^ (X != 0)) is non zero.
+    if (matchOpWithOpEqZero(BinOp.getLHSReg(), BinOp.getRHSReg(), MRI))
+      return true;
+    break;
+  case TargetOpcode::G_OR: {
+    // (X | (X != 0)) is non zero.
+    if (matchOpWithOpEqZero(BinOp.getLHSReg(), BinOp.getRHSReg(), MRI))
+      return true;
+    // X | Y != 0 if X != 0 or Y != 0.
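+    // The RHS is tried first since constants are canonicalized to the RHS
+    // and are decided immediately.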
+    return ::isKnownNonZero(BinOp.getRHSReg(), MRI, KB, Depth) ||
+           ::isKnownNonZero(BinOp.getLHSReg(), MRI, KB, Depth);
+  }
+  case TargetOpcode::G_ADD: {
+    // X + Y.
+
+    // If Add has the nuw wrap flag, then if either X or Y is non-zero the
+    // result is non-zero.
+    return isNonZeroAdd(BinOp, MRI, KB, Depth, BitWidth);
+  }
+  default:
+    return false;
+  }
+
+  return false;
+}
+
+static bool isKnownNonZeroCastOp(const GCastOp &CastOp,
+                                 const MachineRegisterInfo &MRI,
+                                 GISelKnownBits *KB, unsigned Depth) {
+  switch (CastOp.getOpcode()) {
+  case TargetOpcode::G_SEXT:
+  case TargetOpcode::G_ZEXT:
+    // ext X != 0 if X != 0.
+    return ::isKnownNonZero(CastOp.getSrcReg(), MRI, KB, Depth);
+  case TargetOpcode::G_TRUNC:
+    // nuw/nsw trunc preserves zero/non-zero status of input.
+    if (CastOp.getFlag(MachineInstr::MIFlag::NoSWrap) ||
+        CastOp.getFlag(MachineInstr::MIFlag::NoUWrap))
+      return ::isKnownNonZero(CastOp.getSrcReg(), MRI, KB, Depth);
+    break;
+  default:
+    return false;
+  }
+
+  return false;
+}
+
+static bool isNonZeroShift(const MachineInstr *MI,
+                           const MachineRegisterInfo &MRI, GISelKnownBits *KB,
+                           unsigned Depth, const KnownBits &KnownVal) {
+  auto ShiftOp = [&](const APInt &Lhs, const APInt &Rhs) {
+    switch (MI->getOpcode()) {
+    case TargetOpcode::G_SHL:
+      return Lhs.shl(Rhs);
+    case TargetOpcode::G_LSHR:
+      return Lhs.lshr(Rhs);
+    case TargetOpcode::G_ASHR:
+      return Lhs.ashr(Rhs);
+    default:
+      llvm_unreachable("Unknown Shift Opcode");
+    }
+  };
+
+  auto InvShiftOp = [&](const APInt &Lhs, const APInt &Rhs) {
+    switch (MI->getOpcode()) {
+    case TargetOpcode::G_SHL:
+      return Lhs.lshr(Rhs);
+    case TargetOpcode::G_LSHR:
+    case TargetOpcode::G_ASHR:
+      return Lhs.shl(Rhs);
+    default:
+      llvm_unreachable("Unknown Shift Opcode");
+    }
+  };
+
+  if (KnownVal.isUnknown())
+    return false;
+
+  KnownBits KnownCnt = KB->getKnownBits(MI->getOperand(2).getReg());
+  APInt MaxShift = KnownCnt.getMaxValue();
+  unsigned NumBits = KnownVal.getBitWidth();
+  if (MaxShift.uge(NumBits))
+    return false;
+
+  if (!ShiftOp(KnownVal.One, MaxShift).isZero())
+    return true;
+
+  // If all of the bits shifted out are known to be zero, and Val is known
+  // non-zero then at least one non-zero bit must remain.
+  if (InvShiftOp(KnownVal.Zero, NumBits - MaxShift)
+          .eq(InvShiftOp(APInt::getAllOnes(NumBits), NumBits - MaxShift)) &&
+      ::isKnownNonZero(MI->getOperand(1).getReg(), MRI, KB, Depth))
+    return true;
+
+  return false;
+}
+
+bool isKnownNonZero(Register Reg, const MachineRegisterInfo &MRI,
+                    GISelKnownBits *KB, unsigned Depth) {
+  if (!Reg.isVirtual())
+    return false;
+
+  std::optional<ValueAndVReg> MayBeConstant =
+      getIConstantVRegValWithLookThrough(Reg, MRI);
+
+  if (MayBeConstant)
+    return MayBeConstant->Value != 0;
+
+  // Some of the tests below are recursive, so bail out if we hit the limit.
+  if (Depth++ >= MaxAnalysisRecursionDepth)
+    return false;
+
+  MachineInstr *MI = getDefIgnoringCopies(Reg, MRI);
+
+  if (GBinOp *BinOp = dyn_cast<GBinOp>(MI))
+    return isKnownNonZeroBinOp(*BinOp, MRI, KB, Depth);
+
+  if (GCastOp *CastOp = dyn_cast<GCastOp>(MI))
+    return isKnownNonZeroCastOp(*CastOp, MRI, KB, Depth);
+
+  switch (MI->getOpcode()) {
+  case TargetOpcode::G_SHL: {
+    // shl nsw/nuw can't remove any non-zero bits.
+    if (MI->getFlag(MachineInstr::MIFlag::NoUWrap) ||
+        MI->getFlag(MachineInstr::MIFlag::NoSWrap))
+      return ::isKnownNonZero(MI->getOperand(1).getReg(), MRI, KB, Depth);
+
+    // shl X, Y != 0 if X is odd. Note that the value of the shift is undefined
+    // if the lowest bit is shifted off the end.
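+    // An odd X keeps its low bit as bit Y of the result, so for any in-range
+    // shift amount at least one bit of the result stays set.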
+    KnownBits Known = KB->getKnownBits(MI->getOperand(1).getReg());
+    if (Known.One[0])
+      return true;
+
+    return isNonZeroShift(MI, MRI, KB, Depth, Known);
+  }
+  case TargetOpcode::G_LSHR:
+  case TargetOpcode::G_ASHR: {
+    // shr exact can only shift out zero bits.
+    if (MI->getFlag(MachineInstr::MIFlag::IsExact))
+      return ::isKnownNonZero(MI->getOperand(1).getReg(), MRI, KB, Depth);
+
+    // shr X, Y != 0 if X is negative. Note that the value of the shift is not
+    // defined if the sign bit is shifted off the end.
+    KnownBits Known = KB->getKnownBits(MI->getOperand(1).getReg());
+    if (Known.isNegative())
+      return true;
+
+    return isNonZeroShift(MI, MRI, KB, Depth, Known);
+  }
+  case TargetOpcode::G_FREEZE:
+    return ::isKnownNonZero(MI->getOperand(1).getReg(), MRI, KB, Depth) &&
+           ::isGuaranteedNotToBePoison(MI->getOperand(1).getReg(), MRI, Depth);
+  case TargetOpcode::G_SMIN: {
+    // If either arg is negative the result is non-zero. Otherwise
+    // the result is non-zero if both ops are non-zero.
+    KnownBits Op1Known = KB->getKnownBits(MI->getOperand(2).getReg());
+    if (Op1Known.isNegative())
+      return true;
+    KnownBits Op0Known = KB->getKnownBits(MI->getOperand(1).getReg());
+    if (Op0Known.isNegative())
+      return true;
+
+    if (Op1Known.isNonZero() && Op0Known.isNonZero())
+      return true;
+  }
+    [[fallthrough]];
+  case TargetOpcode::G_UMIN:
+    return ::isKnownNonZero(MI->getOperand(1).getReg(), MRI, KB, Depth) &&
+           ::isKnownNonZero(MI->getOperand(2).getReg(), MRI, KB, Depth);
+  default:
+    return false;
+  }
+}
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/combine-visit-icmp.mir b/llvm/test/CodeGen/AArch64/GlobalISel/combine-visit-icmp.mir
new file mode 100644
index 0000000000000..d454e60b034ea
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/combine-visit-icmp.mir
@@ -0,0 +1,181 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -o - -mtriple=aarch64-unknown-unknown -run-pass=aarch64-prelegalizer-combiner -verify-machineinstrs %s | FileCheck %s --check-prefixes=CHECK
+
+---
+name: test_icmp_of_eq_and_right_undef
+body: |
+  bb.1:
+    ; CHECK-LABEL: name: test_icmp_of_eq_and_right_undef
+    ; CHECK: %lhs:_(s64) = COPY $x0
+    ; CHECK-NEXT: %rhs:_(s64) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: %res:_(s32) = G_ICMP intpred(ne), %lhs(s64), %rhs
+    ; CHECK-NEXT: $w0 = COPY %res(s32)
+    %lhs:_(s64) = COPY $x0
+    %rhs:_(s64) = G_IMPLICIT_DEF
+    %res:_(s32) = G_ICMP intpred(ne), %lhs(s64), %rhs
+    $w0 = COPY %res(s32)
+...
+---
+name: test_icmp_of_not_eq_and_right_undef
+body: |
+  bb.1:
+    ; CHECK-LABEL: name: test_icmp_of_not_eq_and_right_undef
+    ; CHECK: %lhs:_(s64) = COPY $x0
+    ; CHECK-NEXT: %rhs:_(s64) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: %res:_(s32) = G_ICMP intpred(ugt), %lhs(s64), %rhs
+    ; CHECK-NEXT: $w0 = COPY %res(s32)
+    %lhs:_(s64) = COPY $x0
+    %rhs:_(s64) = G_IMPLICIT_DEF
+    %res:_(s32) = G_ICMP intpred(ugt), %lhs(s64), %rhs
+    $w0 = COPY %res(s32)
+...
+---
+name: test_icmp_of_is_eq_and_right_undef
+body: |
+  bb.1:
+    ; CHECK-LABEL: name: test_icmp_of_is_eq_and_right_undef
+    ; CHECK: %lhs:_(s64) = COPY $x0
+    ; CHECK-NEXT: %rhs:_(s64) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: %res:_(s32) = G_ICMP intpred(ne), %lhs(s64), %rhs
+    ; CHECK-NEXT: $w0 = COPY %res(s32)
+    %lhs:_(s64) = COPY $x0
+    %rhs:_(s64) = G_IMPLICIT_DEF
+    %res:_(s32) = G_ICMP intpred(ne), %lhs(s64), %rhs
+    $w0 = COPY %res(s32)
+...
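+# G_IMPLICIT_DEF is not a constant, so none of the icmp combines may fire on
+# the undef-RHS compares above; the G_ICMPs must survive unchanged.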
+---
+name: test_icmp_of_eq_not_eq
+body: |
+  bb.1:
+    ; CHECK-LABEL: name: test_icmp_of_eq_not_eq
+    ; CHECK: %lhs:_(s64) = COPY $x0
+    ; CHECK-NEXT: %res:_(s32) = G_ICMP intpred(ugt), %lhs(s64), %lhs
+    ; CHECK-NEXT: $w0 = COPY %res(s32)
+    %lhs:_(s64) = COPY $x0
+    %rhs:_(s64) = COPY $x0
+    %res:_(s32) = G_ICMP intpred(ugt), %lhs(s64), %lhs
+    $w0 = COPY %res(s32)
+...
+---
+name: test_icmp_of_eq_is_eq
+body: |
+  bb.1:
+    ; CHECK-LABEL: name: test_icmp_of_eq_is_eq
+    ; CHECK: %lhs:_(s64) = COPY $x0
+    ; CHECK-NEXT: %res:_(s32) = G_ICMP intpred(eq), %lhs(s64), %lhs
+    ; CHECK-NEXT: $w0 = COPY %res(s32)
+    %lhs:_(s64) = COPY $x0
+    %rhs:_(s64) = COPY $x0
+    %res:_(s32) = G_ICMP intpred(eq), %lhs(s64), %lhs
+    $w0 = COPY %res(s32)
+...
+---
+name: test_icmp_of_zext_and_zext
+body: |
+  bb.1:
+    ; CHECK-LABEL: name: test_icmp_of_zext_and_zext
+    ; CHECK: %lhs1:_(s32) = COPY $w0
+    ; CHECK-NEXT: %rhs1:_(s32) = COPY $w0
+    ; CHECK-NEXT: %lhs:_(s64) = G_ZEXT %lhs1(s32)
+    ; CHECK-NEXT: %rhs:_(s64) = G_ZEXT %rhs1(s32)
+    ; CHECK-NEXT: %res:_(s32) = G_ICMP intpred(sgt), %lhs(s64), %rhs
+    ; CHECK-NEXT: $w0 = COPY %res(s32)
+    %lhs1:_(s32) = COPY $w0
+    %rhs1:_(s32) = COPY $w0
+    %lhs:_(s64) = G_ZEXT %lhs1
+    %rhs:_(s64) = G_ZEXT %rhs1
+    %res:_(s32) = G_ICMP intpred(sgt), %lhs(s64), %rhs
+    $w0 = COPY %res(s32)
+...
+---
+name: test_icmp_of_sext_and_sext
+body: |
+  bb.1:
+    ; CHECK-LABEL: name: test_icmp_of_sext_and_sext
+    ; CHECK: %lhs1:_(s32) = COPY $w0
+    ; CHECK-NEXT: %rhs1:_(s32) = COPY $w0
+    ; CHECK-NEXT: %lhs:_(s64) = G_SEXT %lhs1(s32)
+    ; CHECK-NEXT: %rhs:_(s64) = G_SEXT %rhs1(s32)
+    ; CHECK-NEXT: %res:_(s32) = G_ICMP intpred(sgt), %lhs(s64), %rhs
+    ; CHECK-NEXT: $w0 = COPY %res(s32)
+    %lhs1:_(s32) = COPY $w0
+    %rhs1:_(s32) = COPY $w0
+    %lhs:_(s64) = G_SEXT %lhs1
+    %rhs:_(s64) = G_SEXT %rhs1
+    %res:_(s32) = G_ICMP intpred(sgt), %lhs(s64), %rhs
+    $w0 = COPY %res(s32)
+...
+---
+name: test_ugt_icmp_of_sext_and_sext
+body: |
+  bb.1:
+    ; CHECK-LABEL: name: test_ugt_icmp_of_sext_and_sext
+    ; CHECK: %lhs1:_(s32) = COPY $w0
+    ; CHECK-NEXT: %rhs1:_(s32) = COPY $w0
+    ; CHECK-NEXT: %lhs:_(s64) = G_SEXT %lhs1(s32)
+    ; CHECK-NEXT: %rhs:_(s64) = G_SEXT %rhs1(s32)
+    ; CHECK-NEXT: %res:_(s32) = G_ICMP intpred(ugt), %lhs(s64), %rhs
+    ; CHECK-NEXT: $w0 = COPY %res(s32)
+    %lhs1:_(s32) = COPY $w0
+    %rhs1:_(s32) = COPY $w0
+    %lhs:_(s64) = G_SEXT %lhs1
+    %rhs:_(s64) = G_SEXT %rhs1
+    %res:_(s32) = G_ICMP intpred(ugt), %lhs(s64), %rhs
+    $w0 = COPY %res(s32)
+...
+---
+name: test_uge_icmp_of_zero
+body: |
+  bb.1:
+    ; CHECK-LABEL: name: test_uge_icmp_of_zero
+    ; CHECK: %res:_(s32) = G_CONSTANT i32 1
+    ; CHECK-NEXT: $w0 = COPY %res(s32)
+    %lhs:_(s64) = COPY $x0
+    %zero:_(s64) = G_CONSTANT i64 0
+    %res:_(s32) = G_ICMP intpred(uge), %lhs(s64), %zero
+    $w0 = COPY %res(s32)
+...
+---
+name: test_slt_icmp_of_zero
+body: |
+  bb.1:
+    ; CHECK-LABEL: name: test_slt_icmp_of_zero
+    ; CHECK: %lhs:_(s64) = COPY $x0
+    ; CHECK-NEXT: %zero:_(s64) = G_CONSTANT i64 0
+    ; CHECK-NEXT: %res:_(s32) = G_ICMP intpred(slt), %lhs(s64), %zero
+    ; CHECK-NEXT: $w0 = COPY %res(s32)
+    %lhs:_(s64) = COPY $x0
+    %zero:_(s64) = G_CONSTANT i64 0
+    %res:_(s32) = G_ICMP intpred(slt), %lhs(s64), %zero
+    $w0 = COPY %res(s32)
+...
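+# For a plain COPY nothing is known about the sign bit, so the slt compare
+# against zero must not fold. The tests below feed computed LHS values into
+# the compare instead.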
+--- +name: test_ugt_icmp_of_zero_known_non_zero +body: | + bb.1: + ; CHECK-LABEL: name: test_ugt_icmp_of_zero_known_non_zero + ; CHECK: %res:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: $w0 = COPY %res(s32) + %amount:_(s64) = COPY $x0 + %zero:_(s64) = G_CONSTANT i64 0 + %lhs:_(s64) = G_SHL %zero, %amount + %res:_(s32) = G_ICMP intpred(ugt), %lhs(s64), %zero + $w0 = COPY %res(s32) +... +--- +name: test_ugt_icmp_of_zero_xor +body: | + bb.1: + ; CHECK-LABEL: name: test_ugt_icmp_of_zero_xor + ; CHECK: %x:_(s64) = COPY $x0 + ; CHECK-NEXT: %y:_(s64) = COPY $x0 + ; CHECK-NEXT: %zero:_(s64) = G_CONSTANT i64 0 + ; CHECK-NEXT: %lhs:_(s64) = G_XOR %x, %y + ; CHECK-NEXT: %res:_(s32) = G_ICMP intpred(ugt), %lhs(s64), %zero + ; CHECK-NEXT: $w0 = COPY %res(s32) + %x:_(s64) = COPY $x0 + %y:_(s64) = COPY $x0 + %zero:_(s64) = G_CONSTANT i64 0 + %lhs:_(s64) = G_XOR %x, %y + %res:_(s32) = G_ICMP intpred(ugt), %lhs(s64), %zero + $w0 = COPY %res(s32) diff --git a/llvm/test/CodeGen/AArch64/arm64-ccmp.ll b/llvm/test/CodeGen/AArch64/arm64-ccmp.ll index 50afc79a5a576..06e957fdcc6a2 100644 --- a/llvm/test/CodeGen/AArch64/arm64-ccmp.ll +++ b/llvm/test/CodeGen/AArch64/arm64-ccmp.ll @@ -457,20 +457,12 @@ sw.bb.i.i: } define i64 @select_and(i32 %w0, i32 %w1, i64 %x2, i64 %x3) { -; SDISEL-LABEL: select_and: -; SDISEL: ; %bb.0: -; SDISEL-NEXT: cmp w1, #5 -; SDISEL-NEXT: ccmp w0, w1, #0, ne -; SDISEL-NEXT: csel x0, x2, x3, lt -; SDISEL-NEXT: ret -; -; GISEL-LABEL: select_and: -; GISEL: ; %bb.0: -; GISEL-NEXT: mov w8, #5 ; =0x5 -; GISEL-NEXT: cmp w8, w1 -; GISEL-NEXT: ccmp w0, w1, #0, ne -; GISEL-NEXT: csel x0, x2, x3, lt -; GISEL-NEXT: ret +; CHECK-LABEL: select_and: +; CHECK: ; %bb.0: +; CHECK-NEXT: cmp w1, #5 +; CHECK-NEXT: ccmp w0, w1, #0, ne +; CHECK-NEXT: csel x0, x2, x3, lt +; CHECK-NEXT: ret %1 = icmp slt i32 %w0, %w1 %2 = icmp ne i32 5, %w1 %3 = and i1 %1, %2 @@ -479,20 +471,12 @@ define i64 @select_and(i32 %w0, i32 %w1, i64 %x2, i64 %x3) { } define i64 @select_or(i32 %w0, i32 %w1, i64 %x2, i64 %x3) { -; SDISEL-LABEL: select_or: -; SDISEL: ; %bb.0: -; SDISEL-NEXT: cmp w1, #5 -; SDISEL-NEXT: ccmp w0, w1, #8, eq -; SDISEL-NEXT: csel x0, x2, x3, lt -; SDISEL-NEXT: ret -; -; GISEL-LABEL: select_or: -; GISEL: ; %bb.0: -; GISEL-NEXT: mov w8, #5 ; =0x5 -; GISEL-NEXT: cmp w8, w1 -; GISEL-NEXT: ccmp w0, w1, #8, eq -; GISEL-NEXT: csel x0, x2, x3, lt -; GISEL-NEXT: ret +; CHECK-LABEL: select_or: +; CHECK: ; %bb.0: +; CHECK-NEXT: cmp w1, #5 +; CHECK-NEXT: ccmp w0, w1, #8, eq +; CHECK-NEXT: csel x0, x2, x3, lt +; CHECK-NEXT: ret %1 = icmp slt i32 %w0, %w1 %2 = icmp ne i32 5, %w1 %3 = or i1 %1, %2 @@ -501,20 +485,12 @@ define i64 @select_or(i32 %w0, i32 %w1, i64 %x2, i64 %x3) { } define float @select_or_float(i32 %w0, i32 %w1, float %x2, float %x3) { -; SDISEL-LABEL: select_or_float: -; SDISEL: ; %bb.0: -; SDISEL-NEXT: cmp w1, #5 -; SDISEL-NEXT: ccmp w0, w1, #8, eq -; SDISEL-NEXT: fcsel s0, s0, s1, lt -; SDISEL-NEXT: ret -; -; GISEL-LABEL: select_or_float: -; GISEL: ; %bb.0: -; GISEL-NEXT: mov w8, #5 ; =0x5 -; GISEL-NEXT: cmp w8, w1 -; GISEL-NEXT: ccmp w0, w1, #8, eq -; GISEL-NEXT: fcsel s0, s0, s1, lt -; GISEL-NEXT: ret +; CHECK-LABEL: select_or_float: +; CHECK: ; %bb.0: +; CHECK-NEXT: cmp w1, #5 +; CHECK-NEXT: ccmp w0, w1, #8, eq +; CHECK-NEXT: fcsel s0, s0, s1, lt +; CHECK-NEXT: ret %1 = icmp slt i32 %w0, %w1 %2 = icmp ne i32 5, %w1 %3 = or i1 %1, %2 diff --git a/llvm/test/CodeGen/AArch64/icmp2.ll b/llvm/test/CodeGen/AArch64/icmp2.ll new file mode 100644 index 0000000000000..963501da78475 --- /dev/null +++ 
b/llvm/test/CodeGen/AArch64/icmp2.ll @@ -0,0 +1,301 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 +; RUN: llc -mtriple=aarch64 -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-SD +; RUN: llc -mtriple=aarch64 -global-isel -global-isel-abort=2 -verify-machineinstrs %s -o - 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI + +define i1 @i64_i64_canon(i64 %a, i64 %b) { +; CHECK-SD-LABEL: i64_i64_canon: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: cmp x0, #0 +; CHECK-SD-NEXT: cset w0, eq +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: i64_i64_canon: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: cmp x0, #0 +; CHECK-GI-NEXT: cset w0, ls +; CHECK-GI-NEXT: ret +entry: + %c = icmp uge i64 0, %a + ret i1 %c +} + +define <2 x i1> @i64_i64_canon_2x64(<2 x i64> %a, <2 x i64> %b) { +; CHECK-LABEL: i64_i64_canon_2x64: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: adrp x8, .LCPI1_0 +; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI1_0] +; CHECK-NEXT: cmhs v0.2d, v1.2d, v0.2d +; CHECK-NEXT: xtn v0.2s, v0.2d +; CHECK-NEXT: ret +entry: + %c = icmp uge <2 x i64> , %a + ret <2 x i1> %c +} + +define i1 @i64_i64_undef_eq(i64 %a, i64 %b) { +; CHECK-SD-LABEL: i64_i64_undef_eq: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: mov w0, wzr +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: i64_i64_undef_eq: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: cmp x0, x8 +; CHECK-GI-NEXT: cset w0, eq +; CHECK-GI-NEXT: ret +entry: + %c = icmp eq i64 %a, undef + ret i1 %c +} + +define i1 @i64_i64_slt_eq(i64 %a, i64 %b) { +; CHECK-SD-LABEL: i64_i64_slt_eq: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: mov w0, wzr +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: i64_i64_slt_eq: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: cmp x0, x0 +; CHECK-GI-NEXT: cset w0, lt +; CHECK-GI-NEXT: ret +entry: + %c = icmp slt i64 %a, %a + ret i1 %c +} + +define i1 @i64_i64_not_eq_undef(i64 %a, i64 %b) { +; CHECK-SD-LABEL: i64_i64_not_eq_undef: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: mov w0, wzr +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: i64_i64_not_eq_undef: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: cmp x0, x8 +; CHECK-GI-NEXT: cset w0, lt +; CHECK-GI-NEXT: ret +entry: + %c = icmp slt i64 %a, undef + ret i1 %c +} + +define i1 @i64_i64_sext(i32 %a, i32 %b) { +; CHECK-LABEL: i64_i64_sext: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0 +; CHECK-NEXT: sxtw x8, w0 +; CHECK-NEXT: cmp x8, w1, sxtw +; CHECK-NEXT: cset w0, lt +; CHECK-NEXT: ret +entry: + %sextedlhs = sext i32 %a to i64 + %sextedrhs = sext i32 %b to i64 + %c = icmp slt i64 %sextedlhs, %sextedrhs + ret i1 %c +} + +define i1 @i64_i64_zext(i32 %a, i32 %b) { +; CHECK-LABEL: i64_i64_zext: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: mov w8, w0 +; CHECK-NEXT: cmp x8, w1, uxtw +; CHECK-NEXT: cset w0, lt +; CHECK-NEXT: ret +entry: + %zextedlhs = zext i32 %a to i64 + %zextedrhs = zext i32 %b to i64 + %c = icmp slt i64 %zextedlhs, %zextedrhs + ret i1 %c +} + +define i1 @i64_i64_ule_or(i64 %a, i64 %b, i64 %c) { +; CHECK-SD-LABEL: i64_i64_ule_or: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: orr x8, x2, #0x1 +; CHECK-SD-NEXT: cmp x8, #0 +; CHECK-SD-NEXT: cset w0, eq +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: i64_i64_ule_or: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: mov w0, wzr +; CHECK-GI-NEXT: ret +entry: + %or = or i64 1, %c + %cmp = icmp ule i64 %or, 0 + ret i1 %cmp +} + +define i1 @i64_i64_ugt_or(i64 %a, i64 %b, i64 %c) { +; CHECK-SD-LABEL: 
i64_i64_ugt_or: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: orr x8, x2, #0x1 +; CHECK-SD-NEXT: cmp x8, #0 +; CHECK-SD-NEXT: cset w0, ne +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: i64_i64_ugt_or: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: mov w0, #1 // =0x1 +; CHECK-GI-NEXT: ret +entry: + %or = or i64 1, %c + %cmp = icmp ugt i64 %or, 0 + ret i1 %cmp +} + +define i1 @i64_i64_eq_or(i64 %a, i64 %b, i64 %c) { +; CHECK-SD-LABEL: i64_i64_eq_or: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: orr x8, x2, #0x1 +; CHECK-SD-NEXT: cmp x8, #0 +; CHECK-SD-NEXT: cset w0, eq +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: i64_i64_eq_or: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: mov w0, wzr +; CHECK-GI-NEXT: ret +entry: + %or = or i64 1, %c + %cmp = icmp eq i64 %or, 0 + ret i1 %cmp +} + +define i1 @i64_i64_eq_freeze_or(i64 %a, i64 %b, i64 %c) { +; CHECK-SD-LABEL: i64_i64_eq_freeze_or: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: orr x8, x2, #0x1 +; CHECK-SD-NEXT: cmp x8, #0 +; CHECK-SD-NEXT: cset w0, eq +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: i64_i64_eq_freeze_or: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: mov w0, wzr +; CHECK-GI-NEXT: ret +entry: + %or = or i64 1, %c + %free = freeze i64 %or + %cmp = icmp eq i64 %free, 0 + ret i1 %cmp +} + +define i1 @i64_i64_eq_freeze_add(i64 %a, i64 %b, i64 %c) { +; CHECK-LABEL: i64_i64_eq_freeze_add: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: cmn x2, #1 +; CHECK-NEXT: cset w0, eq +; CHECK-NEXT: ret +entry: + %add = add nuw i64 1, %c + %free = freeze i64 %add + %cmp = icmp eq i64 %free, 0 + ret i1 %cmp +} + +define i1 @i64_i64_eq_lshr(i64 %a, i64 %b, i64 %c) { +; CHECK-LABEL: i64_i64_eq_lshr: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: mov w0, wzr +; CHECK-NEXT: ret +entry: + %lshr = lshr exact i64 1, %c + %cmp = icmp eq i64 %lshr, 0 + ret i1 %cmp +} + +define i1 @i64_i64_eq_zext(i64 %a, i64 %b, i32 %c) { +; CHECK-SD-LABEL: i64_i64_eq_zext: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: orr w8, w2, #0x1 +; CHECK-SD-NEXT: cmp w8, #0 +; CHECK-SD-NEXT: cset w0, eq +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: i64_i64_eq_zext: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: mov w0, wzr +; CHECK-GI-NEXT: ret +entry: + %or = or i32 1, %c + %ze = zext i32 %or to i64 + %cmp = icmp eq i64 %ze, 0 + ret i1 %cmp +} + +define i1 @i64_i64_canon_ule(i64 %a, i64 %b, i64 %c) { +; CHECK-LABEL: i64_i64_canon_ule: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: mov w0, #1 // =0x1 +; CHECK-NEXT: ret +entry: + %cmp = icmp ule i64 0, %a + ret i1 %cmp +} + +define i1 @i64_i64_canon_ugt(i64 %a, i64 %b, i64 %c) { +; CHECK-LABEL: i64_i64_canon_ugt: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: mov w0, wzr +; CHECK-NEXT: ret +entry: + %cmp = icmp ugt i64 0, %a + ret i1 %cmp +} + +define i1 @i64_i64_trunc_eq(i64 %a, i64 %b, i64 %c) { +; CHECK-SD-LABEL: i64_i64_trunc_eq: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: orr w8, w2, #0x1 +; CHECK-SD-NEXT: cmp w8, #0 +; CHECK-SD-NEXT: cset w0, eq +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: i64_i64_trunc_eq: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: mov w0, wzr +; CHECK-GI-NEXT: ret +entry: + %or = or i64 1, %c + %tr = trunc nsw i64 %or to i32 + %cmp = icmp eq i32 %tr, 0 + ret i1 %cmp +} + +define i1 @i64_i64_umin_eq(i64 %a, i64 %b, i64 %c) { +; CHECK-LABEL: i64_i64_umin_eq: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: orr x8, x2, #0x1 +; CHECK-NEXT: orr x9, x2, #0x2 +; CHECK-NEXT: cmp x8, x9 +; CHECK-NEXT: csel x8, x8, x9, lo +; CHECK-NEXT: cmp x8, #0 +; CHECK-NEXT: cset w0, eq +; CHECK-NEXT: ret 
+entry: + %or1 = or i64 1, %c + %or2 = or i64 2, %c + %umin = call i64 @llvm.umin.i64(i64 %or1, i64 %or2) + %cmp = icmp eq i64 %umin, 0 + ret i1 %cmp +} + +define i1 @i64_i64_smin_eq(i64 %a, i64 %b, i64 %c) { +; CHECK-LABEL: i64_i64_smin_eq: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: orr x8, x2, #0x1 +; CHECK-NEXT: orr x9, x2, #0x2 +; CHECK-NEXT: cmp x8, x9 +; CHECK-NEXT: csel x8, x8, x9, lt +; CHECK-NEXT: cmp x8, #0 +; CHECK-NEXT: cset w0, eq +; CHECK-NEXT: ret +entry: + %or1 = or i64 1, %c + %or2 = or i64 2, %c + %smin = call i64 @llvm.smin.i64(i64 %or1, i64 %or2) + %cmp = icmp eq i64 %smin, 0 + ret i1 %cmp +} diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/fdiv.f64.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/fdiv.f64.ll index b1cdf553b7242..0b66185d25f3e 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/fdiv.f64.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/fdiv.f64.ll @@ -253,7 +253,7 @@ define double @v_rcp_f64(double %x) { ; GFX6-NEXT: v_div_scale_f64 v[8:9], s[4:5], 1.0, v[0:1], 1.0 ; GFX6-NEXT: v_rcp_f64_e32 v[4:5], v[2:3] ; GFX6-NEXT: v_mov_b32_e32 v10, 0x3ff00000 -; GFX6-NEXT: v_cmp_eq_u32_e32 vcc, v10, v9 +; GFX6-NEXT: v_cmp_eq_u32_e32 vcc, v9, v10 ; GFX6-NEXT: v_cmp_eq_u32_e64 s[4:5], v1, v3 ; GFX6-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0 ; GFX6-NEXT: s_xor_b64 vcc, vcc, s[4:5] @@ -346,7 +346,7 @@ define double @v_rcp_f64_arcp(double %x) { ; GFX6-NEXT: v_div_scale_f64 v[8:9], s[4:5], 1.0, v[0:1], 1.0 ; GFX6-NEXT: v_rcp_f64_e32 v[4:5], v[2:3] ; GFX6-NEXT: v_mov_b32_e32 v10, 0x3ff00000 -; GFX6-NEXT: v_cmp_eq_u32_e32 vcc, v10, v9 +; GFX6-NEXT: v_cmp_eq_u32_e32 vcc, v9, v10 ; GFX6-NEXT: v_cmp_eq_u32_e64 s[4:5], v1, v3 ; GFX6-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0 ; GFX6-NEXT: s_xor_b64 vcc, vcc, s[4:5] @@ -483,7 +483,7 @@ define double @v_rcp_f64_ulp25(double %x) { ; GFX6-NEXT: v_div_scale_f64 v[8:9], s[4:5], 1.0, v[0:1], 1.0 ; GFX6-NEXT: v_rcp_f64_e32 v[4:5], v[2:3] ; GFX6-NEXT: v_mov_b32_e32 v10, 0x3ff00000 -; GFX6-NEXT: v_cmp_eq_u32_e32 vcc, v10, v9 +; GFX6-NEXT: v_cmp_eq_u32_e32 vcc, v9, v10 ; GFX6-NEXT: v_cmp_eq_u32_e64 s[4:5], v1, v3 ; GFX6-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0 ; GFX6-NEXT: s_xor_b64 vcc, vcc, s[4:5] @@ -1115,7 +1115,7 @@ define <2 x double> @v_rcp_v2f64(<2 x double> %x) { ; GFX6-NEXT: v_fma_f64 v[12:13], -v[4:5], v[6:7], 1.0 ; GFX6-NEXT: v_fma_f64 v[14:15], v[14:15], v[16:17], v[14:15] ; GFX6-NEXT: v_fma_f64 v[6:7], v[6:7], v[12:13], v[6:7] -; GFX6-NEXT: v_cmp_eq_u32_e32 vcc, v20, v9 +; GFX6-NEXT: v_cmp_eq_u32_e32 vcc, v9, v20 ; GFX6-NEXT: v_mul_f64 v[12:13], v[8:9], v[6:7] ; GFX6-NEXT: v_div_scale_f64 v[16:17], s[4:5], 1.0, v[2:3], 1.0 ; GFX6-NEXT: v_fma_f64 v[18:19], -v[4:5], v[12:13], v[8:9] @@ -1126,7 +1126,7 @@ define <2 x double> @v_rcp_v2f64(<2 x double> %x) { ; GFX6-NEXT: v_mul_f64 v[8:9], v[16:17], v[4:5] ; GFX6-NEXT: v_div_fmas_f64 v[6:7], v[18:19], v[6:7], v[12:13] ; GFX6-NEXT: v_fma_f64 v[12:13], -v[10:11], v[8:9], v[16:17] -; GFX6-NEXT: v_cmp_eq_u32_e32 vcc, v20, v17 +; GFX6-NEXT: v_cmp_eq_u32_e32 vcc, v17, v20 ; GFX6-NEXT: v_cmp_eq_u32_e64 s[4:5], v3, v11 ; GFX6-NEXT: s_xor_b64 vcc, vcc, s[4:5] ; GFX6-NEXT: v_div_fixup_f64 v[0:1], v[6:7], v[0:1], 1.0 @@ -1275,7 +1275,7 @@ define <2 x double> @v_rcp_v2f64_arcp(<2 x double> %x) { ; GFX6-NEXT: v_fma_f64 v[12:13], -v[4:5], v[6:7], 1.0 ; GFX6-NEXT: v_fma_f64 v[14:15], v[14:15], v[16:17], v[14:15] ; GFX6-NEXT: v_fma_f64 v[6:7], v[6:7], v[12:13], v[6:7] -; GFX6-NEXT: v_cmp_eq_u32_e32 vcc, v20, v9 +; GFX6-NEXT: v_cmp_eq_u32_e32 vcc, v9, v20 ; GFX6-NEXT: v_mul_f64 v[12:13], v[8:9], v[6:7] 
; GFX6-NEXT: v_div_scale_f64 v[16:17], s[4:5], 1.0, v[2:3], 1.0 ; GFX6-NEXT: v_fma_f64 v[18:19], -v[4:5], v[12:13], v[8:9] @@ -1286,7 +1286,7 @@ define <2 x double> @v_rcp_v2f64_arcp(<2 x double> %x) { ; GFX6-NEXT: v_mul_f64 v[8:9], v[16:17], v[4:5] ; GFX6-NEXT: v_div_fmas_f64 v[6:7], v[18:19], v[6:7], v[12:13] ; GFX6-NEXT: v_fma_f64 v[12:13], -v[10:11], v[8:9], v[16:17] -; GFX6-NEXT: v_cmp_eq_u32_e32 vcc, v20, v17 +; GFX6-NEXT: v_cmp_eq_u32_e32 vcc, v17, v20 ; GFX6-NEXT: v_cmp_eq_u32_e64 s[4:5], v3, v11 ; GFX6-NEXT: s_xor_b64 vcc, vcc, s[4:5] ; GFX6-NEXT: v_div_fixup_f64 v[0:1], v[6:7], v[0:1], 1.0 @@ -1502,7 +1502,7 @@ define <2 x double> @v_rcp_v2f64_ulp25(<2 x double> %x) { ; GFX6-NEXT: v_fma_f64 v[12:13], -v[4:5], v[6:7], 1.0 ; GFX6-NEXT: v_fma_f64 v[14:15], v[14:15], v[16:17], v[14:15] ; GFX6-NEXT: v_fma_f64 v[6:7], v[6:7], v[12:13], v[6:7] -; GFX6-NEXT: v_cmp_eq_u32_e32 vcc, v20, v9 +; GFX6-NEXT: v_cmp_eq_u32_e32 vcc, v9, v20 ; GFX6-NEXT: v_mul_f64 v[12:13], v[8:9], v[6:7] ; GFX6-NEXT: v_div_scale_f64 v[16:17], s[4:5], 1.0, v[2:3], 1.0 ; GFX6-NEXT: v_fma_f64 v[18:19], -v[4:5], v[12:13], v[8:9] @@ -1513,7 +1513,7 @@ define <2 x double> @v_rcp_v2f64_ulp25(<2 x double> %x) { ; GFX6-NEXT: v_mul_f64 v[8:9], v[16:17], v[4:5] ; GFX6-NEXT: v_div_fmas_f64 v[6:7], v[18:19], v[6:7], v[12:13] ; GFX6-NEXT: v_fma_f64 v[12:13], -v[10:11], v[8:9], v[16:17] -; GFX6-NEXT: v_cmp_eq_u32_e32 vcc, v20, v17 +; GFX6-NEXT: v_cmp_eq_u32_e32 vcc, v17, v20 ; GFX6-NEXT: v_cmp_eq_u32_e64 s[4:5], v3, v11 ; GFX6-NEXT: s_xor_b64 vcc, vcc, s[4:5] ; GFX6-NEXT: v_div_fixup_f64 v[0:1], v[6:7], v[0:1], 1.0 diff --git a/llvm/test/CodeGen/AMDGPU/itofp.i128.ll b/llvm/test/CodeGen/AMDGPU/itofp.i128.ll index c5198cdb421a5..98f09db4925ec 100644 --- a/llvm/test/CodeGen/AMDGPU/itofp.i128.ll +++ b/llvm/test/CodeGen/AMDGPU/itofp.i128.ll @@ -138,7 +138,7 @@ define float @sitofp_i128_to_f32(i128 %x) { ; GISEL-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[4:5] ; GISEL-NEXT: s_mov_b32 s4, 0 ; GISEL-NEXT: v_mov_b32_e32 v4, s4 -; GISEL-NEXT: s_and_saveexec_b64 s[6:7], vcc +; GISEL-NEXT: s_and_saveexec_b64 s[4:5], vcc ; GISEL-NEXT: s_cbranch_execz .LBB0_14 ; GISEL-NEXT: ; %bb.1: ; %itofp-if-end ; GISEL-NEXT: v_ashrrev_i32_e32 v6, 31, v3 @@ -165,8 +165,8 @@ define float @sitofp_i128_to_f32(i128 %x) { ; GISEL-NEXT: v_sub_u32_e32 v7, 0x7f, v5 ; GISEL-NEXT: v_cmp_ge_i32_e32 vcc, 24, v8 ; GISEL-NEXT: ; implicit-def: $vgpr4 -; GISEL-NEXT: s_and_saveexec_b64 s[4:5], vcc -; GISEL-NEXT: s_xor_b64 s[4:5], exec, s[4:5] +; GISEL-NEXT: s_and_saveexec_b64 s[6:7], vcc +; GISEL-NEXT: s_xor_b64 s[6:7], exec, s[6:7] ; GISEL-NEXT: ; %bb.2: ; %itofp-if-else ; GISEL-NEXT: v_add_u32_e32 v2, 0xffffff98, v5 ; GISEL-NEXT: v_lshlrev_b64 v[0:1], v2, v[0:1] @@ -177,16 +177,16 @@ define float @sitofp_i128_to_f32(i128 %x) { ; GISEL-NEXT: ; implicit-def: $vgpr5 ; GISEL-NEXT: ; implicit-def: $vgpr2 ; GISEL-NEXT: ; %bb.3: ; %Flow3 -; GISEL-NEXT: s_andn2_saveexec_b64 s[8:9], s[4:5] +; GISEL-NEXT: s_andn2_saveexec_b64 s[6:7], s[6:7] ; GISEL-NEXT: s_cbranch_execz .LBB0_13 ; GISEL-NEXT: ; %bb.4: ; %NodeBlock ; GISEL-NEXT: v_cmp_le_i32_e32 vcc, 26, v8 -; GISEL-NEXT: s_and_saveexec_b64 s[4:5], vcc -; GISEL-NEXT: s_xor_b64 s[10:11], exec, s[4:5] +; GISEL-NEXT: s_and_saveexec_b64 s[8:9], vcc +; GISEL-NEXT: s_xor_b64 s[8:9], exec, s[8:9] ; GISEL-NEXT: s_cbranch_execz .LBB0_8 ; GISEL-NEXT: ; %bb.5: ; %LeafBlock ; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 26, v8 -; GISEL-NEXT: s_and_saveexec_b64 s[12:13], vcc +; GISEL-NEXT: s_and_saveexec_b64 s[10:11], vcc ; GISEL-NEXT: s_cbranch_execz .LBB0_7 ; 
GISEL-NEXT: ; %bb.6: ; %itofp-sw-default ; GISEL-NEXT: v_sub_u32_e32 v4, 0x66, v5 @@ -212,13 +212,10 @@ define float @sitofp_i128_to_f32(i128 %x) { ; GISEL-NEXT: v_or_b32_e32 v16, v10, v12 ; GISEL-NEXT: v_lshrrev_b64 v[11:12], v14, -1 ; GISEL-NEXT: v_cmp_gt_u32_e32 vcc, 64, v5 -; GISEL-NEXT: v_cndmask_b32_e32 v11, v11, v15, vcc -; GISEL-NEXT: v_cndmask_b32_e32 v12, v12, v16, vcc -; GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v5 ; GISEL-NEXT: v_cndmask_b32_e32 v9, 0, v9, vcc ; GISEL-NEXT: v_cndmask_b32_e32 v10, 0, v10, vcc -; GISEL-NEXT: v_cndmask_b32_e64 v5, v11, -1, s[4:5] -; GISEL-NEXT: v_cndmask_b32_e64 v11, v12, -1, s[4:5] +; GISEL-NEXT: v_cndmask_b32_e32 v5, v11, v15, vcc +; GISEL-NEXT: v_cndmask_b32_e32 v11, v12, v16, vcc ; GISEL-NEXT: v_and_b32_e32 v2, v9, v2 ; GISEL-NEXT: v_and_b32_e32 v3, v10, v3 ; GISEL-NEXT: v_and_or_b32 v0, v5, v0, v2 @@ -231,13 +228,13 @@ define float @sitofp_i128_to_f32(i128 %x) { ; GISEL-NEXT: v_mov_b32_e32 v2, v5 ; GISEL-NEXT: v_mov_b32_e32 v3, v6 ; GISEL-NEXT: .LBB0_7: ; %Flow1 -; GISEL-NEXT: s_or_b64 exec, exec, s[12:13] +; GISEL-NEXT: s_or_b64 exec, exec, s[10:11] ; GISEL-NEXT: .LBB0_8: ; %Flow2 -; GISEL-NEXT: s_andn2_saveexec_b64 s[4:5], s[10:11] +; GISEL-NEXT: s_andn2_saveexec_b64 s[8:9], s[8:9] ; GISEL-NEXT: ; %bb.9: ; %itofp-sw-bb ; GISEL-NEXT: v_lshlrev_b64 v[0:1], 1, v[0:1] ; GISEL-NEXT: ; %bb.10: ; %itofp-sw-epilog -; GISEL-NEXT: s_or_b64 exec, exec, s[4:5] +; GISEL-NEXT: s_or_b64 exec, exec, s[8:9] ; GISEL-NEXT: v_bfe_u32 v2, v0, 2, 1 ; GISEL-NEXT: v_or_b32_e32 v0, v0, v2 ; GISEL-NEXT: v_add_co_u32_e32 v0, vcc, 1, v0 @@ -246,20 +243,20 @@ define float @sitofp_i128_to_f32(i128 %x) { ; GISEL-NEXT: v_mov_b32_e32 v3, 0 ; GISEL-NEXT: v_lshrrev_b64 v[4:5], 2, v[0:1] ; GISEL-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[2:3] -; GISEL-NEXT: s_and_saveexec_b64 s[4:5], vcc +; GISEL-NEXT: s_and_saveexec_b64 s[8:9], vcc ; GISEL-NEXT: ; %bb.11: ; %itofp-if-then20 ; GISEL-NEXT: v_lshrrev_b64 v[4:5], 3, v[0:1] ; GISEL-NEXT: v_mov_b32_e32 v7, v8 ; GISEL-NEXT: ; %bb.12: ; %Flow -; GISEL-NEXT: s_or_b64 exec, exec, s[4:5] -; GISEL-NEXT: .LBB0_13: ; %Flow4 ; GISEL-NEXT: s_or_b64 exec, exec, s[8:9] +; GISEL-NEXT: .LBB0_13: ; %Flow4 +; GISEL-NEXT: s_or_b64 exec, exec, s[6:7] ; GISEL-NEXT: v_and_b32_e32 v0, 0x80000000, v6 ; GISEL-NEXT: v_lshl_add_u32 v1, v7, 23, 1.0 ; GISEL-NEXT: v_and_b32_e32 v2, 0x7fffff, v4 ; GISEL-NEXT: v_or3_b32 v4, v2, v0, v1 ; GISEL-NEXT: .LBB0_14: ; %Flow5 -; GISEL-NEXT: s_or_b64 exec, exec, s[6:7] +; GISEL-NEXT: s_or_b64 exec, exec, s[4:5] ; GISEL-NEXT: v_mov_b32_e32 v0, v4 ; GISEL-NEXT: s_setpc_b64 s[30:31] %cvt = sitofp i128 %x to float @@ -392,7 +389,7 @@ define float @uitofp_i128_to_f32(i128 %x) { ; GISEL-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[4:5] ; GISEL-NEXT: s_mov_b32 s4, 0 ; GISEL-NEXT: v_mov_b32_e32 v4, s4 -; GISEL-NEXT: s_and_saveexec_b64 s[6:7], vcc +; GISEL-NEXT: s_and_saveexec_b64 s[4:5], vcc ; GISEL-NEXT: s_cbranch_execz .LBB1_14 ; GISEL-NEXT: ; %bb.1: ; %itofp-if-end ; GISEL-NEXT: v_ffbh_u32_e32 v5, v0 @@ -410,8 +407,8 @@ define float @uitofp_i128_to_f32(i128 %x) { ; GISEL-NEXT: v_sub_u32_e32 v6, 0x7f, v5 ; GISEL-NEXT: v_cmp_ge_i32_e32 vcc, 24, v7 ; GISEL-NEXT: ; implicit-def: $vgpr4 -; GISEL-NEXT: s_and_saveexec_b64 s[4:5], vcc -; GISEL-NEXT: s_xor_b64 s[4:5], exec, s[4:5] +; GISEL-NEXT: s_and_saveexec_b64 s[6:7], vcc +; GISEL-NEXT: s_xor_b64 s[6:7], exec, s[6:7] ; GISEL-NEXT: ; %bb.2: ; %itofp-if-else ; GISEL-NEXT: v_add_u32_e32 v2, 0xffffff98, v5 ; GISEL-NEXT: v_lshlrev_b64 v[0:1], v2, v[0:1] @@ -422,16 +419,16 @@ define float 
@uitofp_i128_to_f32(i128 %x) { ; GISEL-NEXT: ; implicit-def: $vgpr5 ; GISEL-NEXT: ; implicit-def: $vgpr2 ; GISEL-NEXT: ; %bb.3: ; %Flow3 -; GISEL-NEXT: s_andn2_saveexec_b64 s[8:9], s[4:5] +; GISEL-NEXT: s_andn2_saveexec_b64 s[6:7], s[6:7] ; GISEL-NEXT: s_cbranch_execz .LBB1_13 ; GISEL-NEXT: ; %bb.4: ; %NodeBlock ; GISEL-NEXT: v_cmp_le_i32_e32 vcc, 26, v7 -; GISEL-NEXT: s_and_saveexec_b64 s[4:5], vcc -; GISEL-NEXT: s_xor_b64 s[10:11], exec, s[4:5] +; GISEL-NEXT: s_and_saveexec_b64 s[8:9], vcc +; GISEL-NEXT: s_xor_b64 s[8:9], exec, s[8:9] ; GISEL-NEXT: s_cbranch_execz .LBB1_8 ; GISEL-NEXT: ; %bb.5: ; %LeafBlock ; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 26, v7 -; GISEL-NEXT: s_and_saveexec_b64 s[12:13], vcc +; GISEL-NEXT: s_and_saveexec_b64 s[10:11], vcc ; GISEL-NEXT: s_cbranch_execz .LBB1_7 ; GISEL-NEXT: ; %bb.6: ; %itofp-sw-default ; GISEL-NEXT: v_sub_u32_e32 v4, 0x66, v5 @@ -457,13 +454,10 @@ define float @uitofp_i128_to_f32(i128 %x) { ; GISEL-NEXT: v_or_b32_e32 v15, v9, v11 ; GISEL-NEXT: v_lshrrev_b64 v[10:11], v13, -1 ; GISEL-NEXT: v_cmp_gt_u32_e32 vcc, 64, v5 -; GISEL-NEXT: v_cndmask_b32_e32 v10, v10, v14, vcc -; GISEL-NEXT: v_cndmask_b32_e32 v11, v11, v15, vcc -; GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v5 ; GISEL-NEXT: v_cndmask_b32_e32 v8, 0, v8, vcc ; GISEL-NEXT: v_cndmask_b32_e32 v9, 0, v9, vcc -; GISEL-NEXT: v_cndmask_b32_e64 v5, v10, -1, s[4:5] -; GISEL-NEXT: v_cndmask_b32_e64 v10, v11, -1, s[4:5] +; GISEL-NEXT: v_cndmask_b32_e32 v5, v10, v14, vcc +; GISEL-NEXT: v_cndmask_b32_e32 v10, v11, v15, vcc ; GISEL-NEXT: v_and_b32_e32 v2, v8, v2 ; GISEL-NEXT: v_and_b32_e32 v3, v9, v3 ; GISEL-NEXT: v_and_or_b32 v0, v5, v0, v2 @@ -476,13 +470,13 @@ define float @uitofp_i128_to_f32(i128 %x) { ; GISEL-NEXT: v_mov_b32_e32 v2, v5 ; GISEL-NEXT: v_mov_b32_e32 v3, v6 ; GISEL-NEXT: .LBB1_7: ; %Flow1 -; GISEL-NEXT: s_or_b64 exec, exec, s[12:13] +; GISEL-NEXT: s_or_b64 exec, exec, s[10:11] ; GISEL-NEXT: .LBB1_8: ; %Flow2 -; GISEL-NEXT: s_andn2_saveexec_b64 s[4:5], s[10:11] +; GISEL-NEXT: s_andn2_saveexec_b64 s[8:9], s[8:9] ; GISEL-NEXT: ; %bb.9: ; %itofp-sw-bb ; GISEL-NEXT: v_lshlrev_b64 v[0:1], 1, v[0:1] ; GISEL-NEXT: ; %bb.10: ; %itofp-sw-epilog -; GISEL-NEXT: s_or_b64 exec, exec, s[4:5] +; GISEL-NEXT: s_or_b64 exec, exec, s[8:9] ; GISEL-NEXT: v_bfe_u32 v2, v0, 2, 1 ; GISEL-NEXT: v_or_b32_e32 v0, v0, v2 ; GISEL-NEXT: v_add_co_u32_e32 v0, vcc, 1, v0 @@ -491,19 +485,19 @@ define float @uitofp_i128_to_f32(i128 %x) { ; GISEL-NEXT: v_mov_b32_e32 v3, 0 ; GISEL-NEXT: v_lshrrev_b64 v[4:5], 2, v[0:1] ; GISEL-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[2:3] -; GISEL-NEXT: s_and_saveexec_b64 s[4:5], vcc +; GISEL-NEXT: s_and_saveexec_b64 s[8:9], vcc ; GISEL-NEXT: ; %bb.11: ; %itofp-if-then20 ; GISEL-NEXT: v_lshrrev_b64 v[4:5], 3, v[0:1] ; GISEL-NEXT: v_mov_b32_e32 v6, v7 ; GISEL-NEXT: ; %bb.12: ; %Flow -; GISEL-NEXT: s_or_b64 exec, exec, s[4:5] -; GISEL-NEXT: .LBB1_13: ; %Flow4 ; GISEL-NEXT: s_or_b64 exec, exec, s[8:9] +; GISEL-NEXT: .LBB1_13: ; %Flow4 +; GISEL-NEXT: s_or_b64 exec, exec, s[6:7] ; GISEL-NEXT: v_lshl_add_u32 v0, v6, 23, 1.0 ; GISEL-NEXT: v_mov_b32_e32 v1, 0x7fffff ; GISEL-NEXT: v_and_or_b32 v4, v4, v1, v0 ; GISEL-NEXT: .LBB1_14: ; %Flow5 -; GISEL-NEXT: s_or_b64 exec, exec, s[6:7] +; GISEL-NEXT: s_or_b64 exec, exec, s[4:5] ; GISEL-NEXT: v_mov_b32_e32 v0, v4 ; GISEL-NEXT: s_setpc_b64 s[30:31] %cvt = uitofp i128 %x to float @@ -744,13 +738,10 @@ define double @sitofp_i128_to_f64(i128 %x) { ; GISEL-NEXT: v_or_b32_e32 v17, v10, v12 ; GISEL-NEXT: v_lshrrev_b64 v[11:12], v15, -1 ; GISEL-NEXT: v_cmp_gt_u32_e32 vcc, 64, 
v14
-; GISEL-NEXT: v_cndmask_b32_e32 v11, v11, v16, vcc
-; GISEL-NEXT: v_cndmask_b32_e32 v12, v12, v17, vcc
-; GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v14
 ; GISEL-NEXT: v_cndmask_b32_e32 v9, 0, v9, vcc
 ; GISEL-NEXT: v_cndmask_b32_e32 v10, 0, v10, vcc
-; GISEL-NEXT: v_cndmask_b32_e64 v11, v11, -1, s[4:5]
-; GISEL-NEXT: v_cndmask_b32_e64 v12, v12, -1, s[4:5]
+; GISEL-NEXT: v_cndmask_b32_e32 v11, v11, v16, vcc
+; GISEL-NEXT: v_cndmask_b32_e32 v12, v12, v17, vcc
 ; GISEL-NEXT: v_and_b32_e32 v2, v9, v2
 ; GISEL-NEXT: v_and_b32_e32 v3, v10, v3
 ; GISEL-NEXT: v_and_or_b32 v0, v11, v0, v2
@@ -1021,13 +1012,10 @@ define double @uitofp_i128_to_f64(i128 %x) {
 ; GISEL-NEXT: v_or_b32_e32 v17, v5, v13
 ; GISEL-NEXT: v_lshrrev_b64 v[12:13], v15, -1
 ; GISEL-NEXT: v_cmp_gt_u32_e32 vcc, 64, v8
-; GISEL-NEXT: v_cndmask_b32_e32 v12, v12, v16, vcc
-; GISEL-NEXT: v_cndmask_b32_e32 v13, v13, v17, vcc
-; GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v8
 ; GISEL-NEXT: v_cndmask_b32_e32 v4, 0, v4, vcc
 ; GISEL-NEXT: v_cndmask_b32_e32 v5, 0, v5, vcc
-; GISEL-NEXT: v_cndmask_b32_e64 v8, v12, -1, s[4:5]
-; GISEL-NEXT: v_cndmask_b32_e64 v12, v13, -1, s[4:5]
+; GISEL-NEXT: v_cndmask_b32_e32 v8, v12, v16, vcc
+; GISEL-NEXT: v_cndmask_b32_e32 v12, v13, v17, vcc
 ; GISEL-NEXT: v_and_b32_e32 v2, v4, v2
 ; GISEL-NEXT: v_and_b32_e32 v3, v5, v3
 ; GISEL-NEXT: v_and_or_b32 v0, v8, v0, v2
@@ -1229,7 +1217,7 @@ define half @sitofp_i128_to_f16(i128 %x) {
 ; GISEL-NEXT: s_mov_b32 s4, 0
 ; GISEL-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[4:5]
 ; GISEL-NEXT: v_mov_b32_e32 v4, s4
-; GISEL-NEXT: s_and_saveexec_b64 s[6:7], vcc
+; GISEL-NEXT: s_and_saveexec_b64 s[4:5], vcc
 ; GISEL-NEXT: s_cbranch_execz .LBB4_14
 ; GISEL-NEXT: ; %bb.1: ; %itofp-if-end
 ; GISEL-NEXT: v_ashrrev_i32_e32 v6, 31, v3
@@ -1256,8 +1244,8 @@ define half @sitofp_i128_to_f16(i128 %x) {
 ; GISEL-NEXT: v_sub_u32_e32 v7, 0x7f, v5
 ; GISEL-NEXT: v_cmp_ge_i32_e32 vcc, 24, v8
 ; GISEL-NEXT: ; implicit-def: $vgpr4
-; GISEL-NEXT: s_and_saveexec_b64 s[4:5], vcc
-; GISEL-NEXT: s_xor_b64 s[4:5], exec, s[4:5]
+; GISEL-NEXT: s_and_saveexec_b64 s[6:7], vcc
+; GISEL-NEXT: s_xor_b64 s[6:7], exec, s[6:7]
 ; GISEL-NEXT: ; %bb.2: ; %itofp-if-else
 ; GISEL-NEXT: v_add_u32_e32 v2, 0xffffff98, v5
 ; GISEL-NEXT: v_lshlrev_b64 v[0:1], v2, v[0:1]
@@ -1268,16 +1256,16 @@ define half @sitofp_i128_to_f16(i128 %x) {
 ; GISEL-NEXT: ; implicit-def: $vgpr5
 ; GISEL-NEXT: ; implicit-def: $vgpr2
 ; GISEL-NEXT: ; %bb.3: ; %Flow3
-; GISEL-NEXT: s_andn2_saveexec_b64 s[8:9], s[4:5]
+; GISEL-NEXT: s_andn2_saveexec_b64 s[6:7], s[6:7]
 ; GISEL-NEXT: s_cbranch_execz .LBB4_13
 ; GISEL-NEXT: ; %bb.4: ; %NodeBlock
 ; GISEL-NEXT: v_cmp_le_i32_e32 vcc, 26, v8
-; GISEL-NEXT: s_and_saveexec_b64 s[4:5], vcc
-; GISEL-NEXT: s_xor_b64 s[10:11], exec, s[4:5]
+; GISEL-NEXT: s_and_saveexec_b64 s[8:9], vcc
+; GISEL-NEXT: s_xor_b64 s[8:9], exec, s[8:9]
 ; GISEL-NEXT: s_cbranch_execz .LBB4_8
 ; GISEL-NEXT: ; %bb.5: ; %LeafBlock
 ; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 26, v8
-; GISEL-NEXT: s_and_saveexec_b64 s[12:13], vcc
+; GISEL-NEXT: s_and_saveexec_b64 s[10:11], vcc
 ; GISEL-NEXT: s_cbranch_execz .LBB4_7
 ; GISEL-NEXT: ; %bb.6: ; %itofp-sw-default
 ; GISEL-NEXT: v_sub_u32_e32 v4, 0x66, v5
@@ -1303,13 +1291,10 @@ define half @sitofp_i128_to_f16(i128 %x) {
 ; GISEL-NEXT: v_or_b32_e32 v16, v10, v12
 ; GISEL-NEXT: v_lshrrev_b64 v[11:12], v14, -1
 ; GISEL-NEXT: v_cmp_gt_u32_e32 vcc, 64, v5
-; GISEL-NEXT: v_cndmask_b32_e32 v11, v11, v15, vcc
-; GISEL-NEXT: v_cndmask_b32_e32 v12, v12, v16, vcc
-; GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v5
 ; GISEL-NEXT: v_cndmask_b32_e32 v9, 0, v9, vcc
 ; GISEL-NEXT: v_cndmask_b32_e32 v10, 0, v10, vcc
-; GISEL-NEXT: v_cndmask_b32_e64 v5, v11, -1, s[4:5]
-; GISEL-NEXT: v_cndmask_b32_e64 v11, v12, -1, s[4:5]
+; GISEL-NEXT: v_cndmask_b32_e32 v5, v11, v15, vcc
+; GISEL-NEXT: v_cndmask_b32_e32 v11, v12, v16, vcc
 ; GISEL-NEXT: v_and_b32_e32 v2, v9, v2
 ; GISEL-NEXT: v_and_b32_e32 v3, v10, v3
 ; GISEL-NEXT: v_and_or_b32 v0, v5, v0, v2
@@ -1322,13 +1307,13 @@ define half @sitofp_i128_to_f16(i128 %x) {
 ; GISEL-NEXT: v_mov_b32_e32 v2, v5
 ; GISEL-NEXT: v_mov_b32_e32 v3, v6
 ; GISEL-NEXT: .LBB4_7: ; %Flow1
-; GISEL-NEXT: s_or_b64 exec, exec, s[12:13]
+; GISEL-NEXT: s_or_b64 exec, exec, s[10:11]
 ; GISEL-NEXT: .LBB4_8: ; %Flow2
-; GISEL-NEXT: s_andn2_saveexec_b64 s[4:5], s[10:11]
+; GISEL-NEXT: s_andn2_saveexec_b64 s[8:9], s[8:9]
 ; GISEL-NEXT: ; %bb.9: ; %itofp-sw-bb
 ; GISEL-NEXT: v_lshlrev_b64 v[0:1], 1, v[0:1]
 ; GISEL-NEXT: ; %bb.10: ; %itofp-sw-epilog
-; GISEL-NEXT: s_or_b64 exec, exec, s[4:5]
+; GISEL-NEXT: s_or_b64 exec, exec, s[8:9]
 ; GISEL-NEXT: v_bfe_u32 v2, v0, 2, 1
 ; GISEL-NEXT: v_or_b32_e32 v0, v0, v2
 ; GISEL-NEXT: v_add_co_u32_e32 v0, vcc, 1, v0
@@ -1337,21 +1322,21 @@ define half @sitofp_i128_to_f16(i128 %x) {
 ; GISEL-NEXT: v_mov_b32_e32 v3, 0
 ; GISEL-NEXT: v_lshrrev_b64 v[4:5], 2, v[0:1]
 ; GISEL-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[2:3]
-; GISEL-NEXT: s_and_saveexec_b64 s[4:5], vcc
+; GISEL-NEXT: s_and_saveexec_b64 s[8:9], vcc
 ; GISEL-NEXT: ; %bb.11: ; %itofp-if-then20
 ; GISEL-NEXT: v_lshrrev_b64 v[4:5], 3, v[0:1]
 ; GISEL-NEXT: v_mov_b32_e32 v7, v8
 ; GISEL-NEXT: ; %bb.12: ; %Flow
-; GISEL-NEXT: s_or_b64 exec, exec, s[4:5]
-; GISEL-NEXT: .LBB4_13: ; %Flow4
 ; GISEL-NEXT: s_or_b64 exec, exec, s[8:9]
+; GISEL-NEXT: .LBB4_13: ; %Flow4
+; GISEL-NEXT: s_or_b64 exec, exec, s[6:7]
 ; GISEL-NEXT: v_and_b32_e32 v0, 0x80000000, v6
 ; GISEL-NEXT: v_lshl_add_u32 v1, v7, 23, 1.0
 ; GISEL-NEXT: v_and_b32_e32 v2, 0x7fffff, v4
 ; GISEL-NEXT: v_or3_b32 v0, v2, v0, v1
 ; GISEL-NEXT: v_cvt_f16_f32_e32 v4, v0
 ; GISEL-NEXT: .LBB4_14: ; %Flow5
-; GISEL-NEXT: s_or_b64 exec, exec, s[6:7]
+; GISEL-NEXT: s_or_b64 exec, exec, s[4:5]
 ; GISEL-NEXT: v_mov_b32_e32 v0, v4
 ; GISEL-NEXT: s_setpc_b64 s[30:31]
 %cvt = sitofp i128 %x to half
@@ -1485,7 +1470,7 @@ define half @uitofp_i128_to_f16(i128 %x) {
 ; GISEL-NEXT: s_mov_b32 s4, 0
 ; GISEL-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[4:5]
 ; GISEL-NEXT: v_mov_b32_e32 v4, s4
-; GISEL-NEXT: s_and_saveexec_b64 s[6:7], vcc
+; GISEL-NEXT: s_and_saveexec_b64 s[4:5], vcc
 ; GISEL-NEXT: s_cbranch_execz .LBB5_14
 ; GISEL-NEXT: ; %bb.1: ; %itofp-if-end
 ; GISEL-NEXT: v_ffbh_u32_e32 v5, v0
@@ -1503,8 +1488,8 @@ define half @uitofp_i128_to_f16(i128 %x) {
 ; GISEL-NEXT: v_sub_u32_e32 v6, 0x7f, v5
 ; GISEL-NEXT: v_cmp_ge_i32_e32 vcc, 24, v7
 ; GISEL-NEXT: ; implicit-def: $vgpr4
-; GISEL-NEXT: s_and_saveexec_b64 s[4:5], vcc
-; GISEL-NEXT: s_xor_b64 s[4:5], exec, s[4:5]
+; GISEL-NEXT: s_and_saveexec_b64 s[6:7], vcc
+; GISEL-NEXT: s_xor_b64 s[6:7], exec, s[6:7]
 ; GISEL-NEXT: ; %bb.2: ; %itofp-if-else
 ; GISEL-NEXT: v_add_u32_e32 v2, 0xffffff98, v5
 ; GISEL-NEXT: v_lshlrev_b64 v[0:1], v2, v[0:1]
@@ -1515,16 +1500,16 @@ define half @uitofp_i128_to_f16(i128 %x) {
 ; GISEL-NEXT: ; implicit-def: $vgpr5
 ; GISEL-NEXT: ; implicit-def: $vgpr2
 ; GISEL-NEXT: ; %bb.3: ; %Flow3
-; GISEL-NEXT: s_andn2_saveexec_b64 s[8:9], s[4:5]
+; GISEL-NEXT: s_andn2_saveexec_b64 s[6:7], s[6:7]
 ; GISEL-NEXT: s_cbranch_execz .LBB5_13
 ; GISEL-NEXT: ; %bb.4: ; %NodeBlock
 ; GISEL-NEXT: v_cmp_le_i32_e32 vcc, 26, v7
-; GISEL-NEXT: s_and_saveexec_b64 s[4:5], vcc
-; GISEL-NEXT: s_xor_b64 s[10:11], exec, s[4:5]
+; GISEL-NEXT: s_and_saveexec_b64 s[8:9], vcc
+; GISEL-NEXT: s_xor_b64 s[8:9], exec, s[8:9]
 ; GISEL-NEXT: s_cbranch_execz .LBB5_8
 ; GISEL-NEXT: ; %bb.5: ; %LeafBlock
 ; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 26, v7
-; GISEL-NEXT: s_and_saveexec_b64 s[12:13], vcc
+; GISEL-NEXT: s_and_saveexec_b64 s[10:11], vcc
 ; GISEL-NEXT: s_cbranch_execz .LBB5_7
 ; GISEL-NEXT: ; %bb.6: ; %itofp-sw-default
 ; GISEL-NEXT: v_sub_u32_e32 v4, 0x66, v5
@@ -1550,13 +1535,10 @@ define half @uitofp_i128_to_f16(i128 %x) {
 ; GISEL-NEXT: v_or_b32_e32 v15, v9, v11
 ; GISEL-NEXT: v_lshrrev_b64 v[10:11], v13, -1
 ; GISEL-NEXT: v_cmp_gt_u32_e32 vcc, 64, v5
-; GISEL-NEXT: v_cndmask_b32_e32 v10, v10, v14, vcc
-; GISEL-NEXT: v_cndmask_b32_e32 v11, v11, v15, vcc
-; GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v5
 ; GISEL-NEXT: v_cndmask_b32_e32 v8, 0, v8, vcc
 ; GISEL-NEXT: v_cndmask_b32_e32 v9, 0, v9, vcc
-; GISEL-NEXT: v_cndmask_b32_e64 v5, v10, -1, s[4:5]
-; GISEL-NEXT: v_cndmask_b32_e64 v10, v11, -1, s[4:5]
+; GISEL-NEXT: v_cndmask_b32_e32 v5, v10, v14, vcc
+; GISEL-NEXT: v_cndmask_b32_e32 v10, v11, v15, vcc
 ; GISEL-NEXT: v_and_b32_e32 v2, v8, v2
 ; GISEL-NEXT: v_and_b32_e32 v3, v9, v3
 ; GISEL-NEXT: v_and_or_b32 v0, v5, v0, v2
@@ -1569,13 +1551,13 @@ define half @uitofp_i128_to_f16(i128 %x) {
 ; GISEL-NEXT: v_mov_b32_e32 v2, v5
 ; GISEL-NEXT: v_mov_b32_e32 v3, v6
 ; GISEL-NEXT: .LBB5_7: ; %Flow1
-; GISEL-NEXT: s_or_b64 exec, exec, s[12:13]
+; GISEL-NEXT: s_or_b64 exec, exec, s[10:11]
 ; GISEL-NEXT: .LBB5_8: ; %Flow2
-; GISEL-NEXT: s_andn2_saveexec_b64 s[4:5], s[10:11]
+; GISEL-NEXT: s_andn2_saveexec_b64 s[8:9], s[8:9]
 ; GISEL-NEXT: ; %bb.9: ; %itofp-sw-bb
 ; GISEL-NEXT: v_lshlrev_b64 v[0:1], 1, v[0:1]
 ; GISEL-NEXT: ; %bb.10: ; %itofp-sw-epilog
-; GISEL-NEXT: s_or_b64 exec, exec, s[4:5]
+; GISEL-NEXT: s_or_b64 exec, exec, s[8:9]
 ; GISEL-NEXT: v_bfe_u32 v2, v0, 2, 1
 ; GISEL-NEXT: v_or_b32_e32 v0, v0, v2
 ; GISEL-NEXT: v_add_co_u32_e32 v0, vcc, 1, v0
@@ -1584,20 +1566,20 @@ define half @uitofp_i128_to_f16(i128 %x) {
 ; GISEL-NEXT: v_mov_b32_e32 v3, 0
 ; GISEL-NEXT: v_lshrrev_b64 v[4:5], 2, v[0:1]
 ; GISEL-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[2:3]
-; GISEL-NEXT: s_and_saveexec_b64 s[4:5], vcc
+; GISEL-NEXT: s_and_saveexec_b64 s[8:9], vcc
 ; GISEL-NEXT: ; %bb.11: ; %itofp-if-then20
 ; GISEL-NEXT: v_lshrrev_b64 v[4:5], 3, v[0:1]
 ; GISEL-NEXT: v_mov_b32_e32 v6, v7
 ; GISEL-NEXT: ; %bb.12: ; %Flow
-; GISEL-NEXT: s_or_b64 exec, exec, s[4:5]
-; GISEL-NEXT: .LBB5_13: ; %Flow4
 ; GISEL-NEXT: s_or_b64 exec, exec, s[8:9]
+; GISEL-NEXT: .LBB5_13: ; %Flow4
+; GISEL-NEXT: s_or_b64 exec, exec, s[6:7]
 ; GISEL-NEXT: v_lshl_add_u32 v0, v6, 23, 1.0
 ; GISEL-NEXT: v_mov_b32_e32 v1, 0x7fffff
 ; GISEL-NEXT: v_and_or_b32 v0, v4, v1, v0
 ; GISEL-NEXT: v_cvt_f16_f32_e32 v4, v0
 ; GISEL-NEXT: .LBB5_14: ; %Flow5
-; GISEL-NEXT: s_or_b64 exec, exec, s[6:7]
+; GISEL-NEXT: s_or_b64 exec, exec, s[4:5]
 ; GISEL-NEXT: v_mov_b32_e32 v0, v4
 ; GISEL-NEXT: s_setpc_b64 s[30:31]
 %cvt = uitofp i128 %x to half
diff --git a/llvm/test/CodeGen/AMDGPU/rsq.f64.ll b/llvm/test/CodeGen/AMDGPU/rsq.f64.ll
index bd6e1f54e636d..8f4a4b5afcdc1 100644
--- a/llvm/test/CodeGen/AMDGPU/rsq.f64.ll
+++ b/llvm/test/CodeGen/AMDGPU/rsq.f64.ll
@@ -87,7 +87,7 @@ define amdgpu_ps <2 x i32> @s_rsq_f64(double inreg %x) {
 ; SI-GISEL-NEXT: v_div_scale_f64 v[8:9], s[0:1], 1.0, v[0:1], 1.0
 ; SI-GISEL-NEXT: v_rcp_f64_e32 v[4:5], v[2:3]
 ; SI-GISEL-NEXT: v_cmp_eq_u32_e64 s[0:1], v1, v3
-; SI-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v10, v9
+; SI-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v9, v10
 ; SI-GISEL-NEXT: s_xor_b64 vcc, vcc, s[0:1]
 ; SI-GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
 ; SI-GISEL-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
@@ -267,7 +267,7 @@ define amdgpu_ps <2 x i32> @s_rsq_f64_fabs(double inreg %x) {
 ; SI-GISEL-NEXT: v_div_scale_f64 v[8:9], s[0:1], 1.0, v[0:1], 1.0
 ; SI-GISEL-NEXT: v_rcp_f64_e32 v[4:5], v[2:3]
 ; SI-GISEL-NEXT: v_cmp_eq_u32_e64 s[0:1], v1, v3
-; SI-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v10, v9
+; SI-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v9, v10
 ; SI-GISEL-NEXT: s_xor_b64 vcc, vcc, s[0:1]
 ; SI-GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
 ; SI-GISEL-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
@@ -448,7 +448,7 @@ define amdgpu_ps <2 x i32> @s_neg_rsq_f64(double inreg %x) {
 ; SI-GISEL-NEXT: v_div_scale_f64 v[8:9], s[0:1], -1.0, v[0:1], -1.0
 ; SI-GISEL-NEXT: v_rcp_f64_e32 v[4:5], v[2:3]
 ; SI-GISEL-NEXT: v_cmp_eq_u32_e64 s[0:1], v1, v3
-; SI-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v10, v9
+; SI-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v9, v10
 ; SI-GISEL-NEXT: s_xor_b64 vcc, vcc, s[0:1]
 ; SI-GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
 ; SI-GISEL-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
@@ -628,7 +628,7 @@ define amdgpu_ps <2 x i32> @s_neg_rsq_neg_f64(double inreg %x) {
 ; SI-GISEL-NEXT: v_div_scale_f64 v[8:9], s[0:1], -1.0, v[0:1], -1.0
 ; SI-GISEL-NEXT: v_rcp_f64_e32 v[4:5], v[2:3]
 ; SI-GISEL-NEXT: v_cmp_eq_u32_e64 s[0:1], v1, v3
-; SI-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v10, v9
+; SI-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v9, v10
 ; SI-GISEL-NEXT: s_xor_b64 vcc, vcc, s[0:1]
 ; SI-GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
 ; SI-GISEL-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
@@ -809,7 +809,7 @@ define double @v_rsq_f64(double %x) {
 ; SI-GISEL-NEXT: v_div_scale_f64 v[8:9], s[4:5], 1.0, v[0:1], 1.0
 ; SI-GISEL-NEXT: v_rcp_f64_e32 v[4:5], v[2:3]
 ; SI-GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], v1, v3
-; SI-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v10, v9
+; SI-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v9, v10
 ; SI-GISEL-NEXT: s_xor_b64 vcc, vcc, s[4:5]
 ; SI-GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
 ; SI-GISEL-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
@@ -978,7 +978,7 @@ define double @v_rsq_f64_fabs(double %x) {
 ; SI-GISEL-NEXT: v_div_scale_f64 v[8:9], s[4:5], 1.0, v[0:1], 1.0
 ; SI-GISEL-NEXT: v_rcp_f64_e32 v[4:5], v[2:3]
 ; SI-GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], v1, v3
-; SI-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v10, v9
+; SI-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v9, v10
 ; SI-GISEL-NEXT: s_xor_b64 vcc, vcc, s[4:5]
 ; SI-GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
 ; SI-GISEL-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
@@ -1148,7 +1148,7 @@ define double @v_rsq_f64_missing_contract0(double %x) {
 ; SI-GISEL-NEXT: v_div_scale_f64 v[8:9], s[4:5], 1.0, v[0:1], 1.0
 ; SI-GISEL-NEXT: v_rcp_f64_e32 v[4:5], v[2:3]
 ; SI-GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], v1, v3
-; SI-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v10, v9
+; SI-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v9, v10
 ; SI-GISEL-NEXT: s_xor_b64 vcc, vcc, s[4:5]
 ; SI-GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
 ; SI-GISEL-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
@@ -1317,7 +1317,7 @@ define double @v_rsq_f64_missing_contract1(double %x) {
 ; SI-GISEL-NEXT: v_div_scale_f64 v[8:9], s[4:5], 1.0, v[0:1], 1.0
 ; SI-GISEL-NEXT: v_rcp_f64_e32 v[4:5], v[2:3]
 ; SI-GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], v1, v3
-; SI-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v10, v9
+; SI-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v9, v10
 ; SI-GISEL-NEXT: s_xor_b64 vcc, vcc, s[4:5]
 ; SI-GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
 ; SI-GISEL-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
@@ -1486,7 +1486,7 @@ define double @v_neg_rsq_f64(double %x) {
 ; SI-GISEL-NEXT: v_div_scale_f64 v[8:9], s[4:5], -1.0, v[0:1], -1.0
 ; SI-GISEL-NEXT: v_rcp_f64_e32 v[4:5], v[2:3]
 ; SI-GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], v1, v3
-; SI-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v10, v9
+; SI-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v9, v10
 ; SI-GISEL-NEXT: s_xor_b64 vcc, vcc, s[4:5]
 ; SI-GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
 ; SI-GISEL-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
@@ -1716,7 +1716,7 @@ define <2 x double> @v_rsq_v2f64(<2 x double> %x) {
 ; SI-GISEL-NEXT: v_fma_f64 v[4:5], v[6:7], v[4:5], v[6:7]
 ; SI-GISEL-NEXT: v_rcp_f64_e32 v[6:7], v[8:9]
 ; SI-GISEL-NEXT: v_mul_f64 v[14:15], v[12:13], v[4:5]
-; SI-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v20, v13
+; SI-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v13, v20
 ; SI-GISEL-NEXT: v_fma_f64 v[16:17], -v[10:11], v[14:15], v[12:13]
 ; SI-GISEL-NEXT: v_fma_f64 v[18:19], -v[8:9], v[6:7], 1.0
 ; SI-GISEL-NEXT: v_fma_f64 v[6:7], v[6:7], v[18:19], v[6:7]
@@ -1728,7 +1728,7 @@ define <2 x double> @v_rsq_v2f64(<2 x double> %x) {
 ; SI-GISEL-NEXT: v_mul_f64 v[10:11], v[18:19], v[6:7]
 ; SI-GISEL-NEXT: v_div_fmas_f64 v[4:5], v[16:17], v[4:5], v[14:15]
 ; SI-GISEL-NEXT: v_fma_f64 v[12:13], -v[8:9], v[10:11], v[18:19]
-; SI-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v20, v19
+; SI-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v19, v20
 ; SI-GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], v3, v9
 ; SI-GISEL-NEXT: s_xor_b64 vcc, vcc, s[4:5]
 ; SI-GISEL-NEXT: v_div_fixup_f64 v[0:1], v[4:5], v[0:1], 1.0
@@ -2019,7 +2019,7 @@ define <2 x double> @v_neg_rsq_v2f64(<2 x double> %x) {
 ; SI-GISEL-NEXT: v_fma_f64 v[4:5], v[6:7], v[4:5], v[6:7]
 ; SI-GISEL-NEXT: v_rcp_f64_e32 v[6:7], v[8:9]
 ; SI-GISEL-NEXT: v_mul_f64 v[14:15], v[12:13], v[4:5]
-; SI-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v20, v13
+; SI-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v13, v20
 ; SI-GISEL-NEXT: v_fma_f64 v[16:17], -v[10:11], v[14:15], v[12:13]
 ; SI-GISEL-NEXT: v_fma_f64 v[18:19], -v[8:9], v[6:7], 1.0
 ; SI-GISEL-NEXT: v_fma_f64 v[6:7], v[6:7], v[18:19], v[6:7]
@@ -2031,7 +2031,7 @@ define <2 x double> @v_neg_rsq_v2f64(<2 x double> %x) {
 ; SI-GISEL-NEXT: v_mul_f64 v[10:11], v[18:19], v[6:7]
 ; SI-GISEL-NEXT: v_div_fmas_f64 v[4:5], v[16:17], v[4:5], v[14:15]
 ; SI-GISEL-NEXT: v_fma_f64 v[12:13], -v[8:9], v[10:11], v[18:19]
-; SI-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v20, v19
+; SI-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v19, v20
 ; SI-GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], v3, v9
 ; SI-GISEL-NEXT: s_xor_b64 vcc, vcc, s[4:5]
 ; SI-GISEL-NEXT: v_div_fixup_f64 v[0:1], v[4:5], v[0:1], -1.0
@@ -2293,7 +2293,7 @@ define <2 x double> @v_neg_rsq_v2f64_poisonelt(<2 x double> %x) {
 ; SI-GISEL-NEXT: v_fma_f64 v[18:19], -v[8:9], v[6:7], 1.0
 ; SI-GISEL-NEXT: v_mov_b32_e32 v10, 0xbff00000
 ; SI-GISEL-NEXT: v_fma_f64 v[6:7], v[6:7], v[18:19], v[6:7]
-; SI-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v10, v13
+; SI-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v13, v10
 ; SI-GISEL-NEXT: v_fma_f64 v[12:13], -v[8:9], v[6:7], 1.0
 ; SI-GISEL-NEXT: v_div_scale_f64 v[18:19], s[4:5], s[4:5], v[2:3], s[4:5]
 ; SI-GISEL-NEXT: v_fma_f64 v[6:7], v[6:7], v[12:13], v[6:7]
@@ -2568,7 +2568,7 @@ define <2 x double> @v_neg_pos_rsq_v2f64(<2 x double> %x) {
 ; SI-GISEL-NEXT: v_fma_f64 v[18:19], -v[8:9], v[6:7], 1.0
 ; SI-GISEL-NEXT: v_mov_b32_e32 v10, 0xbff00000
 ; SI-GISEL-NEXT: v_fma_f64 v[6:7], v[6:7], v[18:19], v[6:7]
-; SI-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v10, v13
+; SI-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v13, v10
 ; SI-GISEL-NEXT: v_fma_f64 v[12:13], -v[8:9], v[6:7], 1.0
 ; SI-GISEL-NEXT: v_div_scale_f64 v[18:19], s[4:5], 1.0, v[2:3], 1.0
 ; SI-GISEL-NEXT: v_fma_f64 v[6:7], v[6:7], v[12:13], v[6:7]
@@ -2578,7 +2578,7 @@ define <2 x double> @v_neg_pos_rsq_v2f64(<2 x double> %x) {
 ; SI-GISEL-NEXT: v_fma_f64 v[12:13], -v[8:9], v[10:11], v[18:19]
 ; SI-GISEL-NEXT: v_mov_b32_e32 v8, 0x3ff00000
 ; SI-GISEL-NEXT: v_div_fmas_f64 v[4:5], v[16:17], v[4:5], v[14:15]
-; SI-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v8, v19
+; SI-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v19, v8
 ; SI-GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], v3, v9
 ; SI-GISEL-NEXT: s_xor_b64 vcc, vcc, s[4:5]
 ; SI-GISEL-NEXT: v_div_fixup_f64 v[0:1], v[4:5], v[0:1], -1.0
@@ -2808,7 +2808,7 @@ define double @v_rsq_f64_fneg_fabs(double %x) {
 ; SI-GISEL-NEXT: v_div_scale_f64 v[8:9], s[4:5], 1.0, v[0:1], 1.0
 ; SI-GISEL-NEXT: v_rcp_f64_e32 v[4:5], v[2:3]
 ; SI-GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], v1, v3
-; SI-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v10, v9
+; SI-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v9, v10
 ; SI-GISEL-NEXT: s_xor_b64 vcc, vcc, s[4:5]
 ; SI-GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
 ; SI-GISEL-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
@@ -2979,7 +2979,7 @@ define double @v_rsq_f64__afn_sqrt(double %x) {
 ; SI-GISEL-NEXT: v_div_scale_f64 v[8:9], s[4:5], 1.0, v[0:1], 1.0
 ; SI-GISEL-NEXT: v_rcp_f64_e32 v[4:5], v[2:3]
 ; SI-GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], v1, v3
-; SI-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v10, v9
+; SI-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v9, v10
 ; SI-GISEL-NEXT: s_xor_b64 vcc, vcc, s[4:5]
 ; SI-GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
 ; SI-GISEL-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
@@ -4167,7 +4167,7 @@ define double @v_rsq_f64__nnan_ninf(double %x) {
 ; SI-GISEL-NEXT: v_div_scale_f64 v[8:9], s[4:5], 1.0, v[0:1], 1.0
 ; SI-GISEL-NEXT: v_rcp_f64_e32 v[4:5], v[2:3]
 ; SI-GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], v1, v3
-; SI-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v10, v9
+; SI-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v9, v10
 ; SI-GISEL-NEXT: s_xor_b64 vcc, vcc, s[4:5]
 ; SI-GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
 ; SI-GISEL-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
@@ -4846,7 +4846,7 @@ define double @v_rsq_amdgcn_sqrt_f64(double %x) {
 ; SI-GISEL-NEXT: v_div_scale_f64 v[8:9], s[4:5], 1.0, v[0:1], 1.0
 ; SI-GISEL-NEXT: v_rcp_f64_e32 v[4:5], v[2:3]
 ; SI-GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], v1, v3
-; SI-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v10, v9
+; SI-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v9, v10
 ; SI-GISEL-NEXT: s_xor_b64 vcc, vcc, s[4:5]
 ; SI-GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
 ; SI-GISEL-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
@@ -4927,7 +4927,7 @@ define double @v_neg_rsq_amdgcn_sqrt_f64(double %x) {
 ; SI-GISEL-NEXT: v_div_scale_f64 v[8:9], s[4:5], -1.0, v[0:1], -1.0
 ; SI-GISEL-NEXT: v_rcp_f64_e32 v[4:5], v[2:3]
 ; SI-GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], v1, v3
-; SI-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v10, v9
+; SI-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v9, v10
 ; SI-GISEL-NEXT: s_xor_b64 vcc, vcc, s[4:5]
 ; SI-GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
 ; SI-GISEL-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
@@ -5008,7 +5008,7 @@ define amdgpu_ps <2 x i32> @s_rsq_amdgcn_sqrt_f64(double inreg %x) {
 ; SI-GISEL-NEXT: v_div_scale_f64 v[8:9], s[0:1], 1.0, v[0:1], 1.0
 ; SI-GISEL-NEXT: v_rcp_f64_e32 v[4:5], v[2:3]
 ; SI-GISEL-NEXT: v_cmp_eq_u32_e64 s[0:1], v1, v3
-; SI-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v10, v9
+; SI-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v9, v10
 ; SI-GISEL-NEXT: s_xor_b64 vcc, vcc, s[0:1]
 ; SI-GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
 ; SI-GISEL-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
@@ -5649,7 +5649,7 @@ define double @v_div_const_contract_sqrt_f64(double %x) {
 ; SI-GISEL-NEXT: v_div_scale_f64 v[8:9], s[4:5], s[6:7], v[0:1], s[6:7]
 ; SI-GISEL-NEXT: v_rcp_f64_e32 v[4:5], v[2:3]
 ; SI-GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], v1, v3
-; SI-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v10, v9
+; SI-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v9, v10
 ; SI-GISEL-NEXT: s_xor_b64 vcc, vcc, s[4:5]
 ; SI-GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
 ; SI-GISEL-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]