diff --git a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
index 9b62d6067be39..3a30d18ec5b32 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
@@ -20,6 +20,7 @@
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
+#include "llvm/CodeGen/GlobalISel/Utils.h"
 #include "llvm/CodeGen/Register.h"
 #include "llvm/CodeGenTypes/LowLevelType.h"
 #include "llvm/IR/InstrTypes.h"
@@ -299,6 +300,10 @@ class CombinerHelper {
   ///      $whatever = COPY $addr
   bool tryCombineMemCpyFamily(MachineInstr &MI, unsigned MaxLen = 0);
 
+  bool matchICmp(const MachineInstr &MI, BuildFnTy &MatchInfo);
+  /// Try hard to fold icmp with zero RHS because this is a common case.
+  bool matchCmpOfZero(const MachineInstr &MI, BuildFnTy &MatchInfo);
+
   bool matchPtrAddImmedChain(MachineInstr &MI, PtrAddChain &MatchInfo);
   void applyPtrAddImmedChain(MachineInstr &MI, PtrAddChain &MatchInfo);
 
@@ -1017,6 +1022,9 @@ class CombinerHelper {
   bool tryFoldLogicOfFCmps(GLogicalBinOp *Logic, BuildFnTy &MatchInfo);
 
   bool isCastFree(unsigned Opcode, LLT ToTy, LLT FromTy) const;
+
+  bool constantFoldICmp(const GICmp &ICmp, const GIConstant &LHS,
+                        const GIConstant &RHS, BuildFnTy &MatchInfo);
 };
 } // namespace llvm
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/GenericMachineInstrs.h b/llvm/include/llvm/CodeGen/GlobalISel/GenericMachineInstrs.h
index ef1171d9f1f64..427b5a86b6e0c 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/GenericMachineInstrs.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/GenericMachineInstrs.h
@@ -950,6 +950,30 @@ class GExtOrTruncOp : public GCastOp {
   };
 };
 
+/// Represents a splat vector.
+class GSplatVector : public GenericMachineInstr {
+public:
+  Register getValueReg() const { return getOperand(1).getReg(); }
+
+  static bool classof(const MachineInstr *MI) {
+    return MI->getOpcode() == TargetOpcode::G_SPLAT_VECTOR;
+  };
+};
+
+/// Represents an integer-like extending operation.
+class GZextOrSextOp : public GCastOp {
+public:
+  static bool classof(const MachineInstr *MI) {
+    switch (MI->getOpcode()) {
+    case TargetOpcode::G_SEXT:
+    case TargetOpcode::G_ZEXT:
+      return true;
+    default:
+      return false;
+    }
+  };
+};
+
 } // namespace llvm
 
 #endif // LLVM_CODEGEN_GLOBALISEL_GENERICMACHINEINSTRS_H
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/Utils.h b/llvm/include/llvm/CodeGen/GlobalISel/Utils.h
index cf5fd6d6f288b..a8bf2e722881a 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/Utils.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/Utils.h
@@ -593,5 +593,31 @@ bool isGuaranteedNotToBeUndef(Register Reg, const MachineRegisterInfo &MRI,
 /// estimate of the type.
 Type *getTypeForLLT(LLT Ty, LLVMContext &C);
 
+enum class GIConstantKind { Scalar, FixedVector, ScalableVector };
+
+/// An integer-like constant.
+class GIConstant {
+  GIConstantKind Kind;
+  SmallVector<APInt> Values;
+  APInt Value;
+
+public:
+  GIConstant(ArrayRef<APInt> Values)
+      : Kind(GIConstantKind::FixedVector), Values(Values) {};
+  GIConstant(const APInt &Value, GIConstantKind Kind)
+      : Kind(Kind), Value(Value) {};
+
+  GIConstantKind getKind() const { return Kind; }
+
+  APInt getScalarValue() const;
+
+  static std::optional<GIConstant> getConstant(Register Const,
+                                               const MachineRegisterInfo &MRI);
+};
+
+/// Return true if the given value is known to be non-zero when defined.
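+/// This is the GlobalISel counterpart of llvm::isKnownNonZero from
+/// ValueTracking. It currently analyzes scalar virtual registers only and
+/// gives up once MaxAnalysisRecursionDepth is reached.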
+bool isKnownNonZero(Register Reg, const MachineRegisterInfo &MRI,
+                    GISelKnownBits *KB, unsigned Depth = 0);
+
 } // End namespace llvm.
 #endif
diff --git a/llvm/include/llvm/Target/GlobalISel/Combine.td b/llvm/include/llvm/Target/GlobalISel/Combine.td
index 525cc815e73ce..8c0585e4f70ad 100644
--- a/llvm/include/llvm/Target/GlobalISel/Combine.td
+++ b/llvm/include/llvm/Target/GlobalISel/Combine.td
@@ -1007,9 +1007,6 @@ def double_icmp_zero_or_combine: GICombineRule<
    (G_ICMP $root, $p, $ordst, 0))
 >;
 
-def double_icmp_zero_and_or_combine : GICombineGroup<[double_icmp_zero_and_combine,
-                                                      double_icmp_zero_or_combine]>;
-
 def and_or_disjoint_mask : GICombineRule<
   (defs root:$root, build_fn_matchinfo:$info),
   (match (wip_match_opcode G_AND):$root,
@@ -1884,6 +1881,28 @@ def cast_combines: GICombineGroup<[
   buildvector_of_truncate
 ]>;
 
+def prepare_icmp : GICombineRule<
+  (defs root:$root, build_fn_matchinfo:$matchinfo),
+  (match (G_ICMP $root, $pred, $lhs, $rhs):$cmp,
+         [{ return Helper.matchICmp(*${cmp}, ${matchinfo}); }]),
+  (apply [{ Helper.applyBuildFn(*${cmp}, ${matchinfo}); }])>;
+
+def icmp_of_zero : GICombineRule<
+  (defs root:$root, build_fn_matchinfo:$matchinfo),
+  (match (G_CONSTANT $zero, 0),
+         (G_ICMP $root, $pred, $lhs, $zero):$cmp,
+         [{ return Helper.matchCmpOfZero(*${cmp}, ${matchinfo}); }]),
+  (apply [{ Helper.applyBuildFn(*${cmp}, ${matchinfo}); }])>;
+
+def icmp_combines: GICombineGroup<[
+  prepare_icmp,
+  icmp_of_zero,
+  icmp_to_true_false_known_bits,
+  icmp_to_lhs_known_bits,
+  double_icmp_zero_and_combine,
+  double_icmp_zero_or_combine,
+  redundant_binop_in_equality
+]>;
+
 // FIXME: These should use the custom predicate feature once it lands.
 def undef_combines : GICombineGroup<[undef_to_fp_zero, undef_to_int_zero,
@@ -1917,7 +1936,7 @@ def const_combines : GICombineGroup<[constant_fold_fp_ops, const_ptradd_to_i2p,
 
 def known_bits_simplifications : GICombineGroup<[
   redundant_and, redundant_sext_inreg, redundant_or, urem_pow2_to_mask,
-  zext_trunc_fold, icmp_to_true_false_known_bits, icmp_to_lhs_known_bits,
+  zext_trunc_fold,
   sext_inreg_to_zext_inreg]>;
 
 def width_reduction_combines : GICombineGroup<[reduce_shl_of_extend,
@@ -1944,8 +1963,8 @@ def constant_fold_binops : GICombineGroup<[constant_fold_binop,
 
 def prefer_sign_combines : GICombineGroup<[nneg_zext]>;
 
-def all_combines : GICombineGroup<[integer_reassoc_combines, trivial_combines,
-    vector_ops_combines, freeze_combines, cast_combines,
+def all_combines : GICombineGroup<[icmp_combines, integer_reassoc_combines,
+    trivial_combines, vector_ops_combines, freeze_combines, cast_combines,
     insert_vec_elt_combines, extract_vec_elt_combines, combines_for_extload,
     combine_extracted_vector_load,
     undef_combines, identity_combines, phi_combines,
@@ -1964,9 +1983,9 @@ def all_combines : GICombineGroup<[integer_reassoc_combines, trivial_combines,
     constant_fold_cast_op, fabs_fneg_fold,
     intdiv_combines, mulh_combines, redundant_neg_operands,
     and_or_disjoint_mask, fma_combines, fold_binop_into_select,
-    sub_add_reg, select_to_minmax, redundant_binop_in_equality,
+    sub_add_reg, select_to_minmax,
     fsub_to_fneg, commute_constant_to_rhs, match_ands, match_ors,
-    combine_concat_vector, double_icmp_zero_and_or_combine, match_addos,
+    combine_concat_vector, match_addos,
     sext_trunc, zext_trunc, prefer_sign_combines, combine_shuffle_concat]>;
 
 // A combine group used to for prelegalizer combiners at -O0. The combines in
diff --git a/llvm/lib/CodeGen/GlobalISel/CMakeLists.txt b/llvm/lib/CodeGen/GlobalISel/CMakeLists.txt
index a15b76440364b..af1717dbf76f3 100644
--- a/llvm/lib/CodeGen/GlobalISel/CMakeLists.txt
+++ b/llvm/lib/CodeGen/GlobalISel/CMakeLists.txt
@@ -7,6 +7,7 @@ add_llvm_component_library(LLVMGlobalISel
   Combiner.cpp
   CombinerHelper.cpp
   CombinerHelperCasts.cpp
+  CombinerHelperCompares.cpp
   CombinerHelperVectorOps.cpp
   GIMatchTableExecutor.cpp
   GISelChangeObserver.cpp
diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelperCompares.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelperCompares.cpp
new file mode 100644
index 0000000000000..0e88525c47171
--- /dev/null
+++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelperCompares.cpp
@@ -0,0 +1,167 @@
+//===- CombinerHelperCompares.cpp -----------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements CombinerHelper for G_ICMP.
+//
+//===----------------------------------------------------------------------===//
+#include "llvm/CodeGen/GlobalISel/CombinerHelper.h"
+#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
+#include "llvm/CodeGen/GlobalISel/LegalizerHelper.h"
+#include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"
+#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
+#include "llvm/CodeGen/GlobalISel/Utils.h"
+#include "llvm/CodeGen/LowLevelTypeUtils.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/TargetOpcodes.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/ErrorHandling.h"
+#include <cstdlib>
+
+#define DEBUG_TYPE "gi-combiner"
+
+using namespace llvm;
+
+bool CombinerHelper::constantFoldICmp(const GICmp &ICmp,
+                                      const GIConstant &LHSCst,
+                                      const GIConstant &RHSCst,
+                                      BuildFnTy &MatchInfo) {
+  if (LHSCst.getKind() != GIConstantKind::Scalar)
+    return false;
+
+  Register Dst = ICmp.getReg(0);
+  LLT DstTy = MRI.getType(Dst);
+
+  if (!isConstantLegalOrBeforeLegalizer(DstTy))
+    return false;
+
+  CmpInst::Predicate Pred = ICmp.getCond();
+  APInt LHS = LHSCst.getScalarValue();
+  APInt RHS = RHSCst.getScalarValue();
+
+  bool Result = ICmpInst::compare(LHS, RHS, Pred);
+
+  MatchInfo = [=](MachineIRBuilder &B) {
+    if (Result)
+      B.buildConstant(Dst, getICmpTrueVal(getTargetLowering(),
+                                          /*IsVector=*/DstTy.isVector(),
+                                          /*IsFP=*/false));
+    else
+      B.buildConstant(Dst, 0);
+  };
+
+  return true;
+}
+
+bool CombinerHelper::matchICmp(const MachineInstr &MI, BuildFnTy &MatchInfo) {
+  const GICmp *Cmp = cast<GICmp>(&MI);
+
+  Register Dst = Cmp->getReg(0);
+  Register LHS = Cmp->getLHSReg();
+  Register RHS = Cmp->getRHSReg();
+
+  CmpInst::Predicate Pred = Cmp->getCond();
+  assert(CmpInst::isIntPredicate(Pred) && "Not an integer compare!");
+  if (auto CLHS = GIConstant::getConstant(LHS, MRI)) {
+    if (auto CRHS = GIConstant::getConstant(RHS, MRI))
+      return constantFoldICmp(*Cmp, *CLHS, *CRHS, MatchInfo);
+
+    // If we have a constant, make sure it is on the RHS.
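+    // Canonicalizing the constant onto the RHS (e.g., G_ICMP slt 5, %x
+    // becomes G_ICMP sgt %x, 5) means later combines such as icmp_of_zero
+    // only have to inspect the RHS.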
+    std::swap(LHS, RHS);
+    Pred = CmpInst::getSwappedPredicate(Pred);
+
+    MatchInfo = [=](MachineIRBuilder &B) { B.buildICmp(Pred, Dst, LHS, RHS); };
+    return true;
+  }
+
+  return false;
+}
+
+bool CombinerHelper::matchCmpOfZero(const MachineInstr &MI,
+                                    BuildFnTy &MatchInfo) {
+  const GICmp *Cmp = cast<GICmp>(&MI);
+
+  Register Dst = Cmp->getReg(0);
+  LLT DstTy = MRI.getType(Dst);
+  Register LHS = Cmp->getLHSReg();
+  CmpInst::Predicate Pred = Cmp->getCond();
+
+  if (!isConstantLegalOrBeforeLegalizer(DstTy))
+    return false;
+
+  std::optional<bool> Result;
+
+  switch (Pred) {
+  default:
+    llvm_unreachable("Unknown ICmp predicate!");
+  case ICmpInst::ICMP_ULT:
+    Result = false;
+    break;
+  case ICmpInst::ICMP_UGE:
+    Result = true;
+    break;
+  case ICmpInst::ICMP_EQ:
+  case ICmpInst::ICMP_ULE:
+    if (isKnownNonZero(LHS, MRI, KB))
+      Result = false;
+    break;
+  case ICmpInst::ICMP_NE:
+  case ICmpInst::ICMP_UGT:
+    if (isKnownNonZero(LHS, MRI, KB))
+      Result = true;
+    break;
+  case ICmpInst::ICMP_SLT: {
+    KnownBits LHSKnown = KB->getKnownBits(LHS);
+    if (LHSKnown.isNegative())
+      Result = true;
+    if (LHSKnown.isNonNegative())
+      Result = false;
+    break;
+  }
+  case ICmpInst::ICMP_SLE: {
+    KnownBits LHSKnown = KB->getKnownBits(LHS);
+    if (LHSKnown.isNegative())
+      Result = true;
+    if (LHSKnown.isNonNegative() && isKnownNonZero(LHS, MRI, KB))
+      Result = false;
+    break;
+  }
+  case ICmpInst::ICMP_SGE: {
+    KnownBits LHSKnown = KB->getKnownBits(LHS);
+    if (LHSKnown.isNegative())
+      Result = false;
+    if (LHSKnown.isNonNegative())
+      Result = true;
+    break;
+  }
+  case ICmpInst::ICMP_SGT: {
+    KnownBits LHSKnown = KB->getKnownBits(LHS);
+    if (LHSKnown.isNegative())
+      Result = false;
+    if (LHSKnown.isNonNegative() && isKnownNonZero(LHS, MRI, KB))
+      Result = true;
+    break;
+  }
+  }
+
+  if (!Result)
+    return false;
+
+  MatchInfo = [=](MachineIRBuilder &B) {
+    if (*Result)
+      B.buildConstant(Dst, getICmpTrueVal(getTargetLowering(),
+                                          /*IsVector=*/DstTy.isVector(),
+                                          /*IsFP=*/false));
+    else
+      B.buildConstant(Dst, 0);
+  };
+
+  return true;
+}
diff --git a/llvm/lib/CodeGen/GlobalISel/Utils.cpp b/llvm/lib/CodeGen/GlobalISel/Utils.cpp
index 1713a582d5cfe..6f7bc97b55dc8 100644
--- a/llvm/lib/CodeGen/GlobalISel/Utils.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/Utils.cpp
@@ -1968,3 +1968,311 @@ Type *llvm::getTypeForLLT(LLT Ty, LLVMContext &C) {
                            Ty.getElementCount());
   return IntegerType::get(C, Ty.getSizeInBits());
 }
+
+APInt llvm::GIConstant::getScalarValue() const {
+  assert(Kind == GIConstantKind::Scalar && "Expected scalar constant");
+
+  return Value;
+}
+
+std::optional<GIConstant>
+llvm::GIConstant::getConstant(Register Const, const MachineRegisterInfo &MRI) {
+  MachineInstr *Constant = getDefIgnoringCopies(Const, MRI);
+
+  if (GSplatVector *Splat = dyn_cast<GSplatVector>(Constant)) {
+    std::optional<ValueAndVReg> MayBeConstant =
+        getIConstantVRegValWithLookThrough(Splat->getValueReg(), MRI);
+    if (!MayBeConstant)
+      return std::nullopt;
+    return GIConstant(MayBeConstant->Value, GIConstantKind::ScalableVector);
+  }
+
+  if (GBuildVector *Build = dyn_cast<GBuildVector>(Constant)) {
+    SmallVector<APInt> Values;
+    unsigned NumSources = Build->getNumSources();
+    for (unsigned I = 0; I < NumSources; ++I) {
+      Register SrcReg = Build->getSourceReg(I);
+      std::optional<ValueAndVReg> MayBeConstant =
+          getIConstantVRegValWithLookThrough(SrcReg, MRI);
+      if (!MayBeConstant)
+        return std::nullopt;
+      Values.push_back(MayBeConstant->Value);
+    }
+    return GIConstant(Values);
+  }
+
+  std::optional<ValueAndVReg> MayBeConstant =
+      getIConstantVRegValWithLookThrough(Const, MRI);
+  if (!MayBeConstant)
+    return std::nullopt;
+
+  return GIConstant(MayBeConstant->Value, GIConstantKind::Scalar);
+}
+
+static bool isKnownNonZero(Register Reg, const MachineRegisterInfo &MRI,
+                           GISelKnownBits *KB, unsigned Depth);
+
+bool llvm::isKnownNonZero(Register Reg, const MachineRegisterInfo &MRI,
+                          GISelKnownBits *KB, unsigned Depth) {
+  if (!Reg.isVirtual())
+    return false;
+
+  LLT Ty = MRI.getType(Reg);
+  if (!Ty.isValid())
+    return false;
+
+  if (Ty.isPointer())
+    return false;
+
+  assert(Ty.isScalar() && "Expected a scalar value");
+  return ::isKnownNonZero(Reg, MRI, KB, Depth);
+}
+
+static bool matchOpWithOpEqZero(Register Op0, Register Op1,
+                                const MachineRegisterInfo &MRI) {
+  auto MatchIt = [&MRI](const Register Reg0, const Register Reg1) {
+    MachineInstr *MI = MRI.getVRegDef(Reg0);
+    if (GZextOrSextOp *ZS = dyn_cast<GZextOrSextOp>(MI)) {
+      MachineInstr *SrcMI = MRI.getVRegDef(ZS->getSrcReg());
+      if (GICmp *Cmp = dyn_cast<GICmp>(SrcMI)) {
+        std::optional<ValueAndVReg> MayBeConstant =
+            getIConstantVRegValWithLookThrough(Cmp->getRHSReg(), MRI);
+        return MayBeConstant && (MayBeConstant->Value == 0) &&
+               (Cmp->getLHSReg() == Reg1) &&
+               (Cmp->getCond() == ICmpInst::ICMP_EQ);
+      }
+    }
+    return false;
+  };
+
+  return MatchIt(Op0, Op1) || MatchIt(Op1, Op0);
+}
+
+static bool isNonZeroAdd(const GBinOp &Add, const MachineRegisterInfo &MRI,
+                         GISelKnownBits *KB, unsigned Depth,
+                         unsigned BitWidth) {
+  bool NSW = Add.getFlag(MachineInstr::MIFlag::NoSWrap);
+  bool NUW = Add.getFlag(MachineInstr::MIFlag::NoUWrap);
+  Register LHS = Add.getLHSReg();
+  Register RHS = Add.getRHSReg();
+
+  // (X + (X != 0)) is non zero.
+  if (matchOpWithOpEqZero(LHS, RHS, MRI))
+    return true;
+
+  if (NUW)
+    return ::isKnownNonZero(RHS, MRI, KB, Depth) ||
+           ::isKnownNonZero(LHS, MRI, KB, Depth);
+
+  KnownBits LHSKnown = KB->getKnownBits(LHS);
+  KnownBits RHSKnown = KB->getKnownBits(RHS);
+
+  // If LHS and RHS are both non-negative (as signed values) then their sum is
+  // not zero unless both LHS and RHS are zero.
+  if (LHSKnown.isNonNegative() && RHSKnown.isNonNegative())
+    if (::isKnownNonZero(LHS, MRI, KB, Depth) ||
+        ::isKnownNonZero(RHS, MRI, KB, Depth))
+      return true;
+
+  // If LHS and RHS are both negative (as signed values) then their sum is not
+  // zero unless both LHS and RHS equal INT_MIN.
+  if (LHSKnown.isNegative() && RHSKnown.isNegative()) {
+    APInt Mask = APInt::getSignedMaxValue(BitWidth);
+    // The sign bit of LHS is set. If some other bit is set then LHS is not
+    // equal to INT_MIN.
+    if (LHSKnown.One.intersects(Mask))
+      return true;
+    // The sign bit of RHS is set. If some other bit is set then RHS is not
+    // equal to INT_MIN.
+    if (RHSKnown.One.intersects(Mask))
+      return true;
+  }
+
+  // The sum of a non-negative number and a power of two is not zero.
+  if (LHSKnown.isNonNegative() && ::isKnownToBeAPowerOfTwo(RHS, MRI, KB))
+    return true;
+  if (RHSKnown.isNonNegative() && ::isKnownToBeAPowerOfTwo(LHS, MRI, KB))
+    return true;
+
+  return KnownBits::add(LHSKnown, RHSKnown, NSW, NUW).isNonZero();
+}
+
+static bool isKnownNonZeroBinOp(const GBinOp &BinOp,
+                                const MachineRegisterInfo &MRI,
+                                GISelKnownBits *KB, unsigned Depth) {
+  unsigned BitWidth = MRI.getType(BinOp.getReg(0)).getScalarSizeInBits();
+  switch (BinOp.getOpcode()) {
+  case TargetOpcode::G_XOR:
+    // (X ^ (X != 0)) is non zero.
+    if (matchOpWithOpEqZero(BinOp.getLHSReg(), BinOp.getRHSReg(), MRI))
+      return true;
+    break;
+  case TargetOpcode::G_OR: {
+    // (X | (X != 0)) is non zero.
+    if (matchOpWithOpEqZero(BinOp.getLHSReg(), BinOp.getRHSReg(), MRI))
+      return true;
+    // X | Y != 0 if X != 0 or Y != 0.
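+    // The RHS is tried first since constants are canonicalized to the RHS
+    // and are decided immediately.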
+    return ::isKnownNonZero(BinOp.getRHSReg(), MRI, KB, Depth) ||
+           ::isKnownNonZero(BinOp.getLHSReg(), MRI, KB, Depth);
+  }
+  case TargetOpcode::G_ADD: {
+    // X + Y.
+
+    // If Add has the nuw wrap flag, then if either X or Y is non-zero the
+    // result is non-zero.
+    return isNonZeroAdd(BinOp, MRI, KB, Depth, BitWidth);
+  }
+  default:
+    return false;
+  }
+
+  return false;
+}
+
+static bool isKnownNonZeroCastOp(const GCastOp &CastOp,
+                                 const MachineRegisterInfo &MRI,
+                                 GISelKnownBits *KB, unsigned Depth) {
+  switch (CastOp.getOpcode()) {
+  case TargetOpcode::G_SEXT:
+  case TargetOpcode::G_ZEXT:
+    // ext X != 0 if X != 0.
+    return ::isKnownNonZero(CastOp.getSrcReg(), MRI, KB, Depth);
+  case TargetOpcode::G_TRUNC:
+    // nuw/nsw trunc preserves zero/non-zero status of input.
+    if (CastOp.getFlag(MachineInstr::MIFlag::NoSWrap) ||
+        CastOp.getFlag(MachineInstr::MIFlag::NoUWrap))
+      return ::isKnownNonZero(CastOp.getSrcReg(), MRI, KB, Depth);
+    break;
+  default:
+    return false;
+  }
+
+  return false;
+}
+
+static bool isNonZeroShift(const MachineInstr *MI,
+                           const MachineRegisterInfo &MRI, GISelKnownBits *KB,
+                           unsigned Depth, const KnownBits &KnownVal) {
+  auto ShiftOp = [&](const APInt &Lhs, const APInt &Rhs) {
+    switch (MI->getOpcode()) {
+    case TargetOpcode::G_SHL:
+      return Lhs.shl(Rhs);
+    case TargetOpcode::G_LSHR:
+      return Lhs.lshr(Rhs);
+    case TargetOpcode::G_ASHR:
+      return Lhs.ashr(Rhs);
+    default:
+      llvm_unreachable("Unknown Shift Opcode");
+    }
+  };
+
+  auto InvShiftOp = [&](const APInt &Lhs, const APInt &Rhs) {
+    switch (MI->getOpcode()) {
+    case TargetOpcode::G_SHL:
+      return Lhs.lshr(Rhs);
+    case TargetOpcode::G_LSHR:
+    case TargetOpcode::G_ASHR:
+      return Lhs.shl(Rhs);
+    default:
+      llvm_unreachable("Unknown Shift Opcode");
+    }
+  };
+
+  if (KnownVal.isUnknown())
+    return false;
+
+  KnownBits KnownCnt = KB->getKnownBits(MI->getOperand(2).getReg());
+  APInt MaxShift = KnownCnt.getMaxValue();
+  unsigned NumBits = KnownVal.getBitWidth();
+  if (MaxShift.uge(NumBits))
+    return false;
+
+  if (!ShiftOp(KnownVal.One, MaxShift).isZero())
+    return true;
+
+  // If all of the bits shifted out are known to be zero, and Val is known
+  // non-zero then at least one non-zero bit must remain.
+  if (InvShiftOp(KnownVal.Zero, NumBits - MaxShift)
+          .eq(InvShiftOp(APInt::getAllOnes(NumBits), NumBits - MaxShift)) &&
+      ::isKnownNonZero(MI->getOperand(1).getReg(), MRI, KB, Depth))
+    return true;
+
+  return false;
+}
+
+bool isKnownNonZero(Register Reg, const MachineRegisterInfo &MRI,
+                    GISelKnownBits *KB, unsigned Depth) {
+  if (!Reg.isVirtual())
+    return false;
+
+  std::optional<ValueAndVReg> MayBeConstant =
+      getIConstantVRegValWithLookThrough(Reg, MRI);
+
+  if (MayBeConstant)
+    return MayBeConstant->Value != 0;
+
+  // Some of the tests below are recursive, so bail out if we hit the limit.
+  if (Depth++ >= MaxAnalysisRecursionDepth)
+    return false;
+
+  MachineInstr *MI = getDefIgnoringCopies(Reg, MRI);
+
+  if (GBinOp *BinOp = dyn_cast<GBinOp>(MI))
+    return isKnownNonZeroBinOp(*BinOp, MRI, KB, Depth);
+
+  if (GCastOp *CastOp = dyn_cast<GCastOp>(MI))
+    return isKnownNonZeroCastOp(*CastOp, MRI, KB, Depth);
+
+  switch (MI->getOpcode()) {
+  case TargetOpcode::G_SHL: {
+    // shl nsw/nuw can't remove any non-zero bits.
+    if (MI->getFlag(MachineInstr::MIFlag::NoUWrap) ||
+        MI->getFlag(MachineInstr::MIFlag::NoSWrap))
+      return ::isKnownNonZero(MI->getOperand(1).getReg(), MRI, KB, Depth);
+
+    // shl X, Y != 0 if X is odd. Note that the value of the shift is undefined
+    // if the lowest bit is shifted off the end.
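+    // An odd X keeps its low bit as bit Y of the result, so for any in-range
+    // shift amount at least one bit of the result stays set.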
+    KnownBits Known = KB->getKnownBits(MI->getOperand(1).getReg());
+    if (Known.One[0])
+      return true;
+
+    return isNonZeroShift(MI, MRI, KB, Depth, Known);
+  }
+  case TargetOpcode::G_LSHR:
+  case TargetOpcode::G_ASHR: {
+    // shr exact can only shift out zero bits.
+    if (MI->getFlag(MachineInstr::MIFlag::IsExact))
+      return ::isKnownNonZero(MI->getOperand(1).getReg(), MRI, KB, Depth);
+
+    // shr X, Y != 0 if X is negative. Note that the value of the shift is not
+    // defined if the sign bit is shifted off the end.
+    KnownBits Known = KB->getKnownBits(MI->getOperand(1).getReg());
+    if (Known.isNegative())
+      return true;
+
+    return isNonZeroShift(MI, MRI, KB, Depth, Known);
+  }
+  case TargetOpcode::G_FREEZE:
+    return ::isKnownNonZero(MI->getOperand(1).getReg(), MRI, KB, Depth) &&
+           ::isGuaranteedNotToBePoison(MI->getOperand(1).getReg(), MRI, Depth);
+  case TargetOpcode::G_SMIN: {
+    // If either arg is negative the result is non-zero. Otherwise
+    // the result is non-zero if both ops are non-zero.
+    KnownBits Op1Known = KB->getKnownBits(MI->getOperand(2).getReg());
+    if (Op1Known.isNegative())
+      return true;
+    KnownBits Op0Known = KB->getKnownBits(MI->getOperand(1).getReg());
+    if (Op0Known.isNegative())
+      return true;
+
+    if (Op1Known.isNonZero() && Op0Known.isNonZero())
+      return true;
+  }
+    [[fallthrough]];
+  case TargetOpcode::G_UMIN:
+    return ::isKnownNonZero(MI->getOperand(1).getReg(), MRI, KB, Depth) &&
+           ::isKnownNonZero(MI->getOperand(2).getReg(), MRI, KB, Depth);
+  default:
+    return false;
+  }
+}
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/combine-visit-icmp.mir b/llvm/test/CodeGen/AArch64/GlobalISel/combine-visit-icmp.mir
new file mode 100644
index 0000000000000..d454e60b034ea
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/combine-visit-icmp.mir
@@ -0,0 +1,181 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -o - -mtriple=aarch64-unknown-unknown -run-pass=aarch64-prelegalizer-combiner -verify-machineinstrs %s | FileCheck %s --check-prefixes=CHECK
+
+---
+name: test_icmp_of_eq_and_right_undef
+body: |
+  bb.1:
+    ; CHECK-LABEL: name: test_icmp_of_eq_and_right_undef
+    ; CHECK: %lhs:_(s64) = COPY $x0
+    ; CHECK-NEXT: %rhs:_(s64) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: %res:_(s32) = G_ICMP intpred(ne), %lhs(s64), %rhs
+    ; CHECK-NEXT: $w0 = COPY %res(s32)
+    %lhs:_(s64) = COPY $x0
+    %rhs:_(s64) = G_IMPLICIT_DEF
+    %res:_(s32) = G_ICMP intpred(ne), %lhs(s64), %rhs
+    $w0 = COPY %res(s32)
+...
+---
+name: test_icmp_of_not_eq_and_right_undef
+body: |
+  bb.1:
+    ; CHECK-LABEL: name: test_icmp_of_not_eq_and_right_undef
+    ; CHECK: %lhs:_(s64) = COPY $x0
+    ; CHECK-NEXT: %rhs:_(s64) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: %res:_(s32) = G_ICMP intpred(ugt), %lhs(s64), %rhs
+    ; CHECK-NEXT: $w0 = COPY %res(s32)
+    %lhs:_(s64) = COPY $x0
+    %rhs:_(s64) = G_IMPLICIT_DEF
+    %res:_(s32) = G_ICMP intpred(ugt), %lhs(s64), %rhs
+    $w0 = COPY %res(s32)
+...
+---
+name: test_icmp_of_is_eq_and_right_undef
+body: |
+  bb.1:
+    ; CHECK-LABEL: name: test_icmp_of_is_eq_and_right_undef
+    ; CHECK: %lhs:_(s64) = COPY $x0
+    ; CHECK-NEXT: %rhs:_(s64) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: %res:_(s32) = G_ICMP intpred(ne), %lhs(s64), %rhs
+    ; CHECK-NEXT: $w0 = COPY %res(s32)
+    %lhs:_(s64) = COPY $x0
+    %rhs:_(s64) = G_IMPLICIT_DEF
+    %res:_(s32) = G_ICMP intpred(ne), %lhs(s64), %rhs
+    $w0 = COPY %res(s32)
+...
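+# G_IMPLICIT_DEF is not a constant, so none of the icmp combines may fire on
+# the undef-RHS compares above; the G_ICMPs must survive unchanged.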
+---
+name: test_icmp_of_eq_not_eq
+body: |
+  bb.1:
+    ; CHECK-LABEL: name: test_icmp_of_eq_not_eq
+    ; CHECK: %lhs:_(s64) = COPY $x0
+    ; CHECK-NEXT: %res:_(s32) = G_ICMP intpred(ugt), %lhs(s64), %lhs
+    ; CHECK-NEXT: $w0 = COPY %res(s32)
+    %lhs:_(s64) = COPY $x0
+    %rhs:_(s64) = COPY $x0
+    %res:_(s32) = G_ICMP intpred(ugt), %lhs(s64), %lhs
+    $w0 = COPY %res(s32)
+...
+---
+name: test_icmp_of_eq_is_eq
+body: |
+  bb.1:
+    ; CHECK-LABEL: name: test_icmp_of_eq_is_eq
+    ; CHECK: %lhs:_(s64) = COPY $x0
+    ; CHECK-NEXT: %res:_(s32) = G_ICMP intpred(eq), %lhs(s64), %lhs
+    ; CHECK-NEXT: $w0 = COPY %res(s32)
+    %lhs:_(s64) = COPY $x0
+    %rhs:_(s64) = COPY $x0
+    %res:_(s32) = G_ICMP intpred(eq), %lhs(s64), %lhs
+    $w0 = COPY %res(s32)
+...
+---
+name: test_icmp_of_zext_and_zext
+body: |
+  bb.1:
+    ; CHECK-LABEL: name: test_icmp_of_zext_and_zext
+    ; CHECK: %lhs1:_(s32) = COPY $w0
+    ; CHECK-NEXT: %rhs1:_(s32) = COPY $w0
+    ; CHECK-NEXT: %lhs:_(s64) = G_ZEXT %lhs1(s32)
+    ; CHECK-NEXT: %rhs:_(s64) = G_ZEXT %rhs1(s32)
+    ; CHECK-NEXT: %res:_(s32) = G_ICMP intpred(sgt), %lhs(s64), %rhs
+    ; CHECK-NEXT: $w0 = COPY %res(s32)
+    %lhs1:_(s32) = COPY $w0
+    %rhs1:_(s32) = COPY $w0
+    %lhs:_(s64) = G_ZEXT %lhs1
+    %rhs:_(s64) = G_ZEXT %rhs1
+    %res:_(s32) = G_ICMP intpred(sgt), %lhs(s64), %rhs
+    $w0 = COPY %res(s32)
+...
+---
+name: test_icmp_of_sext_and_sext
+body: |
+  bb.1:
+    ; CHECK-LABEL: name: test_icmp_of_sext_and_sext
+    ; CHECK: %lhs1:_(s32) = COPY $w0
+    ; CHECK-NEXT: %rhs1:_(s32) = COPY $w0
+    ; CHECK-NEXT: %lhs:_(s64) = G_SEXT %lhs1(s32)
+    ; CHECK-NEXT: %rhs:_(s64) = G_SEXT %rhs1(s32)
+    ; CHECK-NEXT: %res:_(s32) = G_ICMP intpred(sgt), %lhs(s64), %rhs
+    ; CHECK-NEXT: $w0 = COPY %res(s32)
+    %lhs1:_(s32) = COPY $w0
+    %rhs1:_(s32) = COPY $w0
+    %lhs:_(s64) = G_SEXT %lhs1
+    %rhs:_(s64) = G_SEXT %rhs1
+    %res:_(s32) = G_ICMP intpred(sgt), %lhs(s64), %rhs
+    $w0 = COPY %res(s32)
+...
+---
+name: test_ugt_icmp_of_sext_and_sext
+body: |
+  bb.1:
+    ; CHECK-LABEL: name: test_ugt_icmp_of_sext_and_sext
+    ; CHECK: %lhs1:_(s32) = COPY $w0
+    ; CHECK-NEXT: %rhs1:_(s32) = COPY $w0
+    ; CHECK-NEXT: %lhs:_(s64) = G_SEXT %lhs1(s32)
+    ; CHECK-NEXT: %rhs:_(s64) = G_SEXT %rhs1(s32)
+    ; CHECK-NEXT: %res:_(s32) = G_ICMP intpred(ugt), %lhs(s64), %rhs
+    ; CHECK-NEXT: $w0 = COPY %res(s32)
+    %lhs1:_(s32) = COPY $w0
+    %rhs1:_(s32) = COPY $w0
+    %lhs:_(s64) = G_SEXT %lhs1
+    %rhs:_(s64) = G_SEXT %rhs1
+    %res:_(s32) = G_ICMP intpred(ugt), %lhs(s64), %rhs
+    $w0 = COPY %res(s32)
+...
+---
+name: test_uge_icmp_of_zero
+body: |
+  bb.1:
+    ; CHECK-LABEL: name: test_uge_icmp_of_zero
+    ; CHECK: %res:_(s32) = G_CONSTANT i32 1
+    ; CHECK-NEXT: $w0 = COPY %res(s32)
+    %lhs:_(s64) = COPY $x0
+    %zero:_(s64) = G_CONSTANT i64 0
+    %res:_(s32) = G_ICMP intpred(uge), %lhs(s64), %zero
+    $w0 = COPY %res(s32)
+...
+---
+name: test_slt_icmp_of_zero
+body: |
+  bb.1:
+    ; CHECK-LABEL: name: test_slt_icmp_of_zero
+    ; CHECK: %lhs:_(s64) = COPY $x0
+    ; CHECK-NEXT: %zero:_(s64) = G_CONSTANT i64 0
+    ; CHECK-NEXT: %res:_(s32) = G_ICMP intpred(slt), %lhs(s64), %zero
+    ; CHECK-NEXT: $w0 = COPY %res(s32)
+    %lhs:_(s64) = COPY $x0
+    %zero:_(s64) = G_CONSTANT i64 0
+    %res:_(s32) = G_ICMP intpred(slt), %lhs(s64), %zero
+    $w0 = COPY %res(s32)
+...
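+# For a plain COPY nothing is known about the sign bit, so the slt compare
+# against zero must not fold. The tests below feed computed LHS values into
+# the compare instead.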
+--- +name: test_ugt_icmp_of_zero_known_non_zero +body: | + bb.1: + ; CHECK-LABEL: name: test_ugt_icmp_of_zero_known_non_zero + ; CHECK: %res:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: $w0 = COPY %res(s32) + %amount:_(s64) = COPY $x0 + %zero:_(s64) = G_CONSTANT i64 0 + %lhs:_(s64) = G_SHL %zero, %amount + %res:_(s32) = G_ICMP intpred(ugt), %lhs(s64), %zero + $w0 = COPY %res(s32) +... +--- +name: test_ugt_icmp_of_zero_xor +body: | + bb.1: + ; CHECK-LABEL: name: test_ugt_icmp_of_zero_xor + ; CHECK: %x:_(s64) = COPY $x0 + ; CHECK-NEXT: %y:_(s64) = COPY $x0 + ; CHECK-NEXT: %zero:_(s64) = G_CONSTANT i64 0 + ; CHECK-NEXT: %lhs:_(s64) = G_XOR %x, %y + ; CHECK-NEXT: %res:_(s32) = G_ICMP intpred(ugt), %lhs(s64), %zero + ; CHECK-NEXT: $w0 = COPY %res(s32) + %x:_(s64) = COPY $x0 + %y:_(s64) = COPY $x0 + %zero:_(s64) = G_CONSTANT i64 0 + %lhs:_(s64) = G_XOR %x, %y + %res:_(s32) = G_ICMP intpred(ugt), %lhs(s64), %zero + $w0 = COPY %res(s32) diff --git a/llvm/test/CodeGen/AArch64/arm64-ccmp.ll b/llvm/test/CodeGen/AArch64/arm64-ccmp.ll index 50afc79a5a576..06e957fdcc6a2 100644 --- a/llvm/test/CodeGen/AArch64/arm64-ccmp.ll +++ b/llvm/test/CodeGen/AArch64/arm64-ccmp.ll @@ -457,20 +457,12 @@ sw.bb.i.i: } define i64 @select_and(i32 %w0, i32 %w1, i64 %x2, i64 %x3) { -; SDISEL-LABEL: select_and: -; SDISEL: ; %bb.0: -; SDISEL-NEXT: cmp w1, #5 -; SDISEL-NEXT: ccmp w0, w1, #0, ne -; SDISEL-NEXT: csel x0, x2, x3, lt -; SDISEL-NEXT: ret -; -; GISEL-LABEL: select_and: -; GISEL: ; %bb.0: -; GISEL-NEXT: mov w8, #5 ; =0x5 -; GISEL-NEXT: cmp w8, w1 -; GISEL-NEXT: ccmp w0, w1, #0, ne -; GISEL-NEXT: csel x0, x2, x3, lt -; GISEL-NEXT: ret +; CHECK-LABEL: select_and: +; CHECK: ; %bb.0: +; CHECK-NEXT: cmp w1, #5 +; CHECK-NEXT: ccmp w0, w1, #0, ne +; CHECK-NEXT: csel x0, x2, x3, lt +; CHECK-NEXT: ret %1 = icmp slt i32 %w0, %w1 %2 = icmp ne i32 5, %w1 %3 = and i1 %1, %2 @@ -479,20 +471,12 @@ define i64 @select_and(i32 %w0, i32 %w1, i64 %x2, i64 %x3) { } define i64 @select_or(i32 %w0, i32 %w1, i64 %x2, i64 %x3) { -; SDISEL-LABEL: select_or: -; SDISEL: ; %bb.0: -; SDISEL-NEXT: cmp w1, #5 -; SDISEL-NEXT: ccmp w0, w1, #8, eq -; SDISEL-NEXT: csel x0, x2, x3, lt -; SDISEL-NEXT: ret -; -; GISEL-LABEL: select_or: -; GISEL: ; %bb.0: -; GISEL-NEXT: mov w8, #5 ; =0x5 -; GISEL-NEXT: cmp w8, w1 -; GISEL-NEXT: ccmp w0, w1, #8, eq -; GISEL-NEXT: csel x0, x2, x3, lt -; GISEL-NEXT: ret +; CHECK-LABEL: select_or: +; CHECK: ; %bb.0: +; CHECK-NEXT: cmp w1, #5 +; CHECK-NEXT: ccmp w0, w1, #8, eq +; CHECK-NEXT: csel x0, x2, x3, lt +; CHECK-NEXT: ret %1 = icmp slt i32 %w0, %w1 %2 = icmp ne i32 5, %w1 %3 = or i1 %1, %2 @@ -501,20 +485,12 @@ define i64 @select_or(i32 %w0, i32 %w1, i64 %x2, i64 %x3) { } define float @select_or_float(i32 %w0, i32 %w1, float %x2, float %x3) { -; SDISEL-LABEL: select_or_float: -; SDISEL: ; %bb.0: -; SDISEL-NEXT: cmp w1, #5 -; SDISEL-NEXT: ccmp w0, w1, #8, eq -; SDISEL-NEXT: fcsel s0, s0, s1, lt -; SDISEL-NEXT: ret -; -; GISEL-LABEL: select_or_float: -; GISEL: ; %bb.0: -; GISEL-NEXT: mov w8, #5 ; =0x5 -; GISEL-NEXT: cmp w8, w1 -; GISEL-NEXT: ccmp w0, w1, #8, eq -; GISEL-NEXT: fcsel s0, s0, s1, lt -; GISEL-NEXT: ret +; CHECK-LABEL: select_or_float: +; CHECK: ; %bb.0: +; CHECK-NEXT: cmp w1, #5 +; CHECK-NEXT: ccmp w0, w1, #8, eq +; CHECK-NEXT: fcsel s0, s0, s1, lt +; CHECK-NEXT: ret %1 = icmp slt i32 %w0, %w1 %2 = icmp ne i32 5, %w1 %3 = or i1 %1, %2 diff --git a/llvm/test/CodeGen/AArch64/icmp2.ll b/llvm/test/CodeGen/AArch64/icmp2.ll new file mode 100644 index 0000000000000..963501da78475 --- /dev/null +++ 
b/llvm/test/CodeGen/AArch64/icmp2.ll @@ -0,0 +1,301 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 +; RUN: llc -mtriple=aarch64 -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-SD +; RUN: llc -mtriple=aarch64 -global-isel -global-isel-abort=2 -verify-machineinstrs %s -o - 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI + +define i1 @i64_i64_canon(i64 %a, i64 %b) { +; CHECK-SD-LABEL: i64_i64_canon: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: cmp x0, #0 +; CHECK-SD-NEXT: cset w0, eq +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: i64_i64_canon: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: cmp x0, #0 +; CHECK-GI-NEXT: cset w0, ls +; CHECK-GI-NEXT: ret +entry: + %c = icmp uge i64 0, %a + ret i1 %c +} + +define <2 x i1> @i64_i64_canon_2x64(<2 x i64> %a, <2 x i64> %b) { +; CHECK-LABEL: i64_i64_canon_2x64: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: adrp x8, .LCPI1_0 +; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI1_0] +; CHECK-NEXT: cmhs v0.2d, v1.2d, v0.2d +; CHECK-NEXT: xtn v0.2s, v0.2d +; CHECK-NEXT: ret +entry: + %c = icmp uge <2 x i64> , %a + ret <2 x i1> %c +} + +define i1 @i64_i64_undef_eq(i64 %a, i64 %b) { +; CHECK-SD-LABEL: i64_i64_undef_eq: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: mov w0, wzr +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: i64_i64_undef_eq: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: cmp x0, x8 +; CHECK-GI-NEXT: cset w0, eq +; CHECK-GI-NEXT: ret +entry: + %c = icmp eq i64 %a, undef + ret i1 %c +} + +define i1 @i64_i64_slt_eq(i64 %a, i64 %b) { +; CHECK-SD-LABEL: i64_i64_slt_eq: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: mov w0, wzr +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: i64_i64_slt_eq: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: cmp x0, x0 +; CHECK-GI-NEXT: cset w0, lt +; CHECK-GI-NEXT: ret +entry: + %c = icmp slt i64 %a, %a + ret i1 %c +} + +define i1 @i64_i64_not_eq_undef(i64 %a, i64 %b) { +; CHECK-SD-LABEL: i64_i64_not_eq_undef: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: mov w0, wzr +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: i64_i64_not_eq_undef: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: cmp x0, x8 +; CHECK-GI-NEXT: cset w0, lt +; CHECK-GI-NEXT: ret +entry: + %c = icmp slt i64 %a, undef + ret i1 %c +} + +define i1 @i64_i64_sext(i32 %a, i32 %b) { +; CHECK-LABEL: i64_i64_sext: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0 +; CHECK-NEXT: sxtw x8, w0 +; CHECK-NEXT: cmp x8, w1, sxtw +; CHECK-NEXT: cset w0, lt +; CHECK-NEXT: ret +entry: + %sextedlhs = sext i32 %a to i64 + %sextedrhs = sext i32 %b to i64 + %c = icmp slt i64 %sextedlhs, %sextedrhs + ret i1 %c +} + +define i1 @i64_i64_zext(i32 %a, i32 %b) { +; CHECK-LABEL: i64_i64_zext: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: mov w8, w0 +; CHECK-NEXT: cmp x8, w1, uxtw +; CHECK-NEXT: cset w0, lt +; CHECK-NEXT: ret +entry: + %zextedlhs = zext i32 %a to i64 + %zextedrhs = zext i32 %b to i64 + %c = icmp slt i64 %zextedlhs, %zextedrhs + ret i1 %c +} + +define i1 @i64_i64_ule_or(i64 %a, i64 %b, i64 %c) { +; CHECK-SD-LABEL: i64_i64_ule_or: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: orr x8, x2, #0x1 +; CHECK-SD-NEXT: cmp x8, #0 +; CHECK-SD-NEXT: cset w0, eq +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: i64_i64_ule_or: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: mov w0, wzr +; CHECK-GI-NEXT: ret +entry: + %or = or i64 1, %c + %cmp = icmp ule i64 %or, 0 + ret i1 %cmp +} + +define i1 @i64_i64_ugt_or(i64 %a, i64 %b, i64 %c) { +; CHECK-SD-LABEL: 
i64_i64_ugt_or: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: orr x8, x2, #0x1 +; CHECK-SD-NEXT: cmp x8, #0 +; CHECK-SD-NEXT: cset w0, ne +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: i64_i64_ugt_or: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: mov w0, #1 // =0x1 +; CHECK-GI-NEXT: ret +entry: + %or = or i64 1, %c + %cmp = icmp ugt i64 %or, 0 + ret i1 %cmp +} + +define i1 @i64_i64_eq_or(i64 %a, i64 %b, i64 %c) { +; CHECK-SD-LABEL: i64_i64_eq_or: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: orr x8, x2, #0x1 +; CHECK-SD-NEXT: cmp x8, #0 +; CHECK-SD-NEXT: cset w0, eq +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: i64_i64_eq_or: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: mov w0, wzr +; CHECK-GI-NEXT: ret +entry: + %or = or i64 1, %c + %cmp = icmp eq i64 %or, 0 + ret i1 %cmp +} + +define i1 @i64_i64_eq_freeze_or(i64 %a, i64 %b, i64 %c) { +; CHECK-SD-LABEL: i64_i64_eq_freeze_or: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: orr x8, x2, #0x1 +; CHECK-SD-NEXT: cmp x8, #0 +; CHECK-SD-NEXT: cset w0, eq +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: i64_i64_eq_freeze_or: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: mov w0, wzr +; CHECK-GI-NEXT: ret +entry: + %or = or i64 1, %c + %free = freeze i64 %or + %cmp = icmp eq i64 %free, 0 + ret i1 %cmp +} + +define i1 @i64_i64_eq_freeze_add(i64 %a, i64 %b, i64 %c) { +; CHECK-LABEL: i64_i64_eq_freeze_add: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: cmn x2, #1 +; CHECK-NEXT: cset w0, eq +; CHECK-NEXT: ret +entry: + %add = add nuw i64 1, %c + %free = freeze i64 %add + %cmp = icmp eq i64 %free, 0 + ret i1 %cmp +} + +define i1 @i64_i64_eq_lshr(i64 %a, i64 %b, i64 %c) { +; CHECK-LABEL: i64_i64_eq_lshr: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: mov w0, wzr +; CHECK-NEXT: ret +entry: + %lshr = lshr exact i64 1, %c + %cmp = icmp eq i64 %lshr, 0 + ret i1 %cmp +} + +define i1 @i64_i64_eq_zext(i64 %a, i64 %b, i32 %c) { +; CHECK-SD-LABEL: i64_i64_eq_zext: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: orr w8, w2, #0x1 +; CHECK-SD-NEXT: cmp w8, #0 +; CHECK-SD-NEXT: cset w0, eq +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: i64_i64_eq_zext: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: mov w0, wzr +; CHECK-GI-NEXT: ret +entry: + %or = or i32 1, %c + %ze = zext i32 %or to i64 + %cmp = icmp eq i64 %ze, 0 + ret i1 %cmp +} + +define i1 @i64_i64_canon_ule(i64 %a, i64 %b, i64 %c) { +; CHECK-LABEL: i64_i64_canon_ule: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: mov w0, #1 // =0x1 +; CHECK-NEXT: ret +entry: + %cmp = icmp ule i64 0, %a + ret i1 %cmp +} + +define i1 @i64_i64_canon_ugt(i64 %a, i64 %b, i64 %c) { +; CHECK-LABEL: i64_i64_canon_ugt: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: mov w0, wzr +; CHECK-NEXT: ret +entry: + %cmp = icmp ugt i64 0, %a + ret i1 %cmp +} + +define i1 @i64_i64_trunc_eq(i64 %a, i64 %b, i64 %c) { +; CHECK-SD-LABEL: i64_i64_trunc_eq: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: orr w8, w2, #0x1 +; CHECK-SD-NEXT: cmp w8, #0 +; CHECK-SD-NEXT: cset w0, eq +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: i64_i64_trunc_eq: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: mov w0, wzr +; CHECK-GI-NEXT: ret +entry: + %or = or i64 1, %c + %tr = trunc nsw i64 %or to i32 + %cmp = icmp eq i32 %tr, 0 + ret i1 %cmp +} + +define i1 @i64_i64_umin_eq(i64 %a, i64 %b, i64 %c) { +; CHECK-LABEL: i64_i64_umin_eq: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: orr x8, x2, #0x1 +; CHECK-NEXT: orr x9, x2, #0x2 +; CHECK-NEXT: cmp x8, x9 +; CHECK-NEXT: csel x8, x8, x9, lo +; CHECK-NEXT: cmp x8, #0 +; CHECK-NEXT: cset w0, eq +; CHECK-NEXT: ret 
+entry: + %or1 = or i64 1, %c + %or2 = or i64 2, %c + %umin = call i64 @llvm.umin.i64(i64 %or1, i64 %or2) + %cmp = icmp eq i64 %umin, 0 + ret i1 %cmp +} + +define i1 @i64_i64_smin_eq(i64 %a, i64 %b, i64 %c) { +; CHECK-LABEL: i64_i64_smin_eq: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: orr x8, x2, #0x1 +; CHECK-NEXT: orr x9, x2, #0x2 +; CHECK-NEXT: cmp x8, x9 +; CHECK-NEXT: csel x8, x8, x9, lt +; CHECK-NEXT: cmp x8, #0 +; CHECK-NEXT: cset w0, eq +; CHECK-NEXT: ret +entry: + %or1 = or i64 1, %c + %or2 = or i64 2, %c + %smin = call i64 @llvm.smin.i64(i64 %or1, i64 %or2) + %cmp = icmp eq i64 %smin, 0 + ret i1 %cmp +} diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/fdiv.f64.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/fdiv.f64.ll index b1cdf553b7242..0b66185d25f3e 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/fdiv.f64.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/fdiv.f64.ll @@ -253,7 +253,7 @@ define double @v_rcp_f64(double %x) { ; GFX6-NEXT: v_div_scale_f64 v[8:9], s[4:5], 1.0, v[0:1], 1.0 ; GFX6-NEXT: v_rcp_f64_e32 v[4:5], v[2:3] ; GFX6-NEXT: v_mov_b32_e32 v10, 0x3ff00000 -; GFX6-NEXT: v_cmp_eq_u32_e32 vcc, v10, v9 +; GFX6-NEXT: v_cmp_eq_u32_e32 vcc, v9, v10 ; GFX6-NEXT: v_cmp_eq_u32_e64 s[4:5], v1, v3 ; GFX6-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0 ; GFX6-NEXT: s_xor_b64 vcc, vcc, s[4:5] @@ -346,7 +346,7 @@ define double @v_rcp_f64_arcp(double %x) { ; GFX6-NEXT: v_div_scale_f64 v[8:9], s[4:5], 1.0, v[0:1], 1.0 ; GFX6-NEXT: v_rcp_f64_e32 v[4:5], v[2:3] ; GFX6-NEXT: v_mov_b32_e32 v10, 0x3ff00000 -; GFX6-NEXT: v_cmp_eq_u32_e32 vcc, v10, v9 +; GFX6-NEXT: v_cmp_eq_u32_e32 vcc, v9, v10 ; GFX6-NEXT: v_cmp_eq_u32_e64 s[4:5], v1, v3 ; GFX6-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0 ; GFX6-NEXT: s_xor_b64 vcc, vcc, s[4:5] @@ -483,7 +483,7 @@ define double @v_rcp_f64_ulp25(double %x) { ; GFX6-NEXT: v_div_scale_f64 v[8:9], s[4:5], 1.0, v[0:1], 1.0 ; GFX6-NEXT: v_rcp_f64_e32 v[4:5], v[2:3] ; GFX6-NEXT: v_mov_b32_e32 v10, 0x3ff00000 -; GFX6-NEXT: v_cmp_eq_u32_e32 vcc, v10, v9 +; GFX6-NEXT: v_cmp_eq_u32_e32 vcc, v9, v10 ; GFX6-NEXT: v_cmp_eq_u32_e64 s[4:5], v1, v3 ; GFX6-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0 ; GFX6-NEXT: s_xor_b64 vcc, vcc, s[4:5] @@ -1115,7 +1115,7 @@ define <2 x double> @v_rcp_v2f64(<2 x double> %x) { ; GFX6-NEXT: v_fma_f64 v[12:13], -v[4:5], v[6:7], 1.0 ; GFX6-NEXT: v_fma_f64 v[14:15], v[14:15], v[16:17], v[14:15] ; GFX6-NEXT: v_fma_f64 v[6:7], v[6:7], v[12:13], v[6:7] -; GFX6-NEXT: v_cmp_eq_u32_e32 vcc, v20, v9 +; GFX6-NEXT: v_cmp_eq_u32_e32 vcc, v9, v20 ; GFX6-NEXT: v_mul_f64 v[12:13], v[8:9], v[6:7] ; GFX6-NEXT: v_div_scale_f64 v[16:17], s[4:5], 1.0, v[2:3], 1.0 ; GFX6-NEXT: v_fma_f64 v[18:19], -v[4:5], v[12:13], v[8:9] @@ -1126,7 +1126,7 @@ define <2 x double> @v_rcp_v2f64(<2 x double> %x) { ; GFX6-NEXT: v_mul_f64 v[8:9], v[16:17], v[4:5] ; GFX6-NEXT: v_div_fmas_f64 v[6:7], v[18:19], v[6:7], v[12:13] ; GFX6-NEXT: v_fma_f64 v[12:13], -v[10:11], v[8:9], v[16:17] -; GFX6-NEXT: v_cmp_eq_u32_e32 vcc, v20, v17 +; GFX6-NEXT: v_cmp_eq_u32_e32 vcc, v17, v20 ; GFX6-NEXT: v_cmp_eq_u32_e64 s[4:5], v3, v11 ; GFX6-NEXT: s_xor_b64 vcc, vcc, s[4:5] ; GFX6-NEXT: v_div_fixup_f64 v[0:1], v[6:7], v[0:1], 1.0 @@ -1275,7 +1275,7 @@ define <2 x double> @v_rcp_v2f64_arcp(<2 x double> %x) { ; GFX6-NEXT: v_fma_f64 v[12:13], -v[4:5], v[6:7], 1.0 ; GFX6-NEXT: v_fma_f64 v[14:15], v[14:15], v[16:17], v[14:15] ; GFX6-NEXT: v_fma_f64 v[6:7], v[6:7], v[12:13], v[6:7] -; GFX6-NEXT: v_cmp_eq_u32_e32 vcc, v20, v9 +; GFX6-NEXT: v_cmp_eq_u32_e32 vcc, v9, v20 ; GFX6-NEXT: v_mul_f64 v[12:13], v[8:9], v[6:7] 
; GFX6-NEXT: v_div_scale_f64 v[16:17], s[4:5], 1.0, v[2:3], 1.0 ; GFX6-NEXT: v_fma_f64 v[18:19], -v[4:5], v[12:13], v[8:9] @@ -1286,7 +1286,7 @@ define <2 x double> @v_rcp_v2f64_arcp(<2 x double> %x) { ; GFX6-NEXT: v_mul_f64 v[8:9], v[16:17], v[4:5] ; GFX6-NEXT: v_div_fmas_f64 v[6:7], v[18:19], v[6:7], v[12:13] ; GFX6-NEXT: v_fma_f64 v[12:13], -v[10:11], v[8:9], v[16:17] -; GFX6-NEXT: v_cmp_eq_u32_e32 vcc, v20, v17 +; GFX6-NEXT: v_cmp_eq_u32_e32 vcc, v17, v20 ; GFX6-NEXT: v_cmp_eq_u32_e64 s[4:5], v3, v11 ; GFX6-NEXT: s_xor_b64 vcc, vcc, s[4:5] ; GFX6-NEXT: v_div_fixup_f64 v[0:1], v[6:7], v[0:1], 1.0 @@ -1502,7 +1502,7 @@ define <2 x double> @v_rcp_v2f64_ulp25(<2 x double> %x) { ; GFX6-NEXT: v_fma_f64 v[12:13], -v[4:5], v[6:7], 1.0 ; GFX6-NEXT: v_fma_f64 v[14:15], v[14:15], v[16:17], v[14:15] ; GFX6-NEXT: v_fma_f64 v[6:7], v[6:7], v[12:13], v[6:7] -; GFX6-NEXT: v_cmp_eq_u32_e32 vcc, v20, v9 +; GFX6-NEXT: v_cmp_eq_u32_e32 vcc, v9, v20 ; GFX6-NEXT: v_mul_f64 v[12:13], v[8:9], v[6:7] ; GFX6-NEXT: v_div_scale_f64 v[16:17], s[4:5], 1.0, v[2:3], 1.0 ; GFX6-NEXT: v_fma_f64 v[18:19], -v[4:5], v[12:13], v[8:9] @@ -1513,7 +1513,7 @@ define <2 x double> @v_rcp_v2f64_ulp25(<2 x double> %x) { ; GFX6-NEXT: v_mul_f64 v[8:9], v[16:17], v[4:5] ; GFX6-NEXT: v_div_fmas_f64 v[6:7], v[18:19], v[6:7], v[12:13] ; GFX6-NEXT: v_fma_f64 v[12:13], -v[10:11], v[8:9], v[16:17] -; GFX6-NEXT: v_cmp_eq_u32_e32 vcc, v20, v17 +; GFX6-NEXT: v_cmp_eq_u32_e32 vcc, v17, v20 ; GFX6-NEXT: v_cmp_eq_u32_e64 s[4:5], v3, v11 ; GFX6-NEXT: s_xor_b64 vcc, vcc, s[4:5] ; GFX6-NEXT: v_div_fixup_f64 v[0:1], v[6:7], v[0:1], 1.0 diff --git a/llvm/test/CodeGen/AMDGPU/itofp.i128.ll b/llvm/test/CodeGen/AMDGPU/itofp.i128.ll index c5198cdb421a5..98f09db4925ec 100644 --- a/llvm/test/CodeGen/AMDGPU/itofp.i128.ll +++ b/llvm/test/CodeGen/AMDGPU/itofp.i128.ll @@ -138,7 +138,7 @@ define float @sitofp_i128_to_f32(i128 %x) { ; GISEL-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[4:5] ; GISEL-NEXT: s_mov_b32 s4, 0 ; GISEL-NEXT: v_mov_b32_e32 v4, s4 -; GISEL-NEXT: s_and_saveexec_b64 s[6:7], vcc +; GISEL-NEXT: s_and_saveexec_b64 s[4:5], vcc ; GISEL-NEXT: s_cbranch_execz .LBB0_14 ; GISEL-NEXT: ; %bb.1: ; %itofp-if-end ; GISEL-NEXT: v_ashrrev_i32_e32 v6, 31, v3 @@ -165,8 +165,8 @@ define float @sitofp_i128_to_f32(i128 %x) { ; GISEL-NEXT: v_sub_u32_e32 v7, 0x7f, v5 ; GISEL-NEXT: v_cmp_ge_i32_e32 vcc, 24, v8 ; GISEL-NEXT: ; implicit-def: $vgpr4 -; GISEL-NEXT: s_and_saveexec_b64 s[4:5], vcc -; GISEL-NEXT: s_xor_b64 s[4:5], exec, s[4:5] +; GISEL-NEXT: s_and_saveexec_b64 s[6:7], vcc +; GISEL-NEXT: s_xor_b64 s[6:7], exec, s[6:7] ; GISEL-NEXT: ; %bb.2: ; %itofp-if-else ; GISEL-NEXT: v_add_u32_e32 v2, 0xffffff98, v5 ; GISEL-NEXT: v_lshlrev_b64 v[0:1], v2, v[0:1] @@ -177,16 +177,16 @@ define float @sitofp_i128_to_f32(i128 %x) { ; GISEL-NEXT: ; implicit-def: $vgpr5 ; GISEL-NEXT: ; implicit-def: $vgpr2 ; GISEL-NEXT: ; %bb.3: ; %Flow3 -; GISEL-NEXT: s_andn2_saveexec_b64 s[8:9], s[4:5] +; GISEL-NEXT: s_andn2_saveexec_b64 s[6:7], s[6:7] ; GISEL-NEXT: s_cbranch_execz .LBB0_13 ; GISEL-NEXT: ; %bb.4: ; %NodeBlock ; GISEL-NEXT: v_cmp_le_i32_e32 vcc, 26, v8 -; GISEL-NEXT: s_and_saveexec_b64 s[4:5], vcc -; GISEL-NEXT: s_xor_b64 s[10:11], exec, s[4:5] +; GISEL-NEXT: s_and_saveexec_b64 s[8:9], vcc +; GISEL-NEXT: s_xor_b64 s[8:9], exec, s[8:9] ; GISEL-NEXT: s_cbranch_execz .LBB0_8 ; GISEL-NEXT: ; %bb.5: ; %LeafBlock ; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 26, v8 -; GISEL-NEXT: s_and_saveexec_b64 s[12:13], vcc +; GISEL-NEXT: s_and_saveexec_b64 s[10:11], vcc ; GISEL-NEXT: s_cbranch_execz .LBB0_7 ; 
GISEL-NEXT: ; %bb.6: ; %itofp-sw-default ; GISEL-NEXT: v_sub_u32_e32 v4, 0x66, v5 @@ -212,13 +212,10 @@ define float @sitofp_i128_to_f32(i128 %x) { ; GISEL-NEXT: v_or_b32_e32 v16, v10, v12 ; GISEL-NEXT: v_lshrrev_b64 v[11:12], v14, -1 ; GISEL-NEXT: v_cmp_gt_u32_e32 vcc, 64, v5 -; GISEL-NEXT: v_cndmask_b32_e32 v11, v11, v15, vcc -; GISEL-NEXT: v_cndmask_b32_e32 v12, v12, v16, vcc -; GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v5 ; GISEL-NEXT: v_cndmask_b32_e32 v9, 0, v9, vcc ; GISEL-NEXT: v_cndmask_b32_e32 v10, 0, v10, vcc -; GISEL-NEXT: v_cndmask_b32_e64 v5, v11, -1, s[4:5] -; GISEL-NEXT: v_cndmask_b32_e64 v11, v12, -1, s[4:5] +; GISEL-NEXT: v_cndmask_b32_e32 v5, v11, v15, vcc +; GISEL-NEXT: v_cndmask_b32_e32 v11, v12, v16, vcc ; GISEL-NEXT: v_and_b32_e32 v2, v9, v2 ; GISEL-NEXT: v_and_b32_e32 v3, v10, v3 ; GISEL-NEXT: v_and_or_b32 v0, v5, v0, v2 @@ -231,13 +228,13 @@ define float @sitofp_i128_to_f32(i128 %x) { ; GISEL-NEXT: v_mov_b32_e32 v2, v5 ; GISEL-NEXT: v_mov_b32_e32 v3, v6 ; GISEL-NEXT: .LBB0_7: ; %Flow1 -; GISEL-NEXT: s_or_b64 exec, exec, s[12:13] +; GISEL-NEXT: s_or_b64 exec, exec, s[10:11] ; GISEL-NEXT: .LBB0_8: ; %Flow2 -; GISEL-NEXT: s_andn2_saveexec_b64 s[4:5], s[10:11] +; GISEL-NEXT: s_andn2_saveexec_b64 s[8:9], s[8:9] ; GISEL-NEXT: ; %bb.9: ; %itofp-sw-bb ; GISEL-NEXT: v_lshlrev_b64 v[0:1], 1, v[0:1] ; GISEL-NEXT: ; %bb.10: ; %itofp-sw-epilog -; GISEL-NEXT: s_or_b64 exec, exec, s[4:5] +; GISEL-NEXT: s_or_b64 exec, exec, s[8:9] ; GISEL-NEXT: v_bfe_u32 v2, v0, 2, 1 ; GISEL-NEXT: v_or_b32_e32 v0, v0, v2 ; GISEL-NEXT: v_add_co_u32_e32 v0, vcc, 1, v0 @@ -246,20 +243,20 @@ define float @sitofp_i128_to_f32(i128 %x) { ; GISEL-NEXT: v_mov_b32_e32 v3, 0 ; GISEL-NEXT: v_lshrrev_b64 v[4:5], 2, v[0:1] ; GISEL-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[2:3] -; GISEL-NEXT: s_and_saveexec_b64 s[4:5], vcc +; GISEL-NEXT: s_and_saveexec_b64 s[8:9], vcc ; GISEL-NEXT: ; %bb.11: ; %itofp-if-then20 ; GISEL-NEXT: v_lshrrev_b64 v[4:5], 3, v[0:1] ; GISEL-NEXT: v_mov_b32_e32 v7, v8 ; GISEL-NEXT: ; %bb.12: ; %Flow -; GISEL-NEXT: s_or_b64 exec, exec, s[4:5] -; GISEL-NEXT: .LBB0_13: ; %Flow4 ; GISEL-NEXT: s_or_b64 exec, exec, s[8:9] +; GISEL-NEXT: .LBB0_13: ; %Flow4 +; GISEL-NEXT: s_or_b64 exec, exec, s[6:7] ; GISEL-NEXT: v_and_b32_e32 v0, 0x80000000, v6 ; GISEL-NEXT: v_lshl_add_u32 v1, v7, 23, 1.0 ; GISEL-NEXT: v_and_b32_e32 v2, 0x7fffff, v4 ; GISEL-NEXT: v_or3_b32 v4, v2, v0, v1 ; GISEL-NEXT: .LBB0_14: ; %Flow5 -; GISEL-NEXT: s_or_b64 exec, exec, s[6:7] +; GISEL-NEXT: s_or_b64 exec, exec, s[4:5] ; GISEL-NEXT: v_mov_b32_e32 v0, v4 ; GISEL-NEXT: s_setpc_b64 s[30:31] %cvt = sitofp i128 %x to float @@ -392,7 +389,7 @@ define float @uitofp_i128_to_f32(i128 %x) { ; GISEL-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[4:5] ; GISEL-NEXT: s_mov_b32 s4, 0 ; GISEL-NEXT: v_mov_b32_e32 v4, s4 -; GISEL-NEXT: s_and_saveexec_b64 s[6:7], vcc +; GISEL-NEXT: s_and_saveexec_b64 s[4:5], vcc ; GISEL-NEXT: s_cbranch_execz .LBB1_14 ; GISEL-NEXT: ; %bb.1: ; %itofp-if-end ; GISEL-NEXT: v_ffbh_u32_e32 v5, v0 @@ -410,8 +407,8 @@ define float @uitofp_i128_to_f32(i128 %x) { ; GISEL-NEXT: v_sub_u32_e32 v6, 0x7f, v5 ; GISEL-NEXT: v_cmp_ge_i32_e32 vcc, 24, v7 ; GISEL-NEXT: ; implicit-def: $vgpr4 -; GISEL-NEXT: s_and_saveexec_b64 s[4:5], vcc -; GISEL-NEXT: s_xor_b64 s[4:5], exec, s[4:5] +; GISEL-NEXT: s_and_saveexec_b64 s[6:7], vcc +; GISEL-NEXT: s_xor_b64 s[6:7], exec, s[6:7] ; GISEL-NEXT: ; %bb.2: ; %itofp-if-else ; GISEL-NEXT: v_add_u32_e32 v2, 0xffffff98, v5 ; GISEL-NEXT: v_lshlrev_b64 v[0:1], v2, v[0:1] @@ -422,16 +419,16 @@ define float 
@uitofp_i128_to_f32(i128 %x) { ; GISEL-NEXT: ; implicit-def: $vgpr5 ; GISEL-NEXT: ; implicit-def: $vgpr2 ; GISEL-NEXT: ; %bb.3: ; %Flow3 -; GISEL-NEXT: s_andn2_saveexec_b64 s[8:9], s[4:5] +; GISEL-NEXT: s_andn2_saveexec_b64 s[6:7], s[6:7] ; GISEL-NEXT: s_cbranch_execz .LBB1_13 ; GISEL-NEXT: ; %bb.4: ; %NodeBlock ; GISEL-NEXT: v_cmp_le_i32_e32 vcc, 26, v7 -; GISEL-NEXT: s_and_saveexec_b64 s[4:5], vcc -; GISEL-NEXT: s_xor_b64 s[10:11], exec, s[4:5] +; GISEL-NEXT: s_and_saveexec_b64 s[8:9], vcc +; GISEL-NEXT: s_xor_b64 s[8:9], exec, s[8:9] ; GISEL-NEXT: s_cbranch_execz .LBB1_8 ; GISEL-NEXT: ; %bb.5: ; %LeafBlock ; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 26, v7 -; GISEL-NEXT: s_and_saveexec_b64 s[12:13], vcc +; GISEL-NEXT: s_and_saveexec_b64 s[10:11], vcc ; GISEL-NEXT: s_cbranch_execz .LBB1_7 ; GISEL-NEXT: ; %bb.6: ; %itofp-sw-default ; GISEL-NEXT: v_sub_u32_e32 v4, 0x66, v5 @@ -457,13 +454,10 @@ define float @uitofp_i128_to_f32(i128 %x) { ; GISEL-NEXT: v_or_b32_e32 v15, v9, v11 ; GISEL-NEXT: v_lshrrev_b64 v[10:11], v13, -1 ; GISEL-NEXT: v_cmp_gt_u32_e32 vcc, 64, v5 -; GISEL-NEXT: v_cndmask_b32_e32 v10, v10, v14, vcc -; GISEL-NEXT: v_cndmask_b32_e32 v11, v11, v15, vcc -; GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v5 ; GISEL-NEXT: v_cndmask_b32_e32 v8, 0, v8, vcc ; GISEL-NEXT: v_cndmask_b32_e32 v9, 0, v9, vcc -; GISEL-NEXT: v_cndmask_b32_e64 v5, v10, -1, s[4:5] -; GISEL-NEXT: v_cndmask_b32_e64 v10, v11, -1, s[4:5] +; GISEL-NEXT: v_cndmask_b32_e32 v5, v10, v14, vcc +; GISEL-NEXT: v_cndmask_b32_e32 v10, v11, v15, vcc ; GISEL-NEXT: v_and_b32_e32 v2, v8, v2 ; GISEL-NEXT: v_and_b32_e32 v3, v9, v3 ; GISEL-NEXT: v_and_or_b32 v0, v5, v0, v2 @@ -476,13 +470,13 @@ define float @uitofp_i128_to_f32(i128 %x) { ; GISEL-NEXT: v_mov_b32_e32 v2, v5 ; GISEL-NEXT: v_mov_b32_e32 v3, v6 ; GISEL-NEXT: .LBB1_7: ; %Flow1 -; GISEL-NEXT: s_or_b64 exec, exec, s[12:13] +; GISEL-NEXT: s_or_b64 exec, exec, s[10:11] ; GISEL-NEXT: .LBB1_8: ; %Flow2 -; GISEL-NEXT: s_andn2_saveexec_b64 s[4:5], s[10:11] +; GISEL-NEXT: s_andn2_saveexec_b64 s[8:9], s[8:9] ; GISEL-NEXT: ; %bb.9: ; %itofp-sw-bb ; GISEL-NEXT: v_lshlrev_b64 v[0:1], 1, v[0:1] ; GISEL-NEXT: ; %bb.10: ; %itofp-sw-epilog -; GISEL-NEXT: s_or_b64 exec, exec, s[4:5] +; GISEL-NEXT: s_or_b64 exec, exec, s[8:9] ; GISEL-NEXT: v_bfe_u32 v2, v0, 2, 1 ; GISEL-NEXT: v_or_b32_e32 v0, v0, v2 ; GISEL-NEXT: v_add_co_u32_e32 v0, vcc, 1, v0 @@ -491,19 +485,19 @@ define float @uitofp_i128_to_f32(i128 %x) { ; GISEL-NEXT: v_mov_b32_e32 v3, 0 ; GISEL-NEXT: v_lshrrev_b64 v[4:5], 2, v[0:1] ; GISEL-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[2:3] -; GISEL-NEXT: s_and_saveexec_b64 s[4:5], vcc +; GISEL-NEXT: s_and_saveexec_b64 s[8:9], vcc ; GISEL-NEXT: ; %bb.11: ; %itofp-if-then20 ; GISEL-NEXT: v_lshrrev_b64 v[4:5], 3, v[0:1] ; GISEL-NEXT: v_mov_b32_e32 v6, v7 ; GISEL-NEXT: ; %bb.12: ; %Flow -; GISEL-NEXT: s_or_b64 exec, exec, s[4:5] -; GISEL-NEXT: .LBB1_13: ; %Flow4 ; GISEL-NEXT: s_or_b64 exec, exec, s[8:9] +; GISEL-NEXT: .LBB1_13: ; %Flow4 +; GISEL-NEXT: s_or_b64 exec, exec, s[6:7] ; GISEL-NEXT: v_lshl_add_u32 v0, v6, 23, 1.0 ; GISEL-NEXT: v_mov_b32_e32 v1, 0x7fffff ; GISEL-NEXT: v_and_or_b32 v4, v4, v1, v0 ; GISEL-NEXT: .LBB1_14: ; %Flow5 -; GISEL-NEXT: s_or_b64 exec, exec, s[6:7] +; GISEL-NEXT: s_or_b64 exec, exec, s[4:5] ; GISEL-NEXT: v_mov_b32_e32 v0, v4 ; GISEL-NEXT: s_setpc_b64 s[30:31] %cvt = uitofp i128 %x to float @@ -744,13 +738,10 @@ define double @sitofp_i128_to_f64(i128 %x) { ; GISEL-NEXT: v_or_b32_e32 v17, v10, v12 ; GISEL-NEXT: v_lshrrev_b64 v[11:12], v15, -1 ; GISEL-NEXT: v_cmp_gt_u32_e32 vcc, 64, 
v14
-; GISEL-NEXT: v_cndmask_b32_e32 v11, v11, v16, vcc
-; GISEL-NEXT: v_cndmask_b32_e32 v12, v12, v17, vcc
-; GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v14
 ; GISEL-NEXT: v_cndmask_b32_e32 v9, 0, v9, vcc
 ; GISEL-NEXT: v_cndmask_b32_e32 v10, 0, v10, vcc
-; GISEL-NEXT: v_cndmask_b32_e64 v11, v11, -1, s[4:5]
-; GISEL-NEXT: v_cndmask_b32_e64 v12, v12, -1, s[4:5]
+; GISEL-NEXT: v_cndmask_b32_e32 v11, v11, v16, vcc
+; GISEL-NEXT: v_cndmask_b32_e32 v12, v12, v17, vcc
 ; GISEL-NEXT: v_and_b32_e32 v2, v9, v2
 ; GISEL-NEXT: v_and_b32_e32 v3, v10, v3
 ; GISEL-NEXT: v_and_or_b32 v0, v11, v0, v2
@@ -1021,13 +1012,10 @@ define double @uitofp_i128_to_f64(i128 %x) {
 ; GISEL-NEXT: v_or_b32_e32 v17, v5, v13
 ; GISEL-NEXT: v_lshrrev_b64 v[12:13], v15, -1
 ; GISEL-NEXT: v_cmp_gt_u32_e32 vcc, 64, v8
-; GISEL-NEXT: v_cndmask_b32_e32 v12, v12, v16, vcc
-; GISEL-NEXT: v_cndmask_b32_e32 v13, v13, v17, vcc
-; GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v8
 ; GISEL-NEXT: v_cndmask_b32_e32 v4, 0, v4, vcc
 ; GISEL-NEXT: v_cndmask_b32_e32 v5, 0, v5, vcc
-; GISEL-NEXT: v_cndmask_b32_e64 v8, v12, -1, s[4:5]
-; GISEL-NEXT: v_cndmask_b32_e64 v12, v13, -1, s[4:5]
+; GISEL-NEXT: v_cndmask_b32_e32 v8, v12, v16, vcc
+; GISEL-NEXT: v_cndmask_b32_e32 v12, v13, v17, vcc
 ; GISEL-NEXT: v_and_b32_e32 v2, v4, v2
 ; GISEL-NEXT: v_and_b32_e32 v3, v5, v3
 ; GISEL-NEXT: v_and_or_b32 v0, v8, v0, v2
@@ -1229,7 +1217,7 @@ define half @sitofp_i128_to_f16(i128 %x) {
 ; GISEL-NEXT: s_mov_b32 s4, 0
 ; GISEL-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[4:5]
 ; GISEL-NEXT: v_mov_b32_e32 v4, s4
-; GISEL-NEXT: s_and_saveexec_b64 s[6:7], vcc
+; GISEL-NEXT: s_and_saveexec_b64 s[4:5], vcc
 ; GISEL-NEXT: s_cbranch_execz .LBB4_14
 ; GISEL-NEXT: ; %bb.1: ; %itofp-if-end
 ; GISEL-NEXT: v_ashrrev_i32_e32 v6, 31, v3
@@ -1256,8 +1244,8 @@ define half @sitofp_i128_to_f16(i128 %x) {
 ; GISEL-NEXT: v_sub_u32_e32 v7, 0x7f, v5
 ; GISEL-NEXT: v_cmp_ge_i32_e32 vcc, 24, v8
 ; GISEL-NEXT: ; implicit-def: $vgpr4
-; GISEL-NEXT: s_and_saveexec_b64 s[4:5], vcc
-; GISEL-NEXT: s_xor_b64 s[4:5], exec, s[4:5]
+; GISEL-NEXT: s_and_saveexec_b64 s[6:7], vcc
+; GISEL-NEXT: s_xor_b64 s[6:7], exec, s[6:7]
 ; GISEL-NEXT: ; %bb.2: ; %itofp-if-else
 ; GISEL-NEXT: v_add_u32_e32 v2, 0xffffff98, v5
 ; GISEL-NEXT: v_lshlrev_b64 v[0:1], v2, v[0:1]
@@ -1268,16 +1256,16 @@ define half @sitofp_i128_to_f16(i128 %x) {
 ; GISEL-NEXT: ; implicit-def: $vgpr5
 ; GISEL-NEXT: ; implicit-def: $vgpr2
 ; GISEL-NEXT: ; %bb.3: ; %Flow3
-; GISEL-NEXT: s_andn2_saveexec_b64 s[8:9], s[4:5]
+; GISEL-NEXT: s_andn2_saveexec_b64 s[6:7], s[6:7]
 ; GISEL-NEXT: s_cbranch_execz .LBB4_13
 ; GISEL-NEXT: ; %bb.4: ; %NodeBlock
 ; GISEL-NEXT: v_cmp_le_i32_e32 vcc, 26, v8
-; GISEL-NEXT: s_and_saveexec_b64 s[4:5], vcc
-; GISEL-NEXT: s_xor_b64 s[10:11], exec, s[4:5]
+; GISEL-NEXT: s_and_saveexec_b64 s[8:9], vcc
+; GISEL-NEXT: s_xor_b64 s[8:9], exec, s[8:9]
 ; GISEL-NEXT: s_cbranch_execz .LBB4_8
 ; GISEL-NEXT: ; %bb.5: ; %LeafBlock
 ; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 26, v8
-; GISEL-NEXT: s_and_saveexec_b64 s[12:13], vcc
+; GISEL-NEXT: s_and_saveexec_b64 s[10:11], vcc
 ; GISEL-NEXT: s_cbranch_execz .LBB4_7
 ; GISEL-NEXT: ; %bb.6: ; %itofp-sw-default
 ; GISEL-NEXT: v_sub_u32_e32 v4, 0x66, v5
@@ -1303,13 +1291,10 @@ define half @sitofp_i128_to_f16(i128 %x) {
 ; GISEL-NEXT: v_or_b32_e32 v16, v10, v12
 ; GISEL-NEXT: v_lshrrev_b64 v[11:12], v14, -1
 ; GISEL-NEXT: v_cmp_gt_u32_e32 vcc, 64, v5
-; GISEL-NEXT: v_cndmask_b32_e32 v11, v11, v15, vcc
-; GISEL-NEXT: v_cndmask_b32_e32 v12, v12, v16, vcc
-; GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v5
 ; GISEL-NEXT: v_cndmask_b32_e32 v9, 0, v9, vcc
 ; GISEL-NEXT: v_cndmask_b32_e32 v10, 0, v10, vcc
-; GISEL-NEXT: v_cndmask_b32_e64 v5, v11, -1, s[4:5]
-; GISEL-NEXT: v_cndmask_b32_e64 v11, v12, -1, s[4:5]
+; GISEL-NEXT: v_cndmask_b32_e32 v5, v11, v15, vcc
+; GISEL-NEXT: v_cndmask_b32_e32 v11, v12, v16, vcc
 ; GISEL-NEXT: v_and_b32_e32 v2, v9, v2
 ; GISEL-NEXT: v_and_b32_e32 v3, v10, v3
 ; GISEL-NEXT: v_and_or_b32 v0, v5, v0, v2
@@ -1322,13 +1307,13 @@ define half @sitofp_i128_to_f16(i128 %x) {
 ; GISEL-NEXT: v_mov_b32_e32 v2, v5
 ; GISEL-NEXT: v_mov_b32_e32 v3, v6
 ; GISEL-NEXT: .LBB4_7: ; %Flow1
-; GISEL-NEXT: s_or_b64 exec, exec, s[12:13]
+; GISEL-NEXT: s_or_b64 exec, exec, s[10:11]
 ; GISEL-NEXT: .LBB4_8: ; %Flow2
-; GISEL-NEXT: s_andn2_saveexec_b64 s[4:5], s[10:11]
+; GISEL-NEXT: s_andn2_saveexec_b64 s[8:9], s[8:9]
 ; GISEL-NEXT: ; %bb.9: ; %itofp-sw-bb
 ; GISEL-NEXT: v_lshlrev_b64 v[0:1], 1, v[0:1]
 ; GISEL-NEXT: ; %bb.10: ; %itofp-sw-epilog
-; GISEL-NEXT: s_or_b64 exec, exec, s[4:5]
+; GISEL-NEXT: s_or_b64 exec, exec, s[8:9]
 ; GISEL-NEXT: v_bfe_u32 v2, v0, 2, 1
 ; GISEL-NEXT: v_or_b32_e32 v0, v0, v2
 ; GISEL-NEXT: v_add_co_u32_e32 v0, vcc, 1, v0
@@ -1337,21 +1322,21 @@ define half @sitofp_i128_to_f16(i128 %x) {
 ; GISEL-NEXT: v_mov_b32_e32 v3, 0
 ; GISEL-NEXT: v_lshrrev_b64 v[4:5], 2, v[0:1]
 ; GISEL-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[2:3]
-; GISEL-NEXT: s_and_saveexec_b64 s[4:5], vcc
+; GISEL-NEXT: s_and_saveexec_b64 s[8:9], vcc
 ; GISEL-NEXT: ; %bb.11: ; %itofp-if-then20
 ; GISEL-NEXT: v_lshrrev_b64 v[4:5], 3, v[0:1]
 ; GISEL-NEXT: v_mov_b32_e32 v7, v8
 ; GISEL-NEXT: ; %bb.12: ; %Flow
-; GISEL-NEXT: s_or_b64 exec, exec, s[4:5]
-; GISEL-NEXT: .LBB4_13: ; %Flow4
 ; GISEL-NEXT: s_or_b64 exec, exec, s[8:9]
+; GISEL-NEXT: .LBB4_13: ; %Flow4
+; GISEL-NEXT: s_or_b64 exec, exec, s[6:7]
 ; GISEL-NEXT: v_and_b32_e32 v0, 0x80000000, v6
 ; GISEL-NEXT: v_lshl_add_u32 v1, v7, 23, 1.0
 ; GISEL-NEXT: v_and_b32_e32 v2, 0x7fffff, v4
 ; GISEL-NEXT: v_or3_b32 v0, v2, v0, v1
 ; GISEL-NEXT: v_cvt_f16_f32_e32 v4, v0
 ; GISEL-NEXT: .LBB4_14: ; %Flow5
-; GISEL-NEXT: s_or_b64 exec, exec, s[6:7]
+; GISEL-NEXT: s_or_b64 exec, exec, s[4:5]
 ; GISEL-NEXT: v_mov_b32_e32 v0, v4
 ; GISEL-NEXT: s_setpc_b64 s[30:31]
 %cvt = sitofp i128 %x to half
@@ -1485,7 +1470,7 @@ define half @uitofp_i128_to_f16(i128 %x) {
 ; GISEL-NEXT: s_mov_b32 s4, 0
 ; GISEL-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[4:5]
 ; GISEL-NEXT: v_mov_b32_e32 v4, s4
-; GISEL-NEXT: s_and_saveexec_b64 s[6:7], vcc
+; GISEL-NEXT: s_and_saveexec_b64 s[4:5], vcc
 ; GISEL-NEXT: s_cbranch_execz .LBB5_14
 ; GISEL-NEXT: ; %bb.1: ; %itofp-if-end
 ; GISEL-NEXT: v_ffbh_u32_e32 v5, v0
@@ -1503,8 +1488,8 @@ define half @uitofp_i128_to_f16(i128 %x) {
 ; GISEL-NEXT: v_sub_u32_e32 v6, 0x7f, v5
 ; GISEL-NEXT: v_cmp_ge_i32_e32 vcc, 24, v7
 ; GISEL-NEXT: ; implicit-def: $vgpr4
-; GISEL-NEXT: s_and_saveexec_b64 s[4:5], vcc
-; GISEL-NEXT: s_xor_b64 s[4:5], exec, s[4:5]
+; GISEL-NEXT: s_and_saveexec_b64 s[6:7], vcc
+; GISEL-NEXT: s_xor_b64 s[6:7], exec, s[6:7]
 ; GISEL-NEXT: ; %bb.2: ; %itofp-if-else
 ; GISEL-NEXT: v_add_u32_e32 v2, 0xffffff98, v5
 ; GISEL-NEXT: v_lshlrev_b64 v[0:1], v2, v[0:1]
@@ -1515,16 +1500,16 @@ define half @uitofp_i128_to_f16(i128 %x) {
 ; GISEL-NEXT: ; implicit-def: $vgpr5
 ; GISEL-NEXT: ; implicit-def: $vgpr2
 ; GISEL-NEXT: ; %bb.3: ; %Flow3
-; GISEL-NEXT: s_andn2_saveexec_b64 s[8:9], s[4:5]
+; GISEL-NEXT: s_andn2_saveexec_b64 s[6:7], s[6:7]
 ; GISEL-NEXT: s_cbranch_execz .LBB5_13
 ; GISEL-NEXT: ; %bb.4: ; %NodeBlock
 ; GISEL-NEXT: v_cmp_le_i32_e32 vcc, 26, v7
-; GISEL-NEXT: s_and_saveexec_b64 s[4:5], vcc
-; GISEL-NEXT: s_xor_b64 s[10:11], exec, s[4:5]
+; GISEL-NEXT: s_and_saveexec_b64 s[8:9], vcc
+; GISEL-NEXT: s_xor_b64 s[8:9], exec, s[8:9]
 ; GISEL-NEXT: s_cbranch_execz .LBB5_8
 ; GISEL-NEXT: ; %bb.5: ; %LeafBlock
 ; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 26, v7
-; GISEL-NEXT: s_and_saveexec_b64 s[12:13], vcc
+; GISEL-NEXT: s_and_saveexec_b64 s[10:11], vcc
 ; GISEL-NEXT: s_cbranch_execz .LBB5_7
 ; GISEL-NEXT: ; %bb.6: ; %itofp-sw-default
 ; GISEL-NEXT: v_sub_u32_e32 v4, 0x66, v5
@@ -1550,13 +1535,10 @@ define half @uitofp_i128_to_f16(i128 %x) {
 ; GISEL-NEXT: v_or_b32_e32 v15, v9, v11
 ; GISEL-NEXT: v_lshrrev_b64 v[10:11], v13, -1
 ; GISEL-NEXT: v_cmp_gt_u32_e32 vcc, 64, v5
-; GISEL-NEXT: v_cndmask_b32_e32 v10, v10, v14, vcc
-; GISEL-NEXT: v_cndmask_b32_e32 v11, v11, v15, vcc
-; GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v5
 ; GISEL-NEXT: v_cndmask_b32_e32 v8, 0, v8, vcc
 ; GISEL-NEXT: v_cndmask_b32_e32 v9, 0, v9, vcc
-; GISEL-NEXT: v_cndmask_b32_e64 v5, v10, -1, s[4:5]
-; GISEL-NEXT: v_cndmask_b32_e64 v10, v11, -1, s[4:5]
+; GISEL-NEXT: v_cndmask_b32_e32 v5, v10, v14, vcc
+; GISEL-NEXT: v_cndmask_b32_e32 v10, v11, v15, vcc
 ; GISEL-NEXT: v_and_b32_e32 v2, v8, v2
 ; GISEL-NEXT: v_and_b32_e32 v3, v9, v3
 ; GISEL-NEXT: v_and_or_b32 v0, v5, v0, v2
@@ -1569,13 +1551,13 @@ define half @uitofp_i128_to_f16(i128 %x) {
 ; GISEL-NEXT: v_mov_b32_e32 v2, v5
 ; GISEL-NEXT: v_mov_b32_e32 v3, v6
 ; GISEL-NEXT: .LBB5_7: ; %Flow1
-; GISEL-NEXT: s_or_b64 exec, exec, s[12:13]
+; GISEL-NEXT: s_or_b64 exec, exec, s[10:11]
 ; GISEL-NEXT: .LBB5_8: ; %Flow2
-; GISEL-NEXT: s_andn2_saveexec_b64 s[4:5], s[10:11]
+; GISEL-NEXT: s_andn2_saveexec_b64 s[8:9], s[8:9]
 ; GISEL-NEXT: ; %bb.9: ; %itofp-sw-bb
 ; GISEL-NEXT: v_lshlrev_b64 v[0:1], 1, v[0:1]
 ; GISEL-NEXT: ; %bb.10: ; %itofp-sw-epilog
-; GISEL-NEXT: s_or_b64 exec, exec, s[4:5]
+; GISEL-NEXT: s_or_b64 exec, exec, s[8:9]
 ; GISEL-NEXT: v_bfe_u32 v2, v0, 2, 1
 ; GISEL-NEXT: v_or_b32_e32 v0, v0, v2
 ; GISEL-NEXT: v_add_co_u32_e32 v0, vcc, 1, v0
@@ -1584,20 +1566,20 @@ define half @uitofp_i128_to_f16(i128 %x) {
 ; GISEL-NEXT: v_mov_b32_e32 v3, 0
 ; GISEL-NEXT: v_lshrrev_b64 v[4:5], 2, v[0:1]
 ; GISEL-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[2:3]
-; GISEL-NEXT: s_and_saveexec_b64 s[4:5], vcc
+; GISEL-NEXT: s_and_saveexec_b64 s[8:9], vcc
 ; GISEL-NEXT: ; %bb.11: ; %itofp-if-then20
 ; GISEL-NEXT: v_lshrrev_b64 v[4:5], 3, v[0:1]
 ; GISEL-NEXT: v_mov_b32_e32 v6, v7
 ; GISEL-NEXT: ; %bb.12: ; %Flow
-; GISEL-NEXT: s_or_b64 exec, exec, s[4:5]
-; GISEL-NEXT: .LBB5_13: ; %Flow4
 ; GISEL-NEXT: s_or_b64 exec, exec, s[8:9]
+; GISEL-NEXT: .LBB5_13: ; %Flow4
+; GISEL-NEXT: s_or_b64 exec, exec, s[6:7]
 ; GISEL-NEXT: v_lshl_add_u32 v0, v6, 23, 1.0
 ; GISEL-NEXT: v_mov_b32_e32 v1, 0x7fffff
 ; GISEL-NEXT: v_and_or_b32 v0, v4, v1, v0
 ; GISEL-NEXT: v_cvt_f16_f32_e32 v4, v0
 ; GISEL-NEXT: .LBB5_14: ; %Flow5
-; GISEL-NEXT: s_or_b64 exec, exec, s[6:7]
+; GISEL-NEXT: s_or_b64 exec, exec, s[4:5]
 ; GISEL-NEXT: v_mov_b32_e32 v0, v4
 ; GISEL-NEXT: s_setpc_b64 s[30:31]
 %cvt = uitofp i128 %x to half
diff --git a/llvm/test/CodeGen/AMDGPU/rsq.f64.ll b/llvm/test/CodeGen/AMDGPU/rsq.f64.ll
index bd6e1f54e636d..8f4a4b5afcdc1 100644
--- a/llvm/test/CodeGen/AMDGPU/rsq.f64.ll
+++ b/llvm/test/CodeGen/AMDGPU/rsq.f64.ll
@@ -87,7 +87,7 @@ define amdgpu_ps <2 x i32> @s_rsq_f64(double inreg %x) {
 ; SI-GISEL-NEXT: v_div_scale_f64 v[8:9], s[0:1], 1.0, v[0:1], 1.0
 ; SI-GISEL-NEXT: v_rcp_f64_e32 v[4:5], v[2:3]
 ; SI-GISEL-NEXT: v_cmp_eq_u32_e64 s[0:1], v1, v3
-; SI-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v10, v9
+; SI-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v9, v10
 ; SI-GISEL-NEXT: s_xor_b64 vcc, vcc, s[0:1]
 ; SI-GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
 ; SI-GISEL-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
@@ -267,7 +267,7 @@ define amdgpu_ps <2 x i32> @s_rsq_f64_fabs(double inreg %x) {
 ; SI-GISEL-NEXT: v_div_scale_f64 v[8:9], s[0:1], 1.0, v[0:1], 1.0
 ; SI-GISEL-NEXT: v_rcp_f64_e32 v[4:5], v[2:3]
 ; SI-GISEL-NEXT: v_cmp_eq_u32_e64 s[0:1], v1, v3
-; SI-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v10, v9
+; SI-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v9, v10
 ; SI-GISEL-NEXT: s_xor_b64 vcc, vcc, s[0:1]
 ; SI-GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
 ; SI-GISEL-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
@@ -448,7 +448,7 @@ define amdgpu_ps <2 x i32> @s_neg_rsq_f64(double inreg %x) {
 ; SI-GISEL-NEXT: v_div_scale_f64 v[8:9], s[0:1], -1.0, v[0:1], -1.0
 ; SI-GISEL-NEXT: v_rcp_f64_e32 v[4:5], v[2:3]
 ; SI-GISEL-NEXT: v_cmp_eq_u32_e64 s[0:1], v1, v3
-; SI-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v10, v9
+; SI-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v9, v10
 ; SI-GISEL-NEXT: s_xor_b64 vcc, vcc, s[0:1]
 ; SI-GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
 ; SI-GISEL-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
@@ -628,7 +628,7 @@ define amdgpu_ps <2 x i32> @s_neg_rsq_neg_f64(double inreg %x) {
 ; SI-GISEL-NEXT: v_div_scale_f64 v[8:9], s[0:1], -1.0, v[0:1], -1.0
 ; SI-GISEL-NEXT: v_rcp_f64_e32 v[4:5], v[2:3]
 ; SI-GISEL-NEXT: v_cmp_eq_u32_e64 s[0:1], v1, v3
-; SI-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v10, v9
+; SI-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v9, v10
 ; SI-GISEL-NEXT: s_xor_b64 vcc, vcc, s[0:1]
 ; SI-GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
 ; SI-GISEL-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
@@ -809,7 +809,7 @@ define double @v_rsq_f64(double %x) {
 ; SI-GISEL-NEXT: v_div_scale_f64 v[8:9], s[4:5], 1.0, v[0:1], 1.0
 ; SI-GISEL-NEXT: v_rcp_f64_e32 v[4:5], v[2:3]
 ; SI-GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], v1, v3
-; SI-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v10, v9
+; SI-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v9, v10
 ; SI-GISEL-NEXT: s_xor_b64 vcc, vcc, s[4:5]
 ; SI-GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
 ; SI-GISEL-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
@@ -978,7 +978,7 @@ define double @v_rsq_f64_fabs(double %x) {
 ; SI-GISEL-NEXT: v_div_scale_f64 v[8:9], s[4:5], 1.0, v[0:1], 1.0
 ; SI-GISEL-NEXT: v_rcp_f64_e32 v[4:5], v[2:3]
 ; SI-GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], v1, v3
-; SI-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v10, v9
+; SI-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v9, v10
 ; SI-GISEL-NEXT: s_xor_b64 vcc, vcc, s[4:5]
 ; SI-GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
 ; SI-GISEL-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
@@ -1148,7 +1148,7 @@ define double @v_rsq_f64_missing_contract0(double %x) {
 ; SI-GISEL-NEXT: v_div_scale_f64 v[8:9], s[4:5], 1.0, v[0:1], 1.0
 ; SI-GISEL-NEXT: v_rcp_f64_e32 v[4:5], v[2:3]
 ; SI-GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], v1, v3
-; SI-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v10, v9
+; SI-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v9, v10
 ; SI-GISEL-NEXT: s_xor_b64 vcc, vcc, s[4:5]
 ; SI-GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
 ; SI-GISEL-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
@@ -1317,7 +1317,7 @@ define double @v_rsq_f64_missing_contract1(double %x) {
 ; SI-GISEL-NEXT: v_div_scale_f64 v[8:9], s[4:5], 1.0, v[0:1], 1.0
 ; SI-GISEL-NEXT: v_rcp_f64_e32 v[4:5], v[2:3]
 ; SI-GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], v1, v3
-; SI-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v10, v9
+; SI-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v9, v10
 ; SI-GISEL-NEXT: s_xor_b64 vcc, vcc, s[4:5]
 ; SI-GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
 ; SI-GISEL-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
@@ -1486,7 +1486,7 @@ define double @v_neg_rsq_f64(double %x) {
 ; SI-GISEL-NEXT: v_div_scale_f64 v[8:9], s[4:5], -1.0, v[0:1], -1.0
 ; SI-GISEL-NEXT: v_rcp_f64_e32 v[4:5], v[2:3]
 ; SI-GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], v1, v3
-; SI-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v10, v9
+; SI-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v9, v10
 ; SI-GISEL-NEXT: s_xor_b64 vcc, vcc, s[4:5]
 ; SI-GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
 ; SI-GISEL-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
@@ -1716,7 +1716,7 @@ define <2 x double> @v_rsq_v2f64(<2 x double> %x) {
 ; SI-GISEL-NEXT: v_fma_f64 v[4:5], v[6:7], v[4:5], v[6:7]
 ; SI-GISEL-NEXT: v_rcp_f64_e32 v[6:7], v[8:9]
 ; SI-GISEL-NEXT: v_mul_f64 v[14:15], v[12:13], v[4:5]
-; SI-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v20, v13
+; SI-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v13, v20
 ; SI-GISEL-NEXT: v_fma_f64 v[16:17], -v[10:11], v[14:15], v[12:13]
 ; SI-GISEL-NEXT: v_fma_f64 v[18:19], -v[8:9], v[6:7], 1.0
 ; SI-GISEL-NEXT: v_fma_f64 v[6:7], v[6:7], v[18:19], v[6:7]
@@ -1728,7 +1728,7 @@ define <2 x double> @v_rsq_v2f64(<2 x double> %x) {
 ; SI-GISEL-NEXT: v_mul_f64 v[10:11], v[18:19], v[6:7]
 ; SI-GISEL-NEXT: v_div_fmas_f64 v[4:5], v[16:17], v[4:5], v[14:15]
 ; SI-GISEL-NEXT: v_fma_f64 v[12:13], -v[8:9], v[10:11], v[18:19]
-; SI-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v20, v19
+; SI-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v19, v20
 ; SI-GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], v3, v9
 ; SI-GISEL-NEXT: s_xor_b64 vcc, vcc, s[4:5]
 ; SI-GISEL-NEXT: v_div_fixup_f64 v[0:1], v[4:5], v[0:1], 1.0
@@ -2019,7 +2019,7 @@ define <2 x double> @v_neg_rsq_v2f64(<2 x double> %x) {
 ; SI-GISEL-NEXT: v_fma_f64 v[4:5], v[6:7], v[4:5], v[6:7]
 ; SI-GISEL-NEXT: v_rcp_f64_e32 v[6:7], v[8:9]
 ; SI-GISEL-NEXT: v_mul_f64 v[14:15], v[12:13], v[4:5]
-; SI-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v20, v13
+; SI-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v13, v20
 ; SI-GISEL-NEXT: v_fma_f64 v[16:17], -v[10:11], v[14:15], v[12:13]
 ; SI-GISEL-NEXT: v_fma_f64 v[18:19], -v[8:9], v[6:7], 1.0
 ; SI-GISEL-NEXT: v_fma_f64 v[6:7], v[6:7], v[18:19], v[6:7]
@@ -2031,7 +2031,7 @@ define <2 x double> @v_neg_rsq_v2f64(<2 x double> %x) {
 ; SI-GISEL-NEXT: v_mul_f64 v[10:11], v[18:19], v[6:7]
 ; SI-GISEL-NEXT: v_div_fmas_f64 v[4:5], v[16:17], v[4:5], v[14:15]
 ; SI-GISEL-NEXT: v_fma_f64 v[12:13], -v[8:9], v[10:11], v[18:19]
-; SI-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v20, v19
+; SI-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v19, v20
 ; SI-GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], v3, v9
 ; SI-GISEL-NEXT: s_xor_b64 vcc, vcc, s[4:5]
 ; SI-GISEL-NEXT: v_div_fixup_f64 v[0:1], v[4:5], v[0:1], -1.0
@@ -2293,7 +2293,7 @@ define <2 x double> @v_neg_rsq_v2f64_poisonelt(<2 x double> %x) {
 ; SI-GISEL-NEXT: v_fma_f64 v[18:19], -v[8:9], v[6:7], 1.0
 ; SI-GISEL-NEXT: v_mov_b32_e32 v10, 0xbff00000
 ; SI-GISEL-NEXT: v_fma_f64 v[6:7], v[6:7], v[18:19], v[6:7]
-; SI-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v10, v13
+; SI-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v13, v10
 ; SI-GISEL-NEXT: v_fma_f64 v[12:13], -v[8:9], v[6:7], 1.0
 ; SI-GISEL-NEXT: v_div_scale_f64 v[18:19], s[4:5], s[4:5], v[2:3], s[4:5]
 ; SI-GISEL-NEXT: v_fma_f64 v[6:7], v[6:7], v[12:13], v[6:7]
@@ -2568,7 +2568,7 @@ define <2 x double> @v_neg_pos_rsq_v2f64(<2 x double> %x) {
 ; SI-GISEL-NEXT: v_fma_f64 v[18:19], -v[8:9], v[6:7], 1.0
 ; SI-GISEL-NEXT: v_mov_b32_e32 v10, 0xbff00000
 ; SI-GISEL-NEXT: v_fma_f64 v[6:7], v[6:7], v[18:19], v[6:7]
-; SI-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v10, v13
+; SI-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v13, v10
 ; SI-GISEL-NEXT: v_fma_f64 v[12:13], -v[8:9], v[6:7], 1.0
 ; SI-GISEL-NEXT: v_div_scale_f64 v[18:19], s[4:5], 1.0, v[2:3], 1.0
 ; SI-GISEL-NEXT: v_fma_f64 v[6:7], v[6:7], v[12:13], v[6:7]
@@ -2578,7 +2578,7 @@ define <2 x double> @v_neg_pos_rsq_v2f64(<2 x double> %x) {
 ; SI-GISEL-NEXT: v_fma_f64 v[12:13], -v[8:9], v[10:11], v[18:19]
 ; SI-GISEL-NEXT: v_mov_b32_e32 v8, 0x3ff00000
 ; SI-GISEL-NEXT: v_div_fmas_f64 v[4:5], v[16:17], v[4:5], v[14:15]
-; SI-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v8, v19
+; SI-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v19, v8
 ; SI-GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], v3, v9
 ; SI-GISEL-NEXT: s_xor_b64 vcc, vcc, s[4:5]
 ; SI-GISEL-NEXT: v_div_fixup_f64 v[0:1], v[4:5], v[0:1], -1.0
@@ -2808,7 +2808,7 @@ define double @v_rsq_f64_fneg_fabs(double %x) {
 ; SI-GISEL-NEXT: v_div_scale_f64 v[8:9], s[4:5], 1.0, v[0:1], 1.0
 ; SI-GISEL-NEXT: v_rcp_f64_e32 v[4:5], v[2:3]
 ; SI-GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], v1, v3
-; SI-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v10, v9
+; SI-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v9, v10
 ; SI-GISEL-NEXT: s_xor_b64 vcc, vcc, s[4:5]
 ; SI-GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
 ; SI-GISEL-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
@@ -2979,7 +2979,7 @@ define double @v_rsq_f64__afn_sqrt(double %x) {
 ; SI-GISEL-NEXT: v_div_scale_f64 v[8:9], s[4:5], 1.0, v[0:1], 1.0
 ; SI-GISEL-NEXT: v_rcp_f64_e32 v[4:5], v[2:3]
 ; SI-GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], v1, v3
-; SI-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v10, v9
+; SI-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v9, v10
 ; SI-GISEL-NEXT: s_xor_b64 vcc, vcc, s[4:5]
 ; SI-GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
 ; SI-GISEL-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
@@ -4167,7 +4167,7 @@ define double @v_rsq_f64__nnan_ninf(double %x) {
 ; SI-GISEL-NEXT: v_div_scale_f64 v[8:9], s[4:5], 1.0, v[0:1], 1.0
 ; SI-GISEL-NEXT: v_rcp_f64_e32 v[4:5], v[2:3]
 ; SI-GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], v1, v3
-; SI-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v10, v9
+; SI-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v9, v10
 ; SI-GISEL-NEXT: s_xor_b64 vcc, vcc, s[4:5]
 ; SI-GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
 ; SI-GISEL-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
@@ -4846,7 +4846,7 @@ define double @v_rsq_amdgcn_sqrt_f64(double %x) {
 ; SI-GISEL-NEXT: v_div_scale_f64 v[8:9], s[4:5], 1.0, v[0:1], 1.0
 ; SI-GISEL-NEXT: v_rcp_f64_e32 v[4:5], v[2:3]
 ; SI-GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], v1, v3
-; SI-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v10, v9
+; SI-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v9, v10
 ; SI-GISEL-NEXT: s_xor_b64 vcc, vcc, s[4:5]
 ; SI-GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
 ; SI-GISEL-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
@@ -4927,7 +4927,7 @@ define double @v_neg_rsq_amdgcn_sqrt_f64(double %x) {
 ; SI-GISEL-NEXT: v_div_scale_f64 v[8:9], s[4:5], -1.0, v[0:1], -1.0
 ; SI-GISEL-NEXT: v_rcp_f64_e32 v[4:5], v[2:3]
 ; SI-GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], v1, v3
-; SI-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v10, v9
+; SI-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v9, v10
 ; SI-GISEL-NEXT: s_xor_b64 vcc, vcc, s[4:5]
 ; SI-GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
 ; SI-GISEL-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
@@ -5008,7 +5008,7 @@ define amdgpu_ps <2 x i32> @s_rsq_amdgcn_sqrt_f64(double inreg %x) {
 ; SI-GISEL-NEXT: v_div_scale_f64 v[8:9], s[0:1], 1.0, v[0:1], 1.0
 ; SI-GISEL-NEXT: v_rcp_f64_e32 v[4:5], v[2:3]
 ; SI-GISEL-NEXT: v_cmp_eq_u32_e64 s[0:1], v1, v3
-; SI-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v10, v9
+; SI-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v9, v10
 ; SI-GISEL-NEXT: s_xor_b64 vcc, vcc, s[0:1]
 ; SI-GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
 ; SI-GISEL-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
@@ -5649,7 +5649,7 @@ define double @v_div_const_contract_sqrt_f64(double %x) {
 ; SI-GISEL-NEXT: v_div_scale_f64 v[8:9], s[4:5], s[6:7], v[0:1], s[6:7]
 ; SI-GISEL-NEXT: v_rcp_f64_e32 v[4:5], v[2:3]
 ; SI-GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], v1, v3
-; SI-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v10, v9
+; SI-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v9, v10
 ; SI-GISEL-NEXT: s_xor_b64 vcc, vcc, s[4:5]
 ; SI-GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
 ; SI-GISEL-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]