Skip to content

Commit cb4feae

Browse files
committed
[SLP]Fix logical and/or reductions.
We need to emit select(cmp) instructions for poison-safe forms of select ops. Currently, Alive2 reports `Target is more poisonous than source` for the operations we are generating for such instructions: https://alive2.llvm.org/ce/z/FiNiAA Differential Revision: https://reviews.llvm.org/D112562
1 parent b0277be commit cb4feae

File tree

2 files changed

+23
-9
lines changed

2 files changed

+23
-9
lines changed

llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

+21-7
Original file line numberDiff line numberDiff line change
@@ -8012,10 +8012,20 @@ class HorizontalReduction {
80128012
Value *RHS, const Twine &Name, bool UseSelect) {
80138013
unsigned RdxOpcode = RecurrenceDescriptor::getOpcode(Kind);
80148014
switch (Kind) {
8015-
case RecurKind::Add:
8016-
case RecurKind::Mul:
80178015
case RecurKind::Or:
8016+
if (UseSelect &&
8017+
LHS->getType() == CmpInst::makeCmpResultType(LHS->getType()))
8018+
return Builder.CreateSelect(LHS, Builder.getTrue(), RHS, Name);
8019+
return Builder.CreateBinOp((Instruction::BinaryOps)RdxOpcode, LHS, RHS,
8020+
Name);
80188021
case RecurKind::And:
8022+
if (UseSelect &&
8023+
LHS->getType() == CmpInst::makeCmpResultType(LHS->getType()))
8024+
return Builder.CreateSelect(LHS, RHS, Builder.getFalse(), Name);
8025+
return Builder.CreateBinOp((Instruction::BinaryOps)RdxOpcode, LHS, RHS,
8026+
Name);
8027+
case RecurKind::Add:
8028+
case RecurKind::Mul:
80198029
case RecurKind::Xor:
80208030
case RecurKind::FAdd:
80218031
case RecurKind::FMul:
@@ -8059,8 +8069,12 @@ class HorizontalReduction {
80598069
static Value *createOp(IRBuilder<> &Builder, RecurKind RdxKind, Value *LHS,
80608070
Value *RHS, const Twine &Name,
80618071
const ReductionOpsListType &ReductionOps) {
8062-
bool UseSelect = ReductionOps.size() == 2;
8063-
assert((!UseSelect || isa<SelectInst>(ReductionOps[1][0])) &&
8072+
bool UseSelect = ReductionOps.size() == 2 ||
8073+
// Logical or/and.
8074+
(ReductionOps.size() == 1 &&
8075+
isa<SelectInst>(ReductionOps.front().front()));
8076+
assert((!UseSelect || ReductionOps.size() != 2 ||
8077+
isa<SelectInst>(ReductionOps[1][0])) &&
80648078
"Expected cmp + select pairs for reduction");
80658079
Value *Op = createOp(Builder, RdxKind, LHS, RHS, Name, UseSelect);
80668080
if (RecurrenceDescriptor::isIntMinMaxRecurrenceKind(RdxKind)) {
@@ -8198,10 +8212,10 @@ class HorizontalReduction {
81988212
/// Checks if the instruction is in basic block \p BB.
81998213
/// For a cmp+sel min/max reduction check that both ops are in \p BB.
82008214
static bool hasSameParent(Instruction *I, BasicBlock *BB) {
8201-
if (isCmpSelMinMax(I)) {
8215+
if (isCmpSelMinMax(I) || (isBoolLogicOp(I) && isa<SelectInst>(I))) {
82028216
auto *Sel = cast<SelectInst>(I);
8203-
auto *Cmp = cast<Instruction>(Sel->getCondition());
8204-
return Sel->getParent() == BB && Cmp->getParent() == BB;
8217+
auto *Cmp = dyn_cast<Instruction>(Sel->getCondition());
8218+
return Sel->getParent() == BB && Cmp && Cmp->getParent() == BB;
82058219
}
82068220
return I->getParent() == BB;
82078221
}

llvm/test/Transforms/SLPVectorizer/X86/reduction-logical.ll

+2-2
Original file line numberDiff line numberDiff line change
@@ -480,7 +480,7 @@ define i1 @logical_and_icmp_extra_op(<4 x i32> %x, <4 x i32> %y, i1 %c) {
480480
; CHECK-NEXT: [[S3:%.*]] = select i1 [[C:%.*]], i1 [[C]], i1 false
481481
; CHECK-NEXT: [[TMP2:%.*]] = freeze <4 x i1> [[TMP1]]
482482
; CHECK-NEXT: [[TMP3:%.*]] = call i1 @llvm.vector.reduce.and.v4i1(<4 x i1> [[TMP2]])
483-
; CHECK-NEXT: [[OP_EXTRA:%.*]] = and i1 [[TMP3]], [[S3]]
483+
; CHECK-NEXT: [[OP_EXTRA:%.*]] = select i1 [[TMP3]], i1 [[S3]], i1 false
484484
; CHECK-NEXT: ret i1 [[OP_EXTRA]]
485485
;
486486
%x0 = extractelement <4 x i32> %x, i32 0
@@ -509,7 +509,7 @@ define i1 @logical_or_icmp_extra_op(<4 x i32> %x, <4 x i32> %y, i1 %c) {
509509
; CHECK-NEXT: [[S3:%.*]] = select i1 [[C:%.*]], i1 true, i1 [[C]]
510510
; CHECK-NEXT: [[TMP2:%.*]] = freeze <4 x i1> [[TMP1]]
511511
; CHECK-NEXT: [[TMP3:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP2]])
512-
; CHECK-NEXT: [[OP_EXTRA:%.*]] = or i1 [[TMP3]], [[S3]]
512+
; CHECK-NEXT: [[OP_EXTRA:%.*]] = select i1 [[TMP3]], i1 true, i1 [[S3]]
513513
; CHECK-NEXT: ret i1 [[OP_EXTRA]]
514514
;
515515
%x0 = extractelement <4 x i32> %x, i32 0

0 commit comments

Comments
 (0)