Skip to content

Commit d024a01

Browse files
committed
Recommit "[LoopVectorize][AArch64] Enable ordered reductions by default for AArch64"
This re-lands the change by reverting the revert commit ab9296f. The issue that caused the original revert should be fixed by 9baed02.
1 parent 7a967d9 commit d024a01

File tree

7 files changed

+26
-6
lines changed

7 files changed

+26
-6
lines changed

llvm/include/llvm/Analysis/TargetTransformInfo.h

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -662,6 +662,9 @@ class TargetTransformInfo {
662662
/// Return true if the target supports masked expand load.
663663
bool isLegalMaskedExpandLoad(Type *DataType) const;
664664

665+
/// Return true if in-order (strict) FP reductions should be enabled for the target.
666+
bool enableOrderedReductions() const;
667+
665668
/// Return true if the target has a unified operation to calculate division
666669
/// and remainder. If so, the additional implicit multiplication and
667670
/// subtraction required to calculate a remainder from division are free. This
@@ -1508,6 +1511,7 @@ class TargetTransformInfo::Concept {
15081511
virtual bool isLegalMaskedGather(Type *DataType, Align Alignment) = 0;
15091512
virtual bool isLegalMaskedCompressStore(Type *DataType) = 0;
15101513
virtual bool isLegalMaskedExpandLoad(Type *DataType) = 0;
1514+
virtual bool enableOrderedReductions() = 0;
15111515
virtual bool hasDivRemOp(Type *DataType, bool IsSigned) = 0;
15121516
virtual bool hasVolatileVariant(Instruction *I, unsigned AddrSpace) = 0;
15131517
virtual bool prefersVectorizedAddressing() = 0;
@@ -1890,6 +1894,9 @@ class TargetTransformInfo::Model final : public TargetTransformInfo::Concept {
18901894
bool isLegalMaskedExpandLoad(Type *DataType) override {
18911895
return Impl.isLegalMaskedExpandLoad(DataType);
18921896
}
1897+
bool enableOrderedReductions() override {
1898+
return Impl.enableOrderedReductions();
1899+
}
18931900
bool hasDivRemOp(Type *DataType, bool IsSigned) override {
18941901
return Impl.hasDivRemOp(DataType, IsSigned);
18951902
}

llvm/include/llvm/Analysis/TargetTransformInfoImpl.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -263,6 +263,8 @@ class TargetTransformInfoImplBase {
263263

264264
bool isLegalMaskedExpandLoad(Type *DataType) const { return false; }
265265

266+
bool enableOrderedReductions() const { return false; }
267+
266268
bool hasDivRemOp(Type *DataType, bool IsSigned) const { return false; }
267269

268270
bool hasVolatileVariant(Instruction *I, unsigned AddrSpace) const {

llvm/lib/Analysis/TargetTransformInfo.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -410,6 +410,10 @@ bool TargetTransformInfo::isLegalMaskedExpandLoad(Type *DataType) const {
410410
return TTIImpl->isLegalMaskedExpandLoad(DataType);
411411
}
412412

413+
bool TargetTransformInfo::enableOrderedReductions() const {
414+
return TTIImpl->enableOrderedReductions();
415+
}
416+
413417
bool TargetTransformInfo::hasDivRemOp(Type *DataType, bool IsSigned) const {
414418
return TTIImpl->hasDivRemOp(DataType, IsSigned);
415419
}

llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -299,6 +299,8 @@ class AArch64TTIImpl : public BasicTTIImplBase<AArch64TTIImpl> {
299299
return BaseT::isLegalNTStore(DataType, Alignment);
300300
}
301301

302+
bool enableOrderedReductions() const { return true; }
303+
302304
InstructionCost getInterleavedMemoryOpCost(
303305
unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
304306
Align Alignment, unsigned AddressSpace,

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -331,7 +331,7 @@ static cl::opt<bool>
331331
cl::desc("Prefer in-loop vector reductions, "
332332
"overriding the targets preference."));
333333

334-
cl::opt<bool> ForceOrderedReductions(
334+
static cl::opt<bool> ForceOrderedReductions(
335335
"force-ordered-reductions", cl::init(false), cl::Hidden,
336336
cl::desc("Enable the vectorisation of loops with in-order (strict) "
337337
"FP reductions"));
@@ -1317,8 +1317,7 @@ class LoopVectorizationCostModel {
13171317
/// the IsOrdered flag of RdxDesc is set and we do not allow reordering
13181318
/// of FP operations.
13191319
bool useOrderedReductions(const RecurrenceDescriptor &RdxDesc) {
1320-
return ForceOrderedReductions && !Hints->allowReordering() &&
1321-
RdxDesc.isOrdered();
1320+
return !Hints->allowReordering() && RdxDesc.isOrdered();
13221321
}
13231322

13241323
/// \returns The smallest bitwidth each instruction can be represented with.
@@ -10225,7 +10224,13 @@ bool LoopVectorizePass::processLoop(Loop *L) {
1022510224
return false;
1022610225
}
1022710226

10228-
if (!LVL.canVectorizeFPMath(ForceOrderedReductions)) {
10227+
bool AllowOrderedReductions;
10228+
// If -force-ordered-reductions was given explicitly on the command line, its value overrides the TTI preference.
10229+
if (ForceOrderedReductions.getNumOccurrences() > 0)
10230+
AllowOrderedReductions = ForceOrderedReductions;
10231+
else
10232+
AllowOrderedReductions = TTI->enableOrderedReductions();
10233+
if (!LVL.canVectorizeFPMath(AllowOrderedReductions)) {
1022910234
ORE->emit([&]() {
1023010235
auto *ExactFPMathInst = Requirements.getExactFPInst();
1023110236
return OptimizationRemarkAnalysisFPCommute(DEBUG_TYPE, "CantReorderFPOps",

llvm/test/Transforms/LoopVectorize/AArch64/scalable-strict-fadd.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
; RUN: opt < %s -loop-vectorize -scalable-vectorization=on -mtriple aarch64-unknown-linux-gnu -mattr=+sve -force-ordered-reductions=false -hints-allow-reordering=true -S 2>%t | FileCheck %s --check-prefix=CHECK-UNORDERED
33
; RUN: opt < %s -loop-vectorize -scalable-vectorization=on -mtriple aarch64-unknown-linux-gnu -mattr=+sve -force-ordered-reductions=true -hints-allow-reordering=false -S 2>%t | FileCheck %s --check-prefix=CHECK-ORDERED
44
; RUN: opt < %s -loop-vectorize -scalable-vectorization=on -mtriple aarch64-unknown-linux-gnu -mattr=+sve -force-ordered-reductions=true -hints-allow-reordering=true -S 2>%t | FileCheck %s --check-prefix=CHECK-UNORDERED
5-
; RUN: opt < %s -loop-vectorize -scalable-vectorization=on -mtriple aarch64-unknown-linux-gnu -mattr=+sve -hints-allow-reordering=false -S 2>%t | FileCheck %s --check-prefix=CHECK-NOT-VECTORIZED
5+
; RUN: opt < %s -loop-vectorize -scalable-vectorization=on -mtriple aarch64-unknown-linux-gnu -mattr=+sve -hints-allow-reordering=false -S 2>%t | FileCheck %s --check-prefix=CHECK-ORDERED
66

77
define float @fadd_strict(float* noalias nocapture readonly %a, i64 %n) #0 {
88
; CHECK-ORDERED-LABEL: @fadd_strict

llvm/test/Transforms/LoopVectorize/AArch64/strict-fadd.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
; RUN: opt < %s -loop-vectorize -mtriple aarch64-unknown-linux-gnu -force-ordered-reductions=false -hints-allow-reordering=true -S 2>%t | FileCheck %s --check-prefix=CHECK-UNORDERED
33
; RUN: opt < %s -loop-vectorize -mtriple aarch64-unknown-linux-gnu -force-ordered-reductions=true -hints-allow-reordering=false -S 2>%t | FileCheck %s --check-prefix=CHECK-ORDERED
44
; RUN: opt < %s -loop-vectorize -mtriple aarch64-unknown-linux-gnu -force-ordered-reductions=true -hints-allow-reordering=true -S 2>%t | FileCheck %s --check-prefix=CHECK-UNORDERED
5-
; RUN: opt < %s -loop-vectorize -mtriple aarch64-unknown-linux-gnu -hints-allow-reordering=false -S 2>%t | FileCheck %s --check-prefix=CHECK-NOT-VECTORIZED
5+
; RUN: opt < %s -loop-vectorize -mtriple aarch64-unknown-linux-gnu -hints-allow-reordering=false -S 2>%t | FileCheck %s --check-prefix=CHECK-ORDERED
66

77
define float @fadd_strict(float* noalias nocapture readonly %a, i64 %n) {
88
; CHECK-ORDERED-LABEL: @fadd_strict

0 commit comments

Comments
 (0)