-
Notifications
You must be signed in to change notification settings - Fork 13.4k
[RISCV][TTI] Recognize CONCAT_VECTORS if a shufflevector mask is multiple insert subvector. #110457
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[RISCV][TTI] Recognize CONCAT_VECTORS if a shufflevector mask is multiple insert subvector. #110457
Conversation
@llvm/pr-subscribers-llvm-analysis @llvm/pr-subscribers-backend-risc-v Author: Han-Kuan Chen (HanKuanChen) ChangesFull diff: https://github.com/llvm/llvm-project/pull/110457.diff 5 Files Affected:
diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
index 768df71715fa63..e8a5477fbbc599 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
@@ -343,6 +343,40 @@ RISCVTTIImpl::getConstantPoolLoadCost(Type *Ty, TTI::TargetCostKind CostKind) {
/*AddressSpace=*/0, CostKind);
}
+InstructionCost
+RISCVTTIImpl::isMultipleInsertSubvector(VectorType *Tp, ArrayRef<int> Mask,
+ TTI::TargetCostKind CostKind) {
+ if (!isa<FixedVectorType>(Tp))
+ return InstructionCost::getInvalid();
+ std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(Tp);
+ if (LT.second.getScalarSizeInBits() == 1)
+ return InstructionCost::getInvalid();
+ // Try to guess SubTp.
+ for (unsigned SubVecSize = 1, E = Mask.size(); SubVecSize < E;
+ SubVecSize <<= 1) {
+ if (E % SubVecSize != 0)
+ continue;
+ SmallVector<int> RepeatedPattern(createSequentialMask(0, SubVecSize, 0));
+ bool Skip = false;
+ for (unsigned I = 0; I != E; I += SubVecSize)
+ if (!Mask.slice(I, SubVecSize).equals(RepeatedPattern)) {
+ Skip = true;
+ break;
+ }
+ if (Skip)
+ continue;
+ InstructionCost Cost = 0;
+ FixedVectorType *SubTp = FixedVectorType::get(
+ cast<FixedVectorType>(Tp)->getElementType(), SubVecSize);
+ // The cost of extraction from a subvector is 0 if the index is 0.
+ for (unsigned I = 0; I != E; I += SubVecSize)
+ Cost +=
+ getShuffleCost(TTI::SK_InsertSubvector, Tp, {}, CostKind, I, SubTp);
+ return Cost;
+ }
+ return InstructionCost::getInvalid();
+}
+
static VectorType *getVRGatherIndexType(MVT DataVT, const RISCVSubtarget &ST,
LLVMContext &C) {
assert((DataVT.getScalarSizeInBits() != 8 ||
@@ -394,6 +428,10 @@ InstructionCost RISCVTTIImpl::getShuffleCost(TTI::ShuffleKind Kind,
LT.second, CostKind);
}
}
+ if (InstructionCost Cost =
+ isMultipleInsertSubvector(Tp, Mask, CostKind);
+ Cost.isValid())
+ return Cost;
}
// vrgather + cost of generating the mask constant.
// We model this for an unknown mask with a single vrgather.
diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h
index f16c4fc0eed023..599c9a079874b8 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h
+++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h
@@ -55,6 +55,12 @@ class RISCVTTIImpl : public BasicTTIImplBase<RISCVTTIImpl> {
/// type.
InstructionCost getConstantPoolLoadCost(Type *Ty,
TTI::TargetCostKind CostKind);
+
+ /// Return the cost if a shufflevector can be consist of multiple vslideup.
+ /// Otherwise, return InstructionCost::getInvalid().
+ InstructionCost isMultipleInsertSubvector(VectorType *Tp, ArrayRef<int> Mask,
+ TTI::TargetCostKind CostKind);
+
public:
explicit RISCVTTIImpl(const RISCVTargetMachine *TM, const Function &F)
: BaseT(TM, F.getDataLayout()), ST(TM->getSubtargetImpl(F)),
diff --git a/llvm/test/Analysis/CostModel/RISCV/fixed-vector-insert-subvector.ll b/llvm/test/Analysis/CostModel/RISCV/fixed-vector-insert-subvector.ll
new file mode 100644
index 00000000000000..a0c2413c8655a6
--- /dev/null
+++ b/llvm/test/Analysis/CostModel/RISCV/fixed-vector-insert-subvector.ll
@@ -0,0 +1,18 @@
+; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
+; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -S -mtriple=riscv64 -mattr=+v | FileCheck %s
+
+define void @test() {
+; CHECK-LABEL: 'test'
+; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %0 = shufflevector <8 x float> poison, <8 x float> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %1 = shufflevector <4 x i16> poison, <4 x i16> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
+; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %2 = shufflevector <4 x float> poison, <4 x float> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
+; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %3 = shufflevector <2 x i1> poison, <2 x i1> poison, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
+; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+entry:
+ %0 = shufflevector <8 x float> poison, <8 x float> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+ %1 = shufflevector <4 x i16> poison, <4 x i16> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
+ %2 = shufflevector <4 x float> poison, <4 x float> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
+ %3 = shufflevector <2 x i1> poison, <2 x i1> poison, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
+ ret void
+}
diff --git a/llvm/test/Transforms/SLPVectorizer/RISCV/remarks-insert-into-small-vector.ll b/llvm/test/Transforms/SLPVectorizer/RISCV/remarks-insert-into-small-vector.ll
index bb806be15c71ca..23a9a654c96f9e 100644
--- a/llvm/test/Transforms/SLPVectorizer/RISCV/remarks-insert-into-small-vector.ll
+++ b/llvm/test/Transforms/SLPVectorizer/RISCV/remarks-insert-into-small-vector.ll
@@ -8,7 +8,7 @@
; YAML-NEXT: Function: test
; YAML-NEXT: Args:
; YAML-NEXT: - String: 'Stores SLP vectorized with cost '
-; YAML-NEXT: - Cost: '2'
+; YAML-NEXT: - Cost: '0'
; YAML-NEXT: - String: ' and with tree size '
; YAML-NEXT: - TreeSize: '7'
diff --git a/llvm/test/Transforms/SLPVectorizer/RISCV/revec-getGatherCost.ll b/llvm/test/Transforms/SLPVectorizer/RISCV/revec-getGatherCost.ll
index 887f59bbda94d6..ed1cb6a77f5857 100644
--- a/llvm/test/Transforms/SLPVectorizer/RISCV/revec-getGatherCost.ll
+++ b/llvm/test/Transforms/SLPVectorizer/RISCV/revec-getGatherCost.ll
@@ -8,7 +8,7 @@
; YAML: Function: test
; YAML: Args:
; YAML: - String: 'Stores SLP vectorized with cost '
-; YAML: - Cost: '6'
+; YAML: - Cost: '4'
; YAML: - String: ' and with tree size '
; YAML: - TreeSize: '5'
|
define void @test() { | ||
; CHECK-LABEL: 'test' | ||
; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %0 = shufflevector <8 x float> poison, <8 x float> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> | ||
; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %1 = shufflevector <4 x i16> poison, <4 x i16> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3> |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Is this still overestimating the cost? Putting this into llc -mattr=+v:
define <16 x i16> @f(<4 x i16> %a, <4 x i16> %b) {
%c = shufflevector <4 x i16> %a, <4 x i16> %b, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
ret <16 x i16> %c
}
I'm counting a total of 3 LMUL ops, so I think this should be costed as 3.
f:
vmv1r.v v10, v8
vsetivli zero, 8, e16, m1, ta, ma
vslideup.vi v10, v8, 4
vmv2r.v v8, v10
vsetivli zero, 16, e16, m2, ta, ma
vslideup.vi v8, v10, 8
ret
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
done
e79f8d1
to
66e4c34
Compare
ping |
…iple insert subvector. (llvm#110457)
* commit 'FETCH_HEAD': [clang][bytecode] Handle UETT_OpenMPRequiredSimdAlign (llvm#111259) [mlir][polynomial] Add and verify constraints of coefficientModulus for ringAttr (llvm#111016) [clang][bytecode] Save a per-Block IsWeak bit (llvm#111248) [analyzer] Fix wrong `builtin_*_overflow` return type (llvm#111253) [SelectOpt] Don't convert constant selects to branches. (llvm#110858) [InstCombine] Update and-or-icmps.ll after 574266c. NFC [Instcombine] Test for more gep canonicalization [NFC][TableGen] Change `CodeGenIntrinsics` to use const references (llvm#111219) Add warning message to `session save` when transcript isn't saved. (llvm#109020) [RISCV][TTI] Recognize CONCAT_VECTORS if a shufflevector mask is multiple insert subvector. (llvm#110457) Revert "[InstCombine] Folding `(icmp eq/ne (and X, -P2), INT_MIN)`" (llvm#111236) [NFC][lsan] Add SuspendAllThreads traces [lsan] Add `thread_suspend_fail` flag Signed-off-by: kyvangka1610 <[email protected]>
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Hi, even though the comments were marked as resolved PRs usually still need explicit approval before being merged.
Would it be possible to revert this and open another PR so it can go through another round of review? Or was this reviewed internally within SiFive?
Unfortunately I don't think you can reopen merged PRs on GitHub
for (unsigned SubVecSize = 1, E = Mask.size(); SubVecSize < E; | ||
SubVecSize <<= 1) { | ||
if (E % SubVecSize != 0) | ||
continue; | ||
SmallVector<int> RepeatedPattern(createSequentialMask(0, SubVecSize, 0)); | ||
bool Skip = false; | ||
for (unsigned I = 0; I != E; I += SubVecSize) | ||
if (!Mask.slice(I, SubVecSize).equals(RepeatedPattern)) { | ||
Skip = true; | ||
break; | ||
} | ||
if (Skip) | ||
continue; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think we can work out the subvector size in one iteration over the mask rather than with a nested loop, if we iterate over each index and mark when it goes back to zero, then check the rest of the mask is mask[i] == i % subvecsize
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
What do you mean "if we iterate over each index and mark when it goes back to zero"?
for (unsigned I = 0; I != NumSlides; ++I) { | ||
unsigned InsertIndex = SubVecSize * (1 << I); | ||
FixedVectorType *SubTp = FixedVectorType::get( | ||
cast<FixedVectorType>(Tp)->getElementType(), InsertIndex); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Should this be
cast<FixedVectorType>(Tp)->getElementType(), InsertIndex); | |
cast<FixedVectorType>(Tp)->getElementType(), SubVecSize); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The previous iteration result is going to be the next iteration subvector because we are trying to concat the same subvector.
If we use SubVecSize
, that means we are concat multiple different subvector.
I have reverted this commit as it violates project review policy. Feel free to move to a new review, but you need to have approval before landing. |
…iple insert subvector. (llvm#110457)
No description provided.