-
Notifications
You must be signed in to change notification settings - Fork 13.4k
[GlobalIsel] Speedup select to integer min/max #92378
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
@llvm/pr-subscribers-llvm-globalisel @llvm/pr-subscribers-backend-aarch64 Author: Thorsten Schütt (tschuett) ChangesFull diff: https://github.com/llvm/llvm-project/pull/92378.diff 4 Files Affected:
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
index ecaece8b68342..538beaf1e7864 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
@@ -822,6 +822,9 @@ class CombinerHelper {
// Given a binop \p MI, commute operands 1 and 2.
void applyCommuteBinOpOperands(MachineInstr &MI);
+ /// Combine select to integer min/max.
+ bool matchSelectIMinMax(const MachineOperand &MO, BuildFnTy &MatchInfo);
+
/// Combine selects.
bool matchSelect(MachineInstr &MI, BuildFnTy &MatchInfo);
@@ -962,9 +965,6 @@ class CombinerHelper {
bool tryFoldSelectOfConstants(GSelect *Select, BuildFnTy &MatchInfo);
- /// Try to fold (icmp X, Y) ? X : Y -> integer minmax.
- bool tryFoldSelectToIntMinMax(GSelect *Select, BuildFnTy &MatchInfo);
-
bool isOneOrOneSplat(Register Src, bool AllowUndefs);
bool isZeroOrZeroSplat(Register Src, bool AllowUndefs);
bool isConstantSplatVector(Register Src, int64_t SplatValue,
diff --git a/llvm/include/llvm/Target/GlobalISel/Combine.td b/llvm/include/llvm/Target/GlobalISel/Combine.td
index 98d266c8c0b4f..ade1c28996d13 100644
--- a/llvm/include/llvm/Target/GlobalISel/Combine.td
+++ b/llvm/include/llvm/Target/GlobalISel/Combine.td
@@ -1307,6 +1307,13 @@ def select_to_minmax: GICombineRule<
[{ return Helper.matchSimplifySelectToMinMax(*${root}, ${info}); }]),
(apply [{ Helper.applyBuildFn(*${root}, ${info}); }])>;
+def select_to_iminmax: GICombineRule<
+ (defs root:$root, build_fn_matchinfo:$info),
+ (match (G_ICMP $tst, $tst1, $x, $y),
+ (G_SELECT $root, $tst, $x, $y),
+ [{ return Helper.matchSelectIMinMax(${root}, ${info}); }]),
+ (apply [{ Helper.applyBuildFnMO(${root}, ${info}); }])>;
+
def match_selects : GICombineRule<
(defs root:$root, build_fn_matchinfo:$matchinfo),
(match (wip_match_opcode G_SELECT):$root,
@@ -1670,7 +1677,7 @@ def phi_combines : GICombineGroup<[extend_through_phis]>;
def bitreverse_shift : GICombineGroup<[bitreverse_shl, bitreverse_lshr]>;
def select_combines : GICombineGroup<[select_undef_cmp, select_constant_cmp,
- match_selects]>;
+ select_to_iminmax, match_selects]>;
def trivial_combines : GICombineGroup<[copy_prop, mul_to_shl, add_p2i_to_ptradd,
mul_by_neg_one, idempotent_prop]>;
diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
index 9999776b98260..acb5e2423c8e6 100644
--- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
@@ -6717,10 +6717,12 @@ bool CombinerHelper::tryFoldBoolSelectToLogic(GSelect *Select,
return false;
}
-bool CombinerHelper::tryFoldSelectToIntMinMax(GSelect *Select,
- BuildFnTy &MatchInfo) {
+bool CombinerHelper::matchSelectIMinMax(const MachineOperand &MO,
+ BuildFnTy &MatchInfo) {
+ GSelect *Select = cast<GSelect>(MRI.getVRegDef(MO.getReg()));
+ GICmp *Cmp = cast<GICmp>(MRI.getVRegDef(Select->getCondReg()));
+
Register DstReg = Select->getReg(0);
- Register Cond = Select->getCondReg();
Register True = Select->getTrueReg();
Register False = Select->getFalseReg();
LLT DstTy = MRI.getType(DstReg);
@@ -6728,11 +6730,6 @@ bool CombinerHelper::tryFoldSelectToIntMinMax(GSelect *Select,
if (DstTy.isPointer())
return false;
- // We need an G_ICMP on the condition register.
- GICmp *Cmp = getOpcodeDef<GICmp>(Cond, MRI);
- if (!Cmp)
- return false;
-
// We want to fold the icmp and replace the select.
if (!MRI.hasOneNonDBGUse(Cmp->getReg(0)))
return false;
@@ -6743,62 +6740,46 @@ bool CombinerHelper::tryFoldSelectToIntMinMax(GSelect *Select,
if (CmpInst::isEquality(Pred))
return false;
- Register CmpLHS = Cmp->getLHSReg();
- Register CmpRHS = Cmp->getRHSReg();
-
- // We can swap CmpLHS and CmpRHS for higher hitrate.
- if (True == CmpRHS && False == CmpLHS) {
- std::swap(CmpLHS, CmpRHS);
- Pred = CmpInst::getSwappedPredicate(Pred);
- }
+ [[maybe_unused]] Register CmpLHS = Cmp->getLHSReg();
+ [[maybe_unused]] Register CmpRHS = Cmp->getRHSReg();
// (icmp X, Y) ? X : Y -> integer minmax.
// see matchSelectPattern in ValueTracking.
// Legality between G_SELECT and integer minmax can differ.
- if (True == CmpLHS && False == CmpRHS) {
- switch (Pred) {
- case ICmpInst::ICMP_UGT:
- case ICmpInst::ICMP_UGE: {
- if (!isLegalOrBeforeLegalizer({TargetOpcode::G_UMAX, DstTy}))
- return false;
- MatchInfo = [=](MachineIRBuilder &B) {
- B.buildUMax(DstReg, True, False);
- };
- return true;
- }
- case ICmpInst::ICMP_SGT:
- case ICmpInst::ICMP_SGE: {
- if (!isLegalOrBeforeLegalizer({TargetOpcode::G_SMAX, DstTy}))
- return false;
- MatchInfo = [=](MachineIRBuilder &B) {
- B.buildSMax(DstReg, True, False);
- };
- return true;
- }
- case ICmpInst::ICMP_ULT:
- case ICmpInst::ICMP_ULE: {
- if (!isLegalOrBeforeLegalizer({TargetOpcode::G_UMIN, DstTy}))
- return false;
- MatchInfo = [=](MachineIRBuilder &B) {
- B.buildUMin(DstReg, True, False);
- };
- return true;
- }
- case ICmpInst::ICMP_SLT:
- case ICmpInst::ICMP_SLE: {
- if (!isLegalOrBeforeLegalizer({TargetOpcode::G_SMIN, DstTy}))
- return false;
- MatchInfo = [=](MachineIRBuilder &B) {
- B.buildSMin(DstReg, True, False);
- };
- return true;
- }
- default:
+ assert(True == CmpLHS && False == CmpRHS && "unexpected MIR pattern");
+
+ switch (Pred) {
+ case ICmpInst::ICMP_UGT:
+ case ICmpInst::ICMP_UGE: {
+ if (!isLegalOrBeforeLegalizer({TargetOpcode::G_UMAX, DstTy}))
return false;
- }
+ MatchInfo = [=](MachineIRBuilder &B) { B.buildUMax(DstReg, True, False); };
+ return true;
+ }
+ case ICmpInst::ICMP_SGT:
+ case ICmpInst::ICMP_SGE: {
+ if (!isLegalOrBeforeLegalizer({TargetOpcode::G_SMAX, DstTy}))
+ return false;
+ MatchInfo = [=](MachineIRBuilder &B) { B.buildSMax(DstReg, True, False); };
+ return true;
+ }
+ case ICmpInst::ICMP_ULT:
+ case ICmpInst::ICMP_ULE: {
+ if (!isLegalOrBeforeLegalizer({TargetOpcode::G_UMIN, DstTy}))
+ return false;
+ MatchInfo = [=](MachineIRBuilder &B) { B.buildUMin(DstReg, True, False); };
+ return true;
+ }
+ case ICmpInst::ICMP_SLT:
+ case ICmpInst::ICMP_SLE: {
+ if (!isLegalOrBeforeLegalizer({TargetOpcode::G_SMIN, DstTy}))
+ return false;
+ MatchInfo = [=](MachineIRBuilder &B) { B.buildSMin(DstReg, True, False); };
+ return true;
+ }
+ default:
+ return false;
}
-
- return false;
}
bool CombinerHelper::matchSelect(MachineInstr &MI, BuildFnTy &MatchInfo) {
@@ -6810,9 +6791,6 @@ bool CombinerHelper::matchSelect(MachineInstr &MI, BuildFnTy &MatchInfo) {
if (tryFoldBoolSelectToLogic(Select, MatchInfo))
return true;
- if (tryFoldSelectToIntMinMax(Select, MatchInfo))
- return true;
-
return false;
}
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/combine-select.mir b/llvm/test/CodeGen/AArch64/GlobalISel/combine-select.mir
index 2bf7e84a379ba..4abbbe1316586 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/combine-select.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/combine-select.mir
@@ -896,3 +896,31 @@ body: |
RET_ReallyLR
...
+---
+# test failed_select icmp_sle f,t_t_f --> smin(t,f)
+name: select_icmp_sle_f_t_t_f_smin_t_f
+body: |
+ bb.1:
+ liveins: $x0, $x1, $x2
+ ; CHECK-LABEL: name: select_icmp_sle_f_t_t_f_smin_t_f
+ ; CHECK: liveins: $x0, $x1, $x2
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $x1
+ ; CHECK-NEXT: %t1:_(s32) = G_TRUNC [[COPY]](s64)
+ ; CHECK-NEXT: %f1:_(s32) = G_TRUNC [[COPY1]](s64)
+ ; CHECK-NEXT: %t:_(<4 x s32>) = G_BUILD_VECTOR %t1(s32), %t1(s32), %t1(s32), %t1(s32)
+ ; CHECK-NEXT: %f:_(<4 x s32>) = G_BUILD_VECTOR %f1(s32), %f1(s32), %f1(s32), %f1(s32)
+ ; CHECK-NEXT: %c:_(<4 x s32>) = G_ICMP intpred(sle), %f(<4 x s32>), %t
+ ; CHECK-NEXT: %sel:_(<4 x s32>) = exact G_SELECT %c(<4 x s32>), %t, %f
+ ; CHECK-NEXT: $q0 = COPY %sel(<4 x s32>)
+ %0:_(s64) = COPY $x0
+ %1:_(s64) = COPY $x1
+ %t1:_(s32) = G_TRUNC %0
+ %f1:_(s32) = G_TRUNC %1
+ %t:_(<4 x s32>) = G_BUILD_VECTOR %t1, %t1, %t1, %t1
+ %f:_(<4 x s32>) = G_BUILD_VECTOR %f1, %f1, %f1, %f1
+ %c:_(<4 x s32>) = G_ICMP intpred(sle), %f(<4 x s32>), %t(<4 x s32>)
+ %sel:_(<4 x s32>) = exact G_SELECT %c, %t, %f
+ $q0 = COPY %sel(<4 x s32>)
+...
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Speedup select to integer min/max
It's nice to use dag patterns but I kinda doubt that it is any faster in this case - or have you measured a speed up?
return true; | ||
} | ||
default: | ||
assert(True == CmpLHS && False == CmpRHS && "unexpected MIR pattern"); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
You no longer match the swapped form (icmp X, Y) ? Y : X
. That seems like a regression.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I cannot. The alternative would be two patterns?!?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Sure, either write two patterns or move the True == CmpLHS
checks back into C++ code. I don't think it's OK to regress functionality, just to make the pattern matching neater.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
If this pattern works, it is evidently missing test coverage
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Please don't merge with outstanding concerns about the patch. Please add back matching for the swapped form (icmp X, Y) ? Y : X
.
(match (G_ICMP $tst, $tst1, $x, $y), | ||
(G_SELECT $root, $tst, $x, $y), | ||
[{ return Helper.matchSelectIMinMax(${root}, ${info}); }]), | ||
(apply [{ Helper.applyBuildFnMO(${root}, ${info}); }])>; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Now that we compile apply+match as a single C++ function we should check whether the buildfn here gets inlined. If not, maybe we need to rethink our use of applyBuildFn?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
When the pattern matches, the propability for a combine is higher and the pattern hits less often.
@@ -6810,9 +6791,6 @@ bool CombinerHelper::matchSelect(MachineInstr &MI, BuildFnTy &MatchInfo) { | |||
if (tryFoldBoolSelectToLogic(Select, MatchInfo)) | |||
return true; | |||
|
|||
if (tryFoldSelectToIntMinMax(Select, MatchInfo)) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It was invoked for many selects in the function and probably failed a lot for no win.
774c7f9
to
08c5d14
Compare
return false; | ||
} | ||
MatchInfo = [=](MachineIRBuilder &B) { B.buildUMax(DstReg, True, False); }; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Instead of using a lambda, which is kinda expensive, why not make the MatchData an unsigned
?
Then you just set the opcode, and the apply function is just a buildInstr
call.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I would still need to pass DstReg
, True
, and False
. I really like this lambda pattern with capture rules.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I really like this lambda pattern with capture rules.
Yeah but after #92239 it is completely pointless :) because we want the "match" and "apply" parts to be combined into one function anyway! So we should think about a better solution, e.g. allowing the user to write their own combined "match+apply" function in the first place.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
e.g. allowing the user to write their own combined "match+apply" function in the first place.
Can you open an issue to track this idea? I'm not sure I like it yet, i'd need to think about how the TableGen would look.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
No! I really like the standard applyBuildFnMO
functions. I don't want to remember every time to set the debug location.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Can you open an issue to track this idea? I'm not sure I like it yet, i'd need to think about how the TableGen would look.
OK, I've tried to explain my thoughts in #92410. I hope it makes sense.
#92309