Skip to content

[GlobalIsel] Speedup select to integer min/max #92378

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
May 17, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
Original file line number Diff line number Diff line change
Expand Up @@ -822,6 +822,9 @@ class CombinerHelper {
// Given a binop \p MI, commute operands 1 and 2.
void applyCommuteBinOpOperands(MachineInstr &MI);

/// Combine select to integer min/max.
bool matchSelectIMinMax(const MachineOperand &MO, BuildFnTy &MatchInfo);

/// Combine selects.
bool matchSelect(MachineInstr &MI, BuildFnTy &MatchInfo);

Expand Down Expand Up @@ -962,9 +965,6 @@ class CombinerHelper {

bool tryFoldSelectOfConstants(GSelect *Select, BuildFnTy &MatchInfo);

/// Try to fold (icmp X, Y) ? X : Y -> integer minmax.
bool tryFoldSelectToIntMinMax(GSelect *Select, BuildFnTy &MatchInfo);

bool isOneOrOneSplat(Register Src, bool AllowUndefs);
bool isZeroOrZeroSplat(Register Src, bool AllowUndefs);
bool isConstantSplatVector(Register Src, int64_t SplatValue,
Expand Down
9 changes: 8 additions & 1 deletion llvm/include/llvm/Target/GlobalISel/Combine.td
Original file line number Diff line number Diff line change
Expand Up @@ -1307,6 +1307,13 @@ def select_to_minmax: GICombineRule<
[{ return Helper.matchSimplifySelectToMinMax(*${root}, ${info}); }]),
(apply [{ Helper.applyBuildFn(*${root}, ${info}); }])>;

def select_to_iminmax: GICombineRule<
(defs root:$root, build_fn_matchinfo:$info),
(match (G_ICMP $tst, $tst1, $x, $y),
(G_SELECT $root, $tst, $x, $y),
[{ return Helper.matchSelectIMinMax(${root}, ${info}); }]),
(apply [{ Helper.applyBuildFnMO(${root}, ${info}); }])>;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Now that we compile apply+match as a single C++ function we should check whether the buildfn here gets inlined. If not, maybe we need to rethink our use of applyBuildFn?

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

When the pattern matches, the propability for a combine is higher and the pattern hits less often.


def match_selects : GICombineRule<
(defs root:$root, build_fn_matchinfo:$matchinfo),
(match (wip_match_opcode G_SELECT):$root,
Expand Down Expand Up @@ -1670,7 +1677,7 @@ def phi_combines : GICombineGroup<[extend_through_phis]>;
def bitreverse_shift : GICombineGroup<[bitreverse_shl, bitreverse_lshr]>;

def select_combines : GICombineGroup<[select_undef_cmp, select_constant_cmp,
match_selects]>;
select_to_iminmax, match_selects]>;

def trivial_combines : GICombineGroup<[copy_prop, mul_to_shl, add_p2i_to_ptradd,
mul_by_neg_one, idempotent_prop]>;
Expand Down
100 changes: 39 additions & 61 deletions llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6749,22 +6749,19 @@ bool CombinerHelper::tryFoldBoolSelectToLogic(GSelect *Select,
return false;
}

bool CombinerHelper::tryFoldSelectToIntMinMax(GSelect *Select,
BuildFnTy &MatchInfo) {
bool CombinerHelper::matchSelectIMinMax(const MachineOperand &MO,
BuildFnTy &MatchInfo) {
GSelect *Select = cast<GSelect>(MRI.getVRegDef(MO.getReg()));
GICmp *Cmp = cast<GICmp>(MRI.getVRegDef(Select->getCondReg()));

Register DstReg = Select->getReg(0);
Register Cond = Select->getCondReg();
Register True = Select->getTrueReg();
Register False = Select->getFalseReg();
LLT DstTy = MRI.getType(DstReg);

if (DstTy.isPointer())
return false;

// We need an G_ICMP on the condition register.
GICmp *Cmp = getOpcodeDef<GICmp>(Cond, MRI);
if (!Cmp)
return false;

// We want to fold the icmp and replace the select.
if (!MRI.hasOneNonDBGUse(Cmp->getReg(0)))
return false;
Expand All @@ -6775,62 +6772,46 @@ bool CombinerHelper::tryFoldSelectToIntMinMax(GSelect *Select,
if (CmpInst::isEquality(Pred))
return false;

Register CmpLHS = Cmp->getLHSReg();
Register CmpRHS = Cmp->getRHSReg();

// We can swap CmpLHS and CmpRHS for higher hitrate.
if (True == CmpRHS && False == CmpLHS) {
std::swap(CmpLHS, CmpRHS);
Pred = CmpInst::getSwappedPredicate(Pred);
}
[[maybe_unused]] Register CmpLHS = Cmp->getLHSReg();
[[maybe_unused]] Register CmpRHS = Cmp->getRHSReg();

// (icmp X, Y) ? X : Y -> integer minmax.
// see matchSelectPattern in ValueTracking.
// Legality between G_SELECT and integer minmax can differ.
if (True == CmpLHS && False == CmpRHS) {
switch (Pred) {
case ICmpInst::ICMP_UGT:
case ICmpInst::ICMP_UGE: {
if (!isLegalOrBeforeLegalizer({TargetOpcode::G_UMAX, DstTy}))
return false;
MatchInfo = [=](MachineIRBuilder &B) {
B.buildUMax(DstReg, True, False);
};
return true;
}
case ICmpInst::ICMP_SGT:
case ICmpInst::ICMP_SGE: {
if (!isLegalOrBeforeLegalizer({TargetOpcode::G_SMAX, DstTy}))
return false;
MatchInfo = [=](MachineIRBuilder &B) {
B.buildSMax(DstReg, True, False);
};
return true;
}
case ICmpInst::ICMP_ULT:
case ICmpInst::ICMP_ULE: {
if (!isLegalOrBeforeLegalizer({TargetOpcode::G_UMIN, DstTy}))
return false;
MatchInfo = [=](MachineIRBuilder &B) {
B.buildUMin(DstReg, True, False);
};
return true;
}
case ICmpInst::ICMP_SLT:
case ICmpInst::ICMP_SLE: {
if (!isLegalOrBeforeLegalizer({TargetOpcode::G_SMIN, DstTy}))
return false;
MatchInfo = [=](MachineIRBuilder &B) {
B.buildSMin(DstReg, True, False);
};
return true;
}
default:
assert(True == CmpLHS && False == CmpRHS && "unexpected MIR pattern");
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You no longer match the swapped form (icmp X, Y) ? Y : X. That seems like a regression.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I cannot. The alternative would be two patterns?!?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sure, either write two patterns or move the True == CmpLHS checks back into C++ code. I don't think it's OK to regress functionality, just to make the pattern matching neater.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If this pattern works, it is evidently missing test coverage

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please don't merge with outstanding concerns about the patch. Please add back matching for the swapped form (icmp X, Y) ? Y : X.


switch (Pred) {
case ICmpInst::ICMP_UGT:
case ICmpInst::ICMP_UGE: {
if (!isLegalOrBeforeLegalizer({TargetOpcode::G_UMAX, DstTy}))
return false;
}
MatchInfo = [=](MachineIRBuilder &B) { B.buildUMax(DstReg, True, False); };
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Instead of using a lambda, which is kinda expensive, why not make the MatchData an unsigned ?
Then you just set the opcode, and the apply function is just a buildInstr call.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I would still need to pass DstReg, True, and False. I really like this lambda pattern with capture rules.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I really like this lambda pattern with capture rules.

Yeah but after #92239 it is completely pointless :) because we want the "match" and "apply" parts to be combined into one function anyway! So we should think about a better solution, e.g. allowing the user to write their own combined "match+apply" function in the first place.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

e.g. allowing the user to write their own combined "match+apply" function in the first place.

Can you open an issue to track this idea? I'm not sure I like it yet, i'd need to think about how the TableGen would look.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No! I really like the standard applyBuildFnMO functions. I don't want to remember every time to set the debug location.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you open an issue to track this idea? I'm not sure I like it yet, i'd need to think about how the TableGen would look.

OK, I've tried to explain my thoughts in #92410. I hope it makes sense.

return true;
}
case ICmpInst::ICMP_SGT:
case ICmpInst::ICMP_SGE: {
if (!isLegalOrBeforeLegalizer({TargetOpcode::G_SMAX, DstTy}))
return false;
MatchInfo = [=](MachineIRBuilder &B) { B.buildSMax(DstReg, True, False); };
return true;
}
case ICmpInst::ICMP_ULT:
case ICmpInst::ICMP_ULE: {
if (!isLegalOrBeforeLegalizer({TargetOpcode::G_UMIN, DstTy}))
return false;
MatchInfo = [=](MachineIRBuilder &B) { B.buildUMin(DstReg, True, False); };
return true;
}
case ICmpInst::ICMP_SLT:
case ICmpInst::ICMP_SLE: {
if (!isLegalOrBeforeLegalizer({TargetOpcode::G_SMIN, DstTy}))
return false;
MatchInfo = [=](MachineIRBuilder &B) { B.buildSMin(DstReg, True, False); };
return true;
}
default:
return false;
}

return false;
}

bool CombinerHelper::matchSelect(MachineInstr &MI, BuildFnTy &MatchInfo) {
Expand All @@ -6842,9 +6823,6 @@ bool CombinerHelper::matchSelect(MachineInstr &MI, BuildFnTy &MatchInfo) {
if (tryFoldBoolSelectToLogic(Select, MatchInfo))
return true;

if (tryFoldSelectToIntMinMax(Select, MatchInfo))
Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It was invoked for many selects in the function and probably failed a lot for no win.

return true;

return false;
}

Expand Down
28 changes: 28 additions & 0 deletions llvm/test/CodeGen/AArch64/GlobalISel/combine-select.mir
Original file line number Diff line number Diff line change
Expand Up @@ -896,3 +896,31 @@ body: |
RET_ReallyLR

...
---
# test failed select icmp_slef,t_t_f --> smin(t,f)
name: select_icmp_sle_f_t_t_f_smin_t_f
body: |
bb.1:
liveins: $x0, $x1, $x2
; CHECK-LABEL: name: select_icmp_sle_f_t_t_f_smin_t_f
; CHECK: liveins: $x0, $x1, $x2
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $x1
; CHECK-NEXT: %t1:_(s32) = G_TRUNC [[COPY]](s64)
; CHECK-NEXT: %f1:_(s32) = G_TRUNC [[COPY1]](s64)
; CHECK-NEXT: %t:_(<4 x s32>) = G_BUILD_VECTOR %t1(s32), %t1(s32), %t1(s32), %t1(s32)
; CHECK-NEXT: %f:_(<4 x s32>) = G_BUILD_VECTOR %f1(s32), %f1(s32), %f1(s32), %f1(s32)
; CHECK-NEXT: %c:_(<4 x s32>) = G_ICMP intpred(sle), %f(<4 x s32>), %t
; CHECK-NEXT: %sel:_(<4 x s32>) = exact G_SELECT %c(<4 x s32>), %t, %f
; CHECK-NEXT: $q0 = COPY %sel(<4 x s32>)
%0:_(s64) = COPY $x0
%1:_(s64) = COPY $x1
%t1:_(s32) = G_TRUNC %0
%f1:_(s32) = G_TRUNC %1
%t:_(<4 x s32>) = G_BUILD_VECTOR %t1, %t1, %t1, %t1
%f:_(<4 x s32>) = G_BUILD_VECTOR %f1, %f1, %f1, %f1
%c:_(<4 x s32>) = G_ICMP intpred(sle), %f(<4 x s32>), %t(<4 x s32>)
%sel:_(<4 x s32>) = exact G_SELECT %c, %t, %f
$q0 = COPY %sel(<4 x s32>)
...
Loading