[GISel][CombinerHelper] Combine op(trunc(x), trunc(y)) -> trunc(op(x, y)) #89023
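As a minimal illustration of the transform, using G_AND as the representative opcode (the virtual register numbering here is invented, in the style of the tests below):

  %2:_(s16) = G_TRUNC %0(s32)
  %3:_(s16) = G_TRUNC %1(s32)
  %4:_(s16) = G_AND %2, %3

becomes

  %4:_(s32) = G_AND %0, %1
  %5:_(s16) = G_TRUNC %4(s32)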
Conversation
Updates to test cases are in 46f65d2.
@llvm/pr-subscribers-backend-amdgpu @llvm/pr-subscribers-backend-aarch64

Author: Dhruv Chawla (dc03-work)

Changes: The "match_ands" pattern is also enabled in the AArch64 post-legalizer combiner.

Full diff: https://github.com/llvm/llvm-project/pull/89023.diff

6 Files Affected:
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
index 3af32043391fec..0148911ae1ecc6 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
@@ -963,6 +963,9 @@ class CombinerHelper {
// Simplify (cmp cc0 x, y) (&& or ||) (cmp cc1 x, y) -> cmp cc2 x, y.
bool tryFoldLogicOfFCmps(GLogicalBinOp *Logic, BuildFnTy &MatchInfo);
+
+ // Simplify (trunc v1) && (trunc v2) -> trunc (v1 && v2)
+ bool tryFoldAndOfTruncs(GLogicalBinOp *Logical, BuildFnTy &MatchInfo);
};
} // namespace llvm
diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
index 3829c33369b275..6933589d713fe6 100644
--- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
@@ -6982,9 +6982,69 @@ bool CombinerHelper::tryFoldLogicOfFCmps(GLogicalBinOp *Logic,
return false;
}
+bool CombinerHelper::tryFoldAndOfTruncs(GLogicalBinOp *Logical,
+ BuildFnTy &MatchInfo) {
+ assert(Logical->getOpcode() == TargetOpcode::G_AND &&
+ "Expected to be called with G_AND!");
+ Register Dst = Logical->getOperand(0).getReg();
+ Register V1 = Logical->getOperand(1).getReg();
+ Register V2 = Logical->getOperand(2).getReg();
+
+ MachineInstr *V1MI = MRI.getUniqueVRegDef(V1);
+ MachineInstr *V2MI = MRI.getUniqueVRegDef(V2);
+ if (!V1MI || !V2MI)
+ return false;
+
+ bool V1Freeze = V1MI->getOpcode() == TargetOpcode::G_FREEZE;
+ bool V2Freeze = V2MI->getOpcode() == TargetOpcode::G_FREEZE;
+ if (V1Freeze)
+ V1 = V1MI->getOperand(1).getReg();
+ if (V2Freeze)
+ V2 = V2MI->getOperand(1).getReg();
+
+ Register V1Src, V2Src;
+ if (!mi_match(V1, MRI, m_GTrunc(m_Reg(V1Src))) ||
+ !mi_match(V2, MRI, m_GTrunc(m_Reg(V2Src))))
+ return false;
+ if (!MRI.hasOneNonDBGUse(V1) || !MRI.hasOneNonDBGUse(V2))
+ return false;
+
+ LLT V1Ty = MRI.getType(V1);
+ LLT V2Ty = MRI.getType(V2);
+ LLT V1SrcTy = MRI.getType(V1Src);
+ LLT V2SrcTy = MRI.getType(V2Src);
+
+ if (!isLegalOrBeforeLegalizer({TargetOpcode::G_AND, {V1SrcTy, V2SrcTy}}))
+ return false;
+
+ if (V1Ty != V2Ty || V1SrcTy != V2SrcTy)
+ return false;
+
+ MatchInfo = [=](MachineIRBuilder &B) {
+ Register Op0 = V1Src;
+ Register Op1 = V2Src;
+
+ if (V1Freeze)
+ Op0 = B.buildFreeze(V1SrcTy, V1Src).getReg(0);
+ if (V2Freeze)
+ Op1 = B.buildFreeze(V1SrcTy, V2Src).getReg(0);
+
+ auto And = B.buildAnd(V1SrcTy, Op0, Op1);
+ B.buildTrunc(Dst, And);
+
+ MRI.getUniqueVRegDef(V1)->eraseFromParent();
+ MRI.getUniqueVRegDef(V2)->eraseFromParent();
+ };
+
+ return true;
+}
+
bool CombinerHelper::matchAnd(MachineInstr &MI, BuildFnTy &MatchInfo) {
GAnd *And = cast<GAnd>(&MI);
+ if (tryFoldAndOfTruncs(And, MatchInfo))
+ return true;
+
if (tryFoldAndOrOrICmpsUsingRanges(And, MatchInfo))
return true;
diff --git a/llvm/lib/Target/AArch64/AArch64Combine.td b/llvm/lib/Target/AArch64/AArch64Combine.td
index 10cad6d1924407..eda7d925dade47 100644
--- a/llvm/lib/Target/AArch64/AArch64Combine.td
+++ b/llvm/lib/Target/AArch64/AArch64Combine.td
@@ -295,5 +295,5 @@ def AArch64PostLegalizerCombiner
ptr_add_immed_chain, overlapping_and,
split_store_zero_128, undef_combines,
select_to_minmax, or_to_bsp, combine_concat_vector,
- commute_constant_to_rhs]> {
+ commute_constant_to_rhs, match_ands]> {
}
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/combine-and-trunc.mir b/llvm/test/CodeGen/AArch64/GlobalISel/combine-and-trunc.mir
new file mode 100644
index 00000000000000..7f664850885836
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/combine-and-trunc.mir
@@ -0,0 +1,167 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -o - -mtriple=aarch64-unknown-unknown -run-pass=aarch64-prelegalizer-combiner -verify-machineinstrs %s | FileCheck %s
+
+---
+name: and_trunc
+body: |
+ bb.0:
+ liveins: $w0, $w1
+ ; CHECK-LABEL: name: and_trunc
+ ; CHECK: liveins: $w0, $w1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $w1
+ ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[COPY1]]
+ ; CHECK-NEXT: $w0 = COPY [[AND]](s32)
+ %0:_(s32) = COPY $w0
+ %1:_(s32) = COPY $w1
+ %2:_(s16) = G_TRUNC %0
+ %3:_(s16) = G_TRUNC %1
+ %4:_(s16) = G_AND %2, %3
+ %5:_(s32) = G_ANYEXT %4
+ $w0 = COPY %5
+...
+---
+name: and_trunc_multiuse_1
+body: |
+ bb.0:
+ liveins: $w0, $w1, $x2
+ ; CHECK-LABEL: name: and_trunc_multiuse_1
+ ; CHECK: liveins: $w0, $w1, $x2
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $w1
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(p0) = COPY $x2
+ ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
+ ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32)
+ ; CHECK-NEXT: G_STORE [[TRUNC]](s16), [[COPY2]](p0) :: (store (s16))
+ ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[TRUNC1]]
+ ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[AND]](s16)
+ ; CHECK-NEXT: $w0 = COPY [[ANYEXT]](s32)
+ %0:_(s32) = COPY $w0
+ %1:_(s32) = COPY $w1
+ %5:_(p0) = COPY $x2
+ %2:_(s16) = G_TRUNC %0
+ %3:_(s16) = G_TRUNC %1
+ G_STORE %2, %5 :: (store (s16))
+ %4:_(s16) = G_AND %2, %3
+ %6:_(s32) = G_ANYEXT %4
+ $w0 = COPY %6
+...
+---
+name: and_trunc_multiuse_2
+body: |
+ bb.0:
+ liveins: $w0, $w1, $x2
+ ; CHECK-LABEL: name: and_trunc_multiuse_2
+ ; CHECK: liveins: $w0, $w1, $x2
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $w1
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(p0) = COPY $x2
+ ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
+ ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32)
+ ; CHECK-NEXT: G_STORE [[TRUNC]](s16), [[COPY2]](p0) :: (store (s16))
+ ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[TRUNC1]]
+ ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[AND]](s16)
+ ; CHECK-NEXT: $w0 = COPY [[ANYEXT]](s32)
+ %0:_(s32) = COPY $w0
+ %1:_(s32) = COPY $w1
+ %5:_(p0) = COPY $x2
+ %2:_(s16) = G_TRUNC %0
+ %3:_(s16) = G_TRUNC %1
+ G_STORE %2, %5 :: (store (s16))
+ %4:_(s16) = G_AND %2, %3
+ %6:_(s32) = G_ANYEXT %4
+ $w0 = COPY %6
+...
+---
+name: and_trunc_vector
+body: |
+ bb.0:
+ liveins: $q0, $q1
+ ; CHECK-LABEL: name: and_trunc_vector
+ ; CHECK: liveins: $q0, $q1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $q1
+ ; CHECK-NEXT: [[AND:%[0-9]+]]:_(<4 x s32>) = G_AND [[COPY]], [[COPY1]]
+ ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(<4 x s16>) = G_TRUNC [[AND]](<4 x s32>)
+ ; CHECK-NEXT: $x0 = COPY [[TRUNC]](<4 x s16>)
+ %0:_(<4 x s32>) = COPY $q0
+ %1:_(<4 x s32>) = COPY $q1
+ %2:_(<4 x s16>) = G_TRUNC %0
+ %3:_(<4 x s16>) = G_TRUNC %1
+ %4:_(<4 x s16>) = G_AND %2, %3
+ $x0 = COPY %4
+...
+---
+name: and_trunc_vector_multiuse
+body: |
+ bb.0:
+ liveins: $q0, $q1, $x0
+ ; CHECK-LABEL: name: and_trunc_vector_multiuse
+ ; CHECK: liveins: $q0, $q1, $x0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $q1
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(p0) = COPY $x2
+ ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(<4 x s16>) = G_TRUNC [[COPY]](<4 x s32>)
+ ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(<4 x s16>) = G_TRUNC [[COPY1]](<4 x s32>)
+ ; CHECK-NEXT: G_STORE [[TRUNC]](<4 x s16>), [[COPY2]](p0) :: (store (<4 x s16>))
+ ; CHECK-NEXT: [[AND:%[0-9]+]]:_(<4 x s16>) = G_AND [[TRUNC]], [[TRUNC1]]
+ ; CHECK-NEXT: $x0 = COPY [[AND]](<4 x s16>)
+ %0:_(<4 x s32>) = COPY $q0
+ %1:_(<4 x s32>) = COPY $q1
+ %5:_(p0) = COPY $x2
+ %2:_(<4 x s16>) = G_TRUNC %0
+ %3:_(<4 x s16>) = G_TRUNC %1
+ G_STORE %2, %5 :: (store (<4 x s16>))
+ %4:_(<4 x s16>) = G_AND %2, %3
+ $x0 = COPY %4
+...
+---
+name: and_trunc_freeze
+body: |
+ bb.0:
+ liveins: $w0, $w1
+ ; CHECK-LABEL: name: and_trunc_freeze
+ ; CHECK: liveins: $w0, $w1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $w1
+ ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(s32) = G_FREEZE [[COPY]]
+ ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[FREEZE]], [[COPY1]]
+ ; CHECK-NEXT: $w0 = COPY [[AND]](s32)
+ %0:_(s32) = COPY $w0
+ %1:_(s32) = COPY $w1
+ %2:_(s16) = G_TRUNC %0
+ %3:_(s16) = G_TRUNC %1
+ %6:_(s16) = G_FREEZE %2
+ %4:_(s16) = G_AND %6, %3
+ %5:_(s32) = G_ANYEXT %4
+ $w0 = COPY %5
+...
+---
+name: and_trunc_freeze_both
+body: |
+ bb.0:
+ liveins: $w0, $w1
+ ; CHECK-LABEL: name: and_trunc_freeze_both
+ ; CHECK: liveins: $w0, $w1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $w1
+ ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(s32) = G_FREEZE [[COPY]]
+ ; CHECK-NEXT: [[FREEZE1:%[0-9]+]]:_(s32) = G_FREEZE [[COPY1]]
+ ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[FREEZE]], [[FREEZE1]]
+ ; CHECK-NEXT: $w0 = COPY [[AND]](s32)
+ %0:_(s32) = COPY $w0
+ %1:_(s32) = COPY $w1
+ %2:_(s16) = G_TRUNC %0
+ %3:_(s16) = G_TRUNC %1
+ %6:_(s16) = G_FREEZE %2
+ %7:_(s16) = G_FREEZE %3
+ %4:_(s16) = G_AND %6, %7
+ %5:_(s32) = G_ANYEXT %4
+ $w0 = COPY %5
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/combine-select.mir b/llvm/test/CodeGen/AArch64/GlobalISel/combine-select.mir
index 2bf7e84a379ba0..d4bca5dacf0a2b 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/combine-select.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/combine-select.mir
@@ -1,7 +1,8 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-# RUN: llc -run-pass=aarch64-prelegalizer-combiner -verify-machineinstrs -mtriple aarch64-unknown- --aarch64postlegalizercombiner-only-enable-rule="select_to_logical" %s -o - | FileCheck %s
+# RUN: llc -run-pass=aarch64-prelegalizer-combiner -verify-machineinstrs -mtriple aarch64-unknown-unknown %s -o - | FileCheck %s
# RUN: llc -debugify-and-strip-all-safe -run-pass=aarch64-prelegalizer-combiner -verify-machineinstrs -mtriple aarch64-unknown-unknown %s -o - | FileCheck %s
# REQUIRES: asserts
+
---
# select (c, x, x) -> x
name: test_combine_select_same_res
@@ -200,10 +201,9 @@ body: |
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $x1
- ; CHECK-NEXT: %c:_(s1) = G_TRUNC [[COPY]](s64)
- ; CHECK-NEXT: %t:_(s1) = G_TRUNC [[COPY1]](s64)
- ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(s1) = G_FREEZE %t
- ; CHECK-NEXT: %sel:_(s1) = G_AND %c, [[FREEZE]]
+ ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(s64) = G_FREEZE [[COPY1]]
+ ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[COPY]], [[FREEZE]]
+ ; CHECK-NEXT: %sel:_(s1) = G_TRUNC [[AND]](s64)
; CHECK-NEXT: %ext:_(s32) = G_ANYEXT %sel(s1)
; CHECK-NEXT: $w0 = COPY %ext(s32)
%0:_(s64) = COPY $x0
@@ -228,10 +228,9 @@ body: |
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $x1
- ; CHECK-NEXT: %c:_(s1) = G_TRUNC [[COPY]](s64)
- ; CHECK-NEXT: %t:_(s1) = G_TRUNC [[COPY1]](s64)
- ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(s1) = G_FREEZE %t
- ; CHECK-NEXT: %sel:_(s1) = G_AND %c, [[FREEZE]]
+ ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(s64) = G_FREEZE [[COPY1]]
+ ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[COPY]], [[FREEZE]]
+ ; CHECK-NEXT: %sel:_(s1) = G_TRUNC [[AND]](s64)
; CHECK-NEXT: %ext:_(s32) = G_ANYEXT %sel(s1)
; CHECK-NEXT: $w0 = COPY %ext(s32)
%0:_(s64) = COPY $x0
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizer-combiner-narrow-binop-feeding-add.mir b/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizer-combiner-narrow-binop-feeding-add.mir
index fb19cda303d365..b2a9a802261252 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizer-combiner-narrow-binop-feeding-add.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizer-combiner-narrow-binop-feeding-add.mir
@@ -84,10 +84,9 @@ body: |
; CHECK: liveins: $x0, $x1
; CHECK: %binop_lhs:_(s64) = COPY $x0
; CHECK: %binop_rhs:_(s64) = COPY $x1
- ; CHECK: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC %binop_lhs(s64)
- ; CHECK: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC %binop_rhs(s64)
- ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[TRUNC]], [[TRUNC1]]
- ; CHECK: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[AND]](s32)
+ ; CHECK: %binop:_(s64) = G_AND %binop_lhs, %binop_rhs
+ ; CHECK: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC %binop(s64)
+ ; CHECK: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[TRUNC]](s32)
; CHECK: $x0 = COPY [[ZEXT]](s64)
; CHECK: RET_ReallyLR implicit $x0
%binop_lhs:_(s64) = COPY $x0
|
Should move this to tablegen patterns.
@arsenm Which file would this be? I had tried searching for something like this originally but couldn't find anything.
https://llvm.org/docs/GlobalISel/MIRPatterns.html. It would go in the generic Combine.td, just with most of the C++ boilerplate replaced with tablegen.
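For reference, a rough sketch of what such a rule might look like in Combine.td. The rule name, the C++ type-equality predicate, and the use of GITypeOf are my guesses at a shape, not code from this PR:

def and_of_truncs : GICombineRule<
  (defs root:$dst),
  (match (G_TRUNC $t0, $x),
         (G_TRUNC $t1, $y),
         (G_AND $dst, $t0, $t1),
         // Require matching source types so the wide G_AND is well formed.
         [{ return MRI.getType(${x}.getReg()) == MRI.getType(${y}.getReg()); }]),
  (apply (G_AND GITypeOf<"$x">:$tmp, $x, $y),
         (G_TRUNC $dst, $tmp))>;

The C++ fragment is exactly the type check raised in the next comment; whether that constraint can be expressed natively in the pattern language is the open question.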
Hmm, one of the issues is that we need to match the types as well (the types of x and y in trunc(x) and trunc(y) need to be the same) so that the and(x, y) is well formed. I am not sure if this is possible through TableGen. I am considering moving the code into https://github.com/llvm/llvm-project/blob/main/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp#L2960, which is where the folds for `logic (ext X), (ext Y)` live.
Why is this a good idea? Intuitively it means we'll do the `and` in a wider type than necessary, which could end up generating more code if the wider type is wider than a register. Is it a canonicalization? Does it enable other optimizations? Is there a general strategy to pull truncs out of other expressions?
@@ -963,6 +963,9 @@ class CombinerHelper {

   // Simplify (cmp cc0 x, y) (&& or ||) (cmp cc1 x, y) -> cmp cc2 x, y.
   bool tryFoldLogicOfFCmps(GLogicalBinOp *Logic, BuildFnTy &MatchInfo);

 + // Simplify (trunc v1) && (trunc v2) -> trunc (v1 && v2)
`&&` seems wrong here?
- // Simplify (trunc v1) && (trunc v2) -> trunc (v1 && v2)
+ // Simplify (trunc v1) & (trunc v2) -> trunc (v1 & v2)
If we need to add something to support this, we should add the support to tablegen. This is not an uncommon type of scenario.
As in my comment above, I am planning on moving this code into the `logic (ext X), (ext Y)` fold in CombinerHelper.

I think that is beyond the scope of this patch, and is probably not possible to do immediately. We also need to check if the node we are generating is legal if we are not in the pre-legalizer combiner, as well as checking if the nodes have one non-dbg use. Not sure if TableGen should be extended to handle these cases as well.
The freeze looks unsafe. It should be a separate patch.
You can still put the pattern in MIR and the business logic in the CombinerHelper.
I have moved the code to the `logic (ext X), (ext Y)` fold in CombinerHelper.cpp.
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $w1
; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND %8, [[COPY1]]
; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(s32) = G_FREEZE [[COPY]]
The freeze is dead. `$w0` is not frozen.
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $w1
; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND %9, %10
; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(s32) = G_FREEZE [[COPY]]
; CHECK-NEXT: [[FREEZE1:%[0-9]+]]:_(s32) = G_FREEZE [[COPY1]]
Both freezes are dead. The registers are not frozen anymore.
Ah, this looks to be an issue with using `buildInstr` in `applyBuildInstructionSteps` and then running the `OperandFn`s. I believe this can be fixed by inserting the built instruction into the function after running the `OperandFn`s instead of before.
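For concreteness, a sketch of that reordering (hand-written here; the `InstrToBuild` fields and surrounding scope are assumed from CombinerHelper's `InstructionBuildSteps`, not copied from the patch):

// Create the instruction detached from any basic block so the operand
// callbacks run before anything observable is inserted.
MachineInstr *NewMI =
    MF.CreateMachineInstr(TII.get(InstrToBuild.Opcode), MI.getDebugLoc());
MachineInstrBuilder MIB(MF, NewMI);

// Let the OperandFns add operands to the detached instruction first...
for (auto &OperandFn : InstrToBuild.OperandFns)
  OperandFn(MIB);

// ...and only then insert the fully-formed instruction.
MI.getParent()->insert(MI.getIterator(), NewMI);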
Remove the freeze code. You cannot look through freeze. It is unsafe.
Hmm, the problem with the test case I'm looking at is that select gets optimized to an and with a freeze, so removing the freeze fold will make this patch unable to fix that test case. Is it still not ok to look through a freeze if it only has one use? I would really like the freeze to be optimized how it is with this patch... At the very least, if the semantics of freeze in GMIR match LLVM IR, then Alive2 does not have issues with optimizing this way for a single-use freeze: https://alive2.llvm.org/ce/z/dDWS5u.
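For reference, the kind of src/tgt pair that Alive2 link encodes looks roughly like this (my reconstruction of the query, not a copy of it):

define i1 @src(i64 %x, i64 %y) {
  %tx = trunc i64 %x to i1
  %ty = trunc i64 %y to i1
  %fy = freeze i1 %ty
  %r = and i1 %tx, %fy
  ret i1 %r
}

define i1 @tgt(i64 %x, i64 %y) {
  %fy = freeze i64 %y
  %a = and i64 %x, %fy
  %r = trunc i64 %a to i1
  ret i1 %r
}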
InstCombine handles freeze separately. We are going to follow that pattern.
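The InstCombine behavior being referenced pushes a freeze up through a single-use def that merely propagates poison. Roughly, in IR (my own illustration of the pattern, not a quote from InstCombine):

  %t = trunc i64 %x to i1
  %f = freeze i1 %t

becomes

  %fx = freeze i64 %x
  %t2 = trunc i64 %fx to i1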
Okay, I guess that would be the visit here.
This fixes the case for
#89023 (comment) ping @jayfoad

Also ping @arsenm and @davemgreen, I'm interested in hearing your thoughts on this. I am currently leaning towards adding this transform that InstCombine does to GlobalISel, which will handle the case I'm matching with this patch.
I don't have anything to add. I haven't been following this review closely.
#89363 has the poison symbols. There is no combine on freeze yet.
Force-pushed from 41d695a to e4d221e.
I have moved the freeze fold to #90618.
Needs testing for all opcodes.

The GISel implementation seems to be missing some of the free truncate/zext logic that the DAG version has. Arguably that would be decided by targets choosing to add the combine or not (provided that we had some more useful type predicate infrastructure).
%7:_(s16) = G_FREEZE %3
%4:_(s16) = G_AND %6, %7
%5:_(s32) = G_ANYEXT %4
$w0 = COPY %5
The test only covers `and`, but this will also impact `or` and `xor`.
I have moved the tests to `combine-op-trunc.mir`, and have added test cases for G_OR and G_XOR.
%1:_(s32) = COPY $w1
%2:_(s16) = G_TRUNC %0
%3:_(s16) = G_TRUNC %1
%6:_(s16) = G_FREEZE %2
Why is there still a freeze?
I will be rebasing this PR on top of #90618 once it lands, then the folds through the freezes will occur.
%2:_(s16) = G_TRUNC %0
%3:_(s16) = G_TRUNC %1
%6:_(s16) = G_FREEZE %2
%7:_(s16) = G_FREEZE %3
Same here.
Force-pushed from e4d221e to 94bf09a.
$x0 = COPY %4
...

# Freezes should get pushed through truncs.
These folds will trigger once #90618 lands.
This is done now.
Looks like CI is failing because of flang causing OOM and stack overflow on Windows with MSVC. Fun.
// Match: logic (ext X), (ext Y) --> ext (logic X, Y)
// Match: logic (trunc X), (trunc Y) -> trunc (logic X, Y)
The logic for the DAG combine treats the extend and truncate cases a bit differently. The truncate case considers whether the extend and truncate are free, and doesn't do this if they are. Arguably this could be handled by the target not adding the combine, but I don't think we have a way to add combines for specific types right now
I will look into this.
I was able to do this using TLI. Please have a look.
Force-pushed from 94bf09a to 9eb6def.
Force-pushed from eec208e to 376c026.
Rebased on main.
EVT DstEVT = getApproximateEVTForLLT(MRI.getType(Dst), MF->getDataLayout(),
                                     MF->getFunction().getContext());
EVT XEVT = getApproximateEVTForLLT(XTy, MF->getDataLayout(),
                                   MF->getFunction().getContext());
We already have an LLT-native isTruncateFree/isZExtFree; you don't need to figure out the EVT here.
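For reference, the DAG version of this bailout in DAGCombiner::hoistLogicOpWithSameOpcodeHands is roughly `if (TLI.isZExtFree(VT, XVT) && TLI.isTruncateFree(XVT, VT)) return SDValue();`. A sketch of the LLT-native equivalent, assuming the `Dst`/`XTy` variables from the quoted hunk (not the final committed code):

const TargetLowering &TLI = getTargetLowering();
const DataLayout &DL = MF->getDataLayout();
LLVMContext &Ctx = MF->getFunction().getContext();
LLT DstTy = MRI.getType(Dst);
// If truncating is free and re-extending is free, widening the logic op
// buys nothing, so skip the combine.
if (TLI.isZExtFree(DstTy, XTy, DL, Ctx) &&
    TLI.isTruncateFree(XTy, DstTy, DL, Ctx))
  return false;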
Okay, done.
; GFX6-NEXT: v_mov_b32_e32 v0, s0
; GFX6-NEXT: v_alignbit_b32 v0, s1, v0, 24
; GFX6-NEXT: v_readfirstlane_b32 s0, v0
This is worse, but not the fault of this patch.
@tschuett Any changes you would like to see on this patch? You had posted a review with changes requested; please let me know if it seems ok before I land this.
Force-pushed from f3de4e1 to f58bbe4.
Looks like no issues. I'm landing this patch.