[GlobalIsel] Combine trunc of binop #107721
Conversation
@llvm/pr-subscribers-backend-amdgpu @llvm/pr-subscribers-llvm-globalisel

Author: Thorsten Schütt (tschuett)

Changes

trunc (binop X, C) --> binop (trunc X, trunc C)

Try to narrow the width of math or bitwise logic instructions by pulling a truncate ahead of binary operators. Vx and Nx cores consider 32-bit and 64-bit basic arithmetic equal in costs.

Patch is 438.22 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/107721.diff

18 Files Affected:
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
index 9b62d6067be39c..828532dcffb7d3 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
@@ -831,6 +831,12 @@ class CombinerHelper {
/// Combine ors.
bool matchOr(MachineInstr &MI, BuildFnTy &MatchInfo);
+ /// trunc (binop X, C) --> binop (trunc X, trunc C).
+ bool matchNarrowBinop(const MachineInstr &TruncMI,
+ const MachineInstr &BinopMI, BuildFnTy &MatchInfo);
+
+ bool matchCastOfInteger(const MachineInstr &CastMI, APInt &MatchInfo);
+
/// Combine addos.
bool matchAddOverflow(MachineInstr &MI, BuildFnTy &MatchInfo);
diff --git a/llvm/include/llvm/Target/GlobalISel/Combine.td b/llvm/include/llvm/Target/GlobalISel/Combine.td
index 525cc815e73cef..a595a51d7b01ff 100644
--- a/llvm/include/llvm/Target/GlobalISel/Combine.td
+++ b/llvm/include/llvm/Target/GlobalISel/Combine.td
@@ -1867,6 +1867,33 @@ class buildvector_of_opcode<Instruction castOpcode> : GICombineRule <
def buildvector_of_truncate : buildvector_of_opcode<G_TRUNC>;
+// narrow binop.
+// trunc (binop X, C) --> binop (trunc X, trunc C)
+class narrow_binop_opcode<Instruction binopOpcode> : GICombineRule <
+ (defs root:$root, build_fn_matchinfo:$matchinfo),
+ (match (G_CONSTANT $const, $imm),
+ (binopOpcode $binop, $x, $const):$Binop,
+ (G_TRUNC $root, $binop):$Trunc,
+ [{ return Helper.matchNarrowBinop(*${Trunc}, *${Binop}, ${matchinfo}); }]),
+ (apply [{ Helper.applyBuildFn(*${Trunc}, ${matchinfo}); }])>;
+
+def narrow_binop_add : narrow_binop_opcode<G_ADD>;
+def narrow_binop_sub : narrow_binop_opcode<G_SUB>;
+def narrow_binop_mul : narrow_binop_opcode<G_MUL>;
+def narrow_binop_and : narrow_binop_opcode<G_AND>;
+def narrow_binop_or : narrow_binop_opcode<G_OR>;
+def narrow_binop_xor : narrow_binop_opcode<G_XOR>;
+
+// Cast of integer.
+class integer_of_opcode<Instruction castOpcode> : GICombineRule <
+ (defs root:$root, apint_matchinfo:$matchinfo),
+ (match (G_CONSTANT $int, $imm),
+ (castOpcode $root, $int):$Cast,
+ [{ return Helper.matchCastOfInteger(*${Cast}, ${matchinfo}); }]),
+ (apply [{ Helper.replaceInstWithConstant(*${Cast}, ${matchinfo}); }])>;
+
+def integer_of_truncate : integer_of_opcode<G_TRUNC>;
+
def cast_combines: GICombineGroup<[
truncate_of_zext,
truncate_of_sext,
@@ -1881,7 +1908,14 @@ def cast_combines: GICombineGroup<[
anyext_of_anyext,
anyext_of_zext,
anyext_of_sext,
- buildvector_of_truncate
+ buildvector_of_truncate,
+ narrow_binop_add,
+ narrow_binop_sub,
+ narrow_binop_mul,
+ narrow_binop_and,
+ narrow_binop_or,
+ narrow_binop_xor,
+ integer_of_truncate
]>;
diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelperCasts.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelperCasts.cpp
index 8714fdabf65494..30557e6a2304e6 100644
--- a/llvm/lib/CodeGen/GlobalISel/CombinerHelperCasts.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelperCasts.cpp
@@ -313,3 +313,49 @@ bool CombinerHelper::matchCastOfBuildVector(const MachineInstr &CastMI,
return true;
}
+
+bool CombinerHelper::matchNarrowBinop(const MachineInstr &TruncMI,
+ const MachineInstr &BinopMI,
+ BuildFnTy &MatchInfo) {
+ const GTrunc *Trunc = cast<GTrunc>(&TruncMI);
+ const GBinOp *BinOp = cast<GBinOp>(&BinopMI);
+
+ if (!MRI.hasOneNonDBGUse(BinOp->getReg(0)))
+ return false;
+
+ Register Dst = Trunc->getReg(0);
+ LLT DstTy = MRI.getType(Dst);
+
+ // Is narrow binop legal?
+ if (!isLegalOrBeforeLegalizer({BinOp->getOpcode(), {DstTy}}))
+ return false;
+
+ MatchInfo = [=](MachineIRBuilder &B) {
+ auto LHS = B.buildTrunc(DstTy, BinOp->getLHSReg());
+ auto RHS = B.buildTrunc(DstTy, BinOp->getRHSReg());
+ B.buildInstr(BinOp->getOpcode(), {Dst}, {LHS, RHS});
+ };
+
+ return true;
+}
+
+bool CombinerHelper::matchCastOfInteger(const MachineInstr &CastMI,
+ APInt &MatchInfo) {
+ const GExtOrTruncOp *Cast = cast<GExtOrTruncOp>(&CastMI);
+
+ APInt Input = getIConstantFromReg(Cast->getSrcReg(), MRI);
+
+ LLT DstTy = MRI.getType(Cast->getReg(0));
+
+ if (!isConstantLegalOrBeforeLegalizer(DstTy))
+ return false;
+
+ switch (Cast->getOpcode()) {
+ case TargetOpcode::G_TRUNC: {
+ MatchInfo = Input.trunc(DstTy.getScalarSizeInBits());
+ return true;
+ }
+ default:
+ return false;
+ }
+}
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/combine-narrow-binop.mir b/llvm/test/CodeGen/AArch64/GlobalISel/combine-narrow-binop.mir
new file mode 100644
index 00000000000000..f207e9c149a476
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/combine-narrow-binop.mir
@@ -0,0 +1,136 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -o - -mtriple=aarch64-unknown-unknown -run-pass=aarch64-prelegalizer-combiner -verify-machineinstrs %s | FileCheck %s --check-prefixes=CHECK
+
+---
+name: test_combine_trunc_xor_i64
+body: |
+ bb.1:
+ ; CHECK-LABEL: name: test_combine_trunc_xor_i64
+ ; CHECK: %lhs:_(s64) = COPY $x0
+ ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC %lhs(s64)
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 5
+ ; CHECK-NEXT: %small:_(s32) = G_XOR [[TRUNC]], [[C]]
+ ; CHECK-NEXT: $w0 = COPY %small(s32)
+ %lhs:_(s64) = COPY $x0
+ %rhs:_(s64) = G_CONSTANT i64 5
+ %res:_(s64) = G_XOR %lhs, %rhs
+ %small:_(s32) = G_TRUNC %res(s64)
+ $w0 = COPY %small(s32)
+...
+---
+name: test_combine_trunc_add_i64
+body: |
+ bb.1:
+ ; CHECK-LABEL: name: test_combine_trunc_add_i64
+ ; CHECK: %lhs:_(s64) = COPY $x0
+ ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC %lhs(s64)
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 5
+ ; CHECK-NEXT: %small:_(s32) = G_ADD [[TRUNC]], [[C]]
+ ; CHECK-NEXT: $w0 = COPY %small(s32)
+ %lhs:_(s64) = COPY $x0
+ %rhs:_(s64) = G_CONSTANT i64 5
+ %res:_(s64) = G_ADD %lhs, %rhs
+ %small:_(s32) = G_TRUNC %res(s64)
+ $w0 = COPY %small(s32)
+...
+---
+name: test_combine_trunc_mul_i64
+body: |
+ bb.1:
+ ; CHECK-LABEL: name: test_combine_trunc_mul_i64
+ ; CHECK: %lhs:_(s64) = COPY $x0
+ ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC %lhs(s64)
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 5
+ ; CHECK-NEXT: %small:_(s32) = G_MUL [[TRUNC]], [[C]]
+ ; CHECK-NEXT: $w0 = COPY %small(s32)
+ %lhs:_(s64) = COPY $x0
+ %rhs:_(s64) = G_CONSTANT i64 5
+ %res:_(s64) = G_MUL %lhs, %rhs
+ %small:_(s32) = G_TRUNC %res(s64)
+ $w0 = COPY %small(s32)
+...
+---
+name: test_combine_trunc_and_i64
+body: |
+ bb.1:
+ ; CHECK-LABEL: name: test_combine_trunc_and_i64
+ ; CHECK: %lhs:_(s64) = COPY $x0
+ ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC %lhs(s64)
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 5
+ ; CHECK-NEXT: %small:_(s32) = G_AND [[TRUNC]], [[C]]
+ ; CHECK-NEXT: $w0 = COPY %small(s32)
+ %lhs:_(s64) = COPY $x0
+ %rhs:_(s64) = G_CONSTANT i64 5
+ %res:_(s64) = G_AND %lhs, %rhs
+ %small:_(s32) = G_TRUNC %res(s64)
+ $w0 = COPY %small(s32)
+...
+---
+name: test_combine_trunc_or_i64
+body: |
+ bb.1:
+ ; CHECK-LABEL: name: test_combine_trunc_or_i64
+ ; CHECK: %lhs:_(s64) = COPY $x0
+ ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC %lhs(s64)
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 5
+ ; CHECK-NEXT: %small:_(s32) = G_OR [[TRUNC]], [[C]]
+ ; CHECK-NEXT: $w0 = COPY %small(s32)
+ %lhs:_(s64) = COPY $x0
+ %rhs:_(s64) = G_CONSTANT i64 5
+ %res:_(s64) = G_OR %lhs, %rhs
+ %small:_(s32) = G_TRUNC %res(s64)
+ $w0 = COPY %small(s32)
+...
+---
+name: test_combine_trunc_sub_i128
+body: |
+ bb.1:
+ ; CHECK-LABEL: name: test_combine_trunc_sub_i128
+ ; CHECK: %lhs:_(s128) = COPY $q0
+ ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC %lhs(s128)
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 5
+ ; CHECK-NEXT: %small:_(s32) = G_SUB [[TRUNC]], [[C]]
+ ; CHECK-NEXT: $w0 = COPY %small(s32)
+ %lhs:_(s128) = COPY $q0
+ %rhs:_(s128) = G_CONSTANT i128 5
+ %res:_(s128) = G_SUB %lhs, %rhs
+ %small:_(s32) = G_TRUNC %res(s128)
+ $w0 = COPY %small(s32)
+...
+---
+name: test_combine_trunc_sub_i128_multi_use
+body: |
+ bb.1:
+ ; CHECK-LABEL: name: test_combine_trunc_sub_i128_multi_use
+ ; CHECK: %lhs:_(s128) = COPY $q0
+ ; CHECK-NEXT: %rhs:_(s128) = G_CONSTANT i128 5
+ ; CHECK-NEXT: %res:_(s128) = G_SUB %lhs, %rhs
+ ; CHECK-NEXT: %small:_(s32) = G_TRUNC %res(s128)
+ ; CHECK-NEXT: $q0 = COPY %res(s128)
+ ; CHECK-NEXT: $w0 = COPY %small(s32)
+ %lhs:_(s128) = COPY $q0
+ %rhs:_(s128) = G_CONSTANT i128 5
+ %res:_(s128) = G_SUB %lhs, %rhs
+ %small:_(s32) = G_TRUNC %res(s128)
+ $q0 = COPY %res(s128)
+ $w0 = COPY %small(s32)
+...
+---
+name: test_combine_trunc_xor_vector_pattern_did_not_match
+body: |
+ bb.1:
+ ; CHECK-LABEL: name: test_combine_trunc_xor_vector_pattern_did_not_match
+ ; CHECK: %arg1:_(s64) = COPY $x0
+ ; CHECK-NEXT: %arg2:_(s64) = COPY $x0
+ ; CHECK-NEXT: %lhs:_(<2 x s64>) = G_BUILD_VECTOR %arg1(s64), %arg2(s64)
+ ; CHECK-NEXT: %rhs:_(<2 x s64>) = G_BUILD_VECTOR %arg1(s64), %arg2(s64)
+ ; CHECK-NEXT: %res:_(<2 x s64>) = G_XOR %lhs, %rhs
+ ; CHECK-NEXT: %small:_(<2 x s16>) = G_TRUNC %res(<2 x s64>)
+ ; CHECK-NEXT: $w0 = COPY %small(<2 x s16>)
+ %arg1:_(s64) = COPY $x0
+ %arg2:_(s64) = COPY $x0
+ %lhs:_(<2 x s64>) = G_BUILD_VECTOR %arg1(s64), %arg2(s64)
+ %rhs:_(<2 x s64>) = G_BUILD_VECTOR %arg1(s64), %arg2(s64)
+ %res:_(<2 x s64>) = G_XOR %lhs, %rhs
+ %small:_(<2 x s16>) = G_TRUNC %res(<2 x s64>)
+ $w0 = COPY %small(<2 x s16>)
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/inline-memset.mir b/llvm/test/CodeGen/AArch64/GlobalISel/inline-memset.mir
index 00c7fc4cab3ab0..9ed1e2d9eee3b4 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/inline-memset.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/inline-memset.mir
@@ -64,13 +64,14 @@ body: |
; CHECK-LABEL: name: test_ms1
; CHECK: liveins: $w1, $w2, $x0
- ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
- ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $w1
- ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $w2
- ; CHECK: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[COPY1]](s32)
- ; CHECK: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[COPY2]](s32)
- ; CHECK: G_MEMSET [[COPY]](p0), [[TRUNC]](s8), [[ZEXT]](s64), 1 :: (store (s8) into %ir.dst)
- ; CHECK: RET_ReallyLR
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $w1
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $w2
+ ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[COPY1]](s32)
+ ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[COPY2]](s32)
+ ; CHECK-NEXT: G_MEMSET [[COPY]](p0), [[TRUNC]](s8), [[ZEXT]](s64), 1 :: (store (s8) into %ir.dst)
+ ; CHECK-NEXT: RET_ReallyLR
%0:_(p0) = COPY $x0
%1:_(s32) = COPY $w1
%2:_(s32) = COPY $w2
@@ -90,17 +91,18 @@ body: |
; CHECK-LABEL: name: test_ms2_const
; CHECK: liveins: $w1, $x0
- ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
- ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $w1
- ; CHECK: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[COPY1]](s32)
- ; CHECK: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[TRUNC]](s8)
- ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 72340172838076673
- ; CHECK: [[MUL:%[0-9]+]]:_(s64) = G_MUL [[ZEXT]], [[C]]
- ; CHECK: G_STORE [[MUL]](s64), [[COPY]](p0) :: (store (s64) into %ir.dst, align 1)
- ; CHECK: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
- ; CHECK: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64)
- ; CHECK: G_STORE [[MUL]](s64), [[PTR_ADD]](p0) :: (store (s64) into %ir.dst + 8, align 1)
- ; CHECK: RET_ReallyLR
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $w1
+ ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[COPY1]](s32)
+ ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[TRUNC]](s8)
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 72340172838076673
+ ; CHECK-NEXT: [[MUL:%[0-9]+]]:_(s64) = G_MUL [[ZEXT]], [[C]]
+ ; CHECK-NEXT: G_STORE [[MUL]](s64), [[COPY]](p0) :: (store (s64) into %ir.dst, align 1)
+ ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+ ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64)
+ ; CHECK-NEXT: G_STORE [[MUL]](s64), [[PTR_ADD]](p0) :: (store (s64) into %ir.dst + 8, align 1)
+ ; CHECK-NEXT: RET_ReallyLR
%0:_(p0) = COPY $x0
%1:_(s32) = COPY $w1
%3:_(s64) = G_CONSTANT i64 16
@@ -119,20 +121,21 @@ body: |
; CHECK-LABEL: name: test_zero_const
; CHECK: liveins: $w1, $x0
- ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
- ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
- ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[C]](s64), [[C]](s64)
- ; CHECK: G_STORE [[BUILD_VECTOR]](<2 x s64>), [[COPY]](p0) :: (store (<2 x s64>) into %ir.dst, align 1)
- ; CHECK: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
- ; CHECK: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64)
- ; CHECK: G_STORE [[BUILD_VECTOR]](<2 x s64>), [[PTR_ADD]](p0) :: (store (<2 x s64>) into %ir.dst + 16, align 1)
- ; CHECK: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 32
- ; CHECK: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64)
- ; CHECK: G_STORE [[BUILD_VECTOR]](<2 x s64>), [[PTR_ADD1]](p0) :: (store (<2 x s64>) into %ir.dst + 32, align 1)
- ; CHECK: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 48
- ; CHECK: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C3]](s64)
- ; CHECK: G_STORE [[BUILD_VECTOR]](<2 x s64>), [[PTR_ADD2]](p0) :: (store (<2 x s64>) into %ir.dst + 48, align 1)
- ; CHECK: RET_ReallyLR
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+ ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[C]](s64), [[C]](s64)
+ ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<2 x s64>), [[COPY]](p0) :: (store (<2 x s64>) into %ir.dst, align 1)
+ ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
+ ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64)
+ ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<2 x s64>), [[PTR_ADD]](p0) :: (store (<2 x s64>) into %ir.dst + 16, align 1)
+ ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 32
+ ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64)
+ ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<2 x s64>), [[PTR_ADD1]](p0) :: (store (<2 x s64>) into %ir.dst + 32, align 1)
+ ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 48
+ ; CHECK-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C3]](s64)
+ ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<2 x s64>), [[PTR_ADD2]](p0) :: (store (<2 x s64>) into %ir.dst + 48, align 1)
+ ; CHECK-NEXT: RET_ReallyLR
%0:_(p0) = COPY $x0
%1:_(s32) = G_CONSTANT i32 0
%3:_(s64) = G_CONSTANT i64 64
@@ -152,13 +155,14 @@ body: |
; CHECK-LABEL: name: test_ms3_const_both
; CHECK: liveins: $x0
- ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
- ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4629771061636907072
- ; CHECK: G_STORE [[C]](s64), [[COPY]](p0) :: (store (s64) into %ir.dst, align 1)
- ; CHECK: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
- ; CHECK: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64)
- ; CHECK: G_STORE [[C]](s64), [[PTR_ADD]](p0) :: (store (s64) into %ir.dst + 8, align 1)
- ; CHECK: RET_ReallyLR
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4629771061636907072
+ ; CHECK-NEXT: G_STORE [[C]](s64), [[COPY]](p0) :: (store (s64) into %ir.dst, align 1)
+ ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+ ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64)
+ ; CHECK-NEXT: G_STORE [[C]](s64), [[PTR_ADD]](p0) :: (store (s64) into %ir.dst + 8, align 1)
+ ; CHECK-NEXT: RET_ReallyLR
%0:_(p0) = COPY $x0
%1:_(s8) = G_CONSTANT i8 64
%2:_(s64) = G_CONSTANT i64 16
@@ -176,24 +180,25 @@ body: |
; CHECK-LABEL: name: test_ms_vector
; CHECK: liveins: $w1, $x0
- ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
- ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $w1
- ; CHECK: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[COPY1]](s32)
- ; CHECK: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[TRUNC]](s8)
- ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 72340172838076673
- ; CHECK: [[MUL:%[0-9]+]]:_(s64) = G_MUL [[ZEXT]], [[C]]
- ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MUL]](s64), [[MUL]](s64)
- ; CHECK: G_STORE [[BUILD_VECTOR]](<2 x s64>), [[COPY]](p0) :: (store (<2 x s64>) into %ir.dst, align 1)
- ; CHECK: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
- ; CHECK: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64)
- ; CHECK: G_STORE [[BUILD_VECTOR]](<2 x s64>), [[PTR_ADD]](p0) :: (store (<2 x s64>) into %ir.dst + 16, align 1)
- ; CHECK: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 32
- ; CHECK: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64)
- ; CHECK: G_STORE [[BUILD_VECTOR]](<2 x s64>), [[PTR_ADD1]](p0) :: (store (<2 x s64>) into %ir.dst + 32, align 1)
- ; CHECK: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 44
- ; CHECK: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C3]](s64)
- ; CHECK: G_STORE [[BUILD_VECTOR]](<2 x s64>), [[PTR_ADD2]](p0) :: (store (<2 x s64>) into %ir.dst + 44, align 1)
- ; CHECK: RET_ReallyLR
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $w1
+ ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[COPY1]](s32)
+ ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[TRUNC]](s8)
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 72340172838076673
+ ; CHECK-NEXT: [[MUL:%[0-9]+]]:_(s64) = G_MUL [[ZEXT]], [[C]]
+ ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MUL]](s64), [[MUL]](s64)
+ ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<2 x s64>), [[COPY]](p0) :: (store (<2 x s64>) into %ir.dst, align 1)
+ ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
+ ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64)
+ ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<2 x s64>), [[PTR_ADD]](p0) :: (store (<2 x s64>) into %ir.dst + 16, align 1)
+ ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 32
+ ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64)
+ ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<2 x s64>), [[PTR_ADD1]](p0) :: (store (<2 x s64>) into %ir.dst + 32, align 1)
+ ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 44
+ ; CHECK-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C3]](s64)
+ ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<2 x s64>), [[PTR_ADD2]](p0) :: (store (<2 x s64>) into %ir.dst + 44, align 1)
+ ; CHECK-NEXT: RET_ReallyLR
%0:_(p0) = COPY $x0
%1:_(s32) = COPY $w1
%3:_(s64) = G_CONSTANT i64 60
@@ -212,17 +217,18 @@ body: |
; CHECK-LABEL: name: test_ms4_const_both_unaligned
; CHECK: liveins: $x0
- ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
- ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4629771061636907072
- ; CHECK: G_STORE [[C]](s64), [[COPY]](p0) :: (store (s64) into %ir.dst, align 1)
- ; CHECK: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
- ; CHECK: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64)
- ; CHECK: G_STORE [[C]](s64), [[PTR_ADD]](p0) :: (store (s64) into %ir.dst + 8, align 1)
- ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[C]](s64)
- ; CHECK: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
- ; CHECK: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64)
- ; CHECK: G_STORE [[TRUNC]](s16), [[PTR_ADD1]](p0) :: (store (s16) into %ir.dst + ...
[truncated]
Why is this only limited to constant RHS operands? And could this not cause an infinite loop with #89023?
  [{ return Helper.matchCastOfInteger(*${Cast}, ${matchinfo}); }]),
  (apply [{ Helper.replaceInstWithConstant(*${Cast}, ${matchinfo}); }])>;

def integer_of_truncate : integer_of_opcode<G_TRUNC>;
Does this really not already exist as a fold?
I tried it twice, but it was never merged.
Ah, this exists in the artifact combiner: https://github.com/llvm/llvm-project/blob/main/llvm/include/llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h#L270-L281. I was a bit confused because I had run into this case before and it was getting folded.
If you're adding it to CombinerHelper, could it be removed from the artifact combiner? Not sure if that will lead to an infinite loop though. I think this can then also be introduced for anyext, sext, zext in that case.
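As a sketch (not part of this patch) of what the suggested extension could look like for G_ZEXT, a cast of a G_CONSTANT folds into a constant of the wider type; the register names, widths, and the rule name integer_of_zext are hypothetical:

  ; before
  %c:_(s32) = G_CONSTANT i32 5
  %wide:_(s64) = G_ZEXT %c(s32)

  ; after a hypothetical integer_of_zext rule: the cast of the constant
  ; is replaced by a G_CONSTANT of the destination type
  %wide:_(s64) = G_CONSTANT i64 5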
The artifact combiner should not be touched. If this combine runs after the legalizer, the trunc(constant) fold would not fire anymore. There are no exts in this combine; we can only fold trunc(constant) in this PR.
For these binops, constants are only on the RHS:
G_SUB might be an exception.
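For example (values made up), a G_SUB can carry its constant on the LHS, since subtraction is not commutative and cannot be canonicalized to a constant-RHS form:

  %c:_(s64) = G_CONSTANT i64 5
  %res:_(s64) = G_SUB %c, %x   ; 5 - x: the constant stays on the LHS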
trunc (binop X, C) --> binop (trunc X, trunc C) is actually misleading. It is actually:
trunc (binop X, C) --> binop (trunc X, trunc C) --> binop (trunc X, C')
InstCombine is more aggressive in this combine.
Please ignore inline-memset.mir.
Thanks!
trunc (binop X, C) --> binop (trunc X, trunc C) --> binop (trunc X, C')
Try to narrow the width of math or bitwise logic instructions by pulling a truncate ahead of binary operators.
Vx and Nx cores consider 32-bit and 64-bit basic arithmetic equal in costs.
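As a rough before/after illustration of the two-step fold on generic MIR (mirroring the G_ADD test in the patch; the register names and the s64/s32 widths are just for the example):

  ; before: the add happens at 64 bits, then the result is truncated
  %c:_(s64) = G_CONSTANT i64 5
  %sum:_(s64) = G_ADD %x, %c
  %res:_(s32) = G_TRUNC %sum(s64)

  ; after narrow_binop_add plus integer_of_truncate: the add is done at 32 bits
  ; and the trunc of the constant collapses into a narrower G_CONSTANT
  %xt:_(s32) = G_TRUNC %x(s64)
  %ct:_(s32) = G_CONSTANT i32 5
  %res:_(s32) = G_ADD %xt, %ct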