Skip to content

Commit ba6a80d

Browse files
committed
[GlobalISel][AArch64] Replace N-bit G_ADD with N/2-bit G_ADD if the lower bits are known to be zero
1 parent dae7fb8 commit ba6a80d

File tree

4 files changed

+182
-1
lines changed

4 files changed

+182
-1
lines changed

llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h

+3
Original file line numberDiff line numberDiff line change
@@ -714,6 +714,9 @@ class CombinerHelper {
714714
/// (G_*SUBE x, y, 0) -> (G_*SUBO x, y)
715715
bool matchAddEToAddO(MachineInstr &MI, BuildFnTy &MatchInfo);
716716

717+
/// Match a scalar G_ADD whose full-width type is not legal but whose
/// half-width type is, and where the low half of one operand is known to be
/// all zeros. On success, MatchInfo builds:
///   unmerge both operands into (lo, hi) halves,
///   hi_res = G_ADD lhs_hi, rhs_hi (with the original instruction's flags),
///   dst = merge(lo of the other operand, hi_res).
bool matchAddWithKnownZeroLowerHalfBits(MachineInstr &MI,
718+
BuildFnTy &MatchInfo);
719+
717720
/// Transform (fadd x, fneg(y)) -> (fsub x, y)
718721
/// (fadd fneg(x), y) -> (fsub y, x)
719722
/// (fsub x, fneg(y)) -> (fadd x, y)

llvm/include/llvm/Target/GlobalISel/Combine.td

+9-1
Original file line numberDiff line numberDiff line change
@@ -1142,6 +1142,13 @@ def adde_to_addo: GICombineRule<
11421142
[{ return Helper.matchAddEToAddO(*${root}, ${matchinfo}); }]),
11431143
(apply [{ Helper.applyBuildFnNoErase(*${root}, ${matchinfo}); }])>;
11441144

1145+
// Narrow a G_ADD to a half-width G_ADD. The match is delegated to
// Helper.matchAddWithKnownZeroLowerHalfBits, which fills in the build
// function that Helper.applyBuildFn then runs on the root instruction.
def narrow_add_to_half: GICombineRule<
1146+
(defs root:$root, build_fn_matchinfo:$matchinfo),
1147+
(match (wip_match_opcode G_ADD):$root,
1148+
[{ return Helper.matchAddWithKnownZeroLowerHalfBits(*${root}, ${matchinfo}); }]),
1149+
(apply [{ Helper.applyBuildFn(*${root}, ${matchinfo}); }])
1150+
>;
1151+
11451152
def mulh_to_lshr : GICombineRule<
11461153
(defs root:$root),
11471154
(match (wip_match_opcode G_UMULH):$root,
@@ -1829,7 +1836,8 @@ def known_bits_simplifications : GICombineGroup<[
18291836
sext_inreg_to_zext_inreg]>;
18301837

18311838
def width_reduction_combines : GICombineGroup<[reduce_shl_of_extend,
1832-
narrow_binop_feeding_and]>;
1839+
narrow_binop_feeding_and,
1840+
narrow_add_to_half]>;
18331841

18341842
def phi_combines : GICombineGroup<[extend_through_phis]>;
18351843

llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp

+56
Original file line numberDiff line numberDiff line change
@@ -5099,6 +5099,62 @@ bool CombinerHelper::matchAddEToAddO(MachineInstr &MI, BuildFnTy &MatchInfo) {
50995099
return true;
51005100
}
51015101

5102+
/// Narrow a wide scalar G_ADD to a half-width G_ADD when the low half of one
/// of its operands is known to be zero.
///
/// If the low half of one operand is all zeros, the low half of the sum is
/// simply the low half of the other operand and no carry can propagate into
/// the high half. The addition therefore reduces to:
///
///   lhs_lo, lhs_hi = G_UNMERGE_VALUES lhs
///   rhs_lo, rhs_hi = G_UNMERGE_VALUES rhs
///   hi             = G_ADD lhs_hi, rhs_hi
///   dst            = G_MERGE_VALUES (lo of the non-zero-low operand), hi
///
/// The combine only fires when the full-width G_ADD is not legal, the
/// half-width G_ADD is legal, and the required unmerge/merge are legal (or we
/// are running before the legalizer).
///
/// \param MI        The G_ADD instruction to inspect.
/// \param MatchInfo Receives the function that builds the replacement.
/// \returns true if the combine applies and MatchInfo was populated.
bool CombinerHelper::matchAddWithKnownZeroLowerHalfBits(MachineInstr &MI,
                                                        BuildFnTy &MatchInfo) {
  GAdd *Add = cast<GAdd>(&MI);

  const Register DstReg = Add->getReg(0);
  const LLT FullTy = MRI.getType(DstReg);

  if (!FullTy.isScalar())
    return false;

  const uint64_t FullSize = FullTy.getSizeInBits();

  // Two half-width pieces must tile the full type exactly; otherwise the
  // G_UNMERGE_VALUES / G_MERGE_VALUES built below would be malformed (e.g.
  // s65 cannot be split into two s33 values).
  if (FullSize % 2 != 0)
    return false;

  const uint64_t HalfSize = FullSize / 2;
  const LLT HalfTy = LLT::scalar(HalfSize);

  // Only worthwhile when the wide add would have to be legalized anyway and
  // the narrow one can be selected directly.
  if (isLegal({TargetOpcode::G_ADD, {FullTy}}) ||
      !isLegal({TargetOpcode::G_ADD, {HalfTy}}) ||
      !isLegalOrBeforeLegalizer(
          {TargetOpcode::G_UNMERGE_VALUES, {HalfTy, FullTy}}) ||
      !isLegalOrBeforeLegalizer(
          {TargetOpcode::G_MERGE_VALUES, {FullTy, HalfTy}}))
    return false;

  const Register LhsReg = Add->getLHSReg();
  const Register RhsReg = Add->getRHSReg();

  // Returns true if the low HalfSize bits of Reg are all known to be zero.
  const auto HasKnownZeroLowHalf = [&](Register Reg) {
    const KnownBits LoBits = KB->getKnownBits(Reg).extractBits(HalfSize, 0);
    return LoBits.isConstant() && LoBits.getConstant().isZero();
  };

  // Prefer the RHS; the LHS is only queried when the RHS does not qualify.
  const bool RhsHasLoZeros = HasKnownZeroLowHalf(RhsReg);
  if (!RhsHasLoZeros && !HasKnownZeroLowHalf(LhsReg))
    return false;

  const auto Flags = MI.getFlags();

  MatchInfo = [=](MachineIRBuilder &MIRBuilder) {
    const auto LhsSubRegs = MIRBuilder.buildUnmerge(HalfTy, LhsReg);
    const auto RhsSubRegs = MIRBuilder.buildUnmerge(HalfTy, RhsReg);

    // Unmerge result 0 is the low half, result 1 is the high half.
    const auto ResHiReg = MIRBuilder.buildAdd(HalfTy, LhsSubRegs.getReg(1),
                                              RhsSubRegs.getReg(1), Flags);

    // The low half of the sum is the low half of whichever operand is not
    // known to have a zero low half.
    const Register ResLoReg =
        RhsHasLoZeros ? LhsSubRegs.getReg(0) : RhsSubRegs.getReg(0);
    MIRBuilder.buildMergeLikeInstr(DstReg, {ResLoReg, ResHiReg});
  };

  return true;
}
5157+
51025158
bool CombinerHelper::matchSubAddSameReg(MachineInstr &MI,
51035159
BuildFnTy &MatchInfo) {
51045160
assert(MI.getOpcode() == TargetOpcode::G_SUB);
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,114 @@
1+
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 2
2+
# RUN: llc -mtriple aarch64 -run-pass=aarch64-prelegalizer-combiner -verify-machineinstrs %s -o - | FileCheck %s
3+
4+
---
5+
name: add_s128_unknown_bits
6+
body: |
7+
bb.0:
8+
liveins: $q0, $q1
9+
; CHECK-LABEL: name: add_s128_unknown_bits
10+
; CHECK: liveins: $q0, $q1
11+
; CHECK-NEXT: {{ $}}
12+
; CHECK-NEXT: %lhs:_(s128) = COPY $q0
13+
; CHECK-NEXT: %rhs:_(s128) = COPY $q1
14+
; CHECK-NEXT: %res:_(s128) = G_ADD %lhs, %rhs
15+
; CHECK-NEXT: $q0 = COPY %res(s128)
16+
%lhs:_(s128) = COPY $q0
17+
%rhs:_(s128) = COPY $q1
18+
%res:_(s128) = G_ADD %lhs, %rhs
19+
$q0 = COPY %res(s128)
20+
...
21+
22+
---
23+
name: add_s64_low32_known_zero_bits
24+
body: |
25+
bb.0:
26+
liveins: $x0, $x1
27+
; CHECK-LABEL: name: add_s64_low32_known_zero_bits
28+
; CHECK: liveins: $x0, $x1
29+
; CHECK-NEXT: {{ $}}
30+
; CHECK-NEXT: %a:_(s64) = COPY $x0
31+
; CHECK-NEXT: %rhs:_(s64) = COPY $x1
32+
; CHECK-NEXT: %mask:_(s64) = G_CONSTANT i64 -4294967296
33+
; CHECK-NEXT: %lhs:_(s64) = G_AND %a, %mask
34+
; CHECK-NEXT: %res:_(s64) = G_ADD %lhs, %rhs
35+
; CHECK-NEXT: $x0 = COPY %res(s64)
36+
%a:_(s64) = COPY $x0
37+
%rhs:_(s64) = COPY $x1
38+
%mask:_(s64) = G_CONSTANT i64 -4294967296
39+
%lhs:_(s64) = G_AND %a, %mask
40+
%res:_(s64) = G_ADD %lhs, %rhs
41+
$x0 = COPY %res(s64)
42+
...
43+
44+
---
45+
name: add_s128_low64_known_nonzero_bits
46+
body: |
47+
bb.0:
48+
liveins: $q0, $q1
49+
; CHECK-LABEL: name: add_s128_low64_known_nonzero_bits
50+
; CHECK: liveins: $q0, $q1
51+
; CHECK-NEXT: {{ $}}
52+
; CHECK-NEXT: %a:_(s128) = COPY $q0
53+
; CHECK-NEXT: %rhs:_(s128) = COPY $q1
54+
; CHECK-NEXT: %mask:_(s128) = G_CONSTANT i128 18446744073709551615
55+
; CHECK-NEXT: %lhs:_(s128) = G_OR %a, %mask
56+
; CHECK-NEXT: %res:_(s128) = G_ADD %lhs, %rhs
57+
; CHECK-NEXT: $q0 = COPY %res(s128)
58+
%a:_(s128) = COPY $q0
59+
%rhs:_(s128) = COPY $q1
60+
%mask:_(s128) = G_CONSTANT i128 18446744073709551615
61+
%lhs:_(s128) = G_OR %a, %mask
62+
%res:_(s128) = G_ADD %lhs, %rhs
63+
$q0 = COPY %res(s128)
64+
...
65+
66+
---
67+
name: add_s128_lhs_low64_known_zero_bits
68+
body: |
69+
bb.0:
70+
liveins: $q0, $q1
71+
; CHECK-LABEL: name: add_s128_lhs_low64_known_zero_bits
72+
; CHECK: liveins: $q0, $q1
73+
; CHECK-NEXT: {{ $}}
74+
; CHECK-NEXT: %a:_(s128) = COPY $q0
75+
; CHECK-NEXT: %rhs:_(s128) = COPY $q1
76+
; CHECK-NEXT: %mask:_(s128) = G_CONSTANT i128 -18446744073709551616
77+
; CHECK-NEXT: %lhs:_(s128) = G_AND %a, %mask
78+
; CHECK-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES %lhs(s128)
79+
; CHECK-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES %rhs(s128)
80+
; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s64) = G_ADD [[UV1]], [[UV3]]
81+
; CHECK-NEXT: %res:_(s128) = G_MERGE_VALUES [[UV2]](s64), [[ADD]](s64)
82+
; CHECK-NEXT: $q0 = COPY %res(s128)
83+
%a:_(s128) = COPY $q0
84+
%rhs:_(s128) = COPY $q1
85+
%mask:_(s128) = G_CONSTANT i128 -18446744073709551616
86+
%lhs:_(s128) = G_AND %a, %mask
87+
%res:_(s128) = G_ADD %lhs, %rhs
88+
$q0 = COPY %res(s128)
89+
...
90+
91+
---
92+
name: add_s128_rhs_low64_known_zero_bits
93+
body: |
94+
bb.0:
95+
liveins: $q0, $q1
96+
; CHECK-LABEL: name: add_s128_rhs_low64_known_zero_bits
97+
; CHECK: liveins: $q0, $q1
98+
; CHECK-NEXT: {{ $}}
99+
; CHECK-NEXT: %lhs:_(s128) = COPY $q0
100+
; CHECK-NEXT: %b:_(s128) = COPY $q1
101+
; CHECK-NEXT: %mask:_(s128) = G_CONSTANT i128 -18446744073709551616
102+
; CHECK-NEXT: %rhs:_(s128) = G_AND %b, %mask
103+
; CHECK-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES %lhs(s128)
104+
; CHECK-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES %rhs(s128)
105+
; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s64) = G_ADD [[UV1]], [[UV3]]
106+
; CHECK-NEXT: %res:_(s128) = G_MERGE_VALUES [[UV]](s64), [[ADD]](s64)
107+
; CHECK-NEXT: $q0 = COPY %res(s128)
108+
%lhs:_(s128) = COPY $q0
109+
%b:_(s128) = COPY $q1
110+
%mask:_(s128) = G_CONSTANT i128 -18446744073709551616
111+
%rhs:_(s128) = G_AND %b, %mask
112+
%res:_(s128) = G_ADD %lhs, %rhs
113+
$q0 = COPY %res(s128)
114+
...

0 commit comments

Comments
 (0)