Skip to content

Commit edfabea

Browse files
committed
[GlobalISel][AArch64] Replace N-bit G_ADD with N/2-bit G_ADD if the lower bits are known to be zero
1 parent dae7fb8 commit edfabea

File tree

4 files changed

+188
-1
lines changed

4 files changed

+188
-1
lines changed

llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h

+2
Original file line numberDiff line numberDiff line change
@@ -714,6 +714,8 @@ class CombinerHelper {
714714
/// (G_*SUBE x, y, 0) -> (G_*SUBO x, y)
715715
bool matchAddEToAddO(MachineInstr &MI, BuildFnTy &MatchInfo);
716716

717+
/// Match a scalar G_ADD in which the low half of at least one operand is
/// known to be zero, so that only the high halves need to be added:
/// (G_ADD x, y) -> (G_MERGE_VALUES lo(other), (G_ADD hi(x), hi(y)))
bool matchAddWithKnownZeroLowerHalfBits(MachineInstr &MI, BuildFnTy &MatchInfo);
718+
717719
/// Transform (fadd x, fneg(y)) -> (fsub x, y)
718720
/// (fadd fneg(x), y) -> (fsub y, x)
719721
/// (fsub x, fneg(y)) -> (fadd x, y)

llvm/include/llvm/Target/GlobalISel/Combine.td

+9-1
Original file line numberDiff line numberDiff line change
@@ -1142,6 +1142,13 @@ def adde_to_addo: GICombineRule<
11421142
[{ return Helper.matchAddEToAddO(*${root}, ${matchinfo}); }]),
11431143
(apply [{ Helper.applyBuildFnNoErase(*${root}, ${matchinfo}); }])>;
11441144

1145+
// Replace a G_ADD by an add of half the width when
// Helper.matchAddWithKnownZeroLowerHalfBits accepts the instruction; the
// replacement sequence is produced by the build function stored in $matchinfo.
def narrow_add_to_half: GICombineRule<
1146+
(defs root:$root, build_fn_matchinfo:$matchinfo),
1147+
(match (wip_match_opcode G_ADD):$root,
1148+
[{ return Helper.matchAddWithKnownZeroLowerHalfBits(*${root}, ${matchinfo}); }]),
1149+
(apply [{ Helper.applyBuildFn(*${root}, ${matchinfo}); }])
1150+
>;
1151+
11451152
def mulh_to_lshr : GICombineRule<
11461153
(defs root:$root),
11471154
(match (wip_match_opcode G_UMULH):$root,
@@ -1829,7 +1836,8 @@ def known_bits_simplifications : GICombineGroup<[
18291836
sext_inreg_to_zext_inreg]>;
18301837

18311838
def width_reduction_combines : GICombineGroup<[reduce_shl_of_extend,
1832-
narrow_binop_feeding_and]>;
1839+
narrow_binop_feeding_and,
1840+
narrow_add_to_half]>;
18331841

18341842
def phi_combines : GICombineGroup<[extend_through_phis]>;
18351843

llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp

+63
Original file line numberDiff line numberDiff line change
@@ -5099,6 +5099,69 @@ bool CombinerHelper::matchAddEToAddO(MachineInstr &MI, BuildFnTy &MatchInfo) {
50995099
return true;
51005100
}
51015101

5102+
/// Match a scalar G_ADD that can be performed on the high halves of its
/// operands only.
///
/// The match succeeds when all of the following hold:
///  - the destination is a scalar with an even number of bits,
///  - the full width is not a legal integer width according to the function's
///    DataLayout, while half of that width is,
///  - the low half of at least one operand is known to be all zeros.
///
/// With a zero low half on one side, the low half of the sum is simply the low
/// half of the other operand and no carry reaches the high half, so the add is
/// rewritten as
///   lo(x), hi(x) = G_UNMERGE_VALUES x
///   lo(y), hi(y) = G_UNMERGE_VALUES y
///   dst          = G_MERGE_VALUES lo(other), (G_ADD hi(x), hi(y))
///
/// \param MI        the G_ADD instruction to inspect.
/// \param MatchInfo on success, receives the function that emits the
///                  replacement sequence.
/// \returns true if the combine applies and \p MatchInfo was set.
bool CombinerHelper::matchAddWithKnownZeroLowerHalfBits(MachineInstr &MI,
                                                        BuildFnTy &MatchInfo) {
  assert(MI.getOpcode() == TargetOpcode::G_ADD);

  const Register DstReg = MI.getOperand(0).getReg();
  const LLT DstTy = MRI.getType(DstReg);
  if (!DstTy.isScalar())
    return false;

  const std::uint64_t FullSize = DstTy.getSizeInBits();
  // The value is split into two pieces of HalfSize bits each, which is only
  // possible when the full width is even. Rounding the half up (as in
  // (FullSize + 1) / 2) would request an unmerge/merge whose pieces do not add
  // up to the width of the original register.
  if (FullSize % 2 != 0)
    return false;
  const std::uint64_t HalfSize = FullSize / 2;

  MachineFunction &MF = *MI.getMF();
  const DataLayout &DL = MF.getDataLayout();

  // Only narrow when the full width is not legal but the half width is.
  if (DL.isLegalInteger(FullSize) || !DL.isLegalInteger(HalfSize))
    return false;

  const Register LhsReg = MI.getOperand(1).getReg();
  const Register RhsReg = MI.getOperand(2).getReg();

  // True if every bit in the low half of Reg is known to be zero.
  const auto HasKnownZeroLowHalf = [&](Register Reg) {
    const KnownBits LoBits = KB->getKnownBits(Reg).extractBits(HalfSize, 0);
    return LoBits.isConstant() && LoBits.getConstant().isZero();
  };

  const bool LhsHasLoZeros = HasKnownZeroLowHalf(LhsReg);
  const bool RhsHasLoZeros = HasKnownZeroLowHalf(RhsReg);
  if (!LhsHasLoZeros && !RhsHasLoZeros)
    return false;

  // The flags of the wide add are forwarded unchanged to the narrow add.
  const auto Flags = MI.getFlags();

  MatchInfo = [=](MachineIRBuilder &MIRBuilder) {
    const LLT HalfTy = LLT::scalar(HalfSize);

    // Result 0 of each unmerge is the low half, result 1 the high half.
    const auto LhsSubRegs = MIRBuilder.buildUnmerge(HalfTy, LhsReg);
    const auto RhsSubRegs = MIRBuilder.buildUnmerge(HalfTy, RhsReg);

    const Register ResHiReg = MRI.createGenericVirtualRegister(HalfTy);
    MIRBuilder.buildAdd(ResHiReg, LhsSubRegs.getReg(1), RhsSubRegs.getReg(1),
                        Flags);

    // The low half of the sum is the low half of the operand that is paired
    // with the known-zero one. If both are known zero, the RHS low half is
    // used.
    assert(LhsHasLoZeros || RhsHasLoZeros);
    const Register ResLoReg =
        LhsHasLoZeros ? RhsSubRegs.getReg(0) : LhsSubRegs.getReg(0);
    MIRBuilder.buildMergeLikeInstr(DstReg, {ResLoReg, ResHiReg});
  };

  return true;
}
5164+
51025165
bool CombinerHelper::matchSubAddSameReg(MachineInstr &MI,
51035166
BuildFnTy &MatchInfo) {
51045167
assert(MI.getOpcode() == TargetOpcode::G_SUB);
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,114 @@
1+
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 2
2+
# RUN: llc -mtriple aarch64 -global-isel -run-pass=aarch64-prelegalizer-combiner -verify-machineinstrs %s -o - | FileCheck %s
3+
4+
---
5+
name: add_s128_unknown_bits
6+
body: |
7+
bb.0:
8+
liveins: $q0, $q1
9+
; CHECK-LABEL: name: add_s128_unknown_bits
10+
; CHECK: liveins: $q0, $q1
11+
; CHECK-NEXT: {{ $}}
12+
; CHECK-NEXT: %lhs:_(s128) = COPY $q0
13+
; CHECK-NEXT: %rhs:_(s128) = COPY $q1
14+
; CHECK-NEXT: %res:_(s128) = G_ADD %lhs, %rhs
15+
; CHECK-NEXT: $q0 = COPY %res(s128)
16+
%lhs:_(s128) = COPY $q0
17+
%rhs:_(s128) = COPY $q1
18+
%res:_(s128) = G_ADD %lhs, %rhs
19+
$q0 = COPY %res(s128)
20+
...
21+
22+
---
23+
name: add_s64_low32_known_zero_bits
24+
body: |
25+
bb.0:
26+
liveins: $x0, $x1
27+
; CHECK-LABEL: name: add_s64_low32_known_zero_bits
28+
; CHECK: liveins: $x0, $x1
29+
; CHECK-NEXT: {{ $}}
30+
; CHECK-NEXT: %a:_(s64) = COPY $x0
31+
; CHECK-NEXT: %rhs:_(s64) = COPY $x1
32+
; CHECK-NEXT: %mask:_(s64) = G_CONSTANT i64 -4294967296
33+
; CHECK-NEXT: %lhs:_(s64) = G_AND %a, %mask
34+
; CHECK-NEXT: %res:_(s64) = G_ADD %lhs, %rhs
35+
; CHECK-NEXT: $x0 = COPY %res(s64)
36+
%a:_(s64) = COPY $x0
37+
%rhs:_(s64) = COPY $x1
38+
%mask:_(s64) = G_CONSTANT i64 -4294967296
39+
%lhs:_(s64) = G_AND %a, %mask
40+
%res:_(s64) = G_ADD %lhs, %rhs
41+
$x0 = COPY %res(s64)
42+
...
43+
44+
---
45+
name: add_s128_low64_known_nonzero_bits
46+
body: |
47+
bb.0:
48+
liveins: $q0, $q1
49+
; CHECK-LABEL: name: add_s128_low64_known_nonzero_bits
50+
; CHECK: liveins: $q0, $q1
51+
; CHECK-NEXT: {{ $}}
52+
; CHECK-NEXT: %a:_(s128) = COPY $q0
53+
; CHECK-NEXT: %rhs:_(s128) = COPY $q1
54+
; CHECK-NEXT: %mask:_(s128) = G_CONSTANT i128 18446744073709551615
55+
; CHECK-NEXT: %lhs:_(s128) = G_OR %a, %mask
56+
; CHECK-NEXT: %res:_(s128) = G_ADD %lhs, %rhs
57+
; CHECK-NEXT: $q0 = COPY %res(s128)
58+
%a:_(s128) = COPY $q0
59+
%rhs:_(s128) = COPY $q1
60+
%mask:_(s128) = G_CONSTANT i128 18446744073709551615
61+
%lhs:_(s128) = G_OR %a, %mask
62+
%res:_(s128) = G_ADD %lhs, %rhs
63+
$q0 = COPY %res(s128)
64+
...
65+
66+
---
67+
name: add_s128_lhs_low64_known_zero_bits
68+
body: |
69+
bb.0:
70+
liveins: $q0, $q1
71+
; CHECK-LABEL: name: add_s128_lhs_low64_known_zero_bits
72+
; CHECK: liveins: $q0, $q1
73+
; CHECK-NEXT: {{ $}}
74+
; CHECK-NEXT: %a:_(s128) = COPY $q0
75+
; CHECK-NEXT: %rhs:_(s128) = COPY $q1
76+
; CHECK-NEXT: %mask:_(s128) = G_CONSTANT i128 -18446744073709551616
77+
; CHECK-NEXT: %lhs:_(s128) = G_AND %a, %mask
78+
; CHECK-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES %lhs(s128)
79+
; CHECK-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES %rhs(s128)
80+
; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s64) = G_ADD [[UV1]], [[UV3]]
81+
; CHECK-NEXT: %res:_(s128) = G_MERGE_VALUES [[UV2]](s64), [[ADD]](s64)
82+
; CHECK-NEXT: $q0 = COPY %res(s128)
83+
%a:_(s128) = COPY $q0
84+
%rhs:_(s128) = COPY $q1
85+
%mask:_(s128) = G_CONSTANT i128 -18446744073709551616
86+
%lhs:_(s128) = G_AND %a, %mask
87+
%res:_(s128) = G_ADD %lhs, %rhs
88+
$q0 = COPY %res(s128)
89+
...
90+
91+
---
92+
name: add_s128_rhs_low64_known_zero_bits
93+
body: |
94+
bb.0:
95+
liveins: $q0, $q1
96+
; CHECK-LABEL: name: add_s128_rhs_low64_known_zero_bits
97+
; CHECK: liveins: $q0, $q1
98+
; CHECK-NEXT: {{ $}}
99+
; CHECK-NEXT: %lhs:_(s128) = COPY $q0
100+
; CHECK-NEXT: %b:_(s128) = COPY $q1
101+
; CHECK-NEXT: %mask:_(s128) = G_CONSTANT i128 -18446744073709551616
102+
; CHECK-NEXT: %rhs:_(s128) = G_AND %b, %mask
103+
; CHECK-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES %lhs(s128)
104+
; CHECK-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES %rhs(s128)
105+
; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s64) = G_ADD [[UV1]], [[UV3]]
106+
; CHECK-NEXT: %res:_(s128) = G_MERGE_VALUES [[UV]](s64), [[ADD]](s64)
107+
; CHECK-NEXT: $q0 = COPY %res(s128)
108+
%lhs:_(s128) = COPY $q0
109+
%b:_(s128) = COPY $q1
110+
%mask:_(s128) = G_CONSTANT i128 -18446744073709551616
111+
%rhs:_(s128) = G_AND %b, %mask
112+
%res:_(s128) = G_ADD %lhs, %rhs
113+
$q0 = COPY %res(s128)
114+
...

0 commit comments

Comments
 (0)