Skip to content

Commit e12bf36

Browse files
authored
[GISel][CombinerHelper] Combine op(trunc(x), trunc(y)) -> trunc(op(x, y)) (#89023)
1 parent f779ec7 commit e12bf36

File tree

9 files changed

+550
-244
lines changed

9 files changed

+550
-244
lines changed

llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp

+16
Original file line numberDiff line numberDiff line change
@@ -3158,6 +3158,22 @@ bool CombinerHelper::matchHoistLogicOpWithSameOpcodeHands(
31583158
// Match: logic (ext X), (ext Y) --> ext (logic X, Y)
31593159
break;
31603160
}
3161+
case TargetOpcode::G_TRUNC: {
3162+
// Match: logic (trunc X), (trunc Y) -> trunc (logic X, Y)
3163+
const MachineFunction *MF = MI.getMF();
3164+
const DataLayout &DL = MF->getDataLayout();
3165+
LLVMContext &Ctx = MF->getFunction().getContext();
3166+
3167+
LLT DstTy = MRI.getType(Dst);
3168+
const TargetLowering &TLI = getTargetLowering();
3169+
3170+
// Be extra careful sinking truncate. If both the truncate and the zext back
3171+
// are free, there's no benefit in widening a binop.
3172+
if (TLI.isZExtFree(DstTy, XTy, DL, Ctx) &&
3173+
TLI.isTruncateFree(XTy, DstTy, DL, Ctx))
3174+
return false;
3175+
break;
3176+
}
31613177
case TargetOpcode::G_AND:
31623178
case TargetOpcode::G_ASHR:
31633179
case TargetOpcode::G_LSHR:
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,315 @@
1+
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
2+
# RUN: llc -o - -mtriple=aarch64-unknown-unknown -run-pass=aarch64-prelegalizer-combiner -verify-machineinstrs %s | FileCheck %s
3+
4+
# Truncs with a single use get folded.
5+
6+
# and(trunc(x), trunc(y)) -> trunc(and(x, y))
7+
---
8+
name: and_trunc
9+
body: |
10+
bb.0:
11+
liveins: $w0, $w1
12+
; CHECK-LABEL: name: and_trunc
13+
; CHECK: liveins: $w0, $w1
14+
; CHECK-NEXT: {{ $}}
15+
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
16+
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $w1
17+
; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[COPY1]]
18+
; CHECK-NEXT: $w0 = COPY [[AND]](s32)
19+
%0:_(s32) = COPY $w0
20+
%1:_(s32) = COPY $w1
21+
%2:_(s16) = G_TRUNC %0
22+
%3:_(s16) = G_TRUNC %1
23+
%4:_(s16) = G_AND %2, %3
24+
%5:_(s32) = G_ANYEXT %4
25+
$w0 = COPY %5
26+
...
27+
---
28+
name: and_trunc_vector
29+
body: |
30+
bb.0:
31+
liveins: $q0, $q1
32+
; CHECK-LABEL: name: and_trunc_vector
33+
; CHECK: liveins: $q0, $q1
34+
; CHECK-NEXT: {{ $}}
35+
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0
36+
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $q1
37+
; CHECK-NEXT: [[AND:%[0-9]+]]:_(<4 x s32>) = G_AND [[COPY]], [[COPY1]]
38+
; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(<4 x s16>) = G_TRUNC [[AND]](<4 x s32>)
39+
; CHECK-NEXT: $x0 = COPY [[TRUNC]](<4 x s16>)
40+
%0:_(<4 x s32>) = COPY $q0
41+
%1:_(<4 x s32>) = COPY $q1
42+
%2:_(<4 x s16>) = G_TRUNC %0
43+
%3:_(<4 x s16>) = G_TRUNC %1
44+
%4:_(<4 x s16>) = G_AND %2, %3
45+
$x0 = COPY %4
46+
...
47+
48+
# or(trunc(x), trunc(y)) -> trunc(or(x, y))
49+
---
50+
name: or_trunc
51+
body: |
52+
bb.0:
53+
liveins: $w0, $w1
54+
; CHECK-LABEL: name: or_trunc
55+
; CHECK: liveins: $w0, $w1
56+
; CHECK-NEXT: {{ $}}
57+
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
58+
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $w1
59+
; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY]], [[COPY1]]
60+
; CHECK-NEXT: $w0 = COPY [[OR]](s32)
61+
%0:_(s32) = COPY $w0
62+
%1:_(s32) = COPY $w1
63+
%2:_(s16) = G_TRUNC %0
64+
%3:_(s16) = G_TRUNC %1
65+
%4:_(s16) = G_OR %2, %3
66+
%5:_(s32) = G_ANYEXT %4
67+
$w0 = COPY %5
68+
...
69+
---
70+
name: or_trunc_vector
71+
body: |
72+
bb.0:
73+
liveins: $q0, $q1
74+
; CHECK-LABEL: name: or_trunc_vector
75+
; CHECK: liveins: $q0, $q1
76+
; CHECK-NEXT: {{ $}}
77+
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0
78+
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $q1
79+
; CHECK-NEXT: [[OR:%[0-9]+]]:_(<4 x s32>) = G_OR [[COPY]], [[COPY1]]
80+
; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(<4 x s16>) = G_TRUNC [[OR]](<4 x s32>)
81+
; CHECK-NEXT: $x0 = COPY [[TRUNC]](<4 x s16>)
82+
%0:_(<4 x s32>) = COPY $q0
83+
%1:_(<4 x s32>) = COPY $q1
84+
%2:_(<4 x s16>) = G_TRUNC %0
85+
%3:_(<4 x s16>) = G_TRUNC %1
86+
%4:_(<4 x s16>) = G_OR %2, %3
87+
$x0 = COPY %4
88+
...
89+
90+
# xor(trunc(x), trunc(y)) -> trunc(xor(x, y))
91+
---
92+
name: xor_trunc
93+
body: |
94+
bb.0:
95+
liveins: $w0, $w1
96+
; CHECK-LABEL: name: xor_trunc
97+
; CHECK: liveins: $w0, $w1
98+
; CHECK-NEXT: {{ $}}
99+
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
100+
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $w1
101+
; CHECK-NEXT: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[COPY]], [[COPY1]]
102+
; CHECK-NEXT: $w0 = COPY [[XOR]](s32)
103+
%0:_(s32) = COPY $w0
104+
%1:_(s32) = COPY $w1
105+
%2:_(s16) = G_TRUNC %0
106+
%3:_(s16) = G_TRUNC %1
107+
%4:_(s16) = G_XOR %2, %3
108+
%5:_(s32) = G_ANYEXT %4
109+
$w0 = COPY %5
110+
...
111+
---
112+
name: xor_trunc_vector
113+
body: |
114+
bb.0:
115+
liveins: $q0, $q1
116+
; CHECK-LABEL: name: xor_trunc_vector
117+
; CHECK: liveins: $q0, $q1
118+
; CHECK-NEXT: {{ $}}
119+
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0
120+
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $q1
121+
; CHECK-NEXT: [[XOR:%[0-9]+]]:_(<4 x s32>) = G_XOR [[COPY]], [[COPY1]]
122+
; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(<4 x s16>) = G_TRUNC [[XOR]](<4 x s32>)
123+
; CHECK-NEXT: $x0 = COPY [[TRUNC]](<4 x s16>)
124+
%0:_(<4 x s32>) = COPY $q0
125+
%1:_(<4 x s32>) = COPY $q1
126+
%2:_(<4 x s16>) = G_TRUNC %0
127+
%3:_(<4 x s16>) = G_TRUNC %1
128+
%4:_(<4 x s16>) = G_XOR %2, %3
129+
$x0 = COPY %4
130+
...
131+
132+
# Truncs with multiple uses do not get folded.
133+
---
134+
name: or_trunc_multiuse_1
135+
body: |
136+
bb.0:
137+
liveins: $w0, $w1, $x2
138+
; CHECK-LABEL: name: or_trunc_multiuse_1
139+
; CHECK: liveins: $w0, $w1, $x2
140+
; CHECK-NEXT: {{ $}}
141+
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
142+
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $w1
143+
; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(p0) = COPY $x2
144+
; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
145+
; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32)
146+
; CHECK-NEXT: G_STORE [[TRUNC]](s16), [[COPY2]](p0) :: (store (s16))
147+
; CHECK-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[TRUNC]], [[TRUNC1]]
148+
; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16)
149+
; CHECK-NEXT: $w0 = COPY [[ANYEXT]](s32)
150+
%0:_(s32) = COPY $w0
151+
%1:_(s32) = COPY $w1
152+
%5:_(p0) = COPY $x2
153+
%2:_(s16) = G_TRUNC %0
154+
%3:_(s16) = G_TRUNC %1
155+
G_STORE %2, %5 :: (store (s16))
156+
%4:_(s16) = G_OR %2, %3
157+
%6:_(s32) = G_ANYEXT %4
158+
$w0 = COPY %6
159+
...
160+
---
161+
name: and_trunc_multiuse_2
162+
body: |
163+
bb.0:
164+
liveins: $w0, $w1, $x2
165+
; CHECK-LABEL: name: and_trunc_multiuse_2
166+
; CHECK: liveins: $w0, $w1, $x2
167+
; CHECK-NEXT: {{ $}}
168+
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
169+
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $w1
170+
; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(p0) = COPY $x2
171+
; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
172+
; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32)
173+
; CHECK-NEXT: G_STORE [[TRUNC]](s16), [[COPY2]](p0) :: (store (s16))
174+
; CHECK-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[TRUNC1]]
175+
; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[AND]](s16)
176+
; CHECK-NEXT: $w0 = COPY [[ANYEXT]](s32)
177+
%0:_(s32) = COPY $w0
178+
%1:_(s32) = COPY $w1
179+
%5:_(p0) = COPY $x2
180+
%2:_(s16) = G_TRUNC %0
181+
%3:_(s16) = G_TRUNC %1
182+
G_STORE %2, %5 :: (store (s16))
183+
%4:_(s16) = G_AND %2, %3
184+
%6:_(s32) = G_ANYEXT %4
185+
$w0 = COPY %6
186+
...
187+
---
188+
name: xor_trunc_vector_multiuse
189+
body: |
190+
bb.0:
191+
liveins: $q0, $q1, $x2
192+
; CHECK-LABEL: name: xor_trunc_vector_multiuse
193+
; CHECK: liveins: $q0, $q1, $x2
194+
; CHECK-NEXT: {{ $}}
195+
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0
196+
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $q1
197+
; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(p0) = COPY $x2
198+
; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(<4 x s16>) = G_TRUNC [[COPY]](<4 x s32>)
199+
; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(<4 x s16>) = G_TRUNC [[COPY1]](<4 x s32>)
200+
; CHECK-NEXT: G_STORE [[TRUNC]](<4 x s16>), [[COPY2]](p0) :: (store (<4 x s16>))
201+
; CHECK-NEXT: [[XOR:%[0-9]+]]:_(<4 x s16>) = G_XOR [[TRUNC]], [[TRUNC1]]
202+
; CHECK-NEXT: $x0 = COPY [[XOR]](<4 x s16>)
203+
%0:_(<4 x s32>) = COPY $q0
204+
%1:_(<4 x s32>) = COPY $q1
205+
%5:_(p0) = COPY $x2
206+
%2:_(<4 x s16>) = G_TRUNC %0
207+
%3:_(<4 x s16>) = G_TRUNC %1
208+
G_STORE %2, %5 :: (store (<4 x s16>))
209+
%4:_(<4 x s16>) = G_XOR %2, %3
210+
$x0 = COPY %4
211+
...
212+
213+
# Freezes should get pushed through truncs.
214+
215+
# This optimizes the pattern where `select(cond, T, 0)` gets converted to
216+
# `and(cond, freeze(T))`.
217+
218+
# and(freeze(trunc(x)), trunc(y)) -> trunc(and(freeze(x), y))
219+
---
220+
name: and_trunc_freeze
221+
body: |
222+
bb.0:
223+
liveins: $w0, $w1
224+
; CHECK-LABEL: name: and_trunc_freeze
225+
; CHECK: liveins: $w0, $w1
226+
; CHECK-NEXT: {{ $}}
227+
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
228+
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $w1
229+
; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(s32) = G_FREEZE [[COPY]]
230+
; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[FREEZE]], [[COPY1]]
231+
; CHECK-NEXT: $w0 = COPY [[AND]](s32)
232+
%0:_(s32) = COPY $w0
233+
%1:_(s32) = COPY $w1
234+
%2:_(s16) = G_TRUNC %0
235+
%3:_(s16) = G_TRUNC %1
236+
%6:_(s16) = G_FREEZE %2
237+
%4:_(s16) = G_AND %6, %3
238+
%5:_(s32) = G_ANYEXT %4
239+
$w0 = COPY %5
240+
...
241+
242+
# and(freeze(trunc(x)), freeze(trunc(y))) -> trunc(and(freeze(x), freeze(y)))
243+
---
244+
name: and_trunc_freeze_both
245+
body: |
246+
bb.0:
247+
liveins: $w0, $w1
248+
; CHECK-LABEL: name: and_trunc_freeze_both
249+
; CHECK: liveins: $w0, $w1
250+
; CHECK-NEXT: {{ $}}
251+
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
252+
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $w1
253+
; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(s32) = G_FREEZE [[COPY]]
254+
; CHECK-NEXT: [[FREEZE1:%[0-9]+]]:_(s32) = G_FREEZE [[COPY1]]
255+
; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[FREEZE]], [[FREEZE1]]
256+
; CHECK-NEXT: $w0 = COPY [[AND]](s32)
257+
%0:_(s32) = COPY $w0
258+
%1:_(s32) = COPY $w1
259+
%2:_(s16) = G_TRUNC %0
260+
%3:_(s16) = G_TRUNC %1
261+
%6:_(s16) = G_FREEZE %2
262+
%7:_(s16) = G_FREEZE %3
263+
%4:_(s16) = G_AND %6, %7
264+
%5:_(s32) = G_ANYEXT %4
265+
$w0 = COPY %5
266+
...
267+
268+
# The freeze fold is less important for G_OR and G_XOR; however, it can still
269+
# trigger.
270+
---
271+
name: or_trunc_freeze
272+
body: |
273+
bb.0:
274+
liveins: $w0, $w1
275+
; CHECK-LABEL: name: or_trunc_freeze
276+
; CHECK: liveins: $w0, $w1
277+
; CHECK-NEXT: {{ $}}
278+
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
279+
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $w1
280+
; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(s32) = G_FREEZE [[COPY]]
281+
; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[FREEZE]], [[COPY1]]
282+
; CHECK-NEXT: $w0 = COPY [[OR]](s32)
283+
%0:_(s32) = COPY $w0
284+
%1:_(s32) = COPY $w1
285+
%2:_(s16) = G_TRUNC %0
286+
%3:_(s16) = G_TRUNC %1
287+
%6:_(s16) = G_FREEZE %2
288+
%4:_(s16) = G_OR %6, %3
289+
%5:_(s32) = G_ANYEXT %4
290+
$w0 = COPY %5
291+
...
292+
---
293+
name: xor_trunc_freeze_both
294+
body: |
295+
bb.0:
296+
liveins: $w0, $w1
297+
; CHECK-LABEL: name: xor_trunc_freeze_both
298+
; CHECK: liveins: $w0, $w1
299+
; CHECK-NEXT: {{ $}}
300+
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
301+
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $w1
302+
; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(s32) = G_FREEZE [[COPY]]
303+
; CHECK-NEXT: [[FREEZE1:%[0-9]+]]:_(s32) = G_FREEZE [[COPY1]]
304+
; CHECK-NEXT: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[FREEZE]], [[FREEZE1]]
305+
; CHECK-NEXT: $w0 = COPY [[XOR]](s32)
306+
%0:_(s32) = COPY $w0
307+
%1:_(s32) = COPY $w1
308+
%2:_(s16) = G_TRUNC %0
309+
%3:_(s16) = G_TRUNC %1
310+
%6:_(s16) = G_FREEZE %2
311+
%7:_(s16) = G_FREEZE %3
312+
%4:_(s16) = G_XOR %6, %7
313+
%5:_(s32) = G_ANYEXT %4
314+
$w0 = COPY %5
315+
...

0 commit comments

Comments
 (0)