Skip to content

Commit 7b06120

Browse files
committed
[AArch64][GISel] and+or+shl => bfi
This fixes a GISEL vs SDAG regression that showed up at -Os in 256.bzip2.

In `_getAndMoveToFrontDecode`:

gisel:
```
and w9, w0, #0xff
orr w9, w9, w8, lsl #8
```

sdag:
```
bfi w0, w8, #8, #24
```

Differential revision: https://reviews.llvm.org/D103291
1 parent 9ac7388 commit 7b06120

File tree

2 files changed

+191
-0
lines changed

2 files changed

+191
-0
lines changed

llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp

+34
Original file line numberDiff line numberDiff line change
@@ -2163,6 +2163,40 @@ bool AArch64InstructionSelector::earlySelect(MachineInstr &I) {
21632163
I.eraseFromParent();
21642164
return true;
21652165
}
2166+
case TargetOpcode::G_OR: {
2167+
// Look for operations that take the lower `Width=Size-ShiftImm` bits of
2168+
// `ShiftSrc` and insert them into the upper `Width` bits of `MaskSrc` via
2169+
// shifting and masking that we can replace with a BFI (encoded as a BFM).
2170+
Register Dst = I.getOperand(0).getReg();
2171+
LLT Ty = MRI.getType(Dst);
2172+
2173+
if (!Ty.isScalar())
2174+
return false;
2175+
2176+
unsigned Size = Ty.getSizeInBits();
2177+
if (Size != 32 && Size != 64)
2178+
return false;
2179+
2180+
Register ShiftSrc;
2181+
int64_t ShiftImm;
2182+
Register MaskSrc;
2183+
int64_t MaskImm;
2184+
if (!mi_match(
2185+
Dst, MRI,
2186+
m_GOr(m_OneNonDBGUse(m_GShl(m_Reg(ShiftSrc), m_ICst(ShiftImm))),
2187+
m_OneNonDBGUse(m_GAnd(m_Reg(MaskSrc), m_ICst(MaskImm))))))
2188+
return false;
2189+
2190+
if (ShiftImm > Size || ((1ULL << ShiftImm) - 1ULL) != uint64_t(MaskImm))
2191+
return false;
2192+
2193+
int64_t Immr = Size - ShiftImm;
2194+
int64_t Imms = Size - ShiftImm - 1;
2195+
unsigned Opc = Size == 32 ? AArch64::BFMWri : AArch64::BFMXri;
2196+
emitInstr(Opc, {Dst}, {MaskSrc, ShiftSrc, Immr, Imms}, MIB);
2197+
I.eraseFromParent();
2198+
return true;
2199+
}
21662200
default:
21672201
return false;
21682202
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,157 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2+
; RUN: llc < %s -o - -verify-machineinstrs -global-isel=1 -global-isel-abort=1 | FileCheck %s --check-prefixes=CHECK,GISEL
3+
; RUN: llc < %s -o - -verify-machineinstrs -global-isel=0 | FileCheck %s --check-prefixes=CHECK,SDAG
4+
5+
target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
6+
target triple = "arm64-apple-ios14.5.0"
7+
8+
define i32 @bfi_w_31(i32 %in1, i32 %in2) {
9+
; CHECK-LABEL: bfi_w_31:
10+
; CHECK: ; %bb.0: ; %bb
11+
; CHECK-NEXT: bfi w1, w0, #31, #1
12+
; CHECK-NEXT: mov w0, w1
13+
; CHECK-NEXT: ret
14+
bb:
15+
%tmp3 = shl i32 %in1, 31
16+
%tmp4 = and i32 %in2, 2147483647
17+
%out = or i32 %tmp3, %tmp4
18+
ret i32 %out
19+
}
20+
21+
define i32 @bfi_w_8(i32 %in1, i32 %in2) {
22+
; CHECK-LABEL: bfi_w_8:
23+
; CHECK: ; %bb.0: ; %bb
24+
; CHECK-NEXT: bfi w1, w0, #8, #24
25+
; CHECK-NEXT: mov w0, w1
26+
; CHECK-NEXT: ret
27+
bb:
28+
%tmp3 = shl i32 %in1, 8
29+
%tmp4 = and i32 %in2, 255
30+
%out = or i32 %tmp3, %tmp4
31+
ret i32 %out
32+
}
33+
34+
define i32 @bfi_w_1(i32 %in1, i32 %in2) {
35+
; CHECK-LABEL: bfi_w_1:
36+
; CHECK: ; %bb.0: ; %bb
37+
; CHECK-NEXT: bfi w1, w0, #1, #31
38+
; CHECK-NEXT: mov w0, w1
39+
; CHECK-NEXT: ret
40+
bb:
41+
%tmp3 = shl i32 %in1, 1
42+
%tmp4 = and i32 %in2, 1
43+
%out = or i32 %tmp3, %tmp4
44+
ret i32 %out
45+
}
46+
47+
define i64 @bfi_x_63(i64 %in1, i64 %in2) {
48+
; CHECK-LABEL: bfi_x_63:
49+
; CHECK: ; %bb.0: ; %bb
50+
; CHECK-NEXT: bfi x1, x0, #63, #1
51+
; CHECK-NEXT: mov x0, x1
52+
; CHECK-NEXT: ret
53+
bb:
54+
%tmp3 = shl i64 %in1, 63
55+
%tmp4 = and i64 %in2, 9223372036854775807
56+
%out = or i64 %tmp3, %tmp4
57+
ret i64 %out
58+
}
59+
60+
define i64 @bfi_x_31(i64 %in1, i64 %in2) {
61+
; CHECK-LABEL: bfi_x_31:
62+
; CHECK: ; %bb.0: ; %bb
63+
; CHECK-NEXT: bfi x1, x0, #31, #33
64+
; CHECK-NEXT: mov x0, x1
65+
; CHECK-NEXT: ret
66+
bb:
67+
%tmp3 = shl i64 %in1, 31
68+
%tmp4 = and i64 %in2, 2147483647
69+
%out = or i64 %tmp3, %tmp4
70+
ret i64 %out
71+
}
72+
73+
define i64 @bfi_x_8(i64 %in1, i64 %in2) {
74+
; CHECK-LABEL: bfi_x_8:
75+
; CHECK: ; %bb.0: ; %bb
76+
; CHECK-NEXT: bfi x1, x0, #8, #56
77+
; CHECK-NEXT: mov x0, x1
78+
; CHECK-NEXT: ret
79+
bb:
80+
%tmp3 = shl i64 %in1, 8
81+
%tmp4 = and i64 %in2, 255
82+
%out = or i64 %tmp3, %tmp4
83+
ret i64 %out
84+
}
85+
86+
define i64 @bfi_x_1(i64 %in1, i64 %in2) {
87+
; CHECK-LABEL: bfi_x_1:
88+
; CHECK: ; %bb.0: ; %bb
89+
; CHECK-NEXT: bfi x1, x0, #1, #63
90+
; CHECK-NEXT: mov x0, x1
91+
; CHECK-NEXT: ret
92+
bb:
93+
%tmp3 = shl i64 %in1, 1
94+
%tmp4 = and i64 %in2, 1
95+
%out = or i64 %tmp3, %tmp4
96+
ret i64 %out
97+
}
98+
99+
define i64 @bfi_x_1_swapped(i64 %in1, i64 %in2) {
100+
; CHECK-LABEL: bfi_x_1_swapped:
101+
; CHECK: ; %bb.0: ; %bb
102+
; CHECK-NEXT: bfi x1, x0, #1, #63
103+
; CHECK-NEXT: mov x0, x1
104+
; CHECK-NEXT: ret
105+
bb:
106+
%tmp3 = shl i64 %in1, 1
107+
%tmp4 = and i64 %in2, 1
108+
%out = or i64 %tmp4, %tmp3
109+
ret i64 %out
110+
}
111+
112+
define i64 @extra_use1(i64 %in1, i64 %in2, i64* %p) {
113+
; GISEL-LABEL: extra_use1:
114+
; GISEL: ; %bb.0: ; %bb
115+
; GISEL-NEXT: lsl x8, x0, #1
116+
; GISEL-NEXT: and x9, x1, #0x1
117+
; GISEL-NEXT: orr x0, x8, x9
118+
; GISEL-NEXT: str x8, [x2]
119+
; GISEL-NEXT: ret
120+
;
121+
; SDAG-LABEL: extra_use1:
122+
; SDAG: ; %bb.0: ; %bb
123+
; SDAG-NEXT: bfi x1, x0, #1, #63
124+
; SDAG-NEXT: lsl x8, x0, #1
125+
; SDAG-NEXT: mov x0, x1
126+
; SDAG-NEXT: str x8, [x2]
127+
; SDAG-NEXT: ret
128+
bb:
129+
%tmp3 = shl i64 %in1, 1
130+
%tmp4 = and i64 %in2, 1
131+
%out = or i64 %tmp3, %tmp4
132+
store i64 %tmp3, i64* %p
133+
ret i64 %out
134+
}
135+
136+
define i64 @extra_use2(i64 %in1, i64 %in2, i64* %p) {
137+
; GISEL-LABEL: extra_use2:
138+
; GISEL: ; %bb.0: ; %bb
139+
; GISEL-NEXT: and x8, x1, #0x1
140+
; GISEL-NEXT: orr x0, x8, x0, lsl #1
141+
; GISEL-NEXT: str x8, [x2]
142+
; GISEL-NEXT: ret
143+
;
144+
; SDAG-LABEL: extra_use2:
145+
; SDAG: ; %bb.0: ; %bb
146+
; SDAG-NEXT: and x8, x1, #0x1
147+
; SDAG-NEXT: bfi x1, x0, #1, #63
148+
; SDAG-NEXT: mov x0, x1
149+
; SDAG-NEXT: str x8, [x2]
150+
; SDAG-NEXT: ret
151+
bb:
152+
%tmp3 = shl i64 %in1, 1
153+
%tmp4 = and i64 %in2, 1
154+
%out = or i64 %tmp3, %tmp4
155+
store i64 %tmp4, i64* %p
156+
ret i64 %out
157+
}

0 commit comments

Comments
 (0)