Skip to content

Commit 63caaa2

Browse files
[LLVM][SVE] Add isel for bfloat based (de)interleave operations. (#128875)
1 parent e3f5269 commit 63caaa2

File tree

4 files changed

+113
-41
lines changed

4 files changed

+113
-41
lines changed

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1702,6 +1702,8 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
17021702
setOperationAction(ISD::MLOAD, VT, Custom);
17031703
setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom);
17041704
setOperationAction(ISD::SPLAT_VECTOR, VT, Legal);
1705+
setOperationAction(ISD::VECTOR_DEINTERLEAVE, VT, Custom);
1706+
setOperationAction(ISD::VECTOR_INTERLEAVE, VT, Custom);
17051707
setOperationAction(ISD::VECTOR_SPLICE, VT, Custom);
17061708

17071709
if (Subtarget->hasSVEB16B16()) {

llvm/lib/Target/AArch64/SVEInstrFormats.td

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -3079,6 +3079,8 @@ multiclass sve_int_perm_bin_perm_zz<bits<3> opc, string asm,
30793079
def : SVE_2_Op_Pat<nxv2f64, op, nxv2f64, nxv2f64, !cast<Instruction>(NAME # _D)>;
30803080

30813081
def : SVE_2_Op_Pat<nxv8bf16, op, nxv8bf16, nxv8bf16, !cast<Instruction>(NAME # _H)>;
3082+
def : SVE_2_Op_Pat<nxv4bf16, op, nxv4bf16, nxv4bf16, !cast<Instruction>(NAME # _S)>;
3083+
def : SVE_2_Op_Pat<nxv2bf16, op, nxv2bf16, nxv2bf16, !cast<Instruction>(NAME # _D)>;
30823084
}
30833085

30843086
//===----------------------------------------------------------------------===//

llvm/test/CodeGen/AArch64/sve-vector-deinterleave.ll

Lines changed: 60 additions & 25 deletions
Original file line number | Diff line number | Diff line change
@@ -10,7 +10,7 @@ define {<vscale x 2 x half>, <vscale x 2 x half>} @vector_deinterleave_nxv2f16_n
1010
; CHECK-NEXT: uunpklo z1.d, z2.s
1111
; CHECK-NEXT: ret
1212
%retval = call {<vscale x 2 x half>, <vscale x 2 x half>} @llvm.vector.deinterleave2.nxv4f16(<vscale x 4 x half> %vec)
13-
ret {<vscale x 2 x half>, <vscale x 2 x half>} %retval
13+
ret {<vscale x 2 x half>, <vscale x 2 x half>} %retval
1414
}
1515

1616
define {<vscale x 4 x half>, <vscale x 4 x half>} @vector_deinterleave_nxv4f16_nxv8f16(<vscale x 8 x half> %vec) {
@@ -22,7 +22,7 @@ define {<vscale x 4 x half>, <vscale x 4 x half>} @vector_deinterleave_nxv4f16_n
2222
; CHECK-NEXT: uunpklo z1.s, z2.h
2323
; CHECK-NEXT: ret
2424
%retval = call {<vscale x 4 x half>, <vscale x 4 x half>} @llvm.vector.deinterleave2.nxv8f16(<vscale x 8 x half> %vec)
25-
ret {<vscale x 4 x half>, <vscale x 4 x half>} %retval
25+
ret {<vscale x 4 x half>, <vscale x 4 x half>} %retval
2626
}
2727

2828
define {<vscale x 8 x half>, <vscale x 8 x half>} @vector_deinterleave_nxv8f16_nxv16f16(<vscale x 16 x half> %vec) {
@@ -33,7 +33,7 @@ define {<vscale x 8 x half>, <vscale x 8 x half>} @vector_deinterleave_nxv8f16_n
3333
; CHECK-NEXT: mov z0.d, z2.d
3434
; CHECK-NEXT: ret
3535
%retval = call {<vscale x 8 x half>, <vscale x 8 x half>} @llvm.vector.deinterleave2.nxv16f16(<vscale x 16 x half> %vec)
36-
ret {<vscale x 8 x half>, <vscale x 8 x half>} %retval
36+
ret {<vscale x 8 x half>, <vscale x 8 x half>} %retval
3737
}
3838

3939
define {<vscale x 2 x float>, <vscale x 2 x float>} @vector_deinterleave_nxv2f32_nxv4f32(<vscale x 4 x float> %vec) {
@@ -45,7 +45,7 @@ define {<vscale x 2 x float>, <vscale x 2 x float>} @vector_deinterleave_nxv2f32
4545
; CHECK-NEXT: uunpklo z1.d, z2.s
4646
; CHECK-NEXT: ret
4747
%retval = call {<vscale x 2 x float>, <vscale x 2 x float>} @llvm.vector.deinterleave2.nxv4f32(<vscale x 4 x float> %vec)
48-
ret {<vscale x 2 x float>, <vscale x 2 x float>} %retval
48+
ret {<vscale x 2 x float>, <vscale x 2 x float>} %retval
4949
}
5050

5151
define {<vscale x 4 x float>, <vscale x 4 x float>} @vector_deinterleave_nxv4f32_nxv8f32(<vscale x 8 x float> %vec) {
@@ -56,7 +56,7 @@ define {<vscale x 4 x float>, <vscale x 4 x float>} @vector_deinterleave_nxv4f32
5656
; CHECK-NEXT: mov z0.d, z2.d
5757
; CHECK-NEXT: ret
5858
%retval = call {<vscale x 4 x float>, <vscale x 4 x float>} @llvm.vector.deinterleave2.nxv8f32(<vscale x 8 x float> %vec)
59-
ret {<vscale x 4 x float>, <vscale x 4 x float>} %retval
59+
ret {<vscale x 4 x float>, <vscale x 4 x float>} %retval
6060
}
6161

6262
define {<vscale x 2 x double>, <vscale x 2 x double>} @vector_deinterleave_nxv2f64_nxv4f64(<vscale x 4 x double> %vec) {
@@ -67,7 +67,42 @@ define {<vscale x 2 x double>, <vscale x 2 x double>} @vector_deinterleave_nxv2f
6767
; CHECK-NEXT: mov z0.d, z2.d
6868
; CHECK-NEXT: ret
6969
%retval = call {<vscale x 2 x double>, <vscale x 2 x double>} @llvm.vector.deinterleave2.nxv4f64(<vscale x 4 x double> %vec)
70-
ret {<vscale x 2 x double>, <vscale x 2 x double>} %retval
70+
ret {<vscale x 2 x double>, <vscale x 2 x double>} %retval
71+
}
72+
73+
define {<vscale x 2 x bfloat>, <vscale x 2 x bfloat>} @vector_deinterleave_nxv2bf16_nxv4bf16(<vscale x 4 x bfloat> %vec) {
74+
; CHECK-LABEL: vector_deinterleave_nxv2bf16_nxv4bf16:
75+
; CHECK: // %bb.0:
76+
; CHECK-NEXT: uzp1 z1.s, z0.s, z0.s
77+
; CHECK-NEXT: uzp2 z2.s, z0.s, z0.s
78+
; CHECK-NEXT: uunpklo z0.d, z1.s
79+
; CHECK-NEXT: uunpklo z1.d, z2.s
80+
; CHECK-NEXT: ret
81+
%retval = call {<vscale x 2 x bfloat>, <vscale x 2 x bfloat>} @llvm.vector.deinterleave2.nxv4bf16(<vscale x 4 x bfloat> %vec)
82+
ret {<vscale x 2 x bfloat>, <vscale x 2 x bfloat>} %retval
83+
}
84+
85+
define {<vscale x 4 x bfloat>, <vscale x 4 x bfloat>} @vector_deinterleave_nxv4bf16_nxv8bf16(<vscale x 8 x bfloat> %vec) {
86+
; CHECK-LABEL: vector_deinterleave_nxv4bf16_nxv8bf16:
87+
; CHECK: // %bb.0:
88+
; CHECK-NEXT: uzp1 z1.h, z0.h, z0.h
89+
; CHECK-NEXT: uzp2 z2.h, z0.h, z0.h
90+
; CHECK-NEXT: uunpklo z0.s, z1.h
91+
; CHECK-NEXT: uunpklo z1.s, z2.h
92+
; CHECK-NEXT: ret
93+
%retval = call {<vscale x 4 x bfloat>, <vscale x 4 x bfloat>} @llvm.vector.deinterleave2.nxv8bf16(<vscale x 8 x bfloat> %vec)
94+
ret {<vscale x 4 x bfloat>, <vscale x 4 x bfloat>} %retval
95+
}
96+
97+
define {<vscale x 8 x bfloat>, <vscale x 8 x bfloat>} @vector_deinterleave_nxv8bf16_nxv16bf16(<vscale x 16 x bfloat> %vec) {
98+
; CHECK-LABEL: vector_deinterleave_nxv8bf16_nxv16bf16:
99+
; CHECK: // %bb.0:
100+
; CHECK-NEXT: uzp1 z2.h, z0.h, z1.h
101+
; CHECK-NEXT: uzp2 z1.h, z0.h, z1.h
102+
; CHECK-NEXT: mov z0.d, z2.d
103+
; CHECK-NEXT: ret
104+
%retval = call {<vscale x 8 x bfloat>, <vscale x 8 x bfloat>} @llvm.vector.deinterleave2.nxv16bf16(<vscale x 16 x bfloat> %vec)
105+
ret {<vscale x 8 x bfloat>, <vscale x 8 x bfloat>} %retval
71106
}
72107

73108
; Integers
@@ -80,7 +115,7 @@ define {<vscale x 16 x i8>, <vscale x 16 x i8>} @vector_deinterleave_nxv16i8_nxv
80115
; CHECK-NEXT: mov z0.d, z2.d
81116
; CHECK-NEXT: ret
82117
%retval = call {<vscale x 16 x i8>, <vscale x 16 x i8>} @llvm.vector.deinterleave2.nxv32i8(<vscale x 32 x i8> %vec)
83-
ret {<vscale x 16 x i8>, <vscale x 16 x i8>} %retval
118+
ret {<vscale x 16 x i8>, <vscale x 16 x i8>} %retval
84119
}
85120

86121
define {<vscale x 8 x i16>, <vscale x 8 x i16>} @vector_deinterleave_nxv8i16_nxv16i16(<vscale x 16 x i16> %vec) {
@@ -91,7 +126,7 @@ define {<vscale x 8 x i16>, <vscale x 8 x i16>} @vector_deinterleave_nxv8i16_nxv
91126
; CHECK-NEXT: mov z0.d, z2.d
92127
; CHECK-NEXT: ret
93128
%retval = call {<vscale x 8 x i16>, <vscale x 8 x i16>} @llvm.vector.deinterleave2.nxv16i16(<vscale x 16 x i16> %vec)
94-
ret {<vscale x 8 x i16>, <vscale x 8 x i16>} %retval
129+
ret {<vscale x 8 x i16>, <vscale x 8 x i16>} %retval
95130
}
96131

97132
define {<vscale x 4 x i32>, <vscale x 4 x i32>} @vector_deinterleave_nxv4i32_nxvv8i32(<vscale x 8 x i32> %vec) {
@@ -102,7 +137,7 @@ define {<vscale x 4 x i32>, <vscale x 4 x i32>} @vector_deinterleave_nxv4i32_nxv
102137
; CHECK-NEXT: mov z0.d, z2.d
103138
; CHECK-NEXT: ret
104139
%retval = call {<vscale x 4 x i32>, <vscale x 4 x i32>} @llvm.vector.deinterleave2.nxv8i32(<vscale x 8 x i32> %vec)
105-
ret {<vscale x 4 x i32>, <vscale x 4 x i32>} %retval
140+
ret {<vscale x 4 x i32>, <vscale x 4 x i32>} %retval
106141
}
107142

108143
define {<vscale x 2 x i64>, <vscale x 2 x i64>} @vector_deinterleave_nxv2i64_nxv4i64(<vscale x 4 x i64> %vec) {
@@ -113,7 +148,7 @@ define {<vscale x 2 x i64>, <vscale x 2 x i64>} @vector_deinterleave_nxv2i64_nxv
113148
; CHECK-NEXT: mov z0.d, z2.d
114149
; CHECK-NEXT: ret
115150
%retval = call {<vscale x 2 x i64>, <vscale x 2 x i64>} @llvm.vector.deinterleave2.nxv4i64(<vscale x 4 x i64> %vec)
116-
ret {<vscale x 2 x i64>, <vscale x 2 x i64>} %retval
151+
ret {<vscale x 2 x i64>, <vscale x 2 x i64>} %retval
117152
}
118153

119154
; Predicated
@@ -125,7 +160,7 @@ define {<vscale x 16 x i1>, <vscale x 16 x i1>} @vector_deinterleave_nxv16i1_nxv
125160
; CHECK-NEXT: mov p0.b, p2.b
126161
; CHECK-NEXT: ret
127162
%retval = call {<vscale x 16 x i1>, <vscale x 16 x i1>} @llvm.vector.deinterleave2.nxv32i1(<vscale x 32 x i1> %vec)
128-
ret {<vscale x 16 x i1>, <vscale x 16 x i1>} %retval
163+
ret {<vscale x 16 x i1>, <vscale x 16 x i1>} %retval
129164
}
130165

131166
define {<vscale x 8 x i1>, <vscale x 8 x i1>} @vector_deinterleave_nxv8i1_nxv16i1(<vscale x 16 x i1> %vec) {
@@ -137,7 +172,7 @@ define {<vscale x 8 x i1>, <vscale x 8 x i1>} @vector_deinterleave_nxv8i1_nxv16i
137172
; CHECK-NEXT: punpklo p1.h, p2.b
138173
; CHECK-NEXT: ret
139174
%retval = call {<vscale x 8 x i1>, <vscale x 8 x i1>} @llvm.vector.deinterleave2.nxv16i1(<vscale x 16 x i1> %vec)
140-
ret {<vscale x 8 x i1>, <vscale x 8 x i1>} %retval
175+
ret {<vscale x 8 x i1>, <vscale x 8 x i1>} %retval
141176
}
142177

143178
define {<vscale x 4 x i1>, <vscale x 4 x i1>} @vector_deinterleave_nxv4i1_nxv8i1(<vscale x 8 x i1> %vec) {
@@ -149,7 +184,7 @@ define {<vscale x 4 x i1>, <vscale x 4 x i1>} @vector_deinterleave_nxv4i1_nxv8i1
149184
; CHECK-NEXT: punpklo p1.h, p2.b
150185
; CHECK-NEXT: ret
151186
%retval = call {<vscale x 4 x i1>, <vscale x 4 x i1>} @llvm.vector.deinterleave2.nxv8i1(<vscale x 8 x i1> %vec)
152-
ret {<vscale x 4 x i1>, <vscale x 4 x i1>} %retval
187+
ret {<vscale x 4 x i1>, <vscale x 4 x i1>} %retval
153188
}
154189

155190
define {<vscale x 2 x i1>, <vscale x 2 x i1>} @vector_deinterleave_nxv2i1_nxv4i1(<vscale x 4 x i1> %vec) {
@@ -161,7 +196,7 @@ define {<vscale x 2 x i1>, <vscale x 2 x i1>} @vector_deinterleave_nxv2i1_nxv4i1
161196
; CHECK-NEXT: punpklo p1.h, p2.b
162197
; CHECK-NEXT: ret
163198
%retval = call {<vscale x 2 x i1>, <vscale x 2 x i1>} @llvm.vector.deinterleave2.nxv4i1(<vscale x 4 x i1> %vec)
164-
ret {<vscale x 2 x i1>, <vscale x 2 x i1>} %retval
199+
ret {<vscale x 2 x i1>, <vscale x 2 x i1>} %retval
165200
}
166201

167202

@@ -178,11 +213,11 @@ define {<vscale x 4 x i64>, <vscale x 4 x i64>} @vector_deinterleave_nxv4i64_nxv
178213
; CHECK-NEXT: mov z1.d, z4.d
179214
; CHECK-NEXT: mov z2.d, z6.d
180215
; CHECK-NEXT: ret
181-
%retval = call {<vscale x 4 x i64>, <vscale x 4 x i64>} @llvm.vector.deinterleave2.nxv8i64(<vscale x 8 x i64> %vec)
182-
ret {<vscale x 4 x i64>, <vscale x 4 x i64>} %retval
216+
%retval = call {<vscale x 4 x i64>, <vscale x 4 x i64>} @llvm.vector.deinterleave2.nxv8i64(<vscale x 8 x i64> %vec)
217+
ret {<vscale x 4 x i64>, <vscale x 4 x i64>} %retval
183218
}
184219

185-
define {<vscale x 8 x i64>, <vscale x 8 x i64>} @vector_deinterleave_nxv8i64_nxv16i64(<vscale x 16 x i64> %vec) {
220+
define {<vscale x 8 x i64>, <vscale x 8 x i64>} @vector_deinterleave_nxv8i64_nxv16i64(<vscale x 16 x i64> %vec) {
186221
; CHECK-LABEL: vector_deinterleave_nxv8i64_nxv16i64:
187222
; CHECK: // %bb.0:
188223
; CHECK-NEXT: uzp1 z24.d, z2.d, z3.d
@@ -201,8 +236,8 @@ define {<vscale x 8 x i64>, <vscale x 8 x i64>} @vector_deinterleave_nxv8i64_nx
201236
; CHECK-NEXT: mov z5.d, z29.d
202237
; CHECK-NEXT: mov z6.d, z30.d
203238
; CHECK-NEXT: ret
204-
%retval = call {<vscale x 8 x i64>, <vscale x 8 x i64>} @llvm.vector.deinterleave2.nxv16i64(<vscale x 16 x i64> %vec)
205-
ret {<vscale x 8 x i64>, <vscale x 8 x i64>} %retval
239+
%retval = call {<vscale x 8 x i64>, <vscale x 8 x i64>} @llvm.vector.deinterleave2.nxv16i64(<vscale x 16 x i64> %vec)
240+
ret {<vscale x 8 x i64>, <vscale x 8 x i64>} %retval
206241
}
207242

208243

@@ -216,8 +251,8 @@ define {<vscale x 8 x i8>, <vscale x 8 x i8>} @vector_deinterleave_nxv8i8_nxv16i
216251
; CHECK-NEXT: uzp1 z0.h, z2.h, z1.h
217252
; CHECK-NEXT: uzp2 z1.h, z2.h, z1.h
218253
; CHECK-NEXT: ret
219-
%retval = call {<vscale x 8 x i8>, <vscale x 8 x i8>} @llvm.vector.deinterleave2.nxv16i8(<vscale x 16 x i8> %vec)
220-
ret {<vscale x 8 x i8>, <vscale x 8 x i8>} %retval
254+
%retval = call {<vscale x 8 x i8>, <vscale x 8 x i8>} @llvm.vector.deinterleave2.nxv16i8(<vscale x 16 x i8> %vec)
255+
ret {<vscale x 8 x i8>, <vscale x 8 x i8>} %retval
221256
}
222257

223258
define {<vscale x 4 x i16>, <vscale x 4 x i16>} @vector_deinterleave_nxv4i16_nxv8i16(<vscale x 8 x i16> %vec) {
@@ -228,8 +263,8 @@ define {<vscale x 4 x i16>, <vscale x 4 x i16>} @vector_deinterleave_nxv4i16_nxv
228263
; CHECK-NEXT: uzp1 z0.s, z2.s, z1.s
229264
; CHECK-NEXT: uzp2 z1.s, z2.s, z1.s
230265
; CHECK-NEXT: ret
231-
%retval = call {<vscale x 4 x i16>, <vscale x 4 x i16>} @llvm.vector.deinterleave2.nxv8i16(<vscale x 8 x i16> %vec)
232-
ret {<vscale x 4 x i16>, <vscale x 4 x i16>} %retval
266+
%retval = call {<vscale x 4 x i16>, <vscale x 4 x i16>} @llvm.vector.deinterleave2.nxv8i16(<vscale x 8 x i16> %vec)
267+
ret {<vscale x 4 x i16>, <vscale x 4 x i16>} %retval
233268
}
234269

235270
define {<vscale x 2 x i32>, <vscale x 2 x i32>} @vector_deinterleave_nxv2i32_nxv4i32(<vscale x 4 x i32> %vec) {
@@ -240,8 +275,8 @@ define {<vscale x 2 x i32>, <vscale x 2 x i32>} @vector_deinterleave_nxv2i32_nxv
240275
; CHECK-NEXT: uzp1 z0.d, z2.d, z1.d
241276
; CHECK-NEXT: uzp2 z1.d, z2.d, z1.d
242277
; CHECK-NEXT: ret
243-
%retval = call {<vscale x 2 x i32>,<vscale x 2 x i32>} @llvm.vector.deinterleave2.nxv4i32(<vscale x 4 x i32> %vec)
244-
ret {<vscale x 2 x i32>, <vscale x 2 x i32>} %retval
278+
%retval = call {<vscale x 2 x i32>,<vscale x 2 x i32>} @llvm.vector.deinterleave2.nxv4i32(<vscale x 4 x i32> %vec)
279+
ret {<vscale x 2 x i32>, <vscale x 2 x i32>} %retval
245280
}
246281

247282

0 commit comments

Comments
 (0)