Skip to content

Commit 6d6cd8d

Browse files
authored
[CIR][CIRGen][Builtin][Neon] Lower vqdmulhq_lane, vqdmulh_lane, vqrdmulhq_lane and vqrdmulh_lane (#985)
1 parent fc82374 commit 6d6cd8d

File tree

2 files changed

+133
-0
lines changed

2 files changed

+133
-0
lines changed

clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2280,6 +2280,26 @@ mlir::Value CIRGenFunction::buildCommonNeonBuiltinExpr(
22802280
return builder.createVecShuffle(getLoc(e->getExprLoc()), ops[0], ops[1],
22812281
indices);
22822282
}
2283+
case NEON::BI__builtin_neon_vqdmulhq_lane_v:
2284+
case NEON::BI__builtin_neon_vqdmulh_lane_v:
2285+
case NEON::BI__builtin_neon_vqrdmulhq_lane_v:
2286+
case NEON::BI__builtin_neon_vqrdmulh_lane_v: {
2287+
mlir::cir::VectorType resTy =
2288+
(builtinID == NEON::BI__builtin_neon_vqdmulhq_lane_v ||
2289+
builtinID == NEON::BI__builtin_neon_vqrdmulhq_lane_v)
2290+
? mlir::cir::VectorType::get(builder.getContext(), vTy.getEltType(),
2291+
vTy.getSize() * 2)
2292+
: vTy;
2293+
mlir::cir::VectorType mulVecT =
2294+
GetNeonType(this, NeonTypeFlags(neonType.getEltType(), false,
2295+
/*isQuad*/ false));
2296+
return buildNeonCall(builder, {resTy, mulVecT, SInt32Ty}, ops,
2297+
(builtinID == NEON::BI__builtin_neon_vqdmulhq_lane_v ||
2298+
builtinID == NEON::BI__builtin_neon_vqdmulh_lane_v)
2299+
? "llvm.aarch64.neon.sqdmulh.lane"
2300+
: "llvm.aarch64.neon.sqrdmulh.lane",
2301+
resTy, getLoc(e->getExprLoc()));
2302+
}
22832303
}
22842304

22852305
// This second switch is for the intrinsics that might have a more generic

clang/test/CIR/CodeGen/AArch64/neon-arith.c

Lines changed: 113 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -218,3 +218,116 @@ float64x2_t test_vpaddq_f64(float64x2_t a, float64x2_t b) {
218218
// LLVM: {{.*}}test_vpaddq_f64(<2 x double>{{.*}}[[A:%.*]], <2 x double>{{.*}}[[B:%.*]])
219219
// LLVM: [[RES:%.*]] = call <2 x double> @llvm.aarch64.neon.faddp.v2f64(<2 x double> [[A]], <2 x double> [[B]])
220220
// LLVM: ret <2 x double> [[RES]]
221+
222+
int16x4_t test_vqdmulh_lane_s16(int16x4_t a, int16x4_t v) {
223+
return vqdmulh_lane_s16(a, v, 3);
224+
}
225+
226+
// CIR-LABEL: vqdmulh_lane_s16
227+
// CIR: [[LANE:%.*]] = cir.const #cir.int<3> : !s32i
228+
// CIR: {{%.*}} = cir.llvm.intrinsic "llvm.aarch64.neon.sqdmulh.lane" {{%.*}}, {{%.*}}, [[LANE]] :
229+
// CIR-SAME: (!cir.vector<!s16i x 4>, !cir.vector<!s16i x 4>, !s32i) -> !cir.vector<!s16i x 4>
230+
231+
// LLVM: {{.*}}test_vqdmulh_lane_s16(<4 x i16>{{.*}}[[A:%.*]], <4 x i16>{{.*}}[[V:%.*]])
232+
// LLVM: [[RES:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqdmulh.lane.v4i16.v4i16
233+
// LLVM-SAME: (<4 x i16> [[A]], <4 x i16> [[V]], i32 3)
234+
// LLVM: ret <4 x i16> [[RES]]
235+
236+
237+
int32x2_t test_vqdmulh_lane_s32(int32x2_t a, int32x2_t v) {
238+
return vqdmulh_lane_s32(a, v, 1);
239+
}
240+
241+
// CIR-LABEL: vqdmulh_lane_s32
242+
// CIR: [[LANE:%.*]] = cir.const #cir.int<1> : !s32i
243+
// CIR: {{%.*}} = cir.llvm.intrinsic "llvm.aarch64.neon.sqdmulh.lane" {{%.*}}, {{%.*}}, [[LANE]] :
244+
// CIR-SAME: (!cir.vector<!s32i x 2>, !cir.vector<!s32i x 2>, !s32i) -> !cir.vector<!s32i x 2>
245+
246+
// LLVM: {{.*}}test_vqdmulh_lane_s32(<2 x i32>{{.*}}[[A:%.*]], <2 x i32>{{.*}}[[V:%.*]])
247+
// LLVM: [[RES:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqdmulh.lane.v2i32.v2i32
248+
// LLVM-SAME: (<2 x i32> [[A]], <2 x i32> [[V]], i32 1)
249+
// LLVM: ret <2 x i32> [[RES]]
250+
251+
int16x8_t test_vqdmulhq_lane_s16(int16x8_t a, int16x4_t v) {
252+
return vqdmulhq_lane_s16(a, v, 3);
253+
}
254+
255+
// CIR-LABEL: vqdmulhq_lane_s16
256+
// CIR: [[LANE:%.*]] = cir.const #cir.int<3> : !s32i
257+
// CIR: {{%.*}} = cir.llvm.intrinsic "llvm.aarch64.neon.sqdmulh.lane" {{%.*}}, {{%.*}}, [[LANE]] :
258+
// CIR-SAME: (!cir.vector<!s16i x 8>, !cir.vector<!s16i x 4>, !s32i) -> !cir.vector<!s16i x 8>
259+
260+
// LLVM: {{.*}}test_vqdmulhq_lane_s16(<8 x i16>{{.*}}[[A:%.*]], <4 x i16>{{.*}}[[V:%.*]])
261+
// LLVM: [[RES:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqdmulh.lane.v8i16.v4i16
262+
// LLVM-SAME: (<8 x i16> [[A]], <4 x i16> [[V]], i32 3)
263+
// LLVM: ret <8 x i16> [[RES]]
264+
265+
int32x4_t test_vqdmulhq_lane_s32(int32x4_t a, int32x2_t v) {
266+
return vqdmulhq_lane_s32(a, v, 1);
267+
}
268+
269+
// CIR-LABEL: vqdmulhq_lane_s32
270+
// CIR: [[LANE:%.*]] = cir.const #cir.int<1> : !s32i
271+
// CIR: {{%.*}} = cir.llvm.intrinsic "llvm.aarch64.neon.sqdmulh.lane" {{%.*}}, {{%.*}}, [[LANE]] :
272+
// CIR-SAME: (!cir.vector<!s32i x 4>, !cir.vector<!s32i x 2>, !s32i) -> !cir.vector<!s32i x 4>
273+
274+
// LLVM: {{.*}}test_vqdmulhq_lane_s32(<4 x i32>{{.*}}[[A:%.*]], <2 x i32>{{.*}}[[V:%.*]])
275+
// LLVM: [[RES:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmulh.lane.v4i32.v2i32
276+
// LLVM-SAME: (<4 x i32> [[A]], <2 x i32> [[V]], i32 1)
277+
// LLVM: ret <4 x i32> [[RES]]
278+
279+
int16x4_t test_vqrdmulh_lane_s16(int16x4_t a, int16x4_t v) {
280+
return vqrdmulh_lane_s16(a, v, 3);
281+
}
282+
283+
// CIR-LABEL: vqrdmulh_lane_s16
284+
// CIR: [[LANE:%.*]] = cir.const #cir.int<3> : !s32i
285+
// CIR: {{%.*}} = cir.llvm.intrinsic "llvm.aarch64.neon.sqrdmulh.lane" {{%.*}}, {{%.*}}, [[LANE]] :
286+
// CIR-SAME: (!cir.vector<!s16i x 4>, !cir.vector<!s16i x 4>, !s32i) -> !cir.vector<!s16i x 4>
287+
288+
// LLVM: {{.*}}test_vqrdmulh_lane_s16(<4 x i16>{{.*}}[[A:%.*]], <4 x i16>{{.*}}[[V:%.*]])
289+
// LLVM: [[RES:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrdmulh.lane.v4i16.v4i16
290+
// LLVM-SAME: (<4 x i16> [[A]], <4 x i16> [[V]], i32 3)
291+
// LLVM: ret <4 x i16> [[RES]]
292+
293+
int16x8_t test_vqrdmulhq_lane_s16(int16x8_t a, int16x4_t v) {
294+
return vqrdmulhq_lane_s16(a, v, 3);
295+
}
296+
297+
// CIR-LABEL: vqrdmulhq_lane_s16
298+
// CIR: [[LANE:%.*]] = cir.const #cir.int<3> : !s32i
299+
// CIR: {{%.*}} = cir.llvm.intrinsic "llvm.aarch64.neon.sqrdmulh.lane" {{%.*}}, {{%.*}}, [[LANE]] :
300+
// CIR-SAME: (!cir.vector<!s16i x 8>, !cir.vector<!s16i x 4>, !s32i) -> !cir.vector<!s16i x 8>
301+
302+
// LLVM: {{.*}}test_vqrdmulhq_lane_s16(<8 x i16>{{.*}}[[A:%.*]], <4 x i16>{{.*}}[[V:%.*]])
303+
// LLVM: [[RES:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqrdmulh.lane.v8i16.v4i16
304+
// LLVM-SAME: (<8 x i16> [[A]], <4 x i16> [[V]], i32 3)
305+
// LLVM: ret <8 x i16> [[RES]]
306+
307+
int32x2_t test_vqrdmulh_lane_s32(int32x2_t a, int32x2_t v) {
308+
return vqrdmulh_lane_s32(a, v, 1);
309+
}
310+
311+
// CIR-LABEL: vqrdmulh_lane_s32
312+
// CIR: [[LANE:%.*]] = cir.const #cir.int<1> : !s32i
313+
// CIR: {{%.*}} = cir.llvm.intrinsic "llvm.aarch64.neon.sqrdmulh.lane" {{%.*}}, {{%.*}}, [[LANE]] :
314+
// CIR-SAME: (!cir.vector<!s32i x 2>, !cir.vector<!s32i x 2>, !s32i) -> !cir.vector<!s32i x 2>
315+
316+
// LLVM: {{.*}}test_vqrdmulh_lane_s32(<2 x i32>{{.*}}[[A:%.*]], <2 x i32>{{.*}}[[V:%.*]])
317+
// LLVM: [[RES:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqrdmulh.lane.v2i32.v2i32
318+
// LLVM-SAME: (<2 x i32> [[A]], <2 x i32> [[V]], i32 1)
319+
// LLVM: ret <2 x i32> [[RES]]
320+
321+
int32x4_t test_vqrdmulhq_lane_s32(int32x4_t a, int32x2_t v) {
322+
return vqrdmulhq_lane_s32(a, v, 1);
323+
}
324+
325+
// CIR-LABEL: vqrdmulhq_lane_s32
326+
// CIR: [[LANE:%.*]] = cir.const #cir.int<1> : !s32i
327+
// CIR: {{%.*}} = cir.llvm.intrinsic "llvm.aarch64.neon.sqrdmulh.lane" {{%.*}}, {{%.*}}, [[LANE]] :
328+
// CIR-SAME: (!cir.vector<!s32i x 4>, !cir.vector<!s32i x 2>, !s32i) -> !cir.vector<!s32i x 4>
329+
330+
// LLVM: {{.*}}test_vqrdmulhq_lane_s32(<4 x i32>{{.*}}[[A:%.*]], <2 x i32>{{.*}}[[V:%.*]])
331+
// LLVM: [[RES:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqrdmulh.lane.v4i32.v2i32
332+
// LLVM-SAME: (<4 x i32> [[A]], <2 x i32> [[V]], i32 1)
333+
// LLVM: ret <4 x i32> [[RES]]

0 commit comments

Comments
 (0)