Skip to content

Commit d6db31d

Browse files
authored
[CIR][CIRGen][Builtin][Neon] Lower neon_vqmovun_v (#1012)
1 parent 6288572 commit d6db31d

File tree

2 files changed

+60
-4
lines changed

2 files changed

+60
-4
lines changed

clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp

+19-4
Original file line numberDiff line numberDiff line change
@@ -2217,6 +2217,13 @@ buildCommonNeonCallPattern0(CIRGenFunction &cgf, llvm::StringRef intrincsName,
22172217
llvm::SmallVectorImpl<mlir::Value> &ops,
22182218
mlir::Type funcResTy, const clang::CallExpr *e) {
22192219
CIRGenBuilderTy &builder = cgf.getBuilder();
2220+
if (argTypes.empty()) {
2221+
// The most common arg types are {funcResTy, funcResTy} for neon intrinsic
2222+
// functions, so it is the default and the call site does not need to
2223+
// provide it. Every neon intrinsic function has at least one argument,
2224+
// so empty argTypes really just means {funcResTy, funcResTy}.
2225+
argTypes = {funcResTy, funcResTy};
2226+
}
22202227
mlir::Value res =
22212228
buildNeonCall(builder, std::move(argTypes), ops, intrincsName, funcResTy,
22222229
cgf.getLoc(e->getExprLoc()));
@@ -2359,6 +2366,7 @@ mlir::Value CIRGenFunction::buildCommonNeonBuiltinExpr(
23592366
// This second switch is for the intrinsics that might have a more generic
23602367
// codegen solution so we can use the common codegen in future.
23612368
llvm::StringRef intrincsName;
2369+
llvm::SmallVector<mlir::Type> argTypes;
23622370
switch (builtinID) {
23632371
default:
23642372
llvm::errs() << getAArch64SIMDIntrinsicString(builtinID) << " ";
@@ -2390,11 +2398,18 @@ mlir::Value CIRGenFunction::buildCommonNeonBuiltinExpr(
23902398
: "llvm.aarch64.neon.srhadd";
23912399
break;
23922400
}
2401+
case NEON::BI__builtin_neon_vqmovun_v: {
2402+
intrincsName = "llvm.aarch64.neon.sqxtun";
2403+
argTypes.push_back(builder.getExtendedOrTruncatedElementVectorType(
2404+
vTy, true /* extended */, true /* signed */));
2405+
break;
23932406
}
2394-
if (!intrincsName.empty())
2395-
return buildCommonNeonCallPattern0(*this, intrincsName, {vTy, vTy}, ops,
2396-
vTy, e);
2397-
return nullptr;
2407+
}
2408+
2409+
if (intrincsName.empty())
2410+
return nullptr;
2411+
return buildCommonNeonCallPattern0(*this, intrincsName, argTypes, ops, vTy,
2412+
e);
23982413
}
23992414

24002415
mlir::Value

clang/test/CIR/CodeGen/AArch64/neon-misc.c

+41
Original file line numberDiff line numberDiff line change
@@ -451,3 +451,44 @@ uint32x4x2_t test_vtrnq_u32(uint32x4_t a, uint32x4_t b) {
451451
// CIR: cir.store [[RES1]], [[ADDR1]] : !cir.vector<!u32i x 4>, !cir.ptr<!cir.vector<!u32i x 4>>
452452
// LLVM: ret %struct.uint32x4x2_t {{.*}}
453453
}
454+
455+
uint8x8_t test_vqmovun_s16(int16x8_t a) {
456+
return vqmovun_s16(a);
457+
458+
// CIR-LABEL: vqmovun_s16
459+
// CIR: {{%.*}} = cir.llvm.intrinsic "llvm.aarch64.neon.sqxtun" {{%.*}} :
460+
// CIR-SAME: (!cir.vector<!s16i x 8>) -> !cir.vector<!u8i x 8>
461+
462+
// LLVM: {{.*}}test_vqmovun_s16(<8 x i16>{{.*}}[[A:%.*]])
463+
// LLVM: [[TMP0:%.*]] = bitcast <8 x i16> [[A]] to <16 x i8>
464+
// LLVM: [[VQMOVUN_V1_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqxtun.v8i8(<8 x i16> [[A]])
465+
// LLVM: ret <8 x i8> [[VQMOVUN_V1_I]]
466+
}
467+
468+
uint16x4_t test_vqmovun_s32(int32x4_t a) {
469+
return vqmovun_s32(a);
470+
471+
// CIR-LABEL: vqmovun_s32
472+
// CIR: {{%.*}} = cir.llvm.intrinsic "llvm.aarch64.neon.sqxtun" {{%.*}} :
473+
// CIR-SAME: (!cir.vector<!s32i x 4>) -> !cir.vector<!u16i x 4>
474+
475+
// LLVM: {{.*}}test_vqmovun_s32(<4 x i32>{{.*}}[[A:%.*]])
476+
// LLVM: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <16 x i8>
477+
// LLVM: [[VQMOVUN_V1_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqxtun.v4i16(<4 x i32> [[A]])
478+
// LLVM: [[VQMOVUN_V2_I:%.*]] = bitcast <4 x i16> [[VQMOVUN_V1_I]] to <8 x i8>
479+
// LLVM: ret <4 x i16> [[VQMOVUN_V1_I]]
480+
}
481+
482+
uint32x2_t test_vqmovun_s64(int64x2_t a) {
483+
return vqmovun_s64(a);
484+
485+
// CIR-LABEL: vqmovun_s64
486+
// CIR: {{%.*}} = cir.llvm.intrinsic "llvm.aarch64.neon.sqxtun" {{%.*}} :
487+
// CIR-SAME: (!cir.vector<!s64i x 2>) -> !cir.vector<!u32i x 2>
488+
489+
// LLVM: {{.*}}test_vqmovun_s64(<2 x i64>{{.*}}[[A:%.*]])
490+
// LLVM: [[TMP0:%.*]] = bitcast <2 x i64> [[A]] to <16 x i8>
491+
// LLVM: [[VQMOVUN_V1_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqxtun.v2i32(<2 x i64> [[A]])
492+
// LLVM: [[VQMOVUN_V2_I:%.*]] = bitcast <2 x i32> [[VQMOVUN_V1_I]] to <8 x i8>
493+
// LLVM: ret <2 x i32> [[VQMOVUN_V1_I]]
494+
}

0 commit comments

Comments
 (0)