From 29f64bc9b2ee2d3517ebcda471ad259ed0148a46 Mon Sep 17 00:00:00 2001 From: Letu Ren Date: Mon, 3 Mar 2025 07:09:38 +0800 Subject: [PATCH 1/9] [CIR][CIRGen][AArch64] Make vrndns emit RoundEvenOp directly --- clang/include/clang/CIR/Dialect/IR/CIROps.td | 1 + .../lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp | 39 ++++++++++++++++++- clang/test/CIR/CodeGen/AArch64/neon-arith.c | 2 +- 3 files changed, 39 insertions(+), 3 deletions(-) diff --git a/clang/include/clang/CIR/Dialect/IR/CIROps.td b/clang/include/clang/CIR/Dialect/IR/CIROps.td index 657bb82df400..871e8e81cbdf 100644 --- a/clang/include/clang/CIR/Dialect/IR/CIROps.td +++ b/clang/include/clang/CIR/Dialect/IR/CIROps.td @@ -4787,6 +4787,7 @@ def Log2Op : UnaryFPToFPBuiltinOp<"log2", "Log2Op">; def NearbyintOp : UnaryFPToFPBuiltinOp<"nearbyint", "NearbyintOp">; def RintOp : UnaryFPToFPBuiltinOp<"rint", "RintOp">; def RoundOp : UnaryFPToFPBuiltinOp<"round", "RoundOp">; +def RoundEvenOp : UnaryFPToFPBuiltinOp<"roundeven", "RoundEvenOp">; def SinOp : UnaryFPToFPBuiltinOp<"sin", "SinOp">; def SqrtOp : UnaryFPToFPBuiltinOp<"sqrt", "SqrtOp">; def TruncOp : UnaryFPToFPBuiltinOp<"trunc", "FTruncOp">; diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp b/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp index 96610d2df193..368bc1b1f33f 100644 --- a/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp @@ -2226,7 +2226,7 @@ static mlir::Value vecReduceIntValue(CIRGenFunction &cgf, mlir::Value val, loc, val, builder.getConstInt(loc, cgf.SizeTy, 0)); } -mlir::Value emitNeonCall(CIRGenBuilderTy &builder, +static mlir::Value emitNeonCall(CIRGenBuilderTy &builder, llvm::SmallVector argTypes, llvm::SmallVectorImpl &args, llvm::StringRef intrinsicName, mlir::Type funcResTy, @@ -2261,6 +2261,41 @@ mlir::Value emitNeonCall(CIRGenBuilderTy &builder, .getResult(); } +template +static mlir::Value emitNeonCall(CIRGenBuilderTy &builder, + llvm::SmallVector argTypes, + llvm::SmallVectorImpl &args, + mlir::Type funcResTy, + mlir::Location loc, + bool isConstrainedFPIntrinsic = false, + unsigned shift = 0, bool rightshift = false) { + // TODO: Consider removing the following unreachable when we have + // emitConstrainedFPCall feature implemented + assert(!cir::MissingFeatures::emitConstrainedFPCall()); + if (isConstrainedFPIntrinsic) + llvm_unreachable("isConstrainedFPIntrinsic NYI"); + + for (unsigned j = 0; j < argTypes.size(); ++j) { + if (isConstrainedFPIntrinsic) { + assert(!cir::MissingFeatures::emitConstrainedFPCall()); + } + if (shift > 0 && shift == j) { + args[j] = emitNeonShiftVector(builder, args[j], + mlir::cast(argTypes[j]), + loc, rightshift); + } else { + args[j] = builder.createBitcast(args[j], argTypes[j]); + } + } + if (isConstrainedFPIntrinsic) { + assert(!cir::MissingFeatures::emitConstrainedFPCall()); + return nullptr; + } + return builder + .create(loc, funcResTy, args) + .getResult(); +} + /// This function `emitCommonNeonCallPattern0` implements a common way /// to generate neon intrinsic call that has following pattern: /// 1. 
There is a need to cast result of the intrinsic call back to @@ -4139,7 +4174,7 @@ CIRGenFunction::emitAArch64BuiltinExpr(unsigned BuiltinID, const CallExpr *E, case NEON::BI__builtin_neon_vrndns_f32: { mlir::Value arg0 = emitScalarExpr(E->getArg(0)); args.push_back(arg0); - return emitNeonCall(builder, {arg0.getType()}, args, "roundeven.f32", + return emitNeonCall(builder, {arg0.getType()}, args, getCIRGenModule().FloatTy, getLoc(E->getExprLoc())); } case NEON::BI__builtin_neon_vrndph_f16: { diff --git a/clang/test/CIR/CodeGen/AArch64/neon-arith.c b/clang/test/CIR/CodeGen/AArch64/neon-arith.c index 9426bd004f4d..fda7e6ef471c 100644 --- a/clang/test/CIR/CodeGen/AArch64/neon-arith.c +++ b/clang/test/CIR/CodeGen/AArch64/neon-arith.c @@ -23,7 +23,7 @@ float32_t test_vrndns_f32(float32_t a) { // CIR: cir.func internal private @vrndns_f32(%arg0: !cir.float {{.*}}) -> !cir.float // CIR: cir.store %arg0, [[ARG_SAVE:%.*]] : !cir.float, !cir.ptr // CIR: [[INTRIN_ARG:%.*]] = cir.load [[ARG_SAVE]] : !cir.ptr, !cir.float -// CIR: {{%.*}} = cir.llvm.intrinsic "roundeven.f32" [[INTRIN_ARG]] : (!cir.float) +// CIR: {{%.*}} = cir.roundeven [[INTRIN_ARG]] : !cir.float // CIR: cir.return {{%.*}} : !cir.float // CIR-LABEL: test_vrndns_f32 From 9bf0f34a8cf86d2755e2f8f61bf8b5320cc6d435 Mon Sep 17 00:00:00 2001 From: Letu Ren Date: Mon, 3 Mar 2025 07:11:06 +0800 Subject: [PATCH 2/9] format code --- .../lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp | 31 +++++++++---------- 1 file changed, 14 insertions(+), 17 deletions(-) diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp b/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp index 368bc1b1f33f..d2bf42f27ad7 100644 --- a/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp @@ -2227,12 +2227,12 @@ static mlir::Value vecReduceIntValue(CIRGenFunction &cgf, mlir::Value val, } static mlir::Value emitNeonCall(CIRGenBuilderTy &builder, - llvm::SmallVector argTypes, - llvm::SmallVectorImpl &args, - llvm::StringRef intrinsicName, mlir::Type funcResTy, - mlir::Location loc, - bool isConstrainedFPIntrinsic = false, - unsigned shift = 0, bool rightshift = false) { + llvm::SmallVector argTypes, + llvm::SmallVectorImpl &args, + llvm::StringRef intrinsicName, + mlir::Type funcResTy, mlir::Location loc, + bool isConstrainedFPIntrinsic = false, + unsigned shift = 0, bool rightshift = false) { // TODO: Consider removing the following unreachable when we have // emitConstrainedFPCall feature implemented assert(!cir::MissingFeatures::emitConstrainedFPCall()); @@ -2262,13 +2262,11 @@ static mlir::Value emitNeonCall(CIRGenBuilderTy &builder, } template -static mlir::Value emitNeonCall(CIRGenBuilderTy &builder, - llvm::SmallVector argTypes, - llvm::SmallVectorImpl &args, - mlir::Type funcResTy, - mlir::Location loc, - bool isConstrainedFPIntrinsic = false, - unsigned shift = 0, bool rightshift = false) { +static mlir::Value +emitNeonCall(CIRGenBuilderTy &builder, llvm::SmallVector argTypes, + llvm::SmallVectorImpl &args, mlir::Type funcResTy, + mlir::Location loc, bool isConstrainedFPIntrinsic = false, + unsigned shift = 0, bool rightshift = false) { // TODO: Consider removing the following unreachable when we have // emitConstrainedFPCall feature implemented assert(!cir::MissingFeatures::emitConstrainedFPCall()); @@ -2291,9 +2289,7 @@ static mlir::Value emitNeonCall(CIRGenBuilderTy &builder, assert(!cir::MissingFeatures::emitConstrainedFPCall()); return nullptr; } - return builder - .create(loc, funcResTy, args) - .getResult(); + return 
builder.create(loc, funcResTy, args).getResult(); } /// This function `emitCommonNeonCallPattern0` implements a common way @@ -4175,7 +4171,8 @@ CIRGenFunction::emitAArch64BuiltinExpr(unsigned BuiltinID, const CallExpr *E, mlir::Value arg0 = emitScalarExpr(E->getArg(0)); args.push_back(arg0); return emitNeonCall(builder, {arg0.getType()}, args, - getCIRGenModule().FloatTy, getLoc(E->getExprLoc())); + getCIRGenModule().FloatTy, + getLoc(E->getExprLoc())); } case NEON::BI__builtin_neon_vrndph_f16: { llvm_unreachable("NEON::BI__builtin_neon_vrndph_f16 NYI"); From f7954b65933d5dc6474c69d7a546a2ee47895999 Mon Sep 17 00:00:00 2001 From: Letu Ren Date: Tue, 4 Mar 2025 15:18:18 +0800 Subject: [PATCH 3/9] extract prepareNeonCallArgs --- .../lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp | 53 +++++++++++-------- 1 file changed, 30 insertions(+), 23 deletions(-) diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp b/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp index d2bf42f27ad7..352a56f17c3c 100644 --- a/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp @@ -1654,7 +1654,9 @@ static const std::pair NEONEquivalentIntrinsicMap[] = { static const ARMVectorIntrinsicInfo AArch64SVEIntrinsicMap[] = { #define GET_SVE_LLVM_INTRINSIC_MAP #include "clang/Basic/BuiltinsAArch64NeonSVEBridge_cg.def" + #include "clang/Basic/arm_sve_builtin_cg.inc" + #undef GET_SVE_LLVM_INTRINSIC_MAP }; @@ -1670,6 +1672,7 @@ static const ARMVectorIntrinsicInfo AArch64SVEIntrinsicMap[] = { static const ARMVectorIntrinsicInfo AArch64SMEIntrinsicMap[] = { #define GET_SME_LLVM_INTRINSIC_MAP #include "clang/Basic/arm_sme_builtin_cg.inc" + #undef GET_SME_LLVM_INTRINSIC_MAP }; @@ -2226,19 +2229,12 @@ static mlir::Value vecReduceIntValue(CIRGenFunction &cgf, mlir::Value val, loc, val, builder.getConstInt(loc, cgf.SizeTy, 0)); } -static mlir::Value emitNeonCall(CIRGenBuilderTy &builder, +static void prepareNeonCallArgs(CIRGenBuilderTy &builder, llvm::SmallVector argTypes, llvm::SmallVectorImpl &args, - llvm::StringRef intrinsicName, - mlir::Type funcResTy, mlir::Location loc, - bool isConstrainedFPIntrinsic = false, - unsigned shift = 0, bool rightshift = false) { - // TODO: Consider removing the following unreachable when we have - // emitConstrainedFPCall feature implemented - assert(!cir::MissingFeatures::emitConstrainedFPCall()); - if (isConstrainedFPIntrinsic) - llvm_unreachable("isConstrainedFPIntrinsic NYI"); - + mlir::Location loc, + bool isConstrainedFPIntrinsic, unsigned shift, + bool rightshift) { for (unsigned j = 0; j < argTypes.size(); ++j) { if (isConstrainedFPIntrinsic) { assert(!cir::MissingFeatures::emitConstrainedFPCall()); @@ -2251,6 +2247,24 @@ static mlir::Value emitNeonCall(CIRGenBuilderTy &builder, args[j] = builder.createBitcast(args[j], argTypes[j]); } } +} + +static mlir::Value emitNeonCall(CIRGenBuilderTy &builder, + llvm::SmallVector argTypes, + llvm::SmallVectorImpl &args, + llvm::StringRef intrinsicName, + mlir::Type funcResTy, mlir::Location loc, + bool isConstrainedFPIntrinsic = false, + unsigned shift = 0, bool rightshift = false) { + // TODO: Consider removing the following unreachable when we have + // emitConstrainedFPCall feature implemented + assert(!cir::MissingFeatures::emitConstrainedFPCall()); + if (isConstrainedFPIntrinsic) + llvm_unreachable("isConstrainedFPIntrinsic NYI"); + + prepareNeonCallArgs(builder, std::move(argTypes), args, loc, + isConstrainedFPIntrinsic, shift, rightshift); + if (isConstrainedFPIntrinsic) { 
assert(!cir::MissingFeatures::emitConstrainedFPCall()); return nullptr; @@ -2261,6 +2275,8 @@ static mlir::Value emitNeonCall(CIRGenBuilderTy &builder, .getResult(); } +// This one is similar to the function above, except for creating an operation +// from template instead of creating a LLVMIntrinsicCallOp template static mlir::Value emitNeonCall(CIRGenBuilderTy &builder, llvm::SmallVector argTypes, @@ -2273,18 +2289,9 @@ emitNeonCall(CIRGenBuilderTy &builder, llvm::SmallVector argTypes, if (isConstrainedFPIntrinsic) llvm_unreachable("isConstrainedFPIntrinsic NYI"); - for (unsigned j = 0; j < argTypes.size(); ++j) { - if (isConstrainedFPIntrinsic) { - assert(!cir::MissingFeatures::emitConstrainedFPCall()); - } - if (shift > 0 && shift == j) { - args[j] = emitNeonShiftVector(builder, args[j], - mlir::cast(argTypes[j]), - loc, rightshift); - } else { - args[j] = builder.createBitcast(args[j], argTypes[j]); - } - } + prepareNeonCallArgs(builder, std::move(argTypes), args, loc, + isConstrainedFPIntrinsic, shift, rightshift); + if (isConstrainedFPIntrinsic) { assert(!cir::MissingFeatures::emitConstrainedFPCall()); return nullptr; From 8fd5411dabd8f527a99a25c2aa879505e396de0b Mon Sep 17 00:00:00 2001 From: Letu Ren Date: Wed, 5 Mar 2025 15:14:25 +0800 Subject: [PATCH 4/9] Revert unintentional reformat --- clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp b/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp index 352a56f17c3c..a19165013219 100644 --- a/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp @@ -1672,7 +1672,6 @@ static const ARMVectorIntrinsicInfo AArch64SVEIntrinsicMap[] = { static const ARMVectorIntrinsicInfo AArch64SMEIntrinsicMap[] = { #define GET_SME_LLVM_INTRINSIC_MAP #include "clang/Basic/arm_sme_builtin_cg.inc" - #undef GET_SME_LLVM_INTRINSIC_MAP }; From e6554177f49fd269529d64d87fb4fa342bb6a522 Mon Sep 17 00:00:00 2001 From: Letu Ren Date: Wed, 5 Mar 2025 15:15:13 +0800 Subject: [PATCH 5/9] Revert unintentional reformat --- clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp | 2 -- 1 file changed, 2 deletions(-) diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp b/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp index a19165013219..1343dab2e451 100644 --- a/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp @@ -1654,9 +1654,7 @@ static const std::pair NEONEquivalentIntrinsicMap[] = { static const ARMVectorIntrinsicInfo AArch64SVEIntrinsicMap[] = { #define GET_SVE_LLVM_INTRINSIC_MAP #include "clang/Basic/BuiltinsAArch64NeonSVEBridge_cg.def" - #include "clang/Basic/arm_sve_builtin_cg.inc" - #undef GET_SVE_LLVM_INTRINSIC_MAP }; From efb01eb3f4e2d9062c2f1e2d878572813c0e4ac5 Mon Sep 17 00:00:00 2001 From: Letu Ren Date: Wed, 5 Mar 2025 15:44:20 +0800 Subject: [PATCH 6/9] Unite template verion and non-template version --- .../lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp | 136 +++++++----------- 1 file changed, 53 insertions(+), 83 deletions(-) diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp b/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp index 1343dab2e451..b59c64cc3395 100644 --- a/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp @@ -2226,12 +2226,19 @@ static mlir::Value vecReduceIntValue(CIRGenFunction &cgf, mlir::Value val, loc, val, builder.getConstInt(loc, cgf.SizeTy, 0)); } -static void prepareNeonCallArgs(CIRGenBuilderTy &builder, 
- llvm::SmallVector argTypes, - llvm::SmallVectorImpl &args, - mlir::Location loc, - bool isConstrainedFPIntrinsic, unsigned shift, - bool rightshift) { +template +static mlir::Value emitNeonCallToOp( + CIRGenBuilderTy &builder, llvm::SmallVector argTypes, + llvm::SmallVectorImpl &args, + std::optional intrinsicName, mlir::Type funcResTy, + mlir::Location loc, bool isConstrainedFPIntrinsic = false, + unsigned shift = 0, bool rightshift = false) { + // TODO: Consider removing the following unreachable when we have + // emitConstrainedFPCall feature implemented + assert(!cir::MissingFeatures::emitConstrainedFPCall()); + if (isConstrainedFPIntrinsic) + llvm_unreachable("isConstrainedFPIntrinsic NYI"); + for (unsigned j = 0; j < argTypes.size(); ++j) { if (isConstrainedFPIntrinsic) { assert(!cir::MissingFeatures::emitConstrainedFPCall()); @@ -2244,56 +2251,19 @@ static void prepareNeonCallArgs(CIRGenBuilderTy &builder, args[j] = builder.createBitcast(args[j], argTypes[j]); } } -} - -static mlir::Value emitNeonCall(CIRGenBuilderTy &builder, - llvm::SmallVector argTypes, - llvm::SmallVectorImpl &args, - llvm::StringRef intrinsicName, - mlir::Type funcResTy, mlir::Location loc, - bool isConstrainedFPIntrinsic = false, - unsigned shift = 0, bool rightshift = false) { - // TODO: Consider removing the following unreachable when we have - // emitConstrainedFPCall feature implemented - assert(!cir::MissingFeatures::emitConstrainedFPCall()); - if (isConstrainedFPIntrinsic) - llvm_unreachable("isConstrainedFPIntrinsic NYI"); - - prepareNeonCallArgs(builder, std::move(argTypes), args, loc, - isConstrainedFPIntrinsic, shift, rightshift); if (isConstrainedFPIntrinsic) { assert(!cir::MissingFeatures::emitConstrainedFPCall()); return nullptr; } - return builder - .create( - loc, builder.getStringAttr(intrinsicName), funcResTy, args) - .getResult(); -} - -// This one is similar to the function above, except for creating an operation -// from template instead of creating a LLVMIntrinsicCallOp -template -static mlir::Value -emitNeonCall(CIRGenBuilderTy &builder, llvm::SmallVector argTypes, - llvm::SmallVectorImpl &args, mlir::Type funcResTy, - mlir::Location loc, bool isConstrainedFPIntrinsic = false, - unsigned shift = 0, bool rightshift = false) { - // TODO: Consider removing the following unreachable when we have - // emitConstrainedFPCall feature implemented - assert(!cir::MissingFeatures::emitConstrainedFPCall()); - if (isConstrainedFPIntrinsic) - llvm_unreachable("isConstrainedFPIntrinsic NYI"); - - prepareNeonCallArgs(builder, std::move(argTypes), args, loc, - isConstrainedFPIntrinsic, shift, rightshift); - - if (isConstrainedFPIntrinsic) { - assert(!cir::MissingFeatures::emitConstrainedFPCall()); - return nullptr; + if constexpr (std::is_same_v) { + return builder + .create(loc, builder.getStringAttr(intrinsicName.value()), + funcResTy, args) + .getResult(); + } else { + return builder.create(loc, funcResTy, args).getResult(); } - return builder.create(loc, funcResTy, args).getResult(); } /// This function `emitCommonNeonCallPattern0` implements a common way @@ -2315,7 +2285,7 @@ emitCommonNeonCallPattern0(CIRGenFunction &cgf, llvm::StringRef intrincsName, argTypes = {funcResTy, funcResTy}; } mlir::Value res = - emitNeonCall(builder, std::move(argTypes), ops, intrincsName, funcResTy, + emitNeonCallToOp(builder, std::move(argTypes), ops, intrincsName, funcResTy, cgf.getLoc(e->getExprLoc())); mlir::Type resultType = cgf.convertType(e->getType()); return builder.createBitcast(res, resultType); @@ -2337,7 
+2307,7 @@ static mlir::Value emitCommonNeonVecAcrossCall(CIRGenFunction &cgf, cir::VectorType vTy = cir::VectorType::get(&cgf.getMLIRContext(), eltTy, vecLen); llvm::SmallVector args{op}; - return emitNeonCall(builder, {vTy}, args, intrincsName, eltTy, + return emitNeonCallToOp(builder, {vTy}, args, intrincsName, eltTy, cgf.getLoc(e->getExprLoc())); } @@ -2420,7 +2390,7 @@ mlir::Value CIRGenFunction::emitCommonNeonBuiltinExpr( case NEON::BI__builtin_neon_vpaddlq_v: { // The source operand type has twice as many elements of half the size. cir::VectorType narrowTy = getHalfEltSizeTwiceNumElemsVecType(builder, vTy); - return emitNeonCall(builder, {narrowTy}, ops, + return emitNeonCallToOp(builder, {narrowTy}, ops, isUnsigned ? "aarch64.neon.uaddlp" : "aarch64.neon.saddlp", vTy, getLoc(e->getExprLoc())); @@ -2431,10 +2401,10 @@ mlir::Value CIRGenFunction::emitCommonNeonBuiltinExpr( cir::VectorType srcVty = builder.getExtendedOrTruncatedElementVectorType( vTy, false, /* truncate */ mlir::cast(vTy.getEltType()).isSigned()); - ops[1] = emitNeonCall(builder, {srcVty, srcVty}, mulOps, + ops[1] = emitNeonCallToOp(builder, {srcVty, srcVty}, mulOps, "aarch64.neon.sqdmull", vTy, getLoc(e->getExprLoc())); ops.resize(2); - return emitNeonCall(builder, {vTy, vTy}, ops, + return emitNeonCallToOp(builder, {vTy, vTy}, ops, builtinID == NEON::BI__builtin_neon_vqdmlal_v ? "aarch64.neon.sqadd" : "aarch64.neon.sqsub", @@ -2472,7 +2442,7 @@ mlir::Value CIRGenFunction::emitCommonNeonBuiltinExpr( cir::VectorType mulVecT = GetNeonType(this, NeonTypeFlags(neonType.getEltType(), false, /*isQuad*/ false)); - return emitNeonCall(builder, {resTy, mulVecT, SInt32Ty}, ops, + return emitNeonCallToOp(builder, {resTy, mulVecT, SInt32Ty}, ops, (builtinID == NEON::BI__builtin_neon_vqdmulhq_lane_v || builtinID == NEON::BI__builtin_neon_vqdmulh_lane_v) ? "aarch64.neon.sqdmulh.lane" @@ -2484,7 +2454,7 @@ mlir::Value CIRGenFunction::emitCommonNeonBuiltinExpr( // These intrinsics expect signed vector type as input, but // return unsigned vector type. cir::VectorType srcTy = getSignChangedVectorType(builder, vTy); - return emitNeonCall(builder, {srcTy, srcTy}, ops, "aarch64.neon.sqshlu", + return emitNeonCallToOp(builder, {srcTy, srcTy}, ops, "aarch64.neon.sqshlu", vTy, getLoc(e->getExprLoc()), false, /* not fp constrained op */ 1, /* second arg is shift amount */ @@ -2492,7 +2462,7 @@ mlir::Value CIRGenFunction::emitCommonNeonBuiltinExpr( } case NEON::BI__builtin_neon_vrshr_n_v: case NEON::BI__builtin_neon_vrshrq_n_v: { - return emitNeonCall( + return emitNeonCallToOp( builder, {vTy, isUnsigned ? getSignChangedVectorType(builder, vTy) : vTy}, ops, isUnsigned ? 
"aarch64.neon.urshl" : "aarch64.neon.srshl", vTy, @@ -2698,25 +2668,25 @@ static mlir::Value emitCommonNeonSISDBuiltinExpr( case NEON::BI__builtin_neon_vaddlvq_s32: llvm_unreachable(" neon_vaddlvq_s32 NYI "); case NEON::BI__builtin_neon_vaddlvq_u32: - return emitNeonCall(builder, {argTy}, ops, "aarch64.neon.uaddlv", resultTy, + return emitNeonCallToOp(builder, {argTy}, ops, "aarch64.neon.uaddlv", resultTy, loc); case NEON::BI__builtin_neon_vaddv_f32: case NEON::BI__builtin_neon_vaddvq_f32: case NEON::BI__builtin_neon_vaddvq_f64: - return emitNeonCall(builder, {argTy}, ops, "aarch64.neon.faddv", resultTy, + return emitNeonCallToOp(builder, {argTy}, ops, "aarch64.neon.faddv", resultTy, loc); case NEON::BI__builtin_neon_vaddv_s32: case NEON::BI__builtin_neon_vaddvq_s32: case NEON::BI__builtin_neon_vaddvq_s64: - return emitNeonCall(builder, {argTy}, ops, "aarch64.neon.saddv", resultTy, + return emitNeonCallToOp(builder, {argTy}, ops, "aarch64.neon.saddv", resultTy, loc); case NEON::BI__builtin_neon_vaddv_u32: case NEON::BI__builtin_neon_vaddvq_u32: case NEON::BI__builtin_neon_vaddvq_u64: - return emitNeonCall(builder, {argTy}, ops, "aarch64.neon.uaddv", resultTy, + return emitNeonCallToOp(builder, {argTy}, ops, "aarch64.neon.uaddv", resultTy, loc); case NEON::BI__builtin_neon_vcaged_f64: { - return emitNeonCall(builder, {argTy}, ops, "aarch64.neon.facge", resultTy, + return emitNeonCallToOp(builder, {argTy}, ops, "aarch64.neon.facge", resultTy, loc); } case NEON::BI__builtin_neon_vcages_f32: @@ -2906,7 +2876,7 @@ static mlir::Value emitCommonNeonSISDBuiltinExpr( cir::VectorType resVecTy = cir::VectorType::get(&(cgf.getMLIRContext()), cgf.SInt16Ty, 4); vecExtendIntValue(cgf, argVecTy, ops[0], loc); - mlir::Value result = emitNeonCall(builder, {argVecTy}, ops, + mlir::Value result = emitNeonCallToOp(builder, {argVecTy}, ops, "aarch64.neon.sqxtn", resVecTy, loc); return vecReduceIntValue(cgf, result, loc); } @@ -2937,7 +2907,7 @@ static mlir::Value emitCommonNeonSISDBuiltinExpr( case NEON::BI__builtin_neon_vqrdmulhh_s16: llvm_unreachable(" neon_vqrdmulhh_s16 NYI "); case NEON::BI__builtin_neon_vqrdmulhs_s32: - return emitNeonCall(builder, {resultTy, resultTy}, ops, + return emitNeonCallToOp(builder, {resultTy, resultTy}, ops, "aarch64.neon.sqrdmulh", resultTy, loc); case NEON::BI__builtin_neon_vqrshlb_s8: llvm_unreachable(" neon_vqrshlb_s8 NYI "); @@ -3843,7 +3813,7 @@ CIRGenFunction::emitAArch64BuiltinExpr(unsigned BuiltinID, const CallExpr *E, assert(APSInt && "Expected argument to be a constant"); Ops[1] = builder.getSInt64(APSInt->getZExtValue(), getLoc(E->getExprLoc())); const StringRef Intrinsic = "aarch64.neon.sqshlu"; - return emitNeonCall(builder, {IntType, IntType}, Ops, Intrinsic, IntType, + return emitNeonCallToOp(builder, {IntType, IntType}, Ops, Intrinsic, IntType, getLoc(E->getExprLoc())); } case NEON::BI__builtin_neon_vqshld_n_u64: @@ -3857,7 +3827,7 @@ CIRGenFunction::emitAArch64BuiltinExpr(unsigned BuiltinID, const CallExpr *E, : "aarch64.neon.sqshl"; Ops.push_back(emitScalarExpr(E->getArg(1))); Ops[1] = builder.createIntCast(Ops[1], IntType); - return emitNeonCall(builder, {IntType, IntType}, Ops, Intrinsic, IntType, + return emitNeonCallToOp(builder, {IntType, IntType}, Ops, Intrinsic, IntType, getLoc(E->getExprLoc())); } case NEON::BI__builtin_neon_vrshrd_n_u64: @@ -3875,7 +3845,7 @@ CIRGenFunction::emitAArch64BuiltinExpr(unsigned BuiltinID, const CallExpr *E, assert(APSInt && "Expected argument to be a constant"); int64_t SV = -APSInt->getSExtValue(); Ops[1] = 
builder.getSInt64(SV, getLoc(E->getExprLoc())); - return emitNeonCall(builder, {IntType, builder.getSInt64Ty()}, Ops, + return emitNeonCallToOp(builder, {IntType, builder.getSInt64Ty()}, Ops, Intrinsic, IntType, getLoc(E->getExprLoc())); } case NEON::BI__builtin_neon_vrsrad_n_u64: @@ -3892,7 +3862,7 @@ CIRGenFunction::emitAArch64BuiltinExpr(unsigned BuiltinID, const CallExpr *E, llvm::SmallVector args = { Ops[1], builder.createIntCast(Ops[2], IntType)}; - Ops[1] = emitNeonCall(builder, {IntType, IntType}, args, Intrinsic, IntType, + Ops[1] = emitNeonCallToOp(builder, {IntType, IntType}, args, Intrinsic, IntType, getLoc(E->getExprLoc())); return builder.createAdd(Ops[0], builder.createBitcast(Ops[1], IntType)); } @@ -4040,7 +4010,7 @@ CIRGenFunction::emitAArch64BuiltinExpr(unsigned BuiltinID, const CallExpr *E, name = "aarch64.neon.pmull"; cir::VectorType argTy = builder.getExtendedOrTruncatedElementVectorType( ty, false /* truncated */, !usgn); - return emitNeonCall(builder, {argTy, argTy}, Ops, name, ty, + return emitNeonCallToOp(builder, {argTy, argTy}, Ops, name, ty, getLoc(E->getExprLoc())); } case NEON::BI__builtin_neon_vmax_v: @@ -4061,7 +4031,7 @@ CIRGenFunction::emitAArch64BuiltinExpr(unsigned BuiltinID, const CallExpr *E, llvm::StringRef name = usgn ? "aarch64.neon.umin" : "aarch64.neon.smin"; if (cir::isFPOrFPVectorTy(ty)) name = "aarch64.neon.fmin"; - return emitNeonCall(builder, {ty, ty}, Ops, name, ty, + return emitNeonCallToOp(builder, {ty, ty}, Ops, name, ty, getLoc(E->getExprLoc())); } case NEON::BI__builtin_neon_vminh_f16: { @@ -4072,7 +4042,7 @@ CIRGenFunction::emitAArch64BuiltinExpr(unsigned BuiltinID, const CallExpr *E, llvm::StringRef name = usgn ? "aarch64.neon.uabd" : "aarch64.neon.sabd"; if (cir::isFPOrFPVectorTy(ty)) name = "aarch64.neon.fabd"; - return emitNeonCall(builder, {ty, ty}, Ops, name, ty, + return emitNeonCallToOp(builder, {ty, ty}, Ops, name, ty, getLoc(E->getExprLoc())); } case NEON::BI__builtin_neon_vpadal_v: @@ -4080,7 +4050,7 @@ CIRGenFunction::emitAArch64BuiltinExpr(unsigned BuiltinID, const CallExpr *E, cir::VectorType argTy = getHalfEltSizeTwiceNumElemsVecType(builder, vTy); mlir::Location loc = getLoc(E->getExprLoc()); llvm::SmallVector args = {Ops[1]}; - mlir::Value tmp = emitNeonCall( + mlir::Value tmp = emitNeonCallToOp( builder, {argTy}, args, usgn ? "aarch64.neon.uaddlp" : "aarch64.neon.saddlp", vTy, loc); mlir::Value addEnd = builder.createBitcast(Ops[0], vTy); @@ -4114,13 +4084,13 @@ CIRGenFunction::emitAArch64BuiltinExpr(unsigned BuiltinID, const CallExpr *E, case NEON::BI__builtin_neon_vqrshrun_n_v: // The prototype of builtin_neon_vqrshrun_n can be found at // https://developer.arm.com/architectures/instruction-sets/intrinsics/ - return emitNeonCall( + return emitNeonCallToOp( builder, {builder.getExtendedOrTruncatedElementVectorType(ty, true, true), SInt32Ty}, Ops, "aarch64.neon.sqrshrun", ty, getLoc(E->getExprLoc())); case NEON::BI__builtin_neon_vqshrn_n_v: - return emitNeonCall( + return emitNeonCallToOp( builder, {builder.getExtendedOrTruncatedElementVectorType( vTy, true /* extend */, @@ -4129,7 +4099,7 @@ CIRGenFunction::emitAArch64BuiltinExpr(unsigned BuiltinID, const CallExpr *E, Ops, usgn ? 
"aarch64.neon.uqshrn" : "aarch64.neon.sqshrn", ty, getLoc(E->getExprLoc())); case NEON::BI__builtin_neon_vrshrn_n_v: - return emitNeonCall( + return emitNeonCallToOp( builder, {builder.getExtendedOrTruncatedElementVectorType( vTy, true /* extend */, @@ -4137,7 +4107,7 @@ CIRGenFunction::emitAArch64BuiltinExpr(unsigned BuiltinID, const CallExpr *E, SInt32Ty}, Ops, "aarch64.neon.rshrn", ty, getLoc(E->getExprLoc())); case NEON::BI__builtin_neon_vqrshrn_n_v: - return emitNeonCall( + return emitNeonCallToOp( builder, {builder.getExtendedOrTruncatedElementVectorType( vTy, true /* extend */, @@ -4151,7 +4121,7 @@ CIRGenFunction::emitAArch64BuiltinExpr(unsigned BuiltinID, const CallExpr *E, case NEON::BI__builtin_neon_vrnda_v: case NEON::BI__builtin_neon_vrndaq_v: { assert(!cir::MissingFeatures::emitConstrainedFPCall()); - return emitNeonCall(builder, {ty}, Ops, "round", ty, + return emitNeonCallToOp(builder, {ty}, Ops, "round", ty, getLoc(E->getExprLoc())); } case NEON::BI__builtin_neon_vrndih_f16: { @@ -4174,7 +4144,7 @@ CIRGenFunction::emitAArch64BuiltinExpr(unsigned BuiltinID, const CallExpr *E, case NEON::BI__builtin_neon_vrndns_f32: { mlir::Value arg0 = emitScalarExpr(E->getArg(0)); args.push_back(arg0); - return emitNeonCall(builder, {arg0.getType()}, args, + return emitNeonCallToOp(builder, {arg0.getType()}, args, std::nullopt, getCIRGenModule().FloatTy, getLoc(E->getExprLoc())); } @@ -4334,7 +4304,7 @@ CIRGenFunction::emitAArch64BuiltinExpr(unsigned BuiltinID, const CallExpr *E, cir::VectorType vTy = cir::VectorType::get(builder.getContext(), eltTy, 4); Ops.push_back(emitScalarExpr(E->getArg(0))); // This is to add across the vector elements, so wider result type needed. - Ops[0] = emitNeonCall(builder, {vTy}, Ops, + Ops[0] = emitNeonCallToOp(builder, {vTy}, Ops, usgn ? "aarch64.neon.uaddv" : "aarch64.neon.saddv", SInt32Ty, getLoc(E->getExprLoc())); return builder.createIntCast(Ops[0], eltTy); @@ -4443,7 +4413,7 @@ CIRGenFunction::emitAArch64BuiltinExpr(unsigned BuiltinID, const CallExpr *E, mlir::Type argTy = cir::VectorType::get(builder.getContext(), usgn ? UInt16Ty : SInt16Ty, 8); llvm::SmallVector argOps = {emitScalarExpr(E->getArg(0))}; - return emitNeonCall(builder, {argTy}, argOps, + return emitNeonCallToOp(builder, {argTy}, argOps, usgn ? "aarch64.neon.uaddlv" : "aarch64.neon.saddlv", usgn ? UInt32Ty : SInt32Ty, getLoc(E->getExprLoc())); } @@ -4457,7 +4427,7 @@ CIRGenFunction::emitAArch64BuiltinExpr(unsigned BuiltinID, const CallExpr *E, mlir::Type argTy = cir::VectorType::get(builder.getContext(), usgn ? UInt16Ty : SInt16Ty, 4); llvm::SmallVector argOps = {emitScalarExpr(E->getArg(0))}; - return emitNeonCall(builder, {argTy}, argOps, + return emitNeonCallToOp(builder, {argTy}, argOps, usgn ? "aarch64.neon.uaddlv" : "aarch64.neon.saddlv", usgn ? UInt32Ty : SInt32Ty, getLoc(E->getExprLoc())); } @@ -4487,7 +4457,7 @@ CIRGenFunction::emitAArch64BuiltinExpr(unsigned BuiltinID, const CallExpr *E, cir::VectorType shitAmtVecTy = usgn ? getSignChangedVectorType(builder, vTy) : vTy; mlir::Value tmp = - emitNeonCall(builder, {vTy, shitAmtVecTy}, tmpOps, + emitNeonCallToOp(builder, {vTy, shitAmtVecTy}, tmpOps, usgn ? 
"aarch64.neon.urshl" : "aarch64.neon.srshl", vTy, getLoc(E->getExprLoc()), false, 1 /* shift amount is args[1]*/, true /* right shift */); From 6bd8f1fc4cfc02c249baee792317af973ce9d58a Mon Sep 17 00:00:00 2001 From: Letu Ren Date: Wed, 5 Mar 2025 15:45:31 +0800 Subject: [PATCH 7/9] Restore unintentional change --- clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp b/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp index b59c64cc3395..250129ae0c02 100644 --- a/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp @@ -2251,7 +2251,6 @@ static mlir::Value emitNeonCallToOp( args[j] = builder.createBitcast(args[j], argTypes[j]); } } - if (isConstrainedFPIntrinsic) { assert(!cir::MissingFeatures::emitConstrainedFPCall()); return nullptr; From d406aaeb7a8060be114a2e849faee5751e5bbd4c Mon Sep 17 00:00:00 2001 From: Letu Ren Date: Wed, 5 Mar 2025 15:49:49 +0800 Subject: [PATCH 8/9] format code --- .../lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp | 157 +++++++++--------- 1 file changed, 83 insertions(+), 74 deletions(-) diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp b/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp index 250129ae0c02..e853418d716e 100644 --- a/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp @@ -2283,9 +2283,9 @@ emitCommonNeonCallPattern0(CIRGenFunction &cgf, llvm::StringRef intrincsName, // Thus empty argTypes really just means {funcResTy, funcResTy}. argTypes = {funcResTy, funcResTy}; } - mlir::Value res = - emitNeonCallToOp(builder, std::move(argTypes), ops, intrincsName, funcResTy, - cgf.getLoc(e->getExprLoc())); + mlir::Value res = emitNeonCallToOp( + builder, std::move(argTypes), ops, intrincsName, funcResTy, + cgf.getLoc(e->getExprLoc())); mlir::Type resultType = cgf.convertType(e->getType()); return builder.createBitcast(res, resultType); } @@ -2306,8 +2306,8 @@ static mlir::Value emitCommonNeonVecAcrossCall(CIRGenFunction &cgf, cir::VectorType vTy = cir::VectorType::get(&cgf.getMLIRContext(), eltTy, vecLen); llvm::SmallVector args{op}; - return emitNeonCallToOp(builder, {vTy}, args, intrincsName, eltTy, - cgf.getLoc(e->getExprLoc())); + return emitNeonCallToOp( + builder, {vTy}, args, intrincsName, eltTy, cgf.getLoc(e->getExprLoc())); } mlir::Value CIRGenFunction::emitCommonNeonBuiltinExpr( @@ -2389,10 +2389,10 @@ mlir::Value CIRGenFunction::emitCommonNeonBuiltinExpr( case NEON::BI__builtin_neon_vpaddlq_v: { // The source operand type has twice as many elements of half the size. cir::VectorType narrowTy = getHalfEltSizeTwiceNumElemsVecType(builder, vTy); - return emitNeonCallToOp(builder, {narrowTy}, ops, - isUnsigned ? "aarch64.neon.uaddlp" - : "aarch64.neon.saddlp", - vTy, getLoc(e->getExprLoc())); + return emitNeonCallToOp( + builder, {narrowTy}, ops, + isUnsigned ? 
"aarch64.neon.uaddlp" : "aarch64.neon.saddlp", vTy, + getLoc(e->getExprLoc())); } case NEON::BI__builtin_neon_vqdmlal_v: case NEON::BI__builtin_neon_vqdmlsl_v: { @@ -2400,14 +2400,15 @@ mlir::Value CIRGenFunction::emitCommonNeonBuiltinExpr( cir::VectorType srcVty = builder.getExtendedOrTruncatedElementVectorType( vTy, false, /* truncate */ mlir::cast(vTy.getEltType()).isSigned()); - ops[1] = emitNeonCallToOp(builder, {srcVty, srcVty}, mulOps, - "aarch64.neon.sqdmull", vTy, getLoc(e->getExprLoc())); + ops[1] = emitNeonCallToOp( + builder, {srcVty, srcVty}, mulOps, "aarch64.neon.sqdmull", vTy, + getLoc(e->getExprLoc())); ops.resize(2); - return emitNeonCallToOp(builder, {vTy, vTy}, ops, - builtinID == NEON::BI__builtin_neon_vqdmlal_v - ? "aarch64.neon.sqadd" - : "aarch64.neon.sqsub", - vTy, getLoc(e->getExprLoc())); + return emitNeonCallToOp( + builder, {vTy, vTy}, ops, + builtinID == NEON::BI__builtin_neon_vqdmlal_v ? "aarch64.neon.sqadd" + : "aarch64.neon.sqsub", + vTy, getLoc(e->getExprLoc())); } case NEON::BI__builtin_neon_vcvt_f32_v: case NEON::BI__builtin_neon_vcvtq_f32_v: { @@ -2441,23 +2442,24 @@ mlir::Value CIRGenFunction::emitCommonNeonBuiltinExpr( cir::VectorType mulVecT = GetNeonType(this, NeonTypeFlags(neonType.getEltType(), false, /*isQuad*/ false)); - return emitNeonCallToOp(builder, {resTy, mulVecT, SInt32Ty}, ops, - (builtinID == NEON::BI__builtin_neon_vqdmulhq_lane_v || - builtinID == NEON::BI__builtin_neon_vqdmulh_lane_v) - ? "aarch64.neon.sqdmulh.lane" - : "aarch64.neon.sqrdmulh.lane", - resTy, getLoc(e->getExprLoc())); + return emitNeonCallToOp( + builder, {resTy, mulVecT, SInt32Ty}, ops, + (builtinID == NEON::BI__builtin_neon_vqdmulhq_lane_v || + builtinID == NEON::BI__builtin_neon_vqdmulh_lane_v) + ? "aarch64.neon.sqdmulh.lane" + : "aarch64.neon.sqrdmulh.lane", + resTy, getLoc(e->getExprLoc())); } case NEON::BI__builtin_neon_vqshlu_n_v: case NEON::BI__builtin_neon_vqshluq_n_v: { // These intrinsics expect signed vector type as input, but // return unsigned vector type. 
cir::VectorType srcTy = getSignChangedVectorType(builder, vTy); - return emitNeonCallToOp(builder, {srcTy, srcTy}, ops, "aarch64.neon.sqshlu", - vTy, getLoc(e->getExprLoc()), - false, /* not fp constrained op */ - 1, /* second arg is shift amount */ - false /* leftshift */); + return emitNeonCallToOp( + builder, {srcTy, srcTy}, ops, "aarch64.neon.sqshlu", vTy, + getLoc(e->getExprLoc()), false, /* not fp constrained op */ + 1, /* second arg is shift amount */ + false /* leftshift */); } case NEON::BI__builtin_neon_vrshr_n_v: case NEON::BI__builtin_neon_vrshrq_n_v: { @@ -2667,26 +2669,26 @@ static mlir::Value emitCommonNeonSISDBuiltinExpr( case NEON::BI__builtin_neon_vaddlvq_s32: llvm_unreachable(" neon_vaddlvq_s32 NYI "); case NEON::BI__builtin_neon_vaddlvq_u32: - return emitNeonCallToOp(builder, {argTy}, ops, "aarch64.neon.uaddlv", resultTy, - loc); + return emitNeonCallToOp( + builder, {argTy}, ops, "aarch64.neon.uaddlv", resultTy, loc); case NEON::BI__builtin_neon_vaddv_f32: case NEON::BI__builtin_neon_vaddvq_f32: case NEON::BI__builtin_neon_vaddvq_f64: - return emitNeonCallToOp(builder, {argTy}, ops, "aarch64.neon.faddv", resultTy, - loc); + return emitNeonCallToOp( + builder, {argTy}, ops, "aarch64.neon.faddv", resultTy, loc); case NEON::BI__builtin_neon_vaddv_s32: case NEON::BI__builtin_neon_vaddvq_s32: case NEON::BI__builtin_neon_vaddvq_s64: - return emitNeonCallToOp(builder, {argTy}, ops, "aarch64.neon.saddv", resultTy, - loc); + return emitNeonCallToOp( + builder, {argTy}, ops, "aarch64.neon.saddv", resultTy, loc); case NEON::BI__builtin_neon_vaddv_u32: case NEON::BI__builtin_neon_vaddvq_u32: case NEON::BI__builtin_neon_vaddvq_u64: - return emitNeonCallToOp(builder, {argTy}, ops, "aarch64.neon.uaddv", resultTy, - loc); + return emitNeonCallToOp( + builder, {argTy}, ops, "aarch64.neon.uaddv", resultTy, loc); case NEON::BI__builtin_neon_vcaged_f64: { - return emitNeonCallToOp(builder, {argTy}, ops, "aarch64.neon.facge", resultTy, - loc); + return emitNeonCallToOp( + builder, {argTy}, ops, "aarch64.neon.facge", resultTy, loc); } case NEON::BI__builtin_neon_vcages_f32: llvm_unreachable(" neon_vcages_f32 NYI "); @@ -2875,8 +2877,8 @@ static mlir::Value emitCommonNeonSISDBuiltinExpr( cir::VectorType resVecTy = cir::VectorType::get(&(cgf.getMLIRContext()), cgf.SInt16Ty, 4); vecExtendIntValue(cgf, argVecTy, ops[0], loc); - mlir::Value result = emitNeonCallToOp(builder, {argVecTy}, ops, - "aarch64.neon.sqxtn", resVecTy, loc); + mlir::Value result = emitNeonCallToOp( + builder, {argVecTy}, ops, "aarch64.neon.sqxtn", resVecTy, loc); return vecReduceIntValue(cgf, result, loc); } case NEON::BI__builtin_neon_vqmovns_u32: @@ -2906,8 +2908,9 @@ static mlir::Value emitCommonNeonSISDBuiltinExpr( case NEON::BI__builtin_neon_vqrdmulhh_s16: llvm_unreachable(" neon_vqrdmulhh_s16 NYI "); case NEON::BI__builtin_neon_vqrdmulhs_s32: - return emitNeonCallToOp(builder, {resultTy, resultTy}, ops, - "aarch64.neon.sqrdmulh", resultTy, loc); + return emitNeonCallToOp( + builder, {resultTy, resultTy}, ops, "aarch64.neon.sqrdmulh", resultTy, + loc); case NEON::BI__builtin_neon_vqrshlb_s8: llvm_unreachable(" neon_vqrshlb_s8 NYI "); case NEON::BI__builtin_neon_vqrshlb_u8: @@ -3812,8 +3815,9 @@ CIRGenFunction::emitAArch64BuiltinExpr(unsigned BuiltinID, const CallExpr *E, assert(APSInt && "Expected argument to be a constant"); Ops[1] = builder.getSInt64(APSInt->getZExtValue(), getLoc(E->getExprLoc())); const StringRef Intrinsic = "aarch64.neon.sqshlu"; - return emitNeonCallToOp(builder, {IntType, IntType}, Ops, 
Intrinsic, IntType, - getLoc(E->getExprLoc())); + return emitNeonCallToOp( + builder, {IntType, IntType}, Ops, Intrinsic, IntType, + getLoc(E->getExprLoc())); } case NEON::BI__builtin_neon_vqshld_n_u64: case NEON::BI__builtin_neon_vqshld_n_s64: { @@ -3826,8 +3830,9 @@ CIRGenFunction::emitAArch64BuiltinExpr(unsigned BuiltinID, const CallExpr *E, : "aarch64.neon.sqshl"; Ops.push_back(emitScalarExpr(E->getArg(1))); Ops[1] = builder.createIntCast(Ops[1], IntType); - return emitNeonCallToOp(builder, {IntType, IntType}, Ops, Intrinsic, IntType, - getLoc(E->getExprLoc())); + return emitNeonCallToOp( + builder, {IntType, IntType}, Ops, Intrinsic, IntType, + getLoc(E->getExprLoc())); } case NEON::BI__builtin_neon_vrshrd_n_u64: case NEON::BI__builtin_neon_vrshrd_n_s64: { @@ -3844,8 +3849,9 @@ CIRGenFunction::emitAArch64BuiltinExpr(unsigned BuiltinID, const CallExpr *E, assert(APSInt && "Expected argument to be a constant"); int64_t SV = -APSInt->getSExtValue(); Ops[1] = builder.getSInt64(SV, getLoc(E->getExprLoc())); - return emitNeonCallToOp(builder, {IntType, builder.getSInt64Ty()}, Ops, - Intrinsic, IntType, getLoc(E->getExprLoc())); + return emitNeonCallToOp( + builder, {IntType, builder.getSInt64Ty()}, Ops, Intrinsic, IntType, + getLoc(E->getExprLoc())); } case NEON::BI__builtin_neon_vrsrad_n_u64: case NEON::BI__builtin_neon_vrsrad_n_s64: { @@ -3861,8 +3867,9 @@ CIRGenFunction::emitAArch64BuiltinExpr(unsigned BuiltinID, const CallExpr *E, llvm::SmallVector args = { Ops[1], builder.createIntCast(Ops[2], IntType)}; - Ops[1] = emitNeonCallToOp(builder, {IntType, IntType}, args, Intrinsic, IntType, - getLoc(E->getExprLoc())); + Ops[1] = emitNeonCallToOp( + builder, {IntType, IntType}, args, Intrinsic, IntType, + getLoc(E->getExprLoc())); return builder.createAdd(Ops[0], builder.createBitcast(Ops[1], IntType)); } case NEON::BI__builtin_neon_vshld_n_s64: @@ -4009,8 +4016,8 @@ CIRGenFunction::emitAArch64BuiltinExpr(unsigned BuiltinID, const CallExpr *E, name = "aarch64.neon.pmull"; cir::VectorType argTy = builder.getExtendedOrTruncatedElementVectorType( ty, false /* truncated */, !usgn); - return emitNeonCallToOp(builder, {argTy, argTy}, Ops, name, ty, - getLoc(E->getExprLoc())); + return emitNeonCallToOp( + builder, {argTy, argTy}, Ops, name, ty, getLoc(E->getExprLoc())); } case NEON::BI__builtin_neon_vmax_v: case NEON::BI__builtin_neon_vmaxq_v: { @@ -4030,8 +4037,8 @@ CIRGenFunction::emitAArch64BuiltinExpr(unsigned BuiltinID, const CallExpr *E, llvm::StringRef name = usgn ? "aarch64.neon.umin" : "aarch64.neon.smin"; if (cir::isFPOrFPVectorTy(ty)) name = "aarch64.neon.fmin"; - return emitNeonCallToOp(builder, {ty, ty}, Ops, name, ty, - getLoc(E->getExprLoc())); + return emitNeonCallToOp( + builder, {ty, ty}, Ops, name, ty, getLoc(E->getExprLoc())); } case NEON::BI__builtin_neon_vminh_f16: { llvm_unreachable("NEON::BI__builtin_neon_vminh_f16 NYI"); @@ -4041,8 +4048,8 @@ CIRGenFunction::emitAArch64BuiltinExpr(unsigned BuiltinID, const CallExpr *E, llvm::StringRef name = usgn ? 
"aarch64.neon.uabd" : "aarch64.neon.sabd"; if (cir::isFPOrFPVectorTy(ty)) name = "aarch64.neon.fabd"; - return emitNeonCallToOp(builder, {ty, ty}, Ops, name, ty, - getLoc(E->getExprLoc())); + return emitNeonCallToOp( + builder, {ty, ty}, Ops, name, ty, getLoc(E->getExprLoc())); } case NEON::BI__builtin_neon_vpadal_v: case NEON::BI__builtin_neon_vpadalq_v: { @@ -4120,8 +4127,8 @@ CIRGenFunction::emitAArch64BuiltinExpr(unsigned BuiltinID, const CallExpr *E, case NEON::BI__builtin_neon_vrnda_v: case NEON::BI__builtin_neon_vrndaq_v: { assert(!cir::MissingFeatures::emitConstrainedFPCall()); - return emitNeonCallToOp(builder, {ty}, Ops, "round", ty, - getLoc(E->getExprLoc())); + return emitNeonCallToOp( + builder, {ty}, Ops, "round", ty, getLoc(E->getExprLoc())); } case NEON::BI__builtin_neon_vrndih_f16: { llvm_unreachable("NEON::BI__builtin_neon_vrndih_f16 NYI"); @@ -4143,9 +4150,9 @@ CIRGenFunction::emitAArch64BuiltinExpr(unsigned BuiltinID, const CallExpr *E, case NEON::BI__builtin_neon_vrndns_f32: { mlir::Value arg0 = emitScalarExpr(E->getArg(0)); args.push_back(arg0); - return emitNeonCallToOp(builder, {arg0.getType()}, args, std::nullopt, - getCIRGenModule().FloatTy, - getLoc(E->getExprLoc())); + return emitNeonCallToOp( + builder, {arg0.getType()}, args, std::nullopt, + getCIRGenModule().FloatTy, getLoc(E->getExprLoc())); } case NEON::BI__builtin_neon_vrndph_f16: { llvm_unreachable("NEON::BI__builtin_neon_vrndph_f16 NYI"); @@ -4303,9 +4310,9 @@ CIRGenFunction::emitAArch64BuiltinExpr(unsigned BuiltinID, const CallExpr *E, cir::VectorType vTy = cir::VectorType::get(builder.getContext(), eltTy, 4); Ops.push_back(emitScalarExpr(E->getArg(0))); // This is to add across the vector elements, so wider result type needed. - Ops[0] = emitNeonCallToOp(builder, {vTy}, Ops, - usgn ? "aarch64.neon.uaddv" : "aarch64.neon.saddv", - SInt32Ty, getLoc(E->getExprLoc())); + Ops[0] = emitNeonCallToOp( + builder, {vTy}, Ops, usgn ? "aarch64.neon.uaddv" : "aarch64.neon.saddv", + SInt32Ty, getLoc(E->getExprLoc())); return builder.createIntCast(Ops[0], eltTy); } case NEON::BI__builtin_neon_vaddvq_u8: @@ -4412,9 +4419,10 @@ CIRGenFunction::emitAArch64BuiltinExpr(unsigned BuiltinID, const CallExpr *E, mlir::Type argTy = cir::VectorType::get(builder.getContext(), usgn ? UInt16Ty : SInt16Ty, 8); llvm::SmallVector argOps = {emitScalarExpr(E->getArg(0))}; - return emitNeonCallToOp(builder, {argTy}, argOps, - usgn ? "aarch64.neon.uaddlv" : "aarch64.neon.saddlv", - usgn ? UInt32Ty : SInt32Ty, getLoc(E->getExprLoc())); + return emitNeonCallToOp( + builder, {argTy}, argOps, + usgn ? "aarch64.neon.uaddlv" : "aarch64.neon.saddlv", + usgn ? UInt32Ty : SInt32Ty, getLoc(E->getExprLoc())); } case NEON::BI__builtin_neon_vaddlv_s8: { llvm_unreachable("NEON::BI__builtin_neon_vaddlv_s8 NYI"); @@ -4426,9 +4434,10 @@ CIRGenFunction::emitAArch64BuiltinExpr(unsigned BuiltinID, const CallExpr *E, mlir::Type argTy = cir::VectorType::get(builder.getContext(), usgn ? UInt16Ty : SInt16Ty, 4); llvm::SmallVector argOps = {emitScalarExpr(E->getArg(0))}; - return emitNeonCallToOp(builder, {argTy}, argOps, - usgn ? "aarch64.neon.uaddlv" : "aarch64.neon.saddlv", - usgn ? UInt32Ty : SInt32Ty, getLoc(E->getExprLoc())); + return emitNeonCallToOp( + builder, {argTy}, argOps, + usgn ? "aarch64.neon.uaddlv" : "aarch64.neon.saddlv", + usgn ? 
UInt32Ty : SInt32Ty, getLoc(E->getExprLoc())); } case NEON::BI__builtin_neon_vaddlvq_s8: { llvm_unreachable("NEON::BI__builtin_neon_vaddlvq_s8 NYI"); @@ -4455,11 +4464,11 @@ CIRGenFunction::emitAArch64BuiltinExpr(unsigned BuiltinID, const CallExpr *E, // Thus we have to make shift amount vec type to be signed. cir::VectorType shitAmtVecTy = usgn ? getSignChangedVectorType(builder, vTy) : vTy; - mlir::Value tmp = - emitNeonCallToOp(builder, {vTy, shitAmtVecTy}, tmpOps, - usgn ? "aarch64.neon.urshl" : "aarch64.neon.srshl", vTy, - getLoc(E->getExprLoc()), false, - 1 /* shift amount is args[1]*/, true /* right shift */); + mlir::Value tmp = emitNeonCallToOp( + builder, {vTy, shitAmtVecTy}, tmpOps, + usgn ? "aarch64.neon.urshl" : "aarch64.neon.srshl", vTy, + getLoc(E->getExprLoc()), false, 1 /* shift amount is args[1]*/, + true /* right shift */); Ops[0] = builder.createBitcast(Ops[0], vTy); return builder.createBinop(Ops[0], cir::BinOpKind::Add, tmp); } From a749d73f326ad95ffeb54fbb1047bfe7a03c15b2 Mon Sep 17 00:00:00 2001 From: Letu Ren Date: Thu, 6 Mar 2025 15:56:27 +0800 Subject: [PATCH 9/9] add emitNeonCall back --- .../lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp | 175 +++++++++--------- 1 file changed, 89 insertions(+), 86 deletions(-) diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp b/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp index e853418d716e..b75b5422065b 100644 --- a/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp @@ -2265,6 +2265,18 @@ static mlir::Value emitNeonCallToOp( } } +static mlir::Value emitNeonCall(CIRGenBuilderTy &builder, + llvm::SmallVector argTypes, + llvm::SmallVectorImpl &args, + llvm::StringRef intrinsicName, + mlir::Type funcResTy, mlir::Location loc, + bool isConstrainedFPIntrinsic = false, + unsigned shift = 0, bool rightshift = false) { + return emitNeonCallToOp( + builder, std::move(argTypes), args, intrinsicName, funcResTy, loc, + isConstrainedFPIntrinsic, shift, rightshift); +} + /// This function `emitCommonNeonCallPattern0` implements a common way /// to generate neon intrinsic call that has following pattern: /// 1. There is a need to cast result of the intrinsic call back to @@ -2283,9 +2295,9 @@ emitCommonNeonCallPattern0(CIRGenFunction &cgf, llvm::StringRef intrincsName, // Thus empty argTypes really just means {funcResTy, funcResTy}. argTypes = {funcResTy, funcResTy}; } - mlir::Value res = emitNeonCallToOp( - builder, std::move(argTypes), ops, intrincsName, funcResTy, - cgf.getLoc(e->getExprLoc())); + mlir::Value res = + emitNeonCall(builder, std::move(argTypes), ops, intrincsName, funcResTy, + cgf.getLoc(e->getExprLoc())); mlir::Type resultType = cgf.convertType(e->getType()); return builder.createBitcast(res, resultType); } @@ -2306,8 +2318,8 @@ static mlir::Value emitCommonNeonVecAcrossCall(CIRGenFunction &cgf, cir::VectorType vTy = cir::VectorType::get(&cgf.getMLIRContext(), eltTy, vecLen); llvm::SmallVector args{op}; - return emitNeonCallToOp( - builder, {vTy}, args, intrincsName, eltTy, cgf.getLoc(e->getExprLoc())); + return emitNeonCall(builder, {vTy}, args, intrincsName, eltTy, + cgf.getLoc(e->getExprLoc())); } mlir::Value CIRGenFunction::emitCommonNeonBuiltinExpr( @@ -2389,10 +2401,10 @@ mlir::Value CIRGenFunction::emitCommonNeonBuiltinExpr( case NEON::BI__builtin_neon_vpaddlq_v: { // The source operand type has twice as many elements of half the size. 
cir::VectorType narrowTy = getHalfEltSizeTwiceNumElemsVecType(builder, vTy); - return emitNeonCallToOp( - builder, {narrowTy}, ops, - isUnsigned ? "aarch64.neon.uaddlp" : "aarch64.neon.saddlp", vTy, - getLoc(e->getExprLoc())); + return emitNeonCall(builder, {narrowTy}, ops, + isUnsigned ? "aarch64.neon.uaddlp" + : "aarch64.neon.saddlp", + vTy, getLoc(e->getExprLoc())); } case NEON::BI__builtin_neon_vqdmlal_v: case NEON::BI__builtin_neon_vqdmlsl_v: { @@ -2400,15 +2412,14 @@ mlir::Value CIRGenFunction::emitCommonNeonBuiltinExpr( cir::VectorType srcVty = builder.getExtendedOrTruncatedElementVectorType( vTy, false, /* truncate */ mlir::cast(vTy.getEltType()).isSigned()); - ops[1] = emitNeonCallToOp( - builder, {srcVty, srcVty}, mulOps, "aarch64.neon.sqdmull", vTy, - getLoc(e->getExprLoc())); + ops[1] = emitNeonCall(builder, {srcVty, srcVty}, mulOps, + "aarch64.neon.sqdmull", vTy, getLoc(e->getExprLoc())); ops.resize(2); - return emitNeonCallToOp( - builder, {vTy, vTy}, ops, - builtinID == NEON::BI__builtin_neon_vqdmlal_v ? "aarch64.neon.sqadd" - : "aarch64.neon.sqsub", - vTy, getLoc(e->getExprLoc())); + return emitNeonCall(builder, {vTy, vTy}, ops, + builtinID == NEON::BI__builtin_neon_vqdmlal_v + ? "aarch64.neon.sqadd" + : "aarch64.neon.sqsub", + vTy, getLoc(e->getExprLoc())); } case NEON::BI__builtin_neon_vcvt_f32_v: case NEON::BI__builtin_neon_vcvtq_f32_v: { @@ -2442,28 +2453,27 @@ mlir::Value CIRGenFunction::emitCommonNeonBuiltinExpr( cir::VectorType mulVecT = GetNeonType(this, NeonTypeFlags(neonType.getEltType(), false, /*isQuad*/ false)); - return emitNeonCallToOp( - builder, {resTy, mulVecT, SInt32Ty}, ops, - (builtinID == NEON::BI__builtin_neon_vqdmulhq_lane_v || - builtinID == NEON::BI__builtin_neon_vqdmulh_lane_v) - ? "aarch64.neon.sqdmulh.lane" - : "aarch64.neon.sqrdmulh.lane", - resTy, getLoc(e->getExprLoc())); + return emitNeonCall(builder, {resTy, mulVecT, SInt32Ty}, ops, + (builtinID == NEON::BI__builtin_neon_vqdmulhq_lane_v || + builtinID == NEON::BI__builtin_neon_vqdmulh_lane_v) + ? "aarch64.neon.sqdmulh.lane" + : "aarch64.neon.sqrdmulh.lane", + resTy, getLoc(e->getExprLoc())); } case NEON::BI__builtin_neon_vqshlu_n_v: case NEON::BI__builtin_neon_vqshluq_n_v: { // These intrinsics expect signed vector type as input, but // return unsigned vector type. cir::VectorType srcTy = getSignChangedVectorType(builder, vTy); - return emitNeonCallToOp( - builder, {srcTy, srcTy}, ops, "aarch64.neon.sqshlu", vTy, - getLoc(e->getExprLoc()), false, /* not fp constrained op */ - 1, /* second arg is shift amount */ - false /* leftshift */); + return emitNeonCall(builder, {srcTy, srcTy}, ops, "aarch64.neon.sqshlu", + vTy, getLoc(e->getExprLoc()), + false, /* not fp constrained op */ + 1, /* second arg is shift amount */ + false /* leftshift */); } case NEON::BI__builtin_neon_vrshr_n_v: case NEON::BI__builtin_neon_vrshrq_n_v: { - return emitNeonCallToOp( + return emitNeonCall( builder, {vTy, isUnsigned ? getSignChangedVectorType(builder, vTy) : vTy}, ops, isUnsigned ? 
"aarch64.neon.urshl" : "aarch64.neon.srshl", vTy, @@ -2669,26 +2679,26 @@ static mlir::Value emitCommonNeonSISDBuiltinExpr( case NEON::BI__builtin_neon_vaddlvq_s32: llvm_unreachable(" neon_vaddlvq_s32 NYI "); case NEON::BI__builtin_neon_vaddlvq_u32: - return emitNeonCallToOp( - builder, {argTy}, ops, "aarch64.neon.uaddlv", resultTy, loc); + return emitNeonCall(builder, {argTy}, ops, "aarch64.neon.uaddlv", resultTy, + loc); case NEON::BI__builtin_neon_vaddv_f32: case NEON::BI__builtin_neon_vaddvq_f32: case NEON::BI__builtin_neon_vaddvq_f64: - return emitNeonCallToOp( - builder, {argTy}, ops, "aarch64.neon.faddv", resultTy, loc); + return emitNeonCall(builder, {argTy}, ops, "aarch64.neon.faddv", resultTy, + loc); case NEON::BI__builtin_neon_vaddv_s32: case NEON::BI__builtin_neon_vaddvq_s32: case NEON::BI__builtin_neon_vaddvq_s64: - return emitNeonCallToOp( - builder, {argTy}, ops, "aarch64.neon.saddv", resultTy, loc); + return emitNeonCall(builder, {argTy}, ops, "aarch64.neon.saddv", resultTy, + loc); case NEON::BI__builtin_neon_vaddv_u32: case NEON::BI__builtin_neon_vaddvq_u32: case NEON::BI__builtin_neon_vaddvq_u64: - return emitNeonCallToOp( - builder, {argTy}, ops, "aarch64.neon.uaddv", resultTy, loc); + return emitNeonCall(builder, {argTy}, ops, "aarch64.neon.uaddv", resultTy, + loc); case NEON::BI__builtin_neon_vcaged_f64: { - return emitNeonCallToOp( - builder, {argTy}, ops, "aarch64.neon.facge", resultTy, loc); + return emitNeonCall(builder, {argTy}, ops, "aarch64.neon.facge", resultTy, + loc); } case NEON::BI__builtin_neon_vcages_f32: llvm_unreachable(" neon_vcages_f32 NYI "); @@ -2877,8 +2887,8 @@ static mlir::Value emitCommonNeonSISDBuiltinExpr( cir::VectorType resVecTy = cir::VectorType::get(&(cgf.getMLIRContext()), cgf.SInt16Ty, 4); vecExtendIntValue(cgf, argVecTy, ops[0], loc); - mlir::Value result = emitNeonCallToOp( - builder, {argVecTy}, ops, "aarch64.neon.sqxtn", resVecTy, loc); + mlir::Value result = emitNeonCall(builder, {argVecTy}, ops, + "aarch64.neon.sqxtn", resVecTy, loc); return vecReduceIntValue(cgf, result, loc); } case NEON::BI__builtin_neon_vqmovns_u32: @@ -2908,9 +2918,8 @@ static mlir::Value emitCommonNeonSISDBuiltinExpr( case NEON::BI__builtin_neon_vqrdmulhh_s16: llvm_unreachable(" neon_vqrdmulhh_s16 NYI "); case NEON::BI__builtin_neon_vqrdmulhs_s32: - return emitNeonCallToOp( - builder, {resultTy, resultTy}, ops, "aarch64.neon.sqrdmulh", resultTy, - loc); + return emitNeonCall(builder, {resultTy, resultTy}, ops, + "aarch64.neon.sqrdmulh", resultTy, loc); case NEON::BI__builtin_neon_vqrshlb_s8: llvm_unreachable(" neon_vqrshlb_s8 NYI "); case NEON::BI__builtin_neon_vqrshlb_u8: @@ -3815,9 +3824,8 @@ CIRGenFunction::emitAArch64BuiltinExpr(unsigned BuiltinID, const CallExpr *E, assert(APSInt && "Expected argument to be a constant"); Ops[1] = builder.getSInt64(APSInt->getZExtValue(), getLoc(E->getExprLoc())); const StringRef Intrinsic = "aarch64.neon.sqshlu"; - return emitNeonCallToOp( - builder, {IntType, IntType}, Ops, Intrinsic, IntType, - getLoc(E->getExprLoc())); + return emitNeonCall(builder, {IntType, IntType}, Ops, Intrinsic, IntType, + getLoc(E->getExprLoc())); } case NEON::BI__builtin_neon_vqshld_n_u64: case NEON::BI__builtin_neon_vqshld_n_s64: { @@ -3830,9 +3838,8 @@ CIRGenFunction::emitAArch64BuiltinExpr(unsigned BuiltinID, const CallExpr *E, : "aarch64.neon.sqshl"; Ops.push_back(emitScalarExpr(E->getArg(1))); Ops[1] = builder.createIntCast(Ops[1], IntType); - return emitNeonCallToOp( - builder, {IntType, IntType}, Ops, Intrinsic, IntType, - 
getLoc(E->getExprLoc())); + return emitNeonCall(builder, {IntType, IntType}, Ops, Intrinsic, IntType, + getLoc(E->getExprLoc())); } case NEON::BI__builtin_neon_vrshrd_n_u64: case NEON::BI__builtin_neon_vrshrd_n_s64: { @@ -3849,9 +3856,8 @@ CIRGenFunction::emitAArch64BuiltinExpr(unsigned BuiltinID, const CallExpr *E, assert(APSInt && "Expected argument to be a constant"); int64_t SV = -APSInt->getSExtValue(); Ops[1] = builder.getSInt64(SV, getLoc(E->getExprLoc())); - return emitNeonCallToOp( - builder, {IntType, builder.getSInt64Ty()}, Ops, Intrinsic, IntType, - getLoc(E->getExprLoc())); + return emitNeonCall(builder, {IntType, builder.getSInt64Ty()}, Ops, + Intrinsic, IntType, getLoc(E->getExprLoc())); } case NEON::BI__builtin_neon_vrsrad_n_u64: case NEON::BI__builtin_neon_vrsrad_n_s64: { @@ -3867,9 +3873,8 @@ CIRGenFunction::emitAArch64BuiltinExpr(unsigned BuiltinID, const CallExpr *E, llvm::SmallVector args = { Ops[1], builder.createIntCast(Ops[2], IntType)}; - Ops[1] = emitNeonCallToOp( - builder, {IntType, IntType}, args, Intrinsic, IntType, - getLoc(E->getExprLoc())); + Ops[1] = emitNeonCall(builder, {IntType, IntType}, args, Intrinsic, IntType, + getLoc(E->getExprLoc())); return builder.createAdd(Ops[0], builder.createBitcast(Ops[1], IntType)); } case NEON::BI__builtin_neon_vshld_n_s64: @@ -4016,8 +4021,8 @@ CIRGenFunction::emitAArch64BuiltinExpr(unsigned BuiltinID, const CallExpr *E, name = "aarch64.neon.pmull"; cir::VectorType argTy = builder.getExtendedOrTruncatedElementVectorType( ty, false /* truncated */, !usgn); - return emitNeonCallToOp( - builder, {argTy, argTy}, Ops, name, ty, getLoc(E->getExprLoc())); + return emitNeonCall(builder, {argTy, argTy}, Ops, name, ty, + getLoc(E->getExprLoc())); } case NEON::BI__builtin_neon_vmax_v: case NEON::BI__builtin_neon_vmaxq_v: { @@ -4037,8 +4042,8 @@ CIRGenFunction::emitAArch64BuiltinExpr(unsigned BuiltinID, const CallExpr *E, llvm::StringRef name = usgn ? "aarch64.neon.umin" : "aarch64.neon.smin"; if (cir::isFPOrFPVectorTy(ty)) name = "aarch64.neon.fmin"; - return emitNeonCallToOp( - builder, {ty, ty}, Ops, name, ty, getLoc(E->getExprLoc())); + return emitNeonCall(builder, {ty, ty}, Ops, name, ty, + getLoc(E->getExprLoc())); } case NEON::BI__builtin_neon_vminh_f16: { llvm_unreachable("NEON::BI__builtin_neon_vminh_f16 NYI"); @@ -4048,15 +4053,15 @@ CIRGenFunction::emitAArch64BuiltinExpr(unsigned BuiltinID, const CallExpr *E, llvm::StringRef name = usgn ? "aarch64.neon.uabd" : "aarch64.neon.sabd"; if (cir::isFPOrFPVectorTy(ty)) name = "aarch64.neon.fabd"; - return emitNeonCallToOp( - builder, {ty, ty}, Ops, name, ty, getLoc(E->getExprLoc())); + return emitNeonCall(builder, {ty, ty}, Ops, name, ty, + getLoc(E->getExprLoc())); } case NEON::BI__builtin_neon_vpadal_v: case NEON::BI__builtin_neon_vpadalq_v: { cir::VectorType argTy = getHalfEltSizeTwiceNumElemsVecType(builder, vTy); mlir::Location loc = getLoc(E->getExprLoc()); llvm::SmallVector args = {Ops[1]}; - mlir::Value tmp = emitNeonCallToOp( + mlir::Value tmp = emitNeonCall( builder, {argTy}, args, usgn ? 
"aarch64.neon.uaddlp" : "aarch64.neon.saddlp", vTy, loc); mlir::Value addEnd = builder.createBitcast(Ops[0], vTy); @@ -4090,13 +4095,13 @@ CIRGenFunction::emitAArch64BuiltinExpr(unsigned BuiltinID, const CallExpr *E, case NEON::BI__builtin_neon_vqrshrun_n_v: // The prototype of builtin_neon_vqrshrun_n can be found at // https://developer.arm.com/architectures/instruction-sets/intrinsics/ - return emitNeonCallToOp( + return emitNeonCall( builder, {builder.getExtendedOrTruncatedElementVectorType(ty, true, true), SInt32Ty}, Ops, "aarch64.neon.sqrshrun", ty, getLoc(E->getExprLoc())); case NEON::BI__builtin_neon_vqshrn_n_v: - return emitNeonCallToOp( + return emitNeonCall( builder, {builder.getExtendedOrTruncatedElementVectorType( vTy, true /* extend */, @@ -4105,7 +4110,7 @@ CIRGenFunction::emitAArch64BuiltinExpr(unsigned BuiltinID, const CallExpr *E, Ops, usgn ? "aarch64.neon.uqshrn" : "aarch64.neon.sqshrn", ty, getLoc(E->getExprLoc())); case NEON::BI__builtin_neon_vrshrn_n_v: - return emitNeonCallToOp( + return emitNeonCall( builder, {builder.getExtendedOrTruncatedElementVectorType( vTy, true /* extend */, @@ -4113,7 +4118,7 @@ CIRGenFunction::emitAArch64BuiltinExpr(unsigned BuiltinID, const CallExpr *E, SInt32Ty}, Ops, "aarch64.neon.rshrn", ty, getLoc(E->getExprLoc())); case NEON::BI__builtin_neon_vqrshrn_n_v: - return emitNeonCallToOp( + return emitNeonCall( builder, {builder.getExtendedOrTruncatedElementVectorType( vTy, true /* extend */, @@ -4127,8 +4132,8 @@ CIRGenFunction::emitAArch64BuiltinExpr(unsigned BuiltinID, const CallExpr *E, case NEON::BI__builtin_neon_vrnda_v: case NEON::BI__builtin_neon_vrndaq_v: { assert(!cir::MissingFeatures::emitConstrainedFPCall()); - return emitNeonCallToOp( - builder, {ty}, Ops, "round", ty, getLoc(E->getExprLoc())); + return emitNeonCall(builder, {ty}, Ops, "round", ty, + getLoc(E->getExprLoc())); } case NEON::BI__builtin_neon_vrndih_f16: { llvm_unreachable("NEON::BI__builtin_neon_vrndih_f16 NYI"); @@ -4310,9 +4315,9 @@ CIRGenFunction::emitAArch64BuiltinExpr(unsigned BuiltinID, const CallExpr *E, cir::VectorType vTy = cir::VectorType::get(builder.getContext(), eltTy, 4); Ops.push_back(emitScalarExpr(E->getArg(0))); // This is to add across the vector elements, so wider result type needed. - Ops[0] = emitNeonCallToOp( - builder, {vTy}, Ops, usgn ? "aarch64.neon.uaddv" : "aarch64.neon.saddv", - SInt32Ty, getLoc(E->getExprLoc())); + Ops[0] = emitNeonCall(builder, {vTy}, Ops, + usgn ? "aarch64.neon.uaddv" : "aarch64.neon.saddv", + SInt32Ty, getLoc(E->getExprLoc())); return builder.createIntCast(Ops[0], eltTy); } case NEON::BI__builtin_neon_vaddvq_u8: @@ -4419,10 +4424,9 @@ CIRGenFunction::emitAArch64BuiltinExpr(unsigned BuiltinID, const CallExpr *E, mlir::Type argTy = cir::VectorType::get(builder.getContext(), usgn ? UInt16Ty : SInt16Ty, 8); llvm::SmallVector argOps = {emitScalarExpr(E->getArg(0))}; - return emitNeonCallToOp( - builder, {argTy}, argOps, - usgn ? "aarch64.neon.uaddlv" : "aarch64.neon.saddlv", - usgn ? UInt32Ty : SInt32Ty, getLoc(E->getExprLoc())); + return emitNeonCall(builder, {argTy}, argOps, + usgn ? "aarch64.neon.uaddlv" : "aarch64.neon.saddlv", + usgn ? UInt32Ty : SInt32Ty, getLoc(E->getExprLoc())); } case NEON::BI__builtin_neon_vaddlv_s8: { llvm_unreachable("NEON::BI__builtin_neon_vaddlv_s8 NYI"); @@ -4434,10 +4438,9 @@ CIRGenFunction::emitAArch64BuiltinExpr(unsigned BuiltinID, const CallExpr *E, mlir::Type argTy = cir::VectorType::get(builder.getContext(), usgn ? 
UInt16Ty : SInt16Ty, 4); llvm::SmallVector argOps = {emitScalarExpr(E->getArg(0))}; - return emitNeonCallToOp( - builder, {argTy}, argOps, - usgn ? "aarch64.neon.uaddlv" : "aarch64.neon.saddlv", - usgn ? UInt32Ty : SInt32Ty, getLoc(E->getExprLoc())); + return emitNeonCall(builder, {argTy}, argOps, + usgn ? "aarch64.neon.uaddlv" : "aarch64.neon.saddlv", + usgn ? UInt32Ty : SInt32Ty, getLoc(E->getExprLoc())); } case NEON::BI__builtin_neon_vaddlvq_s8: { llvm_unreachable("NEON::BI__builtin_neon_vaddlvq_s8 NYI"); @@ -4464,11 +4467,11 @@ CIRGenFunction::emitAArch64BuiltinExpr(unsigned BuiltinID, const CallExpr *E, // Thus we have to make shift amount vec type to be signed. cir::VectorType shitAmtVecTy = usgn ? getSignChangedVectorType(builder, vTy) : vTy; - mlir::Value tmp = emitNeonCallToOp( - builder, {vTy, shitAmtVecTy}, tmpOps, - usgn ? "aarch64.neon.urshl" : "aarch64.neon.srshl", vTy, - getLoc(E->getExprLoc()), false, 1 /* shift amount is args[1]*/, - true /* right shift */); + mlir::Value tmp = + emitNeonCall(builder, {vTy, shitAmtVecTy}, tmpOps, + usgn ? "aarch64.neon.urshl" : "aarch64.neon.srshl", vTy, + getLoc(E->getExprLoc()), false, + 1 /* shift amount is args[1]*/, true /* right shift */); Ops[0] = builder.createBitcast(Ops[0], vTy); return builder.createBinop(Ops[0], cir::BinOpKind::Add, tmp); }
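
Note (illustrative sketch, not part of the patches above): after this series, a builtin case can either instantiate the unified helper with a concrete CIR op, or keep lowering through a string-named LLVM intrinsic via the thin emitNeonCall wrapper restored in PATCH 9/9. Assuming the usual CIRGenFunction context (builder, E, Ops, ty), a call site would look roughly like:

    // Emit a dedicated CIR op (here cir::RoundEvenOp, as for vrndns_f32);
    // no intrinsic name is needed, so std::nullopt is passed.
    mlir::Value arg0 = emitScalarExpr(E->getArg(0));
    llvm::SmallVector<mlir::Value, 4> args{arg0};
    mlir::Value r = emitNeonCallToOp<cir::RoundEvenOp>(
        builder, {arg0.getType()}, args, std::nullopt,
        getCIRGenModule().FloatTy, getLoc(E->getExprLoc()));

    // Keep lowering through an LLVM intrinsic by name; emitNeonCall simply
    // forwards to emitNeonCallToOp<cir::LLVMIntrinsicCallOp>.
    mlir::Value s = emitNeonCall(builder, {ty, ty}, Ops, "aarch64.neon.fmin",
                                 ty, getLoc(E->getExprLoc()));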