[CIR][CIRGen][Neon] Make vrndns emit RoundEvenOp directly (#1434)

FantasqueX · web-flow · commit f917f3ba8a71 · 2025-03-06T15:55:30.000-08:00
diff --git a/clang/include/clang/CIR/Dialect/IR/CIROps.td b/clang/include/clang/CIR/Dialect/IR/CIROps.td
@@ -4787,6 +4787,7 @@ def Log2Op : UnaryFPToFPBuiltinOp<"log2", "Log2Op">;
 def NearbyintOp : UnaryFPToFPBuiltinOp<"nearbyint", "NearbyintOp">;
 def RintOp : UnaryFPToFPBuiltinOp<"rint", "RintOp">;
 def RoundOp : UnaryFPToFPBuiltinOp<"round", "RoundOp">;
+def RoundEvenOp : UnaryFPToFPBuiltinOp<"roundeven", "RoundEvenOp">;
 def SinOp : UnaryFPToFPBuiltinOp<"sin", "SinOp">;
 def SqrtOp : UnaryFPToFPBuiltinOp<"sqrt", "SqrtOp">;
 def TruncOp : UnaryFPToFPBuiltinOp<"trunc", "FTruncOp">;
diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp b/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp
@@ -2226,13 +2226,13 @@ static mlir::Value vecReduceIntValue(CIRGenFunction &cgf, mlir::Value val,
       loc, val, builder.getConstInt(loc, cgf.SizeTy, 0));
 }
 
-mlir::Value emitNeonCall(CIRGenBuilderTy &builder,
-                         llvm::SmallVector<mlir::Type> argTypes,
-                         llvm::SmallVectorImpl<mlir::Value> &args,
-                         llvm::StringRef intrinsicName, mlir::Type funcResTy,
-                         mlir::Location loc,
-                         bool isConstrainedFPIntrinsic = false,
-                         unsigned shift = 0, bool rightshift = false) {
+template <typename Operation>
+static mlir::Value emitNeonCallToOp(
+    CIRGenBuilderTy &builder, llvm::SmallVector<mlir::Type> argTypes,
+    llvm::SmallVectorImpl<mlir::Value> &args,
+    std::optional<llvm::StringRef> intrinsicName, mlir::Type funcResTy,
+    mlir::Location loc, bool isConstrainedFPIntrinsic = false,
+    unsigned shift = 0, bool rightshift = false) {
   // TODO: Consider removing the following unreachable when we have
   // emitConstrainedFPCall feature implemented
   assert(!cir::MissingFeatures::emitConstrainedFPCall());
@@ -2255,10 +2255,26 @@ mlir::Value emitNeonCall(CIRGenBuilderTy &builder,
     assert(!cir::MissingFeatures::emitConstrainedFPCall());
     return nullptr;
   }
-  return builder
-      .create<cir::LLVMIntrinsicCallOp>(
-          loc, builder.getStringAttr(intrinsicName), funcResTy, args)
-      .getResult();
+  if constexpr (std::is_same_v<Operation, cir::LLVMIntrinsicCallOp>) {
+    return builder
+        .create<Operation>(loc, builder.getStringAttr(intrinsicName.value()),
+                           funcResTy, args)
+        .getResult();
+  } else {
+    return builder.create<Operation>(loc, funcResTy, args).getResult();
+  }
+}
+
+static mlir::Value emitNeonCall(CIRGenBuilderTy &builder,
+                                llvm::SmallVector<mlir::Type> argTypes,
+                                llvm::SmallVectorImpl<mlir::Value> &args,
+                                llvm::StringRef intrinsicName,
+                                mlir::Type funcResTy, mlir::Location loc,
+                                bool isConstrainedFPIntrinsic = false,
+                                unsigned shift = 0, bool rightshift = false) {
+  return emitNeonCallToOp<cir::LLVMIntrinsicCallOp>(
+      builder, std::move(argTypes), args, intrinsicName, funcResTy, loc,
+      isConstrainedFPIntrinsic, shift, rightshift);
 }
 
 /// This function `emitCommonNeonCallPattern0` implements a common way
@@ -4139,8 +4155,9 @@ CIRGenFunction::emitAArch64BuiltinExpr(unsigned BuiltinID, const CallExpr *E,
   case NEON::BI__builtin_neon_vrndns_f32: {
     mlir::Value arg0 = emitScalarExpr(E->getArg(0));
     args.push_back(arg0);
-    return emitNeonCall(builder, {arg0.getType()}, args, "roundeven.f32",
-                        getCIRGenModule().FloatTy, getLoc(E->getExprLoc()));
+    return emitNeonCallToOp<cir::RoundEvenOp>(
+        builder, {arg0.getType()}, args, std::nullopt,
+        getCIRGenModule().FloatTy, getLoc(E->getExprLoc()));
   }
   case NEON::BI__builtin_neon_vrndph_f16: {
     llvm_unreachable("NEON::BI__builtin_neon_vrndph_f16 NYI");
diff --git a/clang/test/CIR/CodeGen/AArch64/neon-arith.c b/clang/test/CIR/CodeGen/AArch64/neon-arith.c
@@ -23,7 +23,7 @@ float32_t test_vrndns_f32(float32_t a) {
 // CIR: cir.func internal private @vrndns_f32(%arg0: !cir.float {{.*}}) -> !cir.float
 // CIR: cir.store %arg0, [[ARG_SAVE:%.*]] : !cir.float, !cir.ptr<!cir.float> 
 // CIR: [[INTRIN_ARG:%.*]] = cir.load [[ARG_SAVE]] : !cir.ptr<!cir.float>, !cir.float 
-// CIR: {{%.*}} = cir.llvm.intrinsic "roundeven.f32" [[INTRIN_ARG]] : (!cir.float)
+// CIR: {{%.*}} = cir.roundeven [[INTRIN_ARG]] : !cir.float
 // CIR: cir.return {{%.*}} : !cir.float
 
 // CIR-LABEL: test_vrndns_f32