[CIR][CIRGen][Builtin][Neon] Lower neon_vqmovun_v (#1012)

ghehg · web-flow · commit d6db31d24304 · 2024-10-30T11:27:59.000-07:00
diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp b/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp
@@ -2217,6 +2217,13 @@ buildCommonNeonCallPattern0(CIRGenFunction &cgf, llvm::StringRef intrincsName,
                             llvm::SmallVectorImpl<mlir::Value> &ops,
                             mlir::Type funcResTy, const clang::CallExpr *e) {
   CIRGenBuilderTy &builder = cgf.getBuilder();
+  if (argTypes.empty()) {
+    // The most common arg types are {funcResTy, funcResTy} for neon intrinsic
+    // functions, so that is the default and call sites need not provide it.
+    // Every neon intrinsic function has at least one argument, so an empty
+    // argTypes really just means {funcResTy, funcResTy}.
+    argTypes = {funcResTy, funcResTy};
+  }
   mlir::Value res =
       buildNeonCall(builder, std::move(argTypes), ops, intrincsName, funcResTy,
                     cgf.getLoc(e->getExprLoc()));
@@ -2359,6 +2366,7 @@ mlir::Value CIRGenFunction::buildCommonNeonBuiltinExpr(
   // This second switch is for the intrinsics that might have a more generic
   // codegen solution so we can use the common codegen in future.
   llvm::StringRef intrincsName;
+  llvm::SmallVector<mlir::Type> argTypes;
   switch (builtinID) {
   default:
     llvm::errs() << getAArch64SIMDIntrinsicString(builtinID) << " ";
@@ -2390,11 +2398,18 @@ mlir::Value CIRGenFunction::buildCommonNeonBuiltinExpr(
                        : "llvm.aarch64.neon.srhadd";
     break;
   }
+  case NEON::BI__builtin_neon_vqmovun_v: {
+    intrincsName = "llvm.aarch64.neon.sqxtun";
+    argTypes.push_back(builder.getExtendedOrTruncatedElementVectorType(
+        vTy, true /* extended */, true /* signed */));
+    break;
   }
-  if (!intrincsName.empty())
-    return buildCommonNeonCallPattern0(*this, intrincsName, {vTy, vTy}, ops,
-                                       vTy, e);
-  return nullptr;
+  }
+
+  if (intrincsName.empty())
+    return nullptr;
+  return buildCommonNeonCallPattern0(*this, intrincsName, argTypes, ops, vTy,
+                                     e);
 }
 
 mlir::Value
diff --git a/clang/test/CIR/CodeGen/AArch64/neon-misc.c b/clang/test/CIR/CodeGen/AArch64/neon-misc.c
@@ -451,3 +451,44 @@ uint32x4x2_t test_vtrnq_u32(uint32x4_t a, uint32x4_t b) {
   // CIR:  cir.store [[RES1]], [[ADDR1]] : !cir.vector<!u32i x 4>, !cir.ptr<!cir.vector<!u32i x 4>>
   // LLVM: ret %struct.uint32x4x2_t {{.*}}
 }
+
+uint8x8_t test_vqmovun_s16(int16x8_t a) {
+  return vqmovun_s16(a);
+
+  // CIR-LABEL: vqmovun_s16
+  // CIR: {{%.*}} = cir.llvm.intrinsic "llvm.aarch64.neon.sqxtun" {{%.*}} :
+  // CIR-SAME: (!cir.vector<!s16i x 8>) -> !cir.vector<!u8i x 8>
+  
+  // LLVM: {{.*}}test_vqmovun_s16(<8 x i16>{{.*}}[[A:%.*]])
+  // LLVM: [[TMP0:%.*]] = bitcast <8 x i16> [[A]] to <16 x i8>
+  // LLVM: [[VQMOVUN_V1_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqxtun.v8i8(<8 x i16> [[A]])
+  // LLVM: ret <8 x i8> [[VQMOVUN_V1_I]]
+}
+
+uint16x4_t test_vqmovun_s32(int32x4_t a) {
+  return vqmovun_s32(a);
+
+  // CIR-LABEL: vqmovun_s32
+  // CIR: {{%.*}} = cir.llvm.intrinsic "llvm.aarch64.neon.sqxtun" {{%.*}} :
+  // CIR-SAME: (!cir.vector<!s32i x 4>) -> !cir.vector<!u16i x 4>
+
+  // LLVM: {{.*}}test_vqmovun_s32(<4 x i32>{{.*}}[[A:%.*]])
+  // LLVM: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <16 x i8>
+  // LLVM: [[VQMOVUN_V1_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqxtun.v4i16(<4 x i32> [[A]])
+  // LLVM: [[VQMOVUN_V2_I:%.*]] = bitcast <4 x i16> [[VQMOVUN_V1_I]] to <8 x i8>
+  // LLVM: ret <4 x i16> [[VQMOVUN_V1_I]]
+}
+
+uint32x2_t test_vqmovun_s64(int64x2_t a) {
+  return vqmovun_s64(a);
+
+  // CIR-LABEL: vqmovun_s64
+  // CIR: {{%.*}} = cir.llvm.intrinsic "llvm.aarch64.neon.sqxtun" {{%.*}} :
+  // CIR-SAME: (!cir.vector<!s64i x 2>) -> !cir.vector<!u32i x 2>
+
+  // LLVM: {{.*}}test_vqmovun_s64(<2 x i64>{{.*}}[[A:%.*]])
+  // LLVM: [[TMP0:%.*]] = bitcast <2 x i64> [[A]] to <16 x i8>
+  // LLVM: [[VQMOVUN_V1_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqxtun.v2i32(<2 x i64> [[A]])
+  // LLVM: [[VQMOVUN_V2_I:%.*]] = bitcast <2 x i32> [[VQMOVUN_V1_I]] to <8 x i8>
+  // LLVM: ret <2 x i32> [[VQMOVUN_V1_I]]
+}