
Commit 8d507b1

[CIR][CIRGen][Builtin][Neon] Lower neon_vaddv_s16 and neon_vaddv_u16 (#1210)
1 parent 4ae9a04 commit 8d507b1

2 files changed: +35 -2 lines changed

clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp  (+9, -2)
@@ -4183,10 +4183,17 @@ CIRGenFunction::emitAArch64BuiltinExpr(unsigned BuiltinID, const CallExpr *E,
     llvm_unreachable("NEON::BI__builtin_neon_vaddv_s8 NYI");
   }
   case NEON::BI__builtin_neon_vaddv_u16:
-    llvm_unreachable("NEON::BI__builtin_neon_vaddv_u16 NYI");
+    usgn = true;
     [[fallthrough]];
   case NEON::BI__builtin_neon_vaddv_s16: {
-    llvm_unreachable("NEON::BI__builtin_neon_vaddv_s16 NYI");
+    cir::IntType eltTy = usgn ? UInt16Ty : SInt16Ty;
+    cir::VectorType vTy = cir::VectorType::get(builder.getContext(), eltTy, 4);
+    Ops.push_back(emitScalarExpr(E->getArg(0)));
+    // This is to add across the vector elements, so wider result type needed.
+    Ops[0] = emitNeonCall(builder, {vTy}, Ops,
+                          usgn ? "aarch64.neon.uaddv" : "aarch64.neon.saddv",
+                          SInt32Ty, getLoc(E->getExprLoc()));
+    return builder.createIntCast(Ops[0], eltTy);
   }
   case NEON::BI__builtin_neon_vaddvq_u8:
     llvm_unreachable("NEON::BI__builtin_neon_vaddvq_u8 NYI");

clang/test/CIR/CodeGen/AArch64/neon-arith.c  (+26)
@@ -893,3 +893,29 @@ uint32_t test_vaddlvq_u16(uint16x8_t a) {
   // LLVM: [[VADDLV_I:%.*]] = call i32 @llvm.aarch64.neon.uaddlv.i32.v8i16(<8 x i16> [[A]])
   // LLVM: ret i32 [[VADDLV_I]]
 }
+
+uint16_t test_vaddv_u16(uint16x4_t a) {
+  return vaddv_u16(a);
+
+  // CIR-LABEL: vaddv_u16
+  // CIR: [[VADDV_I:%.*]] = cir.llvm.intrinsic "aarch64.neon.uaddv" {{%.*}} : (!cir.vector<!u16i x 4>) -> !s32i
+  // CIR: cir.cast(integral, [[VADDV_I]] : !s32i), !u16i
+
+  // LLVM: {{.*}}test_vaddv_u16(<4 x i16>{{.*}}[[A:%.*]])
+  // LLVM: [[VADDV_I:%.*]] = call i32 @llvm.aarch64.neon.uaddv.i32.v4i16(<4 x i16> [[A]])
+  // LLVM-NEXT: [[TMP0:%.*]] = trunc i32 [[VADDV_I]] to i16
+  // LLVM-NEXT: ret i16 [[TMP0]]
+}
+
+int16_t test_vaddv_s16(int16x4_t a) {
+  return vaddv_s16(a);
+
+  // CIR-LABEL: vaddv_s16
+  // CIR: [[VADDV_I:%.*]] = cir.llvm.intrinsic "aarch64.neon.saddv" {{%.*}} : (!cir.vector<!s16i x 4>) -> !s32i
+  // CIR: cir.cast(integral, [[VADDV_I]] : !s32i), !s16i
+
+  // LLVM: {{.*}}test_vaddv_s16(<4 x i16>{{.*}}[[A:%.*]])
+  // LLVM: [[VADDV_I:%.*]] = call i32 @llvm.aarch64.neon.saddv.i32.v4i16(<4 x i16> [[A]])
+  // LLVM-NEXT: [[TMP0:%.*]] = trunc i32 [[VADDV_I]] to i16
+  // LLVM-NEXT: ret i16 [[TMP0]]
+}
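As a sanity check of the semantics these tests encode, a small usage example of the intrinsic itself (hypothetical standalone program, assumes an AArch64 target with <arm_neon.h>; not part of this commit):

#include <arm_neon.h>
#include <stdio.h>

int main(void) {
  // 1 + 2 + 3 + 4 = 10; the reduction is performed in 32 bits and then
  // truncated back to 16 bits, which is what the trunc checks above verify.
  uint16_t data[4] = {1, 2, 3, 4};
  uint16x4_t v = vld1_u16(data);
  printf("%u\n", (unsigned)vaddv_u16(v)); // prints 10
  return 0;
}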
