@@ -2265,6 +2265,18 @@ static mlir::Value emitNeonCallToOp(
  }
}

+static mlir::Value emitNeonCall(CIRGenBuilderTy &builder,
+                                llvm::SmallVector<mlir::Type> argTypes,
+                                llvm::SmallVectorImpl<mlir::Value> &args,
+                                llvm::StringRef intrinsicName,
+                                mlir::Type funcResTy, mlir::Location loc,
+                                bool isConstrainedFPIntrinsic = false,
+                                unsigned shift = 0, bool rightshift = false) {
+  return emitNeonCallToOp<cir::LLVMIntrinsicCallOp>(
+      builder, std::move(argTypes), args, intrinsicName, funcResTy, loc,
+      isConstrainedFPIntrinsic, shift, rightshift);
+}
+
/// This function `emitCommonNeonCallPattern0` implements a common way to
/// generate a Neon intrinsic call that has the following pattern:
/// 1. There is a need to cast the result of the intrinsic call back to
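
Note on the new helper: `emitNeonCall` is a thin forwarding wrapper that pins the template argument of `emitNeonCallToOp` to `cir::LLVMIntrinsicCallOp` and defaults the trailing flags, which is what lets every call site below drop the explicit `<cir::LLVMIntrinsicCallOp>` spelling. A minimal standalone sketch of the same idiom, using hypothetical names (GenericOp, emitCallToOp, emitCall) rather than the CIR APIs:

#include <cstdio>
#include <string>
#include <utility>

struct GenericOp {
  static constexpr const char *kind = "llvm.intrinsic.call";
};

// The general helper: parameterized over the op to build.
template <typename OpT>
int emitCallToOp(std::string name, unsigned shift, bool rightshift) {
  std::printf("%s %s shift=%u right=%d\n", OpT::kind, name.c_str(), shift,
              static_cast<int>(rightshift));
  return 0;
}

// The wrapper: one fixed OpT and defaulted trailing parameters, so call
// sites stay short and keep working unchanged after the refactor.
int emitCall(std::string name, unsigned shift = 0, bool rightshift = false) {
  return emitCallToOp<GenericOp>(std::move(name), shift, rightshift);
}

int main() {
  emitCall("aarch64.neon.uaddlv");           // defaults apply
  emitCall("aarch64.neon.sqshlu", 1, false); // explicit trailing arguments
}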
@@ -2283,9 +2295,9 @@ emitCommonNeonCallPattern0(CIRGenFunction &cgf, llvm::StringRef intrinsicName,
    // Thus empty argTypes really just means {funcResTy, funcResTy}.
    argTypes = {funcResTy, funcResTy};
  }
-  mlir::Value res = emitNeonCallToOp<cir::LLVMIntrinsicCallOp>(
-      builder, std::move(argTypes), ops, intrinsicName, funcResTy,
-      cgf.getLoc(e->getExprLoc()));
+  mlir::Value res =
+      emitNeonCall(builder, std::move(argTypes), ops, intrinsicName, funcResTy,
+                   cgf.getLoc(e->getExprLoc()));
  mlir::Type resultType = cgf.convertType(e->getType());
  return builder.createBitcast(res, resultType);
}
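
For context, "pattern 0" boils down to: call the intrinsic with the vector result type used for both argument types, then bitcast the result back to the type Clang assigned to the expression. A toy model with made-up stand-ins (Ty, callIntrinsic, bitcastTo) and a placeholder intrinsic name, not the CIR builder API:

#include <cstdio>
#include <string>

struct Ty { std::string name; };

// Stand-in for the intrinsic call: (funcResTy, funcResTy) -> funcResTy.
Ty callIntrinsic(const std::string &intrinsicName, const Ty &funcResTy) {
  std::printf("call %s : (%s, %s) -> %s\n", intrinsicName.c_str(),
              funcResTy.name.c_str(), funcResTy.name.c_str(),
              funcResTy.name.c_str());
  return funcResTy;
}

// Stand-in for the trailing bitcast back to the expression's type.
Ty bitcastTo(const Ty &, const std::string &exprTy) { return Ty{exprTy}; }

int main() {
  // Empty argTypes really just means {funcResTy, funcResTy}.
  Ty res = callIntrinsic("aarch64.neon.smax", Ty{"!cir.vector<4 x s32i>"});
  std::printf("bitcast to %s\n", bitcastTo(res, "int32x4_t").name.c_str());
}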
@@ -2306,8 +2318,8 @@ static mlir::Value emitCommonNeonVecAcrossCall(CIRGenFunction &cgf,
  cir::VectorType vTy =
      cir::VectorType::get(&cgf.getMLIRContext(), eltTy, vecLen);
  llvm::SmallVector<mlir::Value, 1> args{op};
-  return emitNeonCallToOp<cir::LLVMIntrinsicCallOp>(
-      builder, {vTy}, args, intrinsicName, eltTy, cgf.getLoc(e->getExprLoc()));
+  return emitNeonCall(builder, {vTy}, args, intrinsicName, eltTy,
+                      cgf.getLoc(e->getExprLoc()));
}

mlir::Value CIRGenFunction::emitCommonNeonBuiltinExpr(
@@ -2389,26 +2401,25 @@ mlir::Value CIRGenFunction::emitCommonNeonBuiltinExpr(
  case NEON::BI__builtin_neon_vpaddlq_v: {
    // The source operand type has twice as many elements of half the size.
    cir::VectorType narrowTy = getHalfEltSizeTwiceNumElemsVecType(builder, vTy);
-    return emitNeonCallToOp<cir::LLVMIntrinsicCallOp>(
-        builder, {narrowTy}, ops,
-        isUnsigned ? "aarch64.neon.uaddlp" : "aarch64.neon.saddlp", vTy,
-        getLoc(e->getExprLoc()));
+    return emitNeonCall(builder, {narrowTy}, ops,
+                        isUnsigned ? "aarch64.neon.uaddlp"
+                                   : "aarch64.neon.saddlp",
+                        vTy, getLoc(e->getExprLoc()));
  }
  case NEON::BI__builtin_neon_vqdmlal_v:
  case NEON::BI__builtin_neon_vqdmlsl_v: {
    llvm::SmallVector<mlir::Value, 2> mulOps(ops.begin() + 1, ops.end());
    cir::VectorType srcVty = builder.getExtendedOrTruncatedElementVectorType(
        vTy, false, /* truncate */
        mlir::cast<cir::IntType>(vTy.getEltType()).isSigned());
-    ops[1] = emitNeonCallToOp<cir::LLVMIntrinsicCallOp>(
-        builder, {srcVty, srcVty}, mulOps, "aarch64.neon.sqdmull", vTy,
-        getLoc(e->getExprLoc()));
+    ops[1] = emitNeonCall(builder, {srcVty, srcVty}, mulOps,
+                          "aarch64.neon.sqdmull", vTy, getLoc(e->getExprLoc()));
    ops.resize(2);
-    return emitNeonCallToOp<cir::LLVMIntrinsicCallOp>(
-        builder, {vTy, vTy}, ops,
-        builtinID == NEON::BI__builtin_neon_vqdmlal_v ? "aarch64.neon.sqadd"
-                                                      : "aarch64.neon.sqsub",
-        vTy, getLoc(e->getExprLoc()));
+    return emitNeonCall(builder, {vTy, vTy}, ops,
+                        builtinID == NEON::BI__builtin_neon_vqdmlal_v
+                            ? "aarch64.neon.sqadd"
+                            : "aarch64.neon.sqsub",
+                        vTy, getLoc(e->getExprLoc()));
  }
  case NEON::BI__builtin_neon_vcvt_f32_v:
  case NEON::BI__builtin_neon_vcvtq_f32_v: {
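
The vqdmlal/vqdmlsl lowering above is two intrinsic calls: a widening saturating doubling multiply (aarch64.neon.sqdmull) whose result replaces ops[1], then a saturating add or subtract against the accumulator. A scalar model of one int16 lane (a semantics sketch only, not CIR code):

#include <cstdint>
#include <cstdio>
#include <limits>

static int32_t sat32(int64_t v) {
  if (v > std::numeric_limits<int32_t>::max())
    return std::numeric_limits<int32_t>::max();
  if (v < std::numeric_limits<int32_t>::min())
    return std::numeric_limits<int32_t>::min();
  return static_cast<int32_t>(v);
}

// Step 1: "aarch64.neon.sqdmull" -- saturating doubling multiply long.
static int32_t sqdmull_lane(int16_t a, int16_t b) {
  return sat32(2 * int64_t(a) * int64_t(b));
}

// Step 2: "aarch64.neon.sqadd" -- saturating accumulate (vqdmlal flavor).
static int32_t sqadd_lane(int32_t acc, int32_t x) {
  return sat32(int64_t(acc) + int64_t(x));
}

int main() {
  // The only product that saturates in step 1: INT16_MIN * INT16_MIN,
  // whose doubled value 2^31 exceeds INT32_MAX.
  std::printf("%d\n", sqdmull_lane(-32768, -32768));          // 2147483647
  std::printf("%d\n", sqadd_lane(5, sqdmull_lane(100, 200))); // 40005
}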
@@ -2442,28 +2453,27 @@ mlir::Value CIRGenFunction::emitCommonNeonBuiltinExpr(
    cir::VectorType mulVecT =
        GetNeonType(this, NeonTypeFlags(neonType.getEltType(), false,
                                        /* isQuad */ false));
-    return emitNeonCallToOp<cir::LLVMIntrinsicCallOp>(
-        builder, {resTy, mulVecT, SInt32Ty}, ops,
-        (builtinID == NEON::BI__builtin_neon_vqdmulhq_lane_v ||
-         builtinID == NEON::BI__builtin_neon_vqdmulh_lane_v)
-            ? "aarch64.neon.sqdmulh.lane"
-            : "aarch64.neon.sqrdmulh.lane",
-        resTy, getLoc(e->getExprLoc()));
+    return emitNeonCall(builder, {resTy, mulVecT, SInt32Ty}, ops,
+                        (builtinID == NEON::BI__builtin_neon_vqdmulhq_lane_v ||
+                         builtinID == NEON::BI__builtin_neon_vqdmulh_lane_v)
+                            ? "aarch64.neon.sqdmulh.lane"
+                            : "aarch64.neon.sqrdmulh.lane",
+                        resTy, getLoc(e->getExprLoc()));
  }
  case NEON::BI__builtin_neon_vqshlu_n_v:
  case NEON::BI__builtin_neon_vqshluq_n_v: {
    // These intrinsics expect signed vector type as input, but
    // return unsigned vector type.
    cir::VectorType srcTy = getSignChangedVectorType(builder, vTy);
-    return emitNeonCallToOp<cir::LLVMIntrinsicCallOp>(
-        builder, {srcTy, srcTy}, ops, "aarch64.neon.sqshlu", vTy,
-        getLoc(e->getExprLoc()), false, /* not fp constrained op */
-        1, /* second arg is shift amount */
-        false /* leftshift */);
+    return emitNeonCall(builder, {srcTy, srcTy}, ops, "aarch64.neon.sqshlu",
+                        vTy, getLoc(e->getExprLoc()),
+                        false, /* not fp constrained op */
+                        1,     /* second arg is shift amount */
+                        false /* leftshift */);
  }
  case NEON::BI__builtin_neon_vrshr_n_v:
  case NEON::BI__builtin_neon_vrshrq_n_v: {
-    return emitNeonCallToOp<cir::LLVMIntrinsicCallOp>(
+    return emitNeonCall(
        builder,
        {vTy, isUnsigned ? getSignChangedVectorType(builder, vTy) : vTy}, ops,
        isUnsigned ? "aarch64.neon.urshl" : "aarch64.neon.srshl", vTy,
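
A note on the trailing (isConstrainedFPIntrinsic, shift, rightshift) arguments threaded through above: judging from the call sites in this diff (the helper body is not shown here, so this is an inference), a nonzero `shift` marks which operand is a constant shift amount to be splatted to the vector argument type, and `rightshift` negates it, since right shifts are emitted through the left-shift intrinsics (e.g. aarch64.neon.urshl) with a negative amount, exactly as the vrshrd_n case below negates by hand. A sketch of that preprocessing:

#include <cstdint>
#include <cstdio>
#include <vector>

// Hypothetical splat step: turn a scalar shift amount into a per-lane
// vector, negated for right shifts (an assumption drawn from the vrshr_n
// and vrshrd_n call sites, which negate the amount and call urshl/srshl).
std::vector<int64_t> splatShiftAmount(int64_t amt, unsigned lanes,
                                      bool rightshift) {
  if (rightshift)
    amt = -amt; // right shift by n == rounding left shift by -n
  return std::vector<int64_t>(lanes, amt);
}

int main() {
  for (int64_t v : splatShiftAmount(2, 4, /*rightshift=*/true))
    std::printf("%lld ", static_cast<long long>(v)); // -2 -2 -2 -2
  std::printf("\n");
}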
@@ -2669,26 +2679,26 @@ static mlir::Value emitCommonNeonSISDBuiltinExpr(
  case NEON::BI__builtin_neon_vaddlvq_s32:
    llvm_unreachable("neon_vaddlvq_s32 NYI");
  case NEON::BI__builtin_neon_vaddlvq_u32:
-    return emitNeonCallToOp<cir::LLVMIntrinsicCallOp>(
-        builder, {argTy}, ops, "aarch64.neon.uaddlv", resultTy, loc);
+    return emitNeonCall(builder, {argTy}, ops, "aarch64.neon.uaddlv", resultTy,
+                        loc);
  case NEON::BI__builtin_neon_vaddv_f32:
  case NEON::BI__builtin_neon_vaddvq_f32:
  case NEON::BI__builtin_neon_vaddvq_f64:
-    return emitNeonCallToOp<cir::LLVMIntrinsicCallOp>(
-        builder, {argTy}, ops, "aarch64.neon.faddv", resultTy, loc);
+    return emitNeonCall(builder, {argTy}, ops, "aarch64.neon.faddv", resultTy,
+                        loc);
  case NEON::BI__builtin_neon_vaddv_s32:
  case NEON::BI__builtin_neon_vaddvq_s32:
  case NEON::BI__builtin_neon_vaddvq_s64:
-    return emitNeonCallToOp<cir::LLVMIntrinsicCallOp>(
-        builder, {argTy}, ops, "aarch64.neon.saddv", resultTy, loc);
+    return emitNeonCall(builder, {argTy}, ops, "aarch64.neon.saddv", resultTy,
+                        loc);
  case NEON::BI__builtin_neon_vaddv_u32:
  case NEON::BI__builtin_neon_vaddvq_u32:
  case NEON::BI__builtin_neon_vaddvq_u64:
-    return emitNeonCallToOp<cir::LLVMIntrinsicCallOp>(
-        builder, {argTy}, ops, "aarch64.neon.uaddv", resultTy, loc);
+    return emitNeonCall(builder, {argTy}, ops, "aarch64.neon.uaddv", resultTy,
+                        loc);
  case NEON::BI__builtin_neon_vcaged_f64: {
-    return emitNeonCallToOp<cir::LLVMIntrinsicCallOp>(
-        builder, {argTy}, ops, "aarch64.neon.facge", resultTy, loc);
+    return emitNeonCall(builder, {argTy}, ops, "aarch64.neon.facge", resultTy,
+                        loc);
  }
  case NEON::BI__builtin_neon_vcages_f32:
    llvm_unreachable("neon_vcages_f32 NYI");
@@ -2877,8 +2887,8 @@ static mlir::Value emitCommonNeonSISDBuiltinExpr(
    cir::VectorType resVecTy =
        cir::VectorType::get(&(cgf.getMLIRContext()), cgf.SInt16Ty, 4);
    vecExtendIntValue(cgf, argVecTy, ops[0], loc);
-    mlir::Value result = emitNeonCallToOp<cir::LLVMIntrinsicCallOp>(
-        builder, {argVecTy}, ops, "aarch64.neon.sqxtn", resVecTy, loc);
+    mlir::Value result = emitNeonCall(builder, {argVecTy}, ops,
+                                      "aarch64.neon.sqxtn", resVecTy, loc);
    return vecReduceIntValue(cgf, result, loc);
  }
  case NEON::BI__builtin_neon_vqmovns_u32:
@@ -2908,9 +2918,8 @@ static mlir::Value emitCommonNeonSISDBuiltinExpr(
  case NEON::BI__builtin_neon_vqrdmulhh_s16:
    llvm_unreachable("neon_vqrdmulhh_s16 NYI");
  case NEON::BI__builtin_neon_vqrdmulhs_s32:
-    return emitNeonCallToOp<cir::LLVMIntrinsicCallOp>(
-        builder, {resultTy, resultTy}, ops, "aarch64.neon.sqrdmulh", resultTy,
-        loc);
+    return emitNeonCall(builder, {resultTy, resultTy}, ops,
+                        "aarch64.neon.sqrdmulh", resultTy, loc);
  case NEON::BI__builtin_neon_vqrshlb_s8:
    llvm_unreachable("neon_vqrshlb_s8 NYI");
  case NEON::BI__builtin_neon_vqrshlb_u8:
@@ -3815,9 +3824,8 @@ CIRGenFunction::emitAArch64BuiltinExpr(unsigned BuiltinID, const CallExpr *E,
    assert(APSInt && "Expected argument to be a constant");
    Ops[1] = builder.getSInt64(APSInt->getZExtValue(), getLoc(E->getExprLoc()));
    const StringRef Intrinsic = "aarch64.neon.sqshlu";
-    return emitNeonCallToOp<cir::LLVMIntrinsicCallOp>(
-        builder, {IntType, IntType}, Ops, Intrinsic, IntType,
-        getLoc(E->getExprLoc()));
+    return emitNeonCall(builder, {IntType, IntType}, Ops, Intrinsic, IntType,
+                        getLoc(E->getExprLoc()));
  }
  case NEON::BI__builtin_neon_vqshld_n_u64:
  case NEON::BI__builtin_neon_vqshld_n_s64: {
@@ -3830,9 +3838,8 @@ CIRGenFunction::emitAArch64BuiltinExpr(unsigned BuiltinID, const CallExpr *E,
                                     : "aarch64.neon.sqshl";
    Ops.push_back(emitScalarExpr(E->getArg(1)));
    Ops[1] = builder.createIntCast(Ops[1], IntType);
-    return emitNeonCallToOp<cir::LLVMIntrinsicCallOp>(
-        builder, {IntType, IntType}, Ops, Intrinsic, IntType,
-        getLoc(E->getExprLoc()));
+    return emitNeonCall(builder, {IntType, IntType}, Ops, Intrinsic, IntType,
+                        getLoc(E->getExprLoc()));
  }
  case NEON::BI__builtin_neon_vrshrd_n_u64:
  case NEON::BI__builtin_neon_vrshrd_n_s64: {
@@ -3849,9 +3856,8 @@ CIRGenFunction::emitAArch64BuiltinExpr(unsigned BuiltinID, const CallExpr *E,
    assert(APSInt && "Expected argument to be a constant");
    int64_t SV = -APSInt->getSExtValue();
    Ops[1] = builder.getSInt64(SV, getLoc(E->getExprLoc()));
-    return emitNeonCallToOp<cir::LLVMIntrinsicCallOp>(
-        builder, {IntType, builder.getSInt64Ty()}, Ops, Intrinsic, IntType,
-        getLoc(E->getExprLoc()));
+    return emitNeonCall(builder, {IntType, builder.getSInt64Ty()}, Ops,
+                        Intrinsic, IntType, getLoc(E->getExprLoc()));
  }
  case NEON::BI__builtin_neon_vrsrad_n_u64:
  case NEON::BI__builtin_neon_vrsrad_n_s64: {
@@ -3867,9 +3873,8 @@ CIRGenFunction::emitAArch64BuiltinExpr(unsigned BuiltinID, const CallExpr *E,

    llvm::SmallVector<mlir::Value, 2> args = {
        Ops[1], builder.createIntCast(Ops[2], IntType)};
-    Ops[1] = emitNeonCallToOp<cir::LLVMIntrinsicCallOp>(
-        builder, {IntType, IntType}, args, Intrinsic, IntType,
-        getLoc(E->getExprLoc()));
+    Ops[1] = emitNeonCall(builder, {IntType, IntType}, args, Intrinsic, IntType,
+                          getLoc(E->getExprLoc()));
    return builder.createAdd(Ops[0], builder.createBitcast(Ops[1], IntType));
  }
  case NEON::BI__builtin_neon_vshld_n_s64:
@@ -4016,8 +4021,8 @@ CIRGenFunction::emitAArch64BuiltinExpr(unsigned BuiltinID, const CallExpr *E,
    name = "aarch64.neon.pmull";
    cir::VectorType argTy = builder.getExtendedOrTruncatedElementVectorType(
        ty, false /* truncated */, !usgn);
-    return emitNeonCallToOp<cir::LLVMIntrinsicCallOp>(
-        builder, {argTy, argTy}, Ops, name, ty, getLoc(E->getExprLoc()));
+    return emitNeonCall(builder, {argTy, argTy}, Ops, name, ty,
+                        getLoc(E->getExprLoc()));
  }
  case NEON::BI__builtin_neon_vmax_v:
  case NEON::BI__builtin_neon_vmaxq_v: {
@@ -4037,8 +4042,8 @@ CIRGenFunction::emitAArch64BuiltinExpr(unsigned BuiltinID, const CallExpr *E,
    llvm::StringRef name = usgn ? "aarch64.neon.umin" : "aarch64.neon.smin";
    if (cir::isFPOrFPVectorTy(ty))
      name = "aarch64.neon.fmin";
-    return emitNeonCallToOp<cir::LLVMIntrinsicCallOp>(
-        builder, {ty, ty}, Ops, name, ty, getLoc(E->getExprLoc()));
+    return emitNeonCall(builder, {ty, ty}, Ops, name, ty,
+                        getLoc(E->getExprLoc()));
  }
  case NEON::BI__builtin_neon_vminh_f16: {
    llvm_unreachable("NEON::BI__builtin_neon_vminh_f16 NYI");
@@ -4048,15 +4053,15 @@ CIRGenFunction::emitAArch64BuiltinExpr(unsigned BuiltinID, const CallExpr *E,
    llvm::StringRef name = usgn ? "aarch64.neon.uabd" : "aarch64.neon.sabd";
    if (cir::isFPOrFPVectorTy(ty))
      name = "aarch64.neon.fabd";
-    return emitNeonCallToOp<cir::LLVMIntrinsicCallOp>(
-        builder, {ty, ty}, Ops, name, ty, getLoc(E->getExprLoc()));
+    return emitNeonCall(builder, {ty, ty}, Ops, name, ty,
+                        getLoc(E->getExprLoc()));
  }
  case NEON::BI__builtin_neon_vpadal_v:
  case NEON::BI__builtin_neon_vpadalq_v: {
    cir::VectorType argTy = getHalfEltSizeTwiceNumElemsVecType(builder, vTy);
    mlir::Location loc = getLoc(E->getExprLoc());
    llvm::SmallVector<mlir::Value, 1> args = {Ops[1]};
-    mlir::Value tmp = emitNeonCallToOp<cir::LLVMIntrinsicCallOp>(
+    mlir::Value tmp = emitNeonCall(
        builder, {argTy}, args,
        usgn ? "aarch64.neon.uaddlp" : "aarch64.neon.saddlp", vTy, loc);
    mlir::Value addEnd = builder.createBitcast(Ops[0], vTy);
@@ -4090,13 +4095,13 @@ CIRGenFunction::emitAArch64BuiltinExpr(unsigned BuiltinID, const CallExpr *E,
  case NEON::BI__builtin_neon_vqrshrun_n_v:
    // The prototype of builtin_neon_vqrshrun_n can be found at
    // https://developer.arm.com/architectures/instruction-sets/intrinsics/
-    return emitNeonCallToOp<cir::LLVMIntrinsicCallOp>(
+    return emitNeonCall(
        builder,
        {builder.getExtendedOrTruncatedElementVectorType(ty, true, true),
         SInt32Ty},
        Ops, "aarch64.neon.sqrshrun", ty, getLoc(E->getExprLoc()));
  case NEON::BI__builtin_neon_vqshrn_n_v:
-    return emitNeonCallToOp<cir::LLVMIntrinsicCallOp>(
+    return emitNeonCall(
        builder,
        {builder.getExtendedOrTruncatedElementVectorType(
             vTy, true /* extend */,
@@ -4105,15 +4110,15 @@ CIRGenFunction::emitAArch64BuiltinExpr(unsigned BuiltinID, const CallExpr *E,
        Ops, usgn ? "aarch64.neon.uqshrn" : "aarch64.neon.sqshrn", ty,
        getLoc(E->getExprLoc()));
  case NEON::BI__builtin_neon_vrshrn_n_v:
-    return emitNeonCallToOp<cir::LLVMIntrinsicCallOp>(
+    return emitNeonCall(
        builder,
        {builder.getExtendedOrTruncatedElementVectorType(
             vTy, true /* extend */,
             mlir::cast<cir::IntType>(vTy.getEltType()).isSigned()),
         SInt32Ty},
        Ops, "aarch64.neon.rshrn", ty, getLoc(E->getExprLoc()));
  case NEON::BI__builtin_neon_vqrshrn_n_v:
-    return emitNeonCallToOp<cir::LLVMIntrinsicCallOp>(
+    return emitNeonCall(
        builder,
        {builder.getExtendedOrTruncatedElementVectorType(
             vTy, true /* extend */,
@@ -4127,8 +4132,8 @@ CIRGenFunction::emitAArch64BuiltinExpr(unsigned BuiltinID, const CallExpr *E,
  case NEON::BI__builtin_neon_vrnda_v:
  case NEON::BI__builtin_neon_vrndaq_v: {
    assert(!cir::MissingFeatures::emitConstrainedFPCall());
-    return emitNeonCallToOp<cir::LLVMIntrinsicCallOp>(
-        builder, {ty}, Ops, "round", ty, getLoc(E->getExprLoc()));
+    return emitNeonCall(builder, {ty}, Ops, "round", ty,
+                        getLoc(E->getExprLoc()));
  }
  case NEON::BI__builtin_neon_vrndih_f16: {
    llvm_unreachable("NEON::BI__builtin_neon_vrndih_f16 NYI");
@@ -4310,9 +4315,9 @@ CIRGenFunction::emitAArch64BuiltinExpr(unsigned BuiltinID, const CallExpr *E,
    cir::VectorType vTy = cir::VectorType::get(builder.getContext(), eltTy, 4);
    Ops.push_back(emitScalarExpr(E->getArg(0)));
    // This adds across the vector elements, so a wider result type is needed.
-    Ops[0] = emitNeonCallToOp<cir::LLVMIntrinsicCallOp>(
-        builder, {vTy}, Ops, usgn ? "aarch64.neon.uaddv" : "aarch64.neon.saddv",
-        SInt32Ty, getLoc(E->getExprLoc()));
+    Ops[0] = emitNeonCall(builder, {vTy}, Ops,
+                          usgn ? "aarch64.neon.uaddv" : "aarch64.neon.saddv",
+                          SInt32Ty, getLoc(E->getExprLoc()));
    return builder.createIntCast(Ops[0], eltTy);
  }
  case NEON::BI__builtin_neon_vaddvq_u8:
@@ -4419,10 +4424,9 @@ CIRGenFunction::emitAArch64BuiltinExpr(unsigned BuiltinID, const CallExpr *E,
    mlir::Type argTy = cir::VectorType::get(builder.getContext(),
                                            usgn ? UInt16Ty : SInt16Ty, 8);
    llvm::SmallVector<mlir::Value, 1> argOps = {emitScalarExpr(E->getArg(0))};
-    return emitNeonCallToOp<cir::LLVMIntrinsicCallOp>(
-        builder, {argTy}, argOps,
-        usgn ? "aarch64.neon.uaddlv" : "aarch64.neon.saddlv",
-        usgn ? UInt32Ty : SInt32Ty, getLoc(E->getExprLoc()));
+    return emitNeonCall(builder, {argTy}, argOps,
+                        usgn ? "aarch64.neon.uaddlv" : "aarch64.neon.saddlv",
+                        usgn ? UInt32Ty : SInt32Ty, getLoc(E->getExprLoc()));
  }
  case NEON::BI__builtin_neon_vaddlv_s8: {
    llvm_unreachable("NEON::BI__builtin_neon_vaddlv_s8 NYI");
@@ -4434,10 +4438,9 @@ CIRGenFunction::emitAArch64BuiltinExpr(unsigned BuiltinID, const CallExpr *E,
    mlir::Type argTy = cir::VectorType::get(builder.getContext(),
                                            usgn ? UInt16Ty : SInt16Ty, 4);
    llvm::SmallVector<mlir::Value, 1> argOps = {emitScalarExpr(E->getArg(0))};
-    return emitNeonCallToOp<cir::LLVMIntrinsicCallOp>(
-        builder, {argTy}, argOps,
-        usgn ? "aarch64.neon.uaddlv" : "aarch64.neon.saddlv",
-        usgn ? UInt32Ty : SInt32Ty, getLoc(E->getExprLoc()));
+    return emitNeonCall(builder, {argTy}, argOps,
+                        usgn ? "aarch64.neon.uaddlv" : "aarch64.neon.saddlv",
+                        usgn ? UInt32Ty : SInt32Ty, getLoc(E->getExprLoc()));
  }
  case NEON::BI__builtin_neon_vaddlvq_s8: {
    llvm_unreachable("NEON::BI__builtin_neon_vaddlvq_s8 NYI");
@@ -4464,11 +4467,11 @@ CIRGenFunction::emitAArch64BuiltinExpr(unsigned BuiltinID, const CallExpr *E,
    // Thus we have to make the shift amount vector type signed.
    cir::VectorType shiftAmtVecTy =
        usgn ? getSignChangedVectorType(builder, vTy) : vTy;
-    mlir::Value tmp = emitNeonCallToOp<cir::LLVMIntrinsicCallOp>(
-        builder, {vTy, shiftAmtVecTy}, tmpOps,
-        usgn ? "aarch64.neon.urshl" : "aarch64.neon.srshl", vTy,
-        getLoc(E->getExprLoc()), false, 1 /* shift amount is args[1] */,
-        true /* right shift */);
+    mlir::Value tmp =
+        emitNeonCall(builder, {vTy, shiftAmtVecTy}, tmpOps,
+                     usgn ? "aarch64.neon.urshl" : "aarch64.neon.srshl", vTy,
+                     getLoc(E->getExprLoc()), false,
+                     1 /* shift amount is args[1] */, true /* right shift */);
    Ops[0] = builder.createBitcast(Ops[0], vTy);
    return builder.createBinop(Ops[0], cir::BinOpKind::Add, tmp);
  }
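
The vrsra pattern closing this diff is likewise two steps: a rounding shift through aarch64.neon.urshl/srshl (a right shift expressed as a negated amount, per the rightshift flag), then a plain vector add into the accumulator. One unsigned lane, as a semantics sketch:

#include <cstdint>
#include <cstdio>

// Rounding right shift: add half of the discarded range, then shift.
static uint32_t rshr_round(uint32_t x, unsigned n) {
  return static_cast<uint32_t>((uint64_t(x) + (1ull << (n - 1))) >> n);
}

int main() {
  uint32_t acc = 100, x = 7;
  // vrsra #2: 7/4 = 1.75 rounds to 2, then accumulates into acc.
  std::printf("%u\n", acc + rshr_round(x, 2)); // prints 102
}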