diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp b/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp
index 628ddba99857..f98ab30d04c6 100644
--- a/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp
@@ -2160,12 +2160,15 @@ static mlir::Value buildArmLdrexNon128Intrinsic(unsigned int builtinID,
   }
 }
 
-/// Given a vector of unsigned int type `vecTy`, return a vector type of
-/// signed int type with the same element type width and vector size.
-static mlir::cir::VectorType getSignedVectorType(CIRGenBuilderTy &builder,
-                                                 mlir::cir::VectorType vecTy) {
+/// Given a vector of integer type `vecTy`, return a vector type of
+/// the same element type width and vector size, but with the
+/// opposite signedness.
+static mlir::cir::VectorType
+getSignChangedVectorType(CIRGenBuilderTy &builder,
+                         mlir::cir::VectorType vecTy) {
   auto elemTy = mlir::cast<mlir::cir::IntType>(vecTy.getEltType());
-  elemTy = builder.getSIntNTy(elemTy.getWidth());
+  elemTy = elemTy.isSigned() ? builder.getUIntNTy(elemTy.getWidth())
+                             : builder.getSIntNTy(elemTy.getWidth());
   return mlir::cir::VectorType::get(builder.getContext(), elemTy,
                                     vecTy.getSize());
 }
@@ -2354,13 +2357,25 @@ mlir::Value CIRGenFunction::buildCommonNeonBuiltinExpr(
                          : "llvm.aarch64.neon.sqrdmulh.lane",
         resTy, getLoc(e->getExprLoc()));
   }
+  case NEON::BI__builtin_neon_vqshlu_n_v:
+  case NEON::BI__builtin_neon_vqshluq_n_v: {
+    // These intrinsics expect a signed vector type as input, but
+    // return an unsigned vector type.
+    mlir::cir::VectorType srcTy = getSignChangedVectorType(builder, vTy);
+    return buildNeonCall(
+        builder, {srcTy, srcTy}, ops, "llvm.aarch64.neon.sqshlu", vTy,
+        getLoc(e->getExprLoc()), false, /* not fp constrained op */
+        1, /* second arg is shift amount */
+        false /* leftshift */);
+  }
   case NEON::BI__builtin_neon_vrshr_n_v:
   case NEON::BI__builtin_neon_vrshrq_n_v: {
     return buildNeonCall(
-        builder, {vTy, isUnsigned ? getSignedVectorType(builder, vTy) : vTy},
-        ops, isUnsigned ? "llvm.aarch64.neon.urshl" : "llvm.aarch64.neon.srshl",
-        vTy, getLoc(e->getExprLoc()), false, /* not fp constrained op*/
-        1, /* second arg is shift amount */
+        builder,
+        {vTy, isUnsigned ? getSignChangedVectorType(builder, vTy) : vTy}, ops,
+        isUnsigned ? "llvm.aarch64.neon.urshl" : "llvm.aarch64.neon.srshl", vTy,
+        getLoc(e->getExprLoc()), false, /* not fp constrained op*/
+        1, /* second arg is shift amount */
         true /* rightshift */);
   }
   case NEON::BI__builtin_neon_vshl_n_v:
diff --git a/clang/test/CIR/CodeGen/AArch64/neon.c b/clang/test/CIR/CodeGen/AArch64/neon.c
index eb001fcc5262..053464b4f009 100644
--- a/clang/test/CIR/CodeGen/AArch64/neon.c
+++ b/clang/test/CIR/CodeGen/AArch64/neon.c
@@ -6038,64 +6038,117 @@ uint64x2_t test_vrshrq_n_u64(uint64x2_t a) {
 //   return vsliq_n_p16(a, b, 15);
 // }
 
-// NYI-LABEL: @test_vqshlu_n_s8(
-// NYI: [[VQSHLU_N:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqshlu.v8i8(<8 x i8> %a, <8 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-// NYI: ret <8 x i8> [[VQSHLU_N]]
-// uint8x8_t test_vqshlu_n_s8(int8x8_t a) {
-//   return vqshlu_n_s8(a, 3);
-// }
+uint8x8_t test_vqshlu_n_s8(int8x8_t a) {
+  return vqshlu_n_s8(a, 3);
 
-// NYI-LABEL: @test_vqshlu_n_s16(
-// NYI: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
-// NYI: [[VQSHLU_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
-// NYI: [[VQSHLU_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqshlu.v4i16(<4 x i16> [[VQSHLU_N]], <4 x i16> <i16 3, i16 3, i16 3, i16 3>)
-// NYI: ret <4 x i16> [[VQSHLU_N1]]
-// uint16x4_t test_vqshlu_n_s16(int16x4_t a) {
-//   return vqshlu_n_s16(a, 3);
-// }
+  // CIR-LABEL: vqshlu_n_s8
+  // CIR: [[AMT:%.*]] = cir.const #cir.const_vector<[#cir.int<3> : !s8i, #cir.int<3> : !s8i, #cir.int<3> : !s8i,
+  // CIR-SAME: #cir.int<3> : !s8i, #cir.int<3> : !s8i, #cir.int<3> : !s8i, #cir.int<3> : !s8i, #cir.int<3> : !s8i]> : !cir.vector<!s8i x 8>
+  // CIR: {{%.*}} = cir.llvm.intrinsic "llvm.aarch64.neon.sqshlu" {{%.*}}, [[AMT]] :
+  // CIR-SAME: (!cir.vector<!s8i x 8>, !cir.vector<!s8i x 8>) -> !cir.vector<!u8i x 8>
 
-// NYI-LABEL: @test_vqshlu_n_s32(
-// NYI: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
-// NYI: [[VQSHLU_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
-// NYI: [[VQSHLU_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqshlu.v2i32(<2 x i32> [[VQSHLU_N]], <2 x i32> <i32 3, i32 3>)
-// NYI: ret <2 x i32> [[VQSHLU_N1]]
-// uint32x2_t test_vqshlu_n_s32(int32x2_t a) {
-//   return vqshlu_n_s32(a, 3);
-// }
+  // LLVM: {{.*}}@test_vqshlu_n_s8(<8 x i8>{{.*}}[[A:%.*]])
+  // LLVM: [[VQSHLU_N:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqshlu.v8i8(<8 x i8> [[A]], <8 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
+  // LLVM: ret <8 x i8> [[VQSHLU_N]]
+}
 
-// NYI-LABEL: @test_vqshluq_n_s8(
-// NYI: [[VQSHLU_N:%.*]] = call <16 x i8> @llvm.aarch64.neon.sqshlu.v16i8(<16 x i8> %a, <16 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-// NYI: ret <16 x i8> [[VQSHLU_N]]
-// uint8x16_t test_vqshluq_n_s8(int8x16_t a) {
-//   return vqshluq_n_s8(a, 3);
-// }
+uint16x4_t test_vqshlu_n_s16(int16x4_t a) {
+  return vqshlu_n_s16(a, 3);
 
-// NYI-LABEL: @test_vqshluq_n_s16(
-// NYI: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
-// NYI: [[VQSHLU_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
-// NYI: [[VQSHLU_N1:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqshlu.v8i16(<8 x i16> [[VQSHLU_N]], <8 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-// NYI: ret <8 x i16> [[VQSHLU_N1]]
-// uint16x8_t test_vqshluq_n_s16(int16x8_t a) {
-//   return vqshluq_n_s16(a, 3);
-// }
+  // CIR-LABEL: vqshlu_n_s16
+  // CIR: [[AMT:%.*]] = cir.const #cir.const_vector<[#cir.int<3> : !s16i, #cir.int<3> : !s16i,
+  // CIR-SAME: #cir.int<3> : !s16i, #cir.int<3> : !s16i]> : !cir.vector<!s16i x 4>
+  // CIR: {{%.*}} = cir.llvm.intrinsic "llvm.aarch64.neon.sqshlu" {{%.*}}, [[AMT]] :
+  // CIR-SAME: (!cir.vector<!s16i x 4>, !cir.vector<!s16i x 4>) -> !cir.vector<!u16i x 4>
 
-// NYI-LABEL: @test_vqshluq_n_s32(
-// NYI: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
-// NYI: [[VQSHLU_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
-// NYI: [[VQSHLU_N1:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqshlu.v4i32(<4 x i32> [[VQSHLU_N]], <4 x i32> <i32 3, i32 3, i32 3, i32 3>)
-// NYI: ret <4 x i32> [[VQSHLU_N1]]
-// uint32x4_t test_vqshluq_n_s32(int32x4_t a) {
-//   return vqshluq_n_s32(a, 3);
-// }
+  // LLVM: {{.*}}@test_vqshlu_n_s16(<4 x i16>{{.*}}[[A:%.*]])
+  // LLVM: [[TMP0:%.*]] = bitcast <4 x i16> [[A]] to <8 x i8>
+  // LLVM: [[VQSHLU_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+  // LLVM: [[VQSHLU_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqshlu.v4i16(<4 x i16> [[VQSHLU_N]], <4 x i16> <i16 3, i16 3, i16 3, i16 3>)
+  // LLVM: ret <4 x i16> [[VQSHLU_N1]]
+}
 
-// NYI-LABEL: @test_vqshluq_n_s64(
-// NYI: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
-// NYI: [[VQSHLU_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
-// NYI: [[VQSHLU_N1:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqshlu.v2i64(<2 x i64> [[VQSHLU_N]], <2 x i64> <i64 3, i64 3>)
-// NYI: ret <2 x i64> [[VQSHLU_N1]]
-// uint64x2_t test_vqshluq_n_s64(int64x2_t a) {
-//   return vqshluq_n_s64(a, 3);
-// }
+uint32x2_t test_vqshlu_n_s32(int32x2_t a) {
+  return vqshlu_n_s32(a, 3);
+
+  // CIR-LABEL: vqshlu_n_s32
+  // CIR: [[AMT:%.*]] = cir.const #cir.const_vector<[#cir.int<3> : !s32i, #cir.int<3> : !s32i]> : !cir.vector<!s32i x 2>
+  // CIR: {{%.*}} = cir.llvm.intrinsic "llvm.aarch64.neon.sqshlu" {{%.*}}, [[AMT]] :
+  // CIR-SAME: (!cir.vector<!s32i x 2>, !cir.vector<!s32i x 2>) -> !cir.vector<!u32i x 2>
+
+  // LLVM: {{.*}}@test_vqshlu_n_s32(<2 x i32>{{.*}}[[A:%.*]])
+  // LLVM: [[TMP0:%.*]] = bitcast <2 x i32> [[A]] to <8 x i8>
+  // LLVM: [[VQSHLU_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+  // LLVM: [[VQSHLU_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqshlu.v2i32(<2 x i32> [[VQSHLU_N]], <2 x i32> <i32 3, i32 3>)
+}
+
+uint8x16_t test_vqshluq_n_s8(int8x16_t a) {
+  return vqshluq_n_s8(a, 3);
+
+  // CIR-LABEL: vqshluq_n_s8
+  // CIR: [[AMT:%.*]] = cir.const #cir.const_vector<[#cir.int<3> : !s8i, #cir.int<3> : !s8i, #cir.int<3> : !s8i,
+  // CIR-SAME: #cir.int<3> : !s8i, #cir.int<3> : !s8i, #cir.int<3> : !s8i, #cir.int<3> : !s8i, #cir.int<3> : !s8i,
+  // CIR-SAME: #cir.int<3> : !s8i, #cir.int<3> : !s8i, #cir.int<3> : !s8i, #cir.int<3> : !s8i,
+  // CIR-SAME: #cir.int<3> : !s8i, #cir.int<3> : !s8i, #cir.int<3> : !s8i, #cir.int<3> : !s8i]> : !cir.vector<!s8i x 16>
+  // CIR: {{%.*}} = cir.llvm.intrinsic "llvm.aarch64.neon.sqshlu" {{%.*}}, [[AMT]] :
+  // CIR-SAME: (!cir.vector<!s8i x 16>, !cir.vector<!s8i x 16>) -> !cir.vector<!u8i x 16>
+
+  // LLVM: {{.*}}@test_vqshluq_n_s8(<16 x i8>{{.*}}[[A:%.*]])
+  // LLVM: [[VQSHLUQ_N:%.*]] = call <16 x i8> @llvm.aarch64.neon.sqshlu.v16i8(<16 x i8> [[A]],
+  // LLVM-SAME: <16 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
+  // LLVM: ret <16 x i8> [[VQSHLUQ_N]]
+}
+
+uint16x8_t test_vqshluq_n_s16(int16x8_t a) {
+  return vqshluq_n_s16(a, 3);
+
+  // CIR-LABEL: vqshluq_n_s16
+  // CIR: [[AMT:%.*]] = cir.const #cir.const_vector<[#cir.int<3> : !s16i, #cir.int<3> : !s16i,
+  // CIR-SAME: #cir.int<3> : !s16i, #cir.int<3> : !s16i, #cir.int<3> : !s16i, #cir.int<3> : !s16i,
+  // CIR-SAME: #cir.int<3> : !s16i, #cir.int<3> : !s16i]> : !cir.vector<!s16i x 8>
+  // CIR: {{%.*}} = cir.llvm.intrinsic "llvm.aarch64.neon.sqshlu" {{%.*}}, [[AMT]] :
+  // CIR-SAME: (!cir.vector<!s16i x 8>, !cir.vector<!s16i x 8>) -> !cir.vector<!u16i x 8>
+
+  // LLVM: {{.*}}@test_vqshluq_n_s16(<8 x i16>{{.*}}[[A:%.*]])
+  // LLVM: [[TMP0:%.*]] = bitcast <8 x i16> [[A]] to <16 x i8>
+  // LLVM: [[VQSHLUQ_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+  // LLVM: [[VQSHLUQ_N1:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqshlu.v8i16(<8 x i16> [[VQSHLUQ_N]],
+  // LLVM-SAME: <8 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
+  // LLVM: ret <8 x i16> [[VQSHLUQ_N1]]
+}
+
+uint32x4_t test_vqshluq_n_s32(int32x4_t a) {
+  return vqshluq_n_s32(a, 3);
+
+  // CIR-LABEL: vqshluq_n_s32
+  // CIR: [[AMT:%.*]] = cir.const #cir.const_vector<[#cir.int<3> : !s32i, #cir.int<3> : !s32i,
+  // CIR-SAME: #cir.int<3> : !s32i, #cir.int<3> : !s32i]> : !cir.vector<!s32i x 4>
+  // CIR: {{%.*}} = cir.llvm.intrinsic "llvm.aarch64.neon.sqshlu" {{%.*}}, [[AMT]] :
+  // CIR-SAME: (!cir.vector<!s32i x 4>, !cir.vector<!s32i x 4>) -> !cir.vector<!u32i x 4>
+
+  // LLVM: {{.*}}@test_vqshluq_n_s32(<4 x i32>{{.*}}[[A:%.*]])
+  // LLVM: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <16 x i8>
+  // LLVM: [[VQSHLUQ_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+  // LLVM: [[VQSHLUQ_N1:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqshlu.v4i32(<4 x i32> [[VQSHLUQ_N]],
+  // LLVM-SAME: <4 x i32> <i32 3, i32 3, i32 3, i32 3>)
+  // LLVM: ret <4 x i32> [[VQSHLUQ_N1]]
+}
+
+uint64x2_t test_vqshluq_n_s64(int64x2_t a) {
+  return vqshluq_n_s64(a, 3);
+
+  // CIR-LABEL: vqshluq_n_s64
+  // CIR: [[AMT:%.*]] = cir.const #cir.const_vector<[#cir.int<3> : !s64i, #cir.int<3> : !s64i]> : !cir.vector<!s64i x 2>
+  // CIR: {{%.*}} = cir.llvm.intrinsic "llvm.aarch64.neon.sqshlu" {{%.*}}, [[AMT]] :
+  // CIR-SAME: (!cir.vector<!s64i x 2>, !cir.vector<!s64i x 2>) -> !cir.vector<!u64i x 2>
+
+  // LLVM: {{.*}}@test_vqshluq_n_s64(<2 x i64>{{.*}}[[A:%.*]])
+  // LLVM: [[TMP0:%.*]] = bitcast <2 x i64> [[A]] to <16 x i8>
+  // LLVM: [[VQSHLUQ_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+  // LLVM: [[VQSHLUQ_N1:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqshlu.v2i64(<2 x i64> [[VQSHLUQ_N]],
+  // LLVM-SAME: <2 x i64> <i64 3, i64 3>)
+  // LLVM: ret <2 x i64> [[VQSHLUQ_N1]]
+}
 
 // NYI-LABEL: @test_vshrn_n_s16(
 // NYI: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
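
Beyond the type contract, the behavior these tests pin down is saturating: SQSHLU shifts
each signed lane left and clamps the result to the unsigned range, so negative lanes become
0 and overflowing lanes become the unsigned maximum. A small runnable sketch of that
semantics, assuming an AArch64 host (or cross-compiler) with arm_neon.h; the sample values
are illustrative only, not taken from the patch:

#include <arm_neon.h>
#include <stdio.h>

int main(void) {
  int8x8_t in = {-128, -1, 0, 1, 15, 16, 100, 127};
  // Same operation the tests exercise: signed input, unsigned saturated
  // output, immediate shift of 3.
  uint8x8_t out = vqshlu_n_s8(in, 3);
  // Expected: -128 -> 0, -1 -> 0, 0 -> 0, 1 -> 8, 15 -> 120, 16 -> 128,
  // and 100/127 overflow 8 bits after << 3, so both saturate to 255.
  for (int i = 0; i < 8; ++i)
    printf("%4d -> %3u\n", (int)in[i], (unsigned)out[i]);
  return 0;
}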