Skip to content

Commit dae8726

Browse files
authored
[CIR][CIRGen][Builtin][Neon] Lower neon_vqshlu_n and neon_vqshluq_n (#1037)
1 parent 734d344 commit dae8726

File tree

2 files changed

+129
-61
lines changed

2 files changed

+129
-61
lines changed

clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp

+24-9
Original file line numberDiff line numberDiff line change
@@ -2160,12 +2160,15 @@ static mlir::Value buildArmLdrexNon128Intrinsic(unsigned int builtinID,
21602160
}
21612161
}
21622162

2163-
/// Given a vector of unsigned int type `vecTy`, return a vector type of
2164-
/// signed int type with the same element type width and vector size.
2165-
static mlir::cir::VectorType getSignedVectorType(CIRGenBuilderTy &builder,
2166-
mlir::cir::VectorType vecTy) {
2163+
/// Given a vector of int type `vecTy`, return a vector type of
2164+
/// int type with the same element type width, the opposite signedness,
2165+
/// and the same vector size.
2166+
static mlir::cir::VectorType
2167+
getSignChangedVectorType(CIRGenBuilderTy &builder,
2168+
mlir::cir::VectorType vecTy) {
21672169
auto elemTy = mlir::cast<mlir::cir::IntType>(vecTy.getEltType());
2168-
elemTy = builder.getSIntNTy(elemTy.getWidth());
2170+
elemTy = elemTy.isSigned() ? builder.getUIntNTy(elemTy.getWidth())
2171+
: builder.getSIntNTy(elemTy.getWidth());
21692172
return mlir::cir::VectorType::get(builder.getContext(), elemTy,
21702173
vecTy.getSize());
21712174
}
@@ -2354,13 +2357,25 @@ mlir::Value CIRGenFunction::buildCommonNeonBuiltinExpr(
23542357
: "llvm.aarch64.neon.sqrdmulh.lane",
23552358
resTy, getLoc(e->getExprLoc()));
23562359
}
2360+
case NEON::BI__builtin_neon_vqshlu_n_v:
2361+
case NEON::BI__builtin_neon_vqshluq_n_v: {
2362+
// These intrinsics expect a signed vector type as input, but
2363+
// return an unsigned vector type.
2364+
mlir::cir::VectorType srcTy = getSignChangedVectorType(builder, vTy);
2365+
return buildNeonCall(
2366+
builder, {srcTy, srcTy}, ops, "llvm.aarch64.neon.sqshlu", vTy,
2367+
getLoc(e->getExprLoc()), false, /* not fp constrained op */
2368+
1, /* second arg is shift amount */
2369+
false /* leftshift */);
2370+
}
23572371
case NEON::BI__builtin_neon_vrshr_n_v:
23582372
case NEON::BI__builtin_neon_vrshrq_n_v: {
23592373
return buildNeonCall(
2360-
builder, {vTy, isUnsigned ? getSignedVectorType(builder, vTy) : vTy},
2361-
ops, isUnsigned ? "llvm.aarch64.neon.urshl" : "llvm.aarch64.neon.srshl",
2362-
vTy, getLoc(e->getExprLoc()), false, /* not fp constrained op*/
2363-
1, /* second arg is shift amount */
2374+
builder,
2375+
{vTy, isUnsigned ? getSignChangedVectorType(builder, vTy) : vTy}, ops,
2376+
isUnsigned ? "llvm.aarch64.neon.urshl" : "llvm.aarch64.neon.srshl", vTy,
2377+
getLoc(e->getExprLoc()), false, /* not fp constrained op*/
2378+
1, /* second arg is shift amount */
23642379
true /* rightshift */);
23652380
}
23662381
case NEON::BI__builtin_neon_vshl_n_v:

clang/test/CIR/CodeGen/AArch64/neon.c

+105-52
Original file line numberDiff line numberDiff line change
@@ -6038,64 +6038,117 @@ uint64x2_t test_vrshrq_n_u64(uint64x2_t a) {
60386038
// return vsliq_n_p16(a, b, 15);
60396039
// }
60406040

6041-
// NYI-LABEL: @test_vqshlu_n_s8(
6042-
// NYI: [[VQSHLU_N:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqshlu.v8i8(<8 x i8> %a, <8 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
6043-
// NYI: ret <8 x i8> [[VQSHLU_N]]
6044-
// uint8x8_t test_vqshlu_n_s8(int8x8_t a) {
6045-
// return vqshlu_n_s8(a, 3);
6046-
// }
6041+
uint8x8_t test_vqshlu_n_s8(int8x8_t a) {
6042+
return vqshlu_n_s8(a, 3);
60476043

6048-
// NYI-LABEL: @test_vqshlu_n_s16(
6049-
// NYI: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
6050-
// NYI: [[VQSHLU_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
6051-
// NYI: [[VQSHLU_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqshlu.v4i16(<4 x i16> [[VQSHLU_N]], <4 x i16> <i16 3, i16 3, i16 3, i16 3>)
6052-
// NYI: ret <4 x i16> [[VQSHLU_N1]]
6053-
// uint16x4_t test_vqshlu_n_s16(int16x4_t a) {
6054-
// return vqshlu_n_s16(a, 3);
6055-
// }
6044+
// CIR-LABEL: vqshlu_n_s8
6045+
// CIR: [[AMT:%.*]] = cir.const #cir.const_vector<[#cir.int<3> : !s8i, #cir.int<3> : !s8i, #cir.int<3> : !s8i,
6046+
// CIR-SAME: #cir.int<3> : !s8i, #cir.int<3> : !s8i, #cir.int<3> : !s8i, #cir.int<3> : !s8i, #cir.int<3> : !s8i]> : !cir.vector<!s8i x 8>
6047+
// CIR: {{%.*}} = cir.llvm.intrinsic "llvm.aarch64.neon.sqshlu" {{%.*}}, [[AMT]] :
6048+
// CIR-SAME: (!cir.vector<!s8i x 8>, !cir.vector<!s8i x 8>) -> !cir.vector<!u8i x 8>
60566049

6057-
// NYI-LABEL: @test_vqshlu_n_s32(
6058-
// NYI: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
6059-
// NYI: [[VQSHLU_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
6060-
// NYI: [[VQSHLU_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqshlu.v2i32(<2 x i32> [[VQSHLU_N]], <2 x i32> <i32 3, i32 3>)
6061-
// NYI: ret <2 x i32> [[VQSHLU_N1]]
6062-
// uint32x2_t test_vqshlu_n_s32(int32x2_t a) {
6063-
// return vqshlu_n_s32(a, 3);
6064-
// }
6050+
// LLVM: {{.*}}@test_vqshlu_n_s8(<8 x i8>{{.*}}[[A:%.*]])
6051+
// LLVM: [[VQSHLU_N:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqshlu.v8i8(<8 x i8> [[A]], <8 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
6052+
// LLVM: ret <8 x i8> [[VQSHLU_N]]
6053+
}
60656054

6066-
// NYI-LABEL: @test_vqshluq_n_s8(
6067-
// NYI: [[VQSHLU_N:%.*]] = call <16 x i8> @llvm.aarch64.neon.sqshlu.v16i8(<16 x i8> %a, <16 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
6068-
// NYI: ret <16 x i8> [[VQSHLU_N]]
6069-
// uint8x16_t test_vqshluq_n_s8(int8x16_t a) {
6070-
// return vqshluq_n_s8(a, 3);
6071-
// }
6055+
uint16x4_t test_vqshlu_n_s16(int16x4_t a) {
6056+
return vqshlu_n_s16(a, 3);
60726057

6073-
// NYI-LABEL: @test_vqshluq_n_s16(
6074-
// NYI: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
6075-
// NYI: [[VQSHLU_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
6076-
// NYI: [[VQSHLU_N1:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqshlu.v8i16(<8 x i16> [[VQSHLU_N]], <8 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
6077-
// NYI: ret <8 x i16> [[VQSHLU_N1]]
6078-
// uint16x8_t test_vqshluq_n_s16(int16x8_t a) {
6079-
// return vqshluq_n_s16(a, 3);
6080-
// }
6058+
// CIR-LABEL: vqshlu_n_s16
6059+
// CIR: [[AMT:%.*]] = cir.const #cir.const_vector<[#cir.int<3> : !s16i, #cir.int<3> : !s16i,
6060+
// CIR-SAME: #cir.int<3> : !s16i, #cir.int<3> : !s16i]> : !cir.vector<!s16i x 4>
6061+
// CIR: {{%.*}} = cir.llvm.intrinsic "llvm.aarch64.neon.sqshlu" {{%.*}}, [[AMT]] :
6062+
// CIR-SAME: (!cir.vector<!s16i x 4>, !cir.vector<!s16i x 4>) -> !cir.vector<!u16i x 4>
60816063

6082-
// NYI-LABEL: @test_vqshluq_n_s32(
6083-
// NYI: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
6084-
// NYI: [[VQSHLU_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
6085-
// NYI: [[VQSHLU_N1:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqshlu.v4i32(<4 x i32> [[VQSHLU_N]], <4 x i32> <i32 3, i32 3, i32 3, i32 3>)
6086-
// NYI: ret <4 x i32> [[VQSHLU_N1]]
6087-
// uint32x4_t test_vqshluq_n_s32(int32x4_t a) {
6088-
// return vqshluq_n_s32(a, 3);
6089-
// }
6064+
// LLVM: {{.*}}@test_vqshlu_n_s16(<4 x i16>{{.*}}[[A:%.*]])
6065+
// LLVM: [[TMP0:%.*]] = bitcast <4 x i16> [[A]] to <8 x i8>
6066+
// LLVM: [[VQSHLU_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
6067+
// LLVM: [[VQSHLU_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqshlu.v4i16(<4 x i16> [[VQSHLU_N]], <4 x i16> <i16 3, i16 3, i16 3, i16 3>)
6068+
// LLVM: ret <4 x i16> [[VQSHLU_N1]]
6069+
}
60906070

6091-
// NYI-LABEL: @test_vqshluq_n_s64(
6092-
// NYI: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
6093-
// NYI: [[VQSHLU_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
6094-
// NYI: [[VQSHLU_N1:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqshlu.v2i64(<2 x i64> [[VQSHLU_N]], <2 x i64> <i64 3, i64 3>)
6095-
// NYI: ret <2 x i64> [[VQSHLU_N1]]
6096-
// uint64x2_t test_vqshluq_n_s64(int64x2_t a) {
6097-
// return vqshluq_n_s64(a, 3);
6098-
// }
6071+
uint32x2_t test_vqshlu_n_s32(int32x2_t a) {
6072+
return vqshlu_n_s32(a, 3);
6073+
6074+
// CIR-LABEL: vqshlu_n_s32
6075+
// CIR: [[AMT:%.*]] = cir.const #cir.const_vector<[#cir.int<3> : !s32i, #cir.int<3> : !s32i]> : !cir.vector<!s32i x 2>
6076+
// CIR: {{%.*}} = cir.llvm.intrinsic "llvm.aarch64.neon.sqshlu" {{%.*}}, [[AMT]] :
6077+
// CIR-SAME: (!cir.vector<!s32i x 2>, !cir.vector<!s32i x 2>) -> !cir.vector<!u32i x 2>
6078+
6079+
// LLVM: {{.*}}@test_vqshlu_n_s32(<2 x i32>{{.*}}[[A:%.*]])
6080+
// LLVM: [[TMP0:%.*]] = bitcast <2 x i32> [[A]] to <8 x i8>
6081+
// LLVM: [[VQSHLU_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
6082+
// LLVM: [[VQSHLU_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqshlu.v2i32(<2 x i32> [[VQSHLU_N]], <2 x i32> <i32 3, i32 3>)
6083+
}
6084+
6085+
uint8x16_t test_vqshluq_n_s8(int8x16_t a) {
6086+
return vqshluq_n_s8(a, 3);
6087+
6088+
// CIR-LABEL: vqshluq_n_s8
6089+
// CIR: [[AMT:%.*]] = cir.const #cir.const_vector<[#cir.int<3> : !s8i, #cir.int<3> : !s8i, #cir.int<3> : !s8i,
6090+
// CIR-SAME: #cir.int<3> : !s8i, #cir.int<3> : !s8i, #cir.int<3> : !s8i, #cir.int<3> : !s8i, #cir.int<3> : !s8i,
6091+
// CIR-SAME: #cir.int<3> : !s8i, #cir.int<3> : !s8i, #cir.int<3> : !s8i, #cir.int<3> : !s8i,
6092+
// CIR-SAME: #cir.int<3> : !s8i, #cir.int<3> : !s8i, #cir.int<3> : !s8i, #cir.int<3> : !s8i]> : !cir.vector<!s8i x 16>
6093+
// CIR: {{%.*}} = cir.llvm.intrinsic "llvm.aarch64.neon.sqshlu" {{%.*}}, [[AMT]] :
6094+
// CIR-SAME: (!cir.vector<!s8i x 16>, !cir.vector<!s8i x 16>) -> !cir.vector<!u8i x 16>
6095+
6096+
// LLVM: {{.*}}@test_vqshluq_n_s8(<16 x i8>{{.*}}[[A:%.*]])
6097+
// LLVM: [[VQSHLUQ_N:%.*]] = call <16 x i8> @llvm.aarch64.neon.sqshlu.v16i8(<16 x i8> [[A]],
6098+
// LLVM-SAME: <16 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
6099+
// LLVM: ret <16 x i8> [[VQSHLUQ_N]]
6100+
}
6101+
6102+
uint16x8_t test_vqshluq_n_s16(int16x8_t a) {
6103+
return vqshluq_n_s16(a, 3);
6104+
6105+
// CIR-LABEL: vqshluq_n_s16
6106+
// CIR: [[AMT:%.*]] = cir.const #cir.const_vector<[#cir.int<3> : !s16i, #cir.int<3> : !s16i,
6107+
// CIR-SAME: #cir.int<3> : !s16i, #cir.int<3> : !s16i, #cir.int<3> : !s16i, #cir.int<3> : !s16i,
6108+
// CIR-SAME: #cir.int<3> : !s16i, #cir.int<3> : !s16i]> : !cir.vector<!s16i x 8>
6109+
// CIR: {{%.*}} = cir.llvm.intrinsic "llvm.aarch64.neon.sqshlu" {{%.*}}, [[AMT]] :
6110+
// CIR-SAME: (!cir.vector<!s16i x 8>, !cir.vector<!s16i x 8>) -> !cir.vector<!u16i x 8>
6111+
6112+
// LLVM: {{.*}}@test_vqshluq_n_s16(<8 x i16>{{.*}}[[A:%.*]])
6113+
// LLVM: [[TMP0:%.*]] = bitcast <8 x i16> [[A]] to <16 x i8>
6114+
// LLVM: [[VQSHLUQ_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
6115+
// LLVM: [[VQSHLUQ_N1:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqshlu.v8i16(<8 x i16> [[VQSHLUQ_N]],
6116+
// LLVM-SAME: <8 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
6117+
// LLVM: ret <8 x i16> [[VQSHLUQ_N1]]
6118+
}
6119+
6120+
uint32x4_t test_vqshluq_n_s32(int32x4_t a) {
6121+
return vqshluq_n_s32(a, 3);
6122+
6123+
// CIR-LABEL: vqshluq_n_s32
6124+
// CIR: [[AMT:%.*]] = cir.const #cir.const_vector<[#cir.int<3> : !s32i, #cir.int<3> : !s32i,
6125+
// CIR-SAME: #cir.int<3> : !s32i, #cir.int<3> : !s32i]> : !cir.vector<!s32i x 4>
6126+
// CIR: {{%.*}} = cir.llvm.intrinsic "llvm.aarch64.neon.sqshlu" {{%.*}}, [[AMT]] :
6127+
// CIR-SAME: (!cir.vector<!s32i x 4>, !cir.vector<!s32i x 4>) -> !cir.vector<!u32i x 4>
6128+
6129+
// LLVM: {{.*}}@test_vqshluq_n_s32(<4 x i32>{{.*}}[[A:%.*]])
6130+
// LLVM: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <16 x i8>
6131+
// LLVM: [[VQSHLUQ_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
6132+
// LLVM: [[VQSHLUQ_N1:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqshlu.v4i32(<4 x i32> [[VQSHLUQ_N]],
6133+
// LLVM-SAME: <4 x i32> <i32 3, i32 3, i32 3, i32 3>)
6134+
// LLVM: ret <4 x i32> [[VQSHLUQ_N1]]
6135+
}
6136+
6137+
uint64x2_t test_vqshluq_n_s64(int64x2_t a) {
6138+
return vqshluq_n_s64(a, 3);
6139+
6140+
// CIR-LABEL: vqshluq_n_s64
6141+
// CIR: [[AMT:%.*]] = cir.const #cir.const_vector<[#cir.int<3> : !s64i, #cir.int<3> : !s64i]> : !cir.vector<!s64i x 2>
6142+
// CIR: {{%.*}} = cir.llvm.intrinsic "llvm.aarch64.neon.sqshlu" {{%.*}}, [[AMT]] :
6143+
// CIR-SAME: (!cir.vector<!s64i x 2>, !cir.vector<!s64i x 2>) -> !cir.vector<!u64i x 2>
6144+
6145+
// LLVM: {{.*}}@test_vqshluq_n_s64(<2 x i64>{{.*}}[[A:%.*]])
6146+
// LLVM: [[TMP0:%.*]] = bitcast <2 x i64> [[A]] to <16 x i8>
6147+
// LLVM: [[VQSHLUQ_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
6148+
// LLVM: [[VQSHLUQ_N1:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqshlu.v2i64(<2 x i64> [[VQSHLUQ_N]],
6149+
// LLVM-SAME: <2 x i64> <i64 3, i64 3>)
6150+
// LLVM: ret <2 x i64> [[VQSHLUQ_N1]]
6151+
}
60996152

61006153
// NYI-LABEL: @test_vshrn_n_s16(
61016154
// NYI: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>

0 commit comments

Comments
 (0)