Skip to content

Commit 77d060a

Browse files
ghehglanza
authored and committed
[CIR][CIRGen][Builtin][Neon] Lower neon_vrshl_v and neon_vrshlq_v (#1151)
They are rounding shifts of vectors, where the shift amount comes from the least significant byte of the corresponding element of the second input vector. Thus, they are implemented in [their own ASM](https://godbolt.org/z/v65sbeKaW). This makes them unsuitable for lowering to CIR ShiftOp, even though it now supports vector types.
1 parent 5c2567a commit 77d060a

File tree

2 files changed

+218
-132
lines changed

2 files changed

+218
-132
lines changed

clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp

+6
Original file line numberDiff line numberDiff line change
@@ -2585,6 +2585,12 @@ mlir::Value CIRGenFunction::emitCommonNeonBuiltinExpr(
25852585
vTy, true /* extended */, true /* signed */));
25862586
break;
25872587
}
2588+
case NEON::BI__builtin_neon_vrshl_v:
2589+
case NEON::BI__builtin_neon_vrshlq_v: {
2590+
intrincsName = (intrinicId != altLLVMIntrinsic) ? "aarch64.neon.urshl"
2591+
: "aarch64.neon.srshl";
2592+
break;
2593+
}
25882594
}
25892595

25902596
if (intrincsName.empty())

clang/test/CIR/CodeGen/AArch64/neon.c

+212-132
Original file line numberDiff line numberDiff line change
@@ -3727,153 +3727,233 @@ uint64x2_t test_vshlq_u64(uint64x2_t a, int64x2_t b) {
37273727
// return vqshlq_u64(a, b);
37283728
// }
37293729

3730-
// NYI-LABEL: @test_vrshl_s8(
3731-
// NYI: [[VRSHL_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.srshl.v8i8(<8 x i8> %a, <8 x i8> %b)
3732-
// NYI: ret <8 x i8> [[VRSHL_V_I]]
3733-
// int8x8_t test_vrshl_s8(int8x8_t a, int8x8_t b) {
3734-
// return vrshl_s8(a, b);
3735-
// }
3730+
int8x8_t test_vrshl_s8(int8x8_t a, int8x8_t b) {
3731+
return vrshl_s8(a, b);
37363732

3737-
// NYI-LABEL: @test_vrshl_s16(
3738-
// NYI: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
3739-
// NYI: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
3740-
// NYI: [[VRSHL_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.srshl.v4i16(<4 x i16> %a, <4 x i16> %b)
3741-
// NYI: [[VRSHL_V3_I:%.*]] = bitcast <4 x i16> [[VRSHL_V2_I]] to <8 x i8>
3742-
// NYI: ret <4 x i16> [[VRSHL_V2_I]]
3743-
// int16x4_t test_vrshl_s16(int16x4_t a, int16x4_t b) {
3744-
// return vrshl_s16(a, b);
3745-
// }
3733+
// CIR-LABEL: vrshl_s8
3734+
// CIR: cir.llvm.intrinsic "aarch64.neon.srshl" {{%.*}}, {{%.*}} :
3735+
// CIR-SAME: (!cir.vector<!s8i x 8>, !cir.vector<!s8i x 8>) -> !cir.vector<!s8i x 8>
37463736

3747-
// NYI-LABEL: @test_vrshl_s32(
3748-
// NYI: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
3749-
// NYI: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
3750-
// NYI: [[VRSHL_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.srshl.v2i32(<2 x i32> %a, <2 x i32> %b)
3751-
// NYI: [[VRSHL_V3_I:%.*]] = bitcast <2 x i32> [[VRSHL_V2_I]] to <8 x i8>
3752-
// NYI: ret <2 x i32> [[VRSHL_V2_I]]
3753-
// int32x2_t test_vrshl_s32(int32x2_t a, int32x2_t b) {
3754-
// return vrshl_s32(a, b);
3755-
// }
3737+
// LLVM: {{.*}}test_vrshl_s8(<8 x i8>{{.*}}[[a:%.*]], <8 x i8>{{.*}}[[b:%.*]])
3738+
// LLVM: [[VRSHL_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.srshl.v8i8(<8 x i8> [[a]], <8 x i8> [[b]])
3739+
// LLVM: ret <8 x i8> [[VRSHL_V_I]]
3740+
}
37563741

3757-
// NYI-LABEL: @test_vrshl_s64(
3758-
// NYI: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
3759-
// NYI: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
3760-
// NYI: [[VRSHL_V2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.srshl.v1i64(<1 x i64> %a, <1 x i64> %b)
3761-
// NYI: [[VRSHL_V3_I:%.*]] = bitcast <1 x i64> [[VRSHL_V2_I]] to <8 x i8>
3762-
// NYI: ret <1 x i64> [[VRSHL_V2_I]]
3763-
// int64x1_t test_vrshl_s64(int64x1_t a, int64x1_t b) {
3764-
// return vrshl_s64(a, b);
3765-
// }
3742+
int16x4_t test_vrshl_s16(int16x4_t a, int16x4_t b) {
3743+
return vrshl_s16(a, b);
37663744

3767-
// NYI-LABEL: @test_vrshl_u8(
3768-
// NYI: [[VRSHL_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.urshl.v8i8(<8 x i8> %a, <8 x i8> %b)
3769-
// NYI: ret <8 x i8> [[VRSHL_V_I]]
3770-
// uint8x8_t test_vrshl_u8(uint8x8_t a, int8x8_t b) {
3771-
// return vrshl_u8(a, b);
3772-
// }
3745+
// CIR-LABEL: vrshl_s16
3746+
// CIR: cir.llvm.intrinsic "aarch64.neon.srshl" {{%.*}}, {{%.*}} :
3747+
// CIR-SAME: (!cir.vector<!s16i x 4>, !cir.vector<!s16i x 4>) -> !cir.vector<!s16i x 4>
37733748

3774-
// NYI-LABEL: @test_vrshl_u16(
3775-
// NYI: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
3776-
// NYI: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
3777-
// NYI: [[VRSHL_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.urshl.v4i16(<4 x i16> %a, <4 x i16> %b)
3778-
// NYI: [[VRSHL_V3_I:%.*]] = bitcast <4 x i16> [[VRSHL_V2_I]] to <8 x i8>
3779-
// NYI: ret <4 x i16> [[VRSHL_V2_I]]
3780-
// uint16x4_t test_vrshl_u16(uint16x4_t a, int16x4_t b) {
3781-
// return vrshl_u16(a, b);
3782-
// }
3749+
// LLVM: {{.*}}test_vrshl_s16(<4 x i16>{{.*}}[[a:%.*]], <4 x i16>{{.*}}[[b:%.*]])
3750+
// LLVM: [[TMP0:%.*]] = bitcast <4 x i16> [[a]] to <8 x i8>
3751+
// LLVM: [[TMP1:%.*]] = bitcast <4 x i16> [[b]] to <8 x i8>
3752+
// LLVM: [[VRSHL_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.srshl.v4i16(<4 x i16> [[a]], <4 x i16> [[b]])
3753+
// LLVM: [[VRSHL_V3_I:%.*]] = bitcast <4 x i16> [[VRSHL_V2_I]] to <8 x i8>
3754+
// LLVM: ret <4 x i16> [[VRSHL_V2_I]]
3755+
}
37833756

3784-
// NYI-LABEL: @test_vrshl_u32(
3785-
// NYI: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
3786-
// NYI: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
3787-
// NYI: [[VRSHL_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.urshl.v2i32(<2 x i32> %a, <2 x i32> %b)
3788-
// NYI: [[VRSHL_V3_I:%.*]] = bitcast <2 x i32> [[VRSHL_V2_I]] to <8 x i8>
3789-
// NYI: ret <2 x i32> [[VRSHL_V2_I]]
3790-
// uint32x2_t test_vrshl_u32(uint32x2_t a, int32x2_t b) {
3791-
// return vrshl_u32(a, b);
3792-
// }
3757+
int32x2_t test_vrshl_s32(int32x2_t a, int32x2_t b) {
3758+
return vrshl_s32(a, b);
37933759

3794-
// NYI-LABEL: @test_vrshl_u64(
3795-
// NYI: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
3796-
// NYI: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
3797-
// NYI: [[VRSHL_V2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.urshl.v1i64(<1 x i64> %a, <1 x i64> %b)
3798-
// NYI: [[VRSHL_V3_I:%.*]] = bitcast <1 x i64> [[VRSHL_V2_I]] to <8 x i8>
3799-
// NYI: ret <1 x i64> [[VRSHL_V2_I]]
3800-
// uint64x1_t test_vrshl_u64(uint64x1_t a, int64x1_t b) {
3801-
// return vrshl_u64(a, b);
3802-
// }
3760+
// CIR-LABEL: vrshl_s32
3761+
// CIR: cir.llvm.intrinsic "aarch64.neon.srshl" {{%.*}}, {{%.*}} :
3762+
// CIR-SAME: (!cir.vector<!s32i x 2>, !cir.vector<!s32i x 2>) -> !cir.vector<!s32i x 2>
38033763

3804-
// NYI-LABEL: @test_vrshlq_s8(
3805-
// NYI: [[VRSHLQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.srshl.v16i8(<16 x i8> %a, <16 x i8> %b)
3806-
// NYI: ret <16 x i8> [[VRSHLQ_V_I]]
3807-
// int8x16_t test_vrshlq_s8(int8x16_t a, int8x16_t b) {
3808-
// return vrshlq_s8(a, b);
3809-
// }
3764+
// LLVM: {{.*}}test_vrshl_s32(<2 x i32>{{.*}}[[a:%.*]], <2 x i32>{{.*}}[[b:%.*]])
3765+
// LLVM: [[TMP0:%.*]] = bitcast <2 x i32> [[a]] to <8 x i8>
3766+
// LLVM: [[TMP1:%.*]] = bitcast <2 x i32> [[b]] to <8 x i8>
3767+
// LLVM: [[VRSHL_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.srshl.v2i32(<2 x i32> [[a]], <2 x i32> [[b]])
3768+
// LLVM: [[VRSHL_V3_I:%.*]] = bitcast <2 x i32> [[VRSHL_V2_I]] to <8 x i8>
3769+
// LLVM: ret <2 x i32> [[VRSHL_V2_I]]
3770+
}
38103771

3811-
// NYI-LABEL: @test_vrshlq_s16(
3812-
// NYI: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
3813-
// NYI: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
3814-
// NYI: [[VRSHLQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.srshl.v8i16(<8 x i16> %a, <8 x i16> %b)
3815-
// NYI: [[VRSHLQ_V3_I:%.*]] = bitcast <8 x i16> [[VRSHLQ_V2_I]] to <16 x i8>
3816-
// NYI: ret <8 x i16> [[VRSHLQ_V2_I]]
3817-
// int16x8_t test_vrshlq_s16(int16x8_t a, int16x8_t b) {
3818-
// return vrshlq_s16(a, b);
3819-
// }
3772+
int64x1_t test_vrshl_s64(int64x1_t a, int64x1_t b) {
3773+
return vrshl_s64(a, b);
38203774

3821-
// NYI-LABEL: @test_vrshlq_s32(
3822-
// NYI: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
3823-
// NYI: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
3824-
// NYI: [[VRSHLQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.srshl.v4i32(<4 x i32> %a, <4 x i32> %b)
3825-
// NYI: [[VRSHLQ_V3_I:%.*]] = bitcast <4 x i32> [[VRSHLQ_V2_I]] to <16 x i8>
3826-
// NYI: ret <4 x i32> [[VRSHLQ_V2_I]]
3827-
// int32x4_t test_vrshlq_s32(int32x4_t a, int32x4_t b) {
3828-
// return vrshlq_s32(a, b);
3829-
// }
3775+
// CIR-LABEL: vrshl_s64
3776+
// CIR: cir.llvm.intrinsic "aarch64.neon.srshl" {{%.*}}, {{%.*}} :
3777+
// CIR-SAME: (!cir.vector<!s64i x 1>, !cir.vector<!s64i x 1>) -> !cir.vector<!s64i x 1>
38303778

3831-
// NYI-LABEL: @test_vrshlq_s64(
3832-
// NYI: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
3833-
// NYI: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
3834-
// NYI: [[VRSHLQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.srshl.v2i64(<2 x i64> %a, <2 x i64> %b)
3835-
// NYI: [[VRSHLQ_V3_I:%.*]] = bitcast <2 x i64> [[VRSHLQ_V2_I]] to <16 x i8>
3836-
// NYI: ret <2 x i64> [[VRSHLQ_V2_I]]
3837-
// int64x2_t test_vrshlq_s64(int64x2_t a, int64x2_t b) {
3838-
// return vrshlq_s64(a, b);
3839-
// }
3779+
// LLVM: {{.*}}test_vrshl_s64(<1 x i64>{{.*}}[[a:%.*]], <1 x i64>{{.*}}[[b:%.*]])
3780+
// LLVM: [[TMP0:%.*]] = bitcast <1 x i64> [[a]] to <8 x i8>
3781+
// LLVM: [[TMP1:%.*]] = bitcast <1 x i64> [[b]] to <8 x i8>
3782+
// LLVM: [[VRSHL_V2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.srshl.v1i64(<1 x i64> [[a]], <1 x i64> [[b]])
3783+
// LLVM: [[VRSHL_V3_I:%.*]] = bitcast <1 x i64> [[VRSHL_V2_I]] to <8 x i8>
3784+
// LLVM: ret <1 x i64> [[VRSHL_V2_I]]
3785+
}
38403786

3841-
// NYI-LABEL: @test_vrshlq_u8(
3842-
// NYI: [[VRSHLQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.urshl.v16i8(<16 x i8> %a, <16 x i8> %b)
3843-
// NYI: ret <16 x i8> [[VRSHLQ_V_I]]
3844-
// uint8x16_t test_vrshlq_u8(uint8x16_t a, int8x16_t b) {
3845-
// return vrshlq_u8(a, b);
3846-
// }
3787+
uint8x8_t test_vrshl_u8(uint8x8_t a, int8x8_t b) {
3788+
return vrshl_u8(a, b);
38473789

3848-
// NYI-LABEL: @test_vrshlq_u16(
3849-
// NYI: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
3850-
// NYI: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
3851-
// NYI: [[VRSHLQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.urshl.v8i16(<8 x i16> %a, <8 x i16> %b)
3852-
// NYI: [[VRSHLQ_V3_I:%.*]] = bitcast <8 x i16> [[VRSHLQ_V2_I]] to <16 x i8>
3853-
// NYI: ret <8 x i16> [[VRSHLQ_V2_I]]
3854-
// uint16x8_t test_vrshlq_u16(uint16x8_t a, int16x8_t b) {
3855-
// return vrshlq_u16(a, b);
3856-
// }
3790+
// CIR-LABEL: vrshl_u8
3791+
// CIR: cir.llvm.intrinsic "aarch64.neon.urshl" {{%.*}}, {{%.*}} :
3792+
// CIR-SAME: (!cir.vector<!u8i x 8>, !cir.vector<!u8i x 8>) -> !cir.vector<!u8i x 8>
38573793

3858-
// NYI-LABEL: @test_vrshlq_u32(
3859-
// NYI: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
3860-
// NYI: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
3861-
// NYI: [[VRSHLQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.urshl.v4i32(<4 x i32> %a, <4 x i32> %b)
3862-
// NYI: [[VRSHLQ_V3_I:%.*]] = bitcast <4 x i32> [[VRSHLQ_V2_I]] to <16 x i8>
3863-
// NYI: ret <4 x i32> [[VRSHLQ_V2_I]]
3864-
// uint32x4_t test_vrshlq_u32(uint32x4_t a, int32x4_t b) {
3865-
// return vrshlq_u32(a, b);
3866-
// }
3794+
// LLVM: {{.*}}test_vrshl_u8(<8 x i8>{{.*}}[[a:%.*]], <8 x i8>{{.*}}[[b:%.*]])
3795+
// LLVM: [[VRSHL_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.urshl.v8i8(<8 x i8> [[a]], <8 x i8> [[b]])
3796+
// LLVM: ret <8 x i8> [[VRSHL_V_I]]
3797+
}
38673798

3868-
// NYI-LABEL: @test_vrshlq_u64(
3869-
// NYI: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
3870-
// NYI: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
3871-
// NYI: [[VRSHLQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.urshl.v2i64(<2 x i64> %a, <2 x i64> %b)
3872-
// NYI: [[VRSHLQ_V3_I:%.*]] = bitcast <2 x i64> [[VRSHLQ_V2_I]] to <16 x i8>
3873-
// NYI: ret <2 x i64> [[VRSHLQ_V2_I]]
3874-
// uint64x2_t test_vrshlq_u64(uint64x2_t a, int64x2_t b) {
3875-
// return vrshlq_u64(a, b);
3876-
// }
3799+
uint16x4_t test_vrshl_u16(uint16x4_t a, int16x4_t b) {
3800+
return vrshl_u16(a, b);
3801+
3802+
// CIR-LABEL: vrshl_u16
3803+
// CIR: cir.llvm.intrinsic "aarch64.neon.urshl" {{%.*}}, {{%.*}} :
3804+
// CIR-SAME: (!cir.vector<!u16i x 4>, !cir.vector<!u16i x 4>) -> !cir.vector<!u16i x 4>
3805+
3806+
// LLVM: {{.*}}test_vrshl_u16(<4 x i16>{{.*}}[[a:%.*]], <4 x i16>{{.*}}[[b:%.*]])
3807+
// LLVM: [[TMP0:%.*]] = bitcast <4 x i16> [[a]] to <8 x i8>
3808+
// LLVM: [[TMP1:%.*]] = bitcast <4 x i16> [[b]] to <8 x i8>
3809+
// LLVM: [[VRSHL_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.urshl.v4i16(<4 x i16> [[a]], <4 x i16> [[b]])
3810+
// LLVM: [[VRSHL_V3_I:%.*]] = bitcast <4 x i16>
3811+
// LLVM: ret <4 x i16> [[VRSHL_V2_I]]
3812+
}
3813+
3814+
uint32x2_t test_vrshl_u32(uint32x2_t a, int32x2_t b) {
3815+
return vrshl_u32(a, b);
3816+
3817+
// CIR-LABEL: vrshl_u32
3818+
// CIR: cir.llvm.intrinsic "aarch64.neon.urshl" {{%.*}}, {{%.*}} :
3819+
// CIR-SAME: (!cir.vector<!u32i x 2>, !cir.vector<!u32i x 2>) -> !cir.vector<!u32i x 2>
3820+
3821+
// LLVM: {{.*}}test_vrshl_u32(<2 x i32>{{.*}}[[a:%.*]], <2 x i32>{{.*}}[[b:%.*]])
3822+
// LLVM: [[TMP0:%.*]] = bitcast <2 x i32> [[a]] to <8 x i8>
3823+
// LLVM: [[TMP1:%.*]] = bitcast <2 x i32> [[b]] to <8 x i8>
3824+
// LLVM: [[VRSHL_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.urshl.v2i32(<2 x i32> [[a]], <2 x i32> [[b]])
3825+
// LLVM: [[VRSHL_V3_I:%.*]] = bitcast <2 x i32>
3826+
// LLVM: ret <2 x i32> [[VRSHL_V2_I]]
3827+
}
3828+
3829+
uint64x1_t test_vrshl_u64(uint64x1_t a, int64x1_t b) {
3830+
return vrshl_u64(a, b);
3831+
3832+
// CIR-LABEL: vrshl_u64
3833+
// CIR: cir.llvm.intrinsic "aarch64.neon.urshl" {{%.*}}, {{%.*}} :
3834+
// CIR-SAME: (!cir.vector<!u64i x 1>, !cir.vector<!u64i x 1>) -> !cir.vector<!u64i x 1>
3835+
3836+
// LLVM: {{.*}}test_vrshl_u64(<1 x i64>{{.*}}[[a:%.*]], <1 x i64>{{.*}}[[b:%.*]])
3837+
// LLVM: [[TMP0:%.*]] = bitcast <1 x i64> [[a]] to <8 x i8>
3838+
// LLVM: [[TMP1:%.*]] = bitcast <1 x i64> [[b]] to <8 x i8>
3839+
// LLVM: [[VRSHL_V2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.urshl.v1i64(<1 x i64> [[a]], <1 x i64> [[b]])
3840+
// LLVM: [[VRSHL_V3_I:%.*]] = bitcast <1 x i64>
3841+
// LLVM: ret <1 x i64> [[VRSHL_V2_I]]
3842+
}
3843+
3844+
int8x16_t test_vrshlq_s8(int8x16_t a, int8x16_t b) {
3845+
return vrshlq_s8(a, b);
3846+
3847+
// CIR-LABEL: vrshlq_s8
3848+
// CIR: cir.llvm.intrinsic "aarch64.neon.srshl" {{%.*}}, {{%.*}} :
3849+
// CIR-SAME: (!cir.vector<!s8i x 16>, !cir.vector<!s8i x 16>) -> !cir.vector<!s8i x 16>
3850+
3851+
// LLVM: {{.*}}test_vrshlq_s8(<16 x i8>{{.*}}[[a:%.*]], <16 x i8>{{.*}}[[b:%.*]])
3852+
// LLVM: [[VRSHL_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.srshl.v16i8(<16 x i8> [[a]], <16 x i8> [[b]])
3853+
// LLVM: ret <16 x i8> [[VRSHL_V_I]]
3854+
}
3855+
3856+
int16x8_t test_vrshlq_s16(int16x8_t a, int16x8_t b) {
3857+
return vrshlq_s16(a, b);
3858+
3859+
// CIR-LABEL: vrshlq_s16
3860+
// CIR: cir.llvm.intrinsic "aarch64.neon.srshl" {{%.*}}, {{%.*}} :
3861+
// CIR-SAME: (!cir.vector<!s16i x 8>, !cir.vector<!s16i x 8>) -> !cir.vector<!s16i x 8>
3862+
3863+
// LLVM: {{.*}}test_vrshlq_s16(<8 x i16>{{.*}}[[a:%.*]], <8 x i16>{{.*}}[[b:%.*]])
3864+
// LLVM: [[TMP0:%.*]] = bitcast <8 x i16> [[a]] to <16 x i8>
3865+
// LLVM: [[TMP1:%.*]] = bitcast <8 x i16> [[b]] to <16 x i8>
3866+
// LLVM: [[VRSHLQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.srshl.v8i16(<8 x i16> [[a]], <8 x i16> [[b]])
3867+
// LLVM: [[VRSHLQ_V3_I:%.*]] = bitcast <8 x i16> [[VRSHLQ_V2_I]] to <16 x i8>
3868+
// LLVM: ret <8 x i16> [[VRSHLQ_V2_I]]
3869+
}
3870+
3871+
int32x4_t test_vrshlq_s32(int32x4_t a, int32x4_t b) {
3872+
return vrshlq_s32(a, b);
3873+
3874+
// CIR-LABEL: vrshlq_s32
3875+
// CIR: cir.llvm.intrinsic "aarch64.neon.srshl" {{%.*}}, {{%.*}} :
3876+
// CIR-SAME: (!cir.vector<!s32i x 4>, !cir.vector<!s32i x 4>) -> !cir.vector<!s32i x 4>
3877+
3878+
// LLVM: {{.*}}test_vrshlq_s32(<4 x i32>{{.*}}[[a:%.*]], <4 x i32>{{.*}}[[b:%.*]])
3879+
// LLVM: [[TMP0:%.*]] = bitcast <4 x i32> [[a]] to <16 x i8>
3880+
// LLVM: [[TMP1:%.*]] = bitcast <4 x i32> [[b]] to <16 x i8>
3881+
// LLVM: [[VRSHLQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.srshl.v4i32(<4 x i32> [[a]], <4 x i32> [[b]])
3882+
// LLVM: [[VRSHLQ_V3_I:%.*]] = bitcast <4 x i32> [[VRSHLQ_V2_I]] to <16 x i8>
3883+
// LLVM: ret <4 x i32> [[VRSHLQ_V2_I]]
3884+
}
3885+
3886+
int64x2_t test_vrshlq_s64(int64x2_t a, int64x2_t b) {
3887+
return vrshlq_s64(a, b);
3888+
3889+
// CIR-LABEL: vrshlq_s64
3890+
// CIR: cir.llvm.intrinsic "aarch64.neon.srshl" {{%.*}}, {{%.*}} :
3891+
// CIR-SAME: (!cir.vector<!s64i x 2>, !cir.vector<!s64i x 2>) -> !cir.vector<!s64i x 2>
3892+
3893+
// LLVM: {{.*}}test_vrshlq_s64(<2 x i64>{{.*}}[[a:%.*]], <2 x i64>{{.*}}[[b:%.*]])
3894+
// LLVM: [[TMP0:%.*]] = bitcast <2 x i64> [[a]] to <16 x i8>
3895+
// LLVM: [[TMP1:%.*]] = bitcast <2 x i64> [[b]] to <16 x i8>
3896+
// LLVM: [[VRSHLQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.srshl.v2i64(<2 x i64> [[a]], <2 x i64> [[b]])
3897+
// LLVM: [[VRSHLQ_V3_I:%.*]] = bitcast <2 x i64> [[VRSHLQ_V2_I]] to <16 x i8>
3898+
// LLVM: ret <2 x i64> [[VRSHLQ_V2_I]]
3899+
}
3900+
3901+
uint8x16_t test_vrshlq_u8(uint8x16_t a, int8x16_t b) {
3902+
return vrshlq_u8(a, b);
3903+
3904+
// CIR-LABEL: vrshlq_u8
3905+
// CIR: cir.llvm.intrinsic "aarch64.neon.urshl" {{%.*}}, {{%.*}} :
3906+
// CIR-SAME: (!cir.vector<!u8i x 16>, !cir.vector<!u8i x 16>) -> !cir.vector<!u8i x 16>
3907+
3908+
// LLVM: {{.*}}test_vrshlq_u8(<16 x i8>{{.*}}[[a:%.*]], <16 x i8>{{.*}}[[b:%.*]])
3909+
// LLVM: [[VRSHLQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.urshl.v16i8(<16 x i8> [[a]], <16 x i8> [[b]])
3910+
// LLVM: ret <16 x i8> [[VRSHLQ_V_I]]
3911+
}
3912+
3913+
uint16x8_t test_vrshlq_u16(uint16x8_t a, int16x8_t b) {
3914+
return vrshlq_u16(a, b);
3915+
3916+
// CIR-LABEL: vrshlq_u16
3917+
// CIR: cir.llvm.intrinsic "aarch64.neon.urshl" {{%.*}}, {{%.*}} :
3918+
// CIR-SAME: (!cir.vector<!u16i x 8>, !cir.vector<!u16i x 8>) -> !cir.vector<!u16i x 8>
3919+
3920+
// LLVM: {{.*}}test_vrshlq_u16(<8 x i16>{{.*}}[[a:%.*]], <8 x i16>{{.*}}[[b:%.*]])
3921+
// LLVM: [[TMP0:%.*]] = bitcast <8 x i16> [[a]] to <16 x i8>
3922+
// LLVM: [[TMP1:%.*]] = bitcast <8 x i16> [[b]] to <16 x i8>
3923+
// LLVM: [[VRSHLQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.urshl.v8i16(<8 x i16> [[a]], <8 x i16> [[b]])
3924+
// LLVM: [[VRSHLQ_V3_I:%.*]] = bitcast <8 x i16> [[VRSHLQ_V2_I]] to <16 x i8>
3925+
// LLVM: ret <8 x i16> [[VRSHLQ_V2_I]]
3926+
}
3927+
3928+
uint32x4_t test_vrshlq_u32(uint32x4_t a, int32x4_t b) {
3929+
return vrshlq_u32(a, b);
3930+
3931+
// CIR-LABEL: vrshlq_u32
3932+
// CIR: cir.llvm.intrinsic "aarch64.neon.urshl" {{%.*}}, {{%.*}} :
3933+
// CIR-SAME: (!cir.vector<!u32i x 4>, !cir.vector<!u32i x 4>) -> !cir.vector<!u32i x 4>
3934+
3935+
// LLVM: {{.*}}test_vrshlq_u32(<4 x i32>{{.*}}[[a:%.*]], <4 x i32>{{.*}}[[b:%.*]])
3936+
// LLVM: [[TMP0:%.*]] = bitcast <4 x i32> [[a]] to <16 x i8>
3937+
// LLVM: [[TMP1:%.*]] = bitcast <4 x i32> [[b]] to <16 x i8>
3938+
// LLVM: [[VRSHLQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.urshl.v4i32(<4 x i32> [[a]], <4 x i32> [[b]])
3939+
// LLVM: [[VRSHLQ_V3_I:%.*]] = bitcast <4 x i32> [[VRSHLQ_V2_I]] to <16 x i8>
3940+
// LLVM: ret <4 x i32> [[VRSHLQ_V2_I]]
3941+
}
3942+
3943+
uint64x2_t test_vrshlq_u64(uint64x2_t a, int64x2_t b) {
3944+
return vrshlq_u64(a, b);
3945+
3946+
// CIR-LABEL: vrshlq_u64
3947+
// CIR: cir.llvm.intrinsic "aarch64.neon.urshl" {{%.*}}, {{%.*}} :
3948+
// CIR-SAME: (!cir.vector<!u64i x 2>, !cir.vector<!u64i x 2>) -> !cir.vector<!u64i x 2>
3949+
3950+
// LLVM: {{.*}}test_vrshlq_u64(<2 x i64>{{.*}}[[a:%.*]], <2 x i64>{{.*}}[[b:%.*]])
3951+
// LLVM: [[TMP0:%.*]] = bitcast <2 x i64> [[a]] to <16 x i8>
3952+
// LLVM: [[TMP1:%.*]] = bitcast <2 x i64> [[b]] to <16 x i8>
3953+
// LLVM: [[VRSHLQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.urshl.v2i64(<2 x i64> [[a]], <2 x i64> [[b]])
3954+
// LLVM: [[VRSHLQ_V3_I:%.*]] = bitcast <2 x i64> [[VRSHLQ_V2_I]] to <16 x i8>
3955+
// LLVM: ret <2 x i64> [[VRSHLQ_V2_I]]
3956+
}
38773957

38783958
// NYI-LABEL: @test_vqrshl_s8(
38793959
// NYI: [[VQRSHL_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqrshl.v8i8(<8 x i8> %a, <8 x i8> %b)

0 commit comments

Comments
 (0)