@@ -3727,153 +3727,233 @@ uint64x2_t test_vshlq_u64(uint64x2_t a, int64x2_t b) {
// return vqshlq_u64(a, b);
// }

- // NYI-LABEL: @test_vrshl_s8(
- // NYI: [[VRSHL_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.srshl.v8i8(<8 x i8> %a, <8 x i8> %b)
- // NYI: ret <8 x i8> [[VRSHL_V_I]]
- // int8x8_t test_vrshl_s8(int8x8_t a, int8x8_t b) {
- // return vrshl_s8(a, b);
- // }
+ int8x8_t test_vrshl_s8(int8x8_t a, int8x8_t b) {
+ return vrshl_s8(a, b);

- // NYI-LABEL: @test_vrshl_s16(
- // NYI: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
- // NYI: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
- // NYI: [[VRSHL_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.srshl.v4i16(<4 x i16> %a, <4 x i16> %b)
- // NYI: [[VRSHL_V3_I:%.*]] = bitcast <4 x i16> [[VRSHL_V2_I]] to <8 x i8>
- // NYI: ret <4 x i16> [[VRSHL_V2_I]]
- // int16x4_t test_vrshl_s16(int16x4_t a, int16x4_t b) {
- // return vrshl_s16(a, b);
- // }
+ // CIR-LABEL: vrshl_s8
+ // CIR: cir.llvm.intrinsic "aarch64.neon.srshl" {{%.*}}, {{%.*}} :
+ // CIR-SAME: (!cir.vector<!s8i x 8>, !cir.vector<!s8i x 8>) -> !cir.vector<!s8i x 8>

- // NYI-LABEL: @test_vrshl_s32(
- // NYI: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
- // NYI: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
- // NYI: [[VRSHL_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.srshl.v2i32(<2 x i32> %a, <2 x i32> %b)
- // NYI: [[VRSHL_V3_I:%.*]] = bitcast <2 x i32> [[VRSHL_V2_I]] to <8 x i8>
- // NYI: ret <2 x i32> [[VRSHL_V2_I]]
- // int32x2_t test_vrshl_s32(int32x2_t a, int32x2_t b) {
- // return vrshl_s32(a, b);
- // }
+ // LLVM: {{.*}}test_vrshl_s8(<8 x i8>{{.*}}[[a:%.*]], <8 x i8>{{.*}}[[b:%.*]])
+ // LLVM: [[VRSHL_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.srshl.v8i8(<8 x i8> [[a]], <8 x i8> [[b]])
+ // LLVM: ret <8 x i8> [[VRSHL_V_I]]
+ }

- // NYI-LABEL: @test_vrshl_s64(
- // NYI: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
- // NYI: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
- // NYI: [[VRSHL_V2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.srshl.v1i64(<1 x i64> %a, <1 x i64> %b)
- // NYI: [[VRSHL_V3_I:%.*]] = bitcast <1 x i64> [[VRSHL_V2_I]] to <8 x i8>
- // NYI: ret <1 x i64> [[VRSHL_V2_I]]
- // int64x1_t test_vrshl_s64(int64x1_t a, int64x1_t b) {
- // return vrshl_s64(a, b);
- // }
+ int16x4_t test_vrshl_s16(int16x4_t a, int16x4_t b) {
+ return vrshl_s16(a, b);

- // NYI-LABEL: @test_vrshl_u8(
- // NYI: [[VRSHL_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.urshl.v8i8(<8 x i8> %a, <8 x i8> %b)
- // NYI: ret <8 x i8> [[VRSHL_V_I]]
- // uint8x8_t test_vrshl_u8(uint8x8_t a, int8x8_t b) {
- // return vrshl_u8(a, b);
- // }
+ // CIR-LABEL: vrshl_s16
+ // CIR: cir.llvm.intrinsic "aarch64.neon.srshl" {{%.*}}, {{%.*}} :
+ // CIR-SAME: (!cir.vector<!s16i x 4>, !cir.vector<!s16i x 4>) -> !cir.vector<!s16i x 4>

- // NYI-LABEL: @test_vrshl_u16(
- // NYI: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
- // NYI: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
- // NYI: [[VRSHL_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.urshl.v4i16(<4 x i16> %a, <4 x i16> %b)
- // NYI: [[VRSHL_V3_I:%.*]] = bitcast <4 x i16> [[VRSHL_V2_I]] to <8 x i8>
- // NYI: ret <4 x i16> [[VRSHL_V2_I]]
- // uint16x4_t test_vrshl_u16(uint16x4_t a, int16x4_t b) {
- // return vrshl_u16(a, b);
- // }
+ // LLVM: {{.*}}test_vrshl_s16(<4 x i16>{{.*}}[[a:%.*]], <4 x i16>{{.*}}[[b:%.*]])
+ // LLVM: [[TMP0:%.*]] = bitcast <4 x i16> [[a]] to <8 x i8>
+ // LLVM: [[TMP1:%.*]] = bitcast <4 x i16> [[b]] to <8 x i8>
+ // LLVM: [[VRSHL_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.srshl.v4i16(<4 x i16> [[a]], <4 x i16> [[b]])
+ // LLVM: [[VRSHL_V3_I:%.*]] = bitcast <4 x i16> [[VRSHL_V2_I]] to <8 x i8>
+ // LLVM: ret <4 x i16> [[VRSHL_V2_I]]
+ }

- // NYI-LABEL: @test_vrshl_u32(
- // NYI: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
- // NYI: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
- // NYI: [[VRSHL_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.urshl.v2i32(<2 x i32> %a, <2 x i32> %b)
- // NYI: [[VRSHL_V3_I:%.*]] = bitcast <2 x i32> [[VRSHL_V2_I]] to <8 x i8>
- // NYI: ret <2 x i32> [[VRSHL_V2_I]]
- // uint32x2_t test_vrshl_u32(uint32x2_t a, int32x2_t b) {
- // return vrshl_u32(a, b);
- // }
+ int32x2_t test_vrshl_s32(int32x2_t a, int32x2_t b) {
+ return vrshl_s32(a, b);

- // NYI-LABEL: @test_vrshl_u64(
- // NYI: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
- // NYI: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
- // NYI: [[VRSHL_V2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.urshl.v1i64(<1 x i64> %a, <1 x i64> %b)
- // NYI: [[VRSHL_V3_I:%.*]] = bitcast <1 x i64> [[VRSHL_V2_I]] to <8 x i8>
- // NYI: ret <1 x i64> [[VRSHL_V2_I]]
- // uint64x1_t test_vrshl_u64(uint64x1_t a, int64x1_t b) {
- // return vrshl_u64(a, b);
- // }
+ // CIR-LABEL: vrshl_s32
+ // CIR: cir.llvm.intrinsic "aarch64.neon.srshl" {{%.*}}, {{%.*}} :
+ // CIR-SAME: (!cir.vector<!s32i x 2>, !cir.vector<!s32i x 2>) -> !cir.vector<!s32i x 2>

- // NYI-LABEL: @test_vrshlq_s8(
- // NYI: [[VRSHLQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.srshl.v16i8(<16 x i8> %a, <16 x i8> %b)
- // NYI: ret <16 x i8> [[VRSHLQ_V_I]]
- // int8x16_t test_vrshlq_s8(int8x16_t a, int8x16_t b) {
- // return vrshlq_s8(a, b);
- // }
+ // LLVM: {{.*}}test_vrshl_s32(<2 x i32>{{.*}}[[a:%.*]], <2 x i32>{{.*}}[[b:%.*]])
+ // LLVM: [[TMP0:%.*]] = bitcast <2 x i32> [[a]] to <8 x i8>
+ // LLVM: [[TMP1:%.*]] = bitcast <2 x i32> [[b]] to <8 x i8>
+ // LLVM: [[VRSHL_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.srshl.v2i32(<2 x i32> [[a]], <2 x i32> [[b]])
+ // LLVM: [[VRSHL_V3_I:%.*]] = bitcast <2 x i32> [[VRSHL_V2_I]] to <8 x i8>
+ // LLVM: ret <2 x i32> [[VRSHL_V2_I]]
+ }

- // NYI-LABEL: @test_vrshlq_s16(
- // NYI: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
- // NYI: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
- // NYI: [[VRSHLQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.srshl.v8i16(<8 x i16> %a, <8 x i16> %b)
- // NYI: [[VRSHLQ_V3_I:%.*]] = bitcast <8 x i16> [[VRSHLQ_V2_I]] to <16 x i8>
- // NYI: ret <8 x i16> [[VRSHLQ_V2_I]]
- // int16x8_t test_vrshlq_s16(int16x8_t a, int16x8_t b) {
- // return vrshlq_s16(a, b);
- // }
+ int64x1_t test_vrshl_s64(int64x1_t a, int64x1_t b) {
+ return vrshl_s64(a, b);

- // NYI-LABEL: @test_vrshlq_s32(
- // NYI: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
- // NYI: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
- // NYI: [[VRSHLQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.srshl.v4i32(<4 x i32> %a, <4 x i32> %b)
- // NYI: [[VRSHLQ_V3_I:%.*]] = bitcast <4 x i32> [[VRSHLQ_V2_I]] to <16 x i8>
- // NYI: ret <4 x i32> [[VRSHLQ_V2_I]]
- // int32x4_t test_vrshlq_s32(int32x4_t a, int32x4_t b) {
- // return vrshlq_s32(a, b);
- // }
+ // CIR-LABEL: vrshl_s64
+ // CIR: cir.llvm.intrinsic "aarch64.neon.srshl" {{%.*}}, {{%.*}} :
+ // CIR-SAME: (!cir.vector<!s64i x 1>, !cir.vector<!s64i x 1>) -> !cir.vector<!s64i x 1>

- // NYI-LABEL: @test_vrshlq_s64(
- // NYI: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
- // NYI: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
- // NYI: [[VRSHLQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.srshl.v2i64(<2 x i64> %a, <2 x i64> %b)
- // NYI: [[VRSHLQ_V3_I:%.*]] = bitcast <2 x i64> [[VRSHLQ_V2_I]] to <16 x i8>
- // NYI: ret <2 x i64> [[VRSHLQ_V2_I]]
- // int64x2_t test_vrshlq_s64(int64x2_t a, int64x2_t b) {
- // return vrshlq_s64(a, b);
- // }
+ // LLVM: {{.*}}test_vrshl_s64(<1 x i64>{{.*}}[[a:%.*]], <1 x i64>{{.*}}[[b:%.*]])
+ // LLVM: [[TMP0:%.*]] = bitcast <1 x i64> [[a]] to <8 x i8>
+ // LLVM: [[TMP1:%.*]] = bitcast <1 x i64> [[b]] to <8 x i8>
+ // LLVM: [[VRSHL_V2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.srshl.v1i64(<1 x i64> [[a]], <1 x i64> [[b]])
+ // LLVM: [[VRSHL_V3_I:%.*]] = bitcast <1 x i64> [[VRSHL_V2_I]] to <8 x i8>
+ // LLVM: ret <1 x i64> [[VRSHL_V2_I]]
+ }

- // NYI-LABEL: @test_vrshlq_u8(
- // NYI: [[VRSHLQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.urshl.v16i8(<16 x i8> %a, <16 x i8> %b)
- // NYI: ret <16 x i8> [[VRSHLQ_V_I]]
- // uint8x16_t test_vrshlq_u8(uint8x16_t a, int8x16_t b) {
- // return vrshlq_u8(a, b);
- // }
+ uint8x8_t test_vrshl_u8(uint8x8_t a, int8x8_t b) {
+ return vrshl_u8(a, b);

- // NYI-LABEL: @test_vrshlq_u16(
- // NYI: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
- // NYI: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
- // NYI: [[VRSHLQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.urshl.v8i16(<8 x i16> %a, <8 x i16> %b)
- // NYI: [[VRSHLQ_V3_I:%.*]] = bitcast <8 x i16> [[VRSHLQ_V2_I]] to <16 x i8>
- // NYI: ret <8 x i16> [[VRSHLQ_V2_I]]
- // uint16x8_t test_vrshlq_u16(uint16x8_t a, int16x8_t b) {
- // return vrshlq_u16(a, b);
- // }
+ // CIR-LABEL: vrshl_u8
+ // CIR: cir.llvm.intrinsic "aarch64.neon.urshl" {{%.*}}, {{%.*}} :
+ // CIR-SAME: (!cir.vector<!u8i x 8>, !cir.vector<!u8i x 8>) -> !cir.vector<!u8i x 8>

- // NYI-LABEL: @test_vrshlq_u32(
- // NYI: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
- // NYI: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
- // NYI: [[VRSHLQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.urshl.v4i32(<4 x i32> %a, <4 x i32> %b)
- // NYI: [[VRSHLQ_V3_I:%.*]] = bitcast <4 x i32> [[VRSHLQ_V2_I]] to <16 x i8>
- // NYI: ret <4 x i32> [[VRSHLQ_V2_I]]
- // uint32x4_t test_vrshlq_u32(uint32x4_t a, int32x4_t b) {
- // return vrshlq_u32(a, b);
- // }
+ // LLVM: {{.*}}test_vrshl_u8(<8 x i8>{{.*}}[[a:%.*]], <8 x i8>{{.*}}[[b:%.*]])
+ // LLVM: [[VRSHL_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.urshl.v8i8(<8 x i8> [[a]], <8 x i8> [[b]])
+ // LLVM: ret <8 x i8> [[VRSHL_V_I]]
+ }

- // NYI-LABEL: @test_vrshlq_u64(
- // NYI: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
- // NYI: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
- // NYI: [[VRSHLQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.urshl.v2i64(<2 x i64> %a, <2 x i64> %b)
- // NYI: [[VRSHLQ_V3_I:%.*]] = bitcast <2 x i64> [[VRSHLQ_V2_I]] to <16 x i8>
- // NYI: ret <2 x i64> [[VRSHLQ_V2_I]]
- // uint64x2_t test_vrshlq_u64(uint64x2_t a, int64x2_t b) {
- // return vrshlq_u64(a, b);
- // }
+ uint16x4_t test_vrshl_u16(uint16x4_t a, int16x4_t b) {
+ return vrshl_u16(a, b);
+
+ // CIR-LABEL: vrshl_u16
+ // CIR: cir.llvm.intrinsic "aarch64.neon.urshl" {{%.*}}, {{%.*}} :
+ // CIR-SAME: (!cir.vector<!u16i x 4>, !cir.vector<!u16i x 4>) -> !cir.vector<!u16i x 4>
+
+ // LLVM: {{.*}}test_vrshl_u16(<4 x i16>{{.*}}[[a:%.*]], <4 x i16>{{.*}}[[b:%.*]])
+ // LLVM: [[TMP0:%.*]] = bitcast <4 x i16> [[a]] to <8 x i8>
+ // LLVM: [[TMP1:%.*]] = bitcast <4 x i16> [[b]] to <8 x i8>
+ // LLVM: [[VRSHL_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.urshl.v4i16(<4 x i16> [[a]], <4 x i16> [[b]])
+ // LLVM: [[VRSHL_V3_I:%.*]] = bitcast <4 x i16>
+ // LLVM: ret <4 x i16> [[VRSHL_V2_I]]
+ }
+
+ uint32x2_t test_vrshl_u32(uint32x2_t a, int32x2_t b) {
+ return vrshl_u32(a, b);
+
+ // CIR-LABEL: vrshl_u32
+ // CIR: cir.llvm.intrinsic "aarch64.neon.urshl" {{%.*}}, {{%.*}} :
+ // CIR-SAME: (!cir.vector<!u32i x 2>, !cir.vector<!u32i x 2>) -> !cir.vector<!u32i x 2>
+
+ // LLVM: {{.*}}test_vrshl_u32(<2 x i32>{{.*}}[[a:%.*]], <2 x i32>{{.*}}[[b:%.*]])
+ // LLVM: [[TMP0:%.*]] = bitcast <2 x i32> [[a]] to <8 x i8>
+ // LLVM: [[TMP1:%.*]] = bitcast <2 x i32> [[b]] to <8 x i8>
+ // LLVM: [[VRSHL_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.urshl.v2i32(<2 x i32> [[a]], <2 x i32> [[b]])
+ // LLVM: [[VRSHL_V3_I:%.*]] = bitcast <2 x i32>
+ // LLVM: ret <2 x i32> [[VRSHL_V2_I]]
+ }
+
+ uint64x1_t test_vrshl_u64(uint64x1_t a, int64x1_t b) {
+ return vrshl_u64(a, b);
+
+ // CIR-LABEL: vrshl_u64
+ // CIR: cir.llvm.intrinsic "aarch64.neon.urshl" {{%.*}}, {{%.*}} :
+ // CIR-SAME: (!cir.vector<!u64i x 1>, !cir.vector<!u64i x 1>) -> !cir.vector<!u64i x 1>
+
+ // LLVM: {{.*}}test_vrshl_u64(<1 x i64>{{.*}}[[a:%.*]], <1 x i64>{{.*}}[[b:%.*]])
+ // LLVM: [[TMP0:%.*]] = bitcast <1 x i64> [[a]] to <8 x i8>
+ // LLVM: [[TMP1:%.*]] = bitcast <1 x i64> [[b]] to <8 x i8>
+ // LLVM: [[VRSHL_V2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.urshl.v1i64(<1 x i64> [[a]], <1 x i64> [[b]])
+ // LLVM: [[VRSHL_V3_I:%.*]] = bitcast <1 x i64>
+ // LLVM: ret <1 x i64> [[VRSHL_V2_I]]
+ }
+
+ int8x16_t test_vrshlq_s8(int8x16_t a, int8x16_t b) {
+ return vrshlq_s8(a, b);
+
+ // CIR-LABEL: vrshlq_s8
+ // CIR: cir.llvm.intrinsic "aarch64.neon.srshl" {{%.*}}, {{%.*}} :
+ // CIR-SAME: (!cir.vector<!s8i x 16>, !cir.vector<!s8i x 16>) -> !cir.vector<!s8i x 16>
+
+ // LLVM: {{.*}}test_vrshlq_s8(<16 x i8>{{.*}}[[a:%.*]], <16 x i8>{{.*}}[[b:%.*]])
+ // LLVM: [[VRSHL_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.srshl.v16i8(<16 x i8> [[a]], <16 x i8> [[b]])
+ // LLVM: ret <16 x i8> [[VRSHL_V_I]]
+ }
+
+ int16x8_t test_vrshlq_s16(int16x8_t a, int16x8_t b) {
+ return vrshlq_s16(a, b);
+
+ // CIR-LABEL: vrshlq_s16
+ // CIR: cir.llvm.intrinsic "aarch64.neon.srshl" {{%.*}}, {{%.*}} :
+ // CIR-SAME: (!cir.vector<!s16i x 8>, !cir.vector<!s16i x 8>) -> !cir.vector<!s16i x 8>
+
+ // LLVM: {{.*}}test_vrshlq_s16(<8 x i16>{{.*}}[[a:%.*]], <8 x i16>{{.*}}[[b:%.*]])
+ // LLVM: [[TMP0:%.*]] = bitcast <8 x i16> [[a]] to <16 x i8>
+ // LLVM: [[TMP1:%.*]] = bitcast <8 x i16> [[b]] to <16 x i8>
+ // LLVM: [[VRSHLQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.srshl.v8i16(<8 x i16> [[a]], <8 x i16> [[b]])
+ // LLVM: [[VRSHLQ_V3_I:%.*]] = bitcast <8 x i16> [[VRSHLQ_V2_I]] to <16 x i8>
+ // LLVM: ret <8 x i16> [[VRSHLQ_V2_I]]
+ }
+
+ int32x4_t test_vrshlq_s32(int32x4_t a, int32x4_t b) {
+ return vrshlq_s32(a, b);
+
+ // CIR-LABEL: vrshlq_s32
+ // CIR: cir.llvm.intrinsic "aarch64.neon.srshl" {{%.*}}, {{%.*}} :
+ // CIR-SAME: (!cir.vector<!s32i x 4>, !cir.vector<!s32i x 4>) -> !cir.vector<!s32i x 4>
+
+ // LLVM: {{.*}}test_vrshlq_s32(<4 x i32>{{.*}}[[a:%.*]], <4 x i32>{{.*}}[[b:%.*]])
+ // LLVM: [[TMP0:%.*]] = bitcast <4 x i32> [[a]] to <16 x i8>
+ // LLVM: [[TMP1:%.*]] = bitcast <4 x i32> [[b]] to <16 x i8>
+ // LLVM: [[VRSHLQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.srshl.v4i32(<4 x i32> [[a]], <4 x i32> [[b]])
+ // LLVM: [[VRSHLQ_V3_I:%.*]] = bitcast <4 x i32> [[VRSHLQ_V2_I]] to <16 x i8>
+ // LLVM: ret <4 x i32> [[VRSHLQ_V2_I]]
+ }
+
+ int64x2_t test_vrshlq_s64(int64x2_t a, int64x2_t b) {
+ return vrshlq_s64(a, b);
+
+ // CIR-LABEL: vrshlq_s64
+ // CIR: cir.llvm.intrinsic "aarch64.neon.srshl" {{%.*}}, {{%.*}} :
+ // CIR-SAME: (!cir.vector<!s64i x 2>, !cir.vector<!s64i x 2>) -> !cir.vector<!s64i x 2>
+
+ // LLVM: {{.*}}test_vrshlq_s64(<2 x i64>{{.*}}[[a:%.*]], <2 x i64>{{.*}}[[b:%.*]])
+ // LLVM: [[TMP0:%.*]] = bitcast <2 x i64> [[a]] to <16 x i8>
+ // LLVM: [[TMP1:%.*]] = bitcast <2 x i64> [[b]] to <16 x i8>
+ // LLVM: [[VRSHLQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.srshl.v2i64(<2 x i64> [[a]], <2 x i64> [[b]])
+ // LLVM: [[VRSHLQ_V3_I:%.*]] = bitcast <2 x i64> [[VRSHLQ_V2_I]] to <16 x i8>
+ // LLVM: ret <2 x i64> [[VRSHLQ_V2_I]]
+ }
+
+ uint8x16_t test_vrshlq_u8(uint8x16_t a, int8x16_t b) {
+ return vrshlq_u8(a, b);
+
+ // CIR-LABEL: vrshlq_u8
+ // CIR: cir.llvm.intrinsic "aarch64.neon.urshl" {{%.*}}, {{%.*}} :
+ // CIR-SAME: (!cir.vector<!u8i x 16>, !cir.vector<!u8i x 16>) -> !cir.vector<!u8i x 16>
+
+ // LLVM: {{.*}}test_vrshlq_u8(<16 x i8>{{.*}}[[a:%.*]], <16 x i8>{{.*}}[[b:%.*]])
+ // LLVM: [[VRSHLQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.urshl.v16i8(<16 x i8> [[a]], <16 x i8> [[b]])
+ // LLVM: ret <16 x i8> [[VRSHLQ_V_I]]
+ }
+
+ uint16x8_t test_vrshlq_u16(uint16x8_t a, int16x8_t b) {
+ return vrshlq_u16(a, b);
+
+ // CIR-LABEL: vrshlq_u16
+ // CIR: cir.llvm.intrinsic "aarch64.neon.urshl" {{%.*}}, {{%.*}} :
+ // CIR-SAME: (!cir.vector<!u16i x 8>, !cir.vector<!u16i x 8>) -> !cir.vector<!u16i x 8>
+
+ // LLVM: {{.*}}test_vrshlq_u16(<8 x i16>{{.*}}[[a:%.*]], <8 x i16>{{.*}}[[b:%.*]])
+ // LLVM: [[TMP0:%.*]] = bitcast <8 x i16> [[a]] to <16 x i8>
+ // LLVM: [[TMP1:%.*]] = bitcast <8 x i16> [[b]] to <16 x i8>
+ // LLVM: [[VRSHLQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.urshl.v8i16(<8 x i16> [[a]], <8 x i16> [[b]])
+ // LLVM: [[VRSHLQ_V3_I:%.*]] = bitcast <8 x i16> [[VRSHLQ_V2_I]] to <16 x i8>
+ // LLVM: ret <8 x i16> [[VRSHLQ_V2_I]]
+ }
+
+ uint32x4_t test_vrshlq_u32(uint32x4_t a, int32x4_t b) {
+ return vrshlq_u32(a, b);
+
+ // CIR-LABEL: vrshlq_u32
+ // CIR: cir.llvm.intrinsic "aarch64.neon.urshl" {{%.*}}, {{%.*}} :
+ // CIR-SAME: (!cir.vector<!u32i x 4>, !cir.vector<!u32i x 4>) -> !cir.vector<!u32i x 4>
+
+ // LLVM: {{.*}}test_vrshlq_u32(<4 x i32>{{.*}}[[a:%.*]], <4 x i32>{{.*}}[[b:%.*]])
+ // LLVM: [[TMP0:%.*]] = bitcast <4 x i32> [[a]] to <16 x i8>
+ // LLVM: [[TMP1:%.*]] = bitcast <4 x i32> [[b]] to <16 x i8>
+ // LLVM: [[VRSHLQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.urshl.v4i32(<4 x i32> [[a]], <4 x i32> [[b]])
+ // LLVM: [[VRSHLQ_V3_I:%.*]] = bitcast <4 x i32> [[VRSHLQ_V2_I]] to <16 x i8>
+ // LLVM: ret <4 x i32> [[VRSHLQ_V2_I]]
+ }
+
+ uint64x2_t test_vrshlq_u64(uint64x2_t a, int64x2_t b) {
+ return vrshlq_u64(a, b);
+
+ // CIR-LABEL: vrshlq_u64
+ // CIR: cir.llvm.intrinsic "aarch64.neon.urshl" {{%.*}}, {{%.*}} :
+ // CIR-SAME: (!cir.vector<!u64i x 2>, !cir.vector<!u64i x 2>) -> !cir.vector<!u64i x 2>
+
+ // LLVM: {{.*}}test_vrshlq_u64(<2 x i64>{{.*}}[[a:%.*]], <2 x i64>{{.*}}[[b:%.*]])
+ // LLVM: [[TMP0:%.*]] = bitcast <2 x i64> [[a]] to <16 x i8>
+ // LLVM: [[TMP1:%.*]] = bitcast <2 x i64> [[b]] to <16 x i8>
+ // LLVM: [[VRSHLQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.urshl.v2i64(<2 x i64> [[a]], <2 x i64> [[b]])
+ // LLVM: [[VRSHLQ_V3_I:%.*]] = bitcast <2 x i64> [[VRSHLQ_V2_I]] to <16 x i8>
+ // LLVM: ret <2 x i64> [[VRSHLQ_V2_I]]
+ }

// NYI-LABEL: @test_vqrshl_s8(
// NYI: [[VQRSHL_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqrshl.v8i8(<8 x i8> %a, <8 x i8> %b)