// RUN: %clang_cc1 -triple aarch64-none-linux-android24 -fclangir \
// RUN:  -emit-cir -target-feature +neon %s -o %t.cir
// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
// RUN: %clang_cc1 -triple aarch64-none-linux-android24 -fclangir \
// RUN:  -emit-llvm -target-feature +neon %s -o %t.ll
// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s

// Testing the normal situation of vdup lane intrinsics.

// REQUIRES: aarch64-registered-target || arm-registered-target
#include <arm_neon.h>
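
// Each scalar vdup lane intrinsic below is expected to lower to a single
// lane read: a cir.vec.extract with a constant index at the CIR level, and
// an extractelement instruction in the LLVM IR output.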

int8_t test_vdupb_lane_s8(int8x8_t src) {
  return vdupb_lane_s8(src, 7);
}

// CIR-LABEL: test_vdupb_lane_s8
// CIR: [[IDX:%.*]] = cir.const #cir.int<7> : !s32i
// CIR: {{%.*}} = cir.vec.extract {{%.*}}[[[IDX]] : !s32i] : !cir.vector<!u8i x 8>

// LLVM: define dso_local i8 @test_vdupb_lane_s8(<8 x i8> [[ARG:%.*]])
// LLVM: [[ARG_SAVE:%.*]] = alloca <8 x i8>, i64 1, align 8
// LLVM: store <8 x i8> [[ARG]], ptr [[ARG_SAVE]], align 8
// LLVM: [[TMP:%.*]] = load <8 x i8>, ptr [[ARG_SAVE]], align 8
// LLVM: store <8 x i8> [[TMP]], ptr [[S0:%.*]], align 8
// LLVM: [[INTRN_ARG:%.*]] = load <8 x i8>, ptr [[S0]], align 8
// LLVM: {{%.*}} = extractelement <8 x i8> [[INTRN_ARG]], i32 7
// LLVM: ret i8 {{%.*}}

int8_t test_vdupb_laneq_s8(int8x16_t a) {
  return vdupb_laneq_s8(a, 15);
}

// CIR-LABEL: test_vdupb_laneq_s8
// CIR: [[IDX:%.*]] = cir.const #cir.int<15> : !s32i
// CIR: {{%.*}} = cir.vec.extract {{%.*}}[[[IDX]] : !s32i] : !cir.vector<!u8i x 16>

// LLVM: define dso_local i8 @test_vdupb_laneq_s8(<16 x i8> [[ARG:%.*]])
// LLVM: [[ARG_SAVE:%.*]] = alloca <16 x i8>, i64 1, align 16
// LLVM: store <16 x i8> [[ARG]], ptr [[ARG_SAVE]], align 16
// LLVM: [[TMP:%.*]] = load <16 x i8>, ptr [[ARG_SAVE]], align 16
// LLVM: store <16 x i8> [[TMP]], ptr [[S0:%.*]], align 16
// LLVM: [[INTRN_ARG:%.*]] = load <16 x i8>, ptr [[S0]], align 16
// LLVM: {{%.*}} = extractelement <16 x i8> [[INTRN_ARG]], i32 15
// LLVM: ret i8 {{%.*}}

int16_t test_vduph_lane_s16(int16x4_t src) {
  return vduph_lane_s16(src, 3);
}

// CIR-LABEL: test_vduph_lane_s16
// CIR: [[IDX:%.*]] = cir.const #cir.int<3> : !s32i
// CIR: {{%.*}} = cir.vec.extract {{%.*}}[[[IDX]] : !s32i] : !cir.vector<!u16i x 4>

// LLVM: define dso_local i16 @test_vduph_lane_s16(<4 x i16> [[ARG:%.*]])
// LLVM: [[ARG_SAVE:%.*]] = alloca <4 x i16>, i64 1, align 8
// LLVM: store <4 x i16> [[ARG]], ptr [[ARG_SAVE]], align 8
// LLVM: [[TMP:%.*]] = load <4 x i16>, ptr [[ARG_SAVE]], align 8
// LLVM: store <4 x i16> [[TMP]], ptr [[S0:%.*]], align 8
// LLVM: [[INTRN_ARG:%.*]] = load <4 x i16>, ptr [[S0]], align 8
// LLVM: {{%.*}} = extractelement <4 x i16> [[INTRN_ARG]], i32 3
// LLVM: ret i16 {{%.*}}

int16_t test_vduph_laneq_s16(int16x8_t a) {
  return vduph_laneq_s16(a, 7);
}

// CIR-LABEL: test_vduph_laneq_s16
// CIR: [[IDX:%.*]] = cir.const #cir.int<7> : !s32i
// CIR: {{%.*}} = cir.vec.extract {{%.*}}[[[IDX]] : !s32i] : !cir.vector<!u16i x 8>

// LLVM: define dso_local i16 @test_vduph_laneq_s16(<8 x i16> [[ARG:%.*]])
// LLVM: [[ARG_SAVE:%.*]] = alloca <8 x i16>, i64 1, align 16
// LLVM: store <8 x i16> [[ARG]], ptr [[ARG_SAVE]], align 16
// LLVM: [[TMP:%.*]] = load <8 x i16>, ptr [[ARG_SAVE]], align 16
// LLVM: store <8 x i16> [[TMP]], ptr [[S0:%.*]], align 16
// LLVM: [[INTRN_ARG:%.*]] = load <8 x i16>, ptr [[S0]], align 16
// LLVM: {{%.*}} = extractelement <8 x i16> [[INTRN_ARG]], i32 7
// LLVM: ret i16 {{%.*}}

int32_t test_vdups_lane_s32(int32x2_t a) {
  return vdups_lane_s32(a, 1);
}

// CIR-LABEL: test_vdups_lane_s32
// CIR: [[IDX:%.*]] = cir.const #cir.int<1> : !s32i
// CIR: {{%.*}} = cir.vec.extract {{%.*}}[[[IDX]] : !s32i] : !cir.vector<!u32i x 2>

// LLVM: define dso_local i32 @test_vdups_lane_s32(<2 x i32> [[ARG:%.*]])
// LLVM: [[ARG_SAVE:%.*]] = alloca <2 x i32>, i64 1, align 8
// LLVM: store <2 x i32> [[ARG]], ptr [[ARG_SAVE]], align 8
// LLVM: [[TMP:%.*]] = load <2 x i32>, ptr [[ARG_SAVE]], align 8
// LLVM: store <2 x i32> [[TMP]], ptr [[S0:%.*]], align 8
// LLVM: [[INTRN_ARG:%.*]] = load <2 x i32>, ptr [[S0]], align 8
// LLVM: {{%.*}} = extractelement <2 x i32> [[INTRN_ARG]], i32 1
// LLVM: ret i32 {{%.*}}

int32_t test_vdups_laneq_s32(int32x4_t a) {
  return vdups_laneq_s32(a, 3);
}

// CIR-LABEL: test_vdups_laneq_s32
// CIR: [[IDX:%.*]] = cir.const #cir.int<3> : !s32i
// CIR: {{%.*}} = cir.vec.extract {{%.*}}[[[IDX]] : !s32i] : !cir.vector<!u32i x 4>

// LLVM: define dso_local i32 @test_vdups_laneq_s32(<4 x i32> [[ARG:%.*]])
// LLVM: [[ARG_SAVE:%.*]] = alloca <4 x i32>, i64 1, align 16
// LLVM: store <4 x i32> [[ARG]], ptr [[ARG_SAVE]], align 16
// LLVM: [[TMP:%.*]] = load <4 x i32>, ptr [[ARG_SAVE]], align 16
// LLVM: store <4 x i32> [[TMP]], ptr [[S0:%.*]], align 16
// LLVM: [[INTRN_ARG:%.*]] = load <4 x i32>, ptr [[S0]], align 16
// LLVM: {{%.*}} = extractelement <4 x i32> [[INTRN_ARG]], i32 3
// LLVM: ret i32 {{%.*}}

int64_t test_vdupd_lane_s64(int64x1_t src) {
  return vdupd_lane_s64(src, 0);
}

// CIR-LABEL: test_vdupd_lane_s64
// CIR: [[IDX:%.*]] = cir.const #cir.int<0> : !s32i
// CIR: {{%.*}} = cir.vec.extract {{%.*}}[[[IDX]] : !s32i] : !cir.vector<!u64i x 1>

// LLVM: define dso_local i64 @test_vdupd_lane_s64(<1 x i64> [[ARG:%.*]])
// LLVM: [[ARG_SAVE:%.*]] = alloca <1 x i64>, i64 1, align 8
// LLVM: store <1 x i64> [[ARG]], ptr [[ARG_SAVE]], align 8
// LLVM: [[TMP:%.*]] = load <1 x i64>, ptr [[ARG_SAVE]], align 8
// LLVM: store <1 x i64> [[TMP]], ptr [[S0:%.*]], align 8
// LLVM: [[INTRN_ARG:%.*]] = load <1 x i64>, ptr [[S0]], align 8
// LLVM: {{%.*}} = extractelement <1 x i64> [[INTRN_ARG]], i32 0
// LLVM: ret i64 {{%.*}}

int64_t test_vdupd_laneq_s64(int64x2_t a) {
  return vdupd_laneq_s64(a, 1);
}

// CIR-LABEL: test_vdupd_laneq_s64
// CIR: [[IDX:%.*]] = cir.const #cir.int<1> : !s32i
// CIR: {{%.*}} = cir.vec.extract {{%.*}}[[[IDX]] : !s32i] : !cir.vector<!u64i x 2>

// LLVM: define dso_local i64 @test_vdupd_laneq_s64(<2 x i64> [[ARG:%.*]])
// LLVM: [[ARG_SAVE:%.*]] = alloca <2 x i64>, i64 1, align 16
// LLVM: store <2 x i64> [[ARG]], ptr [[ARG_SAVE]], align 16
// LLVM: [[TMP:%.*]] = load <2 x i64>, ptr [[ARG_SAVE]], align 16
// LLVM: store <2 x i64> [[TMP]], ptr [[S0:%.*]], align 16
// LLVM: [[INTRN_ARG:%.*]] = load <2 x i64>, ptr [[S0]], align 16
// LLVM: {{%.*}} = extractelement <2 x i64> [[INTRN_ARG]], i32 1
// LLVM: ret i64 {{%.*}}

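// The floating-point variants below are expected to lower the same way,
// with float and double element types.
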
float32_t test_vdups_lane_f32(float32x2_t src) {
  return vdups_lane_f32(src, 1);
}

// CIR-LABEL: test_vdups_lane_f32
// CIR: [[IDX:%.*]] = cir.const #cir.int<1> : !s32i
// CIR: {{%.*}} = cir.vec.extract {{%.*}}[[[IDX]] : !s32i] : !cir.vector<!cir.float x 2>

// LLVM: define dso_local float @test_vdups_lane_f32(<2 x float> [[ARG:%.*]])
// LLVM: [[ARG_SAVE:%.*]] = alloca <2 x float>, i64 1, align 8
// LLVM: store <2 x float> [[ARG]], ptr [[ARG_SAVE]], align 8
// LLVM: [[TMP:%.*]] = load <2 x float>, ptr [[ARG_SAVE]], align 8
// LLVM: store <2 x float> [[TMP]], ptr [[S0:%.*]], align 8
// LLVM: [[INTRN_ARG:%.*]] = load <2 x float>, ptr [[S0]], align 8
// LLVM: {{%.*}} = extractelement <2 x float> [[INTRN_ARG]], i32 1
// LLVM: ret float {{%.*}}

float64_t test_vdupd_lane_f64(float64x1_t src) {
  return vdupd_lane_f64(src, 0);
}

// CIR-LABEL: test_vdupd_lane_f64
// CIR: [[IDX:%.*]] = cir.const #cir.int<0> : !s32i
// CIR: {{%.*}} = cir.vec.extract {{%.*}}[[[IDX]] : !s32i] : !cir.vector<!cir.double x 1>

// LLVM: define dso_local double @test_vdupd_lane_f64(<1 x double> [[ARG:%.*]])
// LLVM: [[ARG_SAVE:%.*]] = alloca <1 x double>, i64 1, align 8
// LLVM: store <1 x double> [[ARG]], ptr [[ARG_SAVE]], align 8
// LLVM: [[TMP:%.*]] = load <1 x double>, ptr [[ARG_SAVE]], align 8
// LLVM: store <1 x double> [[TMP]], ptr [[S0:%.*]], align 8
// LLVM: [[INTRN_ARG:%.*]] = load <1 x double>, ptr [[S0]], align 8
// LLVM: {{%.*}} = extractelement <1 x double> [[INTRN_ARG]], i32 0
// LLVM: ret double {{%.*}}

float32_t test_vdups_laneq_f32(float32x4_t src) {
  return vdups_laneq_f32(src, 3);
}

// CIR-LABEL: test_vdups_laneq_f32
// CIR: [[IDX:%.*]] = cir.const #cir.int<3> : !s32i
// CIR: {{%.*}} = cir.vec.extract {{%.*}}[[[IDX]] : !s32i] : !cir.vector<!cir.float x 4>

// LLVM: define dso_local float @test_vdups_laneq_f32(<4 x float> [[ARG:%.*]])
// LLVM: [[ARG_SAVE:%.*]] = alloca <4 x float>, i64 1, align 16
// LLVM: store <4 x float> [[ARG]], ptr [[ARG_SAVE]], align 16
// LLVM: [[TMP:%.*]] = load <4 x float>, ptr [[ARG_SAVE]], align 16
// LLVM: store <4 x float> [[TMP]], ptr [[S0:%.*]], align 16
// LLVM: [[INTRN_ARG:%.*]] = load <4 x float>, ptr [[S0]], align 16
// LLVM: {{%.*}} = extractelement <4 x float> [[INTRN_ARG]], i32 3
// LLVM: ret float {{%.*}}

float64_t test_vdupd_laneq_f64(float64x2_t src) {
  return vdupd_laneq_f64(src, 1);
}

// CIR-LABEL: test_vdupd_laneq_f64
// CIR: [[IDX:%.*]] = cir.const #cir.int<1> : !s32i
// CIR: {{%.*}} = cir.vec.extract {{%.*}}[[[IDX]] : !s32i] : !cir.vector<!cir.double x 2>

// LLVM: define dso_local double @test_vdupd_laneq_f64(<2 x double> [[ARG:%.*]])
// LLVM: [[ARG_SAVE:%.*]] = alloca <2 x double>, i64 1, align 16
// LLVM: store <2 x double> [[ARG]], ptr [[ARG_SAVE]], align 16
// LLVM: [[TMP:%.*]] = load <2 x double>, ptr [[ARG_SAVE]], align 16
// LLVM: store <2 x double> [[TMP]], ptr [[S0:%.*]], align 16
// LLVM: [[INTRN_ARG:%.*]] = load <2 x double>, ptr [[S0]], align 16
// LLVM: {{%.*}} = extractelement <2 x double> [[INTRN_ARG]], i32 1
// LLVM: ret double {{%.*}}