|
5 | 5 | // RUN: -emit-llvm -target-feature +neon %s -o %t.ll
|
6 | 6 | // RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s
|
7 | 7 |
|
8 |
| -// This test file is similar to but not the same as |
9 |
| -// clang/test/CodeGen/aarch64-neon-vget.c |
10 |
| -// The difference is that this file only tests uses vset intrinsics, as we feel |
11 |
| -// it would be proper to have a separate test file testing vget intrinsics |
12 |
| -// with the file name aarch64-neon-vget.c |
13 |
| -// Also, for each integer type, we only test signed or unsigned, not both. |
14 |
| -// This is because integer types of the same size just use same intrinsic. |
| 8 | +// This test file contains tests of AArch64 NEON intrinsics |
| 9 | +// that are not covered by other tests. |
15 | 10 |
|
16 | 11 | // REQUIRES: aarch64-registered-target || arm-registered-target
|
17 | 12 | #include <arm_neon.h>
|
@@ -236,3 +231,207 @@ float32x4_t test_vsetq_lane_f32(float32_t a, float32x4_t b) {
|
236 | 231 | // LLVM: [[INTRN_ARG1:%.*]] = load <4 x float>, ptr [[S1]], align 16
|
237 | 232 | // LLVM: [[INTRN_RES:%.*]] = insertelement <4 x float> [[INTRN_ARG1]], float [[INTRN_ARG0]], i32 3
|
238 | 233 | // LLVM: ret <4 x float> {{%.*}}
|
| 234 | + |
| 235 | +uint8_t test_vget_lane_u8(uint8x8_t a) { /* Test case: returns lane 7 (the last of 8 lanes) of `a` via vget_lane_u8. */ |
| 236 | + return vget_lane_u8(a, 7); |
| 237 | +} |
| 238 | + |
| 239 | +// CIR-LABEL: test_vget_lane_u8 |
| 240 | +// CIR: [[IDX:%.*]] = cir.const #cir.int<7> : !s32i |
| 241 | +// CIR: {{%.*}} = cir.vec.extract {{%.*}}[[[IDX]] : !s32i] : !cir.vector<!u8i x 8> |
| 242 | + |
| 243 | +// LLVM: define dso_local i8 @test_vget_lane_u8(<8 x i8> [[ARG:%.*]]) |
| 244 | +// LLVM: [[ARG_SAVE:%.*]] = alloca <8 x i8>, i64 1, align 8 |
| 245 | +// LLVM: store <8 x i8> [[ARG]], ptr [[ARG_SAVE]], align 8 |
| 246 | +// LLVM: [[TMP:%.*]] = load <8 x i8>, ptr [[ARG_SAVE]], align 8 |
| 247 | +// LLVM: store <8 x i8> [[TMP]], ptr [[S0:%.*]], align 8 |
| 248 | +// LLVM: [[INTRN_ARG:%.*]] = load <8 x i8>, ptr [[S0]], align 8 |
| 249 | +// LLVM: {{%.*}} = extractelement <8 x i8> [[INTRN_ARG]], i32 7 |
| 250 | +// LLVM: ret i8 {{%.*}} |
| 251 | + |
| 252 | +uint8_t test_vgetq_lane_u8(uint8x16_t a) { /* Test case: returns lane 15 (the last of 16 lanes) of `a` via vgetq_lane_u8. */ |
| 253 | + return vgetq_lane_u8(a, 15); |
| 254 | +} |
| 255 | + |
| 256 | +// CIR-LABEL: test_vgetq_lane_u8 |
| 257 | +// CIR: [[IDX:%.*]] = cir.const #cir.int<15> : !s32i |
| 258 | +// CIR: {{%.*}} = cir.vec.extract {{%.*}}[[[IDX]] : !s32i] : !cir.vector<!u8i x 16> |
| 259 | + |
| 260 | +// LLVM: define dso_local i8 @test_vgetq_lane_u8(<16 x i8> [[ARG:%.*]]) |
| 261 | +// LLVM: [[ARG_SAVE:%.*]] = alloca <16 x i8>, i64 1, align 16 |
| 262 | +// LLVM: store <16 x i8> [[ARG]], ptr [[ARG_SAVE]], align 16 |
| 263 | +// LLVM: [[TMP:%.*]] = load <16 x i8>, ptr [[ARG_SAVE]], align 16 |
| 264 | +// LLVM: store <16 x i8> [[TMP]], ptr [[S0:%.*]], align 16 |
| 265 | +// LLVM: [[INTRN_ARG:%.*]] = load <16 x i8>, ptr [[S0]], align 16 |
| 266 | +// LLVM: {{%.*}} = extractelement <16 x i8> [[INTRN_ARG]], i32 15 |
| 267 | +// LLVM: ret i8 {{%.*}} |
| 268 | + |
| 269 | +uint16_t test_vget_lane_u16(uint16x4_t a) { /* Test case: returns lane 3 (the last of 4 lanes) of `a` via vget_lane_u16. */ |
| 270 | + return vget_lane_u16(a, 3); |
| 271 | +} |
| 272 | + |
| 273 | +// CIR-LABEL: test_vget_lane_u16 |
| 274 | +// CIR: [[IDX:%.*]] = cir.const #cir.int<3> : !s32i |
| 275 | +// CIR: {{%.*}} = cir.vec.extract {{%.*}}[[[IDX]] : !s32i] : !cir.vector<!u16i x 4> |
| 276 | + |
| 277 | +// LLVM: define dso_local i16 @test_vget_lane_u16(<4 x i16> [[ARG:%.*]]) |
| 278 | +// LLVM: [[ARG_SAVE:%.*]] = alloca <4 x i16>, i64 1, align 8 |
| 279 | +// LLVM: store <4 x i16> [[ARG]], ptr [[ARG_SAVE]], align 8 |
| 280 | +// LLVM: [[TMP:%.*]] = load <4 x i16>, ptr [[ARG_SAVE]], align 8 |
| 281 | +// LLVM: store <4 x i16> [[TMP]], ptr [[S0:%.*]], align 8 |
| 282 | +// LLVM: [[INTRN_ARG:%.*]] = load <4 x i16>, ptr [[S0]], align 8 |
| 283 | +// LLVM: {{%.*}} = extractelement <4 x i16> [[INTRN_ARG]], i32 3 |
| 284 | +// LLVM: ret i16 {{%.*}} |
| 285 | + |
| 286 | +uint16_t test_vgetq_lane_u16(uint16x8_t a) { /* Test case: returns lane 7 (the last of 8 lanes) of `a` via vgetq_lane_u16. */ |
| 287 | + return vgetq_lane_u16(a, 7); |
| 288 | +} |
| 289 | + |
| 290 | +// CIR-LABEL: test_vgetq_lane_u16 |
| 291 | +// CIR: [[IDX:%.*]] = cir.const #cir.int<7> : !s32i |
| 292 | +// CIR: {{%.*}} = cir.vec.extract {{%.*}}[[[IDX]] : !s32i] : !cir.vector<!u16i x 8> |
| 293 | + |
| 294 | +// LLVM: define dso_local i16 @test_vgetq_lane_u16(<8 x i16> [[ARG:%.*]]) |
| 295 | +// LLVM: [[ARG_SAVE:%.*]] = alloca <8 x i16>, i64 1, align 16 |
| 296 | +// LLVM: store <8 x i16> [[ARG]], ptr [[ARG_SAVE]], align 16 |
| 297 | +// LLVM: [[TMP:%.*]] = load <8 x i16>, ptr [[ARG_SAVE]], align 16 |
| 298 | +// LLVM: store <8 x i16> [[TMP]], ptr [[S0:%.*]], align 16 |
| 299 | +// LLVM: [[INTRN_ARG:%.*]] = load <8 x i16>, ptr [[S0]], align 16 |
| 300 | +// LLVM: {{%.*}} = extractelement <8 x i16> [[INTRN_ARG]], i32 7 |
| 301 | +// LLVM: ret i16 {{%.*}} |
| 302 | + |
| 303 | +uint32_t test_vget_lane_u32(uint32x2_t a) { /* Test case: returns lane 1 (the last of 2 lanes) of `a` via vget_lane_u32. */ |
| 304 | + return vget_lane_u32(a, 1); |
| 305 | +} |
| 306 | + |
| 307 | +// CIR-LABEL: test_vget_lane_u32 |
| 308 | +// CIR: [[IDX:%.*]] = cir.const #cir.int<1> : !s32i |
| 309 | +// CIR: {{%.*}} = cir.vec.extract {{%.*}}[[[IDX]] : !s32i] : !cir.vector<!u32i x 2> |
| 310 | + |
| 311 | +// LLVM: define dso_local i32 @test_vget_lane_u32(<2 x i32> [[ARG:%.*]]) |
| 312 | +// LLVM: [[ARG_SAVE:%.*]] = alloca <2 x i32>, i64 1, align 8 |
| 313 | +// LLVM: store <2 x i32> [[ARG]], ptr [[ARG_SAVE]], align 8 |
| 314 | +// LLVM: [[TMP:%.*]] = load <2 x i32>, ptr [[ARG_SAVE]], align 8 |
| 315 | +// LLVM: store <2 x i32> [[TMP]], ptr [[S0:%.*]], align 8 |
| 316 | +// LLVM: [[INTRN_ARG:%.*]] = load <2 x i32>, ptr [[S0]], align 8 |
| 317 | +// LLVM: {{%.*}} = extractelement <2 x i32> [[INTRN_ARG]], i32 1 |
| 318 | +// LLVM: ret i32 {{%.*}} |
| 319 | + |
| 320 | +uint32_t test_vgetq_lane_u32(uint32x4_t a) { /* Test case: returns lane 3 (the last of 4 lanes) of `a` via vgetq_lane_u32. */ |
| 321 | + return vgetq_lane_u32(a, 3); |
| 322 | +} |
| 323 | + |
| 324 | +// CIR-LABEL: test_vgetq_lane_u32 |
| 325 | +// CIR: [[IDX:%.*]] = cir.const #cir.int<3> : !s32i |
| 326 | +// CIR: {{%.*}} = cir.vec.extract {{%.*}}[[[IDX]] : !s32i] : !cir.vector<!u32i x 4> |
| 327 | + |
| 328 | +// LLVM: define dso_local i32 @test_vgetq_lane_u32(<4 x i32> [[ARG:%.*]]) |
| 329 | +// LLVM: [[ARG_SAVE:%.*]] = alloca <4 x i32>, i64 1, align 16 |
| 330 | +// LLVM: store <4 x i32> [[ARG]], ptr [[ARG_SAVE]], align 16 |
| 331 | +// LLVM: [[TMP:%.*]] = load <4 x i32>, ptr [[ARG_SAVE]], align 16 |
| 332 | +// LLVM: store <4 x i32> [[TMP]], ptr [[S0:%.*]], align 16 |
| 333 | +// LLVM: [[INTRN_ARG:%.*]] = load <4 x i32>, ptr [[S0]], align 16 |
| 334 | +// LLVM: {{%.*}} = extractelement <4 x i32> [[INTRN_ARG]], i32 3 |
| 335 | +// LLVM: ret i32 {{%.*}} |
| 336 | + |
| 337 | +uint64_t test_vget_lane_u64(uint64x1_t a) { /* Test case: returns lane 0 (the only lane) of `a` via vget_lane_u64. */ |
| 338 | + return vget_lane_u64(a, 0); |
| 339 | +} |
| 340 | + |
| 341 | +// CIR-LABEL: test_vget_lane_u64 |
| 342 | +// CIR: [[IDX:%.*]] = cir.const #cir.int<0> : !s32i |
| 343 | +// CIR: {{%.*}} = cir.vec.extract {{%.*}}[[[IDX]] : !s32i] : !cir.vector<!u64i x 1> |
| 344 | + |
| 345 | +// LLVM: define dso_local i64 @test_vget_lane_u64(<1 x i64> [[ARG:%.*]]) |
| 346 | +// LLVM: [[ARG_SAVE:%.*]] = alloca <1 x i64>, i64 1, align 8 |
| 347 | +// LLVM: store <1 x i64> [[ARG]], ptr [[ARG_SAVE]], align 8 |
| 348 | +// LLVM: [[TMP:%.*]] = load <1 x i64>, ptr [[ARG_SAVE]], align 8 |
| 349 | +// LLVM: store <1 x i64> [[TMP]], ptr [[S0:%.*]], align 8 |
| 350 | +// LLVM: [[INTRN_ARG:%.*]] = load <1 x i64>, ptr [[S0]], align 8 |
| 351 | +// LLVM: {{%.*}} = extractelement <1 x i64> [[INTRN_ARG]], i32 0 |
| 352 | +// LLVM: ret i64 {{%.*}} |
| 353 | + |
| 354 | +uint64_t test_vgetq_lane_u64(uint64x2_t a) { /* Test case: returns lane 1 (the last of 2 lanes) of `a` via vgetq_lane_u64. */ |
| 355 | + return vgetq_lane_u64(a, 1); |
| 356 | +} |
| 357 | + |
| 358 | +// CIR-LABEL: test_vgetq_lane_u64 |
| 359 | +// CIR: [[IDX:%.*]] = cir.const #cir.int<1> : !s32i |
| 360 | +// CIR: {{%.*}} = cir.vec.extract {{%.*}}[[[IDX]] : !s32i] : !cir.vector<!u64i x 2> |
| 361 | + |
| 362 | +// LLVM: define dso_local i64 @test_vgetq_lane_u64(<2 x i64> [[ARG:%.*]]) |
| 363 | +// LLVM: [[ARG_SAVE:%.*]] = alloca <2 x i64>, i64 1, align 16 |
| 364 | +// LLVM: store <2 x i64> [[ARG]], ptr [[ARG_SAVE]], align 16 |
| 365 | +// LLVM: [[TMP:%.*]] = load <2 x i64>, ptr [[ARG_SAVE]], align 16 |
| 366 | +// LLVM: store <2 x i64> [[TMP]], ptr [[S0:%.*]], align 16 |
| 367 | +// LLVM: [[INTRN_ARG:%.*]] = load <2 x i64>, ptr [[S0]], align 16 |
| 368 | +// LLVM: {{%.*}} = extractelement <2 x i64> [[INTRN_ARG]], i32 1 |
| 369 | +// LLVM: ret i64 {{%.*}} |
| 370 | + |
| 371 | +float32_t test_vget_lane_f32(float32x2_t a) { /* Test case: returns lane 1 (the last of 2 lanes) of `a` via vget_lane_f32. */ |
| 372 | + return vget_lane_f32(a, 1); |
| 373 | +} |
| 374 | + |
| 375 | +// CIR-LABEL: test_vget_lane_f32 |
| 376 | +// CIR: [[IDX:%.*]] = cir.const #cir.int<1> : !s32i |
| 377 | +// CIR: {{%.*}} = cir.vec.extract {{%.*}}[[[IDX]] : !s32i] : !cir.vector<!cir.float x 2> |
| 378 | + |
| 379 | +// LLVM: define dso_local float @test_vget_lane_f32(<2 x float> [[ARG:%.*]]) |
| 380 | +// LLVM: [[ARG_SAVE:%.*]] = alloca <2 x float>, i64 1, align 8 |
| 381 | +// LLVM: store <2 x float> [[ARG]], ptr [[ARG_SAVE]], align 8 |
| 382 | +// LLVM: [[TMP:%.*]] = load <2 x float>, ptr [[ARG_SAVE]], align 8 |
| 383 | +// LLVM: store <2 x float> [[TMP]], ptr [[S0:%.*]], align 8 |
| 384 | +// LLVM: [[INTRN_ARG:%.*]] = load <2 x float>, ptr [[S0]], align 8 |
| 385 | +// LLVM: {{%.*}} = extractelement <2 x float> [[INTRN_ARG]], i32 1 |
| 386 | +// LLVM: ret float {{%.*}} |
| 387 | + |
| 388 | +float64_t test_vget_lane_f64(float64x1_t a) { /* Test case: returns lane 0 (the only lane) of `a` via vget_lane_f64. */ |
| 389 | + return vget_lane_f64(a, 0); |
| 390 | +} |
| 391 | + |
| 392 | +// CIR-LABEL: test_vget_lane_f64 |
| 393 | +// CIR: [[IDX:%.*]] = cir.const #cir.int<0> : !s32i |
| 394 | +// CIR: {{%.*}} = cir.vec.extract {{%.*}}[[[IDX]] : !s32i] : !cir.vector<!cir.double x 1> |
| 395 | + |
| 396 | +// LLVM: define dso_local double @test_vget_lane_f64(<1 x double> [[ARG:%.*]]) |
| 397 | +// LLVM: [[ARG_SAVE:%.*]] = alloca <1 x double>, i64 1, align 8 |
| 398 | +// LLVM: store <1 x double> [[ARG]], ptr [[ARG_SAVE]], align 8 |
| 399 | +// LLVM: [[TMP:%.*]] = load <1 x double>, ptr [[ARG_SAVE]], align 8 |
| 400 | +// LLVM: store <1 x double> [[TMP]], ptr [[S0:%.*]], align 8 |
| 401 | +// LLVM: [[INTRN_ARG:%.*]] = load <1 x double>, ptr [[S0]], align 8 |
| 402 | +// LLVM: {{%.*}} = extractelement <1 x double> [[INTRN_ARG]], i32 0 |
| 403 | +// LLVM: ret double {{%.*}} |
| 404 | + |
| 405 | +float32_t test_vgetq_lane_f32(float32x4_t a) { /* Test case: returns lane 3 (the last of 4 lanes) of `a` via vgetq_lane_f32. */ |
| 406 | + return vgetq_lane_f32(a, 3); |
| 407 | +} |
| 408 | + |
| 409 | +// CIR-LABEL: test_vgetq_lane_f32 |
| 410 | +// CIR: [[IDX:%.*]] = cir.const #cir.int<3> : !s32i |
| 411 | +// CIR: {{%.*}} = cir.vec.extract {{%.*}}[[[IDX]] : !s32i] : !cir.vector<!cir.float x 4> |
| 412 | + |
| 413 | +// LLVM: define dso_local float @test_vgetq_lane_f32(<4 x float> [[ARG:%.*]]) |
| 414 | +// LLVM: [[ARG_SAVE:%.*]] = alloca <4 x float>, i64 1, align 16 |
| 415 | +// LLVM: store <4 x float> [[ARG]], ptr [[ARG_SAVE]], align 16 |
| 416 | +// LLVM: [[TMP:%.*]] = load <4 x float>, ptr [[ARG_SAVE]], align 16 |
| 417 | +// LLVM: store <4 x float> [[TMP]], ptr [[S0:%.*]], align 16 |
| 418 | +// LLVM: [[INTRN_ARG:%.*]] = load <4 x float>, ptr [[S0]], align 16 |
| 419 | +// LLVM: {{%.*}} = extractelement <4 x float> [[INTRN_ARG]], i32 3 |
| 420 | +// LLVM: ret float {{%.*}} |
| 421 | + |
| 422 | +float64_t test_vgetq_lane_f64(float64x2_t a) { /* Test case: returns lane 1 (the last of 2 lanes) of `a` via vgetq_lane_f64. */ |
| 423 | + return vgetq_lane_f64(a, 1); |
| 424 | +} |
| 425 | + |
| 426 | +// CIR-LABEL: test_vgetq_lane_f64 |
| 427 | +// CIR: [[IDX:%.*]] = cir.const #cir.int<1> : !s32i |
| 428 | +// CIR: {{%.*}} = cir.vec.extract {{%.*}}[[[IDX]] : !s32i] : !cir.vector<!cir.double x 2> |
| 429 | + |
| 430 | +// LLVM: define dso_local double @test_vgetq_lane_f64(<2 x double> [[ARG:%.*]]) |
| 431 | +// LLVM: [[ARG_SAVE:%.*]] = alloca <2 x double>, i64 1, align 16 |
| 432 | +// LLVM: store <2 x double> [[ARG]], ptr [[ARG_SAVE]], align 16 |
| 433 | +// LLVM: [[TMP:%.*]] = load <2 x double>, ptr [[ARG_SAVE]], align 16 |
| 434 | +// LLVM: store <2 x double> [[TMP]], ptr [[S0:%.*]], align 16 |
| 435 | +// LLVM: [[INTRN_ARG:%.*]] = load <2 x double>, ptr [[S0]], align 16 |
| 436 | +// LLVM: {{%.*}} = extractelement <2 x double> [[INTRN_ARG]], i32 1 |
| 437 | +// LLVM: ret double {{%.*}} |
0 commit comments