|
1 | 1 | ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
2 |
| -; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve,+fullfp16 -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=CHECK |
3 |
| -; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve.fp -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECKFP |
| 2 | +; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve,+fullfp16 -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-LV |
| 3 | +; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve.fp -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-LV,CHECKFP |
| 4 | +; RUN: llc -early-live-intervals -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve,+fullfp16 -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-LIS |
| 5 | +; RUN: llc -early-live-intervals -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve.fp -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-LIS,CHECKFP |
4 | 6 |
|
5 | 7 | define arm_aapcs_vfpcc <4 x i32> @shuffle1_i32(<4 x i32> %src) {
|
6 | 8 | ; CHECK-LABEL: shuffle1_i32:
|
@@ -221,18 +223,31 @@ entry:
|
221 | 223 | }
|
222 | 224 |
|
223 | 225 | define arm_aapcs_vfpcc <8 x i16> @shuffle3_i16(<8 x i16> %src) {
|
224 |
| -; CHECK-LABEL: shuffle3_i16: |
225 |
| -; CHECK: @ %bb.0: @ %entry |
226 |
| -; CHECK-NEXT: vmov q1, q0 |
227 |
| -; CHECK-NEXT: vmovx.f16 s2, s5 |
228 |
| -; CHECK-NEXT: vmovx.f16 s0, s4 |
229 |
| -; CHECK-NEXT: vins.f16 s5, s4 |
230 |
| -; CHECK-NEXT: vins.f16 s2, s0 |
231 |
| -; CHECK-NEXT: vmov.f32 s3, s5 |
232 |
| -; CHECK-NEXT: vmovx.f16 s1, s7 |
233 |
| -; CHECK-NEXT: vmov.f32 s0, s6 |
234 |
| -; CHECK-NEXT: vins.f16 s1, s7 |
235 |
| -; CHECK-NEXT: bx lr |
| 226 | +; CHECK-LV-LABEL: shuffle3_i16: |
| 227 | +; CHECK-LV: @ %bb.0: @ %entry |
| 228 | +; CHECK-LV-NEXT: vmov q1, q0 |
| 229 | +; CHECK-LV-NEXT: vmovx.f16 s2, s5 |
| 230 | +; CHECK-LV-NEXT: vmovx.f16 s0, s4 |
| 231 | +; CHECK-LV-NEXT: vins.f16 s5, s4 |
| 232 | +; CHECK-LV-NEXT: vins.f16 s2, s0 |
| 233 | +; CHECK-LV-NEXT: vmov.f32 s3, s5 |
| 234 | +; CHECK-LV-NEXT: vmovx.f16 s1, s7 |
| 235 | +; CHECK-LV-NEXT: vmov.f32 s0, s6 |
| 236 | +; CHECK-LV-NEXT: vins.f16 s1, s7 |
| 237 | +; CHECK-LV-NEXT: bx lr |
| 238 | +; |
| 239 | +; CHECK-LIS-LABEL: shuffle3_i16: |
| 240 | +; CHECK-LIS: @ %bb.0: @ %entry |
| 241 | +; CHECK-LIS-NEXT: vmovx.f16 s5, s3 |
| 242 | +; CHECK-LIS-NEXT: vmovx.f16 s6, s1 |
| 243 | +; CHECK-LIS-NEXT: vmovx.f16 s4, s0 |
| 244 | +; CHECK-LIS-NEXT: vins.f16 s1, s0 |
| 245 | +; CHECK-LIS-NEXT: vins.f16 s6, s4 |
| 246 | +; CHECK-LIS-NEXT: vins.f16 s5, s3 |
| 247 | +; CHECK-LIS-NEXT: vmov.f32 s7, s1 |
| 248 | +; CHECK-LIS-NEXT: vmov.f32 s4, s2 |
| 249 | +; CHECK-LIS-NEXT: vmov q0, q1 |
| 250 | +; CHECK-LIS-NEXT: bx lr |
236 | 251 | entry:
|
237 | 252 | %out = shufflevector <8 x i16> %src, <8 x i16> undef, <8 x i32> <i32 4, i32 5, i32 7, i32 6, i32 3, i32 1, i32 2, i32 0>
|
238 | 253 | ret <8 x i16> %out
|
@@ -476,42 +491,79 @@ entry:
|
476 | 491 | }
|
477 | 492 |
|
478 | 493 | define arm_aapcs_vfpcc <16 x i8> @shuffle3_i8(<16 x i8> %src) {
|
479 |
| -; CHECK-LABEL: shuffle3_i8: |
480 |
| -; CHECK: @ %bb.0: @ %entry |
481 |
| -; CHECK-NEXT: vmov q1, q0 |
482 |
| -; CHECK-NEXT: vmov.u8 r0, q0[4] |
483 |
| -; CHECK-NEXT: vmov.8 q0[0], r0 |
484 |
| -; CHECK-NEXT: vmov.u8 r0, q1[5] |
485 |
| -; CHECK-NEXT: vmov.8 q0[1], r0 |
486 |
| -; CHECK-NEXT: vmov.u8 r0, q1[15] |
487 |
| -; CHECK-NEXT: vmov.8 q0[2], r0 |
488 |
| -; CHECK-NEXT: vmov.u8 r0, q1[7] |
489 |
| -; CHECK-NEXT: vmov.8 q0[3], r0 |
490 |
| -; CHECK-NEXT: vmov.u8 r0, q1[14] |
491 |
| -; CHECK-NEXT: vmov.8 q0[4], r0 |
492 |
| -; CHECK-NEXT: vmov.u8 r0, q1[9] |
493 |
| -; CHECK-NEXT: vmov.8 q0[5], r0 |
494 |
| -; CHECK-NEXT: vmov.u8 r0, q1[6] |
495 |
| -; CHECK-NEXT: vmov.8 q0[6], r0 |
496 |
| -; CHECK-NEXT: vmov.u8 r0, q1[3] |
497 |
| -; CHECK-NEXT: vmov.8 q0[7], r0 |
498 |
| -; CHECK-NEXT: vmov.u8 r0, q1[10] |
499 |
| -; CHECK-NEXT: vmov.8 q0[8], r0 |
500 |
| -; CHECK-NEXT: vmov.u8 r0, q1[12] |
501 |
| -; CHECK-NEXT: vmov.8 q0[9], r0 |
502 |
| -; CHECK-NEXT: vmov.u8 r0, q1[1] |
503 |
| -; CHECK-NEXT: vmov.8 q0[10], r0 |
504 |
| -; CHECK-NEXT: vmov.u8 r0, q1[13] |
505 |
| -; CHECK-NEXT: vmov.8 q0[11], r0 |
506 |
| -; CHECK-NEXT: vmov.u8 r0, q1[2] |
507 |
| -; CHECK-NEXT: vmov.8 q0[12], r0 |
508 |
| -; CHECK-NEXT: vmov.u8 r0, q1[8] |
509 |
| -; CHECK-NEXT: vmov.8 q0[13], r0 |
510 |
| -; CHECK-NEXT: vmov.u8 r0, q1[0] |
511 |
| -; CHECK-NEXT: vmov.8 q0[14], r0 |
512 |
| -; CHECK-NEXT: vmov.u8 r0, q1[11] |
513 |
| -; CHECK-NEXT: vmov.8 q0[15], r0 |
514 |
| -; CHECK-NEXT: bx lr |
| 494 | +; CHECK-LV-LABEL: shuffle3_i8: |
| 495 | +; CHECK-LV: @ %bb.0: @ %entry |
| 496 | +; CHECK-LV-NEXT: vmov q1, q0 |
| 497 | +; CHECK-LV-NEXT: vmov.u8 r0, q0[4] |
| 498 | +; CHECK-LV-NEXT: vmov.8 q0[0], r0 |
| 499 | +; CHECK-LV-NEXT: vmov.u8 r0, q1[5] |
| 500 | +; CHECK-LV-NEXT: vmov.8 q0[1], r0 |
| 501 | +; CHECK-LV-NEXT: vmov.u8 r0, q1[15] |
| 502 | +; CHECK-LV-NEXT: vmov.8 q0[2], r0 |
| 503 | +; CHECK-LV-NEXT: vmov.u8 r0, q1[7] |
| 504 | +; CHECK-LV-NEXT: vmov.8 q0[3], r0 |
| 505 | +; CHECK-LV-NEXT: vmov.u8 r0, q1[14] |
| 506 | +; CHECK-LV-NEXT: vmov.8 q0[4], r0 |
| 507 | +; CHECK-LV-NEXT: vmov.u8 r0, q1[9] |
| 508 | +; CHECK-LV-NEXT: vmov.8 q0[5], r0 |
| 509 | +; CHECK-LV-NEXT: vmov.u8 r0, q1[6] |
| 510 | +; CHECK-LV-NEXT: vmov.8 q0[6], r0 |
| 511 | +; CHECK-LV-NEXT: vmov.u8 r0, q1[3] |
| 512 | +; CHECK-LV-NEXT: vmov.8 q0[7], r0 |
| 513 | +; CHECK-LV-NEXT: vmov.u8 r0, q1[10] |
| 514 | +; CHECK-LV-NEXT: vmov.8 q0[8], r0 |
| 515 | +; CHECK-LV-NEXT: vmov.u8 r0, q1[12] |
| 516 | +; CHECK-LV-NEXT: vmov.8 q0[9], r0 |
| 517 | +; CHECK-LV-NEXT: vmov.u8 r0, q1[1] |
| 518 | +; CHECK-LV-NEXT: vmov.8 q0[10], r0 |
| 519 | +; CHECK-LV-NEXT: vmov.u8 r0, q1[13] |
| 520 | +; CHECK-LV-NEXT: vmov.8 q0[11], r0 |
| 521 | +; CHECK-LV-NEXT: vmov.u8 r0, q1[2] |
| 522 | +; CHECK-LV-NEXT: vmov.8 q0[12], r0 |
| 523 | +; CHECK-LV-NEXT: vmov.u8 r0, q1[8] |
| 524 | +; CHECK-LV-NEXT: vmov.8 q0[13], r0 |
| 525 | +; CHECK-LV-NEXT: vmov.u8 r0, q1[0] |
| 526 | +; CHECK-LV-NEXT: vmov.8 q0[14], r0 |
| 527 | +; CHECK-LV-NEXT: vmov.u8 r0, q1[11] |
| 528 | +; CHECK-LV-NEXT: vmov.8 q0[15], r0 |
| 529 | +; CHECK-LV-NEXT: bx lr |
| 530 | +; |
| 531 | +; CHECK-LIS-LABEL: shuffle3_i8: |
| 532 | +; CHECK-LIS: @ %bb.0: @ %entry |
| 533 | +; CHECK-LIS-NEXT: vmov.u8 r0, q0[4] |
| 534 | +; CHECK-LIS-NEXT: vmov.8 q1[0], r0 |
| 535 | +; CHECK-LIS-NEXT: vmov.u8 r0, q0[5] |
| 536 | +; CHECK-LIS-NEXT: vmov.8 q1[1], r0 |
| 537 | +; CHECK-LIS-NEXT: vmov.u8 r0, q0[15] |
| 538 | +; CHECK-LIS-NEXT: vmov.8 q1[2], r0 |
| 539 | +; CHECK-LIS-NEXT: vmov.u8 r0, q0[7] |
| 540 | +; CHECK-LIS-NEXT: vmov.8 q1[3], r0 |
| 541 | +; CHECK-LIS-NEXT: vmov.u8 r0, q0[14] |
| 542 | +; CHECK-LIS-NEXT: vmov.8 q1[4], r0 |
| 543 | +; CHECK-LIS-NEXT: vmov.u8 r0, q0[9] |
| 544 | +; CHECK-LIS-NEXT: vmov.8 q1[5], r0 |
| 545 | +; CHECK-LIS-NEXT: vmov.u8 r0, q0[6] |
| 546 | +; CHECK-LIS-NEXT: vmov.8 q1[6], r0 |
| 547 | +; CHECK-LIS-NEXT: vmov.u8 r0, q0[3] |
| 548 | +; CHECK-LIS-NEXT: vmov.8 q1[7], r0 |
| 549 | +; CHECK-LIS-NEXT: vmov.u8 r0, q0[10] |
| 550 | +; CHECK-LIS-NEXT: vmov.8 q1[8], r0 |
| 551 | +; CHECK-LIS-NEXT: vmov.u8 r0, q0[12] |
| 552 | +; CHECK-LIS-NEXT: vmov.8 q1[9], r0 |
| 553 | +; CHECK-LIS-NEXT: vmov.u8 r0, q0[1] |
| 554 | +; CHECK-LIS-NEXT: vmov.8 q1[10], r0 |
| 555 | +; CHECK-LIS-NEXT: vmov.u8 r0, q0[13] |
| 556 | +; CHECK-LIS-NEXT: vmov.8 q1[11], r0 |
| 557 | +; CHECK-LIS-NEXT: vmov.u8 r0, q0[2] |
| 558 | +; CHECK-LIS-NEXT: vmov.8 q1[12], r0 |
| 559 | +; CHECK-LIS-NEXT: vmov.u8 r0, q0[8] |
| 560 | +; CHECK-LIS-NEXT: vmov.8 q1[13], r0 |
| 561 | +; CHECK-LIS-NEXT: vmov.u8 r0, q0[0] |
| 562 | +; CHECK-LIS-NEXT: vmov.8 q1[14], r0 |
| 563 | +; CHECK-LIS-NEXT: vmov.u8 r0, q0[11] |
| 564 | +; CHECK-LIS-NEXT: vmov.8 q1[15], r0 |
| 565 | +; CHECK-LIS-NEXT: vmov q0, q1 |
| 566 | +; CHECK-LIS-NEXT: bx lr |
515 | 567 | entry:
|
516 | 568 | %out = shufflevector <16 x i8> %src, <16 x i8> undef, <16 x i32> <i32 4, i32 5, i32 15, i32 7, i32 14, i32 9, i32 6, i32 3, i32 10, i32 12, i32 1, i32 13, i32 2, i32 8, i32 0, i32 11>
|
517 | 569 | ret <16 x i8> %out
|
@@ -1143,18 +1195,31 @@ entry:
|
1143 | 1195 | }
|
1144 | 1196 |
|
1145 | 1197 | define arm_aapcs_vfpcc <8 x half> @shuffle3_f16(<8 x half> %src) {
|
1146 |
| -; CHECK-LABEL: shuffle3_f16: |
1147 |
| -; CHECK: @ %bb.0: @ %entry |
1148 |
| -; CHECK-NEXT: vmov q1, q0 |
1149 |
| -; CHECK-NEXT: vmovx.f16 s2, s5 |
1150 |
| -; CHECK-NEXT: vmovx.f16 s0, s4 |
1151 |
| -; CHECK-NEXT: vins.f16 s5, s4 |
1152 |
| -; CHECK-NEXT: vins.f16 s2, s0 |
1153 |
| -; CHECK-NEXT: vmov.f32 s3, s5 |
1154 |
| -; CHECK-NEXT: vmovx.f16 s1, s7 |
1155 |
| -; CHECK-NEXT: vmov.f32 s0, s6 |
1156 |
| -; CHECK-NEXT: vins.f16 s1, s7 |
1157 |
| -; CHECK-NEXT: bx lr |
| 1198 | +; CHECK-LV-LABEL: shuffle3_f16: |
| 1199 | +; CHECK-LV: @ %bb.0: @ %entry |
| 1200 | +; CHECK-LV-NEXT: vmov q1, q0 |
| 1201 | +; CHECK-LV-NEXT: vmovx.f16 s2, s5 |
| 1202 | +; CHECK-LV-NEXT: vmovx.f16 s0, s4 |
| 1203 | +; CHECK-LV-NEXT: vins.f16 s5, s4 |
| 1204 | +; CHECK-LV-NEXT: vins.f16 s2, s0 |
| 1205 | +; CHECK-LV-NEXT: vmov.f32 s3, s5 |
| 1206 | +; CHECK-LV-NEXT: vmovx.f16 s1, s7 |
| 1207 | +; CHECK-LV-NEXT: vmov.f32 s0, s6 |
| 1208 | +; CHECK-LV-NEXT: vins.f16 s1, s7 |
| 1209 | +; CHECK-LV-NEXT: bx lr |
| 1210 | +; |
| 1211 | +; CHECK-LIS-LABEL: shuffle3_f16: |
| 1212 | +; CHECK-LIS: @ %bb.0: @ %entry |
| 1213 | +; CHECK-LIS-NEXT: vmovx.f16 s5, s3 |
| 1214 | +; CHECK-LIS-NEXT: vmovx.f16 s6, s1 |
| 1215 | +; CHECK-LIS-NEXT: vmovx.f16 s4, s0 |
| 1216 | +; CHECK-LIS-NEXT: vins.f16 s1, s0 |
| 1217 | +; CHECK-LIS-NEXT: vins.f16 s6, s4 |
| 1218 | +; CHECK-LIS-NEXT: vins.f16 s5, s3 |
| 1219 | +; CHECK-LIS-NEXT: vmov.f32 s7, s1 |
| 1220 | +; CHECK-LIS-NEXT: vmov.f32 s4, s2 |
| 1221 | +; CHECK-LIS-NEXT: vmov q0, q1 |
| 1222 | +; CHECK-LIS-NEXT: bx lr |
1158 | 1223 | entry:
|
1159 | 1224 | %out = shufflevector <8 x half> %src, <8 x half> undef, <8 x i32> <i32 4, i32 5, i32 7, i32 6, i32 3, i32 1, i32 2, i32 0>
|
1160 | 1225 | ret <8 x half> %out
|
|
0 commit comments