@@ -345,6 +345,86 @@ define <3 x i32> @load_v3i32(ptr %src) {
345
345
ret <3 x i32 > %l
346
346
}
347
347
348
; Test: load a <3 x i8> vector from %src (align 1), zero-extend each lane to
; i32, and return the <3 x i32> result.
; Two sets of expected output follow, under the CHECK and BE prefixes; the RUN
; lines that select them are outside this excerpt.
; NOTE(review): the BE sequence contains rev32/rev64 reversals, so it
; presumably covers a big-endian target -- confirm against the RUN lines.
; NOTE(review): the assertion lines look tool-generated; presumably they should
; be regenerated rather than hand-edited -- confirm against the file header.
+ define <3 x i32 > @load_v3i8_zext_to_3xi32 (ptr %src ) {
349
+ ; CHECK-LABEL: load_v3i8_zext_to_3xi32:
350
+ ; CHECK: ; %bb.0:
351
+ ; CHECK-NEXT: sub sp, sp, #16
352
+ ; CHECK-NEXT: .cfi_def_cfa_offset 16
353
+ ; CHECK-NEXT: ldrh w8, [x0]
354
+ ; CHECK-NEXT: movi.2d v1, #0x0000ff000000ff
355
+ ; CHECK-NEXT: strh w8, [sp, #12]
356
+ ; CHECK-NEXT: add x8, x0, #2
357
+ ; CHECK-NEXT: ldr s0, [sp, #12]
358
+ ; CHECK-NEXT: ushll.8h v0, v0, #0
359
+ ; CHECK-NEXT: ld1.b { v0 }[4], [x8]
360
+ ; CHECK-NEXT: ushll.4s v0, v0, #0
361
+ ; CHECK-NEXT: and.16b v0, v0, v1
362
+ ; CHECK-NEXT: add sp, sp, #16
363
+ ; CHECK-NEXT: ret
364
+ ;
365
+ ; BE-LABEL: load_v3i8_zext_to_3xi32:
366
+ ; BE: // %bb.0:
367
+ ; BE-NEXT: sub sp, sp, #16
368
+ ; BE-NEXT: .cfi_def_cfa_offset 16
369
+ ; BE-NEXT: ldrh w8, [x0]
370
+ ; BE-NEXT: movi v1.2d, #0x0000ff000000ff
371
+ ; BE-NEXT: strh w8, [sp, #12]
372
+ ; BE-NEXT: add x8, x0, #2
373
+ ; BE-NEXT: ldr s0, [sp, #12]
374
+ ; BE-NEXT: rev32 v0.8b, v0.8b
375
+ ; BE-NEXT: ushll v0.8h, v0.8b, #0
376
+ ; BE-NEXT: ld1 { v0.b }[4], [x8]
377
+ ; BE-NEXT: ushll v0.4s, v0.4h, #0
378
+ ; BE-NEXT: and v0.16b, v0.16b, v1.16b
379
+ ; BE-NEXT: rev64 v0.4s, v0.4s
380
+ ; BE-NEXT: ext v0.16b, v0.16b, v0.16b, #8
381
+ ; BE-NEXT: add sp, sp, #16
382
+ ; BE-NEXT: ret
383
+ %l = load <3 x i8 >, ptr %src , align 1
384
+ %e = zext <3 x i8 > %l to <3 x i32 >
385
+ ret <3 x i32 > %e
386
+ }
387
+
388
; Test: load a <3 x i8> vector from %src (align 1), sign-extend each lane to
; i32, and return the <3 x i32> result.
; In both expected sequences the widened lanes are shifted left by 24 and then
; arithmetically shifted right by 24 (shl + sshr).
; Two sets of expected output follow, under the CHECK and BE prefixes; the RUN
; lines that select them are outside this excerpt.
; NOTE(review): the BE sequence contains rev32/rev64 reversals, so it
; presumably covers a big-endian target -- confirm against the RUN lines.
; NOTE(review): the assertion lines look tool-generated; presumably they should
; be regenerated rather than hand-edited -- confirm against the file header.
+ define <3 x i32 > @load_v3i8_sext_to_3xi32 (ptr %src ) {
389
+ ; CHECK-LABEL: load_v3i8_sext_to_3xi32:
390
+ ; CHECK: ; %bb.0:
391
+ ; CHECK-NEXT: sub sp, sp, #16
392
+ ; CHECK-NEXT: .cfi_def_cfa_offset 16
393
+ ; CHECK-NEXT: ldrh w8, [x0]
394
+ ; CHECK-NEXT: strh w8, [sp, #12]
395
+ ; CHECK-NEXT: add x8, x0, #2
396
+ ; CHECK-NEXT: ldr s0, [sp, #12]
397
+ ; CHECK-NEXT: ushll.8h v0, v0, #0
398
+ ; CHECK-NEXT: ld1.b { v0 }[4], [x8]
399
+ ; CHECK-NEXT: ushll.4s v0, v0, #0
400
+ ; CHECK-NEXT: shl.4s v0, v0, #24
401
+ ; CHECK-NEXT: sshr.4s v0, v0, #24
402
+ ; CHECK-NEXT: add sp, sp, #16
403
+ ; CHECK-NEXT: ret
404
+ ;
405
+ ; BE-LABEL: load_v3i8_sext_to_3xi32:
406
+ ; BE: // %bb.0:
407
+ ; BE-NEXT: sub sp, sp, #16
408
+ ; BE-NEXT: .cfi_def_cfa_offset 16
409
+ ; BE-NEXT: ldrh w8, [x0]
410
+ ; BE-NEXT: strh w8, [sp, #12]
411
+ ; BE-NEXT: add x8, x0, #2
412
+ ; BE-NEXT: ldr s0, [sp, #12]
413
+ ; BE-NEXT: rev32 v0.8b, v0.8b
414
+ ; BE-NEXT: ushll v0.8h, v0.8b, #0
415
+ ; BE-NEXT: ld1 { v0.b }[4], [x8]
416
+ ; BE-NEXT: ushll v0.4s, v0.4h, #0
417
+ ; BE-NEXT: shl v0.4s, v0.4s, #24
418
+ ; BE-NEXT: sshr v0.4s, v0.4s, #24
419
+ ; BE-NEXT: rev64 v0.4s, v0.4s
420
+ ; BE-NEXT: ext v0.16b, v0.16b, v0.16b, #8
421
+ ; BE-NEXT: add sp, sp, #16
422
+ ; BE-NEXT: ret
423
+ %l = load <3 x i8 >, ptr %src , align 1
424
+ %e = sext <3 x i8 > %l to <3 x i32 >
425
+ ret <3 x i32 > %e
426
+ }
427
+
348
428
define void @store_trunc_from_64bits (ptr %src , ptr %dst ) {
349
429
; CHECK-LABEL: store_trunc_from_64bits:
350
430
; CHECK: ; %bb.0: ; %entry
@@ -388,9 +468,9 @@ define void @store_trunc_add_from_64bits(ptr %src, ptr %dst) {
388
468
; CHECK-NEXT: ldr s0, [x0]
389
469
; CHECK-NEXT: add x9, x0, #4
390
470
; CHECK-NEXT: Lloh0:
391
- ; CHECK-NEXT: adrp x8, lCPI9_0 @PAGE
471
+ ; CHECK-NEXT: adrp x8, lCPI11_0 @PAGE
392
472
; CHECK-NEXT: Lloh1:
393
- ; CHECK-NEXT: ldr d1, [x8, lCPI9_0 @PAGEOFF]
473
+ ; CHECK-NEXT: ldr d1, [x8, lCPI11_0 @PAGEOFF]
394
474
; CHECK-NEXT: add x8, x1, #1
395
475
; CHECK-NEXT: ld1.h { v0 }[2], [x9]
396
476
; CHECK-NEXT: add x9, x1, #2
@@ -409,8 +489,8 @@ define void @store_trunc_add_from_64bits(ptr %src, ptr %dst) {
409
489
; BE-NEXT: add x8, x0, #4
410
490
; BE-NEXT: rev32 v0.4h, v0.4h
411
491
; BE-NEXT: ld1 { v0.h }[2], [x8]
412
- ; BE-NEXT: adrp x8, .LCPI9_0
413
- ; BE-NEXT: add x8, x8, :lo12:.LCPI9_0
492
+ ; BE-NEXT: adrp x8, .LCPI11_0
493
+ ; BE-NEXT: add x8, x8, :lo12:.LCPI11_0
414
494
; BE-NEXT: ld1 { v1.4h }, [x8]
415
495
; BE-NEXT: add v0.4h, v0.4h, v1.4h
416
496
; BE-NEXT: xtn v1.8b, v0.8h
@@ -538,9 +618,9 @@ define void @load_ext_add_to_64bits(ptr %src, ptr %dst) {
538
618
; CHECK-NEXT: .cfi_def_cfa_offset 16
539
619
; CHECK-NEXT: ldrh w9, [x0]
540
620
; CHECK-NEXT: Lloh2:
541
- ; CHECK-NEXT: adrp x8, lCPI13_0 @PAGE
621
+ ; CHECK-NEXT: adrp x8, lCPI15_0 @PAGE
542
622
; CHECK-NEXT: Lloh3:
543
- ; CHECK-NEXT: ldr d1, [x8, lCPI13_0 @PAGEOFF]
623
+ ; CHECK-NEXT: ldr d1, [x8, lCPI15_0 @PAGEOFF]
544
624
; CHECK-NEXT: add x8, x1, #4
545
625
; CHECK-NEXT: strh w9, [sp, #12]
546
626
; CHECK-NEXT: add x9, x0, #2
@@ -566,8 +646,8 @@ define void @load_ext_add_to_64bits(ptr %src, ptr %dst) {
566
646
; BE-NEXT: rev32 v0.8b, v0.8b
567
647
; BE-NEXT: ushll v0.8h, v0.8b, #0
568
648
; BE-NEXT: ld1 { v0.b }[4], [x8]
569
- ; BE-NEXT: adrp x8, .LCPI13_0
570
- ; BE-NEXT: add x8, x8, :lo12:.LCPI13_0
649
+ ; BE-NEXT: adrp x8, .LCPI15_0
650
+ ; BE-NEXT: add x8, x8, :lo12:.LCPI15_0
571
651
; BE-NEXT: ld1 { v1.4h }, [x8]
572
652
; BE-NEXT: add x8, x1, #4
573
653
; BE-NEXT: bic v0.4h, #255, lsl #8
@@ -796,3 +876,115 @@ define void @shift_trunc_volatile_store(ptr %src, ptr %dst) {
796
876
store volatile <3 x i8 > %t , ptr %dst , align 1
797
877
ret void
798
878
}
879
+
880
; Test: load a <3 x i8> vector from %src (align 1), zero-extend it to
; <3 x i32>, add the constant vector <1, 2, 3>, truncate the sum back to
; <3 x i8>, and store it to the same pointer %src.
; Two sets of expected output follow, under the CHECK and BE prefixes; the RUN
; lines that select them are outside this excerpt.
; The expected output refers to constant-pool entry number 22 (lCPI22_0 /
; .LCPI22_0). NOTE(review): other hunks of this change renumber such labels
; after functions were added earlier in the file, so the number presumably
; tracks this function's position -- regenerate the assertions if functions
; are added or removed above it.
; NOTE(review): the store carries no explicit alignment while the load uses
; align 1; per the LLVM LangRef an omitted alignment means the type's ABI
; alignment -- confirm that this is intended.
+ define void @load_v3i8_zext_to_3xi32_add_trunc_store (ptr %src ) {
881
+ ; CHECK-LABEL: load_v3i8_zext_to_3xi32_add_trunc_store:
882
+ ; CHECK: ; %bb.0:
883
+ ; CHECK-NEXT: sub sp, sp, #16
884
+ ; CHECK-NEXT: .cfi_def_cfa_offset 16
885
+ ; CHECK-NEXT: ldrh w9, [x0]
886
+ ; CHECK-NEXT: Lloh4:
887
+ ; CHECK-NEXT: adrp x8, lCPI22_0@PAGE
888
+ ; CHECK-NEXT: Lloh5:
889
+ ; CHECK-NEXT: ldr q1, [x8, lCPI22_0@PAGEOFF]
890
+ ; CHECK-NEXT: add x8, x0, #1
891
+ ; CHECK-NEXT: strh w9, [sp, #12]
892
+ ; CHECK-NEXT: add x9, x0, #2
893
+ ; CHECK-NEXT: ldr s0, [sp, #12]
894
+ ; CHECK-NEXT: ushll.8h v0, v0, #0
895
+ ; CHECK-NEXT: ld1.b { v0 }[4], [x9]
896
+ ; CHECK-NEXT: uaddw.4s v0, v1, v0
897
+ ; CHECK-NEXT: st1.b { v0 }[4], [x8]
898
+ ; CHECK-NEXT: st1.b { v0 }[8], [x9]
899
+ ; CHECK-NEXT: st1.b { v0 }[0], [x0]
900
+ ; CHECK-NEXT: add sp, sp, #16
901
+ ; CHECK-NEXT: ret
902
+ ; CHECK-NEXT: .loh AdrpLdr Lloh4, Lloh5
903
+ ;
904
+ ; BE-LABEL: load_v3i8_zext_to_3xi32_add_trunc_store:
905
+ ; BE: // %bb.0:
906
+ ; BE-NEXT: sub sp, sp, #16
907
+ ; BE-NEXT: .cfi_def_cfa_offset 16
908
+ ; BE-NEXT: ldrh w9, [x0]
909
+ ; BE-NEXT: adrp x8, .LCPI22_0
910
+ ; BE-NEXT: add x8, x8, :lo12:.LCPI22_0
911
+ ; BE-NEXT: ld1 { v1.4h }, [x8]
912
+ ; BE-NEXT: strh w9, [sp, #12]
913
+ ; BE-NEXT: add x9, x0, #2
914
+ ; BE-NEXT: ldr s0, [sp, #12]
915
+ ; BE-NEXT: rev32 v0.8b, v0.8b
916
+ ; BE-NEXT: ushll v0.8h, v0.8b, #0
917
+ ; BE-NEXT: ld1 { v0.b }[4], [x9]
918
+ ; BE-NEXT: add v0.4h, v0.4h, v1.4h
919
+ ; BE-NEXT: xtn v1.8b, v0.8h
920
+ ; BE-NEXT: umov w8, v0.h[2]
921
+ ; BE-NEXT: rev32 v1.16b, v1.16b
922
+ ; BE-NEXT: str s1, [sp, #8]
923
+ ; BE-NEXT: ldrh w9, [sp, #8]
924
+ ; BE-NEXT: strb w8, [x0, #2]
925
+ ; BE-NEXT: strh w9, [x0]
926
+ ; BE-NEXT: add sp, sp, #16
927
+ ; BE-NEXT: ret
928
+ %l = load <3 x i8 >, ptr %src , align 1
929
+ %e = zext <3 x i8 > %l to <3 x i32 >
930
+ %add = add <3 x i32 > %e , <i32 1 , i32 2 , i32 3 >
931
+ %t = trunc <3 x i32 > %add to <3 x i8 >
932
+ store <3 x i8 > %t , ptr %src
933
+ ret void
934
+ }
935
+
936
; Test: load a <3 x i8> vector from %src (align 1), sign-extend it to
; <3 x i32>, add the constant vector <1, 2, 3>, truncate the sum back to
; <3 x i8>, and store it to the same pointer %src.
; Both expected sequences below are identical to those of
; load_v3i8_zext_to_3xi32_add_trunc_store apart from the Lloh and constant-pool
; label numbers, including the unsigned widening (ushll / uaddw).
; NOTE(review): presumably the sign extension is dropped because the result is
; truncated back to i8 -- confirm that this is the intended codegen.
; Two sets of expected output follow, under the CHECK and BE prefixes; the RUN
; lines that select them are outside this excerpt.
; NOTE(review): the store carries no explicit alignment while the load uses
; align 1; per the LLVM LangRef an omitted alignment means the type's ABI
; alignment -- confirm that this is intended.
+ define void @load_v3i8_sext_to_3xi32_add_trunc_store (ptr %src ) {
937
+ ; CHECK-LABEL: load_v3i8_sext_to_3xi32_add_trunc_store:
938
+ ; CHECK: ; %bb.0:
939
+ ; CHECK-NEXT: sub sp, sp, #16
940
+ ; CHECK-NEXT: .cfi_def_cfa_offset 16
941
+ ; CHECK-NEXT: ldrh w9, [x0]
942
+ ; CHECK-NEXT: Lloh6:
943
+ ; CHECK-NEXT: adrp x8, lCPI23_0@PAGE
944
+ ; CHECK-NEXT: Lloh7:
945
+ ; CHECK-NEXT: ldr q1, [x8, lCPI23_0@PAGEOFF]
946
+ ; CHECK-NEXT: add x8, x0, #1
947
+ ; CHECK-NEXT: strh w9, [sp, #12]
948
+ ; CHECK-NEXT: add x9, x0, #2
949
+ ; CHECK-NEXT: ldr s0, [sp, #12]
950
+ ; CHECK-NEXT: ushll.8h v0, v0, #0
951
+ ; CHECK-NEXT: ld1.b { v0 }[4], [x9]
952
+ ; CHECK-NEXT: uaddw.4s v0, v1, v0
953
+ ; CHECK-NEXT: st1.b { v0 }[4], [x8]
954
+ ; CHECK-NEXT: st1.b { v0 }[8], [x9]
955
+ ; CHECK-NEXT: st1.b { v0 }[0], [x0]
956
+ ; CHECK-NEXT: add sp, sp, #16
957
+ ; CHECK-NEXT: ret
958
+ ; CHECK-NEXT: .loh AdrpLdr Lloh6, Lloh7
959
+ ;
960
+ ; BE-LABEL: load_v3i8_sext_to_3xi32_add_trunc_store:
961
+ ; BE: // %bb.0:
962
+ ; BE-NEXT: sub sp, sp, #16
963
+ ; BE-NEXT: .cfi_def_cfa_offset 16
964
+ ; BE-NEXT: ldrh w9, [x0]
965
+ ; BE-NEXT: adrp x8, .LCPI23_0
966
+ ; BE-NEXT: add x8, x8, :lo12:.LCPI23_0
967
+ ; BE-NEXT: ld1 { v1.4h }, [x8]
968
+ ; BE-NEXT: strh w9, [sp, #12]
969
+ ; BE-NEXT: add x9, x0, #2
970
+ ; BE-NEXT: ldr s0, [sp, #12]
971
+ ; BE-NEXT: rev32 v0.8b, v0.8b
972
+ ; BE-NEXT: ushll v0.8h, v0.8b, #0
973
+ ; BE-NEXT: ld1 { v0.b }[4], [x9]
974
+ ; BE-NEXT: add v0.4h, v0.4h, v1.4h
975
+ ; BE-NEXT: xtn v1.8b, v0.8h
976
+ ; BE-NEXT: umov w8, v0.h[2]
977
+ ; BE-NEXT: rev32 v1.16b, v1.16b
978
+ ; BE-NEXT: str s1, [sp, #8]
979
+ ; BE-NEXT: ldrh w9, [sp, #8]
980
+ ; BE-NEXT: strb w8, [x0, #2]
981
+ ; BE-NEXT: strh w9, [x0]
982
+ ; BE-NEXT: add sp, sp, #16
983
+ ; BE-NEXT: ret
984
+ %l = load <3 x i8 >, ptr %src , align 1
985
+ %e = sext <3 x i8 > %l to <3 x i32 >
986
+ %add = add <3 x i32 > %e , <i32 1 , i32 2 , i32 3 >
987
+ %t = trunc <3 x i32 > %add to <3 x i8 >
988
+ store <3 x i8 > %t , ptr %src
989
+ ret void
990
+ }
0 commit comments