Skip to content

Commit 44a1040

Browse files
committed
[AArch64] Use INDEX for constant Neon step vectors
When compiling for an SVE target, we can use INDEX to generate constant fixed-length step vectors. The logic for this already existed in `LowerBUILD_VECTOR`, but it was only reachable when `!Subtarget->isNeonAvailable()`. This patch refactors the check so that the same code path is also taken for constant step vectors when NEON is available (as long as we can use SVE for them).
1 parent a80656b commit 44a1040

File tree

3 files changed

+40
-37
lines changed

3 files changed

+40
-37
lines changed

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14512,7 +14512,9 @@ SDValue AArch64TargetLowering::LowerBUILD_VECTOR(SDValue Op,
1451214512
SelectionDAG &DAG) const {
1451314513
EVT VT = Op.getValueType();
1451414514

14515-
if (useSVEForFixedLengthVectorVT(VT, !Subtarget->isNeonAvailable()))
14515+
bool OverrideNEON = !Subtarget->isNeonAvailable() ||
14516+
cast<BuildVectorSDNode>(Op)->isConstantSequence();
14517+
if (useSVEForFixedLengthVectorVT(VT, OverrideNEON))
1451614518
return LowerFixedLengthBuildVectorToSVE(Op, DAG);
1451714519

1451814520
// Try to build a simple constant vector.

llvm/test/CodeGen/AArch64/active_lane_mask.ll

Lines changed: 8 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -430,10 +430,9 @@ define <2 x i1> @lane_mask_v2i1_i64(i64 %index, i64 %TC) {
430430
define <16 x i1> @lane_mask_v16i1_i8(i8 %index, i8 %TC) {
431431
; CHECK-LABEL: lane_mask_v16i1_i8:
432432
; CHECK: // %bb.0:
433-
; CHECK-NEXT: adrp x8, .LCPI24_0
434-
; CHECK-NEXT: dup v0.16b, w0
435-
; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI24_0]
436-
; CHECK-NEXT: uqadd v0.16b, v0.16b, v1.16b
433+
; CHECK-NEXT: index z0.b, #0, #1
434+
; CHECK-NEXT: dup v1.16b, w0
435+
; CHECK-NEXT: uqadd v0.16b, v1.16b, v0.16b
437436
; CHECK-NEXT: dup v1.16b, w1
438437
; CHECK-NEXT: cmhi v0.16b, v1.16b, v0.16b
439438
; CHECK-NEXT: ret
@@ -444,10 +443,9 @@ define <16 x i1> @lane_mask_v16i1_i8(i8 %index, i8 %TC) {
444443
define <8 x i1> @lane_mask_v8i1_i8(i8 %index, i8 %TC) {
445444
; CHECK-LABEL: lane_mask_v8i1_i8:
446445
; CHECK: // %bb.0:
447-
; CHECK-NEXT: dup v0.8b, w0
448-
; CHECK-NEXT: adrp x8, .LCPI25_0
449-
; CHECK-NEXT: ldr d1, [x8, :lo12:.LCPI25_0]
450-
; CHECK-NEXT: uqadd v0.8b, v0.8b, v1.8b
446+
; CHECK-NEXT: index z0.b, #0, #1
447+
; CHECK-NEXT: dup v1.8b, w0
448+
; CHECK-NEXT: uqadd v0.8b, v1.8b, v0.8b
451449
; CHECK-NEXT: dup v1.8b, w1
452450
; CHECK-NEXT: cmhi v0.8b, v1.8b, v0.8b
453451
; CHECK-NEXT: ret
@@ -459,9 +457,8 @@ define <4 x i1> @lane_mask_v4i1_i8(i8 %index, i8 %TC) {
459457
; CHECK-LABEL: lane_mask_v4i1_i8:
460458
; CHECK: // %bb.0:
461459
; CHECK-NEXT: dup v0.4h, w0
462-
; CHECK-NEXT: adrp x8, .LCPI26_0
460+
; CHECK-NEXT: index z1.h, #0, #1
463461
; CHECK-NEXT: movi d2, #0xff00ff00ff00ff
464-
; CHECK-NEXT: ldr d1, [x8, :lo12:.LCPI26_0]
465462
; CHECK-NEXT: dup v3.4h, w1
466463
; CHECK-NEXT: bic v0.4h, #255, lsl #8
467464
; CHECK-NEXT: bic v3.4h, #255, lsl #8
@@ -478,8 +475,7 @@ define <2 x i1> @lane_mask_v2i1_i8(i8 %index, i8 %TC) {
478475
; CHECK: // %bb.0:
479476
; CHECK-NEXT: movi d0, #0x0000ff000000ff
480477
; CHECK-NEXT: dup v1.2s, w0
481-
; CHECK-NEXT: adrp x8, .LCPI27_0
482-
; CHECK-NEXT: ldr d2, [x8, :lo12:.LCPI27_0]
478+
; CHECK-NEXT: index z2.s, #0, #1
483479
; CHECK-NEXT: dup v3.2s, w1
484480
; CHECK-NEXT: and v1.8b, v1.8b, v0.8b
485481
; CHECK-NEXT: add v1.2s, v1.2s, v2.2s

llvm/test/CodeGen/AArch64/sve-index-const-step-vector.ll

Lines changed: 29 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -6,35 +6,35 @@
66
define <16 x i8> @v16i8() #0 {
77
; CHECK-LABEL: v16i8:
88
; CHECK: // %bb.0:
9-
; CHECK-NEXT: adrp x8, .LCPI0_0
10-
; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI0_0]
9+
; CHECK-NEXT: index z0.b, #0, #1
10+
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
1111
; CHECK-NEXT: ret
1212
ret <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>
1313
}
1414

1515
define <8 x i16> @v8i16() #0 {
1616
; CHECK-LABEL: v8i16:
1717
; CHECK: // %bb.0:
18-
; CHECK-NEXT: adrp x8, .LCPI1_0
19-
; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI1_0]
18+
; CHECK-NEXT: index z0.h, #0, #1
19+
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
2020
; CHECK-NEXT: ret
2121
ret <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>
2222
}
2323

2424
define <4 x i32> @v4i32() #0 {
2525
; CHECK-LABEL: v4i32:
2626
; CHECK: // %bb.0:
27-
; CHECK-NEXT: adrp x8, .LCPI2_0
28-
; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI2_0]
27+
; CHECK-NEXT: index z0.s, #0, #1
28+
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
2929
; CHECK-NEXT: ret
3030
ret <4 x i32> <i32 0, i32 1, i32 2, i32 3>
3131
}
3232

3333
define <2 x i64> @v2i64() #0 {
3434
; CHECK-LABEL: v2i64:
3535
; CHECK: // %bb.0:
36-
; CHECK-NEXT: adrp x8, .LCPI3_0
37-
; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI3_0]
36+
; CHECK-NEXT: index z0.d, #0, #1
37+
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
3838
; CHECK-NEXT: ret
3939
ret <2 x i64> <i64 0, i64 1>
4040
}
@@ -44,26 +44,26 @@ define <2 x i64> @v2i64() #0 {
4444
define <8 x i8> @v8i8() #0 {
4545
; CHECK-LABEL: v8i8:
4646
; CHECK: // %bb.0:
47-
; CHECK-NEXT: adrp x8, .LCPI4_0
48-
; CHECK-NEXT: ldr d0, [x8, :lo12:.LCPI4_0]
47+
; CHECK-NEXT: index z0.b, #0, #1
48+
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
4949
; CHECK-NEXT: ret
5050
ret <8 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7>
5151
}
5252

5353
define <4 x i16> @v4i16() #0 {
5454
; CHECK-LABEL: v4i16:
5555
; CHECK: // %bb.0:
56-
; CHECK-NEXT: adrp x8, .LCPI5_0
57-
; CHECK-NEXT: ldr d0, [x8, :lo12:.LCPI5_0]
56+
; CHECK-NEXT: index z0.h, #0, #1
57+
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
5858
; CHECK-NEXT: ret
5959
ret <4 x i16> <i16 0, i16 1, i16 2, i16 3>
6060
}
6161

6262
define <2 x i32> @v2i32() #0 {
6363
; CHECK-LABEL: v2i32:
6464
; CHECK: // %bb.0:
65-
; CHECK-NEXT: adrp x8, .LCPI6_0
66-
; CHECK-NEXT: ldr d0, [x8, :lo12:.LCPI6_0]
65+
; CHECK-NEXT: index z0.s, #0, #1
66+
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
6767
; CHECK-NEXT: ret
6868
ret <2 x i32> <i32 0, i32 1>
6969
}
@@ -73,8 +73,9 @@ define <2 x i32> @v2i32() #0 {
7373
define <4 x i32> @v4i32_non_zero_non_one() #0 {
7474
; CHECK-LABEL: v4i32_non_zero_non_one:
7575
; CHECK: // %bb.0:
76-
; CHECK-NEXT: adrp x8, .LCPI7_0
77-
; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI7_0]
76+
; CHECK-NEXT: index z0.s, #0, #2
77+
; CHECK-NEXT: orr z0.s, z0.s, #0x1
78+
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
7879
; CHECK-NEXT: ret
7980
ret <4 x i32> <i32 1, i32 3, i32 5, i32 7>
8081
}
@@ -83,8 +84,8 @@ define <4 x i32> @v4i32_non_zero_non_one() #0 {
8384
define <4 x i32> @v4i32_neg_immediates() #0 {
8485
; CHECK-LABEL: v4i32_neg_immediates:
8586
; CHECK: // %bb.0:
86-
; CHECK-NEXT: adrp x8, .LCPI8_0
87-
; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI8_0]
87+
; CHECK-NEXT: index z0.s, #-1, #-2
88+
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
8889
; CHECK-NEXT: ret
8990
ret <4 x i32> <i32 -1, i32 -3, i32 -5, i32 -7>
9091
}
@@ -93,8 +94,9 @@ define <4 x i32> @v4i32_neg_immediates() #0 {
9394
define <4 x i32> @v4i32_out_range_start() #0 {
9495
; CHECK-LABEL: v4i32_out_range_start:
9596
; CHECK: // %bb.0:
96-
; CHECK-NEXT: adrp x8, .LCPI9_0
97-
; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI9_0]
97+
; CHECK-NEXT: index z0.s, #0, #1
98+
; CHECK-NEXT: add z0.s, z0.s, #16 // =0x10
99+
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
98100
; CHECK-NEXT: ret
99101
ret <4 x i32> <i32 16, i32 17, i32 18, i32 19>
100102
}
@@ -103,8 +105,9 @@ define <4 x i32> @v4i32_out_range_start() #0 {
103105
define <4 x i32> @v4i32_out_range_step() #0 {
104106
; CHECK-LABEL: v4i32_out_range_step:
105107
; CHECK: // %bb.0:
106-
; CHECK-NEXT: adrp x8, .LCPI10_0
107-
; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI10_0]
108+
; CHECK-NEXT: mov w8, #16 // =0x10
109+
; CHECK-NEXT: index z0.s, #0, w8
110+
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
108111
; CHECK-NEXT: ret
109112
ret <4 x i32> <i32 0, i32 16, i32 32, i32 48>
110113
}
@@ -113,8 +116,10 @@ define <4 x i32> @v4i32_out_range_step() #0 {
113116
define <4 x i32> @v4i32_out_range_start_step() #0 {
114117
; CHECK-LABEL: v4i32_out_range_start_step:
115118
; CHECK: // %bb.0:
116-
; CHECK-NEXT: adrp x8, .LCPI11_0
117-
; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI11_0]
119+
; CHECK-NEXT: mov w8, #16 // =0x10
120+
; CHECK-NEXT: index z0.s, #0, w8
121+
; CHECK-NEXT: add z0.s, z0.s, #16 // =0x10
122+
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
118123
; CHECK-NEXT: ret
119124
ret <4 x i32> <i32 16, i32 32, i32 48, i32 64>
120125
}

0 commit comments

Comments
 (0)