Skip to content

Commit ce3e7eb

Browse files
committed
[AArch64][SVE] Fix bad PTEST(PG, OP(PG, ...)) optimization
AArch64InstrInfo::optimizePTestInstr attempts to remove a PTEST of a predicate-generating operation that identically sets flags (implicitly). Currently the PTEST is removed whenever it and the predicate-generating operation use the same mask. This is incorrect since it doesn't consider element size. PTEST operates on 8-bit predicates, but for instructions like compare that also support 16/32/64-bit predicates, the implicit PTEST performed by the instruction will consider fewer lanes for these element sizes and could set different first or last active flags.

For example, consider the following instruction sequence

  ptrue p0.b                    ; P0=1111-1111-1111-1111
  index z0.s, #0, #1            ; Z0=<0,1,2,3>
  index z1.s, #1, #1            ; Z1=<1,2,3,4>
  cmphi p1.s, p0/z, z1.s, z0.s  ; P1=0001-0001-0001-0001
                                ;       ^ last active
  ptest p0, p1.b                ; P1=0001-0001-0001-0001
                                ;    ^ last active

where the compare generates a canonical all active 32-bit predicate (equivalent to 'ptrue p1.s, all'). The implicit PTEST sets the last active flag, whereas the PTEST instruction with the same mask doesn't.

This patch restricts the optimization to instructions operating on 8-bit predicates. One caveat: for the 'any active' condition the optimization is safe regardless of element size; this will be addressed in a later patch.

Reviewed By: bsmith

Differential Revision: https://reviews.llvm.org/D137716
1 parent 3ddd5a8 commit ce3e7eb

14 files changed

+94
-40
lines changed

llvm/lib/Target/AArch64/AArch64InstrInfo.cpp

Lines changed: 23 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1325,10 +1325,30 @@ bool AArch64InstrInfo::optimizePTestInstr(
13251325

13261326
// Fallthrough to simply remove the PTEST.
13271327
} else if (PredIsPTestLike) {
1328-
// For PTEST(PG_1, PTEST_LIKE(PG2, ...)), PTEST is redundant when both
1329-
// instructions use the same predicate.
1328+
// For PTEST(PG, PTEST_LIKE(PG, ...)), the PTEST is redundant since the
1329+
// flags are set based on the same mask 'PG', but PTEST_LIKE must operate
1330+
// on 8-bit predicates like the PTEST. Otherwise, for instructions like
1331+
// compare that also support 16/32/64-bit predicates, the implicit PTEST
1332+
// performed by the compare could consider fewer lanes for these element
1333+
// sizes.
1334+
//
1335+
// For example, consider
1336+
//
1337+
//   ptrue p0.b                    ; P0=1111-1111-1111-1111
1338+
//   index z0.s, #0, #1            ; Z0=<0,1,2,3>
1339+
//   index z1.s, #1, #1            ; Z1=<1,2,3,4>
1340+
//   cmphi p1.s, p0/z, z1.s, z0.s  ; P1=0001-0001-0001-0001
1341+
//                                 ;       ^ last active
1342+
//   ptest p0, p1.b                ; P1=0001-0001-0001-0001
1343+
//                                 ;    ^ last active
1344+
//
1345+
// where the compare generates a canonical all active 32-bit predicate
1346+
// (equivalent to 'ptrue p1.s, all'). The implicit PTEST sets the last
1347+
// active flag, whereas the PTEST instruction with the same mask doesn't.
13301348
auto PTestLikeMask = MRI->getUniqueVRegDef(Pred->getOperand(1).getReg());
1331-
if (Mask != PTestLikeMask)
1349+
uint64_t PredElementSize = getElementSizeForOpcode(PredOpcode);
1350+
if ((Mask != PTestLikeMask) ||
1351+
(PredElementSize != AArch64::ElementSizeB))
13321352
return false;
13331353

13341354
// Fallthrough to simply remove the PTEST.

llvm/test/CodeGen/AArch64/sve-ptest-removal-cmpeq.ll

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,8 @@ define i32 @cmpeq_nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale
2020
define i32 @cmpeq_nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
2121
; CHECK-LABEL: cmpeq_nxv4i32:
2222
; CHECK: // %bb.0:
23-
; CHECK-NEXT: cmpeq p0.s, p0/z, z0.s, z1.s
23+
; CHECK-NEXT: cmpeq p1.s, p0/z, z0.s, z1.s
24+
; CHECK-NEXT: ptest p0, p1.b
2425
; CHECK-NEXT: cset w0, ne
2526
; CHECK-NEXT: ret
2627
%1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.cmpeq.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
@@ -65,7 +66,8 @@ define i32 @cmpeq_wide_nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <v
6566
define i32 @cmpeq_wide_nxv8i16(<vscale x 16 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 2 x i64> %b) {
6667
; CHECK-LABEL: cmpeq_wide_nxv8i16:
6768
; CHECK: // %bb.0:
68-
; CHECK-NEXT: cmpeq p0.h, p0/z, z0.h, z1.d
69+
; CHECK-NEXT: cmpeq p1.h, p0/z, z0.h, z1.d
70+
; CHECK-NEXT: ptest p0, p1.b
6971
; CHECK-NEXT: cset w0, ne
7072
; CHECK-NEXT: ret
7173
%1 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %pg)
@@ -79,7 +81,8 @@ define i32 @cmpeq_wide_nxv8i16(<vscale x 16 x i1> %pg, <vscale x 8 x i16> %a, <v
7981
define i32 @cmpeq_wide_nxv4i32(<vscale x 16 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 2 x i64> %b) {
8082
; CHECK-LABEL: cmpeq_wide_nxv4i32:
8183
; CHECK: // %bb.0:
82-
; CHECK-NEXT: cmpeq p0.s, p0/z, z0.s, z1.d
84+
; CHECK-NEXT: cmpeq p1.s, p0/z, z0.s, z1.d
85+
; CHECK-NEXT: ptest p0, p1.b
8386
; CHECK-NEXT: cset w0, ne
8487
; CHECK-NEXT: ret
8588
%1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)

llvm/test/CodeGen/AArch64/sve-ptest-removal-cmpeq.mir

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -65,7 +65,7 @@ body: |
6565
liveins: $p0, $z0, $z1
6666
6767
; CHECK-LABEL: name: cmpeq_nxv8i16
68-
; CHECK-NOT: PTEST
68+
; CHECK: PTEST
6969
%2:zpr = COPY $z1
7070
%1:zpr = COPY $z0
7171
%0:ppr_3b = COPY $p0
@@ -101,7 +101,7 @@ body: |
101101
liveins: $p0, $z0, $z1
102102
103103
; CHECK-LABEL: name: cmpeq_nxv4i32
104-
; CHECK-NOT: PTEST
104+
; CHECK: PTEST
105105
%2:zpr = COPY $z1
106106
%1:zpr = COPY $z0
107107
%0:ppr_3b = COPY $p0
@@ -137,7 +137,7 @@ body: |
137137
liveins: $p0, $z0, $z1
138138
139139
; CHECK-LABEL: name: cmpeq_nxv2i64
140-
; CHECK-NOT: PTEST
140+
; CHECK: PTEST
141141
%2:zpr = COPY $z1
142142
%1:zpr = COPY $z0
143143
%0:ppr_3b = COPY $p0
@@ -204,7 +204,7 @@ body: |
204204
liveins: $p0, $z0
205205
206206
; CHECK-LABEL: name: cmpeq_imm_nxv8i16
207-
; CHECK-NOT: PTEST
207+
; CHECK: PTEST
208208
%1:zpr = COPY $z0
209209
%0:ppr_3b = COPY $p0
210210
%2:ppr = CMPEQ_PPzZI_H %0, %1, 0, implicit-def dead $nzcv
@@ -237,7 +237,7 @@ body: |
237237
liveins: $p0, $z0
238238
239239
; CHECK-LABEL: name: cmpeq_imm_nxv4i32
240-
; CHECK-NOT: PTEST
240+
; CHECK: PTEST
241241
%1:zpr = COPY $z0
242242
%0:ppr_3b = COPY $p0
243243
%2:ppr = CMPEQ_PPzZI_S %0, %1, 0, implicit-def dead $nzcv
@@ -270,7 +270,7 @@ body: |
270270
liveins: $p0, $z0
271271
272272
; CHECK-LABEL: name: cmpeq_imm_nxv2i64
273-
; CHECK-NOT: PTEST
273+
; CHECK: PTEST
274274
%1:zpr = COPY $z0
275275
%0:ppr_3b = COPY $p0
276276
%2:ppr = CMPEQ_PPzZI_D %0, %1, 0, implicit-def dead $nzcv
@@ -339,7 +339,7 @@ body: |
339339
liveins: $p0, $z0, $z1
340340
341341
; CHECK-LABEL: name: cmpeq_wide_nxv8i16
342-
; CHECK-NOT: PTEST
342+
; CHECK: PTEST
343343
%2:zpr = COPY $z1
344344
%1:zpr = COPY $z0
345345
%0:ppr_3b = COPY $p0
@@ -375,7 +375,7 @@ body: |
375375
liveins: $p0, $z0, $z1
376376
377377
; CHECK-LABEL: name: cmpeq_wide_nxv4i32
378-
; CHECK-NOT: PTEST
378+
; CHECK: PTEST
379379
%2:zpr = COPY $z1
380380
%1:zpr = COPY $z0
381381
%0:ppr_3b = COPY $p0

llvm/test/CodeGen/AArch64/sve-ptest-removal-cmpge.ll

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,8 @@ define i32 @cmpge_nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale
2020
define i32 @cmpge_nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
2121
; CHECK-LABEL: cmpge_nxv4i32:
2222
; CHECK: // %bb.0:
23-
; CHECK-NEXT: cmpge p0.s, p0/z, z0.s, z1.s
23+
; CHECK-NEXT: cmpge p1.s, p0/z, z0.s, z1.s
24+
; CHECK-NEXT: ptest p0, p1.b
2425
; CHECK-NEXT: cset w0, ne
2526
; CHECK-NEXT: ret
2627
%1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.cmpge.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
@@ -65,7 +66,8 @@ define i32 @cmpge_wide_nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <v
6566
define i32 @cmpge_wide_nxv8i16(<vscale x 16 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 2 x i64> %b) {
6667
; CHECK-LABEL: cmpge_wide_nxv8i16:
6768
; CHECK: // %bb.0:
68-
; CHECK-NEXT: cmpge p0.h, p0/z, z0.h, z1.d
69+
; CHECK-NEXT: cmpge p1.h, p0/z, z0.h, z1.d
70+
; CHECK-NEXT: ptest p0, p1.b
6971
; CHECK-NEXT: cset w0, ne
7072
; CHECK-NEXT: ret
7173
%1 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %pg)
@@ -79,7 +81,8 @@ define i32 @cmpge_wide_nxv8i16(<vscale x 16 x i1> %pg, <vscale x 8 x i16> %a, <v
7981
define i32 @cmpge_wide_nxv4i32(<vscale x 16 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 2 x i64> %b) {
8082
; CHECK-LABEL: cmpge_wide_nxv4i32:
8183
; CHECK: // %bb.0:
82-
; CHECK-NEXT: cmpge p0.s, p0/z, z0.s, z1.d
84+
; CHECK-NEXT: cmpge p1.s, p0/z, z0.s, z1.d
85+
; CHECK-NEXT: ptest p0, p1.b
8386
; CHECK-NEXT: cset w0, ne
8487
; CHECK-NEXT: ret
8588
%1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)

llvm/test/CodeGen/AArch64/sve-ptest-removal-cmpgt.ll

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,8 @@ define i32 @cmpgt_nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale
2020
define i32 @cmpgt_nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
2121
; CHECK-LABEL: cmpgt_nxv4i32:
2222
; CHECK: // %bb.0:
23-
; CHECK-NEXT: cmpgt p0.s, p0/z, z0.s, z1.s
23+
; CHECK-NEXT: cmpgt p1.s, p0/z, z0.s, z1.s
24+
; CHECK-NEXT: ptest p0, p1.b
2425
; CHECK-NEXT: cset w0, ne
2526
; CHECK-NEXT: ret
2627
%1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.cmpgt.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
@@ -65,7 +66,8 @@ define i32 @cmpgt_wide_nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <v
6566
define i32 @cmpgt_wide_nxv8i16(<vscale x 16 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 2 x i64> %b) {
6667
; CHECK-LABEL: cmpgt_wide_nxv8i16:
6768
; CHECK: // %bb.0:
68-
; CHECK-NEXT: cmpgt p0.h, p0/z, z0.h, z1.d
69+
; CHECK-NEXT: cmpgt p1.h, p0/z, z0.h, z1.d
70+
; CHECK-NEXT: ptest p0, p1.b
6971
; CHECK-NEXT: cset w0, ne
7072
; CHECK-NEXT: ret
7173
%1 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %pg)
@@ -79,7 +81,8 @@ define i32 @cmpgt_wide_nxv8i16(<vscale x 16 x i1> %pg, <vscale x 8 x i16> %a, <v
7981
define i32 @cmpgt_wide_nxv4i32(<vscale x 16 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 2 x i64> %b) {
8082
; CHECK-LABEL: cmpgt_wide_nxv4i32:
8183
; CHECK: // %bb.0:
82-
; CHECK-NEXT: cmpgt p0.s, p0/z, z0.s, z1.d
84+
; CHECK-NEXT: cmpgt p1.s, p0/z, z0.s, z1.d
85+
; CHECK-NEXT: ptest p0, p1.b
8386
; CHECK-NEXT: cset w0, ne
8487
; CHECK-NEXT: ret
8588
%1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)

llvm/test/CodeGen/AArch64/sve-ptest-removal-cmphi.ll

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,8 @@ define i32 @cmphi_nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale
2020
define i32 @cmphi_nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
2121
; CHECK-LABEL: cmphi_nxv4i32:
2222
; CHECK: // %bb.0:
23-
; CHECK-NEXT: cmphi p0.s, p0/z, z0.s, z1.s
23+
; CHECK-NEXT: cmphi p1.s, p0/z, z0.s, z1.s
24+
; CHECK-NEXT: ptest p0, p1.b
2425
; CHECK-NEXT: cset w0, ne
2526
; CHECK-NEXT: ret
2627
%1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.cmphi.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
@@ -66,7 +67,8 @@ define i32 @cmphi_wide_nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <v
6667
define i32 @cmphi_wide_nxv8i16(<vscale x 16 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 2 x i64> %b) {
6768
; CHECK-LABEL: cmphi_wide_nxv8i16:
6869
; CHECK: // %bb.0:
69-
; CHECK-NEXT: cmphi p0.h, p0/z, z0.h, z1.d
70+
; CHECK-NEXT: cmphi p1.h, p0/z, z0.h, z1.d
71+
; CHECK-NEXT: ptest p0, p1.b
7072
; CHECK-NEXT: cset w0, ne
7173
; CHECK-NEXT: ret
7274
%1 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %pg)
@@ -80,7 +82,8 @@ define i32 @cmphi_wide_nxv8i16(<vscale x 16 x i1> %pg, <vscale x 8 x i16> %a, <v
8082
define i32 @cmphi_wide_nxv4i32(<vscale x 16 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 2 x i64> %b) {
8183
; CHECK-LABEL: cmphi_wide_nxv4i32:
8284
; CHECK: // %bb.0:
83-
; CHECK-NEXT: cmphi p0.s, p0/z, z0.s, z1.d
85+
; CHECK-NEXT: cmphi p1.s, p0/z, z0.s, z1.d
86+
; CHECK-NEXT: ptest p0, p1.b
8487
; CHECK-NEXT: cset w0, ne
8588
; CHECK-NEXT: ret
8689
%1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)

llvm/test/CodeGen/AArch64/sve-ptest-removal-cmphs.ll

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,8 @@ define i32 @cmphs_nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale
2020
define i32 @cmphs_nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
2121
; CHECK-LABEL: cmphs_nxv4i32:
2222
; CHECK: // %bb.0:
23-
; CHECK-NEXT: cmphs p0.s, p0/z, z0.s, z1.s
23+
; CHECK-NEXT: cmphs p1.s, p0/z, z0.s, z1.s
24+
; CHECK-NEXT: ptest p0, p1.b
2425
; CHECK-NEXT: cset w0, ne
2526
; CHECK-NEXT: ret
2627
%1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.cmphs.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
@@ -65,7 +66,8 @@ define i32 @cmphs_wide_nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <v
6566
define i32 @cmphs_wide_nxv8i16(<vscale x 16 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 2 x i64> %b) {
6667
; CHECK-LABEL: cmphs_wide_nxv8i16:
6768
; CHECK: // %bb.0:
68-
; CHECK-NEXT: cmphs p0.h, p0/z, z0.h, z1.d
69+
; CHECK-NEXT: cmphs p1.h, p0/z, z0.h, z1.d
70+
; CHECK-NEXT: ptest p0, p1.b
6971
; CHECK-NEXT: cset w0, ne
7072
; CHECK-NEXT: ret
7173
%1 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %pg)
@@ -79,7 +81,8 @@ define i32 @cmphs_wide_nxv8i16(<vscale x 16 x i1> %pg, <vscale x 8 x i16> %a, <v
7981
define i32 @cmphs_wide_nxv4i32(<vscale x 16 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 2 x i64> %b) {
8082
; CHECK-LABEL: cmphs_wide_nxv4i32:
8183
; CHECK: // %bb.0:
82-
; CHECK-NEXT: cmphs p0.s, p0/z, z0.s, z1.d
84+
; CHECK-NEXT: cmphs p1.s, p0/z, z0.s, z1.d
85+
; CHECK-NEXT: ptest p0, p1.b
8386
; CHECK-NEXT: cset w0, ne
8487
; CHECK-NEXT: ret
8588
%1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)

llvm/test/CodeGen/AArch64/sve-ptest-removal-cmple.ll

Lines changed: 10 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,8 @@ define i32 @cmple_wide_nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <v
3737
define i32 @cmple_wide_nxv8i16(<vscale x 16 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 2 x i64> %b) {
3838
; CHECK-LABEL: cmple_wide_nxv8i16:
3939
; CHECK: // %bb.0:
40-
; CHECK-NEXT: cmple p0.h, p0/z, z0.h, z1.d
40+
; CHECK-NEXT: cmple p1.h, p0/z, z0.h, z1.d
41+
; CHECK-NEXT: ptest p0, p1.b
4142
; CHECK-NEXT: cset w0, ne
4243
; CHECK-NEXT: ret
4344
%1 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %pg)
@@ -51,7 +52,8 @@ define i32 @cmple_wide_nxv8i16(<vscale x 16 x i1> %pg, <vscale x 8 x i16> %a, <v
5152
define i32 @cmple_wide_nxv4i32(<vscale x 16 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 2 x i64> %b) {
5253
; CHECK-LABEL: cmple_wide_nxv4i32:
5354
; CHECK: // %bb.0:
54-
; CHECK-NEXT: cmple p0.s, p0/z, z0.s, z1.d
55+
; CHECK-NEXT: cmple p1.s, p0/z, z0.s, z1.d
56+
; CHECK-NEXT: ptest p0, p1.b
5557
; CHECK-NEXT: cset w0, ne
5658
; CHECK-NEXT: ret
5759
%1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
@@ -114,7 +116,8 @@ define i1 @cmp8_ptest_any_px(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vsc
114116
define i1 @cmp32_ptest_first_px(<vscale x 16 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
115117
; CHECK-LABEL: cmp32_ptest_first_px:
116118
; CHECK: // %bb.0:
117-
; CHECK-NEXT: cmpge p0.s, p0/z, z0.s, z1.s
119+
; CHECK-NEXT: cmpge p1.s, p0/z, z0.s, z1.s
120+
; CHECK-NEXT: ptest p0, p1.b
118121
; CHECK-NEXT: cset w0, mi
119122
; CHECK-NEXT: ret
120123
%1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
@@ -130,7 +133,8 @@ define i1 @cmp32_ptest_first_px(<vscale x 16 x i1> %pg, <vscale x 4 x i32> %a, <
130133
define i1 @cmp32_ptest_last_px(<vscale x 16 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
131134
; CHECK-LABEL: cmp32_ptest_last_px:
132135
; CHECK: // %bb.0:
133-
; CHECK-NEXT: cmpge p0.s, p0/z, z0.s, z1.s
136+
; CHECK-NEXT: cmpge p1.s, p0/z, z0.s, z1.s
137+
; CHECK-NEXT: ptest p0, p1.b
134138
; CHECK-NEXT: cset w0, lo
135139
; CHECK-NEXT: ret
136140
%1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
@@ -146,7 +150,8 @@ define i1 @cmp32_ptest_last_px(<vscale x 16 x i1> %pg, <vscale x 4 x i32> %a, <v
146150
define i1 @cmp32_ptest_any_px(<vscale x 16 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
147151
; CHECK-LABEL: cmp32_ptest_any_px:
148152
; CHECK: // %bb.0:
149-
; CHECK-NEXT: cmpge p0.s, p0/z, z0.s, z1.s
153+
; CHECK-NEXT: cmpge p1.s, p0/z, z0.s, z1.s
154+
; CHECK-NEXT: ptest p0, p1.b
150155
; CHECK-NEXT: cset w0, ne
151156
; CHECK-NEXT: ret
152157
%1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)

llvm/test/CodeGen/AArch64/sve-ptest-removal-cmplo.ll

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,8 @@ define i32 @cmplo_wide_nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <v
3737
define i32 @cmplo_wide_nxv8i16(<vscale x 16 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 2 x i64> %b) {
3838
; CHECK-LABEL: cmplo_wide_nxv8i16:
3939
; CHECK: // %bb.0:
40-
; CHECK-NEXT: cmplo p0.h, p0/z, z0.h, z1.d
40+
; CHECK-NEXT: cmplo p1.h, p0/z, z0.h, z1.d
41+
; CHECK-NEXT: ptest p0, p1.b
4142
; CHECK-NEXT: cset w0, ne
4243
; CHECK-NEXT: ret
4344
%1 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %pg)
@@ -51,7 +52,8 @@ define i32 @cmplo_wide_nxv8i16(<vscale x 16 x i1> %pg, <vscale x 8 x i16> %a, <v
5152
define i32 @cmplo_wide_nxv4i32(<vscale x 16 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 2 x i64> %b) {
5253
; CHECK-LABEL: cmplo_wide_nxv4i32:
5354
; CHECK: // %bb.0:
54-
; CHECK-NEXT: cmplo p0.s, p0/z, z0.s, z1.d
55+
; CHECK-NEXT: cmplo p1.s, p0/z, z0.s, z1.d
56+
; CHECK-NEXT: ptest p0, p1.b
5557
; CHECK-NEXT: cset w0, ne
5658
; CHECK-NEXT: ret
5759
%1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)

llvm/test/CodeGen/AArch64/sve-ptest-removal-cmpls.ll

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,8 @@ define i32 @cmpls_wide_nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <v
3737
define i32 @cmpls_wide_nxv8i16(<vscale x 16 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 2 x i64> %b) {
3838
; CHECK-LABEL: cmpls_wide_nxv8i16:
3939
; CHECK: // %bb.0:
40-
; CHECK-NEXT: cmpls p0.h, p0/z, z0.h, z1.d
40+
; CHECK-NEXT: cmpls p1.h, p0/z, z0.h, z1.d
41+
; CHECK-NEXT: ptest p0, p1.b
4142
; CHECK-NEXT: cset w0, ne
4243
; CHECK-NEXT: ret
4344
%1 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %pg)
@@ -51,7 +52,8 @@ define i32 @cmpls_wide_nxv8i16(<vscale x 16 x i1> %pg, <vscale x 8 x i16> %a, <v
5152
define i32 @cmpls_wide_nxv4i32(<vscale x 16 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 2 x i64> %b) {
5253
; CHECK-LABEL: cmpls_wide_nxv4i32:
5354
; CHECK: // %bb.0:
54-
; CHECK-NEXT: cmpls p0.s, p0/z, z0.s, z1.d
55+
; CHECK-NEXT: cmpls p1.s, p0/z, z0.s, z1.d
56+
; CHECK-NEXT: ptest p0, p1.b
5557
; CHECK-NEXT: cset w0, ne
5658
; CHECK-NEXT: ret
5759
%1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)

llvm/test/CodeGen/AArch64/sve-ptest-removal-cmplt.ll

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,8 @@ define i32 @cmplt_wide_nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <v
3737
define i32 @cmplt_wide_nxv8i16(<vscale x 16 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 2 x i64> %b) {
3838
; CHECK-LABEL: cmplt_wide_nxv8i16:
3939
; CHECK: // %bb.0:
40-
; CHECK-NEXT: cmplt p0.h, p0/z, z0.h, z1.d
40+
; CHECK-NEXT: cmplt p1.h, p0/z, z0.h, z1.d
41+
; CHECK-NEXT: ptest p0, p1.b
4142
; CHECK-NEXT: cset w0, ne
4243
; CHECK-NEXT: ret
4344
%1 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %pg)
@@ -51,7 +52,8 @@ define i32 @cmplt_wide_nxv8i16(<vscale x 16 x i1> %pg, <vscale x 8 x i16> %a, <v
5152
define i32 @cmplt_wide_nxv4i32(<vscale x 16 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 2 x i64> %b) {
5253
; CHECK-LABEL: cmplt_wide_nxv4i32:
5354
; CHECK: // %bb.0:
54-
; CHECK-NEXT: cmplt p0.s, p0/z, z0.s, z1.d
55+
; CHECK-NEXT: cmplt p1.s, p0/z, z0.s, z1.d
56+
; CHECK-NEXT: ptest p0, p1.b
5557
; CHECK-NEXT: cset w0, ne
5658
; CHECK-NEXT: ret
5759
%1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)

llvm/test/CodeGen/AArch64/sve-ptest-removal-cmpne.ll

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,8 @@ define i32 @cmpne_nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale
2020
define i32 @cmpne_nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
2121
; CHECK-LABEL: cmpne_nxv4i32:
2222
; CHECK: // %bb.0:
23-
; CHECK-NEXT: cmpne p0.s, p0/z, z0.s, z1.s
23+
; CHECK-NEXT: cmpne p1.s, p0/z, z0.s, z1.s
24+
; CHECK-NEXT: ptest p0, p1.b
2425
; CHECK-NEXT: cset w0, ne
2526
; CHECK-NEXT: ret
2627
%1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.cmpne.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
@@ -65,7 +66,8 @@ define i32 @cmpne_wide_nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <v
6566
define i32 @cmpne_wide_nxv8i16(<vscale x 16 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 2 x i64> %b) {
6667
; CHECK-LABEL: cmpne_wide_nxv8i16:
6768
; CHECK: // %bb.0:
68-
; CHECK-NEXT: cmpne p0.h, p0/z, z0.h, z1.d
69+
; CHECK-NEXT: cmpne p1.h, p0/z, z0.h, z1.d
70+
; CHECK-NEXT: ptest p0, p1.b
6971
; CHECK-NEXT: cset w0, ne
7072
; CHECK-NEXT: ret
7173
%1 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %pg)
@@ -79,7 +81,8 @@ define i32 @cmpne_wide_nxv8i16(<vscale x 16 x i1> %pg, <vscale x 8 x i16> %a, <v
7981
define i32 @cmpne_wide_nxv4i32(<vscale x 16 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 2 x i64> %b) {
8082
; CHECK-LABEL: cmpne_wide_nxv4i32:
8183
; CHECK: // %bb.0:
82-
; CHECK-NEXT: cmpne p0.s, p0/z, z0.s, z1.d
84+
; CHECK-NEXT: cmpne p1.s, p0/z, z0.s, z1.d
85+
; CHECK-NEXT: ptest p0, p1.b
8386
; CHECK-NEXT: cset w0, ne
8487
; CHECK-NEXT: ret
8588
%1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)

0 commit comments

Comments
 (0)