Skip to content

Commit 05be3ca

Browse files
davemgreen authored and tstellar committed
[AArch64] Add BE test coverage for popcount. NFC
For llvm#129843 (cherry picked from commit b673a59)
1 parent 0e96713 commit 05be3ca

File tree

2 files changed

+265
-0
lines changed

2 files changed

+265
-0
lines changed

Diff for: llvm/test/CodeGen/AArch64/arm64-popcnt.ll

+161
Original file line number | Diff line number | Diff line change
@@ -2,6 +2,7 @@
22
; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple | FileCheck %s
33
; RUN: llc < %s -mtriple=aarch64 -mattr -neon -aarch64-neon-syntax=apple | FileCheck -check-prefix=CHECK-NONEON %s
44
; RUN: llc < %s -mtriple=aarch64 -mattr +cssc -aarch64-neon-syntax=apple | FileCheck -check-prefix=CHECK-CSSC %s
5+
; RUN: llc < %s -mtriple=aarch64_be-none-eabi | FileCheck %s --check-prefix=CHECK-BE
56

67
define i32 @cnt32_advsimd(i32 %x) nounwind readnone {
78
; CHECK-LABEL: cnt32_advsimd:
@@ -32,6 +33,14 @@ define i32 @cnt32_advsimd(i32 %x) nounwind readnone {
3233
; CHECK-CSSC: // %bb.0:
3334
; CHECK-CSSC-NEXT: cnt w0, w0
3435
; CHECK-CSSC-NEXT: ret
36+
;
37+
; CHECK-BE-LABEL: cnt32_advsimd:
38+
; CHECK-BE: // %bb.0:
39+
; CHECK-BE-NEXT: fmov s0, w0
40+
; CHECK-BE-NEXT: cnt v0.8b, v0.8b
41+
; CHECK-BE-NEXT: addv b0, v0.8b
42+
; CHECK-BE-NEXT: fmov w0, s0
43+
; CHECK-BE-NEXT: ret
3544
%cnt = tail call i32 @llvm.ctpop.i32(i32 %x)
3645
ret i32 %cnt
3746
}
@@ -69,6 +78,16 @@ define i32 @cnt32_advsimd_2(<2 x i32> %x) {
6978
; CHECK-CSSC-NEXT: fmov w8, s0
7079
; CHECK-CSSC-NEXT: cnt w0, w8
7180
; CHECK-CSSC-NEXT: ret
81+
;
82+
; CHECK-BE-LABEL: cnt32_advsimd_2:
83+
; CHECK-BE: // %bb.0:
84+
; CHECK-BE-NEXT: rev64 v0.2s, v0.2s
85+
; CHECK-BE-NEXT: fmov w8, s0
86+
; CHECK-BE-NEXT: fmov s0, w8
87+
; CHECK-BE-NEXT: cnt v0.8b, v0.8b
88+
; CHECK-BE-NEXT: addv b0, v0.8b
89+
; CHECK-BE-NEXT: fmov w0, s0
90+
; CHECK-BE-NEXT: ret
7291
%1 = extractelement <2 x i32> %x, i64 0
7392
%2 = tail call i32 @llvm.ctpop.i32(i32 %1)
7493
ret i32 %2
@@ -103,6 +122,16 @@ define i64 @cnt64_advsimd(i64 %x) nounwind readnone {
103122
; CHECK-CSSC: // %bb.0:
104123
; CHECK-CSSC-NEXT: cnt x0, x0
105124
; CHECK-CSSC-NEXT: ret
125+
;
126+
; CHECK-BE-LABEL: cnt64_advsimd:
127+
; CHECK-BE: // %bb.0:
128+
; CHECK-BE-NEXT: fmov d0, x0
129+
; CHECK-BE-NEXT: rev64 v0.8b, v0.8b
130+
; CHECK-BE-NEXT: cnt v0.8b, v0.8b
131+
; CHECK-BE-NEXT: addv b0, v0.8b
132+
; CHECK-BE-NEXT: rev64 v0.8b, v0.8b
133+
; CHECK-BE-NEXT: fmov x0, d0
134+
; CHECK-BE-NEXT: ret
106135
%cnt = tail call i64 @llvm.ctpop.i64(i64 %x)
107136
ret i64 %cnt
108137
}
@@ -147,6 +176,22 @@ define i32 @cnt32(i32 %x) nounwind readnone noimplicitfloat {
147176
; CHECK-CSSC: // %bb.0:
148177
; CHECK-CSSC-NEXT: cnt w0, w0
149178
; CHECK-CSSC-NEXT: ret
179+
;
180+
; CHECK-BE-LABEL: cnt32:
181+
; CHECK-BE: // %bb.0:
182+
; CHECK-BE-NEXT: lsr w9, w0, #1
183+
; CHECK-BE-NEXT: mov w8, #16843009 // =0x1010101
184+
; CHECK-BE-NEXT: and w9, w9, #0x55555555
185+
; CHECK-BE-NEXT: sub w9, w0, w9
186+
; CHECK-BE-NEXT: lsr w10, w9, #2
187+
; CHECK-BE-NEXT: and w9, w9, #0x33333333
188+
; CHECK-BE-NEXT: and w10, w10, #0x33333333
189+
; CHECK-BE-NEXT: add w9, w9, w10
190+
; CHECK-BE-NEXT: add w9, w9, w9, lsr #4
191+
; CHECK-BE-NEXT: and w9, w9, #0xf0f0f0f
192+
; CHECK-BE-NEXT: mul w8, w9, w8
193+
; CHECK-BE-NEXT: lsr w0, w8, #24
194+
; CHECK-BE-NEXT: ret
150195
%cnt = tail call i32 @llvm.ctpop.i32(i32 %x)
151196
ret i32 %cnt
152197
}
@@ -188,6 +233,22 @@ define i64 @cnt64(i64 %x) nounwind readnone noimplicitfloat {
188233
; CHECK-CSSC: // %bb.0:
189234
; CHECK-CSSC-NEXT: cnt x0, x0
190235
; CHECK-CSSC-NEXT: ret
236+
;
237+
; CHECK-BE-LABEL: cnt64:
238+
; CHECK-BE: // %bb.0:
239+
; CHECK-BE-NEXT: lsr x9, x0, #1
240+
; CHECK-BE-NEXT: mov x8, #72340172838076673 // =0x101010101010101
241+
; CHECK-BE-NEXT: and x9, x9, #0x5555555555555555
242+
; CHECK-BE-NEXT: sub x9, x0, x9
243+
; CHECK-BE-NEXT: lsr x10, x9, #2
244+
; CHECK-BE-NEXT: and x9, x9, #0x3333333333333333
245+
; CHECK-BE-NEXT: and x10, x10, #0x3333333333333333
246+
; CHECK-BE-NEXT: add x9, x9, x10
247+
; CHECK-BE-NEXT: add x9, x9, x9, lsr #4
248+
; CHECK-BE-NEXT: and x9, x9, #0xf0f0f0f0f0f0f0f
249+
; CHECK-BE-NEXT: mul x8, x9, x8
250+
; CHECK-BE-NEXT: lsr x0, x8, #56
251+
; CHECK-BE-NEXT: ret
191252
%cnt = tail call i64 @llvm.ctpop.i64(i64 %x)
192253
ret i64 %cnt
193254
}
@@ -215,6 +276,14 @@ define i32 @ctpop_eq_one(i64 %x) nounwind readnone {
215276
; CHECK-CSSC-NEXT: cmp x8, #1
216277
; CHECK-CSSC-NEXT: cset w0, eq
217278
; CHECK-CSSC-NEXT: ret
279+
;
280+
; CHECK-BE-LABEL: ctpop_eq_one:
281+
; CHECK-BE: // %bb.0:
282+
; CHECK-BE-NEXT: sub x8, x0, #1
283+
; CHECK-BE-NEXT: eor x9, x0, x8
284+
; CHECK-BE-NEXT: cmp x9, x8
285+
; CHECK-BE-NEXT: cset w0, hi
286+
; CHECK-BE-NEXT: ret
218287
%count = tail call i64 @llvm.ctpop.i64(i64 %x)
219288
%cmp = icmp eq i64 %count, 1
220289
%conv = zext i1 %cmp to i32
@@ -244,6 +313,14 @@ define i32 @ctpop_ne_one(i64 %x) nounwind readnone {
244313
; CHECK-CSSC-NEXT: cmp x8, #1
245314
; CHECK-CSSC-NEXT: cset w0, ne
246315
; CHECK-CSSC-NEXT: ret
316+
;
317+
; CHECK-BE-LABEL: ctpop_ne_one:
318+
; CHECK-BE: // %bb.0:
319+
; CHECK-BE-NEXT: sub x8, x0, #1
320+
; CHECK-BE-NEXT: eor x9, x0, x8
321+
; CHECK-BE-NEXT: cmp x9, x8
322+
; CHECK-BE-NEXT: cset w0, ls
323+
; CHECK-BE-NEXT: ret
247324
%count = tail call i64 @llvm.ctpop.i64(i64 %x)
248325
%cmp = icmp ne i64 %count, 1
249326
%conv = zext i1 %cmp to i32
@@ -273,6 +350,14 @@ define i1 @ctpop32_ne_one(i32 %x) nounwind readnone {
273350
; CHECK-CSSC-NEXT: cmp w8, #1
274351
; CHECK-CSSC-NEXT: cset w0, ne
275352
; CHECK-CSSC-NEXT: ret
353+
;
354+
; CHECK-BE-LABEL: ctpop32_ne_one:
355+
; CHECK-BE: // %bb.0:
356+
; CHECK-BE-NEXT: sub w8, w0, #1
357+
; CHECK-BE-NEXT: eor w9, w0, w8
358+
; CHECK-BE-NEXT: cmp w9, w8
359+
; CHECK-BE-NEXT: cset w0, ls
360+
; CHECK-BE-NEXT: ret
276361
%count = tail call i32 @llvm.ctpop.i32(i32 %x)
277362
%cmp = icmp ne i32 %count, 1
278363
ret i1 %cmp
@@ -299,6 +384,13 @@ define i1 @ctpop32_eq_one_nonzero(i32 %x) {
299384
; CHECK-CSSC-NEXT: tst w0, w8
300385
; CHECK-CSSC-NEXT: cset w0, eq
301386
; CHECK-CSSC-NEXT: ret
387+
;
388+
; CHECK-BE-LABEL: ctpop32_eq_one_nonzero:
389+
; CHECK-BE: // %bb.0: // %entry
390+
; CHECK-BE-NEXT: sub w8, w0, #1
391+
; CHECK-BE-NEXT: tst w0, w8
392+
; CHECK-BE-NEXT: cset w0, eq
393+
; CHECK-BE-NEXT: ret
302394
entry:
303395
%popcnt = call range(i32 1, 33) i32 @llvm.ctpop.i32(i32 %x)
304396
%cmp = icmp eq i32 %popcnt, 1
@@ -326,11 +418,80 @@ define i1 @ctpop32_ne_one_nonzero(i32 %x) {
326418
; CHECK-CSSC-NEXT: tst w0, w8
327419
; CHECK-CSSC-NEXT: cset w0, ne
328420
; CHECK-CSSC-NEXT: ret
421+
;
422+
; CHECK-BE-LABEL: ctpop32_ne_one_nonzero:
423+
; CHECK-BE: // %bb.0: // %entry
424+
; CHECK-BE-NEXT: sub w8, w0, #1
425+
; CHECK-BE-NEXT: tst w0, w8
426+
; CHECK-BE-NEXT: cset w0, ne
427+
; CHECK-BE-NEXT: ret
329428
entry:
330429
%popcnt = tail call range(i32 1, 33) i32 @llvm.ctpop.i32(i32 %x)
331430
%cmp = icmp ne i32 %popcnt, 1
332431
ret i1 %cmp
333432
}
334433

434+
; cnt128: population count of a full i128 value via the llvm.ctpop.i128 intrinsic.
; The assertion groups below pin the expected llc output for this function under
; each llc invocation declared at the top of this file (the default arm64 run,
; the run with NEON disabled, the run with CSSC enabled, and the aarch64_be run).
define i128 @cnt128(i128 %x) nounwind readnone {
435+
; CHECK-LABEL: cnt128:
436+
; CHECK: // %bb.0:
437+
; CHECK-NEXT: fmov d0, x0
438+
; CHECK-NEXT: mov.d v0[1], x1
439+
; CHECK-NEXT: cnt.16b v0, v0
440+
; CHECK-NEXT: addv.16b b0, v0
441+
; CHECK-NEXT: mov.d x1, v0[1]
442+
; CHECK-NEXT: fmov x0, d0
443+
; CHECK-NEXT: ret
444+
;
445+
; CHECK-NONEON-LABEL: cnt128:
446+
; CHECK-NONEON: // %bb.0:
447+
; CHECK-NONEON-NEXT: lsr x9, x0, #1
448+
; CHECK-NONEON-NEXT: lsr x10, x1, #1
449+
; CHECK-NONEON-NEXT: mov x8, #72340172838076673 // =0x101010101010101
450+
; CHECK-NONEON-NEXT: and x9, x9, #0x5555555555555555
451+
; CHECK-NONEON-NEXT: and x10, x10, #0x5555555555555555
452+
; CHECK-NONEON-NEXT: sub x9, x0, x9
453+
; CHECK-NONEON-NEXT: sub x10, x1, x10
454+
; CHECK-NONEON-NEXT: mov x1, xzr
455+
; CHECK-NONEON-NEXT: lsr x11, x9, #2
456+
; CHECK-NONEON-NEXT: lsr x12, x10, #2
457+
; CHECK-NONEON-NEXT: and x9, x9, #0x3333333333333333
458+
; CHECK-NONEON-NEXT: and x10, x10, #0x3333333333333333
459+
; CHECK-NONEON-NEXT: and x11, x11, #0x3333333333333333
460+
; CHECK-NONEON-NEXT: add x9, x9, x11
461+
; CHECK-NONEON-NEXT: and x11, x12, #0x3333333333333333
462+
; CHECK-NONEON-NEXT: add x9, x9, x9, lsr #4
463+
; CHECK-NONEON-NEXT: add x10, x10, x11
464+
; CHECK-NONEON-NEXT: add x10, x10, x10, lsr #4
465+
; CHECK-NONEON-NEXT: and x9, x9, #0xf0f0f0f0f0f0f0f
466+
; CHECK-NONEON-NEXT: mul x9, x9, x8
467+
; CHECK-NONEON-NEXT: and x10, x10, #0xf0f0f0f0f0f0f0f
468+
; CHECK-NONEON-NEXT: mul x8, x10, x8
469+
; CHECK-NONEON-NEXT: lsr x9, x9, #56
470+
; CHECK-NONEON-NEXT: add x0, x9, x8, lsr #56
471+
; CHECK-NONEON-NEXT: ret
472+
;
473+
; CHECK-CSSC-LABEL: cnt128:
474+
; CHECK-CSSC: // %bb.0:
475+
; CHECK-CSSC-NEXT: cnt x8, x1
476+
; CHECK-CSSC-NEXT: cnt x9, x0
477+
; CHECK-CSSC-NEXT: mov x1, xzr
478+
; CHECK-CSSC-NEXT: add x0, x9, x8
479+
; CHECK-CSSC-NEXT: ret
480+
;
481+
; CHECK-BE-LABEL: cnt128:
482+
; CHECK-BE: // %bb.0:
483+
; CHECK-BE-NEXT: fmov d0, x0
484+
; CHECK-BE-NEXT: mov v0.d[1], x1
485+
; CHECK-BE-NEXT: rev64 v0.16b, v0.16b
486+
; CHECK-BE-NEXT: cnt v0.16b, v0.16b
487+
; CHECK-BE-NEXT: addv b0, v0.16b
488+
; CHECK-BE-NEXT: rev64 v0.16b, v0.16b
489+
; CHECK-BE-NEXT: mov x1, v0.d[1]
490+
; CHECK-BE-NEXT: fmov x0, d0
491+
; CHECK-BE-NEXT: ret
492+
; IR under test: a single tail call to the ctpop intrinsic on %x, whose result is
; returned unchanged.
%cnt = tail call i128 @llvm.ctpop.i128(i128 %x)
493+
ret i128 %cnt
494+
}
495+
335496
declare i32 @llvm.ctpop.i32(i32) nounwind readnone
336497
declare i64 @llvm.ctpop.i64(i64) nounwind readnone

0 commit comments

Comments
 (0)