2
2
; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple | FileCheck %s
3
3
; RUN: llc < %s -mtriple=aarch64 -mattr -neon -aarch64-neon-syntax=apple | FileCheck -check-prefix=CHECK-NONEON %s
4
4
; RUN: llc < %s -mtriple=aarch64 -mattr +cssc -aarch64-neon-syntax=apple | FileCheck -check-prefix=CHECK-CSSC %s
5
+ ; RUN: llc < %s -mtriple=aarch64_be-none-eabi | FileCheck %s --check-prefix=CHECK-BE
5
6
6
7
define i32 @cnt32_advsimd (i32 %x ) nounwind readnone {
7
8
; CHECK-LABEL: cnt32_advsimd:
@@ -32,6 +33,14 @@ define i32 @cnt32_advsimd(i32 %x) nounwind readnone {
32
33
; CHECK-CSSC: // %bb.0:
33
34
; CHECK-CSSC-NEXT: cnt w0, w0
34
35
; CHECK-CSSC-NEXT: ret
36
+ ;
37
+ ; CHECK-BE-LABEL: cnt32_advsimd:
38
+ ; CHECK-BE: // %bb.0:
39
+ ; CHECK-BE-NEXT: fmov s0, w0
40
+ ; CHECK-BE-NEXT: cnt v0.8b, v0.8b
41
+ ; CHECK-BE-NEXT: addv b0, v0.8b
42
+ ; CHECK-BE-NEXT: fmov w0, s0
43
+ ; CHECK-BE-NEXT: ret
35
44
%cnt = tail call i32 @llvm.ctpop.i32 (i32 %x )
36
45
ret i32 %cnt
37
46
}
@@ -69,6 +78,16 @@ define i32 @cnt32_advsimd_2(<2 x i32> %x) {
69
78
; CHECK-CSSC-NEXT: fmov w8, s0
70
79
; CHECK-CSSC-NEXT: cnt w0, w8
71
80
; CHECK-CSSC-NEXT: ret
81
+ ;
82
+ ; CHECK-BE-LABEL: cnt32_advsimd_2:
83
+ ; CHECK-BE: // %bb.0:
84
+ ; CHECK-BE-NEXT: rev64 v0.2s, v0.2s
85
+ ; CHECK-BE-NEXT: fmov w8, s0
86
+ ; CHECK-BE-NEXT: fmov s0, w8
87
+ ; CHECK-BE-NEXT: cnt v0.8b, v0.8b
88
+ ; CHECK-BE-NEXT: addv b0, v0.8b
89
+ ; CHECK-BE-NEXT: fmov w0, s0
90
+ ; CHECK-BE-NEXT: ret
72
91
%1 = extractelement <2 x i32 > %x , i64 0
73
92
%2 = tail call i32 @llvm.ctpop.i32 (i32 %1 )
74
93
ret i32 %2
@@ -103,6 +122,16 @@ define i64 @cnt64_advsimd(i64 %x) nounwind readnone {
103
122
; CHECK-CSSC: // %bb.0:
104
123
; CHECK-CSSC-NEXT: cnt x0, x0
105
124
; CHECK-CSSC-NEXT: ret
125
+ ;
126
+ ; CHECK-BE-LABEL: cnt64_advsimd:
127
+ ; CHECK-BE: // %bb.0:
128
+ ; CHECK-BE-NEXT: fmov d0, x0
129
+ ; CHECK-BE-NEXT: rev64 v0.8b, v0.8b
130
+ ; CHECK-BE-NEXT: cnt v0.8b, v0.8b
131
+ ; CHECK-BE-NEXT: addv b0, v0.8b
132
+ ; CHECK-BE-NEXT: rev64 v0.8b, v0.8b
133
+ ; CHECK-BE-NEXT: fmov x0, d0
134
+ ; CHECK-BE-NEXT: ret
106
135
%cnt = tail call i64 @llvm.ctpop.i64 (i64 %x )
107
136
ret i64 %cnt
108
137
}
@@ -147,6 +176,22 @@ define i32 @cnt32(i32 %x) nounwind readnone noimplicitfloat {
147
176
; CHECK-CSSC: // %bb.0:
148
177
; CHECK-CSSC-NEXT: cnt w0, w0
149
178
; CHECK-CSSC-NEXT: ret
179
+ ;
180
+ ; CHECK-BE-LABEL: cnt32:
181
+ ; CHECK-BE: // %bb.0:
182
+ ; CHECK-BE-NEXT: lsr w9, w0, #1
183
+ ; CHECK-BE-NEXT: mov w8, #16843009 // =0x1010101
184
+ ; CHECK-BE-NEXT: and w9, w9, #0x55555555
185
+ ; CHECK-BE-NEXT: sub w9, w0, w9
186
+ ; CHECK-BE-NEXT: lsr w10, w9, #2
187
+ ; CHECK-BE-NEXT: and w9, w9, #0x33333333
188
+ ; CHECK-BE-NEXT: and w10, w10, #0x33333333
189
+ ; CHECK-BE-NEXT: add w9, w9, w10
190
+ ; CHECK-BE-NEXT: add w9, w9, w9, lsr #4
191
+ ; CHECK-BE-NEXT: and w9, w9, #0xf0f0f0f
192
+ ; CHECK-BE-NEXT: mul w8, w9, w8
193
+ ; CHECK-BE-NEXT: lsr w0, w8, #24
194
+ ; CHECK-BE-NEXT: ret
150
195
%cnt = tail call i32 @llvm.ctpop.i32 (i32 %x )
151
196
ret i32 %cnt
152
197
}
@@ -188,6 +233,22 @@ define i64 @cnt64(i64 %x) nounwind readnone noimplicitfloat {
188
233
; CHECK-CSSC: // %bb.0:
189
234
; CHECK-CSSC-NEXT: cnt x0, x0
190
235
; CHECK-CSSC-NEXT: ret
236
+ ;
237
+ ; CHECK-BE-LABEL: cnt64:
238
+ ; CHECK-BE: // %bb.0:
239
+ ; CHECK-BE-NEXT: lsr x9, x0, #1
240
+ ; CHECK-BE-NEXT: mov x8, #72340172838076673 // =0x101010101010101
241
+ ; CHECK-BE-NEXT: and x9, x9, #0x5555555555555555
242
+ ; CHECK-BE-NEXT: sub x9, x0, x9
243
+ ; CHECK-BE-NEXT: lsr x10, x9, #2
244
+ ; CHECK-BE-NEXT: and x9, x9, #0x3333333333333333
245
+ ; CHECK-BE-NEXT: and x10, x10, #0x3333333333333333
246
+ ; CHECK-BE-NEXT: add x9, x9, x10
247
+ ; CHECK-BE-NEXT: add x9, x9, x9, lsr #4
248
+ ; CHECK-BE-NEXT: and x9, x9, #0xf0f0f0f0f0f0f0f
249
+ ; CHECK-BE-NEXT: mul x8, x9, x8
250
+ ; CHECK-BE-NEXT: lsr x0, x8, #56
251
+ ; CHECK-BE-NEXT: ret
191
252
%cnt = tail call i64 @llvm.ctpop.i64 (i64 %x )
192
253
ret i64 %cnt
193
254
}
@@ -215,6 +276,14 @@ define i32 @ctpop_eq_one(i64 %x) nounwind readnone {
215
276
; CHECK-CSSC-NEXT: cmp x8, #1
216
277
; CHECK-CSSC-NEXT: cset w0, eq
217
278
; CHECK-CSSC-NEXT: ret
279
+ ;
280
+ ; CHECK-BE-LABEL: ctpop_eq_one:
281
+ ; CHECK-BE: // %bb.0:
282
+ ; CHECK-BE-NEXT: sub x8, x0, #1
283
+ ; CHECK-BE-NEXT: eor x9, x0, x8
284
+ ; CHECK-BE-NEXT: cmp x9, x8
285
+ ; CHECK-BE-NEXT: cset w0, hi
286
+ ; CHECK-BE-NEXT: ret
218
287
%count = tail call i64 @llvm.ctpop.i64 (i64 %x )
219
288
%cmp = icmp eq i64 %count , 1
220
289
%conv = zext i1 %cmp to i32
@@ -244,6 +313,14 @@ define i32 @ctpop_ne_one(i64 %x) nounwind readnone {
244
313
; CHECK-CSSC-NEXT: cmp x8, #1
245
314
; CHECK-CSSC-NEXT: cset w0, ne
246
315
; CHECK-CSSC-NEXT: ret
316
+ ;
317
+ ; CHECK-BE-LABEL: ctpop_ne_one:
318
+ ; CHECK-BE: // %bb.0:
319
+ ; CHECK-BE-NEXT: sub x8, x0, #1
320
+ ; CHECK-BE-NEXT: eor x9, x0, x8
321
+ ; CHECK-BE-NEXT: cmp x9, x8
322
+ ; CHECK-BE-NEXT: cset w0, ls
323
+ ; CHECK-BE-NEXT: ret
247
324
%count = tail call i64 @llvm.ctpop.i64 (i64 %x )
248
325
%cmp = icmp ne i64 %count , 1
249
326
%conv = zext i1 %cmp to i32
@@ -273,6 +350,14 @@ define i1 @ctpop32_ne_one(i32 %x) nounwind readnone {
273
350
; CHECK-CSSC-NEXT: cmp w8, #1
274
351
; CHECK-CSSC-NEXT: cset w0, ne
275
352
; CHECK-CSSC-NEXT: ret
353
+ ;
354
+ ; CHECK-BE-LABEL: ctpop32_ne_one:
355
+ ; CHECK-BE: // %bb.0:
356
+ ; CHECK-BE-NEXT: sub w8, w0, #1
357
+ ; CHECK-BE-NEXT: eor w9, w0, w8
358
+ ; CHECK-BE-NEXT: cmp w9, w8
359
+ ; CHECK-BE-NEXT: cset w0, ls
360
+ ; CHECK-BE-NEXT: ret
276
361
%count = tail call i32 @llvm.ctpop.i32 (i32 %x )
277
362
%cmp = icmp ne i32 %count , 1
278
363
ret i1 %cmp
@@ -299,6 +384,13 @@ define i1 @ctpop32_eq_one_nonzero(i32 %x) {
299
384
; CHECK-CSSC-NEXT: tst w0, w8
300
385
; CHECK-CSSC-NEXT: cset w0, eq
301
386
; CHECK-CSSC-NEXT: ret
387
+ ;
388
+ ; CHECK-BE-LABEL: ctpop32_eq_one_nonzero:
389
+ ; CHECK-BE: // %bb.0: // %entry
390
+ ; CHECK-BE-NEXT: sub w8, w0, #1
391
+ ; CHECK-BE-NEXT: tst w0, w8
392
+ ; CHECK-BE-NEXT: cset w0, eq
393
+ ; CHECK-BE-NEXT: ret
302
394
entry:
303
395
%popcnt = call range(i32 1 , 33 ) i32 @llvm.ctpop.i32 (i32 %x )
304
396
%cmp = icmp eq i32 %popcnt , 1
@@ -326,11 +418,80 @@ define i1 @ctpop32_ne_one_nonzero(i32 %x) {
326
418
; CHECK-CSSC-NEXT: tst w0, w8
327
419
; CHECK-CSSC-NEXT: cset w0, ne
328
420
; CHECK-CSSC-NEXT: ret
421
+ ;
422
+ ; CHECK-BE-LABEL: ctpop32_ne_one_nonzero:
423
+ ; CHECK-BE: // %bb.0: // %entry
424
+ ; CHECK-BE-NEXT: sub w8, w0, #1
425
+ ; CHECK-BE-NEXT: tst w0, w8
426
+ ; CHECK-BE-NEXT: cset w0, ne
427
+ ; CHECK-BE-NEXT: ret
329
428
entry:
330
429
%popcnt = tail call range(i32 1 , 33 ) i32 @llvm.ctpop.i32 (i32 %x )
331
430
%cmp = icmp ne i32 %popcnt , 1
332
431
ret i1 %cmp
333
432
}
334
433
434
; cnt128: returns the result of @llvm.ctpop.i128 applied to the i128 argument %x.
; The expected llc output is pinned separately for each FileCheck prefix:
;   CHECK        - builds a vector from x0/x1, then cnt.16b followed by addv.16b
;   CHECK-NONEON - scalar shift/mask/multiply sequence on x0 and x1, results
;                  added into x0, x1 set to xzr
;   CHECK-CSSC   - one scalar cnt per input register, added into x0, x1 set to xzr
;   CHECK-BE     - same vector sequence as CHECK, with a rev64 before the cnt
;                  and another after the addv
; NOTE(review): the check lines look tool-generated; presumably they should be
; regenerated rather than hand-edited - confirm against the file header.
; NOTE(review): no declare for @llvm.ctpop.i128 is visible in this chunk - confirm
; it exists elsewhere in the file or is not required.
+ define i128 @cnt128 (i128 %x ) nounwind readnone {
435
+ ; CHECK-LABEL: cnt128:
436
+ ; CHECK: // %bb.0:
437
+ ; CHECK-NEXT: fmov d0, x0
438
+ ; CHECK-NEXT: mov.d v0[1], x1
439
+ ; CHECK-NEXT: cnt.16b v0, v0
440
+ ; CHECK-NEXT: addv.16b b0, v0
441
+ ; CHECK-NEXT: mov.d x1, v0[1]
442
+ ; CHECK-NEXT: fmov x0, d0
443
+ ; CHECK-NEXT: ret
444
+ ;
445
+ ; CHECK-NONEON-LABEL: cnt128:
446
+ ; CHECK-NONEON: // %bb.0:
447
+ ; CHECK-NONEON-NEXT: lsr x9, x0, #1
448
+ ; CHECK-NONEON-NEXT: lsr x10, x1, #1
449
+ ; CHECK-NONEON-NEXT: mov x8, #72340172838076673 // =0x101010101010101
450
+ ; CHECK-NONEON-NEXT: and x9, x9, #0x5555555555555555
451
+ ; CHECK-NONEON-NEXT: and x10, x10, #0x5555555555555555
452
+ ; CHECK-NONEON-NEXT: sub x9, x0, x9
453
+ ; CHECK-NONEON-NEXT: sub x10, x1, x10
454
+ ; CHECK-NONEON-NEXT: mov x1, xzr
455
+ ; CHECK-NONEON-NEXT: lsr x11, x9, #2
456
+ ; CHECK-NONEON-NEXT: lsr x12, x10, #2
457
+ ; CHECK-NONEON-NEXT: and x9, x9, #0x3333333333333333
458
+ ; CHECK-NONEON-NEXT: and x10, x10, #0x3333333333333333
459
+ ; CHECK-NONEON-NEXT: and x11, x11, #0x3333333333333333
460
+ ; CHECK-NONEON-NEXT: add x9, x9, x11
461
+ ; CHECK-NONEON-NEXT: and x11, x12, #0x3333333333333333
462
+ ; CHECK-NONEON-NEXT: add x9, x9, x9, lsr #4
463
+ ; CHECK-NONEON-NEXT: add x10, x10, x11
464
+ ; CHECK-NONEON-NEXT: add x10, x10, x10, lsr #4
465
+ ; CHECK-NONEON-NEXT: and x9, x9, #0xf0f0f0f0f0f0f0f
466
+ ; CHECK-NONEON-NEXT: mul x9, x9, x8
467
+ ; CHECK-NONEON-NEXT: and x10, x10, #0xf0f0f0f0f0f0f0f
468
+ ; CHECK-NONEON-NEXT: mul x8, x10, x8
469
+ ; CHECK-NONEON-NEXT: lsr x9, x9, #56
470
+ ; CHECK-NONEON-NEXT: add x0, x9, x8, lsr #56
471
+ ; CHECK-NONEON-NEXT: ret
472
+ ;
473
+ ; CHECK-CSSC-LABEL: cnt128:
474
+ ; CHECK-CSSC: // %bb.0:
475
+ ; CHECK-CSSC-NEXT: cnt x8, x1
476
+ ; CHECK-CSSC-NEXT: cnt x9, x0
477
+ ; CHECK-CSSC-NEXT: mov x1, xzr
478
+ ; CHECK-CSSC-NEXT: add x0, x9, x8
479
+ ; CHECK-CSSC-NEXT: ret
480
+ ;
481
+ ; CHECK-BE-LABEL: cnt128:
482
+ ; CHECK-BE: // %bb.0:
483
+ ; CHECK-BE-NEXT: fmov d0, x0
484
+ ; CHECK-BE-NEXT: mov v0.d[1], x1
485
+ ; CHECK-BE-NEXT: rev64 v0.16b, v0.16b
486
+ ; CHECK-BE-NEXT: cnt v0.16b, v0.16b
487
+ ; CHECK-BE-NEXT: addv b0, v0.16b
488
+ ; CHECK-BE-NEXT: rev64 v0.16b, v0.16b
489
+ ; CHECK-BE-NEXT: mov x1, v0.d[1]
490
+ ; CHECK-BE-NEXT: fmov x0, d0
491
+ ; CHECK-BE-NEXT: ret
492
+ %cnt = tail call i128 @llvm.ctpop.i128 (i128 %x )
493
+ ret i128 %cnt
494
+ }
495
+
335
496
; Declarations of the i32 and i64 ctpop intrinsics called by the tests in this file.
declare i32 @llvm.ctpop.i32 (i32 ) nounwind readnone
336
497
declare i64 @llvm.ctpop.i64 (i64 ) nounwind readnone
0 commit comments