@@ -281,6 +281,121 @@ define <4 x i32> @vclsQs32(ptr %A) nounwind {
281
281
ret <4 x i32 > %tmp2
282
282
}
283
283
284
+ define i32 @ctpop8 (i8 %x ) nounwind readnone { ; i8 case: @llvm.ctpop.i8 on %x, widened to the i32 result; the CHECK lines expect an inline mask/shift/add sequence rather than a call
285
+ ; CHECK-LABEL: ctpop8:
286
+ ; CHECK: @ %bb.0:
287
+ ; CHECK-NEXT: mov r1, #85
288
+ ; CHECK-NEXT: and r1, r1, r0, lsr #1
289
+ ; CHECK-NEXT: sub r0, r0, r1
290
+ ; CHECK-NEXT: mov r1, #51
291
+ ; CHECK-NEXT: and r1, r1, r0, lsr #2
292
+ ; CHECK-NEXT: and r0, r0, #51
293
+ ; CHECK-NEXT: add r0, r0, r1
294
+ ; CHECK-NEXT: add r0, r0, r0, lsr #4
295
+ ; CHECK-NEXT: and r0, r0, #15
296
+ ; CHECK-NEXT: mov pc, lr
297
+ %count = tail call i8 @llvm.ctpop.i8 (i8 %x ) ; population count of the 8-bit argument
298
+ %conv = zext i8 %count to i32 ; zero-extend the i8 count to the i32 return type
299
+ ret i32 %conv
300
+ }
301
+
302
+ define i32 @ctpop16 (i16 %x ) nounwind readnone { ; i16 case: @llvm.ctpop.i16 on %x, widened to the i32 result; the CHECK lines expect the two-byte masks to be built with mov + orr
303
+ ; CHECK-LABEL: ctpop16:
304
+ ; CHECK: @ %bb.0:
305
+ ; CHECK-NEXT: mov r1, #85
306
+ ; CHECK-NEXT: orr r1, r1, #21760
307
+ ; CHECK-NEXT: and r1, r1, r0, lsr #1
308
+ ; CHECK-NEXT: sub r0, r0, r1
309
+ ; CHECK-NEXT: mov r1, #51
310
+ ; CHECK-NEXT: orr r1, r1, #13056
311
+ ; CHECK-NEXT: and r2, r0, r1
312
+ ; CHECK-NEXT: and r0, r1, r0, lsr #2
313
+ ; CHECK-NEXT: add r0, r2, r0
314
+ ; CHECK-NEXT: add r0, r0, r0, lsr #4
315
+ ; CHECK-NEXT: and r1, r0, #3840
316
+ ; CHECK-NEXT: and r0, r0, #15
317
+ ; CHECK-NEXT: add r0, r0, r1, lsr #8
318
+ ; CHECK-NEXT: mov pc, lr
319
+ %count = tail call i16 @llvm.ctpop.i16 (i16 %x ) ; population count of the 16-bit argument
320
+ %conv = zext i16 %count to i32 ; zero-extend the i16 count to the i32 return type
321
+ ret i32 %conv
322
+ }
323
+
324
+ define i32 @ctpop32 (i32 %x ) nounwind readnone { ; i32 case: returns @llvm.ctpop.i32 of %x directly; the CHECK lines expect masks loaded from the .LCPI22_* literal pool and a final mul / lsr #24
325
+ ; CHECK-LABEL: ctpop32:
326
+ ; CHECK: @ %bb.0:
327
+ ; CHECK-NEXT: ldr r1, .LCPI22_0
328
+ ; CHECK-NEXT: ldr r2, .LCPI22_3
329
+ ; CHECK-NEXT: and r1, r1, r0, lsr #1
330
+ ; CHECK-NEXT: ldr r12, .LCPI22_1
331
+ ; CHECK-NEXT: sub r0, r0, r1
332
+ ; CHECK-NEXT: ldr r3, .LCPI22_2
333
+ ; CHECK-NEXT: and r1, r0, r2
334
+ ; CHECK-NEXT: and r0, r2, r0, lsr #2
335
+ ; CHECK-NEXT: add r0, r1, r0
336
+ ; CHECK-NEXT: add r0, r0, r0, lsr #4
337
+ ; CHECK-NEXT: and r0, r0, r12
338
+ ; CHECK-NEXT: mul r1, r0, r3
339
+ ; CHECK-NEXT: lsr r0, r1, #24
340
+ ; CHECK-NEXT: mov pc, lr
341
+ ; CHECK-NEXT: .p2align 2
342
+ ; CHECK-NEXT: @ %bb.1:
343
+ ; CHECK-NEXT: .LCPI22_0:
344
+ ; CHECK-NEXT: .long 1431655765 @ 0x55555555
345
+ ; CHECK-NEXT: .LCPI22_1:
346
+ ; CHECK-NEXT: .long 252645135 @ 0xf0f0f0f
347
+ ; CHECK-NEXT: .LCPI22_2:
348
+ ; CHECK-NEXT: .long 16843009 @ 0x1010101
349
+ ; CHECK-NEXT: .LCPI22_3:
350
+ ; CHECK-NEXT: .long 858993459 @ 0x33333333
351
+ %count = tail call i32 @llvm.ctpop.i32 (i32 %x ) ; population count of the 32-bit argument; already i32, so no extension is needed
352
+ ret i32 %count
353
+ }
354
+
355
+ define i32 @ctpop64 (i64 %x ) nounwind readnone { ; i64 case: @llvm.ctpop.i64 on %x, narrowed to the i32 result; the CHECK lines expect the same mask/shift/mul sequence applied to r0 and to r1, with the two results added
356
+ ; CHECK-LABEL: ctpop64:
357
+ ; CHECK: @ %bb.0:
358
+ ; CHECK-NEXT: .save {r4, lr}
359
+ ; CHECK-NEXT: push {r4, lr}
360
+ ; CHECK-NEXT: ldr r2, .LCPI23_0
361
+ ; CHECK-NEXT: ldr r3, .LCPI23_3
362
+ ; CHECK-NEXT: and r4, r2, r0, lsr #1
363
+ ; CHECK-NEXT: and r2, r2, r1, lsr #1
364
+ ; CHECK-NEXT: sub r0, r0, r4
365
+ ; CHECK-NEXT: sub r1, r1, r2
366
+ ; CHECK-NEXT: and r4, r0, r3
367
+ ; CHECK-NEXT: and r2, r1, r3
368
+ ; CHECK-NEXT: and r0, r3, r0, lsr #2
369
+ ; CHECK-NEXT: and r1, r3, r1, lsr #2
370
+ ; CHECK-NEXT: add r0, r4, r0
371
+ ; CHECK-NEXT: ldr lr, .LCPI23_1
372
+ ; CHECK-NEXT: add r1, r2, r1
373
+ ; CHECK-NEXT: ldr r12, .LCPI23_2
374
+ ; CHECK-NEXT: add r0, r0, r0, lsr #4
375
+ ; CHECK-NEXT: and r0, r0, lr
376
+ ; CHECK-NEXT: add r1, r1, r1, lsr #4
377
+ ; CHECK-NEXT: mul r2, r0, r12
378
+ ; CHECK-NEXT: and r0, r1, lr
379
+ ; CHECK-NEXT: mul r1, r0, r12
380
+ ; CHECK-NEXT: lsr r0, r2, #24
381
+ ; CHECK-NEXT: add r0, r0, r1, lsr #24
382
+ ; CHECK-NEXT: pop {r4, lr}
383
+ ; CHECK-NEXT: mov pc, lr
384
+ ; CHECK-NEXT: .p2align 2
385
+ ; CHECK-NEXT: @ %bb.1:
386
+ ; CHECK-NEXT: .LCPI23_0:
387
+ ; CHECK-NEXT: .long 1431655765 @ 0x55555555
388
+ ; CHECK-NEXT: .LCPI23_1:
389
+ ; CHECK-NEXT: .long 252645135 @ 0xf0f0f0f
390
+ ; CHECK-NEXT: .LCPI23_2:
391
+ ; CHECK-NEXT: .long 16843009 @ 0x1010101
392
+ ; CHECK-NEXT: .LCPI23_3:
393
+ ; CHECK-NEXT: .long 858993459 @ 0x33333333
394
+ %count = tail call i64 @llvm.ctpop.i64 (i64 %x ) ; population count of the 64-bit argument
395
+ %conv = trunc i64 %count to i32 ; truncate the i64 count to the i32 return type
396
+ ret i32 %conv
397
+ }
398
+
284
399
define i32 @ctpop_eq_one (i64 %x ) nounwind readnone {
285
400
; CHECK-LABEL: ctpop_eq_one:
286
401
; CHECK: @ %bb.0:
@@ -299,6 +414,9 @@ define i32 @ctpop_eq_one(i64 %x) nounwind readnone {
299
414
ret i32 %conv
300
415
}
301
416
417
+ declare i8 @llvm.ctpop.i8 (i8 ) nounwind readnone ; population-count intrinsic called by @ctpop8
418
+ declare i16 @llvm.ctpop.i16 (i16 ) nounwind readnone ; population-count intrinsic called by @ctpop16
419
+ declare i32 @llvm.ctpop.i32 (i32 ) nounwind readnone ; population-count intrinsic called by @ctpop32
302
420
declare i64 @llvm.ctpop.i64 (i64 ) nounwind readnone ; population-count intrinsic called by @ctpop64
303
421
304
422
declare <8 x i8 > @llvm.arm.neon.vcls.v8i8 (<8 x i8 >) nounwind readnone
0 commit comments