@@ -308,3 +308,244 @@ define <32 x i32> @v32i32_v4i32(<4 x i32>) {
   %2 = shufflevector <4 x i32> %0, <4 x i32> poison, <32 x i32> <i32 2, i32 3, i32 0, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 2, i32 3, i32 0, i32 1, i32 1, i32 2, i32 0, i32 3, i32 1, i32 1, i32 2, i32 0, i32 3, i32 1, i32 2, i32 0, i32 3, i32 1, i32 1, i32 2, i32 0, i32 3>
   ret <32 x i32> %2
 }
+
+; TODO: This case should be a simple vnsrl, but gets scalarized instead.
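+; A minimal sketch of the lowering we would hope for here (an assumption,
+; not current codegen): viewed as <32 x i16>, the odd bytes of the source
+; are each element's high byte, so a single narrowing shift right by 8
+; would extract all of them at once:
+;   li a0, 32
+;   vsetvli zero, a0, e8, m2, ta, ma
+;   vnsrl.wi v8, v8, 8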
+define <32 x i8> @vnsrl_v32i8_v64i8(<64 x i8> %in) {
+; RV32-LABEL: vnsrl_v32i8_v64i8:
+; RV32: # %bb.0:
+; RV32-NEXT: addi sp, sp, -128
+; RV32-NEXT: .cfi_def_cfa_offset 128
+; RV32-NEXT: sw ra, 124(sp) # 4-byte Folded Spill
+; RV32-NEXT: sw s0, 120(sp) # 4-byte Folded Spill
+; RV32-NEXT: .cfi_offset ra, -4
+; RV32-NEXT: .cfi_offset s0, -8
+; RV32-NEXT: addi s0, sp, 128
+; RV32-NEXT: .cfi_def_cfa s0, 0
+; RV32-NEXT: andi sp, sp, -64
+; RV32-NEXT: li a0, 64
+; RV32-NEXT: mv a1, sp
+; RV32-NEXT: vsetvli zero, a0, e8, m4, ta, ma
+; RV32-NEXT: vse8.v v8, (a1)
+; RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
+; RV32-NEXT: vslidedown.vi v10, v8, 1
+; RV32-NEXT: vmv.x.s a0, v10
+; RV32-NEXT: li a1, 32
+; RV32-NEXT: vsetvli zero, a1, e8, m2, ta, ma
+; RV32-NEXT: vmv.v.x v10, a0
+; RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
+; RV32-NEXT: vslidedown.vi v12, v8, 3
+; RV32-NEXT: vmv.x.s a0, v12
+; RV32-NEXT: vsetvli zero, a1, e8, m2, ta, ma
+; RV32-NEXT: vslide1down.vx v10, v10, a0
+; RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
+; RV32-NEXT: vslidedown.vi v12, v8, 5
+; RV32-NEXT: vmv.x.s a0, v12
+; RV32-NEXT: vsetvli zero, a1, e8, m2, ta, ma
+; RV32-NEXT: vslide1down.vx v10, v10, a0
+; RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
+; RV32-NEXT: vslidedown.vi v12, v8, 7
+; RV32-NEXT: vmv.x.s a0, v12
+; RV32-NEXT: vsetvli zero, a1, e8, m2, ta, ma
+; RV32-NEXT: vslide1down.vx v10, v10, a0
+; RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
+; RV32-NEXT: vslidedown.vi v12, v8, 9
+; RV32-NEXT: vmv.x.s a0, v12
+; RV32-NEXT: vsetvli zero, a1, e8, m2, ta, ma
+; RV32-NEXT: vslide1down.vx v10, v10, a0
+; RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
+; RV32-NEXT: vslidedown.vi v12, v8, 11
+; RV32-NEXT: vmv.x.s a0, v12
+; RV32-NEXT: vsetvli zero, a1, e8, m2, ta, ma
+; RV32-NEXT: vslide1down.vx v10, v10, a0
+; RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
+; RV32-NEXT: vslidedown.vi v12, v8, 13
+; RV32-NEXT: vmv.x.s a0, v12
+; RV32-NEXT: vsetvli zero, a1, e8, m2, ta, ma
+; RV32-NEXT: vslide1down.vx v10, v10, a0
+; RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
+; RV32-NEXT: vslidedown.vi v12, v8, 15
+; RV32-NEXT: vmv.x.s a0, v12
+; RV32-NEXT: vsetvli zero, a1, e8, m2, ta, ma
+; RV32-NEXT: vslide1down.vx v10, v10, a0
+; RV32-NEXT: vslidedown.vi v12, v8, 17
+; RV32-NEXT: vmv.x.s a0, v12
+; RV32-NEXT: vslide1down.vx v10, v10, a0
+; RV32-NEXT: vslidedown.vi v12, v8, 19
+; RV32-NEXT: vmv.x.s a0, v12
+; RV32-NEXT: vslide1down.vx v10, v10, a0
+; RV32-NEXT: vslidedown.vi v12, v8, 21
+; RV32-NEXT: vmv.x.s a0, v12
+; RV32-NEXT: vslide1down.vx v10, v10, a0
+; RV32-NEXT: vslidedown.vi v12, v8, 23
+; RV32-NEXT: vmv.x.s a0, v12
+; RV32-NEXT: vslide1down.vx v10, v10, a0
+; RV32-NEXT: vslidedown.vi v12, v8, 25
+; RV32-NEXT: vmv.x.s a0, v12
+; RV32-NEXT: vslide1down.vx v10, v10, a0
+; RV32-NEXT: vslidedown.vi v12, v8, 27
+; RV32-NEXT: vmv.x.s a0, v12
+; RV32-NEXT: vslide1down.vx v10, v10, a0
+; RV32-NEXT: vslidedown.vi v12, v8, 29
+; RV32-NEXT: vmv.x.s a0, v12
+; RV32-NEXT: vslide1down.vx v10, v10, a0
+; RV32-NEXT: vslidedown.vi v8, v8, 31
+; RV32-NEXT: vmv.x.s a0, v8
+; RV32-NEXT: vslide1down.vx v8, v10, a0
+; RV32-NEXT: lbu a0, 33(sp)
+; RV32-NEXT: lbu a1, 35(sp)
+; RV32-NEXT: lbu a2, 37(sp)
+; RV32-NEXT: lbu a3, 39(sp)
+; RV32-NEXT: vslide1down.vx v8, v8, a0
+; RV32-NEXT: vslide1down.vx v8, v8, a1
+; RV32-NEXT: vslide1down.vx v8, v8, a2
+; RV32-NEXT: vslide1down.vx v8, v8, a3
+; RV32-NEXT: lbu a0, 41(sp)
+; RV32-NEXT: lbu a1, 43(sp)
+; RV32-NEXT: lbu a2, 45(sp)
+; RV32-NEXT: lbu a3, 47(sp)
+; RV32-NEXT: vslide1down.vx v8, v8, a0
+; RV32-NEXT: vslide1down.vx v8, v8, a1
+; RV32-NEXT: vslide1down.vx v8, v8, a2
+; RV32-NEXT: vslide1down.vx v8, v8, a3
+; RV32-NEXT: lbu a0, 49(sp)
+; RV32-NEXT: lbu a1, 51(sp)
+; RV32-NEXT: lbu a2, 53(sp)
+; RV32-NEXT: lbu a3, 55(sp)
+; RV32-NEXT: vslide1down.vx v8, v8, a0
+; RV32-NEXT: vslide1down.vx v8, v8, a1
+; RV32-NEXT: vslide1down.vx v8, v8, a2
+; RV32-NEXT: vslide1down.vx v8, v8, a3
+; RV32-NEXT: lbu a0, 57(sp)
+; RV32-NEXT: lbu a1, 59(sp)
+; RV32-NEXT: lbu a2, 61(sp)
+; RV32-NEXT: lbu a3, 63(sp)
+; RV32-NEXT: vslide1down.vx v8, v8, a0
+; RV32-NEXT: vslide1down.vx v8, v8, a1
+; RV32-NEXT: vslide1down.vx v8, v8, a2
+; RV32-NEXT: vslide1down.vx v8, v8, a3
+; RV32-NEXT: addi sp, s0, -128
+; RV32-NEXT: lw ra, 124(sp) # 4-byte Folded Reload
+; RV32-NEXT: lw s0, 120(sp) # 4-byte Folded Reload
+; RV32-NEXT: addi sp, sp, 128
+; RV32-NEXT: ret
+;
+; RV64-LABEL: vnsrl_v32i8_v64i8:
+; RV64: # %bb.0:
+; RV64-NEXT: addi sp, sp, -128
+; RV64-NEXT: .cfi_def_cfa_offset 128
+; RV64-NEXT: sd ra, 120(sp) # 8-byte Folded Spill
+; RV64-NEXT: sd s0, 112(sp) # 8-byte Folded Spill
+; RV64-NEXT: .cfi_offset ra, -8
+; RV64-NEXT: .cfi_offset s0, -16
+; RV64-NEXT: addi s0, sp, 128
+; RV64-NEXT: .cfi_def_cfa s0, 0
+; RV64-NEXT: andi sp, sp, -64
+; RV64-NEXT: li a0, 64
+; RV64-NEXT: mv a1, sp
+; RV64-NEXT: vsetvli zero, a0, e8, m4, ta, ma
+; RV64-NEXT: vse8.v v8, (a1)
+; RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
+; RV64-NEXT: vslidedown.vi v10, v8, 1
+; RV64-NEXT: vmv.x.s a0, v10
+; RV64-NEXT: li a1, 32
+; RV64-NEXT: vsetvli zero, a1, e8, m2, ta, ma
+; RV64-NEXT: vmv.v.x v10, a0
+; RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
+; RV64-NEXT: vslidedown.vi v12, v8, 3
+; RV64-NEXT: vmv.x.s a0, v12
+; RV64-NEXT: vsetvli zero, a1, e8, m2, ta, ma
+; RV64-NEXT: vslide1down.vx v10, v10, a0
+; RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
+; RV64-NEXT: vslidedown.vi v12, v8, 5
+; RV64-NEXT: vmv.x.s a0, v12
+; RV64-NEXT: vsetvli zero, a1, e8, m2, ta, ma
+; RV64-NEXT: vslide1down.vx v10, v10, a0
+; RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
+; RV64-NEXT: vslidedown.vi v12, v8, 7
+; RV64-NEXT: vmv.x.s a0, v12
+; RV64-NEXT: vsetvli zero, a1, e8, m2, ta, ma
+; RV64-NEXT: vslide1down.vx v10, v10, a0
+; RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
+; RV64-NEXT: vslidedown.vi v12, v8, 9
+; RV64-NEXT: vmv.x.s a0, v12
+; RV64-NEXT: vsetvli zero, a1, e8, m2, ta, ma
+; RV64-NEXT: vslide1down.vx v10, v10, a0
+; RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
+; RV64-NEXT: vslidedown.vi v12, v8, 11
+; RV64-NEXT: vmv.x.s a0, v12
+; RV64-NEXT: vsetvli zero, a1, e8, m2, ta, ma
+; RV64-NEXT: vslide1down.vx v10, v10, a0
+; RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
+; RV64-NEXT: vslidedown.vi v12, v8, 13
+; RV64-NEXT: vmv.x.s a0, v12
+; RV64-NEXT: vsetvli zero, a1, e8, m2, ta, ma
+; RV64-NEXT: vslide1down.vx v10, v10, a0
+; RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
+; RV64-NEXT: vslidedown.vi v12, v8, 15
+; RV64-NEXT: vmv.x.s a0, v12
+; RV64-NEXT: vsetvli zero, a1, e8, m2, ta, ma
+; RV64-NEXT: vslide1down.vx v10, v10, a0
+; RV64-NEXT: vslidedown.vi v12, v8, 17
+; RV64-NEXT: vmv.x.s a0, v12
+; RV64-NEXT: vslide1down.vx v10, v10, a0
+; RV64-NEXT: vslidedown.vi v12, v8, 19
+; RV64-NEXT: vmv.x.s a0, v12
+; RV64-NEXT: vslide1down.vx v10, v10, a0
+; RV64-NEXT: vslidedown.vi v12, v8, 21
+; RV64-NEXT: vmv.x.s a0, v12
+; RV64-NEXT: vslide1down.vx v10, v10, a0
+; RV64-NEXT: vslidedown.vi v12, v8, 23
+; RV64-NEXT: vmv.x.s a0, v12
+; RV64-NEXT: vslide1down.vx v10, v10, a0
+; RV64-NEXT: vslidedown.vi v12, v8, 25
+; RV64-NEXT: vmv.x.s a0, v12
+; RV64-NEXT: vslide1down.vx v10, v10, a0
+; RV64-NEXT: vslidedown.vi v12, v8, 27
+; RV64-NEXT: vmv.x.s a0, v12
+; RV64-NEXT: vslide1down.vx v10, v10, a0
+; RV64-NEXT: vslidedown.vi v12, v8, 29
+; RV64-NEXT: vmv.x.s a0, v12
+; RV64-NEXT: vslide1down.vx v10, v10, a0
+; RV64-NEXT: vslidedown.vi v8, v8, 31
+; RV64-NEXT: vmv.x.s a0, v8
+; RV64-NEXT: vslide1down.vx v8, v10, a0
+; RV64-NEXT: lbu a0, 33(sp)
+; RV64-NEXT: lbu a1, 35(sp)
+; RV64-NEXT: lbu a2, 37(sp)
+; RV64-NEXT: lbu a3, 39(sp)
+; RV64-NEXT: vslide1down.vx v8, v8, a0
+; RV64-NEXT: vslide1down.vx v8, v8, a1
+; RV64-NEXT: vslide1down.vx v8, v8, a2
+; RV64-NEXT: vslide1down.vx v8, v8, a3
+; RV64-NEXT: lbu a0, 41(sp)
+; RV64-NEXT: lbu a1, 43(sp)
+; RV64-NEXT: lbu a2, 45(sp)
+; RV64-NEXT: lbu a3, 47(sp)
+; RV64-NEXT: vslide1down.vx v8, v8, a0
+; RV64-NEXT: vslide1down.vx v8, v8, a1
+; RV64-NEXT: vslide1down.vx v8, v8, a2
+; RV64-NEXT: vslide1down.vx v8, v8, a3
+; RV64-NEXT: lbu a0, 49(sp)
+; RV64-NEXT: lbu a1, 51(sp)
+; RV64-NEXT: lbu a2, 53(sp)
+; RV64-NEXT: lbu a3, 55(sp)
+; RV64-NEXT: vslide1down.vx v8, v8, a0
+; RV64-NEXT: vslide1down.vx v8, v8, a1
+; RV64-NEXT: vslide1down.vx v8, v8, a2
+; RV64-NEXT: vslide1down.vx v8, v8, a3
+; RV64-NEXT: lbu a0, 57(sp)
+; RV64-NEXT: lbu a1, 59(sp)
+; RV64-NEXT: lbu a2, 61(sp)
+; RV64-NEXT: lbu a3, 63(sp)
+; RV64-NEXT: vslide1down.vx v8, v8, a0
+; RV64-NEXT: vslide1down.vx v8, v8, a1
+; RV64-NEXT: vslide1down.vx v8, v8, a2
+; RV64-NEXT: vslide1down.vx v8, v8, a3
+; RV64-NEXT: addi sp, s0, -128
+; RV64-NEXT: ld ra, 120(sp) # 8-byte Folded Reload
+; RV64-NEXT: ld s0, 112(sp) # 8-byte Folded Reload
+; RV64-NEXT: addi sp, sp, 128
+; RV64-NEXT: ret
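+  ; The mask below gathers the odd-indexed bytes (1, 3, ..., 63) of %in, a
+  ; two-way deinterleave, which is why a single vnsrl should suffice.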
+  %res = shufflevector <64 x i8> %in, <64 x i8> poison, <32 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15, i32 17, i32 19, i32 21, i32 23, i32 25, i32 27, i32 29, i32 31, i32 33, i32 35, i32 37, i32 39, i32 41, i32 43, i32 45, i32 47, i32 49, i32 51, i32 53, i32 55, i32 57, i32 59, i32 61, i32 63>
+  ret <32 x i8> %res
+}