Skip to content

Commit 22db91c

Browse files
committed
[RISCV] Another length changing shuffle lowering case
"32" is currently a special number - it's one past the maximum representable immediate on a vslidedown.vi and thus the first sub-vector we do *not* consider "cheap" in RISCVTargetLowering::isExtractSubvectorCheap.
1 parent e9de538 commit 22db91c

File tree

1 file changed

+241
-0
lines changed

1 file changed

+241
-0
lines changed

llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-changes-length.ll

+241
Original file line numberDiff line numberDiff line change
@@ -308,3 +308,244 @@ define <32 x i32> @v32i32_v4i32(<4 x i32>) {
308308
%2 = shufflevector <4 x i32> %0, <4 x i32> poison, <32 x i32> <i32 2, i32 3, i32 0, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 2, i32 3, i32 0, i32 1, i32 1, i32 2, i32 0, i32 3, i32 1, i32 1, i32 2, i32 0, i32 3, i32 1, i32 2, i32 0, i32 3, i32 1, i32 1, i32 2, i32 0, i32 3>
309309
ret <32 x i32> %2
310310
}
311+
312+
; TODO: This case should be a simple vnsrl, but gets scalarized instead
313+
define <32 x i8> @vnsrl_v32i8_v64i8(<64 x i8> %in) {
314+
; RV32-LABEL: vnsrl_v32i8_v64i8:
315+
; RV32: # %bb.0:
316+
; RV32-NEXT: addi sp, sp, -128
317+
; RV32-NEXT: .cfi_def_cfa_offset 128
318+
; RV32-NEXT: sw ra, 124(sp) # 4-byte Folded Spill
319+
; RV32-NEXT: sw s0, 120(sp) # 4-byte Folded Spill
320+
; RV32-NEXT: .cfi_offset ra, -4
321+
; RV32-NEXT: .cfi_offset s0, -8
322+
; RV32-NEXT: addi s0, sp, 128
323+
; RV32-NEXT: .cfi_def_cfa s0, 0
324+
; RV32-NEXT: andi sp, sp, -64
325+
; RV32-NEXT: li a0, 64
326+
; RV32-NEXT: mv a1, sp
327+
; RV32-NEXT: vsetvli zero, a0, e8, m4, ta, ma
328+
; RV32-NEXT: vse8.v v8, (a1)
329+
; RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
330+
; RV32-NEXT: vslidedown.vi v10, v8, 1
331+
; RV32-NEXT: vmv.x.s a0, v10
332+
; RV32-NEXT: li a1, 32
333+
; RV32-NEXT: vsetvli zero, a1, e8, m2, ta, ma
334+
; RV32-NEXT: vmv.v.x v10, a0
335+
; RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
336+
; RV32-NEXT: vslidedown.vi v12, v8, 3
337+
; RV32-NEXT: vmv.x.s a0, v12
338+
; RV32-NEXT: vsetvli zero, a1, e8, m2, ta, ma
339+
; RV32-NEXT: vslide1down.vx v10, v10, a0
340+
; RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
341+
; RV32-NEXT: vslidedown.vi v12, v8, 5
342+
; RV32-NEXT: vmv.x.s a0, v12
343+
; RV32-NEXT: vsetvli zero, a1, e8, m2, ta, ma
344+
; RV32-NEXT: vslide1down.vx v10, v10, a0
345+
; RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
346+
; RV32-NEXT: vslidedown.vi v12, v8, 7
347+
; RV32-NEXT: vmv.x.s a0, v12
348+
; RV32-NEXT: vsetvli zero, a1, e8, m2, ta, ma
349+
; RV32-NEXT: vslide1down.vx v10, v10, a0
350+
; RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
351+
; RV32-NEXT: vslidedown.vi v12, v8, 9
352+
; RV32-NEXT: vmv.x.s a0, v12
353+
; RV32-NEXT: vsetvli zero, a1, e8, m2, ta, ma
354+
; RV32-NEXT: vslide1down.vx v10, v10, a0
355+
; RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
356+
; RV32-NEXT: vslidedown.vi v12, v8, 11
357+
; RV32-NEXT: vmv.x.s a0, v12
358+
; RV32-NEXT: vsetvli zero, a1, e8, m2, ta, ma
359+
; RV32-NEXT: vslide1down.vx v10, v10, a0
360+
; RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
361+
; RV32-NEXT: vslidedown.vi v12, v8, 13
362+
; RV32-NEXT: vmv.x.s a0, v12
363+
; RV32-NEXT: vsetvli zero, a1, e8, m2, ta, ma
364+
; RV32-NEXT: vslide1down.vx v10, v10, a0
365+
; RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
366+
; RV32-NEXT: vslidedown.vi v12, v8, 15
367+
; RV32-NEXT: vmv.x.s a0, v12
368+
; RV32-NEXT: vsetvli zero, a1, e8, m2, ta, ma
369+
; RV32-NEXT: vslide1down.vx v10, v10, a0
370+
; RV32-NEXT: vslidedown.vi v12, v8, 17
371+
; RV32-NEXT: vmv.x.s a0, v12
372+
; RV32-NEXT: vslide1down.vx v10, v10, a0
373+
; RV32-NEXT: vslidedown.vi v12, v8, 19
374+
; RV32-NEXT: vmv.x.s a0, v12
375+
; RV32-NEXT: vslide1down.vx v10, v10, a0
376+
; RV32-NEXT: vslidedown.vi v12, v8, 21
377+
; RV32-NEXT: vmv.x.s a0, v12
378+
; RV32-NEXT: vslide1down.vx v10, v10, a0
379+
; RV32-NEXT: vslidedown.vi v12, v8, 23
380+
; RV32-NEXT: vmv.x.s a0, v12
381+
; RV32-NEXT: vslide1down.vx v10, v10, a0
382+
; RV32-NEXT: vslidedown.vi v12, v8, 25
383+
; RV32-NEXT: vmv.x.s a0, v12
384+
; RV32-NEXT: vslide1down.vx v10, v10, a0
385+
; RV32-NEXT: vslidedown.vi v12, v8, 27
386+
; RV32-NEXT: vmv.x.s a0, v12
387+
; RV32-NEXT: vslide1down.vx v10, v10, a0
388+
; RV32-NEXT: vslidedown.vi v12, v8, 29
389+
; RV32-NEXT: vmv.x.s a0, v12
390+
; RV32-NEXT: vslide1down.vx v10, v10, a0
391+
; RV32-NEXT: vslidedown.vi v8, v8, 31
392+
; RV32-NEXT: vmv.x.s a0, v8
393+
; RV32-NEXT: vslide1down.vx v8, v10, a0
394+
; RV32-NEXT: lbu a0, 33(sp)
395+
; RV32-NEXT: lbu a1, 35(sp)
396+
; RV32-NEXT: lbu a2, 37(sp)
397+
; RV32-NEXT: lbu a3, 39(sp)
398+
; RV32-NEXT: vslide1down.vx v8, v8, a0
399+
; RV32-NEXT: vslide1down.vx v8, v8, a1
400+
; RV32-NEXT: vslide1down.vx v8, v8, a2
401+
; RV32-NEXT: vslide1down.vx v8, v8, a3
402+
; RV32-NEXT: lbu a0, 41(sp)
403+
; RV32-NEXT: lbu a1, 43(sp)
404+
; RV32-NEXT: lbu a2, 45(sp)
405+
; RV32-NEXT: lbu a3, 47(sp)
406+
; RV32-NEXT: vslide1down.vx v8, v8, a0
407+
; RV32-NEXT: vslide1down.vx v8, v8, a1
408+
; RV32-NEXT: vslide1down.vx v8, v8, a2
409+
; RV32-NEXT: vslide1down.vx v8, v8, a3
410+
; RV32-NEXT: lbu a0, 49(sp)
411+
; RV32-NEXT: lbu a1, 51(sp)
412+
; RV32-NEXT: lbu a2, 53(sp)
413+
; RV32-NEXT: lbu a3, 55(sp)
414+
; RV32-NEXT: vslide1down.vx v8, v8, a0
415+
; RV32-NEXT: vslide1down.vx v8, v8, a1
416+
; RV32-NEXT: vslide1down.vx v8, v8, a2
417+
; RV32-NEXT: vslide1down.vx v8, v8, a3
418+
; RV32-NEXT: lbu a0, 57(sp)
419+
; RV32-NEXT: lbu a1, 59(sp)
420+
; RV32-NEXT: lbu a2, 61(sp)
421+
; RV32-NEXT: lbu a3, 63(sp)
422+
; RV32-NEXT: vslide1down.vx v8, v8, a0
423+
; RV32-NEXT: vslide1down.vx v8, v8, a1
424+
; RV32-NEXT: vslide1down.vx v8, v8, a2
425+
; RV32-NEXT: vslide1down.vx v8, v8, a3
426+
; RV32-NEXT: addi sp, s0, -128
427+
; RV32-NEXT: lw ra, 124(sp) # 4-byte Folded Reload
428+
; RV32-NEXT: lw s0, 120(sp) # 4-byte Folded Reload
429+
; RV32-NEXT: addi sp, sp, 128
430+
; RV32-NEXT: ret
431+
;
432+
; RV64-LABEL: vnsrl_v32i8_v64i8:
433+
; RV64: # %bb.0:
434+
; RV64-NEXT: addi sp, sp, -128
435+
; RV64-NEXT: .cfi_def_cfa_offset 128
436+
; RV64-NEXT: sd ra, 120(sp) # 8-byte Folded Spill
437+
; RV64-NEXT: sd s0, 112(sp) # 8-byte Folded Spill
438+
; RV64-NEXT: .cfi_offset ra, -8
439+
; RV64-NEXT: .cfi_offset s0, -16
440+
; RV64-NEXT: addi s0, sp, 128
441+
; RV64-NEXT: .cfi_def_cfa s0, 0
442+
; RV64-NEXT: andi sp, sp, -64
443+
; RV64-NEXT: li a0, 64
444+
; RV64-NEXT: mv a1, sp
445+
; RV64-NEXT: vsetvli zero, a0, e8, m4, ta, ma
446+
; RV64-NEXT: vse8.v v8, (a1)
447+
; RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
448+
; RV64-NEXT: vslidedown.vi v10, v8, 1
449+
; RV64-NEXT: vmv.x.s a0, v10
450+
; RV64-NEXT: li a1, 32
451+
; RV64-NEXT: vsetvli zero, a1, e8, m2, ta, ma
452+
; RV64-NEXT: vmv.v.x v10, a0
453+
; RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
454+
; RV64-NEXT: vslidedown.vi v12, v8, 3
455+
; RV64-NEXT: vmv.x.s a0, v12
456+
; RV64-NEXT: vsetvli zero, a1, e8, m2, ta, ma
457+
; RV64-NEXT: vslide1down.vx v10, v10, a0
458+
; RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
459+
; RV64-NEXT: vslidedown.vi v12, v8, 5
460+
; RV64-NEXT: vmv.x.s a0, v12
461+
; RV64-NEXT: vsetvli zero, a1, e8, m2, ta, ma
462+
; RV64-NEXT: vslide1down.vx v10, v10, a0
463+
; RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
464+
; RV64-NEXT: vslidedown.vi v12, v8, 7
465+
; RV64-NEXT: vmv.x.s a0, v12
466+
; RV64-NEXT: vsetvli zero, a1, e8, m2, ta, ma
467+
; RV64-NEXT: vslide1down.vx v10, v10, a0
468+
; RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
469+
; RV64-NEXT: vslidedown.vi v12, v8, 9
470+
; RV64-NEXT: vmv.x.s a0, v12
471+
; RV64-NEXT: vsetvli zero, a1, e8, m2, ta, ma
472+
; RV64-NEXT: vslide1down.vx v10, v10, a0
473+
; RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
474+
; RV64-NEXT: vslidedown.vi v12, v8, 11
475+
; RV64-NEXT: vmv.x.s a0, v12
476+
; RV64-NEXT: vsetvli zero, a1, e8, m2, ta, ma
477+
; RV64-NEXT: vslide1down.vx v10, v10, a0
478+
; RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
479+
; RV64-NEXT: vslidedown.vi v12, v8, 13
480+
; RV64-NEXT: vmv.x.s a0, v12
481+
; RV64-NEXT: vsetvli zero, a1, e8, m2, ta, ma
482+
; RV64-NEXT: vslide1down.vx v10, v10, a0
483+
; RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
484+
; RV64-NEXT: vslidedown.vi v12, v8, 15
485+
; RV64-NEXT: vmv.x.s a0, v12
486+
; RV64-NEXT: vsetvli zero, a1, e8, m2, ta, ma
487+
; RV64-NEXT: vslide1down.vx v10, v10, a0
488+
; RV64-NEXT: vslidedown.vi v12, v8, 17
489+
; RV64-NEXT: vmv.x.s a0, v12
490+
; RV64-NEXT: vslide1down.vx v10, v10, a0
491+
; RV64-NEXT: vslidedown.vi v12, v8, 19
492+
; RV64-NEXT: vmv.x.s a0, v12
493+
; RV64-NEXT: vslide1down.vx v10, v10, a0
494+
; RV64-NEXT: vslidedown.vi v12, v8, 21
495+
; RV64-NEXT: vmv.x.s a0, v12
496+
; RV64-NEXT: vslide1down.vx v10, v10, a0
497+
; RV64-NEXT: vslidedown.vi v12, v8, 23
498+
; RV64-NEXT: vmv.x.s a0, v12
499+
; RV64-NEXT: vslide1down.vx v10, v10, a0
500+
; RV64-NEXT: vslidedown.vi v12, v8, 25
501+
; RV64-NEXT: vmv.x.s a0, v12
502+
; RV64-NEXT: vslide1down.vx v10, v10, a0
503+
; RV64-NEXT: vslidedown.vi v12, v8, 27
504+
; RV64-NEXT: vmv.x.s a0, v12
505+
; RV64-NEXT: vslide1down.vx v10, v10, a0
506+
; RV64-NEXT: vslidedown.vi v12, v8, 29
507+
; RV64-NEXT: vmv.x.s a0, v12
508+
; RV64-NEXT: vslide1down.vx v10, v10, a0
509+
; RV64-NEXT: vslidedown.vi v8, v8, 31
510+
; RV64-NEXT: vmv.x.s a0, v8
511+
; RV64-NEXT: vslide1down.vx v8, v10, a0
512+
; RV64-NEXT: lbu a0, 33(sp)
513+
; RV64-NEXT: lbu a1, 35(sp)
514+
; RV64-NEXT: lbu a2, 37(sp)
515+
; RV64-NEXT: lbu a3, 39(sp)
516+
; RV64-NEXT: vslide1down.vx v8, v8, a0
517+
; RV64-NEXT: vslide1down.vx v8, v8, a1
518+
; RV64-NEXT: vslide1down.vx v8, v8, a2
519+
; RV64-NEXT: vslide1down.vx v8, v8, a3
520+
; RV64-NEXT: lbu a0, 41(sp)
521+
; RV64-NEXT: lbu a1, 43(sp)
522+
; RV64-NEXT: lbu a2, 45(sp)
523+
; RV64-NEXT: lbu a3, 47(sp)
524+
; RV64-NEXT: vslide1down.vx v8, v8, a0
525+
; RV64-NEXT: vslide1down.vx v8, v8, a1
526+
; RV64-NEXT: vslide1down.vx v8, v8, a2
527+
; RV64-NEXT: vslide1down.vx v8, v8, a3
528+
; RV64-NEXT: lbu a0, 49(sp)
529+
; RV64-NEXT: lbu a1, 51(sp)
530+
; RV64-NEXT: lbu a2, 53(sp)
531+
; RV64-NEXT: lbu a3, 55(sp)
532+
; RV64-NEXT: vslide1down.vx v8, v8, a0
533+
; RV64-NEXT: vslide1down.vx v8, v8, a1
534+
; RV64-NEXT: vslide1down.vx v8, v8, a2
535+
; RV64-NEXT: vslide1down.vx v8, v8, a3
536+
; RV64-NEXT: lbu a0, 57(sp)
537+
; RV64-NEXT: lbu a1, 59(sp)
538+
; RV64-NEXT: lbu a2, 61(sp)
539+
; RV64-NEXT: lbu a3, 63(sp)
540+
; RV64-NEXT: vslide1down.vx v8, v8, a0
541+
; RV64-NEXT: vslide1down.vx v8, v8, a1
542+
; RV64-NEXT: vslide1down.vx v8, v8, a2
543+
; RV64-NEXT: vslide1down.vx v8, v8, a3
544+
; RV64-NEXT: addi sp, s0, -128
545+
; RV64-NEXT: ld ra, 120(sp) # 8-byte Folded Reload
546+
; RV64-NEXT: ld s0, 112(sp) # 8-byte Folded Reload
547+
; RV64-NEXT: addi sp, sp, 128
548+
; RV64-NEXT: ret
549+
%res = shufflevector <64 x i8> %in, <64 x i8> poison, <32 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15, i32 17, i32 19, i32 21, i32 23, i32 25, i32 27, i32 29, i32 31, i32 33, i32 35, i32 37, i32 39, i32 41, i32 43, i32 45, i32 47, i32 49, i32 51, i32 53, i32 55, i32 57, i32 59, i32 61, i32 63>
550+
ret <32 x i8> %res
551+
}

0 commit comments

Comments
 (0)