@@ -2626,6 +2626,132 @@ entry:
2626
2626
ret double %ext
2627
2627
}
2628
2628
2629
+ ; Extracts one double, selected by the runtime index %sel, from a <14 x float>
+ ; inreg argument that is first bitcast to <7 x double>.
+ ; Every expected sequence below compares v0 (presumably holding %sel) against
+ ; successive lane numbers, picks the matching pair of 32-bit registers with
+ ; v_cndmask, and finally copies the selected pair into s0/s1 with
+ ; v_readfirstlane.
+ ; NOTE(review): the expected sequences also contain a compare against index 7,
+ ; which is past the last element of a 7-element vector (valid lanes are 0-6);
+ ; confirm that this extra select is the intended output.
+ ; NOTE(review): the assertion lines look machine-generated; regenerate them
+ ; with the project's update script instead of editing by hand (to be confirmed
+ ; against the file header, which is not visible in this excerpt).
+ define amdgpu_ps double @dyn_extract_v7f64_s_v_bitcast (<14 x float > inreg %userData , i32 %sel ) {
2630
+ ; GCN-LABEL: dyn_extract_v7f64_s_v_bitcast:
2631
+ ; GCN: ; %bb.0: ; %entry
2632
+ ; GCN-NEXT: v_mov_b32_e32 v1, s2
2633
+ ; GCN-NEXT: v_mov_b32_e32 v2, s3
2634
+ ; GCN-NEXT: v_mov_b32_e32 v3, s4
2635
+ ; GCN-NEXT: v_mov_b32_e32 v4, s5
2636
+ ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
2637
+ ; GCN-NEXT: v_mov_b32_e32 v5, s6
2638
+ ; GCN-NEXT: v_mov_b32_e32 v6, s7
2639
+ ; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc
2640
+ ; GCN-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc
2641
+ ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 2, v0
2642
+ ; GCN-NEXT: v_mov_b32_e32 v7, s8
2643
+ ; GCN-NEXT: v_mov_b32_e32 v8, s9
2644
+ ; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc
2645
+ ; GCN-NEXT: v_cndmask_b32_e32 v2, v2, v6, vcc
2646
+ ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 3, v0
2647
+ ; GCN-NEXT: v_mov_b32_e32 v9, s10
2648
+ ; GCN-NEXT: v_mov_b32_e32 v10, s11
2649
+ ; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v7, vcc
2650
+ ; GCN-NEXT: v_cndmask_b32_e32 v2, v2, v8, vcc
2651
+ ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 4, v0
2652
+ ; GCN-NEXT: v_mov_b32_e32 v11, s12
2653
+ ; GCN-NEXT: v_mov_b32_e32 v12, s13
2654
+ ; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v9, vcc
2655
+ ; GCN-NEXT: v_cndmask_b32_e32 v2, v2, v10, vcc
2656
+ ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 5, v0
2657
+ ; GCN-NEXT: v_mov_b32_e32 v13, s14
2658
+ ; GCN-NEXT: v_mov_b32_e32 v14, s15
2659
+ ; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v11, vcc
2660
+ ; GCN-NEXT: v_cndmask_b32_e32 v2, v2, v12, vcc
2661
+ ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 6, v0
2662
+ ; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v13, vcc
2663
+ ; GCN-NEXT: v_cndmask_b32_e32 v2, v2, v14, vcc
2664
+ ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 7, v0
2665
+ ; GCN-NEXT: ; kill: def $vgpr15 killed $sgpr2 killed $exec
2666
+ ; GCN-NEXT: ; kill: def $vgpr16 killed $sgpr3 killed $exec
2667
+ ; GCN-NEXT: v_cndmask_b32_e32 v0, v1, v15, vcc
2668
+ ; GCN-NEXT: v_cndmask_b32_e32 v1, v2, v16, vcc
2669
+ ; GCN-NEXT: v_readfirstlane_b32 s0, v0
2670
+ ; GCN-NEXT: v_readfirstlane_b32 s1, v1
2671
+ ; GCN-NEXT: ; return to shader part epilog
2672
+ ;
2673
+ ; GFX10-LABEL: dyn_extract_v7f64_s_v_bitcast:
2674
+ ; GFX10: ; %bb.0: ; %entry
2675
+ ; GFX10-NEXT: v_mov_b32_e32 v1, s4
2676
+ ; GFX10-NEXT: v_mov_b32_e32 v2, s5
2677
+ ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
2678
+ ; GFX10-NEXT: s_mov_b32 s0, s14
2679
+ ; GFX10-NEXT: v_cndmask_b32_e32 v1, s2, v1, vcc_lo
2680
+ ; GFX10-NEXT: v_cndmask_b32_e32 v2, s3, v2, vcc_lo
2681
+ ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v0
2682
+ ; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, s6, vcc_lo
2683
+ ; GFX10-NEXT: v_cndmask_b32_e64 v2, v2, s7, vcc_lo
2684
+ ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 3, v0
2685
+ ; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, s8, vcc_lo
2686
+ ; GFX10-NEXT: v_cndmask_b32_e64 v2, v2, s9, vcc_lo
2687
+ ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 4, v0
2688
+ ; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, s10, vcc_lo
2689
+ ; GFX10-NEXT: v_cndmask_b32_e64 v2, v2, s11, vcc_lo
2690
+ ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 5, v0
2691
+ ; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, s12, vcc_lo
2692
+ ; GFX10-NEXT: v_cndmask_b32_e64 v2, v2, s13, vcc_lo
2693
+ ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 6, v0
2694
+ ; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, s0, vcc_lo
2695
+ ; GFX10-NEXT: v_cndmask_b32_e64 v2, v2, s15, vcc_lo
2696
+ ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 7, v0
2697
+ ; GFX10-NEXT: v_cndmask_b32_e64 v0, v1, s2, vcc_lo
2698
+ ; GFX10-NEXT: v_cndmask_b32_e64 v1, v2, s3, vcc_lo
2699
+ ; GFX10-NEXT: v_readfirstlane_b32 s0, v0
2700
+ ; GFX10-NEXT: v_readfirstlane_b32 s1, v1
2701
+ ; GFX10-NEXT: ; return to shader part epilog
2702
+ ;
2703
+ ; GFX11-LABEL: dyn_extract_v7f64_s_v_bitcast:
2704
+ ; GFX11: ; %bb.0: ; %entry
2705
+ ; GFX11-NEXT: v_dual_mov_b32 v1, s4 :: v_dual_mov_b32 v2, s5
2706
+ ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
2707
+ ; GFX11-NEXT: s_mov_b32 s0, s14
2708
+ ; GFX11-NEXT: v_cndmask_b32_e32 v1, s2, v1, vcc_lo
2709
+ ; GFX11-NEXT: v_cndmask_b32_e32 v2, s3, v2, vcc_lo
2710
+ ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v0
2711
+ ; GFX11-NEXT: v_cndmask_b32_e64 v1, v1, s6, vcc_lo
2712
+ ; GFX11-NEXT: v_cndmask_b32_e64 v2, v2, s7, vcc_lo
2713
+ ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 3, v0
2714
+ ; GFX11-NEXT: v_cndmask_b32_e64 v1, v1, s8, vcc_lo
2715
+ ; GFX11-NEXT: v_cndmask_b32_e64 v2, v2, s9, vcc_lo
2716
+ ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 4, v0
2717
+ ; GFX11-NEXT: v_cndmask_b32_e64 v1, v1, s10, vcc_lo
2718
+ ; GFX11-NEXT: v_cndmask_b32_e64 v2, v2, s11, vcc_lo
2719
+ ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 5, v0
2720
+ ; GFX11-NEXT: v_cndmask_b32_e64 v1, v1, s12, vcc_lo
2721
+ ; GFX11-NEXT: v_cndmask_b32_e64 v2, v2, s13, vcc_lo
2722
+ ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 6, v0
2723
+ ; GFX11-NEXT: v_cndmask_b32_e64 v1, v1, s0, vcc_lo
2724
+ ; GFX11-NEXT: v_cndmask_b32_e64 v2, v2, s15, vcc_lo
2725
+ ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 7, v0
2726
+ ; GFX11-NEXT: v_cndmask_b32_e64 v0, v1, s2, vcc_lo
2727
+ ; GFX11-NEXT: v_cndmask_b32_e64 v1, v2, s3, vcc_lo
2728
+ ; GFX11-NEXT: v_readfirstlane_b32 s0, v0
2729
+ ; GFX11-NEXT: v_readfirstlane_b32 s1, v1
2730
+ ; GFX11-NEXT: ; return to shader part epilog
2731
+ entry:
2732
+ %bc = bitcast <14 x float > %userData to <7 x double >
2733
+ %ext = extractelement <7 x double > %bc , i32 %sel
2734
+ ret double %ext
2735
+ }
2736
+
2737
+ ; Extracts element 4 of a <14 x i32> inreg argument after bitcasting it to
+ ; <7 x i64>, and returns that i64.
+ ; The extract index is the constant 4; %sel is declared in the signature but
+ ; is not referenced anywhere in the body.
+ ; Both expected sequences are just two scalar moves (s10 into s0, s11 into s1)
+ ; with no compare/select chain.
+ ; NOTE(review): the "dyn_extract ... s_v" name suggests a runtime index, yet
+ ; the body uses a constant one; confirm that this is deliberate.
+ define amdgpu_ps i64 @dyn_extract_v7i64_s_v_bitcast (<14 x i32 > inreg %userData , i32 %sel ) {
2738
+ ; GCN-LABEL: dyn_extract_v7i64_s_v_bitcast:
2739
+ ; GCN: ; %bb.0: ; %entry
2740
+ ; GCN-NEXT: s_mov_b32 s0, s10
2741
+ ; GCN-NEXT: s_mov_b32 s1, s11
2742
+ ; GCN-NEXT: ; return to shader part epilog
2743
+ ;
2744
+ ; GFX10PLUS-LABEL: dyn_extract_v7i64_s_v_bitcast:
2745
+ ; GFX10PLUS: ; %bb.0: ; %entry
2746
+ ; GFX10PLUS-NEXT: s_mov_b32 s0, s10
2747
+ ; GFX10PLUS-NEXT: s_mov_b32 s1, s11
2748
+ ; GFX10PLUS-NEXT: ; return to shader part epilog
2749
+ entry:
2750
+ %.bc = bitcast <14 x i32 > %userData to <7 x i64 >
2751
+ %ext = extractelement <7 x i64 > %.bc , i32 4
2752
+ ret i64 %ext
2753
+ }
2754
+
2629
2755
define amdgpu_ps double @dyn_extract_v7f64_s_v (<7 x double > inreg %vec , i32 %sel ) {
2630
2756
; GCN-LABEL: dyn_extract_v7f64_s_v:
2631
2757
; GCN: ; %bb.0: ; %entry
0 commit comments