@@ -2714,6 +2714,168 @@ entry:
2714
2714
ret double %ext
2715
2715
}
2716
2716
2717
; Test: dynamic extractelement from a bitcast SGPR vector.
;   %userData - <14 x float>, passed 'inreg'; bitcast to <7 x double> in the body.
;   %sel      - i32 element index, not a constant.
;   returns   - the selected double.
; The checks below (GCN, GFX10 and GFX11 prefixes) expect a chain of
; v_cmp_eq_u32 / v_cndmask_b32 pairs comparing %sel (v0) against 1..7 and
; selecting the low/high 32-bit halves separately, followed by
; v_readfirstlane_b32 into s0/s1 for the return value.
; NOTE(review): the chain also compares against index 7, which is outside a
; 7-element vector; on GCN the operands for that case (v15/v16) appear only
; as 'kill' defs. Presumably the vector is widened to 8 elements - confirm.
; NOTE(review): the check lines look autogenerated (update_llc_test_checks.py
; style); regenerate them rather than editing by hand - confirm against the
; file header.
+ define amdgpu_ps double @dyn_extract_v7f64_s_v_bitcast (<14 x float > inreg %userData , i32 %sel ) {
2718
+ ; GCN-LABEL: dyn_extract_v7f64_s_v_bitcast:
2719
+ ; GCN: ; %bb.0: ; %entry
2720
+ ; GCN-NEXT: s_mov_b32 s0, s2
2721
+ ; GCN-NEXT: s_mov_b32 s1, s3
2722
+ ; GCN-NEXT: s_mov_b32 s2, s4
2723
+ ; GCN-NEXT: s_mov_b32 s3, s5
2724
+ ; GCN-NEXT: s_mov_b32 s4, s6
2725
+ ; GCN-NEXT: s_mov_b32 s5, s7
2726
+ ; GCN-NEXT: v_mov_b32_e32 v1, s0
2727
+ ; GCN-NEXT: v_mov_b32_e32 v2, s1
2728
+ ; GCN-NEXT: v_mov_b32_e32 v3, s2
2729
+ ; GCN-NEXT: v_mov_b32_e32 v4, s3
2730
+ ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
2731
+ ; GCN-NEXT: s_mov_b32 s6, s8
2732
+ ; GCN-NEXT: s_mov_b32 s7, s9
2733
+ ; GCN-NEXT: v_mov_b32_e32 v5, s4
2734
+ ; GCN-NEXT: v_mov_b32_e32 v6, s5
2735
+ ; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc
2736
+ ; GCN-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc
2737
+ ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 2, v0
2738
+ ; GCN-NEXT: s_mov_b32 s8, s10
2739
+ ; GCN-NEXT: s_mov_b32 s9, s11
2740
+ ; GCN-NEXT: v_mov_b32_e32 v7, s6
2741
+ ; GCN-NEXT: v_mov_b32_e32 v8, s7
2742
+ ; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc
2743
+ ; GCN-NEXT: v_cndmask_b32_e32 v2, v2, v6, vcc
2744
+ ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 3, v0
2745
+ ; GCN-NEXT: v_mov_b32_e32 v9, s8
2746
+ ; GCN-NEXT: v_mov_b32_e32 v10, s9
2747
+ ; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v7, vcc
2748
+ ; GCN-NEXT: v_cndmask_b32_e32 v2, v2, v8, vcc
2749
+ ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 4, v0
2750
+ ; GCN-NEXT: v_mov_b32_e32 v11, s12
2751
+ ; GCN-NEXT: v_mov_b32_e32 v12, s13
2752
+ ; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v9, vcc
2753
+ ; GCN-NEXT: v_cndmask_b32_e32 v2, v2, v10, vcc
2754
+ ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 5, v0
2755
+ ; GCN-NEXT: v_mov_b32_e32 v13, s14
2756
+ ; GCN-NEXT: v_mov_b32_e32 v14, s15
2757
+ ; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v11, vcc
2758
+ ; GCN-NEXT: v_cndmask_b32_e32 v2, v2, v12, vcc
2759
+ ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 6, v0
2760
+ ; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v13, vcc
2761
+ ; GCN-NEXT: v_cndmask_b32_e32 v2, v2, v14, vcc
2762
+ ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 7, v0
2763
+ ; GCN-NEXT: ; kill: def $vgpr15 killed $sgpr14 killed $exec
2764
+ ; GCN-NEXT: ; kill: def $vgpr16 killed $sgpr15 killed $exec
2765
+ ; GCN-NEXT: v_cndmask_b32_e32 v0, v1, v15, vcc
2766
+ ; GCN-NEXT: v_cndmask_b32_e32 v1, v2, v16, vcc
2767
+ ; GCN-NEXT: v_readfirstlane_b32 s0, v0
2768
+ ; GCN-NEXT: v_readfirstlane_b32 s1, v1
2769
+ ; GCN-NEXT: ; return to shader part epilog
2770
+ ;
2771
+ ; GFX10-LABEL: dyn_extract_v7f64_s_v_bitcast:
2772
+ ; GFX10: ; %bb.0: ; %entry
2773
+ ; GFX10-NEXT: s_mov_b32 s0, s2
2774
+ ; GFX10-NEXT: s_mov_b32 s2, s4
2775
+ ; GFX10-NEXT: s_mov_b32 s19, s5
2776
+ ; GFX10-NEXT: v_mov_b32_e32 v1, s2
2777
+ ; GFX10-NEXT: v_mov_b32_e32 v2, s19
2778
+ ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
2779
+ ; GFX10-NEXT: s_mov_b32 s1, s3
2780
+ ; GFX10-NEXT: s_mov_b32 s4, s6
2781
+ ; GFX10-NEXT: s_mov_b32 s5, s7
2782
+ ; GFX10-NEXT: s_mov_b32 s6, s8
2783
+ ; GFX10-NEXT: v_cndmask_b32_e32 v1, s0, v1, vcc_lo
2784
+ ; GFX10-NEXT: v_cndmask_b32_e32 v2, s1, v2, vcc_lo
2785
+ ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v0
2786
+ ; GFX10-NEXT: s_mov_b32 s7, s9
2787
+ ; GFX10-NEXT: s_mov_b32 s8, s10
2788
+ ; GFX10-NEXT: s_mov_b32 s9, s11
2789
+ ; GFX10-NEXT: s_mov_b32 s10, s12
2790
+ ; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, s4, vcc_lo
2791
+ ; GFX10-NEXT: v_cndmask_b32_e64 v2, v2, s5, vcc_lo
2792
+ ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 3, v0
2793
+ ; GFX10-NEXT: s_mov_b32 s11, s13
2794
+ ; GFX10-NEXT: s_mov_b32 s12, s14
2795
+ ; GFX10-NEXT: s_mov_b32 s13, s15
2796
+ ; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, s6, vcc_lo
2797
+ ; GFX10-NEXT: v_cndmask_b32_e64 v2, v2, s7, vcc_lo
2798
+ ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 4, v0
2799
+ ; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, s8, vcc_lo
2800
+ ; GFX10-NEXT: v_cndmask_b32_e64 v2, v2, s9, vcc_lo
2801
+ ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 5, v0
2802
+ ; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, s10, vcc_lo
2803
+ ; GFX10-NEXT: v_cndmask_b32_e64 v2, v2, s11, vcc_lo
2804
+ ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 6, v0
2805
+ ; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, s12, vcc_lo
2806
+ ; GFX10-NEXT: v_cndmask_b32_e64 v2, v2, s13, vcc_lo
2807
+ ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 7, v0
2808
+ ; GFX10-NEXT: v_cndmask_b32_e64 v0, v1, s14, vcc_lo
2809
+ ; GFX10-NEXT: v_cndmask_b32_e64 v1, v2, s15, vcc_lo
2810
+ ; GFX10-NEXT: v_readfirstlane_b32 s0, v0
2811
+ ; GFX10-NEXT: v_readfirstlane_b32 s1, v1
2812
+ ; GFX10-NEXT: ; return to shader part epilog
2813
+ ;
2814
+ ; GFX11-LABEL: dyn_extract_v7f64_s_v_bitcast:
2815
+ ; GFX11: ; %bb.0: ; %entry
2816
+ ; GFX11-NEXT: s_mov_b32 s0, s2
2817
+ ; GFX11-NEXT: s_mov_b32 s2, s4
2818
+ ; GFX11-NEXT: s_mov_b32 s19, s5
2819
+ ; GFX11-NEXT: v_dual_mov_b32 v1, s2 :: v_dual_mov_b32 v2, s19
2820
+ ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
2821
+ ; GFX11-NEXT: s_mov_b32 s1, s3
2822
+ ; GFX11-NEXT: s_mov_b32 s4, s6
2823
+ ; GFX11-NEXT: s_mov_b32 s5, s7
2824
+ ; GFX11-NEXT: s_mov_b32 s6, s8
2825
+ ; GFX11-NEXT: v_cndmask_b32_e32 v1, s0, v1, vcc_lo
2826
+ ; GFX11-NEXT: v_cndmask_b32_e32 v2, s1, v2, vcc_lo
2827
+ ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v0
2828
+ ; GFX11-NEXT: s_mov_b32 s7, s9
2829
+ ; GFX11-NEXT: s_mov_b32 s8, s10
2830
+ ; GFX11-NEXT: s_mov_b32 s9, s11
2831
+ ; GFX11-NEXT: s_mov_b32 s10, s12
2832
+ ; GFX11-NEXT: v_cndmask_b32_e64 v1, v1, s4, vcc_lo
2833
+ ; GFX11-NEXT: v_cndmask_b32_e64 v2, v2, s5, vcc_lo
2834
+ ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 3, v0
2835
+ ; GFX11-NEXT: s_mov_b32 s11, s13
2836
+ ; GFX11-NEXT: s_mov_b32 s12, s14
2837
+ ; GFX11-NEXT: s_mov_b32 s13, s15
2838
+ ; GFX11-NEXT: v_cndmask_b32_e64 v1, v1, s6, vcc_lo
2839
+ ; GFX11-NEXT: v_cndmask_b32_e64 v2, v2, s7, vcc_lo
2840
+ ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 4, v0
2841
+ ; GFX11-NEXT: v_cndmask_b32_e64 v1, v1, s8, vcc_lo
2842
+ ; GFX11-NEXT: v_cndmask_b32_e64 v2, v2, s9, vcc_lo
2843
+ ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 5, v0
2844
+ ; GFX11-NEXT: v_cndmask_b32_e64 v1, v1, s10, vcc_lo
2845
+ ; GFX11-NEXT: v_cndmask_b32_e64 v2, v2, s11, vcc_lo
2846
+ ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 6, v0
2847
+ ; GFX11-NEXT: v_cndmask_b32_e64 v1, v1, s12, vcc_lo
2848
+ ; GFX11-NEXT: v_cndmask_b32_e64 v2, v2, s13, vcc_lo
2849
+ ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 7, v0
2850
+ ; GFX11-NEXT: v_cndmask_b32_e64 v0, v1, s14, vcc_lo
2851
+ ; GFX11-NEXT: v_cndmask_b32_e64 v1, v2, s15, vcc_lo
2852
+ ; GFX11-NEXT: v_readfirstlane_b32 s0, v0
2853
+ ; GFX11-NEXT: v_readfirstlane_b32 s1, v1
2854
+ ; GFX11-NEXT: ; return to shader part epilog
2855
+ entry:
2856
+ %bc = bitcast <14 x float > %userData to <7 x double >
2857
+ %ext = extractelement <7 x double > %bc , i32 %sel
2858
+ ret double %ext
2859
+ }
2860
+
2861
; Test: extractelement from a bitcast SGPR vector, integer variant.
;   %userData - <14 x i32>, passed 'inreg'; bitcast to <7 x i64> in the body.
;   %sel      - i32; not referenced by the body.
;   returns   - element 4 of the bitcast <7 x i64>.
; Both check prefixes (GCN, GFX10PLUS) expect just two scalar moves,
; s10 -> s0 and s11 -> s1, with no compare/select chain.
; NOTE(review): despite the 'dyn_' name, the extract index is the constant 4
; and %sel is unused - confirm this is intentional (e.g. a reduced reproducer)
; and not meant to be 'i32 %sel'. Changing the IR requires regenerating the
; check lines.
+ define amdgpu_ps i64 @dyn_extract_v7i64_s_v_bitcast (<14 x i32 > inreg %userData , i32 %sel ) {
2862
+ ; GCN-LABEL: dyn_extract_v7i64_s_v_bitcast:
2863
+ ; GCN: ; %bb.0: ; %entry
2864
+ ; GCN-NEXT: s_mov_b32 s0, s10
2865
+ ; GCN-NEXT: s_mov_b32 s1, s11
2866
+ ; GCN-NEXT: ; return to shader part epilog
2867
+ ;
2868
+ ; GFX10PLUS-LABEL: dyn_extract_v7i64_s_v_bitcast:
2869
+ ; GFX10PLUS: ; %bb.0: ; %entry
2870
+ ; GFX10PLUS-NEXT: s_mov_b32 s0, s10
2871
+ ; GFX10PLUS-NEXT: s_mov_b32 s1, s11
2872
+ ; GFX10PLUS-NEXT: ; return to shader part epilog
2873
+ entry:
2874
+ %.bc = bitcast <14 x i32 > %userData to <7 x i64 >
2875
+ %ext = extractelement <7 x i64 > %.bc , i32 4
2876
+ ret i64 %ext
2877
+ }
2878
+
2717
2879
define amdgpu_ps double @dyn_extract_v7f64_s_v (<7 x double > inreg %vec , i32 %sel ) {
2718
2880
; GCN-LABEL: dyn_extract_v7f64_s_v:
2719
2881
; GCN: ; %bb.0: ; %entry
0 commit comments