@@ -1804,113 +1804,110 @@ define i24 @v_fshl_i24(i24 %lhs, i24 %rhs, i24 %amt) {
1804
1804
define amdgpu_ps i48 @s_fshl_v2i24 (i48 inreg %lhs.arg , i48 inreg %rhs.arg , i48 inreg %amt.arg ) {
1805
1805
; GFX6-LABEL: s_fshl_v2i24:
1806
1806
; GFX6: ; %bb.0:
1807
+ ; GFX6-NEXT: v_cvt_f32_ubyte0_e32 v2, 24
1808
+ ; GFX6-NEXT: v_rcp_iflag_f32_e32 v2, v2
1807
1809
; GFX6-NEXT: s_lshr_b32 s6, s0, 16
1808
- ; GFX6-NEXT: s_lshr_b32 s7, s0, 24
1809
- ; GFX6-NEXT: s_and_b32 s9, s0, 0xff
1810
- ; GFX6-NEXT: s_bfe_u32 s0, s0, 0x80008
1811
- ; GFX6-NEXT: v_cvt_f32_ubyte0_e32 v0, 24
1812
- ; GFX6-NEXT: s_lshl_b32 s0, s0, 8
1810
+ ; GFX6-NEXT: s_lshr_b32 s7, s1, 8
1811
+ ; GFX6-NEXT: s_bfe_u32 s9, s0, 0x80008
1812
+ ; GFX6-NEXT: v_mul_f32_e32 v2, 0x4f7ffffe, v2
1813
+ ; GFX6-NEXT: v_cvt_u32_f32_e32 v2, v2
1814
+ ; GFX6-NEXT: s_and_b32 s8, s0, 0xff
1815
+ ; GFX6-NEXT: s_lshl_b32 s9, s9, 8
1813
1816
; GFX6-NEXT: s_and_b32 s6, s6, 0xff
1814
- ; GFX6-NEXT: v_rcp_iflag_f32_e32 v0, v0
1815
- ; GFX6-NEXT: s_or_b32 s0, s9, s0
1817
+ ; GFX6-NEXT: s_and_b32 s1, s1, 0xff
1818
+ ; GFX6-NEXT: v_mov_b32_e32 v0, s0
1819
+ ; GFX6-NEXT: s_and_b32 s0, s7, 0xff
1820
+ ; GFX6-NEXT: s_or_b32 s8, s8, s9
1816
1821
; GFX6-NEXT: s_and_b32 s6, 0xffff, s6
1817
- ; GFX6-NEXT: s_lshr_b32 s8 , s1, 8
1822
+ ; GFX6-NEXT: v_alignbit_b32 v0 , s1, v0, 24
1818
1823
; GFX6-NEXT: s_and_b32 s0, 0xffff, s0
1824
+ ; GFX6-NEXT: s_and_b32 s8, 0xffff, s8
1819
1825
; GFX6-NEXT: s_lshl_b32 s6, s6, 16
1826
+ ; GFX6-NEXT: v_and_b32_e32 v0, 0xffff, v0
1827
+ ; GFX6-NEXT: s_lshl_b32 s0, s0, 16
1828
+ ; GFX6-NEXT: v_mov_b32_e32 v3, 0xffffffe8
1829
+ ; GFX6-NEXT: s_or_b32 s6, s8, s6
1830
+ ; GFX6-NEXT: v_or_b32_e32 v0, s0, v0
1831
+ ; GFX6-NEXT: s_lshr_b32 s0, s2, 16
1832
+ ; GFX6-NEXT: s_lshr_b32 s1, s3, 8
1833
+ ; GFX6-NEXT: s_bfe_u32 s8, s2, 0x80008
1834
+ ; GFX6-NEXT: v_mul_lo_u32 v3, v2, v3
1835
+ ; GFX6-NEXT: s_and_b32 s7, s2, 0xff
1836
+ ; GFX6-NEXT: s_lshl_b32 s8, s8, 8
1837
+ ; GFX6-NEXT: s_and_b32 s0, s0, 0xff
1838
+ ; GFX6-NEXT: s_and_b32 s3, s3, 0xff
1839
+ ; GFX6-NEXT: v_mov_b32_e32 v1, s2
1820
1840
; GFX6-NEXT: s_and_b32 s1, s1, 0xff
1821
- ; GFX6-NEXT: s_or_b32 s0, s0, s6
1822
- ; GFX6-NEXT: s_lshl_b32 s1, s1, 8
1823
- ; GFX6-NEXT: s_and_b32 s6, s8, 0xff
1824
- ; GFX6-NEXT: s_or_b32 s1, s7, s1
1825
- ; GFX6-NEXT: s_and_b32 s6, 0xffff, s6
1826
- ; GFX6-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0
1841
+ ; GFX6-NEXT: s_or_b32 s7, s7, s8
1842
+ ; GFX6-NEXT: s_and_b32 s0, 0xffff, s0
1843
+ ; GFX6-NEXT: v_alignbit_b32 v1, s3, v1, 24
1844
+ ; GFX6-NEXT: s_and_b32 s1, 0xffff, s1
1845
+ ; GFX6-NEXT: s_and_b32 s7, 0xffff, s7
1846
+ ; GFX6-NEXT: s_lshl_b32 s0, s0, 16
1847
+ ; GFX6-NEXT: v_and_b32_e32 v1, 0xffff, v1
1848
+ ; GFX6-NEXT: s_lshl_b32 s1, s1, 16
1849
+ ; GFX6-NEXT: s_or_b32 s0, s7, s0
1850
+ ; GFX6-NEXT: v_or_b32_e32 v1, s1, v1
1851
+ ; GFX6-NEXT: s_lshr_b32 s1, s4, 16
1852
+ ; GFX6-NEXT: s_bfe_u32 s7, s4, 0x80008
1853
+ ; GFX6-NEXT: v_mul_hi_u32 v3, v2, v3
1854
+ ; GFX6-NEXT: s_and_b32 s3, s4, 0xff
1855
+ ; GFX6-NEXT: s_lshl_b32 s7, s7, 8
1856
+ ; GFX6-NEXT: s_and_b32 s1, s1, 0xff
1857
+ ; GFX6-NEXT: s_or_b32 s3, s3, s7
1827
1858
; GFX6-NEXT: s_and_b32 s1, 0xffff, s1
1828
- ; GFX6-NEXT: s_lshl_b32 s6, s6, 16
1829
- ; GFX6-NEXT: v_cvt_u32_f32_e32 v0, v0
1830
- ; GFX6-NEXT: s_or_b32 s1, s1, s6
1831
- ; GFX6-NEXT: s_lshr_b32 s6, s2, 16
1832
- ; GFX6-NEXT: s_lshr_b32 s7, s2, 24
1833
- ; GFX6-NEXT: s_and_b32 s9, s2, 0xff
1834
- ; GFX6-NEXT: s_bfe_u32 s2, s2, 0x80008
1835
- ; GFX6-NEXT: s_lshl_b32 s2, s2, 8
1836
- ; GFX6-NEXT: s_and_b32 s6, s6, 0xff
1837
- ; GFX6-NEXT: s_or_b32 s2, s9, s2
1838
- ; GFX6-NEXT: s_and_b32 s6, 0xffff, s6
1839
- ; GFX6-NEXT: v_mov_b32_e32 v1, 0xffffffe8
1840
- ; GFX6-NEXT: s_lshr_b32 s8, s3, 8
1841
- ; GFX6-NEXT: s_and_b32 s2, 0xffff, s2
1842
- ; GFX6-NEXT: s_lshl_b32 s6, s6, 16
1843
- ; GFX6-NEXT: s_and_b32 s3, s3, 0xff
1844
- ; GFX6-NEXT: v_mul_lo_u32 v1, v0, v1
1845
- ; GFX6-NEXT: s_or_b32 s2, s2, s6
1846
- ; GFX6-NEXT: s_lshl_b32 s3, s3, 8
1847
- ; GFX6-NEXT: s_and_b32 s6, s8, 0xff
1848
- ; GFX6-NEXT: s_or_b32 s3, s7, s3
1849
- ; GFX6-NEXT: s_and_b32 s6, 0xffff, s6
1850
1859
; GFX6-NEXT: s_and_b32 s3, 0xffff, s3
1851
- ; GFX6-NEXT: s_lshl_b32 s6, s6, 16
1852
- ; GFX6-NEXT: s_or_b32 s3, s3, s6
1853
- ; GFX6-NEXT: s_lshr_b32 s6, s4, 16
1854
- ; GFX6-NEXT: s_lshr_b32 s7, s4, 24
1855
- ; GFX6-NEXT: s_and_b32 s9, s4, 0xff
1856
- ; GFX6-NEXT: s_bfe_u32 s4, s4, 0x80008
1857
- ; GFX6-NEXT: v_mul_hi_u32 v1, v0, v1
1858
- ; GFX6-NEXT: s_lshl_b32 s4, s4, 8
1859
- ; GFX6-NEXT: s_and_b32 s6, s6, 0xff
1860
- ; GFX6-NEXT: s_or_b32 s4, s9, s4
1861
- ; GFX6-NEXT: s_and_b32 s6, 0xffff, s6
1862
- ; GFX6-NEXT: s_and_b32 s4, 0xffff, s4
1863
- ; GFX6-NEXT: s_lshl_b32 s6, s6, 16
1864
- ; GFX6-NEXT: s_or_b32 s4, s4, s6
1865
- ; GFX6-NEXT: v_add_i32_e32 v0, vcc, v0, v1
1866
- ; GFX6-NEXT: v_mul_hi_u32 v1, s4, v0
1867
- ; GFX6-NEXT: s_lshr_b32 s8, s5, 8
1868
- ; GFX6-NEXT: s_and_b32 s5, s5, 0xff
1869
- ; GFX6-NEXT: s_lshl_b32 s5, s5, 8
1870
- ; GFX6-NEXT: v_mul_lo_u32 v1, v1, 24
1871
- ; GFX6-NEXT: s_and_b32 s6, s8, 0xff
1872
- ; GFX6-NEXT: s_or_b32 s5, s7, s5
1873
- ; GFX6-NEXT: s_and_b32 s6, 0xffff, s6
1874
- ; GFX6-NEXT: s_and_b32 s5, 0xffff, s5
1875
- ; GFX6-NEXT: s_lshl_b32 s6, s6, 16
1876
- ; GFX6-NEXT: s_or_b32 s5, s5, s6
1877
- ; GFX6-NEXT: v_sub_i32_e32 v1, vcc, s4, v1
1878
- ; GFX6-NEXT: v_subrev_i32_e32 v2, vcc, 24, v1
1879
- ; GFX6-NEXT: v_mul_hi_u32 v0, s5, v0
1880
- ; GFX6-NEXT: v_cmp_le_u32_e32 vcc, 24, v1
1881
- ; GFX6-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc
1882
- ; GFX6-NEXT: v_subrev_i32_e32 v2, vcc, 24, v1
1883
- ; GFX6-NEXT: v_cmp_le_u32_e32 vcc, 24, v1
1884
- ; GFX6-NEXT: v_mul_lo_u32 v0, v0, 24
1885
- ; GFX6-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc
1886
- ; GFX6-NEXT: v_sub_i32_e32 v2, vcc, 23, v1
1887
- ; GFX6-NEXT: v_and_b32_e32 v1, 0xffffff, v1
1888
- ; GFX6-NEXT: v_lshl_b32_e32 v1, s0, v1
1889
- ; GFX6-NEXT: s_lshr_b32 s0, s2, 1
1860
+ ; GFX6-NEXT: s_lshl_b32 s1, s1, 16
1861
+ ; GFX6-NEXT: s_or_b32 s1, s3, s1
1862
+ ; GFX6-NEXT: v_add_i32_e32 v2, vcc, v2, v3
1863
+ ; GFX6-NEXT: v_mul_hi_u32 v3, s1, v2
1864
+ ; GFX6-NEXT: s_lshr_b32 s2, s5, 8
1865
+ ; GFX6-NEXT: s_and_b32 s3, s5, 0xff
1866
+ ; GFX6-NEXT: v_mov_b32_e32 v4, s4
1867
+ ; GFX6-NEXT: s_and_b32 s2, s2, 0xff
1868
+ ; GFX6-NEXT: v_alignbit_b32 v4, s3, v4, 24
1869
+ ; GFX6-NEXT: s_and_b32 s2, 0xffff, s2
1870
+ ; GFX6-NEXT: v_and_b32_e32 v4, 0xffff, v4
1871
+ ; GFX6-NEXT: v_mul_lo_u32 v3, v3, 24
1872
+ ; GFX6-NEXT: s_lshl_b32 s2, s2, 16
1873
+ ; GFX6-NEXT: v_or_b32_e32 v4, s2, v4
1874
+ ; GFX6-NEXT: v_mul_hi_u32 v2, v4, v2
1875
+ ; GFX6-NEXT: v_sub_i32_e32 v3, vcc, s1, v3
1876
+ ; GFX6-NEXT: v_subrev_i32_e32 v5, vcc, 24, v3
1877
+ ; GFX6-NEXT: v_cmp_le_u32_e32 vcc, 24, v3
1878
+ ; GFX6-NEXT: v_mul_lo_u32 v2, v2, 24
1879
+ ; GFX6-NEXT: v_cndmask_b32_e32 v3, v3, v5, vcc
1880
+ ; GFX6-NEXT: v_subrev_i32_e32 v5, vcc, 24, v3
1881
+ ; GFX6-NEXT: v_cmp_le_u32_e32 vcc, 24, v3
1882
+ ; GFX6-NEXT: v_cndmask_b32_e32 v3, v3, v5, vcc
1883
+ ; GFX6-NEXT: v_sub_i32_e32 v2, vcc, v4, v2
1884
+ ; GFX6-NEXT: v_sub_i32_e32 v5, vcc, 23, v3
1885
+ ; GFX6-NEXT: v_subrev_i32_e32 v4, vcc, 24, v2
1886
+ ; GFX6-NEXT: v_cmp_le_u32_e32 vcc, 24, v2
1887
+ ; GFX6-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc
1888
+ ; GFX6-NEXT: v_subrev_i32_e32 v4, vcc, 24, v2
1889
+ ; GFX6-NEXT: v_cmp_le_u32_e32 vcc, 24, v2
1890
+ ; GFX6-NEXT: v_and_b32_e32 v3, 0xffffff, v3
1891
+ ; GFX6-NEXT: s_lshr_b32 s0, s0, 1
1892
+ ; GFX6-NEXT: v_and_b32_e32 v5, 0xffffff, v5
1893
+ ; GFX6-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc
1894
+ ; GFX6-NEXT: v_lshl_b32_e32 v3, s6, v3
1895
+ ; GFX6-NEXT: v_lshr_b32_e32 v5, s0, v5
1896
+ ; GFX6-NEXT: v_sub_i32_e32 v4, vcc, 23, v2
1890
1897
; GFX6-NEXT: v_and_b32_e32 v2, 0xffffff, v2
1891
- ; GFX6-NEXT: v_lshr_b32_e32 v2, s0, v2
1892
- ; GFX6-NEXT: v_sub_i32_e32 v0, vcc, s5, v0
1898
+ ; GFX6-NEXT: v_or_b32_e32 v3, v3, v5
1899
+ ; GFX6-NEXT: v_lshlrev_b32_e32 v0, v2, v0
1900
+ ; GFX6-NEXT: v_lshrrev_b32_e32 v1, 1, v1
1901
+ ; GFX6-NEXT: v_and_b32_e32 v2, 0xffffff, v4
1902
+ ; GFX6-NEXT: v_lshrrev_b32_e32 v1, v2, v1
1903
+ ; GFX6-NEXT: v_bfe_u32 v2, v3, 8, 8
1904
+ ; GFX6-NEXT: v_or_b32_e32 v0, v0, v1
1905
+ ; GFX6-NEXT: v_and_b32_e32 v1, 0xff, v3
1906
+ ; GFX6-NEXT: v_lshlrev_b32_e32 v2, 8, v2
1907
+ ; GFX6-NEXT: v_or_b32_e32 v1, v1, v2
1908
+ ; GFX6-NEXT: v_bfe_u32 v2, v3, 16, 8
1909
+ ; GFX6-NEXT: v_lshlrev_b32_e32 v2, 16, v2
1893
1910
; GFX6-NEXT: v_or_b32_e32 v1, v1, v2
1894
- ; GFX6-NEXT: v_subrev_i32_e32 v2, vcc, 24, v0
1895
- ; GFX6-NEXT: v_cmp_le_u32_e32 vcc, 24, v0
1896
- ; GFX6-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
1897
- ; GFX6-NEXT: v_subrev_i32_e32 v2, vcc, 24, v0
1898
- ; GFX6-NEXT: v_cmp_le_u32_e32 vcc, 24, v0
1899
- ; GFX6-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
1900
- ; GFX6-NEXT: v_sub_i32_e32 v2, vcc, 23, v0
1901
- ; GFX6-NEXT: v_and_b32_e32 v0, 0xffffff, v0
1902
- ; GFX6-NEXT: s_lshr_b32 s0, s3, 1
1903
- ; GFX6-NEXT: v_and_b32_e32 v2, 0xffffff, v2
1904
- ; GFX6-NEXT: v_lshl_b32_e32 v0, s1, v0
1905
- ; GFX6-NEXT: v_lshr_b32_e32 v2, s0, v2
1906
- ; GFX6-NEXT: v_bfe_u32 v3, v1, 8, 8
1907
- ; GFX6-NEXT: v_or_b32_e32 v0, v0, v2
1908
- ; GFX6-NEXT: v_and_b32_e32 v2, 0xff, v1
1909
- ; GFX6-NEXT: v_lshlrev_b32_e32 v3, 8, v3
1910
- ; GFX6-NEXT: v_bfe_u32 v1, v1, 16, 8
1911
- ; GFX6-NEXT: v_or_b32_e32 v2, v2, v3
1912
- ; GFX6-NEXT: v_lshlrev_b32_e32 v1, 16, v1
1913
- ; GFX6-NEXT: v_or_b32_e32 v1, v2, v1
1914
1911
; GFX6-NEXT: v_and_b32_e32 v2, 0xff, v0
1915
1912
; GFX6-NEXT: v_lshlrev_b32_e32 v2, 24, v2
1916
1913
; GFX6-NEXT: v_or_b32_e32 v1, v1, v2
0 commit comments