@@ -572,13 +572,12 @@ define amdgpu_kernel void @add_i32_uniform(ptr addrspace(1) %out, ptr addrspace(
572
572
; GFX1264-NEXT: global_inv scope:SCOPE_DEV
573
573
; GFX1264-NEXT: .LBB1_2:
574
574
; GFX1264-NEXT: s_or_b64 exec, exec, s[0:1]
575
- ; GFX1264-NEXT: s_waitcnt lgkmcnt(0)
576
- ; GFX1264-NEXT: v_mul_lo_u32 v0, s8, v0
577
575
; GFX1264-NEXT: v_readfirstlane_b32 s0, v1
576
+ ; GFX1264-NEXT: s_waitcnt lgkmcnt(0)
578
577
; GFX1264-NEXT: s_mov_b32 s7, 0x31016000
579
578
; GFX1264-NEXT: s_mov_b32 s6, -1
580
579
; GFX1264-NEXT: s_delay_alu instid0(VALU_DEP_1)
581
- ; GFX1264-NEXT: v_add_nc_u32_e32 v0, s0, v0
580
+ ; GFX1264-NEXT: v_mad_co_u64_u32 v[0:1], null, s8, v0, s[0:1]
582
581
; GFX1264-NEXT: buffer_store_b32 v0, off, s[4:7], null
583
582
; GFX1264-NEXT: s_nop 0
584
583
; GFX1264-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
@@ -610,13 +609,12 @@ define amdgpu_kernel void @add_i32_uniform(ptr addrspace(1) %out, ptr addrspace(
610
609
; GFX1232-NEXT: global_inv scope:SCOPE_DEV
611
610
; GFX1232-NEXT: .LBB1_2:
612
611
; GFX1232-NEXT: s_or_b32 exec_lo, exec_lo, s1
612
+ ; GFX1232-NEXT: v_readfirstlane_b32 s2, v1
613
613
; GFX1232-NEXT: s_waitcnt lgkmcnt(0)
614
- ; GFX1232-NEXT: v_mul_lo_u32 v0, s0, v0
615
- ; GFX1232-NEXT: v_readfirstlane_b32 s0, v1
616
614
; GFX1232-NEXT: s_mov_b32 s7, 0x31016000
617
615
; GFX1232-NEXT: s_mov_b32 s6, -1
618
616
; GFX1232-NEXT: s_delay_alu instid0(VALU_DEP_1)
619
- ; GFX1232-NEXT: v_add_nc_u32_e32 v0, s0, v0
617
+ ; GFX1232-NEXT: v_mad_co_u64_u32 v[0:1], null, s0, v0, s[2:3]
620
618
; GFX1232-NEXT: buffer_store_b32 v0, off, s[4:7], null
621
619
; GFX1232-NEXT: s_nop 0
622
620
; GFX1232-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
@@ -1671,12 +1669,11 @@ define amdgpu_kernel void @add_i64_uniform(ptr addrspace(1) %out, ptr addrspace(
1671
1669
; GFX1264-NEXT: v_readfirstlane_b32 s2, v0
1672
1670
; GFX1264-NEXT: v_readfirstlane_b32 s3, v1
1673
1671
; GFX1264-NEXT: s_waitcnt lgkmcnt(0)
1674
- ; GFX1264-NEXT: v_mul_lo_u32 v3, s1, v2
1675
1672
; GFX1264-NEXT: s_mov_b32 s7, 0x31016000
1676
1673
; GFX1264-NEXT: s_mov_b32 s6, -1
1674
+ ; GFX1264-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
1677
1675
; GFX1264-NEXT: v_mad_co_u64_u32 v[0:1], null, s0, v2, s[2:3]
1678
- ; GFX1264-NEXT: s_delay_alu instid0(VALU_DEP_1)
1679
- ; GFX1264-NEXT: v_add_nc_u32_e32 v1, v3, v1
1676
+ ; GFX1264-NEXT: v_mad_co_u64_u32 v[1:2], null, s1, v2, v[1:2]
1680
1677
; GFX1264-NEXT: buffer_store_b64 v[0:1], off, s[4:7], null
1681
1678
; GFX1264-NEXT: s_nop 0
1682
1679
; GFX1264-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
@@ -1712,12 +1709,11 @@ define amdgpu_kernel void @add_i64_uniform(ptr addrspace(1) %out, ptr addrspace(
1712
1709
; GFX1232-NEXT: v_readfirstlane_b32 s2, v0
1713
1710
; GFX1232-NEXT: v_readfirstlane_b32 s3, v1
1714
1711
; GFX1232-NEXT: s_waitcnt lgkmcnt(0)
1715
- ; GFX1232-NEXT: v_mul_lo_u32 v3, s1, v2
1716
1712
; GFX1232-NEXT: s_mov_b32 s7, 0x31016000
1717
1713
; GFX1232-NEXT: s_mov_b32 s6, -1
1714
+ ; GFX1232-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
1718
1715
; GFX1232-NEXT: v_mad_co_u64_u32 v[0:1], null, s0, v2, s[2:3]
1719
- ; GFX1232-NEXT: s_delay_alu instid0(VALU_DEP_1)
1720
- ; GFX1232-NEXT: v_add_nc_u32_e32 v1, v3, v1
1716
+ ; GFX1232-NEXT: v_mad_co_u64_u32 v[1:2], null, s1, v2, v[1:2]
1721
1717
; GFX1232-NEXT: buffer_store_b64 v[0:1], off, s[4:7], null
1722
1718
; GFX1232-NEXT: s_nop 0
1723
1719
; GFX1232-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
@@ -3608,16 +3604,16 @@ define amdgpu_kernel void @sub_i64_uniform(ptr addrspace(1) %out, ptr addrspace(
3608
3604
; GFX1264-NEXT: .LBB10_2:
3609
3605
; GFX1264-NEXT: s_or_b64 exec, exec, s[2:3]
3610
3606
; GFX1264-NEXT: s_waitcnt lgkmcnt(0)
3611
- ; GFX1264-NEXT: v_mul_lo_u32 v5, s1, v2
3612
3607
; GFX1264-NEXT: v_mad_co_u64_u32 v[3:4], null, s0, v2, 0
3613
3608
; GFX1264-NEXT: v_readfirstlane_b32 s0, v0
3614
- ; GFX1264-NEXT: v_readfirstlane_b32 s1, v1
3615
3609
; GFX1264-NEXT: s_mov_b32 s7, 0x31016000
3616
3610
; GFX1264-NEXT: s_mov_b32 s6, -1
3617
- ; GFX1264-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_4)
3618
- ; GFX1264-NEXT: v_add_nc_u32_e32 v1, v4, v5
3611
+ ; GFX1264-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_4)
3612
+ ; GFX1264-NEXT: v_mad_co_u64_u32 v[4:5], null, s1, v2, v[4:5]
3613
+ ; GFX1264-NEXT: v_readfirstlane_b32 s1, v1
3619
3614
; GFX1264-NEXT: v_sub_co_u32 v0, vcc, s0, v3
3620
- ; GFX1264-NEXT: s_delay_alu instid0(VALU_DEP_2)
3615
+ ; GFX1264-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
3616
+ ; GFX1264-NEXT: v_mov_b32_e32 v1, v4
3621
3617
; GFX1264-NEXT: v_sub_co_ci_u32_e32 v1, vcc, s1, v1, vcc
3622
3618
; GFX1264-NEXT: buffer_store_b64 v[0:1], off, s[4:7], null
3623
3619
; GFX1264-NEXT: s_nop 0
@@ -3652,16 +3648,16 @@ define amdgpu_kernel void @sub_i64_uniform(ptr addrspace(1) %out, ptr addrspace(
3652
3648
; GFX1232-NEXT: .LBB10_2:
3653
3649
; GFX1232-NEXT: s_or_b32 exec_lo, exec_lo, s8
3654
3650
; GFX1232-NEXT: s_waitcnt lgkmcnt(0)
3655
- ; GFX1232-NEXT: v_mul_lo_u32 v5, s1, v2
3656
3651
; GFX1232-NEXT: v_mad_co_u64_u32 v[3:4], null, s0, v2, 0
3657
3652
; GFX1232-NEXT: v_readfirstlane_b32 s0, v0
3658
- ; GFX1232-NEXT: v_readfirstlane_b32 s1, v1
3659
3653
; GFX1232-NEXT: s_mov_b32 s7, 0x31016000
3660
3654
; GFX1232-NEXT: s_mov_b32 s6, -1
3661
- ; GFX1232-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_4)
3662
- ; GFX1232-NEXT: v_add_nc_u32_e32 v1, v4, v5
3655
+ ; GFX1232-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_4)
3656
+ ; GFX1232-NEXT: v_mad_co_u64_u32 v[4:5], null, s1, v2, v[4:5]
3657
+ ; GFX1232-NEXT: v_readfirstlane_b32 s1, v1
3663
3658
; GFX1232-NEXT: v_sub_co_u32 v0, vcc_lo, s0, v3
3664
- ; GFX1232-NEXT: s_delay_alu instid0(VALU_DEP_2)
3659
+ ; GFX1232-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
3660
+ ; GFX1232-NEXT: v_mov_b32_e32 v1, v4
3665
3661
; GFX1232-NEXT: v_sub_co_ci_u32_e32 v1, vcc_lo, s1, v1, vcc_lo
3666
3662
; GFX1232-NEXT: buffer_store_b64 v[0:1], off, s[4:7], null
3667
3663
; GFX1232-NEXT: s_nop 0
0 commit comments