llvm · arsenm · Feb 27, 2025 · Feb 17, 2025
diff --git a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
@@ -821,7 +821,8 @@ bool SIFoldOperandsImpl::tryToFoldACImm(
   if (UseOpIdx >= Desc.getNumOperands())
     return false;
 
-  if (!AMDGPU::isSISrcInlinableOperand(Desc, UseOpIdx))
+  // Only fold into SI source operands; this filters out unhandled pseudos.
+  if (!AMDGPU::isSISrcOperand(Desc, UseOpIdx))
     return false;
 
   uint8_t OpTy = Desc.operands()[UseOpIdx].OperandType;

diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/andn2.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/andn2.ll
@@ -920,9 +920,7 @@ define amdgpu_ps i64 @s_andn2_v4i16(<4 x i16> inreg %src0, <4 x i16> inreg %src1
 ; GFX6-NEXT:    s_lshl_b32 s3, s9, 16
 ; GFX6-NEXT:    s_and_b32 s4, s8, 0xffff
 ; GFX6-NEXT:    s_or_b32 s3, s3, s4
-; GFX6-NEXT:    s_mov_b32 s4, -1
-; GFX6-NEXT:    s_mov_b32 s5, s4
-; GFX6-NEXT:    s_xor_b64 s[2:3], s[2:3], s[4:5]
+; GFX6-NEXT:    s_xor_b64 s[2:3], s[2:3], -1
 ; GFX6-NEXT:    s_and_b64 s[0:1], s[0:1], s[2:3]
 ; GFX6-NEXT:    ; return to shader part epilog
 ;
@@ -962,9 +960,7 @@ define amdgpu_ps i64 @s_andn2_v4i16_commute(<4 x i16> inreg %src0, <4 x i16> inr
 ; GFX6-NEXT:    s_lshl_b32 s3, s9, 16
 ; GFX6-NEXT:    s_and_b32 s4, s8, 0xffff
 ; GFX6-NEXT:    s_or_b32 s3, s3, s4
-; GFX6-NEXT:    s_mov_b32 s4, -1
-; GFX6-NEXT:    s_mov_b32 s5, s4
-; GFX6-NEXT:    s_xor_b64 s[2:3], s[2:3], s[4:5]
+; GFX6-NEXT:    s_xor_b64 s[2:3], s[2:3], -1
 ; GFX6-NEXT:    s_and_b64 s[0:1], s[2:3], s[0:1]
 ; GFX6-NEXT:    ; return to shader part epilog
 ;
@@ -1004,9 +1000,7 @@ define amdgpu_ps { i64, i64 } @s_andn2_v4i16_multi_use(<4 x i16> inreg %src0, <4
 ; GFX6-NEXT:    s_lshl_b32 s3, s9, 16
 ; GFX6-NEXT:    s_and_b32 s4, s8, 0xffff
 ; GFX6-NEXT:    s_or_b32 s3, s3, s4
-; GFX6-NEXT:    s_mov_b32 s4, -1
-; GFX6-NEXT:    s_mov_b32 s5, s4
-; GFX6-NEXT:    s_xor_b64 s[2:3], s[2:3], s[4:5]
+; GFX6-NEXT:    s_xor_b64 s[2:3], s[2:3], -1
 ; GFX6-NEXT:    s_and_b64 s[0:1], s[0:1], s[2:3]
 ; GFX6-NEXT:    ; return to shader part epilog
 ;
@@ -1060,9 +1054,7 @@ define amdgpu_ps { i64, i64 } @s_andn2_v4i16_multi_foldable_use(<4 x i16> inreg
 ; GFX6-NEXT:    s_lshl_b32 s5, s13, 16
 ; GFX6-NEXT:    s_and_b32 s6, s12, 0xffff
 ; GFX6-NEXT:    s_or_b32 s5, s5, s6
-; GFX6-NEXT:    s_mov_b32 s6, -1
-; GFX6-NEXT:    s_mov_b32 s7, s6
-; GFX6-NEXT:    s_xor_b64 s[4:5], s[4:5], s[6:7]
+; GFX6-NEXT:    s_xor_b64 s[4:5], s[4:5], -1
 ; GFX6-NEXT:    s_and_b64 s[0:1], s[0:1], s[4:5]
 ; GFX6-NEXT:    s_and_b64 s[2:3], s[2:3], s[4:5]
 ; GFX6-NEXT:    ; return to shader part epilog

diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/flat-scratch.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/flat-scratch.ll
@@ -1769,9 +1769,8 @@ define amdgpu_kernel void @store_load_large_imm_offset_kernel() {
 ; GFX9-NEXT:    s_mov_b32 s0, 0
 ; GFX9-NEXT:    scratch_store_dword off, v0, s0 offset:4
 ; GFX9-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-NEXT:    s_movk_i32 s0, 0x3e80
 ; GFX9-NEXT:    v_mov_b32_e32 v0, 15
-; GFX9-NEXT:    s_add_i32 s0, s0, 4
+; GFX9-NEXT:    s_movk_i32 s0, 0x3e84
 ; GFX9-NEXT:    scratch_store_dword off, v0, s0
 ; GFX9-NEXT:    s_waitcnt vmcnt(0)
 ; GFX9-NEXT:    scratch_load_dword v0, off, s0 glc
@@ -1786,8 +1785,7 @@ define amdgpu_kernel void @store_load_large_imm_offset_kernel() {
 ; GFX10-NEXT:    s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s9
 ; GFX10-NEXT:    v_mov_b32_e32 v0, 13
 ; GFX10-NEXT:    v_mov_b32_e32 v1, 15
-; GFX10-NEXT:    s_movk_i32 s0, 0x3e80
-; GFX10-NEXT:    s_add_i32 s0, s0, 4
+; GFX10-NEXT:    s_movk_i32 s0, 0x3e84
 ; GFX10-NEXT:    scratch_store_dword off, v0, off offset:4
 ; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
 ; GFX10-NEXT:    scratch_store_dword off, v1, s0
@@ -1799,11 +1797,10 @@ define amdgpu_kernel void @store_load_large_imm_offset_kernel() {
 ; GFX942-LABEL: store_load_large_imm_offset_kernel:
 ; GFX942:       ; %bb.0: ; %bb
 ; GFX942-NEXT:    v_mov_b32_e32 v0, 13
-; GFX942-NEXT:    s_movk_i32 s0, 0x3e80
 ; GFX942-NEXT:    scratch_store_dword off, v0, off offset:4 sc0 sc1
 ; GFX942-NEXT:    s_waitcnt vmcnt(0)
 ; GFX942-NEXT:    v_mov_b32_e32 v0, 15
-; GFX942-NEXT:    s_add_i32 s0, s0, 4
+; GFX942-NEXT:    s_movk_i32 s0, 0x3e84
 ; GFX942-NEXT:    scratch_store_dword off, v0, s0 sc0 sc1
 ; GFX942-NEXT:    s_waitcnt vmcnt(0)
 ; GFX942-NEXT:    scratch_load_dword v0, off, s0 sc0 sc1
@@ -1813,9 +1810,7 @@ define amdgpu_kernel void @store_load_large_imm_offset_kernel() {
 ; GFX11-LABEL: store_load_large_imm_offset_kernel:
 ; GFX11:       ; %bb.0: ; %bb
 ; GFX11-NEXT:    v_dual_mov_b32 v0, 13 :: v_dual_mov_b32 v1, 15
-; GFX11-NEXT:    s_movk_i32 s0, 0x3e80
-; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
-; GFX11-NEXT:    s_add_i32 s0, s0, 4
+; GFX11-NEXT:    s_movk_i32 s0, 0x3e84
 ; GFX11-NEXT:    scratch_store_b32 off, v0, off offset:4 dlc
 ; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
 ; GFX11-NEXT:    scratch_store_b32 off, v1, s0 dlc
@@ -1843,9 +1838,8 @@ define amdgpu_kernel void @store_load_large_imm_offset_kernel() {
 ; UNALIGNED_GFX9-NEXT:    s_mov_b32 s0, 0
 ; UNALIGNED_GFX9-NEXT:    scratch_store_dword off, v0, s0 offset:4
 ; UNALIGNED_GFX9-NEXT:    s_waitcnt vmcnt(0)
-; UNALIGNED_GFX9-NEXT:    s_movk_i32 s0, 0x3e80
 ; UNALIGNED_GFX9-NEXT:    v_mov_b32_e32 v0, 15
-; UNALIGNED_GFX9-NEXT:    s_add_i32 s0, s0, 4
+; UNALIGNED_GFX9-NEXT:    s_movk_i32 s0, 0x3e84
 ; UNALIGNED_GFX9-NEXT:    scratch_store_dword off, v0, s0
 ; UNALIGNED_GFX9-NEXT:    s_waitcnt vmcnt(0)
 ; UNALIGNED_GFX9-NEXT:    scratch_load_dword v0, off, s0 glc
@@ -1860,8 +1854,7 @@ define amdgpu_kernel void @store_load_large_imm_offset_kernel() {
 ; UNALIGNED_GFX10-NEXT:    s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s9
 ; UNALIGNED_GFX10-NEXT:    v_mov_b32_e32 v0, 13
 ; UNALIGNED_GFX10-NEXT:    v_mov_b32_e32 v1, 15
-; UNALIGNED_GFX10-NEXT:    s_movk_i32 s0, 0x3e80
-; UNALIGNED_GFX10-NEXT:    s_add_i32 s0, s0, 4
+; UNALIGNED_GFX10-NEXT:    s_movk_i32 s0, 0x3e84
 ; UNALIGNED_GFX10-NEXT:    scratch_store_dword off, v0, off offset:4
 ; UNALIGNED_GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
 ; UNALIGNED_GFX10-NEXT:    scratch_store_dword off, v1, s0
@@ -1873,11 +1866,10 @@ define amdgpu_kernel void @store_load_large_imm_offset_kernel() {
 ; UNALIGNED_GFX942-LABEL: store_load_large_imm_offset_kernel:
 ; UNALIGNED_GFX942:       ; %bb.0: ; %bb
 ; UNALIGNED_GFX942-NEXT:    v_mov_b32_e32 v0, 13
-; UNALIGNED_GFX942-NEXT:    s_movk_i32 s0, 0x3e80
 ; UNALIGNED_GFX942-NEXT:    scratch_store_dword off, v0, off offset:4 sc0 sc1
 ; UNALIGNED_GFX942-NEXT:    s_waitcnt vmcnt(0)
 ; UNALIGNED_GFX942-NEXT:    v_mov_b32_e32 v0, 15
-; UNALIGNED_GFX942-NEXT:    s_add_i32 s0, s0, 4
+; UNALIGNED_GFX942-NEXT:    s_movk_i32 s0, 0x3e84
 ; UNALIGNED_GFX942-NEXT:    scratch_store_dword off, v0, s0 sc0 sc1
 ; UNALIGNED_GFX942-NEXT:    s_waitcnt vmcnt(0)
 ; UNALIGNED_GFX942-NEXT:    scratch_load_dword v0, off, s0 sc0 sc1
@@ -1887,9 +1879,7 @@ define amdgpu_kernel void @store_load_large_imm_offset_kernel() {
 ; UNALIGNED_GFX11-LABEL: store_load_large_imm_offset_kernel:
 ; UNALIGNED_GFX11:       ; %bb.0: ; %bb
 ; UNALIGNED_GFX11-NEXT:    v_dual_mov_b32 v0, 13 :: v_dual_mov_b32 v1, 15
-; UNALIGNED_GFX11-NEXT:    s_movk_i32 s0, 0x3e80
-; UNALIGNED_GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
-; UNALIGNED_GFX11-NEXT:    s_add_i32 s0, s0, 4
+; UNALIGNED_GFX11-NEXT:    s_movk_i32 s0, 0x3e84
 ; UNALIGNED_GFX11-NEXT:    scratch_store_b32 off, v0, off offset:4 dlc
 ; UNALIGNED_GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
 ; UNALIGNED_GFX11-NEXT:    scratch_store_b32 off, v1, s0 dlc
@@ -1923,13 +1913,11 @@ define void @store_load_large_imm_offset_foo() {
 ; GFX9-LABEL: store_load_large_imm_offset_foo:
 ; GFX9:       ; %bb.0: ; %bb
 ; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT:    s_movk_i32 s0, 0x3e80
 ; GFX9-NEXT:    v_mov_b32_e32 v0, 13
-; GFX9-NEXT:    s_add_i32 s1, s32, s0
 ; GFX9-NEXT:    scratch_store_dword off, v0, s32 offset:4
 ; GFX9-NEXT:    s_waitcnt vmcnt(0)
 ; GFX9-NEXT:    v_mov_b32_e32 v0, 15
-; GFX9-NEXT:    s_add_i32 s0, s1, 4
+; GFX9-NEXT:    s_add_i32 s0, s32, 0x3e84
 ; GFX9-NEXT:    scratch_store_dword off, v0, s0
 ; GFX9-NEXT:    s_waitcnt vmcnt(0)
 ; GFX9-NEXT:    scratch_load_dword v0, off, s0 glc
@@ -1940,10 +1928,8 @@ define void @store_load_large_imm_offset_foo() {
 ; GFX10:       ; %bb.0: ; %bb
 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX10-NEXT:    v_mov_b32_e32 v0, 13
-; GFX10-NEXT:    s_movk_i32 s0, 0x3e80
 ; GFX10-NEXT:    v_mov_b32_e32 v1, 15
-; GFX10-NEXT:    s_add_i32 s1, s32, s0
-; GFX10-NEXT:    s_add_i32 s0, s1, 4
+; GFX10-NEXT:    s_add_i32 s0, s32, 0x3e84
 ; GFX10-NEXT:    scratch_store_dword off, v0, s32 offset:4
 ; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
 ; GFX10-NEXT:    scratch_store_dword off, v1, s0
@@ -1955,13 +1941,11 @@ define void @store_load_large_imm_offset_foo() {
 ; GFX942-LABEL: store_load_large_imm_offset_foo:
 ; GFX942:       ; %bb.0: ; %bb
 ; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT:    s_movk_i32 s0, 0x3e80
 ; GFX942-NEXT:    v_mov_b32_e32 v0, 13
-; GFX942-NEXT:    s_add_i32 s1, s32, s0
 ; GFX942-NEXT:    scratch_store_dword off, v0, s32 offset:4 sc0 sc1
 ; GFX942-NEXT:    s_waitcnt vmcnt(0)
 ; GFX942-NEXT:    v_mov_b32_e32 v0, 15
-; GFX942-NEXT:    s_add_i32 s0, s1, 4
+; GFX942-NEXT:    s_add_i32 s0, s32, 0x3e84
 ; GFX942-NEXT:    scratch_store_dword off, v0, s0 sc0 sc1
 ; GFX942-NEXT:    s_waitcnt vmcnt(0)
 ; GFX942-NEXT:    scratch_load_dword v0, off, s0 sc0 sc1
@@ -1972,10 +1956,7 @@ define void @store_load_large_imm_offset_foo() {
 ; GFX11:       ; %bb.0: ; %bb
 ; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX11-NEXT:    v_dual_mov_b32 v0, 13 :: v_dual_mov_b32 v1, 15
-; GFX11-NEXT:    s_movk_i32 s0, 0x3e80
-; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
-; GFX11-NEXT:    s_add_i32 s1, s32, s0
-; GFX11-NEXT:    s_add_i32 s0, s1, 4
+; GFX11-NEXT:    s_add_i32 s0, s32, 0x3e84
 ; GFX11-NEXT:    scratch_store_b32 off, v0, s32 offset:4 dlc
 ; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
 ; GFX11-NEXT:    scratch_store_b32 off, v1, s0 dlc
@@ -2004,13 +1985,11 @@ define void @store_load_large_imm_offset_foo() {
 ; UNALIGNED_GFX9-LABEL: store_load_large_imm_offset_foo:
 ; UNALIGNED_GFX9:       ; %bb.0: ; %bb
 ; UNALIGNED_GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; UNALIGNED_GFX9-NEXT:    s_movk_i32 s0, 0x3e80
 ; UNALIGNED_GFX9-NEXT:    v_mov_b32_e32 v0, 13
-; UNALIGNED_GFX9-NEXT:    s_add_i32 s1, s32, s0
 ; UNALIGNED_GFX9-NEXT:    scratch_store_dword off, v0, s32 offset:4
 ; UNALIGNED_GFX9-NEXT:    s_waitcnt vmcnt(0)
 ; UNALIGNED_GFX9-NEXT:    v_mov_b32_e32 v0, 15
-; UNALIGNED_GFX9-NEXT:    s_add_i32 s0, s1, 4
+; UNALIGNED_GFX9-NEXT:    s_add_i32 s0, s32, 0x3e84
 ; UNALIGNED_GFX9-NEXT:    scratch_store_dword off, v0, s0
 ; UNALIGNED_GFX9-NEXT:    s_waitcnt vmcnt(0)
 ; UNALIGNED_GFX9-NEXT:    scratch_load_dword v0, off, s0 glc
@@ -2021,10 +2000,8 @@ define void @store_load_large_imm_offset_foo() {
 ; UNALIGNED_GFX10:       ; %bb.0: ; %bb
 ; UNALIGNED_GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; UNALIGNED_GFX10-NEXT:    v_mov_b32_e32 v0, 13
-; UNALIGNED_GFX10-NEXT:    s_movk_i32 s0, 0x3e80
 ; UNALIGNED_GFX10-NEXT:    v_mov_b32_e32 v1, 15
-; UNALIGNED_GFX10-NEXT:    s_add_i32 s1, s32, s0
-; UNALIGNED_GFX10-NEXT:    s_add_i32 s0, s1, 4
+; UNALIGNED_GFX10-NEXT:    s_add_i32 s0, s32, 0x3e84
 ; UNALIGNED_GFX10-NEXT:    scratch_store_dword off, v0, s32 offset:4
 ; UNALIGNED_GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
 ; UNALIGNED_GFX10-NEXT:    scratch_store_dword off, v1, s0
@@ -2036,13 +2013,11 @@ define void @store_load_large_imm_offset_foo() {
 ; UNALIGNED_GFX942-LABEL: store_load_large_imm_offset_foo:
 ; UNALIGNED_GFX942:       ; %bb.0: ; %bb
 ; UNALIGNED_GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; UNALIGNED_GFX942-NEXT:    s_movk_i32 s0, 0x3e80
 ; UNALIGNED_GFX942-NEXT:    v_mov_b32_e32 v0, 13
-; UNALIGNED_GFX942-NEXT:    s_add_i32 s1, s32, s0
 ; UNALIGNED_GFX942-NEXT:    scratch_store_dword off, v0, s32 offset:4 sc0 sc1
 ; UNALIGNED_GFX942-NEXT:    s_waitcnt vmcnt(0)
 ; UNALIGNED_GFX942-NEXT:    v_mov_b32_e32 v0, 15
-; UNALIGNED_GFX942-NEXT:    s_add_i32 s0, s1, 4
+; UNALIGNED_GFX942-NEXT:    s_add_i32 s0, s32, 0x3e84
 ; UNALIGNED_GFX942-NEXT:    scratch_store_dword off, v0, s0 sc0 sc1
 ; UNALIGNED_GFX942-NEXT:    s_waitcnt vmcnt(0)
 ; UNALIGNED_GFX942-NEXT:    scratch_load_dword v0, off, s0 sc0 sc1
@@ -2053,10 +2028,7 @@ define void @store_load_large_imm_offset_foo() {
 ; UNALIGNED_GFX11:       ; %bb.0: ; %bb
 ; UNALIGNED_GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; UNALIGNED_GFX11-NEXT:    v_dual_mov_b32 v0, 13 :: v_dual_mov_b32 v1, 15
-; UNALIGNED_GFX11-NEXT:    s_movk_i32 s0, 0x3e80
-; UNALIGNED_GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
-; UNALIGNED_GFX11-NEXT:    s_add_i32 s1, s32, s0
-; UNALIGNED_GFX11-NEXT:    s_add_i32 s0, s1, 4
+; UNALIGNED_GFX11-NEXT:    s_add_i32 s0, s32, 0x3e84
 ; UNALIGNED_GFX11-NEXT:    scratch_store_b32 off, v0, s32 offset:4 dlc
 ; UNALIGNED_GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
 ; UNALIGNED_GFX11-NEXT:    scratch_store_b32 off, v1, s0 dlc

diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/orn2.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/orn2.ll
@@ -919,9 +919,7 @@ define amdgpu_ps i64 @s_orn2_v4i16(<4 x i16> inreg %src0, <4 x i16> inreg %src1)
 ; GFX6-NEXT:    s_lshl_b32 s3, s9, 16
 ; GFX6-NEXT:    s_and_b32 s4, s8, 0xffff
 ; GFX6-NEXT:    s_or_b32 s3, s3, s4
-; GFX6-NEXT:    s_mov_b32 s4, -1
-; GFX6-NEXT:    s_mov_b32 s5, s4
-; GFX6-NEXT:    s_xor_b64 s[2:3], s[2:3], s[4:5]
+; GFX6-NEXT:    s_xor_b64 s[2:3], s[2:3], -1
 ; GFX6-NEXT:    s_or_b64 s[0:1], s[0:1], s[2:3]
 ; GFX6-NEXT:    ; return to shader part epilog
 ;
@@ -961,9 +959,7 @@ define amdgpu_ps i64 @s_orn2_v4i16_commute(<4 x i16> inreg %src0, <4 x i16> inre
 ; GFX6-NEXT:    s_lshl_b32 s3, s9, 16
 ; GFX6-NEXT:    s_and_b32 s4, s8, 0xffff
 ; GFX6-NEXT:    s_or_b32 s3, s3, s4
-; GFX6-NEXT:    s_mov_b32 s4, -1
-; GFX6-NEXT:    s_mov_b32 s5, s4
-; GFX6-NEXT:    s_xor_b64 s[2:3], s[2:3], s[4:5]
+; GFX6-NEXT:    s_xor_b64 s[2:3], s[2:3], -1
 ; GFX6-NEXT:    s_or_b64 s[0:1], s[2:3], s[0:1]
 ; GFX6-NEXT:    ; return to shader part epilog
 ;
@@ -1003,9 +999,7 @@ define amdgpu_ps { i64, i64 } @s_orn2_v4i16_multi_use(<4 x i16> inreg %src0, <4
 ; GFX6-NEXT:    s_lshl_b32 s3, s9, 16
 ; GFX6-NEXT:    s_and_b32 s4, s8, 0xffff
 ; GFX6-NEXT:    s_or_b32 s3, s3, s4
-; GFX6-NEXT:    s_mov_b32 s4, -1
-; GFX6-NEXT:    s_mov_b32 s5, s4
-; GFX6-NEXT:    s_xor_b64 s[2:3], s[2:3], s[4:5]
+; GFX6-NEXT:    s_xor_b64 s[2:3], s[2:3], -1
 ; GFX6-NEXT:    s_or_b64 s[0:1], s[0:1], s[2:3]
 ; GFX6-NEXT:    ; return to shader part epilog
 ;
@@ -1059,9 +1053,7 @@ define amdgpu_ps { i64, i64 } @s_orn2_v4i16_multi_foldable_use(<4 x i16> inreg %
 ; GFX6-NEXT:    s_lshl_b32 s5, s13, 16
 ; GFX6-NEXT:    s_and_b32 s6, s12, 0xffff
 ; GFX6-NEXT:    s_or_b32 s5, s5, s6
-; GFX6-NEXT:    s_mov_b32 s6, -1
-; GFX6-NEXT:    s_mov_b32 s7, s6
-; GFX6-NEXT:    s_xor_b64 s[4:5], s[4:5], s[6:7]
+; GFX6-NEXT:    s_xor_b64 s[4:5], s[4:5], -1
 ; GFX6-NEXT:    s_or_b64 s[0:1], s[0:1], s[4:5]
 ; GFX6-NEXT:    s_or_b64 s[2:3], s[2:3], s[4:5]
 ; GFX6-NEXT:    ; return to shader part epilog

diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/xnor.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/xnor.ll
@@ -118,13 +118,11 @@ define amdgpu_ps i64 @scalar_xnor_v4i16_one_use(<4 x i16> inreg %a, <4 x i16> in
 ; GFX7-NEXT:    s_xor_b64 s[2:3], s[2:3], s[6:7]
 ; GFX7-NEXT:    s_lshl_b32 s1, s1, 16
 ; GFX7-NEXT:    s_and_b32 s0, s0, 0xffff
-; GFX7-NEXT:    s_mov_b32 s8, -1
 ; GFX7-NEXT:    s_or_b32 s0, s1, s0
 ; GFX7-NEXT:    s_lshl_b32 s1, s3, 16
 ; GFX7-NEXT:    s_and_b32 s2, s2, 0xffff
-; GFX7-NEXT:    s_mov_b32 s9, s8
 ; GFX7-NEXT:    s_or_b32 s1, s1, s2
-; GFX7-NEXT:    s_xor_b64 s[0:1], s[0:1], s[8:9]
+; GFX7-NEXT:    s_xor_b64 s[0:1], s[0:1], -1
 ; GFX7-NEXT:    ; return to shader part epilog
 ;
 ; GFX8-LABEL: scalar_xnor_v4i16_one_use:

diff --git a/llvm/test/CodeGen/AMDGPU/bug-cselect-b64.ll b/llvm/test/CodeGen/AMDGPU/bug-cselect-b64.ll
@@ -5,16 +5,14 @@ define amdgpu_cs <2 x i32> @f() {
 ; CHECK-LABEL: f:
 ; CHECK:       ; %bb.0: ; %bb
 ; CHECK-NEXT:    s_mov_b32 s4, 0
+; CHECK-NEXT:    s_mov_b32 s1, 0
 ; CHECK-NEXT:    s_mov_b32 s5, s4
 ; CHECK-NEXT:    s_mov_b32 s6, s4
 ; CHECK-NEXT:    s_mov_b32 s7, s4
-; CHECK-NEXT:    s_mov_b32 s0, s4
 ; CHECK-NEXT:    buffer_load_dwordx2 v[0:1], off, s[4:7], 0
-; CHECK-NEXT:    s_mov_b32 s1, s4
 ; CHECK-NEXT:    s_waitcnt vmcnt(0)
-; CHECK-NEXT:    v_cmp_ne_u64_e32 vcc_lo, s[0:1], v[0:1]
+; CHECK-NEXT:    v_cmp_ne_u64_e32 vcc_lo, 0, v[0:1]
 ; CHECK-NEXT:    v_mov_b32_e32 v1, s4
-; CHECK-NEXT:    s_mov_b32 s1, 0
 ; CHECK-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc_lo
 ; CHECK-NEXT:    v_readfirstlane_b32 s0, v0
 ; CHECK-NEXT:    buffer_store_dwordx2 v[0:1], off, s[4:7], 0

diff --git a/llvm/test/CodeGen/AMDGPU/constrained-shift.ll b/llvm/test/CodeGen/AMDGPU/constrained-shift.ll
@@ -192,10 +192,8 @@ define amdgpu_ps <4 x i32> @s_csh_v4i32(<4 x i32> inreg %a, <4 x i32> inreg %b)
 ;
 ; GISEL-LABEL: s_csh_v4i32:
 ; GISEL:       ; %bb.0:
-; GISEL-NEXT:    s_mov_b32 s8, 31
-; GISEL-NEXT:    s_mov_b32 s9, s8
-; GISEL-NEXT:    s_and_b64 s[4:5], s[4:5], s[8:9]
-; GISEL-NEXT:    s_and_b64 s[6:7], s[6:7], s[8:9]
+; GISEL-NEXT:    s_and_b64 s[4:5], s[4:5], 31
+; GISEL-NEXT:    s_and_b64 s[6:7], s[6:7], 31
 ; GISEL-NEXT:    s_lshl_b32 s8, s0, s4
 ; GISEL-NEXT:    s_lshl_b32 s9, s1, s5
 ; GISEL-NEXT:    s_lshl_b32 s10, s2, s6