diff --git a/llvm/lib/Analysis/InstructionSimplify.cpp b/llvm/lib/Analysis/InstructionSimplify.cpp
index 2a45acf63aa2c..26ae9b472ff3d 100644
--- a/llvm/lib/Analysis/InstructionSimplify.cpp
+++ b/llvm/lib/Analysis/InstructionSimplify.cpp
@@ -2563,6 +2563,28 @@ static Value *simplifyXorInst(Value *Op0, Value *Op1, const SimplifyQuery &Q,
   if (Value *V = simplifyByDomEq(Instruction::Xor, Op0, Op1, Q, MaxRecurse))
     return V;
 
+  // For i1 (or vector of i1) operands, use implied conditions to find
+  // combinations of operand values that can never occur.
+  if (Op0->getType()->isIntOrIntVectorTy(1)) {
+    // InvalidTable[A][B] is set when (Op0 == A, Op1 == B) is impossible.
+    bool InvalidTable[2][2] = {};
+    // Op0's value implying Op1's value rules out the opposite Op1 value.
+    if (std::optional<bool> Implied = isImpliedCondition(Op0, Op1, Q.DL, false))
+      InvalidTable[0][!*Implied] = true;
+    if (std::optional<bool> Implied = isImpliedCondition(Op0, Op1, Q.DL, true))
+      InvalidTable[1][!*Implied] = true;
+    // Likewise with the roles of Op0 and Op1 swapped.
+    if (std::optional<bool> Implied = isImpliedCondition(Op1, Op0, Q.DL, false))
+      InvalidTable[!*Implied][0] = true;
+    if (std::optional<bool> Implied = isImpliedCondition(Op1, Op0, Q.DL, true))
+      InvalidTable[!*Implied][1] = true;
+
+    // Op0 and Op1 can never be equal, so the xor is always true.
+    if (InvalidTable[0][0] && InvalidTable[1][1])
+      return ConstantInt::getTrue(Op0->getType());
+    // NOTE: There would be no benefit to handle other cases.
+  }
+
   return nullptr;
 }
 
diff --git a/llvm/test/CodeGen/AMDGPU/wave32.ll b/llvm/test/CodeGen/AMDGPU/wave32.ll
index c3ac778f82e04..2b723662fb1cc 100644
--- a/llvm/test/CodeGen/AMDGPU/wave32.ll
+++ b/llvm/test/CodeGen/AMDGPU/wave32.ll
@@ -237,33 +237,14 @@ define amdgpu_kernel void @test_vop3_cmp_f32_sop_and(ptr addrspace(1) %arg) {
 }
 
 define amdgpu_kernel void @test_vop3_cmp_i32_sop_xor(ptr addrspace(1) %arg) {
-; GFX1032-LABEL: test_vop3_cmp_i32_sop_xor:
-; GFX1032:       ; %bb.0:
-; GFX1032-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x24
-; GFX1032-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
-; GFX1032-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1032-NEXT:    global_load_dword v1, v0, s[2:3]
-; GFX1032-NEXT:    s_waitcnt vmcnt(0)
-; GFX1032-NEXT:    v_cmp_lt_i32_e32 vcc_lo, 0, v1
-; GFX1032-NEXT:    v_cmp_gt_i32_e64 s0, 1, v1
-; GFX1032-NEXT:    s_xor_b32 s0, vcc_lo, s0
-; GFX1032-NEXT:    v_cndmask_b32_e64 v1, 2, 1, s0
-; GFX1032-NEXT:    global_store_dword v0, v1, s[2:3]
-; GFX1032-NEXT:    s_endpgm
-;
-; GFX1064-LABEL: test_vop3_cmp_i32_sop_xor:
-; GFX1064:       ; %bb.0:
-; GFX1064-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x24
-; GFX1064-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
-; GFX1064-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1064-NEXT:    global_load_dword v1, v0, s[2:3]
-; GFX1064-NEXT:    s_waitcnt vmcnt(0)
-; GFX1064-NEXT:    v_cmp_lt_i32_e32 vcc, 0, v1
-; GFX1064-NEXT:    v_cmp_gt_i32_e64 s[0:1], 1, v1
-; GFX1064-NEXT:    s_xor_b64 s[0:1], vcc, s[0:1]
-; GFX1064-NEXT:    v_cndmask_b32_e64 v1, 2, 1, s[0:1]
-; GFX1064-NEXT:    global_store_dword v0, v1, s[2:3]
-; GFX1064-NEXT:    s_endpgm
+; GCN-LABEL: test_vop3_cmp_i32_sop_xor:
+; GCN:       ; %bb.0:
+; GCN-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
+; GCN-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; GCN-NEXT:    v_mov_b32_e32 v1, 1
+; GCN-NEXT:    s_waitcnt lgkmcnt(0)
+; GCN-NEXT:    global_store_dword v0, v1, s[0:1]
+; GCN-NEXT:    s_endpgm
   %lid = tail call i32 @llvm.amdgcn.workitem.id.x()
   %gep = getelementptr inbounds i32, ptr addrspace(1) %arg, i32 %lid
   %load = load i32, ptr addrspace(1) %gep, align 4
diff --git a/llvm/test/Transforms/InstCombine/xor-icmps.ll b/llvm/test/Transforms/InstCombine/xor-icmps.ll
index c85993ea9a7e0..8ef5465d32e84 100644
--- a/llvm/test/Transforms/InstCombine/xor-icmps.ll
+++ b/llvm/test/Transforms/InstCombine/xor-icmps.ll
@@ -171,3 +171,81 @@ define i1 @xor_icmp_ptr(ptr %c, ptr %d) {
   ret i1 %xor
 }
 
+; Tests from PR70928
+define i1 @xor_icmp_true1(i32 %x, i32 %y) {
+; CHECK-LABEL: @xor_icmp_true1(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    ret i1 true
+;
+entry:
+  %add = add nsw i32 %y, 1
+  %cmp1 = icmp sgt i32 %x, %y
+  %cmp2 = icmp slt i32 %x, %add
+  %xor = xor i1 %cmp1, %cmp2
+  ret i1 %xor
+}
+
+define i1 @xor_icmp_true2(i32 %x, i32 %y) {
+; CHECK-LABEL: @xor_icmp_true2(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    ret i1 true
+;
+entry:
+  %sub = add nsw i32 %y, -1
+  %cmp1 = icmp slt i32 %x, %y
+  %cmp2 = icmp sgt i32 %x, %sub
+  %xor = xor i1 %cmp1, %cmp2
+  ret i1 %xor
+}
+
+define i1 @xor_icmp_true3(i32 %a) {
+; CHECK-LABEL: @xor_icmp_true3(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    ret i1 true
+;
+entry:
+  %cmp = icmp sgt i32 %a, 5
+  %cmp1 = icmp slt i32 %a, 6
+  %cmp3 = xor i1 %cmp, %cmp1
+  ret i1 %cmp3
+}
+
+define i1 @xor_icmp_true4(i32 %a) {
+; CHECK-LABEL: @xor_icmp_true4(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    ret i1 true
+;
+entry:
+  %cmp = icmp slt i32 %a, 5
+  %cmp1 = icmp sgt i32 %a, 4
+  %cmp3 = xor i1 %cmp, %cmp1
+  ret i1 %cmp3
+}
+
+define i1 @xor_icmp_true4_commuted(i32 %a) {
+; CHECK-LABEL: @xor_icmp_true4_commuted(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    ret i1 true
+;
+entry:
+  %cmp = icmp slt i32 %a, 5
+  %cmp1 = icmp sgt i32 %a, 4
+  %cmp3 = xor i1 %cmp1, %cmp
+  ret i1 %cmp3
+}
+
+; Negative test: both compares hold for %a in {5, 6}, so the xor is not constant.
+define i1 @xor_icmp_failed_to_imply(i32 %a) {
+; CHECK-LABEL: @xor_icmp_failed_to_imply(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[A:%.*]], 7
+; CHECK-NEXT:    [[CMP1:%.*]] = icmp sgt i32 [[A]], 4
+; CHECK-NEXT:    [[CMP3:%.*]] = xor i1 [[CMP]], [[CMP1]]
+; CHECK-NEXT:    ret i1 [[CMP3]]
+;
+entry:
+  %cmp = icmp slt i32 %a, 7
+  %cmp1 = icmp sgt i32 %a, 4
+  %cmp3 = xor i1 %cmp, %cmp1
+  ret i1 %cmp3
+}