Skip to content

Commit f31d39c

Browse files
committed
[InstCombine] remove cast-of-signbit to shift transform
The transform was wrong in 3 ways: 1. It created an extra instruction when the source and dest types don't match. 2. It did not account for an extra use of the icmp, so could create 2 extra insts. 3. It favored bit hacks over icmp (icmp generally has better analysis). This fixes #54692 (modeled by the PhaseOrdering tests). This is a minimal step to fix the bug, but we should likely invert this and the sibling transform for the "is negative" pattern too. The backend should be able to invert this back to a shift if that leads to better codegen. This is a reduced try of 3794cc0 - that was reverted because it could cause infinite loops by conflicting with the related transforms in this block that create shifts.
1 parent f7709a0 commit f31d39c

File tree

5 files changed

+54
-60
lines changed

5 files changed

+54
-60
lines changed

llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -983,25 +983,25 @@ Instruction *InstCombinerImpl::transformZExtICmp(ICmpInst *Cmp, ZExtInst &Zext)
983983
// If we are just checking for a icmp eq of a single bit and zext'ing it
984984
// to an integer, then shift the bit to the appropriate place and then
985985
// cast to integer to avoid the comparison.
986+
987+
// FIXME: This set of transforms does not check for extra uses and/or creates
988+
// an extra instruction (an optional final cast is not included
989+
// in the transform comments). We may also want to favor icmp over
990+
// shifts in cases of equal instructions because icmp has better
991+
// analysis in general (invert the transform).
992+
986993
const APInt *Op1CV;
987994
if (match(Cmp->getOperand(1), m_APInt(Op1CV))) {
988995

989996
// zext (x <s 0) to i32 --> x>>u31 true if signbit set.
990-
// zext (x >s -1) to i32 --> (x>>u31)^1 true if signbit clear.
991-
if ((Cmp->getPredicate() == ICmpInst::ICMP_SLT && Op1CV->isZero()) ||
992-
(Cmp->getPredicate() == ICmpInst::ICMP_SGT && Op1CV->isAllOnes())) {
997+
if (Cmp->getPredicate() == ICmpInst::ICMP_SLT && Op1CV->isZero()) {
993998
Value *In = Cmp->getOperand(0);
994999
Value *Sh = ConstantInt::get(In->getType(),
9951000
In->getType()->getScalarSizeInBits() - 1);
9961001
In = Builder.CreateLShr(In, Sh, In->getName() + ".lobit");
9971002
if (In->getType() != Zext.getType())
9981003
In = Builder.CreateIntCast(In, Zext.getType(), false /*ZExt*/);
9991004

1000-
if (Cmp->getPredicate() == ICmpInst::ICMP_SGT) {
1001-
Constant *One = ConstantInt::get(In->getType(), 1);
1002-
In = Builder.CreateXor(In, One, In->getName() + ".not");
1003-
}
1004-
10051005
return replaceInstUsesWith(Zext, In);
10061006
}
10071007

llvm/test/Transforms/InstCombine/compare-signs.ll

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -6,9 +6,9 @@
66
define i32 @test1(i32 %a, i32 %b) nounwind readnone {
77
; CHECK-LABEL: @test1(
88
; CHECK-NEXT: [[TMP1:%.*]] = xor i32 [[B:%.*]], [[A:%.*]]
9-
; CHECK-NEXT: [[TMP2:%.*]] = xor i32 [[TMP1]], -1
10-
; CHECK-NEXT: [[DOTLOBIT_NOT:%.*]] = lshr i32 [[TMP2]], 31
11-
; CHECK-NEXT: ret i32 [[DOTLOBIT_NOT]]
9+
; CHECK-NEXT: [[TMP2:%.*]] = icmp sgt i32 [[TMP1]], -1
10+
; CHECK-NEXT: [[T3:%.*]] = zext i1 [[TMP2]] to i32
11+
; CHECK-NEXT: ret i32 [[T3]]
1212
;
1313
%t0 = icmp sgt i32 %a, -1
1414
%t1 = icmp slt i32 %b, 0
@@ -36,9 +36,9 @@ define i32 @test2(i32 %a, i32 %b) nounwind readnone {
3636
define i32 @test3(i32 %a, i32 %b) nounwind readnone {
3737
; CHECK-LABEL: @test3(
3838
; CHECK-NEXT: [[T2_UNSHIFTED:%.*]] = xor i32 [[A:%.*]], [[B:%.*]]
39-
; CHECK-NEXT: [[TMP1:%.*]] = xor i32 [[T2_UNSHIFTED]], -1
40-
; CHECK-NEXT: [[T2_UNSHIFTED_LOBIT_NOT:%.*]] = lshr i32 [[TMP1]], 31
41-
; CHECK-NEXT: ret i32 [[T2_UNSHIFTED_LOBIT_NOT]]
39+
; CHECK-NEXT: [[T2:%.*]] = icmp sgt i32 [[T2_UNSHIFTED]], -1
40+
; CHECK-NEXT: [[T3:%.*]] = zext i1 [[T2]] to i32
41+
; CHECK-NEXT: ret i32 [[T3]]
4242
;
4343
%t0 = lshr i32 %a, 31
4444
%t1 = lshr i32 %b, 31
@@ -68,9 +68,9 @@ define <2 x i32> @test3vec(<2 x i32> %a, <2 x i32> %b) nounwind readnone {
6868
define i32 @test3i(i32 %a, i32 %b) nounwind readnone {
6969
; CHECK-LABEL: @test3i(
7070
; CHECK-NEXT: [[T01:%.*]] = xor i32 [[A:%.*]], [[B:%.*]]
71-
; CHECK-NEXT: [[TMP1:%.*]] = xor i32 [[T01]], -1
72-
; CHECK-NEXT: [[T01_LOBIT_NOT:%.*]] = lshr i32 [[TMP1]], 31
73-
; CHECK-NEXT: ret i32 [[T01_LOBIT_NOT]]
71+
; CHECK-NEXT: [[T4:%.*]] = icmp sgt i32 [[T01]], -1
72+
; CHECK-NEXT: [[T5:%.*]] = zext i1 [[T4]] to i32
73+
; CHECK-NEXT: ret i32 [[T5]]
7474
;
7575
%t0 = lshr i32 %a, 29
7676
%t1 = lshr i32 %b, 29

llvm/test/Transforms/InstCombine/icmp.ll

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -27,9 +27,9 @@ define <2 x i32> @test1vec(<2 x i32> %X) {
2727

2828
define i32 @test2(i32 %X) {
2929
; CHECK-LABEL: @test2(
30-
; CHECK-NEXT: [[TMP1:%.*]] = xor i32 [[X:%.*]], -1
31-
; CHECK-NEXT: [[X_LOBIT_NOT:%.*]] = lshr i32 [[TMP1]], 31
32-
; CHECK-NEXT: ret i32 [[X_LOBIT_NOT]]
30+
; CHECK-NEXT: [[A:%.*]] = icmp sgt i32 [[X:%.*]], -1
31+
; CHECK-NEXT: [[B:%.*]] = zext i1 [[A]] to i32
32+
; CHECK-NEXT: ret i32 [[B]]
3333
;
3434
%a = icmp ult i32 %X, -2147483648
3535
%b = zext i1 %a to i32
@@ -38,9 +38,9 @@ define i32 @test2(i32 %X) {
3838

3939
define <2 x i32> @test2vec(<2 x i32> %X) {
4040
; CHECK-LABEL: @test2vec(
41-
; CHECK-NEXT: [[TMP1:%.*]] = xor <2 x i32> [[X:%.*]], <i32 -1, i32 -1>
42-
; CHECK-NEXT: [[X_LOBIT_NOT:%.*]] = lshr <2 x i32> [[TMP1]], <i32 31, i32 31>
43-
; CHECK-NEXT: ret <2 x i32> [[X_LOBIT_NOT]]
41+
; CHECK-NEXT: [[A:%.*]] = icmp sgt <2 x i32> [[X:%.*]], <i32 -1, i32 -1>
42+
; CHECK-NEXT: [[B:%.*]] = zext <2 x i1> [[A]] to <2 x i32>
43+
; CHECK-NEXT: ret <2 x i32> [[B]]
4444
;
4545
%a = icmp ult <2 x i32> %X, <i32 -2147483648, i32 -2147483648>
4646
%b = zext <2 x i1> %a to <2 x i32>

llvm/test/Transforms/InstCombine/zext.ll

Lines changed: 23 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -452,10 +452,9 @@ define i32 @zext_or_masked_bit_test_uses(i32 %a, i32 %b, i32 %x) {
452452

453453
define i32 @notneg_zext_wider(i8 %x) {
454454
; CHECK-LABEL: @notneg_zext_wider(
455-
; CHECK-NEXT: [[TMP1:%.*]] = xor i8 [[X:%.*]], -1
456-
; CHECK-NEXT: [[TMP2:%.*]] = lshr i8 [[TMP1]], 7
457-
; CHECK-NEXT: [[DOTNOT:%.*]] = zext i8 [[TMP2]] to i32
458-
; CHECK-NEXT: ret i32 [[DOTNOT]]
455+
; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i8 [[X:%.*]], -1
456+
; CHECK-NEXT: [[R:%.*]] = zext i1 [[CMP]] to i32
457+
; CHECK-NEXT: ret i32 [[R]]
459458
;
460459
%cmp = icmp sgt i8 %x, -1
461460
%r = zext i1 %cmp to i32
@@ -464,10 +463,9 @@ define i32 @notneg_zext_wider(i8 %x) {
464463

465464
define <2 x i8> @notneg_zext_narrower(<2 x i32> %x) {
466465
; CHECK-LABEL: @notneg_zext_narrower(
467-
; CHECK-NEXT: [[X_LOBIT:%.*]] = lshr <2 x i32> [[X:%.*]], <i32 31, i32 31>
468-
; CHECK-NEXT: [[TMP1:%.*]] = trunc <2 x i32> [[X_LOBIT]] to <2 x i8>
469-
; CHECK-NEXT: [[DOTNOT:%.*]] = xor <2 x i8> [[TMP1]], <i8 1, i8 1>
470-
; CHECK-NEXT: ret <2 x i8> [[DOTNOT]]
466+
; CHECK-NEXT: [[CMP:%.*]] = icmp sgt <2 x i32> [[X:%.*]], <i32 -1, i32 -1>
467+
; CHECK-NEXT: [[R:%.*]] = zext <2 x i1> [[CMP]] to <2 x i8>
468+
; CHECK-NEXT: ret <2 x i8> [[R]]
471469
;
472470
%cmp = icmp sgt <2 x i32> %x, <i32 -1, i32 -1>
473471
%r = zext <2 x i1> %cmp to <2 x i8>
@@ -478,10 +476,8 @@ define i32 @notneg_zext_wider_use(i8 %x) {
478476
; CHECK-LABEL: @notneg_zext_wider_use(
479477
; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i8 [[X:%.*]], -1
480478
; CHECK-NEXT: call void @use1(i1 [[CMP]])
481-
; CHECK-NEXT: [[TMP1:%.*]] = xor i8 [[X]], -1
482-
; CHECK-NEXT: [[TMP2:%.*]] = lshr i8 [[TMP1]], 7
483-
; CHECK-NEXT: [[DOTNOT:%.*]] = zext i8 [[TMP2]] to i32
484-
; CHECK-NEXT: ret i32 [[DOTNOT]]
479+
; CHECK-NEXT: [[R:%.*]] = zext i1 [[CMP]] to i32
480+
; CHECK-NEXT: ret i32 [[R]]
485481
;
486482
%cmp = icmp sgt i8 %x, -1
487483
call void @use1(i1 %cmp)
@@ -493,13 +489,24 @@ define i8 @notneg_zext_narrower_use(i32 %x) {
493489
; CHECK-LABEL: @notneg_zext_narrower_use(
494490
; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i32 [[X:%.*]], -1
495491
; CHECK-NEXT: call void @use1(i1 [[CMP]])
496-
; CHECK-NEXT: [[X_LOBIT:%.*]] = lshr i32 [[X]], 31
497-
; CHECK-NEXT: [[TMP1:%.*]] = trunc i32 [[X_LOBIT]] to i8
498-
; CHECK-NEXT: [[DOTNOT:%.*]] = xor i8 [[TMP1]], 1
499-
; CHECK-NEXT: ret i8 [[DOTNOT]]
492+
; CHECK-NEXT: [[R:%.*]] = zext i1 [[CMP]] to i8
493+
; CHECK-NEXT: ret i8 [[R]]
500494
;
501495
%cmp = icmp sgt i32 %x, -1
502496
call void @use1(i1 %cmp)
503497
%r = zext i1 %cmp to i8
504498
ret i8 %r
505499
}
500+
501+
define i8 @disguised_signbit_clear_test(i64 %x) {
502+
; CHECK-LABEL: @disguised_signbit_clear_test(
503+
; CHECK-NEXT: [[TMP1:%.*]] = trunc i64 [[X:%.*]] to i8
504+
; CHECK-NEXT: [[TMP2:%.*]] = xor i8 [[TMP1]], -1
505+
; CHECK-NEXT: [[TMP3:%.*]] = lshr i8 [[TMP2]], 7
506+
; CHECK-NEXT: ret i8 [[TMP3]]
507+
;
508+
%a1 = and i64 %x, 128
509+
%t4 = icmp eq i64 %a1, 0
510+
%t6 = zext i1 %t4 to i8
511+
ret i8 %t6
512+
}

llvm/test/Transforms/PhaseOrdering/cmp-logic.ll

Lines changed: 8 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -3,9 +3,9 @@
33

44
define i32 @PR38781(i32 noundef %a, i32 noundef %b) {
55
; CHECK-LABEL: @PR38781(
6-
; CHECK-NEXT: [[A_LOBIT_NOT1_DEMORGAN:%.*]] = or i32 [[B:%.*]], [[A:%.*]]
7-
; CHECK-NEXT: [[A_LOBIT_NOT1:%.*]] = xor i32 [[A_LOBIT_NOT1_DEMORGAN]], -1
8-
; CHECK-NEXT: [[AND:%.*]] = lshr i32 [[A_LOBIT_NOT1]], 31
6+
; CHECK-NEXT: [[TMP1:%.*]] = or i32 [[B:%.*]], [[A:%.*]]
7+
; CHECK-NEXT: [[TMP2:%.*]] = icmp sgt i32 [[TMP1]], -1
8+
; CHECK-NEXT: [[AND:%.*]] = zext i1 [[TMP2]] to i32
99
; CHECK-NEXT: ret i32 [[AND]]
1010
;
1111
%cmp = icmp sge i32 %a, 0
@@ -48,17 +48,10 @@ land.end:
4848
define i1 @PR54692_b(i8 noundef signext %c) {
4949
; CHECK-LABEL: @PR54692_b(
5050
; CHECK-NEXT: entry:
51-
; CHECK-NEXT: [[TMP0:%.*]] = xor i8 [[C:%.*]], -1
52-
; CHECK-NEXT: [[TMP1:%.*]] = lshr i8 [[TMP0]], 7
53-
; CHECK-NEXT: [[DOTNOT:%.*]] = zext i8 [[TMP1]] to i32
54-
; CHECK-NEXT: [[CMP3:%.*]] = icmp slt i8 [[C]], 32
55-
; CHECK-NEXT: [[CONV4:%.*]] = zext i1 [[CMP3]] to i32
56-
; CHECK-NEXT: [[AND:%.*]] = and i32 [[DOTNOT]], [[CONV4]]
51+
; CHECK-NEXT: [[TMP0:%.*]] = icmp ult i8 [[C:%.*]], 32
5752
; CHECK-NEXT: [[CMP6:%.*]] = icmp eq i8 [[C]], 127
58-
; CHECK-NEXT: [[CONV7:%.*]] = zext i1 [[CMP6]] to i32
59-
; CHECK-NEXT: [[OR:%.*]] = or i32 [[AND]], [[CONV7]]
60-
; CHECK-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[OR]], 0
61-
; CHECK-NEXT: ret i1 [[TOBOOL]]
53+
; CHECK-NEXT: [[OR2:%.*]] = or i1 [[TMP0]], [[CMP6]]
54+
; CHECK-NEXT: ret i1 [[OR2]]
6255
;
6356
entry:
6457
%conv = sext i8 %c to i32
@@ -79,15 +72,9 @@ entry:
7972
define i1 @PR54692_c(i8 noundef signext %c) {
8073
; CHECK-LABEL: @PR54692_c(
8174
; CHECK-NEXT: entry:
82-
; CHECK-NEXT: [[TMP0:%.*]] = xor i8 [[C:%.*]], -1
83-
; CHECK-NEXT: [[TMP1:%.*]] = lshr i8 [[TMP0]], 7
84-
; CHECK-NEXT: [[DOTNOT:%.*]] = zext i8 [[TMP1]] to i32
85-
; CHECK-NEXT: [[CMP3:%.*]] = icmp slt i8 [[C]], 32
86-
; CHECK-NEXT: [[CONV4:%.*]] = zext i1 [[CMP3]] to i32
87-
; CHECK-NEXT: [[AND:%.*]] = and i32 [[DOTNOT]], [[CONV4]]
88-
; CHECK-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[AND]], 0
75+
; CHECK-NEXT: [[TMP0:%.*]] = icmp ult i8 [[C:%.*]], 32
8976
; CHECK-NEXT: [[CMP6:%.*]] = icmp eq i8 [[C]], 127
90-
; CHECK-NEXT: [[T0:%.*]] = or i1 [[CMP6]], [[TOBOOL]]
77+
; CHECK-NEXT: [[T0:%.*]] = or i1 [[TMP0]], [[CMP6]]
9178
; CHECK-NEXT: ret i1 [[T0]]
9279
;
9380
entry:

0 commit comments

Comments
 (0)