-
Notifications
You must be signed in to change notification settings - Fork 13.5k
[InstCombine] Handle isNanOrInf idioms #80414
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[InstCombine] Handle isNanOrInf idioms #80414
Conversation
@llvm/pr-subscribers-llvm-transforms Author: Yingwei Zheng (dtcxzyw) ChangesThis patch folds:
Alive2: https://alive2.llvm.org/ce/z/_hXAAF Full diff: https://github.com/llvm/llvm-project/pull/80414.diff 2 Files Affected:
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
index d295853798b80..0fe75309296b4 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
@@ -1827,6 +1827,35 @@ Instruction *InstCombinerImpl::foldICmpAndConstConst(ICmpInst &Cmp,
}
}
+ // (icmp eq (and (bitcast X to int), ExponentMask), ExponentMask) -->
+ // llvm.is.fpclass(X, fcInf|fcNan)
+ // (icmp ne (and (bitcast X to int), ExponentMask), ExponentMask) -->
+ // llvm.is.fpclass(X, ~(fcInf|fcNan))
+ Value *V;
+ if (!Cmp.getParent()->getParent()->hasFnAttribute(
+ Attribute::NoImplicitFloat) &&
+ Cmp.isEquality() && match(X, m_OneUse(m_BitCast(m_Value(V))))) {
+ Type *SrcType = V->getType();
+ Type *DstType = X->getType();
+ // Make sure the bitcast doesn't change between scalar and vector and
+ // doesn't change the number of vector elements.
+ if (SrcType->isVectorTy() == DstType->isVectorTy() &&
+ SrcType->getScalarSizeInBits() == DstType->getScalarSizeInBits()) {
+ Type *FPType = SrcType->getScalarType();
+ if (FPType->isIEEELikeFPTy() && C1 == *C2) {
+ APInt ExponentMask = APInt::getBitsSet(
+ FPType->getScalarSizeInBits(), FPType->getFPMantissaWidth() - 1,
+ FPType->getScalarSizeInBits() - 1);
+ if (C1 == ExponentMask) {
+ unsigned Mask = FPClassTest::fcNan | FPClassTest::fcInf;
+ if (isICMP_NE)
+ Mask = ~Mask & fcAllFlags;
+ return replaceInstUsesWith(Cmp, Builder.createIsFPClass(V, Mask));
+ }
+ }
+ }
+ }
+
return nullptr;
}
diff --git a/llvm/test/Transforms/InstCombine/fpclass-check-idioms.ll b/llvm/test/Transforms/InstCombine/fpclass-check-idioms.ll
new file mode 100644
index 0000000000000..dd4a3e0afb50a
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/fpclass-check-idioms.ll
@@ -0,0 +1,177 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
+; RUN: opt -S -passes=instcombine < %s | FileCheck %s
+
+define i1 @f32_fcnan_fcinf(float %a) {
+; CHECK-LABEL: define i1 @f32_fcnan_fcinf(
+; CHECK-SAME: float [[A:%.*]]) {
+; CHECK-NEXT: [[TMP1:%.*]] = call float @llvm.fabs.f32(float [[A]])
+; CHECK-NEXT: [[CMP:%.*]] = fcmp ueq float [[TMP1]], 0x7FF0000000000000
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %i32 = bitcast float %a to i32
+ %and = and i32 %i32, 2139095040
+ %cmp = icmp eq i32 %and, 2139095040
+ ret i1 %cmp
+}
+
+define i1 @f32_not_fcnan_fcinf(float %a) {
+; CHECK-LABEL: define i1 @f32_not_fcnan_fcinf(
+; CHECK-SAME: float [[A:%.*]]) {
+; CHECK-NEXT: [[TMP1:%.*]] = call float @llvm.fabs.f32(float [[A]])
+; CHECK-NEXT: [[CMP:%.*]] = fcmp one float [[TMP1]], 0x7FF0000000000000
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %i32 = bitcast float %a to i32
+ %and = and i32 %i32, 2139095040
+ %cmp = icmp ne i32 %and, 2139095040
+ ret i1 %cmp
+}
+
+define i1 @f64_fcnan_fcinf(double %a) {
+; CHECK-LABEL: define i1 @f64_fcnan_fcinf(
+; CHECK-SAME: double [[A:%.*]]) {
+; CHECK-NEXT: [[TMP1:%.*]] = call double @llvm.fabs.f64(double [[A]])
+; CHECK-NEXT: [[CMP:%.*]] = fcmp ueq double [[TMP1]], 0x7FF0000000000000
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %i64 = bitcast double %a to i64
+ %and = and i64 %i64, 9218868437227405312
+ %cmp = icmp eq i64 %and, 9218868437227405312
+ ret i1 %cmp
+}
+
+; TODO: handle more fpclass check idioms
+define i1 @f32_fcinf(float %a) {
+; CHECK-LABEL: define i1 @f32_fcinf(
+; CHECK-SAME: float [[A:%.*]]) {
+; CHECK-NEXT: [[TMP1:%.*]] = call float @llvm.fabs.f32(float [[A]])
+; CHECK-NEXT: [[AND:%.*]] = bitcast float [[TMP1]] to i32
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[AND]], 2139095040
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %i32 = bitcast float %a to i32
+ %and = and i32 %i32, 2147483647
+ %cmp = icmp eq i32 %and, 2139095040
+ ret i1 %cmp
+}
+
+define i1 @f32_fcnan(float %a) {
+; CHECK-LABEL: define i1 @f32_fcnan(
+; CHECK-SAME: float [[A:%.*]]) {
+; CHECK-NEXT: [[I32:%.*]] = bitcast float [[A]] to i32
+; CHECK-NEXT: [[AND1:%.*]] = and i32 [[I32]], 2139095040
+; CHECK-NEXT: [[CMP1:%.*]] = icmp eq i32 [[AND1]], 2139095040
+; CHECK-NEXT: [[AND2:%.*]] = and i32 [[I32]], 8388607
+; CHECK-NEXT: [[CMP2:%.*]] = icmp ne i32 [[AND2]], 0
+; CHECK-NEXT: [[RES:%.*]] = and i1 [[CMP1]], [[CMP2]]
+; CHECK-NEXT: ret i1 [[RES]]
+;
+ %i32 = bitcast float %a to i32
+ %and1 = and i32 %i32, 2139095040
+ %cmp1 = icmp eq i32 %and1, 2139095040
+ %and2 = and i32 %i32, 8388607
+ %cmp2 = icmp ne i32 %and2, 0
+ %res = and i1 %cmp1, %cmp2
+ ret i1 %res
+}
+
+; Negative tests
+
+define i1 @f32_fcnan_fcinf_wrong_mask1(float %a) {
+; CHECK-LABEL: define i1 @f32_fcnan_fcinf_wrong_mask1(
+; CHECK-SAME: float [[A:%.*]]) {
+; CHECK-NEXT: [[I32:%.*]] = bitcast float [[A]] to i32
+; CHECK-NEXT: [[AND:%.*]] = and i32 [[I32]], 2139095041
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[AND]], 2139095040
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %i32 = bitcast float %a to i32
+ %and = and i32 %i32, 2139095041
+ %cmp = icmp eq i32 %and, 2139095040
+ ret i1 %cmp
+}
+
+define i1 @f32_fcnan_fcinf_wrong_mask2(float %a) {
+; CHECK-LABEL: define i1 @f32_fcnan_fcinf_wrong_mask2(
+; CHECK-SAME: float [[A:%.*]]) {
+; CHECK-NEXT: [[I32:%.*]] = bitcast float [[A]] to i32
+; CHECK-NEXT: [[AND:%.*]] = and i32 [[I32]], 2139095040
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[AND]], 2130706432
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %i32 = bitcast float %a to i32
+ %and = and i32 %i32, 2139095040
+ %cmp = icmp eq i32 %and, 2130706432
+ ret i1 %cmp
+}
+
+define i1 @f64_fcnan_fcinf_wrong_mask3(double %a) {
+; CHECK-LABEL: define i1 @f64_fcnan_fcinf_wrong_mask3(
+; CHECK-SAME: double [[A:%.*]]) {
+; CHECK-NEXT: [[I64:%.*]] = bitcast double [[A]] to i64
+; CHECK-NEXT: [[AND:%.*]] = and i64 [[I64]], 2139095040
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i64 [[AND]], 2139095040
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %i64 = bitcast double %a to i64
+ %and = and i64 %i64, 2139095040
+ %cmp = icmp eq i64 %and, 2139095040
+ ret i1 %cmp
+}
+
+define i1 @f32_fcnan_fcinf_wrong_pred(float %a) {
+; CHECK-LABEL: define i1 @f32_fcnan_fcinf_wrong_pred(
+; CHECK-SAME: float [[A:%.*]]) {
+; CHECK-NEXT: [[TMP1:%.*]] = call float @llvm.fabs.f32(float [[A]])
+; CHECK-NEXT: [[CMP:%.*]] = fcmp one float [[TMP1]], 0x7FF0000000000000
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %i32 = bitcast float %a to i32
+ %and = and i32 %i32, 2139095040
+ %cmp = icmp slt i32 %and, 2139095040
+ ret i1 %cmp
+}
+
+define i1 @f32_fcnan_fcinf_wrong_type1(<2 x float> %a) {
+; CHECK-LABEL: define i1 @f32_fcnan_fcinf_wrong_type1(
+; CHECK-SAME: <2 x float> [[A:%.*]]) {
+; CHECK-NEXT: [[I64:%.*]] = bitcast <2 x float> [[A]] to i64
+; CHECK-NEXT: [[AND:%.*]] = and i64 [[I64]], 2139095040
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i64 [[AND]], 2139095040
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %i64 = bitcast <2 x float> %a to i64
+ %and = and i64 %i64, 2139095040
+ %cmp = icmp eq i64 %and, 2139095040
+ ret i1 %cmp
+}
+
+define i1 @f32_fcnan_fcinf_wrong_type2(x86_fp80 %a) {
+; CHECK-LABEL: define i1 @f32_fcnan_fcinf_wrong_type2(
+; CHECK-SAME: x86_fp80 [[A:%.*]]) {
+; CHECK-NEXT: [[I80:%.*]] = bitcast x86_fp80 [[A]] to i80
+; CHECK-NEXT: [[AND:%.*]] = and i80 [[I80]], 2139095040
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i80 [[AND]], 2139095040
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %i80 = bitcast x86_fp80 %a to i80
+ %and = and i80 %i80, 2139095040
+ %cmp = icmp eq i80 %and, 2139095040
+ ret i1 %cmp
+}
+
+define i1 @f32_fcnan_fcinf_noimplicitfloat(float %a) #0 {
+; CHECK-LABEL: define i1 @f32_fcnan_fcinf_noimplicitfloat(
+; CHECK-SAME: float [[A:%.*]]) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT: [[I32:%.*]] = bitcast float [[A]] to i32
+; CHECK-NEXT: [[AND:%.*]] = and i32 [[I32]], 2139095040
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[AND]], 2139095040
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %i32 = bitcast float %a to i32
+ %and = and i32 %i32, 2139095040
+ %cmp = icmp eq i32 %and, 2139095040
+ ret i1 %cmp
+}
+
+attributes #0 = { noimplicitfloat }
|
Address review comments.
; CHECK-LABEL: define i1 @f32_fcnan_fcinf_strictfp( | ||
; CHECK-SAME: float [[A:%.*]]) #[[ATTR0:[0-9]+]] { | ||
; CHECK-NEXT: [[TMP1:%.*]] = call float @llvm.fabs.f32(float [[A]]) | ||
; CHECK-NEXT: [[CMP:%.*]] = fcmp ueq float [[TMP1]], 0x7FF0000000000000 |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
this shows a bug, we're not emitting a strict compare. doesn't need to be fixed in this patch though, existing issue
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think this is an old bug but this patch has caused us to start hitting it in our floating-point library, I've posted a fix at #81498.
This patch folds: ``` (icmp eq (and (bitcast X to int), ExponentMask), ExponentMask) --> llvm.is.fpclass(X, fcInf|fcNan) (icmp ne (and (bitcast X to int), ExponentMask), ExponentMask) --> llvm.is.fpclass(X, ~(fcInf|fcNan)) ``` Alive2: https://alive2.llvm.org/ce/z/_hXAAF
@dtcxzyw FYI, this patch and your similar recent changes introduce runtime library calls with fp128 (https://godbolt.org/z/PWcjK86vz) so it's a bit of a de-optimization in that case. I think @FreddyLeaf will look into this when he returns from holiday. We should be able to fix this in the selection DAG. @pranavk has been working on fp128-related issues, so may also want to be aware of this. |
I will reverse the transform in SDAG to fix the problem. |
Thanks! This broke the build of some downstream libraries I work with because they didn't want to link with the runtime library. We disabled the transformations for fp128 locally to work around the problem, but I think this is a good transformation and I'd like to re-enable it when we can. You may already be considering this, but x86_fp80 also has issues, in that after your changes it's using x87 instructions where it used to use a faster integer implementation: https://godbolt.org/z/bPfrnW368 |
…1572) In commit 2b58244, we canonicalize the isInf/isNanOrInf idiom into fabs+fcmp for better analysis/codegen (See also the discussion in #76338). This patch reverses the fabs+fcmp to `is.fpclass`. If the `is.fpclass` is not supported by the target, it will be expanded by TLI. Fixes the regression introduced by 2b58244 and #80414 (comment).
This patch folds:
Alive2: https://alive2.llvm.org/ce/z/_hXAAF
Related patch: #76338