Skip to content

Commit 96e09fe

Browse files
author
Serguei Katkov
committed
[X86] Avoid use of constant NaN for fminimum/fmaximum lowering
After applying FMIN/FMAX, if either operand is NaN, the result is the second operand. So all we need to do is check whether the first operand is NaN and, if so, return it; otherwise we return the result of FMIN/FMAX. This avoids the use of a constant NaN in the lowering. Additionally, we can skip the NaN handling after FMIN/FMAX entirely if we are sure that the first operand is not NaN. Reviewed By: e-kud Differential Revision: https://reviews.llvm.org/D149729
1 parent a9919db commit 96e09fe

File tree

5 files changed

+299
-296
lines changed

5 files changed

+299
-296
lines changed

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 29 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -30253,9 +30253,9 @@ static SDValue LowerFMINIMUM_FMAXIMUM(SDValue Op, const X86Subtarget &Subtarget,
3025330253
// Y Y
3025430254
// Num xNaN +0 -0
3025530255
// --------------- ---------------
30256-
// Num | Max | qNaN | +0 | +0 | +0 |
30256+
// Num | Max | Y | +0 | +0 | +0 |
3025730257
// X --------------- X ---------------
30258-
// xNaN | qNaN | qNaN | -0 | +0 | -0 |
30258+
// xNaN | X | X/Y | -0 | +0 | -0 |
3025930259
// --------------- ---------------
3026030260
//
3026130261
// It is achieved by means of FMAX/FMIN with preliminary checks and operand
@@ -30273,15 +30273,18 @@ static SDValue LowerFMINIMUM_FMAXIMUM(SDValue Op, const X86Subtarget &Subtarget,
3027330273
return false;
3027430274
};
3027530275

30276-
SDValue MinMax;
3027730276
bool IsXNeverNaN = DAG.isKnownNeverNaN(X);
3027830277
bool IsYNeverNaN = DAG.isKnownNeverNaN(Y);
30279-
if (DAG.getTarget().Options.NoSignedZerosFPMath ||
30280-
Op->getFlags().hasNoSignedZeros() || IsPreferredZero(Y) ||
30281-
DAG.isKnownNeverZeroFloat(X)) {
30282-
MinMax = DAG.getNode(MinMaxOp, DL, VT, X, Y, Op->getFlags());
30278+
bool IgnoreSignedZero = DAG.getTarget().Options.NoSignedZerosFPMath ||
30279+
Op->getFlags().hasNoSignedZeros();
30280+
SDValue NewX, NewY;
30281+
if (IgnoreSignedZero || IsPreferredZero(Y) || DAG.isKnownNeverZeroFloat(X)) {
30282+
// Operands are already in right order or order does not matter.
30283+
NewX = X;
30284+
NewY = Y;
3028330285
} else if (IsPreferredZero(X) || DAG.isKnownNeverZeroFloat(Y)) {
30284-
MinMax = DAG.getNode(MinMaxOp, DL, VT, Y, X, Op->getFlags());
30286+
NewX = Y;
30287+
NewY = X;
3028530288
} else if ((VT == MVT::f16 || Subtarget.hasDQI()) &&
3028630289
(Op->getFlags().hasNoNaNs() || IsXNeverNaN || IsYNeverNaN)) {
3028730290
if (IsXNeverNaN)
@@ -30300,8 +30303,8 @@ static SDValue LowerFMINIMUM_FMAXIMUM(SDValue Op, const X86Subtarget &Subtarget,
3030030303
DAG.getConstant(0, DL, MVT::v8i1), IsNanZero,
3030130304
DAG.getIntPtrConstant(0, DL));
3030230305
SDValue NeedSwap = DAG.getBitcast(MVT::i8, Ins);
30303-
SDValue NewX = DAG.getSelect(DL, VT, NeedSwap, Y, X);
30304-
SDValue NewY = DAG.getSelect(DL, VT, NeedSwap, X, Y);
30306+
NewX = DAG.getSelect(DL, VT, NeedSwap, Y, X);
30307+
NewY = DAG.getSelect(DL, VT, NeedSwap, X, Y);
3030530308
return DAG.getNode(MinMaxOp, DL, VT, NewX, NewY, Op->getFlags());
3030630309
} else {
3030730310
SDValue IsXZero;
@@ -30330,19 +30333,26 @@ static SDValue LowerFMINIMUM_FMAXIMUM(SDValue Op, const X86Subtarget &Subtarget,
3033030333
IsXZero = DAG.getSetCC(DL, SetCCType, IsXZero,
3033130334
DAG.getConstant(0, DL, MVT::i32), ISD::SETEQ);
3033230335
}
30333-
SDValue NewX = DAG.getSelect(DL, VT, IsXZero, Y, X);
30334-
SDValue NewY = DAG.getSelect(DL, VT, IsXZero, X, Y);
30335-
MinMax = DAG.getNode(MinMaxOp, DL, VT, NewX, NewY, Op->getFlags());
30336+
NewX = DAG.getSelect(DL, VT, IsXZero, Y, X);
30337+
NewY = DAG.getSelect(DL, VT, IsXZero, X, Y);
3033630338
}
3033730339

30338-
if (Op->getFlags().hasNoNaNs() || (IsXNeverNaN && IsYNeverNaN))
30340+
bool IgnoreNaN = DAG.getTarget().Options.NoNaNsFPMath ||
30341+
Op->getFlags().hasNoNaNs() || (IsXNeverNaN && IsYNeverNaN);
30342+
30343+
// If we did not reorder operands for signed zero handling and we need
30344+
// to process NaN and we know that the second operand is not NaN then put
30345+
// it in first operand and we will not need to post handle NaN after max/min.
30346+
if (IgnoreSignedZero && !IgnoreNaN && DAG.isKnownNeverNaN(NewY))
30347+
std::swap(NewX, NewY);
30348+
30349+
SDValue MinMax = DAG.getNode(MinMaxOp, DL, VT, NewX, NewY, Op->getFlags());
30350+
30351+
if (IgnoreNaN || DAG.isKnownNeverNaN(NewX))
3033930352
return MinMax;
3034030353

30341-
APFloat NaNValue = APFloat::getNaN(DAG.EVTToAPFloatSemantics(VT));
30342-
SDValue IsNaN = DAG.getSetCC(DL, SetCCType, IsXNeverNaN ? Y : X,
30343-
IsYNeverNaN ? X : Y, ISD::SETUO);
30344-
return DAG.getSelect(DL, VT, IsNaN, DAG.getConstantFP(NaNValue, DL, VT),
30345-
MinMax);
30354+
SDValue IsNaN = DAG.getSetCC(DL, SetCCType, NewX, NewX, ISD::SETUO);
30355+
return DAG.getSelect(DL, VT, IsNaN, NewX, MinMax);
3034630356
}
3034730357

3034830358
static SDValue LowerABD(SDValue Op, const X86Subtarget &Subtarget,

llvm/test/CodeGen/X86/avx512fp16-fminimum-fmaximum.ll

Lines changed: 22 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -14,13 +14,13 @@ define half @test_fminimum(half %x, half %y) {
1414
; CHECK-NEXT: cmpl $32768, %eax # imm = 0x8000
1515
; CHECK-NEXT: sete %al
1616
; CHECK-NEXT: kmovd %eax, %k1
17-
; CHECK-NEXT: vmovaps %xmm0, %xmm2
18-
; CHECK-NEXT: vmovsh %xmm1, %xmm0, %xmm2 {%k1}
19-
; CHECK-NEXT: vcmpunordsh %xmm1, %xmm0, %k2
17+
; CHECK-NEXT: vmovaps %xmm1, %xmm2
18+
; CHECK-NEXT: vmovsh %xmm0, %xmm0, %xmm2 {%k1}
19+
; CHECK-NEXT: vmovsh %xmm1, %xmm0, %xmm0 {%k1}
20+
; CHECK-NEXT: vminsh %xmm2, %xmm0, %xmm1
21+
; CHECK-NEXT: vcmpunordsh %xmm0, %xmm0, %k1
2022
; CHECK-NEXT: vmovsh %xmm0, %xmm0, %xmm1 {%k1}
21-
; CHECK-NEXT: vminsh %xmm1, %xmm2, %xmm0
22-
; CHECK-NEXT: vmovsh {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
23-
; CHECK-NEXT: vmovsh %xmm1, %xmm0, %xmm0 {%k2}
23+
; CHECK-NEXT: vmovaps %xmm1, %xmm0
2424
; CHECK-NEXT: retq
2525
%z = call half @llvm.minimum.f16(half %x, half %y)
2626
ret half %z
@@ -79,10 +79,9 @@ define half @test_fminimum_nnan(half %x, half %y) "no-nans-fp-math"="true" {
7979
define half @test_fminimum_zero(half %x, half %y) {
8080
; CHECK-LABEL: test_fminimum_zero:
8181
; CHECK: # %bb.0:
82-
; CHECK-NEXT: vmovsh {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2
8382
; CHECK-NEXT: vcmpunordsh %xmm1, %xmm1, %k1
8483
; CHECK-NEXT: vminsh {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm0
85-
; CHECK-NEXT: vmovsh %xmm2, %xmm0, %xmm0 {%k1}
84+
; CHECK-NEXT: vmovsh %xmm1, %xmm0, %xmm0 {%k1}
8685
; CHECK-NEXT: retq
8786
%1 = tail call half @llvm.minimum.f16(half -0.0, half %y)
8887
ret half %1
@@ -91,10 +90,10 @@ define half @test_fminimum_zero(half %x, half %y) {
9190
define half @test_fminimum_nsz(half %x, half %y) {
9291
; CHECK-LABEL: test_fminimum_nsz:
9392
; CHECK: # %bb.0:
94-
; CHECK-NEXT: vcmpunordsh %xmm1, %xmm0, %k1
95-
; CHECK-NEXT: vminsh %xmm1, %xmm0, %xmm0
96-
; CHECK-NEXT: vmovsh {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
97-
; CHECK-NEXT: vmovsh %xmm1, %xmm0, %xmm0 {%k1}
93+
; CHECK-NEXT: vminsh %xmm1, %xmm0, %xmm1
94+
; CHECK-NEXT: vcmpunordsh %xmm0, %xmm0, %k1
95+
; CHECK-NEXT: vmovsh %xmm0, %xmm0, %xmm1 {%k1}
96+
; CHECK-NEXT: vmovaps %xmm1, %xmm0
9897
; CHECK-NEXT: retq
9998
%1 = tail call nsz half @llvm.minimum.f16(half %x, half %y)
10099
ret half %1
@@ -122,13 +121,13 @@ define half @test_fmaximum(half %x, half %y) {
122121
; CHECK-NEXT: testw %ax, %ax
123122
; CHECK-NEXT: sete %al
124123
; CHECK-NEXT: kmovd %eax, %k1
125-
; CHECK-NEXT: vmovaps %xmm0, %xmm2
126-
; CHECK-NEXT: vmovsh %xmm1, %xmm0, %xmm2 {%k1}
127-
; CHECK-NEXT: vcmpunordsh %xmm1, %xmm0, %k2
124+
; CHECK-NEXT: vmovaps %xmm1, %xmm2
125+
; CHECK-NEXT: vmovsh %xmm0, %xmm0, %xmm2 {%k1}
126+
; CHECK-NEXT: vmovsh %xmm1, %xmm0, %xmm0 {%k1}
127+
; CHECK-NEXT: vmaxsh %xmm2, %xmm0, %xmm1
128+
; CHECK-NEXT: vcmpunordsh %xmm0, %xmm0, %k1
128129
; CHECK-NEXT: vmovsh %xmm0, %xmm0, %xmm1 {%k1}
129-
; CHECK-NEXT: vmaxsh %xmm1, %xmm2, %xmm0
130-
; CHECK-NEXT: vmovsh {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
131-
; CHECK-NEXT: vmovsh %xmm1, %xmm0, %xmm0 {%k2}
130+
; CHECK-NEXT: vmovaps %xmm1, %xmm0
132131
; CHECK-NEXT: retq
133132
%r = call half @llvm.maximum.f16(half %x, half %y)
134133
ret half %r
@@ -193,9 +192,8 @@ define half @test_fmaximum_zero(half %x, half %y) {
193192
; CHECK: # %bb.0:
194193
; CHECK-NEXT: vxorps %xmm0, %xmm0, %xmm0
195194
; CHECK-NEXT: vmaxsh %xmm0, %xmm1, %xmm0
196-
; CHECK-NEXT: vmovsh {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2
197195
; CHECK-NEXT: vcmpunordsh %xmm1, %xmm1, %k1
198-
; CHECK-NEXT: vmovsh %xmm2, %xmm0, %xmm0 {%k1}
196+
; CHECK-NEXT: vmovsh %xmm1, %xmm0, %xmm0 {%k1}
199197
; CHECK-NEXT: retq
200198
%1 = tail call half @llvm.maximum.f16(half 0.0, half %y)
201199
ret half %1
@@ -204,10 +202,10 @@ define half @test_fmaximum_zero(half %x, half %y) {
204202
define half @test_fmaximum_nsz(half %x, half %y) "no-signed-zeros-fp-math"="true" {
205203
; CHECK-LABEL: test_fmaximum_nsz:
206204
; CHECK: # %bb.0:
207-
; CHECK-NEXT: vcmpunordsh %xmm1, %xmm0, %k1
208-
; CHECK-NEXT: vmaxsh %xmm1, %xmm0, %xmm0
209-
; CHECK-NEXT: vmovsh {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
210-
; CHECK-NEXT: vmovsh %xmm1, %xmm0, %xmm0 {%k1}
205+
; CHECK-NEXT: vmaxsh %xmm1, %xmm0, %xmm1
206+
; CHECK-NEXT: vcmpunordsh %xmm0, %xmm0, %k1
207+
; CHECK-NEXT: vmovsh %xmm0, %xmm0, %xmm1 {%k1}
208+
; CHECK-NEXT: vmovaps %xmm1, %xmm0
211209
; CHECK-NEXT: retq
212210
%1 = tail call half @llvm.maximum.f16(half %x, half %y)
213211
ret half %1

llvm/test/CodeGen/X86/extract-fp.ll

Lines changed: 2 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -111,11 +111,7 @@ define double @ext_maximum_v4f64(<2 x double> %x) nounwind {
111111
; CHECK-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1,1]
112112
; CHECK-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
113113
; CHECK-NEXT: maxsd %xmm0, %xmm1
114-
; CHECK-NEXT: cmpunordsd %xmm0, %xmm0
115-
; CHECK-NEXT: movsd {{.*#+}} xmm2 = mem[0],zero
116-
; CHECK-NEXT: andpd %xmm0, %xmm2
117-
; CHECK-NEXT: andnpd %xmm1, %xmm0
118-
; CHECK-NEXT: orpd %xmm2, %xmm0
114+
; CHECK-NEXT: movapd %xmm1, %xmm0
119115
; CHECK-NEXT: retq
120116
%v = call <2 x double> @llvm.maximum.v2f64(<2 x double> %x, <2 x double> <double 42.0, double 43.0>)
121117
%r = extractelement <2 x double> %v, i32 1
@@ -128,11 +124,7 @@ define float @ext_minimum_v4f32(<4 x float> %x) nounwind {
128124
; CHECK-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,1,1]
129125
; CHECK-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
130126
; CHECK-NEXT: minss %xmm0, %xmm1
131-
; CHECK-NEXT: cmpunordss %xmm0, %xmm0
132-
; CHECK-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
133-
; CHECK-NEXT: andps %xmm0, %xmm2
134-
; CHECK-NEXT: andnps %xmm1, %xmm0
135-
; CHECK-NEXT: orps %xmm2, %xmm0
127+
; CHECK-NEXT: movaps %xmm1, %xmm0
136128
; CHECK-NEXT: retq
137129
%v = call <4 x float> @llvm.minimum.v4f32(<4 x float> %x, <4 x float> <float 0.0, float 1.0, float 2.0, float 42.0>)
138130
%r = extractelement <4 x float> %v, i32 1

llvm/test/CodeGen/X86/extractelement-fp.ll

Lines changed: 32 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -680,16 +680,14 @@ define float @fmaximum_v4f32(<4 x float> %x, <4 x float> %y) nounwind {
680680
; X64-NEXT: je .LBB30_1
681681
; X64-NEXT: # %bb.2:
682682
; X64-NEXT: vmovdqa %xmm1, %xmm2
683-
; X64-NEXT: vmovdqa %xmm0, %xmm3
684683
; X64-NEXT: jmp .LBB30_3
685684
; X64-NEXT: .LBB30_1:
686685
; X64-NEXT: vmovdqa %xmm0, %xmm2
687-
; X64-NEXT: vmovdqa %xmm1, %xmm3
686+
; X64-NEXT: vmovdqa %xmm1, %xmm0
688687
; X64-NEXT: .LBB30_3:
689-
; X64-NEXT: vmaxss %xmm2, %xmm3, %xmm2
690-
; X64-NEXT: vcmpunordss %xmm1, %xmm0, %xmm0
691-
; X64-NEXT: vbroadcastss {{.*#+}} xmm1 = [NaN,NaN,NaN,NaN]
692-
; X64-NEXT: vblendvps %xmm0, %xmm1, %xmm2, %xmm0
688+
; X64-NEXT: vmaxss %xmm2, %xmm0, %xmm1
689+
; X64-NEXT: vcmpunordss %xmm0, %xmm0, %xmm2
690+
; X64-NEXT: vblendvps %xmm2, %xmm0, %xmm1, %xmm0
693691
; X64-NEXT: retq
694692
;
695693
; X86-LABEL: fmaximum_v4f32:
@@ -699,17 +697,15 @@ define float @fmaximum_v4f32(<4 x float> %x, <4 x float> %y) nounwind {
699697
; X86-NEXT: je .LBB30_1
700698
; X86-NEXT: # %bb.2:
701699
; X86-NEXT: vmovdqa %xmm1, %xmm2
702-
; X86-NEXT: vmovdqa %xmm0, %xmm3
703700
; X86-NEXT: jmp .LBB30_3
704701
; X86-NEXT: .LBB30_1:
705702
; X86-NEXT: vmovdqa %xmm0, %xmm2
706-
; X86-NEXT: vmovdqa %xmm1, %xmm3
703+
; X86-NEXT: vmovdqa %xmm1, %xmm0
707704
; X86-NEXT: .LBB30_3:
708705
; X86-NEXT: pushl %eax
709-
; X86-NEXT: vmaxss %xmm2, %xmm3, %xmm2
710-
; X86-NEXT: vcmpunordss %xmm1, %xmm0, %xmm0
711-
; X86-NEXT: vbroadcastss {{.*#+}} xmm1 = [NaN,NaN,NaN,NaN]
712-
; X86-NEXT: vblendvps %xmm0, %xmm1, %xmm2, %xmm0
706+
; X86-NEXT: vmaxss %xmm2, %xmm0, %xmm1
707+
; X86-NEXT: vcmpunordss %xmm0, %xmm0, %xmm2
708+
; X86-NEXT: vblendvps %xmm2, %xmm0, %xmm1, %xmm0
713709
; X86-NEXT: vmovss %xmm0, (%esp)
714710
; X86-NEXT: flds (%esp)
715711
; X86-NEXT: popl %eax
@@ -727,15 +723,14 @@ define double @fmaximum_v4f64(<4 x double> %x, <4 x double> %y) nounwind {
727723
; X64-NEXT: je .LBB31_1
728724
; X64-NEXT: # %bb.2:
729725
; X64-NEXT: vmovdqa %xmm1, %xmm2
730-
; X64-NEXT: vmovdqa %xmm0, %xmm3
731726
; X64-NEXT: jmp .LBB31_3
732727
; X64-NEXT: .LBB31_1:
733728
; X64-NEXT: vmovdqa %xmm0, %xmm2
734-
; X64-NEXT: vmovdqa %xmm1, %xmm3
729+
; X64-NEXT: vmovdqa %xmm1, %xmm0
735730
; X64-NEXT: .LBB31_3:
736-
; X64-NEXT: vmaxsd %xmm2, %xmm3, %xmm2
737-
; X64-NEXT: vcmpunordsd %xmm1, %xmm0, %xmm0
738-
; X64-NEXT: vblendvpd %xmm0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm0
731+
; X64-NEXT: vmaxsd %xmm2, %xmm0, %xmm1
732+
; X64-NEXT: vcmpunordsd %xmm0, %xmm0, %xmm2
733+
; X64-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
739734
; X64-NEXT: vzeroupper
740735
; X64-NEXT: retq
741736
;
@@ -747,19 +742,18 @@ define double @fmaximum_v4f64(<4 x double> %x, <4 x double> %y) nounwind {
747742
; X86-NEXT: je .LBB31_1
748743
; X86-NEXT: # %bb.2:
749744
; X86-NEXT: vmovdqa %xmm1, %xmm2
750-
; X86-NEXT: vmovdqa %xmm0, %xmm3
751745
; X86-NEXT: jmp .LBB31_3
752746
; X86-NEXT: .LBB31_1:
753747
; X86-NEXT: vmovdqa %xmm0, %xmm2
754-
; X86-NEXT: vmovdqa %xmm1, %xmm3
748+
; X86-NEXT: vmovdqa %xmm1, %xmm0
755749
; X86-NEXT: .LBB31_3:
756750
; X86-NEXT: pushl %ebp
757751
; X86-NEXT: movl %esp, %ebp
758752
; X86-NEXT: andl $-8, %esp
759753
; X86-NEXT: subl $8, %esp
760-
; X86-NEXT: vmaxsd %xmm2, %xmm3, %xmm2
761-
; X86-NEXT: vcmpunordsd %xmm1, %xmm0, %xmm0
762-
; X86-NEXT: vblendvpd %xmm0, {{\.?LCPI[0-9]+_[0-9]+}}, %xmm2, %xmm0
754+
; X86-NEXT: vmaxsd %xmm2, %xmm0, %xmm1
755+
; X86-NEXT: vcmpunordsd %xmm0, %xmm0, %xmm2
756+
; X86-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
763757
; X86-NEXT: vmovlpd %xmm0, (%esp)
764758
; X86-NEXT: fldl (%esp)
765759
; X86-NEXT: movl %ebp, %esp
@@ -779,16 +773,14 @@ define float @fminimum_v4f32(<4 x float> %x, <4 x float> %y) nounwind {
779773
; X64-NEXT: je .LBB32_1
780774
; X64-NEXT: # %bb.2:
781775
; X64-NEXT: vmovdqa %xmm1, %xmm2
782-
; X64-NEXT: vmovdqa %xmm0, %xmm3
783776
; X64-NEXT: jmp .LBB32_3
784777
; X64-NEXT: .LBB32_1:
785778
; X64-NEXT: vmovdqa %xmm0, %xmm2
786-
; X64-NEXT: vmovdqa %xmm1, %xmm3
779+
; X64-NEXT: vmovdqa %xmm1, %xmm0
787780
; X64-NEXT: .LBB32_3:
788-
; X64-NEXT: vminss %xmm2, %xmm3, %xmm2
789-
; X64-NEXT: vcmpunordss %xmm1, %xmm0, %xmm0
790-
; X64-NEXT: vbroadcastss {{.*#+}} xmm1 = [NaN,NaN,NaN,NaN]
791-
; X64-NEXT: vblendvps %xmm0, %xmm1, %xmm2, %xmm0
781+
; X64-NEXT: vminss %xmm2, %xmm0, %xmm1
782+
; X64-NEXT: vcmpunordss %xmm0, %xmm0, %xmm2
783+
; X64-NEXT: vblendvps %xmm2, %xmm0, %xmm1, %xmm0
792784
; X64-NEXT: retq
793785
;
794786
; X86-LABEL: fminimum_v4f32:
@@ -798,17 +790,15 @@ define float @fminimum_v4f32(<4 x float> %x, <4 x float> %y) nounwind {
798790
; X86-NEXT: je .LBB32_1
799791
; X86-NEXT: # %bb.2:
800792
; X86-NEXT: vmovdqa %xmm1, %xmm2
801-
; X86-NEXT: vmovdqa %xmm0, %xmm3
802793
; X86-NEXT: jmp .LBB32_3
803794
; X86-NEXT: .LBB32_1:
804795
; X86-NEXT: vmovdqa %xmm0, %xmm2
805-
; X86-NEXT: vmovdqa %xmm1, %xmm3
796+
; X86-NEXT: vmovdqa %xmm1, %xmm0
806797
; X86-NEXT: .LBB32_3:
807798
; X86-NEXT: pushl %eax
808-
; X86-NEXT: vminss %xmm2, %xmm3, %xmm2
809-
; X86-NEXT: vcmpunordss %xmm1, %xmm0, %xmm0
810-
; X86-NEXT: vbroadcastss {{.*#+}} xmm1 = [NaN,NaN,NaN,NaN]
811-
; X86-NEXT: vblendvps %xmm0, %xmm1, %xmm2, %xmm0
799+
; X86-NEXT: vminss %xmm2, %xmm0, %xmm1
800+
; X86-NEXT: vcmpunordss %xmm0, %xmm0, %xmm2
801+
; X86-NEXT: vblendvps %xmm2, %xmm0, %xmm1, %xmm0
812802
; X86-NEXT: vmovss %xmm0, (%esp)
813803
; X86-NEXT: flds (%esp)
814804
; X86-NEXT: popl %eax
@@ -827,15 +817,14 @@ define double @fminimum_v4f64(<4 x double> %x, <4 x double> %y) nounwind {
827817
; X64-NEXT: je .LBB33_1
828818
; X64-NEXT: # %bb.2:
829819
; X64-NEXT: vmovdqa %xmm1, %xmm2
830-
; X64-NEXT: vmovdqa %xmm0, %xmm3
831820
; X64-NEXT: jmp .LBB33_3
832821
; X64-NEXT: .LBB33_1:
833822
; X64-NEXT: vmovdqa %xmm0, %xmm2
834-
; X64-NEXT: vmovdqa %xmm1, %xmm3
823+
; X64-NEXT: vmovdqa %xmm1, %xmm0
835824
; X64-NEXT: .LBB33_3:
836-
; X64-NEXT: vminsd %xmm2, %xmm3, %xmm2
837-
; X64-NEXT: vcmpunordsd %xmm1, %xmm0, %xmm0
838-
; X64-NEXT: vblendvpd %xmm0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm0
825+
; X64-NEXT: vminsd %xmm2, %xmm0, %xmm1
826+
; X64-NEXT: vcmpunordsd %xmm0, %xmm0, %xmm2
827+
; X64-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
839828
; X64-NEXT: vzeroupper
840829
; X64-NEXT: retq
841830
;
@@ -848,19 +837,18 @@ define double @fminimum_v4f64(<4 x double> %x, <4 x double> %y) nounwind {
848837
; X86-NEXT: je .LBB33_1
849838
; X86-NEXT: # %bb.2:
850839
; X86-NEXT: vmovdqa %xmm1, %xmm2
851-
; X86-NEXT: vmovdqa %xmm0, %xmm3
852840
; X86-NEXT: jmp .LBB33_3
853841
; X86-NEXT: .LBB33_1:
854842
; X86-NEXT: vmovdqa %xmm0, %xmm2
855-
; X86-NEXT: vmovdqa %xmm1, %xmm3
843+
; X86-NEXT: vmovdqa %xmm1, %xmm0
856844
; X86-NEXT: .LBB33_3:
857845
; X86-NEXT: pushl %ebp
858846
; X86-NEXT: movl %esp, %ebp
859847
; X86-NEXT: andl $-8, %esp
860848
; X86-NEXT: subl $8, %esp
861-
; X86-NEXT: vminsd %xmm2, %xmm3, %xmm2
862-
; X86-NEXT: vcmpunordsd %xmm1, %xmm0, %xmm0
863-
; X86-NEXT: vblendvpd %xmm0, {{\.?LCPI[0-9]+_[0-9]+}}, %xmm2, %xmm0
849+
; X86-NEXT: vminsd %xmm2, %xmm0, %xmm1
850+
; X86-NEXT: vcmpunordsd %xmm0, %xmm0, %xmm2
851+
; X86-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
864852
; X86-NEXT: vmovlpd %xmm0, (%esp)
865853
; X86-NEXT: fldl (%esp)
866854
; X86-NEXT: movl %ebp, %esp

0 commit comments

Comments
 (0)