Skip to content

Commit b5600c6

Browse files
authored
[TargetLowering][SelectionDAG] Exploit nneg Flag in UINT_TO_FP (llvm#108931)
1. Propagate the nneg flag in WidenVecRes_Convert. 2. Use SINT_TO_FP in expandUINT_TO_FP when the node carries the nneg flag and SINT_TO_FP is legal or custom for the source type.
1 parent c79e5ac commit b5600c6

File tree

4 files changed

+104
-22
lines changed

4 files changed

+104
-22
lines changed

llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp

+5-5
Original file line numberDiff line numberDiff line change
@@ -5208,7 +5208,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_Convert(SDNode *N) {
52085208
if (N->getOpcode() == ISD::ZERO_EXTEND &&
52095209
getTypeAction(InVT) == TargetLowering::TypePromoteInteger &&
52105210
TLI.getTypeToTransformTo(Ctx, InVT).getScalarSizeInBits() !=
5211-
WidenVT.getScalarSizeInBits()) {
5211+
WidenVT.getScalarSizeInBits()) {
52125212
InOp = ZExtPromotedInteger(InOp);
52135213
InVT = InOp.getValueType();
52145214
if (WidenVT.getScalarSizeInBits() < InVT.getScalarSizeInBits())
@@ -5225,7 +5225,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_Convert(SDNode *N) {
52255225
InVTEC = InVT.getVectorElementCount();
52265226
if (InVTEC == WidenEC) {
52275227
if (N->getNumOperands() == 1)
5228-
return DAG.getNode(Opcode, DL, WidenVT, InOp);
5228+
return DAG.getNode(Opcode, DL, WidenVT, InOp, Flags);
52295229
if (N->getNumOperands() == 3) {
52305230
assert(N->isVPOpcode() && "Expected VP opcode");
52315231
SDValue Mask =
@@ -5261,7 +5261,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_Convert(SDNode *N) {
52615261
Ops[0] = InOp;
52625262
SDValue InVec = DAG.getNode(ISD::CONCAT_VECTORS, DL, InWidenVT, Ops);
52635263
if (N->getNumOperands() == 1)
5264-
return DAG.getNode(Opcode, DL, WidenVT, InVec);
5264+
return DAG.getNode(Opcode, DL, WidenVT, InVec, Flags);
52655265
return DAG.getNode(Opcode, DL, WidenVT, InVec, N->getOperand(1), Flags);
52665266
}
52675267

@@ -5270,7 +5270,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_Convert(SDNode *N) {
52705270
DAG.getVectorIdxConstant(0, DL));
52715271
// Extract the input and convert the shorten input vector.
52725272
if (N->getNumOperands() == 1)
5273-
return DAG.getNode(Opcode, DL, WidenVT, InVal);
5273+
return DAG.getNode(Opcode, DL, WidenVT, InVal, Flags);
52745274
return DAG.getNode(Opcode, DL, WidenVT, InVal, N->getOperand(1), Flags);
52755275
}
52765276
}
@@ -5285,7 +5285,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_Convert(SDNode *N) {
52855285
SDValue Val = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, InEltVT, InOp,
52865286
DAG.getVectorIdxConstant(i, DL));
52875287
if (N->getNumOperands() == 1)
5288-
Ops[i] = DAG.getNode(Opcode, DL, EltVT, Val);
5288+
Ops[i] = DAG.getNode(Opcode, DL, EltVT, Val, Flags);
52895289
else
52905290
Ops[i] = DAG.getNode(Opcode, DL, EltVT, Val, N->getOperand(1), Flags);
52915291
}

llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp

+18-9
Original file line numberDiff line numberDiff line change
@@ -8364,22 +8364,31 @@ bool TargetLowering::expandFP_TO_UINT(SDNode *Node, SDValue &Result,
83648364
}
83658365

83668366
bool TargetLowering::expandUINT_TO_FP(SDNode *Node, SDValue &Result,
8367-
SDValue &Chain,
8368-
SelectionDAG &DAG) const {
8367+
SDValue &Chain, SelectionDAG &DAG) const {
83698368
// This transform is not correct for converting 0 when rounding mode is set
8370-
// to round toward negative infinity which will produce -0.0. So disable under
8371-
// strictfp.
8369+
// to round toward negative infinity which will produce -0.0. So disable
8370+
// under strictfp.
83728371
if (Node->isStrictFPOpcode())
83738372
return false;
83748373

83758374
SDValue Src = Node->getOperand(0);
83768375
EVT SrcVT = Src.getValueType();
83778376
EVT DstVT = Node->getValueType(0);
83788377

8378+
// If the input is known to be non-negative and SINT_TO_FP is legal then use
8379+
// it.
8380+
if (Node->getFlags().hasNonNeg() &&
8381+
isOperationLegalOrCustom(ISD::SINT_TO_FP, SrcVT)) {
8382+
Result =
8383+
DAG.getNode(ISD::SINT_TO_FP, SDLoc(Node), DstVT, Node->getOperand(0));
8384+
return true;
8385+
}
8386+
83798387
if (SrcVT.getScalarType() != MVT::i64 || DstVT.getScalarType() != MVT::f64)
83808388
return false;
83818389

8382-
// Only expand vector types if we have the appropriate vector bit operations.
8390+
// Only expand vector types if we have the appropriate vector bit
8391+
// operations.
83838392
if (SrcVT.isVector() && (!isOperationLegalOrCustom(ISD::SRL, SrcVT) ||
83848393
!isOperationLegalOrCustom(ISD::FADD, DstVT) ||
83858394
!isOperationLegalOrCustom(ISD::FSUB, DstVT) ||
@@ -8393,8 +8402,9 @@ bool TargetLowering::expandUINT_TO_FP(SDNode *Node, SDValue &Result,
83938402
// Implementation of unsigned i64 to f64 following the algorithm in
83948403
// __floatundidf in compiler_rt. This implementation performs rounding
83958404
// correctly in all rounding modes with the exception of converting 0
8396-
// when rounding toward negative infinity. In that case the fsub will produce
8397-
// -0.0. This will be added to +0.0 and produce -0.0 which is incorrect.
8405+
// when rounding toward negative infinity. In that case the fsub will
8406+
// produce -0.0. This will be added to +0.0 and produce -0.0 which is
8407+
// incorrect.
83988408
SDValue TwoP52 = DAG.getConstant(UINT64_C(0x4330000000000000), dl, SrcVT);
83998409
SDValue TwoP84PlusTwoP52 = DAG.getConstantFP(
84008410
llvm::bit_cast<double>(UINT64_C(0x4530000000100000)), dl, DstVT);
@@ -8408,8 +8418,7 @@ bool TargetLowering::expandUINT_TO_FP(SDNode *Node, SDValue &Result,
84088418
SDValue HiOr = DAG.getNode(ISD::OR, dl, SrcVT, Hi, TwoP84);
84098419
SDValue LoFlt = DAG.getBitcast(DstVT, LoOr);
84108420
SDValue HiFlt = DAG.getBitcast(DstVT, HiOr);
8411-
SDValue HiSub =
8412-
DAG.getNode(ISD::FSUB, dl, DstVT, HiFlt, TwoP84PlusTwoP52);
8421+
SDValue HiSub = DAG.getNode(ISD::FSUB, dl, DstVT, HiFlt, TwoP84PlusTwoP52);
84138422
Result = DAG.getNode(ISD::FADD, dl, DstVT, LoFlt, HiSub);
84148423
return true;
84158424
}

llvm/test/CodeGen/VE/Scalar/cast.ll

+38
Original file line numberDiff line numberDiff line change
@@ -568,6 +568,44 @@ define float @ull2f(i64 %x) {
568568
ret float %r
569569
}
570570

571+
define float @ull2f_nneg(i64 %x) {
572+
; CHECK-LABEL: ull2f_nneg:
573+
; CHECK: # %bb.0:
574+
; CHECK-NEXT: cvt.d.l %s0, %s0
575+
; CHECK-NEXT: cvt.s.d %s0, %s0
576+
; CHECK-NEXT: b.l.t (, %s10)
577+
%r = uitofp nneg i64 %x to float
578+
ret float %r
579+
}
580+
581+
define float @ull2f_strict(i32 %x) {
582+
; CHECK-LABEL: ull2f_strict:
583+
; CHECK: # %bb.0:
584+
; CHECK-NEXT: adds.l %s11, -16, %s11
585+
; CHECK-NEXT: brge.l.t %s11, %s8, .LBB58_2
586+
; CHECK-NEXT: # %bb.1:
587+
; CHECK-NEXT: ld %s61, 24(, %s14)
588+
; CHECK-NEXT: or %s62, 0, %s0
589+
; CHECK-NEXT: lea %s63, 315
590+
; CHECK-NEXT: shm.l %s63, (%s61)
591+
; CHECK-NEXT: shm.l %s8, 8(%s61)
592+
; CHECK-NEXT: shm.l %s11, 16(%s61)
593+
; CHECK-NEXT: monc
594+
; CHECK-NEXT: or %s0, 0, %s62
595+
; CHECK-NEXT: .LBB58_2:
596+
; CHECK-NEXT: lea %s1, 1127219200
597+
; CHECK-NEXT: stl %s1, 12(, %s11)
598+
; CHECK-NEXT: stl %s0, 8(, %s11)
599+
; CHECK-NEXT: ld %s0, 8(, %s11)
600+
; CHECK-NEXT: lea.sl %s1, 1127219200
601+
; CHECK-NEXT: fsub.d %s0, %s0, %s1
602+
; CHECK-NEXT: cvt.s.d %s0, %s0
603+
; CHECK-NEXT: adds.l %s11, 16, %s11
604+
; CHECK-NEXT: b.l.t (, %s10)
605+
%val = call float @llvm.experimental.constrained.uitofp.f32.i32(i32 %x, metadata !"round.tonearest", metadata !"fpexcept.strict")
606+
ret float %val
607+
}
608+
571609
define double @ull2d(i64 %x) {
572610
; CHECK-LABEL: ull2d:
573611
; CHECK: # %bb.0:

llvm/test/CodeGen/X86/avx512-cvt.ll

+43-8
Original file line numberDiff line numberDiff line change
@@ -312,11 +312,46 @@ define <4 x float> @ulto4f32(<4 x i64> %a) {
312312
ret <4 x float> %b
313313
}
314314

315+
define <4 x float> @ulto4f32_nneg(<4 x i64> %a) {
316+
; NODQ-LABEL: ulto4f32_nneg:
317+
; NODQ: # %bb.0:
318+
; NODQ-NEXT: vpextrq $1, %xmm0, %rax
319+
; NODQ-NEXT: vcvtsi2ss %rax, %xmm1, %xmm1
320+
; NODQ-NEXT: vmovq %xmm0, %rax
321+
; NODQ-NEXT: vcvtsi2ss %rax, %xmm2, %xmm2
322+
; NODQ-NEXT: vinsertps {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[2,3]
323+
; NODQ-NEXT: vextracti128 $1, %ymm0, %xmm0
324+
; NODQ-NEXT: vmovq %xmm0, %rax
325+
; NODQ-NEXT: vcvtsi2ss %rax, %xmm3, %xmm2
326+
; NODQ-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0],xmm1[3]
327+
; NODQ-NEXT: vpextrq $1, %xmm0, %rax
328+
; NODQ-NEXT: vcvtsi2ss %rax, %xmm3, %xmm0
329+
; NODQ-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
330+
; NODQ-NEXT: vzeroupper
331+
; NODQ-NEXT: retq
332+
;
333+
; VLDQ-LABEL: ulto4f32_nneg:
334+
; VLDQ: # %bb.0:
335+
; VLDQ-NEXT: vcvtqq2ps %ymm0, %xmm0
336+
; VLDQ-NEXT: vzeroupper
337+
; VLDQ-NEXT: retq
338+
;
339+
; DQNOVL-LABEL: ulto4f32_nneg:
340+
; DQNOVL: # %bb.0:
341+
; DQNOVL-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
342+
; DQNOVL-NEXT: vcvtqq2ps %zmm0, %ymm0
343+
; DQNOVL-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
344+
; DQNOVL-NEXT: vzeroupper
345+
; DQNOVL-NEXT: retq
346+
%b = uitofp nneg <4 x i64> %a to <4 x float>
347+
ret <4 x float> %b
348+
}
349+
315350
define <8 x double> @ulto8f64(<8 x i64> %a) {
316351
; NODQ-LABEL: ulto8f64:
317352
; NODQ: # %bb.0:
318353
; NODQ-NEXT: vpbroadcastq {{.*#+}} zmm1 = [4841369599423283200,4841369599423283200,4841369599423283200,4841369599423283200,4841369599423283200,4841369599423283200,4841369599423283200,4841369599423283200]
319-
; NODQ-NEXT: vpternlogq $248, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %zmm1
354+
; NODQ-NEXT: vpternlogq {{.*#+}} zmm1 = zmm1 | (zmm0 & mem)
320355
; NODQ-NEXT: vpsrlq $32, %zmm0, %zmm0
321356
; NODQ-NEXT: vporq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %zmm0
322357
; NODQ-NEXT: vsubpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %zmm0
@@ -342,14 +377,14 @@ define <16 x double> @ulto16f64(<16 x i64> %a) {
342377
; NODQ-NEXT: vpbroadcastq {{.*#+}} zmm2 = [4294967295,4294967295,4294967295,4294967295,4294967295,4294967295,4294967295,4294967295]
343378
; NODQ-NEXT: vpbroadcastq {{.*#+}} zmm3 = [4841369599423283200,4841369599423283200,4841369599423283200,4841369599423283200,4841369599423283200,4841369599423283200,4841369599423283200,4841369599423283200]
344379
; NODQ-NEXT: vmovdqa64 %zmm3, %zmm4
345-
; NODQ-NEXT: vpternlogq $248, %zmm2, %zmm0, %zmm4
380+
; NODQ-NEXT: vpternlogq {{.*#+}} zmm4 = zmm4 | (zmm0 & zmm2)
346381
; NODQ-NEXT: vpsrlq $32, %zmm0, %zmm0
347382
; NODQ-NEXT: vpbroadcastq {{.*#+}} zmm5 = [4985484787499139072,4985484787499139072,4985484787499139072,4985484787499139072,4985484787499139072,4985484787499139072,4985484787499139072,4985484787499139072]
348383
; NODQ-NEXT: vporq %zmm5, %zmm0, %zmm0
349384
; NODQ-NEXT: vbroadcastsd {{.*#+}} zmm6 = [1.9342813118337666E+25,1.9342813118337666E+25,1.9342813118337666E+25,1.9342813118337666E+25,1.9342813118337666E+25,1.9342813118337666E+25,1.9342813118337666E+25,1.9342813118337666E+25]
350385
; NODQ-NEXT: vsubpd %zmm6, %zmm0, %zmm0
351386
; NODQ-NEXT: vaddpd %zmm0, %zmm4, %zmm0
352-
; NODQ-NEXT: vpternlogq $248, %zmm2, %zmm1, %zmm3
387+
; NODQ-NEXT: vpternlogq {{.*#+}} zmm3 = zmm3 | (zmm1 & zmm2)
353388
; NODQ-NEXT: vpsrlq $32, %zmm1, %zmm1
354389
; NODQ-NEXT: vporq %zmm5, %zmm1, %zmm1
355390
; NODQ-NEXT: vsubpd %zmm6, %zmm1, %zmm1
@@ -1483,7 +1518,7 @@ define <16 x float> @sbto16f32(<16 x i32> %a) {
14831518
; NODQ: # %bb.0:
14841519
; NODQ-NEXT: vpxor %xmm1, %xmm1, %xmm1
14851520
; NODQ-NEXT: vpcmpgtd %zmm0, %zmm1, %k1
1486-
; NODQ-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
1521+
; NODQ-NEXT: vpternlogd {{.*#+}} zmm0 {%k1} {z} = -1
14871522
; NODQ-NEXT: vcvtdq2ps %zmm0, %zmm0
14881523
; NODQ-NEXT: retq
14891524
;
@@ -1564,7 +1599,7 @@ define <16 x double> @sbto16f64(<16 x double> %a) {
15641599
; NODQ-NEXT: vcmpltpd %zmm0, %zmm2, %k0
15651600
; NODQ-NEXT: vcmpltpd %zmm1, %zmm2, %k1
15661601
; NODQ-NEXT: kunpckbw %k0, %k1, %k1
1567-
; NODQ-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
1602+
; NODQ-NEXT: vpternlogd {{.*#+}} zmm1 {%k1} {z} = -1
15681603
; NODQ-NEXT: vcvtdq2pd %ymm1, %zmm0
15691604
; NODQ-NEXT: vextracti64x4 $1, %zmm1, %ymm1
15701605
; NODQ-NEXT: vcvtdq2pd %ymm1, %zmm1
@@ -1603,7 +1638,7 @@ define <8 x double> @sbto8f64(<8 x double> %a) {
16031638
; NOVLDQ: # %bb.0:
16041639
; NOVLDQ-NEXT: vxorpd %xmm1, %xmm1, %xmm1
16051640
; NOVLDQ-NEXT: vcmpltpd %zmm0, %zmm1, %k1
1606-
; NOVLDQ-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
1641+
; NOVLDQ-NEXT: vpternlogd {{.*#+}} zmm0 {%k1} {z} = -1
16071642
; NOVLDQ-NEXT: vcvtdq2pd %ymm0, %zmm0
16081643
; NOVLDQ-NEXT: retq
16091644
;
@@ -1864,7 +1899,7 @@ define <16 x float> @ubto16f32(<16 x i32> %a) {
18641899
; NODQ: # %bb.0:
18651900
; NODQ-NEXT: vpxor %xmm1, %xmm1, %xmm1
18661901
; NODQ-NEXT: vpcmpgtd %zmm0, %zmm1, %k1
1867-
; NODQ-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
1902+
; NODQ-NEXT: vpternlogd {{.*#+}} zmm0 {%k1} {z} = -1
18681903
; NODQ-NEXT: vpsrld $31, %zmm0, %zmm0
18691904
; NODQ-NEXT: vcvtdq2ps %zmm0, %zmm0
18701905
; NODQ-NEXT: retq
@@ -1894,7 +1929,7 @@ define <16 x double> @ubto16f64(<16 x i32> %a) {
18941929
; NODQ: # %bb.0:
18951930
; NODQ-NEXT: vpxor %xmm1, %xmm1, %xmm1
18961931
; NODQ-NEXT: vpcmpgtd %zmm0, %zmm1, %k1
1897-
; NODQ-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
1932+
; NODQ-NEXT: vpternlogd {{.*#+}} zmm0 {%k1} {z} = -1
18981933
; NODQ-NEXT: vpsrld $31, %zmm0, %zmm1
18991934
; NODQ-NEXT: vcvtdq2pd %ymm1, %zmm0
19001935
; NODQ-NEXT: vextracti64x4 $1, %zmm1, %ymm1

0 commit comments

Comments
 (0)