Experiment with soft-promotion of f16 (half) values in FP registers (not working).

JonPsson1 · JonPsson1 · commit 9c0bc36cb2cb · 2024-10-29T11:00:12.000+01:00
diff --git a/llvm/lib/Target/SystemZ/SystemZCallingConv.td b/llvm/lib/Target/SystemZ/SystemZCallingConv.td
@@ -50,8 +50,7 @@ def RetCC_SystemZ_ELF : CallingConv<[
   // other floating-point argument registers available for code that
   // doesn't care about the ABI.  All floating-point argument registers
   // are call-clobbered, so we can use all of them here.
-  CCIfType<[f16], CCAssignToReg<[F0S, F2S, F4S, F6S]>>,
-  CCIfType<[f32], CCAssignToReg<[F0S, F2S, F4S, F6S]>>,
+  CCIfType<[f16, f32], CCAssignToReg<[F0S, F2S, F4S, F6S]>>,
   CCIfType<[f64], CCAssignToReg<[F0D, F2D, F4D, F6D]>>,
 
   // Similarly for vectors, with V24 being the ABI-compliant choice.
@@ -116,8 +115,7 @@ def CC_SystemZ_ELF : CallingConv<[
   CCIfType<[i64], CCAssignToReg<[R2D, R3D, R4D, R5D, R6D]>>,
 
   // The first 4 float and double arguments are passed in even registers F0-F6.
-  CCIfType<[f16], CCAssignToReg<[F0S, F2S, F4S, F6S]>>,
-  CCIfType<[f32], CCAssignToReg<[F0S, F2S, F4S, F6S]>>,
+  CCIfType<[f16, f32], CCAssignToReg<[F0S, F2S, F4S, F6S]>>,
   CCIfType<[f64], CCAssignToReg<[F0D, F2D, F4D, F6D]>>,
 
   // The first 8 named vector arguments are passed in V24-V31.  Sub-128 vectors
diff --git a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
@@ -799,7 +799,7 @@ MVT SystemZTargetLowering::getRegisterTypeForCallingConv(
   if (VT.isVector() && VT.getSizeInBits() == 128 &&
       VT.getVectorNumElements() == 1)
     return MVT::v16i8;
-  // Keep f16 so that they can be recognized and handled.
+  // Keep f16 so it can be recognized and handled.
   if (VT == MVT::f16)
     return MVT::f16;
   return TargetLowering::getRegisterTypeForCallingConv(Context, CC, VT);
@@ -1625,10 +1625,13 @@ bool SystemZTargetLowering::splitValueIntoRegisterParts(
 
   // Convert f16 to f32 (Out-arg).
   if (PartVT == MVT::f16) {
-    assert(NumParts == 1 && "");
-    SDValue I16Val = DAG.getBitcast(MVT::i16, Val);
-    SDValue I32Val = DAG.getAnyExtOrTrunc(I16Val, DL, MVT::i32);
-    Parts[0] = DAG.getBitcast(MVT::f32, I32Val);
+    assert(NumParts == 1 && "f16 only needs one register.");
+    SDValue F16Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, MVT::v8f16,
+                                 DAG.getUNDEF(MVT::v8f16), Val,
+                                 DAG.getVectorIdxConstant(0, DL));
+    SDValue F32Vec = DAG.getBitcast(MVT::v4f32, F16Vec);
+    Parts[0] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32,
+                           F32Vec, DAG.getVectorIdxConstant(0, DL));
     return true;
   }
 
@@ -1654,9 +1657,13 @@ static SDValue convertF32ToF16(SDValue F32Val, SelectionDAG &DAG,
                                const SDLoc &DL) {
   assert(F32Val->getOpcode() == ISD::CopyFromReg &&
          "Only expecting to handle f16 with CopyFromReg here.");
-  SDValue I32Val = DAG.getBitcast(MVT::i32, F32Val);
-  SDValue I16Val = DAG.getAnyExtOrTrunc(I32Val, DL, MVT::i16);
-  return DAG.getBitcast(MVT::f16, I16Val);
+
+  SDValue F32Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, MVT::v4f32,
+                                 DAG.getUNDEF(MVT::v4f32), F32Val,
+                                 DAG.getVectorIdxConstant(0, DL));
+  SDValue F16Vec = DAG.getBitcast(MVT::v8f16, F32Vec);
+  return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f16,
+                     F16Vec, DAG.getVectorIdxConstant(0, DL));
 }
 
 SDValue SystemZTargetLowering::LowerFormalArguments(
diff --git a/llvm/lib/Target/SystemZ/SystemZISelLowering.h b/llvm/lib/Target/SystemZ/SystemZISelLowering.h
@@ -471,11 +471,6 @@ class SystemZTargetLowering : public TargetLowering {
   }
   bool softPromoteHalfType() const override { return true; }
   bool useFPRegsForHalfType() const override { return true; }
-  bool shouldKeepZExtForFP16Conv() const override {
-    // Keep the zero extension from 16 bits if present (as with incoming
-    // arguments).
-    return true;
-  }
   bool hasInlineStackProbe(const MachineFunction &MF) const override;
   AtomicExpansionKind shouldCastAtomicLoadInIR(LoadInst *LI) const override;
   AtomicExpansionKind shouldCastAtomicStoreInIR(StoreInst *SI) const override;
diff --git a/llvm/test/CodeGen/SystemZ/fp-half.ll b/llvm/test/CodeGen/SystemZ/fp-half.ll
@@ -119,7 +119,6 @@ define half @fun3(half %Op0, ptr %Dst, ptr %Src) {
 ; CHECK-NEXT:    br %r14
 entry:
   store half %Op0, ptr %Dst
-
   %Res = load half, ptr %Src
   ret half %Res
 }

Original file line number	Diff line number	Diff line change
`@@ -119,7 +119,6 @@ define half @fun3(half %Op0, ptr %Dst, ptr %Src) {`
`119`	`119`	`; CHECK-NEXT: br %r14`
`120`	`120`	`entry:`
`121`	`121`	`store half %Op0, ptr %Dst`
`122`		`-`
`123`	`122`	`%Res = load half, ptr %Src`
`124`	`123`	`ret half %Res`
`125`	`124`	`}`