Skip to content

Commit bbea1de

Browse files
authored
[X86][FP16] Fix masking problem of VF[,C]MADDCSH intrinsics (#118071)
Fixes: #98306
1 parent 9300274 commit bbea1de

File tree

3 files changed: 18 additions and 3 deletions.

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -26265,6 +26265,9 @@ SDValue X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
2626526265
}
2626626266
if (!NewOp)
2626726267
NewOp = DAG.getNode(IntrData->Opc0, dl, VT, Src1, Src2, Src3);
26268+
if (IntrData->Opc0 == X86ISD::VFMADDCSH ||
26269+
IntrData->Opc0 == X86ISD::VFCMADDCSH)
26270+
return getScalarMaskingNode(NewOp, Mask, PassThru, Subtarget, DAG);
2626826271
return getVectorMaskingNode(NewOp, Mask, PassThru, Subtarget, DAG);
2626926272
}
2627026273
case IFMA_OP:

llvm/lib/Target/X86/X86InstrAVX512.td

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -13533,17 +13533,17 @@ let Uses = [MXCSR] in {
1353313533
multiclass avx512_cfmaop_sh_common<bits<8> opc, string OpcodeStr, SDNode OpNode, SDNode OpNodeRnd,
1353413534
bit IsCommutable> {
1353513535
let Predicates = [HasFP16], Constraints = "@earlyclobber $dst, $src1 = $dst" in {
13536-
defm r : AVX512_maskable_3src<opc, MRMSrcReg, v4f32x_info, (outs VR128X:$dst),
13536+
defm r : AVX512_maskable_3src_scalar<opc, MRMSrcReg, f32x_info, (outs VR128X:$dst),
1353713537
(ins VR128X:$src2, VR128X:$src3), OpcodeStr,
1353813538
"$src3, $src2", "$src2, $src3",
1353913539
(v4f32 (OpNode VR128X:$src2, VR128X:$src3, VR128X:$src1)), IsCommutable>,
1354013540
Sched<[WriteFMAX]>;
13541-
defm m : AVX512_maskable_3src<opc, MRMSrcMem, v4f32x_info, (outs VR128X:$dst),
13541+
defm m : AVX512_maskable_3src_scalar<opc, MRMSrcMem, f32x_info, (outs VR128X:$dst),
1354213542
(ins VR128X:$src2, ssmem:$src3), OpcodeStr,
1354313543
"$src3, $src2", "$src2, $src3",
1354413544
(v4f32 (OpNode VR128X:$src2, (sse_load_f32 addr:$src3), VR128X:$src1))>,
1354513545
Sched<[WriteFMAX.Folded, WriteFMAX.ReadAfterFold]>;
13546-
defm rb : AVX512_maskable_3src<opc, MRMSrcReg, v4f32x_info, (outs VR128X:$dst),
13546+
defm rb : AVX512_maskable_3src_scalar<opc, MRMSrcReg, f32x_info, (outs VR128X:$dst),
1354713547
(ins VR128X:$src2, VR128X:$src3, AVX512RC:$rc), OpcodeStr,
1354813548
"$rc, $src3, $src2", "$src2, $src3, $rc",
1354913549
(v4f32 (OpNodeRnd VR128X:$src2, VR128X:$src3, VR128X:$src1, (i32 timm:$rc)))>,

llvm/test/CodeGen/X86/avx512cfmulsh-instrinsics.ll

Lines changed: 12 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -277,3 +277,15 @@ define <4 x float> @test_int_x86_avx512fp16_maskz_cfcmadd_sh(<4 x float> %x0, <4
277277
%res = call <4 x float> @llvm.x86.avx512fp16.maskz.vfcmadd.csh(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3, i32 9)
278278
ret <4 x float> %res
279279
}
280+
281+
define <4 x float> @PR98306() {
282+
; CHECK-LABEL: PR98306:
283+
; CHECK: ## %bb.0:
284+
; CHECK-NEXT: kxorw %k0, %k0, %k1
285+
; CHECK-NEXT: vmovaps {{.*#+}} xmm1 = [7.8125E-3,1.050912E+6,4.203776E+6,1.6815616E+7]
286+
; CHECK-NEXT: vmovaps {{.*#+}} xmm0 = [3.2E+1,4.03288064E+8,8.0658432E+8,1.61318502E+9]
287+
; CHECK-NEXT: vfmaddcsh {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm0 {%k1} {z}
288+
; CHECK-NEXT: retq
289+
%res = call <4 x float> @llvm.x86.avx512fp16.maskz.vfmadd.csh(<4 x float> <float 7.812500e-03, float 0x4130092000000000, float 0x4150094000000000, float 0x4170096000000000>, <4 x float> <float 2.000000e+00, float 0x4188098000000000, float 0x4198099000000000, float 0x41A809A000000000>, <4 x float> <float 3.200000e+01, float 0x41B809B000000000, float 0x41C809C000000000, float 0x41D809D000000000>, i8 0, i32 4)
290+
ret <4 x float> %res
291+
}

0 commit comments

Comments
 (0)