Skip to content

Commit 3564000

Browse files
authored
[AArch64][GlobalISel] FNeg constant materialization (llvm#80643)
This is the GlobalISel equivalent of llvm#80641: it materializes the constant as fneg(movi) instead of falling back to the alternative constant-pool load or GPR dup.
1 parent 2d616ec commit 3564000

File tree

2 files changed

+62
-26
lines changed

2 files changed

+62
-26
lines changed

llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp

Lines changed: 52 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -5758,24 +5758,60 @@ AArch64InstructionSelector::emitConstantVector(Register Dst, Constant *CV,
57585758

57595759
if (CV->getSplatValue()) {
57605760
APInt DefBits = APInt::getSplat(DstSize, CV->getUniqueInteger());
5761-
MachineInstr *NewOp;
5762-
bool Inv = false;
5763-
if ((NewOp = tryAdvSIMDModImm64(Dst, DstSize, DefBits, MIRBuilder)) ||
5764-
(NewOp = tryAdvSIMDModImm32(Dst, DstSize, DefBits, MIRBuilder, Inv)) ||
5765-
(NewOp =
5766-
tryAdvSIMDModImm321s(Dst, DstSize, DefBits, MIRBuilder, Inv)) ||
5767-
(NewOp = tryAdvSIMDModImm16(Dst, DstSize, DefBits, MIRBuilder, Inv)) ||
5768-
(NewOp = tryAdvSIMDModImm8(Dst, DstSize, DefBits, MIRBuilder)) ||
5769-
(NewOp = tryAdvSIMDModImmFP(Dst, DstSize, DefBits, MIRBuilder)))
5770-
return NewOp;
5761+
auto TryMOVIWithBits = [&](APInt DefBits) -> MachineInstr * {
5762+
MachineInstr *NewOp;
5763+
bool Inv = false;
5764+
if ((NewOp = tryAdvSIMDModImm64(Dst, DstSize, DefBits, MIRBuilder)) ||
5765+
(NewOp =
5766+
tryAdvSIMDModImm32(Dst, DstSize, DefBits, MIRBuilder, Inv)) ||
5767+
(NewOp =
5768+
tryAdvSIMDModImm321s(Dst, DstSize, DefBits, MIRBuilder, Inv)) ||
5769+
(NewOp =
5770+
tryAdvSIMDModImm16(Dst, DstSize, DefBits, MIRBuilder, Inv)) ||
5771+
(NewOp = tryAdvSIMDModImm8(Dst, DstSize, DefBits, MIRBuilder)) ||
5772+
(NewOp = tryAdvSIMDModImmFP(Dst, DstSize, DefBits, MIRBuilder)))
5773+
return NewOp;
5774+
5775+
DefBits = ~DefBits;
5776+
Inv = true;
5777+
if ((NewOp =
5778+
tryAdvSIMDModImm32(Dst, DstSize, DefBits, MIRBuilder, Inv)) ||
5779+
(NewOp =
5780+
tryAdvSIMDModImm321s(Dst, DstSize, DefBits, MIRBuilder, Inv)) ||
5781+
(NewOp = tryAdvSIMDModImm16(Dst, DstSize, DefBits, MIRBuilder, Inv)))
5782+
return NewOp;
5783+
return nullptr;
5784+
};
57715785

5772-
DefBits = ~DefBits;
5773-
Inv = true;
5774-
if ((NewOp = tryAdvSIMDModImm32(Dst, DstSize, DefBits, MIRBuilder, Inv)) ||
5775-
(NewOp =
5776-
tryAdvSIMDModImm321s(Dst, DstSize, DefBits, MIRBuilder, Inv)) ||
5777-
(NewOp = tryAdvSIMDModImm16(Dst, DstSize, DefBits, MIRBuilder, Inv)))
5786+
if (auto *NewOp = TryMOVIWithBits(DefBits))
57785787
return NewOp;
5788+
5789+
// See if a fneg of the constant can be materialized with a MOVI, etc
5790+
auto TryWithFNeg = [&](APInt DefBits, int NumBits,
5791+
unsigned NegOpc) -> MachineInstr * {
5792+
// FNegate each sub-element of the constant
5793+
APInt Neg = APInt::getHighBitsSet(NumBits, 1).zext(DstSize);
5794+
APInt NegBits(DstSize, 0);
5795+
unsigned NumElts = DstSize / NumBits;
5796+
for (unsigned i = 0; i < NumElts; i++)
5797+
NegBits |= Neg << (NumBits * i);
5798+
NegBits = DefBits ^ NegBits;
5799+
5800+
// Try to create the new constants with MOVI, and if so generate a fneg
5801+
// for it.
5802+
if (auto *NewOp = TryMOVIWithBits(NegBits)) {
5803+
Register NewDst = MRI.createVirtualRegister(&AArch64::FPR128RegClass);
5804+
NewOp->getOperand(0).setReg(NewDst);
5805+
return MIRBuilder.buildInstr(NegOpc, {Dst}, {NewDst});
5806+
}
5807+
return nullptr;
5808+
};
5809+
MachineInstr *R;
5810+
if ((R = TryWithFNeg(DefBits, 32, AArch64::FNEGv4f32)) ||
5811+
(R = TryWithFNeg(DefBits, 64, AArch64::FNEGv2f64)) ||
5812+
(STI.hasFullFP16() &&
5813+
(R = TryWithFNeg(DefBits, 16, AArch64::FNEGv8f16))))
5814+
return R;
57795815
}
57805816

57815817
auto *CPLoad = emitLoadFromConstantPool(CV, MIRBuilder);

llvm/test/CodeGen/AArch64/neon-mov.ll

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -123,14 +123,14 @@ define <4 x i32> @movi4s_fneg() {
123123
;
124124
; CHECK-NOFP16-GI-LABEL: movi4s_fneg:
125125
; CHECK-NOFP16-GI: // %bb.0:
126-
; CHECK-NOFP16-GI-NEXT: adrp x8, .LCPI13_0
127-
; CHECK-NOFP16-GI-NEXT: ldr q0, [x8, :lo12:.LCPI13_0]
126+
; CHECK-NOFP16-GI-NEXT: movi v0.4s, #240, lsl #8
127+
; CHECK-NOFP16-GI-NEXT: fneg v0.4s, v0.4s
128128
; CHECK-NOFP16-GI-NEXT: ret
129129
;
130130
; CHECK-FP16-GI-LABEL: movi4s_fneg:
131131
; CHECK-FP16-GI: // %bb.0:
132-
; CHECK-FP16-GI-NEXT: adrp x8, .LCPI13_0
133-
; CHECK-FP16-GI-NEXT: ldr q0, [x8, :lo12:.LCPI13_0]
132+
; CHECK-FP16-GI-NEXT: movi v0.4s, #240, lsl #8
133+
; CHECK-FP16-GI-NEXT: fneg v0.4s, v0.4s
134134
; CHECK-FP16-GI-NEXT: ret
135135
ret <4 x i32> <i32 2147545088, i32 2147545088, i32 2147545088, i32 2147545088>
136136
}
@@ -322,8 +322,8 @@ define <8 x i16> @mvni8h_neg() {
322322
;
323323
; CHECK-FP16-GI-LABEL: mvni8h_neg:
324324
; CHECK-FP16-GI: // %bb.0:
325-
; CHECK-FP16-GI-NEXT: adrp x8, .LCPI32_0
326-
; CHECK-FP16-GI-NEXT: ldr q0, [x8, :lo12:.LCPI32_0]
325+
; CHECK-FP16-GI-NEXT: movi v0.8h, #240
326+
; CHECK-FP16-GI-NEXT: fneg v0.8h, v0.8h
327327
; CHECK-FP16-GI-NEXT: ret
328328
ret <8 x i16> <i16 33008, i16 33008, i16 33008, i16 33008, i16 33008, i16 33008, i16 33008, i16 33008>
329329
}
@@ -508,14 +508,14 @@ define <2 x double> @fmov2d_neg0() {
508508
;
509509
; CHECK-NOFP16-GI-LABEL: fmov2d_neg0:
510510
; CHECK-NOFP16-GI: // %bb.0:
511-
; CHECK-NOFP16-GI-NEXT: adrp x8, .LCPI51_0
512-
; CHECK-NOFP16-GI-NEXT: ldr q0, [x8, :lo12:.LCPI51_0]
511+
; CHECK-NOFP16-GI-NEXT: movi v0.2d, #0000000000000000
512+
; CHECK-NOFP16-GI-NEXT: fneg v0.2d, v0.2d
513513
; CHECK-NOFP16-GI-NEXT: ret
514514
;
515515
; CHECK-FP16-GI-LABEL: fmov2d_neg0:
516516
; CHECK-FP16-GI: // %bb.0:
517-
; CHECK-FP16-GI-NEXT: adrp x8, .LCPI51_0
518-
; CHECK-FP16-GI-NEXT: ldr q0, [x8, :lo12:.LCPI51_0]
517+
; CHECK-FP16-GI-NEXT: movi v0.2d, #0000000000000000
518+
; CHECK-FP16-GI-NEXT: fneg v0.2d, v0.2d
519519
; CHECK-FP16-GI-NEXT: ret
520520
ret <2 x double> <double -0.0, double -0.0>
521521
}

0 commit comments

Comments
 (0)