Skip to content

[AArch64][GlobalISel] FNeg constant materialization #80643

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Feb 15, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
68 changes: 52 additions & 16 deletions llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5758,24 +5758,60 @@ AArch64InstructionSelector::emitConstantVector(Register Dst, Constant *CV,

if (CV->getSplatValue()) {
APInt DefBits = APInt::getSplat(DstSize, CV->getUniqueInteger());
MachineInstr *NewOp;
bool Inv = false;
if ((NewOp = tryAdvSIMDModImm64(Dst, DstSize, DefBits, MIRBuilder)) ||
(NewOp = tryAdvSIMDModImm32(Dst, DstSize, DefBits, MIRBuilder, Inv)) ||
(NewOp =
tryAdvSIMDModImm321s(Dst, DstSize, DefBits, MIRBuilder, Inv)) ||
(NewOp = tryAdvSIMDModImm16(Dst, DstSize, DefBits, MIRBuilder, Inv)) ||
(NewOp = tryAdvSIMDModImm8(Dst, DstSize, DefBits, MIRBuilder)) ||
(NewOp = tryAdvSIMDModImmFP(Dst, DstSize, DefBits, MIRBuilder)))
return NewOp;
// Try to materialize the bit pattern DefBits into Dst using one of the
// tryAdvSIMDModImm* helpers. Returns the instruction produced by the first
// helper that succeeds, or nullptr if none of them does. DefBits is taken by
// value, so the complement computed below never leaks back to the caller.
auto TryMOVIWithBits = [&](APInt DefBits) -> MachineInstr * {
MachineInstr *NewOp;
bool Inv = false;
// First pass: the bits exactly as given. The helpers are tried in a fixed
// order (64, 32, 321s, 16, 8, FP) and the || chain stops at the first one
// that returns a non-null instruction.
if ((NewOp = tryAdvSIMDModImm64(Dst, DstSize, DefBits, MIRBuilder)) ||
(NewOp =
tryAdvSIMDModImm32(Dst, DstSize, DefBits, MIRBuilder, Inv)) ||
(NewOp =
tryAdvSIMDModImm321s(Dst, DstSize, DefBits, MIRBuilder, Inv)) ||
(NewOp =
tryAdvSIMDModImm16(Dst, DstSize, DefBits, MIRBuilder, Inv)) ||
(NewOp = tryAdvSIMDModImm8(Dst, DstSize, DefBits, MIRBuilder)) ||
(NewOp = tryAdvSIMDModImmFP(Dst, DstSize, DefBits, MIRBuilder)))
return NewOp;

// Second pass: complement the bits and retry with Inv set. Only the helpers
// that were passed Inv above (32, 321s, 16) are retried.
// NOTE(review): presumably Inv makes the helper select the inverting (MVNI)
// form so the register still ends up holding the original DefBits -- confirm
// against the helper definitions, which are not visible here.
DefBits = ~DefBits;
Inv = true;
if ((NewOp =
tryAdvSIMDModImm32(Dst, DstSize, DefBits, MIRBuilder, Inv)) ||
(NewOp =
tryAdvSIMDModImm321s(Dst, DstSize, DefBits, MIRBuilder, Inv)) ||
(NewOp = tryAdvSIMDModImm16(Dst, DstSize, DefBits, MIRBuilder, Inv)))
return NewOp;
// Neither the plain nor the complemented pattern could be encoded.
return nullptr;
};

DefBits = ~DefBits;
Inv = true;
if ((NewOp = tryAdvSIMDModImm32(Dst, DstSize, DefBits, MIRBuilder, Inv)) ||
(NewOp =
tryAdvSIMDModImm321s(Dst, DstSize, DefBits, MIRBuilder, Inv)) ||
(NewOp = tryAdvSIMDModImm16(Dst, DstSize, DefBits, MIRBuilder, Inv)))
if (auto *NewOp = TryMOVIWithBits(DefBits))
return NewOp;

// See if a fneg of the constant can be materialized with a MOVI, etc.
//
// TryWithFNeg views DefBits as DstSize / NumBits elements of NumBits bits
// each, flips the top bit of every element, and asks TryMOVIWithBits to
// materialize that flipped pattern. On success it emits NegOpc on the result
// so that Dst receives the original DefBits, and returns the NegOpc
// instruction; otherwise it returns nullptr.
auto TryWithFNeg = [&](APInt DefBits, int NumBits,
unsigned NegOpc) -> MachineInstr * {
// FNegate each sub-element of the constant
// Neg is a DstSize-wide value with only bit (NumBits - 1) set, i.e. the top
// bit of the lowest element; the loop replicates it into every element.
APInt Neg = APInt::getHighBitsSet(NumBits, 1).zext(DstSize);
APInt NegBits(DstSize, 0);
unsigned NumElts = DstSize / NumBits;
for (unsigned i = 0; i < NumElts; i++)
NegBits |= Neg << (NumBits * i);
// XOR with the mask toggles exactly the top bit of each element.
NegBits = DefBits ^ NegBits;

// Try to create the new constants with MOVI, and if so generate a fneg
// for it.
if (auto *NewOp = TryMOVIWithBits(NegBits)) {
// TryMOVIWithBits was asked to define Dst; redirect that result into a
// fresh virtual register so Dst can instead be defined by the negate.
// NOTE(review): this assumes operand 0 of the returned instruction is its
// destination, and the FPR128 class assumes a 128-bit Dst (the callers
// pass v4f32/v2f64/v8f16 opcodes) -- confirm against the helpers and any
// DstSize check earlier in the function.
Register NewDst = MRI.createVirtualRegister(&AArch64::FPR128RegClass);
NewOp->getOperand(0).setReg(NewDst);
return MIRBuilder.buildInstr(NegOpc, {Dst}, {NewDst});
}
return nullptr;
};
MachineInstr *R;
if ((R = TryWithFNeg(DefBits, 32, AArch64::FNEGv4f32)) ||
(R = TryWithFNeg(DefBits, 64, AArch64::FNEGv2f64)) ||
(STI.hasFullFP16() &&
(R = TryWithFNeg(DefBits, 16, AArch64::FNEGv8f16))))
return R;
}

auto *CPLoad = emitLoadFromConstantPool(CV, MIRBuilder);
Expand Down
20 changes: 10 additions & 10 deletions llvm/test/CodeGen/AArch64/neon-mov.ll
Original file line number Diff line number Diff line change
Expand Up @@ -123,14 +123,14 @@ define <4 x i32> @movi4s_fneg() {
;
; CHECK-NOFP16-GI-LABEL: movi4s_fneg:
; CHECK-NOFP16-GI: // %bb.0:
; CHECK-NOFP16-GI-NEXT: adrp x8, .LCPI13_0
; CHECK-NOFP16-GI-NEXT: ldr q0, [x8, :lo12:.LCPI13_0]
; CHECK-NOFP16-GI-NEXT: movi v0.4s, #240, lsl #8
; CHECK-NOFP16-GI-NEXT: fneg v0.4s, v0.4s
; CHECK-NOFP16-GI-NEXT: ret
;
; CHECK-FP16-GI-LABEL: movi4s_fneg:
; CHECK-FP16-GI: // %bb.0:
; CHECK-FP16-GI-NEXT: adrp x8, .LCPI13_0
; CHECK-FP16-GI-NEXT: ldr q0, [x8, :lo12:.LCPI13_0]
; CHECK-FP16-GI-NEXT: movi v0.4s, #240, lsl #8
; CHECK-FP16-GI-NEXT: fneg v0.4s, v0.4s
; CHECK-FP16-GI-NEXT: ret
ret <4 x i32> <i32 2147545088, i32 2147545088, i32 2147545088, i32 2147545088>
}
Expand Down Expand Up @@ -322,8 +322,8 @@ define <8 x i16> @mvni8h_neg() {
;
; CHECK-FP16-GI-LABEL: mvni8h_neg:
; CHECK-FP16-GI: // %bb.0:
; CHECK-FP16-GI-NEXT: adrp x8, .LCPI32_0
; CHECK-FP16-GI-NEXT: ldr q0, [x8, :lo12:.LCPI32_0]
; CHECK-FP16-GI-NEXT: movi v0.8h, #240
; CHECK-FP16-GI-NEXT: fneg v0.8h, v0.8h
; CHECK-FP16-GI-NEXT: ret
ret <8 x i16> <i16 33008, i16 33008, i16 33008, i16 33008, i16 33008, i16 33008, i16 33008, i16 33008>
}
Expand Down Expand Up @@ -508,14 +508,14 @@ define <2 x double> @fmov2d_neg0() {
;
; CHECK-NOFP16-GI-LABEL: fmov2d_neg0:
; CHECK-NOFP16-GI: // %bb.0:
; CHECK-NOFP16-GI-NEXT: adrp x8, .LCPI51_0
; CHECK-NOFP16-GI-NEXT: ldr q0, [x8, :lo12:.LCPI51_0]
; CHECK-NOFP16-GI-NEXT: movi v0.2d, #0000000000000000
; CHECK-NOFP16-GI-NEXT: fneg v0.2d, v0.2d
; CHECK-NOFP16-GI-NEXT: ret
;
; CHECK-FP16-GI-LABEL: fmov2d_neg0:
; CHECK-FP16-GI: // %bb.0:
; CHECK-FP16-GI-NEXT: adrp x8, .LCPI51_0
; CHECK-FP16-GI-NEXT: ldr q0, [x8, :lo12:.LCPI51_0]
; CHECK-FP16-GI-NEXT: movi v0.2d, #0000000000000000
; CHECK-FP16-GI-NEXT: fneg v0.2d, v0.2d
; CHECK-FP16-GI-NEXT: ret
ret <2 x double> <double -0.0, double -0.0>
}
Expand Down