Skip to content

[AArch64] Materialize constants via fneg. #80641

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Feb 14, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
101 changes: 68 additions & 33 deletions llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -12935,42 +12935,74 @@ static SDValue NormalizeBuildVector(SDValue Op,
return DAG.getBuildVector(VT, dl, Ops);
}

static SDValue ConstantBuildVector(SDValue Op, SelectionDAG &DAG) {
static SDValue ConstantBuildVector(SDValue Op, SelectionDAG &DAG,
const AArch64Subtarget *ST) {
EVT VT = Op.getValueType();
assert((VT.getSizeInBits() == 64 || VT.getSizeInBits() == 128) &&
"Expected a legal NEON vector");

APInt DefBits(VT.getSizeInBits(), 0);
APInt UndefBits(VT.getSizeInBits(), 0);
BuildVectorSDNode *BVN = cast<BuildVectorSDNode>(Op.getNode());
if (resolveBuildVector(BVN, DefBits, UndefBits)) {
SDValue NewOp;
if ((NewOp = tryAdvSIMDModImm64(AArch64ISD::MOVIedit, Op, DAG, DefBits)) ||
(NewOp = tryAdvSIMDModImm32(AArch64ISD::MOVIshift, Op, DAG, DefBits)) ||
(NewOp = tryAdvSIMDModImm321s(AArch64ISD::MOVImsl, Op, DAG, DefBits)) ||
(NewOp = tryAdvSIMDModImm16(AArch64ISD::MOVIshift, Op, DAG, DefBits)) ||
(NewOp = tryAdvSIMDModImm8(AArch64ISD::MOVI, Op, DAG, DefBits)) ||
(NewOp = tryAdvSIMDModImmFP(AArch64ISD::FMOV, Op, DAG, DefBits)))
return NewOp;

DefBits = ~DefBits;
if ((NewOp = tryAdvSIMDModImm32(AArch64ISD::MVNIshift, Op, DAG, DefBits)) ||
(NewOp = tryAdvSIMDModImm321s(AArch64ISD::MVNImsl, Op, DAG, DefBits)) ||
(NewOp = tryAdvSIMDModImm16(AArch64ISD::MVNIshift, Op, DAG, DefBits)))
return NewOp;

DefBits = UndefBits;
if ((NewOp = tryAdvSIMDModImm64(AArch64ISD::MOVIedit, Op, DAG, DefBits)) ||
(NewOp = tryAdvSIMDModImm32(AArch64ISD::MOVIshift, Op, DAG, DefBits)) ||
(NewOp = tryAdvSIMDModImm321s(AArch64ISD::MOVImsl, Op, DAG, DefBits)) ||
(NewOp = tryAdvSIMDModImm16(AArch64ISD::MOVIshift, Op, DAG, DefBits)) ||
(NewOp = tryAdvSIMDModImm8(AArch64ISD::MOVI, Op, DAG, DefBits)) ||
(NewOp = tryAdvSIMDModImmFP(AArch64ISD::FMOV, Op, DAG, DefBits)))
return NewOp;
auto TryMOVIWithBits = [&](APInt DefBits) {
SDValue NewOp;
if ((NewOp =
tryAdvSIMDModImm64(AArch64ISD::MOVIedit, Op, DAG, DefBits)) ||
(NewOp =
tryAdvSIMDModImm32(AArch64ISD::MOVIshift, Op, DAG, DefBits)) ||
(NewOp =
tryAdvSIMDModImm321s(AArch64ISD::MOVImsl, Op, DAG, DefBits)) ||
(NewOp =
tryAdvSIMDModImm16(AArch64ISD::MOVIshift, Op, DAG, DefBits)) ||
(NewOp = tryAdvSIMDModImm8(AArch64ISD::MOVI, Op, DAG, DefBits)) ||
(NewOp = tryAdvSIMDModImmFP(AArch64ISD::FMOV, Op, DAG, DefBits)))
return NewOp;

APInt NotDefBits = ~DefBits;
if ((NewOp = tryAdvSIMDModImm32(AArch64ISD::MVNIshift, Op, DAG,
NotDefBits)) ||
(NewOp = tryAdvSIMDModImm321s(AArch64ISD::MVNImsl, Op, DAG,
NotDefBits)) ||
(NewOp =
tryAdvSIMDModImm16(AArch64ISD::MVNIshift, Op, DAG, NotDefBits)))
return NewOp;
return SDValue();
};
if (SDValue R = TryMOVIWithBits(DefBits))
return R;
if (SDValue R = TryMOVIWithBits(UndefBits))
return R;

DefBits = ~UndefBits;
if ((NewOp = tryAdvSIMDModImm32(AArch64ISD::MVNIshift, Op, DAG, DefBits)) ||
(NewOp = tryAdvSIMDModImm321s(AArch64ISD::MVNImsl, Op, DAG, DefBits)) ||
(NewOp = tryAdvSIMDModImm16(AArch64ISD::MVNIshift, Op, DAG, DefBits)))
return NewOp;
// See if a fneg of the constant can be materialized with a MOVI, etc
auto TryWithFNeg = [&](APInt DefBits, MVT FVT) {
// FNegate each sub-element of the constant
assert(VT.getSizeInBits() % FVT.getScalarSizeInBits() == 0);
APInt Neg = APInt::getHighBitsSet(FVT.getSizeInBits(), 1)
.zext(VT.getSizeInBits());
APInt NegBits(VT.getSizeInBits(), 0);
unsigned NumElts = VT.getSizeInBits() / FVT.getScalarSizeInBits();
for (unsigned i = 0; i < NumElts; i++)
NegBits |= Neg << (FVT.getScalarSizeInBits() * i);
NegBits = DefBits ^ NegBits;

// Try to create the new constants with MOVI, and if so generate a fneg
// for it.
if (SDValue NewOp = TryMOVIWithBits(NegBits)) {
SDLoc DL(Op);
MVT VFVT = NumElts == 1 ? FVT : MVT::getVectorVT(FVT, NumElts);
return DAG.getNode(
AArch64ISD::NVCAST, DL, VT,
DAG.getNode(ISD::FNEG, DL, VFVT,
DAG.getNode(AArch64ISD::NVCAST, DL, VFVT, NewOp)));
}
return SDValue();
};
SDValue R;
if ((R = TryWithFNeg(DefBits, MVT::f32)) ||
(R = TryWithFNeg(DefBits, MVT::f64)) ||
(ST->hasFullFP16() && (R = TryWithFNeg(DefBits, MVT::f16))))
return R;
}

return SDValue();
Expand Down Expand Up @@ -13019,7 +13051,7 @@ SDValue AArch64TargetLowering::LowerBUILD_VECTOR(SDValue Op,
return Op;
}

if (SDValue V = ConstantBuildVector(Op, DAG))
if (SDValue V = ConstantBuildVector(Op, DAG, Subtarget))
return V;

// Scan through the operands to find some interesting properties we can
Expand Down Expand Up @@ -13244,7 +13276,7 @@ SDValue AArch64TargetLowering::LowerBUILD_VECTOR(SDValue Op,
ConstantValueAPInt = C->getAPIntValue().zextOrTrunc(BitSize);
if (!isNullConstant(ConstantValue) && !isNullFPConstant(ConstantValue) &&
!ConstantValueAPInt.isAllOnes()) {
Val = ConstantBuildVector(Val, DAG);
Val = ConstantBuildVector(Val, DAG, Subtarget);
if (!Val)
// Otherwise, materialize the constant and splat it.
Val = DAG.getNode(AArch64ISD::DUP, dl, VT, ConstantValue);
Expand Down Expand Up @@ -23175,9 +23207,12 @@ static SDValue performDUPCombine(SDNode *N,
}

/// Get rid of unnecessary NVCASTs (that don't change the type).
static SDValue performNVCASTCombine(SDNode *N) {
static SDValue performNVCASTCombine(SDNode *N, SelectionDAG &DAG) {
if (N->getValueType(0) == N->getOperand(0).getValueType())
return N->getOperand(0);
if (N->getOperand(0).getOpcode() == AArch64ISD::NVCAST)
return DAG.getNode(AArch64ISD::NVCAST, SDLoc(N), N->getValueType(0),
N->getOperand(0).getOperand(0));

return SDValue();
}
Expand Down Expand Up @@ -24171,7 +24206,7 @@ SDValue AArch64TargetLowering::PerformDAGCombine(SDNode *N,
case AArch64ISD::DUPLANE128:
return performDupLane128Combine(N, DAG);
case AArch64ISD::NVCAST:
return performNVCASTCombine(N);
return performNVCASTCombine(N, DAG);
case AArch64ISD::SPLICE:
return performSpliceCombine(N, DAG);
case AArch64ISD::UUNPKLO:
Expand Down
16 changes: 10 additions & 6 deletions llvm/lib/Target/AArch64/AArch64InstrInfo.td
Original file line number Diff line number Diff line change
Expand Up @@ -7558,13 +7558,17 @@ defm USHR : SIMDVectorRShiftBHSD<1, 0b00000, "ushr", AArch64vlshr>;
defm USRA : SIMDVectorRShiftBHSDTied<1, 0b00010, "usra",
TriOpFrag<(add_and_or_is_add node:$LHS, (AArch64vlshr node:$MHS, node:$RHS))> >;

def VImm0080: PatLeaf<(AArch64movi_shift (i32 128), (i32 0))>;
def VImm00008000: PatLeaf<(AArch64movi_shift (i32 128), (i32 8))>;
def VImm0000000080000000: PatLeaf<(AArch64NvCast (v2f64 (fneg (AArch64NvCast (v4i32 (AArch64movi_shift (i32 128), (i32 24)))))))>;

// RADDHN patterns for when RSHRN shifts by half the size of the vector element
def : Pat<(v8i8 (trunc (AArch64vlshr (add (v8i16 V128:$Vn), (AArch64movi_shift (i32 128), (i32 0))), (i32 8)))),
def : Pat<(v8i8 (trunc (AArch64vlshr (add (v8i16 V128:$Vn), VImm0080), (i32 8)))),
(RADDHNv8i16_v8i8 V128:$Vn, (v8i16 (MOVIv2d_ns (i32 0))))>;
def : Pat<(v4i16 (trunc (AArch64vlshr (add (v4i32 V128:$Vn), (AArch64movi_shift (i32 128), (i32 8))), (i32 16)))),
def : Pat<(v4i16 (trunc (AArch64vlshr (add (v4i32 V128:$Vn), VImm00008000), (i32 16)))),
(RADDHNv4i32_v4i16 V128:$Vn, (v4i32 (MOVIv2d_ns (i32 0))))>;
let AddedComplexity = 5 in
def : Pat<(v2i32 (trunc (AArch64vlshr (add (v2i64 V128:$Vn), (AArch64dup (i64 2147483648))), (i32 32)))),
def : Pat<(v2i32 (trunc (AArch64vlshr (add (v2i64 V128:$Vn), VImm0000000080000000), (i32 32)))),
(RADDHNv2i64_v2i32 V128:$Vn, (v2i64 (MOVIv2d_ns (i32 0))))>;
def : Pat<(v8i8 (int_aarch64_neon_rshrn (v8i16 V128:$Vn), (i32 8))),
(RADDHNv8i16_v8i8 V128:$Vn, (v8i16 (MOVIv2d_ns (i32 0))))>;
Expand All @@ -7576,20 +7580,20 @@ def : Pat<(v2i32 (int_aarch64_neon_rshrn (v2i64 V128:$Vn), (i32 32))),
// RADDHN2 patterns for when RSHRN shifts by half the size of the vector element
def : Pat<(v16i8 (concat_vectors
(v8i8 V64:$Vd),
(v8i8 (trunc (AArch64vlshr (add (v8i16 V128:$Vn), (AArch64movi_shift (i32 128), (i32 0))), (i32 8)))))),
(v8i8 (trunc (AArch64vlshr (add (v8i16 V128:$Vn), VImm0080), (i32 8)))))),
(RADDHNv8i16_v16i8
(INSERT_SUBREG (IMPLICIT_DEF), V64:$Vd, dsub), V128:$Vn,
(v8i16 (MOVIv2d_ns (i32 0))))>;
def : Pat<(v8i16 (concat_vectors
(v4i16 V64:$Vd),
(v4i16 (trunc (AArch64vlshr (add (v4i32 V128:$Vn), (AArch64movi_shift (i32 128), (i32 8))), (i32 16)))))),
(v4i16 (trunc (AArch64vlshr (add (v4i32 V128:$Vn), VImm00008000), (i32 16)))))),
(RADDHNv4i32_v8i16
(INSERT_SUBREG (IMPLICIT_DEF), V64:$Vd, dsub), V128:$Vn,
(v4i32 (MOVIv2d_ns (i32 0))))>;
let AddedComplexity = 5 in
def : Pat<(v4i32 (concat_vectors
(v2i32 V64:$Vd),
(v2i32 (trunc (AArch64vlshr (add (v2i64 V128:$Vn), (AArch64dup (i64 2147483648))), (i32 32)))))),
(v2i32 (trunc (AArch64vlshr (add (v2i64 V128:$Vn), VImm0000000080000000), (i32 32)))))),
(RADDHNv2i64_v4i32
(INSERT_SUBREG (IMPLICIT_DEF), V64:$Vd, dsub), V128:$Vn,
(v2i64 (MOVIv2d_ns (i32 0))))>;
Expand Down
4 changes: 2 additions & 2 deletions llvm/test/CodeGen/AArch64/arm64-build-vector.ll
Original file line number Diff line number Diff line change
Expand Up @@ -120,8 +120,8 @@ define <2 x double> @poszero_v2f64(<2 x double> %a) {
define <2 x double> @negzero_v2f64(<2 x double> %a) {
; CHECK-LABEL: negzero_v2f64:
; CHECK: // %bb.0:
; CHECK-NEXT: mov x8, #-9223372036854775808 // =0x8000000000000000
; CHECK-NEXT: dup v1.2d, x8
; CHECK-NEXT: movi v1.2d, #0000000000000000
; CHECK-NEXT: fneg v1.2d, v1.2d
; CHECK-NEXT: fmul v0.2d, v0.2d, v1.2d
; CHECK-NEXT: ret
%b = fmul <2 x double> %a, <double -0.0, double -0.0>
Expand Down
4 changes: 2 additions & 2 deletions llvm/test/CodeGen/AArch64/fabs-combine.ll
Original file line number Diff line number Diff line change
Expand Up @@ -71,8 +71,8 @@ define <4 x float> @nabsv4f32(<4 x float> %a) {
define <2 x double> @nabsv2d64(<2 x double> %a) {
; CHECK-LABEL: nabsv2d64:
; CHECK: // %bb.0:
; CHECK-NEXT: mov x8, #-9223372036854775808
; CHECK-NEXT: dup v1.2d, x8
; CHECK-NEXT: movi v1.2d, #0000000000000000
; CHECK-NEXT: fneg v1.2d, v1.2d
; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b
; CHECK-NEXT: ret
%conv = bitcast <2 x double> %a to <2 x i64>
Expand Down
8 changes: 4 additions & 4 deletions llvm/test/CodeGen/AArch64/fcvt_combine.ll
Original file line number Diff line number Diff line change
Expand Up @@ -110,8 +110,8 @@ define <2 x i32> @test9(<2 x float> %f) {
define <2 x i32> @test10(<2 x float> %f) {
; CHECK-LABEL: test10:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w8, #2143289344 // =0x7fc00000
; CHECK-NEXT: dup v0.2s, w8
; CHECK-NEXT: mvni v0.2s, #63, msl #16
; CHECK-NEXT: fneg v0.2s, v0.2s
; CHECK-NEXT: fcvtzu v0.2s, v0.2s
; CHECK-NEXT: ret
%mul.i = fmul <2 x float> %f, <float undef, float undef>
Expand Down Expand Up @@ -426,8 +426,8 @@ define <2 x i32> @test9_sat(<2 x float> %f) {
define <2 x i32> @test10_sat(<2 x float> %f) {
; CHECK-LABEL: test10_sat:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w8, #2143289344 // =0x7fc00000
; CHECK-NEXT: dup v0.2s, w8
; CHECK-NEXT: mvni v0.2s, #63, msl #16
; CHECK-NEXT: fneg v0.2s, v0.2s
; CHECK-NEXT: fcvtzu v0.2s, v0.2s
; CHECK-NEXT: ret
%mul.i = fmul <2 x float> %f, <float undef, float undef>
Expand Down
50 changes: 33 additions & 17 deletions llvm/test/CodeGen/AArch64/neon-mov.ll
Original file line number Diff line number Diff line change
Expand Up @@ -111,16 +111,14 @@ define <4 x i32> @movi4s_lsl16() {
define <4 x i32> @movi4s_fneg() {
; CHECK-NOFP16-SD-LABEL: movi4s_fneg:
; CHECK-NOFP16-SD: // %bb.0:
; CHECK-NOFP16-SD-NEXT: mov w8, #61440 // =0xf000
; CHECK-NOFP16-SD-NEXT: movk w8, #32768, lsl #16
; CHECK-NOFP16-SD-NEXT: dup v0.4s, w8
; CHECK-NOFP16-SD-NEXT: movi v0.4s, #240, lsl #8
; CHECK-NOFP16-SD-NEXT: fneg v0.4s, v0.4s
; CHECK-NOFP16-SD-NEXT: ret
;
; CHECK-FP16-SD-LABEL: movi4s_fneg:
; CHECK-FP16-SD: // %bb.0:
; CHECK-FP16-SD-NEXT: mov w8, #61440 // =0xf000
; CHECK-FP16-SD-NEXT: movk w8, #32768, lsl #16
; CHECK-FP16-SD-NEXT: dup v0.4s, w8
; CHECK-FP16-SD-NEXT: movi v0.4s, #240, lsl #8
; CHECK-FP16-SD-NEXT: fneg v0.4s, v0.4s
; CHECK-FP16-SD-NEXT: ret
;
; CHECK-NOFP16-GI-LABEL: movi4s_fneg:
Expand Down Expand Up @@ -178,11 +176,29 @@ define <8 x i16> @movi8h_lsl8() {
}

define <8 x i16> @movi8h_fneg() {
; CHECK-LABEL: movi8h_fneg:
; CHECK: // %bb.0:
; CHECK-NEXT: adrp x8, .LCPI19_0
; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI19_0]
; CHECK-NEXT: ret
; CHECK-NOFP16-SD-LABEL: movi8h_fneg:
; CHECK-NOFP16-SD: // %bb.0:
; CHECK-NOFP16-SD-NEXT: movi v0.8h, #127, lsl #8
; CHECK-NOFP16-SD-NEXT: fneg v0.4s, v0.4s
; CHECK-NOFP16-SD-NEXT: ret
;
; CHECK-FP16-SD-LABEL: movi8h_fneg:
; CHECK-FP16-SD: // %bb.0:
; CHECK-FP16-SD-NEXT: movi v0.8h, #127, lsl #8
; CHECK-FP16-SD-NEXT: fneg v0.4s, v0.4s
; CHECK-FP16-SD-NEXT: ret
;
; CHECK-NOFP16-GI-LABEL: movi8h_fneg:
; CHECK-NOFP16-GI: // %bb.0:
; CHECK-NOFP16-GI-NEXT: adrp x8, .LCPI19_0
; CHECK-NOFP16-GI-NEXT: ldr q0, [x8, :lo12:.LCPI19_0]
; CHECK-NOFP16-GI-NEXT: ret
;
; CHECK-FP16-GI-LABEL: movi8h_fneg:
; CHECK-FP16-GI: // %bb.0:
; CHECK-FP16-GI-NEXT: adrp x8, .LCPI19_0
; CHECK-FP16-GI-NEXT: ldr q0, [x8, :lo12:.LCPI19_0]
; CHECK-FP16-GI-NEXT: ret
ret <8 x i16> <i16 32512, i16 65280, i16 32512, i16 65280, i16 32512, i16 65280, i16 32512, i16 65280>
}

Expand Down Expand Up @@ -294,8 +310,8 @@ define <8 x i16> @mvni8h_neg() {
;
; CHECK-FP16-SD-LABEL: mvni8h_neg:
; CHECK-FP16-SD: // %bb.0:
; CHECK-FP16-SD-NEXT: mov w8, #33008 // =0x80f0
; CHECK-FP16-SD-NEXT: dup v0.8h, w8
; CHECK-FP16-SD-NEXT: movi v0.8h, #240
; CHECK-FP16-SD-NEXT: fneg v0.8h, v0.8h
; CHECK-FP16-SD-NEXT: ret
;
; CHECK-NOFP16-GI-LABEL: mvni8h_neg:
Expand Down Expand Up @@ -480,14 +496,14 @@ define <2 x double> @fmov2d() {
define <2 x double> @fmov2d_neg0() {
; CHECK-NOFP16-SD-LABEL: fmov2d_neg0:
; CHECK-NOFP16-SD: // %bb.0:
; CHECK-NOFP16-SD-NEXT: mov x8, #-9223372036854775808 // =0x8000000000000000
; CHECK-NOFP16-SD-NEXT: dup v0.2d, x8
; CHECK-NOFP16-SD-NEXT: movi v0.2d, #0000000000000000
; CHECK-NOFP16-SD-NEXT: fneg v0.2d, v0.2d
; CHECK-NOFP16-SD-NEXT: ret
;
; CHECK-FP16-SD-LABEL: fmov2d_neg0:
; CHECK-FP16-SD: // %bb.0:
; CHECK-FP16-SD-NEXT: mov x8, #-9223372036854775808 // =0x8000000000000000
; CHECK-FP16-SD-NEXT: dup v0.2d, x8
; CHECK-FP16-SD-NEXT: movi v0.2d, #0000000000000000
; CHECK-FP16-SD-NEXT: fneg v0.2d, v0.2d
; CHECK-FP16-SD-NEXT: ret
;
; CHECK-NOFP16-GI-LABEL: fmov2d_neg0:
Expand Down
Loading