@@ -12935,42 +12935,74 @@ static SDValue NormalizeBuildVector(SDValue Op,
12935
12935
return DAG.getBuildVector(VT, dl, Ops);
12936
12936
}
12937
12937
12938
- static SDValue ConstantBuildVector(SDValue Op, SelectionDAG &DAG) {
12938
+ static SDValue ConstantBuildVector(SDValue Op, SelectionDAG &DAG,
12939
+ const AArch64Subtarget *ST) {
12939
12940
EVT VT = Op.getValueType();
12941
+ assert((VT.getSizeInBits() == 64 || VT.getSizeInBits() == 128) &&
12942
+ "Expected a legal NEON vector");
12940
12943
12941
12944
APInt DefBits(VT.getSizeInBits(), 0);
12942
12945
APInt UndefBits(VT.getSizeInBits(), 0);
12943
12946
BuildVectorSDNode *BVN = cast<BuildVectorSDNode>(Op.getNode());
12944
12947
if (resolveBuildVector(BVN, DefBits, UndefBits)) {
12945
- SDValue NewOp;
12946
- if ((NewOp = tryAdvSIMDModImm64(AArch64ISD::MOVIedit, Op, DAG, DefBits)) ||
12947
- (NewOp = tryAdvSIMDModImm32(AArch64ISD::MOVIshift, Op, DAG, DefBits)) ||
12948
- (NewOp = tryAdvSIMDModImm321s(AArch64ISD::MOVImsl, Op, DAG, DefBits)) ||
12949
- (NewOp = tryAdvSIMDModImm16(AArch64ISD::MOVIshift, Op, DAG, DefBits)) ||
12950
- (NewOp = tryAdvSIMDModImm8(AArch64ISD::MOVI, Op, DAG, DefBits)) ||
12951
- (NewOp = tryAdvSIMDModImmFP(AArch64ISD::FMOV, Op, DAG, DefBits)))
12952
- return NewOp;
12953
-
12954
- DefBits = ~DefBits;
12955
- if ((NewOp = tryAdvSIMDModImm32(AArch64ISD::MVNIshift, Op, DAG, DefBits)) ||
12956
- (NewOp = tryAdvSIMDModImm321s(AArch64ISD::MVNImsl, Op, DAG, DefBits)) ||
12957
- (NewOp = tryAdvSIMDModImm16(AArch64ISD::MVNIshift, Op, DAG, DefBits)))
12958
- return NewOp;
12959
-
12960
- DefBits = UndefBits;
12961
- if ((NewOp = tryAdvSIMDModImm64(AArch64ISD::MOVIedit, Op, DAG, DefBits)) ||
12962
- (NewOp = tryAdvSIMDModImm32(AArch64ISD::MOVIshift, Op, DAG, DefBits)) ||
12963
- (NewOp = tryAdvSIMDModImm321s(AArch64ISD::MOVImsl, Op, DAG, DefBits)) ||
12964
- (NewOp = tryAdvSIMDModImm16(AArch64ISD::MOVIshift, Op, DAG, DefBits)) ||
12965
- (NewOp = tryAdvSIMDModImm8(AArch64ISD::MOVI, Op, DAG, DefBits)) ||
12966
- (NewOp = tryAdvSIMDModImmFP(AArch64ISD::FMOV, Op, DAG, DefBits)))
12967
- return NewOp;
12948
+ auto TryMOVIWithBits = [&](APInt DefBits) {
12949
+ SDValue NewOp;
12950
+ if ((NewOp =
12951
+ tryAdvSIMDModImm64(AArch64ISD::MOVIedit, Op, DAG, DefBits)) ||
12952
+ (NewOp =
12953
+ tryAdvSIMDModImm32(AArch64ISD::MOVIshift, Op, DAG, DefBits)) ||
12954
+ (NewOp =
12955
+ tryAdvSIMDModImm321s(AArch64ISD::MOVImsl, Op, DAG, DefBits)) ||
12956
+ (NewOp =
12957
+ tryAdvSIMDModImm16(AArch64ISD::MOVIshift, Op, DAG, DefBits)) ||
12958
+ (NewOp = tryAdvSIMDModImm8(AArch64ISD::MOVI, Op, DAG, DefBits)) ||
12959
+ (NewOp = tryAdvSIMDModImmFP(AArch64ISD::FMOV, Op, DAG, DefBits)))
12960
+ return NewOp;
12961
+
12962
+ APInt NotDefBits = ~DefBits;
12963
+ if ((NewOp = tryAdvSIMDModImm32(AArch64ISD::MVNIshift, Op, DAG,
12964
+ NotDefBits)) ||
12965
+ (NewOp = tryAdvSIMDModImm321s(AArch64ISD::MVNImsl, Op, DAG,
12966
+ NotDefBits)) ||
12967
+ (NewOp =
12968
+ tryAdvSIMDModImm16(AArch64ISD::MVNIshift, Op, DAG, NotDefBits)))
12969
+ return NewOp;
12970
+ return SDValue();
12971
+ };
12972
+ if (SDValue R = TryMOVIWithBits(DefBits))
12973
+ return R;
12974
+ if (SDValue R = TryMOVIWithBits(UndefBits))
12975
+ return R;
12968
12976
12969
- DefBits = ~UndefBits;
12970
- if ((NewOp = tryAdvSIMDModImm32(AArch64ISD::MVNIshift, Op, DAG, DefBits)) ||
12971
- (NewOp = tryAdvSIMDModImm321s(AArch64ISD::MVNImsl, Op, DAG, DefBits)) ||
12972
- (NewOp = tryAdvSIMDModImm16(AArch64ISD::MVNIshift, Op, DAG, DefBits)))
12973
- return NewOp;
12977
+ // See if a fneg of the constant can be materialized with a MOVI, etc
12978
+ auto TryWithFNeg = [&](APInt DefBits, MVT FVT) {
12979
+ // FNegate each sub-element of the constant
12980
+ assert(VT.getSizeInBits() % FVT.getScalarSizeInBits() == 0);
12981
+ APInt Neg = APInt::getHighBitsSet(FVT.getSizeInBits(), 1)
12982
+ .zext(VT.getSizeInBits());
12983
+ APInt NegBits(VT.getSizeInBits(), 0);
12984
+ unsigned NumElts = VT.getSizeInBits() / FVT.getScalarSizeInBits();
12985
+ for (unsigned i = 0; i < NumElts; i++)
12986
+ NegBits |= Neg << (FVT.getScalarSizeInBits() * i);
12987
+ NegBits = DefBits ^ NegBits;
12988
+
12989
+ // Try to create the new constants with MOVI, and if so generate a fneg
12990
+ // for it.
12991
+ if (SDValue NewOp = TryMOVIWithBits(NegBits)) {
12992
+ SDLoc DL(Op);
12993
+ MVT VFVT = NumElts == 1 ? FVT : MVT::getVectorVT(FVT, NumElts);
12994
+ return DAG.getNode(
12995
+ AArch64ISD::NVCAST, DL, VT,
12996
+ DAG.getNode(ISD::FNEG, DL, VFVT,
12997
+ DAG.getNode(AArch64ISD::NVCAST, DL, VFVT, NewOp)));
12998
+ }
12999
+ return SDValue();
13000
+ };
13001
+ SDValue R;
13002
+ if ((R = TryWithFNeg(DefBits, MVT::f32)) ||
13003
+ (R = TryWithFNeg(DefBits, MVT::f64)) ||
13004
+ (ST->hasFullFP16() && (R = TryWithFNeg(DefBits, MVT::f16))))
13005
+ return R;
12974
13006
}
12975
13007
12976
13008
return SDValue();
@@ -13019,7 +13051,7 @@ SDValue AArch64TargetLowering::LowerBUILD_VECTOR(SDValue Op,
13019
13051
return Op;
13020
13052
}
13021
13053
13022
- if (SDValue V = ConstantBuildVector(Op, DAG))
13054
+ if (SDValue V = ConstantBuildVector(Op, DAG, Subtarget))
13023
13055
return V;
13024
13056
13025
13057
// Scan through the operands to find some interesting properties we can
@@ -13244,7 +13276,7 @@ SDValue AArch64TargetLowering::LowerBUILD_VECTOR(SDValue Op,
13244
13276
ConstantValueAPInt = C->getAPIntValue().zextOrTrunc(BitSize);
13245
13277
if (!isNullConstant(ConstantValue) && !isNullFPConstant(ConstantValue) &&
13246
13278
!ConstantValueAPInt.isAllOnes()) {
13247
- Val = ConstantBuildVector(Val, DAG);
13279
+ Val = ConstantBuildVector(Val, DAG, Subtarget);
13248
13280
if (!Val)
13249
13281
// Otherwise, materialize the constant and splat it.
13250
13282
Val = DAG.getNode(AArch64ISD::DUP, dl, VT, ConstantValue);
@@ -23145,9 +23177,12 @@ static SDValue performDUPCombine(SDNode *N,
23145
23177
}
23146
23178
23147
23179
/// Get rid of unnecessary NVCASTs (that don't change the type).
23148
- static SDValue performNVCASTCombine(SDNode *N) {
23180
+ static SDValue performNVCASTCombine(SDNode *N, SelectionDAG &DAG) {
23149
23181
if (N->getValueType(0) == N->getOperand(0).getValueType())
23150
23182
return N->getOperand(0);
23183
+ if (N->getOperand(0).getOpcode() == AArch64ISD::NVCAST)
23184
+ return DAG.getNode(AArch64ISD::NVCAST, SDLoc(N), N->getValueType(0),
23185
+ N->getOperand(0).getOperand(0));
23151
23186
23152
23187
return SDValue();
23153
23188
}
@@ -24141,7 +24176,7 @@ SDValue AArch64TargetLowering::PerformDAGCombine(SDNode *N,
24141
24176
case AArch64ISD::DUPLANE128:
24142
24177
return performDupLane128Combine(N, DAG);
24143
24178
case AArch64ISD::NVCAST:
24144
- return performNVCASTCombine(N);
24179
+ return performNVCASTCombine(N, DAG);
24145
24180
case AArch64ISD::SPLICE:
24146
24181
return performSpliceCombine(N, DAG);
24147
24182
case AArch64ISD::UUNPKLO:
0 commit comments