
Commit a2c93b3

lukel97 authored and llvmbot committed
[RISCV] Fix mgather -> riscv.masked.strided.load combine not extending indices (#82506)
This fixes the miscompile reported in #82430 by telling isSimpleVIDSequence to sign extend to XLen instead of the width of the indices, since the "sequence" of indices generated by a strided load will be at XLen.

This was the simplest way I could think of to get isSimpleVIDSequence to treat the indices as if they were zero extended to XLenVT. Another way we could do this is by refactoring out the "get constant integers" part from isSimpleVIDSequence and handling the values as APInts, so they can be zero extended separately.

Fixes #82430

(cherry picked from commit 815644b)
1 parent a9d4ed7 commit a2c93b3
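
For intuition, here is a minimal standalone sketch (hypothetical 8-bit values, not code from this patch) of why the width matters: the step between two index values can wrap when it is computed at the narrow element width, but not when the values are first extended to an XLen-sized integer, which is the width at which a strided load actually advances its address.

#include <cstdint>
#include <cstdio>

int main() {
  // Two hypothetical 8-bit index values, as they might appear in a
  // BUILD_VECTOR: {0, 250}.
  uint8_t Elts[2] = {0, 250};

  // Step computed at the 8-bit element width: 250 wraps to -6 when viewed
  // as a signed 8-bit value.
  int8_t NarrowStep = static_cast<int8_t>(Elts[1] - Elts[0]);

  // Step computed after extending both values to 64 bits (standing in for
  // XLen): it stays 250, matching the addresses a strided access generates.
  int64_t WideStep =
      static_cast<int64_t>(Elts[1]) - static_cast<int64_t>(Elts[0]);

  std::printf("narrow step = %d, wide step = %lld\n", NarrowStep,
              static_cast<long long>(WideStep));
}

This is why the mgather combine below passes Subtarget.getXLen() to isSimpleVIDSequence, while the BUILD_VECTOR lowering keeps passing the element width.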

2 files changed: +16, -16 lines


llvm/lib/Target/RISCV/RISCVISelLowering.cpp

Lines changed: 12 additions & 8 deletions
@@ -3192,15 +3192,16 @@ static std::optional<uint64_t> getExactInteger(const APFloat &APF,
 // Note that this method will also match potentially unappealing index
 // sequences, like <i32 0, i32 50939494>, however it is left to the caller to
 // determine whether this is worth generating code for.
-static std::optional<VIDSequence> isSimpleVIDSequence(SDValue Op) {
+static std::optional<VIDSequence> isSimpleVIDSequence(SDValue Op,
+                                                      unsigned EltSizeInBits) {
   unsigned NumElts = Op.getNumOperands();
   assert(Op.getOpcode() == ISD::BUILD_VECTOR && "Unexpected BUILD_VECTOR");
   bool IsInteger = Op.getValueType().isInteger();
 
   std::optional<unsigned> SeqStepDenom;
   std::optional<int64_t> SeqStepNum, SeqAddend;
   std::optional<std::pair<uint64_t, unsigned>> PrevElt;
-  unsigned EltSizeInBits = Op.getValueType().getScalarSizeInBits();
+  assert(EltSizeInBits >= Op.getValueType().getScalarSizeInBits());
   for (unsigned Idx = 0; Idx < NumElts; Idx++) {
     // Assume undef elements match the sequence; we just have to be careful
     // when interpolating across them.
@@ -3213,14 +3214,14 @@ static std::optional<VIDSequence> isSimpleVIDSequence(SDValue Op) {
       if (!isa<ConstantSDNode>(Op.getOperand(Idx)))
         return std::nullopt;
       Val = Op.getConstantOperandVal(Idx) &
-            maskTrailingOnes<uint64_t>(EltSizeInBits);
+            maskTrailingOnes<uint64_t>(Op.getScalarValueSizeInBits());
     } else {
       // The BUILD_VECTOR must be all constants.
       if (!isa<ConstantFPSDNode>(Op.getOperand(Idx)))
         return std::nullopt;
       if (auto ExactInteger = getExactInteger(
               cast<ConstantFPSDNode>(Op.getOperand(Idx))->getValueAPF(),
-              EltSizeInBits))
+              Op.getScalarValueSizeInBits()))
         Val = *ExactInteger;
       else
         return std::nullopt;
@@ -3276,11 +3277,11 @@ static std::optional<VIDSequence> isSimpleVIDSequence(SDValue Op) {
     uint64_t Val;
     if (IsInteger) {
       Val = Op.getConstantOperandVal(Idx) &
-            maskTrailingOnes<uint64_t>(EltSizeInBits);
+            maskTrailingOnes<uint64_t>(Op.getScalarValueSizeInBits());
     } else {
       Val = *getExactInteger(
           cast<ConstantFPSDNode>(Op.getOperand(Idx))->getValueAPF(),
-          EltSizeInBits);
+          Op.getScalarValueSizeInBits());
     }
     uint64_t ExpectedVal =
         (int64_t)(Idx * (uint64_t)*SeqStepNum) / *SeqStepDenom;
@@ -3550,7 +3551,7 @@ static SDValue lowerBuildVectorOfConstants(SDValue Op, SelectionDAG &DAG,
   // Try and match index sequences, which we can lower to the vid instruction
   // with optional modifications. An all-undef vector is matched by
   // getSplatValue, above.
-  if (auto SimpleVID = isSimpleVIDSequence(Op)) {
+  if (auto SimpleVID = isSimpleVIDSequence(Op, Op.getScalarValueSizeInBits())) {
     int64_t StepNumerator = SimpleVID->StepNumerator;
     unsigned StepDenominator = SimpleVID->StepDenominator;
     int64_t Addend = SimpleVID->Addend;
@@ -15562,7 +15563,10 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
 
     if (Index.getOpcode() == ISD::BUILD_VECTOR &&
         MGN->getExtensionType() == ISD::NON_EXTLOAD && isTypeLegal(VT)) {
-      if (std::optional<VIDSequence> SimpleVID = isSimpleVIDSequence(Index);
+      // The sequence will be XLenVT, not the type of Index. Tell
+      // isSimpleVIDSequence this so we avoid overflow.
+      if (std::optional<VIDSequence> SimpleVID =
+              isSimpleVIDSequence(Index, Subtarget.getXLen());
           SimpleVID && SimpleVID->StepDenominator == 1) {
         const int64_t StepNumerator = SimpleVID->StepNumerator;
         const int64_t Addend = SimpleVID->Addend;

llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll

Lines changed: 4 additions & 8 deletions
@@ -15086,23 +15086,19 @@ define <32 x i64> @mgather_strided_split(ptr %base) {
   ret <32 x i64> %x
 }
 
-; FIXME: This is a miscompile triggered by the mgather ->
-; riscv.masked.strided.load combine. In order for it to trigger we need either a
-; strided gather that RISCVGatherScatterLowering doesn't pick up, or a new
-; strided gather generated by the widening sew combine.
 define <4 x i32> @masked_gather_widen_sew_negative_stride(ptr %base) {
 ; RV32V-LABEL: masked_gather_widen_sew_negative_stride:
 ; RV32V: # %bb.0:
-; RV32V-NEXT: addi a0, a0, -120
-; RV32V-NEXT: li a1, 120
+; RV32V-NEXT: addi a0, a0, 136
+; RV32V-NEXT: li a1, -136
 ; RV32V-NEXT: vsetivli zero, 2, e64, m1, ta, ma
 ; RV32V-NEXT: vlse64.v v8, (a0), a1
 ; RV32V-NEXT: ret
 ;
 ; RV64V-LABEL: masked_gather_widen_sew_negative_stride:
 ; RV64V: # %bb.0:
-; RV64V-NEXT: addi a0, a0, -120
-; RV64V-NEXT: li a1, 120
+; RV64V-NEXT: addi a0, a0, 136
+; RV64V-NEXT: li a1, -136
 ; RV64V-NEXT: vsetivli zero, 2, e64, m1, ta, ma
 ; RV64V-NEXT: vlse64.v v8, (a0), a1
 ; RV64V-NEXT: ret
