Skip to content

Commit 5db63d2

Browse files
authored
[AMDGPU] PromoteAlloca: Handle load/store subvectors using non-constant indexes (#71505)
I assumed indexes were always ConstantInts, but that is not always the case; they can be other values as well. We can easily handle that by just emitting an add and letting InstSimplify do the constant folding for cases where the index really is a ConstantInt. Solves SWDEV-429935.
1 parent 7c63672 commit 5db63d2

File tree

3 files changed

+109
-49
lines changed

3 files changed

+109
-49
lines changed

llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp

Lines changed: 7 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -386,7 +386,6 @@ static Value *promoteAllocaUserToVector(
386386
};
387387

388388
Type *VecEltTy = VectorTy->getElementType();
389-
const unsigned NumVecElts = VectorTy->getNumElements();
390389

391390
switch (Inst->getOpcode()) {
392391
case Instruction::Load: {
@@ -419,11 +418,12 @@ static Value *promoteAllocaUserToVector(
419418
auto *SubVecTy = FixedVectorType::get(VecEltTy, NumLoadedElts);
420419
assert(DL.getTypeStoreSize(SubVecTy) == DL.getTypeStoreSize(AccessTy));
421420

422-
unsigned IndexVal = cast<ConstantInt>(Index)->getZExtValue();
423421
Value *SubVec = PoisonValue::get(SubVecTy);
424422
for (unsigned K = 0; K < NumLoadedElts; ++K) {
423+
Value *CurIdx =
424+
Builder.CreateAdd(Index, ConstantInt::get(Index->getType(), K));
425425
SubVec = Builder.CreateInsertElement(
426-
SubVec, Builder.CreateExtractElement(CurVal, IndexVal + K), K);
426+
SubVec, Builder.CreateExtractElement(CurVal, CurIdx), K);
427427
}
428428

429429
if (AccessTy->isPtrOrPtrVectorTy())
@@ -479,12 +479,12 @@ static Value *promoteAllocaUserToVector(
479479

480480
Val = Builder.CreateBitOrPointerCast(Val, SubVecTy);
481481

482-
unsigned IndexVal = cast<ConstantInt>(Index)->getZExtValue();
483482
Value *CurVec = GetOrLoadCurrentVectorValue();
484-
for (unsigned K = 0; K < NumWrittenElts && ((IndexVal + K) < NumVecElts);
485-
++K) {
483+
for (unsigned K = 0; K < NumWrittenElts; ++K) {
484+
Value *CurIdx =
485+
Builder.CreateAdd(Index, ConstantInt::get(Index->getType(), K));
486486
CurVec = Builder.CreateInsertElement(
487-
CurVec, Builder.CreateExtractElement(Val, K), IndexVal + K);
487+
CurVec, Builder.CreateExtractElement(Val, K), CurIdx);
488488
}
489489
return CurVec;
490490
}

llvm/test/CodeGen/AMDGPU/promote-alloca-loadstores.ll

Lines changed: 7 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -43,9 +43,11 @@ define <4 x i64> @test_fullvec_out_of_bounds(<4 x i64> %arg) {
4343
; CHECK-SAME: (<4 x i64> [[ARG:%.*]]) {
4444
; CHECK-NEXT: entry:
4545
; CHECK-NEXT: [[TMP0:%.*]] = extractelement <4 x i64> [[ARG]], i64 0
46-
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i64> undef, i64 [[TMP0]], i64 3
47-
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x i64> <i64 undef, i64 poison, i64 poison, i64 poison>, i64 [[TMP0]], i64 1
48-
; CHECK-NEXT: ret <4 x i64> [[TMP2]]
46+
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i64> undef, i64 [[TMP0]], i32 3
47+
; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x i64> [[ARG]], i64 1
48+
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x i64> [[ARG]], i64 2
49+
; CHECK-NEXT: [[TMP4:%.*]] = extractelement <4 x i64> [[ARG]], i64 3
50+
; CHECK-NEXT: ret <4 x i64> poison
4951
;
5052
entry:
5153
%stack = alloca [4 x i64], align 4, addrspace(5)
@@ -159,9 +161,9 @@ define void @alloca_load_store_ptr_mixed_ptrvec(<2 x ptr addrspace(3)> %arg) {
159161
; CHECK-NEXT: entry:
160162
; CHECK-NEXT: [[TMP0:%.*]] = ptrtoint <2 x ptr addrspace(3)> [[ARG]] to <2 x i32>
161163
; CHECK-NEXT: [[TMP1:%.*]] = extractelement <2 x i32> [[TMP0]], i64 0
162-
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <8 x i32> undef, i32 [[TMP1]], i64 0
164+
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <8 x i32> undef, i32 [[TMP1]], i32 0
163165
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x i32> [[TMP0]], i64 1
164-
; CHECK-NEXT: [[TMP4:%.*]] = insertelement <8 x i32> [[TMP2]], i32 [[TMP3]], i64 1
166+
; CHECK-NEXT: [[TMP4:%.*]] = insertelement <8 x i32> [[TMP2]], i32 [[TMP3]], i32 1
165167
; CHECK-NEXT: [[TMP5:%.*]] = insertelement <2 x i32> poison, i32 [[TMP1]], i64 0
166168
; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x i32> [[TMP5]], i32 [[TMP3]], i64 1
167169
; CHECK-NEXT: [[TMP7:%.*]] = inttoptr <2 x i32> [[TMP6]] to <2 x ptr addrspace(3)>

0 commit comments

Comments
 (0)