Skip to content

[VPlan] Don't rely on region check in isUniformAfterVectorization. #137883

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 7 commits into from
May 2, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
41 changes: 34 additions & 7 deletions llvm/lib/Transforms/Vectorize/VPlanUtils.h
Original file line number Diff line number Diff line change
Expand Up @@ -39,19 +39,46 @@ const SCEV *getSCEVExprForVPValue(VPValue *V, ScalarEvolution &SE);

/// Returns true if \p VPV is uniform after vectorization.
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Independent: documentation should be (more) meaningful. Name should be more accurate.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Will check separately

inline bool isUniformAfterVectorization(const VPValue *VPV) {
// A value defined outside the vector region must be uniform after
// vectorization inside a vector region.
if (VPV->isDefinedOutsideLoopRegions())
auto PreservesUniformity = [](unsigned Opcode) -> bool {
if (Instruction::isBinaryOp(Opcode) || Instruction::isCast(Opcode))
return true;
switch (Opcode) {
case Instruction::GetElementPtr:
case Instruction::ICmp:
case Instruction::FCmp:
case VPInstruction::Broadcast:
case VPInstruction::PtrAdd:
return true;
default:
return false;
}
};

// A live-in must be uniform across the scope of VPlan.
if (VPV->isLiveIn())
return true;
if (auto *Rep = dyn_cast<VPReplicateRecipe>(VPV))
return Rep->isUniform();

if (auto *Rep = dyn_cast<VPReplicateRecipe>(VPV)) {
const VPRegionBlock *RegionOfR = Rep->getParent()->getParent();
// Don't consider recipes in replicate regions as uniform yet; their first
// lane cannot be accessed when executing the replicate region for other
// lanes.
if (RegionOfR && RegionOfR->isReplicator())
return false;
return Rep->isUniform() ||
(PreservesUniformity(Rep->getOpcode()) &&
all_of(Rep->operands(), isUniformAfterVectorization));
}
if (isa<VPWidenGEPRecipe, VPDerivedIVRecipe, VPBlendRecipe>(VPV))
return all_of(VPV->getDefiningRecipe()->operands(),
isUniformAfterVectorization);
if (auto *WidenR = dyn_cast<VPWidenRecipe>(VPV)) {
return PreservesUniformity(WidenR->getOpcode()) &&
all_of(WidenR->operands(), isUniformAfterVectorization);
}
if (auto *VPI = dyn_cast<VPInstruction>(VPV))
return VPI->isSingleScalar() || VPI->isVectorToScalar() ||
((Instruction::isBinaryOp(VPI->getOpcode()) ||
VPI->getOpcode() == VPInstruction::PtrAdd) &&
(PreservesUniformity(VPI->getOpcode()) &&
all_of(VPI->operands(), isUniformAfterVectorization));

// VPExpandSCEVRecipes must be placed in the entry and are always uniform.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -557,21 +557,21 @@ define i32 @header_mask_and_invariant_compare(ptr %A, ptr %B, ptr %C, ptr %D, pt
; DEFAULT-NEXT: [[TMP12:%.*]] = extractelement <4 x i1> [[TMP8]], i32 1
; DEFAULT-NEXT: br i1 [[TMP12]], label %[[PRED_STORE_IF32:.*]], label %[[PRED_STORE_CONTINUE33:.*]]
; DEFAULT: [[PRED_STORE_IF32]]:
; DEFAULT-NEXT: [[TMP13:%.*]] = extractelement <4 x i32> [[TMP6]], i32 1
; DEFAULT-NEXT: [[TMP13:%.*]] = extractelement <4 x i32> [[TMP6]], i32 0
; DEFAULT-NEXT: store i32 [[TMP13]], ptr [[E]], align 4, !alias.scope [[META14]], !noalias [[META16]]
; DEFAULT-NEXT: br label %[[PRED_STORE_CONTINUE33]]
; DEFAULT: [[PRED_STORE_CONTINUE33]]:
; DEFAULT-NEXT: [[TMP14:%.*]] = extractelement <4 x i1> [[TMP8]], i32 2
; DEFAULT-NEXT: br i1 [[TMP14]], label %[[PRED_STORE_IF34:.*]], label %[[PRED_STORE_CONTINUE35:.*]]
; DEFAULT: [[PRED_STORE_IF34]]:
; DEFAULT-NEXT: [[TMP15:%.*]] = extractelement <4 x i32> [[TMP6]], i32 2
; DEFAULT-NEXT: [[TMP15:%.*]] = extractelement <4 x i32> [[TMP6]], i32 0
; DEFAULT-NEXT: store i32 [[TMP15]], ptr [[E]], align 4, !alias.scope [[META14]], !noalias [[META16]]
; DEFAULT-NEXT: br label %[[PRED_STORE_CONTINUE35]]
; DEFAULT: [[PRED_STORE_CONTINUE35]]:
; DEFAULT-NEXT: [[TMP21:%.*]] = extractelement <4 x i1> [[TMP8]], i32 3
; DEFAULT-NEXT: br i1 [[TMP21]], label %[[PRED_STORE_IF36:.*]], label %[[PRED_STORE_CONTINUE37]]
; DEFAULT: [[PRED_STORE_IF36]]:
; DEFAULT-NEXT: [[TMP22:%.*]] = extractelement <4 x i32> [[TMP6]], i32 3
; DEFAULT-NEXT: [[TMP22:%.*]] = extractelement <4 x i32> [[TMP6]], i32 0
; DEFAULT-NEXT: store i32 [[TMP22]], ptr [[E]], align 4, !alias.scope [[META14]], !noalias [[META16]]
; DEFAULT-NEXT: br label %[[PRED_STORE_CONTINUE37]]
; DEFAULT: [[PRED_STORE_CONTINUE37]]:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -74,12 +74,12 @@ define void @ld_div2_step1_start0_ind1(ptr noalias %A, ptr noalias %B) {
; CHECK-NEXT: [[TMP0:%.*]] = udiv i64 [[INDEX]], 2
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP0]]
; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i64> poison, i64 [[TMP2]], i64 0
; CHECK-NEXT: [[TMP3:%.*]] = add nsw i64 [[TMP2]], 42
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i64> poison, i64 [[TMP3]], i64 0
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT]], <2 x i64> poison, <2 x i32> zeroinitializer
; CHECK-NEXT: [[TMP3:%.*]] = add nsw <2 x i64> [[BROADCAST_SPLAT]], splat (i64 42)
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[INDEX]]
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[TMP4]], i32 0
; CHECK-NEXT: store <2 x i64> [[TMP3]], ptr [[TMP5]], align 8
; CHECK-NEXT: store <2 x i64> [[BROADCAST_SPLAT]], ptr [[TMP5]], align 8
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000
; CHECK-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -74,12 +74,12 @@ define void @ld_and_neg2_step1_start0_ind1(ptr noalias %A, ptr noalias %B) {
; CHECK-NEXT: [[TMP1:%.*]] = and i64 [[INDEX]], -2
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP1]]
; CHECK-NEXT: [[TMP3:%.*]] = load i64, ptr [[TMP2]], align 8
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i64> poison, i64 [[TMP3]], i64 0
; CHECK-NEXT: [[TMP4:%.*]] = add nsw i64 [[TMP3]], 42
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i64> poison, i64 [[TMP4]], i64 0
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT]], <2 x i64> poison, <2 x i32> zeroinitializer
; CHECK-NEXT: [[TMP4:%.*]] = add nsw <2 x i64> [[BROADCAST_SPLAT]], splat (i64 42)
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[INDEX]]
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[TMP5]], i32 0
; CHECK-NEXT: store <2 x i64> [[TMP4]], ptr [[TMP6]], align 8
; CHECK-NEXT: store <2 x i64> [[BROADCAST_SPLAT]], ptr [[TMP6]], align 8
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000
; CHECK-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
Expand Down Expand Up @@ -269,15 +269,11 @@ define void @ld_and_neg2_step2_start0_ind1(ptr noalias %A, ptr noalias %B) {
; CHECK-NEXT: [[TMP2:%.*]] = and i64 [[TMP0]], 1
; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP2]]
; CHECK-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i64> poison, i64 [[TMP4]], i64 0
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT]], <2 x i64> poison, <2 x i32> zeroinitializer
; CHECK-NEXT: [[TMP5:%.*]] = add nsw <2 x i64> [[BROADCAST_SPLAT]], splat (i64 42)
; CHECK-NEXT: [[TMP5:%.*]] = add nsw i64 [[TMP4]], 42
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP0]]
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]]
; CHECK-NEXT: [[TMP8:%.*]] = extractelement <2 x i64> [[TMP5]], i32 0
; CHECK-NEXT: store i64 [[TMP8]], ptr [[TMP6]], align 8
; CHECK-NEXT: [[TMP9:%.*]] = extractelement <2 x i64> [[TMP5]], i32 1
; CHECK-NEXT: store i64 [[TMP9]], ptr [[TMP7]], align 8
; CHECK-NEXT: store i64 [[TMP5]], ptr [[TMP6]], align 8
; CHECK-NEXT: store i64 [[TMP5]], ptr [[TMP7]], align 8
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], 500
; CHECK-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -290,12 +290,12 @@ define void @ld_div8_urem3(ptr noalias %A, ptr noalias %B) {
; CHECK-NEXT: [[TMP2:%.*]] = urem i64 [[TMP1]], 3
; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP2]]
; CHECK-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x i64> poison, i64 [[TMP4]], i64 0
; CHECK-NEXT: [[TMP5:%.*]] = add nsw i64 [[TMP4]], 42
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x i64> poison, i64 [[TMP5]], i64 0
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <8 x i64> [[BROADCAST_SPLATINSERT]], <8 x i64> poison, <8 x i32> zeroinitializer
; CHECK-NEXT: [[TMP5:%.*]] = add nsw <8 x i64> [[BROADCAST_SPLAT]], splat (i64 42)
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[INDEX]]
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[TMP6]], i32 0
; CHECK-NEXT: store <8 x i64> [[TMP5]], ptr [[TMP7]], align 8
; CHECK-NEXT: store <8 x i64> [[BROADCAST_SPLAT]], ptr [[TMP7]], align 8
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000
; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -113,12 +113,12 @@ define void @ld_lshr1_step1_start0_ind1(ptr noalias %A, ptr noalias %B) {
; VF2-NEXT: [[TMP0:%.*]] = lshr i64 [[INDEX]], 1
; VF2-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP0]]
; VF2-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8
; VF2-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i64> poison, i64 [[TMP2]], i64 0
; VF2-NEXT: [[TMP3:%.*]] = add nsw i64 [[TMP2]], 42
; VF2-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i64> poison, i64 [[TMP3]], i64 0
; VF2-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT]], <2 x i64> poison, <2 x i32> zeroinitializer
; VF2-NEXT: [[TMP3:%.*]] = add nsw <2 x i64> [[BROADCAST_SPLAT]], splat (i64 42)
; VF2-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[INDEX]]
; VF2-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[TMP4]], i32 0
; VF2-NEXT: store <2 x i64> [[TMP3]], ptr [[TMP5]], align 8
; VF2-NEXT: store <2 x i64> [[BROADCAST_SPLAT]], ptr [[TMP5]], align 8
; VF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
; VF2-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000
; VF2-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
Expand Down Expand Up @@ -225,12 +225,12 @@ define void @ld_lshr2_step1_start0_ind1(ptr noalias %A, ptr noalias %B) {
; VF2-NEXT: [[TMP1:%.*]] = lshr i64 [[INDEX]], 2
; VF2-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP1]]
; VF2-NEXT: [[TMP3:%.*]] = load i64, ptr [[TMP2]], align 8
; VF2-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i64> poison, i64 [[TMP3]], i64 0
; VF2-NEXT: [[TMP4:%.*]] = add nsw i64 [[TMP3]], 42
; VF2-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i64> poison, i64 [[TMP4]], i64 0
; VF2-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT]], <2 x i64> poison, <2 x i32> zeroinitializer
; VF2-NEXT: [[TMP4:%.*]] = add nsw <2 x i64> [[BROADCAST_SPLAT]], splat (i64 42)
; VF2-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[INDEX]]
; VF2-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[TMP5]], i32 0
; VF2-NEXT: store <2 x i64> [[TMP4]], ptr [[TMP6]], align 8
; VF2-NEXT: store <2 x i64> [[BROADCAST_SPLAT]], ptr [[TMP6]], align 8
; VF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
; VF2-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000
; VF2-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
Expand Down Expand Up @@ -264,12 +264,12 @@ define void @ld_lshr2_step1_start0_ind1(ptr noalias %A, ptr noalias %B) {
; VF4-NEXT: [[TMP1:%.*]] = lshr i64 [[INDEX]], 2
; VF4-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP1]]
; VF4-NEXT: [[TMP3:%.*]] = load i64, ptr [[TMP2]], align 8
; VF4-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[TMP3]], i64 0
; VF4-NEXT: [[TMP4:%.*]] = add nsw i64 [[TMP3]], 42
; VF4-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[TMP4]], i64 0
; VF4-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer
; VF4-NEXT: [[TMP4:%.*]] = add nsw <4 x i64> [[BROADCAST_SPLAT]], splat (i64 42)
; VF4-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[INDEX]]
; VF4-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[TMP5]], i32 0
; VF4-NEXT: store <4 x i64> [[TMP4]], ptr [[TMP6]], align 8
; VF4-NEXT: store <4 x i64> [[BROADCAST_SPLAT]], ptr [[TMP6]], align 8
; VF4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; VF4-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000
; VF4-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -157,12 +157,12 @@ define void @ld_div2_step1_start0_ind2(ptr noalias %A, ptr noalias %B) {
; VF2-NEXT: [[TMP4:%.*]] = add i64 [[TMP2]], [[TMP3]]
; VF2-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP4]]
; VF2-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP5]], align 8
; VF2-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i64> poison, i64 [[TMP6]], i64 0
; VF2-NEXT: [[TMP7:%.*]] = add nsw i64 [[TMP6]], 42
; VF2-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i64> poison, i64 [[TMP7]], i64 0
; VF2-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT]], <2 x i64> poison, <2 x i32> zeroinitializer
; VF2-NEXT: [[TMP7:%.*]] = add nsw <2 x i64> [[BROADCAST_SPLAT]], splat (i64 42)
; VF2-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[INDEX]]
; VF2-NEXT: [[TMP9:%.*]] = getelementptr inbounds i64, ptr [[TMP8]], i32 0
; VF2-NEXT: store <2 x i64> [[TMP7]], ptr [[TMP9]], align 8
; VF2-NEXT: store <2 x i64> [[BROADCAST_SPLAT]], ptr [[TMP9]], align 8
; VF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
; VF2-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000
; VF2-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
Expand Down