Skip to content

Commit 41c1a7b

Browse files
committed
[LV] Don't add fixed-order recurrence phis to forced scalars.
Fixed-order recurrence phis cannot be forced to be scalar; at the moment, they are always widened. Make sure we don't add them to ForcedScalars, as otherwise the legacy cost model will compute incorrect costs. This fixes an assertion failure reported with #129645.
1 parent 6b0c8c4 commit 41c1a7b

File tree

2 files changed

+105
-2
lines changed

2 files changed

+105
-2
lines changed

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6464,10 +6464,15 @@ void LoopVectorizationCostModel::setCostBasedWideningDecision(ElementCount VF) {
64646464
getMemoryInstructionCost(Member, ElementCount::getFixed(1))));
64656465
}
64666466
}
6467-
} else
6467+
} else {
6468+
// Cannot scalarize fixed-order recurrence phis at the moment.
6469+
if (isa<PHINode>(I) && Legal->isFixedOrderRecurrence(cast<PHINode>(I)))
6470+
continue;
6471+
64686472
// Make sure I gets scalarized and a cost estimate without
64696473
// scalarization overhead.
64706474
ForcedScalars[VF].insert(I);
6475+
}
64716476
}
64726477
}
64736478

llvm/test/Transforms/LoopVectorize/X86/fixed-order-recurrence.ll

Lines changed: 99 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -91,7 +91,7 @@ for.body: ; preds = %for.body.preheader,
9191
br i1 %exitcond.not, label %for.cond.cleanup, label %for.body
9292
}
9393

94-
define void @thirdorderrec(ptr nocapture noundef readonly %x, ptr noalias nocapture noundef writeonly %y, i32 noundef %n) #0 {
94+
define void @thirdorderrec(ptr nocapture noundef readonly %x, ptr noalias nocapture noundef writeonly %y, i32 noundef %n) {
9595
; CHECK-LABEL: @thirdorderrec(
9696
; CHECK-NEXT: entry:
9797
; CHECK-NEXT: [[CMP38:%.*]] = icmp sgt i32 [[N:%.*]], 3
@@ -352,3 +352,101 @@ loop:
352352
exit:
353353
ret void
354354
}
355+
356+
define void @test_for_tried_to_force_scalar(ptr noalias %A, ptr noalias %B, ptr noalias %C, i64 %n) #0 {
357+
; CHECK-LABEL: @test_for_tried_to_force_scalar(
358+
; CHECK-NEXT: entry:
359+
; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[N:%.*]], 1
360+
; CHECK-NEXT: [[CONFLICT_RDX20:%.*]] = icmp ule i64 [[TMP0]], 8
361+
; CHECK-NEXT: br i1 [[CONFLICT_RDX20]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
362+
; CHECK: vector.ph:
363+
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP0]], 8
364+
; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i64 [[N_MOD_VF]], 0
365+
; CHECK-NEXT: [[TMP4:%.*]] = select i1 [[TMP3]], i64 8, i64 [[N_MOD_VF]]
366+
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP0]], [[TMP4]]
367+
; CHECK-NEXT: [[VECTOR_RECUR_INIT:%.*]] = insertelement <4 x ptr> poison, ptr [[A:%.*]], i32 3
368+
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
369+
; CHECK: vector.body:
370+
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
371+
; CHECK-NEXT: [[VECTOR_RECUR:%.*]] = phi <4 x ptr> [ [[VECTOR_RECUR_INIT]], [[VECTOR_PH]] ], [ [[TMP28:%.*]], [[VECTOR_BODY]] ]
372+
; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[INDEX]], 0
373+
; CHECK-NEXT: [[TMP6:%.*]] = add i64 [[INDEX]], 1
374+
; CHECK-NEXT: [[TMP7:%.*]] = add i64 [[INDEX]], 2
375+
; CHECK-NEXT: [[TMP8:%.*]] = add i64 [[INDEX]], 3
376+
; CHECK-NEXT: [[TMP9:%.*]] = add i64 [[INDEX]], 4
377+
; CHECK-NEXT: [[TMP10:%.*]] = add i64 [[INDEX]], 5
378+
; CHECK-NEXT: [[TMP11:%.*]] = add i64 [[INDEX]], 6
379+
; CHECK-NEXT: [[TMP12:%.*]] = add i64 [[INDEX]], 7
380+
; CHECK-NEXT: [[TMP13:%.*]] = getelementptr nusw [3 x float], ptr [[A]], i64 [[TMP5]]
381+
; CHECK-NEXT: [[TMP14:%.*]] = getelementptr nusw [3 x float], ptr [[A]], i64 [[TMP6]]
382+
; CHECK-NEXT: [[TMP15:%.*]] = getelementptr nusw [3 x float], ptr [[A]], i64 [[TMP7]]
383+
; CHECK-NEXT: [[TMP16:%.*]] = getelementptr nusw [3 x float], ptr [[A]], i64 [[TMP8]]
384+
; CHECK-NEXT: [[TMP17:%.*]] = insertelement <4 x ptr> poison, ptr [[TMP13]], i32 0
385+
; CHECK-NEXT: [[TMP18:%.*]] = insertelement <4 x ptr> [[TMP17]], ptr [[TMP14]], i32 1
386+
; CHECK-NEXT: [[TMP19:%.*]] = insertelement <4 x ptr> [[TMP18]], ptr [[TMP15]], i32 2
387+
; CHECK-NEXT: [[TMP20:%.*]] = insertelement <4 x ptr> [[TMP19]], ptr [[TMP16]], i32 3
388+
; CHECK-NEXT: [[TMP21:%.*]] = getelementptr nusw [3 x float], ptr [[A]], i64 [[TMP9]]
389+
; CHECK-NEXT: [[TMP22:%.*]] = getelementptr nusw [3 x float], ptr [[A]], i64 [[TMP10]]
390+
; CHECK-NEXT: [[TMP23:%.*]] = getelementptr nusw [3 x float], ptr [[A]], i64 [[TMP11]]
391+
; CHECK-NEXT: [[TMP24:%.*]] = getelementptr nusw [3 x float], ptr [[A]], i64 [[TMP12]]
392+
; CHECK-NEXT: [[TMP25:%.*]] = insertelement <4 x ptr> poison, ptr [[TMP21]], i32 0
393+
; CHECK-NEXT: [[TMP26:%.*]] = insertelement <4 x ptr> [[TMP25]], ptr [[TMP22]], i32 1
394+
; CHECK-NEXT: [[TMP27:%.*]] = insertelement <4 x ptr> [[TMP26]], ptr [[TMP23]], i32 2
395+
; CHECK-NEXT: [[TMP28]] = insertelement <4 x ptr> [[TMP27]], ptr [[TMP24]], i32 3
396+
; CHECK-NEXT: [[TMP29:%.*]] = shufflevector <4 x ptr> [[TMP20]], <4 x ptr> [[TMP28]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
397+
; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <12 x float>, ptr [[TMP21]], align 4
398+
; CHECK-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <12 x float> [[WIDE_VEC]], <12 x float> poison, <4 x i32> <i32 0, i32 3, i32 6, i32 9>
399+
; CHECK-NEXT: [[TMP30:%.*]] = extractelement <4 x float> [[STRIDED_VEC]], i32 3
400+
; CHECK-NEXT: store float [[TMP30]], ptr [[C:%.*]], align 4
401+
; CHECK-NEXT: [[TMP31:%.*]] = extractelement <4 x ptr> [[TMP29]], i32 0
402+
; CHECK-NEXT: [[TMP38:%.*]] = load float, ptr [[TMP31]], align 4
403+
; CHECK-NEXT: [[TMP33:%.*]] = extractelement <4 x ptr> [[TMP29]], i32 1
404+
; CHECK-NEXT: [[TMP32:%.*]] = load float, ptr [[TMP33]], align 4
405+
; CHECK-NEXT: [[TMP35:%.*]] = extractelement <4 x ptr> [[TMP29]], i32 2
406+
; CHECK-NEXT: [[TMP34:%.*]] = load float, ptr [[TMP35]], align 4
407+
; CHECK-NEXT: [[TMP37:%.*]] = extractelement <4 x ptr> [[TMP29]], i32 3
408+
; CHECK-NEXT: [[TMP36:%.*]] = load float, ptr [[TMP37]], align 4
409+
; CHECK-NEXT: store float [[TMP36]], ptr [[B:%.*]], align 4
410+
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
411+
; CHECK-NEXT: [[TMP39:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
412+
; CHECK-NEXT: br i1 [[TMP39]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
413+
; CHECK: middle.block:
414+
; CHECK-NEXT: br label [[SCALAR_PH]]
415+
; CHECK: scalar.ph:
416+
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
417+
; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi ptr [ [[TMP24]], [[MIDDLE_BLOCK]] ], [ [[A]], [[ENTRY]] ]
418+
; CHECK-NEXT: br label [[LOOP:%.*]]
419+
; CHECK: loop:
420+
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
421+
; CHECK-NEXT: [[PREV:%.*]] = phi ptr [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[NEXT:%.*]], [[LOOP]] ]
422+
; CHECK-NEXT: [[NEXT]] = getelementptr nusw [3 x float], ptr [[A]], i64 [[IV]]
423+
; CHECK-NEXT: [[TMP40:%.*]] = load float, ptr [[NEXT]], align 4
424+
; CHECK-NEXT: store float [[TMP40]], ptr [[C]], align 4
425+
; CHECK-NEXT: [[TMP41:%.*]] = load float, ptr [[PREV]], align 4
426+
; CHECK-NEXT: store float [[TMP41]], ptr [[B]], align 4
427+
; CHECK-NEXT: [[IV_NEXT]] = add nsw i64 [[IV]], 1
428+
; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV]], [[N]]
429+
; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[EXIT:%.*]], label [[LOOP]], !llvm.loop [[LOOP11:![0-9]+]]
430+
; CHECK: exit:
431+
; CHECK-NEXT: ret void
432+
;
433+
entry:
434+
br label %loop
435+
436+
loop:
437+
%iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
438+
%prev = phi ptr [ %A, %entry ], [ %next, %loop ]
439+
%next = getelementptr nusw [3 x float], ptr %A, i64 %iv
440+
%0 = load float, ptr %next, align 4
441+
store float %0, ptr %C, align 4
442+
%1 = load float, ptr %prev, align 4
443+
store float %1, ptr %B, align 4
444+
%iv.next = add nsw i64 %iv, 1
445+
%exitcond.not = icmp eq i64 %iv, %n
446+
br i1 %exitcond.not, label %exit, label %loop
447+
448+
exit:
449+
ret void
450+
}
451+
452+
attributes #0 = { "target-cpu"="znver3" }

0 commit comments

Comments
 (0)