-
Notifications
You must be signed in to change notification settings - Fork 13.5k
[DebugInfo][LoopVectorizer] Avoid dropping !dbg in optimizeForVFAndUF #114243
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
Prior to this patch, optimizeForVFAndUF may optimize the branch condition for a VPBasicblock to a constant, but unnecessarily drops the DILocation attachment when it does so; this patch changes it to preserve the DILocation.
@llvm/pr-subscribers-vectorizers @llvm/pr-subscribers-debuginfo Author: Stephen Tozer (SLTozer) ChangesPrior to this patch, optimizeForVFAndUF may optimize the conditional branch for a VPBasicblock to have a constant condition, but unnecessarily drops the DILocation attachment when it does so; this patch changes it to preserve the DILocation. Full diff: https://github.com/llvm/llvm-project/pull/114243.diff 2 Files Affected:
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index 355781f955052e..df1ceedacdee99 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -699,7 +699,7 @@ void VPlanTransforms::optimizeForVFAndUF(VPlan &Plan, ElementCount BestVF,
LLVMContext &Ctx = SE.getContext();
auto *BOC =
new VPInstruction(VPInstruction::BranchOnCond,
- {Plan.getOrAddLiveIn(ConstantInt::getTrue(Ctx))});
+ {Plan.getOrAddLiveIn(ConstantInt::getTrue(Ctx))}, Term->getDebugLoc());
SmallVector<VPValue *> PossiblyDead(Term->operands());
Term->eraseFromParent();
diff --git a/llvm/test/Transforms/LoopVectorize/debugloc-optimize-vfuf-term.ll b/llvm/test/Transforms/LoopVectorize/debugloc-optimize-vfuf-term.ll
new file mode 100644
index 00000000000000..0b38339e1772ff
--- /dev/null
+++ b/llvm/test/Transforms/LoopVectorize/debugloc-optimize-vfuf-term.ll
@@ -0,0 +1,108 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -S < %s -passes=loop-vectorize -force-vector-width=2 | FileCheck %s
+
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+define fastcc i32 @foo(i64 %a) {
+; CHECK-LABEL: define fastcc i32 @foo(
+; CHECK-SAME: i64 [[A:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: [[WIDE_TRIP_COUNT:%.*]] = and i64 [[A]], 1
+; CHECK-NEXT: [[TMP0:%.*]] = add nuw nsw i64 [[WIDE_TRIP_COUNT]], 1
+; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
+; CHECK: [[VECTOR_PH]]:
+; CHECK-NEXT: [[N_RND_UP:%.*]] = add i64 [[TMP0]], 1
+; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], 2
+; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
+; CHECK-NEXT: [[TRIP_COUNT_MINUS_1:%.*]] = sub i64 [[TMP0]], 1
+; CHECK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <2 x i64> poison, i64 [[TRIP_COUNT_MINUS_1]], i64 0
+; CHECK-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT1]], <2 x i64> poison, <2 x i32> zeroinitializer
+; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
+; CHECK: [[VECTOR_BODY]]:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE4:.*]] ], !dbg [[DBG4:![0-9]+]]
+; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i64> poison, i64 [[INDEX]], i64 0
+; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT]], <2 x i64> poison, <2 x i32> zeroinitializer
+; CHECK-NEXT: [[VEC_IV:%.*]] = add <2 x i64> [[BROADCAST_SPLAT]], <i64 0, i64 1>
+; CHECK-NEXT: [[TMP1:%.*]] = icmp ule <2 x i64> [[VEC_IV]], [[BROADCAST_SPLAT2]]
+; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x i1> [[TMP1]], i32 0
+; CHECK-NEXT: br i1 [[TMP2]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
+; CHECK: [[PRED_STORE_IF]]:
+; CHECK-NEXT: [[TMP3:%.*]] = trunc i64 0 to i8, !dbg [[DBG8:![0-9]+]]
+; CHECK-NEXT: store i8 [[TMP3]], ptr null, align 1, !dbg [[DBG9:![0-9]+]]
+; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE]]
+; CHECK: [[PRED_STORE_CONTINUE]]:
+; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x i1> [[TMP1]], i32 1, !dbg [[DBG9]]
+; CHECK-NEXT: br i1 [[TMP4]], label %[[PRED_STORE_IF3:.*]], label %[[PRED_STORE_CONTINUE4]], !dbg [[DBG9]]
+; CHECK: [[PRED_STORE_IF3]]:
+; CHECK-NEXT: [[TMP5:%.*]] = trunc i64 0 to i8, !dbg [[DBG8]]
+; CHECK-NEXT: store i8 [[TMP5]], ptr null, align 1, !dbg [[DBG9]]
+; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE4]], !dbg [[DBG9]]
+; CHECK: [[PRED_STORE_CONTINUE4]]:
+; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 2, !dbg [[DBG4]]
+; CHECK-NEXT: br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !dbg [[DBG4]], !llvm.loop [[LOOP10:![0-9]+]]
+; CHECK: [[MIDDLE_BLOCK]]:
+; CHECK-NEXT: br i1 true, label %[[DO_BODY45_PREHEADER:.*]], label %[[SCALAR_PH]], !dbg [[DBG13:![0-9]+]]
+; CHECK: [[SCALAR_PH]]:
+; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ], !dbg [[DBG4]]
+; CHECK-NEXT: br label %[[DO_BODY:.*]]
+; CHECK: [[DO_BODY]]:
+; CHECK-NEXT: [[INDVARS_IV554:%.*]] = phi i64 [ [[INDVARS_IV_NEXT555:%.*]], %[[DO_BODY]] ], [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], !dbg [[DBG4]]
+; CHECK-NEXT: [[CONV39:%.*]] = trunc i64 0 to i8, !dbg [[DBG8]]
+; CHECK-NEXT: store i8 [[CONV39]], ptr null, align 1, !dbg [[DBG9]]
+; CHECK-NEXT: [[INDVARS_IV_NEXT555]] = add i64 [[INDVARS_IV554]], 1, !dbg [[DBG14:![0-9]+]]
+; CHECK-NEXT: [[EXITCOND557_NOT:%.*]] = icmp eq i64 [[INDVARS_IV554]], [[WIDE_TRIP_COUNT]], !dbg [[DBG15:![0-9]+]]
+; CHECK-NEXT: br i1 [[EXITCOND557_NOT]], label %[[DO_BODY45_PREHEADER]], label %[[DO_BODY]], !dbg [[DBG13]], !llvm.loop [[LOOP16:![0-9]+]]
+; CHECK: [[DO_BODY45_PREHEADER]]:
+; CHECK-NEXT: ret i32 0
+;
+entry:
+ %wide.trip.count = and i64 %a, 1
+ br label %do.body
+
+do.body: ; preds = %do.body, %entry
+ %indvars.iv554 = phi i64 [ %indvars.iv.next555, %do.body ], [ 0, %entry ], !dbg !4
+ %conv39 = trunc i64 0 to i8, !dbg !5
+ store i8 %conv39, ptr null, align 1, !dbg !6
+ %indvars.iv.next555 = add i64 %indvars.iv554, 1, !dbg !7
+ %exitcond557.not = icmp eq i64 %indvars.iv554, %wide.trip.count, !dbg !8
+ br i1 %exitcond557.not, label %do.body45.preheader, label %do.body, !dbg !9
+
+do.body45.preheader: ; preds = %do.body
+ ret i32 0
+}
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!3}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus_14, file: !1, producer: "clang version 20.0.0git (https://github.com/llvm/llvm-project.git 97cea5fecc5fa70842644da877c9547d4f34f6db)", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, enums: !2, retainedTypes: !2, globals: !2, imports: !2, splitDebugInlining: false, nameTableKind: None)
+!1 = !DIFile(filename: "/home/gbtozers/dev/llvm-test-suite/CTMark/7zip/CPP/7zip/Compress/BZip2Decoder.cpp", directory: "/home/gbtozers/dev/llvm-test-suite-build", checksumkind: CSK_MD5, checksum: "ace37523c35f8079a0f00d337fc014dc")
+!2 = !{}
+!3 = !{i32 2, !"Debug Info Version", i32 3}
+!4 = !DILocation(line: 4, scope: !21)
+!5 = !DILocation(line: 5, scope: !21)
+!6 = !DILocation(line: 6, scope: !21)
+!7 = !DILocation(line: 7, scope: !21)
+!8 = !DILocation(line: 8, scope: !21)
+!9 = !DILocation(line: 9, scope: !21)
+!20 = !DIFile(filename: "llvm-test-suite/CTMark/7zip/CPP/7zip/Compress/BZip2Decoder.cpp", directory: "/home/gbtozers/dev", checksumkind: CSK_MD5, checksum: "ace37523c35f8079a0f00d337fc014dc")
+!21 = distinct !DISubprogram(name: "ReadBlock", linkageName: "foo", scope: !20, file: !20, line: 113, type: !22, scopeLine: 116, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagLocalToUnit | DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !2)
+!22 = distinct !DISubroutineType(types: !2)
+;.
+; CHECK: [[META0:![0-9]+]] = distinct !DICompileUnit(language: DW_LANG_C_plus_plus_14, file: [[META1:![0-9]+]], producer: "{{.*}}clang version {{.*}}", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, enums: [[META2:![0-9]+]], retainedTypes: [[META2]], globals: [[META2]], imports: [[META2]], splitDebugInlining: false, nameTableKind: None)
+; CHECK: [[META1]] = !DIFile(filename: "/home/gbtozers/dev/llvm-test-suite/CTMark/7zip/CPP/7zip/Compress/BZip2Decoder.cpp", directory: {{.*}})
+; CHECK: [[META2]] = !{}
+; CHECK: [[DBG4]] = !DILocation(line: 4, scope: [[META5:![0-9]+]])
+; CHECK: [[META5]] = distinct !DISubprogram(name: "ReadBlock", linkageName: "foo", scope: [[META6:![0-9]+]], file: [[META6]], line: 113, type: [[META7:![0-9]+]], scopeLine: 116, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagLocalToUnit | DISPFlagDefinition | DISPFlagOptimized, unit: [[META0]], retainedNodes: [[META2]])
+; CHECK: [[META6]] = !DIFile(filename: "llvm-test-suite/CTMark/7zip/CPP/7zip/Compress/BZip2Decoder.cpp", directory: {{.*}})
+; CHECK: [[META7]] = distinct !DISubroutineType(types: [[META2]])
+; CHECK: [[DBG8]] = !DILocation(line: 5, scope: [[META5]])
+; CHECK: [[DBG9]] = !DILocation(line: 6, scope: [[META5]])
+; CHECK: [[LOOP10]] = distinct !{[[LOOP10]], [[META11:![0-9]+]], [[META12:![0-9]+]]}
+; CHECK: [[META11]] = !{!"llvm.loop.isvectorized", i32 1}
+; CHECK: [[META12]] = !{!"llvm.loop.unroll.runtime.disable"}
+; CHECK: [[DBG13]] = !DILocation(line: 9, scope: [[META5]])
+; CHECK: [[DBG14]] = !DILocation(line: 7, scope: [[META5]])
+; CHECK: [[DBG15]] = !DILocation(line: 8, scope: [[META5]])
+; CHECK: [[LOOP16]] = distinct !{[[LOOP16]], [[META12]], [[META11]]}
+;.
|
✅ With the latest revision this PR passed the C/C++ code formatter. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Thanks for fixing this! Some suggestions for the test inline
; RUN: opt -S < %s -passes=loop-vectorize -force-vector-width=2 | FileCheck %s | ||
|
||
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128" | ||
target triple = "x86_64-unknown-linux-gnu" |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The triple shouldn't be needed, can you remove it? If it is required, then the test needs to be moved to the X86 subdirectory, otherwise it will fail if the X86 backend isn't built
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128" | ||
target triple = "x86_64-unknown-linux-gnu" | ||
|
||
define fastcc i32 @foo(i64 %a) { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
define fastcc i32 @foo(i64 %a) { | |
define i32 @foo(i64 %a) { |
|
||
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128" | ||
target triple = "x86_64-unknown-linux-gnu" | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Could you add a brief comment that this checks the debug location is preserved when the exiting branch from the vector loop gets simplified?
do.body: ; preds = %do.body, %entry | ||
%indvars.iv554 = phi i64 [ %indvars.iv.next555, %do.body ], [ 0, %entry ], !dbg !4 | ||
%conv39 = trunc i64 0 to i8, !dbg !5 | ||
store i8 %conv39, ptr null, align 1, !dbg !6 |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Please store to a non-null pointer, to ensure the test isn't UB
br label %do.body | ||
|
||
do.body: ; preds = %do.body, %entry | ||
%indvars.iv554 = phi i64 [ %indvars.iv.next555, %do.body ], [ 0, %entry ], !dbg !4 |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
%indvars.iv554 = phi i64 [ %indvars.iv.next555, %do.body ], [ 0, %entry ], !dbg !4 | |
%iv = phi i64 [ %iv.next, %do.body ], [ 0, %entry ], !dbg !4 |
%wide.trip.count = and i64 %a, 1 | ||
br label %do.body | ||
|
||
do.body: ; preds = %do.body, %entry |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
do.body: ; preds = %do.body, %entry | |
loop: |
; CHECK-NEXT: ret i32 0 | ||
; | ||
entry: | ||
%wide.trip.count = and i64 %a, 1 |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This shouldn't be needed I think or at least it might be clearer to use a constant trip count in the exit check
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Would be good to still apply if possible to simplify the test.
do.body45.preheader: ; preds = %do.body | ||
ret i32 0 |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
do.body45.preheader: ; preds = %do.body | |
ret i32 0 | |
exit: | |
ret void |
%conv39 = trunc i64 0 to i8, !dbg !5 | ||
store i8 %conv39, ptr null, align 1, !dbg !6 | ||
%indvars.iv.next555 = add i64 %indvars.iv554, 1, !dbg !7 | ||
%exitcond557.not = icmp eq i64 %indvars.iv554, %wide.trip.count, !dbg !8 |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
%exitcond557.not = icmp eq i64 %indvars.iv554, %wide.trip.count, !dbg !8 | |
%ec = icmp eq i64 %indvars.iv554, %wide.trip.count, !dbg !8 |
!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus_14, file: !1, producer: "clang version 20.0.0git (https://github.com/llvm/llvm-project.git 97cea5fecc5fa70842644da877c9547d4f34f6db)", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, enums: !2, retainedTypes: !2, globals: !2, imports: !2, splitDebugInlining: false, nameTableKind: None) | ||
!1 = !DIFile(filename: "/home/gbtozers/dev/llvm-test-suite/CTMark/7zip/CPP/7zip/Compress/BZip2Decoder.cpp", directory: "/home/gbtozers/dev/llvm-test-suite-build", checksumkind: CSK_MD5, checksum: "ace37523c35f8079a0f00d337fc014dc") |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Not sure if any of this can be shortened.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yes it can - in most of the tests for my recent patches I cleaned the metadata up before submitting, unfortunately look like I missed this one!
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM, thanks!
; CHECK-NEXT: ret i32 0 | ||
; | ||
entry: | ||
%wide.trip.count = and i64 %a, 1 |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Would be good to still apply if possible to simplify the test.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM, thanks.
Prior to this patch, optimizeForVFAndUF may optimize the conditional branch for a VPBasicblock to have a constant condition, but unnecessarily drops the DILocation attachment when it does so; this patch changes it to preserve the DILocation.
Found using #107279.