Skip to content

Commit fe7b5e2

Browse files
committed
Revert "[AMDGPU] Call the FINI_ARRAY destructors in the correct order (llvm#71815)"
This reverts commit c1d5865. Introduces a new use of ConstantExpr::getAShr().
1 parent b43b2a6 commit fe7b5e2

File tree

4 files changed

+48
-53
lines changed

4 files changed

+48
-53
lines changed

llvm/lib/Target/AMDGPU/AMDGPUCtorDtorLowering.cpp

+4-37
Original file line numberDiff line numberDiff line change
@@ -53,22 +53,13 @@ static Function *createInitOrFiniKernelFunction(Module &M, bool IsCtor) {
5353
//
5454
// extern "C" void * __init_array_start[];
5555
// extern "C" void * __init_array_end[];
56-
// extern "C" void * __fini_array_start[];
57-
// extern "C" void * __fini_array_end[];
5856
//
5957
// using InitCallback = void();
60-
// using FiniCallback = void(void);
6158
//
6259
// void call_init_array_callbacks() {
6360
// for (auto start = __init_array_start; start != __init_array_end; ++start)
6461
// reinterpret_cast<InitCallback *>(*start)();
6562
// }
66-
//
67-
// void call_fini_array_callbacks() {
68-
// size_t fini_array_size = __fini_array_end - __fini_array_start;
69-
// for (size_t i = fini_array_size; i > 0; --i)
70-
// reinterpret_cast<FiniCallback *>(__fini_array_start[i - 1])();
71-
// }
7263
static void createInitOrFiniCalls(Function &F, bool IsCtor) {
7364
Module &M = *F.getParent();
7465
LLVMContext &C = M.getContext();
@@ -105,39 +96,15 @@ static void createInitOrFiniCalls(Function &F, bool IsCtor) {
10596
// for now we just call them with no arguments.
10697
auto *CallBackTy = FunctionType::get(IRB.getVoidTy(), {});
10798

108-
Constant *Start = Begin;
109-
Constant *Stop = End;
110-
// The destructor array must be called in reverse order. Get a constant
111-
// expression to the end of the array and iterate backwards instead.
112-
if (!IsCtor) {
113-
Type *Int64Ty = IntegerType::getInt64Ty(C);
114-
auto *Offset = ConstantExpr::getSub(
115-
ConstantExpr::getAShr(
116-
ConstantExpr::getSub(ConstantExpr::getPtrToInt(End, Int64Ty),
117-
ConstantExpr::getPtrToInt(Begin, Int64Ty)),
118-
ConstantInt::get(Int64Ty, 3)),
119-
ConstantInt::get(Int64Ty, 1));
120-
Start = ConstantExpr::getGetElementPtr(
121-
ArrayType::get(IRB.getPtrTy(), 0), Begin,
122-
ArrayRef<Constant *>({ConstantInt::get(Int64Ty, 0), Offset}),
123-
/*InBounds=*/true);
124-
Stop = Begin;
125-
}
126-
127-
IRB.CreateCondBr(
128-
IRB.CreateCmp(IsCtor ? ICmpInst::ICMP_NE : ICmpInst::ICMP_UGE, Start,
129-
Stop),
130-
LoopBB, ExitBB);
99+
IRB.CreateCondBr(IRB.CreateICmpNE(Begin, End), LoopBB, ExitBB);
131100
IRB.SetInsertPoint(LoopBB);
132101
auto *CallBackPHI = IRB.CreatePHI(PtrTy, 2, "ptr");
133102
auto *CallBack = IRB.CreateLoad(CallBackTy->getPointerTo(F.getAddressSpace()),
134103
CallBackPHI, "callback");
135104
IRB.CreateCall(CallBackTy, CallBack);
136-
auto *NewCallBack =
137-
IRB.CreateConstGEP1_64(PtrTy, CallBackPHI, IsCtor ? 1 : -1, "next");
138-
auto *EndCmp = IRB.CreateCmp(IsCtor ? ICmpInst::ICMP_EQ : ICmpInst::ICMP_ULT,
139-
NewCallBack, Stop, "end");
140-
CallBackPHI->addIncoming(Start, &F.getEntryBlock());
105+
auto *NewCallBack = IRB.CreateConstGEP1_64(PtrTy, CallBackPHI, 1, "next");
106+
auto *EndCmp = IRB.CreateICmpEQ(NewCallBack, End, "end");
107+
CallBackPHI->addIncoming(Begin, &F.getEntryBlock());
141108
CallBackPHI->addIncoming(NewCallBack, LoopBB);
142109
IRB.CreateCondBr(EndCmp, ExitBB, LoopBB);
143110
IRB.SetInsertPoint(ExitBB);

llvm/test/CodeGen/AMDGPU/lower-ctor-dtor-constexpr-alias.ll

+6-4
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,8 @@ define void @bar() addrspace(1) {
2525
ret void
2626
}
2727

28+
29+
2830
;.
2931
; CHECK: @[[LLVM_GLOBAL_CTORS:[a-zA-Z0-9_$"\\.-]+]] = appending addrspace(1) global [2 x { i32, ptr, ptr }] [{ i32, ptr, ptr } { i32 1, ptr @foo.alias, ptr null }, { i32, ptr, ptr } { i32 1, ptr inttoptr (i64 4096 to ptr), ptr null }]
3032
; CHECK: @[[LLVM_GLOBAL_DTORS:[a-zA-Z0-9_$"\\.-]+]] = appending addrspace(1) global [1 x { i32, ptr, ptr }] [{ i32, ptr, ptr } { i32 1, ptr addrspacecast (ptr addrspace(1) @bar to ptr), ptr null }]
@@ -63,13 +65,13 @@ define void @bar() addrspace(1) {
6365
; CHECK-LABEL: define weak_odr amdgpu_kernel void @amdgcn.device.fini(
6466
; CHECK-SAME: ) #[[ATTR2:[0-9]+]] {
6567
; CHECK-NEXT: entry:
66-
; CHECK-NEXT: br i1 icmp uge (ptr addrspace(1) getelementptr inbounds ([0 x ptr], ptr addrspace(1) @__fini_array_start, i64 0, i64 sub (i64 ashr (i64 sub (i64 ptrtoint (ptr addrspace(1) @__fini_array_end to i64), i64 ptrtoint (ptr addrspace(1) @__fini_array_start to i64)), i64 3), i64 1)), ptr addrspace(1) @__fini_array_start), label [[WHILE_ENTRY:%.*]], label [[WHILE_END:%.*]]
68+
; CHECK-NEXT: br i1 icmp ne (ptr addrspace(1) @__fini_array_start, ptr addrspace(1) @__fini_array_end), label [[WHILE_ENTRY:%.*]], label [[WHILE_END:%.*]]
6769
; CHECK: while.entry:
68-
; CHECK-NEXT: [[PTR:%.*]] = phi ptr addrspace(1) [ getelementptr inbounds ([0 x ptr], ptr addrspace(1) @__fini_array_start, i64 0, i64 sub (i64 ashr (i64 sub (i64 ptrtoint (ptr addrspace(1) @__fini_array_end to i64), i64 ptrtoint (ptr addrspace(1) @__fini_array_start to i64)), i64 3), i64 1)), [[ENTRY:%.*]] ], [ [[NEXT:%.*]], [[WHILE_ENTRY]] ]
70+
; CHECK-NEXT: [[PTR:%.*]] = phi ptr addrspace(1) [ @__fini_array_start, [[ENTRY:%.*]] ], [ [[NEXT:%.*]], [[WHILE_ENTRY]] ]
6971
; CHECK-NEXT: [[CALLBACK:%.*]] = load ptr, ptr addrspace(1) [[PTR]], align 8
7072
; CHECK-NEXT: call void [[CALLBACK]]()
71-
; CHECK-NEXT: [[NEXT]] = getelementptr ptr addrspace(1), ptr addrspace(1) [[PTR]], i64 -1
72-
; CHECK-NEXT: [[END:%.*]] = icmp ult ptr addrspace(1) [[NEXT]], @__fini_array_start
73+
; CHECK-NEXT: [[NEXT]] = getelementptr ptr addrspace(1), ptr addrspace(1) [[PTR]], i64 1
74+
; CHECK-NEXT: [[END:%.*]] = icmp eq ptr addrspace(1) [[NEXT]], @__fini_array_end
7375
; CHECK-NEXT: br i1 [[END]], label [[WHILE_END]], label [[WHILE_ENTRY]]
7476
; CHECK: while.end:
7577
; CHECK-NEXT: ret void

llvm/test/CodeGen/AMDGPU/lower-ctor-dtor.ll

+16-7
Original file line numberDiff line numberDiff line change
@@ -12,19 +12,20 @@
1212
@llvm.global_ctors = appending addrspace(1) global [1 x { i32, ptr, ptr }] [{ i32, ptr, ptr } { i32 1, ptr @foo, ptr null }]
1313
@llvm.global_dtors = appending addrspace(1) global [1 x { i32, ptr, ptr }] [{ i32, ptr, ptr } { i32 1, ptr @bar, ptr null }]
1414

15+
16+
17+
18+
1519
; VISIBILITY: FUNC WEAK PROTECTED {{.*}} amdgcn.device.init
1620
; VISIBILITY: OBJECT WEAK DEFAULT {{.*}} amdgcn.device.init.kd
1721
; VISIBILITY: FUNC WEAK PROTECTED {{.*}} amdgcn.device.fini
1822
; VISIBILITY: OBJECT WEAK DEFAULT {{.*}} amdgcn.device.fini.kd
19-
2023
; SECTION: .init_array.1 INIT_ARRAY {{.*}} {{.*}} 000008 00 WA 0 0 8
2124
; SECTION: .fini_array.1 FINI_ARRAY {{.*}} {{.*}} 000008 00 WA 0 0 8
22-
2325
; DISABLED-NOT: FUNC GLOBAL PROTECTED {{.*}} amdgcn.device.init
2426
; DISABLED-NOT: OBJECT GLOBAL DEFAULT {{.*}} amdgcn.device.init.kd
2527
; DISABLED-NOT: FUNC GLOBAL PROTECTED {{.*}} amdgcn.device.fini
2628
; DISABLED-NOT: OBJECT GLOBAL DEFAULT {{.*}} amdgcn.device.fini.kd
27-
2829
; METADATA: amdhsa.kernels:
2930
; METADATA: .kind: init
3031
; METADATA: .max_flat_workgroup_size: 1
@@ -52,6 +53,13 @@ define internal void @bar() {
5253
; CHECK: @[[__FINI_ARRAY_END:[a-zA-Z0-9_$"\\.-]+]] = external addrspace(1) constant [0 x ptr addrspace(1)]
5354
; CHECK: @[[LLVM_USED:[a-zA-Z0-9_$"\\.-]+]] = appending addrspace(1) global [2 x ptr] [ptr @amdgcn.device.init, ptr @amdgcn.device.fini], section "llvm.metadata"
5455
;.
56+
; CHECK-LABEL: define internal void @foo() {
57+
; CHECK-NEXT: ret void
58+
;
59+
;
60+
; CHECK-LABEL: define internal void @bar() {
61+
; CHECK-NEXT: ret void
62+
;
5563
;
5664
; CHECK-LABEL: define weak_odr amdgpu_kernel void @amdgcn.device.init(
5765
; CHECK-SAME: ) #[[ATTR0:[0-9]+]] {
@@ -71,17 +79,18 @@ define internal void @bar() {
7179
; CHECK-LABEL: define weak_odr amdgpu_kernel void @amdgcn.device.fini(
7280
; CHECK-SAME: ) #[[ATTR1:[0-9]+]] {
7381
; CHECK-NEXT: entry:
74-
; CHECK-NEXT: br i1 icmp uge (ptr addrspace(1) getelementptr inbounds ([0 x ptr], ptr addrspace(1) @__fini_array_start, i64 0, i64 sub (i64 ashr (i64 sub (i64 ptrtoint (ptr addrspace(1) @__fini_array_end to i64), i64 ptrtoint (ptr addrspace(1) @__fini_array_start to i64)), i64 3), i64 1)), ptr addrspace(1) @__fini_array_start), label [[WHILE_ENTRY:%.*]], label [[WHILE_END:%.*]]
82+
; CHECK-NEXT: br i1 icmp ne (ptr addrspace(1) @__fini_array_start, ptr addrspace(1) @__fini_array_end), label [[WHILE_ENTRY:%.*]], label [[WHILE_END:%.*]]
7583
; CHECK: while.entry:
76-
; CHECK-NEXT: [[PTR:%.*]] = phi ptr addrspace(1) [ getelementptr inbounds ([0 x ptr], ptr addrspace(1) @__fini_array_start, i64 0, i64 sub (i64 ashr (i64 sub (i64 ptrtoint (ptr addrspace(1) @__fini_array_end to i64), i64 ptrtoint (ptr addrspace(1) @__fini_array_start to i64)), i64 3), i64 1)), [[ENTRY:%.*]] ], [ [[NEXT:%.*]], [[WHILE_ENTRY]] ]
84+
; CHECK-NEXT: [[PTR:%.*]] = phi ptr addrspace(1) [ @__fini_array_start, [[ENTRY:%.*]] ], [ [[NEXT:%.*]], [[WHILE_ENTRY]] ]
7785
; CHECK-NEXT: [[CALLBACK:%.*]] = load ptr, ptr addrspace(1) [[PTR]], align 8
7886
; CHECK-NEXT: call void [[CALLBACK]]()
79-
; CHECK-NEXT: [[NEXT]] = getelementptr ptr addrspace(1), ptr addrspace(1) [[PTR]], i64 -1
80-
; CHECK-NEXT: [[END:%.*]] = icmp ult ptr addrspace(1) [[NEXT]], @__fini_array_start
87+
; CHECK-NEXT: [[NEXT]] = getelementptr ptr addrspace(1), ptr addrspace(1) [[PTR]], i64 1
88+
; CHECK-NEXT: [[END:%.*]] = icmp eq ptr addrspace(1) [[NEXT]], @__fini_array_end
8189
; CHECK-NEXT: br i1 [[END]], label [[WHILE_END]], label [[WHILE_ENTRY]]
8290
; CHECK: while.end:
8391
; CHECK-NEXT: ret void
8492
;
8593
;.
8694
; CHECK: attributes #[[ATTR0]] = { "amdgpu-flat-work-group-size"="1,1" "device-init" }
8795
; CHECK: attributes #[[ATTR1]] = { "amdgpu-flat-work-group-size"="1,1" "device-fini" }
96+
;.

llvm/test/CodeGen/AMDGPU/lower-multiple-ctor-dtor.ll

+22-5
Original file line numberDiff line numberDiff line change
@@ -3,10 +3,10 @@
33
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 -filetype=obj -o - < %s | llvm-readelf -s - 2>&1 | FileCheck %s -check-prefix=CHECK-VIS
44

55

6+
; UTC_ARGS: --disable
67
@llvm.global_ctors = appending addrspace(1) global [2 x { i32, ptr, ptr }] [{ i32, ptr, ptr } { i32 1, ptr @foo, ptr null }, { i32, ptr, ptr } { i32 1, ptr @foo.5, ptr null }]
78
@llvm.global_dtors = appending addrspace(1) global [2 x { i32, ptr, ptr }] [{ i32, ptr, ptr } { i32 1, ptr @bar, ptr null }, { i32, ptr, ptr } { i32 1, ptr @bar.5, ptr null }]
89

9-
; UTC_ARGS: --disable
1010
; CHECK: @__init_array_start = external addrspace(1) constant [0 x ptr addrspace(1)]
1111
; CHECK: @__init_array_end = external addrspace(1) constant [0 x ptr addrspace(1)]
1212
; CHECK: @__fini_array_start = external addrspace(1) constant [0 x ptr addrspace(1)]
@@ -36,6 +36,22 @@ define internal void @bar.5() {
3636
ret void
3737
}
3838

39+
; CHECK-LABEL: define internal void @foo() {
40+
; CHECK-NEXT: ret void
41+
;
42+
;
43+
; CHECK-LABEL: define internal void @bar() {
44+
; CHECK-NEXT: ret void
45+
;
46+
;
47+
; CHECK-LABEL: define internal void @foo.5() {
48+
; CHECK-NEXT: ret void
49+
;
50+
;
51+
; CHECK-LABEL: define internal void @bar.5() {
52+
; CHECK-NEXT: ret void
53+
;
54+
;
3955
; CHECK-LABEL: define weak_odr amdgpu_kernel void @amdgcn.device.init(
4056
; CHECK-SAME: ) #[[ATTR0:[0-9]+]] {
4157
; CHECK-NEXT: entry:
@@ -54,13 +70,14 @@ define internal void @bar.5() {
5470
; CHECK-LABEL: define weak_odr amdgpu_kernel void @amdgcn.device.fini(
5571
; CHECK-SAME: ) #[[ATTR1:[0-9]+]] {
5672
; CHECK-NEXT: entry:
57-
; CHECK-NEXT: br i1 icmp uge (ptr addrspace(1) getelementptr inbounds ([0 x ptr], ptr addrspace(1) @__fini_array_start, i64 0, i64 sub (i64 ashr (i64 sub (i64 ptrtoint (ptr addrspace(1) @__fini_array_end to i64), i64 ptrtoint (ptr addrspace(1) @__fini_array_start to i64)), i64 3), i64 1)), ptr addrspace(1) @__fini_array_start), label [[WHILE_ENTRY:%.*]], label [[WHILE_END:%.*]]
73+
; CHECK-NEXT: br i1 icmp ne (ptr addrspace(1) @__fini_array_start, ptr addrspace(1) @__fini_array_end), label [[WHILE_ENTRY:%.*]], label [[WHILE_END:%.*]]
5874
; CHECK: while.entry:
59-
; CHECK-NEXT: [[PTR:%.*]] = phi ptr addrspace(1) [ getelementptr inbounds ([0 x ptr], ptr addrspace(1) @__fini_array_start, i64 0, i64 sub (i64 ashr (i64 sub (i64 ptrtoint (ptr addrspace(1) @__fini_array_end to i64), i64 ptrtoint (ptr addrspace(1) @__fini_array_start to i64)), i64 3), i64 1)), [[ENTRY:%.*]] ], [ [[NEXT:%.*]], [[WHILE_ENTRY]] ]
75+
; CHECK-NEXT: [[PTR:%.*]] = phi ptr addrspace(1) [ @__fini_array_start, [[ENTRY:%.*]] ], [ [[NEXT:%.*]], [[WHILE_ENTRY]] ]
6076
; CHECK-NEXT: [[CALLBACK:%.*]] = load ptr, ptr addrspace(1) [[PTR]], align 8
6177
; CHECK-NEXT: call void [[CALLBACK]]()
62-
; CHECK-NEXT: [[NEXT]] = getelementptr ptr addrspace(1), ptr addrspace(1) [[PTR]], i64 -1
63-
; CHECK-NEXT: [[END:%.*]] = icmp ult ptr addrspace(1) [[NEXT]], @__fini_array_start
78+
; CHECK-NEXT: [[NEXT]] = getelementptr ptr addrspace(1), ptr addrspace(1) [[PTR]], i64 1
79+
; CHECK-NEXT: [[END:%.*]] = icmp eq ptr addrspace(1) [[NEXT]], @__fini_array_end
6480
; CHECK-NEXT: br i1 [[END]], label [[WHILE_END]], label [[WHILE_ENTRY]]
6581
; CHECK: while.end:
6682
; CHECK-NEXT: ret void
83+
;

0 commit comments

Comments
 (0)