Skip to content

Commit 994dee0

Browse files
authored
[CIR][CUDA] Fix destructor behaviour (#1422)
CIR didn't work on structs with destructor but without constructor. Now it is fixed. Moreover, CUDA kernels must be emitted if it was referred to in the destructor of a non-device variable. It seems already working, so I just unblocked the code path.
1 parent 2ab0704 commit 994dee0

File tree

3 files changed

+47
-9
lines changed

3 files changed

+47
-9
lines changed

clang/lib/CIR/CodeGen/CIRGenModule.cpp

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -373,9 +373,9 @@ bool CIRGenModule::MayBeEmittedEagerly(const ValueDecl *global) {
373373
if (fd) {
374374
// Implicit template instantiations may change linkage if they are later
375375
// explicitly instantiated, so they should not be emitted eagerly.
376-
// TODO(cir): do we care?
377-
assert(fd->getTemplateSpecializationKind() != TSK_ImplicitInstantiation &&
378-
"not implemented");
376+
if (fd->getTemplateSpecializationKind() == TSK_ImplicitInstantiation)
377+
return false;
378+
379379
assert(!fd->isTemplated() && "Templates NYI");
380380
}
381381
const auto *vd = dyn_cast<VarDecl>(global);

clang/lib/CIR/Dialect/Transforms/LoweringPrepare.cpp

Lines changed: 17 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -227,10 +227,12 @@ FuncOp LoweringPreparePass::buildCXXGlobalVarDeclInitFunc(GlobalOp op) {
227227
cir::GlobalLinkageKind::InternalLinkage);
228228

229229
// Move over the initialzation code of the ctor region.
230-
auto &block = op.getCtorRegion().front();
231230
mlir::Block *entryBB = f.addEntryBlock();
232-
entryBB->getOperations().splice(entryBB->begin(), block.getOperations(),
233-
block.begin(), std::prev(block.end()));
231+
if (!op.getCtorRegion().empty()) {
232+
auto &block = op.getCtorRegion().front();
233+
entryBB->getOperations().splice(entryBB->begin(), block.getOperations(),
234+
block.begin(), std::prev(block.end()));
235+
}
234236

235237
// Register the destructor call with __cxa_atexit
236238
auto &dtorRegion = op.getDtorRegion();
@@ -294,9 +296,18 @@ FuncOp LoweringPreparePass::buildCXXGlobalVarDeclInitFunc(GlobalOp op) {
294296

295297
// Replace cir.yield with cir.return
296298
builder.setInsertionPointToEnd(entryBB);
297-
auto &yieldOp = block.getOperations().back();
298-
assert(isa<YieldOp>(yieldOp));
299-
builder.create<ReturnOp>(yieldOp.getLoc());
299+
mlir::Operation *yieldOp = nullptr;
300+
if (!op.getCtorRegion().empty()) {
301+
auto &block = op.getCtorRegion().front();
302+
yieldOp = &block.getOperations().back();
303+
} else {
304+
assert(!dtorRegion.empty());
305+
auto &block = dtorRegion.front();
306+
yieldOp = &block.getOperations().back();
307+
}
308+
309+
assert(isa<YieldOp>(*yieldOp));
310+
builder.create<ReturnOp>(yieldOp->getLoc());
300311
return f;
301312
}
302313

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
#include "../Inputs/cuda.h"
2+
3+
// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir \
4+
// RUN: -x cuda -emit-cir -target-sdk-version=12.3 \
5+
// RUN: %s -o %t.cir
6+
// RUN: FileCheck --check-prefix=CIR-HOST --input-file=%t.cir %s
7+
8+
// RUN: %clang_cc1 -triple nvptx64-nvidia-cuda -fclangir \
9+
// RUN: -fcuda-is-device -emit-cir -target-sdk-version=12.3 \
10+
// RUN: %s -o %t.cir
11+
// RUN: FileCheck --check-prefix=CIR-DEVICE --input-file=%t.cir %s
12+
13+
// Make sure we do emit device-side kernel even if it's only referenced
14+
// by the destructor of a variable not present on device.
15+
template<typename T> __global__ void f(T) {}
16+
template<typename T> struct A {
17+
~A() { f<<<1, 1>>>(T()); }
18+
};
19+
20+
// CIR-DEVICE: cir.func @_Z1fIiEvT_
21+
22+
// CIR-HOST: cir.func {{.*}} @_ZN1AIiED2Ev{{.*}} {
23+
// CIR-HOST: cir.call @__cudaPushCallConfiguration
24+
// CIR-HOST: cir.call @_Z16__device_stub__fIiEvT_
25+
// CIR-HOST: }
26+
27+
A<int> a;

0 commit comments

Comments
 (0)