Skip to content

Commit f2f5f1b

Browse files
[OMPIRBuilder] Do not call __kmpc_push_num_threads for device parallel (llvm#71934)
The function __kmpc_push_num_threads should be called only when the number of threads is specified for a host parallel region. For a device parallel region, the number of threads specified by the user should instead be passed as one of the arguments of the __kmpc_parallel_51 function.
1 parent 3f906f5 commit f2f5f1b

File tree

2 files changed

+24
-2
lines changed

2 files changed

+24
-2
lines changed

llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp

+3-2
Original file line numberDiff line numberDiff line change
@@ -1305,8 +1305,9 @@ IRBuilder<>::InsertPoint OpenMPIRBuilder::createParallel(
13051305
// function arguments are declared in zero address space
13061306
bool ArgsInZeroAddressSpace = Config.isTargetDevice();
13071307

1308-
if (NumThreads) {
1309-
// Build call __kmpc_push_num_threads(&Ident, global_tid, num_threads)
1308+
// Build call __kmpc_push_num_threads(&Ident, global_tid, num_threads)
1309+
// only if we compile for host side.
1310+
if (NumThreads && !Config.isTargetDevice()) {
13101311
Value *Args[] = {
13111312
Ident, ThreadID,
13121313
Builder.CreateIntCast(NumThreads, Int32, /*isSigned*/ false)};

mlir/test/Target/LLVMIR/omptarget-parallel-llvm.mlir

+21
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,21 @@ module attributes {dlti.dl_spec = #dlti.dl_spec<#dlti.dl_entry<"dlti.alloca_memo
1717
}
1818
llvm.return
1919
}
20+
21+
llvm.func @_test_num_threads(%arg0: !llvm.ptr) attributes {omp.declare_target = #omp.declaretarget<device_type = (host), capture_clause = (to)>, omp.outline_parent_name = "_QQmain"} {
22+
%0 = omp.map_info var_ptr(%arg0 : !llvm.ptr, i32) map_clauses(from) capture(ByRef) -> !llvm.ptr {name = "d"}
23+
omp.target map_entries(%0 -> %arg2 : !llvm.ptr) {
24+
^bb0(%arg2: !llvm.ptr):
25+
%1 = llvm.mlir.constant(156 : i32) : i32
26+
omp.parallel num_threads(%1 : i32) {
27+
%2 = llvm.mlir.constant(1 : i32) : i32
28+
llvm.store %2, %arg2 : i32, !llvm.ptr
29+
omp.terminator
30+
}
31+
omp.terminator
32+
}
33+
llvm.return
34+
}
2035
}
2136

2237
// CHECK: define weak_odr protected amdgpu_kernel void [[FUNC0:@.*]](
@@ -43,3 +58,9 @@ module attributes {dlti.dl_spec = #dlti.dl_spec<#dlti.dl_entry<"dlti.alloca_memo
4358
// CHECK: define internal void [[FUNC1]](
4459
// CHECK-SAME: ptr noalias noundef [[TID_ADDR_ASCAST:%.*]], ptr noalias noundef [[ZERO_ADDR_ASCAST:%.*]], ptr [[TMP0:%.*]]) #[[ATTR0:[0-9]+]] {
4560

61+
// CHECK: define weak_odr protected amdgpu_kernel void [[FUNC_NUM_THREADS0:@.*]](
62+
// CHECK-NOT: call void @__kmpc_push_num_threads(
63+
// CHECK: call void @__kmpc_parallel_51(ptr addrspacecast (
64+
// CHECK-SAME: ptr addrspace(1) @[[NUM_THREADS_GLOB:[0-9]+]] to ptr),
65+
// CHECK-SAME: i32 [[NUM_THREADS_TMP0:%.*]], i32 1, i32 156,
66+
// CHECK-SAME: i32 -1, ptr [[FUNC_NUM_THREADS1:@.*]], ptr null, ptr [[NUM_THREADS_TMP1:%.*]], i64 1)

0 commit comments

Comments
 (0)