Skip to content

Commit fd8fd9e

Browse files
jhuber6tstellar
authored andcommitted
Revert "[OpenMP] Remove noinline attributes in the device runtime"
The behaviour of this patch is not great, but it has some side-effects that are required for OpenMPOpt to work. The problem is that when we use `-mlink-builtin-bitcode` we only import used symbols from the runtime. Then OpenMPOpt will insert calls to symbols that were not previously included. This patch removed this implicit behaviour as these functions were kept alive by the `noinline` simply because it kept calls to them in the module. This caused regression in some tests that relied on some OpenMPOpt passes without using LTO. Reverting for the LLVM15 release but will try to fix it more correctly on main. This reverts commit d61d72d. Fixes #56752 (cherry picked from commit b08369f)
1 parent 5e4e882 commit fd8fd9e

File tree

7 files changed

+127
-11
lines changed

7 files changed

+127
-11
lines changed

llvm/lib/Transforms/IPO/OpenMPOpt.cpp

+12
Original file line numberDiff line numberDiff line change
@@ -499,6 +499,18 @@ struct OMPInformationCache : public InformationCache {
499499
}
500500
#include "llvm/Frontend/OpenMP/OMPKinds.def"
501501

502+
// Remove the `noinline` attribute from `__kmpc`, `_OMP::` and `omp_`
503+
// functions, except if `optnone` is present.
504+
if (isOpenMPDevice(M)) {
505+
for (Function &F : M) {
506+
for (StringRef Prefix : {"__kmpc", "_ZN4_OMP", "omp_"})
507+
if (F.hasFnAttribute(Attribute::NoInline) &&
508+
F.getName().startswith(Prefix) &&
509+
!F.hasFnAttribute(Attribute::OptimizeNone))
510+
F.removeFnAttr(Attribute::NoInline);
511+
}
512+
}
513+
502514
// TODO: We should attach the attributes defined in OMPKinds.def.
503515
}
504516

Original file line numberDiff line numberDiff line change
@@ -0,0 +1,99 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-attributes
2+
; RUN: opt < %s -S -openmp-opt-cgscc | FileCheck %s
3+
; RUN: opt < %s -S -passes=openmp-opt-cgscc | FileCheck %s
4+
5+
declare void @unknown()
6+
7+
; __kmpc functions
8+
define void @__kmpc_noinline() noinline nounwind {
9+
; CHECK: Function Attrs: nounwind
10+
; CHECK-LABEL: @__kmpc_noinline(
11+
; CHECK-NEXT: call void @unknown()
12+
; CHECK-NEXT: ret void
13+
;
14+
call void @unknown()
15+
ret void
16+
}
17+
; omp_X functions
18+
define void @omp_noinline() noinline nounwind {
19+
; CHECK: Function Attrs: nounwind
20+
; CHECK-LABEL: @omp_noinline(
21+
; CHECK-NEXT: call void @unknown()
22+
; CHECK-NEXT: ret void
23+
;
24+
call void @unknown()
25+
ret void
26+
}
27+
; _OMP namespace
28+
define void @_ZN4_OMP_noinline() noinline nounwind {
29+
; CHECK: Function Attrs: nounwind
30+
; CHECK-LABEL: @_ZN4_OMP_noinline(
31+
; CHECK-NEXT: call void @unknown()
32+
; CHECK-NEXT: ret void
33+
;
34+
call void @unknown()
35+
ret void
36+
}
37+
38+
; Negative tests:
39+
40+
define void @__kmpc_noinline_optnone() noinline optnone nounwind {
41+
; CHECK: Function Attrs: noinline nounwind optnone
42+
; CHECK-LABEL: @__kmpc_noinline_optnone(
43+
; CHECK-NEXT: call void @unknown()
44+
; CHECK-NEXT: ret void
45+
;
46+
call void @unknown()
47+
ret void
48+
}
49+
define void @omp_noinline_optnone() noinline optnone nounwind {
50+
; CHECK: Function Attrs: noinline nounwind optnone
51+
; CHECK-LABEL: @omp_noinline_optnone(
52+
; CHECK-NEXT: call void @unknown()
53+
; CHECK-NEXT: ret void
54+
;
55+
call void @unknown()
56+
ret void
57+
}
58+
; _OMP namespace
59+
define void @_ZN4_OMP_noinline_optnone() noinline optnone nounwind {
60+
; CHECK: Function Attrs: noinline nounwind optnone
61+
; CHECK-LABEL: @_ZN4_OMP_noinline_optnone(
62+
; CHECK-NEXT: call void @unknown()
63+
; CHECK-NEXT: ret void
64+
;
65+
call void @unknown()
66+
ret void
67+
}
68+
define void @a___kmpc_noinline() noinline nounwind {
69+
; CHECK: Function Attrs: noinline nounwind
70+
; CHECK-LABEL: @a___kmpc_noinline(
71+
; CHECK-NEXT: call void @unknown()
72+
; CHECK-NEXT: ret void
73+
;
74+
call void @unknown()
75+
ret void
76+
}
77+
define void @a_omp_noinline() noinline nounwind {
78+
; CHECK: Function Attrs: noinline nounwind
79+
; CHECK-LABEL: @a_omp_noinline(
80+
; CHECK-NEXT: call void @unknown()
81+
; CHECK-NEXT: ret void
82+
;
83+
call void @unknown()
84+
ret void
85+
}
86+
define void @a__ZN4_OMP_noinline() noinline nounwind {
87+
; CHECK: Function Attrs: noinline nounwind
88+
; CHECK-LABEL: @a__ZN4_OMP_noinline(
89+
; CHECK-NEXT: call void @unknown()
90+
; CHECK-NEXT: ret void
91+
;
92+
call void @unknown()
93+
ret void
94+
}
95+
96+
!llvm.module.flags = !{!0, !1}
97+
98+
!0 = !{i32 7, !"openmp", i32 50}
99+
!1 = !{i32 7, !"openmp-device", i32 50}

openmp/libomptarget/DeviceRTL/include/Synchronization.h

+4-2
Original file line numberDiff line numberDiff line change
@@ -29,13 +29,15 @@ void threads();
2929

3030
/// Synchronizing threads is allowed even if they all hit different instances of
3131
/// `synchronize::threads()`. However, `synchronize::threadsAligned()` is more
32-
/// restrictive in that it requires all threads to hit the same instance.
32+
/// restrictive in that it requires all threads to hit the same instance. The
33+
/// noinline is removed by the openmp-opt pass and helps to preserve the
34+
/// information till then.
3335
///{
3436
#pragma omp begin assumes ext_aligned_barrier
3537

3638
/// Synchronize all threads in a block, they are are reaching the same
3739
/// instruction (hence all threads in the block are "aligned").
38-
void threadsAligned();
40+
__attribute__((noinline)) void threadsAligned();
3941

4042
#pragma omp end assumes
4143
///}

openmp/libomptarget/DeviceRTL/src/Mapping.cpp

+3-3
Original file line numberDiff line numberDiff line change
@@ -289,17 +289,17 @@ bool mapping::isGenericMode() { return !isSPMDMode(); }
289289
///}
290290

291291
extern "C" {
292-
uint32_t __kmpc_get_hardware_thread_id_in_block() {
292+
__attribute__((noinline)) uint32_t __kmpc_get_hardware_thread_id_in_block() {
293293
FunctionTracingRAII();
294294
return mapping::getThreadIdInBlock();
295295
}
296296

297-
uint32_t __kmpc_get_hardware_num_threads_in_block() {
297+
__attribute__((noinline)) uint32_t __kmpc_get_hardware_num_threads_in_block() {
298298
FunctionTracingRAII();
299299
return impl::getNumHardwareThreadsInBlock();
300300
}
301301

302-
uint32_t __kmpc_get_warp_size() {
302+
__attribute__((noinline)) uint32_t __kmpc_get_warp_size() {
303303
FunctionTracingRAII();
304304
return impl::getWarpSize();
305305
}

openmp/libomptarget/DeviceRTL/src/Parallelism.cpp

+3-2
Original file line numberDiff line numberDiff line change
@@ -243,7 +243,8 @@ void __kmpc_parallel_51(IdentTy *ident, int32_t, int32_t if_expr,
243243
__kmpc_end_sharing_variables();
244244
}
245245

246-
bool __kmpc_kernel_parallel(ParallelRegionFnTy *WorkFn) {
246+
__attribute__((noinline)) bool
247+
__kmpc_kernel_parallel(ParallelRegionFnTy *WorkFn) {
247248
FunctionTracingRAII();
248249
// Work function and arguments for L1 parallel region.
249250
*WorkFn = state::ParallelRegionFn;
@@ -258,7 +259,7 @@ bool __kmpc_kernel_parallel(ParallelRegionFnTy *WorkFn) {
258259
return ThreadIsActive;
259260
}
260261

261-
void __kmpc_kernel_end_parallel() {
262+
__attribute__((noinline)) void __kmpc_kernel_end_parallel() {
262263
FunctionTracingRAII();
263264
// In case we have modified an ICV for this thread before a ThreadState was
264265
// created. We drop it now to not contaminate the next parallel region.

openmp/libomptarget/DeviceRTL/src/State.cpp

+2-2
Original file line numberDiff line numberDiff line change
@@ -393,12 +393,12 @@ int omp_get_initial_device(void) { return -1; }
393393
}
394394

395395
extern "C" {
396-
void *__kmpc_alloc_shared(uint64_t Bytes) {
396+
__attribute__((noinline)) void *__kmpc_alloc_shared(uint64_t Bytes) {
397397
FunctionTracingRAII();
398398
return memory::allocShared(Bytes, "Frontend alloc shared");
399399
}
400400

401-
void __kmpc_free_shared(void *Ptr, uint64_t Bytes) {
401+
__attribute__((noinline)) void __kmpc_free_shared(void *Ptr, uint64_t Bytes) {
402402
FunctionTracingRAII();
403403
memory::freeShared(Ptr, Bytes, "Frontend free shared");
404404
}

openmp/libomptarget/DeviceRTL/src/Synchronization.cpp

+4-2
Original file line numberDiff line numberDiff line change
@@ -358,12 +358,14 @@ void __kmpc_barrier(IdentTy *Loc, int32_t TId) {
358358
impl::namedBarrier();
359359
}
360360

361-
void __kmpc_barrier_simple_spmd(IdentTy *Loc, int32_t TId) {
361+
__attribute__((noinline)) void __kmpc_barrier_simple_spmd(IdentTy *Loc,
362+
int32_t TId) {
362363
FunctionTracingRAII();
363364
synchronize::threadsAligned();
364365
}
365366

366-
void __kmpc_barrier_simple_generic(IdentTy *Loc, int32_t TId) {
367+
__attribute__((noinline)) void __kmpc_barrier_simple_generic(IdentTy *Loc,
368+
int32_t TId) {
367369
FunctionTracingRAII();
368370
synchronize::threads();
369371
}

0 commit comments

Comments
 (0)