Skip to content

Commit d61d72d

Browse files
committed
[OpenMP] Remove noinline attributes in the device runtime
We previously used the `noinline` attribute to mark some definitions that should be kept alive in the runtime. These attributes were then stripped immediately in the OpenMPOpt module pass. However, since the changes in D130298, we now explicitly state which functions will have external visibility in the bitcode library. Additionally, the OpenMPOpt module pass should run before the inliner pass, so this shouldn't make a difference in whether or not the functions will be alive for the initial pass of OpenMPOpt. This should simplify the interface and additionally save the time spent scanning function names for `noinline`. Reviewed By: jdoerfert Differential Revision: https://reviews.llvm.org/D130368
1 parent bfb9b8e commit d61d72d

File tree

7 files changed

+11
-127
lines changed

7 files changed

+11
-127
lines changed

llvm/lib/Transforms/IPO/OpenMPOpt.cpp

-12
Original file line numberDiff line numberDiff line change
@@ -499,18 +499,6 @@ struct OMPInformationCache : public InformationCache {
499499
}
500500
#include "llvm/Frontend/OpenMP/OMPKinds.def"
501501

502-
// Remove the `noinline` attribute from `__kmpc`, `_OMP::` and `omp_`
503-
// functions, except if `optnone` is present.
504-
if (isOpenMPDevice(M)) {
505-
for (Function &F : M) {
506-
for (StringRef Prefix : {"__kmpc", "_ZN4_OMP", "omp_"})
507-
if (F.hasFnAttribute(Attribute::NoInline) &&
508-
F.getName().startswith(Prefix) &&
509-
!F.hasFnAttribute(Attribute::OptimizeNone))
510-
F.removeFnAttr(Attribute::NoInline);
511-
}
512-
}
513-
514502
// TODO: We should attach the attributes defined in OMPKinds.def.
515503
}
516504

llvm/test/Transforms/OpenMP/remove_noinline_attributes.ll

-99
This file was deleted.

openmp/libomptarget/DeviceRTL/include/Synchronization.h

+2-4
Original file line numberDiff line numberDiff line change
@@ -29,15 +29,13 @@ void threads();
2929

3030
/// Synchronizing threads is allowed even if they all hit different instances of
3131
/// `synchronize::threads()`. However, `synchronize::threadsAligned()` is more
32-
/// restrictive in that it requires all threads to hit the same instance. The
33-
/// noinline is removed by the openmp-opt pass and helps to preserve the
34-
/// information till then.
32+
/// restrictive in that it requires all threads to hit the same instance.
3533
///{
3634
#pragma omp begin assumes ext_aligned_barrier
3735

3836
/// Synchronize all threads in a block, they are reaching the same
3937
/// instruction (hence all threads in the block are "aligned").
40-
__attribute__((noinline)) void threadsAligned();
38+
void threadsAligned();
4139

4240
#pragma omp end assumes
4341
///}

openmp/libomptarget/DeviceRTL/src/Mapping.cpp

+3-3
Original file line numberDiff line numberDiff line change
@@ -289,17 +289,17 @@ bool mapping::isGenericMode() { return !isSPMDMode(); }
289289
///}
290290

291291
extern "C" {
292-
__attribute__((noinline)) uint32_t __kmpc_get_hardware_thread_id_in_block() {
292+
uint32_t __kmpc_get_hardware_thread_id_in_block() {
293293
FunctionTracingRAII();
294294
return mapping::getThreadIdInBlock();
295295
}
296296

297-
__attribute__((noinline)) uint32_t __kmpc_get_hardware_num_threads_in_block() {
297+
uint32_t __kmpc_get_hardware_num_threads_in_block() {
298298
FunctionTracingRAII();
299299
return impl::getNumHardwareThreadsInBlock();
300300
}
301301

302-
__attribute__((noinline)) uint32_t __kmpc_get_warp_size() {
302+
uint32_t __kmpc_get_warp_size() {
303303
FunctionTracingRAII();
304304
return impl::getWarpSize();
305305
}

openmp/libomptarget/DeviceRTL/src/Parallelism.cpp

+2-3
Original file line numberDiff line numberDiff line change
@@ -243,8 +243,7 @@ void __kmpc_parallel_51(IdentTy *ident, int32_t, int32_t if_expr,
243243
__kmpc_end_sharing_variables();
244244
}
245245

246-
__attribute__((noinline)) bool
247-
__kmpc_kernel_parallel(ParallelRegionFnTy *WorkFn) {
246+
bool __kmpc_kernel_parallel(ParallelRegionFnTy *WorkFn) {
248247
FunctionTracingRAII();
249248
// Work function and arguments for L1 parallel region.
250249
*WorkFn = state::ParallelRegionFn;
@@ -259,7 +258,7 @@ __kmpc_kernel_parallel(ParallelRegionFnTy *WorkFn) {
259258
return ThreadIsActive;
260259
}
261260

262-
__attribute__((noinline)) void __kmpc_kernel_end_parallel() {
261+
void __kmpc_kernel_end_parallel() {
263262
FunctionTracingRAII();
264263
// In case we have modified an ICV for this thread before a ThreadState was
265264
// created. We drop it now to not contaminate the next parallel region.

openmp/libomptarget/DeviceRTL/src/State.cpp

+2-2
Original file line numberDiff line numberDiff line change
@@ -393,12 +393,12 @@ int omp_get_initial_device(void) { return -1; }
393393
}
394394

395395
extern "C" {
396-
__attribute__((noinline)) void *__kmpc_alloc_shared(uint64_t Bytes) {
396+
void *__kmpc_alloc_shared(uint64_t Bytes) {
397397
FunctionTracingRAII();
398398
return memory::allocShared(Bytes, "Frontend alloc shared");
399399
}
400400

401-
__attribute__((noinline)) void __kmpc_free_shared(void *Ptr, uint64_t Bytes) {
401+
void __kmpc_free_shared(void *Ptr, uint64_t Bytes) {
402402
FunctionTracingRAII();
403403
memory::freeShared(Ptr, Bytes, "Frontend free shared");
404404
}

openmp/libomptarget/DeviceRTL/src/Synchronization.cpp

+2-4
Original file line numberDiff line numberDiff line change
@@ -358,14 +358,12 @@ void __kmpc_barrier(IdentTy *Loc, int32_t TId) {
358358
impl::namedBarrier();
359359
}
360360

361-
__attribute__((noinline)) void __kmpc_barrier_simple_spmd(IdentTy *Loc,
362-
int32_t TId) {
361+
void __kmpc_barrier_simple_spmd(IdentTy *Loc, int32_t TId) {
363362
FunctionTracingRAII();
364363
synchronize::threadsAligned();
365364
}
366365

367-
__attribute__((noinline)) void __kmpc_barrier_simple_generic(IdentTy *Loc,
368-
int32_t TId) {
366+
void __kmpc_barrier_simple_generic(IdentTy *Loc, int32_t TId) {
369367
FunctionTracingRAII();
370368
synchronize::threads();
371369
}

0 commit comments

Comments
 (0)