diff --git a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp --- a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp +++ b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp @@ -499,18 +499,6 @@ } #include "llvm/Frontend/OpenMP/OMPKinds.def" - // Remove the `noinline` attribute from `__kmpc`, `_OMP::` and `omp_` - // functions, except if `optnone` is present. - if (isOpenMPDevice(M)) { - for (Function &F : M) { - for (StringRef Prefix : {"__kmpc", "_ZN4_OMP", "omp_"}) - if (F.hasFnAttribute(Attribute::NoInline) && - F.getName().startswith(Prefix) && - !F.hasFnAttribute(Attribute::OptimizeNone)) - F.removeFnAttr(Attribute::NoInline); - } - } - // TODO: We should attach the attributes defined in OMPKinds.def. } diff --git a/llvm/test/Transforms/OpenMP/remove_noinline_attributes.ll b/llvm/test/Transforms/OpenMP/remove_noinline_attributes.ll deleted file mode 100644 --- a/llvm/test/Transforms/OpenMP/remove_noinline_attributes.ll +++ /dev/null @@ -1,99 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-attributes -; RUN: opt < %s -S -openmp-opt-cgscc | FileCheck %s -; RUN: opt < %s -S -passes=openmp-opt-cgscc | FileCheck %s - -declare void @unknown() - -; __kmpc functions -define void @__kmpc_noinline() noinline nounwind { -; CHECK: Function Attrs: nounwind -; CHECK-LABEL: @__kmpc_noinline( -; CHECK-NEXT: call void @unknown() -; CHECK-NEXT: ret void -; - call void @unknown() - ret void -} -; omp_X functions -define void @omp_noinline() noinline nounwind { -; CHECK: Function Attrs: nounwind -; CHECK-LABEL: @omp_noinline( -; CHECK-NEXT: call void @unknown() -; CHECK-NEXT: ret void -; - call void @unknown() - ret void -} -; _OMP namespace -define void @_ZN4_OMP_noinline() noinline nounwind { -; CHECK: Function Attrs: nounwind -; CHECK-LABEL: @_ZN4_OMP_noinline( -; CHECK-NEXT: call void @unknown() -; CHECK-NEXT: ret void -; - call void @unknown() - ret void -} - -; Negative tests: - -define void @__kmpc_noinline_optnone() noinline optnone nounwind { -; CHECK: Function Attrs: noinline nounwind optnone -; CHECK-LABEL: @__kmpc_noinline_optnone( -; CHECK-NEXT: call void @unknown() -; CHECK-NEXT: ret void -; - call void @unknown() - ret void -} -define void @omp_noinline_optnone() noinline optnone nounwind { -; CHECK: Function Attrs: noinline nounwind optnone -; CHECK-LABEL: @omp_noinline_optnone( -; CHECK-NEXT: call void @unknown() -; CHECK-NEXT: ret void -; - call void @unknown() - ret void -} -; _OMP namespace -define void @_ZN4_OMP_noinline_optnone() noinline optnone nounwind { -; CHECK: Function Attrs: noinline nounwind optnone -; CHECK-LABEL: @_ZN4_OMP_noinline_optnone( -; CHECK-NEXT: call void @unknown() -; CHECK-NEXT: ret void -; - call void @unknown() - ret void -} -define void @a___kmpc_noinline() noinline nounwind { -; CHECK: Function Attrs: noinline nounwind -; CHECK-LABEL: @a___kmpc_noinline( -; CHECK-NEXT: call void @unknown() -; CHECK-NEXT: ret void -; - call void @unknown() - ret void -} -define void @a_omp_noinline() noinline nounwind { -; CHECK: Function Attrs: noinline nounwind -; CHECK-LABEL: @a_omp_noinline( -; CHECK-NEXT: call void @unknown() -; CHECK-NEXT: ret void -; - call void @unknown() - ret void -} -define void @a__ZN4_OMP_noinline() noinline nounwind { -; CHECK: Function Attrs: noinline nounwind -; CHECK-LABEL: @a__ZN4_OMP_noinline( -; CHECK-NEXT: call void @unknown() -; CHECK-NEXT: ret void -; - call void @unknown() - ret void -} - -!llvm.module.flags = !{!0, !1} - -!0 = !{i32 7, !"openmp", i32 50} -!1 = !{i32 7, !"openmp-device", i32 50} diff --git a/openmp/libomptarget/DeviceRTL/include/Synchronization.h b/openmp/libomptarget/DeviceRTL/include/Synchronization.h --- a/openmp/libomptarget/DeviceRTL/include/Synchronization.h +++ b/openmp/libomptarget/DeviceRTL/include/Synchronization.h @@ -29,15 +29,13 @@ /// Synchronizing threads is allowed even if they all hit different instances of /// `synchronize::threads()`. However, `synchronize::threadsAligned()` is more -/// restrictive in that it requires all threads to hit the same instance. The -/// noinline is removed by the openmp-opt pass and helps to preserve the -/// information till then. +/// restrictive in that it requires all threads to hit the same instance. ///{ #pragma omp begin assumes ext_aligned_barrier /// Synchronize all threads in a block, they are are reaching the same /// instruction (hence all threads in the block are "aligned"). -__attribute__((noinline)) void threadsAligned(); +void threadsAligned(); #pragma omp end assumes ///} diff --git a/openmp/libomptarget/DeviceRTL/src/Mapping.cpp b/openmp/libomptarget/DeviceRTL/src/Mapping.cpp --- a/openmp/libomptarget/DeviceRTL/src/Mapping.cpp +++ b/openmp/libomptarget/DeviceRTL/src/Mapping.cpp @@ -289,17 +289,17 @@ ///} extern "C" { -__attribute__((noinline)) uint32_t __kmpc_get_hardware_thread_id_in_block() { +uint32_t __kmpc_get_hardware_thread_id_in_block() { FunctionTracingRAII(); return mapping::getThreadIdInBlock(); } -__attribute__((noinline)) uint32_t __kmpc_get_hardware_num_threads_in_block() { +uint32_t __kmpc_get_hardware_num_threads_in_block() { FunctionTracingRAII(); return impl::getNumHardwareThreadsInBlock(); } -__attribute__((noinline)) uint32_t __kmpc_get_warp_size() { +uint32_t __kmpc_get_warp_size() { FunctionTracingRAII(); return impl::getWarpSize(); } diff --git a/openmp/libomptarget/DeviceRTL/src/Parallelism.cpp b/openmp/libomptarget/DeviceRTL/src/Parallelism.cpp --- a/openmp/libomptarget/DeviceRTL/src/Parallelism.cpp +++ b/openmp/libomptarget/DeviceRTL/src/Parallelism.cpp @@ -243,8 +243,7 @@ __kmpc_end_sharing_variables(); } -__attribute__((noinline)) bool -__kmpc_kernel_parallel(ParallelRegionFnTy *WorkFn) { +bool __kmpc_kernel_parallel(ParallelRegionFnTy *WorkFn) { FunctionTracingRAII(); // Work function and arguments for L1 parallel region. *WorkFn = state::ParallelRegionFn; @@ -259,7 +258,7 @@ return ThreadIsActive; } -__attribute__((noinline)) void __kmpc_kernel_end_parallel() { +void __kmpc_kernel_end_parallel() { FunctionTracingRAII(); // In case we have modified an ICV for this thread before a ThreadState was // created. We drop it now to not contaminate the next parallel region. diff --git a/openmp/libomptarget/DeviceRTL/src/State.cpp b/openmp/libomptarget/DeviceRTL/src/State.cpp --- a/openmp/libomptarget/DeviceRTL/src/State.cpp +++ b/openmp/libomptarget/DeviceRTL/src/State.cpp @@ -393,12 +393,12 @@ } extern "C" { -__attribute__((noinline)) void *__kmpc_alloc_shared(uint64_t Bytes) { +void *__kmpc_alloc_shared(uint64_t Bytes) { FunctionTracingRAII(); return memory::allocShared(Bytes, "Frontend alloc shared"); } -__attribute__((noinline)) void __kmpc_free_shared(void *Ptr, uint64_t Bytes) { +void __kmpc_free_shared(void *Ptr, uint64_t Bytes) { FunctionTracingRAII(); memory::freeShared(Ptr, Bytes, "Frontend free shared"); } diff --git a/openmp/libomptarget/DeviceRTL/src/Synchronization.cpp b/openmp/libomptarget/DeviceRTL/src/Synchronization.cpp --- a/openmp/libomptarget/DeviceRTL/src/Synchronization.cpp +++ b/openmp/libomptarget/DeviceRTL/src/Synchronization.cpp @@ -358,14 +358,12 @@ impl::namedBarrier(); } -__attribute__((noinline)) void __kmpc_barrier_simple_spmd(IdentTy *Loc, - int32_t TId) { +void __kmpc_barrier_simple_spmd(IdentTy *Loc, int32_t TId) { FunctionTracingRAII(); synchronize::threadsAligned(); } -__attribute__((noinline)) void __kmpc_barrier_simple_generic(IdentTy *Loc, - int32_t TId) { +void __kmpc_barrier_simple_generic(IdentTy *Loc, int32_t TId) { FunctionTracingRAII(); synchronize::threads(); }