diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def b/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def
--- a/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def
+++ b/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def
@@ -144,9 +144,11 @@
 __OMP_TYPE(Void)
 __OMP_TYPE(Int1)
 __OMP_TYPE(Int8)
+__OMP_TYPE(Int16)
 __OMP_TYPE(Int32)
 __OMP_TYPE(Int64)
 __OMP_TYPE(Int8Ptr)
+__OMP_TYPE(Int16Ptr)
 __OMP_TYPE(Int32Ptr)
 __OMP_TYPE(Int64Ptr)
 
@@ -500,6 +502,13 @@
 __OMP_RTL(__kmpc_task_allow_completion_event, false, VoidPtr, IdentPtr,
           /* Int */ Int32, /* kmp_task_t */ VoidPtr)
 
+__OMP_RTL(__kmpc_data_sharing_init_stack, false, Void, )
+__OMP_RTL(__kmpc_data_sharing_init_stack_spmd, false, Void, )
+__OMP_RTL(__kmpc_data_sharing_coalesced_push_stack, false, VoidPtr, SizeTy,
+          Int16)
+__OMP_RTL(__kmpc_data_sharing_push_stack, false, VoidPtr, SizeTy, Int16)
+__OMP_RTL(__kmpc_data_sharing_pop_stack, false, Void, VoidPtr)
+
 /// Note that device runtime functions (in the following) do not necessarily
 /// need attributes as we expect to see the definitions.
 __OMP_RTL(__kmpc_kernel_parallel, false, Int1, VoidPtrPtr)
diff --git a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp
--- a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp
+++ b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp
@@ -476,6 +476,12 @@
       : M(*(*SCC.begin())->getParent()), SCC(SCC), CGUpdater(CGUpdater),
         OREGetter(OREGetter), OMPInfoCache(OMPInfoCache), A(A) {}
 
+  /// Check if any remarks are enabled for openmp-opt
+  bool remarksEnabled() {
+    auto &Ctx = M.getContext();
+    return Ctx.getDiagHandlerPtr()->isAnyRemarkEnabled(DEBUG_TYPE);
+  }
+
   /// Run all OpenMP optimizations on the underlying SCC/ModuleSlice.
   bool run() {
     if (SCC.empty())
@@ -503,6 +509,8 @@
     Changed |= deleteParallelRegions();
     if (HideMemoryTransferLatency)
       Changed |= hideMemTransfersLatency();
+    if (remarksEnabled())
+      analysisGlobalization();
 
     return Changed;
   }
@@ -695,6 +703,29 @@
     return Changed;
   }
 
+  /// Emit an analysis remark at each regular call to the runtime function
+  /// __kmpc_data_sharing_coalesced_push_stack found in the SCC's functions.
+  void analysisGlobalization() {
+    auto &RFI =
+        OMPInfoCache.RFIs[OMPRTL___kmpc_data_sharing_coalesced_push_stack];
+    for (Function *F : SCC) {
+      auto *UV = RFI.getUseVector(*F);
+      // No recorded uses in this function; keep scanning the rest of the SCC.
+      if (!UV)
+        continue;
+      for (Use *U : *UV) {
+        if (CallInst *CI = getCallIfRegularCall(*U, &RFI)) {
+          auto Remark = [&](OptimizationRemarkAnalysis ORA) {
+            return ORA << "Found thread data sharing on the GPU. Expect "
+                       << "degraded performance due to data globalization.";
+          };
+          emitRemark<OptimizationRemarkAnalysis>(CI, "OpenMPGlobalization",
+                                                 Remark);
+        }
+      }
+    }
+  }
+
   /// Maps the values stored in the offload arrays passed as arguments to
   /// \p RuntimeCall into the offload arrays in \p OAs.
   bool getValuesInOffloadArrays(CallInst &RuntimeCall,