diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def b/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def
--- a/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def
+++ b/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def
@@ -176,6 +176,34 @@
 __OMP_RTL(__kmpc_end_serialized_parallel, false, Void, IdentPtr, Int32)
 
 __OMP_RTL(omp_get_thread_num, false, Int32, )
+__OMP_RTL(omp_get_num_threads, false, Int32, )
+__OMP_RTL(omp_get_max_threads, false, Int32, )
+__OMP_RTL(omp_in_parallel, false, Int32, )
+__OMP_RTL(omp_get_dynamic, false, Int32, )
+__OMP_RTL(omp_get_cancellation, false, Int32, )
+__OMP_RTL(omp_get_nested, false, Int32, )
+__OMP_RTL(omp_get_schedule, false, Void, Int32Ptr, Int32Ptr)
+__OMP_RTL(omp_get_thread_limit, false, Int32, )
+__OMP_RTL(omp_get_supported_active_levels, false, Int32, )
+__OMP_RTL(omp_get_max_active_levels, false, Int32, )
+__OMP_RTL(omp_get_level, false, Int32, )
+__OMP_RTL(omp_get_ancestor_thread_num, false, Int32, Int32)
+__OMP_RTL(omp_get_team_size, false, Int32, Int32)
+__OMP_RTL(omp_get_active_level, false, Int32, )
+__OMP_RTL(omp_in_final, false, Int32, )
+__OMP_RTL(omp_get_proc_bind, false, Int32, )
+__OMP_RTL(omp_get_num_places, false, Int32, )
+__OMP_RTL(omp_get_num_procs, false, Int32, )
+__OMP_RTL(omp_get_place_proc_ids, false, Void, Int32, Int32Ptr)
+__OMP_RTL(omp_get_place_num, false, Int32, )
+__OMP_RTL(omp_get_partition_num_places, false, Int32, )
+__OMP_RTL(omp_get_partition_place_nums, false, Void, Int32Ptr)
+
+__OMP_RTL(omp_set_num_threads, false, Void, Int32)
+__OMP_RTL(omp_set_dynamic, false, Void, Int32)
+__OMP_RTL(omp_set_nested, false, Void, Int32)
+__OMP_RTL(omp_set_schedule, false, Void, Int32, Int32)
+__OMP_RTL(omp_set_max_active_levels, false, Void, Int32)
 
 __OMP_RTL(__last, false, Void, )
 
@@ -197,6 +225,19 @@
                     ? AttributeSet(EnumAttr(NoUnwind), EnumAttr(ReadOnly),
                                    EnumAttr(NoSync), EnumAttr(NoFree))
                     : AttributeSet(EnumAttr(NoUnwind)))
+// Like GetterAttrs, but for getters that return results through pointer
+// arguments; ReadOnly is dropped because they write caller memory.
+__OMP_ATTRS_SET(GetterArgWriteAttrs,
+                OptimisticAttributes
+                    ? AttributeSet(EnumAttr(NoUnwind), EnumAttr(NoSync),
+                                   EnumAttr(NoFree))
+                    : AttributeSet(EnumAttr(NoUnwind)))
+// For the omp_set_* routines; optimistically marked WriteOnly.
+__OMP_ATTRS_SET(SetterAttrs,
+                OptimisticAttributes
+                    ? AttributeSet(EnumAttr(NoUnwind), EnumAttr(WriteOnly),
+                                   EnumAttr(NoSync), EnumAttr(NoFree))
+                    : AttributeSet(EnumAttr(NoUnwind)))
 
 #undef __OMP_ATTRS_SET
 #undef OMP_ATTRS_SET
@@ -213,6 +254,44 @@
 
 __OMP_RTL_ATTRS(__kmpc_global_thread_num, GetterAttrs, AttributeSet(), {})
 __OMP_RTL_ATTRS(omp_get_thread_num, GetterAttrs, AttributeSet(), {})
+__OMP_RTL_ATTRS(omp_get_num_threads, GetterAttrs, AttributeSet(), {})
+__OMP_RTL_ATTRS(omp_get_max_threads, GetterAttrs, AttributeSet(), {})
+__OMP_RTL_ATTRS(omp_in_parallel, GetterAttrs, AttributeSet(), {})
+__OMP_RTL_ATTRS(omp_get_dynamic, GetterAttrs, AttributeSet(), {})
+__OMP_RTL_ATTRS(omp_get_cancellation, GetterAttrs, AttributeSet(), {})
+__OMP_RTL_ATTRS(omp_get_nested, GetterAttrs, AttributeSet(), {})
+__OMP_RTL_ATTRS(omp_get_schedule, GetterArgWriteAttrs, AttributeSet(),
+                ArrayRef<AttributeSet>(
+                    {AttributeSet(EnumAttr(NoCapture), EnumAttr(WriteOnly)),
+                     AttributeSet(EnumAttr(NoCapture), EnumAttr(WriteOnly))}))
+__OMP_RTL_ATTRS(omp_get_thread_limit, GetterAttrs, AttributeSet(), {})
+__OMP_RTL_ATTRS(omp_get_supported_active_levels, GetterAttrs, AttributeSet(),
+                {})
+__OMP_RTL_ATTRS(omp_get_max_active_levels, GetterAttrs, AttributeSet(), {})
+__OMP_RTL_ATTRS(omp_get_level, GetterAttrs, AttributeSet(), {})
+__OMP_RTL_ATTRS(omp_get_ancestor_thread_num, GetterAttrs, AttributeSet(), {})
+__OMP_RTL_ATTRS(omp_get_team_size, GetterAttrs, AttributeSet(), {})
+__OMP_RTL_ATTRS(omp_get_active_level, GetterAttrs, AttributeSet(), {})
+__OMP_RTL_ATTRS(omp_in_final, GetterAttrs, AttributeSet(), {})
+__OMP_RTL_ATTRS(omp_get_proc_bind, GetterAttrs, AttributeSet(), {})
+__OMP_RTL_ATTRS(omp_get_num_places, GetterAttrs, AttributeSet(), {})
+__OMP_RTL_ATTRS(omp_get_num_procs, GetterAttrs, AttributeSet(), {})
+__OMP_RTL_ATTRS(omp_get_place_proc_ids, GetterArgWriteAttrs, AttributeSet(),
+                ArrayRef<AttributeSet>({AttributeSet(),
+                                        AttributeSet(EnumAttr(NoCapture),
+                                                     EnumAttr(WriteOnly))}))
+__OMP_RTL_ATTRS(omp_get_place_num, GetterAttrs, AttributeSet(), {})
+__OMP_RTL_ATTRS(omp_get_partition_num_places, GetterAttrs, AttributeSet(), {})
+__OMP_RTL_ATTRS(omp_get_partition_place_nums, GetterArgWriteAttrs,
+                AttributeSet(),
+                ArrayRef<AttributeSet>({AttributeSet(EnumAttr(NoCapture),
+                                                     EnumAttr(WriteOnly))}))
+
+__OMP_RTL_ATTRS(omp_set_num_threads, SetterAttrs, AttributeSet(), {})
+__OMP_RTL_ATTRS(omp_set_dynamic, SetterAttrs, AttributeSet(), {})
+__OMP_RTL_ATTRS(omp_set_nested, SetterAttrs, AttributeSet(), {})
+__OMP_RTL_ATTRS(omp_set_schedule, SetterAttrs, AttributeSet(), {})
+__OMP_RTL_ATTRS(omp_set_max_active_levels, SetterAttrs, AttributeSet(), {})
 
 #undef __OMP_RTL_ATTRS
 #undef OMP_RTL_ATTRS
diff --git a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp
--- a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp
+++ b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp
@@ -19,6 +19,7 @@
 #include "llvm/Analysis/CallGraph.h"
 #include "llvm/Analysis/CallGraphSCCPass.h"
 #include "llvm/Frontend/OpenMP/OMPConstants.h"
+#include "llvm/Frontend/OpenMP/OMPIRBuilder.h"
 #include "llvm/IR/CallSite.h"
 #include "llvm/InitializePasses.h"
 #include "llvm/Support/CommandLine.h"
@@ -52,9 +53,10 @@
             SmallPtrSetImpl<Function *> &ModuleSlice,
             CallGraphUpdater &CGUpdater)
       : M(*(*SCC.begin())->getParent()), SCC(SCC), ModuleSlice(ModuleSlice),
-        CGUpdater(CGUpdater) {
+        OMPBuilder(M), CGUpdater(CGUpdater) {
     initializeTypes(M);
     initializeRuntimeFunctions();
+    OMPBuilder.initialize();
   }
 
   /// Generic information that describes a runtime function
@@ -118,12 +120,38 @@
   bool deduplicateRuntimeCalls() {
     bool Changed = false;
 
+    // Runtime functions whose repeated calls in one function are
+    // deduplicated below. omp_get_partition_place_nums is deliberately absent:
+    // it returns void and reports its result through a caller buffer.
+    constexpr RuntimeFunction DeduplicableRuntimeCallIDs[] = {
+        OMPRTL_omp_get_num_threads,
+        OMPRTL_omp_in_parallel,
+        OMPRTL_omp_get_cancellation,
+        OMPRTL_omp_get_thread_limit,
+        OMPRTL_omp_get_supported_active_levels,
+        OMPRTL_omp_get_level,
+        OMPRTL_omp_get_ancestor_thread_num,
+        OMPRTL_omp_get_team_size,
+        OMPRTL_omp_get_active_level,
+        OMPRTL_omp_in_final,
+        OMPRTL_omp_get_proc_bind,
+        OMPRTL_omp_get_num_places,
+        OMPRTL_omp_get_num_procs,
+        OMPRTL_omp_get_place_num,
+        OMPRTL_omp_get_partition_num_places};
+
+    // Global-tid is handled separately.
     SmallSetVector<Value *, 16> GTIdArgs;
     collectGlobalThreadIdArguments(GTIdArgs);
     LLVM_DEBUG(dbgs() << TAG << "Found " << GTIdArgs.size()
                       << " global thread ID arguments\n");
 
     for (Function *F : SCC) {
+      for (auto DeduplicableRuntimeCallID : DeduplicableRuntimeCallIDs)
+        Changed |= deduplicateRuntimeCalls(*F, RFIs[DeduplicableRuntimeCallID]);
+
+      // __kmpc_global_thread_num is special as we can replace it with an
+      // argument in enough cases to make it worth trying.
       Value *GTIdArg = nullptr;
       for (Argument &Arg : F->args())
         if (GTIdArgs.count(&Arg)) {
@@ -132,7 +160,6 @@
         }
       Changed |= deduplicateRuntimeCalls(
           *F, RFIs[OMPRTL___kmpc_global_thread_num], GTIdArg);
-      Changed |= deduplicateRuntimeCalls(*F, RFIs[OMPRTL_omp_get_thread_num]);
     }
 
     return Changed;
@@ -259,6 +286,7 @@
     unsigned NumUses = 0;
     if (!RFI.Declaration)
       return NumUses;
+    OMPBuilder.addAttributes(RFI.Kind, *RFI.Declaration);
 
     NumOpenMPRuntimeFunctionsIdentified += 1;
     NumOpenMPRuntimeFunctionUsesIdentified += RFI.Declaration->getNumUses();
@@ -312,6 +340,9 @@
   /// The slice of the module we are allowed to look at.
   SmallPtrSetImpl<Function *> &ModuleSlice;
 
+  /// An OpenMP-IR-Builder instance
+  OpenMPIRBuilder OMPBuilder;
+
   /// Callback to update the call graph, the first argument is a removed call,
   /// the second an optional replacement call.
   CallGraphUpdater &CGUpdater;