diff --git a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp
--- a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp
+++ b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp
@@ -97,6 +97,11 @@
     cl::desc("Print the current module after OpenMP optimizations."),
     cl::Hidden, cl::init(false));
 
+static cl::opt<bool> AlwaysInlineDeviceFunctions(
+    "openmp-opt-inline-device", cl::ZeroOrMore,
+    cl::desc("Inline all applicable functions on the device."),
+    cl::Hidden, cl::init(false));
+
 STATISTIC(NumOpenMPRuntimeCallsDeduplicated,
           "Number of OpenMP runtime calls deduplicated");
 STATISTIC(NumOpenMPParallelRegionsDeleted,
@@ -4481,6 +4486,13 @@
   OpenMPOpt OMPOpt(SCC, CGUpdater, OREGetter, InfoCache, A);
   bool Changed = OMPOpt.run(true);
 
+  // Optionally inline device functions for potentially better performance.
+  if (AlwaysInlineDeviceFunctions)
+    if (isOpenMPDevice(M))
+      for (Function &F : M)
+        if (!F.isDeclaration() && !F.hasFnAttribute(Attribute::NoInline))
+          F.addFnAttr(Attribute::AlwaysInline);
+
   if (PrintModuleAfterOptimizations)
     LLVM_DEBUG(dbgs() << TAG << "Module after OpenMPOpt Module Pass:\n" << M);
 