diff --git a/.gitignore b/.gitignore
--- a/.gitignore
+++ b/.gitignore
@@ -70,3 +70,4 @@
 /clang/utils/analyzer/projects/*/RefScanBuildResults
 # automodapi puts generated documentation files here.
 /lldb/docs/python_api/
+/clang_build/*
diff --git a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp
--- a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp
+++ b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp
@@ -107,6 +107,11 @@
     cl::desc("Disable OpenMP optimizations that eliminate barriers."),
     cl::Hidden, cl::init(false));
 
+static cl::opt<bool> DisableOpenMPOptParallelCodeMotion(
+    "openmp-opt-disable-code-motion", cl::ZeroOrMore,
+    cl::desc("Disable OpenMP optimizations that perform code motion."),
+    cl::Hidden, cl::init(false));
+
 static cl::opt<bool> PrintModuleAfterOptimizations(
     "openmp-opt-print-module-after",
     cl::desc("Print the current module after OpenMP optimizations."),
@@ -167,6 +172,8 @@
 STATISTIC(NumBytesMovedToSharedMemory,
           "Amount of memory pushed to shared memory");
 STATISTIC(NumBarriersEliminated, "Number of redundant barriers eliminated");
+STATISTIC(NumOpenMPParallelRegionsForCodeMotion,
+          "Number of OpenMP parallel regions for Code Motion");
 
 #if !defined(NDEBUG)
 static constexpr auto TAG = "[" DEBUG_TYPE "]";
@@ -844,6 +851,7 @@
       }
 
       Changed |= eliminateBarriers();
+      Changed |= parallelCodeMotion();
     }
 
     return Changed;
@@ -911,6 +919,84 @@
   }
 
 private:
+  /// Move speculatable, constant-operand instructions out of parallel regions.
+  ///
+  /// For every regular call to __kmpc_fork_call visited in the SCC, the
+  /// outlined function passed as callback callee is scanned. An instruction is
+  /// moved in front of the fork call if it has no users, cannot throw, does not
+  /// access memory, is safe to speculatively execute, and has only constant
+  /// operands.
+  ///
+  /// \returns true if at least one instruction was moved.
+  bool parallelCodeMotion() {
+    if (DisableOpenMPOptParallelCodeMotion)
+      return false;
+
+    OMPInformationCache::RuntimeFunctionInfo &RFI =
+        OMPInfoCache.RFIs[OMPRTL___kmpc_fork_call];
+    if (!RFI.Declaration)
+      return false;
+
+    // Argument index of the callback callee in a __kmpc_fork_call call.
+    const unsigned CallbackCalleeOperand = 2;
+    bool Changed = false;
+
+    // True if every operand of \p I is a constant.
+    auto HasOnlyConstantOperands = [](const Instruction &I) {
+      for (const Use &Op : I.operands())
+        if (!isa<Constant>(Op.get()))
+          return false;
+      return true;
+    };
+
+    auto HoistFromParallelRegion = [&](Use &U, Function &) {
+      CallInst *CI = getCallIfRegularCall(U);
+      if (!CI)
+        return false;
+
+      // dyn_cast yields null when the callee operand is not a Function, and a
+      // declaration has no body to scan; bail out in both cases.
+      auto *Fn = dyn_cast<Function>(
+          CI->getArgOperand(CallbackCalleeOperand)->stripPointerCasts());
+      if (!Fn || Fn->isDeclaration())
+        return false;
+
+      // Collect per call site, in program order, so that the result is
+      // deterministic and nothing is moved again for a later call site.
+      SmallVector<Instruction *, 16> ToHoist;
+      for (BasicBlock &BB : *Fn) {
+        for (Instruction &I : BB) {
+          // A value that still has users inside the outlined function cannot
+          // be moved into the caller: the IR verifier rejects uses of an
+          // instruction that lives in a different function.
+          // FIXME: Pass hoisted values to the parallel region as arguments so
+          // that instructions with users can be handled as well.
+          if (!I.use_empty())
+            continue;
+          if (!I.mayThrow() && !I.mayReadOrWriteMemory() &&
+              isSafeToSpeculativelyExecute(&I) && HasOnlyConstantOperands(I))
+            ToHoist.push_back(&I);
+        }
+      }
+      if (ToHoist.empty())
+        return false;
+
+      for (Instruction *I : ToHoist) {
+        LLVM_DEBUG(dbgs() << TAG << "Instruction for CodeMotion : " << *I
+                          << "\n");
+        I->moveBefore(CI);
+      }
+      ++NumOpenMPParallelRegionsForCodeMotion;
+      Changed = true;
+
+      // The use of __kmpc_fork_call itself is left in place.
+      return false;
+    };
+
+    RFI.foreachUse(SCC, HoistFromParallelRegion);
+    return Changed;
+  }
+
   /// Merge parallel regions when it is safe.
   bool mergeParallelRegions() {
     const unsigned CallbackCalleeOperand = 2;