diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPConstants.h b/llvm/include/llvm/Frontend/OpenMP/OMPConstants.h --- a/llvm/include/llvm/Frontend/OpenMP/OMPConstants.h +++ b/llvm/include/llvm/Frontend/OpenMP/OMPConstants.h @@ -49,6 +49,16 @@ #define OMP_RTL(Enum, ...) constexpr auto Enum = omp::RuntimeFunction::Enum; #include "llvm/Frontend/OpenMP/OMPKinds.def" +/// IDs for the different proc bind kinds. +enum class ProcBindKind { +#define OMP_PROC_BIND_KIND(Enum, Str, Value) Enum = Value, +#include "llvm/Frontend/OpenMP/OMPKinds.def" +}; + +#define OMP_PROC_BIND_KIND(Enum, ...) \ + constexpr auto Enum = omp::ProcBindKind::Enum; +#include "llvm/Frontend/OpenMP/OMPKinds.def" + /// IDs for all omp runtime library ident_t flag encodings (see /// their defintion in openmp/runtime/src/kmp.h). enum class IdentFlag { @@ -67,8 +77,8 @@ StringRef getOpenMPDirectiveName(Directive D); /// Forward declarations for LLVM-IR types (simple, function and structure) are -/// generated below. Their names are defined and used in OpenMPKinds.def. Here -/// we provide the forward declarations, the initializeTypes function will +/// generated below. Their names are defined and used in OpenMP/OMPKinds.def. +/// Here we provide the forward declarations, the initializeTypes function will /// provide the values. /// ///{ @@ -83,10 +93,10 @@ extern PointerType *VarName##Ptr; #include "llvm/Frontend/OpenMP/OMPKinds.def" -/// Helper to initialize all types defined in OpenMPKinds.def. +/// Helper to initialize all types defined in OpenMP/OMPKinds.def. void initializeTypes(Module &M); -/// Helper to uninitialize all types defined in OpenMPKinds.def. +/// Helper to uninitialize all types defined in OpenMP/OMPKinds.def. 
void uninitializeTypes(); } // namespace types diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h --- a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h +++ b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h @@ -75,6 +75,40 @@ /// NOTE: Temporary solution until Clang CG is gone. void popFinalizationCB() { FinalizationStack.pop_back(); } + /// Callback type for body (=inner region) code generation + /// + /// The callback takes code locations as arguments, each describing a + /// location at which code might need to be generated or a location that is + /// the target of control transfer. + /// + /// \param AllocaIP is the insertion point at which new alloca instructions + /// should be placed. + /// \param CodeGenIP is the insertion point at which the body code should be + /// placed. + /// \param ContinuationBB is the basic block target to leave the body. + /// + /// Note that all blocks pointed to by the arguments have terminators. + using BodyGenCallbackTy = function_ref; + + /// Callback type for variable privatization (think copy & default + /// constructor). + /// + /// \param AllocaIP is the insertion point at which new alloca instructions + /// should be placed. + /// \param CodeGenIP is the insertion point at which the privatization code + /// should be placed. + /// \param Val The value being copied/created. + /// \param ReplVal The replacement value, thus a copy or new created version + /// of \p Val. + /// + /// \returns The new insertion point where code generation continues and + /// \p ReplVal the replacement of \p Val. + using PrivatizeCallbackTy = function_ref; + /// Description of a LLVM-IR insertion point (IP) and a debug/source location /// (filename, line, column, ...). struct LocationDescription { @@ -105,6 +139,24 @@ bool ForceSimpleCall = false, bool CheckCancelFlag = true); + /// Generator for '#omp parallel' + /// + /// \param Loc The insert and source location description. 
+ /// \param BodyGenCB Callback that will generate the region code. + /// \param PrivCB Callback to copy a given variable (think copy constructor). + /// \param FiniCB Callback to finalize variable copies. + /// \param IfCondition The evaluated 'if' clause expression, if any. + /// \param NumThreads The evaluated 'num_threads' clause expression, if any. + /// \param ProcBind The value of the 'proc_bind' clause (see ProcBindKind). + /// \param IsCancellable Flag to indicate a cancellable parallel region. + /// + /// \returns The insertion position *after* the parallel. + IRBuilder<>::InsertPoint + CreateParallel(const LocationDescription &Loc, BodyGenCallbackTy BodyGenCB, + PrivatizeCallbackTy PrivCB, FinalizeCallbackTy FiniCB, + Value *IfCondition, Value *NumThreads, + omp::ProcBindKind ProcBind, bool IsCancellable); + ///} private: diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def b/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def --- a/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def +++ b/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def @@ -167,6 +167,11 @@ __OMP_RTL(__kmpc_cancel_barrier, false, Int32, IdentPtr, Int32) __OMP_RTL(__kmpc_global_thread_num, false, Int32, IdentPtr) __OMP_RTL(__kmpc_fork_call, true, Void, IdentPtr, Int32, ParallelTaskPtr) +__OMP_RTL(__kmpc_push_num_threads, false, Void, IdentPtr, Int32, /* Int */Int32) +__OMP_RTL(__kmpc_push_proc_bind, false, Void, IdentPtr, Int32, /* Int */Int32) +__OMP_RTL(__kmpc_serialized_parallel, false, Void, IdentPtr, Int32) +__OMP_RTL(__kmpc_end_serialized_parallel, false, Void, IdentPtr, Int32) + __OMP_RTL(omp_get_thread_num, false, Int32, ) #undef __OMP_RTL @@ -234,3 +239,26 @@ #undef OMP_IDENT_FLAG ///} + + +/// Proc bind kinds +/// +///{ + +#ifndef OMP_PROC_BIND_KIND +#define OMP_PROC_BIND_KIND(Enum, Str, Value) +#endif + +#define __OMP_PROC_BIND_KIND(Name, Value) \ + OMP_PROC_BIND_KIND(OMP_PB_##Name, #Name, Value) + +__OMP_PROC_BIND_KIND(master, 2) +__OMP_PROC_BIND_KIND(close, 3) 
+__OMP_PROC_BIND_KIND(spread, 4) +__OMP_PROC_BIND_KIND(default, 6) +__OMP_PROC_BIND_KIND(unknown, 7) + +#undef __OMP_PROC_BIND_KIND +#undef OMP_PROC_BIND_KIND + +///} diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp --- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp +++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp @@ -16,10 +16,13 @@ #include "llvm/ADT/StringRef.h" #include "llvm/ADT/StringSwitch.h" +#include "llvm/IR/CFG.h" #include "llvm/IR/DebugInfo.h" #include "llvm/IR/IRBuilder.h" #include "llvm/Support/CommandLine.h" +#include "llvm/Support/Error.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" +#include "llvm/Transforms/Utils/CodeExtractor.h" #include @@ -216,8 +219,17 @@ if (UseCancelBarrier && CheckCancelFlag) { // For a cancel barrier we create two new blocks. BasicBlock *BB = Builder.GetInsertBlock(); - BasicBlock *NonCancellationBlock = BasicBlock::Create( - BB->getContext(), BB->getName() + ".cont", BB->getParent()); + BasicBlock *NonCancellationBlock; + if (Builder.GetInsertPoint() == BB->end()) { + // TODO: This branch will not be needed once we have moved to the + // OpenMPIRBuilder codegen completely. + NonCancellationBlock = BasicBlock::Create( + BB->getContext(), BB->getName() + ".cont", BB->getParent()); + } else { + NonCancellationBlock = SplitBlock(BB, &*Builder.GetInsertPoint()); + BB->getTerminator()->eraseFromParent(); + Builder.SetInsertPoint(BB); + } BasicBlock *CancellationBlock = BasicBlock::Create( BB->getContext(), BB->getName() + ".cncl", BB->getParent()); @@ -233,8 +245,310 @@ FI.FiniCB(Builder.saveIP()); // The continuation block is where code generation continues. 
- Builder.SetInsertPoint(NonCancellationBlock); + Builder.SetInsertPoint(NonCancellationBlock, NonCancellationBlock->begin()); } return Builder.saveIP(); } + +IRBuilder<>::InsertPoint OpenMPIRBuilder::CreateParallel( + const LocationDescription &Loc, BodyGenCallbackTy BodyGenCB, + PrivatizeCallbackTy PrivCB, FinalizeCallbackTy FiniCB, Value *IfCondition, + Value *NumThreads, omp::ProcBindKind ProcBind, bool IsCancellable) { + if (!updateToLocation(Loc)) + return Loc.IP; + + Constant *SrcLocStr = getOrCreateSrcLocStr(Loc); + Value *Ident = getOrCreateIdent(SrcLocStr); + Value *ThreadID = getOrCreateThreadID(Ident); + + if (NumThreads) { + // Build call __kmpc_push_num_threads(&Ident, global_tid, num_threads) + Value *Args[] = { + Ident, ThreadID, + Builder.CreateIntCast(NumThreads, Int32, /*isSigned*/ false)}; + Builder.CreateCall( + getOrCreateRuntimeFunction(OMPRTL___kmpc_push_num_threads), Args); + } + + if (ProcBind != OMP_PB_default) { + // Build call __kmpc_push_proc_bind(&Ident, global_tid, proc_bind) + Value *Args[] = { + Ident, ThreadID, + ConstantInt::get(Int32, unsigned(ProcBind), /*isSigned=*/true)}; + Builder.CreateCall(getOrCreateRuntimeFunction(OMPRTL___kmpc_push_proc_bind), + Args); + } + + BasicBlock *InsertBB = Builder.GetInsertBlock(); + Function *OuterFn = InsertBB->getParent(); + + // Vector to remember instructions we used only during the modeling but which + // we want to delete at the end. + SmallVector ToBeDeleted; + + Builder.SetInsertPoint(OuterFn->getEntryBlock().getFirstNonPHI()); + AllocaInst *TIDAddr = Builder.CreateAlloca(Int32, nullptr, "tid.addr"); + AllocaInst *ZeroAddr = Builder.CreateAlloca(Int32, nullptr, "zero.addr"); + + // If there is an if condition we actually use the TIDAddr and ZeroAddr in the + // program, otherwise we only need them for modeling purposes to get the + // associated arguments in the outlined function. In the former case, + // initialize the allocas properly, in the latter case, delete them later. 
+ if (IfCondition) { + Builder.CreateStore(Constant::getNullValue(Int32), TIDAddr); + Builder.CreateStore(Constant::getNullValue(Int32), ZeroAddr); + } else { + ToBeDeleted.push_back(TIDAddr); + ToBeDeleted.push_back(ZeroAddr); + } + + // Create an artificial insertion point that will also ensure the blocks we + // are about to split are not degenerate. + auto *UI = new UnreachableInst(Builder.getContext(), InsertBB); + + Instruction *ThenTI = UI, *ElseTI = nullptr; + if (IfCondition) + SplitBlockAndInsertIfThenElse(IfCondition, UI, &ThenTI, &ElseTI); + + BasicBlock *ThenBB = ThenTI->getParent(); + BasicBlock *PRegEntryBB = ThenBB->splitBasicBlock(ThenTI, "omp.par.entry"); + BasicBlock *PRegBodyBB = + PRegEntryBB->splitBasicBlock(ThenTI, "omp.par.region"); + BasicBlock *PRegPreFiniBB = + PRegBodyBB->splitBasicBlock(ThenTI, "omp.par.pre_finalize"); + BasicBlock *PRegExitBB = + PRegPreFiniBB->splitBasicBlock(ThenTI, "omp.par.exit"); + + auto FiniCBWrapper = [&](InsertPointTy IP) { + // Hide "open-ended" blocks from the given FiniCB by setting the right jump + // target to the region exit block. + if (IP.getBlock()->end() == IP.getPoint()) { + IRBuilder<>::InsertPointGuard IPG(Builder); + Builder.restoreIP(IP); + Instruction *I = Builder.CreateBr(PRegExitBB); + IP = InsertPointTy(I->getParent(), I->getIterator()); + } + assert(IP.getBlock()->getTerminator()->getNumSuccessors() == 1 && + IP.getBlock()->getTerminator()->getSuccessor(0) == PRegExitBB && + "Unexpected insertion point for finalization call!"); + return FiniCB(IP); + }; + + FinalizationStack.push_back({FiniCBWrapper, OMPD_parallel, IsCancellable}); + + // Generate the privatization allocas in the block that will become the entry + // of the outlined function. 
+ InsertPointTy AllocaIP(PRegEntryBB, + PRegEntryBB->getTerminator()->getIterator()); + Builder.restoreIP(AllocaIP); + AllocaInst *PrivTIDAddr = + Builder.CreateAlloca(Int32, nullptr, "tid.addr.local"); + Instruction *PrivTID = Builder.CreateLoad(PrivTIDAddr, "tid"); + + // Add some fake uses for OpenMP provided arguments. + ToBeDeleted.push_back(Builder.CreateLoad(TIDAddr, "tid.addr.use")); + ToBeDeleted.push_back(Builder.CreateLoad(ZeroAddr, "zero.addr.use")); + + // ThenBB + // | + // V + // PRegEntryBB <- Privatization allocas are placed here. + // | + // V + // PRegBodyBB <- BodyGen is invoked here. + // | + // V + // PRegPreFiniBB <- The block we will start finalization from. + // | + // V + // PRegExitBB <- A common exit to simplify block collection. + // + + LLVM_DEBUG(dbgs() << "Before body codegen: " << *UI->getFunction() << "\n"); + + // Let the caller create the body. + assert(BodyGenCB && "Expected body generation callback!"); + InsertPointTy CodeGenIP(PRegBodyBB, PRegBodyBB->begin()); + BodyGenCB(AllocaIP, CodeGenIP, *PRegPreFiniBB); + + LLVM_DEBUG(dbgs() << "After body codegen: " << *UI->getFunction() << "\n"); + + SmallPtrSet ParallelRegionBlockSet; + SmallVector ParallelRegionBlocks, Worklist; + ParallelRegionBlockSet.insert(PRegEntryBB); + ParallelRegionBlockSet.insert(PRegExitBB); + + // Collect all blocks in-between PRegEntryBB and PRegExitBB. 
+ Worklist.push_back(PRegEntryBB); + while (!Worklist.empty()) { + BasicBlock *BB = Worklist.pop_back_val(); + ParallelRegionBlocks.push_back(BB); + for (BasicBlock *SuccBB : successors(BB)) + if (ParallelRegionBlockSet.insert(SuccBB).second) + Worklist.push_back(SuccBB); + } + + CodeExtractorAnalysisCache CEAC(*OuterFn); + CodeExtractor Extractor(ParallelRegionBlocks, /* DominatorTree */ nullptr, + /* AggregateArgs */ false, + /* BlockFrequencyInfo */ nullptr, + /* BranchProbabilityInfo */ nullptr, + /* AssumptionCache */ nullptr, + /* AllowVarArgs */ true, + /* AllowAlloca */ true, + /* Suffix */ ".omp_par"); + + // Find inputs to, outputs from the code region. + BasicBlock *CommonExit = nullptr; + SetVector Inputs, Outputs, SinkingCands, HoistingCands; + Extractor.findAllocas(CEAC, SinkingCands, HoistingCands, CommonExit); + Extractor.findInputsOutputs(Inputs, Outputs, SinkingCands); + + LLVM_DEBUG(dbgs() << "Before privatization: " << *UI->getFunction() << "\n"); + + FunctionCallee TIDRTLFn = + getOrCreateRuntimeFunction(OMPRTL___kmpc_global_thread_num); + + auto PrivHelper = [&](Value &V) { + if (&V == TIDAddr || &V == ZeroAddr) + return; + + SmallVector Uses; + for (Use &U : V.uses()) + if (auto *UserI = dyn_cast(U.getUser())) + if (ParallelRegionBlockSet.count(UserI->getParent())) + Uses.push_back(&U); + + Value *ReplacementValue = nullptr; + CallInst *CI = dyn_cast(&V); + if (CI && CI->getCalledFunction() == TIDRTLFn.getCallee()) { + ReplacementValue = PrivTID; + } else { + Builder.restoreIP( + PrivCB(AllocaIP, Builder.saveIP(), V, ReplacementValue)); + assert(ReplacementValue && + "Expected copy/create callback to set replacement value!"); + if (ReplacementValue == &V) + return; + } + + for (Use *UPtr : Uses) + UPtr->set(ReplacementValue); + }; + + for (Value *Input : Inputs) { + LLVM_DEBUG(dbgs() << "Captured input: " << *Input << "\n"); + PrivHelper(*Input); + } + for (Value *Output : Outputs) { + LLVM_DEBUG(dbgs() << "Captured output: " << *Output << 
"\n"); + PrivHelper(*Output); + } + + LLVM_DEBUG(dbgs() << "After privatization: " << *UI->getFunction() << "\n"); + LLVM_DEBUG({ + for (auto *BB : ParallelRegionBlocks) + dbgs() << " PBR: " << BB->getName() << "\n"; + }); + + Function *OutlinedFn = Extractor.extractCodeRegion(CEAC); + LLVM_DEBUG(dbgs() << "After outlining: " << *UI->getFunction() << "\n"); + LLVM_DEBUG(dbgs() << " Outlined function: " << *OutlinedFn << "\n"); + + // Remove the artificial entry introduced by the extractor right away, we + // made our own entry block after all. + { + BasicBlock &ArtificialEntry = OutlinedFn->getEntryBlock(); + assert(ArtificialEntry.getUniqueSuccessor() == PRegEntryBB); + assert(PRegEntryBB->getUniquePredecessor() == &ArtificialEntry); + PRegEntryBB->moveBefore(&ArtificialEntry); + ArtificialEntry.eraseFromParent(); + } + LLVM_DEBUG(dbgs() << "PP Outlined function: " << *OutlinedFn << "\n"); + assert(&OutlinedFn->getEntryBlock() == PRegEntryBB); + + assert(OutlinedFn && OutlinedFn->getNumUses() == 1); + assert(OutlinedFn->arg_size() >= 2 && + "Expected at least tid and bounded tid as arguments"); + unsigned NumCapturedVars = OutlinedFn->arg_size() - /* tid & bounded tid */ 2; + + CallInst *CI = cast(OutlinedFn->user_back()); + CI->getParent()->setName("omp_parallel"); + Builder.SetInsertPoint(CI); + + // Build call __kmpc_fork_call(Ident, n, microtask, var1, .., varn); + Value *ForkCallArgs[] = {Ident, Builder.getInt32(NumCapturedVars), + Builder.CreateBitCast(OutlinedFn, ParallelTaskPtr)}; + + SmallVector RealArgs; + RealArgs.append(std::begin(ForkCallArgs), std::end(ForkCallArgs)); + RealArgs.append(CI->arg_begin() + /* tid & bound tid */ 2, CI->arg_end()); + + FunctionCallee RTLFn = getOrCreateRuntimeFunction(OMPRTL___kmpc_fork_call); + Builder.CreateCall(RTLFn, RealArgs); + + LLVM_DEBUG(dbgs() << "With fork_call placed: " + << *Builder.GetInsertBlock()->getParent() << "\n"); + + InsertPointTy AfterIP(UI->getParent(), UI->getParent()->end()); + InsertPointTy 
ExitIP(PRegExitBB, PRegExitBB->end()); + UI->eraseFromParent(); + + // Initialize the local TID stack location with the argument value. + Builder.SetInsertPoint(PrivTID); + Function::arg_iterator OutlinedAI = OutlinedFn->arg_begin(); + Builder.CreateStore(Builder.CreateLoad(OutlinedAI), PrivTIDAddr); + + // If no "if" clause was present we do not need the call created during + // outlining, otherwise we reuse it in the serialized parallel region. + if (!ElseTI) { + CI->eraseFromParent(); + } else { + + // If an "if" clause was present we are now generating the serialized + // version into the "else" branch. + Builder.SetInsertPoint(ElseTI); + + // Build calls __kmpc_serialized_parallel(&Ident, GTid); + Value *SerializedParallelCallArgs[] = {Ident, ThreadID}; + Builder.CreateCall( + getOrCreateRuntimeFunction(OMPRTL___kmpc_serialized_parallel), + SerializedParallelCallArgs); + + // OutlinedFn(&gtid, &zero, CapturedStruct); + CI->removeFromParent(); + Builder.Insert(CI); + + // __kmpc_end_serialized_parallel(&Ident, GTid); + Value *EndArgs[] = {Ident, ThreadID}; + Builder.CreateCall( + getOrCreateRuntimeFunction(OMPRTL___kmpc_end_serialized_parallel), + EndArgs); + + LLVM_DEBUG(dbgs() << "With serialized parallel region: " + << *Builder.GetInsertBlock()->getParent() << "\n"); + } + + // Adjust the finalization stack, verify the adjustment, and call the + // finalize function a last time to finalize values between the pre-fini block + // and the exit block if we left the parallel "the normal way". 
+ auto FiniInfo = FinalizationStack.pop_back_val(); + (void)FiniInfo; + assert(FiniInfo.DK == OMPD_parallel && + "Unexpected finalization stack state!"); + + Instruction *PreFiniTI = PRegPreFiniBB->getTerminator(); + assert(PreFiniTI->getNumSuccessors() == 1 && + PreFiniTI->getSuccessor(0)->size() == 1 && + isa(PreFiniTI->getSuccessor(0)->getTerminator()) && + "Unexpected CFG structure!"); + + InsertPointTy PreFiniIP(PRegPreFiniBB, PreFiniTI->getIterator()); + FiniCB(PreFiniIP); + + for (Instruction *I : ToBeDeleted) + I->eraseFromParent(); + + return AfterIP; +} diff --git a/llvm/unittests/Frontend/CMakeLists.txt b/llvm/unittests/Frontend/CMakeLists.txt --- a/llvm/unittests/Frontend/CMakeLists.txt +++ b/llvm/unittests/Frontend/CMakeLists.txt @@ -4,6 +4,7 @@ FrontendOpenMP Support Passes + TransformUtils ) add_llvm_unittest(LLVMFrontendTests diff --git a/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp b/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp --- a/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp +++ b/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp @@ -14,6 +14,7 @@ #include "llvm/IR/Module.h" #include "llvm/Frontend/OpenMP/OMPConstants.h" #include "llvm/IR/Verifier.h" +#include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "gtest/gtest.h" using namespace llvm; @@ -99,20 +100,18 @@ } TEST_F(OpenMPIRBuilderTest, CreateCancelBarrier) { + using InsertPointTy = OpenMPIRBuilder::InsertPointTy; OpenMPIRBuilder OMPBuilder(*M); OMPBuilder.initialize(); BasicBlock *CBB = BasicBlock::Create(Ctx, "", F); new UnreachableInst(Ctx, CBB); - auto FiniCB = [CBB](llvm::OpenMPIRBuilder::InsertPointTy IP) { - assert(IP.getBlock()->end() == IP.getPoint() && - "Clang CG should cause non-terminated block!"); + auto FiniCB = [&](InsertPointTy IP) { + ASSERT_NE(IP.getBlock(), nullptr); + ASSERT_EQ(IP.getBlock()->end(), IP.getPoint()); BranchInst::Create(CBB, IP.getBlock()); }; - // Emulate an outer parallel. 
- llvm::OpenMPIRBuilder::FinalizationInfo FI( - {FiniCB, OMPD_parallel, /* HasCancel */ true}); - OMPBuilder.pushFinalizationCB(std::move(FI)); + OMPBuilder.pushFinalizationCB({FiniCB, OMPD_parallel, true}); IRBuilder<> Builder(BB); @@ -141,6 +140,7 @@ Instruction *BarrierBBTI = Barrier->getParent()->getTerminator(); EXPECT_EQ(BarrierBBTI->getNumSuccessors(), 2U); EXPECT_EQ(BarrierBBTI->getSuccessor(0), NewIP.getBlock()); + EXPECT_EQ(BarrierBBTI->getSuccessor(1)->size(), 1U); EXPECT_EQ(BarrierBBTI->getSuccessor(1)->getTerminator()->getNumSuccessors(), 1U); EXPECT_EQ(BarrierBBTI->getSuccessor(1)->getTerminator()->getSuccessor(0), @@ -188,4 +188,309 @@ return; EXPECT_EQ(SrcSrc->getAsCString(), ";test.dbg;foo;3;7;;"); } + +TEST_F(OpenMPIRBuilderTest, ParallelSimple) { + using InsertPointTy = OpenMPIRBuilder::InsertPointTy; + OpenMPIRBuilder OMPBuilder(*M); + OMPBuilder.initialize(); + F->setName("func"); + IRBuilder<> Builder(BB); + + OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL}); + + AllocaInst *PrivAI = nullptr; + + unsigned NumBodiesGenerated = 0; + unsigned NumPrivatizedVars = 0; + unsigned NumFinalizationPoints = 0; + + auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP, + BasicBlock &ContinuationIP) { + ++NumBodiesGenerated; + + Builder.restoreIP(AllocaIP); + PrivAI = Builder.CreateAlloca(F->arg_begin()->getType()); + Builder.CreateStore(F->arg_begin(), PrivAI); + + Builder.restoreIP(CodeGenIP); + Value *PrivLoad = Builder.CreateLoad(PrivAI, "local.use"); + Value *Cmp = Builder.CreateICmpNE(F->arg_begin(), PrivLoad); + Instruction *ThenTerm, *ElseTerm; + SplitBlockAndInsertIfThenElse(Cmp, CodeGenIP.getBlock()->getTerminator(), + &ThenTerm, &ElseTerm); + + Builder.SetInsertPoint(ThenTerm); + Builder.CreateBr(&ContinuationIP); + ThenTerm->eraseFromParent(); + }; + + auto PrivCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP, + Value &VPtr, Value *&ReplacementValue) -> InsertPointTy { + ++NumPrivatizedVars; + + if 
(!isa(VPtr)) { + EXPECT_EQ(&VPtr, F->arg_begin()); + ReplacementValue = &VPtr; + return CodeGenIP; + } + + // Trivial copy (=firstprivate). + Builder.restoreIP(AllocaIP); + Type *VTy = VPtr.getType()->getPointerElementType(); + Value *V = Builder.CreateLoad(VTy, &VPtr, VPtr.getName() + ".reload"); + ReplacementValue = Builder.CreateAlloca(VTy, 0, VPtr.getName() + ".copy"); + Builder.restoreIP(CodeGenIP); + Builder.CreateStore(V, ReplacementValue); + return CodeGenIP; + }; + + auto FiniCB = [&](InsertPointTy CodeGenIP) { ++NumFinalizationPoints; }; + + IRBuilder<>::InsertPoint AfterIP = OMPBuilder.CreateParallel( + Loc, BodyGenCB, PrivCB, FiniCB, nullptr, nullptr, OMP_PB_default, false); + + EXPECT_EQ(NumBodiesGenerated, 1U); + EXPECT_EQ(NumPrivatizedVars, 1U); + EXPECT_EQ(NumFinalizationPoints, 1U); + + Builder.restoreIP(AfterIP); + Builder.CreateRetVoid(); + + EXPECT_NE(PrivAI, nullptr); + Function *OutlinedFn = PrivAI->getFunction(); + EXPECT_NE(F, OutlinedFn); + EXPECT_FALSE(verifyModule(*M)); + + EXPECT_TRUE(OutlinedFn->hasInternalLinkage()); + EXPECT_EQ(OutlinedFn->arg_size(), 3U); + + EXPECT_EQ(&OutlinedFn->getEntryBlock(), PrivAI->getParent()); + EXPECT_EQ(OutlinedFn->getNumUses(), 1U); + User *Usr = OutlinedFn->user_back(); + ASSERT_TRUE(isa(Usr)); + CallInst *ForkCI = dyn_cast(Usr->user_back()); + ASSERT_NE(ForkCI, nullptr); + + EXPECT_EQ(ForkCI->getCalledFunction()->getName(), "__kmpc_fork_call"); + EXPECT_EQ(ForkCI->getNumArgOperands(), 4U); + EXPECT_TRUE(isa(ForkCI->getArgOperand(0))); + EXPECT_EQ(ForkCI->getArgOperand(1), + ConstantInt::get(Type::getInt32Ty(Ctx), 1U)); + EXPECT_EQ(ForkCI->getArgOperand(2), Usr); + EXPECT_EQ(ForkCI->getArgOperand(3), F->arg_begin()); +} + +TEST_F(OpenMPIRBuilderTest, ParallelIfCond) { + using InsertPointTy = OpenMPIRBuilder::InsertPointTy; + OpenMPIRBuilder OMPBuilder(*M); + OMPBuilder.initialize(); + F->setName("func"); + IRBuilder<> Builder(BB); + + OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL}); + + 
AllocaInst *PrivAI = nullptr; + + unsigned NumBodiesGenerated = 0; + unsigned NumPrivatizedVars = 0; + unsigned NumFinalizationPoints = 0; + + auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP, + BasicBlock &ContinuationIP) { + ++NumBodiesGenerated; + + Builder.restoreIP(AllocaIP); + PrivAI = Builder.CreateAlloca(F->arg_begin()->getType()); + Builder.CreateStore(F->arg_begin(), PrivAI); + + Builder.restoreIP(CodeGenIP); + Value *PrivLoad = Builder.CreateLoad(PrivAI, "local.use"); + Value *Cmp = Builder.CreateICmpNE(F->arg_begin(), PrivLoad); + Instruction *ThenTerm, *ElseTerm; + SplitBlockAndInsertIfThenElse(Cmp, CodeGenIP.getBlock()->getTerminator(), + &ThenTerm, &ElseTerm); + + Builder.SetInsertPoint(ThenTerm); + Builder.CreateBr(&ContinuationIP); + ThenTerm->eraseFromParent(); + }; + + auto PrivCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP, + Value &VPtr, Value *&ReplacementValue) -> InsertPointTy { + ++NumPrivatizedVars; + + if (!isa(VPtr)) { + EXPECT_EQ(&VPtr, F->arg_begin()); + ReplacementValue = &VPtr; + return CodeGenIP; + } + + // Trivial copy (=firstprivate). + Builder.restoreIP(AllocaIP); + Type *VTy = VPtr.getType()->getPointerElementType(); + Value *V = Builder.CreateLoad(VTy, &VPtr, VPtr.getName() + ".reload"); + ReplacementValue = Builder.CreateAlloca(VTy, 0, VPtr.getName() + ".copy"); + Builder.restoreIP(CodeGenIP); + Builder.CreateStore(V, ReplacementValue); + return CodeGenIP; + }; + + auto FiniCB = [&](InsertPointTy CodeGenIP) { + ++NumFinalizationPoints; + // No destructors. 
+ }; + + IRBuilder<>::InsertPoint AfterIP = OMPBuilder.CreateParallel( + Loc, BodyGenCB, PrivCB, FiniCB, Builder.CreateIsNotNull(F->arg_begin()), + nullptr, OMP_PB_default, false); + + EXPECT_EQ(NumBodiesGenerated, 1U); + EXPECT_EQ(NumPrivatizedVars, 1U); + EXPECT_EQ(NumFinalizationPoints, 1U); + + Builder.restoreIP(AfterIP); + Builder.CreateRetVoid(); + + EXPECT_NE(PrivAI, nullptr); + Function *OutlinedFn = PrivAI->getFunction(); + EXPECT_NE(F, OutlinedFn); + EXPECT_FALSE(verifyModule(*M, &errs())); + + EXPECT_TRUE(OutlinedFn->hasInternalLinkage()); + EXPECT_EQ(OutlinedFn->arg_size(), 3U); + + EXPECT_EQ(&OutlinedFn->getEntryBlock(), PrivAI->getParent()); + ASSERT_EQ(OutlinedFn->getNumUses(), 2U); + + CallInst *DirectCI = nullptr; + CallInst *ForkCI = nullptr; + for (User *Usr : OutlinedFn->users()) { + if (isa(Usr)) { + ASSERT_EQ(DirectCI, nullptr); + DirectCI = cast(Usr); + } else { + ASSERT_TRUE(isa(Usr)); + ASSERT_EQ(Usr->getNumUses(), 1U); + ASSERT_TRUE(isa(Usr->user_back())); + ForkCI = cast(Usr->user_back()); + } + } + + EXPECT_EQ(ForkCI->getCalledFunction()->getName(), "__kmpc_fork_call"); + EXPECT_EQ(ForkCI->getNumArgOperands(), 4U); + EXPECT_TRUE(isa(ForkCI->getArgOperand(0))); + EXPECT_EQ(ForkCI->getArgOperand(1), + ConstantInt::get(Type::getInt32Ty(Ctx), 1)); + EXPECT_EQ(ForkCI->getArgOperand(3), F->arg_begin()); + + EXPECT_EQ(DirectCI->getCalledFunction(), OutlinedFn); + EXPECT_EQ(DirectCI->getNumArgOperands(), 3U); + EXPECT_TRUE(isa(DirectCI->getArgOperand(0))); + EXPECT_TRUE(isa(DirectCI->getArgOperand(1))); + EXPECT_EQ(DirectCI->getArgOperand(2), F->arg_begin()); +} + +TEST_F(OpenMPIRBuilderTest, ParallelCancelBarrier) { + using InsertPointTy = OpenMPIRBuilder::InsertPointTy; + OpenMPIRBuilder OMPBuilder(*M); + OMPBuilder.initialize(); + F->setName("func"); + IRBuilder<> Builder(BB); + + OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL}); + + unsigned NumBodiesGenerated = 0; + unsigned NumPrivatizedVars = 0; + unsigned 
NumFinalizationPoints = 0; + + CallInst *CheckedBarrier = nullptr; + auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP, + BasicBlock &ContinuationIP) { + ++NumBodiesGenerated; + + Builder.restoreIP(CodeGenIP); + + // Create three barriers, two cancel barriers but only one checked. + Function *CBFn, *BFn; + + Builder.restoreIP( + OMPBuilder.CreateBarrier(Builder.saveIP(), OMPD_parallel)); + + CBFn = M->getFunction("__kmpc_cancel_barrier"); + BFn = M->getFunction("__kmpc_barrier"); + ASSERT_NE(CBFn, nullptr); + ASSERT_EQ(BFn, nullptr); + ASSERT_EQ(CBFn->getNumUses(), 1U); + ASSERT_TRUE(isa(CBFn->user_back())); + ASSERT_EQ(CBFn->user_back()->getNumUses(), 1U); + CheckedBarrier = cast(CBFn->user_back()); + + Builder.restoreIP( + OMPBuilder.CreateBarrier(Builder.saveIP(), OMPD_parallel, true)); + CBFn = M->getFunction("__kmpc_cancel_barrier"); + BFn = M->getFunction("__kmpc_barrier"); + ASSERT_NE(CBFn, nullptr); + ASSERT_NE(BFn, nullptr); + ASSERT_EQ(CBFn->getNumUses(), 1U); + ASSERT_EQ(BFn->getNumUses(), 1U); + ASSERT_TRUE(isa(BFn->user_back())); + ASSERT_EQ(BFn->user_back()->getNumUses(), 0U); + + Builder.restoreIP(OMPBuilder.CreateBarrier(Builder.saveIP(), OMPD_parallel, + false, false)); + ASSERT_EQ(CBFn->getNumUses(), 2U); + ASSERT_EQ(BFn->getNumUses(), 1U); + ASSERT_TRUE(CBFn->user_back() != CheckedBarrier); + ASSERT_TRUE(isa(CBFn->user_back())); + ASSERT_EQ(CBFn->user_back()->getNumUses(), 0U); + }; + + auto PrivCB = [&](InsertPointTy, InsertPointTy, Value &V, + Value *&) -> InsertPointTy { + ++NumPrivatizedVars; + llvm_unreachable("No privatization callback call expected!"); + }; + + FunctionType *FakeDestructorTy = + FunctionType::get(Type::getVoidTy(Ctx), {Type::getInt32Ty(Ctx)}, + /*isVarArg=*/false); + auto *FakeDestructor = Function::Create( + FakeDestructorTy, Function::ExternalLinkage, "fakeDestructor", M.get()); + + auto FiniCB = [&](InsertPointTy IP) { + ++NumFinalizationPoints; + Builder.restoreIP(IP); + 
Builder.CreateCall(FakeDestructor, + {Builder.getInt32(NumFinalizationPoints)}); + }; + + IRBuilder<>::InsertPoint AfterIP = OMPBuilder.CreateParallel( + Loc, BodyGenCB, PrivCB, FiniCB, Builder.CreateIsNotNull(F->arg_begin()), + nullptr, OMP_PB_default, true); + + EXPECT_EQ(NumBodiesGenerated, 1U); + EXPECT_EQ(NumPrivatizedVars, 0U); + EXPECT_EQ(NumFinalizationPoints, 2U); + EXPECT_EQ(FakeDestructor->getNumUses(), 2U); + + Builder.restoreIP(AfterIP); + Builder.CreateRetVoid(); + + EXPECT_FALSE(verifyModule(*M, &errs())); + + BasicBlock *ExitBB = nullptr; + for (const User *Usr : FakeDestructor->users()) { + const CallInst *CI = dyn_cast(Usr); + ASSERT_EQ(CI->getCalledFunction(), FakeDestructor); + ASSERT_TRUE(isa(CI->getNextNode())); + ASSERT_EQ(CI->getNextNode()->getNumSuccessors(), 1U); + if (ExitBB) + ASSERT_EQ(CI->getNextNode()->getSuccessor(0), ExitBB); + else + ExitBB = CI->getNextNode()->getSuccessor(0); + ASSERT_EQ(ExitBB->size(), 1U); + ASSERT_TRUE(isa(ExitBB->front())); + } +} + } // namespace