diff --git a/llvm/include/llvm/Frontend/OpenMPConstants.h b/llvm/include/llvm/Frontend/OpenMPConstants.h --- a/llvm/include/llvm/Frontend/OpenMPConstants.h +++ b/llvm/include/llvm/Frontend/OpenMPConstants.h @@ -49,6 +49,26 @@ #define OMP_RTL(Enum, ...) constexpr auto Enum = omp::RuntimeFunction::Enum; #include "llvm/Frontend/OpenMPKinds.def" +/// IDs for the different data-sharing attributes (see OpenMPKinds.def). +enum class DataSharingAttribute { +#define OMP_DATA_SHARING_ATTRIBUTE(Enum, Str) Enum, +#include "llvm/Frontend/OpenMPKinds.def" +}; + +#define OMP_DATA_SHARING_ATTRIBUTE(Enum, ...) \ + constexpr auto Enum = omp::DataSharingAttribute::Enum; +#include "llvm/Frontend/OpenMPKinds.def" + +/// IDs for the different proc bind kinds (values set in OpenMPKinds.def). +enum class ProcBindKind { +#define OMP_PROC_BIND_KIND(Enum, Str, Value) Enum = Value, +#include "llvm/Frontend/OpenMPKinds.def" +}; + +#define OMP_PROC_BIND_KIND(Enum, ...) \ + constexpr auto Enum = omp::ProcBindKind::Enum; +#include "llvm/Frontend/OpenMPKinds.def" + /// IDs for all omp runtime library ident_t flag encodings (see /// their defintion in openmp/runtime/src/kmp.h). 
enum class IdentFlag { diff --git a/llvm/include/llvm/Frontend/OpenMPKinds.def b/llvm/include/llvm/Frontend/OpenMPKinds.def --- a/llvm/include/llvm/Frontend/OpenMPKinds.def +++ b/llvm/include/llvm/Frontend/OpenMPKinds.def @@ -166,6 +166,11 @@ __OMP_RTL(__kmpc_cancel_barrier, false, Int32, IdentPtr, Int32) __OMP_RTL(__kmpc_global_thread_num, false, Int32, IdentPtr) __OMP_RTL(__kmpc_fork_call, true, Void, IdentPtr, Int32, ParallelTaskPtr) +__OMP_RTL(__kmpc_push_num_threads, false, Void, IdentPtr, Int32, /* Int */Int32) +__OMP_RTL(__kmpc_push_proc_bind, false, Void, IdentPtr, Int32, /* Int */Int32) +__OMP_RTL(__kmpc_serialized_parallel, false, Void, IdentPtr, Int32) +__OMP_RTL(__kmpc_end_serialized_parallel, false, Void, IdentPtr, Int32) + __OMP_RTL(omp_get_thread_num, false, Int32, ) #undef __OMP_RTL @@ -233,3 +238,47 @@ #undef OMP_IDENT_FLAG ///} + + +/// Data-sharing attributes (enumerators carry the OMP_DSA_ prefix) +/// +///{ + +#ifndef OMP_DATA_SHARING_ATTRIBUTE +#define OMP_DATA_SHARING_ATTRIBUTE(Enum, Str) +#endif + +#define __OMP_DATA_SHARING_ATTRIBUTE(Name) \ + OMP_DATA_SHARING_ATTRIBUTE(OMP_DSA_##Name, #Name) + +__OMP_DATA_SHARING_ATTRIBUTE(none) +__OMP_DATA_SHARING_ATTRIBUTE(shared) +__OMP_DATA_SHARING_ATTRIBUTE(private) +__OMP_DATA_SHARING_ATTRIBUTE(firstprivate) +__OMP_DATA_SHARING_ATTRIBUTE(lastprivate) + +#undef __OMP_DATA_SHARING_ATTRIBUTE +#undef OMP_DATA_SHARING_ATTRIBUTE + +///} + +/// Proc bind kinds (enumerators carry the OMP_PB_ prefix) +/// +///{ + +#ifndef OMP_PROC_BIND_KIND +#define OMP_PROC_BIND_KIND(Enum, Str, Value) +#endif + +#define __OMP_PROC_BIND_KIND(Name, Value) \ + OMP_PROC_BIND_KIND(OMP_PB_##Name, #Name, Value) + +__OMP_PROC_BIND_KIND(none, 1) +__OMP_PROC_BIND_KIND(master, 2) +__OMP_PROC_BIND_KIND(close, 3) +__OMP_PROC_BIND_KIND(spread, 4) + +#undef __OMP_PROC_BIND_KIND +#undef OMP_PROC_BIND_KIND + +///} diff --git a/llvm/include/llvm/Transforms/Utils/OpenMPIRBuilder.h b/llvm/include/llvm/Transforms/Utils/OpenMPIRBuilder.h --- a/llvm/include/llvm/Transforms/Utils/OpenMPIRBuilder.h +++ 
b/llvm/include/llvm/Transforms/Utils/OpenMPIRBuilder.h @@ -43,6 +43,42 @@ /// Type used throughout for insertion points. using InsertPointTy = IRBuilder<>::InsertPoint; + /// Callback type for body (=inner region) code generation + /// + /// The callback takes code locations as arguments, each describing a + /// location at which code might need to be generated or a location that is + /// the target of control transfer. + /// + /// \param AllocaIP is the insertion point at which new alloca instructions + /// should be placed. + /// \param CodeGenIP is the insertion point at which the body code should be + /// placed. + /// \param ContinuationBB is the basic block target to leave the body. + /// + /// Note that all blocks pointed to by the arguments have terminators. + using BodyGenCallbackTy = function_ref; + + /// Callback type for variable privatization (think copy & default + /// constructor). + /// + /// \param AllocaIP is the insertion point at which new alloca instructions + /// should be placed. + /// \param CodeGenIP is the insertion point at which the body code should be + /// placed. + /// \param Val The value being copied/created. + /// \param ReplVal The replacement value, thus a copy or newly created version + /// of \p Val. + /// + /// \returns The new insertion point where code generation continues and + /// \p ReplVal the replacement of \p Val. + using PrivatizeCallbackTy = function_ref; + + using FinalizeCallbackTy = function_ref; + /// Description of a LLVM-IR insertion point (IP) and a debug/source location /// (filename, line, column, ...). struct LocationDescription { @@ -73,6 +109,24 @@ bool ForceSimpleCall = false, bool CheckCancelFlag = true); + /// Generator for '#pragma omp parallel' + /// + /// \param Loc The insert and source location description. + /// \param BodyGenCB Callback that will generate the region code. + /// \param PrivCB Callback to copy a given variable (think copy constructor). 
+ /// \param FiniCB Callback to finalize variable copies. + /// \param IfCondition The evaluated 'if' clause expression, if any. + /// \param NumThreads The evaluated 'num_threads' clause expression, if any. + /// \param ProcBind The value of the 'proc_bind' clause (see ProcBindKind). + /// + /// \returns The insertion position *after* the parallel region. + IRBuilder<>::InsertPoint CreateParallel(const LocationDescription &Loc, + BodyGenCallbackTy BodyGenCB, + PrivatizeCallbackTy PrivCB, + FinalizeCallbackTy FiniCB, + Value *IfCondition, Value *NumThreads, + omp::ProcBindKind ProcBind); + ///} private: diff --git a/llvm/lib/Frontend/OpenMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMPIRBuilder.cpp --- a/llvm/lib/Frontend/OpenMPIRBuilder.cpp +++ b/llvm/lib/Frontend/OpenMPIRBuilder.cpp @@ -16,10 +16,13 @@ #include "llvm/ADT/StringRef.h" #include "llvm/ADT/StringSwitch.h" +#include "llvm/IR/CFG.h" #include "llvm/IR/DebugInfo.h" #include "llvm/IR/IRBuilder.h" #include "llvm/Support/CommandLine.h" +#include "llvm/Support/Error.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" +#include "llvm/Transforms/Utils/CodeExtractor.h" #define DEBUG_TYPE "openmp-ir-builder" @@ -248,3 +251,229 @@ return Builder.saveIP(); } + +IRBuilder<>::InsertPoint OpenMPIRBuilder::CreateParallel( + const LocationDescription &Loc, BodyGenCallbackTy BodyGenCB, + PrivatizeCallbackTy PrivCB, FinalizeCallbackTy FiniCB, Value *IfCondition, + Value *NumThreads, omp::ProcBindKind ProcBind) { + if (!updateToLocation(Loc)) + return Loc.IP; + + Constant *SrcLocStr = getOrCreateSrcLocStr(Loc); + Value *Ident = getOrCreateIdent(SrcLocStr); + Value *ThreadID = getOrCreateThreadID(Ident); + + if (NumThreads) { + // Build call __kmpc_push_num_threads(&Ident, global_tid, num_threads) + Value *Args[] = { + Ident, ThreadID, + Builder.CreateIntCast(NumThreads, Int32, /*isSigned*/ false)}; + Builder.CreateCall( + getOrCreateRuntimeFunction(OMPRTL___kmpc_push_num_threads), Args); + } + + if (ProcBind != OMP_PB_none) { + // 
Build call __kmpc_push_proc_bind(&Ident, global_tid, proc_bind) + Value *Args[] = { + Ident, ThreadID, + ConstantInt::get(Int32, unsigned(ProcBind), /*isSigned=*/true)}; + Builder.CreateCall(getOrCreateRuntimeFunction(OMPRTL___kmpc_push_proc_bind), + Args); + } + + BasicBlock *InsertBB = Builder.GetInsertBlock(); + Function *OuterFn = InsertBB->getParent(); + + Builder.SetInsertPoint(OuterFn->getEntryBlock().getFirstNonPHI()); + AllocaInst *TIDAddr = Builder.CreateAlloca(Int32, nullptr, "tid.addr"); + AllocaInst *ZeroAddr = Builder.CreateAlloca(Int32, nullptr, "zero.addr"); + Builder.CreateStore(Constant::getNullValue(Int32), ZeroAddr); + Builder.CreateStore(Constant::getNullValue(Int32), TIDAddr); + + // Create an artificial insertion point that will also ensure the blocks we + // are about to split are not degenerate. + auto *UI = new UnreachableInst(Builder.getContext(), InsertBB); + + Instruction *ThenTI = UI, *ElseTI = nullptr; + if (IfCondition) + SplitBlockAndInsertIfThenElse(IfCondition, UI, &ThenTI, &ElseTI); + + BasicBlock *ThenBB = ThenTI->getParent(); + BasicBlock *ParalleRegionEntryBB = + ThenBB->splitBasicBlock(ThenTI, "omp.par.entry"); + BasicBlock *ParalleRegionBodyBB = + ParalleRegionEntryBB->splitBasicBlock(ThenTI, "omp.par.region"); + BasicBlock *ParalleRegionExitBB = + ParalleRegionBodyBB->splitBasicBlock(ThenTI, "omp.par.exit"); + + // Generate the privatization allocas in the block that will become the entry + // of the outlined function. + InsertPointTy AllocaIP(ParalleRegionEntryBB, ParalleRegionEntryBB->begin()); + Builder.restoreIP(AllocaIP); + Builder.CreateLoad(ZeroAddr, "zero.addr.use"); + Builder.CreateLoad(TIDAddr, "tidd.addr.use"); + AllocaIP = InsertPointTy(ParalleRegionEntryBB, ParalleRegionEntryBB->begin()); + + // ThenBB + // | + // V + // PRegionEntryBB <- Privatization allocas are placed here. + // | + // V + // PRegionBodyBB <- BodyGen is invoked here. 
+ // | + // V + // PRegionExitBB <- A common exit to simplify block collection. + // + + LLVM_DEBUG(dbgs() << "Before body codegen: " << *UI->getFunction() << "\n"); + + // Let the caller create the body. + assert(BodyGenCB && "Expected body generation callback!"); + InsertPointTy CodeGenIP(ParalleRegionBodyBB, ParalleRegionBodyBB->begin()); + BodyGenCB(AllocaIP, CodeGenIP, *ParalleRegionExitBB); + + LLVM_DEBUG(dbgs() << "After body codegen: " << *UI->getFunction() << "\n"); + + SmallPtrSet ParallelRegionBlockSet; + SmallVector ParallelRegionBlocks, Worklist; + ParallelRegionBlockSet.insert(ParalleRegionEntryBB); + ParallelRegionBlockSet.insert(ParalleRegionExitBB); + + // Collect all blocks in-between ParalleRegionEntryBB and ParalleRegionExitBB. + Worklist.push_back(ParalleRegionEntryBB); + while (!Worklist.empty()) { + BasicBlock *BB = Worklist.pop_back_val(); + ParallelRegionBlocks.push_back(BB); + for (BasicBlock *SuccBB : successors(BB)) + if (ParallelRegionBlockSet.insert(SuccBB).second) + Worklist.push_back(SuccBB); + } + + CodeExtractorAnalysisCache CEAC(*OuterFn); + CodeExtractor Extractor(ParallelRegionBlocks, /* DominatorTree */ nullptr, + /* AggregateArgs */ false, + /* BlockFrequencyInfo */ nullptr, + /* BranchProbabilityInfo */ nullptr, + /* AssumptionCache */ nullptr, + /* AllowVarArgs */ true, + /* AllowAlloca */ true, + /* Suffix */ ".omp_par"); + + // Find inputs to and outputs from the code region. 
+ BasicBlock *CommonExit = nullptr; + SetVector Inputs, Outputs, SinkingCands, HoistingCands; + Extractor.findAllocas(CEAC, SinkingCands, HoistingCands, CommonExit); + Extractor.findInputsOutputs(Inputs, Outputs, SinkingCands); + + LLVM_DEBUG(dbgs() << "Before privatization: " << *UI->getFunction() << "\n"); + + auto PrivHelper = [&](Value &V) { + SmallVector Uses; + for (Use &U : V.uses()) + if (auto *UserI = dyn_cast(U.getUser())) + if (ParallelRegionBlockSet.count(UserI->getParent())) + Uses.push_back(&U); + + Value *ReplacementValue = nullptr; + Builder.restoreIP(PrivCB(AllocaIP, Builder.saveIP(), V, ReplacementValue)); + assert(ReplacementValue && + "Expected copy/create callback to set replacement value!"); + if (ReplacementValue == &V) + return; + + for (Use *UPtr : Uses) + UPtr->set(ReplacementValue); + }; + + for (Value *Input : Inputs) { + LLVM_DEBUG(dbgs() << "Captured input: " << *Input << "\n"); + PrivHelper(*Input); + } + for (Value *Output : Outputs) { + LLVM_DEBUG(dbgs() << "Captured output: " << *Output << "\n"); + PrivHelper(*Output); + } + + LLVM_DEBUG(dbgs() << "After privatization: " << *UI->getFunction() << "\n"); + LLVM_DEBUG({ + for (auto *BB : ParallelRegionBlocks) + dbgs() << " PBR: " << BB->getName() << "\n"; + }); + + Function *OutlinedFn = Extractor.extractCodeRegion(CEAC); + LLVM_DEBUG(dbgs() << "After outlining: " << *UI->getFunction() << "\n"); + LLVM_DEBUG(dbgs() << " Outlined function: " << *OutlinedFn << "\n"); + + // Remove the artificial entry introduced by the extractor right away, we + // made our own entry block after all. 
+ { + assert(OutlinedFn && OutlinedFn->getNumUses() == 1); + BasicBlock &ArtificialEntry = OutlinedFn->getEntryBlock(); + assert(ArtificialEntry.getUniqueSuccessor() == ParalleRegionEntryBB); + assert(ParalleRegionEntryBB->getUniquePredecessor() == &ArtificialEntry); + ParalleRegionEntryBB->moveBefore(&ArtificialEntry); + ArtificialEntry.eraseFromParent(); + } + LLVM_DEBUG(dbgs() << "PP Outlined function: " << *OutlinedFn << "\n"); + assert(&OutlinedFn->getEntryBlock() == ParalleRegionEntryBB); + + assert(OutlinedFn->arg_size() >= 2 && + "Expected at least tid and bounded tid as arguments"); + unsigned NumCapturedVars = OutlinedFn->arg_size() - /* tid & bounded tid */ 2; + + CallInst *CI = cast(OutlinedFn->user_back()); + CI->getParent()->setName("omp_parallel"); + Builder.SetInsertPoint(CI); + + // Build call __kmpc_fork_call(Ident, n, microtask, var1, .., varn); + Value *ForkCallArgs[] = {Ident, Builder.getInt32(NumCapturedVars), + Builder.CreateBitCast(OutlinedFn, ParallelTaskPtr)}; + + SmallVector RealArgs; + RealArgs.append(std::begin(ForkCallArgs), std::end(ForkCallArgs)); + RealArgs.append(CI->arg_begin() + /* tid & bounded tid */ 2, CI->arg_end()); + + FunctionCallee RTLFn = getOrCreateRuntimeFunction(OMPRTL___kmpc_fork_call); + Builder.CreateCall(RTLFn, RealArgs); + + LLVM_DEBUG(dbgs() << "With fork_call placed: " + << *Builder.GetInsertBlock()->getParent() << "\n"); + + InsertPointTy AfterIP(UI->getParent(), UI->getParent()->end()); + InsertPointTy ExitIP(ParalleRegionExitBB, ParalleRegionExitBB->end()); + UI->eraseFromParent(); + + // If no "if" clause was present we are done. + if (!ElseTI) { + CI->eraseFromParent(); + FiniCB(ExitIP); + return AfterIP; + } + + // If an "if" clause was present we are now generating the serialized + // version into the "else" branch. 
+ Builder.SetInsertPoint(ElseTI); + + // Build calls __kmpc_serialized_parallel(&Ident, GTid); + Value *SerializedParallelCallArgs[] = {Ident, ThreadID}; + Builder.CreateCall( + getOrCreateRuntimeFunction(OMPRTL___kmpc_serialized_parallel), + SerializedParallelCallArgs); + + // OutlinedFn(&GTid, &zero, CapturedStruct); + CI->removeFromParent(); + Builder.Insert(CI); + + // __kmpc_end_serialized_parallel(&Ident, GTid); + Value *EndArgs[] = {Ident, ThreadID}; + Builder.CreateCall( + getOrCreateRuntimeFunction(OMPRTL___kmpc_end_serialized_parallel), + EndArgs); + + LLVM_DEBUG(dbgs() << "With serialized parallel region: " + << *Builder.GetInsertBlock()->getParent() << "\n"); + + FiniCB(ExitIP); + return AfterIP; +} diff --git a/llvm/unittests/Frontend/CMakeLists.txt b/llvm/unittests/Frontend/CMakeLists.txt --- a/llvm/unittests/Frontend/CMakeLists.txt +++ b/llvm/unittests/Frontend/CMakeLists.txt @@ -4,6 +4,7 @@ Frontend Support Passes + TransformUtils ) add_llvm_unittest(LLVMFrontendTests diff --git a/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp b/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp --- a/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp +++ b/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp @@ -14,6 +14,7 @@ #include "llvm/IR/Module.h" #include "llvm/Frontend/OpenMPConstants.h" #include "llvm/IR/Verifier.h" +#include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "gtest/gtest.h" using namespace llvm; @@ -175,4 +176,148 @@ return; EXPECT_EQ(SrcSrc->getAsCString(), ";test.dbg;foo;3;7;;"); } + +TEST_F(OpenMPIRBuilderTest, ParallelSimple) { + using InsertPointTy = OpenMPIRBuilder::InsertPointTy; + OpenMPIRBuilder OMPBuilder(*M); + OMPBuilder.initialize(); + F->setName("func"); + IRBuilder<> Builder(BB); + + OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL}); + + AllocaInst *PrivAI = nullptr; + auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP, + BasicBlock &ContinuationIP) { + Builder.restoreIP(AllocaIP); + PrivAI = 
Builder.CreateAlloca(F->arg_begin()->getType()); + Builder.CreateStore(F->arg_begin(), PrivAI); + + Builder.restoreIP(CodeGenIP); + Value *PrivLoad = Builder.CreateLoad(PrivAI, "local.use"); + Value *Cmp = Builder.CreateICmpNE(F->arg_begin(), PrivLoad); + Instruction *ThenTerm, *ElseTerm; + SplitBlockAndInsertIfThenElse(Cmp, CodeGenIP.getBlock()->getTerminator(), + &ThenTerm, &ElseTerm); + + Builder.SetInsertPoint(ThenTerm); + Builder.CreateBr(&ContinuationIP); + ThenTerm->eraseFromParent(); + }; + + auto PrivCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP, + Value &VPtr, Value *&ReplacementValue) -> InsertPointTy { + if (!isa(VPtr)) { + ReplacementValue = &VPtr; + return CodeGenIP; + } + + // Trivial copy (=firstprivate). + assert(isa(VPtr) && "Unexpected value to be copied."); + Builder.restoreIP(AllocaIP); + Type *VTy = VPtr.getType()->getPointerElementType(); + Value *V = Builder.CreateLoad(VTy, &VPtr, VPtr.getName() + ".reload"); + ReplacementValue = Builder.CreateAlloca(VTy, 0, VPtr.getName() + ".copy"); + Builder.restoreIP(CodeGenIP); + Builder.CreateStore(V, ReplacementValue); + return CodeGenIP; + }; + + auto FiniCB = [&](InsertPointTy CodeGenIP) { + // No destructors. 
+ }; + + IRBuilder<>::InsertPoint AfterIP = OMPBuilder.CreateParallel( + Loc, BodyGenCB, PrivCB, FiniCB, nullptr, nullptr, OMP_PB_none); + + Builder.restoreIP(AfterIP); + Builder.CreateRetVoid(); + + EXPECT_NE(PrivAI, nullptr); + Function *OutlinedFn = PrivAI->getFunction(); + EXPECT_NE(F, OutlinedFn); + EXPECT_FALSE(verifyModule(*M)); + + EXPECT_TRUE(OutlinedFn->hasInternalLinkage()); + EXPECT_EQ(OutlinedFn->arg_size(), /* tid, bounded tid, 1 capture */ 3U); + + EXPECT_EQ(&OutlinedFn->getEntryBlock(), PrivAI->getParent()); + EXPECT_EQ(OutlinedFn->getNumUses(), /* fork call only */ 1U); + User *Usr = OutlinedFn->user_back(); + EXPECT_TRUE(isa(Usr)); + CallInst *UserCI = dyn_cast(OutlinedFn->user_back()->user_back()); + EXPECT_NE(UserCI, nullptr); + if (!UserCI) + return; + EXPECT_EQ(UserCI->getCalledFunction()->getName(), "__kmpc_fork_call"); +} + +TEST_F(OpenMPIRBuilderTest, ParallelIfCond) { + using InsertPointTy = OpenMPIRBuilder::InsertPointTy; + OpenMPIRBuilder OMPBuilder(*M); + OMPBuilder.initialize(); + F->setName("func"); + IRBuilder<> Builder(BB); + + OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL}); + + AllocaInst *PrivAI = nullptr; + auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP, + BasicBlock &ContinuationIP) { + Builder.restoreIP(AllocaIP); + PrivAI = Builder.CreateAlloca(F->arg_begin()->getType()); + Builder.CreateStore(F->arg_begin(), PrivAI); + + Builder.restoreIP(CodeGenIP); + Value *PrivLoad = Builder.CreateLoad(PrivAI, "local.use"); + Value *Cmp = Builder.CreateICmpNE(F->arg_begin(), PrivLoad); + Instruction *ThenTerm, *ElseTerm; + SplitBlockAndInsertIfThenElse(Cmp, CodeGenIP.getBlock()->getTerminator(), + &ThenTerm, &ElseTerm); + + Builder.SetInsertPoint(ThenTerm); + Builder.CreateBr(&ContinuationIP); + ThenTerm->eraseFromParent(); + }; + + auto PrivCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP, + Value &VPtr, Value *&ReplacementValue) -> InsertPointTy { + if (!isa(VPtr)) { + ReplacementValue = &VPtr; + return CodeGenIP; + } + + // Trivial copy 
(=firstprivate). + Builder.restoreIP(AllocaIP); + Type *VTy = VPtr.getType()->getPointerElementType(); + Value *V = Builder.CreateLoad(VTy, &VPtr, VPtr.getName() + ".reload"); + ReplacementValue = Builder.CreateAlloca(VTy, 0, VPtr.getName() + ".copy"); + Builder.restoreIP(CodeGenIP); + Builder.CreateStore(V, ReplacementValue); + return CodeGenIP; + }; + + auto FiniCB = [&](InsertPointTy CodeGenIP) { + // No destructors. + }; + + IRBuilder<>::InsertPoint AfterIP = OMPBuilder.CreateParallel( + Loc, BodyGenCB, PrivCB, FiniCB, Builder.CreateIsNotNull(F->arg_begin()), + nullptr, OMP_PB_none); + + Builder.restoreIP(AfterIP); + Builder.CreateRetVoid(); + + EXPECT_NE(PrivAI, nullptr); + Function *OutlinedFn = PrivAI->getFunction(); + EXPECT_NE(F, OutlinedFn); + EXPECT_FALSE(verifyModule(*M, &errs())); + + EXPECT_TRUE(OutlinedFn->hasInternalLinkage()); + EXPECT_EQ(OutlinedFn->arg_size(), /* tid, bounded tid, 1 capture */ 3U); + + EXPECT_EQ(&OutlinedFn->getEntryBlock(), PrivAI->getParent()); + EXPECT_EQ(OutlinedFn->getNumUses(), /* fork call & serialized call */ 2U); +} + } // namespace