diff --git a/clang/test/OpenMP/cancel_codegen.cpp b/clang/test/OpenMP/cancel_codegen.cpp --- a/clang/test/OpenMP/cancel_codegen.cpp +++ b/clang/test/OpenMP/cancel_codegen.cpp @@ -175,7 +175,7 @@ // IRBUILDER: define internal void @main -// IRBUILDER: [[RETURN:omp.par.exit[^:]*]] +// IRBUILDER: [[RETURN:omp.par.outlined.exit[^:]*]] // IRBUILDER-NEXT: ret void // IRBUILDER: [[FLAG:%.+]] = load float, float* @{{.+}}, @@ -192,10 +192,8 @@ // IRBUILDER: [[CMP:%.+]] = icmp eq i32 [[RES]], 0 // IRBUILDER: br i1 [[CMP]], label %[[CONTINUE:[^,].+]], label %[[EXIT:.+]] // IRBUILDER: [[EXIT]] -// IRBUILDER: br label %[[EXIT2:.+]] -// IRBUILDER: [[CONTINUE]] -// IRBUILDER: br label %[[ELSE:.+]] -// IRBUILDER: [[EXIT2]] // IRBUILDER: br label %[[RETURN]] +// IRBUILDER: [[CONTINUE]] +// IRBUILDER: br label %[[ELSE2:.+]] #endif diff --git a/clang/test/OpenMP/irbuilder_nested_openmp_parallel_empty.c b/clang/test/OpenMP/irbuilder_nested_openmp_parallel_empty.c new file mode 100644 --- /dev/null +++ b/clang/test/OpenMP/irbuilder_nested_openmp_parallel_empty.c @@ -0,0 +1,110 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-enable-irbuilder -x c++ -emit-llvm %s -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -o - | FileCheck %s --check-prefixes=ALL,IRBUILDER +// %clang_cc1 -fopenmp -fopenmp-enable-irbuilder -x c++ -std=c++11 -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -emit-pch -o /tmp/t1 %s +// %clang_cc1 -fopenmp -fopenmp-enable-irbuilder -x c++ -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -debug-info-kind=limited -std=c++11 -include-pch /tmp/t1 -verify %s -emit-llvm -o - | FileCheck --check-prefixes=ALL-DEBUG,IRBUILDER-DEBUG %s + +// expected-no-diagnostics + +// TODO: Teach the update script to check new functions too. + +#ifndef HEADER +#define HEADER + +// ALL-LABEL: @_Z17nested_parallel_0v( +// ALL-NEXT: entry: +// ALL-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @1) +// ALL-NEXT: br label [[OMP_PARALLEL:%.*]] +// ALL: omp_parallel: +// ALL-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @1, i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @_Z17nested_parallel_0v..omp_par.1 to void (i32*, i32*, ...)*)) +// ALL-NEXT: br label [[OMP_PAR_OUTLINED_EXIT12:%.*]] +// ALL: omp.par.outlined.exit12: +// ALL-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]] +// ALL: omp.par.exit.split: +// ALL-NEXT: ret void +// +void nested_parallel_0(void) { +#pragma omp parallel + { +#pragma omp parallel + { + } + } +} + +// ALL-LABEL: @_Z17nested_parallel_1Pfid( +// ALL-NEXT: entry: +// ALL-NEXT: [[R_ADDR:%.*]] = alloca float*, align 8 +// ALL-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 +// ALL-NEXT: [[B_ADDR:%.*]] = alloca double, align 8 +// ALL-NEXT: store float* [[R:%.*]], float** [[R_ADDR]], align 8 +// ALL-NEXT: store i32 [[A:%.*]], i32* [[A_ADDR]], align 4 +// ALL-NEXT: store double [[B:%.*]], double* [[B_ADDR]], align 8 +// ALL-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @1) +// ALL-NEXT: br label [[OMP_PARALLEL:%.*]] +// ALL: omp_parallel: +// ALL-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @1, i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, double*, float**)* @_Z17nested_parallel_1Pfid..omp_par.2 to void (i32*, i32*, ...)*), i32* [[A_ADDR]], double* [[B_ADDR]], float** [[R_ADDR]]) +// ALL-NEXT: br label [[OMP_PAR_OUTLINED_EXIT13:%.*]] +// ALL: omp.par.outlined.exit13: +// ALL-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]] +// ALL: omp.par.exit.split: +// ALL-NEXT: ret void +// +void nested_parallel_1(float *r, int a, double b) { +#pragma omp parallel + { +#pragma omp parallel + { + *r = a + b; + } + } +} + +// ALL-LABEL: @_Z17nested_parallel_2Pfid( +// ALL-NEXT: entry: +// ALL-NEXT: [[R_ADDR:%.*]] = alloca float*, align 8 +// ALL-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 +// ALL-NEXT: [[B_ADDR:%.*]] = alloca double, align 8 +// ALL-NEXT: store float* [[R:%.*]], float** [[R_ADDR]], align 8 +// ALL-NEXT: store i32 [[A:%.*]], i32* [[A_ADDR]], align 4 +// ALL-NEXT: store double [[B:%.*]], double* [[B_ADDR]], align 8 +// ALL-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @1) +// ALL-NEXT: br label [[OMP_PARALLEL:%.*]] +// ALL: omp_parallel: +// ALL-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @1, i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, double*, float**)* @_Z17nested_parallel_2Pfid..omp_par.5 to void (i32*, i32*, ...)*), i32* [[A_ADDR]], double* [[B_ADDR]], float** [[R_ADDR]]) +// ALL-NEXT: br label [[OMP_PAR_OUTLINED_EXIT55:%.*]] +// ALL: omp.par.outlined.exit55: +// ALL-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]] +// ALL: omp.par.exit.split: +// ALL-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4 +// ALL-NEXT: [[CONV56:%.*]] = sitofp i32 [[TMP0]] to double +// ALL-NEXT: [[TMP1:%.*]] = load double, double* [[B_ADDR]], align 8 +// ALL-NEXT: [[ADD57:%.*]] = fadd double [[CONV56]], [[TMP1]] +// ALL-NEXT: [[CONV58:%.*]] = fptrunc double [[ADD57]] to float +// ALL-NEXT: [[TMP2:%.*]] = load float*, float** [[R_ADDR]], align 8 +// ALL-NEXT: store float [[CONV58]], float* [[TMP2]], align 4 +// ALL-NEXT: ret void +// +void nested_parallel_2(float *r, int a, double b) { +#pragma omp parallel + { + *r = a + b; +#pragma omp parallel + { + *r = a + b; +#pragma omp parallel + { + *r = a + b; + } + *r = a + b; +#pragma omp parallel + { + *r = a + b; + } + *r = a + b; + } + *r = a + b; + } + *r = a + b; +} + +#endif diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h --- a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h +++ b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h @@ -28,7 +28,8 @@ public: /// Create a new OpenMPIRBuilder operating on the given module \p M. This will /// not have an effect on \p M (see initialize). - OpenMPIRBuilder(Module &M) : M(M), Builder(M.getContext()) {} + OpenMPIRBuilder(Module &M) + : M(M), Builder(M.getContext()), AllocaBuilder(M.getContext()) {} /// Initialize the internal state, this will put structures types and /// potentially other helpers into the underlying module. Must be called @@ -276,6 +277,9 @@ /// The LLVM-IR Builder used to create IR. IRBuilder<> Builder; + /// The LLVM-IR Builder used to create alloca instructions. + IRBuilder<> AllocaBuilder; + /// Map to remember source location strings StringMap SrcLocStrMap; @@ -285,9 +289,14 @@ /// Helper that contains information about regions we need to outline /// during finalization. struct OutlineInfo { - SmallVector Blocks; using PostOutlineCBTy = std::function; PostOutlineCBTy PostOutlineCB; + BasicBlock *EntryBB, *ExitBB; + + /// Collect all blocks in between EntryBB and ExitBB in both the given + /// vector and set. + void collectBlocks(SmallPtrSetImpl &BlockSet, + SmallVectorImpl &BlockVector); }; /// Collection of regions that need to be outlined during finalization. diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp --- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp +++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp @@ -128,13 +128,16 @@ void OpenMPIRBuilder::initialize() { initializeTypes(M); } void OpenMPIRBuilder::finalize() { + SmallPtrSet ParallelRegionBlockSet; + SmallVector Blocks; for (OutlineInfo &OI : OutlineInfos) { - assert(!OI.Blocks.empty() && - "Outlined regions should have at least a single block!"); - BasicBlock *RegEntryBB = OI.Blocks.front(); - Function *OuterFn = RegEntryBB->getParent(); + ParallelRegionBlockSet.clear(); + Blocks.clear(); + OI.collectBlocks(ParallelRegionBlockSet, Blocks); + + Function *OuterFn = OI.EntryBB->getParent(); CodeExtractorAnalysisCache CEAC(*OuterFn); - CodeExtractor Extractor(OI.Blocks, /* DominatorTree */ nullptr, + CodeExtractor Extractor(Blocks, /* DominatorTree */ nullptr, /* AggregateArgs */ false, /* BlockFrequencyInfo */ nullptr, /* BranchProbabilityInfo */ nullptr, @@ -144,6 +147,8 @@ /* Suffix */ ".omp_par"); LLVM_DEBUG(dbgs() << "Before outlining: " << *OuterFn << "\n"); + LLVM_DEBUG(dbgs() << "Entry " << OI.EntryBB->getName() + << " Exit: " << OI.ExitBB->getName() << "\n"); assert(Extractor.isEligible() && "Expected OpenMP outlining to be possible!"); @@ -163,12 +168,12 @@ // made our own entry block after all. { BasicBlock &ArtificialEntry = OutlinedFn->getEntryBlock(); - assert(ArtificialEntry.getUniqueSuccessor() == RegEntryBB); - assert(RegEntryBB->getUniquePredecessor() == &ArtificialEntry); - RegEntryBB->moveBefore(&ArtificialEntry); + assert(ArtificialEntry.getUniqueSuccessor() == OI.EntryBB); + assert(OI.EntryBB->getUniquePredecessor() == &ArtificialEntry); + OI.EntryBB->moveBefore(&ArtificialEntry); ArtificialEntry.eraseFromParent(); } - assert(&OutlinedFn->getEntryBlock() == RegEntryBB); + assert(&OutlinedFn->getEntryBlock() == OI.EntryBB); assert(OutlinedFn && OutlinedFn->getNumUses() == 1); // Run a user callback, e.g. to add attributes. @@ -425,17 +430,32 @@ // we want to delete at the end. SmallVector ToBeDeleted; - Builder.SetInsertPoint(OuterFn->getEntryBlock().getFirstNonPHI()); - AllocaInst *TIDAddr = Builder.CreateAlloca(Int32, nullptr, "tid.addr"); - AllocaInst *ZeroAddr = Builder.CreateAlloca(Int32, nullptr, "zero.addr"); + // The alloca builder is managed internally basically like a stack. The + // insertion point guards keep the old top value alive while we update it for + // the body. + // + // TODO: We now have an internal AllocaBuilder and the AllocaIP in the + // callback, one might suffice. + IRBuilder<>::InsertPointGuard AIPG(AllocaBuilder); + + // For the first outermost region we need to initialize the alloca builder. + if (!AllocaBuilder.GetInsertBlock()) + AllocaBuilder.SetInsertPoint(OuterFn->getEntryBlock().getFirstNonPHI()); + + // Use the debug location of the pragma for alloca related code as well. + AllocaBuilder.SetCurrentDebugLocation(Loc.DL); + + AllocaInst *TIDAddr = AllocaBuilder.CreateAlloca(Int32, nullptr, "tid.addr"); + AllocaInst *ZeroAddr = + AllocaBuilder.CreateAlloca(Int32, nullptr, "zero.addr"); // If there is an if condition we actually use the TIDAddr and ZeroAddr in the // program, otherwise we only need them for modeling purposes to get the // associated arguments in the outlined function. In the former case, // initialize the allocas properly, in the latter case, delete them later. if (IfCondition) { - Builder.CreateStore(Constant::getNullValue(Int32), TIDAddr); - Builder.CreateStore(Constant::getNullValue(Int32), ZeroAddr); + AllocaBuilder.CreateStore(Constant::getNullValue(Int32), TIDAddr); + AllocaBuilder.CreateStore(Constant::getNullValue(Int32), ZeroAddr); } else { ToBeDeleted.push_back(TIDAddr); ToBeDeleted.push_back(ZeroAddr); @@ -479,14 +499,14 @@ // of the outlined function. InsertPointTy AllocaIP(PRegEntryBB, PRegEntryBB->getTerminator()->getIterator()); - Builder.restoreIP(AllocaIP); + AllocaBuilder.restoreIP(AllocaIP); AllocaInst *PrivTIDAddr = - Builder.CreateAlloca(Int32, nullptr, "tid.addr.local"); - Instruction *PrivTID = Builder.CreateLoad(PrivTIDAddr, "tid"); + AllocaBuilder.CreateAlloca(Int32, nullptr, "tid.addr.local"); + Instruction *PrivTID = AllocaBuilder.CreateLoad(PrivTIDAddr, "tid"); // Add some fake uses for OpenMP provided arguments. - ToBeDeleted.push_back(Builder.CreateLoad(TIDAddr, "tid.addr.use")); - ToBeDeleted.push_back(Builder.CreateLoad(ZeroAddr, "zero.addr.use")); + ToBeDeleted.push_back(AllocaBuilder.CreateLoad(TIDAddr, "tid.addr.use")); + ToBeDeleted.push_back(AllocaBuilder.CreateLoad(ZeroAddr, "zero.addr.use")); // ThenBB // | @@ -618,20 +638,12 @@ InsertPointTy PreFiniIP(PRegPreFiniBB, PRegPreFiniTI->getIterator()); FiniCB(PreFiniIP); - SmallPtrSet ParallelRegionBlockSet; - SmallVector Worklist; - ParallelRegionBlockSet.insert(PRegEntryBB); - ParallelRegionBlockSet.insert(PRegExitBB); + OI.EntryBB = PRegEntryBB; + OI.ExitBB = PRegExitBB; - // Collect all blocks in-between PRegEntryBB and PRegExitBB. - Worklist.push_back(PRegEntryBB); - while (!Worklist.empty()) { - BasicBlock *BB = Worklist.pop_back_val(); - OI.Blocks.push_back(BB); - for (BasicBlock *SuccBB : successors(BB)) - if (ParallelRegionBlockSet.insert(SuccBB).second) - Worklist.push_back(SuccBB); - } + SmallPtrSet ParallelRegionBlockSet; + SmallVector Blocks; + OI.collectBlocks(ParallelRegionBlockSet, Blocks); // Ensure a single exit node for the outlined region by creating one. // We might have multiple incoming edges to the exit now due to finalizations, @@ -639,10 +651,10 @@ BasicBlock *PRegOutlinedExitBB = PRegExitBB; PRegExitBB = SplitBlock(PRegExitBB, &*PRegExitBB->getFirstInsertionPt()); PRegOutlinedExitBB->setName("omp.par.outlined.exit"); - OI.Blocks.push_back(PRegOutlinedExitBB); + Blocks.push_back(PRegOutlinedExitBB); CodeExtractorAnalysisCache CEAC(*OuterFn); - CodeExtractor Extractor(OI.Blocks, /* DominatorTree */ nullptr, + CodeExtractor Extractor(Blocks, /* DominatorTree */ nullptr, /* AggregateArgs */ false, /* BlockFrequencyInfo */ nullptr, /* BranchProbabilityInfo */ nullptr, @@ -693,12 +705,16 @@ LLVM_DEBUG(dbgs() << "Captured input: " << *Input << "\n"); PrivHelper(*Input); } + LLVM_DEBUG({ + for (Value *Output : Outputs) + LLVM_DEBUG(dbgs() << "Captured output: " << *Output << "\n"); + }); assert(Outputs.empty() && "OpenMP outlining should not produce live-out values!"); LLVM_DEBUG(dbgs() << "After privatization: " << *OuterFn << "\n"); LLVM_DEBUG({ - for (auto *BB : OI.Blocks) + for (auto *BB : Blocks) dbgs() << " PBR: " << BB->getName() << "\n"; }); @@ -996,3 +1012,20 @@ std::string Name = getNameWithSeparators({Prefix, "var"}, ".", "."); return getOrCreateOMPInternalVariable(KmpCriticalNameTy, Name); } + +void OpenMPIRBuilder::OutlineInfo::collectBlocks( + SmallPtrSetImpl &BlockSet, + SmallVectorImpl &BlockVector) { + SmallVector Worklist; + BlockSet.insert(EntryBB); + BlockSet.insert(ExitBB); + + Worklist.push_back(EntryBB); + while (!Worklist.empty()) { + BasicBlock *BB = Worklist.pop_back_val(); + BlockVector.push_back(BB); + for (BasicBlock *SuccBB : successors(BB)) + if (BlockSet.insert(SuccBB).second) + Worklist.push_back(SuccBB); + } +} diff --git a/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp b/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp --- a/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp +++ b/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp @@ -6,13 +6,14 @@ // //===----------------------------------------------------------------------===// +#include "llvm/Frontend/OpenMP/OMPConstants.h" #include "llvm/Frontend/OpenMP/OMPIRBuilder.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/DIBuilder.h" #include "llvm/IR/Function.h" +#include "llvm/IR/InstIterator.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/Module.h" -#include "llvm/Frontend/OpenMP/OMPConstants.h" #include "llvm/IR/Verifier.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "gtest/gtest.h" @@ -402,6 +403,201 @@ EXPECT_EQ(ForkCI->getArgOperand(3), F->arg_begin()); } +TEST_F(OpenMPIRBuilderTest, ParallelNested) { + using InsertPointTy = OpenMPIRBuilder::InsertPointTy; + OpenMPIRBuilder OMPBuilder(*M); + OMPBuilder.initialize(); + F->setName("func"); + IRBuilder<> Builder(BB); + + OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL}); + + unsigned NumInnerBodiesGenerated = 0; + unsigned NumOuterBodiesGenerated = 0; + unsigned NumFinalizationPoints = 0; + + auto InnerBodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP, + BasicBlock &ContinuationIP) { + ++NumInnerBodiesGenerated; + }; + + auto PrivCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP, + Value &VPtr, Value *&ReplacementValue) -> InsertPointTy { + // Trivial copy (=firstprivate). + Builder.restoreIP(AllocaIP); + Type *VTy = VPtr.getType()->getPointerElementType(); + Value *V = Builder.CreateLoad(VTy, &VPtr, VPtr.getName() + ".reload"); + ReplacementValue = Builder.CreateAlloca(VTy, 0, VPtr.getName() + ".copy"); + Builder.restoreIP(CodeGenIP); + Builder.CreateStore(V, ReplacementValue); + return CodeGenIP; + }; + + auto FiniCB = [&](InsertPointTy CodeGenIP) { ++NumFinalizationPoints; }; + + auto OuterBodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP, + BasicBlock &ContinuationIP) { + ++NumOuterBodiesGenerated; + Builder.restoreIP(CodeGenIP); + BasicBlock *CGBB = CodeGenIP.getBlock(); + BasicBlock *NewBB = SplitBlock(CGBB, &*CodeGenIP.getPoint()); + CGBB->getTerminator()->eraseFromParent(); + ; + + IRBuilder<>::InsertPoint AfterIP = OMPBuilder.CreateParallel( + InsertPointTy(CGBB, CGBB->end()), InnerBodyGenCB, PrivCB, FiniCB, + nullptr, nullptr, OMP_PROC_BIND_default, false); + + Builder.restoreIP(AfterIP); + Builder.CreateBr(NewBB); + }; + + IRBuilder<>::InsertPoint AfterIP = + OMPBuilder.CreateParallel(Loc, OuterBodyGenCB, PrivCB, FiniCB, nullptr, + nullptr, OMP_PROC_BIND_default, false); + + EXPECT_EQ(NumInnerBodiesGenerated, 1U); + EXPECT_EQ(NumOuterBodiesGenerated, 1U); + EXPECT_EQ(NumFinalizationPoints, 2U); + + Builder.restoreIP(AfterIP); + Builder.CreateRetVoid(); + + OMPBuilder.finalize(); + + EXPECT_EQ(M->size(), 5U); + for (Function &OutlinedFn : *M) { + if (F == &OutlinedFn || OutlinedFn.isDeclaration()) + continue; + EXPECT_FALSE(verifyModule(*M, &errs())); + EXPECT_TRUE(OutlinedFn.hasFnAttribute(Attribute::NoUnwind)); + EXPECT_TRUE(OutlinedFn.hasFnAttribute(Attribute::NoRecurse)); + EXPECT_TRUE(OutlinedFn.hasParamAttribute(0, Attribute::NoAlias)); + EXPECT_TRUE(OutlinedFn.hasParamAttribute(1, Attribute::NoAlias)); + + EXPECT_TRUE(OutlinedFn.hasInternalLinkage()); + EXPECT_EQ(OutlinedFn.arg_size(), 2U); + + EXPECT_EQ(OutlinedFn.getNumUses(), 1U); + User *Usr = OutlinedFn.user_back(); + ASSERT_TRUE(isa(Usr)); + CallInst *ForkCI = dyn_cast(Usr->user_back()); + ASSERT_NE(ForkCI, nullptr); + + EXPECT_EQ(ForkCI->getCalledFunction()->getName(), "__kmpc_fork_call"); + EXPECT_EQ(ForkCI->getNumArgOperands(), 3U); + EXPECT_TRUE(isa(ForkCI->getArgOperand(0))); + EXPECT_EQ(ForkCI->getArgOperand(1), + ConstantInt::get(Type::getInt32Ty(Ctx), 0U)); + EXPECT_EQ(ForkCI->getArgOperand(2), Usr); + } +} + +TEST_F(OpenMPIRBuilderTest, ParallelNested2Inner) { + using InsertPointTy = OpenMPIRBuilder::InsertPointTy; + OpenMPIRBuilder OMPBuilder(*M); + OMPBuilder.initialize(); + F->setName("func"); + IRBuilder<> Builder(BB); + + OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL}); + + unsigned NumInnerBodiesGenerated = 0; + unsigned NumOuterBodiesGenerated = 0; + unsigned NumFinalizationPoints = 0; + + auto InnerBodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP, + BasicBlock &ContinuationIP) { + ++NumInnerBodiesGenerated; + }; + + auto PrivCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP, + Value &VPtr, Value *&ReplacementValue) -> InsertPointTy { + // Trivial copy (=firstprivate). + Builder.restoreIP(AllocaIP); + Type *VTy = VPtr.getType()->getPointerElementType(); + Value *V = Builder.CreateLoad(VTy, &VPtr, VPtr.getName() + ".reload"); + ReplacementValue = Builder.CreateAlloca(VTy, 0, VPtr.getName() + ".copy"); + Builder.restoreIP(CodeGenIP); + Builder.CreateStore(V, ReplacementValue); + return CodeGenIP; + }; + + auto FiniCB = [&](InsertPointTy CodeGenIP) { ++NumFinalizationPoints; }; + + auto OuterBodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP, + BasicBlock &ContinuationIP) { + ++NumOuterBodiesGenerated; + Builder.restoreIP(CodeGenIP); + BasicBlock *CGBB = CodeGenIP.getBlock(); + BasicBlock *NewBB1 = SplitBlock(CGBB, &*CodeGenIP.getPoint()); + BasicBlock *NewBB2 = SplitBlock(NewBB1, &*NewBB1->getFirstInsertionPt()); + CGBB->getTerminator()->eraseFromParent(); + ; + NewBB1->getTerminator()->eraseFromParent(); + ; + + IRBuilder<>::InsertPoint AfterIP1 = OMPBuilder.CreateParallel( + InsertPointTy(CGBB, CGBB->end()), InnerBodyGenCB, PrivCB, FiniCB, + nullptr, nullptr, OMP_PROC_BIND_default, false); + + Builder.restoreIP(AfterIP1); + Builder.CreateBr(NewBB1); + + IRBuilder<>::InsertPoint AfterIP2 = OMPBuilder.CreateParallel( + InsertPointTy(NewBB1, NewBB1->end()), InnerBodyGenCB, PrivCB, FiniCB, + nullptr, nullptr, OMP_PROC_BIND_default, false); + + Builder.restoreIP(AfterIP2); + Builder.CreateBr(NewBB2); + }; + + IRBuilder<>::InsertPoint AfterIP = + OMPBuilder.CreateParallel(Loc, OuterBodyGenCB, PrivCB, FiniCB, nullptr, + nullptr, OMP_PROC_BIND_default, false); + + EXPECT_EQ(NumInnerBodiesGenerated, 2U); + EXPECT_EQ(NumOuterBodiesGenerated, 1U); + EXPECT_EQ(NumFinalizationPoints, 3U); + + Builder.restoreIP(AfterIP); + Builder.CreateRetVoid(); + + OMPBuilder.finalize(); + + EXPECT_EQ(M->size(), 6U); + for (Function &OutlinedFn : *M) { + if (F == &OutlinedFn || OutlinedFn.isDeclaration()) + continue; + EXPECT_FALSE(verifyModule(*M, &errs())); + EXPECT_TRUE(OutlinedFn.hasFnAttribute(Attribute::NoUnwind)); + EXPECT_TRUE(OutlinedFn.hasFnAttribute(Attribute::NoRecurse)); + EXPECT_TRUE(OutlinedFn.hasParamAttribute(0, Attribute::NoAlias)); + EXPECT_TRUE(OutlinedFn.hasParamAttribute(1, Attribute::NoAlias)); + + EXPECT_TRUE(OutlinedFn.hasInternalLinkage()); + EXPECT_EQ(OutlinedFn.arg_size(), 2U); + + unsigned NumAllocas = 0; + for (Instruction &I : instructions(OutlinedFn)) + NumAllocas += isa(I); + EXPECT_EQ(NumAllocas, 1U); + + EXPECT_EQ(OutlinedFn.getNumUses(), 1U); + User *Usr = OutlinedFn.user_back(); + ASSERT_TRUE(isa(Usr)); + CallInst *ForkCI = dyn_cast(Usr->user_back()); + ASSERT_NE(ForkCI, nullptr); + + EXPECT_EQ(ForkCI->getCalledFunction()->getName(), "__kmpc_fork_call"); + EXPECT_EQ(ForkCI->getNumArgOperands(), 3U); + EXPECT_TRUE(isa(ForkCI->getArgOperand(0))); + EXPECT_EQ(ForkCI->getArgOperand(1), + ConstantInt::get(Type::getInt32Ty(Ctx), 0U)); + EXPECT_EQ(ForkCI->getArgOperand(2), Usr); + } +} + TEST_F(OpenMPIRBuilderTest, ParallelIfCond) { using InsertPointTy = OpenMPIRBuilder::InsertPointTy; OpenMPIRBuilder OMPBuilder(*M);