Index: llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h =================================================================== --- llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h +++ llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h @@ -28,7 +28,8 @@ public: /// Create a new OpenMPIRBuilder operating on the given module \p M. This will /// not have an effect on \p M (see initialize). - OpenMPIRBuilder(Module &M) : M(M), Builder(M.getContext()) {} + OpenMPIRBuilder(Module &M) + : M(M), Builder(M.getContext()), AllocaBuilder(M.getContext()) {} /// Initialize the internal state, this will put structures types and /// potentially other helpers into the underlying module. Must be called @@ -283,6 +284,9 @@ /// The LLVM-IR Builder used to create IR. IRBuilder<> Builder; + /// The LLVM-IR Builder used to create alloca instructions. + IRBuilder<> AllocaBuilder; + /// Map to remember source location strings StringMap SrcLocStrMap; Index: llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp =================================================================== --- llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp +++ llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp @@ -449,17 +449,32 @@ // we want to delete at the end. SmallVector ToBeDeleted; - Builder.SetInsertPoint(OuterFn->getEntryBlock().getFirstNonPHI()); - AllocaInst *TIDAddr = Builder.CreateAlloca(Int32, nullptr, "tid.addr"); - AllocaInst *ZeroAddr = Builder.CreateAlloca(Int32, nullptr, "zero.addr"); + // The alloca builder is managed internally basically like a stack. The + // insertion point guards keep the old top value alive while we update it for + // the body. + // + // TODO: We now have an internal AllocaBuilder and the AllocaIP in the + // callback, one might suffice. + IRBuilder<>::InsertPointGuard AIPG(AllocaBuilder); + + // For the first outermost region we need to initialize the alloca builder. + if (!AllocaBuilder.GetInsertBlock()) + AllocaBuilder.SetInsertPoint(OuterFn->getEntryBlock().getFirstNonPHI()); + + // Use the debug location of the pragma for alloca related code as well. + AllocaBuilder.SetCurrentDebugLocation(Loc.DL); + + AllocaInst *TIDAddr = AllocaBuilder.CreateAlloca(Int32, nullptr, "tid.addr"); + AllocaInst *ZeroAddr = + AllocaBuilder.CreateAlloca(Int32, nullptr, "zero.addr"); // If there is an if condition we actually use the TIDAddr and ZeroAddr in the // program, otherwise we only need them for modeling purposes to get the // associated arguments in the outlined function. In the former case, // initialize the allocas properly, in the latter case, delete them later. if (IfCondition) { - Builder.CreateStore(Constant::getNullValue(Int32), TIDAddr); - Builder.CreateStore(Constant::getNullValue(Int32), ZeroAddr); + AllocaBuilder.CreateStore(Constant::getNullValue(Int32), TIDAddr); + AllocaBuilder.CreateStore(Constant::getNullValue(Int32), ZeroAddr); } else { ToBeDeleted.push_back(TIDAddr); ToBeDeleted.push_back(ZeroAddr); @@ -503,14 +518,14 @@ // of the outlined function. InsertPointTy AllocaIP(PRegEntryBB, PRegEntryBB->getTerminator()->getIterator()); - Builder.restoreIP(AllocaIP); + AllocaBuilder.restoreIP(AllocaIP); AllocaInst *PrivTIDAddr = - Builder.CreateAlloca(Int32, nullptr, "tid.addr.local"); - Instruction *PrivTID = Builder.CreateLoad(PrivTIDAddr, "tid"); + AllocaBuilder.CreateAlloca(Int32, nullptr, "tid.addr.local"); + Instruction *PrivTID = AllocaBuilder.CreateLoad(PrivTIDAddr, "tid"); // Add some fake uses for OpenMP provided arguments. - ToBeDeleted.push_back(Builder.CreateLoad(TIDAddr, "tid.addr.use")); - ToBeDeleted.push_back(Builder.CreateLoad(ZeroAddr, "zero.addr.use")); + ToBeDeleted.push_back(AllocaBuilder.CreateLoad(TIDAddr, "tid.addr.use")); + ToBeDeleted.push_back(AllocaBuilder.CreateLoad(ZeroAddr, "zero.addr.use")); // ThenBB // | @@ -706,6 +721,10 @@ LLVM_DEBUG(dbgs() << "Captured input: " << *Input << "\n"); PrivHelper(*Input); } + LLVM_DEBUG({ + for (Value *Output : Outputs) + LLVM_DEBUG(dbgs() << "Captured output: " << *Output << "\n"); + }); assert(Outputs.empty() && "OpenMP outlining should not produce live-out values!"); Index: llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp =================================================================== --- llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp +++ llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp @@ -6,13 +6,14 @@ // //===----------------------------------------------------------------------===// +#include "llvm/Frontend/OpenMP/OMPConstants.h" #include "llvm/Frontend/OpenMP/OMPIRBuilder.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/DIBuilder.h" #include "llvm/IR/Function.h" +#include "llvm/IR/InstIterator.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/Module.h" -#include "llvm/Frontend/OpenMP/OMPConstants.h" #include "llvm/IR/Verifier.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "gtest/gtest.h" @@ -401,6 +402,201 @@ EXPECT_EQ(ForkCI->getArgOperand(3), F->arg_begin()); } +TEST_F(OpenMPIRBuilderTest, ParallelNested) { + using InsertPointTy = OpenMPIRBuilder::InsertPointTy; + OpenMPIRBuilder OMPBuilder(*M); + OMPBuilder.initialize(); + F->setName("func"); + IRBuilder<> Builder(BB); + + OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL}); + + unsigned NumInnerBodiesGenerated = 0; + unsigned NumOuterBodiesGenerated = 0; + unsigned NumFinalizationPoints = 0; + + auto InnerBodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP, + BasicBlock &ContinuationIP) { + ++NumInnerBodiesGenerated; + }; + + auto PrivCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP, + Value &VPtr, Value *&ReplacementValue) -> InsertPointTy { + // Trivial copy (=firstprivate). + Builder.restoreIP(AllocaIP); + Type *VTy = VPtr.getType()->getPointerElementType(); + Value *V = Builder.CreateLoad(VTy, &VPtr, VPtr.getName() + ".reload"); + ReplacementValue = Builder.CreateAlloca(VTy, 0, VPtr.getName() + ".copy"); + Builder.restoreIP(CodeGenIP); + Builder.CreateStore(V, ReplacementValue); + return CodeGenIP; + }; + + auto FiniCB = [&](InsertPointTy CodeGenIP) { ++NumFinalizationPoints; }; + + auto OuterBodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP, + BasicBlock &ContinuationIP) { + ++NumOuterBodiesGenerated; + Builder.restoreIP(CodeGenIP); + BasicBlock *CGBB = CodeGenIP.getBlock(); + BasicBlock *NewBB = SplitBlock(CGBB, &*CodeGenIP.getPoint()); + CGBB->getTerminator()->eraseFromParent(); + ; + + IRBuilder<>::InsertPoint AfterIP = OMPBuilder.CreateParallel( + InsertPointTy(CGBB, CGBB->end()), InnerBodyGenCB, PrivCB, FiniCB, + nullptr, nullptr, OMP_PROC_BIND_default, false); + + Builder.restoreIP(AfterIP); + Builder.CreateBr(NewBB); + }; + + IRBuilder<>::InsertPoint AfterIP = + OMPBuilder.CreateParallel(Loc, OuterBodyGenCB, PrivCB, FiniCB, nullptr, + nullptr, OMP_PROC_BIND_default, false); + + EXPECT_EQ(NumInnerBodiesGenerated, 1U); + EXPECT_EQ(NumOuterBodiesGenerated, 1U); + EXPECT_EQ(NumFinalizationPoints, 2U); + + Builder.restoreIP(AfterIP); + Builder.CreateRetVoid(); + + OMPBuilder.finalize(); + + EXPECT_EQ(M->size(), 5U); + for (Function &OutlinedFn : *M) { + if (F == &OutlinedFn || OutlinedFn.isDeclaration()) + continue; + EXPECT_FALSE(verifyModule(*M, &errs())); + EXPECT_TRUE(OutlinedFn.hasFnAttribute(Attribute::NoUnwind)); + EXPECT_TRUE(OutlinedFn.hasFnAttribute(Attribute::NoRecurse)); + EXPECT_TRUE(OutlinedFn.hasParamAttribute(0, Attribute::NoAlias)); + EXPECT_TRUE(OutlinedFn.hasParamAttribute(1, Attribute::NoAlias)); + + EXPECT_TRUE(OutlinedFn.hasInternalLinkage()); + EXPECT_EQ(OutlinedFn.arg_size(), 2U); + + EXPECT_EQ(OutlinedFn.getNumUses(), 1U); + User *Usr = OutlinedFn.user_back(); + ASSERT_TRUE(isa(Usr)); + CallInst *ForkCI = dyn_cast(Usr->user_back()); + ASSERT_NE(ForkCI, nullptr); + + EXPECT_EQ(ForkCI->getCalledFunction()->getName(), "__kmpc_fork_call"); + EXPECT_EQ(ForkCI->getNumArgOperands(), 3U); + EXPECT_TRUE(isa(ForkCI->getArgOperand(0))); + EXPECT_EQ(ForkCI->getArgOperand(1), + ConstantInt::get(Type::getInt32Ty(Ctx), 0U)); + EXPECT_EQ(ForkCI->getArgOperand(2), Usr); + } +} + +TEST_F(OpenMPIRBuilderTest, ParallelNested2Inner) { + using InsertPointTy = OpenMPIRBuilder::InsertPointTy; + OpenMPIRBuilder OMPBuilder(*M); + OMPBuilder.initialize(); + F->setName("func"); + IRBuilder<> Builder(BB); + + OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL}); + + unsigned NumInnerBodiesGenerated = 0; + unsigned NumOuterBodiesGenerated = 0; + unsigned NumFinalizationPoints = 0; + + auto InnerBodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP, + BasicBlock &ContinuationIP) { + ++NumInnerBodiesGenerated; + }; + + auto PrivCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP, + Value &VPtr, Value *&ReplacementValue) -> InsertPointTy { + // Trivial copy (=firstprivate). + Builder.restoreIP(AllocaIP); + Type *VTy = VPtr.getType()->getPointerElementType(); + Value *V = Builder.CreateLoad(VTy, &VPtr, VPtr.getName() + ".reload"); + ReplacementValue = Builder.CreateAlloca(VTy, 0, VPtr.getName() + ".copy"); + Builder.restoreIP(CodeGenIP); + Builder.CreateStore(V, ReplacementValue); + return CodeGenIP; + }; + + auto FiniCB = [&](InsertPointTy CodeGenIP) { ++NumFinalizationPoints; }; + + auto OuterBodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP, + BasicBlock &ContinuationIP) { + ++NumOuterBodiesGenerated; + Builder.restoreIP(CodeGenIP); + BasicBlock *CGBB = CodeGenIP.getBlock(); + BasicBlock *NewBB1 = SplitBlock(CGBB, &*CodeGenIP.getPoint()); + BasicBlock *NewBB2 = SplitBlock(NewBB1, &*NewBB1->getFirstInsertionPt()); + CGBB->getTerminator()->eraseFromParent(); + ; + NewBB1->getTerminator()->eraseFromParent(); + ; + + IRBuilder<>::InsertPoint AfterIP1 = OMPBuilder.CreateParallel( + InsertPointTy(CGBB, CGBB->end()), InnerBodyGenCB, PrivCB, FiniCB, + nullptr, nullptr, OMP_PROC_BIND_default, false); + + Builder.restoreIP(AfterIP1); + Builder.CreateBr(NewBB1); + + IRBuilder<>::InsertPoint AfterIP2 = OMPBuilder.CreateParallel( + InsertPointTy(NewBB1, NewBB1->end()), InnerBodyGenCB, PrivCB, FiniCB, + nullptr, nullptr, OMP_PROC_BIND_default, false); + + Builder.restoreIP(AfterIP2); + Builder.CreateBr(NewBB2); + }; + + IRBuilder<>::InsertPoint AfterIP = + OMPBuilder.CreateParallel(Loc, OuterBodyGenCB, PrivCB, FiniCB, nullptr, + nullptr, OMP_PROC_BIND_default, false); + + EXPECT_EQ(NumInnerBodiesGenerated, 2U); + EXPECT_EQ(NumOuterBodiesGenerated, 1U); + EXPECT_EQ(NumFinalizationPoints, 3U); + + Builder.restoreIP(AfterIP); + Builder.CreateRetVoid(); + + OMPBuilder.finalize(); + + EXPECT_EQ(M->size(), 6U); + for (Function &OutlinedFn : *M) { + if (F == &OutlinedFn || OutlinedFn.isDeclaration()) + continue; + EXPECT_FALSE(verifyModule(*M, &errs())); + EXPECT_TRUE(OutlinedFn.hasFnAttribute(Attribute::NoUnwind)); + EXPECT_TRUE(OutlinedFn.hasFnAttribute(Attribute::NoRecurse)); + EXPECT_TRUE(OutlinedFn.hasParamAttribute(0, Attribute::NoAlias)); + EXPECT_TRUE(OutlinedFn.hasParamAttribute(1, Attribute::NoAlias)); + + EXPECT_TRUE(OutlinedFn.hasInternalLinkage()); + EXPECT_EQ(OutlinedFn.arg_size(), 2U); + + unsigned NumAllocas = 0; + for (Instruction &I : instructions(OutlinedFn)) + NumAllocas += isa(I); + EXPECT_EQ(NumAllocas, 1U); + + EXPECT_EQ(OutlinedFn.getNumUses(), 1U); + User *Usr = OutlinedFn.user_back(); + ASSERT_TRUE(isa(Usr)); + CallInst *ForkCI = dyn_cast(Usr->user_back()); + ASSERT_NE(ForkCI, nullptr); + + EXPECT_EQ(ForkCI->getCalledFunction()->getName(), "__kmpc_fork_call"); + EXPECT_EQ(ForkCI->getNumArgOperands(), 3U); + EXPECT_TRUE(isa(ForkCI->getArgOperand(0))); + EXPECT_EQ(ForkCI->getArgOperand(1), + ConstantInt::get(Type::getInt32Ty(Ctx), 0U)); + EXPECT_EQ(ForkCI->getArgOperand(2), Usr); + } +} + TEST_F(OpenMPIRBuilderTest, ParallelIfCond) { using InsertPointTy = OpenMPIRBuilder::InsertPointTy; OpenMPIRBuilder OMPBuilder(*M); Index: mlir/include/mlir/Target/LLVMIR/ModuleTranslation.h =================================================================== --- mlir/include/mlir/Target/LLVMIR/ModuleTranslation.h +++ mlir/include/mlir/Target/LLVMIR/ModuleTranslation.h @@ -127,6 +127,10 @@ /// OpenMP dialect hasn't been loaded (it is always loaded if there are OpenMP /// operations in the module though). const Dialect *ompDialect; + /// Stack which stores the target block to which a branch a must be added when + /// a terminator is seen. A stack is required to handle nested OpenMP parallel + /// regions. + llvm::SmallVector ompContinuationIPStack; /// Mappings between llvm.mlir.global definitions and corresponding globals. DenseMap globalsMapping; Index: mlir/lib/Target/LLVMIR/ModuleTranslation.cpp =================================================================== --- mlir/lib/Target/LLVMIR/ModuleTranslation.cpp +++ mlir/lib/Target/LLVMIR/ModuleTranslation.cpp @@ -397,8 +397,8 @@ llvm::BasicBlock *codeGenIPBB = codeGenIP.getBlock(); llvm::Instruction *codeGenIPBBTI = codeGenIPBB->getTerminator(); + ompContinuationIPStack.push_back(&continuationIP); - builder.SetInsertPoint(codeGenIPBB); // ParallelOp has only `1` region associated with it. auto ®ion = cast(opInst).getRegion(); for (auto &bb : region) { @@ -413,22 +413,19 @@ for (auto indexedBB : llvm::enumerate(blocks)) { Block *bb = indexedBB.value(); llvm::BasicBlock *curLLVMBB = blockMapping[bb]; - if (bb->isEntryBlock()) + if (bb->isEntryBlock()) { + assert(codeGenIPBBTI->getNumSuccessors() == 1 && + "OpenMPIRBuilder provided entry block has multiple successors"); codeGenIPBBTI->setSuccessor(0, curLLVMBB); + } // TODO: Error not returned up the hierarchy if (failed(convertBlock(*bb, /*ignoreArguments=*/indexedBB.index() == 0))) return; - - // If this block has the terminator then add a jump to - // continuation bb - for (auto &op : *bb) { - if (isa(op)) { - builder.SetInsertPoint(curLLVMBB); - builder.CreateBr(&continuationIP); - } - } } + + ompContinuationIPStack.pop_back(); + // Finally, after all blocks have been traversed and values mapped, // connect the PHI nodes to the results of preceding blocks. connectPHINodes(region, valueMapping, blockMapping); @@ -463,10 +460,10 @@ // TODO: Determine the actual alloca insertion point, e.g., the function // entry or the alloca insertion point as provided by the body callback // above. - llvm::OpenMPIRBuilder::InsertPointTy allocaIP(builder.saveIP()); - builder.restoreIP( - ompBuilder->CreateParallel(builder, allocaIP, bodyGenCB, privCB, finiCB, - ifCond, numThreads, pbKind, isCancellable)); + // llvm::OpenMPIRBuilder::InsertPointTy allocaIP(builder.saveIP()); + builder.restoreIP(ompBuilder->CreateParallel(builder, bodyGenCB, privCB, + finiCB, ifCond, numThreads, + pbKind, isCancellable)); return success(); } @@ -504,7 +501,11 @@ ompBuilder->CreateFlush(builder.saveIP()); return success(); }) - .Case([&](omp::TerminatorOp) { return success(); }) + .Case([&](omp::TerminatorOp) { + llvm::BranchInst::Create(ompContinuationIPStack.back(), + builder.GetInsertBlock()); + return success(); + }) .Case( [&](omp::ParallelOp) { return convertOmpParallel(opInst, builder); }) .Default([&](Operation *inst) { Index: mlir/test/Target/openmp-llvm.mlir =================================================================== --- mlir/test/Target/openmp-llvm.mlir +++ mlir/test/Target/openmp-llvm.mlir @@ -214,3 +214,26 @@ // CHECK: define internal void @[[OMP_OUTLINED_FN_3_3]] // CHECK: define internal void @[[OMP_OUTLINED_FN_3_2]] // CHECK: define internal void @[[OMP_OUTLINED_FN_3_1]] + +// CHECK-LABEL: define void @test_omp_parallel_4() +llvm.func @test_omp_parallel_4() -> () { +// CHECK: call void {{.*}}@__kmpc_fork_call{{.*}} @[[OMP_OUTLINED_FN_4_1:.*]] to +// CHECK: define internal void @[[OMP_OUTLINED_FN_4_1]] +// CHECK: call void @__kmpc_barrier +// CHECK: call void {{.*}}@__kmpc_fork_call{{.*}} @[[OMP_OUTLINED_FN_4_1_1:.*]] to +// CHECK: call void @__kmpc_barrier + omp.parallel { + omp.barrier + +// CHECK: define internal void @[[OMP_OUTLINED_FN_4_1_1]] +// CHECK: call void @__kmpc_barrier + omp.parallel { + omp.barrier + omp.terminator + } + + omp.barrier + omp.terminator + } + llvm.return +}