Index: clang/lib/CodeGen/CGStmtOpenMP.cpp =================================================================== --- clang/lib/CodeGen/CGStmtOpenMP.cpp +++ clang/lib/CodeGen/CGStmtOpenMP.cpp @@ -18,6 +18,8 @@ #include "clang/AST/Stmt.h" #include "clang/AST/StmtOpenMP.h" #include "clang/AST/DeclOpenMP.h" +#include "llvm/Transforms/Utils/OpenMPIRBuilder.h" + using namespace clang; using namespace CodeGen; @@ -1302,10 +1304,24 @@ CGF.EmitStmt(S.getCapturedStmt(OMPD_parallel)->getCapturedStmt()); CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_parallel); }; - emitCommonOMPParallelDirective(*this, S, OMPD_parallel, CodeGen, - emitEmptyBoundParameters); - emitPostUpdateForReductionClause(*this, S, - [](CodeGenFunction &) { return nullptr; }); + + llvm::OpenMPIRBuilder OMPBuilder(CGM.getModule()); + + auto OldBB = Builder.GetInsertBlock(); + auto *BC = new llvm::BitCastInst(llvm::UndefValue::get(Builder.getInt32Ty()), + Builder.getInt32Ty(), "", OldBB); + const CapturedStmt *CS = S.getCapturedStmt(OMPD_parallel); + llvm::Function *OutlinedFn = + CGM.getOpenMPRuntime().emitParallelOutlinedFunction( + S, *CS->getCapturedDecl()->param_begin(), OMPD_parallel, CodeGen); + + OMPParallelScope Scope(*this, S); + llvm::SmallVector CapturedVars; + GenerateOpenMPCapturedVars(*CS, CapturedVars); + OMPBuilder.emitOMPParallel(*BC, *OutlinedFn, CapturedVars); + + emitPostUpdateForReductionClause(*this, S, + [](CodeGenFunction &) { return nullptr; }); } void CodeGenFunction::EmitOMPLoopBody(const OMPLoopDirective &D, @@ -2482,22 +2498,113 @@ void CodeGenFunction::EmitOMPForDirective(const OMPForDirective &S) { bool HasLastprivates = false; - auto &&CodeGen = [&S, &HasLastprivates](CodeGenFunction &CGF, - PrePostActionTy &) { - OMPCancelStackRAII CancelRegion(CGF, OMPD_for, S.hasCancel()); - HasLastprivates = CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(), - emitForLoopBounds, - emitDispatchForLoopBounds); + + ///{ + auto OldIP = Builder.GetInsertPoint(); + auto OldBB = 
Builder.GetInsertBlock(); + auto ExprGen = [&](llvm::Instruction &IP, const Expr &E) { + auto OldIP = Builder.GetInsertPoint(); + auto OldBB = Builder.GetInsertBlock(); + Builder.SetInsertPoint(&IP); + (void)EmitOMPHelperVar(*this, cast(&E)).getPointer(); + llvm::Value *V = EmitScalarExpr(&E); + Builder.SetInsertPoint(OldBB, OldIP); + return V; + }; + + // Emit the loop iteration variable. + const auto *IVExpr = cast(S.getIterationVariable()); + const auto *IVDecl = cast(IVExpr->getDecl()); + EmitVarDecl(*IVDecl); + + llvm::SmallVector + CollapsedLoops; + CollapsedLoops.push_back(llvm::OpenMPIRBuilder::LoopDescription( + {[&](llvm::Instruction &IP) { + return ExprGen(IP, *S.getLowerBoundVariable()); + }, + [&](llvm::Instruction &IP) { + return ExprGen(IP, *S.getUpperBoundVariable()); + }, + [&](llvm::Instruction &IP) { + return ExprGen(IP, *S.getStrideVariable()); + }, + GetAddrOfLocalVar(IVDecl).getPointer(), llvm::ICmpInst::ICMP_SGT, true, + 32})); + + llvm::OpenMPIRBuilder::ScheduleDescription Schedule; + llvm::OpenMPIRBuilder OMPBuilder(CGM.getModule()); + + auto *BC = new llvm::BitCastInst(llvm::UndefValue::get(Builder.getInt32Ty()), + Builder.getInt32Ty(), "", OldBB); + llvm::OpenMPIRBuilder::OMPIRLoopInfo IRLoop = + OMPBuilder.emitOMPFor(*BC, Schedule, CollapsedLoops); + +#if 0 + Builder.GetInsertBlock()->getModule()->dump(); + llvm::dbgs() << "Precondition check block:\n"; + IRLoop.SkipCheckBB->dump(); + + llvm::dbgs() << "Thread execute check block:\n"; + IRLoop.ThreadCheckBB->dump(); + + llvm::dbgs() << "Loop header block:\n"; + IRLoop.HeaderBB->dump(); + + llvm::dbgs() << "Loop body block:\n"; + IRLoop.BodyBB->dump(); + + llvm::dbgs() << "Loop latch block:\n"; + IRLoop.LatchBB->dump(); + + llvm::dbgs() << "Thread execute end block:\n"; + IRLoop.ThreadExitBB->dump(); + + llvm::dbgs() << "Precondition end block:\n"; + IRLoop.SkipExitBB->dump(); +#endif + + ///} + + auto &&CodeGen = [&](CodeGenFunction &CGF, PrePostActionTy &) { + RunCleanupsScope 
BodyScope(*this); + // Update counters values on current iteration. + for (const Expr *UE : S.updates()) + EmitIgnoredExpr(UE); + // Update the linear variables. + // In distribute directives only loop counters may be marked as linear, no + // need to generate the code for them. + if (!isOpenMPDistributeDirective(S.getDirectiveKind())) { + for (const auto *C : S.getClausesOfKind()) { + for (const Expr *UE : C->updates()) + EmitIgnoredExpr(UE); + } + } + + // On a continue in the body, jump to the end. + JumpDest LoopExit = getJumpDestInCurrentScope(IRLoop.ThreadExitBB); + JumpDest Continue = getJumpDestInCurrentScope(IRLoop.LatchBB); + BreakContinueStack.push_back(BreakContinue(LoopExit, Continue)); + // Emit loop body. + EmitStmt(S.getBody()); + // The end (updates/cleanups). + BreakContinueStack.pop_back(); }; + Builder.SetInsertPoint(IRLoop.BodyBB->getTerminator()); { + OMPPrivateScope LoopScope(*this); + EmitOMPPrivateClause(S, LoopScope); + HasLastprivates = EmitOMPLastprivateClauseInit(S, LoopScope); + EmitOMPReductionClauseInit(S, LoopScope); + EmitOMPPrivateLoopCounters(S, LoopScope); + EmitOMPLinearClause(S, LoopScope); + (void)LoopScope.Privatize(); + OMPLexicalScope Scope(*this, S, OMPD_unknown); CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_for, CodeGen, S.hasCancel()); } - - // Emit an implicit barrier at the end. 
- if (!S.getSingleClause() || HasLastprivates) - CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getBeginLoc(), OMPD_for); + Builder.SetInsertPoint(BC->getParent(), BC->getParent()->end()); } void CodeGenFunction::EmitOMPForSimdDirective(const OMPForSimdDirective &S) { Index: llvm/include/llvm/Transforms/Utils/OpenMPIRBuilder.h =================================================================== --- /dev/null +++ llvm/include/llvm/Transforms/Utils/OpenMPIRBuilder.h @@ -0,0 +1,271 @@ +//===- llvm/IRBuilder.h - Builder for LLVM Instructions ---------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file defines the OpenMPIRBuilder class, which is used as a convenient +// way to create LLVM instructions for OpenMP directives. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_OPENMP_IR_IRBUILDER_H +#define LLVM_OPENMP_IR_IRBUILDER_H + +#include "llvm/IR/IRBuilder.h" + +namespace llvm { + +/// This provides a uniform way to create LLVM-IR for OpenMP directives. +/// +/// Each OpenMP directive should have a corresponding public generator method. +struct OpenMPIRBuilder { + + /// Create a new OpenMPIRBuilder operating on the given module \p M. + OpenMPIRBuilder(Module &M); + + /// Constant definitions to be used in the generator methods below. 
+ /// + ///{ + + /// Eligible values for the 'proc_bind' clause (OpenMP 5.0, Section 2.6) + enum ProcBindKind { + PBK_MASTER = 2, ///< proc_bind(master) + PBK_CLOSE = 3, ///< proc_bind(close) + PBK_SPREAD = 4, ///< proc_bind(spread) + PBK_NONE = 6, ///< No 'proc_bind' clause present + }; + + /// Eligible values for the 'order' clause (OpenMP 5.0, Section 2.9.2, 2.9.5) + enum OrderKind { + OK_CONCURRENT, ///< order(concurrent) + OK_NONE, ///< No 'order' clause present + }; + + /// OpenMP attributes for 'schedule' clause. + enum OpenMPScheduleClauseKind { + // TODO: Read from tablegen + OMPC_SCHEDULE_unknown + }; + + /// OpenMP modifiers for 'schedule' clause. + enum OpenMPScheduleClauseModifier { + OMPC_SCHEDULE_MODIFIER_unknown = OMPC_SCHEDULE_unknown, + // TODO: Read from tablegen + OMPC_SCHEDULE_MODIFIER_last + }; + + ///} + + /// Type definitions to be used in the generator methods below. + /// + ///{ + + using ValueGeneratorTy = std::function; + + /// Encoding of the 'schedule' clause values (OpenMP 5.0, Section 2.9.2) + struct ScheduleDescription { + OpenMPScheduleClauseModifier M1 = OMPC_SCHEDULE_MODIFIER_unknown; + OpenMPScheduleClauseModifier M2 = OMPC_SCHEDULE_MODIFIER_unknown; + OpenMPScheduleClauseKind Kind = OMPC_SCHEDULE_unknown; + ValueGeneratorTy *ChunkSizeGen = nullptr; + }; + + /// Description of a single loop: value generators for the lower bound, upper + /// bound and stride, plus the induction variable pointer, comparison + /// predicate, signedness and bit width. InductionVariableSize has to be 32 or + /// 64; createForStaticInitFunction asserts this. + struct LoopDescription { + LoopDescription(ValueGeneratorTy LowerBoundGen, + ValueGeneratorTy UpperBoundGen, ValueGeneratorTy StrideGen, + Value *IVPtr, ICmpInst::Predicate ComparisonPredidcate, + bool IsSignedInductionVariable = true, + uint32_t InductionVariableSize = 32) + : LowerBoundGen((LowerBoundGen)), UpperBoundGen((UpperBoundGen)), + StrideGen((StrideGen)), IVPtr(IVPtr), + ComparisonPredidcate(ComparisonPredidcate), + IsSignedInductionVariable(IsSignedInductionVariable), + InductionVariableSize(InductionVariableSize) {} + + ValueGeneratorTy LowerBoundGen; + ValueGeneratorTy UpperBoundGen; + ValueGeneratorTy StrideGen; + Value *IVPtr; + ICmpInst::Predicate 
ComparisonPredidcate; + bool IsSignedInductionVariable; + uint32_t InductionVariableSize; + }; + + // TODO: Draw a nice diagram of what the loop structure will look like wrt. the + // OMPIRLoopInfo object. + struct OMPIRLoopInfo { + BasicBlock *SkipCheckBB; + BasicBlock *ThreadCheckBB; + BasicBlock *HeaderBB; + BasicBlock *BodyBB; + BasicBlock *LatchBB; + BasicBlock *ThreadExitBB; + BasicBlock *SkipExitBB; + }; + + ///} + + /// LLVM-IR Generators for OpenMP Directives + /// + ///{ + + /// Generator for '#omp parallel' (OpenMP 5.0, Section 2.6) + /// + /// \param IP The location at which the parallel region was encountered. + /// \param OutlinedFn The function containing the parallel region code. + /// \param CapturedVars The values captured by the parallel region. + /// \param IfCondition The evaluated 'if' clause expression, if any. + /// \param NumThreads The evaluated 'num_threads' clause expression, if any. + /// \param ProcBind The value of the 'proc_bind' clause (see ProcBindKind). + /// \param IsCancelable True if the directive is cancelable. + void emitOMPParallel(Instruction &IP, Function &OutlinedFn, + ArrayRef CapturedVars, + Value *IfCondition = nullptr, + Value *NumThreads = nullptr, + ProcBindKind ProcBind = PBK_NONE, + bool IsCancelable = false); + + /// Generator for '#omp for' (OpenMP 5.0, Section 2.9.2) + /// + /// \param IP The location at which the worksharing loop was encountered. + /// \param Schedule The 'schedule' clause values (see ScheduleDescription). + /// \param CollapsedLoops Description of the collapsed loops, minimum one. The + /// implementation currently asserts exactly one. + /// \param OrderedLoops Description of the ordered loops, if any. The + /// implementation currently asserts this is null. + /// \param NoWait Flag to indicate if the 'nowait' clause was present. + /// \param Order The value of the 'order' clause (see OrderKind). + /// \param HasLastprivates True if the 'lastprivate' clause is present. + /// \param IsCancelable True if the directive is cancelable. + /// + /// \return The important CFG locations of the generated loop. 
+  OMPIRLoopInfo
+  emitOMPFor(Instruction &IP,
+             ScheduleDescription &Schedule,
+             SmallVectorImpl &CollapsedLoops,
+             SmallVectorImpl *OrderedLoops = nullptr,
+             bool NoWait = false, OrderKind Order = OK_NONE,
+             bool HasLastprivates = false, bool IsCancelable = false);
+
+  /// Generator for '#omp barrier' (OpenMP 5.0, Section 2.17.2)
+  ///
+  /// \param IP The location at which the barrier was encountered.
+  /// \param IsImplementationCaused Flag to indicate the barrier is
+  ///                               implementation caused.
+  void emitBarrier(Instruction &IP, bool IsImplementationCaused = false);
+
+  ///}
+
+  /// Directive kinds known to the builder.
+  enum OpenMPDirectiveKind {
+    OMPDK_for,
+    OMPDK_sections,
+    OMPDK_single,
+    OMPDK_barrier,
+    OMPDK_parallel,
+    OMPDK_taskloop,
+    OMPDK_unknown,
+    // TODO: Read these from a tablegen file
+  };
+
+  /// Directive kinds that can be canceled.
+  ///
+  /// Every enumerator has the value of its OpenMPDirectiveKind counterpart, so
+  /// either kind can be used to index CancelationInfoStack. As a consequence
+  /// OMPCDK_NUM_CANCELABLE_DIRECTIVES is one past the largest value used, not
+  /// the number of enumerators, and some stack slots stay unused.
+  enum OpenMPCancelableDirectiveKind {
+    // Must map to OMPDK_parallel; mapping it to OMPDK_taskloop would make
+    // 'parallel' and 'taskloop' share one cancelation stack.
+    OMPCDK_parallel = OMPDK_parallel,
+    OMPCDK_sections = OMPDK_sections,
+    OMPCDK_for = OMPDK_for,
+    OMPCDK_taskloop = OMPDK_taskloop,
+    OMPCDK_NUM_CANCELABLE_DIRECTIVES,
+  };
+
+  /// Return the module this builder operates on.
+  Module &getModule() { return M; }
+  /// Return the pointer type of the runtime's 'ident_t' structure.
+  Type *getKMPCIdentPtrTy() { return KMPCIdentPtrTy; }
+  /// Return the 'kmpc_micro' function pointer type.
+  Type *getKMPCMicroTaskTy() { return KMPCMicroTaskTy; }
+
+private:
+  /// TODO: Provide documentation for the private members.
+
+  Module &M;
+
+  Type *KMPCIdentPtrTy;
+  Type *KMPCMicroTaskTy;
+
+  IRBuilder<> Builder;
+
+  /// Require information about cancel directives nested inside.
+  ///
+  /// TODO: We could probably create cancelable calls and retroactively remove
+  /// the cancel part if no cancel directive was found in the region. This
+  /// might be useful to avoid scanning the region multiple times.
+  ///{
+
+  /// RAII class for cancelation handling
+  ///
+  /// If constructed with IsCancelable set, the constructor registers a
+  /// cancelation block for \p Kind and the destructor removes it again. The
+  /// object therefore has to be a named local that lives as long as the
+  /// region it guards.
+  struct CancelationScope {
+    CancelationScope(OpenMPIRBuilder &OMPBuilder, bool IsCancelable,
+                     OpenMPCancelableDirectiveKind Kind);
+    ~CancelationScope();
+  private:
+    OpenMPIRBuilder &OMPBuilder;
+    OpenMPCancelableDirectiveKind Kind;
+    BasicBlock *CancelBB;
+  };
+
+  /// One stack of cancelation blocks per directive kind value.
+  SmallVector
+      CancelationInfoStack[OMPCDK_NUM_CANCELABLE_DIRECTIVES];
+
+  /// Remove and return the innermost cancelation block for \p Kind. The stack
+  /// for \p Kind must not be empty.
+  BasicBlock *popCancelationBlock(OpenMPCancelableDirectiveKind Kind) {
+    return CancelationInfoStack[Kind].pop_back_val();
+  }
+
+  /// Register \p CancelationBlock as the innermost cancelation block for
+  /// \p Kind.
+  void pushCancelationBlock(OpenMPCancelableDirectiveKind Kind,
+                            BasicBlock *CancelationBlock) {
+    CancelationInfoStack[Kind].push_back(CancelationBlock);
+  }
+
+  /// Return the innermost cancelation block for \p Kind without removing it,
+  /// or nullptr if \p Kind is not cancelable or no block is registered.
+  BasicBlock *peekCancelationBlock(OpenMPDirectiveKind Kind) {
+    // These must be 'case' labels; plain 'Name:' labels are ordinary goto
+    // labels, which would send every value to 'default'.
+    switch (Kind) {
+    case OMPDK_parallel:
+    case OMPDK_sections:
+    case OMPDK_for:
+    case OMPDK_taskloop:
+      break;
+    default:
+      return nullptr;
+    }
+    if (CancelationInfoStack[Kind].empty())
+      return nullptr;
+    return CancelationInfoStack[Kind].back();
+  }
+
+  ///}
+
+  /// Values cached per function: the global thread number and the helper
+  /// allocas used for worksharing loops.
+  struct FunctionState {
+    Value *GlobalThreadNum = nullptr;
+    AllocaInst *LoopIsLastIterationPtr = nullptr;
+    AllocaInst *LoopLowerBoundPtr = nullptr;
+    AllocaInst *LoopUpperBoundPtr = nullptr;
+    AllocaInst *LoopStridePtr = nullptr;
+  };
+  DenseMap FunctionStateMap;
+
+  /// TODO: Provide documentation for the private types.
+
+  /// TODO: Provide documentation for the private functions.
+ + Value *getOrCreateDefaultLocation(unsigned Flags); + Value *emitUpdateLocation(unsigned Flags); + Value *getThreadID(Value *Loc); + + FunctionCallee createForStaticInitFunction(LoopDescription &LD); + + Function &getCurrentFunction(); + Instruction &getInsertionPoint() { return *Builder.GetInsertPoint(); } + Instruction &getAllocaInsertionPoint() { + return *getCurrentFunction().getEntryBlock().begin(); + } + + void emitBarrierImpl(OpenMPDirectiveKind Kind, bool ForceSimpleCall = false); + + void emitOMPIfClause(Instruction &IP, Value &Condition, Instruction **ThenGen, + Instruction **ElseGen); +}; + +} // end namespace llvm + +#endif // LLVM_OPENMP_IR_IRBUILDER_H Index: llvm/lib/IR/BasicBlock.cpp =================================================================== --- llvm/lib/IR/BasicBlock.cpp +++ llvm/lib/IR/BasicBlock.cpp @@ -399,19 +399,20 @@ /// the new BB, and the rest of the instructions in the BB are moved to the new /// BB, including the old terminator. This invalidates the iterator. /// -/// Note that this only works on well formed basic blocks (must have a -/// terminator), and 'I' must not be the end of instruction list (which would -/// cause a degenerate basic block to be formed, having a terminator inside of -/// the basic block). +/// Note that 'I' must not be the end of instruction list (which would cause a +/// degenerate basic block to be formed, having a terminator inside of the basic +/// block). /// BasicBlock *BasicBlock::splitBasicBlock(iterator I, const Twine &BBName) { - assert(getTerminator() && "Can't use splitBasicBlock on degenerate BB!"); assert(I != InstList.end() && "Trying to get me to create degenerate basic block!"); BasicBlock *New = BasicBlock::Create(getContext(), BBName, getParent(), this->getNextNode()); + // Rewrite successor PHIs early before we move the instructions. + replaceSuccessorsPhiUsesWith(New); + // Save DebugLoc of split point before invalidating iterator. 
DebugLoc Loc = I->getDebugLoc(); // Move all of the specified instructions from the original basic block into @@ -422,23 +423,6 @@ BranchInst *BI = BranchInst::Create(New, this); BI->setDebugLoc(Loc); - // Now we must loop through all of the successors of the New block (which - // _were_ the successors of the 'this' block), and update any PHI nodes in - // successors. If there were PHI nodes in the successors, then they need to - // know that incoming branches will be from New, not from Old. - // - for (succ_iterator I = succ_begin(New), E = succ_end(New); I != E; ++I) { - // Loop over any phi nodes in the basic block, updating the BB field of - // incoming values... - BasicBlock *Successor = *I; - for (auto &PN : Successor->phis()) { - int Idx = PN.getBasicBlockIndex(this); - while (Idx != -1) { - PN.setIncomingBlock((unsigned)Idx, New); - Idx = PN.getBasicBlockIndex(this); - } - } - } return New; } Index: llvm/lib/Transforms/Utils/CMakeLists.txt =================================================================== --- llvm/lib/Transforms/Utils/CMakeLists.txt +++ llvm/lib/Transforms/Utils/CMakeLists.txt @@ -42,6 +42,7 @@ MetaRenamer.cpp ModuleUtils.cpp NameAnonGlobals.cpp + OpenMPIRBuilder.cpp PredicateInfo.cpp PromoteMemoryToRegister.cpp StripGCRelocates.cpp Index: llvm/lib/Transforms/Utils/OpenMPIRBuilder.cpp =================================================================== --- /dev/null +++ llvm/lib/Transforms/Utils/OpenMPIRBuilder.cpp @@ -0,0 +1,661 @@ +//===- OpenMPIRBuilder.cpp - Builder for LLVM-IR for OpenMP directives ----===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements the OpenMPIRBuilder class, which is used as a convenient +// way to create LLVM instructions for OpenMP directives. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/Utils/OpenMPIRBuilder.h" + +#include "llvm/IR/MDBuilder.h" +#include "llvm/Transforms/Utils/BasicBlockUtils.h" + +using namespace llvm; + +namespace { + +/// Create a new runtime function with the specified type and name. +FunctionCallee +getOrCreateRuntimeFunction(Module &M, FunctionType *FTy, + StringRef Name, + AttributeList ExtraAttrs = AttributeList()) { + Function *Fn = M.getFunction(Name); + if (!Fn) { + Fn = Function::Create(FTy, GlobalValue::ExternalLinkage, Name, M); + Fn->setAttributes(ExtraAttrs); + + // TODO determine calling convention + // Fn->setCallingConv(getRuntimeCC()); + + Fn->setDSOLocal(true); + } + + return FunctionCallee(FTy, Fn); +} + +enum OpenMPRTLFunction { + /// Call to void __kmpc_fork_call(ident_t *loc, kmp_int32 argc, + /// kmpc_micro microtask, ...); + OMPRTL__kmpc_fork_call, + /// Call to void __kmpc_serialized_parallel(ident_t *loc, kmp_int32 + /// global_tid); + OMPRTL__kmpc_serialized_parallel, + /// Call to void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32 + /// global_tid); + OMPRTL__kmpc_end_serialized_parallel, + /// Call to void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid, + /// kmp_int32 num_threads); + OMPRTL__kmpc_push_num_threads, + /// Call to void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid, + /// int proc_bind); + OMPRTL__kmpc_push_proc_bind, + /// Call to __kmpc_int32 kmpc_global_thread_num(ident_t *loc); + OMPRTL__kmpc_global_thread_num, + /// Call to void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid); + OMPRTL__kmpc_barrier, + /// Call to kmp_int32 
__kmpc_cancel_barrier(ident_t *loc, kmp_int32 + /// global_tid); + OMPRTL__kmpc_cancel_barrier, + /// Call to void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid); + OMPRTL__kmpc_for_static_fini, +}; + +FunctionCallee getOrCreateRuntimeFunction(OpenMPIRBuilder &OMPBuilder, + OpenMPRTLFunction FnID) { + Module &M = OMPBuilder.getModule(); + LLVMContext &Ctx = M.getContext(); + Type *VoidTy = Type::getVoidTy(Ctx); + Type *Int32Ty = Type::getInt32Ty(Ctx); + Type *KMPCIdentPtrTy = OMPBuilder.getKMPCIdentPtrTy(); + + FunctionCallee RTLFn = nullptr; + switch (FnID) { + case OMPRTL__kmpc_fork_call: { + // Build void __kmpc_fork_call(ident_t *loc, kmp_int32 argc, kmpc_micro + // microtask, ...); + Type *KMPCMicroTaskTy = OMPBuilder.getKMPCMicroTaskTy(); + Type *TypeParams[] = {KMPCIdentPtrTy, Int32Ty, KMPCMicroTaskTy}; + auto *FnTy = FunctionType::get(VoidTy, TypeParams, /*isVarArg*/ true); + RTLFn = getOrCreateRuntimeFunction(M, FnTy, "__kmpc_fork_call"); + if (auto *F = dyn_cast(RTLFn.getCallee())) { + if (!F->hasMetadata(LLVMContext::MD_callback)) { + MDBuilder MDB(Ctx); + // Annotate the callback behavior of the __kmpc_fork_call: + // - The callback callee is argument number 2 (microtask). + // - The first two arguments of the callback callee are unknown (-1). + // - All variadic arguments to the __kmpc_fork_call are passed to the + // callback callee. 
+ F->addMetadata(LLVMContext::MD_callback, + *MDNode::get(Ctx, {MDB.createCallbackEncoding( + 2, {-1, -1}, + /* isVarArg */ true)})); + } + } + break; + } + case OMPRTL__kmpc_serialized_parallel: { + // Build void __kmpc_serialized_parallel(ident_t *loc, kmp_int32 + // global_tid); + Type *TypeParams[] = {KMPCIdentPtrTy, Int32Ty}; + auto *FnTy = FunctionType::get(VoidTy, TypeParams, /*isVarArg*/ false); + RTLFn = getOrCreateRuntimeFunction(M, FnTy, + "__kmpc_serialized_parallel"); + break; + } + case OMPRTL__kmpc_end_serialized_parallel: { + // Build void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32 + // global_tid); + Type *TypeParams[] = {KMPCIdentPtrTy, Int32Ty}; + auto *FnTy = FunctionType::get(VoidTy, TypeParams, /*isVarArg*/ false); + RTLFn = getOrCreateRuntimeFunction(M, FnTy, + "__kmpc_end_serialized_parallel"); + break; + } + case OMPRTL__kmpc_push_num_threads: { + // Build void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid, + // kmp_int32 num_threads) + Type *TypeParams[] = {KMPCIdentPtrTy, Int32Ty, Int32Ty}; + auto *FnTy = FunctionType::get(VoidTy, TypeParams, /*isVarArg*/ false); + RTLFn = + getOrCreateRuntimeFunction(M, FnTy, "__kmpc_push_num_threads"); + break; + } + case OMPRTL__kmpc_push_proc_bind: { + // Build void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid, + // int proc_bind) + Type *TypeParams[] = {KMPCIdentPtrTy, Int32Ty, Int32Ty}; + auto *FnTy = FunctionType::get(VoidTy, TypeParams, /*isVarArg*/ false); + RTLFn = + getOrCreateRuntimeFunction(M, FnTy, "__kmpc_push_proc_bind"); + break; + } + case OMPRTL__kmpc_global_thread_num: { + // Build kmp_int32 __kmpc_global_thread_num(ident_t *loc); + Type *TypeParams[] = {KMPCIdentPtrTy}; + auto *FnTy = FunctionType::get(Int32Ty, TypeParams, /*isVarArg*/ false); + RTLFn = getOrCreateRuntimeFunction(M, FnTy, + "__kmpc_global_thread_num"); + break; + } + case OMPRTL__kmpc_barrier: { + // Build void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid); + Type 
*TypeParams[] = {KMPCIdentPtrTy, Int32Ty}; + auto *FnTy = FunctionType::get(VoidTy, TypeParams, /*isVarArg*/ false); + RTLFn = + getOrCreateRuntimeFunction(M, FnTy, /*Name*/ "__kmpc_barrier"); + break; + } + case OMPRTL__kmpc_cancel_barrier: { + // Build kmp_int32 __kmpc_cancel_barrier(ident_t *loc, kmp_int32 + // global_tid); + Type *TypeParams[] = {KMPCIdentPtrTy, Int32Ty}; + auto *FnTy = FunctionType::get(Int32Ty, TypeParams, /*isVarArg*/ false); + RTLFn = getOrCreateRuntimeFunction(M, FnTy, + /*Name*/ "__kmpc_cancel_barrier"); + break; + } + case OMPRTL__kmpc_for_static_fini: { + // Build void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid); + Type *TypeParams[] = {KMPCIdentPtrTy, Int32Ty}; + auto *FnTy = FunctionType::get(VoidTy, TypeParams, /*isVarArg*/ false); + RTLFn = + getOrCreateRuntimeFunction(M, FnTy, "__kmpc_for_static_fini"); + break; + } + } + assert(RTLFn && "Unable to find OpenMP runtime function"); + return RTLFn; +} + +/// Values for bit flags used in the ident_t to describe the fields. +/// All enumeric elements are named and described in accordance with the code +/// from +/// https://github.com/llvm/llvm-project/blob/master/openmp/runtime/src/kmp.h +enum OpenMPLocationFlags : unsigned { + /// Use trampoline for internal microtask. + OMP_IDENT_IMD = 0x01, + /// Use c-style ident structure. + OMP_IDENT_KMPC = 0x02, + /// Atomic reduction option for kmpc_reduce. + OMP_ATOMIC_REDUCE = 0x10, + /// Explicit 'barrier' directive. + OMP_IDENT_BARRIER_EXPL = 0x20, + /// Implicit barrier in code. + OMP_IDENT_BARRIER_IMPL = 0x40, + /// Implicit barrier in 'for' directive. + OMP_IDENT_BARRIER_IMPL_FOR = 0x40, + /// Implicit barrier in 'sections' directive. + OMP_IDENT_BARRIER_IMPL_SECTIONS = 0xC0, + /// Implicit barrier in 'single' directive. + OMP_IDENT_BARRIER_IMPL_SINGLE = 0x140, + /// Call of __kmp_for_static_init for static loop. + OMP_IDENT_WORK_LOOP = 0x200, + /// Call of __kmp_for_static_init for sections. 
+  OMP_IDENT_WORK_SECTIONS = 0x400,
+  /// Call of __kmp_for_static_init for distribute.
+  OMP_IDENT_WORK_DISTRIBUTE = 0x800,
+};
+
+/// Return the ident_t flags describing a barrier that belongs to a directive
+/// of kind \p Kind.
+unsigned getDefaultFlagsForBarriers(OpenMPIRBuilder::OpenMPDirectiveKind Kind) {
+  switch (Kind) {
+  case OpenMPIRBuilder::OMPDK_for:
+    return OMP_IDENT_BARRIER_IMPL_FOR;
+  case OpenMPIRBuilder::OMPDK_sections:
+    return OMP_IDENT_BARRIER_IMPL_SECTIONS;
+  case OpenMPIRBuilder::OMPDK_single:
+    return OMP_IDENT_BARRIER_IMPL_SINGLE;
+  case OpenMPIRBuilder::OMPDK_barrier:
+    return OMP_IDENT_BARRIER_EXPL;
+  default:
+    return OMP_IDENT_BARRIER_IMPL;
+  }
+}
+
+} // end anonymous namespace
+
+/// Open a cancelation scope for \p Kind. Does nothing unless \p IsCancelable
+/// is set; otherwise a new "omp_canceled" block is created, registered with
+/// \p OMPBuilder, and terminated by a branch to the block that follows the
+/// current insertion point.
+OpenMPIRBuilder::CancelationScope::CancelationScope(
+    OpenMPIRBuilder &OMPBuilder, bool IsCancelable,
+    OpenMPCancelableDirectiveKind Kind)
+    : OMPBuilder(OMPBuilder), Kind(Kind), CancelBB(nullptr) {
+  if (!IsCancelable)
+    return;
+
+  Function &F = OMPBuilder.getCurrentFunction();
+  LLVMContext &Ctx = F.getContext();
+  // No 'InsertBefore' block is passed, so the new block is appended to the end
+  // of F. Dereferencing F.end() to obtain one is invalid: end() is a sentinel.
+  CancelBB = BasicBlock::Create(Ctx, "omp_canceled", &F);
+  OMPBuilder.pushCancelationBlock(Kind, CancelBB);
+
+  Instruction &IP = OMPBuilder.getInsertionPoint();
+  SplitBlock(IP.getParent(), &IP);
+
+  // After the split, IP.getParent() is the newly created continuation block.
+  BranchInst::Create(IP.getParent(), CancelBB);
+}
+
+/// Close the cancelation scope; scopes have to be closed in reverse order of
+/// their creation.
+OpenMPIRBuilder::CancelationScope::~CancelationScope() {
+  if (!CancelBB)
+    return;
+  BasicBlock *RemovedCancelBB = OMPBuilder.popCancelationBlock(Kind);
+  (void)RemovedCancelBB;
+  assert(RemovedCancelBB == CancelBB &&
+         "Cancelation scopes not perfectly nested!");
+}
+
+/// Set up the runtime types (ident_t pointer, kmpc_micro) used by the
+/// generators. An existing "ident_t" struct type in \p M is reused.
+OpenMPIRBuilder::OpenMPIRBuilder(Module &M) : M(M), Builder(M.getContext()) {
+  LLVMContext &Ctx = M.getContext();
+  Type *VoidTy = Type::getVoidTy(Ctx);
+  Type *Int32Ty = Type::getInt32Ty(Ctx);
+  Type *Int8PtrTy = Type::getInt8PtrTy(Ctx);
+  StructType *KMPCIdentTy = M.getTypeByName("ident_t");
+  if (!KMPCIdentTy)
+    KMPCIdentTy = StructType::create(
+        Ctx, {Int32Ty, Int32Ty, Int32Ty, Int32Ty, Int8PtrTy}, "ident_t");
+  KMPCIdentPtrTy = PointerType::getUnqual(KMPCIdentTy);
+
+  // Build void (*kmpc_micro)(kmp_int32 *global_tid, kmp_int32 *bound_tid,...)
+  Type *MicroParams[] = {PointerType::getUnqual(Int32Ty),
+                         PointerType::getUnqual(Int32Ty)};
+  KMPCMicroTaskTy =
+      PointerType::getUnqual(FunctionType::get(VoidTy, MicroParams, true));
+}
+
+/// Return the module-level default ident_t global, creating it on first use.
+///
+/// NOTE(review): the global is looked up by a fixed name, so \p Flags is only
+/// honored by the call that creates it; later calls get the existing global
+/// regardless of the flags they pass.
+Value *OpenMPIRBuilder::getOrCreateDefaultLocation(unsigned Flags) {
+  static const char *OMP_DEFAULT_IDENT = "omp_default_ident";
+
+  Value *DefaultIdent = M.getGlobalVariable(OMP_DEFAULT_IDENT);
+  if (!DefaultIdent) {
+    LLVMContext &Ctx = M.getContext();
+    Constant *SourceLocString =
+        Builder.CreateGlobalStringPtr("", "omp_default_loc");
+    Constant *IdentData[] = {ConstantInt::getNullValue(Type::getInt32Ty(Ctx)),
+                             ConstantInt::get(Type::getInt32Ty(Ctx), Flags),
+                             ConstantInt::get(Type::getInt32Ty(Ctx), 0),
+                             ConstantInt::getNullValue(Type::getInt32Ty(Ctx)),
+                             SourceLocString};
+
+    DefaultIdent = new GlobalVariable(
+        M, KMPCIdentPtrTy->getPointerElementType(), /* isConstant */ true,
+        GlobalValue::WeakAnyLinkage,
+        ConstantStruct::get(
+            cast(KMPCIdentPtrTy->getPointerElementType()),
+            IdentData),
+        OMP_DEFAULT_IDENT);
+  }
+  return DefaultIdent;
+}
+
+/// Return the location value for \p Flags; OMP_IDENT_KMPC is always added.
+Value *OpenMPIRBuilder::emitUpdateLocation(unsigned Flags) {
+  Flags |= OMP_IDENT_KMPC;
+  return getOrCreateDefaultLocation(Flags);
+}
+
+/// Return the global thread number for the current function. The runtime call
+/// is emitted once per function, moved in front of the last instruction of the
+/// entry block, and cached in the function state.
+Value *OpenMPIRBuilder::getThreadID(Value *Loc) {
+  Function &F = getCurrentFunction();
+  FunctionState &FState = FunctionStateMap[&F];
+  if (!FState.GlobalThreadNum) {
+    auto Fn = getOrCreateRuntimeFunction(*this, OMPRTL__kmpc_global_thread_num);
+    CallInst *CI = Builder.CreateCall(Fn, Loc, "omp_global_thread_num");
+    CI->moveBefore(&F.getEntryBlock().back());
+    FState.GlobalThreadNum = CI;
+  }
+  return FState.GlobalThreadNum;
+}
+
+/// Split the block at \p IP into an if-then-else on \p Condition; \p ThenIP
+/// and \p ElseIP are set by SplitBlockAndInsertIfThenElse.
+void OpenMPIRBuilder::emitOMPIfClause(Instruction &IP, Value &Condition,
+                                      Instruction **ThenIP,
+                                      Instruction **ElseIP) {
+  SplitBlockAndInsertIfThenElse(&Condition, &IP, ThenIP, ElseIP);
+}
+
+/// Return the __kmpc_for_static_init_{4,4u,8,8u} runtime function matching the
+/// induction variable size and signedness of \p LD.
+FunctionCallee
+OpenMPIRBuilder::createForStaticInitFunction(LoopDescription &LD) {
+  assert((LD.InductionVariableSize == 32 || LD.InductionVariableSize == 64) &&
+         "IV size is not compatible with the omp runtime");
+  StringRef Name =
+      LD.InductionVariableSize == 32
+          ? (LD.IsSignedInductionVariable ? "__kmpc_for_static_init_4"
+                                          : "__kmpc_for_static_init_4u")
+          : (LD.IsSignedInductionVariable ? "__kmpc_for_static_init_8"
+                                          : "__kmpc_for_static_init_8u");
+  LLVMContext &Ctx = M.getContext();
+  Type *VoidTy = Type::getVoidTy(Ctx);
+  Type *Int32Ty = Type::getInt32Ty(Ctx);
+
+  Type *ITy = Builder.getIntNTy(LD.InductionVariableSize);
+  auto *PtrTy = PointerType::getUnqual(ITy);
+  Type *TypeParams[] = {
+      KMPCIdentPtrTy,                  // loc
+      Int32Ty,                         // tid
+      Int32Ty,                         // schedtype
+      PointerType::getUnqual(Int32Ty), // p_lastiter
+      PtrTy,                           // p_lower
+      PtrTy,                           // p_upper
+      PtrTy,                           // p_stride
+      ITy,                             // incr
+      ITy                              // chunk
+  };
+  auto *FnTy = FunctionType::get(VoidTy, TypeParams, /*isVarArg*/ false);
+  return getOrCreateRuntimeFunction(getModule(), FnTy, Name);
+}
+
+/// Return the function that contains the builder's current insertion block.
+Function &OpenMPIRBuilder::getCurrentFunction() {
+  assert(Builder.GetInsertBlock() && "No insertion point set!");
+  return *Builder.GetInsertBlock()->getParent();
+}
+
+/// Emit the runtime calls for a parallel region in front of \p IP: optional
+/// num_threads and proc_bind pushes followed by __kmpc_fork_call. With an 'if'
+/// clause, the fork call goes into the "then" branch and a serialized call of
+/// \p OutlinedFn into the "else" branch.
+void OpenMPIRBuilder::emitOMPParallel(Instruction &IP, Function &OutlinedFn,
+                                      ArrayRef CapturedVars,
+                                      Value *IfCondition, Value *NumThreads,
+                                      ProcBindKind ProcBind,
+                                      bool IsCancelable) {
+  // Set new insertion point for the internal builder.
+  Builder.SetInsertPoint(&IP);
+
+  // The scope has to be a named object: an unnamed temporary is destroyed at
+  // the end of the statement, which pops the cancelation block right away.
+  CancelationScope CancelScope(*this, IsCancelable, OMPCDK_parallel);
+
+  LLVMContext &Ctx = M.getContext();
+
+  Value *Loc = emitUpdateLocation(0);
+  Value *ThreadID = getThreadID(Loc);
+
+  Type *Int32Ty = Type::getInt32Ty(Ctx);
+
+  if (NumThreads) {
+    // Build call __kmpc_push_num_threads(&loc, global_tid, num_threads)
+    Value *Args[] = {
+        Loc, ThreadID,
+        Builder.CreateIntCast(NumThreads, Int32Ty, /*isSigned*/ false)};
+    Builder.CreateCall(
+        getOrCreateRuntimeFunction(*this, OMPRTL__kmpc_push_num_threads), Args);
+  }
+
+  if (ProcBind != PBK_NONE) {
+    // Build call __kmpc_push_proc_bind(&loc, global_tid, proc_bind)
+    Value *Args[] = {Loc, ThreadID,
+                     ConstantInt::get(Int32Ty, ProcBind, /*isSigned=*/true)};
+    Builder.CreateCall(
+        getOrCreateRuntimeFunction(*this, OMPRTL__kmpc_push_proc_bind), Args);
+  }
+
+  Instruction *ThenIP = &IP, *ElseIP = nullptr;
+  if (IfCondition)
+    emitOMPIfClause(IP, *IfCondition, &ThenIP, &ElseIP);
+
+  // Generate new instructions in the "then" insertion point, which is set to
+  // the correct value in the presence and absence of an "if" clause.
+  Builder.SetInsertPoint(ThenIP);
+
+  // Build call __kmpc_fork_call(loc, n, microtask, var1, .., varn);
+  Value *ForkCallArgs[] = {Loc, Builder.getInt32(CapturedVars.size()),
+                           Builder.CreateBitCast(&OutlinedFn, KMPCMicroTaskTy)};
+
+  SmallVector RealArgs;
+  RealArgs.append(std::begin(ForkCallArgs), std::end(ForkCallArgs));
+  RealArgs.append(CapturedVars.begin(), CapturedVars.end());
+
+  FunctionCallee RTLFn =
+      getOrCreateRuntimeFunction(*this, OMPRTL__kmpc_fork_call);
+  Builder.CreateCall(RTLFn, RealArgs);
+
+  // If no "if" clause was present we are done.
+  if (!ElseIP)
+    return;
+
+  // If an "if" clause was present we are now generating the serialized
+  // version into the "else" branch.
+  Builder.SetInsertPoint(ElseIP);
+
+  // Build calls:
+  // __kmpc_serialized_parallel(&Loc, GTid);
+  Value *SerializedParallelCallArgs[] = {Loc, ThreadID};
+  Builder.CreateCall(
+      getOrCreateRuntimeFunction(*this, OMPRTL__kmpc_serialized_parallel),
+      SerializedParallelCallArgs);
+
+  // OutlinedFn(&gtid, &zero, CapturedStruct);
+  // NOTE(review): this alloca is created at the "else" insertion point, not
+  // in the entry block like the loop helper allocas in emitOMPFor.
+  AllocaInst *ZeroAddr = Builder.CreateAlloca(Int32Ty, nullptr, "zero.addr");
+  Builder.CreateStore(Constant::getNullValue(Int32Ty), ZeroAddr);
+
+  SmallVector OutlinedFnArgs;
+  // ThreadId for serialized parallels is 0.
+  OutlinedFnArgs.push_back(ZeroAddr);
+  OutlinedFnArgs.push_back(ZeroAddr);
+  OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end());
+  Builder.CreateCall(&OutlinedFn, OutlinedFnArgs);
+
+  // __kmpc_end_serialized_parallel(&Loc, GTid);
+  Value *EndArgs[] = {Loc, ThreadID};
+  Builder.CreateCall(
+      getOrCreateRuntimeFunction(*this, OMPRTL__kmpc_end_serialized_parallel),
+      EndArgs);
+}
+
+OpenMPIRBuilder::OMPIRLoopInfo OpenMPIRBuilder::emitOMPFor(
+    Instruction &IP,
+    ScheduleDescription &Schedule,
+    SmallVectorImpl &CollapsedLoops,
+    SmallVectorImpl *OrderedLoops, bool NoWait,
+    OrderKind Order, bool HasLastprivates, bool IsCancelable) {
+
+  // TODO: Handle OrderedLoops
+  assert(!OrderedLoops && "'ordered' clause not handled yet");
+  // TODO: Handle collapsed loops
+  assert(CollapsedLoops.size() == 1 &&
+         "multiple 'collapsed' loops not handled yet");
+  LoopDescription &Loop = CollapsedLoops[0];
+
+  // Set new insertion point for the internal builder.
+ Builder.SetInsertPoint(&IP); + CancelationScope(*this, IsCancelable, OMPCDK_for); + + Function &F = getCurrentFunction(); + + LLVMContext &Ctx = M.getContext(); + Type *VoidTy = Type::getVoidTy(Ctx); + Type *Int32Ty = Type::getInt32Ty(Ctx); + Type *LoopIVTy = Builder.getIntNTy(Loop.InductionVariableSize); + + /// Initialize the function state and helper values, e.g., thread id + ///{ + FunctionState &FState = FunctionStateMap[&F]; + if (!FState.LoopIsLastIterationPtr) { + assert(!FState.LoopUpperBoundPtr && !FState.LoopUpperBoundPtr && + !FState.LoopStridePtr && "Inconsistent function state!"); + Instruction &AllocaIP = getAllocaInsertionPoint(); + FState.LoopIsLastIterationPtr = + new AllocaInst(Int32Ty, 0, "omp_loop_last_iter", &AllocaIP); + FState.LoopUpperBoundPtr = + new AllocaInst(LoopIVTy, 0, "omp_loop_upper_bound", &AllocaIP); + FState.LoopLowerBoundPtr = + new AllocaInst(LoopIVTy, 0, "omp_loop_lower_bound", &AllocaIP); + FState.LoopStridePtr = + new AllocaInst(LoopIVTy, 0, "omp_loop_stride", &AllocaIP); + } + assert(FState.LoopIsLastIterationPtr && FState.LoopUpperBoundPtr && + FState.LoopUpperBoundPtr && FState.LoopStridePtr && + "Inconsistent function state!"); + + Value *Loc = emitUpdateLocation(0); + Value *ThreadID = getThreadID(Loc); + + Value *Zero = ConstantInt::get(LoopIVTy, 0); + Value *One = ConstantInt::get(LoopIVTy, 1); + Value *MinusOne = ConstantInt::get(LoopIVTy, -1); + ///} + + /// Create all blocks we will need + ///{ + OMPIRLoopInfo IRLoop; + IRLoop.SkipCheckBB = IP.getParent(); + IRLoop.ThreadCheckBB = SplitBlock(IP.getParent(), &IP); + IRLoop.ThreadCheckBB->setName("omp_for_thread_check"); + IRLoop.HeaderBB = SplitBlock(IP.getParent(), &IP); + IRLoop.HeaderBB->setName("omp_for_header"); + IRLoop.BodyBB = SplitBlock(IP.getParent(), &IP); + IRLoop.BodyBB->setName("omp_for_body"); + IRLoop.LatchBB = SplitBlock(IP.getParent(), &IP); + IRLoop.LatchBB->setName("omp_for_latch"); + IRLoop.ThreadExitBB = SplitBlock(IP.getParent(), &IP); + 
IRLoop.ThreadExitBB->setName("omp_for_thread_exit"); + IRLoop.SkipExitBB = SplitBlock(IP.getParent(), &IP); + IRLoop.SkipExitBB->setName("omp_for_skip_exit"); + assert(&IP == &getInsertionPoint() && "Unexpected builder state!"); + ///} + + /// Fill the skip check block + ///{ + Builder.SetInsertPoint(IRLoop.SkipCheckBB->getTerminator()); + Value *LowerBoundVal = Loop.LowerBoundGen(getInsertionPoint()); + if (auto *LBInst = cast(LowerBoundVal)) + LBInst->setName("omp_loop_lower_bound_initial"); + Value *UpperBoundVal = Loop.UpperBoundGen(getInsertionPoint()); + if (auto *UBInst = cast(UpperBoundVal)) + UBInst->setName("omp_loop_upper_bound_initial"); + Value *SkipCond = Builder.CreateICmp(Loop.ComparisonPredidcate, LowerBoundVal, + UpperBoundVal, "omp_for_skip_cond"); + SplitBlockAndInsertIfThen(SkipCond, &getInsertionPoint(), false, nullptr, + nullptr, nullptr, IRLoop.SkipExitBB); + ///} + + /// Fill the thread check block + ///{ + assert(IRLoop.ThreadCheckBB->size() == 1); + Builder.SetInsertPoint(IRLoop.ThreadCheckBB->getTerminator()); + Value *StrideVal = Loop.StrideGen(getInsertionPoint()); + if (auto *StrideInst = cast(StrideVal)) + StrideInst->setName("omp_loop_stride_initial"); + + //Value *Xor = Builder.CreateXor(LowerBoundVal, MinusOne); + //Value *Add0 = Builder.CreateAdd(Xor, UpperBoundVal); + //Value *Add1 = Builder.CreateNSWAdd(Add0, StrideVal); + //Value *Div = Builder.CreateSDiv(Add1, StrideVal); + //Value *LoopAdjustedUB = Builder.CreateNSWAdd(Div, MinusOne); + Value *LoopAdjustedUB = UpperBoundVal; + + Builder.CreateStore(Zero, FState.LoopLowerBoundPtr); + Builder.CreateStore(LoopAdjustedUB, FState.LoopUpperBoundPtr); + Builder.CreateStore(One, FState.LoopStridePtr); + Builder.CreateStore(Zero, FState.LoopIsLastIterationPtr); + + FunctionCallee StaticInitFunction = createForStaticInitFunction(Loop); + + Value *Args[] = { + Loc, + ThreadID, + Builder.getInt32(/* static schedule */ 34), // Schedule type + FState.LoopIsLastIterationPtr, // &isLastIter 
+ FState.LoopLowerBoundPtr, // &LB + FState.LoopUpperBoundPtr, // &UB + FState.LoopStridePtr, // &Stride + One, // Incr + One, // Chunk + }; + Builder.CreateCall(StaticInitFunction, Args); + + Value *ThreadUB = Builder.CreateLoad(LoopIVTy, FState.LoopUpperBoundPtr, + "omp_for_thread_ub_val"); + Value *ThreadAdjustUBCmp = Builder.CreateICmpSLE(LoopAdjustedUB, ThreadUB, "omp_for_thread_ub_val_cmp"); + Value *ThreadAdjustedUB = Builder.CreateSelect(ThreadAdjustUBCmp, LoopAdjustedUB, ThreadUB, "omp_for_thread_ub_val_capped"); + Value *ThreadLB = Builder.CreateLoad(LoopIVTy, FState.LoopLowerBoundPtr, + "omp_for_thread_lb_val"); + Value *ThreadCond = + Builder.CreateICmpSGT(ThreadLB, ThreadAdjustedUB, "omp_for_thread_cond"); + SplitBlockAndInsertIfThen(ThreadCond, &getInsertionPoint(), false, nullptr, + nullptr, nullptr, IRLoop.ThreadExitBB); + BasicBlock *LoopPreHeader = getInsertionPoint().getParent(); + ///} + + /// Fill the header block + ///{ + assert(IRLoop.HeaderBB->size() == 1); + Builder.SetInsertPoint(&IRLoop.HeaderBB->front()); + PHINode *IVPHI = Builder.CreatePHI(LoopIVTy, 2, "omp_for_loop_iv"); + IVPHI->addIncoming(ThreadLB, LoopPreHeader); + Value *LoopCond = + Builder.CreateICmpSLE(IVPHI, ThreadAdjustedUB, "omp_for_loop_cond"); + Builder.CreateCondBr(LoopCond, IRLoop.BodyBB, IRLoop.ThreadExitBB); + Builder.ClearInsertionPoint(); + IRLoop.HeaderBB->getTerminator()->eraseFromParent(); + ///} + + /// Fill the body block + ///{ + assert(IRLoop.BodyBB->size() == 1); + Builder.SetInsertPoint(&IRLoop.BodyBB->front()); + // TODO: Handle multiple loops (ordered/collapsed) + //Value *IVStrideAdj = + //Builder.CreateNSWMul(IVPHI, StrideVal, "omp_for_loop_iv_stride_adj"); + //Value *IVOffsetAdj = Builder.CreateNSWAdd(IVStrideAdj, LowerBoundVal, + //"omp_for_loop_iv_offset_adj"); + Builder.CreateStore(IVPHI, Loop.IVPtr); + assert(IRLoop.BodyBB->getTerminator()->getSuccessor(0) == IRLoop.LatchBB); + ///} + + /// Fill the latch block + ///{ + 
assert(IRLoop.LatchBB->size() == 1); + Builder.SetInsertPoint(&IRLoop.LatchBB->front()); + IRLoop.LatchBB->getTerminator()->setSuccessor(0, IRLoop.HeaderBB); + Value *IVPHIInc = Builder.CreateNSWAdd(IVPHI, One); + IVPHI->addIncoming(IVPHIInc, IRLoop.LatchBB); + ///} + + /// Fill the thread exit block + ///{ + assert(IRLoop.ThreadExitBB->size() == 1); + Builder.SetInsertPoint(&IRLoop.ThreadExitBB->front()); + Builder.CreateCall( + getOrCreateRuntimeFunction(*this, OMPRTL__kmpc_for_static_fini), + {Loc, ThreadID}); + assert(IRLoop.ThreadExitBB->getTerminator()->getSuccessor(0) == + IRLoop.SkipExitBB); + ///} + + /// Fill the skip exit block + ///{ + assert(IRLoop.SkipExitBB->size() == 1); + Builder.SetInsertPoint(&IRLoop.SkipExitBB->front()); + // Emit an implicit barrier at the end. + if (!NoWait || HasLastprivates) + emitBarrierImpl(OMPDK_for); + ///} + + return IRLoop; +} + +void OpenMPIRBuilder::emitBarrier(Instruction &IP, + bool IsImplementationCaused) { + // Set new insertion point for the internal builder. + Builder.SetInsertPoint(&IP); + return emitBarrierImpl(IsImplementationCaused ? OMPDK_unknown + : OMPDK_barrier); +} + +void OpenMPIRBuilder::emitBarrierImpl(OpenMPDirectiveKind Kind, + bool ForceSimpleCall) { + + // Build call __kmpc_cancel_barrier(loc, thread_id) or + // __kmpc_barrier(loc, thread_id); + unsigned Flags = getDefaultFlagsForBarriers(Kind); + Value *Loc = emitUpdateLocation(Flags); + Value *Args[] = {Loc, getThreadID(Loc)}; + BasicBlock *CancelationBlock = peekCancelationBlock(Kind); + if (ForceSimpleCall || !CancelationBlock) { + Builder.CreateCall(getOrCreateRuntimeFunction(*this, OMPRTL__kmpc_barrier), + Args); + return; + } + + Value *Result = Builder.CreateCall( + getOrCreateRuntimeFunction(*this, OMPRTL__kmpc_cancel_barrier), Args); + Value *Cmp = Builder.CreateIsNotNull(Result); + SplitBlockAndInsertIfThen(Cmp, &*Builder.GetInsertPoint(), false, nullptr, + nullptr, nullptr, CancelationBlock); +}