Index: clang/lib/CodeGen/CGExpr.cpp =================================================================== --- clang/lib/CodeGen/CGExpr.cpp +++ clang/lib/CodeGen/CGExpr.cpp @@ -2507,9 +2507,11 @@ CapLVal.getTBAAInfo()); } - assert(isa(CurCodeDecl)); - Address addr = GetAddrOfBlockDecl(VD); - return MakeAddrLValue(addr, T, AlignmentSource::Decl); + // FIXME: This is not the right way to make this work; not capturing might be. + if (isa(CurCodeDecl)) { + Address addr = GetAddrOfBlockDecl(VD); + return MakeAddrLValue(addr, T, AlignmentSource::Decl); + } } } Index: clang/lib/CodeGen/CGStmtOpenMP.cpp =================================================================== --- clang/lib/CodeGen/CGStmtOpenMP.cpp +++ clang/lib/CodeGen/CGStmtOpenMP.cpp @@ -18,6 +18,8 @@ #include "clang/AST/Stmt.h" #include "clang/AST/StmtOpenMP.h" #include "clang/AST/DeclOpenMP.h" +#include "llvm/Transforms/Utils/OpenMPIRBuilder.h" + using namespace clang; using namespace CodeGen; @@ -1282,30 +1284,88 @@ llvm::SmallVectorImpl &) {} void CodeGenFunction::EmitOMPParallelDirective(const OMPParallelDirective &S) { - // Emit parallel region as a standalone region. - auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { - Action.Enter(CGF); - OMPPrivateScope PrivateScope(CGF); - bool Copyins = CGF.EmitOMPCopyinClause(S); - (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope); - if (Copyins) { - // Emit implicit barrier to synchronize threads and avoid data races on - // propagation master's thread values of threadprivate variables to local - // instances of that variables of all other implicit threads. - CGF.CGM.getOpenMPRuntime().emitBarrierCall( - CGF, S.getBeginLoc(), OMPD_unknown, /*EmitChecks=*/false, - /*ForceSimpleCall=*/true); + llvm::OpenMPIRBuilder OMPBuilder(CGM.getModule()); + + const CapturedStmt *CS = S.getCapturedStmt(OMPD_parallel); + CS->dump(); + const Stmt *ParallelRegionBodyStmt = CS->getCapturedStmt(); + + // The callback for generation of the parallel region body. 
+ std::function + BodyGen = [&](llvm::BasicBlock *AllocaBB, llvm::BasicBlock *InsertBB, + llvm::BasicBlock *ExitBB) { + auto OldAllocaIP = AllocaInsertPt; + AllocaInsertPt = AllocaBB->getFirstNonPHI(); + Builder.SetInsertPoint(InsertBB); + EmitStmt(ParallelRegionBodyStmt); + AllocaInsertPt = OldAllocaIP; + EmitBranch(ExitBB); + }; + + // Collect firstprivate variables. TODO: do it for shared, private, lastprivate. NOTE(review): the `continue` below skips `++IRef`/`++IElemInitRef`, so the iterators appear to fall out of step after a duplicate variable; confirm. + llvm::SmallVector + ExplicitlyCapturedVariables; + llvm::DenseSet Captured; + for (const auto *C : S.getClausesOfKind()) { + auto IRef = C->varlist_begin(); + auto IElemInitRef = C->inits().begin(); + for (const Expr *IInit : C->private_copies()) { + const auto *OrigVD = cast(cast(*IRef)->getDecl()); + if (!Captured.insert(OrigVD->getCanonicalDecl()).second) + continue; + const auto *VD = cast(cast(IInit)->getDecl()); + if (!LocalDeclMap.count(OrigVD)) + EmitAutoVarAlloca(*OrigVD); + if (!LocalDeclMap.count(VD)) + EmitAutoVarAlloca(*VD); + auto Addr = GetAddrOfLocalVar(OrigVD); + ExplicitlyCapturedVariables.push_back( + llvm::OpenMPIRBuilder::DataSharingInfo{ + Addr.getPointer(), llvm::OpenMPConstants::DSK_FIRSTPRIVATE}); + ++IRef; + ++IElemInitRef; } - CGF.EmitOMPPrivateClause(S, PrivateScope); - CGF.EmitOMPReductionClauseInit(S, PrivateScope); - (void)PrivateScope.Privatize(); - CGF.EmitStmt(S.getCapturedStmt(OMPD_parallel)->getCapturedStmt()); - CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_parallel); - }; - emitCommonOMPParallelDirective(*this, S, OMPD_parallel, CodeGen, - emitEmptyBoundParameters); - emitPostUpdateForReductionClause(*this, S, - [](CodeGenFunction &) { return nullptr; }); + } + + // Check if we have any if clause associated with the directive. 
+ llvm::Value *IfCond = nullptr; + if (const auto *C = S.getSingleClause()) + IfCond = EmitScalarExpr(C->getCondition(), + /*IgnoreResultAssign=*/true); + + + llvm::Value *NumThreads = nullptr; + if (const auto *NumThreadsClause = S.getSingleClause()) + NumThreads = EmitScalarExpr(NumThreadsClause->getNumThreads(), + /*IgnoreResultAssign=*/true); + + llvm::OpenMPConstants::DataSharingKind DefaultSharingKind = + llvm::OpenMPConstants::DSK_SHARED; + // TODO: Store and lookup default sharing kind. + + llvm::OpenMPConstants::ProcBindKind ProcBind = llvm::OpenMPConstants::PBK_NONE; + if (const auto *ProcBindClause = S.getSingleClause()) { + switch(ProcBindClause->getProcBindKind()) { + case OMPC_PROC_BIND_master: + ProcBind = llvm::OpenMPConstants::PBK_MASTER; + break; + case OMPC_PROC_BIND_close: + ProcBind = llvm::OpenMPConstants::PBK_CLOSE; + break; + case OMPC_PROC_BIND_spread: + ProcBind = llvm::OpenMPConstants::PBK_SPREAD; + break; + case OMPC_PROC_BIND_unknown: + /* Error? */ + break; + } + } + + llvm::BasicBlock *NewInsertBB = OMPBuilder.emitOMPParallel( + Builder.GetInsertBlock(), BodyGen, IfCond, NumThreads, DefaultSharingKind, + ExplicitlyCapturedVariables, ProcBind); + Builder.SetInsertPoint(NewInsertBB); } void CodeGenFunction::EmitOMPLoopBody(const OMPLoopDirective &D, Index: llvm/include/llvm/Transforms/Utils/OpenMPIRBuilder.h =================================================================== --- /dev/null +++ llvm/include/llvm/Transforms/Utils/OpenMPIRBuilder.h @@ -0,0 +1,143 @@ +//===- OpenMPIRBuilder.h - Builder for LLVM-IR for OpenMP -------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file defines the OpenMPIRBuilder class, which is used as a convenient +// way to create LLVM instructions for OpenMP directives. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_OPENMP_IR_IRBUILDER_H +#define LLVM_OPENMP_IR_IRBUILDER_H + +#include "llvm/IR/IRBuilder.h" + +namespace llvm { + +/// Constant definitions for fixed OpenMP values +namespace OpenMPConstants { + + /// Eligible values for the 'proc_bind' clause (OpenMP 5.0, Section 2.6) + enum ProcBindKind { + PBK_MASTER = 2, ///< proc_bind(master) + PBK_CLOSE = 3, ///< proc_bind(close) + PBK_SPREAD = 4, ///< proc_bind(spread) + PBK_NONE = 6, ///< No 'proc_bind' clause present + }; + + // TODO + enum DataSharingKind { + DSK_NONE, + DSK_SHARED, + DSK_PRIVATE, + DSK_LASTPRIVATE, + DSK_FIRSTPRIVATE, + /* TODO copyin */ + }; + + // TODO: + enum OpenMPDirectiveKind { + OMPDK_for, + OMPDK_sections, + OMPDK_single, + OMPDK_barrier, + OMPDK_parallel, + OMPDK_taskloop, + OMPDK_unknown, + // TODO: Read these from a tablegen file + }; + +}; + +/// An interface to create LLVM-IR for OpenMP directives. +/// +/// Each OpenMP directive has a corresponding public generator method. +struct OpenMPIRBuilder { + + /// Create a new OpenMPIRBuilder operating on the given module \p M. + OpenMPIRBuilder(Module &M); + + /// Encoding of a data-sharing clause + struct DataSharingInfo { + /// The location currently used by the variable. It should have type T* if + /// the privatized variable has type T. + Value *Location; + + /// The data-sharing kind, e.g., shared, private, etc. 
+ enum OpenMPConstants::DataSharingKind Kind; + }; + + /// LLVM-IR Generators for OpenMP Directives + /// + ///{ + + /// Generator for '#pragma omp parallel' (OpenMP 5.0, Section 2.6) + /// + /// \param InsertBB The location at which the parallel region was encountered. + /// \param BodyGen Callback that will generate the region code. + /// \param IfCondition The evaluated 'if' clause expression, if any. + /// \param NumThreads The evaluated 'num_threads' clause expression, if any. + /// \param DefaultSharingKind The default data-sharing kind. + /// \param DataSharingInformation Array describing captured variables. + /// \param ProcBind The value of the 'proc_bind' clause (see ProcBindKind). + /// + /// TODO: copyin, reduction, allocate not implemented + /// + /// \returns The location at which code generation continues. + BasicBlock *emitOMPParallel( + BasicBlock *InsertBB, + std::function &BodyGen, + Value *IfCondition, Value *NumThreads, + OpenMPConstants::DataSharingKind DefaultSharingKind, + SmallVectorImpl &DataSharingInformation, + OpenMPConstants::ProcBindKind ProcBind); + + /// Generator for '#pragma omp barrier' (OpenMP 5.0, Section 2.17.2) + /// + /// \param IP The location at which the barrier was encountered. + /// \param IsImplementationCaused Flag to indicate implicit barriers. + void emitOMPBarrier(Instruction &IP, bool IsImplementationCaused = false); + + ///} + + Module &getModule() { return M; } + Type *getKMPCIdentPtrTy() { return KMPCIdentPtrTy; } + Type *getKMPCMicroTaskTy() { return KMPCMicroTaskTy; } + +private: + /// TODO: Provide documentation for the private members. + + Module &M; + + Type *KMPCIdentPtrTy; + Type *KMPCMicroTaskTy; + + IRBuilder<> Builder; + + /// TODO: Provide documentation for the private types. + + /// TODO: Provide documentation for the private functions. 
+ + Value *getOrCreateDefaultLocation(unsigned Flags); + Value *emitUpdateLocation(unsigned Flags); + Value *getThreadID(Value *Loc); + + Function &getCurrentFunction(); + Instruction &getInsertionPoint() { return *Builder.GetInsertPoint(); } + + void emitBarrierImpl(OpenMPConstants::OpenMPDirectiveKind Kind, + bool ForceSimpleCall = false); + + void emitOMPIfClause(Instruction &IP, Value *Condition, Instruction **ThenGen, + Instruction **ElseGen); +}; + +} // end namespace llvm + +#endif // LLVM_OPENMP_IR_IRBUILDER_H Index: llvm/lib/IR/BasicBlock.cpp =================================================================== --- llvm/lib/IR/BasicBlock.cpp +++ llvm/lib/IR/BasicBlock.cpp @@ -399,19 +399,20 @@ /// the new BB, and the rest of the instructions in the BB are moved to the new /// BB, including the old terminator. This invalidates the iterator. /// -/// Note that this only works on well formed basic blocks (must have a -/// terminator), and 'I' must not be the end of instruction list (which would -/// cause a degenerate basic block to be formed, having a terminator inside of -/// the basic block). +/// Note that 'I' must not be the end of instruction list (which would cause a +/// degenerate basic block to be formed, having a terminator inside of the basic +/// block). /// BasicBlock *BasicBlock::splitBasicBlock(iterator I, const Twine &BBName) { - assert(getTerminator() && "Can't use splitBasicBlock on degenerate BB!"); assert(I != InstList.end() && "Trying to get me to create degenerate basic block!"); BasicBlock *New = BasicBlock::Create(getContext(), BBName, getParent(), this->getNextNode()); + // Rewrite successor PHIs early before we move the instructions. + replaceSuccessorsPhiUsesWith(New); + // Save DebugLoc of split point before invalidating iterator. 
DebugLoc Loc = I->getDebugLoc(); // Move all of the specified instructions from the original basic block into @@ -422,23 +423,6 @@ BranchInst *BI = BranchInst::Create(New, this); BI->setDebugLoc(Loc); - // Now we must loop through all of the successors of the New block (which - // _were_ the successors of the 'this' block), and update any PHI nodes in - // successors. If there were PHI nodes in the successors, then they need to - // know that incoming branches will be from New, not from Old. - // - for (succ_iterator I = succ_begin(New), E = succ_end(New); I != E; ++I) { - // Loop over any phi nodes in the basic block, updating the BB field of - // incoming values... - BasicBlock *Successor = *I; - for (auto &PN : Successor->phis()) { - int Idx = PN.getBasicBlockIndex(this); - while (Idx != -1) { - PN.setIncomingBlock((unsigned)Idx, New); - Idx = PN.getBasicBlockIndex(this); - } - } - } return New; } Index: llvm/lib/Transforms/Utils/CMakeLists.txt =================================================================== --- llvm/lib/Transforms/Utils/CMakeLists.txt +++ llvm/lib/Transforms/Utils/CMakeLists.txt @@ -42,6 +42,7 @@ MetaRenamer.cpp ModuleUtils.cpp NameAnonGlobals.cpp + OpenMPIRBuilder.cpp PredicateInfo.cpp PromoteMemoryToRegister.cpp StripGCRelocates.cpp Index: llvm/lib/Transforms/Utils/OpenMPIRBuilder.cpp =================================================================== --- /dev/null +++ llvm/lib/Transforms/Utils/OpenMPIRBuilder.cpp @@ -0,0 +1,545 @@ +//===- OpenMPIRBuilder.cpp - Builder for LLVM-IR for OpenMP directives ----===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements the OpenMPIRBuilder class, which is used as a convenient +// way to create LLVM instructions for OpenMP directives. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/Utils/OpenMPIRBuilder.h" + +#include "llvm/IR/MDBuilder.h" +#include "llvm/Transforms/Utils/BasicBlockUtils.h" +#include "llvm/Transforms/Utils/CodeExtractor.h" + +#define DEBUG_TYPE "openmp-ir-builder" + +using namespace llvm; +using namespace OpenMPConstants; + +namespace { + +/// Get or create the runtime function \p Name with type \p FTy. +FunctionCallee +getOrCreateRuntimeFunction(Module &M, FunctionType *FTy, StringRef Name, + AttributeList ExtraAttrs = AttributeList()) { + Function *Fn = M.getFunction(Name); + if (!Fn) { + Fn = Function::Create(FTy, GlobalValue::ExternalLinkage, Name, M); + Fn->setAttributes(ExtraAttrs); + + // TODO determine calling convention + // Fn->setCallingConv(getRuntimeCC()); + + Fn->setDSOLocal(true); + } + + return FunctionCallee(FTy, Fn); +} + +enum OpenMPRTLFunction { + /// Call to void __kmpc_fork_call(ident_t *loc, kmp_int32 argc, + /// kmpc_micro microtask, ...); + OMPRTL__kmpc_fork_call, + /// Call to void __kmpc_serialized_parallel(ident_t *loc, kmp_int32 + /// global_tid); + OMPRTL__kmpc_serialized_parallel, + /// Call to void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32 + /// global_tid); + OMPRTL__kmpc_end_serialized_parallel, + /// Call to void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid, + /// kmp_int32 num_threads); + OMPRTL__kmpc_push_num_threads, + /// Call to void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid, + /// int proc_bind); + OMPRTL__kmpc_push_proc_bind, + /// Call to kmp_int32 __kmpc_global_thread_num(ident_t *loc); + OMPRTL__kmpc_global_thread_num, + /// Call to void 
__kmpc_barrier(ident_t *loc, kmp_int32 global_tid); + OMPRTL__kmpc_barrier, + /// Call to kmp_int32 __kmpc_cancel_barrier(ident_t *loc, kmp_int32 + /// global_tid); + OMPRTL__kmpc_cancel_barrier, + /// Call to void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid); + OMPRTL__kmpc_for_static_fini, +}; + +FunctionCallee getOrCreateRuntimeFunction(OpenMPIRBuilder &OMPBuilder, + OpenMPRTLFunction FnID) { + Module &M = OMPBuilder.getModule(); + LLVMContext &Ctx = M.getContext(); + Type *VoidTy = Type::getVoidTy(Ctx); + Type *Int32Ty = Type::getInt32Ty(Ctx); + Type *KMPCIdentPtrTy = OMPBuilder.getKMPCIdentPtrTy(); + + FunctionCallee RTLFn = nullptr; + switch (FnID) { + case OMPRTL__kmpc_fork_call: { + // Build void __kmpc_fork_call(ident_t *loc, kmp_int32 argc, kmpc_micro + // microtask, ...); + Type *KMPCMicroTaskTy = OMPBuilder.getKMPCMicroTaskTy(); + Type *TypeParams[] = {KMPCIdentPtrTy, Int32Ty, KMPCMicroTaskTy}; + auto *FnTy = FunctionType::get(VoidTy, TypeParams, /*isVarArg*/ true); + RTLFn = getOrCreateRuntimeFunction(M, FnTy, "__kmpc_fork_call"); + if (auto *F = dyn_cast(RTLFn.getCallee())) { + if (!F->hasMetadata(LLVMContext::MD_callback)) { + MDBuilder MDB(Ctx); + // Annotate the callback behavior of the __kmpc_fork_call: + // - The callback callee is argument number 2 (microtask). + // - The first two arguments of the callback callee are unknown (-1). + // - All variadic arguments to the __kmpc_fork_call are passed to the + // callback callee. 
+ F->addMetadata(LLVMContext::MD_callback, + *MDNode::get(Ctx, {MDB.createCallbackEncoding( + 2, {-1, -1}, + /* isVarArg */ true)})); + } + } + break; + } + case OMPRTL__kmpc_serialized_parallel: { + // Build void __kmpc_serialized_parallel(ident_t *loc, kmp_int32 + // global_tid); + Type *TypeParams[] = {KMPCIdentPtrTy, Int32Ty}; + auto *FnTy = FunctionType::get(VoidTy, TypeParams, /*isVarArg*/ false); + RTLFn = getOrCreateRuntimeFunction(M, FnTy, "__kmpc_serialized_parallel"); + break; + } + case OMPRTL__kmpc_end_serialized_parallel: { + // Build void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32 + // global_tid); + Type *TypeParams[] = {KMPCIdentPtrTy, Int32Ty}; + auto *FnTy = FunctionType::get(VoidTy, TypeParams, /*isVarArg*/ false); + RTLFn = + getOrCreateRuntimeFunction(M, FnTy, "__kmpc_end_serialized_parallel"); + break; + } + case OMPRTL__kmpc_push_num_threads: { + // Build void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid, + // kmp_int32 num_threads) + Type *TypeParams[] = {KMPCIdentPtrTy, Int32Ty, Int32Ty}; + auto *FnTy = FunctionType::get(VoidTy, TypeParams, /*isVarArg*/ false); + RTLFn = getOrCreateRuntimeFunction(M, FnTy, "__kmpc_push_num_threads"); + break; + } + case OMPRTL__kmpc_push_proc_bind: { + // Build void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid, + // int proc_bind) + Type *TypeParams[] = {KMPCIdentPtrTy, Int32Ty, Int32Ty}; + auto *FnTy = FunctionType::get(VoidTy, TypeParams, /*isVarArg*/ false); + RTLFn = getOrCreateRuntimeFunction(M, FnTy, "__kmpc_push_proc_bind"); + break; + } + case OMPRTL__kmpc_global_thread_num: { + // Build kmp_int32 __kmpc_global_thread_num(ident_t *loc); + Type *TypeParams[] = {KMPCIdentPtrTy}; + auto *FnTy = FunctionType::get(Int32Ty, TypeParams, /*isVarArg*/ false); + RTLFn = getOrCreateRuntimeFunction(M, FnTy, "__kmpc_global_thread_num"); + break; + } + case OMPRTL__kmpc_barrier: { + // Build void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid); + Type *TypeParams[] = 
{KMPCIdentPtrTy, Int32Ty}; + auto *FnTy = FunctionType::get(VoidTy, TypeParams, /*isVarArg*/ false); + RTLFn = getOrCreateRuntimeFunction(M, FnTy, /*Name*/ "__kmpc_barrier"); + break; + } + case OMPRTL__kmpc_cancel_barrier: { + // Build kmp_int32 __kmpc_cancel_barrier(ident_t *loc, kmp_int32 + // global_tid); + Type *TypeParams[] = {KMPCIdentPtrTy, Int32Ty}; + auto *FnTy = FunctionType::get(Int32Ty, TypeParams, /*isVarArg*/ false); + RTLFn = getOrCreateRuntimeFunction(M, FnTy, + /*Name*/ "__kmpc_cancel_barrier"); + break; + } + case OMPRTL__kmpc_for_static_fini: { + // Build void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid); + Type *TypeParams[] = {KMPCIdentPtrTy, Int32Ty}; + auto *FnTy = FunctionType::get(VoidTy, TypeParams, /*isVarArg*/ false); + RTLFn = getOrCreateRuntimeFunction(M, FnTy, "__kmpc_for_static_fini"); + break; + } + } + assert(RTLFn && "Unable to find OpenMP runtime function"); + return RTLFn; +} + +/// Values for bit flags used in the ident_t to describe the fields. +/// All enumeration elements are named and described in accordance with the code +/// from +/// https://github.com/llvm/llvm-project/blob/master/openmp/runtime/src/kmp.h +enum OpenMPLocationFlags : unsigned { + /// Use trampoline for internal microtask. + OMP_IDENT_IMD = 0x01, + /// Use c-style ident structure. + OMP_IDENT_KMPC = 0x02, + /// Atomic reduction option for kmpc_reduce. + OMP_ATOMIC_REDUCE = 0x10, + /// Explicit 'barrier' directive. + OMP_IDENT_BARRIER_EXPL = 0x20, + /// Implicit barrier in code. + OMP_IDENT_BARRIER_IMPL = 0x40, + /// Implicit barrier in 'for' directive. + OMP_IDENT_BARRIER_IMPL_FOR = 0x40, + /// Implicit barrier in 'sections' directive. + OMP_IDENT_BARRIER_IMPL_SECTIONS = 0xC0, + /// Implicit barrier in 'single' directive. + OMP_IDENT_BARRIER_IMPL_SINGLE = 0x140, + /// Call of __kmp_for_static_init for static loop. + OMP_IDENT_WORK_LOOP = 0x200, + /// Call of __kmp_for_static_init for sections. 
+ OMP_IDENT_WORK_SECTIONS = 0x400, + /// Call of __kmp_for_static_init for distribute. + OMP_IDENT_WORK_DISTRIBUTE = 0x800, +}; + +unsigned getDefaultFlagsForBarriers(OpenMPDirectiveKind Kind) { + switch (Kind) { + case OMPDK_for: + return OMP_IDENT_BARRIER_IMPL_FOR; + case OMPDK_sections: + return OMP_IDENT_BARRIER_IMPL_SECTIONS; + case OMPDK_single: + return OMP_IDENT_BARRIER_IMPL_SINGLE; + case OMPDK_barrier: + return OMP_IDENT_BARRIER_EXPL; + default: + return OMP_IDENT_BARRIER_IMPL; + } +} + +} // end anonymous namespace + +OpenMPIRBuilder::OpenMPIRBuilder(Module &M) : M(M), Builder(M.getContext()) { + LLVMContext &Ctx = M.getContext(); + Type *VoidTy = Type::getVoidTy(Ctx); + Type *Int32Ty = Type::getInt32Ty(Ctx); + Type *Int8PtrTy = Type::getInt8PtrTy(Ctx); + StructType *KMPCIdentTy = M.getTypeByName("ident_t"); + if (!KMPCIdentTy) + KMPCIdentTy = StructType::create( + Ctx, {Int32Ty, Int32Ty, Int32Ty, Int32Ty, Int8PtrTy}, "ident_t"); + KMPCIdentPtrTy = PointerType::getUnqual(KMPCIdentTy); + + // Build void (*kmpc_micro)(kmp_int32 *global_tid, kmp_int32 *bound_tid,...) 
+ Type *MicroParams[] = {PointerType::getUnqual(Int32Ty), + PointerType::getUnqual(Int32Ty)}; + KMPCMicroTaskTy = + PointerType::getUnqual(FunctionType::get(VoidTy, MicroParams, true)); +} + +Value *OpenMPIRBuilder::getOrCreateDefaultLocation(unsigned Flags) { + static const char *OMP_DEFAULT_IDENT = "omp_default_ident"; + + Value *DefaultIdent = M.getGlobalVariable(OMP_DEFAULT_IDENT); + if (!DefaultIdent) { + LLVMContext &Ctx = M.getContext(); + Constant *SourceLocString = + Builder.CreateGlobalStringPtr("", "omp_default_loc"); + Constant *IdentData[] = {ConstantInt::getNullValue(Type::getInt32Ty(Ctx)), + ConstantInt::get(Type::getInt32Ty(Ctx), Flags), + ConstantInt::get(Type::getInt32Ty(Ctx), 0), + ConstantInt::getNullValue(Type::getInt32Ty(Ctx)), + SourceLocString}; + + DefaultIdent = new GlobalVariable( + M, KMPCIdentPtrTy->getPointerElementType(), /* isConstant */ true, + GlobalValue::WeakAnyLinkage, + ConstantStruct::get( + cast(KMPCIdentPtrTy->getPointerElementType()), + IdentData), + OMP_DEFAULT_IDENT); + } + return DefaultIdent; +} + +Value *OpenMPIRBuilder::emitUpdateLocation(unsigned Flags) { + Flags |= OMP_IDENT_KMPC; + return getOrCreateDefaultLocation(Flags); +} + +Value *OpenMPIRBuilder::getThreadID(Value *Loc) { + auto Fn = getOrCreateRuntimeFunction(*this, OMPRTL__kmpc_global_thread_num); + CallInst *CI = Builder.CreateCall(Fn, Loc, "omp_global_thread_num"); + return CI; +} + +void OpenMPIRBuilder::emitOMPIfClause(Instruction &IP, Value *Condition, + Instruction **ThenIP, + Instruction **ElseIP) { + if (Condition->getType() != Builder.getInt1Ty()) { + Builder.SetInsertPoint(&IP); + Condition = Builder.CreateIsNotNull(Condition); + } + SplitBlockAndInsertIfThenElse(Condition, &IP, ThenIP, ElseIP); +} + +Function &OpenMPIRBuilder::getCurrentFunction() { + assert(Builder.GetInsertBlock() && "No insertion point set!"); + return *Builder.GetInsertBlock()->getParent(); +} + +BasicBlock *OpenMPIRBuilder::emitOMPParallel( + BasicBlock *InsertBB, + 
std::function &BodyGen, + Value *IfCondition, Value *NumThreads, + OpenMPConstants::DataSharingKind DefaultSharingKind, + SmallVectorImpl &DataSharingInformation, ProcBindKind ProcBind) { + LLVM_DEBUG(dbgs() << "OpenMPIRBuilder::emitOMPParallel @ " << *InsertBB + << " in\n" << *InsertBB->getParent()); + + // Set new insertion point for the internal builder. + Builder.SetInsertPoint(InsertBB); + + LLVMContext &Ctx = M.getContext(); + + Value *Loc = emitUpdateLocation(0); + Value *ThreadID = getThreadID(Loc); + + Type *Int32Ty = Type::getInt32Ty(Ctx); + + if (NumThreads) { + // Build call __kmpc_push_num_threads(&loc, global_tid, num_threads) + Value *Args[] = { + Loc, ThreadID, + Builder.CreateIntCast(NumThreads, Int32Ty, /*isSigned*/ false)}; + Builder.CreateCall( + getOrCreateRuntimeFunction(*this, OMPRTL__kmpc_push_num_threads), Args); + } + + if (ProcBind != PBK_NONE) { + // Build call __kmpc_push_proc_bind(&loc, global_tid, proc_bind) + Value *Args[] = {Loc, ThreadID, + ConstantInt::get(Int32Ty, ProcBind, /*isSigned=*/true)}; + Builder.CreateCall( + getOrCreateRuntimeFunction(*this, OMPRTL__kmpc_push_proc_bind), Args); + } + + Instruction *OuterAllocaIP = + InsertBB->getParent()->getEntryBlock().getFirstNonPHI(); + AllocaInst *TIDAddr = + new AllocaInst(Int32Ty, 0, nullptr, "tid.addr", OuterAllocaIP); + AllocaInst *ZeroAddr = + new AllocaInst(Int32Ty, 0, nullptr, "zero.addr", OuterAllocaIP); + Builder.CreateStore(Constant::getNullValue(Int32Ty), ZeroAddr); + Builder.CreateStore(Constant::getNullValue(Int32Ty), TIDAddr); + + auto *IP = new BitCastInst(UndefValue::get(Builder.getInt32Ty()), + Builder.getInt32Ty(), "", InsertBB); + BasicBlock *CondBB = IP->getParent(); + + Instruction *ThenIP = IP, *ElseIP = nullptr; + if (IfCondition) + emitOMPIfClause(*IP, IfCondition, &ThenIP, &ElseIP); + + BasicBlock *ParalleRegionEntryBB = + ThenIP->getParent()->splitBasicBlock(ThenIP, "omp.par.entry"); + BasicBlock *ParalleRegionBodyBB = + 
ParalleRegionEntryBB->splitBasicBlock(ThenIP, "omp.par.region"); + BasicBlock *ParalleRegionExitBB = + ParalleRegionBodyBB->splitBasicBlock(ThenIP, "omp.par.exit"); + + // ThenBB + // | + // V + // PRegionEntryBB <- Privatization allocas are placed here. + // | + // V + // PRegionBodyBB <- BodyGen is invoked here. + // | + // V + // PRegionExitBB <- A common exit to simplify block collection. + // + + // Generate the privatization allocas in the "entry" block because it will + // become the entry of the outlined function. + Builder.SetInsertPoint(ParalleRegionEntryBB->getTerminator()); + + Instruction *AllocaIP = Builder.CreateLoad(ZeroAddr, "zero.addr.use"); + Builder.CreateLoad(TIDAddr, "tidd.addr.use"); + + SmallPtrSet ExplicitlyCapturedVariables; + for (DataSharingInfo &DSI : DataSharingInformation) + ExplicitlyCapturedVariables.insert(DSI.Location); + + LLVM_DEBUG(dbgs() << "Before body codegen: " << *IP->getFunction() << "\n"); + + Builder.SetInsertPoint(ParalleRegionBodyBB); + ParalleRegionBodyBB->getTerminator()->eraseFromParent(); + // Let the caller create the body. 
+ BodyGen(ParalleRegionEntryBB, ParalleRegionBodyBB, ParalleRegionExitBB); + + LLVM_DEBUG(dbgs() << "After body codegen: " << *IP->getFunction() << "\n"); + + SmallPtrSet ParallelRegionBlockSet; + SmallVector ParallelRegionBlocks, Worklist; + ParallelRegionBlockSet.insert(ParalleRegionEntryBB); + ParallelRegionBlockSet.insert(ParalleRegionExitBB); + + Worklist.push_back(ParalleRegionEntryBB); + while (!Worklist.empty()) { + BasicBlock *BB = Worklist.pop_back_val(); + ParallelRegionBlocks.push_back(BB); + for (BasicBlock *SuccBB : successors(BB)) + if (ParallelRegionBlockSet.insert(SuccBB).second) + Worklist.push_back(SuccBB); + } + // ParallelRegionBlocks.push_back(ParalleRegionExitBB); + + CodeExtractor Extractor(ParallelRegionBlocks, /* DominatorTree */ nullptr, + /* AggregateArgs */ false, + /* BlockFrequencyInfo */ nullptr, + /* BranchProbabilityInfo */ nullptr, + /* AssumptionCache */ nullptr, + /* AllowVarArgs */ true, + /* AllowAlloca */ true, + /* Suffix */ ".omp_par"); + + if (DefaultSharingKind != DSK_SHARED) { + + // Find inputs to, outputs from the code region. 
+ BasicBlock *CommonExit = nullptr; + using ValueSet = SetVector; + ValueSet Inputs, Outputs, SinkingCands, HoistingCands; + Extractor.findAllocas(SinkingCands, HoistingCands, CommonExit); + Extractor.findInputsOutputs(Inputs, Outputs, SinkingCands); + + assert(Outputs.empty()); + + for (Value *Input : Inputs) { + if (ExplicitlyCapturedVariables.count(Input)) + continue; + + LLVM_DEBUG(dbgs() << "Implicit captured input: " << *Input << "\n"); + assert(Input->getType()->isPointerTy()); + DataSharingInformation.push_back( + DataSharingInfo{Input, DefaultSharingKind}); + } + } + + Builder.SetInsertPoint(ParalleRegionEntryBB->getTerminator()); + LLVM_DEBUG(dbgs() << "Before privatization: " << *IP->getFunction() << "\n"); + for (DataSharingInfo &DSI : DataSharingInformation) { + if (DSI.Kind == DSK_SHARED) + continue; + + Type *ValueTy = DSI.Location->getType()->getPointerElementType(); + AllocaInst *PrivLoc = + new AllocaInst(ValueTy, 0, nullptr, DSI.Location->getName(), AllocaIP); + assert(DSI.Location->getType() == PrivLoc->getType()); + + SmallVector Uses; + for (Use &U : DSI.Location->uses()) + Uses.push_back(&U); + for (Use *UPtr : Uses) { + Use &U = *UPtr; + User *Usr = U.getUser(); + if (!ParallelRegionBlockSet.count(cast(Usr)->getParent())) + continue; + U.set(PrivLoc); + } + + if (DSI.Kind == DSK_FIRSTPRIVATE) { + Builder.SetInsertPoint(CondBB->getTerminator()); + Value *PVVal = Builder.CreateLoad( + DSI.Location, DSI.Location->getName() + ".omp_fpriv"); + Builder.SetInsertPoint(ParalleRegionEntryBB->getTerminator()); + Builder.CreateStore(PVVal, PrivLoc); + } + } + LLVM_DEBUG(dbgs() << "After privatization: " << *IP->getFunction() << "\n"); + LLVM_DEBUG({ + for (auto *BB : ParallelRegionBlocks) + dbgs() << " PBR: " << BB->getName() << "\n"; + }); + + Function *OutlinedFn = Extractor.extractCodeRegion(); + LLVM_DEBUG(dbgs() << "After outlining: " << *IP->getFunction() << "\n"); + LLVM_DEBUG(dbgs() << " Outlined function: " << *OutlinedFn << "\n"); + + 
assert(OutlinedFn && OutlinedFn->getNumUses() == 1); + assert(OutlinedFn->arg_size() >= 2 && + "Expected at least tid and bounded tid as arguments"); + unsigned NumCapturedVars = OutlinedFn->arg_size() - /* tid & bounded tid */ 2; + + CallInst *CI = cast(OutlinedFn->user_back()); + CI->getParent()->setName("omp_parallel"); + Builder.SetInsertPoint(CI); + + // Build call __kmpc_fork_call(loc, n, microtask, var1, .., varn); + Value *ForkCallArgs[] = {Loc, Builder.getInt32(NumCapturedVars), + Builder.CreateBitCast(OutlinedFn, KMPCMicroTaskTy)}; + + SmallVector RealArgs; + RealArgs.append(std::begin(ForkCallArgs), std::end(ForkCallArgs)); + RealArgs.append(CI->arg_begin() + /* tid & bound tid */ 2, CI->arg_end()); + + FunctionCallee RTLFn = + getOrCreateRuntimeFunction(*this, OMPRTL__kmpc_fork_call); + Builder.CreateCall(RTLFn, RealArgs); + + // If no "if" clause was present we are done. + if (!ElseIP) { + CI->eraseFromParent(); + return ParalleRegionExitBB; + } + + // If an "if" clause was present we are now generating the serialized + // version into the "else" branch. + Builder.SetInsertPoint(ElseIP); + + // Build calls: + // __kmpc_serialized_parallel(&Loc, GTid); + Value *SerializedParallelCallArgs[] = {Loc, ThreadID}; + Builder.CreateCall( + getOrCreateRuntimeFunction(*this, OMPRTL__kmpc_serialized_parallel), + SerializedParallelCallArgs); + + // OutlinedFn(&GTid, &zero, CapturedStruct); + CI->removeFromParent(); + Builder.Insert(CI); + + // __kmpc_end_serialized_parallel(&Loc, GTid); + Value *EndArgs[] = {Loc, ThreadID}; + Builder.CreateCall( + getOrCreateRuntimeFunction(*this, OMPRTL__kmpc_end_serialized_parallel), + EndArgs); + + return IP->getParent(); +} + +void OpenMPIRBuilder::emitOMPBarrier(Instruction &IP, + bool IsImplementationCaused) { + // Set new insertion point for the internal builder. + Builder.SetInsertPoint(&IP); + return emitBarrierImpl(IsImplementationCaused ? 
OMPDK_unknown + : OMPDK_barrier); +} + +void OpenMPIRBuilder::emitBarrierImpl(OpenMPDirectiveKind Kind, + bool ForceSimpleCall) { + + // Build call __kmpc_cancel_barrier(loc, thread_id) or + // __kmpc_barrier(loc, thread_id); + unsigned Flags = getDefaultFlagsForBarriers(Kind); + Value *Loc = emitUpdateLocation(Flags); + Value *Args[] = {Loc, getThreadID(Loc)}; + BasicBlock *CancelationBlock = nullptr; + if (ForceSimpleCall || !CancelationBlock) { + Builder.CreateCall(getOrCreateRuntimeFunction(*this, OMPRTL__kmpc_barrier), + Args); + return; + } + + Value *Result = Builder.CreateCall( + getOrCreateRuntimeFunction(*this, OMPRTL__kmpc_cancel_barrier), Args); + Value *Cmp = Builder.CreateIsNotNull(Result); + SplitBlockAndInsertIfThen(Cmp, &*Builder.GetInsertPoint(), false, nullptr, + nullptr, nullptr, CancelationBlock); +}