Index: cfe/trunk/lib/CodeGen/CGOpenMPRuntime.h =================================================================== --- cfe/trunk/lib/CodeGen/CGOpenMPRuntime.h +++ cfe/trunk/lib/CodeGen/CGOpenMPRuntime.h @@ -43,7 +43,10 @@ class CodeGenFunction; class CodeGenModule; +typedef llvm::function_ref RegionCodeGenTy; + class CGOpenMPRuntime { +private: enum OpenMPRTLFunction { /// \brief Call to void __kmpc_fork_call(ident_t *loc, kmp_int32 argc, /// kmpc_micro microtask, ...); @@ -284,25 +287,27 @@ virtual ~CGOpenMPRuntime() {} virtual void clear(); - /// \brief Emits outlined function for the specified OpenMP directive \a D. - /// This outlined function has type void(*)(kmp_int32 *ThreadID, kmp_int32 - /// BoundID, struct context_vars*). + /// \brief Emits outlined function for the specified OpenMP parallel directive + /// \a D. This outlined function has type void(*)(kmp_int32 *ThreadID, + /// kmp_int32 BoundID, struct context_vars*). /// \param D OpenMP directive. /// \param ThreadIDVar Variable for thread id in the current OpenMP region. - /// - virtual llvm::Value *emitOutlinedFunction(const OMPExecutableDirective &D, - const VarDecl *ThreadIDVar); + /// \param CodeGen Code generation sequence for the \a D directive. + virtual llvm::Value * + emitParallelOutlinedFunction(const OMPExecutableDirective &D, + const VarDecl *ThreadIDVar, + const RegionCodeGenTy &CodeGen); /// \brief Emits outlined function for the OpenMP task directive \a D. This /// outlined function has type void(*)(kmp_int32 ThreadID, kmp_int32 /// PartID, struct context_vars*). /// \param D OpenMP directive. /// \param ThreadIDVar Variable for thread id in the current OpenMP region. - /// \param PartIDVar If not nullptr - variable used for part id in tasks. + /// \param CodeGen Code generation sequence for the \a D directive. 
/// virtual llvm::Value *emitTaskOutlinedFunction(const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, - const VarDecl *PartIDVar); + const RegionCodeGenTy &CodeGen); /// \brief Cleans up references to the objects in finished function. /// @@ -334,14 +339,14 @@ /// \param CriticalOpGen Generator for the statement associated with the given /// critical region. virtual void emitCriticalRegion(CodeGenFunction &CGF, StringRef CriticalName, - const std::function &CriticalOpGen, + const RegionCodeGenTy &CriticalOpGen, SourceLocation Loc); /// \brief Emits a master region. /// \param MasterOpGen Generator for the statement associated with the given /// master region. virtual void emitMasterRegion(CodeGenFunction &CGF, - const std::function &MasterOpGen, + const RegionCodeGenTy &MasterOpGen, SourceLocation Loc); /// \brief Emits code for a taskyield directive. @@ -351,7 +356,7 @@ /// \param SingleOpGen Generator for the statement associated with the given /// single region. virtual void emitSingleRegion(CodeGenFunction &CGF, - const std::function &SingleOpGen, + const RegionCodeGenTy &SingleOpGen, SourceLocation Loc, ArrayRef CopyprivateVars, ArrayRef SrcExprs, @@ -506,17 +511,13 @@ llvm::PointerIntPair Final, llvm::Value *TaskFunction, QualType SharedsTy, llvm::Value *Shareds); + /// \brief Emit code for the directive that does not require outlining. + /// + /// \param CodeGen Code generation sequence for the \a D directive. + virtual void emitInlinedDirective(CodeGenFunction &CGF, + const RegionCodeGenTy &CodeGen); }; -/// \brief RAII for emitting code of CapturedStmt without function outlining. 
-class InlinedOpenMPRegionRAII { - CodeGenFunction &CGF; - -public: - InlinedOpenMPRegionRAII(CodeGenFunction &CGF, - const OMPExecutableDirective &D); - ~InlinedOpenMPRegionRAII(); -}; } // namespace CodeGen } // namespace clang Index: cfe/trunk/lib/CodeGen/CGOpenMPRuntime.cpp =================================================================== --- cfe/trunk/lib/CodeGen/CGOpenMPRuntime.cpp +++ cfe/trunk/lib/CodeGen/CGOpenMPRuntime.cpp @@ -31,37 +31,58 @@ /// \brief Base class for handling code generation inside OpenMP regions. class CGOpenMPRegionInfo : public CodeGenFunction::CGCapturedStmtInfo { public: - CGOpenMPRegionInfo(const OMPExecutableDirective &D, const CapturedStmt &CS) - : CGCapturedStmtInfo(CS, CR_OpenMP), Directive(D) {} + /// \brief Kinds of OpenMP regions used in codegen. + enum CGOpenMPRegionKind { + /// \brief Region with outlined function for standalone 'parallel' + /// directive. + ParallelOutlinedRegion, + /// \brief Region with outlined function for standalone 'task' directive. + TaskOutlinedRegion, + /// \brief Region for constructs that do not require function outlining, + /// like 'for', 'sections', 'atomic' etc. directives. + InlinedRegion, + }; - CGOpenMPRegionInfo(const OMPExecutableDirective &D) - : CGCapturedStmtInfo(CR_OpenMP), Directive(D) {} + CGOpenMPRegionInfo(const CapturedStmt &CS, + const CGOpenMPRegionKind RegionKind, + const RegionCodeGenTy &CodeGen) + : CGCapturedStmtInfo(CS, CR_OpenMP), RegionKind(RegionKind), + CodeGen(CodeGen) {} + + CGOpenMPRegionInfo(const CGOpenMPRegionKind RegionKind, + const RegionCodeGenTy &CodeGen) + : CGCapturedStmtInfo(CR_OpenMP), RegionKind(RegionKind), + CodeGen(CodeGen) {} /// \brief Get a variable or parameter for storing global thread id /// inside OpenMP construct. virtual const VarDecl *getThreadIDVariable() const = 0; + /// \brief Emit the captured statement body. 
+ virtual void EmitBody(CodeGenFunction &CGF, const Stmt *S) override; + /// \brief Get an LValue for the current ThreadID variable. /// \return LValue for thread id variable. This LValue always has type int32*. virtual LValue getThreadIDVariableLValue(CodeGenFunction &CGF); - /// \brief Emit the captured statement body. - virtual void EmitBody(CodeGenFunction &CGF, const Stmt *S) override; + CGOpenMPRegionKind getRegionKind() const { return RegionKind; } static bool classof(const CGCapturedStmtInfo *Info) { return Info->getKind() == CR_OpenMP; } + protected: - /// \brief OpenMP executable directive associated with the region. - const OMPExecutableDirective &Directive; + CGOpenMPRegionKind RegionKind; + RegionCodeGenTy CodeGen; // By value; function_ref is cheap to copy. }; /// \brief API for captured statement code generation in OpenMP constructs. class CGOpenMPOutlinedRegionInfo : public CGOpenMPRegionInfo { public: - CGOpenMPOutlinedRegionInfo(const OMPExecutableDirective &D, - const CapturedStmt &CS, const VarDecl *ThreadIDVar) - : CGOpenMPRegionInfo(D, CS), ThreadIDVar(ThreadIDVar) { + CGOpenMPOutlinedRegionInfo(const CapturedStmt &CS, const VarDecl *ThreadIDVar, + const RegionCodeGenTy &CodeGen) + : CGOpenMPRegionInfo(CS, ParallelOutlinedRegion, CodeGen), + ThreadIDVar(ThreadIDVar) { assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region."); } /// \brief Get a variable or parameter for storing global thread id @@ -69,9 +90,16 @@ virtual const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; } + /// \brief Get the name of the capture helper. StringRef getHelperName() const override { return ".omp_outlined."; } + static bool classof(const CGCapturedStmtInfo *Info) { + return CGOpenMPRegionInfo::classof(Info) && + cast(Info)->getRegionKind() == + ParallelOutlinedRegion; + } + private: /// \brief A variable or parameter storing global thread id for OpenMP /// constructs. @@ -81,12 +109,11 @@ /// \brief API for captured statement code generation in OpenMP constructs. 
class CGOpenMPTaskOutlinedRegionInfo : public CGOpenMPRegionInfo { public: - CGOpenMPTaskOutlinedRegionInfo(const OMPExecutableDirective &D, - const CapturedStmt &CS, + CGOpenMPTaskOutlinedRegionInfo(const CapturedStmt &CS, const VarDecl *ThreadIDVar, - const VarDecl *PartIDVar) - : CGOpenMPRegionInfo(D, CS), ThreadIDVar(ThreadIDVar), - PartIDVar(PartIDVar) { + const RegionCodeGenTy &CodeGen) + : CGOpenMPRegionInfo(CS, TaskOutlinedRegion, CodeGen), + ThreadIDVar(ThreadIDVar) { assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region."); } /// \brief Get a variable or parameter for storing global thread id @@ -98,28 +125,28 @@ /// \brief Get an LValue for the current ThreadID variable. virtual LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override; - /// \brief Emit the captured statement body. - virtual void EmitBody(CodeGenFunction &CGF, const Stmt *S) override; - /// \brief Get the name of the capture helper. StringRef getHelperName() const override { return ".omp_outlined."; } + static bool classof(const CGCapturedStmtInfo *Info) { + return CGOpenMPRegionInfo::classof(Info) && + cast(Info)->getRegionKind() == + TaskOutlinedRegion; + } + private: /// \brief A variable or parameter storing global thread id for OpenMP /// constructs. const VarDecl *ThreadIDVar; - /// \brief A variable or parameter storing part id for OpenMP tasking - /// constructs. - const VarDecl *PartIDVar; }; /// \brief API for inlined captured statement code generation in OpenMP /// constructs. class CGOpenMPInlinedRegionInfo : public CGOpenMPRegionInfo { public: - CGOpenMPInlinedRegionInfo(const OMPExecutableDirective &D, - CodeGenFunction::CGCapturedStmtInfo *OldCSI) - : CGOpenMPRegionInfo(D), OldCSI(OldCSI), + CGOpenMPInlinedRegionInfo(CodeGenFunction::CGCapturedStmtInfo *OldCSI, + const RegionCodeGenTy &CodeGen) + : CGOpenMPRegionInfo(InlinedRegion, CodeGen), OldCSI(OldCSI), OuterRegionInfo(dyn_cast_or_null(OldCSI)) {} // \brief Retrieve the value of the context parameter. 
virtual llvm::Value *getContextValue() const override { @@ -127,6 +154,13 @@ return OuterRegionInfo->getContextValue(); llvm_unreachable("No context value for inlined OpenMP region"); } + virtual void setContextValue(llvm::Value *V) override { + if (OuterRegionInfo) { + OuterRegionInfo->setContextValue(V); + return; + } + llvm_unreachable("No context value for inlined OpenMP region"); + } /// \brief Lookup the captured field decl for a variable. virtual const FieldDecl *lookup(const VarDecl *VD) const override { if (OuterRegionInfo) @@ -149,16 +183,48 @@ /// \brief Get the name of the capture helper. virtual StringRef getHelperName() const override { + if (auto *OuterRegionInfo = getOldCSI()) + return OuterRegionInfo->getHelperName(); llvm_unreachable("No helper name for inlined OpenMP construct"); } CodeGenFunction::CGCapturedStmtInfo *getOldCSI() const { return OldCSI; } + static bool classof(const CGCapturedStmtInfo *Info) { + return CGOpenMPRegionInfo::classof(Info) && + cast(Info)->getRegionKind() == InlinedRegion; + } + private: /// \brief CodeGen info about outer OpenMP region. CodeGenFunction::CGCapturedStmtInfo *OldCSI; CGOpenMPRegionInfo *OuterRegionInfo; }; + +/// \brief RAII for emitting code of OpenMP constructs. +class InlinedOpenMPRegionRAII { + CodeGenFunction &CGF; + +public: + /// \brief Constructs region for combined constructs. + /// \param CodeGen Code generation sequence for combined directives. Includes + /// a list of functions used for code generation of implicitly inlined + /// regions. + InlinedOpenMPRegionRAII(CodeGenFunction &CGF, const RegionCodeGenTy &CodeGen) + : CGF(CGF) { + // Start emission for the construct. + CGF.CapturedStmtInfo = + new CGOpenMPInlinedRegionInfo(CGF.CapturedStmtInfo, CodeGen); + } + ~InlinedOpenMPRegionRAII() { + // Restore original CapturedStmtInfo only if we're done with code emission. 
+ auto *OldCSI = + cast(CGF.CapturedStmtInfo)->getOldCSI(); + delete CGF.CapturedStmtInfo; + CGF.CapturedStmtInfo = OldCSI; + } +}; + } // namespace LValue CGOpenMPRegionInfo::getThreadIDVariableLValue(CodeGenFunction &CGF) { @@ -172,15 +238,18 @@ ->getPointeeType()); } -void CGOpenMPRegionInfo::EmitBody(CodeGenFunction &CGF, const Stmt *S) { - CodeGenFunction::OMPPrivateScope PrivateScope(CGF); - CGF.EmitOMPPrivateClause(Directive, PrivateScope); - CGF.EmitOMPFirstprivateClause(Directive, PrivateScope); - if (PrivateScope.Privatize()) - // Emit implicit barrier to synchronize threads and avoid data races. - CGF.CGM.getOpenMPRuntime().emitBarrierCall(CGF, Directive.getLocStart(), - OMPD_unknown); - CGCapturedStmtInfo::EmitBody(CGF, S); +void CGOpenMPRegionInfo::EmitBody(CodeGenFunction &CGF, const Stmt * /*S*/) { + // 1.2.2 OpenMP Language Terminology + // Structured block - An executable statement with a single entry at the + // top and a single exit at the bottom. + // The point of exit cannot be a branch out of the structured block. + // longjmp() and throw() must not violate the entry/exit criteria. + CGF.EHStack.pushTerminate(); + { + CodeGenFunction::RunCleanupsScope Scope(CGF); + CodeGen(CGF); + } + CGF.EHStack.popTerminate(); } LValue CGOpenMPTaskOutlinedRegionInfo::getThreadIDVariableLValue( @@ -190,14 +259,6 @@ getThreadIDVariable()->getType()); } -void CGOpenMPTaskOutlinedRegionInfo::EmitBody(CodeGenFunction &CGF, - const Stmt *S) { - if (PartIDVar) { - // TODO: emit code for untied tasks. 
- } - CGCapturedStmtInfo::EmitBody(CGF, S); -} - CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM) : CGM(CGM), DefaultOpenMPPSource(nullptr), KmpRoutineEntryPtrTy(nullptr) { IdentTy = llvm::StructType::create( @@ -216,13 +277,14 @@ } llvm::Value * -CGOpenMPRuntime::emitOutlinedFunction(const OMPExecutableDirective &D, - const VarDecl *ThreadIDVar) { +CGOpenMPRuntime::emitParallelOutlinedFunction(const OMPExecutableDirective &D, + const VarDecl *ThreadIDVar, + const RegionCodeGenTy &CodeGen) { assert(ThreadIDVar->getType()->isPointerType() && "thread id variable must be of type kmp_int32 *"); const CapturedStmt *CS = cast(D.getAssociatedStmt()); CodeGenFunction CGF(CGM, true); - CGOpenMPOutlinedRegionInfo CGInfo(D, *CS, ThreadIDVar); + CGOpenMPOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen); CGF.CapturedStmtInfo = &CGInfo; return CGF.GenerateCapturedStmtFunction(*CS); } @@ -230,12 +292,12 @@ llvm::Value * CGOpenMPRuntime::emitTaskOutlinedFunction(const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, - const VarDecl *PartIDVar) { + const RegionCodeGenTy &CodeGen) { assert(!ThreadIDVar->getType()->isPointerType() && "thread id variable must be of type kmp_int32 for tasks"); auto *CS = cast(D.getAssociatedStmt()); CodeGenFunction CGF(CGM, true); - CGOpenMPTaskOutlinedRegionInfo CGInfo(D, *CS, ThreadIDVar, PartIDVar); + CGOpenMPTaskOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen); CGF.CapturedStmtInfo = &CGInfo; return CGF.GenerateCapturedStmtFunction(*CS); } @@ -906,9 +968,21 @@ return getOrCreateInternalVariable(KmpCriticalNameTy, Name.concat(".var")); } -void CGOpenMPRuntime::emitCriticalRegion( - CodeGenFunction &CGF, StringRef CriticalName, - const std::function &CriticalOpGen, SourceLocation Loc) { +namespace { +class CallEndCleanup : public EHScopeStack::Cleanup { +private: + const RegionCodeGenTy CodeGen; // NOTE(review): presumably a non-owning function_ref - confirm the callable handed to pushCleanup is still alive when Emit() runs. + +public: + CallEndCleanup(const RegionCodeGenTy &CodeGen) : CodeGen(CodeGen) {} + void Emit(CodeGenFunction &CGF, Flags /*flags*/) 
override { CodeGen(CGF); } +}; +} // namespace + +void CGOpenMPRuntime::emitCriticalRegion(CodeGenFunction &CGF, + StringRef CriticalName, + const RegionCodeGenTy &CriticalOpGen, + SourceLocation Loc) { auto RegionLock = getCriticalRegionLock(CriticalName); // __kmpc_critical(ident_t *, gtid, Lock); // CriticalOpGen(); @@ -916,14 +990,21 @@ // Prepare arguments and build a call to __kmpc_critical llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), RegionLock}; - CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_critical), Args); - CriticalOpGen(); - // Build a call to __kmpc_end_critical - CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_end_critical), Args); + { + CodeGenFunction::RunCleanupsScope Scope(CGF); + CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_critical), Args); + emitInlinedDirective(CGF, CriticalOpGen); + // Build a call to __kmpc_end_critical + CGF.EHStack.pushCleanup( + NormalAndEHCleanup, [this, Args](CodeGenFunction &CGF) { + CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_end_critical), + Args); + }); + } } static void emitIfStmt(CodeGenFunction &CGF, llvm::Value *IfCond, - const std::function &BodyOpGen) { + const RegionCodeGenTy &BodyOpGen) { llvm::Value *CallBool = CGF.EmitScalarConversion( IfCond, CGF.getContext().getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true), @@ -934,14 +1015,14 @@ // Generate the branch (If-stmt) CGF.Builder.CreateCondBr(CallBool, ThenBlock, ContBlock); CGF.EmitBlock(ThenBlock); - BodyOpGen(); + CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, BodyOpGen); // Emit the rest of bblocks/branches CGF.EmitBranch(ContBlock); CGF.EmitBlock(ContBlock, true); } void CGOpenMPRuntime::emitMasterRegion(CodeGenFunction &CGF, - const std::function &MasterOpGen, + const RegionCodeGenTy &MasterOpGen, SourceLocation Loc) { // if(__kmpc_master(ident_t *, gtid)) { // MasterOpGen(); @@ -951,12 +1032,14 @@ llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, 
Loc)}; auto *IsMaster = CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_master), Args); - emitIfStmt(CGF, IsMaster, [&]() -> void { - MasterOpGen(); + emitIfStmt(CGF, IsMaster, [&](CodeGenFunction &CGF) -> void { + CodeGenFunction::RunCleanupsScope Scope(CGF); + MasterOpGen(CGF); // Build a call to __kmpc_end_master. // OpenMP [1.2.2 OpenMP Language Terminology] // For C/C++, an executable statement, possibly compound, with a single - // entry at the top and a single exit at the bottom, or an OpenMP construct. + // entry at the top and a single exit at the bottom, or an OpenMP + // construct. // * Access to the structured block must not be the result of a branch. // * The point of exit cannot be a branch out of the structured block. // * The point of entry must not be a call to setjmp(). @@ -967,7 +1050,12 @@ // structured block. // It is analyzed in Sema, so we can just call __kmpc_end_master() on // fallthrough rather than pushing a normal cleanup for it. - CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_end_master), Args); + // Build a call to __kmpc_end_master + CGF.EHStack.pushCleanup( + NormalAndEHCleanup, [this, Args](CodeGenFunction &CGF) { + CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_end_master), + Args); + }); }); } @@ -1046,7 +1134,7 @@ } void CGOpenMPRuntime::emitSingleRegion(CodeGenFunction &CGF, - const std::function &SingleOpGen, + const RegionCodeGenTy &SingleOpGen, SourceLocation Loc, ArrayRef CopyprivateVars, ArrayRef SrcExprs, @@ -1076,8 +1164,9 @@ llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)}; auto *IsSingle = CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_single), Args); - emitIfStmt(CGF, IsSingle, [&]() -> void { - SingleOpGen(); + emitIfStmt(CGF, IsSingle, [&](CodeGenFunction &CGF) -> void { + CodeGenFunction::RunCleanupsScope Scope(CGF); + SingleOpGen(CGF); if (DidIt) { // did_it = 1; CGF.Builder.CreateAlignedStore(CGF.Builder.getInt32(1), DidIt, @@ -1097,7 +1186,11 @@ // 
structured block. // It is analyzed in Sema, so we can just call __kmpc_end_single() on // fallthrough rather than pushing a normal cleanup for it. - CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_end_single), Args); + CGF.EHStack.pushCleanup( + NormalAndEHCleanup, [this, Args](CodeGenFunction &CGF) { + CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_end_single), + Args); + }); }); // call __kmpc_copyprivate(ident_t *, gtid, , , // , did_it); @@ -1277,6 +1370,7 @@ assert((ScheduleKind == OMPC_SCHEDULE_static || ScheduleKind == OMPC_SCHEDULE_unknown) && "Non-static schedule kinds are not yet implemented"); + (void)ScheduleKind; // Call __kmpc_for_static_fini(ident_t *loc, kmp_int32 tid); llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, OMP_IDENT_KMPC), getThreadID(CGF, Loc)}; @@ -1522,23 +1616,9 @@ CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_task), TaskArgs); } -InlinedOpenMPRegionRAII::InlinedOpenMPRegionRAII( - CodeGenFunction &CGF, const OMPExecutableDirective &D) - : CGF(CGF) { - CGF.CapturedStmtInfo = new CGOpenMPInlinedRegionInfo(D, CGF.CapturedStmtInfo); - // 1.2.2 OpenMP Language Terminology - // Structured block - An executable statement with a single entry at the - // top and a single exit at the bottom. - // The point of exit cannot be a branch out of the structured block. - // longjmp() and throw() must not violate the entry/exit criteria. 
- CGF.EHStack.pushTerminate(); -} - -InlinedOpenMPRegionRAII::~InlinedOpenMPRegionRAII() { - CGF.EHStack.popTerminate(); - auto *OldCSI = - cast(CGF.CapturedStmtInfo)->getOldCSI(); - delete CGF.CapturedStmtInfo; - CGF.CapturedStmtInfo = OldCSI; +void CGOpenMPRuntime::emitInlinedDirective(CodeGenFunction &CGF, + const RegionCodeGenTy &CodeGen) { + InlinedOpenMPRegionRAII Region(CGF, CodeGen); + CGF.CapturedStmtInfo->EmitBody(CGF, /*S=*/nullptr); } Index: cfe/trunk/lib/CodeGen/CGStmtOpenMP.cpp =================================================================== --- cfe/trunk/lib/CodeGen/CGStmtOpenMP.cpp +++ cfe/trunk/lib/CodeGen/CGStmtOpenMP.cpp @@ -23,21 +23,6 @@ //===----------------------------------------------------------------------===// // OpenMP Directive Emission //===----------------------------------------------------------------------===// -namespace { -/// \brief RAII for inlined OpenMP regions (like 'omp for', 'omp simd', 'omp -/// critical' etc.). Helps to generate proper debug info and provides correct -/// code generation for such constructs. -class InlinedOpenMPRegionScopeRAII { - InlinedOpenMPRegionRAII Region; - CodeGenFunction::LexicalScope DirectiveScope; - -public: - InlinedOpenMPRegionScopeRAII(CodeGenFunction &CGF, - const OMPExecutableDirective &D) - : Region(CGF, D), DirectiveScope(CGF, D.getSourceRange()) {} -}; -} // namespace - /// \brief Emits code for OpenMP 'if' clause using specified \a CodeGen /// function. Here is the logic: /// if (Cond) { @@ -225,8 +210,8 @@ } /// \brief Emits code for OpenMP parallel directive in the parallel region. 
-static void EmitOMPParallelCall(CodeGenFunction &CGF, - const OMPParallelDirective &S, +static void emitOMPParallelCall(CodeGenFunction &CGF, + const OMPExecutableDirective &S, llvm::Value *OutlinedFn, llvm::Value *CapturedStruct) { if (auto C = S.getSingleClause(/*K*/ OMPC_num_threads)) { @@ -241,22 +226,43 @@ CapturedStruct); } -void CodeGenFunction::EmitOMPParallelDirective(const OMPParallelDirective &S) { +static void emitCommonOMPParallelDirective(CodeGenFunction &CGF, + const OMPExecutableDirective &S, + const RegionCodeGenTy &CodeGen) { auto CS = cast(S.getAssociatedStmt()); - auto CapturedStruct = GenerateCapturedStmtArgument(*CS); - auto OutlinedFn = CGM.getOpenMPRuntime().emitOutlinedFunction( - S, *CS->getCapturedDecl()->param_begin()); + auto CapturedStruct = CGF.GenerateCapturedStmtArgument(*CS); + auto OutlinedFn = CGF.CGM.getOpenMPRuntime().emitParallelOutlinedFunction( + S, *CS->getCapturedDecl()->param_begin(), CodeGen); if (auto C = S.getSingleClause(/*K*/ OMPC_if)) { auto Cond = cast(C)->getCondition(); - EmitOMPIfClause(*this, Cond, [&](bool ThenBlock) { + EmitOMPIfClause(CGF, Cond, [&](bool ThenBlock) { if (ThenBlock) - EmitOMPParallelCall(*this, S, OutlinedFn, CapturedStruct); + emitOMPParallelCall(CGF, S, OutlinedFn, CapturedStruct); else - CGM.getOpenMPRuntime().emitSerialCall(*this, S.getLocStart(), - OutlinedFn, CapturedStruct); + CGF.CGM.getOpenMPRuntime().emitSerialCall(CGF, S.getLocStart(), + OutlinedFn, CapturedStruct); }); } else - EmitOMPParallelCall(*this, S, OutlinedFn, CapturedStruct); + emitOMPParallelCall(CGF, S, OutlinedFn, CapturedStruct); +} + +void CodeGenFunction::EmitOMPParallelDirective(const OMPParallelDirective &S) { + LexicalScope Scope(*this, S.getSourceRange()); + // Emit parallel region as a standalone region. 
+ auto &&CodeGen = [&S](CodeGenFunction &CGF) { + OMPPrivateScope PrivateScope(CGF); + CGF.EmitOMPPrivateClause(S, PrivateScope); + CGF.EmitOMPFirstprivateClause(S, PrivateScope); + if (PrivateScope.Privatize()) + // Emit implicit barrier to synchronize threads and avoid data races. + CGF.CGM.getOpenMPRuntime().emitBarrierCall(CGF, S.getLocStart(), + OMPD_unknown); + CGF.EmitStmt(cast(S.getAssociatedStmt())->getCapturedStmt()); + // Emit implicit barrier at the end of the 'parallel' directive. + CGF.CGM.getOpenMPRuntime().emitBarrierCall(CGF, S.getLocStart(), + OMPD_unknown); + }; + emitCommonOMPParallelDirective(*this, S, CodeGen); } void CodeGenFunction::EmitOMPLoopBody(const OMPLoopDirective &S, @@ -289,10 +295,10 @@ } } -void CodeGenFunction::EmitOMPInnerLoop(const Stmt &S, bool RequiresCleanup, - const Expr *LoopCond, - const Expr *IncExpr, - const std::function &BodyGen) { +void CodeGenFunction::EmitOMPInnerLoop( + const Stmt &S, bool RequiresCleanup, const Expr *LoopCond, + const Expr *IncExpr, + const llvm::function_ref &BodyGen) { auto LoopExit = getJumpDestInCurrentScope("omp.inner.for.end"); auto Cnt = getPGORegionCounter(&S); @@ -323,7 +329,7 @@ auto Continue = getJumpDestInCurrentScope("omp.inner.for.inc"); BreakContinueStack.push_back(BreakContinue(LoopExit, Continue)); - BodyGen(); + BodyGen(*this); // Emit "IV = IV + 1" and a back-edge to the condition block. EmitBlock(Continue.getBlock()); @@ -414,129 +420,132 @@ } void CodeGenFunction::EmitOMPSimdDirective(const OMPSimdDirective &S) { - // Pragma 'simd' code depends on presence of 'lastprivate'. - // If present, we have to separate last iteration of the loop: - // - // if (LastIteration != 0) { - // for (IV in 0..LastIteration-1) BODY; - // BODY with updates of lastprivate vars; - // ; - // } - // - // otherwise (when there's no lastprivate): - // - // for (IV in 0..LastIteration) BODY; - // ; - // - - // Walk clauses and process safelen/lastprivate. 
- bool SeparateIter = false; - LoopStack.setParallel(); - LoopStack.setVectorizerEnable(true); - for (auto C : S.clauses()) { - switch (C->getClauseKind()) { - case OMPC_safelen: { - RValue Len = EmitAnyExpr(cast(C)->getSafelen(), - AggValueSlot::ignored(), true); - llvm::ConstantInt *Val = cast(Len.getScalarVal()); - LoopStack.setVectorizerWidth(Val->getZExtValue()); - // In presence of finite 'safelen', it may be unsafe to mark all - // the memory instructions parallel, because loop-carried - // dependences of 'safelen' iterations are possible. - LoopStack.setParallel(false); - break; - } - case OMPC_aligned: - EmitOMPAlignedClause(*this, CGM, cast(*C)); - break; - case OMPC_lastprivate: - SeparateIter = true; - break; - default: - // Not handled yet - ; + auto &&CodeGen = [&S](CodeGenFunction &CGF) { + // Pragma 'simd' code depends on presence of 'lastprivate'. + // If present, we have to separate last iteration of the loop: + // + // if (LastIteration != 0) { + // for (IV in 0..LastIteration-1) BODY; + // BODY with updates of lastprivate vars; + // ; + // } + // + // otherwise (when there's no lastprivate): + // + // for (IV in 0..LastIteration) BODY; + // ; + // + + // Walk clauses and process safelen/lastprivate. + bool SeparateIter = false; + CGF.LoopStack.setParallel(); + CGF.LoopStack.setVectorizerEnable(true); + for (auto C : S.clauses()) { + switch (C->getClauseKind()) { + case OMPC_safelen: { + RValue Len = CGF.EmitAnyExpr(cast(C)->getSafelen(), + AggValueSlot::ignored(), true); + llvm::ConstantInt *Val = cast(Len.getScalarVal()); + CGF.LoopStack.setVectorizerWidth(Val->getZExtValue()); + // In presence of finite 'safelen', it may be unsafe to mark all + // the memory instructions parallel, because loop-carried + // dependences of 'safelen' iterations are possible. 
+ CGF.LoopStack.setParallel(false); + break; + } + case OMPC_aligned: + EmitOMPAlignedClause(CGF, CGF.CGM, cast(*C)); + break; + case OMPC_lastprivate: + SeparateIter = true; + break; + default: + // Not handled yet + ; + } } - } - InlinedOpenMPRegionScopeRAII Region(*this, S); - - // Emit inits for the linear variables. - for (auto C : OMPExecutableDirective::linear_filter(S.clauses())) { - for (auto Init : C->inits()) { - auto *D = cast(cast(Init)->getDecl()); - EmitVarDecl(*D); + // Emit inits for the linear variables. + for (auto C : OMPExecutableDirective::linear_filter(S.clauses())) { + for (auto Init : C->inits()) { + auto *D = cast(cast(Init)->getDecl()); + CGF.EmitVarDecl(*D); + } } - } - // Emit the loop iteration variable. - const Expr *IVExpr = S.getIterationVariable(); - const VarDecl *IVDecl = cast(cast(IVExpr)->getDecl()); - EmitVarDecl(*IVDecl); - EmitIgnoredExpr(S.getInit()); + // Emit the loop iteration variable. + const Expr *IVExpr = S.getIterationVariable(); + const VarDecl *IVDecl = cast(cast(IVExpr)->getDecl()); + CGF.EmitVarDecl(*IVDecl); + CGF.EmitIgnoredExpr(S.getInit()); + + // Emit the iterations count variable. + // If it is not a variable, Sema decided to calculate iterations count on + // each + // iteration (e.g., it is foldable into a constant). + if (auto LIExpr = dyn_cast(S.getLastIteration())) { + CGF.EmitVarDecl(*cast(LIExpr->getDecl())); + // Emit calculation of the iterations count. + CGF.EmitIgnoredExpr(S.getCalcLastIteration()); + } - // Emit the iterations count variable. - // If it is not a variable, Sema decided to calculate iterations count on each - // iteration (e.g., it is foldable into a constant). - if (auto LIExpr = dyn_cast(S.getLastIteration())) { - EmitVarDecl(*cast(LIExpr->getDecl())); - // Emit calculation of the iterations count. - EmitIgnoredExpr(S.getCalcLastIteration()); - } + // Emit the linear steps for the linear clauses. + // If a step is not constant, it is pre-calculated before the loop. 
+ for (auto C : OMPExecutableDirective::linear_filter(S.clauses())) { + if (auto CS = cast_or_null(C->getCalcStep())) + if (auto SaveRef = cast(CS->getLHS())) { + CGF.EmitVarDecl(*cast(SaveRef->getDecl())); + // Emit calculation of the linear step. + CGF.EmitIgnoredExpr(CS); + } + } - // Emit the linear steps for the linear clauses. - // If a step is not constant, it is pre-calculated before the loop. - for (auto C : OMPExecutableDirective::linear_filter(S.clauses())) { - if (auto CS = cast_or_null(C->getCalcStep())) - if (auto SaveRef = cast(CS->getLHS())) { - EmitVarDecl(*cast(SaveRef->getDecl())); - // Emit calculation of the linear step. - EmitIgnoredExpr(CS); + if (SeparateIter) { + // Emit: if (LastIteration > 0) - begin. + RegionCounter Cnt = CGF.getPGORegionCounter(&S); + auto ThenBlock = CGF.createBasicBlock("simd.if.then"); + auto ContBlock = CGF.createBasicBlock("simd.if.end"); + CGF.EmitBranchOnBoolExpr(S.getPreCond(), ThenBlock, ContBlock, + Cnt.getCount()); + CGF.EmitBlock(ThenBlock); + Cnt.beginRegion(CGF.Builder); + // Emit 'then' code. + { + OMPPrivateScope LoopScope(CGF); + EmitPrivateLoopCounters(CGF, LoopScope, S.counters()); + EmitPrivateLinearVars(CGF, S, LoopScope); + CGF.EmitOMPPrivateClause(S, LoopScope); + (void)LoopScope.Privatize(); + CGF.EmitOMPInnerLoop(S, LoopScope.requiresCleanups(), + S.getCond(/*SeparateIter=*/true), S.getInc(), + [&S](CodeGenFunction &CGF) { + CGF.EmitOMPLoopBody(S); + CGF.EmitStopPoint(&S); + }); + CGF.EmitOMPLoopBody(S, /* SeparateIter */ true); } - } - - if (SeparateIter) { - // Emit: if (LastIteration > 0) - begin. - RegionCounter Cnt = getPGORegionCounter(&S); - auto ThenBlock = createBasicBlock("simd.if.then"); - auto ContBlock = createBasicBlock("simd.if.end"); - EmitBranchOnBoolExpr(S.getPreCond(), ThenBlock, ContBlock, Cnt.getCount()); - EmitBlock(ThenBlock); - Cnt.beginRegion(Builder); - // Emit 'then' code. 
- { - OMPPrivateScope LoopScope(*this); - EmitPrivateLoopCounters(*this, LoopScope, S.counters()); - EmitPrivateLinearVars(*this, S, LoopScope); - EmitOMPPrivateClause(S, LoopScope); - (void)LoopScope.Privatize(); - EmitOMPInnerLoop(S, LoopScope.requiresCleanups(), - S.getCond(/*SeparateIter=*/true), S.getInc(), - [&S, this]() { - EmitOMPLoopBody(S); - EmitStopPoint(&S); - }); - EmitOMPLoopBody(S, /* SeparateIter */ true); - } - EmitOMPSimdFinal(S); - // Emit: if (LastIteration != 0) - end. - EmitBranch(ContBlock); - EmitBlock(ContBlock, true); - } else { - { - OMPPrivateScope LoopScope(*this); - EmitPrivateLoopCounters(*this, LoopScope, S.counters()); - EmitPrivateLinearVars(*this, S, LoopScope); - EmitOMPPrivateClause(S, LoopScope); - (void)LoopScope.Privatize(); - EmitOMPInnerLoop(S, LoopScope.requiresCleanups(), - S.getCond(/*SeparateIter=*/false), S.getInc(), - [&S, this]() { - EmitOMPLoopBody(S); - EmitStopPoint(&S); - }); + CGF.EmitOMPSimdFinal(S); + // Emit: if (LastIteration != 0) - end. 
+ CGF.EmitBranch(ContBlock); + CGF.EmitBlock(ContBlock, true); + } else { + { + OMPPrivateScope LoopScope(CGF); + EmitPrivateLoopCounters(CGF, LoopScope, S.counters()); + EmitPrivateLinearVars(CGF, S, LoopScope); + CGF.EmitOMPPrivateClause(S, LoopScope); + (void)LoopScope.Privatize(); + CGF.EmitOMPInnerLoop(S, LoopScope.requiresCleanups(), + S.getCond(/*SeparateIter=*/false), S.getInc(), + [&S](CodeGenFunction &CGF) { + CGF.EmitOMPLoopBody(S); + CGF.EmitStopPoint(&S); + }); + } + CGF.EmitOMPSimdFinal(S); } - EmitOMPSimdFinal(S); - } + }; + CGM.getOpenMPRuntime().emitInlinedDirective(*this, CodeGen); } void CodeGenFunction::EmitOMPForOuterLoop(OpenMPScheduleClauseKind ScheduleKind, @@ -654,9 +663,10 @@ BreakContinueStack.push_back(BreakContinue(LoopExit, Continue)); EmitOMPInnerLoop(S, LoopScope.requiresCleanups(), - S.getCond(/*SeparateIter=*/false), S.getInc(), [&S, this]() { - EmitOMPLoopBody(S); - EmitStopPoint(&S); + S.getCond(/*SeparateIter=*/false), S.getInc(), + [&S](CodeGenFunction &CGF) { + CGF.EmitOMPLoopBody(S); + CGF.EmitStopPoint(&S); }); EmitBlock(Continue.getBlock()); @@ -759,9 +769,9 @@ // while (idx <= UB) { BODY; ++idx; } EmitOMPInnerLoop(S, LoopScope.requiresCleanups(), S.getCond(/*SeparateIter=*/false), S.getInc(), - [&S, this]() { - EmitOMPLoopBody(S); - EmitStopPoint(&S); + [&S](CodeGenFunction &CGF) { + CGF.EmitOMPLoopBody(S); + CGF.EmitStopPoint(&S); }); // Tell the runtime we are done. RT.emitForFinish(*this, S.getLocStart(), ScheduleKind); @@ -780,9 +790,10 @@ } void CodeGenFunction::EmitOMPForDirective(const OMPForDirective &S) { - InlinedOpenMPRegionScopeRAII Region(*this, S); - - EmitOMPWorksharingLoop(S); + LexicalScope Scope(*this, S.getSourceRange()); + auto &&CodeGen = + [&S](CodeGenFunction &CGF) { CGF.EmitOMPWorksharingLoop(S); }; + CGM.getOpenMPRuntime().emitInlinedDirective(*this, CodeGen); // Emit an implicit barrier at the end. 
if (!S.getSingleClause(OMPC_nowait)) { @@ -804,86 +815,94 @@ } void CodeGenFunction::EmitOMPSectionsDirective(const OMPSectionsDirective &S) { - InlinedOpenMPRegionScopeRAII Region(*this, S); - + LexicalScope Scope(*this, S.getSourceRange()); auto *Stmt = cast(S.getAssociatedStmt())->getCapturedStmt(); auto *CS = dyn_cast(Stmt); if (CS && CS->size() > 1) { - auto &C = CGM.getContext(); - auto KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1); - // Emit helper vars inits. - LValue LB = createSectionLVal(*this, KmpInt32Ty, ".omp.sections.lb.", - Builder.getInt32(0)); - auto *GlobalUBVal = Builder.getInt32(CS->size() - 1); - LValue UB = - createSectionLVal(*this, KmpInt32Ty, ".omp.sections.ub.", GlobalUBVal); - LValue ST = createSectionLVal(*this, KmpInt32Ty, ".omp.sections.st.", - Builder.getInt32(1)); - LValue IL = createSectionLVal(*this, KmpInt32Ty, ".omp.sections.il.", - Builder.getInt32(0)); - // Loop counter. - LValue IV = createSectionLVal(*this, KmpInt32Ty, ".omp.sections.iv."); - OpaqueValueExpr IVRefExpr(S.getLocStart(), KmpInt32Ty, VK_LValue); - OpaqueValueMapping OpaqueIV(*this, &IVRefExpr, IV); - OpaqueValueExpr UBRefExpr(S.getLocStart(), KmpInt32Ty, VK_LValue); - OpaqueValueMapping OpaqueUB(*this, &UBRefExpr, UB); - // Generate condition for loop. - BinaryOperator Cond(&IVRefExpr, &UBRefExpr, BO_LE, C.BoolTy, VK_RValue, - OK_Ordinary, S.getLocStart(), /*fpContractable=*/false); - // Increment for loop counter. - UnaryOperator Inc(&IVRefExpr, UO_PreInc, KmpInt32Ty, VK_RValue, OK_Ordinary, - S.getLocStart()); - auto BodyGen = [this, CS, &S, &IV]() { - // Iterate through all sections and emit a switch construct: - // switch (IV) { - // case 0: - // ; - // break; - // ... 
- // case - 1: - // - 1]>; - // break; - // } - // .omp.sections.exit: - auto *ExitBB = createBasicBlock(".omp.sections.exit"); - auto *SwitchStmt = Builder.CreateSwitch( - EmitLoadOfLValue(IV, S.getLocStart()).getScalarVal(), ExitBB, - CS->size()); - unsigned CaseNumber = 0; - for (auto C = CS->children(); C; ++C, ++CaseNumber) { - auto CaseBB = createBasicBlock(".omp.sections.case"); - EmitBlock(CaseBB); - SwitchStmt->addCase(Builder.getInt32(CaseNumber), CaseBB); - EmitStmt(*C); - EmitBranch(ExitBB); - } - EmitBlock(ExitBB, /*IsFinished=*/true); + auto &&CodeGen = [&S, CS](CodeGenFunction &CGF) { + auto &C = CGF.CGM.getContext(); + auto KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1); + // Emit helper vars inits. + LValue LB = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.lb.", + CGF.Builder.getInt32(0)); + auto *GlobalUBVal = CGF.Builder.getInt32(CS->size() - 1); + LValue UB = + createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.ub.", GlobalUBVal); + LValue ST = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.st.", + CGF.Builder.getInt32(1)); + LValue IL = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.il.", + CGF.Builder.getInt32(0)); + // Loop counter. + LValue IV = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.iv."); + OpaqueValueExpr IVRefExpr(S.getLocStart(), KmpInt32Ty, VK_LValue); + OpaqueValueMapping OpaqueIV(CGF, &IVRefExpr, IV); + OpaqueValueExpr UBRefExpr(S.getLocStart(), KmpInt32Ty, VK_LValue); + OpaqueValueMapping OpaqueUB(CGF, &UBRefExpr, UB); + // Generate condition for loop. + BinaryOperator Cond(&IVRefExpr, &UBRefExpr, BO_LE, C.BoolTy, VK_RValue, + OK_Ordinary, S.getLocStart(), + /*fpContractable=*/false); + // Increment for loop counter. + UnaryOperator Inc(&IVRefExpr, UO_PreInc, KmpInt32Ty, VK_RValue, + OK_Ordinary, S.getLocStart()); + auto BodyGen = [CS, &S, &IV](CodeGenFunction &CGF) { + // Iterate through all sections and emit a switch construct: + // switch (IV) { + // case 0: + // ; + // break; + // ... 
+ // case - 1: + // - 1]>; + // break; + // } + // .omp.sections.exit: + auto *ExitBB = CGF.createBasicBlock(".omp.sections.exit"); + auto *SwitchStmt = CGF.Builder.CreateSwitch( + CGF.EmitLoadOfLValue(IV, S.getLocStart()).getScalarVal(), ExitBB, + CS->size()); + unsigned CaseNumber = 0; + for (auto C = CS->children(); C; ++C, ++CaseNumber) { + auto CaseBB = CGF.createBasicBlock(".omp.sections.case"); + CGF.EmitBlock(CaseBB); + SwitchStmt->addCase(CGF.Builder.getInt32(CaseNumber), CaseBB); + CGF.EmitStmt(*C); + CGF.EmitBranch(ExitBB); + } + CGF.EmitBlock(ExitBB, /*IsFinished=*/true); + }; + // Emit static non-chunked loop. + CGF.CGM.getOpenMPRuntime().emitForInit( + CGF, S.getLocStart(), OMPC_SCHEDULE_static, /*IVSize=*/32, + /*IVSigned=*/true, IL.getAddress(), LB.getAddress(), UB.getAddress(), + ST.getAddress()); + // UB = min(UB, GlobalUB); + auto *UBVal = CGF.EmitLoadOfScalar(UB, S.getLocStart()); + auto *MinUBGlobalUB = CGF.Builder.CreateSelect( + CGF.Builder.CreateICmpSLT(UBVal, GlobalUBVal), UBVal, GlobalUBVal); + CGF.EmitStoreOfScalar(MinUBGlobalUB, UB); + // IV = LB; + CGF.EmitStoreOfScalar(CGF.EmitLoadOfScalar(LB, S.getLocStart()), IV); + // while (idx <= UB) { BODY; ++idx; } + CGF.EmitOMPInnerLoop(S, /*RequiresCleanup=*/false, &Cond, &Inc, BodyGen); + // Tell the runtime we are done. + CGF.CGM.getOpenMPRuntime().emitForFinish(CGF, S.getLocStart(), + OMPC_SCHEDULE_static); }; - // Emit static non-chunked loop. 
- CGM.getOpenMPRuntime().emitForInit( - *this, S.getLocStart(), OMPC_SCHEDULE_static, /*IVSize=*/32, - /*IVSigned=*/true, IL.getAddress(), LB.getAddress(), UB.getAddress(), - ST.getAddress()); - // UB = min(UB, GlobalUB); - auto *UBVal = EmitLoadOfScalar(UB, S.getLocStart()); - auto *MinUBGlobalUB = Builder.CreateSelect( - Builder.CreateICmpSLT(UBVal, GlobalUBVal), UBVal, GlobalUBVal); - EmitStoreOfScalar(MinUBGlobalUB, UB); - // IV = LB; - EmitStoreOfScalar(EmitLoadOfScalar(LB, S.getLocStart()), IV); - // while (idx <= UB) { BODY; ++idx; } - EmitOMPInnerLoop(S, /*RequiresCleanup=*/false, &Cond, &Inc, BodyGen); - // Tell the runtime we are done. - CGM.getOpenMPRuntime().emitForFinish(*this, S.getLocStart(), - OMPC_SCHEDULE_static); + + CGM.getOpenMPRuntime().emitInlinedDirective(*this, CodeGen); } else { - // If only one section is found - no need to generate loop, emit as a single + // If only one section is found - no need to generate loop, emit as a + // single // region. - CGM.getOpenMPRuntime().emitSingleRegion(*this, [&]() -> void { - InlinedOpenMPRegionScopeRAII Region(*this, S); - EmitStmt(Stmt); - EnsureInsertPoint(); - }, S.getLocStart(), llvm::None, llvm::None, llvm::None, llvm::None); + auto &&CodeGen = [&S](CodeGenFunction &CGF) { + CGF.EmitStmt( + cast(S.getAssociatedStmt())->getCapturedStmt()); + CGF.EnsureInsertPoint(); + }; + CGM.getOpenMPRuntime().emitSingleRegion(*this, CodeGen, S.getLocStart(), + llvm::None, llvm::None, llvm::None, + llvm::None); } // Emit an implicit barrier at the end. 
@@ -895,9 +914,12 @@ } void CodeGenFunction::EmitOMPSectionDirective(const OMPSectionDirective &S) { - InlinedOpenMPRegionScopeRAII Region(*this, S); - EmitStmt(cast(S.getAssociatedStmt())->getCapturedStmt()); - EnsureInsertPoint(); + LexicalScope Scope(*this, S.getSourceRange()); + auto &&CodeGen = [&S](CodeGenFunction &CGF) { + CGF.EmitStmt(cast(S.getAssociatedStmt())->getCapturedStmt()); + CGF.EnsureInsertPoint(); + }; + CGM.getOpenMPRuntime().emitInlinedDirective(*this, CodeGen); } void CodeGenFunction::EmitOMPSingleDirective(const OMPSingleDirective &S) { @@ -905,7 +927,8 @@ llvm::SmallVector SrcExprs; llvm::SmallVector DstExprs; llvm::SmallVector AssignmentOps; - // Check if there are any 'copyprivate' clauses associated with this 'single' + // Check if there are any 'copyprivate' clauses associated with this + // 'single' // construct. auto CopyprivateFilter = [](const OMPClause *C) -> bool { return C->getClauseKind() == OMPC_copyprivate; @@ -923,12 +946,15 @@ AssignmentOps.append(C->assignment_ops().begin(), C->assignment_ops().end()); } + LexicalScope Scope(*this, S.getSourceRange()); // Emit code for 'single' region along with 'copyprivate' clauses - CGM.getOpenMPRuntime().emitSingleRegion(*this, [&]() -> void { - InlinedOpenMPRegionScopeRAII Region(*this, S); - EmitStmt(cast(S.getAssociatedStmt())->getCapturedStmt()); - EnsureInsertPoint(); - }, S.getLocStart(), CopyprivateVars, SrcExprs, DstExprs, AssignmentOps); + auto &&CodeGen = [&S](CodeGenFunction &CGF) { + CGF.EmitStmt(cast(S.getAssociatedStmt())->getCapturedStmt()); + CGF.EnsureInsertPoint(); + }; + CGM.getOpenMPRuntime().emitSingleRegion(*this, CodeGen, S.getLocStart(), + CopyprivateVars, SrcExprs, DstExprs, + AssignmentOps); // Emit an implicit barrier at the end. 
if (!S.getSingleClause(OMPC_nowait)) { CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(), OMPD_single); @@ -936,20 +962,22 @@ } void CodeGenFunction::EmitOMPMasterDirective(const OMPMasterDirective &S) { - CGM.getOpenMPRuntime().emitMasterRegion(*this, [&]() -> void { - InlinedOpenMPRegionScopeRAII Region(*this, S); - EmitStmt(cast(S.getAssociatedStmt())->getCapturedStmt()); - EnsureInsertPoint(); - }, S.getLocStart()); + LexicalScope Scope(*this, S.getSourceRange()); + auto &&CodeGen = [&S](CodeGenFunction &CGF) { + CGF.EmitStmt(cast(S.getAssociatedStmt())->getCapturedStmt()); + CGF.EnsureInsertPoint(); + }; + CGM.getOpenMPRuntime().emitMasterRegion(*this, CodeGen, S.getLocStart()); } void CodeGenFunction::EmitOMPCriticalDirective(const OMPCriticalDirective &S) { + LexicalScope Scope(*this, S.getSourceRange()); + auto &&CodeGen = [&S](CodeGenFunction &CGF) { + CGF.EmitStmt(cast(S.getAssociatedStmt())->getCapturedStmt()); + CGF.EnsureInsertPoint(); + }; CGM.getOpenMPRuntime().emitCriticalRegion( - *this, S.getDirectiveName().getAsString(), [&]() -> void { - InlinedOpenMPRegionScopeRAII Region(*this, S); - EmitStmt(cast(S.getAssociatedStmt())->getCapturedStmt()); - EnsureInsertPoint(); - }, S.getLocStart()); + *this, S.getDirectiveName().getAsString(), CodeGen, S.getLocStart()); } void @@ -969,13 +997,21 @@ void CodeGenFunction::EmitOMPTaskDirective(const OMPTaskDirective &S) { // Emit outlined function for task construct. + LexicalScope Scope(*this, S.getSourceRange()); auto CS = cast(S.getAssociatedStmt()); auto CapturedStruct = GenerateCapturedStmtArgument(*CS); auto *I = CS->getCapturedDecl()->param_begin(); + auto *PartId = std::next(I); // The first function argument for tasks is a thread id, the second one is a // part id (0 for tied tasks, >=0 for untied task). + auto &&CodeGen = [PartId, &S](CodeGenFunction &CGF) { + if (*PartId) { + // TODO: emit code for untied tasks. 
+ } + CGF.EmitStmt(cast(S.getAssociatedStmt())->getCapturedStmt()); + }; auto OutlinedFn = - CGM.getOpenMPRuntime().emitTaskOutlinedFunction(S, *I, *std::next(I)); + CGM.getOpenMPRuntime().emitTaskOutlinedFunction(S, *I, CodeGen); // Check if we should emit tied or untied task. bool Tied = !S.getSingleClause(OMPC_untied); // Check if the task is final @@ -1305,10 +1341,13 @@ S.getAssociatedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true); if (const auto *EWC = dyn_cast(CS)) enterFullExpression(EWC); - InlinedOpenMPRegionScopeRAII Region(*this, S); - EmitOMPAtomicExpr(*this, Kind, IsSeqCst, S.getX(), S.getV(), S.getExpr(), - S.getUpdateExpr(), S.isXLHSInRHSPart(), S.getLocStart()); + LexicalScope Scope(*this, S.getSourceRange()); + auto &&CodeGen = [&S, Kind, IsSeqCst](CodeGenFunction &CGF) { + EmitOMPAtomicExpr(CGF, Kind, IsSeqCst, S.getX(), S.getV(), S.getExpr(), + S.getUpdateExpr(), S.isXLHSInRHSPart(), S.getLocStart()); + }; + CGM.getOpenMPRuntime().emitInlinedDirective(*this, CodeGen); } void CodeGenFunction::EmitOMPTargetDirective(const OMPTargetDirective &) { Index: cfe/trunk/lib/CodeGen/CodeGenFunction.h =================================================================== --- cfe/trunk/lib/CodeGen/CodeGenFunction.h +++ cfe/trunk/lib/CodeGen/CodeGenFunction.h @@ -192,7 +192,7 @@ CapturedRegionKind getKind() const { return Kind; } - void setContextValue(llvm::Value *V) { ThisValue = V; } + virtual void setContextValue(llvm::Value *V) { ThisValue = V; } // \brief Retrieve the value of the context parameter. virtual llvm::Value *getContextValue() const { return ThisValue; } @@ -2061,9 +2061,10 @@ /// Helpers for the OpenMP loop directives. 
void EmitOMPLoopBody(const OMPLoopDirective &Directive, bool SeparateIter = false); - void EmitOMPInnerLoop(const Stmt &S, bool RequiresCleanup, - const Expr *LoopCond, const Expr *IncExpr, - const std::function &BodyGen); + void + EmitOMPInnerLoop(const Stmt &S, bool RequiresCleanup, const Expr *LoopCond, + const Expr *IncExpr, + const llvm::function_ref &BodyGen); void EmitOMPSimdFinal(const OMPLoopDirective &S); void EmitOMPWorksharingLoop(const OMPLoopDirective &S); void EmitOMPForOuterLoop(OpenMPScheduleClauseKind ScheduleKind, Index: cfe/trunk/test/OpenMP/critical_codegen.cpp =================================================================== --- cfe/trunk/test/OpenMP/critical_codegen.cpp +++ cfe/trunk/test/OpenMP/critical_codegen.cpp @@ -53,6 +53,6 @@ // TERM_DEBUG: unreachable foo(); } -// TERM_DEBUG-DAG: [[DBG_LOC_START]] = !MDLocation(line: 44, -// TERM_DEBUG-DAG: [[DBG_LOC_END]] = !MDLocation(line: 44, +// TERM_DEBUG-DAG: [[DBG_LOC_START]] = !MDLocation(line: [[@LINE-12]], +// TERM_DEBUG-DAG: [[DBG_LOC_END]] = !MDLocation(line: [[@LINE-3]], #endif Index: cfe/trunk/test/OpenMP/master_codegen.cpp =================================================================== --- cfe/trunk/test/OpenMP/master_codegen.cpp +++ cfe/trunk/test/OpenMP/master_codegen.cpp @@ -61,7 +61,7 @@ // TERM_DEBUG: unreachable foo(); } -// TERM_DEBUG-DAG: [[DBG_LOC_START]] = !MDLocation(line: 52, -// TERM_DEBUG-DAG: [[DBG_LOC_END]] = !MDLocation(line: 52, +// TERM_DEBUG-DAG: [[DBG_LOC_START]] = !MDLocation(line: [[@LINE-12]], +// TERM_DEBUG-DAG: [[DBG_LOC_END]] = !MDLocation(line: [[@LINE-3]], #endif Index: cfe/trunk/test/OpenMP/parallel_codegen.cpp =================================================================== --- cfe/trunk/test/OpenMP/parallel_codegen.cpp +++ cfe/trunk/test/OpenMP/parallel_codegen.cpp @@ -39,7 +39,7 @@ // CHECK: [[ARGC_REF:%.+]] = getelementptr inbounds %struct.anon, %struct.anon* [[AGG_CAPTURED]], i32 0, i32 0 // CHECK-NEXT: store i32* {{%[a-z0-9.]+}}, i32** 
[[ARGC_REF]] // CHECK-NEXT: [[BITCAST:%.+]] = bitcast %struct.anon* [[AGG_CAPTURED]] to i8* -// CHECK-NEXT: call void (%ident_t*, i32, void (i32*, i32*, ...)*, ...)* @__kmpc_fork_call(%ident_t* [[DEF_LOC_2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), i8* [[BITCAST]]) +// CHECK-NEXT: call void (%ident_t*, i32, void (i32*, i32*, ...)*, ...)* @__kmpc_fork_call(%ident_t* [[DEF_LOC_2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* [[OMP_OUTLINED:@.+]] to void (i32*, i32*, ...)*), i8* [[BITCAST]]) // CHECK-NEXT: [[ARGV:%.+]] = load i8**, i8*** {{%[a-z0-9.]+}} // CHECK-NEXT: [[RET:%.+]] = call {{[a-z]*[ ]?i32}} [[TMAIN:@.+tmain.+]](i8** [[ARGV]]) // CHECK-NEXT: ret i32 [[RET]] @@ -55,13 +55,13 @@ // CHECK-DEBUG-NEXT: [[KMPC_LOC_PSOURCE_REF:%.+]] = getelementptr inbounds %ident_t, %ident_t* [[LOC_2_ADDR]], i32 0, i32 4 // CHECK-DEBUG-NEXT: store i8* getelementptr inbounds ([{{.+}} x i8], [{{.+}} x i8]* [[LOC1]], i32 0, i32 0), i8** [[KMPC_LOC_PSOURCE_REF]] // CHECK-DEBUG-NEXT: [[BITCAST:%.+]] = bitcast %struct.anon* [[AGG_CAPTURED]] to i8* -// CHECK-DEBUG-NEXT: call void (%ident_t*, i32, void (i32*, i32*, ...)*, ...)* @__kmpc_fork_call(%ident_t* [[LOC_2_ADDR]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. 
to void (i32*, i32*, ...)*), i8* [[BITCAST]]) +// CHECK-DEBUG-NEXT: call void (%ident_t*, i32, void (i32*, i32*, ...)*, ...)* @__kmpc_fork_call(%ident_t* [[LOC_2_ADDR]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* [[OMP_OUTLINED:@.+]] to void (i32*, i32*, ...)*), i8* [[BITCAST]]) // CHECK-DEBUG-NEXT: [[ARGV:%.+]] = load i8**, i8*** {{%[a-z0-9.]+}} // CHECK-DEBUG-NEXT: [[RET:%.+]] = call i32 [[TMAIN:@.+tmain.+]](i8** [[ARGV]]) // CHECK-DEBUG-NEXT: ret i32 [[RET]] // CHECK-DEBUG-NEXT: } -// CHECK-LABEL: define internal void @.omp_outlined.(i32* %.global_tid., i32* %.bound_tid., %struct.anon* %__context) +// CHECK: define internal void [[OMP_OUTLINED]](i32* %.global_tid., i32* %.bound_tid., %struct.anon* %__context) // CHECK: #[[FN_ATTRS:[0-9]+]] // CHECK: [[CONTEXT_ADDR:%.+]] = alloca %struct.anon* // CHECK: store %struct.anon* %__context, %struct.anon** [[CONTEXT_ADDR]] @@ -70,11 +70,12 @@ // CHECK-NEXT: [[ARGC_REF:%.+]] = load i32*, i32** [[ARGC_PTR_REF]] // CHECK-NEXT: [[ARGC:%.+]] = load i32, i32* [[ARGC_REF]] // CHECK-NEXT: invoke void [[FOO:@.+foo.+]](i32{{[ ]?[a-z]*}} [[ARGC]]) +// CHECK: call {{.+}} @__kmpc_cancel_barrier( // CHECK: ret void // CHECK: call void @{{.+terminate.*|abort}}( // CHECK-NEXT: unreachable // CHECK-NEXT: } -// CHECK-DEBUG-LABEL: define internal void @.omp_outlined.(i32* %.global_tid., i32* %.bound_tid., %struct.anon* %__context) +// CHECK-DEBUG: define internal void [[OMP_OUTLINED]](i32* %.global_tid., i32* %.bound_tid., %struct.anon* %__context) // CHECK-DEBUG: #[[FN_ATTRS:[0-9]+]] // CHECK-DEBUG: [[CONTEXT_ADDR:%.+]] = alloca %struct.anon* // CHECK-DEBUG: store %struct.anon* %__context, %struct.anon** [[CONTEXT_ADDR]] @@ -83,6 +84,7 @@ // CHECK-DEBUG-NEXT: [[ARGC_REF:%.+]] = load i32*, i32** [[ARGC_PTR_REF]] // CHECK-DEBUG-NEXT: [[ARGC:%.+]] = load i32, i32* [[ARGC_REF]] // CHECK-DEBUG-NEXT: invoke void [[FOO:@.+foo.+]](i32 [[ARGC]]) +// CHECK-DEBUG: call {{.+}} @__kmpc_cancel_barrier( // CHECK-DEBUG: 
ret void // CHECK-DEBUG: call void @{{.+terminate.*|abort}}( // CHECK-DEBUG-NEXT: unreachable @@ -98,7 +100,7 @@ // CHECK: [[ARGC_REF:%.+]] = getelementptr inbounds %struct.anon.0, %struct.anon.0* [[AGG_CAPTURED]], i32 0, i32 0 // CHECK-NEXT: store i8*** {{%[a-z0-9.]+}}, i8**** [[ARGC_REF]] // CHECK-NEXT: [[BITCAST:%.+]] = bitcast %struct.anon.0* [[AGG_CAPTURED]] to i8* -// CHECK-NEXT: call void (%ident_t*, i32, void (i32*, i32*, ...)*, ...)* @__kmpc_fork_call(%ident_t* [[DEF_LOC_2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined.1 to void (i32*, i32*, ...)*), i8* [[BITCAST]]) +// CHECK-NEXT: call void (%ident_t*, i32, void (i32*, i32*, ...)*, ...)* @__kmpc_fork_call(%ident_t* [[DEF_LOC_2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* [[OMP_OUTLINED:@.+]] to void (i32*, i32*, ...)*), i8* [[BITCAST]]) // CHECK-NEXT: ret i32 0 // CHECK-NEXT: } // CHECK-DEBUG: define linkonce_odr i32 [[TMAIN]](i8** %argc) @@ -112,11 +114,11 @@ // CHECK-DEBUG-NEXT: [[KMPC_LOC_PSOURCE_REF:%.+]] = getelementptr inbounds %ident_t, %ident_t* [[LOC_2_ADDR]], i32 0, i32 4 // CHECK-DEBUG-NEXT: store i8* getelementptr inbounds ([{{.+}} x i8], [{{.+}} x i8]* [[LOC2]], i32 0, i32 0), i8** [[KMPC_LOC_PSOURCE_REF]] // CHECK-DEBUG-NEXT: [[BITCAST:%.+]] = bitcast %struct.anon.0* [[AGG_CAPTURED]] to i8* -// CHECK-DEBUG-NEXT: call void (%ident_t*, i32, void (i32*, i32*, ...)*, ...)* @__kmpc_fork_call(%ident_t* [[LOC_2_ADDR]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined.1 to void (i32*, i32*, ...)*), i8* [[BITCAST]]) +// CHECK-DEBUG-NEXT: call void (%ident_t*, i32, void (i32*, i32*, ...)*, ...)* @__kmpc_fork_call(%ident_t* [[LOC_2_ADDR]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* [[OMP_OUTLINED:@.+]] to void (i32*, i32*, ...)*), i8* [[BITCAST]]) // CHECK-DEBUG-NEXT: ret i32 0 // CHECK-DEBUG-NEXT: } -// CHECK-LABEL: define internal void 
@.omp_outlined.1(i32* %.global_tid., i32* %.bound_tid., %struct.anon.0* %__context) +// CHECK: define internal void [[OMP_OUTLINED]](i32* %.global_tid., i32* %.bound_tid., %struct.anon.0* %__context) // CHECK: [[CONTEXT_ADDR:%.+]] = alloca %struct.anon.0* // CHECK: store %struct.anon.0* %__context, %struct.anon.0** [[CONTEXT_ADDR]] // CHECK: [[CONTEXT_PTR:%.+]] = load %struct.anon.0*, %struct.anon.0** [[CONTEXT_ADDR]] @@ -124,11 +126,12 @@ // CHECK-NEXT: [[ARGC_REF:%.+]] = load i8***, i8**** [[ARGC_PTR_REF]] // CHECK-NEXT: [[ARGC:%.+]] = load i8**, i8*** [[ARGC_REF]] // CHECK-NEXT: invoke void [[FOO1:@.+foo.+]](i8** [[ARGC]]) +// CHECK: call {{.+}} @__kmpc_cancel_barrier( // CHECK: ret void // CHECK: call void @{{.+terminate.*|abort}}( // CHECK-NEXT: unreachable // CHECK-NEXT: } -// CHECK-DEBUG-LABEL: define internal void @.omp_outlined.1(i32* %.global_tid., i32* %.bound_tid., %struct.anon.0* %__context) +// CHECK-DEBUG: define internal void [[OMP_OUTLINED]](i32* %.global_tid., i32* %.bound_tid., %struct.anon.0* %__context) // CHECK-DEBUG: [[CONTEXT_ADDR:%.+]] = alloca %struct.anon.0* // CHECK-DEBUG: store %struct.anon.0* %__context, %struct.anon.0** [[CONTEXT_ADDR]] // CHECK-DEBUG: [[CONTEXT_PTR:%.+]] = load %struct.anon.0*, %struct.anon.0** [[CONTEXT_ADDR]] @@ -136,6 +139,7 @@ // CHECK-DEBUG-NEXT: [[ARGC_REF:%.+]] = load i8***, i8**** [[ARGC_PTR_REF]] // CHECK-DEBUG-NEXT: [[ARGC:%.+]] = load i8**, i8*** [[ARGC_REF]] // CHECK-DEBUG-NEXT: invoke void [[FOO1:@.+foo.+]](i8** [[ARGC]]) +// CHECK-DEBUG: call {{.+}} @__kmpc_cancel_barrier( // CHECK-DEBUG: ret void // CHECK-DEBUG: call void @{{.+terminate.*|abort}}( // CHECK-DEBUG-NEXT: unreachable Index: cfe/trunk/test/OpenMP/sections_codegen.cpp =================================================================== --- cfe/trunk/test/OpenMP/sections_codegen.cpp +++ cfe/trunk/test/OpenMP/sections_codegen.cpp @@ -96,6 +96,7 @@ // CHECK-NEXT: br label %[[END]] // CHECK: [[END]] // CHECK-NEXT: call i32 
@__kmpc_cancel_barrier(%{{.+}}* [[IMPLICIT_BARRIER_SINGLE_LOC]], +// CHECK-NEXT: call i32 @__kmpc_cancel_barrier( // CHECK-NEXT: ret // CHECK: [[TERM_LPAD]] // CHECK: call void @__clang_call_terminate(i8* Index: cfe/trunk/test/OpenMP/single_codegen.cpp =================================================================== --- cfe/trunk/test/OpenMP/single_codegen.cpp +++ cfe/trunk/test/OpenMP/single_codegen.cpp @@ -129,6 +129,6 @@ foo(); } // TERM_DEBUG-DAG: [[DBG_LOC_START]] = !MDLocation(line: [[@LINE-12]], -// TERM_DEBUG-DAG: [[DBG_LOC_END]] = !MDLocation(line: [[@LINE-13]], +// TERM_DEBUG-DAG: [[DBG_LOC_END]] = !MDLocation(line: [[@LINE-3]], #endif