Index: include/clang/AST/StmtOpenMP.h =================================================================== --- include/clang/AST/StmtOpenMP.h +++ include/clang/AST/StmtOpenMP.h @@ -264,27 +264,38 @@ SeparatedCondOffset = 6, InitOffset = 7, IncOffset = 8, - ArraysOffset = 9 + NonworksharingArraysOffset = 9, + // The following 7 exprs are used by worksharing loops only. + IsLastIterVariableOffset = 9, + LowerBoundVariableOffset = 10, + UpperBoundVariableOffset = 11, + StrideVariableOffset = 12, + EnsureUpperBoundOffset = 13, + NextLowerBoundOffset = 14, + NextUpperBoundOffset = 15, + WorksharingArraysOffset = 16 }; /// \brief Get the counters storage. MutableArrayRef getCounters() { - Expr **Storage = - reinterpret_cast(&(*(std::next(child_begin(), ArraysOffset)))); + Expr **Storage = reinterpret_cast( + &(*(std::next(child_begin(), getArraysOffset(getDirectiveKind()))))); return MutableArrayRef(Storage, CollapsedNum); } /// \brief Get the updates storage. MutableArrayRef getUpdates() { Expr **Storage = reinterpret_cast( - &*std::next(child_begin(), ArraysOffset + CollapsedNum)); + &*std::next(child_begin(), + getArraysOffset(getDirectiveKind()) + CollapsedNum)); return MutableArrayRef(Storage, CollapsedNum); } /// \brief Get the final counter updates storage. MutableArrayRef getFinals() { Expr **Storage = reinterpret_cast( - &*std::next(child_begin(), ArraysOffset + 2 * CollapsedNum)); + &*std::next(child_begin(), + getArraysOffset(getDirectiveKind()) + 2 * CollapsedNum)); return MutableArrayRef(Storage, CollapsedNum); } @@ -305,13 +316,21 @@ unsigned CollapsedNum, unsigned NumClauses, unsigned NumSpecialChildren = 0) : OMPExecutableDirective(That, SC, Kind, StartLoc, EndLoc, NumClauses, - numLoopChildren(CollapsedNum) + + numLoopChildren(CollapsedNum, Kind) + NumSpecialChildren), CollapsedNum(CollapsedNum) {} + /// \brief Offset to the start of children expression arrays. 
+ static unsigned getArraysOffset(OpenMPDirectiveKind Kind) { + return isOpenMPWorksharingDirective(Kind) ? WorksharingArraysOffset + : NonworksharingArraysOffset; + } + /// \brief Children number. - static unsigned numLoopChildren(unsigned CollapsedNum) { - return ArraysOffset + 3 * CollapsedNum; // Counters, Updates and Finals + static unsigned numLoopChildren(unsigned CollapsedNum, + OpenMPDirectiveKind Kind) { + return getArraysOffset(Kind) + + 3 * CollapsedNum; // Counters, Updates and Finals } void setIterationVariable(Expr *IV) { @@ -332,6 +351,41 @@ } void setInit(Expr *Init) { *std::next(child_begin(), InitOffset) = Init; } void setInc(Expr *Inc) { *std::next(child_begin(), IncOffset) = Inc; } + void setIsLastIterVariable(Expr *IL) { + assert(isOpenMPWorksharingDirective(getDirectiveKind()) && + "expected worksharing loop directive"); + *std::next(child_begin(), IsLastIterVariableOffset) = IL; + } + void setLowerBoundVariable(Expr *LB) { + assert(isOpenMPWorksharingDirective(getDirectiveKind()) && + "expected worksharing loop directive"); + *std::next(child_begin(), LowerBoundVariableOffset) = LB; + } + void setUpperBoundVariable(Expr *UB) { + assert(isOpenMPWorksharingDirective(getDirectiveKind()) && + "expected worksharing loop directive"); + *std::next(child_begin(), UpperBoundVariableOffset) = UB; + } + void setStrideVariable(Expr *ST) { + assert(isOpenMPWorksharingDirective(getDirectiveKind()) && + "expected worksharing loop directive"); + *std::next(child_begin(), StrideVariableOffset) = ST; + } + void setEnsureUpperBound(Expr *EUB) { + assert(isOpenMPWorksharingDirective(getDirectiveKind()) && + "expected worksharing loop directive"); + *std::next(child_begin(), EnsureUpperBoundOffset) = EUB; + } + void setNextLowerBound(Expr *NLB) { + assert(isOpenMPWorksharingDirective(getDirectiveKind()) && + "expected worksharing loop directive"); + *std::next(child_begin(), NextLowerBoundOffset) = NLB; + } + void setNextUpperBound(Expr *NUB) { + 
assert(isOpenMPWorksharingDirective(getDirectiveKind()) && + "expected worksharing loop directive"); + *std::next(child_begin(), NextUpperBoundOffset) = NUB; + } void setCounters(ArrayRef A); void setUpdates(ArrayRef A); void setFinals(ArrayRef A); @@ -369,6 +423,48 @@ return const_cast( reinterpret_cast(*std::next(child_begin(), IncOffset))); } + Expr *getIsLastIterVariable() const { + assert(isOpenMPWorksharingDirective(getDirectiveKind()) && + "expected worksharing loop directive"); + return const_cast(reinterpret_cast( + *std::next(child_begin(), IsLastIterVariableOffset))); + } + Expr *getLowerBoundVariable() const { + assert(isOpenMPWorksharingDirective(getDirectiveKind()) && + "expected worksharing loop directive"); + return const_cast(reinterpret_cast( + *std::next(child_begin(), LowerBoundVariableOffset))); + } + Expr *getUpperBoundVariable() const { + assert(isOpenMPWorksharingDirective(getDirectiveKind()) && + "expected worksharing loop directive"); + return const_cast(reinterpret_cast( + *std::next(child_begin(), UpperBoundVariableOffset))); + } + Expr *getStrideVariable() const { + assert(isOpenMPWorksharingDirective(getDirectiveKind()) && + "expected worksharing loop directive"); + return const_cast(reinterpret_cast( + *std::next(child_begin(), StrideVariableOffset))); + } + Expr *getEnsureUpperBound() const { + assert(isOpenMPWorksharingDirective(getDirectiveKind()) && + "expected worksharing loop directive"); + return const_cast(reinterpret_cast( + *std::next(child_begin(), EnsureUpperBoundOffset))); + } + Expr *getNextLowerBound() const { + assert(isOpenMPWorksharingDirective(getDirectiveKind()) && + "expected worksharing loop directive"); + return const_cast(reinterpret_cast( + *std::next(child_begin(), NextLowerBoundOffset))); + } + Expr *getNextUpperBound() const { + assert(isOpenMPWorksharingDirective(getDirectiveKind()) && + "expected worksharing loop directive"); + return const_cast(reinterpret_cast( + *std::next(child_begin(), 
NextUpperBoundOffset))); + } const Stmt *getBody() const { // This relies on the loop form is already checked by Sema. Stmt *Body = getAssociatedStmt()->IgnoreContainers(true); @@ -531,6 +627,13 @@ /// \param Cond Condition. /// \param SeparatedCond Condition with 1 iteration separated. /// \param Inc Loop increment. + /// \param IL IsLastIteration local variable passed to runtime. + /// \param LB LowerBound local variable passed to runtime. + /// \param UB UpperBound local variable passed to runtime. + /// \param ST Stride local variable passed to runtime. + /// \param EUB EnsureUpperBound -- expression LB = min(LB, NumIterations). + /// \param NLB Update of LowerBound for statically scheduled 'omp for' loops. + /// \param NUB Update of UpperBound for statically scheduled 'omp for' loops. /// \param Counters Loop counters. /// \param Updates Expressions for loop counters update for CodeGen. /// \param Finals Final loop counter values for CodeGen. @@ -540,8 +643,10 @@ unsigned CollapsedNum, ArrayRef Clauses, Stmt *AssociatedStmt, Expr *IV, Expr *LastIteration, Expr *CalcLastIteration, Expr *PreCond, Expr *Cond, - Expr *SeparatedCond, Expr *Init, Expr *Inc, ArrayRef Counters, - ArrayRef Updates, ArrayRef Finals); + Expr *SeparatedCond, Expr *Init, Expr *Inc, Expr *IL, Expr *LB, + Expr *UB, Expr *ST, Expr *EUB, Expr *NLB, Expr *NUB, + ArrayRef Counters, ArrayRef Updates, + ArrayRef Finals); /// \brief Creates an empty directive with the place /// for \a NumClauses clauses. @@ -607,6 +712,13 @@ /// \param Cond Condition. /// \param SeparatedCond Condition with 1 iteration separated. /// \param Inc Loop increment. + /// \param IL IsLastIteration local variable passed to runtime. + /// \param LB LowerBound local variable passed to runtime. + /// \param UB UpperBound local variable passed to runtime. + /// \param ST Stride local variable passed to runtime. + /// \param EUB EnsureUpperBound -- expression LB = min(LB, NumIterations). 
+ /// \param NLB Update of LowerBound for statically scheduled 'omp for' loops. + /// \param NUB Update of UpperBound for statically scheduled 'omp for' loops. /// \param Counters Loop counters. /// \param Updates Expressions for loop counters update for CodeGen. /// \param Finals Final loop counter values for CodeGen. @@ -616,8 +728,10 @@ unsigned CollapsedNum, ArrayRef Clauses, Stmt *AssociatedStmt, Expr *IV, Expr *LastIteration, Expr *CalcLastIteration, Expr *PreCond, Expr *Cond, - Expr *SeparatedCond, Expr *Init, Expr *Inc, ArrayRef Counters, - ArrayRef Updates, ArrayRef Finals); + Expr *SeparatedCond, Expr *Init, Expr *Inc, Expr *IL, Expr *LB, + Expr *UB, Expr *ST, Expr *EUB, Expr *NLB, Expr *NUB, + ArrayRef Counters, ArrayRef Updates, + ArrayRef Finals); /// \brief Creates an empty directive with the place /// for \a NumClauses clauses. @@ -956,6 +1070,13 @@ /// \param Cond Condition. /// \param SeparatedCond Condition with 1 iteration separated. /// \param Inc Loop increment. + /// \param IL IsLastIteration local variable passed to runtime. + /// \param LB LowerBound local variable passed to runtime. + /// \param UB UpperBound local variable passed to runtime. + /// \param ST Stride local variable passed to runtime. + /// \param EUB EnsureUpperBound -- expression LB = min(LB, NumIterations). + /// \param NLB Update of LowerBound for statically scheduled 'omp for' loops. + /// \param NUB Update of UpperBound for statically scheduled 'omp for' loops. /// \param Counters Loop counters. /// \param Updates Expressions for loop counters update for CodeGen. /// \param Finals Final loop counter values for CodeGen. 
@@ -965,8 +1086,10 @@ unsigned CollapsedNum, ArrayRef Clauses, Stmt *AssociatedStmt, Expr *IV, Expr *LastIteration, Expr *CalcLastIteration, Expr *PreCond, Expr *Cond, - Expr *SeparatedCond, Expr *Init, Expr *Inc, ArrayRef Counters, - ArrayRef Updates, ArrayRef Finals); + Expr *SeparatedCond, Expr *Init, Expr *Inc, Expr *IL, Expr *LB, + Expr *UB, Expr *ST, Expr *EUB, Expr *NLB, Expr *NUB, + ArrayRef Counters, ArrayRef Updates, + ArrayRef Finals); /// \brief Creates an empty directive with the place /// for \a NumClauses clauses. @@ -1037,6 +1160,13 @@ /// \param Cond Condition. /// \param SeparatedCond Condition with 1 iteration separated. /// \param Inc Loop increment. + /// \param IL IsLastIteration local variable passed to runtime. + /// \param LB LowerBound local variable passed to runtime. + /// \param UB UpperBound local variable passed to runtime. + /// \param ST Stride local variable passed to runtime. + /// \param EUB EnsureUpperBound -- expression LB = min(LB, NumIterations). + /// \param NLB Update of LowerBound for statically scheduled 'omp for' loops. + /// \param NUB Update of UpperBound for statically scheduled 'omp for' loops. /// \param Counters Loop counters. /// \param Updates Expressions for loop counters update for CodeGen. /// \param Finals Final loop counter values for CodeGen. @@ -1046,8 +1176,10 @@ unsigned CollapsedNum, ArrayRef Clauses, Stmt *AssociatedStmt, Expr *IV, Expr *LastIteration, Expr *CalcLastIteration, Expr *PreCond, Expr *Cond, - Expr *SeparatedCond, Expr *Init, Expr *Inc, ArrayRef Counters, - ArrayRef Updates, ArrayRef Finals); + Expr *SeparatedCond, Expr *Init, Expr *Inc, Expr *IL, Expr *LB, + Expr *UB, Expr *ST, Expr *EUB, Expr *NLB, Expr *NUB, + ArrayRef Counters, ArrayRef Updates, + ArrayRef Finals); /// \brief Creates an empty directive with the place /// for \a NumClauses clauses. 
Index: lib/AST/Stmt.cpp =================================================================== --- lib/AST/Stmt.cpp +++ lib/AST/Stmt.cpp @@ -1486,8 +1486,9 @@ ArrayRef Updates, ArrayRef Finals) { unsigned Size = llvm::RoundUpToAlignment(sizeof(OMPSimdDirective), llvm::alignOf()); - void *Mem = C.Allocate(Size + sizeof(OMPClause *) * Clauses.size() + - sizeof(Stmt *) * numLoopChildren(CollapsedNum)); + void *Mem = + C.Allocate(Size + sizeof(OMPClause *) * Clauses.size() + + sizeof(Stmt *) * numLoopChildren(CollapsedNum, OMPD_simd)); OMPSimdDirective *Dir = new (Mem) OMPSimdDirective(StartLoc, EndLoc, CollapsedNum, Clauses.size()); Dir->setClauses(Clauses); @@ -1511,23 +1512,25 @@ EmptyShell) { unsigned Size = llvm::RoundUpToAlignment(sizeof(OMPSimdDirective), llvm::alignOf()); - void *Mem = C.Allocate(Size + sizeof(OMPClause *) * NumClauses + - sizeof(Stmt *) * numLoopChildren(CollapsedNum)); + void *Mem = + C.Allocate(Size + sizeof(OMPClause *) * NumClauses + + sizeof(Stmt *) * numLoopChildren(CollapsedNum, OMPD_simd)); return new (Mem) OMPSimdDirective(CollapsedNum, NumClauses); } -OMPForDirective * -OMPForDirective::Create(const ASTContext &C, SourceLocation StartLoc, - SourceLocation EndLoc, unsigned CollapsedNum, - ArrayRef Clauses, Stmt *AssociatedStmt, - Expr *IV, Expr *LastIteration, Expr *CalcLastIteration, - Expr *PreCond, Expr *Cond, Expr *SeparatedCond, - Expr *Init, Expr *Inc, ArrayRef Counters, - ArrayRef Updates, ArrayRef Finals) { +OMPForDirective *OMPForDirective::Create( + const ASTContext &C, SourceLocation StartLoc, SourceLocation EndLoc, + unsigned CollapsedNum, ArrayRef Clauses, Stmt *AssociatedStmt, + Expr *IV, Expr *LastIteration, Expr *CalcLastIteration, Expr *PreCond, + Expr *Cond, Expr *SeparatedCond, Expr *Init, Expr *Inc, Expr *IL, Expr *LB, + Expr *UB, Expr *ST, Expr *EUB, Expr *NLB, Expr *NUB, + ArrayRef Counters, ArrayRef Updates, + ArrayRef Finals) { unsigned Size = llvm::RoundUpToAlignment(sizeof(OMPForDirective), llvm::alignOf()); - 
void *Mem = C.Allocate(Size + sizeof(OMPClause *) * Clauses.size() + - sizeof(Stmt *) * numLoopChildren(CollapsedNum)); + void *Mem = + C.Allocate(Size + sizeof(OMPClause *) * Clauses.size() + + sizeof(Stmt *) * numLoopChildren(CollapsedNum, OMPD_for)); OMPForDirective *Dir = new (Mem) OMPForDirective(StartLoc, EndLoc, CollapsedNum, Clauses.size()); Dir->setClauses(Clauses); @@ -1539,6 +1542,13 @@ Dir->setCond(Cond, SeparatedCond); Dir->setInit(Init); Dir->setInc(Inc); + Dir->setIsLastIterVariable(IL); + Dir->setLowerBoundVariable(LB); + Dir->setUpperBoundVariable(UB); + Dir->setStrideVariable(ST); + Dir->setEnsureUpperBound(EUB); + Dir->setNextLowerBound(NLB); + Dir->setNextUpperBound(NUB); Dir->setCounters(Counters); Dir->setUpdates(Updates); Dir->setFinals(Finals); @@ -1551,8 +1561,9 @@ EmptyShell) { unsigned Size = llvm::RoundUpToAlignment(sizeof(OMPForDirective), llvm::alignOf()); - void *Mem = C.Allocate(Size + sizeof(OMPClause *) * NumClauses + - sizeof(Stmt *) * numLoopChildren(CollapsedNum)); + void *Mem = + C.Allocate(Size + sizeof(OMPClause *) * NumClauses + + sizeof(Stmt *) * numLoopChildren(CollapsedNum, OMPD_for)); return new (Mem) OMPForDirective(CollapsedNum, NumClauses); } @@ -1560,13 +1571,15 @@ const ASTContext &C, SourceLocation StartLoc, SourceLocation EndLoc, unsigned CollapsedNum, ArrayRef Clauses, Stmt *AssociatedStmt, Expr *IV, Expr *LastIteration, Expr *CalcLastIteration, Expr *PreCond, - Expr *Cond, Expr *SeparatedCond, Expr *Init, Expr *Inc, + Expr *Cond, Expr *SeparatedCond, Expr *Init, Expr *Inc, Expr *IL, Expr *LB, + Expr *UB, Expr *ST, Expr *EUB, Expr *NLB, Expr *NUB, ArrayRef Counters, ArrayRef Updates, ArrayRef Finals) { unsigned Size = llvm::RoundUpToAlignment(sizeof(OMPForSimdDirective), llvm::alignOf()); - void *Mem = C.Allocate(Size + sizeof(OMPClause *) * Clauses.size() + - sizeof(Stmt *) * numLoopChildren(CollapsedNum)); + void *Mem = + C.Allocate(Size + sizeof(OMPClause *) * Clauses.size() + + sizeof(Stmt *) * 
numLoopChildren(CollapsedNum, OMPD_for_simd)); OMPForSimdDirective *Dir = new (Mem) OMPForSimdDirective(StartLoc, EndLoc, CollapsedNum, Clauses.size()); Dir->setClauses(Clauses); @@ -1578,6 +1591,13 @@ Dir->setCond(Cond, SeparatedCond); Dir->setInit(Init); Dir->setInc(Inc); + Dir->setIsLastIterVariable(IL); + Dir->setLowerBoundVariable(LB); + Dir->setUpperBoundVariable(UB); + Dir->setStrideVariable(ST); + Dir->setEnsureUpperBound(EUB); + Dir->setNextLowerBound(NLB); + Dir->setNextUpperBound(NUB); Dir->setCounters(Counters); Dir->setUpdates(Updates); Dir->setFinals(Finals); @@ -1590,8 +1610,9 @@ EmptyShell) { unsigned Size = llvm::RoundUpToAlignment(sizeof(OMPForSimdDirective), llvm::alignOf()); - void *Mem = C.Allocate(Size + sizeof(OMPClause *) * NumClauses + - sizeof(Stmt *) * numLoopChildren(CollapsedNum)); + void *Mem = + C.Allocate(Size + sizeof(OMPClause *) * NumClauses + + sizeof(Stmt *) * numLoopChildren(CollapsedNum, OMPD_for_simd)); return new (Mem) OMPForSimdDirective(CollapsedNum, NumClauses); } @@ -1709,13 +1730,15 @@ const ASTContext &C, SourceLocation StartLoc, SourceLocation EndLoc, unsigned CollapsedNum, ArrayRef Clauses, Stmt *AssociatedStmt, Expr *IV, Expr *LastIteration, Expr *CalcLastIteration, Expr *PreCond, - Expr *Cond, Expr *SeparatedCond, Expr *Init, Expr *Inc, + Expr *Cond, Expr *SeparatedCond, Expr *Init, Expr *Inc, Expr *IL, Expr *LB, + Expr *UB, Expr *ST, Expr *EUB, Expr *NLB, Expr *NUB, ArrayRef Counters, ArrayRef Updates, ArrayRef Finals) { unsigned Size = llvm::RoundUpToAlignment(sizeof(OMPParallelForDirective), llvm::alignOf()); void *Mem = C.Allocate(Size + sizeof(OMPClause *) * Clauses.size() + - sizeof(Stmt *) * numLoopChildren(CollapsedNum)); + sizeof(Stmt *) * + numLoopChildren(CollapsedNum, OMPD_parallel_for)); OMPParallelForDirective *Dir = new (Mem) OMPParallelForDirective(StartLoc, EndLoc, CollapsedNum, Clauses.size()); Dir->setClauses(Clauses); @@ -1727,6 +1750,13 @@ Dir->setCond(Cond, SeparatedCond); Dir->setInit(Init); 
Dir->setInc(Inc); + Dir->setIsLastIterVariable(IL); + Dir->setLowerBoundVariable(LB); + Dir->setUpperBoundVariable(UB); + Dir->setStrideVariable(ST); + Dir->setEnsureUpperBound(EUB); + Dir->setNextLowerBound(NLB); + Dir->setNextUpperBound(NUB); Dir->setCounters(Counters); Dir->setUpdates(Updates); Dir->setFinals(Finals); @@ -1739,7 +1769,8 @@ unsigned Size = llvm::RoundUpToAlignment(sizeof(OMPParallelForDirective), llvm::alignOf()); void *Mem = C.Allocate(Size + sizeof(OMPClause *) * NumClauses + - sizeof(Stmt *) * numLoopChildren(CollapsedNum)); + sizeof(Stmt *) * + numLoopChildren(CollapsedNum, OMPD_parallel_for)); return new (Mem) OMPParallelForDirective(CollapsedNum, NumClauses); } @@ -1747,13 +1778,15 @@ const ASTContext &C, SourceLocation StartLoc, SourceLocation EndLoc, unsigned CollapsedNum, ArrayRef Clauses, Stmt *AssociatedStmt, Expr *IV, Expr *LastIteration, Expr *CalcLastIteration, Expr *PreCond, - Expr *Cond, Expr *SeparatedCond, Expr *Init, Expr *Inc, + Expr *Cond, Expr *SeparatedCond, Expr *Init, Expr *Inc, Expr *IL, Expr *LB, + Expr *UB, Expr *ST, Expr *EUB, Expr *NLB, Expr *NUB, ArrayRef Counters, ArrayRef Updates, ArrayRef Finals) { unsigned Size = llvm::RoundUpToAlignment(sizeof(OMPParallelForSimdDirective), llvm::alignOf()); - void *Mem = C.Allocate(Size + sizeof(OMPClause *) * Clauses.size() + - sizeof(Stmt *) * numLoopChildren(CollapsedNum)); + void *Mem = C.Allocate( + Size + sizeof(OMPClause *) * Clauses.size() + + sizeof(Stmt *) * numLoopChildren(CollapsedNum, OMPD_parallel_for_simd)); OMPParallelForSimdDirective *Dir = new (Mem) OMPParallelForSimdDirective( StartLoc, EndLoc, CollapsedNum, Clauses.size()); Dir->setClauses(Clauses); @@ -1765,6 +1798,13 @@ Dir->setCond(Cond, SeparatedCond); Dir->setInit(Init); Dir->setInc(Inc); + Dir->setIsLastIterVariable(IL); + Dir->setLowerBoundVariable(LB); + Dir->setUpperBoundVariable(UB); + Dir->setStrideVariable(ST); + Dir->setEnsureUpperBound(EUB); + Dir->setNextLowerBound(NLB); + 
Dir->setNextUpperBound(NUB); Dir->setCounters(Counters); Dir->setUpdates(Updates); Dir->setFinals(Finals); @@ -1777,8 +1817,9 @@ unsigned CollapsedNum, EmptyShell) { unsigned Size = llvm::RoundUpToAlignment(sizeof(OMPParallelForSimdDirective), llvm::alignOf()); - void *Mem = C.Allocate(Size + sizeof(OMPClause *) * NumClauses + - sizeof(Stmt *) * numLoopChildren(CollapsedNum)); + void *Mem = C.Allocate( + Size + sizeof(OMPClause *) * NumClauses + + sizeof(Stmt *) * numLoopChildren(CollapsedNum, OMPD_parallel_for_simd)); return new (Mem) OMPParallelForSimdDirective(CollapsedNum, NumClauses); } Index: lib/CodeGen/CGOpenMPRuntime.h =================================================================== --- lib/CodeGen/CGOpenMPRuntime.h +++ lib/CodeGen/CGOpenMPRuntime.h @@ -15,6 +15,7 @@ #define LLVM_CLANG_LIB_CODEGEN_CGOPENMPRUNTIME_H #include "clang/Basic/SourceLocation.h" +#include "clang/Basic/OpenMPKinds.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/StringMap.h" @@ -75,6 +76,12 @@ OMPRTL__kmpc_end_critical, // Call to void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid); OMPRTL__kmpc_barrier, + // Calls for static scheduling 'omp for' loops. + OMPRTL__kmpc_for_static_init_4, + OMPRTL__kmpc_for_static_init_4u, + OMPRTL__kmpc_for_static_init_8, + OMPRTL__kmpc_for_static_init_8u, + OMPRTL__kmpc_for_static_fini, // Call to void __kmpc_serialized_parallel(ident_t *loc, kmp_int32 // global_tid); OMPRTL__kmpc_serialized_parallel, @@ -254,6 +261,56 @@ virtual void EmitOMPBarrierCall(CodeGenFunction &CGF, SourceLocation Loc, OpenMPLocationFlags Flags); + /// \brief Check if the specified \a ScheduleKind is static non-chunked. + /// This kind of worksharing directive is emitted without outer loop. + /// \param ScheduleKind Schedule kind specified in the 'schedule' clause. + /// \param Chunked True if chunk is specified in the clause. 
+ /// + virtual bool isStaticNonchunked(OpenMPScheduleClauseKind ScheduleKind, + bool Chunked) const; + + /// \brief If the loop has static schedule, call + /// __kmpc_for_static_init( + /// ident_t *loc, kmp_int32 tid, kmp_int32 schedtype, + /// kmp_int32 *p_lastiter, kmp_int[32|64] *p_lower, + /// kmp_int[32|64] *p_upper, kmp_int[32|64] *p_stride, + /// kmp_int[32|64] incr, kmp_int[32|64] chunk); + /// + /// This runtime routine is called before the OpenMP loop with static + /// schedule to get the upper/lower bounds \a LB and \a UB for the current + /// OpenMP thread, and to get stride \a ST if the schedule is static-chunked. + /// + /// \param CGF Reference to current CodeGenFunction. + /// \param Loc Clang source location. + /// \param ScheduleKind Schedule kind, specified by the 'schedule' clause. + /// \param IVSize Size of the iteration variable in bits. + /// \param IVSigned Sign of the iteration variable. + /// \param IL Address of the output variable in which the flag of the + /// last iteration is returned. + /// \param LB Address of the output variable in which the lower iteration + /// number is returned. + /// \param UB Address of the output variable in which the upper iteration + /// number is returned. + /// \param ST Address of the output variable in which the stride value is + /// returned, necessary to generate the static_chunked scheduled loop. + /// \param Chunk Value of the chunk for the static_chunked scheduled loop. + /// + virtual void EmitOMPForInit(CodeGenFunction &CGF, SourceLocation Loc, + OpenMPScheduleClauseKind SchedKind, + unsigned IVSize, bool IVSigned, llvm::Value *IL, + llvm::Value *LB, llvm::Value *UB, llvm::Value *ST, + llvm::Value *Chunk = nullptr); + + /// \brief If the loop has static schedule, call + /// __kmpc_for_static_fini(ident_t *loc, kmp_int32 tid) + /// + /// \param CGF Reference to current CodeGenFunction. + /// \param Loc Clang source location. 
+ /// \param ScheduleKind Schedule kind, specified by the 'schedule' clause. + /// + virtual void EmitOMPForFini(CodeGenFunction &CGF, SourceLocation Loc, + OpenMPScheduleClauseKind ScheduleKind); + /// \brief Emits call to void __kmpc_push_num_threads(ident_t *loc, kmp_int32 /// global_tid, kmp_int32 num_threads) to generate code for 'num_threads' /// clause. Index: lib/CodeGen/CGOpenMPRuntime.cpp =================================================================== --- lib/CodeGen/CGOpenMPRuntime.cpp +++ lib/CodeGen/CGOpenMPRuntime.cpp @@ -150,7 +150,8 @@ OpenMPLocThreadIDMapTy::iterator I = OpenMPLocThreadIDMap.find(CGF.CurFn); if (I != OpenMPLocThreadIDMap.end()) { LocValue = I->second.DebugLoc; - } else { + } + if (LocValue == nullptr) { // Generate "ident_t .kmpc_loc.addr;" llvm::AllocaInst *AI = CGF.CreateTempAlloca(IdentTy, ".kmpc_loc.addr"); AI->setAlignment(CGM.getDataLayout().getPrefTypeAlignment(IdentTy)); @@ -200,8 +201,11 @@ OpenMPLocThreadIDMapTy::iterator I = OpenMPLocThreadIDMap.find(CGF.CurFn); if (I != OpenMPLocThreadIDMap.end()) { ThreadID = I->second.ThreadID; - } else if (auto OMPRegionInfo = - dyn_cast_or_null(CGF.CapturedStmtInfo)) { + if (ThreadID != nullptr) + return ThreadID; + } + if (auto OMPRegionInfo = + dyn_cast_or_null(CGF.CapturedStmtInfo)) { // Check if this an outlined function with thread id passed as argument. 
auto ThreadIDVar = OMPRegionInfo->getThreadIDVariable(); auto LVal = OMPRegionInfo->getThreadIDVariableLValue(CGF); @@ -296,6 +300,95 @@ RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name*/ "__kmpc_barrier"); break; } + // Build __kmpc_for_static_init*( + // ident_t *loc, kmp_int32 tid, kmp_int32 schedtype, + // kmp_int32 *p_lastiter, kmp_int[32|64] *p_lower, + // kmp_int[32|64] *p_upper, kmp_int[32|64] *p_stride, + // kmp_int[32|64] incr, kmp_int[32|64] chunk); + case OMPRTL__kmpc_for_static_init_4: { + auto ITy = CGM.Int32Ty; + auto PtrTy = llvm::PointerType::getUnqual(ITy); + llvm::Type *TypeParams[] = { + getIdentTyPointerTy(), // loc + CGM.Int32Ty, // tid + CGM.Int32Ty, // schedtype + llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter + PtrTy, // p_lower + PtrTy, // p_upper + PtrTy, // p_stride + ITy, // incr + ITy // chunk + }; + llvm::FunctionType *FnTy = + llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); + RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_for_static_init_4"); + break; + } + case OMPRTL__kmpc_for_static_init_4u: { + auto ITy = CGM.Int32Ty; + auto PtrTy = llvm::PointerType::getUnqual(ITy); + llvm::Type *TypeParams[] = { + getIdentTyPointerTy(), // loc + CGM.Int32Ty, // tid + CGM.Int32Ty, // schedtype + llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter + PtrTy, // p_lower + PtrTy, // p_upper + PtrTy, // p_stride + ITy, // incr + ITy // chunk + }; + llvm::FunctionType *FnTy = + llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); + RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_for_static_init_4u"); + break; + } + case OMPRTL__kmpc_for_static_init_8: { + auto ITy = CGM.Int64Ty; + auto PtrTy = llvm::PointerType::getUnqual(ITy); + llvm::Type *TypeParams[] = { + getIdentTyPointerTy(), // loc + CGM.Int32Ty, // tid + CGM.Int32Ty, // schedtype + llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter + PtrTy, // p_lower + PtrTy, // p_upper + PtrTy, // p_stride + ITy, // incr + ITy // chunk + }; + 
llvm::FunctionType *FnTy = + llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); + RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_for_static_init_8"); + break; + } + case OMPRTL__kmpc_for_static_init_8u: { + auto ITy = CGM.Int64Ty; + auto PtrTy = llvm::PointerType::getUnqual(ITy); + llvm::Type *TypeParams[] = { + getIdentTyPointerTy(), // loc + CGM.Int32Ty, // tid + CGM.Int32Ty, // schedtype + llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter + PtrTy, // p_lower + PtrTy, // p_upper + PtrTy, // p_stride + ITy, // incr + ITy // chunk + }; + llvm::FunctionType *FnTy = + llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); + RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_for_static_init_8u"); + break; + } + case OMPRTL__kmpc_for_static_fini: { + // Build void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid); + llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; + llvm::FunctionType *FnTy = + llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); + RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_for_static_fini"); + break; + } case OMPRTL__kmpc_push_num_threads: { // Build void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid, // kmp_int32 num_threads) @@ -441,6 +534,103 @@ CGF.EmitRuntimeCall(RTLFn, Args); } +/// \brief Schedule types for 'omp for' loops (see enum sched_type in kmp.h). +enum OpenMPSchedType { + /// \brief Lower bound for default (unordered) versions. + OMP_sch_lower = 32, + OMP_sch_static_chunked = 33, + OMP_sch_static = 34, + OMP_sch_dynamic_chunked = 35, + OMP_sch_guided_chunked = 36, + OMP_sch_runtime = 37, + OMP_sch_auto = 38, + /// \brief Lower bound for 'ordered' versions. + OMP_ord_lower = 64, + /// \brief Lower bound for 'nomerge' versions. + OMP_nm_lower = 160, +}; + +/// \brief Map the OpenMP loop schedule to the runtime enumeration. 
+static OpenMPSchedType getRuntimeSchedule(OpenMPScheduleClauseKind ScheduleKind, + bool Chunked) { + auto Schedule = OMP_sch_static; + switch (ScheduleKind) { + case OMPC_SCHEDULE_static: + Schedule = Chunked ? OMP_sch_static_chunked : OMP_sch_static; + break; + case OMPC_SCHEDULE_dynamic: + Schedule = OMP_sch_dynamic_chunked; + break; + case OMPC_SCHEDULE_guided: + Schedule = OMP_sch_guided_chunked; + break; + case OMPC_SCHEDULE_auto: + Schedule = OMP_sch_auto; + break; + case OMPC_SCHEDULE_runtime: + Schedule = OMP_sch_runtime; + break; + case OMPC_SCHEDULE_unknown: + assert(!Chunked && "chunk was specified but schedule kind not known"); + break; + } + return Schedule; +} + +bool CGOpenMPRuntime::isStaticNonchunked(OpenMPScheduleClauseKind ScheduleKind, + bool Chunked) const { + auto Schedule = getRuntimeSchedule(ScheduleKind, Chunked); + return Schedule == OMP_sch_static; +} + +void CGOpenMPRuntime::EmitOMPForInit(CodeGenFunction &CGF, SourceLocation Loc, + OpenMPScheduleClauseKind ScheduleKind, + unsigned IVSize, bool IVSigned, + llvm::Value *IL, llvm::Value *LB, + llvm::Value *UB, llvm::Value *ST, + llvm::Value *Chunk) { + OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunk != nullptr); + // Call __kmpc_for_static_init( + // ident_t *loc, kmp_int32 tid, kmp_int32 schedtype, + // kmp_int32 *p_lastiter, kmp_int[32|64] *p_lower, + // kmp_int[32|64] *p_upper, kmp_int[32|64] *p_stride, + // kmp_int[32|64] incr, kmp_int[32|64] chunk); + // TODO: Implement dynamic schedule. + if (Chunk == nullptr) + Chunk = CGF.Builder.getIntN(IVSize, /*C*/ 1); + llvm::Value *Args[] = { + EmitOpenMPUpdateLocation(CGF, Loc, OMP_IDENT_KMPC), + GetOpenMPThreadID(CGF, Loc), + CGF.Builder.getInt32(Schedule), // Schedule type + IL, // &isLastIter + LB, // &LB + UB, // &UB + ST, // &Stride + CGF.Builder.getIntN(IVSize, 1), // Incr + Chunk // Chunk + }; + assert((IVSize == 32 || IVSize == 64) && + "Index size is not compatible with the omp runtime"); + auto F = IVSize == 32 ? 
(IVSigned ? OMPRTL__kmpc_for_static_init_4 + : OMPRTL__kmpc_for_static_init_4u) + : (IVSigned ? OMPRTL__kmpc_for_static_init_8 + : OMPRTL__kmpc_for_static_init_8u); + auto RTLFn = CreateRuntimeFunction(F); + CGF.EmitRuntimeCall(RTLFn, Args); +} + +void CGOpenMPRuntime::EmitOMPForFini(CodeGenFunction &CGF, SourceLocation Loc, + OpenMPScheduleClauseKind ScheduleKind) { + assert((ScheduleKind == OMPC_SCHEDULE_static || + ScheduleKind == OMPC_SCHEDULE_unknown) && + "Non-static schedule kinds are not yet implemented"); + // Call __kmpc_for_static_fini(ident_t *loc, kmp_int32 tid); + llvm::Value *Args[] = {EmitOpenMPUpdateLocation(CGF, Loc, OMP_IDENT_KMPC), + GetOpenMPThreadID(CGF, Loc)}; + auto RTLFn = CreateRuntimeFunction(OMPRTL__kmpc_for_static_fini); + CGF.EmitRuntimeCall(RTLFn, Args); +} + void CGOpenMPRuntime::EmitOMPNumThreadsClause(CodeGenFunction &CGF, llvm::Value *NumThreads, SourceLocation Loc) { Index: lib/CodeGen/CGStmtOpenMP.cpp =================================================================== --- lib/CodeGen/CGStmtOpenMP.cpp +++ lib/CodeGen/CGStmtOpenMP.cpp @@ -443,8 +443,113 @@ DI->EmitLexicalBlockEnd(Builder, S.getSourceRange().getEnd()); } -void CodeGenFunction::EmitOMPForDirective(const OMPForDirective &) { - llvm_unreachable("CodeGen for 'omp for' is not supported yet."); +/// \brief Emit a helper variable and return corresponding lvalue. +static LValue EmitOMPHelperVar(CodeGenFunction &CGF, + const DeclRefExpr *Helper) { + auto VDecl = cast(Helper->getDecl()); + CGF.EmitVarDecl(*VDecl); + return CGF.EmitLValue(Helper); +} + +void CodeGenFunction::EmitOMPWorksharingLoop(const OMPLoopDirective &S) { + // Emit the loop iteration variable. + auto IVExpr = cast(S.getIterationVariable()); + auto IVDecl = cast(IVExpr->getDecl()); + EmitVarDecl(*IVDecl); + + // Emit the iterations count variable. + // If it is not a variable, Sema decided to calculate iterations count on each + // iteration (e.g., it is foldable into a constant). 
+ if (auto LIExpr = dyn_cast(S.getLastIteration())) { + EmitVarDecl(*cast(LIExpr->getDecl())); + // Emit calculation of the iterations count. + EmitIgnoredExpr(S.getCalcLastIteration()); + } + + auto &RT = CGM.getOpenMPRuntime(); + + // Check pre-condition. + { + // Emit: if (LastIteration > 0) - begin. + RegionCounter Cnt = getPGORegionCounter(&S); + auto ThenBlock = createBasicBlock("omp.precond.then"); + auto ContBlock = createBasicBlock("omp.precond.end"); + EmitBranchOnBoolExpr(S.getPreCond(), ThenBlock, ContBlock, Cnt.getCount()); + EmitBlock(ThenBlock); + Cnt.beginRegion(Builder); + // Emit 'then' code. + { + // Emit helper vars inits. + LValue LB = + EmitOMPHelperVar(*this, cast(S.getLowerBoundVariable())); + LValue UB = + EmitOMPHelperVar(*this, cast(S.getUpperBoundVariable())); + LValue ST = + EmitOMPHelperVar(*this, cast(S.getStrideVariable())); + LValue IL = + EmitOMPHelperVar(*this, cast(S.getIsLastIterVariable())); + + OMPPrivateScope LoopScope(*this); + EmitPrivateLoopCounters(*this, LoopScope, S.counters()); + + // Detect the loop schedule kind and chunk. + auto ScheduleKind = OMPC_SCHEDULE_unknown; + llvm::Value *Chunk = nullptr; + if (auto C = cast_or_null( + S.getSingleClause(OMPC_schedule))) { + ScheduleKind = C->getScheduleKind(); + if (auto Ch = C->getChunkSize()) { + Chunk = EmitScalarExpr(Ch); + Chunk = EmitScalarConversion(Chunk, Ch->getType(), + S.getIterationVariable()->getType()); + } + } + const unsigned IVSize = getContext().getTypeSize(IVExpr->getType()); + const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation(); + if (RT.isStaticNonchunked(ScheduleKind, + /* Chunked */ Chunk != nullptr)) { + // OpenMP [2.7.1, Loop Construct, Description, table 2-1] + // When no chunk_size is specified, the iteration space is divided into + // chunks that are approximately equal in size, and at most one chunk is + // distributed to each thread. Note that the size of the chunks is + // unspecified in this case. 
+ RT.EmitOMPForInit(*this, S.getLocStart(), ScheduleKind, IVSize, IVSigned, + IL.getAddress(), LB.getAddress(), UB.getAddress(), + ST.getAddress()); + // UB = min(UB, GlobalUB); + EmitIgnoredExpr(S.getEnsureUpperBound()); + // IV = LB; + EmitIgnoredExpr(S.getInit()); + // while (idx <= UB) { BODY; ++idx; } + EmitOMPInnerLoop(S, LoopScope); + // Tell the runtime we are done. + RT.EmitOMPForFini(*this, S.getLocStart(), ScheduleKind); + } else + llvm_unreachable("Requested OpenMP schedule is not yet implemented"); + } + // Emit: if (LastIteration != 0) - end. + EmitBranch(ContBlock); + EmitBlock(ContBlock, true); + } +} + +void CodeGenFunction::EmitOMPForDirective(const OMPForDirective &S) { + RunCleanupsScope DirectiveScope(*this); + + CGDebugInfo *DI = getDebugInfo(); + if (DI) + DI->EmitLexicalBlockStart(Builder, S.getSourceRange().getBegin()); + + EmitOMPWorksharingLoop(S); + + // Emit an implicit barrier at the end. + auto Flags = static_cast( + CGOpenMPRuntime::OMP_IDENT_KMPC | + CGOpenMPRuntime::OMP_IDENT_BARRIER_IMPL); + CGM.getOpenMPRuntime().EmitOMPBarrierCall(*this, S.getLocStart(), Flags); + + if (DI) + DI->EmitLexicalBlockEnd(Builder, S.getSourceRange().getEnd()); } void CodeGenFunction::EmitOMPForSimdDirective(const OMPForSimdDirective &) { Index: lib/CodeGen/CodeGenFunction.h =================================================================== --- lib/CodeGen/CodeGenFunction.h +++ lib/CodeGen/CodeGenFunction.h @@ -2035,12 +2035,17 @@ void EmitOMPTargetDirective(const OMPTargetDirective &S); void EmitOMPTeamsDirective(const OMPTeamsDirective &S); - /// Helpers for 'omp simd' directive. +private: + + /// Helpers for the OpenMP loop directives. 
void EmitOMPLoopBody(const OMPLoopDirective &Directive, bool SeparateIter = false); void EmitOMPInnerLoop(const OMPLoopDirective &S, OMPPrivateScope &LoopScope, bool SeparateIter = false); void EmitOMPSimdFinal(const OMPLoopDirective &S); + void EmitOMPWorksharingLoop(const OMPLoopDirective &S); + +public: //===--------------------------------------------------------------------===// // LValue Expression Emission Index: lib/Sema/SemaOpenMP.cpp =================================================================== --- lib/Sema/SemaOpenMP.cpp +++ lib/Sema/SemaOpenMP.cpp @@ -2392,6 +2392,13 @@ Expr *SeparatedCond; Expr *Init; Expr *Inc; + Expr *IL; + Expr *LB; + Expr *UB; + Expr *ST; + Expr *EUB; + Expr *NLB; + Expr *NUB; SmallVector Counters; SmallVector Updates; SmallVector Finals; @@ -2410,6 +2417,13 @@ SeparatedCond = nullptr; Init = nullptr; Inc = nullptr; + IL = nullptr; + LB = nullptr; + UB = nullptr; + ST = nullptr; + EUB = nullptr; + NLB = nullptr; + NUB = nullptr; Counters.resize(size); Updates.resize(size); Finals.resize(size); @@ -2763,23 +2777,69 @@ CurScope, InitLoc, BO_GT, LastIteration.get(), SemaRef.ActOnIntegerConstant(SourceLocation(), 0).get()); - // Build the iteration variable and its initialization to zero before loop. + QualType VType = LastIteration.get()->getType(); + // Build variables passed into runtime, necessary for worksharing directives. + ExprResult LB, UB, IL, ST, EUB; + if (isOpenMPWorksharingDirective(DKind)) { + // Lower bound variable, initialized with zero. + VarDecl *LBDecl = BuildVarDecl(SemaRef, InitLoc, VType, ".omp.lb"); + LB = SemaRef.BuildDeclRefExpr(LBDecl, VType, VK_LValue, InitLoc); + SemaRef.AddInitializerToDecl( + LBDecl, SemaRef.ActOnIntegerConstant(InitLoc, 0).get(), + /*DirectInit*/ false, /*TypeMayContainAuto*/ false); + + // Upper bound variable, initialized with last iteration number. 
+ VarDecl *UBDecl = BuildVarDecl(SemaRef, InitLoc, VType, ".omp.ub"); + UB = SemaRef.BuildDeclRefExpr(UBDecl, VType, VK_LValue, InitLoc); + SemaRef.AddInitializerToDecl(UBDecl, LastIteration.get(), + /*DirectInit*/ false, + /*TypeMayContainAuto*/ false); + + // A 32-bit variable-flag where runtime returns 1 for the last iteration. + // This will be used to implement clause 'lastprivate'. + QualType Int32Ty = SemaRef.Context.getIntTypeForBitwidth(32, true); + VarDecl *ILDecl = BuildVarDecl(SemaRef, InitLoc, Int32Ty, ".omp.is_last"); + IL = SemaRef.BuildDeclRefExpr(ILDecl, Int32Ty, VK_LValue, InitLoc); + SemaRef.AddInitializerToDecl( + ILDecl, SemaRef.ActOnIntegerConstant(InitLoc, 0).get(), + /*DirectInit*/ false, /*TypeMayContainAuto*/ false); + + // Stride variable returned by runtime (we initialize it to 1 by default). + VarDecl *STDecl = BuildVarDecl(SemaRef, InitLoc, VType, ".omp.stride"); + ST = SemaRef.BuildDeclRefExpr(STDecl, VType, VK_LValue, InitLoc); + SemaRef.AddInitializerToDecl( + STDecl, SemaRef.ActOnIntegerConstant(InitLoc, 1).get(), + /*DirectInit*/ false, /*TypeMayContainAuto*/ false); + + // Build expression: UB = min(UB, LastIteration) + // It is necessary for CodeGen of directives with static scheduling. + ExprResult IsUBGreater = SemaRef.BuildBinOp(CurScope, InitLoc, BO_GT, + UB.get(), LastIteration.get()); + ExprResult CondOp = SemaRef.ActOnConditionalOp( + InitLoc, InitLoc, IsUBGreater.get(), LastIteration.get(), UB.get()); + EUB = SemaRef.BuildBinOp(CurScope, InitLoc, BO_Assign, UB.get(), + CondOp.get()); + } + + // Build the iteration variable and its initialization before loop. 
ExprResult IV; ExprResult Init; { - VarDecl *IVDecl = BuildVarDecl(SemaRef, InitLoc, - LastIteration.get()->getType(), ".omp.iv"); - IV = SemaRef.BuildDeclRefExpr(IVDecl, LastIteration.get()->getType(), - VK_LValue, InitLoc); - Init = SemaRef.BuildBinOp( - CurScope, InitLoc, BO_Assign, IV.get(), - SemaRef.ActOnIntegerConstant(SourceLocation(), 0).get()); + VarDecl *IVDecl = BuildVarDecl(SemaRef, InitLoc, VType, ".omp.iv"); + IV = SemaRef.BuildDeclRefExpr(IVDecl, VType, VK_LValue, InitLoc); + Expr *RHS = isOpenMPWorksharingDirective(DKind) + ? LB.get() + : SemaRef.ActOnIntegerConstant(SourceLocation(), 0).get(); + Init = SemaRef.BuildBinOp(CurScope, InitLoc, BO_Assign, IV.get(), RHS); } - // Loop condition (IV < NumIterations) + // Loop condition (IV < NumIterations) or (IV <= UB) for worksharing loops. SourceLocation CondLoc; - ExprResult Cond = SemaRef.BuildBinOp(CurScope, CondLoc, BO_LT, IV.get(), - NumIterations.get()); + ExprResult Cond = + isOpenMPWorksharingDirective(DKind) + ? SemaRef.BuildBinOp(CurScope, CondLoc, BO_LE, IV.get(), UB.get()) + : SemaRef.BuildBinOp(CurScope, CondLoc, BO_LT, IV.get(), + NumIterations.get()); // Loop condition with 1 iteration separated (IV < LastIteration) ExprResult SeparatedCond = SemaRef.BuildBinOp(CurScope, CondLoc, BO_LT, IV.get(), LastIteration.get()); @@ -2792,6 +2852,32 @@ if (!Inc.isUsable()) return 0; Inc = SemaRef.BuildBinOp(CurScope, IncLoc, BO_Assign, IV.get(), Inc.get()); + if (!Inc.isUsable()) + return 0; + + // Increments for worksharing loops (LB = LB + ST; UB = UB + ST). + // Used for directives with static scheduling. 
+ ExprResult NextLB, NextUB; + if (isOpenMPWorksharingDirective(DKind)) { + // LB + ST + NextLB = SemaRef.BuildBinOp(CurScope, IncLoc, BO_Add, LB.get(), ST.get()); + if (!NextLB.isUsable()) + return 0; + // LB = LB + ST + NextLB = + SemaRef.BuildBinOp(CurScope, IncLoc, BO_Assign, LB.get(), NextLB.get()); + if (!NextLB.isUsable()) + return 0; + // UB + ST + NextUB = SemaRef.BuildBinOp(CurScope, IncLoc, BO_Add, UB.get(), ST.get()); + if (!NextUB.isUsable()) + return 0; + // UB = UB + ST + NextUB = + SemaRef.BuildBinOp(CurScope, IncLoc, BO_Assign, UB.get(), NextUB.get()); + if (!NextUB.isUsable()) + return 0; + } // Build updates and final values of the loop counters. bool HasErrors = false; @@ -2881,6 +2967,13 @@ Built.SeparatedCond = SeparatedCond.get(); Built.Init = Init.get(); Built.Inc = Inc.get(); + Built.LB = LB.get(); + Built.UB = UB.get(); + Built.IL = IL.get(); + Built.ST = ST.get(); + Built.EUB = EUB.get(); + Built.NLB = NextLB.get(); + Built.NUB = NextUB.get(); return NestedLoopCount; } @@ -2937,7 +3030,8 @@ return OMPForDirective::Create( Context, StartLoc, EndLoc, NestedLoopCount, Clauses, AStmt, B.IterationVarRef, B.LastIteration, B.CalcLastIteration, B.PreCond, - B.Cond, B.SeparatedCond, B.Init, B.Inc, B.Counters, B.Updates, B.Finals); + B.Cond, B.SeparatedCond, B.Init, B.Inc, B.IL, B.LB, B.UB, B.ST, B.EUB, + B.NLB, B.NUB, B.Counters, B.Updates, B.Finals); } StmtResult Sema::ActOnOpenMPForSimdDirective( @@ -2952,11 +3046,15 @@ if (NestedLoopCount == 0) return StmtError(); + assert((CurContext->isDependentContext() || B.builtAll()) && + "omp for simd loop exprs were not built"); + getCurFunction()->setHasBranchProtectedScope(); return OMPForSimdDirective::Create( Context, StartLoc, EndLoc, NestedLoopCount, Clauses, AStmt, B.IterationVarRef, B.LastIteration, B.CalcLastIteration, B.PreCond, - B.Cond, B.SeparatedCond, B.Init, B.Inc, B.Counters, B.Updates, B.Finals); + B.Cond, B.SeparatedCond, B.Init, B.Inc, B.IL, B.LB, B.UB, B.ST, B.EUB, + B.NLB, B.NUB, 
B.Counters, B.Updates, B.Finals); } StmtResult Sema::ActOnOpenMPSectionsDirective(ArrayRef Clauses, @@ -3064,7 +3162,8 @@ return OMPParallelForDirective::Create( Context, StartLoc, EndLoc, NestedLoopCount, Clauses, AStmt, B.IterationVarRef, B.LastIteration, B.CalcLastIteration, B.PreCond, - B.Cond, B.SeparatedCond, B.Init, B.Inc, B.Counters, B.Updates, B.Finals); + B.Cond, B.SeparatedCond, B.Init, B.Inc, B.IL, B.LB, B.UB, B.ST, B.EUB, + B.NLB, B.NUB, B.Counters, B.Updates, B.Finals); } StmtResult Sema::ActOnOpenMPParallelForSimdDirective( @@ -3092,7 +3191,8 @@ return OMPParallelForSimdDirective::Create( Context, StartLoc, EndLoc, NestedLoopCount, Clauses, AStmt, B.IterationVarRef, B.LastIteration, B.CalcLastIteration, B.PreCond, - B.Cond, B.SeparatedCond, B.Init, B.Inc, B.Counters, B.Updates, B.Finals); + B.Cond, B.SeparatedCond, B.Init, B.Inc, B.IL, B.LB, B.UB, B.ST, B.EUB, + B.NLB, B.NUB, B.Counters, B.Updates, B.Finals); } StmtResult Index: lib/Serialization/ASTReaderStmt.cpp =================================================================== --- lib/Serialization/ASTReaderStmt.cpp +++ lib/Serialization/ASTReaderStmt.cpp @@ -1986,6 +1986,15 @@ D->setCond(Fst, Snd); D->setInit(Reader.ReadSubExpr()); D->setInc(Reader.ReadSubExpr()); + if (isOpenMPWorksharingDirective(D->getDirectiveKind())) { + D->setIsLastIterVariable(Reader.ReadSubExpr()); + D->setLowerBoundVariable(Reader.ReadSubExpr()); + D->setUpperBoundVariable(Reader.ReadSubExpr()); + D->setStrideVariable(Reader.ReadSubExpr()); + D->setEnsureUpperBound(Reader.ReadSubExpr()); + D->setNextLowerBound(Reader.ReadSubExpr()); + D->setNextUpperBound(Reader.ReadSubExpr()); + } SmallVector Sub; unsigned CollapsedNum = D->getCollapsedNumber(); Sub.reserve(CollapsedNum); Index: lib/Serialization/ASTWriterStmt.cpp =================================================================== --- lib/Serialization/ASTWriterStmt.cpp +++ lib/Serialization/ASTWriterStmt.cpp @@ -1863,6 +1863,15 @@ Writer.AddStmt(D->getCond(/* 
SeparateIter */ true)); Writer.AddStmt(D->getInit()); Writer.AddStmt(D->getInc()); + if (isOpenMPWorksharingDirective(D->getDirectiveKind())) { + Writer.AddStmt(D->getIsLastIterVariable()); + Writer.AddStmt(D->getLowerBoundVariable()); + Writer.AddStmt(D->getUpperBoundVariable()); + Writer.AddStmt(D->getStrideVariable()); + Writer.AddStmt(D->getEnsureUpperBound()); + Writer.AddStmt(D->getNextLowerBound()); + Writer.AddStmt(D->getNextUpperBound()); + } for (auto I : D->counters()) { Writer.AddStmt(I); } Index: test/OpenMP/for_codegen.cpp =================================================================== --- test/OpenMP/for_codegen.cpp +++ test/OpenMP/for_codegen.cpp @@ -0,0 +1,172 @@ +// RUN: %clang_cc1 -verify -fopenmp=libiomp5 -x c++ -emit-llvm %s -fexceptions -fcxx-exceptions -o - | FileCheck %s +// RUN: %clang_cc1 -fopenmp=libiomp5 -x c++ -std=c++11 -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp=libiomp5 -x c++ -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -g -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s +// +// expected-no-diagnostics +#ifndef HEADER +#define HEADER + +// CHECK: [[IDENT_T_TY:%.+]] = type { i32, i32, i32, i32, i8* } +// CHECK-LABEL: define {{.*void}} @{{.*}}without_schedule_clause{{.*}}(float* {{.+}}, float* {{.+}}, float* {{.+}}, float* {{.+}}) +void without_schedule_clause(float *a, float *b, float *c, float *d) { +// CHECK: [[GTID:%.+]] = call{{.*}}i32 @__kmpc_global_thread_num([[IDENT_T_TY]]* [[DEFAULT_LOC:[@%].+]]) + #pragma omp for +// CHECK: call{{.*}}void @__kmpc_for_static_init_8([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID]], i32 34, i32* [[IS_LAST:%[^,]+]], i64* [[OMP_LB:%[^,]+]], i64* [[OMP_UB:%[^,]+]], i64* [[OMP_ST:%[^,]+]], i64 1, i64 1) +// UB = min(UB, GlobalUB) +// CHECK-NEXT: [[UB:%.+]] = load i64* [[OMP_UB]] +// CHECK-NEXT: [[UBCMP:%.+]] = icmp sgt i64 [[UB]], 4571423 +// CHECK-NEXT: br i1 [[UBCMP]], label [[UB_TRUE:%[^,]+]], 
label [[UB_FALSE:%[^,]+]] +// CHECK: [[UBRESULT:%.+]] = phi i64 [ 4571423, [[UB_TRUE]] ], [ [[UBVAL:%[^,]+]], [[UB_FALSE]] ] +// CHECK-NEXT: store i64 [[UBRESULT]], i64* [[OMP_UB]] +// CHECK-NEXT: [[LB:%.+]] = load i64* [[OMP_LB]] +// CHECK-NEXT: store i64 [[LB]], i64* [[OMP_IV:[^,]+]] +// Loop header +// CHECK: [[IV:%.+]] = load i64* [[OMP_IV]] +// CHECK-NEXT: [[UB:%.+]] = load i64* [[OMP_UB]] +// CHECK-NEXT: [[CMP:%.+]] = icmp sle i64 [[IV]], [[UB]] +// CHECK-NEXT: br i1 [[CMP]], label %[[LOOP1_BODY:[^,]+]], label %[[LOOP1_END:[^,]+]] + for (long long i = 33; i < 32000000; i += 7) { +// CHECK: [[LOOP1_BODY]] +// Start of body: calculate i from IV: +// CHECK: [[IV1_1:%.+]] = load i64* [[OMP_IV]] +// CHECK-NEXT: [[CALC_I_1:%.+]] = mul nsw i64 [[IV1_1]], 7 +// CHECK-NEXT: [[CALC_I_2:%.+]] = add nsw i64 33, [[CALC_I_1]] +// CHECK-NEXT: store i64 [[CALC_I_2]], i64* [[LC_I:.+]] +// ... loop body ... +// End of body: store into a[i]: +// CHECK: store float [[RESULT:%.+]], float* {{%.+}} + a[i] = b[i] * c[i] * d[i]; +// CHECK: [[IV1_2:%.+]] = load i64* [[OMP_IV]]{{.*}} +// CHECK-NEXT: [[ADD1_2:%.+]] = add nsw i64 [[IV1_2]], 1 +// CHECK-NEXT: store i64 [[ADD1_2]], i64* [[OMP_IV]] +// CHECK-NEXT: br label %{{.+}} + } +// CHECK: [[LOOP1_END]] +// CHECK: call {{.*}}void @__kmpc_for_static_fini([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID]]) +// CHECK: call {{.*}}void @__kmpc_barrier([[IDENT_T_TY]]* [[DEFAULT_LOC_BARRIER:[@%].+]], i32 [[GTID]]) +// CHECK: ret void +} + +// CHECK-LABEL: define {{.*void}} @{{.*}}without_schedule_clause2{{.*}}(float* {{.+}}, float* {{.+}}, float* {{.+}}, float* {{.+}}) +void without_schedule_clause2(float *a, float *b, float *c, float *d) { +// CHECK: [[GTID:%.+]] = call{{.*}}i32 @__kmpc_global_thread_num([[IDENT_T_TY]]* [[DEFAULT_LOC:[@%].+]]) + #pragma omp for +// CHECK: call{{.*}}void @__kmpc_for_static_init_8u([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID]], i32 34, i32* [[IS_LAST:%[^,]+]], i64* [[OMP_LB:%[^,]+]], i64* [[OMP_UB:%[^,]+]], i64* 
[[OMP_ST:%[^,]+]], i64 1, i64 1) +// UB = min(UB, GlobalUB) +// CHECK-NEXT: [[UB:%.+]] = load i64* [[OMP_UB]] +// CHECK-NEXT: [[UBCMP:%.+]] = icmp ugt i64 [[UB]], 4571423 +// CHECK-NEXT: br i1 [[UBCMP]], label [[UB_TRUE:%[^,]+]], label [[UB_FALSE:%[^,]+]] +// CHECK: [[UBRESULT:%.+]] = phi i64 [ 4571423, [[UB_TRUE]] ], [ [[UBVAL:%[^,]+]], [[UB_FALSE]] ] +// CHECK-NEXT: store i64 [[UBRESULT]], i64* [[OMP_UB]] +// CHECK-NEXT: [[LB:%.+]] = load i64* [[OMP_LB]] +// CHECK-NEXT: store i64 [[LB]], i64* [[OMP_IV:[^,]+]] +// Loop header +// CHECK: [[IV:%.+]] = load i64* [[OMP_IV]] +// CHECK-NEXT: [[UB:%.+]] = load i64* [[OMP_UB]] +// CHECK-NEXT: [[CMP:%.+]] = icmp ule i64 [[IV]], [[UB]] +// CHECK-NEXT: br i1 [[CMP]], label %[[LOOP1_BODY:[^,]+]], label %[[LOOP1_END:[^,]+]] + for (unsigned long long i = 33; i < 32000000; i += 7) { +// CHECK: [[LOOP1_BODY]] +// Start of body: calculate i from IV: +// CHECK: [[IV1_1:%.+]] = load i64* [[OMP_IV]] +// CHECK-NEXT: [[CALC_I_1:%.+]] = mul i64 [[IV1_1]], 7 +// CHECK-NEXT: [[CALC_I_2:%.+]] = add i64 33, [[CALC_I_1]] +// CHECK-NEXT: store i64 [[CALC_I_2]], i64* [[LC_I:.+]] +// ... loop body ... 
+// End of body: store into a[i]: +// CHECK: store float [[RESULT:%.+]], float* {{%.+}} + a[i] = b[i] * c[i] * d[i]; +// CHECK: [[IV1_2:%.+]] = load i64* [[OMP_IV]]{{.*}} +// CHECK-NEXT: [[ADD1_2:%.+]] = add i64 [[IV1_2]], 1 +// CHECK-NEXT: store i64 [[ADD1_2]], i64* [[OMP_IV]] +// CHECK-NEXT: br label %{{.+}} + } +// CHECK: [[LOOP1_END]] +// CHECK: call {{.*}}void @__kmpc_for_static_fini([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID]]) +// CHECK: call {{.*}}void @__kmpc_barrier([[IDENT_T_TY]]* [[DEFAULT_LOC_BARRIER:[@%].+]], i32 [[GTID]]) +// CHECK: ret void +} + +// CHECK-LABEL: define {{.*void}} @{{.*}}static_not_chunked{{.*}}(float* {{.+}}, float* {{.+}}, float* {{.+}}, float* {{.+}}) +void static_not_chunked(float *a, float *b, float *c, float *d) { +// CHECK: [[GTID:%.+]] = call {{.*}}i32 @__kmpc_global_thread_num([[IDENT_T_TY]]* [[DEFAULT_LOC:[@%].+]]) + #pragma omp for schedule(static) +// CHECK: call {{.*}}void @__kmpc_for_static_init_4([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID]], i32 34, i32* [[IS_LAST:%[^,]+]], i32* [[OMP_LB:%[^,]+]], i32* [[OMP_UB:%[^,]+]], i32* [[OMP_ST:%[^,]+]], i32 1, i32 1) +// UB = min(UB, GlobalUB) +// CHECK-NEXT: [[UB:%.+]] = load i32* [[OMP_UB]] +// CHECK-NEXT: [[UBCMP:%.+]] = icmp sgt i32 [[UB]], 25 +// CHECK-NEXT: br i1 [[UBCMP]], label [[UB_TRUE:%[^,]+]], label [[UB_FALSE:%[^,]+]] +// CHECK: [[UBRESULT:%.+]] = phi i32 [ 25, [[UB_TRUE]] ], [ [[UBVAL:%[^,]+]], [[UB_FALSE]] ] +// CHECK-NEXT: store i32 [[UBRESULT]], i32* [[OMP_UB]] +// CHECK-NEXT: [[LB:%.+]] = load i32* [[OMP_LB]] +// CHECK-NEXT: store i32 [[LB]], i32* [[OMP_IV:[^,]+]] +// Loop header +// CHECK: [[IV:%.+]] = load i32* [[OMP_IV]] +// CHECK-NEXT: [[UB:%.+]] = load i32* [[OMP_UB]] +// CHECK-NEXT: [[CMP:%.+]] = icmp sle i32 [[IV]], [[UB]] +// CHECK-NEXT: br i1 [[CMP]], label %[[LOOP1_BODY:[^,]+]], label %[[LOOP1_END:[^,]+]] + for (unsigned char i = 'z'; i >= 'a'; i--) { +// CHECK: [[LOOP1_BODY]] +// Start of body: calculate i from IV: +// CHECK: [[IV1_1:%.+]] = load 
i32* [[OMP_IV]] +// CHECK-NEXT: [[CALC_I_1:%.+]] = mul nsw i32 [[IV1_1]], 1 +// CHECK-NEXT: [[CALC_I_2:%.+]] = sub nsw i32 122, [[CALC_I_1]] +// CHECK-NEXT: [[CALC_I_3:%.+]] = trunc i32 [[CALC_I_2]] to i8 +// CHECK-NEXT: store i8 [[CALC_I_3]], i8* [[LC_I:.+]] +// ... loop body ... +// End of body: store into a[i]: +// CHECK: store float [[RESULT:%.+]], float* {{%.+}} + a[i] = b[i] * c[i] * d[i]; +// CHECK: [[IV1_2:%.+]] = load i32* [[OMP_IV]]{{.*}} +// CHECK-NEXT: [[ADD1_2:%.+]] = add nsw i32 [[IV1_2]], 1 +// CHECK-NEXT: store i32 [[ADD1_2]], i32* [[OMP_IV]] +// CHECK-NEXT: br label %{{.+}} + } +// CHECK: [[LOOP1_END]] +// CHECK: call{{.*}}void @__kmpc_for_static_fini([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID]]) +// CHECK: call{{.*}}void @__kmpc_barrier([[IDENT_T_TY]]* [[DEFAULT_LOC_BARRIER:[@%].+]], i32 [[GTID]]) +// CHECK: ret void +} + +// CHECK-LABEL: define {{.*void}} @{{.*}}static_not_chunked2{{.*}}(float* {{.+}}, float* {{.+}}, float* {{.+}}, float* {{.+}}) +void static_not_chunked2(float *a, float *b, float *c, float *d) { +// CHECK: [[GTID:%.+]] = call {{.*}}i32 @__kmpc_global_thread_num([[IDENT_T_TY]]* [[DEFAULT_LOC:[@%].+]]) + #pragma omp for schedule(static) +// CHECK: call {{.*}}void @__kmpc_for_static_init_4u([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID]], i32 34, i32* [[IS_LAST:%[^,]+]], i32* [[OMP_LB:%[^,]+]], i32* [[OMP_UB:%[^,]+]], i32* [[OMP_ST:%[^,]+]], i32 1, i32 1) +// UB = min(UB, GlobalUB) +// CHECK-NEXT: [[UB:%.+]] = load i32* [[OMP_UB]] +// CHECK-NEXT: [[UBCMP:%.+]] = icmp ugt i32 [[UB]], 4571423 +// CHECK-NEXT: br i1 [[UBCMP]], label [[UB_TRUE:%[^,]+]], label [[UB_FALSE:%[^,]+]] +// CHECK: [[UBRESULT:%.+]] = phi i32 [ 4571423, [[UB_TRUE]] ], [ [[UBVAL:%[^,]+]], [[UB_FALSE]] ] +// CHECK-NEXT: store i32 [[UBRESULT]], i32* [[OMP_UB]] +// CHECK-NEXT: [[LB:%.+]] = load i32* [[OMP_LB]] +// CHECK-NEXT: store i32 [[LB]], i32* [[OMP_IV:[^,]+]] +// Loop header +// CHECK: [[IV:%.+]] = load i32* [[OMP_IV]] +// CHECK-NEXT: [[UB:%.+]] = load i32* 
[[OMP_UB]] +// CHECK-NEXT: [[CMP:%.+]] = icmp ule i32 [[IV]], [[UB]] +// CHECK-NEXT: br i1 [[CMP]], label %[[LOOP1_BODY:[^,]+]], label %[[LOOP1_END:[^,]+]] + for (unsigned i = 32000000; i > 33; i -= 7) { +// CHECK: [[LOOP1_BODY]] +// Start of body: calculate i from IV: +// CHECK: [[IV1_1:%.+]] = load i32* [[OMP_IV]] +// CHECK-NEXT: [[CALC_I_1:%.+]] = mul i32 [[IV1_1]], 7 +// CHECK-NEXT: [[CALC_I_2:%.+]] = sub i32 32000000, [[CALC_I_1]] +// CHECK-NEXT: store i32 [[CALC_I_2]], i32* [[LC_I:.+]] +// ... loop body ... +// End of body: store into a[i]: +// CHECK: store float [[RESULT:%.+]], float* {{%.+}} + a[i] = b[i] * c[i] * d[i]; +// CHECK: [[IV1_2:%.+]] = load i32* [[OMP_IV]]{{.*}} +// CHECK-NEXT: [[ADD1_2:%.+]] = add i32 [[IV1_2]], 1 +// CHECK-NEXT: store i32 [[ADD1_2]], i32* [[OMP_IV]] +// CHECK-NEXT: br label %{{.+}} + } +// CHECK: [[LOOP1_END]] +// CHECK: call{{.*}}void @__kmpc_for_static_fini([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID]]) +// CHECK: call{{.*}}void @__kmpc_barrier([[IDENT_T_TY]]* [[DEFAULT_LOC_BARRIER:[@%].+]], i32 [[GTID]]) +// CHECK: ret void +} + +#endif // HEADER +