diff --git a/clang/include/clang/AST/OpenMPClause.h b/clang/include/clang/AST/OpenMPClause.h --- a/clang/include/clang/AST/OpenMPClause.h +++ b/clang/include/clang/AST/OpenMPClause.h @@ -2839,6 +2839,29 @@ return llvm::makeArrayRef(getRHSExprs().end(), varlist_size()); } + /// Set list of helper temp vars for inscan copy operations. + void setInscanCopyTemps(ArrayRef CopyTemps); + + /// Get the list of helper inscan copy temps. + MutableArrayRef getInscanCopyTemps() { + return MutableArrayRef(getReductionOps().end(), varlist_size()); + } + ArrayRef getInscanCopyTemps() const { + return llvm::makeArrayRef(getReductionOps().end(), varlist_size()); + } + + /// Set list of helper copy operations for inscan reductions. + /// The form is: Temps[i] = LHS[i]; + void setInscanCopyOps(ArrayRef Ops); + + /// Get the list of helper inscan copy operations. + MutableArrayRef getInscanCopyOps() { + return MutableArrayRef(getInscanCopyTemps().end(), varlist_size()); + } + ArrayRef getInscanCopyOps() const { + return llvm::makeArrayRef(getInscanCopyTemps().end(), varlist_size()); + } + public: /// Creates clause with a list of variables \a VL. /// @@ -2869,6 +2892,11 @@ /// \endcode /// Required for proper codegen of final reduction operation performed by the /// reduction clause. + /// \param CopyTemps List of temp expressions for the inscan copy operations. + /// \param CopyOps List of copy operations for inscan reductions: + /// \code + /// TempExprs = LHSExprs; + /// \endcode /// \param PreInit Statement that must be executed before entering the OpenMP /// region with this clause. /// \param PostUpdate Expression that must be executed after exit from the @@ -2880,13 +2908,17 @@ ArrayRef VL, NestedNameSpecifierLoc QualifierLoc, const DeclarationNameInfo &NameInfo, ArrayRef Privates, ArrayRef LHSExprs, ArrayRef RHSExprs, - ArrayRef ReductionOps, Stmt *PreInit, Expr *PostUpdate); + ArrayRef ReductionOps, ArrayRef CopyTemps, + ArrayRef CopyOps, Stmt *PreInit, Expr *PostUpdate); /// Creates an empty clause with the place for \a N variables. /// /// \param C AST context. /// \param N The number of variables. - static OMPReductionClause *CreateEmpty(const ASTContext &C, unsigned N); + /// \param Modifier Reduction modifier. + static OMPReductionClause * + CreateEmpty(const ASTContext &C, unsigned N, + OpenMPReductionClauseModifier Modifier); /// Returns modifier. OpenMPReductionClauseModifier getModifier() const { return Modifier; } @@ -2943,6 +2975,26 @@ getReductionOps().end()); } + helper_expr_const_range copy_temps() const { + return helper_expr_const_range(getInscanCopyTemps().begin(), + getInscanCopyTemps().end()); + } + + helper_expr_range copy_temps() { + return helper_expr_range(getInscanCopyTemps().begin(), + getInscanCopyTemps().end()); + } + + helper_expr_const_range copy_ops() const { + return helper_expr_const_range(getInscanCopyOps().begin(), + getInscanCopyOps().end()); + } + + helper_expr_range copy_ops() { + return helper_expr_range(getInscanCopyOps().begin(), + getInscanCopyOps().end()); + } + child_range children() { return child_range(reinterpret_cast(varlist_begin()), reinterpret_cast(varlist_end())); diff --git a/clang/include/clang/AST/RecursiveASTVisitor.h b/clang/include/clang/AST/RecursiveASTVisitor.h --- a/clang/include/clang/AST/RecursiveASTVisitor.h +++ b/clang/include/clang/AST/RecursiveASTVisitor.h @@ -3362,6 +3362,14 @@ for (auto *E : C->reduction_ops()) { TRY_TO(TraverseStmt(E)); } + if (C->getModifier() == OMPC_REDUCTION_inscan) { + for (auto *E : C->copy_temps()) { + TRY_TO(TraverseStmt(E)); + } + for (auto *E : C->copy_ops()) { + TRY_TO(TraverseStmt(E)); + } + } return true; } diff --git a/clang/lib/AST/OpenMPClause.cpp b/clang/lib/AST/OpenMPClause.cpp --- a/clang/lib/AST/OpenMPClause.cpp +++ b/clang/lib/AST/OpenMPClause.cpp @@ -709,15 +709,33 @@ std::copy(ReductionOps.begin(), ReductionOps.end(), getRHSExprs().end()); } +void OMPReductionClause::setInscanCopyTemps(ArrayRef CopyTemps) { + assert(Modifier == OMPC_REDUCTION_inscan && "Expected inscan reduction."); + assert(CopyTemps.size() == varlist_size() && + "Number of copy temp expressions is not the same as the preallocated " + "buffer"); + llvm::copy(CopyTemps, getReductionOps().end()); +} + +void OMPReductionClause::setInscanCopyOps(ArrayRef Ops) { + assert(Modifier == OMPC_REDUCTION_inscan && "Expected inscan reduction."); + assert(Ops.size() == varlist_size() && "Number of copy " + "expressions is not the same " + "as the preallocated buffer"); + llvm::copy(Ops, getInscanCopyTemps().end()); +} + OMPReductionClause *OMPReductionClause::Create( const ASTContext &C, SourceLocation StartLoc, SourceLocation LParenLoc, SourceLocation ModifierLoc, SourceLocation EndLoc, SourceLocation ColonLoc, OpenMPReductionClauseModifier Modifier, ArrayRef VL, NestedNameSpecifierLoc QualifierLoc, const DeclarationNameInfo &NameInfo, ArrayRef Privates, ArrayRef LHSExprs, - ArrayRef RHSExprs, ArrayRef ReductionOps, Stmt *PreInit, + ArrayRef RHSExprs, ArrayRef ReductionOps, + ArrayRef CopyTemps, ArrayRef CopyOps, Stmt *PreInit, Expr *PostUpdate) { - void *Mem = C.Allocate(totalSizeToAlloc(5 * VL.size())); + void *Mem = C.Allocate(totalSizeToAlloc( + (Modifier == OMPC_REDUCTION_inscan ? 7 : 5) * VL.size())); auto *Clause = new (Mem) OMPReductionClause(StartLoc, LParenLoc, ModifierLoc, EndLoc, ColonLoc, Modifier, VL.size(), QualifierLoc, NameInfo); @@ -728,13 +746,26 @@ Clause->setReductionOps(ReductionOps); Clause->setPreInitStmt(PreInit); Clause->setPostUpdateExpr(PostUpdate); + if (Modifier == OMPC_REDUCTION_inscan) { + Clause->setInscanCopyTemps(CopyTemps); + Clause->setInscanCopyOps(CopyOps); + } else { + assert(CopyTemps.empty() && + "copy temp expressions are expected in inscan reductions only."); + assert(CopyOps.empty() && + "copy operations are expected in inscan reductions only."); + } return Clause; } -OMPReductionClause *OMPReductionClause::CreateEmpty(const ASTContext &C, - unsigned N) { - void *Mem = C.Allocate(totalSizeToAlloc(5 * N)); - return new (Mem) OMPReductionClause(N); +OMPReductionClause * +OMPReductionClause::CreateEmpty(const ASTContext &C, unsigned N, + OpenMPReductionClauseModifier Modifier) { + void *Mem = C.Allocate(totalSizeToAlloc( + (Modifier == OMPC_REDUCTION_inscan ? 7 : 5) * N)); + auto *Clause = new (Mem) OMPReductionClause(N); + Clause->setModifier(Modifier); + return Clause; } void OMPTaskReductionClause::setPrivates(ArrayRef Privates) { diff --git a/clang/lib/AST/StmtProfile.cpp b/clang/lib/AST/StmtProfile.cpp --- a/clang/lib/AST/StmtProfile.cpp +++ b/clang/lib/AST/StmtProfile.cpp @@ -609,6 +609,16 @@ if (E) Profiler->VisitStmt(E); } + if (C->getModifier() == clang::OMPC_REDUCTION_inscan) { + for (auto *E : C->copy_temps()) { + if (E) + Profiler->VisitStmt(E); + } + for (auto *E : C->copy_ops()) { + if (E) + Profiler->VisitStmt(E); + } + } } void OMPClauseProfiler::VisitOMPTaskReductionClause( const OMPTaskReductionClause *C) { diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.h b/clang/lib/CodeGen/CGOpenMPRuntime.h --- a/clang/lib/CodeGen/CGOpenMPRuntime.h +++ b/clang/lib/CodeGen/CGOpenMPRuntime.h @@ -1356,17 +1356,31 @@ ArrayRef RHSExprs, ArrayRef ReductionOps); + /// Data for the inscan reduction codegen. + struct InscanReductionData { + const DeclRefExpr *CopyTemp = nullptr; + const Expr *CopyOp = nullptr; + llvm::Optional ScanRed = llvm::None; + }; + /// Emits single reduction combiner void emitSingleReductionCombiner(CodeGenFunction &CGF, const Expr *ReductionOp, const Expr *PrivateRef, const DeclRefExpr *LHS, - const DeclRefExpr *RHS); + const DeclRefExpr *RHS, + const InscanReductionData &InscanData); struct ReductionOptionsTy { - bool WithNowait; - bool SimpleReduction; - OpenMPDirectiveKind ReductionKind; + bool WithNowait = false; + bool SimpleReduction = false; + OpenMPDirectiveKind ReductionKind = llvm::omp::OMPD_unknown; + llvm::Optional ScanRed = llvm::None; + ReductionOptionsTy(bool WithNowait, bool SimpleReduction, + OpenMPDirectiveKind ReductionKind, + llvm::Optional ScanRed = llvm::None) + : WithNowait(WithNowait), SimpleReduction(SimpleReduction), + ReductionKind(ReductionKind), ScanRed(ScanRed) {} }; /// Emit a code for reduction clause. Next code should be emitted for /// reduction: @@ -1404,18 +1418,19 @@ /// \param RHSExprs List of RHS in \a ReductionOps reduction operations. /// \param ReductionOps List of reduction operations in form 'LHS binop RHS' /// or 'operator binop(LHS, RHS)'. + /// \param CopyTemps List of copy helper temp vars for inscan reductions. + /// \param CopyOps List of copy operations for inscan reductions: Temps = LHS; /// \param Options List of options for reduction codegen: /// WithNowait true if parent directive has also nowait clause, false /// otherwise. /// SimpleReduction Emit reduction operation only. Used for omp simd /// directive on the host. /// ReductionKind The kind of reduction to perform. - virtual void emitReduction(CodeGenFunction &CGF, SourceLocation Loc, - ArrayRef Privates, - ArrayRef LHSExprs, - ArrayRef RHSExprs, - ArrayRef ReductionOps, - ReductionOptionsTy Options); + virtual void emitReduction( + CodeGenFunction &CGF, SourceLocation Loc, ArrayRef Privates, + ArrayRef LHSExprs, ArrayRef RHSExprs, + ArrayRef ReductionOps, ArrayRef CopyTemps, + ArrayRef CopyOps, ReductionOptionsTy Options); /// Emit a code for initialization of task reduction clause. Next code /// should be emitted for reduction: @@ -2209,18 +2224,19 @@ /// \param RHSExprs List of RHS in \a ReductionOps reduction operations. /// \param ReductionOps List of reduction operations in form 'LHS binop RHS' /// or 'operator binop(LHS, RHS)'. + /// \param CopyTemps List of copy helper temp vars for inscan reductions. + /// \param CopyOps List of copy operations for inscan reductions: Temps = LHS; /// \param Options List of options for reduction codegen: /// WithNowait true if parent directive has also nowait clause, false /// otherwise. /// SimpleReduction Emit reduction operation only. Used for omp simd /// directive on the host. /// ReductionKind The kind of reduction to perform. - void emitReduction(CodeGenFunction &CGF, SourceLocation Loc, - ArrayRef Privates, - ArrayRef LHSExprs, - ArrayRef RHSExprs, - ArrayRef ReductionOps, - ReductionOptionsTy Options) override; + void emitReduction( + CodeGenFunction &CGF, SourceLocation Loc, ArrayRef Privates, + ArrayRef LHSExprs, ArrayRef RHSExprs, + ArrayRef ReductionOps, ArrayRef CopyTemps, + ArrayRef CopyOps, ReductionOptionsTy Options) override; /// Emit a code for initialization of task reduction clause. Next code /// should be emitted for reduction: diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp --- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp +++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp @@ -6268,32 +6268,57 @@ return Fn; } -void CGOpenMPRuntime::emitSingleReductionCombiner(CodeGenFunction &CGF, - const Expr *ReductionOp, - const Expr *PrivateRef, - const DeclRefExpr *LHS, - const DeclRefExpr *RHS) { +void CGOpenMPRuntime::emitSingleReductionCombiner( + CodeGenFunction &CGF, const Expr *ReductionOp, const Expr *PrivateRef, + const DeclRefExpr *LHS, const DeclRefExpr *RHS, + const InscanReductionData &InscanData) { + const auto &&SingleGen = [&InscanData, ReductionOp, RHS, + LHS](CodeGenFunction &CGF) { + const VarDecl *TempDeclPtr = nullptr; + if (InscanData.ScanRed.hasValue()) + TempDeclPtr = cast(InscanData.CopyTemp->getDecl()); + if (InscanData.ScanRed.getValueOr(OMPC_unknown) == OMPC_exclusive) { + // Create temp var and copy LHS value to this temp value. + // TMP = LHS; + CGF.EmitAutoVarDecl(*TempDeclPtr); + CGF.EmitIgnoredExpr(InscanData.CopyOp); + } + // Emit reduction for array subscript or single variable. + emitReductionCombiner(CGF, ReductionOp); + if (InscanData.ScanRed.hasValue()) { + // Map TempDecl to RHS, because we need to copy the value to RHS. + CodeGenFunction::OMPMapVars MappedVars; + if (*InscanData.ScanRed == OMPC_exclusive) { + MappedVars.apply(CGF); + // RHS = TMP; + const auto *LHSVar = cast(LHS->getDecl()); + MappedVars.setVarAddr(CGF, LHSVar, CGF.GetAddrOfLocalVar(TempDeclPtr)); + } + MappedVars.setVarAddr(CGF, TempDeclPtr, + CGF.EmitLValue(RHS).getAddress(CGF)); + MappedVars.apply(CGF); + CGF.EmitIgnoredExpr(InscanData.CopyOp); + MappedVars.restore(CGF); + } + }; if (PrivateRef->getType()->isArrayType()) { // Emit reduction for array section. const auto *LHSVar = cast(LHS->getDecl()); const auto *RHSVar = cast(RHS->getDecl()); - EmitOMPAggregateReduction( - CGF, PrivateRef->getType(), LHSVar, RHSVar, - [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) { - emitReductionCombiner(CGF, ReductionOp); - }); + EmitOMPAggregateReduction(CGF, PrivateRef->getType(), LHSVar, RHSVar, + [&SingleGen](CodeGenFunction &CGF, const Expr *, + const Expr *, + const Expr *) { SingleGen(CGF); }); } else { - // Emit reduction for array subscript or single variable. - emitReductionCombiner(CGF, ReductionOp); + SingleGen(CGF); } } -void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc, - ArrayRef Privates, - ArrayRef LHSExprs, - ArrayRef RHSExprs, - ArrayRef ReductionOps, - ReductionOptionsTy Options) { +void CGOpenMPRuntime::emitReduction( + CodeGenFunction &CGF, SourceLocation Loc, ArrayRef Privates, + ArrayRef LHSExprs, ArrayRef RHSExprs, + ArrayRef ReductionOps, ArrayRef CopyTemps, + ArrayRef CopyOps, ReductionOptionsTy Options) { if (!CGF.HaveInsertPoint()) return; @@ -6339,12 +6364,23 @@ if (SimpleReduction) { CodeGenFunction::RunCleanupsScope Scope(CGF); - auto IPriv = Privates.begin(); - auto ILHS = LHSExprs.begin(); - auto IRHS = RHSExprs.begin(); + const auto *IPriv = Privates.begin(); + const auto *ILHS = LHSExprs.begin(); + const auto *IRHS = RHSExprs.begin(); + const auto *ITemp = CopyTemps.begin(); + const auto *ICopy = CopyOps.begin(); + bool CopyOpsEmpty = CopyOps.empty(); + InscanReductionData InscanData; + InscanData.ScanRed = Options.ScanRed; for (const Expr *E : ReductionOps) { + if (!CopyOpsEmpty) { + InscanData.CopyTemp = cast(*ITemp); + InscanData.CopyOp = *ICopy; + ++ITemp; + ++ICopy; + } emitSingleReductionCombiner(CGF, E, *IPriv, cast(*ILHS), - cast(*IRHS)); + cast(*IRHS), InscanData); ++IPriv; ++ILHS; ++IRHS; @@ -6442,12 +6478,13 @@ auto &&CodeGen = [Privates, LHSExprs, RHSExprs, ReductionOps]( CodeGenFunction &CGF, PrePostActionTy &Action) { CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime(); + InscanReductionData InscanData; auto IPriv = Privates.begin(); auto ILHS = LHSExprs.begin(); auto IRHS = RHSExprs.begin(); for (const Expr *E : ReductionOps) { RT.emitSingleReductionCombiner(CGF, E, *IPriv, cast(*ILHS), - cast(*IRHS)); + cast(*IRHS), InscanData); ++IPriv; ++ILHS; ++IRHS; @@ -6750,9 +6787,10 @@ // Emit the combiner body: // %2 = ( *%lhs, *%rhs) // store %2, * %lhs + CGOpenMPRuntime::InscanReductionData InscanData; CGM.getOpenMPRuntime().emitSingleReductionCombiner( CGF, ReductionOp, PrivateRef, cast(LHS), - cast(RHS)); + cast(RHS), InscanData); CGF.FinishFunction(); return Fn; } @@ -12558,10 +12596,11 @@ void CGOpenMPSIMDRuntime::emitReduction( CodeGenFunction &CGF, SourceLocation Loc, ArrayRef Privates, ArrayRef LHSExprs, ArrayRef RHSExprs, - ArrayRef ReductionOps, ReductionOptionsTy Options) { + ArrayRef ReductionOps, ArrayRef CopyTemps, + ArrayRef CopyOps, ReductionOptionsTy Options) { assert(Options.SimpleReduction && "Only simple reduction is expected."); CGOpenMPRuntime::emitReduction(CGF, Loc, Privates, LHSExprs, RHSExprs, - ReductionOps, Options); + ReductionOps, CopyTemps, CopyOps, Options); } llvm::Value *CGOpenMPSIMDRuntime::emitTaskReductionInit( diff --git a/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.h b/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.h --- a/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.h +++ b/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.h @@ -304,18 +304,19 @@ /// \param RHSExprs List of RHS in \a ReductionOps reduction operations. /// \param ReductionOps List of reduction operations in form 'LHS binop RHS' /// or 'operator binop(LHS, RHS)'. + /// \param CopyTemps List of copy helper temp vars for inscan reductions. + /// \param CopyOps List of copy operations for inscan reductions: Temps = LHS; /// \param Options List of options for reduction codegen: /// WithNowait true if parent directive has also nowait clause, false /// otherwise. /// SimpleReduction Emit reduction operation only. Used for omp simd /// directive on the host. /// ReductionKind The kind of reduction to perform. - virtual void emitReduction(CodeGenFunction &CGF, SourceLocation Loc, - ArrayRef Privates, - ArrayRef LHSExprs, - ArrayRef RHSExprs, - ArrayRef ReductionOps, - ReductionOptionsTy Options) override; + virtual void emitReduction( + CodeGenFunction &CGF, SourceLocation Loc, ArrayRef Privates, + ArrayRef LHSExprs, ArrayRef RHSExprs, + ArrayRef ReductionOps, ArrayRef CopyTemps, + ArrayRef CopyOps, ReductionOptionsTy Options) override; /// Returns specified OpenMP runtime function for the current OpenMP /// implementation. Specialized for the NVPTX device. diff --git a/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp b/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp --- a/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp +++ b/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp @@ -4275,7 +4275,8 @@ void CGOpenMPRuntimeNVPTX::emitReduction( CodeGenFunction &CGF, SourceLocation Loc, ArrayRef Privates, ArrayRef LHSExprs, ArrayRef RHSExprs, - ArrayRef ReductionOps, ReductionOptionsTy Options) { + ArrayRef ReductionOps, ArrayRef CopyTemps, + ArrayRef CopyOps, ReductionOptionsTy Options) { if (!CGF.HaveInsertPoint()) return; @@ -4288,7 +4289,7 @@ assert(!TeamsReduction && !ParallelReduction && "Invalid reduction selection in emitReduction."); CGOpenMPRuntime::emitReduction(CGF, Loc, Privates, LHSExprs, RHSExprs, - ReductionOps, Options); + ReductionOps, CopyTemps, CopyOps, Options); return; } @@ -4435,9 +4436,10 @@ auto IPriv = Privates.begin(); auto ILHS = LHSExprs.begin(); auto IRHS = RHSExprs.begin(); + const InscanReductionData InscanData; for (const Expr *E : ReductionOps) { emitSingleReductionCombiner(CGF, E, *IPriv, cast(*ILHS), - cast(*IRHS)); + cast(*IRHS), InscanData); ++IPriv; ++ILHS; ++IRHS; diff --git a/clang/lib/CodeGen/CGStmt.cpp b/clang/lib/CodeGen/CGStmt.cpp --- a/clang/lib/CodeGen/CGStmt.cpp +++ b/clang/lib/CodeGen/CGStmt.cpp @@ -252,7 +252,7 @@ EmitOMPDepobjDirective(cast(*S)); break; case Stmt::OMPScanDirectiveClass: - llvm_unreachable("Scan directive not supported yet."); + EmitOMPScanDirective(cast(*S)); break; case Stmt::OMPOrderedDirectiveClass: EmitOMPOrderedDirective(cast(*S)); diff --git a/clang/lib/CodeGen/CGStmtOpenMP.cpp b/clang/lib/CodeGen/CGStmtOpenMP.cpp --- a/clang/lib/CodeGen/CGStmtOpenMP.cpp +++ b/clang/lib/CodeGen/CGStmtOpenMP.cpp @@ -1161,7 +1161,7 @@ void CodeGenFunction::EmitOMPReductionClauseInit( const OMPExecutableDirective &D, - CodeGenFunction::OMPPrivateScope &PrivateScope) { + CodeGenFunction::OMPPrivateScope &PrivateScope, bool ForInscan) { if (!HaveInsertPoint()) return; SmallVector Shareds; @@ -1173,6 +1173,8 @@ SmallVector TaskLHSs; SmallVector TaskRHSs; for (const auto *C : D.getClausesOfKind()) { + if (ForInscan != (C->getModifier() == OMPC_REDUCTION_inscan)) + continue; Shareds.append(C->varlist_begin(), C->varlist_end()); Privates.append(C->privates().begin(), C->privates().end()); ReductionOps.append(C->reduction_ops().begin(), C->reduction_ops().end()); @@ -1377,16 +1379,21 @@ } void CodeGenFunction::EmitOMPReductionClauseFinal( - const OMPExecutableDirective &D, const OpenMPDirectiveKind ReductionKind) { + const OMPExecutableDirective &D, const OpenMPDirectiveKind ReductionKind, + llvm::Optional ForScanClause) { if (!HaveInsertPoint()) return; llvm::SmallVector Privates; llvm::SmallVector LHSExprs; llvm::SmallVector RHSExprs; llvm::SmallVector ReductionOps; + llvm::SmallVector CopyTemps; + llvm::SmallVector CopyOps; bool HasAtLeastOneReduction = false; bool IsReductionWithTaskMod = false; for (const auto *C : D.getClausesOfKind()) { + if (ForScanClause.hasValue() != (C->getModifier() == OMPC_REDUCTION_inscan)) + continue; HasAtLeastOneReduction = true; Privates.append(C->privates().begin(), C->privates().end()); LHSExprs.append(C->lhs_exprs().begin(), C->lhs_exprs().end()); @@ -1394,6 +1401,10 @@ ReductionOps.append(C->reduction_ops().begin(), C->reduction_ops().end()); IsReductionWithTaskMod = IsReductionWithTaskMod || C->getModifier() == OMPC_REDUCTION_task; + if (ForScanClause.hasValue()) { + CopyTemps.append(C->copy_temps().begin(), C->copy_temps().end()); + CopyOps.append(C->copy_ops().begin(), C->copy_ops().end()); + } } if (HasAtLeastOneReduction) { if (IsReductionWithTaskMod) { @@ -1409,7 +1420,8 @@ // parallel directive (it always has implicit barrier). CGM.getOpenMPRuntime().emitReduction( *this, D.getEndLoc(), Privates, LHSExprs, RHSExprs, ReductionOps, - {WithNowait, SimpleReduction, ReductionKind}); + CopyTemps, CopyOps, + {WithNowait, SimpleReduction, ReductionKind, ForScanClause}); } } @@ -1705,6 +1717,23 @@ getProfileCount(D.getBody())); EmitBlock(NextBB); } + + OMPPrivateScope InscanScope(*this); + EmitOMPReductionClauseInit(D, InscanScope, /*ForInscan=*/true); + bool IsInscanRegion = InscanScope.Privatize(); + if (IsInscanRegion) { + // Need to remember the block before and after scan directive + // to dispatch them correctly depending on the clause used in + // this directive. + OMPBeforeScanBlock = createBasicBlock("omp.before.scan.bb"); + OMPAfterScanBlock = createBasicBlock("omp.after.scan.bb"); + OMPScanExitBlock = createBasicBlock("omp.exit.inscan.bb"); + OMPScanDispatch = createBasicBlock("omp.inscan.dispatch"); + OMPScanReduce = createBasicBlock("omp.inscan.reduce"); + EmitBranch(OMPScanDispatch); + EmitBlock(OMPBeforeScanBlock); + } + // Emit loop variables for C++ range loops. const Stmt *Body = D.getInnermostCapturedStmt()->getCapturedStmt()->IgnoreContainers(); @@ -1714,13 +1743,16 @@ Body, /*TryImperfectlyNestedLoops=*/true), D.getCollapsedNumber()); + if (IsInscanRegion) + EmitBranch(OMPScanExitBlock); + // The end (updates/cleanups). EmitBlock(Continue.getBlock()); BreakContinueStack.pop_back(); } void CodeGenFunction::EmitOMPInnerLoop( - const Stmt &S, bool RequiresCleanup, const Expr *LoopCond, + const OMPExecutableDirective &S, bool RequiresCleanup, const Expr *LoopCond, const Expr *IncExpr, const llvm::function_ref BodyGen, const llvm::function_ref PostIncGen) { @@ -2221,8 +2253,8 @@ CGF.EmitOMPInnerLoop( S, LoopScope.requiresCleanups(), S.getCond(), S.getInc(), [&S](CodeGenFunction &CGF) { - CGF.EmitOMPLoopBody(S, CodeGenFunction::JumpDest()); - CGF.EmitStopPoint(&S); + emitOMPLoopBodyWithStopPoint(CGF, S, + CodeGenFunction::JumpDest()); }, [](CodeGenFunction &) {}); }); @@ -2243,6 +2275,7 @@ } void CodeGenFunction::EmitOMPSimdDirective(const OMPSimdDirective &S) { + ParentLoopDirectiveForScanRegion ScanRegion(*this, S); auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { emitOMPSimdRegion(CGF, S, Action); }; @@ -3961,6 +3994,27 @@ } } +void CodeGenFunction::EmitOMPScanDirective(const OMPScanDirective &S) { + bool IsInclusive = S.hasClausesOfKind(); + EmitBranch(IsInclusive ? OMPScanReduce + : BreakContinueStack.back().ContinueBlock.getBlock()); + EmitBlock(OMPScanDispatch); + EmitBranch(IsInclusive ? OMPBeforeScanBlock : OMPAfterScanBlock); + EmitBlock(OMPScanReduce); + const OMPExecutableDirective &ParentDir = *OMPParentLoopDirectiveForScan; + OpenMPDirectiveKind RedKind = OMPD_unknown; + if (ParentDir.getDirectiveKind() == OMPD_simd) + RedKind = OMPD_simd; + assert(RedKind != OMPD_unknown && "Unexpected parent directive."); + EmitOMPReductionClauseFinal(ParentDir, RedKind, + IsInclusive ? OMPC_inclusive : OMPC_exclusive); + EmitBranch(IsInclusive ? OMPAfterScanBlock : OMPBeforeScanBlock); + OMPScanExitBlock = IsInclusive + ? BreakContinueStack.back().ContinueBlock.getBlock() + : OMPScanReduce; + EmitBlock(OMPAfterScanBlock); +} + void CodeGenFunction::EmitOMPDistributeLoop(const OMPLoopDirective &S, const CodeGenLoopTy &CodeGenLoop, Expr *IncExpr) { @@ -5950,6 +6004,10 @@ void CodeGenFunction::EmitSimpleOMPExecutableDirective( const OMPExecutableDirective &D) { + if (const auto *SD = dyn_cast(&D)) { + EmitOMPScanDirective(*SD); + return; + } if (!D.hasAssociatedStmt() || !D.getAssociatedStmt()) return; auto &&CodeGen = [&D](CodeGenFunction &CGF, PrePostActionTy &Action) { @@ -5974,6 +6032,7 @@ } if (isOpenMPSimdDirective(D.getDirectiveKind())) { (void)GlobalsScope.Privatize(); + ParentLoopDirectiveForScanRegion ScanRegion(CGF, D); emitOMPSimdRegion(CGF, cast(D), Action); } else { if (const auto *LD = dyn_cast(&D)) { diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h --- a/clang/lib/CodeGen/CodeGenFunction.h +++ b/clang/lib/CodeGen/CodeGenFunction.h @@ -673,6 +673,32 @@ llvm::BasicBlock *getInvokeDestImpl(); + /// Parent loop-based directive for scan directive. + const OMPExecutableDirective *OMPParentLoopDirectiveForScan = nullptr; + llvm::BasicBlock *OMPBeforeScanBlock = nullptr; + llvm::BasicBlock *OMPAfterScanBlock = nullptr; + llvm::BasicBlock *OMPScanExitBlock = nullptr; + llvm::BasicBlock *OMPScanDispatch = nullptr; + llvm::BasicBlock *OMPScanReduce = nullptr; + + /// Manages parent directive for scan directives. + class ParentLoopDirectiveForScanRegion { + CodeGenFunction &CGF; + const OMPExecutableDirective &ParentLoopDirectiveForScan; + + public: + ParentLoopDirectiveForScanRegion( + CodeGenFunction &CGF, + const OMPExecutableDirective &ParentLoopDirectiveForScan) + : CGF(CGF), + ParentLoopDirectiveForScan(*CGF.OMPParentLoopDirectiveForScan) { + CGF.OMPParentLoopDirectiveForScan = &ParentLoopDirectiveForScan; + } + ~ParentLoopDirectiveForScanRegion() { + CGF.OMPParentLoopDirectiveForScan = &ParentLoopDirectiveForScan; + } + }; + template typename DominatingValue::saved_type saveValueInCond(T value) { return DominatingValue::save(*this, value); @@ -3201,14 +3227,16 @@ /// proper codegen in internal captured statement. /// void EmitOMPReductionClauseInit(const OMPExecutableDirective &D, - OMPPrivateScope &PrivateScope); + OMPPrivateScope &PrivateScope, + bool ForInscan = false); /// Emit final update of reduction values to original variables at /// the end of the directive. /// /// \param D Directive that has at least one 'reduction' directives. /// \param ReductionKind The kind of reduction to perform. - void EmitOMPReductionClauseFinal(const OMPExecutableDirective &D, - const OpenMPDirectiveKind ReductionKind); + void EmitOMPReductionClauseFinal( + const OMPExecutableDirective &D, const OpenMPDirectiveKind ReductionKind, + llvm::Optional ForScanClause = llvm::None); /// Emit initial code for linear variables. Creates private copies /// and initializes them with the values according to OpenMP standard. /// @@ -3260,6 +3288,7 @@ void EmitOMPTaskgroupDirective(const OMPTaskgroupDirective &S); void EmitOMPFlushDirective(const OMPFlushDirective &S); void EmitOMPDepobjDirective(const OMPDepobjDirective &S); + void EmitOMPScanDirective(const OMPScanDirective &S); void EmitOMPOrderedDirective(const OMPOrderedDirective &S); void EmitOMPAtomicDirective(const OMPAtomicDirective &S); void EmitOMPTargetDirective(const OMPTargetDirective &S); @@ -3361,8 +3390,8 @@ /// \param PostIncGen Genrator for post-increment code (required for ordered /// loop directvies). void EmitOMPInnerLoop( - const Stmt &S, bool RequiresCleanup, const Expr *LoopCond, - const Expr *IncExpr, + const OMPExecutableDirective &S, bool RequiresCleanup, + const Expr *LoopCond, const Expr *IncExpr, const llvm::function_ref BodyGen, const llvm::function_ref PostIncGen); diff --git a/clang/lib/Sema/SemaOpenMP.cpp b/clang/lib/Sema/SemaOpenMP.cpp --- a/clang/lib/Sema/SemaOpenMP.cpp +++ b/clang/lib/Sema/SemaOpenMP.cpp @@ -9176,6 +9176,14 @@ diag::err_omp_scan_single_clause_expected); return StmtError(); } + // Check that scan directive is used in the scopeof the OpenMP loop body. + if (Scope *S = DSAStack->getCurScope()) { + Scope *ParentS = S->getParent(); + if (!ParentS || ParentS->getParent() != ParentS->getBreakParent() || + !ParentS->getBreakParent()->isOpenMPLoopScope()) + return StmtError(Diag(StartLoc, diag::err_omp_orphaned_device_directive) + << getOpenMPDirectiveName(OMPD_scan) << 5); + } // Check that only one instance of scan directives is used in the same outer // region. if (DSAStack->doesParentHasScanDirective()) { @@ -14461,6 +14469,10 @@ SmallVector RHSs; /// Reduction operation expression. SmallVector ReductionOps; + /// inscan copy temp expressions. + SmallVector InscanCopyTemps; + /// inscan copy operation expressions. + SmallVector InscanCopyOps; /// Taskgroup descriptors for the corresponding reduction items in /// in_reduction clauses. SmallVector TaskgroupDescriptors; @@ -14478,6 +14490,10 @@ LHSs.reserve(Size); RHSs.reserve(Size); ReductionOps.reserve(Size); + if (RedModifier == OMPC_REDUCTION_inscan) { + InscanCopyTemps.reserve(Size); + InscanCopyOps.reserve(Size); + } TaskgroupDescriptors.reserve(Size); ExprCaptures.reserve(Size); ExprPostUpdates.reserve(Size); @@ -14491,16 +14507,27 @@ RHSs.emplace_back(nullptr); ReductionOps.emplace_back(ReductionOp); TaskgroupDescriptors.emplace_back(nullptr); + if (RedModifier == OMPC_REDUCTION_inscan) { + InscanCopyTemps.push_back(nullptr); + InscanCopyOps.push_back(nullptr); + } } /// Stores reduction data. void push(Expr *Item, Expr *Private, Expr *LHS, Expr *RHS, Expr *ReductionOp, - Expr *TaskgroupDescriptor) { + Expr *TaskgroupDescriptor, Expr *CopyTemp, Expr *CopyOp) { Vars.emplace_back(Item); Privates.emplace_back(Private); LHSs.emplace_back(LHS); RHSs.emplace_back(RHS); ReductionOps.emplace_back(ReductionOp); TaskgroupDescriptors.emplace_back(TaskgroupDescriptor); + if (RedModifier == OMPC_REDUCTION_inscan) { + InscanCopyTemps.push_back(CopyTemp); + InscanCopyOps.push_back(CopyOp); + } else { + assert(CopyOp == nullptr && CopyTemp == nullptr && + "Copy operation must be used for inscan reductions only."); + } } }; } // namespace @@ -15102,6 +15129,28 @@ continue; } + // Add copy operations for inscan reductions. + // Temp = LHS; + ExprResult TempRes, CopyOpRes; + if (ClauseKind == OMPC_reduction && + RD.RedModifier == OMPC_REDUCTION_inscan) { + VarDecl *TempVD = buildVarDecl(S, ELoc, Type, D->getName(), + D->hasAttrs() ? &D->getAttrs() : nullptr); + // Add a constructor to the temp decl. + S.ActOnUninitializedDecl(TempVD); + DeclRefExpr *TempDRE = buildDeclRefExpr(S, TempVD, Type, ELoc); + TempRes = TempDRE; + ExprResult LHS = S.DefaultLvalueConversion(LHSDRE); + CopyOpRes = S.BuildBinOp(Stack->getCurScope(), ELoc, BO_Assign, TempDRE, + LHS.get()); + if (!CopyOpRes.isUsable()) + continue; + CopyOpRes = + S.ActOnFinishFullExpr(CopyOpRes.get(), /*DiscardedValue=*/true); + if (!CopyOpRes.isUsable()) + continue; + } + // OpenMP [2.15.4.6, Restrictions, p.2] // A list item that appears in an in_reduction clause of a task construct // must appear in a task_reduction clause of a construct associated with a @@ -15203,7 +15252,7 @@ Stack->addTaskgroupReductionData(D, ReductionIdRange, BOK); } RD.push(VarsExpr, PrivateDRE, LHSDRE, RHSDRE, ReductionOp.get(), - TaskgroupDescriptor); + TaskgroupDescriptor, TempRes.get(), CopyOpRes.get()); } return RD.Vars.empty(); } @@ -15246,8 +15295,8 @@ return OMPReductionClause::Create( Context, StartLoc, LParenLoc, ModifierLoc, ColonLoc, EndLoc, Modifier, RD.Vars, ReductionIdScopeSpec.getWithLocInContext(Context), ReductionId, - RD.Privates, RD.LHSs, RD.RHSs, RD.ReductionOps, - buildPreInits(Context, RD.ExprCaptures), + RD.Privates, RD.LHSs, RD.RHSs, RD.ReductionOps, RD.InscanCopyTemps, + RD.InscanCopyOps, buildPreInits(Context, RD.ExprCaptures), buildPostUpdate(*this, RD.ExprPostUpdates)); } diff --git a/clang/lib/Serialization/ASTReader.cpp b/clang/lib/Serialization/ASTReader.cpp --- a/clang/lib/Serialization/ASTReader.cpp +++ b/clang/lib/Serialization/ASTReader.cpp @@ -11822,9 +11822,12 @@ case llvm::omp::OMPC_shared: C = OMPSharedClause::CreateEmpty(Context, Record.readInt()); break; - case llvm::omp::OMPC_reduction: - C = OMPReductionClause::CreateEmpty(Context, Record.readInt()); + case llvm::omp::OMPC_reduction: { + unsigned N = Record.readInt(); + auto Modifier = Record.readEnum(); + C = OMPReductionClause::CreateEmpty(Context, N, Modifier); break; + } case llvm::omp::OMPC_task_reduction: C = OMPTaskReductionClause::CreateEmpty(Context, Record.readInt()); break; @@ -12205,7 +12208,6 @@ C->setLParenLoc(Record.readSourceLocation()); C->setModifierLoc(Record.readSourceLocation()); C->setColonLoc(Record.readSourceLocation()); - C->setModifier(Record.readEnum()); NestedNameSpecifierLoc NNSL = Record.readNestedNameSpecifierLoc(); DeclarationNameInfo DNI = Record.readDeclarationNameInfo(); C->setQualifierLoc(NNSL); @@ -12233,6 +12235,16 @@ for (unsigned i = 0; i != NumVars; ++i) Vars.push_back(Record.readSubExpr()); C->setReductionOps(Vars); + if (C->getModifier() == OMPC_REDUCTION_inscan) { + Vars.clear(); + for (unsigned i = 0; i != NumVars; ++i) + Vars.push_back(Record.readSubExpr()); + C->setInscanCopyTemps(Vars); + Vars.clear(); + for (unsigned i = 0; i != NumVars; ++i) + Vars.push_back(Record.readSubExpr()); + C->setInscanCopyOps(Vars); + } } void OMPClauseReader::VisitOMPTaskReductionClause(OMPTaskReductionClause *C) { diff --git a/clang/lib/Serialization/ASTWriter.cpp b/clang/lib/Serialization/ASTWriter.cpp --- a/clang/lib/Serialization/ASTWriter.cpp +++ b/clang/lib/Serialization/ASTWriter.cpp @@ -6305,11 +6305,11 @@ void OMPClauseWriter::VisitOMPReductionClause(OMPReductionClause *C) { Record.push_back(C->varlist_size()); + Record.writeEnum(C->getModifier()); VisitOMPClauseWithPostUpdate(C); Record.AddSourceLocation(C->getLParenLoc()); Record.AddSourceLocation(C->getModifierLoc()); Record.AddSourceLocation(C->getColonLoc()); - Record.writeEnum(C->getModifier()); Record.AddNestedNameSpecifierLoc(C->getQualifierLoc()); Record.AddDeclarationNameInfo(C->getNameInfo()); for (auto *VE : C->varlists()) @@ -6322,6 +6322,12 @@ Record.AddStmt(E); for (auto *E : C->reduction_ops()) Record.AddStmt(E); + if (C->getModifier() == clang::OMPC_REDUCTION_inscan) { + for (auto *E : C->copy_temps()) + Record.AddStmt(E); + for (auto *E : C->copy_ops()) + Record.AddStmt(E); + } } void OMPClauseWriter::VisitOMPTaskReductionClause(OMPTaskReductionClause *C) { diff --git a/clang/test/OpenMP/scan_codegen.cpp b/clang/test/OpenMP/scan_codegen.cpp new file mode 100644 --- /dev/null +++ b/clang/test/OpenMP/scan_codegen.cpp @@ -0,0 +1,221 @@ +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=50 -x c++ -triple x86_64-unknown-unknown -emit-llvm %s -o - | FileCheck %s +// RUN: %clang_cc1 -fopenmp -fopenmp-version=50 -x c++ -triple x86_64-unknown-unknown -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp -fopenmp-version=50 -x c++ -triple x86_64-unknown-unknown -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s + +// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=50 -x c++ -triple x86_64-unknown-unknown -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s +// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=50 -x c++ -triple x86_64-unknown-unknown -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=50 -x c++ -triple x86_64-unknown-unknown -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s +// SIMD-ONLY0-NOT: {{__kmpc|__tgt}} +// +// expected-no-diagnostics +#ifndef HEADER +#define HEADER +void foo(); +void bar(); + +// CHECK-LABEL: baz +void baz() { + int a = 0; + + // CHECK: store i32 0, i32* [[A_ADDR:%.+]], + // CHECK: store i32 0, i32* [[OMP_CNT:%.+]], + // CHECK: br label %[[OMP_HEADER:.+]] + + // CHECK: [[OMP_HEADER]]: + // CHECK: [[CNT_VAL:%.+]] = load i32, i32* [[OMP_CNT]], + // CHECK: [[CMP:%.+]] = icmp slt i32 [[CNT_VAL]], 10 + // CHECK: br i1 [[CMP]], label %[[OMP_BODY:.+]], label %[[OMP_END:.+]] +#pragma omp simd reduction(inscan, + : a) + for (int i = 0; i < 10; ++i) { + // CHECK: [[OMP_BODY]]: + + // i = OMP_CNT*1 + 0; + // CHECK: [[CNT_VAL:%.+]] = load i32, i32* [[OMP_CNT]], + // CHECK: [[MUL:%.+]] = mul nsw i32 [[CNT_VAL]], 1 + // CHECK: [[ADD:%.+]] = add nsw i32 0, [[MUL]] + // CHECK: store i32 [[ADD]], i32* [[I_ADDR:%.+]], + + // A_PRIV = 0; + // CHECK: store i32 0, i32* [[A_PRIV_ADDR:%.+]], + + // goto DISPATCH; + // CHECK: br label %[[DISPATCH:[^,]+]] + + // INPUT_PHASE: + // foo(); + // goto REDUCE; + // CHECK: [[INPUT_PHASE:.+]]: + // CHECK: call void @{{.*}}foo{{.*}}() + // CHECK: br label %[[REDUCE:[^,]+]] + foo(); + + // DISPATCH: + // goto INPUT_PHASE; + // CHECK: [[DISPATCH]]: + // CHECK: br label %[[INPUT_PHASE]] + + // REDUCE: + // A = A_PRIV + A; + // A_PRIV = A; + // goto SCAN_PHASE; + // CHECK: [[REDUCE]]: + // CHECK: [[A:%.+]] = load i32, i32* [[A_ADDR]], + // CHECK: [[A_PRIV:%.+]] = load i32, i32* [[A_PRIV_ADDR]], + // CHECK: [[SUM:%.+]] = add nsw i32 [[A]], [[A_PRIV]] + // CHECK: store i32 [[SUM]], i32* [[A_ADDR]], + // CHECK: [[A:%.+]] = load i32, i32* [[A_ADDR]], + // CHECK: store i32 [[A]], i32* [[A_PRIV_ADDR]], + // CHECK: br label %[[SCAN_PHASE:[^,]+]] +#pragma omp scan inclusive(a) + + // SCAN_PHASE: + // bar(); + // goto CONTINUE; + // CHECK: [[SCAN_PHASE]]: + // CHECK: call void @{{.*}}bar{{.*}}(), + // CHECK: br label %[[CONTINUE:[^,]+]] + bar(); + + // CHECK: [[CONTINUE]]: + // CHECK: br label %[[INC_BLOCK:[^,]+]] + + // ++OMP_CNT; + // CHECK: [[INC_BLOCK]]: + // CHECK: [[CNT:%.+]] = load i32, i32* [[OMP_CNT]], + // CHECK: [[INC:%.+]] = add nsw i32 [[CNT]], 1 + // CHECK: store i32 [[INC]], i32* [[OMP_CNT]], + // CHECK: br label %[[OMP_HEADER]] + } + // CHECK: [[OMP_END]]: +} + +struct S { + int a; + S() {} + ~S() {} + S& operator+(const S&); + S& operator=(const S&); +}; + +// CHECK-LABEL: xyz +void xyz() { + S s[2]; + + // CHECK: [[S_BEGIN:%.+]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ADDR:%.+]], i{{.+}} 0, i{{.+}} 0 + // CHECK: [[S_END:%.+]] = getelementptr {{.*}}%struct.S, %struct.S* [[S_BEGIN]], i{{.+}} 2 + // CHECK: br label %[[ARRAY_INIT:.+]] + // CHECK: [[ARRAY_INIT]]: + // CHECK: [[S_CUR:%.+]] = phi %struct.S* [ [[S_BEGIN]], %{{.+}} ], [ [[S_NEXT:%.+]], %[[ARRAY_INIT]] ] + // CHECK: call void [[CONSTR:@.+]](%struct.S* [[S_CUR]]) + // CHECK: [[S_NEXT]] = getelementptr inbounds %struct.S, %struct.S* [[S_CUR]], i{{.+}} 1 + // CHECK: [[IS_DONE:%.+]] = icmp eq %struct.S* [[S_NEXT]], [[S_END]] + // CHECK: br i1 [[IS_DONE]], label %[[DONE:.+]], label %[[ARRAY_INIT]] + // CHECK: [[DONE]]: + // CHECK: store i32 0, i32* [[OMP_CNT:%.+]], + // CHECK: br label %[[OMP_HEADER:.+]] + + // CHECK: [[OMP_HEADER]]: + // CHECK: [[CNT_VAL:%.+]] = load i32, i32* [[OMP_CNT]], + // CHECK: [[CMP:%.+]] = icmp slt i32 [[CNT_VAL]], 10 + // CHECK: br i1 [[CMP]], label %[[OMP_BODY:.+]], label %[[OMP_END:.+]] +#pragma omp simd reduction(inscan, + : s) + for (int i = 0; i < 10; ++i) { + // CHECK: [[OMP_BODY]]: + + // i = OMP_CNT*1 + 0; + // CHECK: [[CNT_VAL:%.+]] = load i32, i32* [[OMP_CNT]], + // CHECK: [[MUL:%.+]] = mul nsw i32 [[CNT_VAL]], 1 + // CHECK: [[ADD:%.+]] = add nsw i32 0, [[MUL]] + // CHECK: store i32 [[ADD]], i32* [[I_ADDR:%.+]], + + // S S_PRIV[2]; + // CHECK: [[S_BEGIN:%.+]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_PRIV_ADDR:%.+]], i{{.+}} 0, i{{.+}} 0 + // CHECK: [[S_END:%.+]] = getelementptr {{.*}}%struct.S, %struct.S* [[S_BEGIN]], i{{.+}} 2 + // CHECK: [[IS_DONE:%.+]] = icmp eq %struct.S* [[S_BEGIN]], [[S_END]] + // CHECK: br i1 [[IS_DONE]], label %[[DONE:.+]], label %[[ARRAY_INIT:[^,]+]] + // CHECK: [[ARRAY_INIT]]: + // CHECK: [[S_CUR:%.+]] = phi %struct.S* [ [[S_BEGIN]], %[[OMP_BODY]] ], [ [[S_NEXT:%.+]], %[[ARRAY_INIT]] ] + // CHECK: call void [[CONSTR]](%struct.S* [[S_CUR]]) + // CHECK: [[S_NEXT]] = getelementptr {{.*}}%struct.S, %struct.S* [[S_CUR]], i{{.+}} 1 + // CHECK: [[IS_DONE:%.+]] = icmp eq %struct.S* [[S_NEXT]], [[S_END]] + // CHECK: br i1 [[IS_DONE]], label %[[DONE:.+]], label %[[ARRAY_INIT]] + // CHECK: [[DONE]]: + // CHECK: [[LHS_BEGIN:%.+]] = bitcast [2 x %struct.S]* [[S_ADDR]] to %struct.S* + // CHECK: [[RHS_BEGIN:%.+]] = bitcast [2 x %struct.S]* [[S_PRIV_ADDR]] to %struct.S* + + // goto DISPATCH; + // CHECK: br label %[[DISPATCH:[^,]+]] + + // SCAN_PHASE: + // foo(); + // goto CONTINUE; + // CHECK: [[SCAN_PHASE:.+]]: + // CHECK: call void @{{.*}}foo{{.*}}() + // CHECK: br label %[[CONTINUE:[^,]+]] + foo(); + + // DISPATCH: + // goto INPUT_PHASE; + // CHECK: [[DISPATCH]]: + // CHECK: br label %[[INPUT_PHASE:[^,]+]] + + // REDUCE: + // TEMP = S; + // S = S_PRIV + S; + // S_PRIV = TEMP; + // goto SCAN_PHASE; + // CHECK: [[REDUCE:.+]]: + // CHECK: [[LHS_END:%.+]] = getelementptr {{.*}}%struct.S, %struct.S* [[LHS_BEGIN]], i{{.+}} 2 + // CHECK: [[IS_DONE:%.+]] = icmp eq %struct.S* [[LHS_BEGIN]], [[LHS_END]] + // CHECK: br i1 [[IS_DONE]], label %[[DONE:.+]], label %[[ARRAY_REDUCE_COPY:[^,]+]] + // CHECK: [[ARRAY_REDUCE_COPY]]: + // CHECK: [[SRC_CUR:%.+]] = phi %struct.S* [ [[RHS_BEGIN]], %[[REDUCE]] ], [ [[SRC_NEXT:%.+]], %[[ARRAY_REDUCE_COPY]] ] + // CHECK: [[DEST_CUR:%.+]] = phi %struct.S* [ [[LHS_BEGIN]], %[[REDUCE]] ], [ [[DEST_NEXT:%.+]], %[[ARRAY_REDUCE_COPY]] ] + // CHECK: call void [[CONSTR]](%struct.S* [[S_TEMP_ADDR:%.+]]) + // CHECK: call {{.*}}%struct.S* [[S_COPY:@.+]](%struct.S* [[S_TEMP_ADDR]], %struct.S* {{.*}}[[DEST_CUR]]), + // CHECK: [[SUM:%.+]] = call {{.*}}%struct.S* @{{.+}}_(%struct.S* [[DEST_CUR]], %struct.S* {{.*}}[[SRC_CUR]]) + // CHECK: call {{.*}}%struct.S* [[S_COPY]](%struct.S* [[DEST_CUR]], %struct.S* {{.*}}[[SUM]]), + // CHECK: call {{.*}}%struct.S* [[S_COPY]](%struct.S* [[SRC_CUR]], %struct.S* {{.*}}[[S_TEMP_ADDR]]) + // CHECK: call void [[DESTR:@.+]](%struct.S* [[S_TEMP_ADDR]]) + // CHECK: [[DEST_NEXT]] = getelementptr {{.*}}%struct.S, %struct.S* [[DEST_CUR]], i{{.+}} 1 + // CHECK: [[SRC_NEXT]] = getelementptr {{.*}}%struct.S, %struct.S* [[SRC_CUR]], i{{.+}} 1 + // CHECK: [[IS_DONE:%.+]] = icmp eq %struct.S* [[DEST_NEXT]], [[LHS_END]] + // CHECK: br i1 [[IS_DONE]], label %[[DONE:.+]], label %[[ARRAY_REDUCE_COPY]] + // CHECK: [[DONE]]: + // CHECK: br label %[[SCAN_PHASE]] +#pragma omp scan exclusive(s) + + // INPUT_PHASE: + // bar(); + // goto REDUCE; + // CHECK: [[INPUT_PHASE]]: + // CHECK: call void @{{.*}}bar{{.*}}(), + // CHECK: br label %[[REDUCE]] + bar(); + + // CHECK: [[CONTINUE]]: + + // S_PRIV[2].~S(); + // CHECK: [[S_BEGIN:%.+]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_PRIV_ADDR]], i{{.+}} 0, i{{.+}} 0 + // CHECK: [[S_END:%.+]] = getelementptr {{.*}}%struct.S, %struct.S* [[S_BEGIN]], i{{.+}} 2 + // CHECK: br label %[[ARRAY_DESTR:[^,]+]] + // CHECK: [[ARRAY_DESTR]]: + // CHECK: [[S_CUR:%.+]] = phi %struct.S* [ [[S_END]], %[[CONTINUE]] ], [ [[S_PREV:%.+]], %[[ARRAY_DESTR]] ] + // CHECK: [[S_PREV]] = getelementptr {{.*}}%struct.S, %struct.S* [[S_CUR]], i{{.+}} -1 + // CHECK: call void [[DESTR]](%struct.S* [[S_PREV]]) + // CHECK: [[IS_DONE:%.+]] = icmp eq %struct.S* [[S_PREV]], [[S_BEGIN]] + // CHECK: br i1 [[IS_DONE]], label %[[DONE:.+]], label %[[ARRAY_DESTR]] + // CHECK: [[DONE]]: + // CHECK: br label %[[INC_BLOCK:[^,]+]] + + // ++OMP_CNT; + // CHECK: [[INC_BLOCK]]: + // CHECK: [[CNT:%.+]] = load i32, i32* [[OMP_CNT]], + // CHECK: [[INC:%.+]] = add nsw i32 [[CNT]], 1 + // CHECK: store i32 [[INC]], i32* [[OMP_CNT]], + // CHECK: br label %[[OMP_HEADER]] + } + // CHECK: [[OMP_END]]: +} + +#endif // HEADER diff --git a/clang/test/OpenMP/scan_messages.cpp b/clang/test/OpenMP/scan_messages.cpp --- a/clang/test/OpenMP/scan_messages.cpp +++ b/clang/test/OpenMP/scan_messages.cpp @@ -19,32 +19,32 @@ #pragma omp for simd reduction(inscan, +: argc) for (int i = 0; i < 10; ++i) if (argc) -#pragma omp scan inclusive(argc) // expected-error {{'#pragma omp scan' cannot be an immediate substatement}} +#pragma omp scan inclusive(argc) // expected-error {{'#pragma omp scan' cannot be an immediate substatement}} expected-error {{orphaned 'omp scan' directives are prohibited; perhaps you forget to enclose the directive into a for, simd, for simd, parallel for, or parallel for simd region?}} if (argc) { #pragma omp scan inclusive(argc) // expected-error {{orphaned 'omp scan' directives are prohibited; perhaps you forget to enclose the directive into a for, simd, for simd, parallel for, or parallel for simd region?}} } #pragma omp simd reduction(inscan, +: argc) for (int i = 0; i < 10; ++i) while (argc) -#pragma omp scan inclusive(argc) // expected-error {{'#pragma omp scan' cannot be an immediate substatement}} +#pragma omp scan inclusive(argc) // expected-error {{'#pragma omp scan' cannot be an immediate substatement}} expected-error {{orphaned 'omp scan' directives are prohibited; perhaps you forget to enclose the directive into a for, simd, for simd, parallel for, or parallel for simd region?}} while (argc) { #pragma omp scan inclusive(argc) // expected-error {{orphaned 'omp scan' directives are prohibited; perhaps you forget to enclose the directive into a for, simd, for simd, parallel for, or parallel for simd region?}} } #pragma omp simd reduction(inscan, +: argc) for (int i = 0; i < 10; ++i) do -#pragma omp scan inclusive(argc) // expected-error {{'#pragma omp scan' cannot be an immediate substatement}} +#pragma omp scan inclusive(argc) // expected-error {{'#pragma omp scan' cannot be an immediate substatement}} expected-error {{orphaned 'omp scan' directives are prohibited; perhaps you forget to enclose the directive into a for, simd, for simd, parallel for, or parallel for simd region?}} while (argc) ; -#pragma omp simd reduction(inscan, +: argc) +#pragma omp simd reduction(inscan, +: argc) // expected-error {{the inscan reduction list item must appear as a list item in an 'inclusive' or 'exclusive' clause on an inner 'omp scan' directive}} for (int i = 0; i < 10; ++i) do { -#pragma omp scan inclusive(argc) +#pragma omp scan inclusive(argc) // expected-error {{orphaned 'omp scan' directives are prohibited; perhaps you forget to enclose the directive into a for, simd, for simd, parallel for, or parallel for simd region?}} } while (argc); #pragma omp simd reduction(inscan, +: argc) for (int i = 0; i < 10; ++i) switch (argc) -#pragma omp scan inclusive(argc) // expected-error {{'#pragma omp scan' cannot be an immediate substatement}} +#pragma omp scan inclusive(argc) // expected-error {{'#pragma omp scan' cannot be an immediate substatement}} expected-error {{orphaned 'omp scan' directives are prohibited; perhaps you forget to enclose the directive into a for, simd, for simd, parallel for, or parallel for simd region?}} switch (argc) case 1: #pragma omp scan inclusive(argc) // expected-error {{'#pragma omp scan' cannot be an immediate substatement}} expected-error {{orphaned 'omp scan' directives are prohibited; perhaps you forget to enclose the directive into a for, simd, for simd, parallel for, or parallel for simd region?}} @@ -52,21 +52,21 @@ case 1: { #pragma omp scan inclusive(argc) // expected-error {{orphaned 'omp scan' directives are prohibited; perhaps you forget to enclose the directive into a for, simd, for simd, parallel for, or parallel for simd region?}} } -#pragma omp simd reduction(inscan, +: argc) +#pragma omp simd reduction(inscan, +: argc) // expected-error {{the inscan reduction list item must appear as a list item in an 'inclusive' or 'exclusive' clause on an inner 'omp scan' directive}} for (int i = 0; i < 10; ++i) switch (argc) { -#pragma omp scan exclusive(argc) // expected-note 2 {{previous 'scan' directive used here}} +#pragma omp scan exclusive(argc) // expected-error {{orphaned 'omp scan' directives are prohibited; perhaps you forget to enclose the directive into a for, simd, for simd, parallel for, or parallel for simd region?}} case 1: -#pragma omp scan exclusive(argc) // expected-error {{exactly one 'scan' directive must appear in the loop body of an enclosing directive}} +#pragma omp scan exclusive(argc) // expected-error {{orphaned 'omp scan' directives are prohibited; perhaps you forget to enclose the directive into a for, simd, for simd, parallel for, or parallel for simd region?}} break; default: { -#pragma omp scan exclusive(argc) // expected-error {{exactly one 'scan' directive must appear in the loop body of an enclosing directive}} +#pragma omp scan exclusive(argc) // expected-error {{orphaned 'omp scan' directives are prohibited; perhaps you forget to enclose the directive into a for, simd, for simd, parallel for, or parallel for simd region?}} } break; } #pragma omp simd reduction(inscan, +: argc) for (int i = 0; i < 10; ++i) for (;;) -#pragma omp scan exclusive(argc) // expected-error {{'#pragma omp scan' cannot be an immediate substatement}} +#pragma omp scan exclusive(argc) // expected-error {{'#pragma omp scan' cannot be an immediate substatement}} expected-error {{orphaned 'omp scan' directives are prohibited; perhaps you forget to enclose the directive into a for, simd, for simd, parallel for, or parallel for simd region?}} for (;;) { #pragma omp scan exclusive(argc) // expected-error {{orphaned 'omp scan' directives are prohibited; perhaps you forget to enclose the directive into a for, simd, for simd, parallel for, or parallel for simd region?}} } @@ -77,8 +77,10 @@ } #pragma omp simd reduction(inscan, +: argc) for (int i = 0; i < 10; ++i) { +#pragma omp scan inclusive(argc) // expected-note {{previous 'scan' directive used here}} +#pragma omp scan inclusive(argc) // expected-error {{exactly one 'scan' directive must appear in the loop body of an enclosing directive}} label1 : { -#pragma omp scan inclusive(argc) +#pragma omp scan inclusive(argc) // expected-error {{orphaned 'omp scan' directives are prohibited; perhaps you forget to enclose the directive into a for, simd, for simd, parallel for, or parallel for simd region?}} }} return T(); @@ -109,32 +111,32 @@ #pragma omp simd reduction(inscan, +: argc) for (int i = 0; i < 10; ++i) if (argc) -#pragma omp scan inclusive(argc) // expected-error {{'#pragma omp scan' cannot be an immediate substatement}} +#pragma omp scan inclusive(argc) // expected-error {{'#pragma omp scan' cannot be an immediate substatement}} expected-error {{orphaned 'omp scan' directives are prohibited; perhaps you forget to enclose the directive into a for, simd, for simd, parallel for, or parallel for simd region?}} if (argc) { #pragma omp scan inclusive(argc) // expected-error {{orphaned 'omp scan' directives are prohibited; perhaps you forget to enclose the directive into a for, simd, for simd, parallel for, or parallel for simd region?}} expected-error {{the list item must appear in 'reduction' clause with the 'inscan' modifier of the parent directive}} } #pragma omp simd reduction(inscan, +: argc) for (int i = 0; i < 10; ++i) while (argc) -#pragma omp scan inclusive(argc) // expected-error {{'#pragma omp scan' cannot be an immediate substatement}} +#pragma omp scan inclusive(argc) // expected-error {{'#pragma omp scan' cannot be an immediate substatement}} expected-error {{orphaned 'omp scan' directives are prohibited; perhaps you forget to enclose the directive into a for, simd, for simd, parallel for, or parallel for simd region?}} while (argc) { #pragma omp scan inclusive(argc) // expected-error {{orphaned 'omp scan' directives are prohibited; perhaps you forget to enclose the directive into a for, simd, for simd, parallel for, or parallel for simd region?}} expected-error {{the list item must appear in 'reduction' clause with the 'inscan' modifier of the parent directive}} } #pragma omp simd reduction(inscan, +: argc) for (int i = 0; i < 10; ++i) do -#pragma omp scan inclusive(argc) // expected-error {{'#pragma omp scan' cannot be an immediate substatement}} +#pragma omp scan inclusive(argc) // expected-error {{'#pragma omp scan' cannot be an immediate substatement}} expected-error {{orphaned 'omp scan' directives are prohibited; perhaps you forget to enclose the directive into a for, simd, for simd, parallel for, or parallel for simd region?}} while (argc) ; #pragma omp simd reduction(inscan, +: argc) for (int i = 0; i < 10; ++i) do { -#pragma omp scan exclusive(argc) +#pragma omp scan exclusive(argc) // expected-error {{orphaned 'omp scan' directives are prohibited; perhaps you forget to enclose the directive into a for, simd, for simd, parallel for, or parallel for simd region?}} } while (argc); #pragma omp simd reduction(inscan, +: argc) for (int i = 0; i < 10; ++i) switch (argc) -#pragma omp scan exclusive(argc) // expected-error {{'#pragma omp scan' cannot be an immediate substatement}} +#pragma omp scan exclusive(argc) // expected-error {{'#pragma omp scan' cannot be an immediate substatement}} expected-error {{orphaned 'omp scan' directives are prohibited; perhaps you forget to enclose the directive into a for, simd, for simd, parallel for, or parallel for simd region?}} switch (argc) case 1: #pragma omp scan exclusive(argc) // expected-error {{'#pragma omp scan' cannot be an immediate substatement}} expected-error {{orphaned 'omp scan' directives are prohibited; perhaps you forget to enclose the directive into a for, simd, for simd, parallel for, or parallel for simd region?}} expected-error {{the list item must appear in 'reduction' clause with the 'inscan' modifier of the parent directive}} @@ -145,18 +147,18 @@ #pragma omp simd reduction(inscan, +: argc) for (int i = 0; i < 10; ++i) switch (argc) { -#pragma omp scan inclusive(argc) // expected-note 2 {{previous 'scan' directive used here}} +#pragma omp scan inclusive(argc) // expected-error {{orphaned 'omp scan' directives are prohibited; perhaps you forget to enclose the directive into a for, simd, for simd, parallel for, or parallel for simd region?}} case 1: -#pragma omp scan inclusive(argc) // expected-error {{exactly one 'scan' directive must appear in the loop body of an enclosing directive}} +#pragma omp scan inclusive(argc) // expected-error {{orphaned 'omp scan' directives are prohibited; perhaps you forget to enclose the directive into a for, simd, for simd, parallel for, or parallel for simd region?}} break; default: { -#pragma omp scan inclusive(argc) // expected-error {{exactly one 'scan' directive must appear in the loop body of an enclosing directive}} +#pragma omp scan inclusive(argc) // expected-error {{orphaned 'omp scan' directives are prohibited; perhaps you forget to enclose the directive into a for, simd, for simd, parallel for, or parallel for simd region?}} } break; } #pragma omp simd reduction(inscan, +: argc) for (int i = 0; i < 10; ++i) for (;;) -#pragma omp scan inclusive(argc) // expected-error {{'#pragma omp scan' cannot be an immediate substatement}} +#pragma omp scan inclusive(argc) // expected-error {{'#pragma omp scan' cannot be an immediate substatement}} expected-error {{orphaned 'omp scan' directives are prohibited; perhaps you forget to enclose the directive into a for, simd, for simd, parallel for, or parallel for simd region?}} for (;;) { #pragma omp scan inclusive(argc) // expected-error {{orphaned 'omp scan' directives are prohibited; perhaps you forget to enclose the directive into a for, simd, for simd, parallel for, or parallel for simd region?}} expected-error {{the list item must appear in 'reduction' clause with the 'inscan' modifier of the parent directive}} } @@ -167,10 +169,12 @@ } #pragma omp simd reduction(inscan, +: argc) for (int i = 0; i < 10; ++i) { +#pragma omp scan inclusive(argc) // expected-note {{previous 'scan' directive used here}} +#pragma omp scan inclusive(argc) // expected-error {{exactly one 'scan' directive must appear in the loop body of an enclosing directive}} label1 : { -#pragma omp scan inclusive(argc) +#pragma omp scan inclusive(argc) // expected-error {{orphaned 'omp scan' directives are prohibited; perhaps you forget to enclose the directive into a for, simd, for simd, parallel for, or parallel for simd region?}} } } - return tmain(); + return tmain(); // expected-note {{in instantiation of function template specialization 'tmain' requested here}} } diff --git a/clang/tools/libclang/CIndex.cpp b/clang/tools/libclang/CIndex.cpp --- a/clang/tools/libclang/CIndex.cpp +++ b/clang/tools/libclang/CIndex.cpp @@ -2376,6 +2376,14 @@ for (auto *E : C->reduction_ops()) { Visitor->AddStmt(E); } + if (C->getModifier() == clang::OMPC_REDUCTION_inscan) { + for (auto *E : C->copy_temps()) { + Visitor->AddStmt(E); + } + for (auto *E : C->copy_ops()) { + Visitor->AddStmt(E); + } + } } void OMPClauseEnqueue::VisitOMPTaskReductionClause( const OMPTaskReductionClause *C) {