diff --git a/clang/include/clang/AST/OpenMPClause.h b/clang/include/clang/AST/OpenMPClause.h --- a/clang/include/clang/AST/OpenMPClause.h +++ b/clang/include/clang/AST/OpenMPClause.h @@ -2839,6 +2839,41 @@ return llvm::makeArrayRef(getRHSExprs().end(), varlist_size()); } + /// Set list of helper copy operations for inscan reductions. + /// The form is: Temps[i] = LHS[i]; + void setInscanCopyOps(ArrayRef Ops); + + /// Get the list of helper inscan copy operations. + MutableArrayRef getInscanCopyOps() { + return MutableArrayRef(getReductionOps().end(), varlist_size()); + } + ArrayRef getInscanCopyOps() const { + return llvm::makeArrayRef(getReductionOps().end(), varlist_size()); + } + + /// Set list of helper temp vars for inscan copy array operations. + void setInscanCopyArrayTemps(ArrayRef CopyArrayTemps); + + /// Get the list of helper inscan copy temps. + MutableArrayRef getInscanCopyArrayTemps() { + return MutableArrayRef(getInscanCopyOps().end(), varlist_size()); + } + ArrayRef getInscanCopyArrayTemps() const { + return llvm::makeArrayRef(getInscanCopyOps().end(), varlist_size()); + } + + /// Set list of helper temp elements vars for inscan copy array operations. + void setInscanCopyArrayElems(ArrayRef CopyArrayElems); + + /// Get the list of helper inscan copy temps. + MutableArrayRef getInscanCopyArrayElems() { + return MutableArrayRef(getInscanCopyArrayTemps().end(), + varlist_size()); + } + ArrayRef getInscanCopyArrayElems() const { + return llvm::makeArrayRef(getInscanCopyArrayTemps().end(), varlist_size()); + } + public: /// Creates clause with a list of variables \a VL. /// @@ -2869,6 +2904,12 @@ /// \endcode /// Required for proper codegen of final reduction operation performed by the /// reduction clause. + /// \param CopyOps List of copy operations for inscan reductions: + /// \code + /// TempExprs = LHSExprs; + /// \endcode + /// \param CopyArrayTemps Temp arrays for prefix sums. + /// \param CopyArrayElems Temp arrays for prefix sums. /// \param PreInit Statement that must be executed before entering the OpenMP /// region with this clause. /// \param PostUpdate Expression that must be executed after exit from the @@ -2880,13 +2921,18 @@ ArrayRef VL, NestedNameSpecifierLoc QualifierLoc, const DeclarationNameInfo &NameInfo, ArrayRef Privates, ArrayRef LHSExprs, ArrayRef RHSExprs, - ArrayRef ReductionOps, Stmt *PreInit, Expr *PostUpdate); + ArrayRef ReductionOps, ArrayRef CopyOps, + ArrayRef CopyArrayTemps, ArrayRef CopyArrayElems, + Stmt *PreInit, Expr *PostUpdate); /// Creates an empty clause with the place for \a N variables. /// /// \param C AST context. /// \param N The number of variables. - static OMPReductionClause *CreateEmpty(const ASTContext &C, unsigned N); + /// \param Modifier Reduction modifier. + static OMPReductionClause * + CreateEmpty(const ASTContext &C, unsigned N, + OpenMPReductionClauseModifier Modifier); /// Returns modifier. OpenMPReductionClauseModifier getModifier() const { return Modifier; } @@ -2943,6 +2989,36 @@ getReductionOps().end()); } + helper_expr_const_range copy_ops() const { + return helper_expr_const_range(getInscanCopyOps().begin(), + getInscanCopyOps().end()); + } + + helper_expr_range copy_ops() { + return helper_expr_range(getInscanCopyOps().begin(), + getInscanCopyOps().end()); + } + + helper_expr_const_range copy_array_temps() const { + return helper_expr_const_range(getInscanCopyArrayTemps().begin(), + getInscanCopyArrayTemps().end()); + } + + helper_expr_range copy_array_temps() { + return helper_expr_range(getInscanCopyArrayTemps().begin(), + getInscanCopyArrayTemps().end()); + } + + helper_expr_const_range copy_array_elems() const { + return helper_expr_const_range(getInscanCopyArrayElems().begin(), + getInscanCopyArrayElems().end()); + } + + helper_expr_range copy_array_elems() { + return helper_expr_range(getInscanCopyArrayElems().begin(), + getInscanCopyArrayElems().end()); + } + child_range children() { return child_range(reinterpret_cast(varlist_begin()), reinterpret_cast(varlist_end())); diff --git a/clang/include/clang/AST/RecursiveASTVisitor.h b/clang/include/clang/AST/RecursiveASTVisitor.h --- a/clang/include/clang/AST/RecursiveASTVisitor.h +++ b/clang/include/clang/AST/RecursiveASTVisitor.h @@ -3362,6 +3362,17 @@ for (auto *E : C->reduction_ops()) { TRY_TO(TraverseStmt(E)); } + if (C->getModifier() == OMPC_REDUCTION_inscan) { + for (auto *E : C->copy_ops()) { + TRY_TO(TraverseStmt(E)); + } + for (auto *E : C->copy_array_temps()) { + TRY_TO(TraverseStmt(E)); + } + for (auto *E : C->copy_array_elems()) { + TRY_TO(TraverseStmt(E)); + } + } return true; } diff --git a/clang/lib/AST/OpenMPClause.cpp b/clang/lib/AST/OpenMPClause.cpp --- a/clang/lib/AST/OpenMPClause.cpp +++ b/clang/lib/AST/OpenMPClause.cpp @@ -709,15 +709,43 @@ std::copy(ReductionOps.begin(), ReductionOps.end(), getRHSExprs().end()); } +void OMPReductionClause::setInscanCopyOps(ArrayRef Ops) { + assert(Modifier == OMPC_REDUCTION_inscan && "Expected inscan reduction."); + assert(Ops.size() == varlist_size() && "Number of copy " + "expressions is not the same " + "as the preallocated buffer"); + llvm::copy(Ops, getReductionOps().end()); +} + +void OMPReductionClause::setInscanCopyArrayTemps( + ArrayRef CopyArrayTemps) { + assert(Modifier == OMPC_REDUCTION_inscan && "Expected inscan reduction."); + assert(CopyArrayTemps.size() == varlist_size() && + "Number of copy temp expressions is not the same as the preallocated " + "buffer"); + llvm::copy(CopyArrayTemps, getInscanCopyOps().end()); +} + +void OMPReductionClause::setInscanCopyArrayElems( + ArrayRef CopyArrayElems) { + assert(Modifier == OMPC_REDUCTION_inscan && "Expected inscan reduction."); + assert(CopyArrayElems.size() == varlist_size() && + "Number of copy temp expressions is not the same as the preallocated " + "buffer"); + llvm::copy(CopyArrayElems, getInscanCopyArrayTemps().end()); +} + OMPReductionClause *OMPReductionClause::Create( const ASTContext &C, SourceLocation StartLoc, SourceLocation LParenLoc, SourceLocation ModifierLoc, SourceLocation EndLoc, SourceLocation ColonLoc, OpenMPReductionClauseModifier Modifier, ArrayRef VL, NestedNameSpecifierLoc QualifierLoc, const DeclarationNameInfo &NameInfo, ArrayRef Privates, ArrayRef LHSExprs, - ArrayRef RHSExprs, ArrayRef ReductionOps, Stmt *PreInit, - Expr *PostUpdate) { - void *Mem = C.Allocate(totalSizeToAlloc(5 * VL.size())); + ArrayRef RHSExprs, ArrayRef ReductionOps, + ArrayRef CopyOps, ArrayRef CopyArrayTemps, + ArrayRef CopyArrayElems, Stmt *PreInit, Expr *PostUpdate) { + void *Mem = C.Allocate(totalSizeToAlloc( + (Modifier == OMPC_REDUCTION_inscan ? 8 : 5) * VL.size())); auto *Clause = new (Mem) OMPReductionClause(StartLoc, LParenLoc, ModifierLoc, EndLoc, ColonLoc, Modifier, VL.size(), QualifierLoc, NameInfo); @@ -728,13 +756,29 @@ Clause->setReductionOps(ReductionOps); Clause->setPreInitStmt(PreInit); Clause->setPostUpdateExpr(PostUpdate); + if (Modifier == OMPC_REDUCTION_inscan) { + Clause->setInscanCopyOps(CopyOps); + Clause->setInscanCopyArrayTemps(CopyArrayTemps); + Clause->setInscanCopyArrayElems(CopyArrayElems); + } else { + assert(CopyOps.empty() && + "copy operations are expected in inscan reductions only."); + assert(CopyArrayTemps.empty() && + "copy array temps are expected in inscan reductions only."); + assert(CopyArrayElems.empty() && + "copy array temps are expected in inscan reductions only."); + } return Clause; } -OMPReductionClause *OMPReductionClause::CreateEmpty(const ASTContext &C, - unsigned N) { - void *Mem = C.Allocate(totalSizeToAlloc(5 * N)); - return new (Mem) OMPReductionClause(N); +OMPReductionClause * +OMPReductionClause::CreateEmpty(const ASTContext &C, unsigned N, + OpenMPReductionClauseModifier Modifier) { + void *Mem = C.Allocate(totalSizeToAlloc( + (Modifier == OMPC_REDUCTION_inscan ? 8 : 5) * N)); + auto *Clause = new (Mem) OMPReductionClause(N); + Clause->setModifier(Modifier); + return Clause; } void OMPTaskReductionClause::setPrivates(ArrayRef Privates) { diff --git a/clang/lib/AST/StmtProfile.cpp b/clang/lib/AST/StmtProfile.cpp --- a/clang/lib/AST/StmtProfile.cpp +++ b/clang/lib/AST/StmtProfile.cpp @@ -609,6 +609,20 @@ if (E) Profiler->VisitStmt(E); } + if (C->getModifier() == clang::OMPC_REDUCTION_inscan) { + for (auto *E : C->copy_ops()) { + if (E) + Profiler->VisitStmt(E); + } + for (auto *E : C->copy_array_temps()) { + if (E) + Profiler->VisitStmt(E); + } + for (auto *E : C->copy_array_elems()) { + if (E) + Profiler->VisitStmt(E); + } + } } void OMPClauseProfiler::VisitOMPTaskReductionClause( const OMPTaskReductionClause *C) { diff --git a/clang/lib/CodeGen/CGStmt.cpp b/clang/lib/CodeGen/CGStmt.cpp --- a/clang/lib/CodeGen/CGStmt.cpp +++ b/clang/lib/CodeGen/CGStmt.cpp @@ -252,7 +252,7 @@ EmitOMPDepobjDirective(cast(*S)); break; case Stmt::OMPScanDirectiveClass: - llvm_unreachable("Scan directive not supported yet."); + EmitOMPScanDirective(cast(*S)); break; case Stmt::OMPOrderedDirectiveClass: EmitOMPOrderedDirective(cast(*S)); diff --git a/clang/lib/CodeGen/CGStmtOpenMP.cpp b/clang/lib/CodeGen/CGStmtOpenMP.cpp --- a/clang/lib/CodeGen/CGStmtOpenMP.cpp +++ b/clang/lib/CodeGen/CGStmtOpenMP.cpp @@ -1161,7 +1161,7 @@ void CodeGenFunction::EmitOMPReductionClauseInit( const OMPExecutableDirective &D, - CodeGenFunction::OMPPrivateScope &PrivateScope) { + CodeGenFunction::OMPPrivateScope &PrivateScope, bool ForInscan) { if (!HaveInsertPoint()) return; SmallVector Shareds; @@ -1173,6 +1173,8 @@ SmallVector TaskLHSs; SmallVector TaskRHSs; for (const auto *C : D.getClausesOfKind()) { + if (ForInscan != (C->getModifier() == OMPC_REDUCTION_inscan)) + continue; Shareds.append(C->varlist_begin(), C->varlist_end()); Privates.append(C->privates().begin(), C->privates().end()); ReductionOps.append(C->reduction_ops().begin(), C->reduction_ops().end()); @@ -1387,6 +1389,9 @@ bool HasAtLeastOneReduction = false; bool IsReductionWithTaskMod = false; for (const auto *C : D.getClausesOfKind()) { + // Do not emit for inscan reductions. + if (C->getModifier() == OMPC_REDUCTION_inscan) + continue; HasAtLeastOneReduction = true; Privates.append(C->privates().begin(), C->privates().end()); LHSExprs.append(C->lhs_exprs().begin(), C->lhs_exprs().end()); @@ -1705,6 +1710,24 @@ getProfileCount(D.getBody())); EmitBlock(NextBB); } + + OMPPrivateScope InscanScope(*this); + EmitOMPReductionClauseInit(D, InscanScope, /*ForInscan=*/true); + bool IsInscanRegion = InscanScope.Privatize(); + if (IsInscanRegion) { + // Need to remember the block before and after scan directive + // to dispatch them correctly depending on the clause used in + // this directive, inclusive or exclusive. For inclusive scan the natural + // order of the blocks is used, for exclusive clause the blocks must be + // executed in reverse order. + OMPBeforeScanBlock = createBasicBlock("omp.before.scan.bb"); + OMPAfterScanBlock = createBasicBlock("omp.after.scan.bb"); + OMPScanExitBlock = createBasicBlock("omp.exit.inscan.bb"); + OMPScanDispatch = createBasicBlock("omp.inscan.dispatch"); + EmitBranch(OMPScanDispatch); + EmitBlock(OMPBeforeScanBlock); + } + // Emit loop variables for C++ range loops. const Stmt *Body = D.getInnermostCapturedStmt()->getCapturedStmt()->IgnoreContainers(); @@ -1714,6 +1737,10 @@ Body, /*TryImperfectlyNestedLoops=*/true), D.getCollapsedNumber()); + // Jump to the dispatcher at the end of the loop body. + if (IsInscanRegion) + EmitBranch(OMPScanExitBlock); + // The end (updates/cleanups). EmitBlock(Continue.getBlock()); BreakContinueStack.pop_back(); @@ -2979,14 +3006,217 @@ return {LBVal, UBVal}; } +/// Emits the code for the directive with inscan reductions. +/// The code is the following: +/// \code +/// size num_iters = ; +/// buffer[num_iters]; +/// #pragma omp ... +/// for (i: 0..) { +/// ; +/// buffer[i] = red; +/// } +/// for (int k = 0; k != ceil(log2(num_iters)); ++k) +/// for (size cnt = last_iter; cnt >= pow(2, k); --k) +/// buffer[i] op= buffer[i-pow(2,k)]; +/// #pragma omp ... +/// for (0..) { +/// red = InclusiveScan ? buffer[i] : buffer[i-1]; +/// ; +/// } +/// \endcode +static void emitScanBasedDirective( + CodeGenFunction &CGF, const OMPLoopDirective &S, + llvm::function_ref NumIteratorsGen, + llvm::function_ref FirstGen, + llvm::function_ref SecondGen) { + llvm::Value *OMPScanNumIterations = CGF.Builder.CreateIntCast( + NumIteratorsGen(CGF), CGF.SizeTy, /*isSigned=*/false); + SmallVector Shareds; + SmallVector Privates; + SmallVector ReductionOps; + SmallVector LHSs; + SmallVector RHSs; + SmallVector CopyOps; + SmallVector CopyArrayTemps; + SmallVector CopyArrayElems; + for (const auto *C : S.getClausesOfKind()) { + assert(C->getModifier() == OMPC_REDUCTION_inscan && + "Only inscan reductions are expected."); + Shareds.append(C->varlist_begin(), C->varlist_end()); + Privates.append(C->privates().begin(), C->privates().end()); + ReductionOps.append(C->reduction_ops().begin(), C->reduction_ops().end()); + LHSs.append(C->lhs_exprs().begin(), C->lhs_exprs().end()); + RHSs.append(C->rhs_exprs().begin(), C->rhs_exprs().end()); + CopyOps.append(C->copy_ops().begin(), C->copy_ops().end()); + CopyArrayTemps.append(C->copy_array_temps().begin(), + C->copy_array_temps().end()); + CopyArrayElems.append(C->copy_array_elems().begin(), + C->copy_array_elems().end()); + } + { + // Emit buffers for each reduction variables. + // ReductionCodeGen is required to emit correctly the code for array + // reductions. + ReductionCodeGen RedCG(Shareds, Shareds, Privates, ReductionOps); + unsigned Count = 0; + auto *ITA = CopyArrayTemps.begin(); + for (const Expr *IRef : Privates) { + const auto *PrivateVD = cast(cast(IRef)->getDecl()); + // Emit variably modified arrays, used for arrays/array sections + // reductions. + if (PrivateVD->getType()->isVariablyModifiedType()) { + RedCG.emitSharedOrigLValue(CGF, Count); + RedCG.emitAggregateType(CGF, Count); + } + CodeGenFunction::OpaqueValueMapping DimMapping( + CGF, + cast( + cast((*ITA)->getType()->getAsArrayTypeUnsafe()) + ->getSizeExpr()), + RValue::get(OMPScanNumIterations)); + // Emit temp buffer. + CGF.EmitVarDecl(*cast(cast(*ITA)->getDecl())); + ++ITA; + ++Count; + } + } + CodeGenFunction::ParentLoopDirectiveForScanRegion ScanRegion(CGF, S); + { + // Emit loop with input phase: + // #pragma omp ... + // for (i: 0..) { + // ; + // buffer[i] = red; + // } + CGF.OMPFirstScanLoop = true; + CodeGenFunction::OMPLocalDeclMapRAII Scope(CGF); + FirstGen(CGF); + } + // Emit prefix reduction: + // for (int k = 0; k <= ceil(log2(n)); ++k) + llvm::BasicBlock *InputBB = CGF.Builder.GetInsertBlock(); + llvm::BasicBlock *LoopBB = CGF.createBasicBlock("omp.outer.log.scan.body"); + llvm::BasicBlock *ExitBB = CGF.createBasicBlock("omp.outer.log.scan.exit"); + llvm::Function *F = CGF.CGM.getIntrinsic(llvm::Intrinsic::log2, CGF.DoubleTy); + llvm::Value *Arg = + CGF.Builder.CreateUIToFP(OMPScanNumIterations, CGF.DoubleTy); + llvm::Value *LogVal = CGF.EmitNounwindRuntimeCall(F, Arg); + F = CGF.CGM.getIntrinsic(llvm::Intrinsic::ceil, CGF.DoubleTy); + LogVal = CGF.EmitNounwindRuntimeCall(F, LogVal); + LogVal = CGF.Builder.CreateFPToUI(LogVal, CGF.IntTy); + llvm::Value *NMin1 = CGF.Builder.CreateNUWSub( + OMPScanNumIterations, llvm::ConstantInt::get(CGF.SizeTy, 1)); + auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, S.getBeginLoc()); + CGF.EmitBlock(LoopBB); + auto *Counter = CGF.Builder.CreatePHI(CGF.IntTy, 2); + // size pow2k = 1; + auto *Pow2K = CGF.Builder.CreatePHI(CGF.SizeTy, 2); + Counter->addIncoming(llvm::ConstantInt::get(CGF.IntTy, 0), InputBB); + Pow2K->addIncoming(llvm::ConstantInt::get(CGF.SizeTy, 1), InputBB); + // for (size i = n - 1; i >= 2 ^ k; --i) + // tmp[i] op= tmp[i-pow2k]; + llvm::BasicBlock *InnerLoopBB = + CGF.createBasicBlock("omp.inner.log.scan.body"); + llvm::BasicBlock *InnerExitBB = + CGF.createBasicBlock("omp.inner.log.scan.exit"); + llvm::Value *CmpI = CGF.Builder.CreateICmpUGE(NMin1, Pow2K); + CGF.Builder.CreateCondBr(CmpI, InnerLoopBB, InnerExitBB); + CGF.EmitBlock(InnerLoopBB); + auto *IVal = CGF.Builder.CreatePHI(CGF.SizeTy, 2); + IVal->addIncoming(NMin1, LoopBB); + { + CodeGenFunction::OMPPrivateScope PrivScope(CGF); + auto *ILHS = LHSs.begin(); + auto *IRHS = RHSs.begin(); + for (const Expr *CopyArrayElem : CopyArrayElems) { + const auto *LHSVD = cast(cast(*ILHS)->getDecl()); + const auto *RHSVD = cast(cast(*IRHS)->getDecl()); + Address LHSAddr = Address::invalid(); + { + CodeGenFunction::OpaqueValueMapping IdxMapping( + CGF, + cast( + cast(CopyArrayElem)->getIdx()), + RValue::get(IVal)); + LHSAddr = CGF.EmitLValue(CopyArrayElem).getAddress(CGF); + } + PrivScope.addPrivate(LHSVD, [LHSAddr]() { return LHSAddr; }); + Address RHSAddr = Address::invalid(); + { + llvm::Value *OffsetIVal = CGF.Builder.CreateNUWSub(IVal, Pow2K); + CodeGenFunction::OpaqueValueMapping IdxMapping( + CGF, + cast( + cast(CopyArrayElem)->getIdx()), + RValue::get(OffsetIVal)); + RHSAddr = CGF.EmitLValue(CopyArrayElem).getAddress(CGF); + } + PrivScope.addPrivate(RHSVD, [RHSAddr]() { return RHSAddr; }); + ++ILHS; + ++IRHS; + } + PrivScope.Privatize(); + CGF.CGM.getOpenMPRuntime().emitReduction( + CGF, S.getEndLoc(), Privates, LHSs, RHSs, ReductionOps, + {/*WithNowait=*/true, /*SimpleReduction=*/true, OMPD_unknown}); + } + llvm::Value *NextIVal = + CGF.Builder.CreateNUWSub(IVal, llvm::ConstantInt::get(CGF.SizeTy, 1)); + IVal->addIncoming(NextIVal, CGF.Builder.GetInsertBlock()); + CmpI = CGF.Builder.CreateICmpUGE(NextIVal, Pow2K); + CGF.Builder.CreateCondBr(CmpI, InnerLoopBB, InnerExitBB); + CGF.EmitBlock(InnerExitBB); + llvm::Value *Next = + CGF.Builder.CreateNUWAdd(Counter, llvm::ConstantInt::get(CGF.IntTy, 1)); + Counter->addIncoming(Next, CGF.Builder.GetInsertBlock()); + // pow2k <<= 1; + llvm::Value *NextPow2K = CGF.Builder.CreateShl(Pow2K, 1, "", /*HasNUW=*/true); + Pow2K->addIncoming(NextPow2K, CGF.Builder.GetInsertBlock()); + llvm::Value *Cmp = CGF.Builder.CreateICmpNE(Next, LogVal); + CGF.Builder.CreateCondBr(Cmp, LoopBB, ExitBB); + auto DL1 = ApplyDebugLocation::CreateDefaultArtificial(CGF, S.getEndLoc()); + CGF.EmitBlock(ExitBB); + + CGF.OMPFirstScanLoop = false; + SecondGen(CGF); +} + void CodeGenFunction::EmitOMPForDirective(const OMPForDirective &S) { bool HasLastprivates = false; auto &&CodeGen = [&S, &HasLastprivates](CodeGenFunction &CGF, PrePostActionTy &) { - OMPCancelStackRAII CancelRegion(CGF, OMPD_for, S.hasCancel()); - HasLastprivates = CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(), - emitForLoopBounds, - emitDispatchForLoopBounds); + if (llvm::any_of(S.getClausesOfKind(), + [](const OMPReductionClause *C) { + return C->getModifier() == OMPC_REDUCTION_inscan; + })) { + const auto &&NumIteratorsGen = [&S](CodeGenFunction &CGF) { + OMPLocalDeclMapRAII Scope(CGF); + OMPLoopScope LoopScope(CGF, S); + return CGF.EmitScalarExpr(S.getNumIterations()); + }; + const auto &&FirstGen = [&S](CodeGenFunction &CGF) { + OMPCancelStackRAII CancelRegion(CGF, OMPD_for, S.hasCancel()); + (void)CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(), + emitForLoopBounds, + emitDispatchForLoopBounds); + // Emit an implicit barrier at the end. + CGF.CGM.getOpenMPRuntime().emitBarrierCall(CGF, S.getBeginLoc(), + OMPD_for); + }; + const auto &&SecondGen = [&S, &HasLastprivates](CodeGenFunction &CGF) { + OMPCancelStackRAII CancelRegion(CGF, OMPD_for, S.hasCancel()); + HasLastprivates = CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(), + emitForLoopBounds, + emitDispatchForLoopBounds); + }; + emitScanBasedDirective(CGF, S, NumIteratorsGen, FirstGen, SecondGen); + } else { + OMPCancelStackRAII CancelRegion(CGF, OMPD_for, S.hasCancel()); + HasLastprivates = CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(), + emitForLoopBounds, + emitDispatchForLoopBounds); + } }; { auto LPCRegion = @@ -3961,6 +4191,112 @@ } } +void CodeGenFunction::EmitOMPScanDirective(const OMPScanDirective &S) { + // Do not emit code for non-simd directives in simd-only mode. + if (getLangOpts().OpenMPSimd && !OMPParentLoopDirectiveForScan) + return; + const OMPExecutableDirective &ParentDir = *OMPParentLoopDirectiveForScan; + SmallVector Shareds; + SmallVector Privates; + SmallVector LHSs; + SmallVector RHSs; + SmallVector CopyOps; + SmallVector CopyArrayTemps; + SmallVector CopyArrayElems; + for (const auto *C : ParentDir.getClausesOfKind()) { + if (C->getModifier() != OMPC_REDUCTION_inscan) + continue; + Shareds.append(C->varlist_begin(), C->varlist_end()); + Privates.append(C->privates().begin(), C->privates().end()); + LHSs.append(C->lhs_exprs().begin(), C->lhs_exprs().end()); + RHSs.append(C->rhs_exprs().begin(), C->rhs_exprs().end()); + CopyOps.append(C->copy_ops().begin(), C->copy_ops().end()); + CopyArrayTemps.append(C->copy_array_temps().begin(), + C->copy_array_temps().end()); + CopyArrayElems.append(C->copy_array_elems().begin(), + C->copy_array_elems().end()); + } + bool IsInclusive = S.hasClausesOfKind(); + if (!IsInclusive) { + EmitBranch(BreakContinueStack.back().ContinueBlock.getBlock()); + EmitBlock(OMPScanExitBlock); + } + if (OMPFirstScanLoop) { + // Emit buffer[i] = red; at the end of the input phase. + const auto *IVExpr = cast(ParentDir) + .getIterationVariable() + ->IgnoreParenImpCasts(); + LValue IdxLVal = EmitLValue(IVExpr); + llvm::Value *IdxVal = EmitLoadOfScalar(IdxLVal, IVExpr->getExprLoc()); + IdxVal = Builder.CreateIntCast(IdxVal, SizeTy, /*isSigned=*/false); + for (unsigned I = 0, E = CopyArrayElems.size(); I < E; ++I) { + const Expr *PrivateExpr = Privates[I]; + const Expr *OrigExpr = Shareds[I]; + const Expr *CopyArrayElem = CopyArrayElems[I]; + OpaqueValueMapping IdxMapping( + *this, + cast( + cast(CopyArrayElem)->getIdx()), + RValue::get(IdxVal)); + LValue DestLVal = EmitLValue(CopyArrayElem); + LValue SrcLVal = EmitLValue(OrigExpr); + EmitOMPCopy(PrivateExpr->getType(), DestLVal.getAddress(*this), + SrcLVal.getAddress(*this), + cast(cast(LHSs[I])->getDecl()), + cast(cast(RHSs[I])->getDecl()), + CopyOps[I]); + } + } + EmitBranch(BreakContinueStack.back().ContinueBlock.getBlock()); + if (IsInclusive) { + EmitBlock(OMPScanExitBlock); + EmitBranch(BreakContinueStack.back().ContinueBlock.getBlock()); + } + EmitBlock(OMPScanDispatch); + if (!OMPFirstScanLoop) { + // Emit red = buffer[i]; at the entrance to the scan phase. + const auto *IVExpr = cast(ParentDir) + .getIterationVariable() + ->IgnoreParenImpCasts(); + LValue IdxLVal = EmitLValue(IVExpr); + llvm::Value *IdxVal = EmitLoadOfScalar(IdxLVal, IVExpr->getExprLoc()); + IdxVal = Builder.CreateIntCast(IdxVal, SizeTy, /*isSigned=*/false); + llvm::BasicBlock *ExclusiveExitBB = nullptr; + if (!IsInclusive) { + llvm::BasicBlock *ContBB = createBasicBlock("omp.exclusive.dec"); + ExclusiveExitBB = createBasicBlock("omp.exclusive.copy.exit"); + llvm::Value *Cmp = Builder.CreateIsNull(IdxVal); + Builder.CreateCondBr(Cmp, ExclusiveExitBB, ContBB); + EmitBlock(ContBB); + // Use idx - 1 iteration for exclusive scan. + IdxVal = Builder.CreateNUWSub(IdxVal, llvm::ConstantInt::get(SizeTy, 1)); + } + for (unsigned I = 0, E = CopyArrayElems.size(); I < E; ++I) { + const Expr *PrivateExpr = Privates[I]; + const Expr *OrigExpr = Shareds[I]; + const Expr *CopyArrayElem = CopyArrayElems[I]; + OpaqueValueMapping IdxMapping( + *this, + cast( + cast(CopyArrayElem)->getIdx()), + RValue::get(IdxVal)); + LValue SrcLVal = EmitLValue(CopyArrayElem); + LValue DestLVal = EmitLValue(OrigExpr); + EmitOMPCopy(PrivateExpr->getType(), DestLVal.getAddress(*this), + SrcLVal.getAddress(*this), + cast(cast(LHSs[I])->getDecl()), + cast(cast(RHSs[I])->getDecl()), + CopyOps[I]); + } + if (!IsInclusive) { + EmitBlock(ExclusiveExitBB); + } + } + EmitBranch((OMPFirstScanLoop == IsInclusive) ? OMPBeforeScanBlock + : OMPAfterScanBlock); + EmitBlock(OMPAfterScanBlock); +} + void CodeGenFunction::EmitOMPDistributeLoop(const OMPLoopDirective &S, const CodeGenLoopTy &CodeGenLoop, Expr *IncExpr) { @@ -5950,6 +6286,10 @@ void CodeGenFunction::EmitSimpleOMPExecutableDirective( const OMPExecutableDirective &D) { + if (const auto *SD = dyn_cast(&D)) { + EmitOMPScanDirective(*SD); + return; + } if (!D.hasAssociatedStmt() || !D.getAssociatedStmt()) return; auto &&CodeGen = [&D](CodeGenFunction &CGF, PrePostActionTy &Action) { diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h --- a/clang/lib/CodeGen/CodeGenFunction.h +++ b/clang/lib/CodeGen/CodeGenFunction.h @@ -673,6 +673,32 @@ llvm::BasicBlock *getInvokeDestImpl(); + /// Parent loop-based directive for scan directive. + const OMPExecutableDirective *OMPParentLoopDirectiveForScan = nullptr; + llvm::BasicBlock *OMPBeforeScanBlock = nullptr; + llvm::BasicBlock *OMPAfterScanBlock = nullptr; + llvm::BasicBlock *OMPScanExitBlock = nullptr; + llvm::BasicBlock *OMPScanDispatch = nullptr; + bool OMPFirstScanLoop = false; + + /// Manages parent directive for scan directives. + class ParentLoopDirectiveForScanRegion { + CodeGenFunction &CGF; + const OMPExecutableDirective &ParentLoopDirectiveForScan; + + public: + ParentLoopDirectiveForScanRegion( + CodeGenFunction &CGF, + const OMPExecutableDirective &ParentLoopDirectiveForScan) + : CGF(CGF), + ParentLoopDirectiveForScan(*CGF.OMPParentLoopDirectiveForScan) { + CGF.OMPParentLoopDirectiveForScan = &ParentLoopDirectiveForScan; + } + ~ParentLoopDirectiveForScanRegion() { + CGF.OMPParentLoopDirectiveForScan = &ParentLoopDirectiveForScan; + } + }; + template typename DominatingValue::saved_type saveValueInCond(T value) { return DominatingValue::save(*this, value); @@ -3201,7 +3227,8 @@ /// proper codegen in internal captured statement. /// void EmitOMPReductionClauseInit(const OMPExecutableDirective &D, - OMPPrivateScope &PrivateScope); + OMPPrivateScope &PrivateScope, + bool ForInscan = false); /// Emit final update of reduction values to original variables at /// the end of the directive. /// @@ -3260,6 +3287,7 @@ void EmitOMPTaskgroupDirective(const OMPTaskgroupDirective &S); void EmitOMPFlushDirective(const OMPFlushDirective &S); void EmitOMPDepobjDirective(const OMPDepobjDirective &S); + void EmitOMPScanDirective(const OMPScanDirective &S); void EmitOMPOrderedDirective(const OMPOrderedDirective &S); void EmitOMPAtomicDirective(const OMPAtomicDirective &S); void EmitOMPTargetDirective(const OMPTargetDirective &S); diff --git a/clang/lib/Sema/SemaOpenMP.cpp b/clang/lib/Sema/SemaOpenMP.cpp --- a/clang/lib/Sema/SemaOpenMP.cpp +++ b/clang/lib/Sema/SemaOpenMP.cpp @@ -9176,6 +9176,14 @@ diag::err_omp_scan_single_clause_expected); return StmtError(); } + // Check that scan directive is used in the scopeof the OpenMP loop body. + if (Scope *S = DSAStack->getCurScope()) { + Scope *ParentS = S->getParent(); + if (!ParentS || ParentS->getParent() != ParentS->getBreakParent() || + !ParentS->getBreakParent()->isOpenMPLoopScope()) + return StmtError(Diag(StartLoc, diag::err_omp_orphaned_device_directive) + << getOpenMPDirectiveName(OMPD_scan) << 5); + } // Check that only one instance of scan directives is used in the same outer // region. if (DSAStack->doesParentHasScanDirective()) { @@ -14461,6 +14469,12 @@ SmallVector RHSs; /// Reduction operation expression. SmallVector ReductionOps; + /// inscan copy operation expressions. + SmallVector InscanCopyOps; + /// inscan copy temp array expressions for prefix sums. + SmallVector InscanCopyArrayTemps; + /// inscan copy temp array element expressions for prefix sums. + SmallVector InscanCopyArrayElems; /// Taskgroup descriptors for the corresponding reduction items in /// in_reduction clauses. SmallVector TaskgroupDescriptors; @@ -14478,6 +14492,11 @@ LHSs.reserve(Size); RHSs.reserve(Size); ReductionOps.reserve(Size); + if (RedModifier == OMPC_REDUCTION_inscan) { + InscanCopyOps.reserve(Size); + InscanCopyArrayTemps.reserve(Size); + InscanCopyArrayElems.reserve(Size); + } TaskgroupDescriptors.reserve(Size); ExprCaptures.reserve(Size); ExprPostUpdates.reserve(Size); @@ -14491,16 +14510,31 @@ RHSs.emplace_back(nullptr); ReductionOps.emplace_back(ReductionOp); TaskgroupDescriptors.emplace_back(nullptr); + if (RedModifier == OMPC_REDUCTION_inscan) { + InscanCopyOps.push_back(nullptr); + InscanCopyArrayTemps.push_back(nullptr); + InscanCopyArrayElems.push_back(nullptr); + } } /// Stores reduction data. void push(Expr *Item, Expr *Private, Expr *LHS, Expr *RHS, Expr *ReductionOp, - Expr *TaskgroupDescriptor) { + Expr *TaskgroupDescriptor, Expr *CopyOp, Expr *CopyArrayTemp, + Expr *CopyArrayElem) { Vars.emplace_back(Item); Privates.emplace_back(Private); LHSs.emplace_back(LHS); RHSs.emplace_back(RHS); ReductionOps.emplace_back(ReductionOp); TaskgroupDescriptors.emplace_back(TaskgroupDescriptor); + if (RedModifier == OMPC_REDUCTION_inscan) { + InscanCopyOps.push_back(CopyOp); + InscanCopyArrayTemps.push_back(CopyArrayTemp); + InscanCopyArrayElems.push_back(CopyArrayElem); + } else { + assert(CopyOp == nullptr && CopyArrayTemp == nullptr && + CopyArrayElem == nullptr && + "Copy operation must be used for inscan reductions only."); + } } }; } // namespace @@ -14893,11 +14927,11 @@ if (isOpenMPTargetExecutionDirective(Stack->getCurrentDirective())) { S.Diag(ELoc, diag::err_omp_reduction_vla_unsupported) << !!OASE; S.Diag(ELoc, diag::note_vla_unsupported); + continue; } else { S.targetDiag(ELoc, diag::err_omp_reduction_vla_unsupported) << !!OASE; S.targetDiag(ELoc, diag::note_vla_unsupported); } - continue; } // For arrays/array sections only: // Create pseudo array type for private copy. The size for this array will @@ -15102,6 +15136,40 @@ continue; } + // Add copy operations for inscan reductions. + // LHS = RHS; + ExprResult CopyOpRes, TempArrayRes, TempArrayElem; + if (ClauseKind == OMPC_reduction && + RD.RedModifier == OMPC_REDUCTION_inscan) { + ExprResult RHS = S.DefaultLvalueConversion(RHSDRE); + CopyOpRes = S.BuildBinOp(Stack->getCurScope(), ELoc, BO_Assign, LHSDRE, + RHS.get()); + if (!CopyOpRes.isUsable()) + continue; + CopyOpRes = + S.ActOnFinishFullExpr(CopyOpRes.get(), /*DiscardedValue=*/true); + if (!CopyOpRes.isUsable()) + continue; + // Build temp array for prefix sum. + auto *Dim = new (S.Context) + OpaqueValueExpr(ELoc, S.Context.getSizeType(), VK_RValue); + QualType ArrayTy = + S.Context.getVariableArrayType(PrivateTy, Dim, ArrayType::Normal, + /*IndexTypeQuals=*/0, {ELoc, ELoc}); + VarDecl *TempArrayVD = + buildVarDecl(S, ELoc, ArrayTy, D->getName(), + D->hasAttrs() ? &D->getAttrs() : nullptr); + // Add a constructor to the temp decl. + S.ActOnUninitializedDecl(TempArrayVD); + TempArrayRes = buildDeclRefExpr(S, TempArrayVD, ArrayTy, ELoc); + TempArrayElem = + S.DefaultFunctionArrayLvalueConversion(TempArrayRes.get()); + auto *Idx = new (S.Context) + OpaqueValueExpr(ELoc, S.Context.getSizeType(), VK_RValue); + TempArrayElem = S.CreateBuiltinArraySubscriptExpr(TempArrayElem.get(), + ELoc, Idx, ELoc); + } + // OpenMP [2.15.4.6, Restrictions, p.2] // A list item that appears in an in_reduction clause of a task construct // must appear in a task_reduction clause of a construct associated with a @@ -15203,7 +15271,8 @@ Stack->addTaskgroupReductionData(D, ReductionIdRange, BOK); } RD.push(VarsExpr, PrivateDRE, LHSDRE, RHSDRE, ReductionOp.get(), - TaskgroupDescriptor); + TaskgroupDescriptor, CopyOpRes.get(), TempArrayRes.get(), + TempArrayElem.get()); } return RD.Vars.empty(); } @@ -15246,7 +15315,8 @@ return OMPReductionClause::Create( Context, StartLoc, LParenLoc, ModifierLoc, ColonLoc, EndLoc, Modifier, RD.Vars, ReductionIdScopeSpec.getWithLocInContext(Context), ReductionId, - RD.Privates, RD.LHSs, RD.RHSs, RD.ReductionOps, + RD.Privates, RD.LHSs, RD.RHSs, RD.ReductionOps, RD.InscanCopyOps, + RD.InscanCopyArrayTemps, RD.InscanCopyArrayElems, buildPreInits(Context, RD.ExprCaptures), buildPostUpdate(*this, RD.ExprPostUpdates)); } diff --git a/clang/lib/Serialization/ASTReader.cpp b/clang/lib/Serialization/ASTReader.cpp --- a/clang/lib/Serialization/ASTReader.cpp +++ b/clang/lib/Serialization/ASTReader.cpp @@ -11822,9 +11822,12 @@ case llvm::omp::OMPC_shared: C = OMPSharedClause::CreateEmpty(Context, Record.readInt()); break; - case llvm::omp::OMPC_reduction: - C = OMPReductionClause::CreateEmpty(Context, Record.readInt()); + case llvm::omp::OMPC_reduction: { + unsigned N = Record.readInt(); + auto Modifier = Record.readEnum(); + C = OMPReductionClause::CreateEmpty(Context, N, Modifier); break; + } case llvm::omp::OMPC_task_reduction: C = OMPTaskReductionClause::CreateEmpty(Context, Record.readInt()); break; @@ -12205,7 +12208,6 @@ C->setLParenLoc(Record.readSourceLocation()); C->setModifierLoc(Record.readSourceLocation()); C->setColonLoc(Record.readSourceLocation()); - C->setModifier(Record.readEnum()); NestedNameSpecifierLoc NNSL = Record.readNestedNameSpecifierLoc(); DeclarationNameInfo DNI = Record.readDeclarationNameInfo(); C->setQualifierLoc(NNSL); @@ -12233,6 +12235,20 @@ for (unsigned i = 0; i != NumVars; ++i) Vars.push_back(Record.readSubExpr()); C->setReductionOps(Vars); + if (C->getModifier() == OMPC_REDUCTION_inscan) { + Vars.clear(); + for (unsigned i = 0; i != NumVars; ++i) + Vars.push_back(Record.readSubExpr()); + C->setInscanCopyOps(Vars); + Vars.clear(); + for (unsigned i = 0; i != NumVars; ++i) + Vars.push_back(Record.readSubExpr()); + C->setInscanCopyArrayTemps(Vars); + Vars.clear(); + for (unsigned i = 0; i != NumVars; ++i) + Vars.push_back(Record.readSubExpr()); + C->setInscanCopyArrayElems(Vars); + } } void OMPClauseReader::VisitOMPTaskReductionClause(OMPTaskReductionClause *C) { diff --git a/clang/lib/Serialization/ASTWriter.cpp b/clang/lib/Serialization/ASTWriter.cpp --- a/clang/lib/Serialization/ASTWriter.cpp +++ b/clang/lib/Serialization/ASTWriter.cpp @@ -6305,11 +6305,11 @@ void OMPClauseWriter::VisitOMPReductionClause(OMPReductionClause *C) { Record.push_back(C->varlist_size()); + Record.writeEnum(C->getModifier()); VisitOMPClauseWithPostUpdate(C); Record.AddSourceLocation(C->getLParenLoc()); Record.AddSourceLocation(C->getModifierLoc()); Record.AddSourceLocation(C->getColonLoc()); - Record.writeEnum(C->getModifier()); Record.AddNestedNameSpecifierLoc(C->getQualifierLoc()); Record.AddDeclarationNameInfo(C->getNameInfo()); for (auto *VE : C->varlists()) @@ -6322,6 +6322,14 @@ Record.AddStmt(E); for (auto *E : C->reduction_ops()) Record.AddStmt(E); + if (C->getModifier() == clang::OMPC_REDUCTION_inscan) { + for (auto *E : C->copy_ops()) + Record.AddStmt(E); + for (auto *E : C->copy_array_temps()) + Record.AddStmt(E); + for (auto *E : C->copy_array_elems()) + Record.AddStmt(E); + } } void OMPClauseWriter::VisitOMPTaskReductionClause(OMPTaskReductionClause *C) { diff --git a/clang/test/OpenMP/for_scan_codegen.cpp b/clang/test/OpenMP/for_scan_codegen.cpp new file mode 100644 --- /dev/null +++ b/clang/test/OpenMP/for_scan_codegen.cpp @@ -0,0 +1,311 @@ +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=50 -x c++ -triple x86_64-unknown-unknown -emit-llvm %s -o - | FileCheck %s +// RUN: %clang_cc1 -fopenmp -fopenmp-version=50 -x c++ -std=c++11 -triple x86_64-unknown-unknown -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp -fopenmp-version=50 -x c++ -triple x86_64-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s + +// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=50 -x c++ -triple x86_64-unknown-unknown -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s +// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=50 -x c++ -std=c++11 -triple x86_64-unknown-unknown -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=50 -x c++ -triple x86_64-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s +// SIMD-ONLY0-NOT: {{__kmpc|__tgt}} +// expected-no-diagnostics +#ifndef HEADER +#define HEADER + +void foo(); +void bar(); + +// CHECK: define void @{{.*}}baz{{.*}}(i32 %n) +void baz(int n) { + static float a[10]; + static double b; + // CHECK: call i8* @llvm.stacksave() + // CHECK: [[A_BUF_SIZE:%.+]] = mul nuw i64 10, [[NUM_ELEMS:%[^,]+]] + + // float a_buffer[10][n]; + // CHECK: [[A_BUF:%.+]] = alloca float, i64 [[A_BUF_SIZE]], + + // double b_buffer[10]; + // CHECK: [[B_BUF:%.+]] = alloca double, i64 10, +#pragma omp for reduction(inscan, +:a[:n], b) + for (int i = 0; i < 10; ++i) { + // CHECK: call void @__kmpc_for_static_init_4( + // CHECK: call i8* @llvm.stacksave() + // CHECK: store float 0.000000e+00, float* % + // CHECK: store double 0.000000e+00, double* [[B_PRIV_ADDR:%.+]], + // CHECK: br label %[[DISPATCH:[^,]+]] + // CHECK: [[INPUT_PHASE:.+]]: + // CHECK: call void @{{.+}}foo{{.+}}() + + // a_buffer[i][0..n] = a_priv[[0..n]; + // CHECK: [[BASE_IDX_I:%.+]] = load i32, i32* [[IV_ADDR:%.+]], + // CHECK: [[BASE_IDX:%.+]] = zext i32 [[BASE_IDX_I]] to i64 + // CHECK: [[IDX:%.+]] = mul nsw i64 [[BASE_IDX]], [[NUM_ELEMS]] + // CHECK: [[A_BUF_IDX:%.+]] = getelementptr inbounds float, float* [[A_BUF]], i64 [[IDX]] + // CHECK: [[A_PRIV:%.+]] = getelementptr inbounds [10 x float], [10 x float]* [[A_PRIV_ADDR:%.+]], i64 0, i64 0 + // CHECK: [[BYTES:%.+]] = mul nuw i64 [[NUM_ELEMS:%.+]], 4 + // CHECK: [[DEST:%.+]] = bitcast float* [[A_BUF_IDX]] to i8* + // CHECK: [[SRC:%.+]] = bitcast float* [[A_PRIV]] to i8* + // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* {{.*}}[[DEST]], i8* {{.*}}[[SRC]], i64 [[BYTES]], i1 false) + + // b_buffer[i] = b_priv; + // CHECK: [[B_BUF_IDX:%.+]] = getelementptr inbounds double, double* [[B_BUF]], i64 [[BASE_IDX]] + // CHECK: [[B_PRIV:%.+]] = load double, double* [[B_PRIV_ADDR]], + // CHECK: store double [[B_PRIV]], double* [[B_BUF_IDX]], + // CHECK: br label %[[LOOP_CONTINUE:.+]] + + // CHECK: [[DISPATCH]]: + // CHECK: br label %[[INPUT_PHASE]] + // CHECK: [[LOOP_CONTINUE]]: + // CHECK: call void @llvm.stackrestore(i8* % + // CHECK: call void @__kmpc_for_static_fini( + // CHECK: call void @__kmpc_barrier( + foo(); +#pragma omp scan inclusive(a[:n], b) + // CHECK: [[LOG2_10:%.+]] = call double @llvm.log2.f64(double 1.000000e+01) + // CHECK: [[CEIL_LOG2_10:%.+]] = call double @llvm.ceil.f64(double [[LOG2_10]]) + // CHECK: [[CEIL_LOG2_10_INT:%.+]] = fptoui double [[CEIL_LOG2_10]] to i32 + // CHECK: br label %[[OUTER_BODY:[^,]+]] + // CHECK: [[OUTER_BODY]]: + // CHECK: [[K:%.+]] = phi i32 [ 0, %{{.+}} ], [ [[K_NEXT:%.+]], %{{.+}} ] + // CHECK: [[K2POW:%.+]] = phi i64 [ 1, %{{.+}} ], [ [[K2POW_NEXT:%.+]], %{{.+}} ] + // CHECK: [[CMP:%.+]] = icmp uge i64 9, [[K2POW]] + // CHECK: br i1 [[CMP]], label %[[INNER_BODY:[^,]+]], label %[[INNER_EXIT:[^,]+]] + // CHECK: [[INNER_BODY]]: + // CHECK: [[I:%.+]] = phi i64 [ 9, %[[OUTER_BODY]] ], [ [[I_PREV:%.+]], %{{.+}} ] + + // a_buffer[i] += a_buffer[i-pow(2, k)]; + // CHECK: [[IDX:%.+]] = mul nsw i64 [[I]], [[NUM_ELEMS]] + // CHECK: [[A_BUF_IDX:%.+]] = getelementptr inbounds float, float* [[A_BUF]], i64 [[IDX]] + // CHECK: [[IDX_SUB_K2POW:%.+]] = sub nuw i64 [[I]], [[K2POW]] + // CHECK: [[IDX:%.+]] = mul nsw i64 [[IDX_SUB_K2POW]], [[NUM_ELEMS]] + // CHECK: [[A_BUF_IDX_SUB_K2POW:%.+]] = getelementptr inbounds float, float* [[A_BUF]], i64 [[IDX]] + // CHECK: [[B_BUF_IDX:%.+]] = getelementptr inbounds double, double* [[B_BUF]], i64 [[I]] + // CHECK: [[IDX_SUB_K2POW:%.+]] = sub nuw i64 [[I]], [[K2POW]] + // CHECK: [[B_BUF_IDX_SUB_K2POW:%.+]] = getelementptr inbounds double, double* [[B_BUF]], i64 [[IDX_SUB_K2POW]] + // CHECK: [[A_BUF_END:%.+]] = getelementptr float, float* [[A_BUF_IDX]], i64 [[NUM_ELEMS]] + // CHECK: [[ISEMPTY:%.+]] = icmp eq float* [[A_BUF_IDX]], [[A_BUF_END]] + // CHECK: br i1 [[ISEMPTY]], label %[[RED_DONE:[^,]+]], label %[[RED_BODY:[^,]+]] + // CHECK: [[RED_BODY]]: + // CHECK: [[A_BUF_IDX_SUB_K2POW_ELEM:%.+]] = phi float* [ [[A_BUF_IDX_SUB_K2POW]], %[[INNER_BODY]] ], [ [[A_BUF_IDX_SUB_K2POW_NEXT:%.+]], %[[RED_BODY]] ] + // CHECK: [[A_BUF_IDX_ELEM:%.+]] = phi float* [ [[A_BUF_IDX]], %[[INNER_BODY]] ], [ [[A_BUF_IDX_NEXT:%.+]], %[[RED_BODY]] ] + // CHECK: [[A_BUF_IDX_VAL:%.+]] = load float, float* [[A_BUF_IDX_ELEM]], + // CHECK: [[A_BUF_IDX_SUB_K2POW_VAL:%.+]] = load float, float* [[A_BUF_IDX_SUB_K2POW_ELEM]], + // CHECK: [[RED:%.+]] = fadd float [[A_BUF_IDX_VAL]], [[A_BUF_IDX_SUB_K2POW_VAL]] + // CHECK: store float [[RED]], float* [[A_BUF_IDX_ELEM]], + // CHECK: [[A_BUF_IDX_NEXT]] = getelementptr float, float* [[A_BUF_IDX_ELEM]], i32 1 + // CHECK: [[A_BUF_IDX_SUB_K2POW_NEXT]] = getelementptr float, float* [[A_BUF_IDX_SUB_K2POW_ELEM]], i32 1 + // CHECK: [[DONE:%.+]] = icmp eq float* [[A_BUF_IDX_NEXT]], [[A_BUF_END]] + // CHECK: br i1 [[DONE]], label %[[RED_DONE]], label %[[RED_BODY]] + // CHECK: [[RED_DONE]]: + + // b_buffer[i] += b_buffer[i-pow(2, k)]; + // CHECK: [[B_BUF_IDX_VAL:%.+]] = load double, double* [[B_BUF_IDX]], + // CHECK: [[B_BUF_IDX_SUB_K2POW_VAL:%.+]] = load double, double* [[B_BUF_IDX_SUB_K2POW]], + // CHECK: [[RED:%.+]] = fadd double [[B_BUF_IDX_VAL]], [[B_BUF_IDX_SUB_K2POW_VAL]] + // CHECK: store double [[RED]], double* [[B_BUF_IDX]], + + // --i; + // CHECK: [[I_PREV:%.+]] = sub nuw i64 [[I]], 1 + // CHECK: [[CMP:%.+]] = icmp uge i64 [[I_PREV]], [[K2POW]] + // CHECK: br i1 [[CMP]], label %[[INNER_BODY]], label %[[INNER_EXIT]] + // CHECK: [[INNER_EXIT]]: + + // ++k; + // CHECK: [[K_NEXT]] = add nuw i32 [[K]], 1 + // k2pow <<= 1; + // CHECK: [[K2POW_NEXT]] = shl nuw i64 [[K2POW]], 1 + // CHECK: [[CMP:%.+]] = icmp ne i32 [[K_NEXT]], [[CEIL_LOG2_10_INT]] + // CHECK: br i1 [[CMP]], label %[[OUTER_BODY]], label %[[OUTER_EXIT:[^,]+]] + // CHECK: [[OUTER_EXIT]]: + bar(); + // CHECK: call void @__kmpc_for_static_init_4( + // CHECK: call i8* @llvm.stacksave() + // CHECK: store float 0.000000e+00, float* % + // CHECK: store double 0.000000e+00, double* [[B_PRIV_ADDR:%.+]], + // CHECK: br label %[[DISPATCH:[^,]+]] + + // Skip the before scan body. + // CHECK: call void @{{.+}}foo{{.+}}() + + // CHECK: [[EXIT_INSCAN:[^,]+]]: + // CHECK: br label %[[LOOP_CONTINUE:[^,]+]] + + // CHECK: [[DISPATCH]]: + // a_priv[[0..n] = a_buffer[i][0..n]; + // CHECK: [[BASE_IDX_I:%.+]] = load i32, i32* [[IV_ADDR:%.+]], + // CHECK: [[BASE_IDX:%.+]] = zext i32 [[BASE_IDX_I]] to i64 + // CHECK: [[IDX:%.+]] = mul nsw i64 [[BASE_IDX]], [[NUM_ELEMS]] + // CHECK: [[A_BUF_IDX:%.+]] = getelementptr inbounds float, float* [[A_BUF]], i64 [[IDX]] + // CHECK: [[A_PRIV:%.+]] = getelementptr inbounds [10 x float], [10 x float]* [[A_PRIV_ADDR:%.+]], i64 0, i64 0 + // CHECK: [[BYTES:%.+]] = mul nuw i64 [[NUM_ELEMS:%.+]], 4 + // CHECK: [[DEST:%.+]] = bitcast float* [[A_PRIV]] to i8* + // CHECK: [[SRC:%.+]] = bitcast float* [[A_BUF_IDX]] to i8* + // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* {{.*}}[[DEST]], i8* {{.*}}[[SRC]], i64 [[BYTES]], i1 false) + + // b_priv = b_buffer[i]; + // CHECK: [[B_BUF_IDX:%.+]] = getelementptr inbounds double, double* [[B_BUF]], i64 [[BASE_IDX]] + // CHECK: [[B_BUF_IDX_VAL:%.+]] = load double, double* [[B_BUF_IDX]], + // CHECK: store double [[B_BUF_IDX_VAL]], double* [[B_PRIV_ADDR]], + // CHECK: br label %[[SCAN_PHASE:[^,]+]] + + // CHECK: [[SCAN_PHASE]]: + // CHECK: call void @{{.+}}bar{{.+}}() + // CHECK: br label %[[EXIT_INSCAN]] + + // CHECK: [[LOOP_CONTINUE]]: + // CHECK: call void @llvm.stackrestore(i8* % + // CHECK: call void @__kmpc_for_static_fini( + // CHECK: call void @llvm.stackrestore(i8* + // CHECK: call void @__kmpc_barrier( + } + + // CHECK: call i8* @llvm.stacksave() + // CHECK: [[A_BUF_SIZE:%.+]] = mul nuw i64 10, [[NUM_ELEMS:%[^,]+]] + + // float a_buffer[10][n]; + // CHECK: [[A_BUF:%.+]] = alloca float, i64 [[A_BUF_SIZE]], + + // double b_buffer[10]; + // CHECK: [[B_BUF:%.+]] = alloca double, i64 10, +#pragma omp for reduction(inscan, +:a[:n], b) + for (int i = 0; i < 10; ++i) { + // CHECK: call void @__kmpc_for_static_init_4( + // CHECK: call i8* @llvm.stacksave() + // CHECK: store float 0.000000e+00, float* % + // CHECK: store double 0.000000e+00, double* [[B_PRIV_ADDR:%.+]], + // CHECK: br label %[[DISPATCH:[^,]+]] + + // Skip the before scan body. + // CHECK: call void @{{.+}}foo{{.+}}() + + // CHECK: [[EXIT_INSCAN:[^,]+]]: + + // a_buffer[i][0..n] = a_priv[[0..n]; + // CHECK: [[BASE_IDX_I:%.+]] = load i32, i32* [[IV_ADDR:%.+]], + // CHECK: [[BASE_IDX:%.+]] = zext i32 [[BASE_IDX_I]] to i64 + // CHECK: [[IDX:%.+]] = mul nsw i64 [[BASE_IDX]], [[NUM_ELEMS]] + // CHECK: [[A_BUF_IDX:%.+]] = getelementptr inbounds float, float* [[A_BUF]], i64 [[IDX]] + // CHECK: [[A_PRIV:%.+]] = getelementptr inbounds [10 x float], [10 x float]* [[A_PRIV_ADDR:%.+]], i64 0, i64 0 + // CHECK: [[BYTES:%.+]] = mul nuw i64 [[NUM_ELEMS:%.+]], 4 + // CHECK: [[DEST:%.+]] = bitcast float* [[A_BUF_IDX]] to i8* + // CHECK: [[SRC:%.+]] = bitcast float* [[A_PRIV]] to i8* + // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* {{.*}}[[DEST]], i8* {{.*}}[[SRC]], i64 [[BYTES]], i1 false) + + // b_buffer[i] = b_priv; + // CHECK: [[B_BUF_IDX:%.+]] = getelementptr inbounds double, double* [[B_BUF]], i64 [[BASE_IDX]] + // CHECK: [[B_PRIV:%.+]] = load double, double* [[B_PRIV_ADDR]], + // CHECK: store double [[B_PRIV]], double* [[B_BUF_IDX]], + // CHECK: br label %[[LOOP_CONTINUE:[^,]+]] + + // CHECK: [[DISPATCH]]: + // CHECK: br label %[[INPUT_PHASE:[^,]+]] + + // CHECK: [[INPUT_PHASE]]: + // CHECK: call void @{{.+}}bar{{.+}}() + // CHECK: br label %[[EXIT_INSCAN]] + + // CHECK: [[LOOP_CONTINUE]]: + // CHECK: call void @llvm.stackrestore(i8* % + // CHECK: call void @__kmpc_for_static_fini( + // CHECK: call void @__kmpc_barrier( + foo(); +#pragma omp scan exclusive(a[:n], b) + // CHECK: [[LOG2_10:%.+]] = call double @llvm.log2.f64(double 1.000000e+01) + // CHECK: [[CEIL_LOG2_10:%.+]] = call double @llvm.ceil.f64(double [[LOG2_10]]) + // CHECK: [[CEIL_LOG2_10_INT:%.+]] = fptoui double [[CEIL_LOG2_10]] to i32 + // CHECK: br label %[[OUTER_BODY:[^,]+]] + // CHECK: [[OUTER_BODY]]: + // CHECK: [[K:%.+]] = phi i32 [ 0, %{{.+}} ], [ [[K_NEXT:%.+]], %{{.+}} ] + // CHECK: [[K2POW:%.+]] = phi i64 [ 1, %{{.+}} ], [ [[K2POW_NEXT:%.+]], %{{.+}} ] + // CHECK: [[CMP:%.+]] = icmp uge i64 9, [[K2POW]] + // CHECK: br i1 [[CMP]], label %[[INNER_BODY:[^,]+]], label %[[INNER_EXIT:[^,]+]] + // CHECK: [[INNER_BODY]]: + // CHECK: [[I:%.+]] = phi i64 [ 9, %[[OUTER_BODY]] ], [ [[I_PREV:%.+]], %{{.+}} ] + + // a_buffer[i] += a_buffer[i-pow(2, k)]; + // CHECK: [[IDX:%.+]] = mul nsw i64 [[I]], [[NUM_ELEMS]] + // CHECK: [[A_BUF_IDX:%.+]] = getelementptr inbounds float, float* [[A_BUF]], i64 [[IDX]] + // CHECK: [[IDX_SUB_K2POW:%.+]] = sub nuw i64 [[I]], [[K2POW]] + // CHECK: [[IDX:%.+]] = mul nsw i64 [[IDX_SUB_K2POW]], [[NUM_ELEMS]] + // CHECK: [[A_BUF_IDX_SUB_K2POW:%.+]] = getelementptr inbounds float, float* [[A_BUF]], i64 [[IDX]] + // CHECK: [[B_BUF_IDX:%.+]] = getelementptr inbounds double, double* [[B_BUF]], i64 [[I]] + // CHECK: [[IDX_SUB_K2POW:%.+]] = sub nuw i64 [[I]], [[K2POW]] + // CHECK: [[B_BUF_IDX_SUB_K2POW:%.+]] = getelementptr inbounds double, double* [[B_BUF]], i64 [[IDX_SUB_K2POW]] + // CHECK: [[A_BUF_END:%.+]] = getelementptr float, float* [[A_BUF_IDX]], i64 [[NUM_ELEMS]] + // CHECK: [[ISEMPTY:%.+]] = icmp eq float* [[A_BUF_IDX]], [[A_BUF_END]] + // CHECK: br i1 [[ISEMPTY]], label %[[RED_DONE:[^,]+]], label %[[RED_BODY:[^,]+]] + // CHECK: [[RED_BODY]]: + // CHECK: [[A_BUF_IDX_SUB_K2POW_ELEM:%.+]] = phi float* [ [[A_BUF_IDX_SUB_K2POW]], %[[INNER_BODY]] ], [ [[A_BUF_IDX_SUB_K2POW_NEXT:%.+]], %[[RED_BODY]] ] + // CHECK: [[A_BUF_IDX_ELEM:%.+]] = phi float* [ [[A_BUF_IDX]], %[[INNER_BODY]] ], [ [[A_BUF_IDX_NEXT:%.+]], %[[RED_BODY]] ] + // CHECK: [[A_BUF_IDX_VAL:%.+]] = load float, float* [[A_BUF_IDX_ELEM]], + // CHECK: [[A_BUF_IDX_SUB_K2POW_VAL:%.+]] = load float, float* [[A_BUF_IDX_SUB_K2POW_ELEM]], + // CHECK: [[RED:%.+]] = fadd float [[A_BUF_IDX_VAL]], [[A_BUF_IDX_SUB_K2POW_VAL]] + // CHECK: store float [[RED]], float* [[A_BUF_IDX_ELEM]], + // CHECK: [[A_BUF_IDX_NEXT]] = getelementptr float, float* [[A_BUF_IDX_ELEM]], i32 1 + // CHECK: [[A_BUF_IDX_SUB_K2POW_NEXT]] = getelementptr float, float* [[A_BUF_IDX_SUB_K2POW_ELEM]], i32 1 + // CHECK: [[DONE:%.+]] = icmp eq float* [[A_BUF_IDX_NEXT]], [[A_BUF_END]] + // CHECK: br i1 [[DONE]], label %[[RED_DONE]], label %[[RED_BODY]] + // CHECK: [[RED_DONE]]: + + // b_buffer[i] += b_buffer[i-pow(2, k)]; + // CHECK: [[B_BUF_IDX_VAL:%.+]] = load double, double* [[B_BUF_IDX]], + // CHECK: [[B_BUF_IDX_SUB_K2POW_VAL:%.+]] = load double, double* [[B_BUF_IDX_SUB_K2POW]], + // CHECK: [[RED:%.+]] = fadd double [[B_BUF_IDX_VAL]], [[B_BUF_IDX_SUB_K2POW_VAL]] + // CHECK: store double [[RED]], double* [[B_BUF_IDX]], + + // --i; + // CHECK: [[I_PREV:%.+]] = sub nuw i64 [[I]], 1 + // CHECK: [[CMP:%.+]] = icmp uge i64 [[I_PREV]], [[K2POW]] + // CHECK: br i1 [[CMP]], label %[[INNER_BODY]], label %[[INNER_EXIT]] + // CHECK: [[INNER_EXIT]]: + + // ++k; + // CHECK: [[K_NEXT]] = add nuw i32 [[K]], 1 + // k2pow <<= 1; + // CHECK: [[K2POW_NEXT]] = shl nuw i64 [[K2POW]], 1 + // CHECK: [[CMP:%.+]] = icmp ne i32 [[K_NEXT]], [[CEIL_LOG2_10_INT]] + // CHECK: br i1 [[CMP]], label %[[OUTER_BODY]], label %[[OUTER_EXIT:[^,]+]] + // CHECK: [[OUTER_EXIT]]: + bar(); + // CHECK: call void @__kmpc_for_static_init_4( + // CHECK: call i8* @llvm.stacksave() + // CHECK: store float 0.000000e+00, float* % + // CHECK: store double 0.000000e+00, double* [[B_PRIV_ADDR:%.+]], + // CHECK: br label %[[DISPATCH:[^,]+]] + + // CHECK: [[SCAN_PHASE:.+]]: + // CHECK: call void @{{.+}}foo{{.+}}() + // CHECK: br label %[[LOOP_CONTINUE:.+]] + + // CHECK: [[DISPATCH]]: + // if (i >0) + // a_priv[[0..n] = a_buffer[i-1][0..n]; + // CHECK: [[BASE_IDX_I:%.+]] = load i32, i32* [[IV_ADDR:%.+]], + // CHECK: [[BASE_IDX:%.+]] = zext i32 [[BASE_IDX_I]] to i64 + // CHECK: [[CMP:%.+]] = icmp eq i64 [[BASE_IDX]], 0 + // CHECK: br i1 [[CMP]], label %[[IF_DONE:[^,]+]], label %[[IF_THEN:[^,]+]] + // CHECK: [[IF_THEN]]: + // CHECK: [[BASE_IDX_SUB_1:%.+]] = sub nuw i64 [[BASE_IDX]], 1 + // CHECK: [[IDX:%.+]] = mul nsw i64 [[BASE_IDX_SUB_1]], [[NUM_ELEMS]] + // CHECK: [[A_BUF_IDX:%.+]] = getelementptr inbounds float, float* [[A_BUF]], i64 [[IDX]] + // CHECK: [[A_PRIV:%.+]] = getelementptr inbounds [10 x float], [10 x float]* [[A_PRIV_ADDR:%.+]], i64 0, i64 0 + // CHECK: [[BYTES:%.+]] = mul nuw i64 [[NUM_ELEMS:%.+]], 4 + // CHECK: [[DEST:%.+]] = bitcast float* [[A_PRIV]] to i8* + // CHECK: [[SRC:%.+]] = bitcast float* [[A_BUF_IDX]] to i8* + // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* {{.*}}[[DEST]], i8* {{.*}}[[SRC]], i64 [[BYTES]], i1 false) + + // b_priv = b_buffer[i]; + // CHECK: [[B_BUF_IDX:%.+]] = getelementptr inbounds double, double* [[B_BUF]], i64 [[BASE_IDX_SUB_1]] + // CHECK: [[B_BUF_IDX_VAL:%.+]] = load double, double* [[B_BUF_IDX]], + // CHECK: store double [[B_BUF_IDX_VAL]], double* [[B_PRIV_ADDR]], + // CHECK: br label %[[SCAN_PHASE]] + + // CHECK: [[LOOP_CONTINUE]]: + // CHECK: call void @llvm.stackrestore(i8* % + // CHECK: call void @__kmpc_for_static_fini( + // CHECK: call void @llvm.stackrestore(i8* + // CHECK: call void @__kmpc_barrier( + } +} + +#endif + diff --git a/clang/test/OpenMP/scan_messages.cpp b/clang/test/OpenMP/scan_messages.cpp --- a/clang/test/OpenMP/scan_messages.cpp +++ b/clang/test/OpenMP/scan_messages.cpp @@ -19,32 +19,32 @@ #pragma omp for simd reduction(inscan, +: argc) for (int i = 0; i < 10; ++i) if (argc) -#pragma omp scan inclusive(argc) // expected-error {{'#pragma omp scan' cannot be an immediate substatement}} +#pragma omp scan inclusive(argc) // expected-error {{'#pragma omp scan' cannot be an immediate substatement}} expected-error {{orphaned 'omp scan' directives are prohibited; perhaps you forget to enclose the directive into a for, simd, for simd, parallel for, or parallel for simd region?}} if (argc) { #pragma omp scan inclusive(argc) // expected-error {{orphaned 'omp scan' directives are prohibited; perhaps you forget to enclose the directive into a for, simd, for simd, parallel for, or parallel for simd region?}} } #pragma omp simd reduction(inscan, +: argc) for (int i = 0; i < 10; ++i) while (argc) -#pragma omp scan inclusive(argc) // expected-error {{'#pragma omp scan' cannot be an immediate substatement}} +#pragma omp scan inclusive(argc) // expected-error {{'#pragma omp scan' cannot be an immediate substatement}} expected-error {{orphaned 'omp scan' directives are prohibited; perhaps you forget to enclose the directive into a for, simd, for simd, parallel for, or parallel for simd region?}} while (argc) { #pragma omp scan inclusive(argc) // expected-error {{orphaned 'omp scan' directives are prohibited; perhaps you forget to enclose the directive into a for, simd, for simd, parallel for, or parallel for simd region?}} } #pragma omp simd reduction(inscan, +: argc) for (int i = 0; i < 10; ++i) do -#pragma omp scan inclusive(argc) // expected-error {{'#pragma omp scan' cannot be an immediate substatement}} +#pragma omp scan inclusive(argc) // expected-error {{'#pragma omp scan' cannot be an immediate substatement}} expected-error {{orphaned 'omp scan' directives are prohibited; perhaps you forget to enclose the directive into a for, simd, for simd, parallel for, or parallel for simd region?}} while (argc) ; -#pragma omp simd reduction(inscan, +: argc) +#pragma omp simd reduction(inscan, +: argc) // expected-error {{the inscan reduction list item must appear as a list item in an 'inclusive' or 'exclusive' clause on an inner 'omp scan' directive}} for (int i = 0; i < 10; ++i) do { -#pragma omp scan inclusive(argc) +#pragma omp scan inclusive(argc) // expected-error {{orphaned 'omp scan' directives are prohibited; perhaps you forget to enclose the directive into a for, simd, for simd, parallel for, or parallel for simd region?}} } while (argc); #pragma omp simd reduction(inscan, +: argc) for (int i = 0; i < 10; ++i) switch (argc) -#pragma omp scan inclusive(argc) // expected-error {{'#pragma omp scan' cannot be an immediate substatement}} +#pragma omp scan inclusive(argc) // expected-error {{'#pragma omp scan' cannot be an immediate substatement}} expected-error {{orphaned 'omp scan' directives are prohibited; perhaps you forget to enclose the directive into a for, simd, for simd, parallel for, or parallel for simd region?}} switch (argc) case 1: #pragma omp scan inclusive(argc) // expected-error {{'#pragma omp scan' cannot be an immediate substatement}} expected-error {{orphaned 'omp scan' directives are prohibited; perhaps you forget to enclose the directive into a for, simd, for simd, parallel for, or parallel for simd region?}} @@ -52,21 +52,21 @@ case 1: { #pragma omp scan inclusive(argc) // expected-error {{orphaned 'omp scan' directives are prohibited; perhaps you forget to enclose the directive into a for, simd, for simd, parallel for, or parallel for simd region?}} } -#pragma omp simd reduction(inscan, +: argc) +#pragma omp simd reduction(inscan, +: argc) // expected-error {{the inscan reduction list item must appear as a list item in an 'inclusive' or 'exclusive' clause on an inner 'omp scan' directive}} for (int i = 0; i < 10; ++i) switch (argc) { -#pragma omp scan exclusive(argc) // expected-note 2 {{previous 'scan' directive used here}} +#pragma omp scan exclusive(argc) // expected-error {{orphaned 'omp scan' directives are prohibited; perhaps you forget to enclose the directive into a for, simd, for simd, parallel for, or parallel for simd region?}} case 1: -#pragma omp scan exclusive(argc) // expected-error {{exactly one 'scan' directive must appear in the loop body of an enclosing directive}} +#pragma omp scan exclusive(argc) // expected-error {{orphaned 'omp scan' directives are prohibited; perhaps you forget to enclose the directive into a for, simd, for simd, parallel for, or parallel for simd region?}} break; default: { -#pragma omp scan exclusive(argc) // expected-error {{exactly one 'scan' directive must appear in the loop body of an enclosing directive}} +#pragma omp scan exclusive(argc) // expected-error {{orphaned 'omp scan' directives are prohibited; perhaps you forget to enclose the directive into a for, simd, for simd, parallel for, or parallel for simd region?}} } break; } #pragma omp simd reduction(inscan, +: argc) for (int i = 0; i < 10; ++i) for (;;) -#pragma omp scan exclusive(argc) // expected-error {{'#pragma omp scan' cannot be an immediate substatement}} +#pragma omp scan exclusive(argc) // expected-error {{'#pragma omp scan' cannot be an immediate substatement}} expected-error {{orphaned 'omp scan' directives are prohibited; perhaps you forget to enclose the directive into a for, simd, for simd, parallel for, or parallel for simd region?}} for (;;) { #pragma omp scan exclusive(argc) // expected-error {{orphaned 'omp scan' directives are prohibited; perhaps you forget to enclose the directive into a for, simd, for simd, parallel for, or parallel for simd region?}} } @@ -77,8 +77,10 @@ } #pragma omp simd reduction(inscan, +: argc) for (int i = 0; i < 10; ++i) { +#pragma omp scan inclusive(argc) // expected-note {{previous 'scan' directive used here}} +#pragma omp scan inclusive(argc) // expected-error {{exactly one 'scan' directive must appear in the loop body of an enclosing directive}} label1 : { -#pragma omp scan inclusive(argc) +#pragma omp scan inclusive(argc) // expected-error {{orphaned 'omp scan' directives are prohibited; perhaps you forget to enclose the directive into a for, simd, for simd, parallel for, or parallel for simd region?}} }} return T(); @@ -109,32 +111,32 @@ #pragma omp simd reduction(inscan, +: argc) for (int i = 0; i < 10; ++i) if (argc) -#pragma omp scan inclusive(argc) // expected-error {{'#pragma omp scan' cannot be an immediate substatement}} +#pragma omp scan inclusive(argc) // expected-error {{'#pragma omp scan' cannot be an immediate substatement}} expected-error {{orphaned 'omp scan' directives are prohibited; perhaps you forget to enclose the directive into a for, simd, for simd, parallel for, or parallel for simd region?}} if (argc) { #pragma omp scan inclusive(argc) // expected-error {{orphaned 'omp scan' directives are prohibited; perhaps you forget to enclose the directive into a for, simd, for simd, parallel for, or parallel for simd region?}} expected-error {{the list item must appear in 'reduction' clause with the 'inscan' modifier of the parent directive}} } #pragma omp simd reduction(inscan, +: argc) for (int i = 0; i < 10; ++i) while (argc) -#pragma omp scan inclusive(argc) // expected-error {{'#pragma omp scan' cannot be an immediate substatement}} +#pragma omp scan inclusive(argc) // expected-error {{'#pragma omp scan' cannot be an immediate substatement}} expected-error {{orphaned 'omp scan' directives are prohibited; perhaps you forget to enclose the directive into a for, simd, for simd, parallel for, or parallel for simd region?}} while (argc) { #pragma omp scan inclusive(argc) // expected-error {{orphaned 'omp scan' directives are prohibited; perhaps you forget to enclose the directive into a for, simd, for simd, parallel for, or parallel for simd region?}} expected-error {{the list item must appear in 'reduction' clause with the 'inscan' modifier of the parent directive}} } #pragma omp simd reduction(inscan, +: argc) for (int i = 0; i < 10; ++i) do -#pragma omp scan inclusive(argc) // expected-error {{'#pragma omp scan' cannot be an immediate substatement}} +#pragma omp scan inclusive(argc) // expected-error {{'#pragma omp scan' cannot be an immediate substatement}} expected-error {{orphaned 'omp scan' directives are prohibited; perhaps you forget to enclose the directive into a for, simd, for simd, parallel for, or parallel for simd region?}} while (argc) ; #pragma omp simd reduction(inscan, +: argc) for (int i = 0; i < 10; ++i) do { -#pragma omp scan exclusive(argc) +#pragma omp scan exclusive(argc) // expected-error {{orphaned 'omp scan' directives are prohibited; perhaps you forget to enclose the directive into a for, simd, for simd, parallel for, or parallel for simd region?}} } while (argc); #pragma omp simd reduction(inscan, +: argc) for (int i = 0; i < 10; ++i) switch (argc) -#pragma omp scan exclusive(argc) // expected-error {{'#pragma omp scan' cannot be an immediate substatement}} +#pragma omp scan exclusive(argc) // expected-error {{'#pragma omp scan' cannot be an immediate substatement}} expected-error {{orphaned 'omp scan' directives are prohibited; perhaps you forget to enclose the directive into a for, simd, for simd, parallel for, or parallel for simd region?}} switch (argc) case 1: #pragma omp scan exclusive(argc) // expected-error {{'#pragma omp scan' cannot be an immediate substatement}} expected-error {{orphaned 'omp scan' directives are prohibited; perhaps you forget to enclose the directive into a for, simd, for simd, parallel for, or parallel for simd region?}} expected-error {{the list item must appear in 'reduction' clause with the 'inscan' modifier of the parent directive}} @@ -145,18 +147,18 @@ #pragma omp simd reduction(inscan, +: argc) for (int i = 0; i < 10; ++i) switch (argc) { -#pragma omp scan inclusive(argc) // expected-note 2 {{previous 'scan' directive used here}} +#pragma omp scan inclusive(argc) // expected-error {{orphaned 'omp scan' directives are prohibited; perhaps you forget to enclose the directive into a for, simd, for simd, parallel for, or parallel for simd region?}} case 1: -#pragma omp scan inclusive(argc) // expected-error {{exactly one 'scan' directive must appear in the loop body of an enclosing directive}} +#pragma omp scan inclusive(argc) // expected-error {{orphaned 'omp scan' directives are prohibited; perhaps you forget to enclose the directive into a for, simd, for simd, parallel for, or parallel for simd region?}} break; default: { -#pragma omp scan inclusive(argc) // expected-error {{exactly one 'scan' directive must appear in the loop body of an enclosing directive}} +#pragma omp scan inclusive(argc) // expected-error {{orphaned 'omp scan' directives are prohibited; perhaps you forget to enclose the directive into a for, simd, for simd, parallel for, or parallel for simd region?}} } break; } #pragma omp simd reduction(inscan, +: argc) for (int i = 0; i < 10; ++i) for (;;) -#pragma omp scan inclusive(argc) // expected-error {{'#pragma omp scan' cannot be an immediate substatement}} +#pragma omp scan inclusive(argc) // expected-error {{'#pragma omp scan' cannot be an immediate substatement}} expected-error {{orphaned 'omp scan' directives are prohibited; perhaps you forget to enclose the directive into a for, simd, for simd, parallel for, or parallel for simd region?}} for (;;) { #pragma omp scan inclusive(argc) // expected-error {{orphaned 'omp scan' directives are prohibited; perhaps you forget to enclose the directive into a for, simd, for simd, parallel for, or parallel for simd region?}} expected-error {{the list item must appear in 'reduction' clause with the 'inscan' modifier of the parent directive}} } @@ -167,10 +169,12 @@ } #pragma omp simd reduction(inscan, +: argc) for (int i = 0; i < 10; ++i) { +#pragma omp scan inclusive(argc) // expected-note {{previous 'scan' directive used here}} +#pragma omp scan inclusive(argc) // expected-error {{exactly one 'scan' directive must appear in the loop body of an enclosing directive}} label1 : { -#pragma omp scan inclusive(argc) +#pragma omp scan inclusive(argc) // expected-error {{orphaned 'omp scan' directives are prohibited; perhaps you forget to enclose the directive into a for, simd, for simd, parallel for, or parallel for simd region?}} } } - return tmain(); + return tmain(); // expected-note {{in instantiation of function template specialization 'tmain' requested here}} } diff --git a/clang/tools/libclang/CIndex.cpp b/clang/tools/libclang/CIndex.cpp --- a/clang/tools/libclang/CIndex.cpp +++ b/clang/tools/libclang/CIndex.cpp @@ -2376,6 +2376,17 @@ for (auto *E : C->reduction_ops()) { Visitor->AddStmt(E); } + if (C->getModifier() == clang::OMPC_REDUCTION_inscan) { + for (auto *E : C->copy_ops()) { + Visitor->AddStmt(E); + } + for (auto *E : C->copy_array_temps()) { + Visitor->AddStmt(E); + } + for (auto *E : C->copy_array_elems()) { + Visitor->AddStmt(E); + } + } } void OMPClauseEnqueue::VisitOMPTaskReductionClause( const OMPTaskReductionClause *C) {