diff --git a/flang/include/flang/Lower/AbstractConverter.h b/flang/include/flang/Lower/AbstractConverter.h --- a/flang/include/flang/Lower/AbstractConverter.h +++ b/flang/include/flang/Lower/AbstractConverter.h @@ -18,6 +18,7 @@ #include "flang/Lower/PFTDefs.h" #include "flang/Optimizer/Builder/BoxValue.h" #include "flang/Semantics/symbol.h" +#include "mlir/Dialect/OpenMP/OpenMPDialect.h" #include "mlir/IR/BuiltinOps.h" #include "mlir/IR/Operation.h" #include "llvm/ADT/ArrayRef.h" @@ -103,8 +104,9 @@ virtual bool createHostAssociateVarClone(const Fortran::semantics::Symbol &sym) = 0; - virtual void copyHostAssociateVar(const Fortran::semantics::Symbol &sym, - mlir::Block *lastPrivBlock = nullptr) = 0; + virtual void copyHostAssociateVar( + const Fortran::semantics::Symbol &sym, + mlir::OpBuilder::InsertPoint *copyInsertionPoint = nullptr) = 0; /// Collect the set of symbols with \p flag in \p eval /// region if \p collectSymbols is true. Likewise, collect the diff --git a/flang/include/flang/Lower/PFTBuilder.h b/flang/include/flang/Lower/PFTBuilder.h --- a/flang/include/flang/Lower/PFTBuilder.h +++ b/flang/include/flang/Lower/PFTBuilder.h @@ -271,6 +271,7 @@ }}); } + bool operator==(const Evaluation &that) const { return this == &that; } LLVM_DUMP_METHOD void dump() const; /// Return the first non-nop successor of an evaluation, possibly exiting diff --git a/flang/lib/Lower/Bridge.cpp b/flang/lib/Lower/Bridge.cpp --- a/flang/lib/Lower/Bridge.cpp +++ b/flang/lib/Lower/Bridge.cpp @@ -517,9 +517,9 @@ } // FIXME: Generalize this function, so that lastPrivBlock can be removed - void - copyHostAssociateVar(const Fortran::semantics::Symbol &sym, - mlir::Block *lastPrivBlock = nullptr) override final { + void copyHostAssociateVar(const Fortran::semantics::Symbol &sym, + mlir::OpBuilder::InsertPoint *copyInsertionPoint = + nullptr) override final { // 1) Fetch the original copy of the variable. assert(sym.has() && "No host-association found"); @@ -537,13 +537,13 @@ // 3) Perform the assignment. mlir::OpBuilder::InsertPoint insPt = builder->saveInsertionPoint(); - if (lastPrivBlock) - builder->setInsertionPointToStart(lastPrivBlock); + if (copyInsertionPoint) + builder->restoreInsertionPoint(*copyInsertionPoint); else builder->setInsertionPointAfter(fir::getBase(exv).getDefiningOp()); fir::ExtendedValue lhs, rhs; - if (lastPrivBlock) { + if (copyInsertionPoint) { // lastprivate case lhs = hexv; rhs = exv; @@ -568,7 +568,7 @@ builder->create(loc, loadVal, fir::getBase(lhs)); } - if (lastPrivBlock) + if (copyInsertionPoint) builder->restoreInsertionPoint(insPt); } diff --git a/flang/lib/Lower/OpenMP.cpp b/flang/lib/Lower/OpenMP.cpp --- a/flang/lib/Lower/OpenMP.cpp +++ b/flang/lib/Lower/OpenMP.cpp @@ -61,10 +61,10 @@ return sym; } -static void -privatizeSymbol(Fortran::lower::AbstractConverter &converter, - const Fortran::semantics::Symbol *sym, - [[maybe_unused]] mlir::Block *lastPrivBlock = nullptr) { +static void privatizeSymbol(Fortran::lower::AbstractConverter &converter, + const Fortran::semantics::Symbol *sym, + [[maybe_unused]] mlir::OpBuilder::InsertPoint + *lastPrivBlockInsertionPoint = nullptr) { // Privatization for symbols which are pre-determined (like loop index // variables) happen separately, for everything else privatize here. if (sym->test(Fortran::semantics::Symbol::Flag::OmpPreDetermined)) @@ -75,7 +75,97 @@ if (sym->test(Fortran::semantics::Symbol::Flag::OmpFirstPrivate)) converter.copyHostAssociateVar(*sym); if (sym->test(Fortran::semantics::Symbol::Flag::OmpLastPrivate)) - converter.copyHostAssociateVar(*sym, lastPrivBlock); + converter.copyHostAssociateVar(*sym, lastPrivBlockInsertionPoint); +} + +static void +privatizeSymbolInSectionOp(Fortran::lower::AbstractConverter &converter, + const Fortran::semantics::Symbol *sym, + Fortran::lower::pft::Evaluation &eval, + omp::SectionOp *sectionOp) { + // Privatization for symbols which are pre-determined (like loop index + // variables) happen separately, for everything else privatize here. + if (sym->test(Fortran::semantics::Symbol::Flag::OmpPreDetermined)) + return; + fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder(); + bool success = converter.createHostAssociateVarClone(*sym); + (void)success; + assert(success && "Privatization failed due to existing binding"); + if (sym->test(Fortran::semantics::Symbol::Flag::OmpFirstPrivate)) + converter.copyHostAssociateVar(*sym); + if (sym->test(Fortran::semantics::Symbol::Flag::OmpLastPrivate) && + eval == eval.parentConstruct->getLastNestedEvaluation()) { + // `lastprivate` symbols are added only to the last + // `omp.section` operation in a `omp.sections` operation. + // A `lastprivate` variable's privatization shall be split into two parts: + // omp.section { + // fir.allocate + // + // fir.store to original variable + // } + auto sectionOpOperationsIter = sectionOp->region().back().rbegin(); + auto &targetOper = *(++sectionOpOperationsIter); + firOpBuilder.setInsertionPoint(sectionOp->region().back().getTerminator()); + mlir::OpBuilder::InsertPoint insertPoint = + firOpBuilder.saveInsertionPoint(); + converter.copyHostAssociateVar(*sym, &insertPoint); + firOpBuilder.setInsertionPointAfter(&targetOper); + } +} + +static bool privatizeVarsInSectionConstruct( + mlir::omp::SectionOp &op, Fortran::lower::AbstractConverter &converter, + const Fortran::parser::OmpClauseList &opClauseList, + Fortran::lower::pft::Evaluation &eval) { + fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder(); + // Symbols in private, firstprivate, and/or lastprivate clauses. + llvm::SetVector privatizedSymbols; + auto collectOmpObjectListSymbol = + [&](const Fortran::parser::OmpObjectList &ompObjectList, + llvm::SetVector &symbolSet) { + for (const Fortran::parser::OmpObject &ompObject : ompObjectList.v) { + Fortran::semantics::Symbol *sym = getOmpObjectSymbol(ompObject); + symbolSet.insert(sym); + } + }; + + bool hasLastPrivateOp = false; + for (const Fortran::parser::OmpClause &clause : opClauseList.v) { + if (const auto &privateClause = + std::get_if(&clause.u)) { + collectOmpObjectListSymbol(privateClause->v, privatizedSymbols); + } else if (const auto &firstPrivateClause = + std::get_if( + &clause.u)) { + collectOmpObjectListSymbol(firstPrivateClause->v, privatizedSymbols); + } else if (const auto &lastPrivateClause = + std::get_if( + &clause.u)) { + collectOmpObjectListSymbol(lastPrivateClause->v, privatizedSymbols); + hasLastPrivateOp = true; + } + } + + firOpBuilder.setInsertionPointToStart(&op.getRegion().back()); + bool needBarrier = false; + for (auto sym : privatizedSymbols) { + privatizeSymbolInSectionOp(converter, sym, eval, + dyn_cast(&op)); + if (sym->test(Fortran::semantics::Symbol::Flag::OmpFirstPrivate) && + sym->test(Fortran::semantics::Symbol::Flag::OmpLastPrivate)) + needBarrier = true; + } + + // Emit implicit barrier to synchronize threads and avoid data races on + // initialization of firstprivate variables and post-update of lastprivate + // variables. + // FIXME: Emit barrier for lastprivate clause when 'sections' directive has + // 'nowait' clause. Otherwise, emit barrier when 'sections' directive has + // both firstprivate and lastprivate clause. + // Emit implicit barrier for linear clause. Maybe on somewhere else. + if (needBarrier) + firOpBuilder.create(converter.getCurrentLocation()); + return hasLastPrivateOp; } template @@ -96,7 +186,7 @@ }; // We need just one ICmpOp for multiple LastPrivate clauses. mlir::arith::CmpIOp cmpOp; - mlir::Block *lastPrivBlock = nullptr; + mlir::OpBuilder::InsertPoint lastPrivBlockInsertionPoint; bool hasLastPrivateOp = false; for (const Fortran::parser::OmpClause &clause : opClauseList.v) { if (const auto &privateClause = @@ -109,7 +199,7 @@ } else if (const auto &lastPrivateClause = std::get_if( &clause.u)) { - // TODO: Add lastprivate support for sections construct, simd construct + // TODO: Add lastprivate support for simd construct if (std::is_same_v) { omp::WsLoopOp *wsLoopOp = dyn_cast(&op); mlir::Operation *lastOper = wsLoopOp->region().back().getTerminator(); @@ -154,11 +244,14 @@ } mlir::scf::IfOp ifOp = firOpBuilder.create( wsLoopOp->getLoc(), cmpOp, /*else*/ false); - lastPrivBlock = &ifOp.getThenRegion().front(); - } else { + firOpBuilder.setInsertionPointToStart(&ifOp.getThenRegion().front()); + lastPrivBlockInsertionPoint = firOpBuilder.saveInsertionPoint(); + } + // `lastprivate` variables in `omp.section` operation + // are handled elsewhere + else if (!std::is_same_v) TODO(converter.getCurrentLocation(), "lastprivate clause in constructs other than worksharing-loop"); - } collectOmpObjectListSymbol(lastPrivateClause->v, privatizedSymbols); hasLastPrivateOp = true; } @@ -201,12 +294,9 @@ } bool needBarrier = false; - if (mlir::isa(op)) - firOpBuilder.setInsertionPointToStart(&op.getRegion().back()); - else - firOpBuilder.setInsertionPointToStart(firOpBuilder.getAllocaBlock()); + firOpBuilder.setInsertionPointToStart(firOpBuilder.getAllocaBlock()); for (auto sym : privatizedSymbols) { - privatizeSymbol(converter, sym, lastPrivBlock); + privatizeSymbol(converter, sym, &lastPrivBlockInsertionPoint); if (sym->test(Fortran::semantics::Symbol::Flag::OmpFirstPrivate) && sym->test(Fortran::semantics::Symbol::Flag::OmpLastPrivate)) needBarrier = true; @@ -227,7 +317,6 @@ // Emit implicit barrier for linear clause. Maybe on somewhere else. if (needBarrier) firOpBuilder.create(converter.getCurrentLocation()); - firOpBuilder.restoreInsertionPoint(insPt); return hasLastPrivateOp; } @@ -518,13 +607,19 @@ // Handle privatization. Do not privatize if this is the outer operation. if (clauses && !outerCombined) { - bool lastPrivateOp = privatizeVars(op, converter, *clauses, eval); - // LastPrivatization, due to introduction of - // new control flow, changes the insertion point, - // thus restore it. - // TODO: Clean up later a bit to avoid this many sets and resets. - if (lastPrivateOp) - resetBeforeTerminator(firOpBuilder, storeOp, block); + if constexpr (std::is_same_v) + // Lastprivate variables in `omp.section` operations require + // a different control-flow. Hence, privatize them separately. + privatizeVarsInSectionConstruct(op, converter, *clauses, eval); + else { + bool lastPrivateOp = privatizeVars(op, converter, *clauses, eval); + // LastPrivatization, due to introduction of + // new control flow, changes the insertion point, + // thus restore it. + // TODO: Clean up later a bit to avoid this many sets and resets. + if (lastPrivateOp) + resetBeforeTerminator(firOpBuilder, storeOp, block); + } } if constexpr (std::is_same_v) { diff --git a/flang/test/Lower/OpenMP/sections.f90 b/flang/test/Lower/OpenMP/sections.f90 --- a/flang/test/Lower/OpenMP/sections.f90 +++ b/flang/test/Lower/OpenMP/sections.f90 @@ -108,3 +108,69 @@ alpha = alpha * 5 !$omp end sections end subroutine + +subroutine lastprivate() + integer :: x +!CHECK: %[[X:.*]] = fir.alloca i32 {bindc_name = "x", uniq_name = "_QFlastprivateEx"} +!CHECK: omp.sections { + !$omp sections lastprivate(x) +!CHECK: omp.section { +!CHECK: %[[PRIVATE_X:.*]] = fir.alloca i32 {bindc_name = "x", pinned, uniq_name = "_QFlastprivateEx"} +!CHECK: %[[const:.*]] = arith.constant 10 : i32 +!CHECK: %[[temp:.*]] = fir.load %1 : !fir.ref +!CHECK: %[[result:.*]] = arith.muli %c10_i32, %2 : i32 +!CHECK: fir.store %[[result]] to %[[PRIVATE_X]] : !fir.ref +!CHECK: omp.terminator +!CHECK: } + !$omp section + x = x * 10 +!CHECK: omp.section { +!CHECK: %[[PRIVATE_X:.*]] = fir.alloca i32 {bindc_name = "x", pinned, uniq_name = "_QFlastprivateEx"} +!CHECK: %[[temp:.*]] = fir.load %1 : !fir.ref +!CHECK: %[[const:.*]] = arith.constant 1 : i32 +!CHECK: %[[result:.*]] = arith.addi %[[temp]], %[[const]] : i32 +!CHECK: fir.store %[[result]] to %[[PRIVATE_X]] : !fir.ref +!CHECK: %[[temp:.*]] = fir.load %1 : !fir.ref +!CHECK: fir.store %[[temp]] to %[[X]] : !fir.ref +!CHECK: omp.terminator +!CHECK: } + !$omp section + x = x + 1 +!CHECK: omp.terminator +!CHECK: } + !$omp end sections + +!CHECK: omp.sections { + !$omp sections firstprivate(x) lastprivate(x) +!CHECK: omp.section { +!CHECK: %[[PRIVATE_X:.*]] = fir.alloca i32 {bindc_name = "x", pinned, uniq_name = "_QFlastprivateEx"} +!CHECK: %[[temp:.*]] = fir.load %[[X]] : !fir.ref +!CHECK: fir.store %[[temp]] to %[[PRIVATE_X]] : !fir.ref +!CHECK: omp.barrier +!CHECK: %[[const:.*]] = arith.constant 10 : i32 +!CHECK: %[[temp:.*]] = fir.load %1 : !fir.ref +!CHECK: %[[result:.*]] = arith.muli %c10_i32, %3 : i32 +!CHECK: fir.store %[[result]] to %[[PRIVATE_X]] : !fir.ref +!CHECK: omp.terminator +!CHECK: } + !$omp section + x = x * 10 +!CHECK: omp.section { +!CHECK: %[[PRIVATE_X:.*]] = fir.alloca i32 {bindc_name = "x", pinned, uniq_name = "_QFlastprivateEx"} +!CHECK: %[[temp:.*]] = fir.load %[[X]] : !fir.ref +!CHECK: fir.store %[[temp]] to %[[PRIVATE_X]] : !fir.ref +!CHECK: omp.barrier +!CHECK: %[[temp:.*]] = fir.load %[[PRIVATE_X]] : !fir.ref +!CHECK: %[[const:.*]] = arith.constant 1 : i32 +!CHECK: %[[result:.*]] = arith.addi %[[temp]], %[[const]] : i32 +!CHECK: fir.store %[[result]] to %[[PRIVATE_X]] : !fir.ref +!CHECK: %[[temp:.*]] = fir.load %1 : !fir.ref +!CHECK: fir.store %[[temp]] to %[[X]] : !fir.ref +!CHECK: omp.terminator +!CHECK: } + !$omp section + x = x + 1 +!CHECK: omp.terminator +!CHECK: } + !$omp end sections +end subroutine