Index: flang/include/flang/Lower/AbstractConverter.h =================================================================== --- flang/include/flang/Lower/AbstractConverter.h +++ flang/include/flang/Lower/AbstractConverter.h @@ -18,6 +18,7 @@ #include "flang/Optimizer/Builder/BoxValue.h" #include "flang/Semantics/symbol.h" #include "mlir/IR/BuiltinOps.h" +#include "mlir/IR/Operation.h" #include "llvm/ADT/ArrayRef.h" namespace fir { @@ -60,6 +61,7 @@ using SomeExpr = Fortran::evaluate::Expr; using SymbolRef = Fortran::common::Reference; class StatementContext; +using namespace mlir; //===----------------------------------------------------------------------===// // AbstractConverter interface @@ -101,7 +103,8 @@ virtual bool createHostAssociateVarClone(const Fortran::semantics::Symbol &sym) = 0; - virtual void copyHostAssociateVar(const Fortran::semantics::Symbol &sym) = 0; + virtual void copyHostAssociateVar(const Fortran::semantics::Symbol &sym, + Block *lastPrivBlock = nullptr) = 0; /// Collect the set of ultimate symbols of symbols with \p flag in \p eval /// region if \p isUltimateSymbol is true. Otherwise, collect the set of Index: flang/lib/Lower/Bridge.cpp =================================================================== --- flang/lib/Lower/Bridge.cpp +++ flang/lib/Lower/Bridge.cpp @@ -44,6 +44,7 @@ #include "flang/Runtime/iostat.h" #include "flang/Semantics/tools.h" #include "mlir/Dialect/ControlFlow/IR/ControlFlowOps.h" +#include "mlir/Dialect/OpenMP/OpenMPDialect.h" #include "mlir/IR/PatternMatch.h" #include "mlir/Parser/Parser.h" #include "mlir/Transforms/RegionUtils.h" @@ -492,7 +493,8 @@ } void - copyHostAssociateVar(const Fortran::semantics::Symbol &sym) override final { + copyHostAssociateVar(const Fortran::semantics::Symbol &sym, + mlir::Block *lastPrivBlock = nullptr) override final { // 1) Fetch the original copy of the variable. 
assert(sym.has() && "No host-association found"); @@ -509,21 +511,41 @@ fir::ExtendedValue exv = getExtendedValue(sb); // 3) Perform the assignment. - builder->setInsertionPointAfter(fir::getBase(exv).getDefiningOp()); + auto insPt = builder->saveInsertionPoint(); + if (lastPrivBlock) { + builder->setInsertionPointToStart(lastPrivBlock); + } else { + builder->setInsertionPointAfter(fir::getBase(exv).getDefiningOp()); + } + + fir::ExtendedValue lhs, rhs; + if (lastPrivBlock) { + // lastprivate case + lhs = hexv; + rhs = exv; + } else { + lhs = exv; + rhs = hexv; + } + mlir::Location loc = genLocation(sym.name()); mlir::Type symType = genType(sym); if (auto seqTy = symType.dyn_cast()) { Fortran::lower::StatementContext stmtCtx; - Fortran::lower::createSomeArrayAssignment(*this, exv, hexv, localSymbols, + Fortran::lower::createSomeArrayAssignment(*this, lhs, rhs, localSymbols, stmtCtx); stmtCtx.finalize(); } else if (hexv.getBoxOf()) { - fir::factory::CharacterExprHelper{*builder, loc}.createAssign(exv, hexv); + fir::factory::CharacterExprHelper{*builder, loc}.createAssign(lhs, rhs); } else if (hexv.getBoxOf()) { TODO(loc, "firstprivatisation of allocatable variables"); } else { - auto loadVal = builder->create(loc, fir::getBase(hexv)); - builder->create(loc, loadVal, fir::getBase(exv)); + auto loadVal = builder->create(loc, fir::getBase(rhs)); + builder->create(loc, loadVal, fir::getBase(lhs)); + } + + if (lastPrivBlock) { + builder->restoreInsertionPoint(insPt); } } Index: flang/lib/Lower/OpenMP.cpp =================================================================== --- flang/lib/Lower/OpenMP.cpp +++ flang/lib/Lower/OpenMP.cpp @@ -22,9 +22,11 @@ #include "flang/Parser/parse-tree.h" #include "flang/Semantics/tools.h" #include "mlir/Dialect/OpenMP/OpenMPDialect.h" +#include "mlir/Dialect/SCF/IR/SCF.h" #include "llvm/Frontend/OpenMP/OMPConstants.h" using namespace mlir; +using namespace mlir::scf; int64_t Fortran::lower::getCollapseValue( const 
Fortran::parser::OmpClauseList &clauseList) { @@ -61,7 +63,8 @@ template static void createPrivateVarSyms(Fortran::lower::AbstractConverter &converter, - const T *clause) { + const T *clause, + Block *lastPrivBlock = nullptr) { const Fortran::parser::OmpObjectList &ompObjectList = clause->v; for (const Fortran::parser::OmpObject &ompObject : ompObjectList.v) { Fortran::semantics::Symbol *sym = getOmpObjectSymbol(ompObject); @@ -74,16 +77,69 @@ assert(success && "Privatization failed due to existing binding"); if constexpr (std::is_same_v) { converter.copyHostAssociateVar(*sym); + } else if constexpr (std::is_same_v< + T, Fortran::parser::OmpClause::Lastprivate>) { + converter.copyHostAssociateVar(*sym, lastPrivBlock); } } } -static void privatizeVars(Fortran::lower::AbstractConverter &converter, +// Introduce a control-flow skeleton to prepare the code for Lastprivatization. +// Return the Block in which the load and store for the Lastprivate var will go. +static Block * +constructLastPrivLoop(omp::WsLoopOp *op, + Fortran::lower::AbstractConverter &converter) { + fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder(); + auto insPt = firOpBuilder.saveInsertionPoint(); + + // Our goal in this function is to introduce the following control flow + // just before exiting the worksharing loop. + // Say our wsloop is as follows: + // + // omp.wsloop { + // ... + // store + // omp.yield + // } + // + // We want to convert it to the following: + // + // omp.wsloop { + // ... 
+ // store + // %cmp = llvm.icmp "eq" %iv %ub + // scf.if %cmp { + // ^%lpv_update_blk: + // } + // omp.yield + // } + + Operation *lastOper; + for (auto &i : op->region().getOps()) { + lastOper = &i; + } + + firOpBuilder.setInsertionPoint(lastOper); + + auto cmpOp = firOpBuilder.create( + op->getLoc(), LLVM::ICmpPredicate::eq, + op->getRegion().front().getArguments()[0], op->upperBound()[0]); + scf::IfOp ifOp = + firOpBuilder.create(op->getLoc(), cmpOp, /*else*/ false); + + firOpBuilder.restoreInsertionPoint(insPt); + return &(ifOp.getThenRegion().front()); +} + +template +static bool privatizeVars(Op &op, Fortran::lower::AbstractConverter &converter, const Fortran::parser::OmpClauseList &opClauseList) { fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder(); auto insPt = firOpBuilder.saveInsertionPoint(); firOpBuilder.setInsertionPointToStart(firOpBuilder.getAllocaBlock()); bool hasFirstPrivateOp = false; + bool hasLastPrivateOp = false; + Block *lastPrivBlock = nullptr; for (const Fortran::parser::OmpClause &clause : opClauseList.v) { if (const auto &privateClause = std::get_if(&clause.u)) { @@ -93,11 +149,22 @@ &clause.u)) { createPrivateVarSyms(converter, firstPrivateClause); hasFirstPrivateOp = true; + } else if (const auto &lastPrivateClause = + std::get_if( + &clause.u)) { + // TODO: Add lastprivate support for sections construct, simd construct + hasLastPrivateOp = true; + if (std::is_same_v) { + lastPrivBlock = + constructLastPrivLoop(dyn_cast(&op), converter); + createPrivateVarSyms(converter, lastPrivateClause, lastPrivBlock); + } } } if (hasFirstPrivateOp) firOpBuilder.create(converter.getCurrentLocation()); firOpBuilder.restoreInsertionPoint(insPt); + return hasLastPrivateOp; } /// The COMMON block is a global structure. 
\p commonValue is the base address @@ -300,6 +367,14 @@ } } +void resetBeforeTerminator(fir::FirOpBuilder &firOpBuilder, + mlir::Operation *storeOp, mlir::Block &block) { + if (storeOp) + firOpBuilder.setInsertionPointAfter(storeOp); + else + firOpBuilder.setInsertionPointToStart(&block); +} + /// Create the body (block) for an OpenMP Operation. /// /// \param [in] op - the operation the body belongs to. @@ -374,14 +449,17 @@ } // Reset the insert point to before the terminator. - if (storeOp) - firOpBuilder.setInsertionPointAfter(storeOp); - else - firOpBuilder.setInsertionPointToStart(&block); + resetBeforeTerminator(firOpBuilder, storeOp, block); // Handle privatization. Do not privatize if this is the outer operation. - if (clauses && !outerCombined) - privatizeVars(converter, *clauses); + if (clauses && !outerCombined) { + bool lastPrivateOp = privatizeVars(op, converter, *clauses); + // LastPrivatization, due to introduction of + // new control flow, changes the insertion point, + // thus restore it. + if (lastPrivateOp) + resetBeforeTerminator(firOpBuilder, storeOp, block); + } if constexpr (std::is_same_v) { threadPrivatizeVars(converter, eval); Index: flang/test/Lower/OpenMP/omp-parallel-lastprivate-clause-scalar.f90 =================================================================== --- /dev/null +++ flang/test/Lower/OpenMP/omp-parallel-lastprivate-clause-scalar.f90 @@ -0,0 +1,84 @@ +! This test checks lowering of `LASTPRIVATE` clause for scalar types. + +! RUN: bbc -fopenmp -emit-fir %s -o - | FileCheck %s --check-prefix=FIRDialect + +!FIRDialect: func @_QPlastprivate_character(%[[ARG1:.*]]: !fir.boxchar<1>{{.*}}) { +!FIRDialect-DAG: %[[ARG1_UNBOX:.*]]:2 = fir.unboxchar +!FIRDialect-DAG: %[[FIVE:.*]] = arith.constant 5 : index + +!FIRDialect: omp.parallel { +!FIRDialect-DAG: %[[ARG1_PVT:.*]] = fir.alloca !fir.char<1,5> {bindc_name = "arg1", + +! 
Check that we are accessing the clone inside the loop +!FIRDialect-DAG: omp.wsloop for (%[[INDX_WS:.*]]) : {{.*}} { +!FIRDialect-DAG: %[[NEG_ONE:.*]] = arith.constant -1 : i32 +!FIRDialect-NEXT: %[[ADDR:.*]] = fir.address_of(@_QQcl. +!FIRDialect-NEXT: %[[CVT0:.*]] = fir.convert %[[ADDR]] +!FIRDialect-NEXT: %[[CNST:.*]] = arith.constant +!FIRDialect-NEXT: %[[CALL_BEGIN_IO:.*]] = fir.call @_FortranAioBeginExternalListOutput(%[[NEG_ONE]], %[[CVT0]], %[[CNST]]) : (i32, !fir.ref, i32) -> !fir.ref +!FIRDialect-NEXT: %[[CVT_0_1:.*]] = fir.convert %[[ARG1_PVT]] +!FIRDialect-NEXT: %[[CVT_0_2:.*]] = fir.convert %[[FIVE]] +!FIRDialect-NEXT: %[[CALL_OP_ASCII:.*]] = fir.call @_FortranAioOutputAscii(%[[CALL_BEGIN_IO]], %[[CVT_0_1]], %[[CVT_0_2]]) +!FIRDialect-NEXT: %[[CALL_END_IO:.*]] = fir.call @_FortranAioEndIoStatement(%[[CALL_BEGIN_IO]]) + +! Testing last iteration check +!FIRDialect-NEXT: %[[IV_CMP:.*]] = llvm.icmp "eq" %[[INDX_WS]] +!FIRDialect: scf.if %[[IV_CMP]] { + +! Testing lastprivate val update +!FIRDialect-DAG: %[[CVT:.*]] = fir.convert %[[ARG1_UNBOX]]#0 : (!fir.ref>) -> !fir.ref +!FIRDialect-DAG: %[[CVT1:.*]] = fir.convert %[[ARG1_PVT]] : (!fir.ref>) -> !fir.ref +!FIRDialect-DAG: fir.call @llvm.memmove.p0.p0.i64(%[[CVT]], %[[CVT1]]{{.*}}) +!FIRDialect: %[[THIRTY_TWO:.*]] = arith.constant 32 : i8 +!FIRDialect-DAG: %[[UNDEF:.*]] = fir.undefined !fir.char<1> +!FIRDialect-DAG: %[[INSERT:.*]] = fir.insert_value %[[UNDEF]], %[[THIRTY_TWO]], [0 : index] : (!fir.char<1>, i8) -> !fir.char<1> +!FIRDialect-DAG: %[[ONE_3:.*]] = arith.constant 1 : index +!FIRDialect: fir.do_loop %[[ARG2:.*]] = {{.*}} { +!FIRDialect-DAG: %[[CVT_2:.*]] = fir.convert %[[ARG1_UNBOX]]#0 : (!fir.ref>) -> !fir.ref>> +!FIRDialect-DAG: %[[COORD:.*]] = fir.coordinate_of %[[CVT_2]], %[[ARG2]] : (!fir.ref>>, index) -> !fir.ref> +!FIRDialect-DAG: fir.store %[[INSERT]] to %[[COORD]] : !fir.ref> +!FIRDialect-DAG: } +!FIRDialect-DAG: } +!FIRDialect-DAG: omp.yield + +subroutine lastprivate_character(arg1) + 
character(5) :: arg1 +!$OMP PARALLEL +!$OMP DO LASTPRIVATE(arg1) +do n = 1, 5 + arg1(n:n) = 'c' + print *, arg1 +end do +!$OMP END DO +!$OMP END PARALLEL +end subroutine + +!FIRDialect: func @_QPlastprivate_int(%[[ARG1:.*]]: !fir.ref {fir.bindc_name = "arg1"}) { +!FIRDialect-DAG: omp.parallel { +!FIRDialect-DAG: %[[CLONE:.*]] = fir.alloca i32 {bindc_name = "arg1" +!FIRDialect: omp.wsloop for (%[[INDX_WS:.*]]) : {{.*}} { + +! Testing last iteration check +!FIRDialect-DAG: %[[IV_CMP:.*]] = llvm.icmp "eq" %[[INDX_WS]] +!FIRDialect-DAG: scf.if %[[IV_CMP]] { + +! Testing lastprivate val update +!FIRDialect-NEXT: %[[CLONE_LD:.*]] = fir.load %[[CLONE]] : !fir.ref +!FIRDialect-NEXT: fir.store %[[CLONE_LD]] to %[[ARG1]] : !fir.ref +!FIRDialect-DAG: } +!FIRDialect-DAG: omp.yield + +subroutine lastprivate_int(arg1) + integer :: arg1 +!$OMP PARALLEL +!$OMP DO LASTPRIVATE(arg1) +do n = 1, 5 + arg1 = 2 + print *, arg1 +end do +!$OMP END DO +!$OMP END PARALLEL +print *, arg1 +end subroutine + +