diff --git a/flang/include/flang/Lower/AbstractConverter.h b/flang/include/flang/Lower/AbstractConverter.h --- a/flang/include/flang/Lower/AbstractConverter.h +++ b/flang/include/flang/Lower/AbstractConverter.h @@ -18,6 +18,7 @@ #include "flang/Optimizer/Builder/BoxValue.h" #include "flang/Semantics/symbol.h" #include "mlir/IR/BuiltinOps.h" +#include "mlir/IR/Operation.h" #include "llvm/ADT/ArrayRef.h" namespace fir { @@ -101,7 +102,8 @@ virtual bool createHostAssociateVarClone(const Fortran::semantics::Symbol &sym) = 0; - virtual void copyHostAssociateVar(const Fortran::semantics::Symbol &sym) = 0; + virtual void copyHostAssociateVar(const Fortran::semantics::Symbol &sym, + mlir::Block *lastPrivBlock = nullptr) = 0; /// Collect the set of ultimate symbols of symbols with \p flag in \p eval /// region if \p isUltimateSymbol is true. Otherwise, collect the set of diff --git a/flang/lib/Lower/Bridge.cpp b/flang/lib/Lower/Bridge.cpp --- a/flang/lib/Lower/Bridge.cpp +++ b/flang/lib/Lower/Bridge.cpp @@ -44,6 +44,7 @@ #include "flang/Runtime/iostat.h" #include "flang/Semantics/tools.h" #include "mlir/Dialect/ControlFlow/IR/ControlFlowOps.h" +#include "mlir/Dialect/OpenMP/OpenMPDialect.h" #include "mlir/IR/PatternMatch.h" #include "mlir/Parser/Parser.h" #include "mlir/Transforms/RegionUtils.h" @@ -491,8 +492,10 @@ return bindIfNewSymbol(sym, exv); } + // FIXME: Generalize this function, so that lastPrivBlock can be removed void - copyHostAssociateVar(const Fortran::semantics::Symbol &sym) override final { + copyHostAssociateVar(const Fortran::semantics::Symbol &sym, + mlir::Block *lastPrivBlock = nullptr) override final { // 1) Fetch the original copy of the variable. assert(sym.has() && "No host-association found"); @@ -509,22 +512,40 @@ fir::ExtendedValue exv = getExtendedValue(sb); // 3) Perform the assignment. 
- builder->setInsertionPointAfter(fir::getBase(exv).getDefiningOp()); + mlir::OpBuilder::InsertPoint insPt = builder->saveInsertionPoint(); + if (lastPrivBlock) + builder->setInsertionPointToStart(lastPrivBlock); + else + builder->setInsertionPointAfter(fir::getBase(exv).getDefiningOp()); + + fir::ExtendedValue lhs, rhs; + if (lastPrivBlock) { + // lastprivate case + lhs = hexv; + rhs = exv; + } else { + lhs = exv; + rhs = hexv; + } + mlir::Location loc = genLocation(sym.name()); mlir::Type symType = genType(sym); if (auto seqTy = symType.dyn_cast()) { Fortran::lower::StatementContext stmtCtx; - Fortran::lower::createSomeArrayAssignment(*this, exv, hexv, localSymbols, + Fortran::lower::createSomeArrayAssignment(*this, lhs, rhs, localSymbols, stmtCtx); stmtCtx.finalize(); } else if (hexv.getBoxOf()) { - fir::factory::CharacterExprHelper{*builder, loc}.createAssign(exv, hexv); + fir::factory::CharacterExprHelper{*builder, loc}.createAssign(lhs, rhs); } else if (hexv.getBoxOf()) { TODO(loc, "firstprivatisation of allocatable variables"); } else { - auto loadVal = builder->create(loc, fir::getBase(hexv)); - builder->create(loc, loadVal, fir::getBase(exv)); + auto loadVal = builder->create(loc, fir::getBase(rhs)); + builder->create(loc, loadVal, fir::getBase(lhs)); } + + if (lastPrivBlock) + builder->restoreInsertionPoint(insPt); } //===--------------------------------------------------------------------===// diff --git a/flang/lib/Lower/OpenMP.cpp b/flang/lib/Lower/OpenMP.cpp --- a/flang/lib/Lower/OpenMP.cpp +++ b/flang/lib/Lower/OpenMP.cpp @@ -22,6 +22,7 @@ #include "flang/Parser/parse-tree.h" #include "flang/Semantics/tools.h" #include "mlir/Dialect/OpenMP/OpenMPDialect.h" +#include "mlir/Dialect/SCF/IR/SCF.h" #include "llvm/Frontend/OpenMP/OMPConstants.h" using namespace mlir; @@ -61,7 +62,8 @@ template static void createPrivateVarSyms(Fortran::lower::AbstractConverter &converter, - const T *clause) { + const T *clause, + Block *lastPrivBlock = nullptr) { const 
Fortran::parser::OmpObjectList &ompObjectList = clause->v; for (const Fortran::parser::OmpObject &ompObject : ompObjectList.v) { Fortran::semantics::Symbol *sym = getOmpObjectSymbol(ompObject); @@ -74,16 +76,25 @@ assert(success && "Privatization failed due to existing binding"); if constexpr (std::is_same_v) { converter.copyHostAssociateVar(*sym); + } else if constexpr (std::is_same_v< + T, Fortran::parser::OmpClause::Lastprivate>) { + converter.copyHostAssociateVar(*sym, lastPrivBlock); } } } -static void privatizeVars(Fortran::lower::AbstractConverter &converter, +template +static bool privatizeVars(Op &op, Fortran::lower::AbstractConverter &converter, const Fortran::parser::OmpClauseList &opClauseList) { fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder(); auto insPt = firOpBuilder.saveInsertionPoint(); firOpBuilder.setInsertionPointToStart(firOpBuilder.getAllocaBlock()); bool hasFirstPrivateOp = false; + bool hasLastPrivateOp = false; + Block *lastPrivBlock = nullptr; + // We need just one ICmpOp for multiple LastPrivate clauses. + mlir::arith::CmpIOp cmpOp; + for (const Fortran::parser::OmpClause &clause : opClauseList.v) { if (const auto &privateClause = std::get_if(&clause.u)) { @@ -93,11 +104,73 @@ &clause.u)) { createPrivateVarSyms(converter, firstPrivateClause); hasFirstPrivateOp = true; + } else if (const auto &lastPrivateClause = + std::get_if( + &clause.u)) { + // TODO: Add lastprivate support for sections construct, simd construct + if (std::is_same_v) { + omp::WsLoopOp *wsLoopOp = dyn_cast(&op); + fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder(); + auto insPt = firOpBuilder.saveInsertionPoint(); + + // Our goal here is to introduce the following control flow + // just before exiting the worksharing loop. + // Say our wsloop is as follows: + // + // omp.wsloop { + // ... + // store + // omp.yield + // } + // + // We want to convert it to the following: + // + // omp.wsloop { + // ... 
+ // store + // %cmp = llvm.icmp "eq" %iv %ub + // scf.if %cmp { + // ^%lpv_update_blk: + // } + // omp.yield + // } + + Operation *lastOper = wsLoopOp->region().back().getTerminator(); + + firOpBuilder.setInsertionPoint(lastOper); + + // TODO: The following will not work when there is collapse present. + // Have to modify this in future. + for (const Fortran::parser::OmpClause &clause : opClauseList.v) + if (const auto &collapseClause = + std::get_if(&clause.u)) + TODO(converter.getCurrentLocation(), + "Collapse clause with lastprivate"); + // Only generate the compare once in presence of multiple LastPrivate + // clauses + if (!hasLastPrivateOp) { + cmpOp = firOpBuilder.create( + wsLoopOp->getLoc(), mlir::arith::CmpIPredicate::eq, + wsLoopOp->getRegion().front().getArguments()[0], + wsLoopOp->upperBound()[0]); + } + mlir::scf::IfOp ifOp = firOpBuilder.create( + wsLoopOp->getLoc(), cmpOp, /*else*/ false); + + firOpBuilder.restoreInsertionPoint(insPt); + createPrivateVarSyms(converter, lastPrivateClause, + &(ifOp.getThenRegion().front())); + } else { + TODO(converter.getCurrentLocation(), + "lastprivate clause in constructs other than work-share loop"); + } + hasLastPrivateOp = true; } } if (hasFirstPrivateOp) firOpBuilder.create(converter.getCurrentLocation()); firOpBuilder.restoreInsertionPoint(insPt); + return hasLastPrivateOp; } /// The COMMON block is a global structure. \p commonValue is the base address @@ -300,6 +373,14 @@ } } +void resetBeforeTerminator(fir::FirOpBuilder &firOpBuilder, + mlir::Operation *storeOp, mlir::Block &block) { + if (storeOp) + firOpBuilder.setInsertionPointAfter(storeOp); + else + firOpBuilder.setInsertionPointToStart(&block); +} + /// Create the body (block) for an OpenMP Operation. /// /// \param [in] op - the operation the body belongs to. @@ -374,14 +455,18 @@ } // Reset the insert point to before the terminator. 
- if (storeOp) - firOpBuilder.setInsertionPointAfter(storeOp); - else - firOpBuilder.setInsertionPointToStart(&block); + resetBeforeTerminator(firOpBuilder, storeOp, block); // Handle privatization. Do not privatize if this is the outer operation. - if (clauses && !outerCombined) - privatizeVars(converter, *clauses); + if (clauses && !outerCombined) { + bool lastPrivateOp = privatizeVars(op, converter, *clauses); + // LastPrivatization, due to introduction of + // new control flow, changes the insertion point, + // thus restore it. + // TODO: Clean up later a bit to avoid this many sets and resets. + if (lastPrivateOp) + resetBeforeTerminator(firOpBuilder, storeOp, block); + } if constexpr (std::is_same_v) { threadPrivatizeVars(converter, eval); diff --git a/flang/test/Lower/OpenMP/omp-parallel-lastprivate-clause-scalar.f90 b/flang/test/Lower/OpenMP/omp-parallel-lastprivate-clause-scalar.f90 new file mode 100644 --- /dev/null +++ b/flang/test/Lower/OpenMP/omp-parallel-lastprivate-clause-scalar.f90 @@ -0,0 +1,185 @@ +! This test checks lowering of `LASTPRIVATE` clause for scalar types. +! TODO: Add a test for same var being first and lastprivate when support is there. + +! RUN: bbc -fopenmp -emit-fir %s -o - | FileCheck %s +! RUN: flang-new -fc1 -fopenmp -emit-fir %s -o - | FileCheck %s + +!CHECK: func @_QPlastprivate_character(%[[ARG1:.*]]: !fir.boxchar<1>{{.*}}) { +!CHECK-DAG: %[[ARG1_UNBOX:.*]]:2 = fir.unboxchar +!CHECK-DAG: %[[FIVE:.*]] = arith.constant 5 : index + +!CHECK: omp.parallel { +!CHECK-DAG: %[[ARG1_PVT:.*]] = fir.alloca !fir.char<1,5> {bindc_name = "arg1", + +! Check that we are accessing the clone inside the loop +!CHECK-DAG: omp.wsloop for (%[[INDX_WS:.*]]) : {{.*}} { +!CHECK-DAG: %[[NEG_ONE:.*]] = arith.constant -1 : i32 +!CHECK-NEXT: %[[ADDR:.*]] = fir.address_of(@_QQcl. 
+!CHECK-NEXT: %[[CVT0:.*]] = fir.convert %[[ADDR]] +!CHECK-NEXT: %[[CNST:.*]] = arith.constant +!CHECK-NEXT: %[[CALL_BEGIN_IO:.*]] = fir.call @_FortranAioBeginExternalListOutput(%[[NEG_ONE]], %[[CVT0]], %[[CNST]]) : (i32, !fir.ref, i32) -> !fir.ref +!CHECK-NEXT: %[[CVT_0_1:.*]] = fir.convert %[[ARG1_PVT]] +!CHECK-NEXT: %[[CVT_0_2:.*]] = fir.convert %[[FIVE]] +!CHECK-NEXT: %[[CALL_OP_ASCII:.*]] = fir.call @_FortranAioOutputAscii(%[[CALL_BEGIN_IO]], %[[CVT_0_1]], %[[CVT_0_2]]) +!CHECK-NEXT: %[[CALL_END_IO:.*]] = fir.call @_FortranAioEndIoStatement(%[[CALL_BEGIN_IO]]) + +! Testing last iteration check +!CHECK-NEXT: %[[IV_CMP:.*]] = arith.cmpi eq, %[[INDX_WS]] +!CHECK: scf.if %[[IV_CMP]] { + +! Testing lastprivate val update +!CHECK-DAG: %[[CVT:.*]] = fir.convert %[[ARG1_UNBOX]]#0 : (!fir.ref>) -> !fir.ref +!CHECK-DAG: %[[CVT1:.*]] = fir.convert %[[ARG1_PVT]] : (!fir.ref>) -> !fir.ref +!CHECK-DAG: fir.call @llvm.memmove.p0.p0.i64(%[[CVT]], %[[CVT1]]{{.*}}) +!CHECK: %[[THIRTY_TWO:.*]] = arith.constant 32 : i8 +!CHECK-DAG: %[[UNDEF:.*]] = fir.undefined !fir.char<1> +!CHECK-DAG: %[[INSERT:.*]] = fir.insert_value %[[UNDEF]], %[[THIRTY_TWO]], [0 : index] : (!fir.char<1>, i8) -> !fir.char<1> +!CHECK-DAG: %[[ONE_3:.*]] = arith.constant 1 : index +!CHECK: fir.do_loop %[[ARG2:.*]] = {{.*}} { +!CHECK-DAG: %[[CVT_2:.*]] = fir.convert %[[ARG1_UNBOX]]#0 : (!fir.ref>) -> !fir.ref>> +!CHECK-DAG: %[[COORD:.*]] = fir.coordinate_of %[[CVT_2]], %[[ARG2]] : (!fir.ref>>, index) -> !fir.ref> +!CHECK-DAG: fir.store %[[INSERT]] to %[[COORD]] : !fir.ref> +!CHECK-DAG: } +!CHECK-DAG: } +!CHECK-DAG: omp.yield + +subroutine lastprivate_character(arg1) + character(5) :: arg1 +!$OMP PARALLEL +!$OMP DO LASTPRIVATE(arg1) +do n = 1, 5 + arg1(n:n) = 'c' + print *, arg1 +end do +!$OMP END DO +!$OMP END PARALLEL +end subroutine + +!CHECK: func @_QPlastprivate_int(%[[ARG1:.*]]: !fir.ref {fir.bindc_name = "arg1"}) { +!CHECK-DAG: omp.parallel { +!CHECK-DAG: %[[CLONE:.*]] = fir.alloca i32 {bindc_name = "arg1" 
+!CHECK: omp.wsloop for (%[[INDX_WS:.*]]) : {{.*}} { + +! Testing last iteration check +!CHECK-DAG: %[[IV_CMP:.*]] = arith.cmpi eq, %[[INDX_WS]] +!CHECK-DAG: scf.if %[[IV_CMP]] { + +! Testing lastprivate val update +!CHECK-NEXT: %[[CLONE_LD:.*]] = fir.load %[[CLONE]] : !fir.ref +!CHECK-NEXT: fir.store %[[CLONE_LD]] to %[[ARG1]] : !fir.ref +!CHECK-DAG: } +!CHECK-DAG: omp.yield + +subroutine lastprivate_int(arg1) + integer :: arg1 +!$OMP PARALLEL +!$OMP DO LASTPRIVATE(arg1) +do n = 1, 5 + arg1 = 2 + print *, arg1 +end do +!$OMP END DO +!$OMP END PARALLEL +print *, arg1 +end subroutine + +!CHECK: func.func @_QPmult_lastprivate_int(%[[ARG1:.*]]: !fir.ref {fir.bindc_name = "arg1"}, %[[ARG2:.*]]: !fir.ref {fir.bindc_name = "arg2"}) { +!CHECK-DAG: omp.parallel { +!CHECK-DAG: %[[CLONE1:.*]] = fir.alloca i32 {bindc_name = "arg1" +!CHECK-DAG: %[[CLONE2:.*]] = fir.alloca i32 {bindc_name = "arg2" +!CHECK: omp.wsloop for (%[[INDX_WS:.*]]) : {{.*}} { + +! Testing last iteration check +!CHECK-DAG: %[[IV_CMP1:.*]] = arith.cmpi eq, %[[INDX_WS]] +!CHECK-DAG: scf.if %[[IV_CMP1]] { +! Testing lastprivate val update +!CHECK-NEXT: %[[CLONE_LD1:.*]] = fir.load %[[CLONE1]] : !fir.ref +!CHECK-NEXT: fir.store %[[CLONE_LD1]] to %[[ARG1]] : !fir.ref +!CHECK-DAG: } +!CHECK-DAG: scf.if %[[IV_CMP1]] { +! 
Testing lastprivate val update +!CHECK-NEXT: %[[CLONE_LD2:.*]] = fir.load %[[CLONE2]] : !fir.ref +!CHECK-NEXT: fir.store %[[CLONE_LD2]] to %[[ARG2]] : !fir.ref +!CHECK-DAG: } +!CHECK-DAG: omp.yield + +subroutine mult_lastprivate_int(arg1, arg2) + integer :: arg1, arg2 +!$OMP PARALLEL +!$OMP DO LASTPRIVATE(arg1) LASTPRIVATE(arg2) +do n = 1, 5 + arg1 = 2 + arg2 = 3 + print *, arg1, arg2 +end do +!$OMP END DO +!$OMP END PARALLEL +print *, arg1, arg2 +end subroutine + +!CHECK: func.func @_QPmult_lastprivate_int2(%[[ARG1:.*]]: !fir.ref {fir.bindc_name = "arg1"}, %[[ARG2:.*]]: !fir.ref {fir.bindc_name = "arg2"}) { +!CHECK-DAG: omp.parallel { +!CHECK-DAG: %[[CLONE1:.*]] = fir.alloca i32 {bindc_name = "arg1" +!CHECK-DAG: %[[CLONE2:.*]] = fir.alloca i32 {bindc_name = "arg2" +!CHECK: omp.wsloop for (%[[INDX_WS:.*]]) : {{.*}} { + +! Testing last iteration check +!CHECK-DAG: %[[IV_CMP1:.*]] = arith.cmpi eq, %[[INDX_WS]] +!CHECK-DAG: scf.if %[[IV_CMP1]] { +! Testing lastprivate val update +!CHECK-NEXT: %[[CLONE_LD2:.*]] = fir.load %[[CLONE2]] : !fir.ref +!CHECK-NEXT: fir.store %[[CLONE_LD2]] to %[[ARG2]] : !fir.ref +!CHECK-NEXT: %[[CLONE_LD1:.*]] = fir.load %[[CLONE1]] : !fir.ref +!CHECK-NEXT: fir.store %[[CLONE_LD1]] to %[[ARG1]] : !fir.ref +!CHECK-NEXT: } +!CHECK-NEXT: omp.yield + +subroutine mult_lastprivate_int2(arg1, arg2) + integer :: arg1, arg2 +!$OMP PARALLEL +!$OMP DO LASTPRIVATE(arg1, arg2) +do n = 1, 5 + arg1 = 2 + arg2 = 3 + print *, arg1, arg2 +end do +!$OMP END DO +!$OMP END PARALLEL +print *, arg1, arg2 +end subroutine + +!CHECK: func.func @_QPfirstpriv_lastpriv_int(%[[ARG1:.*]]: !fir.ref {fir.bindc_name = "arg1"}, %[[ARG2:.*]]: !fir.ref {fir.bindc_name = "arg2"}) { +!CHECK-DAG: omp.parallel { +!CHECK-DAG: %[[CLONE1:.*]] = fir.alloca i32 {bindc_name = "arg1" +! Firstprivate update +!CHECK-NEXT: %[[FPV_LD:.*]] = fir.load %[[ARG1]] : !fir.ref +!CHECK-NEXT: fir.store %[[FPV_LD]] to %[[CLONE1]] : !fir.ref +! 
Lastprivate Allocation +!CHECK-NEXT: %[[CLONE2:.*]] = fir.alloca i32 {bindc_name = "arg2" +!CHECK-NEXT: omp.barrier +!CHECK: omp.wsloop for (%[[INDX_WS:.*]]) : {{.*}} { + +! Testing last iteration check +!CHECK-DAG: %[[IV_CMP1:.*]] = arith.cmpi eq, %[[INDX_WS]] +!CHECK-DAG: scf.if %[[IV_CMP1]] { +! Testing lastprivate val update +!CHECK-NEXT: %[[CLONE_LD:.*]] = fir.load %[[CLONE2]] : !fir.ref +!CHECK-NEXT: fir.store %[[CLONE_LD]] to %[[ARG2]] : !fir.ref +!CHECK-NEXT: } +!CHECK-NEXT: omp.yield + +subroutine firstpriv_lastpriv_int(arg1, arg2) + integer :: arg1, arg2 +!$OMP PARALLEL +!$OMP DO FIRSTPRIVATE(arg1) LASTPRIVATE(arg2) +do n = 1, 5 + arg1 = 2 + arg2 = 3 + print *, arg1, arg2 +end do +!$OMP END DO +!$OMP END PARALLEL +print *, arg1, arg2 +end subroutine + +