diff --git a/flang/include/flang/Lower/AbstractConverter.h b/flang/include/flang/Lower/AbstractConverter.h --- a/flang/include/flang/Lower/AbstractConverter.h +++ b/flang/include/flang/Lower/AbstractConverter.h @@ -18,6 +18,7 @@ #include "flang/Lower/PFTDefs.h" #include "flang/Optimizer/Builder/BoxValue.h" #include "flang/Semantics/symbol.h" +#include "mlir/IR/Builders.h" #include "mlir/IR/BuiltinOps.h" #include "mlir/IR/Operation.h" #include "llvm/ADT/ArrayRef.h" @@ -103,8 +104,9 @@ virtual bool createHostAssociateVarClone(const Fortran::semantics::Symbol &sym) = 0; - virtual void copyHostAssociateVar(const Fortran::semantics::Symbol &sym, - mlir::Block *lastPrivBlock = nullptr) = 0; + virtual void copyHostAssociateVar( + const Fortran::semantics::Symbol &sym, + mlir::OpBuilder::InsertPoint *copyAssignIP = nullptr) = 0; /// Collect the set of symbols with \p flag in \p eval /// region if \p collectSymbols is true. Likewise, collect the diff --git a/flang/lib/Lower/Bridge.cpp b/flang/lib/Lower/Bridge.cpp --- a/flang/lib/Lower/Bridge.cpp +++ b/flang/lib/Lower/Bridge.cpp @@ -516,10 +516,9 @@ return bindIfNewSymbol(sym, exv); } - // FIXME: Generalize this function, so that lastPrivBlock can be removed - void - copyHostAssociateVar(const Fortran::semantics::Symbol &sym, - mlir::Block *lastPrivBlock = nullptr) override final { + void copyHostAssociateVar( + const Fortran::semantics::Symbol &sym, + mlir::OpBuilder::InsertPoint *copyAssignIP = nullptr) override final { // 1) Fetch the original copy of the variable. assert(sym.has() && "No host-association found"); @@ -537,13 +536,14 @@ // 3) Perform the assignment. mlir::OpBuilder::InsertPoint insPt = builder->saveInsertionPoint(); - if (lastPrivBlock) - builder->setInsertionPointToStart(lastPrivBlock); + if (copyAssignIP && copyAssignIP->isSet()) + builder->restoreInsertionPoint(*copyAssignIP); else builder->setInsertionPointAfter(fir::getBase(exv).getDefiningOp()); fir::ExtendedValue lhs, rhs; - if (lastPrivBlock) { + if (copyAssignIP && copyAssignIP->isSet() && + sym.test(Fortran::semantics::Symbol::Flag::OmpLastPrivate)) { // lastprivate case lhs = hexv; rhs = exv; @@ -568,7 +568,8 @@ builder->create(loc, loadVal, fir::getBase(lhs)); } - if (lastPrivBlock) + if (copyAssignIP && copyAssignIP->isSet() && + sym.test(Fortran::semantics::Symbol::Flag::OmpLastPrivate)) builder->restoreInsertionPoint(insPt); } diff --git a/flang/lib/Lower/OpenMP.cpp b/flang/lib/Lower/OpenMP.cpp --- a/flang/lib/Lower/OpenMP.cpp +++ b/flang/lib/Lower/OpenMP.cpp @@ -61,10 +61,11 @@ return sym; } -static void -privatizeSymbol(Fortran::lower::AbstractConverter &converter, - const Fortran::semantics::Symbol *sym, - [[maybe_unused]] mlir::Block *lastPrivBlock = nullptr) { +template +static void privatizeSymbol( + Op &op, Fortran::lower::AbstractConverter &converter, + const Fortran::semantics::Symbol *sym, + [[maybe_unused]] mlir::OpBuilder::InsertPoint *lastPrivIP = nullptr) { // Privatization for symbols which are pre-determined (like loop index // variables) happen separately, for everything else privatize here. if (sym->test(Fortran::semantics::Symbol::Flag::OmpPreDetermined)) @@ -72,10 +73,20 @@ bool success = converter.createHostAssociateVarClone(*sym); (void)success; assert(success && "Privatization failed due to existing binding"); - if (sym->test(Fortran::semantics::Symbol::Flag::OmpFirstPrivate)) - converter.copyHostAssociateVar(*sym); + if (sym->test(Fortran::semantics::Symbol::Flag::OmpFirstPrivate)) { + fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder(); + mlir::OpBuilder::InsertPoint firstPrivIP, insPt; + if (mlir::isa(op)) { + insPt = firOpBuilder.saveInsertionPoint(); + firOpBuilder.setInsertionPointToStart(&op.getRegion().front()); + firstPrivIP = firOpBuilder.saveInsertionPoint(); + } + converter.copyHostAssociateVar(*sym, &firstPrivIP); + if (mlir::isa(op)) + firOpBuilder.restoreInsertionPoint(insPt); + } if (sym->test(Fortran::semantics::Symbol::Flag::OmpLastPrivate)) - converter.copyHostAssociateVar(*sym, lastPrivBlock); + converter.copyHostAssociateVar(*sym, lastPrivIP); } template @@ -96,7 +107,7 @@ }; // We need just one ICmpOp for multiple LastPrivate clauses. mlir::arith::CmpIOp cmpOp; - mlir::Block *lastPrivBlock = nullptr; + mlir::OpBuilder::InsertPoint lastPrivIP; bool hasLastPrivateOp = false; for (const Fortran::parser::OmpClause &clause : opClauseList.v) { if (const auto &privateClause = @@ -155,7 +166,8 @@ } mlir::scf::IfOp ifOp = firOpBuilder.create( wsLoopOp->getLoc(), cmpOp, /*else*/ false); - lastPrivBlock = &ifOp.getThenRegion().front(); + firOpBuilder.setInsertionPointToStart(&ifOp.getThenRegion().front()); + lastPrivIP = firOpBuilder.saveInsertionPoint(); } else { TODO(converter.getCurrentLocation(), "lastprivate clause in constructs other than worksharing-loop"); @@ -207,7 +219,7 @@ else firOpBuilder.setInsertionPointToStart(firOpBuilder.getAllocaBlock()); for (auto sym : privatizedSymbols) { - privatizeSymbol(converter, sym, lastPrivBlock); + privatizeSymbol(op, converter, sym, &lastPrivIP); if (sym->test(Fortran::semantics::Symbol::Flag::OmpFirstPrivate) && sym->test(Fortran::semantics::Symbol::Flag::OmpLastPrivate)) needBarrier = true; @@ -217,7 +229,7 @@ if (!symbolsInNestedRegions.contains(sym) && !symbolsInParentRegions.contains(sym) && !privatizedSymbols.contains(sym)) - privatizeSymbol(converter, sym); + privatizeSymbol(op, converter, sym); // Emit implicit barrier to synchronize threads and avoid data races on // initialization of firstprivate variables and post-update of lastprivate @@ -825,7 +837,8 @@ } else if (blockDirective.v == llvm::omp::OMPD_single) { auto singleOp = firOpBuilder.create( currentLocation, allocateOperands, allocatorOperands, nowaitAttr); - createBodyOfOp(singleOp, converter, currentLocation, eval); + createBodyOfOp(singleOp, converter, currentLocation, eval, + &opClauseList); } else if (blockDirective.v == llvm::omp::OMPD_ordered) { auto orderedOp = firOpBuilder.create( currentLocation, /*simd=*/nullptr); @@ -839,7 +852,7 @@ allocatorOperands); createBodyOfOp(taskOp, converter, currentLocation, eval, &opClauseList); } else if (blockDirective.v == llvm::omp::OMPD_taskgroup) { - // TODO: Add task_reduction support + // TODO: Add task_reduction support auto taskGroupOp = firOpBuilder.create( currentLocation, /*task_reduction_vars=*/ValueRange(), /*task_reductions=*/nullptr, allocateOperands, allocatorOperands); diff --git a/flang/test/Lower/OpenMP/single.f90 b/flang/test/Lower/OpenMP/single.f90 --- a/flang/test/Lower/OpenMP/single.f90 +++ b/flang/test/Lower/OpenMP/single.f90 @@ -1,25 +1,25 @@ -!RUN: %flang_fc1 -emit-fir -fopenmp %s -o - | FileCheck %s --check-prefixes="FIRDialect,OMPDialect" -!RUN: %flang_fc1 -emit-fir -fopenmp %s -o - | fir-opt --cfg-conversion | fir-opt --fir-to-llvm-ir | FileCheck %s --check-prefixes="LLVMDialect,OMPDialect" +!RUN: %flang_fc1 -emit-fir -fopenmp %s -o - | FileCheck %s +!RUN: bbc -emit-fir -fopenmp %s -o - | FileCheck %s !=============================================================================== ! Single construct !=============================================================================== -!FIRDialect-LABEL: func @_QPomp_single -!FIRDialect-SAME: (%[[x:.*]]: !fir.ref {fir.bindc_name = "x"}) +!CHECK-LABEL: func @_QPomp_single +!CHECK-SAME: (%[[x:.*]]: !fir.ref {fir.bindc_name = "x"}) subroutine omp_single(x) integer, intent(inout) :: x - !OMPDialect: omp.parallel + !CHECK: omp.parallel !$omp parallel - !OMPDialect: omp.single + !CHECK: omp.single !$omp single - !FIRDialect: %[[xval:.*]] = fir.load %[[x]] : !fir.ref - !FIRDialect: %[[res:.*]] = arith.addi %[[xval]], %{{.*}} : i32 - !FIRDialect: fir.store %[[res]] to %[[x]] : !fir.ref + !CHECK: %[[xval:.*]] = fir.load %[[x]] : !fir.ref + !CHECK: %[[res:.*]] = arith.addi %[[xval]], %{{.*}} : i32 + !CHECK: fir.store %[[res]] to %[[x]] : !fir.ref x = x + 12 - !OMPDialect: omp.terminator + !CHECK: omp.terminator !$omp end single - !OMPDialect: omp.terminator + !CHECK: omp.terminator !$omp end parallel end subroutine omp_single @@ -27,21 +27,21 @@ ! Single construct with nowait !=============================================================================== -!FIRDialect-LABEL: func @_QPomp_single_nowait -!FIRDialect-SAME: (%[[x:.*]]: !fir.ref {fir.bindc_name = "x"}) +!CHECK-LABEL: func @_QPomp_single_nowait +!CHECK-SAME: (%[[x:.*]]: !fir.ref {fir.bindc_name = "x"}) subroutine omp_single_nowait(x) integer, intent(inout) :: x - !OMPDialect: omp.parallel + !CHECK: omp.parallel !$omp parallel - !OMPDialect: omp.single nowait + !CHECK: omp.single nowait !$omp single - !FIRDialect: %[[xval:.*]] = fir.load %[[x]] : !fir.ref - !FIRDialect: %[[res:.*]] = arith.addi %[[xval]], %{{.*}} : i32 - !FIRDialect: fir.store %[[res]] to %[[x]] : !fir.ref + !CHECK: %[[xval:.*]] = fir.load %[[x]] : !fir.ref + !CHECK: %[[res:.*]] = arith.addi %[[xval]], %{{.*}} : i32 + !CHECK: fir.store %[[res]] to %[[x]] : !fir.ref x = x + 12 - !OMPDialect: omp.terminator + !CHECK: omp.terminator !$omp end single nowait - !OMPDialect: omp.terminator + !CHECK: omp.terminator !$omp end parallel end subroutine omp_single_nowait @@ -49,21 +49,73 @@ ! Single construct with allocate !=============================================================================== -!FIRDialect-LABEL: func @_QPsingle_allocate +!CHECK-LABEL: func @_QPsingle_allocate subroutine single_allocate() use omp_lib integer :: x - !OMPDialect: omp.parallel { + !CHECK: omp.parallel { !$omp parallel - !OMPDialect: omp.single allocate( - !FIRDialect: %{{.+}} : i32 -> %{{.+}} : !fir.ref - !LLVMDialect: %{{.+}} : i32 -> %{{.+}} : !llvm.ptr - !OMPDialect: ) { + !CHECK: omp.single allocate(%{{.+}} : i32 -> %{{.+}} : !fir.ref) { !$omp single allocate(omp_high_bw_mem_alloc: x) private(x) - !FIRDialect: arith.addi + !CHECK: arith.addi x = x + 12 - !OMPDialect: omp.terminator + !CHECK: omp.terminator !$omp end single - !OMPDialect: omp.terminator + !CHECK: omp.terminator !$omp end parallel end subroutine single_allocate + +!=============================================================================== +! Single construct with private/firstprivate +!=============================================================================== + +! CHECK-LABEL: func.func @_QPsingle_privatization( +! CHECK-SAME: %[[VAL_0:.*]]: !fir.ref {fir.bindc_name = "x"}, +! CHECK-SAME: %[[VAL_1:.*]]: !fir.ref {fir.bindc_name = "y"}) { +! CHECK: %[[VAL_2:.*]] = fir.alloca f32 {bindc_name = "x", pinned, uniq_name = "_QFsingle_privatizationEx"} +! CHECK: %[[VAL_3:.*]] = fir.alloca f64 {bindc_name = "y", pinned, uniq_name = "_QFsingle_privatizationEy"} +! CHECK: omp.single { +! CHECK: %[[VAL_4:.*]] = fir.load %[[VAL_1]] : !fir.ref +! CHECK: fir.store %[[VAL_4]] to %[[VAL_3]] : !fir.ref +! CHECK: fir.call @_QPbar(%[[VAL_2]], %[[VAL_3]]) : (!fir.ref, !fir.ref) -> () +! CHECK: omp.terminator +! CHECK: } +! CHECK: return +! CHECK: } + +subroutine single_privatization(x, y) + real :: x + real(8) :: y + + !$omp single private(x) firstprivate(y) + call bar(x, y) + !$omp end single +end subroutine + +! CHECK-LABEL: func.func @_QPsingle_privatization2( +! CHECK-SAME: %[[VAL_0:.*]]: !fir.ref {fir.bindc_name = "x"}, +! CHECK-SAME: %[[VAL_1:.*]]: !fir.ref {fir.bindc_name = "y"}) { +! CHECK: omp.parallel { +! CHECK: %[[VAL_2:.*]] = fir.alloca f32 {bindc_name = "x", pinned, uniq_name = "_QFsingle_privatization2Ex"} +! CHECK: %[[VAL_3:.*]] = fir.alloca f64 {bindc_name = "y", pinned, uniq_name = "_QFsingle_privatization2Ey"} +! CHECK: omp.single { +! CHECK: %[[VAL_4:.*]] = fir.load %[[VAL_1]] : !fir.ref +! CHECK: fir.store %[[VAL_4]] to %[[VAL_3]] : !fir.ref +! CHECK: fir.call @_QPbar(%[[VAL_2]], %[[VAL_3]]) : (!fir.ref, !fir.ref) -> () +! CHECK: omp.terminator +! CHECK: } +! CHECK: omp.terminator +! CHECK: } +! CHECK: return +! CHECK: } + +subroutine single_privatization2(x, y) + real :: x + real(8) :: y + + !$omp parallel + !$omp single private(x) firstprivate(y) + call bar(x, y) + !$omp end single + !$omp end parallel +end subroutine