diff --git a/flang/lib/Lower/OpenMP.cpp b/flang/lib/Lower/OpenMP.cpp --- a/flang/lib/Lower/OpenMP.cpp +++ b/flang/lib/Lower/OpenMP.cpp @@ -246,6 +246,80 @@ standaloneConstruct.u); } +static omp::ClauseProcBindKindAttr genProcBindKindAttr( + fir::FirOpBuilder &firOpBuilder, + const Fortran::parser::OmpClause::ProcBind *procBindClause) { + omp::ClauseProcBindKind pbKind; + switch (procBindClause->v.v) { + case Fortran::parser::OmpProcBindClause::Type::Master: + pbKind = omp::ClauseProcBindKind::Master; + break; + case Fortran::parser::OmpProcBindClause::Type::Close: + pbKind = omp::ClauseProcBindKind::Close; + break; + case Fortran::parser::OmpProcBindClause::Type::Spread: + pbKind = omp::ClauseProcBindKind::Spread; + break; + case Fortran::parser::OmpProcBindClause::Type::Primary: + pbKind = omp::ClauseProcBindKind::Primary; + break; + } + return omp::ClauseProcBindKindAttr::get(firOpBuilder.getContext(), pbKind); +} + +/* When parallel is used in a combined construct, then use this function to + * create the parallel operation. It handles the parallel specific clauses + * and leaves the rest for handling at the inner operations. + * TODO: Refactor clause handling + */ +template +static void +createCombinedParallelOp(Fortran::lower::AbstractConverter &converter, + Fortran::lower::pft::Evaluation &eval, + const Directive &directive) { + fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder(); + mlir::Location currentLocation = converter.getCurrentLocation(); + Fortran::lower::StatementContext stmtCtx; + llvm::ArrayRef argTy; + mlir::Value ifClauseOperand, numThreadsClauseOperand; + SmallVector allocatorOperands, allocateOperands; + mlir::omp::ClauseProcBindKindAttr procBindKindAttr; + const auto &opClauseList = + std::get(directive.t); + // TODO: Handle the following clauses + // 1. default + // 2. copyin + // Note: rest of the clauses are handled when the inner operation is created + for (const Fortran::parser::OmpClause &clause : opClauseList.v) { + if (const auto &ifClause = + std::get_if(&clause.u)) { + auto &expr = std::get(ifClause->v.t); + mlir::Value ifVal = fir::getBase( + converter.genExprValue(*Fortran::semantics::GetExpr(expr), stmtCtx)); + ifClauseOperand = firOpBuilder.createConvert( + currentLocation, firOpBuilder.getI1Type(), ifVal); + } else if (const auto &numThreadsClause = + std::get_if( + &clause.u)) { + numThreadsClauseOperand = fir::getBase(converter.genExprValue( + *Fortran::semantics::GetExpr(numThreadsClause->v), stmtCtx)); + } else if (const auto &procBindClause = + std::get_if( + &clause.u)) { + procBindKindAttr = genProcBindKindAttr(firOpBuilder, procBindClause); + } + } + // Create and insert the operation. + auto parallelOp = firOpBuilder.create( + currentLocation, argTy, ifClauseOperand, numThreadsClauseOperand, + allocateOperands, allocatorOperands, /*reduction_vars=*/ValueRange(), + /*reductions=*/nullptr, procBindKindAttr); + + createBodyOfOp(parallelOp, converter, currentLocation, + &opClauseList, /*iv=*/nullptr, + /*isCombined=*/true); +} + static void genOMP(Fortran::lower::AbstractConverter &converter, Fortran::lower::pft::Evaluation &eval, @@ -286,23 +360,7 @@ } else if (const auto &procBindClause = std::get_if( &clause.u)) { - omp::ClauseProcBindKind pbKind; - switch (procBindClause->v.v) { - case Fortran::parser::OmpProcBindClause::Type::Master: - pbKind = omp::ClauseProcBindKind::Master; - break; - case Fortran::parser::OmpProcBindClause::Type::Close: - pbKind = omp::ClauseProcBindKind::Close; - break; - case Fortran::parser::OmpProcBindClause::Type::Spread: - pbKind = omp::ClauseProcBindKind::Spread; - break; - case Fortran::parser::OmpProcBindClause::Type::Primary: - pbKind = omp::ClauseProcBindKind::Primary; - break; - } - procBindKindAttr = - omp::ClauseProcBindKindAttr::get(firOpBuilder.getContext(), pbKind); + procBindKindAttr = genProcBindKindAttr(firOpBuilder, procBindClause); } else if (const auto &allocateClause = std::get_if( &clause.u)) { @@ -385,13 +443,17 @@ mlir::Value scheduleChunkClauseOperand; mlir::Attribute scheduleClauseOperand, collapseClauseOperand, noWaitClauseOperand, orderedClauseOperand, orderClauseOperand; - const auto &wsLoopOpClauseList = std::get( - std::get(loopConstruct.t).t); - if (llvm::omp::OMPD_do != + + const auto ompDirective = std::get( std::get(loopConstruct.t).t) - .v) { - TODO(converter.getCurrentLocation(), "Combined worksharing loop construct"); + .v; + if (llvm::omp::OMPD_parallel_do == ompDirective) { + createCombinedParallelOp( + converter, eval, + std::get(loopConstruct.t)); + } else if (llvm::omp::OMPD_do != ompDirective) { + TODO(converter.getCurrentLocation(), "Construct enclosing do loop"); } Fortran::lower::pft::Evaluation *doConstructEval = @@ -438,6 +500,8 @@ orderClauseOperand.dyn_cast_or_null(), /*inclusive=*/firOpBuilder.getUnitAttr()); + const auto &wsLoopOpClauseList = std::get( + std::get(loopConstruct.t).t); // Handle attribute based clauses. for (const Fortran::parser::OmpClause &clause : wsLoopOpClauseList.v) { if (const auto &scheduleClause = @@ -591,15 +655,14 @@ // Parallel Sections Construct if (dir == llvm::omp::Directive::OMPD_parallel_sections) { - auto parallelOp = firOpBuilder.create( - currentLocation, /*if_expr_var*/ nullptr, /*num_threads_var*/ nullptr, - allocateOperands, allocatorOperands, /*reduction_vars=*/ValueRange(), - /*reductions=*/nullptr, /*proc_bind_val*/ nullptr); - createBodyOfOp(parallelOp, converter, currentLocation); + createCombinedParallelOp( + converter, eval, + std::get( + sectionsConstruct.t)); auto sectionsOp = firOpBuilder.create( currentLocation, /*reduction_vars*/ ValueRange(), - /*reductions=*/nullptr, /*allocate_vars*/ ValueRange(), - /*allocators_vars*/ ValueRange(), /*nowait=*/nullptr); + /*reductions=*/nullptr, allocateOperands, allocatorOperands, + /*nowait=*/nullptr); createBodyOfOp(sectionsOp, converter, currentLocation); // Sections Construct diff --git a/flang/test/Fir/convert-to-llvm-openmp-and-fir.fir b/flang/test/Fir/convert-to-llvm-openmp-and-fir.fir --- a/flang/test/Fir/convert-to-llvm-openmp-and-fir.fir +++ b/flang/test/Fir/convert-to-llvm-openmp-and-fir.fir @@ -71,3 +71,36 @@ // CHECK: } // CHECK: llvm.return // CHECK: } + + +// ----- + +func.func @_QPsb(%arr: !fir.box> {fir.bindc_name = "arr"}) { + %0 = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFsbEi"} + omp.parallel { + %c1 = arith.constant 1 : i32 + %c50 = arith.constant 50 : i32 + omp.wsloop for (%indx) : i32 = (%c1) to (%c50) inclusive step (%c1) { + %1 = fir.convert %indx : (i32) -> i64 + %c1_i64 = arith.constant 1 : i64 + %2 = arith.subi %1, %c1_i64 : i64 + %3 = fir.coordinate_of %arr, %2 : (!fir.box>, i64) -> !fir.ref + fir.store %indx to %3 : !fir.ref + omp.yield + } + omp.terminator + } + return +} + +// Check only for the structure of the OpenMP portion and the feasibility of the conversion +// CHECK-LABEL: @_QPsb +// CHECK-SAME: %{{.*}}: !llvm.ptr> {fir.bindc_name = "arr"} +// CHECK: omp.parallel { +// CHECK: %[[C1:.*]] = llvm.mlir.constant(1 : i32) : i32 +// CHECK: %[[C50:.*]] = llvm.mlir.constant(50 : i32) : i32 +// CHECK: omp.wsloop for (%[[INDX:.*]]) : i32 = (%[[C1]]) to (%[[C50]]) inclusive step (%[[C1]]) { +// CHECK: llvm.store %[[INDX]], %{{.*}} : !llvm.ptr +// CHECK: omp.yield +// CHECK: omp.terminator +// CHECK: llvm.return diff --git a/flang/test/Lower/OpenMP/omp-parallel-wsloop.f90 b/flang/test/Lower/OpenMP/omp-parallel-wsloop.f90 new file mode 100644 --- /dev/null +++ b/flang/test/Lower/OpenMP/omp-parallel-wsloop.f90 @@ -0,0 +1,96 @@ +! This test checks lowering of OpenMP DO Directive (Worksharing). + +! RUN: bbc -fopenmp -emit-fir %s -o - | FileCheck %s + +! CHECK-LABEL: func @_QPsimple_parallel_do() +subroutine simple_parallel_do + integer :: i + ! CHECK: omp.parallel + ! CHECK: %[[WS_LB:.*]] = arith.constant 1 : i32 + ! CHECK: %[[WS_UB:.*]] = arith.constant 9 : i32 + ! CHECK: %[[WS_STEP:.*]] = arith.constant 1 : i32 + ! CHECK: omp.wsloop for (%[[I:.*]]) : i32 = (%[[WS_LB]]) to (%[[WS_UB]]) inclusive step (%[[WS_STEP]]) + !$OMP PARALLEL DO + do i=1, 9 + ! CHECK: fir.call @_FortranAioOutputInteger32({{.*}}, %[[I]]) : (!fir.ref, i32) -> i1 + print*, i + end do + ! CHECK: omp.yield + ! CHECK: omp.terminator + !$OMP END PARALLEL DO +end subroutine + +! CHECK-LABEL: func @_QPparallel_do_with_parallel_clauses +! CHECK-SAME: %[[COND_REF:.*]]: !fir.ref> {fir.bindc_name = "cond"}, %[[NT_REF:.*]]: !fir.ref {fir.bindc_name = "nt"} +subroutine parallel_do_with_parallel_clauses(cond, nt) + logical :: cond + integer :: nt + integer :: i + ! CHECK: %[[COND:.*]] = fir.load %[[COND_REF]] : !fir.ref> + ! CHECK: %[[COND_CVT:.*]] = fir.convert %[[COND]] : (!fir.logical<4>) -> i1 + ! CHECK: %[[NT:.*]] = fir.load %[[NT_REF]] : !fir.ref + ! CHECK: omp.parallel if(%[[COND_CVT]] : i1) num_threads(%[[NT]] : i32) proc_bind(close) + ! CHECK: %[[WS_LB:.*]] = arith.constant 1 : i32 + ! CHECK: %[[WS_UB:.*]] = arith.constant 9 : i32 + ! CHECK: %[[WS_STEP:.*]] = arith.constant 1 : i32 + ! CHECK: omp.wsloop for (%[[I:.*]]) : i32 = (%[[WS_LB]]) to (%[[WS_UB]]) inclusive step (%[[WS_STEP]]) + !$OMP PARALLEL DO IF(cond) NUM_THREADS(nt) PROC_BIND(close) + do i=1, 9 + ! CHECK: fir.call @_FortranAioOutputInteger32({{.*}}, %[[I]]) : (!fir.ref, i32) -> i1 + print*, i + end do + ! CHECK: omp.yield + ! CHECK: omp.terminator + !$OMP END PARALLEL DO +end subroutine + +! CHECK-LABEL: func @_QPparallel_do_with_clauses +! CHECK-SAME: %[[NT_REF:.*]]: !fir.ref {fir.bindc_name = "nt"} +subroutine parallel_do_with_clauses(nt) + integer :: nt + integer :: i + ! CHECK: %[[NT:.*]] = fir.load %[[NT_REF]] : !fir.ref + ! CHECK: omp.parallel num_threads(%[[NT]] : i32) + ! CHECK: %[[WS_LB:.*]] = arith.constant 1 : i32 + ! CHECK: %[[WS_UB:.*]] = arith.constant 9 : i32 + ! CHECK: %[[WS_STEP:.*]] = arith.constant 1 : i32 + ! CHECK: omp.wsloop schedule(dynamic) for (%[[I:.*]]) : i32 = (%[[WS_LB]]) to (%[[WS_UB]]) inclusive step (%[[WS_STEP]]) + !$OMP PARALLEL DO NUM_THREADS(nt) SCHEDULE(dynamic) + do i=1, 9 + ! CHECK: fir.call @_FortranAioOutputInteger32({{.*}}, %[[I]]) : (!fir.ref, i32) -> i1 + print*, i + end do + ! CHECK: omp.yield + ! CHECK: omp.terminator + !$OMP END PARALLEL DO +end subroutine + +! CHECK-LABEL: func @_QPparallel_do_with_privatisation_clauses +! CHECK-SAME: %[[COND_REF:.*]]: !fir.ref> {fir.bindc_name = "cond"}, %[[NT_REF:.*]]: !fir.ref {fir.bindc_name = "nt"} +subroutine parallel_do_with_privatisation_clauses(cond,nt) + logical :: cond + integer :: nt + integer :: i + ! CHECK: omp.parallel + ! CHECK: %[[PRIVATE_COND_REF:.*]] = fir.alloca !fir.logical<4> {bindc_name = "cond", pinned, uniq_name = "_QFparallel_do_with_privatisation_clausesEcond"} + ! CHECK: %[[PRIVATE_NT_REF:.*]] = fir.alloca i32 {bindc_name = "nt", pinned, uniq_name = "_QFparallel_do_with_privatisation_clausesEnt"} + ! CHECK: %[[NT_VAL:.*]] = fir.load %[[NT_REF]] : !fir.ref + ! CHECK: fir.store %[[NT_VAL]] to %[[PRIVATE_NT_REF]] : !fir.ref + ! CHECK: %[[WS_LB:.*]] = arith.constant 1 : i32 + ! CHECK: %[[WS_UB:.*]] = arith.constant 9 : i32 + ! CHECK: %[[WS_STEP:.*]] = arith.constant 1 : i32 + ! CHECK: omp.wsloop for (%[[I:.*]]) : i32 = (%[[WS_LB]]) to (%[[WS_UB]]) inclusive step (%[[WS_STEP]]) + !$OMP PARALLEL DO PRIVATE(cond) FIRSTPRIVATE(nt) + do i=1, 9 + ! CHECK: fir.call @_FortranAioOutputInteger32({{.*}}, %[[I]]) : (!fir.ref, i32) -> i1 + ! CHECK: %[[PRIVATE_COND_VAL:.*]] = fir.load %[[PRIVATE_COND_REF]] : !fir.ref> + ! CHECK: %[[PRIVATE_COND_VAL_CVT:.*]] = fir.convert %[[PRIVATE_COND_VAL]] : (!fir.logical<4>) -> i1 + ! CHECK: fir.call @_FortranAioOutputLogical({{.*}}, %[[PRIVATE_COND_VAL_CVT]]) : (!fir.ref, i1) -> i1 + ! CHECK: %[[PRIVATE_NT_VAL:.*]] = fir.load %[[PRIVATE_NT_REF]] : !fir.ref + ! CHECK: fir.call @_FortranAioOutputInteger32({{.*}}, %[[PRIVATE_NT_VAL]]) : (!fir.ref, i32) -> i1 + print*, i, cond, nt + end do + ! CHECK: omp.yield + ! CHECK: omp.terminator + !$OMP END PARALLEL DO +end subroutine diff --git a/flang/test/Lower/OpenMP/parallel-sections.f90 b/flang/test/Lower/OpenMP/parallel-sections.f90 --- a/flang/test/Lower/OpenMP/parallel-sections.f90 +++ b/flang/test/Lower/OpenMP/parallel-sections.f90 @@ -40,8 +40,8 @@ integer, intent(inout) :: x, y !FIRDialect: %[[allocator:.*]] = arith.constant 1 : i32 !LLVMDialect: %[[allocator:.*]] = llvm.mlir.constant(1 : i32) : i32 - !OMPDialect: omp.parallel allocate(%[[allocator]] : i32 -> %{{.*}} : !fir.ref) { - !OMPDialect: omp.sections { + !OMPDialect: omp.parallel { + !OMPDialect: omp.sections allocate(%[[allocator]] : i32 -> %{{.*}} : !fir.ref) { !$omp parallel sections allocate(omp_high_bw_mem_alloc: x) !OMPDialect: omp.section { !$omp section