diff --git a/flang/include/flang/Lower/OpenMP.h b/flang/include/flang/Lower/OpenMP.h --- a/flang/include/flang/Lower/OpenMP.h +++ b/flang/include/flang/Lower/OpenMP.h @@ -13,6 +13,7 @@ #ifndef FORTRAN_LOWER_OPENMP_H #define FORTRAN_LOWER_OPENMP_H +#include "clang/Basic/BitmaskEnum.h" /* NOTE(review): this pulls in a Clang header; the LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE and LLVM_MARK_AS_BITMASK_ENUM macros used below are presumably provided by llvm/ADT/BitmaskEnum.h; confirm whether that header should be included directly instead. */ #include namespace mlir { @@ -31,6 +32,7 @@ struct OpenMPDeclarativeConstruct; struct OmpEndLoopDirective; struct OmpClauseList; +struct OmpClause; } // namespace parser namespace lower { @@ -42,6 +44,28 @@ struct Variable; } // namespace pft +namespace omp { +LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE(); +// TODO Enum duplicated in clang/lib/CodeGen/CGOpenMPRuntime.cpp and +// openmp/libomptarget/include/omptarget.h +/// Flags representing clauses passed to a requires directive. +enum OpenMPOffloadingRequiresDirFlags { + /// flag undefined. + OMP_REQ_UNDEFINED = 0x000, + /// no requires directive present. + OMP_REQ_NONE = 0x001, + /// reverse_offload clause. + OMP_REQ_REVERSE_OFFLOAD = 0x002, + /// unified_address clause. + OMP_REQ_UNIFIED_ADDRESS = 0x004, + /// unified_shared_memory clause. + OMP_REQ_UNIFIED_SHARED_MEMORY = 0x008, + /// dynamic_allocators clause. 
+ OMP_REQ_DYNAMIC_ALLOCATORS = 0x010, + LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_REQ_DYNAMIC_ALLOCATORS) +}; +} // namespace omp + void genOpenMPConstruct(AbstractConverter &, pft::Evaluation &, const parser::OpenMPConstruct &); void genOpenMPDeclarativeConstruct(AbstractConverter &, pft::Evaluation &, @@ -56,6 +80,15 @@ void updateReduction(mlir::Operation *, fir::FirOpBuilder &, mlir::Value, mlir::Value, fir::ConvertOp * = nullptr); void removeStoreOp(mlir::Operation *, mlir::Value); + +omp::OpenMPOffloadingRequiresDirFlags +getOpenMPRequiresFlags(AbstractConverter &, + const Fortran::parser::OmpClauseList &); +void genOpenMPRequires(AbstractConverter &, + omp::OpenMPOffloadingRequiresDirFlags); + +bool isOpenMPTargetConstruct(const parser::OpenMPConstruct &); + } // namespace lower } // namespace Fortran diff --git a/flang/lib/Lower/Bridge.cpp b/flang/lib/Lower/Bridge.cpp --- a/flang/lib/Lower/Bridge.cpp +++ b/flang/lib/Lower/Bridge.cpp @@ -272,7 +272,8 @@ public: explicit FirConverter(Fortran::lower::LoweringBridge &bridge) : Fortran::lower::AbstractConverter(bridge.getLoweringOptions()), - bridge{bridge}, foldingContext{bridge.createFoldingContext()} {} + bridge{bridge}, foldingContext{bridge.createFoldingContext()}, + ompRequiresFlags{Fortran::lower::omp::OMP_REQ_UNDEFINED} {} virtual ~FirConverter() = default; /// Convert the PFT to FIR. 
@@ -299,12 +300,16 @@ if (f.isMainProgram()) hasMainProgram = true; declareFunction(f); + analyzeOpenMPDeclarative(f.evaluationList); }, [&](Fortran::lower::pft::ModuleLikeUnit &m) { lowerModuleDeclScope(m); + analyzeOpenMPDeclarative(m.evaluationList); for (Fortran::lower::pft::FunctionLikeUnit &f : - m.nestedFunctions) + m.nestedFunctions) { declareFunction(f); + analyzeOpenMPDeclarative(f.evaluationList); + } }, [&](Fortran::lower::pft::BlockDataUnit &b) {}, [&](Fortran::lower::pft::CompilerDirectiveUnit &d) {}, @@ -354,6 +359,17 @@ fir::runtime::genEnvironmentDefaults(*builder, toLocation(), bridge.getEnvironmentDefaults()); }); + + // Only apply if the compilation unit has a target region, otherwise the + // runtime will throw errors for mismatched requires flags unnecessarily. + // TODO Remove 'true' from condition once minimal OpenMP target region + // support is implemented (i.e. the compiler doesn't crash if the + // simplest possible region is introduced in the user program). + // Otherwise, lowering of this directive can't be tested. + if (ompRequiresFlags != Fortran::lower::omp::OMP_REQ_UNDEFINED && + (true || ompTargetRegionFound || ompDeclareTargetRegionFound)) { + genOpenMPRequires(*this, ompRequiresFlags); + } } /// Declare a function. @@ -1006,6 +1022,60 @@ genFIRConditionalBranch(cond, trueTarget->block, falseTarget->block); } + /// Perform a pass to gather compilation unit-level data from OpenMP + /// declarative constructs. This must be done prior to lowering, to ensure + /// data is available to the lowering pass. + void analyzeOpenMPDeclarative( + const Fortran::lower::pft::EvaluationList &evaluationList) { + // Populate ompTargetRegionFound and ompDeclareTargetRegionFound during + // analysis, so that semantically necessary ordering information between + // the requires directive and other OpenMP directives is present. 
+ auto analyzeRequires = + [&](const Fortran::parser::OpenMPRequiresConstruct &ompReq) { + auto flags = Fortran::lower::getOpenMPRequiresFlags( + *this, std::get(ompReq.t)); + + if (flags & (Fortran::lower::omp::OMP_REQ_REVERSE_OFFLOAD | + Fortran::lower::omp::OMP_REQ_UNIFIED_ADDRESS | + Fortran::lower::omp::OMP_REQ_UNIFIED_SHARED_MEMORY) && + (ompTargetRegionFound || ompDeclareTargetRegionFound)) { + mlir::emitError(toLocation(), + "requires directive specifies a reverse_offload, " + "unified_address or unified_shared_memory " + "requirement lexically after a device construct"); + } + + ompRequiresFlags |= flags; + }; + + auto analyzeDeclareTarget = + [&](const Fortran::parser::OpenMPDeclareTargetConstruct &ompReq) { + // Only register that a "declare target" region is found here, until + // support for this construct is added. + ompDeclareTargetRegionFound = true; + }; + + for (const Fortran::lower::pft::Evaluation &eval : evaluationList) { /* NOTE(review): this loop only inspects the direct entries of evaluationList; nested evaluations are not traversed here; confirm that is sufficient to detect every target region. */ + if (const auto *ompDecl = + eval.getIf()) { + std::visit( + Fortran::common::visitors{ + analyzeRequires, + analyzeDeclareTarget, + // Other OpenMP declarative constructs currently skipped here + [&](const auto &) {}, + }, + ompDecl->u); + } else if (const auto *ompDecl = + eval.getIf()) { + // Register if a target region is found + ompTargetRegionFound = + ompTargetRegionFound || + Fortran::lower::isOpenMPTargetConstruct(*ompDecl); + } + } + } + //===--------------------------------------------------------------------===// // Termination of symbolically referenced execution units //===--------------------------------------------------------------------===// @@ -3794,6 +3864,12 @@ /// Tuple of host assoicated variables. 
mlir::Value hostAssocTuple; + + /// OpenMP Requires flags + Fortran::lower::omp::OpenMPOffloadingRequiresDirFlags ompRequiresFlags; + + bool ompTargetRegionFound = false; + bool ompDeclareTargetRegionFound = false; }; } // namespace diff --git a/flang/lib/Lower/OpenMP.cpp b/flang/lib/Lower/OpenMP.cpp --- a/flang/lib/Lower/OpenMP.cpp +++ b/flang/lib/Lower/OpenMP.cpp @@ -1819,7 +1819,11 @@ }, [&](const Fortran::parser::OpenMPRequiresConstruct &requiresConstruct) { - TODO(converter.getCurrentLocation(), "OpenMPRequiresConstruct"); + // Requires directives are processed before any statements are + // lowered. Then, the result of combining the set of clauses of all + // requires directives present in the compilation unit is what is + // used to emit code, so no code is emitted independently for each + // "requires" instance. }, [&](const Fortran::parser::OpenMPThreadprivate &threadprivate) { // The directive is lowered when instantiating the variable to @@ -1978,3 +1982,114 @@ } } } + +Fortran::lower::omp::OpenMPOffloadingRequiresDirFlags +Fortran::lower::getOpenMPRequiresFlags( + Fortran::lower::AbstractConverter &converter, + const Fortran::parser::OmpClauseList &clauseList) { + omp::OpenMPOffloadingRequiresDirFlags flags = omp::OMP_REQ_UNDEFINED; + + for (const auto &clause : clauseList.v) { + if (const auto &atomicClause = + std::get_if( + &clause.u)) { + switch (atomicClause->v.v) { + case Fortran::parser::OmpAtomicDefaultMemOrderClause::Type::SeqCst: + case Fortran::parser::OmpAtomicDefaultMemOrderClause::Type::AcqRel: + case Fortran::parser::OmpAtomicDefaultMemOrderClause::Type::Relaxed: + // Can't represent clause parameters in current flags structure + TODO(converter.getCurrentLocation(), + "atomic_default_mem_order clause not supported"); + break; + } + } else if (std::get_if( + &clause.u)) { + flags |= omp::OMP_REQ_DYNAMIC_ALLOCATORS; + } else if (std::get_if( + &clause.u)) { + flags |= omp::OMP_REQ_REVERSE_OFFLOAD; + } else if (std::get_if( + &clause.u)) { + 
flags |= omp::OMP_REQ_UNIFIED_ADDRESS; + } else if (std::get_if( + &clause.u)) { + flags |= omp::OMP_REQ_UNIFIED_SHARED_MEMORY; + } + } + + return flags; +} + +void Fortran::lower::genOpenMPRequires( + Fortran::lower::AbstractConverter &converter, + Fortran::lower::omp::OpenMPOffloadingRequiresDirFlags flags) { + auto *ctx = &converter.getMLIRContext(); + auto mod = converter.getModuleOp(); + auto loc = converter.getCurrentLocation(); + + auto voidType = mlir::LLVM::LLVMVoidType::get(ctx); + auto flagsType = mlir::IntegerType::get(ctx, 64); + auto priorityType = mlir::IntegerType::get(ctx, 32); + auto moduleBuilder = mlir::OpBuilder::atBlockEnd(mod.getBody()); + + // TODO Represent flags in MLIR as module attribute and refactor codegen into + // IRBuilder instead? + + // Declare OpenMP runtime "requires" flags registration function + auto registerRequiresFunc = moduleBuilder.create( + loc, "__tgt_register_requires", + mlir::LLVM::LLVMFunctionType::get(voidType, {flagsType}), + mlir::LLVM::Linkage::External); + + // Create "requires" registration function + // TODO Put it in section ".text.startup"? 
May involve extending LLVMFuncOp + auto requiresRegFunc = moduleBuilder.create( + loc, ".omp_offloading.requires_reg", + mlir::LLVM::LLVMFunctionType::get(voidType, {}), + mlir::LLVM::Linkage::Internal); + requiresRegFunc.setPassthroughAttr( + mlir::ArrayAttr::get(ctx, {mlir::StringAttr::get(ctx, "noinline")})); + requiresRegFunc.addEntryBlock(); + + // Emit call to OpenMP runtime from within registration function + auto funcBuilder = mlir::OpBuilder::atBlockBegin(&requiresRegFunc.front()); + auto flagsConst = funcBuilder.create( + loc, mlir::IntegerAttr::get(flagsType, static_cast(flags))); + funcBuilder.create(loc, registerRequiresFunc, + mlir::ValueRange{flagsConst}); + funcBuilder.create(loc, nullptr); + + // Register init function as constructor of the current compilation unit + auto ctors = + mlir::ArrayAttr::get(ctx, {mlir::SymbolRefAttr::get(requiresRegFunc)}); + auto priorities = + mlir::ArrayAttr::get(ctx, {mlir::IntegerAttr::get(priorityType, 0)}); + moduleBuilder.create(loc, ctors, priorities); +} + +bool Fortran::lower::isOpenMPTargetConstruct( + const Fortran::parser::OpenMPConstruct &omp) { /* NOTE(review): only a single alternative of omp.u is inspected below; every other construct kind falls through to `return false`; confirm that all target directives listed in the switch can actually reach it through that alternative. */ + if (const auto *blockDir = + std::get_if(&omp.u)) { + const auto &beginBlockDir{ + std::get(blockDir->t)}; + const auto &beginDir{ + std::get(beginBlockDir.t)}; + + switch (beginDir.v) { + case llvm::omp::Directive::OMPD_target: + case llvm::omp::Directive::OMPD_target_parallel: + case llvm::omp::Directive::OMPD_target_parallel_do: + case llvm::omp::Directive::OMPD_target_parallel_do_simd: + case llvm::omp::Directive::OMPD_target_simd: + case llvm::omp::Directive::OMPD_target_teams: + case llvm::omp::Directive::OMPD_target_teams_distribute: + case llvm::omp::Directive::OMPD_target_teams_distribute_simd: + return true; + default: + break; + } + } + + return false; +} diff --git a/flang/test/Lower/OpenMP/requires.f90 b/flang/test/Lower/OpenMP/requires.f90 new file mode 100644 --- /dev/null +++ b/flang/test/Lower/OpenMP/requires.f90 @@ -0,0 +1,29 @@ +! 
RUN: bbc -fopenmp -emit-fir %s -o - | FileCheck %s +! RUN: %flang_fc1 -emit-fir -fopenmp %s -o - | FileCheck %s + +! This test checks the lowering of requires + +!CHECK: func @_QQmain() { +!CHECK: return +!CHECK: } + +!CHECK: llvm.func @__tgt_register_requires(i64) + +!CHECK: llvm.func internal @".omp_offloading.requires_reg"() attributes {passthrough = ["noinline"]} { +!CHECK: %[[FLAGS:.*]] = arith.constant 10 : i64 +!CHECK: llvm.call @__tgt_register_requires(%[[FLAGS]]) : (i64) -> () +!CHECK: llvm.return +!CHECK: } + +!CHECK: llvm.mlir.global_ctors {ctors = [@".omp_offloading.requires_reg"], +!CHECK-SAME: priorities = [0 : i32]} + +program requires + !$omp requires unified_shared_memory reverse_offload + + ! TODO Uncomment the following directives when conditional codegen for + ! requires directive depending on the presence of a target region is + ! enabled. +! !$omp target +! !$omp end target +end program requires