diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp --- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp +++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp @@ -479,27 +479,6 @@ LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_IDENT_WORK_DISTRIBUTE) }; -namespace { -LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE(); -/// Values for bit flags for marking which requires clauses have been used. -enum OpenMPOffloadingRequiresDirFlags : int64_t { - /// flag undefined. - OMP_REQ_UNDEFINED = 0x000, - /// no requires clause present. - OMP_REQ_NONE = 0x001, - /// reverse_offload clause. - OMP_REQ_REVERSE_OFFLOAD = 0x002, - /// unified_address clause. - OMP_REQ_UNIFIED_ADDRESS = 0x004, - /// unified_shared_memory clause. - OMP_REQ_UNIFIED_SHARED_MEMORY = 0x008, - /// dynamic_allocators clause. - OMP_REQ_DYNAMIC_ALLOCATORS = 0x010, - LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_REQ_DYNAMIC_ALLOCATORS) -}; - -} // anonymous namespace - /// Describes ident structure that describes a source location. 
/// All descriptions are taken from /// https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h @@ -1056,9 +1035,11 @@ CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM) : CGM(CGM), OMPBuilder(CGM.getModule()) { KmpCriticalNameTy = llvm::ArrayType::get(CGM.Int32Ty, /*NumElements*/ 8); - llvm::OpenMPIRBuilderConfig Config(CGM.getLangOpts().OpenMPIsDevice, false, - hasRequiresUnifiedSharedMemory(), - CGM.getLangOpts().OpenMPOffloadMandatory); + llvm::OpenMPIRBuilderConfig Config( + CGM.getLangOpts().OpenMPIsDevice, + /*IsTargetCodegen*/ false, CGM.getLangOpts().OpenMPOffloadMandatory, + /*HasRequiresReverseOffload*/ false, /*HasRequiresUnifiedAddress*/ false, + hasRequiresUnifiedSharedMemory(), /*HasRequiresDynamicAllocators*/ false); // Initialize Types used in OpenMPIRBuilder from OMPKinds.def OMPBuilder.initialize(); OMPBuilder.setConfig(Config); @@ -10594,7 +10575,6 @@ std::string ReqName = getName({"omp_offloading", "requires_reg"}); RequiresRegFn = CGM.CreateGlobalInitOrCleanUpFunction(FTy, ReqName, FI); CGF.StartFunction(GlobalDecl(), C.VoidTy, RequiresRegFn, FI, {}); - OpenMPOffloadingRequiresDirFlags Flags = OMP_REQ_NONE; // TODO: check for other requires clauses. // The requires directive takes effect only when a target region is // present in the compilation unit. 
Otherwise it is ignored and not @@ -10604,11 +10584,10 @@ assert((HasEmittedTargetRegion || HasEmittedDeclareTargetRegion || !OMPBuilder.OffloadInfoManager.empty()) && "Target or declare target region expected."); - if (HasRequiresUnifiedSharedMemory) - Flags = OMP_REQ_UNIFIED_SHARED_MEMORY; CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( CGM.getModule(), OMPRTL___tgt_register_requires), - llvm::ConstantInt::get(CGM.Int64Ty, Flags)); + llvm::ConstantInt::get( + CGM.Int64Ty, OMPBuilder.Config.getRequiresFlags())); CGF.FinishFunction(); } return RequiresRegFn; diff --git a/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp b/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp --- a/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp +++ b/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp @@ -859,9 +859,11 @@ CGOpenMPRuntimeGPU::CGOpenMPRuntimeGPU(CodeGenModule &CGM) : CGOpenMPRuntime(CGM) { - llvm::OpenMPIRBuilderConfig Config(CGM.getLangOpts().OpenMPIsDevice, true, - hasRequiresUnifiedSharedMemory(), - CGM.getLangOpts().OpenMPOffloadMandatory); + llvm::OpenMPIRBuilderConfig Config( + CGM.getLangOpts().OpenMPIsDevice, + /*IsTargetCodegen*/ true, CGM.getLangOpts().OpenMPOffloadMandatory, + /*HasRequiresReverseOffload*/ false, /*HasRequiresUnifiedAddress*/ false, + hasRequiresUnifiedSharedMemory(), /*HasRequiresDynamicAllocators*/ false); OMPBuilder.setConfig(Config); if (!CGM.getLangOpts().OpenMPIsDevice) diff --git a/flang/include/flang/Lower/OpenMP.h b/flang/include/flang/Lower/OpenMP.h --- a/flang/include/flang/Lower/OpenMP.h +++ b/flang/include/flang/Lower/OpenMP.h @@ -13,13 +13,9 @@ #ifndef FORTRAN_LOWER_OPENMP_H #define FORTRAN_LOWER_OPENMP_H +#include "mlir/Dialect/OpenMP/OpenMPDialect.h" #include -namespace mlir { -class Value; -class Operation; -} // namespace mlir - namespace fir { class FirOpBuilder; class ConvertOp; @@ -29,6 +25,7 @@ namespace parser { struct OpenMPConstruct; struct OpenMPDeclarativeConstruct; +struct OpenMPDeclareTargetConstruct; struct OmpEndLoopDirective; struct 
OmpClauseList; } // namespace parser @@ -56,6 +53,17 @@ void updateReduction(mlir::Operation *, fir::FirOpBuilder &, mlir::Value, mlir::Value, fir::ConvertOp * = nullptr); void removeStoreOp(mlir::Operation *, mlir::Value); + +std::optional +getOpenMPDeclareTargetFunctionDevice( + Fortran::lower::AbstractConverter &, Fortran::lower::pft::Evaluation &, + const Fortran::parser::OpenMPDeclareTargetConstruct &); +std::optional +extractOpenMPRequiresClauses(const Fortran::parser::OmpClauseList &, + mlir::omp::ClauseRequires &); + +bool isOpenMPTargetConstruct(const parser::OpenMPConstruct &); + } // namespace lower } // namespace Fortran diff --git a/flang/lib/Lower/Bridge.cpp b/flang/lib/Lower/Bridge.cpp --- a/flang/lib/Lower/Bridge.cpp +++ b/flang/lib/Lower/Bridge.cpp @@ -52,6 +52,7 @@ #include "flang/Semantics/runtime-type-info.h" #include "flang/Semantics/tools.h" #include "mlir/Dialect/ControlFlow/IR/ControlFlowOps.h" +#include "mlir/Dialect/OpenMP/OpenMPDialect.h" #include "mlir/IR/PatternMatch.h" #include "mlir/Parser/Parser.h" #include "mlir/Transforms/RegionUtils.h" @@ -266,7 +267,8 @@ public: explicit FirConverter(Fortran::lower::LoweringBridge &bridge) : Fortran::lower::AbstractConverter(bridge.getLoweringOptions()), - bridge{bridge}, foldingContext{bridge.createFoldingContext()} {} + bridge{bridge}, foldingContext{bridge.createFoldingContext()}, + ompRequiresFlags{mlir::omp::ClauseRequires::none} {} virtual ~FirConverter() = default; /// Convert the PFT to FIR. @@ -343,6 +345,16 @@ fir::runtime::genEnvironmentDefaults(*builder, toLocation(), bridge.getEnvironmentDefaults()); }); + + // Set the module attributes related to OpenMP requires directives + if (auto mod = llvm::dyn_cast( + getModuleOp().getOperation())) { + if (ompDeviceCodeFound) + mod.setRequires(ompRequiresFlags); + + if (ompAtomicDefaultMemOrder) + mod.setAtomicDefaultMemOrder(*ompAtomicDefaultMemOrder); + } } /// Declare a function. 
@@ -2053,10 +2065,66 @@ localSymbols.popScope(); builder->restoreInsertionPoint(insertPt); + + // Register if a target region was found + ompDeviceCodeFound = + ompDeviceCodeFound || Fortran::lower::isOpenMPTargetConstruct(omp); + } + + /// Extract information from OpenMP declarative constructs + void analyzeOpenMPDeclarative( + const Fortran::parser::OpenMPDeclarativeConstruct &ompDecl) { + auto analyzeRequires = + [&](const Fortran::parser::OpenMPRequiresConstruct &ompReq) { + using mlir::omp::ClauseRequires; + + mlir::omp::ClauseRequires requiresFlags; + auto atomicDefaultMemOrder = + Fortran::lower::extractOpenMPRequiresClauses( + std::get(ompReq.t), + requiresFlags); + + if (requiresFlags != ClauseRequires::none) + ompRequiresFlags = ompRequiresFlags | requiresFlags; + + if (atomicDefaultMemOrder) { + if (ompAtomicDefaultMemOrder && + ompAtomicDefaultMemOrder != atomicDefaultMemOrder) + fir::emitFatalError( + toLocation(), + "conflicting atomic_default_mem_order clause found: " + + stringifyEnum(*atomicDefaultMemOrder) + + " != " + stringifyEnum(*ompAtomicDefaultMemOrder), + /*genCrashDiag=*/false); + ompAtomicDefaultMemOrder = atomicDefaultMemOrder; + } + }; + + auto analyzeDeclareTarget = + [&](const Fortran::parser::OpenMPDeclareTargetConstruct &ompReq) { + auto targetType = + Fortran::lower::getOpenMPDeclareTargetFunctionDevice( + *this, getEval(), ompReq); + + ompDeviceCodeFound = + ompDeviceCodeFound || + (targetType && + *targetType != mlir::omp::DeclareTargetDeviceType::host); + }; + + std::visit( + Fortran::common::visitors{ + analyzeRequires, + analyzeDeclareTarget, + // Add other OpenMP declarative constructs currently skipped + [&](const auto &) {}, + }, + ompDecl.u); } void genFIR(const Fortran::parser::OpenMPDeclarativeConstruct &ompDecl) { mlir::OpBuilder::InsertPoint insertPt = builder->saveInsertionPoint(); + analyzeOpenMPDeclarative(ompDecl); genOpenMPDeclarativeConstruct(*this, getEval(), ompDecl); for (Fortran::lower::pft::Evaluation &e : 
getEval().getNestedEvaluations()) genFIR(e); @@ -4098,6 +4166,17 @@ /// Tuple of host associated variables mlir::Value hostAssocTuple; + + /// OpenMP Requires flags + mlir::omp::ClauseRequires ompRequiresFlags; + + /// OpenMP Default memory order for atomic operations, as defined by a + /// 'requires' directive + std::optional ompAtomicDefaultMemOrder; + + /// Whether an OpenMP target region or declare target function/subroutine + /// intended for device offloading has been detected + bool ompDeviceCodeFound = false; }; } // namespace diff --git a/flang/lib/Lower/OpenMP.cpp b/flang/lib/Lower/OpenMP.cpp --- a/flang/lib/Lower/OpenMP.cpp +++ b/flang/lib/Lower/OpenMP.cpp @@ -24,7 +24,9 @@ #include "flang/Semantics/tools.h" #include "mlir/Dialect/OpenMP/OpenMPDialect.h" #include "mlir/Dialect/SCF/IR/SCF.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/Frontend/OpenMP/OMPConstants.h" +#include using namespace mlir; @@ -2239,6 +2241,111 @@ converter.bindSymbol(sym, symThreadprivateExv); } +/// Extract the list of function and variable symbols affected by the given +/// 'declare target' directive and return the intended device type for them. 
+static mlir::omp::DeclareTargetDeviceType getDeclareTargetInfo( + Fortran::lower::pft::Evaluation &eval, + const Fortran::parser::OpenMPDeclareTargetConstruct &declareTargetConstruct, + SmallVectorImpl &symbols) { + // Gather the symbols + auto findFuncAndVarSyms = [&](const Fortran::parser::OmpObjectList &objList) { + for (const auto &ompObject : objList.v) { + Fortran::common::visit( + Fortran::common::visitors{ + [&](const Fortran::parser::Designator &designator) { + if (const Fortran::parser::Name *name = + getDesignatorNameIfDataRef(designator)) { + symbols.push_back(*name->symbol); + } + }, + [&](const Fortran::parser::Name &name) { + symbols.push_back(*name.symbol); + }}, + ompObject.u); + } + }; + + // The default capture type + auto deviceType = Fortran::parser::OmpDeviceTypeClause::Type::Any; + const auto &spec{std::get( + declareTargetConstruct.t)}; + + if (const auto *objectList{ + Fortran::parser::Unwrap(spec.u)}) { + // Case: declare target(func, var1, var2) + findFuncAndVarSyms(*objectList); + } else if (const auto *clauseList{ + Fortran::parser::Unwrap( + spec.u)}) { + if (clauseList->v.empty()) { + // Case: declare target, implicit capture of function + symbols.push_back(eval.getOwningProcedure()->getSubprogramSymbol()); + } + + for (const auto &clause : clauseList->v) { + if (const auto *toClause{ + std::get_if(&clause.u)}) { + // Case: declare target to(func, var1, var2)... + findFuncAndVarSyms(toClause->v); + } else if (const auto *linkClause{ + std::get_if( + &clause.u)}) { + // Case: declare target link(var1, var2)... + TODO_NOLOC("the link clause is currently unsupported"); + } else if (const auto *deviceClause{ + std::get_if( + &clause.u)}) { + // Case: declare target ... 
device_type(any | host | nohost) + deviceType = deviceClause->v.v; + } + } + } + + switch (deviceType) { + case Fortran::parser::OmpDeviceTypeClause::Type::Any: + return mlir::omp::DeclareTargetDeviceType::any; + case Fortran::parser::OmpDeviceTypeClause::Type::Host: + return mlir::omp::DeclareTargetDeviceType::host; + case Fortran::parser::OmpDeviceTypeClause::Type::Nohost: + return mlir::omp::DeclareTargetDeviceType::nohost; + } +} + +void handleDeclareTarget(Fortran::lower::AbstractConverter &converter, + Fortran::lower::pft::Evaluation &eval, + const Fortran::parser::OpenMPDeclareTargetConstruct + &declareTargetConstruct) { + SmallVector symbols; + auto deviceType = getDeclareTargetInfo(eval, declareTargetConstruct, symbols); + + auto mod = converter.getFirOpBuilder().getModule(); + for (auto sym : symbols) { + auto *op = mod.lookupSymbol(converter.mangleName(sym)); + + // TODO: Remove this cast and TODO assert when global data and link are + // supported + mlir::func::FuncOp fOp = mlir::dyn_cast(op); + if (!fOp) + TODO(converter.getCurrentLocation(), + "only subroutines and functions are currently supported"); + + // The function already has a declare target applied to it, very + // likely through implicit capture (usage in another declare target + // function/subroutine). 
It should be marked as any if it has been + // assigned both host and nohost, else we skip, as there is no + // change + if (mlir::omp::OpenMPDialect::isDeclareTarget(fOp)) { + if (mlir::omp::OpenMPDialect::getDeclareTargetDeviceType(fOp) != + deviceType) + mlir::omp::OpenMPDialect::setDeclareTarget( + op, mlir::omp::DeclareTargetDeviceType::any); + continue; + } + + mlir::omp::OpenMPDialect::setDeclareTarget(op, deviceType); + } +} + void Fortran::lower::genOpenMPDeclarativeConstruct( Fortran::lower::AbstractConverter &converter, Fortran::lower::pft::Evaluation &eval, @@ -2261,12 +2368,15 @@ }, [&](const Fortran::parser::OpenMPDeclareTargetConstruct &declareTargetConstruct) { - TODO(converter.getCurrentLocation(), - "OpenMPDeclareTargetConstruct"); + handleDeclareTarget(converter, eval, declareTargetConstruct); }, [&](const Fortran::parser::OpenMPRequiresConstruct &requiresConstruct) { - TODO(converter.getCurrentLocation(), "OpenMPRequiresConstruct"); + // Requires directives are analyzed before any statements are + // lowered. Then, the result of combining the set of clauses of all + // requires directives present in the compilation unit is used to + // emit code, so no code is emitted independently for each + // "requires" instance. 
}, [&](const Fortran::parser::OpenMPThreadprivate &threadprivate) { // The directive is lowered when instantiating the variable to @@ -2476,3 +2586,93 @@ } } } + +std::optional +Fortran::lower::getOpenMPDeclareTargetFunctionDevice( + Fortran::lower::AbstractConverter &converter, + Fortran::lower::pft::Evaluation &eval, + const Fortran::parser::OpenMPDeclareTargetConstruct + &declareTargetConstruct) { + SmallVector symbols; + auto deviceType = getDeclareTargetInfo(eval, declareTargetConstruct, symbols); + + // Return the device type only if at least one of the targets for the + // directive is a function or subroutine + auto mod = converter.getFirOpBuilder().getModule(); + for (auto sym : symbols) { + auto *op = mod.lookupSymbol(converter.mangleName(sym)); + + if (mlir::isa(op)) + return deviceType; + } + + return std::nullopt; +} + +std::optional +Fortran::lower::extractOpenMPRequiresClauses( + const Fortran::parser::OmpClauseList &clauseList, + omp::ClauseRequires &requiresFlags) { + std::optional atomicDefaultMemOrder; + requiresFlags = omp::ClauseRequires::none; + + for (const auto &clause : clauseList.v) { + if (const auto &atomicClause = + std::get_if( + &clause.u)) { + switch (atomicClause->v.v) { + case Fortran::parser::OmpAtomicDefaultMemOrderClause::Type::SeqCst: + atomicDefaultMemOrder = omp::ClauseMemoryOrderKind::Seq_cst; + break; + case Fortran::parser::OmpAtomicDefaultMemOrderClause::Type::AcqRel: + atomicDefaultMemOrder = omp::ClauseMemoryOrderKind::Acq_rel; + break; + case Fortran::parser::OmpAtomicDefaultMemOrderClause::Type::Relaxed: + atomicDefaultMemOrder = omp::ClauseMemoryOrderKind::Relaxed; + break; + } + } else if (std::get_if( + &clause.u)) { + requiresFlags = requiresFlags | omp::ClauseRequires::dynamic_allocators; + } else if (std::get_if( + &clause.u)) { + requiresFlags = requiresFlags | omp::ClauseRequires::reverse_offload; + } else if (std::get_if( + &clause.u)) { + requiresFlags = requiresFlags | omp::ClauseRequires::unified_address; 
+ } else if (std::get_if( + &clause.u)) { + requiresFlags = + requiresFlags | omp::ClauseRequires::unified_shared_memory; + } + } + + return atomicDefaultMemOrder; +} + +bool Fortran::lower::isOpenMPTargetConstruct( + const Fortran::parser::OpenMPConstruct &omp) { + if (const auto *blockDir = + std::get_if(&omp.u)) { + const auto &beginBlockDir{ + std::get(blockDir->t)}; + const auto &beginDir{ + std::get(beginBlockDir.t)}; + + switch (beginDir.v) { + case llvm::omp::Directive::OMPD_target: + case llvm::omp::Directive::OMPD_target_parallel: + case llvm::omp::Directive::OMPD_target_parallel_do: + case llvm::omp::Directive::OMPD_target_parallel_do_simd: + case llvm::omp::Directive::OMPD_target_simd: + case llvm::omp::Directive::OMPD_target_teams: + case llvm::omp::Directive::OMPD_target_teams_distribute: + case llvm::omp::Directive::OMPD_target_teams_distribute_simd: + return true; + default: + break; + } + } + + return false; +} diff --git a/flang/lib/Semantics/CMakeLists.txt b/flang/lib/Semantics/CMakeLists.txt --- a/flang/lib/Semantics/CMakeLists.txt +++ b/flang/lib/Semantics/CMakeLists.txt @@ -28,6 +28,7 @@ data-to-inits.cpp definable.cpp expression.cpp + finalize-omp.cpp mod-file.cpp pointer-assignment.cpp program-tree.cpp diff --git a/flang/lib/Semantics/finalize-omp.h b/flang/lib/Semantics/finalize-omp.h new file mode 100644 --- /dev/null +++ b/flang/lib/Semantics/finalize-omp.h @@ -0,0 +1,21 @@ +//===-- lib/Semantics/finalize-omp.h ------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef FORTRAN_SEMANTICS_FINALIZE_OMP_H_ +#define FORTRAN_SEMANTICS_FINALIZE_OMP_H_ + +namespace Fortran::parser { +struct Program; +} // namespace Fortran::parser + +namespace Fortran::semantics { +class SemanticsContext; +bool FinalizeOMP(SemanticsContext &context, parser::Program &program); +} // namespace Fortran::semantics + +#endif // FORTRAN_SEMANTICS_FINALIZE_OMP_H_ diff --git a/flang/lib/Semantics/finalize-omp.cpp b/flang/lib/Semantics/finalize-omp.cpp new file mode 100644 --- /dev/null +++ b/flang/lib/Semantics/finalize-omp.cpp @@ -0,0 +1,141 @@ +#include "finalize-omp.h" +#include "flang/Parser/parse-tree-visitor.h" +#include "flang/Semantics/tools.h" + +#include +#include +#include + +namespace Fortran::semantics { + +using namespace parser::literals; + +class GatherCallRefs { +public: + GatherCallRefs() {} + + // Default action for a parse tree node is to visit children. + template bool Pre(T &) { return true; } + template void Post(T &) {} + + void Post(parser::Call &call) { + if (std::holds_alternative(std::get<0>(call.t).u)) + callNames.push_back(std::get(std::get<0>(call.t).u)); + } + + std::list callNames; +}; + +class ImplicitDeclareTargetCapture { +public: + template bool Pre(T &) { return true; } + template void Post(T &) {} + ImplicitDeclareTargetCapture(SemanticsContext &context) + : messages_{context.messages()} {} + + // Related to rewriting declare target specifiers to + // contain functions nested within the primary declare + // target function. 
+ void Post(parser::OpenMPDeclareTargetConstruct &x) { + auto &spec{std::get(x.t)}; + if (parser::OmpObjectList * + objectList{parser::Unwrap(spec.u)}) { + markDeclTarForEachProgramInList(programUnits_, *objectList); + } else if (auto *clauseList{ + parser::Unwrap(spec.u)}) { + for (auto &clause : clauseList->v) { + if (auto *toClause{std::get_if(&clause.u)}) { + markDeclTarForEachProgramInList(programUnits_, toClause->v); + } else if (auto *linkClause{ + std::get_if(&clause.u)}) { + markDeclTarForEachProgramInList(programUnits_, linkClause->v); + } + } + + // For the default (clause-less) "declare target" inside of a function, + // we must create a "to" clause holding an extended-list that contains, + // at minimum, the current function + if (clauseList->v.empty()) { + if (auto *name = getNameFromProgramUnit(*currentProgramUnit_)) { + std::list list; + list.push_back(parser::OmpObject{ + parser::Designator{parser::DataRef{std::move(*name)}}}); + auto objList = parser::OmpObjectList{std::move(list)}; + markDeclTarForEachProgramInList(programUnits_, objList); + clauseList->v.push_back(parser::OmpClause::To{std::move(objList)}); + } + } + } + } + + bool Pre(parser::ProgramUnit &x) { + currentProgramUnit_ = &x; + if (auto *name = getNameFromProgramUnit(x)) + programUnits_[name->ToString()] = &x; + return true; + } + + parser::Name *getNameFromProgramUnit(parser::ProgramUnit &x) { + if (auto *func{parser::Unwrap(x.u)}) { + parser::FunctionStmt &Stmt = std::get<0>(func->t).statement; + return &std::get(Stmt.t); + } else if (auto *subr{parser::Unwrap(x.u)}) { + parser::SubroutineStmt &Stmt = std::get<0>(subr->t).statement; + return &std::get(Stmt.t); + } + return nullptr; + } + + void markDeclTarForEachProgramInList( + std::map programUnits, + parser::OmpObjectList &objList) { + auto existsInList = [](parser::OmpObjectList &objList, parser::Name name) { + for (auto &ompObject : objList.v) + if (auto *objName{parser::Unwrap(ompObject)}) + if (objName->ToString() == name.ToString()) + return 
true; + return false; + }; + + GatherCallRefs gatherer{}; + for (auto &ompObject : objList.v) { + if (auto *name{parser::Unwrap(ompObject)}) { + auto programUnit = programUnits.find(name->ToString()); + // something other than a subroutine or function, skip it + if (programUnit == programUnits.end()) + continue; + + parser::Walk(*programUnit->second, gatherer); + + // Currently using the Function Name rather than the CallRef name, + // unsure if these are interchangeable. However, ideally functions + // and subroutines should probably be parser::ProcedureDesignator's + // rather than parser::Designator's, but regular designators seem + // to be all that is utilised in the PFT definition for OmpObjects. + for (auto v : gatherer.callNames) { + if (!existsInList(objList, v)) { + objList.v.push_back(parser::OmpObject{parser::Designator{ + parser::DataRef{std::move(*getNameFromProgramUnit( + *programUnits.find(v.ToString())->second))}}}); + } + } + + gatherer.callNames.clear(); + } + } + } + +private: + std::map programUnits_; + parser::ProgramUnit *currentProgramUnit_ = nullptr; + + parser::Messages &messages_; +}; + +bool FinalizeOMP(SemanticsContext &context, parser::Program &program) { + ImplicitDeclareTargetCapture impCap{context}; + Walk(program, impCap); + return !context.AnyFatalError(); +} + +} // namespace Fortran::semantics diff --git a/flang/lib/Semantics/semantics.cpp b/flang/lib/Semantics/semantics.cpp --- a/flang/lib/Semantics/semantics.cpp +++ b/flang/lib/Semantics/semantics.cpp @@ -31,6 +31,7 @@ #include "check-select-type.h" #include "check-stop.h" #include "compute-offsets.h" +#include "finalize-omp.h" #include "mod-file.h" #include "resolve-labels.h" #include "resolve-names.h" @@ -170,6 +171,7 @@ ResolveNames(context, program, context.globalScope()); RewriteParseTree(context, program); ComputeOffsets(context, context.globalScope()); + FinalizeOMP(context, program); CheckDeclarations(context); StatementSemanticsPass1{context}.Walk(program); 
StatementSemanticsPass2 pass2{context}; diff --git a/flang/test/Lower/OpenMP/Todo/omp-declare-target.f90 b/flang/test/Lower/OpenMP/Todo/omp-declare-target.f90 deleted file mode 100644 --- a/flang/test/Lower/OpenMP/Todo/omp-declare-target.f90 +++ /dev/null @@ -1,12 +0,0 @@ -! This test checks lowering of OpenMP declare target Directive. - -// RUN: not flang-new -fc1 -emit-fir -fopenmp %s 2>&1 | FileCheck %s - -module mod1 -contains - subroutine sub() - integer :: x, y - // CHECK: not yet implemented: OpenMPDeclareTargetConstruct - !$omp declare target - end -end module diff --git a/flang/test/Lower/OpenMP/omp-declare-target.f90 b/flang/test/Lower/OpenMP/omp-declare-target.f90 new file mode 100644 --- /dev/null +++ b/flang/test/Lower/OpenMP/omp-declare-target.f90 @@ -0,0 +1,262 @@ +!RUN: %flang_fc1 -emit-fir -fopenmp %s -o - | FileCheck %s + +! Check specification valid forms of declare target with functions +! utilising device_type and to clauses as well as the default +! zero clause declare target + +! CHECK-LABEL: func.func @_QPfunc_t_device() +! CHECK-SAME: {{.*}}attributes {omp.declare_target = #omp{{.*}} +FUNCTION FUNC_T_DEVICE() RESULT(I) +!$omp declare target to(FUNC_T_DEVICE) device_type(nohost) + INTEGER :: I + I = 1 +END FUNCTION FUNC_T_DEVICE + +! CHECK-LABEL: func.func @_QPfunc_t_host() +! CHECK-SAME: {{.*}}attributes {omp.declare_target = #omp{{.*}} +FUNCTION FUNC_T_HOST() RESULT(I) +!$omp declare target to(FUNC_T_HOST) device_type(host) + INTEGER :: I + I = 1 +END FUNCTION FUNC_T_HOST + +! CHECK-LABEL: func.func @_QPfunc_t_any() +! CHECK-SAME: {{.*}}attributes {omp.declare_target = #omp{{.*}} +FUNCTION FUNC_T_ANY() RESULT(I) +!$omp declare target to(FUNC_T_ANY) device_type(any) + INTEGER :: I + I = 1 +END FUNCTION FUNC_T_ANY + +! CHECK-LABEL: func.func @_QPfunc_default_t_any() +! 
CHECK-SAME: {{.*}}attributes {omp.declare_target = #omp{{.*}} +FUNCTION FUNC_DEFAULT_T_ANY() RESULT(I) +!$omp declare target to(FUNC_DEFAULT_T_ANY) + INTEGER :: I + I = 1 +END FUNCTION FUNC_DEFAULT_T_ANY + +! CHECK-LABEL: func.func @_QPfunc_default_any() +! CHECK-SAME: {{.*}}attributes {omp.declare_target = #omp{{.*}} +FUNCTION FUNC_DEFAULT_ANY() RESULT(I) +!$omp declare target + INTEGER :: I + I = 1 +END FUNCTION FUNC_DEFAULT_ANY + +! CHECK-LABEL: func.func @_QPfunc_default_extendedlist() +! CHECK-SAME: {{.*}}attributes {omp.declare_target = #omp{{.*}} +FUNCTION FUNC_DEFAULT_EXTENDEDLIST() RESULT(I) +!$omp declare target(FUNC_DEFAULT_EXTENDEDLIST) + INTEGER :: I + I = 1 +END FUNCTION FUNC_DEFAULT_EXTENDEDLIST + +! CHECK-LABEL: func.func @_QPexist_on_both() +! CHECK-NOT: {{.*}}attributes {omp.declare_target = #omp{{.*}} +FUNCTION EXIST_ON_BOTH() RESULT(I) + INTEGER :: I + I = 1 +END FUNCTION EXIST_ON_BOTH + +!! ----- + +! Check specification valid forms of declare target with subroutines +! utilising device_type and to clauses as well as the default +! zero clause declare target + +! CHECK-LABEL: func.func @_QPsubr_t_device() +! CHECK-SAME: {{.*}}attributes {omp.declare_target = #omp{{.*}} +SUBROUTINE SUBR_T_DEVICE() +!$omp declare target to(SUBR_T_DEVICE) device_type(nohost) +END + +! CHECK-LABEL: func.func @_QPsubr_t_host() +! CHECK-SAME: {{.*}}attributes {omp.declare_target = #omp{{.*}} +SUBROUTINE SUBR_T_HOST() +!$omp declare target to(SUBR_T_HOST) device_type(host) +END + +! CHECK-LABEL: func.func @_QPsubr_t_any() +! CHECK-SAME: {{.*}}attributes {omp.declare_target = #omp{{.*}} +SUBROUTINE SUBR_T_ANY() +!$omp declare target to(SUBR_T_ANY) device_type(any) +END + +! CHECK-LABEL: func.func @_QPsubr_default_t_any() +! CHECK-SAME: {{.*}}attributes {omp.declare_target = #omp{{.*}} +SUBROUTINE SUBR_DEFAULT_T_ANY() +!$omp declare target to(SUBR_DEFAULT_T_ANY) +END + +! CHECK-LABEL: func.func @_QPsubr_default_any() +! 
CHECK-SAME: {{.*}}attributes {omp.declare_target = #omp{{.*}} +SUBROUTINE SUBR_DEFAULT_ANY() +!$omp declare target +END + +! CHECK-LABEL: func.func @_QPsubr_default_extendedlist() +! CHECK-SAME: {{.*}}attributes {omp.declare_target = #omp{{.*}} +SUBROUTINE SUBR_DEFAULT_EXTENDEDLIST() +!$omp declare target(SUBR_DEFAULT_EXTENDEDLIST) +END + +! CHECK-LABEL: func.func @_QPsubr_exist_on_both() +! CHECK-NOT: {{.*}}attributes {omp.declare_target = #omp{{.*}} +SUBROUTINE SUBR_EXIST_ON_BOTH() +END + +!! ----- + +! Check declare target in conjunction with implicitly +! invoked functions, this tests the declare target +! implicit capture pass within Flang. Functions +! invoked within an explicitly declare target function +! are marked as declare target with the caller's +! device_type clause + +! CHECK-LABEL: func.func @_QPimplicitly_captured +! CHECK-SAME: {{.*}}attributes {omp.declare_target = #omp{{.*}} +FUNCTION IMPLICITLY_CAPTURED(TOGGLE) RESULT(K) + INTEGER :: I, J, K + LOGICAL :: TOGGLE + I = 10 + J = 5 + IF (TOGGLE) THEN + K = I + ELSE + K = J + END IF +END FUNCTION IMPLICITLY_CAPTURED + + +! CHECK-LABEL: func.func @_QPtarget_function +! CHECK-SAME: {{.*}}attributes {omp.declare_target = #omp{{.*}} +FUNCTION TARGET_FUNCTION(TOGGLE) RESULT(I) +!$omp declare target + INTEGER :: I + LOGICAL :: TOGGLE + I = IMPLICITLY_CAPTURED(TOGGLE) +END FUNCTION TARGET_FUNCTION + +!! ----- + +! Check declare target in conjunction with implicitly +! invoked functions, this tests the declare target +! implicit capture pass within Flang. Functions +! invoked within an explicitly declare target function +! are marked as declare target with the caller's +! device_type clause, however, if they are found with +! distinct device_type clauses i.e. host and nohost, +! then they should be marked as any + +! CHECK-LABEL: func.func @_QPimplicitly_captured_twice +! 
CHECK-SAME: {{.*}}attributes {omp.declare_target = #omp{{.*}} +FUNCTION IMPLICITLY_CAPTURED_TWICE() RESULT(K) + INTEGER :: I + I = 10 + K = I +END FUNCTION IMPLICITLY_CAPTURED_TWICE + +! CHECK-LABEL: func.func @_QPtarget_function_twice_host +! CHECK-SAME: {{.*}}attributes {omp.declare_target = #omp{{.*}} +FUNCTION TARGET_FUNCTION_TWICE_HOST() RESULT(I) +!$omp declare target to(TARGET_FUNCTION_TWICE_HOST) device_type(host) + INTEGER :: I + I = IMPLICITLY_CAPTURED_TWICE() +END FUNCTION TARGET_FUNCTION_TWICE_HOST + +! CHECK-LABEL: func.func @_QPtarget_function_twice_device +! CHECK-SAME: {{.*}}attributes {omp.declare_target = #omp{{.*}} +FUNCTION TARGET_FUNCTION_TWICE_DEVICE() RESULT(I) +!$omp declare target to(TARGET_FUNCTION_TWICE_DEVICE) device_type(nohost) + INTEGER :: I + I = IMPLICITLY_CAPTURED_TWICE() +END FUNCTION TARGET_FUNCTION_TWICE_DEVICE + +!! ----- + +! Check declare target in conjunction with implicitly +! invoked functions, this tests the declare target +! implicit capture pass within Flang. A slightly more +! complex test checking functions are marked implicitly +! appropriately. + +! CHECK-LABEL: func.func @_QPimplicitly_captured_nest +! CHECK-SAME: {{.*}}attributes {omp.declare_target = #omp{{.*}} +FUNCTION IMPLICITLY_CAPTURED_NEST() RESULT(K) + INTEGER :: I + I = 10 + K = I +END FUNCTION IMPLICITLY_CAPTURED_NEST + +! CHECK-LABEL: func.func @_QPimplicitly_captured_one +! CHECK-SAME: {{.*}}attributes {omp.declare_target = #omp{{.*}} +FUNCTION IMPLICITLY_CAPTURED_ONE() RESULT(K) + K = IMPLICITLY_CAPTURED_NEST() +END FUNCTION IMPLICITLY_CAPTURED_ONE + +! CHECK-LABEL: func.func @_QPimplicitly_captured_two +! CHECK-SAME: {{.*}}attributes {omp.declare_target = #omp{{.*}} +FUNCTION IMPLICITLY_CAPTURED_TWO() RESULT(K) + INTEGER :: I + I = 10 + K = I +END FUNCTION IMPLICITLY_CAPTURED_TWO + +! CHECK-LABEL: func.func @_QPtarget_function_test +! 
CHECK-SAME: {{.*}}attributes {omp.declare_target = #omp{{.*}} +FUNCTION TARGET_FUNCTION_TEST() RESULT(J) +!$omp declare target to(TARGET_FUNCTION_TEST) device_type(nohost) + INTEGER :: I, J + I = IMPLICITLY_CAPTURED_ONE() + J = IMPLICITLY_CAPTURED_TWO() + I +END FUNCTION TARGET_FUNCTION_TEST + +!! ----- + +! Check declare target in conjunction with implicitly +! invoked functions, this tests the declare target +! implicit capture pass within Flang. A slightly more +! complex test checking functions are marked implicitly +! appropriately. + +! CHECK-LABEL: func.func @_QPimplicitly_captured_nest_twice +! CHECK-SAME: {{.*}}attributes {omp.declare_target = #omp{{.*}} +FUNCTION IMPLICITLY_CAPTURED_NEST_TWICE() RESULT(K) + INTEGER :: I + I = 10 + K = I +END FUNCTION IMPLICITLY_CAPTURED_NEST_TWICE + +! CHECK-LABEL: func.func @_QPimplicitly_captured_one_twice +! CHECK-SAME: {{.*}}attributes {omp.declare_target = #omp{{.*}} +FUNCTION IMPLICITLY_CAPTURED_ONE_TWICE() RESULT(K) + K = IMPLICITLY_CAPTURED_NEST_TWICE() +END FUNCTION IMPLICITLY_CAPTURED_ONE_TWICE + +! CHECK-LABEL: func.func @_QPimplicitly_captured_two_twice +! CHECK-SAME: {{.*}}attributes {omp.declare_target = #omp{{.*}} +FUNCTION IMPLICITLY_CAPTURED_TWO_TWICE() RESULT(K) + INTEGER :: I + I = 10 + K = I +END FUNCTION IMPLICITLY_CAPTURED_TWO_TWICE + +! CHECK-LABEL: func.func @_QPtarget_function_test_device +! CHECK-SAME: {{.*}}attributes {omp.declare_target = #omp{{.*}} +FUNCTION TARGET_FUNCTION_TEST_DEVICE() RESULT(J) + !$omp declare target to(TARGET_FUNCTION_TEST_DEVICE) device_type(nohost) + INTEGER :: I, J + I = IMPLICITLY_CAPTURED_ONE_TWICE() + J = IMPLICITLY_CAPTURED_TWO_TWICE() + I +END FUNCTION TARGET_FUNCTION_TEST_DEVICE + +! CHECK-LABEL: func.func @_QPtarget_function_test_host +! 
CHECK-SAME: {{.*}}attributes {omp.declare_target = #omp{{.*}} +FUNCTION TARGET_FUNCTION_TEST_HOST() RESULT(J) + !$omp declare target to(TARGET_FUNCTION_TEST_HOST) device_type(host) + INTEGER :: I, J + I = IMPLICITLY_CAPTURED_ONE_TWICE() + J = IMPLICITLY_CAPTURED_TWO_TWICE() + I +END FUNCTION TARGET_FUNCTION_TEST_HOST diff --git a/flang/test/Lower/OpenMP/requires-error.f90 b/flang/test/Lower/OpenMP/requires-error.f90 new file mode 100644 --- /dev/null +++ b/flang/test/Lower/OpenMP/requires-error.f90 @@ -0,0 +1,15 @@ +! RUN: not %flang_fc1 -emit-fir -fopenmp %s -o - 2>&1 | FileCheck %s + +! This test checks that requires lowering into MLIR reports an error when +! conflicting atomic_default_mem_order clauses are found in different program +! units of the same compilation unit + +!CHECK: error: {{.*}} conflicting atomic_default_mem_order clause found: +!CHECK-SAME: acq_rel != seq_cst +program requires + !$omp requires atomic_default_mem_order(seq_cst) +end program requires + +subroutine f() + !$omp requires atomic_default_mem_order(acq_rel) +end subroutine f diff --git a/flang/test/Lower/OpenMP/requires-notarget.f90 b/flang/test/Lower/OpenMP/requires-notarget.f90 new file mode 100644 --- /dev/null +++ b/flang/test/Lower/OpenMP/requires-notarget.f90 @@ -0,0 +1,12 @@ +! RUN: %flang_fc1 -emit-fir -fopenmp %s -o - | FileCheck %s + +! This test checks that requires lowering into MLIR skips creating the +! omp.requires attribute with target-related clauses if there are no device +! functions in the compilation unit + +!CHECK: module attributes { +!CHECK-SAME: omp.atomic_default_mem_order = #omp +!CHECK-NOT: omp.requires +program requires + !$omp requires unified_shared_memory reverse_offload atomic_default_mem_order(seq_cst) +end program requires diff --git a/flang/test/Lower/OpenMP/requires.f90 b/flang/test/Lower/OpenMP/requires.f90 new file mode 100644 --- /dev/null +++ b/flang/test/Lower/OpenMP/requires.f90 @@ -0,0 +1,14 @@ +! 
RUN: %flang_fc1 -emit-fir -fopenmp %s -o - | FileCheck %s + +! This test checks the lowering of requires into MLIR + +!CHECK: module attributes { +!CHECK-SAME: omp.atomic_default_mem_order = #omp +!CHECK-SAME: omp.requires = #omp +program requires + !$omp requires unified_shared_memory reverse_offload atomic_default_mem_order(seq_cst) +end program requires + +subroutine f + !$omp declare target +end subroutine f diff --git a/flang/test/Semantics/OpenMP/declare-target-implicit-capture-rewrite.f90 b/flang/test/Semantics/OpenMP/declare-target-implicit-capture-rewrite.f90 new file mode 100644 --- /dev/null +++ b/flang/test/Semantics/OpenMP/declare-target-implicit-capture-rewrite.f90 @@ -0,0 +1,191 @@ +! RUN: %flang_fc1 -fopenmp -fdebug-dump-parse-tree %s 2>&1 | FileCheck %s +! +! Ensure that functions and subroutines referenced within +! declare target functions are themselves made declare target +! as specified by more recent iterations of the OpenMP +! specification. This is done through a semantic pass which +! appends the implicitly captured functions to the original +! declare target declaration rather than generating and +! inserting new ones within the captured functions. +! +! For example a declare target inside of a function named 'ORIGINAL', +! would initially be empty, after the pass, the declare target +! would be expanded to declare target to(ORIGINAL). If +! there is a function named 'CAPTURED' called within 'ORIGINAL' +! the declare target inside of 'ORIGINAL' would be further +! expanded to declare target to(ORIGINAL, CAPTURED) + +FUNCTION IMPLICITLY_CAPTURED_NEST_TWICE() RESULT(I) + INTEGER :: I + I = 10 +END FUNCTION IMPLICITLY_CAPTURED_NEST_TWICE + +FUNCTION IMPLICITLY_CAPTURED_ONE_TWICE() RESULT(K) + K = IMPLICITLY_CAPTURED_NEST_TWICE() +END FUNCTION IMPLICITLY_CAPTURED_ONE_TWICE + +FUNCTION IMPLICITLY_CAPTURED_TWO_TWICE() RESULT(Y) + INTEGER :: Y + Y = 5 +END FUNCTION IMPLICITLY_CAPTURED_TWO_TWICE + +! CHECK: SpecificationPart +! 
CHECK: OpenMPDeclarativeConstruct -> OpenMPDeclareTargetConstruct +! CHECK: Verbatim +! CHECK: OmpDeclareTargetSpecifier -> OmpDeclareTargetWithClause -> OmpClauseList -> OmpClause -> To -> OmpObjectList -> OmpObject -> Designator -> DataRef -> Name = 'target_function_test_device' +! CHECK: OmpObject -> Designator -> DataRef -> Name = 'implicitly_captured_one_twice' +! CHECK: OmpObject -> Designator -> DataRef -> Name = 'implicitly_captured_two_twice' +! CHECK: OmpObject -> Designator -> DataRef -> Name = 'implicitly_captured_nest_twice' +! CHECK: OmpClause -> DeviceType -> OmpDeviceTypeClause -> Type = Nohost +FUNCTION TARGET_FUNCTION_TEST_DEVICE() RESULT(J) +!$omp declare target to(TARGET_FUNCTION_TEST_DEVICE) device_type(nohost) + INTEGER :: I, J + I = IMPLICITLY_CAPTURED_ONE_TWICE() + J = IMPLICITLY_CAPTURED_TWO_TWICE() + I +END FUNCTION TARGET_FUNCTION_TEST_DEVICE + +! CHECK: SpecificationPart +! CHECK: OpenMPDeclarativeConstruct -> OpenMPDeclareTargetConstruct +! CHECK: Verbatim +! CHECK: OmpDeclareTargetSpecifier -> OmpDeclareTargetWithClause -> OmpClauseList -> OmpClause -> To -> OmpObjectList -> OmpObject -> Designator -> DataRef -> Name = 'target_function_test_host' +! CHECK: OmpObject -> Designator -> DataRef -> Name = 'implicitly_captured_one_twice' +! CHECK: OmpObject -> Designator -> DataRef -> Name = 'implicitly_captured_two_twice' +! CHECK: OmpObject -> Designator -> DataRef -> Name = 'implicitly_captured_nest_twice' +! CHECK: OmpClause -> DeviceType -> OmpDeviceTypeClause -> Type = Host +FUNCTION TARGET_FUNCTION_TEST_HOST() RESULT(J) +!$omp declare target to(TARGET_FUNCTION_TEST_HOST) device_type(host) + INTEGER :: I, J + I = IMPLICITLY_CAPTURED_ONE_TWICE() + J = IMPLICITLY_CAPTURED_TWO_TWICE() + I +END FUNCTION TARGET_FUNCTION_TEST_HOST + +!! ----- + +! CHECK: SpecificationPart +! CHECK: OpenMPDeclarativeConstruct -> OpenMPDeclareTargetConstruct +! CHECK: Verbatim +! 
CHECK: OmpDeclareTargetSpecifier -> OmpDeclareTargetWithClause -> OmpClauseList -> OmpClause -> To -> OmpObjectList -> OmpObject -> Designator -> DataRef -> Name = 'func_t_device' +! CHECK: OmpClause -> DeviceType -> OmpDeviceTypeClause -> Type = Nohost +FUNCTION FUNC_T_DEVICE() RESULT(I) +!$omp declare target to(FUNC_T_DEVICE) device_type(nohost) + INTEGER :: I + I = 1 +END FUNCTION FUNC_T_DEVICE + +!! ----- + +! CHECK: SpecificationPart +! CHECK: OpenMPDeclarativeConstruct -> OpenMPDeclareTargetConstruct +! CHECK: Verbatim +! CHECK: OmpDeclareTargetSpecifier -> OmpDeclareTargetWithClause -> OmpClauseList -> OmpClause -> To -> OmpObjectList -> OmpObject -> Designator -> DataRef -> Name = 'subr_t_any' +! CHECK: OmpClause -> DeviceType -> OmpDeviceTypeClause -> Type = Any +SUBROUTINE SUBR_T_ANY() +!$omp declare target to(SUBR_T_ANY) device_type(any) +END + +!! ----- + +! CHECK: SpecificationPart +! CHECK: OpenMPDeclarativeConstruct -> OpenMPDeclareTargetConstruct +! CHECK: Verbatim +! CHECK: OmpDeclareTargetSpecifier -> OmpDeclareTargetWithList -> OmpObjectList -> OmpObject -> Designator -> DataRef -> Name = 'subr_default_extendedlist' +SUBROUTINE SUBR_DEFAULT_EXTENDEDLIST() +!$omp declare target(SUBR_DEFAULT_EXTENDEDLIST) +END + +!! ----- + +! CHECK: SpecificationPart +! CHECK: OpenMPDeclarativeConstruct -> OpenMPDeclareTargetConstruct +! CHECK: Verbatim +! CHECK: OmpDeclareTargetSpecifier -> OmpDeclareTargetWithClause -> OmpClauseList -> OmpClause -> To -> OmpObjectList -> OmpObject -> Designator -> DataRef -> Name = 'subr_unspecified' +SUBROUTINE SUBR_UNSPECIFIED() +!$omp declare target +END + +!! ----- + +FUNCTION UNSPECIFIED_CAPTURE() RESULT(K) + REAL :: K + K = 1 +END FUNCTION UNSPECIFIED_CAPTURE + +! CHECK: SpecificationPart +! CHECK: OpenMPDeclarativeConstruct -> OpenMPDeclareTargetConstruct +! CHECK: Verbatim +! 
CHECK: OmpDeclareTargetSpecifier -> OmpDeclareTargetWithClause -> OmpClauseList -> OmpClause -> To -> OmpObjectList -> OmpObject -> Designator -> DataRef -> Name = 'subr_unspecified_capture' +! CHECK: OmpObject -> Designator -> DataRef -> Name = 'unspecified_capture' +SUBROUTINE SUBR_UNSPECIFIED_CAPTURE() +!$omp declare target + REAL :: I + I = UNSPECIFIED_CAPTURE() +END + +!!!! ----- + +FUNCTION IMPLICITLY_CAPTURED_NEST() RESULT(K) + INTEGER :: I + I = 10 + K = I +END FUNCTION IMPLICITLY_CAPTURED_NEST + +FUNCTION IMPLICITLY_CAPTURED_ONE() RESULT(K) + K = IMPLICITLY_CAPTURED_NEST() +END FUNCTION IMPLICITLY_CAPTURED_ONE + +FUNCTION IMPLICITLY_CAPTURED_TWO() RESULT(K) + INTEGER :: I + I = 10 + K = I +END FUNCTION IMPLICITLY_CAPTURED_TWO + +! CHECK: SpecificationPart +! CHECK: OpenMPDeclarativeConstruct -> OpenMPDeclareTargetConstruct +! CHECK: Verbatim +! CHECK: OmpDeclareTargetSpecifier -> OmpDeclareTargetWithClause -> OmpClauseList -> OmpClause -> To -> OmpObjectList -> OmpObject -> Designator -> DataRef -> Name = 'target_function_test' +! CHECK: OmpObject -> Designator -> DataRef -> Name = 'implicitly_captured_one' +! CHECK: OmpObject -> Designator -> DataRef -> Name = 'implicitly_captured_two' +! CHECK: OmpObject -> Designator -> DataRef -> Name = 'implicitly_captured_nest' +! CHECK: OmpClause -> DeviceType -> OmpDeviceTypeClause -> Type = Nohost +FUNCTION TARGET_FUNCTION_TEST() RESULT(J) +!$omp declare target to(TARGET_FUNCTION_TEST) device_type(nohost) + INTEGER :: I, J + I = IMPLICITLY_CAPTURED_ONE() + J = IMPLICITLY_CAPTURED_TWO() + I +END FUNCTION TARGET_FUNCTION_TEST + +!!!! ----- + +FUNCTION NO_DECLARE_TARGET() RESULT(K) + implicit none + REAL :: I, K + I = 10.0 + K = I +END FUNCTION NO_DECLARE_TARGET + +! CHECK: SpecificationPart +! CHECK: OpenMPDeclarativeConstruct -> OpenMPDeclareTargetConstruct +! CHECK: Verbatim +! 
CHECK: OmpDeclareTargetSpecifier -> OmpDeclareTargetWithClause -> OmpClauseList -> OmpClause -> To -> OmpObjectList -> OmpObject -> Designator -> DataRef -> Name = 'declare_target_two' +! CHECK: OmpObject -> Designator -> DataRef -> Name = 'no_declare_target' +FUNCTION DECLARE_TARGET_TWO() RESULT(J) +!$omp declare target to(DECLARE_TARGET_TWO) + implicit none + REAL :: I, J + I = NO_DECLARE_TARGET() + J = I +END FUNCTION DECLARE_TARGET_TWO + +! CHECK: SpecificationPart +! CHECK: OpenMPDeclarativeConstruct -> OpenMPDeclareTargetConstruct +! CHECK: Verbatim +! CHECK: OmpDeclareTargetSpecifier -> OmpDeclareTargetWithClause -> OmpClauseList -> OmpClause -> To -> OmpObjectList -> OmpObject -> Designator -> DataRef -> Name = 'declare_target_one' +! CHECK: OmpObject -> Designator -> DataRef -> Name = 'declare_target_two' +! CHECK: OmpObject -> Designator -> DataRef -> Name = 'no_declare_target' +FUNCTION DECLARE_TARGET_ONE() RESULT(I) +!$omp declare target to(DECLARE_TARGET_ONE) + implicit none + REAL :: K, I + I = DECLARE_TARGET_TWO() + K = I +END FUNCTION DECLARE_TARGET_ONE diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h --- a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h +++ b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h @@ -81,6 +81,10 @@ /// is not necessary at all, because because the only functions that are called /// are ones that are not dependent on the configuration. class OpenMPIRBuilderConfig { +private: + /// Flags for specifying which requires directive clauses are present. + int64_t RequiresFlags; + public: /// Flag for specifying if the compilation is done for embedded device code /// or host code. @@ -90,10 +94,6 @@ /// like GPU. std::optional IsTargetCodegen; - /// Flag for specifying weather a requires unified_shared_memory - /// directive is present or not. - std::optional HasRequiresUnifiedSharedMemory; - // Flag for specifying if offloading is mandatory. 
std::optional OpenMPOffloadMandatory; @@ -102,13 +102,13 @@ /// Separator used between all of the rest consecutive parts of s name std::optional Separator; - OpenMPIRBuilderConfig() {} + OpenMPIRBuilderConfig(); OpenMPIRBuilderConfig(bool IsEmbedded, bool IsTargetCodegen, + bool OpenMPOffloadMandatory, + bool HasRequiresReverseOffload, + bool HasRequiresUnifiedAddress, bool HasRequiresUnifiedSharedMemory, - bool OpenMPOffloadMandatory) - : IsEmbedded(IsEmbedded), IsTargetCodegen(IsTargetCodegen), - HasRequiresUnifiedSharedMemory(HasRequiresUnifiedSharedMemory), - OpenMPOffloadMandatory(OpenMPOffloadMandatory) {} + bool HasRequiresDynamicAllocators); // Getters functions that assert if the required values are not present. bool isEmbedded() const { @@ -121,17 +121,22 @@ return *IsTargetCodegen; } - bool hasRequiresUnifiedSharedMemory() const { - assert(HasRequiresUnifiedSharedMemory.has_value() && - "HasUnifiedSharedMemory is not set"); - return *HasRequiresUnifiedSharedMemory; - } - bool openMPOffloadMandatory() const { assert(OpenMPOffloadMandatory.has_value() && "OpenMPOffloadMandatory is not set"); return *OpenMPOffloadMandatory; } + + bool hasRequiresFlags() const { return RequiresFlags; } + bool hasRequiresReverseOffload() const; + bool hasRequiresUnifiedAddress() const; + bool hasRequiresUnifiedSharedMemory() const; + bool hasRequiresDynamicAllocators() const; + + /// Returns requires directive clauses as flags compatible with those expected + /// by libomptarget. 
+ int64_t getRequiresFlags() const; + // Returns the FirstSeparator if set, otherwise use the default // separator depending on isTargetCodegen StringRef firstSeparator() const { @@ -154,11 +159,13 @@ void setIsEmbedded(bool Value) { IsEmbedded = Value; } void setIsTargetCodegen(bool Value) { IsTargetCodegen = Value; } - void setHasRequiresUnifiedSharedMemory(bool Value) { - HasRequiresUnifiedSharedMemory = Value; - } void setFirstSeparator(StringRef FS) { FirstSeparator = FS; } void setSeparator(StringRef S) { Separator = S; } + + void setHasRequiresReverseOffload(bool Value); + void setHasRequiresUnifiedAddress(bool Value); + void setHasRequiresUnifiedSharedMemory(bool Value); + void setHasRequiresDynamicAllocators(bool Value); }; /// Data structure to contain the information needed to uniquely identify @@ -2174,6 +2181,16 @@ /// \param Name Name of the variable. GlobalVariable *getOrCreateInternalVariable(Type *Ty, const StringRef &Name, unsigned AddressSpace = 0); + + /// Create a global function to register OpenMP requires flags into the + /// runtime, according to the `Config`. + /// + /// This function should be added to the list of constructors of the + /// compilation unit in order to be called before other OpenMP runtime + /// functions. + /// + /// \param Name Name of the created function. + Function *createRegisterRequires(StringRef Name); }; /// Class to represented the control flow structure of an OpenMP canonical loop. 
diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp --- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp +++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp @@ -21,10 +21,12 @@ #include "llvm/Analysis/OptimizationRemarkEmitter.h" #include "llvm/Analysis/ScalarEvolution.h" #include "llvm/Analysis/TargetLibraryInfo.h" +#include "llvm/IR/BasicBlock.h" #include "llvm/IR/CFG.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DebugInfoMetadata.h" #include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Function.h" #include "llvm/IR/GlobalVariable.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/MDBuilder.h" @@ -328,6 +330,104 @@ return splitBB(Builder, CreateBranch, Old->getName() + Suffix); } +//===----------------------------------------------------------------------===// +// OpenMPIRBuilderConfig +//===----------------------------------------------------------------------===// + +namespace { +LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE(); +/// Values for bit flags for marking which requires clauses have been used. +enum OpenMPOffloadingRequiresDirFlags { + /// flag undefined. + OMP_REQ_UNDEFINED = 0x000, + /// no requires directive present. + OMP_REQ_NONE = 0x001, + /// reverse_offload clause. + OMP_REQ_REVERSE_OFFLOAD = 0x002, + /// unified_address clause. + OMP_REQ_UNIFIED_ADDRESS = 0x004, + /// unified_shared_memory clause. + OMP_REQ_UNIFIED_SHARED_MEMORY = 0x008, + /// dynamic_allocators clause. 
+ OMP_REQ_DYNAMIC_ALLOCATORS = 0x010, + LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_REQ_DYNAMIC_ALLOCATORS) +}; + +} // anonymous namespace + +OpenMPIRBuilderConfig::OpenMPIRBuilderConfig() + : RequiresFlags(OMP_REQ_UNDEFINED) {} + +OpenMPIRBuilderConfig::OpenMPIRBuilderConfig( + bool IsEmbedded, bool IsTargetCodegen, bool OpenMPOffloadMandatory, + bool HasRequiresReverseOffload, bool HasRequiresUnifiedAddress, + bool HasRequiresUnifiedSharedMemory, bool HasRequiresDynamicAllocators) + : RequiresFlags(OMP_REQ_UNDEFINED), IsEmbedded(IsEmbedded), + IsTargetCodegen(IsTargetCodegen), + OpenMPOffloadMandatory(OpenMPOffloadMandatory) { + if (HasRequiresReverseOffload) + RequiresFlags |= OMP_REQ_REVERSE_OFFLOAD; + if (HasRequiresUnifiedAddress) + RequiresFlags |= OMP_REQ_UNIFIED_ADDRESS; + if (HasRequiresUnifiedSharedMemory) + RequiresFlags |= OMP_REQ_UNIFIED_SHARED_MEMORY; + if (HasRequiresDynamicAllocators) + RequiresFlags |= OMP_REQ_DYNAMIC_ALLOCATORS; +} + +bool OpenMPIRBuilderConfig::hasRequiresReverseOffload() const { + return RequiresFlags & OMP_REQ_REVERSE_OFFLOAD; +} + +bool OpenMPIRBuilderConfig::hasRequiresUnifiedAddress() const { + return RequiresFlags & OMP_REQ_UNIFIED_ADDRESS; +} + +bool OpenMPIRBuilderConfig::hasRequiresUnifiedSharedMemory() const { + return RequiresFlags & OMP_REQ_UNIFIED_SHARED_MEMORY; +} + +bool OpenMPIRBuilderConfig::hasRequiresDynamicAllocators() const { + return RequiresFlags & OMP_REQ_DYNAMIC_ALLOCATORS; +} + +int64_t OpenMPIRBuilderConfig::getRequiresFlags() const { + return hasRequiresFlags() ? 
RequiresFlags + : static_cast(OMP_REQ_NONE); +} + +void OpenMPIRBuilderConfig::setHasRequiresReverseOffload(bool Value) { + if (Value) + RequiresFlags |= OMP_REQ_REVERSE_OFFLOAD; + else + RequiresFlags &= ~OMP_REQ_REVERSE_OFFLOAD; +} + +void OpenMPIRBuilderConfig::setHasRequiresUnifiedAddress(bool Value) { + if (Value) + RequiresFlags |= OMP_REQ_UNIFIED_ADDRESS; + else + RequiresFlags &= ~OMP_REQ_UNIFIED_ADDRESS; +} + +void OpenMPIRBuilderConfig::setHasRequiresUnifiedSharedMemory(bool Value) { + if (Value) + RequiresFlags |= OMP_REQ_UNIFIED_SHARED_MEMORY; + else + RequiresFlags &= ~OMP_REQ_UNIFIED_SHARED_MEMORY; +} + +void OpenMPIRBuilderConfig::setHasRequiresDynamicAllocators(bool Value) { + if (Value) + RequiresFlags |= OMP_REQ_DYNAMIC_ALLOCATORS; + else + RequiresFlags &= ~OMP_REQ_DYNAMIC_ALLOCATORS; +} + +//===----------------------------------------------------------------------===// +// OpenMPIRBuilder +//===----------------------------------------------------------------------===// + void OpenMPIRBuilder::addAttributes(omp::RuntimeFunction FnID, Function &Fn) { LLVMContext &Ctx = Fn.getContext(); Triple T(M.getTargetTriple()); @@ -5106,6 +5206,39 @@ } } +Function *OpenMPIRBuilder::createRegisterRequires(StringRef Name) { + // Skip the creation of the registration function if this is device codegen + if (Config.isEmbedded()) + return nullptr; + + Builder.ClearInsertionPoint(); + + // Create registration function prototype + auto *RegFnTy = FunctionType::get(Builder.getVoidTy(), {}); + auto *RegFn = Function::Create( + RegFnTy, GlobalVariable::LinkageTypes::InternalLinkage, Name, M); + RegFn->setSection(".text.startup"); + RegFn->addFnAttr(Attribute::NoInline); + RegFn->addFnAttr(Attribute::NoUnwind); + + // Create registration function body + auto *BB = BasicBlock::Create(M.getContext(), "entry", RegFn); + ConstantInt *FlagsVal = + ConstantInt::getSigned(Builder.getInt64Ty(), Config.getRequiresFlags()); + Function *RTLRegFn = getOrCreateRuntimeFunctionPtr( + 
omp::RuntimeFunction::OMPRTL___tgt_register_requires); + + Builder.SetInsertPoint(BB); + Builder.CreateCall(RTLRegFn, {FlagsVal}); + Builder.CreateRetVoid(); + + return RegFn; +} + +//===----------------------------------------------------------------------===// +// OffloadEntriesInfoManager +//===----------------------------------------------------------------------===// + bool OffloadEntriesInfoManager::empty() const { return OffloadEntriesTargetRegion.empty() && OffloadEntriesDeviceGlobalVar.empty(); @@ -5239,6 +5372,10 @@ Action(E.getKey(), E.getValue()); } +//===----------------------------------------------------------------------===// +// CanonicalLoopInfo +//===----------------------------------------------------------------------===// + void CanonicalLoopInfo::collectControlBlocks( SmallVectorImpl &BBs) { // We only count those BBs as control block for which we do not need to diff --git a/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp b/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp --- a/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp +++ b/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp @@ -9,12 +9,14 @@ #include "llvm/Frontend/OpenMP/OMPConstants.h" #include "llvm/Frontend/OpenMP/OMPIRBuilder.h" #include "llvm/IR/BasicBlock.h" +#include "llvm/IR/Constants.h" #include "llvm/IR/DIBuilder.h" #include "llvm/IR/Function.h" #include "llvm/IR/InstIterator.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/Module.h" +#include "llvm/IR/Type.h" #include "llvm/IR/Verifier.h" #include "llvm/Passes/PassBuilder.h" #include "llvm/Support/Casting.h" @@ -5695,7 +5697,8 @@ TEST_F(OpenMPIRBuilderTest, OffloadEntriesInfoManager) { OpenMPIRBuilder OMPBuilder(*M); - OMPBuilder.setConfig(OpenMPIRBuilderConfig(true, false, false, false)); + OMPBuilder.setConfig( + OpenMPIRBuilderConfig(true, false, false, false, false, false, false)); OffloadEntriesInfoManager &InfoManager = OMPBuilder.OffloadInfoManager; TargetRegionEntryInfo 
EntryInfo("parent", 1, 2, 4, 0); InfoManager.initializeTargetRegionEntryInfo(EntryInfo, 0); @@ -5710,4 +5713,44 @@ GlobalValue::WeakAnyLinkage); EXPECT_TRUE(InfoManager.hasDeviceGlobalVarEntryInfo("gvar")); } + +TEST_F(OpenMPIRBuilderTest, CreateRegisterRequires) { + OpenMPIRBuilder OMPBuilder(*M); + OMPBuilder.initialize(); + + OMPBuilder.setConfig( + OpenMPIRBuilderConfig(/*IsEmbedded=*/false, + /*IsTargetCodegen=*/false, + /*OpenMPOffloadMandatory=*/false, + /*HasRequiresReverseOffload=*/true, + /*HasRequiresUnifiedAddress=*/false, + /*HasRequiresUnifiedSharedMemory=*/true, + /*HasRequiresDynamicAllocators=*/false)); + + auto FName = + OMPBuilder.createPlatformSpecificName({"omp_offloading", "requires_reg"}); + EXPECT_EQ(FName, ".omp_offloading.requires_reg"); + + Function *Fn = OMPBuilder.createRegisterRequires(FName); + EXPECT_NE(Fn, nullptr); + EXPECT_EQ(FName, Fn->getName()); + + EXPECT_EQ(Fn->getSection(), ".text.startup"); + EXPECT_TRUE(Fn->hasInternalLinkage()); + EXPECT_TRUE(Fn->hasFnAttribute(Attribute::NoInline)); + EXPECT_TRUE(Fn->hasFnAttribute(Attribute::NoUnwind)); + EXPECT_EQ(Fn->size(), 1u); + + BasicBlock *Entry = &Fn->getEntryBlock(); + EXPECT_FALSE(Entry->empty()); + EXPECT_EQ(Fn->getReturnType()->getTypeID(), Type::VoidTyID); + + CallInst *Call = &cast(*Entry->begin()); + EXPECT_EQ(Call->getCalledFunction()->getName(), "__tgt_register_requires"); + EXPECT_EQ(Call->getNumOperands(), 2u); + + Value *Flags = Call->getArgOperand(0); + EXPECT_EQ(cast(Flags)->getSExtValue(), + OMPBuilder.Config.getRequiresFlags()); +} } // namespace diff --git a/mlir/include/mlir/Dialect/OpenMP/OpenMPDialect.h b/mlir/include/mlir/Dialect/OpenMP/OpenMPDialect.h --- a/mlir/include/mlir/Dialect/OpenMP/OpenMPDialect.h +++ b/mlir/include/mlir/Dialect/OpenMP/OpenMPDialect.h @@ -21,6 +21,10 @@ #include "mlir/Interfaces/ControlFlowInterfaces.h" #include "mlir/Interfaces/SideEffectInterfaces.h" +namespace mlir::omp { +enum class DeclareTargetDeviceType : uint32_t; +} // 
namespace mlir::omp + #include "mlir/Dialect/OpenMP/OpenMPOpsDialect.h.inc" #include "mlir/Dialect/OpenMP/OpenMPOpsEnums.h.inc" #include "mlir/Dialect/OpenMP/OpenMPTypeInterfaces.h.inc" diff --git a/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td b/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td --- a/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td +++ b/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td @@ -28,6 +28,13 @@ let cppNamespace = "::mlir::omp"; let dependentDialects = ["::mlir::LLVM::LLVMDialect"]; let useDefaultAttributePrinterParser = 1; + + let extraClassDeclaration = [{ + // Helper functions for assigning a DeclareTargetDeviceType Attribute to functions + static void setDeclareTarget(Operation *func, mlir::omp::DeclareTargetDeviceType deviceType); + static bool isDeclareTarget(Operation *func); + static mlir::omp::DeclareTargetDeviceType getDeclareTargetDeviceType(Operation *func); + }]; } // OmpCommon requires definition of OpenACC_Dialect. @@ -87,6 +94,27 @@ def OpenMP_PointerLikeType : TypeAlias; +//===----------------------------------------------------------------------===// +// 2.12.7 Declare Target Directive +//===----------------------------------------------------------------------===// + +def DeviceTypeAny : I32EnumAttrCase<"any", 0>; +def DeviceTypeHost : I32EnumAttrCase<"host", 1>; +def DeviceTypeNoHost : I32EnumAttrCase<"nohost", 2>; + +def DeclareTargetDeviceType : I32EnumAttr< + "DeclareTargetDeviceType", + "device_type clause", + [DeviceTypeAny, DeviceTypeHost, DeviceTypeNoHost]> { + let genSpecializedAttr = 0; + let cppNamespace = "::mlir::omp"; +} + +def DeclareTargetDeviceTypeAttr : EnumAttr { + let assemblyFormat = "`(` $value `)`"; +} + //===----------------------------------------------------------------------===// // 2.6 parallel Construct //===----------------------------------------------------------------------===// @@ -1644,4 +1672,33 @@ let hasVerifier = 1; } 
+//===----------------------------------------------------------------------===// +// 8.2 requires directive +//===----------------------------------------------------------------------===// + +def ClauseRequiresNone : I32BitEnumAttrCaseNone<"none">; +def ClauseRequiresReverseOffload : I32BitEnumAttrCaseBit<"reverse_offload", 0>; +def ClauseRequiresUnifiedAddress : I32BitEnumAttrCaseBit<"unified_address", 1>; +def ClauseRequiresUnifiedSharedMemory + : I32BitEnumAttrCaseBit<"unified_shared_memory", 2>; +def ClauseRequiresDynamicAllocators + : I32BitEnumAttrCaseBit<"dynamic_allocators", 3>; + +def ClauseRequires : I32BitEnumAttr< + "ClauseRequires", + "requires clause", + [ + ClauseRequiresNone, + ClauseRequiresReverseOffload, + ClauseRequiresUnifiedAddress, + ClauseRequiresUnifiedSharedMemory, + ClauseRequiresDynamicAllocators + ]> { + let genSpecializedAttr = 0; + let cppNamespace = "::mlir::omp"; +} +def ClauseRequiresAttr : + EnumAttr { +} + #endif // OPENMP_OPS diff --git a/mlir/include/mlir/Dialect/OpenMP/OpenMPOpsInterfaces.td b/mlir/include/mlir/Dialect/OpenMP/OpenMPOpsInterfaces.td --- a/mlir/include/mlir/Dialect/OpenMP/OpenMPOpsInterfaces.td +++ b/mlir/include/mlir/Dialect/OpenMP/OpenMPOpsInterfaces.td @@ -49,7 +49,7 @@ def OffloadModuleInterface : OpInterface<"OffloadModuleInterface"> { let description = [{ - Operations that represent a module for offloading (host or device) + Operations that represent a module for offloading (host or device) should have this interface. }]; @@ -68,7 +68,7 @@ mlir::StringAttr::get($_op->getContext(), llvm::Twine{"omp.is_device"}), mlir::omp::IsDeviceAttr::get($_op->getContext(), isDevice)); }]>, - InterfaceMethod< + InterfaceMethod< /*description=*/[{ Get the IsDeviceAttr attribute on the current module if it exists and return its value, if it doesn't exit it returns false by default. 
@@ -138,6 +138,54 @@ targetCPU.str(), targetFeatures.str())); }]>, + InterfaceMethod< + /*description=*/[{ + Get the omp.requires attribute on the operator if it's present and + return its value. If it doesn't exist, return `ClauseRequires::none` by + default. + }], + /*retTy=*/"::mlir::omp::ClauseRequires", + /*methodName=*/"getRequires", + (ins), [{}], [{ + if (Attribute requiresAttr = $_op->getAttr("omp.requires")) + if (auto requiresVal = requiresAttr.dyn_cast<mlir::omp::ClauseRequiresAttr>()) + return requiresVal.getValue(); + return mlir::omp::ClauseRequires::none; + }]>, + InterfaceMethod< + /*description=*/[{ + Set the omp.requires attribute on the operator to the specified clauses. + }], + /*retTy=*/"void", + /*methodName=*/"setRequires", + (ins "::mlir::omp::ClauseRequires":$clauses), [{}], [{ + $_op->setAttr(mlir::StringAttr::get($_op->getContext(), "omp.requires"), + mlir::omp::ClauseRequiresAttr::get($_op->getContext(), clauses)); + }]>, + InterfaceMethod< + /*description=*/[{ + Get the omp.atomic_default_mem_order attribute on the operator and + return its value if it's present. + }], + /*retTy=*/"std::optional<::mlir::omp::ClauseMemoryOrderKind>", + /*methodName=*/"getAtomicDefaultMemOrder", + (ins), [{}], [{ + if (mlir::Attribute atomicDefaultAttr = $_op->getAttr("omp.atomic_default_mem_order")) + if (auto atomicDefault = atomicDefaultAttr.dyn_cast<mlir::omp::ClauseMemoryOrderKindAttr>()) + return atomicDefault.getValue(); + return std::nullopt; + }]>, + InterfaceMethod< + /*description=*/[{ + Set the omp.atomic_default_mem_order attribute on the operator to the + specified memory order kind. 
+ }], + /*retTy=*/"void", + /*methodName=*/"setAtomicDefaultMemOrder", + (ins "::mlir::omp::ClauseMemoryOrderKind":$order), [{}], [{ + $_op->setAttr(mlir::StringAttr::get($_op->getContext(), "omp.atomic_default_mem_order"), + mlir::omp::ClauseMemoryOrderKindAttr::get($_op->getContext(), order)); + }]> ]; } diff --git a/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp b/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp --- a/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp +++ b/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp @@ -1441,6 +1441,30 @@ return success(); } +//===----------------------------------------------------------------------===// +// OpenMPDialect helper functions +//===----------------------------------------------------------------------===// + +void OpenMPDialect::setDeclareTarget( + Operation *func, mlir::omp::DeclareTargetDeviceType deviceType) { + func->setAttr("omp.declare_target", + mlir::omp::DeclareTargetDeviceTypeAttr::get(func->getContext(), + deviceType)); +} + +bool OpenMPDialect::isDeclareTarget(Operation *func) { + return func->hasAttr("omp.declare_target"); +} + +mlir::omp::DeclareTargetDeviceType +OpenMPDialect::getDeclareTargetDeviceType(Operation *func) { + if (mlir::Attribute declTar = func->getAttr("omp.declare_target")) { + if (declTar.isa<mlir::omp::DeclareTargetDeviceTypeAttr>()) + return declTar.cast<mlir::omp::DeclareTargetDeviceTypeAttr>().getValue(); + } + return {}; +} + #define GET_ATTRDEF_CLASSES #include "mlir/Dialect/OpenMP/OpenMPOpsAttributes.cpp.inc" diff --git a/mlir/lib/Target/LLVMIR/CMakeLists.txt b/mlir/lib/Target/LLVMIR/CMakeLists.txt --- a/mlir/lib/Target/LLVMIR/CMakeLists.txt +++ b/mlir/lib/Target/LLVMIR/CMakeLists.txt @@ -38,6 +38,7 @@ MLIRDLTIDialect MLIRLLVMDialect MLIRLLVMIRTransforms + MLIROpenMPDialect MLIRTranslateLib ) diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp --- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp +++ 
b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp @@ -15,6 +15,7 @@ #include "mlir/IR/IRMapping.h" #include "mlir/IR/Operation.h" #include "mlir/Support/LLVM.h" +#include "mlir/Support/LogicalResult.h" #include "mlir/Target/LLVMIR/Dialect/OpenMPCommon.h" #include "mlir/Target/LLVMIR/ModuleTranslation.h" @@ -24,6 +25,8 @@ #include "llvm/Frontend/OpenMP/OMPIRBuilder.h" #include "llvm/IR/DebugInfoMetadata.h" #include "llvm/IR/IRBuilder.h" +#include "llvm/Transforms/Utils/ModuleUtils.h" +#include <optional> using namespace mlir; @@ -1031,11 +1034,24 @@ return success(); } +static std::optional<omp::ClauseMemoryOrderKind> +getAtomicDefaultMemOrder(Operation &opInst) { + // Try to get the omp.atomic_default_mem_order attribute, if present + if (auto offloadModule = + opInst.getParentOfType<omp::OffloadModuleInterface>()) + return offloadModule.getAtomicDefaultMemOrder(); + + return std::nullopt; +} + /// Convert an Atomic Ordering attribute to llvm::AtomicOrdering. -llvm::AtomicOrdering -convertAtomicOrdering(std::optional<omp::ClauseMemoryOrderKind> ao) { +static llvm::AtomicOrdering +convertAtomicOrdering(std::optional<omp::ClauseMemoryOrderKind> ao, + std::optional<omp::ClauseMemoryOrderKind> defaultAo) { + // If not specified, try using the default atomic ordering gathered from a + // requires atomic_default_mem_order clause, if present if (!ao) - return llvm::AtomicOrdering::Monotonic; // Default Memory Ordering + ao = defaultAo.value_or(omp::ClauseMemoryOrderKind::Relaxed); switch (*ao) { case omp::ClauseMemoryOrderKind::Seq_cst: @@ -1056,13 +1072,14 @@ static LogicalResult convertOmpAtomicRead(Operation &opInst, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation) { - auto readOp = cast<omp::AtomicReadOp>(opInst); llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder(); llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder); - llvm::AtomicOrdering AO = convertAtomicOrdering(readOp.getMemoryOrderVal()); + auto defaultAO = getAtomicDefaultMemOrder(opInst); + llvm::AtomicOrdering AO = + convertAtomicOrdering(readOp.getMemoryOrderVal(), defaultAO); llvm::Value *x = 
moduleTranslation.lookupValue(readOp.getX()); llvm::Value *v = moduleTranslation.lookupValue(readOp.getV()); @@ -1083,7 +1100,9 @@ llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder(); llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder); - llvm::AtomicOrdering ao = convertAtomicOrdering(writeOp.getMemoryOrderVal()); + auto defaultAO = getAtomicDefaultMemOrder(opInst); + llvm::AtomicOrdering ao = + convertAtomicOrdering(writeOp.getMemoryOrderVal(), defaultAO); llvm::Value *expr = moduleTranslation.lookupValue(writeOp.getValue()); llvm::Value *dest = moduleTranslation.lookupValue(writeOp.getAddress()); llvm::Type *ty = moduleTranslation.convertType(writeOp.getValue().getType()); @@ -1147,8 +1166,9 @@ /*isSigned=*/false, /*isVolatile=*/false}; + auto defaultAO = getAtomicDefaultMemOrder(*opInst.getOperation()); llvm::AtomicOrdering atomicOrdering = - convertAtomicOrdering(opInst.getMemoryOrderVal()); + convertAtomicOrdering(opInst.getMemoryOrderVal(), defaultAO); // Generate update code. LogicalResult updateGenStatus = success(); @@ -1236,8 +1256,9 @@ /*isSigned=*/false, /*isVolatile=*/false}; + auto defaultAO = getAtomicDefaultMemOrder(*atomicCaptureOp.getOperation()); llvm::AtomicOrdering atomicOrdering = - convertAtomicOrdering(atomicCaptureOp.getMemoryOrderVal()); + convertAtomicOrdering(atomicCaptureOp.getMemoryOrderVal(), defaultAO); LogicalResult updateGenStatus = success(); auto updateFn = [&](llvm::Value *atomicx, @@ -1574,6 +1595,27 @@ return success(); } +/// Converts the module-level set of OpenMP requires clauses into LLVM IR using +/// OpenMPIRBuilder. +static LogicalResult +convertRequiresAttr(Operation &op, omp::ClauseRequiresAttr requiresAttr, + LLVM::ModuleTranslation &moduleTranslation) { + auto *ompBuilder = moduleTranslation.getOpenMPBuilder(); + + // No need to read requiresAttr here, because it has already been done in + // translateModuleToLLVMIR(). 
There, flags are stored in the + // OpenMPIRBuilderConfig object, available to the OpenMPIRBuilder. + auto *regFn = + ompBuilder->createRegisterRequires(ompBuilder->createPlatformSpecificName( + {"omp_offloading", "requires_reg"})); + + // Add the registration function as a global constructor, if one was created. + if (regFn) + llvm::appendToGlobalCtors(ompBuilder->M, regFn, /* Priority = */ 0); + + return success(); +} + namespace { /// Implementation of the dialect interface that converts operations belonging @@ -1589,6 +1631,8 @@ convertOperation(Operation *op, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation) const final; + /// Given an OpenMP MLIR attribute, create the corresponding LLVM-IR, runtime + /// calls, or operation amendments. LogicalResult amendOperation(Operation *op, NamedAttribute attribute, LLVM::ModuleTranslation &moduleTranslation) const final; @@ -1596,8 +1640,6 @@ } // namespace -/// Given an OpenMP MLIR attribute, create the corresponding LLVM-IR, runtime -/// calls, or operation amendments LogicalResult OpenMPDialectLLVMIRTranslationInterface::amendOperation( Operation *op, NamedAttribute attribute, LLVM::ModuleTranslation &moduleTranslation) const { @@ -1606,9 +1648,14 @@ .Case([&](mlir::omp::FlagsAttr rtlAttr) { return convertFlagsAttr(op, rtlAttr, moduleTranslation); }) + .Case([&](omp::ClauseRequiresAttr requiresAttr) { + return convertRequiresAttr(*op, requiresAttr, moduleTranslation); + }) .Default([&](Attribute attr) { - // fall through for omp attributes that do not require lowering and/or - // have no concrete definition and thus no type to define a case on + // Fall through for omp attributes that do not require lowering and/or + // have no concrete definition and thus no type to define a case on. 
+ // The omp.atomic_default_mem_order attribute is read directly during + // OpenMP atomic ops lowering return success(); }); diff --git a/mlir/lib/Target/LLVMIR/ModuleTranslation.cpp b/mlir/lib/Target/LLVMIR/ModuleTranslation.cpp --- a/mlir/lib/Target/LLVMIR/ModuleTranslation.cpp +++ b/mlir/lib/Target/LLVMIR/ModuleTranslation.cpp @@ -1322,6 +1322,25 @@ LLVM::ensureDistinctSuccessors(module); ModuleTranslation translator(module, std::move(llvmModule)); + + // Set OpenMP IR Builder configuration + if (auto offloadMod = dyn_cast<omp::OffloadModuleInterface>(module)) { + llvm::OpenMPIRBuilderConfig config; + config.setIsEmbedded(offloadMod.getIsDevice()); + config.setIsTargetCodegen(false); + + const auto requiresFlags = offloadMod.getRequires(); + config.setHasRequiresReverseOffload(bitEnumContainsAll( + requiresFlags, omp::ClauseRequires::reverse_offload)); + config.setHasRequiresUnifiedAddress(bitEnumContainsAll( + requiresFlags, omp::ClauseRequires::unified_address)); + config.setHasRequiresUnifiedSharedMemory(bitEnumContainsAll( + requiresFlags, omp::ClauseRequires::unified_shared_memory)); + config.setHasRequiresDynamicAllocators(bitEnumContainsAll( + requiresFlags, omp::ClauseRequires::dynamic_allocators)); + translator.getOpenMPBuilder()->setConfig(config); + } + if (failed(translator.convertFunctionSignatures())) return nullptr; if (failed(translator.convertGlobals())) diff --git a/mlir/test/Dialect/OpenMP/attr.mlir b/mlir/test/Dialect/OpenMP/attr.mlir --- a/mlir/test/Dialect/OpenMP/attr.mlir +++ b/mlir/test/Dialect/OpenMP/attr.mlir @@ -29,3 +29,23 @@ // CHECK: module attributes {omp.flags = #omp.flags} { module attributes {omp.flags = #omp.flags} {} + +// ----- + +// CHECK-LABEL: func @omp_decl_tar_host +// CHECK-SAME: {{.*}} attributes {omp.declare_target = #omp<device_type(host)>} { +func.func @omp_decl_tar_host() -> () attributes {omp.declare_target = #omp<device_type(host)>} { + return +} + +// CHECK-LABEL: func @omp_decl_tar_nohost +// CHECK-SAME: {{.*}} attributes {omp.declare_target = #omp<device_type(nohost)>} { +func.func 
@omp_decl_tar_nohost() -> () attributes {omp.declare_target = #omp<device_type(nohost)>} { + return +} + +// CHECK-LABEL: func @omp_decl_tar_any +// CHECK-SAME: {{.*}} attributes {omp.declare_target = #omp<device_type(any)>} { +func.func @omp_decl_tar_any() -> () attributes {omp.declare_target = #omp<device_type(any)>} { + return +} diff --git a/mlir/test/Target/LLVMIR/openmp-llvm.mlir b/mlir/test/Target/LLVMIR/openmp-llvm.mlir --- a/mlir/test/Target/LLVMIR/openmp-llvm.mlir +++ b/mlir/test/Target/LLVMIR/openmp-llvm.mlir @@ -2537,3 +2537,51 @@ // CHECK: @__omp_rtl_assume_no_nested_parallelism = weak_odr hidden constant i32 0 module attributes {omp.flags = #omp.flags, omp.is_device = #omp.isdevice} {} + +// ----- + +// Check that the atomic default memory order is picked up by atomic operations. +module attributes { + omp.atomic_default_mem_order = #omp<memory_order_kind seq_cst> +} { + // CHECK-LABEL: @omp_atomic_default_mem_order + // CHECK-SAME: (ptr %[[ARG0:.*]], ptr %[[ARG1:.*]], i32 %[[EXPR:.*]]) + llvm.func @omp_atomic_default_mem_order(%arg0 : !llvm.ptr, + %arg1 : !llvm.ptr, + %expr : i32) -> () { + + // CHECK: %[[X1:.*]] = load atomic i32, ptr %[[ARG0]] seq_cst, align 4 + // CHECK: store i32 %[[X1]], ptr %[[ARG1]], align 4 + omp.atomic.read %arg1 = %arg0 : !llvm.ptr, i32 + + // CHECK: store atomic i32 %[[EXPR]], ptr %[[ARG1]] seq_cst, align 4 + // CHECK: call void @__kmpc_flush(ptr @{{.*}}) + omp.atomic.write %arg1 = %expr : !llvm.ptr, i32 + + // CHECK: atomicrmw add ptr %[[ARG1]], i32 %[[EXPR]] seq_cst + omp.atomic.update %arg1 : !llvm.ptr { + ^bb0(%xval: i32): + %newval = llvm.add %xval, %expr : i32 + omp.yield(%newval : i32) + } + + // CHECK: %[[xval:.*]] = atomicrmw xchg ptr %[[ARG0]], i32 %[[EXPR]] seq_cst + // CHECK: store i32 %[[xval]], ptr %[[ARG1]] + omp.atomic.capture { + omp.atomic.read %arg1 = %arg0 : !llvm.ptr, i32 + omp.atomic.write %arg0 = %expr : !llvm.ptr, i32 + } + + llvm.return + } +} + +// ----- + +// Check that OpenMP requires flags are registered by a global constructor. 
+// CHECK: @llvm.global_ctors = appending global [1 x { i32, ptr, ptr }] +// CHECK-SAME: [{ i32, ptr, ptr } { i32 0, ptr @[[REG_FN:.*]], ptr null }] +// CHECK: define {{.*}} @[[REG_FN]]({{.*}}) +// CHECK-NOT: } +// CHECK: call void @__tgt_register_requires(i64 10) +module attributes {omp.requires = #omp<clause_requires reverse_offload|unified_shared_memory>} {}