diff --git a/flang/include/flang/Frontend/LangOptions.def b/flang/include/flang/Frontend/LangOptions.def --- a/flang/include/flang/Frontend/LangOptions.def +++ b/flang/include/flang/Frontend/LangOptions.def @@ -38,6 +38,8 @@ LANGOPT(OpenMPVersion, 32, 0) /// Generate code only for OpenMP target device LANGOPT(OpenMPIsDevice, 1, false) +/// Generate OpenMP target code only for GPUs +LANGOPT(OpenMPIsTargetCodegen, 1, false) /// Enable debugging in the OpenMP offloading device RTL LANGOPT(OpenMPTargetDebug, 32, 0) /// Assume work-shared loops do not have more iterations than participating diff --git a/flang/include/flang/Tools/CrossToolHelpers.h b/flang/include/flang/Tools/CrossToolHelpers.h --- a/flang/include/flang/Tools/CrossToolHelpers.h +++ b/flang/include/flang/Tools/CrossToolHelpers.h @@ -24,14 +24,16 @@ OffloadModuleOpts(uint32_t OpenMPTargetDebug, bool OpenMPTeamSubscription, bool OpenMPThreadSubscription, bool OpenMPNoThreadState, bool OpenMPNoNestedParallelism, bool OpenMPIsDevice, - uint32_t OpenMPVersion, std::string OMPHostIRFile = {}) + bool OpenMPIsTargetCodegen, uint32_t OpenMPVersion, + std::string OMPHostIRFile = {}) : OpenMPTargetDebug(OpenMPTargetDebug), OpenMPTeamSubscription(OpenMPTeamSubscription), OpenMPThreadSubscription(OpenMPThreadSubscription), OpenMPNoThreadState(OpenMPNoThreadState), OpenMPNoNestedParallelism(OpenMPNoNestedParallelism), - OpenMPIsDevice(OpenMPIsDevice), OpenMPVersion(OpenMPVersion), - OMPHostIRFile(OMPHostIRFile) {} + OpenMPIsDevice(OpenMPIsDevice), + OpenMPIsTargetCodegen(OpenMPIsTargetCodegen), + OpenMPVersion(OpenMPVersion), OMPHostIRFile(OMPHostIRFile) {} OffloadModuleOpts(Fortran::frontend::LangOptions &Opts) : OpenMPTargetDebug(Opts.OpenMPTargetDebug), @@ -39,8 +41,9 @@ OpenMPThreadSubscription(Opts.OpenMPThreadSubscription), OpenMPNoThreadState(Opts.OpenMPNoThreadState), OpenMPNoNestedParallelism(Opts.OpenMPNoNestedParallelism), - OpenMPIsDevice(Opts.OpenMPIsDevice), OpenMPVersion(Opts.OpenMPVersion), - 
OMPHostIRFile(Opts.OMPHostIRFile) {} + OpenMPIsDevice(Opts.OpenMPIsDevice), + OpenMPIsTargetCodegen(Opts.OpenMPIsTargetCodegen), + OpenMPVersion(Opts.OpenMPVersion), OMPHostIRFile(Opts.OMPHostIRFile) {} uint32_t OpenMPTargetDebug = 0; bool OpenMPTeamSubscription = false; @@ -48,6 +51,7 @@ bool OpenMPNoThreadState = false; bool OpenMPNoNestedParallelism = false; bool OpenMPIsDevice = false; + bool OpenMPIsTargetCodegen = false; uint32_t OpenMPVersion = 11; std::string OMPHostIRFile = {}; }; @@ -60,6 +64,7 @@ if (auto offloadMod = llvm::dyn_cast<mlir::omp::OffloadModuleInterface>( module.getOperation())) { offloadMod.setIsDevice(Opts.OpenMPIsDevice); + offloadMod.setIsTargetCodegen(Opts.OpenMPIsTargetCodegen); if (Opts.OpenMPIsDevice) { offloadMod.setFlags(Opts.OpenMPTargetDebug, Opts.OpenMPTeamSubscription, Opts.OpenMPThreadSubscription, Opts.OpenMPNoThreadState, diff --git a/flang/lib/Frontend/CompilerInvocation.cpp b/flang/lib/Frontend/CompilerInvocation.cpp --- a/flang/lib/Frontend/CompilerInvocation.cpp +++ b/flang/lib/Frontend/CompilerInvocation.cpp @@ -781,6 +781,23 @@ res.getLangOpts().OpenMPTargetDebug = 1; } } + + switch (llvm::Triple(res.getTargetOpts().triple).getArch()) { + case llvm::Triple::nvptx: + case llvm::Triple::nvptx64: + case llvm::Triple::amdgcn: + if (!res.getLangOpts().OpenMPIsDevice) { + const unsigned diagID = diags.getCustomDiagID( + clang::DiagnosticsEngine::Error, + "OpenMP AMDGPU/NVPTX is only prepared to deal with device code."); + diags.Report(diagID); + } + res.getLangOpts().OpenMPIsTargetCodegen = 1; + break; + default: + res.getLangOpts().OpenMPIsTargetCodegen = 0; + break; + } } // -pedantic diff --git a/flang/test/Lower/OpenMP/omp-is-target-codegen.f90 b/flang/test/Lower/OpenMP/omp-is-target-codegen.f90 new file mode 100644 --- /dev/null +++ b/flang/test/Lower/OpenMP/omp-is-target-codegen.f90 @@ -0,0 +1,12 @@ +!RUN: %flang_fc1 -triple amdgcn-amd-amdhsa -emit-fir -fopenmp -fopenmp-is-device %s -o - | FileCheck %s +!RUN: bbc -fopenmp -fopenmp-is-device 
-fopenmp-is-target-codegen -emit-fir -o - %s | FileCheck %s + +!RUN: not %flang_fc1 -triple amdgcn-amd-amdhsa -emit-fir -fopenmp %s -o - 2>&1 | FileCheck %s --check-prefix=FLANG-ERROR +!RUN: not bbc -fopenmp -fopenmp-is-target-codegen -emit-fir %s -o - 2>&1 | FileCheck %s --check-prefix=BBC-ERROR + +!CHECK: module attributes {{{.*}}omp.is_target_codegen = true +subroutine omp_subroutine() +end subroutine omp_subroutine + +!FLANG-ERROR: error: OpenMP AMDGPU/NVPTX is only prepared to deal with device code. +!BBC-ERROR: FATAL: -fopenmp-is-target-codegen can only be set if -fopenmp-is-device is also set diff --git a/flang/test/Lower/OpenMP/rtl-flags.f90 b/flang/test/Lower/OpenMP/rtl-flags.f90 --- a/flang/test/Lower/OpenMP/rtl-flags.f90 +++ b/flang/test/Lower/OpenMP/rtl-flags.f90 @@ -20,16 +20,20 @@ !RUN: bbc -emit-fir -fopenmp -fopenmp-assume-no-nested-parallelism -fopenmp-is-device -o - %s | FileCheck %s --check-prefix=NEST-PAR-DEVICE-FIR !RUN: bbc -emit-fir -fopenmp -fopenmp-target-debug=1 -fopenmp-assume-teams-oversubscription -fopenmp-assume-no-nested-parallelism -fopenmp-assume-threads-oversubscription -fopenmp-assume-no-thread-state -fopenmp-is-device -o - %s | FileCheck %s --check-prefix=ALL-DEVICE-FIR -!DEFAULT-DEVICE-FIR: module attributes {{{.*}}, omp.flags = #omp.flags, omp.is_device = true{{.*}}} -!DEFAULT-DEVICE-FIR-VERSION: module attributes {{{.*}}, omp.flags = #omp.flags, omp.is_device = true, omp.version = #omp.version{{.*}} -!DEFAULT-HOST-FIR: module attributes {{{.*}}, omp.is_device = false{{.*}} -!DEFAULT-HOST-FIR-VERSION: module attributes {{{.*}}, omp.is_device = false, omp.version = #omp.version{{.*}} -!DBG-DEVICE-FIR: module attributes {{{.*}}, omp.flags = #omp.flags{{.*}}} -!DBG-EQ-DEVICE-FIR: module attributes {{{.*}}, omp.flags = #omp.flags{{.*}}} -!TEAMS-OSUB-DEVICE-FIR: module attributes {{{.*}}, omp.flags = #omp.flags{{.*}}} -!THREAD-OSUB-DEVICE-FIR: module attributes {{{.*}}, omp.flags = #omp.flags{{.*}}} -!THREAD-STATE-DEVICE-FIR: module 
attributes {{{.*}}, omp.flags = #omp.flags{{.*}}} -!NEST-PAR-DEVICE-FIR: module attributes {{{.*}}, omp.flags = #omp.flags{{.*}}} -!ALL-DEVICE-FIR: module attributes {{{.*}}, omp.flags = #omp.flags{{.*}}} +!DEFAULT-DEVICE-FIR: module attributes {{{.*}}omp.flags = #omp.flags +!DEFAULT-DEVICE-FIR-SAME: omp.is_device = true +!DEFAULT-DEVICE-FIR-VERSION: module attributes {{{.*}}omp.flags = #omp.flags +!DEFAULT-DEVICE-FIR-VERSION-SAME: omp.is_device = true +!DEFAULT-DEVICE-FIR-VERSION-SAME: omp.version = #omp.version +!DEFAULT-HOST-FIR: module attributes {{{.*}}omp.is_device = false{{.*}} +!DEFAULT-HOST-FIR-VERSION: module attributes {{{.*}}omp.is_device = false +!DEFAULT-HOST-FIR-VERSION-SAME: omp.version = #omp.version +!DBG-DEVICE-FIR: module attributes {{{.*}}omp.flags = #omp.flags +!DBG-EQ-DEVICE-FIR: module attributes {{{.*}}omp.flags = #omp.flags +!TEAMS-OSUB-DEVICE-FIR: module attributes {{{.*}}omp.flags = #omp.flags +!THREAD-OSUB-DEVICE-FIR: module attributes {{{.*}}omp.flags = #omp.flags +!THREAD-STATE-DEVICE-FIR: module attributes {{{.*}}omp.flags = #omp.flags +!NEST-PAR-DEVICE-FIR: module attributes {{{.*}}omp.flags = #omp.flags +!ALL-DEVICE-FIR: module attributes {{{.*}}omp.flags = #omp.flags subroutine omp_subroutine() end subroutine omp_subroutine diff --git a/flang/test/Lower/OpenMP/target_cpu_features.f90 b/flang/test/Lower/OpenMP/target_cpu_features.f90 --- a/flang/test/Lower/OpenMP/target_cpu_features.f90 +++ b/flang/test/Lower/OpenMP/target_cpu_features.f90 @@ -1,5 +1,5 @@ !REQUIRES: amdgpu-registered-target -!RUN: %flang_fc1 -emit-fir -triple amdgcn-amd-amdhsa -target-cpu gfx908 -fopenmp %s -o - | FileCheck %s +!RUN: %flang_fc1 -emit-fir -triple amdgcn-amd-amdhsa -target-cpu gfx908 -fopenmp -fopenmp-is-device %s -o - | FileCheck %s !=============================================================================== ! 
Target_Enter Simple diff --git a/flang/tools/bbc/bbc.cpp b/flang/tools/bbc/bbc.cpp --- a/flang/tools/bbc/bbc.cpp +++ b/flang/tools/bbc/bbc.cpp @@ -135,6 +135,10 @@ llvm::cl::desc("enable openmp device compilation"), llvm::cl::init(false)); +static llvm::cl::opt<bool> enableOpenMPTargetCodegen( + "fopenmp-is-target-codegen", + llvm::cl::desc("enable openmp GPU target codegen"), llvm::cl::init(false)); + // A simplified subset of the OpenMP RTL Flags from Flang, only the primary // positive options are available, no negative options e.g. fopen_assume* vs // fno_open_assume* @@ -288,10 +292,16 @@ burnside.lower(parseTree, semanticsContext); mlir::ModuleOp mlirModule = burnside.getModule(); if (enableOpenMP) { - auto offloadModuleOpts = OffloadModuleOpts( - setOpenMPTargetDebug, setOpenMPTeamSubscription, - setOpenMPThreadSubscription, setOpenMPNoThreadState, - setOpenMPNoNestedParallelism, enableOpenMPDevice, setOpenMPVersion); + if (enableOpenMPTargetCodegen && !enableOpenMPDevice) { + llvm::errs() << "FATAL: -fopenmp-is-target-codegen can only be set if " + "-fopenmp-is-device is also set"; + return mlir::failure(); + } + auto offloadModuleOpts = + OffloadModuleOpts(setOpenMPTargetDebug, setOpenMPTeamSubscription, + setOpenMPThreadSubscription, setOpenMPNoThreadState, + setOpenMPNoNestedParallelism, enableOpenMPDevice, + enableOpenMPTargetCodegen, setOpenMPVersion); setOffloadModuleInterfaceAttributes(mlirModule, offloadModuleOpts); setOpenMPVersionAttribute(mlirModule, setOpenMPVersion); } diff --git a/mlir/include/mlir/Dialect/OpenMP/OpenMPOpsInterfaces.td b/mlir/include/mlir/Dialect/OpenMP/OpenMPOpsInterfaces.td --- a/mlir/include/mlir/Dialect/OpenMP/OpenMPOpsInterfaces.td +++ b/mlir/include/mlir/Dialect/OpenMP/OpenMPOpsInterfaces.td @@ -149,6 +149,31 @@ return isDevice.dyn_cast<BoolAttr>().getValue(); return false; }]>, + InterfaceMethod< + /*description=*/[{ + Set the attribute on the current module with the specified boolean + argument. 
+ }], + /*retTy=*/"void", + /*methodName=*/"setIsTargetCodegen", + (ins "bool":$isTargetCodegen), [{}], [{ + $_op->setAttr( + mlir::StringAttr::get($_op->getContext(), "omp.is_target_codegen"), + mlir::BoolAttr::get($_op->getContext(), isTargetCodegen)); + }]>, + InterfaceMethod< + /*description=*/[{ + Get the attribute on the current module if it exists and + return its value, if it doesn't exist it returns false by default. + }], + /*retTy=*/"bool", + /*methodName=*/"getIsTargetCodegen", + (ins), [{}], [{ + if (Attribute isTargetCGAttr = $_op->getAttr("omp.is_target_codegen")) + if (auto isTargetCGVal = isTargetCGAttr.dyn_cast<BoolAttr>()) + return isTargetCGVal.getValue(); + return false; + }]>, InterfaceMethod< /*description=*/[{ Get the FlagsAttr attribute on the current module if it exists diff --git a/mlir/lib/Target/LLVMIR/ModuleTranslation.cpp b/mlir/lib/Target/LLVMIR/ModuleTranslation.cpp --- a/mlir/lib/Target/LLVMIR/ModuleTranslation.cpp +++ b/mlir/lib/Target/LLVMIR/ModuleTranslation.cpp @@ -1274,23 +1274,26 @@ if (!ompBuilder) { ompBuilder = std::make_unique<llvm::OpenMPIRBuilder>(*llvmModule); - bool isDevice = false; + bool isDevice = false, isTargetCodegen = false; llvm::StringRef hostIRFilePath = ""; - if (Attribute deviceAttr = mlirModule->getAttr("omp.is_device")) - if (::llvm::isa<mlir::BoolAttr>(deviceAttr)) - isDevice = ::llvm::dyn_cast<mlir::BoolAttr>(deviceAttr).getValue(); + if (auto deviceAttr = + mlirModule->getAttrOfType<mlir::BoolAttr>("omp.is_device")) + isDevice = deviceAttr.getValue(); - if (Attribute filepath = mlirModule->getAttr("omp.host_ir_filepath")) - if (::llvm::isa<mlir::StringAttr>(filepath)) - hostIRFilePath = - ::llvm::dyn_cast<mlir::StringAttr>(filepath).getValue(); + if (auto targetAttr = + mlirModule->getAttrOfType<mlir::BoolAttr>("omp.is_target_codegen")) + isTargetCodegen = targetAttr.getValue(); + + if (auto filepathAttr = + mlirModule->getAttrOfType<mlir::StringAttr>("omp.host_ir_filepath")) + hostIRFilePath = filepathAttr.getValue(); ompBuilder->initialize(hostIRFilePath); // TODO: set the flags when available llvm::OpenMPIRBuilderConfig config( - isDevice, /* 
IsTargetCodegen */ false, + isDevice, isTargetCodegen, /* HasRequiresUnifiedSharedMemory */ false, /* OpenMPOffloadMandatory */ false); ompBuilder->setConfig(config);