diff --git a/flang/include/flang/Frontend/LangOptions.def b/flang/include/flang/Frontend/LangOptions.def --- a/flang/include/flang/Frontend/LangOptions.def +++ b/flang/include/flang/Frontend/LangOptions.def @@ -38,6 +38,8 @@ LANGOPT(OpenMPVersion, 32, 0) /// Generate code only for OpenMP target device LANGOPT(OpenMPIsTargetDevice, 1, false) +/// Generate OpenMP target code only for GPUs +LANGOPT(OpenMPIsGPU, 1, false) /// Enable debugging in the OpenMP offloading device RTL LANGOPT(OpenMPTargetDebug, 32, 0) /// Assume work-shared loops do not have more iterations than participating diff --git a/flang/include/flang/Tools/CrossToolHelpers.h b/flang/include/flang/Tools/CrossToolHelpers.h --- a/flang/include/flang/Tools/CrossToolHelpers.h +++ b/flang/include/flang/Tools/CrossToolHelpers.h @@ -24,13 +24,13 @@ OffloadModuleOpts(uint32_t OpenMPTargetDebug, bool OpenMPTeamSubscription, bool OpenMPThreadSubscription, bool OpenMPNoThreadState, bool OpenMPNoNestedParallelism, bool OpenMPIsTargetDevice, - uint32_t OpenMPVersion, std::string OMPHostIRFile = {}) + bool OpenMPIsGPU, uint32_t OpenMPVersion, std::string OMPHostIRFile = {}) : OpenMPTargetDebug(OpenMPTargetDebug), OpenMPTeamSubscription(OpenMPTeamSubscription), OpenMPThreadSubscription(OpenMPThreadSubscription), OpenMPNoThreadState(OpenMPNoThreadState), OpenMPNoNestedParallelism(OpenMPNoNestedParallelism), - OpenMPIsTargetDevice(OpenMPIsTargetDevice), + OpenMPIsTargetDevice(OpenMPIsTargetDevice), OpenMPIsGPU(OpenMPIsGPU), OpenMPVersion(OpenMPVersion), OMPHostIRFile(OMPHostIRFile) {} OffloadModuleOpts(Fortran::frontend::LangOptions &Opts) @@ -40,7 +40,8 @@ OpenMPNoThreadState(Opts.OpenMPNoThreadState), OpenMPNoNestedParallelism(Opts.OpenMPNoNestedParallelism), OpenMPIsTargetDevice(Opts.OpenMPIsTargetDevice), - OpenMPVersion(Opts.OpenMPVersion), OMPHostIRFile(Opts.OMPHostIRFile) {} + OpenMPIsGPU(Opts.OpenMPIsGPU), OpenMPVersion(Opts.OpenMPVersion), + OMPHostIRFile(Opts.OMPHostIRFile) {} uint32_t OpenMPTargetDebug = 0; bool OpenMPTeamSubscription = false; @@ -48,6 +49,7 @@ bool OpenMPNoThreadState = false; bool OpenMPNoNestedParallelism = false; bool OpenMPIsTargetDevice = false; + bool OpenMPIsGPU = false; uint32_t OpenMPVersion = 11; std::string OMPHostIRFile = {}; }; @@ -60,6 +62,7 @@ if (auto offloadMod = llvm::dyn_cast( module.getOperation())) { offloadMod.setIsTargetDevice(Opts.OpenMPIsTargetDevice); + offloadMod.setIsGPU(Opts.OpenMPIsGPU); if (Opts.OpenMPIsTargetDevice) { offloadMod.setFlags(Opts.OpenMPTargetDebug, Opts.OpenMPTeamSubscription, Opts.OpenMPThreadSubscription, Opts.OpenMPNoThreadState, diff --git a/flang/lib/Frontend/CompilerInvocation.cpp b/flang/lib/Frontend/CompilerInvocation.cpp --- a/flang/lib/Frontend/CompilerInvocation.cpp +++ b/flang/lib/Frontend/CompilerInvocation.cpp @@ -782,6 +782,23 @@ res.getLangOpts().OpenMPTargetDebug = 1; } } + + switch (llvm::Triple(res.getTargetOpts().triple).getArch()) { + case llvm::Triple::nvptx: + case llvm::Triple::nvptx64: + case llvm::Triple::amdgcn: + if (!res.getLangOpts().OpenMPIsTargetDevice) { + const unsigned diagID = diags.getCustomDiagID( + clang::DiagnosticsEngine::Error, + "OpenMP AMDGPU/NVPTX is only prepared to deal with device code."); + diags.Report(diagID); + } + res.getLangOpts().OpenMPIsGPU = 1; + break; + default: + res.getLangOpts().OpenMPIsGPU = 0; + break; + } } // -pedantic diff --git a/flang/test/Lower/OpenMP/host-ir-flag.f90 b/flang/test/Lower/OpenMP/host-ir-flag.f90 --- a/flang/test/Lower/OpenMP/host-ir-flag.f90 +++ b/flang/test/Lower/OpenMP/host-ir-flag.f90 @@ -1,6 +1,6 @@ !RUN: %flang_fc1 -emit-llvm-bc -fopenmp -o %t.bc %s 2>&1 !RUN: %flang_fc1 -emit-mlir -fopenmp -fopenmp-is-target-device -fopenmp-host-ir-file-path %t.bc -o - %s 2>&1 | FileCheck %s -!CHECK: module attributes {{{.*}}, omp.host_ir_filepath = "{{.*}}.bc", omp.is_target_device = true{{.*}}} +!CHECK: module attributes {{{.*}}, omp.host_ir_filepath = "{{.*}}.bc", omp.is_gpu = false, omp.is_target_device = true{{.*}}} subroutine omp_subroutine() end subroutine omp_subroutine diff --git a/flang/test/Lower/OpenMP/omp-is-gpu.f90 b/flang/test/Lower/OpenMP/omp-is-gpu.f90 new file mode 100644 --- /dev/null +++ b/flang/test/Lower/OpenMP/omp-is-gpu.f90 @@ -0,0 +1,12 @@ +!RUN: %flang_fc1 -triple amdgcn-amd-amdhsa -emit-fir -fopenmp -fopenmp-is-target-device %s -o - | FileCheck %s +!RUN: bbc -fopenmp -fopenmp-is-target-device -fopenmp-is-gpu -emit-fir -o - %s | FileCheck %s + +!RUN: not %flang_fc1 -triple amdgcn-amd-amdhsa -emit-fir -fopenmp %s -o - 2>&1 | FileCheck %s --check-prefix=FLANG-ERROR +!RUN: not bbc -fopenmp -fopenmp-is-gpu -emit-fir %s -o - 2>&1 | FileCheck %s --check-prefix=BBC-ERROR + +!CHECK: module attributes {{{.*}}omp.is_gpu = true +subroutine omp_subroutine() +end subroutine omp_subroutine + +!FLANG-ERROR: error: OpenMP AMDGPU/NVPTX is only prepared to deal with device code. +!BBC-ERROR: FATAL: -fopenmp-is-gpu can only be set if -fopenmp-is-target-device is also set diff --git a/flang/test/Lower/OpenMP/rtl-flags.f90 b/flang/test/Lower/OpenMP/rtl-flags.f90 --- a/flang/test/Lower/OpenMP/rtl-flags.f90 +++ b/flang/test/Lower/OpenMP/rtl-flags.f90 @@ -20,16 +20,20 @@ !RUN: bbc -emit-fir -fopenmp -fopenmp-assume-no-nested-parallelism -fopenmp-is-target-device -o - %s | FileCheck %s --check-prefix=NEST-PAR-DEVICE-FIR !RUN: bbc -emit-fir -fopenmp -fopenmp-target-debug=1 -fopenmp-assume-teams-oversubscription -fopenmp-assume-no-nested-parallelism -fopenmp-assume-threads-oversubscription -fopenmp-assume-no-thread-state -fopenmp-is-target-device -o - %s | FileCheck %s --check-prefix=ALL-DEVICE-FIR -!DEFAULT-DEVICE-FIR: module attributes {{{.*}}, omp.flags = #omp.flags, omp.is_target_device = true{{.*}}} -!DEFAULT-DEVICE-FIR-VERSION: module attributes {{{.*}}, omp.flags = #omp.flags, omp.is_target_device = true, omp.version = #omp.version{{.*}} -!DEFAULT-HOST-FIR: module attributes {{{.*}}, omp.is_target_device = false{{.*}} -!DEFAULT-HOST-FIR-VERSION: module attributes {{{.*}}, omp.is_target_device = false, omp.version = #omp.version{{.*}} -!DBG-DEVICE-FIR: module attributes {{{.*}}, omp.flags = #omp.flags{{.*}}} -!DBG-EQ-DEVICE-FIR: module attributes {{{.*}}, omp.flags = #omp.flags{{.*}}} -!TEAMS-OSUB-DEVICE-FIR: module attributes {{{.*}}, omp.flags = #omp.flags{{.*}}} -!THREAD-OSUB-DEVICE-FIR: module attributes {{{.*}}, omp.flags = #omp.flags{{.*}}} -!THREAD-STATE-DEVICE-FIR: module attributes {{{.*}}, omp.flags = #omp.flags{{.*}}} -!NEST-PAR-DEVICE-FIR: module attributes {{{.*}}, omp.flags = #omp.flags{{.*}}} -!ALL-DEVICE-FIR: module attributes {{{.*}}, omp.flags = #omp.flags{{.*}}} +!DEFAULT-DEVICE-FIR: module attributes {{{.*}}omp.flags = #omp.flags +!DEFAULT-DEVICE-FIR-SAME: omp.is_target_device = true +!DEFAULT-DEVICE-FIR-VERSION: module attributes {{{.*}}omp.flags = #omp.flags +!DEFAULT-DEVICE-FIR-VERSION-SAME: omp.is_target_device = true +!DEFAULT-DEVICE-FIR-VERSION-SAME: omp.version = #omp.version +!DEFAULT-HOST-FIR: module attributes {{{.*}}omp.is_target_device = false{{.*}} +!DEFAULT-HOST-FIR-VERSION: module attributes {{{.*}}omp.is_target_device = false +!DEFAULT-HOST-FIR-VERSION-SAME: omp.version = #omp.version +!DBG-DEVICE-FIR: module attributes {{{.*}}omp.flags = #omp.flags +!DBG-EQ-DEVICE-FIR: module attributes {{{.*}}omp.flags = #omp.flags +!TEAMS-OSUB-DEVICE-FIR: module attributes {{{.*}}omp.flags = #omp.flags +!THREAD-OSUB-DEVICE-FIR: module attributes {{{.*}}omp.flags = #omp.flags +!THREAD-STATE-DEVICE-FIR: module attributes {{{.*}}omp.flags = #omp.flags +!NEST-PAR-DEVICE-FIR: module attributes {{{.*}}omp.flags = #omp.flags +!ALL-DEVICE-FIR: module attributes {{{.*}}omp.flags = #omp.flags subroutine omp_subroutine() end subroutine omp_subroutine diff --git a/flang/test/Lower/OpenMP/target_cpu_features.f90 b/flang/test/Lower/OpenMP/target_cpu_features.f90 --- a/flang/test/Lower/OpenMP/target_cpu_features.f90 +++ b/flang/test/Lower/OpenMP/target_cpu_features.f90 @@ -1,5 +1,5 @@ !REQUIRES: amdgpu-registered-target -!RUN: %flang_fc1 -emit-fir -triple amdgcn-amd-amdhsa -target-cpu gfx908 -fopenmp %s -o - | FileCheck %s +!RUN: %flang_fc1 -emit-fir -triple amdgcn-amd-amdhsa -target-cpu gfx908 -fopenmp -fopenmp-is-target-device %s -o - | FileCheck %s !=============================================================================== ! Target_Enter Simple @@ -10,7 +10,10 @@ !CHECK-SAME: +16-bit-insts,+s-memrealtime,+dot6-insts,+dl-insts,+image-insts,+wavefrontsize64, !CHECK-SAME: +gfx9-insts,+gfx8-insts,+ci-insts,+dot10-insts,+dot7-insts, !CHECK-SAME: +dot1-insts,+dot5-insts,+mai-insts,+dpp,+dot2-insts"> -!CHECK-LABEL: func.func @_QPomp_target_simple() { +!CHECK-LABEL: func.func @_QPomp_target_simple() subroutine omp_target_simple + ! Directive needed to prevent subroutine from being filtered out when + ! compiling for the device. + !$omp declare target end subroutine omp_target_simple diff --git a/flang/tools/bbc/bbc.cpp b/flang/tools/bbc/bbc.cpp --- a/flang/tools/bbc/bbc.cpp +++ b/flang/tools/bbc/bbc.cpp @@ -135,6 +135,11 @@ llvm::cl::desc("enable openmp device compilation"), llvm::cl::init(false)); +static llvm::cl::opt + enableOpenMPGPU("fopenmp-is-gpu", + llvm::cl::desc("enable openmp GPU target codegen"), + llvm::cl::init(false)); + // A simplified subset of the OpenMP RTL Flags from Flang, only the primary // positive options are available, no negative options e.g. fopen_assume* vs // fno_open_assume* @@ -288,10 +293,16 @@ burnside.lower(parseTree, semanticsContext); mlir::ModuleOp mlirModule = burnside.getModule(); if (enableOpenMP) { - auto offloadModuleOpts = OffloadModuleOpts( - setOpenMPTargetDebug, setOpenMPTeamSubscription, - setOpenMPThreadSubscription, setOpenMPNoThreadState, - setOpenMPNoNestedParallelism, enableOpenMPDevice, setOpenMPVersion); + if (enableOpenMPGPU && !enableOpenMPDevice) { + llvm::errs() << "FATAL: -fopenmp-is-gpu can only be set if " + "-fopenmp-is-target-device is also set"; + return mlir::failure(); + } + auto offloadModuleOpts = + OffloadModuleOpts(setOpenMPTargetDebug, setOpenMPTeamSubscription, + setOpenMPThreadSubscription, setOpenMPNoThreadState, + setOpenMPNoNestedParallelism, enableOpenMPDevice, + enableOpenMPGPU, setOpenMPVersion); setOffloadModuleInterfaceAttributes(mlirModule, offloadModuleOpts); setOpenMPVersionAttribute(mlirModule, setOpenMPVersion); } diff --git a/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp b/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp --- a/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp +++ b/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp @@ -5871,4 +5871,46 @@ } } +TEST_F(OpenMPIRBuilderTest, createGPUOffloadEntry) { + OpenMPIRBuilder OMPBuilder(*M); + OMPBuilder.initialize(); + OpenMPIRBuilderConfig Config(/* IsTargetDevice = */ true, + /* IsGPU = */ true, + /* HasRequiresUnifiedSharedMemory = */ false, + /* OpenMPOffloadMandatory = */ false); + OMPBuilder.setConfig(Config); + + FunctionCallee FnTypeAndCallee = + M->getOrInsertFunction("test_kernel", Type::getVoidTy(Ctx)); + + auto *Fn = cast(FnTypeAndCallee.getCallee()); + OMPBuilder.createOffloadEntry(/* ID = */ nullptr, Fn, + /* Size = */ 0, + /* Flags = */ 0, GlobalValue::WeakAnyLinkage); + + // Check nvvm.annotations only created for GPU kernels + NamedMDNode *MD = M->getNamedMetadata("nvvm.annotations"); + EXPECT_NE(MD, nullptr); + EXPECT_EQ(MD->getNumOperands(), 1u); + + MDNode *Annotations = MD->getOperand(0); + EXPECT_EQ(Annotations->getNumOperands(), 3u); + + Constant *ConstVal = + dyn_cast(Annotations->getOperand(0))->getValue(); + EXPECT_TRUE(isa(Fn)); + EXPECT_EQ(ConstVal, cast(Fn)); + + EXPECT_TRUE(Annotations->getOperand(1).equalsStr("kernel")); + + EXPECT_TRUE(mdconst::hasa(Annotations->getOperand(2))); + APInt IntVal = + mdconst::extract(Annotations->getOperand(2))->getValue(); + EXPECT_EQ(IntVal, 1); + + // Check kernel attributes + EXPECT_TRUE(Fn->hasFnAttribute("kernel")); + EXPECT_TRUE(Fn->hasFnAttribute(Attribute::MustProgress)); +} + } // namespace diff --git a/mlir/include/mlir/Dialect/OpenMP/OpenMPOpsInterfaces.td b/mlir/include/mlir/Dialect/OpenMP/OpenMPOpsInterfaces.td --- a/mlir/include/mlir/Dialect/OpenMP/OpenMPOpsInterfaces.td +++ b/mlir/include/mlir/Dialect/OpenMP/OpenMPOpsInterfaces.td @@ -149,6 +149,31 @@ return isTargetDevice.dyn_cast().getValue(); return false; }]>, + InterfaceMethod< + /*description=*/[{ + Set the attribute on the current module with the specified boolean + argument. + }], + /*retTy=*/"void", + /*methodName=*/"setIsGPU", + (ins "bool":$isGPU), [{}], [{ + $_op->setAttr( + mlir::StringAttr::get($_op->getContext(), "omp.is_gpu"), + mlir::BoolAttr::get($_op->getContext(), isGPU)); + }]>, + InterfaceMethod< + /*description=*/[{ + Get the attribute on the current module if it exists and + return its value, if it doesn't exist it returns false by default. + }], + /*retTy=*/"bool", + /*methodName=*/"getIsGPU", + (ins), [{}], [{ + if (Attribute isTargetCGAttr = $_op->getAttr("omp.is_gpu")) + if (auto isTargetCGVal = isTargetCGAttr.dyn_cast()) + return isTargetCGVal.getValue(); + return false; + }]>, InterfaceMethod< /*description=*/[{ Get the FlagsAttr attribute on the current module if it exists diff --git a/mlir/lib/Target/LLVMIR/ModuleTranslation.cpp b/mlir/lib/Target/LLVMIR/ModuleTranslation.cpp --- a/mlir/lib/Target/LLVMIR/ModuleTranslation.cpp +++ b/mlir/lib/Target/LLVMIR/ModuleTranslation.cpp @@ -1283,24 +1283,25 @@ if (!ompBuilder) { ompBuilder = std::make_unique(*llvmModule); - bool isTargetDevice = false; + bool isTargetDevice = false, isGPU = false; llvm::StringRef hostIRFilePath = ""; - if (Attribute deviceAttr = mlirModule->getAttr("omp.is_target_device")) - if (::llvm::isa(deviceAttr)) - isTargetDevice = - ::llvm::dyn_cast(deviceAttr).getValue(); + if (auto deviceAttr = + mlirModule->getAttrOfType("omp.is_target_device")) + isTargetDevice = deviceAttr.getValue(); - if (Attribute filepath = mlirModule->getAttr("omp.host_ir_filepath")) - if (::llvm::isa(filepath)) - hostIRFilePath = - ::llvm::dyn_cast(filepath).getValue(); + if (auto gpuAttr = mlirModule->getAttrOfType("omp.is_gpu")) + isGPU = gpuAttr.getValue(); + + if (auto filepathAttr = + mlirModule->getAttrOfType("omp.host_ir_filepath")) + hostIRFilePath = filepathAttr.getValue(); ompBuilder->initialize(hostIRFilePath); // TODO: set the flags when available llvm::OpenMPIRBuilderConfig config( - isTargetDevice, /* IsGPU */ false, + isTargetDevice, isGPU, /* HasRequiresUnifiedSharedMemory */ false, /* OpenMPOffloadMandatory */ false); ompBuilder->setConfig(config);