diff --git a/mlir/include/mlir/Dialect/GPU/Transforms/Passes.td b/mlir/include/mlir/Dialect/GPU/Transforms/Passes.td
--- a/mlir/include/mlir/Dialect/GPU/Transforms/Passes.td
+++ b/mlir/include/mlir/Dialect/GPU/Transforms/Passes.td
@@ -82,4 +82,109 @@
   ];
 }
 
+def GpuNVVMAttachTarget: Pass<"nvvm-attach-target", ""> {
+  let summary = "Attaches an NVVM target attribute to a GPU Module.";
+  let description = [{
+    This pass searches for all GPU Modules in the immediate regions and attaches
+    an NVVM target if the module matches the name specified by the `module` argument.
+
+    Example:
+    ```
+    // File: in.mlir:
+    gpu.module @nvvm_module_1 {...}
+    gpu.module @nvvm_module_2 {...}
+    gpu.module @rocdl_module_1 {...}
+    // mlir-opt --nvvm-attach-target="module=nvvm.* chip=sm_90" in.mlir
+    gpu.module @nvvm_module_1 [#nvvm.target<chip = "sm_90">] {...}
+    gpu.module @nvvm_module_2 [#nvvm.target<chip = "sm_90">] {...}
+    gpu.module @rocdl_module_1 {...}
+    ```
+  }];
+  let options = [
+    Option<"moduleMatcher", "module", "std::string",
+           /*default=*/ [{""}],
+           "Regex used to identify the modules to attach the target to.">,
+    Option<"triple", "triple", "std::string",
+           /*default=*/ "\"nvptx64-nvidia-cuda\"",
+           "Target triple.">,
+    Option<"chip", "chip", "std::string",
+           /*default=*/"\"sm_50\"",
+           "Target chip.">,
+    Option<"features", "features", "std::string",
+           /*default=*/"\"+ptx60\"",
+           "Target features.">,
+    Option<"optLevel", "O", "unsigned",
+           /*default=*/"2",
+           "Optimization level.">,
+    Option<"fastFlag", "fast", "bool",
+           /*default=*/"false",
+           "Enable fast math mode.">,
+    Option<"ftzFlag", "ftz", "bool",
+           /*default=*/"false",
+           "Enable flush to zero for denormals.">,
+    ListOption<"linkLibs", "l", "std::string",
+           "Extra bitcode libraries paths to link to.">,
+  ];
+}
+
+def GpuROCDLAttachTarget: Pass<"rocdl-attach-target", ""> {
+  let summary = "Attaches a ROCDL target attribute to a GPU Module.";
+  let description = [{
+    This pass searches for all GPU Modules in the immediate regions and attaches
+    a ROCDL target if the module matches the name specified by the `module` argument.
+
+    Example:
+    ```
+    // File: in.mlir:
+    gpu.module @nvvm_module_1 {...}
+    gpu.module @nvvm_module_2 {...}
+    gpu.module @rocdl_module_1 {...}
+    // mlir-opt --rocdl-attach-target="module=rocdl.* chip=gfx90a" in.mlir
+    gpu.module @nvvm_module_1 {...}
+    gpu.module @nvvm_module_2 {...}
+    gpu.module @rocdl_module_1 [#rocdl.target<chip = "gfx90a">] {...}
+    ```
+  }];
+  let options = [
+    Option<"moduleMatcher", "module", "std::string",
+           /*default=*/ [{""}],
+           "Regex used to identify the modules to attach the target to.">,
+    Option<"triple", "triple", "std::string",
+           /*default=*/ "\"amdgcn-amd-amdhsa\"",
+           "Target triple.">,
+    Option<"chip", "chip", "std::string",
+           /*default=*/"\"gfx900\"",
+           "Target chip.">,
+    Option<"features", "features", "std::string",
+           /*default=*/"\"\"",
+           "Target features.">,
+    Option<"abiVersion", "abi", "std::string",
+           /*default=*/"\"400\"",
+           "ABI version.">,
+    Option<"optLevel", "O", "unsigned",
+           /*default=*/"2",
+           "Optimization level.">,
+    Option<"wave64Flag", "wave64", "bool",
+           /*default=*/"true",
+           "Use Wave64 mode.">,
+    Option<"fastFlag", "fast", "bool",
+           /*default=*/"false",
+           "Enable fast relaxed math opt.">,
+    Option<"dazFlag", "daz", "bool",
+           /*default=*/"false",
+           "Enable denormals are zero opt.">,
+    Option<"finiteOnlyFlag", "finite-only", "bool",
+           /*default=*/"false",
+           "Enable finite only opt.">,
+    Option<"unsafeMathFlag", "unsafe-math", "bool",
+           /*default=*/"false",
+           "Enable unsafe math opt.">,
+    Option<"correctSqrtFlag", "correct-sqrt", "bool",
+           /*default=*/"true",
+           "Enable correct rounded sqrt.">,
+    ListOption<"linkLibs", "l", "std::string",
+           "Extra bitcode libraries paths to link to.">,
+  ];
+}
+
 #endif // MLIR_DIALECT_GPU_PASSES
diff --git a/mlir/lib/Dialect/GPU/CMakeLists.txt b/mlir/lib/Dialect/GPU/CMakeLists.txt
--- a/mlir/lib/Dialect/GPU/CMakeLists.txt
+++ b/mlir/lib/Dialect/GPU/CMakeLists.txt
@@ -52,11 +52,13 @@
   Transforms/KernelOutlining.cpp
   Transforms/MemoryPromotion.cpp
   Transforms/ModuleToBinary.cpp
+  Transforms/NVVMAttachTarget.cpp
   Transforms/ParallelLoopMapper.cpp
+  Transforms/ROCDLAttachTarget.cpp
   Transforms/SerializeToBlob.cpp
   Transforms/SerializeToCubin.cpp
   Transforms/SerializeToHsaco.cpp
   Transforms/ShuffleRewriter.cpp
 
   ADDITIONAL_HEADER_DIRS
   ${MLIR_MAIN_INCLUDE_DIR}/mlir/Dialect/GPU
diff --git a/mlir/lib/Dialect/GPU/Transforms/NVVMAttachTarget.cpp b/mlir/lib/Dialect/GPU/Transforms/NVVMAttachTarget.cpp
new file mode 100644
--- /dev/null
+++ b/mlir/lib/Dialect/GPU/Transforms/NVVMAttachTarget.cpp
@@ -0,0 +1,86 @@
+//===- NVVMAttachTarget.cpp - Attach an NVVM target -----------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the `GpuNVVMAttachTarget` pass, attaching `#nvvm.target`
+// attributes to GPU modules.
+//
+//===----------------------------------------------------------------------===//
+
+#include "mlir/Dialect/GPU/Transforms/Passes.h"
+
+#include "mlir/Dialect/GPU/IR/GPUDialect.h"
+#include "mlir/Dialect/LLVMIR/NVVMDialect.h"
+#include "mlir/IR/Builders.h"
+#include "mlir/Pass/Pass.h"
+#include "mlir/Target/LLVM/NVVM/Target.h"
+#include "llvm/Support/Regex.h"
+
+namespace mlir {
+#define GEN_PASS_DEF_GPUNVVMATTACHTARGET
+#include "mlir/Dialect/GPU/Transforms/Passes.h.inc"
+} // namespace mlir
+
+using namespace mlir;
+using namespace mlir::NVVM;
+
+namespace {
+struct NVVMAttachTarget
+    : public impl::GpuNVVMAttachTargetBase<NVVMAttachTarget> {
+  using Base::Base;
+
+  DictionaryAttr getFlags(OpBuilder &builder) const;
+
+  void runOnOperation() override;
+
+  void getDependentDialects(DialectRegistry &registry) const override {
+    registerNVVMTarget(registry);
+  }
+};
+} // namespace
+
+DictionaryAttr NVVMAttachTarget::getFlags(OpBuilder &builder) const {
+  UnitAttr unitAttr = builder.getUnitAttr();
+  SmallVector<NamedAttribute> flags;
+  auto addFlag = [&](StringRef flag) {
+    flags.push_back(builder.getNamedAttr(flag, unitAttr));
+  };
+  if (fastFlag)
+    addFlag("fast");
+  if (ftzFlag)
+    addFlag("ftz");
+  if (flags.size())
+    return builder.getDictionaryAttr(flags);
+  return nullptr;
+}
+
+void NVVMAttachTarget::runOnOperation() {
+  OpBuilder builder(&getContext());
+  ArrayRef<std::string> libs(linkLibs);
+  SmallVector<StringRef> filesToLink(libs.begin(), libs.end());
+  auto target = builder.getAttr<NVVMTargetAttr>(
+      optLevel, triple, chip, features, getFlags(builder),
+      filesToLink.size() ? builder.getStrArrayAttr(filesToLink) : nullptr);
+  llvm::Regex matcher(moduleMatcher);
+  for (Region &region : getOperation()->getRegions())
+    for (Block &block : region.getBlocks())
+      for (auto module : block.getOps<gpu::GPUModuleOp>()) {
+        // Skip modules not matching the regex (an empty regex matches all).
+        if (!moduleMatcher.empty() && !matcher.match(module.getName()))
+          continue;
+        // Start from the targets already attached to the module, if any.
+        SmallVector<Attribute> targets;
+        if (std::optional<ArrayAttr> attrs = module.getTargets())
+          targets.append(attrs->getValue().begin(), attrs->getValue().end());
+        // Append the target only if it is not already present; std::unique
+        // would collapse adjacent duplicates only, missing e.g. [A, B] + A.
+        if (std::find(targets.begin(), targets.end(), target) == targets.end())
+          targets.push_back(target);
+        // Update the target attribute array.
+        module.setTargetsAttr(builder.getArrayAttr(targets));
+      }
+}
diff --git a/mlir/lib/Dialect/GPU/Transforms/ROCDLAttachTarget.cpp b/mlir/lib/Dialect/GPU/Transforms/ROCDLAttachTarget.cpp
new file mode 100644
--- /dev/null
+++ b/mlir/lib/Dialect/GPU/Transforms/ROCDLAttachTarget.cpp
@@ -0,0 +1,94 @@
+//===- ROCDLAttachTarget.cpp - Attach an ROCDL target ---------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the `GpuROCDLAttachTarget` pass, attaching
+// `#rocdl.target` attributes to GPU modules.
+//
+//===----------------------------------------------------------------------===//
+
+#include "mlir/Dialect/GPU/Transforms/Passes.h"
+
+#include "mlir/Dialect/GPU/IR/GPUDialect.h"
+#include "mlir/Dialect/LLVMIR/ROCDLDialect.h"
+#include "mlir/IR/Builders.h"
+#include "mlir/Pass/Pass.h"
+#include "mlir/Target/LLVM/ROCDL/Target.h"
+#include "llvm/Support/Regex.h"
+
+namespace mlir {
+#define GEN_PASS_DEF_GPUROCDLATTACHTARGET
+#include "mlir/Dialect/GPU/Transforms/Passes.h.inc"
+} // namespace mlir
+
+using namespace mlir;
+using namespace mlir::ROCDL;
+
+namespace {
+struct ROCDLAttachTarget
+    : public impl::GpuROCDLAttachTargetBase<ROCDLAttachTarget> {
+  using Base::Base;
+
+  DictionaryAttr getFlags(OpBuilder &builder) const;
+
+  void runOnOperation() override;
+
+  void getDependentDialects(DialectRegistry &registry) const override {
+    registerROCDLTarget(registry);
+  }
+};
+} // namespace
+
+DictionaryAttr ROCDLAttachTarget::getFlags(OpBuilder &builder) const {
+  UnitAttr unitAttr = builder.getUnitAttr();
+  SmallVector<NamedAttribute> flags;
+  auto addFlag = [&](StringRef flag) {
+    flags.push_back(builder.getNamedAttr(flag, unitAttr));
+  };
+  if (!wave64Flag)
+    addFlag("no_wave64");
+  if (fastFlag)
+    addFlag("fast");
+  if (dazFlag)
+    addFlag("daz");
+  if (finiteOnlyFlag)
+    addFlag("finite_only");
+  if (unsafeMathFlag)
+    addFlag("unsafe_math");
+  if (!correctSqrtFlag)
+    addFlag("unsafe_sqrt");
+  if (flags.size())
+    return builder.getDictionaryAttr(flags);
+  return nullptr;
+}
+
+void ROCDLAttachTarget::runOnOperation() {
+  OpBuilder builder(&getContext());
+  ArrayRef<std::string> libs(linkLibs);
+  SmallVector<StringRef> filesToLink(libs.begin(), libs.end());
+  auto target = builder.getAttr<ROCDLTargetAttr>(
+      optLevel, triple, chip, features, abiVersion, getFlags(builder),
+      filesToLink.size() ? builder.getStrArrayAttr(filesToLink) : nullptr);
+  llvm::Regex matcher(moduleMatcher);
+  for (Region &region : getOperation()->getRegions())
+    for (Block &block : region.getBlocks())
+      for (auto module : block.getOps<gpu::GPUModuleOp>()) {
+        // Skip modules not matching the regex (an empty regex matches all).
+        if (!moduleMatcher.empty() && !matcher.match(module.getName()))
+          continue;
+        // Start from the targets already attached to the module, if any.
+        SmallVector<Attribute> targets;
+        if (std::optional<ArrayAttr> attrs = module.getTargets())
+          targets.append(attrs->getValue().begin(), attrs->getValue().end());
+        // Append the target only if it is not already present; std::unique
+        // would collapse adjacent duplicates only, missing e.g. [A, B] + A.
+        if (std::find(targets.begin(), targets.end(), target) == targets.end())
+          targets.push_back(target);
+        // Update the target attribute array.
+        module.setTargetsAttr(builder.getArrayAttr(targets));
+      }
+}
diff --git a/mlir/test/Dialect/LLVMIR/attach-targets.mlir b/mlir/test/Dialect/LLVMIR/attach-targets.mlir
new file mode 100644
--- /dev/null
+++ b/mlir/test/Dialect/LLVMIR/attach-targets.mlir
@@ -0,0 +1,29 @@
+// RUN: mlir-opt %s --nvvm-attach-target='module=nvvm.* O=3 chip=sm_90' --rocdl-attach-target='module=rocdl.* O=3 chip=gfx90a' | FileCheck %s
+// RUN: mlir-opt %s --nvvm-attach-target='module=options.* O=1 chip=sm_70 fast=true ftz=true' --rocdl-attach-target='module=options.* l=file1.bc,file2.bc wave64=false finite-only=true' | FileCheck %s --check-prefix=CHECK_OPTS
+
+module attributes {gpu.container_module} {
+// Verify the target is appended.
+// CHECK: @nvvm_module_1 [#nvvm.target] {
+gpu.module @nvvm_module_1 {
+}
+// Verify the target is appended.
+// CHECK: @nvvm_module_2 [#nvvm.target, #nvvm.target] {
+gpu.module @nvvm_module_2 [#nvvm.target] {
+}
+// Verify the target is not added multiple times.
+// CHECK: @nvvm_module_3 [#nvvm.target] {
+gpu.module @nvvm_module_3 [#nvvm.target] {
+}
+// Verify the NVVM target is not added as it fails to match the regex, but the ROCDL does get appended.
+// CHECK: @rocdl_module [#rocdl.target] {
+gpu.module @rocdl_module {
+}
+// Check the options were added.
+// CHECK_OPTS: @options_module_1 [#nvvm.target, #rocdl.target] {
+gpu.module @options_module_1 {
+}
+// Check that the options were added and that the pre-existing target was kept.
+// CHECK_OPTS: @options_module_2 [#nvvm.target, #nvvm.target, #rocdl.target] {
+gpu.module @options_module_2 [#nvvm.target] {
+}
+}