diff --git a/mlir/include/mlir/Dialect/LLVMIR/CMakeLists.txt b/mlir/include/mlir/Dialect/LLVMIR/CMakeLists.txt --- a/mlir/include/mlir/Dialect/LLVMIR/CMakeLists.txt +++ b/mlir/include/mlir/Dialect/LLVMIR/CMakeLists.txt @@ -62,4 +62,6 @@ add_mlir_doc(ROCDLOps ROCDLDialect Dialects/ -gen-dialect-doc -dialect=rocdl) set(LLVM_TARGET_DEFINITIONS ROCDLOps.td) mlir_tablegen(ROCDLConversions.inc -gen-llvmir-conversions) +mlir_tablegen(ROCDLOpsAttributes.h.inc -gen-attrdef-decls -attrdefs-dialect=rocdl) +mlir_tablegen(ROCDLOpsAttributes.cpp.inc -gen-attrdef-defs -attrdefs-dialect=rocdl) add_public_tablegen_target(MLIRROCDLConversionsIncGen) diff --git a/mlir/include/mlir/Dialect/LLVMIR/ROCDLDialect.h b/mlir/include/mlir/Dialect/LLVMIR/ROCDLDialect.h --- a/mlir/include/mlir/Dialect/LLVMIR/ROCDLDialect.h +++ b/mlir/include/mlir/Dialect/LLVMIR/ROCDLDialect.h @@ -23,12 +23,16 @@ #define MLIR_DIALECT_LLVMIR_ROCDLDIALECT_H_ #include "mlir/Bytecode/BytecodeOpInterface.h" +#include "mlir/Dialect/GPU/IR/CompilationInterfaces.h" #include "mlir/Dialect/LLVMIR/LLVMDialect.h" #include "mlir/IR/Dialect.h" #include "mlir/IR/OpDefinition.h" #include "mlir/Interfaces/SideEffectInterfaces.h" ///// Ops ///// +#define GET_ATTRDEF_CLASSES +#include "mlir/Dialect/LLVMIR/ROCDLOpsAttributes.h.inc" + #define GET_OP_CLASSES #include "mlir/Dialect/LLVMIR/ROCDLOps.h.inc" diff --git a/mlir/include/mlir/Dialect/LLVMIR/ROCDLOps.td b/mlir/include/mlir/Dialect/LLVMIR/ROCDLOps.td --- a/mlir/include/mlir/Dialect/LLVMIR/ROCDLOps.td +++ b/mlir/include/mlir/Dialect/LLVMIR/ROCDLOps.td @@ -13,6 +13,7 @@ #ifndef ROCDLIR_OPS #define ROCDLIR_OPS +include "mlir/Dialect/GPU/IR/CompilationAttrInterfaces.td" include "mlir/Dialect/LLVMIR/LLVMOpBase.td" include "mlir/Interfaces/SideEffectInterfaces.td" @@ -44,8 +45,20 @@ /// The address space value that represents private memory. 
static constexpr unsigned kPrivateMemoryAddressSpace = 5; }]; + + let useDefaultAttributePrinterParser = 1; } +//===----------------------------------------------------------------------===// +// ROCDL attribute definitions +//===----------------------------------------------------------------------===// + +class ROCDL_Attr traits = []> + : AttrDef { + let mnemonic = attrMnemonic; +} + + //===----------------------------------------------------------------------===// // ROCDL op definitions //===----------------------------------------------------------------------===// @@ -386,4 +399,92 @@ let hasCustomAssemblyFormat = 1; } +//===----------------------------------------------------------------------===// +// ROCDL target attribute. +//===----------------------------------------------------------------------===// + +def ROCDL_TargettAttr : + ROCDL_Attr<"ROCDLTarget", "target"> { + let description = [{ + ROCDL target attribute for controlling compilation of AMDGPU targets. All + parameters decay into default values if not present. + + Examples: + + 1. Target with default values. + ``` + gpu.module @mymodule [#rocdl.target] attributes {...} { + ... + } + ``` + + 2. Target with `gfx90a` chip and fast math. + ``` + gpu.module @mymodule [#rocdl.target] { + ... + } + ``` + }]; + let parameters = (ins + DefaultValuedParameter<"int", "2", "Optimization level to apply.">:$O, + StringRefParameter<"Target triple.", "\"amdgcn-amd-amdhsa\"">:$triple, + StringRefParameter<"Target chip.", "\"gfx900\"">:$chip, + StringRefParameter<"Target chip features.", "\"\"">:$features, + StringRefParameter<"ABI version.", "\"400\"">:$abi, + OptionalParameter<"DictionaryAttr", "Target specific flags.">:$flags, + OptionalParameter<"ArrayAttr", "Files to link to the LLVM module.">:$link + ); + let assemblyFormat = [{ + (`<` struct($O, $triple, $chip, $features, $abi, $flags, $link)^ `>`)? 
+ }]; + let builders = [ + AttrBuilder<(ins CArg<"int", "2">:$optLevel, + CArg<"StringRef", "\"amdgcn-amd-amdhsa\"">:$triple, + CArg<"StringRef", "\"gfx900\"">:$chip, + CArg<"StringRef", "\"\"">:$features, + CArg<"StringRef", "\"400\"">:$abiVersion, + CArg<"DictionaryAttr", "nullptr">:$targetFlags, + CArg<"ArrayAttr", "nullptr">:$linkFiles), [{ + return Base::get($_ctxt, optLevel, triple, chip, features, abiVersion, + targetFlags, linkFiles); + }]> + ]; + let skipDefaultBuilders = 1; + let genVerifyDecl = 1; + let extraClassDeclaration = [{ + bool hasFlag(StringRef flag) const; + bool hasWave64() const; + bool hasFastMath() const; + bool hasDaz() const; + bool hasFiniteOnly() const; + bool hasUnsafeMath() const; + bool hasCorrectSqrt() const; + }]; + let extraClassDefinition = [{ + bool $cppClass::hasFlag(StringRef flag) const { + if (DictionaryAttr flags = getFlags()) + return flags.get(flag) != nullptr; + return false; + } + bool $cppClass::hasWave64() const { + return hasFlag("wave64") || !hasFlag("no_wave64"); + } + bool $cppClass::hasFastMath() const { + return hasFlag("fast"); + } + bool $cppClass::hasDaz() const { + return hasFlag("daz"); + } + bool $cppClass::hasFiniteOnly() const { + return hasFlag("finite_only"); + } + bool $cppClass::hasUnsafeMath() const { + return hasFlag("unsafe_math"); + } + bool $cppClass::hasCorrectSqrt() const { + return !hasFlag("unsafe_sqrt"); + } + }]; +} + #endif // ROCDLIR_OPS diff --git a/mlir/include/mlir/InitAllExtensions.h b/mlir/include/mlir/InitAllExtensions.h --- a/mlir/include/mlir/InitAllExtensions.h +++ b/mlir/include/mlir/InitAllExtensions.h @@ -25,6 +25,7 @@ #include "mlir/Conversion/UBToLLVM/UBToLLVM.h" #include "mlir/Dialect/Func/Extensions/AllExtensions.h" #include "mlir/Target/LLVM/NVVM/Target.h" +#include "mlir/Target/LLVM/ROCDL/Target.h" #include @@ -47,6 +48,7 @@ registerConvertNVVMToLLVMInterface(registry); ub::registerConvertUBToLLVMInterface(registry); registerNVVMTarget(registry); + 
registerROCDLTarget(registry); } } // namespace mlir diff --git a/mlir/include/mlir/Target/LLVM/ROCDL/Target.h b/mlir/include/mlir/Target/LLVM/ROCDL/Target.h new file mode 100644 --- /dev/null +++ b/mlir/include/mlir/Target/LLVM/ROCDL/Target.h @@ -0,0 +1,28 @@ +//===- Target.h - MLIR ROCDL target registration ----------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This provides registration calls for attaching the ROCDL target interface. +// +//===----------------------------------------------------------------------===// + +#ifndef MLIR_TARGET_LLVM_ROCDL_TARGET_H +#define MLIR_TARGET_LLVM_ROCDL_TARGET_H + +namespace mlir { +class DialectRegistry; +class MLIRContext; +/// Registers the `TargetAttrInterface` for the `#rocdl.target` attribute in the +/// given registry. +void registerROCDLTarget(DialectRegistry ®istry); + +/// Registers the `TargetAttrInterface` for the `#rocdl.target` attribute in the +/// registry associated with the given context. +void registerROCDLTarget(MLIRContext &context); +} // namespace mlir + +#endif // MLIR_TARGET_LLVM_ROCDL_TARGET_H diff --git a/mlir/include/mlir/Target/LLVM/ROCDL/Utils.h b/mlir/include/mlir/Target/LLVM/ROCDL/Utils.h new file mode 100644 --- /dev/null +++ b/mlir/include/mlir/Target/LLVM/ROCDL/Utils.h @@ -0,0 +1,94 @@ +//===- Utils.h - MLIR ROCDL target utils ------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file declares ROCDL target related utility classes and functions. +// +//===----------------------------------------------------------------------===// + +#ifndef MLIR_TARGET_LLVM_ROCDL_UTILS_H +#define MLIR_TARGET_LLVM_ROCDL_UTILS_H + +#include "mlir/Dialect/GPU/IR/CompilationInterfaces.h" +#include "mlir/Dialect/LLVMIR/ROCDLDialect.h" +#include "mlir/Target/LLVM/ModuleToObject.h" + +namespace mlir { +namespace ROCDL { +/// Searches for and returns the ROCM toolkit path; the search order is: +/// 1. The `ROCM_PATH` environment variable. +/// 2. The `ROCM_ROOT` environment variable. +/// 3. The `ROCM_HOME` environment variable. +/// 4. The ROCM path detected by CMake. +/// 5. Returns an empty string. +StringRef getROCMPath(); + +/// Base class for all ROCDL serializations from GPU modules into binary +/// strings. By default this class serializes into LLVM bitcode. +class SerializeGPUModuleBase : public LLVM::ModuleToObject { +public: + /// Initializes the `toolkitPath` with the path in `targetOptions` or if empty + /// with the path in `getROCMPath`. + SerializeGPUModuleBase(Operation &module, ROCDLTargetAttr target, + const gpu::TargetOptions &targetOptions = {}); + + /// Initializes the LLVM AMDGPU target by safely calling + /// `LLVMInitializeAMDGPU*` methods if available. + static void init(); + + /// Returns the target attribute. + ROCDLTargetAttr getTarget() const; + + /// Returns the ROCM toolkit path. + StringRef getToolkitPath() const; + + /// Returns the bitcode files to be loaded. + ArrayRef getFileList() const; + + /// Appends standard ROCm device libraries like `ocml.bc`, `ockl.bc`, etc. + LogicalResult appendStandardLibs(); + + /// Loads the bitcode files in `fileList`. 
+ virtual std::optional>> + loadBitcodeFiles(llvm::Module &module, + llvm::TargetMachine &targetMachine) override; + + /// Adds `oclc` control variables to the LLVM module. + void handleModulePreLink(llvm::Module &module, + llvm::TargetMachine &targetMachine) override; + + /// Removes unnecessary metadata from the loaded bitcode files. + LogicalResult handleBitcodeFile(llvm::Module &module, + llvm::TargetMachine &targetMachine) override; + +protected: + /// Appends the paths of common ROCm device libraries to `libs`. + LogicalResult getCommonBitcodeLibs(llvm::SmallVector &libs, + SmallVector &libPath, + StringRef isaVersion); + + /// Adds `oclc` control variables to the LLVM module. + void addControlVariables(llvm::Module &module, bool wave64, bool daz, + bool finiteOnly, bool unsafeMath, bool fastMath, + bool correctSqrt, StringRef abiVer); + + /// Returns the assembled ISA. + std::optional> assembleIsa(StringRef isa); + + /// ROCDL target attribute. + ROCDLTargetAttr target; + + /// ROCM toolkit path. + std::string toolkitPath; + + /// List of LLVM bitcode files to link to. 
+ SmallVector fileList; +}; +} // namespace ROCDL +} // namespace mlir + +#endif // MLIR_TARGET_LLVM_ROCDL_UTILS_H diff --git a/mlir/lib/Dialect/LLVMIR/IR/ROCDLDialect.cpp b/mlir/lib/Dialect/LLVMIR/IR/ROCDLDialect.cpp --- a/mlir/lib/Dialect/LLVMIR/IR/ROCDLDialect.cpp +++ b/mlir/lib/Dialect/LLVMIR/IR/ROCDLDialect.cpp @@ -16,11 +16,14 @@ #include "mlir/Dialect/LLVMIR/ROCDLDialect.h" +#include "mlir/Dialect/GPU/IR/GPUDialect.h" #include "mlir/Dialect/LLVMIR/LLVMDialect.h" #include "mlir/IR/Builders.h" #include "mlir/IR/BuiltinTypes.h" +#include "mlir/IR/DialectImplementation.h" #include "mlir/IR/MLIRContext.h" #include "mlir/IR/Operation.h" +#include "llvm/ADT/TypeSwitch.h" #include "llvm/AsmParser/Parser.h" #include "llvm/IR/Attributes.h" #include "llvm/IR/Function.h" @@ -237,8 +240,14 @@ #include "mlir/Dialect/LLVMIR/ROCDLOps.cpp.inc" >(); + addAttributes< +#define GET_ATTRDEF_LIST +#include "mlir/Dialect/LLVMIR/ROCDLOpsAttributes.cpp.inc" + >(); + // Support unknown operations because not all ROCDL operations are registered. allowUnknownOperations(); + declarePromisedInterface(); } LogicalResult ROCDLDialect::verifyOperationAttribute(Operation *op, @@ -253,5 +262,41 @@ return success(); } +//===----------------------------------------------------------------------===// +// ROCDL target attribute. 
+//===----------------------------------------------------------------------===// +LogicalResult +ROCDLTargetAttr::verify(function_ref emitError, + int optLevel, StringRef triple, StringRef chip, + StringRef features, StringRef abiVersion, + DictionaryAttr flags, ArrayAttr files) { + if (optLevel < 0 || optLevel > 3) { + emitError() << "The optimization level must be a number between 0 and 3."; + return failure(); + } + if (triple.empty()) { + emitError() << "The target triple cannot be empty."; + return failure(); + } + if (chip.empty()) { + emitError() << "The target chip cannot be empty."; + return failure(); + } + if (abiVersion != "400" && abiVersion != "500") { + emitError() << "Invalid ABI version, it must be either `400` or `500`."; + return failure(); + } + if (files && !llvm::all_of(files, [](::mlir::Attribute attr) { + return attr && mlir::isa(attr); + })) { + emitError() << "All the elements in the `link` array must be strings."; + return failure(); + } + return success(); +} + #define GET_OP_CLASSES #include "mlir/Dialect/LLVMIR/ROCDLOps.cpp.inc" + +#define GET_ATTRDEF_CLASSES +#include "mlir/Dialect/LLVMIR/ROCDLOpsAttributes.cpp.inc" diff --git a/mlir/lib/Target/LLVM/CMakeLists.txt b/mlir/lib/Target/LLVM/CMakeLists.txt --- a/mlir/lib/Target/LLVM/CMakeLists.txt +++ b/mlir/lib/Target/LLVM/CMakeLists.txt @@ -97,3 +97,53 @@ __DEFAULT_CUDATOOLKIT_PATH__="${MLIR_CUDAToolkit_ROOT}" ) endif() + +if (MLIR_ENABLE_ROCM_CONVERSIONS) + set(AMDGPU_LIBS + IRReader + IPO + linker + MCParser + AMDGPUAsmParser + AMDGPUCodeGen + AMDGPUDesc + AMDGPUInfo + target + ) +endif() + +add_mlir_dialect_library(MLIRROCDLTarget + ROCDL/Target.cpp + + LINK_COMPONENTS + ${AMDGPU_LIBS} + + LINK_LIBS PUBLIC + MLIRIR + MLIRExecutionEngineUtils + MLIRSupport + MLIRGPUDialect + MLIRTargetLLVM + MLIRROCDLToLLVMIRTranslation + ) + +if(MLIR_ENABLE_ROCM_CONVERSIONS) + if (NOT ("AMDGPU" IN_LIST LLVM_TARGETS_TO_BUILD)) + message(SEND_ERROR + "Building mlir with ROCm support requires the AMDGPU 
backend") + endif() + + if (DEFINED ROCM_PATH) + set(DEFAULT_ROCM_PATH "${ROCM_PATH}" CACHE PATH "Fallback path to search for ROCm installs") + elseif(DEFINED ENV{ROCM_PATH}) + set(DEFAULT_ROCM_PATH "$ENV{ROCM_PATH}" CACHE PATH "Fallback path to search for ROCm installs") + else() + set(DEFAULT_ROCM_PATH "/opt/rocm" CACHE PATH "Fallback path to search for ROCm installs") + endif() + message(VERBOSE "MLIR Default ROCM toolkit path: ${DEFAULT_ROCM_PATH}") + + target_compile_definitions(obj.MLIRROCDLTarget + PRIVATE + __DEFAULT_ROCM_PATH__="${DEFAULT_ROCM_PATH}" + ) +endif() diff --git a/mlir/lib/Target/LLVM/ROCDL/Target.cpp b/mlir/lib/Target/LLVM/ROCDL/Target.cpp new file mode 100644 --- /dev/null +++ b/mlir/lib/Target/LLVM/ROCDL/Target.cpp @@ -0,0 +1,464 @@ +//===- Target.cpp - MLIR LLVM ROCDL target compilation ----------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file defines ROCDL target related functions including registration +// calls for the `#rocdl.target` compilation attribute. 
+// +//===----------------------------------------------------------------------===// + +#include "mlir/Target/LLVM/ROCDL/Target.h" + +#include "mlir/Dialect/GPU/IR/GPUDialect.h" +#include "mlir/Dialect/LLVMIR/ROCDLDialect.h" +#include "mlir/Support/FileUtilities.h" +#include "mlir/Target/LLVM/ROCDL/Utils.h" +#include "mlir/Target/LLVMIR/Dialect/GPU/GPUToLLVMIRTranslation.h" +#include "mlir/Target/LLVMIR/Dialect/LLVMIR/LLVMToLLVMIRTranslation.h" +#include "mlir/Target/LLVMIR/Dialect/ROCDL/ROCDLToLLVMIRTranslation.h" +#include "mlir/Target/LLVMIR/Export.h" + +#include "llvm/IR/Constants.h" +#include "llvm/MC/MCAsmBackend.h" +#include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCCodeEmitter.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCInstrInfo.h" +#include "llvm/MC/MCObjectFileInfo.h" +#include "llvm/MC/MCObjectWriter.h" +#include "llvm/MC/MCParser/MCTargetAsmParser.h" +#include "llvm/MC/MCRegisterInfo.h" +#include "llvm/MC/MCStreamer.h" +#include "llvm/MC/MCSubtargetInfo.h" +#include "llvm/MC/TargetRegistry.h" +#include "llvm/Support/FileSystem.h" +#include "llvm/Support/FileUtilities.h" +#include "llvm/Support/Path.h" +#include "llvm/Support/Program.h" +#include "llvm/Support/SourceMgr.h" +#include "llvm/Support/TargetSelect.h" +#include "llvm/TargetParser/TargetParser.h" + +#include + +using namespace mlir; +using namespace mlir::ROCDL; + +#ifndef __DEFAULT_ROCM_PATH__ +#define __DEFAULT_ROCM_PATH__ "" +#endif + +namespace { +// Implementation of the `TargetAttrInterface` model. +class ROCDLTargetAttrImpl + : public gpu::TargetAttrInterface::FallbackModel { +public: + std::optional> + serializeToObject(Attribute attribute, Operation *module, + const gpu::TargetOptions &options) const; +}; +} // namespace + +// Register the ROCDL dialect, the ROCDL translation and the target interface. 
+void mlir::registerROCDLTarget(DialectRegistry ®istry) { + registerROCDLDialectTranslation(registry); + registry.addExtension(+[](MLIRContext *ctx, ROCDL::ROCDLDialect *dialect) { + ROCDLTargetAttr::attachInterface(*ctx); + }); +} + +void mlir::registerROCDLTarget(MLIRContext &context) { + DialectRegistry registry; + registerROCDLTarget(registry); + context.appendDialectRegistry(registry); +} + +// Search for the ROCM path. +StringRef mlir::ROCDL::getROCMPath() { + if (const char *var = std::getenv("ROCM_PATH")) + return var; + if (const char *var = std::getenv("ROCM_ROOT")) + return var; + if (const char *var = std::getenv("ROCM_HOME")) + return var; + return __DEFAULT_ROCM_PATH__; +} + +SerializeGPUModuleBase::SerializeGPUModuleBase( + Operation &module, ROCDLTargetAttr target, + const gpu::TargetOptions &targetOptions) + : ModuleToObject(module, target.getTriple(), target.getChip(), + target.getFeatures(), target.getO()), + target(target), toolkitPath(targetOptions.getToolkitPath()), + fileList(targetOptions.getLinkFiles()) { + + // If `targetOptions` has an empty toolkitPath use `getROCMPath` + if (toolkitPath.empty()) + toolkitPath = getROCMPath(); + + // Append the files in the target attribute. + if (ArrayAttr files = target.getLink()) + for (Attribute attr : files.getValue()) + if (auto file = dyn_cast(attr)) + fileList.push_back(file.str()); + + // Append standard ROCm device bitcode libraries to the files to be loaded. + (void)appendStandardLibs(); +} + +void SerializeGPUModuleBase::init() { + static llvm::once_flag initializeBackendOnce; + llvm::call_once(initializeBackendOnce, []() { + // If the `AMDGPU` LLVM target was built, initialize it. 
+#if MLIR_ROCM_CONVERSIONS_ENABLED == 1 + LLVMInitializeAMDGPUTarget(); + LLVMInitializeAMDGPUTargetInfo(); + LLVMInitializeAMDGPUTargetMC(); + LLVMInitializeAMDGPUAsmParser(); + LLVMInitializeAMDGPUAsmPrinter(); +#endif + }); +} + +ROCDLTargetAttr SerializeGPUModuleBase::getTarget() const { return target; } + +StringRef SerializeGPUModuleBase::getToolkitPath() const { return toolkitPath; } + +ArrayRef SerializeGPUModuleBase::getFileList() const { + return fileList; +} + +LogicalResult SerializeGPUModuleBase::appendStandardLibs() { + StringRef pathRef = getToolkitPath(); + if (pathRef.size()) { + SmallVector path; + path.insert(path.begin(), pathRef.begin(), pathRef.end()); + llvm::sys::path::append(path, "amdgcn", "bitcode"); + pathRef = StringRef(path.data(), path.size()); + if (!llvm::sys::fs::is_directory(pathRef)) { + getOperation().emitRemark() << "ROCm amdgcn bitcode path: " << pathRef + << " does not exist or is not a directory."; + return failure(); + } + StringRef isaVersion = + llvm::AMDGPU::getArchNameAMDGCN(llvm::AMDGPU::parseArchAMDGCN(chip)); + isaVersion.consume_front("gfx"); + return getCommonBitcodeLibs(fileList, path, isaVersion); + } + return success(); +} + +std::optional>> +SerializeGPUModuleBase::loadBitcodeFiles(llvm::Module &module, + llvm::TargetMachine &targetMachine) { + SmallVector> bcFiles; + if (failed(loadBitcodeFilesFromList(module.getContext(), targetMachine, + fileList, bcFiles, true))) + return std::nullopt; + return bcFiles; +} + +LogicalResult +SerializeGPUModuleBase::handleBitcodeFile(llvm::Module &module, + llvm::TargetMachine &targetMachine) { + // Some ROCM builds don't strip this like they should + if (auto *openclVersion = module.getNamedMetadata("opencl.ocl.version")) + module.eraseNamedMetadata(openclVersion); + // Stop spamming us with clang version numbers + if (auto *ident = module.getNamedMetadata("llvm.ident")) + module.eraseNamedMetadata(ident); + return success(); +} + +void 
SerializeGPUModuleBase::handleModulePreLink( + llvm::Module &module, llvm::TargetMachine &targetMachine) { + addControlVariables(module, target.hasWave64(), target.hasDaz(), + target.hasFiniteOnly(), target.hasUnsafeMath(), + target.hasFastMath(), target.hasCorrectSqrt(), + target.getAbi()); +} + +// Get the paths of ROCm device libraries. +LogicalResult SerializeGPUModuleBase::getCommonBitcodeLibs( + llvm::SmallVector &libs, SmallVector &libPath, + StringRef isaVersion) { + auto addLib = [&](StringRef path) -> bool { + if (!llvm::sys::fs::is_regular_file(path)) { + getOperation().emitRemark() << "Bitcode library path: " << path + << " does not exist or is not a file.\n"; + return true; + } + libs.push_back(path.str()); + return false; + }; + auto getLibPath = [&libPath](Twine lib) { + auto baseSize = libPath.size(); + llvm::sys::path::append(libPath, lib + ".bc"); + std::string path(StringRef(libPath.data(), libPath.size()).str()); + libPath.truncate(baseSize); + return path; + }; + + // Add ROCm device libraries. Fail if any of the libraries is not found. 
+ if (addLib(getLibPath("ocml")) || addLib(getLibPath("ockl")) || + addLib(getLibPath("hip")) || addLib(getLibPath("opencl")) || + addLib(getLibPath("oclc_isa_version_" + isaVersion))) + return failure(); + return success(); +} + +void SerializeGPUModuleBase::addControlVariables( + llvm::Module &module, bool wave64, bool daz, bool finiteOnly, + bool unsafeMath, bool fastMath, bool correctSqrt, StringRef abiVer) { + llvm::Type *i8Ty = llvm::Type::getInt8Ty(module.getContext()); + auto addControlVariable = [i8Ty, &module](StringRef name, bool enable) { + llvm::GlobalVariable *controlVariable = new llvm::GlobalVariable( + module, i8Ty, true, llvm::GlobalValue::LinkageTypes::LinkOnceODRLinkage, + llvm::ConstantInt::get(i8Ty, enable), name, nullptr, + llvm::GlobalValue::ThreadLocalMode::NotThreadLocal, 4); + controlVariable->setVisibility( + llvm::GlobalValue::VisibilityTypes::ProtectedVisibility); + controlVariable->setAlignment(llvm::MaybeAlign(1)); + controlVariable->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Local); + }; + addControlVariable("__oclc_finite_only_opt", finiteOnly || fastMath); + addControlVariable("__oclc_unsafe_math_opt", unsafeMath || fastMath); + addControlVariable("__oclc_daz_opt", daz || fastMath); + addControlVariable("__oclc_correctly_rounded_sqrt32", + correctSqrt && !fastMath); + addControlVariable("__oclc_wavefrontsize64", wave64); + + llvm::Type *i32Ty = llvm::Type::getInt32Ty(module.getContext()); + int abi = 400; + abiVer.getAsInteger(0, abi); + llvm::GlobalVariable *abiVersion = new llvm::GlobalVariable( + module, i32Ty, true, llvm::GlobalValue::LinkageTypes::LinkOnceODRLinkage, + llvm::ConstantInt::get(i32Ty, abi), "__oclc_ABI_version", nullptr, + llvm::GlobalValue::ThreadLocalMode::NotThreadLocal, 4); + abiVersion->setVisibility( + llvm::GlobalValue::VisibilityTypes::ProtectedVisibility); + abiVersion->setAlignment(llvm::MaybeAlign(4)); + abiVersion->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Local); +} + +std::optional> 
+SerializeGPUModuleBase::assembleIsa(StringRef isa) { + auto loc = getOperation().getLoc(); + + StringRef targetTriple = this->triple; + + SmallVector result; + llvm::raw_svector_ostream os(result); + + llvm::Triple triple(llvm::Triple::normalize(targetTriple)); + std::string error; + const llvm::Target *target = + llvm::TargetRegistry::lookupTarget(triple.normalize(), error); + if (!target) { + emitError(loc, Twine("failed to lookup target: ") + error); + return std::nullopt; + } + + llvm::SourceMgr srcMgr; + srcMgr.AddNewSourceBuffer(llvm::MemoryBuffer::getMemBuffer(isa), SMLoc()); + + const llvm::MCTargetOptions mcOptions; + std::unique_ptr mri( + target->createMCRegInfo(targetTriple)); + std::unique_ptr mai( + target->createMCAsmInfo(*mri, targetTriple, mcOptions)); + mai->setRelaxELFRelocations(true); + std::unique_ptr sti( + target->createMCSubtargetInfo(targetTriple, chip, features)); + + llvm::MCContext ctx(triple, mai.get(), mri.get(), sti.get(), &srcMgr, + &mcOptions); + std::unique_ptr mofi(target->createMCObjectFileInfo( + ctx, /*PIC=*/false, /*LargeCodeModel=*/false)); + ctx.setObjectFileInfo(mofi.get()); + + SmallString<128> cwd; + if (!llvm::sys::fs::current_path(cwd)) + ctx.setCompilationDir(cwd); + + std::unique_ptr mcStreamer; + std::unique_ptr mcii(target->createMCInstrInfo()); + + llvm::MCCodeEmitter *ce = target->createMCCodeEmitter(*mcii, ctx); + llvm::MCAsmBackend *mab = target->createMCAsmBackend(*sti, *mri, mcOptions); + mcStreamer.reset(target->createMCObjectStreamer( + triple, ctx, std::unique_ptr(mab), + mab->createObjectWriter(os), std::unique_ptr(ce), + *sti, mcOptions.MCRelaxAll, mcOptions.MCIncrementalLinkerCompatible, + /*DWARFMustBeAtTheEnd*/ false)); + mcStreamer->setUseAssemblerInfoForParsing(true); + + std::unique_ptr parser( + createMCAsmParser(srcMgr, ctx, *mcStreamer, *mai)); + std::unique_ptr tap( + target->createMCAsmParser(*sti, *parser, *mcii, mcOptions)); + + if (!tap) { + emitError(loc, "assembler initialization 
error"); + return {}; + } + + parser->setTargetParser(*tap); + parser->Run(false); + + return result; +} + +#ifdef MLIR_ROCM_CONVERSIONS_ENABLED +namespace { +class AMDGPUSerializer : public SerializeGPUModuleBase { +public: + AMDGPUSerializer(Operation &module, ROCDLTargetAttr target, + const gpu::TargetOptions &targetOptions); + + gpu::GPUModuleOp getOperation(); + + // Compile to HSA. + std::optional> + compileToBinary(const std::string &serializedISA); + + std::optional> + moduleToObject(llvm::Module &llvmModule, + llvm::TargetMachine &targetMachine) override; + +private: + // Target options. + gpu::TargetOptions targetOptions; +}; +} // namespace + +AMDGPUSerializer::AMDGPUSerializer(Operation &module, ROCDLTargetAttr target, + const gpu::TargetOptions &targetOptions) + : SerializeGPUModuleBase(module, target, targetOptions), + targetOptions(targetOptions) {} + +gpu::GPUModuleOp AMDGPUSerializer::getOperation() { + return dyn_cast(&SerializeGPUModuleBase::getOperation()); +} + +std::optional> +AMDGPUSerializer::compileToBinary(const std::string &serializedISA) { + // Assemble the ISA. + std::optional> isaBinary = assembleIsa(serializedISA); + + if (!isaBinary) { + getOperation().emitError() << "Failed during ISA assembling."; + return std::nullopt; + } + + // Save the ISA binary to a temp file. + int tempIsaBinaryFd = -1; + SmallString<128> tempIsaBinaryFilename; + if (llvm::sys::fs::createTemporaryFile("kernel%%", "o", tempIsaBinaryFd, + tempIsaBinaryFilename)) { + getOperation().emitError() + << "Failed to create a temporary file for dumping the ISA binary."; + return std::nullopt; + } + llvm::FileRemover cleanupIsaBinary(tempIsaBinaryFilename); + { + llvm::raw_fd_ostream tempIsaBinaryOs(tempIsaBinaryFd, true); + tempIsaBinaryOs << StringRef(isaBinary->data(), isaBinary->size()); + tempIsaBinaryOs.flush(); + } + + // Create a temp file for HSA code object. 
+ int tempHsacoFD = -1; + SmallString<128> tempHsacoFilename; + if (llvm::sys::fs::createTemporaryFile("kernel", "hsaco", tempHsacoFD, + tempHsacoFilename)) { + getOperation().emitError() + << "Failed to create a temporary file for the HSA code object."; + return std::nullopt; + } + llvm::FileRemover cleanupHsaco(tempHsacoFilename); + + llvm::SmallString<128> lldPath(toolkitPath); + llvm::sys::path::append(lldPath, "llvm", "bin", "ld.lld"); + int lldResult = llvm::sys::ExecuteAndWait( + lldPath, + {"ld.lld", "-shared", tempIsaBinaryFilename, "-o", tempHsacoFilename}); + if (lldResult != 0) { + getOperation().emitError() << "lld invocation failed."; + return std::nullopt; + } + + // Load the HSA code object. + auto hsacoFile = openInputFile(tempHsacoFilename); + if (!hsacoFile) { + getOperation().emitError() + << "Failed to read the HSA code object from the temp file."; + return std::nullopt; + } + + StringRef buffer = hsacoFile->getBuffer(); + + return SmallVector(buffer.begin(), buffer.end()); +} + +std::optional> +AMDGPUSerializer::moduleToObject(llvm::Module &llvmModule, + llvm::TargetMachine &targetMachine) { + // Return LLVM IR if the compilation target is offload. +#define DEBUG_TYPE "serialize-to-llvm" + LLVM_DEBUG({ + llvm::dbgs() << "LLVM IR for module: " << getOperation().getNameAttr() + << "\n" + << llvmModule << "\n"; + }); +#undef DEBUG_TYPE + if (targetOptions.getCompilationTarget() == gpu::TargetOptions::offload) + return SerializeGPUModuleBase::moduleToObject(llvmModule, targetMachine); + + // Translate the Module to ISA. 
+ std::optional serializedISA = + translateToISA(llvmModule, targetMachine); + if (!serializedISA) { + getOperation().emitError() << "Failed translating the module to ISA."; + return std::nullopt; + } +#define DEBUG_TYPE "serialize-to-isa" + LLVM_DEBUG({ + llvm::dbgs() << "ISA for module: " << getOperation().getNameAttr() << "\n" + << *serializedISA << "\n"; + }); +#undef DEBUG_TYPE + // Return ISA assembly code if the compilation target is assembly. + if (targetOptions.getCompilationTarget() == gpu::TargetOptions::assembly) + return SmallVector(serializedISA->begin(), serializedISA->end()); + + // Compile to binary. + return compileToBinary(*serializedISA); +} +#endif // MLIR_ROCM_CONVERSIONS_ENABLED + +std::optional> ROCDLTargetAttrImpl::serializeToObject( + Attribute attribute, Operation *module, + const gpu::TargetOptions &options) const { + assert(module && "The module must be non null."); + if (!module) + return std::nullopt; + if (!mlir::isa(module)) { + module->emitError("Module must be a GPU module."); + return std::nullopt; + } +#if MLIR_ROCM_CONVERSIONS_ENABLED == 1 + AMDGPUSerializer serializer(*module, cast(attribute), + options); + serializer.init(); + return serializer.run(); +#else + module->emitError("The `AMDGPU` target was not built. 
Please enable it when " + "building LLVM."); + return std::nullopt; +#endif // MLIR_ROCM_CONVERSIONS_ENABLED == 1 +} diff --git a/mlir/test/Dialect/GPU/ops.mlir b/mlir/test/Dialect/GPU/ops.mlir --- a/mlir/test/Dialect/GPU/ops.mlir +++ b/mlir/test/Dialect/GPU/ops.mlir @@ -371,3 +371,9 @@ gpu.return } } + +gpu.module @module_with_two_target [#nvvm.target, #rocdl.target] { + gpu.func @kernel(%arg0 : f32) kernel { + gpu.return + } +} diff --git a/mlir/test/Dialect/LLVMIR/rocdl.mlir b/mlir/test/Dialect/LLVMIR/rocdl.mlir --- a/mlir/test/Dialect/LLVMIR/rocdl.mlir +++ b/mlir/test/Dialect/LLVMIR/rocdl.mlir @@ -274,3 +274,12 @@ // expected-error@below {{attribute attached to unexpected op}} func.func private @expected_llvm_func() attributes { rocdl.kernel } + +// ----- + +// Just check these don't emit errors. +gpu.module @module_1 [#rocdl.target] { +} + +gpu.module @module_2 [#rocdl.target, #rocdl.target] { +} diff --git a/mlir/unittests/Target/LLVM/CMakeLists.txt b/mlir/unittests/Target/LLVM/CMakeLists.txt --- a/mlir/unittests/Target/LLVM/CMakeLists.txt +++ b/mlir/unittests/Target/LLVM/CMakeLists.txt @@ -1,5 +1,6 @@ add_mlir_unittest(MLIRTargetLLVMTests SerializeNVVMTarget.cpp + SerializeROCDLTarget.cpp SerializeToLLVMBitcode.cpp ) @@ -9,12 +10,14 @@ PRIVATE MLIRTargetLLVM MLIRNVVMTarget + MLIRROCDLTarget MLIRGPUDialect MLIRNVVMDialect MLIRLLVMDialect MLIRLLVMToLLVMIRTranslation MLIRBuiltinToLLVMIRTranslation MLIRNVVMToLLVMIRTranslation + MLIRROCDLToLLVMIRTranslation MLIRGPUToLLVMIRTranslation ${llvm_libs} ) diff --git a/mlir/unittests/Target/LLVM/SerializeROCDLTarget.cpp b/mlir/unittests/Target/LLVM/SerializeROCDLTarget.cpp new file mode 100644 --- /dev/null +++ b/mlir/unittests/Target/LLVM/SerializeROCDLTarget.cpp @@ -0,0 +1,158 @@ +//===- SerializeROCDLTarget.cpp ---------------------------------*- C++ -*-===// +// +// This file is licensed under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "mlir/Dialect/GPU/IR/GPUDialect.h" +#include "mlir/Dialect/LLVMIR/ROCDLDialect.h" +#include "mlir/IR/MLIRContext.h" +#include "mlir/InitAllDialects.h" +#include "mlir/Parser/Parser.h" +#include "mlir/Target/LLVM/ROCDL/Target.h" +#include "mlir/Target/LLVM/ROCDL/Utils.h" +#include "mlir/Target/LLVMIR/Dialect/Builtin/BuiltinToLLVMIRTranslation.h" +#include "mlir/Target/LLVMIR/Dialect/GPU/GPUToLLVMIRTranslation.h" +#include "mlir/Target/LLVMIR/Dialect/LLVMIR/LLVMToLLVMIRTranslation.h" + +#include "llvm/IRReader/IRReader.h" +#include "llvm/Support/FileSystem.h" +#include "llvm/Support/MemoryBufferRef.h" +#include "llvm/Support/Path.h" +#include "llvm/Support/TargetSelect.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/TargetParser/Host.h" + +#include "gmock/gmock.h" + +using namespace mlir; + +// Skip the test if the AMDGPU target was not built. +#if MLIR_ROCM_CONVERSIONS_ENABLED == 0 +#define SKIP_WITHOUT_AMDGPU(x) DISABLED_##x +#else +#define SKIP_WITHOUT_AMDGPU(x) x +#endif + +class MLIRTargetLLVMROCDL : public ::testing::Test { +protected: + virtual void SetUp() { + registerBuiltinDialectTranslation(registry); + registerLLVMDialectTranslation(registry); + registerGPUDialectTranslation(registry); + registerROCDLTarget(registry); + } + + // Checks if a ROCm installation providing an executable `llvm/bin/ld.lld` is available. + bool hasROCMTools() { + StringRef rocmPath = ROCDL::getROCMPath(); + if (rocmPath.empty()) + return false; + llvm::SmallString<128> lldPath(rocmPath); + llvm::sys::path::append(lldPath, "llvm", "bin", "ld.lld"); + return llvm::sys::fs::can_execute(lldPath); + } + + // Dialect registry. + DialectRegistry registry; + + // MLIR module used for the tests. 
+ const std::string moduleStr = R"mlir( + gpu.module @rocdl_test { + llvm.func @rocdl_kernel(%arg0: f32) attributes {gpu.kernel, rocdl.kernel} { + llvm.return + } + })mlir"; +}; + +// Test ROCDL serialization to LLVM bitcode. +TEST_F(MLIRTargetLLVMROCDL, SKIP_WITHOUT_AMDGPU(SerializeROCDLMToLLVM)) { + MLIRContext context(registry); + + OwningOpRef module = + parseSourceString(moduleStr, &context); + ASSERT_TRUE(!!module); + + // Create a ROCDL target. + ROCDL::ROCDLTargetAttr target = ROCDL::ROCDLTargetAttr::get(&context); + + // Serialize the module. + auto serializer = dyn_cast(target); + ASSERT_TRUE(!!serializer); + gpu::TargetOptions options("", {}, "", gpu::TargetOptions::offload); + for (auto gpuModule : (*module).getBody()->getOps()) { + std::optional> object = + serializer.serializeToObject(gpuModule, options); + // Check that the serializer was successful. + ASSERT_TRUE(object != std::nullopt); + ASSERT_TRUE(object->size() > 0); + + // Read the serialized module. + llvm::MemoryBufferRef buffer(StringRef(object->data(), object->size()), + "module"); + llvm::LLVMContext llvmContext; + llvm::Expected> llvmModule = + llvm::getLazyBitcodeModule(buffer, llvmContext); + ASSERT_TRUE(!!llvmModule); + ASSERT_TRUE(!!*llvmModule); + + // Check that it has a function named `rocdl_kernel`. + ASSERT_TRUE((*llvmModule)->getFunction("rocdl_kernel") != nullptr); + } +} + +// Test ROCDL serialization to ISA assembly. +TEST_F(MLIRTargetLLVMROCDL, SKIP_WITHOUT_AMDGPU(SerializeROCDLToPTX)) { + MLIRContext context(registry); + + OwningOpRef module = + parseSourceString(moduleStr, &context); + ASSERT_TRUE(!!module); + + // Create a ROCDL target. + ROCDL::ROCDLTargetAttr target = ROCDL::ROCDLTargetAttr::get(&context); + + // Serialize the module. 
+ auto serializer = dyn_cast(target); + ASSERT_TRUE(!!serializer); + gpu::TargetOptions options("", {}, "", gpu::TargetOptions::assembly); + for (auto gpuModule : (*module).getBody()->getOps()) { + std::optional> object = + serializer.serializeToObject(gpuModule, options); + // Check that the serializer was successful. + ASSERT_TRUE(object != std::nullopt); + ASSERT_TRUE(object->size() > 0); + + ASSERT_TRUE( + StringRef(object->data(), object->size()).contains("rocdl_kernel")); + } +} + +// Test ROCDL serialization to Binary. +TEST_F(MLIRTargetLLVMROCDL, SKIP_WITHOUT_AMDGPU(SerializeROCDLToBinary)) { + if (!hasROCMTools()) + GTEST_SKIP() << "ROCm installation not found, skipping test."; + + MLIRContext context(registry); + + OwningOpRef module = + parseSourceString(moduleStr, &context); + ASSERT_TRUE(!!module); + + // Create a ROCDL target. + ROCDL::ROCDLTargetAttr target = ROCDL::ROCDLTargetAttr::get(&context); + + // Serialize the module. + auto serializer = dyn_cast(target); + ASSERT_TRUE(!!serializer); + gpu::TargetOptions options("", {}, "", gpu::TargetOptions::binary); + for (auto gpuModule : (*module).getBody()->getOps()) { + std::optional> object = + serializer.serializeToObject(gpuModule, options); + // Check that the serializer was successful. + ASSERT_TRUE(object != std::nullopt); + ASSERT_FALSE(object->empty()); + } +}