diff --git a/mlir/include/mlir/Dialect/LLVMIR/CMakeLists.txt b/mlir/include/mlir/Dialect/LLVMIR/CMakeLists.txt
--- a/mlir/include/mlir/Dialect/LLVMIR/CMakeLists.txt
+++ b/mlir/include/mlir/Dialect/LLVMIR/CMakeLists.txt
@@ -62,4 +62,6 @@
 add_mlir_doc(ROCDLOps ROCDLDialect Dialects/ -gen-dialect-doc -dialect=rocdl)
 set(LLVM_TARGET_DEFINITIONS ROCDLOps.td)
 mlir_tablegen(ROCDLConversions.inc -gen-llvmir-conversions)
+mlir_tablegen(ROCDLOpsAttributes.h.inc -gen-attrdef-decls -attrdefs-dialect=rocdl)
+mlir_tablegen(ROCDLOpsAttributes.cpp.inc -gen-attrdef-defs -attrdefs-dialect=rocdl)
 add_public_tablegen_target(MLIRROCDLConversionsIncGen)
diff --git a/mlir/include/mlir/Dialect/LLVMIR/ROCDLDialect.h b/mlir/include/mlir/Dialect/LLVMIR/ROCDLDialect.h
--- a/mlir/include/mlir/Dialect/LLVMIR/ROCDLDialect.h
+++ b/mlir/include/mlir/Dialect/LLVMIR/ROCDLDialect.h
@@ -23,12 +23,16 @@
 #define MLIR_DIALECT_LLVMIR_ROCDLDIALECT_H_
 
 #include "mlir/Bytecode/BytecodeOpInterface.h"
+#include "mlir/Dialect/GPU/IR/CompilationInterfaces.h"
 #include "mlir/Dialect/LLVMIR/LLVMDialect.h"
 #include "mlir/IR/Dialect.h"
 #include "mlir/IR/OpDefinition.h"
 #include "mlir/Interfaces/SideEffectInterfaces.h"
 
 ///// Ops /////
+#define GET_ATTRDEF_CLASSES
+#include "mlir/Dialect/LLVMIR/ROCDLOpsAttributes.h.inc"
+
 #define GET_OP_CLASSES
 #include "mlir/Dialect/LLVMIR/ROCDLOps.h.inc"
 
diff --git a/mlir/include/mlir/Dialect/LLVMIR/ROCDLOps.td b/mlir/include/mlir/Dialect/LLVMIR/ROCDLOps.td
--- a/mlir/include/mlir/Dialect/LLVMIR/ROCDLOps.td
+++ b/mlir/include/mlir/Dialect/LLVMIR/ROCDLOps.td
@@ -44,8 +44,20 @@
     /// The address space value that represents private memory.
     static constexpr unsigned kPrivateMemoryAddressSpace = 5;
   }];
+
+  let useDefaultAttributePrinterParser = 1;
 }
 
+//===----------------------------------------------------------------------===//
+// ROCDL attribute definitions
+//===----------------------------------------------------------------------===//
+
+class ROCDL_Attr<string attrName, string attrMnemonic, list<Trait> traits = []>
+    : AttrDef<ROCDL_Dialect, attrName, traits> {
+  let mnemonic = attrMnemonic;
+}
+
+
 //===----------------------------------------------------------------------===//
 // ROCDL op definitions
 //===----------------------------------------------------------------------===//
@@ -386,4 +398,91 @@
   let hasCustomAssemblyFormat = 1;
 }
 
+//===----------------------------------------------------------------------===//
+// ROCDL target attribute.
+//===----------------------------------------------------------------------===//
+
+def ROCDL_TargettAttr : ROCDL_Attr<"ROCDLTarget", "target"> {
+  let description = [{
+    ROCDL target attribute for controlling compilation of AMDGPU targets. All
+    parameters decay into default values if not present.
+
+    Examples:
+
+    1. Target with default values.
+    ```
+      gpu.module @mymodule [#rocdl.target] attributes {...} {
+        ...
+      }
+    ```
+
+    2. Target with `gfx90a` chip and fast math.
+    ```
+      gpu.module @mymodule [#rocdl.target<chip = "gfx90a", flags = {fast}>] {
+        ...
+      }
+    ```
+  }];
+  let parameters = (ins
+    DefaultValuedParameter<"int", "2", "Optimization level to apply.">:$O,
+    StringRefParameter<"Target triple.", "\"amdgcn-amd-amdhsa\"">:$triple,
+    StringRefParameter<"Target chip.", "\"gfx900\"">:$chip,
+    StringRefParameter<"Target chip features.", "\"\"">:$features,
+    StringRefParameter<"ABI version.", "\"400\"">:$abi,
+    OptionalParameter<"DictionaryAttr", "Target specific flags.">:$flags,
+    OptionalParameter<"ArrayAttr", "Files to link to the LLVM module.">:$link
+  );
+  let assemblyFormat = [{
+    (`<` struct($O, $triple, $chip, $features, $abi, $flags, $link)^ `>`)?
+  }];
+  let builders = [
+    AttrBuilder<(ins CArg<"int", "2">:$optLevel,
+                     CArg<"StringRef", "\"amdgcn-amd-amdhsa\"">:$triple,
+                     CArg<"StringRef", "\"gfx900\"">:$chip,
+                     CArg<"StringRef", "\"\"">:$features,
+                     CArg<"StringRef", "\"400\"">:$abiVersion,
+                     CArg<"DictionaryAttr", "nullptr">:$targetFlags,
+                     CArg<"ArrayAttr", "nullptr">:$linkFiles), [{
+      return Base::get($_ctxt, optLevel, triple, chip, features, abiVersion,
+                       targetFlags, linkFiles);
+    }]>
+  ];
+  let skipDefaultBuilders = 1;
+  let genVerifyDecl = 1;
+  let extraClassDeclaration = [{
+    bool hasFlag(StringRef flag) const;
+    bool hasWave64() const;
+    bool hasFastMath() const;
+    bool hasDaz() const;
+    bool hasFiniteOnly() const;
+    bool hasUnsafeMath() const;
+    bool hasCorrectSqrt() const;
+  }];
+  let extraClassDefinition = [{
+    bool $cppClass::hasFlag(StringRef flag) const {
+      if (DictionaryAttr flags = getFlags())
+        return flags.get(flag) != nullptr;
+      return false;
+    }
+    bool $cppClass::hasWave64() const {
+      return hasFlag("wave64") || !hasFlag("no_wave64");
+    }
+    bool $cppClass::hasFastMath() const {
+      return hasFlag("fast");
+    }
+    bool $cppClass::hasDaz() const {
+      return hasFlag("daz");
+    }
+    bool $cppClass::hasFiniteOnly() const {
+      return hasFlag("finite_only");
+    }
+    bool $cppClass::hasUnsafeMath() const {
+      return hasFlag("unsafe_math");
+    }
+    bool $cppClass::hasCorrectSqrt() const {
+      return !hasFlag("unsafe_sqrt");
+    }
+  }];
+}
+
 #endif // ROCDLIR_OPS
diff --git a/mlir/include/mlir/InitAllExtensions.h b/mlir/include/mlir/InitAllExtensions.h
--- a/mlir/include/mlir/InitAllExtensions.h
+++ b/mlir/include/mlir/InitAllExtensions.h
@@ -16,6 +16,7 @@
 
 #include "mlir/Dialect/Func/Extensions/AllExtensions.h"
 #include "mlir/Target/LLVM/NVVM/Target.h"
+#include "mlir/Target/LLVM/ROCDL/Target.h"
 
 #include <cstdlib>
 
@@ -29,6 +30,7 @@
 inline void registerAllExtensions(DialectRegistry &registry) {
   func::registerAllExtensions(registry);
   registerNVVMTarget(registry);
+  registerROCDLTarget(registry);
 }
 
 } // namespace mlir
diff --git a/mlir/include/mlir/Target/LLVM/ROCDL/Target.h b/mlir/include/mlir/Target/LLVM/ROCDL/Target.h
new file mode 100644
--- /dev/null
+++ b/mlir/include/mlir/Target/LLVM/ROCDL/Target.h
@@ -0,0 +1,28 @@
+//===- Target.h - MLIR ROCDL target registration ----------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This provides registration calls for attaching the ROCDL target interface.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MLIR_TARGET_LLVM_ROCDL_TARGET_H
+#define MLIR_TARGET_LLVM_ROCDL_TARGET_H
+
+namespace mlir {
+class DialectRegistry;
+class MLIRContext;
+/// Registers the `TargetAttrInterface` for the `#rocdl.target` attribute in the
+/// given registry.
+void registerROCDLTarget(DialectRegistry &registry);
+
+/// Registers the `TargetAttrInterface` for the `#rocdl.target` attribute in the
+/// registry associated with the given context.
+void registerROCDLTarget(MLIRContext &context);
+} // namespace mlir
+
+#endif // MLIR_TARGET_LLVM_ROCDL_TARGET_H
diff --git a/mlir/include/mlir/Target/LLVM/ROCDL/Utils.h b/mlir/include/mlir/Target/LLVM/ROCDL/Utils.h
new file mode 100644
--- /dev/null
+++ b/mlir/include/mlir/Target/LLVM/ROCDL/Utils.h
@@ -0,0 +1,90 @@
+//===- Utils.h - MLIR ROCDL target compilation ------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares ROCDL target related utility classes and functions.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MLIR_TARGET_LLVM_ROCDL_UTILS_H
+#define MLIR_TARGET_LLVM_ROCDL_UTILS_H
+
+#include "mlir/Dialect/GPU/IR/CompilationInterfaces.h"
+#include "mlir/Dialect/LLVMIR/ROCDLDialect.h"
+#include "mlir/Target/LLVM/ModuleToObject.h"
+
+namespace mlir {
+class DialectRegistry;
+class MLIRContext;
+namespace ROCDL {
+/// Searches for and returns the ROCM toolkit path; the search order is:
+/// 1. The `ROCM_PATH` environment variable.
+/// 2. The `ROCM_ROOT` environment variable.
+/// 3. The `ROCM_HOME` environment variable.
+/// 4. The ROCM path detected by CMake.
+/// 5. Returns an empty string.
+StringRef getROCMPath();
+
+/// Base class for all ROCDL serializations from GPU modules into binary
+/// strings. By default this class serializes into LLVM bitcode.
+class SerializeGPUModuleBase : public LLVM::ModuleToObject {
+public:
+  /// Initializes the `toolkitPath` with the path in `targetOptions` or if empty
+  /// with the path in `getROCMPath`.
+  SerializeGPUModuleBase(Operation &module, ROCDLTargetAttr target,
+                         const gpu::TargetOptions &targetOptions = {});
+
+  /// Initializes the LLVM AMDGPU target by safely calling
+  /// `LLVMInitializeAMDGPU*` methods if available.
+  static void init();
+
+  /// Returns the target attribute.
+  ROCDLTargetAttr getTarget() const;
+
+  /// Returns the ROCM toolkit path.
+  StringRef getToolkitPath() const;
+
+  /// Returns the bitcode files to be loaded.
+  ArrayRef<std::string> getFileList() const;
+
+  /// Appends standard ROCm device libraries like `ocml.bc`, `ockl.bc`, etc.
+  LogicalResult appendStandardLibs();
+
+  /// Loads the bitcode files in `fileList`.
+  virtual std::optional<SmallVector<std::unique_ptr<llvm::Module>>>
+  loadBitcodeFiles(llvm::Module &module,
+                   llvm::TargetMachine &targetMachine) override;
+
+  /// Removes unnecessary metadata from the loaded bitcode files.
+  LogicalResult handleBitcodeFile(llvm::Module &module,
+                                  llvm::TargetMachine &targetMachine) override;
+
+protected:
+  /// Appends the paths of common ROCm device libraries to `libs`.
+  LogicalResult getCommonBitcodeLibs(llvm::SmallVector<std::string> &libs,
+                                     SmallVector<char, 256> &libPath,
+                                     StringRef isaVersion, bool wave64,
+                                     bool daz, bool finiteOnly, bool unsafeMath,
+                                     bool fastMath, bool correctSqrt,
+                                     StringRef abiVer);
+
+  /// Returns the assembled ISA.
+  std::optional<SmallVector<char, 0>> assembleIsa(StringRef isa);
+
+  /// ROCDL target attribute.
+  ROCDLTargetAttr target;
+
+  /// ROCM toolkit path.
+  std::string toolkitPath;
+
+  /// List of LLVM bitcode files to link to.
+  SmallVector<std::string> fileList;
+};
+} // namespace ROCDL
+} // namespace mlir
+
+#endif // MLIR_TARGET_LLVM_ROCDL_UTILS_H
diff --git a/mlir/lib/Dialect/LLVMIR/IR/ROCDLDialect.cpp b/mlir/lib/Dialect/LLVMIR/IR/ROCDLDialect.cpp
--- a/mlir/lib/Dialect/LLVMIR/IR/ROCDLDialect.cpp
+++ b/mlir/lib/Dialect/LLVMIR/IR/ROCDLDialect.cpp
@@ -19,8 +19,10 @@
 #include "mlir/Dialect/LLVMIR/LLVMDialect.h"
 #include "mlir/IR/Builders.h"
 #include "mlir/IR/BuiltinTypes.h"
+#include "mlir/IR/DialectImplementation.h"
 #include "mlir/IR/MLIRContext.h"
 #include "mlir/IR/Operation.h"
+#include "llvm/ADT/TypeSwitch.h"
 #include "llvm/AsmParser/Parser.h"
 #include "llvm/IR/Attributes.h"
 #include "llvm/IR/Function.h"
@@ -237,8 +239,14 @@
 #include "mlir/Dialect/LLVMIR/ROCDLOps.cpp.inc"
       >();
 
+  addAttributes<
+#define GET_ATTRDEF_LIST
+#include "mlir/Dialect/LLVMIR/ROCDLOpsAttributes.cpp.inc"
+      >();
+
   // Support unknown operations because not all ROCDL operations are registered.
   allowUnknownOperations();
+  declarePromisedInterface<ROCDLTargetAttr, gpu::TargetAttrInterface>();
 }
 
 LogicalResult ROCDLDialect::verifyOperationAttribute(Operation *op,
@@ -253,5 +261,41 @@
   return success();
 }
 
+//===----------------------------------------------------------------------===//
+// ROCDL target attribute.
+//===----------------------------------------------------------------------===//
+LogicalResult
+ROCDLTargetAttr::verify(function_ref<InFlightDiagnostic()> emitError,
+                        int optLevel, StringRef triple, StringRef chip,
+                        StringRef features, StringRef abiVersion,
+                        DictionaryAttr flags, ArrayAttr files) {
+  if (optLevel < 0 || optLevel > 3) {
+    emitError() << "The optimization level must be a number between 0 and 3.";
+    return failure();
+  }
+  if (triple.empty()) {
+    emitError() << "The target triple cannot be empty.";
+    return failure();
+  }
+  if (chip.empty()) {
+    emitError() << "The target chip cannot be empty.";
+    return failure();
+  }
+  if (abiVersion != "400" && abiVersion != "500") {
+    emitError() << "Invalid ABI version, it must be either `400` or `500`.";
+    return failure();
+  }
+  if (files && !llvm::all_of(files, [](::mlir::Attribute attr) {
+        return attr && mlir::isa<StringAttr>(attr);
+      })) {
+    emitError() << "All the elements in the `link` array must be strings.";
+    return failure();
+  }
+  return success();
+}
+
 #define GET_OP_CLASSES
 #include "mlir/Dialect/LLVMIR/ROCDLOps.cpp.inc"
+
+#define GET_ATTRDEF_CLASSES
+#include "mlir/Dialect/LLVMIR/ROCDLOpsAttributes.cpp.inc"
diff --git a/mlir/lib/Target/LLVM/CMakeLists.txt b/mlir/lib/Target/LLVM/CMakeLists.txt
--- a/mlir/lib/Target/LLVM/CMakeLists.txt
+++ b/mlir/lib/Target/LLVM/CMakeLists.txt
@@ -89,3 +89,61 @@
     ${CUDA_DRIVER_LIBRARY}
   )
 endif()
+
+if (MLIR_ENABLE_ROCM_CONVERSIONS)
+  set(AMDGPU_LIBS
+    IRReader
+    IPO
+    linker
+    MCParser
+    AMDGPUAsmParser
+    AMDGPUCodeGen
+    AMDGPUDesc
+    AMDGPUInfo
+    target
+  )
+endif()
+
+add_mlir_dialect_library(MLIRROCDLTarget
+  ROCDL/Target.cpp
+
+  LINK_COMPONENTS
+  Core
+  MC
+  Target
+  ${AMDGPU_LIBS}
+
+  LINK_LIBS PUBLIC
+  MLIRIR
+  MLIRExecutionEngineUtils
+  MLIRSupport
+  MLIRGPUDialect
+  MLIRTargetLLVM
+  )
+
+if(MLIR_ENABLE_ROCM_CONVERSIONS)
+  if (NOT ("AMDGPU" IN_LIST LLVM_TARGETS_TO_BUILD))
+    message(SEND_ERROR
+      "Building mlir with ROCm support requires the AMDGPU backend")
+  endif()
+
+  if (DEFINED ROCM_PATH)
+    set(DEFAULT_ROCM_PATH "${ROCM_PATH}" CACHE PATH "Fallback path to search for ROCm installs")
+  elseif(DEFINED ENV{ROCM_PATH})
+    set(DEFAULT_ROCM_PATH "$ENV{ROCM_PATH}" CACHE PATH "Fallback path to search for ROCm installs")
+  else()
+    set(DEFAULT_ROCM_PATH "/opt/rocm" CACHE PATH "Fallback path to search for ROCm installs")
+  endif()
+  message(VERBOSE "MLIR Default ROCM toolkit path: ${DEFAULT_ROCM_PATH}")
+
+  target_compile_definitions(obj.MLIRROCDLTarget
+    PRIVATE
+    MLIR_GPU_AMDGPU_TARGET_ENABLED=1
+    __DEFAULT_ROCM_PATH__="${DEFAULT_ROCM_PATH}"
+  )
+
+  target_link_libraries(MLIRROCDLTarget
+    PRIVATE
+    MLIRROCDLToLLVMIRTranslation
+  )
+endif()
diff --git a/mlir/lib/Target/LLVM/ROCDL/Target.cpp b/mlir/lib/Target/LLVM/ROCDL/Target.cpp
new file mode 100644
--- /dev/null
+++ b/mlir/lib/Target/LLVM/ROCDL/Target.cpp
@@ -0,0 +1,402 @@
+//===- Target.cpp - MLIR LLVM ROCDL target compilation ----------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines ROCDL target related functions including registration
+// calls for the `#rocdl.target` compilation attribute.
+//
+//===----------------------------------------------------------------------===//
+
+#include "mlir/Target/LLVM/ROCDL/Target.h"
+
+#include "mlir/Dialect/GPU/IR/GPUDialect.h"
+#include "mlir/Dialect/LLVMIR/ROCDLDialect.h"
+#include "mlir/Support/FileUtilities.h"
+#include "mlir/Target/LLVM/ROCDL/Utils.h"
+#include "mlir/Target/LLVMIR/Dialect/GPU/GPUToLLVMIRTranslation.h"
+#include "mlir/Target/LLVMIR/Dialect/LLVMIR/LLVMToLLVMIRTranslation.h"
+#include "mlir/Target/LLVMIR/Dialect/ROCDL/ROCDLToLLVMIRTranslation.h"
+#include "mlir/Target/LLVMIR/Export.h"
+
+#include "llvm/MC/MCAsmBackend.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCCodeEmitter.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/MC/MCObjectFileInfo.h"
+#include "llvm/MC/MCObjectWriter.h"
+#include "llvm/MC/MCParser/MCTargetAsmParser.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/MC/TargetRegistry.h"
+#include "llvm/Support/FileSystem.h"
+#include "llvm/Support/FileUtilities.h"
+#include "llvm/Support/Path.h"
+#include "llvm/Support/SourceMgr.h"
+#include "llvm/Support/TargetSelect.h"
+#include "llvm/TargetParser/TargetParser.h"
+
+#include <cstdlib>
+
+using namespace mlir;
+using namespace mlir::ROCDL;
+
+#ifndef __DEFAULT_ROCM_PATH__
+#define __DEFAULT_ROCM_PATH__ ""
+#endif
+
+namespace {
+// External model implementing `gpu::TargetAttrInterface` for `#rocdl.target`.
+class ROCDLTargetAttrImpl
+    : public gpu::TargetAttrInterface::FallbackModel<ROCDLTargetAttrImpl> {
+public:
+  std::optional<SmallVector<char, 0>>
+  serializeToObject(Attribute attribute, Operation *module,
+                    const gpu::TargetOptions &options) const;
+};
+} // namespace
+
+// Register the ROCDL dialect, the ROCDL translation and the target interface.
+void mlir::registerROCDLTarget(DialectRegistry &registry) {
+  registerROCDLDialectTranslation(registry);
+  registry.addExtension(+[](MLIRContext *ctx, ROCDL::ROCDLDialect *dialect) {
+    ROCDLTargetAttr::attachInterface<ROCDLTargetAttrImpl>(*ctx);
+  });
+}
+
+void mlir::registerROCDLTarget(MLIRContext &context) {
+  DialectRegistry registry;
+  registerROCDLTarget(registry);
+  context.appendDialectRegistry(registry);
+}
+
+// Search for the ROCM path.
+StringRef mlir::ROCDL::getROCMPath() {
+  if (const char *var = std::getenv("ROCM_PATH"))
+    return var;
+  if (const char *var = std::getenv("ROCM_ROOT"))
+    return var;
+  if (const char *var = std::getenv("ROCM_HOME"))
+    return var;
+  return __DEFAULT_ROCM_PATH__;
+}
+
+SerializeGPUModuleBase::SerializeGPUModuleBase(
+    Operation &module, ROCDLTargetAttr target,
+    const gpu::TargetOptions &targetOptions)
+    : ModuleToObject(module, target.getTriple(), target.getChip(),
+                     target.getFeatures(), target.getO()),
+      target(target), toolkitPath(targetOptions.getToolkitPath()),
+      fileList(targetOptions.getBitcodeFiles()) {
+
+  // If `targetOptions` has an empty toolkitPath use `getROCMPath`
+  if (toolkitPath.empty())
+    toolkitPath = getROCMPath();
+
+  // Append the files in the target attribute.
+  if (ArrayAttr files = target.getLink())
+    for (Attribute attr : files.getValue())
+      if (auto file = dyn_cast<StringAttr>(attr))
+        fileList.push_back(file.str());
+
+  // Append standard ROCm device bitcode libraries to the files to be loaded.
+  (void)appendStandardLibs();
+}
+
+void SerializeGPUModuleBase::init() {
+  static llvm::once_flag initializeBackendOnce;
+  llvm::call_once(initializeBackendOnce, []() {
+  // If the `AMDGPU` LLVM target was built, initialize it.
+#if MLIR_ROCM_CONVERSIONS_ENABLED == 1
+    LLVMInitializeAMDGPUTarget();
+    LLVMInitializeAMDGPUTargetInfo();
+    LLVMInitializeAMDGPUTargetMC();
+    LLVMInitializeAMDGPUAsmParser();
+    LLVMInitializeAMDGPUAsmPrinter();
+#endif
+  });
+}
+
+ROCDLTargetAttr SerializeGPUModuleBase::getTarget() const { return target; }
+
+StringRef SerializeGPUModuleBase::getToolkitPath() const { return toolkitPath; }
+
+ArrayRef<std::string> SerializeGPUModuleBase::getFileList() const {
+  return fileList;
+}
+
+LogicalResult SerializeGPUModuleBase::appendStandardLibs() {
+  StringRef pathRef = getToolkitPath();
+  if (pathRef.size()) {
+    SmallVector<char, 256> path;
+    path.insert(path.begin(), pathRef.begin(), pathRef.end());
+    llvm::sys::path::append(path, "amdgcn", "bitcode");
+    pathRef = StringRef(path.data(), path.size());
+    if (!llvm::sys::fs::is_directory(pathRef)) {
+      getOperation().emitRemark() << "ROCm amdgcn bitcode path: " << pathRef
+                                  << " does not exist or is not a directory.";
+      return failure();
+    }
+    StringRef isaVersion =
+        llvm::AMDGPU::getArchNameAMDGCN(llvm::AMDGPU::parseArchAMDGCN(chip));
+    isaVersion.consume_front("gfx");
+    return getCommonBitcodeLibs(fileList, path, isaVersion, target.hasWave64(),
+                                target.hasDaz(), target.hasFiniteOnly(),
+                                target.hasUnsafeMath(), target.hasFastMath(),
+                                target.hasCorrectSqrt(), target.getAbi());
+  }
+  return success();
+}
+
+std::optional<SmallVector<std::unique_ptr<llvm::Module>>>
+SerializeGPUModuleBase::loadBitcodeFiles(llvm::Module &module,
+                                         llvm::TargetMachine &targetMachine) {
+  SmallVector<std::unique_ptr<llvm::Module>> bcFiles;
+  if (failed(loadBitcodeFilesFromList(module.getContext(), targetMachine,
+                                      fileList, bcFiles, true)))
+    return std::nullopt;
+  return bcFiles;
+}
+
+LogicalResult
+SerializeGPUModuleBase::handleBitcodeFile(llvm::Module &module,
+                                          llvm::TargetMachine &targetMachine) {
+  // Some ROCM builds don't strip this like they should
+  if (auto *openclVersion = module.getNamedMetadata("opencl.ocl.version"))
+    module.eraseNamedMetadata(openclVersion);
+  // Stop spamming us with clang version numbers
+  if (auto *ident = module.getNamedMetadata("llvm.ident"))
+    module.eraseNamedMetadata(ident);
+  return success();
+}
+
+// Get the paths of ROCm device libraries. Function adapted from:
+// https://github.com/llvm/llvm-project/blob/main/clang/lib/Driver/ToolChains/AMDGPU.cpp
+LogicalResult SerializeGPUModuleBase::getCommonBitcodeLibs(
+    llvm::SmallVector<std::string> &libs, SmallVector<char, 256> &libPath,
+    StringRef isaVersion, bool wave64, bool daz, bool finiteOnly,
+    bool unsafeMath, bool fastMath, bool correctSqrt, StringRef abiVer) {
+  auto addLib = [&](StringRef path) -> bool {
+    if (!llvm::sys::fs::is_regular_file(path)) {
+      getOperation().emitRemark() << "Bitcode library path: " << path
+                                  << " does not exist or is not a file.\n";
+      return true;
+    }
+    libs.push_back(path.str());
+    return false;
+  };
+  auto optLib = [](StringRef name, bool on) -> Twine {
+    return name + (on ? "_on" : "_off");
+  };
+  auto getLibPath = [&libPath](Twine lib) {
+    auto baseSize = libPath.size();
+    llvm::sys::path::append(libPath, lib + ".bc");
+    std::string path(StringRef(libPath.data(), libPath.size()).str());
+    libPath.truncate(baseSize);
+    return path;
+  };
+
+  // Add ROCm device libraries. Fail if any of the libraries is not found.
+  if (addLib(getLibPath("ocml")) || addLib(getLibPath("ockl")) ||
+      addLib(getLibPath(optLib("oclc_daz_opt", daz))) ||
+      addLib(getLibPath(optLib("oclc_unsafe_math", unsafeMath || fastMath))) ||
+      addLib(getLibPath(optLib("oclc_finite_only", finiteOnly || fastMath))) ||
+      addLib(getLibPath(optLib("oclc_correctly_rounded_sqrt", correctSqrt))) ||
+      addLib(getLibPath(optLib("oclc_wavefrontsize64", wave64))) ||
+      addLib(getLibPath("oclc_isa_version_" + isaVersion)))
+    return failure();
+  if (abiVer.size() && addLib(getLibPath("oclc_abi_version_" + abiVer)))
+    return failure();
+  return success();
+}
+
+std::optional<SmallVector<char, 0>>
+SerializeGPUModuleBase::assembleIsa(StringRef isa) {
+  auto loc = getOperation().getLoc();
+
+  StringRef targetTriple = this->triple;
+
+  SmallVector<char, 0> result;
+  llvm::raw_svector_ostream os(result);
+
+  llvm::Triple triple(llvm::Triple::normalize(targetTriple));
+  std::string error;
+  const llvm::Target *target =
+      llvm::TargetRegistry::lookupTarget(triple.normalize(), error);
+  if (!target) {
+    emitError(loc, Twine("failed to lookup target: ") + error);
+    return std::nullopt;
+  }
+
+  llvm::SourceMgr srcMgr;
+  srcMgr.AddNewSourceBuffer(llvm::MemoryBuffer::getMemBuffer(isa), SMLoc());
+
+  const llvm::MCTargetOptions mcOptions;
+  std::unique_ptr<llvm::MCRegisterInfo> mri(
+      target->createMCRegInfo(targetTriple));
+  std::unique_ptr<llvm::MCAsmInfo> mai(
+      target->createMCAsmInfo(*mri, targetTriple, mcOptions));
+  mai->setRelaxELFRelocations(true);
+  std::unique_ptr<llvm::MCSubtargetInfo> sti(
+      target->createMCSubtargetInfo(targetTriple, chip, features));
+
+  llvm::MCContext ctx(triple, mai.get(), mri.get(), sti.get(), &srcMgr,
+                      &mcOptions);
+  std::unique_ptr<llvm::MCObjectFileInfo> mofi(target->createMCObjectFileInfo(
+      ctx, /*PIC=*/false, /*LargeCodeModel=*/false));
+  ctx.setObjectFileInfo(mofi.get());
+
+  SmallString<128> cwd;
+  if (!llvm::sys::fs::current_path(cwd))
+    ctx.setCompilationDir(cwd);
+
+  std::unique_ptr<llvm::MCStreamer> mcStreamer;
+  std::unique_ptr<llvm::MCInstrInfo> mcii(target->createMCInstrInfo());
+
+  llvm::MCCodeEmitter *ce = target->createMCCodeEmitter(*mcii, ctx);
+  llvm::MCAsmBackend *mab = target->createMCAsmBackend(*sti, *mri, mcOptions);
+  mcStreamer.reset(target->createMCObjectStreamer(
+      triple, ctx, std::unique_ptr<llvm::MCAsmBackend>(mab),
+      mab->createObjectWriter(os), std::unique_ptr<llvm::MCCodeEmitter>(ce),
+      *sti, mcOptions.MCRelaxAll, mcOptions.MCIncrementalLinkerCompatible,
+      /*DWARFMustBeAtTheEnd*/ false));
+  mcStreamer->setUseAssemblerInfoForParsing(true);
+
+  std::unique_ptr<llvm::MCAsmParser> parser(
+      createMCAsmParser(srcMgr, ctx, *mcStreamer, *mai));
+  std::unique_ptr<llvm::MCTargetAsmParser> tap(
+      target->createMCAsmParser(*sti, *parser, *mcii, mcOptions));
+
+  if (!tap) {
+    emitError(loc, "assembler initialization error");
+    return {};
+  }
+
+  parser->setTargetParser(*tap);
+  if (parser->Run(false)) // `Run` returns true when assembling failed.
+    return std::nullopt;
+  return result;
+}
+
+#ifdef MLIR_GPU_AMDGPU_TARGET_ENABLED
+#include "llvm/Support/Program.h"
+
+#define DEBUG_TYPE "serialize-to-object"
+
+namespace {
+class SerializeToHSA : public SerializeGPUModuleBase {
+public:
+  using SerializeGPUModuleBase::SerializeGPUModuleBase;
+
+  // Links the assembled ISA object in `ptx` into an HSA code object.
+  std::optional<SmallVector<char, 0>> createHsaco(SmallVector<char, 0> &&ptx);
+
+  std::optional<SmallVector<char, 0>>
+  moduleToObject(llvm::Module &llvmModule,
+                 llvm::TargetMachine &targetMachine) override;
+};
+} // namespace
+
+std::optional<SmallVector<char, 0>>
+SerializeToHSA::createHsaco(SmallVector<char, 0> &&ptx) {
+  SmallVector<char, 0> isaBinary = std::move(ptx);
+  auto loc = getOperation().getLoc();
+
+  // Save the ISA binary to a temp file.
+  int tempIsaBinaryFd = -1;
+  SmallString<128> tempIsaBinaryFilename;
+  if (llvm::sys::fs::createTemporaryFile("kernel", "o", tempIsaBinaryFd,
+                                         tempIsaBinaryFilename)) {
+    emitError(loc, "temporary file for ISA binary creation error");
+    return {};
+  }
+  llvm::FileRemover cleanupIsaBinary(tempIsaBinaryFilename);
+  llvm::raw_fd_ostream tempIsaBinaryOs(tempIsaBinaryFd, true);
+  tempIsaBinaryOs << StringRef(isaBinary.data(), isaBinary.size());
+  tempIsaBinaryOs.close();
+
+  // Create a temp file for HSA code object. Only its name is needed because
+  // `ld.lld` writes it, so use the overload that leaves no descriptor open.
+  SmallString<128> tempHsacoFilename;
+  if (llvm::sys::fs::createTemporaryFile("kernel", "hsaco",
+                                         tempHsacoFilename)) {
+    emitError(loc, "temporary file for HSA code object creation error");
+    return {};
+  }
+  llvm::FileRemover cleanupHsaco(tempHsacoFilename);
+
+  llvm::SmallString<32> lldPath(toolkitPath);
+  llvm::sys::path::append(lldPath, "llvm", "bin", "ld.lld");
+  int lldResult = llvm::sys::ExecuteAndWait(
+      lldPath,
+      {"ld.lld", "-shared", tempIsaBinaryFilename, "-o", tempHsacoFilename});
+  if (lldResult != 0) {
+    emitError(loc, "lld invocation error");
+    return {};
+  }
+
+  // Load the HSA code object.
+  auto hsacoFile = openInputFile(tempHsacoFilename);
+  if (!hsacoFile) {
+    emitError(loc, "read HSA code object from temp file error");
+    return {};
+  }
+
+  StringRef buffer = hsacoFile->getBuffer();
+
+  return SmallVector<char, 0>(buffer.begin(), buffer.end());
+}
+
+std::optional<SmallVector<char, 0>>
+SerializeToHSA::moduleToObject(llvm::Module &llvmModule,
+                               llvm::TargetMachine &targetMachine) {
+  std::optional<std::string> serializedISA =
+      translateToISA(llvmModule, targetMachine);
+  if (!serializedISA) {
+    getOperation().emitError() << "Failed translating the module to ISA.";
+    return std::nullopt;
+  }
+
+  LLVM_DEBUG({
+    llvm::dbgs() << "ISA for module: "
+                 << dyn_cast<gpu::GPUModuleOp>(&getOperation()).getNameAttr()
+                 << "\n";
+    llvm::dbgs() << *serializedISA << "\n";
+    llvm::dbgs().flush();
+  });
+
+  std::optional<SmallVector<char, 0>> assembledIsa =
+      assembleIsa(serializedISA.value());
+
+  if (!assembledIsa) {
+    getOperation().emitError() << "Failed during ISA assembling.";
+    return std::nullopt;
+  }
+
+  return createHsaco(std::move(assembledIsa.value()));
+}
+#endif // MLIR_GPU_AMDGPU_TARGET_ENABLED
+
+std::optional<SmallVector<char, 0>> ROCDLTargetAttrImpl::serializeToObject(
+    Attribute attribute, Operation *module,
+    const gpu::TargetOptions &options) const {
+  assert(module && "The module must be non null.");
+  if (!module)
+    return std::nullopt;
+  if (!mlir::isa<gpu::GPUModuleOp>(module)) {
+    module->emitError("Module must be a GPU module.");
+    return std::nullopt;
+  }
+#ifdef MLIR_GPU_AMDGPU_TARGET_ENABLED
+  SerializeToHSA serializer(*module, cast<ROCDLTargetAttr>(attribute), options);
+  serializer.init();
+  return serializer.run();
+#else
+  return SmallVector<char, 0>{};
+#endif
+}
diff --git a/mlir/unittests/Target/LLVM/CMakeLists.txt b/mlir/unittests/Target/LLVM/CMakeLists.txt
--- a/mlir/unittests/Target/LLVM/CMakeLists.txt
+++ b/mlir/unittests/Target/LLVM/CMakeLists.txt
@@ -1,5 +1,6 @@
 add_mlir_unittest(MLIRTargetLLVMTests
   SerializeNVVMTarget.cpp
+  SerializeROCDLTarget.cpp
   SerializeToLLVMBitcode.cpp
 )
 
diff --git a/mlir/unittests/Target/LLVM/SerializeROCDLTarget.cpp b/mlir/unittests/Target/LLVM/SerializeROCDLTarget.cpp
new file mode 100644
--- /dev/null
+++ b/mlir/unittests/Target/LLVM/SerializeROCDLTarget.cpp
@@ -0,0 +1,64 @@
+//===- SerializeROCDLTarget.cpp ---------------------------------*- C++ -*-===//
+//
+// This file is licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "mlir/Dialect/GPU/IR/GPUDialect.h"
+#include "mlir/Dialect/LLVMIR/ROCDLDialect.h"
+#include "mlir/IR/MLIRContext.h"
+#include "mlir/InitAllDialects.h"
+#include "mlir/Parser/Parser.h"
+#include "mlir/Target/LLVM/ROCDL/Target.h"
+#include "mlir/Target/LLVMIR/Dialect/Builtin/BuiltinToLLVMIRTranslation.h"
+#include "mlir/Target/LLVMIR/Dialect/GPU/GPUToLLVMIRTranslation.h"
+#include "mlir/Target/LLVMIR/Dialect/LLVMIR/LLVMToLLVMIRTranslation.h"
+
+#include "llvm/IRReader/IRReader.h"
+#include "llvm/Support/MemoryBufferRef.h"
+#include "llvm/Support/TargetSelect.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/TargetParser/Host.h"
+
+#include "gmock/gmock.h"
+
+using namespace mlir;
+
+#if MLIR_ROCM_CONVERSIONS_ENABLED == 1
+TEST(MLIRTargetLLVM, SerializeROCDLModule) {
+  std::string moduleStr = R"mlir(
+  gpu.module @kernels {
+    llvm.func @kernel(%arg0: f32) attributes {gpu.kernel, rocdl.kernel} {
+      llvm.return
+    }
+  }
+  )mlir";
+
+  DialectRegistry registry;
+  registerBuiltinDialectTranslation(registry);
+  registerLLVMDialectTranslation(registry);
+  registerGPUDialectTranslation(registry);
+  registerROCDLTarget(registry);
+  MLIRContext context(registry);
+
+  OwningOpRef<ModuleOp> module =
+      parseSourceString<ModuleOp>(moduleStr, &context);
+  ASSERT_TRUE(!!module);
+
+  // Create a ROCDL target with all parameters left at their defaults.
+  ROCDL::ROCDLTargetAttr target = ROCDL::ROCDLTargetAttr::get(&context);
+
+  // Serialize the module through the attached target interface.
+  auto serializer = dyn_cast<gpu::TargetAttrInterface>(target);
+  ASSERT_TRUE(!!serializer);
+  for (auto gpuModule : (*module).getBody()->getOps<gpu::GPUModuleOp>()) {
+    std::optional<SmallVector<char, 0>> object =
+        serializer.serializeToObject(gpuModule, {});
+    // Check that the serializer was successful.
+    ASSERT_TRUE(object != std::nullopt);
+    ASSERT_TRUE(object->size() > 0);
+  }
+}
+#endif