diff --git a/mlir/CMakeLists.txt b/mlir/CMakeLists.txt --- a/mlir/CMakeLists.txt +++ b/mlir/CMakeLists.txt @@ -97,7 +97,7 @@ # Build the CUDA conversions and run according tests if the NVPTX backend # is available -if ("NVPTX" IN_LIST LLVM_TARGETS_TO_BUILD AND MLIR_ENABLE_EXECUTION_ENGINE) +if ("NVPTX" IN_LIST LLVM_TARGETS_TO_BUILD) set(MLIR_ENABLE_CUDA_CONVERSIONS 1) else() set(MLIR_ENABLE_CUDA_CONVERSIONS 0) @@ -118,6 +118,9 @@ set(MLIR_ENABLE_ROCM_RUNNER 0 CACHE BOOL "Enable building the mlir ROCm runner") set(MLIR_ENABLE_SPIRV_CPU_RUNNER 0 CACHE BOOL "Enable building the mlir SPIR-V cpu runner") set(MLIR_ENABLE_VULKAN_RUNNER 0 CACHE BOOL "Enable building the mlir Vulkan runner") +set(MLIR_ENABLE_NVPTXCOMPILER 0 CACHE BOOL + "Statically link the nvptxlibrary instead of calling ptxas as a subprocess \ + for compiling PTX to cubin") option(MLIR_INCLUDE_TESTS "Generate build targets for the MLIR unit tests." diff --git a/mlir/include/mlir/Dialect/LLVMIR/NVVMDialect.h b/mlir/include/mlir/Dialect/LLVMIR/NVVMDialect.h --- a/mlir/include/mlir/Dialect/LLVMIR/NVVMDialect.h +++ b/mlir/include/mlir/Dialect/LLVMIR/NVVMDialect.h @@ -15,6 +15,7 @@ #define MLIR_DIALECT_LLVMIR_NVVMDIALECT_H_ #include "mlir/Bytecode/BytecodeOpInterface.h" +#include "mlir/Dialect/GPU/IR/CompilationInterfaces.h" #include "mlir/Dialect/LLVMIR/LLVMDialect.h" #include "mlir/IR/Dialect.h" #include "mlir/IR/OpDefinition.h" diff --git a/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td b/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td --- a/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td +++ b/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td @@ -14,6 +14,7 @@ #define NVVMIR_OPS include "mlir/IR/EnumAttr.td" +include "mlir/Dialect/GPU/IR/CompilationAttrInterfaces.td" include "mlir/Dialect/LLVMIR/LLVMOpBase.td" include "mlir/Interfaces/SideEffectInterfaces.td" @@ -1472,4 +1473,72 @@ }]; } +//===----------------------------------------------------------------------===// +// NVVM target attribute. +//===----------------------------------------------------------------------===// + +def NVVM_TargettAttr : NVVM_Attr<"NVVMTarget", "target"> { + let description = [{ + GPU target attribute for controlling compilation of NVIDIA targets. All + parameters decay into default values if not present. + + Examples: + + 1. Target with default values. + ``` + gpu.module @mymodule [#nvvm.target] attributes {...} { + ... + } + ``` + + 2. Target with `sm_90` chip and fast math. + ``` + gpu.module @mymodule [#nvvm.target] { + ... + } + ``` + }]; + let parameters = (ins + DefaultValuedParameter<"int", "2", "Optimization level to apply.">:$O, + StringRefParameter<"Target triple.", "\"nvptx64-nvidia-cuda\"">:$triple, + StringRefParameter<"Target chip.", "\"sm_50\"">:$chip, + StringRefParameter<"Target chip features.", "\"+ptx60\"">:$features, + OptionalParameter<"DictionaryAttr", "Target specific flags.">:$flags, + OptionalParameter<"ArrayAttr", "Files to link to the LLVM module.">:$link + ); + let assemblyFormat = [{ + (`<` struct($O, $triple, $chip, $features, $flags, $link)^ `>`)? + }]; + let builders = [ + AttrBuilder<(ins CArg<"int", "2">:$optLevel, + CArg<"StringRef", "\"nvptx64-nvidia-cuda\"">:$triple, + CArg<"StringRef", "\"sm_50\"">:$chip, + CArg<"StringRef", "\"+ptx60\"">:$features, + CArg<"DictionaryAttr", "nullptr">:$targetFlags, + CArg<"ArrayAttr", "nullptr">:$linkFiles), [{ + return Base::get($_ctxt, optLevel, triple, chip, features, targetFlags, linkFiles); + }]> + ]; + let skipDefaultBuilders = 1; + let genVerifyDecl = 1; + let extraClassDeclaration = [{ + bool hasFlag(StringRef flag) const; + bool hasFastMath() const; + bool hasFtz() const; + }]; + let extraClassDefinition = [{ + bool $cppClass::hasFlag(StringRef flag) const { + if (DictionaryAttr flags = getFlags()) + return flags.get(flag) != nullptr; + return false; + } + bool $cppClass::hasFastMath() const { + return hasFlag("fast"); + } + bool $cppClass::hasFtz() const { + return hasFlag("ftz"); + } + }]; +} + #endif // NVVMIR_OPS diff --git a/mlir/include/mlir/InitAllExtensions.h b/mlir/include/mlir/InitAllExtensions.h --- a/mlir/include/mlir/InitAllExtensions.h +++ b/mlir/include/mlir/InitAllExtensions.h @@ -16,6 +16,7 @@ #include "mlir/Conversion/NVVMToLLVM/NVVMToLLVM.h" #include "mlir/Dialect/Func/Extensions/AllExtensions.h" +#include "mlir/Target/LLVM/NVVM/Target.h" #include @@ -29,6 +30,7 @@ inline void registerAllExtensions(DialectRegistry ®istry) { func::registerAllExtensions(registry); registerConvertNVVMToLLVMInterface(registry); + registerNVVMTarget(registry); } } // namespace mlir diff --git a/mlir/include/mlir/Target/LLVM/NVVM/Target.h b/mlir/include/mlir/Target/LLVM/NVVM/Target.h new file mode 100644 --- /dev/null +++ b/mlir/include/mlir/Target/LLVM/NVVM/Target.h @@ -0,0 +1,28 @@ +//===- Target.h - MLIR NVVM target registration -----------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This provides registration calls for attaching the NVVM target interface. +// +//===----------------------------------------------------------------------===// + +#ifndef MLIR_TARGET_LLVM_NVVM_TARGET_H +#define MLIR_TARGET_LLVM_NVVM_TARGET_H + +namespace mlir { +class DialectRegistry; +class MLIRContext; +/// Registers the `TargetAttrInterface` for the `#nvvm.target` attribute in the +/// given registry. +void registerNVVMTarget(DialectRegistry ®istry); + +/// Registers the `TargetAttrInterface` for the `#nvvm.target` attribute in the +/// registry associated with the given context. +void registerNVVMTarget(MLIRContext &context); +} // namespace mlir + +#endif // MLIR_TARGET_LLVM_NVVM_TARGET_H diff --git a/mlir/include/mlir/Target/LLVM/NVVM/Utils.h b/mlir/include/mlir/Target/LLVM/NVVM/Utils.h new file mode 100644 --- /dev/null +++ b/mlir/include/mlir/Target/LLVM/NVVM/Utils.h @@ -0,0 +1,74 @@ +//===- Utils.h - MLIR NVVM target utils -------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This files declares NVVM target related utility classes and functions. +// +//===----------------------------------------------------------------------===// + +#ifndef MLIR_TARGET_LLVM_NVVM_UTILS_H +#define MLIR_TARGET_LLVM_NVVM_UTILS_H + +#include "mlir/Dialect/GPU/IR/CompilationInterfaces.h" +#include "mlir/Dialect/LLVMIR/NVVMDialect.h" +#include "mlir/Target/LLVM/ModuleToObject.h" + +namespace mlir { +namespace NVVM { +/// Searches & returns the path CUDA toolkit path, the search order is: +/// 1. The `CUDA_ROOT` environment variable. +/// 2. The `CUDA_HOME` environment variable. +/// 3. The `CUDA_PATH` environment variable. +/// 4. The CUDA toolkit path detected by CMake. +/// 5. Returns an empty string. +StringRef getCUDAToolkitPath(); + +/// Base class for all NVVM serializations from GPU modules into binary strings. +/// By default this class serializes into LLVM bitcode. +class SerializeGPUModuleBase : public LLVM::ModuleToObject { +public: + /// Initializes the `toolkitPath` with the path in `targetOptions` or if empty + /// with the path in `getCUDAToolkitPath`. + SerializeGPUModuleBase(Operation &module, NVVMTargetAttr target, + const gpu::TargetOptions &targetOptions = {}); + + /// Initializes the LLVM NVPTX target by safely calling `LLVMInitializeNVPTX*` + /// methods if available. + static void init(); + + /// Returns the target attribute. + NVVMTargetAttr getTarget() const; + + /// Returns the CUDA toolkit path. + StringRef getToolkitPath() const; + + /// Returns the bitcode files to be loaded. + ArrayRef getFileList() const; + + /// Appends `nvvm/libdevice.bc` into `fileList`. Returns failure if the + /// library couldn't be found. + LogicalResult appendStandardLibs(); + + /// Loads the bitcode files in `fileList`. + virtual std::optional>> + loadBitcodeFiles(llvm::Module &module, + llvm::TargetMachine &targetMachine) override; + +protected: + /// NVVM target attribute. + NVVMTargetAttr target; + + /// CUDA toolkit path. + std::string toolkitPath; + + /// List of LLVM bitcode files to link to. + SmallVector fileList; +}; +} // namespace NVVM +} // namespace mlir + +#endif // MLIR_TARGET_LLVM_NVVM_UTILS_H diff --git a/mlir/lib/Dialect/LLVMIR/IR/NVVMDialect.cpp b/mlir/lib/Dialect/LLVMIR/IR/NVVMDialect.cpp --- a/mlir/lib/Dialect/LLVMIR/IR/NVVMDialect.cpp +++ b/mlir/lib/Dialect/LLVMIR/IR/NVVMDialect.cpp @@ -17,6 +17,7 @@ #include "mlir/Dialect/LLVMIR/NVVMDialect.h" #include "mlir/Conversion/ConvertToLLVM/ToLLVMInterface.h" +#include "mlir/Dialect/GPU/IR/GPUDialect.h" #include "mlir/Dialect/Utils/StaticValueUtils.h" #include "mlir/IR/Builders.h" #include "mlir/IR/BuiltinAttributes.h" @@ -723,6 +724,7 @@ // registered. allowUnknownOperations(); declarePromisedInterface(); + declarePromisedInterface(); } LogicalResult NVVMDialect::verifyOperationAttribute(Operation *op, @@ -761,6 +763,35 @@ return success(); } +//===----------------------------------------------------------------------===// +// NVVM target attribute. +//===----------------------------------------------------------------------===// +LogicalResult +NVVMTargetAttr::verify(function_ref emitError, + int optLevel, StringRef triple, StringRef chip, + StringRef features, DictionaryAttr flags, + ArrayAttr files) { + if (optLevel < 0 || optLevel > 3) { + emitError() << "The optimization level must be a number between 0 and 3."; + return failure(); + } + if (triple.empty()) { + emitError() << "The target triple cannot be empty."; + return failure(); + } + if (chip.empty()) { + emitError() << "The target chip cannot be empty."; + return failure(); + } + if (files && !llvm::all_of(files, [](::mlir::Attribute attr) { + return attr && mlir::isa(attr); + })) { + emitError() << "All the elements in the `link` array must be strings."; + return failure(); + } + return success(); +} + #define GET_OP_CLASSES #include "mlir/Dialect/LLVMIR/NVVMOps.cpp.inc" diff --git a/mlir/lib/Target/LLVM/CMakeLists.txt b/mlir/lib/Target/LLVM/CMakeLists.txt --- a/mlir/lib/Target/LLVM/CMakeLists.txt +++ b/mlir/lib/Target/LLVM/CMakeLists.txt @@ -20,3 +20,80 @@ MLIRExecutionEngineUtils MLIRTargetLLVMIRExport ) + +if (MLIR_ENABLE_CUDA_CONVERSIONS) + set(NVPTX_LIBS + NVPTXCodeGen + NVPTXDesc + NVPTXInfo + ) +endif() + +add_mlir_dialect_library(MLIRNVVMTarget + NVVM/Target.cpp + + ADDITIONAL_HEADER_DIRS + ${MLIR_MAIN_INCLUDE_DIR}/mlir/Dialect/LLVMIR + + LINK_COMPONENTS + ${NVPTX_LIBS} + + LINK_LIBS PUBLIC + MLIRIR + MLIRExecutionEngineUtils + MLIRSupport + MLIRGPUDialect + MLIRTargetLLVM + MLIRNVVMToLLVMIRTranslation + ) + +if(MLIR_ENABLE_CUDA_CONVERSIONS) + # Find the CUDA toolkit. + find_package(CUDAToolkit) + + if(CUDAToolkit_FOUND) + # Get the CUDA toolkit path. The path is needed for detecting `libdevice.bc`. + # These extra steps are needed because of a bug on CMake. + # See: https://gitlab.kitware.com/cmake/cmake/-/issues/24858 + # TODO: Bump the MLIR CMake version to 3.26.4 and switch to + # ${CUDAToolkit_LIBRARY_ROOT} + if(NOT DEFINED ${CUDAToolkit_LIBRARY_ROOT}) + get_filename_component(MLIR_CUDAToolkit_ROOT ${CUDAToolkit_BIN_DIR} + DIRECTORY ABSOLUTE) + else() + set(MLIR_CUDAToolkit_ROOT ${CUDAToolkit_LIBRARY_ROOT}) + endif() + + # Add the `nvptxcompiler` library. + if(MLIR_ENABLE_NVPTXCOMPILER) + # Find the `nvptxcompiler` library. + # TODO: Bump the MLIR CMake version to 3.25 and use `CUDA::nvptxcompiler_static`. + find_library(MLIR_NVPTXCOMPILER_LIB nvptxcompiler_static + PATHS ${CUDAToolkit_LIBRARY_DIR} NO_DEFAULT_PATH) + + # Fail if `nvptxcompiler_static` couldn't be found. + if(MLIR_NVPTXCOMPILER_LIB STREQUAL "MLIR_NVPTXCOMPILER_LIB-NOTFOUND") + message(FATAL_ERROR + "Requested using the `nvptxcompiler` library backend but it couldn't be found.") + endif() + + # Link against `nvptxcompiler_static`. TODO: use `CUDA::nvptxcompiler_static`. + target_link_libraries(MLIRNVVMTarget PRIVATE ${MLIR_NVPTXCOMPILER_LIB}) + target_include_directories(obj.MLIRNVVMTarget PUBLIC ${CUDAToolkit_INCLUDE_DIRS}) + endif() + else() + # Fail if `MLIR_ENABLE_NVPTXCOMPILER` is enabled and the toolkit couldn't be found. + if(MLIR_ENABLE_NVPTXCOMPILER) + message(FATAL_ERROR + "Requested using the `nvptxcompiler` library backend but it couldn't be found.") + endif() + endif() + message(VERBOSE "MLIR default CUDA toolkit path: ${MLIR_CUDAToolkit_ROOT}") + + # Define the `CUDAToolkit` path. + target_compile_definitions(obj.MLIRNVVMTarget + PRIVATE + MLIR_NVPTXCOMPILER_ENABLED=${MLIR_ENABLE_NVPTXCOMPILER} + __DEFAULT_CUDATOOLKIT_PATH__="${MLIR_CUDAToolkit_ROOT}" + ) +endif() diff --git a/mlir/lib/Target/LLVM/NVVM/Target.cpp b/mlir/lib/Target/LLVM/NVVM/Target.cpp new file mode 100644 --- /dev/null +++ b/mlir/lib/Target/LLVM/NVVM/Target.cpp @@ -0,0 +1,508 @@ +//===- Target.cpp - MLIR LLVM NVVM target compilation -----------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This files defines NVVM target related functions including registration +// calls for the `#nvvm.target` compilation attribute. +// +//===----------------------------------------------------------------------===// + +#include "mlir/Target/LLVM/NVVM/Target.h" + +#include "mlir/Dialect/GPU/IR/GPUDialect.h" +#include "mlir/Dialect/LLVMIR/NVVMDialect.h" +#include "mlir/Target/LLVM/NVVM/Utils.h" +#include "mlir/Target/LLVMIR/Dialect/GPU/GPUToLLVMIRTranslation.h" +#include "mlir/Target/LLVMIR/Dialect/LLVMIR/LLVMToLLVMIRTranslation.h" +#include "mlir/Target/LLVMIR/Dialect/NVVM/NVVMToLLVMIRTranslation.h" +#include "mlir/Target/LLVMIR/Export.h" + +#include "llvm/Support/FileSystem.h" +#include "llvm/Support/FileUtilities.h" +#include "llvm/Support/FormatVariadic.h" +#include "llvm/Support/MemoryBuffer.h" +#include "llvm/Support/Path.h" +#include "llvm/Support/Process.h" +#include "llvm/Support/Program.h" +#include "llvm/Support/TargetSelect.h" + +#include + +using namespace mlir; +using namespace mlir::NVVM; + +#ifndef __DEFAULT_CUDATOOLKIT_PATH__ +#define __DEFAULT_CUDATOOLKIT_PATH__ "" +#endif + +namespace { +// Implementation of the `TargetAttrInterface` model. +class NVVMTargetAttrImpl + : public gpu::TargetAttrInterface::FallbackModel { +public: + std::optional> + serializeToObject(Attribute attribute, Operation *module, + const gpu::TargetOptions &options) const; +}; +} // namespace + +// Register the NVVM dialect, the NVVM translation & the target interface. +void mlir::registerNVVMTarget(DialectRegistry ®istry) { + registerNVVMDialectTranslation(registry); + registry.addExtension(+[](MLIRContext *ctx, NVVM::NVVMDialect *dialect) { + NVVMTargetAttr::attachInterface(*ctx); + }); +} + +void mlir::registerNVVMTarget(MLIRContext &context) { + DialectRegistry registry; + registerNVVMTarget(registry); + context.appendDialectRegistry(registry); +} + +// Search for the CUDA toolkit path. +StringRef mlir::NVVM::getCUDAToolkitPath() { + if (const char *var = std::getenv("CUDA_ROOT")) + return var; + if (const char *var = std::getenv("CUDA_HOME")) + return var; + if (const char *var = std::getenv("CUDA_PATH")) + return var; + return __DEFAULT_CUDATOOLKIT_PATH__; +} + +SerializeGPUModuleBase::SerializeGPUModuleBase( + Operation &module, NVVMTargetAttr target, + const gpu::TargetOptions &targetOptions) + : ModuleToObject(module, target.getTriple(), target.getChip(), + target.getFeatures(), target.getO()), + target(target), toolkitPath(targetOptions.getToolkitPath()), + fileList(targetOptions.getLinkFiles()) { + + // If `targetOptions` have an empty toolkitPath use `getCUDAToolkitPath` + if (toolkitPath.empty()) + toolkitPath = getCUDAToolkitPath(); + + // Append the files in the target attribute. + if (ArrayAttr files = target.getLink()) + for (Attribute attr : files.getValue()) + if (auto file = dyn_cast(attr)) + fileList.push_back(file.str()); + + // Append libdevice to the files to be loaded. + (void)appendStandardLibs(); +} + +void SerializeGPUModuleBase::init() { + static llvm::once_flag initializeBackendOnce; + llvm::call_once(initializeBackendOnce, []() { + // If the `NVPTX` LLVM target was built, initialize it. +#if MLIR_CUDA_CONVERSIONS_ENABLED == 1 + LLVMInitializeNVPTXTarget(); + LLVMInitializeNVPTXTargetInfo(); + LLVMInitializeNVPTXTargetMC(); + LLVMInitializeNVPTXAsmPrinter(); +#endif + }); +} + +NVVMTargetAttr SerializeGPUModuleBase::getTarget() const { return target; } + +StringRef SerializeGPUModuleBase::getToolkitPath() const { return toolkitPath; } + +ArrayRef SerializeGPUModuleBase::getFileList() const { + return fileList; +} + +// Try to append `libdevice` from a CUDA toolkit installation. +LogicalResult SerializeGPUModuleBase::appendStandardLibs() { + StringRef pathRef = getToolkitPath(); + if (pathRef.size()) { + SmallVector path; + path.insert(path.begin(), pathRef.begin(), pathRef.end()); + pathRef = StringRef(path.data(), path.size()); + if (!llvm::sys::fs::is_directory(pathRef)) { + getOperation().emitError() << "CUDA path: " << pathRef + << " does not exist or is not a directory.\n"; + return failure(); + } + llvm::sys::path::append(path, "nvvm", "libdevice", "libdevice.10.bc"); + pathRef = StringRef(path.data(), path.size()); + if (!llvm::sys::fs::is_regular_file(pathRef)) { + getOperation().emitError() << "LibDevice path: " << pathRef + << " does not exist or is not a file.\n"; + return failure(); + } + fileList.push_back(pathRef.str()); + } + return success(); +} + +std::optional>> +SerializeGPUModuleBase::loadBitcodeFiles(llvm::Module &module, + llvm::TargetMachine &targetMachine) { + SmallVector> bcFiles; + if (failed(loadBitcodeFilesFromList(module.getContext(), targetMachine, + fileList, bcFiles, true))) + return std::nullopt; + return bcFiles; +} + +#if MLIR_CUDA_CONVERSIONS_ENABLED == 1 +namespace { +class NVPTXSerializer : public SerializeGPUModuleBase { +public: + NVPTXSerializer(Operation &module, NVVMTargetAttr target, + const gpu::TargetOptions &targetOptions); + + gpu::GPUModuleOp getOperation(); + + // Compile PTX to cubin using `ptxas`. + std::optional> + compileToBinary(const std::string &ptxCode); + + // Compile PTX to cubin using the `nvptxcompiler` library. + std::optional> + compileToBinaryNVPTX(const std::string &ptxCode); + + std::optional> + moduleToObject(llvm::Module &llvmModule, + llvm::TargetMachine &targetMachine) override; + +private: + using TmpFile = std::pair, llvm::FileRemover>; + + // Create a temp file. + std::optional createTemp(StringRef name, StringRef suffix); + + // Find the PTXAS compiler. The search order is: + // 1. The toolkit path in `targetOptions`. + // 2. In the system PATH. + // 3. The path from `getCUDAToolkitPath()`. + std::optional findPtxas() const; + + // Target options. + gpu::TargetOptions targetOptions; +}; +} // namespace + +NVPTXSerializer::NVPTXSerializer(Operation &module, NVVMTargetAttr target, + const gpu::TargetOptions &targetOptions) + : SerializeGPUModuleBase(module, target, targetOptions), + targetOptions(targetOptions) {} + +std::optional +NVPTXSerializer::createTemp(StringRef name, StringRef suffix) { + llvm::SmallString<128> filename; + std::error_code ec = + llvm::sys::fs::createTemporaryFile(name, suffix, filename); + if (ec) { + getOperation().emitError() << "Couldn't create the temp file: `" << filename + << "`, error message: " << ec.message(); + return std::nullopt; + } + return TmpFile(filename, llvm::FileRemover(filename.c_str())); +} + +gpu::GPUModuleOp NVPTXSerializer::getOperation() { + return dyn_cast(&SerializeGPUModuleBase::getOperation()); +} + +std::optional NVPTXSerializer::findPtxas() const { + // Find the `ptxas` compiler. + // 1. Check the toolkit path given in the command line. + StringRef pathRef = targetOptions.getToolkitPath(); + SmallVector path; + if (pathRef.size()) { + path.insert(path.begin(), pathRef.begin(), pathRef.end()); + llvm::sys::path::append(path, "bin", "ptxas"); + if (llvm::sys::fs::can_execute(path)) + return StringRef(path.data(), path.size()).str(); + } + + // 2. Check PATH. + if (std::optional ptxasCompiler = + llvm::sys::Process::FindInEnvPath("PATH", "ptxas")) + return *ptxasCompiler; + + // 3. Check `getCUDAToolkitPath()`. + pathRef = getCUDAToolkitPath(); + path.clear(); + if (pathRef.size()) { + path.insert(path.begin(), pathRef.begin(), pathRef.end()); + llvm::sys::path::append(path, "bin", "ptxas"); + if (llvm::sys::fs::can_execute(path)) + return StringRef(path.data(), path.size()).str(); + } + return std::nullopt; +} + +// TODO: clean this method & have a generic tool driver or never emit binaries +// with this mechanism and let another stage take care of it. +std::optional> +NVPTXSerializer::compileToBinary(const std::string &ptxCode) { + // Find the PTXAS compiler. + std::optional ptxasCompiler = findPtxas(); + if (!ptxasCompiler) { + getOperation().emitError() + << "Couldn't find the `ptxas` compiler. Please specify the toolkit " + "path, add the compiler to $PATH, or set one of the environment " + "variables in `NVVM::getCUDAToolkitPath()`."; + return std::nullopt; + } + + // Base name for all temp files: mlir---. + std::string basename = + llvm::formatv("mlir-{0}-{1}-{2}", getOperation().getNameAttr().getValue(), + getTarget().getTriple(), getTarget().getChip()); + + // Create temp files: + std::optional ptxFile = createTemp(basename, "ptx"); + if (!ptxFile) + return std::nullopt; + std::optional logFile = createTemp(basename, "log"); + if (!logFile) + return std::nullopt; + std::optional cubinFile = createTemp(basename, "cubin"); + if (!cubinFile) + return std::nullopt; + + std::error_code ec; + // Dump the PTX to a temp file. + { + llvm::raw_fd_ostream ptxStream(ptxFile->first, ec); + if (ec) { + getOperation().emitError() + << "Couldn't open the file: `" << ptxFile->first + << "`, error message: " << ec.message(); + return std::nullopt; + } + ptxStream << ptxCode; + if (ptxStream.has_error()) { + getOperation().emitError() + << "An error occurred while writing the PTX to: `" << ptxFile->first + << "`."; + return std::nullopt; + } + ptxStream.flush(); + } + + // Create PTX args. + std::string optLevel = std::to_string(this->optLevel); + SmallVector ptxasArgs( + {StringRef("ptxas"), StringRef("-arch"), getTarget().getChip(), + StringRef(ptxFile->first), StringRef("-o"), StringRef(cubinFile->first), + "--opt-level", optLevel}); + + std::pair> cmdOpts = + targetOptions.tokenizeCmdOptions(); + for (auto arg : cmdOpts.second) + ptxasArgs.push_back(arg); + + std::optional redirects[] = { + std::nullopt, + logFile->first, + logFile->first, + }; + + // Invoke PTXAS. + std::string message; + if (llvm::sys::ExecuteAndWait(ptxasCompiler.value(), ptxasArgs, + /*Env=*/std::nullopt, + /*Redirects=*/redirects, + /*SecondsToWait=*/0, + /*MemoryLimit=*/0, + /*ErrMsg=*/&message)) { + if (message.empty()) { + llvm::ErrorOr> ptxasStderr = + llvm::MemoryBuffer::getFile(logFile->first); + if (ptxasStderr) + getOperation().emitError() << "PTXAS invocation failed. PTXAS log:\n" + << ptxasStderr->get()->getBuffer(); + else + getOperation().emitError() << "PTXAS invocation failed."; + return std::nullopt; + } + getOperation().emitError() + << "PTXAS invocation failed, error message: " << message; + return std::nullopt; + } + +// Dump the output of PTXAS, helpful if the verbose flag was passed. +#define DEBUG_TYPE "serialize-to-binary" + LLVM_DEBUG({ + llvm::dbgs() << "PTXAS invocation for module: " + << getOperation().getNameAttr() << "\n"; + llvm::dbgs() << "Command: "; + llvm::interleave(ptxasArgs, llvm::dbgs(), " "); + llvm::dbgs() << "\n"; + llvm::ErrorOr> ptxasLog = + llvm::MemoryBuffer::getFile(logFile->first); + if (ptxasLog && (*ptxasLog)->getBuffer().size()) { + llvm::dbgs() << "Output:\n" << (*ptxasLog)->getBuffer() << "\n"; + llvm::dbgs().flush(); + } + }); +#undef DEBUG_TYPE + + // Read the cubin file. + llvm::ErrorOr> cubinBuffer = + llvm::MemoryBuffer::getFile(cubinFile->first); + if (!cubinBuffer) { + getOperation().emitError() + << "Couldn't open the file: `" << cubinFile->first + << "`, error message: " << cubinBuffer.getError().message(); + return std::nullopt; + } + StringRef cubinStr = (*cubinBuffer)->getBuffer(); + return SmallVector(cubinStr.begin(), cubinStr.end()); +} + +#if MLIR_NVPTXCOMPILER_ENABLED == 1 +#include "nvPTXCompiler.h" + +#define RETURN_ON_NVPTXCOMPILER_ERROR(expr) \ + do { \ + if (auto status = (expr)) { \ + emitError(loc) << llvm::Twine(#expr).concat(" failed with error code ") \ + << status; \ + return std::nullopt; \ + } \ + } while (false) + +std::optional> +NVPTXSerializer::compileToBinaryNVPTX(const std::string &ptxCode) { + Location loc = getOperation().getLoc(); + nvPTXCompilerHandle compiler = nullptr; + nvPTXCompileResult status; + size_t logSize; + + // Create the options. + std::string optLevel = std::to_string(this->optLevel); + std::pair> cmdOpts = + targetOptions.tokenizeCmdOptions(); + cmdOpts.second.append( + {"-arch", getTarget().getChip().data(), "--opt-level", optLevel.c_str()}); + + // Create the compiler handle. + RETURN_ON_NVPTXCOMPILER_ERROR( + nvPTXCompilerCreate(&compiler, ptxCode.size(), ptxCode.c_str())); + + // Try to compile the binary. + status = nvPTXCompilerCompile(compiler, cmdOpts.second.size(), + cmdOpts.second.data()); + + // Check if compilation failed. + if (status != NVPTXCOMPILE_SUCCESS) { + RETURN_ON_NVPTXCOMPILER_ERROR( + nvPTXCompilerGetErrorLogSize(compiler, &logSize)); + if (logSize != 0) { + SmallVector log(logSize + 1, 0); + RETURN_ON_NVPTXCOMPILER_ERROR( + nvPTXCompilerGetErrorLog(compiler, log.data())); + emitError(loc) << "NVPTX compiler invocation failed, error log: " + << log.data(); + } else + emitError(loc) << "NVPTX compiler invocation failed with error code: " + << status; + return std::nullopt; + } + + // Retrieve the binary. + size_t elfSize; + RETURN_ON_NVPTXCOMPILER_ERROR( + nvPTXCompilerGetCompiledProgramSize(compiler, &elfSize)); + SmallVector binary(elfSize, 0); + RETURN_ON_NVPTXCOMPILER_ERROR( + nvPTXCompilerGetCompiledProgram(compiler, (void *)binary.data())); + +// Dump the log of the compiler, helpful if the verbose flag was passed. +#define DEBUG_TYPE "serialize-to-binary" + LLVM_DEBUG({ + RETURN_ON_NVPTXCOMPILER_ERROR( + nvPTXCompilerGetInfoLogSize(compiler, &logSize)); + if (logSize != 0) { + SmallVector log(logSize + 1, 0); + RETURN_ON_NVPTXCOMPILER_ERROR( + nvPTXCompilerGetInfoLog(compiler, log.data())); + llvm::dbgs() << "NVPTX compiler invocation for module: " + << getOperation().getNameAttr() << "\n"; + llvm::dbgs() << "Arguments: "; + llvm::interleave(cmdOpts.second, llvm::dbgs(), " "); + llvm::dbgs() << "\nOutput\n" << log.data() << "\n"; + llvm::dbgs().flush(); + } + }); +#undef DEBUG_TYPE + RETURN_ON_NVPTXCOMPILER_ERROR(nvPTXCompilerDestroy(&compiler)); + return binary; +} +#endif // MLIR_NVPTXCOMPILER_ENABLED == 1 + +std::optional> +NVPTXSerializer::moduleToObject(llvm::Module &llvmModule, + llvm::TargetMachine &targetMachine) { + // Return LLVM IR if the compilation target is offload. +#define DEBUG_TYPE "serialize-to-llvm" + LLVM_DEBUG({ + llvm::dbgs() << "LLVM IR for module: " << getOperation().getNameAttr() + << "\n"; + llvm::dbgs() << llvmModule << "\n"; + llvm::dbgs().flush(); + }); +#undef DEBUG_TYPE + if (targetOptions.getCompilationTarget() == gpu::TargetOptions::offload) + return SerializeGPUModuleBase::moduleToObject(llvmModule, targetMachine); + + // Emit PTX code. + std::optional serializedISA = + translateToISA(llvmModule, targetMachine); + if (!serializedISA) { + getOperation().emitError() << "Failed translating the module to ISA."; + return std::nullopt; + } +#define DEBUG_TYPE "serialize-to-isa" + LLVM_DEBUG({ + llvm::dbgs() << "PTX for module: " << getOperation().getNameAttr() << "\n"; + llvm::dbgs() << *serializedISA << "\n"; + llvm::dbgs().flush(); + }); +#undef DEBUG_TYPE + + // Return PTX if the compilation target is assembly. + if (targetOptions.getCompilationTarget() == gpu::TargetOptions::assembly) + return SmallVector(serializedISA->begin(), serializedISA->end()); + + // Compile to binary. +#if MLIR_NVPTXCOMPILER_ENABLED == 1 + return compileToBinaryNVPTX(*serializedISA); +#else + return compileToBinary(*serializedISA); +#endif // MLIR_NVPTXCOMPILER_ENABLED == 1 +} +#endif // MLIR_CUDA_CONVERSIONS_ENABLED == 1 + +std::optional> +NVVMTargetAttrImpl::serializeToObject(Attribute attribute, Operation *module, + const gpu::TargetOptions &options) const { + assert(module && "The module must be non null."); + if (!module) + return std::nullopt; + if (!mlir::isa(module)) { + module->emitError("Module must be a GPU module."); + return std::nullopt; + } +#if MLIR_CUDA_CONVERSIONS_ENABLED == 1 + NVPTXSerializer serializer(*module, cast(attribute), options); + serializer.init(); + return serializer.run(); +#else + module->emitError( + "The `NVPTX` target was not built. Please enable it when building LLVM."); + return std::nullopt; +#endif // MLIR_CUDA_CONVERSIONS_ENABLED == 1 +} diff --git a/mlir/test/Dialect/GPU/ops.mlir b/mlir/test/Dialect/GPU/ops.mlir --- a/mlir/test/Dialect/GPU/ops.mlir +++ b/mlir/test/Dialect/GPU/ops.mlir @@ -364,3 +364,10 @@ gpu.return }) {function_type = () -> (), sym_name = "func"} : () -> () } + +// Check that this doesn't crash. +gpu.module @module_with_one_target [#nvvm.target] { + gpu.func @kernel(%arg0 : f32) kernel { + gpu.return + } +} diff --git a/mlir/test/Dialect/LLVMIR/nvvm.mlir b/mlir/test/Dialect/LLVMIR/nvvm.mlir --- a/mlir/test/Dialect/LLVMIR/nvvm.mlir +++ b/mlir/test/Dialect/LLVMIR/nvvm.mlir @@ -429,3 +429,12 @@ nvvm.wgmma.wait.group.sync.aligned 0 return } + +// ----- + +// Just check these don't emit errors. +gpu.module @module_1 [#nvvm.target] { +} + +gpu.module @module_2 [#nvvm.target, #nvvm.target, #nvvm.target] { +} diff --git a/mlir/unittests/Target/LLVM/CMakeLists.txt b/mlir/unittests/Target/LLVM/CMakeLists.txt --- a/mlir/unittests/Target/LLVM/CMakeLists.txt +++ b/mlir/unittests/Target/LLVM/CMakeLists.txt @@ -1,4 +1,5 @@ add_mlir_unittest(MLIRTargetLLVMTests + SerializeNVVMTarget.cpp SerializeToLLVMBitcode.cpp ) @@ -7,9 +8,14 @@ target_link_libraries(MLIRTargetLLVMTests PRIVATE MLIRTargetLLVM + MLIRNVVMTarget + MLIRGPUDialect + MLIRNVVMDialect MLIRLLVMDialect MLIRLLVMToLLVMIRTranslation MLIRBuiltinToLLVMIRTranslation + MLIRNVVMToLLVMIRTranslation + MLIRGPUToLLVMIRTranslation ${llvm_libs} ) diff --git a/mlir/unittests/Target/LLVM/SerializeNVVMTarget.cpp b/mlir/unittests/Target/LLVM/SerializeNVVMTarget.cpp new file mode 100644 --- /dev/null +++ b/mlir/unittests/Target/LLVM/SerializeNVVMTarget.cpp @@ -0,0 +1,154 @@ +//===- SerializeNVVMTarget.cpp ----------------------------------*- C++ -*-===// +// +// This file is licensed under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "mlir/Dialect/GPU/IR/GPUDialect.h" +#include "mlir/Dialect/LLVMIR/NVVMDialect.h" +#include "mlir/IR/MLIRContext.h" +#include "mlir/InitAllDialects.h" +#include "mlir/Parser/Parser.h" +#include "mlir/Target/LLVM/NVVM/Target.h" +#include "mlir/Target/LLVMIR/Dialect/Builtin/BuiltinToLLVMIRTranslation.h" +#include "mlir/Target/LLVMIR/Dialect/GPU/GPUToLLVMIRTranslation.h" +#include "mlir/Target/LLVMIR/Dialect/LLVMIR/LLVMToLLVMIRTranslation.h" + +#include "llvm/IRReader/IRReader.h" +#include "llvm/Support/MemoryBufferRef.h" +#include "llvm/Support/Process.h" +#include "llvm/Support/TargetSelect.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/TargetParser/Host.h" + +#include "gmock/gmock.h" + +using namespace mlir; + +// Skip the test if the NVPTX target was not built. +#if MLIR_CUDA_CONVERSIONS_ENABLED == 0 +#define SKIP_WITHOUT_NVPTX(x) DISABLED_##x +#else +#define SKIP_WITHOUT_NVPTX(x) x +#endif + +class MLIRTargetLLVMNVVM : public ::testing::Test { +protected: + virtual void SetUp() { + registerBuiltinDialectTranslation(registry); + registerLLVMDialectTranslation(registry); + registerGPUDialectTranslation(registry); + registerNVVMTarget(registry); + } + + // Checks if PTXAS is in PATH. + bool hasPtxas() { + // Find the `ptxas` compiler. + std::optional ptxasCompiler = + llvm::sys::Process::FindInEnvPath("PATH", "ptxas"); + return ptxasCompiler.has_value(); + } + + // Dialect registry. + DialectRegistry registry; + + // MLIR module used for the tests. + const std::string moduleStr = R"mlir( + gpu.module @nvvm_test { + llvm.func @nvvm_kernel(%arg0: f32) attributes {gpu.kernel, nvvm.kernel} { + llvm.return + } + })mlir"; +}; + +// Test NVVM serialization to LLVM. +TEST_F(MLIRTargetLLVMNVVM, SKIP_WITHOUT_NVPTX(SerializeNVVMMToLLVM)) { + MLIRContext context(registry); + + OwningOpRef module = + parseSourceString(moduleStr, &context); + ASSERT_TRUE(!!module); + + // Create an NVVM target. + NVVM::NVVMTargetAttr target = NVVM::NVVMTargetAttr::get(&context); + + // Serialize the module. + auto serializer = dyn_cast(target); + ASSERT_TRUE(!!serializer); + gpu::TargetOptions options("", {}, "", gpu::TargetOptions::offload); + for (auto gpuModule : (*module).getBody()->getOps()) { + std::optional> object = + serializer.serializeToObject(gpuModule, options); + // Check that the serializer was successful. + ASSERT_TRUE(object != std::nullopt); + ASSERT_TRUE(object->size() > 0); + + // Read the serialized module. + llvm::MemoryBufferRef buffer(StringRef(object->data(), object->size()), + "module"); + llvm::LLVMContext llvmContext; + llvm::Expected> llvmModule = + llvm::getLazyBitcodeModule(buffer, llvmContext); + ASSERT_TRUE(!!llvmModule); + ASSERT_TRUE(!!*llvmModule); + + // Check that it has a function named `foo`. + ASSERT_TRUE((*llvmModule)->getFunction("nvvm_kernel") != nullptr); + } +} + +// Test NVVM serialization to PTX. +TEST_F(MLIRTargetLLVMNVVM, SKIP_WITHOUT_NVPTX(SerializeNVVMToPTX)) { + MLIRContext context(registry); + + OwningOpRef module = + parseSourceString(moduleStr, &context); + ASSERT_TRUE(!!module); + + // Create an NVVM target. + NVVM::NVVMTargetAttr target = NVVM::NVVMTargetAttr::get(&context); + + // Serialize the module. + auto serializer = dyn_cast(target); + ASSERT_TRUE(!!serializer); + gpu::TargetOptions options("", {}, "", gpu::TargetOptions::assembly); + for (auto gpuModule : (*module).getBody()->getOps()) { + std::optional> object = + serializer.serializeToObject(gpuModule, options); + // Check that the serializer was successful. + ASSERT_TRUE(object != std::nullopt); + ASSERT_TRUE(object->size() > 0); + + ASSERT_TRUE( + StringRef(object->data(), object->size()).contains("nvvm_kernel")); + } +} + +// Test NVVM serialization to Binary. +TEST_F(MLIRTargetLLVMNVVM, SKIP_WITHOUT_NVPTX(SerializeNVVMToBinary)) { + if (!hasPtxas()) + GTEST_SKIP() << "PTXAS compiler not found, skipping test."; + + MLIRContext context(registry); + + OwningOpRef module = + parseSourceString(moduleStr, &context); + ASSERT_TRUE(!!module); + + // Create an NVVM target. + NVVM::NVVMTargetAttr target = NVVM::NVVMTargetAttr::get(&context); + + // Serialize the module. + auto serializer = dyn_cast(target); + ASSERT_TRUE(!!serializer); + gpu::TargetOptions options("", {}, "", gpu::TargetOptions::binary); + for (auto gpuModule : (*module).getBody()->getOps()) { + std::optional> object = + serializer.serializeToObject(gpuModule, options); + // Check that the serializer was successful. + ASSERT_TRUE(object != std::nullopt); + ASSERT_TRUE(object->size() > 0); + } +}