diff --git a/mlir/include/mlir/Dialect/GPU/IR/GPUCompilationAttr.td b/mlir/include/mlir/Dialect/GPU/IR/GPUCompilationAttr.td
new file mode 100644
--- /dev/null
+++ b/mlir/include/mlir/Dialect/GPU/IR/GPUCompilationAttr.td
@@ -0,0 +1,91 @@
+//===-- GPUCompilationAttr.td - GPU compilation attributes -*- tablegen -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the GPU NVPTX target attribute.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef GPU_COMPILATIONATTR
+#define GPU_COMPILATIONATTR
+
+include "mlir/Dialect/GPU/IR/GPUBase.td"
+include "mlir/Dialect/GPU/IR/CompilationAttrInterfaces.td"
+
+//===----------------------------------------------------------------------===//
+// GPU NVPTX target attribute.
+//===----------------------------------------------------------------------===//
+
+// Attribute carrying the NVPTX compilation settings: optimization level,
+// triple, chip, features, an optional flag dictionary and an optional list of
+// files to link.
+def GPU_NVPTXTargetAttr : GPU_Attr<"NVPTXTarget", "nvptx", [
+    // NOTE(review): the interface argument was missing from the patch text;
+    // confirm the def name against CompilationAttrInterfaces.td.
+    DeclareAttrInterfaceMethods<GPUTargetAttrInterface>
+  ]> {
+  let description = [{
+    NVPTX target attribute for controlling compilation of NVIDIA targets. All
+    parameters decay into default values if not present.
+
+    Examples:
+
+    1. Target with default values.
+    ```
+      gpu.module @mymodule [#gpu.nvptx] attributes {...} {
+        ...
+      }
+    ```
+
+    2. Target with `sm_90` chip and fast math.
+    ```
+      gpu.module @mymodule [#gpu.nvptx<chip = "sm_90", flags = {fast}>] {
+        ...
+      }
+    ```
+  }];
+  let parameters = (ins
+    DefaultValuedParameter<"int", "2", "Optimization level to apply.">:$O,
+    StringRefParameter<"Target triple.", "\"nvptx64-nvidia-cuda\"">:$triple,
+    StringRefParameter<"Target chip.", "\"sm_50\"">:$chip,
+    StringRefParameter<"Target chip features.", "\"+ptx60\"">:$features,
+    OptionalParameter<"DictionaryAttr", "Target specific flags.">:$flags,
+    OptionalParameter<"ArrayAttr", "Files to link to the LLVM module.">:$link
+  );
+  // Every parameter has to be bound by the format; `$link` is listed so the
+  // files to link can be written and parsed in the textual form.
+  let assemblyFormat = [{
+    (`<` struct($O, $triple, $chip, $features, $flags, $link)^ `>`)?
+  }];
+  // Single builder whose arguments all default to the same values as the
+  // parameter defaults above.
+  let builders = [
+    AttrBuilder<(ins CArg<"int", "2">:$optLevel,
+                     CArg<"StringRef", "\"nvptx64-nvidia-cuda\"">:$triple,
+                     CArg<"StringRef", "\"sm_50\"">:$chip,
+                     CArg<"StringRef", "\"+ptx60\"">:$features,
+                     CArg<"DictionaryAttr", "nullptr">:$targetFlags,
+                     CArg<"ArrayAttr", "nullptr">:$linkFiles), [{
+      return Base::get($_ctxt, optLevel, triple, chip, features, targetFlags, linkFiles);
+    }]>
+  ];
+  let skipDefaultBuilders = 1;
+  let genVerifyDecl = 1;
+  let extraClassDeclaration = [{
+    /// Returns true if `flag` is a key of the `flags` dictionary.
+    bool hasFlag(StringRef flag) const;
+    /// Returns true if the `fast` flag is present.
+    bool getFastMath() const;
+    /// Returns true if the `ftz` flag is present.
+    bool getFtz() const;
+  }];
+  let extraClassDefinition = [{
+    bool $cppClass::hasFlag(StringRef flag) const {
+      if (DictionaryAttr flags = getFlags())
+        return flags.get(flag) != nullptr;
+      return false;
+    }
+    bool $cppClass::getFastMath() const {
+      return hasFlag("fast");
+    }
+    bool $cppClass::getFtz() const {
+      return hasFlag("ftz");
+    }
+  }];
+}
+
+#endif // GPU_COMPILATIONATTR
diff --git a/mlir/include/mlir/Dialect/GPU/IR/GPUOps.td b/mlir/include/mlir/Dialect/GPU/IR/GPUOps.td
--- a/mlir/include/mlir/Dialect/GPU/IR/GPUOps.td
+++ b/mlir/include/mlir/Dialect/GPU/IR/GPUOps.td
@@ -16,6 +16,7 @@
 include "mlir/Dialect/DLTI/DLTIBase.td"
 include "mlir/Dialect/GPU/IR/GPUBase.td"
 include "mlir/Dialect/GPU/IR/CompilationAttrInterfaces.td"
+include "mlir/Dialect/GPU/IR/GPUCompilationAttr.td"
 include "mlir/Dialect/GPU/IR/ParallelLoopMapperAttr.td"
 include "mlir/Dialect/GPU/TransformOps/GPUDeviceMappingAttr.td"
 include "mlir/IR/EnumAttr.td"
diff --git a/mlir/lib/Dialect/GPU/CMakeLists.txt b/mlir/lib/Dialect/GPU/CMakeLists.txt
--- a/mlir/lib/Dialect/GPU/CMakeLists.txt
+++ b/mlir/lib/Dialect/GPU/CMakeLists.txt
@@ -42,6 +42,31 @@
   MLIRMemRefDialect
   MLIRSideEffectInterfaces
   MLIRSupport
+
+  PRIVATE
+  MLIRGPUTargets
+  )
+
+add_mlir_dialect_library(MLIRGPUTargets
+  Targets/NVPTXTarget.cpp
+
+  ADDITIONAL_HEADER_DIRS
+  ${MLIR_MAIN_INCLUDE_DIR}/mlir/Dialect/GPU
+
+  LINK_COMPONENTS
+  Core
+  MC
+  Target
+  ${NVPTX_LIBS}
+
+  LINK_LIBS PUBLIC
+  MLIRIR
+  MLIRExecutionEngineUtils
+  MLIRSupport
+  MLIRTargetLLVMIRExport
+
+  PRIVATE
+  MLIRGPUDialect
   )
 
 add_mlir_dialect_library(MLIRGPUTransforms
@@ -129,6 +154,35 @@
     ${CUDA_DRIVER_LIBRARY}
   )
 
+  # Find the CUDA toolkit.
+  if (NOT DEFINED CUDAToolkit_ROOT)
+    find_package(CUDAToolkit)
+    get_filename_component(CUDAToolkit_ROOT "${CUDAToolkit_BIN_DIR}" DIRECTORY ABSOLUTE)
+  endif()
+  message(VERBOSE "MLIR Default CUDA toolkit path: ${CUDAToolkit_ROOT}")
+
+  # Enable the gpu to cubin target.
+  target_compile_definitions(obj.MLIRGPUTargets
+    PRIVATE
+    MLIR_GPU_NVPTX_TARGET_ENABLED=1
+    __DEFAULT_CUDATOOLKIT_PATH__="${CUDAToolkit_ROOT}"
+  )
+  # Enable the gpu to cubin target.
+  target_compile_definitions(obj.MLIRGPUTransforms
+    PRIVATE
+    MLIR_GPU_NVPTX_TARGET_ENABLED=1
+  )
+
+  # Add CUDA headers includes and the libcuda.so library.
+  target_include_directories(obj.MLIRGPUTargets
+    PRIVATE
+    ${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES}
+  )
+  target_link_libraries(MLIRGPUTargets
+    PRIVATE
+    ${CUDA_DRIVER_LIBRARY}
+  )
+
   endif()
 
 if(MLIR_ENABLE_ROCM_CONVERSIONS)
diff --git a/mlir/lib/Dialect/GPU/Targets/NVPTXTarget.cpp b/mlir/lib/Dialect/GPU/Targets/NVPTXTarget.cpp
new file mode 100644
--- /dev/null
+++ b/mlir/lib/Dialect/GPU/Targets/NVPTXTarget.cpp
@@ -0,0 +1,254 @@
+//===- NVPTXTarget.cpp - MLIR GPU Dialect NVPTX target attribute ----------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the NVPTX target attribute.
+//
+//===----------------------------------------------------------------------===//
+
+#include "mlir/Dialect/GPU/IR/GPUDialect.h"
+#include "mlir/Dialect/GPU/Transforms/Passes.h"
+
+using namespace mlir;
+using namespace mlir::gpu;
+
+#ifdef MLIR_GPU_NVPTX_TARGET_ENABLED
+#include "mlir/ExecutionEngine/ModuleToObject.h"
+#include "mlir/Target/LLVMIR/Dialect/GPU/GPUToLLVMIRTranslation.h"
+#include "mlir/Target/LLVMIR/Dialect/LLVMIR/LLVMToLLVMIRTranslation.h"
+#include "mlir/Target/LLVMIR/Dialect/NVVM/NVVMToLLVMIRTranslation.h"
+#include "mlir/Target/LLVMIR/Export.h"
+
+#include "llvm/ADT/ScopeExit.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/FileSystem.h"
+#include "llvm/Support/Path.h"
+#include "llvm/Support/TargetSelect.h"
+
+#ifndef __DEFAULT_CUDATOOLKIT_PATH__
+#define __DEFAULT_CUDATOOLKIT_PATH__ ""
+#endif
+
+#define DEBUG_TYPE "serialize-to-object"
+
+#include <cuda.h>
+#include <string>
+
+/// Emits an error at `loc` naming the failed CUDA call `expr`, the driver's
+/// description of `result`, and the contents of the JIT log `buffer`.
+static void emitCudaError(const llvm::Twine &expr, const char *buffer,
+                          CUresult result, Location loc) {
+  // `cuGetErrorString` can fail for unrecognized codes; never hand a null or
+  // uninitialized pointer to `Twine`.
+  const char *error = nullptr;
+  if (cuGetErrorString(result, &error) != CUDA_SUCCESS || !error)
+    error = "<unknown>";
+  emitError(loc, expr.concat(" failed with error code ")
+                     .concat(llvm::Twine{error})
+                     .concat("[")
+                     .concat(buffer)
+                     .concat("]"));
+}
+
+/// Evaluates `expr`; on a non-zero status reports it through `emitCudaError`
+/// and returns `std::nullopt` from the enclosing function. Expects
+/// `jitErrorBuffer` and `loc` to be in scope at the point of use.
+#define RETURN_ON_CUDA_ERROR(expr)                                             \
+  do {                                                                         \
+    if (auto status = (expr)) {                                                \
+      emitCudaError(#expr, jitErrorBuffer, status, loc);                       \
+      return std::nullopt;                                                     \
+    }                                                                          \
+  } while (false)
+
+namespace {
+/// Registers the LLVM NVPTX target, target info, MC layer and asm printer on
+/// construction.
+struct InitTarget {
+  InitTarget() {
+    LLVMInitializeNVPTXTarget();
+    LLVMInitializeNVPTXTargetInfo();
+    LLVMInitializeNVPTXTargetMC();
+    LLVMInitializeNVPTXAsmPrinter();
+  }
+};
+
+/// Serializes a GPU module to a cubin: the module is translated to ISA and
+/// then linked with the CUDA driver linker.
+class SerializeToCubin : public ModuleToObject {
+public:
+  SerializeToCubin(Operation &module, NVPTXTargetAttr target,
+                   TargetOptions targetOptions = {});
+
+  // Init the target.
+  static void init();
+
+  /// Loads the bitcode files in `fileList`, plus `libdevice` when a toolkit
+  /// path is set. Returns `std::nullopt` on failure.
+  std::optional<SmallVector<std::unique_ptr<llvm::Module>>>
+  loadBitcodeFiles(llvm::LLVMContext &context, llvm::Module &module) override;
+
+  /// Translates `llvmModule` to ISA and links it into a cubin. Returns
+  /// `std::nullopt` on failure.
+  std::optional<SmallVector<char, 0>>
+  moduleToObject(llvm::Module &llvmModule,
+                 llvm::TargetMachine &targetMachine) override;
+
+private:
+  // Owned copy: the constructor receives its options by value, so a view into
+  // them must not be kept past the constructor.
+  std::string toolkitPath;
+  SmallVector<std::string> fileList;
+};
+} // namespace
+
+SerializeToCubin::SerializeToCubin(Operation &module, NVPTXTargetAttr target,
+                                   TargetOptions targetOptions)
+    : ModuleToObject(module, target.getTriple(), target.getChip(),
+                     target.getFeatures(), target.getO()),
+      toolkitPath(targetOptions.getToolkitPath()),
+      fileList(targetOptions.getBitcodeFiles()) {
+  // Fall back to the toolkit path baked in at configure time.
+  if (toolkitPath.empty())
+    toolkitPath = __DEFAULT_CUDATOOLKIT_PATH__;
+
+  // Append the files listed in the attribute's `link` array.
+  if (ArrayAttr files = target.getLink())
+    for (Attribute attr : files.getValue())
+      if (auto file = dyn_cast<StringAttr>(attr))
+        fileList.push_back(file.str());
+}
+
+void SerializeToCubin::init() { static InitTarget target = InitTarget(); }
+
+std::optional<SmallVector<std::unique_ptr<llvm::Module>>>
+SerializeToCubin::loadBitcodeFiles(llvm::LLVMContext &context,
+                                   llvm::Module &module) {
+  // Try loading `libdevice` from a CUDA toolkit installation.
+  StringRef pathRef = toolkitPath;
+  if (!pathRef.empty()) {
+    SmallVector<char, 256> path;
+    path.insert(path.begin(), pathRef.begin(), pathRef.end());
+    pathRef = StringRef(path.data(), path.size());
+    if (!llvm::sys::fs::is_directory(pathRef)) {
+      getOperation().emitError() << "CUDA path: " << pathRef
+                                 << " does not exist or is not a directory.\n";
+      return std::nullopt;
+    }
+    // TODO remove this hard coded path.
+    llvm::sys::path::append(path, "nvvm", "libdevice", "libdevice.10.bc");
+    // `append` may have reallocated `path`; rebuild the view.
+    pathRef = StringRef(path.data(), path.size());
+    if (!llvm::sys::fs::is_regular_file(pathRef)) {
+      getOperation().emitError() << "LibDevice path: " << pathRef
+                                 << " does not exist or is not a file.\n";
+      return std::nullopt;
+    }
+    fileList.push_back(pathRef.str());
+  }
+
+  SmallVector<std::unique_ptr<llvm::Module>> bcFiles;
+  if (failed(loadBitcodeFilesFromList(context, fileList, bcFiles, true)))
+    return std::nullopt;
+  return bcFiles;
+}
+
+std::optional<SmallVector<char, 0>>
+SerializeToCubin::moduleToObject(llvm::Module &llvmModule,
+                                 llvm::TargetMachine &targetMachine) {
+  std::optional<std::string> serializedISA =
+      translateToISA(llvmModule, targetMachine);
+  if (!serializedISA) {
+    getOperation().emitError() << "Failed translating the module to ISA.";
+    return std::nullopt;
+  }
+
+  // `serializeToObject` only constructs this serializer after checking that
+  // the operation is a GPU module, so an asserting cast is used here.
+  auto gpuModule = cast<GPUModuleOp>(&getOperation());
+
+  LLVM_DEBUG({
+    llvm::dbgs() << "ISA for module: " << gpuModule.getNameAttr() << "\n";
+    llvm::dbgs() << *serializedISA << "\n";
+    llvm::dbgs().flush();
+  });
+
+  auto loc = getOperation().getLoc();
+  char jitErrorBuffer[4096] = {0};
+
+  RETURN_ON_CUDA_ERROR(cuInit(0));
+
+  // Linking requires a device context.
+  CUdevice device;
+  RETURN_ON_CUDA_ERROR(cuDeviceGet(&device, 0));
+  CUcontext context;
+  RETURN_ON_CUDA_ERROR(cuCtxCreate(&context, 0, device));
+  // Release the context on every early return below.
+  auto destroyContext = llvm::make_scope_exit([&] { cuCtxDestroy(context); });
+
+  CUlinkState linkState;
+
+  CUjit_option jitOptions[] = {CU_JIT_ERROR_LOG_BUFFER,
+                               CU_JIT_ERROR_LOG_BUFFER_SIZE_BYTES};
+  void *jitOptionsVals[] = {jitErrorBuffer,
+                            reinterpret_cast<void *>(sizeof(jitErrorBuffer))};
+
+  RETURN_ON_CUDA_ERROR(cuLinkCreate(2,              /* number of jit options */
+                                    jitOptions,     /* jit options */
+                                    jitOptionsVals, /* jit option values */
+                                    &linkState));
+  // Declared after `destroyContext`, so the link state is destroyed first.
+  auto destroyLink = llvm::make_scope_exit([&] { cuLinkDestroy(linkState); });
+
+  auto kernelName = gpuModule.getName().str();
+  RETURN_ON_CUDA_ERROR(cuLinkAddData(
+      linkState, CUjitInputType::CU_JIT_INPUT_PTX,
+      const_cast<void *>(static_cast<const void *>(serializedISA->c_str())),
+      serializedISA->length(), kernelName.c_str(),
+      0,       /* number of jit options */
+      nullptr, /* jit options */
+      nullptr  /* jit option values */
+      ));
+
+  void *cubinData;
+  size_t cubinSize;
+  RETURN_ON_CUDA_ERROR(cuLinkComplete(linkState, &cubinData, &cubinSize));
+
+  // Copy the cubin out before the link state that owns it is destroyed.
+  char *cubinAsChar = static_cast<char *>(cubinData);
+  auto result = SmallVector<char, 0>(cubinAsChar, cubinAsChar + cubinSize);
+
+  // On the success path destroy explicitly so failures are still reported;
+  // the guards are released first to avoid destroying twice.
+  // This will also destroy the cubin data.
+  destroyLink.release();
+  RETURN_ON_CUDA_ERROR(cuLinkDestroy(linkState));
+  destroyContext.release();
+  RETURN_ON_CUDA_ERROR(cuCtxDestroy(context));
+  return result;
+}
+
+/// Serializes `module`, which must be a GPU module, to a cubin using the
+/// settings in this attribute and in `options`. Returns `std::nullopt` on
+/// failure.
+std::optional<SmallVector<char, 0>>
+NVPTXTargetAttr::serializeToObject(Operation *module,
+                                   const TargetOptions &options) const {
+  assert(module && "The module must be non null.");
+  if (!module)
+    return std::nullopt;
+  if (!mlir::isa<gpu::GPUModuleOp>(module)) {
+    module->emitError("Module must be a GPU module.");
+    return std::nullopt;
+  }
+  SerializeToCubin::init();
+  SerializeToCubin serializer(*module, *this, options);
+  return serializer.run();
+}
+
+#else
+// Provide a null vector for testing purposes.
+std::optional<SmallVector<char, 0>>
+NVPTXTargetAttr::serializeToObject(Operation *module,
+                                   const TargetOptions &options) const {
+  assert(module && "The module must be non null.");
+  if (!module)
+    return std::nullopt;
+  if (!mlir::isa<gpu::GPUModuleOp>(module)) {
+    module->emitError("Module must be a GPU module.");
+    return std::nullopt;
+  }
+  // No NVPTX support in this build: hand back an empty object.
+  return SmallVector<char, 0>{};
+}
+#endif // MLIR_GPU_NVPTX_TARGET_ENABLED
+
+/// Verifies the attribute parameters: the optimization level must lie in
+/// [0, 3], the triple and chip must be non-empty, and every element of the
+/// optional `files` array must be a string attribute. `features` and `flags`
+/// are not checked.
+LogicalResult
+NVPTXTargetAttr::verify(function_ref<InFlightDiagnostic()> emitError,
+                        int optLevel, StringRef triple, StringRef chip,
+                        StringRef features, DictionaryAttr flags,
+                        ArrayAttr files) {
+  if (optLevel < 0 || optLevel > 3) {
+    emitError() << "The optimization level must be a number between 0 and 3.";
+    return failure();
+  }
+  if (triple.empty()) {
+    emitError() << "The target triple cannot be empty.";
+    return failure();
+  }
+  if (chip.empty()) {
+    emitError() << "The target chip cannot be empty.";
+    return failure();
+  }
+  // Reject the array only when at least one element is null or not a string.
+  if (files && !llvm::all_of(files, [](::mlir::Attribute attr) {
+        return attr && mlir::isa<StringAttr>(attr);
+      })) {
+    emitError() << "All the elements in the `link` array must be strings.";
+    return failure();
+  }
+  return success();
+}