diff --git a/mlir/include/mlir/Dialect/GPU/Passes.h b/mlir/include/mlir/Dialect/GPU/Passes.h --- a/mlir/include/mlir/Dialect/GPU/Passes.h +++ b/mlir/include/mlir/Dialect/GPU/Passes.h @@ -90,6 +90,10 @@ /// annotation. void registerGpuSerializeToCubinPass(); +/// Register pass to serialize GPU kernel functions to a HSAco binary +/// annotation. +void registerGpuSerializeToHsacoPass(); + /// Generate the code for registering passes. #define GEN_PASS_REGISTRATION #include "mlir/Dialect/GPU/Passes.h.inc" diff --git a/mlir/include/mlir/InitAllPasses.h b/mlir/include/mlir/InitAllPasses.h --- a/mlir/include/mlir/InitAllPasses.h +++ b/mlir/include/mlir/InitAllPasses.h @@ -52,6 +52,7 @@ registerAsyncPasses(); registerGPUPasses(); registerGpuSerializeToCubinPass(); + registerGpuSerializeToHsacoPass(); registerLinalgPasses(); LLVM::registerLLVMPasses(); quant::registerQuantPasses(); diff --git a/mlir/lib/Dialect/GPU/CMakeLists.txt b/mlir/lib/Dialect/GPU/CMakeLists.txt --- a/mlir/lib/Dialect/GPU/CMakeLists.txt +++ b/mlir/lib/Dialect/GPU/CMakeLists.txt @@ -6,6 +6,17 @@ ) endif() +if (MLIR_ROCM_CONVERSIONS_ENABLED) + set(AMDGPU_LIBS + MCParser + AMDGPUAsmParser + AMDGPUAsmPrinter + AMDGPUCodeGen + AMDGPUDesc + AMDGPUInfo + ) +endif() + add_mlir_dialect_library(MLIRGPU IR/GPUDialect.cpp Transforms/AllReduceLowering.cpp @@ -15,6 +26,7 @@ Transforms/ParallelLoopMapper.cpp Transforms/SerializeToBlob.cpp Transforms/SerializeToCubin.cpp + Transforms/SerializeToHsaco.cpp ADDITIONAL_HEADER_DIRS ${MLIR_MAIN_INCLUDE_DIR}/mlir/Dialect/GPU @@ -23,6 +35,7 @@ Core MC ${NVPTX_LIBS} + ${AMDGPU_LIBS} DEPENDS MLIRGPUOpsIncGen @@ -83,3 +96,74 @@ ) endif() + +if(MLIR_ROCM_RUNNER_ENABLED) + if (NOT ("AMDGPU" IN_LIST LLVM_TARGETS_TO_BUILD)) + message(SEND_ERROR + "Building the mlir rocm runner requires the AMDGPU backend") + endif() + + # Ensure lld is enabled. + if (NOT "lld" IN_LIST LLVM_ENABLE_PROJECTS) + message(SEND_ERROR "lld is not enabled. 
Please revise LLVM_ENABLE_PROJECTS") + endif() + + # lld header files. + include_directories(${MLIR_SOURCE_DIR}/../lld/include) + + # Configure ROCm support. + if (NOT DEFINED ROCM_PATH) + if (NOT DEFINED ENV{ROCM_PATH}) + set(ROCM_PATH "/opt/rocm" CACHE PATH "Path to which ROCm has been installed") + else() + set(ROCM_PATH $ENV{ROCM_PATH} CACHE PATH "Path to which ROCm has been installed") + endif() + set(HIP_PATH "${ROCM_PATH}/hip" CACHE PATH " Path to which HIP has been installed") + endif() + set(CMAKE_MODULE_PATH "${HIP_PATH}/cmake" ${CMAKE_MODULE_PATH}) + find_package(HIP) + if (NOT HIP_FOUND) + message(SEND_ERROR "Build the mlir rocm runner requires a working ROCm and HIP install") + else() + message(STATUS "ROCm HIP version: ${HIP_VERSION}") + endif() + + # Set compile-time flags for ROCm path. + add_definitions(-D__ROCM_PATH__="${ROCM_PATH}") + + # Locate HIP runtime library. + find_library(ROCM_RUNTIME_LIBRARY amdhip64 + PATHS "${HIP_PATH}/lib") + if (NOT ROCM_RUNTIME_LIBRARY) + message(SEND_ERROR "Could not locate ROCm HIP runtime library") + else() + message(STATUS "ROCm HIP runtime lib: ${ROCM_RUNTIME_LIBRARY}") + endif() + + target_compile_definitions(obj.MLIRGPU + PRIVATE + # Set HIP compile-time flags. + __HIP_PLATFORM_HCC__ + # Enable gpu-to-hsaco pass. + MLIR_GPU_TO_HSACO_PASS_ENABLE=1 + ) + + # Add ROCm headers includes. 
+ target_include_directories(obj.MLIRGPU + PRIVATE + "${ROCM_PATH}/include" + "${HIP_PATH}/include" + ) + + target_link_libraries(MLIRGPU + PRIVATE + lldCommon + lldDriver + lldELF + MLIRROCDLToLLVMIRTranslation + ${ROCM_RUNTIME_LIBRARY} + ) + + llvm_update_compile_flags(obj.MLIRGPU) + +endif() diff --git a/mlir/lib/Dialect/GPU/Transforms/SerializeToHsaco.cpp b/mlir/lib/Dialect/GPU/Transforms/SerializeToHsaco.cpp new file mode 100644 --- /dev/null +++ b/mlir/lib/Dialect/GPU/Transforms/SerializeToHsaco.cpp @@ -0,0 +1,284 @@ +//===- SerializeToHsaco.cpp - Convert GPU kernel to HSACO blob ------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements a pass that serializes a gpu module into HSAco blob and +// adds that blob as a string attribute of the module.
+//
+//===----------------------------------------------------------------------===//
+#include "mlir/Dialect/GPU/Passes.h"
+
+#if MLIR_GPU_TO_HSACO_PASS_ENABLE
+#include "mlir/Pass/Pass.h"
+#include "mlir/Support/FileUtilities.h"
+#include "mlir/Target/LLVMIR/Dialect/ROCDL/ROCDLToLLVMIRTranslation.h"
+#include "mlir/Target/LLVMIR/Export.h"
+
+#include "llvm/MC/MCAsmBackend.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCCodeEmitter.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCObjectFileInfo.h"
+#include "llvm/MC/MCObjectWriter.h"
+#include "llvm/MC/MCParser/MCTargetAsmParser.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+
+#include "llvm/Support/FileUtilities.h"
+#include "llvm/Support/LineIterator.h"
+#include "llvm/Support/Program.h"
+#include "llvm/Support/TargetRegistry.h"
+#include "llvm/Support/TargetSelect.h"
+#include "llvm/Support/WithColor.h"
+#include "llvm/Target/TargetOptions.h"
+
+#include "lld/Common/Driver.h"
+
+#include "hip/hip_version.h"
+
+#include <mutex>
+
+using namespace mlir;
+
+namespace {
+// Pass that turns the ISA text handed to serializeISA() into an HSA code
+// object: the ISA is first assembled into an object (assembleIsa) and that
+// object is then linked into the final HSAco blob (createHsaco).
+class SerializeToHsacoPass
+    : public PassWrapper<SerializeToHsacoPass, gpu::SerializeToBlobPass> {
+public:
+  // Fills in the 'triple' and 'chip' options unless they already have a
+  // value.
+  SerializeToHsacoPass();
+
+private:
+  void getDependentDialects(DialectRegistry &registry) const override;
+
+  // Serializes ROCDL to HSACO. Returns null on failure.
+  std::unique_ptr<std::vector<char>>
+  serializeISA(const std::string &isa) override;
+
+  // Assembles 'isa' into an object file image. Returns null on failure.
+  std::unique_ptr<SmallVectorImpl<char>> assembleIsa(const std::string &isa);
+  // Links the object file image 'isaBinary' into an HSA code object. Returns
+  // null on failure.
+  std::unique_ptr<std::vector<char>>
+  createHsaco(const SmallVectorImpl<char> &isaBinary);
+};
+} // namespace
+
+// Returns the first ISA name other than "gfx000" printed by
+// `rocm_agent_enumerator -t GPU`. Falls back to "gfx900" (after printing a
+// warning) if the tool cannot be located, cannot be run, or its output cannot
+// be read, and silently if the output contains no usable entry.
+static std::string getDefaultChip() {
+  const char kDefaultChip[] = "gfx900";
+
+  // Locate rocm_agent_enumerator.
+  const char kRocmAgentEnumerator[] = "rocm_agent_enumerator";
+  llvm::ErrorOr<std::string> rocmAgentEnumerator = llvm::sys::findProgramByName(
+      kRocmAgentEnumerator, {__ROCM_PATH__ "/bin"});
+  if (!rocmAgentEnumerator) {
+    llvm::WithColor::warning(llvm::errs())
+        << kRocmAgentEnumerator << " couldn't be located under "
+        << __ROCM_PATH__ << "/bin\n";
+    return kDefaultChip;
+  }
+
+  // Prepare temp file to hold the outputs. Only the path is needed, so use the
+  // overload that does not hand back an open file descriptor (which would
+  // otherwise have to be closed here).
+  SmallString<128> tempFilename;
+  if (llvm::sys::fs::createTemporaryFile("rocm_agent", "txt", tempFilename)) {
+    llvm::WithColor::warning(llvm::errs())
+        << "temporary file for " << kRocmAgentEnumerator << " creation error\n";
+    return kDefaultChip;
+  }
+  llvm::FileRemover cleanup(tempFilename);
+
+  // Invoke rocm_agent_enumerator. The first argument is argv[0] (the program
+  // name by convention); the options proper start at the second element.
+  std::string errorMessage;
+  SmallVector<StringRef, 3> args{rocmAgentEnumerator.get(), "-t", "GPU"};
+  // An empty path disconnects the stream: stdin and stderr are dropped, and
+  // stdout is captured in the temp file.
+  Optional<StringRef> redirects[3] = {{""}, tempFilename.str(), {""}};
+  int result =
+      llvm::sys::ExecuteAndWait(rocmAgentEnumerator.get(), args, llvm::None,
+                                redirects, 0, 0, &errorMessage);
+  if (result) {
+    llvm::WithColor::warning(llvm::errs())
+        << kRocmAgentEnumerator << " invocation error: " << errorMessage
+        << "\n";
+    return kDefaultChip;
+  }
+
+  // Load and parse the result. Failing to read it is not fatal (the default
+  // chip is used), so report it as a warning like the other fallbacks.
+  auto gfxIsaList = openInputFile(tempFilename);
+  if (!gfxIsaList) {
+    llvm::WithColor::warning(llvm::errs())
+        << "read ROCm agent list temp file error\n";
+    return kDefaultChip;
+  }
+  for (llvm::line_iterator lines(*gfxIsaList); !lines.is_at_end(); ++lines) {
+    // Tolerate stray whitespace (e.g. a trailing '\r') around the ISA name.
+    const StringRef isaName = lines->trim();
+    // Skip the line with content "gfx000" (presumably the placeholder entry
+    // for the host CPU -- TODO confirm against rocm_agent_enumerator).
+    if (isaName.empty() || isaName == "gfx000")
+      continue;
+    // Use the first ISA version found.
+    return isaName.str();
+  }
+
+  return kDefaultChip;
+}
+
+// Sets 'option' to the result of 'getValue' unless it already has a value.
+static void maybeSetOption(Pass::Option<std::string> &option,
+                           function_ref<std::string()> getValue) {
+  if (!option.hasValue())
+    option = getValue();
+}
+
+SerializeToHsacoPass::SerializeToHsacoPass() {
+  maybeSetOption(this->triple, [] { return "amdgcn-amd-amdhsa"; });
+  maybeSetOption(this->chip, [] {
+    // getDefaultChip() runs an external tool; the function-local static makes
+    // sure that happens at most once per process.
+    static auto chip = getDefaultChip();
+    return chip;
+  });
+}
+
+void SerializeToHsacoPass::getDependentDialects(
+    DialectRegistry &registry) const {
+  registerROCDLDialectTranslation(registry);
+  gpu::SerializeToBlobPass::getDependentDialects(registry);
+}
+
+// Assembles the textual ISA in 'isa' for the pass' triple/chip/features and
+// returns the bytes of the resulting object file. Emits an error at the
+// location of the current operation and returns null if the target cannot be
+// looked up, the assembler cannot be set up, or 'isa' fails to assemble.
+std::unique_ptr<SmallVectorImpl<char>>
+SerializeToHsacoPass::assembleIsa(const std::string &isa) {
+  auto loc = getOperation().getLoc();
+
+  // The object writer created below streams the assembled object into
+  // 'result' through 'os'.
+  SmallVector<char, 0> result;
+  llvm::raw_svector_ostream os(result);
+
+  llvm::Triple triple(llvm::Triple::normalize(this->triple));
+  std::string error;
+  const llvm::Target *target =
+      llvm::TargetRegistry::lookupTarget(triple.normalize(), error);
+  if (!target) {
+    emitError(loc, Twine("failed to lookup target: ") + error);
+    return {};
+  }
+
+  llvm::SourceMgr srcMgr;
+  srcMgr.AddNewSourceBuffer(llvm::MemoryBuffer::getMemBuffer(isa),
+                            llvm::SMLoc());
+
+  const llvm::MCTargetOptions mcOptions;
+  std::unique_ptr<llvm::MCRegisterInfo> mri(
+      target->createMCRegInfo(this->triple));
+  std::unique_ptr<llvm::MCAsmInfo> mai(
+      target->createMCAsmInfo(*mri, this->triple, mcOptions));
+  mai->setRelaxELFRelocations(true);
+
+  llvm::MCObjectFileInfo mofi;
+  llvm::MCContext ctx(mai.get(), mri.get(), &mofi, &srcMgr, &mcOptions);
+  mofi.InitMCObjectFileInfo(triple, /*PIC=*/false, ctx,
+                            /*LargeCodeModel=*/false);
+
+  // Best effort: the compilation dir is only set if the cwd can be queried.
+  SmallString<128> cwd;
+  if (!llvm::sys::fs::current_path(cwd))
+    ctx.setCompilationDir(cwd);
+
+  std::unique_ptr<llvm::MCStreamer> mcStreamer;
+  std::unique_ptr<llvm::MCInstrInfo> mcii(target->createMCInstrInfo());
+  std::unique_ptr<llvm::MCSubtargetInfo> sti(
+      target->createMCSubtargetInfo(this->triple, this->chip, this->features));
+
+  // The raw pointers are wrapped into unique_ptrs when handed to the object
+  // streamer; 'mab' is still used by name to create the object writer.
+  llvm::MCCodeEmitter *ce = target->createMCCodeEmitter(*mcii, *mri, ctx);
+  llvm::MCAsmBackend *mab = target->createMCAsmBackend(*sti, *mri, mcOptions);
+  mcStreamer.reset(target->createMCObjectStreamer(
+      triple, ctx, std::unique_ptr<llvm::MCAsmBackend>(mab),
+      mab->createObjectWriter(os), std::unique_ptr<llvm::MCCodeEmitter>(ce),
+      *sti, mcOptions.MCRelaxAll, mcOptions.MCIncrementalLinkerCompatible,
+      /*DWARFMustBeAtTheEnd*/ false));
+  mcStreamer->setUseAssemblerInfoForParsing(true);
+
+  std::unique_ptr<llvm::MCAsmParser> parser(
+      createMCAsmParser(srcMgr, ctx, *mcStreamer, *mai));
+  std::unique_ptr<llvm::MCTargetAsmParser> tap(
+      target->createMCAsmParser(*sti, *parser, *mcii, mcOptions));
+
+  if (!tap) {
+    emitError(loc, "assembler initialization error");
+    return {};
+  }
+
+  parser->setTargetParser(*tap);
+  // Run() returns true on failure. Do not hand a partially assembled object
+  // to the linker in that case.
+  if (parser->Run(/*NoInitialTextSection=*/false)) {
+    emitError(loc, "failed to assemble ISA");
+    return {};
+  }
+
+  return std::make_unique<SmallVector<char, 0>>(std::move(result));
+}
+
+// Links the object file image 'isaBinary' into a shared HSA code object by
+// writing it to a temp file, invoking lld on it and reading the output back.
+// Emits an error at the location of the current operation and returns null on
+// any failure. Both temp files are removed when the function returns.
+std::unique_ptr<std::vector<char>>
+SerializeToHsacoPass::createHsaco(const SmallVectorImpl<char> &isaBinary) {
+  auto loc = getOperation().getLoc();
+
+  // Save the ISA binary to a temp file.
+  int tempIsaBinaryFd = -1;
+  SmallString<128> tempIsaBinaryFilename;
+  if (llvm::sys::fs::createTemporaryFile("kernel", "o", tempIsaBinaryFd,
+                                         tempIsaBinaryFilename)) {
+    emitError(loc, "temporary file for ISA binary creation error");
+    return {};
+  }
+  llvm::FileRemover cleanupIsaBinary(tempIsaBinaryFilename);
+  // The stream owns the descriptor (shouldClose) and closes it in close().
+  llvm::raw_fd_ostream tempIsaBinaryOs(tempIsaBinaryFd, /*shouldClose=*/true);
+  tempIsaBinaryOs << StringRef(isaBinary.data(), isaBinary.size());
+  tempIsaBinaryOs.close();
+  if (tempIsaBinaryOs.has_error()) {
+    // An uncleared error would make the stream's destructor abort the
+    // process; turn it into a regular diagnostic instead.
+    tempIsaBinaryOs.clear_error();
+    emitError(loc, "write ISA binary to temp file error");
+    return {};
+  }
+
+  // Create a temp file for HSA code object. lld opens the output by path, so
+  // use the overload that does not hand back an open file descriptor (which
+  // would otherwise have to be closed here).
+  SmallString<128> tempHsacoFilename;
+  if (llvm::sys::fs::createTemporaryFile("kernel", "hsaco",
+                                         tempHsacoFilename)) {
+    emitError(loc, "temporary file for HSA code object creation error");
+    return {};
+  }
+  llvm::FileRemover cleanupHsaco(tempHsacoFilename);
+
+  {
+    // Serialize lld invocations across threads (presumably because lld is not
+    // safe to run concurrently within one process -- TODO confirm).
+    static std::mutex mutex;
+    const std::lock_guard<std::mutex> lock(mutex);
+    // Invoke lld. Expect a true return value from lld.
+    if (!lld::elf::link({"ld.lld", "-shared", tempIsaBinaryFilename.c_str(),
+                         "-o", tempHsacoFilename.c_str()},
+                        /*canExitEarly=*/false, llvm::outs(), llvm::errs())) {
+      emitError(loc, "lld invocation error");
+      return {};
+    }
+  }
+
+  // Load the HSA code object.
+  auto hsacoFile = openInputFile(tempHsacoFilename);
+  if (!hsacoFile) {
+    emitError(loc, "read HSA code object from temp file error");
+    return {};
+  }
+
+  StringRef buffer = hsacoFile->getBuffer();
+  return std::make_unique<std::vector<char>>(buffer.begin(), buffer.end());
+}
+
+// Assembles 'isa' and links the result into an HSA code object. Returns null
+// if either step fails (the failing step has already emitted an error).
+std::unique_ptr<std::vector<char>>
+SerializeToHsacoPass::serializeISA(const std::string &isa) {
+  auto isaBinary = assembleIsa(isa);
+  if (!isaBinary)
+    return {};
+  return createHsaco(*isaBinary);
+}
+
+// Register pass to serialize GPU kernel functions to a HSACO binary annotation.
+void mlir::registerGpuSerializeToHsacoPass() {
+  PassRegistration<SerializeToHsacoPass> registerSerializeToHSACO(
+      "gpu-to-hsaco", "Lower GPU kernel function to HSACO binary annotations",
+      [] {
+        // Initialize LLVM AMDGPU backend.
+        LLVMInitializeAMDGPUAsmParser();
+        LLVMInitializeAMDGPUAsmPrinter();
+        LLVMInitializeAMDGPUTarget();
+        LLVMInitializeAMDGPUTargetInfo();
+        LLVMInitializeAMDGPUTargetMC();
+
+        return std::make_unique<SerializeToHsacoPass>();
+      });
+}
+#else  // MLIR_GPU_TO_HSACO_PASS_ENABLE
+void mlir::registerGpuSerializeToHsacoPass() {}
+#endif // MLIR_GPU_TO_HSACO_PASS_ENABLE
diff --git a/mlir/lib/ExecutionEngine/CMakeLists.txt b/mlir/lib/ExecutionEngine/CMakeLists.txt
--- a/mlir/lib/ExecutionEngine/CMakeLists.txt
+++ b/mlir/lib/ExecutionEngine/CMakeLists.txt
@@ -7,6 +7,7 @@
   CudaRuntimeWrappers.cpp
   SparseUtils.cpp
   ExecutionEngine.cpp
+  RocmRuntimeWrappers.cpp
   RunnerUtils.cpp
   OptUtils.cpp
   JitRunner.cpp
@@ -136,3 +137,66 @@
     ${CUDA_RUNTIME_LIBRARY}
   )
 endif()
+
+if(MLIR_ROCM_RUNNER_ENABLED)
+  if (NOT ("AMDGPU" IN_LIST LLVM_TARGETS_TO_BUILD))
+    message(SEND_ERROR
+      "Building the mlir rocm runner requires the AMDGPU backend")
+  endif()
+
+  # Ensure lld is enabled.
+  if (NOT "lld" IN_LIST LLVM_ENABLE_PROJECTS)
+    message(SEND_ERROR "lld is not enabled. Please revise LLVM_ENABLE_PROJECTS")
+  endif()
+
+  # lld header files.
+  include_directories(${MLIR_SOURCE_DIR}/../lld/include)
+
+  # Configure ROCm support.
+ if (NOT DEFINED ROCM_PATH) + if (NOT DEFINED ENV{ROCM_PATH}) + set(ROCM_PATH "/opt/rocm" CACHE PATH "Path to which ROCm has been installed") + else() + set(ROCM_PATH $ENV{ROCM_PATH} CACHE PATH "Path to which ROCm has been installed") + endif() + set(HIP_PATH "${ROCM_PATH}/hip" CACHE PATH "Path to which HIP has been installed") + endif() + set(CMAKE_MODULE_PATH "${HIP_PATH}/cmake" ${CMAKE_MODULE_PATH}) + find_package(HIP) + if (NOT HIP_FOUND) + message(SEND_ERROR "Build the mlir rocm runner requires a working ROCm and HIP install") + else() + message(STATUS "ROCm HIP version: ${HIP_VERSION}") + endif() + + # Set compile-time flags for ROCm path. + add_definitions(-D__ROCM_PATH__="${ROCM_PATH}") + + # Locate HIP runtime library. + find_library(ROCM_RUNTIME_LIBRARY amdhip64 + PATHS "${HIP_PATH}/lib") + if (NOT ROCM_RUNTIME_LIBRARY) + message(SEND_ERROR "Could not locate ROCm HIP runtime library") + else() + message(STATUS "ROCm HIP runtime lib: ${ROCM_RUNTIME_LIBRARY}") + endif() + + # Set HIP compile-time flags. 
+ add_definitions(-D__HIP_PLATFORM_HCC__) + + add_mlir_library(mlir_rocm_runtime + SHARED + RocmRuntimeWrappers.cpp + + EXCLUDE_FROM_LIBMLIR + ) + target_include_directories(mlir_rocm_runtime + PRIVATE + "${ROCM_PATH}/include" + "${HIP_PATH}/include" + ) + target_link_libraries(mlir_rocm_runtime + PRIVATE + ${ROCM_RUNTIME_LIBRARY} + ) +endif() \ No newline at end of file diff --git a/mlir/tools/mlir-rocm-runner/rocm-runtime-wrappers.cpp b/mlir/lib/ExecutionEngine/RocmRuntimeWrappers.cpp rename from mlir/tools/mlir-rocm-runner/rocm-runtime-wrappers.cpp rename to mlir/lib/ExecutionEngine/RocmRuntimeWrappers.cpp --- a/mlir/tools/mlir-rocm-runner/rocm-runtime-wrappers.cpp +++ b/mlir/lib/ExecutionEngine/RocmRuntimeWrappers.cpp @@ -1,4 +1,4 @@ -//===- rocm-runtime-wrappers.cpp - MLIR ROCM runner wrapper library -------===// +//===- RocmRuntimeWrappers.cpp - MLIR ROCM runner wrapper library ---------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. @@ -30,29 +30,25 @@ fprintf(stderr, "'%s' failed with '%s'\n", #expr, name); \ }(expr) -// Static reference to HIP primary context for device ordinal 0. -static hipCtx_t Context = [] { - HIP_REPORT_IF_ERROR(hipInit(/*flags=*/0)); - hipDevice_t device; - HIP_REPORT_IF_ERROR(hipDeviceGet(&device, /*ordinal=*/0)); - hipCtx_t context; - HIP_REPORT_IF_ERROR(hipDevicePrimaryCtxRetain(&context, device)); - return context; -}(); - // Sets the `Context` for the duration of the instance and restores the previous // context on destruction. class ScopedContext { public: ScopedContext() { - HIP_REPORT_IF_ERROR(hipCtxGetCurrent(&previous)); - HIP_REPORT_IF_ERROR(hipCtxSetCurrent(Context)); + // Static reference to HIP primary context for device ordinal 0. 
+ static hipCtx_t context = [] { + HIP_REPORT_IF_ERROR(hipInit(/*flags=*/0)); + hipDevice_t device; + HIP_REPORT_IF_ERROR(hipDeviceGet(&device, /*ordinal=*/0)); + hipCtx_t ctx; + HIP_REPORT_IF_ERROR(hipDevicePrimaryCtxRetain(&ctx, device)); + return ctx; + }(); + + HIP_REPORT_IF_ERROR(hipCtxPushCurrent(context)); } - ~ScopedContext() { HIP_REPORT_IF_ERROR(hipCtxSetCurrent(previous)); } - -private: - hipCtx_t previous; + ~ScopedContext() { HIP_REPORT_IF_ERROR(hipCtxPopCurrent(nullptr)); } }; extern "C" hipModule_t mgpuModuleLoad(void *data) { diff --git a/mlir/test/CMakeLists.txt b/mlir/test/CMakeLists.txt --- a/mlir/test/CMakeLists.txt +++ b/mlir/test/CMakeLists.txt @@ -21,8 +21,7 @@ set(MLIR_RUNNER_UTILS_DIR ${CMAKE_LIBRARY_OUTPUT_DIRECTORY}) # Passed to lit.site.cfg.py.in to set up the path where to find the libraries -# for the mlir rocm / spirv / vulkan runner tests. -set(MLIR_ROCM_WRAPPER_LIBRARY_DIR ${CMAKE_LIBRARY_OUTPUT_DIRECTORY}) +# for the mlir spirv / vulkan runner tests. set(MLIR_SPIRV_WRAPPER_LIBRARY_DIR ${CMAKE_LIBRARY_OUTPUT_DIRECTORY}) set(MLIR_VULKAN_WRAPPER_LIBRARY_DIR ${CMAKE_LIBRARY_OUTPUT_DIRECTORY}) @@ -74,6 +73,10 @@ list(APPEND MLIR_TEST_DEPENDS mlir_cuda_runtime) endif() +if(MLIR_ROCM_RUNNER_ENABLED) + list(APPEND MLIR_TEST_DEPENDS mlir_rocm_runtime) +endif() + list(APPEND MLIR_TEST_DEPENDS MLIRUnitTests) if(LLVM_BUILD_EXAMPLES) @@ -88,12 +91,6 @@ ) endif() -if(MLIR_ROCM_RUNNER_ENABLED) - list(APPEND MLIR_TEST_DEPENDS - mlir-rocm-runner - ) -endif() - if(MLIR_SPIRV_CPU_RUNNER_ENABLED) add_subdirectory(mlir-spirv-cpu-runner) list(APPEND MLIR_TEST_DEPENDS diff --git a/mlir/test/Conversion/GPUToROCm/lower-rocdl-kernel-to-hsaco.mlir b/mlir/test/Conversion/GPUToROCm/lower-rocdl-kernel-to-hsaco.mlir --- a/mlir/test/Conversion/GPUToROCm/lower-rocdl-kernel-to-hsaco.mlir +++ b/mlir/test/Conversion/GPUToROCm/lower-rocdl-kernel-to-hsaco.mlir @@ -1,6 +1,6 @@ -// RUN: mlir-opt %s --test-kernel-to-hsaco -split-input-file | FileCheck %s +// RUN: mlir-opt 
%s --test-gpu-to-hsaco | FileCheck %s -// CHECK: attributes {rocdl.hsaco = "HSACO"} +// CHECK: gpu.module @foo attributes {gpu.binary = "HSACO"} gpu.module @foo { llvm.func @kernel(%arg0 : f32, %arg1 : !llvm.ptr) // CHECK: attributes {gpu.kernel} @@ -9,8 +9,7 @@ } } -// ----- - +// CHECK: gpu.module @bar attributes {gpu.binary = "HSACO"} gpu.module @bar { // CHECK: func @kernel_a llvm.func @kernel_a() diff --git a/mlir/test/Integration/GPU/CUDA/lit.local.cfg b/mlir/test/Integration/GPU/CUDA/lit.local.cfg --- a/mlir/test/Integration/GPU/CUDA/lit.local.cfg +++ b/mlir/test/Integration/GPU/CUDA/lit.local.cfg @@ -1,2 +1,2 @@ if not config.enable_cuda_runner: - config.unsupported = True \ No newline at end of file + config.unsupported = True diff --git a/mlir/test/mlir-rocm-runner/gpu-to-hsaco.mlir b/mlir/test/Integration/GPU/ROCM/gpu-to-hsaco.mlir rename from mlir/test/mlir-rocm-runner/gpu-to-hsaco.mlir rename to mlir/test/Integration/GPU/ROCM/gpu-to-hsaco.mlir --- a/mlir/test/mlir-rocm-runner/gpu-to-hsaco.mlir +++ b/mlir/test/Integration/GPU/ROCM/gpu-to-hsaco.mlir @@ -1,5 +1,9 @@ -// RUN: mlir-rocm-runner %s \ -// RUN: --shared-libs=%cuda_wrapper_library_dir/libcuda-runtime-wrappers%shlibext \ +// RUN: mlir-opt %s \ +// RUN: -gpu-kernel-outlining \ +// RUN: -pass-pipeline='gpu.module(strip-debuginfo,convert-gpu-to-rocdl,gpu-to-hsaco)' \ +// RUN: -gpu-to-llvm \ +// RUN: | mlir-cpu-runner \ +// RUN: --shared-libs=%linalg_test_lib_dir/libmlir_rocm_runtime%shlibext \ // RUN: --shared-libs=%linalg_test_lib_dir/libmlir_runner_utils%shlibext \ // RUN: --entry-point-result=void \ // RUN: | FileCheck %s diff --git a/mlir/test/mlir-rocm-runner/lit.local.cfg b/mlir/test/Integration/GPU/ROCM/lit.local.cfg rename from mlir/test/mlir-rocm-runner/lit.local.cfg rename to mlir/test/Integration/GPU/ROCM/lit.local.cfg diff --git a/mlir/test/mlir-rocm-runner/two-modules.mlir b/mlir/test/Integration/GPU/ROCM/two-modules.mlir rename from mlir/test/mlir-rocm-runner/two-modules.mlir rename to 
mlir/test/Integration/GPU/ROCM/two-modules.mlir --- a/mlir/test/mlir-rocm-runner/two-modules.mlir +++ b/mlir/test/Integration/GPU/ROCM/two-modules.mlir @@ -1,5 +1,9 @@ -// RUN: mlir-rocm-runner %s \ -// RUN: --shared-libs=%cuda_wrapper_library_dir/libcuda-runtime-wrappers%shlibext \ +// RUN: mlir-opt %s \ +// RUN: -gpu-kernel-outlining \ +// RUN: -pass-pipeline='gpu.module(strip-debuginfo,convert-gpu-to-rocdl,gpu-to-hsaco)' \ +// RUN: -gpu-to-llvm \ +// RUN: | mlir-cpu-runner \ +// RUN: --shared-libs=%linalg_test_lib_dir/libmlir_rocm_runtime%shlibext \ // RUN: --shared-libs=%linalg_test_lib_dir/libmlir_runner_utils%shlibext \ // RUN: --entry-point-result=void \ // RUN: | FileCheck %s diff --git a/mlir/test/mlir-rocm-runner/vecadd.mlir b/mlir/test/Integration/GPU/ROCM/vecadd.mlir rename from mlir/test/mlir-rocm-runner/vecadd.mlir rename to mlir/test/Integration/GPU/ROCM/vecadd.mlir --- a/mlir/test/mlir-rocm-runner/vecadd.mlir +++ b/mlir/test/Integration/GPU/ROCM/vecadd.mlir @@ -1,5 +1,9 @@ -// RUN: mlir-rocm-runner %s \ -// RUN: --shared-libs=%cuda_wrapper_library_dir/libcuda-runtime-wrappers%shlibext \ +// RUN: mlir-opt %s \ +// RUN: -gpu-kernel-outlining \ +// RUN: -pass-pipeline='gpu.module(strip-debuginfo,convert-gpu-to-rocdl,gpu-to-hsaco)' \ +// RUN: -gpu-to-llvm \ +// RUN: | mlir-cpu-runner \ +// RUN: --shared-libs=%linalg_test_lib_dir/libmlir_rocm_runtime%shlibext \ // RUN: --shared-libs=%linalg_test_lib_dir/libmlir_runner_utils%shlibext \ // RUN: --entry-point-result=void \ // RUN: | FileCheck %s diff --git a/mlir/test/mlir-rocm-runner/vector-transferops.mlir b/mlir/test/Integration/GPU/ROCM/vector-transferops.mlir rename from mlir/test/mlir-rocm-runner/vector-transferops.mlir rename to mlir/test/Integration/GPU/ROCM/vector-transferops.mlir --- a/mlir/test/mlir-rocm-runner/vector-transferops.mlir +++ b/mlir/test/Integration/GPU/ROCM/vector-transferops.mlir @@ -1,5 +1,9 @@ -// RUN: mlir-rocm-runner %s \ -// RUN: 
--shared-libs=%cuda_wrapper_library_dir/libcuda-runtime-wrappers%shlibext \ +// RUN: mlir-opt %s \ +// RUN: -gpu-kernel-outlining \ +// RUN: -pass-pipeline='gpu.module(strip-debuginfo,convert-gpu-to-rocdl,gpu-to-hsaco)' \ +// RUN: -gpu-to-llvm \ +// RUN: | mlir-cpu-runner \ +// RUN: --shared-libs=%linalg_test_lib_dir/libmlir_rocm_runtime%shlibext \ // RUN: --shared-libs=%linalg_test_lib_dir/libmlir_runner_utils%shlibext \ // RUN: --entry-point-result=void \ // RUN: | FileCheck %s diff --git a/mlir/test/lib/Transforms/TestConvertGPUKernelToHsaco.cpp b/mlir/test/lib/Transforms/TestConvertGPUKernelToHsaco.cpp --- a/mlir/test/lib/Transforms/TestConvertGPUKernelToHsaco.cpp +++ b/mlir/test/lib/Transforms/TestConvertGPUKernelToHsaco.cpp @@ -6,11 +6,9 @@ // //===----------------------------------------------------------------------===// -#include "mlir/Conversion/GPUCommon/GPUCommonPass.h" -#include "mlir/Dialect/LLVMIR/ROCDLDialect.h" +#include "mlir/Dialect/GPU/Passes.h" + #include "mlir/Pass/Pass.h" -#include "mlir/Pass/PassManager.h" -#include "mlir/Target/LLVMIR/Dialect/LLVMIR/LLVMToLLVMIRTranslation.h" #include "mlir/Target/LLVMIR/Dialect/ROCDL/ROCDLToLLVMIRTranslation.h" #include "mlir/Target/LLVMIR/Export.h" #include "llvm/Support/TargetSelect.h" @@ -18,38 +16,54 @@ using namespace mlir; #if MLIR_ROCM_CONVERSIONS_ENABLED -static OwnedBlob compileIsaToHsacoForTesting(const std::string &, Location, - StringRef) { - const char data[] = "HSACO"; - return std::make_unique>(data, data + sizeof(data) - 1); +namespace { +class TestSerializeToHsacoPass + : public PassWrapper { +public: + TestSerializeToHsacoPass(); + +private: + void getDependentDialects(DialectRegistry ®istry) const override; + + // Serializes ROCDL IR to HSACO. 
+ std::unique_ptr> + serializeISA(const std::string &isa) override; +}; +} // namespace + +TestSerializeToHsacoPass::TestSerializeToHsacoPass() { + this->triple = "amdgcn-amd-amdhsa"; + this->chip = "gfx900"; +} + +void TestSerializeToHsacoPass::getDependentDialects( + DialectRegistry ®istry) const { + registerROCDLDialectTranslation(registry); + gpu::SerializeToBlobPass::getDependentDialects(registry); } -static std::unique_ptr -translateModuleToROCDL(Operation *m, llvm::LLVMContext &llvmContext, - StringRef moduleName) { - registerLLVMDialectTranslation(*m->getContext()); - registerROCDLDialectTranslation(*m->getContext()); - return translateModuleToLLVMIR(m, llvmContext, moduleName); +std::unique_ptr> +TestSerializeToHsacoPass::serializeISA(const std::string &) { + std::string data = "HSACO"; + return std::make_unique>(data.begin(), data.end()); } namespace mlir { namespace test { -void registerTestConvertGPUKernelToHsacoPass() { - PassPipelineRegistration<>( - "test-kernel-to-hsaco", - "Convert all kernel functions to ROCm hsaco blobs", - [](OpPassManager &pm) { +// Register test pass to serialize GPU module to a HSAco binary annotation. +void registerTestGpuSerializeToHsacoPass() { + PassRegistration registerSerializeToHsaco( + "test-gpu-to-hsaco", + "Lower GPU kernel function to HSAco binary annotations", [] { // Initialize LLVM AMDGPU backend. 
LLVMInitializeAMDGPUTarget(); LLVMInitializeAMDGPUTargetInfo(); LLVMInitializeAMDGPUTargetMC(); LLVMInitializeAMDGPUAsmPrinter(); - pm.addPass(createConvertGPUKernelToBlobPass( - translateModuleToROCDL, compileIsaToHsacoForTesting, - "amdgcn-amd-amdhsa", "gfx900", "-code-object-v3", "rocdl.hsaco")); + return std::make_unique(); }); } } // namespace test } // namespace mlir -#endif +#endif // MLIR_ROCM_CONVERSIONS_ENABLED diff --git a/mlir/test/lit.cfg.py b/mlir/test/lit.cfg.py --- a/mlir/test/lit.cfg.py +++ b/mlir/test/lit.cfg.py @@ -77,7 +77,6 @@ ToolSubst('toy-ch5', unresolved='ignore'), ToolSubst('%linalg_test_lib_dir', config.linalg_test_lib_dir, unresolved='ignore'), ToolSubst('%mlir_runner_utils_dir', config.mlir_runner_utils_dir, unresolved='ignore'), - ToolSubst('%rocm_wrapper_library_dir', config.rocm_wrapper_library_dir, unresolved='ignore'), ToolSubst('%spirv_wrapper_library_dir', config.spirv_wrapper_library_dir, unresolved='ignore'), ToolSubst('%vulkan_wrapper_library_dir', config.vulkan_wrapper_library_dir, unresolved='ignore'), ToolSubst('%mlir_integration_test_dir', config.mlir_integration_test_dir, unresolved='ignore'), diff --git a/mlir/test/lit.site.cfg.py.in b/mlir/test/lit.site.cfg.py.in --- a/mlir/test/lit.site.cfg.py.in +++ b/mlir/test/lit.site.cfg.py.in @@ -39,7 +39,6 @@ config.run_cuda_tests = @MLIR_CUDA_CONVERSIONS_ENABLED@ config.enable_cuda_runner = @MLIR_CUDA_RUNNER_ENABLED@ config.run_rocm_tests = @MLIR_ROCM_CONVERSIONS_ENABLED@ -config.rocm_wrapper_library_dir = "@MLIR_ROCM_WRAPPER_LIBRARY_DIR@" config.enable_rocm_runner = @MLIR_ROCM_RUNNER_ENABLED@ config.spirv_wrapper_library_dir = "@MLIR_SPIRV_WRAPPER_LIBRARY_DIR@" config.enable_spirv_cpu_runner = @MLIR_SPIRV_CPU_RUNNER_ENABLED@ diff --git a/mlir/tools/CMakeLists.txt b/mlir/tools/CMakeLists.txt --- a/mlir/tools/CMakeLists.txt +++ b/mlir/tools/CMakeLists.txt @@ -1,7 +1,6 @@ add_subdirectory(mlir-cpu-runner) add_subdirectory(mlir-opt) add_subdirectory(mlir-reduce) 
-add_subdirectory(mlir-rocm-runner) add_subdirectory(mlir-shlib) add_subdirectory(mlir-spirv-cpu-runner) add_subdirectory(mlir-translate) diff --git a/mlir/tools/mlir-opt/mlir-opt.cpp b/mlir/tools/mlir-opt/mlir-opt.cpp --- a/mlir/tools/mlir-opt/mlir-opt.cpp +++ b/mlir/tools/mlir-opt/mlir-opt.cpp @@ -65,7 +65,7 @@ void registerTestConstantFold(); void registerTestConvVectorization(); void registerTestGpuSerializeToCubinPass(); -void registerTestConvertGPUKernelToHsacoPass(); +void registerTestGpuSerializeToHsacoPass(); void registerTestDataLayoutQuery(); void registerTestDecomposeCallGraphTypes(); void registerTestDialect(DialectRegistry &); @@ -140,7 +140,7 @@ test::registerTestGpuSerializeToCubinPass(); #endif #if MLIR_ROCM_CONVERSIONS_ENABLED - test::registerTestConvertGPUKernelToHsacoPass(); + test::registerTestGpuSerializeToHsacoPass(); #endif test::registerTestConvVectorization(); test::registerTestDecomposeCallGraphTypes(); diff --git a/mlir/tools/mlir-rocm-runner/CMakeLists.txt b/mlir/tools/mlir-rocm-runner/CMakeLists.txt deleted file mode 100644 --- a/mlir/tools/mlir-rocm-runner/CMakeLists.txt +++ /dev/null @@ -1,127 +0,0 @@ -set(LLVM_OPTIONAL_SOURCES - rocm-runtime-wrappers.cpp - mlir-rocm-runner.cpp - ) - -if(MLIR_ROCM_RUNNER_ENABLED) - if (NOT ("AMDGPU" IN_LIST LLVM_TARGETS_TO_BUILD)) - message(SEND_ERROR - "Building the mlir rocm runner requires the AMDGPU backend") - endif() - - # Ensure lld is enabled. - if (NOT "lld" IN_LIST LLVM_ENABLE_PROJECTS) - message(SEND_ERROR "lld is not enabled. Please revise LLVM_ENABLE_PROJECTS") - endif() - - # lld header files. - include_directories(${MLIR_SOURCE_DIR}/../lld/include) - - # Configure ROCm support. 
- if (NOT DEFINED ROCM_PATH) - if (NOT DEFINED ENV{ROCM_PATH}) - set(ROCM_PATH "/opt/rocm" CACHE PATH "Path to which ROCm has been installed") - else() - set(ROCM_PATH $ENV{ROCM_PATH} CACHE PATH "Path to which ROCm has been installed") - endif() - set(HIP_PATH "${ROCM_PATH}/hip" CACHE PATH " Path to which HIP has been installed") - endif() - set(CMAKE_MODULE_PATH "${HIP_PATH}/cmake" ${CMAKE_MODULE_PATH}) - find_package(HIP) - if (NOT HIP_FOUND) - message(SEND_ERROR "Build the mlir rocm runner requires a working ROCm and HIP install") - else() - message(STATUS "ROCm HIP version: ${HIP_VERSION}") - endif() - - # Set compile-time flags for ROCm path. - add_definitions(-D__ROCM_PATH__="${ROCM_PATH}") - - # Locate HIP runtime library. - find_library(ROCM_RUNTIME_LIBRARY amdhip64 - PATHS "${HIP_PATH}/lib") - if (NOT ROCM_RUNTIME_LIBRARY) - message(SEND_ERROR "Could not locate ROCm HIP runtime library") - else() - message(STATUS "ROCm HIP runtime lib: ${ROCM_RUNTIME_LIBRARY}") - endif() - - # Set HIP compile-time flags. - add_definitions(-D__HIP_PLATFORM_HCC__) - - add_mlir_library(rocm-runtime-wrappers - SHARED - rocm-runtime-wrappers.cpp - - EXCLUDE_FROM_LIBMLIR - ) - target_include_directories(rocm-runtime-wrappers - PRIVATE - "${HIP_PATH}/../include" - "${HIP_PATH}/include" - ) - target_link_libraries(rocm-runtime-wrappers - PRIVATE - ${ROCM_RUNTIME_LIBRARY} - ) - - get_property(conversion_libs GLOBAL PROPERTY MLIR_CONVERSION_LIBS) - set(LIBS - ${conversion_libs} - lldCommon - lldDriver - lldELF - MLIRJitRunner - MLIRAnalysis - MLIREDSC - MLIRExecutionEngine - MLIRGPU - MLIRIR - MLIRLLVMIR - MLIRLLVMToLLVMIRTranslation - MLIRParser - MLIRROCDLIR - MLIRStandard - MLIRSupport - MLIRTargetLLVMIRExport - MLIRROCDLToLLVMIRTranslation - MLIRTransforms - MLIRTranslation - ${ROCM_RUNTIME_LIBRARY} - ) - - # Manually expand the target library, since our MLIR libraries - # aren't plugged into the LLVM dependency tracking. 
If we don't - # do this then we can't insert the CodeGen library after ourselves - llvm_expand_pseudo_components(TARGET_LIBS AllTargetsCodeGens AllTargetsAsmParsers) - # Prepend LLVM in front of every target, this is how the library - # are named with CMake - SET(targets_to_link) - FOREACH(t ${TARGET_LIBS}) - LIST(APPEND targets_to_link "LLVM${t}") - ENDFOREACH(t) - - add_llvm_tool(mlir-rocm-runner - mlir-rocm-runner.cpp - - DEPENDS - rocm-runtime-wrappers - - LINK_COMPONENTS - - Core - LTO - MC - MCParser - Option - Support - ) - llvm_update_compile_flags(mlir-rocm-runner) - target_include_directories(mlir-rocm-runner - PRIVATE - "${HIP_PATH}/../include" - "${HIP_PATH}/include" - ) - target_link_libraries(mlir-rocm-runner PRIVATE ${LIBS} ${targets_to_link}) - -endif() diff --git a/mlir/tools/mlir-rocm-runner/mlir-rocm-runner.cpp b/mlir/tools/mlir-rocm-runner/mlir-rocm-runner.cpp deleted file mode 100644 --- a/mlir/tools/mlir-rocm-runner/mlir-rocm-runner.cpp +++ /dev/null @@ -1,349 +0,0 @@ -//===- mlir-rocm-runner.cpp - MLIR ROCM Execution Driver-------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// This is a command line utility that executes an MLIR file on the GPU by -// translating MLIR to ROCDL/LLVM IR before JIT-compiling and executing the -// latter. 
-// -//===----------------------------------------------------------------------===// - -#include "llvm/ADT/STLExtras.h" - -#include "mlir/Conversion/GPUCommon/GPUCommonPass.h" -#include "mlir/Conversion/GPUToROCDL/GPUToROCDLPass.h" -#include "mlir/Conversion/SCFToStandard/SCFToStandard.h" -#include "mlir/Conversion/StandardToLLVM/ConvertStandardToLLVM.h" -#include "mlir/Conversion/StandardToLLVM/ConvertStandardToLLVMPass.h" -#include "mlir/Dialect/GPU/GPUDialect.h" -#include "mlir/Dialect/GPU/Passes.h" -#include "mlir/Dialect/LLVMIR/LLVMDialect.h" -#include "mlir/Dialect/LLVMIR/ROCDLDialect.h" -#include "mlir/Dialect/StandardOps/IR/Ops.h" -#include "mlir/ExecutionEngine/JitRunner.h" -#include "mlir/ExecutionEngine/OptUtils.h" -#include "mlir/IR/BuiltinOps.h" -#include "mlir/Pass/Pass.h" -#include "mlir/Pass/PassManager.h" -#include "mlir/Support/FileUtilities.h" -#include "mlir/Target/LLVMIR/Dialect/LLVMIR/LLVMToLLVMIRTranslation.h" -#include "mlir/Target/LLVMIR/Dialect/ROCDL/ROCDLToLLVMIRTranslation.h" -#include "mlir/Target/LLVMIR/Export.h" -#include "mlir/Transforms/DialectConversion.h" -#include "mlir/Transforms/Passes.h" -#include "llvm/Support/ErrorOr.h" -#include "llvm/Support/FileUtilities.h" -#include "llvm/Support/InitLLVM.h" -#include "llvm/Support/LineIterator.h" -#include "llvm/Support/Program.h" -#include "llvm/Support/SourceMgr.h" -#include "llvm/Support/TargetRegistry.h" -#include "llvm/Support/TargetSelect.h" - -// MC headers. 
-#include "llvm/MC/MCAsmBackend.h" -#include "llvm/MC/MCAsmInfo.h" -#include "llvm/MC/MCCodeEmitter.h" -#include "llvm/MC/MCContext.h" -#include "llvm/MC/MCInstPrinter.h" -#include "llvm/MC/MCInstrInfo.h" -#include "llvm/MC/MCObjectFileInfo.h" -#include "llvm/MC/MCObjectWriter.h" -#include "llvm/MC/MCParser/AsmLexer.h" -#include "llvm/MC/MCParser/MCTargetAsmParser.h" -#include "llvm/MC/MCRegisterInfo.h" -#include "llvm/MC/MCStreamer.h" -#include "llvm/MC/MCSubtargetInfo.h" -#include "llvm/MC/MCTargetOptionsCommandFlags.h" - -// lld headers. -#include "lld/Common/Driver.h" - -// HIP headers. -#include "hip/hip_version.h" - -#include - -using namespace mlir; -using namespace llvm; - -using Blob = SmallVector; - -static cl::opt tripleName("triple", cl::desc("target triple"), - cl::value_desc("triple string"), - cl::init("amdgcn-amd-amdhsa")); - -static cl::opt targetChip("target", cl::desc("target chip"), - cl::value_desc("AMDGPU ISA version"), - cl::init("")); - -static cl::opt features("feature", cl::desc("target features"), - cl::value_desc("AMDGPU target features"), - cl::init("")); - -static constexpr const char kRunnerProgram[] = "mlir-rocm-runner"; -static constexpr const char kRocmAgentEnumerator[] = "rocm_agent_enumerator"; -static constexpr const char kDefaultTargetChip[] = "gfx900"; - -static LogicalResult assembleIsa(const std::string isa, StringRef name, - Blob &result) { - raw_svector_ostream os(result); - - std::string error; - Triple theTriple(Triple::normalize(tripleName)); - const Target *theTarget = - TargetRegistry::lookupTarget(theTriple.normalize(), error); - if (!theTarget) { - WithColor::error(errs(), name) << error; - return failure(); - } - - SourceMgr srcMgr; - srcMgr.AddNewSourceBuffer(MemoryBuffer::getMemBuffer(isa), SMLoc()); - - const MCTargetOptions mcOptions; - std::unique_ptr mri(theTarget->createMCRegInfo(tripleName)); - std::unique_ptr mai( - theTarget->createMCAsmInfo(*mri, tripleName, mcOptions)); - 
mai->setRelaxELFRelocations(true); - - MCObjectFileInfo mofi; - MCContext ctx(mai.get(), mri.get(), &mofi, &srcMgr, &mcOptions); - mofi.InitMCObjectFileInfo(theTriple, false, ctx, false); - - SmallString<128> cwd; - if (!sys::fs::current_path(cwd)) - ctx.setCompilationDir(cwd); - - std::unique_ptr mcStreamer; - std::unique_ptr mcii(theTarget->createMCInstrInfo()); - std::unique_ptr sti( - theTarget->createMCSubtargetInfo(tripleName, targetChip, features)); - - MCCodeEmitter *ce = theTarget->createMCCodeEmitter(*mcii, *mri, ctx); - MCAsmBackend *mab = theTarget->createMCAsmBackend(*sti, *mri, mcOptions); - mcStreamer.reset(theTarget->createMCObjectStreamer( - theTriple, ctx, std::unique_ptr(mab), - mab->createObjectWriter(os), std::unique_ptr(ce), *sti, - mcOptions.MCRelaxAll, mcOptions.MCIncrementalLinkerCompatible, - /*DWARFMustBeAtTheEnd*/ false)); - mcStreamer->setUseAssemblerInfoForParsing(true); - - std::unique_ptr parser( - createMCAsmParser(srcMgr, ctx, *mcStreamer, *mai)); - std::unique_ptr tap( - theTarget->createMCAsmParser(*sti, *parser, *mcii, mcOptions)); - - if (!tap) { - WithColor::error(errs(), name) << "assembler initialization error.\n"; - return failure(); - } - - parser->setTargetParser(*tap); - parser->Run(false); - - return success(); -} - -static std::mutex mutex; -static LogicalResult createHsaco(const Blob &isaBlob, StringRef name, - Blob &hsacoBlob) { - // Save the ISA binary to a temp file. - int tempIsaBinaryFd = -1; - SmallString<128> tempIsaBinaryFilename; - std::error_code ec = sys::fs::createTemporaryFile( - "kernel", "o", tempIsaBinaryFd, tempIsaBinaryFilename); - if (ec) { - WithColor::error(errs(), name) - << "temporary file for ISA binary creation error.\n"; - return failure(); - } - FileRemover cleanupIsaBinary(tempIsaBinaryFilename); - raw_fd_ostream tempIsaBinaryOs(tempIsaBinaryFd, true); - tempIsaBinaryOs << isaBlob; - tempIsaBinaryOs.close(); - - // Create a temp file for HSA code object. 
- int tempHsacoFD = -1; - SmallString<128> tempHsacoFilename; - ec = sys::fs::createTemporaryFile("kernel", "hsaco", tempHsacoFD, - tempHsacoFilename); - if (ec) { - WithColor::error(errs(), name) - << "temporary file for HSA code object creation error.\n"; - return failure(); - } - FileRemover cleanupHsaco(tempHsacoFilename); - - const std::lock_guard lock(mutex); - // Invoke lld. Expect a true return value from lld. - bool ret = lld::elf::link({"ld.lld", "-shared", tempIsaBinaryFilename.c_str(), - "-o", tempHsacoFilename.c_str()}, - /*canEarlyExit=*/false, llvm::outs(), llvm::errs()); - if (!ret) { - WithColor::error(errs(), name) << "lld invocation error.\n"; - return failure(); - } - - // Load the HSA code object. - auto hsacoFile = mlir::openInputFile(tempHsacoFilename); - if (!hsacoFile) { - WithColor::error(errs(), name) - << "read HSA code object from temp file error.\n"; - return failure(); - } - hsacoBlob.assign(hsacoFile->getBuffer().begin(), - hsacoFile->getBuffer().end()); - - return success(); -} - -static std::unique_ptr -compileModuleToROCDLIR(Operation *m, llvm::LLVMContext &llvmContext, - StringRef name) { - auto llvmModule = translateModuleToROCDLIR(m, llvmContext, name); - // TODO: Link with ROCm-Device-Libs in case needed (ex: the Module - // depends on math functions). - return llvmModule; -} - -static OwnedBlob compileISAToHsaco(const std::string isa, Location loc, - StringRef name) { - // ISA -> ISA in binary form via MC. - // Use lld to create HSA code object. - Blob isaBlob; - Blob hsacoBlob; - - if (succeeded(assembleIsa(isa, name, isaBlob)) && - succeeded(createHsaco(isaBlob, name, hsacoBlob))) - return std::make_unique>(hsacoBlob.begin(), - hsacoBlob.end()); - - WithColor::error(errs(), name) << "producing HSA code object error.\n"; - return {}; -} - -static void configTargetChip() { - // Set targetChip to default value first. - targetChip = kDefaultTargetChip; - - // Locate rocm_agent_enumerator. 
- llvm::ErrorOr rocmAgentEnumerator = llvm::sys::findProgramByName( - kRocmAgentEnumerator, {__ROCM_PATH__ "/bin"}); - std::error_code ec; - if ((ec = rocmAgentEnumerator.getError())) { - WithColor::warning(errs(), kRunnerProgram) - << kRocmAgentEnumerator << " couldn't be located under " - << __ROCM_PATH__ << ", set target as " << kDefaultTargetChip << "\n"; - return; - } - - // Prepare temp file to hold the outputs. - int tempFd = -1; - SmallString<128> tempFilename; - ec = sys::fs::createTemporaryFile("rocm_agent", "txt", tempFd, tempFilename); - if (ec) { - WithColor::warning(errs(), kRunnerProgram) - << "temporary file for " << kRocmAgentEnumerator - << " creation error, set target as " << kDefaultTargetChip << "\n"; - return; - } - FileRemover cleanup(tempFilename); - - // Invoke rocm_agent_enumerator. - std::string errorMessage; - SmallVector args{"-t", "GPU"}; - Optional redirects[3] = {{""}, tempFilename.str(), {""}}; - int result = - llvm::sys::ExecuteAndWait(rocmAgentEnumerator.get(), args, llvm::None, - redirects, 0, 0, &errorMessage); - if (result) { - WithColor::warning(errs(), kRunnerProgram) - << kRocmAgentEnumerator << " invocation error: " << errorMessage - << ", set target as " << kDefaultTargetChip << "\n"; - return; - } - - // Load and parse the result. - auto gfxIsaList = mlir::openInputFile(tempFilename); - if (!gfxIsaList) { - WithColor::error(errs(), kRunnerProgram) - << "read ROCm agent list temp file error, set target as " - << kDefaultTargetChip << "\n"; - return; - } - for (line_iterator lines(*gfxIsaList); !lines.is_at_end(); ++lines) { - // Skip the line with content "gfx000". - if (*lines == "gfx000") - continue; - // Use the first ISA version found. - targetChip = lines->str(); - break; - } -} - -static void configTargetFeatures() { - if (features.size() > 0) - features += ","; - // After ROCm 3.5, adopt HSA code object V3. 
- if (HIP_VERSION_MAJOR >= 3 && HIP_VERSION_MINOR >= 5) - features += "+code-object-v3"; - else - features += "-code-object-v3"; -} - -static LogicalResult runMLIRPasses(ModuleOp m) { - PassManager pm(m.getContext()); - applyPassManagerCLOptions(pm); - - // Configure target chip ISA version if it has not been specified. - if (!targetChip.size()) - configTargetChip(); - - // Configure target features per ROCm / HIP version. - configTargetFeatures(); - - const char gpuBinaryAnnotation[] = "rocdl.hsaco"; - pm.addPass(createLowerToCFGPass()); - pm.addPass(createGpuKernelOutliningPass()); - auto &kernelPm = pm.nest(); - kernelPm.addPass(createStripDebugInfoPass()); - kernelPm.addPass(createLowerGpuOpsToROCDLOpsPass()); - kernelPm.addPass(createConvertGPUKernelToBlobPass( - compileModuleToROCDLIR, compileISAToHsaco, tripleName, targetChip, - features, gpuBinaryAnnotation)); - pm.addPass(createGpuToLLVMConversionPass(gpuBinaryAnnotation)); - - return pm.run(m); -} - -int main(int argc, char **argv) { - registerPassManagerCLOptions(); - llvm::InitLLVM y(argc, argv); - llvm::InitializeAllTargetInfos(); - llvm::InitializeAllTargetMCs(); - llvm::InitializeAllAsmParsers(); - - // Initialize LLVM AMDGPU backend. - LLVMInitializeAMDGPUTarget(); - LLVMInitializeAMDGPUTargetInfo(); - LLVMInitializeAMDGPUTargetMC(); - LLVMInitializeAMDGPUAsmPrinter(); - - mlir::initializeLLVMPasses(); - - mlir::JitRunnerConfig jitRunnerConfig; - jitRunnerConfig.mlirTransformer = runMLIRPasses; - - mlir::DialectRegistry registry; - registry.insert(); - mlir::registerLLVMDialectTranslation(registry); - mlir::registerROCDLDialectTranslation(registry); - - return mlir::JitRunnerMain(argc, argv, registry, jitRunnerConfig); -}