diff --git a/mlir/tools/mlir-rocm-runner/CMakeLists.txt b/mlir/tools/mlir-rocm-runner/CMakeLists.txt
--- a/mlir/tools/mlir-rocm-runner/CMakeLists.txt
+++ b/mlir/tools/mlir-rocm-runner/CMakeLists.txt
@@ -108,6 +108,11 @@
     rocm-runtime-wrappers
   )
   llvm_update_compile_flags(mlir-rocm-runner)
+  target_include_directories(mlir-rocm-runner
+    PRIVATE
+    "${HIP_PATH}/../include"
+    "${HIP_PATH}/include"
+  )
   target_link_libraries(mlir-rocm-runner PRIVATE ${LIBS} ${targets_to_link})
 endif()
 
diff --git a/mlir/tools/mlir-rocm-runner/mlir-rocm-runner.cpp b/mlir/tools/mlir-rocm-runner/mlir-rocm-runner.cpp
--- a/mlir/tools/mlir-rocm-runner/mlir-rocm-runner.cpp
+++ b/mlir/tools/mlir-rocm-runner/mlir-rocm-runner.cpp
@@ -58,6 +58,9 @@
 // lld headers.
 #include "lld/Common/Driver.h"
 
+// HIP headers.
+#include "hip/hip_version.h"
+
 using namespace mlir;
 using namespace llvm;
 
@@ -75,7 +78,7 @@
 
 static cl::opt<std::string> features("feature", cl::desc("target features"),
                                      cl::value_desc("AMDGPU target features"),
-                                     cl::init("-code-object-v3"));
+                                     cl::init(""));
 
 static LogicalResult assembleIsa(const std::string isa, StringRef name,
                                  Blob &result) {
@@ -211,38 +214,54 @@
   return {};
 }
 
-static LogicalResult runMLIRPasses(ModuleOp m) {
-  PassManager pm(m.getContext());
-  applyPassManagerCLOptions(pm);
-
-  pm.addPass(createGpuKernelOutliningPass());
-  auto &kernelPm = pm.nest<gpu::GPUModuleOp>();
-  kernelPm.addPass(createStripDebugInfoPass());
-  kernelPm.addPass(createLowerGpuOpsToROCDLOpsPass());
-  kernelPm.addPass(createConvertGPUKernelToBlobPass(
-      compileModuleToROCDLIR, compileISAToHsaco, tripleName, targetChip,
-      features, /*gpuBinaryAnnotation=*/"rocdl.hsaco"));
-  pm.addPass(createLowerToLLVMPass());
-  pm.addPass(createConvertGpuLaunchFuncToGpuRuntimeCallsPass(
-      /*gpuBinaryAnnotation=*/"rocdl.hsaco"));
-
-  return pm.run(m);
-}
+/// Appends the code-object-v3 target feature that matches the HIP version
+/// this tool is built against to the `features` option.
+static void configTargetFeatures() {
+  if (!features.empty())
+    features += ",";
+  // Before ROCm 3.5, disable HSA code object V3; enable it from 3.5 onwards.
+  // Compare (major, minor) as a pair so releases such as 4.0 are handled.
+  const bool beforeRocm35 =
+      HIP_VERSION_MAJOR < 3 ||
+      (HIP_VERSION_MAJOR == 3 && HIP_VERSION_MINOR < 5);
+  features += beforeRocm35 ? "-code-object-v3" : "+code-object-v3";
+}
 
-int main(int argc, char **argv) {
-  registerPassManagerCLOptions();
-  mlir::registerAllDialects();
-  llvm::InitLLVM y(argc, argv);
-  llvm::InitializeAllTargetInfos();
-  llvm::InitializeAllTargetMCs();
-  llvm::InitializeAllAsmParsers();
-
-  // Initialize LLVM AMDGPU backend.
-  LLVMInitializeAMDGPUTarget();
-  LLVMInitializeAMDGPUTargetInfo();
-  LLVMInitializeAMDGPUTargetMC();
-  LLVMInitializeAMDGPUAsmPrinter();
-
-  mlir::initializeLLVMPasses();
-  return mlir::JitRunnerMain(argc, argv, &runMLIRPasses);
-}
+static LogicalResult runMLIRPasses(ModuleOp m) {
+  PassManager pm(m.getContext());
+  applyPassManagerCLOptions(pm);
+
+  // Configure target features per ROCm / HIP version.
+  configTargetFeatures();
+
+  pm.addPass(createGpuKernelOutliningPass());
+  auto &kernelPm = pm.nest<gpu::GPUModuleOp>();
+  kernelPm.addPass(createStripDebugInfoPass());
+  kernelPm.addPass(createLowerGpuOpsToROCDLOpsPass());
+  kernelPm.addPass(createConvertGPUKernelToBlobPass(
+      compileModuleToROCDLIR, compileISAToHsaco, tripleName, targetChip,
+      features, /*gpuBinaryAnnotation=*/"rocdl.hsaco"));
+  pm.addPass(createLowerToLLVMPass());
+  pm.addPass(createConvertGpuLaunchFuncToGpuRuntimeCallsPass(
+      /*gpuBinaryAnnotation=*/"rocdl.hsaco"));
+
+  return pm.run(m);
+}
+
+int main(int argc, char **argv) {
+  registerPassManagerCLOptions();
+  mlir::registerAllDialects();
+  llvm::InitLLVM y(argc, argv);
+  llvm::InitializeAllTargetInfos();
+  llvm::InitializeAllTargetMCs();
+  llvm::InitializeAllAsmParsers();
+
+  // Initialize LLVM AMDGPU backend.
+  LLVMInitializeAMDGPUTarget();
+  LLVMInitializeAMDGPUTargetInfo();
+  LLVMInitializeAMDGPUTargetMC();
+  LLVMInitializeAMDGPUAsmPrinter();
+
+  mlir::initializeLLVMPasses();
+  return mlir::JitRunnerMain(argc, argv, &runMLIRPasses);
+}