diff --git a/mlir/tools/mlir-rocm-runner/CMakeLists.txt b/mlir/tools/mlir-rocm-runner/CMakeLists.txt
--- a/mlir/tools/mlir-rocm-runner/CMakeLists.txt
+++ b/mlir/tools/mlir-rocm-runner/CMakeLists.txt
@@ -108,6 +108,11 @@
     rocm-runtime-wrappers
     )
   llvm_update_compile_flags(mlir-rocm-runner)
+  target_include_directories(mlir-rocm-runner
+    PRIVATE
+    "${HIP_PATH}/../include"
+    "${HIP_PATH}/include"
+  )
   target_link_libraries(mlir-rocm-runner PRIVATE ${LIBS} ${targets_to_link})
 
 endif()
diff --git a/mlir/tools/mlir-rocm-runner/mlir-rocm-runner.cpp b/mlir/tools/mlir-rocm-runner/mlir-rocm-runner.cpp
--- a/mlir/tools/mlir-rocm-runner/mlir-rocm-runner.cpp
+++ b/mlir/tools/mlir-rocm-runner/mlir-rocm-runner.cpp
@@ -58,6 +58,9 @@
 // lld headers.
 #include "lld/Common/Driver.h"
 
+// HIP headers.
+#include "hip/hip_version.h"
+
 using namespace mlir;
 using namespace llvm;
 
@@ -75,7 +78,7 @@
 
 static cl::opt<std::string> features("feature", cl::desc("target features"),
                                      cl::value_desc("AMDGPU target features"),
-                                     cl::init("-code-object-v3"));
+                                     cl::init(""));
 
 static LogicalResult assembleIsa(const std::string isa, StringRef name,
                                  Blob &result) {
@@ -211,10 +214,27 @@
   return {};
 }
 
+/// Appends the HSA code object version feature matching the HIP release this
+/// tool is built against to the user-supplied `features` option.
+static void configTargetFeatures() {
+  // Preserve any features passed on the command line; append to them.
+  if (!features.empty())
+    features += ",";
+  // Before ROCm 3.5, disable HSA code object V3. Compare (major, minor) as an
+  // ordered pair so 2.x counts as older and 4.0+ as newer than 3.5.
+  const bool isBeforeRocm35 =
+      HIP_VERSION_MAJOR < 3 ||
+      (HIP_VERSION_MAJOR == 3 && HIP_VERSION_MINOR < 5);
+  features += isBeforeRocm35 ? "-code-object-v3" : "+code-object-v3";
+}
+
 static LogicalResult runMLIRPasses(ModuleOp m) {
   PassManager pm(m.getContext());
   applyPassManagerCLOptions(pm);
 
+  // Configure target features per ROCm / HIP version.
+  configTargetFeatures();
+
   pm.addPass(createGpuKernelOutliningPass());
   auto &kernelPm = pm.nest<gpu::GPUModuleOp>();
   kernelPm.addPass(createStripDebugInfoPass());