diff --git a/mlir/lib/Conversion/GPUCommon/ConvertLaunchFuncToRuntimeCalls.cpp b/mlir/lib/Conversion/GPUCommon/ConvertLaunchFuncToRuntimeCalls.cpp
--- a/mlir/lib/Conversion/GPUCommon/ConvertLaunchFuncToRuntimeCalls.cpp
+++ b/mlir/lib/Conversion/GPUCommon/ConvertLaunchFuncToRuntimeCalls.cpp
@@ -88,6 +88,8 @@
       "mgpuModuleLoad",
       llvmPointerType /* void *module */,
       {llvmPointerType /* void *cubin */}};
+  FunctionCallBuilder moduleUnloadCallBuilder = {
+      "mgpuModuleUnload", llvmVoidType, {llvmPointerType /* void *module */}};
   FunctionCallBuilder moduleGetFunctionCallBuilder = {
       "mgpuModuleGetFunction",
       llvmPointerType /* void *function */,
@@ -490,6 +492,8 @@
        kernelParams, /* kernel params */
        nullpointer /* extra */});
   streamSynchronizeCallBuilder.create(loc, rewriter, stream.getResult(0));
+  streamDestroyCallBuilder.create(loc, rewriter, stream.getResult(0));
+  moduleUnloadCallBuilder.create(loc, rewriter, module.getResult(0));
 
   rewriter.eraseOp(op);
   return success();
diff --git a/mlir/test/Conversion/GPUCommon/lower-launch-func-to-gpu-runtime-calls.mlir b/mlir/test/Conversion/GPUCommon/lower-launch-func-to-gpu-runtime-calls.mlir
--- a/mlir/test/Conversion/GPUCommon/lower-launch-func-to-gpu-runtime-calls.mlir
+++ b/mlir/test/Conversion/GPUCommon/lower-launch-func-to-gpu-runtime-calls.mlir
@@ -48,4 +48,6 @@
   // CHECK-SAME: [[C8]], [[C8]], [[C8]], [[C0_I32]], [[STREAM]],
   // CHECK-SAME: [[PARAMS]], [[EXTRA_PARAMS]])
   // CHECK: llvm.call @mgpuStreamSynchronize
+  // CHECK: llvm.call @mgpuStreamDestroy
+  // CHECK: llvm.call @mgpuModuleUnload
 }
diff --git a/mlir/tools/mlir-cuda-runner/cuda-runtime-wrappers.cpp b/mlir/tools/mlir-cuda-runner/cuda-runtime-wrappers.cpp
--- a/mlir/tools/mlir-cuda-runner/cuda-runtime-wrappers.cpp
+++ b/mlir/tools/mlir-cuda-runner/cuda-runtime-wrappers.cpp
@@ -47,6 +47,10 @@
   return module;
 }
 
+extern "C" void mgpuModuleUnload(CUmodule module) {
+  CUDA_REPORT_IF_ERROR(cuModuleUnload(module));
+}
+
 extern "C" CUfunction mgpuModuleGetFunction(CUmodule module, const char *name) {
   CUfunction function = nullptr;
   CUDA_REPORT_IF_ERROR(cuModuleGetFunction(&function, module, name));
diff --git a/mlir/tools/mlir-rocm-runner/rocm-runtime-wrappers.cpp b/mlir/tools/mlir-rocm-runner/rocm-runtime-wrappers.cpp
--- a/mlir/tools/mlir-rocm-runner/rocm-runtime-wrappers.cpp
+++ b/mlir/tools/mlir-rocm-runner/rocm-runtime-wrappers.cpp
@@ -46,6 +46,10 @@
   return module;
 }
 
+extern "C" void mgpuModuleUnload(hipModule_t module) {
+  HIP_REPORT_IF_ERROR(hipModuleUnload(module));
+}
+
 extern "C" hipFunction_t mgpuModuleGetFunction(hipModule_t module,
                                                const char *name) {
   hipFunction_t function = nullptr;