diff --git a/mlir/lib/ExecutionEngine/CudaRuntimeWrappers.cpp b/mlir/lib/ExecutionEngine/CudaRuntimeWrappers.cpp
--- a/mlir/lib/ExecutionEngine/CudaRuntimeWrappers.cpp
+++ b/mlir/lib/ExecutionEngine/CudaRuntimeWrappers.cpp
@@ -134,15 +134,16 @@
   CUDA_REPORT_IF_ERROR(cuEventRecord(event, stream));
 }
 
-extern "C" void *mgpuMemAlloc(uint64_t sizeBytes, CUstream /*stream*/) {
+extern "C" void *mgpuMemAlloc(uint64_t sizeBytes, CUstream stream) {
   ScopedContext scopedContext;
   CUdeviceptr ptr;
-  CUDA_REPORT_IF_ERROR(cuMemAlloc(&ptr, sizeBytes));
+  CUDA_REPORT_IF_ERROR(cuMemAllocAsync(&ptr, sizeBytes, stream));
   return reinterpret_cast<void *>(ptr);
 }
 
-extern "C" void mgpuMemFree(void *ptr, CUstream /*stream*/) {
-  CUDA_REPORT_IF_ERROR(cuMemFree(reinterpret_cast<CUdeviceptr>(ptr)));
+extern "C" void mgpuMemFree(void *ptr, CUstream stream) {
+  CUDA_REPORT_IF_ERROR(
+      cuMemFreeAsync(reinterpret_cast<CUdeviceptr>(ptr), stream));
 }
 
 extern "C" void mgpuMemcpy(void *dst, void *src, size_t sizeBytes,