diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/SparseGPUCodegen.cpp b/mlir/lib/Dialect/SparseTensor/Transforms/SparseGPUCodegen.cpp --- a/mlir/lib/Dialect/SparseTensor/Transforms/SparseGPUCodegen.cpp +++ b/mlir/lib/Dialect/SparseTensor/Transforms/SparseGPUCodegen.cpp @@ -639,6 +639,7 @@ .getAsyncToken(); token = rewriter.create(loc, tokenTp, token, dnC) .getAsyncToken(); + token = genDeallocMemRef(rewriter, loc, rowA, token); if (colA) token = genDeallocMemRef(rewriter, loc, colA, token); token = genDeallocMemRef(rewriter, loc, valA, token); diff --git a/mlir/lib/ExecutionEngine/CudaRuntimeWrappers.cpp b/mlir/lib/ExecutionEngine/CudaRuntimeWrappers.cpp --- a/mlir/lib/ExecutionEngine/CudaRuntimeWrappers.cpp +++ b/mlir/lib/ExecutionEngine/CudaRuntimeWrappers.cpp @@ -79,11 +79,11 @@ ~ScopedContext() { CUDA_REPORT_IF_ERROR(cuCtxPopCurrent(nullptr)); } }; +#ifdef MLIR_ENABLE_CUDA_CUSPARSE // Note that (1) Nvidia confirms the safety to share handle across multiple // instances, and streams. (2) Clients are responsible to call the @mgpu // environment initialization/destruction in a thread-safe manner, e.g., // at the beginning of the program before multi-threads are created. -#ifdef MLIR_ENABLE_CUDA_CUSPARSE static cusparseHandle_t cusparse_env = nullptr; #ifdef MLIR_ENABLE_CUDA_CUSPARSELT diff --git a/mlir/test/Dialect/SparseTensor/GPU/gpu_matmul_lib.mlir b/mlir/test/Dialect/SparseTensor/GPU/gpu_matmul_lib.mlir --- a/mlir/test/Dialect/SparseTensor/GPU/gpu_matmul_lib.mlir +++ b/mlir/test/Dialect/SparseTensor/GPU/gpu_matmul_lib.mlir @@ -54,7 +54,8 @@ // CHECK: %[[VAL_55:.*]] = gpu.destroy_sp_mat async {{\[}}%[[VAL_54]]] %[[VAL_44]] // CHECK: %[[VAL_56:.*]] = gpu.destroy_dn_tensor async {{\[}}%[[VAL_55]]] %[[VAL_46]] // CHECK: %[[VAL_57:.*]] = gpu.destroy_dn_tensor async {{\[}}%[[VAL_56]]] %[[VAL_48]] -// CHECK: %[[VAL_60:.*]] = gpu.dealloc async {{\[}}%[[VAL_57]]] %[[VAL_19]] : memref +// CHECK: %[[VAL_59:.*]] = gpu.dealloc async {{\[}}%[[VAL_57]]] %[[VAL_14]] : memref +// CHECK: %[[VAL_60:.*]] = gpu.dealloc async {{\[}}%[[VAL_59]]] %[[VAL_19]] : memref // CHECK: %[[VAL_61:.*]] = gpu.dealloc async {{\[}}%[[VAL_60]]] %[[VAL_24]] : memref // CHECK: %[[VAL_62:.*]] = gpu.dealloc async {{\[}}%[[VAL_61]]] %[[VAL_52]] : memref // CHECK: %[[VAL_63:.*]] = gpu.dealloc async {{\[}}%[[VAL_62]]] %[[VAL_31]] : memref diff --git a/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel b/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel --- a/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel +++ b/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel @@ -2622,6 +2622,9 @@ "include/mlir/Dialect/SparseTensor/Transforms/Passes.h", ], includes = ["include"], + # Here: + # CUSPARSE_COO_AOS : enables AOS COO + local_defines = ["CUSPARSE_COO_AOS"], deps = [ ":AffineDialect", ":ArithDialect", @@ -8095,12 +8098,18 @@ cc_library( name = "_mlir_cuda_runtime", srcs = ["lib/ExecutionEngine/CudaRuntimeWrappers.cpp"], + #compatible_with = ["//buildenv/target:prod"], # Prevent needing EnableABIBreakingChecks symbol from LLVMSupport. copts = ["-DLLVM_DISABLE_ABI_BREAKING_CHECKS_ENFORCING=1"], # Here: # MLIR_ENABLE_CUDA_CUSPARSE : enables cuSPARSE # MLIR_ENABLE_CUDA_CUSPARSELT : enables cuSPARSElt - local_defines = ["MLIR_ENABLE_CUDA_CUSPARSE"], + # CUSPARSE_COO_AOS : enables AOS COO + local_defines = [ + "MLIR_ENABLE_CUDA_CUSPARSE", + # "MLIR_ENABLE_CUDA_CUSPARSELT", + "CUSPARSE_COO_AOS", + ], tags = [ "manual", # External dependency "nobuildkite", # TODO(gcmn): Add support for this target