diff --git a/mlir/test/Dialect/SparseTensor/GPU/gpu_combi.mlir b/mlir/test/Dialect/SparseTensor/GPU/gpu_combi.mlir new file mode 100644 --- /dev/null +++ b/mlir/test/Dialect/SparseTensor/GPU/gpu_combi.mlir @@ -0,0 +1,29 @@ +// RUN: mlir-opt %s --linalg-generalize-named-ops \ +// RUN: --pre-sparsification-rewrite \ +// RUN: --sparsification="parallelization-strategy=dense-outer-loop" \ +// RUN: --sparse-gpu-codegen | FileCheck %s + +#CSR = #sparse_tensor.encoding<{ dimLevelType = [ "dense", "compressed" ] }> + +// +// CHECK-LABEL: gpu.module @sparse_kernels +// CHECK-DAG: gpu.func @kernel0 +// CHECK-DAG: gpu.func @kernel1 +// +// CHECK-LABEL: func.func @matmuls +// CHECK-DAG: gpu.launch_func @sparse_kernels::@kernel0 blocks +// CHECK-DAG: gpu.launch_func @sparse_kernels::@kernel1 blocks +// +func.func @matmuls(%A: tensor<1024x8xf64>, + %B: tensor<8x1024xf64, #CSR>, + %C: tensor<1024x1024xf64, #CSR>) -> tensor<1024x1024xf64> { + %Z = arith.constant dense<0.0> : tensor<1024x1024xf64> + %T = linalg.matmul + ins(%A, %B: tensor<1024x8xf64>, tensor<8x1024xf64, #CSR>) + outs(%Z: tensor<1024x1024xf64>) -> tensor<1024x1024xf64> + %D = linalg.matmul + ins(%T, %C: tensor<1024x1024xf64>, tensor<1024x1024xf64, #CSR>) + outs(%Z: tensor<1024x1024xf64>) -> tensor<1024x1024xf64> + return %D : tensor<1024x1024xf64> +} + diff --git a/mlir/test/Dialect/SparseTensor/GPU/gpu_matmul.mlir b/mlir/test/Dialect/SparseTensor/GPU/gpu_matmul.mlir --- a/mlir/test/Dialect/SparseTensor/GPU/gpu_matmul.mlir +++ b/mlir/test/Dialect/SparseTensor/GPU/gpu_matmul.mlir @@ -8,7 +8,8 @@ // // Compute matrix matrix C = AB // -// CHECK-LABEL: gpu.func @kernel( +// CHECK-LABEL: gpu.module @sparse_kernels +// CHECK-LABEL: gpu.func @kernel0( // CHECK-SAME: %[[VAL_0:.*0]]: index, // CHECK-SAME: %[[VAL_1:.*1]]: index, // CHECK-SAME: %[[VAL_2:.*2]]: memref, @@ -51,7 +52,7 @@ // CHECK: gpu.host_register // CHECK: gpu.host_register // CHECK: gpu.host_register -// CHECK: gpu.launch_func @sparsekernels::@kernel blocks +// CHECK: gpu.launch_func @sparse_kernels::@kernel0 blocks // func.func @matmul(%A: tensor, %B: tensor, %C_in: tensor) -> tensor { %C_out = linalg.matmul diff --git a/mlir/test/Dialect/SparseTensor/GPU/gpu_matvec.mlir b/mlir/test/Dialect/SparseTensor/GPU/gpu_matvec.mlir --- a/mlir/test/Dialect/SparseTensor/GPU/gpu_matvec.mlir +++ b/mlir/test/Dialect/SparseTensor/GPU/gpu_matvec.mlir @@ -8,8 +8,8 @@ // // Compute matrix vector y = Ax // -// -// CHECK: gpu.func @kernel( +// CHECK-LABEL: gpu.module @sparse_kernels +// CHECK: gpu.func @kernel0( // CHECK-SAME: %[[VAL_0:.*0]]: index, // CHECK-SAME: %[[VAL_1:.*1]]: memref, // CHECK-SAME: %[[VAL_2:.*2]]: memref, @@ -48,7 +48,7 @@ // CHECK: gpu.host_register // CHECK: gpu.host_register // CHECK: gpu.host_register -// CHECK: gpu.launch_func @sparsekernels::@kernel blocks +// CHECK: gpu.launch_func @sparse_kernels::@kernel0 blocks // func.func @matvec(%A: tensor, %x: tensor, %y_in: tensor) -> tensor { %y_out = linalg.matvec diff --git a/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel b/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel --- a/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel +++ b/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel @@ -7627,11 +7627,11 @@ name = "libmlir_cuda_runtime.so", linkshared = True, linkstatic = False, + deps = [":mlir_cuda_runtime"], tags = [ "manual", # External dependency "nobuildkite", # TODO(gcmn): Add support for this target ], - deps = [":mlir_cuda_runtime"], ) cc_library(