diff --git a/mlir/lib/Dialect/Linalg/IR/CMakeLists.txt b/mlir/lib/Dialect/Linalg/IR/CMakeLists.txt
--- a/mlir/lib/Dialect/Linalg/IR/CMakeLists.txt
+++ b/mlir/lib/Dialect/Linalg/IR/CMakeLists.txt
@@ -20,6 +20,7 @@
   MLIRIR
   MLIRParser
   MLIRSideEffectInterfaces
+  MLIRSparseTensor
   MLIRSCF
   MLIRMath
   MLIRMemRef
diff --git a/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp b/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp
--- a/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp
+++ b/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp
@@ -14,6 +14,7 @@
 #include "mlir/Dialect/Arithmetic/Utils/Utils.h"
 #include "mlir/Dialect/SCF/SCF.h"
+#include "mlir/Dialect/SparseTensor/IR/SparseTensor.h"
 #include "mlir/Dialect/Utils/ReshapeOpsUtils.h"
 #include "mlir/Dialect/Utils/StaticValueUtils.h"
 #include "mlir/IR/AffineExprVisitor.h"
@@ -819,9 +820,18 @@
       Type resultType = genericOp->getResult(yieldVal.index()).getType();
       // The input can have a different type than the result, e.g. a dynamic
       // input dimension can be turned into a static output dimension.
-      if (returnedArg.getType() != resultType)
-        returnedArg = rewriter.create<tensor::CastOp>(genericOp.getLoc(),
-                                                      resultType, returnedArg);
+      Type returnType = returnedArg.getType();
+      if (returnType != resultType) {
+        // Distinguish between sparse conversion or dense tensor casting.
+        // TODO: unify the two ops?
+        if (sparse_tensor::getSparseTensorEncoding(returnType) ||
+            sparse_tensor::getSparseTensorEncoding(resultType))
+          returnedArg = rewriter.create<sparse_tensor::ConvertOp>(
+              genericOp.getLoc(), resultType, returnedArg);
+        else
+          returnedArg = rewriter.create<tensor::CastOp>(
+              genericOp.getLoc(), resultType, returnedArg);
+      }
       returnedArgs.push_back(returnedArg);
     }
diff --git a/mlir/lib/Dialect/Linalg/Transforms/CMakeLists.txt b/mlir/lib/Dialect/Linalg/Transforms/CMakeLists.txt
--- a/mlir/lib/Dialect/Linalg/Transforms/CMakeLists.txt
+++ b/mlir/lib/Dialect/Linalg/Transforms/CMakeLists.txt
@@ -50,6 +50,7 @@
   MLIRSCFTransforms
   MLIRSCFUtils
   MLIRPass
+  MLIRSparseTensor
   MLIRStandard
   MLIRStandardOpsTransforms
   MLIRStandardToLLVM
diff --git a/mlir/lib/Dialect/Linalg/Transforms/ElementwiseOpFusion.cpp b/mlir/lib/Dialect/Linalg/Transforms/ElementwiseOpFusion.cpp
--- a/mlir/lib/Dialect/Linalg/Transforms/ElementwiseOpFusion.cpp
+++ b/mlir/lib/Dialect/Linalg/Transforms/ElementwiseOpFusion.cpp
@@ -17,6 +17,7 @@
 #include "mlir/Dialect/Linalg/Passes.h"
 #include "mlir/Dialect/Linalg/Transforms/Transforms.h"
 #include "mlir/Dialect/Linalg/Utils/Utils.h"
+#include "mlir/Dialect/SparseTensor/IR/SparseTensor.h"
 #include "mlir/IR/AffineExpr.h"
 #include "mlir/IR/AffineMap.h"
 #include "mlir/IR/Matchers.h"
@@ -2184,6 +2185,10 @@
       if (!operandType)
         continue;

+      // If outs is sparse, leave it to the sparse compiler.
+      if (sparse_tensor::getSparseTensorEncoding(operandVal.getType()))
+        continue;
+
       // If outs is already an `init_tensor` operation, nothing to do.
       auto definingOp = operandVal.getDefiningOp<InitTensorOp>();
       if (definingOp)
@@ -2213,7 +2218,7 @@
 } // namespace

 //===---------------------------------------------------------------------===//
-// Methods that add patterns descrined in this file to a pattern list.
+// Methods that add patterns described in this file to a pattern list.
 //===---------------------------------------------------------------------===//

 void mlir::linalg::populateFoldReshapeOpsByLinearizationPatterns(
diff --git a/mlir/lib/Dialect/SparseTensor/Pipelines/SparseTensorPipelines.cpp b/mlir/lib/Dialect/SparseTensor/Pipelines/SparseTensorPipelines.cpp
--- a/mlir/lib/Dialect/SparseTensor/Pipelines/SparseTensorPipelines.cpp
+++ b/mlir/lib/Dialect/SparseTensor/Pipelines/SparseTensorPipelines.cpp
@@ -29,6 +29,8 @@
 void mlir::sparse_tensor::buildSparseCompiler(
     OpPassManager &pm, const SparseCompilerOptions &options) {
   // TODO(wrengr): ensure the original `pm` is for ModuleOp
+  pm.addNestedPass<FuncOp>(createLinalgGeneralizationPass());
+  pm.addPass(createLinalgElementwiseOpFusionPass());
   pm.addPass(createSparsificationPass(options.sparsificationOptions()));
   pm.addPass(createSparseTensorConversionPass());
   pm.addNestedPass<FuncOp>(createLinalgBufferizePass());
diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/dense_output.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/dense_output.mlir
--- a/mlir/test/Integration/Dialect/SparseTensor/CPU/dense_output.mlir
+++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/dense_output.mlir
@@ -1,6 +1,5 @@
 // RUN: mlir-opt %s --sparse-compiler | \
 // RUN: TENSOR0="%mlir_integration_test_dir/data/test.mtx" \
-// RUN: TENSOR1="%mlir_integration_test_dir/data/zero.mtx" \
 // RUN: mlir-cpu-runner \
 // RUN:  -e entry -entry-point-result=void \
 // RUN:  -shared-libs=%mlir_integration_test_dir/libmlir_c_runner_utils%shlibext | \
@@ -40,15 +39,17 @@
 // library.
 module {
   //
-  // A kernel that assigns elements from A to an initially zero X.
+  // A kernel that assigns elements from A to X.
   //
-  func @dense_output(%arga: tensor<?x?xf64, #SparseMatrix>,
-                     %argx: tensor<?x?xf64, #DenseMatrix>
-                       {linalg.inplaceable = true})
-      -> tensor<?x?xf64, #DenseMatrix> {
+  func @dense_output(%arga: tensor<?x?xf64, #SparseMatrix>) -> tensor<?x?xf64, #DenseMatrix> {
+    %c0 = arith.constant 0 : index
+    %c1 = arith.constant 1 : index
+    %d0 = tensor.dim %arga, %c0 : tensor<?x?xf64, #SparseMatrix>
+    %d1 = tensor.dim %arga, %c1 : tensor<?x?xf64, #SparseMatrix>
+    %init = sparse_tensor.init [%d0, %d1] : tensor<?x?xf64, #DenseMatrix>
     %0 = linalg.generic #trait_assign
       ins(%arga: tensor<?x?xf64, #SparseMatrix>)
-      outs(%argx: tensor<?x?xf64, #DenseMatrix>) {
+      outs(%init: tensor<?x?xf64, #DenseMatrix>) {
       ^bb(%a: f64, %x: f64):
         linalg.yield %a : f64
     } -> tensor<?x?xf64, #DenseMatrix>
@@ -70,15 +71,9 @@
     %a = sparse_tensor.new %fileName
       : !Filename to tensor<?x?xf64, #SparseMatrix>

-    // Initialize all-dense annotated "sparse" matrix to all zeros.
-    %fileZero = call @getTensorFilename(%c1) : (index) -> (!Filename)
-    %x = sparse_tensor.new %fileZero
-      : !Filename to tensor<?x?xf64, #DenseMatrix>
-
     // Call the kernel.
-    %0 = call @dense_output(%a, %x)
-      : (tensor<?x?xf64, #SparseMatrix>,
-         tensor<?x?xf64, #DenseMatrix>) -> tensor<?x?xf64, #DenseMatrix>
+    %0 = call @dense_output(%a)
+      : (tensor<?x?xf64, #SparseMatrix>) -> tensor<?x?xf64, #DenseMatrix>

     //
     // Print the linearized 5x5 result for verification.
@@ -92,7 +87,7 @@

     // Release the resources.
     sparse_tensor.release %a : tensor<?x?xf64, #SparseMatrix>
-    sparse_tensor.release %x : tensor<?x?xf64, #DenseMatrix>
+    sparse_tensor.release %0 : tensor<?x?xf64, #DenseMatrix>

     return
   }
diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_filter_conv2d.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_filter_conv2d.mlir
--- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_filter_conv2d.mlir
+++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_filter_conv2d.mlir
@@ -1,18 +1,12 @@
-// RUN: mlir-opt %s \
-// RUN:   --linalg-generalize-named-ops --linalg-fuse-elementwise-ops \
-// RUN:   --sparse-compiler | \
-// RUN: mlir-cpu-runner \
-// RUN:  -e entry -entry-point-result=void \
+// RUN: mlir-opt %s --sparse-compiler | \
+// RUN: mlir-cpu-runner -e entry -entry-point-result=void \
 // RUN:  -shared-libs=%mlir_integration_test_dir/libmlir_c_runner_utils%shlibext | \
 // RUN: FileCheck %s
 //
 // Do the same run, but now with SIMDization as well. This should not change the outcome.
 //
-// RUN: mlir-opt %s \
-// RUN:   --linalg-generalize-named-ops --linalg-fuse-elementwise-ops \
-// RUN:   --sparse-compiler="vectorization-strategy=2 vl=2" | \
-// RUN: mlir-cpu-runner \
-// RUN:  -e entry -entry-point-result=void \
+// RUN: mlir-opt %s --sparse-compiler="vectorization-strategy=2 vl=2" | \
+// RUN: mlir-cpu-runner -e entry -entry-point-result=void \
 // RUN:  -shared-libs=%mlir_integration_test_dir/libmlir_c_runner_utils%shlibext | \
 // RUN: FileCheck %s
diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_matmul.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_matmul.mlir
--- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_matmul.mlir
+++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_matmul.mlir
@@ -1,11 +1,7 @@
-// RUN: mlir-opt %s \
-// RUN:   --linalg-generalize-named-ops --linalg-fuse-elementwise-ops \
-// RUN:   --sparse-compiler | \
-// RUN: mlir-cpu-runner \
-// RUN:  -e entry -entry-point-result=void \
+// RUN: mlir-opt %s --sparse-compiler | \
+// RUN: mlir-cpu-runner -e entry -entry-point-result=void \
 // RUN:  -shared-libs=%mlir_integration_test_dir/libmlir_c_runner_utils%shlibext | \
 // RUN: FileCheck %s
-//

 #CSR = #sparse_tensor.encoding<{
   dimLevelType = [ "dense", "compressed" ],
diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_quantized_matmul.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_quantized_matmul.mlir
--- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_quantized_matmul.mlir
+++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_quantized_matmul.mlir
@@ -1,18 +1,12 @@
-// RUN: mlir-opt %s \
-// RUN:   --linalg-generalize-named-ops --linalg-fuse-elementwise-ops \
-// RUN:   --sparse-compiler | \
-// RUN: mlir-cpu-runner \
-// RUN:  -e entry -entry-point-result=void \
+// RUN: mlir-opt %s --sparse-compiler | \
+// RUN: mlir-cpu-runner -e entry -entry-point-result=void \
 // RUN:  -shared-libs=%mlir_integration_test_dir/libmlir_c_runner_utils%shlibext | \
 // RUN: FileCheck %s
 //
 // Do the same run, but now with SIMDization as well. This should not change the outcome.
 //
-// RUN: mlir-opt %s \
-// RUN:   --linalg-generalize-named-ops --linalg-fuse-elementwise-ops \
-// RUN:   --sparse-compiler="vectorization-strategy=2 vl=2" | \
-// RUN: mlir-cpu-runner \
-// RUN:  -e entry -entry-point-result=void \
+// RUN: mlir-opt %s --sparse-compiler="vectorization-strategy=2 vl=2" | \
+// RUN: mlir-cpu-runner -e entry -entry-point-result=void \
 // RUN:  -shared-libs=%mlir_integration_test_dir/libmlir_c_runner_utils%shlibext | \
 // RUN: FileCheck %s
diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_reductions.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_reductions.mlir
--- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_reductions.mlir
+++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_reductions.mlir
@@ -1,16 +1,12 @@
 // RUN: mlir-opt %s --sparse-compiler | \
-// RUN: mlir-cpu-runner \
-// RUN:  -e entry -entry-point-result=void \
+// RUN: mlir-cpu-runner -e entry -entry-point-result=void \
 // RUN:  -shared-libs=%mlir_integration_test_dir/libmlir_c_runner_utils%shlibext | \
 // RUN: FileCheck %s
 //
 // Do the same run, but now with SIMDization as well. This should not change the outcome.
 //
-// RUN: mlir-opt %s \
-// RUN:   --linalg-generalize-named-ops --linalg-fuse-elementwise-ops \
-// RUN:   --sparse-compiler="vectorization-strategy=2 vl=8" | \
-// RUN: mlir-cpu-runner \
-// RUN:  -e entry -entry-point-result=void \
+// RUN: mlir-opt %s --sparse-compiler="vectorization-strategy=2 vl=8" | \
+// RUN: mlir-cpu-runner -e entry -entry-point-result=void \
 // RUN:  -shared-libs=%mlir_integration_test_dir/libmlir_c_runner_utils%shlibext | \
 // RUN: FileCheck %s
diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_sampled_mm_fusion.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_sampled_mm_fusion.mlir
--- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_sampled_mm_fusion.mlir
+++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_sampled_mm_fusion.mlir
@@ -1,18 +1,12 @@
-// RUN: mlir-opt %s \
-// RUN:   --linalg-generalize-named-ops --linalg-fuse-elementwise-ops \
-// RUN:   --sparse-compiler | \
-// RUN: mlir-cpu-runner \
-// RUN:  -e entry -entry-point-result=void \
+// RUN: mlir-opt %s --sparse-compiler | \
+// RUN: mlir-cpu-runner -e entry -entry-point-result=void \
 // RUN:  -shared-libs=%mlir_integration_test_dir/libmlir_c_runner_utils%shlibext | \
 // RUN: FileCheck %s
 //
 // Do the same run, but now with SIMDization as well. This should not change the outcome.
 //
-// RUN: mlir-opt %s \
-// RUN:   --linalg-generalize-named-ops --linalg-fuse-elementwise-ops \
-// RUN:   --sparse-compiler="vectorization-strategy=2 vl=8" | \
-// RUN: mlir-cpu-runner \
-// RUN:  -e entry -entry-point-result=void \
+// RUN: mlir-opt %s -sparse-compiler="vectorization-strategy=2 vl=8" | \
+// RUN: mlir-cpu-runner -e entry -entry-point-result=void \
 // RUN:  -shared-libs=%mlir_integration_test_dir/libmlir_c_runner_utils%shlibext | \
 // RUN: FileCheck %s
diff --git a/mlir/test/Integration/Dialect/SparseTensor/python/test_SpMM.py b/mlir/test/Integration/Dialect/SparseTensor/python/test_SpMM.py
--- a/mlir/test/Integration/Dialect/SparseTensor/python/test_SpMM.py
+++ b/mlir/test/Integration/Dialect/SparseTensor/python/test_SpMM.py
@@ -113,7 +113,6 @@
   def __init__(self, options: str):
     pipeline = (
-        f'builtin.func(linalg-generalize-named-ops,linalg-fuse-elementwise-ops),'
         f'sparse-compiler{{{options} reassociate-fp-reductions=1 enable-index-optimizations=1}}')
     self.pipeline = pipeline
diff --git a/mlir/test/Integration/Dialect/SparseTensor/python/test_output.py b/mlir/test/Integration/Dialect/SparseTensor/python/test_output.py
--- a/mlir/test/Integration/Dialect/SparseTensor/python/test_output.py
+++ b/mlir/test/Integration/Dialect/SparseTensor/python/test_output.py
@@ -73,7 +73,6 @@
   def __init__(self):
     pipeline = (
-        f'builtin.func(linalg-generalize-named-ops,linalg-fuse-elementwise-ops),'
         f'sparse-compiler{{reassociate-fp-reductions=1 enable-index-optimizations=1}}')
     self.pipeline = pipeline
diff --git a/mlir/test/Integration/Dialect/SparseTensor/python/test_stress.py b/mlir/test/Integration/Dialect/SparseTensor/python/test_stress.py
--- a/mlir/test/Integration/Dialect/SparseTensor/python/test_stress.py
+++ b/mlir/test/Integration/Dialect/SparseTensor/python/test_stress.py
@@ -171,7 +171,6 @@
   def __init__(self, sparsification_options: str, support_lib: str):
     self._support_lib = support_lib
     self._pipeline = (
-        f'builtin.func(linalg-generalize-named-ops,linalg-fuse-elementwise-ops),'
         f'sparse-compiler{{{sparsification_options} reassociate-fp-reductions=1 enable-index-optimizations=1}}')
     # Must be in the scope of a `with ir.Context():`
     self._passmanager = PassManager.parse(self._pipeline)
diff --git a/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel b/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel
--- a/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel
+++ b/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel
@@ -6997,6 +6997,7 @@
         ":Parser",
         ":SCFDialect",
         ":SideEffectInterfaces",
+        ":SparseTensor",
        ":StandardOps",
         ":Support",
         ":TensorDialect",
@@ -7083,6 +7084,7 @@
         ":SCFDialect",
         ":SCFTransforms",
         ":SCFUtils",
+        ":SparseTensor",
         ":StandardOps",
         ":StandardOpsTransforms",
         ":Support",
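
For illustration, here is a minimal sketch of the kind of input this change enables; the `#CSR` encoding, function name, and file name below are hypothetical, not part of the patch. Since `--sparse-compiler` now runs generalization and fusion itself, a named op with a sparse operand can be handed to the pipeline directly, e.g. `mlir-opt example.mlir --sparse-compiler`:

```mlir
// example.mlir (hypothetical): a named op on a sparse operand that the
// extended pipeline can lower without manually prepending
// --linalg-generalize-named-ops --linalg-fuse-elementwise-ops.
#CSR = #sparse_tensor.encoding<{ dimLevelType = [ "dense", "compressed" ] }>

func @matmul_csr(%a: tensor<4x8xf64, #CSR>,
                 %b: tensor<8x4xf64>,
                 %c: tensor<4x4xf64>) -> tensor<4x4xf64> {
  // linalg-generalize-named-ops rewrites this named op into a linalg.generic,
  // linalg-fuse-elementwise-ops fuses any elementwise producers/consumers,
  // and only then does sparsification take over.
  %0 = linalg.matmul
    ins(%a, %b : tensor<4x8xf64, #CSR>, tensor<8x4xf64>)
    outs(%c : tensor<4x4xf64>) -> tensor<4x4xf64>
  return %0 : tensor<4x4xf64>
}
```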