diff --git a/mlir/include/mlir/Dialect/SparseTensor/Pipelines/Passes.h b/mlir/include/mlir/Dialect/SparseTensor/Pipelines/Passes.h --- a/mlir/include/mlir/Dialect/SparseTensor/Pipelines/Passes.h +++ b/mlir/include/mlir/Dialect/SparseTensor/Pipelines/Passes.h @@ -56,7 +56,6 @@ PassOptions::Option enableRuntimeLibrary{ *this, "enable-runtime-library", desc("Enable runtime library for manipulating sparse tensors"), - // TODO: Disable runtime library by default after feature complete. init(true)}; PassOptions::Option testBufferizationAnalysisOnly{ @@ -67,56 +66,62 @@ *this, "enable-buffer-initialization", desc("Enable zero-initialization of memory buffers"), init(false)}; - /// Projects out the options for `createSparsificationPass`. - SparsificationOptions sparsificationOptions() const { - return SparsificationOptions(parallelization); - } + PassOptions::Option vectorLength{ + *this, "vl", desc("Set the vector length (0 disables vectorization)"), + init(0)}; // These options must be kept in sync with `SparseTensorConversionBase`. PassOptions::Option sparseToSparse{ *this, "s2s-strategy", desc("Set the strategy for sparse-to-sparse conversion"), init(0)}; - /// Projects out the options for `createSparsificationPass`. - SparseTensorConversionOptions sparseTensorConversionOptions() const { - return SparseTensorConversionOptions( - sparseToSparseConversionStrategy(sparseToSparse)); - } - - // These options must be kept in sync with `ConvertVectorToLLVMBase`. - // TODO(wrengr): does `indexOptimizations` differ from `enableSIMDIndex32`? + // These options must be kept in sync with the `ConvertVectorToLLVM` pass + // (defined in include/mlir/Conversion/Passes.td). 
PassOptions::Option reassociateFPReductions{ *this, "reassociate-fp-reductions", desc("Allows llvm to reassociate floating-point reductions for speed"), init(false)}; - PassOptions::Option indexOptimizations{ + PassOptions::Option force32BitVectorIndices{ *this, "enable-index-optimizations", desc("Allows compiler to assume indices fit in 32-bit if that yields " "faster code"), init(true)}; PassOptions::Option amx{ *this, "enable-amx", - desc("Enables the use of AMX dialect while lowering the vector dialect."), + desc("Enables the use of AMX dialect while lowering the vector dialect"), + init(false)}; + PassOptions::Option armNeon{ + *this, "enable-arm-neon", + desc("Enables the use of ArmNeon dialect while lowering the vector " + "dialect"), + init(false)}; + PassOptions::Option armSVE{ + *this, "enable-arm-sve", + desc("Enables the use of ArmSVE dialect while lowering the vector " + "dialect"), init(false)}; - PassOptions::Option armNeon{*this, "enable-arm-neon", - desc("Enables the use of ArmNeon dialect " - "while lowering the vector dialect."), - init(false)}; - PassOptions::Option armSVE{*this, "enable-arm-sve", - desc("Enables the use of ArmSVE dialect " - "while lowering the vector dialect."), - init(false)}; PassOptions::Option x86Vector{ *this, "enable-x86vector", desc("Enables the use of X86Vector dialect while lowering the vector " - "dialect."), + "dialect"), init(false)}; + /// Projects out the options for `createSparsificationPass`. + SparsificationOptions sparsificationOptions() const { + return SparsificationOptions(parallelization); + } + + /// Projects out the options for `createSparseTensorConversionPass`. + SparseTensorConversionOptions sparseTensorConversionOptions() const { + return SparseTensorConversionOptions( + sparseToSparseConversionStrategy(sparseToSparse)); + } + /// Projects out the options for `createConvertVectorToLLVMPass`. 
LowerVectorToLLVMOptions lowerVectorToLLVMOptions() const { LowerVectorToLLVMOptions opts{}; opts.enableReassociateFPReductions(reassociateFPReductions); - opts.enableIndexOptimizations(indexOptimizations); + opts.enableIndexOptimizations(force32BitVectorIndices); opts.enableArmNeon(armNeon); opts.enableArmSVE(armSVE); opts.enableAMX(amx); diff --git a/mlir/include/mlir/Dialect/SparseTensor/Transforms/Passes.h b/mlir/include/mlir/Dialect/SparseTensor/Transforms/Passes.h --- a/mlir/include/mlir/Dialect/SparseTensor/Transforms/Passes.h +++ b/mlir/include/mlir/Dialect/SparseTensor/Transforms/Passes.h @@ -166,7 +166,8 @@ const bufferization::OneShotBufferizationOptions &bufferizationOptions, const SparsificationOptions &sparsificationOptions, const SparseTensorConversionOptions &sparseTensorConversionOptions, - bool enableRuntimeLibrary, bool enableBufferInitialization); + bool enableRuntimeLibrary, bool enableBufferInitialization, + unsigned vectorLength, bool enableVLAVectorization, bool enableSIMDIndex32); void populateSparseBufferRewriting(RewritePatternSet &patterns, bool enableBufferInitialization); diff --git a/mlir/lib/Dialect/SparseTensor/Pipelines/SparseTensorPipelines.cpp b/mlir/lib/Dialect/SparseTensor/Pipelines/SparseTensorPipelines.cpp --- a/mlir/lib/Dialect/SparseTensor/Pipelines/SparseTensorPipelines.cpp +++ b/mlir/lib/Dialect/SparseTensor/Pipelines/SparseTensorPipelines.cpp @@ -56,7 +56,10 @@ pm.addPass(createSparsificationAndBufferizationPass( getBufferizationOptions(options.testBufferizationAnalysisOnly), options.sparsificationOptions(), options.sparseTensorConversionOptions(), - options.enableRuntimeLibrary, options.enableBufferInitialization)); + options.enableRuntimeLibrary, options.enableBufferInitialization, + options.vectorLength, + /*enableVLAVectorization=*/options.armSVE, + /*enableSIMDIndex32=*/options.force32BitVectorIndices)); if (options.testBufferizationAnalysisOnly) return; pm.addNestedPass(createCanonicalizerPass()); diff --git 
a/mlir/lib/Dialect/SparseTensor/Transforms/SparseVectorization.cpp b/mlir/lib/Dialect/SparseTensor/Transforms/SparseVectorization.cpp --- a/mlir/lib/Dialect/SparseTensor/Transforms/SparseVectorization.cpp +++ b/mlir/lib/Dialect/SparseTensor/Transforms/SparseVectorization.cpp @@ -282,6 +282,19 @@ } continue; // success so far } + // Address calculation 'i = add inv, idx' (after LICM). + if (auto load = cast.getDefiningOp()) { + Value inv = load.getOperand(0); + Value idx = load.getOperand(1); + if (!inv.dyn_cast() && + inv.getDefiningOp()->getBlock() != &forOp.getRegion().front() && + idx.dyn_cast()) { + if (codegen) + idxs.push_back( + rewriter.create(forOp.getLoc(), inv, idx)); + continue; // success so far + } + } return false; } return true; @@ -409,6 +422,7 @@ TYPEDUNAOP(arith::IndexCastOp) TYPEDUNAOP(arith::TruncIOp) TYPEDUNAOP(arith::BitcastOp) + // TODO: complex? } } else if (def->getNumOperands() == 2) { Value vx, vy; @@ -428,6 +442,7 @@ BINOP(arith::AndIOp) BINOP(arith::OrIOp) BINOP(arith::XOrIOp) + // TODO: complex? // TODO: shift by invariant? 
} } @@ -602,6 +617,7 @@ unsigned vectorLength, bool enableVLAVectorization, bool enableSIMDIndex32) { + assert(vectorLength > 0); patterns.add(patterns.getContext(), vectorLength, enableVLAVectorization, enableSIMDIndex32); patterns.add, diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/SparsificationAndBufferizationPass.cpp b/mlir/lib/Dialect/SparseTensor/Transforms/SparsificationAndBufferizationPass.cpp --- a/mlir/lib/Dialect/SparseTensor/Transforms/SparsificationAndBufferizationPass.cpp +++ b/mlir/lib/Dialect/SparseTensor/Transforms/SparsificationAndBufferizationPass.cpp @@ -9,6 +9,7 @@ #include "mlir/Dialect/SparseTensor/Transforms/Passes.h" #include "mlir/Dialect/Bufferization/IR/BufferizableOpInterface.h" +#include "mlir/Dialect/Bufferization/IR/Bufferization.h" #include "mlir/Dialect/Bufferization/Transforms/Bufferize.h" #include "mlir/Dialect/Bufferization/Transforms/OneShotAnalysis.h" #include "mlir/Dialect/Bufferization/Transforms/OneShotModuleBufferize.h" @@ -17,6 +18,7 @@ #include "mlir/Dialect/SparseTensor/IR/SparseTensor.h" #include "mlir/Dialect/SparseTensor/Transforms/Passes.h" #include "mlir/Pass/PassManager.h" +#include "mlir/Transforms/Passes.h" using namespace mlir; using namespace mlir::func; @@ -52,12 +54,17 @@ const bufferization::OneShotBufferizationOptions &bufferizationOptions, const SparsificationOptions &sparsificationOptions, const SparseTensorConversionOptions &sparseTensorConversionOptions, - bool enableRuntimeLibrary, bool enableBufferInitialization) + bool enableRuntimeLibrary, bool enableBufferInitialization, + unsigned vectorLength, bool enableVLAVectorization, + bool enableSIMDIndex32) : bufferizationOptions(bufferizationOptions), sparsificationOptions(sparsificationOptions), sparseTensorConversionOptions(sparseTensorConversionOptions), enableRuntimeLibrary(enableRuntimeLibrary), - enableBufferInitialization(enableBufferInitialization) {} + enableBufferInitialization(enableBufferInitialization), + vectorLength(vectorLength), 
+ enableVLAVectorization(enableVLAVectorization), + enableSIMDIndex32(enableSIMDIndex32) {} /// Bufferize all dense ops. This assumes that no further analysis is needed /// and that all required buffer copies were already inserted by @@ -86,6 +93,10 @@ return success(); } + void getDependentDialects(::mlir::DialectRegistry &registry) const override { + registry.insert(); + } + void runOnOperation() override { { // Run enabling transformations. @@ -122,6 +133,11 @@ OpPassManager pm("builtin.module"); pm.addPass(createSparsificationPass(sparsificationOptions)); pm.addPass(createPostSparsificationRewritePass(enableRuntimeLibrary)); + if (vectorLength > 0) { + pm.addPass(mlir::createLoopInvariantCodeMotionPass()); + pm.addPass(createSparseVectorizationPass( + vectorLength, enableVLAVectorization, enableSIMDIndex32)); + } if (enableRuntimeLibrary) { pm.addPass( createSparseTensorConversionPass(sparseTensorConversionOptions)); @@ -144,7 +160,11 @@ SparseTensorConversionOptions sparseTensorConversionOptions; bool enableRuntimeLibrary; bool enableBufferInitialization; + unsigned vectorLength; + bool enableVLAVectorization; + bool enableSIMDIndex32; }; + } // namespace sparse_tensor } // namespace mlir @@ -152,10 +172,13 @@ const bufferization::OneShotBufferizationOptions &bufferizationOptions, const SparsificationOptions &sparsificationOptions, const SparseTensorConversionOptions &sparseTensorConversionOptions, - bool enableRuntimeLibrary, bool enableBufferInitialization) { + bool enableRuntimeLibrary, bool enableBufferInitialization, + unsigned vectorLength, bool enableVLAVectorization, + bool enableSIMDIndex32) { return std::make_unique< mlir::sparse_tensor::SparsificationAndBufferizationPass>( bufferizationOptions, sparsificationOptions, sparseTensorConversionOptions, enableRuntimeLibrary, - enableBufferInitialization); + enableBufferInitialization, vectorLength, enableVLAVectorization, + enableSIMDIndex32); } diff --git 
a/mlir/test/Dialect/SparseTensor/sparse_vector_mv.mlir b/mlir/test/Dialect/SparseTensor/sparse_vector_mv.mlir new file mode 100644 --- /dev/null +++ b/mlir/test/Dialect/SparseTensor/sparse_vector_mv.mlir @@ -0,0 +1,31 @@ +// RUN: mlir-opt %s -sparse-compiler="vl=8" | FileCheck %s + +#Dense = #sparse_tensor.encoding<{ + dimLevelType = [ "dense", "dense" ] +}> + +#matvec = { + indexing_maps = [ + affine_map<(i,j) -> (i,j)>, // A + affine_map<(i,j) -> (j)>, // b + affine_map<(i,j) -> (i)> // x (out) + ], + iterator_types = ["parallel", "reduction"], + doc = "X(i) += A(i,j) * B(j)" +} + +// CHECK-LABEL: llvm.func @kernel_matvec +// CHECK: llvm.intr.vector.reduce.fadd +func.func @kernel_matvec(%arga: tensor, + %argb: tensor, + %argx: tensor) -> tensor { + %x = linalg.generic #matvec + ins(%arga, %argb: tensor, tensor) + outs(%argx: tensor) { + ^bb(%a: f32, %b: f32, %x: f32): + %0 = arith.mulf %a, %b : f32 + %1 = arith.addf %x, %0 : f32 + linalg.yield %1 : f32 + } -> tensor + return %x : tensor +}