diff --git a/mlir/include/mlir/Dialect/SparseTensor/Pipelines/Passes.h b/mlir/include/mlir/Dialect/SparseTensor/Pipelines/Passes.h --- a/mlir/include/mlir/Dialect/SparseTensor/Pipelines/Passes.h +++ b/mlir/include/mlir/Dialect/SparseTensor/Pipelines/Passes.h @@ -122,6 +122,14 @@ "dialect"), init(false)}; + /// These options are used to enable GPU code generation. + PassOptions::Option gpuTriple{*this, "gpu-triple", + desc("GPU target triple")}; + PassOptions::Option gpuChip{*this, "gpu-chip", + desc("GPU target architecture")}; + PassOptions::Option gpuFeatures{*this, "gpu-features", + desc("GPU target features")}; + /// Projects out the options for `createSparsificationPass`. SparsificationOptions sparsificationOptions() const { return SparsificationOptions(parallelization, enableIndexReduction); diff --git a/mlir/lib/Dialect/SparseTensor/Pipelines/CMakeLists.txt b/mlir/lib/Dialect/SparseTensor/Pipelines/CMakeLists.txt --- a/mlir/lib/Dialect/SparseTensor/Pipelines/CMakeLists.txt +++ b/mlir/lib/Dialect/SparseTensor/Pipelines/CMakeLists.txt @@ -12,6 +12,8 @@ MLIRComplexToLibm MLIRComplexToStandard MLIRFuncTransforms + MLIRGPUToNVVMTransforms + MLIRGPUTransforms MLIRLinalgTransforms MLIRMathToLibm MLIRMathToLLVM diff --git a/mlir/lib/Dialect/SparseTensor/Pipelines/SparseTensorPipelines.cpp b/mlir/lib/Dialect/SparseTensor/Pipelines/SparseTensorPipelines.cpp --- a/mlir/lib/Dialect/SparseTensor/Pipelines/SparseTensorPipelines.cpp +++ b/mlir/lib/Dialect/SparseTensor/Pipelines/SparseTensorPipelines.cpp @@ -8,12 +8,16 @@ #include "mlir/Dialect/SparseTensor/Pipelines/Passes.h" +#include "mlir/Conversion/GPUToNVVM/GPUToNVVMPass.h" #include "mlir/Conversion/Passes.h" #include "mlir/Dialect/Arith/Transforms/Passes.h" #include "mlir/Dialect/Bufferization/Transforms/Bufferize.h" #include "mlir/Dialect/Bufferization/Transforms/OneShotAnalysis.h" #include "mlir/Dialect/Bufferization/Transforms/Passes.h" #include "mlir/Dialect/Func/IR/FuncOps.h" +#include "mlir/Dialect/GPU/IR/GPUDialect.h" +#include "mlir/Dialect/GPU/Transforms/Passes.h" +#include "mlir/Dialect/LLVMIR/NVVMDialect.h" #include "mlir/Dialect/Linalg/Passes.h" #include "mlir/Dialect/MemRef/Transforms/Passes.h" #include "mlir/Dialect/SparseTensor/IR/SparseTensor.h" @@ -65,6 +69,16 @@ pm.addNestedPass(createCanonicalizerPass()); pm.addNestedPass( mlir::bufferization::createFinalizingBufferizePass()); + + // GPU code generation. + const bool gpuCodegen = options.gpuTriple.hasValue(); + if (gpuCodegen) { + pm.addPass(createSparseGPUCodegenPass()); + pm.addNestedPass(createStripDebugInfoPass()); + pm.addNestedPass(createConvertSCFToCFPass()); + pm.addNestedPass(createLowerGpuOpsToNVVMOpsPass()); + } + // TODO(springerm): Add sparse support to the BufferDeallocation pass and add // it to this pipeline. pm.addNestedPass(createConvertLinalgToLoopsPass()); @@ -75,7 +89,7 @@ pm.addPass(createConvertVectorToLLVMPass(options.lowerVectorToLLVMOptions())); pm.addPass(createFinalizeMemRefToLLVMConversionPass()); pm.addNestedPass(createConvertComplexToStandardPass()); - pm.addNestedPass(mlir::arith::createArithExpandOpsPass()); + pm.addNestedPass(arith::createArithExpandOpsPass()); pm.addNestedPass(createConvertMathToLLVMPass()); pm.addPass(createConvertMathToLibmPass()); pm.addPass(createConvertComplexToLibmPass()); @@ -84,6 +98,16 @@ pm.addPass(createConvertComplexToLLVMPass()); pm.addPass(createConvertVectorToLLVMPass(options.lowerVectorToLLVMOptions())); pm.addPass(createConvertFuncToLLVMPass()); + + // Finalize GPU code generation. + if (gpuCodegen) { +#if MLIR_GPU_TO_CUBIN_PASS_ENABLE + pm.addNestedPass(createGpuSerializeToCubinPass( + options.gpuTriple, options.gpuChip, options.gpuFeatures)); +#endif + pm.addPass(createGpuToLLVMConversionPass()); + } + pm.addPass(createReconcileUnrealizedCastsPass()); } diff --git a/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel b/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel --- a/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel +++ b/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel @@ -2372,15 +2372,21 @@ srcs = glob(["lib/Dialect/SparseTensor/Pipelines/*.cpp"]), hdrs = ["include/mlir/Dialect/SparseTensor/Pipelines/Passes.h"], includes = ["include"], + local_defines = if_cuda_available(["MLIR_GPU_TO_CUBIN_PASS_ENABLE"]), deps = [ ":ArithTransforms", ":BufferizationTransforms", ":ConversionPasses", ":FuncDialect", ":FuncTransforms", + ":GPUDialect", + ":GPUToNVVMTransforms", + ":GPUTransforms", ":LinalgTransforms", ":MemRefTransforms", + ":NVVMDialect", ":Pass", + ":SerializeToCubin", ":SparseTensorDialect", ":SparseTensorTransforms", ":TensorTransforms",