diff --git a/mlir/include/mlir/Dialect/GPU/IR/GPUOps.td b/mlir/include/mlir/Dialect/GPU/IR/GPUOps.td --- a/mlir/include/mlir/Dialect/GPU/IR/GPUOps.td +++ b/mlir/include/mlir/Dialect/GPU/IR/GPUOps.td @@ -1860,7 +1860,7 @@ Example: ```mlir - %buffersz, %token = gpu.spmv_buffersize async [%dep] %env, %spmatA{TRANSPOSE}, %dnX, %dnY + %buffersz, %token = gpu.spmv_buffer_size async [%dep] %env, %spmatA{TRANSPOSE}, %dnX, %dnY ``` }]; let arguments = (ins Variadic:$asyncDependencies, @@ -1868,24 +1868,27 @@ GPU_TransposeModeAttr:$modeA, GPU_SparseSpMatHandle:$spmatA, GPU_SparseDnVecHandle:$dnX, - GPU_SparseDnVecHandle:$dnY); + GPU_SparseDnVecHandle:$dnY, + OptionalAttr:$computeType); let results = (outs Res:$bufferSz, Optional:$asyncToken); let builders = [OpBuilder<(ins - "::mlir::Type":$bufferSz, - "::mlir::Type":$asyncToken, - "::mlir::ValueRange":$asyncDependencies, - "::mlir::Value":$env, - "::mlir::Value":$spmatA, - "::mlir::Value":$dnX, - "::mlir::Value":$dnY), [{ + "Type":$bufferSz, + "Type":$asyncToken, + "ValueRange":$asyncDependencies, + "Value":$env, + "Value":$spmatA, + "Value":$dnX, + "Value":$dnY) + , [{ auto modeA = gpu::TransposeMode::NON_TRANSPOSE; - return build($_builder, $_state, bufferSz, asyncToken, asyncDependencies, env, - modeA, spmatA, dnX, dnY);}]> + return build($_builder, $_state, bufferSz, asyncToken, asyncDependencies, + env, modeA, spmatA, dnX, dnY, {});}]> ]; let assemblyFormat = [{ + (`{` $computeType^ `}`)? custom(type($asyncToken), $asyncDependencies) $env `,` $spmatA (`{` $modeA^ `}`)? `,` $dnX `,` $dnY attr-dict }]; @@ -1919,23 +1922,25 @@ GPU_SparseSpMatHandle:$spmatA, GPU_SparseDnVecHandle:$dnX, GPU_SparseDnVecHandle:$dnY, + OptionalAttr:$computeType, AnyMemRef:$buffer); let results = (outs Optional:$asyncToken); let builders = [OpBuilder<(ins - "::mlir::Type":$asyncToken, - "::mlir::ValueRange":$asyncDependencies, - "::mlir::Value":$env, - "::mlir::Value":$spmatA, - "::mlir::Value":$dnX, - "::mlir::Value":$dnY, - "::mlir::Value":$buffer), [{ + "Type":$asyncToken, + "ValueRange":$asyncDependencies, + "Value":$env, + "Value":$spmatA, + "Value":$dnX, + "Value":$dnY, + "Value":$buffer), [{ auto modeA = gpu::TransposeMode::NON_TRANSPOSE; return build($_builder, $_state, asyncToken, asyncDependencies, env, modeA, - spmatA, dnX, dnY, buffer);}]> + spmatA, dnX, dnY, {}, buffer);}]> ]; let assemblyFormat = [{ + (`{` $computeType^ `}`)? custom(type($asyncToken), $asyncDependencies) $env `,` $spmatA (`{` $modeA^ `}`)? `,` $dnX `,` $dnY `,` $buffer attr-dict `:` type($buffer) }]; @@ -1970,25 +1975,27 @@ GPU_TransposeModeAttr:$modeB, GPU_SparseSpMatHandle:$spmatA, GPU_SparseDnMatHandle:$dnmatB, - GPU_SparseDnMatHandle:$dnmatC); + GPU_SparseDnMatHandle:$dnmatC, + OptionalAttr:$computeType); let results = (outs Res:$bufferSz, Optional:$asyncToken); let builders = [OpBuilder<(ins - "::mlir::Type":$bufferSz, - "::mlir::Type":$asyncToken, - "::mlir::ValueRange":$asyncDependencies, - "::mlir::Value":$env, - "::mlir::Value":$spmatA, - "::mlir::Value":$dnmatB, - "::mlir::Value":$dnmatC), [{ + "Type":$bufferSz, + "Type":$asyncToken, + "ValueRange":$asyncDependencies, + "Value":$env, + "Value":$spmatA, + "Value":$dnmatB, + "Value":$dnmatC), [{ auto modeA = gpu::TransposeMode::NON_TRANSPOSE; auto modeB = gpu::TransposeMode::NON_TRANSPOSE; return build($_builder, $_state, bufferSz, asyncToken, asyncDependencies, - env, modeA, modeB, spmatA, dnmatB, dnmatC);}]> + env, modeA, modeB, spmatA, dnmatB, dnmatC, {});}]> ]; let assemblyFormat = [{ + (`{` $computeType^ `}`)? custom(type($asyncToken), $asyncDependencies) $env `,` $spmatA (`{` $modeA^ `}`)? `,` $dnmatB (`{` $modeB^ `}`)? `,` $dnmatC attr-dict }]; @@ -2024,24 +2031,26 @@ GPU_SparseSpMatHandle:$spmatA, GPU_SparseDnMatHandle:$dnmatB, GPU_SparseDnMatHandle:$dnmatC, + OptionalAttr:$computeType, AnyMemRef:$buffer); let results = (outs Optional:$asyncToken); let builders = [OpBuilder<(ins - "::mlir::Type":$asyncToken, - "::mlir::ValueRange":$asyncDependencies, - "::mlir::Value":$env, - "::mlir::Value":$spmatA, - "::mlir::Value":$dnmatB, - "::mlir::Value":$dnmatC, - "::mlir::Value":$buffer), [{ + "Type":$asyncToken, + "ValueRange":$asyncDependencies, + "Value":$env, + "Value":$spmatA, + "Value":$dnmatB, + "Value":$dnmatC, + "Value":$buffer), [{ auto modeA = gpu::TransposeMode::NON_TRANSPOSE; auto modeB = gpu::TransposeMode::NON_TRANSPOSE; return build($_builder, $_state, asyncToken, asyncDependencies, env, modeA, - modeB, spmatA, dnmatB, dnmatC, buffer);}]> + modeB, spmatA, dnmatB, dnmatC, {}, buffer);}]> ]; let assemblyFormat = [{ + (`{` $computeType^ `}`)? custom(type($asyncToken), $asyncDependencies) $env `,` $spmatA (`{` $modeA^ `}`)? `,` $dnmatB (`{` $modeB^ `}`)? `,` $dnmatC `,` $buffer attr-dict `:` type($buffer) }]; diff --git a/mlir/lib/Conversion/GPUCommon/GPUToLLVMConversion.cpp b/mlir/lib/Conversion/GPUCommon/GPUToLLVMConversion.cpp --- a/mlir/lib/Conversion/GPUCommon/GPUToLLVMConversion.cpp +++ b/mlir/lib/Conversion/GPUCommon/GPUToLLVMConversion.cpp @@ -1404,6 +1404,7 @@ auto dw = rewriter.create(loc, llvmInt32Type, dType.getIntOrFloatBitWidth()); auto stream = adaptor.getAsyncDependencies().front(); + // TODO: retrieve the compute type, notice that it may be optional auto bufferSize = spMVBufferSizeCallBuilder .create(loc, rewriter, @@ -1430,6 +1431,7 @@ MemRefDescriptor(adaptor.getBuffer()).allocatedPtr(rewriter, loc); if (!getTypeConverter()->useOpaquePointers()) pBuf = rewriter.create(loc, llvmPointerType, pBuf); + // TODO: retrieve the compute type, notice that it may be optional spMVCallBuilder.create(loc, rewriter, {adaptor.getEnv(), modeA, adaptor.getSpmatA(), adaptor.getDnX(), adaptor.getDnY(), dw, pBuf, @@ -1451,6 +1453,7 @@ auto dw = rewriter.create(loc, llvmInt32Type, dType.getIntOrFloatBitWidth()); auto stream = adaptor.getAsyncDependencies().front(); + // TODO: retrieve the compute type, notice that it may be optional auto bufferSize = spMMBufferSizeCallBuilder .create(loc, rewriter, @@ -1478,6 +1481,7 @@ MemRefDescriptor(adaptor.getBuffer()).allocatedPtr(rewriter, loc); if (!getTypeConverter()->useOpaquePointers()) pBuf = rewriter.create(loc, llvmPointerType, pBuf); + // TODO: retrieve the compute type, notice that it may be optional spMMCallBuilder.create(loc, rewriter, {adaptor.getEnv(), modeA, modeB, adaptor.getSpmatA(), adaptor.getDnmatB(), adaptor.getDnmatC(), dw, pBuf, diff --git a/mlir/lib/ExecutionEngine/CudaRuntimeWrappers.cpp b/mlir/lib/ExecutionEngine/CudaRuntimeWrappers.cpp --- a/mlir/lib/ExecutionEngine/CudaRuntimeWrappers.cpp +++ b/mlir/lib/ExecutionEngine/CudaRuntimeWrappers.cpp @@ -341,6 +341,7 @@ extern "C" MLIR_CUDA_WRAPPERS_EXPORT intptr_t mgpuSpMVBufferSize(void *h, int32_t ma, void *a, void *x, void *y, int32_t dw, CUstream /*stream*/) { + // TODO: pass in and pass on the compute type cusparseHandle_t handle = reinterpret_cast(h); cusparseOperation_t modeA = static_cast(ma); cusparseSpMatDescr_t matA = reinterpret_cast(a); @@ -359,6 +360,7 @@ void *x, void *y, int32_t dw, void *buf, CUstream /*stream*/) { + // TODO: pass in and pass on the compute type cusparseHandle_t handle = reinterpret_cast(h); cusparseOperation_t modeA = static_cast(ma); cusparseSpMatDescr_t matA = reinterpret_cast(a); @@ -374,6 +376,7 @@ extern "C" MLIR_CUDA_WRAPPERS_EXPORT intptr_t mgpuSpMMBufferSize(void *h, int32_t ma, int32_t mb, void *a, void *b, void *c, int32_t dw, CUstream /*stream*/) { + // TODO: pass in and pass on the compute type cusparseHandle_t handle = reinterpret_cast(h); cusparseOperation_t modeA = static_cast(ma); cusparseOperation_t modeB = static_cast(mb); @@ -392,6 +395,7 @@ extern "C" MLIR_CUDA_WRAPPERS_EXPORT void mgpuSpMM(void *h, int32_t ma, int32_t mb, void *a, void *b, void *c, int32_t dw, void *buf, CUstream /*stream*/) { + // TODO: pass in and pass on the compute type cusparseHandle_t handle = reinterpret_cast(h); cusparseOperation_t modeA = static_cast(ma); cusparseOperation_t modeB = static_cast(mb);