diff --git a/mlir/include/mlir/Dialect/GPU/IR/GPUOps.td b/mlir/include/mlir/Dialect/GPU/IR/GPUOps.td --- a/mlir/include/mlir/Dialect/GPU/IR/GPUOps.td +++ b/mlir/include/mlir/Dialect/GPU/IR/GPUOps.td @@ -1869,7 +1869,7 @@ GPU_SparseSpMatHandle:$spmatA, GPU_SparseDnVecHandle:$dnX, GPU_SparseDnVecHandle:$dnY, - OptionalAttr:$computeType); + TypeAttr:$computeType); let results = (outs Res:$bufferSz, Optional:$asyncToken); @@ -1880,16 +1880,17 @@ "Value":$env, "Value":$spmatA, "Value":$dnX, - "Value":$dnY) + "Value":$dnY, + "Type":$computeType) , [{ auto modeA = gpu::TransposeMode::NON_TRANSPOSE; return build($_builder, $_state, bufferSz, asyncToken, asyncDependencies, - env, modeA, spmatA, dnX, dnY, {});}]> + env, modeA, spmatA, dnX, dnY, computeType);}]> ]; let assemblyFormat = [{ custom(type($asyncToken), $asyncDependencies) - $env `,` $spmatA (`{` $modeA^ `}`)? `,` $dnX `,` $dnY attr-dict ( `into` $computeType^)? + $env `,` $spmatA (`{` $modeA^ `}`)? `,` $dnX `,` $dnY attr-dict `into` $computeType }]; } @@ -1921,7 +1922,7 @@ GPU_SparseSpMatHandle:$spmatA, GPU_SparseDnVecHandle:$dnX, GPU_SparseDnVecHandle:$dnY, - OptionalAttr:$computeType, + TypeAttr:$computeType, AnyMemRef:$buffer); let results = (outs Optional:$asyncToken); @@ -1932,15 +1933,16 @@ "Value":$spmatA, "Value":$dnX, "Value":$dnY, + "Type":$computeType, "Value":$buffer), [{ auto modeA = gpu::TransposeMode::NON_TRANSPOSE; return build($_builder, $_state, asyncToken, asyncDependencies, env, modeA, - spmatA, dnX, dnY, {}, buffer);}]> + spmatA, dnX, dnY, computeType, buffer);}]> ]; let assemblyFormat = [{ custom(type($asyncToken), $asyncDependencies) - $env `,` $spmatA (`{` $modeA^ `}`)? `,` $dnX `,` $dnY `,` $buffer attr-dict `:` type($buffer) ( `into` $computeType^)? + $env `,` $spmatA (`{` $modeA^ `}`)? `,` $dnX `,` $dnY `,` $buffer attr-dict `:` type($buffer) `into` $computeType }]; } @@ -1974,7 +1976,7 @@ GPU_SparseSpMatHandle:$spmatA, GPU_SparseDnMatHandle:$dnmatB, GPU_SparseDnMatHandle:$dnmatC, - OptionalAttr:$computeType); + TypeAttr:$computeType); let results = (outs Res:$bufferSz, Optional:$asyncToken); @@ -1985,16 +1987,17 @@ "Value":$env, "Value":$spmatA, "Value":$dnmatB, - "Value":$dnmatC), [{ + "Value":$dnmatC, + "Type":$computeType), [{ auto modeA = gpu::TransposeMode::NON_TRANSPOSE; auto modeB = gpu::TransposeMode::NON_TRANSPOSE; return build($_builder, $_state, bufferSz, asyncToken, asyncDependencies, - env, modeA, modeB, spmatA, dnmatB, dnmatC, {});}]> + env, modeA, modeB, spmatA, dnmatB, dnmatC, computeType);}]> ]; let assemblyFormat = [{ custom(type($asyncToken), $asyncDependencies) - $env `,` $spmatA (`{` $modeA^ `}`)? `,` $dnmatB (`{` $modeB^ `}`)? `,` $dnmatC attr-dict ( `into` $computeType^)? + $env `,` $spmatA (`{` $modeA^ `}`)? `,` $dnmatB (`{` $modeB^ `}`)? `,` $dnmatC attr-dict `into` $computeType }]; } @@ -2028,7 +2031,7 @@ GPU_SparseSpMatHandle:$spmatA, GPU_SparseDnMatHandle:$dnmatB, GPU_SparseDnMatHandle:$dnmatC, - OptionalAttr:$computeType, + TypeAttr:$computeType, AnyMemRef:$buffer); let results = (outs Optional:$asyncToken); @@ -2039,16 +2042,17 @@ "Value":$spmatA, "Value":$dnmatB, "Value":$dnmatC, + "Type":$computeType, "Value":$buffer), [{ auto modeA = gpu::TransposeMode::NON_TRANSPOSE; auto modeB = gpu::TransposeMode::NON_TRANSPOSE; return build($_builder, $_state, asyncToken, asyncDependencies, env, modeA, - modeB, spmatA, dnmatB, dnmatC, {}, buffer);}]> + modeB, spmatA, dnmatB, dnmatC, computeType, buffer);}]> ]; let assemblyFormat = [{ custom(type($asyncToken), $asyncDependencies) - $env `,` $spmatA (`{` $modeA^ `}`)? `,` $dnmatB (`{` $modeB^ `}`)? `,` $dnmatC `,` $buffer attr-dict `:` type($buffer) ( `into` $computeType^)? + $env `,` $spmatA (`{` $modeA^ `}`)? `,` $dnmatB (`{` $modeB^ `}`)? `,` $dnmatC `,` $buffer attr-dict `:` type($buffer) `into` $computeType }]; } @@ -2082,26 +2086,27 @@ GPU_SparseDnMatHandle:$dnmatA, GPU_SparseDnMatHandle:$dnmatB, GPU_SparseSpMatHandle:$spmatC, - OptionalAttr:$computeType); + TypeAttr:$computeType); let results = (outs Res:$bufferSz, Optional:$asyncToken); let builders = [OpBuilder<(ins - "::mlir::Type":$bufferSz, - "::mlir::Type":$asyncToken, - "::mlir::ValueRange":$asyncDependencies, - "::mlir::Value":$env, - "::mlir::Value":$dnmatA, - "::mlir::Value":$dnmatB, - "::mlir::Value":$spmatC), [{ + "Type":$bufferSz, + "Type":$asyncToken, + "ValueRange":$asyncDependencies, + "Value":$env, + "Value":$dnmatA, + "Value":$dnmatB, + "Value":$spmatC, + "Type":$computeType), [{ auto modeA = gpu::TransposeMode::NON_TRANSPOSE; auto modeB = gpu::TransposeMode::NON_TRANSPOSE; return build($_builder, $_state, bufferSz, asyncToken, asyncDependencies, - env, modeA, modeB, dnmatA, dnmatB, spmatC, {});}]> + env, modeA, modeB, dnmatA, dnmatB, spmatC, computeType);}]> ]; let assemblyFormat = [{ custom(type($asyncToken), $asyncDependencies) - $env `,` $dnmatA (`{` $modeA^ `}`)? `,` $dnmatB (`{` $modeB^ `}`)? `,` $spmatC attr-dict ( `into` $computeType^)? + $env `,` $dnmatA (`{` $modeA^ `}`)? `,` $dnmatB (`{` $modeB^ `}`)? `,` $spmatC attr-dict `into` $computeType }]; } @@ -2135,27 +2140,28 @@ GPU_SparseDnMatHandle:$dnmatA, GPU_SparseDnMatHandle:$dnmatB, GPU_SparseSpMatHandle:$spmatC, - OptionalAttr:$computeType, + TypeAttr:$computeType, AnyMemRef:$buffer); let results = (outs Optional:$asyncToken); let builders = [OpBuilder<(ins - "::mlir::Type":$asyncToken, - "::mlir::ValueRange":$asyncDependencies, - "::mlir::Value":$env, - "::mlir::Value":$dnmatA, - "::mlir::Value":$dnmatB, - "::mlir::Value":$spmatC, - "::mlir::Value":$buffer), [{ + "Type":$asyncToken, + "ValueRange":$asyncDependencies, + "Value":$env, + "Value":$dnmatA, + "Value":$dnmatB, + "Value":$spmatC, + "Type":$computeType, + "Value":$buffer), [{ auto modeA = gpu::TransposeMode::NON_TRANSPOSE; auto modeB = gpu::TransposeMode::NON_TRANSPOSE; return build($_builder, $_state, asyncToken, asyncDependencies, env, modeA, - modeB, dnmatA, dnmatB, spmatC, {}, buffer);}]> + modeB, dnmatA, dnmatB, spmatC, computeType, buffer);}]> ]; let assemblyFormat = [{ custom(type($asyncToken), $asyncDependencies) - $env `,` $dnmatA (`{` $modeA^ `}`)? `,` $dnmatB (`{` $modeB^ `}`)? `,` $spmatC `,` $buffer attr-dict `:` type($buffer) ( `into` $computeType^)? + $env `,` $dnmatA (`{` $modeA^ `}`)? `,` $dnmatB (`{` $modeB^ `}`)? `,` $spmatC `,` $buffer attr-dict `:` type($buffer) `into` $computeType }]; } diff --git a/mlir/lib/Conversion/GPUCommon/GPUToLLVMConversion.cpp b/mlir/lib/Conversion/GPUCommon/GPUToLLVMConversion.cpp --- a/mlir/lib/Conversion/GPUCommon/GPUToLLVMConversion.cpp +++ b/mlir/lib/Conversion/GPUCommon/GPUToLLVMConversion.cpp @@ -1274,25 +1274,6 @@ return success(); } -// Returns the element type of the defining spmat op. -// TODO: safer and more flexible to store data type in actual op instead? -static Type getSpMatElemType(Value spMat) { - if (auto op = spMat.getDefiningOp()) - return llvm::cast(op.getValues().getType()).getElementType(); - if (auto op = spMat.getDefiningOp()) - return llvm::cast(op.getValues().getType()).getElementType(); - llvm_unreachable("cannot find spmat def"); -} - -// Returns the element type of the defining dnmat or dnvec op. -static Type getDnElemType(Value dn) { - if (auto op = dn.getDefiningOp()) - return op.getMemref().getType().getElementType(); - if (auto op = dn.getDefiningOp()) - return op.getMemref().getType().getElementType(); - llvm_unreachable("cannot find dn def"); -} - template static Value genConstInt32From(OpBuilder &builder, Location loc, T TValue) { Type llvmInt32Type = builder.getIntegerType(32); @@ -1300,14 +1281,11 @@ static_cast(TValue)); } -static Value -genConstInt32FromOptionalComputeMode(OpBuilder &builder, Location loc, - std::optional computeTypeOptional, - Type defaultType) { - auto computeTypeInt = - getCuSparseDataTypeFrom(computeTypeOptional.value_or(defaultType)); - auto computeType = genConstInt32From(builder, loc, computeTypeInt); - return computeType; +static Value genConstInt32FromComputeMode(OpBuilder &builder, Location loc, + Type computeType) { + auto computeTypeInt = getCuSparseDataTypeFrom(computeType); + auto computeTypeConst = genConstInt32From(builder, loc, computeTypeInt); + return computeTypeConst; } LogicalResult ConvertCreateSparseEnvOpToGpuRuntimeCallPattern::matchAndRewrite( @@ -1502,9 +1480,8 @@ return failure(); Location loc = op.getLoc(); auto modeA = genConstInt32From(rewriter, loc, op.getModeA()); - // retrieve the compute type, notice that it may be optional - auto computeType = genConstInt32FromOptionalComputeMode( - rewriter, loc, adaptor.getComputeType(), getDnElemType(op.getDnY())); + auto computeType = + genConstInt32FromComputeMode(rewriter, loc, adaptor.getComputeType()); auto stream = adaptor.getAsyncDependencies().front(); auto bufferSize = spMVBufferSizeCallBuilder @@ -1524,9 +1501,8 @@ return failure(); Location loc = op.getLoc(); auto modeA = genConstInt32From(rewriter, loc, adaptor.getModeA()); - // retrieve the compute type, notice that it may be optional - auto computeType = genConstInt32FromOptionalComputeMode( - rewriter, loc, adaptor.getComputeType(), getDnElemType(op.getDnY())); + auto computeType = + genConstInt32FromComputeMode(rewriter, loc, adaptor.getComputeType()); auto stream = adaptor.getAsyncDependencies().front(); Value pBuf = MemRefDescriptor(adaptor.getBuffer()).allocatedPtr(rewriter, loc); @@ -1550,9 +1526,8 @@ auto modeA = genConstInt32From(rewriter, loc, adaptor.getModeA()); auto modeB = genConstInt32From(rewriter, loc, adaptor.getModeB()); auto stream = adaptor.getAsyncDependencies().front(); - // retrieve the compute type, notice that it may be optional - auto computeType = genConstInt32FromOptionalComputeMode( - rewriter, loc, adaptor.getComputeType(), getDnElemType(op.getDnmatC())); + auto computeType = + genConstInt32FromComputeMode(rewriter, loc, adaptor.getComputeType()); auto bufferSize = spMMBufferSizeCallBuilder .create(loc, rewriter, @@ -1573,9 +1548,8 @@ Location loc = op.getLoc(); auto modeA = genConstInt32From(rewriter, loc, adaptor.getModeA()); auto modeB = genConstInt32From(rewriter, loc, adaptor.getModeB()); - auto computeType = genConstInt32FromOptionalComputeMode( - rewriter, loc, adaptor.getComputeType(), - getSpMatElemType(op.getSpmatC())); + auto computeType = + genConstInt32FromComputeMode(rewriter, loc, adaptor.getComputeType()); auto stream = adaptor.getAsyncDependencies().front(); auto bufferSize = SDDMMBufferSizeCallBuilder .create(loc, rewriter, @@ -1596,9 +1570,8 @@ Location loc = op.getLoc(); auto modeA = genConstInt32From(rewriter, loc, adaptor.getModeA()); auto modeB = genConstInt32From(rewriter, loc, adaptor.getModeB()); - // retrieve the compute type, notice that it may be optional - auto computeType = genConstInt32FromOptionalComputeMode( - rewriter, loc, adaptor.getComputeType(), getDnElemType(op.getDnmatC())); + auto computeType = + genConstInt32FromComputeMode(rewriter, loc, adaptor.getComputeType()); auto stream = adaptor.getAsyncDependencies().front(); Value pBuf = @@ -1628,9 +1601,8 @@ failed(isAsyncWithOneDependency(rewriter, op))) return failure(); Location loc = op.getLoc(); - auto computeType = genConstInt32FromOptionalComputeMode( - rewriter, loc, adaptor.getComputeType(), - getSpMatElemType(op.getSpmatC())); + auto computeType = + genConstInt32FromComputeMode(rewriter, loc, adaptor.getComputeType()); auto modeA = genConstInt32From(rewriter, loc, adaptor.getModeA()); auto modeB = genConstInt32From(rewriter, loc, adaptor.getModeB()); auto stream = adaptor.getAsyncDependencies().front(); diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/SparseGPUCodegen.cpp b/mlir/lib/Dialect/SparseTensor/Transforms/SparseGPUCodegen.cpp --- a/mlir/lib/Dialect/SparseTensor/Transforms/SparseGPUCodegen.cpp +++ b/mlir/lib/Dialect/SparseTensor/Transforms/SparseGPUCodegen.cpp @@ -462,9 +462,12 @@ Value dnY = dvecY.getResult(0); token = dvecY.getAsyncToken(); + auto dnYType = llvm::cast(y.getType()).getElementType(); + // Precompute buffersize for SpMV. auto bufferComp = rewriter.create( - loc, indexTp, tokenTp, token, handle, spMatA, dnX, dnY); + loc, indexTp, tokenTp, token, handle, spMatA, dnX, dnY, + /*computeType=*/dnYType); Value bufferSz = bufferComp.getResult(0); token = bufferComp.getAsyncToken(); auto buf = genAllocBuffer(rewriter, loc, bufferSz, token); @@ -472,8 +475,9 @@ token = buf.getAsyncToken(); // Perform the SpMV. - auto spmvComp = rewriter.create(loc, tokenTp, token, handle, - spMatA, dnX, dnY, buffer); + auto spmvComp = + rewriter.create(loc, tokenTp, token, handle, spMatA, dnX, + dnY, /*computeType=*/dnYType, buffer); token = spmvComp.getAsyncToken(); // Copy data back to host and free all the resoures. @@ -565,18 +569,24 @@ Value dnC = dmatC.getResult(0); token = dmatC.getAsyncToken(); + auto dmatCType = llvm::cast(c.getType()).getElementType(); + // Precompute buffersize for SpMM. auto bufferComp = rewriter.create( - loc, indexTp, tokenTp, token, handle, spMatA, dnB, dnC); + loc, indexTp, tokenTp, token, handle, spMatA, dnB, dnC, + /*computeType=*/dmatCType); Value bufferSz = bufferComp.getResult(0); token = bufferComp.getAsyncToken(); auto buf = genAllocBuffer(rewriter, loc, bufferSz, token); Value buffer = buf.getResult(0); token = buf.getAsyncToken(); + auto dnCType = llvm::cast(c.getType()).getElementType(); + // Perform the SpMM. - auto spmmComp = rewriter.create(loc, tokenTp, token, handle, - spMatA, dnB, dnC, buffer); + auto spmmComp = + rewriter.create(loc, tokenTp, token, handle, spMatA, dnB, + dnC, /*computeType=*/dnCType, buffer); token = spmmComp.getAsyncToken(); // Copy data back to host and free all the resoures. diff --git a/mlir/test/Conversion/GPUCommon/lower-sparse-to-gpu-runtime-calls.mlir b/mlir/test/Conversion/GPUCommon/lower-sparse-to-gpu-runtime-calls.mlir --- a/mlir/test/Conversion/GPUCommon/lower-sparse-to-gpu-runtime-calls.mlir +++ b/mlir/test/Conversion/GPUCommon/lower-sparse-to-gpu-runtime-calls.mlir @@ -23,8 +23,8 @@ %env, %token3 = gpu.create_sparse_env async [%token2] %spmat, %token4 = gpu.create_coo async [%token3] %arg0, %arg0, %arg0, %mem1, %mem1, %mem2 : memref, memref, memref %dnvec, %token5 = gpu.create_dn_vec async [%token4] %mem2, %arg0 : memref - %bufferSz, %token6 = gpu.spmv_buffer_size async [%token5] %env, %spmat, %dnvec, %dnvec - %token7 = gpu.spmv async [%token6] %env, %spmat, %dnvec, %dnvec, %mem2 : memref + %bufferSz, %token6 = gpu.spmv_buffer_size async [%token5] %env, %spmat, %dnvec, %dnvec into f64 + %token7 = gpu.spmv async [%token6] %env, %spmat, %dnvec, %dnvec, %mem2 : memref into f64 %token8 = gpu.destroy_sp_mat async [%token7] %spmat %token9 = gpu.destroy_dn_vec async [%token8] %dnvec %token10 = gpu.destroy_sparse_env async [%token9] %env @@ -53,8 +53,8 @@ %env, %token3 = gpu.create_sparse_env async [%token2] %spmat, %token4 = gpu.create_csr async [%token3] %arg0, %arg0, %arg0, %mem1, %mem1, %mem2 : memref, memref, memref %dnmat, %token5 = gpu.create_dn_mat async [%token4] %arg0, %arg0, %mem2 : memref - %bufferSz, %token6 = gpu.spmm_buffer_size async [%token5] %env, %spmat, %dnmat, %dnmat - %token7 = gpu.spmm async [%token6] %env, %spmat, %dnmat, %dnmat, %mem2 : memref + %bufferSz, %token6 = gpu.spmm_buffer_size async [%token5] %env, %spmat, %dnmat, %dnmat into f64 + %token7 = gpu.spmm async [%token6] %env, %spmat, %dnmat, %dnmat, %mem2 : memref into f64 %token8 = gpu.destroy_sp_mat async [%token7] %spmat %token9 = gpu.destroy_dn_mat async [%token8] %dnmat %token10 = gpu.destroy_sparse_env async [%token9] %env @@ -83,8 +83,8 @@ %env, %token3 = gpu.create_sparse_env async [%token2] %spmat, %token4 = gpu.create_csr async [%token3] %arg0, %arg0, %arg0, %mem1, %mem1, %mem2 : memref, memref, memref %dnmat, %token5 = gpu.create_dn_mat async [%token4] %arg0, %arg0, %mem2 : memref - %bufferSz, %token6 = gpu.sddmm_buffer_size async [%token5] %env, %dnmat, %dnmat, %spmat - %token7 = gpu.sddmm async [%token6] %env, %dnmat, %dnmat, %spmat, %mem2 : memref + %bufferSz, %token6 = gpu.sddmm_buffer_size async [%token5] %env, %dnmat, %dnmat, %spmat into f64 + %token7 = gpu.sddmm async [%token6] %env, %dnmat, %dnmat, %spmat, %mem2 : memref into f64 %token8 = gpu.destroy_sp_mat async [%token7] %spmat %token9 = gpu.destroy_dn_mat async [%token8] %dnmat %token10 = gpu.destroy_sparse_env async [%token9] %env diff --git a/mlir/test/Dialect/GPU/ops.mlir b/mlir/test/Dialect/GPU/ops.mlir --- a/mlir/test/Dialect/GPU/ops.mlir +++ b/mlir/test/Dialect/GPU/ops.mlir @@ -335,19 +335,19 @@ // CHECK: gpu.create_dn_vec async %dnvec, %token6 = gpu.create_dn_vec async [%token5] %mem2, %arg0 : memref // CHECK: gpu.spmv_buffer_size async - %bufferSz, %token7 = gpu.spmv_buffer_size async [%token6] %env, %spmat, %dnvec, %dnvec + %bufferSz, %token7 = gpu.spmv_buffer_size async [%token6] %env, %spmat, %dnvec, %dnvec into f64 // CHECK: gpu.spmv async - %token8 = gpu.spmv async [%token7] %env, %spmat, %dnvec, %dnvec, %mem2 : memref + %token8 = gpu.spmv async [%token7] %env, %spmat, %dnvec, %dnvec, %mem2 : memref into f64 // CHECK: gpu.create_dn_mat async %dnmat, %token9 = gpu.create_dn_mat async [%token8] %arg0, %arg0, %mem2 : memref // CHECK: gpu.spmm_buffer_size async - %bufferSz2, %token10 = gpu.spmm_buffer_size async [%token9] %env, %spmat, %dnmat, %dnmat + %bufferSz2, %token10 = gpu.spmm_buffer_size async [%token9] %env, %spmat, %dnmat, %dnmat into f64 // CHECK: gpu.spmm async - %token11 = gpu.spmm async [%token10] %env, %spmat, %dnmat, %dnmat, %mem2 : memref + %token11 = gpu.spmm async [%token10] %env, %spmat, %dnmat, %dnmat, %mem2 : memref into f64 // CHECK: gpu.sddmm_buffer_size async - %bufferSz3, %token12 = gpu.sddmm_buffer_size async [%token11] %env, %dnmat, %dnmat, %spmat + %bufferSz3, %token12 = gpu.sddmm_buffer_size async [%token11] %env, %dnmat, %dnmat, %spmat into f64 // CHECK: gpu.sddmm async - %token13 = gpu.sddmm async [%token12] %env, %dnmat, %dnmat, %spmat, %mem2 : memref + %token13 = gpu.sddmm async [%token12] %env, %dnmat, %dnmat, %spmat, %mem2 : memref into f64 // CHECK: gpu.destroy_dn_mat async %token14 = gpu.destroy_dn_mat async [%token13] %dnmat // CHECK: gpu.destroy_sp_mat async diff --git a/mlir/test/Dialect/GPU/sparse-roundtrip.mlir b/mlir/test/Dialect/GPU/sparse-roundtrip.mlir --- a/mlir/test/Dialect/GPU/sparse-roundtrip.mlir +++ b/mlir/test/Dialect/GPU/sparse-roundtrip.mlir @@ -9,8 +9,8 @@ // CHECK: %{{.*}}, %{{.*}} = gpu.create_sparse_env async [%{{.*}}] // CHECK: %{{.*}}, %{{.*}} = gpu.create_coo async [%{{.*}}] %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}} : memref, memref, memref // CHECK: %{{.*}}, %{{.*}} = gpu.create_dn_vec async [%{{.*}}] %{{.*}}, %{{.*}} : memref - // CHECK: %{{.*}}, %{{.*}} = gpu.spmv_buffer_size async [%{{.*}}] %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}} - // CHECK: %{{.*}} = gpu.spmv async [%{{.*}}] %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}} : memref + // CHECK: %{{.*}}, %{{.*}} = gpu.spmv_buffer_size async [%{{.*}}] %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}} into f64 + // CHECK: %{{.*}} = gpu.spmv async [%{{.*}}] %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}} : memref into f64 // CHECK: %{{.*}} = gpu.destroy_sp_mat async [%{{.*}}] %{{.*}} // CHECK: %{{.*}} = gpu.destroy_dn_vec async [%{{.*}}] %{{.*}} // CHECK: %{{.*}} = gpu.destroy_sparse_env async [%{{.*}}] %{{.*}} @@ -23,8 +23,8 @@ %env, %token3 = gpu.create_sparse_env async [%token2] %spmat, %token4 = gpu.create_coo async [%token3] %arg0, %arg0, %arg0, %mem1, %mem1, %mem2 : memref, memref, memref %dnvec, %token5 = gpu.create_dn_vec async [%token4] %mem2, %arg0 : memref - %bufferSz, %token6 = gpu.spmv_buffer_size async [%token5] %env, %spmat, %dnvec, %dnvec - %token7 = gpu.spmv async [%token6] %env, %spmat, %dnvec, %dnvec, %mem2 : memref + %bufferSz, %token6 = gpu.spmv_buffer_size async [%token5] %env, %spmat, %dnvec, %dnvec into f64 + %token7 = gpu.spmv async [%token6] %env, %spmat, %dnvec, %dnvec, %mem2 : memref into f64 %token8 = gpu.destroy_sp_mat async [%token7] %spmat %token9 = gpu.destroy_dn_vec async [%token8] %dnvec %token10 = gpu.destroy_sparse_env async [%token9] %env @@ -69,8 +69,8 @@ // CHECK: %{{.*}}, %{{.*}} = gpu.create_sparse_env async [%{{.*}}] // CHECK: %{{.*}}, %{{.*}} = gpu.create_csr async [%{{.*}}] %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}} : memref, memref, memref // CHECK: %{{.*}}, %{{.*}} = gpu.create_dn_mat async [%{{.*}}] %{{.*}}, %{{.*}}, %{{.*}} : memref - // CHECK: %{{.*}}, %{{.*}} = gpu.sddmm_buffer_size async [%{{.*}}] %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}} - // CHECK: %{{.*}} = gpu.sddmm async [%{{.*}}] %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}} : memref + // CHECK: %{{.*}}, %{{.*}} = gpu.sddmm_buffer_size async [%{{.*}}] %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}} into f64 + // CHECK: %{{.*}} = gpu.sddmm async [%{{.*}}] %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}} : memref into f64 // CHECK: %{{.*}} = gpu.destroy_sp_mat async [%{{.*}}] %{{.*}} // CHECK: %{{.*}} = gpu.destroy_dn_mat async [%{{.*}}] %{{.*}} // CHECK: %{{.*}} = gpu.destroy_sparse_env async [%{{.*}}] %{{.*}} @@ -83,8 +83,8 @@ %env, %token3 = gpu.create_sparse_env async [%token2] %spmat, %token4 = gpu.create_csr async [%token3] %arg0, %arg0, %arg0, %mem1, %mem1, %mem2 : memref, memref, memref %dnmat, %token5 = gpu.create_dn_mat async [%token4] %arg0, %arg0, %mem2 : memref - %bufferSz, %token6 = gpu.sddmm_buffer_size async [%token5] %env, %dnmat, %dnmat, %spmat - %token7 = gpu.sddmm async [%token6] %env, %dnmat, %dnmat, %spmat, %mem2 : memref + %bufferSz, %token6 = gpu.sddmm_buffer_size async [%token5] %env, %dnmat, %dnmat, %spmat into f64 + %token7 = gpu.sddmm async [%token6] %env, %dnmat, %dnmat, %spmat, %mem2 : memref into f64 %token8 = gpu.destroy_sp_mat async [%token7] %spmat %token9 = gpu.destroy_dn_mat async [%token8] %dnmat %token10 = gpu.destroy_sparse_env async [%token9] %env