diff --git a/mlir/test/Conversion/GPUCommon/lower-sparse-to-gpu-runtime-calls.mlir b/mlir/test/Conversion/GPUCommon/lower-sparse-to-gpu-runtime-calls.mlir
--- a/mlir/test/Conversion/GPUCommon/lower-sparse-to-gpu-runtime-calls.mlir
+++ b/mlir/test/Conversion/GPUCommon/lower-sparse-to-gpu-runtime-calls.mlir
@@ -54,6 +54,54 @@
     return
   }

+  // CHECK-LABEL: func @spgemm
+  // CHECK: llvm.call @mgpuStreamCreate
+  // CHECK: llvm.call @mgpuMemAlloc
+  // CHECK: llvm.call @mgpuMemAlloc
+  // CHECK: llvm.call @mgpuCreateCsr
+  // CHECK: llvm.call @mgpuCreateCsr
+  // CHECK: llvm.call @mgpuCreateCsr
+  // CHECK: llvm.call @mgpuSpGEMMCreateDescr
+  // CHECK: llvm.call @mgpuSpGEMMWorkEstimation
+  // CHECK: llvm.call @mgpuSpGEMMCompute
+  // CHECK: llvm.call @mgpuSpGEMMGetSize
+  // CHECK: llvm.call @mgpuSetCsrPointers
+  // CHECK: llvm.call @mgpuSpGEMMCopy
+  // CHECK: llvm.call @mgpuSpGEMMDestroyDescr
+  // CHECK: llvm.call @mgpuDestroySpMat
+  // CHECK: llvm.call @mgpuDestroySpMat
+  // CHECK: llvm.call @mgpuDestroySpMat
+  // CHECK: llvm.call @mgpuStreamSynchronize
+  // CHECK: llvm.call @mgpuStreamDestroy
+  func.func @spgemm(%arg0: index) {
+    %token0 = gpu.wait async
+    %mem1, %token1 = gpu.alloc async [%token0] (%arg0) : memref
+    %mem2, %token2 = gpu.alloc async [%token1] (%arg0) : memref
+    %spmatA, %token3 = gpu.create_csr async [%token2] %arg0, %arg0, %arg0, %mem1, %mem1, %mem2 : memref, memref, memref
+    %spmatB, %token4 = gpu.create_csr async [%token3] %arg0, %arg0, %arg0, %mem1, %mem1, %mem2 : memref, memref, memref
+    %spmatC, %token5 = gpu.create_csr async [%token4] %arg0, %arg0, %arg0, %mem1, %mem1, %mem2 : memref, memref, memref
+    %spgemmDesc, %token6 = gpu.spgemm_create_descr async [%token5]
+    %alloc = memref.alloc() : memref<0xi8> // nullptr
+    %c0 = arith.constant 0 : index
+    %bufferSz1, %token7 = gpu.spgemm_work_estimation_or_compute async
+                            [%token6]{WORK_ESTIMATION}
+                            %spmatA, %spmatB, %spmatC,
+                            %spgemmDesc, %c0, %alloc: f32 into memref<0xi8>
+    %bufferSz2, %token8 = gpu.spgemm_work_estimation_or_compute async
+                            [%token7]{COMPUTE}
+                            %spmatA, %spmatB, %spmatC,
+                            %spgemmDesc, %c0, %alloc: f32 into memref<0xi8>
+    %rows, %cols, %nnz, %token9 = gpu.spgemm_get_size async [%token8] %spmatC
+    %token10 = gpu.set_csr_pointers async [%token8] %spmatC, %mem1, %mem1, %mem2 : memref, memref, memref
+    %token11 = gpu.spgemm_copy async [%token10] %spmatA, %spmatB, %spmatC, %spgemmDesc: f32
+    %token12 = gpu.spgemm_destroy_descr async [%token11] %spgemmDesc
+    %token13 = gpu.destroy_sp_mat async [%token12] %spmatA
+    %token14 = gpu.destroy_sp_mat async [%token13] %spmatB
+    %token15 = gpu.destroy_sp_mat async [%token14] %spmatC
+    gpu.wait [%token15]
+    return
+  }
+
   // CHECK-LABEL: func @sddmm
   // CHECK: llvm.call @mgpuStreamCreate
   // CHECK: llvm.call @mgpuMemAlloc
@@ -80,69 +128,4 @@
     return
   }

-
-  // CHECK-LABEL: func @spgemm
-  // CHECK: llvm.call @mgpuStreamCreate
-  // CHECK: llvm.call @mgpuMemAlloc
-  // CHECK: llvm.call @mgpuMemAlloc
-  // CHECK: llvm.call @mgpuCreateCsr
-  // CHECK: llvm.call @mgpuCreateCsr
-  // CHECK: llvm.call @mgpuCreateCsr
-  // CHECK: llvm.call @mgpuSpGEMMCreateDescr
-  // CHECK: llvm.call @malloc
-  // CHECK: llvm.call @mgpuSpGEMMWorkEstimation
-  // CHECK: llvm.call @mgpuMemAlloc
-  // CHECK: llvm.call @mgpuSpGEMMWorkEstimation
-  // CHECK: llvm.call @mgpuMemAlloc
-  // CHECK: llvm.call @mgpuSpGEMMCompute
-  // CHECK: llvm.call @mgpuMemAlloc
-  // CHECK: llvm.call @mgpuMemAlloc
-  // CHECK: llvm.call @mgpuStreamSynchronize
-  // CHECK: llvm.call @mgpuStreamDestroy
-  // CHECK: llvm.call @mgpuStreamCreate
-  // CHECK: llvm.call @mgpuSpGEMMCopy
-  // CHECK: llvm.call @mgpuDestroySpMat
-  // CHECK: llvm.call @mgpuDestroySpMat
-  // CHECK: llvm.call @mgpuDestroySpMat
-  // CHECK: llvm.call @mgpuStreamSynchronize
-  // CHECK: llvm.call @mgpuStreamDestroy
-  func.func @spgemm(%arg0: index) {
-    %token0 = gpu.wait async
-    %mem1, %token1 = gpu.alloc async [%token0] (%arg0) : memref
-    %mem2, %token2 = gpu.alloc async [%token1] (%arg0) : memref
-    %spmatA, %token3 = gpu.create_csr async [%token2] %arg0, %arg0, %arg0, %mem1, %mem1, %mem2 : memref, memref, memref
-    %spmatB, %token4 = gpu.create_csr async [%token3] %arg0, %arg0, %arg0, %mem1, %mem1, %mem2 : memref, memref, memref
-    %spmatC, %token5 = gpu.create_csr async [%token4] %arg0, %arg0, %arg0, %mem1, %mem1, %mem2 : memref, memref, memref
-    %spgemmDesc, %token6 = gpu.spgemm_create_descr async [%token5]
-    // Used as nullptr
-    %alloc = memref.alloc() : memref<0xi8>
-    %c0 = arith.constant 0 : index
-    %bufferSz1, %token7 = gpu.spgemm_work_estimation_or_compute async
-                            [%token6]{WORK_ESTIMATION}
-                            %spmatA{NON_TRANSPOSE}, %spmatB{NON_TRANSPOSE},
-                            %spmatC, %spgemmDesc, %c0,
-                            %alloc: f32 into memref<0xi8>
-    %buf1, %token8 = gpu.alloc async [%token7] (%bufferSz1) : memref
-    %bufferSz1_1, %token9 = gpu.spgemm_work_estimation_or_compute async
-                            [%token8]{WORK_ESTIMATION} %spmatA, %spmatB,
-                            %spmatC, %spgemmDesc, %bufferSz1,
-                            %buf1: f32 into memref
-    %buf2, %token13 = gpu.alloc async [%token9] (%bufferSz1_1) : memref
-    %bufferSz2_2, %token14 = gpu.spgemm_work_estimation_or_compute async
-                            [%token13]{COMPUTE} %spmatA, %spmatB, %spmatC,
-                            %spgemmDesc, %bufferSz1_1,
-                            %buf2: f32 into memref
-    %rows, %cols, %nnz, %token15 = gpu.spgemm_get_size async [%token14] %spmatC
-    %mem_columns, %token16 = gpu.alloc async [%token15] (%cols) : memref
-    %mem_values, %token17 = gpu.alloc async [%token16] (%nnz) : memref
-    gpu.wait [%token17]
-    %token18 = gpu.wait async
-    %token19 = gpu.spgemm_copy async [%token18] %spmatA, %spmatB, %spmatC, %spgemmDesc: f32
-    %token20 = gpu.destroy_sp_mat async [%token19] %spmatA
-    %token21 = gpu.destroy_sp_mat async [%token20] %spmatB
-    %token22 = gpu.destroy_sp_mat async [%token21] %spmatC
-    gpu.wait [%token22]
-    return
-  }
-
 }
diff --git a/mlir/test/Dialect/GPU/sparse-roundtrip.mlir b/mlir/test/Dialect/GPU/sparse-roundtrip.mlir
--- a/mlir/test/Dialect/GPU/sparse-roundtrip.mlir
+++ b/mlir/test/Dialect/GPU/sparse-roundtrip.mlir
@@ -54,24 +54,25 @@
     return
   }

-  // CHECK-LABEL: func @spgemm
-  // CHECK: %{{.*}} = gpu.wait async
-  // CHECK: %{{.*}}, %{{.*}} = gpu.alloc async [%{{.*}}] (%{{.*}}) : memref
-  // CHECK: %{{.*}}, %{{.*}} = gpu.alloc async [%{{.*}}] (%{{.*}}) : memref
-  // CHECK: %{{.*}}, %{{.*}} = gpu.create_csr async [%{{.*}}] %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}} : memref, memref, memref
-  // CHECK: %{{.*}}, %{{.*}} = gpu.create_csr async [%{{.*}}] %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}} : memref, memref, memref
-  // CHECK: %{{.*}}, %{{.*}} = gpu.create_csr async [%{{.*}}] %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}} : memref, memref, memref
-  // CHECK: %{{.*}}, %{{.*}} = gpu.spgemm_create_descr async [%{{.*}}]
-  // CHECK: %{{.*}}, %{{.*}} = gpu.spgemm_work_estimation_or_compute async [%{{.*}}]{ WORK_ESTIMATION} %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}} : f32 into memref<0xi8>
-  // CHECK: %{{.*}}, %{{.*}} = gpu.spgemm_work_estimation_or_compute async [%{{.*}}]{ COMPUTE} %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}} : f32 into memref<0xi8>
-  // CHECK: %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}} = gpu.spgemm_get_size async [%{{.*}}] %{{.*}}
-  // CHECK %{{.*}} = gpu.set_csr_pointers async [%{{.*}}] %{{.*}}, {{.*}}, {{.*}}, {{.*}} : memref, memref, memref
-  // CHECK: %{{.*}} = gpu.spgemm_copy async [%{{.*}}] %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}} : f32
-  // CHECK: %{{.*}} = gpu.spgemm_destroy_descr async [%{{.*}}] %{{.*}}
-  // CHECK: gpu.destroy_sp_mat %{{.*}}
-  // CHECK: gpu.destroy_sp_mat %{{.*}}
-  // CHECK: gpu.destroy_sp_mat %{{.*}}
-  // CHECK: return
+  // CHECK-LABEL: func @spgemm
+  // CHECK: %{{.*}} = gpu.wait async
+  // CHECK: %{{.*}}, %{{.*}} = gpu.alloc async [%{{.*}}] (%{{.*}}) : memref
+  // CHECK: %{{.*}}, %{{.*}} = gpu.alloc async [%{{.*}}] (%{{.*}}) : memref
+  // CHECK: %{{.*}}, %{{.*}} = gpu.create_csr async [%{{.*}}] %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}} : memref, memref, memref
+  // CHECK: %{{.*}}, %{{.*}} = gpu.create_csr async [%{{.*}}] %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}} : memref, memref, memref
+  // CHECK: %{{.*}}, %{{.*}} = gpu.create_csr async [%{{.*}}] %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}} : memref, memref, memref
+  // CHECK: %{{.*}}, %{{.*}} = gpu.spgemm_create_descr async [%{{.*}}]
+  // CHECK: %{{.*}}, %{{.*}} = gpu.spgemm_work_estimation_or_compute async [%{{.*}}]{ WORK_ESTIMATION} %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}} : f32 into memref<0xi8>
+  // CHECK: %{{.*}}, %{{.*}} = gpu.spgemm_work_estimation_or_compute async [%{{.*}}]{ COMPUTE} %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}} : f32 into memref<0xi8>
+  // CHECK: %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}} = gpu.spgemm_get_size async [%{{.*}}] %{{.*}}
+  // CHECK: %{{.*}} = gpu.set_csr_pointers async [%{{.*}}] %{{.*}}, {{.*}}, {{.*}}, {{.*}} : memref, memref, memref
+  // CHECK: %{{.*}} = gpu.spgemm_copy async [%{{.*}}] %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}} : f32
+  // CHECK: %{{.*}} = gpu.spgemm_destroy_descr async [%{{.*}}] %{{.*}}
+  // CHECK: %{{.*}} = gpu.destroy_sp_mat async [%{{.*}}] %{{.*}}
+  // CHECK: %{{.*}} = gpu.destroy_sp_mat async [%{{.*}}] %{{.*}}
+  // CHECK: %{{.*}} = gpu.destroy_sp_mat async [%{{.*}}] %{{.*}}
+  // CHECK: gpu.wait [%{{.*}}]
+  // CHECK: return
   func.func @spgemm(%arg0: index) {
     %token0 = gpu.wait async
     %mem1, %token1 = gpu.alloc async [%token0] (%arg0) : memref
@@ -94,9 +95,10 @@
     %token10 = gpu.set_csr_pointers async [%token8] %spmatC, %mem1, %mem1, %mem2 : memref, memref, memref
     %token11 = gpu.spgemm_copy async [%token10] %spmatA, %spmatB, %spmatC, %spgemmDesc: f32
     %token12 = gpu.spgemm_destroy_descr async [%token11] %spgemmDesc
-    gpu.destroy_sp_mat %spmatA
-    gpu.destroy_sp_mat %spmatB
-    gpu.destroy_sp_mat %spmatC
+    %token13 = gpu.destroy_sp_mat async [%token12] %spmatA
+    %token14 = gpu.destroy_sp_mat async [%token13] %spmatB
+    %token15 = gpu.destroy_sp_mat async [%token14] %spmatC
+    gpu.wait [%token15]
     return
   }