diff --git a/mlir/test/CMakeLists.txt b/mlir/test/CMakeLists.txt
--- a/mlir/test/CMakeLists.txt
+++ b/mlir/test/CMakeLists.txt
@@ -42,7 +42,8 @@
   set(MLIR_INTEGRATION_TEST_DIR ${CMAKE_LIBRARY_OUTPUT_DIRECTORY})
 
   # Copy test data over.
-  file(COPY ${CMAKE_CURRENT_SOURCE_DIR}/Integration/data/test.mtx
+  file(COPY ${CMAKE_CURRENT_SOURCE_DIR}/Integration/data/mttkrp_b.tns
+            ${CMAKE_CURRENT_SOURCE_DIR}/Integration/data/test.mtx
             ${CMAKE_CURRENT_SOURCE_DIR}/Integration/data/test.tns
             ${CMAKE_CURRENT_SOURCE_DIR}/Integration/data/wide.mtx
             DESTINATION ${MLIR_INTEGRATION_TEST_DIR}/data/)
diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_mttkrp.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_mttkrp.mlir
new file mode 100644
--- /dev/null
+++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_mttkrp.mlir
@@ -0,0 +1,132 @@
+// RUN: mlir-opt %s \
+// RUN:   --sparsification --sparse-tensor-conversion \
+// RUN:   --convert-linalg-to-loops --convert-vector-to-scf --convert-scf-to-std \
+// RUN:   --func-bufferize --tensor-constant-bufferize --tensor-bufferize \
+// RUN:   --std-bufferize --finalizing-bufferize \
+// RUN:   --convert-vector-to-llvm --convert-std-to-llvm | \
+// RUN: TENSOR0="%mlir_integration_test_dir/data/mttkrp_b.tns" \
+// RUN: mlir-cpu-runner \
+// RUN:   -e entry -entry-point-result=void \
+// RUN:   -shared-libs=%mlir_integration_test_dir/libmlir_c_runner_utils%shlibext | \
+// RUN: FileCheck %s
+
+!Filename = type !llvm.ptr<i8>
+
+#SparseTensor = #sparse_tensor.encoding<{
+  dimLevelType = [ "compressed", "compressed", "compressed" ]
+}>
+
+#mttkrp = {
+  indexing_maps = [
+    affine_map<(i,j,k,l) -> (i,k,l)>, // B
+    affine_map<(i,j,k,l) -> (k,j)>,   // C
+    affine_map<(i,j,k,l) -> (l,j)>,   // D
+    affine_map<(i,j,k,l) -> (i,j)>    // A (out)
+  ],
+  iterator_types = ["parallel", "parallel", "reduction", "reduction"],
+  doc = "A(i,j) += B(i,k,l) * D(l,j) * C(k,j)"
+}
+
+//
+// Integration test that lowers a kernel annotated as sparse to
+// actual sparse code, initializes a matching sparse storage scheme
+// from file, and runs the resulting code with the JIT compiler.
+//
+module {
+  //
+  // Computes the Matricized Tensor Times Khatri-Rao Product (MTTKRP) kernel.
+  // See http://tensor-compiler.org/docs/data_analytics/index.html.
+  //
+  func @kernel_mttkrp(%argb: tensor<?x?x?xf64, #SparseTensor>,
+                      %argc: tensor<?x?xf64>,
+                      %argd: tensor<?x?xf64>,
+                      %arga: tensor<?x?xf64>) -> tensor<?x?xf64> {
+    %0 = linalg.generic #mttkrp
+      ins(%argb, %argc, %argd:
+          tensor<?x?x?xf64, #SparseTensor>, tensor<?x?xf64>, tensor<?x?xf64>)
+      outs(%arga: tensor<?x?xf64>) {
+      ^bb(%b: f64, %c: f64, %d: f64, %a: f64):
+        %0 = mulf %b, %c : f64
+        %1 = mulf %d, %0 : f64
+        %2 = addf %a, %1 : f64
+        linalg.yield %2 : f64
+    } -> tensor<?x?xf64>
+    return %0 : tensor<?x?xf64>
+  }
+
+  func private @getTensorFilename(index) -> (!Filename)
+
+  //
+  // Main driver that reads tensor B from file and calls the sparse kernel.
+  //
+  func @entry() {
+    %i0 = constant 0.0 : f64
+    %c0 = constant 0 : index
+    %c1 = constant 1 : index
+    %c2 = constant 2 : index
+    %c3 = constant 3 : index
+    %c4 = constant 4 : index
+    %c5 = constant 5 : index
+    %c256 = constant 256 : index
+
+    // Read the sparse B input from a file.
+    %fileName = call @getTensorFilename(%c0) : (index) -> (!Filename)
+    %b = sparse_tensor.new %fileName
+          : !llvm.ptr<i8> to tensor<?x?x?xf64, #SparseTensor>
+
+    // Initialize dense C and D inputs and dense output A.
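+    // C is filled with C(i,j) = i*5+j (row-major values 0..14) and D with
+    // D(i,j) = i*5+j (values 0..19), matching the np.arange(...).reshape(...)
+    // initialization in the generator script recorded in data/mttkrp_b.tns.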
+    %cdata = memref.alloc(%c3, %c5) : memref<?x?xf64>
+    scf.for %i = %c0 to %c3 step %c1 {
+      scf.for %j = %c0 to %c5 step %c1 {
+        %k0 = muli %i, %c5 : index
+        %k1 = addi %k0, %j : index
+        %k2 = index_cast %k1 : index to i32
+        %k = sitofp %k2 : i32 to f64
+        memref.store %k, %cdata[%i, %j] : memref<?x?xf64>
+      }
+    }
+    %c = memref.tensor_load %cdata : memref<?x?xf64>
+
+    %ddata = memref.alloc(%c4, %c5) : memref<?x?xf64>
+    scf.for %i = %c0 to %c4 step %c1 {
+      scf.for %j = %c0 to %c5 step %c1 {
+        %k0 = muli %i, %c5 : index
+        %k1 = addi %k0, %j : index
+        %k2 = index_cast %k1 : index to i32
+        %k = sitofp %k2 : i32 to f64
+        memref.store %k, %ddata[%i, %j] : memref<?x?xf64>
+      }
+    }
+    %d = memref.tensor_load %ddata : memref<?x?xf64>
+
+    %adata = memref.alloc(%c2, %c5) : memref<?x?xf64>
+    scf.for %i = %c0 to %c2 step %c1 {
+      scf.for %j = %c0 to %c5 step %c1 {
+        memref.store %i0, %adata[%i, %j] : memref<?x?xf64>
+      }
+    }
+    %a = memref.tensor_load %adata : memref<?x?xf64>
+
+    // Call kernel.
+    %0 = call @kernel_mttkrp(%b, %c, %d, %a)
+      : (tensor<?x?x?xf64, #SparseTensor>,
+         tensor<?x?xf64>, tensor<?x?xf64>, tensor<?x?xf64>) -> tensor<?x?xf64>
+
+    // Print the result for verification.
+    //
+    // CHECK: ( ( 16075, 21930, 28505, 35800, 43815 ),
+    // CHECK:   ( 10000, 14225, 19180, 24865, 31280 ) )
+    //
+    %m = memref.buffer_cast %0 : memref<?x?xf64>
+    %v = vector.transfer_read %m[%c0, %c0], %i0
+          : memref<?x?xf64>, vector<2x5xf64>
+    vector.print %v : vector<2x5xf64>
+
+    // Release the resources.
+    memref.dealloc %adata : memref<?x?xf64>
+    memref.dealloc %cdata : memref<?x?xf64>
+    memref.dealloc %ddata : memref<?x?xf64>
+
+    return
+  }
+}
diff --git a/mlir/test/Integration/data/mttkrp_b.tns b/mlir/test/Integration/data/mttkrp_b.tns
new file mode 100644
--- /dev/null
+++ b/mlir/test/Integration/data/mttkrp_b.tns
@@ -0,0 +1,64 @@
+# This is a rank 3 test sparse tensor in FROSTT file format,
+# extended with two metadata lines:
+#   rank nnz
+#   dims (one per rank)
+#
+# See http://frostt.io/tensors/file-formats.html
+#
+# This tensor represents the "B" input to the MTTKRP kernel:
+# http://tensor-compiler.org/docs/data_analytics/index.html
+#
+# It was generated with the following script, adapted from the above link.
+# Note that, as written, the loop inserts an entry with probability
+# 1 - density (so roughly 75% of the positions end up nonzero), and that
+# "random" is unseeded, so rerunning the script yields different data:
+#
+#> import pytaco as pt
+#> import numpy as np
+#> from pytaco import compressed, dense
+#> import random
+#>
+#> # Define formats for storing the sparse tensor and dense matrices.
+#> csf = pt.format([compressed, compressed, compressed])
+#> rm = pt.format([dense, dense])
+#>
+#> B = pt.tensor((2,3,4), csf)
+#> density = 0.25
+#> for i in range(2):
+#>   for j in range(3):
+#>     for k in range(4):
+#>       if random.random() > density:
+#>         B.insert((i,j,k), random.randint(0,100))
+#>
+#> C = pt.from_array(np.arange(B.shape[1]*5).reshape(B.shape[1],5))
+#> D = pt.from_array(np.arange(B.shape[2]*5).reshape(B.shape[2],5))
+#>
+#> # Declare the result to be a dense matrix.
+#> A = pt.tensor([B.shape[0], 5], rm)
+#>
+#> # Declare index vars.
+#> i, j, k, l = pt.get_index_vars(4)
+#>
+#> # Define the MTTKRP computation.
+#> A[i, j] = B[i, k, l] * D[l, j] * C[k, j]
+#>
+#> # Perform the MTTKRP computation and write the result to file.
+#> pt.write("A.tns", A)
+#> pt.write("B.tns", B)
+#
+3 17
+2 3 4
+1 1 3 3
+1 1 4 63
+1 2 2 11
+1 2 3 100
+1 3 1 66
+1 3 2 61
+1 3 3 13
+1 3 4 43
+2 1 1 77
+2 1 3 10
+2 1 4 46
+2 2 1 61
+2 2 2 53
+2 2 3 3
+2 2 4 75
+2 3 2 22
+2 3 3 18
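+#
+# Comment-only sanity check (an illustrative sketch, not part of the FROSTT
+# payload above): the "A" values checked in sparse_mttkrp.mlir can be
+# reproduced from the entries above with plain numpy; all names below are
+# local to this sketch.
+#
+#> import numpy as np
+#>
+#> B = np.zeros((2, 3, 4))
+#> for i, k, l, v in [(1,1,3,3), (1,1,4,63), (1,2,2,11), (1,2,3,100),
+#>                    (1,3,1,66), (1,3,2,61), (1,3,3,13), (1,3,4,43),
+#>                    (2,1,1,77), (2,1,3,10), (2,1,4,46), (2,2,1,61),
+#>                    (2,2,2,53), (2,2,3,3), (2,2,4,75), (2,3,2,22),
+#>                    (2,3,3,18)]:
+#>   B[i-1, k-1, l-1] = v  # FROSTT indices are 1-based
+#> C = np.arange(3*5).reshape(3, 5)
+#> D = np.arange(4*5).reshape(4, 5)
+#> # A(i,j) += B(i,k,l) * D(l,j) * C(k,j)
+#> A = np.einsum("ikl,lj,kj->ij", B, D, C)
+#> print(A)  # [[16075. 21930. 28505. 35800. 43815.]
+#>           #  [10000. 14225. 19180. 24865. 31280.]]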