diff --git a/mlir/test/Integration/Dialect/SparseTensor/taco/test_MTTKRP.py b/mlir/test/Integration/Dialect/SparseTensor/taco/test_MTTKRP.py --- a/mlir/test/Integration/Dialect/SparseTensor/taco/test_MTTKRP.py +++ b/mlir/test/Integration/Dialect/SparseTensor/taco/test_MTTKRP.py @@ -47,7 +47,8 @@ with tempfile.TemporaryDirectory() as test_dir: actual_file = os.path.join(test_dir, "A.tns") pt.write(actual_file, A) - actual = np.loadtxt(actual_file, np.float64) + # Read the result file, skipping the three header (comment/metadata) rows. + actual = np.loadtxt(actual_file, np.float64, skiprows=3) expected = np.loadtxt( os.path.join(_SCRIPT_PATH, "data/gold_A.tns"), np.float64) print(f"Compare result {np.allclose(actual, expected, rtol=0.01)}") diff --git a/mlir/test/Integration/Dialect/SparseTensor/taco/test_SpMM.py b/mlir/test/Integration/Dialect/SparseTensor/taco/test_SpMM.py --- a/mlir/test/Integration/Dialect/SparseTensor/taco/test_SpMM.py +++ b/mlir/test/Integration/Dialect/SparseTensor/taco/test_SpMM.py @@ -32,7 +32,10 @@ golden_file = os.path.join(_SCRIPT_PATH, "data/gold_C.tns") out_file = os.path.join(test_dir, "C.tns") pt.write(out_file, C) + # Read the result file, skipping the three header (comment/metadata) rows. 
+ actual = np.loadtxt(out_file, np.float64, skiprows=3) + expected = np.loadtxt(golden_file, np.float64) # - # CHECK: Compare files True + # CHECK: Compare result True # - print(f"Compare files {filecmp.cmp(golden_file, out_file)}") + print(f"Compare result {np.allclose(actual, expected, rtol=0.01)}") diff --git a/mlir/test/Integration/Dialect/SparseTensor/taco/test_SpMV.py b/mlir/test/Integration/Dialect/SparseTensor/taco/test_SpMV.py --- a/mlir/test/Integration/Dialect/SparseTensor/taco/test_SpMV.py +++ b/mlir/test/Integration/Dialect/SparseTensor/taco/test_SpMV.py @@ -48,7 +48,8 @@ with tempfile.TemporaryDirectory() as test_dir: actual_file = os.path.join(test_dir, "y.tns") pt.write(actual_file, y) - actual = np.loadtxt(actual_file, np.float64) + # Read the result file, skipping the three header (comment/metadata) rows. + actual = np.loadtxt(actual_file, np.float64, skiprows=3) expected = np.loadtxt( os.path.join(_SCRIPT_PATH, "data/gold_y.tns"), np.float64) print(f"Compare result {np.allclose(actual, expected, rtol=0.01)}") diff --git a/mlir/test/Integration/Dialect/SparseTensor/taco/tools/mlir_pytaco.py b/mlir/test/Integration/Dialect/SparseTensor/taco/tools/mlir_pytaco.py --- a/mlir/test/Integration/Dialect/SparseTensor/taco/tools/mlir_pytaco.py +++ b/mlir/test/Integration/Dialect/SparseTensor/taco/tools/mlir_pytaco.py @@ -667,6 +667,11 @@ "Must be a tuple or list for a shape or a single value" f"if initializing a scalar tensor: {value_or_shape}.") + def _set_packed_sparse_tensor(self, pointer: ctypes.c_void_p) -> None: + """Records the MLIR sparse tensor pointer.""" + self._sparse_value_location = _SparseValueInfo._PACKED + self._packed_sparse_value = pointer + def is_unpacked(self) -> bool: """Returns true if the tensor value is not packed as MLIR sparse tensor.""" return (self._sparse_value_location == _SparseValueInfo._UNPACKED) @@ -826,11 +831,38 @@ sparse_tensor, shape = utils.create_sparse_tensor(filename, fmt.format_pack.formats) tensor = Tensor(shape.tolist(), 
fmt) - tensor._sparse_value_location = _SparseValueInfo._PACKED - tensor._packed_sparse_value = sparse_tensor + tensor._set_packed_sparse_tensor(sparse_tensor) return tensor + def to_file(self, filename: str) -> None: + """Output the tensor value to a file. + + This method evaluates any pending assignment to the tensor and outputs the + tensor value. + + Args: + filename: A string file name. + """ + self._sync_value() + if not self.is_unpacked(): + utils.output_sparse_tensor(self._packed_sparse_value, filename, + self._format.format_pack.formats) + return + + coords, values = self.get_coordinates_and_values() + assert len(coords) == len(values) + with open(filename, "w") as file: + # Output a comment line and the meta data. + file.write("; extended FROSTT format\n") + file.write(f"{self.order} {len(coords)}\n") + file.write(f"{' '.join(map(lambda i: str(i), self.shape))}\n") + # Output each (coordinate value) pair in a line. + for c, v in zip(coords, values): + # The coordinates are 1-based in the text file and 0-based in memory. + plus_one_to_str = lambda x: str(x + 1) + file.write(f"{' '.join(map(plus_one_to_str,c))} {v}\n") + @property def dtype(self) -> DType: """Returns the data type for the Tensor.""" @@ -914,9 +946,7 @@ assert isinstance(result, np.ndarray) self._dense_storage = result else: - assert _all_instance_of(result, np.ndarray) and len(result) == 2 - assert (result[0].ndim, result[1].ndim) == (1, 2) - (self._values, self._coords) = result + self._set_packed_sparse_tensor(result) def _sync_value(self) -> None: """Updates the tensor value by evaluating the pending assignment.""" @@ -1349,7 +1379,7 @@ self, dst: Tensor, dst_indices: Tuple[IndexVar, ...], - ) -> Union[np.ndarray, Tuple[np.ndarray, np.ndarray]]: + ) -> Union[np.ndarray, ctypes.c_void_p]: """Evaluates tensor assignment dst[dst_indices] = expression. Args: @@ -1357,9 +1387,8 @@ dst_indices: The tuple of IndexVar used to access the destination tensor. 
Returns: - The result of the dense tensor represented in numpy ndarray or the sparse - tensor represented by two numpy ndarray for its non-zero values and - indices. + The result of the dense tensor represented in numpy ndarray or the pointer + to the MLIR sparse tensor. Raises: ValueError: If the expression is not proper or not supported. @@ -1397,17 +1426,8 @@ if dst.is_dense(): return runtime.ranked_memref_to_numpy(arg_pointers[0][0]) - # Check and return the sparse tensor output. - rank, nse, shape, values, indices = utils.sparse_tensor_to_coo_tensor( - ctypes.cast(arg_pointers[-1][0], ctypes.c_void_p), - np.float64, - ) - assert (np.equal(rank, dst.order) - and np.array_equal(shape, np.array(dst.shape)) and - np.equal(values.ndim, 1) and np.equal(values.shape[0], nse) and - np.equal(indices.ndim, 2) and np.equal(indices.shape[0], nse) and - np.equal(indices.shape[1], rank)) - return (values, indices) + # Return the sparse tensor pointer. + return arg_pointers[-1][0] @dataclasses.dataclass(frozen=True) @@ -1438,6 +1458,13 @@ raise ValueError("Invalid indices for rank: " f"str{self.tensor.order} != len({str(self.indices)}).") + def __repr__(self) -> str: + # The Tensor __repr__ method evaluates the pending assignment to the tensor. + # We want to define the __repr__ method here to avoid such evaluation of the + # tensor assignment. 
+ indices_str = ", ".join(map(lambda i: i.name, self.indices)) + return (f"Tensor({self.tensor.name}) " f"Indices({indices_str})") + def _emit_expression( self, expr_to_opnd: Dict[IndexExpr, lang.OperandDef], diff --git a/mlir/test/Integration/Dialect/SparseTensor/taco/tools/mlir_pytaco_io.py b/mlir/test/Integration/Dialect/SparseTensor/taco/tools/mlir_pytaco_io.py --- a/mlir/test/Integration/Dialect/SparseTensor/taco/tools/mlir_pytaco_io.py +++ b/mlir/test/Integration/Dialect/SparseTensor/taco/tools/mlir_pytaco_io.py @@ -31,17 +31,6 @@ _TNS_FILENAME_SUFFIX = ".tns" -def _write_tns(file: TextIO, tensor: Tensor) -> None: - """Outputs a tensor to a file using .tns format.""" - coords, non_zeros = tensor.get_coordinates_and_values() - assert len(coords) == len(non_zeros) - # Output a coordinate and the corresponding value in a line. - for c, v in zip(coords, non_zeros): - # The coordinates are 1-based in the text file and 0-based in memory. - plus_one_to_str = lambda x: str(x + 1) - file.write(f"{' '.join(map(plus_one_to_str,c))} {v}\n") - - def read(filename: str, fmt: Format) -> Tensor: """Inputs a tensor from a given file. @@ -88,7 +77,4 @@ if not isinstance(tensor, Tensor): raise ValueError(f"Expected a Tensor object: {tensor}.") - # TODO: combine the evaluation and the outputing into one step. 
- tensor._sync_value() - with open(filename, "w") as file: - return _write_tns(file, tensor) + tensor.to_file(filename) diff --git a/mlir/test/Integration/Dialect/SparseTensor/taco/tools/mlir_pytaco_utils.py b/mlir/test/Integration/Dialect/SparseTensor/taco/tools/mlir_pytaco_utils.py --- a/mlir/test/Integration/Dialect/SparseTensor/taco/tools/mlir_pytaco_utils.py +++ b/mlir/test/Integration/Dialect/SparseTensor/taco/tools/mlir_pytaco_utils.py @@ -270,3 +270,67 @@ engine.invoke(_ENTRY_NAME, *arg_pointers) shape = runtime.ranked_memref_to_numpy(ctypes.pointer(c_tensor_desc.shape)) return c_tensor_desc.storage, shape + + +# TODO: With better support from MLIR, we may improve the current implementation +# by using Python code to generate the kernel instead of doing MLIR text code +# stitching. +def _get_output_sparse_tensor_kernel( + sparsity_codes: Sequence[sparse_tensor.DimLevelType]) -> str: + """Creates an MLIR text kernel to output a sparse tensor to a file. + + The kernel returns void. + """ + rank = len(sparsity_codes) + + # Use ? to represent a dimension in the dynamic shape string representation. + shape = "x".join(map(lambda d: "?", range(rank))) + + # Convert the encoded sparsity values to a string representation. + sparsity = ", ".join( + map(lambda s: '"compressed"' if s.value else '"dense"', sparsity_codes)) + + # Return the MLIR text kernel. + return f""" +!Ptr = type !llvm.ptr +#enc = #sparse_tensor.encoding<{{ + dimLevelType = [ {sparsity} ] +}}> +func @{_ENTRY_NAME}(%t: tensor<{shape}xf64, #enc>, %filename: !Ptr) +attributes {{ llvm.emit_c_interface }} {{ + sparse_tensor.out %t, %filename : tensor<{shape}xf64, #enc>, !Ptr + std.return +}}""" + + +def output_sparse_tensor( + tensor: ctypes.c_void_p, filename: str, + sparsity: Sequence[sparse_tensor.DimLevelType]) -> None: + """Outputs an MLIR sparse tensor to the given file. + + Args: + tensor: A C pointer to the MLIR sparse tensor. 
+ filename: A string for the name of the file to which the tensor data is + written in a COO-flavored format. + sparsity: A sequence of DimLevelType values, one for each dimension of the + tensor. + + Raises: + OSError: If there is any problem in loading the supporting C shared library. + ValueError: If the shared library doesn't contain the needed routine. + """ + with ir.Context() as ctx, ir.Location.unknown(): + module = _get_output_sparse_tensor_kernel(sparsity) + module = ir.Module.parse(module) + engine = compile_and_build_engine(module) + + # Convert the filename to a byte stream. + c_filename = ctypes.c_char_p(bytes(filename, "utf-8")) + + arg_pointers = [ + ctypes.byref(ctypes.cast(tensor, ctypes.c_void_p)), + ctypes.byref(c_filename) + ] + + # Invoke the execution engine to run the kernel, which writes the file. + engine.invoke(_ENTRY_NAME, *arg_pointers) diff --git a/mlir/test/Integration/Dialect/SparseTensor/taco/unit_test_tensor_io.py b/mlir/test/Integration/Dialect/SparseTensor/taco/unit_test_tensor_io.py --- a/mlir/test/Integration/Dialect/SparseTensor/taco/unit_test_tensor_io.py +++ b/mlir/test/Integration/Dialect/SparseTensor/taco/unit_test_tensor_io.py @@ -108,3 +108,46 @@ passed += np.allclose(values, [2.0, 3.0, 4.0]) # CHECK: 4 print(passed) + + +# CHECK-LABEL: test_write_unpacked_tns +@_run +def test_write_unpacked_tns(): + a = mlir_pytaco.Tensor([2, 3]) + a.insert([0, 1], 10) + a.insert([1, 2], 40) + a.insert([0, 0], 20) + with tempfile.TemporaryDirectory() as test_dir: + file_name = os.path.join(test_dir, "data.tns") + mlir_pytaco_io.write(file_name, a) + with open(file_name, "r") as file: + lines = file.readlines() + passed = 0 + # Skip the comment line in the output. 
+ if lines[1:] == ["2 3\n", "2 3\n", "1 2 10.0\n", "2 3 40.0\n", "1 1 20.0\n"]: + passed = 1 + # CHECK: 1 + print(passed) + + +# CHECK-LABEL: test_write_packed_tns +@_run +def test_write_packed_tns(): + a = mlir_pytaco.Tensor([2, 3]) + a.insert([0, 1], 10) + a.insert([1, 2], 40) + a.insert([0, 0], 20) + b = mlir_pytaco.Tensor([2, 3]) + i, j = mlir_pytaco.get_index_vars(2) + b[i, j] = a[i, j] + a[i, j] + with tempfile.TemporaryDirectory() as test_dir: + file_name = os.path.join(test_dir, "data.tns") + mlir_pytaco_io.write(file_name, b) + with open(file_name, "r") as file: + lines = file.readlines() + passed = 0 + # Skip the comment line in the output. + if lines[1:] == ["2 3\n", "2 3\n", "1 1 40\n", "1 2 20\n", "2 3 80\n"]: + passed = 1 + # CHECK: 1 + print(passed)