diff --git a/mlir/test/Integration/Dialect/SparseTensor/taco/test_simple_tensor_algebra.py b/mlir/test/Integration/Dialect/SparseTensor/taco/test_simple_tensor_algebra.py --- a/mlir/test/Integration/Dialect/SparseTensor/taco/test_simple_tensor_algebra.py +++ b/mlir/test/Integration/Dialect/SparseTensor/taco/test_simple_tensor_algebra.py @@ -1,5 +1,6 @@ # RUN: SUPPORTLIB=%mlir_runner_utils_dir/libmlir_c_runner_utils%shlibext %PYTHON %s | FileCheck %s +import numpy as np import os import sys @@ -17,7 +18,7 @@ A = pt.tensor([2, 3]) B = pt.tensor([2, 3]) C = pt.tensor([2, 3]) -D = pt.tensor([2, 3], dense) +D = pt.tensor([2, 3], compressed) A.insert([0, 1], 10) A.insert([1, 2], 40) B.insert([0, 0], 20) @@ -26,5 +27,9 @@ C.insert([1, 2], 7) D[i, j] = A[i, j] + B[i, j] - C[i, j] -# CHECK: [20. 5. 0. 0. 0. 63.] -print(D.to_array().reshape(6)) +indices, values = D.get_coordinates_and_values() +passed = np.allclose(indices, [[0, 0], [0, 1], [1, 2]]) +passed += np.allclose(values, [20.0, 5.0, 63.0]) + +# CHECK: Number of passed: 2 +print("Number of passed:", passed) diff --git a/mlir/test/Integration/Dialect/SparseTensor/taco/test_true_dense_tensor_algebra.py b/mlir/test/Integration/Dialect/SparseTensor/taco/test_true_dense_tensor_algebra.py new file mode 100644 --- /dev/null +++ b/mlir/test/Integration/Dialect/SparseTensor/taco/test_true_dense_tensor_algebra.py @@ -0,0 +1,22 @@ +# RUN: SUPPORTLIB=%mlir_runner_utils_dir/libmlir_c_runner_utils%shlibext %PYTHON %s | FileCheck %s + +import numpy as np +import os +import sys + +_SCRIPT_PATH = os.path.dirname(os.path.abspath(__file__)) +sys.path.append(_SCRIPT_PATH) +from tools import mlir_pytaco_api as pt + +i, j = pt.get_index_vars(2) +# Both tensors are true dense tensors. +A = pt.from_array(np.full([2,3], 1, dtype=np.float64)) +B = pt.from_array(np.full([2,3], 2, dtype=np.float64)) +# Define the result tensor as a true dense tensor. The parameter is_dense=True +# is an MLIR-PyTACO extension. +C = pt.tensor([2, 3], is_dense=True) + +C[i, j] = A[i, j] + B[i, j] + +# CHECK: [3. 3. 3. 3. 3. 3.] +print(C.to_array().reshape(6)) diff --git a/mlir/test/Integration/Dialect/SparseTensor/taco/tools/mlir_pytaco.py b/mlir/test/Integration/Dialect/SparseTensor/taco/tools/mlir_pytaco.py --- a/mlir/test/Integration/Dialect/SparseTensor/taco/tools/mlir_pytaco.py +++ b/mlir/test/Integration/Dialect/SparseTensor/taco/tools/mlir_pytaco.py @@ -298,19 +298,12 @@ f"len({self.format_pack}) != " f"len({self.ordering})") - def is_dense(self) -> bool: - """Returns true if all the Tensor dimensions have a dense format.""" - return all([f == ModeFormat.DENSE for f in self.format_pack.formats]) - def rank(self) -> int: """Returns the number of dimensions represented by the format.""" return self.format_pack.rank() def mlir_tensor_attr(self) -> Optional[sparse_tensor.EncodingAttr]: """Constructs the MLIR attributes for the tensor format.""" - if self.is_dense(): - return None - order = ( range(self.rank()) if (self.ordering is None) else self.ordering.ordering) @@ -467,22 +460,22 @@ op. dst_dtype: A DType representing the data type of the structured op result. dst_name: A string representing the name of the structured op result. - dst_format: A Format object representing the destination tensor format. + dst_format: An optional Format object representing the destination tensor + format. None represents a true dense tensor. """ dst_indices: Tuple[IndexVar, ...] dst_dims: Tuple[int, ...] dst_dtype: DType dst_name: str - dst_format: Format + dst_format: Optional[Format] def __post_init__(self) -> None: """Verifies the integrity of the attribute values.""" assert len(self.dst_indices) == len(self.dst_dims) - assert self.dst_format is not None def emit_tensor_init(self) -> ir.RankedTensorType: """Returns an initialization for the destination tensor.""" - if self.dst_format.is_dense(): + if self.dst_format is None: # Initialize the dense tensor. ir_type = _mlir_type_from_taco_type(self.dst_dtype) tensor = linalg.InitTensorOp(self.dst_dims, ir_type).result @@ -613,7 +606,8 @@ fmt: Optional[Union[ModeFormat, List[ModeFormat], Format]] = None, dtype: Optional[DType] = None, - name: Optional[str] = None): + name: Optional[str] = None, + is_dense: bool = False): """The tensor constructor interface defined by TACO API. Args: @@ -630,25 +624,35 @@ dtype: An object of dtype, representing the data type of the tensor. name: A string name of the tensor. If a name is not given, creates a unique name for the tensor. + is_dense: A boolean variable to indicate whether the tensor is a dense + tensor without any sparsity annotation. Raises: ValueError: If there is any inconsistency among the input arguments. """ - # Take care of the argument default values. - fmt = fmt or ModeFormat.COMPRESSED + # Take care of the argument default values common to both sparse tensors + # and dense tensors. dtype = dtype or DType(Type.FLOAT64) self._name = name or self._get_unique_name() - - self._dtype = dtype self._assignment = None + self._sparse_value_location = _SparseValueInfo._UNPACKED + self._dense_storage = None + self._dtype = dtype + + if is_dense: + assert (fmt is None) + assert (isinstance(value_or_shape, tuple) or isinstance( + value_or_shape, list)) and _all_instance_of(value_or_shape, int) + self._shape = value_or_shape + self._format = None + return + + fmt = fmt or ModeFormat.COMPRESSED # We currently use _coords and _values to host the sparse tensor value with - # COO format, and _dense_storage to host the dense tensor value. We haven't - # implement the conversion between the two storages yet. This will be - # improved in a follow up CL. + # COO format, and _dense_storage to host the dense tensor value. We don't + # support the conversion between the two storages. self._coords = [] self._values = [] - self._sparse_value_location = _SparseValueInfo._UNPACKED - self._dense_storage = None self._stats = _Stats() if value_or_shape is None or isinstance(value_or_shape, int) or isinstance( value_or_shape, float): @@ -694,7 +698,7 @@ def __repr__(self) -> str: self._sync_value() - self._unpack() + self.unpack() value_str = (f"{repr(self._dense_storage)})" if self.is_dense() else f"{repr(self._coords)} {repr(self._values)})") return (f"Tensor(_name={repr(self._name)} " @@ -733,8 +737,8 @@ self._values.append(self._dtype.value(val)) def is_dense(self) -> bool: - """Returns true if all the Tensor dimensions have a dense format.""" - return self._format.is_dense() + """Returns true if the tensor doesn't have sparsity annotation.""" + return self._format is None def to_array(self) -> np.ndarray: """Returns the numpy array for the Tensor. @@ -767,7 +771,7 @@ """ if array.dtype != np.float64: raise ValueError(f"Expected float64 value type: {array.dtype}.") - tensor = Tensor(array.shape, ModeFormat.DENSE) + tensor = Tensor(array.shape, is_dense=True) tensor._dense_storage = np.copy(array) return tensor @@ -843,26 +847,22 @@ Args: filename: A string file name. + + Raises: + ValueError: If the tensor is dense, or an unpacked sparse tensor. """ self._sync_value() - if not self.is_unpacked(): - utils.output_sparse_tensor(self._packed_sparse_value, filename, - self._format.format_pack.formats) - return - # TODO: Use MLIR code to output the value. - coords, values = self.get_coordinates_and_values() - assert len(coords) == len(values) - with open(filename, "w") as file: - # Output a comment line and the meta data. - file.write("; extended FROSTT format\n") - file.write(f"{self.order} {len(coords)}\n") - file.write(f"{' '.join(map(lambda i: str(i), self.shape))}\n") - # Output each (coordinate value) pair in a line. - for c, v in zip(coords, values): - # The coordinates are 1-based in the text file and 0-based in memory. - plus_one_to_str = lambda x: str(x + 1) - file.write(f"{' '.join(map(plus_one_to_str,c))} {v}\n") + if self.is_dense(): + raise ValueError("Writing dense tensors without sparsity annotation to " + "file is not supported.") + + if self.is_unpacked(): + raise ValueError("Writing unpacked sparse tensors to file is not " + "supported.") + + utils.output_sparse_tensor(self._packed_sparse_value, filename, + self._format.format_pack.formats) @property def dtype(self) -> DType: @@ -956,8 +956,9 @@ def mlir_tensor_type(self) -> ir.RankedTensorType: """Returns the MLIR type for the tensor.""" - return _mlir_tensor_type(self._dtype, tuple(self._shape), - self._format.mlir_tensor_attr()) + mlir_attr = None if ( + self._format is None) else self._format.mlir_tensor_attr() + return _mlir_tensor_type(self._dtype, tuple(self._shape), mlir_attr) def dense_dst_ctype_pointer(self) -> ctypes.pointer: """Returns the ctypes pointer for the pointer to an MemRefDescriptor. @@ -990,9 +991,15 @@ def get_coordinates_and_values( self) -> Tuple[List[Tuple[int, ...]], List[_AnyRuntimeType]]: - """Returns the coordinates and values for the non-zero elements.""" + """Returns the coordinates and values for the non-zero elements. + + This method also evaluate the assignment to the tensor and unpack the + sparse tensor. + """ + self._sync_value() + if not self.is_dense(): - assert (self.is_unpacked()) + self.unpack() return (self._coords, self._values) # Coordinates for non-zero elements, grouped by dimensions. @@ -1627,7 +1634,12 @@ if isinstance(expr, Access): src_indices = expr.indices src_dims = tuple(expr.tensor.shape) - mode_formats = tuple(expr.tensor.format.format_pack.formats) + if expr.tensor.format is None: + # Treat each dimension of a dense tensor as DENSE for the purpose of + # calculating temporary tensor storage format. + mode_formats = tuple([ModeFormat.DENSE] * len(src_dims)) + else: + mode_formats = tuple(expr.tensor.format.format_pack.formats) assert len(src_dims) == len(mode_formats) dim_infos = tuple([_DimInfo(d, m) for d, m in zip(src_dims, mode_formats)]) else: diff --git a/mlir/test/Integration/Dialect/SparseTensor/taco/tools/mlir_pytaco_io.py b/mlir/test/Integration/Dialect/SparseTensor/taco/tools/mlir_pytaco_io.py --- a/mlir/test/Integration/Dialect/SparseTensor/taco/tools/mlir_pytaco_io.py +++ b/mlir/test/Integration/Dialect/SparseTensor/taco/tools/mlir_pytaco_io.py @@ -51,8 +51,6 @@ raise ValueError("Expected string filename ends with " f"{_MTX_FILENAME_SUFFIX} or {_TNS_FILENAME_SUFFIX}: " f"{filename}.") - if not isinstance(fmt, Format) or fmt.is_dense(): - raise ValueError(f"Expected a sparse Format object: {fmt}.") return Tensor.from_file(filename, fmt, DType(Type.FLOAT64)) diff --git a/mlir/test/Integration/Dialect/SparseTensor/taco/tools/testing_utils.py b/mlir/test/Integration/Dialect/SparseTensor/taco/tools/testing_utils.py --- a/mlir/test/Integration/Dialect/SparseTensor/taco/tools/testing_utils.py +++ b/mlir/test/Integration/Dialect/SparseTensor/taco/tools/testing_utils.py @@ -36,3 +36,10 @@ """Returns contents of file as string.""" with open(file, "r") as f: return f.read() + + +def run_test(f): + """Prints the test name and runs the test.""" + print(f.__name__) + f() + return f diff --git a/mlir/test/Integration/Dialect/SparseTensor/taco/unit_test_tensor_core.py b/mlir/test/Integration/Dialect/SparseTensor/taco/unit_test_tensor_core.py new file mode 100644 --- /dev/null +++ b/mlir/test/Integration/Dialect/SparseTensor/taco/unit_test_tensor_core.py @@ -0,0 +1,39 @@ +# RUN: SUPPORTLIB=%mlir_runner_utils_dir/libmlir_c_runner_utils%shlibext %PYTHON %s | FileCheck %s + +from string import Template + +import numpy as np +import os +import sys +import tempfile + +_SCRIPT_PATH = os.path.dirname(os.path.abspath(__file__)) +sys.path.append(_SCRIPT_PATH) +from tools import mlir_pytaco +from tools import testing_utils as testing_utils + +# Define the aliases to shorten the code. +_COMPRESSED = mlir_pytaco.ModeFormat.COMPRESSED +_DENSE = mlir_pytaco.ModeFormat.DENSE + + +# CHECK-LABEL: test_tensor_all_dense_sparse +@testing_utils.run_test +def test_tensor_all_dense_sparse(): + a = mlir_pytaco.Tensor([4], [_DENSE]) + passed = (not a.is_dense()) + passed += (a.order == 1) + passed += (a.shape[0] == 4) + # CHECK: Number of passed: 3 + print("Number of passed:", passed) + + +# CHECK-LABEL: test_tensor_true_dense +@testing_utils.run_test +def test_tensor_true_dense(): + a = mlir_pytaco.Tensor.from_array(np.random.uniform(size=5)) + passed = a.is_dense() + passed += (a.order == 1) + passed += (a.shape[0] == 5) + # CHECK: Number of passed: 3 + print("Number of passed:", passed) diff --git a/mlir/test/Integration/Dialect/SparseTensor/taco/unit_test_tensor_io.py b/mlir/test/Integration/Dialect/SparseTensor/taco/unit_test_tensor_io.py --- a/mlir/test/Integration/Dialect/SparseTensor/taco/unit_test_tensor_io.py +++ b/mlir/test/Integration/Dialect/SparseTensor/taco/unit_test_tensor_io.py @@ -12,18 +12,14 @@ from tools import mlir_pytaco from tools import mlir_pytaco_io from tools import mlir_pytaco_utils as pytaco_utils +from tools import testing_utils as testing_utils + # Define the aliases to shorten the code. _COMPRESSED = mlir_pytaco.ModeFormat.COMPRESSED _DENSE = mlir_pytaco.ModeFormat.DENSE -def _run(f): - print(f.__name__) - f() - return f - - _FORMAT = mlir_pytaco.Format([_COMPRESSED, _COMPRESSED]) _MTX_DATA_TEMPLATE = Template( """%%MatrixMarket matrix coordinate real $general_or_symmetry @@ -40,7 +36,7 @@ # CHECK-LABEL: test_read_mtx_matrix_general -@_run +@testing_utils.run_test def test_read_mtx_matrix_general(): with tempfile.TemporaryDirectory() as test_dir: file_name = os.path.join(test_dir, "data.mtx") @@ -60,7 +56,7 @@ # CHECK-LABEL: test_read_mtx_matrix_symmetry -@_run +@testing_utils.run_test def test_read_mtx_matrix_symmetry(): with tempfile.TemporaryDirectory() as test_dir: file_name = os.path.join(test_dir, "data.mtx") @@ -91,7 +87,7 @@ # CHECK-LABEL: test_read_tns -@_run +@testing_utils.run_test def test_read_tns(): with tempfile.TemporaryDirectory() as test_dir: file_name = os.path.join(test_dir, "data.tns") @@ -111,7 +107,7 @@ # CHECK-LABEL: test_write_unpacked_tns -@_run +@testing_utils.run_test def test_write_unpacked_tns(): a = mlir_pytaco.Tensor([2, 3]) a.insert([0, 1], 10) @@ -119,19 +115,15 @@ a.insert([0, 0], 20) with tempfile.TemporaryDirectory() as test_dir: file_name = os.path.join(test_dir, "data.tns") - mlir_pytaco_io.write(file_name, a) - with open(file_name, "r") as file: - lines = file.readlines() - passed = 0 - # Skip the comment line in the output. - if lines[1:] == ["2 3\n", "2 3\n", "1 2 10.0\n", "2 3 40.0\n", "1 1 20.0\n"]: - passed = 1 - # CHECK: 1 - print(passed) + try: + mlir_pytaco_io.write(file_name, a) + except ValueError as e: + # CHECK: Writing unpacked sparse tensors to file is not supported + print(e) # CHECK-LABEL: test_write_packed_tns -@_run +@testing_utils.run_test def test_write_packed_tns(): a = mlir_pytaco.Tensor([2, 3]) a.insert([0, 1], 10)