diff --git a/mlir/test/Integration/Dialect/SparseTensor/taco/test_MTTKRP.py b/mlir/test/Integration/Dialect/SparseTensor/taco/test_MTTKRP.py
--- a/mlir/test/Integration/Dialect/SparseTensor/taco/test_MTTKRP.py
+++ b/mlir/test/Integration/Dialect/SparseTensor/taco/test_MTTKRP.py
@@ -30,8 +30,8 @@
 
 # These two lines have been modified from the original program to use static
 # data to support result comparison.
-C = pt.from_array(np.full((B.shape[1], 25), 1, dtype=np.float64))
-D = pt.from_array(np.full((B.shape[2], 25), 2, dtype=np.float64))
+C = pt.from_array(np.full((B.shape[1], 25), 1, dtype=np.float32))
+D = pt.from_array(np.full((B.shape[2], 25), 2, dtype=np.float32))
 
 # Declare the result to be a dense matrix.
 A = pt.tensor([B.shape[0], 25], rm)
diff --git a/mlir/test/Integration/Dialect/SparseTensor/taco/test_SDDMM.py b/mlir/test/Integration/Dialect/SparseTensor/taco/test_SDDMM.py
--- a/mlir/test/Integration/Dialect/SparseTensor/taco/test_SDDMM.py
+++ b/mlir/test/Integration/Dialect/SparseTensor/taco/test_SDDMM.py
@@ -15,8 +15,8 @@
 i, j, k = pt.get_index_vars(3)
 
 # Set up dense matrices.
-A = pt.from_array(np.full((8, 8), 2.0))
-B = pt.from_array(np.full((8, 8), 3.0))
+A = pt.from_array(np.full((8, 8), 2.0, dtype=np.float32))
+B = pt.from_array(np.full((8, 8), 3.0, dtype=np.float32))
 
 # Set up sparse matrices.
 S = pt.tensor([8, 8], pt.format([pt.compressed, pt.compressed]))
diff --git a/mlir/test/Integration/Dialect/SparseTensor/taco/test_SpMV.py b/mlir/test/Integration/Dialect/SparseTensor/taco/test_SpMV.py
--- a/mlir/test/Integration/Dialect/SparseTensor/taco/test_SpMV.py
+++ b/mlir/test/Integration/Dialect/SparseTensor/taco/test_SpMV.py
@@ -31,8 +31,8 @@
 
 # These two lines have been modified from the original program to use static
 # data to support result comparison.
-x = pt.from_array(np.full((A.shape[1],), 1, dtype=np.float64))
-z = pt.from_array(np.full((A.shape[0],), 2, dtype=np.float64))
+x = pt.from_array(np.full((A.shape[1],), 1, dtype=np.float32))
+z = pt.from_array(np.full((A.shape[0],), 2, dtype=np.float32))
 
 # Declare the result to be a dense vector
 y = pt.tensor([A.shape[0]], dv)
diff --git a/mlir/test/Integration/Dialect/SparseTensor/taco/test_true_dense_tensor_algebra.py b/mlir/test/Integration/Dialect/SparseTensor/taco/test_true_dense_tensor_algebra.py
--- a/mlir/test/Integration/Dialect/SparseTensor/taco/test_true_dense_tensor_algebra.py
+++ b/mlir/test/Integration/Dialect/SparseTensor/taco/test_true_dense_tensor_algebra.py
@@ -14,7 +14,7 @@
 B = pt.from_array(np.full([2,3], 2, dtype=np.float64))
 
 # Define the result tensor as a true dense tensor. The parameter is_dense=True
 # is an MLIR-PyTACO extension.
-C = pt.tensor([2, 3], is_dense=True)
+C = pt.tensor([2, 3], dtype=pt.float64, is_dense=True)
 
 C[i, j] = A[i, j] + B[i, j]
diff --git a/mlir/test/Integration/Dialect/SparseTensor/taco/tools/mlir_pytaco.py b/mlir/test/Integration/Dialect/SparseTensor/taco/tools/mlir_pytaco.py
--- a/mlir/test/Integration/Dialect/SparseTensor/taco/tools/mlir_pytaco.py
+++ b/mlir/test/Integration/Dialect/SparseTensor/taco/tools/mlir_pytaco.py
@@ -96,7 +96,7 @@
     kind: A Type enum representing the data type.
    value: The numpy data type for the TACO data type.
""" - kind: Type = Type.FLOAT64 + kind: Type = Type.FLOAT32 def is_float(self) -> bool: """Returns whether the data type represents a floating point value.""" @@ -112,6 +112,30 @@ return self.kind.value +def _dtype_to_mlir_str(dtype: DType) -> str: + """Returns the MLIR string for the given dtype.""" + dtype_to_str = { + Type.INT16: "i16", + Type.INT32: "i32", + Type.INT64: "i64", + Type.FLOAT32: "f32", + Type.FLOAT64: "f64" + } + return dtype_to_str[dtype.kind] + + +def _nptype_to_taco_type(ty: np.dtype) -> DType: + """Returns the TACO type for the given numpy type.""" + nptype_to_dtype = { + np.int16: Type.INT16, + np.int32: Type.INT32, + np.int64: Type.INT64, + np.float32: Type.FLOAT32, + np.float64: Type.FLOAT64 + } + return DType(nptype_to_dtype[ty]) + + def _mlir_type_from_taco_type(dtype: DType) -> ir.Type: """Returns the MLIR type corresponding to the given TACO type.""" dtype_to_irtype = { @@ -123,7 +147,6 @@ } return dtype_to_irtype[dtype.kind] - def _ctype_pointer_from_array(array: np.ndarray) -> ctypes.pointer: """Returns the ctype pointer for the given numpy array.""" return ctypes.pointer( @@ -632,7 +655,7 @@ """ # Take care of the argument default values common to both sparse tensors # and dense tensors. - dtype = dtype or DType(Type.FLOAT64) + dtype = dtype or DType(Type.FLOAT32) self._name = name or self._get_unique_name() self._assignment = None self._sparse_value_location = _SparseValueInfo._UNPACKED @@ -688,7 +711,7 @@ # Use the output MLIR sparse tensor pointer to retrieve the COO-flavored # values and verify the values. rank, nse, shape, values, indices = utils.sparse_tensor_to_coo_tensor( - self._packed_sparse_value, np.float64) + self._packed_sparse_value, self._dtype.value) assert rank == self.order assert np.allclose(self.shape, shape) assert nse == len(values) @@ -757,7 +780,8 @@ def from_array(array: np.ndarray) -> "Tensor": """Returns a dense tensor with the value copied from the input array. - We currently only support the conversion of float64 numpy arrays to Tensor. + We currently only support the conversion of float32 and float64 numpy arrays + to Tensor. Args: array: The numpy array that provides the data type, shape and value for @@ -767,11 +791,14 @@ A Tensor object. Raises: - ValueError if the data type of the numpy array is not float64. + ValueError if the data type of the numpy array is not supported. """ - if array.dtype != np.float64: - raise ValueError(f"Expected float64 value type: {array.dtype}.") - tensor = Tensor(array.shape, is_dense=True) + if array.dtype != np.float32 and array.dtype != np.float64: + raise ValueError(f"Expected floating point value type: {array.dtype}.") + tensor = Tensor( + array.shape, + dtype=_nptype_to_taco_type(array.dtype.type), + is_dense=True) tensor._dense_storage = np.copy(array) return tensor @@ -808,7 +835,7 @@ # The size of each dimension is one more that such a maximum coordinate # value. shape = [c + 1 for c in max_coordinate] - tensor = Tensor(shape, fmt) + tensor = Tensor(shape, fmt, dtype=dtype) tensor._coords = coordinates tensor._values = values @@ -833,8 +860,9 @@ value is stored as an MLIR sparse tensor. 
""" sparse_tensor, shape = utils.create_sparse_tensor(filename, - fmt.format_pack.formats) - tensor = Tensor(shape.tolist(), fmt) + fmt.format_pack.formats, + _dtype_to_mlir_str(dtype)) + tensor = Tensor(shape.tolist(), fmt, dtype=dtype) tensor._set_packed_sparse_tensor(sparse_tensor) return tensor @@ -862,7 +890,8 @@ "supported.") utils.output_sparse_tensor(self._packed_sparse_value, filename, - self._format.format_pack.formats) + self._format.format_pack.formats, + _dtype_to_mlir_str(self._dtype)) @property def dtype(self) -> DType: diff --git a/mlir/test/Integration/Dialect/SparseTensor/taco/tools/mlir_pytaco_io.py b/mlir/test/Integration/Dialect/SparseTensor/taco/tools/mlir_pytaco_io.py --- a/mlir/test/Integration/Dialect/SparseTensor/taco/tools/mlir_pytaco_io.py +++ b/mlir/test/Integration/Dialect/SparseTensor/taco/tools/mlir_pytaco_io.py @@ -31,7 +31,8 @@ _TNS_FILENAME_SUFFIX = ".tns" -def read(filename: str, fmt: Format) -> Tensor: +def read(filename: str, fmt: Format, + dtype: DType = DType(Type.FLOAT32)) -> Tensor: """Inputs a tensor from a given file. The name suffix of the file specifies the format of the input tensor. We @@ -40,6 +41,7 @@ Args: filename: A string input filename. fmt: The storage format of the tensor. + dtype: The data type, default to float32. Raises: ValueError: If filename doesn't end with .mtx or .tns, or fmt is not an @@ -52,7 +54,7 @@ f"{_MTX_FILENAME_SUFFIX} or {_TNS_FILENAME_SUFFIX}: " f"{filename}.") - return Tensor.from_file(filename, fmt, DType(Type.FLOAT64)) + return Tensor.from_file(filename, fmt, dtype) def write(filename: str, tensor: Tensor) -> None: diff --git a/mlir/test/Integration/Dialect/SparseTensor/taco/tools/mlir_pytaco_utils.py b/mlir/test/Integration/Dialect/SparseTensor/taco/tools/mlir_pytaco_utils.py --- a/mlir/test/Integration/Dialect/SparseTensor/taco/tools/mlir_pytaco_utils.py +++ b/mlir/test/Integration/Dialect/SparseTensor/taco/tools/mlir_pytaco_utils.py @@ -4,7 +4,7 @@ # This file contains the utilities to process sparse tensor outputs. -from typing import Sequence, Tuple +from typing import Callable, Dict, Sequence, Tuple import ctypes import functools import numpy as np @@ -18,6 +18,10 @@ from mlir.dialects import sparse_tensor from mlir.passmanager import PassManager +# Type aliases for type annotation. +_SupportFunc = Callable[..., None] +_SupportFuncLocator = Callable[[np.dtype], Tuple[_SupportFunc, _SupportFunc]] + # The name for the environment variable that provides the full path for the # supporting library. _SUPPORTLIB_ENV_VAR = "SUPPORTLIB" @@ -36,15 +40,28 @@ return os.getenv(_SUPPORTLIB_ENV_VAR, _DEFAULT_SUPPORTLIB) +def _record_support_funcs( + ty: np.dtype, to_func: _SupportFunc, from_func: _SupportFunc, + ty_to_funcs: Dict[np.dtype, Tuple[_SupportFunc, _SupportFunc]]) -> None: + """Records the two supporting functions for a given data type.""" + to_func.restype = ctypes.c_void_p + from_func.restype = ctypes.c_void_p + ty_to_funcs[ty] = (to_func, from_func) + + @functools.lru_cache() -def _get_c_shared_lib() -> ctypes.CDLL: - """Loads the supporting C shared library with the needed routines. +def _get_support_func_locator() -> _SupportFuncLocator: + """Constructs a function to locate the supporting functions for a data type. + + Loads the supporting C shared library with the needed routines. Constructs a + dictionary from the supported data types to the routines for the data types, + and then a function to look up the dictionary for a given data type. 
 
   The name of the supporting C shared library is either provided by an
   environment variable or a default value.
 
   Returns:
-    The supporting C shared library.
+    The function to look up the supporting functions for a given data type.
 
   Raises:
     OSError: If there is any problem in loading the shared library.
@@ -54,19 +71,25 @@
   # library.
   c_lib = ctypes.CDLL(_get_support_lib_name())
 
+  type_to_funcs = {}
   try:
-    c_lib.convertToMLIRSparseTensorF64.restype = ctypes.c_void_p
+    _record_support_funcs(np.float32, c_lib.convertToMLIRSparseTensorF32,
+                          c_lib.convertFromMLIRSparseTensorF32, type_to_funcs)
   except Exception as e:
-    raise ValueError("Missing function convertToMLIRSparseTensorF64 from "
-                     f"the supporting C shared library: {e} ") from e
+    raise ValueError(f"Missing supporting function: {e}") from e
 
   try:
-    c_lib.convertFromMLIRSparseTensorF64.restype = ctypes.c_void_p
+    _record_support_funcs(np.float64, c_lib.convertToMLIRSparseTensorF64,
+                          c_lib.convertFromMLIRSparseTensorF64, type_to_funcs)
   except Exception as e:
-    raise ValueError("Missing function convertFromMLIRSparseTensorF64 from "
-                     f"the C shared library: {e} ") from e
+    raise ValueError(f"Missing supporting function: {e}") from e
+
+  def get_support_funcs(ty: np.dtype):
+    funcs = type_to_funcs[ty]
+    assert funcs is not None
+    return funcs
 
-  return c_lib
+  return get_support_funcs
 
 
 def sparse_tensor_to_coo_tensor(
@@ -93,17 +116,14 @@
     OSError: If there is any problem in loading the shared library.
     ValueError: If the shared library doesn't contain the needed routines.
   """
-  c_lib = _get_c_shared_lib()
-
+  convert_from = _get_support_func_locator()(dtype)[1]
   rank = ctypes.c_ulonglong(0)
   nse = ctypes.c_ulonglong(0)
   shape = ctypes.POINTER(ctypes.c_ulonglong)()
   values = ctypes.POINTER(np.ctypeslib.as_ctypes_type(dtype))()
   indices = ctypes.POINTER(ctypes.c_ulonglong)()
-  c_lib.convertFromMLIRSparseTensorF64(sparse_tensor, ctypes.byref(rank),
-                                       ctypes.byref(nse), ctypes.byref(shape),
-                                       ctypes.byref(values),
-                                       ctypes.byref(indices))
+  convert_from(sparse_tensor, ctypes.byref(rank), ctypes.byref(nse),
+               ctypes.byref(shape), ctypes.byref(values), ctypes.byref(indices))
 
   # Convert the returned values to the corresponding numpy types.
   shape = np.ctypeslib.as_array(shape, shape=[rank.value])
@@ -138,8 +158,8 @@
       ctypes.POINTER(np.ctypeslib.as_ctypes_type(np_values.dtype)))
   indices = np_indices.ctypes.data_as(ctypes.POINTER(ctypes.c_ulonglong))
 
-  c_lib = _get_c_shared_lib()
-  ptr = c_lib.convertToMLIRSparseTensorF64(rank, nse, shape, values, indices)
-  assert ptr is not None, "Problem with calling convertToMLIRSparseTensorF64"
+  convert_to = _get_support_func_locator()(np_values.dtype.type)[0]
+  ptr = convert_to(rank, nse, shape, values, indices)
+  assert ptr is not None, "Problem with calling the conversion function"
   return ptr
 
@@ -171,11 +191,11 @@
 ]
 
 
-def _output_one_dim(dim: int, rank: int, shape: str) -> str:
+def _output_one_dim(dim: int, rank: int, shape: str, type: str) -> str:
   """Produces the MLIR text code to output the size for the given dimension."""
   return f"""
 %c{dim} = arith.constant {dim} : index
-%d{dim} = tensor.dim %t, %c{dim} : tensor<{shape}xf64, #enc>
+%d{dim} = tensor.dim %t, %c{dim} : tensor<{shape}x{type}, #enc>
 memref.store %d{dim}, %b[%c{dim}] : memref<{rank}xindex>
 """
 
@@ -187,7 +207,7 @@
 # (2) Use scf.for instead of an unrolled loop to write out the dimension sizes
 # when tensor.dim supports non-constant dimension value.
 def _get_create_sparse_tensor_kernel(
-    sparsity_codes: Sequence[sparse_tensor.DimLevelType]) -> str:
+    sparsity_codes: Sequence[sparse_tensor.DimLevelType], type: str) -> str:
   """Creates an MLIR text kernel to construct a sparse tensor from a file.
 
   The kernel returns a _SparseTensorDescriptor structure.
@@ -203,7 +223,7 @@
 
   # Get the MLIR text code to write the dimension sizes to the output buffer.
   output_dims = "\n".join(
-      map(lambda d: _output_one_dim(d, rank, shape), range(rank)))
+      map(lambda d: _output_one_dim(d, rank, shape, type), range(rank)))
 
   # Return the MLIR text kernel.
   return f"""
@@ -211,18 +231,18 @@
 #enc = #sparse_tensor.encoding<{{
   dimLevelType = [ {sparsity} ]
 }}>
-func @{_ENTRY_NAME}(%filename: !Ptr) -> (tensor<{shape}xf64, #enc>, memref<{rank}xindex>)
+func @{_ENTRY_NAME}(%filename: !Ptr) -> (tensor<{shape}x{type}, #enc>, memref<{rank}xindex>)
 attributes {{ llvm.emit_c_interface }} {{
-  %t = sparse_tensor.new %filename : !Ptr to tensor<{shape}xf64, #enc>
+  %t = sparse_tensor.new %filename : !Ptr to tensor<{shape}x{type}, #enc>
   %b = memref.alloc() : memref<{rank}xindex>
   {output_dims}
-  return %t, %b : tensor<{shape}xf64, #enc>, memref<{rank}xindex>
+  return %t, %b : tensor<{shape}x{type}, #enc>, memref<{rank}xindex>
 }}"""
 
 
-def create_sparse_tensor(
-    filename: str, sparsity: Sequence[sparse_tensor.DimLevelType]
-) -> Tuple[ctypes.c_void_p, np.ndarray]:
+def create_sparse_tensor(filename: str,
+                         sparsity: Sequence[sparse_tensor.DimLevelType],
+                         type: str) -> Tuple[ctypes.c_void_p, np.ndarray]:
   """Creates an MLIR sparse tensor from the input file.
 
   Args:
@@ -241,7 +261,7 @@
     ValueError: If the shared library doesn't contain the needed routine.
   """
   with ir.Context() as ctx, ir.Location.unknown():
-    module = _get_create_sparse_tensor_kernel(sparsity)
+    module = _get_create_sparse_tensor_kernel(sparsity, type)
     module = ir.Module.parse(module)
     engine = compile_and_build_engine(module)
 
@@ -265,7 +285,7 @@
 # by using Python code to generate the kernel instead of doing MLIR text code
 # stitching.
 def _get_output_sparse_tensor_kernel(
-    sparsity_codes: Sequence[sparse_tensor.DimLevelType]) -> str:
+    sparsity_codes: Sequence[sparse_tensor.DimLevelType], type: str) -> str:
   """Creates an MLIR text kernel to output a sparse tensor to a file.
 
   The kernel returns void.
@@ -285,16 +305,16 @@
 #enc = #sparse_tensor.encoding<{{
   dimLevelType = [ {sparsity} ]
 }}>
-func @{_ENTRY_NAME}(%t: tensor<{shape}xf64, #enc>, %filename: !Ptr)
+func @{_ENTRY_NAME}(%t: tensor<{shape}x{type}, #enc>, %filename: !Ptr)
 attributes {{ llvm.emit_c_interface }} {{
-  sparse_tensor.out %t, %filename : tensor<{shape}xf64, #enc>, !Ptr
+  sparse_tensor.out %t, %filename : tensor<{shape}x{type}, #enc>, !Ptr
   std.return
 }}"""
 
 
-def output_sparse_tensor(
-    tensor: ctypes.c_void_p, filename: str,
-    sparsity: Sequence[sparse_tensor.DimLevelType]) -> None:
+def output_sparse_tensor(tensor: ctypes.c_void_p, filename: str,
+                         sparsity: Sequence[sparse_tensor.DimLevelType],
+                         type: str) -> None:
   """Outputs an MLIR sparse tensor to the given file.
 
   Args:
@@ -303,13 +323,14 @@
     a COO-flavored format.
     sparsity: A sequence of DimLevelType values, one for each dimension of
       the tensor.
+    type: The MLIR string for the data type.
 
   Raises:
     OSError: If there is any problem in loading the supporting C shared
       library.
    ValueError: If the shared library doesn't contain the needed routine.
""" with ir.Context() as ctx, ir.Location.unknown(): - module = _get_output_sparse_tensor_kernel(sparsity) + module = _get_output_sparse_tensor_kernel(sparsity, type) module = ir.Module.parse(module) engine = compile_and_build_engine(module) diff --git a/mlir/test/Integration/Dialect/SparseTensor/taco/unit_test_tensor_utils.py b/mlir/test/Integration/Dialect/SparseTensor/taco/unit_test_tensor_utils.py --- a/mlir/test/Integration/Dialect/SparseTensor/taco/unit_test_tensor_utils.py +++ b/mlir/test/Integration/Dialect/SparseTensor/taco/unit_test_tensor_utils.py @@ -75,7 +75,7 @@ # Read the data from the file and construct an MLIR sparse tensor. sparse_tensor, o_shape = pytaco_utils.create_sparse_tensor( - file_name, sparsity_codes) + file_name, sparsity_codes, "f64") passed = 0