diff --git a/mlir/include/mlir/Dialect/Tensor/IR/TensorOps.td b/mlir/include/mlir/Dialect/Tensor/IR/TensorOps.td
--- a/mlir/include/mlir/Dialect/Tensor/IR/TensorOps.td
+++ b/mlir/include/mlir/Dialect/Tensor/IR/TensorOps.td
@@ -232,7 +232,7 @@
 
     Example:
 
-    ```
+    ```mlir
     // Rank-reducing extract_slice.
     %1 = tensor.extract_slice %0[0, 0, 0][1, 16, 4][1, 1, 1] :
       tensor<8x16x4xf32> to tensor<16x4xf32>
@@ -406,6 +406,93 @@
   let hasFolder = 1;
 }
 
+//===----------------------------------------------------------------------===//
+// GatherOp
+//===----------------------------------------------------------------------===//
+
+def Tensor_GatherOp : Tensor_Op<"gather", [
+    NoSideEffect
+  ]> {
+  let summary = "gather a subset of a tensor at specified indices";
+  let description = [{
+    The `gather` operation extracts a subset of the elements from an `input`
+    tensor at the given indices.
+
+    In its most general form, the tensor of indices specifies all the
+    coordinates of every element to extract (i.e. COO format). The indices
+    are expected to be confined to coordinate values that fit the range of
+    the `input` tensor, otherwise the behavior is undefined.
+
+    Example:
+
+    ```mlir
+    // For each 1x2 triple of coordinates in %indices, extract the
+    // element (i.e. 0-D subset) at the coordinates triple in %input.
+    // This corresponds to implicit COO coordinates = [0, 1, 2].
+    //
+    %out = gather %input[%indices] :
+      tensor<4x4x4xf32>[tensor<1x2x3xindex>] -> tensor<1x2xf32>
+    ```
+
+    A slice variant is provided that allows specifying whole slices of the
+    `input` tensor.
+
+    Example:
+
+    ```mlir
+    // For each 5x6 singleton of coordinates in %indices, extract the 2-D
+    // slice %input[:, c, :] at the coordinate singleton c.
+    //
+    %out = gather %input[%indices] coordinates = [1] :
+      tensor<4x4x4xf32>[tensor<5x6x1xindex>] -> tensor<5x6x4x4xf32>
+    ```
+
+    An optional `unique` unit attribute may be specified to indicate that the
+    coordinates in `indices` are statically guaranteed to be unique at
+    runtime. Incorrectly setting the `unique` attribute when the coordinates
+    are not truly unique is undefined behavior.
+
+    Only full slices are meant to be supported by this op; if one desires
+    partial slices, one should compose this op with other tensor ops, as
+    sketched below. This is to avoid a slippery slope of complexity that
+    would make the op unusable in practice.
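+
+    For instance, a partial slice can be obtained by gathering full slices
+    and then extracting from the gathered result. This is an illustrative
+    sketch only (it reuses the slice-variant example above; the
+    `tensor.extract_slice` offsets and sizes are hypothetical):
+
+    ```mlir
+    // Gather the full 4x4 slices, then keep only the partial sub-slice
+    // [0:2, 0:4] of each gathered slice.
+    %full = gather %input[%indices] coordinates = [1] :
+      tensor<4x4x4xf32>[tensor<5x6x1xindex>] -> tensor<5x6x4x4xf32>
+    %partial = tensor.extract_slice %full[0, 0, 0, 0][5, 6, 2, 4][1, 1, 1, 1] :
+      tensor<5x6x4x4xf32> to tensor<5x6x2x4xf32>
+    ```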
+
+    At the tensor-level, the index tensor is specified in an AoS form (i.e.
+    the coordinate tuple is the most minor dimension). It is the
+    responsibility of further lowerings and bufferization to implement
+    various concrete layouts.
+
+    Note: As currently specified, the operation must lower to an abstraction
+    that performs copies to the output tensor. This is because the buffer
+    type system is currently not rich enough to allow multiple
+    non-contiguous views in the same type. This is visible more clearly in a
+    notional buffer version of the op:
+
+    ```mlir
+    // memref<?x4xf32> is a contiguous buffer of ?x4 elements: a gather from
+    // random input slices must copy to the contiguous output.
+    %out = gather %input[%indices] coordinates = [1] :
+      memref<4x4xf32>[memref<?x1xindex>] -> memref<?x4xf32>
+
+    // Nested buffer support would allow gather to view into the input data.
+    %out = gather %input[%indices] coordinates = [1] :
+      memref<4x4xf32>[memref<?x1xindex>] -> memref<?xmemref<4xf32>>
+    ```
+  }];
+
+  let arguments = (ins AnyRankedTensor:$input,
+                       AnyRankedTensor:$indices,
+                       OptionalAttr<I64ArrayAttr>:$coordinates,
+                       UnitAttr:$unique);
+  let results = (outs AnyRankedTensor:$result);
+
+  let assemblyFormat = [{
+    $input `[` $indices `]`
+      (`unique` $unique^)?
+      attr-dict
+    `:` type($input) `[` type($indices) `]` `->` type($result)
+  }];
+}
+
 //===----------------------------------------------------------------------===//
 // GenerateOp
 //===----------------------------------------------------------------------===//
@@ -560,7 +647,7 @@
 
     Example:
 
-    ```
+    ```mlir
     // Rank-altering insert_slice.
     %1 = tensor.insert_slice %t into %0[0, 0, 0][1, 16, 4][1, 1, 1] :
       tensor<16x4xf32> into tensor<8x16x4xf32>
@@ -1211,6 +1298,100 @@
   let hasVerifier = 1;
 }
 
+
+//===----------------------------------------------------------------------===//
+// ScatterOp
+//===----------------------------------------------------------------------===//
+
+def Tensor_ScatterOp : Tensor_Op<"scatter", [
+    NoSideEffect
+  ]> {
+  let summary =
+      "scatter a tensor into a destination tensor at specified indices";
+  let description = [{
+    The `scatter` operation inserts an `input` tensor into a `dest` tensor
+    at the given indices.
+
+    In its most general form, the tensor of indices specifies all the
+    coordinates of every element to insert (i.e. COO format). The indices
+    are expected to be confined to coordinate values that fit the range of
+    the `dest` tensor, otherwise the behavior is undefined.
+
+    Example:
+
+    ```mlir
+    // For each 1x2 triple of coordinates in %indices, insert the
+    // element (i.e. 0-D subset) at the coordinates triple in %dest.
+    // This corresponds to implicit COO coordinates = [0, 1, 2].
+    //
+    %out = scatter %input into %dest[%indices] :
+      tensor<1x2xf32> into tensor<4x4x4xf32>[tensor<1x2x3xindex>]
+      -> tensor<4x4x4xf32>
+    ```
+
+    A slice variant is provided that allows specifying whole tensor slices.
+
+    Example:
+
+    ```mlir
+    // For each 5x6 singleton of coordinates in %indices, insert the 2-D
+    // slice %input[i, j, :, :] into %dest[:, c, :] at the coordinate
+    // singleton c.
+    //
+    %out = scatter %input into %dest[%indices] coordinates = [1] :
+      tensor<5x6x4x4xf32> into tensor<4x4x4xf32>[tensor<5x6x1xindex>]
+      -> tensor<4x4x4xf32>
+    ```
+
+    An optional `unique` unit attribute may be specified to indicate that
+    the coordinates are statically guaranteed to be unique at runtime.
+    Incorrectly setting the `unique` attribute when the coordinates are not
+    truly unique is undefined behavior.
+
+    Only full slices are meant to be supported by this op; if one desires
+    partial slices, one should compose this op with other tensor ops, as
+    sketched below. This is to avoid a slippery slope of complexity that
+    would make the op unusable in practice.
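+
+    For instance, a partial update can be expressed as a read-modify-write
+    on full slices. This is an illustrative sketch only (it reuses the
+    slice-variant example above, assumes `unique` coordinates, and the
+    `%partial` value with its offsets and sizes is hypothetical):
+
+    ```mlir
+    // Read the current slices, overwrite a partial region of each, then
+    // scatter the full slices back.
+    %cur = gather %dest[%indices] coordinates = [1] :
+      tensor<4x4x4xf32>[tensor<5x6x1xindex>] -> tensor<5x6x4x4xf32>
+    %upd = tensor.insert_slice %partial into %cur[0, 0, 0, 0][5, 6, 2, 4][1, 1, 1, 1] :
+      tensor<5x6x2x4xf32> into tensor<5x6x4x4xf32>
+    %out = scatter %upd into %dest[%indices] unique coordinates = [1] :
+      tensor<5x6x4x4xf32> into tensor<4x4x4xf32>[tensor<5x6x1xindex>]
+      -> tensor<4x4x4xf32>
+    ```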
+
+    At the tensor-level, the index tensor is specified in an AoS form (i.e.
+    the coordinate tuple is the most minor dimension). It is the
+    responsibility of further lowerings and bufferization to implement
+    various concrete layouts.
+
+    Note: As currently specified, the operation must lower to an abstraction
+    that performs copies to the output tensor. This is because the buffer
+    type system is currently not rich enough to allow multiple
+    non-contiguous views in the same type. This is visible more clearly in a
+    notional buffer version of the op:
+
+    ```mlir
+    // memref<?x4xf32> is a contiguous buffer of ?x4 elements: a scatter
+    // into random dest slices must copy through the contiguous input.
+    some_side_effecting_op_writing_into %input, ... : memref<?x4xf32>
+    scatter %input into %dest[%indices] coordinates = [1] :
+      memref<?x4xf32> into memref<4x4xf32>[memref<?x1xindex>]
+
+    // Nested buffer support in the producing op would allow writing
+    // directly into the dest buffer.
+    %v = some_nested_buffer_view_op %dest[%indices] coordinates = [1] :
+      memref<?xmemref<4xf32>>
+    some_side_effecting_op_writing_into %v, ... : memref<?xmemref<4xf32>>
+    ```
+  }];
+
+  let arguments = (ins AnyRankedTensor:$input,
+                       AnyRankedTensor:$indices,
+                       AnyRankedTensor:$dest,
+                       OptionalAttr<I64ArrayAttr>:$coordinates,
+                       UnitAttr:$unique);
+  let results = (outs AnyRankedTensor:$result);
+
+  let assemblyFormat = [{
+    $input `into` $dest `[` $indices `]`
+      (`unique` $unique^)?
+      attr-dict
+    `:` type($input) `into` type($dest) `[` type($indices) `]`
+      `->` type($result)
+  }];
+}
+
 //===----------------------------------------------------------------------===//
 // SplatOp
 //===----------------------------------------------------------------------===//
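
As an illustration of the copy semantics called out in both notes above, the scatter slice example can be pictured as the following notional loop nest over existing ops (`scf.for`, `tensor.extract`, `tensor.extract_slice`, `tensor.insert_slice`). This is a hypothetical sketch, valid only under the `unique` assumption, and not a prescribed lowering:

```mlir
%c0 = arith.constant 0 : index
%c1 = arith.constant 1 : index
%c5 = arith.constant 5 : index
%c6 = arith.constant 6 : index
// Copy one full slice of %input into %dest per coordinate singleton.
%out = scf.for %i = %c0 to %c5 step %c1 iter_args(%d0 = %dest) -> (tensor<4x4x4xf32>) {
  %r = scf.for %j = %c0 to %c6 step %c1 iter_args(%d1 = %d0) -> (tensor<4x4x4xf32>) {
    // The single coordinate for this (i, j) position (coordinates = [1]).
    %c = tensor.extract %indices[%i, %j, %c0] : tensor<5x6x1xindex>
    // The 4x4 slice of %input to write.
    %slice = tensor.extract_slice %input[%i, %j, 0, 0][1, 1, 4, 4][1, 1, 1, 1] :
      tensor<5x6x4x4xf32> to tensor<4x4xf32>
    // Copy it into %dest[:, %c, :].
    %upd = tensor.insert_slice %slice into %d1[0, %c, 0][4, 1, 4][1, 1, 1] :
      tensor<4x4xf32> into tensor<4x4x4xf32>
    scf.yield %upd : tensor<4x4x4xf32>
  }
  scf.yield %r : tensor<4x4x4xf32>
}
```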