diff --git a/mlir/include/mlir/Dialect/Linalg/IR/LinalgStructuredOps.td b/mlir/include/mlir/Dialect/Linalg/IR/LinalgStructuredOps.td
--- a/mlir/include/mlir/Dialect/Linalg/IR/LinalgStructuredOps.td
+++ b/mlir/include/mlir/Dialect/Linalg/IR/LinalgStructuredOps.td
@@ -284,6 +284,11 @@
       return getInputOperands();
     }
 
+    bool payloadUsesValueFromOperand(OpOperand * opOperand) {
+      if (isOutput(opOperand)) return false;
+      return !getMatchingBlockArgument(opOperand).use_empty();
+    }
+
     static std::function<void(ImplicitLocOpBuilder &,
                               Block &, ArrayRef<NamedAttribute>)>
     getRegionBuilder() {
diff --git a/mlir/test/Dialect/Linalg/one-shot-bufferize.mlir b/mlir/test/Dialect/Linalg/one-shot-bufferize.mlir
--- a/mlir/test/Dialect/Linalg/one-shot-bufferize.mlir
+++ b/mlir/test/Dialect/Linalg/one-shot-bufferize.mlir
@@ -335,6 +335,70 @@
 
 // -----
 
+func.func @map_binary(%lhs: tensor<64xf32>, %rhs: tensor<64xf32>,
+                      %init: tensor<64xf32>) -> tensor<64xf32> {
+  %add = linalg.map
+         ins(%lhs, %rhs: tensor<64xf32>, tensor<64xf32>)
+         outs(%init:tensor<64xf32>)
+         (%lhs_elem: f32, %rhs_elem: f32) {
+           %0 = arith.addf %lhs_elem, %rhs_elem: f32
+           linalg.yield %0: f32
+         }
+  func.return %add : tensor<64xf32>
+}
+// CHECK-LABEL: func @map_binary
+// CHECK: linalg.map
+
+// -----
+
+func.func @reduce(%input: tensor<16x32x64xf32>,
+                  %init: tensor<16x64xf32>) -> tensor<16x64xf32> {
+  %reduce = linalg.reduce
+      ins(%input:tensor<16x32x64xf32>)
+      outs(%init:tensor<16x64xf32>)
+      dimensions = [1]
+      (%in: f32, %out: f32) {
+        %0 = arith.addf %in, %out: f32
+        linalg.yield %0: f32
+      }
+  func.return %reduce : tensor<16x64xf32>
+}
+// CHECK-LABEL: func @reduce
+// CHECK: linalg.reduce
+
+// -----
+
+func.func @variadic_reduce(%input1: tensor<16x32x64xf32>,
+    %init1: tensor<16x64xf32>, %input2: tensor<16x32x64xi64>,
+    %init2: tensor<16x64xi64>) -> (tensor<16x64xf32>, tensor<16x64xi64>) {
+  %reduce, %reduce2 = linalg.reduce
+      ins(%input1, %input2 : tensor<16x32x64xf32>, tensor<16x32x64xi64>)
+      outs(%init1, %init2 : tensor<16x64xf32>, tensor<16x64xi64>)
+      dimensions = [1]
+      (%in1: f32, %in2: i64, %out1: f32, %out2: i64) {
+        %0 = arith.addf %in1, %out1: f32
+        %1 = arith.addi %in2, %out2: i64
+        linalg.yield %0, %1: f32, i64
+      }
+  func.return %reduce, %reduce2 : tensor<16x64xf32>, tensor<16x64xi64>
+}
+// CHECK-LABEL: func @variadic_reduce
+// CHECK: linalg.reduce
+
+// -----
+
+func.func @transpose(%input: tensor<16x32x64xf32>,
+                     %init: tensor<32x64x16xf32>) -> tensor<32x64x16xf32> {
+  %transpose = linalg.transpose
+      ins(%input:tensor<16x32x64xf32>)
+      outs(%init:tensor<32x64x16xf32>)
+      permutation = [1, 2, 0]
+  func.return %transpose : tensor<32x64x16xf32>
+}
+// CHECK-LABEL: func @transpose
+
+// -----
+
 //===----------------------------------------------------------------------===//
 // AllocTensorOp elimination would produce SSA violations for the example below.
 //===----------------------------------------------------------------------===//