diff --git a/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp b/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp --- a/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp +++ b/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp @@ -213,12 +213,11 @@ if (parseCommonStructuredOpParts(parser, result, inputTypes, outputTypes)) return failure(); - // TODO: consider merging results parsing into region parsing. - // Need to wait for declarative assembly resolution to decide. - SmallVector<Type, 1> outputTensorsTypes; - if (parseNamedStructuredOpResults(parser, outputTensorsTypes)) + if (outputTypes.empty()) return failure(); - result.addTypes(outputTensorsTypes); + + if (outputTypes.front().isa<TensorType>()) + result.addTypes(outputTypes); std::unique_ptr<Region> region = std::make_unique<Region>(); if (parseNamedStructuredOpRegion(parser, *region, numRegionArgs, inputTypes, @@ -250,9 +249,6 @@ // attributes. printCommonStructuredOpParts(p, inputs, outputs); - // Results printing. - printNamedStructuredOpResults(p, op->getResultTypes()); - // Region is elided. } diff --git a/mlir/test/Conversion/TensorToLinalg/tensor-ops-to-linalg.mlir b/mlir/test/Conversion/TensorToLinalg/tensor-ops-to-linalg.mlir --- a/mlir/test/Conversion/TensorToLinalg/tensor-ops-to-linalg.mlir +++ b/mlir/test/Conversion/TensorToLinalg/tensor-ops-to-linalg.mlir @@ -7,7 +7,7 @@ // CHECK-SAME: %[[IN:.*]]: tensor<1x28x28x1xf32>) -> tensor<1x32x32x1xf32> { // CHECK: %[[C0:.*]] = arith.constant 0.000000e+00 : f32 // CHECK: %[[INIT:.*]] = tensor.empty() : tensor<1x32x32x1xf32> -// CHECK: %[[FILL:.*]] = linalg.fill ins(%[[C0]] : f32) outs(%[[INIT]] : tensor<1x32x32x1xf32>) -> tensor<1x32x32x1xf32> +// CHECK: %[[FILL:.*]] = linalg.fill ins(%[[C0]] : f32) outs(%[[INIT]] : tensor<1x32x32x1xf32>) // CHECK: %[[PADDED:.*]] = tensor.insert_slice %[[IN]] into %[[FILL]][0, 2, 2, 0] [1, 28, 28, 1] [1, 1, 1, 1] : tensor<1x28x28x1xf32> into tensor<1x32x32x1xf32> // CHECK: return %[[PADDED]] : tensor<1x32x32x1xf32> func.func @generalize_pad_tensor_static_shape(%arg0: tensor<1x28x28x1xf32>) -> tensor<1x32x32x1xf32> { @@ -32,7 +32,7 @@ // CHECK: %[[DIM3:.*]] = tensor.dim %[[IN]], %[[C3]] : tensor<4x?x2x?xf32> // CHECK: %[[OUT_DIM3:.*]] = arith.addi %[[DIM3]], %[[OFFSET]] : index // CHECK: %[[INIT:.*]] = tensor.empty(%[[DIM1]], %[[OUT_DIM2]], %[[OUT_DIM3]]) : tensor<4x?x?x?xf32> -// CHECK: %[[FILL:.*]] = linalg.fill ins(%[[CST]] : f32) outs(%[[INIT]] : tensor<4x?x?x?xf32>) -> tensor<4x?x?x?xf32> +// CHECK: %[[FILL:.*]] = linalg.fill ins(%[[CST]] : f32) outs(%[[INIT]] : tensor<4x?x?x?xf32>) // CHECK: %[[PADDED:.*]] = tensor.insert_slice %[[IN]] into %[[FILL]]{{\[}}%[[C0]], %[[C0]], %[[OFFSET]], %[[C0]]] [4, %[[DIM1]], 2, %[[DIM3]]] [1, 1, 1, 1] : tensor<4x?x2x?xf32> into tensor<4x?x?x?xf32> // CHECK: return %[[PADDED]] : tensor<4x?x?x?xf32> // CHECK: } diff --git a/mlir/test/Conversion/TosaToLinalg/tosa-to-linalg-named.mlir b/mlir/test/Conversion/TosaToLinalg/tosa-to-linalg-named.mlir --- a/mlir/test/Conversion/TosaToLinalg/tosa-to-linalg-named.mlir +++ b/mlir/test/Conversion/TosaToLinalg/tosa-to-linalg-named.mlir @@ -4,8 +4,8 @@ func.func @matmul(%arg0: tensor<1x5x3xf32>, %arg1: tensor<1x3x6xf32>) -> (tensor<1x5x6xf32>) { // CHECK: [[C0:%.+]] = arith.constant 0 // CHECK: [[INIT:%.+]] = tensor.empty() - // CHECK: [[FILLED:%.+]] = linalg.fill ins([[C0]] : f32) outs([[INIT]] : tensor<1x5x6xf32>) -> tensor<1x5x6xf32> - // CHECK: linalg.batch_matmul ins(%arg0, %arg1 : tensor<1x5x3xf32>, tensor<1x3x6xf32>) outs([[FILLED]] : tensor<1x5x6xf32>) -> tensor<1x5x6xf32> + // CHECK: [[FILLED:%.+]] = linalg.fill ins([[C0]] : f32) 
outs([[INIT]] : tensor<1x5x6xf32>) + // CHECK: linalg.batch_matmul ins(%arg0, %arg1 : tensor<1x5x3xf32>, tensor<1x3x6xf32>) outs([[FILLED]] : tensor<1x5x6xf32>) %0 = "tosa.matmul"(%arg0, %arg1) : (tensor<1x5x3xf32>, tensor<1x3x6xf32>) -> (tensor<1x5x6xf32>) return %0 : tensor<1x5x6xf32> } @@ -17,10 +17,10 @@ func.func @matmul_quantized(%arg0: tensor<1x5x3xi8>, %arg1: tensor<1x3x6xi8>) -> (tensor<1x5x6xi32>) { // CHECK: [[C0:%.+]] = arith.constant 0 // CHECK: [[INIT:%.+]] = tensor.empty() - // CHECK: [[FILLED:%.+]] = linalg.fill ins([[C0]] : i32) outs([[INIT]] : tensor<1x5x6xi32>) -> tensor<1x5x6xi32> + // CHECK: [[FILLED:%.+]] = linalg.fill ins([[C0]] : i32) outs([[INIT]] : tensor<1x5x6xi32>) // CHECK: [[ONE:%.+]] = arith.constant 1 // CHECK: [[TWO:%.+]] = arith.constant 2 - // CHECK: linalg.quantized_batch_matmul ins(%arg0, %arg1, [[ONE]], [[TWO]] : tensor<1x5x3xi8>, tensor<1x3x6xi8>, i32, i32) outs([[FILLED]] : tensor<1x5x6xi32>) -> tensor<1x5x6xi32> + // CHECK: linalg.quantized_batch_matmul ins(%arg0, %arg1, [[ONE]], [[TWO]] : tensor<1x5x3xi8>, tensor<1x3x6xi8>, i32, i32) outs([[FILLED]] : tensor<1x5x6xi32>) %0 = "tosa.matmul"(%arg0, %arg1) {quantization_info = #tosa.matmul_quant<a_zp = 1, b_zp = 2>} : (tensor<1x5x3xi8>, tensor<1x3x6xi8>) -> (tensor<1x5x6xi32>) return %0 : tensor<1x5x6xi32> } @@ -33,8 +33,8 @@ // CHECK: %[[DIM:.+]] = tensor.dim %arg0, %[[C0]] // CHECK: %[[C0_0:.+]] = arith.constant 0 // CHECK: %[[INIT:.+]] = tensor.empty(%[[DIM]]) - // CHECK: %[[FILLED:.+]] = linalg.fill ins(%[[C0_0]] : f32) outs(%[[INIT]] : tensor<?x5x6xf32>) -> tensor<?x5x6xf32> - // CHECK: linalg.batch_matmul ins(%arg0, %arg1 : tensor<?x5x3xf32>, tensor<?x3x6xf32>) outs(%[[FILLED]] : tensor<?x5x6xf32>) -> tensor<?x5x6xf32> + // CHECK: %[[FILLED:.+]] = linalg.fill ins(%[[C0_0]] : f32) outs(%[[INIT]] : tensor<?x5x6xf32>) + // CHECK: linalg.batch_matmul ins(%arg0, %arg1 : tensor<?x5x3xf32>, tensor<?x3x6xf32>) outs(%[[FILLED]] : tensor<?x5x6xf32>) %0 = "tosa.matmul"(%arg0, %arg1) : (tensor<?x5x3xf32>, tensor<?x3x6xf32>) -> (tensor<?x5x6xf32>) return %0 : tensor<?x5x6xf32> } @@ -47,8 +47,8 @@ // CHECK: %[[DIM:.+]] = tensor.dim %arg1, %[[C2]] // CHECK: %[[C0:.+]] = arith.constant 0 // CHECK: %[[INIT:.+]] = tensor.empty(%[[DIM]]) - // CHECK: %[[FILLED:.+]] = linalg.fill ins(%[[C0]] : f32) outs(%[[INIT]] : tensor<1x5x?xf32>) -> tensor<1x5x?xf32> - // CHECK: linalg.batch_matmul ins(%arg0, %arg1 : tensor<1x5x3xf32>, tensor<1x3x?xf32>) outs(%[[FILLED]] : tensor<1x5x?xf32>) -> tensor<1x5x?xf32> + // CHECK: %[[FILLED:.+]] = linalg.fill ins(%[[C0]] : f32) outs(%[[INIT]] : tensor<1x5x?xf32>) + // CHECK: linalg.batch_matmul ins(%arg0, %arg1 : tensor<1x5x3xf32>, tensor<1x3x?xf32>) outs(%[[FILLED]] : tensor<1x5x?xf32>) %0 = "tosa.matmul"(%arg0, %arg1) : (tensor<1x5x3xf32>, tensor<1x3x?xf32>) -> (tensor<1x5x?xf32>) return %0 : tensor<1x5x?xf32> } @@ -59,8 +59,8 @@ func.func @matmul_dyn_independent_dim(%arg0: tensor<1x5x?xf32>, %arg1: tensor<1x?x6xf32>) -> (tensor<1x5x6xf32>) { // CHECK: %[[C0:.+]] = arith.constant 0 // CHECK: %[[INIT:.+]] = tensor.empty() - // CHECK: %[[FILLED:.+]] = linalg.fill ins(%[[C0]] : f32) outs(%[[INIT]] : tensor<1x5x6xf32>) -> tensor<1x5x6xf32> - // CHECK: linalg.batch_matmul ins(%arg0, %arg1 : tensor<1x5x?xf32>, tensor<1x?x6xf32>) outs(%[[FILLED]] : tensor<1x5x6xf32>) -> tensor<1x5x6xf32> + // CHECK: %[[FILLED:.+]] = linalg.fill ins(%[[C0]] : f32) outs(%[[INIT]] : tensor<1x5x6xf32>) + // CHECK: linalg.batch_matmul ins(%arg0, %arg1 : tensor<1x5x?xf32>, tensor<1x?x6xf32>) outs(%[[FILLED]] : tensor<1x5x6xf32>) %0 = "tosa.matmul"(%arg0, %arg1) : (tensor<1x5x?xf32>, tensor<1x?x6xf32>) -> (tensor<1x5x6xf32>) return %0 : tensor<1x5x6xf32> } @@ -78,7 +78,7 @@ // CHECK: 
[[PERM:%.+]] = arith.constant dense<[1, 0]> // CHECK: [[TRANSPOSE:%.+]] = "tosa.transpose"(%arg1, [[PERM]]) // CHECK: [[INITB:%.+]] = tensor.empty() - // CHECK: [[MATMUL:%.+]] = linalg.matmul ins(%arg0, [[TRANSPOSE]] : tensor<5x3xf32>, tensor<3x6xf32>) outs([[FILL]] : tensor<5x6xf32>) -> tensor<5x6xf32> + // CHECK: [[MATMUL:%.+]] = linalg.matmul ins(%arg0, [[TRANSPOSE]] : tensor<5x3xf32>, tensor<3x6xf32>) outs([[FILL]] : tensor<5x6xf32>) // CHECK: [[ADDED:%.+]] = linalg.generic {indexing_maps = [#[[$MAP1]], #[[$MAP2]], #[[$MAP2]]], iterator_types = ["parallel", "parallel"]} ins(%arg2, [[MATMUL]] : tensor<6xf32>, tensor<5x6xf32>) outs([[INITB]] : tensor<5x6xf32>) { // CHECK: ^bb0(%[[ARG3:[0-9a-zA-Z_]+]]: f32, %[[ARG4:[0-9a-zA-Z_]+]]: f32, %[[ARG5:[0-9a-zA-Z_]+]]: f32): // CHECK: [[ADD:%.+]] = arith.addf %[[ARG3]], %[[ARG4]] : f32 @@ -103,7 +103,7 @@ // CHECK: [[INITB:%.+]] = tensor.empty() // CHECK: [[ONE:%.+]] = arith.constant 1 // CHECK: [[TWO:%.+]] = arith.constant 2 - // CHECK: [[MATMUL:%.+]] = linalg.quantized_matmul ins(%arg0, [[TRANSPOSE]], [[ONE]], [[TWO]] : tensor<5x3xi8>, tensor<3x6xi8>, i32, i32) outs([[FILL]] : tensor<5x6xi32>) -> tensor<5x6xi32> + // CHECK: [[MATMUL:%.+]] = linalg.quantized_matmul ins(%arg0, [[TRANSPOSE]], [[ONE]], [[TWO]] : tensor<5x3xi8>, tensor<3x6xi8>, i32, i32) outs([[FILL]] : tensor<5x6xi32>) // CHECK: [[ADDED:%.+]] = linalg.generic {indexing_maps = [#[[$MAP1]], #[[$MAP2]], #[[$MAP2]]], iterator_types = ["parallel", "parallel"]} ins(%arg2, [[MATMUL]] : tensor<6xi32>, tensor<5x6xi32>) outs([[INITB]] // CHECK: ^bb0([[IN1:%.+]]: i32, [[IN2:%.+]]: i32, [[UNUSED:%.+]]: i32): // CHECK: [[ADD:%.+]] = arith.addi @@ -127,7 +127,7 @@ // CHECK: %[[PERM:.+]] = arith.constant dense<[1, 0]> // CHECK: %[[TRANSPOSE:.+]] = "tosa.transpose"(%arg1, %[[PERM]]) // CHECK: %[[INITB:.+]] = tensor.empty(%[[DIM]]) - // CHECK: %[[MATMUL:.+]] = linalg.matmul ins(%arg0, %[[TRANSPOSE]] : tensor, tensor<3x6xf32>) outs(%[[FILL]] : tensor) -> tensor + // CHECK: %[[MATMUL:.+]] = linalg.matmul ins(%arg0, %[[TRANSPOSE]] : tensor, tensor<3x6xf32>) outs(%[[FILL]] : tensor) // CHECK: %[[ADDED:.+]] = linalg.generic {indexing_maps = [#[[$MAP1]], #[[$MAP2]], #[[$MAP2]]], iterator_types = ["parallel", "parallel"]} ins(%arg2, %[[MATMUL]] : tensor<6xf32>, tensor) outs(%[[INITB]] : tensor) { // CHECK: ^bb0(%[[ARG3:[0-9a-zA-Z_]+]]: f32, %[[ARG4:[0-9a-zA-Z_]+]]: f32, %[[ARG5:[0-9a-zA-Z_]+]]: f32): // CHECK: %[[ADD:.+]] = arith.addf %[[ARG3]], %[[ARG4]] : f32 @@ -598,7 +598,7 @@ // CHECK: ^bb0(%[[ARG3:[0-9a-zA-Z_]+]]: index, %[[ARG4:[0-9a-zA-Z_]+]]: index, %[[ARG5:[0-9a-zA-Z_]+]]: index, %[[ARG6:[0-9a-zA-Z_]+]]: index): // CHECK: tensor.yield %cst : f32 // CHECK: } : tensor<2x?x?x3xf32> to tensor<2x?x?x3xf32> - // CHECK: %[[CONV:.+]] = linalg.depthwise_conv_2d_nhwc_hwcm {dilations = dense<[2, 1]> : tensor<2xi64>, strides = dense<[1, 2]> : tensor<2xi64>} ins(%[[PADDED]], %arg1 : tensor<2x?x?x3xf32>, tensor<3x6x3x5xf32>) outs(%{{.*}} : tensor<2x?x?x3x5xf32>) -> tensor<2x?x?x3x5xf32> + // CHECK: %[[CONV:.+]] = linalg.depthwise_conv_2d_nhwc_hwcm {dilations = dense<[2, 1]> : tensor<2xi64>, strides = dense<[1, 2]> : tensor<2xi64>} ins(%[[PADDED]], %arg1 : tensor<2x?x?x3xf32>, tensor<3x6x3x5xf32>) outs(%{{.*}} : tensor<2x?x?x3x5xf32>) // CHECK: %[[COLLAPSED:.+]] = tensor.collapse_shape %[[CONV]] {{\[}}[0], [1], [2], [3, 4]] %0 = "tosa.depthwise_conv2d"(%arg0, %arg1, %arg2) {pad = array, dilation = array, stride = array} : (tensor<2x?x?x3xf32>, tensor<3x6x3x5xf32>, tensor<15xf32>) -> tensor<2x?x?x15xf32> return 
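
For illustration, a minimal sketch of what the LinalgOps.cpp change above does to the assembly format (operand names and shapes here are hypothetical, chosen only for this example; they do not come from the patch). Before the change, a named structured op on tensors printed and parsed an explicit result type:

    %0 = linalg.matmul ins(%a, %b : tensor<4x8xf32>, tensor<8x4xf32>) outs(%c : tensor<4x4xf32>) -> tensor<4x4xf32>

After the change, the trailing result type is elided and re-derived from the outs operand at parse time:

    %0 = linalg.matmul ins(%a, %b : tensor<4x8xf32>, tensor<8x4xf32>) outs(%c : tensor<4x4xf32>)

The memref form is unaffected, since ops on buffers have no results; this is why the parser only calls result.addTypes when the first output type is a tensor. The test updates in the remainder of this patch are the mechanical consequence: every "-> tensor<...>" suffix on linalg named ops is dropped.
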
@@ -617,7 +617,7 @@ // CHECK-DAG: %[[CONV3D:.+]] = linalg.conv_3d_ndhwc_dhwcf // CHECK-SAME: {dilations = dense<1> : tensor<3xi64>, strides = dense<1> : tensor<3xi64>} // CHECK-SAME: ins(%arg0, %[[TRANSPOSE]] : tensor<1x49x48x47x27xf32>, tensor<3x4x5x27x28xf32>) - // CHECK-SAME: outs(%[[FILL]] : tensor<1x47x45x43x28xf32>) -> tensor<1x47x45x43x28xf32> + // CHECK-SAME: outs(%[[FILL]] : tensor<1x47x45x43x28xf32>) // CHECK: %[[GENERIC:.+]] = linalg.generic // CHECK-SAME: {indexing_maps = [#map, #map1, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel", "parallel"]} // CHECK-SAME: ins(%arg2, %[[CONV3D]] : tensor<28xf32>, tensor<1x47x45x43x28xf32>) @@ -644,7 +644,7 @@ // CHECK-DAG: %[[CONV3D:.+]] = linalg.conv_3d_ndhwc_dhwcf_q // CHECK-SAME: {dilations = dense<1> : tensor<3xi64>, strides = dense<1> : tensor<3xi64>} // CHECK-SAME: ins(%arg0, %[[TRANSPOSE]], %[[IZP]], %[[FZP]] : tensor<1x49x48x47x27xi8>, tensor<3x4x5x27x28xi8>, i32, i32) - // CHECK-SAME: outs(%[[FILL]] : tensor<1x47x45x43x28xi32>) -> tensor<1x47x45x43x28xi32> + // CHECK-SAME: outs(%[[FILL]] : tensor<1x47x45x43x28xi32>) // CHECK: %[[GENERIC:.+]] = linalg.generic // CHECK-SAME: {indexing_maps = [#map, #map1, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel", "parallel"]} // CHECK-SAME: ins(%arg2, %[[CONV3D]] : tensor<28xi32>, tensor<1x47x45x43x28xi32>) diff --git a/mlir/test/Dialect/Bufferization/Transforms/one-shot-bufferize-analysis-empty-tensor-elimination.mlir b/mlir/test/Dialect/Bufferization/Transforms/one-shot-bufferize-analysis-empty-tensor-elimination.mlir --- a/mlir/test/Dialect/Bufferization/Transforms/one-shot-bufferize-analysis-empty-tensor-elimination.mlir +++ b/mlir/test/Dialect/Bufferization/Transforms/one-shot-bufferize-analysis-empty-tensor-elimination.mlir @@ -10,7 +10,7 @@ // CHECK: linalg.fill // CHECK-SAME: {__inplace_operands_attr__ = ["none", "true"] - %1 = linalg.fill ins(%cst : f32) outs(%0 : tensor) -> tensor + %1 = linalg.fill ins(%cst : f32) outs(%0 : tensor) // CHECK: tensor.insert_slice // CHECK-SAME: {__inplace_operands_attr__ = ["true", "false", "none"] @@ -37,7 +37,7 @@ // CHECK: linalg.fill // CHECK-SAME: {__inplace_operands_attr__ = ["none", "true"] - %1 = linalg.fill ins(%cst : f32) outs(%0 : tensor) -> tensor + %1 = linalg.fill ins(%cst : f32) outs(%0 : tensor) // CHECK: tensor.insert_slice // CHECK-SAME: {__inplace_operands_attr__ = ["true", "true", "none"] diff --git a/mlir/test/Dialect/Bufferization/Transforms/one-shot-bufferize-empty-tensor-elimination.mlir b/mlir/test/Dialect/Bufferization/Transforms/one-shot-bufferize-empty-tensor-elimination.mlir --- a/mlir/test/Dialect/Bufferization/Transforms/one-shot-bufferize-empty-tensor-elimination.mlir +++ b/mlir/test/Dialect/Bufferization/Transforms/one-shot-bufferize-empty-tensor-elimination.mlir @@ -20,7 +20,7 @@ // insert_slice. The pass replaces the tensor.empty with an out-of-place // extract_slice. %a = tensor.empty(%sz) : tensor - %f = linalg.fill ins(%f0 : f32) outs(%a : tensor) -> tensor + %f = linalg.fill ins(%f0 : f32) outs(%a : tensor) // CHECK: memref.copy %[[FUNC_ARG]], %[[ALLOC]] : memref to memref // CHECK: %[[SV0_ALLOC:.*]] = memref.subview %[[ALLOC]][0] [%[[sz]]] [1] : memref to memref> @@ -53,7 +53,7 @@ %a = tensor.empty(%sz) : tensor // CHECK: linalg.fill ins({{.*}} : f32) outs(%[[T_SUBVIEW]] : memref) -> tensor + %f = linalg.fill ins(%f0 : f32) outs(%a : tensor) // Self-copy canonicalizes away later. 
%r1 = tensor.insert_slice %f into %t[42][%sz][1]: tensor into tensor @@ -80,7 +80,7 @@ %f = arith.sitofp %iv_i32 : i32 to f32 // CHECK: linalg.fill ins(%{{.*}}{{.*}}outs(%[[subview]] - %filled = linalg.fill ins(%f : f32) outs(%blank : tensor<5xf32>) -> tensor<5xf32> + %filled = linalg.fill ins(%f : f32) outs(%blank : tensor<5xf32>) // CHECK-NOT: memref.copy %inserted = tensor.insert_slice %filled into %bb[%iv][5][1] : tensor<5xf32> into tensor @@ -110,7 +110,7 @@ %f = arith.sitofp %iv_i32 : i32 to f32 // CHECK: linalg.fill ins(%{{.*}}{{.*}}outs(%[[subview]] - %filled = linalg.fill ins(%f : f32) outs(%blank : tensor<5xf32>) -> tensor<5xf32> + %filled = linalg.fill ins(%f : f32) outs(%blank : tensor<5xf32>) // CHECK-NOT: memref.copy %inserted = tensor.insert_slice %filled into %bb[%idx][5][1] : tensor<5xf32> into tensor @@ -130,7 +130,7 @@ func.func @shape_mismatch(%t: tensor<5x6x128xf32>) -> tensor<5x6x128xf32> { %cst = arith.constant 8.0 : f32 %0 = tensor.empty() : tensor<128xf32> - %1 = linalg.fill ins(%cst : f32) outs(%0 : tensor<128xf32>) -> tensor<128xf32> + %1 = linalg.fill ins(%cst : f32) outs(%0 : tensor<128xf32>) %2 = tensor.expand_shape %1 [[0, 1, 2]] : tensor<128xf32> into tensor<1x1x128xf32> %3 = tensor.insert_slice %2 into %t[2, 3, 0][1, 1, 128][1, 1, 1] @@ -159,7 +159,7 @@ %a = tensor.empty(%sz) : tensor // CHECK: linalg.fill ins({{.*}} : f32) outs(%[[T_SUBVIEW]] : memref) -> tensor + %f = linalg.fill ins(%f0 : f32) outs(%a : tensor) // Self-copy canonicalizes away later. scf.foreach_thread.perform_concurrently { diff --git a/mlir/test/Dialect/Bufferization/Transforms/one-shot-bufferize-partial.mlir b/mlir/test/Dialect/Bufferization/Transforms/one-shot-bufferize-partial.mlir --- a/mlir/test/Dialect/Bufferization/Transforms/one-shot-bufferize-partial.mlir +++ b/mlir/test/Dialect/Bufferization/Transforms/one-shot-bufferize-partial.mlir @@ -140,7 +140,7 @@ // CHECK: linalg.fill ins(%{{.*}}{{.*}}outs(%[[m1]] // CHECK: %[[filled_tensor:.*]] = bufferization.to_tensor %[[m1]] %t1 = bufferization.alloc_tensor() : tensor<10xf32> - %filled = linalg.fill ins(%cst : f32) outs(%t1 : tensor<10xf32>) -> tensor<10xf32> + %filled = linalg.fill ins(%cst : f32) outs(%t1 : tensor<10xf32>) // The transfer_write is out-of-place because "dummy_op" may read. 
// CHECK: %[[alloc:.*]] = memref.alloc() {{.*}} : memref<10xf32> diff --git a/mlir/test/Dialect/Bufferization/Transforms/one-shot-module-bufferize-allow-return-allocs.mlir b/mlir/test/Dialect/Bufferization/Transforms/one-shot-module-bufferize-allow-return-allocs.mlir --- a/mlir/test/Dialect/Bufferization/Transforms/one-shot-module-bufferize-allow-return-allocs.mlir +++ b/mlir/test/Dialect/Bufferization/Transforms/one-shot-module-bufferize-allow-return-allocs.mlir @@ -57,7 +57,7 @@ func.func @main(%t: tensor, %sz: index, %idx: index) -> (f32, f32) { %cst = arith.constant 1.0 : f32 %0 = call @return_slice(%t, %sz) : (tensor, index) -> (tensor) - %filled = linalg.fill ins(%cst : f32) outs(%t : tensor) -> tensor + %filled = linalg.fill ins(%cst : f32) outs(%t : tensor) %r1 = tensor.extract %0[%idx] : tensor %r2 = tensor.extract %filled[%idx] : tensor return %r1, %r2 : f32, f32 diff --git a/mlir/test/Dialect/Bufferization/Transforms/one-shot-module-bufferize-analysis.mlir b/mlir/test/Dialect/Bufferization/Transforms/one-shot-module-bufferize-analysis.mlir --- a/mlir/test/Dialect/Bufferization/Transforms/one-shot-module-bufferize-analysis.mlir +++ b/mlir/test/Dialect/Bufferization/Transforms/one-shot-module-bufferize-analysis.mlir @@ -76,21 +76,18 @@ // CHECK-SAME: {__inplace_operands_attr__ = ["true", "true", "false"]} %C = linalg.matmul ins(%A, %B: tensor<4x4xf32>, tensor<4x4xf32>) outs(%B: tensor<4x4xf32>) - -> tensor<4x4xf32> // matmul output operand interferes with input operand. // CHECK: linalg.matmul // CHECK-SAME: {__inplace_operands_attr__ = ["true", "true", "false"]} %D = linalg.matmul ins(%B, %A: tensor<4x4xf32>, tensor<4x4xf32>) outs(%B: tensor<4x4xf32>) - -> tensor<4x4xf32> // matmul output operand does not interferes with input operand. // CHECK: linalg.matmul // CHECK-SAME: {__inplace_operands_attr__ = ["true", "true", "true"]} %E = linalg.matmul ins(%A, %A: tensor<4x4xf32>, tensor<4x4xf32>) outs(%B: tensor<4x4xf32>) - -> tensor<4x4xf32> // CHECK: return // CHECK-SAME: __equivalent_func_args__ = [-1, -1, 1] @@ -260,7 +257,7 @@ // CHECK: linalg.fill // CHECK-SAME: {__inplace_operands_attr__ = ["none", "true"]} - %1 = linalg.fill ins(%cst : f32) outs(%0 : tensor) -> tensor + %1 = linalg.fill ins(%cst : f32) outs(%0 : tensor) // CHECK: tensor.insert_slice // CHECK-SAME: {__inplace_operands_attr__ = ["true", "true", "none", "none"]} @@ -292,7 +289,7 @@ // CHECK: linalg.fill // CHECK-SAME: {__inplace_operands_attr__ = ["none", "true"]} - %1 = linalg.fill ins(%cst : f32) outs(%0 : tensor) -> tensor + %1 = linalg.fill ins(%cst : f32) outs(%0 : tensor) // CHECK: tensor.insert_slice // CHECK-SAME: {__inplace_operands_attr__ = ["true", "true", "none", "none"]} @@ -304,7 +301,7 @@ // CHECK: linalg.fill // CHECK-SAME: {__inplace_operands_attr__ = ["none", "true"]} - %5 = linalg.fill ins(%cst : f32) outs(%4 : tensor) -> tensor + %5 = linalg.fill ins(%cst : f32) outs(%4 : tensor) %3 = vector.transfer_read %1[%idx2], %cst2 : tensor, vector<5xf32> @@ -337,14 +334,12 @@ // CHECK-SAME: {__inplace_operands_attr__ = ["true", "true", "false"]} %D = linalg.matmul ins(%sA, %B: tensor<4x4xf32>, tensor<4x4xf32>) outs(%B: tensor<4x4xf32>) - -> tensor<4x4xf32> // matmul output operand is inplaceable at the function boundary. 
// CHECK: linalg.matmul // CHECK-SAME: {__inplace_operands_attr__ = ["true", "true", "true"]} %E = linalg.matmul ins(%sA, %B: tensor<4x4xf32>, tensor<4x4xf32>) outs(%C: tensor<4x4xf32>) - -> tensor<4x4xf32> // CHECK: return // CHECK-SAME: __equivalent_func_args__ = [-1, 2] @@ -371,7 +366,6 @@ // CHECK-SAME: {__inplace_operands_attr__ = ["true", "true", "false"]} %D = linalg.matmul ins(%B, %C: tensor, tensor) outs(%sB: tensor<4x4xf32>) - -> tensor<4x4xf32> // Step 2. %sC forward propagates to an inplace write in %E. // %sC backward propagates to %C which is inplaceable. @@ -386,7 +380,6 @@ // CHECK-SAME: {__inplace_operands_attr__ = ["true", "true", "true"]} %E = linalg.matmul ins(%A, %sB: tensor<4x4xf32>, tensor<4x4xf32>) outs(%sC: tensor<4x4xf32>) - -> tensor<4x4xf32> return %D, %E: tensor<4x4xf32>, tensor<4x4xf32> } @@ -410,7 +403,7 @@ // CHECK: linalg.matmul // CHECK-SAME: {__inplace_operands_attr__ = ["true", "true", "true"]} - %18 = linalg.matmul ins(%A, %B : tensor<8x6xf32>, tensor<6x6xf32>) outs(%15 : tensor) -> tensor + %18 = linalg.matmul ins(%A, %B : tensor<8x6xf32>, tensor<6x6xf32>) outs(%15 : tensor) // CHECK: tensor.extract_slice // CHECK-SAME: {__inplace_operands_attr__ = ["true", "none", "none"]} @@ -451,7 +444,6 @@ // CHECK-SAME: {__inplace_operands_attr__ = ["true", "true", "true"]} %D = linalg.matmul ins(%B, %C: tensor, tensor) outs(%sB: tensor<4x4xf32>) - -> tensor<4x4xf32> // Step 2. %sC forward propagates to an inplace write in %E. // %sC backward propagates to %C which is inplaceable. @@ -466,7 +458,6 @@ // CHECK-SAME: {__inplace_operands_attr__ = ["true", "true", "true"]} %E = linalg.matmul ins(%A, %A: tensor<4x4xf32>, tensor<4x4xf32>) outs(%sC: tensor<4x4xf32>) - -> tensor<4x4xf32> return %D, %E: tensor<4x4xf32>, tensor<4x4xf32> } @@ -504,7 +495,7 @@ // CHECK-SAME: {__inplace_operands_attr__ = ["true", "false", "none", "none"]} %sA = tensor.extract_slice %A[0, 0][%idx, %idx][1, 1] : tensor to tensor %ssA = tensor.extract_slice %sA[0, 0][4, 4][1, 1] : tensor to tensor<4x4xf32> - %FA = linalg.fill ins(%f0 : f32) outs(%ssA : tensor<4x4xf32>) -> tensor<4x4xf32> + %FA = linalg.fill ins(%f0 : f32) outs(%ssA : tensor<4x4xf32>) %rsA = tensor.insert_slice %FA into %sA[0, 0][4, 4][1, 1] : tensor<4x4xf32> into tensor %rA = tensor.insert_slice %rsA into %A[0, 0][%idx, %idx][1, 1] : tensor into tensor @@ -527,7 +518,7 @@ %sB = tensor.extract_slice %B[0, 0][%idx, %idx][1, 1] : tensor to tensor %ssB = tensor.extract_slice %sB[0, 0][4, %idx][1, 1] : tensor to tensor<4x?xf32> %sssB = tensor.extract_slice %ssB[0, 0][4, 4][1, 1] : tensor<4x?xf32> to tensor<4x4xf32> - %FB = linalg.fill ins(%f0 : f32) outs(%sssB : tensor<4x4xf32>) -> tensor<4x4xf32> + %FB = linalg.fill ins(%f0 : f32) outs(%sssB : tensor<4x4xf32>) %rssB = tensor.insert_slice %FB into %ssB[0, 0][4, 4][1, 1] : tensor<4x4xf32> into tensor<4x?xf32> %rsB = tensor.insert_slice %rssB into %sB[0, 0][4, %idx][1, 1] : tensor<4x?xf32> into tensor %rB = tensor.insert_slice %rsB into %B[0, 0][%idx, %idx][1, 1] : tensor into tensor @@ -550,7 +541,7 @@ // CHECK-SAME: {__inplace_operands_attr__ = ["true", "true", "none", "none"]} %sC = tensor.extract_slice %C[0, 0][%idx, %idx][1, 1] : tensor to tensor %ssC = tensor.extract_slice %sC[0, 0][%sz1, 4][1, 1] : tensor to tensor - %FC = linalg.fill ins(%f0 : f32) outs(%ssC : tensor) -> tensor + %FC = linalg.fill ins(%f0 : f32) outs(%ssC : tensor) %rsC = tensor.insert_slice %FC into %sC[0, 0][%sz2, 4][1, 1] : tensor into tensor %rC = tensor.insert_slice %rsC into %C[0, 0][%idx, %idx][1, 1] : 
tensor into tensor @@ -577,12 +568,12 @@ // cannot bufferize inplace. // CHECK: fill // CHECK-SAME: {__inplace_operands_attr__ = ["none", "false"]} - %A = linalg.fill ins(%f1 : f32) outs(%I : tensor<64xf32>) -> tensor<64xf32> + %A = linalg.fill ins(%f1 : f32) outs(%I : tensor<64xf32>) // 1. Bufferizes inplace: no alias to %A is yet possible. // CHECK: fill // CHECK-SAME: {__inplace_operands_attr__ = ["none", "true"]} - %B = linalg.fill ins(%f2 : f32) outs(%I : tensor<64xf32>) -> tensor<64xf32> + %B = linalg.fill ins(%f2 : f32) outs(%I : tensor<64xf32>) call @foo(%A) : (tensor<64xf32>) -> () call @foo(%B) : (tensor<64xf32>) -> () @@ -613,12 +604,12 @@ // bufferize inplace. // CHECK: fill // CHECK-SAME: {__inplace_operands_attr__ = ["none", "false"]} - %A = linalg.fill ins(%f1 : f32) outs(%I : tensor<64xf32>) -> tensor<64xf32> + %A = linalg.fill ins(%f1 : f32) outs(%I : tensor<64xf32>) // 4. Bufferizes inplace: no alias to %A is yet possible. // CHECK: fill // CHECK-SAME: {__inplace_operands_attr__ = ["none", "true"]} - %B = linalg.fill ins(%f2 : f32) outs(%I : tensor<64xf32>) -> tensor<64xf32> + %B = linalg.fill ins(%f2 : f32) outs(%I : tensor<64xf32>) // 3. Does not read or write, bufferizes inplace. // CHECK: scf.for @@ -638,12 +629,12 @@ // cannot bufferize inplace. // CHECK: fill // CHECK-SAME: {__inplace_operands_attr__ = ["none", "false"]} - %A2 = linalg.fill ins(%f1 : f32) outs(%I2 : tensor<64xf32>) -> tensor<64xf32> + %A2 = linalg.fill ins(%f1 : f32) outs(%I2 : tensor<64xf32>) // 1. Bufferizes inplace: no alias to %A2 is yet possible. // CHECK: fill // CHECK-SAME: {__inplace_operands_attr__ = ["none", "true"]} - %B2 = linalg.fill ins(%f2 : f32) outs(%I2 : tensor<64xf32>) -> tensor<64xf32> + %B2 = linalg.fill ins(%f2 : f32) outs(%I2 : tensor<64xf32>) call @bar(%A2) : (tensor<64xf32>) -> () call @bar(%B2) : (tensor<64xf32>) -> () @@ -688,8 +679,8 @@ // CHECK-SAME: {__inplace_operands_attr__ = ["none", "false"]} // CHECK: linalg.fill // CHECK-SAME: {__inplace_operands_attr__ = ["none", "true"]} - %8 = linalg.fill ins(%cst_0 : f32) outs(%7 : tensor<256x256xf32>) -> tensor<256x256xf32> - %11 = linalg.fill ins(%cst_1 : f32) outs(%7 : tensor<256x256xf32>) -> tensor<256x256xf32> + %8 = linalg.fill ins(%cst_0 : f32) outs(%7 : tensor<256x256xf32>) + %11 = linalg.fill ins(%cst_1 : f32) outs(%7 : tensor<256x256xf32>) // CHECK: tensor.extract_slice // CHECK-SAME: {__inplace_operands_attr__ = ["true"]} @@ -701,7 +692,7 @@ %sB = tensor.extract_slice %11[0, 0][16, 256][1, 1]: tensor<256x256xf32> to tensor<16x256xf32> %r = linalg.matmul ins(%sA, %sB : tensor<256x16xf32>, tensor<16x256xf32>) - outs(%arg2 : tensor<256x256xf32>) -> tensor<256x256xf32> + outs(%arg2 : tensor<256x256xf32>) // CHECK: return // CHECK-SAME: __equivalent_func_args__ = [2] @@ -726,7 +717,7 @@ // CHECK-SAME: {__inplace_operands_attr__ = ["none", "false"]} // CHECK: vector.transfer_write // CHECK-SAME: {__inplace_operands_attr__ = ["none", "true", "none", "none"] - %8 = linalg.fill ins(%cst_0 : f32) outs(%7 : tensor<256x256xf32>) -> tensor<256x256xf32> + %8 = linalg.fill ins(%cst_0 : f32) outs(%7 : tensor<256x256xf32>) %9 = vector.transfer_read %arg0[%c0, %c0], %cst_0 {in_bounds = [false, true]} : tensor<518x518xf32>, vector<256x256xf32> %10 = vector.transfer_write %9, %8[%c0, %c0] {in_bounds = [true, true]} : vector<256x256xf32>, tensor<256x256xf32> @@ -734,7 +725,7 @@ // CHECK-SAME: {__inplace_operands_attr__ = ["none", "true"]} // CHECK: vector.transfer_write // CHECK-SAME: {__inplace_operands_attr__ = ["none", "true", 
"none", "none"] - %11 = linalg.fill ins(%cst_1 : f32) outs(%7 : tensor<256x256xf32>) -> tensor<256x256xf32> + %11 = linalg.fill ins(%cst_1 : f32) outs(%7 : tensor<256x256xf32>) %12 = vector.transfer_read %arg1[%c0, %c0], %cst_0 {in_bounds = [false, true]} : tensor<518x518xf32>, vector<256x256xf32> %13 = vector.transfer_write %12, %11[%c0, %c0] {in_bounds = [true, true]} : vector<256x256xf32>, tensor<256x256xf32> @@ -748,7 +739,7 @@ %sB = tensor.extract_slice %13[0, 0][16, 256][1, 1]: tensor<256x256xf32> to tensor<16x256xf32> %r = linalg.matmul ins(%sA, %sB : tensor<256x16xf32>, tensor<16x256xf32>) - outs(%arg2 : tensor<256x256xf32>) -> tensor<256x256xf32> + outs(%arg2 : tensor<256x256xf32>) // CHECK: return // CHECK-SAME: __equivalent_func_args__ = [2] @@ -779,7 +770,7 @@ // CHECK: linalg.fill // CHECK-SAME: {__inplace_operands_attr__ = ["none", "true"] - %0 = linalg.fill ins(%cst : f32) outs(%arg2 : tensor<62x90xf32>) -> tensor<62x90xf32> + %0 = linalg.fill ins(%cst : f32) outs(%arg2 : tensor<62x90xf32>) // CHECK: tensor.extract_slice // CHECK-SAME: {__inplace_operands_attr__ = ["true"] diff --git a/mlir/test/Dialect/Bufferization/Transforms/one-shot-module-bufferize-invalid.mlir b/mlir/test/Dialect/Bufferization/Transforms/one-shot-module-bufferize-invalid.mlir --- a/mlir/test/Dialect/Bufferization/Transforms/one-shot-module-bufferize-invalid.mlir +++ b/mlir/test/Dialect/Bufferization/Transforms/one-shot-module-bufferize-invalid.mlir @@ -222,7 +222,7 @@ func.func @mini_test_case1() -> tensor<10x20xf32> { %f0 = arith.constant 0.0 : f32 %t = bufferization.alloc_tensor() : tensor<10x20xf32> - %r = linalg.fill ins(%f0 : f32) outs(%t : tensor<10x20xf32>) -> tensor<10x20xf32> + %r = linalg.fill ins(%f0 : f32) outs(%t : tensor<10x20xf32>) // expected-error @+1 {{operand #0 may return/yield a new buffer allocation}} return %r : tensor<10x20xf32> } diff --git a/mlir/test/Dialect/Bufferization/Transforms/one-shot-module-bufferize.mlir b/mlir/test/Dialect/Bufferization/Transforms/one-shot-module-bufferize.mlir --- a/mlir/test/Dialect/Bufferization/Transforms/one-shot-module-bufferize.mlir +++ b/mlir/test/Dialect/Bufferization/Transforms/one-shot-module-bufferize.mlir @@ -244,7 +244,7 @@ // CHECK-NOT: copy func.func @does_not_read(%t: tensor) -> tensor { %f0 = arith.constant 0.0 : f32 - %r = linalg.fill ins(%f0 : f32) outs(%t : tensor) -> tensor + %r = linalg.fill ins(%f0 : f32) outs(%t : tensor) return %r : tensor } @@ -417,11 +417,11 @@ %v0 = arith.constant 0.0 : f32 // CHECK-NEXT: linalg.fill ins(%[[C0]] : f32) outs(%[[C]] : memref>) - %d = linalg.fill ins(%v0 : f32) outs(%c : tensor) -> tensor + %d = linalg.fill ins(%v0 : f32) outs(%c : tensor) // CHECK-NEXT: linalg.dot ins(%[[A]], %[[B]] : memref<64xf32, strided<[?], offset: ?>>, memref<64xf32, strided<[?], offset: ?>>) outs(%[[C]] : memref>) %e = linalg.dot ins(%a, %b : tensor<64xf32>,tensor<64xf32>) - outs(%d: tensor) -> tensor + outs(%d: tensor) // CHECK-NEXT: return return %e : tensor @@ -449,9 +449,9 @@ // CHECK-DAG: linalg.fill ins(%[[C1]] : f32) outs(%[[A]] : memref<64xf32>) // CHECK-DAG: linalg.fill ins(%[[C2]] : f32) outs(%[[B]] : memref<64xf32>) // CHECK-DAG: linalg.fill ins(%[[C0]] : f32) outs(%[[C]] : memref) - %AA = linalg.fill ins(%v1 : f32) outs(%A : tensor<64xf32>) -> tensor<64xf32> - %BB = linalg.fill ins(%v2 : f32) outs(%B : tensor<64xf32>) -> tensor<64xf32> - %CC = linalg.fill ins(%v0 : f32) outs(%C : tensor) -> tensor + %AA = linalg.fill ins(%v1 : f32) outs(%A : tensor<64xf32>) + %BB = linalg.fill ins(%v2 : f32) outs(%B : 
tensor<64xf32>) + %CC = linalg.fill ins(%v0 : f32) outs(%C : tensor) // CHECK-NEXT: call @init_and_dot(%[[cA]], %[[cB]], %[[cC]]) %res = call @init_and_dot(%AA, %BB, %CC) : diff --git a/mlir/test/Dialect/Bufferization/Transforms/transform-ops.mlir b/mlir/test/Dialect/Bufferization/Transforms/transform-ops.mlir --- a/mlir/test/Dialect/Bufferization/Transforms/transform-ops.mlir +++ b/mlir/test/Dialect/Bufferization/Transforms/transform-ops.mlir @@ -114,7 +114,7 @@ // CHECK-SAME: %[[C:.*]]: memref<12x6xf32>) -> memref<12x6xf32> { func.func @matmul(%A: tensor<12x9xf32>, %B: tensor<9x6xf32>, %C: tensor<12x6xf32>) -> tensor<12x6xf32> { // CHECK: linalg.matmul ins(%[[A]], %[[B]] : memref<12x9xf32>, memref<9x6xf32>) outs(%[[C]] : memref<12x6xf32>) - %D = linalg.matmul ins(%A, %B: tensor<12x9xf32>, tensor<9x6xf32>) outs(%C: tensor<12x6xf32>) -> tensor<12x6xf32> + %D = linalg.matmul ins(%A, %B: tensor<12x9xf32>, tensor<9x6xf32>) outs(%C: tensor<12x6xf32>) // CHECK: return %[[C]] : memref<12x6xf32> return %D : tensor<12x6xf32> } diff --git a/mlir/test/Dialect/GPU/transform-gpu-failing.mlir b/mlir/test/Dialect/GPU/transform-gpu-failing.mlir --- a/mlir/test/Dialect/GPU/transform-gpu-failing.mlir +++ b/mlir/test/Dialect/GPU/transform-gpu-failing.mlir @@ -126,7 +126,7 @@ %name = gpu.launch async[%stream] blocks(%arg3, %arg4, %arg5) in (%arg9 = %one, %arg10 = %one, %arg11 = %one) threads(%arg6, %arg7, %arg8) in (%arg12 = %one, %arg13 = %one, %arg14 = %one) { - %t = linalg.matmul ins(%x, %y: tensor<32x32xf32>, tensor<32x32xf32>) outs(%z : tensor<32x32xf32>) -> tensor<32x32xf32> + %t = linalg.matmul ins(%x, %y: tensor<32x32xf32>, tensor<32x32xf32>) outs(%z : tensor<32x32xf32>) gpu.terminator } return diff --git a/mlir/test/Dialect/LLVM/transform-e2e.mlir b/mlir/test/Dialect/LLVM/transform-e2e.mlir --- a/mlir/test/Dialect/LLVM/transform-e2e.mlir +++ b/mlir/test/Dialect/LLVM/transform-e2e.mlir @@ -8,7 +8,6 @@ // CHECK: llvm.intr.fmuladd{{.*}} %0 = linalg.matmul ins(%arg0, %arg1: tensor<2x4xf32>, tensor<4x6xf32>) outs(%arg2: tensor<2x6xf32>) - -> tensor<2x6xf32> return %0 : tensor<2x6xf32> } diff --git a/mlir/test/Dialect/Linalg/affine.mlir b/mlir/test/Dialect/Linalg/affine.mlir --- a/mlir/test/Dialect/Linalg/affine.mlir +++ b/mlir/test/Dialect/Linalg/affine.mlir @@ -10,7 +10,7 @@ %B = memref.view %arg0[%c0][%K, %N] : memref to memref %C = memref.view %arg0[%c0][%M, %N] : memref to memref linalg.matmul ins(%A, %B: memref, memref) - outs(%C: memref) + outs(%C: memref) return } @@ -19,7 +19,7 @@ //----------------------------------------------------------------------------// func.func @named_batch_matmul(%A: memref, %B: memref, %C: memref) { linalg.batch_matmul ins(%A, %B: memref, memref) - outs(%C : memref) + outs(%C : memref) return } // CHECK-LABEL: @named_batch_matmul diff --git a/mlir/test/Dialect/Linalg/bubble-up-extract-slice-op.mlir b/mlir/test/Dialect/Linalg/bubble-up-extract-slice-op.mlir --- a/mlir/test/Dialect/Linalg/bubble-up-extract-slice-op.mlir +++ b/mlir/test/Dialect/Linalg/bubble-up-extract-slice-op.mlir @@ -112,7 +112,7 @@ %lhs = arith.constant dense<1.0> : tensor<4x4xf32> %rhs = arith.constant dense<1.0> : tensor<4x4xf32> %dst = arith.constant dense<[[0.0, 1.0, 2.0, 3.0], [4.0, 5.0, 6.0, 7.0], [8.0, 9.0, 10.0, 11.0], [12.0, 13.0, 14.0, 15.0]]> : tensor<4x4xf32> - %0 = linalg.matmul ins(%lhs, %rhs : tensor<4x4xf32>, tensor<4x4xf32>) outs(%dst : tensor<4x4xf32>) -> tensor<4x4xf32> + %0 = linalg.matmul ins(%lhs, %rhs : tensor<4x4xf32>, tensor<4x4xf32>) outs(%dst : tensor<4x4xf32>) %1 = 
tensor.extract_slice %0[1,1][2,2][1,1] : tensor<4x4xf32> to tensor<2x2xf32> return %1 : tensor<2x2xf32> } @@ -121,7 +121,7 @@ // CHECK: %[[SLICE0:.+]] = arith.constant dense<1.000000e+00> : tensor<2x4xf32> // CHECK: %[[SLICE1:.+]] = arith.constant dense<1.000000e+00> : tensor<4x2xf32> // CHECK: %[[SLICE3:.+]] = tensor.extract_slice %[[CST:.+]][1, 1] [2, 2] [1, 1] : tensor<4x4xf32> to tensor<2x2xf32> -// CHECK: %[[MATMUL:.+]] = linalg.matmul ins(%[[SLICE0]], %[[SLICE1]] : tensor<2x4xf32>, tensor<4x2xf32>) outs(%[[SLICE3]] : tensor<2x2xf32>) -> tensor<2x2xf32> +// CHECK: %[[MATMUL:.+]] = linalg.matmul ins(%[[SLICE0]], %[[SLICE1]] : tensor<2x4xf32>, tensor<4x2xf32>) outs(%[[SLICE3]] : tensor<2x2xf32>) // CHECK: return %[[MATMUL]] : tensor<2x2xf32> //----- @@ -136,12 +136,12 @@ %cst = arith.constant 0.0 : f32 %init = tensor.empty() : tensor<1x112x112x32xf32> - %fill = linalg.fill ins(%cst : f32) outs(%init : tensor<1x112x112x32xf32>) -> tensor<1x112x112x32xf32> + %fill = linalg.fill ins(%cst : f32) outs(%init : tensor<1x112x112x32xf32>) %conv = linalg.conv_2d_nhwc_hwcf {dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%input, %filter : tensor<1x225x225x3xf32>, tensor<3x3x3x32xf32>) - outs(%fill : tensor<1x112x112x32xf32>) -> tensor<1x112x112x32xf32> + outs(%fill : tensor<1x112x112x32xf32>) %slice = tensor.extract_slice %conv [0, 64, 64, 16] [1, 32, 32, 16] [1, 1, 1, 1] : tensor<1x112x112x32xf32> to tensor<1x32x32x16xf32> @@ -153,8 +153,8 @@ // CHECK: %[[SLICE0:.+]] = tensor.extract_slice %arg0[0, 128, 128, 0] [1, 65, 65, 3] [1, 1, 1, 1] : tensor<1x225x225x3xf32> to tensor<1x65x65x3xf32> // CHECK: %[[SLICE1:.+]] = tensor.extract_slice %arg1[0, 0, 0, 16] [3, 3, 3, 16] [1, 1, 1, 1] : tensor<3x3x3x32xf32> to tensor<3x3x3x16xf32> // CHECK: %[[SLICE2:.+]] = tensor.extract_slice %[[INIT]][0, 64, 64, 16] [1, 32, 32, 16] [1, 1, 1, 1] : tensor<1x112x112x32xf32> to tensor<1x32x32x16xf32> -// CHECK: %[[FILL:.+]] = linalg.fill ins(%[[CST:.+]] : f32) outs(%[[SLICE2]] : tensor<1x32x32x16xf32>) -> tensor<1x32x32x16xf32> -// CHECK: %[[CONV:.+]] = linalg.conv_2d_nhwc_hwcf {dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%[[SLICE0]], %[[SLICE1]] : tensor<1x65x65x3xf32>, tensor<3x3x3x16xf32>) outs(%[[FILL]] : tensor<1x32x32x16xf32>) -> tensor<1x32x32x16xf32> +// CHECK: %[[FILL:.+]] = linalg.fill ins(%[[CST:.+]] : f32) outs(%[[SLICE2]] : tensor<1x32x32x16xf32>) +// CHECK: %[[CONV:.+]] = linalg.conv_2d_nhwc_hwcf {dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%[[SLICE0]], %[[SLICE1]] : tensor<1x65x65x3xf32>, tensor<3x3x3x16xf32>) outs(%[[FILL]] : tensor<1x32x32x16xf32>) // CHECK: return %[[CONV]] : tensor<1x32x32x16xf32> //----- @@ -163,7 +163,7 @@ func.func @rank_reducing_slice(%width : index) -> tensor<1x1x1x?xf32> { %cst = arith.constant 1.000000e+00 : f32 %init = tensor.empty(%width) : tensor<1x?xf32> - %fill = linalg.fill ins(%cst : f32) outs(%init : tensor<1x?xf32>) -> tensor<1x?xf32> + %fill = linalg.fill ins(%cst : f32) outs(%init : tensor<1x?xf32>) %slice = tensor.extract_slice %fill[0, 0] [1, %width] [1, 1] : tensor<1x?xf32> to tensor %expand = tensor.expand_shape %slice [[0, 1, 2, 3]] : tensor into tensor<1x1x1x?xf32> return %expand : tensor<1x1x1x?xf32> diff --git a/mlir/test/Dialect/Linalg/bufferize.mlir b/mlir/test/Dialect/Linalg/bufferize.mlir --- a/mlir/test/Dialect/Linalg/bufferize.mlir +++ b/mlir/test/Dialect/Linalg/bufferize.mlir @@ -170,7 +170,7 @@ // CHECK: linalg.fill ins(%cst : f32) outs(%[[ALLOC]] : 
memref) // CHECK: %[[TENSOR:.*]] = bufferization.to_tensor %[[ALLOC]] : memref // CHECK: return %[[TENSOR]] - %0 = linalg.fill ins(%c0 : f32) outs(%arg0 : tensor) -> tensor + %0 = linalg.fill ins(%c0 : f32) outs(%arg0 : tensor) return %0 : tensor } @@ -179,7 +179,7 @@ // CHECK-LABEL: func @bufferize_dot func.func @bufferize_dot(%in: tensor<4xf32>, %out: tensor) -> tensor { %dot = linalg.dot ins(%in, %in : tensor<4xf32>, tensor<4xf32>) - outs(%out : tensor) -> tensor + outs(%out : tensor) return %dot : tensor // CHECK: %[[ALLOC:.*]] = memref.alloc // TODO: The copy is not necessary. diff --git a/mlir/test/Dialect/Linalg/canonicalize.mlir b/mlir/test/Dialect/Linalg/canonicalize.mlir --- a/mlir/test/Dialect/Linalg/canonicalize.mlir +++ b/mlir/test/Dialect/Linalg/canonicalize.mlir @@ -56,9 +56,9 @@ %tc = tensor.cast %c : tensor<3x?xf32> to tensor // CHECK: linalg.matmul ins({{.*}}tensor<3x4xf32>, tensor<4x?xf32>) - // CHECK-SAME: outs({{.*}}tensor<3x?xf32>) -> tensor<3x?xf32> + // CHECK-SAME: outs({{.*}}tensor<3x?xf32>) %0 = linalg.matmul ins(%ta, %tb: tensor, tensor) - outs(%tc: tensor) -> tensor + outs(%tc: tensor) %1 = tensor.cast %0 : tensor to tensor<3x?xf32> @@ -79,9 +79,9 @@ %tc = tensor.cast %c : tensor<*xf32> to tensor // CHECK: linalg.matmul ins({{.*}}tensor, tensor) - // CHECK-SAME: outs({{.*}}tensor) -> tensor + // CHECK-SAME: outs({{.*}}tensor) %0 = linalg.matmul ins(%ta, %tb: tensor, tensor) - outs(%tc: tensor) -> tensor + outs(%tc: tensor) // CHECK: tensor.cast %1 = tensor.cast %0 : tensor to tensor<*xf32> @@ -98,7 +98,7 @@ func.func @linalg_effects(%a : tensor, %b : memref, %c : tensor) { // CHECK-NOT: %{{.*}} = linalg.matmul %t = linalg.matmul ins(%a, %b : tensor, memref) - outs(%c : tensor) -> tensor + outs(%c : tensor) // CHECK: linalg.matmul linalg.matmul ins(%a, %c : tensor, tensor) @@ -247,9 +247,9 @@ %c0_i32 = arith.constant 0 : i32 %c0 = arith.constant 0 : index %cst = arith.constant 0.000000e+00 : f32 - %0 = linalg.fill ins(%c0_i32 : i32) outs(%arg0 : tensor<7x7xi32>) -> tensor<7x7xi32> + %0 = linalg.fill ins(%c0_i32 : i32) outs(%arg0 : tensor<7x7xi32>) %1 = linalg.matmul ins(%arg1, %arg1: tensor<7x7xf32>, tensor<7x7xf32>) - outs(%arg1: tensor<7x7xf32>) -> tensor<7x7xf32> + outs(%arg1: tensor<7x7xf32>) %2 = linalg.generic #trait outs(%arg0 : tensor<7x7xi32>) { ^bb(%3: i32) : linalg.yield %3 : i32 @@ -270,7 +270,7 @@ %c21 = arith.constant 21 : index %c42 = arith.constant 42 : index %0 = tensor.empty(%c21, %c42) : tensor - %1 = linalg.fill ins(%arg1 : f32) outs(%0 : tensor) -> tensor + %1 = linalg.fill ins(%arg1 : f32) outs(%0 : tensor) %2 = tensor.dim %arg0, %c0 : tensor %3 = tensor.dim %arg0, %c1 : tensor %4 = tensor.insert_slice %arg0 into %1[%arg2, %arg3] [%2, %3] [1, 1] : tensor into tensor @@ -303,7 +303,7 @@ // CHECK: %[[COLLAPSE:.+]] = tensor.collapse_shape // CHECK-NEXT: %[[FILL:.+]] = linalg.fill ins(%cst : f32) // CHECK-SAME: outs(%[[COLLAPSE]] : tensor<6x4xf32>) - %fill = linalg.fill ins(%zero : f32) outs(%empty : tensor<1x2x3x4xf32>) -> tensor<1x2x3x4xf32> + %fill = linalg.fill ins(%zero : f32) outs(%empty : tensor<1x2x3x4xf32>) %reshape = tensor.collapse_shape %fill [[0, 1, 2], [3]] : tensor<1x2x3x4xf32> into tensor<6x4xf32> // CHECK: return %[[FILL]] : tensor<6x4xf32> @@ -317,7 +317,7 @@ func.func @fold_fill_reshape_dynamic(%arg0 : tensor) -> tensor { %zero = arith.constant 0.0 : f32 // CHECK: %[[RESHAPE:.+]] = tensor.collapse_shape %[[ARG0]] - %0 = linalg.fill ins(%zero : f32) outs(%arg0 : tensor) -> tensor + %0 = linalg.fill ins(%zero : f32) outs(%arg0 : 
tensor) // CHECK: %[[RESULT:.+]] = linalg.fill ins(%{{.+}}{{.*}}outs(%[[RESHAPE]] %1 = tensor.collapse_shape %0 [[0, 1, 2], [3, 4]] : tensor into tensor @@ -351,7 +351,7 @@ func.func @fold_static_pad_fill() -> tensor<412x276xf32> { %f0 = arith.constant 0.0 : f32 %empty = tensor.empty() : tensor<400x273xf32> - %fill = linalg.fill ins(%f0 : f32) outs(%empty : tensor<400x273xf32>) -> tensor<400x273xf32> + %fill = linalg.fill ins(%f0 : f32) outs(%empty : tensor<400x273xf32>) %pad = tensor.pad %fill low[4, 1] high[8, 2] { ^bb0(%arg1: index, %arg2: index): tensor.yield %f0 : f32 @@ -382,7 +382,7 @@ // CHECK: return %[[FILL]] func.func @fold_dynamic_pad_fill(%empty: tensor<8x?x16x32xf32>, %low0: index, %low3: index, %high2: index, %high3: index) -> tensor { %f0 = arith.constant 0.0 : f32 - %fill = linalg.fill ins(%f0 : f32) outs(%empty : tensor<8x?x16x32xf32>) -> tensor<8x?x16x32xf32> + %fill = linalg.fill ins(%f0 : f32) outs(%empty : tensor<8x?x16x32xf32>) %pad = tensor.pad %fill low[%low0, 8, 7, %low3] high[1, 2, %high2, %high3] { ^bb0(%arg1: index, %arg2: index, %arg3: index, %arg4: index): tensor.yield %f0 : f32 @@ -397,7 +397,7 @@ %f0 = arith.constant 0.0 : f32 %f1 = arith.constant 1.0 : f32 %empty = tensor.empty() : tensor<400x273xf32> - %fill = linalg.fill ins(%f0 : f32) outs(%empty : tensor<400x273xf32>) -> tensor<400x273xf32> + %fill = linalg.fill ins(%f0 : f32) outs(%empty : tensor<400x273xf32>) // CHECK: tensor.pad %pad = tensor.pad %fill low[4, 1] high[8, 2] { ^bb0(%arg1: index, %arg2: index): @@ -584,7 +584,7 @@ tensor.yield %f0 : f32 } : tensor to tensor<8x128x128xf32> %empty = tensor.empty() : tensor<8x384x384xf32> - %fill = linalg.fill ins(%f0 : f32) outs(%empty : tensor<8x384x384xf32>) -> tensor<8x384x384xf32> + %fill = linalg.fill ins(%f0 : f32) outs(%empty : tensor<8x384x384xf32>) %0 = tensor.insert_slice %pad into %fill[0, 1, 2] [8, 128, 128] [1, 1, 1] : tensor<8x128x128xf32> into tensor<8x384x384xf32> return %0: tensor<8x384x384xf32> } @@ -605,7 +605,7 @@ tensor.yield %f0 : f32 } : tensor<7x123x124xf32> to tensor<8x128x128xf32> %empty = tensor.empty() : tensor<8x384x384xf32> - %fill = linalg.fill ins(%f0 : f32) outs(%empty : tensor<8x384x384xf32>) -> tensor<8x384x384xf32> + %fill = linalg.fill ins(%f0 : f32) outs(%empty : tensor<8x384x384xf32>) %0 = tensor.insert_slice %a into %fill[%offset, 0, 0] [8, 128, 128] [1, 1, 1] : tensor<8x128x128xf32> into tensor<8x384x384xf32> %1 = tensor.insert_slice %a into %0 [0, 128, %offset][8, 128, 128] [1, 1, 1] : tensor<8x128x128xf32> into tensor<8x384x384xf32> %2 = tensor.insert_slice %pad into %1 [0, 0, 256] [8, 128, 128] [1, 1, 1] : tensor<8x128x128xf32> into tensor<8x384x384xf32> @@ -624,7 +624,7 @@ tensor.yield %f0 : f32 } : tensor<7x123x124xf32> to tensor<8x128x128xf32> %empty = tensor.empty() : tensor<8x384x384xf32> - %fill = linalg.fill ins(%f0 : f32) outs(%empty : tensor<8x384x384xf32>) -> tensor<8x384x384xf32> + %fill = linalg.fill ins(%f0 : f32) outs(%empty : tensor<8x384x384xf32>) %0 = tensor.insert_slice %a into %fill[%offset, 0, 0] [8, 128, 128] [1, 1, 1] : tensor<8x128x128xf32> into tensor<8x384x384xf32> %1 = tensor.insert_slice %a into %0 [0, 0, 129] [8, 128, 128] [1, 1, 1] : tensor<8x128x128xf32> into tensor<8x384x384xf32> // Range overlap with %1 at dim#3 @@ -644,7 +644,7 @@ tensor.yield %f0 : f32 } : tensor<7x123x124xf32> to tensor<8x128x128xf32> %empty = tensor.empty() : tensor<8x384x384xf32> - %fill = linalg.fill ins(%f0 : f32) outs(%empty : tensor<8x384x384xf32>) -> tensor<8x384x384xf32> + %fill = linalg.fill ins(%f0 : 
f32) outs(%empty : tensor<8x384x384xf32>) %0 = tensor.insert_slice %a into %fill[0, 0, %offset] [8, 128, 128] [1, 1, 1] : tensor<8x128x128xf32> into tensor<8x384x384xf32> %1 = tensor.insert_slice %a into %0 [0, 128, 255] [8, 128, 128] [1, 1, 1] : tensor<8x128x128xf32> into tensor<8x384x384xf32> // Range overlap with %0 at dim#3 @@ -664,7 +664,7 @@ tensor.yield %f0 : f32 } : tensor<7x123x124xf32> to tensor<8x128x128xf32> %empty = tensor.empty() : tensor<8x384x384xf32> - %fill = linalg.fill ins(%f0 : f32) outs(%empty : tensor<8x384x384xf32>) -> tensor<8x384x384xf32> + %fill = linalg.fill ins(%f0 : f32) outs(%empty : tensor<8x384x384xf32>) // Overlap btween %0 and %1 is fine but not with %2 is fine. // CHECK-COUNT-3: tensor.insert_slice %0 = tensor.insert_slice %a into %fill[0, 0, %offset] [8, 128, 128] [1, 1, 1] : tensor<8x128x128xf32> into tensor<8x384x384xf32> @@ -687,7 +687,7 @@ } : tensor<7x123x124xf32> to tensor<8x128x128xf32> %empty = tensor.empty() : tensor<8x384x384xf32> // Different filling value than padding value. - %fill = linalg.fill ins(%f1 : f32) outs(%empty : tensor<8x384x384xf32>) -> tensor<8x384x384xf32> + %fill = linalg.fill ins(%f1 : f32) outs(%empty : tensor<8x384x384xf32>) %0 = tensor.insert_slice %a into %fill[%offset, 0, 0] [8, 128, 128] [1, 1, 1] : tensor<8x128x128xf32> into tensor<8x384x384xf32> %1 = tensor.insert_slice %a into %0 [0, 128, %offset][8, 128, 128] [1, 1, 1] : tensor<8x128x128xf32> into tensor<8x384x384xf32> %2 = tensor.insert_slice %pad into %1 [0, 0, 256] [8, 128, 128] [1, 1, 1] : tensor<8x128x128xf32> into tensor<8x384x384xf32> @@ -699,7 +699,7 @@ func.func @fold_linalgop_with_cast_consumer(%arg0 : tensor, %arg1 : tensor, %arg2 : tensor) -> (tensor<4x8xf32>, tensor) { %0 = linalg.matmul ins(%arg0, %arg1 : tensor, tensor) - outs(%arg2 : tensor) -> tensor + outs(%arg2 : tensor) %1 = tensor.cast %0 : tensor to tensor<4x8xf32> return %1, %0 : tensor<4x8xf32>, tensor } @@ -723,7 +723,7 @@ func.func @linalgop_with_cond_cast_consumer(%arg0 : tensor, %arg1 : tensor, %arg2 : tensor, %arg3 : i1) -> tensor { %0 = linalg.matmul ins(%arg0, %arg1 : tensor, tensor) - outs(%arg2 : tensor) -> tensor + outs(%arg2 : tensor) scf.if %arg3 { %1 = tensor.cast %0 : tensor to tensor<4x8xf32> func.call @some_use(%1) : (tensor<4x8xf32>) -> () @@ -735,7 +735,7 @@ // CHECK-LABEL: func @linalgop_with_cond_cast_consumer // CHECK-SAME: (%[[ARG0:.*]]: tensor, %[[ARG1:.*]]: tensor, %[[ARG2:.*]]: tensor, %[[ARG3:.*]]: i1) // CHECK: %[[RES:.*]] = linalg.matmul ins(%[[ARG0]], %[[ARG1]] : tensor, tensor) -// CHECK-SAME: outs(%[[ARG2]] : tensor) -> tensor +// CHECK-SAME: outs(%[[ARG2]] : tensor) // CHECK: scf.if %[[ARG3]] { // CHECK: %[[CAST:.*]] = tensor.cast %[[RES]] : tensor to tensor<4x8xf32> // CHECK: func.call @some_use(%[[CAST]]) : (tensor<4x8xf32>) -> () @@ -749,7 +749,7 @@ %arg1 : tensor, %arg2 : tensor) -> (tensor<4x8x12x16xf32>, tensor) { %0 = linalg.conv_2d_nchw_fchw ins(%arg0, %arg1 : tensor, tensor) - outs(%arg2 : tensor) -> tensor + outs(%arg2 : tensor) %1 = tensor.cast %0 : tensor to tensor<4x8x12x16xf32> return %1, %0 : tensor<4x8x12x16xf32>, tensor } diff --git a/mlir/test/Dialect/Linalg/drop-unit-extent-dims.mlir b/mlir/test/Dialect/Linalg/drop-unit-extent-dims.mlir --- a/mlir/test/Dialect/Linalg/drop-unit-extent-dims.mlir +++ b/mlir/test/Dialect/Linalg/drop-unit-extent-dims.mlir @@ -313,7 +313,7 @@ func.func @fold_unit_dim_for_empty_tensor(%input: tensor<1x1000xf32>) -> tensor<1xf32> { %cst = arith.constant 0.0 : f32 %init = tensor.empty() : tensor<1xf32> - %fill = 
linalg.fill ins(%cst : f32) outs(%init : tensor<1xf32>) -> tensor<1xf32> + %fill = linalg.fill ins(%cst : f32) outs(%init : tensor<1xf32>) %add = linalg.generic { indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d0)>], iterator_types = ["parallel", "reduction"]} @@ -334,7 +334,7 @@ // CHECK: %[[INPUT_RESHAPE:.+]] = tensor.collapse_shape %{{.+}} {{\[}}[0, 1]] : tensor<1x1000xf32> into tensor<1000xf32> // CHECK: %[[INIT:.+]] = tensor.empty() : tensor -// CHECK: %[[FILL:.+]] = linalg.fill ins(%cst : f32) outs(%[[INIT]] : tensor) -> tensor +// CHECK: %[[FILL:.+]] = linalg.fill ins(%cst : f32) outs(%[[INIT]] : tensor) // CHECK: %[[GENERIC:.+]] = linalg.generic // CHECK-SAME: indexing_maps = [#[[MAP1]], #[[MAP2]]] // CHECK-SAME: iterator_types = ["reduction"] @@ -378,7 +378,7 @@ %c3 = arith.constant 3 : index %0 = tensor.dim %arg0, %c3 : tensor<1x?x1x?xf32> %1 = tensor.empty(%0) : tensor<1x?xf32> - %2 = linalg.fill ins(%cst : f32) outs(%1 : tensor<1x?xf32>) -> tensor<1x?xf32> + %2 = linalg.fill ins(%cst : f32) outs(%1 : tensor<1x?xf32>) %3 = linalg.generic { indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1)>], @@ -412,7 +412,7 @@ %cst = arith.constant 1.000000e+00 : f32 %c3 = arith.constant 3 : index %1 = tensor.empty() : tensor<1x1xf32> - %2 = linalg.fill ins(%cst : f32) outs(%1 : tensor<1x1xf32>) -> tensor<1x1xf32> + %2 = linalg.fill ins(%cst : f32) outs(%1 : tensor<1x1xf32>) %3 = linalg.generic { indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1)>], @@ -447,7 +447,7 @@ %c2 = arith.constant 2 : index %0 = tensor.dim %arg0, %c2 : tensor %1 = tensor.empty(%0) : tensor - %2 = linalg.fill ins(%cst : f32) outs(%1 : tensor) -> tensor + %2 = linalg.fill ins(%cst : f32) outs(%1 : tensor) %3 = linalg.generic { indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1)>], @@ -882,7 +882,7 @@ %0 = tensor.empty() : tensor<4x2xf32> %res = scf.foreach_thread (%arg0, %arg1) in (%c4, %c2) shared_outs(%o = %0) -> (tensor<4x2xf32>) { %1 = tensor.empty() : tensor<1x1xf32> - %2 = linalg.fill ins(%cst : f32) outs(%1 : tensor<1x1xf32>) -> tensor<1x1xf32> + %2 = linalg.fill ins(%cst : f32) outs(%1 : tensor<1x1xf32>) scf.foreach_thread.perform_concurrently { // CHECK: tensor.parallel_insert_slice %{{[0-9a-z]*}} into %{{[0-9a-z]*}} // CHECK-SAME: [%{{.*}}, %{{.*}}] [1, 1] [1, 1] : tensor into tensor<4x2xf32> diff --git a/mlir/test/Dialect/Linalg/erase-unused-operands-and-results.mlir b/mlir/test/Dialect/Linalg/erase-unused-operands-and-results.mlir --- a/mlir/test/Dialect/Linalg/erase-unused-operands-and-results.mlir +++ b/mlir/test/Dialect/Linalg/erase-unused-operands-and-results.mlir @@ -214,7 +214,7 @@ func.func @loop_dim_operand(%arg0 : tensor) -> tensor { %cst = arith.constant 0 : i32 %init = tensor.empty() : tensor - %fill = linalg.fill ins(%cst : i32) outs(%init : tensor) -> tensor + %fill = linalg.fill ins(%cst : i32) outs(%init : tensor) %0 = linalg.generic { indexing_maps = [affine_map<(d0) -> (d0)>, affine_map<(d0) -> ()>], iterator_types = ["reduction"]} @@ -239,7 +239,7 @@ %cst = arith.constant 0 : i32 %init1 = tensor.empty(%arg0) : tensor %init = tensor.empty() : tensor - %fill = linalg.fill ins(%cst : i32) outs(%init : tensor) -> tensor + %fill = linalg.fill ins(%cst : i32) outs(%init : tensor) %0:2 = linalg.generic { indexing_maps = [affine_map<(d0) -> (d0)>, affine_map<(d0) -> ()>], iterator_types = ["parallel"]} diff 
--git a/mlir/test/Dialect/Linalg/fusion-elementwise-ops.mlir b/mlir/test/Dialect/Linalg/fusion-elementwise-ops.mlir --- a/mlir/test/Dialect/Linalg/fusion-elementwise-ops.mlir +++ b/mlir/test/Dialect/Linalg/fusion-elementwise-ops.mlir @@ -934,7 +934,7 @@ linalg.yield %arg2 : f32 } -> tensor %6 = tensor.empty(%arg1) : tensor - %7 = linalg.fill ins(%cst : f32) outs(%6 : tensor) -> tensor + %7 = linalg.fill ins(%cst : f32) outs(%6 : tensor) %8 = linalg.generic { indexing_maps = [#map2, #map3], iterator_types = ["parallel", "reduction"] @@ -1005,7 +1005,7 @@ %cst = arith.constant 7.0 : f32 %0 = tensor.dim %arg0, %c0 : tensor %1 = tensor.empty(%0) : tensor - %2 = linalg.fill ins(%cst : f32) outs(%1 : tensor) -> tensor + %2 = linalg.fill ins(%cst : f32) outs(%1 : tensor) %3 = tensor.empty(%0) : tensor %4 = linalg.generic {indexing_maps = [#map0, #map0, #map0], iterator_types=["parallel"]} ins(%arg0, %2 : tensor, tensor) outs (%3:tensor) { ^bb0(%arg1: f32, %arg2: f32, %arg3: f32): @@ -1029,7 +1029,7 @@ %cst = arith.constant 7.0 : f32 %0 = tensor.dim %arg0, %c0 : tensor %1 = tensor.empty(%0) : tensor - %2 = linalg.fill ins(%cst : f32) outs(%1 : tensor) -> tensor + %2 = linalg.fill ins(%cst : f32) outs(%1 : tensor) %3 = tensor.empty(%0) : tensor %4 = linalg.generic {indexing_maps = [#map0, #map0, #map0], iterator_types=["parallel"]} ins(%arg0, %2 : tensor, tensor) outs (%3:tensor) { ^bb0(%arg1: f16, %arg2: f16, %arg3: f16): @@ -1056,9 +1056,9 @@ %0 = tensor.dim %arg0, %c0 : tensor %1 = tensor.dim %arg0, %c1 : tensor %2 = tensor.empty(%0, %1) : tensor - %3 = linalg.fill ins(%cst1 : f32) outs(%2 : tensor) -> tensor + %3 = linalg.fill ins(%cst1 : f32) outs(%2 : tensor) %4 = tensor.empty(%1, %0) : tensor - %5 = linalg.fill ins(%cst2 : f32) outs(%4 : tensor) -> tensor + %5 = linalg.fill ins(%cst2 : f32) outs(%4 : tensor) %6 = tensor.empty(%0, %1) : tensor %7 = linalg.generic {indexing_maps = [#map0, #map1, #map0], iterator_types=["parallel","parallel"]} ins(%3, %5 : tensor, tensor) outs (%6:tensor) { ^bb0(%arg1: f32, %arg2: f32, %arg3: f32): diff --git a/mlir/test/Dialect/Linalg/generalize-named-ops.mlir b/mlir/test/Dialect/Linalg/generalize-named-ops.mlir --- a/mlir/test/Dialect/Linalg/generalize-named-ops.mlir +++ b/mlir/test/Dialect/Linalg/generalize-named-ops.mlir @@ -31,7 +31,7 @@ func.func @generalize_matmul_tensor(%A : tensor<16x8xf32>, %B: tensor<8x32xf32>, %C: tensor<16x32xf32>) -> tensor<16x32xf32> { %0 = linalg.matmul ins(%A, %B: tensor<16x8xf32>, tensor<8x32xf32>) - outs(%C: tensor<16x32xf32>) -> tensor<16x32xf32> + outs(%C: tensor<16x32xf32>) return %0: tensor<16x32xf32> } @@ -54,7 +54,7 @@ %C: tensor<16x32xcomplex>) -> tensor<16x32xcomplex> { %0 = linalg.matmul ins(%A, %B: tensor<16x8xcomplex>, tensor<8x32xcomplex>) - outs(%C: tensor<16x32xcomplex>) -> tensor<16x32xcomplex> + outs(%C: tensor<16x32xcomplex>) return %0: tensor<16x32xcomplex> } diff --git a/mlir/test/Dialect/Linalg/generalize-named-polymorphic-ops.mlir b/mlir/test/Dialect/Linalg/generalize-named-polymorphic-ops.mlir --- a/mlir/test/Dialect/Linalg/generalize-named-polymorphic-ops.mlir +++ b/mlir/test/Dialect/Linalg/generalize-named-polymorphic-ops.mlir @@ -3,7 +3,7 @@ // Verifies that different argument types is legal. 
 func.func @generalize_matmul_tensor_f16f64f32(%A : tensor<16x8xf16>, %B: tensor<8x32xf64>, %C: tensor<16x32xf32>) -> tensor<16x32xf32> {
   %0 = linalg.matmul ins(%A, %B: tensor<16x8xf16>, tensor<8x32xf64>)
-    outs(%C: tensor<16x32xf32>) -> tensor<16x32xf32>
+    outs(%C: tensor<16x32xf32>)
   return %0: tensor<16x32xf32>
 }
@@ -22,7 +22,7 @@
 // Verifies that different argument types is legal.
 func.func @generalize_matmul_tensor_i16i64i32(%A : tensor<16x8xi16>, %B: tensor<8x32xi64>, %C: tensor<16x32xi32>) -> tensor<16x32xi32> {
   %0 = linalg.matmul ins(%A, %B: tensor<16x8xi16>, tensor<8x32xi64>)
-    outs(%C: tensor<16x32xi32>) -> tensor<16x32xi32>
+    outs(%C: tensor<16x32xi32>)
   return %0: tensor<16x32xi32>
 }
@@ -43,7 +43,7 @@
 func.func @generalize_matmul_tensor_i16i64i32_unsigned(%A : tensor<16x8xi16>, %B: tensor<8x32xi64>, %C: tensor<16x32xi32>) -> tensor<16x32xi32> {
   %0 = linalg.matmul {cast = #linalg.type_fn<cast_unsigned>} ins(%A, %B: tensor<16x8xi16>, tensor<8x32xi64>)
-    outs(%C: tensor<16x32xi32>) -> tensor<16x32xi32>
+    outs(%C: tensor<16x32xi32>)
   return %0: tensor<16x32xi32>
 }
@@ -54,7 +54,7 @@
 func.func @generalize_matmul_tensor_i16i64f32(%A : tensor<16x8xi16>, %B: tensor<8x32xi64>, %C: tensor<16x32xf32>) -> tensor<16x32xf32> {
   %0 = linalg.matmul ins(%A, %B: tensor<16x8xi16>, tensor<8x32xi64>)
-    outs(%C: tensor<16x32xf32>) -> tensor<16x32xf32>
+    outs(%C: tensor<16x32xf32>)
   return %0: tensor<16x32xf32>
 }
@@ -67,7 +67,7 @@
 func.func @generalize_matmul_tensor_f16f64i32(%A : tensor<16x8xf16>, %B: tensor<8x32xf64>, %C: tensor<16x32xi32>) -> tensor<16x32xi32> {
   %0 = linalg.matmul ins(%A, %B: tensor<16x8xf16>, tensor<8x32xf64>)
-    outs(%C: tensor<16x32xi32>) -> tensor<16x32xi32>
+    outs(%C: tensor<16x32xi32>)
   return %0: tensor<16x32xi32>
 }
@@ -80,7 +80,7 @@
 func.func @generalize_matmul_unsigned_tensor_i16i64i32(%A : tensor<16x8xi16>, %B: tensor<8x32xi64>, %C: tensor<16x32xi32>) -> tensor<16x32xi32> {
   %0 = linalg.matmul_unsigned ins(%A, %B: tensor<16x8xi16>, tensor<8x32xi64>)
-    outs(%C: tensor<16x32xi32>) -> tensor<16x32xi32>
+    outs(%C: tensor<16x32xi32>)
   return %0: tensor<16x32xi32>
 }
@@ -93,7 +93,7 @@
 func.func @generalize_matmul_unsigned_tensor_i16i64f32(%A : tensor<16x8xi16>, %B: tensor<8x32xi64>, %C: tensor<16x32xf32>) -> tensor<16x32xf32> {
   %0 = linalg.matmul_unsigned ins(%A, %B: tensor<16x8xi16>, tensor<8x32xi64>)
-    outs(%C: tensor<16x32xf32>) -> tensor<16x32xf32>
+    outs(%C: tensor<16x32xf32>)
   return %0: tensor<16x32xf32>
 }
@@ -106,7 +106,7 @@
 func.func @generalize_matmul_unsigned_tensor_f16f64i32(%A : tensor<16x8xf16>, %B: tensor<8x32xf64>, %C: tensor<16x32xi32>) -> tensor<16x32xi32> {
   %0 = linalg.matmul_unsigned ins(%A, %B: tensor<16x8xf16>, tensor<8x32xf64>)
-    outs(%C: tensor<16x32xi32>) -> tensor<16x32xi32>
+    outs(%C: tensor<16x32xi32>)
   return %0: tensor<16x32xi32>
 }
@@ -119,7 +119,7 @@
 func.func @generalize_pooling_nhwc_max_f32(%input : tensor<1x4x16x1xf32>, %shape: tensor<2x2xf32>, %output: tensor<1x2x4x1xf32>) -> tensor<1x2x4x1xf32> {
   %0 = linalg.pooling_nhwc_max {dilations = dense<[1, 2]> : tensor<2xi64>, strides = dense<[2, 4]> : tensor<2xi64>}
-    ins(%input, %shape : tensor<1x4x16x1xf32>, tensor<2x2xf32>) outs(%output : tensor<1x2x4x1xf32>) -> tensor<1x2x4x1xf32>
+    ins(%input, %shape : tensor<1x4x16x1xf32>, tensor<2x2xf32>) outs(%output : tensor<1x2x4x1xf32>)
   return %0: tensor<1x2x4x1xf32>
 }
@@ -133,7 +133,7 @@
 func.func @generalize_pooling_nwc_max_f32(%input : tensor<1x16x1xf32>, %shape: tensor<2xf32>, %output: tensor<1x4x1xf32>) -> tensor<1x4x1xf32> {
   %0 = linalg.pooling_nwc_max {dilations = dense<[2]> : tensor<1xi64>, strides = dense<[4]> : tensor<1xi64>}
-    ins(%input, %shape : tensor<1x16x1xf32>, tensor<2xf32>) outs(%output : tensor<1x4x1xf32>) -> tensor<1x4x1xf32>
+    ins(%input, %shape : tensor<1x16x1xf32>, tensor<2xf32>) outs(%output : tensor<1x4x1xf32>)
   return %0: tensor<1x4x1xf32>
 }
@@ -147,7 +147,7 @@
 func.func @generalize_pooling_nhwc_max_i32(%input : tensor<1x4x16x1xi32>, %shape: tensor<2x2xi32>, %output: tensor<1x2x4x1xi32>) -> tensor<1x2x4x1xi32> {
   %0 = linalg.pooling_nhwc_max {dilations = dense<[1, 2]> : tensor<2xi64>, strides = dense<[2, 4]> : tensor<2xi64>}
-    ins(%input, %shape : tensor<1x4x16x1xi32>, tensor<2x2xi32>) outs(%output : tensor<1x2x4x1xi32>) -> tensor<1x2x4x1xi32>
+    ins(%input, %shape : tensor<1x4x16x1xi32>, tensor<2x2xi32>) outs(%output : tensor<1x2x4x1xi32>)
   return %0: tensor<1x2x4x1xi32>
 }
@@ -159,7 +159,7 @@
 func.func @generalize_pooling_nwc_max_i32(%input : tensor<1x16x1xi32>, %shape: tensor<2xi32>, %output: tensor<1x4x1xi32>) -> tensor<1x4x1xi32> {
   %0 = linalg.pooling_nwc_max {dilations = dense<[2]> : tensor<1xi64>, strides = dense<[4]> : tensor<1xi64>}
-    ins(%input, %shape : tensor<1x16x1xi32>, tensor<2xi32>) outs(%output : tensor<1x4x1xi32>) -> tensor<1x4x1xi32>
+    ins(%input, %shape : tensor<1x16x1xi32>, tensor<2xi32>) outs(%output : tensor<1x4x1xi32>)
   return %0: tensor<1x4x1xi32>
 }
@@ -171,7 +171,7 @@
 func.func @generalize_pooling_nhwc_max_unsigned_i32(%input : tensor<1x4x16x1xi32>, %shape: tensor<2x2xi32>, %output: tensor<1x2x4x1xi32>) -> tensor<1x2x4x1xi32> {
   %0 = linalg.pooling_nhwc_max_unsigned {dilations = dense<[1, 2]> : tensor<2xi64>, strides = dense<[2, 4]> : tensor<2xi64>}
-    ins(%input, %shape : tensor<1x4x16x1xi32>, tensor<2x2xi32>) outs(%output : tensor<1x2x4x1xi32>) -> tensor<1x2x4x1xi32>
+    ins(%input, %shape : tensor<1x4x16x1xi32>, tensor<2x2xi32>) outs(%output : tensor<1x2x4x1xi32>)
   return %0: tensor<1x2x4x1xi32>
 }
@@ -183,7 +183,7 @@
 func.func @generalize_pooling_nwc_max_unsigned_i32(%input : tensor<1x16x1xi32>, %shape: tensor<2xi32>, %output: tensor<1x4x1xi32>) -> tensor<1x4x1xi32> {
   %0 = linalg.pooling_nwc_max_unsigned {dilations = dense<[2]> : tensor<1xi64>, strides = dense<[4]> : tensor<1xi64>}
-    ins(%input, %shape : tensor<1x16x1xi32>, tensor<2xi32>) outs(%output : tensor<1x4x1xi32>) -> tensor<1x4x1xi32>
+    ins(%input, %shape : tensor<1x16x1xi32>, tensor<2xi32>) outs(%output : tensor<1x4x1xi32>)
   return %0: tensor<1x4x1xi32>
 }
@@ -195,7 +195,7 @@
 func.func @generalize_pooling_nhwc_min_f32(%input : tensor<1x4x16x1xf32>, %shape: tensor<2x2xf32>, %output: tensor<1x2x4x1xf32>) -> tensor<1x2x4x1xf32> {
   %0 = linalg.pooling_nhwc_min {dilations = dense<[1, 2]> : tensor<2xi64>, strides = dense<[2, 4]> : tensor<2xi64>}
-    ins(%input, %shape : tensor<1x4x16x1xf32>, tensor<2x2xf32>) outs(%output : tensor<1x2x4x1xf32>) -> tensor<1x2x4x1xf32>
+    ins(%input, %shape : tensor<1x4x16x1xf32>, tensor<2x2xf32>) outs(%output : tensor<1x2x4x1xf32>)
   return %0: tensor<1x2x4x1xf32>
 }
@@ -209,7 +209,7 @@
 func.func @generalize_pooling_nwc_min_f32(%input : tensor<1x16x1xf32>, %shape: tensor<2xf32>, %output: tensor<1x4x1xf32>) -> tensor<1x4x1xf32> {
   %0 = linalg.pooling_nwc_min {dilations = dense<[2]> : tensor<1xi64>, strides = dense<[4]> : tensor<1xi64>}
-    ins(%input, %shape : tensor<1x16x1xf32>, tensor<2xf32>) outs(%output : tensor<1x4x1xf32>) -> tensor<1x4x1xf32>
+    ins(%input, %shape : tensor<1x16x1xf32>, tensor<2xf32>) outs(%output : tensor<1x4x1xf32>)
   return %0: tensor<1x4x1xf32>
 }
@@ -223,7 +223,7 @@
 func.func @generalize_pooling_nhwc_min_i32(%input : tensor<1x4x16x1xi32>, %shape: tensor<2x2xi32>, %output: tensor<1x2x4x1xi32>) -> tensor<1x2x4x1xi32> {
   %0 = linalg.pooling_nhwc_min {dilations = dense<[1, 2]> : tensor<2xi64>, strides = dense<[2, 4]> : tensor<2xi64>}
-    ins(%input, %shape : tensor<1x4x16x1xi32>, tensor<2x2xi32>) outs(%output : tensor<1x2x4x1xi32>) -> tensor<1x2x4x1xi32>
+    ins(%input, %shape : tensor<1x4x16x1xi32>, tensor<2x2xi32>) outs(%output : tensor<1x2x4x1xi32>)
   return %0: tensor<1x2x4x1xi32>
 }
@@ -235,7 +235,7 @@
 func.func @generalize_pooling_nwc_min_i32(%input : tensor<1x16x1xi32>, %shape: tensor<2xi32>, %output: tensor<1x4x1xi32>) -> tensor<1x4x1xi32> {
   %0 = linalg.pooling_nwc_min {dilations = dense<[2]> : tensor<1xi64>, strides = dense<[4]> : tensor<1xi64>}
-    ins(%input, %shape : tensor<1x16x1xi32>, tensor<2xi32>) outs(%output : tensor<1x4x1xi32>) -> tensor<1x4x1xi32>
+    ins(%input, %shape : tensor<1x16x1xi32>, tensor<2xi32>) outs(%output : tensor<1x4x1xi32>)
   return %0: tensor<1x4x1xi32>
 }
@@ -247,7 +247,7 @@
 func.func @generalize_pooling_nhwc_min_unsigned_i32(%input : tensor<1x4x16x1xi32>, %shape: tensor<2x2xi32>, %output: tensor<1x2x4x1xi32>) -> tensor<1x2x4x1xi32> {
   %0 = linalg.pooling_nhwc_min_unsigned {dilations = dense<[1, 2]> : tensor<2xi64>, strides = dense<[2, 4]> : tensor<2xi64>}
-    ins(%input, %shape : tensor<1x4x16x1xi32>, tensor<2x2xi32>) outs(%output : tensor<1x2x4x1xi32>) -> tensor<1x2x4x1xi32>
+    ins(%input, %shape : tensor<1x4x16x1xi32>, tensor<2x2xi32>) outs(%output : tensor<1x2x4x1xi32>)
   return %0: tensor<1x2x4x1xi32>
 }
@@ -259,7 +259,7 @@
 func.func @generalize_pooling_nwc_min_unsigned_i32(%input : tensor<1x16x1xi32>, %shape: tensor<2xi32>, %output: tensor<1x4x1xi32>) -> tensor<1x4x1xi32> {
   %0 = linalg.pooling_nwc_min_unsigned {dilations = dense<[2]> : tensor<1xi64>, strides = dense<[4]> : tensor<1xi64>}
-    ins(%input, %shape : tensor<1x16x1xi32>, tensor<2xi32>) outs(%output : tensor<1x4x1xi32>) -> tensor<1x4x1xi32>
+    ins(%input, %shape : tensor<1x16x1xi32>, tensor<2xi32>) outs(%output : tensor<1x4x1xi32>)
   return %0: tensor<1x4x1xi32>
 }
@@ -271,7 +271,7 @@
 func.func @generalize_pooling_nhwc_sum_f32(%input : tensor<1x4x16x1xf32>, %shape: tensor<2x2xf32>, %output: tensor<1x2x4x1xf32>) -> tensor<1x2x4x1xf32> {
   %0 = linalg.pooling_nhwc_sum {dilations = dense<[1, 2]> : tensor<2xi64>, strides = dense<[2, 4]> : tensor<2xi64>}
-    ins(%input, %shape : tensor<1x4x16x1xf32>, tensor<2x2xf32>) outs(%output : tensor<1x2x4x1xf32>) -> tensor<1x2x4x1xf32>
+    ins(%input, %shape : tensor<1x4x16x1xf32>, tensor<2x2xf32>) outs(%output : tensor<1x2x4x1xf32>)
   return %0: tensor<1x2x4x1xf32>
 }
@@ -285,7 +285,7 @@
 func.func @generalize_pooling_nwc_sum_f32(%input : tensor<1x16x1xf32>, %shape: tensor<2xf32>, %output: tensor<1x4x1xf32>) -> tensor<1x4x1xf32> {
   %0 = linalg.pooling_nwc_sum {dilations = dense<[2]> : tensor<1xi64>, strides = dense<[4]> : tensor<1xi64>}
-    ins(%input, %shape : tensor<1x16x1xf32>, tensor<2xf32>) outs(%output : tensor<1x4x1xf32>) -> tensor<1x4x1xf32>
+    ins(%input, %shape : tensor<1x16x1xf32>, tensor<2xf32>) outs(%output : tensor<1x4x1xf32>)
   return %0: tensor<1x4x1xf32>
 }
@@ -299,7 +299,7 @@
 func.func @generalize_pooling_nhwc_sum_i32(%input : tensor<1x4x16x1xi32>, %shape: tensor<2x2xi32>, %output: tensor<1x2x4x1xi32>) -> tensor<1x2x4x1xi32> {
   %0 = linalg.pooling_nhwc_sum {dilations = dense<[1, 2]> : tensor<2xi64>, strides = dense<[2, 4]> : tensor<2xi64>}
-    ins(%input, %shape : tensor<1x4x16x1xi32>, tensor<2x2xi32>) outs(%output : tensor<1x2x4x1xi32>) -> tensor<1x2x4x1xi32>
+    ins(%input, %shape : tensor<1x4x16x1xi32>, tensor<2x2xi32>) outs(%output : tensor<1x2x4x1xi32>)
   return %0: tensor<1x2x4x1xi32>
 }
@@ -313,7 +313,7 @@
 func.func @generalize_pooling_nwc_sum_i32(%input : tensor<1x16x1xi32>, %shape: tensor<2xi32>, %output: tensor<1x4x1xi32>) -> tensor<1x4x1xi32> {
   %0 = linalg.pooling_nwc_sum {dilations = dense<[2]> : tensor<1xi64>, strides = dense<[4]> : tensor<1xi64>}
-    ins(%input, %shape : tensor<1x16x1xi32>, tensor<2xi32>) outs(%output : tensor<1x4x1xi32>) -> tensor<1x4x1xi32>
+    ins(%input, %shape : tensor<1x16x1xi32>, tensor<2xi32>) outs(%output : tensor<1x4x1xi32>)
   return %0: tensor<1x4x1xi32>
 }
@@ -326,7 +326,7 @@
 // -----

 func.func @generalize_fill_0d(%value: f64, %O: tensor<f64>) -> tensor<f64> {
-  %0 = linalg.fill ins(%value: f64) outs(%O : tensor<f64>) -> tensor<f64>
+  %0 = linalg.fill ins(%value: f64) outs(%O : tensor<f64>)
   return %0: tensor<f64>
 }
@@ -355,7 +355,7 @@
 // -----

 func.func @generalize_index(%min: f64, %max: f64, %seed: i32, %O: tensor<16x32xf32>) -> tensor<16x32xf32> {
-  %0 = linalg.fill_rng_2d ins(%min, %max, %seed: f64, f64, i32) outs(%O : tensor<16x32xf32>) -> tensor<16x32xf32>
+  %0 = linalg.fill_rng_2d ins(%min, %max, %seed: f64, f64, i32) outs(%O : tensor<16x32xf32>)
   return %0: tensor<16x32xf32>
 }
@@ -368,7 +368,7 @@
 // -----

 func.func @generalize_const(%min: f64, %max: f64, %seed: i32, %O: tensor<16x32xf32>) -> tensor<16x32xf32> {
-  %0 = linalg.fill_rng_2d ins(%min, %max, %seed: f64, f64, i32) outs(%O : tensor<16x32xf32>) -> tensor<16x32xf32>
+  %0 = linalg.fill_rng_2d ins(%min, %max, %seed: f64, f64, i32) outs(%O : tensor<16x32xf32>)
   return %0: tensor<16x32xf32>
 }
@@ -381,7 +381,7 @@
 // Verifies the default value of the fun attribute is an exp op.
 func.func @generalize_elemwise_exp(%lhs : tensor<4x8xf32>, %output : tensor<4x8xf32>) -> tensor<4x8xf32> {
-  %0 = linalg.elemwise_unary ins(%lhs: tensor<4x8xf32>) outs(%output: tensor<4x8xf32>) -> tensor<4x8xf32>
+  %0 = linalg.elemwise_unary ins(%lhs: tensor<4x8xf32>) outs(%output: tensor<4x8xf32>)
   return %0: tensor<4x8xf32>
 }
@@ -393,7 +393,7 @@
 // Verifies the fun attribute controls the unary function used.
 func.func @generalize_elemwise_log(%lhs : tensor<4x8xf32>, %output : tensor<4x8xf32>) -> tensor<4x8xf32> {
   %0 = linalg.elemwise_unary {fun = #linalg.unary_fn<log>}
-    ins(%lhs: tensor<4x8xf32>) outs(%output: tensor<4x8xf32>) -> tensor<4x8xf32>
+    ins(%lhs: tensor<4x8xf32>) outs(%output: tensor<4x8xf32>)
   return %0: tensor<4x8xf32>
 }
@@ -405,7 +405,7 @@
 // Verifies the fun attribute controls the unary function used.
 func.func @generalize_elemwise_abs(%lhs : tensor<4x8xf32>, %output : tensor<4x8xf32>) -> tensor<4x8xf32> {
   %0 = linalg.elemwise_unary {fun = #linalg.unary_fn<abs>}
-    ins(%lhs: tensor<4x8xf32>) outs(%output: tensor<4x8xf32>) -> tensor<4x8xf32>
+    ins(%lhs: tensor<4x8xf32>) outs(%output: tensor<4x8xf32>)
   return %0: tensor<4x8xf32>
 }
@@ -417,7 +417,7 @@
 // Verifies the fun attribute controls the unary function used.
 func.func @generalize_elemwise_ceil(%lhs : tensor<4x8xf32>, %output : tensor<4x8xf32>) -> tensor<4x8xf32> {
   %0 = linalg.elemwise_unary {fun = #linalg.unary_fn<ceil>}
-    ins(%lhs: tensor<4x8xf32>) outs(%output: tensor<4x8xf32>) -> tensor<4x8xf32>
+    ins(%lhs: tensor<4x8xf32>) outs(%output: tensor<4x8xf32>)
   return %0: tensor<4x8xf32>
 }
@@ -429,7 +429,7 @@
 // Verifies the fun attribute controls the unary function used.
 func.func @generalize_elemwise_floor(%lhs : tensor<4x8xf32>, %output : tensor<4x8xf32>) -> tensor<4x8xf32> {
   %0 = linalg.elemwise_unary {fun = #linalg.unary_fn<floor>}
-    ins(%lhs: tensor<4x8xf32>) outs(%output: tensor<4x8xf32>) -> tensor<4x8xf32>
+    ins(%lhs: tensor<4x8xf32>) outs(%output: tensor<4x8xf32>)
   return %0: tensor<4x8xf32>
 }
@@ -441,7 +441,7 @@
 // Verifies the fun attribute controls the unary function used.
 func.func @generalize_elemwise_negf(%lhs : tensor<4x8xf32>, %output : tensor<4x8xf32>) -> tensor<4x8xf32> {
   %0 = linalg.elemwise_unary {fun = #linalg.unary_fn<negf>}
-    ins(%lhs: tensor<4x8xf32>) outs(%output: tensor<4x8xf32>) -> tensor<4x8xf32>
+    ins(%lhs: tensor<4x8xf32>) outs(%output: tensor<4x8xf32>)
   return %0: tensor<4x8xf32>
 }
@@ -453,7 +453,7 @@
 // Verifies the default value of the fun attribute is an add op.
 func.func @generalize_elemwise_add(%lhs : tensor<4x8xf32>, %rhs : tensor<4x8xf32>, %output : tensor<4x8xf32>) -> tensor<4x8xf32> {
   %0 = linalg.elemwise_binary ins(%lhs, %rhs: tensor<4x8xf32>, tensor<4x8xf32>)
-    outs(%output: tensor<4x8xf32>) -> tensor<4x8xf32>
+    outs(%output: tensor<4x8xf32>)
   return %0: tensor<4x8xf32>
 }
@@ -466,7 +466,7 @@
 func.func @generalize_elemwise_mul(%lhs : tensor<4x8xf32>, %rhs : tensor<4x8xf32>, %output : tensor<4x8xf32>) -> tensor<4x8xf32> {
   %0 = linalg.elemwise_binary {fun = #linalg.binary_fn<mul>} ins(%lhs, %rhs: tensor<4x8xf32>, tensor<4x8xf32>)
-    outs(%output: tensor<4x8xf32>) -> tensor<4x8xf32>
+    outs(%output: tensor<4x8xf32>)
   return %0: tensor<4x8xf32>
 }
@@ -479,7 +479,7 @@
 func.func @generalize_elemwise_rank_zero(%lhs : tensor<f32>, %rhs : tensor<f32>, %output : tensor<4x8xf32>) -> tensor<4x8xf32> {
   %0 = linalg.elemwise_binary {fun = #linalg.binary_fn} ins(%lhs, %rhs: tensor<f32>, tensor<f32>)
-    outs(%output: tensor<4x8xf32>) -> tensor<4x8xf32>
+    outs(%output: tensor<4x8xf32>)
   return %0: tensor<4x8xf32>
 }
@@ -492,7 +492,7 @@
 // Verifies the fun attribute controls the binary function used.
 func.func @generalize_copy(%lhs : tensor<4x8xf32>, %output : tensor<4x8xf32>) -> tensor<4x8xf32> {
-  %0 = linalg.copy ins(%lhs: tensor<4x8xf32>) outs(%output: tensor<4x8xf32>) -> tensor<4x8xf32>
+  %0 = linalg.copy ins(%lhs: tensor<4x8xf32>) outs(%output: tensor<4x8xf32>)
   return %0: tensor<4x8xf32>
 }
diff --git a/mlir/test/Dialect/Linalg/generalize-pad-tensor.mlir b/mlir/test/Dialect/Linalg/generalize-pad-tensor.mlir
--- a/mlir/test/Dialect/Linalg/generalize-pad-tensor.mlir
+++ b/mlir/test/Dialect/Linalg/generalize-pad-tensor.mlir
@@ -4,7 +4,7 @@
 // CHECK-SAME: %[[IN:.*]]: tensor<1x28x28x1xf32>) -> tensor<1x32x32x1xf32> {
 // CHECK: %[[C0:.*]] = arith.constant 0.000000e+00 : f32
 // CHECK: %[[INIT:.*]] = tensor.empty() : tensor<1x32x32x1xf32>
-// CHECK: %[[FILL:.*]] = linalg.fill ins(%[[C0]] : f32) outs(%[[INIT]] : tensor<1x32x32x1xf32>) -> tensor<1x32x32x1xf32>
+// CHECK: %[[FILL:.*]] = linalg.fill ins(%[[C0]] : f32) outs(%[[INIT]] : tensor<1x32x32x1xf32>)
 // CHECK: %[[PADDED:.*]] = tensor.insert_slice %[[IN]] into %[[FILL]][0, 2, 2, 0] [1, 28, 28, 1] [1, 1, 1, 1] : tensor<1x28x28x1xf32> into tensor<1x32x32x1xf32>
 // CHECK: return %[[PADDED]] : tensor<1x32x32x1xf32>
 func.func @generalize_pad_tensor_static_shape(%arg0: tensor<1x28x28x1xf32>) -> tensor<1x32x32x1xf32> {
@@ -29,7 +29,7 @@
 // CHECK: %[[DIM3:.*]] = tensor.dim %[[IN]], %[[C3]] : tensor<4x?x2x?xf32>
 // CHECK: %[[OUT_DIM3:.*]] = arith.addi %[[DIM3]], %[[OFFSET]] : index
 // CHECK: %[[INIT:.*]] = tensor.empty(%[[DIM1]], %[[OUT_DIM2]], %[[OUT_DIM3]]) : tensor<4x?x?x?xf32>
-// CHECK: %[[FILL:.*]] = linalg.fill ins(%[[CST]] : f32) outs(%[[INIT]] : tensor<4x?x?x?xf32>) -> tensor<4x?x?x?xf32>
+// CHECK: %[[FILL:.*]] = linalg.fill ins(%[[CST]] : f32) outs(%[[INIT]] : tensor<4x?x?x?xf32>)
 // CHECK: %[[DIM1_1:.*]] = tensor.dim %[[IN]], %[[C1]] : tensor<4x?x2x?xf32>
 // CHECK: %[[DIM3_1:.*]] = tensor.dim %[[IN]], %[[C3]] : tensor<4x?x2x?xf32>
 // CHECK: %[[PADDED:.*]] = tensor.insert_slice %[[IN]] into %[[FILL]]{{\[}}%[[C0]], %[[C0]], %[[OFFSET]], %[[C0]]] [4, %[[DIM1_1]], 2, %[[DIM3_1]]] [1, 1, 1, 1] : tensor<4x?x2x?xf32> into tensor<4x?x?x?xf32>
diff --git a/mlir/test/Dialect/Linalg/invalid.mlir b/mlir/test/Dialect/Linalg/invalid.mlir
--- a/mlir/test/Dialect/Linalg/invalid.mlir
+++ b/mlir/test/Dialect/Linalg/invalid.mlir
@@ -304,54 +304,14 @@
 // -----

-func.func @incorrect_region_arg_count(%m: memref) {
-  // expected-error @+3 {{region expects 3 args, got 2}}
-  %res = linalg.matmul ins(%m, %m : memref, memref)
-                       -> (tensor, tensor)
+func.func @incorrect_region_arg_count(%m: tensor) {
+  // expected-error @+2 {{region expects 3 args, got 2}}
+  %res = linalg.matmul outs(%m, %m : tensor, tensor)
   return
 }

 // -----

-func.func @matching_inits(%m: memref, %t: tensor) {
-  // expected-error @+1 {{expected type of operand #2 ('tensor') to match type of corresponding result ('tensor')}}
-  %res = linalg.matmul ins(%m, %m : memref, memref)
-                       outs(%t : tensor)
-                       -> tensor
-  return
-}
-
-// -----
-
-func.func @illegal_fill_tensor_no_return(%arg0 : index, %arg1 : index, %arg2 : f32)
-{
-  %0 = tensor.empty(%arg0, %arg1) : tensor
-  // expected-error @+1 {{expected the number of results (0) to be equal to the number of output tensors (1)}}
-  linalg.fill ins(%arg2 : f32) outs(%0 : tensor)
-}
-
-// -----
-
-func.func @illegal_fill_memref_with_tensor_return
-  (%arg0 : memref, %arg1 : f32) -> tensor
-{
-  // expected-error @+1 {{expected the number of results (1) to be equal to the number of output tensors (0)}}
-  %0 = linalg.fill ins(%arg1 : f32) outs(%arg0 : memref) -> tensor
-  return %0 : tensor
-}
-
-// -----
-
-func.func @illegal_fill_tensor_with_memref_return
-  (%arg0 : tensor, %arg1 : f32) -> memref
-{
-  // expected-error @+1 {{result #0 must be ranked tensor of any type values, but got 'memref'}}
-  %0 = linalg.fill ins(%arg1 : f32) outs(%arg0 : tensor) -> memref
-  return %0 : memref
-}
-
-// -----
-
 func.func @invalid_static_matmul(%arg0: memref<2x4xf32>, %arg1: memref<3x4xf32>, %arg2: memref<2x4xf32>) {
   // expected-error @+1 {{inferred input/output operand #1 has shape's dimension #0 to be 4, but found 3}}
   linalg.matmul ins(%arg0, %arg1 : memref<2x4xf32>, memref<3x4xf32>)
diff --git a/mlir/test/Dialect/Linalg/named-ops.mlir b/mlir/test/Dialect/Linalg/named-ops.mlir
--- a/mlir/test/Dialect/Linalg/named-ops.mlir
+++ b/mlir/test/Dialect/Linalg/named-ops.mlir
@@ -4,11 +4,11 @@
 func.func @depthwise_conv_1d_nwc_wcm(%input: tensor<1x12x8xf32>, %filter: tensor<3x8x8xf32>) -> tensor<1x10x8x8xf32> {
   %zero = arith.constant 0.000000e+00 : f32
   %init = tensor.empty() : tensor<1x10x8x8xf32>
-  %fill = linalg.fill ins(%zero : f32) outs(%init : tensor<1x10x8x8xf32>) -> tensor<1x10x8x8xf32>
+  %fill = linalg.fill ins(%zero : f32) outs(%init : tensor<1x10x8x8xf32>)
   // CHECK: depthwise_conv_1d_nwc_wcm
   %0 = linalg.depthwise_conv_1d_nwc_wcm {dilations = dense<1> : tensor<1xi64>, strides = dense<1> : tensor<1xi64>}
     ins(%input, %filter : tensor<1x12x8xf32>, tensor<3x8x8xf32>)
-    outs(%fill : tensor<1x10x8x8xf32>) -> tensor<1x10x8x8xf32>
+    outs(%fill : tensor<1x10x8x8xf32>)
   return %0 : tensor<1x10x8x8xf32>
 }
@@ -18,11 +18,11 @@
 func.func @depthwise_conv_1d_nwc_wc(%input: tensor<1x12x8xf32>, %filter: tensor<3x8xf32>) -> tensor<1x10x8xf32> {
   %zero = arith.constant 0.000000e+00 : f32
   %init = tensor.empty() : tensor<1x10x8xf32>
-  %fill = linalg.fill ins(%zero : f32) outs(%init : tensor<1x10x8xf32>) -> tensor<1x10x8xf32>
+  %fill = linalg.fill ins(%zero : f32) outs(%init : tensor<1x10x8xf32>)
   // CHECK: depthwise_conv_1d_nwc_wc
   %0 = linalg.depthwise_conv_1d_nwc_wc {dilations = dense<1> : tensor<1xi64>, strides = dense<1> : tensor<1xi64>}
     ins(%input, %filter : tensor<1x12x8xf32>, tensor<3x8xf32>)
-    outs(%fill : tensor<1x10x8xf32>) -> tensor<1x10x8xf32>
+    outs(%fill : tensor<1x10x8xf32>)
   return %0 : tensor<1x10x8xf32>
 }
@@ -32,7 +32,7 @@
 func.func @depthwise_conv_2d_nhwc_hwcm_tensor(%input: tensor<2x4x5x2xf32>, %filter: tensor<2x2x2x3xf32>) -> tensor<2x3x4x2x3xf32> {
   %zero = arith.constant 0.000000e+00 : f32
   %init = tensor.empty() : tensor<2x3x4x2x3xf32>
-  %fill = linalg.fill ins(%zero : f32) outs(%init : tensor<2x3x4x2x3xf32>) -> tensor<2x3x4x2x3xf32>
+  %fill = linalg.fill ins(%zero : f32) outs(%init : tensor<2x3x4x2x3xf32>)
   // CHECK: %{{.+}} = linalg.depthwise_conv_2d_nhwc_hwcm
   // CHECK-SAME: {dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>}
   // CHECK-SAME: ins(%{{.+}}, %{{.+}} : tensor<2x4x5x2xf32>, tensor<2x2x2x3xf32>)
@@ -40,7 +40,7 @@
   %0 = linalg.depthwise_conv_2d_nhwc_hwcm { dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64> }
     ins(%input, %filter : tensor<2x4x5x2xf32>, tensor<2x2x2x3xf32>)
-    outs(%fill : tensor<2x3x4x2x3xf32>) -> tensor<2x3x4x2x3xf32>
+    outs(%fill : tensor<2x3x4x2x3xf32>)
   return %0 : tensor<2x3x4x2x3xf32>
 }
@@ -63,10 +63,10 @@
   // CHECK: %{{.+}} = linalg.depthwise_conv_1d_nwc_wc
   // CHECK-SAME: {dilations = dense<1> : vector<1xi64>, strides = dense<2> : vector<1xi64>}
   // CHECK-SAME: ins(%{{.+}}, %{{.+}} : tensor<1x113x96xf32>, tensor<3x96xf32>)
-  // CHECK-SAME: outs(%{{.+}} : tensor<1x56x96xf32>) -> tensor<1x56x96xf32>
+  // CHECK-SAME: outs(%{{.+}} : tensor<1x56x96xf32>)
   %0 = linalg.depthwise_conv_1d_nwc_wc {dilations = dense<1> : vector<1xi64>, strides = dense<2> : vector<1xi64>}
     ins(%input, %filter: tensor<1x113x96xf32>, tensor<3x96xf32>)
-    outs(%init: tensor<1x56x96xf32>) -> tensor<1x56x96xf32>
+    outs(%init: tensor<1x56x96xf32>)
   return %0: tensor<1x56x96xf32>
 }
@@ -76,10 +76,10 @@
   // CHECK: %{{.+}} = linalg.depthwise_conv_2d_nhwc_hwc
   // CHECK-SAME: {dilations = dense<1> : vector<2xi64>, strides = dense<2> : vector<2xi64>}
   // CHECK-SAME: ins(%{{.+}}, %{{.+}} : tensor<1x113x113x96xf32>, tensor<3x3x96xf32>)
-  // CHECK-SAME: outs(%{{.+}} : tensor<1x56x56x96xf32>) -> tensor<1x56x56x96xf32>
+  // CHECK-SAME: outs(%{{.+}} : tensor<1x56x56x96xf32>)
   %0 = linalg.depthwise_conv_2d_nhwc_hwc {dilations = dense<1> : vector<2xi64>, strides = dense<2> : vector<2xi64>}
     ins(%input, %filter: tensor<1x113x113x96xf32>, tensor<3x3x96xf32>)
-    outs(%init: tensor<1x56x56x96xf32>) -> tensor<1x56x56x96xf32>
+    outs(%init: tensor<1x56x56x96xf32>)
   return %0: tensor<1x56x56x96xf32>
 }
@@ -101,10 +101,10 @@
   // CHECK: %{{.+}} = linalg.depthwise_conv_2d_nchw_chw
   // CHECK-SAME: {dilations = dense<1> : vector<2xi64>, strides = dense<2> : vector<2xi64>}
   // CHECK-SAME: ins(%{{.+}}, %{{.+}} : tensor<1x96x113x113xf32>, tensor<96x3x3xf32>)
-  // CHECK-SAME: outs(%{{.+}} : tensor<1x96x56x56xf32>) -> tensor<1x96x56x56xf32>
+  // CHECK-SAME: outs(%{{.+}} : tensor<1x96x56x56xf32>)
   %0 = linalg.depthwise_conv_2d_nchw_chw {dilations = dense<1> : vector<2xi64>, strides = dense<2> : vector<2xi64>}
     ins(%input, %filter: tensor<1x96x113x113xf32>, tensor<96x3x3xf32>)
-    outs(%init: tensor<1x96x56x56xf32>) -> tensor<1x96x56x56xf32>
+    outs(%init: tensor<1x96x56x56xf32>)
   return %0: tensor<1x96x56x56xf32>
 }
@@ -123,7 +123,7 @@
 func.func @depthwise_conv_2d_nhwc_hwcm_tensor_dilated(%input: tensor<2x8x9x2xf32>, %filter: tensor<2x2x2x3xf32>) -> tensor<2x6x7x2x3xf32> {
   %zero = arith.constant 0.000000e+00 : f32
   %init = tensor.empty() : tensor<2x6x7x2x3xf32>
-  %fill = linalg.fill ins(%zero : f32) outs(%init : tensor<2x6x7x2x3xf32>) -> tensor<2x6x7x2x3xf32>
+  %fill = linalg.fill ins(%zero : f32) outs(%init : tensor<2x6x7x2x3xf32>)
   // CHECK: %{{.+}} = linalg.depthwise_conv_2d_nhwc_hwcm
   // CHECK-SAME: {dilations = dense<2> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>}
   // CHECK-SAME: ins(%{{.+}}, %{{.+}} : tensor<2x8x9x2xf32>, tensor<2x2x2x3xf32>)
@@ -131,7 +131,7 @@
   %0 = linalg.depthwise_conv_2d_nhwc_hwcm { dilations = dense<2> : tensor<2xi64>, strides = dense<1> : tensor<2xi64> }
     ins(%input, %filter : tensor<2x8x9x2xf32>, tensor<2x2x2x3xf32>)
-    outs(%fill : tensor<2x6x7x2x3xf32>) -> tensor<2x6x7x2x3xf32>
+    outs(%fill : tensor<2x6x7x2x3xf32>)
   return %0 : tensor<2x6x7x2x3xf32>
 }
@@ -187,11 +187,11 @@
 func.func @depthwise_conv_3d_ndhwc_dhwcm(%input: tensor<2x6x13x12x6xf32>, %filter: tensor<2x1x3x6x6xf32>) -> tensor<2x3x13x4x6x6xf32> {
   %zero = arith.constant 0.000000e+00 : f32
   %init = tensor.empty() : tensor<2x3x13x4x6x6xf32>
-  %fill = linalg.fill ins(%zero : f32) outs(%init : tensor<2x3x13x4x6x6xf32>) -> tensor<2x3x13x4x6x6xf32>
+  %fill = linalg.fill ins(%zero : f32) outs(%init : tensor<2x3x13x4x6x6xf32>)
   // CHECK: depthwise_conv_3d_ndhwc_dhwcm
   %0 = linalg.depthwise_conv_3d_ndhwc_dhwcm {dilations = dense<1> : tensor<3xi64>, strides = dense<[2, 1, 3]> : tensor<3xi64>}
     ins(%input, %filter : tensor<2x6x13x12x6xf32>, tensor<2x1x3x6x6xf32>)
-    outs(%fill : tensor<2x3x13x4x6x6xf32>) -> tensor<2x3x13x4x6x6xf32>
+    outs(%fill : tensor<2x3x13x4x6x6xf32>)
   return %0 : tensor<2x3x13x4x6x6xf32>
 }
@@ -201,11 +201,11 @@
 func.func @depthwise_conv_3d_ndhwc_dhwc(%input: tensor<2x6x13x12x6xf32>, %filter: tensor<2x1x3x6xf32>) -> tensor<2x3x13x4x6xf32> {
   %zero = arith.constant 0.000000e+00 : f32
   %init = tensor.empty() : tensor<2x3x13x4x6xf32>
-  %fill = linalg.fill ins(%zero : f32) outs(%init : tensor<2x3x13x4x6xf32>) -> tensor<2x3x13x4x6xf32>
+  %fill = linalg.fill ins(%zero : f32) outs(%init : tensor<2x3x13x4x6xf32>)
   // CHECK: depthwise_conv_3d_ndhwc_dhwc
   %0 = linalg.depthwise_conv_3d_ndhwc_dhwc {dilations = dense<1> : tensor<3xi64>, strides = dense<[2, 1, 3]> : tensor<3xi64>}
     ins(%input, %filter : tensor<2x6x13x12x6xf32>, tensor<2x1x3x6xf32>)
-    outs(%fill : tensor<2x3x13x4x6xf32>) -> tensor<2x3x13x4x6xf32>
+    outs(%fill : tensor<2x3x13x4x6xf32>)
   return %0 : tensor<2x3x13x4x6xf32>
 }
@@ -217,11 +217,11 @@
   // CHECK-SAME: dilations = dense<1> : tensor<1xi64>
   // CHECK-SAME: strides = dense<1> : tensor<1xi64>
   // CHECK-SAME: ins(%{{.+}}, %{{.+}} : tensor, tensor)
-  // CHECK-SAME: outs(%{{.+}} : tensor) -> tensor
+  // CHECK-SAME: outs(%{{.+}} : tensor)
   %0 = linalg.conv_1d_nwc_wcf {dilations = dense<1> : tensor<1xi64>, strides = dense<1> : tensor<1xi64>}
     ins (%input, %filter: tensor, tensor)
-    outs (%init: tensor) -> tensor
+    outs (%init: tensor)
   return %0 : tensor
 }
@@ -249,11 +249,11 @@
   // CHECK-SAME: dilations = dense<1> : tensor<1xi64>
   // CHECK-SAME: strides = dense<1> : tensor<1xi64>
   // CHECK-SAME: ins(%{{.+}}, %{{.+}} : tensor, tensor)
-  // CHECK-SAME: outs(%{{.+}} : tensor) -> tensor
+  // CHECK-SAME: outs(%{{.+}} : tensor)
   %0 = linalg.conv_1d_ncw_fcw {dilations = dense<1> : tensor<1xi64>, strides = dense<1> : tensor<1xi64>}
     ins (%input, %filter: tensor, tensor)
-    outs (%init: tensor) -> tensor
+    outs (%init: tensor)
   return %0 : tensor
 }
@@ -281,11 +281,11 @@
   // CHECK-SAME: dilations = dense<1> : tensor<2xi64>
   // CHECK-SAME: strides = dense<1> : tensor<2xi64>
   // CHECK-SAME: ins(%{{.+}}, %{{.+}} : tensor, tensor)
-  // CHECK-SAME: outs(%{{.+}} : tensor) -> tensor
+  // CHECK-SAME: outs(%{{.+}} : tensor)
   %0 = linalg.conv_2d_nhwc_hwcf {dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>}
     ins (%input, %filter: tensor, tensor)
-    outs (%init: tensor) -> tensor
+    outs (%init: tensor)
   return %0 : tensor
 }
@@ -297,11 +297,11 @@
   // CHECK-SAME: dilations = dense<1> : tensor<2xi64>
   // CHECK-SAME: strides = dense<1> : tensor<2xi64>
   // CHECK-SAME: ins(%{{.+}}, %{{.+}} : tensor, tensor)
-  // CHECK-SAME: outs(%{{.+}} : tensor) -> tensor
+  // CHECK-SAME: outs(%{{.+}} : tensor)
   %0 = linalg.conv_2d_ngchw_fgchw {dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>}
     ins (%input, %filter: tensor, tensor)
-    outs (%init: tensor) -> tensor
+    outs (%init: tensor)
   return %0 : tensor
 }
@@ -313,11 +313,11 @@
   // CHECK-SAME: dilations = dense<1> : tensor<2xi64>
   // CHECK-SAME: strides = dense<1> : tensor<2xi64>
   // CHECK-SAME: ins(%{{.+}}, %{{.+}} : tensor, tensor)
-  // CHECK-SAME: outs(%{{.+}} : tensor) -> tensor
+  // CHECK-SAME: outs(%{{.+}} : tensor)
   %0 = linalg.conv_2d_nhwc_fhwc {dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>}
     ins (%input, %filter: tensor, tensor)
-    outs (%init: tensor) -> tensor
+    outs (%init: tensor)
   return %0 : tensor
 }
@@ -329,11 +329,11 @@
   // CHECK-SAME: dilations = dense<1> : tensor<2xi64>
   // CHECK-SAME: strides = dense<1> : tensor<2xi64>
   // CHECK-SAME: ins(%{{.+}}, %{{.+}} : tensor, tensor<64x3x3x32xf32>)
-  // CHECK-SAME: outs(%{{.+}} : tensor) -> tensor
+  // CHECK-SAME: outs(%{{.+}} : tensor)
   %0 = linalg.conv_2d_nhwc_fhwc {dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>}
     ins (%input, %filter: tensor, tensor<64x3x3x32xf32>)
-    outs (%init: tensor) -> tensor
+    outs (%init: tensor)
   return %0 : tensor
 }
@@ -377,11 +377,11 @@
   // CHECK-SAME: dilations = dense<1> : tensor<3xi64>
   // CHECK-SAME: strides = dense<1> : tensor<3xi64>
   // CHECK-SAME: ins(%{{.+}}, %{{.+}} : tensor, tensor)
-  // CHECK-SAME: outs(%{{.+}} : tensor) -> tensor
+  // CHECK-SAME: outs(%{{.+}} : tensor)
   %0 = linalg.conv_3d_ndhwc_dhwcf {dilations = dense<1> : tensor<3xi64>, strides = dense<1> : tensor<3xi64>}
     ins (%input, %filter: tensor, tensor)
-    outs (%init: tensor) -> tensor
+    outs (%init: tensor)
   return %0 : tensor
 }
@@ -408,15 +408,15 @@
 // CHECK-SAME: dilations = dense<1> : tensor<2xi64>
 // CHECK-SAME: strides = dense<1> : tensor<2xi64>
 // CHECK-SAME: ins(%{{.+}}, %{{.+}} : tensor<1x4x4x1xf32>, tensor<3x3xf32>)
-// CHECK-SAME: outs(%{{.+}} : tensor<1x2x2x1xf32>) -> tensor<1x2x2x1xf32>
+// CHECK-SAME: outs(%{{.+}} : tensor<1x2x2x1xf32>)
 func.func @pooling_nhwc_sum_tensor(%input: tensor<1x4x4x1xf32>) -> tensor<1x2x2x1xf32> {
   %fake = tensor.empty() : tensor<3x3xf32>
   %init = tensor.empty() : tensor<1x2x2x1xf32>
   %cst = arith.constant 0.000000e+00 : f32
-  %fill = linalg.fill ins(%cst : f32) outs(%init : tensor<1x2x2x1xf32>) -> tensor<1x2x2x1xf32>
+  %fill = linalg.fill ins(%cst : f32) outs(%init : tensor<1x2x2x1xf32>)
   %res = linalg.pooling_nhwc_sum {dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>}
     ins(%input, %fake: tensor<1x4x4x1xf32>, tensor<3x3xf32>)
-    outs(%fill: tensor<1x2x2x1xf32>) -> tensor<1x2x2x1xf32>
+    outs(%fill: tensor<1x2x2x1xf32>)
   return %res : tensor<1x2x2x1xf32>
 }
@@ -427,15 +427,15 @@
 // CHECK-SAME: dilations = dense<1> : tensor<1xi64>
 // CHECK-SAME: strides = dense<1> : tensor<1xi64>
 // CHECK-SAME: ins(%{{.+}}, %{{.+}} : tensor<1x4x1xf32>, tensor<3xf32>)
-// CHECK-SAME: outs(%{{.+}} : tensor<1x2x1xf32>) -> tensor<1x2x1xf32>
+// CHECK-SAME: outs(%{{.+}} : tensor<1x2x1xf32>)
 func.func @pooling_nwc_sum_tensor(%input: tensor<1x4x1xf32>) -> tensor<1x2x1xf32> {
   %fake = tensor.empty() : tensor<3xf32>
   %init = tensor.empty() : tensor<1x2x1xf32>
   %cst = arith.constant 0.000000e+00 : f32
-  %fill = linalg.fill ins(%cst : f32) outs(%init : tensor<1x2x1xf32>) -> tensor<1x2x1xf32>
+  %fill = linalg.fill ins(%cst : f32) outs(%init : tensor<1x2x1xf32>)
   %res = linalg.pooling_nwc_sum {dilations = dense<1> : tensor<1xi64>, strides = dense<1> : tensor<1xi64>}
     ins(%input, %fake: tensor<1x4x1xf32>, tensor<3xf32>)
-    outs(%fill: tensor<1x2x1xf32>) -> tensor<1x2x1xf32>
+    outs(%fill: tensor<1x2x1xf32>)
   return %res : tensor<1x2x1xf32>
 }
@@ -476,15 +476,15 @@
 // CHECK-SAME: dilations = dense<1> : tensor<2xi64>
 // CHECK-SAME: strides = dense<1> : tensor<2xi64>
 // CHECK-SAME: ins(%{{.+}}, %{{.+}} : tensor<1x1x4x4xf32>, tensor<3x3xf32>)
-// CHECK-SAME: outs(%{{.+}} : tensor<1x1x2x2xf32>) -> tensor<1x1x2x2xf32>
+// CHECK-SAME: outs(%{{.+}} : tensor<1x1x2x2xf32>)
 func.func @pooling_nchw_sum_tensor(%input: tensor<1x1x4x4xf32>) -> tensor<1x1x2x2xf32> {
   %fake = tensor.empty() : tensor<3x3xf32>
   %init = tensor.empty() : tensor<1x1x2x2xf32>
   %cst = arith.constant 0.000000e+00 : f32
-  %fill = linalg.fill ins(%cst : f32) outs(%init : tensor<1x1x2x2xf32>) -> tensor<1x1x2x2xf32>
+  %fill = linalg.fill ins(%cst : f32) outs(%init : tensor<1x1x2x2xf32>)
   %res = linalg.pooling_nchw_sum {dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>}
     ins(%input, %fake: tensor<1x1x4x4xf32>, tensor<3x3xf32>)
-    outs(%fill: tensor<1x1x2x2xf32>) -> tensor<1x1x2x2xf32>
+    outs(%fill: tensor<1x1x2x2xf32>)
   return %res : tensor<1x1x2x2xf32>
 }
@@ -495,15 +495,15 @@
 // CHECK-SAME: dilations = dense<1> : tensor<1xi64>
 // CHECK-SAME: strides = dense<1> : tensor<1xi64>
 // CHECK-SAME: ins(%{{.+}}, %{{.+}} : tensor<1x1x4xf32>, tensor<3xf32>)
-// CHECK-SAME: outs(%{{.+}} : tensor<1x1x2xf32>) -> tensor<1x1x2xf32>
+// CHECK-SAME: outs(%{{.+}} : tensor<1x1x2xf32>)
 func.func @pooling_ncw_sum_tensor(%input: tensor<1x1x4xf32>) -> tensor<1x1x2xf32> {
   %fake = tensor.empty() : tensor<3xf32>
   %init = tensor.empty() : tensor<1x1x2xf32>
   %cst = arith.constant 0.000000e+00 : f32
-  %fill = linalg.fill ins(%cst : f32) outs(%init : tensor<1x1x2xf32>) -> tensor<1x1x2xf32>
+  %fill = linalg.fill ins(%cst : f32) outs(%init : tensor<1x1x2xf32>)
   %res = linalg.pooling_ncw_sum {dilations = dense<1> : tensor<1xi64>, strides = dense<1> : tensor<1xi64>}
     ins(%input, %fake: tensor<1x1x4xf32>, tensor<3xf32>)
-    outs(%fill: tensor<1x1x2xf32>) -> tensor<1x1x2xf32>
+    outs(%fill: tensor<1x1x2xf32>)
   return %res : tensor<1x1x2xf32>
 }
@@ -544,15 +544,15 @@
 // CHECK-SAME: dilations = dense<1> : tensor<2xi64>
 // CHECK-SAME: strides = dense<1> : tensor<2xi64>
 // CHECK-SAME: ins(%{{.+}}, %{{.+}} : tensor<1x4x4x1xf32>, tensor<3x3xf32>)
-// CHECK-SAME: outs(%{{.+}} : tensor<1x2x2x1xf32>) -> tensor<1x2x2x1xf32>
+// CHECK-SAME: outs(%{{.+}} : tensor<1x2x2x1xf32>)
 func.func @pooling_nhwc_max_tensor(%input: tensor<1x4x4x1xf32>) -> tensor<1x2x2x1xf32> {
   %fake = tensor.empty() : tensor<3x3xf32>
   %init = tensor.empty() : tensor<1x2x2x1xf32>
   %cst = arith.constant 0.000000e+00 : f32
-  %fill = linalg.fill ins(%cst : f32) outs(%init : tensor<1x2x2x1xf32>) -> tensor<1x2x2x1xf32>
+  %fill = linalg.fill ins(%cst : f32) outs(%init : tensor<1x2x2x1xf32>)
   %res = linalg.pooling_nhwc_max {dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>}
     ins(%input, %fake: tensor<1x4x4x1xf32>, tensor<3x3xf32>)
-    outs(%fill: tensor<1x2x2x1xf32>) -> tensor<1x2x2x1xf32>
+    outs(%fill: tensor<1x2x2x1xf32>)
   return %res : tensor<1x2x2x1xf32>
 }
@@ -562,15 +562,15 @@
 // CHECK-SAME: dilations = dense<1> : tensor<1xi64>
 // CHECK-SAME: strides = dense<1> : tensor<1xi64>
 // CHECK-SAME: ins(%{{.+}}, %{{.+}} : tensor<1x4x1xf32>, tensor<3xf32>)
-// CHECK-SAME: outs(%{{.+}} : tensor<1x2x1xf32>) -> tensor<1x2x1xf32>
+// CHECK-SAME: outs(%{{.+}} : tensor<1x2x1xf32>)
 func.func @pooling_nwc_max_tensor(%input: tensor<1x4x1xf32>) -> tensor<1x2x1xf32> {
   %fake = tensor.empty() : tensor<3xf32>
   %init = tensor.empty() : tensor<1x2x1xf32>
   %cst = arith.constant 0.000000e+00 : f32
-  %fill = linalg.fill ins(%cst : f32) outs(%init : tensor<1x2x1xf32>) -> tensor<1x2x1xf32>
+  %fill = linalg.fill ins(%cst : f32) outs(%init : tensor<1x2x1xf32>)
   %res = linalg.pooling_nwc_max {dilations = dense<1> : tensor<1xi64>, strides = dense<1> : tensor<1xi64>}
     ins(%input, %fake: tensor<1x4x1xf32>, tensor<3xf32>)
-    outs(%fill: tensor<1x2x1xf32>) -> tensor<1x2x1xf32>
+    outs(%fill: tensor<1x2x1xf32>)
   return %res : tensor<1x2x1xf32>
 }
@@ -580,16 +580,16 @@
 // CHECK-SAME: dilations = dense<1> : tensor<2xi64>
 // CHECK-SAME: strides = dense<1> : tensor<2xi64>
 // CHECK-SAME: ins(%{{.+}}, %{{.+}} : tensor<1x1x4x4xf32>, tensor<3x3xf32>)
-// CHECK-SAME: outs(%{{.+}} : tensor<1x1x2x2xf32>) -> tensor<1x1x2x2xf32>
+// CHECK-SAME: outs(%{{.+}} : tensor<1x1x2x2xf32>)
 func.func @pooling_nchw_max_tensor(%input: tensor<1x1x4x4xf32>) -> tensor<1x1x2x2xf32> {
   %fake = tensor.empty() : tensor<3x3xf32>
   %init = tensor.empty() : tensor<1x1x2x2xf32>
   %cst = arith.constant 0.000000e+00 : f32
-  %fill = linalg.fill ins(%cst : f32) outs(%init : tensor<1x1x2x2xf32>) -> tensor<1x1x2x2xf32>
+  %fill = linalg.fill ins(%cst : f32) outs(%init : tensor<1x1x2x2xf32>)
   %res = linalg.pooling_nchw_max {dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>}
     ins(%input, %fake: tensor<1x1x4x4xf32>, tensor<3x3xf32>)
-    outs(%fill: tensor<1x1x2x2xf32>) -> tensor<1x1x2x2xf32>
+    outs(%fill: tensor<1x1x2x2xf32>)
   return %res : tensor<1x1x2x2xf32>
 }
@@ -599,16 +599,16 @@
 // CHECK-SAME: dilations = dense<1> : tensor<1xi64>
 // CHECK-SAME: strides = dense<1> : tensor<1xi64>
 // CHECK-SAME: ins(%{{.+}}, %{{.+}} : tensor<1x1x4xf32>, tensor<3xf32>)
-// CHECK-SAME: outs(%{{.+}} : tensor<1x1x2xf32>) -> tensor<1x1x2xf32>
+// CHECK-SAME: outs(%{{.+}} : tensor<1x1x2xf32>)
 func.func @pooling_ncw_max_tensor(%input: tensor<1x1x4xf32>) -> tensor<1x1x2xf32> {
   %fake = tensor.empty() : tensor<3xf32>
   %init = tensor.empty() : tensor<1x1x2xf32>
   %cst = arith.constant 0.000000e+00 : f32
-  %fill = linalg.fill ins(%cst : f32) outs(%init : tensor<1x1x2xf32>) -> tensor<1x1x2xf32>
+  %fill = linalg.fill ins(%cst : f32) outs(%init : tensor<1x1x2xf32>)
   %res = linalg.pooling_ncw_max {dilations = dense<1> : tensor<1xi64>, strides = dense<1> : tensor<1xi64>}
     ins(%input, %fake: tensor<1x1x4xf32>, tensor<3xf32>)
-    outs(%fill: tensor<1x1x2xf32>) -> tensor<1x1x2xf32>
+    outs(%fill: tensor<1x1x2xf32>)
   return %res : tensor<1x1x2xf32>
 }
@@ -649,15 +649,15 @@
 // CHECK-SAME: dilations = dense<1> : tensor<2xi64>
 // CHECK-SAME: strides = dense<1> : tensor<2xi64>
 // CHECK-SAME: ins(%{{.+}}, %{{.+}} : tensor<1x4x4x1xi8>, tensor<3x3xi8>)
-// CHECK-SAME: outs(%{{.+}} : tensor<1x2x2x1xi8>) -> tensor<1x2x2x1xi8>
+// CHECK-SAME: outs(%{{.+}} : tensor<1x2x2x1xi8>)
 func.func @pooling_nhwc_i8_max_tensor(%input: tensor<1x4x4x1xi8>) -> tensor<1x2x2x1xi8> {
   %fake = tensor.empty() : tensor<3x3xi8>
   %init = tensor.empty() : tensor<1x2x2x1xi8>
   %cst = arith.constant 0 : i8
-  %fill = linalg.fill ins(%cst : i8) outs(%init : tensor<1x2x2x1xi8>) -> tensor<1x2x2x1xi8>
+  %fill = linalg.fill ins(%cst : i8) outs(%init : tensor<1x2x2x1xi8>)
   %res = linalg.pooling_nhwc_max {dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>}
     ins(%input, %fake: tensor<1x4x4x1xi8>, tensor<3x3xi8>)
-    outs(%fill: tensor<1x2x2x1xi8>) -> tensor<1x2x2x1xi8>
+    outs(%fill: tensor<1x2x2x1xi8>)
   return %res : tensor<1x2x2x1xi8>
 }
@@ -668,15 +668,15 @@
 // CHECK-SAME: dilations = dense<1> : tensor<1xi64>
 // CHECK-SAME: strides = dense<1> : tensor<1xi64>
 // CHECK-SAME: ins(%{{.+}}, %{{.+}} : tensor<1x4x1xi8>, tensor<3xi8>)
-// CHECK-SAME: outs(%{{.+}} : tensor<1x2x1xi8>) -> tensor<1x2x1xi8>
+// CHECK-SAME: outs(%{{.+}} : tensor<1x2x1xi8>)
 func.func @pooling_nwc_i8_max_tensor(%input: tensor<1x4x1xi8>) -> tensor<1x2x1xi8> {
   %fake = tensor.empty() : tensor<3xi8>
   %init = tensor.empty() : tensor<1x2x1xi8>
   %cst = arith.constant 0 : i8
-  %fill = linalg.fill ins(%cst : i8) outs(%init : tensor<1x2x1xi8>) -> tensor<1x2x1xi8>
+  %fill = linalg.fill ins(%cst : i8) outs(%init : tensor<1x2x1xi8>)
   %res = linalg.pooling_nwc_max {dilations = dense<1> : tensor<1xi64>, strides = dense<1> : tensor<1xi64>}
     ins(%input, %fake: tensor<1x4x1xi8>, tensor<3xi8>)
-    outs(%fill: tensor<1x2x1xi8>) -> tensor<1x2x1xi8>
+    outs(%fill: tensor<1x2x1xi8>)
   return %res : tensor<1x2x1xi8>
 }
@@ -717,15 +717,15 @@
 // CHECK-SAME: dilations = dense<1> : tensor<2xi64>
 // CHECK-SAME: strides = dense<1> : tensor<2xi64>
 // CHECK-SAME: ins(%{{.+}}, %{{.+}} : tensor<1x4x4x1xi16>, tensor<3x3xi16>)
-// CHECK-SAME: outs(%{{.+}} : tensor<1x2x2x1xi16>) -> tensor<1x2x2x1xi16>
+// CHECK-SAME: outs(%{{.+}} : tensor<1x2x2x1xi16>)
 func.func @pooling_nhwc_i16_max_tensor(%input: tensor<1x4x4x1xi16>) -> tensor<1x2x2x1xi16> {
   %fake = tensor.empty() : tensor<3x3xi16>
   %init = tensor.empty() : tensor<1x2x2x1xi16>
   %cst = arith.constant 0 : i16
-  %fill = linalg.fill ins(%cst : i16) outs(%init : tensor<1x2x2x1xi16>) -> tensor<1x2x2x1xi16>
+  %fill = linalg.fill ins(%cst : i16) outs(%init : tensor<1x2x2x1xi16>)
   %res = linalg.pooling_nhwc_max {dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>}
     ins(%input, %fake: tensor<1x4x4x1xi16>, tensor<3x3xi16>)
-    outs(%fill: tensor<1x2x2x1xi16>) -> tensor<1x2x2x1xi16>
+    outs(%fill: tensor<1x2x2x1xi16>)
   return %res : tensor<1x2x2x1xi16>
 }
@@ -736,15 +736,15 @@
 // CHECK-SAME: dilations = dense<1> : tensor<1xi64>
 // CHECK-SAME: strides = dense<1> : tensor<1xi64>
 // CHECK-SAME: ins(%{{.+}}, %{{.+}} : tensor<1x4x1xi16>, tensor<3xi16>)
-// CHECK-SAME: outs(%{{.+}} : tensor<1x2x1xi16>) -> tensor<1x2x1xi16>
+// CHECK-SAME: outs(%{{.+}} : tensor<1x2x1xi16>)
 func.func @pooling_nwc_i16_max_tensor(%input: tensor<1x4x1xi16>) -> tensor<1x2x1xi16> {
   %fake = tensor.empty() : tensor<3xi16>
  %init = tensor.empty() : tensor<1x2x1xi16>
   %cst = arith.constant 0 : i16
-  %fill = linalg.fill ins(%cst : i16) outs(%init : tensor<1x2x1xi16>) -> tensor<1x2x1xi16>
+  %fill = linalg.fill ins(%cst : i16) outs(%init : tensor<1x2x1xi16>)
   %res = linalg.pooling_nwc_max {dilations = dense<1> : tensor<1xi64>, strides = dense<1> : tensor<1xi64>}
     ins(%input, %fake: tensor<1x4x1xi16>, tensor<3xi16>)
-    outs(%fill: tensor<1x2x1xi16>) -> tensor<1x2x1xi16>
+    outs(%fill: tensor<1x2x1xi16>)
   return %res : tensor<1x2x1xi16>
 }
@@ -785,15 +785,15 @@
 // CHECK-SAME: dilations = dense<1> : tensor<2xi64>
 // CHECK-SAME: strides = dense<1> : tensor<2xi64>
 // CHECK-SAME: ins(%{{.+}}, %{{.+}} : tensor<1x4x4x1xi32>, tensor<3x3xi32>)
-// CHECK-SAME: outs(%{{.+}} : tensor<1x2x2x1xi32>) -> tensor<1x2x2x1xi32>
+// CHECK-SAME: outs(%{{.+}} : tensor<1x2x2x1xi32>)
 func.func @pooling_nhwc_i32_max_tensor(%input: tensor<1x4x4x1xi32>) -> tensor<1x2x2x1xi32> {
   %fake = tensor.empty() : tensor<3x3xi32>
   %init = tensor.empty() : tensor<1x2x2x1xi32>
   %cst = arith.constant 0 : i32
-  %fill = linalg.fill ins(%cst : i32) outs(%init : tensor<1x2x2x1xi32>) -> tensor<1x2x2x1xi32>
+  %fill = linalg.fill ins(%cst : i32) outs(%init : tensor<1x2x2x1xi32>)
   %res = linalg.pooling_nhwc_max {dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>}
     ins(%input, %fake: tensor<1x4x4x1xi32>, tensor<3x3xi32>)
-    outs(%fill: tensor<1x2x2x1xi32>) -> tensor<1x2x2x1xi32>
+    outs(%fill: tensor<1x2x2x1xi32>)
   return %res : tensor<1x2x2x1xi32>
 }
@@ -804,15 +804,15 @@
 // CHECK-SAME: dilations = dense<1> : tensor<1xi64>
 // CHECK-SAME: strides = dense<1> : tensor<1xi64>
 // CHECK-SAME: ins(%{{.+}}, %{{.+}} : tensor<1x4x1xi32>, tensor<3xi32>)
-// CHECK-SAME: outs(%{{.+}} : tensor<1x2x1xi32>) -> tensor<1x2x1xi32>
+// CHECK-SAME: outs(%{{.+}} : tensor<1x2x1xi32>)
 func.func @pooling_nwc_i32_max_tensor(%input: tensor<1x4x1xi32>) -> tensor<1x2x1xi32> {
   %fake = tensor.empty() : tensor<3xi32>
   %init = tensor.empty() : tensor<1x2x1xi32>
   %cst = arith.constant 0 : i32
-  %fill = linalg.fill ins(%cst : i32) outs(%init : tensor<1x2x1xi32>) -> tensor<1x2x1xi32>
+  %fill = linalg.fill ins(%cst : i32) outs(%init : tensor<1x2x1xi32>)
   %res = linalg.pooling_nwc_max {dilations = dense<1> : tensor<1xi64>, strides = dense<1> : tensor<1xi64>}
     ins(%input, %fake: tensor<1x4x1xi32>, tensor<3xi32>)
-    outs(%fill: tensor<1x2x1xi32>) -> tensor<1x2x1xi32>
+    outs(%fill: tensor<1x2x1xi32>)
   return %res : tensor<1x2x1xi32>
 }
@@ -854,15 +854,15 @@
 // CHECK-SAME: dilations = dense<1> : tensor<2xi64>
 // CHECK-SAME: strides = dense<1> : tensor<2xi64>
 // CHECK-SAME: ins(%{{.+}}, %{{.+}} : tensor<1x4x4x1xf32>, tensor<3x3xf32>)
-// CHECK-SAME: outs(%{{.+}} : tensor<1x2x2x1xf32>) -> tensor<1x2x2x1xf32>
+// CHECK-SAME: outs(%{{.+}} : tensor<1x2x2x1xf32>)
 func.func @pooling_nhwc_min_tensor(%input: tensor<1x4x4x1xf32>) -> tensor<1x2x2x1xf32> {
   %fake = tensor.empty() : tensor<3x3xf32>
   %init = tensor.empty() : tensor<1x2x2x1xf32>
   %cst = arith.constant 0.000000e+00 : f32
-  %fill = linalg.fill ins(%cst : f32) outs(%init : tensor<1x2x2x1xf32>) -> tensor<1x2x2x1xf32>
+  %fill = linalg.fill ins(%cst : f32) outs(%init : tensor<1x2x2x1xf32>)
   %res = linalg.pooling_nhwc_min {dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>}
     ins(%input, %fake: tensor<1x4x4x1xf32>, tensor<3x3xf32>)
-    outs(%fill: tensor<1x2x2x1xf32>) -> tensor<1x2x2x1xf32>
+    outs(%fill: tensor<1x2x2x1xf32>)
   return %res : tensor<1x2x2x1xf32>
 }
@@ -873,15 +873,15 @@
 // CHECK-SAME: dilations = dense<1> : tensor<1xi64>
 // CHECK-SAME: strides = dense<1> : tensor<1xi64>
 // CHECK-SAME: ins(%{{.+}}, %{{.+}} : tensor<1x4x1xf32>, tensor<3xf32>)
-// CHECK-SAME: outs(%{{.+}} : tensor<1x2x1xf32>) -> tensor<1x2x1xf32>
+// CHECK-SAME: outs(%{{.+}} : tensor<1x2x1xf32>)
 func.func @pooling_nwc_min_tensor(%input: tensor<1x4x1xf32>) -> tensor<1x2x1xf32> {
   %fake = tensor.empty() : tensor<3xf32>
   %init = tensor.empty() : tensor<1x2x1xf32>
   %cst = arith.constant 0.000000e+00 : f32
-  %fill = linalg.fill ins(%cst : f32) outs(%init : tensor<1x2x1xf32>) -> tensor<1x2x1xf32>
+  %fill = linalg.fill ins(%cst : f32) outs(%init : tensor<1x2x1xf32>)
   %res = linalg.pooling_nwc_min {dilations = dense<1> : tensor<1xi64>, strides = dense<1> : tensor<1xi64>}
     ins(%input, %fake: tensor<1x4x1xf32>, tensor<3xf32>)
-    outs(%fill: tensor<1x2x1xf32>) -> tensor<1x2x1xf32>
+    outs(%fill: tensor<1x2x1xf32>)
   return %res : tensor<1x2x1xf32>
 }
@@ -922,15 +922,15 @@
 // CHECK-SAME: dilations = dense<1> : tensor<3xi64>
 // CHECK-SAME: strides = dense<1> : tensor<3xi64>
 // CHECK-SAME: ins(%{{.+}}, %{{.+}} : tensor<1x4x4x4x1xf32>, tensor<3x3x3xf32>)
-// CHECK-SAME: outs(%{{.+}} : tensor<1x2x2x2x1xf32>) -> tensor<1x2x2x2x1xf32>
+// CHECK-SAME: outs(%{{.+}} : tensor<1x2x2x2x1xf32>)
 func.func @pooling_ndhwc_sum_tensor(%input: tensor<1x4x4x4x1xf32>) -> tensor<1x2x2x2x1xf32> {
   %fake = tensor.empty() : tensor<3x3x3xf32>
   %init = tensor.empty() : tensor<1x2x2x2x1xf32>
   %cst = arith.constant 0.000000e+00 : f32
-  %fill = linalg.fill ins(%cst : f32) outs(%init : tensor<1x2x2x2x1xf32>) -> tensor<1x2x2x2x1xf32>
+  %fill = linalg.fill ins(%cst : f32) outs(%init : tensor<1x2x2x2x1xf32>)
   %res = linalg.pooling_ndhwc_sum {dilations = dense<1> : tensor<3xi64>, strides = dense<1> : tensor<3xi64>}
     ins(%input, %fake: tensor<1x4x4x4x1xf32>, tensor<3x3x3xf32>)
-    outs(%fill: tensor<1x2x2x2x1xf32>) -> tensor<1x2x2x2x1xf32>
+    outs(%fill: tensor<1x2x2x2x1xf32>)
   return %res : tensor<1x2x2x2x1xf32>
 }
@@ -956,15 +956,15 @@
 // CHECK-SAME: dilations = dense<1> : tensor<3xi64>
 // CHECK-SAME: strides = dense<1> : tensor<3xi64>
 // CHECK-SAME: ins(%{{.+}}, %{{.+}} : tensor<1x4x4x4x1xf32>, tensor<3x3x3xf32>)
-// CHECK-SAME: outs(%{{.+}} : tensor<1x2x2x2x1xf32>) -> tensor<1x2x2x2x1xf32>
+// CHECK-SAME: outs(%{{.+}} : tensor<1x2x2x2x1xf32>)
 func.func @pooling_ndhwc_max_tensor(%input: tensor<1x4x4x4x1xf32>) -> tensor<1x2x2x2x1xf32> {
   %fake = tensor.empty() : tensor<3x3x3xf32>
   %init = tensor.empty() : tensor<1x2x2x2x1xf32>
   %cst = arith.constant 0.000000e+00 : f32
-  %fill = linalg.fill ins(%cst : f32) outs(%init : tensor<1x2x2x2x1xf32>) -> tensor<1x2x2x2x1xf32>
+  %fill = linalg.fill ins(%cst : f32) outs(%init : tensor<1x2x2x2x1xf32>)
   %res = linalg.pooling_ndhwc_max {dilations = dense<1> : tensor<3xi64>, strides = dense<1> : tensor<3xi64>}
     ins(%input, %fake: tensor<1x4x4x4x1xf32>, tensor<3x3x3xf32>)
-    outs(%fill: tensor<1x2x2x2x1xf32>) -> tensor<1x2x2x2x1xf32>
+    outs(%fill: tensor<1x2x2x2x1xf32>)
   return %res : tensor<1x2x2x2x1xf32>
 }
@@ -990,15 +990,15 @@
 // CHECK-SAME: dilations = dense<1> : tensor<3xi64>
 // CHECK-SAME: strides = dense<1> : tensor<3xi64>
 // CHECK-SAME: ins(%{{.+}}, %{{.+}} : tensor<1x4x4x4x1xf32>, tensor<3x3x3xf32>)
-// CHECK-SAME: outs(%{{.+}} : tensor<1x2x2x2x1xf32>) -> tensor<1x2x2x2x1xf32>
+// CHECK-SAME: outs(%{{.+}} : tensor<1x2x2x2x1xf32>)
 func.func @pooling_ndhwc_min_tensor(%input: tensor<1x4x4x4x1xf32>) -> tensor<1x2x2x2x1xf32> {
   %fake = tensor.empty() : tensor<3x3x3xf32>
   %init = tensor.empty() : tensor<1x2x2x2x1xf32>
   %cst = arith.constant 0.000000e+00 : f32
-  %fill = linalg.fill ins(%cst : f32) outs(%init : tensor<1x2x2x2x1xf32>) -> tensor<1x2x2x2x1xf32>
+  %fill = linalg.fill ins(%cst : f32) outs(%init : tensor<1x2x2x2x1xf32>)
   %res = linalg.pooling_ndhwc_min {dilations = dense<1> : tensor<3xi64>, strides = dense<1> : tensor<3xi64>}
     ins(%input, %fake: tensor<1x4x4x4x1xf32>, tensor<3x3x3xf32>)
-    outs(%fill: tensor<1x2x2x2x1xf32>) -> tensor<1x2x2x2x1xf32>
+    outs(%fill: tensor<1x2x2x2x1xf32>)
   return %res : tensor<1x2x2x2x1xf32>
 }
@@ -1056,8 +1056,8 @@
 func.func @batch_reduce_matmul(%arg0: tensor<8x128x256xf32>, %arg1: tensor<8x256x512xf32>, %arg2: tensor<128x512xf32>) -> tensor<128x512xf32> {
   // CHECK: %{{.+}} = linalg.batch_reduce_matmul
   // CHECK-SAME: ins(%{{.+}}, %{{.+}} : tensor<8x128x256xf32>, tensor<8x256x512xf32>)
-  // CHECK-SAME: outs(%{{.+}} : tensor<128x512xf32>) -> tensor<128x512xf32>
-  %0 = linalg.batch_reduce_matmul ins(%arg0, %arg1 : tensor<8x128x256xf32>, tensor<8x256x512xf32>) outs(%arg2: tensor<128x512xf32>) -> tensor<128x512xf32>
+  // CHECK-SAME: outs(%{{.+}} : tensor<128x512xf32>)
+  %0 = linalg.batch_reduce_matmul ins(%arg0, %arg1 : tensor<8x128x256xf32>, tensor<8x256x512xf32>) outs(%arg2: tensor<128x512xf32>)
   return %0: tensor<128x512xf32>
 }
diff --git a/mlir/test/Dialect/Linalg/namedop_conversion.mlir b/mlir/test/Dialect/Linalg/namedop_conversion.mlir
--- a/mlir/test/Dialect/Linalg/namedop_conversion.mlir
+++ b/mlir/test/Dialect/Linalg/namedop_conversion.mlir
@@ -6,7 +6,7 @@
   // CHECK-DAG: %[[INIT:.+]] = tensor.collapse_shape %arg2 {{\[\[}}0], [1], [2], [3, 4]]
   // CHECK-DAG: %[[CONV:.+]] = linalg.depthwise_conv_2d_nhwc_hwc {_someattr, dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%arg0, %[[KERNEL]] : tensor, tensor) outs(%[[INIT]] : tensor)
   // CHECK: %[[OUT:.+]] = tensor.expand_shape %[[CONV]] {{\[\[}}0], [1], [2], [3, 4]]
-  %0 = linalg.depthwise_conv_2d_nhwc_hwcm {_someattr, dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%arg0, %arg1 : tensor, tensor) outs(%arg2 : tensor) -> tensor
+  %0 = linalg.depthwise_conv_2d_nhwc_hwcm {_someattr, dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%arg0, %arg1 : tensor, tensor) outs(%arg2 : tensor)
   return %0 : tensor
 }
@@ -19,6 +19,6 @@
   // CHECK-DAG: %[[INIT:.+]] = tensor.collapse_shape %arg2 {{\[\[}}0], [1], [2], [3, 4]]
   // CHECK-DAG: %[[CONV:.+]] = linalg.depthwise_conv_2d_nhwc_hwc_q {_someattr, dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%arg0, %[[KERNEL]], %arg3, %arg4 : tensor, tensor, i32, i32) outs(%[[INIT]] : tensor)
   // CHECK: %[[OUT:.+]] = tensor.expand_shape %[[CONV]] {{\[\[}}0], [1], [2], [3, 4]]
-  %0 = linalg.depthwise_conv_2d_nhwc_hwcm_q {_someattr, dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%arg0, %arg1, %arg3, %arg4 : tensor, tensor, i32, i32) outs(%arg2 : tensor) -> tensor
+  %0 = linalg.depthwise_conv_2d_nhwc_hwcm_q {_someattr, dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%arg0, %arg1, %arg3, %arg4 : tensor, tensor, i32, i32) outs(%arg2 : tensor)
   return %0 : tensor
 }
diff --git a/mlir/test/Dialect/Linalg/one-shot-bufferize-analysis-2fill-extract-matmul-all-perms.mlir b/mlir/test/Dialect/Linalg/one-shot-bufferize-analysis-2fill-extract-matmul-all-perms.mlir
--- a/mlir/test/Dialect/Linalg/one-shot-bufferize-analysis-2fill-extract-matmul-all-perms.mlir
+++ b/mlir/test/Dialect/Linalg/one-shot-bufferize-analysis-2fill-extract-matmul-all-perms.mlir
@@ -18,15 +18,15 @@
   %0 = bufferization.alloc_tensor() : tensor<256x256xf32>

   // CHECK: {__inplace_operands_attr__ = ["none", "false"]}
-  %1 = linalg.fill ins(%cst : f32) outs(%0 : tensor<256x256xf32>) -> tensor<256x256xf32>
+  %1 = linalg.fill ins(%cst : f32) outs(%0 : tensor<256x256xf32>)
   // CHECK: {__inplace_operands_attr__ = ["none", "true"]}
-  %2 = linalg.fill ins(%cst_0 : f32) outs(%0 : tensor<256x256xf32>) -> tensor<256x256xf32>
+  %2 = linalg.fill ins(%cst_0 : f32) outs(%0 : tensor<256x256xf32>)
   // CHECK: {__inplace_operands_attr__ = ["true"]}
   %3 = tensor.extract_slice %1[0, 0] [256, 16] [1, 1] : tensor<256x256xf32> to tensor<256x16xf32>
   // CHECK: {__inplace_operands_attr__ = ["true"]}
   %4 = tensor.extract_slice %2[0, 0] [16, 256] [1, 1] : tensor<256x256xf32> to tensor<16x256xf32>
   // CHECK: {__inplace_operands_attr__ = ["true", "true", "true"]}
-  %5 = linalg.matmul ins(%3, %4 : tensor<256x16xf32>, tensor<16x256xf32>) outs(%arg2 : tensor<256x256xf32>) -> tensor<256x256xf32>
+  %5 = linalg.matmul ins(%3, %4 : tensor<256x16xf32>, tensor<16x256xf32>) outs(%arg2 : tensor<256x256xf32>)
   return %5 : tensor<256x256xf32>
 }
@@ -45,15 +45,15 @@
   %0 = bufferization.alloc_tensor() : tensor<256x256xf32>

   // CHECK: {__inplace_operands_attr__ = ["none", "false"]}
-  %1 = linalg.fill ins(%cst : f32) outs(%0 : tensor<256x256xf32>) -> tensor<256x256xf32>
+  %1 = linalg.fill ins(%cst : f32) outs(%0 : tensor<256x256xf32>)
   // CHECK: {__inplace_operands_attr__ = ["none", "true"]}
-  %2 = linalg.fill ins(%cst_0 : f32) outs(%0 : tensor<256x256xf32>) -> tensor<256x256xf32>
+  %2 = linalg.fill ins(%cst_0 : f32) outs(%0 : tensor<256x256xf32>)
   // CHECK: {__inplace_operands_attr__ = ["true"]}
   %4 = tensor.extract_slice %2[0, 0] [16, 256] [1, 1] : tensor<256x256xf32> to tensor<16x256xf32>
   // CHECK: {__inplace_operands_attr__ = ["true"]}
   %3 = tensor.extract_slice %1[0, 0] [256, 16] [1, 1] : tensor<256x256xf32> to tensor<256x16xf32>
   // CHECK: {__inplace_operands_attr__ = ["true", "true", "true"]}
-  %5 = linalg.matmul ins(%3, %4 : tensor<256x16xf32>, tensor<16x256xf32>) outs(%arg2 : tensor<256x256xf32>) -> tensor<256x256xf32>
+  %5 = linalg.matmul ins(%3, %4 : tensor<256x16xf32>, tensor<16x256xf32>) outs(%arg2 : tensor<256x256xf32>)
   return %5 : tensor<256x256xf32>
 }
@@ -72,15 +72,15 @@
   %0 = bufferization.alloc_tensor() : tensor<256x256xf32>

   // CHECK: {__inplace_operands_attr__ = ["none", "false"]}
-  %1 = linalg.fill ins(%cst : f32) outs(%0 : tensor<256x256xf32>) -> tensor<256x256xf32>
+  %1 = linalg.fill ins(%cst : f32) outs(%0 : tensor<256x256xf32>)
   // CHECK: {__inplace_operands_attr__ = ["true"]}
   %3 = tensor.extract_slice %1[0, 0] [256, 16] [1, 1] : tensor<256x256xf32> to tensor<256x16xf32>
   // CHECK: {__inplace_operands_attr__ = ["none", "true"]}
-  %2 = linalg.fill ins(%cst_0 : f32) outs(%0 : tensor<256x256xf32>) -> tensor<256x256xf32>
+  %2 = linalg.fill ins(%cst_0 : f32) outs(%0 : tensor<256x256xf32>)
   // CHECK: {__inplace_operands_attr__ = ["true"]}
   %4 = tensor.extract_slice %2[0, 0] [16, 256] [1, 1] : tensor<256x256xf32> to tensor<16x256xf32>
   // CHECK: {__inplace_operands_attr__ = ["true", "true", "true"]}
-  %5 = linalg.matmul ins(%3, %4 : tensor<256x16xf32>, tensor<16x256xf32>) outs(%arg2 : tensor<256x256xf32>) -> tensor<256x256xf32>
+  %5 = linalg.matmul ins(%3, %4 : tensor<256x16xf32>, tensor<16x256xf32>) outs(%arg2 : tensor<256x256xf32>)
   return %5 : tensor<256x256xf32>
 }
@@ -99,15 +99,15 @@
   %0 = bufferization.alloc_tensor() : tensor<256x256xf32>

   // CHECK: {__inplace_operands_attr__ = ["none", "false"]}
-  %1 = linalg.fill ins(%cst : f32) outs(%0 : tensor<256x256xf32>) -> tensor<256x256xf32>
+  %1 = linalg.fill ins(%cst : f32) outs(%0 : tensor<256x256xf32>)
   // CHECK: {__inplace_operands_attr__ = ["true"]}
   %3 = tensor.extract_slice %1[0, 0] [256, 16] [1, 1] : tensor<256x256xf32> to tensor<256x16xf32>
   // CHECK: {__inplace_operands_attr__ = ["true"]}
   %4 = tensor.extract_slice %0[0, 0] [16, 256] [1, 1] : tensor<256x256xf32> to tensor<16x256xf32>
   // CHECK: {__inplace_operands_attr__ = ["none", "true"]}
-  %2 = linalg.fill ins(%cst_0 : f32) outs(%4 : tensor<16x256xf32>) -> tensor<16x256xf32>
+  %2 = linalg.fill ins(%cst_0 : f32) outs(%4 : tensor<16x256xf32>)
   // CHECK: {__inplace_operands_attr__ = ["true", "true", "true"]}
-  %5 = linalg.matmul ins(%3, %2 : tensor<256x16xf32>, tensor<16x256xf32>) outs(%arg2 : tensor<256x256xf32>) -> tensor<256x256xf32>
+  %5 = linalg.matmul ins(%3, %2 : tensor<256x16xf32>, tensor<16x256xf32>) outs(%arg2 : tensor<256x256xf32>)
   return %5 : tensor<256x256xf32>
 }
@@ -126,15 +126,15 @@
   %0 = bufferization.alloc_tensor() : tensor<256x256xf32>

   // CHECK: {__inplace_operands_attr__ = ["none", "false"]}
-  %1 = linalg.fill ins(%cst : f32) outs(%0 : tensor<256x256xf32>) -> tensor<256x256xf32>
+  %1 = linalg.fill ins(%cst : f32) outs(%0 : tensor<256x256xf32>)
   // CHECK: {__inplace_operands_attr__ = ["true"]}
   %4 = tensor.extract_slice %0[0, 0] [16, 256] [1, 1] : tensor<256x256xf32> to tensor<16x256xf32>
   // CHECK: {__inplace_operands_attr__ = ["none", "true"]}
-  %2 = linalg.fill ins(%cst_0 : f32) outs(%4 : tensor<16x256xf32>) -> tensor<16x256xf32>
+  %2 = linalg.fill ins(%cst_0 : f32) outs(%4 : tensor<16x256xf32>)
   // CHECK: {__inplace_operands_attr__ = ["true"]}
   %3 = tensor.extract_slice %1[0, 0] [256, 16] [1, 1] : tensor<256x256xf32> to tensor<256x16xf32>
   // CHECK: {__inplace_operands_attr__ = ["true", "true", "true"]}
-  %5 = linalg.matmul ins(%3, %2 : tensor<256x16xf32>, tensor<16x256xf32>) outs(%arg2 : tensor<256x256xf32>) -> tensor<256x256xf32>
+  %5 = linalg.matmul ins(%3, %2 : tensor<256x16xf32>, tensor<16x256xf32>) outs(%arg2 : tensor<256x256xf32>)
   return %5 : tensor<256x256xf32>
 }
@@ -153,15 +153,15 @@
   %0 = bufferization.alloc_tensor() : tensor<256x256xf32>

   // CHECK: {__inplace_operands_attr__ = ["none", "false"]}
-  %1 = linalg.fill ins(%cst : f32) outs(%0 : tensor<256x256xf32>) -> tensor<256x256xf32>
+  %1 = linalg.fill ins(%cst : f32) outs(%0 : tensor<256x256xf32>)
   // CHECK: {__inplace_operands_attr__ = ["true"]}
   %4 = tensor.extract_slice %0[0, 0] [16, 256] [1, 1] : tensor<256x256xf32> to tensor<16x256xf32>
   // CHECK: {__inplace_operands_attr__ = ["true"]}
   %3 = tensor.extract_slice %1[0, 0] [256, 16] [1, 1] : tensor<256x256xf32> to tensor<256x16xf32>
   // CHECK: {__inplace_operands_attr__ = ["none", "true"]}
-  %2 = linalg.fill ins(%cst_0 : f32) outs(%4 : tensor<16x256xf32>) -> tensor<16x256xf32>
+  %2 = linalg.fill ins(%cst_0 : f32) outs(%4 : tensor<16x256xf32>)
   // CHECK: {__inplace_operands_attr__ = ["true", "true", "true"]}
-  %5 = linalg.matmul ins(%3, %2 : tensor<256x16xf32>, tensor<16x256xf32>) outs(%arg2 : tensor<256x256xf32>) -> tensor<256x256xf32>
+  %5 = linalg.matmul ins(%3, %2 : tensor<256x16xf32>, tensor<16x256xf32>) outs(%arg2 : tensor<256x256xf32>)
   return %5 : tensor<256x256xf32>
 }
@@ -180,15 +180,15 @@
   %0 = bufferization.alloc_tensor() : tensor<256x256xf32>

   // CHECK: {__inplace_operands_attr__ = ["none", "false"]}
-  %2 = linalg.fill ins(%cst_0 : f32) outs(%0 : tensor<256x256xf32>) -> tensor<256x256xf32>
+  %2 = linalg.fill ins(%cst_0 : f32) outs(%0 : tensor<256x256xf32>)
   // CHECK: {__inplace_operands_attr__ = ["none", "true"]}
-  %1 = linalg.fill ins(%cst : f32) outs(%0 : tensor<256x256xf32>) -> tensor<256x256xf32>
+  %1 = linalg.fill ins(%cst : f32) outs(%0 : tensor<256x256xf32>)
   // CHECK: {__inplace_operands_attr__ = ["true"]}
   %3 = tensor.extract_slice %1[0, 0] [256, 16] [1, 1] : tensor<256x256xf32> to tensor<256x16xf32>
   // CHECK: {__inplace_operands_attr__ = ["true"]}
   %4 = tensor.extract_slice %2[0, 0] [16, 256] [1, 1] : tensor<256x256xf32> to tensor<16x256xf32>
   // CHECK: {__inplace_operands_attr__ = ["true", "true", "true"]}
-  %5 = linalg.matmul ins(%3, %4 : tensor<256x16xf32>, tensor<16x256xf32>) outs(%arg2 : tensor<256x256xf32>) -> tensor<256x256xf32>
+  %5 = linalg.matmul ins(%3, %4 : tensor<256x16xf32>, tensor<16x256xf32>) outs(%arg2 : tensor<256x256xf32>)
   return %5 : tensor<256x256xf32>
 }
@@ -207,15 +207,15 @@
   %0 = bufferization.alloc_tensor() : tensor<256x256xf32>

   // CHECK: {__inplace_operands_attr__ = ["none", "false"]}
-  %2 = linalg.fill ins(%cst_0 : f32) outs(%0 : tensor<256x256xf32>) -> tensor<256x256xf32>
+  %2 = linalg.fill ins(%cst_0 : f32) outs(%0 : tensor<256x256xf32>)
   // CHECK: {__inplace_operands_attr__ = ["none", "true"]}
-  %1 = linalg.fill ins(%cst : f32) outs(%0 : tensor<256x256xf32>) -> tensor<256x256xf32>
+  %1 = linalg.fill ins(%cst : f32) outs(%0 : tensor<256x256xf32>)
   // CHECK: {__inplace_operands_attr__ = ["true"]}
   %4 = tensor.extract_slice %2[0, 0] [16, 256] [1, 1] : tensor<256x256xf32> to tensor<16x256xf32>
   // CHECK: {__inplace_operands_attr__ = ["true"]}
   %3 = tensor.extract_slice %1[0, 0] [256, 16] [1, 1] : tensor<256x256xf32> to tensor<256x16xf32>
   // CHECK: {__inplace_operands_attr__ = ["true", "true", "true"]}
-  %5 = linalg.matmul ins(%3, %4 : tensor<256x16xf32>, tensor<16x256xf32>) outs(%arg2 : tensor<256x256xf32>) -> tensor<256x256xf32>
+  %5 = linalg.matmul ins(%3, %4 : tensor<256x16xf32>, tensor<16x256xf32>) outs(%arg2 : tensor<256x256xf32>)
   return %5 : tensor<256x256xf32>
 }
@@ -234,15 +234,15 @@
   %0 = bufferization.alloc_tensor() : tensor<256x256xf32>

   // CHECK: {__inplace_operands_attr__ = ["none", "false"]}
-  %2 = linalg.fill ins(%cst_0 : f32) outs(%0 : tensor<256x256xf32>) -> tensor<256x256xf32>
+  %2 = linalg.fill ins(%cst_0 : f32) outs(%0 : tensor<256x256xf32>)
   // CHECK: {__inplace_operands_attr__ = ["true"]}
   %3 = tensor.extract_slice %0[0, 0] [256, 16] [1, 1] : tensor<256x256xf32> to tensor<256x16xf32>
   // CHECK: {__inplace_operands_attr__ = ["none", "true"]}
-  %1 = linalg.fill ins(%cst : f32) outs(%3 : tensor<256x16xf32>) -> tensor<256x16xf32>
+  %1 = linalg.fill ins(%cst : f32) outs(%3 : tensor<256x16xf32>)
   // CHECK: {__inplace_operands_attr__ = ["true"]}
   %4 = tensor.extract_slice %2[0, 0] [16, 256] [1, 1] : tensor<256x256xf32> to tensor<16x256xf32>
   // CHECK: {__inplace_operands_attr__ = ["true", "true", "true"]}
-  %5 = linalg.matmul ins(%1, %4 : tensor<256x16xf32>, tensor<16x256xf32>) outs(%arg2 : tensor<256x256xf32>) -> tensor<256x256xf32>
+  %5 = linalg.matmul ins(%1, %4 : tensor<256x16xf32>, tensor<16x256xf32>) outs(%arg2 : tensor<256x256xf32>)
   return %5 : tensor<256x256xf32>
 }
@@ -261,15 +261,15 @@
   %0 = bufferization.alloc_tensor() : tensor<256x256xf32>

   // CHECK: {__inplace_operands_attr__ = ["none", "false"]}
-  %2 = linalg.fill ins(%cst_0 : f32) outs(%0 : tensor<256x256xf32>) -> tensor<256x256xf32>
+  %2 = linalg.fill ins(%cst_0 : f32) outs(%0 : tensor<256x256xf32>)
   // CHECK: {__inplace_operands_attr__ = ["true"]}
   %3 = tensor.extract_slice %0[0, 0] [256, 16] [1, 1] : tensor<256x256xf32> to tensor<256x16xf32>
   // CHECK: {__inplace_operands_attr__ = ["true"]}
   %4 = tensor.extract_slice %2[0, 0] [16, 256] [1, 1] : tensor<256x256xf32> to tensor<16x256xf32>
   // CHECK: {__inplace_operands_attr__ = ["none", "true"]}
-  %1 = linalg.fill ins(%cst : f32) outs(%3 : tensor<256x16xf32>) -> tensor<256x16xf32>
+  %1 = linalg.fill ins(%cst : f32) outs(%3 : tensor<256x16xf32>)
   // CHECK: {__inplace_operands_attr__ = ["true", "true", "true"]}
-  %5 = linalg.matmul ins(%1, %4 : tensor<256x16xf32>, tensor<16x256xf32>) outs(%arg2 : tensor<256x256xf32>) -> tensor<256x256xf32>
+  %5 = linalg.matmul ins(%1, %4 : tensor<256x16xf32>, tensor<16x256xf32>) outs(%arg2 : tensor<256x256xf32>)
   return %5 : tensor<256x256xf32>
 }
@@ -288,15 +288,15 @@
   %0 = bufferization.alloc_tensor() : tensor<256x256xf32>

   // CHECK: {__inplace_operands_attr__ = ["none", "false"]}
-  %2 = linalg.fill ins(%cst_0 : f32) outs(%0 : tensor<256x256xf32>) -> tensor<256x256xf32>
+  %2 = linalg.fill ins(%cst_0 : f32) outs(%0 : tensor<256x256xf32>)
   // CHECK: {__inplace_operands_attr__ = ["true"]}
   %4 = tensor.extract_slice %2[0, 0] [16, 256] [1, 1] : tensor<256x256xf32> to tensor<16x256xf32>
   // CHECK: {__inplace_operands_attr__ = ["none", "true"]}
-  %1 = linalg.fill ins(%cst : f32) outs(%0 : tensor<256x256xf32>) -> tensor<256x256xf32>
+  %1 = linalg.fill ins(%cst : f32) outs(%0 : tensor<256x256xf32>)
   // CHECK: {__inplace_operands_attr__ = ["true"]}
   %3 = tensor.extract_slice %1[0, 0] [256, 16] [1, 1] : tensor<256x256xf32> to tensor<256x16xf32>
   // CHECK: {__inplace_operands_attr__ = ["true", "true", "true"]}
-  %5 = linalg.matmul ins(%3, %4 : tensor<256x16xf32>, tensor<16x256xf32>) outs(%arg2 : tensor<256x256xf32>) -> tensor<256x256xf32>
+  %5 = linalg.matmul ins(%3, %4 : tensor<256x16xf32>, tensor<16x256xf32>) outs(%arg2 : tensor<256x256xf32>)
   return %5 : tensor<256x256xf32>
 }
@@ -315,15 +315,15 @@
   %0 = bufferization.alloc_tensor() : tensor<256x256xf32>

   // CHECK: {__inplace_operands_attr__ = ["none", "false"]}
-  %2 = linalg.fill ins(%cst_0 : f32) outs(%0 : tensor<256x256xf32>) -> tensor<256x256xf32>
+  %2 = linalg.fill ins(%cst_0 : f32) outs(%0 : tensor<256x256xf32>)
   // CHECK: {__inplace_operands_attr__ = ["true"]}
   %4 = tensor.extract_slice %2[0, 0] [16, 256] [1, 1] : tensor<256x256xf32> to tensor<16x256xf32>
   // CHECK: {__inplace_operands_attr__ = ["true"]}
   %3 = tensor.extract_slice %0[0, 0] [256, 16] [1, 1] : tensor<256x256xf32> to tensor<256x16xf32>
   // CHECK: {__inplace_operands_attr__ = ["none", "true"]}
-  %1 = linalg.fill ins(%cst : f32) outs(%3 : tensor<256x16xf32>) -> tensor<256x16xf32>
+  %1 = linalg.fill ins(%cst : f32) outs(%3 : tensor<256x16xf32>)
   // CHECK: {__inplace_operands_attr__ = ["true", "true", "true"]}
-  %5 = linalg.matmul ins(%1, %4 : tensor<256x16xf32>, tensor<16x256xf32>) outs(%arg2 : tensor<256x256xf32>) -> tensor<256x256xf32>
+  %5 = linalg.matmul ins(%1, %4 : tensor<256x16xf32>, tensor<16x256xf32>) outs(%arg2 : tensor<256x256xf32>)
   return %5 : tensor<256x256xf32>
 }
@@ -344,13 +344,13 @@
   // CHECK: {__inplace_operands_attr__ = ["false"]}
   %3 = tensor.extract_slice %0[0, 0] [256, 16] [1, 1] : tensor<256x256xf32> to tensor<256x16xf32>
   // CHECK: {__inplace_operands_attr__ = ["none", "true"]}
-  %1 = linalg.fill ins(%cst : f32) outs(%3 : tensor<256x16xf32>) -> tensor<256x16xf32>
+  %1 = linalg.fill ins(%cst : f32) outs(%3 : tensor<256x16xf32>)
   // CHECK: {__inplace_operands_attr__ = ["none", "true"]}
-  %2 = linalg.fill ins(%cst_0 : f32) outs(%0 : tensor<256x256xf32>) -> tensor<256x256xf32>
+  %2 = linalg.fill ins(%cst_0 : f32) outs(%0 : tensor<256x256xf32>)
   // CHECK: {__inplace_operands_attr__ = ["true"]}
   %4 = tensor.extract_slice %2[0, 0] [16, 256] [1, 1] : tensor<256x256xf32> to tensor<16x256xf32>
   // CHECK: {__inplace_operands_attr__ = ["true", "true", "true"]}
-  %5 = linalg.matmul ins(%1, %4 : tensor<256x16xf32>, tensor<16x256xf32>) outs(%arg2 : tensor<256x256xf32>) -> tensor<256x256xf32>
+  %5 = linalg.matmul ins(%1, %4 : tensor<256x16xf32>, tensor<16x256xf32>) outs(%arg2 : tensor<256x256xf32>)
   return %5 : tensor<256x256xf32>
 }
@@ -371,13 +371,13 @@
   // CHECK: {__inplace_operands_attr__ = ["false"]}
   %3 = tensor.extract_slice %0[0, 0] [256, 16] [1, 1] : tensor<256x256xf32> to tensor<256x16xf32>
   // CHECK: {__inplace_operands_attr__ = ["none", "true"]}
-  %1 = linalg.fill ins(%cst : f32) outs(%3 : tensor<256x16xf32>) -> tensor<256x16xf32>
+  %1 = linalg.fill ins(%cst : f32) outs(%3 : tensor<256x16xf32>)
   // CHECK: {__inplace_operands_attr__ = ["true"]}
   %4 = tensor.extract_slice %0[0, 0] [16, 256] [1, 1] : tensor<256x256xf32> to tensor<16x256xf32>
   // CHECK: {__inplace_operands_attr__ = ["none", "true"]}
-  %2 = linalg.fill ins(%cst_0 : f32) outs(%4 : tensor<16x256xf32>) -> tensor<16x256xf32>
+  %2 = linalg.fill ins(%cst_0 : f32) outs(%4 : tensor<16x256xf32>)
   // CHECK: {__inplace_operands_attr__ = ["true", "true", "true"]}
-  %5 = linalg.matmul ins(%1, %2 : tensor<256x16xf32>, tensor<16x256xf32>) outs(%arg2 : tensor<256x256xf32>) -> tensor<256x256xf32>
+  %5 = linalg.matmul ins(%1, %2 : tensor<256x16xf32>, tensor<16x256xf32>) outs(%arg2 : tensor<256x256xf32>)
   return %5 : tensor<256x256xf32>
 }
@@ -397,13 +397,13 @@
   // CHECK: {__inplace_operands_attr__ = ["false"]}
   %3 = tensor.extract_slice %0[0, 0] [256, 16] [1, 1] : tensor<256x256xf32> to tensor<256x16xf32>
   // CHECK: {__inplace_operands_attr__ = ["none", "true"]}
-  %2 = linalg.fill ins(%cst_0 : f32) outs(%0 : tensor<256x256xf32>) -> tensor<256x256xf32>
+  %2 = linalg.fill ins(%cst_0 : f32) outs(%0 : tensor<256x256xf32>)
   // CHECK: {__inplace_operands_attr__ = ["none", "true"]}
-  %1 = linalg.fill ins(%cst : f32) outs(%3 : tensor<256x16xf32>) -> tensor<256x16xf32>
+  %1 = linalg.fill ins(%cst : f32) outs(%3 : tensor<256x16xf32>)
   // CHECK: {__inplace_operands_attr__ = ["true"]}
   %4 = tensor.extract_slice %2[0, 0] [16, 256] [1, 1] : tensor<256x256xf32> to tensor<16x256xf32>
   // CHECK:
{__inplace_operands_attr__ = ["true", "true", "true"]} - %5 = linalg.matmul ins(%1, %4 : tensor<256x16xf32>, tensor<16x256xf32>) outs(%arg2 : tensor<256x256xf32>) -> tensor<256x256xf32> + %5 = linalg.matmul ins(%1, %4 : tensor<256x16xf32>, tensor<16x256xf32>) outs(%arg2 : tensor<256x256xf32>) return %5 : tensor<256x256xf32> } @@ -424,13 +424,13 @@ // CHECK: {__inplace_operands_attr__ = ["false"]} %3 = tensor.extract_slice %0[0, 0] [256, 16] [1, 1] : tensor<256x256xf32> to tensor<256x16xf32> // CHECK: {__inplace_operands_attr__ = ["none", "true"]} - %2 = linalg.fill ins(%cst_0 : f32) outs(%0 : tensor<256x256xf32>) -> tensor<256x256xf32> + %2 = linalg.fill ins(%cst_0 : f32) outs(%0 : tensor<256x256xf32>) // CHECK: {__inplace_operands_attr__ = ["true"]} %4 = tensor.extract_slice %2[0, 0] [16, 256] [1, 1] : tensor<256x256xf32> to tensor<16x256xf32> // CHECK: {__inplace_operands_attr__ = ["none", "true"]} - %1 = linalg.fill ins(%cst : f32) outs(%3 : tensor<256x16xf32>) -> tensor<256x16xf32> + %1 = linalg.fill ins(%cst : f32) outs(%3 : tensor<256x16xf32>) // CHECK: {__inplace_operands_attr__ = ["true", "true", "true"]} - %5 = linalg.matmul ins(%1, %4 : tensor<256x16xf32>, tensor<16x256xf32>) outs(%arg2 : tensor<256x256xf32>) -> tensor<256x256xf32> + %5 = linalg.matmul ins(%1, %4 : tensor<256x16xf32>, tensor<16x256xf32>) outs(%arg2 : tensor<256x256xf32>) return %5 : tensor<256x256xf32> } @@ -453,11 +453,11 @@ // CHECK: {__inplace_operands_attr__ = ["true"]} %4 = tensor.extract_slice %0[0, 0] [16, 256] [1, 1] : tensor<256x256xf32> to tensor<16x256xf32> // CHECK: {__inplace_operands_attr__ = ["none", "true"]} - %1 = linalg.fill ins(%cst : f32) outs(%3 : tensor<256x16xf32>) -> tensor<256x16xf32> + %1 = linalg.fill ins(%cst : f32) outs(%3 : tensor<256x16xf32>) // CHECK: {__inplace_operands_attr__ = ["none", "true"]} - %2 = linalg.fill ins(%cst_0 : f32) outs(%4 : tensor<16x256xf32>) -> tensor<16x256xf32> + %2 = linalg.fill ins(%cst_0 : f32) outs(%4 : tensor<16x256xf32>) // CHECK: {__inplace_operands_attr__ = ["true", "true", "true"]} - %5 = linalg.matmul ins(%1, %2 : tensor<256x16xf32>, tensor<16x256xf32>) outs(%arg2 : tensor<256x256xf32>) -> tensor<256x256xf32> + %5 = linalg.matmul ins(%1, %2 : tensor<256x16xf32>, tensor<16x256xf32>) outs(%arg2 : tensor<256x256xf32>) return %5 : tensor<256x256xf32> } @@ -480,11 +480,11 @@ // CHECK: {__inplace_operands_attr__ = ["true"]} %4 = tensor.extract_slice %0[0, 0] [16, 256] [1, 1] : tensor<256x256xf32> to tensor<16x256xf32> // CHECK: {__inplace_operands_attr__ = ["none", "true"]} - %2 = linalg.fill ins(%cst_0 : f32) outs(%4 : tensor<16x256xf32>) -> tensor<16x256xf32> + %2 = linalg.fill ins(%cst_0 : f32) outs(%4 : tensor<16x256xf32>) // CHECK: {__inplace_operands_attr__ = ["none", "true"]} - %1 = linalg.fill ins(%cst : f32) outs(%3 : tensor<256x16xf32>) -> tensor<256x16xf32> + %1 = linalg.fill ins(%cst : f32) outs(%3 : tensor<256x16xf32>) // CHECK: {__inplace_operands_attr__ = ["true", "true", "true"]} - %5 = linalg.matmul ins(%1, %2 : tensor<256x16xf32>, tensor<16x256xf32>) outs(%arg2 : tensor<256x256xf32>) -> tensor<256x256xf32> + %5 = linalg.matmul ins(%1, %2 : tensor<256x16xf32>, tensor<16x256xf32>) outs(%arg2 : tensor<256x256xf32>) return %5 : tensor<256x256xf32> } @@ -505,13 +505,13 @@ // CHECK: {__inplace_operands_attr__ = ["false"]} %4 = tensor.extract_slice %0[0, 0] [16, 256] [1, 1] : tensor<256x256xf32> to tensor<16x256xf32> // CHECK: {__inplace_operands_attr__ = ["none", "true"]} - %1 = linalg.fill ins(%cst : f32) outs(%0 : tensor<256x256xf32>) -> 
tensor<256x256xf32> + %1 = linalg.fill ins(%cst : f32) outs(%0 : tensor<256x256xf32>) // CHECK: {__inplace_operands_attr__ = ["none", "true"]} - %2 = linalg.fill ins(%cst_0 : f32) outs(%4 : tensor<16x256xf32>) -> tensor<16x256xf32> + %2 = linalg.fill ins(%cst_0 : f32) outs(%4 : tensor<16x256xf32>) // CHECK: {__inplace_operands_attr__ = ["true"]} %3 = tensor.extract_slice %1[0, 0] [256, 16] [1, 1] : tensor<256x256xf32> to tensor<256x16xf32> // CHECK: {__inplace_operands_attr__ = ["true", "true", "true"]} - %5 = linalg.matmul ins(%3, %2 : tensor<256x16xf32>, tensor<16x256xf32>) outs(%arg2 : tensor<256x256xf32>) -> tensor<256x256xf32> + %5 = linalg.matmul ins(%3, %2 : tensor<256x16xf32>, tensor<16x256xf32>) outs(%arg2 : tensor<256x256xf32>) return %5 : tensor<256x256xf32> } @@ -532,13 +532,13 @@ // CHECK: {__inplace_operands_attr__ = ["false"]} %4 = tensor.extract_slice %0[0, 0] [16, 256] [1, 1] : tensor<256x256xf32> to tensor<16x256xf32> // CHECK: {__inplace_operands_attr__ = ["none", "true"]} - %1 = linalg.fill ins(%cst : f32) outs(%0 : tensor<256x256xf32>) -> tensor<256x256xf32> + %1 = linalg.fill ins(%cst : f32) outs(%0 : tensor<256x256xf32>) // CHECK: {__inplace_operands_attr__ = ["true"]} %3 = tensor.extract_slice %1[0, 0] [256, 16] [1, 1] : tensor<256x256xf32> to tensor<256x16xf32> // CHECK: {__inplace_operands_attr__ = ["none", "true"]} - %2 = linalg.fill ins(%cst_0 : f32) outs(%4 : tensor<16x256xf32>) -> tensor<16x256xf32> + %2 = linalg.fill ins(%cst_0 : f32) outs(%4 : tensor<16x256xf32>) // CHECK: {__inplace_operands_attr__ = ["true", "true", "true"]} - %5 = linalg.matmul ins(%3, %2 : tensor<256x16xf32>, tensor<16x256xf32>) outs(%arg2 : tensor<256x256xf32>) -> tensor<256x256xf32> + %5 = linalg.matmul ins(%3, %2 : tensor<256x16xf32>, tensor<16x256xf32>) outs(%arg2 : tensor<256x256xf32>) return %5 : tensor<256x256xf32> } @@ -559,13 +559,13 @@ // CHECK: {__inplace_operands_attr__ = ["false"]} %4 = tensor.extract_slice %0[0, 0] [16, 256] [1, 1] : tensor<256x256xf32> to tensor<16x256xf32> // CHECK: {__inplace_operands_attr__ = ["none", "true"]} - %2 = linalg.fill ins(%cst_0 : f32) outs(%4 : tensor<16x256xf32>) -> tensor<16x256xf32> + %2 = linalg.fill ins(%cst_0 : f32) outs(%4 : tensor<16x256xf32>) // CHECK: {__inplace_operands_attr__ = ["none", "true"]} - %1 = linalg.fill ins(%cst : f32) outs(%0 : tensor<256x256xf32>) -> tensor<256x256xf32> + %1 = linalg.fill ins(%cst : f32) outs(%0 : tensor<256x256xf32>) // CHECK: {__inplace_operands_attr__ = ["true"]} %3 = tensor.extract_slice %1[0, 0] [256, 16] [1, 1] : tensor<256x256xf32> to tensor<256x16xf32> // CHECK: {__inplace_operands_attr__ = ["true", "true", "true"]} - %5 = linalg.matmul ins(%3, %2 : tensor<256x16xf32>, tensor<16x256xf32>) outs(%arg2 : tensor<256x256xf32>) -> tensor<256x256xf32> + %5 = linalg.matmul ins(%3, %2 : tensor<256x16xf32>, tensor<16x256xf32>) outs(%arg2 : tensor<256x256xf32>) return %5 : tensor<256x256xf32> } @@ -586,13 +586,13 @@ // CHECK: {__inplace_operands_attr__ = ["false"]} %4 = tensor.extract_slice %0[0, 0] [16, 256] [1, 1] : tensor<256x256xf32> to tensor<16x256xf32> // CHECK: {__inplace_operands_attr__ = ["none", "true"]} - %2 = linalg.fill ins(%cst_0 : f32) outs(%4 : tensor<16x256xf32>) -> tensor<16x256xf32> + %2 = linalg.fill ins(%cst_0 : f32) outs(%4 : tensor<16x256xf32>) // CHECK: {__inplace_operands_attr__ = ["true"]} %3 = tensor.extract_slice %0[0, 0] [256, 16] [1, 1] : tensor<256x256xf32> to tensor<256x16xf32> // CHECK: {__inplace_operands_attr__ = ["none", "true"]} - %1 = linalg.fill ins(%cst : f32) 
outs(%3 : tensor<256x16xf32>) -> tensor<256x16xf32> + %1 = linalg.fill ins(%cst : f32) outs(%3 : tensor<256x16xf32>) // CHECK: {__inplace_operands_attr__ = ["true", "true", "true"]} - %5 = linalg.matmul ins(%1, %2 : tensor<256x16xf32>, tensor<16x256xf32>) outs(%arg2 : tensor<256x256xf32>) -> tensor<256x256xf32> + %5 = linalg.matmul ins(%1, %2 : tensor<256x16xf32>, tensor<16x256xf32>) outs(%arg2 : tensor<256x256xf32>) return %5 : tensor<256x256xf32> } @@ -615,11 +615,11 @@ // CHECK: {__inplace_operands_attr__ = ["true"]} %3 = tensor.extract_slice %0[0, 0] [256, 16] [1, 1] : tensor<256x256xf32> to tensor<256x16xf32> // CHECK: {__inplace_operands_attr__ = ["none", "true"]} - %1 = linalg.fill ins(%cst : f32) outs(%3 : tensor<256x16xf32>) -> tensor<256x16xf32> + %1 = linalg.fill ins(%cst : f32) outs(%3 : tensor<256x16xf32>) // CHECK: {__inplace_operands_attr__ = ["none", "true"]} - %2 = linalg.fill ins(%cst_0 : f32) outs(%4 : tensor<16x256xf32>) -> tensor<16x256xf32> + %2 = linalg.fill ins(%cst_0 : f32) outs(%4 : tensor<16x256xf32>) // CHECK: {__inplace_operands_attr__ = ["true", "true", "true"]} - %5 = linalg.matmul ins(%1, %2 : tensor<256x16xf32>, tensor<16x256xf32>) outs(%arg2 : tensor<256x256xf32>) -> tensor<256x256xf32> + %5 = linalg.matmul ins(%1, %2 : tensor<256x16xf32>, tensor<16x256xf32>) outs(%arg2 : tensor<256x256xf32>) return %5 : tensor<256x256xf32> } @@ -642,10 +642,10 @@ // CHECK: {__inplace_operands_attr__ = ["true"]} %3 = tensor.extract_slice %0[0, 0] [256, 16] [1, 1] : tensor<256x256xf32> to tensor<256x16xf32> // CHECK: {__inplace_operands_attr__ = ["none", "true"]} - %2 = linalg.fill ins(%cst_0 : f32) outs(%4 : tensor<16x256xf32>) -> tensor<16x256xf32> + %2 = linalg.fill ins(%cst_0 : f32) outs(%4 : tensor<16x256xf32>) // CHECK: {__inplace_operands_attr__ = ["none", "true"]} - %1 = linalg.fill ins(%cst : f32) outs(%3 : tensor<256x16xf32>) -> tensor<256x16xf32> + %1 = linalg.fill ins(%cst : f32) outs(%3 : tensor<256x16xf32>) // CHECK: {__inplace_operands_attr__ = ["true", "true", "true"]} - %5 = linalg.matmul ins(%1, %2 : tensor<256x16xf32>, tensor<16x256xf32>) outs(%arg2 : tensor<256x256xf32>) -> tensor<256x256xf32> + %5 = linalg.matmul ins(%1, %2 : tensor<256x16xf32>, tensor<16x256xf32>) outs(%arg2 : tensor<256x256xf32>) return %5 : tensor<256x256xf32> } diff --git a/mlir/test/Dialect/Linalg/one-shot-bufferize.mlir b/mlir/test/Dialect/Linalg/one-shot-bufferize.mlir --- a/mlir/test/Dialect/Linalg/one-shot-bufferize.mlir +++ b/mlir/test/Dialect/Linalg/one-shot-bufferize.mlir @@ -23,7 +23,7 @@ /// Inplaceable, no alloc // CHECK-NOT: alloc // CHECK: linalg.fill ins(%[[F0]] : f32) outs(%[[A]] : memref>) - %r = linalg.fill ins(%f0 : f32) outs(%A : tensor) -> tensor + %r = linalg.fill ins(%f0 : f32) outs(%A : tensor) // CHECK: return // CHECK-NOT: tensor @@ -46,7 +46,7 @@ // CHECK: %[[D0:.*]] = memref.dim %[[A]], {{.*}} : memref> // CHECK: %[[ALLOC:.*]] = memref.alloc(%[[D0]]) {alignment = 64 : i64} : memref // CHECK: linalg.fill ins(%[[F0]] : f32) outs(%[[ALLOC]] : memref) - %r = linalg.fill ins(%f0 : f32) outs(%A : tensor) -> tensor + %r = linalg.fill ins(%f0 : f32) outs(%A : tensor) // CHECK-NOT: dealloc // CHECK: return %[[ALLOC]] : memref @@ -68,14 +68,13 @@ /// Cross-op multiple uses of %A, the first op which has interfering reads must alloc. 
// CHECK: %[[ALLOC:.*]] = memref.alloc
// CHECK: linalg.fill ins({{.*}}{{.*}}outs(%[[ALLOC]]
- %f = linalg.fill ins(%f0 : f32) outs(%A : tensor) -> tensor
+ %f = linalg.fill ins(%f0 : f32) outs(%A : tensor)
/// The second op has no interfering reads and can reuse.
// CHECK-NOT: alloc
// CHECK: linalg.matmul ins(%[[ALLOC]], %[[ALLOC]]{{.*}}) outs(%[[A]]
%r = linalg.matmul ins(%f, %f: tensor, tensor) outs(%A: tensor)
- -> tensor
// CHECK: memref.dealloc %[[ALLOC]]
// CHECK: return
@@ -92,7 +91,6 @@
// CHECK: alloc
%r = linalg.matmul ins(%A, %A: tensor, tensor) outs(%A: tensor)
- -> tensor
// CHECK-NOT: dealloc
return %r: tensor
}
@@ -182,7 +180,7 @@
// linalg.fill is inplace.
// CHECK: linalg.fill ins(%{{.*}} : f32) outs(%[[C_SLICE]]
- %5 = linalg.fill ins(%cst : f32) outs(%4 : tensor<8x16xf32>) -> tensor<8x16xf32>
+ %5 = linalg.fill ins(%cst : f32) outs(%4 : tensor<8x16xf32>)
// CHECK: scf.for %[[K:.*]] =
%6 = scf.for %arg7 = %c0 to %c256 step %c32 iter_args(%arg8 = %5) -> (tensor<8x16xf32>) {
@@ -195,7 +193,6 @@
// CHECK: linalg.matmul ins({{.*}} outs(%[[C_SLICE]]
%10 = linalg.matmul ins(%8, %9 : tensor<8x32xf32>, tensor<32x16xf32>) outs(%arg8 : tensor<8x16xf32>)
- -> tensor<8x16xf32>
scf.yield %10 : tensor<8x16xf32>
}
@@ -231,7 +228,7 @@
%sA = tensor.extract_slice %A[0, 0][%idx, %idx][1, 1] : tensor to tensor
%ssA = tensor.extract_slice %sA[0, 0][4, 4][1, 1] : tensor to tensor<4x4xf32>
- %FA = linalg.fill ins(%f0 : f32) outs(%ssA : tensor<4x4xf32>) -> tensor<4x4xf32>
+ %FA = linalg.fill ins(%f0 : f32) outs(%ssA : tensor<4x4xf32>)
%rsA = tensor.insert_slice %FA into %sA[0, 0][4, 4][1, 1] : tensor<4x4xf32> into tensor
%rA = tensor.insert_slice %rsA into %A[0, 0][%idx, %idx][1, 1] : tensor into tensor
diff --git a/mlir/test/Dialect/Linalg/reshape_control_fusion.mlir b/mlir/test/Dialect/Linalg/reshape_control_fusion.mlir
--- a/mlir/test/Dialect/Linalg/reshape_control_fusion.mlir
+++ b/mlir/test/Dialect/Linalg/reshape_control_fusion.mlir
@@ -50,7 +50,7 @@
} -> tensor
%0 = tensor.expand_shape %fill [[0, 1], [2]] : tensor into tensor<1x?x?xf32>
%1 = linalg.batch_matmul ins(%arg0, %arg1 : tensor<1x?x?xf32>, tensor<1x?x?xf32>)
- outs(%0 : tensor<1x?x?xf32>) -> tensor<1x?x?xf32>
+ outs(%0 : tensor<1x?x?xf32>)
return %1 : tensor<1x?x?xf32>
}
// CHECK-DAG: #[[MAP:.+]] = affine_map<(d0, d1, d2) -> (d0, d1, d2)
diff --git a/mlir/test/Dialect/Linalg/resolve-shaped-type-result-dims.mlir b/mlir/test/Dialect/Linalg/resolve-shaped-type-result-dims.mlir
--- a/mlir/test/Dialect/Linalg/resolve-shaped-type-result-dims.mlir
+++ b/mlir/test/Dialect/Linalg/resolve-shaped-type-result-dims.mlir
@@ -111,7 +111,7 @@
%c0 = arith.constant 0 : index
%c1 = arith.constant 1 : index
%0 = linalg.matmul ins(%arg0, %arg1 : tensor, tensor)
- outs(%arg2 : tensor) -> tensor
+ outs(%arg2 : tensor)
%1 = tensor.dim %0, %c0 : tensor
%2 = tensor.dim %0, %c1 : tensor
%3 = linalg.generic
diff --git a/mlir/test/Dialect/Linalg/roundtrip.mlir b/mlir/test/Dialect/Linalg/roundtrip.mlir
--- a/mlir/test/Dialect/Linalg/roundtrip.mlir
+++ b/mlir/test/Dialect/Linalg/roundtrip.mlir
@@ -183,9 +183,9 @@
-> (tensor, tensor) {
%c0 = arith.constant 0 : index
%0 = tensor.empty() : tensor
- %1 = linalg.fill ins(%arg2 : i32) outs(%0 : tensor) -> tensor
+ %1 = linalg.fill ins(%arg2 : i32) outs(%0 : tensor)
%2 = tensor.empty() : tensor
- %3 = linalg.fill ins(%arg2 : i32) outs(%2 : tensor) -> tensor
+ %3 = linalg.fill ins(%arg2 : i32) outs(%2 : tensor)
%4:2 = linalg.generic {
indexing_maps = [affine_map<(d0) -> (d0)>,
affine_map<(d0) -> (d0)>,
affine_map<(d0) -> ()>,
affine_map<(d0) -> ()>],
iterator_types = ["reduction"]}
@@ -287,7 +287,6 @@
%res1 = linalg.batch_matmul ins(%ta3, %tb3: tensor, tensor)
outs(%tc3: tensor)
- -> tensor
return %res1 : tensor
}
// CHECK-LABEL: func @named_ops
@@ -298,10 +297,10 @@
func.func @fill_tensor(%arg0 : index, %arg1 : index, %arg2 : f32) -> tensor {
%0 = tensor.empty(%arg0, %arg1) : tensor
- %1 = linalg.fill ins(%arg2 : f32) outs(%0 : tensor) -> tensor
+ %1 = linalg.fill ins(%arg2 : f32) outs(%0 : tensor)
return %1 : tensor
}
-// CHECK: %{{.+}} = linalg.fill ins(%{{.+}} : f32) outs(%{{.+}} : tensor) -> tensor
+// CHECK: %{{.+}} = linalg.fill ins(%{{.+}} : f32) outs(%{{.+}} : tensor)
// -----
diff --git a/mlir/test/Dialect/Linalg/swap-extract-slice-with-fill.mlir b/mlir/test/Dialect/Linalg/swap-extract-slice-with-fill.mlir
--- a/mlir/test/Dialect/Linalg/swap-extract-slice-with-fill.mlir
+++ b/mlir/test/Dialect/Linalg/swap-extract-slice-with-fill.mlir
@@ -4,11 +4,11 @@
// CHECK-SAME: (%[[INIT:.+]]: tensor, %[[OFFSET0:.+]]: index, %[[SIZE1:.+]]: index)
// CHECK: %[[F0:.+]] = arith.constant 0.000000e+00 : f32
// CHECK: %[[EXT:.+]] = tensor.extract_slice %[[INIT]][%[[OFFSET0]], 8, 4] [1, %[[SIZE1]], 6] [1, 3, 1]
-// CHECK: %[[FILL:.+]] = linalg.fill ins(%[[F0]] : f32) outs(%[[EXT]] : tensor) -> tensor
+// CHECK: %[[FILL:.+]] = linalg.fill ins(%[[F0]] : f32) outs(%[[EXT]] : tensor)
// CHECK: return %[[FILL]]
func.func @swap_fill_insert_slice(%init : tensor, %offset0: index, %size1: index) -> tensor {
%f0 = arith.constant 0.000000e+00 : f32
- %0 = linalg.fill ins(%f0 : f32) outs(%init : tensor) -> tensor
+ %0 = linalg.fill ins(%f0 : f32) outs(%init : tensor)
%1 = tensor.extract_slice %0[%offset0, 8, 4] [1, %size1, 6] [1, 3, 1] : tensor to tensor
return %1: tensor
@@ -21,7 +21,7 @@
// CHECK: tensor.extract_slice
func.func @dont_swap_fill_insert_slice_multi_user(%init : tensor, %offset0: index, %size1: index) -> (tensor, tensor<2x?x6xf32>) {
%f0 = arith.constant 0.000000e+00 : f32
- %0 = linalg.fill ins(%f0 : f32) outs(%init : tensor) -> tensor
+ %0 = linalg.fill ins(%f0 : f32) outs(%init : tensor)
%1 = tensor.extract_slice %0[%offset0, 8, 4] [2, %size1, 6] [1, 3, 1] : tensor to tensor<2x?x6xf32>
return %0, %1: tensor, tensor<2x?x6xf32>
diff --git a/mlir/test/Dialect/Linalg/tile-and-fuse-tensors.mlir b/mlir/test/Dialect/Linalg/tile-and-fuse-tensors.mlir
--- a/mlir/test/Dialect/Linalg/tile-and-fuse-tensors.mlir
+++ b/mlir/test/Dialect/Linalg/tile-and-fuse-tensors.mlir
@@ -3,7 +3,6 @@
func.func @matmul_tensors(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor {
%t0 = linalg.matmul ins(%arg0, %arg1: tensor, tensor) outs(%arg2: tensor)
- -> tensor
%c4 = arith.constant 4 : index
%c2 = arith.constant 2 : index
@@ -19,7 +18,7 @@
%6 = tensor.extract_slice %t0[%arg3, %arg7][%c2, 4][1, 1] : tensor to tensor
%7 = tensor.extract_slice %arg1[%arg7, %arg5][4, %c3][1, 1] : tensor to tensor<4x?xf32>
%8 = tensor.extract_slice %arg8[%arg3, %arg5][%c2, %c3][1, 1] : tensor to tensor
- %9 = linalg.matmul ins(%6, %7 : tensor, tensor<4x?xf32>) outs(%8 : tensor) -> tensor
+ %9 = linalg.matmul ins(%6, %7 : tensor, tensor<4x?xf32>) outs(%8 : tensor)
%10 = tensor.insert_slice %9 into %arg8[%arg3, %arg5] [%c2, %c3] [1, 1] : tensor into tensor
scf.yield %10 : tensor
}
@@ -50,8 +49,8 @@
// slices of the producing matmul.
// CHECK-DAG: %[[stB2:.*]] = tensor.extract_slice %[[B]][0, %[[K]]] [%[[dB0]], 4] [1, 1] : tensor to tensor
// CHECK-DAG: %[[stC:.*]] = tensor.extract_slice %[[C]][%[[I]], %[[K]]] [2, 4] [1, 1] : tensor to tensor<2x4xf32>
-// CHECK: %[[stD:.*]] = linalg.matmul ins(%[[stA]], %[[stB2]] : tensor<2x?xf32>, tensor) outs(%[[stC]] : tensor<2x4xf32>) -> tensor<2x4xf32>
-// CHECK-NEXT: %[[stG:.*]] = linalg.matmul ins(%[[stD]], %[[stB1]] : tensor<2x4xf32>, tensor<4x3xf32>) outs(%[[stF]] : tensor<2x3xf32>) -> tensor<2x3xf32>
+// CHECK: %[[stD:.*]] = linalg.matmul ins(%[[stA]], %[[stB2]] : tensor<2x?xf32>, tensor) outs(%[[stC]] : tensor<2x4xf32>)
+// CHECK-NEXT: %[[stG:.*]] = linalg.matmul ins(%[[stD]], %[[stB1]] : tensor<2x4xf32>, tensor<4x3xf32>) outs(%[[stF]] : tensor<2x3xf32>)
// CHECK-NEXT: tensor.insert_slice %[[stG]] into %[[RES]][%[[I]], %[[J]]]
// -----
@@ -66,12 +65,12 @@
%cst = arith.constant 0.0 : f32
%init = tensor.empty() : tensor<1x112x112x32xf32>
- %fill = linalg.fill ins(%cst : f32) outs(%init : tensor<1x112x112x32xf32>) -> tensor<1x112x112x32xf32>
+ %fill = linalg.fill ins(%cst : f32) outs(%init : tensor<1x112x112x32xf32>)
%conv = linalg.conv_2d_nhwc_hwcf
{dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>}
ins(%input, %filter : tensor<1x225x225x3xf32>, tensor<3x3x3x32xf32>)
- outs(%fill : tensor<1x112x112x32xf32>) -> tensor<1x112x112x32xf32>
+ outs(%fill : tensor<1x112x112x32xf32>)
%for0 = scf.for %iv0 = %c0 to %c112 step %c8 iter_args(%arg0 = %fill) -> tensor<1x112x112x32xf32> {
%for1 = scf.for %iv1 = %c0 to %c112 step %c16 iter_args(%arg1 = %arg0) -> tensor<1x112x112x32xf32> {
@@ -110,7 +109,7 @@
// CHECK-SAME: (%[[INPUT:.+]]: tensor<1x225x225x3xf32>, %[[FILTER:.+]]: tensor<3x3x3x32xf32>, %[[ELEM:.+]]: tensor<1x112x112x32xf32>)
// CHECK: %[[INIT:.+]] = tensor.empty() : tensor<1x112x112x32xf32>
-// CHECK-NEXT: %[[FILL:.+]] = linalg.fill ins(%cst : f32) outs(%[[INIT]] : tensor<1x112x112x32xf32>) -> tensor<1x112x112x32xf32>
+// CHECK-NEXT: %[[FILL:.+]] = linalg.fill ins(%cst : f32) outs(%[[INIT]] : tensor<1x112x112x32xf32>)
// CHECK-NEXT: scf.for %[[IV0:.+]] = %{{.+}} to %{{.+}} step %{{.+}} iter_args(%[[ARG0:.+]] = %[[FILL]])
// CHECK-NEXT: %[[OFFSET_H:.+]] = affine.apply #[[MAP0]](%[[IV0]])
@@ -148,12 +147,12 @@
%oc = tensor.dim %elementwise, %c3 : tensor
%init = tensor.empty(%n, %oh, %ow, %oc) : tensor
- %fill = linalg.fill ins(%cst : f32) outs(%init : tensor) -> tensor
+ %fill = linalg.fill ins(%cst : f32) outs(%init : tensor)
%conv = linalg.conv_2d_nhwc_hwcf
{dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>}
ins(%input, %filter : tensor, tensor)
- outs(%fill : tensor) -> tensor
+ outs(%fill : tensor)
%for0 = scf.for %iv0 = %c0 to %n step %c8 iter_args(%arg0 = %fill) -> tensor {
%for1 = scf.for %iv1 = %c0 to %oh step %c16 iter_args(%arg1 = %arg0) -> tensor {
@@ -217,7 +216,7 @@
// CHECK-DAG: %[[ELEM_OC:.+]] = tensor.dim %[[ELEM]], %[[C3]] : tensor
// CHECK: %[[INIT:.+]] = tensor.empty(%[[ELEM_N]], %[[ELEM_OH]], %[[ELEM_OW]], %[[ELEM_OC]]) : tensor
-// CHECK: %[[FILL:.+]] = linalg.fill ins(%cst : f32) outs(%[[INIT]] : tensor) -> tensor
+// CHECK: %[[FILL:.+]] = linalg.fill ins(%cst : f32) outs(%[[INIT]] : tensor)
// CHECK-DAG: %[[FILTER_H:.+]] = tensor.dim %[[FILTER]], %[[C0]] : tensor
// CHECK-DAG: %[[FILTER_W:.+]] = tensor.dim %[[FILTER]], %[[C1]] : tensor
@@ -256,7 +255,7 @@
// CHECK-SAME: [%[[SIZE_INPUT_N]], %[[SIZE_ELEM_OH_2]], %[[SIZE_ELEM_OW_2]], %[[SIZE_ELEM_OC_2]]]
// CHECK-NEXT: %[[ST_CONV:.+]] = linalg.conv_2d_nhwc_hwcf
// CHECK-SAME: ins(%[[ST_INPUT]], %[[ST_FILTER]] : tensor, tensor)
-// CHECK-SAME: outs(%[[ST_FILL]] : tensor) -> tensor
+// CHECK-SAME: outs(%[[ST_FILL]] : tensor)
// CHECK-NEXT: %[[ST_ADD:.+]] = linalg.generic
// CHECK-SAME: ins(%[[ST_CONV]], %[[ST_ELEM]] : tensor, tensor)
// CHECK-SAME: outs(%[[ST_ARG]] : tensor)
@@ -301,7 +300,7 @@
tensor.yield %zero : f32
} : tensor<58x1xf32> to tensor<64x128xf32>
- %fill = linalg.fill ins(%zero : f32) outs(%large_input : tensor<64x128xf32>) -> tensor<64x128xf32>
+ %fill = linalg.fill ins(%zero : f32) outs(%large_input : tensor<64x128xf32>)
%for0 = scf.for %iv0 = %c0 to %d0 step %c16 iter_args(%arg0 = %fill) -> tensor<64x128xf32> {
%for1 = scf.for %iv1 = %c0 to %d1 step %c32 iter_args(%arg1 = %arg0) -> tensor<64x128xf32> {
diff --git a/mlir/test/Dialect/Linalg/tile-tensors.mlir b/mlir/test/Dialect/Linalg/tile-tensors.mlir
--- a/mlir/test/Dialect/Linalg/tile-tensors.mlir
+++ b/mlir/test/Dialect/Linalg/tile-tensors.mlir
@@ -14,14 +14,13 @@
// CHECK: %[[sTB:.*]] = tensor.extract_slice %[[TB]][{{.*}}] : tensor to tensor
// CHECK: %[[sTC:.*]] = tensor.extract_slice %[[TC2]][{{.*}}] : tensor to tensor
// CHECK: %[[sTD:.*]] = linalg.matmul ins(%[[sTA]], %[[sTB]] : tensor, tensor)
-// CHECK-SAME: outs(%[[sTC]] : tensor) -> tensor
+// CHECK-SAME: outs(%[[sTC]] : tensor)
// CHECK: %[[TD:.*]] = tensor.insert_slice %[[sTD]] into %[[TC2]][{{.*}}] : tensor into tensor
// CHECK: scf.yield %[[TD]] : tensor
// CHECK: scf.yield %[[TD2]] : tensor
// CHECK: scf.yield %[[TD1]] : tensor
%0 = linalg.matmul ins(%arg0, %arg1: tensor, tensor) outs(%arg2: tensor)
- -> tensor
// CHECK: return %[[TD0]] : tensor
return %0 : tensor
diff --git a/mlir/test/Dialect/Linalg/tile-to-foreach-thread.mlir b/mlir/test/Dialect/Linalg/tile-to-foreach-thread.mlir
--- a/mlir/test/Dialect/Linalg/tile-to-foreach-thread.mlir
+++ b/mlir/test/Dialect/Linalg/tile-to-foreach-thread.mlir
@@ -21,14 +21,14 @@
// CHECK: %[[tC:.*]] = tensor.extract_slice %[[C_BLK]]{{.*}} : tensor to tensor
// CHECK: %[[RES:.*]] = linalg.matmul
// CHECK-SAME: ins(%[[tA]], %[[tB]] : tensor, tensor)
-// CHECK-SAME: outs(%[[tC]] : tensor) -> tensor
+// CHECK-SAME: outs(%[[tC]] : tensor)
// CHECK: scf.foreach_thread.perform_concurrently {
// CHECK-NEXT: tensor.parallel_insert_slice %[[RES]] into %[[C_BLK]]{{.*}} :
// CHECK-SAME: tensor into tensor
// CHECK-NEXT: }
// CHECK-NEXT: } {mapping = [#gpu.thread, #gpu.thread]}
%0 = linalg.matmul ins(%A, %B : tensor, tensor)
- outs(%C : tensor) -> (tensor)
+ outs(%C : tensor)
return %0 : tensor
}
@@ -70,7 +70,7 @@
%tile_size_1 = "test.dummy"() : () -> (index)
%tile_size_2 = "test.dummy"() : () -> (index)
%0 = linalg.matmul ins(%A, %B : tensor, tensor)
- outs(%C : tensor) -> (tensor)
+ outs(%C : tensor)
return %0 : tensor
}
@@ -111,7 +111,7 @@
// CHECK: scf.foreach_thread.perform_concurrently
// CHECK-NEXT: tensor.parallel_insert_slice
%0 = linalg.matmul ins(%A, %B : tensor<100x200xf32>, tensor<200x300xf32>)
- outs(%C : tensor<100x300xf32>) -> (tensor<100x300xf32>)
+ outs(%C : tensor<100x300xf32>)
return %0 : tensor<100x300xf32>
}
@@ -152,7 +152,7 @@
// CHECK: scf.foreach_thread.perform_concurrently
// CHECK-NEXT: tensor.parallel_insert_slice
%0 = linalg.matmul ins(%A, %B : tensor, tensor)
- outs(%C : tensor) -> (tensor)
+ outs(%C : tensor)
return %0 : tensor
}
@@ -190,7 +190,7 @@
// CHECK: scf.foreach_thread.perform_concurrently
// CHECK-NEXT: tensor.parallel_insert_slice
%0 = linalg.matmul ins(%A, %B : tensor<100x200xf32>, tensor<200x300xf32>)
- outs(%C : tensor<100x300xf32>) -> (tensor<100x300xf32>)
+ outs(%C : tensor<100x300xf32>)
return %0 : tensor<100x300xf32>
}
@@ -263,7 +263,7 @@
// CHECK-NEXT: tensor.parallel_insert_slice
%tile_size = "test.dummy"() : () -> (index)
%0 = linalg.matmul ins(%A, %B : tensor, tensor)
- outs(%C : tensor) -> (tensor)
+ outs(%C : tensor)
return %0 : tensor
}
diff --git a/mlir/test/Dialect/Linalg/transform-op-decompose.mlir b/mlir/test/Dialect/Linalg/transform-op-decompose.mlir
--- a/mlir/test/Dialect/Linalg/transform-op-decompose.mlir
+++ b/mlir/test/Dialect/Linalg/transform-op-decompose.mlir
@@ -13,7 +13,7 @@
%0 = linalg.conv_2d_nhwc_hwcf {dilations = dense<1> : tensor<2xi64>,
strides = dense<1> : tensor<2xi64>}
ins (%input, %filter: tensor, tensor<1x?x?x?xf32>)
- outs (%init: tensor) -> tensor
+ outs (%init: tensor)
// CHECK: return %[[RES]]
return %0 : tensor
}
@@ -31,7 +31,7 @@
%0 = linalg.conv_2d_nchw_fchw {dilations = dense<1> : tensor<2xi64>,
strides = dense<1> : tensor<2xi64>}
ins (%input, %filter: tensor, tensor)
- outs (%init: tensor) -> tensor
+ outs (%init: tensor)
// CHECK: return %[[RES]]
return %0 : tensor
}
@@ -51,7 +51,7 @@
// CHECK: %[[INSERTED:.+]] = tensor.insert_slice %[[OPRES]] into %[[RES]]
%0 = linalg.depthwise_conv_2d_nhwc_hwc {dilations = dense<1> : vector<2xi64>, strides = dense<2> : vector<2xi64>}
ins(%input, %filter: tensor<1x1x113x96xf32>, tensor<1x3x96xf32>)
- outs(%init: tensor<1x1x56x96xf32>) -> tensor<1x1x56x96xf32>
+ outs(%init: tensor<1x1x56x96xf32>)
// CHECK: %[[INSERTED]]
return %0: tensor<1x1x56x96xf32>
}
@@ -69,7 +69,7 @@
%0 = linalg.pooling_nhwc_sum {dilations = dense<1> : tensor<2xi64>,
strides = dense<1> : tensor<2xi64>}
ins (%input, %filter: tensor, tensor<1x?xf32>)
- outs (%init: tensor) -> tensor
+ outs (%init: tensor)
// CHECK: return %[[RES]]
return %0 : tensor
}
@@ -87,7 +87,7 @@
%0 = linalg.pooling_nchw_sum {dilations = dense<1> : tensor<2xi64>,
strides = dense<1> : tensor<2xi64>}
ins (%input, %filter: tensor, tensor<1x?xf32>)
- outs (%init: tensor) -> tensor
+ outs (%init: tensor)
// CHECK: return %[[RES]]
return %0 : tensor
}
@@ -105,7 +105,7 @@
%0 = linalg.pooling_nhwc_max {dilations = dense<1> : tensor<2xi64>,
strides = dense<1> : tensor<2xi64>}
ins (%input, %filter: tensor, tensor<1x?xf32>)
- outs (%init: tensor) -> tensor
+ outs (%init: tensor)
// CHECK: return %[[RES]]
return %0 : tensor
}
@@ -123,7 +123,7 @@
%0 = linalg.pooling_nhwc_max_unsigned {dilations = dense<1> : tensor<2xi64>,
strides = dense<1> : tensor<2xi64>}
ins (%input, %filter: tensor, tensor<1x?xf32>)
- outs (%init: tensor) -> tensor
+ outs (%init: tensor)
// CHECK: return %[[RES]]
return %0 : tensor
}
@@ -141,7 +141,7 @@
%0 = linalg.pooling_nhwc_min {dilations = dense<1> : tensor<2xi64>,
strides = dense<1> : tensor<2xi64>}
ins (%input, %filter: tensor, tensor<1x?xf32>)
- outs (%init: tensor) -> tensor
+ outs (%init: tensor)
// CHECK: return %[[RES]]
return %0 : tensor
}
@@ -159,7 +159,7 @@
%0 = linalg.pooling_nhwc_min_unsigned {dilations = dense<1> : tensor<2xi64>,
strides = dense<1> : tensor<2xi64>}
ins (%input, %filter: tensor, tensor<1x?xf32>)
- outs (%init: tensor) -> tensor
+ outs (%init: tensor)
// CHECK: return %[[RES]]
return %0 : tensor
}
@@ -177,7 +177,7 @@
%0 = linalg.pooling_nchw_max {dilations = dense<1> : tensor<2xi64>,
strides = dense<1> : tensor<2xi64>}
ins (%input, %filter: tensor, tensor<1x?xf32>)
- outs (%init: tensor) -> tensor
+ outs (%init: tensor)
// CHECK: return %[[RES]]
return %0 : tensor
}
diff --git a/mlir/test/Dialect/Linalg/transform-op-fuse-into-containing.mlir b/mlir/test/Dialect/Linalg/transform-op-fuse-into-containing.mlir
--- a/mlir/test/Dialect/Linalg/transform-op-fuse-into-containing.mlir
+++ b/mlir/test/Dialect/Linalg/transform-op-fuse-into-containing.mlir
@@ -12,7 +12,7 @@
func.func @fuse_tileable_op(%arg0: index, %arg1: tensor, %arg2: tensor) -> tensor {
%cst = arith.constant 4.200000e+01 : f32
%c0 = arith.constant 0 : index
- %0 = linalg.fill ins(%cst : f32) outs(%arg1 : tensor) -> tensor
+ %0 = linalg.fill ins(%cst : f32) outs(%arg1 : tensor)
%d0 = tensor.dim %arg1, %c0 : tensor
%1 = affine.apply #map0()[%d0, %arg0]
@@ -27,7 +27,7 @@
%6 = tensor.extract_slice %0[%3] [%4] [1] : tensor to tensor
// CHECK: %[[T2:.*]] = linalg.elemwise_unary ins(%[[T1]]
- %7 = linalg.elemwise_unary ins(%6 : tensor) outs(%5 : tensor) -> tensor
+ %7 = linalg.elemwise_unary ins(%6 : tensor) outs(%5 : tensor)
scf.foreach_thread.perform_concurrently {
tensor.parallel_insert_slice %7 into %o[%3] [%4] [1] : tensor into tensor
}
@@ -74,7 +74,7 @@
%5 = tensor.extract_slice %o[%3] [%4] [1] : tensor<64xf32> to tensor
// CHECK: %[[T2:.*]] = linalg.elemwise_unary ins(%[[INIT_TENSOR]]
- %7 = linalg.elemwise_unary ins(%0 : tensor) outs(%5 : tensor) -> tensor
+ %7 = linalg.elemwise_unary ins(%0 : tensor) outs(%5 : tensor)
scf.foreach_thread.perform_concurrently {
tensor.parallel_insert_slice %7 into %o[%3] [%4] [1] : tensor into tensor<64xf32>
}
@@ -108,7 +108,7 @@
func.func @fuse_tileable_op_rank_reducing(%arg0: index, %arg1: tensor, %arg2: tensor) -> tensor {
%cst = arith.constant 4.200000e+01 : f32
%c0 = arith.constant 0 : index
- %0 = linalg.fill ins(%cst : f32) outs(%arg2 : tensor) -> tensor
+ %0 = linalg.fill ins(%cst : f32) outs(%arg2 : tensor)
%d0 = tensor.dim %arg1, %c0 : tensor
// CHECK: scf.foreach_thread {{.*}} -> (tensor) {
@@ -116,7 +116,7 @@
%5 = tensor.extract_slice %o[%arg3] [1] [1] : tensor to tensor
// CHECK: tensor.extract_slice %{{.*}}[%{{.*}}] [1] [1] : tensor to tensor<1xf32>
-// CHECK: linalg.fill ins(%{{.*}} : f32) outs(%{{.*}} : tensor<1xf32>) -> tensor<1xf32>
+// CHECK: linalg.fill ins(%{{.*}} : f32) outs(%{{.*}} : tensor<1xf32>)
// CHECK: tensor.extract_slice %{{.*}}[0] [1] [1] : tensor<1xf32> to tensor
// CHECK: func.call @foo(%{{.*}}) : (tensor) -> tensor
%7 = func.call @foo(%5) : (tensor) -> tensor
@@ -154,7 +154,7 @@
func.func @fuse_tileable_op_through_bbarg(%arg0: index, %arg1: tensor, %arg2: tensor) -> tensor {
%cst = arith.constant 4.200000e+01 : f32
%c0 = arith.constant 0 : index
- %0 = linalg.fill ins(%cst : f32) outs(%arg2 : tensor) -> tensor
+ %0 = linalg.fill ins(%cst : f32) outs(%arg2 : tensor)
%d0 = tensor.dim %arg1, %c0 : tensor
%1 = affine.apply #map0()[%d0, %arg0]
@@ -169,7 +169,7 @@
%6 = tensor.extract_slice %arg1[%3] [%4] [1] : tensor to tensor
// CHECK: %[[T2:.*]] = linalg.elemwise_unary {{.*}} outs(%[[T1]]
- %7 = linalg.elemwise_unary ins(%6 : tensor) outs(%5 : tensor) -> tensor
+ %7 = linalg.elemwise_unary ins(%6 : tensor) outs(%5 : tensor)
scf.foreach_thread.perform_concurrently {
tensor.parallel_insert_slice %7 into %o[%3] [%4] [1] : tensor into tensor
}
@@ -229,7 +229,7 @@
%6 = tensor.extract_slice %0#0[%3] [%4] [1] : tensor to tensor
// CHECK: %[[T2:.*]] = linalg.elemwise_unary ins(%[[T1]]#0
- %7 = linalg.elemwise_unary ins(%6 : tensor) outs(%5 : tensor) -> tensor
+ %7 = linalg.elemwise_unary ins(%6 : tensor) outs(%5 : tensor)
scf.foreach_thread.perform_concurrently {
tensor.parallel_insert_slice %7 into %o[%3] [%4] [1] : tensor into tensor
}
diff --git a/mlir/test/Dialect/Linalg/transform-op-fuse.mlir b/mlir/test/Dialect/Linalg/transform-op-fuse.mlir
--- a/mlir/test/Dialect/Linalg/transform-op-fuse.mlir
+++ b/mlir/test/Dialect/Linalg/transform-op-fuse.mlir
@@ -9,9 +9,9 @@
// CHECK: linalg.elemwise_binary
// CHECK: return %[[RES]]
%0 = linalg.elemwise_unary ins(%arg0 : tensor)
- outs(%arg1: tensor) -> tensor
+ outs(%arg1: tensor)
%1 = linalg.elemwise_binary ins(%0, %arg0 : tensor, tensor)
- outs(%arg1: tensor) -> tensor
+ outs(%arg1: tensor)
return %1 : tensor
}
@@ -36,9 +36,9 @@
// CHECK: linalg.elemwise_binary
// CHECK: return %[[RES]]
%0 = linalg.elemwise_unary ins(%arg0 : tensor)
- outs(%arg1: tensor) -> tensor
+ outs(%arg1: tensor)
%1 = linalg.elemwise_binary ins(%0, %arg0 : tensor, tensor)
- outs(%arg1: tensor) -> tensor
+ outs(%arg1: tensor)
return %1 : tensor
}
@@ -73,7 +73,7 @@
// CHECK: linalg.generic {{.+}} ins(%[[IN_SLICE]] : tensor) outs(%[[OUT_SLICE2]] : tensor)
// CHECK: return %[[RES]]
- %fill = linalg.fill ins(%five : f32) outs(%init : tensor<12x25xf32>) -> tensor<12x25xf32>
+ %fill = linalg.fill ins(%five : f32) outs(%init : tensor<12x25xf32>)
%0 = linalg.generic {
indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d1, d2)>, affine_map<(d0, d1, d2) -> (d0, d2)>],
iterator_types = ["parallel", "reduction", "parallel"]
@@ -105,7 +105,7 @@
%1 = tensor.unpack %arg0 inner_dims_pos = [0, 1] inner_tiles = [8, 8] into %0 : tensor<16x48x8x8xf32> -> tensor<128x384xf32>
%2 = linalg.elemwise_unary ins(%1: tensor<128x384xf32>)
- outs(%arg1: tensor<128x384xf32>) -> tensor<128x384xf32>
+ outs(%arg1: tensor<128x384xf32>)
return %2 : tensor<128x384xf32>
}
diff --git a/mlir/test/Dialect/Linalg/transform-op-generalize.mlir b/mlir/test/Dialect/Linalg/transform-op-generalize.mlir
--- a/mlir/test/Dialect/Linalg/transform-op-generalize.mlir
+++ b/mlir/test/Dialect/Linalg/transform-op-generalize.mlir
@@ -6,7 +6,7 @@
// CHECK-NOT: linalg.elemwise_unary
// CHECK: linalg.generic
%0 = linalg.elemwise_unary ins(%arg0 : tensor)
- outs(%arg1: tensor) -> tensor
+ outs(%arg1: tensor)
return %0 : tensor
}
diff --git a/mlir/test/Dialect/Linalg/transform-op-interchange.mlir b/mlir/test/Dialect/Linalg/transform-op-interchange.mlir
--- a/mlir/test/Dialect/Linalg/transform-op-interchange.mlir
+++ b/mlir/test/Dialect/Linalg/transform-op-interchange.mlir
@@ -28,7 +28,7 @@
func.func @interchange_matmul(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor {
// expected-note @below {{when applied to this op}}
- %0 = linalg.matmul ins(%arg0, %arg1 : tensor, tensor) outs(%arg2 : tensor) -> tensor
+ %0 = linalg.matmul ins(%arg0, %arg1 : tensor, tensor) outs(%arg2 : tensor)
return %0 : tensor
}
diff --git a/mlir/test/Dialect/Linalg/transform-op-multitile-sizes.mlir b/mlir/test/Dialect/Linalg/transform-op-multitile-sizes.mlir
--- a/mlir/test/Dialect/Linalg/transform-op-multitile-sizes.mlir
+++ b/mlir/test/Dialect/Linalg/transform-op-multitile-sizes.mlir
@@ -14,7 +14,6 @@
-> tensor<13x42xf32> {
%0 = linalg.matmul ins(%arg0, %arg1: tensor<13x34xf32>, tensor<34x42xf32>)
outs(%arg2: tensor<13x42xf32>)
- -> tensor<13x42xf32>
// The first application computes the total size.
// CHECK: %{{.*}} = affine.apply #[[$MAP13]]()
// CHECK: %[[SIZE:.+]] = affine.apply #[[$MAP13]]()
@@ -60,7 +59,6 @@
-> tensor {
%0 = linalg.matmul ins(%arg0, %arg1: tensor, tensor)
outs(%arg2: tensor)
- -> tensor
return %0 : tensor
}
diff --git a/mlir/test/Dialect/Linalg/transform-op-pad.mlir b/mlir/test/Dialect/Linalg/transform-op-pad.mlir
--- a/mlir/test/Dialect/Linalg/transform-op-pad.mlir
+++ b/mlir/test/Dialect/Linalg/transform-op-pad.mlir
@@ -26,7 +26,7 @@
// CHECK: %[[T5:.*]] = linalg.matmul
// CHECK-SAME: ins(%[[T3]], %[[T4]] : tensor<4x7xf32>, tensor<7x5xf32>)
// CHECK-SAME: outs(%[[T2]] : tensor<4x5xf32>)
- %4 = linalg.matmul ins(%1, %2 : tensor<4x?xf32>, tensor) outs(%3 : tensor<4x5xf32>) -> tensor<4x5xf32>
+ %4 = linalg.matmul ins(%1, %2 : tensor<4x?xf32>, tensor) outs(%3 : tensor<4x5xf32>)
%5 = tensor.insert_slice %4 into %arg2[%iv0, %iv1] [4, 5] [1, 1] : tensor<4x5xf32> into tensor<24x25xf32>
func.return %5 : tensor<24x25xf32>
}
@@ -43,7 +43,7 @@
%arg1: tensor<12x25xf32>,
%arg2: tensor<24x25xf32>) -> tensor<24x25xf32> {
// expected-note @below {{when applied to this op}}
- %0 = linalg.matmul ins(%arg0, %arg1 : tensor<24x12xf32>, tensor<12x25xf32>) outs(%arg2 : tensor<24x25xf32>) -> tensor<24x25xf32>
+ %0 = linalg.matmul ins(%arg0, %arg1 : tensor<24x12xf32>, tensor<12x25xf32>) outs(%arg2 : tensor<24x25xf32>)
func.return %0 : tensor<24x25xf32>
}
@@ -60,7 +60,7 @@
%arg1: tensor<12x25xf32>,
%arg2: tensor<24x25xf32>) -> tensor<24x25xf32> {
// expected-note @below {{when applied to this op}}
- %0 = linalg.matmul ins(%arg0, %arg1 : tensor<24x12xf32>, tensor<12x25xf32>) outs(%arg2 : tensor<24x25xf32>) -> tensor<24x25xf32>
+ %0 = linalg.matmul ins(%arg0, %arg1 : tensor<24x12xf32>, tensor<12x25xf32>) outs(%arg2 : tensor<24x25xf32>)
func.return %0 : tensor<24x25xf32>
}
@@ -78,7 +78,7 @@
%arg2: tensor<24x25xf32>) -> tensor<24x25xf32> {
// This is attached to an error that is silenceable and is not reported by this transform
// {{when applied to this op}}
- %0 = linalg.matmul ins(%arg0, %arg1 : tensor<24x12xf32>, tensor<12x25xf32>) outs(%arg2 : tensor<24x25xf32>) -> tensor<24x25xf32>
+ %0 = linalg.matmul ins(%arg0, %arg1 : tensor<24x12xf32>, tensor<12x25xf32>) outs(%arg2 : tensor<24x25xf32>)
func.return %0 : tensor<24x25xf32>
}
diff --git a/mlir/test/Dialect/Linalg/transform-op-scalarize.mlir b/mlir/test/Dialect/Linalg/transform-op-scalarize.mlir
--- a/mlir/test/Dialect/Linalg/transform-op-scalarize.mlir
+++ b/mlir/test/Dialect/Linalg/transform-op-scalarize.mlir
@@ -12,7 +12,7 @@
// CHECK: scf.yield %[[INS_2]] : tensor
// CHECK: %[[INS_1:.*]] = tensor.insert_slice %[[RES_LOOP_2]] into %{{.*}}, 25] [1, 1] : tensor into tensor<24x25xf32>
// CHECK: scf.yield %[[INS_1]] : tensor<24x25xf32>
- %0 = linalg.matmul ins(%arg0, %arg1 : tensor<24x12xf32>, tensor<12x25xf32>) outs(%arg2 : tensor<24x25xf32>) -> tensor<24x25xf32>
+ %0 = linalg.matmul ins(%arg0, %arg1 : tensor<24x12xf32>, tensor<12x25xf32>) outs(%arg2 : tensor<24x25xf32>)
// CHECK: return %[[RES_LOOP_1]] : tensor<24x25xf32>
func.return %0 : tensor<24x25xf32>
diff --git a/mlir/test/Dialect/Linalg/transform-op-split-reduction-by-scaling.mlir b/mlir/test/Dialect/Linalg/transform-op-split-reduction-by-scaling.mlir
--- a/mlir/test/Dialect/Linalg/transform-op-split-reduction-by-scaling.mlir
+++ b/mlir/test/Dialect/Linalg/transform-op-split-reduction-by-scaling.mlir
@@ -14,7 +14,7 @@
// CHECK-SAME: ins(%{{[a-zA-Z0-9]*}} : tensor)
// CHECK-SAME: outs(%{{[a-zA-Z0-9]*}} : tensor) {
%0 = linalg.matmul ins(%A, %B: tensor, tensor<256x32xf32>)
- outs(%C: tensor) -> tensor
+ outs(%C: tensor)
return %0: tensor
}
diff --git a/mlir/test/Dialect/Linalg/transform-op-split-reduction.mlir b/mlir/test/Dialect/Linalg/transform-op-split-reduction.mlir
--- a/mlir/test/Dialect/Linalg/transform-op-split-reduction.mlir
+++ b/mlir/test/Dialect/Linalg/transform-op-split-reduction.mlir
@@ -2,7 +2,7 @@
func.func @matmul_split(%A : tensor<16x256xf32>, %B: tensor<256x32xf32>, %C: tensor<16x32xf32>) -> tensor<16x32xf32> {
%0 = linalg.matmul ins(%A, %B: tensor<16x256xf32>, tensor<256x32xf32>)
- outs(%C: tensor<16x32xf32>) -> tensor<16x32xf32>
+ outs(%C: tensor<16x32xf32>)
return %0: tensor<16x32xf32>
}
@@ -16,7 +16,7 @@
// CHECK-DAG: %[[I1:.*]] = tensor.expand_shape %{{.*}}[0], [1, 2]] : tensor<16x256xf32> into tensor<16x4x64xf32>
// CHECK-DAG: %[[I2:.*]] = tensor.expand_shape %{{.*}}[0, 1], [2]] : tensor<256x32xf32> into tensor<4x64x32xf32>
// CHECK-DAG: %[[INI:.*]] = tensor.empty() : tensor<16x32x4xf32>
-// CHECK: %[[F:.*]] = linalg.fill ins(%[[ID]] : f32) outs(%[[INI]] : tensor<16x32x4xf32>) -> tensor<16x32x4xf32>
+// CHECK: %[[F:.*]] = linalg.fill ins(%[[ID]] : f32) outs(%[[INI]] : tensor<16x32x4xf32>)
// CHECK: %[[G:.*]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]], #[[$MAP2]]]
// CHECK-SAME: , iterator_types = ["parallel", "parallel", "parallel", "reduction"]}
// CHECK-SAME: ins(%[[I1]], %[[I2]] : tensor<16x4x64xf32>, tensor<4x64x32xf32>) outs(%[[F]] : tensor<16x32x4xf32>) {
@@ -64,7 +64,7 @@
// CHECK-DAG: %[[ID:.*]] = arith.constant 1.000000e+00 : f32
// CHECK-DAG: %[[I1:.*]] = tensor.expand_shape %{{.*}}[0, 1]] : tensor<32xf32> into tensor<4x8xf32>
// CHECK-DAG: %[[INI:.*]] = tensor.empty() : tensor<4xf32>
-// CHECK: %[[F:.*]] = linalg.fill ins(%[[ID]] : f32) outs(%[[INI]] : tensor<4xf32>) -> tensor<4xf32>
+// CHECK: %[[F:.*]] = linalg.fill ins(%[[ID]] : f32) outs(%[[INI]] : tensor<4xf32>)
// CHECK: %[[G:.*]] = linalg.generic
// CHECK: {indexing_maps = [#[[$MAP0]], #[[$MAP1]], #[[$MAP2]]],
// CHECK: iterator_types = ["parallel", "reduction"]} ins(%[[I1]], %{{.*}} : tensor<4x8xf32>, tensor) outs(%[[F]] : tensor<4xf32>) {
@@ -116,7 +116,7 @@
// CHECK-DAG: %[[I1:.*]] = tensor.expand_shape %{{.*}}[0, 1], [2]] : tensor<32x2xf32> into tensor<4x8x2xf32>
// CHECK-DAG: %[[I2:.*]] = tensor.expand_shape %{{.*}}[0], [1, 2]] : tensor<5x32xf32> into tensor<5x4x8xf32>
// CHECK-DAG: %[[INI:.*]] = tensor.empty() : tensor<5x2x4xf32>
-// CHECK: %[[F:.*]] = linalg.fill ins(%[[ID]] : f32) outs(%[[INI]] : tensor<5x2x4xf32>) -> tensor<5x2x4xf32>
+// CHECK: %[[F:.*]] = linalg.fill ins(%[[ID]] : f32) outs(%[[INI]] : tensor<5x2x4xf32>)
// CHECK: %[[G:.*]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]], #[[$MAP2]]], iterator_types = ["parallel", "reduction", "parallel", "parallel"]}
// CHECK-SAME: ins(%[[I1]], %[[I2]] : tensor<4x8x2xf32>, tensor<5x4x8xf32>) outs(%[[F]] : tensor<5x2x4xf32>) {
// CHECK: arith.addf
@@ -140,7 +140,7 @@
func.func @matmul_split(%A : tensor<16x256xf32>, %B: tensor<256x32xf32>, %C: tensor<16x32xf32>) -> tensor<16x32xf32> {
%0 = linalg.matmul ins(%A, %B: tensor<16x256xf32>, tensor<256x32xf32>)
- outs(%C: tensor<16x32xf32>) -> tensor<16x32xf32>
+ outs(%C: tensor<16x32xf32>)
return %0: tensor<16x32xf32>
}
@@ -154,7 +154,7 @@
// CHECK-DAG: %[[I1:.*]] = tensor.expand_shape %{{.*}}[0], [1, 2]] : tensor<16x256xf32> into tensor<16x64x4xf32>
// CHECK-DAG: %[[I2:.*]] = tensor.expand_shape %{{.*}}[0, 1], [2]] : tensor<256x32xf32> into tensor<64x4x32xf32>
// CHECK-DAG: %[[INI:.*]] = tensor.empty() : tensor<16x32x4xf32>
-// CHECK: %[[F:.*]] = linalg.fill ins(%[[ID]] : f32) outs(%[[INI]] : tensor<16x32x4xf32>) -> tensor<16x32x4xf32>
+// CHECK: %[[F:.*]] = linalg.fill ins(%[[ID]] : f32) outs(%[[INI]] : tensor<16x32x4xf32>)
// CHECK: %[[G:.*]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]], #[[$MAP2]]]
// CHECK-SAME: , iterator_types = ["parallel", "parallel", "reduction", "parallel"]}
// CHECK-SAME: ins(%[[I1]], %[[I2]] : tensor<16x64x4xf32>, tensor<64x4x32xf32>) outs(%[[F]] : tensor<16x32x4xf32>) {
@@ -202,7 +202,7 @@
// CHECK-DAG: %[[ID:.*]] = arith.constant 1.000000e+00 : f32
// CHECK-DAG: %[[I1:.*]] = tensor.expand_shape %{{.*}}[0, 1]] : tensor<32xf32> into tensor<8x4xf32>
// CHECK-DAG: %[[INI:.*]] = tensor.empty() : tensor<4xf32>
-// CHECK: %[[F:.*]] = linalg.fill ins(%[[ID]] : f32) outs(%[[INI]] : tensor<4xf32>) -> tensor<4xf32>
+// CHECK: %[[F:.*]] = linalg.fill ins(%[[ID]] : f32) outs(%[[INI]] : tensor<4xf32>)
// CHECK: %[[G:.*]] = linalg.generic
// CHECK: {indexing_maps = [#[[$MAP0]], #[[$MAP1]], #[[$MAP2]]],
// CHECK: iterator_types = ["reduction", "parallel"]} ins(%[[I1]], %{{.*}} : tensor<8x4xf32>, tensor) outs(%[[F]] : tensor<4xf32>) {
@@ -254,7 +254,7 @@
// CHECK-DAG: %[[I1:.*]] = tensor.expand_shape %{{.*}}[0, 1], [2]] : tensor<32x2xf32> into tensor<8x4x2xf32>
// CHECK-DAG: %[[I2:.*]] = tensor.expand_shape %{{.*}}[0], [1, 2]] : tensor<5x32xf32> into tensor<5x8x4xf32>
// CHECK-DAG: %[[INI:.*]] = tensor.empty() : tensor<5x2x4xf32>
-// CHECK: %[[F:.*]] = linalg.fill ins(%[[ID]] : f32) outs(%[[INI]] : tensor<5x2x4xf32>) -> tensor<5x2x4xf32>
+// CHECK: %[[F:.*]] = linalg.fill ins(%[[ID]] : f32) outs(%[[INI]] : tensor<5x2x4xf32>)
// CHECK: %[[G:.*]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]], #[[$MAP2]]], iterator_types = ["parallel", "reduction", "parallel", "parallel"]}
// CHECK-SAME: ins(%[[I1]], %[[I2]] : tensor<8x4x2xf32>, tensor<5x8x4xf32>) outs(%[[F]] : tensor<5x2x4xf32>) {
// CHECK: arith.addf
diff --git a/mlir/test/Dialect/Linalg/transform-op-tile.mlir b/mlir/test/Dialect/Linalg/transform-op-tile.mlir
--- a/mlir/test/Dialect/Linalg/transform-op-tile.mlir
+++ b/mlir/test/Dialect/Linalg/transform-op-tile.mlir
@@ -21,14 +21,13 @@
// CHECK: %[[sTB:.*]] = tensor.extract_slice %[[TB]][{{.*}}] : tensor<128x128xf32> to tensor<4x4xf32>
// CHECK: %[[sTC:.*]] = tensor.extract_slice %[[TC2]][{{.*}}] : tensor<128x128xf32> to tensor<4x4xf32>
// CHECK: %[[sTD:.*]] = linalg.matmul ins(%[[sTA]], %[[sTB]] : tensor<4x4xf32>, tensor<4x4xf32>)
-// CHECK-SAME: outs(%[[sTC]] : tensor<4x4xf32>) -> tensor<4x4xf32>
+// CHECK-SAME: outs(%[[sTC]] : tensor<4x4xf32>)
// CHECK: %[[TD:.*]] = tensor.insert_slice %[[sTD]] into %[[TC2]][{{.*}}] : tensor<4x4xf32> into tensor<128x128xf32>
// CHECK: scf.yield %[[TD]] : tensor<128x128xf32>
// CHECK: scf.yield %[[TD2]] : tensor<128x128xf32>
// CHECK: scf.yield %[[TD1]] : tensor<128x128xf32>
%0 = linalg.matmul ins(%arg0, %arg1: tensor<128x128xf32>, tensor<128x128xf32>)
outs(%arg2: tensor<128x128xf32>)
- -> tensor<128x128xf32>
// CHECK: return %[[TD0]] : tensor<128x128xf32>
return %0 : tensor<128x128xf32>
@@ -60,7 +59,7 @@
// CHECK: %[[sTB:.*]] = tensor.extract_slice %[[TB]][{{.*}}] : tensor<128x128xf32> to tensor<4x?xf32>
// CHECK: %[[sTC:.*]] = tensor.extract_slice %[[TC2]][{{.*}}] : tensor<128x128xf32> to tensor
// CHECK: %[[sTD:.*]] = linalg.matmul ins(%[[sTA]], %[[sTB]] : tensor, tensor<4x?xf32>)
-// CHECK-SAME: outs(%[[sTC]] : tensor) -> tensor
+// CHECK-SAME: outs(%[[sTC]] : tensor)
// CHECK: %[[TD:.*]] = tensor.insert_slice %[[sTD]] into %[[TC2]][{{.*}}] : tensor into tensor<128x128xf32>
// CHECK: scf.yield %[[TD]] : tensor<128x128xf32>
// CHECK: scf.yield %[[TD2]] : tensor<128x128xf32>
@@ -68,7 +67,6 @@
%sz = func.call @get_dynamic_tile_size() : () -> index
%0 = linalg.matmul ins(%arg0, %arg1: tensor<128x128xf32>, tensor<128x128xf32>)
outs(%arg2: tensor<128x128xf32>)
- -> tensor<128x128xf32>
// CHECK: return %[[TD0]] : tensor<128x128xf32>
return %0 : tensor<128x128xf32>
diff --git a/mlir/test/Dialect/Linalg/transform-op-vectorize.mlir b/mlir/test/Dialect/Linalg/transform-op-vectorize.mlir
--- a/mlir/test/Dialect/Linalg/transform-op-vectorize.mlir
+++ b/mlir/test/Dialect/Linalg/transform-op-vectorize.mlir
@@ -12,7 +12,7 @@
// CHECK: %[[vC:.+]] = vector.transfer_read %[[C]]
// CHECK: %[[vR:.+]] = vector.contract {{.*}} %[[vA]], %[[vB]], %[[vC]]
// CHECK: vector.transfer_write %[[vR]], %[[C]]
- %0 = linalg.matmul ins(%arg0, %arg1 : tensor<24x12xf32>, tensor<12x25xf32>) outs(%arg2 : tensor<24x25xf32>) -> tensor<24x25xf32>
+ %0 = linalg.matmul ins(%arg0, %arg1 : tensor<24x12xf32>, tensor<12x25xf32>) outs(%arg2 : tensor<24x25xf32>)
func.return %0 : tensor<24x25xf32>
}
@@ -57,7 +57,7 @@
// CHECK: %[[vC:.+]] = vector.transfer_read %[[C]]
// CHECK: %[[vR:.+]] = vector.contract {{.*}} %[[vA]], %[[vB]], %[[vC]]
// CHECK: vector.transfer_write %[[vR]], %[[C]]
- %8 = linalg.matmul ins(%5, %7 : tensor<4x7xf32>, tensor<7x5xf32>) outs(%3 : tensor<4x5xf32>) -> tensor<4x5xf32>
+ %8 = linalg.matmul ins(%5, %7 : tensor<4x7xf32>, tensor<7x5xf32>) outs(%3 : tensor<4x5xf32>)
%9 = tensor.insert_slice %8 into %arg2[%arg3, %arg4] [4, 5] [1, 1] : tensor<4x5xf32> into tensor<24x25xf32>
return %9 : tensor<24x25xf32>
}
@@ -105,7 +105,7 @@
// CHECK: %[[vC:.+]] = vector.transfer_read %[[C]]
// CHECK: %[[vR:.+]] = vector.contract {{.*}} %[[vA]], %[[vB]], %[[vC]]
// CHECK: vector.transfer_write %[[vR]], %[[C]]
- %8 = linalg.matmul ins(%5, %7 : tensor<4x7xf32>, tensor<7x5xf32>) outs(%3 : tensor<4x5xf32>) -> tensor<4x5xf32>
+ %8 = linalg.matmul ins(%5, %7 : tensor<4x7xf32>, tensor<7x5xf32>) outs(%3 : tensor<4x5xf32>)
%9 = tensor.insert_slice %8 into %arg2[%arg3, %arg4] [4, 5] [1, 1] : tensor<4x5xf32> into tensor<24x25xf32>
return %9 : tensor<24x25xf32>
}
@@ -123,7 +123,7 @@
%arg1: tensor<12x25xf32>,
%arg2: tensor<24x25xf32>) -> tensor<24x25xf32> {
// expected-note @below {{non-isolated target}}
- %0 = linalg.matmul ins(%arg0, %arg1 : tensor<24x12xf32>, tensor<12x25xf32>) outs(%arg2 : tensor<24x25xf32>) -> tensor<24x25xf32>
+ %0 = linalg.matmul ins(%arg0, %arg1 : tensor<24x12xf32>, tensor<12x25xf32>) outs(%arg2 : tensor<24x25xf32>)
func.return %0 : tensor<24x25xf32>
}
diff --git a/mlir/test/Dialect/Linalg/transform-tile-and-fuse.mlir b/mlir/test/Dialect/Linalg/transform-tile-and-fuse.mlir
--- a/mlir/test/Dialect/Linalg/transform-tile-and-fuse.mlir
+++ b/mlir/test/Dialect/Linalg/transform-tile-and-fuse.mlir
@@ -17,11 +17,11 @@
%5 = linalg.fill {__producer__}
ins(%cst : f32)
- outs(%D : tensor) -> tensor
+ outs(%D : tensor)
%6 = linalg.matmul {__producer__}
ins(%A, %B : tensor, tensor)
- outs(%5 : tensor) -> tensor
+ outs(%5 : tensor)
%7 = linalg.generic {__root__,
indexing_maps = [affine_map<(d0, d1) -> (d0)>,
@@ -74,11 +74,11 @@
%5 = linalg.fill {__producer__}
ins(%cst : f32)
- outs(%D : tensor) -> tensor
+ outs(%D : tensor)
%6 = linalg.matmul {__producer__}
ins(%A, %B : tensor, tensor)
- outs(%5 : tensor) -> tensor
+ outs(%5 : tensor)
%7 = linalg.generic {__root__,
indexing_maps = [affine_map<(d0, d1) -> (d0)>,
diff --git a/mlir/test/Dialect/Linalg/transform-tile-reduction.mlir b/mlir/test/Dialect/Linalg/transform-tile-reduction.mlir
--- a/mlir/test/Dialect/Linalg/transform-tile-reduction.mlir
+++ b/mlir/test/Dialect/Linalg/transform-tile-reduction.mlir
@@ -33,7 +33,7 @@
// CHECK-DAG: %[[D1:.*]] = tensor.dim %[[ARG0]], %[[C1]] : tensor
// CHECK-DAG: %[[D2:.*]] = tensor.dim %[[ARG1]], %[[C0]] : tensor
// CHECK: %[[E:.*]] = tensor.empty(%[[D2]]) : tensor
-// CHECK: %[[F:.*]] = linalg.fill ins(%[[I]] : f32) outs(%[[E]] : tensor) -> tensor
+// CHECK: %[[F:.*]] = linalg.fill ins(%[[I]] : f32) outs(%[[E]] : tensor)
// CHECK: %[[L:.*]] = scf.for %[[K:.*]] = %[[C0]] to %[[D1]] step %[[C5]] iter_args(%[[ARG3:.*]] = %[[F]]) -> (tensor) {
// CHECK: %[[PS:.*]] = affine.min #[[MAP2]](%[[K]])[%[[D1]]]
// CHECK: %[[EXT2:.*]] = tensor.extract_slice %[[ARG0]][0, %[[K:.*]]] [%[[D0]], %[[PS]]] [1, 1] : tensor to tensor
@@ -78,7 +78,7 @@
// CHECK: func @reduction_tile_transpose
// CHECK: tensor.empty(%{{.*}}) : tensor<5x?xf32>
-// CHECK: linalg.fill {{.*}} : tensor<5x?xf32>) -> tensor<5x?xf32>
+// CHECK: linalg.fill {{.*}} : tensor<5x?xf32>)
// CHECK: scf.for
// CHECK: linalg.generic
// CHECK: %[[D3:.*]] = tensor.dim %{{.*}}, %[[C0]] : tensor
@@ -127,7 +127,7 @@
// CHECK-DAG: %[[D1:.*]] = tensor.dim %[[ARG0]], %[[C1]] : tensor
// CHECK-DAG: %[[D2:.*]] = tensor.dim %[[ARG1]], %[[C0]] : tensor
// CHECK: %[[E:.*]] = tensor.empty(%[[D2]]) : tensor
-// CHECK: %[[F:.*]] = linalg.fill ins(%[[I]] : f32) outs(%[[E]] : tensor) -> tensor
+// CHECK: %[[F:.*]] = linalg.fill ins(%[[I]] : f32) outs(%[[E]] : tensor)
// CHECK: %[[L:.*]] = scf.foreach_thread (%[[IV:.+]]) in (%[[C5]]) shared_outs(%[[ARG3:.+]] = %[[F]]) -> (tensor) {
// CHECK-DAG: %[[TS0:.+]] = affine.min #[[MAP0]](%[[IV]])[%[[D1]]]
// CHECK-DAG: %[[TS1:.+]] = affine.max #[[MAP1]](%[[TS0]])
@@ -155,7 +155,7 @@
func.func @matmul_tile_parallel(
%A: tensor, %B: tensor, %out: tensor) -> tensor {
%matmul = linalg.matmul ins(%A, %B: tensor, tensor)
- outs(%out: tensor) -> tensor
+ outs(%out: tensor)
return %matmul : tensor
}
@@ -182,7 +182,7 @@
// CHECK-DAG: %[[D3:.*]] = tensor.dim %[[ARG2]], %[[C0]] : tensor
// CHECK-DAG: %[[D4:.*]] = tensor.dim %[[ARG2]], %[[C1]] : tensor
// CHECK: %[[E:.*]] = tensor.empty(%[[D3]], %[[D4]]) : tensor
-// CHECK: %[[F:.*]] = linalg.fill ins(%[[I]] : f32) outs(%[[E]] : tensor) -> tensor
+// CHECK: %[[F:.*]] = linalg.fill ins(%[[I]] : f32) outs(%[[E]] : tensor)
// CHECK: %[[L:.*]] = scf.foreach_thread (%[[IV:.+]]) in (%[[C5]]) shared_outs(%[[ARG3:.+]] = %[[F]]) -> (tensor) {
// CHECK-DAG: %[[TS0:.+]] = affine.min #[[MAP0]](%[[IV]])[%[[D1]]]
// CHECK-DAG: %[[TS1:.+]] = affine.max #[[MAP1]](%[[TS0]])
@@ -191,7 +191,7 @@
// CHECK: %[[INCHUNKA:.+]] = tensor.extract_slice %[[ARG0]][0, %[[TINDEX]]] [%[[D0]], %[[TS1]]] [1, 1] : tensor to tensor
// CHECK: %[[INCHUNKB:.+]] = tensor.extract_slice %[[ARG1]][%[[TINDEX]], 0] [%[[TS1]], %[[D2]]] [1, 1] : tensor to tensor
// CHECK: %[[TEMPEXT:.+]] = tensor.extract_slice %[[ET]][0, 0] [%[[D0]], %[[D2]]] [1, 1] : tensor to tensor
-// CHECK: %[[PARTIAL:.+]] = linalg.matmul ins(%[[INCHUNKA]], %[[INCHUNKB]] : tensor, tensor) outs(%[[TEMPEXT]] : tensor) -> tensor
+// CHECK: %[[PARTIAL:.+]] = linalg.matmul ins(%[[INCHUNKA]], %[[INCHUNKB]] : tensor, tensor) outs(%[[TEMPEXT]] : tensor)
// CHECK: scf.foreach_thread.perform_concurrently {
// CHECK: tensor.parallel_insert_slice %[[PARTIAL]] into %[[ARG3]][0, 0, %[[IV]]] [%[[D0]], %[[D2]], 1] [1, 1, 1] : tensor into tensor
// CHECK: }
@@ -240,7 +240,7 @@
// CHECK-DAG: %[[D0:.*]] = tensor.dim %[[ARG0]], %[[C0]] : tensor
// CHECK-DAG: %[[D2:.*]] = tensor.dim %[[ARG1]], %[[C0]] : tensor
// CHECK: %[[E:.*]] = tensor.empty(%[[D2]]) : tensor
-// CHECK: %[[F:.*]] = linalg.fill ins(%[[I]] : f32) outs(%[[E]] : tensor) -> tensor
+// CHECK: %[[F:.*]] = linalg.fill ins(%[[I]] : f32) outs(%[[E]] : tensor)
// CHECK: %[[L:.*]] = scf.foreach_thread (%[[IV:.+]]) in (%[[C5]]) shared_outs(%[[ARG3:.+]] = %[[F]]) -> (tensor) {
// CHECK: %[[ET:.+]] = tensor.extract_slice %[[ARG3:.+]][0, %[[IV]]] [%[[D0]], 1] [1, 1] : tensor to tensor
// CHECK: %[[D1:.*]] = tensor.dim %[[ARG0]], %[[C1]] : tensor
diff --git a/mlir/test/Dialect/Linalg/vectorization.mlir b/mlir/test/Dialect/Linalg/vectorization.mlir
--- a/mlir/test/Dialect/Linalg/vectorization.mlir
+++ b/mlir/test/Dialect/Linalg/vectorization.mlir
@@ -699,7 +699,6 @@
// CHECK: %[[W:.*]] = vector.transfer_write %[[R]], %[[ARG2]][%[[C0]], %[[C0]]] {in_bounds = [true, true]} : vector<8x12xf32>, tensor<8x12xf32>
%0 = linalg.matmul ins(%arg0, %arg1: tensor<8x4xf32>, tensor<4x12xf32>)
outs(%arg2: tensor<8x12xf32>)
- -> tensor<8x12xf32>
// CHECK: return %[[W]] : tensor<8x12xf32>
return %0 : tensor<8x12xf32>
}
@@ -785,7 +784,7 @@
// CHECK: %[[V4:.*]] = arith.addi %[[DIM3]], %[[C3]] : index
// CHECK: %[[V5:.*]] = arith.addi %[[V4]], %[[C2]] : index
// CHECK: %[[INIT:.*]] = tensor.empty(%[[V1]], %[[V2]], %[[V5]]) : tensor<6x?x?x?xf32>
-// CHECK: %[[FILL:.*]] = linalg.fill ins(%{{.*}} : f32) outs(%[[INIT]] : tensor<6x?x?x?xf32>) -> tensor<6x?x?x?xf32>
+// CHECK: %[[FILL:.*]] = linalg.fill ins(%{{.*}} : f32) outs(%[[INIT]] : tensor<6x?x?x?xf32>)
// CHECK: %[[SRCDIM:.*]] = tensor.dim %[[SRC]], %[[C3]] : tensor<1x2x2x?xf32>
// CHECK: %[[RESULT:.*]] = tensor.insert_slice %[[SRC]] into %[[FILL]][2, %[[LOW]], 3, 3] [1, 2, 2, %[[SRCDIM]]] [1, 1, 1, 1] : tensor<1x2x2x?xf32> into tensor<6x?x?x?xf32>
// CHECK: return %[[RESULT]]
@@ -1096,7 +1095,7 @@
// CHECK: vector.transfer_write {{.*}} : vector<4xf32>, tensor<4xf32>
%ident = arith.constant -3.40282e+38 : f32
%init = tensor.empty() : tensor<4xf32>
- %fill = linalg.fill ins(%ident : f32) outs(%init : tensor<4xf32>) -> tensor<4xf32>
+ %fill = linalg.fill ins(%ident : f32) outs(%init : tensor<4xf32>)
%red = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>,
affine_map<(d0, d1) -> (d0)>],
iterator_types = ["parallel", "reduction"]}
@@ -1127,7 +1126,7 @@
// CHECK: vector.transfer_write {{.*}} : vector<4xf32>, tensor<4xf32>
%maxf32 = arith.constant 3.40282e+38 : f32
%init = tensor.empty() : tensor<4xf32>
- %fill = linalg.fill ins(%maxf32 : f32) outs(%init : tensor<4xf32>) -> tensor<4xf32>
+ %fill = linalg.fill ins(%maxf32 : f32) outs(%init : tensor<4xf32>)
%red = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>,
affine_map<(d0, d1) -> (d0)>],
iterator_types = ["parallel", "reduction"]}
@@ -1157,7 +1156,7 @@
// CHECK: vector.transfer_write {{.*}} : vector<4xf32>, tensor<4xf32>
%ident = arith.constant 1.0 : f32
%init = tensor.empty() : tensor<4xf32>
- %fill = linalg.fill ins(%ident : f32) outs(%init : tensor<4xf32>) -> tensor<4xf32>
+ %fill = linalg.fill ins(%ident : f32) outs(%init : tensor<4xf32>)
%red = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>,
affine_map<(d0, d1) -> (d0)>],
iterator_types = ["parallel", "reduction"]}
@@ -1187,7 +1186,7 @@
// CHECK: vector.transfer_write {{.*}} : vector<4xi1>, tensor<4xi1>
%ident = arith.constant false
%init = tensor.empty() : tensor<4xi1>
- %fill = linalg.fill ins(%ident : i1) outs(%init : tensor<4xi1>) -> tensor<4xi1>
+ %fill = linalg.fill ins(%ident : i1) outs(%init : tensor<4xi1>)
%red = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>,
affine_map<(d0, d1) -> (d0)>],
iterator_types = ["parallel", "reduction"]}
@@ -1217,7 +1216,7 @@
// CHECK: vector.transfer_write {{.*}} : vector<4xi1>, tensor<4xi1>
%ident = arith.constant true
%init = tensor.empty() : tensor<4xi1>
- %fill = linalg.fill ins(%ident : i1) outs(%init : tensor<4xi1>) -> tensor<4xi1>
+ %fill = linalg.fill ins(%ident : i1) outs(%init : tensor<4xi1>)
%red = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>,
affine_map<(d0, d1) -> (d0)>],
iterator_types = ["parallel", "reduction"]}
@@ -1247,7 +1246,7 @@
// CHECK: vector.transfer_write {{.*}} : vector<4xi1>, tensor<4xi1>
%ident = arith.constant false
%init = tensor.empty() : tensor<4xi1>
- %fill = linalg.fill ins(%ident : i1) outs(%init : tensor<4xi1>) -> tensor<4xi1>
+ %fill = linalg.fill ins(%ident : i1) outs(%init : tensor<4xi1>)
%red = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>,
affine_map<(d0, d1) -> (d0)>],
iterator_types = ["parallel", "reduction"]}
@@ -1279,7 +1278,7 @@
// CHECK: vector.transfer_write {{.*}} {in_bounds = [true, true]} : vector<4x4xf32>, tensor<4x4xf32>
%c0 = arith.constant 0.0 : f32
%init = tensor.empty() : tensor<4x4xf32>
- %fill = linalg.fill ins(%c0 : f32) outs(%init : tensor<4x4xf32>) -> tensor<4x4xf32>
+ %fill = linalg.fill ins(%c0 : f32) outs(%init : tensor<4x4xf32>)
%red = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>,
affine_map<(d0, d1) -> (d0, 0)>,
affine_map<(d0, d1) -> (d0, d1)>],
@@ -1315,7 +1314,7 @@
// CHECK: vector.transfer_write {{.*}} {in_bounds = [true]} : vector<4xf32>, tensor<4xf32>
%c0 = arith.constant 0.0 : f32
%init = tensor.empty() : tensor<4xf32>
- %fill = linalg.fill ins(%c0 : f32) outs(%init : tensor<4xf32>) -> tensor<4xf32>
+ %fill = linalg.fill ins(%c0 : f32) outs(%init : tensor<4xf32>)
%red = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>,
affine_map<(d0, d1) -> (d0, 0)>,
affine_map<(d0, d1) -> (d0)>],
@@ -1356,7 +1355,7 @@
// CHECK: %[[init:.*]] = tensor.empty() : tensor
%0 = tensor.empty() : tensor
- %1 = linalg.fill ins(%f0 : f32) outs(%0 : tensor) -> tensor
+ %1 = linalg.fill ins(%f0 : f32) outs(%0 : tensor)
// CHECK: %[[r:.*]] = vector.transfer_read %[[A]][%[[C0]]]
// CHECK-SAME: : tensor<32xf32>, vector<32xf32>
// CHECK: %[[f0:.*]] = vector.extractelement %[[vF0]][] : vector
@@ -1397,7 +1396,7 @@
func.func @not_projected_permutation(%arg0: tensor<8x8xf32>) -> tensor<6x6x3x3xf32> {
%c0 = arith.constant 0.0 : f32
%init = tensor.empty() : tensor<6x6x3x3xf32>
- %fill = linalg.fill ins(%c0 : f32) outs(%init : tensor<6x6x3x3xf32>) -> tensor<6x6x3x3xf32>
+ %fill = linalg.fill ins(%c0 : f32) outs(%init : tensor<6x6x3x3xf32>)
// CHECK: linalg.generic
%result = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0 + d2, d1 + d3)>,
affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>],
@@ -1866,7 +1865,7 @@
%cst_6 = arith.constant 4.000000e+00 : f32
%1 = scf.for %arg0 = %c0 to %c64 step %c4 iter_args(%arg1 = %input) -> (tensor<120x64xf32>) {
%extracted_slice = tensor.extract_slice %arg1[%c0, %arg0] [1, 4] [1, 1] : tensor<120x64xf32> to tensor<1x4xf32>
- %10 = linalg.fill {__internal_linalg_transform__ = "1"} ins(%cst_6 : f32) outs(%extracted_slice : tensor<1x4xf32>) -> tensor<1x4xf32>
+ %10 = linalg.fill {__internal_linalg_transform__ = "1"} ins(%cst_6 : f32) outs(%extracted_slice : tensor<1x4xf32>)
%11 = linalg.generic {indexing_maps =
[affine_map<(d0, d1) -> (d0, d1)>], iterator_types = ["parallel", "parallel"]} outs(%10 : tensor<1x4xf32>) { ^bb0(%out: f32): %12 = linalg.index 0 : index diff --git a/mlir/test/Dialect/SCF/one-shot-bufferize-analysis.mlir b/mlir/test/Dialect/SCF/one-shot-bufferize-analysis.mlir --- a/mlir/test/Dialect/SCF/one-shot-bufferize-analysis.mlir +++ b/mlir/test/Dialect/SCF/one-shot-bufferize-analysis.mlir @@ -620,9 +620,9 @@ // CHECK: tensor.extract_slice {{.*}} {__inplace_operands_attr__ = ["true", "none"]} %6 = tensor.extract_slice %arg1[%arg0] [1] [1] : tensor<320xf32> to tensor<1xf32> // CHECK: linalg.fill {__inplace_operands_attr__ = ["none", "true"]} - %7 = linalg.fill ins(%cst : f32) outs(%6 : tensor<1xf32>) -> tensor<1xf32> + %7 = linalg.fill ins(%cst : f32) outs(%6 : tensor<1xf32>) // CHECK: linalg.fill {__inplace_operands_attr__ = ["none", "true"]} - %8 = linalg.fill ins(%cst : f32) outs(%7 : tensor<1xf32>) -> tensor<1xf32> + %8 = linalg.fill ins(%cst : f32) outs(%7 : tensor<1xf32>) scf.foreach_thread.perform_concurrently { // CHECK: tensor.parallel_insert_slice {{.*}} {__inplace_operands_attr__ = ["true", "true", "none"]} @@ -647,14 +647,14 @@ %0 = bufferization.alloc_tensor() : tensor<4xf32> // CHECK: linalg.fill {__inplace_operands_attr__ = ["none", "false"]} - %1 = linalg.fill ins(%cst : f32) outs(%0 : tensor<4xf32>) -> tensor<4xf32> + %1 = linalg.fill ins(%cst : f32) outs(%0 : tensor<4xf32>) %2 = scf.for %arg5 = %arg2 to %arg3 step %arg4 iter_args(%arg6 = %arg1) -> (tensor<4xf32>) { // CHECK: tensor.extract {{.*}} {__inplace_operands_attr__ = ["true", "none"]} %4 = tensor.extract %1[%arg4] : tensor<4xf32> vector.print %4 : f32 // CHECK: linalg.fill {__inplace_operands_attr__ = ["none", "true"]} - %5 = linalg.fill ins(%cst2 : f32) outs(%0 : tensor<4xf32>) -> tensor<4xf32> + %5 = linalg.fill ins(%cst2 : f32) outs(%0 : tensor<4xf32>) scf.yield %5 : tensor<4xf32> } @@ -677,14 +677,14 @@ %0 = bufferization.alloc_tensor() : tensor<4xf32> // CHECK: linalg.fill {__inplace_operands_attr__ = ["none", "true"]} - %1 = linalg.fill ins(%cst : f32) outs(%0 : tensor<4xf32>) -> tensor<4xf32> + %1 = linalg.fill ins(%cst : f32) outs(%0 : tensor<4xf32>) %2 = scf.for %arg5 = %arg2 to %arg3 step %arg4 iter_args(%arg6 = %arg1) -> (tensor<4xf32>) { // CHECK: tensor.extract {{.*}} {__inplace_operands_attr__ = ["true", "none"]} %4 = tensor.extract %1[%arg4] : tensor<4xf32> vector.print %4 : f32 // CHECK: linalg.fill {__inplace_operands_attr__ = ["none", "false"]} - %5 = linalg.fill ins(%cst2 : f32) outs(%1 : tensor<4xf32>) -> tensor<4xf32> + %5 = linalg.fill ins(%cst2 : f32) outs(%1 : tensor<4xf32>) scf.yield %5 : tensor<4xf32> } @@ -693,7 +693,7 @@ %6 = tensor.extract %1[%arg4] : tensor<4xf32> vector.print %6 : f32 // CHECK: linalg.fill {__inplace_operands_attr__ = ["none", "true"]} - %7 = linalg.fill ins(%cst3 : f32) outs(%1 : tensor<4xf32>) -> tensor<4xf32> + %7 = linalg.fill ins(%cst3 : f32) outs(%1 : tensor<4xf32>) return %2, %7 : tensor<4xf32>, tensor<4xf32> } diff --git a/mlir/test/Dialect/SCF/one-shot-bufferize.mlir b/mlir/test/Dialect/SCF/one-shot-bufferize.mlir --- a/mlir/test/Dialect/SCF/one-shot-bufferize.mlir +++ b/mlir/test/Dialect/SCF/one-shot-bufferize.mlir @@ -54,7 +54,7 @@ // CHECK: %[[clone:.*]] = bufferization.clone %[[alloc]] // CHECK: scf.for {{.*}} iter_args(%{{.*}} = %[[clone]]) %0 = scf.for %iv = %lb to %ub step %c1 iter_args(%1 = %A) -> tensor { - %r = linalg.fill ins(%cst : f32) outs(%1 : tensor) -> tensor + %r = linalg.fill ins(%cst : f32) outs(%1 : tensor) scf.yield %B : tensor 
 }
   %1 = tensor.extract %0[%c1] : tensor
@@ -547,7 +547,7 @@
 // CHECK: %[[subview:.*]] = memref.subview %[[arg2]][5] [%[[idx]]] [1]
   %6 = tensor.extract_slice %o[5] [%idx] [%c1] : tensor to tensor
 // CHECK: linalg.fill ins(%{{.*}}) outs(%[[subview]] : memref
-  %8 = linalg.fill ins(%cst : f32) outs(%6 : tensor) -> tensor
+  %8 = linalg.fill ins(%cst : f32) outs(%6 : tensor)
   // Self-copy will DCE away later.
 // CHECK: memref.copy %[[subview]], %[[subview]]
@@ -594,7 +594,7 @@
   %6 = tensor.extract_slice %o[5] [%idx] [%c1] : tensor to tensor
 // CHECK: linalg.fill ins(%{{.*}}) outs(%[[subview1]] : memref
-  %8 = linalg.fill ins(%cst : f32) outs(%6 : tensor) -> tensor
+  %8 = linalg.fill ins(%cst : f32) outs(%6 : tensor)
   // Now the copy of the actual insert_slice. (It will fold away.)
 // CHECK: memref.copy %[[subview1]], %[[subview1]]
@@ -637,7 +637,7 @@
   %7 = tensor.extract_slice %o[%1, %4] [4, 4] [1, 1] : tensor<8x8xf32> to tensor<4x4xf32>
 // CHECK: linalg.matmul ins({{.*}}memref<4x8xf32, strided<[?, ?], offset: ?>>, memref<8x4xf32, strided<[?, ?], offset: ?>>) outs({{.*}} : memref<4x4xf32, strided<[?, ?], offset: ?>>)
-  %8 = linalg.matmul ins(%3, %6 : tensor<4x8xf32>, tensor<8x4xf32>) outs(%7 : tensor<4x4xf32>) -> tensor<4x4xf32>
+  %8 = linalg.matmul ins(%3, %6 : tensor<4x8xf32>, tensor<8x4xf32>) outs(%7 : tensor<4x4xf32>)
   scf.foreach_thread.perform_concurrently {
     tensor.parallel_insert_slice %8 into %o[%1, %4] [4, 4] [1, 1] : tensor<4x4xf32> into tensor<8x8xf32>
   }
diff --git a/mlir/test/Dialect/SparseTensor/sparse_expand.mlir b/mlir/test/Dialect/SparseTensor/sparse_expand.mlir
--- a/mlir/test/Dialect/SparseTensor/sparse_expand.mlir
+++ b/mlir/test/Dialect/SparseTensor/sparse_expand.mlir
@@ -124,7 +124,7 @@
   %C = bufferization.alloc_tensor() : tensor<8x4xf64, #CSR>
   %D = linalg.matmul ins(%A, %B: tensor<8x2xf64, #CSR>, tensor<2x4xf64, #CSR>)
-                     outs(%C: tensor<8x4xf64, #CSR>) -> tensor<8x4xf64, #CSR>
+                     outs(%C: tensor<8x4xf64, #CSR>)
   return %D: tensor<8x4xf64, #CSR>
 }
@@ -172,6 +172,6 @@
   %C = bufferization.alloc_tensor() : tensor<8x4xf64, #CSC>
   %D = linalg.matmul ins(%A, %B: tensor<8x2xf64, #CSC>, tensor<2x4xf64, #CSC>)
-                     outs(%C: tensor<8x4xf64, #CSC>) -> tensor<8x4xf64, #CSC>
+                     outs(%C: tensor<8x4xf64, #CSC>)
   return %D: tensor<8x4xf64, #CSC>
 }
diff --git a/mlir/test/Dialect/SparseTensor/sparse_fill_zero.mlir b/mlir/test/Dialect/SparseTensor/sparse_fill_zero.mlir
--- a/mlir/test/Dialect/SparseTensor/sparse_fill_zero.mlir
+++ b/mlir/test/Dialect/SparseTensor/sparse_fill_zero.mlir
@@ -124,8 +124,8 @@
   %0 = bufferization.alloc_tensor() : tensor<100x300xf64, #DCSR>
   %cst = arith.constant 0.000000e+00 : f64
   %1 = linalg.fill ins(%cst : f64)
-    outs(%0 : tensor<100x300xf64, #DCSR>) -> tensor<100x300xf64, #DCSR>
+    outs(%0 : tensor<100x300xf64, #DCSR>)
   %2 = linalg.matmul ins(%arg0, %arg1 : tensor<100x200xf64, #DCSR>, tensor<200x300xf64, #DCSR>)
-    outs(%1 : tensor<100x300xf64, #DCSR>) -> tensor<100x300xf64, #DCSR>
+    outs(%1 : tensor<100x300xf64, #DCSR>)
   return %2 : tensor<100x300xf64, #DCSR>
 }
diff --git a/mlir/test/Dialect/SparseTensor/sparse_kernels.mlir b/mlir/test/Dialect/SparseTensor/sparse_kernels.mlir
--- a/mlir/test/Dialect/SparseTensor/sparse_kernels.mlir
+++ b/mlir/test/Dialect/SparseTensor/sparse_kernels.mlir
@@ -47,7 +47,7 @@
   %c: tensor<10x30xf32>) -> tensor<10x30xf32> {
   %0 = linalg.matmul ins(%a, %b: tensor<10x20xf32, #DCSR>, tensor<20x30xf32>)
-                     outs(%c: tensor<10x30xf32>) -> tensor<10x30xf32>
+                     outs(%c: tensor<10x30xf32>)
   return %0 : tensor<10x30xf32>
 }
@@ -144,7 +144,7 @@
   %C = bufferization.alloc_tensor() : tensor<4x4xf64, #DCSR>
   %D = linalg.matmul ins(%A, %B: tensor<4x8xf64, #DCSR>)
-                     outs(%C: tensor<4x4xf64, #DCSR>) -> tensor<4x4xf64, #DCSR>
+                     outs(%C: tensor<4x4xf64, #DCSR>)
   return %D: tensor<4x4xf64, #DCSR>
 }
@@ -193,7 +193,7 @@
   %output: tensor<6x6xi32>) -> tensor<6x6xi32> {
   %0 = linalg.conv_2d
     ins (%input, %filter: tensor<8x8xi32>, tensor<3x3xi32, #DCSR>)
-    outs (%output: tensor<6x6xi32>) -> tensor<6x6xi32>
+    outs (%output: tensor<6x6xi32>)
   return %0 : tensor<6x6xi32>
 }
@@ -244,7 +244,7 @@
   %c2 = arith.constant 2 : i32
   %0 = linalg.quantized_matmul
    ins(%input1, %input2, %c2, %c0 : tensor<5x3xi8>, tensor<3x6xi8, #DCSR>, i32, i32)
-   outs(%output : tensor<5x6xi64>) -> tensor<5x6xi64>
+   outs(%output : tensor<5x6xi64>)
   return %0: tensor<5x6xi64>
 }
@@ -306,6 +306,6 @@
   %x: tensor) -> tensor {
   %dot = linalg.dot ins(%a, %b: tensor<1024xf32, #SparseVector>, tensor<1024xf32, #SparseVector>)
-                    outs(%x: tensor) -> tensor
+                    outs(%x: tensor)
   return %dot : tensor
 }
diff --git a/mlir/test/Dialect/SparseTensor/sparse_matmul_codegen.mlir b/mlir/test/Dialect/SparseTensor/sparse_matmul_codegen.mlir
--- a/mlir/test/Dialect/SparseTensor/sparse_matmul_codegen.mlir
+++ b/mlir/test/Dialect/SparseTensor/sparse_matmul_codegen.mlir
@@ -157,6 +157,6 @@
   %C = bufferization.alloc_tensor() : tensor<4x4xf64, #CSR>
   %D = linalg.matmul ins(%A, %B: tensor<4x8xf64, #CSR>, tensor<8x4xf64, #CSR>)
-                     outs(%C: tensor<4x4xf64, #CSR>) -> tensor<4x4xf64, #CSR>
+                     outs(%C: tensor<4x4xf64, #CSR>)
   return %D: tensor<4x4xf64, #CSR>
 }
diff --git a/mlir/test/Dialect/Tensor/one-shot-bufferize.mlir b/mlir/test/Dialect/Tensor/one-shot-bufferize.mlir
--- a/mlir/test/Dialect/Tensor/one-shot-bufferize.mlir
+++ b/mlir/test/Dialect/Tensor/one-shot-bufferize.mlir
@@ -71,7 +71,7 @@
   /// Overwrite A inplace.
 // CHECK: linalg.fill ins({{.*}}{{.*}}outs(%[[A]]
-  %r1 = linalg.fill ins(%f0 : f32) outs(%r0 : tensor) -> tensor
+  %r1 = linalg.fill ins(%f0 : f32) outs(%r0 : tensor)
 // CHECK: return
 // CHECK-NOT: tensor
@@ -91,7 +91,7 @@
   %f0 = arith.constant 0.0 : f32
 // CHECK: linalg.fill ins({{.*}}{{.*}}outs(%[[A]]
-  %r0 = linalg.fill ins(%f0 : f32) outs(%A : tensor) -> tensor
+  %r0 = linalg.fill ins(%f0 : f32) outs(%A : tensor)
 // CHECK-NOT: alloc
 // CHECK: %[[SV_A:.*]] = memref.subview %[[A]]
@@ -255,7 +255,7 @@
 // CHECK: memref.alloc
   %cst = arith.constant 4.200000e+01 : f32
 // CHECK: linalg.fill
-  %0 = linalg.fill ins(%cst : f32) outs(%t : tensor<10xf32>) -> tensor<10xf32>
+  %0 = linalg.fill ins(%cst : f32) outs(%t : tensor<10xf32>)
 // CHECK: memref.copy
   %1 = tensor.insert_slice %0 into %t[0][10][1] : tensor<10xf32> into tensor<10xf32>
   return %1 : tensor<10xf32>
@@ -298,7 +298,7 @@
   %c0 = arith.constant 0 : index
 // CHECK: %[[alloc:.*]] = memref.alloc() {{.*}} : memref<10xf32>
 // CHECK: linalg.fill {{.*}} outs(%[[alloc]] : memref<10xf32>)
-  %1 = linalg.fill ins(%cst : f32) outs(%t : tensor<10xf32>) -> tensor<10xf32>
+  %1 = linalg.fill ins(%cst : f32) outs(%t : tensor<10xf32>)
   // Read %1 so that it does not DCE away.
   %vec = vector.transfer_read %1[%c0], %cst : tensor<10xf32>, vector<10xf32>
@@ -319,7 +319,7 @@
   %cst = arith.constant 0.0 : f32
   %c0 = arith.constant 0 : index
 // CHECK: linalg.fill {{.*}} outs(%[[t]] : memref<10xf32,{{.*}}>)
-  %1 = linalg.fill ins(%cst : f32) outs(%t : tensor<10xf32>) -> tensor<10xf32>
+  %1 = linalg.fill ins(%cst : f32) outs(%t : tensor<10xf32>)
   // Read %1 so that it does not DCE away.
   %vec = vector.transfer_read %1[%c0], %cst : tensor<10xf32>, vector<10xf32>
diff --git a/mlir/test/Dialect/Transform/selective-targeting.mlir b/mlir/test/Dialect/Transform/selective-targeting.mlir
--- a/mlir/test/Dialect/Transform/selective-targeting.mlir
+++ b/mlir/test/Dialect/Transform/selective-targeting.mlir
@@ -9,11 +9,10 @@
 // CHECK-COUNT-3: scf.for
 // CHECK-COUNT-3: tensor.extract_slice
 // CHECK: linalg.matmul
-// CHECK-SAME: -> tensor<4x4xf32>
+// CHECK-SAME: tensor<4x4xf32>
   %0 = linalg.matmul { test.attrA }
          ins(%arg0, %arg1: tensor<128x128xf32>, tensor<128x128xf32>)
          outs(%arg2: tensor<128x128xf32>)
-         -> tensor<128x128xf32>
   func.return %0 : tensor<128x128xf32>
 }
@@ -31,7 +30,6 @@
   %0 = linalg.matmul { test.attrA, test.attrC }
          ins(%arg0, %arg1: tensor<128x128xf32>, tensor<128x128xf32>)
          outs(%arg2: tensor<128x128xf32>)
-         -> tensor<128x128xf32>
   func.return %0 : tensor<128x128xf32>
 }
@@ -48,7 +46,6 @@
   %0 = linalg.matmul { test.attrC }
          ins(%arg0, %arg1: tensor<128x128xf32>, tensor<128x128xf32>)
          outs(%arg2: tensor<128x128xf32>)
-         -> tensor<128x128xf32>
   func.return %0 : tensor<128x128xf32>
 }
@@ -95,7 +92,6 @@
   %0 = linalg.matmul {test.attrA}
          ins(%arg0, %arg1: tensor<128x128xf32>, tensor<128x128xf32>)
          outs(%arg2: tensor<128x128xf32>)
-         -> tensor<128x128xf32>
   func.return %0 : tensor<128x128xf32>
 }
@@ -106,7 +102,6 @@
 // CHECK: linalg.matmul
   %0 = linalg.matmul ins(%arg0, %arg1: tensor<128x128xf32>, tensor<128x128xf32>)
                      outs(%arg2: tensor<128x128xf32>)
-                     -> tensor<128x128xf32>
   func.return %0 : tensor<128x128xf32>
 }
@@ -140,11 +135,9 @@
   %0 = linalg.matmul {test.attrA}
          ins(%arg0, %arg1: tensor<128x128xf32>, tensor<128x128xf32>)
          outs(%arg2: tensor<128x128xf32>)
-         -> tensor<128x128xf32>
 // CHECK: vector.contract
   %1 = linalg.matmul ins(%arg0, %0: tensor<128x128xf32>, tensor<128x128xf32>)
                      outs(%arg3: tensor<128x128xf32>)
-                     -> tensor<128x128xf32>
   return %1 : tensor<128x128xf32>
 }
diff --git a/mlir/test/Dialect/Vector/transform-vector.mlir b/mlir/test/Dialect/Vector/transform-vector.mlir
--- a/mlir/test/Dialect/Vector/transform-vector.mlir
+++ b/mlir/test/Dialect/Vector/transform-vector.mlir
@@ -9,7 +9,6 @@
 // CHECK: vector.store {{.*}} : memref<8x32xf32>, vector<4xf32>
   %0 = linalg.matmul ins(%arg0, %arg1: tensor<8x16xf32>, tensor<16x32xf32>)
                      outs(%arg2: tensor<8x32xf32>)
-                     -> tensor<8x32xf32>
   return %0 : tensor<8x32xf32>
 }
diff --git a/mlir/test/Integration/Dialect/Linalg/CPU/test-one-shot-bufferize.mlir b/mlir/test/Integration/Dialect/Linalg/CPU/test-one-shot-bufferize.mlir
--- a/mlir/test/Integration/Dialect/Linalg/CPU/test-one-shot-bufferize.mlir
+++ b/mlir/test/Integration/Dialect/Linalg/CPU/test-one-shot-bufferize.mlir
@@ -14,7 +14,7 @@
   %cst = arith.constant 0.000000e+00 : f32
   %c2 = arith.constant 2 : index
   %c0 = arith.constant 0 : index
-  %0 = linalg.fill ins(%cst : f32) outs(%arg2 : tensor) -> tensor
+  %0 = linalg.fill ins(%cst : f32) outs(%arg2 : tensor)
   %1 = affine.apply #map0(%c0, %c64)[%c2]
   %2 = bufferization.alloc_tensor(%1) : tensor
   %3 = scf.for %arg3 = %c0 to %c64 step %c2 iter_args(%arg4 = %2) -> (tensor) {
@@ -61,7 +61,7 @@
   %13 = tensor.extract_slice %6[%12, 0] [1, 2] [1, 1] : tensor to tensor<2xf32>
   %14 = affine.apply #map1(%arg3, %c0)[%c2]
   %15 = tensor.extract_slice %3[%14, 0] [1, 2] [1, 1] : tensor to tensor<2xf32>
-  %16 = linalg.dot ins(%13, %15 : tensor<2xf32>, tensor<2xf32>) outs(%arg4 : tensor) -> tensor
+  %16 = linalg.dot ins(%13, %15 : tensor<2xf32>, tensor<2xf32>) outs(%arg4 : tensor)
   // %AA = tensor.cast %13 : tensor<2xf32> to tensor<*xf32>
   // call @printMemrefF32(%AA) : (tensor<*xf32>) -> ()
@@ -83,9 +83,9 @@
   %A = bufferization.alloc_tensor() : tensor<64xf32>
   %B = bufferization.alloc_tensor() : tensor<64xf32>
   %C = bufferization.alloc_tensor() : tensor
-  %AA = linalg.fill ins(%v1 : f32) outs(%A : tensor<64xf32>) -> tensor<64xf32>
-  %BB = linalg.fill ins(%v2 : f32) outs(%B : tensor<64xf32>) -> tensor<64xf32>
-  %CC = linalg.fill ins(%v0 : f32) outs(%C : tensor) -> tensor
+  %AA = linalg.fill ins(%v1 : f32) outs(%A : tensor<64xf32>)
+  %BB = linalg.fill ins(%v2 : f32) outs(%B : tensor<64xf32>)
+  %CC = linalg.fill ins(%v0 : f32) outs(%C : tensor)
   %res = call @init_and_dot(%AA, %BB, %CC)
     : (tensor<64xf32>, tensor<64xf32>, tensor) -> tensor
diff --git a/mlir/test/Integration/Dialect/Linalg/CPU/test-tensor-matmul.mlir b/mlir/test/Integration/Dialect/Linalg/CPU/test-tensor-matmul.mlir
--- a/mlir/test/Integration/Dialect/Linalg/CPU/test-tensor-matmul.mlir
+++ b/mlir/test/Integration/Dialect/Linalg/CPU/test-tensor-matmul.mlir
@@ -23,7 +23,7 @@
   %C = arith.constant dense<1000.0> : tensor<2x4xf32>
   %D = linalg.matmul ins(%A, %B: tensor<2x3xf32>, tensor<3x4xf32>)
-                     outs(%C: tensor<2x4xf32>) -> tensor<2x4xf32>
+                     outs(%C: tensor<2x4xf32>)
   %unranked = tensor.cast %D : tensor<2x4xf32> to tensor<*xf32>
   call @printMemrefF32(%unranked) : (tensor<*xf32>) -> ()
diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_conv_1d_nwc_wcf.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_conv_1d_nwc_wcf.mlir
--- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_conv_1d_nwc_wcf.mlir
+++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_conv_1d_nwc_wcf.mlir
@@ -27,7 +27,7 @@
 // Creates and returns 3-D buffer of size (%s1, %s2, %s3) filled with the value %f
 func.func @alloc_3d_filled_f32(%s1 : index, %s2 : index, %s3 : index, %f : f32) -> tensor {
   %buf = bufferization.alloc_tensor(%s1, %s2, %s3) : tensor
-  %ret = linalg.fill ins(%f : f32) outs(%buf : tensor) -> tensor
+  %ret = linalg.fill ins(%f : f32) outs(%buf : tensor)
   return %ret : tensor
 }
@@ -35,7 +35,7 @@
   %ret = linalg.conv_1d_nwc_wcf {dilations = dense<1> : tensor<1xi64>,
                                  strides = dense<1> : tensor<1xi64>}
     ins (%arg0, %arg1: tensor, tensor)
-    outs (%arg2: tensor) -> tensor
+    outs (%arg2: tensor)
   return %ret : tensor
 }
@@ -47,7 +47,7 @@
   %ret = linalg.conv_1d_nwc_wcf {dilations = dense<1> : tensor<1xi64>,
                                  strides = dense<1> : tensor<1xi64>}
     ins (%arg0, %arg1: tensor, tensor)
-    outs (%s: tensor) -> tensor
+    outs (%s: tensor)
   return %ret : tensor
 }
@@ -59,7 +59,7 @@
   %ret = linalg.conv_1d_nwc_wcf {dilations = dense<1> : tensor<1xi64>,
                                  strides = dense<1> : tensor<1xi64>}
     ins (%arg0, %arg1: tensor, tensor)
-    outs (%s: tensor) -> tensor
+    outs (%s: tensor)
   return %ret : tensor
 }
diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_conv_2d.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_conv_2d.mlir
--- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_conv_2d.mlir
+++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_conv_2d.mlir
@@ -30,7 +30,7 @@
   %output: tensor<6x6xi32>) -> tensor<6x6xi32> {
   %0 = linalg.conv_2d
     ins (%input, %filter: tensor<8x8xi32>, tensor<3x3xi32, #DCSR>)
-    outs (%output: tensor<6x6xi32>) -> tensor<6x6xi32>
+    outs (%output: tensor<6x6xi32>)
   return %0 : tensor<6x6xi32>
 }
@@ -39,7 +39,7 @@
   %s = bufferization.alloc_tensor() : tensor<6x6xi32, #DCSR>
   %0 = linalg.conv_2d
     ins (%input, %filter: tensor<8x8xi32>, tensor<3x3xi32, #DCSR>)
-    outs (%s: tensor<6x6xi32, #DCSR>) -> tensor<6x6xi32, #DCSR>
+    outs (%s: tensor<6x6xi32, #DCSR>)
   return %0 : tensor<6x6xi32, #DCSR>
 }
@@ -48,7 +48,7 @@
   %s = bufferization.alloc_tensor() : tensor<6x6xi32, #DCSR>
   %0 = linalg.conv_2d
     ins (%input, %filter: tensor<8x8xi32, #DCSR>, tensor<3x3xi32, #DCSR>)
-    outs (%s: tensor<6x6xi32, #DCSR>) -> tensor<6x6xi32, #DCSR>
+    outs (%s: tensor<6x6xi32, #DCSR>)
   return %0 : tensor<6x6xi32, #DCSR>
 }
@@ -57,7 +57,7 @@
   %s = bufferization.alloc_tensor() : tensor<6x6xi32, #CSR>
   %0 = linalg.conv_2d
     ins (%input, %filter: tensor<8x8xi32, #CSR>, tensor<3x3xi32, #CSR>)
-    outs (%s: tensor<6x6xi32, #CSR>) -> tensor<6x6xi32, #CSR>
+    outs (%s: tensor<6x6xi32, #CSR>)
   return %0 : tensor<6x6xi32, #CSR>
 }
@@ -66,7 +66,7 @@
   %s = bufferization.alloc_tensor() : tensor<6x6xi32, #CSC>
   %0 = linalg.conv_2d
     ins (%input, %filter: tensor<8x8xi32, #CSC>, tensor<3x3xi32, #CSC>)
-    outs (%s: tensor<6x6xi32, #CSC>) -> tensor<6x6xi32, #CSC>
+    outs (%s: tensor<6x6xi32, #CSC>)
   return %0 : tensor<6x6xi32, #CSC>
 }
diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_conv_2d_nhwc_hwcf.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_conv_2d_nhwc_hwcf.mlir
--- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_conv_2d_nhwc_hwcf.mlir
+++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_conv_2d_nhwc_hwcf.mlir
@@ -26,7 +26,7 @@
 // Creates and returns 4-D buffer of size (%s1, %s2, %s3, %s4) filled with the value %f
 func.func @alloc_4d_filled_f32(%s1 : index, %s2 : index, %s3 : index, %s4 : index, %f : f32) -> tensor {
   %buf = bufferization.alloc_tensor(%s1, %s2, %s3, %s4) : tensor
-  %ret = linalg.fill ins(%f : f32) outs(%buf : tensor) -> tensor
+  %ret = linalg.fill ins(%f : f32) outs(%buf : tensor)
   return %ret : tensor
 }
@@ -34,7 +34,7 @@
   %ret = linalg.conv_2d_nhwc_hwcf {dilations = dense<1> : tensor<2xi64>,
                                    strides = dense<1> : tensor<2xi64>}
     ins (%arg0, %arg1: tensor, tensor)
-    outs (%arg2: tensor) -> tensor
+    outs (%arg2: tensor)
   return %ret : tensor
 }
@@ -46,7 +46,7 @@
   %ret = linalg.conv_2d_nhwc_hwcf {dilations = dense<1> : tensor<2xi64>,
                                    strides = dense<1> : tensor<2xi64>}
     ins (%arg0, %arg1: tensor, tensor)
-    outs (%s: tensor) -> tensor
+    outs (%s: tensor)
   return %ret : tensor
 }
@@ -58,7 +58,7 @@
   %ret = linalg.conv_2d_nhwc_hwcf {dilations = dense<1> : tensor<2xi64>,
                                    strides = dense<1> : tensor<2xi64>}
     ins (%arg0, %arg1: tensor, tensor)
-    outs (%s: tensor) -> tensor
+    outs (%s: tensor)
   return %ret : tensor
 }
diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_conv_3d.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_conv_3d.mlir
--- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_conv_3d.mlir
+++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_conv_3d.mlir
@@ -26,14 +26,14 @@
 // Creates and returns 3-D buffer of size (%s1, %s2, %s3) filled with the value %f
 func.func @alloc_3d_filled_f32(%s1 : index, %s2 : index, %s3 : index, %f : f32) -> tensor {
   %buf = bufferization.alloc_tensor(%s1, %s2, %s3) : tensor
-  %ret = linalg.fill ins(%f : f32) outs(%buf : tensor) -> tensor
+  %ret = linalg.fill ins(%f : f32) outs(%buf : tensor)
   return %ret : tensor
 }

 func.func @conv_3d(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor {
   %ret = linalg.conv_3d
     ins (%arg0, %arg1: tensor, tensor)
-    outs (%arg2: tensor) -> tensor
+    outs (%arg2: tensor)
   return %ret : tensor
 }
@@ -42,7 +42,7 @@
   %s = bufferization.alloc_tensor(%c6, %c6, %c6) : tensor
   %ret = linalg.conv_3d
     ins (%arg0, %arg1: tensor, tensor)
-    outs (%s: tensor) -> tensor
+    outs (%s: tensor)
   return %ret : tensor
 }
@@ -51,7 +51,7 @@
   %s = bufferization.alloc_tensor(%c6, %c6, %c6) : tensor
   %ret = linalg.conv_3d
     ins (%arg0, %arg1: tensor, tensor)
-    outs (%s: tensor) -> tensor
+    outs (%s: tensor)
   return %ret : tensor
 }
diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_conv_3d_ndhwc_dhwcf.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_conv_3d_ndhwc_dhwcf.mlir
--- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_conv_3d_ndhwc_dhwcf.mlir
+++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_conv_3d_ndhwc_dhwcf.mlir
@@ -26,7 +26,7 @@
 // Creates and returns 5-D buffer of size (%s1, %s2, %s3, %s4, %s5) filled with the value %f
 func.func @alloc_5d_filled_f32(%s1 : index, %s2 : index, %s3 : index, %s4 : index, %s5 : index, %f : f32) -> tensor {
   %buf = bufferization.alloc_tensor(%s1, %s2, %s3, %s4, %s5) : tensor
-  %ret = linalg.fill ins(%f : f32) outs(%buf : tensor) -> tensor
+  %ret = linalg.fill ins(%f : f32) outs(%buf : tensor)
   return %ret : tensor
 }
@@ -36,7 +36,7 @@
   %ret = linalg.conv_3d_ndhwc_dhwcf {dilations = dense<1> : tensor<3xi64>,
                                      strides = dense<1> : tensor<3xi64>}
     ins (%arg0, %arg1: tensor, tensor)
-    outs (%arg2: tensor) -> tensor
+    outs (%arg2: tensor)
   return %ret : tensor
 }
@@ -50,7 +50,7 @@
   %ret = linalg.conv_3d_ndhwc_dhwcf {dilations = dense<1> : tensor<3xi64>,
                                      strides = dense<1> : tensor<3xi64>}
     ins (%arg0, %arg1: tensor, tensor)
-    outs (%s: tensor) -> tensor
+    outs (%s: tensor)
   return %ret : tensor
 }
@@ -64,7 +64,7 @@
   %ret = linalg.conv_3d_ndhwc_dhwcf {dilations = dense<1> : tensor<3xi64>,
                                      strides = dense<1> : tensor<3xi64>}
     ins (%arg0, %arg1: tensor, tensor)
-    outs (%s: tensor) -> tensor
+    outs (%s: tensor)
   return %ret : tensor
 }
diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_dot.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_dot.mlir
--- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_dot.mlir
+++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_dot.mlir
@@ -27,7 +27,7 @@
   %x: tensor) -> tensor {
   %dot = linalg.dot ins(%a, %b: tensor<1024xf32, #SparseVector>,
                                 tensor<1024xf32, #SparseVector>)
-                    outs(%x: tensor) -> tensor
+                    outs(%x: tensor)
   return %dot : tensor
 }
diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_expand.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_expand.mlir
--- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_expand.mlir
+++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_expand.mlir
@@ -33,7 +33,7 @@
   %C = bufferization.alloc_tensor() : tensor<8x4xf64, #CSC>
   %D = linalg.matmul ins(%A, %B: tensor<8x2xf64, #CSC>, tensor<2x4xf64, #CSC>)
-                     outs(%C: tensor<8x4xf64, #CSC>) -> tensor<8x4xf64, #CSC>
+                     outs(%C: tensor<8x4xf64, #CSC>)
   return %D: tensor<8x4xf64, #CSC>
 }
diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_filter_conv2d.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_filter_conv2d.mlir
--- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_filter_conv2d.mlir
+++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_filter_conv2d.mlir
@@ -25,7 +25,7 @@
   %output: tensor<6x6xi32>) -> tensor<6x6xi32> {
   %0 = linalg.conv_2d
     ins (%input, %filter: tensor<8x8xi32>, tensor<3x3xi32, #DCSR>)
-    outs (%output: tensor<6x6xi32>) -> tensor<6x6xi32>
+    outs (%output: tensor<6x6xi32>)
   return %0 : tensor<6x6xi32>
 }
@@ -34,7 +34,7 @@
   %s = bufferization.alloc_tensor() : tensor<6x6xi32, #DCSR>
   %0 = linalg.conv_2d
     ins (%input, %filter: tensor<8x8xi32>, tensor<3x3xi32, #DCSR>)
-    outs (%s: tensor<6x6xi32, #DCSR>) -> tensor<6x6xi32, #DCSR>
+    outs (%s: tensor<6x6xi32, #DCSR>)
   return %0 : tensor<6x6xi32, #DCSR>
 }
diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_matmul.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_matmul.mlir
--- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_matmul.mlir
+++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_matmul.mlir
@@ -44,7 +44,7 @@
   %C: tensor<4x4xf64>) -> tensor<4x4xf64> {
   %D = linalg.matmul ins(%A, %B: tensor<4x8xf64>, tensor<8x4xf64>)
-                     outs(%C: tensor<4x4xf64>) -> tensor<4x4xf64>
+                     outs(%C: tensor<4x4xf64>)
   return %D: tensor<4x4xf64>
 }
@@ -56,7 +56,7 @@
   %C = bufferization.alloc_tensor() : tensor<4x4xf64, #CSR>
   %D = linalg.matmul ins(%A, %B: tensor<4x8xf64, #CSR>, tensor<8x4xf64, #CSR>)
-                     outs(%C: tensor<4x4xf64, #CSR>) -> tensor<4x4xf64, #CSR>
+                     outs(%C: tensor<4x4xf64, #CSR>)
   return %D: tensor<4x4xf64, #CSR>
 }
@@ -68,7 +68,7 @@
   %C = bufferization.alloc_tensor() : tensor<4x4xf64, #DCSR>
   %D = linalg.matmul ins(%A, %B: tensor<4x8xf64, #DCSR>, tensor<8x4xf64, #DCSR>)
-                     outs(%C: tensor<4x4xf64, #DCSR>) -> tensor<4x4xf64, #DCSR>
+                     outs(%C: tensor<4x4xf64, #DCSR>)
   return %D: tensor<4x4xf64, #DCSR>
 }
diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_quantized_matmul.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_quantized_matmul.mlir
--- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_quantized_matmul.mlir
+++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_quantized_matmul.mlir
@@ -30,7 +30,7 @@
   %c2 = arith.constant 2 : i32
   %0 = linalg.quantized_matmul
    ins(%input1, %input2, %c2, %c0 : tensor<5x3xi8>, tensor<3x6xi8, #DCSR>, i32, i32)
-   outs(%output : tensor<5x6xi32>) -> tensor<5x6xi32>
+   outs(%output : tensor<5x6xi32>)
   return %0: tensor<5x6xi32>
 }
diff --git a/mlir/test/Interfaces/TilingInterface/tile-and-fuse-using-interface.mlir b/mlir/test/Interfaces/TilingInterface/tile-and-fuse-using-interface.mlir
--- a/mlir/test/Interfaces/TilingInterface/tile-and-fuse-using-interface.mlir
+++ b/mlir/test/Interfaces/TilingInterface/tile-and-fuse-using-interface.mlir
@@ -7,10 +7,10 @@
   %d0 = tensor.dim %arg0, %c0 : tensor
   %d1 = tensor.dim %arg1, %c1 : tensor
   %init = tensor.empty(%d0, %d1) : tensor
-  %fill = linalg.fill ins(%cst : f32) outs(%init : tensor) -> tensor
+  %fill = linalg.fill ins(%cst : f32) outs(%init : tensor)
   %gemm = linalg.matmul {__internal_linalg_transform__ = "fusion"}
       ins(%arg0, %arg1 : tensor, tensor)
-      outs(%fill : tensor) -> tensor
+      outs(%fill : tensor)
   return %gemm : tensor
 }
 // CHECK: func.func @gemm_fill_fusion(
@@ -42,10 +42,10 @@
   %d0 = tensor.dim %arg0, %c0 : tensor
   %d1 = tensor.dim %arg1, %c1 : tensor
   %init = tensor.empty(%d0, %d1) : tensor
-  %fill = linalg.fill ins(%cst : f32) outs(%init : tensor) -> tensor
+  %fill = linalg.fill ins(%cst : f32) outs(%init : tensor)
   %gemm = linalg.matmul
       ins(%arg0, %arg1 : tensor, tensor)
-      outs(%fill : tensor) -> tensor
+      outs(%fill : tensor)
   %generic = linalg.generic {
       __internal_linalg_transform__ = "fusion",
       indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d1)>, affine_map<(d0, d1) -> (d0, d1)>],
@@ -91,14 +91,14 @@
   %d0 = tensor.dim %lhs0, %c0 : tensor
   %d1 = tensor.dim %rhs0, %c1 : tensor
   %init0 = tensor.empty(%d0, %d1) : tensor
-  %fill0 = linalg.fill ins(%cst : f32) outs(%init0 : tensor) -> tensor
+  %fill0 = linalg.fill ins(%cst : f32) outs(%init0 : tensor)
   %gemm0 = linalg.matmul
-      ins(%lhs0, %rhs0 : tensor, tensor) outs(%fill0 : tensor) -> tensor
+      ins(%lhs0, %rhs0 : tensor, tensor) outs(%fill0 : tensor)
   %d2 = tensor.dim %rhs1, %c1 : tensor
   %init1 = tensor.empty(%d0, %d2) : tensor
-  %fill1 = linalg.fill ins(%cst : f32) outs(%init1 : tensor) -> tensor
+  %fill1 = linalg.fill ins(%cst : f32) outs(%init1 : tensor)
   %gemm1 = linalg.matmul {__internal_linalg_transform__ = "gemm_fusion"}
-      ins(%gemm0, %rhs1 : tensor, tensor) outs(%fill1 : tensor) -> tensor
+      ins(%gemm0, %rhs1 : tensor, tensor) outs(%fill1 : tensor)
   return %gemm1 : tensor
 }
 // CHECK: func.func @gemm_gemm_fusion(
@@ -141,10 +141,10 @@
   %d0 = tensor.dim %arg0, %c0 : tensor
   %d1 = tensor.dim %arg1, %c1 : tensor
   %init0 = tensor.empty(%d0, %d1) : tensor
-  %fill = linalg.fill ins(%cst : f32) outs(%init0 : tensor) -> tensor
+  %fill = linalg.fill ins(%cst : f32) outs(%init0 : tensor)
   %gemm = linalg.matmul
       ins(%arg0, %arg1 : tensor, tensor)
-      outs(%fill : tensor) -> tensor
+      outs(%fill : tensor)
   %init1 = tensor.empty(%d1, %d0) : tensor
   %transpose = linalg.generic {
       __internal_linalg_transform__ = "fusion",
@@ -193,10 +193,10 @@
   %d1 = tensor.dim %arg1, %c1 : tensor
   %cst = arith.constant 0.0 : f32
   %0 = tensor.empty(%d0, %d1) : tensor
-  %1 = linalg.fill ins(%cst : f32) outs(%0 : tensor) -> tensor
+  %1 = linalg.fill ins(%cst : f32) outs(%0 : tensor)
   %2 = linalg.matmul
       ins(%arg0, %arg1 : tensor, tensor)
-      outs(%1 : tensor) -> tensor
+      outs(%1 : tensor)
   %3 = linalg.generic {
       __internal_linalg_transform__ = "gemm_interchange_fusion",
       indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d0, d1)>],
@@ -240,7 +240,7 @@
   %0 = tensor.dim %arg2, %c0 : tensor
   %1 = tensor.dim %arg2, %c1 : tensor
   %2 = linalg.matmul
      ins(%arg0, %arg1 : tensor, tensor)
-     outs(%arg2 : tensor) -> tensor
+     outs(%arg2 : tensor)
   %3 = tensor.dim %2, %c0 : tensor
   %4 = tensor.dim %2, %c1 : tensor
   %5 = tensor.empty(%3, %4) : tensor
@@ -293,7 +293,7 @@
   %0 = tensor.dim %arg2, %c0 : tensor
   %1 = tensor.dim %arg2, %c1 : tensor
   %2 = linalg.matmul
      ins(%arg0, %arg1 : tensor, tensor)
-     outs(%arg2 : tensor) -> tensor
+     outs(%arg2 : tensor)
   %3 = tensor.dim %2, %c0 : tensor
   %4 = tensor.dim %2, %c1 : tensor
   %5 = tensor.empty(%3, %4) : tensor
@@ -348,13 +348,13 @@
   %arg2: tensor, %arg3: tensor, %arg4: tensor, %arg5: tensor, %arg6: tensor) -> tensor {
   %0 = linalg.matmul
      ins(%arg0, %arg1 : tensor, tensor)
-     outs(%arg2 : tensor) -> tensor // [M, N0] * [N0, N1]
+     outs(%arg2 : tensor)
   %1 = linalg.matmul
      ins(%0, %arg3 : tensor, tensor)
-     outs(%arg4 : tensor) -> tensor // [M, N1] * [N1, N2]
+     outs(%arg4 : tensor)
   %2 = linalg.matmul {__internal_linalg_transform__ = "gemm_sequence_fusion"}
      ins(%1, %arg5 : tensor, tensor)
-     outs(%arg6 : tensor) -> tensor // [M, N2] * [N2, N3]
+     outs(%arg6 : tensor)
   return %2 : tensor
 }
@@ -402,7 +402,7 @@
   %cst = arith.constant 0.000000e+00 : f32
   %cst_0 = arith.constant 0xFF800000 : f32
   %0 = tensor.empty() : tensor<30xf32>
-  %1 = linalg.fill ins(%cst_0 : f32) outs(%0 : tensor<30xf32>) -> tensor<30xf32>
+  %1 = linalg.fill ins(%cst_0 : f32) outs(%0 : tensor<30xf32>)
   %2 = linalg.generic {
       indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d0)>],
       iterator_types = ["parallel", "reduction"]}
@@ -412,7 +412,7 @@
     linalg.yield %8 : f32
   } -> tensor<30xf32>
   %3 = tensor.empty() : tensor<30x3xf32>
-  %4 = linalg.fill ins(%cst : f32) outs(%0 : tensor<30xf32>) -> tensor<30xf32>
+  %4 = linalg.fill ins(%cst : f32) outs(%0 : tensor<30xf32>)
   %5:2 = linalg.generic {
       indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d0)>, affine_map<(d0, d1) -> (d0)>, affine_map<(d0, d1) -> (d0, d1)>],
diff --git a/mlir/test/Interfaces/TilingInterface/tile-using-interface.mlir b/mlir/test/Interfaces/TilingInterface/tile-using-interface.mlir
--- a/mlir/test/Interfaces/TilingInterface/tile-using-interface.mlir
+++ b/mlir/test/Interfaces/TilingInterface/tile-using-interface.mlir
@@ -4,7 +4,7 @@
   %arg2 : tensor) -> tensor {
   %0 = linalg.matmul {__internal_linalg_transform__ = "simple_gemm"}
       ins(%arg0, %arg1 : tensor, tensor)
-      outs(%arg2 : tensor) -> tensor
+      outs(%arg2 : tensor)
   return %0 : tensor
 }
 // CHECK-DAG: #[[$MAP0:.+]] = affine_map<(d0)[s0] -> (10, -d0 + s0)>
@@ -141,7 +141,7 @@
   dilation = dense<[4, 5]> : tensor<2xi64>,
   __internal_linalg_transform__ = "simple_conv"}
       ins(%arg0, %arg1 : tensor, tensor)
-      outs(%arg2 : tensor) -> tensor
+      outs(%arg2 : tensor)
   return %0 : tensor
 }
 // CHECK-DAG: #[[$MAP0:.+]] = affine_map<(d0)[s0] -> (10, -d0 + s0)>
@@ -231,7 +231,7 @@
   %arg2 : tensor) -> tensor {
   %0 = linalg.matmul {__internal_linalg_transform__ = "gemm_interchange"}
       ins(%arg0, %arg1 : tensor, tensor)
-      outs(%arg2 : tensor) -> tensor
+      outs(%arg2 : tensor)
   return %0 : tensor
 }
 // CHECK-DAG: #[[$MAP0:.+]] = affine_map<(d0)[s0] -> (20, -d0 + s0)>
diff --git a/mlir/test/python/dialects/linalg/ops.py b/mlir/test/python/dialects/linalg/ops.py
--- a/mlir/test/python/dialects/linalg/ops.py
+++ b/mlir/test/python/dialects/linalg/ops.py
@@ -21,7 +21,7 @@
   # CHECK-LABEL: func @fill_tensor
   # CHECK-SAME: %[[OUT:[0-9a-z]+]]: tensor<12x?xf32>
   # CHECK-NEXT: %[[CST:.*]] = arith.constant 0.0{{.*}} : f32
-  # CHECK-NEXT: %[[RES:.*]] = linalg.fill ins(%[[CST]] : f32) outs(%[[OUT]] : tensor<12x?xf32>) -> tensor<12x?xf32>
+  # CHECK-NEXT: %[[RES:.*]] = linalg.fill ins(%[[CST]] : f32) outs(%[[OUT]] : tensor<12x?xf32>)
   # CHECK-NEXT: return %[[RES]] : tensor<12x?xf32>
   @func.FuncOp.from_py_func(
       RankedTensorType.get((12, ShapedType.get_dynamic_size()), f32))
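Note: every test hunk in this patch makes the same syntactic change, so a single before/after sketch summarizes it. This is a hypothetical snippet for illustration (the values %a, %b, %c and the static shapes are made up, not taken from any test above): named structured ops whose outputs are tensors drop the trailing "-> <result-type>" from their assembly, and the result type is instead implied by the type of the outs operand.

  // Old assembly, with the explicit trailing result type:
  %d = linalg.matmul ins(%a, %b : tensor<4x8xf32>, tensor<8x4xf32>)
                     outs(%c : tensor<4x4xf32>) -> tensor<4x4xf32>

  // New assembly after this patch; %d still has type tensor<4x4xf32>,
  // now taken from the outs operand rather than spelled out:
  %d = linalg.matmul ins(%a, %b : tensor<4x8xf32>, tensor<8x4xf32>)
                     outs(%c : tensor<4x4xf32>)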