diff --git a/mlir/include/mlir/Dialect/Linalg/IR/LinalgOps.h b/mlir/include/mlir/Dialect/Linalg/IR/LinalgOps.h
--- a/mlir/include/mlir/Dialect/Linalg/IR/LinalgOps.h
+++ b/mlir/include/mlir/Dialect/Linalg/IR/LinalgOps.h
@@ -72,7 +72,7 @@
 ///
 /// Examples:
 ///
-/// 1. linalg.fill(%A, %f) : memref<f32>, f32
+/// 1. linalg.fill(%f, %A) : f32, memref<f32>
 ///    name mangles into `linalg_fill_viewf32_f32_impl`
 ///
 /// 2. linalg.dot %A, %B, %C :
diff --git a/mlir/include/mlir/Dialect/Linalg/IR/LinalgStructuredOps.td b/mlir/include/mlir/Dialect/Linalg/IR/LinalgStructuredOps.td
--- a/mlir/include/mlir/Dialect/Linalg/IR/LinalgStructuredOps.td
+++ b/mlir/include/mlir/Dialect/Linalg/IR/LinalgStructuredOps.td
@@ -211,9 +211,9 @@
   }];

   let assemblyFormat = [{
-    `(` $output `,` $value `)` attr-dict `:`
-        type($output) `,` type($value) (`->` type($result)^)?
-        custom<FillOpRegion>($region, ref(type($output)), ref(type($value)))
+    `(` $value `,` $output `)` attr-dict `:`
+        type($value) `,` type($output) (`->` type($result)^)?
+        custom<FillOpRegion>($region, ref(type($value)), ref(type($output)))
   }];

   let builders = [
diff --git a/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp b/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp
--- a/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp
+++ b/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp
@@ -435,8 +435,8 @@
                          TypeRange{output.getType()}, {});
 }

-ParseResult parseFillOpRegion(OpAsmParser &parser, Region &r, Type outputType,
-                              Type valueType) {
+ParseResult parseFillOpRegion(OpAsmParser &parser, Region &r, Type valueType,
+                              Type outputType) {
   OpBuilder opBuilder(parser.getBuilder().getContext());
   fillStructuredOpRegion(opBuilder, r, TypeRange{valueType},
                          TypeRange{outputType});
diff --git a/mlir/lib/Dialect/Linalg/Transforms/Fusion.cpp b/mlir/lib/Dialect/Linalg/Transforms/Fusion.cpp
--- a/mlir/lib/Dialect/Linalg/Transforms/Fusion.cpp
+++ b/mlir/lib/Dialect/Linalg/Transforms/Fusion.cpp
@@ -648,7 +648,7 @@
 ///   parallel loops and appear in the result of the map
 ///
 /// Example 1:
-///   linalg.fill(%c, %cst)
+///   linalg.fill(%cst, %c)
 ///   linalg.matmul ins(%a, %b) outs(%c)
 ///   Number of parallel loops : 2
 ///   producerIndexMap = affine_map<(i, j) -> (i, j)>
diff --git a/mlir/lib/Dialect/Vector/VectorTransforms.cpp b/mlir/lib/Dialect/Vector/VectorTransforms.cpp
--- a/mlir/lib/Dialect/Vector/VectorTransforms.cpp
+++ b/mlir/lib/Dialect/Vector/VectorTransforms.cpp
@@ -2413,7 +2413,7 @@
 ///      memref.cast %A: memref to compatibleMemRefType
 ///      scf.yield %view, ... : compatibleMemRefType, index, index
 ///    } else {
-///      %2 = linalg.fill(%alloc, %pad)
+///      %2 = linalg.fill(%pad, %alloc)
 ///      %3 = subview %view [...][...][...]
 ///      linalg.copy(%3, %alloc)
 ///      memref.cast %alloc: memref to compatibleMemRefType
diff --git a/mlir/test/Conversion/TosaToLinalg/tosa-to-linalg.mlir b/mlir/test/Conversion/TosaToLinalg/tosa-to-linalg.mlir
--- a/mlir/test/Conversion/TosaToLinalg/tosa-to-linalg.mlir
+++ b/mlir/test/Conversion/TosaToLinalg/tosa-to-linalg.mlir
@@ -544,7 +544,7 @@
 func @reduce_float(%arg0: tensor<5x4xf32>) -> () {
   // CHECK: [[INIT:%.+]] = linalg.init_tensor [4]
   // CHECK: [[CST0:%.+]] = constant 0.0
-  // CHECK: [[FILL:%.+]] = linalg.fill([[INIT]], [[CST0]])
+  // CHECK: [[FILL:%.+]] = linalg.fill([[CST0]], [[INIT]])
   // CHECK: [[GENERIC:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]]], iterator_types = ["reduction", "parallel"]} ins([[ARG0]] : tensor<5x4xf32>) outs([[FILL]] : tensor<4xf32>)
   // CHECK: ^bb0(%arg1: f32, %arg2: f32)
   // CHECK: [[RES:%.+]] = addf %arg1, %arg2 : f32
@@ -554,7 +554,7 @@
   // CHECK: [[INIT:%.+]] = linalg.init_tensor [5]
   // CHECK: [[CST0:%.+]] = constant 0.0
-  // CHECK: [[FILL:%.+]] = linalg.fill([[INIT]], [[CST0]])
+  // CHECK: [[FILL:%.+]] = linalg.fill([[CST0]], [[INIT]])
   // CHECK: [[GENERIC:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP2]]], iterator_types = ["parallel", "reduction"]} ins([[ARG0]] : tensor<5x4xf32>) outs([[FILL]] : tensor<5xf32>)
   // CHECK: ^bb0(%arg1: f32, %arg2: f32)
   // CHECK: [[RES:%.+]] = addf %arg1, %arg2 : f32
@@ -595,7 +595,7 @@
 func @reduce_int(%arg0: tensor<5x4xi32>) -> () {
   // CHECK: [[INIT:%.+]] = linalg.init_tensor [4]
   // CHECK: [[CST0:%.+]] = constant 0
-  // CHECK: [[FILL:%.+]] = linalg.fill([[INIT]], [[CST0]])
+  // CHECK: [[FILL:%.+]] = linalg.fill([[CST0]], [[INIT]])
   // CHECK: [[GENERIC:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]]], iterator_types = ["reduction", "parallel"]} ins([[ARG0]] : tensor<5x4xi32>) outs([[FILL]] : tensor<4xi32>)
   // CHECK: ^bb0(%arg1: i32, %arg2: i32)
   // CHECK: [[RES:%.+]] = addi %arg1, %arg2 : i32
@@ -605,7 +605,7 @@
   // CHECK: [[INIT:%.+]] = linalg.init_tensor [5]
   // CHECK: [[CST0:%.+]] = constant 0
-  // CHECK: [[FILL:%.+]] = linalg.fill([[INIT]], [[CST0]])
+  // CHECK: [[FILL:%.+]] = linalg.fill([[CST0]], [[INIT]])
   // CHECK: [[GENERIC:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP2]]], iterator_types = ["parallel", "reduction"]} ins([[ARG0]] : tensor<5x4xi32>) outs([[FILL]] : tensor<5xi32>)
   // CHECK: ^bb0(%arg1: i32, %arg2: i32)
   // CHECK: [[RES:%.+]] = addi %arg1, %arg2 : i32
@@ -645,7 +645,7 @@
 func @reduce_bool(%arg0: tensor<5x4xi1>) -> () {
   // CHECK: [[INIT:%.+]] = linalg.init_tensor [4]
   // CHECK: [[CST0:%.+]] = constant true
-  // CHECK: [[FILL:%.+]] = linalg.fill([[INIT]], [[CST0]])
+  // CHECK: [[FILL:%.+]] = linalg.fill([[CST0]], [[INIT]])
   // CHECK: [[GENERIC:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]]], iterator_types = ["reduction", "parallel"]} ins([[ARG0]] : tensor<5x4xi1>) outs([[FILL]] : tensor<4xi1>)
   // CHECK: ^bb0(%arg1: i1, %arg2: i1)
   // CHECK: [[RES:%.+]] = and %arg1, %arg2 : i1
@@ -677,7 +677,7 @@
   // CHECK: [[RESULT_AXIS:%.+]] = addi [[ARG0_DIM0]], [[ARG1_AXIS]]
   // CHECK: [[INIT:%.+]] = linalg.init_tensor [11, 1]
   // CHECK: [[CST:%.+]] = constant 0.0
-  // CHECK: [[FILL:%.+]] = linalg.fill([[INIT]], [[CST]])
+  // CHECK: [[FILL:%.+]] = linalg.fill([[CST]], [[INIT]])
   // CHECK: [[ARG0_DIM0:%.+]] = memref.dim %arg0, [[AXIS]]
   // CHECK: [[INSERT0:%.+]] = tensor.insert_slice %arg0 into [[FILL]]{{\[}}[[OFFSET]], [[OFFSET]]] {{\[}}[[ARG0_DIM0]], [[ARG0_DIM1]]] {{\[}}[[STRIDE]], [[STRIDE]]]
   // CHECK: [[NEW_OFFSET:%.+]] = addi [[OFFSET]], [[ARG0_DIM0]]
@@ -696,7 +696,7 @@
   // CHECK: [[RESULT_AXIS:%.+]] = addi [[ARG0_DIM1]], [[ARG1_AXIS]]
   // CHECK: [[INIT:%.+]] = linalg.init_tensor [5, 2]
   // CHECK: [[CST:%.+]] = constant 0.0
-  // CHECK: [[FILL:%.+]] = linalg.fill([[INIT]], [[CST]])
+  // CHECK: [[FILL:%.+]] = linalg.fill([[CST]], [[INIT]])
   // CHECK: [[ARG0_DIM1:%.+]] = memref.dim %arg0, [[AXIS]]
   // CHECK: [[INSERT0:%.+]] = tensor.insert_slice %arg0 into [[FILL]]{{\[}}[[OFFSET]], [[OFFSET]]] {{\[}}[[ARG0_DIM0]], [[ARG0_DIM1]]] {{\[}}[[STRIDE]], [[STRIDE]]]
   // CHECK: [[NEW_OFFSET:%.+]] = addi [[OFFSET]], [[ARG0_DIM1]]
@@ -847,7 +847,7 @@
 func @matmul(%arg0: tensor<1x5x3xf32>, %arg1: tensor<1x3x6xf32>, %arg2: tensor<1x6xf32>) -> (tensor<1x5x6xf32>) {
   // CHECK: [[C0:%.+]] = constant 0
   // CHECK: [[INIT:%.+]] = linalg.init_tensor [1, 5, 6]
-  // CHECK: [[FILLED:%.+]] = linalg.fill([[INIT]], [[C0]]) : tensor<1x5x6xf32>, f32 -> tensor<1x5x6xf32>
+  // CHECK: [[FILLED:%.+]] = linalg.fill([[C0]], [[INIT]]) : f32, tensor<1x5x6xf32> -> tensor<1x5x6xf32>
   // CHECK: linalg.batch_matmul ins(%arg0, %arg1 : tensor<1x5x3xf32>, tensor<1x3x6xf32>) outs([[FILLED]] : tensor<1x5x6xf32>) -> tensor<1x5x6xf32>
   %0 = "tosa.matmul"(%arg0, %arg1) : (tensor<1x5x3xf32>, tensor<1x3x6xf32>) -> (tensor<1x5x6xf32>)
   return %0 : tensor<1x5x6xf32>
@@ -928,10 +928,10 @@
 func @argmax(%arg0 : tensor<3x2xi32>, %arg1 : tensor<6xf32>) -> () {
   // CHECK: [[IDX_INIT:%.+]] = linalg.init_tensor [2]
   // CHECK: [[IDX_MIN:%.+]] = constant 0 : i32
-  // CHECK: [[IDX_FILL:%.+]] = linalg.fill([[IDX_INIT]], [[IDX_MIN]])
+  // CHECK: [[IDX_FILL:%.+]] = linalg.fill([[IDX_MIN]], [[IDX_INIT]])
   // CHECK: [[VAL_INIT:%.+]] = linalg.init_tensor [2]
   // CHECK: [[VAL_MIN:%.+]] = constant -2147483648
-  // CHECK: [[VAL_FILL:%.+]] = linalg.fill([[VAL_INIT]], [[VAL_MIN]])
+  // CHECK: [[VAL_FILL:%.+]] = linalg.fill([[VAL_MIN]], [[VAL_INIT]])
   // CHECK: linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]], #[[$MAP1]]], iterator_types = ["reduction", "parallel"]} ins(%arg0 : tensor<3x2xi32>) outs([[IDX_FILL]], [[VAL_FILL]] : tensor<2xi32>, tensor<2xi32>)
   // CHECK: [[IDX:%.+]] = linalg.index 0
   // CHECK: [[CAST:%.+]] = index_cast [[IDX]]
@@ -943,10 +943,10 @@
   // CHECK: [[IDX_INIT:%.+]] = linalg.init_tensor [3]
   // CHECK: [[IDX_MIN:%.+]] = constant 0 : i32
-  // CHECK: [[IDX_FILL:%.+]] = linalg.fill([[IDX_INIT]], [[IDX_MIN]])
+  // CHECK: [[IDX_FILL:%.+]] = linalg.fill([[IDX_MIN]], [[IDX_INIT]])
   // CHECK: [[VAL_INIT:%.+]] = linalg.init_tensor [3]
   // CHECK: [[VAL_MIN:%.+]] = constant -2147483648
-  // CHECK: [[VAL_FILL:%.+]] = linalg.fill([[VAL_INIT]], [[VAL_MIN]])
+  // CHECK: [[VAL_FILL:%.+]] = linalg.fill([[VAL_MIN]], [[VAL_INIT]])
   // CHECK: linalg.generic {indexing_maps = [#map0, #map2, #map2], iterator_types = ["parallel", "reduction"]} ins(%arg0 : tensor<3x2xi32>) outs([[IDX_FILL]], [[VAL_FILL]] : tensor<3xi32>, tensor<3xi32>)
   // CHECK: [[IDX:%.+]] = linalg.index 1
   // CHECK: [[CAST:%.+]] = index_cast [[IDX]]
@@ -1047,7 +1047,7 @@
 func @max_pool(%arg0: tensor<1x6x34x62xf32>) -> () {
   // CHECK-DAG: [[CONST:%.+]] = constant -3.40282347E+38
   // CHECK-DAG: [[INIT:%.+]] = linalg.init_tensor [1, 4, 32, 62]
-  // CHECK-DAG: [[FILL:%.+]] = linalg.fill([[INIT]], [[CONST]])
+  // CHECK-DAG: [[FILL:%.+]] = linalg.fill([[CONST]], [[INIT]])
   // CHECK-DAG: [[KERNEL:%.+]] = linalg.init_tensor [3, 3]
   // CHECK: linalg.pooling_nhwc_max {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%arg0, [[KERNEL]] : tensor<1x6x34x62xf32>, tensor<3x3xf32>) outs([[FILL]] : tensor<1x4x32x62xf32>)
   %0 = "tosa.max_pool2d"(%arg0) {pad = [0, 0, 0, 0], kernel = [3, 3], stride = [1, 1]} : (tensor<1x6x34x62xf32>) -> (tensor<1x4x32x62xf32>)
@@ -1061,7 +1061,7 @@
   // CHECK-DAG: linalg.yield [[CONST]]
   // CHECK-DAG: [[INITVAL:%.+]] = constant -3.40282347E+38 : f32
   // CHECK-DAG: [[INIT:%.+]] = linalg.init_tensor [1, 4, 33, 62]
-  // CHECK-DAG: [[FILL:%.+]] = linalg.fill([[INIT]], [[INITVAL]])
+  // CHECK-DAG: [[FILL:%.+]] = linalg.fill([[INITVAL]], [[INIT]])
   // CHECK-DAG: [[KERNEL:%.+]] = linalg.init_tensor [3, 3]
   // CHECK: linalg.pooling_nhwc_max {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins([[PAD]], [[KERNEL]] : tensor<1x6x35x62xf32>, tensor<3x3xf32>) outs([[FILL]] : tensor<1x4x33x62xf32>)
   %0 = "tosa.max_pool2d"(%arg0) {pad = [0, 0, 0, 1], kernel = [3, 3], stride = [1, 1]} : (tensor<1x6x34x62xf32>) -> (tensor<1x4x33x62xf32>)
@@ -1100,7 +1100,7 @@
   // CHECK: [[PAD:%.+]] = linalg.pad_tensor %arg0 low[0, 1, 1, 0] high[0, 1, 1, 0]
   // CHECK: [[CONST:%.+]] = constant 0
   // CHECK: [[INIT:%.+]] = linalg.init_tensor [1, 5, 33, 62]
-  // CHECK: [[FILL:%.+]] = linalg.fill([[INIT]], [[CONST]])
+  // CHECK: [[FILL:%.+]] = linalg.fill([[CONST]], [[INIT]])
   // CHECK: [[KERNEL:%.+]] = linalg.init_tensor [4, 4]
   // CHECK: [[POOL:%.+]] = linalg.pooling_nhwc_sum {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins([[PAD]], [[KERNEL]] : tensor<1x8x36x62xf32>, tensor<4x4xf32>) outs([[FILL]] : tensor<1x5x33x62xf32>)
   // CHECK: [[GENERIC:%.+]] = linalg.generic {indexing_maps = [#map], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} outs([[POOL]] : tensor<1x5x33x62xf32>)
diff --git a/mlir/test/Dialect/Linalg/bufferize.mlir b/mlir/test/Dialect/Linalg/bufferize.mlir
--- a/mlir/test/Dialect/Linalg/bufferize.mlir
+++ b/mlir/test/Dialect/Linalg/bufferize.mlir
@@ -244,10 +244,10 @@
 func @bufferize_fill(%arg0: tensor) -> tensor {
   %c0 = constant 0.0 : f32
   // CHECK: %[[MEMREF:.*]] = memref.buffer_cast %[[IN]] : memref
-  // CHECK: linalg.fill(%[[MEMREF]], %cst) : memref, f32
+  // CHECK: linalg.fill(%cst, %[[MEMREF]]) : f32, memref
   // CHECK: %[[TENSOR:.*]] = memref.tensor_load %[[MEMREF]] : memref
   // CHECK: return %[[TENSOR]]
-  %0 = linalg.fill(%arg0, %c0) : tensor, f32 -> tensor
+  %0 = linalg.fill(%c0, %arg0) : f32, tensor -> tensor
   return %0 : tensor
 }
diff --git a/mlir/test/Dialect/Linalg/canonicalize.mlir b/mlir/test/Dialect/Linalg/canonicalize.mlir
--- a/mlir/test/Dialect/Linalg/canonicalize.mlir
+++ b/mlir/test/Dialect/Linalg/canonicalize.mlir
@@ -35,7 +35,7 @@
     %14 = affine.min affine_map<(d0) -> (-d0 + 192, 24)>(%arg3)
     %16 = memref.subview %out[%arg3] [%14] [1] : memref<192xf32, #map> to memref
-    linalg.fill(%16, %cst) : memref, f32
+    linalg.fill(%cst, %16) : f32, memref
     linalg.yield
   }
   return
@@ -684,7 +684,7 @@
   %c0_i32 = constant 0 : i32
   %c0 = constant 0 : index
   %cst = constant 0.000000e+00 : f32
-  %0 = linalg.fill(%arg0, %c0_i32) : tensor<7x7xi32>, i32 -> tensor<7x7xi32>
+  %0 = linalg.fill(%c0_i32, %arg0) : i32, tensor<7x7xi32> -> tensor<7x7xi32>
   %1 = linalg.matmul ins(%arg1, %arg1: tensor<7x7xf32>, tensor<7x7xf32>)
                      outs(%arg1: tensor<7x7xf32>) -> tensor<7x7xf32>
   %2 = linalg.generic #trait outs(%arg0 : tensor<7x7xi32>) {
@@ -720,7 +720,7 @@
   %c21 = constant 21 : index
   %c42 = constant 42 : index
   %0 = linalg.init_tensor [%c21, %c42] : tensor
-  %1 = linalg.fill(%0, %arg1) : tensor, f32 -> tensor
+  %1 = linalg.fill(%arg1, %0) : f32, tensor -> tensor
   %2 = memref.dim %arg0, %c0 : tensor
   %3 = memref.dim %arg0, %c1 : tensor
   %4 = tensor.insert_slice %arg0 into %1[%arg2, %arg3] [%2, %3] [1, 1] : tensor into tensor
@@ -728,7 +728,7 @@
 }
 // CHECK-LABEL: func @propogate_casts
 // CHECK: %[[INIT:.+]] = linalg.init_tensor [21, 42]
-// CHECK: %[[FILL:.+]] = linalg.fill(%[[INIT]], %{{.+}})
+// CHECK: %[[FILL:.+]] = linalg.fill(%{{.+}}, %[[INIT]])
 // CHECK: %[[INSERTED:.+]] = tensor.insert_slice %{{.+}} into %[[FILL]]
 // CHECK: %[[RESULT:.+]] = tensor.cast %[[INSERTED]]
 // CHECK: return %[[RESULT]]
@@ -752,8 +752,8 @@
   %zero = constant 0.0 : f32
   // CHECK: %[[INIT:.+]] = linalg.init_tensor [6, 4] : tensor<6x4xf32>
   %init = linalg.init_tensor [1, 2, 3, 4] : tensor<1x2x3x4xf32>
-  // CHECK: %[[FILL:.+]] = linalg.fill(%[[INIT]], %cst) : tensor<6x4xf32>, f32 -> tensor<6x4xf32>
-  %fill = linalg.fill(%init, %zero) : tensor<1x2x3x4xf32>, f32 -> tensor<1x2x3x4xf32>
+  // CHECK: %[[FILL:.+]] = linalg.fill(%cst, %[[INIT]]) : f32, tensor<6x4xf32> -> tensor<6x4xf32>
+  %fill = linalg.fill(%zero, %init) : f32, tensor<1x2x3x4xf32> -> tensor<1x2x3x4xf32>
   %reshape = linalg.tensor_collapse_shape %fill [[0, 1, 2], [3]]
       : tensor<1x2x3x4xf32> into tensor<6x4xf32>
   // CHECK: return %[[FILL]] : tensor<6x4xf32>
@@ -767,8 +767,8 @@
 func @fold_fill_reshape_dynamic(%arg0 : tensor) -> tensor {
   %zero = constant 0.0 : f32
   // CHECK: %[[RESHAPE:.+]] = linalg.tensor_collapse_shape %[[ARG0]]
-  %0 = linalg.fill(%arg0, %zero) : tensor, f32 -> tensor
-  // CHECK: %[[RESULT:.+]] = linalg.fill(%[[RESHAPE]], %{{.+}})
+  %0 = linalg.fill(%zero, %arg0) : f32, tensor -> tensor
+  // CHECK: %[[RESULT:.+]] = linalg.fill(%{{.+}}, %[[RESHAPE]])
   %1 = linalg.tensor_collapse_shape %0 [[0, 1, 2], [3, 4]]
       : tensor into tensor
   // CHECK: return %[[RESULT]]
diff --git a/mlir/test/Dialect/Linalg/comprehensive-func-bufferize-analysis.mlir b/mlir/test/Dialect/Linalg/comprehensive-func-bufferize-analysis.mlir
--- a/mlir/test/Dialect/Linalg/comprehensive-func-bufferize-analysis.mlir
+++ b/mlir/test/Dialect/Linalg/comprehensive-func-bufferize-analysis.mlir
@@ -355,7 +355,7 @@
   // CHECK-SAME: {__inplace_results_attr__ = ["false"]}
   %sA = tensor.extract_slice %A[0, 0][%idx, %idx][1, 1] : tensor to tensor
   %ssA = tensor.extract_slice %sA[0, 0][4, 4][1, 1] : tensor to tensor<4x4xf32>
-  %FA = linalg.fill(%ssA, %f0) : tensor<4x4xf32>, f32 -> tensor<4x4xf32>
+  %FA = linalg.fill(%f0, %ssA) : f32, tensor<4x4xf32> -> tensor<4x4xf32>
   %rsA = tensor.insert_slice %FA into %sA[0, 0][4, 4][1, 1] : tensor<4x4xf32> into tensor
   %rA = tensor.insert_slice %rsA into %A[0, 0][%idx, %idx][1, 1] : tensor into tensor
@@ -382,7 +382,7 @@
   %sB = tensor.extract_slice %B[0, 0][%idx, %idx][1, 1] : tensor to tensor
   %ssB = tensor.extract_slice %sB[0, 0][4, %idx][1, 1] : tensor to tensor<4x?xf32>
   %sssB = tensor.extract_slice %ssB[0, 0][4, 4][1, 1] : tensor<4x?xf32> to tensor<4x4xf32>
-  %FB = linalg.fill(%sssB, %f0) : tensor<4x4xf32>, f32 -> tensor<4x4xf32>
+  %FB = linalg.fill(%f0, %sssB) : f32, tensor<4x4xf32> -> tensor<4x4xf32>
   %rssB = tensor.insert_slice %FB into %ssB[0, 0][4, 4][1, 1] : tensor<4x4xf32> into tensor<4x?xf32>
   %rsB = tensor.insert_slice %rssB into %sB[0, 0][4, %idx][1, 1] : tensor<4x?xf32> into tensor
   %rB = tensor.insert_slice %rsB into %B[0, 0][%idx, %idx][1, 1] : tensor into tensor
@@ -405,7 +405,7 @@
   // CHECK-SAME: {__inplace_results_attr__ = ["true"]}
   %sC = tensor.extract_slice %C[0, 0][%idx, %idx][1, 1] : tensor to tensor
   %ssC = tensor.extract_slice %sC[0, 0][4, 4][1, 1] : tensor to tensor<4x4xf32>
-  %FC = linalg.fill(%ssC, %f0) : tensor<4x4xf32>, f32 -> tensor<4x4xf32>
+  %FC = linalg.fill(%f0, %ssC) : f32, tensor<4x4xf32> -> tensor<4x4xf32>
   %rsC = tensor.insert_slice %FC into %sC[0, 0][12345, 67890][1, 1] : tensor<4x4xf32> into tensor
   %rC = tensor.insert_slice %rsC into %C[0, 0][%idx, %idx][1, 1] : tensor into tensor
diff --git a/mlir/test/Dialect/Linalg/comprehensive-func-bufferize.mlir b/mlir/test/Dialect/Linalg/comprehensive-func-bufferize.mlir
--- a/mlir/test/Dialect/Linalg/comprehensive-func-bufferize.mlir
+++ b/mlir/test/Dialect/Linalg/comprehensive-func-bufferize.mlir
@@ -12,8 +12,8 @@
   /// Inplaceable, no alloc
   // CHECK-NOT: alloc
-  // CHECK: linalg.fill(%[[I]], %[[F0]]) : memref, f32
-  %r = linalg.fill(%A, %f0) : tensor, f32 -> tensor
+  // CHECK: linalg.fill(%[[F0]], %[[I]]) : f32, memref
+  %r = linalg.fill(%f0, %A) : f32, tensor -> tensor
   // CHECK: %[[R:.*]] = memref.tensor_load %[[I]] : memref
   // CHECK: return %[[R]] : tensor
@@ -37,8 +37,8 @@
   // CHECK: %[[F0:.*]] = constant 0.000000e+00 : f32
   %f0 = constant 0.0 : f32
-  // CHECK: linalg.fill(%[[I2]], %[[F0]]) : memref, f32
-  %r = linalg.fill(%A, %f0) : tensor, f32 -> tensor
+  // CHECK: linalg.fill(%[[F0]], %[[I2]]) : f32, memref
+  %r = linalg.fill(%f0, %A) : f32, tensor -> tensor
   // CHECK: dealloc %[[ALLOC]] : memref
   // CHECK: %[[R:.*]] = memref.tensor_load %[[I2]] : memref
@@ -58,8 +58,8 @@
   /// Cross-op multiple uses of %A, the first op which has interfering reads must alloc.
   // CHECK: %[[ALLOC:.*]] = memref.alloc
   // CHECK: %[[CAST:.*]] = memref.cast %[[ALLOC]]
-  // CHECK: linalg.fill(%[[CAST]]
-  %f = linalg.fill(%A, %f0) : tensor, f32 -> tensor
+  // CHECK: linalg.fill({{.*}}, %[[CAST]]
+  %f = linalg.fill(%f0, %A) : f32, tensor -> tensor
   /// The second op has no interfering reads and can reuse.
   // CHECK-NOT: alloc
@@ -175,8 +175,8 @@
   %r0 = tensor.insert_slice %t into %A[0][4][1] : tensor<4xf32> into tensor

   /// Overwrite BUFFER_CAST_A inplace.
-  // CHECK: linalg.fill(%[[BUFFER_CAST_A]]
-  %r1 = linalg.fill(%r0, %f0) : tensor, f32 -> tensor
+  // CHECK: linalg.fill({{.*}}, %[[BUFFER_CAST_A]]
+  %r1 = linalg.fill(%f0, %r0) : f32, tensor -> tensor
   return %r1: tensor
 }
@@ -191,8 +191,8 @@
   // CHECK: %[[BUFFER_CAST_A:.*]] = memref.buffer_cast {{.*}} : memref
-  // CHECK: linalg.fill(%[[BUFFER_CAST_A]]
-  %r0 = linalg.fill(%A, %f0) : tensor, f32 -> tensor
+  // CHECK: linalg.fill({{.*}}, %[[BUFFER_CAST_A]]
+  %r0 = linalg.fill(%f0, %A) : f32, tensor -> tensor
   // CHECK-NOT: alloc
   // CHECK: %[[SV:.*]] = memref.subview %[[BUFFER_CAST_A]]
@@ -241,9 +241,9 @@
   // So we need to bufferize it out of place and make a new alloc.
   // CHECK-DAG: %[[ALLOC:.*]] = memref.alloc({{.*}}) : memref
   // CHECK-DAG: %[[ALLOC_CAST_DYNAMIC:.*]] = memref.cast %[[ALLOC]] : memref to memref
-  %r1 = linalg.fill(%A, %f0) : tensor, f32 -> tensor
+  %r1 = linalg.fill(%f0, %A) : f32, tensor -> tensor
   // CHECK-DAG: %[[RET_A:.*]] = memref.tensor_load %[[BUFFER_CAST_A]] : memref) -> tensor<1xf32> {
   %cst = constant 0.0 : f32
   %init = linalg.init_tensor [1] : tensor<1xf32>
-  %fill = linalg.fill(%init, %cst) : tensor<1xf32>, f32 -> tensor<1xf32>
+  %fill = linalg.fill(%cst, %init) : f32, tensor<1xf32> -> tensor<1xf32>
   %add = linalg.generic {
       indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d0)>],
       iterator_types = ["parallel", "reduction"]}
@@ -287,7 +287,7 @@
 // CHECK: %[[INPUT_RESHAPE:.+]] = linalg.tensor_collapse_shape %{{.+}} {{\[}}[0, 1]] : tensor<1x1000xf32> into tensor<1000xf32>
 // CHECK: %[[INIT:.+]] = linalg.init_tensor [] : tensor
-// CHECK: %[[FILL:.+]] = linalg.fill(%[[INIT]], %cst) : tensor, f32 -> tensor
+// CHECK: %[[FILL:.+]] = linalg.fill(%cst, %[[INIT]]) : f32, tensor -> tensor
 // CHECK: %[[GENERIC:.+]] = linalg.generic
 // CHECK-SAME: indexing_maps = [#[[MAP1]], #[[MAP2]]]
 // CHECK-SAME: iterator_types = ["reduction"]
@@ -331,7 +331,7 @@
   %c3 = constant 3 : index
   %0 = memref.dim %arg0, %c3 : tensor<1x?x1x?xf32>
   %1 = linalg.init_tensor [1, %0] : tensor<1x?xf32>
-  %2 = linalg.fill(%1, %cst) : tensor<1x?xf32>, f32 -> tensor<1x?xf32>
+  %2 = linalg.fill(%cst, %1) : f32, tensor<1x?xf32> -> tensor<1x?xf32>
   %3 = linalg.generic {
       indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1)>],
@@ -350,7 +350,7 @@
 // CHECK-SAME: %[[ARG0:.+]]: tensor<1x?x1x?xf32>
 // CHECK-DAG: %[[RESHAPE:.+]] = linalg.tensor_collapse_shape %[[ARG0]] {{\[}}[0, 1, 2], [3]]
 // CHECK: %[[INIT:.+]] = linalg.init_tensor [%{{.+}}] : tensor
-// CHECK: %[[FILL:.+]] = linalg.fill(%[[INIT]], %{{.+}})
+// CHECK: %[[FILL:.+]] = linalg.fill(%{{.+}}, %[[INIT]])
 // CHECK: %[[RESULT:.+]] = linalg.generic
 // CHECK-SAME: indexing_maps = [#[[MAP2]], #[[MAP3]]]
 // CHECK-SAME: iterator_types = ["parallel", "reduction"]
@@ -365,7 +365,7 @@
   %cst = constant 1.000000e+00 : f32
   %c3 = constant 3 : index
   %1 = linalg.init_tensor [1, 1] : tensor<1x1xf32>
-  %2 = linalg.fill(%1, %cst) : tensor<1x1xf32>, f32 -> tensor<1x1xf32>
+  %2 = linalg.fill(%cst, %1) : f32, tensor<1x1xf32> -> tensor<1x1xf32>
   %3 = linalg.generic {
       indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1)>],
@@ -384,7 +384,7 @@
 // CHECK-SAME: %[[ARG0:.+]]: tensor<1x?x1x1xf32>
 // CHECK-DAG: %[[RESHAPE:.+]] = linalg.tensor_collapse_shape %[[ARG0]] {{\[}}[0, 1, 2], [3]]
 // CHECK: %[[INIT:.+]] = linalg.init_tensor [1] : tensor<1xf32>
-// CHECK: %[[FILL:.+]] = linalg.fill(%[[INIT]], %{{.+}})
+// CHECK: %[[FILL:.+]] = linalg.fill(%{{.+}}, %[[INIT]])
 // CHECK: %[[RESULT:.+]] = linalg.generic
 // CHECK-SAME: indexing_maps = [#[[MAP2]], #[[MAP3]]]
 // CHECK-SAME: iterator_types = ["parallel", "reduction"]
@@ -400,7 +400,7 @@
   %c2 = constant 2 : index
   %0 = memref.dim %arg0, %c2 : tensor
   %1 = linalg.init_tensor [%0, 1] : tensor
-  %2 = linalg.fill(%1, %cst) : tensor, f32 -> tensor
+  %2 = linalg.fill(%cst, %1) : f32, tensor -> tensor
   %3 = linalg.generic {
       indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1)>],
@@ -419,7 +419,7 @@
 // CHECK-SAME: %[[ARG0:.+]]: tensor
 // CHECK-DAG: %[[RESHAPE:.+]] = linalg.tensor_collapse_shape %[[ARG0]] {{\[}}[0, 1], [2, 3]]
 // CHECK: %[[INIT:.+]] = linalg.init_tensor [%{{.+}}] : tensor
-// CHECK: %[[FILL:.+]] = linalg.fill(%[[INIT]], %{{.+}})
+// CHECK: %[[FILL:.+]] = linalg.fill(%{{.+}}, %[[INIT]])
 // CHECK: %[[RESULT:.+]] = linalg.generic
 // CHECK-SAME: indexing_maps = [#[[MAP2]], #[[MAP3]]]
 // CHECK-SAME: iterator_types = ["parallel", "reduction"]
diff --git a/mlir/test/Dialect/Linalg/forward-vector-transfers.mlir b/mlir/test/Dialect/Linalg/forward-vector-transfers.mlir
--- a/mlir/test/Dialect/Linalg/forward-vector-transfers.mlir
+++ b/mlir/test/Dialect/Linalg/forward-vector-transfers.mlir
@@ -29,7 +29,7 @@
   %c0 = constant 0: index
   %f0 = constant 0.0: f32
   %alloc = memref.alloc() : memref<32 x f32>
-  linalg.fill(%alloc, %f0): memref<32 x f32>, f32
+  linalg.fill(%f0, %alloc) : f32, memref<32 x f32>
   %subview = memref.subview %alloc[0][16][1] : memref<32 x f32> to memref<16 x f32>
   linalg.copy(%in, %subview): memref, memref<16 x f32>
   %0 = vector.transfer_read %alloc[%c0], %f0 {in_bounds = [true]} : memref<32 x f32>, vector<32 x f32>
@@ -69,7 +69,7 @@
   %alloc = memref.alloc() : memref<128 x i8>
   %view = memref.view %alloc[%c0][] : memref<128 x i8> to memref<32 x f32>
   %subview = memref.subview %view[0][16][1] : memref<32 x f32> to memref<16 x f32>
-  linalg.fill(%view, %f0): memref<32 x f32>, f32
+  linalg.fill(%f0, %view) : f32, memref<32 x f32>
   linalg.copy(%in, %subview): memref, memref<16 x f32>
   %0 = vector.transfer_read %view[%c0], %f0 {in_bounds = [true]} : memref<32 x f32>, vector<32 x f32>
   memref.dealloc %alloc : memref<128 x i8>
@@ -129,7 +129,7 @@
   %f0 = constant 0.0: f32
   %f1 = constant 1.0: f32
   %alloc = memref.alloc() : memref<32 x f32>
-  linalg.fill(%alloc, %f0): memref<32 x f32>, f32
+  linalg.fill(%f0, %alloc) : f32, memref<32 x f32>
   %subview = memref.subview %alloc[0][16][1] : memref<32 x f32> to memref<16 x f32>
   linalg.copy(%in, %subview): memref, memref<16 x f32>
   "some_interleaved_use"(%subview) : (memref<16 x f32>) -> ()
diff --git a/mlir/test/Dialect/Linalg/fusion-pattern.mlir b/mlir/test/Dialect/Linalg/fusion-pattern.mlir
--- a/mlir/test/Dialect/Linalg/fusion-pattern.mlir
+++ b/mlir/test/Dialect/Linalg/fusion-pattern.mlir
@@ -4,7 +4,7 @@
 func @basic_fusion(%arg0: memref, %arg1: memref, %arg2: memref) {
   %cst = constant 0.000000e+00 : f32
-  linalg.fill(%arg2, %cst) : memref, f32
+  linalg.fill(%cst, %arg2) : f32, memref
   linalg.matmul {__internal_linalg_transform__ = "basic_fusion"}
     ins(%arg0, %arg1 : memref, memref)
     outs(%arg2 : memref)
@@ -28,7 +28,7 @@
 // CHECK-DAG: %[[C64:.+]] = constant 64 : index
 // CHECK-DAG: %[[C16:.+]] = constant 16 : index
 // CHECK-DAG: %[[CST:.+]] = constant 0.0{{.*}} : f32
-// CHECK-DAG: linalg.fill(%[[ARG2]], %[[CST]])
+// CHECK-DAG: linalg.fill(%[[CST]], %[[ARG2]])
 // CHECK-SAME: __internal_linalg_transform__ = "after_basic_fusion_original"
 // CHECK-DAG: %[[M:.+]] = memref.dim %[[ARG0]], %[[C0]]
 // CHECK-DAG: %[[N:.+]] = memref.dim %[[ARG1]], %[[C1]]
@@ -53,7 +53,7 @@
 // CHECK: %[[TILE_N_3:.+]] = affine.min #[[MAP5]](%[[IV1]])[%[[N_2]], %[[N]]]
 // CHECK: %[[SV3_2:.+]] = memref.subview %[[ARG2]][%[[IV0]], %[[IV1]]]
 // CHECK-SAME: [%[[TILE_M_3]], %[[TILE_N_3]]]
-// CHECK: linalg.fill(%[[SV3_2]], %[[CST]])
+// CHECK: linalg.fill(%[[CST]], %[[SV3_2]])
 // CHECK-SAME: __internal_linalg_transform__ = "after_basic_fusion_producer"
 // CHECK: scf.for %[[IV2:.+]] = %[[C0]] to %[[K]] step %[[C16]] {
 // CHECK: %[[TILE_K:.+]] = affine.min #[[MAP3]](%[[IV2]])[%[[K]]]
@@ -79,7 +79,7 @@
     %arg2: memref, %arg3: memref) {
   %cst = constant 0.000000e+00 : f32
   linalg.copy(%arg1, %arg2) : memref, memref
-  linalg.fill(%arg3, %cst) : memref, f32
+  linalg.fill(%cst, %arg3) : f32, memref
   linalg.matmul {__internal_linalg_transform__ = "rhs_fusion"}
     ins(%arg0, %arg2 : memref, memref)
     outs(%arg3 : memref)
@@ -161,7 +161,7 @@
     %arg2: memref, %arg3: memref) {
   %cst = constant 0.000000e+00 : f32
   linalg.copy(%arg0, %arg1) : memref, memref
-  linalg.fill(%arg3, %cst) : memref, f32
+  linalg.fill(%cst, %arg3) : f32, memref
   linalg.matmul {__internal_linalg_transform__ = "two_operand_fusion"}
     ins(%arg1, %arg2 : memref, memref)
     outs(%arg3 : memref)
@@ -186,7 +186,7 @@
 // CHECK-DAG: %[[CST:.+]] = constant 0.0{{.*}} : f32
 // CHECK: linalg.copy(%[[ARG0]], %[[ARG1]])
 // CHECK-SAME: __internal_linalg_transform__ = "after_two_operand_fusion_original"
-// CHECK: linalg.fill(%[[ARG3]], %[[CST]])
+// CHECK: linalg.fill(%[[CST]], %[[ARG3]])
 // CHECK-SAME: __internal_linalg_transform__ = "after_two_operand_fusion_original"
 // CHECK-DAG: %[[M:.+]] = memref.dim %[[ARG1]], %[[C0]]
 // CHECK: scf.parallel (%[[IV0:.+]]) =
@@ -213,7 +213,7 @@
 // CHECK-SAME: [%[[TILE_M_5]], %[[K]]]
 // CHECK: linalg.copy(%[[SV3]], %[[SV3_2]])
 // CHECK-SAME: __internal_linalg_transform__ = "after_two_operand_fusion_producer"
-// CHECK: linalg.fill(%[[SV2_2]], %[[CST]])
+// CHECK: linalg.fill(%[[CST]], %[[SV2_2]])
 // CHECK-SAME: __internal_linalg_transform__ = "after_two_operand_fusion_producer"
 // CHECK-DAG: %[[N_2:.+]] = memref.dim %[[ARG2]], %[[C1]]
 // CHECK: scf.parallel (%[[IV1:.+]]) =
@@ -428,7 +428,7 @@
   %c64 = constant 64 : index
   %c16 = constant 16 : index
   %cst = constant 0.000000e+00 : f32
-  linalg.fill(%arg2, %cst) : memref, f32
+  linalg.fill(%cst, %arg2) : f32, memref
   %0 = memref.dim %arg0, %c0 : memref
   %1 = memref.dim %arg1, %c1 : memref
   %2 = memref.dim %arg0, %c1 : memref
@@ -463,7 +463,7 @@
 func @basic_conv_fusion(%arg0: memref, %arg1: memref, %arg2: memref) {
   %cst = constant 0.000000e+00 : f32
-  linalg.fill(%arg2, %cst) : memref, f32
+  linalg.fill(%cst, %arg2) : f32, memref
   linalg.conv(%arg0, %arg1, %arg2) {
     dilations = [1, 1], strides = [1, 1],
     __internal_linalg_transform__ = "basic_fusion"} :
diff --git a/mlir/test/Dialect/Linalg/fusion-sequence.mlir b/mlir/test/Dialect/Linalg/fusion-sequence.mlir
--- a/mlir/test/Dialect/Linalg/fusion-sequence.mlir
+++ b/mlir/test/Dialect/Linalg/fusion-sequence.mlir
@@ -9,7 +9,7 @@
   %d0 = memref.dim %arg0, %c0 : memref
   %d1 = memref.dim %arg1, %c1 : memref
   %0 = memref.alloc(%d0, %d1) : memref
-  linalg.fill(%0, %cst) : memref, f32
+  linalg.fill(%cst, %0) : f32, memref
   linalg.matmul ins(%arg0, %arg1 : memref, memref)
     outs(%0 : memref)
   linalg.generic
@@ -42,7 +42,7 @@
 // CHECK-DAG: %[[SV_ARG0:.+]] = memref.subview %[[ARG0]][%[[IV0]], 0]
 // CHECK-DAG: %[[SV_ARG1:.+]] = memref.subview %[[ARG1]][0, %[[IV1]]]
 // CHECK: %[[SV_TEMP_2:.+]] = memref.subview %[[TEMP]][%[[IV0]], %[[IV1]]]
-// CHECK: linalg.fill(%[[SV_TEMP_2]], %{{.+}})
+// CHECK: linalg.fill(%{{.+}}, %[[SV_TEMP_2]])
 // CHECK: linalg.matmul
 // CHECK-SAME: ins(%[[SV_ARG0]], %[[SV_ARG1]]
 // CHECK-SAME: : memref, memref)
@@ -69,13 +69,13 @@
   %n3 = memref.dim %arg3, %c1 : memref
   %0 = memref.alloc(%m, %n1) : memref
   %1 = memref.alloc(%m, %n2) : memref
-  linalg.fill(%0, %cst) : memref, f32
+  linalg.fill(%cst, %0) : f32, memref
   linalg.matmul ins(%arg0, %arg1 : memref, memref)
     outs(%0 : memref)
-  linalg.fill(%1, %cst) : memref, f32
+  linalg.fill(%cst, %1) : f32, memref
   linalg.matmul ins(%0, %arg2 : memref, memref)
     outs(%1 : memref)
-  linalg.fill(%arg4, %cst) : memref, f32
+  linalg.fill(%cst, %arg4) : f32, memref
   linalg.matmul ins(%1, %arg3 : memref, memref)
     outs(%arg4 : memref)
   return
@@ -124,15 +124,15 @@
 // CHECK: %[[N0:.+]] = memref.dim %[[ARG0]], %[[C1]]
 // CHECK: %[[SV_ARG0:.+]] = memref.subview %[[ARG0]][%[[IV0]], 0]
 // CHECK-SAME: [%[[TILE_M_5]], %[[N0]]]
-// CHECK: linalg.fill(%[[SV_ALLOC1]], %{{.+}})
+// CHECK: linalg.fill(%{{.+}}, %[[SV_ALLOC1]])
 // CHECK: linalg.matmul ins(%[[SV_ARG0]], %[[ARG1]]
 // CHECK-SAME: : memref, memref)
 // CHECK-SAME: outs(%[[SV_ALLOC1]] : memref)
-// CHECK: linalg.fill(%[[SV_ALLOC2]], %{{.+}})
+// CHECK: linalg.fill(%{{.+}}, %[[SV_ALLOC2]])
 // CHECK: linalg.matmul ins(%[[SV_ALLOC1]], %[[ARG2]]
 // CHECK-SAME: : memref, memref)
 // CHECK-SAME: outs(%[[SV_ALLOC2]] : memref)
-// CHECK: linalg.fill(%[[SV_ARG4_2]], %{{.+}})
+// CHECK: linalg.fill(%{{.+}}, %[[SV_ARG4_2]])
 // CHECK: linalg.matmul ins(%[[SV_ALLOC3]], %[[ARG3]]
 // CHECK-SAME: : memref, memref)
 // CHECK-SAME: outs(%[[SV_ARG4]] : memref)
diff --git a/mlir/test/Dialect/Linalg/fusion-tensor-pattern.mlir b/mlir/test/Dialect/Linalg/fusion-tensor-pattern.mlir
--- a/mlir/test/Dialect/Linalg/fusion-tensor-pattern.mlir
+++ b/mlir/test/Dialect/Linalg/fusion-tensor-pattern.mlir
@@ -251,7 +251,7 @@
 func @matmul_out_fusion(%arg0: tensor, %arg1: tensor,
                         %arg2: tensor) -> tensor {
   %c0 = constant 0.0 : f32
-  %0 = linalg.fill(%arg0, %c0) : tensor, f32 -> tensor
+  %0 = linalg.fill(%c0, %arg0) : f32, tensor -> tensor
   %1 = linalg.matmul {__internal_linalg_transform__ = "out_fusion"}
     ins(%arg1, %arg2 : tensor, tensor)
     outs(%0 : tensor) -> tensor
@@ -268,7 +268,7 @@
 // CHECK: scf.for %[[I:.*]]{{.*}}iter_args(%{{.*}} = %[[ARG0]]) -> (tensor) {
 // CHECK: scf.for %[[J:.*]]
 // CHECK: %[[ST:.*]] = tensor.extract_slice %[[ARG0]]
-// CHECK: %[[ST_FILL:.*]] = linalg.fill(%[[ST]], %[[C0]]) {__internal_linalg_transform__ = "after_out_fusion_producer"} : tensor, f32 -> tensor
+// CHECK: %[[ST_FILL:.*]] = linalg.fill(%[[C0]], %[[ST]]) {__internal_linalg_transform__ = "after_out_fusion_producer"} : f32, tensor -> tensor
 // CHECK: %[[ST_MM_RES:.*]] = scf.for %[[K:.*]]{{.*}}iter_args(%[[BB:.*]] = %[[ST_FILL]]) -> (tensor) {
 // CHECK-NOT: fill
 // CHECK: %[[ST_MM:.*]] = linalg.matmul {__internal_linalg_transform__ = "after_out_fusion"} ins(%{{.*}}, %{{.*}} : tensor, tensor) outs(%[[BB]] : tensor) -> tensor
@@ -304,7 +304,7 @@
 // TLOOP: %[[A_SUB:.*]] = tensor.extract_slice %[[A_]][%[[I]], 0]
 // TLOOP: %[[B_SUB:.*]] = tensor.extract_slice %[[B_]][0, %[[J]]]
 // TLOOP: %[[OUT_SUB:.*]] = tensor.extract_slice %[[OUT_]][%[[I]], %[[J]]]
-// TLOOP: %[[INIT_SUB:.*]] = linalg.fill(%[[OUT_SUB]], %[[C0_F32_]])
+// TLOOP: %[[INIT_SUB:.*]] = linalg.fill(%[[C0_F32_]], %[[OUT_SUB]])
 // TLOOP: %[[AB_SUB:.*]] = linalg.tiled_loop (%[[K:.*]]) = (%[[C0]])
 // TLOOP-SAME: to (%[[DIM_A__1]]) step (%[[C16]])
diff --git a/mlir/test/Dialect/Linalg/fusion.mlir b/mlir/test/Dialect/Linalg/fusion.mlir
--- a/mlir/test/Dialect/Linalg/fusion.mlir
+++ b/mlir/test/Dialect/Linalg/fusion.mlir
@@ -678,7 +678,7 @@
 func @fill_and_conv(%arg0: memref, %arg1: memref<2x3x1x1xf32>, %arg2: memref) {
   %cst = constant 0.000000e+00 : f32
-  linalg.fill(%arg2, %cst) : memref, f32
+  linalg.fill(%cst, %arg2) : f32, memref
   %c4 = constant 4 : index
   %c1 = constant 1 : index
diff --git a/mlir/test/Dialect/Linalg/generalize-named-ops.mlir b/mlir/test/Dialect/Linalg/generalize-named-ops.mlir
--- a/mlir/test/Dialect/Linalg/generalize-named-ops.mlir
+++ b/mlir/test/Dialect/Linalg/generalize-named-ops.mlir
@@ -472,7 +472,7 @@
 // -----

 func @generalize_fill(%output: memref, %value : f32) {
-  linalg.fill(%output, %value) : memref, f32
+  linalg.fill(%value, %output) : f32, memref
   return
 }
diff --git a/mlir/test/Dialect/Linalg/invalid.mlir b/mlir/test/Dialect/Linalg/invalid.mlir
--- a/mlir/test/Dialect/Linalg/invalid.mlir
+++ b/mlir/test/Dialect/Linalg/invalid.mlir
@@ -641,7 +641,7 @@
 {
   %0 = linalg.init_tensor [%arg0, %arg1] : tensor
   // expected-error @+1 {{expected fill op with no result value to use memref type}}
-  linalg.fill(%0, %arg2) : tensor, f32
+  linalg.fill(%arg2, %0) : f32, tensor
 }

 // -----
@@ -649,7 +649,7 @@
 func @illegal_fill_memref_with_return(%arg0 : memref, %arg1 : f32) -> memref
 {
   // expected-error @+1 {{unexpected #results > #outputs}}
-  %0 = linalg.fill(%arg0, %arg1) : memref, f32 -> memref
+  %0 = linalg.fill(%arg1, %arg0) : f32, memref -> memref
   return %0 : memref
 }

 // -----
@@ -659,7 +659,7 @@
     (%arg0 : memref, %arg1 : f32) -> tensor
 {
   // expected-error @+1 {{unexpected #results > #outputs}}
-  %0 = linalg.fill(%arg0, %arg1) : memref, f32 -> tensor
+  %0 = linalg.fill(%arg1, %arg0) : f32, memref -> tensor
   return %0 : tensor
 }

 // -----
@@ -669,7 +669,7 @@
     (%arg0 : tensor, %arg1 : f32) -> memref
 {
   // expected-error @+1 {{expected type of operand #1 ('tensor') to match type of corresponding result ('memref')}}
-  %0 = linalg.fill(%arg0, %arg1) : tensor, f32 -> memref
+  %0 = linalg.fill(%arg1, %arg0) : f32, tensor -> memref
   return %0 : memref
 }
diff --git a/mlir/test/Dialect/Linalg/loops.mlir b/mlir/test/Dialect/Linalg/loops.mlir
--- a/mlir/test/Dialect/Linalg/loops.mlir
+++ b/mlir/test/Dialect/Linalg/loops.mlir
@@ -182,7 +182,7 @@
 // CHECKPARALLEL: store %[[res]], %{{.*}}[] : memref

 func @fill_view(%arg0: memref, %arg1: f32) {
-  linalg.fill(%arg0, %arg1) : memref, f32
+  linalg.fill(%arg1, %arg0) : f32, memref
   return
 }
 // CHECK-LABEL: func @fill_view(
@@ -196,7 +196,7 @@
 // CHECKPARALLEL: store %{{.*}}, %{{.*}}[%{{.*}}] : memref

 func @fill_view0(%arg0: memref, %arg1: f32) {
-  linalg.fill(%arg0, %arg1) : memref, f32
+  linalg.fill(%arg1, %arg0) : f32, memref
   return
 }
 // CHECK-LABEL: func @fill_view0(%{{.*}}: memref, %{{.*}}: f32) {
@@ -206,7 +206,7 @@
 // CHECKPARALLEL: store %{{.*}}, %{{.*}}[] : memref

 func @fill_view3(%arg0: memref, %arg1: f32) {
-  linalg.fill(%arg0, %arg1) : memref, f32
+  linalg.fill(%arg1, %arg0) : f32, memref
   return
 }
 // CHECK-LABEL: func @fill_view3(
diff --git a/mlir/test/Dialect/Linalg/named-ops.mlir b/mlir/test/Dialect/Linalg/named-ops.mlir
--- a/mlir/test/Dialect/Linalg/named-ops.mlir
+++ b/mlir/test/Dialect/Linalg/named-ops.mlir
@@ -4,7 +4,7 @@
 func @depthwise_conv_2d_input_nhwc_filter_hwcf_tensor(%input: tensor<2x4x5x2xf32>, %filter: tensor<2x2x2x3xf32>) -> tensor<2x3x4x2x3xf32> {
   %zero = constant 0.000000e+00 : f32
   %init = linalg.init_tensor [2, 3, 4, 2, 3] : tensor<2x3x4x2x3xf32>
-  %fill = linalg.fill(%init, %zero) : tensor<2x3x4x2x3xf32>, f32 -> tensor<2x3x4x2x3xf32>
+  %fill = linalg.fill(%zero, %init) : f32, tensor<2x3x4x2x3xf32> -> tensor<2x3x4x2x3xf32>
   // CHECK: %{{.+}} = linalg.depthwise_conv_2d_input_nhwc_filter_hwcf
   // CHECK-SAME: {dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>}
   // CHECK-SAME: ins(%{{.+}}, %{{.+}} : tensor<2x4x5x2xf32>, tensor<2x2x2x3xf32>)
@@ -57,7 +57,7 @@
 func @depthwise_conv_2d_input_nhwc_filter_hwcf_tensor_dilated(%input: tensor<2x8x9x2xf32>, %filter: tensor<2x2x2x3xf32>) -> tensor<2x6x7x2x3xf32> {
   %zero = constant 0.000000e+00 : f32
   %init = linalg.init_tensor [2, 6, 7, 2, 3] : tensor<2x6x7x2x3xf32>
-  %fill = linalg.fill(%init, %zero) : tensor<2x6x7x2x3xf32>, f32 -> tensor<2x6x7x2x3xf32>
+  %fill = linalg.fill(%zero, %init) : f32, tensor<2x6x7x2x3xf32> -> tensor<2x6x7x2x3xf32>
   // CHECK: %{{.+}} = linalg.depthwise_conv_2d_input_nhwc_filter_hwcf
   // CHECK-SAME: {dilations = dense<2> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>}
   // CHECK-SAME: ins(%{{.+}}, %{{.+}} : tensor<2x8x9x2xf32>, tensor<2x2x2x3xf32>)
@@ -326,7 +326,7 @@
   %fake = linalg.init_tensor [3, 3] : tensor<3x3xf32>
   %init = linalg.init_tensor [1, 2, 2, 1] : tensor<1x2x2x1xf32>
   %cst = constant 0.000000e+00 : f32
-  %fill = linalg.fill(%init, %cst) : tensor<1x2x2x1xf32>, f32 -> tensor<1x2x2x1xf32>
+  %fill = linalg.fill(%cst, %init) : f32, tensor<1x2x2x1xf32> -> tensor<1x2x2x1xf32>
   %res = linalg.pooling_nhwc_sum {dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>}
     ins(%input, %fake: tensor<1x4x4x1xf32>, tensor<3x3xf32>)
     outs(%fill: tensor<1x2x2x1xf32>) -> tensor<1x2x2x1xf32>
@@ -360,7 +360,7 @@
   %fake = linalg.init_tensor [3, 3] : tensor<3x3xf32>
   %init = linalg.init_tensor [1, 2, 2, 1] : tensor<1x2x2x1xf32>
   %cst = constant 0.000000e+00 : f32
-  %fill = linalg.fill(%init, %cst) : tensor<1x2x2x1xf32>, f32 -> tensor<1x2x2x1xf32>
+  %fill = linalg.fill(%cst, %init) : f32, tensor<1x2x2x1xf32> -> tensor<1x2x2x1xf32>
   %res = linalg.pooling_nhwc_max {dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>}
     ins(%input, %fake: tensor<1x4x4x1xf32>, tensor<3x3xf32>)
     outs(%fill: tensor<1x2x2x1xf32>) -> tensor<1x2x2x1xf32>
@@ -394,7 +394,7 @@
   %fake = linalg.init_tensor [3, 3] : tensor<3x3xi8>
   %init = linalg.init_tensor [1, 2, 2, 1] : tensor<1x2x2x1xi8>
   %cst = constant 0 : i8
-  %fill = linalg.fill(%init, %cst) : tensor<1x2x2x1xi8>, i8 -> tensor<1x2x2x1xi8>
+  %fill = linalg.fill(%cst, %init) : i8, tensor<1x2x2x1xi8> -> tensor<1x2x2x1xi8>
   %res = linalg.pooling_nhwc_i8_max {dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>}
     ins(%input, %fake: tensor<1x4x4x1xi8>, tensor<3x3xi8>)
     outs(%fill: tensor<1x2x2x1xi8>) -> tensor<1x2x2x1xi8>
@@ -428,7 +428,7 @@
   %fake = linalg.init_tensor [3, 3] : tensor<3x3xi16>
   %init = linalg.init_tensor [1, 2, 2, 1] : tensor<1x2x2x1xi16>
   %cst = constant 0 : i16
-  %fill = linalg.fill(%init, %cst) : tensor<1x2x2x1xi16>, i16 -> tensor<1x2x2x1xi16>
+  %fill = linalg.fill(%cst, %init) : i16, tensor<1x2x2x1xi16> -> tensor<1x2x2x1xi16>
   %res = linalg.pooling_nhwc_i16_max {dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>}
     ins(%input, %fake: tensor<1x4x4x1xi16>, tensor<3x3xi16>)
     outs(%fill: tensor<1x2x2x1xi16>) -> tensor<1x2x2x1xi16>
@@ -462,7 +462,7 @@
   %fake = linalg.init_tensor [3, 3] : tensor<3x3xi32>
   %init = linalg.init_tensor [1, 2, 2, 1] : tensor<1x2x2x1xi32>
   %cst = constant 0 : i32
-  %fill = linalg.fill(%init, %cst) : tensor<1x2x2x1xi32>, i32 -> tensor<1x2x2x1xi32>
+  %fill = linalg.fill(%cst, %init) : i32, tensor<1x2x2x1xi32> -> tensor<1x2x2x1xi32>
   %res = linalg.pooling_nhwc_i32_max {dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>}
     ins(%input, %fake: tensor<1x4x4x1xi32>, tensor<3x3xi32>)
     outs(%fill: tensor<1x2x2x1xi32>) -> tensor<1x2x2x1xi32>
@@ -497,7 +497,7 @@
   %fake = linalg.init_tensor [3, 3] : tensor<3x3xf32>
   %init = linalg.init_tensor [1, 2, 2, 1] : tensor<1x2x2x1xf32>
   %cst = constant 0.000000e+00 : f32
-  %fill = linalg.fill(%init, %cst) : tensor<1x2x2x1xf32>, f32 -> tensor<1x2x2x1xf32>
+  %fill = linalg.fill(%cst, %init) : f32, tensor<1x2x2x1xf32> -> tensor<1x2x2x1xf32>
   %res = linalg.pooling_nhwc_min {dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>}
     ins(%input, %fake: tensor<1x4x4x1xf32>, tensor<3x3xf32>)
     outs(%fill: tensor<1x2x2x1xf32>) -> tensor<1x2x2x1xf32>
diff --git a/mlir/test/Dialect/Linalg/promotion_options.mlir b/mlir/test/Dialect/Linalg/promotion_options.mlir
--- a/mlir/test/Dialect/Linalg/promotion_options.mlir
+++ b/mlir/test/Dialect/Linalg/promotion_options.mlir
@@ -23,9 +23,9 @@
 // CHECK: %[[T19:.+]] = memref.subview %[[T18]]
 // CHECK: %[[T20:.+]] = memref.alloc(%{{.*}}, %{{.*}}) : memref
 // CHECK: %[[T21:.+]] = memref.subview %[[T20]]
-// CHECK: linalg.fill(%[[T19]], %[[C42]])
+// CHECK: linalg.fill(%[[C42]], %[[T19]])
 // CHECK: linalg.copy(%[[T7]], %[[T19]])
-// CHECK: linalg.fill(%[[T21]], %[[C42]])
+// CHECK: linalg.fill(%[[C42]], %[[T21]])
 // CHECK: linalg.copy(%[[T17]], %[[T21]])
 // CHECK: linalg.matmul ins(%[[T19]], %[[T12]]{{.*}} outs(%[[T21]]
 // CHECK-NOT: linalg.fill
diff --git a/mlir/test/Dialect/Linalg/roundtrip.mlir b/mlir/test/Dialect/Linalg/roundtrip.mlir
--- a/mlir/test/Dialect/Linalg/roundtrip.mlir
+++ b/mlir/test/Dialect/Linalg/roundtrip.mlir
@@ -153,12 +153,12 @@

 func @fill_view(%arg0: memref, %arg1: f32) {
-  linalg.fill(%arg0, %arg1) : memref, f32
+  linalg.fill(%arg1, %arg0) : f32, memref
   return
 }
 // CHECK-LABEL: func @fill_view(
 // CHECK: %{{.*}}: memref, %{{.*}}: f32) {
-// CHECK: linalg.fill(%{{.*}}, %{{.*}}) : memref, f32
+// CHECK: linalg.fill(%{{.*}}, %{{.*}}) : f32, memref

 // -----
@@ -174,12 +174,12 @@

 func @fill_view3(%arg0: memref, %arg1: f32) {
-  linalg.fill(%arg0, %arg1) : memref, f32
+  linalg.fill(%arg1, %arg0) : f32, memref
   return
 }
 // CHECK-LABEL: func @fill_view3(
 // CHECK: %{{.*}}: memref, %{{.*}}: f32) {
-// CHECK: linalg.fill(%{{.*}}, %{{.*}}) : memref, f32
+// CHECK: linalg.fill(%{{.*}}, %{{.*}}) : f32, memref

 // -----
@@ -429,9 +429,9 @@
     -> (tensor, tensor)
 {
   %c0 = constant 0 : index
   %0 = linalg.init_tensor [] : tensor
-  %1 = linalg.fill(%0, %arg2) : tensor, i32 -> tensor
+  %1 = linalg.fill(%arg2, %0) : i32, tensor -> tensor
   %2 = linalg.init_tensor [] : tensor
-  %3 = linalg.fill(%2, %arg2) : tensor, i32 -> tensor
+  %3 = linalg.fill(%arg2, %2) : i32, tensor -> tensor
   %4:2 = linalg.generic {
     indexing_maps = [affine_map<(d0) -> (d0)>, affine_map<(d0) -> (d0)>,
                      affine_map<(d0) -> ()>, affine_map<(d0) -> ()>],
    iterator_types = ["reduction"]}
@@ -704,10 +704,10 @@

 func @fill_tensor(%arg0 : index, %arg1 : index, %arg2 : f32) -> tensor {
   %0 = linalg.init_tensor [%arg0, %arg1] : tensor
-  %1 = linalg.fill(%0, %arg2) : tensor, f32 -> tensor
+  %1 = linalg.fill(%arg2, %0) : f32, tensor -> tensor
   return %1 : tensor
 }
-// CHECK: %{{.+}} = linalg.fill(%{{.+}}, %{{.+}}) : tensor, f32 -> tensor
+// CHECK: %{{.+}} = linalg.fill(%{{.+}}, %{{.+}}) : f32, tensor -> tensor

 // -----
diff --git a/mlir/test/Dialect/Linalg/tile-and-fuse-tensors.mlir b/mlir/test/Dialect/Linalg/tile-and-fuse-tensors.mlir
--- a/mlir/test/Dialect/Linalg/tile-and-fuse-tensors.mlir
+++ b/mlir/test/Dialect/Linalg/tile-and-fuse-tensors.mlir
@@ -77,7 +77,7 @@
   %cst = constant 0.0 : f32
   %init = linalg.init_tensor [1, 112, 112, 32] : tensor<1x112x112x32xf32>
-  %fill = linalg.fill(%init, %cst) : tensor<1x112x112x32xf32>, f32 -> tensor<1x112x112x32xf32>
+  %fill = linalg.fill(%cst, %init) : f32, tensor<1x112x112x32xf32> -> tensor<1x112x112x32xf32>
   %conv = linalg.conv_2d_input_nhwc_filter_hwcf
     {dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>}
@@ -121,7 +121,7 @@
 // CHECK-SAME: (%[[INPUT:.+]]: tensor<1x225x225x3xf32>, %[[FILTER:.+]]: tensor<3x3x3x32xf32>, %[[ELEM:.+]]: tensor<1x112x112x32xf32>)
 // CHECK:      %[[INIT:.+]] = linalg.init_tensor [1, 112, 112, 32] : tensor<1x112x112x32xf32>
-// CHECK-NEXT: %[[FILL:.+]] = linalg.fill(%[[INIT]], %cst) : tensor<1x112x112x32xf32>, f32 -> tensor<1x112x112x32xf32>
+// CHECK-NEXT: %[[FILL:.+]] = linalg.fill(%cst, %[[INIT]]) : f32, tensor<1x112x112x32xf32> -> tensor<1x112x112x32xf32>
 // CHECK-NEXT: scf.for %[[IV0:.+]] = %{{.+}} to %{{.+}} step %{{.+}} iter_args(%[[ARG0:.+]] = %[[FILL]])
 // CHECK-NEXT: %[[OFFSET_H:.+]] = affine.apply #[[MAP0]](%[[IV0]])
@@ -159,7 +159,7 @@
   %oc = memref.dim %elementwise, %c3 : tensor
   %init = linalg.init_tensor [%n, %oh, %ow, %oc] : tensor
-  %fill = linalg.fill(%init, %cst) : tensor, f32 -> tensor
+  %fill = linalg.fill(%cst, %init) : f32, tensor -> tensor
   %conv = linalg.conv_2d_input_nhwc_filter_hwcf
     {dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>}
@@ -228,7 +228,7 @@
 // CHECK-DAG: %[[ELEM_OC:.+]] = memref.dim %[[ELEM]], %[[C3]] : tensor
 // CHECK: %[[INIT:.+]] = linalg.init_tensor [%[[ELEM_N]], %[[ELEM_OH]], %[[ELEM_OW]], %[[ELEM_OC]]] : tensor
-// CHECK: %[[FILL:.+]] = linalg.fill(%[[INIT]], %cst) : tensor, f32 -> tensor
+// CHECK: %[[FILL:.+]] = linalg.fill(%cst, %[[INIT]]) : f32, tensor -> tensor
 // CHECK-DAG: %[[FILTER_H:.+]] = memref.dim %[[FILTER]], %[[C0]] : tensor
 // CHECK-DAG: %[[FILTER_W:.+]] = memref.dim %[[FILTER]], %[[C1]] : tensor
@@ -319,7 +319,7 @@
     linalg.yield %zero : f32
   } : tensor<58x1xf32> to tensor<64x128xf32>

-  %fill = linalg.fill(%large_input, %zero) : tensor<64x128xf32>, f32 -> tensor<64x128xf32>
+  %fill = linalg.fill(%zero, %large_input) : f32, tensor<64x128xf32> -> tensor<64x128xf32>

   %for0 = scf.for %iv0 = %c0 to %d0 step %c16 iter_args(%arg0 = %fill) -> tensor<64x128xf32> {
     %for1 = scf.for %iv1 = %c0 to %d1 step %c32 iter_args(%arg1 = %arg0) -> tensor<64x128xf32> {
diff --git a/mlir/test/Dialect/Linalg/tile.mlir b/mlir/test/Dialect/Linalg/tile.mlir
--- a/mlir/test/Dialect/Linalg/tile.mlir
+++ b/mlir/test/Dialect/Linalg/tile.mlir
@@ -288,35 +288,35 @@
 // TILE-234: linalg.dot ins(%[[sAi]], %[[sBi]]{{.*}} outs(

 func @fill_static(%arg0: memref<127x99xf32>, %arg1: f32) {
-  linalg.fill(%arg0, %arg1) : memref<127x99xf32>, f32
+  linalg.fill(%arg1, %arg0) : f32, memref<127x99xf32>
   return
 }
 // TILE-2-LABEL: func @fill_static
 // TILE-2:   for
 // TILE-2-NOT: for
 // TILE-2:   memref.subview{{.*}} : memref<127x99xf32>
-// TILE-2:   linalg.fill{{.*}} : memref, f32
+// TILE-2:   linalg.fill{{.*}} : f32, memref

 // TILE-02-LABEL: func @fill_static
 // TILE-02:   for
 // TILE-02-NOT: for
 // TILE-02:   memref.subview{{.*}} : memref<127x99xf32>
-// TILE-02:   linalg.fill{{.*}} : memref<127x?xf32, #[[$stride_99_1_layout_map]]>, f32
+// TILE-02:   linalg.fill{{.*}} : f32, memref<127x?xf32, #[[$stride_99_1_layout_map]]>

 // TILE-002-LABEL: func @fill_static
 // TILE-002-NOT: for
-// TILE-002:   linalg.fill{{.*}} memref<127x99xf32>, f32
+// TILE-002:   linalg.fill{{.*}} f32, memref<127x99xf32>

 // TILE-234-LABEL: func @fill_static
 // TILE-234:   for
 // TILE-234:   for
 // TILE-234-NOT: for
 // TILE-234:   memref.subview{{.*}} : memref<127x99xf32>
-// TILE-234:   linalg.fill{{.*}} : memref, f32
+// TILE-234:   linalg.fill{{.*}} : f32, memref

 func @fill(%arg0: memref, %arg1: f32) {
-  linalg.fill(%arg0, %arg1) : memref, f32
+  linalg.fill(%arg1, %arg0) : f32, memref
   return
 }
 // TILE-2-LABEL: func @fill
diff --git a/mlir/test/Dialect/Linalg/tiled-loops.mlir b/mlir/test/Dialect/Linalg/tiled-loops.mlir
--- a/mlir/test/Dialect/Linalg/tiled-loops.mlir
+++ b/mlir/test/Dialect/Linalg/tiled-loops.mlir
@@ -25,7 +25,7 @@
         : memref<192x192xf32> to memref<192x?xf32, #map1>
     %4 = memref.subview %C_[%i, %j] [%0, %2] [1, 1]
         : memref<192x192xf32> to memref
-    linalg.fill(%4, %cst) : memref, f32
+    linalg.fill(%cst, %4) : f32, memref
     linalg.matmul ins(%1, %3 : memref,
                               memref<192x?xf32, #map1>)
                   outs(%4 : memref)
@@ -63,7 +63,7 @@
       ins (%A_ = %A: memref<192x192xf32>, %B_ = %B: memref<192x192xf32>)
      outs (%C_ = %C: memref)
      iterators["reduction", "reduction"] {
-    linalg.fill(%A_, %cst) : memref<192x192xf32>, f32
+    linalg.fill(%cst, %A_) : f32, memref<192x192xf32>
     linalg.yield
   }
   return
diff --git a/mlir/test/Dialect/Linalg/transform-patterns.mlir b/mlir/test/Dialect/Linalg/transform-patterns.mlir
--- a/mlir/test/Dialect/Linalg/transform-patterns.mlir
+++ b/mlir/test/Dialect/Linalg/transform-patterns.mlir
@@ -296,8 +296,8 @@
   %cf = constant 1.0 : f32
   %3 = memref.subview %arg0[%c0, %c0][%c2000, %c4000][%c1, %c1]
       : memref to memref
-  linalg.fill(%3, %cf) { __internal_linalg_transform__ = "_promote_views_aligned_"}
-    : memref, f32
+  linalg.fill(%cf, %3) { __internal_linalg_transform__ = "_promote_views_aligned_"}
+    : f32, memref
   return
 }
 // CHECK-LABEL: func @aligned_promote_fill
@@ -306,9 +306,9 @@
 // CHECK: %[[a0:.*]] = memref.alloc() {alignment = 32 : i64} : memref<32000000xi8>
 // CHECK: %[[v0:.*]] = memref.view %[[a0]]{{.*}} : memref<32000000xi8> to memref
 // CHECK: %[[l0:.*]] = memref.subview %[[v0]][0, 0] [%{{.*}}, %{{.*}}] [1, 1] : memref to memref
-// CHECK: linalg.fill(%[[v0]], {{.*}}) : memref, f32
+// CHECK: linalg.fill({{.*}}, %[[v0]]) : f32, memref
 // CHECK: linalg.copy(%[[s0]], %[[l0]]) : memref, memref
-// CHECK: linalg.fill(%[[v0]], %[[cf]]) : memref, f32
+// CHECK: linalg.fill(%[[cf]], %[[v0]]) : f32, memref

 func @aligned_promote_fill_complex(%arg0: memref, offset: ?, strides: [?, 1]>) {
   %c2000 = constant 2000 : index
@@ -319,8 +319,8 @@
   %cc = complex.create %cf, %cf : complex
   %3 = memref.subview %arg0[%c0, %c0][%c2000, %c4000][%c1, %c1]
      : memref, offset: ?, strides: [?, 1]> to memref, offset: ?, strides: [?, ?]>
-  linalg.fill(%3, %cc) { __internal_linalg_transform__ = "_promote_views_aligned_"}
-    : memref, offset: ?, strides: [?, ?]>, complex
+  linalg.fill(%cc, %3) { __internal_linalg_transform__ = "_promote_views_aligned_"}
+    : complex, memref, offset: ?, strides: [?, ?]>
   return
 }
 // CHECK-LABEL: func @aligned_promote_fill_complex
@@ -329,9 +329,9 @@
 // CHECK: %[[a0:.*]] = memref.alloc() {alignment = 32 : i64} : memref<64000000xi8>
 // CHECK: %[[v0:.*]] = memref.view %[[a0]]{{.*}} : memref<64000000xi8> to memref>
 // CHECK: %[[l0:.*]] = memref.subview %[[v0]][0, 0] [%{{.*}}, %{{.*}}] [1, 1] : memref> to memref, #[[$STRIDED_2D_u_1]]>
-// CHECK: linalg.fill(%[[v0]], {{.*}}) : memref>, complex
+// CHECK: linalg.fill({{.*}}, %[[v0]]) : complex, memref>
 // CHECK: linalg.copy(%[[s0]], %[[l0]]) : memref, #map{{.*}}>, memref, #map{{.*}}>
-// CHECK: linalg.fill(%[[v0]], %[[cc]]) : memref>, complex
+// CHECK: linalg.fill(%[[cc]], %[[v0]]) : complex, memref>

 func @tile_permute_parallel_loop(%arg0: memref, %arg1: memref,
diff --git a/mlir/test/Dialect/Linalg/vectorization.mlir b/mlir/test/Dialect/Linalg/vectorization.mlir
--- a/mlir/test/Dialect/Linalg/vectorization.mlir
+++ b/mlir/test/Dialect/Linalg/vectorization.mlir
@@ -157,7 +157,7 @@
 func @test_vectorize_fill(%A : memref<8x16xf32>, %arg0 : f32) {
   // CHECK: %[[V:.*]] = vector.broadcast {{.*}} : f32 to vector<8x16xf32>
   // CHECK: vector.transfer_write %[[V]], {{.*}} : vector<8x16xf32>, memref<8x16xf32>
-  linalg.fill(%A, %arg0) : memref<8x16xf32>, f32
+  linalg.fill(%arg0, %A) : f32, memref<8x16xf32>
   return
 }
@@ -167,7 +167,7 @@
 func @test_vectorize_fill_scalar(%A : memref, %arg0 : f32) {
   // CHECK-SAME: (%[[M:.*]]: memref, %[[V:.*]]: f32)
   // CHECK: store %[[V]], %[[M]][] : memref
-  linalg.fill(%A, %arg0) : memref, f32
+  linalg.fill(%arg0, %A) : f32, memref
   return
 }
@@ -584,7 +584,7 @@
 // CHECK: %[[V4:.*]] = addi %[[DIM3]], %[[C3]] : index
 // CHECK: %[[V5:.*]] = addi %[[V4]], %[[C2]] : index
 // CHECK: %[[INIT:.*]] = linalg.init_tensor [6, %[[V1]], %[[V2]], %[[V5]]] : tensor<6x?x?x?xf32>
-// CHECK: %[[FILL:.*]] = linalg.fill(%[[INIT]], %{{.*}}) : tensor<6x?x?x?xf32>, f32 -> tensor<6x?x?x?xf32>
+// CHECK: %[[FILL:.*]] = linalg.fill(%{{.*}}, %[[INIT]]) : f32, tensor<6x?x?x?xf32> -> tensor<6x?x?x?xf32>
 // CHECK: %[[SRCDIM:.*]] = memref.dim %[[SRC]], %[[C3]] : tensor<1x2x2x?xf32>
 // CHECK: %[[RESULT:.*]] = tensor.insert_slice %[[SRC]] into %[[FILL]][2, %[[LOW]], 3, 3] [1, 2, 2, %[[SRCDIM]]] [1, 1, 1, 1] : tensor<1x2x2x?xf32> into tensor<6x?x?x?xf32>
 // CHECK: return %[[RESULT]]
diff --git a/mlir/test/Dialect/Vector/vector-transfer-full-partial-split.mlir b/mlir/test/Dialect/Vector/vector-transfer-full-partial-split.mlir
--- a/mlir/test/Dialect/Vector/vector-transfer-full-partial-split.mlir
+++ b/mlir/test/Dialect/Vector/vector-transfer-full-partial-split.mlir
@@ -75,7 +75,7 @@
   // LINALG: scf.yield %[[A]], %[[i]], %[[j]] : memref, index, index
   // LINALG: } else {
   // slow path, fill tmp alloc and yield a memref_casted version of it
-  // LINALG: linalg.fill(%[[alloc]], %cst) : memref<4x8xf32>, f32
+  // LINALG: linalg.fill(%cst, %[[alloc]]) : f32, memref<4x8xf32>
   // LINALG: %[[d0:.*]] = memref.dim %[[A]], %[[c0]] : memref
   // LINALG: %[[sv0:.*]] = affine.min #[[$bounds_map_4]](%[[d0]], %[[i]], %[[c4]])
   // LINALG: %[[sv1:.*]] = affine.min #[[$bounds_map_8]](%[[c8]], %[[j]], %[[c8]])
@@ -167,7 +167,7 @@
   // LINALG-SAME: memref, index, index
   // LINALG: } else {
   // slow path, fill tmp alloc and yield a memref_casted version of it
-  // LINALG: linalg.fill(%[[alloc]], %cst) : memref<4x8xf32>, f32
+  // LINALG: linalg.fill(%cst, %[[alloc]]) : f32, memref<4x8xf32>
   // LINALG: %[[sv0:.*]] = affine.min #[[$bounds_map_4]](%[[c7]], %[[i]], %[[c4]])
   // LINALG: %[[sv1:.*]] = affine.min #[[$bounds_map_8]](%[[c8]], %[[j]], %[[c8]])
   // LINALG: %[[sv:.*]] = memref.subview %[[A]][%[[i]], %[[j]]] [%[[sv0]], %[[sv1]]] [1, 1]
diff --git a/mlir/test/Integration/Dialect/Async/CPU/microbench-linalg-async-parallel-for.mlir b/mlir/test/Integration/Dialect/Async/CPU/microbench-linalg-async-parallel-for.mlir
--- a/mlir/test/Integration/Dialect/Async/CPU/microbench-linalg-async-parallel-for.mlir
+++ b/mlir/test/Integration/Dialect/Async/CPU/microbench-linalg-async-parallel-for.mlir
@@ -65,8 +65,8 @@
   %RHS10 = memref.alloc() {alignment = 64} : memref<1x10xf32>
   %DST10 = memref.alloc() {alignment = 64} : memref<1x10xf32>

-  linalg.fill(%LHS10, %f1) : memref<1x10xf32>, f32
-  linalg.fill(%RHS10, %f1) : memref<1x10xf32>, f32
+  linalg.fill(%f1, %LHS10) : f32, memref<1x10xf32>
+  linalg.fill(%f1, %RHS10) : f32, memref<1x10xf32>

   %LHS = memref.cast %LHS10 : memref<1x10xf32> to memref
   %RHS = memref.cast %RHS10 : memref<1x10xf32> to memref
diff --git a/mlir/test/Integration/Dialect/Linalg/CPU/benchmark_matmul.mlir b/mlir/test/Integration/Dialect/Linalg/CPU/benchmark_matmul.mlir
--- a/mlir/test/Integration/Dialect/Linalg/CPU/benchmark_matmul.mlir
+++ b/mlir/test/Integration/Dialect/Linalg/CPU/benchmark_matmul.mlir
@@ -59,9 +59,9 @@
   %B = memref.alloc() : !row_major_B
   %C = memref.alloc() : !row_major_C

-  linalg.fill(%A, %v1) : !row_major_A, !elem_type_a
-  linalg.fill(%B, %v1) : !row_major_B, !elem_type_b
-  linalg.fill(%C, %v0) : !row_major_C, !elem_type_c
+  linalg.fill(%v1, %A) : !elem_type_a, !row_major_A
+  linalg.fill(%v1, %B) : !elem_type_b, !row_major_B
+  linalg.fill(%v0, %C) : !elem_type_c, !row_major_C

   %c0 = constant 0: index
   %c1 = constant 1: index
@@ -71,7 +71,7 @@
   /// Preheating run:
   scf.for %arg0 = %c0 to %iters step %c1 {
     %z = constant 0.0 : !elem_type_c
-    linalg.fill(%C, %z) : !row_major_C, !elem_type_c
+    linalg.fill(%z, %C) : !elem_type_c, !row_major_C
     call @matmul(%A, %B, %C) : (!row_major_A, !row_major_B, !row_major_C) -> ()
   }
   %t_start_matmul = call @rtclock() : () -> f64
@@ -81,7 +81,7 @@
     // Once linalg on tensors is ready, fusing fill at the register level will
    // be easy.
     %z = constant 0.0 : !elem_type_c
-    linalg.fill(%C, %z) : !row_major_C, !elem_type_c
+    linalg.fill(%z, %C) : !elem_type_c, !row_major_C
     call @matmul(%A, %B, %C) : (!row_major_A, !row_major_B, !row_major_C) -> ()
   }
   %t_end_matmul = call @rtclock() : () -> f64
@@ -90,7 +90,7 @@
   // CHECK: {{^0$}}

   %C_ref = memref.alloc() : !row_major_C
-  linalg.fill(%C_ref, %v0) : !row_major_C, !elem_type_c
+  linalg.fill(%v0, %C_ref) : !elem_type_c, !row_major_C
   linalg.matmul ins(%A, %B : !row_major_A, !row_major_B)
     outs(%C_ref: !row_major_C)
   %act = memref.cast %C : !row_major_C to memref<*xf32>
diff --git a/mlir/test/Integration/Dialect/Linalg/CPU/benchmark_matmul_column_major_as_row_major.mlir b/mlir/test/Integration/Dialect/Linalg/CPU/benchmark_matmul_column_major_as_row_major.mlir
--- a/mlir/test/Integration/Dialect/Linalg/CPU/benchmark_matmul_column_major_as_row_major.mlir
+++ b/mlir/test/Integration/Dialect/Linalg/CPU/benchmark_matmul_column_major_as_row_major.mlir
@@ -69,9 +69,9 @@
   %cB = memref.alloc() : !column_major_B
   %cC = memref.alloc() : !column_major_C

-  linalg.fill(%cA, %f1) : !column_major_A, !elem_type_a
-  linalg.fill(%cB, %f1) : !column_major_B, !elem_type_b
-  linalg.fill(%cC, %f0) : !column_major_C, !elem_type_c
+  linalg.fill(%f1, %cA) : !elem_type_a, !column_major_A
+  linalg.fill(%f1, %cB) : !elem_type_b, !column_major_B
+  linalg.fill(%f0, %cC) : !elem_type_c, !column_major_C

   %c0 = constant 0: index
   %c1 = constant 1: index
@@ -87,7 +87,7 @@
     // This is accounts for about 10-15% perf hit on small sizes.
     // Once linalg on tensors is ready, fusing fill at the register level will
     // be easy.
- linalg.fill(%C, %f0) : !row_major_C, !elem_type_c + linalg.fill(%f0, %C) : !elem_type_c, !row_major_C call @matmul_column_major_as_row_major(%cA, %cB, %cC, %A, %B, %C) : (!column_major_A, !column_major_B, !column_major_C, !row_major_A, !row_major_B, !row_major_C) -> () @@ -98,7 +98,7 @@ // CHECK: {{^0$}} %cC_ref = memref.alloc() : !column_major_C - linalg.fill(%cC_ref, %f0) : !column_major_C, !elem_type_c + linalg.fill(%f0, %cC_ref) : !elem_type_c, !column_major_C linalg.matmul_column_major ins(%cA, %cB : !column_major_A, !column_major_B) outs(%cC_ref: !column_major_C) %act1 = memref.cast %cC : !column_major_C to memref<*xf32> @@ -109,7 +109,7 @@ // CHECK: {{^0$}} %C_ref = memref.alloc() : !row_major_C - linalg.fill(%C_ref, %f0) : !row_major_C, !elem_type_c + linalg.fill(%f0, %C_ref) : !elem_type_c, !row_major_C linalg.matmul ins(%A, %B : !row_major_A, !row_major_B) outs(%C_ref: !row_major_C) %act2 = memref.cast %C : !row_major_C to memref<*xf32> diff --git a/mlir/test/Integration/Dialect/Linalg/CPU/benchmark_matmul_i8_i8_i32.mlir b/mlir/test/Integration/Dialect/Linalg/CPU/benchmark_matmul_i8_i8_i32.mlir --- a/mlir/test/Integration/Dialect/Linalg/CPU/benchmark_matmul_i8_i8_i32.mlir +++ b/mlir/test/Integration/Dialect/Linalg/CPU/benchmark_matmul_i8_i8_i32.mlir @@ -59,9 +59,9 @@ %B = memref.alloc() : !row_major_B %C = memref.alloc() : !row_major_C - linalg.fill(%A, %v1) : !row_major_A, !elem_type_a - linalg.fill(%B, %v1) : !row_major_B, !elem_type_b - linalg.fill(%C, %v0) : !row_major_C, !elem_type_c + linalg.fill(%v1, %A) : !elem_type_a, !row_major_A + linalg.fill(%v1, %B) : !elem_type_b, !row_major_B + linalg.fill(%v0, %C) : !elem_type_c, !row_major_C %c0 = constant 0: index %c1 = constant 1: index @@ -70,7 +70,7 @@ /// Run and dump performance for matmul. /// Preheating run: scf.for %arg0 = %c0 to %iters step %c1 { - linalg.fill(%C, %v0) : !row_major_C, !elem_type_c + linalg.fill(%v0, %C) : !elem_type_c, !row_major_C call @matmul(%A, %B, %C) : (!row_major_A, !row_major_B, !row_major_C) -> () } %t_start_matmul = call @rtclock() : () -> f64 @@ -79,7 +79,7 @@ // This is accounts for about 10-15% perf hit on small sizes. // Once linalg on tensors is ready, fusing fill at the register level will // be easy. 
- linalg.fill(%C, %v0) : !row_major_C, !elem_type_c + linalg.fill(%v0, %C) : !elem_type_c, !row_major_C call @matmul(%A, %B, %C) : (!row_major_A, !row_major_B, !row_major_C) -> () } %t_end_matmul = call @rtclock() : () -> f64 @@ -88,7 +88,7 @@ // CHECK: {{^0$}} %C_ref = memref.alloc() : !row_major_C - linalg.fill(%C_ref, %v0) : !row_major_C, !elem_type_c + linalg.fill(%v0, %C_ref) : !elem_type_c, !row_major_C linalg.matmul_i8_i8_i32 ins(%A, %B : !row_major_A, !row_major_B) outs(%C_ref: !row_major_C) %res = memref.cast %C : !row_major_C to memref<*xi32> diff --git a/mlir/test/Integration/Dialect/Linalg/CPU/matmul-vs-matvec.mlir b/mlir/test/Integration/Dialect/Linalg/CPU/matmul-vs-matvec.mlir --- a/mlir/test/Integration/Dialect/Linalg/CPU/matmul-vs-matvec.mlir +++ b/mlir/test/Integration/Dialect/Linalg/CPU/matmul-vs-matvec.mlir @@ -12,7 +12,7 @@ %x = memref.dim %A, %c0 : memref %y = memref.dim %B, %c1 : memref %C = memref.alloc(%x, %y) : memref - linalg.fill(%C, %f0) : memref, f32 + linalg.fill(%f0, %C) : f32, memref linalg.matmul ins(%A, %B: memref, memref) outs(%C: memref) return %C : memref @@ -26,7 +26,7 @@ %x = memref.dim %A, %c1 : memref %n = memref.dim %B, %c1 : memref %C = memref.alloc(%m, %n) : memref - linalg.fill(%C, %f0) : memref, f32 + linalg.fill(%f0, %C) : f32, memref scf.for %i = %c0 to %n step %c1 { %b = memref.subview %B[0, %i][%x, 1][1, 1] : memref to memref %c = memref.subview %C[0, %i][%m, 1][1, 1] : memref to memref @@ -46,8 +46,8 @@ %val2 = constant 17.0 : f32 %A = memref.alloc(%m, %x) : memref %B = memref.alloc(%x, %n) : memref - linalg.fill(%A, %val1) : memref, f32 - linalg.fill(%B, %val2) : memref, f32 + linalg.fill(%val1, %A) : f32, memref + linalg.fill(%val2, %B) : f32, memref memref.store %val1, %B[%c0, %c0] : memref %C1 = call @matmul(%A, %B) : (memref, memref) -> memref %C2 = call @matvec(%A, %B) : (memref, memref) -> memref diff --git a/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-1d-call.mlir b/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-1d-call.mlir --- a/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-1d-call.mlir +++ b/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-1d-call.mlir @@ -25,7 +25,7 @@ // Creates and returns a 1-D buffer of size %s1 filled with the value %f func @alloc_1d_filled_f32(%s1 : index, %f : f32) -> memref { %buf = memref.alloc(%s1) : memref - linalg.fill(%buf, %f) : memref, f32 + linalg.fill(%f, %buf) : f32, memref return %buf : memref } diff --git a/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-1d-input-ncw-filter-wcf-call.mlir b/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-1d-input-ncw-filter-wcf-call.mlir --- a/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-1d-input-ncw-filter-wcf-call.mlir +++ b/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-1d-input-ncw-filter-wcf-call.mlir @@ -25,7 +25,7 @@ // Creates and returns 3-D buffer of size (%s1, %s2, %s3) filled with the value %f func @alloc_3d_filled_f32(%s1 : index, %s2 : index, %s3 : index, %f : f32) -> memref { %buf = memref.alloc(%s1, %s2, %s3) : memref - linalg.fill(%buf, %f) : memref, f32 + linalg.fill(%f, %buf) : f32, memref return %buf : memref } diff --git a/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-1d-input-nwc-filter-wcf-call.mlir b/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-1d-input-nwc-filter-wcf-call.mlir --- a/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-1d-input-nwc-filter-wcf-call.mlir +++ b/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-1d-input-nwc-filter-wcf-call.mlir @@ -25,7 +25,7 @@ // Creates and 
returns 3-D buffer of size (%s1, %s2, %s3) filled with the value %f func @alloc_3d_filled_f32(%s1 : index, %s2 : index, %s3 : index, %f : f32) -> memref { %buf = memref.alloc(%s1, %s2, %s3) : memref - linalg.fill(%buf, %f) : memref, f32 + linalg.fill(%f, %buf) : f32, memref return %buf : memref } diff --git a/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-1d-ncw-call.mlir b/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-1d-ncw-call.mlir --- a/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-1d-ncw-call.mlir +++ b/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-1d-ncw-call.mlir @@ -25,7 +25,7 @@ // Creates and returns 3-D buffer of size (%s1, %s2, %s3) filled with the value %f func @alloc_3d_filled_f32(%s1 : index, %s2 : index, %s3 : index, %f : f32) -> memref { %buf = memref.alloc(%s1, %s2, %s3) : memref - linalg.fill(%buf, %f) : memref, f32 + linalg.fill(%f, %buf) : f32, memref return %buf : memref } diff --git a/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-1d-nwc-call.mlir b/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-1d-nwc-call.mlir --- a/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-1d-nwc-call.mlir +++ b/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-1d-nwc-call.mlir @@ -25,7 +25,7 @@ // Creates and returns 3-D buffer of size (%s1, %s2, %s3) filled with the value %f func @alloc_3d_filled_f32(%s1 : index, %s2 : index, %s3 : index, %f : f32) -> memref { %buf = memref.alloc(%s1, %s2, %s3) : memref - linalg.fill(%buf, %f) : memref, f32 + linalg.fill(%f, %buf) : f32, memref return %buf : memref } diff --git a/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-2d-call.mlir b/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-2d-call.mlir --- a/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-2d-call.mlir +++ b/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-2d-call.mlir @@ -25,7 +25,7 @@ // Creates and returns a 2-D buffer of size (%s1, %s2) filled with the value %f func @alloc_2d_filled_f32(%s1 : index, %s2 : index, %f : f32) -> memref { %buf = memref.alloc(%s1, %s2) : memref - linalg.fill(%buf, %f) : memref, f32 + linalg.fill(%f, %buf) : f32, memref return %buf : memref } diff --git a/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-2d-input-nchw-filter-hwcf-call.mlir b/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-2d-input-nchw-filter-hwcf-call.mlir --- a/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-2d-input-nchw-filter-hwcf-call.mlir +++ b/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-2d-input-nchw-filter-hwcf-call.mlir @@ -25,7 +25,7 @@ // Creates and returns 4-D buffer of size (%s1, %s2, %s3, %s4) filled with the value %f func @alloc_4d_filled_f32(%s1 : index, %s2 : index, %s3 : index, %s4 : index, %f : f32) -> memref { %buf = memref.alloc(%s1, %s2, %s3, %s4) : memref - linalg.fill(%buf, %f) : memref, f32 + linalg.fill(%f, %buf) : f32, memref return %buf : memref } diff --git a/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-2d-input-nhwc-filter-hwcf-call.mlir b/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-2d-input-nhwc-filter-hwcf-call.mlir --- a/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-2d-input-nhwc-filter-hwcf-call.mlir +++ b/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-2d-input-nhwc-filter-hwcf-call.mlir @@ -25,7 +25,7 @@ // Creates and returns 4-D buffer of size (%s1, %s2, %s3, %s4) filled with the value %f func @alloc_4d_filled_f32(%s1 : index, %s2 : index, %s3 : index, %s4 : index, %f : f32) -> memref { %buf = memref.alloc(%s1, %s2, %s3, %s4) : memref - linalg.fill(%buf, %f) : memref, f32 + 
linalg.fill(%f, %buf) : f32, memref return %buf : memref } diff --git a/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-2d-nchw-call.mlir b/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-2d-nchw-call.mlir --- a/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-2d-nchw-call.mlir +++ b/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-2d-nchw-call.mlir @@ -25,7 +25,7 @@ // Creates and returns 4-D buffer of size (%s1, %s2, %s3, %s4) filled with the value %f func @alloc_4d_filled_f32(%s1 : index, %s2 : index, %s3 : index, %s4 : index, %f : f32) -> memref { %buf = memref.alloc(%s1, %s2, %s3, %s4) : memref - linalg.fill(%buf, %f) : memref, f32 + linalg.fill(%f, %buf) : f32, memref return %buf : memref } diff --git a/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-2d-nhwc-call.mlir b/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-2d-nhwc-call.mlir --- a/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-2d-nhwc-call.mlir +++ b/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-2d-nhwc-call.mlir @@ -25,7 +25,7 @@ // Creates and returns 4-D buffer of size (%s1, %s2, %s3, %s4) filled with the value %f func @alloc_4d_filled_f32(%s1 : index, %s2 : index, %s3 : index, %s4 : index, %f : f32) -> memref { %buf = memref.alloc(%s1, %s2, %s3, %s4) : memref - linalg.fill(%buf, %f) : memref, f32 + linalg.fill(%f, %buf) : f32, memref return %buf : memref } diff --git a/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-3d-call.mlir b/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-3d-call.mlir --- a/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-3d-call.mlir +++ b/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-3d-call.mlir @@ -25,7 +25,7 @@ // Creates and returns 3-D buffer of size (%s1, %s2, %s3) filled with the value %f func @alloc_3d_filled_f32(%s1 : index, %s2 : index, %s3 : index, %f : f32) -> memref { %buf = memref.alloc(%s1, %s2, %s3) : memref - linalg.fill(%buf, %f) : memref, f32 + linalg.fill(%f, %buf) : f32, memref return %buf : memref } diff --git a/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-3d-input-ncdhw-filter-dhwcf-call.mlir b/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-3d-input-ncdhw-filter-dhwcf-call.mlir --- a/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-3d-input-ncdhw-filter-dhwcf-call.mlir +++ b/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-3d-input-ncdhw-filter-dhwcf-call.mlir @@ -25,7 +25,7 @@ // Creates and returns 5-D buffer of size (%s1, %s2, %s3, %s4, %s5) filled with the value %f func @alloc_5d_filled_f32(%s1 : index, %s2 : index, %s3 : index, %s4 : index, %s5 : index, %f : f32) -> memref { %buf = memref.alloc(%s1, %s2, %s3, %s4, %s5) : memref - linalg.fill(%buf, %f) : memref, f32 + linalg.fill(%f, %buf) : f32, memref return %buf : memref } diff --git a/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-3d-input-ndhwc-filter-dhwcf-call.mlir b/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-3d-input-ndhwc-filter-dhwcf-call.mlir --- a/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-3d-input-ndhwc-filter-dhwcf-call.mlir +++ b/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-3d-input-ndhwc-filter-dhwcf-call.mlir @@ -25,7 +25,7 @@ // Creates and returns 5-D buffer of size (%s1, %s2, %s3, %s4, %s5) filled with the value %f func @alloc_5d_filled_f32(%s1 : index, %s2 : index, %s3 : index, %s4 : index, %s5 : index, %f : f32) -> memref { %buf = memref.alloc(%s1, %s2, %s3, %s4, %s5) : memref - linalg.fill(%buf, %f) : memref, f32 + linalg.fill(%f, %buf) : f32, memref return %buf : memref } diff --git 
a/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-3d-ncdhw-call.mlir b/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-3d-ncdhw-call.mlir --- a/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-3d-ncdhw-call.mlir +++ b/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-3d-ncdhw-call.mlir @@ -25,7 +25,7 @@ // Creates and returns 5-D buffer of size (%s1, %s2, %s3, %s4, %s5) filled with the value %f func @alloc_5d_filled_f32(%s1 : index, %s2 : index, %s3 : index, %s4 : index, %s5 : index, %f : f32) -> memref { %buf = memref.alloc(%s1, %s2, %s3, %s4, %s5) : memref - linalg.fill(%buf, %f) : memref, f32 + linalg.fill(%f, %buf) : f32, memref return %buf : memref } diff --git a/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-3d-ndhwc-call.mlir b/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-3d-ndhwc-call.mlir --- a/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-3d-ndhwc-call.mlir +++ b/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-3d-ndhwc-call.mlir @@ -25,7 +25,7 @@ // Creates and returns 5-D buffer of size (%s1, %s2, %s3, %s4, %s5) filled with the value %f func @alloc_5d_filled_f32(%s1 : index, %s2 : index, %s3 : index, %s4 : index, %s5 : index, %f : f32) -> memref { %buf = memref.alloc(%s1, %s2, %s3, %s4, %s5) : memref - linalg.fill(%buf, %f) : memref, f32 + linalg.fill(%f, %buf) : f32, memref return %buf : memref } diff --git a/mlir/test/mlir-cpu-runner/async.mlir b/mlir/test/mlir-cpu-runner/async.mlir --- a/mlir/test/mlir-cpu-runner/async.mlir +++ b/mlir/test/mlir-cpu-runner/async.mlir @@ -26,7 +26,7 @@ %c4 = constant 4.0 : f32 %A = memref.alloc() : memref<4xf32> - linalg.fill(%A, %c0) : memref<4xf32>, f32 + linalg.fill(%c0, %A) : f32, memref<4xf32> // CHECK: [0, 0, 0, 0] %U = memref.cast %A : memref<4xf32> to memref<*xf32> diff --git a/mlir/test/mlir-cpu-runner/sgemm_naive_codegen.mlir b/mlir/test/mlir-cpu-runner/sgemm_naive_codegen.mlir --- a/mlir/test/mlir-cpu-runner/sgemm_naive_codegen.mlir +++ b/mlir/test/mlir-cpu-runner/sgemm_naive_codegen.mlir @@ -7,14 +7,14 @@ %cf1 = constant 1.00000e+00 : f32 - linalg.fill(%A, %cf1) : memref<16x16xf32>, f32 - linalg.fill(%B, %cf1) : memref<16x16xf32>, f32 + linalg.fill(%cf1, %A) : f32, memref<16x16xf32> + linalg.fill(%cf1, %B) : f32, memref<16x16xf32> %reps = constant 1 : index %t_start = call @rtclock() : () -> f64 affine.for %arg0 = 0 to 5 { - linalg.fill(%C, %cf1) : memref<16x16xf32>, f32 + linalg.fill(%cf1, %C) : f32, memref<16x16xf32> call @sgemm_naive(%A, %B, %C) : (memref<16x16xf32>, memref<16x16xf32>, memref<16x16xf32>) -> () } %t_end = call @rtclock() : () -> f64 diff --git a/mlir/test/mlir-cpu-runner/unranked_memref.mlir b/mlir/test/mlir-cpu-runner/unranked_memref.mlir --- a/mlir/test/mlir-cpu-runner/unranked_memref.mlir +++ b/mlir/test/mlir-cpu-runner/unranked_memref.mlir @@ -45,18 +45,18 @@ %f10 = constant 10.00000e+00 : f32 %V = memref.cast %A : memref<10x3xf32, 0> to memref - linalg.fill(%V, %f10) : memref, f32 + linalg.fill(%f10, %V) : f32, memref %U = memref.cast %A : memref<10x3xf32, 0> to memref<*xf32> call @print_memref_f32(%U) : (memref<*xf32>) -> () %V2 = memref.cast %U : memref<*xf32> to memref - linalg.fill(%V2, %f5) : memref, f32 + linalg.fill(%f5, %V2) : f32, memref %U2 = memref.cast %V2 : memref to memref<*xf32> call @print_memref_f32(%U2) : (memref<*xf32>) -> () %V3 = memref.cast %V2 : memref to memref<*xf32> %V4 = memref.cast %V3 : memref<*xf32> to memref - linalg.fill(%V4, %f2) : memref, f32 + linalg.fill(%f2, %V4) : f32, memref %U3 = memref.cast %V2 : memref to memref<*xf32> call 
@print_memref_f32(%U3) : (memref<*xf32>) -> () @@ -81,7 +81,7 @@ func @return_two_var_memref_caller() { %0 = memref.alloca() : memref<4x3xf32> %c0f32 = constant 1.0 : f32 - linalg.fill(%0, %c0f32) : memref<4x3xf32>, f32 + linalg.fill(%c0f32, %0) : f32, memref<4x3xf32> %1:2 = call @return_two_var_memref(%0) : (memref<4x3xf32>) -> (memref<*xf32>, memref<*xf32>) call @print_memref_f32(%1#0) : (memref<*xf32>) -> () call @print_memref_f32(%1#1) : (memref<*xf32>) -> () @@ -96,7 +96,7 @@ func @return_var_memref_caller() { %0 = memref.alloca() : memref<4x3xf32> %c0f32 = constant 1.0 : f32 - linalg.fill(%0, %c0f32) : memref<4x3xf32>, f32 + linalg.fill(%c0f32, %0) : f32, memref<4x3xf32> %1 = call @return_var_memref(%0) : (memref<4x3xf32>) -> memref<*xf32> call @print_memref_f32(%1) : (memref<*xf32>) -> () return diff --git a/mlir/test/mlir-cpu-runner/utils.mlir b/mlir/test/mlir-cpu-runner/utils.mlir --- a/mlir/test/mlir-cpu-runner/utils.mlir +++ b/mlir/test/mlir-cpu-runner/utils.mlir @@ -19,7 +19,7 @@ %f = constant 2.00000e+00 : f32 %A = memref.alloc() : memref<16xf32> %B = memref.cast %A: memref<16xf32> to memref - linalg.fill(%B, %f) : memref, f32 + linalg.fill(%f, %B) : f32, memref %U = memref.cast %B : memref to memref<*xf32> call @print_memref_f32(%U): (memref<*xf32>) -> () memref.dealloc %A : memref<16xf32> @@ -33,7 +33,7 @@ %f4 = constant 4.00000e+00 : f32 %A = memref.alloc() : memref<3x4x5xf32> %B = memref.cast %A: memref<3x4x5xf32> to memref - linalg.fill(%B, %f) : memref, f32 + linalg.fill(%f, %B) : f32, memref %c2 = constant 2 : index memref.store %f4, %B[%c2, %c2, %c2]: memref diff --git a/mlir/test/python/dialects/linalg/ops.py b/mlir/test/python/dialects/linalg/ops.py --- a/mlir/test/python/dialects/linalg/ops.py +++ b/mlir/test/python/dialects/linalg/ops.py @@ -59,7 +59,7 @@ # CHECK-LABEL: func @fill_tensor # CHECK-SAME: %[[OUT:[0-9a-z]+]]: tensor<12x?xf32> # CHECK-NEXT: %[[CST:.*]] = constant 0.0{{.*}} : f32 - # CHECK-NEXT: %[[RES:.*]] = linalg.fill(%[[OUT]], %[[CST]]) : tensor<12x?xf32>, f32 -> tensor<12x?xf32> + # CHECK-NEXT: %[[RES:.*]] = linalg.fill(%[[CST]], %[[OUT]]) : f32, tensor<12x?xf32> -> tensor<12x?xf32> # CHECK-NEXT: return %[[RES]] : tensor<12x?xf32> @builtin.FuncOp.from_py_func( RankedTensorType.get((12, -1), f32)) @@ -73,7 +73,7 @@ # CHECK-LABEL: func @fill_buffer # CHECK-SAME: %[[OUT:[0-9a-z]+]]: memref<12x?xf32> # CHECK-NEXT: %[[CST:.*]] = constant 0.0{{.*}} : f32 - # CHECK-NEXT: linalg.fill(%[[OUT]], %[[CST]]) : memref<12x?xf32>, f32 + # CHECK-NEXT: linalg.fill(%[[CST]], %[[OUT]]) : f32, memref<12x?xf32> # CHECK-NEXT: return @builtin.FuncOp.from_py_func( MemRefType.get((12, -1), f32)) diff --git a/mlir/test/python/dialects/linalg/opsrun.py b/mlir/test/python/dialects/linalg/opsrun.py --- a/mlir/test/python/dialects/linalg/opsrun.py +++ b/mlir/test/python/dialects/linalg/opsrun.py @@ -25,9 +25,9 @@ %A = memref.alloc() : memref<4x16xf32> %B = memref.alloc() : memref<16x8xf32> %C = memref.alloc() : memref<4x8xf32> - linalg.fill(%A, %v1) : memref<4x16xf32>, f32 - linalg.fill(%B, %v2) : memref<16x8xf32>, f32 - linalg.fill(%C, %v0) : memref<4x8xf32>, f32 + linalg.fill(%v1, %A) : f32, memref<4x16xf32> + linalg.fill(%v2, %B) : f32, memref<16x8xf32> + linalg.fill(%v0, %C) : f32, memref<4x8xf32> call @matmul_on_buffers(%A, %B, %C) : (memref<4x16xf32>, memref<16x8xf32>, memref<4x8xf32>) -> ()
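For reference, the two assembly forms of `linalg.fill` after this change, with the value first and the output second, exactly as exercised by the tests above. This is a minimal standalone sketch, not part of the patch; the function and value names are illustrative:

func @fill_example(%out : tensor<12x?xf32>) -> tensor<12x?xf32> {
  %cst = constant 0.0 : f32
  %buf = memref.alloc() : memref<4x8xf32>
  // Buffer form: fills %buf in place and produces no result.
  linalg.fill(%cst, %buf) : f32, memref<4x8xf32>
  // Tensor form: yields the filled tensor as a new SSA value.
  %res = linalg.fill(%cst, %out) : f32, tensor<12x?xf32> -> tensor<12x?xf32>
  memref.dealloc %buf : memref<4x8xf32>
  return %res : tensor<12x?xf32>
}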