diff --git a/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp b/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp --- a/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp +++ b/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp @@ -1593,7 +1593,7 @@ TransposeOp::getRegionBuilder() { return [](mlir::ImplicitLocOpBuilder &b, mlir::Block &block, mlir::ArrayRef) { - b.create(block.getArguments().back()); + b.create(block.getArguments().front()); }; } diff --git a/mlir/test/Dialect/Linalg/vectorization.mlir b/mlir/test/Dialect/Linalg/vectorization.mlir --- a/mlir/test/Dialect/Linalg/vectorization.mlir +++ b/mlir/test/Dialect/Linalg/vectorization.mlir @@ -1522,3 +1522,69 @@ %1 = get_closest_isolated_parent %0 : (!pdl.operation) -> !pdl.operation %2 = transform.structured.vectorize %1 } + +// ----- + +func.func @vectorize_map(%arg0: memref<64xf32>, + %arg1: memref<64xf32>, %arg2: memref<64xf32>) { + linalg.map ins(%arg0, %arg1 : memref<64xf32>, memref<64xf32>) + outs(%arg2 : memref<64xf32>) + (%in: f32, %in_0: f32) { + %0 = arith.addf %in, %in_0 : f32 + linalg.yield %0 : f32 + } + return +} +// CHECK-LABEL: func @vectorize_map +// CHECK: %[[LHS:.*]] = vector.transfer_read +// CHECK-NEXT: %[[RHS:.*]] = vector.transfer_read +// CHECK-NEXT: arith.addf %[[LHS]], %[[RHS]] : vector<64xf32> + +transform.sequence failures(propagate) { +^bb1(%arg1: !pdl.operation): + %0 = transform.structured.match ops{["linalg.map"]} in %arg1 + %1 = get_closest_isolated_parent %0 : (!pdl.operation) -> !pdl.operation + %2 = transform.structured.vectorize %1 +} + +// ----- + +func.func @vectorize_transpose(%arg0: memref<16x32x64xf32>, + %arg1: memref<32x64x16xf32>) { + linalg.transpose ins(%arg0 : memref<16x32x64xf32>) + outs(%arg1 : memref<32x64x16xf32>) permutation = [1, 2, 0] + return +} +// CHECK-LABEL: func @vectorize_transpose +// CHECK: vector.transpose +// CHECK-SAME: [1, 2, 0] : vector<16x32x64xf32> to vector<32x64x16xf32> + +transform.sequence failures(propagate) { +^bb1(%arg1: !pdl.operation): + %0 = transform.structured.match ops{["linalg.transpose"]} in %arg1 + %1 = get_closest_isolated_parent %0 : (!pdl.operation) -> !pdl.operation + %2 = transform.structured.vectorize %1 +} + +// ----- + +func.func @vectorize_reduce(%arg0: memref<16x32x64xf32>, + %arg1: memref<16x64xf32>) { + linalg.reduce ins(%arg0 : memref<16x32x64xf32>) + outs(%arg1 : memref<16x64xf32>) dimensions = [1] + (%in: f32, %init: f32) { + %0 = arith.addf %in, %init : f32 + linalg.yield %0 : f32 + } + return +} +// CHECK-LABEL: func @vectorize_reduce +// CHECK: vector.multi_reduction +// CHECK-SAME: : vector<16x32x64xf32> to vector<16x64xf32> + +transform.sequence failures(propagate) { +^bb1(%arg1: !pdl.operation): + %0 = transform.structured.match ops{["linalg.reduce"]} in %arg1 + %1 = get_closest_isolated_parent %0 : (!pdl.operation) -> !pdl.operation + %2 = transform.structured.vectorize %1 +} diff --git a/mlir/test/Interfaces/TilingInterface/lower-to-loops-using-interface.mlir b/mlir/test/Interfaces/TilingInterface/lower-to-loops-using-interface.mlir --- a/mlir/test/Interfaces/TilingInterface/lower-to-loops-using-interface.mlir +++ b/mlir/test/Interfaces/TilingInterface/lower-to-loops-using-interface.mlir @@ -208,7 +208,7 @@ // CHECK: scf.for %[[I:.*]] = %[[C0]] to %[[C16]] step %[[C1]] { // CHECK: scf.for %[[J:.*]] = %[[C0]] to %[[C32]] step %[[C1]] { // CHECK: scf.for %[[K:.*]] = %[[C0]] to %[[C64]] step %[[C1]] { -// CHECK: %[[ELEM:.*]] = memref.load %[[OUT]][%[[J]], %[[K]], %[[I]]] +// CHECK: %[[ELEM:.*]] = memref.load %[[IN]][%[[I]], %[[J]], %[[K]]] // CHECK: memref.store %[[ELEM]], %[[OUT]][%[[J]], %[[K]], %[[I]]] // -----