diff --git a/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp b/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp
--- a/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp
+++ b/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp
@@ -1593,7 +1593,7 @@
 TransposeOp::getRegionBuilder() {
   return [](mlir::ImplicitLocOpBuilder &b, mlir::Block &block,
             mlir::ArrayRef<mlir::NamedAttribute>) {
-    b.create<linalg::YieldOp>(block.getArguments().back());
+    b.create<linalg::YieldOp>(block.getArguments().front());
   };
 }
 
diff --git a/mlir/test/Dialect/Linalg/vectorization.mlir b/mlir/test/Dialect/Linalg/vectorization.mlir
--- a/mlir/test/Dialect/Linalg/vectorization.mlir
+++ b/mlir/test/Dialect/Linalg/vectorization.mlir
@@ -1522,3 +1522,69 @@
   %1 = get_closest_isolated_parent %0 : (!pdl.operation) -> !pdl.operation
   %2 = transform.structured.vectorize %1
 }
+
+// -----
+
+func.func @vectorize_map(%arg0: memref<64xf32>,
+    %arg1: memref<64xf32>, %arg2: memref<64xf32>) {
+  linalg.map ins(%arg0, %arg1 : memref<64xf32>, memref<64xf32>)
+             outs(%arg2 : memref<64xf32>)
+    (%in: f32, %in_0: f32) {
+      %0 = arith.addf %in, %in_0 : f32
+      linalg.yield %0 : f32
+    }
+  return
+}
+// CHECK-LABEL: func @vectorize_map
+// CHECK:         %[[LHS:.*]] = vector.transfer_read
+// CHECK-NEXT:    %[[RHS:.*]] = vector.transfer_read
+// CHECK-NEXT:    arith.addf %[[LHS]], %[[RHS]] : vector<64xf32>
+
+transform.sequence failures(propagate) {
+^bb1(%arg1: !pdl.operation):
+  %0 = transform.structured.match ops{["linalg.map"]} in %arg1
+  %1 = get_closest_isolated_parent %0 : (!pdl.operation) -> !pdl.operation
+  %2 = transform.structured.vectorize %1
+}
+
+// -----
+
+func.func @vectorize_transpose(%arg0: memref<16x32x64xf32>,
+                               %arg1: memref<32x64x16xf32>) {
+  linalg.transpose ins(%arg0 : memref<16x32x64xf32>)
+                   outs(%arg1 : memref<32x64x16xf32>) permutation = [1, 2, 0]
+  return
+}
+// CHECK-LABEL: func @vectorize_transpose
+// CHECK:         vector.transpose
+// CHECK-SAME:      [1, 2, 0] : vector<16x32x64xf32> to vector<32x64x16xf32>
+
+transform.sequence failures(propagate) {
+^bb1(%arg1: !pdl.operation):
+  %0 = transform.structured.match ops{["linalg.transpose"]} in %arg1
+  %1 = get_closest_isolated_parent %0 : (!pdl.operation) -> !pdl.operation
+  %2 = transform.structured.vectorize %1
+}
+
+// -----
+
+func.func @vectorize_reduce(%arg0: memref<16x32x64xf32>,
+                  %arg1: memref<16x64xf32>) {
+  linalg.reduce ins(%arg0 : memref<16x32x64xf32>)
+                outs(%arg1 : memref<16x64xf32>) dimensions = [1]
+    (%in: f32, %init: f32) {
+      %0 = arith.addf %in, %init : f32
+      linalg.yield %0 : f32
+    }
+  return
+}
+// CHECK-LABEL: func @vectorize_reduce
+// CHECK:         vector.multi_reduction <add>
+// CHECK-SAME:    : vector<16x32x64xf32> to vector<16x64xf32>
+
+transform.sequence failures(propagate) {
+^bb1(%arg1: !pdl.operation):
+  %0 = transform.structured.match ops{["linalg.reduce"]} in %arg1
+  %1 = get_closest_isolated_parent %0 : (!pdl.operation) -> !pdl.operation
+  %2 = transform.structured.vectorize %1
+}
diff --git a/mlir/test/Interfaces/TilingInterface/lower-to-loops-using-interface.mlir b/mlir/test/Interfaces/TilingInterface/lower-to-loops-using-interface.mlir
--- a/mlir/test/Interfaces/TilingInterface/lower-to-loops-using-interface.mlir
+++ b/mlir/test/Interfaces/TilingInterface/lower-to-loops-using-interface.mlir
@@ -208,7 +208,7 @@
 // CHECK:     scf.for %[[I:.*]] = %[[C0]] to %[[C16]] step %[[C1]] {
 // CHECK:       scf.for %[[J:.*]] = %[[C0]] to %[[C32]] step %[[C1]] {
 // CHECK:         scf.for %[[K:.*]] = %[[C0]] to %[[C64]] step %[[C1]] {
-// CHECK:           %[[ELEM:.*]] = memref.load %[[OUT]][%[[J]], %[[K]], %[[I]]]
+// CHECK:           %[[ELEM:.*]] = memref.load %[[IN]][%[[I]], %[[J]], %[[K]]]
 // CHECK:           memref.store %[[ELEM]], %[[OUT]][%[[J]], %[[K]], %[[I]]]
 
 // -----