diff --git a/mlir/lib/Dialect/Linalg/Transforms/DropUnitDims.cpp b/mlir/lib/Dialect/Linalg/Transforms/DropUnitDims.cpp
--- a/mlir/lib/Dialect/Linalg/Transforms/DropUnitDims.cpp
+++ b/mlir/lib/Dialect/Linalg/Transforms/DropUnitDims.cpp
@@ -585,83 +585,12 @@
 };
 } // namespace
 
-namespace {
-/// Convert `extract_slice` operations to rank-reduced versions.
-struct RankReducedExtractSliceOp
-    : public OpRewritePattern<tensor::ExtractSliceOp> {
-  using OpRewritePattern<tensor::ExtractSliceOp>::OpRewritePattern;
-
-  LogicalResult matchAndRewrite(tensor::ExtractSliceOp sliceOp,
-                                PatternRewriter &rewriter) const override {
-    RankedTensorType resultType = sliceOp.getType();
-    SmallVector<OpFoldResult> offsets = sliceOp.getMixedOffsets();
-    SmallVector<OpFoldResult> sizes = sliceOp.getMixedSizes();
-    SmallVector<OpFoldResult> strides = sliceOp.getMixedStrides();
-    auto reassociation = getReassociationMapForFoldingUnitDims(sizes);
-    if (!reassociation ||
-        reassociation->size() == static_cast<size_t>(resultType.getRank()))
-      return failure();
-    auto rankReducedType =
-        tensor::ExtractSliceOp::inferCanonicalRankReducedResultType(
-            reassociation->size(), sliceOp.getSourceType(), offsets, sizes,
-            strides)
-            .cast<RankedTensorType>();
-
-    Location loc = sliceOp.getLoc();
-    Value newSlice = rewriter.create<tensor::ExtractSliceOp>(
-        loc, rankReducedType, sliceOp.getSource(), offsets, sizes, strides);
-    rewriter.replaceOpWithNewOp<tensor::ExpandShapeOp>(
-        sliceOp, resultType, newSlice, *reassociation);
-    return success();
-  }
-};
-
-/// Convert `insert_slice` operations to rank-reduced versions.
-/// This patterns works with both InsertSliceOp and ParallelInsertSliceOp.
-template <typename InsertOpTy>
-struct RankReducedInsertSliceOp : public OpRewritePattern<InsertOpTy> {
-  using OpRewritePattern<InsertOpTy>::OpRewritePattern;
-
-  LogicalResult matchAndRewrite(InsertOpTy insertSliceOp,
-                                PatternRewriter &rewriter) const override {
-    RankedTensorType sourceType = insertSliceOp.getSourceType();
-    SmallVector<OpFoldResult> offsets = insertSliceOp.getMixedOffsets();
-    SmallVector<OpFoldResult> sizes = insertSliceOp.getMixedSizes();
-    SmallVector<OpFoldResult> strides = insertSliceOp.getMixedStrides();
-    auto reassociation = getReassociationMapForFoldingUnitDims(sizes);
-    if (!reassociation ||
-        reassociation->size() == static_cast<size_t>(sourceType.getRank()))
-      return failure();
-    Location loc = insertSliceOp.getLoc();
-    tensor::CollapseShapeOp reshapedSource;
-    {
-      OpBuilder::InsertionGuard g(rewriter);
-      // The only difference between InsertSliceOp and ParallelInsertSliceOp is
-      // the insertion point is just before the ParallelCombiningOp in the
-      // parallel case.
-      if (std::is_same<InsertOpTy, tensor::ParallelInsertSliceOp>::value)
-        rewriter.setInsertionPoint(insertSliceOp->getParentOp());
-      reshapedSource = rewriter.create<tensor::CollapseShapeOp>(
-          loc, insertSliceOp.getSource(), *reassociation);
-    }
-    rewriter.replaceOpWithNewOp<InsertOpTy>(
-        insertSliceOp, reshapedSource, insertSliceOp.getDest(),
-        insertSliceOp.getMixedOffsets(), insertSliceOp.getMixedSizes(),
-        insertSliceOp.getMixedStrides());
-    return success();
-  }
-};
-} // namespace
-
 /// Patterns that are used to canonicalize the use of unit-extent dims for
 /// broadcasting.
 void mlir::linalg::populateFoldUnitExtentDimsPatterns(
     RewritePatternSet &patterns) {
   auto *context = patterns.getContext();
-  patterns.add<FoldUnitDimLoops, AddInitOperandsToInput, ReplaceUnitExtents,
-               RankReducedExtractSliceOp,
-               RankReducedInsertSliceOp<tensor::InsertSliceOp>,
-               RankReducedInsertSliceOp<tensor::ParallelInsertSliceOp>>(
+  patterns.add<FoldUnitDimLoops, AddInitOperandsToInput, ReplaceUnitExtents>(
       context);
   linalg::FillOp::getCanonicalizationPatterns(patterns, context);
   tensor::CollapseShapeOp::getCanonicalizationPatterns(patterns, context);
diff --git a/mlir/test/Dialect/Linalg/drop-unit-extent-dims.mlir b/mlir/test/Dialect/Linalg/drop-unit-extent-dims.mlir
--- a/mlir/test/Dialect/Linalg/drop-unit-extent-dims.mlir
+++ b/mlir/test/Dialect/Linalg/drop-unit-extent-dims.mlir
@@ -1,212 +1,6 @@
 // RUN: mlir-opt %s -split-input-file -pass-pipeline="builtin.module(func.func(linalg-fold-unit-extent-dims))" | FileCheck %s
 
-#accesses = [
-  affine_map<(i, j, k, l, m) -> (i, k, m)>,
-  affine_map<(i, j, k, l, m) -> ()>,
-  affine_map<(i, j, k, l, m) -> (i, k, j, l, m)>
-]
-
-#trait = {
-  iterator_types = ["parallel", "parallel", "parallel", "parallel", "parallel"],
-  indexing_maps = #accesses,
-  library_call = "some_external_func"
-}
-
-func.func @drop_one_trip_loops(%arg0 : tensor<?x1x?xf32>, %arg1 : f32, %shape: tensor<?x1x?x1x?xf32>) -> tensor<?x1x?x1x?xf32> {
-  %0 = linalg.generic #trait
-     ins(%arg0, %arg1 : tensor<?x1x?xf32>, f32)
-    outs(%shape : tensor<?x1x?x1x?xf32>) {
-       ^bb0(%arg2 : f32, %arg3 : f32, %arg4 : f32) :
-         linalg.yield %arg3 : f32
-       } -> tensor<?x1x?x1x?xf32>
-  return %0 : tensor<?x1x?x1x?xf32>
-}
-//   CHECK-DAG: #[[$MAP1:.*]] = affine_map<(d0, d1, d2) -> (d0, d2)>
-//   CHECK-DAG: #[[$MAP2:.*]] = affine_map<(d0, d1, d2) -> ()>
-//   CHECK-DAG: #[[$MAP3:.*]] = affine_map<(d0, d1, d2) -> (d0, d1, d2)>
-// CHECK-LABEL: func @drop_one_trip_loops
-//       CHECK: tensor.collapse_shape %{{.*}} {{\[}}[0, 1], [2]]
-//       CHECK: linalg.generic
-//  CHECK-SAME:   indexing_maps = [#[[$MAP1]], #[[$MAP2]], #[[$MAP3]]]
-//  CHECK-SAME:   iterator_types = ["parallel", "parallel", "parallel"]
-//       CHECK: tensor.expand_shape %{{.*}} {{\[}}[0, 1], [2, 3], [4]]
-
-// -----
-
-#accesses = [
-  affine_map<(i, j, k, l, m) -> (i, k, m)>,
-  affine_map<(i, j, k, l, m) -> (i, k, j, l, m)>
-]
-
-#trait = {
-  iterator_types = ["parallel", "parallel", "parallel", "parallel", "parallel"],
-  indexing_maps = #accesses,
-  library_call = "some_external_func"
-}
-
-func.func @drop_one_trip_loops_indexed
-  (%arg0 : tensor<?x1x?xi32>, %shape: tensor<?x1x?x1x?xi32>) -> tensor<?x1x?x1x?xi32>
-{
-  %0 = linalg.generic #trait
-     ins(%arg0 : tensor<?x1x?xi32>)
-    outs(%shape: tensor<?x1x?x1x?xi32>) {
-       ^bb0(%arg6 : i32, %arg7 : i32) :
-         %idx0 = linalg.index 0 : index
-         %idx1 = linalg.index 1 : index
-         %idx2 = linalg.index 2 : index
-         %idx3 = linalg.index 3 : index
-         %idx4 = linalg.index 4 : index
-         %1 = arith.addi %idx0, %idx1 : index
-         %2 = arith.subi %1, %idx2 : index
-         %3 = arith.subi %2, %idx3 : index
-         %4 = arith.addi %3, %idx4 : index
-         %5 = arith.index_cast %4 : index to i32
-         %6 = arith.addi %5, %arg6 : i32
-         linalg.yield %6 : i32
-       } -> tensor<?x1x?x1x?xi32>
-  return %0 : tensor<?x1x?x1x?xi32>
-}
-// The subtractions disappear the access map of the output tensor maps its unit
-// dimensions 1 and 3 to the index dimensions 2 and 3.
-// CHECK-LABEL: func @drop_one_trip_loops_indexed
-//       CHECK:   linalg.generic
-//       CHECK:   ^{{.+}}(
-//  CHECK-SAME:     %[[ARG4:[a-zA-Z0-9]+]]: i32, %{{.*}}: i32)
-//       CHECK:     %[[IDX0:.+]] = linalg.index 0 : index
-//       CHECK:     %[[IDX1:.+]] = linalg.index 1 : index
-//       CHECK:     %[[IDX2:.+]] = linalg.index 2 : index
-//       CHECK:     %[[T3:.+]] = arith.addi %[[IDX0]], %[[IDX1]]
-//       CHECK:     %[[T4:.+]] = arith.addi %[[T3]], %[[IDX2]]
-//       CHECK:     %[[T5:.+]] = arith.index_cast %[[T4]] : index to i32
-//       CHECK:     %[[T6:.+]] = arith.addi %[[T5]], %[[ARG4]] : i32
-//       CHECK:     linalg.yield %[[T6]] : i32
-
-// -----
-
-#map0 = affine_map<(i, j) -> (i, j)>
-#access = [#map0, #map0]
-#trait = {
-  iterator_types = ["parallel", "parallel"],
-  indexing_maps = #access,
-  library_call = "some_external_func"
-}
-
-func.func @drop_all_loops(%arg0 : tensor<1x1xf32>) -> tensor<1x1xf32>
-{
-  %0 = linalg.generic #trait
-     ins(%arg0 : tensor<1x1xf32>)
-    outs(%arg0 : tensor<1x1xf32>) {
-       ^bb0(%arg1: f32, %arg2: f32) :
-         linalg.yield %arg1 : f32
-       } -> tensor<1x1xf32>
-  return %0 : tensor<1x1xf32>
-}
-//       CHECK: #[[$MAP0:.*]] = affine_map<() -> ()>
-// CHECK-LABEL: func @drop_all_loops
-//       CHECK:   tensor.collapse_shape %{{.*}} []
-//       CHECK:   linalg.generic
-//  CHECK-SAME:     indexing_maps = [#[[$MAP0]], #[[$MAP0]]]
-//  CHECK-SAME:     iterator_types = []
-
-// -----
-
-#map0 = affine_map<(i, j) -> (i, j)>
-#access = [#map0, #map0]
-#trait = {
-  iterator_types = ["parallel", "parallel"],
-  indexing_maps = #access,
-  library_call = "some_external_func"
-}
-
-func.func @drop_all_loops_indexed
-  (%arg0 : tensor<1x1xi32>) -> tensor<1x1xi32>{
-  %0 = linalg.generic #trait
-     ins(%arg0 : tensor<1x1xi32>)
-    outs(%arg0 : tensor<1x1xi32>) {
-       ^bb0(%arg3: i32, %arg4: i32) :
-         %idx0 = linalg.index 0 : index
-         %idx1 = linalg.index 1 : index
-         %1 = arith.addi %idx0, %idx1 : index
-         %2 = arith.index_cast %1 : index to i32
-         %3 = arith.addi %2, %arg3 : i32
-         linalg.yield %3 : i32
-       } -> tensor<1x1xi32>
-  return %0 : tensor<1x1xi32>
-}
-
-// CHECK-LABEL: func @drop_all_loops_indexed
-//       CHECK:   linalg.generic
-//       CHECK:   ^{{.+}}(%[[ARG1:.+]]: i32, %[[ARG2:.+]]: i32)
-//       CHECK:     linalg.yield %[[ARG1]] : i32
-
-// -----
-
-#accesses = [
-  affine_map<(d0) -> (0, d0)>,
-  affine_map<(d0) -> (d0)>
-]
-
-#trait = {
-  indexing_maps = #accesses,
-  iterator_types = ["parallel"],
-  library_call = "some_external_fn"
-}
-
-func.func @leading_dim_1_canonicalization(%arg0: tensor<1x5xf32>, %shape: tensor<5xf32>) -> tensor<5xf32> {
-  %0 = linalg.generic #trait
-     ins(%arg0 : tensor<1x5xf32>)
-    outs(%shape : tensor<5xf32>) {
-  ^bb0(%arg2: f32, %arg3: f32):
-    linalg.yield %arg2 : f32
-  } -> tensor<5xf32>
-  return %0 : tensor<5xf32>
-}
-//   CHECK: #[[$MAP1:.*]] = affine_map<(d0) -> (d0)>
-
-// CHECK-LABEL: func @leading_dim_1_canonicalization
-//       CHECK:   tensor.collapse_shape %{{.*}} {{\[}}[0, 1]]
-//       CHECK:   linalg.generic
-//  CHECK-SAME:     indexing_maps = [#[[$MAP1]], #[[$MAP1]]]
-//  CHECK-SAME:     iterator_types = ["parallel"]
-
-// -----
-
-#accesses = [
-  affine_map<(d0, d1) -> (0, d1)>,
-  affine_map<(d0, d1) -> (d0, 0)>,
-  affine_map<(d0, d1) -> (d0, d1)>
-]
-
-#trait = {
-  indexing_maps = #accesses,
-  iterator_types = ["parallel", "parallel"],
-  library_call = "some_external_fn"
-}
-
-func.func @broadcast_test(%arg0 : tensor<5xf32>, %arg1 : tensor<5xf32>, %shape : tensor<5x5xf32>) -> tensor<5x5xf32>
-{
-  %0 = tensor.expand_shape %arg0 [[0, 1]] : tensor<5xf32> into tensor<1x5xf32>
-  %1 = tensor.expand_shape %arg1 [[0, 1]] : tensor<5xf32> into tensor<5x1xf32>
-  %2 = linalg.generic #trait
-     ins(%0, %1 : tensor<1x5xf32>, tensor<5x1xf32>)
-    outs(%shape : tensor<5x5xf32>) {
-       ^bb0(%arg3: f32, %arg4: f32, %arg5: f32):
-         %3 = arith.addf %arg3, %arg4 : f32
-         linalg.yield %3 : f32
-       } -> tensor<5x5xf32>
-  return %2 : tensor<5x5xf32>
-}
-//   CHECK-DAG: #[[$MAP0:.*]] = affine_map<(d0, d1) -> (d1)>
-//   CHECK-DAG: #[[$MAP1:.*]] = affine_map<(d0, d1) -> (d0)>
-//   CHECK-DAG: #[[$MAP2:.*]] = affine_map<(d0, d1) -> (d0, d1)>
-// CHECK-LABEL: func @broadcast_test
-//   CHECK-NOT:   linalg.tensor_{{.*}}shape
-//       CHECK:   linalg.generic
-//  CHECK-SAME:     indexing_maps = [#[[$MAP0]], #[[$MAP1]], #[[$MAP2]]]
-//  CHECK-SAME:     iterator_types = ["parallel", "parallel"]
-//   CHECK-NOT:   linalg.tensor_{{.*}}shape
 
-// -----
 
 #accesses = [
   affine_map<(d0, d1) -> (0, 0)>,
@@ -296,34 +90,6 @@
 //       CHECK: %[[GENERIC_RESHAPE:.+]] = tensor.expand_shape %[[GENERIC]] [] : tensor<f32> into tensor<1xf32>
 //       CHECK: return %[[GENERIC_RESHAPE:.+]] : tensor<1xf32>
 
-
-// -----
-
-func.func @fold_slice(
-    %arg0 : tensor<1x?x?x1x?x1x1xf32>, %arg1 : tensor<1x?x?x?x?x1x1xf32>,
-    %arg2 : index, %arg3 : index, %arg4 : index, %arg5 : index,
-    %arg6 : index, %arg7 : index) -> (tensor<1x?x?x1x?x1x1xf32>, tensor<1x?x?x1x?x1x1xf32>) {
-  %0 = tensor.extract_slice %arg0[0, %arg2, %arg3, 0, %arg4, 0, 0]
-                             [1, %arg5, %arg6, 1, %arg7, 1, 1] [1, 1, 1, 1, 1, 1, 1] :
-      tensor<1x?x?x1x?x1x1xf32> to tensor<1x?x?x1x?x1x1xf32>
-  %1 = tensor.extract_slice %arg1[%arg2, 0, %arg3, 0, 0, %arg4, 0]
-                             [1, %arg5, %arg6, 1, %arg7, 1, 1] [1, 1, 1, 1, 1, 1, 1] :
-      tensor<1x?x?x?x?x1x1xf32> to tensor<1x?x?x1x?x1x1xf32>
-  return %0, %1 : tensor<1x?x?x1x?x1x1xf32>, tensor<1x?x?x1x?x1x1xf32>
-}
-//      CHECK: func @fold_slice
-// CHECK-SAME:   %[[ARG0:.+]]: tensor<1x?x?x1x?x1x1xf32>
-// CHECK-SAME:   %[[ARG1:.+]]: tensor<1x?x?x?x?x1x1xf32>
-//      CHECK:   %[[SLICE1:.+]] = tensor.extract_slice %[[ARG0]]
-// CHECK-SAME:       to tensor<?x?x?xf32>
-//      CHECK:   %[[RESULT1:.+]] = tensor.expand_shape %[[SLICE1]]
-// CHECK-SAME:       [0, 1], [2], [3, 4, 5, 6]
-//      CHECK:   %[[SLICE2:.+]] = tensor.extract_slice %[[ARG1]]
-// CHECK-SAME:       to tensor<?x?x?xf32>
-//      CHECK:   %[[RESULT2:.+]] = tensor.expand_shape %[[SLICE2]]
-// CHECK-SAME:       [0, 1], [2], [3, 4, 5, 6]
-//      CHECK:   return %[[RESULT1]], %[[RESULT2]]
-
 // -----
 
 func.func @unit_dim_for_reduction(%arg0: tensor<1x?x1x?xf32>) -> tensor<1x?xf32> {
@@ -430,30 +196,6 @@
 
 // -----
 
-func.func @slice_unit_dims(%arg0: tensor<1x3xf32>) -> tensor<1x1xf32> {
-  %0 = tensor.extract_slice %arg0[0, 2] [1, 1] [1, 1] : tensor<1x3xf32> to tensor<1x1xf32>
-  return %0 : tensor<1x1xf32>
-}
-// CHECK-LABEL: func @slice_unit_dims
-//       CHECK:   %[[SLICE:.+]] = tensor.extract_slice
-//  CHECK-SAME:     tensor<1x3xf32> to tensor<f32>
-//       CHECK:   %[[RESULT:.+]] = tensor.expand_shape %[[SLICE]] []
-//       CHECK:   return %[[RESULT]]
-
-// -----
-
-func.func @insert_slice_unit_dims(%arg0: tensor<1x3xf32>, %arg1: tensor<1x1xf32>) -> tensor<1x3xf32> {
-  %0 = tensor.insert_slice %arg1 into %arg0[0, 2] [1, 1] [1, 1] : tensor<1x1xf32> into tensor<1x3xf32>
-  return %0 : tensor<1x3xf32>
-}
-// CHECK-LABEL: func @insert_slice_unit_dims
-//       CHECK:   %[[RESHAPE:.+]] = tensor.collapse_shape %{{.+}} []
-//       CHECK:   %[[RESULT:.+]] = tensor.insert_slice %[[RESHAPE]]
-//  CHECK-SAME:     tensor<f32> into tensor<1x3xf32>
-//       CHECK:   return %[[RESULT]]
-
-// -----
-
 #accesses = [
   affine_map<(i, j, k, l, m) -> (i, k, m)>,
   affine_map<(i, j, k, l, m) -> ()>,
@@ -828,26 +570,6 @@
 
 // -----
 
-func.func @reduce_dispatch_0() -> tensor<4x2xf32> {
-  %c2 = arith.constant 2 : index
-  %c4 = arith.constant 4 : index
-  %cst = arith.constant 0.000000e+00 : f32
-  %0 = tensor.empty() : tensor<4x2xf32>
-  %res = scf.foreach_thread (%arg0, %arg1) in (%c4, %c2) shared_outs(%o = %0) -> (tensor<4x2xf32>) {
-    %1 = tensor.empty() : tensor<1x1xf32>
-    %2 = linalg.fill ins(%cst : f32) outs(%1 : tensor<1x1xf32>) -> tensor<1x1xf32>
-    scf.foreach_thread.perform_concurrently {
-      //      CHECK: tensor.parallel_insert_slice %{{[0-9a-z]*}} into %{{[0-9a-z]*}}
-      // CHECK-SAME: [%{{.*}}, %{{.*}}] [1, 1] [1, 1] : tensor<f32> into tensor<4x2xf32>
-      tensor.parallel_insert_slice %2 into %o[%arg0, %arg1] [1, 1] [1, 1] :
-        tensor<1x1xf32> into tensor<4x2xf32>
-    }
-  }
-  return %res: tensor<4x2xf32>
-}
-
-// -----
-
 #map0 = affine_map<(i, j) -> (i, j)>
 #access = [#map0, #map0]
 #trait = {