diff --git a/mlir/lib/Dialect/Linalg/TransformOps/LinalgTransformOps.cpp b/mlir/lib/Dialect/Linalg/TransformOps/LinalgTransformOps.cpp
--- a/mlir/lib/Dialect/Linalg/TransformOps/LinalgTransformOps.cpp
+++ b/mlir/lib/Dialect/Linalg/TransformOps/LinalgTransformOps.cpp
@@ -96,6 +96,57 @@
     results.push_back(*depthwise);
     return DiagnosedSilenceableFailure::success();
   }
+  FailureOr<LinalgOp> poolingNhwcSum = tryApply<
+      DownscaleSizeOneWindowed2DConvolution<PoolingNhwcSumOp, PoolingNwcSumOp>>(
+      target);
+  if (succeeded(poolingNhwcSum)) {
+    results.push_back(*poolingNhwcSum);
+    return DiagnosedSilenceableFailure::success();
+  }
+  FailureOr<LinalgOp> poolingNchwSum = tryApply<
+      DownscaleSizeOneWindowed2DConvolution<PoolingNchwSumOp, PoolingNcwSumOp>>(
+      target);
+  if (succeeded(poolingNchwSum)) {
+    results.push_back(*poolingNchwSum);
+    return DiagnosedSilenceableFailure::success();
+  }
+  FailureOr<LinalgOp> poolingNhwcMax = tryApply<
+      DownscaleSizeOneWindowed2DConvolution<PoolingNhwcMaxOp, PoolingNwcMaxOp>>(
+      target);
+  if (succeeded(poolingNhwcMax)) {
+    results.push_back(*poolingNhwcMax);
+    return DiagnosedSilenceableFailure::success();
+  }
+  FailureOr<LinalgOp> poolingNhwcMaxUnsigned =
+      tryApply<DownscaleSizeOneWindowed2DConvolution<PoolingNhwcMaxUnsignedOp,
+                                                     PoolingNwcMaxUnsignedOp>>(
+          target);
+  if (succeeded(poolingNhwcMaxUnsigned)) {
+    results.push_back(*poolingNhwcMaxUnsigned);
+    return DiagnosedSilenceableFailure::success();
+  }
+  FailureOr<LinalgOp> poolingNhwcMin = tryApply<
+      DownscaleSizeOneWindowed2DConvolution<PoolingNhwcMinOp, PoolingNwcMinOp>>(
+      target);
+  if (succeeded(poolingNhwcMin)) {
+    results.push_back(*poolingNhwcMin);
+    return DiagnosedSilenceableFailure::success();
+  }
+  FailureOr<LinalgOp> poolingNhwcMinUnsigned =
+      tryApply<DownscaleSizeOneWindowed2DConvolution<PoolingNhwcMinUnsignedOp,
+                                                     PoolingNwcMinUnsignedOp>>(
+          target);
+  if (succeeded(poolingNhwcMinUnsigned)) {
+    results.push_back(*poolingNhwcMinUnsigned);
+    return DiagnosedSilenceableFailure::success();
+  }
+  FailureOr<LinalgOp> poolingNchwMax = tryApply<
+      DownscaleSizeOneWindowed2DConvolution<PoolingNchwMaxOp, PoolingNcwMaxOp>>(
+      target);
+  if (succeeded(poolingNchwMax)) {
+    results.push_back(*poolingNchwMax);
+    return DiagnosedSilenceableFailure::success();
+  }
   results.assign(1, nullptr);
   return emitDefaultSilenceableFailure(target);
 }
diff --git a/mlir/lib/Dialect/Linalg/Transforms/Transforms.cpp b/mlir/lib/Dialect/Linalg/Transforms/Transforms.cpp
--- a/mlir/lib/Dialect/Linalg/Transforms/Transforms.cpp
+++ b/mlir/lib/Dialect/Linalg/Transforms/Transforms.cpp
@@ -628,8 +628,50 @@
         ohIndex = 2;
         owIndex = 3;
       })
+      .Case([&](linalg::PoolingNhwcSumOp op) {
+        khIndex = 0;
+        kwIndex = 1;
+        ohIndex = 1;
+        owIndex = 2;
+      })
+      .Case([&](linalg::PoolingNchwSumOp op) {
+        khIndex = 0;
+        kwIndex = 1;
+        ohIndex = 2;
+        owIndex = 3;
+      })
+      .Case([&](linalg::PoolingNhwcMaxOp op) {
+        khIndex = 0;
+        kwIndex = 1;
+        ohIndex = 1;
+        owIndex = 2;
+      })
+      .Case([&](linalg::PoolingNhwcMaxUnsignedOp op) {
+        khIndex = 0;
+        kwIndex = 1;
+        ohIndex = 1;
+        owIndex = 2;
+      })
+      .Case([&](linalg::PoolingNhwcMinOp op) {
+        khIndex = 0;
+        kwIndex = 1;
+        ohIndex = 1;
+        owIndex = 2;
+      })
+      .Case([&](linalg::PoolingNhwcMinUnsignedOp op) {
+        khIndex = 0;
+        kwIndex = 1;
+        ohIndex = 1;
+        owIndex = 2;
+      })
+      .Case([&](linalg::PoolingNchwMaxOp op) {
+        khIndex = 0;
+        kwIndex = 1;
+        ohIndex = 2;
+        owIndex = 3;
+      })
       .Default([&](Operation *op) {
-        llvm_unreachable("unexpected conv2d operation.");
+        llvm_unreachable("unexpected conv2d/pool2d operation.");
       });
 
   // Only handle the case where at least one of the window dimensions is
@@ -688,6 +730,20 @@
                                                               Conv1DNwcWcfOp>;
 template struct linalg::DownscaleSizeOneWindowed2DConvolution<Conv2DNchwFchwOp,
                                                               Conv1DNcwFcwOp>;
+template struct linalg::DownscaleSizeOneWindowed2DConvolution<PoolingNhwcSumOp,
+                                                              PoolingNwcSumOp>;
+template struct linalg::DownscaleSizeOneWindowed2DConvolution<PoolingNchwSumOp,
+                                                              PoolingNcwSumOp>;
+template struct linalg::DownscaleSizeOneWindowed2DConvolution<PoolingNhwcMaxOp,
+                                                              PoolingNwcMaxOp>;
+template struct linalg::DownscaleSizeOneWindowed2DConvolution<
+    PoolingNhwcMaxUnsignedOp, PoolingNwcMaxUnsignedOp>;
+template struct linalg::DownscaleSizeOneWindowed2DConvolution<PoolingNhwcMinOp,
+                                                              PoolingNwcMinOp>;
+template struct linalg::DownscaleSizeOneWindowed2DConvolution<
+    PoolingNhwcMinUnsignedOp, PoolingNwcMinUnsignedOp>;
+template struct linalg::DownscaleSizeOneWindowed2DConvolution<PoolingNchwMaxOp,
+                                                              PoolingNcwMaxOp>;
 
 FailureOr<DepthwiseConv1DNwcWcOp>
 DownscaleDepthwiseConv2DNhwcHwcOp::returningMatchAndRewrite(
@@ -765,4 +821,15 @@
                                                      Conv1DNcwFcwOp>,
                DownscaleDepthwiseConv2DNhwcHwcOp>(patterns.getContext(),
                                                   benefit);
+  patterns.add<
+      DownscaleSizeOneWindowed2DConvolution<PoolingNhwcSumOp, PoolingNwcSumOp>,
+      DownscaleSizeOneWindowed2DConvolution<PoolingNchwSumOp, PoolingNcwSumOp>,
+      DownscaleSizeOneWindowed2DConvolution<PoolingNhwcMaxOp, PoolingNwcMaxOp>,
+      DownscaleSizeOneWindowed2DConvolution<PoolingNhwcMaxUnsignedOp,
+                                            PoolingNwcMaxUnsignedOp>,
+      DownscaleSizeOneWindowed2DConvolution<PoolingNhwcMinOp, PoolingNwcMinOp>,
+      DownscaleSizeOneWindowed2DConvolution<PoolingNhwcMinUnsignedOp,
+                                            PoolingNwcMinUnsignedOp>,
+      DownscaleSizeOneWindowed2DConvolution<PoolingNchwMaxOp, PoolingNcwMaxOp>>(
+      patterns.getContext(), benefit);
 }
diff --git a/mlir/test/Dialect/Linalg/transform-op-decompose.mlir b/mlir/test/Dialect/Linalg/transform-op-decompose.mlir
--- a/mlir/test/Dialect/Linalg/transform-op-decompose.mlir
+++ b/mlir/test/Dialect/Linalg/transform-op-decompose.mlir
@@ -56,6 +56,132 @@
   return %0: tensor<1x1x56x96xf32>
 }
 
+// CHECK-LABEL: @pooling_nhwc_sum
+// CHECK-SAME: %[[ARG0:.+]]: tensor<?x1x?x?xf32>,
+// CHECK-SAME: %[[ARG1:.+]]: tensor<1x?xf32>
+// CHECK-SAME: %[[ARG2:.+]]: tensor<?x1x?x?xf32>
+func.func @pooling_nhwc_sum(%input: tensor<?x1x?x?xf32>, %filter: tensor<1x?xf32>, %init: tensor<?x1x?x?xf32>) -> tensor<?x1x?x?xf32> {
+  // CHECK: %[[SLICE0:.+]] = tensor.extract_slice %[[ARG0]]
+  // CHECK: %[[SLICE1:.+]] = tensor.extract_slice %[[ARG1]]
+  // CHECK: %[[SLICE2:.+]] = tensor.extract_slice %[[ARG2]]
+  // CHECK: %[[SLICERES:.+]] = linalg.pooling_nwc_sum
+  // CHECK: %[[RES:.+]] = tensor.insert_slice %[[SLICERES]] into %[[ARG2]]
+  %0 = linalg.pooling_nhwc_sum {dilations = dense<1> : tensor<2xi64>,
+                                strides = dense<1> : tensor<2xi64>}
+     ins (%input, %filter: tensor<?x1x?x?xf32>, tensor<1x?xf32>)
+    outs (%init: tensor<?x1x?x?xf32>) -> tensor<?x1x?x?xf32>
+  // CHECK: return %[[RES]]
+  return %0 : tensor<?x1x?x?xf32>
+}
+
+// CHECK-LABEL: @pooling_nchw_sum
+// CHECK-SAME: (%[[ARG0:[0-9a-z]+]]: tensor<?x?x1x?xf32>,
+// CHECK-SAME: %[[ARG1:[0-9a-z]+]]: tensor<1x?xf32>,
+// CHECK-SAME: %[[ARG2:[0-9a-z]+]]: tensor<?x?x1x?xf32>)
+func.func @pooling_nchw_sum(%input: tensor<?x?x1x?xf32>, %filter: tensor<1x?xf32>, %init: tensor<?x?x1x?xf32>) -> tensor<?x?x1x?xf32> {
+  // CHECK: %[[SLICE0:.+]] = tensor.extract_slice %[[ARG0]]
+  // CHECK: %[[SLICE1:.+]] = tensor.extract_slice %[[ARG1]]
+  // CHECK: %[[SLICE2:.+]] = tensor.extract_slice %[[ARG2]]
+  // CHECK: %[[SLICERES:.+]] = linalg.pooling_ncw_sum
+  // CHECK: %[[RES:.+]] = tensor.insert_slice %[[SLICERES]] into %[[ARG2]]
+  %0 = linalg.pooling_nchw_sum {dilations = dense<1> : tensor<2xi64>,
+                                strides = dense<1> : tensor<2xi64>}
+     ins (%input, %filter: tensor<?x?x1x?xf32>, tensor<1x?xf32>)
+    outs (%init: tensor<?x?x1x?xf32>) -> tensor<?x?x1x?xf32>
+  // CHECK: return %[[RES]]
+  return %0 : tensor<?x?x1x?xf32>
+}
+
+// CHECK-LABEL: @pooling_nhwc_max
+// CHECK-SAME: %[[ARG0:.+]]: tensor<?x1x?x?xf32>,
+// CHECK-SAME: %[[ARG1:.+]]: tensor<1x?xf32>
+// CHECK-SAME: %[[ARG2:.+]]: tensor<?x1x?x?xf32>
+func.func @pooling_nhwc_max(%input: tensor<?x1x?x?xf32>, %filter: tensor<1x?xf32>, %init: tensor<?x1x?x?xf32>) -> tensor<?x1x?x?xf32> {
+  // CHECK: %[[SLICE0:.+]] = tensor.extract_slice %[[ARG0]]
+  // CHECK: %[[SLICE1:.+]] = tensor.extract_slice %[[ARG1]]
+  // CHECK: %[[SLICE2:.+]] = tensor.extract_slice %[[ARG2]]
+  // CHECK: %[[SLICERES:.+]] = linalg.pooling_nwc_max
+  // CHECK: %[[RES:.+]] = tensor.insert_slice %[[SLICERES]] into %[[ARG2]]
+  %0 = linalg.pooling_nhwc_max {dilations = dense<1> : tensor<2xi64>,
+                                strides = dense<1> : tensor<2xi64>}
+     ins (%input, %filter: tensor<?x1x?x?xf32>, tensor<1x?xf32>)
+    outs (%init: tensor<?x1x?x?xf32>) -> tensor<?x1x?x?xf32>
+  // CHECK: return %[[RES]]
+  return %0 : tensor<?x1x?x?xf32>
+}
+
+// CHECK-LABEL: @pooling_nhwc_max_unsigned
+// CHECK-SAME: %[[ARG0:.+]]: tensor<?x1x?x?xf32>,
+// CHECK-SAME: %[[ARG1:.+]]: tensor<1x?xf32>
+// CHECK-SAME: %[[ARG2:.+]]: tensor<?x1x?x?xf32>
+func.func @pooling_nhwc_max_unsigned(%input: tensor<?x1x?x?xf32>, %filter: tensor<1x?xf32>, %init: tensor<?x1x?x?xf32>) -> tensor<?x1x?x?xf32> {
+  // CHECK: %[[SLICE0:.+]] = tensor.extract_slice %[[ARG0]]
+  // CHECK: %[[SLICE1:.+]] = tensor.extract_slice %[[ARG1]]
+  // CHECK: %[[SLICE2:.+]] = tensor.extract_slice %[[ARG2]]
+  // CHECK: %[[SLICERES:.+]] = linalg.pooling_nwc_max_unsigned
+  // CHECK: %[[RES:.+]] = tensor.insert_slice %[[SLICERES]] into %[[ARG2]]
+  %0 = linalg.pooling_nhwc_max_unsigned {dilations = dense<1> : tensor<2xi64>,
+                                strides = dense<1> : tensor<2xi64>}
+     ins (%input, %filter: tensor<?x1x?x?xf32>, tensor<1x?xf32>)
+    outs (%init: tensor<?x1x?x?xf32>) -> tensor<?x1x?x?xf32>
+  // CHECK: return %[[RES]]
+  return %0 : tensor<?x1x?x?xf32>
+}
+
+// CHECK-LABEL: @pooling_nhwc_min
+// CHECK-SAME: %[[ARG0:.+]]: tensor<?x1x?x?xf32>,
+// CHECK-SAME: %[[ARG1:.+]]: tensor<1x?xf32>
+// CHECK-SAME: %[[ARG2:.+]]: tensor<?x1x?x?xf32>
+func.func @pooling_nhwc_min(%input: tensor<?x1x?x?xf32>, %filter: tensor<1x?xf32>, %init: tensor<?x1x?x?xf32>) -> tensor<?x1x?x?xf32> {
+  // CHECK: %[[SLICE0:.+]] = tensor.extract_slice %[[ARG0]]
+  // CHECK: %[[SLICE1:.+]] = tensor.extract_slice %[[ARG1]]
+  // CHECK: %[[SLICE2:.+]] = tensor.extract_slice %[[ARG2]]
+  // CHECK: %[[SLICERES:.+]] = linalg.pooling_nwc_min
+  // CHECK: %[[RES:.+]] = tensor.insert_slice %[[SLICERES]] into %[[ARG2]]
+  %0 = linalg.pooling_nhwc_min {dilations = dense<1> : tensor<2xi64>,
+                                strides = dense<1> : tensor<2xi64>}
+     ins (%input, %filter: tensor<?x1x?x?xf32>, tensor<1x?xf32>)
+    outs (%init: tensor<?x1x?x?xf32>) -> tensor<?x1x?x?xf32>
+  // CHECK: return %[[RES]]
+  return %0 : tensor<?x1x?x?xf32>
+}
+
+// CHECK-LABEL: @pooling_nhwc_min_unsigned
+// CHECK-SAME: %[[ARG0:.+]]: tensor<?x1x?x?xf32>,
+// CHECK-SAME: %[[ARG1:.+]]: tensor<1x?xf32>
+// CHECK-SAME: %[[ARG2:.+]]: tensor<?x1x?x?xf32>
+func.func @pooling_nhwc_min_unsigned(%input: tensor<?x1x?x?xf32>, %filter: tensor<1x?xf32>, %init: tensor<?x1x?x?xf32>) -> tensor<?x1x?x?xf32> {
+  // CHECK: %[[SLICE0:.+]] = tensor.extract_slice %[[ARG0]]
+  // CHECK: %[[SLICE1:.+]] = tensor.extract_slice %[[ARG1]]
+  // CHECK: %[[SLICE2:.+]] = tensor.extract_slice %[[ARG2]]
+  // CHECK: %[[SLICERES:.+]] = linalg.pooling_nwc_min_unsigned
+  // CHECK: %[[RES:.+]] = tensor.insert_slice %[[SLICERES]] into %[[ARG2]]
+  %0 = linalg.pooling_nhwc_min_unsigned {dilations = dense<1> : tensor<2xi64>,
+                                strides = dense<1> : tensor<2xi64>}
+     ins (%input, %filter: tensor<?x1x?x?xf32>, tensor<1x?xf32>)
+    outs (%init: tensor<?x1x?x?xf32>) -> tensor<?x1x?x?xf32>
+  // CHECK: return %[[RES]]
+  return %0 : tensor<?x1x?x?xf32>
+}
+
+// CHECK-LABEL: @pooling_nchw_max
+// CHECK-SAME: (%[[ARG0:[0-9a-z]+]]: tensor<?x?x1x?xf32>,
+// CHECK-SAME: %[[ARG1:[0-9a-z]+]]: tensor<1x?xf32>,
+// CHECK-SAME: %[[ARG2:[0-9a-z]+]]: tensor<?x?x1x?xf32>)
+func.func @pooling_nchw_max(%input: tensor<?x?x1x?xf32>, %filter: tensor<1x?xf32>, %init: tensor<?x?x1x?xf32>) -> tensor<?x?x1x?xf32> {
+  // CHECK: %[[SLICE0:.+]] = tensor.extract_slice %[[ARG0]]
+  // CHECK: %[[SLICE1:.+]] = tensor.extract_slice %[[ARG1]]
+  // CHECK: %[[SLICE2:.+]] = tensor.extract_slice %[[ARG2]]
+  // CHECK: %[[SLICERES:.+]] = linalg.pooling_ncw_max
+  // CHECK: %[[RES:.+]] = tensor.insert_slice %[[SLICERES]] into %[[ARG2]]
+  %0 = linalg.pooling_nchw_max {dilations = dense<1> : tensor<2xi64>,
+                                strides = dense<1> : tensor<2xi64>}
+     ins (%input, %filter: tensor<?x?x1x?xf32>, tensor<1x?xf32>)
+    outs (%init: tensor<?x?x1x?xf32>) -> tensor<?x?x1x?xf32>
+  // CHECK: return %[[RES]]
+  return %0 : tensor<?x?x1x?xf32>
+}
+
 transform.sequence failures(propagate) {
 ^bb1(%arg1: !pdl.operation):
   %0 = transform.structured.match interface{LinalgOp} in %arg1