diff --git a/mlir/include/mlir/Dialect/Linalg/IR/LinalgNamedStructuredOps.yaml b/mlir/include/mlir/Dialect/Linalg/IR/LinalgNamedStructuredOps.yaml --- a/mlir/include/mlir/Dialect/Linalg/IR/LinalgNamedStructuredOps.yaml +++ b/mlir/include/mlir/Dialect/Linalg/IR/LinalgNamedStructuredOps.yaml @@ -791,87 +791,6 @@ - !ScalarExpression scalar_arg: K --- !LinalgOpConfig -metadata: !LinalgOpMetadata - name: depthwise_conv_2d_input_nhwc_filter_hwc_poly - cpp_class_name: DepthwiseConv2DInputNhwcFilterHwcPolyOp - doc: |- - Performs depth-wise 2-D convolution. - - Numeric casting is performed on the operands to the inner multiply, promoting - them to the same data type as the accumulator/output. -structured_op: !LinalgStructuredOpConfig - args: - - !LinalgOperandDefConfig - name: I - usage: InputOperand - type_var: T1 - shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11] -> - (s0, s1, s2, s3)> - - !LinalgOperandDefConfig - name: K - usage: InputOperand - type_var: T2 - shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11] -> - (s4, s5, s3)> - - !LinalgOperandDefConfig - name: O - usage: OutputOperand - type_var: U - shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11] -> - (s0, s6, s7, s3)> - - !LinalgOperandDefConfig - name: strides - usage: IndexAttribute - type_var: I64 - attribute_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11] - -> (s8, s9)> - - !LinalgOperandDefConfig - name: dilations - usage: IndexAttribute - type_var: I64 - attribute_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11] - -> (s10, s11)> - indexing_maps: !LinalgIndexingMapsConfig - static_indexing_maps: - - affine_map<(d0, d1, d2, d3, d4, d5)[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, - s10, s11] -> (d0, d1 * s8 + d3 * s10, d2 * s9 + d4 * s11, d5)> - - affine_map<(d0, d1, d2, d3, d4, d5)[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, - s10, s11] -> (d3, d4, d5)> - - affine_map<(d0, d1, d2, d3, d4, d5)[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, - s10, s11] -> (d0, d1, d2, d5)> - iterator_types: - - parallel - - parallel - - parallel - - reduction - - reduction - - parallel - assignments: - - !ScalarAssign - arg: O - value: !ScalarExpression - scalar_apply: - fn_name: add - operands: - - !ScalarExpression - scalar_arg: O - - !ScalarExpression - scalar_apply: - fn_name: mul - operands: - - !ScalarExpression - symbolic_cast: - type_var: U - operands: - - !ScalarExpression - scalar_arg: I - - !ScalarExpression - symbolic_cast: - type_var: U - operands: - - !ScalarExpression - scalar_arg: K ---- !LinalgOpConfig metadata: !LinalgOpMetadata name: conv_2d_nhwc_hwcf_q cpp_class_name: Conv2DNhwcHwcfQOp diff --git a/mlir/include/mlir/Dialect/Linalg/IR/LinalgNamedStructuredOpsSpec.tc b/mlir/include/mlir/Dialect/Linalg/IR/LinalgNamedStructuredOpsSpec.tc --- a/mlir/include/mlir/Dialect/Linalg/IR/LinalgNamedStructuredOpsSpec.tc +++ b/mlir/include/mlir/Dialect/Linalg/IR/LinalgNamedStructuredOpsSpec.tc @@ -1,9 +1,3 @@ -ods_def -implements_interface : -def matmul_column_major(A: f32(K, M), B: f32(N, K)) -> (C: f32(N, M)) { - C(n, m) = AddFOp(C(n, m), MulFOp(A(k, m), B(n, k))); -} - ods_def implements_interface : def matmul_i8_i8_i32(A: i8(M, K), B: i8(K, N)) -> (C: i32(M, N)) { @@ -12,139 +6,22 @@ C(m, n) = AddIOp(C(m, n), MulIOp(SignExtendIOp32(A(m, k)), SignExtendIOp32(B(k, n)))); } -ods_def -implements_interface : -def matmul_i16_i16_i32(A: i16(M, K), B: i16(K, N)) -> (C: i32(M, N)) { - C(m, n) = AddIOp(C(m, n), MulIOp(SignExtendIOp32(A(m, k)), 
SignExtendIOp32(B(k, n)))); -} - -ods_def -implements_interface : -def matmul_i32_i32_i32(A: i32(M, K), B: i32(K, N)) -> (C: i32(M, N)) { - C(m, n) = AddIOp(C(m, n), MulIOp(A(m, k), B(k, n))); -} - -ods_def -implements_interface : -def matvec_i8_i8_i32(A: i8(M, N), y: i8(N)) -> (x: i32(M)) { - x(m) = AddIOp(x(m), MulIOp(SignExtendIOp32(A(m, n)), SignExtendIOp32(y(n)))); -} - -ods_def -implements_interface : -def matvec_i16_i16_i32(A: i16(M, N), y: i16(N)) -> (x: i32(M)) { - x(m) = AddIOp(x(m), MulIOp(SignExtendIOp32(A(m, n)), SignExtendIOp32(y(n)))); -} - -ods_def -implements_interface : -def matvec_i32_i32_i32(A: i32(M, N), y: i32(N)) -> (x: i32(M)) { - x(m) = AddIOp(x(m), MulIOp(A(m, n), y(n))); -} - -ods_def -implements_interface : -def vecmat_i8_i8_i32(y: i8(M), A: i8(M, N)) -> (x: i32(N)) { - x(n) = AddIOp(x(n), MulIOp(SignExtendIOp32(y(m)), SignExtendIOp32(A(m, n)))); -} - -ods_def -implements_interface : -def vecmat_i16_i16_i32(y: i16(M), A: i16(M, N)) -> (x: i32(N)) { - x(n) = AddIOp(x(n), MulIOp(SignExtendIOp32(y(m)), SignExtendIOp32(A(m, n)))); -} - -ods_def -implements_interface : -def vecmat_i32_i32_i32(y: i32(M), A: i32(M, N)) -> (x: i32(N)) { - x(n) = AddIOp(x(n), MulIOp(y(m), A(m, n))); -} - -ods_def -implements_interface : -def dot_i8_i8_i32(A: i8(M), B: i8(M)) -> (C: i32()) { - C() = AddIOp(C(), MulIOp(SignExtendIOp32(A(m)), SignExtendIOp32(B(m)))); -} - -ods_def -implements_interface : -def dot_i16_i16_i32(A: i16(M), B: i16(M)) -> (C: i32()) { - C() = AddIOp(C(), MulIOp(SignExtendIOp32(A(m)), SignExtendIOp32(B(m)))); -} - -ods_def -implements_interface : -def dot_i32_i32_i32(A: i32(M), B: i32(M)) -> (C: i32()) { - C() = AddIOp(C(), MulIOp(A(m), B(m))); -} - -ods_def -implements_interface : -def batch_matmul_i8_i8_i32(A: i8(Batch, M, K), B: i8(Batch, K, N)) -> (C: i32(Batch, M, N)) { - C(b, m, n) = - AddIOp(C(b, m, n), MulIOp(SignExtendIOp32(A(b, m, k)), SignExtendIOp32(B(b, k, n)))); -} - -ods_def -implements_interface : -def batch_matmul_i16_i16_i32(A: i16(Batch, M, K), B: i16(Batch, K, N)) -> (C: i32(Batch, M, N)) { - C(b, m, n) = - AddIOp(C(b, m, n), MulIOp(SignExtendIOp32(A(b, m, k)), SignExtendIOp32(B(b, k, n)))); -} - - -ods_def -implements_interface : -def batch_matmul_i32_i32_i32(A: i32(Batch, M, K), B: i32(Batch, K, N)) -> (C: i32(Batch, M, N)) { - C(b, m, n) = AddIOp(C(b, m, n), MulIOp(A(b, m, k), B(b, k, n))); -} - ods_def: def conv_1d(I: f32(W), K: f32(KW)) -> (O: f32(W)) { O(w) = AddFOp(O(w), MulFOp(I(w + kw), K(kw))); } -ods_def: -def conv_1d_nwc(I: f32(N, W, C), K: f32(F, KW, C)) -> (O: f32(N, W, F)) { - O(n, w, f) = AddFOp(O(n, w, f), MulFOp(I(n, w + kw, c), K(f, kw, c))); -} - -ods_def: -def conv_1d_ncw(I: f32(N, C, W), K: f32(F, C, KW)) -> (O: f32(N, F, W)) { - O(n, f, w) = AddFOp(O(n, f, w), MulFOp(I(n, c, w + kw), K(f, c, kw))); -} - ods_def: def conv_2d(I: f32(H, W), K: f32(KH, KW)) -> (O: f32(H, W)) { O(h, w) = AddFOp(O(h, w), MulFOp(I(h + kh, w + kw), K(kh, kw))); } -ods_def: -def conv_2d_nhwc(I: f32(N, H, W, C), K: f32(F, KH, KW, C)) -> (O: f32(N, H, W, F)) { - O(n, h, w, f) = AddFOp( - O(n, h, w, f), MulFOp(I(n, h + kh, w + kw, c), K(f, kh, kw, c))); -} - ods_def: def conv_3d(I: f32(D, H, W), K: f32(KD, KH, KW)) -> (O: f32(D, H, W)) { O(d, h, w) = AddFOp( O(d, h, w), MulFOp(I(d + kd, h + kh, w + kw), K(kd, kh, kw))); } -ods_def: -def conv_3d_ndhwc(I: f32(N, D, H, W, C), K: f32(F, KD, KH, KW, C)) -> (O: f32(N, D, H, W, F)) { - O(n, d, h, w, f) = AddFOp( - O(n, d, h, w, f), - MulFOp(I(n, d + kd, h + kh, w + kw, c), K(f, kd, kh, kw, c))); -} - 
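For reference, the retained polymorphic conv_1d above accumulates O(w) += I(w + kw) * K(kw) over the reduction index kw. A minimal NumPy sketch of that same contraction (illustrative only; the function name and shapes are assumptions, and this is not the Linalg code generated from the TC definition):

import numpy as np

def conv_1d_ref(I, K):
    # Mirrors the TC body: O(w) = AddFOp(O(w), MulFOp(I(w + kw), K(kw))).
    out_w = I.shape[0] - K.shape[0] + 1
    O = np.zeros(out_w, dtype=I.dtype)
    for w in range(out_w):
        for kw in range(K.shape[0]):
            O[w] += I[w + kw] * K[kw]
    return O

# An 8-element input of 2.0s and a 3-tap filter of 2.0s yield 12.0 in every output
# element; the 28s in the expected outputs of the conv integration tests removed
# below come from the single input element overwritten with 10.0.
print(conv_1d_ref(np.full(8, 2.0, np.float32), np.full(3, 2.0, np.float32)))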
-ods_def: -def conv_3d_ncdhw(I: f32(N, C, D, H, W), K: f32(F, C, KD, KH, KW)) -> (O: f32(N, F, D, H, W)) { - O(n, f, d, h, w) = AddFOp( - O(n, f, d, h, w), - MulFOp(I(n, c, d + kd, h + kh, w + kw), K(f, c, kd, kh, kw))); -} - ods_def: def depthwise_conv_2d_input_nhwc_filter_hwcf (I: f32(N, IH, IW, CI), K: f32(KH, KW, CI, CO)) @@ -238,23 +115,6 @@ MulFOp(I(n, w * strides[0] + kw * dilations[0], c), K(kw, c, f))); } -ods_def: -def conv_1d_input_ncw_filter_wcf(I: f32(N, C, W), K: f32(KW, C, F)) -> (O: f32(N, F, W)) - attr(strides: 1xi64, dilations: 1xi64) -""" A 1-D convolution given NCW layout input and WCF layout filter. - -Computes a 1-D convolution given 3-D input and filter. The data layout -of input is NCW and the data layout of filter is WCF. - -The indexing maps for these three tensors contain 5 dimensions, following the -order of (`N`, `F`, `W`, `KW`, `C`). -""" -{ - O(n, f, w) = AddFOp( - O(n, f, w), - MulFOp(I(n, c, w * strides[0] + kw * dilations[0]), K(kw, c, f))); -} - ods_def: def conv_2d_input_nhwc_filter_hwcf(I: f32(N, H, W, C), K: f32(KH, KW, C, F)) -> (O: f32(N, H, W, F)) attr(strides: 2xi64, dilations: 2xi64) @@ -273,26 +133,6 @@ K(kh, kw, c, f))); } -ods_def: -def conv_2d_input_nchw_filter_hwcf - (I: f32(N, C, H, W), K: f32(KH, KW, C, F)) - -> (O: f32(N, F, H, W)) - attr(strides: 2xi64, dilations: 2xi64) -""" A 2-D convolution given NCHW layout input and HWCF layout filter. - -Computes a 2-D convolution given 4-D input and filter. The data layout -of input is NCHW and the data layout of filter is HWCF. - -The indexing maps for these three tensors contain 7 dimensions, following the -order of (`N`, `F`, `H`, `W`, `KH`, `KW`, `C`). -""" -{ - O(n, f, h, w) = AddFOp( - O(n, f, h, w), MulFOp(I(n, c, h * strides[0] + kh * dilations[0], - w * strides[1] + kw * dilations[1]), - K(kh, kw, c, f))); -} - ods_def: def conv_3d_input_ndhwc_filter_dhwcf (I: f32(N, D, H, W, C), K: f32(KD, KH, KW, C, F)) @@ -313,24 +153,3 @@ w * strides[2] + kw * dilations[2], c), K(kd, kh, kw, c, f))); } - -ods_def: -def conv_3d_input_ncdhw_filter_dhwcf - (I: f32(N, C, D, H, W), K: f32(KD, KH, KW, C, F)) - -> (O: f32(N, F, D, H, W)) - attr(strides: 3xi64, dilations: 3xi64) -""" A 3-D convolution given NCDHW layout input and DHWCF layout filter. - -Computes a 3-D convolution given 5-D input and filter. The data layout -of input is NCDHW and the data layout of filter is DHWCF. - -The indexing maps for these three tensors contain 9 dimensions, following the -order of (`N`, `F`, `D`, `H`, `W`, `KD`, `KH`, `KW`, `C`). 
-""" -{ - O(n, f, d, h, w) = AddFOp( - O(n, f, d, h, w), MulFOp(I(n, c, d * strides[0] + kd * dilations[0], - h * strides[1] + kh * dilations[1], - w * strides[2] + kw * dilations[2]), - K(kd, kh, kw, c, f))); -} diff --git a/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp b/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp --- a/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp +++ b/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp @@ -1168,42 +1168,24 @@ populateVectorizationPatterns(tiling, promotion, vectorization, tileSizes); - populateVectorizationPatterns(tiling, promotion, vectorization, - tileSizes); populateVectorizationPatterns( tiling, promotion, vectorization, tileSizes); - populateVectorizationPatterns(tiling, promotion, vectorization, - tileSizes); - populateVectorizationPatterns( - tiling, promotion, vectorization, tileSizes); - populateVectorizationPatterns(tiling, promotion, vectorization, tileSizes); - populateVectorizationPatterns(tiling, promotion, vectorization, - tileSizes); populateVectorizationPatterns( tiling, promotion, vectorization, tileSizes); populateVectorizationPatterns(tiling, promotion, vectorization, tileSizes); - populateVectorizationPatterns( - tiling, promotion, vectorization, tileSizes); populateVectorizationPatterns(tiling, promotion, vectorization, tileSizes); - populateVectorizationPatterns(tiling, promotion, - vectorization, tileSizes); populateVectorizationPatterns( tiling, promotion, vectorization, tileSizes); - populateVectorizationPatterns(tiling, promotion, - vectorization, tileSizes); - populateVectorizationPatterns( - tiling, promotion, vectorization, tileSizes); - patterns.push_back(std::move(tiling)); patterns.push_back(std::move(promotion)); patterns.push_back(std::move(vectorization)); diff --git a/mlir/python/mlir/dialects/linalg/opdsl/ops/core_named_ops.py b/mlir/python/mlir/dialects/linalg/opdsl/ops/core_named_ops.py --- a/mlir/python/mlir/dialects/linalg/opdsl/ops/core_named_ops.py +++ b/mlir/python/mlir/dialects/linalg/opdsl/ops/core_named_ops.py @@ -177,24 +177,6 @@ O[D.n, D.oh, D.ow, D.f] += cast( U, I[D.n, D.oh * S.SH + D.kh * S.DH, D.ow * S.SW + D.kw * S.DW, D.c ]) * cast(U, K[D.kh, D.kw, D.c, D.f]) - -@linalg_structured_op -def depthwise_conv_2d_input_nhwc_filter_hwc_poly( - I=TensorDef(T1, S.N, S.IH, S.IW, S.C), - K=TensorDef(T2, S.KH, S.KW, S.C), - O=TensorDef(U, S.N, S.OH, S.OW, S.C, output=True), - strides=AttributeDef(S.SH, S.SW), - dilations=AttributeDef(S.DH, S.DW)): - """Performs depth-wise 2-D convolution. - - Numeric casting is performed on the operands to the inner multiply, promoting - them to the same data type as the accumulator/output. 
- """ - domain(D.n, D.oh, D.ow, D.kh, D.kw, D.c) - O[D.n, D.oh, D.ow, D.c] += cast( - U, I[D.n, D.oh * S.SH + D.kh * S.DH, D.ow * S.SW + D.kw * S.DW, - D.c]) * cast(U, K[D.kh, D.kw, D.c]) - @linalg_structured_op def conv_2d_nhwc_hwcf_q( I=TensorDef(T1, S.N, S.IH, S.IW, S.C), diff --git a/mlir/test/Dialect/Linalg/generalize-named-ops.mlir b/mlir/test/Dialect/Linalg/generalize-named-ops.mlir --- a/mlir/test/Dialect/Linalg/generalize-named-ops.mlir +++ b/mlir/test/Dialect/Linalg/generalize-named-ops.mlir @@ -182,140 +182,6 @@ // ----- -func @conv_1d_input_ncw_filter_wcf(%input: memref, %filter: memref, %output: memref) { - linalg.conv_1d_input_ncw_filter_wcf {dilations = dense<1> : tensor<1xi64>, - strides = dense<1> : tensor<1xi64>} - ins (%input, %filter: memref, memref) - outs (%output: memref) - return -} - -// CHECK-DAG: #[[MAP0:.+]] = affine_map<(d0, d1, d2, d3, d4) -> (d0, d4, d2 + d3)> -// CHECK-DAG: #[[MAP1:.+]] = affine_map<(d0, d1, d2, d3, d4) -> (d3, d4, d1)> -// CHECK-DAG: #[[MAP2:.+]] = affine_map<(d0, d1, d2, d3, d4) -> (d0, d1, d2)> - -// CHECK: func @conv_1d_input_ncw_filter_wcf -// CHECK: linalg.generic -// CHECK-SAME: indexing_maps = [#[[MAP0]], #[[MAP1]], #[[MAP2]]] -// CHECK-SAME: iterator_types = ["parallel", "parallel", "parallel", "reduction", "parallel"]} -// CHECK-SAME: ins(%{{.+}}, %{{.+}} : memref, memref) -// CHECK-SAME: outs(%{{.+}} : memref) - -// CHECK: ^{{.+}}(%[[BBARG0:.+]]: f32, %[[BBARG1:.+]]: f32, %[[BBARG2:.+]]: f32) -// CHECK-NEXT: %[[MUL:.+]] = mulf %[[BBARG0]], %[[BBARG1]] : f32 -// CHECK-NEXT: %[[ADD:.+]] = addf %[[BBARG2]], %[[MUL]] : f32 -// CHECK-NEXT: linalg.yield %[[ADD]] : f32 - -// ----- - -func @conv_2d_input_nhwc_filter_hwcf(%input: memref, %filter: memref, %output: memref) { - linalg.conv_2d_input_nhwc_filter_hwcf {dilations = dense<2> : tensor<2xi64>, - strides = dense<3> : tensor<2xi64>} - ins (%input, %filter: memref, memref) - outs (%output: memref) - return -} - -// CHECK-DAG: #[[MAP0:.+]] = affine_map<(d0, d1, d2, d3, d4, d5, d6) -> (d0, d1 * 3 + d4 * 2, d2 * 3 + d5 * 2, d6)> -// CHECK-DAG: #[[MAP1:.+]] = affine_map<(d0, d1, d2, d3, d4, d5, d6) -> (d4, d5, d6, d3)> -// CHECK-DAG: #[[MAP2:.+]] = affine_map<(d0, d1, d2, d3, d4, d5, d6) -> (d0, d1, d2, d3)> - -// CHECK: func @conv_2d_input_nhwc_filter_hwcf - -// CHECK: linalg.generic -// CHECK-SAME: indexing_maps = [#[[MAP0]], #[[MAP1]], #[[MAP2]]] -// CHECK-SAME: iterator_types = ["parallel", "parallel", "parallel", "parallel", "reduction", "reduction", "parallel"]} -// CHECK-SAME: ins(%{{.+}}, %{{.+}} : memref, memref) -// CHECK-SAME: outs(%{{.+}} : memref) - -// CHECK: ^{{.+}}(%[[BBARG0:.+]]: f32, %[[BBARG1:.+]]: f32, %[[BBARG2:.+]]: f32) -// CHECK-NEXT: %[[MUL:.+]] = mulf %[[BBARG0]], %[[BBARG1]] : f32 -// CHECK-NEXT: %[[ADD:.+]] = addf %[[BBARG2]], %[[MUL]] : f32 -// CHECK-NEXT: linalg.yield %[[ADD]] : f32 - -// ----- - -func @conv_2d_input_nchw_filter_hwcf(%input: memref, %filter: memref, %output: memref) { - linalg.conv_2d_input_nchw_filter_hwcf {dilations = dense<1> : tensor<2xi64>, - strides = dense<1> : tensor<2xi64>} - ins (%input, %filter: memref, memref) - outs (%output: memref) - return -} - -// CHECK-DAG: #[[MAP0:.+]] = affine_map<(d0, d1, d2, d3, d4, d5, d6) -> (d0, d6, d2 + d4, d3 + d5)> -// CHECK-DAG: #[[MAP1:.+]] = affine_map<(d0, d1, d2, d3, d4, d5, d6) -> (d4, d5, d6, d1)> -// CHECK-DAG: #[[MAP2:.+]] = affine_map<(d0, d1, d2, d3, d4, d5, d6) -> (d0, d1, d2, d3)> - -// CHECK: func @conv_2d_input_nchw_filter_hwcf - -// CHECK: linalg.generic -// CHECK-SAME: indexing_maps 
= [#[[MAP0]], #[[MAP1]], #[[MAP2]]] -// CHECK-SAME: iterator_types = ["parallel", "parallel", "parallel", "parallel", "reduction", "reduction", "parallel"]} -// CHECK-SAME: ins(%{{.+}}, %{{.+}} : memref, memref) -// CHECK-SAME: outs(%{{.+}} : memref) - -// CHECK: ^{{.+}}(%[[BBARG0:.+]]: f32, %[[BBARG1:.+]]: f32, %[[BBARG2:.+]]: f32) -// CHECK-NEXT: %[[MUL:.+]] = mulf %[[BBARG0]], %[[BBARG1]] : f32 -// CHECK-NEXT: %[[ADD:.+]] = addf %[[BBARG2]], %[[MUL]] : f32 -// CHECK-NEXT: linalg.yield %[[ADD]] : f32 - -// ----- - -func @conv_3d_input_ndhwc_filter_dhwcf(%input: memref, %filter: memref, %output: memref) { - linalg.conv_3d_input_ndhwc_filter_dhwcf {dilations = dense<1> : tensor<3xi64>, - strides = dense<1> : tensor<3xi64>} - ins (%input, %filter: memref, memref) - outs (%output: memref) - return -} - -// CHECK-DAG: #[[MAP0:.+]] = affine_map<(d0, d1, d2, d3, d4, d5, d6, d7, d8) -> (d0, d1 + d5, d2 + d6, d3 + d7, d8)> -// CHECK-DAG: #[[MAP1:.+]] = affine_map<(d0, d1, d2, d3, d4, d5, d6, d7, d8) -> (d5, d6, d7, d8, d4)> -// CHECK-DAG: #[[MAP2:.+]] = affine_map<(d0, d1, d2, d3, d4, d5, d6, d7, d8) -> (d0, d1, d2, d3, d4)> - -// CHECK: func @conv_3d_input_ndhwc_filter_dhwcf - -// CHECK: linalg.generic -// CHECK-SAME: indexing_maps = [#[[MAP0]], #[[MAP1]], #[[MAP2]]] -// CHECK-SAME: iterator_types = ["parallel", "parallel", "parallel", "parallel", "parallel", "reduction", "reduction", "reduction", "parallel"]} -// CHECK-SAME: ins(%{{.+}}, %{{.+}} : memref, memref) -// CHECK-SAME: outs(%{{.+}} : memref) - -// CHECK: ^{{.+}}(%[[BBARG0:.+]]: f32, %[[BBARG1:.+]]: f32, %[[BBARG2:.+]]: f32) -// CHECK-NEXT: %[[MUL:.+]] = mulf %[[BBARG0]], %[[BBARG1]] : f32 -// CHECK-NEXT: %[[ADD:.+]] = addf %[[BBARG2]], %[[MUL]] : f32 -// CHECK-NEXT: linalg.yield %[[ADD]] : f32 - -// ----- - -func @conv_3d_input_ncdhw_filter_dhwcf(%input: memref, %filter: memref, %output: memref) { - linalg.conv_3d_input_ncdhw_filter_dhwcf {dilations = dense<1> : tensor<3xi64>, - strides = dense<1> : tensor<3xi64>} - ins (%input, %filter: memref, memref) - outs (%output: memref) - return -} - -// CHECK-DAG: #[[MAP0:.+]] = affine_map<(d0, d1, d2, d3, d4, d5, d6, d7, d8) -> (d0, d8, d2 + d5, d3 + d6, d4 + d7)> -// CHECK-DAG: #[[MAP1:.+]] = affine_map<(d0, d1, d2, d3, d4, d5, d6, d7, d8) -> (d5, d6, d7, d8, d1)> -// CHECK-DAG: #[[MAP2:.+]] = affine_map<(d0, d1, d2, d3, d4, d5, d6, d7, d8) -> (d0, d1, d2, d3, d4)> - -// CHECK: func @conv_3d_input_ncdhw_filter_dhwcf - -// CHECK: linalg.generic -// CHECK-SAME: indexing_maps = [#[[MAP0]], #[[MAP1]], #[[MAP2]]] -// CHECK-SAME: iterator_types = ["parallel", "parallel", "parallel", "parallel", "parallel", "reduction", "reduction", "reduction", "parallel"]} -// CHECK-SAME: ins(%{{.+}}, %{{.+}} : memref, memref) -// CHECK-SAME: outs(%{{.+}} : memref) - -// CHECK: ^{{.+}}(%[[BBARG0:.+]]: f32, %[[BBARG1:.+]]: f32, %[[BBARG2:.+]]: f32) -// CHECK-NEXT: %[[MUL:.+]] = mulf %[[BBARG0]], %[[BBARG1]] : f32 -// CHECK-NEXT: %[[ADD:.+]] = addf %[[BBARG2]], %[[MUL]] : f32 -// CHECK-NEXT: linalg.yield %[[ADD]] : f32 - -// ----- - func @generalize_fill(%output: memref, %value : f32) { linalg.fill(%value, %output) : f32, memref return diff --git a/mlir/test/Dialect/Linalg/generalize-named-polymorphic-ops.mlir b/mlir/test/Dialect/Linalg/generalize-named-polymorphic-ops.mlir --- a/mlir/test/Dialect/Linalg/generalize-named-polymorphic-ops.mlir +++ b/mlir/test/Dialect/Linalg/generalize-named-polymorphic-ops.mlir @@ -30,36 +30,6 @@ // ----- -func @generalize_depthwise_conv_2d_input_nhwc_filter_hwc_poly_f32(%input : 
tensor<1x4x16x1xf32>, %filter: tensor<2x2x1xf32>, %output: tensor<1x2x4x1xf32>) -> tensor<1x2x4x1xf32> { - %0 = linalg.depthwise_conv_2d_input_nhwc_filter_hwc_poly {dilations = dense<[1, 2]> : tensor<2xi64>, strides = dense<[2, 4]> : tensor<2xi64>} - ins(%input, %filter : tensor<1x4x16x1xf32>, tensor<2x2x1xf32>) outs(%output : tensor<1x2x4x1xf32>) -> tensor<1x2x4x1xf32> - return %0: tensor<1x2x4x1xf32> -} - -// CHECK-LABEL: @generalize_depthwise_conv_2d_input_nhwc_filter_hwc_poly_f32 -// CHECK: ^{{.*}}(%[[IN_ARG:.+]]: f32, %[[FILTER_ARG:.+]]: f32, %[[OUT_ARG:.+]]: f32) -// CHECK-NEXT: %[[MUL:.+]] = mulf %[[IN_ARG]], %[[FILTER_ARG]] : f32 -// CHECK-NEXT: %[[ADD:.+]] = addf %[[OUT_ARG]], %[[MUL]] : f32 -// CHECK-NEXT: linalg.yield %[[ADD]] : f32 -// CHECK-NEXT: -> tensor<1x2x4x1xf32> - -// ----- - -func @generalize_depthwise_conv_2d_input_nhwc_filter_hwc_poly_i32(%input : tensor<1x4x16x1xi32>, %filter: tensor<2x2x1xi32>, %output: tensor<1x2x4x1xi32>) -> tensor<1x2x4x1xi32> { - %0 = linalg.depthwise_conv_2d_input_nhwc_filter_hwc_poly {dilations = dense<[1, 2]> : tensor<2xi64>, strides = dense<[2, 4]> : tensor<2xi64>} - ins(%input, %filter : tensor<1x4x16x1xi32>, tensor<2x2x1xi32>) outs(%output : tensor<1x2x4x1xi32>) -> tensor<1x2x4x1xi32> - return %0: tensor<1x2x4x1xi32> -} - -// CHECK-LABEL: @generalize_depthwise_conv_2d_input_nhwc_filter_hwc_poly_i32 -// CHECK: ^{{.*}}(%[[IN_ARG:.+]]: i32, %[[FILTER_ARG:.+]]: i32, %[[OUT_ARG:.+]]: i32) -// CHECK-NEXT: %[[MUL:.+]] = muli %[[IN_ARG]], %[[FILTER_ARG]] : i32 -// CHECK-NEXT: %[[ADD:.+]] = addi %[[OUT_ARG]], %[[MUL]] : i32 -// CHECK-NEXT: linalg.yield %[[ADD]] : i32 -// CHECK-NEXT: -> tensor<1x2x4x1xi32> - -// ----- - func @generalize_pooling_nhwc_max_f32(%input : tensor<1x4x16x1xf32>, %shape: tensor<2x2xf32>, %output: tensor<1x2x4x1xf32>) -> tensor<1x2x4x1xf32> { %0 = linalg.pooling_nhwc_max {dilations = dense<[1, 2]> : tensor<2xi64>, strides = dense<[2, 4]> : tensor<2xi64>} ins(%input, %shape : tensor<1x4x16x1xf32>, tensor<2x2xf32>) outs(%output : tensor<1x2x4x1xf32>) -> tensor<1x2x4x1xf32> diff --git a/mlir/test/Dialect/Linalg/named-ops.mlir b/mlir/test/Dialect/Linalg/named-ops.mlir --- a/mlir/test/Dialect/Linalg/named-ops.mlir +++ b/mlir/test/Dialect/Linalg/named-ops.mlir @@ -16,24 +16,6 @@ return %0 : tensor<2x3x4x2x3xf32> } -// CHECK-LABEL: func @conv_2d_nchw_tensor -func @conv_2d_nchw_tensor(%input: tensor<2x2x4x5xf32>, %filter: tensor<4x2x3x3xf32>) -> tensor<2x4x2x3xf32> { - %cst = constant 0.000000e+00 : f32 - %init = linalg.init_tensor [2, 4, 2, 3] : tensor<2x4x2x3xf32> - %fill = linalg.fill(%cst, %init) : f32, tensor<2x4x2x3xf32> -> tensor<2x4x2x3xf32> -// CHECK: %{{.+}} = linalg.conv_2d_nchw -// CHECK-SAME: {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} -// CHECK-SAME: ins(%{{.+}}, %{{.+}} : tensor<2x2x4x5xf32>, tensor<4x2x3x3xf32>) -// CHECK-SAME: outs(%{{.+}} : tensor<2x4x2x3xf32>) -> tensor<2x4x2x3xf32> -// CHECK: return %{{.+}} : tensor<2x4x2x3xf32> -// CHECK: } - %0 = linalg.conv_2d_nchw - {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} - ins(%input, %filter: tensor<2x2x4x5xf32>, tensor<4x2x3x3xf32>) - outs(%fill : tensor<2x4x2x3xf32>) -> tensor<2x4x2x3xf32> - return %0 : tensor<2x4x2x3xf32> -} - // CHECK-LABEL: func @depthwise_conv_2d_input_nhwc_filter_hwcf_memref func @depthwise_conv_2d_input_nhwc_filter_hwcf_memref(%input: memref<2x4x5x2xf32>, %filter: memref<2x2x2x3xf32>, %output: memref<2x3x4x2x3xf32>) { // CHECK: linalg.depthwise_conv_2d_input_nhwc_filter_hwcf @@ 
-174,38 +156,6 @@ // ----- -// CHECK-LABEL: func @conv_1d_input_ncw_filter_wcf -func @conv_1d_input_ncw_filter_wcf(%input: tensor, %filter: tensor, %init: tensor) -> tensor { - // CHECK: %{{.+}} = linalg.conv_1d_input_ncw_filter_wcf - // CHECK-SAME: dilations = dense<1> : tensor<1xi64> - // CHECK-SAME: strides = dense<1> : tensor<1xi64> - // CHECK-SAME: ins(%{{.+}}, %{{.+}} : tensor, tensor) - // CHECK-SAME: outs(%{{.+}} : tensor) -> tensor - %0 = linalg.conv_1d_input_ncw_filter_wcf {dilations = dense<1> : tensor<1xi64>, - strides = dense<1> : tensor<1xi64>} - ins (%input, %filter: tensor, tensor) - outs (%init: tensor) -> tensor - return %0 : tensor -} - -// ----- - -// CHECK-LABEL: func @conv_1d_input_ncw_filter_wcf -func @conv_1d_input_ncw_filter_wcf(%input: memref, %filter: memref, %output: memref) { - // CHECK: linalg.conv_1d_input_ncw_filter_wcf - // CHECK-SAME: dilations = dense<1> : tensor<1xi64> - // CHECK-SAME: strides = dense<1> : tensor<1xi64> - // CHECK-SAME: ins(%{{.+}}, %{{.+}} : memref, memref) - // CHECK-SAME: outs(%{{.+}} : memref) - linalg.conv_1d_input_ncw_filter_wcf {dilations = dense<1> : tensor<1xi64>, - strides = dense<1> : tensor<1xi64>} - ins (%input, %filter: memref, memref) - outs (%output: memref) - return -} - -// ----- - // CHECK-LABEL: func @conv_2d_input_nhwc_filter_hwcf func @conv_2d_input_nhwc_filter_hwcf(%input: tensor, %filter: tensor, %init: tensor) -> tensor { // CHECK: %{{.+}} = linalg.conv_2d_input_nhwc_filter_hwcf @@ -238,38 +188,6 @@ // ----- -// CHECK-LABEL: func @conv_2d_input_nchw_filter_hwcf -func @conv_2d_input_nchw_filter_hwcf(%input: tensor, %filter: tensor, %init: tensor) -> tensor { - // CHECK: %{{.+}} = linalg.conv_2d_input_nchw_filter_hwcf - // CHECK-SAME: dilations = dense<1> : tensor<2xi64> - // CHECK-SAME: strides = dense<1> : tensor<2xi64> - // CHECK-SAME: ins(%{{.+}}, %{{.+}} : tensor, tensor) - // CHECK-SAME: outs(%{{.+}} : tensor) -> tensor - %0 = linalg.conv_2d_input_nchw_filter_hwcf {dilations = dense<1> : tensor<2xi64>, - strides = dense<1> : tensor<2xi64>} - ins (%input, %filter: tensor, tensor) - outs (%init: tensor) -> tensor - return %0 : tensor -} - -// ----- - -// CHECK-LABEL: func @conv_2d_input_nchw_filter_hwcf -func @conv_2d_input_nchw_filter_hwcf(%input: memref, %filter: memref, %output: memref) { - // CHECK: linalg.conv_2d_input_nchw_filter_hwcf - // CHECK-SAME: dilations = dense<1> : tensor<2xi64> - // CHECK-SAME: strides = dense<1> : tensor<2xi64> - // CHECK-SAME: ins(%{{.+}}, %{{.+}} : memref, memref) - // CHECK-SAME: outs(%{{.+}} : memref) - linalg.conv_2d_input_nchw_filter_hwcf {dilations = dense<1> : tensor<2xi64>, - strides = dense<1> : tensor<2xi64>} - ins (%input, %filter: memref, memref) - outs (%output: memref) - return -} - -// ----- - // CHECK-LABEL: func @conv_3d_input_ndhwc_filter_dhwcf func @conv_3d_input_ndhwc_filter_dhwcf(%input: tensor, %filter: tensor, %init: tensor) -> tensor { // CHECK: %{{.+}} = linalg.conv_3d_input_ndhwc_filter_dhwcf @@ -302,38 +220,6 @@ // ----- -// CHECK-LABEL: func @conv_3d_input_ncdhw_filter_dhwcf -func @conv_3d_input_ncdhw_filter_dhwcf(%input: tensor, %filter: tensor, %init: tensor) -> tensor { - // CHECK: %{{.+}} = linalg.conv_3d_input_ncdhw_filter_dhwcf - // CHECK-SAME: dilations = dense<1> : tensor<3xi64> - // CHECK-SAME: strides = dense<1> : tensor<3xi64> - // CHECK-SAME: ins(%{{.+}}, %{{.+}} : tensor, tensor) - // CHECK-SAME: outs(%{{.+}} : tensor) -> tensor - %0 = linalg.conv_3d_input_ncdhw_filter_dhwcf {dilations = dense<1> : tensor<3xi64>, - strides = dense<1> : 
tensor<3xi64>} - ins (%input, %filter: tensor, tensor) - outs (%init: tensor) -> tensor - return %0 : tensor -} - -// ----- - -// CHECK-LABEL: func @conv_3d_input_ncdhw_filter_dhwcf -func @conv_3d_input_ncdhw_filter_dhwcf(%input: memref, %filter: memref, %output: memref) { - // CHECK: linalg.conv_3d_input_ncdhw_filter_dhwcf - // CHECK-SAME: dilations = dense<1> : tensor<3xi64> - // CHECK-SAME: strides = dense<1> : tensor<3xi64> - // CHECK-SAME: ins(%{{.+}}, %{{.+}} : memref, memref) - // CHECK-SAME: outs(%{{.+}} : memref) - linalg.conv_3d_input_ncdhw_filter_dhwcf {dilations = dense<1> : tensor<3xi64>, - strides = dense<1> : tensor<3xi64>} - ins (%input, %filter: memref, memref) - outs (%output: memref) - return -} - -// ----- - // CHECK-LABEL: func @pooling_nhwc_sum_tensor // CHECK: %{{.+}} = linalg.pooling_nhwc_sum // CHECK-SAME: dilations = dense<1> : tensor<2xi64> diff --git a/mlir/test/Integration/Dialect/Linalg/CPU/benchmark_matmul_column_major_as_row_major.mlir b/mlir/test/Integration/Dialect/Linalg/CPU/benchmark_matmul_column_major_as_row_major.mlir deleted file mode 100644 --- a/mlir/test/Integration/Dialect/Linalg/CPU/benchmark_matmul_column_major_as_row_major.mlir +++ /dev/null @@ -1,136 +0,0 @@ -// RUN: export M=24 && export K=64 && export N=192 && export ITERS=10 && \ -// RUN: cat %s | sed 's@${M}@'"$M"'@g'| sed 's@${K}@'"$K"'@g' | sed 's@${N}@'"$N"'@g'| sed 's@${ITERS}@'"$ITERS"'@g'| \ -// RUN: mlir-opt -test-linalg-codegen-strategy="anchor-func=matmul_column_major_as_row_major anchor-op=linalg.matmul_column_major register-tile-sizes=16,0,32 vectorize" | \ -// RUN: mlir-opt -test-linalg-codegen-strategy="anchor-func=matmul_column_major_as_row_major anchor-op=linalg.matmul register-tile-sizes=12,32,16 vectorize" | \ -// RUN: mlir-opt -test-linalg-codegen-strategy="anchor-op=linalg.fill register-tile-sizes=4,16 vectorize" | \ - -// TODO: linalg.copy vectorization in the presence of permutation map fails. Enable when addressed. -// R_UN: mlir-opt -test-linalg-codegen-strategy="anchor-op=linalg.copy register-tile-sizes=4,16 vectorize" | \ - -// RUN: mlir-opt -canonicalize -convert-vector-to-scf -lower-affine -convert-linalg-to-loops | \ -// RUN: mlir-opt -canonicalize -convert-scf-to-std -convert-vector-to-llvm -convert-memref-to-llvm -convert-std-to-llvm | \ -// RUN: mlir-cpu-runner -O3 -e main -entry-point-result=void \ -// Activate to dump assembly -// R_UN: -dump-object-file -object-filename=/tmp/a.o \ -// RUN: -shared-libs=%mlir_integration_test_dir/libmlir_runner_utils%shlibext \ -// RUN: -shared-libs=%mlir_integration_test_dir/libmlir_c_runner_utils%shlibext | \ -// Use tee to both print to stderr and FileCheck -// RUN: tee -a /dev/stderr | FileCheck %s - -!elem_type_a = type f32 -!elem_type_b = type f32 -!elem_type_c = type f32 -!row_major_A = type memref<${M}x${K}x!elem_type_a> -!row_major_B = type memref<${K}x${N}x!elem_type_b> -!row_major_C = type memref<${M}x${N}x!elem_type_c> -!column_major_A = type memref<${K}x${M}x!elem_type_a> -!column_major_B = type memref<${N}x${K}x!elem_type_b> -!column_major_C = type memref<${N}x${M}x!elem_type_c> - -func @matmul_column_major_as_row_major( - %ca: !column_major_A, %cb: !column_major_B, %cc: !column_major_C, - %a: !row_major_A, %b: !row_major_B, %c: !row_major_C) -// TODO: activate manually for now. 
-// attributes { passthrough = [["target-cpu", "skylake-avx512"], ["prefer-vector-width", "512"]]} -{ - linalg.copy(%ca, %a) {inputPermutation = affine_map<(i, j) -> (j, i)> } : !column_major_A, !row_major_A - linalg.copy(%cb, %b) {inputPermutation = affine_map<(i, j) -> (j, i)> } : !column_major_B, !row_major_B - linalg.matmul ins(%a, %b : !row_major_A, !row_major_B) - outs(%c: !row_major_C) - linalg.copy(%c, %cc) {inputPermutation = affine_map<(i, j) -> (j, i)> } : !row_major_C, !column_major_C - return -} - -func @print_perf(%iters: index, %total_time: f64) { - %c2 = constant 2 : index - %cM = constant ${M} : index - %cN = constant ${N} : index - %cK = constant ${K} : index - - %mn = muli %cM, %cN : index - %mnk = muli %mn, %cK : index - - // 2*M*N*K. - %flops_per_iter = muli %c2, %mnk : index - %flops = muli %iters, %flops_per_iter : index - %flops_i64 = index_cast %flops : index to i64 - %flops_f = sitofp %flops_i64 : i64 to f64 - %flops_per_s = divf %flops_f, %total_time : f64 - vector.print %flops_per_s : f64 - - return -} - -func @main() { - %f0 = constant 0.0 : !elem_type_c - %f1 = constant 1.0 : !elem_type_a - - %cA = memref.alloc() : !column_major_A - %cB = memref.alloc() : !column_major_B - %cC = memref.alloc() : !column_major_C - - linalg.fill(%f1, %cA) : !elem_type_a, !column_major_A - linalg.fill(%f1, %cB) : !elem_type_b, !column_major_B - linalg.fill(%f0, %cC) : !elem_type_c, !column_major_C - - %c0 = constant 0: index - %c1 = constant 1: index - %iters = constant ${ITERS}: index - - /// Run and dump performance for matmul_column_major as a row-major - %A = memref.alloc() : !row_major_A - %B = memref.alloc() : !row_major_B - %C = memref.alloc() : !row_major_C - %t_start_matmul_column_major_as_row_major = call @rtclock() : () -> f64 - scf.for %arg0 = %c0 to %iters step %c1 { - // linalg.matmul writes %C in place, need to reset it to zero every time. - // This is accounts for about 10-15% perf hit on small sizes. - // Once linalg on tensors is ready, fusing fill at the register level will - // be easy. 
- linalg.fill(%f0, %C) : !elem_type_c, !row_major_C - call @matmul_column_major_as_row_major(%cA, %cB, %cC, %A, %B, %C) : - (!column_major_A, !column_major_B, !column_major_C, - !row_major_A, !row_major_B, !row_major_C) -> () - } - %t_end_matmul_column_major_as_row_major = call @rtclock() : () -> f64 - %tmatmul_column_major_as_row_major = subf %t_end_matmul_column_major_as_row_major, %t_start_matmul_column_major_as_row_major: f64 - call @print_perf(%iters, %tmatmul_column_major_as_row_major) : (index, f64) -> () - - // CHECK: {{^0$}} - %cC_ref = memref.alloc() : !column_major_C - linalg.fill(%f0, %cC_ref) : !elem_type_c, !column_major_C - linalg.matmul_column_major ins(%cA, %cB : !column_major_A, !column_major_B) - outs(%cC_ref: !column_major_C) - %act1 = memref.cast %cC : !column_major_C to memref<*xf32> - %exp1 = memref.cast %cC_ref : !column_major_C to memref<*xf32> - %errors1 = call @verifyMemRefF32(%act1, %exp1) : (memref<*xf32>, memref<*xf32>) -> i64 - vector.print %errors1 : i64 - memref.dealloc %cC_ref : !column_major_C - - // CHECK: {{^0$}} - %C_ref = memref.alloc() : !row_major_C - linalg.fill(%f0, %C_ref) : !elem_type_c, !row_major_C - linalg.matmul ins(%A, %B : !row_major_A, !row_major_B) - outs(%C_ref: !row_major_C) - %act2 = memref.cast %C : !row_major_C to memref<*xf32> - %exp2 = memref.cast %C_ref : !row_major_C to memref<*xf32> - %errors2 = call @verifyMemRefF32(%act2, %exp2) : (memref<*xf32>, memref<*xf32>) -> i64 - vector.print %errors2 : i64 - memref.dealloc %C_ref : !row_major_C - - memref.dealloc %A : !row_major_A - memref.dealloc %B : !row_major_B - memref.dealloc %C : !row_major_C - - memref.dealloc %cA : !column_major_A - memref.dealloc %cB : !column_major_B - memref.dealloc %cC : !column_major_C - - return -} - -func private @rtclock() -> f64 -func private @verifyMemRefF32(memref<*xf32>, memref<*xf32>) -> i64 attributes { llvm.emit_c_interface } - -// TODO: init with random, run and check output. 
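The removed benchmark's @print_perf reports throughput as 2*M*N*K floating-point operations per iteration, times the iteration count, divided by the measured time. A hedged Python equivalent of that arithmetic (M, K, N, and ITERS are the values substituted by the RUN line's sed pipeline; total_time_s is a hypothetical stand-in for the rtclock() delta):

# Mirrors @print_perf above: one matmul is 2*M*N*K floating-point operations.
M, K, N, ITERS = 24, 64, 192, 10   # substituted by the RUN line's sed pipeline
total_time_s = 1.0e-3              # hypothetical stand-in for the measured rtclock() delta
flops = 2 * M * N * K * ITERS
print(f"{flops / total_time_s:.3e} FLOP/s")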
-// func private @fill_random_f32(memref<*xf32>) diff --git a/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-1d-input-ncw-filter-wcf-call.mlir b/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-1d-input-ncw-filter-wcf-call.mlir deleted file mode 100644 --- a/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-1d-input-ncw-filter-wcf-call.mlir +++ /dev/null @@ -1,70 +0,0 @@ -// RUN: mlir-opt %s -convert-linalg-to-loops -convert-scf-to-std -convert-linalg-to-llvm -lower-affine -convert-scf-to-std --convert-memref-to-llvm -convert-std-to-llvm | \ -// RUN: mlir-cpu-runner -e main -entry-point-result=void \ -// RUN: -shared-libs=%mlir_integration_test_dir/libmlir_runner_utils%shlibext \ -// RUN: | FileCheck %s - -// RUN: mlir-opt %s -linalg-tile="linalg-tile-sizes=0,0,4" -convert-linalg-to-loops -convert-scf-to-std \ -// RUN: -convert-linalg-to-llvm -lower-affine -convert-scf-to-std --convert-memref-to-llvm -convert-std-to-llvm | \ -// RUN: mlir-cpu-runner -e main -entry-point-result=void \ -// RUN: -shared-libs=%mlir_integration_test_dir/libmlir_runner_utils%shlibext \ -// RUN: | FileCheck %s - -// RUN: mlir-opt %s -test-conv-vectorization="tile-sizes=1,1,1,3,3" -convert-linalg-to-llvm -lower-affine -convert-scf-to-std -convert-vector-to-llvm --convert-memref-to-llvm -convert-std-to-llvm | \ -// RUN: mlir-cpu-runner -e main -entry-point-result=void \ -// RUN: -shared-libs=%mlir_integration_test_dir/libmlir_runner_utils%shlibext \ -// RUN: | FileCheck %s - -// RUN: mlir-opt %s -linalg-tile="linalg-tile-sizes=0,0,4" \ -// RUN: -test-conv-vectorization="tile-sizes=1,1,1,3,3" -convert-linalg-to-llvm -lower-affine -convert-scf-to-std -convert-vector-to-llvm --convert-memref-to-llvm -convert-std-to-llvm | \ -// RUN: mlir-cpu-runner -e main -entry-point-result=void \ -// RUN: -shared-libs=%mlir_integration_test_dir/libmlir_runner_utils%shlibext \ -// RUN: | FileCheck %s - -func private @print_memref_f32(memref<*xf32>) - -// Creates and returns 3-D buffer of size (%s1, %s2, %s3) filled with the value %f -func @alloc_3d_filled_f32(%s1 : index, %s2 : index, %s3 : index, %f : f32) -> memref { - %buf = memref.alloc(%s1, %s2, %s3) : memref - linalg.fill(%f, %buf) : f32, memref - return %buf : memref -} - -func @conv_1d_input_ncw_filter_wcf(%arg0: memref, %arg1: memref, %arg2: memref) { - linalg.conv_1d_input_ncw_filter_wcf {dilations = dense<1> : tensor<1xi64>, - strides = dense<1> : tensor<1xi64>} - ins (%arg0, %arg1: memref, memref) - outs (%arg2: memref) - return -} - -func @main() { - %c0 = constant 0 : index - %c1 = constant 1 : index - %c3 = constant 3 : index - %c6 = constant 6 : index - %c8 = constant 8 : index - %f10 = constant 10.00000e+00 : f32 - %val = constant 2.00000e+00 : f32 - %zero = constant 0.00000e+00 : f32 - - %filter1D_ncw = call @alloc_3d_filled_f32(%c3, %c1, %c1, %val) : (index, index, index, f32) -> (memref) - %in1D_ncw = call @alloc_3d_filled_f32(%c1, %c1, %c8, %val) : (index, index, index, f32) -> (memref) - %out1D_ncw = call @alloc_3d_filled_f32(%c1, %c1, %c6, %zero) : (index, index, index, f32) -> (memref) - - memref.store %f10, %in1D_ncw[%c0, %c0, %c3] : memref - call @conv_1d_input_ncw_filter_wcf(%in1D_ncw, %filter1D_ncw, %out1D_ncw) : (memref, memref, memref) -> () - %out1D_ncw_ = memref.cast %out1D_ncw : memref to memref<*xf32> - call @print_memref_f32(%out1D_ncw_): (memref<*xf32>) -> () - - memref.dealloc %filter1D_ncw : memref - memref.dealloc %in1D_ncw : memref - memref.dealloc %out1D_ncw : memref - return -} - -// CHECK: Unranked Memref {{.*}} -// CHECK-NEXT: [ -// 
CHECK-SAME: [ -// CHECK-SAME: [12, 28, 28, 28, 12, 12] -// CHECK-SAME: ] -// CHECK-SAME: ] diff --git a/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-1d-ncw-call.mlir b/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-1d-ncw-call.mlir deleted file mode 100644 --- a/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-1d-ncw-call.mlir +++ /dev/null @@ -1,68 +0,0 @@ -// RUN: mlir-opt %s -convert-linalg-to-loops -convert-scf-to-std -convert-linalg-to-llvm -lower-affine -convert-scf-to-std --convert-memref-to-llvm -convert-std-to-llvm | \ -// RUN: mlir-cpu-runner -e main -entry-point-result=void \ -// RUN: -shared-libs=%mlir_integration_test_dir/libmlir_runner_utils%shlibext \ -// RUN: | FileCheck %s - -// RUN: mlir-opt %s -linalg-tile="linalg-tile-sizes=0,0,4" -convert-linalg-to-loops -convert-scf-to-std \ -// RUN: -convert-linalg-to-llvm -lower-affine -convert-scf-to-std --convert-memref-to-llvm -convert-std-to-llvm | \ -// RUN: mlir-cpu-runner -e main -entry-point-result=void \ -// RUN: -shared-libs=%mlir_integration_test_dir/libmlir_runner_utils%shlibext \ -// RUN: | FileCheck %s - -// RUN: mlir-opt %s -test-conv-vectorization="tile-sizes=1,1,1,3,3" -convert-linalg-to-llvm -lower-affine -convert-scf-to-std -convert-vector-to-llvm --convert-memref-to-llvm -convert-std-to-llvm | \ -// RUN: mlir-cpu-runner -e main -entry-point-result=void \ -// RUN: -shared-libs=%mlir_integration_test_dir/libmlir_runner_utils%shlibext \ -// RUN: | FileCheck %s - -// RUN: mlir-opt %s -linalg-tile="linalg-tile-sizes=0,0,4" \ -// RUN: -test-conv-vectorization="tile-sizes=1,1,1,3,3" -convert-linalg-to-llvm -lower-affine -convert-scf-to-std -convert-vector-to-llvm --convert-memref-to-llvm -convert-std-to-llvm | \ -// RUN: mlir-cpu-runner -e main -entry-point-result=void \ -// RUN: -shared-libs=%mlir_integration_test_dir/libmlir_runner_utils%shlibext \ -// RUN: | FileCheck %s - -func private @print_memref_f32(memref<*xf32>) - -// Creates and returns 3-D buffer of size (%s1, %s2, %s3) filled with the value %f -func @alloc_3d_filled_f32(%s1 : index, %s2 : index, %s3 : index, %f : f32) -> memref { - %buf = memref.alloc(%s1, %s2, %s3) : memref - linalg.fill(%f, %buf) : f32, memref - return %buf : memref -} - -func @conv_1d_ncw(%arg0: memref, %arg1: memref, %arg2: memref) { - linalg.conv_1d_ncw ins (%arg0, %arg1: memref, memref) - outs (%arg2: memref) - return -} - -func @main() { - %c0 = constant 0 : index - %c1 = constant 1 : index - %c3 = constant 3 : index - %c6 = constant 6 : index - %c8 = constant 8 : index - %f10 = constant 10.00000e+00 : f32 - %val = constant 2.00000e+00 : f32 - %zero = constant 0.00000e+00 : f32 - - %filter1D_ncw = call @alloc_3d_filled_f32(%c1, %c1, %c3, %val) : (index, index, index, f32) -> (memref) - %in1D_ncw = call @alloc_3d_filled_f32(%c1, %c1, %c8, %val) : (index, index, index, f32) -> (memref) - %out1D_ncw = call @alloc_3d_filled_f32(%c1, %c1, %c6, %zero) : (index, index, index, f32) -> (memref) - - memref.store %f10, %in1D_ncw[%c0, %c0, %c3] : memref - call @conv_1d_ncw(%in1D_ncw, %filter1D_ncw, %out1D_ncw) : (memref, memref, memref) -> () - %out1D_ncw_ = memref.cast %out1D_ncw : memref to memref<*xf32> - call @print_memref_f32(%out1D_ncw_): (memref<*xf32>) -> () - - memref.dealloc %filter1D_ncw : memref - memref.dealloc %in1D_ncw : memref - memref.dealloc %out1D_ncw : memref - return -} - -// CHECK: Unranked Memref {{.*}} -// CHECK-NEXT: [ -// CHECK-SAME: [ -// CHECK-SAME: [12, 28, 28, 28, 12, 12] -// CHECK-SAME: ] -// CHECK-SAME: ] diff --git 
a/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-1d-nwc-call.mlir b/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-1d-nwc-call.mlir deleted file mode 100644 --- a/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-1d-nwc-call.mlir +++ /dev/null @@ -1,79 +0,0 @@ -// RUN: mlir-opt %s -convert-linalg-to-loops -convert-scf-to-std -convert-linalg-to-llvm -lower-affine -convert-scf-to-std --convert-memref-to-llvm -convert-std-to-llvm | \ -// RUN: mlir-cpu-runner -e main -entry-point-result=void \ -// RUN: -shared-libs=%mlir_integration_test_dir/libmlir_runner_utils%shlibext \ -// RUN: | FileCheck %s - -// RUN: mlir-opt %s -linalg-tile="linalg-tile-sizes=2,4" -convert-linalg-to-loops -convert-scf-to-std \ -// RUN: -convert-linalg-to-llvm -lower-affine -convert-scf-to-std --convert-memref-to-llvm -convert-std-to-llvm | \ -// RUN: mlir-cpu-runner -e main -entry-point-result=void \ -// RUN: -shared-libs=%mlir_integration_test_dir/libmlir_runner_utils%shlibext \ -// RUN: | FileCheck %s - -// RUN: mlir-opt %s -test-conv-vectorization="tile-sizes=1,1,1,3,3" -convert-linalg-to-llvm -lower-affine -convert-scf-to-std -convert-vector-to-llvm --convert-memref-to-llvm -convert-std-to-llvm | \ -// RUN: mlir-cpu-runner -e main -entry-point-result=void \ -// RUN: -shared-libs=%mlir_integration_test_dir/libmlir_runner_utils%shlibext \ -// RUN: | FileCheck %s - -// RUN: mlir-opt %s -linalg-tile="linalg-tile-sizes=2,4" \ -// RUN: -test-conv-vectorization="tile-sizes=1,1,1,3,3" -convert-linalg-to-llvm -lower-affine -convert-scf-to-std -convert-vector-to-llvm --convert-memref-to-llvm -convert-std-to-llvm | \ -// RUN: mlir-cpu-runner -e main -entry-point-result=void \ -// RUN: -shared-libs=%mlir_integration_test_dir/libmlir_runner_utils%shlibext \ -// RUN: | FileCheck %s - -func private @print_memref_f32(memref<*xf32>) - -// Creates and returns 3-D buffer of size (%s1, %s2, %s3) filled with the value %f -func @alloc_3d_filled_f32(%s1 : index, %s2 : index, %s3 : index, %f : f32) -> memref { - %buf = memref.alloc(%s1, %s2, %s3) : memref - linalg.fill(%f, %buf) : f32, memref - return %buf : memref -} - -func @conv_1d_nwc(%arg0: memref, %arg1: memref, %arg2: memref) { - linalg.conv_1d_nwc ins (%arg0, %arg1: memref, memref) - outs (%arg2: memref) - return -} - -func @main() { - %c0 = constant 0 : index - %c1 = constant 1 : index - %c3 = constant 3 : index - %c6 = constant 6 : index - %c8 = constant 8 : index - %f10 = constant 10.00000e+00 : f32 - %val = constant 2.00000e+00 : f32 - %zero = constant 0.00000e+00 : f32 - - %filter1D_nwc = call @alloc_3d_filled_f32(%c1, %c3, %c1, %val) : (index, index, index, f32) -> (memref) - %in1D_nwc = call @alloc_3d_filled_f32(%c3, %c8, %c1, %val) : (index, index, index, f32) -> (memref) - %out1D_nwc = call @alloc_3d_filled_f32(%c3, %c6, %c1, %zero) : (index, index, index, f32) -> (memref) - - memref.store %f10, %in1D_nwc[%c0, %c3, %c0] : memref - call @conv_1d_nwc(%in1D_nwc, %filter1D_nwc, %out1D_nwc) : (memref, memref, memref) -> () - %out1D_nwc_ = memref.cast %out1D_nwc : memref to memref<*xf32> - call @print_memref_f32(%out1D_nwc_): (memref<*xf32>) -> () - - memref.dealloc %filter1D_nwc : memref - memref.dealloc %in1D_nwc : memref - memref.dealloc %out1D_nwc : memref - return -} - -// CHECK: Unranked Memref {{.*}} -// CHECK-NEXT: [ -// CHECK-SAME: [ -// CHECK-SAME: [12], -// CHECK-COUNT-3: [28], -// CHECK-NEXT: [12], -// CHECK-NEXT: [12] -// CHECK-SAME: ], -// CHECK-NEXT: [ -// CHECK-COUNT-5: [12], -// CHECK-NEXT: [12] -// CHECK-SAME: ], -// CHECK-NEXT: [ -// CHECK-COUNT-5: 
[12], -// CHECK-NEXT: [12] -// CHECK-SAME: ] -// CHECK-SAME: ] diff --git a/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-2d-input-nchw-filter-hwcf-call.mlir b/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-2d-input-nchw-filter-hwcf-call.mlir deleted file mode 100644 --- a/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-2d-input-nchw-filter-hwcf-call.mlir +++ /dev/null @@ -1,83 +0,0 @@ -// RUN: mlir-opt %s -convert-linalg-to-loops -convert-scf-to-std -convert-linalg-to-llvm -lower-affine -convert-scf-to-std --convert-memref-to-llvm -convert-std-to-llvm | \ -// RUN: mlir-cpu-runner -e main -entry-point-result=void \ -// RUN: -shared-libs=%mlir_integration_test_dir/libmlir_runner_utils%shlibext \ -// RUN: | FileCheck %s - -// RUN: mlir-opt %s -linalg-tile="linalg-tile-sizes=2,0,4,4" -convert-linalg-to-loops -convert-scf-to-std \ -// RUN: -convert-linalg-to-llvm -lower-affine -convert-scf-to-std --convert-memref-to-llvm -convert-std-to-llvm | \ -// RUN: mlir-cpu-runner -e main -entry-point-result=void \ -// RUN: -shared-libs=%mlir_integration_test_dir/libmlir_runner_utils%shlibext \ -// RUN: | FileCheck %s - -// RUN: mlir-opt %s -test-conv-vectorization="tile-sizes=1,1,1,1,3,3,3" -convert-linalg-to-llvm -lower-affine -convert-scf-to-std -convert-vector-to-llvm --convert-memref-to-llvm -convert-std-to-llvm | \ -// RUN: mlir-cpu-runner -e main -entry-point-result=void \ -// RUN: -shared-libs=%mlir_integration_test_dir/libmlir_runner_utils%shlibext \ -// RUN: | FileCheck %s - -// RUN: mlir-opt %s -linalg-tile="linalg-tile-sizes=2,0,4,4" \ -// RUN: -test-conv-vectorization="tile-sizes=1,1,1,1,3,3,3" -convert-linalg-to-llvm -lower-affine -convert-scf-to-std -convert-vector-to-llvm --convert-memref-to-llvm -convert-std-to-llvm | \ -// RUN: mlir-cpu-runner -e main -entry-point-result=void \ -// RUN: -shared-libs=%mlir_integration_test_dir/libmlir_runner_utils%shlibext \ -// RUN: | FileCheck %s - -func private @print_memref_f32(memref<*xf32>) - -// Creates and returns 4-D buffer of size (%s1, %s2, %s3, %s4) filled with the value %f -func @alloc_4d_filled_f32(%s1 : index, %s2 : index, %s3 : index, %s4 : index, %f : f32) -> memref { - %buf = memref.alloc(%s1, %s2, %s3, %s4) : memref - linalg.fill(%f, %buf) : f32, memref - return %buf : memref -} - -func @conv_2d_input_nchw_filter_hwcf(%arg0: memref, %arg1: memref, %arg2: memref) { - linalg.conv_2d_input_nchw_filter_hwcf {dilations = dense<1> : tensor<2xi64>, - strides = dense<1> : tensor<2xi64>} - ins (%arg0, %arg1: memref, memref) - outs (%arg2: memref) - return -} - -func @main() { - %c0 = constant 0 : index - %c1 = constant 1 : index - %c3 = constant 3 : index - %c6 = constant 6 : index - %c8 = constant 8 : index - %f10 = constant 10.00000e+00 : f32 - %val = constant 2.00000e+00 : f32 - %zero = constant 0.00000e+00 : f32 - - %filter2D_nchw = call @alloc_4d_filled_f32(%c3, %c3, %c1, %c1, %val) : (index, index, index, index, f32) -> (memref) - %in2D_nchw = call @alloc_4d_filled_f32(%c3, %c1, %c8, %c8, %val) : (index, index, index, index, f32) -> (memref) - %out2D_nchw = call @alloc_4d_filled_f32(%c3, %c1, %c6, %c6, %zero) : (index, index, index, index, f32) -> (memref) - - memref.store %f10, %in2D_nchw[%c0, %c0, %c0, %c3] : memref - call @conv_2d_input_nchw_filter_hwcf(%in2D_nchw, %filter2D_nchw, %out2D_nchw) : (memref, memref, memref) -> () - %out2D_nchw_ = memref.cast %out2D_nchw : memref to memref<*xf32> - call @print_memref_f32(%out2D_nchw_): (memref<*xf32>) -> () - - memref.dealloc %filter2D_nchw : memref - memref.dealloc %in2D_nchw : 
memref - memref.dealloc %out2D_nchw : memref - return -} - -// CHECK: Unranked Memref {{.*}} -// CHECK-NEXT: [ -// CHECK-SAME: [ -// CHECK-SAME: [ -// CHECK-SAME: [36, 52, 52, 52, 36, 36], -// CHECK-COUNT-5: [36, 36, 36, 36, 36, 36] -// CHECK-SAME: ] -// CHECK-SAME: ], -// CHECK-NEXT: [ -// CHECK-SAME: [ -// CHECK-COUNT-6: [36, 36, 36, 36, 36, 36] -// CHECK-SAME: ] -// CHECK-SAME: ], -// CHECK-NEXT: [ -// CHECK-SAME: [ -// CHECK-COUNT-6: [36, 36, 36, 36, 36, 36] -// CHECK-SAME: ] -// CHECK-SAME: ] -// CHECK-SAME: ] diff --git a/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-2d-nchw-call.mlir b/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-2d-nchw-call.mlir deleted file mode 100644 --- a/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-2d-nchw-call.mlir +++ /dev/null @@ -1,83 +0,0 @@ -// RUN: mlir-opt %s -convert-linalg-to-loops -convert-scf-to-std -convert-linalg-to-llvm -lower-affine -convert-scf-to-std --convert-memref-to-llvm -convert-std-to-llvm | \ -// RUN: mlir-cpu-runner -e main -entry-point-result=void \ -// RUN: -shared-libs=%mlir_integration_test_dir/libmlir_runner_utils%shlibext \ -// RUN: | FileCheck %s - -// RUN: mlir-opt %s -linalg-tile="linalg-tile-sizes=2,0,4,4" -convert-linalg-to-loops -convert-scf-to-std \ -// RUN: -convert-linalg-to-llvm -lower-affine -convert-scf-to-std --convert-memref-to-llvm -convert-std-to-llvm | \ -// RUN: mlir-cpu-runner -e main -entry-point-result=void \ -// RUN: -shared-libs=%mlir_integration_test_dir/libmlir_runner_utils%shlibext \ -// RUN: | FileCheck %s - -// RUN: mlir-opt %s -test-conv-vectorization="tile-sizes=1,1,1,1,3,3,3" -convert-linalg-to-llvm -lower-affine -convert-scf-to-std -convert-vector-to-llvm --convert-memref-to-llvm -convert-std-to-llvm | \ -// RUN: mlir-cpu-runner -e main -entry-point-result=void \ -// RUN: -shared-libs=%mlir_integration_test_dir/libmlir_runner_utils%shlibext \ -// RUN: | FileCheck %s - -// RUN: mlir-opt %s -linalg-tile="linalg-tile-sizes=2,0,4,4" \ -// RUN: -test-conv-vectorization="tile-sizes=1,1,1,1,3,3,3" -convert-linalg-to-llvm -lower-affine -convert-scf-to-std -convert-vector-to-llvm --convert-memref-to-llvm -convert-std-to-llvm | \ -// RUN: mlir-cpu-runner -e main -entry-point-result=void \ -// RUN: -shared-libs=%mlir_integration_test_dir/libmlir_runner_utils%shlibext \ -// RUN: | FileCheck %s - -func private @print_memref_f32(memref<*xf32>) - -// Creates and returns 4-D buffer of size (%s1, %s2, %s3, %s4) filled with the value %f -func @alloc_4d_filled_f32(%s1 : index, %s2 : index, %s3 : index, %s4 : index, %f : f32) -> memref { - %buf = memref.alloc(%s1, %s2, %s3, %s4) : memref - linalg.fill(%f, %buf) : f32, memref - return %buf : memref -} - -func @conv_2d_nchw(%arg0: memref, %arg1: memref, %arg2: memref) { - linalg.conv_2d_nchw - {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} - ins (%arg0, %arg1: memref, memref) - outs (%arg2: memref) - return -} - -func @main() { - %c0 = constant 0 : index - %c1 = constant 1 : index - %c3 = constant 3 : index - %c6 = constant 6 : index - %c8 = constant 8 : index - %f10 = constant 10.00000e+00 : f32 - %val = constant 2.00000e+00 : f32 - %zero = constant 0.00000e+00 : f32 - - %filter2D_nchw = call @alloc_4d_filled_f32(%c1, %c1, %c3, %c3, %val) : (index, index, index, index, f32) -> (memref) - %in2D_nchw = call @alloc_4d_filled_f32(%c3, %c1, %c8, %c8, %val) : (index, index, index, index, f32) -> (memref) - %out2D_nchw = call @alloc_4d_filled_f32(%c3, %c1, %c6, %c6, %zero) : (index, index, index, index, f32) -> (memref) - - 
memref.store %f10, %in2D_nchw[%c0, %c0, %c0, %c3] : memref - call @conv_2d_nchw(%in2D_nchw, %filter2D_nchw, %out2D_nchw) : (memref, memref, memref) -> () - %out2D_nchw_ = memref.cast %out2D_nchw : memref to memref<*xf32> - call @print_memref_f32(%out2D_nchw_): (memref<*xf32>) -> () - - memref.dealloc %filter2D_nchw : memref - memref.dealloc %in2D_nchw : memref - memref.dealloc %out2D_nchw : memref - return -} - -// CHECK: Unranked Memref {{.*}} -// CHECK-NEXT: [ -// CHECK-SAME: [ -// CHECK-SAME: [ -// CHECK-SAME: [36, 52, 52, 52, 36, 36], -// CHECK-COUNT-5: [36, 36, 36, 36, 36, 36] -// CHECK-SAME: ] -// CHECK-SAME: ], -// CHECK-NEXT: [ -// CHECK-SAME: [ -// CHECK-COUNT-6: [36, 36, 36, 36, 36, 36] -// CHECK-SAME: ] -// CHECK-SAME: ], -// CHECK-NEXT: [ -// CHECK-SAME: [ -// CHECK-COUNT-6: [36, 36, 36, 36, 36, 36] -// CHECK-SAME: ] -// CHECK-SAME: ] -// CHECK-SAME: ] diff --git a/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-2d-nhwc-call.mlir b/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-2d-nhwc-call.mlir deleted file mode 100644 --- a/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-2d-nhwc-call.mlir +++ /dev/null @@ -1,127 +0,0 @@ -// RUN: mlir-opt %s -convert-linalg-to-loops -convert-scf-to-std -convert-linalg-to-llvm -lower-affine -convert-scf-to-std --convert-memref-to-llvm -convert-std-to-llvm | \ -// RUN: mlir-cpu-runner -e main -entry-point-result=void \ -// RUN: -shared-libs=%mlir_integration_test_dir/libmlir_runner_utils%shlibext \ -// RUN: | FileCheck %s - -// RUN: mlir-opt %s -linalg-tile="linalg-tile-sizes=2,3,3,2" -convert-linalg-to-loops -convert-scf-to-std \ -// RUN: -convert-linalg-to-llvm -lower-affine -convert-scf-to-std --convert-memref-to-llvm -convert-std-to-llvm | \ -// RUN: mlir-cpu-runner -e main -entry-point-result=void \ -// RUN: -shared-libs=%mlir_integration_test_dir/libmlir_runner_utils%shlibext \ -// RUN: | FileCheck %s - -// RUN: mlir-opt %s -test-conv-vectorization="tile-sizes=1,1,1,1,3,3,3" -convert-linalg-to-llvm -lower-affine -convert-scf-to-std -convert-vector-to-llvm --convert-memref-to-llvm -convert-std-to-llvm | \ -// RUN: mlir-cpu-runner -e main -entry-point-result=void \ -// RUN: -shared-libs=%mlir_integration_test_dir/libmlir_runner_utils%shlibext \ -// RUN: | FileCheck %s - -// RUN: mlir-opt %s -linalg-tile="linalg-tile-sizes=2,3,3,2" \ -// RUN: -test-conv-vectorization="tile-sizes=1,1,1,1,3,3,3" -convert-linalg-to-llvm -lower-affine -convert-scf-to-std -convert-vector-to-llvm --convert-memref-to-llvm -convert-std-to-llvm | \ -// RUN: mlir-cpu-runner -e main -entry-point-result=void \ -// RUN: -shared-libs=%mlir_integration_test_dir/libmlir_runner_utils%shlibext \ -// RUN: | FileCheck %s - -func private @print_memref_f32(memref<*xf32>) - -// Creates and returns 4-D buffer of size (%s1, %s2, %s3, %s4) filled with the value %f -func @alloc_4d_filled_f32(%s1 : index, %s2 : index, %s3 : index, %s4 : index, %f : f32) -> memref { - %buf = memref.alloc(%s1, %s2, %s3, %s4) : memref - linalg.fill(%f, %buf) : f32, memref - return %buf : memref -} - -func @conv_2d_nhwc(%arg0: memref, %arg1: memref, %arg2: memref) { - linalg.conv_2d_nhwc ins (%arg0, %arg1: memref, memref) - outs (%arg2: memref) - return -} - -func @main() { - %c0 = constant 0 : index - %c1 = constant 1 : index - %c3 = constant 3 : index - %c6 = constant 6 : index - %c8 = constant 8 : index - %f10 = constant 10.00000e+00 : f32 - %val = constant 2.00000e+00 : f32 - %zero = constant 0.00000e+00 : f32 - - %filter2D_nhwc = call @alloc_4d_filled_f32(%c1, %c3, %c3, %c3, %val) :(index, 
index, index, index, f32) -> (memref) - %in2D_nhwc = call @alloc_4d_filled_f32(%c3, %c8, %c8, %c3, %val) : (index, index, index, index, f32) -> (memref) - %out2D_nhwc = call @alloc_4d_filled_f32(%c3, %c6, %c6, %c1, %zero) : (index, index, index, index, f32) -> (memref) - - memref.store %f10, %in2D_nhwc[%c0, %c0, %c3, %c0] : memref - call @conv_2d_nhwc(%in2D_nhwc, %filter2D_nhwc, %out2D_nhwc) : (memref, memref, memref) -> () - %out2D_nhwc_ = memref.cast %out2D_nhwc : memref to memref<*xf32> - call @print_memref_f32(%out2D_nhwc_): (memref<*xf32>) -> () - - memref.dealloc %filter2D_nhwc : memref - memref.dealloc %in2D_nhwc : memref - memref.dealloc %out2D_nhwc : memref - return -} - -// CHECK: Unranked Memref {{.*}} -// CHECK-NEXT: [ -// CHECK-SAME: [ -// CHECK-SAME: [ -// CHECK-SAME: [108], -// CHECK-COUNT-3: [124], -// CHECK-COUNT-2: [108] -// CHECK-SAME: ], -// CHECK-NEXT: [ -// CHECK-COUNT-6: [108] -// CHECK-SAME: ], -// CHECK-NEXT: [ -// CHECK-COUNT-6: [108] -// CHECK-SAME: ], -// CHECK-NEXT: [ -// CHECK-COUNT-6: [108] -// CHECK-SAME: ], -// CHECK-NEXT: [ -// CHECK-COUNT-6: [108] -// CHECK-SAME: ], -// CHECK-NEXT: [ -// CHECK-COUNT-6: [108] -// CHECK-SAME: ] -// CHECK-SAME: ], -// CHECK-NEXT: [ -// CHECK-SAME: [ -// CHECK-COUNT-6: [108] -// CHECK-SAME: ], -// CHECK-NEXT: [ -// CHECK-COUNT-6: [108] -// CHECK-SAME: ], -// CHECK-NEXT: [ -// CHECK-COUNT-6: [108] -// CHECK-SAME: ], -// CHECK-NEXT: [ -// CHECK-COUNT-6: [108] -// CHECK-SAME: ], -// CHECK-NEXT: [ -// CHECK-COUNT-6: [108] -// CHECK-SAME: ], -// CHECK-NEXT: [ -// CHECK-COUNT-6: [108] -// CHECK-SAME: ] -// CHECK-SAME: ], -// CHECK-NEXT: [ -// CHECK-SAME: [ -// CHECK-COUNT-6: [108] -// CHECK-SAME: ], -// CHECK-NEXT: [ -// CHECK-COUNT-6: [108] -// CHECK-SAME: ], -// CHECK-NEXT: [ -// CHECK-COUNT-6: [108] -// CHECK-SAME: ], -// CHECK-NEXT: [ -// CHECK-COUNT-6: [108] -// CHECK-SAME: ], -// CHECK-NEXT: [ -// CHECK-COUNT-6: [108] -// CHECK-SAME: ], -// CHECK-NEXT: [ -// CHECK-COUNT-6: [108] -// CHECK-SAME: ] -// CHECK-SAME: ] -// CHECK-SAME: ] diff --git a/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-3d-input-ncdhw-filter-dhwcf-call.mlir b/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-3d-input-ncdhw-filter-dhwcf-call.mlir deleted file mode 100644 --- a/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-3d-input-ncdhw-filter-dhwcf-call.mlir +++ /dev/null @@ -1,90 +0,0 @@ -// RUN: mlir-opt %s -convert-linalg-to-loops -convert-scf-to-std -convert-linalg-to-llvm -lower-affine -convert-scf-to-std --convert-memref-to-llvm -convert-std-to-llvm | \ -// RUN: mlir-cpu-runner -e main -entry-point-result=void \ -// RUN: -shared-libs=%mlir_integration_test_dir/libmlir_runner_utils%shlibext \ -// RUN: | FileCheck %s - -// RUN: mlir-opt %s -linalg-tile="linalg-tile-sizes=0,0,5,5,5" -convert-linalg-to-loops -convert-scf-to-std \ -// RUN: -convert-linalg-to-llvm -lower-affine -convert-scf-to-std --convert-memref-to-llvm -convert-std-to-llvm | \ -// RUN: mlir-cpu-runner -e main -entry-point-result=void \ -// RUN: -shared-libs=%mlir_integration_test_dir/libmlir_runner_utils%shlibext \ -// RUN: | FileCheck %s - -// RUN: mlir-opt %s -test-conv-vectorization="tile-sizes=1,1,1,1,1,3,3,3,3" -convert-linalg-to-llvm -lower-affine -convert-scf-to-std -convert-vector-to-llvm --convert-memref-to-llvm -convert-std-to-llvm | \ -// RUN: mlir-cpu-runner -e main -entry-point-result=void \ -// RUN: -shared-libs=%mlir_integration_test_dir/libmlir_runner_utils%shlibext \ -// RUN: | FileCheck %s - -// RUN: mlir-opt %s -linalg-tile="linalg-tile-sizes=0,0,5,5,5" \ 
-// RUN: -test-conv-vectorization="tile-sizes=1,1,1,1,1,3,3,3,3" -convert-linalg-to-llvm -lower-affine -convert-scf-to-std -convert-vector-to-llvm --convert-memref-to-llvm -convert-std-to-llvm | \ -// RUN: mlir-cpu-runner -e main -entry-point-result=void \ -// RUN: -shared-libs=%mlir_integration_test_dir/libmlir_runner_utils%shlibext \ -// RUN: | FileCheck %s - -func private @print_memref_f32(memref<*xf32>) - -// Creates and returns 5-D buffer of size (%s1, %s2, %s3, %s4, %s5) filled with the value %f -func @alloc_5d_filled_f32(%s1 : index, %s2 : index, %s3 : index, %s4 : index, %s5 : index, %f : f32) -> memref { - %buf = memref.alloc(%s1, %s2, %s3, %s4, %s5) : memref - linalg.fill(%f, %buf) : f32, memref - return %buf : memref -} - -func @conv_3d_input_ncdhw_filter_dhwcf(%arg0: memref, %arg1: memref, %arg2: memref) { - linalg.conv_3d_input_ncdhw_filter_dhwcf {dilations = dense<1> : tensor<3xi64>, - strides = dense<1> : tensor<3xi64>} - ins (%arg0, %arg1: memref, memref) - outs (%arg2: memref) - return -} - -func @main() { - %c0 = constant 0 : index - %c1 = constant 1 : index - %c3 = constant 3 : index - %c6 = constant 6 : index - %c8 = constant 8 : index - %f10 = constant 10.00000e+00 : f32 - %val = constant 2.00000e+00 : f32 - %zero = constant 0.00000e+00 : f32 - - %filter3D_ncdhw = call @alloc_5d_filled_f32(%c3, %c3, %c3, %c1, %c1, %val) : (index, index, index, index, index, f32) -> (memref) - %in3D_ncdhw = call @alloc_5d_filled_f32(%c1, %c1, %c8, %c8, %c8, %val) : (index, index, index, index, index, f32) -> (memref) - %out3D_ncdhw = call @alloc_5d_filled_f32(%c1, %c1, %c6, %c6, %c6, %zero) : (index, index, index, index, index, f32) -> (memref) - - memref.store %f10, %in3D_ncdhw[%c0, %c0, %c0, %c0, %c3] : memref - call @conv_3d_input_ncdhw_filter_dhwcf(%in3D_ncdhw, %filter3D_ncdhw, %out3D_ncdhw) : (memref, memref, memref) -> () - %out3D_ncdhw_ = memref.cast %out3D_ncdhw : memref to memref<*xf32> - call @print_memref_f32(%out3D_ncdhw_): (memref<*xf32>) -> () - - memref.dealloc %filter3D_ncdhw : memref - memref.dealloc %in3D_ncdhw : memref - memref.dealloc %out3D_ncdhw : memref - return -} - -// CHECK: Unranked Memref {{.*}} -// CHECK-NEXT: [ -// CHECK-SAME: [ -// CHECK-SAME: [ -// CHECK-SAME: [ -// CHECK-SAME: [108, 124, 124, 124, 108, 108], -// CHECK-COUNT-5: [108, 108, 108, 108, 108, 108] -// CHECK-SAME: ], -// CHECK-NEXT: [ -// CHECK-COUNT-6: [108, 108, 108, 108, 108, 108] -// CHECK-SAME: ], -// CHECK-NEXT: [ -// CHECK-COUNT-6: [108, 108, 108, 108, 108, 108] -// CHECK-SAME: ], -// CHECK-NEXT: [ -// CHECK-COUNT-6: [108, 108, 108, 108, 108, 108] -// CHECK-SAME: ], -// CHECK-NEXT: [ -// CHECK-COUNT-6: [108, 108, 108, 108, 108, 108] -// CHECK-SAME: ], -// CHECK-NEXT: [ -// CHECK-COUNT-6: [108, 108, 108, 108, 108, 108] -// CHECK-SAME: ] -// CHECK-SAME: ] -// CHECK-SAME: ] -// CHECK-SAME: ] diff --git a/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-3d-ncdhw-call.mlir b/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-3d-ncdhw-call.mlir deleted file mode 100644 --- a/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-3d-ncdhw-call.mlir +++ /dev/null @@ -1,88 +0,0 @@ -// RUN: mlir-opt %s -convert-linalg-to-loops -convert-scf-to-std -convert-linalg-to-llvm -lower-affine -convert-scf-to-std --convert-memref-to-llvm -convert-std-to-llvm | \ -// RUN: mlir-cpu-runner -e main -entry-point-result=void \ -// RUN: -shared-libs=%mlir_integration_test_dir/libmlir_runner_utils%shlibext \ -// RUN: | FileCheck %s - -// RUN: mlir-opt %s -linalg-tile="linalg-tile-sizes=0,0,5,5,5" 
-convert-linalg-to-loops -convert-scf-to-std \ -// RUN: -convert-linalg-to-llvm -lower-affine -convert-scf-to-std --convert-memref-to-llvm -convert-std-to-llvm | \ -// RUN: mlir-cpu-runner -e main -entry-point-result=void \ -// RUN: -shared-libs=%mlir_integration_test_dir/libmlir_runner_utils%shlibext \ -// RUN: | FileCheck %s - -// RUN: mlir-opt %s -test-conv-vectorization="tile-sizes=1,1,1,1,1,3,3,3,3" -convert-linalg-to-llvm -lower-affine -convert-scf-to-std -convert-vector-to-llvm --convert-memref-to-llvm -convert-std-to-llvm | \ -// RUN: mlir-cpu-runner -e main -entry-point-result=void \ -// RUN: -shared-libs=%mlir_integration_test_dir/libmlir_runner_utils%shlibext \ -// RUN: | FileCheck %s - -// RUN: mlir-opt %s -linalg-tile="linalg-tile-sizes=0,0,5,5,5" \ -// RUN: -test-conv-vectorization="tile-sizes=1,1,1,1,1,3,3,3,3" -convert-linalg-to-llvm -lower-affine -convert-scf-to-std -convert-vector-to-llvm --convert-memref-to-llvm -convert-std-to-llvm | \ -// RUN: mlir-cpu-runner -e main -entry-point-result=void \ -// RUN: -shared-libs=%mlir_integration_test_dir/libmlir_runner_utils%shlibext \ -// RUN: | FileCheck %s - -func private @print_memref_f32(memref<*xf32>) - -// Creates and returns 5-D buffer of size (%s1, %s2, %s3, %s4, %s5) filled with the value %f -func @alloc_5d_filled_f32(%s1 : index, %s2 : index, %s3 : index, %s4 : index, %s5 : index, %f : f32) -> memref { - %buf = memref.alloc(%s1, %s2, %s3, %s4, %s5) : memref - linalg.fill(%f, %buf) : f32, memref - return %buf : memref -} - -func @conv_3d_ncdhw(%arg0: memref, %arg1: memref, %arg2: memref) { - linalg.conv_3d_ncdhw ins (%arg0, %arg1: memref, memref) - outs (%arg2: memref) - return -} - -func @main() { - %c0 = constant 0 : index - %c1 = constant 1 : index - %c3 = constant 3 : index - %c6 = constant 6 : index - %c8 = constant 8 : index - %f10 = constant 10.00000e+00 : f32 - %val = constant 2.00000e+00 : f32 - %zero = constant 0.00000e+00 : f32 - - %filter3D_ncdhw = call @alloc_5d_filled_f32(%c1, %c1, %c3, %c3, %c3, %val) : (index, index, index, index, index, f32) -> (memref) - %in3D_ncdhw = call @alloc_5d_filled_f32(%c1, %c1, %c8, %c8, %c8, %val) : (index, index, index, index, index, f32) -> (memref) - %out3D_ncdhw = call @alloc_5d_filled_f32(%c1, %c1, %c6, %c6, %c6, %zero) : (index, index, index, index, index, f32) -> (memref) - - memref.store %f10, %in3D_ncdhw[%c0, %c0, %c0, %c0, %c3] : memref - call @conv_3d_ncdhw(%in3D_ncdhw, %filter3D_ncdhw, %out3D_ncdhw) : (memref, memref, memref) -> () - %out3D_ncdhw_ = memref.cast %out3D_ncdhw : memref to memref<*xf32> - call @print_memref_f32(%out3D_ncdhw_): (memref<*xf32>) -> () - - memref.dealloc %filter3D_ncdhw : memref - memref.dealloc %in3D_ncdhw : memref - memref.dealloc %out3D_ncdhw : memref - return -} - -// CHECK: Unranked Memref {{.*}} -// CHECK-NEXT: [ -// CHECK-SAME: [ -// CHECK-SAME: [ -// CHECK-SAME: [ -// CHECK-SAME: [108, 124, 124, 124, 108, 108], -// CHECK-COUNT-5: [108, 108, 108, 108, 108, 108] -// CHECK-SAME: ], -// CHECK-NEXT: [ -// CHECK-COUNT-6: [108, 108, 108, 108, 108, 108] -// CHECK-SAME: ], -// CHECK-NEXT: [ -// CHECK-COUNT-6: [108, 108, 108, 108, 108, 108] -// CHECK-SAME: ], -// CHECK-NEXT: [ -// CHECK-COUNT-6: [108, 108, 108, 108, 108, 108] -// CHECK-SAME: ], -// CHECK-NEXT: [ -// CHECK-COUNT-6: [108, 108, 108, 108, 108, 108] -// CHECK-SAME: ], -// CHECK-NEXT: [ -// CHECK-COUNT-6: [108, 108, 108, 108, 108, 108] -// CHECK-SAME: ] -// CHECK-SAME: ] -// CHECK-SAME: ] -// CHECK-SAME: ] diff --git 
a/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-3d-ndhwc-call.mlir b/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-3d-ndhwc-call.mlir deleted file mode 100644 --- a/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-3d-ndhwc-call.mlir +++ /dev/null @@ -1,190 +0,0 @@ -// RUN: mlir-opt %s -convert-linalg-to-loops -convert-scf-to-std -convert-linalg-to-llvm -lower-affine -convert-scf-to-std --convert-memref-to-llvm -convert-std-to-llvm | \ -// RUN: mlir-cpu-runner -e main -entry-point-result=void \ -// RUN: -shared-libs=%mlir_integration_test_dir/libmlir_runner_utils%shlibext \ -// RUN: | FileCheck %s - -// RUN: mlir-opt %s -linalg-tile="linalg-tile-sizes=0,5,5,5" -convert-linalg-to-loops -convert-scf-to-std \ -// RUN: -convert-linalg-to-llvm -lower-affine -convert-scf-to-std --convert-memref-to-llvm -convert-std-to-llvm | \ -// RUN: mlir-cpu-runner -e main -entry-point-result=void \ -// RUN: -shared-libs=%mlir_integration_test_dir/libmlir_runner_utils%shlibext \ -// RUN: | FileCheck %s - -// RUN: mlir-opt %s -test-conv-vectorization="tile-sizes=1,1,1,1,1,3,3,3,3" -convert-linalg-to-llvm -lower-affine -convert-scf-to-std -convert-vector-to-llvm --convert-memref-to-llvm -convert-std-to-llvm | \ -// RUN: mlir-cpu-runner -e main -entry-point-result=void \ -// RUN: -shared-libs=%mlir_integration_test_dir/libmlir_runner_utils%shlibext \ -// RUN: | FileCheck %s - -// RUN: mlir-opt %s -linalg-tile="linalg-tile-sizes=0,5,5,5" \ -// RUN: -test-conv-vectorization="tile-sizes=1,1,1,1,1,3,3,3,3" -convert-linalg-to-llvm -lower-affine -convert-scf-to-std -convert-vector-to-llvm --convert-memref-to-llvm -convert-std-to-llvm | \ -// RUN: mlir-cpu-runner -e main -entry-point-result=void \ -// RUN: -shared-libs=%mlir_integration_test_dir/libmlir_runner_utils%shlibext \ -// RUN: | FileCheck %s - -func private @print_memref_f32(memref<*xf32>) - -// Creates and returns 5-D buffer of size (%s1, %s2, %s3, %s4, %s5) filled with the value %f -func @alloc_5d_filled_f32(%s1 : index, %s2 : index, %s3 : index, %s4 : index, %s5 : index, %f : f32) -> memref { - %buf = memref.alloc(%s1, %s2, %s3, %s4, %s5) : memref - linalg.fill(%f, %buf) : f32, memref - return %buf : memref -} - -func @conv_3d_ndhwc(%arg0: memref, %arg1: memref, %arg2: memref) { - linalg.conv_3d_ndhwc ins (%arg0, %arg1: memref, memref) - outs (%arg2: memref) - return -} - - -func @main() { - %c0 = constant 0 : index - %c1 = constant 1 : index - %c3 = constant 3 : index - %c6 = constant 6 : index - %c8 = constant 8 : index - %f10 = constant 10.00000e+00 : f32 - %val = constant 2.00000e+00 : f32 - %zero = constant 0.00000e+00 : f32 - - %filter3D_ndhwc = call @alloc_5d_filled_f32(%c1, %c3, %c3, %c3, %c1, %val) : (index, index, index, index, index, f32) -> (memref) - %in3D_ndhwc = call @alloc_5d_filled_f32(%c1, %c8, %c8, %c8, %c1, %val) : (index, index, index, index, index, f32) -> (memref) - %out3D_ndhwc = call @alloc_5d_filled_f32(%c1, %c6, %c6, %c6, %c1, %zero) : (index, index, index, index, index, f32) -> (memref) - - memref.store %f10, %in3D_ndhwc[%c0, %c0, %c0, %c3, %c0] : memref - call @conv_3d_ndhwc(%in3D_ndhwc, %filter3D_ndhwc, %out3D_ndhwc) : (memref, memref, memref) -> () - %out3D_ndhwc_ = memref.cast %out3D_ndhwc : memref to memref<*xf32> - call @print_memref_f32(%out3D_ndhwc_): (memref<*xf32>) -> () - - memref.dealloc %filter3D_ndhwc : memref - memref.dealloc %in3D_ndhwc : memref - memref.dealloc %out3D_ndhwc : memref - return -} - -// CHECK: Unranked Memref {{.*}} -// CHECK-NEXT: [ -// CHECK-SAME: [ -// CHECK-SAME: [ -// CHECK-SAME: [ 
-// CHECK-SAME: [108], -// CHECK-COUNT-3: [124], -// CHECK-COUNT-2: [108] -// CHECK-SAME: ], -// CHECK-NEXT: [ -// CHECK-COUNT-6: [108] -// CHECK-SAME: ], -// CHECK-NEXT: [ -// CHECK-COUNT-6: [108] -// CHECK-SAME: ], -// CHECK-NEXT: [ -// CHECK-COUNT-6: [108] -// CHECK-SAME: ], -// CHECK-NEXT: [ -// CHECK-COUNT-6: [108] -// CHECK-SAME: ], -// CHECK-NEXT: [ -// CHECK-COUNT-6: [108] -// CHECK-SAME: ] -// CHECK-SAME: ], -// CHECK-NEXT: [ -// CHECK-SAME: [ -// CHECK-COUNT-6: [108] -// CHECK-SAME: ], -// CHECK-NEXT: [ -// CHECK-COUNT-6: [108] -// CHECK-SAME: ], -// CHECK-NEXT: [ -// CHECK-COUNT-6: [108] -// CHECK-SAME: ], -// CHECK-NEXT: [ -// CHECK-COUNT-6: [108] -// CHECK-SAME: ], -// CHECK-NEXT: [ -// CHECK-COUNT-6: [108] -// CHECK-SAME: ], -// CHECK-NEXT: [ -// CHECK-COUNT-6: [108] -// CHECK-SAME: ] -// CHECK-SAME: ], -// CHECK-NEXT: [ -// CHECK-SAME: [ -// CHECK-COUNT-6: [108] -// CHECK-SAME: ], -// CHECK-NEXT: [ -// CHECK-COUNT-6: [108] -// CHECK-SAME: ], -// CHECK-NEXT: [ -// CHECK-COUNT-6: [108] -// CHECK-SAME: ], -// CHECK-NEXT: [ -// CHECK-COUNT-6: [108] -// CHECK-SAME: ], -// CHECK-NEXT: [ -// CHECK-COUNT-6: [108] -// CHECK-SAME: ], -// CHECK-NEXT: [ -// CHECK-COUNT-6: [108] -// CHECK-SAME: ] -// CHECK-SAME: ], -// CHECK-NEXT: [ -// CHECK-SAME: [ -// CHECK-COUNT-6: [108] -// CHECK-SAME: ], -// CHECK-NEXT: [ -// CHECK-COUNT-6: [108] -// CHECK-SAME: ], -// CHECK-NEXT: [ -// CHECK-COUNT-6: [108] -// CHECK-SAME: ], -// CHECK-NEXT: [ -// CHECK-COUNT-6: [108] -// CHECK-SAME: ], -// CHECK-NEXT: [ -// CHECK-COUNT-6: [108] -// CHECK-SAME: ], -// CHECK-NEXT: [ -// CHECK-COUNT-6: [108] -// CHECK-SAME: ] -// CHECK-SAME: ], -// CHECK-NEXT: [ -// CHECK-SAME: [ -// CHECK-COUNT-6: [108] -// CHECK-SAME: ], -// CHECK-NEXT: [ -// CHECK-COUNT-6: [108] -// CHECK-SAME: ], -// CHECK-NEXT: [ -// CHECK-COUNT-6: [108] -// CHECK-SAME: ], -// CHECK-NEXT: [ -// CHECK-COUNT-6: [108] -// CHECK-SAME: ], -// CHECK-NEXT: [ -// CHECK-COUNT-6: [108] -// CHECK-SAME: ], -// CHECK-NEXT: [ -// CHECK-COUNT-6: [108] -// CHECK-SAME: ] -// CHECK-SAME: ], -// CHECK-NEXT: [ -// CHECK-SAME: [ -// CHECK-COUNT-6: [108] -// CHECK-SAME: ], -// CHECK-NEXT: [ -// CHECK-COUNT-6: [108] -// CHECK-SAME: ], -// CHECK-NEXT: [ -// CHECK-COUNT-6: [108] -// CHECK-SAME: ], -// CHECK-NEXT: [ -// CHECK-COUNT-6: [108] -// CHECK-SAME: ], -// CHECK-NEXT: [ -// CHECK-COUNT-6: [108] -// CHECK-SAME: ], -// CHECK-NEXT: [ -// CHECK-COUNT-6: [108] -// CHECK-SAME: ] -// CHECK-SAME: ] -// CHECK-SAME: ] -// CHECK-SAME: ] diff --git a/mlir/test/python/integration/dialects/linalg/opsrun.py b/mlir/test/python/integration/dialects/linalg/opsrun.py --- a/mlir/test/python/integration/dialects/linalg/opsrun.py +++ b/mlir/test/python/integration/dialects/linalg/opsrun.py @@ -244,71 +244,6 @@ test_fill_generic() -def test_conv_builtin(): - with Context() as ctx, Location.unknown(): - module = Module.create() - f64 = F64Type.get() - i32 = IntegerType.get_signless(32) - with InsertionPoint(module.body): - - @builtin.FuncOp.from_py_func( - MemRefType.get((1, 4, 16, 1), f64), MemRefType.get((2, 2, 1), f64), - MemRefType.get((1, 2, 4, 1), i32)) - def conv_on_buffers(input, filter, output): - linalg.depthwise_conv_2d_input_nhwc_filter_hwc_poly( - input, filter, outs=[output], strides=[2, 4], dilations=[1, 2]) - - execution_engine = ExecutionEngine(transform(module, conv_boiler)) - - # TODO: FFI-based solution to allow testing and printing with python code. - # Prepare arguments: one result i32. - # Arguments must be passed as pointers. 
-    c_int_p = ctypes.c_int * 1
-    res = c_int_p(-1)
-    execution_engine.invoke("main", res)
-
-    log("RESULT: ", res[0])
-    # CHECK: RESULT: 8
-
-
-test_conv_builtin()
-
-
-def test_conv_generic():
-  with Context() as ctx, Location.unknown():
-    module = Module.create()
-    f64 = F64Type.get()
-    i32 = IntegerType.get_signless(32)
-    with InsertionPoint(module.body):
-
-      @builtin.FuncOp.from_py_func(
-          MemRefType.get((1, 4, 16, 1), f64), MemRefType.get((2, 2, 1), f64),
-          MemRefType.get((1, 2, 4, 1), i32))
-      def conv_on_buffers(input, filter, output):
-        linalg.depthwise_conv_2d_input_nhwc_filter_hwc_poly(
-            input,
-            filter,
-            outs=[output],
-            strides=[2, 4],
-            dilations=[1, 2],
-            emit_generic=True)
-
-    execution_engine = ExecutionEngine(transform(module, conv_boiler))
-
-    # TODO: FFI-based solution to allow testing and printing with python code.
-    # Prepare arguments: one result i32.
-    # Arguments must be passed as pointers.
-    c_int_p = ctypes.c_int * 1
-    res = c_int_p(-1)
-    execution_engine.invoke("main", res)
-
-    log("RESULT: ", res[0])
-    # CHECK: RESULT: 8
-
-
-test_conv_generic()
-
-
 def test_max_pooling_builtin():
   with Context() as ctx, Location.unknown():
     module = Module.create()