diff --git a/mlir/include/mlir/Dialect/Linalg/IR/LinalgNamedStructuredOpsSpec.tc b/mlir/include/mlir/Dialect/Linalg/IR/LinalgNamedStructuredOpsSpec.tc
--- a/mlir/include/mlir/Dialect/Linalg/IR/LinalgNamedStructuredOpsSpec.tc
+++ b/mlir/include/mlir/Dialect/Linalg/IR/LinalgNamedStructuredOpsSpec.tc
@@ -151,6 +151,45 @@
       std_mulf(I(n, c, d + kd, h + kh, w + kw), K(f, c, kd, kh, kw)));
 }
 
+ods_def<DepthwiseConvInputNHWCFilterHWCFOp>:
+def depthwise_conv_2d_input_nhwc_filter_hwcf
+      (I: f32(N, IH, IW, CI), K: f32(KH, KW, CI, CO))
+   -> (O: f32(N, OH, OW, CI, CO))
+  attr(strides: 2xi64)
+"""A general depth-wise 2-D convolution operation.
+
+This operation performs depth-wise 2-D convolution over an input `I` and filter
+`K` and generates output `O` using the following computation:
+
+```
+  O(n, oh, ow, ci, co) = std_addf<kh, kw>(
+      O(n, oh, ow, ci, co),
+      std_mulf(I(n, oh * strides[0] + kh, ow * strides[1] + kw, ci),
+               K(kh, kw, ci, co)));
+```
+
+where
+
+* `I` is a 4-D tensor with shape `(N, IH, IW, CI)`.
+* `K` is a 4-D tensor with shape `(KH, KW, CI, CO)`.
+* `O` is a 5-D tensor with shape `(N, OH, OW, CI, CO)`.
+* `strides` is a 2-element vector attribute for window strides along the
+  height and width dimensions.
+
+The indexing maps for these three tensors contain 7 dimensions, following the
+order of (`N`, `OH`, `OW`, `CI`, `CO`, `KH`, `KW`).
+
+Note: this op supports an arbitrary channel multiplier, which is `CO`. To map
+the 5-D result back to the 4-D result of DepthwiseConvInputNHWCFilterHWCOp,
+create a Linalg reshape op that collapses `CI` and `CO` into one dimension.
+"""
+{
+  O(n, oh, ow, ci, co) = std_addf<kh, kw>(
+      O(n, oh, ow, ci, co),
+      std_mulf(I(n, oh * strides[0] + kh, ow * strides[1] + kw, ci),
+               K(kh, kw, ci, co)));
+}
+
 ods_def<DepthwiseConvInputNHWCFilterHWCOp>:
 def depthwise_conv_2d_input_nhwc_filter_hwc
       (I: f32(N, IH, IW, C), K: f32(KH, KW, C))
@@ -162,8 +201,10 @@
 `F` and generates output `O` using the following computation:
 
 ```
-O(n, oh, ow, c) = std_addf<kh, kw>(std_mulf(
-  I(n, oh * strides[0] + kh, ow * strides[1] + kw, c), K(kh, kw, c)))
+O(n, oh, ow, c) = std_addf<kh, kw>(
+    O(n, oh, ow, c),
+    std_mulf(I(n, oh * strides[0] + kh, ow * strides[1] + kw, c),
+             K(kh, kw, c)));
 ```
 
 where
diff --git a/mlir/test/Dialect/Linalg/generalize-named-ops.mlir b/mlir/test/Dialect/Linalg/generalize-named-ops.mlir
--- a/mlir/test/Dialect/Linalg/generalize-named-ops.mlir
+++ b/mlir/test/Dialect/Linalg/generalize-named-ops.mlir
@@ -76,6 +76,33 @@
 
 // -----
 
+func @depthwise_conv_2d_input_nhwc_filter_hwcf(%input: memref<2x4x5x2xf32>, %filter: memref<2x2x2x3xf32>, %output: memref<2x3x4x2x3xf32>) {
+  linalg.depthwise_conv_2d_input_nhwc_filter_hwcf
+     { strides = dense<1> : tensor<2xi64> }
+     ins(%input, %filter : memref<2x4x5x2xf32>, memref<2x2x2x3xf32>)
+    outs(%output : memref<2x3x4x2x3xf32>)
+  return
+}
+
+// CHECK-DAG: #[[MAP0:.+]] = affine_map<(d0, d1, d2, d3, d4, d5, d6) -> (d0, d1 + d5, d2 + d6, d3)>
+// CHECK-DAG: #[[MAP1:.+]] = affine_map<(d0, d1, d2, d3, d4, d5, d6) -> (d5, d6, d3, d4)>
+// CHECK-DAG: #[[MAP2:.+]] = affine_map<(d0, d1, d2, d3, d4, d5, d6) -> (d0, d1, d2, d3, d4)>
+
+// CHECK: func @depthwise_conv_2d_input_nhwc_filter_hwcf
+
+// CHECK: linalg.generic
+// CHECK-SAME: indexing_maps = [#[[MAP0]], #[[MAP1]], #[[MAP2]]]
+// CHECK-SAME: iterator_types = ["parallel", "parallel", "parallel", "parallel", "parallel", "reduction", "reduction"]}
+// CHECK-SAME: ins(%{{.+}}, %{{.+}} : memref<2x4x5x2xf32>, memref<2x2x2x3xf32>)
+// CHECK-SAME: outs(%{{.+}} : memref<2x3x4x2x3xf32>)
+
+// CHECK:         ^{{.+}}(%[[BBARG0:.+]]: f32, %[[BBARG1:.+]]: f32, %[[BBARG2:.+]]: f32)
+// CHECK-NEXT:      %[[MUL:.+]] = mulf %[[BBARG0]], %[[BBARG1]] : f32
+// CHECK-NEXT:      %[[ADD:.+]] = addf %[[BBARG2]], %[[MUL]] : f32
+// CHECK-NEXT:      linalg.yield %[[ADD]] : f32
+
+// -----
+
 func @depthwise_conv_2d_input_nhwc_filter_hwc(%input: memref<1x113x113x96xf32>, %filter: memref<3x3x96xf32>, %output: memref<1x56x56x96xf32>) {
   linalg.depthwise_conv_2d_input_nhwc_filter_hwc {strides = dense<2> : vector<2xi64>}
     ins(%input, %filter: memref<1x113x113x96xf32>, memref<3x3x96xf32>)
diff --git a/mlir/test/Dialect/Linalg/named-ops.mlir b/mlir/test/Dialect/Linalg/named-ops.mlir
--- a/mlir/test/Dialect/Linalg/named-ops.mlir
+++ b/mlir/test/Dialect/Linalg/named-ops.mlir
@@ -1,5 +1,34 @@
 // RUN: mlir-opt -split-input-file -verify-diagnostics %s | FileCheck %s
 
+// CHECK-LABEL: func @depthwise_conv_2d_input_nhwc_filter_hwcf_tensor
+func @depthwise_conv_2d_input_nhwc_filter_hwcf_tensor(%input: tensor<2x4x5x2xf32>, %filter: tensor<2x2x2x3xf32>) -> tensor<2x3x4x2x3xf32> {
+  %zero = constant 0.000000e+00 : f32
+  %init = linalg.init_tensor [2, 3, 4, 2, 3] : tensor<2x3x4x2x3xf32>
+  %fill = linalg.fill(%init, %zero) : tensor<2x3x4x2x3xf32>, f32 -> tensor<2x3x4x2x3xf32>
+  // CHECK:      %{{.+}} = linalg.depthwise_conv_2d_input_nhwc_filter_hwcf
+  // CHECK-SAME:   {strides = dense<1> : tensor<2xi64>}
+  // CHECK-SAME:   ins(%{{.+}}, %{{.+}} : tensor<2x4x5x2xf32>, tensor<2x2x2x3xf32>)
+  // CHECK-SAME:   outs(%{{.+}} : tensor<2x3x4x2x3xf32>)
+  %0 = linalg.depthwise_conv_2d_input_nhwc_filter_hwcf
+     { strides = dense<1> : tensor<2xi64> }
+     ins(%input, %filter : tensor<2x4x5x2xf32>, tensor<2x2x2x3xf32>)
+    outs(%fill : tensor<2x3x4x2x3xf32>) -> tensor<2x3x4x2x3xf32>
+  return %0 : tensor<2x3x4x2x3xf32>
+}
+
+// CHECK-LABEL: func @depthwise_conv_2d_input_nhwc_filter_hwcf_memref
+func @depthwise_conv_2d_input_nhwc_filter_hwcf_memref(%input: memref<2x4x5x2xf32>, %filter: memref<2x2x2x3xf32>, %output: memref<2x3x4x2x3xf32>) {
+  // CHECK:      linalg.depthwise_conv_2d_input_nhwc_filter_hwcf
+  // CHECK-SAME:   {strides = dense<1> : tensor<2xi64>}
+  // CHECK-SAME:   ins(%{{.+}}, %{{.+}} : memref<2x4x5x2xf32>, memref<2x2x2x3xf32>)
+  // CHECK-SAME:   outs(%{{.+}} : memref<2x3x4x2x3xf32>)
+  linalg.depthwise_conv_2d_input_nhwc_filter_hwcf
+     { strides = dense<1> : tensor<2xi64> }
+     ins(%input, %filter : memref<2x4x5x2xf32>, memref<2x2x2x3xf32>)
+    outs(%output : memref<2x3x4x2x3xf32>)
+  return
+}
+
 // CHECK-LABEL: func @depthwise_conv_2d_input_nhwc_filter_hwc_tensor
 func @depthwise_conv_2d_input_nhwc_filter_hwc_tensor(%input: tensor<1x113x113x96xf32>, %filter: tensor<3x3x96xf32>) -> tensor<1x56x56x96xf32> {
   %init = linalg.init_tensor [1, 56, 56, 96] : tensor<1x56x56x96xf32>
diff --git a/mlir/test/mlir-linalg-ods-gen/test-linalg-ods-gen.tc b/mlir/test/mlir-linalg-ods-gen/test-linalg-ods-gen.tc
--- a/mlir/test/mlir-linalg-ods-gen/test-linalg-ods-gen.tc
+++ b/mlir/test/mlir-linalg-ods-gen/test-linalg-ods-gen.tc
@@ -85,12 +85,12 @@
 // Test attribute definitions
 // ODS-LABEL: def Test4Op
 // ODS: F32ArrayAttr:$array_attr,
-// ODS: F32:$f32_attr,
+// ODS: F32Attr:$f32_attr,
 // ODS: RankedF32ElementsAttr<[4]>:$fvec_attr,
-// ODS: I32:$i32_attr,
-// ODS: I64:$i64_attr,
+// ODS: I32Attr:$i32_attr,
+// ODS: I64Attr:$i64_attr,
 // ODS: RankedI32ElementsAttr<[5, 6]>:$ivec_attr,
-// ODS: OptionalAttr<F32>:$optional_attr
+// ODS: OptionalAttr<F32Attr>:$optional_attr
 //
 // ODS: bool hasDynamicIndexingMaps();
 // ODS: LogicalResult verifyIndexingMapRequiredAttributes();
diff --git a/mlir/tools/mlir-linalg-ods-gen/mlir-linalg-ods-gen.cpp b/mlir/tools/mlir-linalg-ods-gen/mlir-linalg-ods-gen.cpp
--- a/mlir/tools/mlir-linalg-ods-gen/mlir-linalg-ods-gen.cpp
+++ b/mlir/tools/mlir-linalg-ods-gen/mlir-linalg-ods-gen.cpp
@@ -1173,7 +1173,7 @@
 
     // Returns the function to get values at the given indices from this
     // attribute.
-    std::string getValueFn(ArrayRef<uint64_t> indices) const;
+    llvm::Optional<std::string> getValueFn(ArrayRef<uint64_t> indices) const;
   };
 
   //===--------------------------------------------------------------------===//
@@ -1840,16 +1840,19 @@
 
     const auto &dims = attr.second.vectorDims;
     if (!dims.empty()) {
+      // Vector case
       SmallVector<std::string, 4> dimStrs;
       for (uint64_t dim : dims)
         dimStrs.push_back(std::to_string(dim));
       odsType = llvm::formatv("Ranked{0}ElementsAttr<[{1}]>", odsType,
                               llvm::join(dimStrs, ", "));
-    }
-
-    assert(dims.empty() || !attr.second.isArray);
-    if (attr.second.isArray)
+    } else if (attr.second.isArray) {
+      // Array case
       odsType = llvm::formatv("{0}ArrayAttr", odsType);
+    } else {
+      // Scalar case
+      odsType = llvm::formatv("{0}Attr", odsType);
+    }
 
     if (attr.second.isOptional)
       odsType = llvm::formatv("OptionalAttr<{0}>", odsType);
@@ -2241,13 +2244,14 @@
     StringRef attrName = attrUse.value().attrName;
     auto it = registeredAttrs.find(attrName.str());
     assert(it != registeredAttrs.end() && "uses should point to valid attr!");
-    std::string getValueFn = it->second.getValueFn(attrUse.value().indices);
-    if (getValueFn.empty()) {
+    llvm::Optional<std::string> getValueFn =
+        it->second.getValueFn(attrUse.value().indices);
+    if (!getValueFn) {
       (void)parser.emitError("unimplemented getValueFn for attribute: " +
                              attrName);
       return;
     }
-    std::string cstVal = llvm::formatv("{0}().{1}", attrName, getValueFn);
+    std::string cstVal = llvm::formatv("{0}(){1}", attrName, *getValueFn);
     const char *cstFmt =
         "\n\tauto cst{0} = getAffineConstantExpr({1}, context);";
     mapsStringStream << llvm::formatv(cstFmt, attrUse.index(), cstVal);
@@ -2373,10 +2377,10 @@
                       expressionsStr, yieldStr);
 }
 
-std::string
+llvm::Optional<std::string>
 TCParser::RegisteredAttr::getValueFn(ArrayRef<uint64_t> indices) const {
   if (isArray)
-    return "";
+    return llvm::None;
 
   if (!vectorDims.empty()) {
     SmallVector<std::string, 4> indexStrs;
@@ -2384,20 +2388,20 @@
       indexStrs.push_back(std::to_string(index));
     std::string indexList = llvm::join(indexStrs, ", ");
     if (elementType == "f32")
-      return llvm::formatv("getValue<float>({ {0} })", indexList);
+      return llvm::formatv(".getValue<float>({ {0} })", indexList).str();
     if (elementType == "i32")
-      return llvm::formatv("getValue<int>({ {0} })", indexList);
+      return llvm::formatv(".getValue<int>({ {0} })", indexList).str();
     if (elementType == "i64")
-      return llvm::formatv("getValue<int64_t>({ {0} })", indexList);
+      return llvm::formatv(".getValue<int64_t>({ {0} })", indexList).str();
 
-    return "";
+    return llvm::None;
   }
 
   if (elementType == "f32")
-    return "getValue().convertToFloat()";
+    return std::string(".convertToFloat()");
   if (elementType == "i32" || elementType == "i64")
-    return "getInt()";
-  return "";
+    return std::string("");
+  return llvm::None;
 }
 
 /// Iterate over each Tensor Comprehension def.