diff --git a/mlir/include/mlir/Dialect/Linalg/IR/LinalgNamedStructuredOpsSpec.tc b/mlir/include/mlir/Dialect/Linalg/IR/LinalgNamedStructuredOpsSpec.tc --- a/mlir/include/mlir/Dialect/Linalg/IR/LinalgNamedStructuredOpsSpec.tc +++ b/mlir/include/mlir/Dialect/Linalg/IR/LinalgNamedStructuredOpsSpec.tc @@ -181,6 +181,43 @@ std_mulf(I(n, c, d + kd, h + kh, w + kw), K(f, c, kd, kh, kw))); } +ods_def<DepthwiseConvInputNHWCFilterHWCFOp>: +def depthwise_conv_2d_input_nhwc_filter_hwcf + (I: f32(N, IH, IW, CI), K: f32(KH, KW, CI, CO)) + -> (O: f32(N, OH, OW, O)) + attr(channelMultiplier: i64, strides: 2xi64) +"""A general depth-wise 2-D convolution operation. + +This operation performs depth-wise 2-D convolution over an input `I` and filter +`F` and generates output `O` using the following computation: + +``` +O(n, oh, ow, co + ci * channelMultiplier) = std_addf( + O(n, oh, ow, co + ci * channelMultiplier), + std_mulf(I(n, oh * strides[0] + kh, ow * strides[1] + kw, ci), + K(kh, kw, ci, co))); +``` + +where + +* `I` is a 4-D tensor with shape `(N, IH, IW, CI)`. +* `F` is a 4-D tensor with shape `(KH, KW, CI, CO)`. +* `O` is a 4-D tensor with shape `(N, OH, OW, CO + CI * channelMultiplier)`. +* `strides` is a 2-element vector attribute for window strides along the + height/width dimension. +* `channelMultiplier` is an i64 attribute which stands for channel + multiplier. + +The indexing maps for these three tensors contain 7 dimensions, following the +order of (`N`, `OH`, `OW`, `CO`, `CI`, `KH`, `KW`). 
+""" +{ + O(n, oh, ow, co + ci * channelMultiplier) = std_addf( + O(n, oh, ow, co + ci * channelMultiplier), + std_mulf(I(n, oh * strides[0] + kh, ow * strides[1] + kw, ci), + K(kh, kw, ci, co))); +} + ods_def<DepthwiseConvInputNHWCFilterHWCOp>: def depthwise_conv_2d_input_nhwc_filter_hwc (I: f32(N, IH, IW, C), K: f32(KH, KW, C)) @@ -192,8 +229,10 @@ `F` and generates output `O` using the following computation: ``` -O(n, oh, ow, c) = std_addf(std_mulf( - I(n, oh * strides[0] + kh, ow * strides[1] + kw, c), K(kh, kw, c))) +O(n, oh, ow, c) = std_addf( + O(n, oh, ow, c), + std_mulf(I(n, oh * strides[0] + kh, ow * strides[1] + kw, c), + K(kh, kw, c))); ``` where diff --git a/mlir/test/Dialect/Linalg/generalize-named-ops.mlir b/mlir/test/Dialect/Linalg/generalize-named-ops.mlir --- a/mlir/test/Dialect/Linalg/generalize-named-ops.mlir +++ b/mlir/test/Dialect/Linalg/generalize-named-ops.mlir @@ -76,6 +76,33 @@ // ----- +func @depthwise_conv_2d_input_nhwc_filter_hwcf(%input: memref<2x4x5x2xf32>, %filter: memref<2x2x2x3xf32>, %output: memref<2x3x4x6xf32>) { + linalg.depthwise_conv_2d_input_nhwc_filter_hwcf + { channelMultiplier = 3 : i64, strides = dense<1> : tensor<2xi64> } + ins(%input, %filter : memref<2x4x5x2xf32>, memref<2x2x2x3xf32>) + outs(%output : memref<2x3x4x6xf32>) + return +} + +// CHECK-DAG: #[[MAP0:.+]] = affine_map<(d0, d1, d2, d3, d4, d5, d6) -> (d0, d1 + d5, d2 + d6, d4)> +// CHECK-DAG: #[[MAP1:.+]] = affine_map<(d0, d1, d2, d3, d4, d5, d6) -> (d5, d6, d4, d3)> +// CHECK-DAG: #[[MAP2:.+]] = affine_map<(d0, d1, d2, d3, d4, d5, d6) -> (d0, d1, d2, d3 + d4 * 3)> + +// CHECK: func @depthwise_conv_2d_input_nhwc_filter_hwcf + +// CHECK: linalg.generic +// CHECK-SAME: indexing_maps = [#[[MAP0]], #[[MAP1]], #[[MAP2]]] +// CHECK-SAME: iterator_types = ["parallel", "parallel", "parallel", "parallel", "parallel", "reduction", "reduction"]} +// CHECK-SAME: ins(%{{.+}}, %{{.+}} : memref<2x4x5x2xf32>, memref<2x2x2x3xf32>) +// CHECK-SAME: outs(%{{.+}} : memref<2x3x4x6xf32>) + +// CHECK: 
^{{.+}}(%[[BBARG0:.+]]: f32, %[[BBARG1:.+]]: f32, %[[BBARG2:.+]]: f32) +// CHECK-NEXT: %[[MUL:.+]] = mulf %[[BBARG0]], %[[BBARG1]] : f32 +// CHECK-NEXT: %[[ADD:.+]] = addf %[[BBARG2]], %[[MUL]] : f32 +// CHECK-NEXT: linalg.yield %[[ADD]] : f32 + +// ----- + func @depthwise_conv_2d_input_nhwc_filter_hwc(%input: memref<1x113x113x96xf32>, %filter: memref<3x3x96xf32>, %output: memref<1x56x56x96xf32>) { linalg.depthwise_conv_2d_input_nhwc_filter_hwc {strides = dense<2> : vector<2xi64>} ins(%input, %filter: memref<1x113x113x96xf32>, memref<3x3x96xf32>) diff --git a/mlir/test/Dialect/Linalg/named-ops.mlir b/mlir/test/Dialect/Linalg/named-ops.mlir --- a/mlir/test/Dialect/Linalg/named-ops.mlir +++ b/mlir/test/Dialect/Linalg/named-ops.mlir @@ -1,5 +1,36 @@ // RUN: mlir-opt -split-input-file -verify-diagnostics %s | FileCheck %s +// CHECK-LABEL: func @depthwise_conv_2d_input_nhwc_filter_hwcf_tensor +func @depthwise_conv_2d_input_nhwc_filter_hwcf_tensor(%input: tensor<2x4x5x2xf32>, %filter: tensor<2x2x2x3xf32>) -> tensor<2x3x4x6xf32> { + %zero = constant 0.000000e+00 : f32 + %init = linalg.init_tensor [2, 3, 4, 6] : tensor<2x3x4x6xf32> + %fill = linalg.fill(%init, %zero) : tensor<2x3x4x6xf32>, f32 -> tensor<2x3x4x6xf32> + // CHECK: %{{.+}} = linalg.depthwise_conv_2d_input_nhwc_filter_hwcf + // CHECK-SAME: {channelMultiplier = 3 : i64, + // CHECK-SAME: strides = dense<1> : tensor<2xi64>} + // CHECK-SAME: ins(%{{.+}}, %{{.+}} : tensor<2x4x5x2xf32>, tensor<2x2x2x3xf32>) + // CHECK-SAME: outs(%{{.+}} : tensor<2x3x4x6xf32>) + %0 = linalg.depthwise_conv_2d_input_nhwc_filter_hwcf + { channelMultiplier = 3 : i64, strides = dense<1> : tensor<2xi64> } + ins(%input, %filter : tensor<2x4x5x2xf32>, tensor<2x2x2x3xf32>) + outs(%fill : tensor<2x3x4x6xf32>) -> tensor<2x3x4x6xf32> + return %0 : tensor<2x3x4x6xf32> +} + +// CHECK-LABEL: func @depthwise_conv_2d_input_nhwc_filter_hwcf_memref +func @depthwise_conv_2d_input_nhwc_filter_hwcf_memref(%input: memref<2x4x5x2xf32>, %filter: 
memref<2x2x2x3xf32>, %output: memref<2x3x4x6xf32>) { + // CHECK: linalg.depthwise_conv_2d_input_nhwc_filter_hwcf + // CHECK-SAME: {channelMultiplier = 3 : i64, + // CHECK-SAME: strides = dense<1> : tensor<2xi64>} + // CHECK-SAME: ins(%{{.+}}, %{{.+}} : memref<2x4x5x2xf32>, memref<2x2x2x3xf32>) + // CHECK-SAME: outs(%{{.+}} : memref<2x3x4x6xf32>) + linalg.depthwise_conv_2d_input_nhwc_filter_hwcf + { channelMultiplier = 3 : i64, strides = dense<1> : tensor<2xi64> } + ins(%input, %filter : memref<2x4x5x2xf32>, memref<2x2x2x3xf32>) + outs(%output : memref<2x3x4x6xf32>) + return +} + // CHECK-LABEL: func @depthwise_conv_2d_input_nhwc_filter_hwc_tensor func @depthwise_conv_2d_input_nhwc_filter_hwc_tensor(%input: tensor<1x113x113x96xf32>, %filter: tensor<3x3x96xf32>) -> tensor<1x56x56x96xf32> { %init = linalg.init_tensor [1, 56, 56, 96] : tensor<1x56x56x96xf32> diff --git a/mlir/tools/mlir-linalg-ods-gen/mlir-linalg-ods-gen.cpp b/mlir/tools/mlir-linalg-ods-gen/mlir-linalg-ods-gen.cpp --- a/mlir/tools/mlir-linalg-ods-gen/mlir-linalg-ods-gen.cpp +++ b/mlir/tools/mlir-linalg-ods-gen/mlir-linalg-ods-gen.cpp @@ -1173,7 +1173,7 @@ // Returns the function to get values at the given indices from this // attribute. 
- std::string getValueFn(ArrayRef<uint64_t> indices) const; + llvm::Optional<std::string> getValueFn(ArrayRef<uint64_t> indices) const; }; //===--------------------------------------------------------------------===// @@ -1841,16 +1841,19 @@ const auto &dims = attr.second.vectorDims; if (!dims.empty()) { + // Vector case SmallVector<std::string, 4> dimStrs; for (uint64_t dim : dims) dimStrs.push_back(std::to_string(dim)); odsType = llvm::formatv("Ranked{0}ElementsAttr<[{1}]>", odsType, llvm::join(dimStrs, ", ")); - } - - assert(dims.empty() || !attr.second.isArray); - if (attr.second.isArray) + } else if (attr.second.isArray) { + // Array case odsType = llvm::formatv("{0}ArrayAttr", odsType); + } else { + // Scalar case + odsType = llvm::formatv("{0}Attr", odsType); + } if (attr.second.isOptional) odsType = llvm::formatv("OptionalAttr<{0}>", odsType); @@ -2242,13 +2245,14 @@ StringRef attrName = attrUse.value().attrName; auto it = registeredAttrs.find(attrName.str()); assert(it != registeredAttrs.end() && "uses should point to valid attr!"); - std::string getValueFn = it->second.getValueFn(attrUse.value().indices); - if (getValueFn.empty()) { + llvm::Optional<std::string> getValueFn = + it->second.getValueFn(attrUse.value().indices); + if (!getValueFn) { (void)parser.emitError("unimplemented getValueFn for attribute: " + attrName); return; } - std::string cstVal = llvm::formatv("{0}().{1}", attrName, getValueFn); + std::string cstVal = llvm::formatv("{0}(){1}", attrName, *getValueFn); const char *cstFmt = "\n\tauto cst{0} = getAffineConstantExpr({1}, context);"; mapsStringStream << llvm::formatv(cstFmt, attrUse.index(), cstVal); @@ -2374,10 +2378,10 @@ expressionsStr, yieldStr); } -std::string +llvm::Optional<std::string> TCParser::RegisteredAttr::getValueFn(ArrayRef<uint64_t> indices) const { if (isArray) - return ""; + return llvm::None; if (!vectorDims.empty()) { SmallVector<std::string, 4> indexStrs; for (uint64_t index : indices) indexStrs.push_back(std::to_string(index)); std::string indexList = llvm::join(indexStrs, ", "); if (elementType == "f32") - return 
llvm::formatv("getValue<float>({ {0} })", indexList); + return llvm::formatv(".getValue<float>({ {0} })", indexList).str(); if (elementType == "i32") - return llvm::formatv("getValue<int32_t>({ {0} })", indexList); + return llvm::formatv(".getValue<int32_t>({ {0} })", indexList).str(); if (elementType == "i64") - return llvm::formatv("getValue<int64_t>({ {0} })", indexList); + return llvm::formatv(".getValue<int64_t>({ {0} })", indexList).str(); - return ""; + return llvm::None; } if (elementType == "f32") - return "getValue().convertToFloat()"; + return std::string(".convertToFloat()"); if (elementType == "i32" || elementType == "i64") - return "getInt()"; + return std::string(""); - return ""; + return llvm::None; } /// Iterate over each Tensor Comprehension def.