diff --git a/mlir/include/mlir/Dialect/Linalg/IR/LinalgNamedStructuredOps.yaml b/mlir/include/mlir/Dialect/Linalg/IR/LinalgNamedStructuredOps.yaml --- a/mlir/include/mlir/Dialect/Linalg/IR/LinalgNamedStructuredOps.yaml +++ b/mlir/include/mlir/Dialect/Linalg/IR/LinalgNamedStructuredOps.yaml @@ -628,10 +628,10 @@ scalar_arg: B --- !LinalgOpConfig metadata: !LinalgOpMetadata - name: conv_2d_nchw - cpp_class_name: Conv2DNchwOp + name: conv_1d + cpp_class_name: Conv1DOp doc: |- - Performs 2-D convolution. + Performs 1-D convolution with no channels. This is primarily used for testing. Numeric casting is performed on the operands to the inner multiply, promoting them to the same data type as the accumulator/output. @@ -641,46 +641,225 @@ name: I usage: InputOperand type_var: T1 - shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11, s12] - -> (s0, s1, s2, s3)> + shape_map: affine_map<()[s0, s1, s2] -> (s0)> - !LinalgOperandDefConfig name: K usage: InputOperand type_var: T2 - shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11, s12] - -> (s4, s1, s5, s6)> + shape_map: affine_map<()[s0, s1, s2] -> (s1)> - !LinalgOperandDefConfig name: O usage: OutputOperand type_var: U - shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11, s12] - -> (s0, s4, s7, s8, s1)> + shape_map: affine_map<()[s0, s1, s2] -> (s2)> + indexing_maps: !LinalgIndexingMapsConfig + static_indexing_maps: + - affine_map<(d0, d1)[s0, s1, s2] -> (d0 + d1)> + - affine_map<(d0, d1)[s0, s1, s2] -> (d1)> + - affine_map<(d0, d1)[s0, s1, s2] -> (d0)> + iterator_types: + - parallel + - reduction + assignments: + - !ScalarAssign + arg: O + value: !ScalarExpression + scalar_apply: + fn_name: add + operands: + - !ScalarExpression + scalar_arg: O + - !ScalarExpression + scalar_apply: + fn_name: mul + operands: + - !ScalarExpression + symbolic_cast: + type_var: U + operands: + - !ScalarExpression + scalar_arg: I + - !ScalarExpression + symbolic_cast: + 
type_var: U + operands: + - !ScalarExpression + scalar_arg: K +--- !LinalgOpConfig +metadata: !LinalgOpMetadata + name: conv_2d + cpp_class_name: Conv2DOp + doc: |- + Performs 2-D convolution with no channels. This is primarily used for testing. + + Numeric casting is performed on the operands to the inner multiply, promoting + them to the same data type as the accumulator/output. +structured_op: !LinalgStructuredOpConfig + args: + - !LinalgOperandDefConfig + name: I + usage: InputOperand + type_var: T1 + shape_map: affine_map<()[s0, s1, s2, s3, s4, s5] -> (s0, s1)> + - !LinalgOperandDefConfig + name: K + usage: InputOperand + type_var: T2 + shape_map: affine_map<()[s0, s1, s2, s3, s4, s5] -> (s2, s3)> + - !LinalgOperandDefConfig + name: O + usage: OutputOperand + type_var: U + shape_map: affine_map<()[s0, s1, s2, s3, s4, s5] -> (s4, s5)> + indexing_maps: !LinalgIndexingMapsConfig + static_indexing_maps: + - affine_map<(d0, d1, d2, d3)[s0, s1, s2, s3, s4, s5] -> (d0 + d2, d1 + d3)> + - affine_map<(d0, d1, d2, d3)[s0, s1, s2, s3, s4, s5] -> (d2, d3)> + - affine_map<(d0, d1, d2, d3)[s0, s1, s2, s3, s4, s5] -> (d0, d1)> + iterator_types: + - parallel + - parallel + - reduction + - reduction + assignments: + - !ScalarAssign + arg: O + value: !ScalarExpression + scalar_apply: + fn_name: add + operands: + - !ScalarExpression + scalar_arg: O + - !ScalarExpression + scalar_apply: + fn_name: mul + operands: + - !ScalarExpression + symbolic_cast: + type_var: U + operands: + - !ScalarExpression + scalar_arg: I + - !ScalarExpression + symbolic_cast: + type_var: U + operands: + - !ScalarExpression + scalar_arg: K +--- !LinalgOpConfig +metadata: !LinalgOpMetadata + name: conv_3d + cpp_class_name: Conv3DOp + doc: |- + Performs 3-D convolution with no channels. This is primarily used for testing. + + Numeric casting is performed on the operands to the inner multiply, promoting + them to the same data type as the accumulator/output. 
+structured_op: !LinalgStructuredOpConfig + args: + - !LinalgOperandDefConfig + name: I + usage: InputOperand + type_var: T1 + shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8] -> (s0, s1, s2)> + - !LinalgOperandDefConfig + name: K + usage: InputOperand + type_var: T2 + shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8] -> (s3, s4, s5)> + - !LinalgOperandDefConfig + name: O + usage: OutputOperand + type_var: U + shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8] -> (s6, s7, s8)> + indexing_maps: !LinalgIndexingMapsConfig + static_indexing_maps: + - affine_map<(d0, d1, d2, d3, d4, d5)[s0, s1, s2, s3, s4, s5, s6, s7, s8] -> (d0 + + d3, d1 + d4, d2 + d5)> + - affine_map<(d0, d1, d2, d3, d4, d5)[s0, s1, s2, s3, s4, s5, s6, s7, s8] -> (d3, + d4, d5)> + - affine_map<(d0, d1, d2, d3, d4, d5)[s0, s1, s2, s3, s4, s5, s6, s7, s8] -> (d0, + d1, d2)> + iterator_types: + - parallel + - parallel + - parallel + - reduction + - reduction + - reduction + assignments: + - !ScalarAssign + arg: O + value: !ScalarExpression + scalar_apply: + fn_name: add + operands: + - !ScalarExpression + scalar_arg: O + - !ScalarExpression + scalar_apply: + fn_name: mul + operands: + - !ScalarExpression + symbolic_cast: + type_var: U + operands: + - !ScalarExpression + scalar_arg: I + - !ScalarExpression + symbolic_cast: + type_var: U + operands: + - !ScalarExpression + scalar_arg: K +--- !LinalgOpConfig +metadata: !LinalgOpMetadata + name: conv_1d_nwc_wcf + cpp_class_name: Conv1DNwcWcfOp + doc: |- + Performs 1-D convolution. + + Numeric casting is performed on the operands to the inner multiply, promoting + them to the same data type as the accumulator/output. 
+structured_op: !LinalgStructuredOpConfig + args: + - !LinalgOperandDefConfig + name: I + usage: InputOperand + type_var: T1 + shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7] -> (s0, s1, s2)> + - !LinalgOperandDefConfig + name: K + usage: InputOperand + type_var: T2 + shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7] -> (s3, s2, s4)> + - !LinalgOperandDefConfig + name: O + usage: OutputOperand + type_var: U + shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7] -> (s0, s5, s4)> - !LinalgOperandDefConfig name: strides usage: IndexAttribute type_var: I64 - attribute_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11, - s12] -> (s9, s10)> + attribute_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7] -> (s6)> - !LinalgOperandDefConfig name: dilations usage: IndexAttribute type_var: I64 - attribute_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11, - s12] -> (s11, s12)> + attribute_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7] -> (s7)> indexing_maps: !LinalgIndexingMapsConfig static_indexing_maps: - - affine_map<(d0, d1, d2, d3, d4, d5, d6)[s0, s1, s2, s3, s4, s5, s6, s7, s8, - s9, s10, s11, s12] -> (d0, d4, d2 * s9 + d5 * s11, d3 * s10 + d6 * s12)> - - affine_map<(d0, d1, d2, d3, d4, d5, d6)[s0, s1, s2, s3, s4, s5, s6, s7, s8, - s9, s10, s11, s12] -> (d1, d4, d5, d6)> - - affine_map<(d0, d1, d2, d3, d4, d5, d6)[s0, s1, s2, s3, s4, s5, s6, s7, s8, - s9, s10, s11, s12] -> (d0, d1, d2, d3)> + - affine_map<(d0, d1, d2, d3, d4)[s0, s1, s2, s3, s4, s5, s6, s7] -> (d0, d1 * + s6 + d3 * s7, d4)> + - affine_map<(d0, d1, d2, d3, d4)[s0, s1, s2, s3, s4, s5, s6, s7] -> (d3, d4, + d2)> + - affine_map<(d0, d1, d2, d3, d4)[s0, s1, s2, s3, s4, s5, s6, s7] -> (d0, d1, + d2)> iterator_types: - parallel - parallel - parallel - - parallel - - reduction - reduction - reduction assignments: @@ -906,6 +1085,286 @@ - !ScalarExpression scalar_arg: KZp --- !LinalgOpConfig +metadata: !LinalgOpMetadata + name: conv_3d_ndhwc_dhwcf + 
cpp_class_name: Conv3DNdhwcDhwcfOp + doc: |- + Performs 3-D convolution. + + Numeric casting is performed on the operands to the inner multiply, promoting + them to the same data type as the accumulator/output. +structured_op: !LinalgStructuredOpConfig + args: + - !LinalgOperandDefConfig + name: I + usage: InputOperand + type_var: T1 + shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11, s12, + s13, s14, s15, s16, s17] -> (s0, s1, s2, s3, s4)> + - !LinalgOperandDefConfig + name: K + usage: InputOperand + type_var: T2 + shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11, s12, + s13, s14, s15, s16, s17] -> (s5, s6, s7, s4, s8)> + - !LinalgOperandDefConfig + name: O + usage: OutputOperand + type_var: U + shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11, s12, + s13, s14, s15, s16, s17] -> (s0, s9, s10, s11, s8)> + - !LinalgOperandDefConfig + name: strides + usage: IndexAttribute + type_var: I64 + attribute_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11, + s12, s13, s14, s15, s16, s17] -> (s12, s13, s14)> + - !LinalgOperandDefConfig + name: dilations + usage: IndexAttribute + type_var: I64 + attribute_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11, + s12, s13, s14, s15, s16, s17] -> (s15, s16, s17)> + indexing_maps: !LinalgIndexingMapsConfig + static_indexing_maps: + - affine_map<(d0, d1, d2, d3, d4, d5, d6, d7, d8)[s0, s1, s2, s3, s4, s5, s6, + s7, s8, s9, s10, s11, s12, s13, s14, s15, s16, s17] -> (d0, d1 * s12 + d5 * + s15, d2 * s13 + d6 * s16, d3 * s14 + d7 * s17, d8)> + - affine_map<(d0, d1, d2, d3, d4, d5, d6, d7, d8)[s0, s1, s2, s3, s4, s5, s6, + s7, s8, s9, s10, s11, s12, s13, s14, s15, s16, s17] -> (d5, d6, d7, d8, d4)> + - affine_map<(d0, d1, d2, d3, d4, d5, d6, d7, d8)[s0, s1, s2, s3, s4, s5, s6, + s7, s8, s9, s10, s11, s12, s13, s14, s15, s16, s17] -> (d0, d1, d2, d3, d4)> + iterator_types: + - parallel + - parallel + - parallel + - parallel + - 
parallel + - reduction + - reduction + - reduction + - reduction + assignments: + - !ScalarAssign + arg: O + value: !ScalarExpression + scalar_apply: + fn_name: add + operands: + - !ScalarExpression + scalar_arg: O + - !ScalarExpression + scalar_apply: + fn_name: mul + operands: + - !ScalarExpression + symbolic_cast: + type_var: U + operands: + - !ScalarExpression + scalar_arg: I + - !ScalarExpression + symbolic_cast: + type_var: U + operands: + - !ScalarExpression + scalar_arg: K +--- !LinalgOpConfig +metadata: !LinalgOpMetadata + name: depthwise_conv2D_nhw + cpp_class_name: DepthwiseConv2DNhwOp + doc: |- + Performs depth-wise 2-D convolution. + + Numeric casting is performed on the operands to the inner multiply, promoting + them to the same data type as the accumulator/output. Multiplier is set to 1 + which is a special case for most depthwise convolutions. +structured_op: !LinalgStructuredOpConfig + args: + - !LinalgOperandDefConfig + name: I + usage: InputOperand + type_var: T1 + shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11] -> + (s0, s1, s2, s3)> + - !LinalgOperandDefConfig + name: K + usage: InputOperand + type_var: T2 + shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11] -> + (s4, s5, s3)> + - !LinalgOperandDefConfig + name: O + usage: OutputOperand + type_var: U + shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11] -> + (s0, s6, s7, s3)> + - !LinalgOperandDefConfig + name: strides + usage: IndexAttribute + type_var: I64 + attribute_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11] + -> (s8, s9)> + - !LinalgOperandDefConfig + name: dilations + usage: IndexAttribute + type_var: I64 + attribute_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11] + -> (s10, s11)> + indexing_maps: !LinalgIndexingMapsConfig + static_indexing_maps: + - affine_map<(d0, d1, d2, d3, d4, d5)[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, + s10, s11] -> (d0, d1 * s8 + d4 * s10, d2 * 
s9 + d5 * s11, d3)> + - affine_map<(d0, d1, d2, d3, d4, d5)[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, + s10, s11] -> (d4, d5, d3)> + - affine_map<(d0, d1, d2, d3, d4, d5)[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, + s10, s11] -> (d0, d1, d2, d3)> + iterator_types: + - parallel + - parallel + - parallel + - parallel + - reduction + - reduction + assignments: + - !ScalarAssign + arg: O + value: !ScalarExpression + scalar_apply: + fn_name: add + operands: + - !ScalarExpression + scalar_arg: O + - !ScalarExpression + scalar_apply: + fn_name: mul + operands: + - !ScalarExpression + symbolic_cast: + type_var: U + operands: + - !ScalarExpression + scalar_arg: I + - !ScalarExpression + symbolic_cast: + type_var: U + operands: + - !ScalarExpression + scalar_arg: K +--- !LinalgOpConfig +metadata: !LinalgOpMetadata + name: depthwise_conv2D_nhw_q + cpp_class_name: DepthwiseConv2DNhwQOp + doc: |- + Performs depth-wise 2-D convolution. + + Numeric casting is performed on the operands to the inner multiply, promoting + them to the same data type as the accumulator/output. 
+structured_op: !LinalgStructuredOpConfig + args: + - !LinalgOperandDefConfig + name: I + usage: InputOperand + type_var: T1 + shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11] -> + (s0, s1, s2, s3)> + - !LinalgOperandDefConfig + name: K + usage: InputOperand + type_var: T2 + shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11] -> + (s4, s5, s3)> + - !LinalgOperandDefConfig + name: IZp + usage: InputOperand + type_var: I32 + - !LinalgOperandDefConfig + name: KZp + usage: InputOperand + type_var: I32 + - !LinalgOperandDefConfig + name: O + usage: OutputOperand + type_var: U + shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11] -> + (s0, s6, s7, s3)> + - !LinalgOperandDefConfig + name: strides + usage: IndexAttribute + type_var: I64 + attribute_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11] + -> (s8, s9)> + - !LinalgOperandDefConfig + name: dilations + usage: IndexAttribute + type_var: I64 + attribute_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11] + -> (s10, s11)> + indexing_maps: !LinalgIndexingMapsConfig + static_indexing_maps: + - affine_map<(d0, d1, d2, d3, d4, d5)[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, + s10, s11] -> (d0, d1 * s8 + d4 * s10, d2 * s9 + d5 * s11, d3)> + - affine_map<(d0, d1, d2, d3, d4, d5)[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, + s10, s11] -> (d4, d5, d3)> + - affine_map<(d0, d1, d2, d3, d4, d5)[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, + s10, s11] -> ()> + - affine_map<(d0, d1, d2, d3, d4, d5)[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, + s10, s11] -> ()> + - affine_map<(d0, d1, d2, d3, d4, d5)[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, + s10, s11] -> (d0, d1, d2, d3)> + iterator_types: + - parallel + - parallel + - parallel + - parallel + - reduction + - reduction + assignments: + - !ScalarAssign + arg: O + value: !ScalarExpression + scalar_apply: + fn_name: add + operands: + - !ScalarExpression + scalar_arg: O + - !ScalarExpression + 
scalar_apply: + fn_name: mul + operands: + - !ScalarExpression + scalar_apply: + fn_name: sub + operands: + - !ScalarExpression + symbolic_cast: + type_var: U + operands: + - !ScalarExpression + scalar_arg: I + - !ScalarExpression + symbolic_cast: + type_var: U + operands: + - !ScalarExpression + scalar_arg: IZp + - !ScalarExpression + scalar_apply: + fn_name: sub + operands: + - !ScalarExpression + symbolic_cast: + type_var: U + operands: + - !ScalarExpression + scalar_arg: K + - !ScalarExpression + symbolic_cast: + type_var: U + operands: + - !ScalarExpression + scalar_arg: KZp +--- !LinalgOpConfig metadata: !LinalgOpMetadata name: depthwise_conv2D_nhwc cpp_class_name: DepthwiseConv2DNhwcOp diff --git a/mlir/include/mlir/Dialect/Linalg/IR/LinalgNamedStructuredOpsSpec.tc b/mlir/include/mlir/Dialect/Linalg/IR/LinalgNamedStructuredOpsSpec.tc --- a/mlir/include/mlir/Dialect/Linalg/IR/LinalgNamedStructuredOpsSpec.tc +++ b/mlir/include/mlir/Dialect/Linalg/IR/LinalgNamedStructuredOpsSpec.tc @@ -5,151 +5,3 @@ // C(m, n) += cast(A(m, k)) * cast(B(k, n)) C(m, n) = AddIOp(C(m, n), MulIOp(SignExtendIOp32(A(m, k)), SignExtendIOp32(B(k, n)))); } - -ods_def: -def conv_1d(I: f32(W), K: f32(KW)) -> (O: f32(W)) { - O(w) = AddFOp(O(w), MulFOp(I(w + kw), K(kw))); -} - -ods_def: -def conv_2d(I: f32(H, W), K: f32(KH, KW)) -> (O: f32(H, W)) { - O(h, w) = AddFOp(O(h, w), MulFOp(I(h + kh, w + kw), K(kh, kw))); -} - -ods_def: -def conv_3d(I: f32(D, H, W), K: f32(KD, KH, KW)) -> (O: f32(D, H, W)) { - O(d, h, w) = AddFOp( - O(d, h, w), MulFOp(I(d + kd, h + kh, w + kw), K(kd, kh, kw))); -} - -ods_def: -def depthwise_conv_2d_input_nhwc_filter_hwcf - (I: f32(N, IH, IW, CI), K: f32(KH, KW, CI, CO)) - -> (O: f32(N, OH, OW, CI, CO)) - attr(strides: 2xi64, dilations: 2xi64) -"""A general depth-wise 2-D convolution operation. 
- -This operation performs depth-wise 2-D convolution over an input `I` and filter -`F` and generates output `O` using the following computation: - -``` - O(n, oh, ow, ci, co) = AddFOp( - O(n, oh, ow, ci, co), - MulFOp(I(n, oh * strides[0] + kh * dilations[0], ow * strides[1] + kw * dilations[1], ci), - K(kh, kw, ci, co))); -``` - -where - -* `I` is a 4-D tensor with shape `(N, IH, IW, CI)`. -* `F` is a 4-D tensor with shape `(KH, KW, CI, CO)`. -* `O` is a 5-D tensor with shape `(N, OH, OW, CI, CO)`. -* `strides` is a 2-element vector attribute for window strides along the - height/width dimension. - -The indexing maps for these three tensors contain 7 dimensions, following the -order of (`N`, `OH`, `OW`, `CI`, `CO`, `KH`, `KW`). - -Note: this op only supports any channel multiplier, which is `CO`. To map back -to 4D result as DepthwiseConvInputNHWCFilterHWCOp, you will have to create a -Linalg reshape op which collapses `CI` and `CO` into one dimension. -""" -{ - O(n, oh, ow, ci, co) = AddFOp( - O(n, oh, ow, ci, co), - MulFOp(I(n, oh * strides[0] + kh * dilations[0], ow * strides[1] + kw * dilations[1], ci), - K(kh, kw, ci, co))); -} - -ods_def: -def depthwise_conv_2d_input_nhwc_filter_hwc - (I: f32(N, IH, IW, C), K: f32(KH, KW, C)) - -> (O: f32(N, OH, OW, C)) - attr(strides: 2xi64, dilations: 2xi64) -"""A depth-wise 2-D convolution operation. - -This operation performs depth-wise 2-D convolution over an input `I` and filter -`F` and generates output `O` using the following computation: - -``` -O(n, oh, ow, c) = AddFOp( - O(n, oh, ow, c), - MulFOp(I(n, oh * strides[0] + kh * dilations[0], ow * strides[1] + kw * dilations[1], c), - K(kh, kw, c))); -``` - -where - -* `I` is a 4-D tensor with shape `(N, IH, IW, C)`. -* `F` is a 3-D tensor with shape `(KH, KW, C)`. -* `O` is a 4-D tensor with shape `(N, OH, OW, C)`. -* `strides` is a 2-element vector attribute for window strides along the - height/width dimension. 
- -The indexing maps for these three tensors contain 6 dimensions, following the -order of (`N`, `OH`, `OW`, `C`, `KH`, `KW`). - -Note: this op only supports channel multiplier == 1. -""" -{ - O(n, oh, ow, c) = AddFOp( - O(n, oh, ow, c), - MulFOp(I(n, oh * strides[0] + kh * dilations[0], ow * strides[1] + kw * dilations[1], c), - K(kh, kw, c))); -} - -ods_def: -def conv_1d_input_nwc_filter_wcf(I: f32(N, W, C), K: f32(KW, C, F)) -> (O: f32(N, W, F)) - attr(strides: 1xi64, dilations: 1xi64) -""" A 1-D convolution given NWC layout input and WCF layout filter. - -Computes a 1-D convolution given 3-D input and filter. The data layout -of input is NWC and the data layout of filter is WCF. - -The indexing maps for these three tensors contain 5 dimensions, following the -order of (`N`, `W`, `F`, `KW`, `C`). -""" -{ - O(n, w, f) = AddFOp( - O(n, w, f), - MulFOp(I(n, w * strides[0] + kw * dilations[0], c), K(kw, c, f))); -} - -ods_def: -def conv_2d_input_nhwc_filter_hwcf(I: f32(N, H, W, C), K: f32(KH, KW, C, F)) -> (O: f32(N, H, W, F)) - attr(strides: 2xi64, dilations: 2xi64) -""" A 2-D convolution given NHWC layout input and HWCF layout filter. - -Computes a 2-D convolution given 4-D input and filter. The data layout -of input is NHWC and the data layout of filter is HWCF. - -The indexing maps for these three tensors contain 7 dimensions, following the -order of (`N`, `H`, `W`, `F`, `KH`, `KW`, `C`). -""" -{ - O(n, h, w, f) = AddFOp( - O(n, h, w, f), MulFOp(I(n, h * strides[0] + kh * dilations[0], - w * strides[1] + kw * dilations[1], c), - K(kh, kw, c, f))); -} - -ods_def: -def conv_3d_input_ndhwc_filter_dhwcf - (I: f32(N, D, H, W, C), K: f32(KD, KH, KW, C, F)) - -> (O: f32(N, D, H, W, F)) - attr(strides: 3xi64, dilations: 3xi64) -""" A 3-D convolution given NDHWC layout input and DHWCF layout filter. - -Computes a 3-D convolution given 5-D input and filter. The data layout -of input is NDHWC and the data layout of filter is DHWCF. 
- -The indexing maps for these three tensors contain 9 dimensions, following the -order of (`N`, `D`, `H`, `W`, `F`, `KD`, `KH`, `KW`, `C`). -""" -{ - O(n, d, h, w, f) = AddFOp( - O(n, d, h, w, f), MulFOp(I(n, d * strides[0] + kd * dilations[0], - h * strides[1] + kh * dilations[1], - w * strides[2] + kw * dilations[2], c), - K(kd, kh, kw, c, f))); -} diff --git a/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp b/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp --- a/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp +++ b/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp @@ -1127,7 +1127,7 @@ return success(); } -using ConvOpConst = ConvOpVectorization; +using ConvOpConst = ConvOpVectorization; /// Inserts tiling, promotion and vectorization pattern for ConvOp /// conversion into corresponding pattern lists. @@ -1165,25 +1165,22 @@ RewritePatternSet tiling(context); RewritePatternSet promotion(context); RewritePatternSet vectorization(context); - populateVectorizationPatterns(tiling, promotion, vectorization, - tileSizes); - - populateVectorizationPatterns( - tiling, promotion, vectorization, tileSizes); + populateVectorizationPatterns(tiling, promotion, vectorization, + tileSizes); - populateVectorizationPatterns(tiling, promotion, vectorization, + populateVectorizationPatterns(tiling, promotion, vectorization, tileSizes); - populateVectorizationPatterns( - tiling, promotion, vectorization, tileSizes); + populateVectorizationPatterns(tiling, promotion, vectorization, + tileSizes); - populateVectorizationPatterns(tiling, promotion, - vectorization, tileSizes); + populateVectorizationPatterns(tiling, promotion, + vectorization, tileSizes); - populateVectorizationPatterns(tiling, promotion, vectorization, - tileSizes); + populateVectorizationPatterns(tiling, promotion, + vectorization, tileSizes); - populateVectorizationPatterns( + populateVectorizationPatterns( tiling, promotion, vectorization, tileSizes); patterns.push_back(std::move(tiling)); diff 
--git a/mlir/python/mlir/dialects/linalg/opdsl/ops/core_named_ops.py b/mlir/python/mlir/dialects/linalg/opdsl/ops/core_named_ops.py --- a/mlir/python/mlir/dialects/linalg/opdsl/ops/core_named_ops.py +++ b/mlir/python/mlir/dialects/linalg/opdsl/ops/core_named_ops.py @@ -145,21 +145,63 @@ C[None] += cast(U, A[D.m]) * cast(U, B[D.m]) @linalg_structured_op -def conv_2d_nchw( - I=TensorDef(T1, S.N, S.C, S.IH, S.IW), - K=TensorDef(T2, S.F, S.C, S.KH, S.KW), - O=TensorDef(U, S.N, S.F, S.OH, S.OW, S.C, output=True), - strides=AttributeDef(S.SH, S.SW), - dilations=AttributeDef(S.DH, S.DW)): - """Performs 2-D convolution. +def conv_1d( + I=TensorDef(T1, S.IW), + K=TensorDef(T2, S.KW), + O=TensorDef(U, S.OW, output=True)): + """Performs 1-D convolution with no channels. This is primarily used for testing. + + Numeric casting is performed on the operands to the inner multiply, promoting + them to the same data type as the accumulator/output. + """ + domain(D.ow, D.kw) + O[D.ow] += cast( + U, I[D.ow + D.kw]) * cast(U, K[D.kw]) + +@linalg_structured_op +def conv_2d( + I=TensorDef(T1, S.IH, S.IW), + K=TensorDef(T2, S.KH, S.KW), + O=TensorDef(U, S.OH, S.OW, output=True)): + """Performs 2-D convolution with no channels. This is primarily used for testing. Numeric casting is performed on the operands to the inner multiply, promoting them to the same data type as the accumulator/output. """ - domain(D.n, D.f, D.oh, D.ow, D.c, D.kh, D.kw) - O[D.n, D.f, D.oh, D.ow] += cast( - U, I[D.n, D.c, D.oh * S.SH + D.kh * S.DH, D.ow * S.SW + D.kw * S.DW, - ]) * cast(U, K[D.f, D.c, D.kh, D.kw]) + domain(D.oh, D.ow, D.kh, D.kw) + O[D.oh, D.ow] += cast( + U, I[D.oh + D.kh, D.ow + D.kw]) * cast(U, K[D.kh, D.kw]) + +@linalg_structured_op +def conv_3d( + I=TensorDef(T1, S.ID, S.IH, S.IW), + K=TensorDef(T2, S.KD, S.KH, S.KW), + O=TensorDef(U, S.OD, S.OH, S.OW, output=True)): + """Performs 3-D convolution with no channels. This is primarily used for testing. 
+ + Numeric casting is performed on the operands to the inner multiply, promoting + them to the same data type as the accumulator/output. + """ + domain(D.od, D.oh, D.ow, D.kd, D.kh, D.kw) + O[D.od, D.oh, D.ow] += cast( + U, I[D.od + D.kd, D.oh + D.kh, D.ow + D.kw]) * cast(U, K[D.kd, D.kh, D.kw]) + +@linalg_structured_op +def conv_1d_nwc_wcf( + I=TensorDef(T1, S.N, S.IW, S.C), + K=TensorDef(T2, S.KW, S.C, S.F), + O=TensorDef(U, S.N, S.OW, S.F, output=True), + strides=AttributeDef(S.SW), + dilations=AttributeDef(S.DW)): + """Performs 1-D convolution. + + Numeric casting is performed on the operands to the inner multiply, promoting + them to the same data type as the accumulator/output. + """ + domain(D.n, D.ow, D.f, D.kw, D.c) + O[D.n, D.ow, D.f] += cast( + U, I[D.n, D.ow * S.SW + D.kw * S.DW, D.c + ]) * cast(U, K[D.kw, D.c, D.f]) @linalg_structured_op def conv_2d_nhwc_hwcf( @@ -177,6 +219,7 @@ O[D.n, D.oh, D.ow, D.f] += cast( U, I[D.n, D.oh * S.SH + D.kh * S.DH, D.ow * S.SW + D.kw * S.DW, D.c ]) * cast(U, K[D.kh, D.kw, D.c, D.f]) + @linalg_structured_op def conv_2d_nhwc_hwcf_q( I=TensorDef(T1, S.N, S.IH, S.IW, S.C), @@ -197,6 +240,61 @@ U, I[D.n, D.oh * S.SH + D.kh * S.DH, D.ow * S.SW + D.kw * S.DW, D.c ]) - cast(U, IZp)) * (cast(U, K[D.kh, D.kw, D.c, D.f]) - cast(U, KZp)) +@linalg_structured_op +def conv_3d_ndhwc_dhwcf( + I=TensorDef(T1, S.N, S.ID, S.IH, S.IW, S.C), + K=TensorDef(T2, S.KD, S.KH, S.KW, S.C, S.F), + O=TensorDef(U, S.N, S.OD, S.OH, S.OW, S.F, output=True), + strides=AttributeDef(S.SD, S.SH, S.SW), + dilations=AttributeDef(S.DD, S.DH, S.DW)): + """Performs 3-D convolution. + + Numeric casting is performed on the operands to the inner multiply, promoting + them to the same data type as the accumulator/output. 
+ """ + domain(D.n, D.od, D.oh, D.ow, D.f, D.kd, D.kh, D.kw, D.c) + O[D.n, D.od, D.oh, D.ow, D.f] += cast( + U, I[D.n, D.od * S.SD + D.kd * S.DD, D.oh * S.SH + D.kh * S.DH, D.ow * S.SW + D.kw * S.DW, D.c + ]) * cast(U, K[D.kd, D.kh, D.kw, D.c, D.f]) + +@linalg_structured_op +def depthwise_conv2D_nhw( + I=TensorDef(T1, S.N, S.IH, S.IW, S.IC), + K=TensorDef(T2, S.KH, S.KW, S.IC), + O=TensorDef(U, S.N, S.OH, S.OW, S.IC, output=True), + strides=AttributeDef(S.SH, S.SW), + dilations=AttributeDef(S.DH, S.DW)): + """Performs depth-wise 2-D convolution. + + Numeric casting is performed on the operands to the inner multiply, promoting + them to the same data type as the accumulator/output. Multiplier is set to 1 + which is a special case for most depthwise convolutions. + """ + domain(D.n, D.oh, D.ow, D.ic, D.kh, D.kw) + O[D.n, D.oh, D.ow, D.ic] += cast( + U, I[D.n, D.oh * S.SH + D.kh * S.DH, D.ow * S.SW + D.kw * S.DW, + D.ic]) * cast(U, K[D.kh, D.kw, D.ic]) + +@linalg_structured_op +def depthwise_conv2D_nhw_q( + I=TensorDef(T1, S.N, S.IH, S.IW, S.IC), + K=TensorDef(T2, S.KH, S.KW, S.IC), + IZp=ScalarDef(I32), + KZp=ScalarDef(I32), + O=TensorDef(U, S.N, S.OH, S.OW, S.IC, output=True), + strides=AttributeDef(S.SH, S.SW), + dilations=AttributeDef(S.DH, S.DW)): + """Performs depth-wise 2-D convolution. + + Numeric casting is performed on the operands to the inner multiply, promoting + them to the same data type as the accumulator/output. 
+ """ + domain(D.n, D.oh, D.ow, D.ic, D.kh, D.kw) + O[D.n, D.oh, D.ow, D.ic] += ( + (cast(U, I[D.n, D.oh * S.SH + D.kh * S.DH, D.ow * S.SW + D.kw * S.DW, + D.ic]) - cast(U, IZp)) * + (cast(U, K[D.kh, D.kw, D.ic]) - cast(U, KZp))) + @linalg_structured_op def depthwise_conv2D_nhwc( I=TensorDef(T1, S.N, S.IH, S.IW, S.IC), diff --git a/mlir/test/Dialect/Linalg/generalize-named-ops.mlir b/mlir/test/Dialect/Linalg/generalize-named-ops.mlir --- a/mlir/test/Dialect/Linalg/generalize-named-ops.mlir +++ b/mlir/test/Dialect/Linalg/generalize-named-ops.mlir @@ -76,8 +76,8 @@ // ----- -func @depthwise_conv_2d_input_nhwc_filter_hwcf(%input: memref<2x4x5x2xf32>, %filter: memref<2x2x2x3xf32>, %output: memref<2x3x4x2x3xf32>) { - linalg.depthwise_conv_2d_input_nhwc_filter_hwcf +func @depthwise_conv2D_nhwc(%input: memref<2x4x5x2xf32>, %filter: memref<2x2x2x3xf32>, %output: memref<2x3x4x2x3xf32>) { + linalg.depthwise_conv2D_nhwc { dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64> } ins(%input, %filter : memref<2x4x5x2xf32>, memref<2x2x2x3xf32>) outs(%output : memref<2x3x4x2x3xf32>) @@ -88,7 +88,7 @@ // CHECK-DAG: #[[MAP1:.+]] = affine_map<(d0, d1, d2, d3, d4, d5, d6) -> (d5, d6, d3, d4)> // CHECK-DAG: #[[MAP2:.+]] = affine_map<(d0, d1, d2, d3, d4, d5, d6) -> (d0, d1, d2, d3, d4)> -// CHECK: func @depthwise_conv_2d_input_nhwc_filter_hwcf +// CHECK: func @depthwise_conv2D_nhwc // CHECK: linalg.generic // CHECK-SAME: indexing_maps = [#[[MAP0]], #[[MAP1]], #[[MAP2]]] @@ -103,8 +103,8 @@ // ----- -func @depthwise_conv_2d_input_nhwc_filter_hwcf(%input: memref<2x4x5x2xf32>, %filter: memref<2x2x2x3xf32>, %output: memref<2x2x3x2x3xf32>) { - linalg.depthwise_conv_2d_input_nhwc_filter_hwcf +func @depthwise_conv2D_nhwc(%input: memref<2x4x5x2xf32>, %filter: memref<2x2x2x3xf32>, %output: memref<2x2x3x2x3xf32>) { + linalg.depthwise_conv2D_nhwc { dilations = dense<2> : tensor<2xi64>, strides = dense<1> : tensor<2xi64> } ins(%input, %filter : memref<2x4x5x2xf32>, 
memref<2x2x2x3xf32>) outs(%output : memref<2x2x3x2x3xf32>) @@ -115,7 +115,7 @@ // CHECK-DAG: #[[MAP1:.+]] = affine_map<(d0, d1, d2, d3, d4, d5, d6) -> (d5, d6, d3, d4)> // CHECK-DAG: #[[MAP2:.+]] = affine_map<(d0, d1, d2, d3, d4, d5, d6) -> (d0, d1, d2, d3, d4)> -// CHECK: func @depthwise_conv_2d_input_nhwc_filter_hwcf +// CHECK: func @depthwise_conv2D_nhwc // CHECK: linalg.generic // CHECK-SAME: indexing_maps = [#[[MAP0]], #[[MAP1]], #[[MAP2]]] @@ -130,8 +130,8 @@ // ----- -func @depthwise_conv_2d_input_nhwc_filter_hwc(%input: memref<1x113x113x96xf32>, %filter: memref<3x3x96xf32>, %output: memref<1x56x56x96xf32>) { - linalg.depthwise_conv_2d_input_nhwc_filter_hwc {dilations = dense<1> : vector<2xi64>, strides = dense<2> : vector<2xi64>} +func @depthwise_conv2D_nhw(%input: memref<1x113x113x96xf32>, %filter: memref<3x3x96xf32>, %output: memref<1x56x56x96xf32>) { + linalg.depthwise_conv2D_nhw {dilations = dense<1> : vector<2xi64>, strides = dense<2> : vector<2xi64>} ins(%input, %filter: memref<1x113x113x96xf32>, memref<3x3x96xf32>) outs(%output: memref<1x56x56x96xf32>) return @@ -141,7 +141,7 @@ // CHECK-DAG: #[[MAP1:.+]] = affine_map<(d0, d1, d2, d3, d4, d5) -> (d4, d5, d3)> // CHECK-DAG: #[[MAP2:.+]] = affine_map<(d0, d1, d2, d3, d4, d5) -> (d0, d1, d2, d3)> -// CHECK: func @depthwise_conv_2d_input_nhwc_filter_hwc +// CHECK: func @depthwise_conv2D_nhw // CHECK: linalg.generic // CHECK-SAME: indexing_maps = [#[[MAP0]], #[[MAP1]], #[[MAP2]]] @@ -156,8 +156,8 @@ // ----- -func @conv_1d_input_nwc_filter_wcf(%input: memref, %filter: memref, %output: memref) { - linalg.conv_1d_input_nwc_filter_wcf {dilations = dense<1> : tensor<1xi64>, +func @conv_1d_nwc_wcf(%input: memref, %filter: memref, %output: memref) { + linalg.conv_1d_nwc_wcf {dilations = dense<1> : tensor<1xi64>, strides = dense<1> : tensor<1xi64>} ins (%input, %filter: memref, memref) outs (%output: memref) @@ -167,11 +167,11 @@ // CHECK-DAG: #[[MAP1:.+]] = affine_map<(d0, d1, d2, d3, d4) -> (d3, d4, d2)> // 
CHECK-DAG: #[[MAP2:.+]] = affine_map<(d0, d1, d2, d3, d4) -> (d0, d1, d2)> -// CHECK: func @conv_1d_input_nwc_filter_wcf +// CHECK: func @conv_1d_nwc_wcf // CHECK: linalg.generic // CHECK-SAME: indexing_maps = [#[[MAP0]], #[[MAP1]], #[[MAP2]]] -// CHECK-SAME: iterator_types = ["parallel", "parallel", "parallel", "reduction", "parallel"]} +// CHECK-SAME: iterator_types = ["parallel", "parallel", "parallel", "reduction", "reduction"]} // CHECK-SAME: ins(%{{.+}}, %{{.+}} : memref, memref) // CHECK-SAME: outs(%{{.+}} : memref) diff --git a/mlir/test/Dialect/Linalg/invalid.mlir b/mlir/test/Dialect/Linalg/invalid.mlir --- a/mlir/test/Dialect/Linalg/invalid.mlir +++ b/mlir/test/Dialect/Linalg/invalid.mlir @@ -573,7 +573,7 @@ func @invalid_static_2d_conv(%input : memref<1x3x4x2xf32>, %filter: memref<3x2x2x1xf32>, %output: memref<1x2x3x1xf32>) { // expected-error @+1 {{inferred input/output operand #0 has shape's dimension #1 to be greater than or equal to 4, but found 3}} - linalg.conv_2d_input_nhwc_filter_hwcf + linalg.conv_2d_nhwc_hwcf { dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%input, %filter : memref<1x3x4x2xf32>, memref<3x2x2x1xf32>) outs(%output : memref<1x2x3x1xf32>) diff --git a/mlir/test/Dialect/Linalg/named-ops.mlir b/mlir/test/Dialect/Linalg/named-ops.mlir --- a/mlir/test/Dialect/Linalg/named-ops.mlir +++ b/mlir/test/Dialect/Linalg/named-ops.mlir @@ -1,81 +1,81 @@ // RUN: mlir-opt -split-input-file -verify-diagnostics %s | FileCheck %s -// CHECK-LABEL: func @depthwise_conv_2d_input_nhwc_filter_hwcf_tensor -func @depthwise_conv_2d_input_nhwc_filter_hwcf_tensor(%input: tensor<2x4x5x2xf32>, %filter: tensor<2x2x2x3xf32>) -> tensor<2x3x4x2x3xf32> { +// CHECK-LABEL: func @depthwise_conv2D_nhwc_tensor +func @depthwise_conv2D_nhwc_tensor(%input: tensor<2x4x5x2xf32>, %filter: tensor<2x2x2x3xf32>) -> tensor<2x3x4x2x3xf32> { %zero = constant 0.000000e+00 : f32 %init = linalg.init_tensor [2, 3, 4, 2, 3] : tensor<2x3x4x2x3xf32> %fill = 
linalg.fill(%zero, %init) : f32, tensor<2x3x4x2x3xf32> -> tensor<2x3x4x2x3xf32> - // CHECK: %{{.+}} = linalg.depthwise_conv_2d_input_nhwc_filter_hwcf + // CHECK: %{{.+}} = linalg.depthwise_conv2D_nhwc // CHECK-SAME: {dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} // CHECK-SAME: ins(%{{.+}}, %{{.+}} : tensor<2x4x5x2xf32>, tensor<2x2x2x3xf32>) // CHECK-SAME: outs(%{{.+}} : tensor<2x3x4x2x3xf32>) - %0 = linalg.depthwise_conv_2d_input_nhwc_filter_hwcf + %0 = linalg.depthwise_conv2D_nhwc { dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64> } ins(%input, %filter : tensor<2x4x5x2xf32>, tensor<2x2x2x3xf32>) outs(%fill : tensor<2x3x4x2x3xf32>) -> tensor<2x3x4x2x3xf32> return %0 : tensor<2x3x4x2x3xf32> } -// CHECK-LABEL: func @depthwise_conv_2d_input_nhwc_filter_hwcf_memref -func @depthwise_conv_2d_input_nhwc_filter_hwcf_memref(%input: memref<2x4x5x2xf32>, %filter: memref<2x2x2x3xf32>, %output: memref<2x3x4x2x3xf32>) { - // CHECK: linalg.depthwise_conv_2d_input_nhwc_filter_hwcf +// CHECK-LABEL: func @depthwise_conv2D_nhwc_memref +func @depthwise_conv2D_nhwc_memref(%input: memref<2x4x5x2xf32>, %filter: memref<2x2x2x3xf32>, %output: memref<2x3x4x2x3xf32>) { + // CHECK: linalg.depthwise_conv2D_nhwc // CHECK-SAME: {dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} // CHECK-SAME: ins(%{{.+}}, %{{.+}} : memref<2x4x5x2xf32>, memref<2x2x2x3xf32>) // CHECK-SAME: outs(%{{.+}} : memref<2x3x4x2x3xf32>) - linalg.depthwise_conv_2d_input_nhwc_filter_hwcf + linalg.depthwise_conv2D_nhwc { dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64> } ins(%input, %filter : memref<2x4x5x2xf32>, memref<2x2x2x3xf32>) outs(%output : memref<2x3x4x2x3xf32>) return } -// CHECK-LABEL: func @depthwise_conv_2d_input_nhwc_filter_hwc_tensor -func @depthwise_conv_2d_input_nhwc_filter_hwc_tensor(%input: tensor<1x113x113x96xf32>, %filter: tensor<3x3x96xf32>) -> tensor<1x56x56x96xf32> { +// CHECK-LABEL: func 
@depthwise_conv2D_nhw_tensor +func @depthwise_conv2D_nhw_tensor(%input: tensor<1x113x113x96xf32>, %filter: tensor<3x3x96xf32>) -> tensor<1x56x56x96xf32> { %init = linalg.init_tensor [1, 56, 56, 96] : tensor<1x56x56x96xf32> - // CHECK: %{{.+}} = linalg.depthwise_conv_2d_input_nhwc_filter_hwc + // CHECK: %{{.+}} = linalg.depthwise_conv2D_nhw // CHECK-SAME: {dilations = dense<1> : vector<2xi64>, strides = dense<2> : vector<2xi64>} // CHECK-SAME: ins(%{{.+}}, %{{.+}} : tensor<1x113x113x96xf32>, tensor<3x3x96xf32>) // CHECK-SAME: outs(%{{.+}} : tensor<1x56x56x96xf32>) -> tensor<1x56x56x96xf32> - %0 = linalg.depthwise_conv_2d_input_nhwc_filter_hwc {dilations = dense<1> : vector<2xi64>, strides = dense<2> : vector<2xi64>} + %0 = linalg.depthwise_conv2D_nhw {dilations = dense<1> : vector<2xi64>, strides = dense<2> : vector<2xi64>} ins(%input, %filter: tensor<1x113x113x96xf32>, tensor<3x3x96xf32>) outs(%init: tensor<1x56x56x96xf32>) -> tensor<1x56x56x96xf32> return %0: tensor<1x56x56x96xf32> } -// CHECK-LABEL: func @depthwise_conv_2d_input_nhwc_filter_hwc_memref -func @depthwise_conv_2d_input_nhwc_filter_hwc_memref(%input: memref<1x113x113x96xf32>, %filter: memref<3x3x96xf32>, %output: memref<1x56x56x96xf32>) { - // CHECK: linalg.depthwise_conv_2d_input_nhwc_filter_hwc +// CHECK-LABEL: func @depthwise_conv2D_nhw_memref +func @depthwise_conv2D_nhw_memref(%input: memref<1x113x113x96xf32>, %filter: memref<3x3x96xf32>, %output: memref<1x56x56x96xf32>) { + // CHECK: linalg.depthwise_conv2D_nhw // CHECK-SAME: {dilations = dense<1> : vector<2xi64>, strides = dense<2> : vector<2xi64>} // CHECK-SAME: ins(%{{.+}}, %{{.+}} : memref<1x113x113x96xf32>, memref<3x3x96xf32>) // CHECK-SAME: outs(%{{.+}} : memref<1x56x56x96xf32>) - linalg.depthwise_conv_2d_input_nhwc_filter_hwc {dilations = dense<1> : vector<2xi64>, strides = dense<2> : vector<2xi64>} + linalg.depthwise_conv2D_nhw {dilations = dense<1> : vector<2xi64>, strides = dense<2> : vector<2xi64>} ins(%input, %filter: 
memref<1x113x113x96xf32>, memref<3x3x96xf32>) outs(%output: memref<1x56x56x96xf32>) return } -func @depthwise_conv_2d_input_nhwc_filter_hwcf_tensor_dilated(%input: tensor<2x8x9x2xf32>, %filter: tensor<2x2x2x3xf32>) -> tensor<2x6x7x2x3xf32> { +func @depthwise_conv2D_nhwc_tensor_dilated(%input: tensor<2x8x9x2xf32>, %filter: tensor<2x2x2x3xf32>) -> tensor<2x6x7x2x3xf32> { %zero = constant 0.000000e+00 : f32 %init = linalg.init_tensor [2, 6, 7, 2, 3] : tensor<2x6x7x2x3xf32> %fill = linalg.fill(%zero, %init) : f32, tensor<2x6x7x2x3xf32> -> tensor<2x6x7x2x3xf32> - // CHECK: %{{.+}} = linalg.depthwise_conv_2d_input_nhwc_filter_hwcf + // CHECK: %{{.+}} = linalg.depthwise_conv2D_nhwc // CHECK-SAME: {dilations = dense<2> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} // CHECK-SAME: ins(%{{.+}}, %{{.+}} : tensor<2x8x9x2xf32>, tensor<2x2x2x3xf32>) // CHECK-SAME: outs(%{{.+}} : tensor<2x6x7x2x3xf32>) - %0 = linalg.depthwise_conv_2d_input_nhwc_filter_hwcf + %0 = linalg.depthwise_conv2D_nhwc { dilations = dense<2> : tensor<2xi64>, strides = dense<1> : tensor<2xi64> } ins(%input, %filter : tensor<2x8x9x2xf32>, tensor<2x2x2x3xf32>) outs(%fill : tensor<2x6x7x2x3xf32>) -> tensor<2x6x7x2x3xf32> return %0 : tensor<2x6x7x2x3xf32> } -// CHECK-LABEL: func @depthwise_conv_2d_input_nhwc_filter_hwcf_memref_dilated -func @depthwise_conv_2d_input_nhwc_filter_hwcf_memref_dilated(%input: memref<2x8x9x2xf32>, %filter: memref<2x2x2x3xf32>, %output: memref<2x6x7x2x3xf32>) { - // CHECK: linalg.depthwise_conv_2d_input_nhwc_filter_hwcf +// CHECK-LABEL: func @depthwise_conv2D_nhwc_memref_dilated +func @depthwise_conv2D_nhwc_memref_dilated(%input: memref<2x8x9x2xf32>, %filter: memref<2x2x2x3xf32>, %output: memref<2x6x7x2x3xf32>) { + // CHECK: linalg.depthwise_conv2D_nhwc // CHECK-SAME: {dilations = dense<2> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} // CHECK-SAME: ins(%{{.+}}, %{{.+}} : memref<2x8x9x2xf32>, memref<2x2x2x3xf32>) // CHECK-SAME: outs(%{{.+}} : memref<2x6x7x2x3xf32>) - 
linalg.depthwise_conv_2d_input_nhwc_filter_hwcf + linalg.depthwise_conv2D_nhwc { dilations = dense<2> : tensor<2xi64>, strides = dense<1> : tensor<2xi64> } ins(%input, %filter : memref<2x8x9x2xf32>, memref<2x2x2x3xf32>) outs(%output : memref<2x6x7x2x3xf32>) @@ -86,7 +86,7 @@ func @depthwise_conv_2d_input_nhwc_filter_missing_stride(%input: memref<1x113x113x96xf32>, %filter: memref<3x3x96xf32>, %output: memref<1x56x56x96xf32>) { // expected-error @+1 {{missing indexing map required attribute 'strides'}} - linalg.depthwise_conv_2d_input_nhwc_filter_hwc {dilations = dense<1> : vector<2xi64>} + linalg.depthwise_conv2D_nhw {dilations = dense<1> : vector<2xi64>} ins(%input, %filter: memref<1x113x113x96xf32>, memref<3x3x96xf32>) outs(%output: memref<1x56x56x96xf32>) return @@ -96,7 +96,7 @@ func @depthwise_conv_2d_input_nhwc_filter_missing_dilations(%input: memref<1x113x113x96xf32>, %filter: memref<3x3x96xf32>, %output: memref<1x56x56x96xf32>) { // expected-error @+1 {{missing indexing map required attribute 'dilations'}} - linalg.depthwise_conv_2d_input_nhwc_filter_hwc {strides = dense<1> : vector<2xi64>} + linalg.depthwise_conv2D_nhw {strides = dense<1> : vector<2xi64>} ins(%input, %filter: memref<1x113x113x96xf32>, memref<3x3x96xf32>) outs(%output: memref<1x56x56x96xf32>) return @@ -106,7 +106,7 @@ func @depthwise_conv_2d_input_nhwc_filter_wrong_stride_element_type(%input: memref<1x113x113x96xf32>, %filter: memref<3x3x96xf32>, %output: memref<1x56x56x96xf32>) { // expected-error @+1 {{incorrect element type for indexing map required attribute 'strides'}} - linalg.depthwise_conv_2d_input_nhwc_filter_hwc {dilations = dense<1> : vector<2xi64>, strides = dense<2.0> : vector<2xf32>} + linalg.depthwise_conv2D_nhw {dilations = dense<1> : vector<2xi64>, strides = dense<2.0> : vector<2xf32>} ins(%input, %filter: memref<1x113x113x96xf32>, memref<3x3x96xf32>) outs(%output: memref<1x56x56x96xf32>) return @@ -116,7 +116,7 @@ func 
@depthwise_conv_2d_input_nhwc_filter_wrong_stride_size(%input: memref<1x113x113x96xf32>, %filter: memref<3x3x96xf32>, %output: memref<1x56x56x96xf32>) { // expected-error @+1 {{incorrect shape for indexing map required attribute 'strides'}} - linalg.depthwise_conv_2d_input_nhwc_filter_hwc {dilations = dense<1> : vector<2xi64>, strides = dense<2> : vector<3xi64> } + linalg.depthwise_conv2D_nhw {dilations = dense<1> : vector<2xi64>, strides = dense<2> : vector<3xi64> } ins(%input, %filter: memref<1x113x113x96xf32>, memref<3x3x96xf32>) outs(%output: memref<1x56x56x96xf32>) return @@ -124,14 +124,14 @@ // ----- -// CHECK-LABEL: func @conv_1d_input_nwc_filter_wcf -func @conv_1d_input_nwc_filter_wcf(%input: tensor, %filter: tensor, %init: tensor) -> tensor { - // CHECK: %{{.+}} = linalg.conv_1d_input_nwc_filter_wcf +// CHECK-LABEL: func @conv_1d_nwc_wcf +func @conv_1d_nwc_wcf(%input: tensor, %filter: tensor, %init: tensor) -> tensor { + // CHECK: %{{.+}} = linalg.conv_1d_nwc_wcf // CHECK-SAME: dilations = dense<1> : tensor<1xi64> // CHECK-SAME: strides = dense<1> : tensor<1xi64> // CHECK-SAME: ins(%{{.+}}, %{{.+}} : tensor, tensor) // CHECK-SAME: outs(%{{.+}} : tensor) -> tensor - %0 = linalg.conv_1d_input_nwc_filter_wcf {dilations = dense<1> : tensor<1xi64>, + %0 = linalg.conv_1d_nwc_wcf {dilations = dense<1> : tensor<1xi64>, strides = dense<1> : tensor<1xi64>} ins (%input, %filter: tensor, tensor) outs (%init: tensor) -> tensor @@ -140,14 +140,14 @@ // ----- -// CHECK-LABEL: func @conv_1d_input_nwc_filter_wcf -func @conv_1d_input_nwc_filter_wcf(%input: memref, %filter: memref, %output: memref) { - // CHECK: linalg.conv_1d_input_nwc_filter_wcf +// CHECK-LABEL: func @conv_1d_nwc_wcf +func @conv_1d_nwc_wcf(%input: memref, %filter: memref, %output: memref) { + // CHECK: linalg.conv_1d_nwc_wcf // CHECK-SAME: dilations = dense<1> : tensor<1xi64> // CHECK-SAME: strides = dense<1> : tensor<1xi64> // CHECK-SAME: ins(%{{.+}}, %{{.+}} : memref, memref) // CHECK-SAME: outs(%{{.+}} 
: memref) - linalg.conv_1d_input_nwc_filter_wcf {dilations = dense<1> : tensor<1xi64>, + linalg.conv_1d_nwc_wcf {dilations = dense<1> : tensor<1xi64>, strides = dense<1> : tensor<1xi64>} ins (%input, %filter: memref, memref) outs (%output: memref) @@ -156,14 +156,14 @@ // ----- -// CHECK-LABEL: func @conv_2d_input_nhwc_filter_hwcf -func @conv_2d_input_nhwc_filter_hwcf(%input: tensor, %filter: tensor, %init: tensor) -> tensor { - // CHECK: %{{.+}} = linalg.conv_2d_input_nhwc_filter_hwcf +// CHECK-LABEL: func @conv_2d_nhwc_hwcf +func @conv_2d_nhwc_hwcf(%input: tensor, %filter: tensor, %init: tensor) -> tensor { + // CHECK: %{{.+}} = linalg.conv_2d_nhwc_hwcf // CHECK-SAME: dilations = dense<1> : tensor<2xi64> // CHECK-SAME: strides = dense<1> : tensor<2xi64> // CHECK-SAME: ins(%{{.+}}, %{{.+}} : tensor, tensor) // CHECK-SAME: outs(%{{.+}} : tensor) -> tensor - %0 = linalg.conv_2d_input_nhwc_filter_hwcf {dilations = dense<1> : tensor<2xi64>, + %0 = linalg.conv_2d_nhwc_hwcf {dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins (%input, %filter: tensor, tensor) outs (%init: tensor) -> tensor @@ -172,14 +172,14 @@ // ----- -// CHECK-LABEL: func @conv_2d_input_nhwc_filter_hwcf -func @conv_2d_input_nhwc_filter_hwcf(%input: memref, %filter: memref, %output: memref) { - // CHECK: linalg.conv_2d_input_nhwc_filter_hwcf +// CHECK-LABEL: func @conv_2d_nhwc_hwcf +func @conv_2d_nhwc_hwcf(%input: memref, %filter: memref, %output: memref) { + // CHECK: linalg.conv_2d_nhwc_hwcf // CHECK-SAME: dilations = dense<1> : tensor<2xi64> // CHECK-SAME: strides = dense<1> : tensor<2xi64> // CHECK-SAME: ins(%{{.+}}, %{{.+}} : memref, memref) // CHECK-SAME: outs(%{{.+}} : memref) - linalg.conv_2d_input_nhwc_filter_hwcf {dilations = dense<1> : tensor<2xi64>, + linalg.conv_2d_nhwc_hwcf {dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins (%input, %filter: memref, memref) outs (%output: memref) @@ -188,14 +188,14 @@ // ----- -// CHECK-LABEL: func 
@conv_3d_input_ndhwc_filter_dhwcf -func @conv_3d_input_ndhwc_filter_dhwcf(%input: tensor, %filter: tensor, %init: tensor) -> tensor { - // CHECK: %{{.+}} = linalg.conv_3d_input_ndhwc_filter_dhwcf +// CHECK-LABEL: func @conv_3d_ndhwc_dhwcf +func @conv_3d_ndhwc_dhwcf(%input: tensor, %filter: tensor, %init: tensor) -> tensor { + // CHECK: %{{.+}} = linalg.conv_3d_ndhwc_dhwcf // CHECK-SAME: dilations = dense<1> : tensor<3xi64> // CHECK-SAME: strides = dense<1> : tensor<3xi64> // CHECK-SAME: ins(%{{.+}}, %{{.+}} : tensor, tensor) // CHECK-SAME: outs(%{{.+}} : tensor) -> tensor - %0 = linalg.conv_3d_input_ndhwc_filter_dhwcf {dilations = dense<1> : tensor<3xi64>, + %0 = linalg.conv_3d_ndhwc_dhwcf {dilations = dense<1> : tensor<3xi64>, strides = dense<1> : tensor<3xi64>} ins (%input, %filter: tensor, tensor) outs (%init: tensor) -> tensor @@ -204,14 +204,14 @@ // ----- -// CHECK-LABEL: func @conv_3d_input_ndhwc_filter_dhwcf -func @conv_3d_input_ndhwc_filter_dhwcf(%input: memref, %filter: memref, %output: memref) { - // CHECK: linalg.conv_3d_input_ndhwc_filter_dhwcf +// CHECK-LABEL: func @conv_3d_ndhwc_dhwcf +func @conv_3d_ndhwc_dhwcf(%input: memref, %filter: memref, %output: memref) { + // CHECK: linalg.conv_3d_ndhwc_dhwcf // CHECK-SAME: dilations = dense<1> : tensor<3xi64> // CHECK-SAME: strides = dense<1> : tensor<3xi64> // CHECK-SAME: ins(%{{.+}}, %{{.+}} : memref, memref) // CHECK-SAME: outs(%{{.+}} : memref) - linalg.conv_3d_input_ndhwc_filter_dhwcf {dilations = dense<1> : tensor<3xi64>, + linalg.conv_3d_ndhwc_dhwcf {dilations = dense<1> : tensor<3xi64>, strides = dense<1> : tensor<3xi64>} ins (%input, %filter: memref, memref) outs (%output: memref) diff --git a/mlir/test/Dialect/Linalg/tile-and-fuse-tensors.mlir b/mlir/test/Dialect/Linalg/tile-and-fuse-tensors.mlir --- a/mlir/test/Dialect/Linalg/tile-and-fuse-tensors.mlir +++ b/mlir/test/Dialect/Linalg/tile-and-fuse-tensors.mlir @@ -79,7 +79,7 @@ %init = linalg.init_tensor [1, 112, 112, 32] : tensor<1x112x112x32xf32> 
%fill = linalg.fill(%cst, %init) : f32, tensor<1x112x112x32xf32> -> tensor<1x112x112x32xf32> - %conv = linalg.conv_2d_input_nhwc_filter_hwcf + %conv = linalg.conv_2d_nhwc_hwcf {dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%input, %filter : tensor<1x225x225x3xf32>, tensor<3x3x3x32xf32>) outs(%fill : tensor<1x112x112x32xf32>) -> tensor<1x112x112x32xf32> @@ -133,7 +133,7 @@ // CHECK-NEXT: %[[ST_ARG2:.+]] = tensor.extract_slice %[[ARG2]][0, %[[IV0]], %[[IV1]], %[[IV2]]] [1, 8, 16, 4] [1, 1, 1, 1] : tensor<1x112x112x32xf32> to tensor<1x8x16x4xf32> // CHECK-NEXT: %[[ST_FILTER:.+]] = tensor.extract_slice %[[FILTER]][0, 0, 0, %[[IV2]]] [3, 3, 3, 4] [1, 1, 1, 1] : tensor<3x3x3x32xf32> to tensor<3x3x3x4xf32> // CHECK-NEXT: %[[ST_FILL:.+]] = tensor.extract_slice %[[FILL]][0, %[[IV0]], %[[IV1]], %[[IV2]]] [1, 8, 16, 4] [1, 1, 1, 1] : tensor<1x112x112x32xf32> to tensor<1x8x16x4xf32> -// CHECK-NEXT: %[[ST_CONV:.+]] = linalg.conv_2d_input_nhwc_filter_hwcf +// CHECK-NEXT: %[[ST_CONV:.+]] = linalg.conv_2d_nhwc_hwcf // CHECK-SAME: ins(%[[ST_INPUT]], %[[ST_FILTER]] : tensor<1x17x33x3xf32>, tensor<3x3x3x4xf32>) // CHECK-SAME: outs(%[[ST_FILL]] : tensor<1x8x16x4xf32>) // CHECK-NEXT: %[[ADD:.+]] = linalg.generic @@ -161,7 +161,7 @@ %init = linalg.init_tensor [%n, %oh, %ow, %oc] : tensor %fill = linalg.fill(%cst, %init) : f32, tensor -> tensor - %conv = linalg.conv_2d_input_nhwc_filter_hwcf + %conv = linalg.conv_2d_nhwc_hwcf {dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%input, %filter : tensor, tensor) outs(%fill : tensor) -> tensor @@ -271,7 +271,7 @@ // CHECK-NEXT: %[[SIZE_ELEM_OC_3:.+]] = affine.min #[[BOUND2_MAP_2]](%[[IV3]], %[[IV2]])[%[[FILL_C]], %[[ELEM_OC]]] // CHECK-NEXT: %[[ST_FILL:.+]] = tensor.extract_slice %[[FILL]][%[[IV0]], %[[IV1]], %[[IV2]], %[[IV3]]] // CHECK-SAME: [%[[SIZE_ELEM_N_2]], %[[SIZE_ELEM_OH_2]], %[[SIZE_ELEM_OW_2]], %[[SIZE_ELEM_OC_3]]] -// CHECK-NEXT: %[[ST_CONV:.+]] = 
linalg.conv_2d_input_nhwc_filter_hwcf +// CHECK-NEXT: %[[ST_CONV:.+]] = linalg.conv_2d_nhwc_hwcf // CHECK-SAME: ins(%[[ST_INPUT]], %[[ST_FILTER]] : tensor, tensor) // CHECK-SAME: outs(%[[ST_FILL]] : tensor) -> tensor // CHECK-NEXT: %[[ST_ADD:.+]] = linalg.generic diff --git a/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-1d-input-nwc-filter-wcf-call.mlir b/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-1d-nwc-wcf-call.mlir rename from mlir/test/Integration/Dialect/Linalg/CPU/test-conv-1d-input-nwc-filter-wcf-call.mlir rename to mlir/test/Integration/Dialect/Linalg/CPU/test-conv-1d-nwc-wcf-call.mlir --- a/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-1d-input-nwc-filter-wcf-call.mlir +++ b/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-1d-nwc-wcf-call.mlir @@ -29,8 +29,8 @@ return %buf : memref } -func @conv_1d_input_nwc_filter_wcf(%arg0: memref, %arg1: memref, %arg2: memref) { - linalg.conv_1d_input_nwc_filter_wcf {dilations = dense<1> : tensor<1xi64>, +func @conv_1d_nwc_wcf(%arg0: memref, %arg1: memref, %arg2: memref) { + linalg.conv_1d_nwc_wcf {dilations = dense<1> : tensor<1xi64>, strides = dense<1> : tensor<1xi64>} ins (%arg0, %arg1: memref, memref) outs (%arg2: memref) @@ -52,7 +52,7 @@ %out1D_nwc = call @alloc_3d_filled_f32(%c3, %c6, %c1, %zero) : (index, index, index, f32) -> (memref) memref.store %f10, %in1D_nwc[%c0, %c3, %c0] : memref - call @conv_1d_input_nwc_filter_wcf(%in1D_nwc, %filter1D_nwc, %out1D_nwc) : (memref, memref, memref) -> () + call @conv_1d_nwc_wcf(%in1D_nwc, %filter1D_nwc, %out1D_nwc) : (memref, memref, memref) -> () %out1D_nwc_ = memref.cast %out1D_nwc : memref to memref<*xf32> call @print_memref_f32(%out1D_nwc_): (memref<*xf32>) -> () diff --git a/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-2d-input-nhwc-filter-hwcf-call.mlir b/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-2d-nhwc-hwcf-call.mlir rename from mlir/test/Integration/Dialect/Linalg/CPU/test-conv-2d-input-nhwc-filter-hwcf-call.mlir rename to 
mlir/test/Integration/Dialect/Linalg/CPU/test-conv-2d-nhwc-hwcf-call.mlir --- a/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-2d-input-nhwc-filter-hwcf-call.mlir +++ b/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-2d-nhwc-hwcf-call.mlir @@ -29,8 +29,8 @@ return %buf : memref } -func @conv_2d_input_nhwc_filter_hwcf(%arg0: memref, %arg1: memref, %arg2: memref) { - linalg.conv_2d_input_nhwc_filter_hwcf {dilations = dense<1> : tensor<2xi64>, +func @conv_2d_nhwc_hwcf(%arg0: memref, %arg1: memref, %arg2: memref) { + linalg.conv_2d_nhwc_hwcf {dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins (%arg0, %arg1: memref, memref) outs (%arg2: memref) @@ -52,7 +52,7 @@ %out2D_nhwc = call @alloc_4d_filled_f32(%c3, %c6, %c6, %c1, %zero) : (index, index, index, index, f32) -> (memref) memref.store %f10, %in2D_nhwc[%c0, %c0, %c3, %c0] : memref - call @conv_2d_input_nhwc_filter_hwcf(%in2D_nhwc, %filter2D_nhwc, %out2D_nhwc) : (memref, memref, memref) -> () + call @conv_2d_nhwc_hwcf(%in2D_nhwc, %filter2D_nhwc, %out2D_nhwc) : (memref, memref, memref) -> () %out2D_nhwc_ = memref.cast %out2D_nhwc : memref to memref<*xf32> call @print_memref_f32(%out2D_nhwc_): (memref<*xf32>) -> () diff --git a/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-3d-input-ndhwc-filter-dhwcf-call.mlir b/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-3d-ndhwc-dhwcf-call.mlir rename from mlir/test/Integration/Dialect/Linalg/CPU/test-conv-3d-input-ndhwc-filter-dhwcf-call.mlir rename to mlir/test/Integration/Dialect/Linalg/CPU/test-conv-3d-ndhwc-dhwcf-call.mlir --- a/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-3d-input-ndhwc-filter-dhwcf-call.mlir +++ b/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-3d-ndhwc-dhwcf-call.mlir @@ -29,8 +29,8 @@ return %buf : memref } -func @conv_3d_input_ndhwc_filter_dhwcf(%arg0: memref, %arg1: memref, %arg2: memref) { - linalg.conv_3d_input_ndhwc_filter_dhwcf {dilations = dense<1> : tensor<3xi64>, +func @conv_3d_ndhwc_dhwcf(%arg0: 
memref, %arg1: memref, %arg2: memref) { + linalg.conv_3d_ndhwc_dhwcf {dilations = dense<1> : tensor<3xi64>, strides = dense<1> : tensor<3xi64>} ins (%arg0, %arg1: memref, memref) outs (%arg2: memref) @@ -53,7 +53,7 @@ %out3D_ndhwc = call @alloc_5d_filled_f32(%c1, %c6, %c6, %c6, %c1, %zero) : (index, index, index, index, index, f32) -> (memref) memref.store %f10, %in3D_ndhwc[%c0, %c0, %c0, %c3, %c0] : memref - call @conv_3d_input_ndhwc_filter_dhwcf(%in3D_ndhwc, %filter3D_ndhwc, %out3D_ndhwc) : (memref, memref, memref) -> () + call @conv_3d_ndhwc_dhwcf(%in3D_ndhwc, %filter3D_ndhwc, %out3D_ndhwc) : (memref, memref, memref) -> () %out3D_ndhwc_ = memref.cast %out3D_ndhwc : memref to memref<*xf32> call @print_memref_f32(%out3D_ndhwc_): (memref<*xf32>) -> ()