diff --git a/mlir/include/mlir/Dialect/Linalg/IR/LinalgNamedStructuredOps.yaml b/mlir/include/mlir/Dialect/Linalg/IR/LinalgNamedStructuredOps.yaml --- a/mlir/include/mlir/Dialect/Linalg/IR/LinalgNamedStructuredOps.yaml +++ b/mlir/include/mlir/Dialect/Linalg/IR/LinalgNamedStructuredOps.yaml @@ -628,10 +628,10 @@ scalar_arg: B --- !LinalgOpConfig metadata: !LinalgOpMetadata - name: conv_2d_nchw - cpp_class_name: Conv2DNchwOp + name: conv_1d + cpp_class_name: Conv1DOp doc: |- - Performs 2-D convolution. + Performs 1-D convolution with no channels. Numeric casting is performed on the operands to the inner multiply, promoting them to the same data type as the accumulator/output. @@ -641,46 +641,225 @@ name: I usage: InputOperand type_var: T1 - shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11, s12] - -> (s0, s1, s2, s3)> + shape_map: affine_map<()[s0, s1, s2] -> (s0)> - !LinalgOperandDefConfig name: K usage: InputOperand type_var: T2 - shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11, s12] - -> (s4, s1, s5, s6)> + shape_map: affine_map<()[s0, s1, s2] -> (s1)> - !LinalgOperandDefConfig name: O usage: OutputOperand type_var: U - shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11, s12] - -> (s0, s4, s7, s8, s1)> + shape_map: affine_map<()[s0, s1, s2] -> (s2)> + indexing_maps: !LinalgIndexingMapsConfig + static_indexing_maps: + - affine_map<(d0, d1)[s0, s1, s2] -> (d0 + d1)> + - affine_map<(d0, d1)[s0, s1, s2] -> (d1)> + - affine_map<(d0, d1)[s0, s1, s2] -> (d0)> + iterator_types: + - parallel + - reduction + assignments: + - !ScalarAssign + arg: O + value: !ScalarExpression + scalar_apply: + fn_name: add + operands: + - !ScalarExpression + scalar_arg: O + - !ScalarExpression + scalar_apply: + fn_name: mul + operands: + - !ScalarExpression + symbolic_cast: + type_var: U + operands: + - !ScalarExpression + scalar_arg: I + - !ScalarExpression + symbolic_cast: + type_var: U + operands: + - !ScalarExpression + scalar_arg: K +--- !LinalgOpConfig +metadata: !LinalgOpMetadata + name: conv_2d + cpp_class_name: Conv2DOp + doc: |- + Performs 2-D convolution with no channels. + + Numeric casting is performed on the operands to the inner multiply, promoting + them to the same data type as the accumulator/output. 
+structured_op: !LinalgStructuredOpConfig + args: + - !LinalgOperandDefConfig + name: I + usage: InputOperand + type_var: T1 + shape_map: affine_map<()[s0, s1, s2, s3, s4, s5] -> (s0, s1)> + - !LinalgOperandDefConfig + name: K + usage: InputOperand + type_var: T2 + shape_map: affine_map<()[s0, s1, s2, s3, s4, s5] -> (s2, s3)> + - !LinalgOperandDefConfig + name: O + usage: OutputOperand + type_var: U + shape_map: affine_map<()[s0, s1, s2, s3, s4, s5] -> (s4, s5)> + indexing_maps: !LinalgIndexingMapsConfig + static_indexing_maps: + - affine_map<(d0, d1, d2, d3)[s0, s1, s2, s3, s4, s5] -> (d0 + d2, d1 + d3)> + - affine_map<(d0, d1, d2, d3)[s0, s1, s2, s3, s4, s5] -> (d2, d3)> + - affine_map<(d0, d1, d2, d3)[s0, s1, s2, s3, s4, s5] -> (d0, d1)> + iterator_types: + - parallel + - parallel + - reduction + - reduction + assignments: + - !ScalarAssign + arg: O + value: !ScalarExpression + scalar_apply: + fn_name: add + operands: + - !ScalarExpression + scalar_arg: O + - !ScalarExpression + scalar_apply: + fn_name: mul + operands: + - !ScalarExpression + symbolic_cast: + type_var: U + operands: + - !ScalarExpression + scalar_arg: I + - !ScalarExpression + symbolic_cast: + type_var: U + operands: + - !ScalarExpression + scalar_arg: K +--- !LinalgOpConfig +metadata: !LinalgOpMetadata + name: conv_3d + cpp_class_name: Conv3DOp + doc: |- + Performs 3-D convolution with no channels. + + Numeric casting is performed on the operands to the inner multiply, promoting + them to the same data type as the accumulator/output. +structured_op: !LinalgStructuredOpConfig + args: + - !LinalgOperandDefConfig + name: I + usage: InputOperand + type_var: T1 + shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8] -> (s0, s1, s2)> + - !LinalgOperandDefConfig + name: K + usage: InputOperand + type_var: T2 + shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8] -> (s3, s4, s5)> + - !LinalgOperandDefConfig + name: O + usage: OutputOperand + type_var: U + shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8] -> (s6, s7, s8)> + indexing_maps: !LinalgIndexingMapsConfig + static_indexing_maps: + - affine_map<(d0, d1, d2, d3, d4, d5)[s0, s1, s2, s3, s4, s5, s6, s7, s8] -> (d0 + + d3, d1 + d4, d2 + d5)> + - affine_map<(d0, d1, d2, d3, d4, d5)[s0, s1, s2, s3, s4, s5, s6, s7, s8] -> (d3, + d4, d5)> + - affine_map<(d0, d1, d2, d3, d4, d5)[s0, s1, s2, s3, s4, s5, s6, s7, s8] -> (d0, + d1, d2)> + iterator_types: + - parallel + - parallel + - parallel + - reduction + - reduction + - reduction + assignments: + - !ScalarAssign + arg: O + value: !ScalarExpression + scalar_apply: + fn_name: add + operands: + - !ScalarExpression + scalar_arg: O + - !ScalarExpression + scalar_apply: + fn_name: mul + operands: + - !ScalarExpression + symbolic_cast: + type_var: U + operands: + - !ScalarExpression + scalar_arg: I + - !ScalarExpression + symbolic_cast: + type_var: U + operands: + - !ScalarExpression + scalar_arg: K +--- !LinalgOpConfig +metadata: !LinalgOpMetadata + name: conv_1d_nwc_wcf + cpp_class_name: Conv1DNwcWcfOp + doc: |- + Performs 1-D convolution. + + Numeric casting is performed on the operands to the inner multiply, promoting + them to the same data type as the accumulator/output. 
+structured_op: !LinalgStructuredOpConfig + args: + - !LinalgOperandDefConfig + name: I + usage: InputOperand + type_var: T1 + shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7] -> (s0, s1, s2)> + - !LinalgOperandDefConfig + name: K + usage: InputOperand + type_var: T2 + shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7] -> (s3, s2, s4)> + - !LinalgOperandDefConfig + name: O + usage: OutputOperand + type_var: U + shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7] -> (s0, s5, s4)> - !LinalgOperandDefConfig name: strides usage: IndexAttribute type_var: I64 - attribute_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11, - s12] -> (s9, s10)> + attribute_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7] -> (s6)> - !LinalgOperandDefConfig name: dilations usage: IndexAttribute type_var: I64 - attribute_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11, - s12] -> (s11, s12)> + attribute_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7] -> (s7)> indexing_maps: !LinalgIndexingMapsConfig static_indexing_maps: - - affine_map<(d0, d1, d2, d3, d4, d5, d6)[s0, s1, s2, s3, s4, s5, s6, s7, s8, - s9, s10, s11, s12] -> (d0, d4, d2 * s9 + d5 * s11, d3 * s10 + d6 * s12)> - - affine_map<(d0, d1, d2, d3, d4, d5, d6)[s0, s1, s2, s3, s4, s5, s6, s7, s8, - s9, s10, s11, s12] -> (d1, d4, d5, d6)> - - affine_map<(d0, d1, d2, d3, d4, d5, d6)[s0, s1, s2, s3, s4, s5, s6, s7, s8, - s9, s10, s11, s12] -> (d0, d1, d2, d3)> + - affine_map<(d0, d1, d2, d3, d4)[s0, s1, s2, s3, s4, s5, s6, s7] -> (d0, d1 * + s6 + d3 * s7, d4)> + - affine_map<(d0, d1, d2, d3, d4)[s0, s1, s2, s3, s4, s5, s6, s7] -> (d3, d4, + d2)> + - affine_map<(d0, d1, d2, d3, d4)[s0, s1, s2, s3, s4, s5, s6, s7] -> (d0, d1, + d2)> iterator_types: - parallel - parallel - parallel - - parallel - - reduction - reduction - reduction assignments: @@ -906,6 +1085,286 @@ - !ScalarExpression scalar_arg: KZp --- !LinalgOpConfig +metadata: !LinalgOpMetadata + name: conv_3d_ndhwc_dhwcf + cpp_class_name: Conv3DNdhwcDhwcfOp + doc: |- + Performs 3-D convolution. + + Numeric casting is performed on the operands to the inner multiply, promoting + them to the same data type as the accumulator/output. 
+structured_op: !LinalgStructuredOpConfig + args: + - !LinalgOperandDefConfig + name: I + usage: InputOperand + type_var: T1 + shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11, s12, + s13, s14, s15, s16, s17] -> (s0, s1, s2, s3, s4)> + - !LinalgOperandDefConfig + name: K + usage: InputOperand + type_var: T2 + shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11, s12, + s13, s14, s15, s16, s17] -> (s5, s6, s7, s4, s8)> + - !LinalgOperandDefConfig + name: O + usage: OutputOperand + type_var: U + shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11, s12, + s13, s14, s15, s16, s17] -> (s0, s9, s10, s11, s8)> + - !LinalgOperandDefConfig + name: strides + usage: IndexAttribute + type_var: I64 + attribute_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11, + s12, s13, s14, s15, s16, s17] -> (s12, s13, s14)> + - !LinalgOperandDefConfig + name: dilations + usage: IndexAttribute + type_var: I64 + attribute_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11, + s12, s13, s14, s15, s16, s17] -> (s15, s16, s17)> + indexing_maps: !LinalgIndexingMapsConfig + static_indexing_maps: + - affine_map<(d0, d1, d2, d3, d4, d5, d6, d7, d8)[s0, s1, s2, s3, s4, s5, s6, + s7, s8, s9, s10, s11, s12, s13, s14, s15, s16, s17] -> (d0, d1 * s12 + d5 * + s15, d2 * s13 + d6 * s16, d3 * s14 + d7 * s17, d8)> + - affine_map<(d0, d1, d2, d3, d4, d5, d6, d7, d8)[s0, s1, s2, s3, s4, s5, s6, + s7, s8, s9, s10, s11, s12, s13, s14, s15, s16, s17] -> (d5, d6, d7, d8, d4)> + - affine_map<(d0, d1, d2, d3, d4, d5, d6, d7, d8)[s0, s1, s2, s3, s4, s5, s6, + s7, s8, s9, s10, s11, s12, s13, s14, s15, s16, s17] -> (d0, d1, d2, d3, d4)> + iterator_types: + - parallel + - parallel + - parallel + - parallel + - parallel + - reduction + - reduction + - reduction + - reduction + assignments: + - !ScalarAssign + arg: O + value: !ScalarExpression + scalar_apply: + fn_name: add + operands: + - !ScalarExpression + scalar_arg: O + - !ScalarExpression + scalar_apply: + fn_name: mul + operands: + - !ScalarExpression + symbolic_cast: + type_var: U + operands: + - !ScalarExpression + scalar_arg: I + - !ScalarExpression + symbolic_cast: + type_var: U + operands: + - !ScalarExpression + scalar_arg: K +--- !LinalgOpConfig +metadata: !LinalgOpMetadata + name: depthwise_conv2D_nhw + cpp_class_name: DepthwiseConv2DNhwOp + doc: |- + Performs depth-wise 2-D convolution. + + Numeric casting is performed on the operands to the inner multiply, promoting + them to the same data type as the accumulator/output. Multiplier is set to 1, + which is a special case for most depthwise convolutions.
+structured_op: !LinalgStructuredOpConfig + args: + - !LinalgOperandDefConfig + name: I + usage: InputOperand + type_var: T1 + shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11] -> + (s0, s1, s2, s3)> + - !LinalgOperandDefConfig + name: K + usage: InputOperand + type_var: T2 + shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11] -> + (s4, s5, s3)> + - !LinalgOperandDefConfig + name: O + usage: OutputOperand + type_var: U + shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11] -> + (s0, s6, s7, s3)> + - !LinalgOperandDefConfig + name: strides + usage: IndexAttribute + type_var: I64 + attribute_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11] + -> (s8, s9)> + - !LinalgOperandDefConfig + name: dilations + usage: IndexAttribute + type_var: I64 + attribute_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11] + -> (s10, s11)> + indexing_maps: !LinalgIndexingMapsConfig + static_indexing_maps: + - affine_map<(d0, d1, d2, d3, d4, d5)[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, + s10, s11] -> (d0, d1 * s8 + d4 * s10, d2 * s9 + d5 * s11, d3)> + - affine_map<(d0, d1, d2, d3, d4, d5)[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, + s10, s11] -> (d4, d5, d3)> + - affine_map<(d0, d1, d2, d3, d4, d5)[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, + s10, s11] -> (d0, d1, d2, d3)> + iterator_types: + - parallel + - parallel + - parallel + - parallel + - reduction + - reduction + assignments: + - !ScalarAssign + arg: O + value: !ScalarExpression + scalar_apply: + fn_name: add + operands: + - !ScalarExpression + scalar_arg: O + - !ScalarExpression + scalar_apply: + fn_name: mul + operands: + - !ScalarExpression + symbolic_cast: + type_var: U + operands: + - !ScalarExpression + scalar_arg: I + - !ScalarExpression + symbolic_cast: + type_var: U + operands: + - !ScalarExpression + scalar_arg: K +--- !LinalgOpConfig +metadata: !LinalgOpMetadata + name: depthwise_conv2D_nhw_q + cpp_class_name: DepthwiseConv2DNhwQOp + doc: |- + Performs depth-wise 2-D convolution. + + Numeric casting is performed on the operands to the inner multiply, promoting + them to the same data type as the accumulator/output. 
+structured_op: !LinalgStructuredOpConfig + args: + - !LinalgOperandDefConfig + name: I + usage: InputOperand + type_var: T1 + shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11] -> + (s0, s1, s2, s3)> + - !LinalgOperandDefConfig + name: K + usage: InputOperand + type_var: T2 + shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11] -> + (s4, s5, s3)> + - !LinalgOperandDefConfig + name: IZp + usage: InputOperand + type_var: I32 + - !LinalgOperandDefConfig + name: KZp + usage: InputOperand + type_var: I32 + - !LinalgOperandDefConfig + name: O + usage: OutputOperand + type_var: U + shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11] -> + (s0, s6, s7, s3)> + - !LinalgOperandDefConfig + name: strides + usage: IndexAttribute + type_var: I64 + attribute_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11] + -> (s8, s9)> + - !LinalgOperandDefConfig + name: dilations + usage: IndexAttribute + type_var: I64 + attribute_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11] + -> (s10, s11)> + indexing_maps: !LinalgIndexingMapsConfig + static_indexing_maps: + - affine_map<(d0, d1, d2, d3, d4, d5)[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, + s10, s11] -> (d0, d1 * s8 + d4 * s10, d2 * s9 + d5 * s11, d3)> + - affine_map<(d0, d1, d2, d3, d4, d5)[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, + s10, s11] -> (d4, d5, d3)> + - affine_map<(d0, d1, d2, d3, d4, d5)[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, + s10, s11] -> ()> + - affine_map<(d0, d1, d2, d3, d4, d5)[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, + s10, s11] -> ()> + - affine_map<(d0, d1, d2, d3, d4, d5)[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, + s10, s11] -> (d0, d1, d2, d3)> + iterator_types: + - parallel + - parallel + - parallel + - parallel + - reduction + - reduction + assignments: + - !ScalarAssign + arg: O + value: !ScalarExpression + scalar_apply: + fn_name: add + operands: + - !ScalarExpression + scalar_arg: O + - !ScalarExpression + scalar_apply: + fn_name: mul + operands: + - !ScalarExpression + scalar_apply: + fn_name: sub + operands: + - !ScalarExpression + symbolic_cast: + type_var: U + operands: + - !ScalarExpression + scalar_arg: I + - !ScalarExpression + symbolic_cast: + type_var: U + operands: + - !ScalarExpression + scalar_arg: IZp + - !ScalarExpression + scalar_apply: + fn_name: sub + operands: + - !ScalarExpression + symbolic_cast: + type_var: U + operands: + - !ScalarExpression + scalar_arg: K + - !ScalarExpression + symbolic_cast: + type_var: U + operands: + - !ScalarExpression + scalar_arg: KZp +--- !LinalgOpConfig metadata: !LinalgOpMetadata name: depthwise_conv2D_nhwc cpp_class_name: DepthwiseConv2DNhwcOp diff --git a/mlir/include/mlir/Dialect/Linalg/IR/LinalgNamedStructuredOpsSpec.tc b/mlir/include/mlir/Dialect/Linalg/IR/LinalgNamedStructuredOpsSpec.tc --- a/mlir/include/mlir/Dialect/Linalg/IR/LinalgNamedStructuredOpsSpec.tc +++ b/mlir/include/mlir/Dialect/Linalg/IR/LinalgNamedStructuredOpsSpec.tc @@ -5,151 +5,3 @@ // C(m, n) += cast(A(m, k)) * cast(B(k, n)) C(m, n) = AddIOp(C(m, n), MulIOp(SignExtendIOp32(A(m, k)), SignExtendIOp32(B(k, n)))); } - -ods_def: -def conv_1d(I: f32(W), K: f32(KW)) -> (O: f32(W)) { - O(w) = AddFOp(O(w), MulFOp(I(w + kw), K(kw))); -} - -ods_def: -def conv_2d(I: f32(H, W), K: f32(KH, KW)) -> (O: f32(H, W)) { - O(h, w) = AddFOp(O(h, w), MulFOp(I(h + kh, w + kw), K(kh, kw))); -} - -ods_def: -def conv_3d(I: f32(D, H, W), K: f32(KD, KH, KW)) -> (O: f32(D, H, W)) { - O(d, h, w) = AddFOp( - O(d, h, w), MulFOp(I(d + kd, h + 
kh, w + kw), K(kd, kh, kw))); -} - -ods_def: -def depthwise_conv_2d_input_nhwc_filter_hwcf - (I: f32(N, IH, IW, CI), K: f32(KH, KW, CI, CO)) - -> (O: f32(N, OH, OW, CI, CO)) - attr(strides: 2xi64, dilations: 2xi64) -"""A general depth-wise 2-D convolution operation. - -This operation performs depth-wise 2-D convolution over an input `I` and filter -`F` and generates output `O` using the following computation: - -``` - O(n, oh, ow, ci, co) = AddFOp( - O(n, oh, ow, ci, co), - MulFOp(I(n, oh * strides[0] + kh * dilations[0], ow * strides[1] + kw * dilations[1], ci), - K(kh, kw, ci, co))); -``` - -where - -* `I` is a 4-D tensor with shape `(N, IH, IW, CI)`. -* `F` is a 4-D tensor with shape `(KH, KW, CI, CO)`. -* `O` is a 5-D tensor with shape `(N, OH, OW, CI, CO)`. -* `strides` is a 2-element vector attribute for window strides along the - height/width dimension. - -The indexing maps for these three tensors contain 7 dimensions, following the -order of (`N`, `OH`, `OW`, `CI`, `CO`, `KH`, `KW`). - -Note: this op only supports any channel multiplier, which is `CO`. To map back -to 4D result as DepthwiseConvInputNHWCFilterHWCOp, you will have to create a -Linalg reshape op which collapses `CI` and `CO` into one dimension. -""" -{ - O(n, oh, ow, ci, co) = AddFOp( - O(n, oh, ow, ci, co), - MulFOp(I(n, oh * strides[0] + kh * dilations[0], ow * strides[1] + kw * dilations[1], ci), - K(kh, kw, ci, co))); -} - -ods_def: -def depthwise_conv_2d_input_nhwc_filter_hwc - (I: f32(N, IH, IW, C), K: f32(KH, KW, C)) - -> (O: f32(N, OH, OW, C)) - attr(strides: 2xi64, dilations: 2xi64) -"""A depth-wise 2-D convolution operation. - -This operation performs depth-wise 2-D convolution over an input `I` and filter -`F` and generates output `O` using the following computation: - -``` -O(n, oh, ow, c) = AddFOp( - O(n, oh, ow, c), - MulFOp(I(n, oh * strides[0] + kh * dilations[0], ow * strides[1] + kw * dilations[1], c), - K(kh, kw, c))); -``` - -where - -* `I` is a 4-D tensor with shape `(N, IH, IW, C)`. -* `F` is a 3-D tensor with shape `(KH, KW, C)`. -* `O` is a 4-D tensor with shape `(N, OH, OW, C)`. -* `strides` is a 2-element vector attribute for window strides along the - height/width dimension. - -The indexing maps for these three tensors contain 6 dimensions, following the -order of (`N`, `OH`, `OW`, `C`, `KH`, `KW`). - -Note: this op only supports channel multiplier == 1. -""" -{ - O(n, oh, ow, c) = AddFOp( - O(n, oh, ow, c), - MulFOp(I(n, oh * strides[0] + kh * dilations[0], ow * strides[1] + kw * dilations[1], c), - K(kh, kw, c))); -} - -ods_def: -def conv_1d_input_nwc_filter_wcf(I: f32(N, W, C), K: f32(KW, C, F)) -> (O: f32(N, W, F)) - attr(strides: 1xi64, dilations: 1xi64) -""" A 1-D convolution given NWC layout input and WCF layout filter. - -Computes a 1-D convolution given 3-D input and filter. The data layout -of input is NWC and the data layout of filter is WCF. - -The indexing maps for these three tensors contain 5 dimensions, following the -order of (`N`, `W`, `F`, `KW`, `C`). -""" -{ - O(n, w, f) = AddFOp( - O(n, w, f), - MulFOp(I(n, w * strides[0] + kw * dilations[0], c), K(kw, c, f))); -} - -ods_def: -def conv_2d_input_nhwc_filter_hwcf(I: f32(N, H, W, C), K: f32(KH, KW, C, F)) -> (O: f32(N, H, W, F)) - attr(strides: 2xi64, dilations: 2xi64) -""" A 2-D convolution given NHWC layout input and HWCF layout filter. - -Computes a 2-D convolution given 4-D input and filter. The data layout -of input is NHWC and the data layout of filter is HWCF. 
- -The indexing maps for these three tensors contain 7 dimensions, following the -order of (`N`, `H`, `W`, `F`, `KH`, `KW`, `C`). -""" -{ - O(n, h, w, f) = AddFOp( - O(n, h, w, f), MulFOp(I(n, h * strides[0] + kh * dilations[0], - w * strides[1] + kw * dilations[1], c), - K(kh, kw, c, f))); -} - -ods_def: -def conv_3d_input_ndhwc_filter_dhwcf - (I: f32(N, D, H, W, C), K: f32(KD, KH, KW, C, F)) - -> (O: f32(N, D, H, W, F)) - attr(strides: 3xi64, dilations: 3xi64) -""" A 3-D convolution given NDHWC layout input and DHWCF layout filter. - -Computes a 3-D convolution given 5-D input and filter. The data layout -of input is NDHWC and the data layout of filter is DHWCF. - -The indexing maps for these three tensors contain 9 dimensions, following the -order of (`N`, `D`, `H`, `W`, `F`, `KD`, `KH`, `KW`, `C`). -""" -{ - O(n, d, h, w, f) = AddFOp( - O(n, d, h, w, f), MulFOp(I(n, d * strides[0] + kd * dilations[0], - h * strides[1] + kh * dilations[1], - w * strides[2] + kw * dilations[2], c), - K(kd, kh, kw, c, f))); -} diff --git a/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp b/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp --- a/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp +++ b/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp @@ -1127,7 +1127,7 @@ return success(); } -using ConvOpConst = ConvOpVectorization; +using ConvOpConst = ConvOpVectorization; /// Inserts tiling, promotion and vectorization pattern for ConvOp /// conversion into corresponding pattern lists. @@ -1165,25 +1165,22 @@ RewritePatternSet tiling(context); RewritePatternSet promotion(context); RewritePatternSet vectorization(context); - populateVectorizationPatterns(tiling, promotion, vectorization, - tileSizes); - - populateVectorizationPatterns( - tiling, promotion, vectorization, tileSizes); + populateVectorizationPatterns(tiling, promotion, vectorization, + tileSizes); - populateVectorizationPatterns(tiling, promotion, vectorization, + populateVectorizationPatterns(tiling, promotion, vectorization, tileSizes); - populateVectorizationPatterns( - tiling, promotion, vectorization, tileSizes); + populateVectorizationPatterns(tiling, promotion, vectorization, + tileSizes); - populateVectorizationPatterns(tiling, promotion, - vectorization, tileSizes); + populateVectorizationPatterns(tiling, promotion, + vectorization, tileSizes); - populateVectorizationPatterns(tiling, promotion, vectorization, - tileSizes); + populateVectorizationPatterns(tiling, promotion, + vectorization, tileSizes); - populateVectorizationPatterns( + populateVectorizationPatterns( tiling, promotion, vectorization, tileSizes); patterns.push_back(std::move(tiling)); diff --git a/mlir/python/mlir/dialects/linalg/opdsl/ops/core_named_ops.py b/mlir/python/mlir/dialects/linalg/opdsl/ops/core_named_ops.py --- a/mlir/python/mlir/dialects/linalg/opdsl/ops/core_named_ops.py +++ b/mlir/python/mlir/dialects/linalg/opdsl/ops/core_named_ops.py @@ -145,21 +145,63 @@ C[None] += cast(U, A[D.m]) * cast(U, B[D.m]) @linalg_structured_op -def conv_2d_nchw( - I=TensorDef(T1, S.N, S.C, S.IH, S.IW), - K=TensorDef(T2, S.F, S.C, S.KH, S.KW), - O=TensorDef(U, S.N, S.F, S.OH, S.OW, S.C, output=True), - strides=AttributeDef(S.SH, S.SW), - dilations=AttributeDef(S.DH, S.DW)): - """Performs 2-D convolution. +def conv_1d( + I=TensorDef(T1, S.IW), + K=TensorDef(T2, S.KW), + O=TensorDef(U, S.OW, output=True)): + """Performs 1-D convolution with no channels. 
+ + Numeric casting is performed on the operands to the inner multiply, promoting + them to the same data type as the accumulator/output. + """ + domain(D.ow, D.kw) + O[D.ow] += cast( + U, I[D.ow + D.kw]) * cast(U, K[D.kw]) + +@linalg_structured_op +def conv_2d( + I=TensorDef(T1, S.IH, S.IW), + K=TensorDef(T2, S.KH, S.KW), + O=TensorDef(U, S.OH, S.OW, output=True)): + """Performs 2-D convolution with no channels. Numeric casting is performed on the operands to the inner multiply, promoting them to the same data type as the accumulator/output. """ - domain(D.n, D.f, D.oh, D.ow, D.c, D.kh, D.kw) - O[D.n, D.f, D.oh, D.ow] += cast( - U, I[D.n, D.c, D.oh * S.SH + D.kh * S.DH, D.ow * S.SW + D.kw * S.DW, - ]) * cast(U, K[D.f, D.c, D.kh, D.kw]) + domain(D.oh, D.ow, D.kh, D.kw) + O[D.oh, D.ow] += cast( + U, I[D.oh + D.kh, D.ow + D.kw]) * cast(U, K[D.kh, D.kw]) + +@linalg_structured_op +def conv_3d( + I=TensorDef(T1, S.ID, S.IH, S.IW), + K=TensorDef(T2, S.KD, S.KH, S.KW), + O=TensorDef(U, S.OD, S.OH, S.OW, output=True)): + """Performs 3-D convolution with no channels. + + Numeric casting is performed on the operands to the inner multiply, promoting + them to the same data type as the accumulator/output. + """ + domain(D.od, D.oh, D.ow, D.kd, D.kh, D.kw) + O[D.od, D.oh, D.ow] += cast( + U, I[D.od + D.kd, D.oh + D.kh, D.ow + D.kw]) * cast(U, K[D.kd, D.kh, D.kw]) + +@linalg_structured_op +def conv_1d_nwc_wcf( + I=TensorDef(T1, S.N, S.IW, S.C), + K=TensorDef(T2, S.KW, S.C, S.F), + O=TensorDef(U, S.N, S.OW, S.F, output=True), + strides=AttributeDef(S.SW), + dilations=AttributeDef(S.DW)): + """Performs 1-D convolution. + + Numeric casting is performed on the operands to the inner multiply, promoting + them to the same data type as the accumulator/output. + """ + domain(D.n, D.ow, D.f, D.kw, D.c) + O[D.n, D.ow, D.f] += cast( + U, I[D.n, D.ow * S.SW + D.kw * S.DW, D.c + ]) * cast(U, K[D.kw, D.c, D.f]) @linalg_structured_op def conv_2d_nhwc_hwcf( @@ -177,6 +219,7 @@ O[D.n, D.oh, D.ow, D.f] += cast( U, I[D.n, D.oh * S.SH + D.kh * S.DH, D.ow * S.SW + D.kw * S.DW, D.c ]) * cast(U, K[D.kh, D.kw, D.c, D.f]) + @linalg_structured_op def conv_2d_nhwc_hwcf_q( I=TensorDef(T1, S.N, S.IH, S.IW, S.C), @@ -197,6 +240,61 @@ U, I[D.n, D.oh * S.SH + D.kh * S.DH, D.ow * S.SW + D.kw * S.DW, D.c ]) - cast(U, IZp)) * (cast(U, K[D.kh, D.kw, D.c, D.f]) - cast(U, KZp)) +@linalg_structured_op +def conv_3d_ndhwc_dhwcf( + I=TensorDef(T1, S.N, S.ID, S.IH, S.IW, S.C), + K=TensorDef(T2, S.KD, S.KH, S.KW, S.C, S.F), + O=TensorDef(U, S.N, S.OD, S.OH, S.OW, S.F, output=True), + strides=AttributeDef(S.SD, S.SH, S.SW), + dilations=AttributeDef(S.DD, S.DH, S.DW)): + """Performs 3-D convolution. + + Numeric casting is performed on the operands to the inner multiply, promoting + them to the same data type as the accumulator/output. + """ + domain(D.n, D.od, D.oh, D.ow, D.f, D.kd, D.kh, D.kw, D.c) + O[D.n, D.od, D.oh, D.ow, D.f] += cast( + U, I[D.n, D.od * S.SD + D.kd * S.DD, D.oh * S.SH + D.kh * S.DH, D.ow * S.SW + D.kw * S.DW, D.c + ]) * cast(U, K[D.kd, D.kh, D.kw, D.c, D.f]) + +@linalg_structured_op +def depthwise_conv2D_nhw( + I=TensorDef(T1, S.N, S.IH, S.IW, S.IC), + K=TensorDef(T2, S.KH, S.KW, S.IC), + O=TensorDef(U, S.N, S.OH, S.OW, S.IC, output=True), + strides=AttributeDef(S.SH, S.SW), + dilations=AttributeDef(S.DH, S.DW)): + """Performs depth-wise 2-D convolution. + + Numeric casting is performed on the operands to the inner multiply, promoting + them to the same data type as the accumulator/output. 
Multiplier is set to 1, + which is a special case for most depthwise convolutions. + """ + domain(D.n, D.oh, D.ow, D.ic, D.kh, D.kw) + O[D.n, D.oh, D.ow, D.ic] += cast( + U, I[D.n, D.oh * S.SH + D.kh * S.DH, D.ow * S.SW + D.kw * S.DW, + D.ic]) * cast(U, K[D.kh, D.kw, D.ic]) + +@linalg_structured_op +def depthwise_conv2D_nhw_q( + I=TensorDef(T1, S.N, S.IH, S.IW, S.IC), + K=TensorDef(T2, S.KH, S.KW, S.IC), + IZp=ScalarDef(I32), + KZp=ScalarDef(I32), + O=TensorDef(U, S.N, S.OH, S.OW, S.IC, output=True), + strides=AttributeDef(S.SH, S.SW), + dilations=AttributeDef(S.DH, S.DW)): + """Performs depth-wise 2-D convolution. + + Numeric casting is performed on the operands to the inner multiply, promoting + them to the same data type as the accumulator/output. + """ + domain(D.n, D.oh, D.ow, D.ic, D.kh, D.kw) + O[D.n, D.oh, D.ow, D.ic] += ( + (cast(U, I[D.n, D.oh * S.SH + D.kh * S.DH, D.ow * S.SW + D.kw * S.DW, + D.ic]) - cast(U, IZp)) * + (cast(U, K[D.kh, D.kw, D.ic]) - cast(U, KZp))) + @linalg_structured_op def depthwise_conv2D_nhwc( I=TensorDef(T1, S.N, S.IH, S.IW, S.IC), diff --git a/mlir/test/Dialect/Linalg/generalize-named-ops.mlir b/mlir/test/Dialect/Linalg/generalize-named-ops.mlir --- a/mlir/test/Dialect/Linalg/generalize-named-ops.mlir +++ b/mlir/test/Dialect/Linalg/generalize-named-ops.mlir @@ -76,8 +76,8 @@ // ----- -func @depthwise_conv_2d_input_nhwc_filter_hwcf(%input: memref<2x4x5x2xf32>, %filter: memref<2x2x2x3xf32>, %output: memref<2x3x4x2x3xf32>) { - linalg.depthwise_conv_2d_input_nhwc_filter_hwcf +func @depthwise_conv2D_nhwc(%input: memref<2x4x5x2xf32>, %filter: memref<2x2x2x3xf32>, %output: memref<2x3x4x2x3xf32>) { + linalg.depthwise_conv2D_nhwc { dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64> } ins(%input, %filter : memref<2x4x5x2xf32>, memref<2x2x2x3xf32>) outs(%output : memref<2x3x4x2x3xf32>) @@ -88,7 +88,7 @@ // CHECK-DAG: #[[MAP1:.+]] = affine_map<(d0, d1, d2, d3, d4, d5, d6) -> (d5, d6, d3, d4)> // CHECK-DAG: #[[MAP2:.+]] = affine_map<(d0, d1, d2, d3, d4, d5, d6) -> (d0, d1, d2, d3, d4)> -// CHECK: func @depthwise_conv_2d_input_nhwc_filter_hwcf +// CHECK: func @depthwise_conv2D_nhwc // CHECK: linalg.generic // CHECK-SAME: indexing_maps = [#[[MAP0]], #[[MAP1]], #[[MAP2]]] @@ -103,8 +103,8 @@ // ----- -func @depthwise_conv_2d_input_nhwc_filter_hwcf(%input: memref<2x4x5x2xf32>, %filter: memref<2x2x2x3xf32>, %output: memref<2x2x3x2x3xf32>) { - linalg.depthwise_conv_2d_input_nhwc_filter_hwcf +func @depthwise_conv2D_nhwc(%input: memref<2x4x5x2xf32>, %filter: memref<2x2x2x3xf32>, %output: memref<2x2x3x2x3xf32>) { + linalg.depthwise_conv2D_nhwc { dilations = dense<2> : tensor<2xi64>, strides = dense<1> : tensor<2xi64> } ins(%input, %filter : memref<2x4x5x2xf32>, memref<2x2x2x3xf32>) outs(%output : memref<2x2x3x2x3xf32>) @@ -115,7 +115,7 @@ // CHECK-DAG: #[[MAP1:.+]] = affine_map<(d0, d1, d2, d3, d4, d5, d6) -> (d5, d6, d3, d4)> // CHECK-DAG: #[[MAP2:.+]] = affine_map<(d0, d1, d2, d3, d4, d5, d6) -> (d0, d1, d2, d3, d4)> -// CHECK: func @depthwise_conv_2d_input_nhwc_filter_hwcf +// CHECK: func @depthwise_conv2D_nhwc // CHECK: linalg.generic // CHECK-SAME: indexing_maps = [#[[MAP0]], #[[MAP1]], #[[MAP2]]] @@ -130,8 +130,8 @@ // ----- -func @depthwise_conv_2d_input_nhwc_filter_hwc(%input: memref<1x113x113x96xf32>, %filter: memref<3x3x96xf32>, %output: memref<1x56x56x96xf32>) { - linalg.depthwise_conv_2d_input_nhwc_filter_hwc {dilations = dense<1> : vector<2xi64>, strides = dense<2> : vector<2xi64>} +func @depthwise_conv2D_nhw(%input: memref<1x113x113x96xf32>,
%filter: memref<3x3x96xf32>, %output: memref<1x56x56x96xf32>) { + linalg.depthwise_conv2D_nhw {dilations = dense<1> : vector<2xi64>, strides = dense<2> : vector<2xi64>} ins(%input, %filter: memref<1x113x113x96xf32>, memref<3x3x96xf32>) outs(%output: memref<1x56x56x96xf32>) return @@ -141,7 +141,7 @@ // CHECK-DAG: #[[MAP1:.+]] = affine_map<(d0, d1, d2, d3, d4, d5) -> (d4, d5, d3)> // CHECK-DAG: #[[MAP2:.+]] = affine_map<(d0, d1, d2, d3, d4, d5) -> (d0, d1, d2, d3)> -// CHECK: func @depthwise_conv_2d_input_nhwc_filter_hwc +// CHECK: func @depthwise_conv2D_nhw // CHECK: linalg.generic // CHECK-SAME: indexing_maps = [#[[MAP0]], #[[MAP1]], #[[MAP2]]] @@ -156,8 +156,8 @@ // ----- -func @conv_1d_input_nwc_filter_wcf(%input: memref, %filter: memref, %output: memref) { - linalg.conv_1d_input_nwc_filter_wcf {dilations = dense<1> : tensor<1xi64>, +func @conv_1d_nwc_wcf(%input: memref, %filter: memref, %output: memref) { + linalg.conv_1d_nwc_wcf {dilations = dense<1> : tensor<1xi64>, strides = dense<1> : tensor<1xi64>} ins (%input, %filter: memref, memref) outs (%output: memref) @@ -167,11 +167,11 @@ // CHECK-DAG: #[[MAP1:.+]] = affine_map<(d0, d1, d2, d3, d4) -> (d3, d4, d2)> // CHECK-DAG: #[[MAP2:.+]] = affine_map<(d0, d1, d2, d3, d4) -> (d0, d1, d2)> -// CHECK: func @conv_1d_input_nwc_filter_wcf +// CHECK: func @conv_1d_nwc_wcf // CHECK: linalg.generic // CHECK-SAME: indexing_maps = [#[[MAP0]], #[[MAP1]], #[[MAP2]]] -// CHECK-SAME: iterator_types = ["parallel", "parallel", "parallel", "reduction", "parallel"]} +// CHECK-SAME: iterator_types = ["parallel", "parallel", "parallel", "reduction", "reduction"]} // CHECK-SAME: ins(%{{.+}}, %{{.+}} : memref, memref) // CHECK-SAME: outs(%{{.+}} : memref) diff --git a/mlir/test/Dialect/Linalg/invalid.mlir b/mlir/test/Dialect/Linalg/invalid.mlir --- a/mlir/test/Dialect/Linalg/invalid.mlir +++ b/mlir/test/Dialect/Linalg/invalid.mlir @@ -573,7 +573,7 @@ func @invalid_static_2d_conv(%input : memref<1x3x4x2xf32>, %filter: memref<3x2x2x1xf32>, %output: memref<1x2x3x1xf32>) { // expected-error @+1 {{inferred input/output operand #0 has shape's dimension #1 to be greater than or equal to 4, but found 3}} - linalg.conv_2d_input_nhwc_filter_hwcf + linalg.conv_2d_nhwc_hwcf { dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%input, %filter : memref<1x3x4x2xf32>, memref<3x2x2x1xf32>) outs(%output : memref<1x2x3x1xf32>) diff --git a/mlir/test/Dialect/Linalg/named-ops.mlir b/mlir/test/Dialect/Linalg/named-ops.mlir --- a/mlir/test/Dialect/Linalg/named-ops.mlir +++ b/mlir/test/Dialect/Linalg/named-ops.mlir @@ -1,81 +1,81 @@ // RUN: mlir-opt -split-input-file -verify-diagnostics %s | FileCheck %s -// CHECK-LABEL: func @depthwise_conv_2d_input_nhwc_filter_hwcf_tensor -func @depthwise_conv_2d_input_nhwc_filter_hwcf_tensor(%input: tensor<2x4x5x2xf32>, %filter: tensor<2x2x2x3xf32>) -> tensor<2x3x4x2x3xf32> { +// CHECK-LABEL: func @depthwise_conv2D_nhwc_tensor +func @depthwise_conv2D_nhwc_tensor(%input: tensor<2x4x5x2xf32>, %filter: tensor<2x2x2x3xf32>) -> tensor<2x3x4x2x3xf32> { %zero = constant 0.000000e+00 : f32 %init = linalg.init_tensor [2, 3, 4, 2, 3] : tensor<2x3x4x2x3xf32> %fill = linalg.fill(%zero, %init) : f32, tensor<2x3x4x2x3xf32> -> tensor<2x3x4x2x3xf32> - // CHECK: %{{.+}} = linalg.depthwise_conv_2d_input_nhwc_filter_hwcf + // CHECK: %{{.+}} = linalg.depthwise_conv2D_nhwc // CHECK-SAME: {dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} // CHECK-SAME: ins(%{{.+}}, %{{.+}} : tensor<2x4x5x2xf32>, tensor<2x2x2x3xf32>) 
// CHECK-SAME: outs(%{{.+}} : tensor<2x3x4x2x3xf32>) - %0 = linalg.depthwise_conv_2d_input_nhwc_filter_hwcf + %0 = linalg.depthwise_conv2D_nhwc { dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64> } ins(%input, %filter : tensor<2x4x5x2xf32>, tensor<2x2x2x3xf32>) outs(%fill : tensor<2x3x4x2x3xf32>) -> tensor<2x3x4x2x3xf32> return %0 : tensor<2x3x4x2x3xf32> } -// CHECK-LABEL: func @depthwise_conv_2d_input_nhwc_filter_hwcf_memref -func @depthwise_conv_2d_input_nhwc_filter_hwcf_memref(%input: memref<2x4x5x2xf32>, %filter: memref<2x2x2x3xf32>, %output: memref<2x3x4x2x3xf32>) { - // CHECK: linalg.depthwise_conv_2d_input_nhwc_filter_hwcf +// CHECK-LABEL: func @depthwise_conv2D_nhwc_memref +func @depthwise_conv2D_nhwc_memref(%input: memref<2x4x5x2xf32>, %filter: memref<2x2x2x3xf32>, %output: memref<2x3x4x2x3xf32>) { + // CHECK: linalg.depthwise_conv2D_nhwc // CHECK-SAME: {dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} // CHECK-SAME: ins(%{{.+}}, %{{.+}} : memref<2x4x5x2xf32>, memref<2x2x2x3xf32>) // CHECK-SAME: outs(%{{.+}} : memref<2x3x4x2x3xf32>) - linalg.depthwise_conv_2d_input_nhwc_filter_hwcf + linalg.depthwise_conv2D_nhwc { dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64> } ins(%input, %filter : memref<2x4x5x2xf32>, memref<2x2x2x3xf32>) outs(%output : memref<2x3x4x2x3xf32>) return } -// CHECK-LABEL: func @depthwise_conv_2d_input_nhwc_filter_hwc_tensor -func @depthwise_conv_2d_input_nhwc_filter_hwc_tensor(%input: tensor<1x113x113x96xf32>, %filter: tensor<3x3x96xf32>) -> tensor<1x56x56x96xf32> { +// CHECK-LABEL: func @depthwise_conv2D_nhw_tensor +func @depthwise_conv2D_nhw_tensor(%input: tensor<1x113x113x96xf32>, %filter: tensor<3x3x96xf32>) -> tensor<1x56x56x96xf32> { %init = linalg.init_tensor [1, 56, 56, 96] : tensor<1x56x56x96xf32> - // CHECK: %{{.+}} = linalg.depthwise_conv_2d_input_nhwc_filter_hwc + // CHECK: %{{.+}} = linalg.depthwise_conv2D_nhw // CHECK-SAME: {dilations = dense<1> : vector<2xi64>, strides = dense<2> : vector<2xi64>} // CHECK-SAME: ins(%{{.+}}, %{{.+}} : tensor<1x113x113x96xf32>, tensor<3x3x96xf32>) // CHECK-SAME: outs(%{{.+}} : tensor<1x56x56x96xf32>) -> tensor<1x56x56x96xf32> - %0 = linalg.depthwise_conv_2d_input_nhwc_filter_hwc {dilations = dense<1> : vector<2xi64>, strides = dense<2> : vector<2xi64>} + %0 = linalg.depthwise_conv2D_nhw {dilations = dense<1> : vector<2xi64>, strides = dense<2> : vector<2xi64>} ins(%input, %filter: tensor<1x113x113x96xf32>, tensor<3x3x96xf32>) outs(%init: tensor<1x56x56x96xf32>) -> tensor<1x56x56x96xf32> return %0: tensor<1x56x56x96xf32> } -// CHECK-LABEL: func @depthwise_conv_2d_input_nhwc_filter_hwc_memref -func @depthwise_conv_2d_input_nhwc_filter_hwc_memref(%input: memref<1x113x113x96xf32>, %filter: memref<3x3x96xf32>, %output: memref<1x56x56x96xf32>) { - // CHECK: linalg.depthwise_conv_2d_input_nhwc_filter_hwc +// CHECK-LABEL: func @depthwise_conv2D_nhw_memref +func @depthwise_conv2D_nhw_memref(%input: memref<1x113x113x96xf32>, %filter: memref<3x3x96xf32>, %output: memref<1x56x56x96xf32>) { + // CHECK: linalg.depthwise_conv2D_nhw // CHECK-SAME: {dilations = dense<1> : vector<2xi64>, strides = dense<2> : vector<2xi64>} // CHECK-SAME: ins(%{{.+}}, %{{.+}} : memref<1x113x113x96xf32>, memref<3x3x96xf32>) // CHECK-SAME: outs(%{{.+}} : memref<1x56x56x96xf32>) - linalg.depthwise_conv_2d_input_nhwc_filter_hwc {dilations = dense<1> : vector<2xi64>, strides = dense<2> : vector<2xi64>} + linalg.depthwise_conv2D_nhw {dilations = dense<1> : vector<2xi64>, strides = 
dense<2> : vector<2xi64>} ins(%input, %filter: memref<1x113x113x96xf32>, memref<3x3x96xf32>) outs(%output: memref<1x56x56x96xf32>) return } -func @depthwise_conv_2d_input_nhwc_filter_hwcf_tensor_dilated(%input: tensor<2x8x9x2xf32>, %filter: tensor<2x2x2x3xf32>) -> tensor<2x6x7x2x3xf32> { +func @depthwise_conv2D_nhwc_tensor_dilated(%input: tensor<2x8x9x2xf32>, %filter: tensor<2x2x2x3xf32>) -> tensor<2x6x7x2x3xf32> { %zero = constant 0.000000e+00 : f32 %init = linalg.init_tensor [2, 6, 7, 2, 3] : tensor<2x6x7x2x3xf32> %fill = linalg.fill(%zero, %init) : f32, tensor<2x6x7x2x3xf32> -> tensor<2x6x7x2x3xf32> - // CHECK: %{{.+}} = linalg.depthwise_conv_2d_input_nhwc_filter_hwcf + // CHECK: %{{.+}} = linalg.depthwise_conv2D_nhwc // CHECK-SAME: {dilations = dense<2> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} // CHECK-SAME: ins(%{{.+}}, %{{.+}} : tensor<2x8x9x2xf32>, tensor<2x2x2x3xf32>) // CHECK-SAME: outs(%{{.+}} : tensor<2x6x7x2x3xf32>) - %0 = linalg.depthwise_conv_2d_input_nhwc_filter_hwcf + %0 = linalg.depthwise_conv2D_nhwc { dilations = dense<2> : tensor<2xi64>, strides = dense<1> : tensor<2xi64> } ins(%input, %filter : tensor<2x8x9x2xf32>, tensor<2x2x2x3xf32>) outs(%fill : tensor<2x6x7x2x3xf32>) -> tensor<2x6x7x2x3xf32> return %0 : tensor<2x6x7x2x3xf32> } -// CHECK-LABEL: func @depthwise_conv_2d_input_nhwc_filter_hwcf_memref_dilated -func @depthwise_conv_2d_input_nhwc_filter_hwcf_memref_dilated(%input: memref<2x8x9x2xf32>, %filter: memref<2x2x2x3xf32>, %output: memref<2x6x7x2x3xf32>) { - // CHECK: linalg.depthwise_conv_2d_input_nhwc_filter_hwcf +// CHECK-LABEL: func @depthwise_conv2D_nhwc_memref_dilated +func @depthwise_conv2D_nhwc_memref_dilated(%input: memref<2x8x9x2xf32>, %filter: memref<2x2x2x3xf32>, %output: memref<2x6x7x2x3xf32>) { + // CHECK: linalg.depthwise_conv2D_nhwc // CHECK-SAME: {dilations = dense<2> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} // CHECK-SAME: ins(%{{.+}}, %{{.+}} : memref<2x8x9x2xf32>, memref<2x2x2x3xf32>) // CHECK-SAME: outs(%{{.+}} : memref<2x6x7x2x3xf32>) - linalg.depthwise_conv_2d_input_nhwc_filter_hwcf + linalg.depthwise_conv2D_nhwc { dilations = dense<2> : tensor<2xi64>, strides = dense<1> : tensor<2xi64> } ins(%input, %filter : memref<2x8x9x2xf32>, memref<2x2x2x3xf32>) outs(%output : memref<2x6x7x2x3xf32>) @@ -86,7 +86,7 @@ func @depthwise_conv_2d_input_nhwc_filter_missing_stride(%input: memref<1x113x113x96xf32>, %filter: memref<3x3x96xf32>, %output: memref<1x56x56x96xf32>) { // expected-error @+1 {{missing indexing map required attribute 'strides'}} - linalg.depthwise_conv_2d_input_nhwc_filter_hwc {dilations = dense<1> : vector<2xi64>} + linalg.depthwise_conv2D_nhw {dilations = dense<1> : vector<2xi64>} ins(%input, %filter: memref<1x113x113x96xf32>, memref<3x3x96xf32>) outs(%output: memref<1x56x56x96xf32>) return @@ -96,7 +96,7 @@ func @depthwise_conv_2d_input_nhwc_filter_missing_dilations(%input: memref<1x113x113x96xf32>, %filter: memref<3x3x96xf32>, %output: memref<1x56x56x96xf32>) { // expected-error @+1 {{missing indexing map required attribute 'dilations'}} - linalg.depthwise_conv_2d_input_nhwc_filter_hwc {strides = dense<1> : vector<2xi64>} + linalg.depthwise_conv2D_nhw {strides = dense<1> : vector<2xi64>} ins(%input, %filter: memref<1x113x113x96xf32>, memref<3x3x96xf32>) outs(%output: memref<1x56x56x96xf32>) return @@ -106,7 +106,7 @@ func @depthwise_conv_2d_input_nhwc_filter_wrong_stride_element_type(%input: memref<1x113x113x96xf32>, %filter: memref<3x3x96xf32>, %output: memref<1x56x56x96xf32>) { // expected-error @+1 
{{incorrect element type for indexing map required attribute 'strides'}} - linalg.depthwise_conv_2d_input_nhwc_filter_hwc {dilations = dense<1> : vector<2xi64>, strides = dense<2.0> : vector<2xf32>} + linalg.depthwise_conv2D_nhw {dilations = dense<1> : vector<2xi64>, strides = dense<2.0> : vector<2xf32>} ins(%input, %filter: memref<1x113x113x96xf32>, memref<3x3x96xf32>) outs(%output: memref<1x56x56x96xf32>) return @@ -116,7 +116,7 @@ func @depthwise_conv_2d_input_nhwc_filter_wrong_stride_size(%input: memref<1x113x113x96xf32>, %filter: memref<3x3x96xf32>, %output: memref<1x56x56x96xf32>) { // expected-error @+1 {{incorrect shape for indexing map required attribute 'strides'}} - linalg.depthwise_conv_2d_input_nhwc_filter_hwc {dilations = dense<1> : vector<2xi64>, strides = dense<2> : vector<3xi64> } + linalg.depthwise_conv2D_nhw {dilations = dense<1> : vector<2xi64>, strides = dense<2> : vector<3xi64> } ins(%input, %filter: memref<1x113x113x96xf32>, memref<3x3x96xf32>) outs(%output: memref<1x56x56x96xf32>) return @@ -124,14 +124,14 @@ // ----- -// CHECK-LABEL: func @conv_1d_input_nwc_filter_wcf -func @conv_1d_input_nwc_filter_wcf(%input: tensor, %filter: tensor, %init: tensor) -> tensor { - // CHECK: %{{.+}} = linalg.conv_1d_input_nwc_filter_wcf +// CHECK-LABEL: func @conv_1d_nwc_wcf +func @conv_1d_nwc_wcf(%input: tensor, %filter: tensor, %init: tensor) -> tensor { + // CHECK: %{{.+}} = linalg.conv_1d_nwc_wcf // CHECK-SAME: dilations = dense<1> : tensor<1xi64> // CHECK-SAME: strides = dense<1> : tensor<1xi64> // CHECK-SAME: ins(%{{.+}}, %{{.+}} : tensor, tensor) // CHECK-SAME: outs(%{{.+}} : tensor) -> tensor - %0 = linalg.conv_1d_input_nwc_filter_wcf {dilations = dense<1> : tensor<1xi64>, + %0 = linalg.conv_1d_nwc_wcf {dilations = dense<1> : tensor<1xi64>, strides = dense<1> : tensor<1xi64>} ins (%input, %filter: tensor, tensor) outs (%init: tensor) -> tensor @@ -140,14 +140,14 @@ // ----- -// CHECK-LABEL: func @conv_1d_input_nwc_filter_wcf -func @conv_1d_input_nwc_filter_wcf(%input: memref, %filter: memref, %output: memref) { - // CHECK: linalg.conv_1d_input_nwc_filter_wcf +// CHECK-LABEL: func @conv_1d_nwc_wcf +func @conv_1d_nwc_wcf(%input: memref, %filter: memref, %output: memref) { + // CHECK: linalg.conv_1d_nwc_wcf // CHECK-SAME: dilations = dense<1> : tensor<1xi64> // CHECK-SAME: strides = dense<1> : tensor<1xi64> // CHECK-SAME: ins(%{{.+}}, %{{.+}} : memref, memref) // CHECK-SAME: outs(%{{.+}} : memref) - linalg.conv_1d_input_nwc_filter_wcf {dilations = dense<1> : tensor<1xi64>, + linalg.conv_1d_nwc_wcf {dilations = dense<1> : tensor<1xi64>, strides = dense<1> : tensor<1xi64>} ins (%input, %filter: memref, memref) outs (%output: memref) @@ -156,14 +156,14 @@ // ----- -// CHECK-LABEL: func @conv_2d_input_nhwc_filter_hwcf -func @conv_2d_input_nhwc_filter_hwcf(%input: tensor, %filter: tensor, %init: tensor) -> tensor { - // CHECK: %{{.+}} = linalg.conv_2d_input_nhwc_filter_hwcf +// CHECK-LABEL: func @conv_2d_nhwc_hwcf +func @conv_2d_nhwc_hwcf(%input: tensor, %filter: tensor, %init: tensor) -> tensor { + // CHECK: %{{.+}} = linalg.conv_2d_nhwc_hwcf // CHECK-SAME: dilations = dense<1> : tensor<2xi64> // CHECK-SAME: strides = dense<1> : tensor<2xi64> // CHECK-SAME: ins(%{{.+}}, %{{.+}} : tensor, tensor) // CHECK-SAME: outs(%{{.+}} : tensor) -> tensor - %0 = linalg.conv_2d_input_nhwc_filter_hwcf {dilations = dense<1> : tensor<2xi64>, + %0 = linalg.conv_2d_nhwc_hwcf {dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins (%input, %filter: tensor, tensor) outs (%init: 
tensor) -> tensor @@ -172,14 +172,14 @@ // ----- -// CHECK-LABEL: func @conv_2d_input_nhwc_filter_hwcf -func @conv_2d_input_nhwc_filter_hwcf(%input: memref, %filter: memref, %output: memref) { - // CHECK: linalg.conv_2d_input_nhwc_filter_hwcf +// CHECK-LABEL: func @conv_2d_nhwc_hwcf +func @conv_2d_nhwc_hwcf(%input: memref, %filter: memref, %output: memref) { + // CHECK: linalg.conv_2d_nhwc_hwcf // CHECK-SAME: dilations = dense<1> : tensor<2xi64> // CHECK-SAME: strides = dense<1> : tensor<2xi64> // CHECK-SAME: ins(%{{.+}}, %{{.+}} : memref, memref) // CHECK-SAME: outs(%{{.+}} : memref) - linalg.conv_2d_input_nhwc_filter_hwcf {dilations = dense<1> : tensor<2xi64>, + linalg.conv_2d_nhwc_hwcf {dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins (%input, %filter: memref, memref) outs (%output: memref) @@ -188,14 +188,14 @@ // ----- -// CHECK-LABEL: func @conv_3d_input_ndhwc_filter_dhwcf -func @conv_3d_input_ndhwc_filter_dhwcf(%input: tensor, %filter: tensor, %init: tensor) -> tensor { - // CHECK: %{{.+}} = linalg.conv_3d_input_ndhwc_filter_dhwcf +// CHECK-LABEL: func @conv_3d_ndhwc_dhwcf +func @conv_3d_ndhwc_dhwcf(%input: tensor, %filter: tensor, %init: tensor) -> tensor { + // CHECK: %{{.+}} = linalg.conv_3d_ndhwc_dhwcf // CHECK-SAME: dilations = dense<1> : tensor<3xi64> // CHECK-SAME: strides = dense<1> : tensor<3xi64> // CHECK-SAME: ins(%{{.+}}, %{{.+}} : tensor, tensor) // CHECK-SAME: outs(%{{.+}} : tensor) -> tensor - %0 = linalg.conv_3d_input_ndhwc_filter_dhwcf {dilations = dense<1> : tensor<3xi64>, + %0 = linalg.conv_3d_ndhwc_dhwcf {dilations = dense<1> : tensor<3xi64>, strides = dense<1> : tensor<3xi64>} ins (%input, %filter: tensor, tensor) outs (%init: tensor) -> tensor @@ -204,14 +204,14 @@ // ----- -// CHECK-LABEL: func @conv_3d_input_ndhwc_filter_dhwcf -func @conv_3d_input_ndhwc_filter_dhwcf(%input: memref, %filter: memref, %output: memref) { - // CHECK: linalg.conv_3d_input_ndhwc_filter_dhwcf +// CHECK-LABEL: func @conv_3d_ndhwc_dhwcf +func @conv_3d_ndhwc_dhwcf(%input: memref, %filter: memref, %output: memref) { + // CHECK: linalg.conv_3d_ndhwc_dhwcf // CHECK-SAME: dilations = dense<1> : tensor<3xi64> // CHECK-SAME: strides = dense<1> : tensor<3xi64> // CHECK-SAME: ins(%{{.+}}, %{{.+}} : memref, memref) // CHECK-SAME: outs(%{{.+}} : memref) - linalg.conv_3d_input_ndhwc_filter_dhwcf {dilations = dense<1> : tensor<3xi64>, + linalg.conv_3d_ndhwc_dhwcf {dilations = dense<1> : tensor<3xi64>, strides = dense<1> : tensor<3xi64>} ins (%input, %filter: memref, memref) outs (%output: memref) diff --git a/mlir/test/Dialect/Linalg/tile-and-fuse-tensors.mlir b/mlir/test/Dialect/Linalg/tile-and-fuse-tensors.mlir --- a/mlir/test/Dialect/Linalg/tile-and-fuse-tensors.mlir +++ b/mlir/test/Dialect/Linalg/tile-and-fuse-tensors.mlir @@ -79,7 +79,7 @@ %init = linalg.init_tensor [1, 112, 112, 32] : tensor<1x112x112x32xf32> %fill = linalg.fill(%cst, %init) : f32, tensor<1x112x112x32xf32> -> tensor<1x112x112x32xf32> - %conv = linalg.conv_2d_input_nhwc_filter_hwcf + %conv = linalg.conv_2d_nhwc_hwcf {dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%input, %filter : tensor<1x225x225x3xf32>, tensor<3x3x3x32xf32>) outs(%fill : tensor<1x112x112x32xf32>) -> tensor<1x112x112x32xf32> @@ -133,7 +133,7 @@ // CHECK-NEXT: %[[ST_ARG2:.+]] = tensor.extract_slice %[[ARG2]][0, %[[IV0]], %[[IV1]], %[[IV2]]] [1, 8, 16, 4] [1, 1, 1, 1] : tensor<1x112x112x32xf32> to tensor<1x8x16x4xf32> // CHECK-NEXT: %[[ST_FILTER:.+]] = tensor.extract_slice %[[FILTER]][0, 0, 0, 
%[[IV2]]] [3, 3, 3, 4] [1, 1, 1, 1] : tensor<3x3x3x32xf32> to tensor<3x3x3x4xf32> // CHECK-NEXT: %[[ST_FILL:.+]] = tensor.extract_slice %[[FILL]][0, %[[IV0]], %[[IV1]], %[[IV2]]] [1, 8, 16, 4] [1, 1, 1, 1] : tensor<1x112x112x32xf32> to tensor<1x8x16x4xf32> -// CHECK-NEXT: %[[ST_CONV:.+]] = linalg.conv_2d_input_nhwc_filter_hwcf +// CHECK-NEXT: %[[ST_CONV:.+]] = linalg.conv_2d_nhwc_hwcf // CHECK-SAME: ins(%[[ST_INPUT]], %[[ST_FILTER]] : tensor<1x17x33x3xf32>, tensor<3x3x3x4xf32>) // CHECK-SAME: outs(%[[ST_FILL]] : tensor<1x8x16x4xf32>) // CHECK-NEXT: %[[ADD:.+]] = linalg.generic @@ -161,7 +161,7 @@ %init = linalg.init_tensor [%n, %oh, %ow, %oc] : tensor %fill = linalg.fill(%cst, %init) : f32, tensor -> tensor - %conv = linalg.conv_2d_input_nhwc_filter_hwcf + %conv = linalg.conv_2d_nhwc_hwcf {dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%input, %filter : tensor, tensor) outs(%fill : tensor) -> tensor @@ -271,7 +271,7 @@ // CHECK-NEXT: %[[SIZE_ELEM_OC_3:.+]] = affine.min #[[BOUND2_MAP_2]](%[[IV3]], %[[IV2]])[%[[FILL_C]], %[[ELEM_OC]]] // CHECK-NEXT: %[[ST_FILL:.+]] = tensor.extract_slice %[[FILL]][%[[IV0]], %[[IV1]], %[[IV2]], %[[IV3]]] // CHECK-SAME: [%[[SIZE_ELEM_N_2]], %[[SIZE_ELEM_OH_2]], %[[SIZE_ELEM_OW_2]], %[[SIZE_ELEM_OC_3]]] -// CHECK-NEXT: %[[ST_CONV:.+]] = linalg.conv_2d_input_nhwc_filter_hwcf +// CHECK-NEXT: %[[ST_CONV:.+]] = linalg.conv_2d_nhwc_hwcf // CHECK-SAME: ins(%[[ST_INPUT]], %[[ST_FILTER]] : tensor, tensor) // CHECK-SAME: outs(%[[ST_FILL]] : tensor) -> tensor // CHECK-NEXT: %[[ST_ADD:.+]] = linalg.generic diff --git a/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-1d-input-nwc-filter-wcf-call.mlir b/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-1d-nwc-wcf-call.mlir rename from mlir/test/Integration/Dialect/Linalg/CPU/test-conv-1d-input-nwc-filter-wcf-call.mlir rename to mlir/test/Integration/Dialect/Linalg/CPU/test-conv-1d-nwc-wcf-call.mlir --- a/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-1d-input-nwc-filter-wcf-call.mlir +++ b/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-1d-nwc-wcf-call.mlir @@ -29,8 +29,8 @@ return %buf : memref } -func @conv_1d_input_nwc_filter_wcf(%arg0: memref, %arg1: memref, %arg2: memref) { - linalg.conv_1d_input_nwc_filter_wcf {dilations = dense<1> : tensor<1xi64>, +func @conv_1d_nwc_wcf(%arg0: memref, %arg1: memref, %arg2: memref) { + linalg.conv_1d_nwc_wcf {dilations = dense<1> : tensor<1xi64>, strides = dense<1> : tensor<1xi64>} ins (%arg0, %arg1: memref, memref) outs (%arg2: memref) @@ -52,7 +52,7 @@ %out1D_nwc = call @alloc_3d_filled_f32(%c3, %c6, %c1, %zero) : (index, index, index, f32) -> (memref) memref.store %f10, %in1D_nwc[%c0, %c3, %c0] : memref - call @conv_1d_input_nwc_filter_wcf(%in1D_nwc, %filter1D_nwc, %out1D_nwc) : (memref, memref, memref) -> () + call @conv_1d_nwc_wcf(%in1D_nwc, %filter1D_nwc, %out1D_nwc) : (memref, memref, memref) -> () %out1D_nwc_ = memref.cast %out1D_nwc : memref to memref<*xf32> call @print_memref_f32(%out1D_nwc_): (memref<*xf32>) -> () diff --git a/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-2d-input-nhwc-filter-hwcf-call.mlir b/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-2d-nhwc-hwcf-call.mlir rename from mlir/test/Integration/Dialect/Linalg/CPU/test-conv-2d-input-nhwc-filter-hwcf-call.mlir rename to mlir/test/Integration/Dialect/Linalg/CPU/test-conv-2d-nhwc-hwcf-call.mlir --- a/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-2d-input-nhwc-filter-hwcf-call.mlir +++ 
b/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-2d-nhwc-hwcf-call.mlir @@ -29,8 +29,8 @@ return %buf : memref } -func @conv_2d_input_nhwc_filter_hwcf(%arg0: memref, %arg1: memref, %arg2: memref) { - linalg.conv_2d_input_nhwc_filter_hwcf {dilations = dense<1> : tensor<2xi64>, +func @conv_2d_nhwc_hwcf(%arg0: memref, %arg1: memref, %arg2: memref) { + linalg.conv_2d_nhwc_hwcf {dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins (%arg0, %arg1: memref, memref) outs (%arg2: memref) @@ -52,7 +52,7 @@ %out2D_nhwc = call @alloc_4d_filled_f32(%c3, %c6, %c6, %c1, %zero) : (index, index, index, index, f32) -> (memref) memref.store %f10, %in2D_nhwc[%c0, %c0, %c3, %c0] : memref - call @conv_2d_input_nhwc_filter_hwcf(%in2D_nhwc, %filter2D_nhwc, %out2D_nhwc) : (memref, memref, memref) -> () + call @conv_2d_nhwc_hwcf(%in2D_nhwc, %filter2D_nhwc, %out2D_nhwc) : (memref, memref, memref) -> () %out2D_nhwc_ = memref.cast %out2D_nhwc : memref to memref<*xf32> call @print_memref_f32(%out2D_nhwc_): (memref<*xf32>) -> () diff --git a/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-3d-input-ndhwc-filter-dhwcf-call.mlir b/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-3d-ndhwc-dhwcf-call.mlir rename from mlir/test/Integration/Dialect/Linalg/CPU/test-conv-3d-input-ndhwc-filter-dhwcf-call.mlir rename to mlir/test/Integration/Dialect/Linalg/CPU/test-conv-3d-ndhwc-dhwcf-call.mlir --- a/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-3d-input-ndhwc-filter-dhwcf-call.mlir +++ b/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-3d-ndhwc-dhwcf-call.mlir @@ -29,8 +29,8 @@ return %buf : memref } -func @conv_3d_input_ndhwc_filter_dhwcf(%arg0: memref, %arg1: memref, %arg2: memref) { - linalg.conv_3d_input_ndhwc_filter_dhwcf {dilations = dense<1> : tensor<3xi64>, +func @conv_3d_ndhwc_dhwcf(%arg0: memref, %arg1: memref, %arg2: memref) { + linalg.conv_3d_ndhwc_dhwcf {dilations = dense<1> : tensor<3xi64>, strides = dense<1> : tensor<3xi64>} ins (%arg0, %arg1: memref, memref) outs (%arg2: memref) @@ -53,7 +53,7 @@ %out3D_ndhwc = call @alloc_5d_filled_f32(%c1, %c6, %c6, %c6, %c1, %zero) : (index, index, index, index, index, f32) -> (memref) memref.store %f10, %in3D_ndhwc[%c0, %c0, %c0, %c3, %c0] : memref - call @conv_3d_input_ndhwc_filter_dhwcf(%in3D_ndhwc, %filter3D_ndhwc, %out3D_ndhwc) : (memref, memref, memref) -> () + call @conv_3d_ndhwc_dhwcf(%in3D_ndhwc, %filter3D_ndhwc, %out3D_ndhwc) : (memref, memref, memref) -> () %out3D_ndhwc_ = memref.cast %out3D_ndhwc : memref to memref<*xf32> call @print_memref_f32(%out3D_ndhwc_): (memref<*xf32>) -> ()
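
As a reading aid for this rename (not part of the patch), here is a minimal usage sketch of the new channel-free linalg.conv_2d and the renamed linalg.conv_1d_nwc_wcf, written in the style of the tests above. The function names and static shapes are hypothetical, and unit strides/dilations are assumed.

func @conv_2d_no_channels(%input: memref<8x8xf32>, %filter: memref<3x3xf32>, %output: memref<6x6xf32>) {
  // The channel-free variants carry no strides/dilations attributes, matching the YAML definitions above.
  linalg.conv_2d ins(%input, %filter : memref<8x8xf32>, memref<3x3xf32>)
                outs(%output : memref<6x6xf32>)
  return
}

func @conv_1d_nwc_wcf_static(%input: memref<1x10x3xf32>, %filter: memref<3x3x8xf32>, %output: memref<1x8x8xf32>) {
  // The layout-carrying variants keep the index attributes (one element per spatial dimension).
  linalg.conv_1d_nwc_wcf {dilations = dense<1> : tensor<1xi64>, strides = dense<1> : tensor<1xi64>}
    ins(%input, %filter : memref<1x10x3xf32>, memref<3x3x8xf32>)
    outs(%output : memref<1x8x8xf32>)
  return
}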
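
The iterator_types change in generalize-named-ops.mlir (the trailing channel dimension of conv_1d_nwc_wcf is now a reduction rather than parallel) corresponds to a generalized form roughly like the sketch below; the shapes are again hypothetical, and stride/dilation 1 are folded into the input map.

func @generalized_conv_1d_nwc_wcf(%input: memref<1x10x3xf32>, %filter: memref<3x3x8xf32>, %output: memref<1x8x8xf32>) {
  linalg.generic {
      indexing_maps = [affine_map<(d0, d1, d2, d3, d4) -> (d0, d1 + d3, d4)>,
                       affine_map<(d0, d1, d2, d3, d4) -> (d3, d4, d2)>,
                       affine_map<(d0, d1, d2, d3, d4) -> (d0, d1, d2)>],
      iterator_types = ["parallel", "parallel", "parallel", "reduction", "reduction"]}
      ins(%input, %filter : memref<1x10x3xf32>, memref<3x3x8xf32>)
      outs(%output : memref<1x8x8xf32>) {
    ^bb0(%in: f32, %k: f32, %acc: f32):
      // O[n, w, f] += I[n, w + kw, c] * K[kw, c, f]
      %prod = mulf %in, %k : f32
      %sum = addf %acc, %prod : f32
      linalg.yield %sum : f32
  }
  return
}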