diff --git a/mlir/lib/Conversion/TosaToLinalg/TosaToLinalgNamed.cpp b/mlir/lib/Conversion/TosaToLinalg/TosaToLinalgNamed.cpp --- a/mlir/lib/Conversion/TosaToLinalg/TosaToLinalgNamed.cpp +++ b/mlir/lib/Conversion/TosaToLinalg/TosaToLinalgNamed.cpp @@ -73,15 +73,16 @@ // Calculating the output width/height using the formula: // H = ((IH+pad_top+pad_bottom-(dilation_y*(KH-1)+1))/stride_y)+1 // W = ((IW+pad_left+pad_right-(dilation_x*(KW-1)+1))/stride_x)+1 -static mlir::Value -getConvOutputDim(Location loc, Value initDim, Attribute padBeforeAttr, - Attribute padAfterAttr, Value kernelDim, Attribute strideAttr, - Attribute dilationAttr, Type inputETy, OpBuilder &rewriter) { - ImplicitLocOpBuilder builder(loc, rewriter); - auto one = rewriter.create( - loc, IntegerAttr::get(initDim.getType(), 1)); +static mlir::Value getConvOrPoolOutputDim(Location loc, Value inputDim, + Attribute padBeforeAttr, + Attribute padAfterAttr, + Value kernelDim, Attribute strideAttr, + Attribute dilationAttr, Type inputETy, + ImplicitLocOpBuilder &builder) { + auto one = builder.create( + loc, IntegerAttr::get(inputDim.getType(), 1)); Value padBefore = reifyConstantDim(padBeforeAttr, builder); - Value paddedBefore = builder.create(initDim, padBefore); + Value paddedBefore = builder.create(inputDim, padBefore); Value padAfter = reifyConstantDim(padAfterAttr, builder); Value paddedAfter = builder.create(paddedBefore, padAfter); @@ -93,14 +94,36 @@ Value subtract = builder.create(paddedAfter, addOne); Value stride = reifyConstantDim(strideAttr, builder); Value divide = builder.create(subtract, stride); - return builder.create(divide, one); + return builder.create(divide, one); +} + +template +static Value getKernelDim(Location loc, T kernel, int64_t dim, + ImplicitLocOpBuilder &builder) { + return nullptr; +} + +template <> +// For convolution, the kernel is a value. 
+Value getKernelDim(Location loc, Value kernel, int64_t dim, + ImplicitLocOpBuilder &builder) { + return builder.create(loc, kernel, dim).getResult(); +} + +template <> +// For pooling, the kernel is an attribute. +Value getKernelDim(Location loc, ArrayAttr kernel, int64_t dim, + ImplicitLocOpBuilder &builder) { + Attribute kernelDimAttr = kernel.getValue()[dim - 1]; + return reifyConstantDim(kernelDimAttr, builder); } -// Creates a vector of the dynamic output dims for Conv2D and Depthwise_Conv2D -static SmallVector inferDynamicDimsForConv( - Location loc, Value input, Value weight, ShapedType resultTy, - ArrayAttr padAttr, ArrayAttr strideAttr, ArrayAttr dilationAttr, - int64_t weightHDim, int64_t weightWDim, OpBuilder &rewriter) { +// Creates a vector of the dynamic output dims for convolution and pooling ops. +template +static SmallVector inferDynamicDimsForConvOrPool( + Location loc, Value input, T weight, ShapedType resultTy, ArrayAttr padAttr, + ArrayAttr strideAttr, ArrayAttr dilationAttr, int64_t weightHDim, + int64_t weightWDim, OpBuilder &rewriter) { ShapedType inputTy = input.getType().cast(); Type inputETy = inputTy.getElementType(); int64_t inputRank = inputTy.getRank(); @@ -114,30 +137,29 @@ dynDims[i] = rewriter.create(loc, input, i); } + ImplicitLocOpBuilder builder(loc, rewriter); // Dynamic input height if (inputTy.isDynamicDim(heightDim)) { - Value initHDim = - rewriter.create(loc, input, heightDim).getResult(); - Value kernelHDim = - rewriter.create(loc, weight, weightHDim).getResult(); + Value inputHDim = + builder.create(loc, input, heightDim).getResult(); + Value kernelHDim = getKernelDim(loc, weight, weightHDim, builder); // H = F(IH, pad_top, pad_bottom, dilation_y, KH, stride_y) - dynDims[heightDim] = getConvOutputDim( - loc, initHDim, padAttr.getValue()[0], padAttr.getValue()[1], kernelHDim, - strideAttr.getValue()[0], dilationAttr.getValue()[0], inputETy, - rewriter); + dynDims[heightDim] = getConvOrPoolOutputDim( + loc, inputHDim, 
padAttr.getValue()[0], padAttr.getValue()[1], + kernelHDim, strideAttr.getValue()[0], dilationAttr.getValue()[0], + inputETy, builder); } // Dynamic input weight if (inputTy.isDynamicDim(weightDim)) { - Value initWDim = - rewriter.create(loc, input, weightDim).getResult(); - Value kernelWDim = - rewriter.create(loc, weight, weightWDim).getResult(); + Value inputWDim = + builder.create(loc, input, weightDim).getResult(); + Value kernelWDim = getKernelDim(loc, weight, weightWDim, builder); // W = F(IW, pad_left, pad_right, dilation_x, KW, stride_x) - dynDims[weightDim] = getConvOutputDim( - loc, initWDim, padAttr.getValue()[2], padAttr.getValue()[3], kernelWDim, - strideAttr.getValue()[1], dilationAttr.getValue()[1], inputETy, - rewriter); + dynDims[weightDim] = getConvOrPoolOutputDim( + loc, inputWDim, padAttr.getValue()[2], padAttr.getValue()[3], + kernelWDim, strideAttr.getValue()[1], dilationAttr.getValue()[1], + inputETy, builder); } SmallVector filteredDims = condenseValues(dynDims); @@ -191,7 +213,7 @@ return rewriter.notifyMatchFailure( op, "tosa.conv ops does not support unsigned integer input"); - SmallVector filteredDims = inferDynamicDimsForConv( + SmallVector filteredDims = inferDynamicDimsForConvOrPool( loc, input, weight, resultTy, padAttr, strideTosaAttr, dilationTosaAttr, /*weightHDim=*/1, /*weightWDim=*/2, rewriter); @@ -356,7 +378,7 @@ op, "tosa.depthwise_conv ops require static shapes"); // Compute output dynamic dims - SmallVector filteredDims = inferDynamicDimsForConv( + SmallVector filteredDims = inferDynamicDimsForConvOrPool( loc, input, weight, resultTy, padAttr, strideTosaAttr, dilationTosaAttr, 0, 1, rewriter); @@ -692,11 +714,15 @@ ShapedType resultTy = op.getType().template cast(); Type resultETy = inputTy.getElementType(); - auto dynamicDimsOr = - checkHasDynamicBatchDims(rewriter, op, {input, op.getOutput()}); - if (!dynamicDimsOr.has_value()) - return failure(); - SmallVector dynamicDims = dynamicDimsOr.value(); + auto kernelAttr = 
op->getAttr("kernel").cast(); + auto padAttr = op->getAttr("pad").cast(); + auto strideTosaAttr = op->getAttr("stride").cast(); + ArrayAttr dilationTosaAttr = rewriter.getI64ArrayAttr({1, 1}); + + SmallVector filteredDims = inferDynamicDimsForConvOrPool( + loc, input, kernelAttr, resultTy, padAttr, strideTosaAttr, + dilationTosaAttr, + /*weightHDim=*/1, /*weightWDim=*/2, rewriter); // Determine what the initial value needs to be for the max pool op. Attribute initialAttr; @@ -733,7 +759,7 @@ // Create the linalg op that performs pooling. Value initTensor = rewriter.create( - loc, dynamicDims, resultTy.getShape(), resultTy.getElementType()); + loc, filteredDims, resultTy.getShape(), resultTy.getElementType()); Value filledInitTensor = rewriter @@ -769,11 +795,15 @@ inElementTy.isa() ? rewriter.getI32Type() : inElementTy; ShapedType accTy = resultTy.clone(accETy); - auto dynamicDimsOr = - checkHasDynamicBatchDims(rewriter, op, {input, op.getOutput()}); - if (!dynamicDimsOr.has_value()) - return failure(); - SmallVector dynamicDims = dynamicDimsOr.value(); + auto kernelAttr = op->getAttr("kernel").cast(); + auto padArrayAttr = op->getAttr("pad").cast(); + auto strideTosaAttr = op->getAttr("stride").cast(); + ArrayAttr dilationTosaAttr = rewriter.getI64ArrayAttr({1, 1}); + + SmallVector filteredDims = inferDynamicDimsForConvOrPool( + loc, input, kernelAttr, resultTy, padArrayAttr, strideTosaAttr, + dilationTosaAttr, + /*weightHDim=*/1, /*weightWDim=*/2, rewriter); // Apply padding as necessary. llvm::SmallVector pad; @@ -795,7 +825,7 @@ // Create the linalg op that performs pooling. 
Value poolInitTensor = rewriter.create( - loc, dynamicDims, accTy.getShape(), accETy); + loc, filteredDims, accTy.getShape(), accETy); Value filledInitTensor = rewriter @@ -820,7 +850,7 @@ auto affineMap = rewriter.getMultiDimIdentityMap(resultTy.getRank()); Value genericInitTensor = rewriter.create( - loc, dynamicDims, resultTy.getShape(), resultETy); + loc, filteredDims, resultTy.getShape(), resultETy); auto genericOp = rewriter.create( loc, ArrayRef({resultTy}), ValueRange{poolingOp}, diff --git a/mlir/test/Conversion/TosaToLinalg/tosa-to-linalg-named.mlir b/mlir/test/Conversion/TosaToLinalg/tosa-to-linalg-named.mlir --- a/mlir/test/Conversion/TosaToLinalg/tosa-to-linalg-named.mlir +++ b/mlir/test/Conversion/TosaToLinalg/tosa-to-linalg-named.mlir @@ -165,15 +165,21 @@ } // CHECK-LABEL: @max_pool_dyn -func.func @max_pool_dyn(%arg0: tensor) -> () { - // CHECK: %[[C0:.+]] = arith.constant 0 - // CHECK: %[[BATCH:.+]] = tensor.dim %arg0, %[[C0]] +func.func @max_pool_dyn(%arg0: tensor) -> () { + // CHECK: %[[C0:.+]] = arith.constant 0 : index + // CHECK: %[[DIM0:.+]] = tensor.dim %arg0, %[[C0]] : tensor + // CHECK: %[[C1:.+]] = arith.constant 1 : index + // CHECK: %[[DIM1:.+]] = tensor.dim %arg0, %[[C1]] : tensor + // CHECK: arith.constant 2 : index + // CHECK: %[[C2:.+]] = arith.constant 2 : index + // CHECK: %[[DIM2:.+]] = tensor.dim %arg0, %[[C2]] : tensor + // CHECK: %[[PAD:.+]] = tensor.pad %arg0 // CHECK: %[[CONST:.+]] = arith.constant -3.40282347E+38 - // CHECK: %[[INIT:.+]] = linalg.init_tensor [%[[BATCH]], 4, 32, 62] - // CHECK: %[[FILL:.+]] = linalg.fill ins(%[[CONST]]{{.*}}outs(%[[INIT]] + // CHECK: %[[INIT:.+]] = linalg.init_tensor + // CHECK: %[[FILL:.+]] = linalg.fill ins(%cst_18 : f32) outs(%20 : tensor) -> tensor // CHECK: %[[KERNEL:.+]] = linalg.init_tensor [3, 3] - // CHECK: linalg.pooling_nhwc_max {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%arg0, %[[KERNEL]] : tensor, tensor<3x3xf32>) outs(%[[FILL]] : tensor) - %0 
= "tosa.max_pool2d"(%arg0) {pad = [0, 0, 0, 0], kernel = [3, 3], stride = [1, 1]} : (tensor) -> (tensor) + // CHECK: linalg.pooling_nhwc_max {dilations = dense<1> : vector<2xi64>, strides = dense<2> : vector<2xi64>} ins(%[[PAD]], %[[KERNEL]] : tensor, tensor<3x3xf32>) outs(%[[FILL]] : tensor) -> tensor + %0 = "tosa.max_pool2d"(%arg0) {kernel = [3, 3], pad = [1, 1, 1, 1], stride = [2, 2]} : (tensor) -> (tensor) return } @@ -279,6 +285,25 @@ return %0 : tensor } +// CHECK-LABEL: @avg_pool_dyn_h +func.func @avg_pool_dyn_h(%arg0: tensor<2x?x34x62xf32>) -> (tensor<2x?x33x62xf32>) { + // CHECK: %[[C1:.+]] = arith.constant 1 + // CHECK: %[[DIM1:.+]] = tensor.dim %arg0, %[[C1]] + // CHECK: arith.addi + // CHECK: arith.addi + // CHECK: arith.addi + // CHECK: %[[RESULT:.+]] = arith.addi + // CHECK: %[[PAD:.+]] = tensor.pad %arg0 low[0, 1, 1, 0] high[0, 1, 1, 0] + // CHECK: %[[POOLINIT:.+]] = linalg.init_tensor [2, %[[RESULT]], 33, 62] + // CHECK: %[[FILL:.+]] = linalg.fill + // CHECK: %[[KERNEL:.+]] = linalg.init_tensor [4, 4] + // CHECK: %[[POOL:.+]] = linalg.pooling_nhwc_sum {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%[[PAD]], %[[KERNEL]] : tensor<2x?x36x62xf32>, tensor<4x4xf32>) outs(%[[FILL]] : tensor<2x?x33x62xf32>) + // CHECK: %[[INIT:.+]] = linalg.init_tensor [2, %[[RESULT]], 33, 62] + // CHECK: %[[GENERIC:.+]] = linalg.generic {indexing_maps = [#map, #map], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%[[POOL]] : tensor<2x?x33x62xf32>) outs(%[[INIT]] : tensor<2x?x33x62xf32>) + %0 = "tosa.avg_pool2d"(%arg0) {pad = [1, 1, 1, 1], kernel = [4, 4], stride = [1, 1]} : (tensor<2x?x34x62xf32>) -> (tensor<2x?x33x62xf32>) + return %0 : tensor<2x?x33x62xf32> +} + // ----- // CHECK-LABEL: @avg_pool_i8 @@ -405,7 +430,7 @@ // CHECK: %[[SUBTRACTED:.+]] = arith.subi %[[ADD_PAD_1]], %[[ADD_ONE]] : index // CHECK: %[[STRIDE_H:.+]] = arith.constant 1 : index // CHECK: %[[DIVIDED:.+]] = arith.divui %[[SUBTRACTED]], 
%[[STRIDE_H]] : index - // CHECK: %[[H_OUT:.+]] = arith.subi %[[DIVIDED]], %[[ONE]] : index + // CHECK: %[[H_OUT:.+]] = arith.addi %[[DIVIDED]], %[[ONE]] : index // Computing output width // CHECK: %[[C2:.+]] = arith.constant 2 @@ -424,7 +449,7 @@ // CHECK: %[[SUBTRACTED_0:.+]] = arith.subi %[[ADD_PAD_3]], %[[ADD_ONE_0]] : index // CHECK: %[[STRIDE_W:.+]] = arith.constant 1 : index // CHECK: %[[DIVIDED_0:.+]] = arith.divui %[[SUBTRACTED_0]], %[[STRIDE_W]] : index - // CHECK: %[[W_OUT:.+]] = arith.subi %[[DIVIDED_0]], %[[ONE_0]] : index + // CHECK: %[[W_OUT:.+]] = arith.addi %[[DIVIDED_0]], %[[ONE_0]] : index // Running convolution // CHECK: %[[PERM:.+]] = arith.constant dense<[1, 2, 3, 0]>