diff --git a/mlir/lib/Dialect/Linalg/Transforms/Loops.cpp b/mlir/lib/Dialect/Linalg/Transforms/Loops.cpp --- a/mlir/lib/Dialect/Linalg/Transforms/Loops.cpp +++ b/mlir/lib/Dialect/Linalg/Transforms/Loops.cpp @@ -222,22 +222,24 @@ nPar > 0 ? O(ivs) = fillOp.value() : O() = fillOp.value(); } +// Create a padded view into the given `input` tensor using the 'indices' +// to access the tensor. `skipPadding` lists the dimensions for which no padding +// is needed e.g. the non-spatial dimensions for convolutions. template -static Value getConvOpInput(ConvOp convOp, StdIndexedValue im, - MutableArrayRef imIdx) { +Value getPaddedInput(Value input, ArrayRef indices, + ArrayRef skipPadding, Value padValue) { // TODO: add a level of indirection to linalg.generic. - if (!convOp.padding()) - return im(imIdx); + + IndexedValueType indexedInput(input); auto *context = ScopedContext::getContext(); Value zeroIndex = std_constant_index(0); SmallVector conds; SmallVector clampedImIdx; - for (auto iter : llvm::enumerate(imIdx)) { + for (auto iter : llvm::enumerate(indices)) { int idx = iter.index(); auto dim = iter.value(); - // Only need to iterate over the window dimensions. 
- if (idx == 0 || idx == static_cast(imIdx.size()) - 1) { + if (is_contained(skipPadding, idx)) { clampedImIdx.push_back(dim); continue; } @@ -250,7 +252,7 @@ conds.push_back(leftOutOfBound); else conds.push_back(conds.back() || leftOutOfBound); - Value rightBound = std_dim(convOp.input(), idx); + Value rightBound = std_dim(input, idx); conds.push_back(conds.back() || (sge(dim, rightBound))); // When padding is involved, the indices will only be shifted to negative, @@ -262,14 +264,73 @@ clampedImIdx.push_back(affine_max(dim.getType(), maxMap, ValueRange{dim})); } - auto &b = ScopedContext::getBuilderRef(); - Type type = convOp.input().getType().cast().getElementType(); - Value zero = std_constant(type, b.getZeroAttr(type)); - Value readInput = im(clampedImIdx); + Value readInput = indexedInput(clampedImIdx); return conds.empty() ? readInput - : (Value)std_select(conds.back(), zero, readInput); + : (Value)std_select(conds.back(), padValue, readInput); +} + +namespace { + +/// The padding value for a given Op depends on the semantics of the Op. +/// The identity value for ConvOp and PoolingSumOp is 0, for PoolingMaxOp is +/// -inf or minInt and for PoolingMinOp is inf or maxInt. +template +Attribute getPadValueAttr(Type type) { + llvm_unreachable("Unexpected op type for getPadValueAttr"); + return {}; +} + +template <> +Attribute getPadValueAttr(Type type) { + auto &b = ScopedContext::getBuilderRef(); + if (auto floatType = type.dyn_cast()) { + return b.getFloatAttr( + floatType, + APFloat::getInf(floatType.getFloatSemantics(), /*Negative*/ true)); + } + if (auto intType = type.dyn_cast()) { + unsigned width = intType.getWidth(); + // The select instruction used to lower the PoolingMax uses a signed + // comparison, use a signed constant irrespective of the signedness of the + // integer type. 
+ return b.getIntegerAttr(intType, APInt::getSignedMinValue(width)); + } + llvm_unreachable("Unsupported data type for PoolingMaxOp"); + return {}; } +template <> +Attribute getPadValueAttr(Type type) { + auto &b = ScopedContext::getBuilderRef(); + if (auto floatType = type.dyn_cast()) { + return b.getFloatAttr(floatType, + APFloat::getInf(floatType.getFloatSemantics())); + } + if (auto intType = type.dyn_cast()) { + unsigned width = intType.getWidth(); + // The select instruction used to lower the PoolingMin uses a signed + // comparison, use a signed constant irrespective of the signedness of the + // integer type. + return b.getIntegerAttr(intType, APInt::getSignedMaxValue(width)); + } + llvm_unreachable("Unsupported data type for PoolingMinOp"); + return {}; +} + +template <> +Attribute getPadValueAttr(Type type) { + auto &b = ScopedContext::getBuilderRef(); + return b.getZeroAttr(type); +} + +template <> +Attribute getPadValueAttr(Type type) { + auto &b = ScopedContext::getBuilderRef(); + return b.getZeroAttr(type); +} + +} // namespace + /// Returns true is `convOp` has a non-zero padding. static bool hasPadding(ConvOp convOp) { for (unsigned i = 0, e = convOp.getNumSpatialDimensions(); i < e; ++i) { @@ -301,8 +362,12 @@ // which is not allowed by affine.load. Override to use an StdIndexedValue // when there is non-zero padding. 
if (hasPadding(convOp)) { - StdIndexedValue I(convOp.input()); - Value paddedInput = getConvOpInput(convOp, I, imIdx); + Type type = convOp.input().getType().cast().getElementType(); + Value padValue = std_constant(type, getPadValueAttr(type)); + Value paddedInput = getPaddedInput( + convOp.input(), imIdx, + /* Only need to pad the window dimensions */ + {0, static_cast(imIdx.size()) - 1}, padValue); O(oIdx) += F(fIdx) * paddedInput; } else { IndexedValueType I(convOp.input()); @@ -310,15 +375,36 @@ } } +template +static bool hasPadding(PoolingOp poolingOp) { + for (unsigned i = 0, e = poolingOp.getNumWindowLoops(); i < e; ++i) { + if (poolingOp.getLowPad(i) > 0 || poolingOp.getHighPad(i) > 0) + return true; + } + return false; +} + +template +static Value getPoolingInput(PoolingOp op, ArrayRef inputIndices) { + if (hasPadding(op)) { + Type type = + op.input().getType().template cast().getElementType(); + Value padValue = std_constant(type, getPadValueAttr(type)); + return getPaddedInput(op.input(), inputIndices, + /*Pad every dimension*/ {}, + padValue); + } + IndexedValueType input(op.input()); + return input(inputIndices); +} + template -static void emitPoolingMinMaxScalarImplementation(ArrayRef allIvs, - OpType op) { +void emitPoolingMinMaxScalarImplementation(ArrayRef allIvs, OpType op) { InputAndOutputIndices indices = getInputAndOutputIndices(allIvs, op); // Emit scalar form. IndexedValueType output(op.output()); - IndexedValueType input(op.input()); Value lhs = output(indices.outputs); - Value rhs = input(indices.inputs); + Value rhs = getPoolingInput(op, indices.inputs); using edsc::op::sgt; using edsc::op::slt; Value value = std::is_same() @@ -342,10 +428,11 @@ template static void emitScalarImplementation(ArrayRef allIvs, PoolingSumOp op) { auto indices = getInputAndOutputIndices(allIvs, op); - IndexedValueType input(op.input()), output(op.output()); + IndexedValueType output(op.output()); // Emit scalar form. 
- output(indices.outputs) += input(indices.inputs); + output(indices.outputs) += + getPoolingInput(op, indices.inputs); } /// Emits the MLIR for the scalar part of the indexed generic op by: diff --git a/mlir/test/Dialect/Linalg/loops.mlir b/mlir/test/Dialect/Linalg/loops.mlir --- a/mlir/test/Dialect/Linalg/loops.mlir +++ b/mlir/test/Dialect/Linalg/loops.mlir @@ -17,6 +17,8 @@ // CHECKLOOP-DAG: #[[$convLowerBound:.*]] = affine_map<()[s0] -> (s0 floordiv 2)> // CHECKLOOP-DAG: #[[$convUpperBound:.*]] = affine_map<()[s0, s1] -> (s1 + s0 floordiv 2 - s0 + 1)> // CHECKLOOP-DAG: #[[$convMap:.*]] = affine_map<(d0, d1)[s0] -> (d0 + d1 - s0 floordiv 2)> +// CHECKLOOP-DAG: #[[$stride1Dilation1Padding1:.*]] = affine_map<(d0, d1) -> (d0 + d1 - 1)> +// CHECKLOOP-DAG: #[[$stride1Dilation1Padding2:.*]] = affine_map<(d0, d1) -> (d0 + d1 - 2)> // CHECKPARALLEL-DAG: #[[$strided1D:.*]] = affine_map<(d0)[s0] -> (d0 + s0)> // CHECKPARALLEL-DAG: #[[$strided2D:.*]] = affine_map<(d0, d1)[s0, s1] -> (d0 * s1 + s0 + d1)> @@ -31,6 +33,8 @@ // CHECKPARALLEL-DAG: #[[$convLowerBound:.*]] = affine_map<()[s0] -> (s0 floordiv 2)> // CHECKPARALLEL-DAG: #[[$convUpperBound:.*]] = affine_map<()[s0, s1] -> (s1 + s0 floordiv 2 - s0 + 1)> // CHECKPARALLEL-DAG: #[[$convMap:.*]] = affine_map<(d0, d1)[s0] -> (d0 + d1 - s0 floordiv 2)> +// CHECKPARALLEL-DAG: #[[$stride1Dilation1Padding1:.*]] = affine_map<(d0, d1) -> (d0 + d1 - 1)> +// CHECKPARALLEL-DAG: #[[$stride1Dilation1Padding2:.*]] = affine_map<(d0, d1) -> (d0 + d1 - 2)> func @matmul(%arg0: memref, %M: index, %N: index, %K: index) { @@ -470,6 +474,102 @@ // CHECKPARALLEL: %[[RES:.*]] = select %{{.*}}, %{{.*}}, %{{.*}} : f32 // CHECKPARALLEL: store %[[RES]], %{{.*}}[%{{.*}}, %{{.*}}] : memref +func @pooling_max_padding(%arg0: memref, + %arg1: memref, + %arg2: memref) { + linalg.pooling_max(%arg0, %arg1, %arg2) { padding = dense<[[2, 2], [1, 1]]> : tensor<2x2xi64> } : + memref, memref, memref + return +} +// CHECKLOOP-LABEL: func @pooling_max_padding +// 
CHECKLOOP: %[[PAD:.*]] = constant 0xFF800000 : f32 +// CHECKLOOP: %[[WX:.*]] = dim %arg1, %c0 : memref +// CHECKLOOP: %[[WY:.*]] = dim %arg1, %c1 : memref +// CHECKLOOP: %[[OX:.*]] = dim %arg2, %c0 : memref +// CHECKLOOP: %[[OY:.*]] = dim %arg2, %c1 : memref +// CHECKLOOP: scf.for %{{.*}} = %{{.*}} to %[[OX]] step %{{.*}} { +// CHECKLOOP: scf.for %{{.*}} = %{{.*}} to %[[OY]] step %{{.*}} { +// CHECKLOOP: scf.for %{{.*}} = %{{.*}} to %[[WX]] step %{{.*}} { +// CHECKLOOP: scf.for %{{.*}} = %{{.*}} to %[[WY]] step %{{.*}} { +// CHECKLOOP: %[[IX:.*]] = affine.apply #[[$stride1Dilation1Padding2]](%{{.*}}, %{{.*}}) +// CHECKLOOP: %[[IY:.*]] = affine.apply #[[$stride1Dilation1Padding1]](%{{.*}}, %{{.*}}) +// CHECKLOOP: %[[RHS:.*]] = load %{{.*}}[%{{.*}}, %{{.*}}] : memref +// CHECKLOOP: %[[IDX:.*]] = affine.max #[[$clampMinMap]](%[[IX]]) +// CHECKLOOP: %[[IDY:.*]] = affine.max #[[$clampMinMap]](%[[IY]]) +// CHECKLOOP: %[[LHS:.*]] = load %{{.*}}[%[[IDX]], %[[IDY]]] : memref +// CHECKLOOP: %[[SEL:.*]] = select %{{.*}}, %[[PAD]], %[[LHS]] : f32 +// CHECKLOOP: %[[CMP:.*]] = cmpf "ogt", %[[RHS]], %[[SEL]] : f32 +// CHECKLOOP: %[[RES:.*]] = select %{{.*}}, %[[RHS]], %[[SEL]] : f32 +// CHECKLOOP: store %[[RES]], %{{.*}}[%{{.*}}, %{{.*}}] : memref + +// CHECKPARALLEL-LABEL: func @pooling_max_padding +// CHECKPARALLEL: %[[PAD:.*]] = constant 0xFF800000 : f32 +// CHECKPARALLEL: %[[WX:.*]] = dim %arg1, %c0 : memref +// CHECKPARALLEL: %[[WY:.*]] = dim %arg1, %c1 : memref +// CHECKPARALLEL: %[[OX:.*]] = dim %arg2, %c0 : memref +// CHECKPARALLEL: %[[OY:.*]] = dim %arg2, %c1 : memref +// CHECKPARALLEL: scf.parallel (%{{.*}}, %{{.*}}) = (%{{.*}}, %{{.*}}) to (%[[OX]], %[[OY]]) step (%{{.*}}, %{{.*}}) { +// CHECKPARALLEL: scf.for %{{.*}} = %{{.*}} to %[[WX]] step %{{.*}} { +// CHECKPARALLEL: scf.for %{{.*}} = %{{.*}} to %[[WY]] step %{{.*}} { +// CHECKPARALLEL: %[[IX:.*]] = affine.apply #[[$stride1Dilation1Padding2]](%{{.*}}, %{{.*}}) +// CHECKPARALLEL: %[[IY:.*]] = affine.apply 
#[[$stride1Dilation1Padding1]](%{{.*}}, %{{.*}}) +// CHECKPARALLEL: %[[RHS:.*]] = load %{{.*}}[%{{.*}}, %{{.*}}] : memref +// CHECKPARALLEL: %[[IDX:.*]] = affine.max #[[$clampMinMap]](%[[IX]]) +// CHECKPARALLEL: %[[IDY:.*]] = affine.max #[[$clampMinMap]](%[[IY]]) +// CHECKPARALLEL: %[[LHS:.*]] = load %{{.*}}[%[[IDX]], %[[IDY]]] : memref +// CHECKPARALLEL: %[[SEL:.*]] = select %{{.*}}, %[[PAD]], %[[LHS]] : f32 +// CHECKPARALLEL: %[[CMP:.*]] = cmpf "ogt", %[[RHS]], %[[SEL]] : f32 +// CHECKPARALLEL: %[[RES:.*]] = select %{{.*}}, %[[RHS]], %[[SEL]] : f32 +// CHECKPARALLEL: store %[[RES]], %{{.*}}[%{{.*}}, %{{.*}}] : memref + +func @pooling_max_padding_i32(%arg0: memref, + %arg1: memref, + %arg2: memref) { + linalg.pooling_max(%arg0, %arg1, %arg2) { padding = dense<[[2, 2], [1, 1]]> : tensor<2x2xi64> } : + memref, memref, memref + return +} +// CHECKLOOP-LABEL: func @pooling_max_padding_i32 +// CHECKLOOP: %[[PAD:.*]] = constant -2147483648 : i32 +// CHECKLOOP: %[[WX:.*]] = dim %arg1, %c0 : memref +// CHECKLOOP: %[[WY:.*]] = dim %arg1, %c1 : memref +// CHECKLOOP: %[[OX:.*]] = dim %arg2, %c0 : memref +// CHECKLOOP: %[[OY:.*]] = dim %arg2, %c1 : memref +// CHECKLOOP: scf.for %{{.*}} = %{{.*}} to %[[OX]] step %{{.*}} { +// CHECKLOOP: scf.for %{{.*}} = %{{.*}} to %[[OY]] step %{{.*}} { +// CHECKLOOP: scf.for %{{.*}} = %{{.*}} to %[[WX]] step %{{.*}} { +// CHECKLOOP: scf.for %{{.*}} = %{{.*}} to %[[WY]] step %{{.*}} { +// CHECKLOOP: %[[IX:.*]] = affine.apply #[[$stride1Dilation1Padding2]](%{{.*}}, %{{.*}}) +// CHECKLOOP: %[[IY:.*]] = affine.apply #[[$stride1Dilation1Padding1]](%{{.*}}, %{{.*}}) +// CHECKLOOP: %[[RHS:.*]] = load %{{.*}}[%{{.*}}, %{{.*}}] : memref +// CHECKLOOP: %[[IDX:.*]] = affine.max #[[$clampMinMap]](%[[IX]]) +// CHECKLOOP: %[[IDY:.*]] = affine.max #[[$clampMinMap]](%[[IY]]) +// CHECKLOOP: %[[LHS:.*]] = load %{{.*}}[%[[IDX]], %[[IDY]]] : memref +// CHECKLOOP: %[[SEL:.*]] = select %{{.*}}, %[[PAD]], %[[LHS]] : i32 +// CHECKLOOP: %[[CMP:.*]] = cmpi "sgt", 
%[[RHS]], %[[SEL]] : i32 +// CHECKLOOP: %[[RES:.*]] = select %{{.*}}, %[[RHS]], %[[SEL]] : i32 +// CHECKLOOP: store %[[RES]], %{{.*}}[%{{.*}}, %{{.*}}] : memref + +// CHECKPARALLEL-LABEL: func @pooling_max_padding_i32 +// CHECKPARALLEL: %[[PAD:.*]] = constant -2147483648 : i32 +// CHECKPARALLEL: %[[WX:.*]] = dim %arg1, %c0 : memref +// CHECKPARALLEL: %[[WY:.*]] = dim %arg1, %c1 : memref +// CHECKPARALLEL: %[[OX:.*]] = dim %arg2, %c0 : memref +// CHECKPARALLEL: %[[OY:.*]] = dim %arg2, %c1 : memref +// CHECKPARALLEL: scf.parallel (%{{.*}}, %{{.*}}) = (%{{.*}}, %{{.*}}) to (%[[OX]], %[[OY]]) step (%{{.*}}, %{{.*}}) { +// CHECKPARALLEL: scf.for %{{.*}} = %{{.*}} to %[[WX]] step %{{.*}} { +// CHECKPARALLEL: scf.for %{{.*}} = %{{.*}} to %[[WY]] step %{{.*}} { +// CHECKPARALLEL: %[[IX:.*]] = affine.apply #[[$stride1Dilation1Padding2]](%{{.*}}, %{{.*}}) +// CHECKPARALLEL: %[[IY:.*]] = affine.apply #[[$stride1Dilation1Padding1]](%{{.*}}, %{{.*}}) +// CHECKPARALLEL: %[[RHS:.*]] = load %{{.*}}[%{{.*}}, %{{.*}}] : memref +// CHECKPARALLEL: %[[IDX:.*]] = affine.max #[[$clampMinMap]](%[[IX]]) +// CHECKPARALLEL: %[[IDY:.*]] = affine.max #[[$clampMinMap]](%[[IY]]) +// CHECKPARALLEL: %[[LHS:.*]] = load %{{.*}}[%[[IDX]], %[[IDY]]] : memref +// CHECKPARALLEL: %[[SEL:.*]] = select %{{.*}}, %[[PAD]], %[[LHS]] : i32 +// CHECKPARALLEL: %[[CMP:.*]] = cmpi "sgt", %[[RHS]], %[[SEL]] : i32 +// CHECKPARALLEL: %[[RES:.*]] = select %{{.*}}, %[[RHS]], %[[SEL]] : i32 +// CHECKPARALLEL: store %[[RES]], %{{.*}}[%{{.*}}, %{{.*}}] : memref + func @pooling_min(%arg0: memref, %arg1: memref, %arg2: memref) { @@ -508,6 +608,102 @@ // CHECKPARALLEL: %[[RES:.*]] = select %{{.*}}, %{{.*}}, %{{.*}} : f32 // CHECKPARALLEL: store %[[RES]], %{{.*}}[%{{.*}}, %{{.*}}] : memref +func @pooling_min_padding(%arg0: memref, + %arg1: memref, + %arg2: memref) { + linalg.pooling_min(%arg0, %arg1, %arg2) { padding = dense<[[2, 2], [1, 1]]> : tensor<2x2xi64> } : + memref, memref, memref + return +} +// CHECKLOOP-LABEL: func 
@pooling_min_padding +// CHECKLOOP: %[[PAD:.*]] = constant 0x7F800000 : f32 +// CHECKLOOP: %[[WX:.*]] = dim %arg1, %c0 : memref +// CHECKLOOP: %[[WY:.*]] = dim %arg1, %c1 : memref +// CHECKLOOP: %[[OX:.*]] = dim %arg2, %c0 : memref +// CHECKLOOP: %[[OY:.*]] = dim %arg2, %c1 : memref +// CHECKLOOP: scf.for %{{.*}} = %{{.*}} to %[[OX]] step %{{.*}} { +// CHECKLOOP: scf.for %{{.*}} = %{{.*}} to %[[OY]] step %{{.*}} { +// CHECKLOOP: scf.for %{{.*}} = %{{.*}} to %[[WX]] step %{{.*}} { +// CHECKLOOP: scf.for %{{.*}} = %{{.*}} to %[[WY]] step %{{.*}} { +// CHECKLOOP: %[[IX:.*]] = affine.apply #[[$stride1Dilation1Padding2]](%{{.*}}, %{{.*}}) +// CHECKLOOP: %[[IY:.*]] = affine.apply #[[$stride1Dilation1Padding1]](%{{.*}}, %{{.*}}) +// CHECKLOOP: %[[RHS:.*]] = load %{{.*}}[%{{.*}}, %{{.*}}] : memref +// CHECKLOOP: %[[IDX:.*]] = affine.max #[[$clampMinMap]](%[[IX]]) +// CHECKLOOP: %[[IDY:.*]] = affine.max #[[$clampMinMap]](%[[IY]]) +// CHECKLOOP: %[[LHS:.*]] = load %{{.*}}[%[[IDX]], %[[IDY]]] : memref +// CHECKLOOP: %[[SEL:.*]] = select %{{.*}}, %[[PAD]], %[[LHS]] : f32 +// CHECKLOOP: %[[CMP:.*]] = cmpf "olt", %[[RHS]], %[[SEL]] : f32 +// CHECKLOOP: %[[RES:.*]] = select %{{.*}}, %[[RHS]], %[[SEL]] : f32 +// CHECKLOOP: store %[[RES]], %{{.*}}[%{{.*}}, %{{.*}}] : memref + +// CHECKPARALLEL-LABEL: func @pooling_min_padding +// CHECKPARALLEL: %[[PAD:.*]] = constant 0x7F800000 : f32 +// CHECKPARALLEL: %[[WX:.*]] = dim %arg1, %c0 : memref +// CHECKPARALLEL: %[[WY:.*]] = dim %arg1, %c1 : memref +// CHECKPARALLEL: %[[OX:.*]] = dim %arg2, %c0 : memref +// CHECKPARALLEL: %[[OY:.*]] = dim %arg2, %c1 : memref +// CHECKPARALLEL: scf.parallel (%{{.*}}, %{{.*}}) = (%{{.*}}, %{{.*}}) to (%[[OX]], %[[OY]]) step (%{{.*}}, %{{.*}}) { +// CHECKPARALLEL: scf.for %{{.*}} = %{{.*}} to %[[WX]] step %{{.*}} { +// CHECKPARALLEL: scf.for %{{.*}} = %{{.*}} to %[[WY]] step %{{.*}} { +// CHECKPARALLEL: %[[IX:.*]] = affine.apply #[[$stride1Dilation1Padding2]](%{{.*}}, %{{.*}}) +// CHECKPARALLEL: %[[IY:.*]] 
= affine.apply #[[$stride1Dilation1Padding1]](%{{.*}}, %{{.*}}) +// CHECKPARALLEL: %[[RHS:.*]] = load %{{.*}}[%{{.*}}, %{{.*}}] : memref +// CHECKPARALLEL: %[[IDX:.*]] = affine.max #[[$clampMinMap]](%[[IX]]) +// CHECKPARALLEL: %[[IDY:.*]] = affine.max #[[$clampMinMap]](%[[IY]]) +// CHECKPARALLEL: %[[LHS:.*]] = load %{{.*}}[%[[IDX]], %[[IDY]]] : memref +// CHECKPARALLEL: %[[SEL:.*]] = select %{{.*}}, %[[PAD]], %[[LHS]] : f32 +// CHECKPARALLEL: %[[CMP:.*]] = cmpf "olt", %[[RHS]], %[[SEL]] : f32 +// CHECKPARALLEL: %[[RES:.*]] = select %{{.*}}, %[[RHS]], %[[SEL]] : f32 +// CHECKPARALLEL: store %[[RES]], %{{.*}}[%{{.*}}, %{{.*}}] : memref + +func @pooling_min_padding_i32(%arg0: memref, + %arg1: memref, + %arg2: memref) { + linalg.pooling_min(%arg0, %arg1, %arg2) { padding = dense<[[2, 2], [1, 1]]> : tensor<2x2xi64> } : + memref, memref, memref + return +} +// CHECKLOOP-LABEL: func @pooling_min_padding_i32 +// CHECKLOOP: %[[PAD:.*]] = constant 2147483647 : i32 +// CHECKLOOP: %[[WX:.*]] = dim %arg1, %c0 : memref +// CHECKLOOP: %[[WY:.*]] = dim %arg1, %c1 : memref +// CHECKLOOP: %[[OX:.*]] = dim %arg2, %c0 : memref +// CHECKLOOP: %[[OY:.*]] = dim %arg2, %c1 : memref +// CHECKLOOP: scf.for %{{.*}} = %{{.*}} to %[[OX]] step %{{.*}} { +// CHECKLOOP: scf.for %{{.*}} = %{{.*}} to %[[OY]] step %{{.*}} { +// CHECKLOOP: scf.for %{{.*}} = %{{.*}} to %[[WX]] step %{{.*}} { +// CHECKLOOP: scf.for %{{.*}} = %{{.*}} to %[[WY]] step %{{.*}} { +// CHECKLOOP: %[[IX:.*]] = affine.apply #[[$stride1Dilation1Padding2]](%{{.*}}, %{{.*}}) +// CHECKLOOP: %[[IY:.*]] = affine.apply #[[$stride1Dilation1Padding1]](%{{.*}}, %{{.*}}) +// CHECKLOOP: %[[RHS:.*]] = load %{{.*}}[%{{.*}}, %{{.*}}] : memref +// CHECKLOOP: %[[IDX:.*]] = affine.max #[[$clampMinMap]](%[[IX]]) +// CHECKLOOP: %[[IDY:.*]] = affine.max #[[$clampMinMap]](%[[IY]]) +// CHECKLOOP: %[[LHS:.*]] = load %{{.*}}[%[[IDX]], %[[IDY]]] : memref +// CHECKLOOP: %[[SEL:.*]] = select %{{.*}}, %[[PAD]], %[[LHS]] : i32 +// CHECKLOOP: %[[CMP:.*]] = 
cmpi "slt", %[[RHS]], %[[SEL]] : i32 +// CHECKLOOP: %[[RES:.*]] = select %{{.*}}, %[[RHS]], %[[SEL]] : i32 +// CHECKLOOP: store %[[RES]], %{{.*}}[%{{.*}}, %{{.*}}] : memref + +// CHECKPARALLEL-LABEL: func @pooling_min_padding_i32 +// CHECKPARALLEL: %[[PAD:.*]] = constant 2147483647 : i32 +// CHECKPARALLEL: %[[WX:.*]] = dim %arg1, %c0 : memref +// CHECKPARALLEL: %[[WY:.*]] = dim %arg1, %c1 : memref +// CHECKPARALLEL: %[[OX:.*]] = dim %arg2, %c0 : memref +// CHECKPARALLEL: %[[OY:.*]] = dim %arg2, %c1 : memref +// CHECKPARALLEL: scf.parallel (%{{.*}}, %{{.*}}) = (%{{.*}}, %{{.*}}) to (%[[OX]], %[[OY]]) step (%{{.*}}, %{{.*}}) { +// CHECKPARALLEL: scf.for %{{.*}} = %{{.*}} to %[[WX]] step %{{.*}} { +// CHECKPARALLEL: scf.for %{{.*}} = %{{.*}} to %[[WY]] step %{{.*}} { +// CHECKPARALLEL: %[[IX:.*]] = affine.apply #[[$stride1Dilation1Padding2]](%{{.*}}, %{{.*}}) +// CHECKPARALLEL: %[[IY:.*]] = affine.apply #[[$stride1Dilation1Padding1]](%{{.*}}, %{{.*}}) +// CHECKPARALLEL: %[[RHS:.*]] = load %{{.*}}[%{{.*}}, %{{.*}}] : memref +// CHECKPARALLEL: %[[IDX:.*]] = affine.max #[[$clampMinMap]](%[[IX]]) +// CHECKPARALLEL: %[[IDY:.*]] = affine.max #[[$clampMinMap]](%[[IY]]) +// CHECKPARALLEL: %[[LHS:.*]] = load %{{.*}}[%[[IDX]], %[[IDY]]] : memref +// CHECKPARALLEL: %[[SEL:.*]] = select %{{.*}}, %[[PAD]], %[[LHS]] : i32 +// CHECKPARALLEL: %[[CMP:.*]] = cmpi "slt", %[[RHS]], %[[SEL]] : i32 +// CHECKPARALLEL: %[[RES:.*]] = select %{{.*}}, %[[RHS]], %[[SEL]] : i32 +// CHECKPARALLEL: store %[[RES]], %{{.*}}[%{{.*}}, %{{.*}}] : memref + func @pooling_sum(%arg0: memref, %arg1: memref, %arg2: memref) { @@ -546,6 +742,98 @@ // CHECKPARALLEL: %[[RES:.*]] = addf %[[LHS]], %[[RHS]] : f32 // CHECKPARALLEL: store %[[RES]], %{{.*}}[%{{.*}}, %{{.*}}] : memref +func @pooling_sum_padding(%arg0: memref, + %arg1: memref, + %arg2: memref) { + linalg.pooling_sum(%arg0, %arg1, %arg2) { padding = dense<[[2, 2], [1, 1]]> : tensor<2x2xi64> } : + memref, memref, memref + return +} +// CHECKLOOP-LABEL: 
func @pooling_sum_padding +// CHECKLOOP: %[[PAD:.*]] = constant 0.000000e+00 : f32 +// CHECKLOOP: %[[WX:.*]] = dim %arg1, %c0 : memref +// CHECKLOOP: %[[WY:.*]] = dim %arg1, %c1 : memref +// CHECKLOOP: %[[OX:.*]] = dim %arg2, %c0 : memref +// CHECKLOOP: %[[OY:.*]] = dim %arg2, %c1 : memref +// CHECKLOOP: scf.for %{{.*}} = %{{.*}} to %[[OX]] step %{{.*}} { +// CHECKLOOP: scf.for %{{.*}} = %{{.*}} to %[[OY]] step %{{.*}} { +// CHECKLOOP: scf.for %{{.*}} = %{{.*}} to %[[WX]] step %{{.*}} { +// CHECKLOOP: scf.for %{{.*}} = %{{.*}} to %[[WY]] step %{{.*}} { +// CHECKLOOP: %[[IX:.*]] = affine.apply #[[$stride1Dilation1Padding2]](%{{.*}}, %{{.*}}) +// CHECKLOOP: %[[IY:.*]] = affine.apply #[[$stride1Dilation1Padding1]](%{{.*}}, %{{.*}}) +// CHECKLOOP: %[[IDX:.*]] = affine.max #[[$clampMinMap]](%[[IX]]) +// CHECKLOOP: %[[IDY:.*]] = affine.max #[[$clampMinMap]](%[[IY]]) +// CHECKLOOP: %[[LHS:.*]] = load %{{.*}}[%[[IDX]], %[[IDY]]] : memref +// CHECKLOOP: %[[SEL:.*]] = select %{{.*}}, %[[PAD]], %[[LHS]] : f32 +// CHECKLOOP: %[[RHS:.*]] = load %{{.*}}[%{{.*}}, %{{.*}}] : memref +// CHECKLOOP: %[[RES:.*]] = addf %[[RHS]], %[[SEL]] : f32 +// CHECKLOOP: store %[[RES]], %{{.*}}[%{{.*}}, %{{.*}}] : memref + +// CHECKPARALLEL-LABEL: func @pooling_sum_padding +// CHECKPARALLEL: %[[PAD:.*]] = constant 0.000000e+00 : f32 +// CHECKPARALLEL: %[[WX:.*]] = dim %arg1, %c0 : memref +// CHECKPARALLEL: %[[WY:.*]] = dim %arg1, %c1 : memref +// CHECKPARALLEL: %[[OX:.*]] = dim %arg2, %c0 : memref +// CHECKPARALLEL: %[[OY:.*]] = dim %arg2, %c1 : memref +// CHECKPARALLEL: scf.parallel (%{{.*}}, %{{.*}}) = (%{{.*}}, %{{.*}}) to (%[[OX]], %[[OY]]) step (%{{.*}}, %{{.*}}) { +// CHECKPARALLEL: scf.for %{{.*}} = %{{.*}} to %[[WX]] step %{{.*}} { +// CHECKPARALLEL: scf.for %{{.*}} = %{{.*}} to %[[WY]] step %{{.*}} { +// CHECKPARALLEL: %[[IX:.*]] = affine.apply #[[$stride1Dilation1Padding2]](%{{.*}}, %{{.*}}) +// CHECKPARALLEL: %[[IY:.*]] = affine.apply #[[$stride1Dilation1Padding1]](%{{.*}}, %{{.*}}) +// 
CHECKPARALLEL: %[[IDX:.*]] = affine.max #[[$clampMinMap]](%[[IX]]) +// CHECKPARALLEL: %[[IDY:.*]] = affine.max #[[$clampMinMap]](%[[IY]]) +// CHECKPARALLEL: %[[LHS:.*]] = load %{{.*}}[%[[IDX]], %[[IDY]]] : memref +// CHECKPARALLEL: %[[SEL:.*]] = select %{{.*}}, %[[PAD]], %[[LHS]] : f32 +// CHECKPARALLEL: %[[RHS:.*]] = load %{{.*}}[%{{.*}}, %{{.*}}] : memref +// CHECKPARALLEL: %[[RES:.*]] = addf %[[RHS]], %[[SEL]] : f32 +// CHECKPARALLEL: store %[[RES]], %{{.*}}[%{{.*}}, %{{.*}}] : memref + +func @pooling_sum_padding_i32(%arg0: memref, + %arg1: memref, + %arg2: memref) { + linalg.pooling_sum(%arg0, %arg1, %arg2) { padding = dense<[[2, 2], [1, 1]]> : tensor<2x2xi64> } : + memref, memref, memref + return +} +// CHECKLOOP-LABEL: func @pooling_sum_padding_i32 +// CHECKLOOP: %[[PAD:.*]] = constant 0 : i32 +// CHECKLOOP: %[[WX:.*]] = dim %arg1, %c0 : memref +// CHECKLOOP: %[[WY:.*]] = dim %arg1, %c1 : memref +// CHECKLOOP: %[[OX:.*]] = dim %arg2, %c0 : memref +// CHECKLOOP: %[[OY:.*]] = dim %arg2, %c1 : memref +// CHECKLOOP: scf.for %{{.*}} = %{{.*}} to %[[OX]] step %{{.*}} { +// CHECKLOOP: scf.for %{{.*}} = %{{.*}} to %[[OY]] step %{{.*}} { +// CHECKLOOP: scf.for %{{.*}} = %{{.*}} to %[[WX]] step %{{.*}} { +// CHECKLOOP: scf.for %{{.*}} = %{{.*}} to %[[WY]] step %{{.*}} { +// CHECKLOOP: %[[IX:.*]] = affine.apply #[[$stride1Dilation1Padding2]](%{{.*}}, %{{.*}}) +// CHECKLOOP: %[[IY:.*]] = affine.apply #[[$stride1Dilation1Padding1]](%{{.*}}, %{{.*}}) +// CHECKLOOP: %[[IDX:.*]] = affine.max #[[$clampMinMap]](%[[IX]]) +// CHECKLOOP: %[[IDY:.*]] = affine.max #[[$clampMinMap]](%[[IY]]) +// CHECKLOOP: %[[LHS:.*]] = load %{{.*}}[%[[IDX]], %[[IDY]]] : memref +// CHECKLOOP: %[[SEL:.*]] = select %{{.*}}, %[[PAD]], %[[LHS]] : i32 +// CHECKLOOP: %[[RHS:.*]] = load %{{.*}}[%{{.*}}, %{{.*}}] : memref +// CHECKLOOP: %[[RES:.*]] = addi %[[RHS]], %[[SEL]] : i32 +// CHECKLOOP: store %[[RES]], %{{.*}}[%{{.*}}, %{{.*}}] : memref + +// CHECKPARALLEL-LABEL: func @pooling_sum_padding_i32 +// 
CHECKPARALLEL: %[[PAD:.*]] = constant 0 : i32 +// CHECKPARALLEL: %[[WX:.*]] = dim %arg1, %c0 : memref +// CHECKPARALLEL: %[[WY:.*]] = dim %arg1, %c1 : memref +// CHECKPARALLEL: %[[OX:.*]] = dim %arg2, %c0 : memref +// CHECKPARALLEL: %[[OY:.*]] = dim %arg2, %c1 : memref +// CHECKPARALLEL: scf.parallel (%{{.*}}, %{{.*}}) = (%{{.*}}, %{{.*}}) to (%[[OX]], %[[OY]]) step (%{{.*}}, %{{.*}}) { +// CHECKPARALLEL: scf.for %{{.*}} = %{{.*}} to %[[WX]] step %{{.*}} { +// CHECKPARALLEL: scf.for %{{.*}} = %{{.*}} to %[[WY]] step %{{.*}} { +// CHECKPARALLEL: %[[IX:.*]] = affine.apply #[[$stride1Dilation1Padding2]](%{{.*}}, %{{.*}}) +// CHECKPARALLEL: %[[IY:.*]] = affine.apply #[[$stride1Dilation1Padding1]](%{{.*}}, %{{.*}}) +// CHECKPARALLEL: %[[IDX:.*]] = affine.max #[[$clampMinMap]](%[[IX]]) +// CHECKPARALLEL: %[[IDY:.*]] = affine.max #[[$clampMinMap]](%[[IY]]) +// CHECKPARALLEL: %[[LHS:.*]] = load %{{.*}}[%[[IDX]], %[[IDY]]] : memref +// CHECKPARALLEL: %[[SEL:.*]] = select %{{.*}}, %[[PAD]], %[[LHS]] : i32 +// CHECKPARALLEL: %[[RHS:.*]] = load %{{.*}}[%{{.*}}, %{{.*}}] : memref +// CHECKPARALLEL: %[[RES:.*]] = addi %[[RHS]], %[[SEL]] : i32 +// CHECKPARALLEL: store %[[RES]], %{{.*}}[%{{.*}}, %{{.*}}] : memref + #accesses = [ affine_map<(i, j, k) -> (i, j)>, affine_map<(i, j, k) -> (i, j, k)>,