diff --git a/mlir/include/mlir/Dialect/Linalg/IR/LinalgOps.h b/mlir/include/mlir/Dialect/Linalg/IR/LinalgOps.h
--- a/mlir/include/mlir/Dialect/Linalg/IR/LinalgOps.h
+++ b/mlir/include/mlir/Dialect/Linalg/IR/LinalgOps.h
@@ -25,6 +25,7 @@
 #include "mlir/Interfaces/CopyOpInterface.h"
 #include "mlir/Interfaces/InferTypeOpInterface.h"
 #include "mlir/Interfaces/SideEffectInterfaces.h"
+#include "mlir/Interfaces/TilingInterface.h"
 #include "mlir/Interfaces/ViewLikeInterface.h"
 #include "mlir/Support/LLVM.h"

diff --git a/mlir/include/mlir/Dialect/Linalg/IR/LinalgOps.td b/mlir/include/mlir/Dialect/Linalg/IR/LinalgOps.td
--- a/mlir/include/mlir/Dialect/Linalg/IR/LinalgOps.td
+++ b/mlir/include/mlir/Dialect/Linalg/IR/LinalgOps.td
@@ -18,6 +18,7 @@
 include "mlir/Interfaces/InferTypeOpInterface.td"
 include "mlir/Interfaces/LoopLikeInterface.td"
 include "mlir/Interfaces/SideEffectInterfaces.td"
+include "mlir/Interfaces/TilingInterface.td"
 include "mlir/Interfaces/ViewLikeInterface.td"

 // Base class for Linalg dialect ops that do not correspond to library calls.
@@ -130,7 +131,10 @@

 def Linalg_PadTensorOp : Linalg_Op<"pad_tensor",
     [AttrSizedOperandSegments, NoSideEffect,
-     DeclareOpInterfaceMethods<ReifyRankedShapedTypeOpInterface>]> {
+     DeclareOpInterfaceMethods<ReifyRankedShapedTypeOpInterface>,
+     DeclareOpInterfaceMethods<TilingInterface>]> {
   let summary = "tensor pad operation";
   let description = [{
     `linalg.pad_tensor` is an operation that pads the `source` tensor

diff --git a/mlir/include/mlir/Interfaces/CMakeLists.txt b/mlir/include/mlir/Interfaces/CMakeLists.txt
--- a/mlir/include/mlir/Interfaces/CMakeLists.txt
+++ b/mlir/include/mlir/Interfaces/CMakeLists.txt
@@ -6,6 +6,7 @@
 add_mlir_interface(InferTypeOpInterface)
 add_mlir_interface(LoopLikeInterface)
 add_mlir_interface(SideEffectInterfaces)
+add_mlir_interface(TilingInterface)
 add_mlir_interface(VectorInterfaces)
 add_mlir_interface(ViewLikeInterface)

diff --git a/mlir/include/mlir/Interfaces/TilingInterface.h b/mlir/include/mlir/Interfaces/TilingInterface.h
new file mode 100644
--- /dev/null
+++ b/mlir/include/mlir/Interfaces/TilingInterface.h
@@ -0,0 +1,26 @@
+//===- TilingInterface.h - Interface for tiling operations -----*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the definitions of the TilingInterface defined in
+// `TilingInterface.td`.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MLIR_INTERFACES_TILINGINTERFACE_H_
+#define MLIR_INTERFACES_TILINGINTERFACE_H_
+
+#include "mlir/IR/Builders.h"
+#include "mlir/IR/BuiltinTypes.h"
+#include "mlir/IR/Operation.h"
+#include "mlir/Interfaces/ViewLikeInterface.h"
+#include "mlir/Support/LLVM.h"
+
+/// Include the ODS generated interface header files.
+#include "mlir/Interfaces/TilingInterface.h.inc"
+
+#endif // MLIR_INTERFACES_TILINGINTERFACE_H_

diff --git a/mlir/include/mlir/Interfaces/TilingInterface.td b/mlir/include/mlir/Interfaces/TilingInterface.td
new file mode 100644
--- /dev/null
+++ b/mlir/include/mlir/Interfaces/TilingInterface.td
@@ -0,0 +1,95 @@
+//===- TilingInterface.td - Interface for tiling operations -*- tablegen -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains an interface to allow operations to generate a tiled
+// implementation of themselves.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MLIR_TILINGINTERFACE
+#define MLIR_TILINGINTERFACE
+
+include "mlir/IR/OpBase.td"
+
+def TilingInterface : OpInterface<"TilingInterface"> {
+  let description = [{
+    Interface for allowing operations to expose information needed to
+    tile them (similar to LinalgOp, but without having access to
+    indexing maps)
+  }];
+  let cppNamespace = "::mlir";
+  let methods = [
+      InterfaceMethod<
+        /*desc=*/[{
+          Returns a list of operands into which the result of the
+          tiled implementation is written. With `tensor` operands,
+          this is the initial tensor into which the tiled results are
+          inserted. With `memref` operands, this is the operand to
+          which the result of the tiled operation is written.
+        }],
+        /*retType=*/"SmallVector<Value>",
+        /*methodName=*/"getDestinationOperands",
+        /*args=*/(ins "OpBuilder &":$b),
+        /*methodBody=*/"",
+        /*defaultImplementation=*/"return {};"
+      >,
+      InterfaceMethod<
+        /*desc=*/[{
+          Returns a list of `StringRef`s that describe the number of
+          loops and the iterator types of the operation. The list is
+          expected to use
+          `getParallelIteratorTypeName()`/`getReductionIteratorTypeName()`
+          from MLIR Structured Op Utils.
+        }],
+        /*retType=*/"SmallVector<StringRef>",
+        /*methodName=*/"getLoopIteratorTypes"
+      >,
+      InterfaceMethod<
+        /*desc=*/[{
+          Returns a list of ranges that describe the loop bounds and
+          step for the loops of the operation.
+        }],
+        /*retTy=*/"SmallVector<Range>",
+        /*methodName=*/"getLoopBounds",
+        /*args=*/(ins "OpBuilder &":$b)
+      >,
+      InterfaceMethod<
+        /*desc=*/[{
+          Method to generate the tiled implementation of an operation.
+
+          The iteration space of the operation is returned by
+          `getLoopBounds`. The caller provides the information of the
+          tile within this iteration space whose implementation the
+          caller needs.
+          - `dest` are the Values into which the result of the tiled
+            operation is inserted. The types of the `dest` Values are
+            the same as the types returned by the
+            `getDestinationOperands` method.
+          - `offsets` provides the offset of the tile within the
+            iteration space.
+          - `sizes` provides the size of the tile.
+
+          The method returns the operation that is the tiled
+          implementation.
+        }],
+        /*retType=*/"Operation *",
+        /*methodName=*/"getTiledImplementation",
+        /*args=*/(ins
+            "OpBuilder &":$b,
+            "ValueRange ":$dest,
+            "ArrayRef<OpFoldResult> ":$offsets,
+            "ArrayRef<OpFoldResult> ":$sizes),
+        /*methodBody=*/"",
+        /*defaultImplementation=*/[{
+          return nullptr;
+        }]
+      >
+  ];
+}
+#endif // MLIR_TILINGINTERFACE
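For intuition, here is a rough sketch (not part of the patch; names, bounds, and tile sizes are illustrative) of the loop nest a tiling driver is expected to build around `getLoopBounds` and `getTiledImplementation`, using `linalg.pad_tensor` as the example:

    // Tiling %0 = linalg.pad_tensor %in low[3, 4] high[5, 3] {...}
    //     : tensor<?x?xf32> to tensor<?x?xf32>
    // with tile sizes [2, 3]. The loops iterate over the *result* space;
    // %ub0/%ub1 come from getLoopBounds (source dims plus total padding).
    %r = scf.for %iv0 = %c0 to %ub0 step %c2 iter_args(%a0 = %init) -> (tensor<?x?xf32>) {
      %r1 = scf.for %iv1 = %c0 to %ub1 step %c3 iter_args(%a1 = %a0) -> (tensor<?x?xf32>) {
        // getTiledImplementation materializes the tile at [%iv0, %iv1];
        // see the PadTensorOp implementation in LinalgOps.cpp below.
        %tile = ...
        %ins = tensor.insert_slice %tile into %a1[%iv0, %iv1] [%sz0, %sz1] [1, 1]
            : tensor<?x?xf32> into tensor<?x?xf32>
        scf.yield %ins : tensor<?x?xf32>
      }
      scf.yield %r1 : tensor<?x?xf32>
    }

The `%init` destination comes from `getDestinationOperands`; for PadTensorOp it is a `linalg.init_tensor` of the reified result shape.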
diff --git a/mlir/lib/Dialect/Linalg/IR/CMakeLists.txt b/mlir/lib/Dialect/Linalg/IR/CMakeLists.txt
--- a/mlir/lib/Dialect/Linalg/IR/CMakeLists.txt
+++ b/mlir/lib/Dialect/Linalg/IR/CMakeLists.txt
@@ -18,9 +18,11 @@
   MLIRIR
   MLIRParser
   MLIRSideEffectInterfaces
-  MLIRViewLikeInterface
+  MLIRSCF
   MLIRStandard
   MLIRMath
   MLIRMemRef
   MLIRTensor
+  MLIRTilingInterface
+  MLIRViewLikeInterface
 )

diff --git a/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp b/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp
--- a/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp
+++ b/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp
@@ -15,6 +15,7 @@
 #include "mlir/Dialect/Affine/IR/AffineOps.h"
 #include "mlir/Dialect/Linalg/IR/LinalgTypes.h"
 #include "mlir/Dialect/MemRef/IR/MemRef.h"
+#include "mlir/Dialect/SCF/SCF.h"
 #include "mlir/Dialect/StandardOps/IR/Ops.h"
 #include "mlir/Dialect/Tensor/IR/Tensor.h"
 #include "mlir/Dialect/Utils/ReshapeOpsUtils.h"
@@ -1203,6 +1204,253 @@
   return success();
 }

+//===----------------------------------------------------------------------===//
+// Methods related to PadTensor tiling.
+//===----------------------------------------------------------------------===//
+
+/// Given an OpFoldResult, return a Value. If the OpFoldResult is an Attribute,
+/// it must be of type Integer.
+static Value getAsValue(OpBuilder &builder, Location loc, OpFoldResult ofr) {
+  if (auto val = ofr.dyn_cast<Value>())
+    return val;
+  auto intVal = getConstantIntValue(ofr);
+  assert(intVal && "expected Value or IntegerAttr");
+  return builder.create<ConstantIndexOp>(loc, *intVal);
+}
+
+SmallVector<Value> PadTensorOp::getDestinationOperands(OpBuilder &b) {
+  ReifiedRankedShapedTypeDims reifiedShapes;
+  (void)reifyResultShapes(b, reifiedShapes);
+  Value initTensor = b.create<InitTensorOp>(getLoc(), reifiedShapes[0],
+                                            getResultType().getElementType());
+  return {initTensor};
+}
+
+SmallVector<StringRef> PadTensorOp::getLoopIteratorTypes() {
+  SmallVector<StringRef> iteratorTypes(getResultType().getRank(),
+                                       getParallelIteratorTypeName());
+  return iteratorTypes;
+}
+
+SmallVector<Range> PadTensorOp::getLoopBounds(OpBuilder &b) {
+  ReifiedRankedShapedTypeDims reifiedShapes;
+  (void)reifyResultShapes(b, reifiedShapes);
+  Value zero = b.create<ConstantIndexOp>(getLoc(), 0);
+  Value one = b.create<ConstantIndexOp>(getLoc(), 1);
+  // Initialize all the ranges to {zero, one, one}. All the `ub`s are
+  // overwritten.
+  SmallVector<Range> loopRanges(reifiedShapes[0].size(), {zero, one, one});
+  for (auto ub : enumerate(reifiedShapes[0]))
+    loopRanges[ub.index()].size = ub.value();
+  return loopRanges;
+}
+
+Operation *PadTensorOp::getTiledImplementation(OpBuilder &b, ValueRange dest,
+                                               ArrayRef<OpFoldResult> offsets,
+                                               ArrayRef<OpFoldResult> sizes) {
+  // Only constant padding value supported.
+  Value padValue = getConstantPaddingValue();
+  if (!padValue)
+    return nullptr;
+
+  // Helper variables and functions for various arithmetic operations. These
+  // are used extensively for computing new offset/length and padding values.
+  Location loc = getLoc();
+  AffineExpr dim0, dim1;
+  bindDims(b.getContext(), dim0, dim1);
+  // Add two integers.
+  auto addMap = AffineMap::get(2, 0, {dim0 + dim1});
+  auto add = [&](Value v1, Value v2) {
+    return b.createOrFold<AffineApplyOp>(loc, addMap, ValueRange{v1, v2});
+  };
+  // Subtract two integers.
+  auto subMap = AffineMap::get(2, 0, {dim0 - dim1});
+  auto sub = [&](Value v1, Value v2) {
+    return b.createOrFold<AffineApplyOp>(loc, subMap, ValueRange{v1, v2});
+  };
+  // Take the minimum of two integers.
+  auto idMap = AffineMap::getMultiDimIdentityMap(2, b.getContext());
+  auto min = [&](Value v1, Value v2) {
+    return b.createOrFold<AffineMinOp>(loc, idMap, ValueRange{v1, v2});
+  };
+  // Take the maximum of two integers.
+  auto max = [&](Value v1, Value v2) {
+    return b.createOrFold<AffineMaxOp>(loc, idMap, ValueRange{v1, v2});
+  };
+  // Zero index-typed integer.
+  auto zero = b.create<ConstantIndexOp>(loc, 0);
+
+  // Helper function for filling static/dynamic low/high padding indices
+  // vectors of PadTensorOp.
+  auto appendIndex = [&](Value val, SmallVector<Value> &dynIndices,
+                         SmallVector<int64_t> &staticIndices) {
+    if (auto constInt = getConstantIntValue(val)) {
+      staticIndices.push_back(*constInt);
+    } else {
+      staticIndices.push_back(ShapedType::kDynamicSize);
+      dynIndices.push_back(val);
+    }
+  };
+
+  // Compute new offsets, lengths, low padding, high padding.
+  SmallVector<OpFoldResult> newOffsets, newLengths, newStrides;
+  SmallVector<Value> newLows, newHighs;
+  SmallVector<int64_t> staticNewLows, staticNewHighs;
+  // Set to true if the original data source is not read at all.
+  bool hasZeroLen = false;
+  // Same as hasZeroLen, but for dynamic dimension sizes. This condition
+  // is true if the original data source turns out to be unused at runtime.
+  Value dynHasZeroLenCond;
+
+  int64_t rank = getSourceType().getRank();
+  for (unsigned dim = 0; dim < rank; ++dim) {
+    auto low = getAsValue(b, loc, getMixedLowPad()[dim]);
+    bool hasLowPad = getConstantIntValue(low) != static_cast<int64_t>(0);
+    auto high = getAsValue(b, loc, getMixedHighPad()[dim]);
+    bool hasHighPad = getConstantIntValue(high) != static_cast<int64_t>(0);
+    auto offset = getAsValue(b, loc, offsets[dim]);
+    auto length = getAsValue(b, loc, sizes[dim]);
+    auto srcSize = b.createOrFold<tensor::DimOp>(loc, source(), dim);
+
+    // The new amount of low padding is `low - offset`. Except for the case
+    // where none of the low padding is read. In that case, the new amount of
+    // low padding is zero.
+    //
+    // Optimization: If low = 0, then newLow = 0.
+    Value newLow = hasLowPad ? max(zero, sub(low, offset)) : zero;
+    appendIndex(newLow, newLows, staticNewLows);
+
+    // Start reading the data from position `offset - low`. Since the original
+    // read may have started in the low padding zone, this value could be
+    // negative. Therefore, start reading from:
+    //
+    //   max(offset - low, 0)
+    //
+    // The original read could also have started in the high padding zone.
+    // In that case, set the offset to the end of source tensor. The new
+    // ExtractSliceOp length will be zero in that case. (Effectively reading
+    // no data from the source.)
+    //
+    // Optimization: If low = 0, then the formula can be simplified.
+    Value newOffset = hasLowPad ? min(max(sub(offset, low), zero), srcSize)
+                                : min(offset, srcSize);
+    newOffsets.push_back(getAsOpFoldResult(newOffset));
+
+    // The original ExtractSliceOp was reading until position
+    // `offset + length`. Therefore, the corresponding position within the
+    // source tensor is:
+    //
+    //   offset + length - low
+    //
+    // In case the original ExtractSliceOp stopped reading within the low
+    // padding zone, this value can be negative. In that case, the end
+    // position of the read should be zero. (Similar to newOffset.)
+    //
+    // The original read could also have stopped in the high padding zone.
+    // In that case, the end position of the read should be the end of the
+    // source tensor. (Similar to newOffset.)
+    //
+    //   endLoc = min(max(offset - low + length, 0), srcSize)
+    //
+    // The new ExtractSliceOp length is `endLoc - newOffset`.
+    //
+    // Optimization: If low = 0, then the formula can be simplified.
+    Value endLoc = hasLowPad
+                       ? min(max(add(sub(offset, low), length), zero), srcSize)
+                       : min(add(offset, length), srcSize);
+    Value newLength = sub(endLoc, newOffset);
+    newLengths.push_back(getAsOpFoldResult(newLength));
+
+    // Check if newLength is zero. In that case, no ExtractSliceOp should be
+    // executed.
+    if (auto newLengthInt = getConstantIntValue(newLength)) {
+      hasZeroLen |= *newLengthInt == 0;
+    } else {
+      Value check =
+          b.create<CmpIOp>(loc, CmpIPredicate::eq, newLength, zero);
+      dynHasZeroLenCond =
+          dynHasZeroLenCond ? b.create<AndOp>(loc, check, dynHasZeroLenCond)
+                            : check;
+    }
+
+    // The amount of high padding is simply the number of elements remaining,
+    // so that the result has the same length as the original ExtractSliceOp.
+    // As an optimization, if the original high padding is zero, then the new
+    // high padding must also be zero.
+    Value newHigh = hasHighPad ? sub(sub(length, newLength), newLow) : zero;
+    appendIndex(newHigh, newHighs, staticNewHighs);
+
+    // Only unit stride supported.
+    newStrides.push_back(b.getIndexAttr(1));
+  }
+
+  // The shape of the result can be obtained from the sizes passed in.
+  SmallVector<Value> dynDims;
+  SmallVector<int64_t> shape;
+  dispatchIndexOpFoldResults(sizes, dynDims, shape, ShapedType::kDynamicSize);
+  RankedTensorType resultType =
+      RankedTensorType::get(shape, getResultType().getElementType());
+
+  // Insert cast to ensure that types match. (May be folded away.)
+  auto castResult = [&](Value val) -> Operation * {
+    auto castOp = b.create<tensor::CastOp>(loc, resultType, val);
+    return castOp;
+  };
+
+  // In cases where the original data source is unused: Emit a GenerateOp and
+  // do not generate a SliceOp. (The result shape of the SliceOp would
+  // have a dimension of size 0, the semantics of which is unclear.)
+  auto createGenerateOp = [&]() {
+    // Create GenerateOp.
+    auto generateOp = b.create<tensor::GenerateOp>(
+        loc, resultType, dynDims,
+        [&](OpBuilder &builder, Location gLoc, ValueRange indices) {
+          builder.create<tensor::YieldOp>(gLoc, padValue);
+        });
+    return castResult(generateOp);
+  };
+
+  // Emit a SliceOp and a PadTensorOp. Should not be used in cases where
+  // the result shape of the new SliceOp has a zero dimension.
+  auto createPadTensorOfSubTensor = [&]() {
+    // Create pad_tensor(subtensor(x)).
+    auto newSliceOp = b.create<tensor::ExtractSliceOp>(
+        loc, source(), newOffsets, newLengths, newStrides);
+    auto newPadTensorOp = b.create<PadTensorOp>(
+        loc, newSliceOp, staticNewLows, staticNewHighs, newLows, newHighs);
+
+    // Copy region to new PadTensorOp.
+    BlockAndValueMapping bvm;
+    region().cloneInto(&newPadTensorOp.getRegion(), bvm);
+
+    // Cast result and return.
+    return castResult(newPadTensorOp);
+  };
+
+  // Rewrite subtensor(pad_tensor(x)) into a GenerateOp if it is statically
+  // known that the original data source x is not used.
+  if (hasZeroLen) {
+    return createGenerateOp();
+  }
+
+  // If there are dynamic dimensions: Generate an scf.if check to avoid
+  // creating SliceOps with result dimensions of size 0 at runtime.
+  if (dynHasZeroLenCond) {
+    auto result = b.create<scf::IfOp>(
+        loc, resultType, dynHasZeroLenCond,
+        /*thenBuilder=*/
+        [&](OpBuilder &b, Location loc) {
+          b.create<scf::YieldOp>(loc, createGenerateOp()->getResult(0));
+        },
+        /*elseBuilder=*/
+        [&](OpBuilder &b, Location loc) {
+          b.create<scf::YieldOp>(loc,
+                                 createPadTensorOfSubTensor()->getResult(0));
+        });
+    return result;
+  }
+  return createPadTensorOfSubTensor();
+}
+
 namespace {
 // Folds linalg.pad_tensor when padding is static zeros.
 struct FoldStaticZeroPadding : public OpRewritePattern<PadTensorOp> {
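For a single tile whose overlap with the source is only known at runtime, the method above emits roughly the following IR (a hedged sketch; value names and types are illustrative, and the trailing tensor.cast is omitted):

    // dynHasZeroLenCond: the tile reads no source data in some dimension.
    %cond = cmpi eq, %new_length, %c0 : index
    %tile = scf.if %cond -> (tensor<2x3xf32>) {
      // Then: the tile is all padding; materialize the constant pad value.
      %gen = tensor.generate {
      ^bb0(%i: index, %j: index):
        tensor.yield %pad_value : f32
      } : tensor<2x3xf32>
      scf.yield %gen : tensor<2x3xf32>
    } else {
      // Else: slice the source region actually read, then re-pad the rest.
      %slice = tensor.extract_slice %src[%new_off0, %new_off1]
          [%new_len0, %new_len1] [1, 1] : tensor<?x?xf32> to tensor<?x?xf32>
      %padded = linalg.pad_tensor %slice low[%new_low0, %new_low1]
          high[%new_high0, %new_high1] {...} : tensor<?x?xf32> to tensor<2x3xf32>
      scf.yield %padded : tensor<2x3xf32>
    }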
diff --git a/mlir/lib/Dialect/Linalg/Transforms/Tiling.cpp b/mlir/lib/Dialect/Linalg/Transforms/Tiling.cpp
--- a/mlir/lib/Dialect/Linalg/Transforms/Tiling.cpp
+++ b/mlir/lib/Dialect/Linalg/Transforms/Tiling.cpp
@@ -373,11 +373,12 @@
       options.tileSizeComputationFunction(builder, op);
   assert(static_cast<int64_t>(tileSizes.size()) == rank);
   // Compute lower and upper bounds of the loop nest.
+  SmallVector<Range> ranges = op.getLoopBounds(builder);
   SmallVector<Value> lbs, dims, steps;
   for (int64_t i = 0; i < rank; ++i) {
     if (!isZero(tileSizes[i])) {
-      lbs.push_back(builder.create<ConstantIndexOp>(loc, 0));
-      dims.push_back(builder.create<tensor::DimOp>(loc, op.output(), i));
+      lbs.push_back(ranges[i].offset);
+      dims.push_back(ranges[i].size);
       steps.push_back(tileSizes[i]);
     }
   }

diff --git a/mlir/lib/Dialect/Linalg/Transforms/Transforms.cpp b/mlir/lib/Dialect/Linalg/Transforms/Transforms.cpp
--- a/mlir/lib/Dialect/Linalg/Transforms/Transforms.cpp
+++ b/mlir/lib/Dialect/Linalg/Transforms/Transforms.cpp
@@ -648,16 +648,6 @@
   return success();
 }

-/// Given an OpFoldResult, return a Value. If the OpFoldResult is an Attribute,
-/// it must be of type Integer.
-static Value asValue(OpBuilder &builder, Location loc, OpFoldResult ofr) {
-  if (auto val = ofr.dyn_cast<Value>())
-    return val;
-  auto intVal = getConstantIntValue(ofr);
-  assert(intVal && "expected Value or IntegerAttr");
-  return builder.create<ConstantIndexOp>(loc, *intVal);
-}
-
 LogicalResult ExtractSliceOfPadTensorSwapPattern::matchAndRewrite(
     tensor::ExtractSliceOp sliceOp, PatternRewriter &rewriter) const {
   auto padOp = sliceOp.source().getDefiningOp<PadTensorOp>();
@@ -666,227 +656,12 @@
   // Only unit stride supported.
   if (!sliceOp.hasUnitStride())
     return failure();
-  // Only constant padding value supported.
-  Value padValue = padOp.getConstantPaddingValue();
-  if (!padValue)
-    return failure();
-
-  // Helper variables and functions for various arithmetic operations. These
-  // are used extensively for computing new offset/length and padding values.
-  Location loc = sliceOp.getLoc();
-  AffineExpr dim0, dim1;
-  bindDims(rewriter.getContext(), dim0, dim1);
-  // Add two integers.
-  auto addMap = AffineMap::get(2, 0, {dim0 + dim1});
-  auto add = [&](Value v1, Value v2) {
-    return rewriter.createOrFold<AffineApplyOp>(loc, addMap,
-                                                ValueRange{v1, v2});
-  };
-  // Subtract two integers.
-  auto subMap = AffineMap::get(2, 0, {dim0 - dim1});
-  auto sub = [&](Value v1, Value v2) {
-    return rewriter.createOrFold<AffineApplyOp>(loc, subMap,
-                                                ValueRange{v1, v2});
-  };
-  // Take the minimum of two integers.
-  auto idMap = AffineMap::getMultiDimIdentityMap(2, rewriter.getContext());
-  auto min = [&](Value v1, Value v2) {
-    return rewriter.createOrFold<AffineMinOp>(loc, idMap, ValueRange{v1, v2});
-  };
-  // Take the maximum of two integers.
-  auto max = [&](Value v1, Value v2) {
-    return rewriter.createOrFold<AffineMaxOp>(loc, idMap, ValueRange{v1, v2});
-  };
-  // Zero index-typed integer.
-  auto zero = rewriter.create<ConstantIndexOp>(loc, 0);
-
-  // Helper function for filling static/dynamic low/high padding indices
-  // vectors of PadTensorOp.
-  auto appendIndex = [&](Value val, SmallVector<Value> &dynIndices,
-                         SmallVector<int64_t> &staticIndices) {
-    if (auto constInt = getConstantIntValue(val)) {
-      staticIndices.push_back(*constInt);
-    } else {
-      staticIndices.push_back(ShapedType::kDynamicSize);
-      dynIndices.push_back(val);
-    }
-  };
-
-  // Compute new offsets, lengths, low padding, high padding.
-  SmallVector<OpFoldResult> newOffsets, newLengths, newStrides;
-  SmallVector<Value> newLows, newHighs;
-  SmallVector<int64_t> staticNewLows, staticNewHighs;
-  // Set to true if the original data source is not read at all.
-  bool hasZeroLen = false;
-  // Same as hasZeroLen, but for dynamic dimension sizes. This condition
-  // is true if the original data source turns out to be unused at runtime.
-  Value dynHasZeroLenCond;
-
-  int64_t rank = padOp.getSourceType().getRank();
-  for (unsigned dim = 0; dim < rank; ++dim) {
-    auto low = asValue(rewriter, loc, padOp.getMixedLowPad()[dim]);
-    bool hasLowPad = getConstantIntValue(low) != static_cast<int64_t>(0);
-    auto high = asValue(rewriter, loc, padOp.getMixedHighPad()[dim]);
-    bool hasHighPad = getConstantIntValue(high) != static_cast<int64_t>(0);
-    auto offset = asValue(rewriter, loc, sliceOp.getMixedOffsets()[dim]);
-    auto length = asValue(rewriter, loc, sliceOp.getMixedSizes()[dim]);
-    auto srcSize =
-        rewriter.createOrFold<tensor::DimOp>(loc, padOp.source(), dim);
-
-    // The new amount of low padding is `low - offset`. Except for the case
-    // where none of the low padding is read. In that case, the new amount of
-    // low padding is zero.
-    //
-    // Optimization: If low = 0, then newLow = 0.
-    Value newLow = hasLowPad ? max(zero, sub(low, offset)) : zero;
-    appendIndex(newLow, newLows, staticNewLows);
-
-    // Start reading the data from position `offset - low`. Since the original
-    // read may have started in the low padding zone, this value could be
-    // negative. Therefore, start reading from:
-    //
-    //   max(offset - low, 0)
-    //
-    // The original read could also have started in the high padding zone.
-    // In that case, set the offset to the end of source tensor. The new
-    // ExtractSliceOp length will be zero in that case. (Effectively reading
-    // no data from the source.)
-    //
-    // Optimization: If low = 0, then the formula can be simplified.
-    Value newOffset = hasLowPad ? min(max(sub(offset, low), zero), srcSize)
-                                : min(offset, srcSize);
-    newOffsets.push_back(getAsOpFoldResult(newOffset));
-
-    // The original ExtractSliceOp was reading until position
-    // `offset + length`. Therefore, the corresponding position within the
-    // source tensor is:
-    //
-    //   offset + length - low
-    //
-    // In case the original ExtractSliceOp stopped reading within the low
-    // padding zone, this value can be negative. In that case, the end
-    // position of the read should be zero. (Similar to newOffset.)
-    //
-    // The original read could also have stopped in the high padding zone.
-    // In that case, the end position of the read should be the end of the
-    // source tensor. (Similar to newOffset.)
-    //
-    //   endLoc = min(max(offset - low + length, 0), srcSize)
-    //
-    // The new ExtractSliceOp length is `endLoc - newOffset`.
-    //
-    // Optimization: If low = 0, then the formula can be simplified.
-    Value endLoc = hasLowPad
-                       ? min(max(add(sub(offset, low), length), zero), srcSize)
-                       : min(add(offset, length), srcSize);
-    Value newLength = sub(endLoc, newOffset);
-    newLengths.push_back(getAsOpFoldResult(newLength));
-
-    // Check if newLength is zero. In that case, no SubTensorOp should be
-    // executed.
-    if (auto newLengthInt = getConstantIntValue(newLength)) {
-      hasZeroLen |= *newLengthInt == 0;
-    } else {
-      Value check = rewriter.create<CmpIOp>(
-          loc, CmpIPredicate::eq, newLength, zero);
-      dynHasZeroLenCond =
-          dynHasZeroLenCond
-              ? rewriter.create<AndOp>(loc, check, dynHasZeroLenCond)
-              : check;
-    }
-
-    // The amount of high padding is simply the number of elements remaining,
-    // so that the result has the same length as the original ExtractSliceOp.
-    // As an optimization, if the original high padding is zero, then the new
-    // high padding must also be zero.
-    Value newHigh = hasHighPad ? sub(sub(length, newLength), newLow) : zero;
-    appendIndex(newHigh, newHighs, staticNewHighs);
-
-    // Only unit stride supported.
-    newStrides.push_back(rewriter.getIndexAttr(1));
-  }
-
-  // Insert cast to ensure that types match. (May be folded away.)
-  auto castResult = [&](Value val) -> Value {
-    auto castOp = rewriter.create<tensor::CastOp>(loc, sliceOp.getType(), val);
-    return castOp;
-  };
-
-  // In cases where the original data source is unused: Emit a GenerateOp and
-  // do not generate a SliceOp. (The result shape of the SliceOp would
-  // have a dimension of size 0, the semantics of which is unclear.)
-  auto createGenerateOp = [&]() {
-    // The shape of the GenerateOp is the same as the existing SliceOp.
-    RankedTensorType type = sliceOp.getType();
-    SmallVector<Value> dynDims;
-    for (unsigned i = 0; i < type.getRank(); ++i) {
-      if (type.isDynamicDim(i))
-        dynDims.push_back(asValue(rewriter, loc, sliceOp.getMixedSizes()[i]));
-    }
-
-    // Create GenerateOp.
-    auto generateOp = rewriter.create<tensor::GenerateOp>(loc, type, dynDims);
-
-    // Copy region to new op.
-    BlockAndValueMapping bvm;
-    padOp.region().cloneInto(&generateOp.getRegion(), bvm);
-    // Rewrite linalg::YieldOp to tensor::YieldOp.
-    {
-      OpBuilder::InsertionGuard guard(rewriter);
-      auto yieldOp = dyn_cast<linalg::YieldOp>(
-          generateOp.getRegion().front().getTerminator());
-      assert(yieldOp && "malformed PadTensorOp: expected YieldOp terminator");
-      assert(yieldOp.values().size() == 1);
-      rewriter.setInsertionPoint(yieldOp);
-      rewriter.replaceOpWithNewOp<tensor::YieldOp>(
-          yieldOp, yieldOp.values()[0]);
-    }
-
-    return castResult(generateOp);
-  };
-
-  // Emit a SliceOp and a PadTensorOp. Should not be used in cases where
-  // the result shape of the new SliceOp has a zero dimension.
-  auto createPadTensorOfSubTensor = [&]() {
-    // Create pad_tensor(subtensor(x)).
-    auto newSliceOp = rewriter.create<tensor::ExtractSliceOp>(
-        loc, padOp.source(), newOffsets, newLengths, newStrides);
-    auto newPadTensorOp = rewriter.create<PadTensorOp>(
-        loc, newSliceOp, staticNewLows, staticNewHighs, newLows, newHighs);
-
-    // Copy region to new PadTensorOp.
-    BlockAndValueMapping bvm;
-    padOp.region().cloneInto(&newPadTensorOp.getRegion(), bvm);
-
-    // Cast result and return.
-    return castResult(newPadTensorOp);
-  };
-
-  // Rewrite subtensor(pad_tensor(x)) into a GenerateOp if it is statically
-  // known that the original data source x is not used.
-  if (hasZeroLen) {
-    rewriter.replaceOp(sliceOp, createGenerateOp());
-    return success();
-  }
-
-  // If there are dynamic dimensions: Generate an scf.if check to avoid
-  // creating SliceOps with result dimensions of size 0 at runtime.
-  if (dynHasZeroLenCond) {
-    auto result = rewriter.create<scf::IfOp>(
-        loc, sliceOp.getType(), dynHasZeroLenCond,
-        /*thenBuilder=*/
-        [&](OpBuilder &b, Location loc) {
-          b.create<scf::YieldOp>(loc, createGenerateOp());
-        },
-        /*elseBuilder=*/
-        [&](OpBuilder &b, Location loc) {
-          b.create<scf::YieldOp>(loc, createPadTensorOfSubTensor());
-        });
-    rewriter.replaceOp(sliceOp, result.getResult(0));
-    return success();
-  }
+  Operation *tiledPadOp = padOp.getTiledImplementation(
+      rewriter, /*dest=*/ValueRange{}, sliceOp.getMixedOffsets(),
+      sliceOp.getMixedSizes());
+  // Bail out if no tiled implementation could be generated, e.g. for a
+  // non-constant padding value.
+  if (!tiledPadOp)
+    return failure();

-  // All shapes are static and the data source is actually used. Rewrite into
-  // pad_tensor(subtensor(x)).
-  rewriter.replaceOp(sliceOp, createPadTensorOfSubTensor());
+  // The tiled implementation handles the statically- and dynamically-unused
+  // source cases as well as the plain pad_tensor(subtensor(x)) rewrite.
+  rewriter.replaceOp(sliceOp, tiledPadOp->getResults());
   return success();
 }
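The net effect of the pattern is unchanged; a hedged before/after sketch (illustrative operands, unit strides):

    // Before: a slice of a padded tensor.
    %0 = linalg.pad_tensor %x low[%l0, %l1] high[%h0, %h1] {...}
        : tensor<?x?xf32> to tensor<?x?xf32>
    %1 = tensor.extract_slice %0[%o0, %o1] [%s0, %s1] [1, 1]
        : tensor<?x?xf32> to tensor<?x?xf32>

    // After (when the source is known to be read): a pad of a slice, with
    // offsets/sizes clamped to the source and padding amounts recomputed.
    %2 = tensor.extract_slice %x[%no0, %no1] [%ns0, %ns1] [1, 1]
        : tensor<?x?xf32> to tensor<?x?xf32>
    %3 = linalg.pad_tensor %2 low[%nl0, %nl1] high[%nh0, %nh1] {...}
        : tensor<?x?xf32> to tensor<?x?xf32>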
+//
+//===----------------------------------------------------------------------===//
+
+#include "mlir/Interfaces/TilingInterface.h"
+#include "mlir/Dialect/Tensor/IR/Tensor.h"
+
+using namespace mlir;
+
+#include "mlir/Interfaces/TilingInterface.cpp.inc"

diff --git a/mlir/test/Dialect/Linalg/tile-pad-tensor-op.mlir b/mlir/test/Dialect/Linalg/tile-pad-tensor-op.mlir
--- a/mlir/test/Dialect/Linalg/tile-pad-tensor-op.mlir
+++ b/mlir/test/Dialect/Linalg/tile-pad-tensor-op.mlir
@@ -3,14 +3,18 @@
 // RUN: mlir-opt %s -linalg-tile="linalg-tile-sizes=0,3" -cse -split-input-file | \
 // RUN:   FileCheck %s -check-prefix=TILE1

-// TILE2-LABEL: func @dynamic_pad_tensor(
+// TILE2-DAG: #[[MAP0:.*]] = affine_map<()[s0] -> (s0 + 8)>
+// TILE2-DAG: #[[MAP1:.*]] = affine_map<()[s0] -> (s0 + 7)>
+// TILE2: func @dynamic_pad_tensor(
 // TILE2-SAME: %[[IN:.*]]: tensor<?x?xf32>, %[[OUT:.*]]: tensor<?x?xf32>
 // TILE2-DAG: %[[C0:.*]] = constant 0 : index
 // TILE2-DAG: %[[C1:.*]] = constant 1 : index
 // TILE2-DAG: %[[C2:.*]] = constant 2 : index
 // TILE2-DAG: %[[C3:.*]] = constant 3 : index
-// TILE2: %[[DIM0:.*]] = tensor.dim %[[OUT]], %[[C0]]
-// TILE2: %[[DIM1:.*]] = tensor.dim %[[OUT]], %[[C1]]
+// TILE2: %[[DIM_IN0:.*]] = tensor.dim %[[IN]], %[[C0]]
+// TILE2: %[[DIM0:.*]] = affine.apply #[[MAP0]]()[%[[DIM_IN0]]]
+// TILE2: %[[DIM_IN1:.*]] = tensor.dim %[[IN]], %[[C1]]
+// TILE2: %[[DIM1:.*]] = affine.apply #[[MAP1]]()[%[[DIM_IN1]]]
 // TILE2: %[[RESULT:.*]] = scf.for {{.*}} = %[[C0]] to %[[DIM0]] step %[[C2]]
 // TILE2: scf.for {{.*}} = %[[C0]] to %[[DIM1]] step %[[C3]] iter_args(%[[INNER_OUT:.*]] =
 // TILE2: %[[SWAP_RESULT:.*]] = scf.if
@@ -21,12 +25,14 @@
 // TILE2: tensor.insert_slice %[[SWAP_RESULT]] into %[[INNER_OUT]][{{.*}}, {{.*}}] [{{.*}}, {{.*}}] [1, 1]
 // TILE2: return %[[RESULT]]

-// TILE1-LABEL: func @dynamic_pad_tensor(
+// TILE1-DAG: #[[MAP:.*]] = affine_map<()[s0] -> (s0 + 7)>
+// TILE1: func @dynamic_pad_tensor(
 // TILE1-SAME: %[[IN:.*]]: tensor<?x?xf32>, %[[OUT:.*]]: tensor<?x?xf32>
 // TILE1-DAG: %[[C0:.*]] = constant 0 : index
 // TILE1-DAG: %[[C1:.*]] = constant 1 : index
 // TILE1-DAG: %[[C3:.*]] = constant 3 : index
-// TILE1: %[[DIM1:.*]] = tensor.dim %[[OUT]], %[[C1]]
+// TILE1: %[[DIM_IN1:.*]] = tensor.dim %[[IN]], %[[C1]]
+// TILE1: %[[DIM1:.*]] = affine.apply #[[MAP]]()[%[[DIM_IN1]]]
 // TILE1: %[[RESULT:.*]] = scf.for {{.*}} = %[[C0]] to %[[DIM1]] step %[[C3]] iter_args(%[[INNER_OUT:.*]] =
 // TILE1: %[[DIM0:.*]] = tensor.dim %[[OUT]], %[[C0]]
 // TILE1: %[[SWAP_RESULT:.*]] = scf.if

diff --git a/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel b/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel
--- a/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel
+++ b/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel
@@ -757,6 +757,13 @@
     actual = ":SideEffectInterfacesTdFiles",
 )

+td_library(
+    name = "TilingInterfaceTdFiles",
+    srcs = ["include/mlir/Interfaces/TilingInterface.td"],
+    includes = ["include"],
+    deps = [":OpBaseTdFiles"],
+)
+
 td_library(
     name = "VectorInterfacesTdFiles",
     srcs = ["include/mlir/Interfaces/VectorInterfaces.td"],
@@ -4603,6 +4610,24 @@
     actual = "SideEffectInterfaces",
 )

+gentbl_cc_library(
+    name = "TilingInterfaceIncGen",
+    strip_include_prefix = "include",
+    tbl_outs = [
+        (
+            ["-gen-op-interface-decls"],
+            "include/mlir/Interfaces/TilingInterface.h.inc",
+        ),
+        (
+            ["-gen-op-interface-defs"],
+            "include/mlir/Interfaces/TilingInterface.cpp.inc",
+        ),
+    ],
+    tblgen = ":mlir-tblgen",
+    td_file = "include/mlir/Interfaces/TilingInterface.td",
+    deps = [":TilingInterfaceTdFiles"],
+)
+
 cc_library(
"Analysis", srcs = glob( @@ -5790,6 +5815,7 @@ ":LoopLikeInterfaceTdFiles", ":OpBaseTdFiles", ":SideEffectInterfacesTdFiles", + ":TilingInterfaceTdFiles", ":ViewLikeInterfaceTdFiles", ], ) @@ -6045,10 +6071,12 @@ ":MathDialect", ":MemRefDialect", ":Parser", + ":SCFDialect", ":SideEffectInterfaces", ":StandardOps", ":Support", ":TensorDialect", + ":TilingInterface", ":ViewLikeInterface", "//llvm:Support", ], @@ -6129,6 +6157,21 @@ ], ) +cc_library( + name = "TilingInterface", + srcs = ["lib/Interfaces/TilingInterface.cpp"], + hdrs = ["include/mlir/Interfaces/TilingInterface.h"], + includes = ["include"], + deps = [ + ":IR", + ":Support", + ":TensorDialect", + ":TilingInterfaceIncGen", + ":ViewLikeInterface", + "//llvm:Support", + ], +) + td_library( name = "VectorOpsTdFiles", srcs = ["include/mlir/Dialect/Vector/VectorOps.td"],