diff --git a/mlir/include/mlir/Dialect/Affine/IR/AffineOps.h b/mlir/include/mlir/Dialect/Affine/IR/AffineOps.h --- a/mlir/include/mlir/Dialect/Affine/IR/AffineOps.h +++ b/mlir/include/mlir/Dialect/Affine/IR/AffineOps.h @@ -392,6 +392,13 @@ OpFoldResult makeComposedFoldedAffineApply(RewriterBase &b, Location loc, AffineExpr expr, ArrayRef operands); +/// Variant of `makeComposedFoldedAffineApply` suitable for multi-result maps. +/// Note that this may create as many affine.apply operations as the map has +/// results given that affine.apply must be single-result. +SmallVector +makeComposedFoldedMultiResultAffineApply(RewriterBase &b, Location loc, + AffineMap map, + ArrayRef operands); /// Returns an AffineMinOp obtained by composing `map` and `operands` with /// AffineApplyOps supplying those operands. @@ -405,16 +412,17 @@ AffineMap map, ArrayRef operands); +/// Constructs an AffineMaxOp that computes a maximum across the results of +/// applying `map` to `operands`, then immediately attempts to fold it. If +/// folding results in a constant value, erases all created ops. +OpFoldResult makeComposedFoldedAffineMax(RewriterBase &b, Location loc, + AffineMap map, + ArrayRef operands); + /// Returns the values obtained by applying `map` to the list of values. SmallVector applyMapToValues(OpBuilder &b, Location loc, AffineMap map, ValueRange values); -/// Returns the values obtained by applying `map` to the list of values, which -/// may be known constants. 
-SmallVector applyMapToValues(RewriterBase &b, Location loc, - AffineMap map, - ArrayRef values); - /// Given an affine map `map` and its input `operands`, this method composes /// into `map`, maps of AffineApplyOps whose results are the values in /// `operands`, iteratively until no more of `operands` are the result of an diff --git a/mlir/include/mlir/Dialect/Linalg/IR/LinalgInterfaces.td b/mlir/include/mlir/Dialect/Linalg/IR/LinalgInterfaces.td --- a/mlir/include/mlir/Dialect/Linalg/IR/LinalgInterfaces.td +++ b/mlir/include/mlir/Dialect/Linalg/IR/LinalgInterfaces.td @@ -1133,7 +1133,7 @@ let extraClassDeclaration = [{ /// Return the flat list of all operand dimension sizes in the order they /// appear in the operands. - SmallVector createFlatListOfOperandDims(OpBuilder &, Location); + SmallVector createFlatListOfOperandDims(OpBuilder &, Location); /// Return the flat list of all operands' static dimension sizes in the /// order they appear in the operands. All operand dimension sizes have to diff --git a/mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h b/mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h --- a/mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h +++ b/mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h @@ -409,7 +409,8 @@ using LoopIndexToRangeIndexMap = DenseMap; std::tuple, LoopIndexToRangeIndexMap> makeTiledLoopRanges(RewriterBase &b, Location loc, AffineMap map, - ValueRange allShapeSizes, ValueRange allTileSizes); + ArrayRef allShapeSizes, + ArrayRef allTileSizes); /// A description of a multi-size tiling comprising tile sizes and numbers of /// tiles, expressed as Values which may or may not be constant. 
Multi-size diff --git a/mlir/include/mlir/Dialect/Linalg/Utils/Utils.h b/mlir/include/mlir/Dialect/Linalg/Utils/Utils.h --- a/mlir/include/mlir/Dialect/Linalg/Utils/Utils.h +++ b/mlir/include/mlir/Dialect/Linalg/Utils/Utils.h @@ -48,6 +48,8 @@ /// Helper function that creates a memref::DimOp or tensor::DimOp depending on /// the type of `source`. Value createOrFoldDimOp(OpBuilder &b, Location loc, Value source, int64_t dim); +OpFoldResult createFoldedDimOp(OpBuilder &b, Location loc, Value source, + int64_t dim); /// Given an operation, retrieves the value of each dynamic dimension through /// constructing the necessary DimOp operators. @@ -179,16 +181,17 @@ /// Computes tile offsets, given a list of loop `ivs` and `tileSizes`. In case a /// tile size is zero (i.e., no tiling), the corresponding offset is also zero. -SmallVector computeTileOffsets(OpBuilder &b, Location loc, - ValueRange ivs, ValueRange tileSizes); +SmallVector computeTileOffsets(OpBuilder &b, Location loc, + ArrayRef ivs, + ArrayRef tileSizes); /// Computes tile sizes, given a list of `tileSizes` and dimension /// sizes (`sizeBounds`). In case a tile size is zero (i.e., no tiling), the /// corresponding result size is the corresponding value from `sizeBounds`. /// Note: The returned tile sizes are closed intervals. -SmallVector computeTileSizes(OpBuilder &b, Location loc, - ValueRange tileSizes, - ArrayRef sizeBounds); +SmallVector computeTileSizes(OpBuilder &b, Location loc, + ArrayRef tileSizes, + ArrayRef sizeBounds); /// Returns the list of tensor output types produced when the given structured /// operation `op` is applied to the given `operands`. Note that `operands` are @@ -217,8 +220,9 @@ /// controls whether to omit the partial/boundary tile condition check in cases /// where we statically know that it is unnecessary. 
Value makeTiledShape(OpBuilder &builder, Location loc, Value valueToTile, - ValueRange tileSizes, AffineMap map, ValueRange lbs, - ValueRange ubs, ValueRange subShapeSizes, + ArrayRef tileSizes, AffineMap map, + ArrayRef lbs, ArrayRef ubs, + ArrayRef subShapeSizes, bool omitPartialTileCheck); /// Creates extract_slice/subview ops for all `valuesToTile` of the given @@ -232,18 +236,20 @@ /// Note that a constant zero in `tileSizes` means no tiling at that implicit /// loop. The number of non-zero values in `tileSizes` should be equal to the /// number of values in `ivs`. -SmallVector makeTiledShapes(OpBuilder &builder, Location loc, - LinalgOp linalgOp, - ArrayRef valuesToTile, - ValueRange ivs, ValueRange tileSizes, - ArrayRef sizeBounds, - bool omitPartialTileCheck); +SmallVector makeTiledShapes(OpBuilder &builder, Location loc, + LinalgOp linalgOp, ValueRange valuesToTile, + ArrayRef ivs, + ArrayRef tileSizes, + ArrayRef sizeBounds, + bool omitPartialTileCheck); /// Add the specified offsets to any `linalg.index` ops contained in the given /// `linalgOp`. The offsets are provided in the same order as iteration space /// dimensions. Null offests are assumed to be zero. 
-void offsetIndices(OpBuilder &b, LinalgOp linalgOp, ArrayRef offests); -void offsetIndices(RewriterBase &b, LinalgOp linalgOp, ArrayRef offests); +void offsetIndices(OpBuilder &b, LinalgOp linalgOp, + ArrayRef offests); +void offsetIndices(RewriterBase &b, LinalgOp linalgOp, + ArrayRef offests); using FusableOpDependencesTy = llvm::MapVector< Operation *, diff --git a/mlir/lib/Dialect/Affine/IR/AffineOps.cpp b/mlir/lib/Dialect/Affine/IR/AffineOps.cpp --- a/mlir/lib/Dialect/Affine/IR/AffineOps.cpp +++ b/mlir/lib/Dialect/Affine/IR/AffineOps.cpp @@ -790,33 +790,6 @@ values); } -OpFoldResult -mlir::makeComposedFoldedAffineApply(RewriterBase &b, Location loc, - AffineMap map, - ArrayRef operands) { - assert(map.getNumResults() == 1 && "building affine.apply with !=1 result"); - - SmallVector constants; - SmallVector actualValues; - materializeConstants(b, loc, operands, constants, actualValues); - composeAffineMapAndOperands(&map, &actualValues); - OpFoldResult result = createOrFold(b, loc, actualValues, map); - if (result.is()) { - for (Operation *op : constants) - b.eraseOp(op); - } - return result; -} - -OpFoldResult -mlir::makeComposedFoldedAffineApply(RewriterBase &b, Location loc, - AffineExpr expr, - ArrayRef operands) { - return makeComposedFoldedAffineApply( - b, loc, AffineMap::inferFromExprList(ArrayRef{expr}).front(), - operands); -} - /// Composes the given affine map with the given list of operands, pulling in /// the maps from any affine.apply operations that supply the operands. 
static void composeMultiResultAffineMap(AffineMap &map, @@ -847,6 +820,44 @@ canonicalizeMapAndOperands(&map, &operands); } +OpFoldResult +mlir::makeComposedFoldedAffineApply(RewriterBase &b, Location loc, + AffineMap map, + ArrayRef operands) { + assert(map.getNumResults() == 1 && "building affine.apply with !=1 result"); + + SmallVector constants; + SmallVector actualValues; + materializeConstants(b, loc, operands, constants, actualValues); + composeAffineMapAndOperands(&map, &actualValues); + OpFoldResult result = createOrFold(b, loc, actualValues, map); + + // Constants are always folded into affine.apply because they can be + // represented as constant expressions, so delete them. + for (Operation *op : constants) + b.eraseOp(op); + return result; +} + +OpFoldResult +mlir::makeComposedFoldedAffineApply(RewriterBase &b, Location loc, + AffineExpr expr, + ArrayRef operands) { + return makeComposedFoldedAffineApply( + b, loc, AffineMap::inferFromExprList(ArrayRef{expr}).front(), + operands); +} + +SmallVector mlir::makeComposedFoldedMultiResultAffineApply( + RewriterBase &b, Location loc, AffineMap map, + ArrayRef operands) { + return llvm::to_vector(llvm::map_range( + llvm::seq(0, map.getNumResults()), [&](unsigned i) { + return makeComposedFoldedAffineApply(b, loc, map.getSubMap({i}), + operands); + })); +} + Value mlir::makeComposedAffineMin(OpBuilder &b, Location loc, AffineMap map, ValueRange operands) { SmallVector allOperands = llvm::to_vector(operands); @@ -854,22 +865,36 @@ return b.createOrFold(loc, b.getIndexType(), map, allOperands); } -OpFoldResult -mlir::makeComposedFoldedAffineMin(RewriterBase &b, Location loc, AffineMap map, - ArrayRef operands) { +template +static OpFoldResult makeComposedFoldedMinMax(RewriterBase &b, Location loc, + AffineMap map, + ArrayRef operands) { SmallVector constants; SmallVector actualValues; materializeConstants(b, loc, operands, constants, actualValues); composeMultiResultAffineMap(map, actualValues); OpFoldResult 
result = - createOrFold(b, loc, actualValues, b.getIndexType(), map); - if (result.is()) { - for (Operation *op : constants) - b.eraseOp(op); - } + createOrFold(b, loc, actualValues, b.getIndexType(), map); + + // Constants are always folded into affine min/max because they can be + // represented as constant expressions, so delete them. + for (Operation *op : constants) + b.eraseOp(op); return result; } +OpFoldResult +mlir::makeComposedFoldedAffineMin(RewriterBase &b, Location loc, AffineMap map, + ArrayRef operands) { + return makeComposedFoldedMinMax(b, loc, map, operands); +} + +OpFoldResult +mlir::makeComposedFoldedAffineMax(RewriterBase &b, Location loc, AffineMap map, + ArrayRef operands) { + return makeComposedFoldedMinMax(b, loc, map, operands); +} + /// Fully compose map with operands and canonicalize the result. /// Return the `createOrFold`'ed AffineApply op. static Value createFoldedComposedAffineApply(OpBuilder &b, Location loc, @@ -896,40 +921,6 @@ return res; } -SmallVector -mlir::applyMapToValues(RewriterBase &b, Location loc, AffineMap map, - ArrayRef values) { - // Materialize constants and keep track of produced operations so we can clean - // them up later. - SmallVector constants; - SmallVector actualValues; - materializeConstants(b, loc, values, constants, actualValues); - - // Compose, fold and construct maps for each result independently because they - // may simplify more effectively. - SmallVector results; - results.reserve(map.getNumResults()); - bool foldedAll = true; - for (auto i : llvm::seq(0, map.getNumResults())) { - AffineMap submap = map.getSubMap({i}); - SmallVector operands = actualValues; - fullyComposeAffineMapAndOperands(&submap, &operands); - canonicalizeMapAndOperands(&submap, &operands); - results.push_back(createOrFold(b, loc, operands, submap)); - if (!results.back().is()) - foldedAll = false; - } - - // If the entire map could be folded, remove the constants that were used in - // the initial ops. 
- if (foldedAll) { - for (Operation *constant : constants) - b.eraseOp(constant); - } - - return results; -} - // A symbol may appear as a dim in affine.apply operations. This function // canonicalizes dims that are valid symbols into actual symbols. template diff --git a/mlir/lib/Dialect/Linalg/IR/CMakeLists.txt b/mlir/lib/Dialect/Linalg/IR/CMakeLists.txt --- a/mlir/lib/Dialect/Linalg/IR/CMakeLists.txt +++ b/mlir/lib/Dialect/Linalg/IR/CMakeLists.txt @@ -16,6 +16,7 @@ LINK_LIBS PUBLIC MLIRAffineDialect MLIRArithmeticDialect + MLIRArithmeticUtils MLIRBufferizationDialect MLIRDialectUtils MLIRInferTypeOpInterface diff --git a/mlir/lib/Dialect/Linalg/IR/LinalgInterfaces.cpp b/mlir/lib/Dialect/Linalg/IR/LinalgInterfaces.cpp --- a/mlir/lib/Dialect/Linalg/IR/LinalgInterfaces.cpp +++ b/mlir/lib/Dialect/Linalg/IR/LinalgInterfaces.cpp @@ -10,6 +10,7 @@ #include "mlir/Dialect/Affine/IR/AffineOps.h" #include "mlir/Dialect/Arithmetic/IR/Arithmetic.h" +#include "mlir/Dialect/Arithmetic/Utils/Utils.h" #include "mlir/Dialect/Complex/IR/Complex.h" #include "mlir/Dialect/MemRef/IR/MemRef.h" #include "mlir/Dialect/Tensor/IR/Tensor.h" @@ -486,13 +487,20 @@ return b.createOrFold(loc, source, dim); llvm_unreachable("Expected MemRefType or TensorType"); } +static OpFoldResult createFoldedDimOp(OpBuilder &b, Location loc, Value source, + int64_t dim) { + auto shapedType = source.getType().cast(); + if (!shapedType.hasRank() || shapedType.isDynamicDim(dim)) + return createOrFoldDimOp(b, loc, source, dim); + return b.getIndexAttr(shapedType.getDimSize(dim)); +} -SmallVector LinalgOp::createFlatListOfOperandDims(OpBuilder &b, - Location loc) { - SmallVector res; +SmallVector LinalgOp::createFlatListOfOperandDims(OpBuilder &b, + Location loc) { + SmallVector res; for (OpOperand *opOperand : getInputAndOutputOperands()) { for (int64_t i = 0, e = getRank(opOperand); i < e; ++i) - res.push_back(createOrFoldDimOp(b, loc, opOperand->get(), i)); + res.push_back(createFoldedDimOp(b, loc, 
opOperand->get(), i)); } return res; } @@ -510,14 +518,13 @@ unsigned numDims = map.getNumDims(), numRes = map.getNumResults(); auto viewSizes = createFlatListOfOperandDims(b, loc); SmallVector res(numDims); - Value zeroVal = b.create(loc, 0); - Value oneVal = b.create(loc, 1); for (unsigned idx = 0; idx < numRes; ++idx) { auto result = map.getResult(idx); if (auto d = result.dyn_cast()) { if (res[d.getPosition()].offset) continue; - res[d.getPosition()] = Range{zeroVal, viewSizes[idx], oneVal}; + res[d.getPosition()] = + Range{b.getIndexAttr(0), viewSizes[idx], b.getIndexAttr(1)}; } } return res; @@ -591,9 +598,11 @@ outputDims.set(resultShapesSubMapPos.first, resultShapesSubMapPos.second); HasAffineDimExprVisitor checkDimExpr(std::move(outputDims)); Location loc = getOperation()->getLoc(); - auto allResultDimValues = - applyMapToValues(b, loc, resultShapesFromInputShapesMap, - createFlatListOfOperandDims(b, loc)); + IRRewriter rewriter(b); + SmallVector allResultDimValues = + makeComposedFoldedMultiResultAffineApply( + rewriter, loc, resultShapesFromInputShapesMap, + createFlatListOfOperandDims(b, loc)); int64_t pos = 0; ArrayRef shapeExprs = resultShapesFromInputShapesMap.getResults(); for (OpOperand *opOperand : getOutputOperands()) { @@ -602,7 +611,8 @@ if (checkDimExpr.visit(shapeExprs[pos])) shapes.push_back(createOrFoldDimOp(b, loc, opOperand->get(), dim)); else - shapes.push_back(allResultDimValues[pos]); + shapes.push_back( + getValueOrCreateConstantIndexOp(b, loc, allResultDimValues[pos])); pos++; } reifiedReturnShapes.emplace_back(std::move(shapes)); diff --git a/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp b/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp --- a/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp +++ b/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp @@ -630,12 +630,8 @@ // plus low padding sizes. 
SmallVector newOffsets; for (const auto &p : llvm::zip(lowPads, oldOffsets)) { - Value padValue = getValueOrCreateConstantIndexOp( - rewriter, srcPadOp.getLoc(), std::get<0>(p)); - Value offsetValue = getValueOrCreateConstantIndexOp( - rewriter, insertOp.getLoc(), std::get<1>(p)); - newOffsets.push_back( - applyMapToValues(rewriter, loc, addMap, {offsetValue, padValue})[0]); + newOffsets.push_back(makeComposedFoldedAffineApply( + rewriter, loc, addMap, {std::get<0>(p), std::get<1>(p)})); } SmallVector newSizes; diff --git a/mlir/lib/Dialect/Linalg/Transforms/BubbleUpExtractSlice.cpp b/mlir/lib/Dialect/Linalg/Transforms/BubbleUpExtractSlice.cpp --- a/mlir/lib/Dialect/Linalg/Transforms/BubbleUpExtractSlice.cpp +++ b/mlir/lib/Dialect/Linalg/Transforms/BubbleUpExtractSlice.cpp @@ -18,6 +18,7 @@ #include "mlir/Dialect/Linalg/IR/Linalg.h" #include "mlir/Dialect/Linalg/Passes.h" #include "mlir/Dialect/Linalg/Transforms/Transforms.h" +#include "mlir/Dialect/Linalg/Utils/Utils.h" #include "mlir/Transforms/GreedyPatternRewriteDriver.h" using namespace mlir; @@ -88,40 +89,35 @@ } auto linalgLoc = linalgOp.getLoc(); - auto allShapeSizes = + SmallVector allShapeSizes = linalgOp.createFlatListOfOperandDims(rewriter, linalgLoc); AffineMap shapeSizesToLoopsMap = linalgOp.getShapesToLoopsMap(); if (!shapeSizesToLoopsMap) { return rewriter.notifyMatchFailure( linalgOp, "failed to get loops map from shape sizes"); } - auto sizeBounds = applyMapToValues(rewriter, linalgLoc, - shapeSizesToLoopsMap, allShapeSizes); - - auto sliceLoc = sliceOp.getLoc(); - auto offsetVals = getValueOrCreateConstantIndexOp( - rewriter, sliceLoc, sliceOp.getMixedOffsets()); - auto sizeVals = getValueOrCreateConstantIndexOp(rewriter, sliceLoc, - sliceOp.getMixedSizes()); + SmallVector sizeBounds = + makeComposedFoldedMultiResultAffineApply( + rewriter, linalgLoc, shapeSizesToLoopsMap, allShapeSizes); // The offsets and sizes from the slice operation only give you the tile // size of the output. 
Use that compute the tile sizes and offsets of the // loops. For loops not used to access the output, set the tile sizes to // loop bounds and set the offset to 0. - Value zero = rewriter.create(linalgLoc, 0); - SmallVector tileOffsets(sizeBounds.size(), zero); - SmallVector tileSizes = sizeBounds; + SmallVector tileOffsets(sizeBounds.size(), + rewriter.getIndexAttr(0)); + SmallVector tileSizes = sizeBounds; for (auto const &result : enumerate(indexingMap.getResults())) { unsigned position = result.value().cast().getPosition(); - tileOffsets[position] = offsetVals[result.index()]; - tileSizes[position] = sizeVals[result.index()]; + tileOffsets[position] = sliceOp.getMixedOffsets()[result.index()]; + tileSizes[position] = sliceOp.getMixedSizes()[result.index()]; } SmallVector valuesToTile = linalgOp.getInputAndOutputOperands(); - - SmallVector tiledOperands = makeTiledShapes( - rewriter, linalgLoc, linalgOp, valuesToTile, tileOffsets, tileSizes, - sizeBounds, /*omitPartialTileCheck=*/true); + SmallVector tiledOperands = + makeTiledShapes(rewriter, linalgLoc, linalgOp, valuesToTile, + tileOffsets, tileSizes, sizeBounds, + /*omitPartialTileCheck=*/true); SmallVector resultTensorTypes; for (OpOperand *opOperand : linalgOp.getOutputTensorOperands()) diff --git a/mlir/lib/Dialect/Linalg/Transforms/DecomposeLinalgOps.cpp b/mlir/lib/Dialect/Linalg/Transforms/DecomposeLinalgOps.cpp --- a/mlir/lib/Dialect/Linalg/Transforms/DecomposeLinalgOps.cpp +++ b/mlir/lib/Dialect/Linalg/Transforms/DecomposeLinalgOps.cpp @@ -109,7 +109,9 @@ auto allShapesSizes = cast(op.getOperation()).createFlatListOfOperandDims(b, loc); AffineMap map = op.getShapesToLoopsMap(); - return getAsOpFoldResult(applyMapToValues(b, loc, map, allShapesSizes)); + IRRewriter rewriter(b); + return makeComposedFoldedMultiResultAffineApply(rewriter, loc, map, + allShapesSizes); } /// Helper method to permute the list of `values` based on the `map`. 
diff --git a/mlir/lib/Dialect/Linalg/Transforms/Fusion.cpp b/mlir/lib/Dialect/Linalg/Transforms/Fusion.cpp --- a/mlir/lib/Dialect/Linalg/Transforms/Fusion.cpp +++ b/mlir/lib/Dialect/Linalg/Transforms/Fusion.cpp @@ -113,24 +113,24 @@ /// obtained from the producer itself, since they are not tiled + fused. static LinalgOp fuse(OpBuilder &b, LinalgOp producer, const DenseMap &fusedLoopsAndRanges) { - SmallVector ivs, tileSizes, sizeBounds; - SmallVector loopRanges; + SmallVector ivs, tileSizes, sizeBounds; + SmallVector loopRanges; Location loc = producer.getLoc(); - auto zero = b.create(loc, 0); for (unsigned i = 0, e = producer.getNumLoops(); i < e; ++i) { auto shapeDim = getShapeDefiningLoopRange(producer, i); - Value dim = createOrFoldDimOp(b, loc, shapeDim.shape, shapeDim.dimension); + OpFoldResult dim = + createFoldedDimOp(b, loc, shapeDim.shape, shapeDim.dimension); sizeBounds.push_back(dim); auto it = fusedLoopsAndRanges.find(i); if (it != fusedLoopsAndRanges.end()) { - ivs.push_back(materializeOpFoldResult(b, loc, it->second.offset)); - tileSizes.push_back(materializeOpFoldResult(b, loc, it->second.size)); + ivs.push_back(it->second.offset); + tileSizes.push_back(it->second.size); loopRanges.push_back(it->second); LLVM_DEBUG(llvm::dbgs() << "tiled loop#" << i << " with LoopRange " << loopRanges.back() << "\n"); } else { - tileSizes.push_back(zero); + tileSizes.push_back(b.getIndexAttr(0)); loopRanges.push_back(Range{b.getIndexAttr(0), dim, b.getIndexAttr(1)}); LLVM_DEBUG(llvm::dbgs() << "full loop#" << i << " with LoopRange " << loopRanges.back() << "\n"); @@ -166,10 +166,8 @@ Operation *clonedOp = producer.clone(b, loc, resultTypes, clonedShapes); // Shift all IndexOp results by the tile offset. 
- SmallVector allIvs; - llvm::transform(loopRanges, std::back_inserter(allIvs), [&](Range range) { - return materializeOpFoldResult(b, loc, range.offset); - }); + SmallVector allIvs = llvm::to_vector( + llvm::map_range(loopRanges, [&](Range range) { return range.offset; })); offsetIndices(b, clonedOp, allIvs); return clonedOp; diff --git a/mlir/lib/Dialect/Linalg/Transforms/FusionOnTensors.cpp b/mlir/lib/Dialect/Linalg/Transforms/FusionOnTensors.cpp --- a/mlir/lib/Dialect/Linalg/Transforms/FusionOnTensors.cpp +++ b/mlir/lib/Dialect/Linalg/Transforms/FusionOnTensors.cpp @@ -141,30 +141,27 @@ Location loc = producerOp.getLoc(); // Obtain the `producerOp` loop bounds and the `sliceOp` ranges. - SmallVector producerLoopBounds; + SmallVector producerLoopBounds; llvm::transform(producerOp.createLoopRanges(b, loc), - std::back_inserter(producerLoopBounds), [&](Range range) { - return materializeOpFoldResult(b, loc, range.size); - }); + std::back_inserter(producerLoopBounds), + [&](Range range) { return range.size; }); SmallVector sliceOpRanges = sliceOp.getOrCreateRanges(b, loc); // Tile the producer operands given the `sliceOp` ranges. Iterate the // `tiledSliceDimIndices` and store the tile offset and size for the tiled // slice dimension. 
- auto zero = b.create(loc, 0); - SmallVector tileIvs(producerOp.getNumLoops(), nullptr); - SmallVector tileSizes(producerOp.getNumLoops(), zero); - SmallVector allIvs(producerOp.getNumLoops(), nullptr); + SmallVector tileIvs(producerOp.getNumLoops(), nullptr); + SmallVector tileSizes(producerOp.getNumLoops(), + b.getIndexAttr(0)); + SmallVector allIvs(producerOp.getNumLoops(), nullptr); for (auto it : zip(tiledSliceDimIndices, tiledProducerLoopIndices)) { int64_t tiledSliceDim = std::get<0>(it); int64_t tiledProducerLoop = std::get<1>(it); - tileIvs[tiledProducerLoop] = - materializeOpFoldResult(b, loc, sliceOpRanges[tiledSliceDim].offset); - tileSizes[tiledProducerLoop] = - materializeOpFoldResult(b, loc, sliceOpRanges[tiledSliceDim].size); + tileIvs[tiledProducerLoop] = sliceOpRanges[tiledSliceDim].offset; + tileSizes[tiledProducerLoop] = sliceOpRanges[tiledSliceDim].size; allIvs[tiledProducerLoop] = tileIvs[tiledProducerLoop]; } - erase_value(tileIvs, nullptr); + erase_value(tileIvs, OpFoldResult()); SmallVector tiledOperands = producerOp.getInputAndOutputOperands(); tiledOperands = makeTiledShapes(b, loc, producerOp, tiledOperands, tileIvs, tileSizes, producerLoopBounds, diff --git a/mlir/lib/Dialect/Linalg/Transforms/Tiling.cpp b/mlir/lib/Dialect/Linalg/Transforms/Tiling.cpp --- a/mlir/lib/Dialect/Linalg/Transforms/Tiling.cpp +++ b/mlir/lib/Dialect/Linalg/Transforms/Tiling.cpp @@ -13,6 +13,7 @@ #include #include "PassDetail.h" +#include "mlir/Dialect/Affine/IR/AffineOps.h" #include "mlir/Dialect/Arithmetic/Utils/Utils.h" #include "mlir/Dialect/ControlFlow/IR/ControlFlowOps.h" #include "mlir/Dialect/Linalg/IR/Linalg.h" @@ -36,20 +37,27 @@ #define DEBUG_TYPE "linalg-tiling" -static bool isZero(Value v) { - if (auto cst = v.getDefiningOp()) +static bool isZero(OpFoldResult v) { + if (!v) + return false; + if (auto attr = v.dyn_cast()) { + IntegerAttr intAttr = attr.dyn_cast(); + return intAttr && intAttr.getValue().isZero(); + } + if (auto cst = 
v.get().getDefiningOp()) return cst.value() == 0; return false; } std::tuple, LoopIndexToRangeIndexMap> mlir::linalg::makeTiledLoopRanges(RewriterBase &b, Location loc, AffineMap map, - ValueRange allShapeSizes, - ValueRange allTileSizes) { + ArrayRef allShapeSizes, + ArrayRef allTileSizes) { assert(allTileSizes.size() == map.getNumResults()); // Apply `map` to get shape sizes in loop order. - auto shapeSizes = applyMapToValues(b, loc, map, allShapeSizes); - SmallVector tileSizes(allTileSizes.begin(), allTileSizes.end()); + SmallVector shapeSizes = + makeComposedFoldedMultiResultAffineApply(b, loc, map, allShapeSizes); + SmallVector tileSizes(allTileSizes.begin(), allTileSizes.end()); // Traverse the tile sizes, which are in loop order, erase zeros everywhere. LoopIndexToRangeIndexMap loopIndexToRangeIndex; @@ -80,7 +88,7 @@ continue; en.value() = ivs[rangeIndex->second]; } - offsetIndices(b, op, allIvs); + offsetIndices(b, op, getAsOpFoldResult(allIvs)); } /// Asserts that the given index-typed value is strictly positive. If the value @@ -121,14 +129,15 @@ // Find the trip count of the iteration space dimension for which the tile // sizes are computed. - // TODO: update createFlatListOfOperandDims to return OpFoldResults and avoid - // littering by useless constant materialization. - SmallVector allShapes = + SmallVector allShapes = op.createFlatListOfOperandDims(b, b.getLoc()); AffineMap shapesToLoops = op.getShapesToLoopsMap(); - SmallVector loopRanges = - applyMapToValues(b, op.getLoc(), shapesToLoops, allShapes); - Value tripCount = loopRanges[dimension]; + IRRewriter rewriter(b); + SmallVector loopRanges = + makeComposedFoldedMultiResultAffineApply(rewriter, op.getLoc(), + shapesToLoops, allShapes); + Value tripCount = + materializeOpFoldResult(rewriter, op.getLoc(), loopRanges[dimension]); // Compute the tile sizes and the respective numbers of tiles. 
AffineExpr s0 = b.getAffineSymbolExpr(0); @@ -181,15 +190,6 @@ subsetExtractOp.getMixedSizes(), subsetExtractOp.getMixedStrides()); } -/// Build an `affine_max` of all the `vals`. -static OpFoldResult buildMax(OpBuilder &b, Location loc, - ArrayRef vals) { - SmallVector args = getValueOrCreateConstantIndexOp(b, loc, vals); - return b.createOrFold( - loc, AffineMap::getMultiDimIdentityMap(vals.size(), loc.getContext()), - args); -} - /// Returns true if the maximum tile offset `tileSize * numThreads-1` is less /// than `iterationSize`. static bool canOmitTileOffsetInBoundsCheck(OpFoldResult tileSize, @@ -203,6 +203,24 @@ return *tileSizeConst * (*numThreadsConst - 1) < *iterSizeConst; } +/// Build an `affine_max` of all the `vals`. +static OpFoldResult buildMax(OpBuilder &b, Location loc, + ArrayRef vals) { + IRRewriter rewriter(b); + return makeComposedFoldedAffineMax( + rewriter, loc, + AffineMap::getMultiDimIdentityMap(vals.size(), loc.getContext()), vals); +} + +/// Build an `affine_min` of all the `vals`. +static OpFoldResult buildMin(OpBuilder &b, Location loc, + ArrayRef vals) { + IRRewriter rewriter(b); + return makeComposedFoldedAffineMin( + rewriter, loc, + AffineMap::getMultiDimIdentityMap(vals.size(), loc.getContext()), vals); +} + /// Rewrite a TilingInterface `op` to a tiled `scf.foreach_thread`. The /// tiling is specified by the number of tiles/threads `numThreads` and the /// optional nominal tile size `nominalTileSizes`. If `nominalTilSizes` is @@ -242,7 +260,6 @@ return materializeOpFoldResult(ilocb, ofr); })); - Value zero = b.create(loc, 0); Operation *tiledOp = nullptr; // Create the ForeachThreadOp. 
We don't use the lambda body-builder @@ -273,9 +290,9 @@ AffineExpr i, j, M, N, O; bindDims(b.getContext(), i, j); bindSymbols(b.getContext(), M, N, O); - Value size = loopRanges[loopIdx].size; - Value offset = loopRanges[loopIdx].offset; - Value threadId = threadIds[threadIdIdx]; + OpFoldResult size = loopRanges[loopIdx].size; + OpFoldResult offset = loopRanges[loopIdx].offset; + OpFoldResult threadId = threadIds[threadIdIdx]; // Symbolic fixed max size per thread. // TODO: floor + 0/1 depending on case for better load-balancing. OpFoldResult tileSizePerThread = @@ -295,9 +312,8 @@ if (!isConstantIntValue(residualTileSize, 0)) { OpFoldResult sizeMinusOffsetPerThread = makeComposedFoldedAffineApply( b, loc, -i + M, {offsetPerThread, size}); - tileSizePerThread = makeComposedFoldedAffineMin( - b, loc, AffineMap::getMultiDimIdentityMap(2, b.getContext()), - ArrayRef{sizeMinusOffsetPerThread, tileSizePerThread}); + tileSizePerThread = + buildMin(b, loc, {sizeMinusOffsetPerThread, tileSizePerThread}); } tiledOffsets.push_back(offsetPerThread); @@ -305,7 +321,8 @@ if (!omitTileOffsetBoundsCheck && !canOmitTileOffsetInBoundsCheck(tileSizePerThread, nonZeroNumThreads[threadIdIdx], size)) - tileSizePerThread = buildMax(b, loc, {zero, tileSizePerThread}); + tileSizePerThread = + buildMax(b, loc, {b.getIndexAttr(0), tileSizePerThread}); tiledSizes.push_back(tileSizePerThread); ++threadIdIdx; @@ -380,7 +397,7 @@ template static FailureOr -tileLinalgOpImpl(RewriterBase &b, LinalgOp op, ValueRange tileSizes, +tileLinalgOpImpl(RewriterBase &b, LinalgOp op, ArrayRef tileSizes, const LinalgTilingOptions &options) { auto nLoops = op.getNumLoops(); // Initial tile sizes may be too big, only take the first nLoops. @@ -395,7 +412,8 @@ } // 1. Build the tiled loop ranges. 
- auto allShapeSizes = op.createFlatListOfOperandDims(b, op.getLoc()); + SmallVector allShapeSizes = + op.createFlatListOfOperandDims(b, op.getLoc()); AffineMap shapeSizesToLoopsMap = op.getShapesToLoopsMap(); if (!shapeSizesToLoopsMap) return failure(); @@ -460,11 +478,14 @@ static_cast(op.getNumInputsAndOutputs()) && "expect the number of operands and inputs and outputs to match"); SmallVector valuesToTile = operandValuesToUse; - auto sizeBounds = - applyMapToValues(b, loc, shapeSizesToLoopsMap, allShapeSizes); - SmallVector tiledOperands = - makeTiledShapes(b, loc, op, valuesToTile, interchangedIvs, tileSizes, - sizeBounds, /*omitPartialTileCheck=*/false); + IRRewriter rewriter(b); + SmallVector sizeBounds = + makeComposedFoldedMultiResultAffineApply( + rewriter, loc, shapeSizesToLoopsMap, allShapeSizes); + SmallVector tiledOperands = makeTiledShapes( + b, loc, op, valuesToTile, getAsOpFoldResult(interchangedIvs), tileSizes, + sizeBounds, + /*omitPartialTileCheck=*/false); SmallVector resultTensorTypes = getTensorOutputTypes(op, tiledOperands); @@ -518,11 +539,10 @@ // dimension. This convention is significantly simpler to handle instead of // adjusting affine maps to account for missing dimensions. auto nLoops = op.getNumLoops(); - SmallVector tileSizeVector = - options.tileSizeComputationFunction(b, op); + SmallVector tileSizeVector = + getAsOpFoldResult(options.tileSizeComputationFunction(b, op)); if (tileSizeVector.size() < nLoops) { - auto zero = b.create(op.getLoc(), 0); - tileSizeVector.append(nLoops - tileSizeVector.size(), zero); + tileSizeVector.append(nLoops - tileSizeVector.size(), b.getIndexAttr(0)); } return tileLinalgOpImpl(b, op, tileSizeVector, options); @@ -555,24 +575,22 @@ newPadOp = cast(builder.clone(*op.getOperation())); // Get rank and tile sizes. 
int64_t rank = op.getResultType().getRank(); - SmallVector tileSizes = - options.tileSizeComputationFunction(builder, op); + SmallVector tileSizes = + getAsOpFoldResult(options.tileSizeComputationFunction(builder, op)); // Normalize untiled padding dimensions to 0. - Value zero = builder.create(loc, 0); - tileSizes.append(rank - tileSizes.size(), zero); + tileSizes.append(rank - tileSizes.size(), builder.getIndexAttr(0)); // Compute lower and upper bounds of the loop nest. TilingInterface tilingInterface = dyn_cast(op.getOperation()); SmallVector ranges = tilingInterface.getIterationDomain(builder); - SmallVector lbs, dims, allDims, steps; + SmallVector lbs, dims, steps; + SmallVector allDims; for (int64_t i = 0; i < rank; ++i) { - Value materializedSize = - materializeOpFoldResult(builder, loc, ranges[i].size); - allDims.push_back(materializedSize); + allDims.push_back(ranges[i].size); if (!isZero(tileSizes[i])) { lbs.push_back(materializeOpFoldResult(builder, loc, ranges[i].offset)); - dims.push_back(materializedSize); - steps.push_back(tileSizes[i]); + dims.push_back(materializeOpFoldResult(builder, loc, ranges[i].size)); + steps.push_back(materializeOpFoldResult(builder, loc, tileSizes[i])); } } // Generate loop nest: One loop per dimension. @@ -583,9 +601,11 @@ [&](OpBuilder &b, Location loc, ValueRange localIvs, ValueRange iterArgs) -> scf::ValueVector { // Compute offsets and sizes of ExtractSliceOp. - SmallVector offsets = - computeTileOffsets(b, loc, localIvs, tileSizes); - SmallVector sizes = computeTileSizes(b, loc, tileSizes, allDims); + SmallVector localIVVector = llvm::to_vector(localIvs); + SmallVector offsets = computeTileOffsets( + b, loc, getAsOpFoldResult(localIVVector), tileSizes); + SmallVector sizes = + computeTileSizes(b, loc, tileSizes, allDims); // Create ExtractSliceOp: Extract a tile from the tensor::PadOp. // Note: The tensor::PadOp is located outside of the loop nest. It is // later moved inside by ExtractSliceOfPadTensorSwapPattern. 
diff --git a/mlir/lib/Dialect/Linalg/Transforms/TilingInterfaceImpl.cpp b/mlir/lib/Dialect/Linalg/Transforms/TilingInterfaceImpl.cpp --- a/mlir/lib/Dialect/Linalg/Transforms/TilingInterfaceImpl.cpp +++ b/mlir/lib/Dialect/Linalg/Transforms/TilingInterfaceImpl.cpp @@ -32,7 +32,7 @@ LinalgOpTy> { /// Return the destination operands. SmallVector getDestinationOperands(Operation *op, OpBuilder &b) const { - return llvm::cast(op).getOutputOperands(); + return cast(op).getOutputOperands(); } /// Return the loop iterator type. @@ -50,13 +50,16 @@ b.setInsertionPoint(op); Location loc = op->getLoc(); LinalgOp linalgOp = cast(op); - auto allShapesSizes = linalgOp.createFlatListOfOperandDims(b, loc); + SmallVector allShapesSizes = + linalgOp.createFlatListOfOperandDims(b, loc); AffineMap map = linalgOp.getShapesToLoopsMap(); - Value zero = b.create(loc, 0); - Value one = b.create(loc, 1); - return llvm::to_vector(llvm::map_range( - applyMapToValues(b, loc, map, allShapesSizes), [&](Value v) { - return Range{zero, v, one}; + + IRRewriter rewriter(b); + return llvm::to_vector( + llvm::map_range(map.getResults(), [&](AffineExpr loopExpr) { + OpFoldResult ofr = makeComposedFoldedAffineApply( + rewriter, loc, loopExpr, allShapesSizes); + return Range{b.getIndexAttr(0), ofr, b.getIndexAttr(1)}; })); } @@ -71,11 +74,8 @@ Location loc = op->getLoc(); LinalgOp linalgOp = cast(op); SmallVector valuesToTile = linalgOp.getInputAndOutputOperands(); - SmallVector offsetValues = - getValueOrCreateConstantIndexOp(b, loc, offsets); SmallVector tiledOperands = makeTiledShapes( - b, loc, linalgOp, valuesToTile, offsetValues, - getValueOrCreateConstantIndexOp(b, loc, sizes), {}, true); + b, loc, linalgOp, valuesToTile, offsets, sizes, {}, true); SmallVector resultTensorTypes = llvm::to_vector(llvm::map_range( linalgOp.getOutputTensorOperands(), [&](OpOperand *opOperand) { @@ -84,7 +84,7 @@ Operation *tiledOp = linalgOp.clone(b, loc, resultTensorTypes, tiledOperands); - offsetIndices(b, 
cast(tiledOp), offsetValues); + offsetIndices(b, cast(tiledOp), offsets); return {tiledOp}; } @@ -102,28 +102,16 @@ AffineExpr d0; bindDims(b.getContext(), d0); - - auto fullyComposeAffineMapAndOperands = [](OpBuilder &builder, Location loc, - AffineExpr expr, - ValueRange operands) -> Value { - AffineMap map = AffineMap::inferFromExprList({expr}).front(); - SmallVector normalizedOperands(operands.begin(), operands.end()); - mlir::fullyComposeAffineMapAndOperands(&map, &normalizedOperands); - canonicalizeMapAndOperands(&map, &normalizedOperands); - return builder.createOrFold(loc, map, normalizedOperands); - }; - - SmallVector sizeVals = - getValueOrCreateConstantIndexOp(b, loc, sizes); - SmallVector subShapeSizes = - llvm::to_vector(llvm::map_range(sizeVals, [&](Value v) { - return fullyComposeAffineMapAndOperands(b, loc, d0 - 1, v); + IRRewriter rewriter(b); + SmallVector subShapeSizes = + llvm::to_vector(llvm::map_range(sizes, [&](OpFoldResult ofr) { + return makeComposedFoldedAffineApply(rewriter, loc, d0 - 1, ofr); })); + OpOperand *outOperand = linalgOp.getOutputOperand(resultNumber); Value sliceOpResult = - makeTiledShape(b, loc, outOperand->get(), sizeVals, - linalgOp.getTiedIndexingMap(outOperand), - getValueOrCreateConstantIndexOp(b, loc, offsets), + makeTiledShape(b, loc, outOperand->get(), sizes, + linalgOp.getTiedIndexingMap(outOperand), offsets, /*ubs*/ {}, subShapeSizes, true); auto sliceOp = sliceOpResult.getDefiningOp(); if (!sliceOp) diff --git a/mlir/lib/Dialect/Linalg/Transforms/Transforms.cpp b/mlir/lib/Dialect/Linalg/Transforms/Transforms.cpp --- a/mlir/lib/Dialect/Linalg/Transforms/Transforms.cpp +++ b/mlir/lib/Dialect/Linalg/Transforms/Transforms.cpp @@ -142,14 +142,18 @@ if (!linalgOp) return tileSizes; Location loc = linalgOp.getLoc(); - auto allShapeSizes = linalgOp.createFlatListOfOperandDims(b, loc); + SmallVector allShapeSizes = + linalgOp.createFlatListOfOperandDims(b, loc); AffineMap map = linalgOp.getShapesToLoopsMap(); if (!map) 
return tileSizes; - auto shapeSizes = applyMapToValues(b, loc, map, allShapeSizes); + IRRewriter rewriter(b); + SmallVector shapeSizes = + makeComposedFoldedMultiResultAffineApply(rewriter, loc, map, + allShapeSizes); // If the shape size is dynamic, tile by 1. Otherwise, do not tile (tile // size 0). - for (Value shapeSize : shapeSizes) + for (OpFoldResult shapeSize : shapeSizes) tileSizes.push_back(getConstantIntValue(shapeSize) ? b.create(loc, 0) : b.create(loc, 1)); diff --git a/mlir/lib/Dialect/Linalg/Utils/Utils.cpp b/mlir/lib/Dialect/Linalg/Utils/Utils.cpp --- a/mlir/lib/Dialect/Linalg/Utils/Utils.cpp +++ b/mlir/lib/Dialect/Linalg/Utils/Utils.cpp @@ -42,8 +42,14 @@ using namespace mlir::linalg; using namespace mlir::scf; -static bool isZero(Value v) { - if (auto cst = v.getDefiningOp()) +static bool isZero(OpFoldResult v) { + if (!v) + return false; + if (auto attr = v.dyn_cast()) { + IntegerAttr intAttr = attr.dyn_cast(); + return intAttr && intAttr.getValue().isZero(); + } + if (auto cst = v.get().getDefiningOp()) return cst.value() == 0; return false; } @@ -59,7 +65,7 @@ // `d0 + 2 * d1 + d3` is tiled by [0, 0, 0, 2] but not by [0, 0, 2, 0] // struct TileCheck : public AffineExprVisitor { - TileCheck(ValueRange tileSizes) : tileSizes(tileSizes) {} + TileCheck(ArrayRef tileSizes) : tileSizes(tileSizes) {} void visitDimExpr(AffineDimExpr expr) { isTiled |= !isZero(tileSizes[expr.getPosition()]); @@ -72,12 +78,12 @@ "nonpositive multiplying coefficient"); } bool isTiled = false; - ValueRange tileSizes; + ArrayRef tileSizes; }; } // namespace -static bool isTiled(AffineExpr expr, ValueRange tileSizes) { +static bool isTiled(AffineExpr expr, ArrayRef tileSizes) { if (!expr) return false; TileCheck t(tileSizes); @@ -86,7 +92,7 @@ } // Checks whether the `map varies with respect to a non-zero `tileSize`. 
-static bool isTiled(AffineMap map, ValueRange tileSizes) { +static bool isTiled(AffineMap map, ArrayRef tileSizes) { if (!map) return false; for (unsigned r = 0; r < map.getNumResults(); ++r) @@ -201,6 +207,14 @@ llvm_unreachable("Expected MemRefType or TensorType"); } +OpFoldResult createFoldedDimOp(OpBuilder &b, Location loc, Value source, + int64_t dim) { + auto shapedType = source.getType().cast(); + if (!shapedType.hasRank() || shapedType.isDynamicDim(dim)) + return createOrFoldDimOp(b, loc, source, dim); + return b.getIndexAttr(shapedType.getDimSize(dim)); +} + /// Given an operation, retrieves the value of each dynamic dimension through /// constructing the necessary DimOp operators. SmallVector getDynOperands(Location loc, Value val, OpBuilder &b) { @@ -788,18 +802,10 @@ assert(ivs.size() == iteratorTypes.size() && "did not generate enough loops"); } -static Value fullyComposeAndAffineApply(OpBuilder &b, Location loc, - AffineExpr expr, ValueRange operands) { - AffineMap map = AffineMap::inferFromExprList({expr}).front(); - SmallVector normalizedOperands(operands.begin(), operands.end()); - mlir::fullyComposeAffineMapAndOperands(&map, &normalizedOperands); - canonicalizeMapAndOperands(&map, &normalizedOperands); - return b.createOrFold(loc, map, normalizedOperands); -} - Value makeTiledShape(OpBuilder &builder, Location loc, Value valueToTile, - ValueRange tileSizes, AffineMap map, ValueRange lbs, - ValueRange ubs, ValueRange subShapeSizes, + ArrayRef tileSizes, AffineMap map, + ArrayRef lbs, ArrayRef ubs, + ArrayRef subShapeSizes, bool omitPartialTileCheck) { auto shapedType = valueToTile.getType().dyn_cast(); assert(shapedType && "only shaped types can be tiled"); @@ -815,8 +821,8 @@ LLVM_DEBUG(llvm::dbgs() << "makeTiledShape: for dim#" << r); if (!isTiled(map.getSubMap({r}), tileSizes)) { offsets.push_back(builder.getIndexAttr(0)); - Value dim = createOrFoldDimOp(builder, loc, valueToTile, r); - sizes.push_back(getAsOpFoldResult(dim)); + OpFoldResult 
dim = createFoldedDimOp(builder, loc, valueToTile, r); + sizes.push_back(dim); strides.push_back(builder.getIndexAttr(1)); LLVM_DEBUG(llvm::dbgs() << ": not tiled: use size: " << dim << "\n"); continue; @@ -827,14 +833,15 @@ // (i.e. the op does not subsample, stepping occurs in the loop). auto m = map.getSubMap({r}); LLVM_DEBUG(llvm::dbgs() << "makeTiledShape: submap: " << m << "\n"); - auto offset = applyMapToValues(builder, loc, m, lbs).front(); - offsets.push_back(getAsOpFoldResult(offset)); - auto closedIntSize = - applyMapToValues(builder, loc, m, subShapeSizes).front(); + IRRewriter rewriter(builder); + OpFoldResult offset = makeComposedFoldedAffineApply(rewriter, loc, m, lbs); + offsets.push_back(offset); + OpFoldResult closedIntSize = + makeComposedFoldedAffineApply(rewriter, loc, m, subShapeSizes); // Resulting size needs to be made half open interval again. AffineExpr s0 = getAffineSymbolExpr(0, builder.getContext()); - Value size = - fullyComposeAndAffineApply(builder, loc, s0 + 1, closedIntSize); + OpFoldResult size = + makeComposedFoldedAffineApply(rewriter, loc, s0 + 1, closedIntSize); LLVM_DEBUG(llvm::dbgs() << "makeTiledShape: raw size: " << size << "\n"); LLVM_DEBUG(llvm::dbgs() << "makeTiledShape: new offset: " << offset << "\n"); @@ -844,7 +851,7 @@ // We statically know that the partial/boundary tile condition is // unnecessary. LLVM_DEBUG(llvm::dbgs() << "makeTiledShape: new size: " << size << "\n"); - sizes.push_back(getAsOpFoldResult(size)); + sizes.push_back(size); continue; } @@ -854,10 +861,10 @@ // b. The subshape size is 1. According to the way the loops are set up, // tensors with "0" dimensions would never be constructed. 
int64_t shapeSize = shape[r]; - auto sizeCst = size.getDefiningOp(); - auto hasTileSizeOne = sizeCst && sizeCst.value() == 1; + Optional sizeCst = getConstantIntValue(size); + auto hasTileSizeOne = sizeCst && *sizeCst == 1; auto dividesEvenly = sizeCst && !ShapedType::isDynamic(shapeSize) && - ((shapeSize % sizeCst.value()) == 0); + ((shapeSize % *sizeCst) == 0); if (!hasTileSizeOne && !dividesEvenly) { LLVM_DEBUG(llvm::dbgs() << "makeTiledShape: shapeSize=" << shapeSize << ", size: " << size @@ -878,25 +885,25 @@ AffineMap plusOneMap = AffineMap::inferFromExprList({ArrayRef{dim0 + 1}}) .front(); - auto maxIndices = llvm::to_vector<8>(llvm::map_range(ubs, [&](Value ub) { - return makeComposedAffineApply(builder, loc, minusOneMap, {ub}) - .getResult(); - })); - Value maxIndex = applyMapToValues(builder, loc, m, maxIndices).front(); - Value d = makeComposedAffineApply(builder, loc, plusOneMap, {maxIndex}); + SmallVector maxIndices = + llvm::to_vector(llvm::map_range(ubs, [&](OpFoldResult ub) { + return makeComposedFoldedAffineApply(rewriter, loc, minusOneMap, + {ub}); + })); + OpFoldResult maxIndex = + makeComposedFoldedAffineApply(rewriter, loc, m, maxIndices); + OpFoldResult d = + makeComposedFoldedAffineApply(rewriter, loc, plusOneMap, {maxIndex}); // Compute min(dim - offset, size) to avoid out-of-bounds accesses. 
AffineMap minMap = AffineMap::inferFromExprList( {ArrayRef{dim1 - dim2, dim0}}) .front(); - SmallVector operands{size, d, offset}; - fullyComposeAffineMapAndOperands(&minMap, &operands); - canonicalizeMapAndOperands(&minMap, &operands); - size = builder.create(loc, builder.getIndexType(), minMap, - operands); + size = + makeComposedFoldedAffineMin(rewriter, loc, minMap, {size, d, offset}); } LLVM_DEBUG(llvm::dbgs() << "makeTiledShape: new size: " << size << "\n"); - sizes.push_back(getAsOpFoldResult(size)); + sizes.push_back(size); } auto *sliceOp = TypeSwitch(shapedType) @@ -914,31 +921,31 @@ return sliceOp->getResult(0); } -SmallVector computeTileOffsets(OpBuilder &b, Location loc, - ValueRange ivs, ValueRange tileSizes) { - SmallVector offsets; +SmallVector computeTileOffsets(OpBuilder &b, Location loc, + ArrayRef ivs, + ArrayRef tileSizes) { + SmallVector offsets; for (unsigned idx = 0, idxIvs = 0, e = tileSizes.size(); idx < e; ++idx) { LLVM_DEBUG(llvm::dbgs() << "makeTiledShapes: for loop#" << idx << "\n"); bool isTiled = !isZero(tileSizes[idx]); - offsets.push_back( - isTiled ? ivs[idxIvs++] - : b.create(loc, 0).getResult()); + offsets.push_back(isTiled ? ivs[idxIvs++] : b.getIndexAttr(0)); LLVM_DEBUG(llvm::dbgs() << "computeTileOffsets: " << offsets.back() << "\n"); } return offsets; } -SmallVector computeTileSizes(OpBuilder &b, Location loc, - ValueRange tileSizes, - ArrayRef sizeBounds) { - SmallVector sizes; +SmallVector computeTileSizes(OpBuilder &b, Location loc, + ArrayRef tileSizes, + ArrayRef sizeBounds) { + SmallVector sizes; for (unsigned idx = 0, e = tileSizes.size(); idx < e; ++idx) { bool isTiled = !isZero(tileSizes[idx]); // Before composing, we need to make range a closed interval. - Value size = isTiled ? tileSizes[idx] : sizeBounds[idx]; + OpFoldResult size = isTiled ? 
tileSizes[idx] : sizeBounds[idx]; AffineExpr d0 = getAffineDimExpr(0, b.getContext()); - sizes.push_back(fullyComposeAndAffineApply(b, loc, d0 - 1, size)); + IRRewriter rewriter(b); + sizes.push_back(makeComposedFoldedAffineApply(rewriter, loc, d0 - 1, size)); LLVM_DEBUG(llvm::dbgs() << "computeTileSizes: " << sizes.back() << "\n"); } return sizes; @@ -981,6 +988,9 @@ Value materializeOpFoldResult(ImplicitLocOpBuilder &builder, OpFoldResult opFoldResult) { + if (!opFoldResult) + return nullptr; + if (auto value = opFoldResult.dyn_cast()) return value; auto attr = opFoldResult.get().cast(); @@ -993,27 +1003,27 @@ return materializeOpFoldResult(b, opFoldResult); } -SmallVector makeTiledShapes(OpBuilder &b, Location loc, - LinalgOp linalgOp, - ArrayRef valuesToTile, - ValueRange ivs, ValueRange tileSizes, - ArrayRef sizeBounds, - bool omitPartialTileCheck) { +SmallVector makeTiledShapes(OpBuilder &b, Location loc, + LinalgOp linalgOp, ValueRange valuesToTile, + ArrayRef ivs, + ArrayRef tileSizes, + ArrayRef sizeBounds, + bool omitPartialTileCheck) { assert(ivs.size() == static_cast(llvm::count_if( llvm::make_range(tileSizes.begin(), tileSizes.end()), - [](Value v) { return !isZero(v); })) && + [](OpFoldResult v) { return !isZero(v); })) && "expected as many ivs as non-zero sizes"); // Construct (potentially temporary) mins and maxes on which to apply maps // that define tile subshapes. 
- SmallVector lbs = computeTileOffsets(b, loc, ivs, tileSizes); - SmallVector subShapeSizes = + SmallVector lbs = computeTileOffsets(b, loc, ivs, tileSizes); + SmallVector subShapeSizes = computeTileSizes(b, loc, tileSizes, sizeBounds); assert(static_cast(valuesToTile.size()) == linalgOp.getNumInputsAndOutputs() && "expected one value to tile for every operand"); - SmallVector tiledShapes; + SmallVector tiledShapes; tiledShapes.reserve(valuesToTile.size()); for (OpOperand *opOperand : linalgOp.getInputAndOutputOperands()) { Value shapedOp = valuesToTile[opOperand->getOperandNumber()]; @@ -1040,28 +1050,30 @@ return tiledShapes; } -void offsetIndices(OpBuilder &b, LinalgOp linalgOp, ArrayRef offsets) { +void offsetIndices(OpBuilder &b, LinalgOp linalgOp, + ArrayRef offsets) { IRRewriter rewriter(b); offsetIndices(rewriter, linalgOp, offsets); } void offsetIndices(RewriterBase &b, LinalgOp linalgOp, - ArrayRef offsets) { + ArrayRef offsets) { if (!linalgOp.hasIndexSemantics()) return; for (IndexOp indexOp : linalgOp.getBlock()->getOps()) { - if (indexOp.dim() >= offsets.size() || offsets[indexOp.dim()] == nullptr) + if (indexOp.dim() >= offsets.size() || !offsets[indexOp.dim()]) continue; OpBuilder::InsertionGuard guard(b); b.setInsertionPointAfter(indexOp); AffineExpr index, offset; bindDims(b.getContext(), index, offset); - AffineApplyOp applyOp = makeComposedAffineApply( + OpFoldResult applied = makeComposedFoldedAffineApply( b, indexOp.getLoc(), index + offset, - ValueRange{indexOp.getResult(), offsets[indexOp.dim()]}); - b.replaceOpWithIf(indexOp, applyOp.getResult(), [&](OpOperand &use) { - return use.getOwner() != applyOp; + {getAsOpFoldResult(indexOp.getResult()), offsets[indexOp.dim()]}); + Value materialized = materializeOpFoldResult(b, indexOp.getLoc(), applied); + b.replaceOpWithIf(indexOp, materialized, [&](OpOperand &use) { + return use.getOwner() != materialized.getDefiningOp(); }); } } diff --git a/mlir/lib/Dialect/Utils/StaticValueUtils.cpp 
b/mlir/lib/Dialect/Utils/StaticValueUtils.cpp --- a/mlir/lib/Dialect/Utils/StaticValueUtils.cpp +++ b/mlir/lib/Dialect/Utils/StaticValueUtils.cpp @@ -52,6 +52,8 @@ /// Given a value, try to extract a constant Attribute. If this fails, return /// the original value. OpFoldResult getAsOpFoldResult(Value val) { + if (!val) + return OpFoldResult(); Attribute attr; if (matchPattern(val, m_Constant(&attr))) return attr; diff --git a/mlir/test/Dialect/Linalg/resolve-shaped-type-result-dims.mlir b/mlir/test/Dialect/Linalg/resolve-shaped-type-result-dims.mlir --- a/mlir/test/Dialect/Linalg/resolve-shaped-type-result-dims.mlir +++ b/mlir/test/Dialect/Linalg/resolve-shaped-type-result-dims.mlir @@ -65,7 +65,7 @@ return %3, %4 : index, index } // CHECK: #[[MAP0:.+]] = affine_map<()[s0, s1] -> (s0 + s1)> -// CHECK: #[[MAP1:.+]] = affine_map<()[s0, s1] -> (s1 - s0)> +// CHECK: #[[MAP1:.+]] = affine_map<()[s0, s1] -> (-s0 + s1)> // CHECK: func @remove_dim_result_uses // CHECK-SAME: %[[ARG0:[a-zA-Z0-9_]+]]: tensor // CHECK-SAME: %[[ARG1:[a-zA-Z0-9_]+]]: tensor diff --git a/mlir/test/Dialect/Linalg/tile-and-fuse-tensors.mlir b/mlir/test/Dialect/Linalg/tile-and-fuse-tensors.mlir --- a/mlir/test/Dialect/Linalg/tile-and-fuse-tensors.mlir +++ b/mlir/test/Dialect/Linalg/tile-and-fuse-tensors.mlir @@ -196,7 +196,7 @@ // CHECK: #[[BOUND8_MAP_2:.+]] = affine_map<(d0)[s0, s1] -> (-d0 + s1, -d0 + s0, 8)> // CHECK: #[[BOUND16_MAP:.+]] = affine_map<(d0)[s0] -> (-d0 + s0, 16)> // CHECK: #[[X2_MAP:.+]] = affine_map<(d0) -> (d0 * 2)> -// CHECK: #[[INPUT_BOUND:.+]] = affine_map<(d0, d1)[s0, s1] -> (d1 * -2 + s0 + s1 * 2 - 2, d0 * 2 + s0 - 2)> +// CHECK: #[[INPUT_BOUND:.+]] = affine_map<(d0, d1)[s0, s1] -> (d0 * -2 + s0 * 2 + s1 - 2, d1 * 2 + s1 - 2)> // CHECK: #[[BOUND16_MAP_2:.+]] = affine_map<(d0)[s0, s1] -> (-d0 + s1, -d0 + s0, 16)> // CHECK: #[[BOUND4_MAP:.+]] = affine_map<(d0)[s0] -> (-d0 + s0, 4)> // CHECK: #[[BOUND2_MAP:.+]] = affine_map<(d0)[s0] -> (-d0 + s0, 2)> @@ -234,13 +234,13 @@ // 
CHECK-NEXT: scf.for %[[IV1:.+]] = %{{.+}} to %[[ELEM_OH]] // CHECK-NEXT: %[[SIZE_ELEM_OH:.+]] = affine.min #[[BOUND16_MAP]](%[[IV1]])[%[[ELEM_OH]]] // CHECK-NEXT: %[[OFFSET_OH:.+]] = affine.apply #[[X2_MAP]](%[[IV1]]) -// CHECK-NEXT: %[[SIZE_INPUT_H:.+]] = affine.min #[[INPUT_BOUND]](%[[SIZE_ELEM_OH]], %[[IV1]])[%[[FILTER_H]], %[[FILL_H]]] +// CHECK-NEXT: %[[SIZE_INPUT_H:.+]] = affine.min #[[INPUT_BOUND]](%[[IV1]], %[[SIZE_ELEM_OH]])[%[[FILL_H]], %[[FILTER_H]]] // CHECK-NEXT: %[[SIZE_ELEM_OH_2:.+]] = affine.min #[[BOUND16_MAP_2]](%[[IV1]])[%[[FILL_H]], %[[ELEM_OH]]] // CHECK-NEXT: scf.for %[[IV2:.+]] = %{{.+}} to %[[ELEM_OW]] // CHECK-NEXT: %[[SIZE_ELEM_OW:.+]] = affine.min #[[BOUND4_MAP]](%[[IV2]])[%[[ELEM_OW]]] // CHECK-NEXT: %[[SIZE_ELEM_OC:.+]] = affine.min #[[BOUND2_MAP]](%[[IV2]])[%[[ELEM_OC]]] // CHECK-NEXT: %[[OFFSET_OW:.+]] = affine.apply #[[X2_MAP]](%[[IV2]]) -// CHECK-NEXT: %[[SIZE_INPUT_W:.+]] = affine.min #[[INPUT_BOUND]](%[[SIZE_ELEM_OW]], %[[IV2]])[%[[FILTER_W]], %[[FILL_W]]] +// CHECK-NEXT: %[[SIZE_INPUT_W:.+]] = affine.min #[[INPUT_BOUND]](%[[IV2]], %[[SIZE_ELEM_OW]])[%[[FILL_W]], %[[FILTER_W]]] // CHECK-NEXT: %[[ST_INPUT:.+]] = tensor.extract_slice %[[INPUT]][%[[IV0]], %[[OFFSET_OH]], %[[OFFSET_OW]], 0] // CHECK-SAME: [%[[SIZE_INPUT_N]], %[[SIZE_INPUT_H]], %[[SIZE_INPUT_W]], %[[INPUT_C]]] // CHECK-NEXT: %[[SIZE_ELEM_OW_2:.+]] = affine.min #[[BOUND4_MAP_2]](%[[IV2]])[%[[FILL_W]], %[[ELEM_OW]]] diff --git a/mlir/test/Dialect/Linalg/tile-conv.mlir b/mlir/test/Dialect/Linalg/tile-conv.mlir --- a/mlir/test/Dialect/Linalg/tile-conv.mlir +++ b/mlir/test/Dialect/Linalg/tile-conv.mlir @@ -1,7 +1,7 @@ // RUN: mlir-opt %s -linalg-tile="tile-sizes=2,3" | FileCheck %s -// CHECK-DAG: #[[MAP0:.*]] = affine_map<(d0)[s0, s1] -> (-d0 + s0 + s1 - 1, s0 + 1)> -// CHECK-DAG: #[[MAP1:.*]] = affine_map<(d0)[s0, s1] -> (-d0 + s0 + s1 - 1, s0 + 2)> +// CHECK-DAG: #[[MAP0:.*]] = affine_map<(d0)[s0, s1] -> (-d0 + s0 + s1 - 1, s1 + 1)> +// CHECK-DAG: #[[MAP1:.*]] = 
affine_map<(d0)[s0, s1] -> (-d0 + s0 + s1 - 1, s1 + 2)> // CHECK-DAG: #[[MAP2:.*]] = affine_map<(d0)[s0] -> (-d0 + s0, 2)> // CHECK-DAG: #[[MAP3:.*]] = affine_map<(d0)[s0] -> (-d0 + s0, 3)> @@ -24,8 +24,8 @@ // CHECK-DAG: %[[T3:.*]] = memref.dim %[[ARG2]], %[[C1]] // CHECK: scf.for %[[ARG3:.*]] = %[[C0]] to %[[T2]] step %[[C2]] // CHECK: scf.for %[[ARG4:.*]] = %[[C0]] to %[[T3]] step %[[C3]] -// CHECK: %[[T4:.*]] = affine.min #[[MAP0]](%[[ARG3]])[%[[T0]], %[[T2]]] -// CHECK: %[[T5:.*]] = affine.min #[[MAP1]](%[[ARG4]])[%[[T1]], %[[T3]]] +// CHECK: %[[T4:.*]] = affine.min #[[MAP0]](%[[ARG3]])[%[[T2]], %[[T0]]] +// CHECK: %[[T5:.*]] = affine.min #[[MAP1]](%[[ARG4]])[%[[T3]], %[[T1]]] // CHECK: %[[SV1:.*]] = memref.subview %[[ARG0]][%[[ARG3]], %[[ARG4]]] [%[[T4]], %[[T5]]] // CHECK: %[[T6:.*]] = affine.min #[[MAP2]](%[[ARG3]])[%[[T2]] // CHECK: %[[T7:.*]] = affine.min #[[MAP3]](%[[ARG4]])[%[[T3]]] diff --git a/mlir/test/Dialect/Linalg/transform-op-fuse.mlir b/mlir/test/Dialect/Linalg/transform-op-fuse.mlir --- a/mlir/test/Dialect/Linalg/transform-op-fuse.mlir +++ b/mlir/test/Dialect/Linalg/transform-op-fuse.mlir @@ -61,15 +61,14 @@ %five = arith.constant 5.0 : f32 %init = linalg.init_tensor [12, 25] : tensor<12x25xf32> -// CHECK-DAG: %[[C4:.+]] = arith.constant 4 : index -// CHECK-DAG: %[[C5:.+]] = arith.constant 5 : index -// CHECK-DAG: %[[C7:.+]] = arith.constant 7 : index - // CHECK: %[[INIT:.+]] = linalg.init_tensor [12, 25] +// CHECK-DAG: %[[C5:.+]] = arith.constant 5 : index +// CHECK-DAG: %[[C7:.+]] = arith.constant 7 : index // CHECK: scf.for %[[IV0:.+]] = %{{.+}} to %{{.+}} step %[[C5]] iter_args(%[[FOR_ARG0:.+]] = %[[INIT]]) // CHECK: scf.for %[[IV1:.+]] = %{{.+}} to %{{.+}} step %[[C7]] iter_args(%[[FOR_ARG1:.+]] = %[[FOR_ARG0]]) // CHECK: %[[OUT_SLICE0:.+]] = tensor.extract_slice %[[FOR_ARG1]][%[[IV0]], %[[IV1]]] // CHECK: %[[FILL:.+]] = linalg.fill {{.+}} outs(%[[OUT_SLICE0]] : tensor) +// CHECK: %[[C4:.+]] = arith.constant 4 : index // CHECK: scf.for 
%[[IV2:.+]] = %{{.+}} to %{{.+}} step %[[C4]] iter_args(%[[FOR_ARG2:.+]] = %[[FILL]]) // CHECK: %[[IN_SLICE:.+]] = tensor.extract_slice %[[INPUT]] // CHECK: %[[OUT_SLICE2:.+]] = tensor.extract_slice %[[FOR_ARG2]][0, 0]