diff --git a/mlir/include/mlir/Dialect/Linalg/Utils/Utils.h b/mlir/include/mlir/Dialect/Linalg/Utils/Utils.h --- a/mlir/include/mlir/Dialect/Linalg/Utils/Utils.h +++ b/mlir/include/mlir/Dialect/Linalg/Utils/Utils.h @@ -86,7 +86,27 @@ bool isFusableInto(const LinalgDependenceGraph &graph, LinalgOp consumer, Value consumedView, LinalgOp producer); -/// Creates extract_slice/subview ops for all `tiledOperands` of the given +/// Compute tile offsets, given a list of loop `ivs` and `tileSizes`. In case a +/// tile size is zero (i.e., no tiling), the corresponding offset is also zero. +SmallVector computeTileOffsets(OpBuilder &b, Location loc, + ValueRange ivs, ValueRange tileSizes); + +/// Compute tile sizes, given a list of loop `ivs`, `tileSizes` and dimension +/// sizes (`sizeBounds`). In case a tile size is zero (i.e., no tiling), the +/// corresponding result size is the corresponding value from `sizeBounds`. +/// Note: The returned tile sizes are closed intervals. +SmallVector computeTileSizes(OpBuilder &b, Location loc, ValueRange ivs, + ValueRange tileSizes, + ArrayRef sizeBounds); + +/// Creates an extract_slice/subview op for a single `valueToTile` with +/// `builder`. This new operation extracts a tile of `valueToTile`, starting +/// at offsets `lbs` and with sizes `subShapeSizes`. +Value makeTiledShape(OpBuilder &builder, Location loc, Value valueToTile, + ValueRange tileSizes, AffineMap map, ValueRange lbs, + ValueRange subShapeSizes); + +/// Creates extract_slice/subview ops for all `valuesToTile` of the given /// `linalgOp` with `builder`, assuming `linalgOp` is being fused into a loop /// nest for tiling with the given induction variables `ivs` and tile sizes /// `tileSizes`. `sizeBounds` are the iteration space bounds for *all* the @@ -97,7 +117,7 @@ /// number of values in `ivs`. SmallVector makeTiledShapes(OpBuilder &builder, Location loc, LinalgOp linalgOp, - ArrayRef tiledOperands, + ArrayRef valuesToTile, ValueRange ivs, ValueRange tileSizes, ArrayRef sizeBounds); diff --git a/mlir/lib/Dialect/Linalg/Utils/Utils.cpp b/mlir/lib/Dialect/Linalg/Utils/Utils.cpp --- a/mlir/lib/Dialect/Linalg/Utils/Utils.cpp +++ b/mlir/lib/Dialect/Linalg/Utils/Utils.cpp @@ -532,6 +532,112 @@ assert(ivs.size() == iteratorTypes.size() && "did not generate enough loops"); } +Value makeTiledShape(OpBuilder &builder, Location loc, Value valueToTile, + ValueRange tileSizes, AffineMap map, ValueRange lbs, + ValueRange subShapeSizes) { + auto shapedType = valueToTile.getType().dyn_cast(); + assert(shapedType && "only shaped types can be tiled"); + ArrayRef shape = shapedType.getShape(); + int64_t rank = shapedType.getRank(); + + // Construct a new subview / extract_slice for the tile. + SmallVector offsets, sizes, strides; + offsets.reserve(rank); + sizes.reserve(rank); + strides.reserve(rank); + for (unsigned r = 0; r < rank; ++r) { + LLVM_DEBUG(llvm::dbgs() << "makeTiledShape: for dim#" << r); + if (!isTiled(map.getSubMap({r}), tileSizes)) { + offsets.push_back(builder.getIndexAttr(0)); + Value dim = createOrFoldDimOp(builder, loc, valueToTile, r); + sizes.push_back(dim); + strides.push_back(builder.getIndexAttr(1)); + LLVM_DEBUG(llvm::dbgs() << ": not tiled: use size: " << dim << "\n"); + continue; + } + LLVM_DEBUG(llvm::dbgs() << ": tiled: figure out subsize...\n"); + + // Tiling creates a new slice at the proper index, the slice step is 1 + // (i.e. the op does not subsample, stepping occurs in the loop). + auto m = map.getSubMap({r}); + LLVM_DEBUG(llvm::dbgs() << "makeTiledShape: submap: " << m << "\n"); + auto offset = applyMapToValues(builder, loc, m, lbs).front(); + offsets.push_back(offset); + auto closedIntSize = + applyMapToValues(builder, loc, m, subShapeSizes).front(); + // Resulting size needs to be made half open interval again. + AffineExpr s0 = getAffineSymbolExpr(0, builder.getContext()); + Value size = makeComposedAffineApply(builder, loc, s0 + 1, closedIntSize); + LLVM_DEBUG(llvm::dbgs() << "makeTiledShape: raw size: " << size << "\n"); + + // The size of the subview / extract_slice should be trimmed to avoid + // out-of-bounds accesses, unless we statically know the subshape size + // divides the shape size evenly. + int64_t shapeSize = shape[r]; + auto sizeCst = size.getDefiningOp(); + if (ShapedType::isDynamic(shapeSize) || !sizeCst || + (shapeSize % sizeCst.getValue()) != 0) { + LLVM_DEBUG(llvm::dbgs() << "makeTiledShape: shapeSize=" << shapeSize + << ", size: " << size + << ": make sure in bound with affine.min\n"); + AffineExpr dim0, dim1, dim2; + bindDims(builder.getContext(), dim0, dim1, dim2); + // Compute min(size, dim - offset) to avoid out-of-bounds accesses. + AffineMap minMap = + AffineMap::inferFromExprList( + ArrayRef>{{dim0, dim1 - dim2}}) + .front(); + Value d = createOrFoldDimOp(builder, loc, valueToTile, r); + SmallVector operands{size, d, offset}; + fullyComposeAffineMapAndOperands(&minMap, &operands); + size = builder.create(loc, builder.getIndexType(), minMap, + operands); + } + + sizes.push_back(size); + LLVM_DEBUG(llvm::dbgs() + << "makeTiledShape: new offset: " << offset << "\n"); + LLVM_DEBUG(llvm::dbgs() << "makeTiledShape: new size: " << size << "\n"); + strides.push_back(builder.getIndexAttr(1)); + } + + Operation *sliceOp = shapedType.isa() + ? builder.create( + loc, valueToTile, offsets, sizes, strides) + : builder.create( + loc, valueToTile, offsets, sizes, strides); + return sliceOp->getResult(0); +} + +SmallVector computeTileOffsets(OpBuilder &b, Location loc, + ValueRange ivs, ValueRange tileSizes) { + SmallVector offsets; + for (unsigned idx = 0, idxIvs = 0, e = tileSizes.size(); idx < e; ++idx) { + LLVM_DEBUG(llvm::dbgs() << "makeTiledShapes: for loop#" << idx << "\n"); + bool isTiled = !isZero(tileSizes[idx]); + offsets.push_back(isTiled ? ivs[idxIvs++] + : b.create(loc, 0).getResult()); + LLVM_DEBUG(llvm::dbgs() + << "computeTileOffsets: " << offsets.back() << "\n"); + } + return offsets; +} + +SmallVector computeTileSizes(OpBuilder &b, Location loc, ValueRange ivs, + ValueRange tileSizes, + ArrayRef sizeBounds) { + SmallVector sizes; + for (unsigned idx = 0, e = tileSizes.size(); idx < e; ++idx) { + bool isTiled = !isZero(tileSizes[idx]); + // Before composing, we need to make range a closed interval. + Value size = isTiled ? tileSizes[idx] : sizeBounds[idx]; + AffineExpr d0 = getAffineDimExpr(0, b.getContext()); + sizes.push_back(makeComposedAffineApply(b, loc, d0 - 1, size)); + LLVM_DEBUG(llvm::dbgs() << "computeTileSizes: " << sizes.back() << "\n"); + } + return sizes; +} + SmallVector makeTiledShapes(OpBuilder &b, Location loc, LinalgOp linalgOp, ArrayRef valuesToTile, @@ -544,31 +650,18 @@ // Construct (potentially temporary) mins and maxes on which to apply maps // that define tile subshapes. - SmallVector lbs, subShapeSizes; - for (unsigned idx = 0, idxIvs = 0, e = tileSizes.size(); idx < e; ++idx) { - LLVM_DEBUG(llvm::dbgs() << "makeTiledShapes: for loop#" << idx << "\n"); - bool isTiled = !isZero(tileSizes[idx]); - lbs.push_back(isTiled ? ivs[idxIvs++] - : (Value)b.create(loc, 0)); - // Before composing, we need to make range a closed interval. - Value size = isTiled ? tileSizes[idx] : sizeBounds[idx]; - AffineExpr d0 = getAffineDimExpr(0, b.getContext()); - subShapeSizes.push_back(makeComposedAffineApply(b, loc, d0 - 1, size)); - LLVM_DEBUG(llvm::dbgs() << "lb: " << lbs.back() << "\n"); - LLVM_DEBUG(llvm::dbgs() << "size: " << subShapeSizes.back() << "\n"); - } + SmallVector lbs = computeTileOffsets(b, loc, ivs, tileSizes); + SmallVector subShapeSizes = + computeTileSizes(b, loc, ivs, tileSizes, sizeBounds); assert(static_cast(valuesToTile.size()) == linalgOp.getNumInputsAndOutputs() && "expected one value to tile for every operand"); - MLIRContext *context = b.getContext(); SmallVector tiledShapes; tiledShapes.reserve(valuesToTile.size()); for (OpOperand *opOperand : linalgOp.getInputAndOutputOperands()) { Value shapedOp = valuesToTile[opOperand->getOperandNumber()]; LLVM_DEBUG(llvm::dbgs() << "makeTiledShapes: for operand " << shapedOp); - int64_t rank = linalgOp.getRank(opOperand); - ArrayRef shape = linalgOp.getShape(opOperand); AffineMap map = linalgOp.getTiedIndexingMap(opOperand); // If the shape is not tiled, we can use it as is. if (!isTiled(map, tileSizes)) { @@ -579,71 +672,8 @@ } LLVM_DEBUG(llvm::dbgs() << ": tiled: figure out subshape...\n"); - // Construct a new subview / extract_slice for the tile. - SmallVector offsets, sizes, strides; - offsets.reserve(rank); - sizes.reserve(rank); - strides.reserve(rank); - for (unsigned r = 0; r < rank; ++r) { - LLVM_DEBUG(llvm::dbgs() << "makeTiledShapes: for dim#" << r); - if (!isTiled(map.getSubMap({r}), tileSizes)) { - offsets.push_back(b.getIndexAttr(0)); - Value dim = createOrFoldDimOp(b, loc, shapedOp, r); - sizes.push_back(dim); - strides.push_back(b.getIndexAttr(1)); - LLVM_DEBUG(llvm::dbgs() << ": not tiled: use size: " << dim << "\n"); - continue; - } - LLVM_DEBUG(llvm::dbgs() << ": tiled: figure out subsize...\n"); - - // Tiling creates a new slice at the proper index, the slice step is 1 - // (i.e. the op does not subsample, stepping occurs in the loop). - auto m = map.getSubMap({r}); - LLVM_DEBUG(llvm::dbgs() << "makeTiledShapes: submap: " << map << "\n"); - auto offset = applyMapToValues(b, loc, m, lbs).front(); - offsets.push_back(offset); - auto closedIntSize = applyMapToValues(b, loc, m, subShapeSizes).front(); - // Resulting size needs to be made half open interval again. - AffineExpr s0 = getAffineSymbolExpr(0, b.getContext()); - Value size = makeComposedAffineApply(b, loc, s0 + 1, closedIntSize); - LLVM_DEBUG(llvm::dbgs() << "makeTiledShapes: raw size: " << size << "\n"); - - // The size of the subview / extract_slice should be trimmed to avoid - // out-of-bounds accesses, unless we statically know the subshape size - // divides the shape size evenly. - int64_t shapeSize = shape[r]; - auto sizeCst = size.getDefiningOp(); - if (ShapedType::isDynamic(shapeSize) || !sizeCst || - (shapeSize % sizeCst.getValue()) != 0) { - LLVM_DEBUG(llvm::dbgs() << "makeTiledShapes: shapeSize=" << shapeSize - << ", size: " << size - << ": make sure in bound with affine.min\n"); - AffineExpr dim0, dim1, dim2; - bindDims(context, dim0, dim1, dim2); - // Compute min(size, dim - offset) to avoid out-of-bounds accesses. - AffineMap minMap = - AffineMap::inferFromExprList( - ArrayRef>{{dim0, dim1 - dim2}}) - .front(); - Value d = createOrFoldDimOp(b, loc, shapedOp, r); - SmallVector operands{size, d, offset}; - fullyComposeAffineMapAndOperands(&minMap, &operands); - size = b.create(loc, b.getIndexType(), minMap, operands); - } - - sizes.push_back(size); - LLVM_DEBUG(llvm::dbgs() - << "makeTiledShapes: new offset: " << offset << "\n"); - LLVM_DEBUG(llvm::dbgs() << "makeTiledShapes: new size: " << size << "\n"); - strides.push_back(b.getIndexAttr(1)); - } - - if (opOperand->get().getType().isa()) - tiledShapes.push_back( - b.create(loc, shapedOp, offsets, sizes, strides)); - else - tiledShapes.push_back(b.create( - loc, shapedOp, offsets, sizes, strides)); + tiledShapes.push_back( + makeTiledShape(b, loc, shapedOp, tileSizes, map, lbs, subShapeSizes)); } return tiledShapes;