diff --git a/mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h b/mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h --- a/mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h +++ b/mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h @@ -89,11 +89,30 @@ /// Returns a list of PromotionInfo which hold the promoted buffer and the /// full and partial views indexing into the buffer. // TODO: revisit dynamicBuffers option. -LinalgOp promoteSubViewOperands(OpBuilder &b, LinalgOp op, - llvm::SetVector subViews, - bool dynamicBuffers = false, - int64_t alignment = 0, - OperationFolder *folder = nullptr); +struct LinalgPromotionOptions { + /// Indices of subViews to promote. If `None`, try to promote all operands. + Optional> operandsToPromote = None; + LinalgPromotionOptions &setOperandsToPromote(ArrayRef operands) { + operandsToPromote = DenseSet(); + operandsToPromote->insert(operands.begin(), operands.end()); + return *this; + } + /// Allow the use of dynamically-sized buffers. + bool dynamicBuffers = false; + LinalgPromotionOptions &setDynamicBuffers(unsigned dynamic) { + dynamicBuffers = dynamic; + return *this; + } + /// Alignment of promoted buffer. If `None` do not specify alignment. + Optional alignment = None; + LinalgPromotionOptions &setAlignment(unsigned align) { + alignment = align; + return *this; + } +}; +LinalgOp promoteSubViews(OpBuilder &b, LinalgOp op, + LinalgPromotionOptions options, + OperationFolder *folder = nullptr); /// Emit a suitable vector form for a Linalg op with fully static shape. 
void vectorizeLinalgOp(OpBuilder &builder, Operation *op); @@ -265,27 +284,11 @@ template struct LinalgPromotionPattern : public LinalgBasePromotionPattern { - LinalgPromotionPattern(MLIRContext *context, - ArrayRef operandsToPromote = {}, - unsigned alignment = 0, + LinalgPromotionPattern(MLIRContext *context, LinalgPromotionOptions options, LinalgMarker marker = LinalgMarker(), PatternBenefit benefit = 1) - : LinalgBasePromotionPattern(OpTy::getOperationName(), context, - operandsToPromote, alignment, marker, - benefit) {} - LinalgPromotionPattern(MLIRContext *context, - ArrayRef operandsToPromote, - LinalgMarker marker = LinalgMarker(), - PatternBenefit benefit = 1) - : LinalgPromotionPattern(context, operandsToPromote, 0, marker, benefit) { - } - LinalgPromotionPattern(MLIRContext *context, unsigned alignment, - LinalgMarker marker = LinalgMarker(), - PatternBenefit benefit = 1) - : LinalgPromotionPattern(context, {}, alignment, marker, benefit) {} - LinalgPromotionPattern(MLIRContext *context, LinalgMarker marker, - PatternBenefit benefit = 1) - : LinalgPromotionPattern(context, {}, 0, marker, benefit) {} + : LinalgBasePromotionPattern(OpTy::getOperationName(), context, options, + marker, benefit) {} }; /// @@ -342,8 +345,6 @@ return failure(); if (failed(marker.checkAndNotify(rewriter, linalgOp))) return failure(); - if (failed(promoteSubviewsLinalgOpPrecondition(op))) - return failure(); if (loweringType == LinalgLoweringType::LibraryCall) { // TODO: Move lowering to library calls here. 
diff --git a/mlir/lib/Dialect/Linalg/Transforms/Promotion.cpp b/mlir/lib/Dialect/Linalg/Transforms/Promotion.cpp --- a/mlir/lib/Dialect/Linalg/Transforms/Promotion.cpp +++ b/mlir/lib/Dialect/Linalg/Transforms/Promotion.cpp @@ -45,7 +45,40 @@ #define DEBUG_TYPE "linalg-promotion" -/// If `size` comes from an AffineMinOp and one of the dimensions of AffineMin +namespace { +struct LinalgOpInstancePromotionOptions { + LinalgOpInstancePromotionOptions(LinalgOp op, + const LinalgPromotionOptions &options); + /// SubViews to promote. + SetVector subViews; + /// Allow the use of dynamically-sized buffers. + bool dynamicBuffers; + /// Alignment of promoted buffer. + Optional alignment; +}; +} // namespace + +LinalgOpInstancePromotionOptions::LinalgOpInstancePromotionOptions( + LinalgOp linalgOp, const LinalgPromotionOptions &options) + : subViews(), dynamicBuffers(options.dynamicBuffers), + alignment(options.alignment) { + if (options.operandsToPromote.hasValue()) { + for (unsigned idx : options.operandsToPromote.getValue()) { + auto *op = linalgOp.getBuffer(idx).getDefiningOp(); + if (auto sv = dyn_cast_or_null(op)) + subViews.insert(sv); + } + } else { + unsigned nBuffers = linalgOp.getNumInputsAndOutputBuffers(); + for (unsigned idx = 0; idx < nBuffers; ++idx) { + auto *op = linalgOp.getBuffer(idx).getDefiningOp(); + if (auto sv = dyn_cast_or_null(op)) + subViews.insert(sv); + } + } +} + +/// If `size` comes from an AffineMinOp and one of the values of AffineMinOp /// is a constant then return a new value set to the smallest such constant. /// Otherwise return size. 
static Value extractSmallestConstantBoundingSize(OpBuilder &b, Location loc, @@ -53,25 +86,26 @@ auto affineMinOp = dyn_cast_or_null(size.getDefiningOp()); if (!affineMinOp) return size; - if (!llvm::any_of(affineMinOp.getAffineMap().getResults(), [](AffineExpr e) { - return e.dyn_cast(); - })) - return size; int64_t minConst = std::numeric_limits::max(); for (auto e : affineMinOp.getAffineMap().getResults()) if (auto cst = e.dyn_cast()) minConst = std::min(minConst, cst.getValue()); - assert(minConst != std::numeric_limits::max()); - return b.create(loc, minConst); + return (minConst == std::numeric_limits::max()) + ? size + : b.create(loc, minConst); } +/// Alloc a new buffer of `size`. If `dynamicBuffers` is true allocate exactly +/// the size needed, otherwise try to allocate a static bounding box. static Value allocBuffer(Type elementType, Value size, bool dynamicBuffers, - OperationFolder *folder, int64_t alignment = 0) { + OperationFolder *folder, + Optional alignment = None) { auto *ctx = size.getContext(); auto width = llvm::divideCeil(elementType.getIntOrFloatBitWidth(), 8); IntegerAttr alignment_attr; - if (alignment) - alignment_attr = IntegerAttr::get(IntegerType::get(64, ctx), alignment); + if (alignment.hasValue()) + alignment_attr = + IntegerAttr::get(IntegerType::get(64, ctx), alignment.getValue()); if (!dynamicBuffers) if (auto cst = dyn_cast_or_null(size.getDefiningOp())) return std_alloc( @@ -100,11 +134,11 @@ // To account for general boundary effects, padding must be performed on the // boundary tiles. For now this is done with an unconditional `fill` op followed // by a partial `copy` op. 
-static PromotionInfo promoteFullTileBuffer(OpBuilder &b, Location loc, - SubViewOp subView, - bool dynamicBuffers, - int64_t alignment, - OperationFolder *folder) { +static PromotionInfo promoteSubviewAsNewBuffer(OpBuilder &b, Location loc, + SubViewOp subView, + bool dynamicBuffers, + Optional alignment, + OperationFolder *folder) { auto zero = folded_std_constant_index(folder, 0); auto one = folded_std_constant_index(folder, 1); @@ -117,8 +151,10 @@ for (auto en : llvm::enumerate(subView.getRanges())) { auto rank = en.index(); auto rangeValue = en.value(); - // Try to extract a tight constant + // Try to extract a tight constant. + LLVM_DEBUG(llvm::dbgs() << "Extract tightest: " << rangeValue.size << "\n"); Value size = extractSmallestConstantBoundingSize(b, loc, rangeValue.size); + LLVM_DEBUG(llvm::dbgs() << "Extracted tightest: " << size << "\n"); allocSize = folded_std_muli(folder, allocSize, size); fullSizes.push_back(size); partialSizes.push_back(folded_std_dim(folder, subView, rank)); @@ -136,26 +172,26 @@ return PromotionInfo{buffer, fullLocalView, partialLocalView}; } -SmallVector -mlir::linalg::promoteSubViews(OpBuilder &b, Location loc, - ArrayRef subViews, bool dynamicBuffers, - int64_t alignment, OperationFolder *folder) { - if (subViews.empty()) +static SmallVector +promoteSubViews(OpBuilder &b, Location loc, + LinalgOpInstancePromotionOptions options, + OperationFolder *folder) { + if (options.subViews.empty()) return {}; ScopedContext scope(b, loc); SmallVector res; - res.reserve(subViews.size()); + res.reserve(options.subViews.size()); DenseMap promotionInfoMap; - for (auto v : subViews) { + for (auto v : options.subViews) { SubViewOp subView = cast(v.getDefiningOp()); - auto promotionInfo = promoteFullTileBuffer(b, loc, subView, dynamicBuffers, - alignment, folder); + auto promotionInfo = promoteSubviewAsNewBuffer( + b, loc, subView, options.dynamicBuffers, options.alignment, folder); promotionInfoMap.insert(std::make_pair(subView.getResult(), 
promotionInfo)); res.push_back(promotionInfo); } - for (auto v : subViews) { + for (auto v : options.subViews) { SubViewOp subView = cast(v.getDefiningOp()); auto info = promotionInfoMap.find(v); if (info == promotionInfoMap.end()) @@ -172,7 +208,7 @@ linalg_fill(info->second.fullLocalView, fillVal); } - for (auto v : subViews) { + for (auto v : options.subViews) { auto info = promotionInfoMap.find(v); if (info == promotionInfoMap.end()) continue; @@ -182,11 +218,9 @@ return res; } -LinalgOp mlir::linalg::promoteSubViewOperands(OpBuilder &b, LinalgOp op, - SetVector subViews, - bool dynamicBuffers, - int64_t alignment, - OperationFolder *folder) { +static void promoteSubViews(OpBuilder &b, LinalgOp op, + LinalgOpInstancePromotionOptions options, + OperationFolder *folder) { assert(op.hasBufferSemantics() && "expected linalg op with buffer semantics"); if (auto convOp = dyn_cast(op.getOperation())) { @@ -196,17 +230,15 @@ } // 1. Promote the specified views and use them in the new op. - ScopedContext scope(b, op.getLoc()); - auto promotedBufferAndViews = - promoteSubViews(b, op.getLoc(), subViews.getArrayRef(), dynamicBuffers, - alignment, folder); + auto loc = op.getLoc(); + auto promotedBufferAndViews = promoteSubViews(b, loc, options, folder); SmallVector opViews; opViews.reserve(op.getNumInputsAndOutputs()); SmallVector, 8> writebackViews; - writebackViews.reserve(subViews.size()); + writebackViews.reserve(promotedBufferAndViews.size()); unsigned promotedIdx = 0; for (auto view : op.getInputsAndOutputBuffers()) { - if (subViews.count(view) != 0) { + if (options.subViews.count(view) != 0) { opViews.push_back(promotedBufferAndViews[promotedIdx].fullLocalView); writebackViews.emplace_back(std::make_pair( view, promotedBufferAndViews[promotedIdx].partialLocalView)); @@ -219,67 +251,54 @@ // 2. Append all other operands as they appear, this enforces that such // operands are not views. This is to support cases such as FillOp taking // extra scalars etc. 
- auto operands = getAssumedNonViewOperands(op); - opViews.append(operands.begin(), operands.end()); - LinalgOp res = op.clone(b, op.getLoc(), opViews); + // Keep a reference to output buffers; + DenseSet originalOutputs(op.getOutputBuffers().begin(), + op.getOutputBuffers().end()); + op.getOperation()->setOperands(0, opViews.size(), opViews); + OpBuilder::InsertionGuard guard(b); + b.setInsertionPointAfter(op); + ScopedContext scope(b, loc); // 3. Emit write-back for the promoted output views: copy the partial view. - for (auto viewAndPartialLocalView : writebackViews) { - // WARNING: MUST use the old op to determine whether the operand view is an - // output. - bool isOutput = - op.getIndexOfOutputBuffer(viewAndPartialLocalView.first).hasValue(); - if (isOutput) + for (auto viewAndPartialLocalView : writebackViews) + if (originalOutputs.count(viewAndPartialLocalView.first)) linalg_copy(viewAndPartialLocalView.second, viewAndPartialLocalView.first); - } - // 4. Dealloc local buffers. + // 4. Dealloc all local buffers. for (const auto &pi : promotedBufferAndViews) std_dealloc(pi.buffer); - - return res; -} - -static void promoteSubViews(FuncOp f, bool dynamicBuffers) { - SmallVector toErase; - OperationFolder folder(f.getContext()); - f.walk([dynamicBuffers, &folder, &toErase](LinalgOp op) { - if (!op.hasBufferSemantics()) - return; - - // TODO(ntv) some heuristic here to decide what to promote. Atm only float - // and integer buffers can be promoted. 
- SetVector subViews; - OpBuilder b(op); - for (auto it : op.getInputsAndOutputBuffers()) - if (auto sv = dyn_cast_or_null(it.getDefiningOp())) - if (sv.getType().getElementType().isSignlessIntOrFloat()) - subViews.insert(sv); - if (!subViews.empty()) { - promoteSubViewOperands(b, op, subViews, dynamicBuffers, 0, &folder); - toErase.push_back(op); - } - }); - for (auto op : toErase) - op.erase(); } -LogicalResult mlir::linalg::promoteSubviewsLinalgOpPrecondition( - Operation *op, llvm::Optional> operandIndicesToPromote) { +LogicalResult +mlir::linalg::promoteSubviewsPrecondition(Operation *op, + LinalgPromotionOptions options) { LinalgOp linOp = dyn_cast(op); // Transformation applies to buffers only. if (!linOp || !linOp.hasBufferSemantics()) return failure(); + // Check that at least one of the requested operands is indeed a subview. for (auto en : llvm::enumerate(linOp.getInputsAndOutputBuffers())) { auto sv = isa_and_nonnull(en.value().getDefiningOp()); - if (sv && (!operandIndicesToPromote.hasValue() || - operandIndicesToPromote->count(en.index()))) - return success(); + if (sv) + if (!options.operandsToPromote.hasValue() || + options.operandsToPromote->count(en.index())) + return success(); } + // TODO: Check all subviews requested are bound by a static constant. + // TODO: Check that the total footprint fits within a given size. 
return failure(); } +LinalgOp mlir::linalg::promoteSubViews(OpBuilder &b, LinalgOp linalgOp, + LinalgPromotionOptions options, + OperationFolder *folder) { + // Build the per-op options in place; the helper takes them by value. + ::promoteSubViews( + b, linalgOp, LinalgOpInstancePromotionOptions(linalgOp, options), folder); + return linalgOp; +} + namespace { struct LinalgPromotionPass : public LinalgPromotionBase { LinalgPromotionPass() = default; @@ -288,11 +307,20 @@ } void runOnFunction() override { - promoteSubViews(getFunction(), dynamicBuffers); + OperationFolder folder(&getContext()); + getFunction().walk([this, &folder](LinalgOp op) { + auto options = LinalgPromotionOptions().setDynamicBuffers(dynamicBuffers); + if (failed(promoteSubviewsPrecondition(op, options))) + return; + LLVM_DEBUG(llvm::dbgs() << "Promote: " << *(op.getOperation()) << "\n"); + OpBuilder b(op); + promoteSubViews(b, op, options, &folder); + }); } }; } // namespace +// TODO: support more transformation options in the pass. 
std::unique_ptr> mlir::createLinalgPromotionPass(bool dynamicBuffers) { return std::make_unique(dynamicBuffers); diff --git a/mlir/test/lib/Transforms/TestLinalgTransforms.cpp b/mlir/test/lib/Transforms/TestLinalgTransforms.cpp --- a/mlir/test/lib/Transforms/TestLinalgTransforms.cpp +++ b/mlir/test/lib/Transforms/TestLinalgTransforms.cpp @@ -30,63 +30,79 @@ void runOnFunction() override; - Option testPatterns{*this, "test-patterns", - llvm::cl::desc("Test a mixed set of patterns"), - llvm::cl::init(false)}; + Option testPatterns1{*this, "test-patterns", + llvm::cl::desc("Test a mixed set of patterns"), + llvm::cl::init(false)}; + Option testMatmulToVectorPatterns1dTiling{ + *this, "test-matmul-to-vector-patterns-tile-1d", + llvm::cl::desc( + "Test a fused pass that applies patterns from matmul to vectors via " + "1-d tiling"), + llvm::cl::init(false)}; + Option testMatmulToVectorPatterns2dTiling{ + *this, "test-matmul-to-vector-patterns-tile-2d", + llvm::cl::desc( + "Test a fused pass that applies patterns from matmul to vectors via " + "2-d tiling"), + llvm::cl::init(false)}; }; } // end anonymous namespace static void applyPatterns(FuncOp funcOp) { MLIRContext *ctx = funcOp.getContext(); - OwningRewritePatternList patterns; //===--------------------------------------------------------------------===// // Linalg tiling patterns. 
//===--------------------------------------------------------------------===// patterns.insert>( - ctx, LinalgTilingOptions().setTileSizes({2000, 3000, 4000}), + ctx, + /*tileSizes=*/ArrayRef{2000, 3000, 4000}, LinalgMarker({"MEM", {}}, "L3")); patterns.insert>( - ctx, LinalgTilingOptions().setTileSizes({200, 300, 400}), + ctx, + /*tileSizes=*/ArrayRef{200, 300, 400}, LinalgMarker({"L3"}, "L2")); patterns.insert>( - ctx, LinalgTilingOptions().setTileSizes({20, 30, 40}), - LinalgMarker({"L2"}, "L1")); + ctx, + /*tileSizes=*/ArrayRef{20, 30, 40}, LinalgMarker({"L2"}, "L1")); patterns.insert>( - ctx, LinalgTilingOptions().setTileSizes({2, 3, 4}), - LinalgMarker({"L1"}, "REG")); + ctx, + /*tileSizes=*/ArrayRef{2, 3, 4}, LinalgMarker({"L1"}, "REG")); patterns.insert>( ctx, - LinalgTilingOptions().setTileSizes({5, 6}).setLoopType( - LinalgTilingLoopType::ParallelLoops), - LinalgMarker({}, "L1")); + /*tileSizes=*/ArrayRef{5, 6}, LinalgMarker({}, "L1")); patterns.insert>( - ctx, LinalgTilingOptions().setTileSizes(8000), - LinalgMarker({"MEM", "L3", "L2", {}}, "REG")); + ctx, + /*tileSizes=*/ArrayRef{8000}, + LinalgMarker({"MEM", "L3", "L2", {}}, "L1")); + patterns.insert>( + ctx, + /*tileSizes=*/ArrayRef{8}, LinalgMarker({"L1"}, "REG")); //===--------------------------------------------------------------------===// // Linalg tiling and permutation patterns. 
//===--------------------------------------------------------------------===// patterns.insert>( ctx, - LinalgTilingOptions() - .setTileSizes({2000, 3000, 4000}) - .setInterchange({1, 2, 0}), + /*tileSizes=*/ArrayRef{2000, 3000, 4000}, + /*interchangeVector=*/ArrayRef{1, 2, 0}, LinalgMarker({"__with_perm__"}, "L2__with_perm__")); patterns.insert>( ctx, - LinalgTilingOptions() - .setTileSizes({200, 300, 400}) - .setInterchange({1, 0, 2}), + /*tileSizes=*/ArrayRef{200, 300, 400}, + /*interchangeVector=*/ArrayRef{1, 0, 2}, LinalgMarker({"L2__with_perm__"}, "L1__with_perm__")); patterns.insert>( - ctx, LinalgTilingOptions().setTileSizes({20, 30, 40}), + ctx, + /*tileSizes=*/ArrayRef{20, 30, 40}, LinalgMarker({"L1__with_perm__"}, "REG__with_perm__")); patterns.insert>( - ctx, LinalgTilingOptions().setTileSizes({5, 6}).setInterchange({1, 0}), + ctx, + /*tileSizes=*/ArrayRef{5, 6}, + /*interchangeVector=*/ArrayRef{1, 0}, LinalgMarker({"__with_perm__"}, "L1__with_perm__")); //===--------------------------------------------------------------------===// @@ -120,15 +136,13 @@ // Linalg subview operands promotion. //===--------------------------------------------------------------------===// patterns.insert>( - ctx, LinalgMarker({"_promote_views_"}, "_views_promoted_")); + ctx, LinalgPromotionOptions(), + LinalgMarker({"_promote_views_"}, "_views_promoted_")); patterns.insert>( - ctx, - /*operandsToPromote=*/ArrayRef{0}, + ctx, LinalgPromotionOptions().setOperandsToPromote({0}), LinalgMarker({"_promote_first_view_"}, "_first_view_promoted_")); patterns.insert>( - ctx, - /*operandsToPromote=*/ArrayRef{0}, - /*alignment=*/32, + ctx, LinalgPromotionOptions().setOperandsToPromote({0}).setAlignment(32), LinalgMarker({"_promote_views_aligned_"}, "_views_aligned_promoted_")); applyPatternsAndFoldGreedily(funcOp, patterns);