Diff 370976

mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h

Show First 20 Lines • Show All 1,030 Lines • ▼ Show 20 Lines	ConvOpVectorization(MLIRContext *context, SmallVector<bool, 4> msk)
assert(msk.size() == N && "Mask size does not match rank");		assert(msk.size() == N && "Mask size does not match rank");
this->mask = msk;		this->mask = msk;
}		}

LogicalResult matchAndRewrite(ConvOp minOp,		LogicalResult matchAndRewrite(ConvOp minOp,
PatternRewriter &rewriter) const override;		PatternRewriter &rewriter) const override;
};		};

		/// Rewrite a TiledLoopOp with bounds/step that potentially do not divide evenly
		/// into a TiledLoopOp where the step divides the iteration space evenly,
		/// followed by another TiledLoopOp for the last (partial) iteration (if any).
		/// This transformation is called "loop peeling".
		///
		/// This function peels the `idx`-th loop of the TiledLoopOp. To peel all loops
		/// in the loop nest, this function must be called multiple times.
		///
		/// After loop peeling, this function tries to simplify/canonicalize affine.min
		/// and affine.max ops in the body of the two TiledLoopOps. For more details,
		/// refer to `mlir::scf::peelAndCanonicalizeForLoop`.
		///
		/// The return value indicates whether the loop was rewritten or not. Loops are
		/// not rewritten if:
		/// * Loop step size is 1 or
		/// * Loop bounds and step size are static, and step already divides the
		/// iteration space evenly.
		///
		/// Note: This function rewrites the given TiledLoopOp in-place and clones the
		/// TiledLoopOp operation for the last iteration. It replaces all uses of the
		/// unpeeled TiledLoopOp with the results of the newly generated TiledLoopOp.
		LogicalResult peelAndCanonicalizeTiledLoop(RewriterBase &rewriter,
		TiledLoopOp loopOp, int64_t idx,
		TiledLoopOp &result);

//===----------------------------------------------------------------------===//		//===----------------------------------------------------------------------===//
// Support for staged pattern application.		// Support for staged pattern application.
//===----------------------------------------------------------------------===//		//===----------------------------------------------------------------------===//
/// Helper function to allow applying rewrite patterns, interleaved with more		/// Helper function to allow applying rewrite patterns, interleaved with more
/// global transformations, in a staged fashion:		/// global transformations, in a staged fashion:
/// 1. the first stage consists of a list of FrozenRewritePatternSet. Each		/// 1. the first stage consists of a list of FrozenRewritePatternSet. Each
/// FrozenRewritePatternSet in this list is applied once, in order.		/// FrozenRewritePatternSet in this list is applied once, in order.
/// 2. the second stage consists of a single OwningRewritePattern that is		/// 2. the second stage consists of a single OwningRewritePattern that is
Show All 23 Lines

mlir/include/mlir/Dialect/SCF/Transforms.h

	Show First 20 Lines • Show All 105 Lines • ▼ Show 20 Lines
	/// iteration space evenly.			/// iteration space evenly.
	///			///
	/// Note: This function rewrites the given scf.for loop in-place and creates a			/// Note: This function rewrites the given scf.for loop in-place and creates a
	/// new scf.if operation for the last iteration. It replaces all uses of the			/// new scf.if operation for the last iteration. It replaces all uses of the
	/// unpeeled loop with the results of the newly generated scf.if.			/// unpeeled loop with the results of the newly generated scf.if.
	LogicalResult peelAndCanonicalizeForLoop(RewriterBase &rewriter, ForOp forOp,			LogicalResult peelAndCanonicalizeForLoop(RewriterBase &rewriter, ForOp forOp,
	scf::IfOp &ifOp);			scf::IfOp &ifOp);

				/// Try to simplify a min/max operation `op` after loop peeling. This function
				/// can simplify min/max operations such as (ub is the previous upper bound of
				/// the unpeeled loop):
				/// ```
				/// #map = affine_map<(d0)[s0, s1] -> (s0, -d0 + s1)>
				/// %r = affine.min #map(%iv)[%step, %ub]
				/// ```
				/// and rewrites them into (in the case of the peeled loop):
				/// ```
				/// %r = %step
				/// ```
				/// min/max operations inside the partial iteration are rewritten in a similar
				/// way.
				LogicalResult rewritePeeledMinMaxOp(RewriterBase &rewriter, Operation *op,
				AffineMap map, ValueRange operands,
				bool isMin, Value iv, Value ub, Value step,
				bool insideLoop);

	/// Tile a parallel loop of the form			/// Tile a parallel loop of the form
	/// scf.parallel (%i0, %i1) = (%arg0, %arg1) to (%arg2, %arg3)			/// scf.parallel (%i0, %i1) = (%arg0, %arg1) to (%arg2, %arg3)
	/// step (%arg4, %arg5)			/// step (%arg4, %arg5)
	///			///
	/// into			/// into
	/// scf.parallel (%i0, %i1) = (%arg0, %arg1) to (%arg2, %arg3)			/// scf.parallel (%i0, %i1) = (%arg0, %arg1) to (%arg2, %arg3)
	/// step (%arg4*tileSize[0],			/// step (%arg4*tileSize[0],
	/// %arg5*tileSize[1])			/// %arg5*tileSize[1])
	▲ Show 20 Lines • Show All 66 Lines • Show Last 20 Lines

mlir/lib/Dialect/Linalg/Transforms/Loops.cpp

//===- Loops.cpp - conversion from Linalg named and generic ops to loops --===//		//===- Loops.cpp - conversion from Linalg named and generic ops to loops --===//
//		//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.		// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.		// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception		// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//		//
//===----------------------------------------------------------------------===//		//===----------------------------------------------------------------------===//

#include "PassDetail.h"		#include "PassDetail.h"
#include "mlir/Dialect/Linalg/IR/LinalgOps.h"		#include "mlir/Dialect/Linalg/IR/LinalgOps.h"
#include "mlir/Dialect/Linalg/IR/LinalgTypes.h"		#include "mlir/Dialect/Linalg/IR/LinalgTypes.h"
#include "mlir/Dialect/Linalg/Passes.h"		#include "mlir/Dialect/Linalg/Passes.h"
#include "mlir/Dialect/Linalg/Transforms/Transforms.h"		#include "mlir/Dialect/Linalg/Transforms/Transforms.h"
#include "mlir/Dialect/Linalg/Utils/Utils.h"		#include "mlir/Dialect/Linalg/Utils/Utils.h"
		#include "mlir/Dialect/SCF/Transforms.h"
#include "mlir/Dialect/StandardOps/Utils/Utils.h"		#include "mlir/Dialect/StandardOps/Utils/Utils.h"
#include "mlir/IR/AffineExpr.h"		#include "mlir/IR/AffineExpr.h"
#include "mlir/IR/AffineMap.h"		#include "mlir/IR/AffineMap.h"
#include "mlir/IR/BlockAndValueMapping.h"		#include "mlir/IR/BlockAndValueMapping.h"
#include "mlir/Support/LLVM.h"		#include "mlir/Support/LLVM.h"
#include "mlir/Transforms/DialectConversion.h"		#include "mlir/Transforms/DialectConversion.h"
#include "mlir/Transforms/FoldUtils.h"		#include "mlir/Transforms/FoldUtils.h"
#include "mlir/Transforms/GreedyPatternRewriteDriver.h"		#include "mlir/Transforms/GreedyPatternRewriteDriver.h"
▲ Show 20 Lines • Show All 605 Lines • ▼ Show 20 Lines	void runOnFunction() override {
MLIRContext *context = &getContext();		MLIRContext *context = &getContext();
RewritePatternSet patterns(context);		RewritePatternSet patterns(context);
populateTiledLoopToSCFPattern(patterns);		populateTiledLoopToSCFPattern(patterns);
(void)applyPatternsAndFoldGreedily(getFunction(), std::move(patterns));		(void)applyPatternsAndFoldGreedily(getFunction(), std::move(patterns));
}		}
};		};
} // namespace		} // namespace

		/// Rewrite a TiledLoopOp with bounds/step that potentially do not divide evenly
		/// into two TiledLoopOps: One where the step divides the iteration space
		/// evenly, followed by another one for the last (partial) iteration (if any).
		/// This function only rewrites the `idx`-th loop of the loop nest represented
		/// by the TiledLoopOp. To peel the entire loop nest, this function must be
		/// called multiple times.
		///
		/// This function rewrites the given TiledLoopOp in-place and creates a new
		/// TiledLoopOp for the last iteration. It replaces all uses of the original
		/// TiledLoopOp with the results of the newly generated one.
		///
		/// The newly generated TiledLoopOp is returned via `result`. The boundary
		/// at which the loop is split (new upper bound) is returned via `splitBound`.
		/// The return value indicates whether the TiledLoopOp was rewritten or not.
		static LogicalResult peelTiledLoop(RewriterBase &b, TiledLoopOp loopOp,
		int64_t idx, TiledLoopOp &result,
		nicolasvasilacheUnsubmitted Done Reply Inline Actions side note: we tend to try and move away from unsigned as it creates a bunch of unnecessary issues in a bunch of places. @mehdi_amini -> int64_t ? nicolasvasilache: side note: we tend to try and move away from unsigned as it creates a bunch of unnecessary…
		Value &splitBound) {
		Value lb = loopOp.lowerBound()[idx], ub = loopOp.upperBound()[idx],
		herhutUnsubmitted Done Reply Inline Actions nit: should this be moved down to where the rewriting starts? herhut: nit: should this be moved down to where the rewriting starts?
		step = loopOp.step()[idx];
		auto ubInt = getConstantIntValue(ub);

		auto loc = loopOp.getLoc();
		AffineExpr exprLb, exprUb, exprStep;
		bindSymbols(b.getContext(), exprLb, exprUb, exprStep);
		// New upper bound: %ub - (%ub - %lb) mod %step
		auto modMap = AffineMap::get(0, 3, {exprUb - ((exprUb - exprLb) % exprStep)});
		SmallVector<Value> operands{lb, ub, step};
		mlir::canonicalizeMapAndOperands(&modMap, &operands);
		modMap = mlir::simplifyAffineMap(modMap);
		nicolasvasilacheUnsubmitted Not Done Reply Inline Actions 1l ? nicolasvasilache: 1l ?
		springermAuthorUnsubmitted Done Reply Inline Actions I vaguely remember some awkwardness with integer literals. `1l` is `long` or sth., but depending on the architecture that may be 64-bit or sth else. So I thought the static cast may be safer? springerm: I vaguely remember some awkwardness with integer literals. `1l` is `long` or sth., but depending…
		RewriterBase::InsertionGuard guard(b);
		b.setInsertionPoint(loopOp);
		splitBound = b.createOrFold<AffineApplyOp>(loc, modMap, operands);
		// No specialization necessary if step already divides upper bound evenly.
		if (splitBound == ub || (ubInt && ubInt == getConstantIntValue(splitBound)))
		return failure();

		nicolasvasilacheUnsubmitted Done Reply Inline Actions How about renaming lb, ub, step to lbVal, ubVal, stepVal and writing the AffineExpr in a more intuitive way? AffineExpr lb, ub, step; bindDims(b.getContext(), lb, ub); bindSymbols(b.getContext(), step; auto modMap = AffineMap::get(3, 0, {ub - ((ub - lb) % step)}); Note that your AffineExpr is illegal as dim divides dim; which makes me wonder how your tests pass? nicolasvasilache: How about renaming lb, ub, step to lbVal, ubVal, stepVal and writing the AffineExpr in a more…
		springermAuthorUnsubmitted Done Reply Inline Actions Btw parts of this is copied from the SCF loop peeling pattern. (But they are too different to share the same impl.) springerm: Btw parts of this is copied from the SCF loop peeling pattern. (But they are too different to…
		// Create remainder loop.
		b.setInsertionPointAfter(loopOp);
		auto remainderLoop = cast<TiledLoopOp>(b.clone(*loopOp.getOperation()));
		nicolasvasilacheUnsubmitted Not Done Reply Inline Actions Please check against the degenerate case as I would hope that the mod could be simplified to 0 in certain cases. nicolasvasilache: Please check against the degenerate case as I would hope that the mod could be simplified to 0…
		springermAuthorUnsubmitted Done Reply Inline Actions The above checks should already handle this: // No specialization necessary if step already divides upper bound evenly. if (lbInt && ubInt && stepInt && (ubInt - lbInt) % stepInt == 0) return failure(); // No specialization necessary if step size is 1. if (stepInt == static_cast<int64_t>(1)) return failure(); springerm:* The above checks should already handle this: ``` // No specialization necessary if step…
		nicolasvasilacheUnsubmitted Done Reply Inline Actions I'd expect `b.createOrFold<AffineApplyOp>` can (or will be able to) also simplify in symbolic cases. The check you have currently only works for the static case. I'd think you could drop the 2 early-exit cases above and just check createOrFold against the 0 constant. It would potentially introduce an extra `constant 0 : index` but simplify the code. I think I would take that tradeoff. nicolasvasilache: I'd expect `b.createOrFold<AffineApplyOp>` can (or will be able to) also simplify in symbolic…
		loopOp.replaceAllUsesWith(remainderLoop->getResults());
		// Outputs: Take tensors from main loop's results. Take memrefs from main
		tpoppUnsubmitted Done Reply Inline Actions This needs to be updated some for memrefs I believe. When the loop op is bufferized, the loop op will have "outputs" but no "results" such as linalg.tiled_loop (%i) = (%c0) to (%c24) step (%c4) ins(%lhs, %rhs : memref<24x64xi8>, memref<24x64xi8>) outs(%out : memref<24x64xi8>) iterators("parallel") distribution("block_x") { tpopp: This needs to be updated some for memrefs I believe. When the loop op is bufferized, the loop…
		springermAuthorUnsubmitted Done Reply Inline Actions Nice catch springerm: Nice catch
		// loop's outputs.
		SmallVector<Value> remainderOutputs;
		for (unsigned o = 0, t = 0; o < loopOp.getNumOutputs(); ++o) {
		remainderOutputs.push_back(loopOp.outputs()[o].getType().isa<MemRefType>()
		? loopOp.outputs()[o]
		: loopOp->getResult(t++));
		}
		remainderLoop.outputsMutable().assign(remainderOutputs);

		// Set new loop bounds.
		b.updateRootInPlace(loopOp, [&]() {
		SmallVector<Value> ubs = loopOp.upperBound();
		ubs[idx] = splitBound;
		loopOp.upperBoundMutable().assign(ubs);
		});
		SmallVector<Value> lbs = remainderLoop.lowerBound();
		lbs[idx] = splitBound;
		remainderLoop.lowerBoundMutable().assign(lbs);

		result = remainderLoop;
		return success();
		}

		/// Walk the main and the remainder TiledLoopOp produced by peeling and try to
		/// simplify every op of type `OpTy` (instantiated by the caller with
		/// AffineMinOp and AffineMaxOp) via `scf::rewritePeeledMinMaxOp`.
		///
		/// * `mainIv` / `remainderIv`: induction variable of the peeled dimension in
		///   the main loop and in the remainder loop, respectively.
		/// * `ub`: upper bound of the peeled dimension before peeling.
		/// * `step`: step of the peeled dimension.
		/// * `IsMin`: forwarded as `isMin`; expected to match `OpTy`.
		///
		/// Simplification is best-effort: the result of every rewrite attempt is
		/// deliberately discarded.
		template <typename OpTy, bool IsMin>
		static void
		rewriteAffineOpAfterPeeling(RewriterBase &rewriter, TiledLoopOp mainLoop,
		                            TiledLoopOp remainderLoop, Value mainIv,
		                            Value remainderIv, Value ub, Value step) {
		  // Ops in the main loop are treated as being inside the peeled loop.
		  mainLoop.walk([&](OpTy affineOp) {
		    // The map is read into a local before the call. NOTE(review): presumably
		    // because `affineOp` may be replaced by the rewrite -- confirm.
		    AffineMap map = affineOp.getAffineMap();
		    (void)scf::rewritePeeledMinMaxOp(rewriter, affineOp, map,
		                                     affineOp.operands(), IsMin, mainIv, ub,
		                                     step, /*insideLoop=*/true);
		  });
		  // Ops in the remainder loop belong to the last (partial) iteration.
		  remainderLoop.walk([&](OpTy affineOp) {
		    AffineMap map = affineOp.getAffineMap();
		    (void)scf::rewritePeeledMinMaxOp(rewriter, affineOp, map,
		                                     affineOp.operands(), IsMin, remainderIv,
		                                     ub, step, /*insideLoop=*/false);
		  });
		}

		/// Peel the `idx`-th loop of `loopOp` and then try to simplify affine.min and
		/// affine.max ops in both resulting loops. On success, the TiledLoopOp that
		/// handles the last (partial) iteration is returned via `result`.
		///
		/// Returns failure, without peeling, if `idx` is out of range, if the `idx`-th
		/// iterator is not parallel, or if `peelTiledLoop` reports that no rewrite
		/// took place.
		LogicalResult mlir::linalg::peelAndCanonicalizeTiledLoop(RewriterBase &rewriter,
		                                                         TiledLoopOp loopOp,
		                                                         int64_t idx,
		                                                         TiledLoopOp &result) {
		  int64_t numLoops = loopOp.iterator_types().size();
		  if (idx < 0 || numLoops <= idx)
		    return failure();
		  // Only parallel iterator supported.
		  if (!isParallelIterator(loopOp.iterator_types()[idx]))
		    return failure();

		  // Capture the upper bound before peeling: `peelTiledLoop` overwrites the
		  // `idx`-th upper bound of `loopOp` with the split bound, while the min/max
		  // simplification below needs the bound of the unpeeled loop.
		  Value ub = loopOp.upperBound()[idx];
		  TiledLoopOp remainderLoop;
		  Value splitBound;
		  if (failed(peelTiledLoop(rewriter, loopOp, idx, remainderLoop, splitBound)))
		    return failure();

		  // Rewrite affine.min and affine.max ops.
		  Value mainIv = loopOp.getInductionVars()[idx], step = loopOp.step()[idx],
		        remainderIv = remainderLoop.getInductionVars()[idx];

		  rewriteAffineOpAfterPeeling<AffineMinOp, /*IsMin=*/true>(
		      rewriter, loopOp, remainderLoop, mainIv, remainderIv, ub, step);
		  rewriteAffineOpAfterPeeling<AffineMaxOp, /*IsMin=*/false>(
		      rewriter, loopOp, remainderLoop, mainIv, remainderIv, ub, step);

		  result = remainderLoop;
		  return success();
		}

void mlir::linalg::populateTiledLoopToSCFPattern(RewritePatternSet &patterns) {		void mlir::linalg::populateTiledLoopToSCFPattern(RewritePatternSet &patterns) {
patterns.add<TiledLoopToSCFPattern>(patterns.getContext());		patterns.add<TiledLoopToSCFPattern>(patterns.getContext());
}		}

std::unique_ptr<OperationPass<FuncOp>>		std::unique_ptr<OperationPass<FuncOp>>
mlir::createConvertLinalgTiledLoopsToSCFPass() {		mlir::createConvertLinalgTiledLoopsToSCFPass() {
return std::make_unique<LowerTiledLoopsToSCF>();		return std::make_unique<LowerTiledLoopsToSCF>();
}		}
Show All 34 Lines

mlir/lib/Dialect/SCF/Transforms/LoopSpecialization.cpp

Show First 20 Lines • Show All 318 Lines • ▼ Show 20 Lines
/// ```		/// ```
/// #map = affine_map<(d0)[s0, s1] -> (s0, -d0 + s1)>		/// #map = affine_map<(d0)[s0, s1] -> (s0, -d0 + s1)>
/// %r = affine.min #affine.min #map(%iv)[%step, %ub]		/// %r = affine.min #affine.min #map(%iv)[%step, %ub]
/// ```		/// ```
/// and rewrites them into (in the case the peeled loop):		/// and rewrites them into (in the case the peeled loop):
/// ```		/// ```
/// %r = %step		/// %r = %step
/// ```		/// ```
/// min/max operations inside the generated scf.if operation are rewritten in		/// min/max operations inside the partial iteration are rewritten in a similar
/// a similar way.		/// way.
///		///
/// This function builds up a set of constraints, capable of proving that:		/// This function builds up a set of constraints, capable of proving that:
/// * Inside the peeled loop: min(step, ub - iv) == step		/// * Inside the peeled loop: min(step, ub - iv) == step
/// * Inside the scf.if operation: min(step, ub - iv) == ub - iv		/// * Inside the partial iteration: min(step, ub - iv) == ub - iv
///		///
/// Returns `success` if the given operation was replaced by a new operation;		/// Returns `success` if the given operation was replaced by a new operation;
/// `failure` otherwise.		/// `failure` otherwise.
///		///
/// Note: `ub` is the previous upper bound of the loop (before peeling).		/// Note: `ub` is the previous upper bound of the loop (before peeling).
/// `insideLoop` must be true for min/max ops inside the loop and false for		/// `insideLoop` must be true for min/max ops inside the loop and false for
/// affine.min ops inside the scf.for op. For an explanation of the other		/// affine.min ops inside the partial iteration. For an explanation of the other
		herhutUnsubmitted Done Reply Inline Actions nit: this comment seems off. `scf.for` is also a loop. herhut: nit: this comment seems off. `scf.for` is also a loop.
/// parameters, see comment of `canonicalizeMinMaxOpInLoop`.		/// parameters, see comment of `canonicalizeMinMaxOpInLoop`.
static LogicalResult rewritePeeledMinMaxOp(RewriterBase &rewriter,		LogicalResult mlir::scf::rewritePeeledMinMaxOp(RewriterBase &rewriter,
Operation *op, AffineMap map,		Operation *op, AffineMap map,
ValueRange operands, bool isMin,		ValueRange operands, bool isMin,
Value iv, Value ub, Value step,		Value iv, Value ub, Value step,
bool insideLoop) {		bool insideLoop) {
FlatAffineValueConstraints constraints;		FlatAffineValueConstraints constraints;
constraints.appendDimId({iv, ub, step});		constraints.appendDimId({iv, ub, step});
if (auto constUb = getConstantIntValue(ub))		if (auto constUb = getConstantIntValue(ub))
constraints.addBound(FlatAffineConstraints::EQ, 1, *constUb);		constraints.addBound(FlatAffineConstraints::EQ, 1, *constUb);
if (auto constStep = getConstantIntValue(step))		if (auto constStep = getConstantIntValue(step))
constraints.addBound(FlatAffineConstraints::EQ, 2, *constStep);		constraints.addBound(FlatAffineConstraints::EQ, 2, *constStep);

// Add loop peeling invariant. This is the main piece of knowledge that		// Add loop peeling invariant. This is the main piece of knowledge that
Show All 15 Lines	LogicalResult mlir::scf::rewritePeeledMinMaxOp(RewriterBase &rewriter,
return canonicalizeMinMaxOp(rewriter, op, map, operands, isMin, constraints);		return canonicalizeMinMaxOp(rewriter, op, map, operands, isMin, constraints);
}		}

template <typename OpTy, bool IsMin>		template <typename OpTy, bool IsMin>
static void		static void
rewriteAffineOpAfterPeeling(RewriterBase &rewriter, ForOp forOp, scf::IfOp ifOp,		rewriteAffineOpAfterPeeling(RewriterBase &rewriter, ForOp forOp, scf::IfOp ifOp,
Value iv, Value splitBound, Value ub, Value step) {		Value iv, Value splitBound, Value ub, Value step) {
forOp.walk([&](OpTy affineOp) {		forOp.walk([&](OpTy affineOp) {
(void)rewritePeeledMinMaxOp(rewriter, affineOp, affineOp.getAffineMap(),		AffineMap map = affineOp.getAffineMap();
		(void)scf::rewritePeeledMinMaxOp(rewriter, affineOp, map,
affineOp.operands(), IsMin, iv, ub, step,		affineOp.operands(), IsMin, iv, ub, step,
/*insideLoop=*/true);		/*insideLoop=*/true);
});		});
ifOp.walk([&](OpTy affineOp) {		ifOp.walk([&](OpTy affineOp) {
(void)rewritePeeledMinMaxOp(rewriter, affineOp, affineOp.getAffineMap(),		AffineMap map = affineOp.getAffineMap();
		(void)scf::rewritePeeledMinMaxOp(rewriter, affineOp, map,
affineOp.operands(), IsMin, splitBound, ub,		affineOp.operands(), IsMin, splitBound, ub,
step, /*insideLoop=*/false);		step, /*insideLoop=*/false);
});		});
}		}

LogicalResult mlir::scf::peelAndCanonicalizeForLoop(RewriterBase &rewriter,		LogicalResult mlir::scf::peelAndCanonicalizeForLoop(RewriterBase &rewriter,
ForOp forOp,		ForOp forOp,
scf::IfOp &ifOp) {		scf::IfOp &ifOp) {
Value ub = forOp.upperBound();		Value ub = forOp.upperBound();
Value splitBound;		Value splitBound;
▲ Show 20 Lines • Show All 172 Lines • Show Last 20 Lines

mlir/test/Dialect/Linalg/tiled-loop-peeling.mlir

This file was added.

				// RUN: mlir-opt %s -allow-unregistered-dialect -test-linalg-transform-patterns=test-tiled-loop-peeling=2 -split-input-file | FileCheck %s -check-prefix=CHECK-TILE-2
				// RUN: mlir-opt %s -allow-unregistered-dialect -test-linalg-transform-patterns=test-tiled-loop-peeling=0,1,2 -split-input-file | FileCheck %s -check-prefix=CHECK-TILE-012

				// CHECK-TILE-2-LABEL: func @tiled_loop_3d_tensor(
				// CHECK-TILE-2-SAME: %[[input:.*]]: tensor<?x?x?xf32>, %[[s0:.*]]: index, %[[s1:.*]]: index, %[[s2:.*]]: index
				// CHECK-TILE-2-DAG: %[[c0:.*]] = constant 0 : index
				// CHECK-TILE-2-DAG: %[[c1:.*]] = constant 1 : index
				// CHECK-TILE-2-DAG: %[[c2:.*]] = constant 2 : index
				// CHECK-TILE-2: %[[dim0:.*]] = tensor.dim %[[input]], %[[c0]]
				// CHECK-TILE-2: %[[dim1:.*]] = tensor.dim %[[input]], %[[c1]]
				// CHECK-TILE-2: %[[dim2:.*]] = tensor.dim %[[input]], %[[c2]]
				// CHECK-TILE-2: %[[init_tensor:.*]] = linalg.init_tensor
				// CHECK-TILE-2: %[[split_bound:.*]] = affine.apply
				// CHECK-TILE-2: %[[r1:.*]] = linalg.tiled_loop (%[[iv0:.*]], %[[iv1:.*]], %[[iv2:.*]]) = (%[[c0]], %[[c0]], %[[c0]])
				// CHECK-TILE-2-SAME: to (%[[dim0]], %[[dim1]], %[[split_bound]])
				// CHECK-TILE-2-SAME: step (%[[s0]], %[[s1]], %[[s2]])
				// CHECK-TILE-2-SAME: ins (%[[loop_in1:.*]] = %[[input]]: tensor<?x?x?xf32>)
				// CHECK-TILE-2-SAME: outs (%[[loop_out1:.*]] = %[[init_tensor]]: tensor<?x?x?xf32>) {
				// CHECK-TILE-2: %[[min0_1:.*]] = affine.min
				// CHECK-TILE-2: %[[min1_1:.*]] = affine.min
				// CHECK-TILE-2: %[[in_slice1:.*]] = tensor.extract_slice %[[loop_in1]][%[[iv0]], %[[iv1]], %[[iv2]]] [%[[min0_1]], %[[min1_1]], %[[s2]]]
				// CHECK-TILE-2: %[[out_slice1:.*]] = tensor.extract_slice %[[loop_out1]][%[[iv0]], %[[iv1]], %[[iv2]]] [%[[min0_1]], %[[min1_1]], %[[s2]]]
				// CHECK-TILE-2: %[[mod_slice1:.*]] = tensor.insert_slice %{{.*}} into %[[loop_out1]][%[[iv0]], %[[iv1]], %[[iv2]]] [%[[min0_1]], %[[min1_1]], %[[s2]]]
				// CHECK-TILE-2: linalg.yield %[[mod_slice1]]
				// CHECK-TILE-2: %[[r2:.*]] = linalg.tiled_loop (%[[iv0:.*]], %[[iv1:.*]], %[[iv2:.*]]) = (%[[c0]], %[[c0]], %[[split_bound]])
				// CHECK-TILE-2-SAME: to (%[[dim0]], %[[dim1]], %[[dim2]])
				// CHECK-TILE-2-SAME: step (%[[s0]], %[[s1]], %[[s2]])
				// CHECK-TILE-2-SAME: ins (%[[loop_in2:.*]] = %[[input]]: tensor<?x?x?xf32>)
				// CHECK-TILE-2-SAME: outs (%[[loop_out2:.*]] = %[[r1]]: tensor<?x?x?xf32>) {
				// CHECK-TILE-2: %[[min0_2:.*]] = affine.min
				// CHECK-TILE-2: %[[min1_2:.*]] = affine.min
				// CHECK-TILE-2: %[[apply2:.*]] = affine.apply
				// CHECK-TILE-2: %[[in_slice2:.*]] = tensor.extract_slice %[[loop_in1]][%[[iv0]], %[[iv1]], %[[iv2]]] [%[[min0_2]], %[[min1_2]], %[[apply2]]]
				// CHECK-TILE-2: %[[out_slice2:.*]] = tensor.extract_slice %[[loop_out1]][%[[iv0]], %[[iv1]], %[[iv2]]] [%[[min0_2]], %[[min1_2]], %[[apply2]]]
				// CHECK-TILE-2: %[[mod_slice2:.*]] = tensor.insert_slice %{{.*}} into %[[loop_out1]][%[[iv0]], %[[iv1]], %[[iv2]]] [%[[min0_2]], %[[min1_2]], %[[apply2]]]
				// CHECK-TILE-2: linalg.yield %[[mod_slice2]]
				// CHECK-TILE-2: return %[[r2]]

				// CHECK-TILE-012-LABEL: func @tiled_loop_3d_tensor
				// CHECK-TILE-012: linalg.tiled_loop {{.*}} {
				herhutUnsubmitted Done Reply Inline Actions I don't understand where the nesting of loops comes from. herhut: I don't understand where the nesting of loops comes from.
				springermAuthorUnsubmitted Done Reply Inline Actions Good catch. This test case was wrong. And the scary thing is that it passed because it matched ops from the following test case. (Now fixed by using `RUN: ...-LABEL:`. springerm: Good catch. This test case was wrong. And the scary thing is that it passed because it matched…
				// CHECK-TILE-012: linalg.yield
				// CHECK-TILE-012: }
				// CHECK-TILE-012: linalg.tiled_loop {{.*}} {
				// CHECK-TILE-012: linalg.yield
				// CHECK-TILE-012: }
				// CHECK-TILE-012: linalg.tiled_loop {{.*}} {
				// CHECK-TILE-012: linalg.yield
				// CHECK-TILE-012: }
				// CHECK-TILE-012: linalg.tiled_loop {{.*}} {
				// CHECK-TILE-012: linalg.yield
				// CHECK-TILE-012: }
				// CHECK-TILE-012: linalg.tiled_loop {{.*}} {
				// CHECK-TILE-012: linalg.yield
				// CHECK-TILE-012: }
				// CHECK-TILE-012: linalg.tiled_loop {{.*}} {
				// CHECK-TILE-012: linalg.yield
				// CHECK-TILE-012: }
				// CHECK-TILE-012: linalg.tiled_loop {{.*}} {
				// CHECK-TILE-012: linalg.yield
				// CHECK-TILE-012: }
				// CHECK-TILE-012: linalg.tiled_loop {{.*}} {
				// CHECK-TILE-012: linalg.yield
				// CHECK-TILE-012: }
				// CHECK-TILE-012-NOT: linalg.tiled_loop

				// Test input: a 3-d linalg.tiled_loop on tensors. All upper bounds are
				// tensor.dim results and all steps are function arguments, so nothing about
				// the iteration space is known statically. Each affine.min below computes
				// min(step, dim - iv) for one loop dimension and is used as a slice size.
				// Note: %cst and %c8 are not referenced inside this function.
				func @tiled_loop_3d_tensor(%arg0: tensor<?x?x?xf32>, %s0: index, %s1: index,
				%s2: index) -> tensor<?x?x?xf32> {
				%cst = constant 0.000000e+00 : f32
				%c0 = constant 0 : index
				%c1 = constant 1 : index
				%c2 = constant 2 : index
				%c8 = constant 8 : index
				%dim0 = tensor.dim %arg0, %c0 : tensor<?x?x?xf32>
				%dim1 = tensor.dim %arg0, %c1 : tensor<?x?x?xf32>
				%dim2 = tensor.dim %arg0, %c2 : tensor<?x?x?xf32>
				%output = linalg.init_tensor [%dim0, %dim1, %dim2] : tensor<?x?x?xf32>
				%result = linalg.tiled_loop
				(%arg1, %arg2, %arg3) = (%c0, %c0, %c0) to (%dim0, %dim1, %dim2)
				step (%s0, %s1, %s2) ins (%arg4 = %arg0: tensor<?x?x?xf32>)
				outs (%arg5 = %output: tensor<?x?x?xf32>) {
				%min0 = affine.min affine_map<(d0, d1)[s0] -> (d1, -d0 + s0)>(%arg1, %s0)[%dim0]
				%min1 = affine.min affine_map<(d0, d1)[s0] -> (d1, -d0 + s0)>(%arg2, %s1)[%dim1]
				%min2 = affine.min affine_map<(d0, d1)[s0] -> (d1, -d0 + s0)>(%arg3, %s2)[%dim2]
				%in_slice = tensor.extract_slice %arg4[%arg1, %arg2, %arg3] [%min0, %min1, %min2] [1, 1, 1]: tensor<?x?x?xf32> to tensor<?x?x?xf32>
				%out_slice = tensor.extract_slice %arg5[%arg1, %arg2, %arg3] [%min0, %min1, %min2] [1, 1, 1] : tensor<?x?x?xf32> to tensor<?x?x?xf32>
				%comp = "computation"(%in_slice, %out_slice) : (tensor<?x?x?xf32>, tensor<?x?x?xf32>) -> tensor<?x?x?xf32>
				%updated_slice = tensor.insert_slice %comp into %arg5[%arg1, %arg2, %arg3] [%min0, %min1, %min2] [1, 1, 1] : tensor<?x?x?xf32> into tensor<?x?x?xf32>
				linalg.yield %updated_slice : tensor<?x?x?xf32>
				}
				return %result : tensor<?x?x?xf32>
				}

				// -----

				// CHECK-TILE-2-LABEL: func @tiled_loop_3d_memref(
				// CHECK-TILE-2-SAME: %[[input:.*]]: memref<?x?x?xf32>, %[[output:.*]]: memref<?x?x?xf32>, %[[s0:.*]]: index, %[[s1:.*]]: index, %[[s2:.*]]: index
				// CHECK-TILE-2-DAG: %[[c0:.*]] = constant 0 : index
				// CHECK-TILE-2-DAG: %[[c1:.*]] = constant 1 : index
				// CHECK-TILE-2-DAG: %[[c2:.*]] = constant 2 : index
				// CHECK-TILE-2: %[[dim0:.*]] = memref.dim %[[input]], %[[c0]]
				// CHECK-TILE-2: %[[dim1:.*]] = memref.dim %[[input]], %[[c1]]
				// CHECK-TILE-2: %[[dim2:.*]] = memref.dim %[[input]], %[[c2]]
				// CHECK-TILE-2: %[[split_bound:.*]] = affine.apply
				// CHECK-TILE-2: linalg.tiled_loop (%[[iv0:.*]], %[[iv1:.*]], %[[iv2:.*]]) = (%[[c0]], %[[c0]], %[[c0]])
				// CHECK-TILE-2-SAME: to (%[[dim0]], %[[dim1]], %[[split_bound]])
				// CHECK-TILE-2-SAME: step (%[[s0]], %[[s1]], %[[s2]])
				// CHECK-TILE-2-SAME: ins (%[[loop_in1:.*]] = %[[input]]: memref<?x?x?xf32>)
				// CHECK-TILE-2-SAME: outs (%[[loop_out1:.*]] = %[[output]]: memref<?x?x?xf32>) {
				// CHECK-TILE-2: %[[min0_1:.*]] = affine.min
				// CHECK-TILE-2: %[[min1_1:.*]] = affine.min
				// CHECK-TILE-2: memref.subview %[[loop_in1]][%[[iv0]], %[[iv1]], %[[iv2]]] [%[[min0_1]], %[[min1_1]], %[[s2]]]
				// CHECK-TILE-2: linalg.yield
				// CHECK-TILE-2: linalg.tiled_loop (%[[iv0:.*]], %[[iv1:.*]], %[[iv2:.*]]) = (%[[c0]], %[[c0]], %[[split_bound]])
				// CHECK-TILE-2-SAME: to (%[[dim0]], %[[dim1]], %[[dim2]])
				// CHECK-TILE-2-SAME: step (%[[s0]], %[[s1]], %[[s2]])
				// CHECK-TILE-2-SAME: ins (%[[loop_in2:.*]] = %[[input]]: memref<?x?x?xf32>)
				// CHECK-TILE-2-SAME: outs (%[[loop_out2:.*]] = %[[output]]: memref<?x?x?xf32>) {
				// CHECK-TILE-2: %[[min0_2:.*]] = affine.min
				// CHECK-TILE-2: %[[min1_2:.*]] = affine.min
				// CHECK-TILE-2: %[[apply2:.*]] = affine.apply
				// CHECK-TILE-2: memref.subview %[[loop_in1]][%[[iv0]], %[[iv1]], %[[iv2]]] [%[[min0_2]], %[[min1_2]], %[[apply2]]]
				// CHECK-TILE-2: linalg.yield
				// CHECK-TILE-2: return

				// CHECK-TILE-012-LABEL: func @tiled_loop_3d_memref

				!memref_subview_type = type memref<?x?x?xf32, affine_map<(d0, d1, d2)[s0, s1, s2] -> (d0 * s1 + s0 + d1 * s2 + d2)>>

				func @tiled_loop_3d_memref(%arg0: memref<?x?x?xf32>, %output: memref<?x?x?xf32>,
				%s0: index, %s1: index, %s2: index) {
				%cst = constant 0.000000e+00 : f32
				%c0 = constant 0 : index
				%c1 = constant 1 : index
				%c2 = constant 2 : index
				%c8 = constant 8 : index
				%dim0 = memref.dim %arg0, %c0 : memref<?x?x?xf32>
				%dim1 = memref.dim %arg0, %c1 : memref<?x?x?xf32>
				%dim2 = memref.dim %arg0, %c2 : memref<?x?x?xf32>
				linalg.tiled_loop
				(%arg1, %arg2, %arg3) = (%c0, %c0, %c0) to (%dim0, %dim1, %dim2)
				step (%s0, %s1, %s2) ins (%arg4 = %arg0: memref<?x?x?xf32>)
				outs (%arg5 = %output : memref<?x?x?xf32>) {
				%min0 = affine.min affine_map<(d0, d1)[s0] -> (d1, -d0 + s0)>(%arg1, %s0)[%dim0]
				%min1 = affine.min affine_map<(d0, d1)[s0] -> (d1, -d0 + s0)>(%arg2, %s1)[%dim1]
				%min2 = affine.min affine_map<(d0, d1)[s0] -> (d1, -d0 + s0)>(%arg3, %s2)[%dim2]
				%in_slice = memref.subview %arg4[%arg1, %arg2, %arg3] [%min0, %min1, %min2] [1, 1, 1]: memref<?x?x?xf32> to !memref_subview_type
				"computation"(%in_slice) : (!memref_subview_type) -> memref<?x?x?xf32>
				tpoppUnsubmitted Done Reply Inline Actions This example gets unlucky with the current TiledLoop semantics. If there were an "outs" specified, I believe this would fail as the final peel after peeling would have "outs" be empty. (see my other comment) tpopp: This example gets unlucky with the current TiledLoop semantics. If there were an "outs"…
				linalg.yield
				}
				return
				}

				// -----

				// CHECK-TILE-2-LABEL: func @step_1_do_not_peel
				// CHECK-TILE-2: linalg.tiled_loop
				// CHECK-TILE-2-NOT: linalg.tiled_loop

				// CHECK-TILE-012-LABEL: func @step_1_do_not_peel

				// Test input: all three steps are the constant 1 while the upper bounds are
				// dynamic (tensor.dim results). The expectations above require that exactly
				// one linalg.tiled_loop remains when peeling dimension 2, i.e. the loop is
				// left unpeeled.
				// Note: %cst, %c2's sibling %c8 is not referenced inside this function.
				func @step_1_do_not_peel(%arg0: tensor<?x?x?xf32>) -> tensor<?x?x?xf32> {
				%cst = constant 0.000000e+00 : f32
				%c0 = constant 0 : index
				%c1 = constant 1 : index
				%c2 = constant 2 : index
				%c8 = constant 8 : index
				%dim0 = tensor.dim %arg0, %c0 : tensor<?x?x?xf32>
				%dim1 = tensor.dim %arg0, %c1 : tensor<?x?x?xf32>
				%dim2 = tensor.dim %arg0, %c2 : tensor<?x?x?xf32>
				%output = linalg.init_tensor [%dim0, %dim1, %dim2] : tensor<?x?x?xf32>
				%result = linalg.tiled_loop
				(%arg1, %arg2, %arg3) = (%c0, %c0, %c0) to (%dim0, %dim1, %dim2)
				step (%c1, %c1, %c1) ins (%arg4 = %arg0: tensor<?x?x?xf32>)
				outs (%arg5 = %output: tensor<?x?x?xf32>) {
				%in_slice = tensor.extract_slice %arg4[%arg1, %arg2, %arg3] [%c1, %c1, %c1] [1, 1, 1]: tensor<?x?x?xf32> to tensor<?x?x?xf32>
				%out_slice = tensor.extract_slice %arg5[%arg1, %arg2, %arg3] [%c1, %c1, %c1] [1, 1, 1] : tensor<?x?x?xf32> to tensor<?x?x?xf32>
				%comp = "computation"(%in_slice, %out_slice) : (tensor<?x?x?xf32>, tensor<?x?x?xf32>) -> tensor<?x?x?xf32>
				%updated_slice = tensor.insert_slice %comp into %arg5[%arg1, %arg2, %arg3] [%c1, %c1, %c1] [1, 1, 1] : tensor<?x?x?xf32> into tensor<?x?x?xf32>
				linalg.yield %updated_slice : tensor<?x?x?xf32>
				}
				return %result : tensor<?x?x?xf32>
				}

				// -----

				// CHECK-TILE-2-LABEL: func @divides_evenly_do_not_peel
				// CHECK-TILE-2: linalg.tiled_loop
				// CHECK-TILE-2-NOT: linalg.tiled_loop

				// CHECK-TILE-012-LABEL: func @divides_evenly_do_not_peel

				// Test input: dimension 2 iterates from 0 to 64 with step 8, all constants,
				// so the step divides the iteration space evenly. Dimensions 0 and 1 use
				// dynamic upper bounds (tensor.dim results) and the dynamic step %s. The
				// expectations above require that exactly one linalg.tiled_loop remains
				// when peeling dimension 2.
				func @divides_evenly_do_not_peel(%arg0: tensor<?x?x?xf32>, %s: index)
				-> tensor<?x?x?xf32> {
				%cst = constant 0.000000e+00 : f32
				%c0 = constant 0 : index
				%c1 = constant 1 : index
				%c2 = constant 2 : index
				%c8 = constant 8 : index
				%c64 = constant 64 : index
				%dim0 = tensor.dim %arg0, %c0 : tensor<?x?x?xf32>
				%dim1 = tensor.dim %arg0, %c1 : tensor<?x?x?xf32>
				%dim2 = tensor.dim %arg0, %c2 : tensor<?x?x?xf32>
				%output = linalg.init_tensor [%dim0, %dim1, %dim2] : tensor<?x?x?xf32>
				%result = linalg.tiled_loop
				(%arg1, %arg2, %arg3) = (%c0, %c0, %c0) to (%dim0, %dim1, %c64)
				step (%s, %s, %c8) ins (%arg4 = %arg0: tensor<?x?x?xf32>)
				outs (%arg5 = %output: tensor<?x?x?xf32>) {
				%in_slice = tensor.extract_slice %arg4[%arg1, %arg2, %arg3] [%c1, %c1, %c1] [1, 1, 1]: tensor<?x?x?xf32> to tensor<?x?x?xf32>
				%out_slice = tensor.extract_slice %arg5[%arg1, %arg2, %arg3] [%c1, %c1, %c1] [1, 1, 1] : tensor<?x?x?xf32> to tensor<?x?x?xf32>
				%comp = "computation"(%in_slice, %out_slice) : (tensor<?x?x?xf32>, tensor<?x?x?xf32>) -> tensor<?x?x?xf32>
				%updated_slice = tensor.insert_slice %comp into %arg5[%arg1, %arg2, %arg3] [%c1, %c1, %c1] [1, 1, 1] : tensor<?x?x?xf32> into tensor<?x?x?xf32>
				linalg.yield %updated_slice : tensor<?x?x?xf32>
				}
				return %result : tensor<?x?x?xf32>
				}

mlir/test/lib/Dialect/Linalg/TestLinalgTransforms.cpp

Show All 16 Lines
#include "mlir/Dialect/Linalg/Transforms/Transforms.h"		#include "mlir/Dialect/Linalg/Transforms/Transforms.h"
#include "mlir/Dialect/Linalg/Utils/Utils.h"		#include "mlir/Dialect/Linalg/Utils/Utils.h"
#include "mlir/Dialect/StandardOps/IR/Ops.h"		#include "mlir/Dialect/StandardOps/IR/Ops.h"
#include "mlir/Dialect/Vector/VectorOps.h"		#include "mlir/Dialect/Vector/VectorOps.h"
#include "mlir/Pass/Pass.h"		#include "mlir/Pass/Pass.h"
#include "mlir/Transforms/GreedyPatternRewriteDriver.h"		#include "mlir/Transforms/GreedyPatternRewriteDriver.h"

#include "llvm/ADT/SetVector.h"		#include "llvm/ADT/SetVector.h"
		#include "llvm/ADT/SmallVector.h"

using namespace mlir;		using namespace mlir;
using namespace mlir::linalg;		using namespace mlir::linalg;

namespace {		namespace {
struct TestLinalgTransforms		struct TestLinalgTransforms
: public PassWrapper<TestLinalgTransforms, FunctionPass> {		: public PassWrapper<TestLinalgTransforms, FunctionPass> {
TestLinalgTransforms() = default;		TestLinalgTransforms() = default;
▲ Show 20 Lines • Show All 71 Lines • ▼ Show 20 Lines	Option<bool> testSwapSubTensorPadTensor{
llvm::cl::init(false)};		llvm::cl::init(false)};
ListOption<int64_t> tileSizesForPadding{		ListOption<int64_t> tileSizesForPadding{
*this, "tile-sizes-for-padding",		*this, "tile-sizes-for-padding",
llvm::cl::desc("Linalg tile sizes when tile+pad"), llvm::cl::ZeroOrMore,		llvm::cl::desc("Linalg tile sizes when tile+pad"), llvm::cl::ZeroOrMore,
llvm::cl::MiscFlags::CommaSeparated};		llvm::cl::MiscFlags::CommaSeparated};
ListOption<unsigned> testInterchangePattern{		ListOption<unsigned> testInterchangePattern{
*this, "test-interchange-pattern", llvm::cl::MiscFlags::CommaSeparated,		*this, "test-interchange-pattern", llvm::cl::MiscFlags::CommaSeparated,
llvm::cl::desc("Test the interchange pattern.")};		llvm::cl::desc("Test the interchange pattern.")};
		ListOption<unsigned> testTiledLoopPeeling{
		*this, "test-tiled-loop-peeling",
		llvm::cl::desc("Test peeling of linalg.tiled_loop ops"),
		llvm::cl::OneOrMore, llvm::cl::MiscFlags::CommaSeparated};
};		};
} // end anonymous namespace		} // end anonymous namespace

static void applyPatterns(FuncOp funcOp) {		static void applyPatterns(FuncOp funcOp) {
MLIRContext *ctx = funcOp.getContext();		MLIRContext *ctx = funcOp.getContext();
RewritePatternSet patterns(ctx);		RewritePatternSet patterns(ctx);

//===--------------------------------------------------------------------===//		//===--------------------------------------------------------------------===//
▲ Show 20 Lines • Show All 450 Lines • ▼ Show 20 Lines	static void applyInterchangePattern(FuncOp funcOp,
RewritePatternSet interchangePattern(context);		RewritePatternSet interchangePattern(context);
interchangePattern.add<GenericOpInterchangePattern>(		interchangePattern.add<GenericOpInterchangePattern>(
context, interchangeVector,		context, interchangeVector,
LinalgTransformationFilter(ArrayRef<Identifier>{},		LinalgTransformationFilter(ArrayRef<Identifier>{},
Identifier::get("interchange", context)));		Identifier::get("interchange", context)));
(void)applyPatternsAndFoldGreedily(funcOp, std::move(interchangePattern));		(void)applyPatternsAndFoldGreedily(funcOp, std::move(interchangePattern));
}		}

		/// Name of the temporary array attribute that TiledLoopPeelingPattern attaches
		/// to TiledLoopOps to record which loop indices have already been peeled. The
		/// attribute is removed again by applyTiledLoopPeelingPattern once the
		/// patterns have been applied.
		static constexpr char kPeeledLoopsLabel[] = "__peeled_loops__";

		namespace {
		/// Peel TiledLoopOps, i.e., split them into two loops: One loop where the
		/// `idx`-th loop contains only "full" iterations and a second loop for the
		/// remaining partial iteration (if any).
		///
		/// The indices that were already peeled on an op are stored in an array
		/// attribute named `kPeeledLoopsLabel`; an op carrying `idx` in that list is
		/// not matched again, so the same loop is not rewritten a second time.
		struct TiledLoopPeelingPattern : public OpRewritePattern<TiledLoopOp> {
		/// Creates a pattern that peels the loop at position `idx` of a TiledLoopOp.
		TiledLoopPeelingPattern(MLIRContext *ctx, int64_t idx)
		: OpRewritePattern<TiledLoopOp>(ctx), idx(idx) {}

		/// Peels the `idx`-th loop of `loopOp`. Returns failure (leaving the op
		/// untouched by this pattern) if `idx` is already recorded as peeled, if
		/// `idx` is not smaller than the number of iterator types of `loopOp`, or
		/// if `linalg::peelAndCanonicalizeTiledLoop` reports failure.
		LogicalResult matchAndRewrite(TiledLoopOp loopOp,
		PatternRewriter &rewriter) const override {
		// Indices recorded on this op by earlier applications of the pattern
		// (empty if the marker attribute is absent).
		SmallVector<int64_t> peeledLoops;
		if (loopOp->hasAttr(kPeeledLoopsLabel)) {
		auto attr = loopOp->getAttr(kPeeledLoopsLabel).cast<ArrayAttr>();
		peeledLoops =
		llvm::to_vector<4>(llvm::map_range(attr, [](Attribute attr) {
		return attr.cast<IntegerAttr>().getInt();
		}));
		// Check if the loop was already peeled.
		if (llvm::find(peeledLoops, idx) != peeledLoops.end())
		return failure();
		}

		// `idx` must address one of the op's iterator types.
		if (static_cast<int64_t>(loopOp.iterator_types().size()) <= idx)
		return failure();

		// Peel loop and canonicalize.
		// `result` is an out-parameter of the peeling utility; presumably it
		// receives the loop created for the last (partial) iteration - confirm
		// against peelAndCanonicalizeTiledLoop.
		TiledLoopOp result;
		if (failed(linalg::peelAndCanonicalizeTiledLoop(rewriter, loopOp, idx,
		result)))
		return failure();
		peeledLoops.push_back(idx);

		// Apply label, so that the same loop is not rewritten a second time.
		herhutUnsubmitted Done Reply Inline Actions Could we also have a mode where the peeled off loop is not rewritten at all? Currently, the peeled off loop gets further peeled along other dimensions. The idea being that, if it is a boundary anyway, we might avoid specializing further in the interest of code size. herhut: Could we also have a mode where the peeled off loop is not rewritten at all? Currently, the…
		springermAuthorUnsubmitted Done Reply Inline Actions I'm adding this in a separate commit if that's OK. springerm: I'm adding this in a separate commit if that's OK.
		// The original op is modified through the rewriter; `result` gets the
		// same extended index list set directly.
		rewriter.updateRootInPlace(loopOp, [&]() {
		loopOp->setAttr(kPeeledLoopsLabel, rewriter.getI64ArrayAttr(peeledLoops));
		});
		result->setAttr(kPeeledLoopsLabel, rewriter.getI64ArrayAttr(peeledLoops));
		return success();
		}

		/// Index of loop to peel.
		int64_t idx;
		};
		} // namespace

		/// Peels the tiled loops in `funcOp` along every index listed in `loops`.
		///
		/// One TiledLoopPeelingPattern is registered per requested index and the
		/// whole set is applied with the greedy driver; the driver's result is
		/// intentionally ignored. Afterwards the temporary `kPeeledLoopsLabel`
		/// attribute is stripped from all TiledLoopOps in the function.
		static void applyTiledLoopPeelingPattern(FuncOp funcOp,
		                                         ArrayRef<unsigned> loops) {
		  MLIRContext *context = funcOp.getContext();

		  // Collect one peeling pattern for each loop index that was requested.
		  RewritePatternSet peelingPatterns(context);
		  for (unsigned loopIdx : loops) {
		    peelingPatterns.add<TiledLoopPeelingPattern>(context, loopIdx);
		  }

		  // Run to a fixed point; convergence status is not inspected.
		  (void)applyPatternsAndFoldGreedily(funcOp, std::move(peelingPatterns));

		  // The bookkeeping attribute has served its purpose: remove it everywhere.
		  funcOp.walk([](TiledLoopOp tiledLoop) {
		    tiledLoop->removeAttr(kPeeledLoopsLabel);
		  });
		}

/// Apply transformations specified as patterns.		/// Apply transformations specified as patterns.
void TestLinalgTransforms::runOnFunction() {		void TestLinalgTransforms::runOnFunction() {
auto lambda = [&](void *) {		auto lambda = [&](void *) {
getFunction().walk([](LinalgOp op) {		getFunction().walk([](LinalgOp op) {
op->removeAttr(LinalgTransforms::kLinalgTransformMarker);		op->removeAttr(LinalgTransforms::kLinalgTransformMarker);
});		});
};		};
std::unique_ptr<void, decltype(lambda)> cleanupGuard{(void *)1, lambda};		std::unique_ptr<void, decltype(lambda)> cleanupGuard{(void *)1, lambda};
Show All 21 Lines	void TestLinalgTransforms::runOnFunction() {
if (testGenericToVectorPattern)		if (testGenericToVectorPattern)
return applyLinalgToVectorPatterns(getFunction());		return applyLinalgToVectorPatterns(getFunction());
if (testTransformPadTensor)		if (testTransformPadTensor)
return applyPadTensorToGenericPatterns(getFunction());		return applyPadTensorToGenericPatterns(getFunction());
if (testGeneralizePadTensor)		if (testGeneralizePadTensor)
return applyGeneralizePadTensorPatterns(getFunction());		return applyGeneralizePadTensorPatterns(getFunction());
if (testSwapSubTensorPadTensor)		if (testSwapSubTensorPadTensor)
return applyExtractSliceOfPadTensorSwapPattern(getFunction());		return applyExtractSliceOfPadTensorSwapPattern(getFunction());
		if (testTiledLoopPeeling.hasValue())
		return applyTiledLoopPeelingPattern(getFunction(), testTiledLoopPeeling);
if (testTileAndPadPattern)		if (testTileAndPadPattern)
return applyTileAndPadPattern(getFunction(), tileSizesForPadding);		return applyTileAndPadPattern(getFunction(), tileSizesForPadding);
if (testHoistPadding) {		if (testHoistPadding) {
getFunction().walk([&](linalg::PadTensorOp padTensorOp) {		getFunction().walk([&](linalg::PadTensorOp padTensorOp) {
(void)linalg::hoistPaddingOnTensors(padTensorOp, testHoistPadding);		(void)linalg::hoistPaddingOnTensors(padTensorOp, testHoistPadding);
});		});
}		}
if (testInterchangePattern.hasValue())		if (testInterchangePattern.hasValue())
Show All 10 Lines

This is an archive of the discontinued LLVM Phabricator instance.

[mlir][linalg] linalg.tiled_loop peeling
ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 370976

mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h

mlir/include/mlir/Dialect/SCF/Transforms.h

mlir/lib/Dialect/Linalg/Transforms/Loops.cpp

mlir/lib/Dialect/SCF/Transforms/LoopSpecialization.cpp

mlir/test/Dialect/Linalg/tiled-loop-peeling.mlir

mlir/test/lib/Dialect/Linalg/TestLinalgTransforms.cpp

This is an archive of the discontinued LLVM Phabricator instance.

[mlir][linalg] linalg.tiled_loop peeling
ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 370976

mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h

mlir/include/mlir/Dialect/SCF/Transforms.h

mlir/lib/Dialect/Linalg/Transforms/Loops.cpp

mlir/lib/Dialect/SCF/Transforms/LoopSpecialization.cpp

mlir/test/Dialect/Linalg/tiled-loop-peeling.mlir

mlir/test/lib/Dialect/Linalg/TestLinalgTransforms.cpp

[mlir][linalg] linalg.tiled_loop peeling
ClosedPublic