Diff 462989

mlir/include/mlir/Dialect/SCF/IR/SCFOps.td

Show First 20 Lines • Show All 244 Lines • ▼ Show 20 Lines	let extraClassDeclaration = [{
MutableArrayRef<OpOperand> getIterOpOperands() {		MutableArrayRef<OpOperand> getIterOpOperands() {
return		return
getOperation()->getOpOperands().drop_front(getNumControlOperands());		getOperation()->getOpOperands().drop_front(getNumControlOperands());
}		}

void setLowerBound(Value bound) { getOperation()->setOperand(0, bound); }		void setLowerBound(Value bound) { getOperation()->setOperand(0, bound); }
void setUpperBound(Value bound) { getOperation()->setOperand(1, bound); }		void setUpperBound(Value bound) { getOperation()->setOperand(1, bound); }
void setStep(Value step) { getOperation()->setOperand(2, step); }		void setStep(Value step) { getOperation()->setOperand(2, step); }
		void setIterArg(unsigned iterArgNum, Value iterArgValue) {
		getOperation()->setOperand(iterArgNum + getNumControlOperands(), iterArgValue);
		}

/// Number of induction variables, always 1 for scf::ForOp.		/// Number of induction variables, always 1 for scf::ForOp.
unsigned getNumInductionVars() { return 1; }		unsigned getNumInductionVars() { return 1; }
/// Number of region arguments for loop-carried values		/// Number of region arguments for loop-carried values
unsigned getNumRegionIterArgs() {		unsigned getNumRegionIterArgs() {
return getBody()->getNumArguments() - getNumInductionVars();		return getBody()->getNumArguments() - getNumInductionVars();
}		}
/// Number of operands controlling the loop: lb, ub, step		/// Number of operands controlling the loop: lb, ub, step
unsigned getNumControlOperands() { return 3; }		unsigned getNumControlOperands() { return 3; }
/// Does the operation hold operands for loop-carried values		/// Does the operation hold operands for loop-carried values
bool hasIterOperands() {		bool hasIterOperands() {
return getOperation()->getNumOperands() > getNumControlOperands();		return getOperation()->getNumOperands() > getNumControlOperands();
}		}
/// Get Number of loop-carried values		/// Get Number of loop-carried values
unsigned getNumIterOperands() {		unsigned getNumIterOperands() {
return getOperation()->getNumOperands() - getNumControlOperands();		return getOperation()->getNumOperands() - getNumControlOperands();
}		}
		/// Get the iter arg number for an operand. If it isn't an iter arg
		/// operand return llvm::None.
		Optional<unsigned> getIterArgNumberForOpOperand(OpOperand &opOperand) {
		if (opOperand.getOwner() != getOperation())
		return llvm::None;
		unsigned operandNumber = opOperand.getOperandNumber();
		if (operandNumber < getNumControlOperands())
		return llvm::None;
		return operandNumber - getNumControlOperands();
		}

/// Get the region iter arg that corresponds to an OpOperand.		/// Get the region iter arg that corresponds to an OpOperand.
/// This helper prevents internal op implementation detail leakage to		/// This helper prevents internal op implementation detail leakage to
/// clients by hiding the operand / block argument mapping.		/// clients by hiding the operand / block argument mapping.
BlockArgument getRegionIterArgForOpOperand(OpOperand &opOperand) {		BlockArgument getRegionIterArgForOpOperand(OpOperand &opOperand) {
assert(opOperand.getOperandNumber() >= getNumControlOperands() &&		assert(opOperand.getOperandNumber() >= getNumControlOperands() &&
"expected an iter args operand");		"expected an iter args operand");
assert(opOperand.getOwner() == getOperation() &&		assert(opOperand.getOwner() == getOperation() &&
"opOperand does not belong to this scf::ForOp operation");		"opOperand does not belong to this scf::ForOp operation");
▲ Show 20 Lines • Show All 721 Lines • Show Last 20 Lines

mlir/include/mlir/Dialect/SCF/Utils/Utils.h

	Show All 38 Lines
	/// values returned by the callback should match the number of new			/// values returned by the callback should match the number of new
	/// initialization values. This function			/// initialization values. This function
	/// - Moves (i.e. doesnt clone) operations from the `loop` to the newly created			/// - Moves (i.e. doesnt clone) operations from the `loop` to the newly created
	/// loop			/// loop
	/// - Replaces the uses of `loop` with the new loop.			/// - Replaces the uses of `loop` with the new loop.
	/// - `loop` isnt erased, but is left in a "no-op" state where the body of the			/// - `loop` isnt erased, but is left in a "no-op" state where the body of the
	/// loop just yields the basic block arguments that correspond to the			/// loop just yields the basic block arguments that correspond to the
	/// initialization values of a loop. The loop is dead after this method.			/// initialization values of a loop. The loop is dead after this method.
	/// - All uses of the `newIterOperands` within the generated new loop			/// - If `replaceIterOperandsUsesInLoop` is true, all uses of the
	/// are replaced with the corresponding `BlockArgument` in the loop body.			/// `newIterOperands` within the generated new loop are replaced
				nicolasvasilacheUnsubmitted Done Reply Inline Actions nit: double are replaced nicolasvasilache: nit: double are replaced
				/// with the corresponding `BlockArgument` in the loop body.
	using NewYieldValueFn = std::function<SmallVector<Value>(			using NewYieldValueFn = std::function<SmallVector<Value>(
	OpBuilder &b, Location loc, ArrayRef<BlockArgument> newBBArgs)>;			OpBuilder &b, Location loc, ArrayRef<BlockArgument> newBBArgs)>;
	scf::ForOp replaceLoopWithNewYields(OpBuilder &builder, scf::ForOp loop,			scf::ForOp replaceLoopWithNewYields(OpBuilder &builder, scf::ForOp loop,
	ValueRange newIterOperands,			ValueRange newIterOperands,
	const NewYieldValueFn &newYieldValuesFn);			const NewYieldValueFn &newYieldValuesFn,
				bool replaceIterOperandsUsesInLoop = true);

	/// Update a perfectly nested loop nest to yield new values from the innermost			/// Update a perfectly nested loop nest to yield new values from the innermost
	/// loop and propagating it up through the loop nest. This function			/// loop and propagating it up through the loop nest. This function
	/// - Expects `loopNest` to be a perfectly nested loop with outer most loop			/// - Expects `loopNest` to be a perfectly nested loop with outer most loop
	/// first and innermost loop last.			/// first and innermost loop last.
	/// - `newIterOperands` are the initialization values to be used for the			/// - `newIterOperands` are the initialization values to be used for the
	/// outermost loop			/// outermost loop
	/// - `newYieldValueFn` is the callback that generates the new values to be			/// - `newYieldValueFn` is the callback that generates the new values to be
	/// yielded from within the innermost loop.			/// yielded from within the innermost loop.
	/// - The original loops are not erased, but are left in a "no-op" state where			/// - The original loops are not erased, but are left in a "no-op" state where
	/// the body of the loop just yields the basic block arguments that correspond			/// the body of the loop just yields the basic block arguments that correspond
	/// to the initialization values of a loop. The original loops are dead after			/// to the initialization values of a loop. The original loops are dead after
	/// this method.			/// this method.
	/// - All uses of the `newIterOperands` within the generated new loop			/// - If `replaceIterOperandsUsesInLoop` is true, all uses of the
	/// are replaced with the corresponding `BlockArgument` in the loop body.			/// `newIterOperands` within the generated new loop are replaced with the
				nicolasvasilacheUnsubmitted Done Reply Inline Actions incomplete sentence. nicolasvasilache: incomplete sentence.
				/// corresponding `BlockArgument` in the loop body.
	SmallVector<scf::ForOp>			SmallVector<scf::ForOp>
	replaceLoopNestWithNewYields(OpBuilder &builder, ArrayRef<scf::ForOp> loopNest,			replaceLoopNestWithNewYields(OpBuilder &builder, ArrayRef<scf::ForOp> loopNest,
	ValueRange newIterOperands,			ValueRange newIterOperands,
	const NewYieldValueFn &newYieldValueFn);			const NewYieldValueFn &newYieldValueFn,
				bool replaceIterOperandsUsesInLoop = true);

	/// Outline a region with a single block into a new FuncOp.			/// Outline a region with a single block into a new FuncOp.
	/// Assumes the FuncOp result types is the type of the yielded operands of the			/// Assumes the FuncOp result types is the type of the yielded operands of the
	/// single block. This constraint makes it easy to determine the result.			/// single block. This constraint makes it easy to determine the result.
	/// This method also clones the `arith::ConstantIndexOp` at the start of			/// This method also clones the `arith::ConstantIndexOp` at the start of
	/// `outlinedFuncBody` to allow simple canonicalizations.			/// `outlinedFuncBody` to allow simple canonicalizations.
	/// Creates a new FuncOp and thus cannot be used in a FuncOp pass.			/// Creates a new FuncOp and thus cannot be used in a FuncOp pass.
	/// The client is responsible for providing a unique `funcName` that will not			/// The client is responsible for providing a unique `funcName` that will not
	▲ Show 20 Lines • Show All 97 Lines • Show Last 20 Lines

mlir/lib/Dialect/SCF/Transforms/TileUsingInterface.cpp

Show First 20 Lines • Show All 161 Lines • ▼ Show 20 Lines	auto loop = builder.create<scf::ForOp>(
});		});
offsets[loopRange.index()] = loop.getInductionVar();		offsets[loopRange.index()] = loop.getInductionVar();
loops.push_back(loop);		loops.push_back(loop);
builder.setInsertionPoint(loop.getBody()->getTerminator());		builder.setInsertionPoint(loop.getBody()->getTerminator());
}		}
return loops;		return loops;
}		}

		/// If the tiled operation is in destination passing style, update the
		nicolasvasilacheUnsubmitted Done Reply Inline Actions nit: is in nicolasvasilache: nit: is in
		/// slice of the destination used (which refers to the untiled destination)
		/// to use the corresponding region argument of the innermost loop.
		///
		/// ```mlir
		/// %0 =
		/// scf.for %iv0 = ... iter_args(%arg = %0) {
		/// %1 = tensor.extract_slice %0
		/// %2 = tiled_op
		/// %3 = tensor.insert_slice %2 into %arg
		/// scf.yield %3
		/// }
		/// ```
		///
		/// is transformed to
		///
		/// ```mlir
		/// scf.for %iv0 = ... iter_args(%arg = %0) {
		/// %1 = tensor.extract_slice %arg
		/// %2 = tiled_op
		/// %3 = tensor.insert_slice %2 into %arg
		/// scf.yield %3
		mravishankarAuthorUnsubmitted Done Reply Inline Actions @nicolasvasilache responding to some of the comments from https://reviews.llvm.org/D134144 here. This is just moved here from below. I forgot that all the results of the tiled operation need to be yielded at the same time for things to work correctly. The reason being that to get the tile offset and size of the result computed you need to use the offset and size of the iteration space. If you just yield one result of the tiled operation at a time, after the first yield, the offset of the iteration space (which depends on the induction variable of the loop) becomes invalid, but is still referenced in the `offset` computation. So you need to first compute the `resultTileOffsets` and `resultTileSizes` of all the results before you yield them. Note that this is only an issue for returning the tiled results for the consumer (or the first tiled op). After that, if you fuse a producer and need to yield the value of the tiled producer, you have the `resultTileOffset` and `resultTileSizes` from the slice op that is used. Since after the yield the IR is in a consistent state, processing other slices doesn't have an issue. So this "batch processing of yields" only needs to happen for the tiled operation. In any case, this method is just "move" of code from below. This is what was always done (I just had forgotten this). You left a comment about front-loading the use of `SubsetExtractOpInterface`. I think that is a larger change that has to happen as a generalization of the whole `TilingInterface`. That probably needs to be done later. Also note that for just the tiling, you don't have slices. So we need a different container for the offsets and sizes that does not need the op itself. So there are things to be handled here w.r.t. `SubsetExtractOpInterface` that are sort of unrelated to this change. This is really fixing a bug. 
mravishankar: @nicolasvasilache responding to some of the comments from https://reviews.llvm.org/D134144 here.
		nicolasvasilacheUnsubmitted Not Done Reply Inline Actions this move creates significant review complexity, can it be undone? Is the objective to just add `/*replaceIterOperandsUsesInLoop=*/false`? If so, it will be much easier to just do without moving the code and reducing the review complexity. Then you can refactor as part of the NFC. I am unable to tell whether for (auto tiledResult : llvm::enumerate(tilingResult.tiledOp->getResults())) { if (failed(op.getResultTilePosition( rewriter, tiledResult.index(), offsets, sizes, resultTileOffsetsList[tiledResult.index()], resultTileSizesList[tiledResult.index()]))) { return rewriter.notifyMatchFailure( op, "unable to get insertion position of tiled result"); } } + changes in the function is NFC compared to: for (auto resultNum : llvm::seq<unsigned>(0, op->getNumResults())) { SmallVector<OpFoldResult> resultTileOffsets, resultTileSizes; if (failed(op.getResultTilePosition(b, resultNum, offsets, sizes, resultTileOffsets, resultTileSizes))) { op.emitOpError("unable to get position of result ") << resultNum << " of the tiled implementation"; return {}; } If the answer is yes, please make it easier to accept your PR by avoiding complex refactorings mixed with non-trivial functional changes: leave the lambda where it was and just add `/*replaceIterOperandsUsesInLoop=*/false`. If the answer is no, we need significantly more comments here to understand why not. nicolasvasilache: 1. this move creates significant review complexity, can it be undone? Is the objective to just…
		mravishankarAuthorUnsubmitted Done Reply Inline Actions Inlined the function. The comment doesnt point to where the code is now, but it is just that change. mravishankar: Inlined the function. The comment doesnt point to where the code is now, but it is just that…
		nicolasvasilacheUnsubmitted Done Reply Inline Actions Great, thanks for removing that cognitive barrier! nicolasvasilache: Great, thanks for removing that cognitive barrier!
		/// }
		/// ```
		/// TODO: This can be made much cleaner when `DestinationStyleOp` interface is
		/// available generally.
		static void
		updateDestinationOperandsForTiledOp(OpBuilder &builder,
		ValueRange tiledOpDestinationValues,
		nicolasvasilacheUnsubmitted Done Reply Inline Actions Plz add some TODO about SubsetExtractOpInterface nicolasvasilache: Plz add some TODO about SubsetExtractOpInterface
		ValueRange bbArgsList) {
		for (auto destValue : llvm::enumerate(tiledOpDestinationValues)) {
		auto sliceOp = destValue.value().getDefiningOp<tensor::ExtractSliceOp>();
		if (!sliceOp)
		continue;
		sliceOp.setOperand(0, bbArgsList[destValue.index()]);
		}
		}

scf::TileUsingSCFForOp::TileUsingSCFForOp(MLIRContext *context,		scf::TileUsingSCFForOp::TileUsingSCFForOp(MLIRContext *context,
scf::SCFTilingOptions options,		scf::SCFTilingOptions options,
PatternBenefit benefit)		PatternBenefit benefit)
: OpInterfaceRewritePattern<TilingInterface>(context, benefit),		: OpInterfaceRewritePattern<TilingInterface>(context, benefit),
options(std::move(options)) {}		options(std::move(options)) {}

scf::TileUsingSCFForOp::TileUsingSCFForOp(StringRef opName,		scf::TileUsingSCFForOp::TileUsingSCFForOp(StringRef opName,
MLIRContext *context,		MLIRContext *context,
▲ Show 20 Lines • Show All 98 Lines • ▼ Show 20 Lines	scf::TileUsingSCFForOp::returningMatchAndRewrite(

if (op->getNumResults() == 0) {		if (op->getNumResults() == 0) {
rewriter.eraseOp(op);		rewriter.eraseOp(op);
return tilingResult;		return tilingResult;
}		}

// 5. If the original operation has results, modify the loop nest to yield		// 5. If the original operation has results, modify the loop nest to yield
// the replacement values.		// the replacement values.
SmallVector<Value> replacements;
if (tilingResult.loops.empty()) {		if (tilingResult.loops.empty()) {
// 5a. If there were no loops, the tiled implementation results are the		// 5a. If there were no loops, the tiled implementation results are the
// replacements.		// replacements.
rewriter.replaceOp(op, tilingResult.tiledOp->getResults());		rewriter.replaceOp(op, tilingResult.tiledOp->getResults());
return tilingResult;		return tilingResult;
}		}

// 5b. `scf.for` with tensor semantics requires the loop nest to yield the		// 6. Yield the results of the tiled operation from the loop nest as
		// replacements for the original untiled ops.
		if (tilingResult.tiledOp->getNumResults() != op->getNumResults()) {
		return rewriter.notifyMatchFailure(
		tilingResult.tiledOp,
		"expected tiled op to have as many results as the untiled operation");
		}

		// `scf.for` with tensor semantics requires the loop nest to yield the
// replacement values using destructive updates. Use the `TilingInterface`		// replacement values using destructive updates. Use the `TilingInterface`
// to get the position of the result tiles and use that to generate the		// to get the position of the result tiles and use that to generate the
// destructive update pattern, i.e.,		// destructive update pattern, i.e.,
//		//
// ```mlir		// ```mlir
// scf.for %iv0 = ... {		// scf.for %iv0 = ... {
// %0 = tiled_op		// %0 = tiled_op
// }		// }
Show All 29 Lines	for (auto resultNum : llvm::seq<unsigned>(0, op->getNumResults())) {
newBBArgs[resultNum], resultTileOffsets, resultTileSizes,		newBBArgs[resultNum], resultTileOffsets, resultTileSizes,
resultTileStrides);		resultTileStrides);
yieldedValues.push_back(yieldedValue);		yieldedValues.push_back(yieldedValue);
}		}
return yieldedValues;		return yieldedValues;
};		};
SmallVector<scf::ForOp> newLoops = replaceLoopNestWithNewYields(		SmallVector<scf::ForOp> newLoops = replaceLoopNestWithNewYields(
rewriter, tilingResult.loops, op.getDestinationOperands(rewriter),		rewriter, tilingResult.loops, op.getDestinationOperands(rewriter),
yieldValueFn);		yieldValueFn, /replaceIterOperandsUsesInLoops =/false);
for (const auto &loop : llvm::enumerate(tilingResult.loops)) {		for (const auto &loop : llvm::enumerate(tilingResult.loops)) {
rewriter.eraseOp(loop.value());		rewriter.eraseOp(loop.value());
tilingResult.loops[loop.index()] = newLoops[loop.index()];		tilingResult.loops[loop.index()] = newLoops[loop.index()];
}		}
rewriter.replaceOp(op, tilingResult.loops.front().getResults());		rewriter.replaceOp(op, tilingResult.loops.front().getResults());
return tilingResult;		return tilingResult;
}		}

//===----------------------------------------------------------------------===//		//===----------------------------------------------------------------------===//
		nicolasvasilacheUnsubmitted Done Reply Inline Actions Treating all the code above this line as unnecessary change and assuming that adding`/replaceIterOperandsUsesInLoop =/false.` is the only useful change. nicolasvasilache: Treating all the code above this line as unnecessary change and assuming that…
// TileConsumerAndFuseProducersUsingSCFForOp pattern implementation.		// TileConsumerAndFuseProducersUsingSCFForOp pattern implementation.
//===----------------------------------------------------------------------===//		//===----------------------------------------------------------------------===//

scf::TileConsumerAndFuseProducersUsingSCFForOp::		scf::TileConsumerAndFuseProducersUsingSCFForOp::
TileConsumerAndFuseProducersUsingSCFForOp(MLIRContext *context,		TileConsumerAndFuseProducersUsingSCFForOp(MLIRContext *context,
scf::SCFTilingOptions options,		scf::SCFTilingOptions options,
PatternBenefit benefit)		PatternBenefit benefit)
: OpInterfaceRewritePattern<TilingInterface>(context, benefit),		: OpInterfaceRewritePattern<TilingInterface>(context, benefit),
tilingPattern(context, std::move(options)) {}		tilingPattern(context, std::move(options)) {}

scf::TileConsumerAndFuseProducersUsingSCFForOp::		scf::TileConsumerAndFuseProducersUsingSCFForOp::
TileConsumerAndFuseProducersUsingSCFForOp(StringRef opName,		TileConsumerAndFuseProducersUsingSCFForOp(StringRef opName,
MLIRContext *context,		MLIRContext *context,
scf::SCFTilingOptions options,		scf::SCFTilingOptions options,
PatternBenefit benefit)		PatternBenefit benefit)
: OpInterfaceRewritePattern<TilingInterface>(context, benefit),		: OpInterfaceRewritePattern<TilingInterface>(context, benefit),
tilingPattern(context, std::move(options)) {}		tilingPattern(context, std::move(options)) {}

/// Return the `Value` that is defined by an operation that implements		/// Return the untiled producer whose slice is used in a tiled consumer. The
/// the `TilingInterface`. Looks through `iter_args` of scf.for nest		/// method traverses the tile loop nest (`loops`) if needed, and returns the
/// if required.		/// `iter_args` of the outer most that is encountered. Traversing the iter_args
static Optional<OpResult> getFusableProducer(Value v) {		/// indicates that this is a destination operand of the consumer. If there was
while (auto blockArg = v.dyn_cast<BlockArgument>()) {		/// no loop traversal needed, the second value of the returned tuple is empty.
auto loopOp = dyn_cast<scf::ForOp>(blockArg.getOwner()->getParentOp());		static std::tuple<OpResult, Optional<OpOperand *>>
if (!loopOp)		getUntiledProducerFromSliceSource(OpOperand *source,
		nicolasvasilacheUnsubmitted Not Done Reply Inline Actions this functions needs a significantly better name that is descriptive of what it does. `walkBackXAndReturnY` or something nicolasvasilache: this functions needs a significantly better name that is descriptive of what it does.
		mravishankarAuthorUnsubmitted Done Reply Inline Actions Went with `getUntiledProducerFromSliceSource`. mravishankar: Went with `getUntiledProducerFromSliceSource`.
		nicolasvasilacheUnsubmitted Done Reply Inline Actions Thanks! nicolasvasilache: Thanks!
return llvm::None;		ArrayRef<scf::ForOp> loops) {
v = loopOp.getOpOperandForRegionIterArg(blockArg).get();		Optional<OpOperand *> destinationIterArg;
}		auto loopIt = loops.rbegin();
if (!isa_and_nonnull<TilingInterface>(v.getDefiningOp()))		while (auto iterArg = source->get().dyn_cast<BlockArgument>()) {
return llvm::None;		scf::ForOp loop = *loopIt;
return v.cast<OpResult>();		if (iterArg.getOwner()->getParentOp() != loop)
}		break;
		source = &loop.getOpOperandForRegionIterArg(iterArg);
// Replace iter args of the outer most loop with region args of the inner most		loopIt++;
// one.		}
static void replaceIterArgs(scf::ForOp outerFor, scf::ForOp innerFor,		if (loopIt == loops.rend())
PatternRewriter &rewriter) {		destinationIterArg = source;
assert(outerFor.getNumIterOperands() == innerFor.getNumIterOperands() &&		return {source->get().dyn_cast<OpResult>(), destinationIterArg};
"expect same number of iter args");
Block block = &(innerFor.getRegion().begin());
for (auto it :
llvm::zip(outerFor.getIterOperands(), innerFor.getRegionIterArgs())) {
Value source = std::get<0>(it);
Value target = std::get<1>(it);
source.replaceUsesWithIf(target, [&](OpOperand &use) {
return use.getOwner()->getBlock() == block;
});
}
}		}

FailureOr<scf::SCFTileAndFuseResult>		FailureOr<scf::SCFTileAndFuseResult>
scf::TileConsumerAndFuseProducersUsingSCFForOp::returningMatchAndRewrite(		scf::TileConsumerAndFuseProducersUsingSCFForOp::returningMatchAndRewrite(
TilingInterface op, PatternRewriter &rewriter) const {		TilingInterface op, PatternRewriter &rewriter) const {
// This transformation is only valid for ops that return values (i.e. not		// This transformation is only valid for ops that return values (i.e. not
// valid to use with operations that have memref operands).		// valid to use with operations that have memref operands).
if (!op->getNumResults()) {		if (!op->getNumResults()) {
Show All 32 Lines	scf::TileConsumerAndFuseProducersUsingSCFForOp::returningMatchAndRewrite(
OpBuilder::InsertionGuard g(rewriter);		OpBuilder::InsertionGuard g(rewriter);
while (!candidates.empty()) {		while (!candidates.empty()) {
// 2a. Traverse the slices in BFS fashion.		// 2a. Traverse the slices in BFS fashion.
tensor::ExtractSliceOp candidateSliceOp = candidates.front();		tensor::ExtractSliceOp candidateSliceOp = candidates.front();
candidates.pop_front();		candidates.pop_front();

// 2b. Get the producer of the source (potentially walking through		// 2b. Get the producer of the source (potentially walking through
// `iter_args` of nested `scf.for`)		// `iter_args` of nested `scf.for`)
Optional<OpResult> fusableProducer =		auto [fusableProducer, destinationIterArg] =
getFusableProducer(candidateSliceOp.getSource());		getUntiledProducerFromSliceSource(&candidateSliceOp->getOpOperand(0),
		tileAndFuseResult.loops);
		nicolasvasilacheUnsubmitted Not Done Reply Inline Actions you can now `auto [x, y] = ...` nicolasvasilache: you can now `auto [x, y] = ...`
		mravishankarAuthorUnsubmitted Done Reply Inline Actions TIL.... mravishankar: TIL....
if (!fusableProducer)		if (!fusableProducer)
continue;		continue;

// 2c. Generate the tiled implementation of the producer of the source		// 2c. Generate the tiled implementation of the producer of the source
rewriter.setInsertionPoint(candidateSliceOp);		rewriter.setInsertionPoint(candidateSliceOp);
FailureOr<Value> fusedProducerValue =		FailureOr<Value> fusedProducerValue =
tensor::replaceExtractSliceWithTiledProducer(rewriter, candidateSliceOp,		tensor::replaceExtractSliceWithTiledProducer(rewriter, candidateSliceOp,
fusableProducer.value());		fusableProducer);
if (failed(fusedProducerValue))		if (failed(fusedProducerValue))
continue;		continue;
rewriter.replaceOp(candidateSliceOp, fusedProducerValue.value());		rewriter.replaceOp(candidateSliceOp, fusedProducerValue.value());

// 2d. The operands of the fused producer might themselves be slices of		// 2d. The operands of the fused producer might themselves be slices of
// values produced by operations that implement the `TilingInterface`.		// values produced by operations that implement the `TilingInterface`.
// Add these operations to the worklist.		// Add these operations to the worklist.
Operation *fusedProducer = fusedProducerValue->getDefiningOp();		Operation *fusedProducer = fusedProducerValue->getDefiningOp();
tileAndFuseResult.tiledAndFusedOps.push_back(fusedProducer);		tileAndFuseResult.tiledAndFusedOps.push_back(fusedProducer);
addCandidateSlices(fusedProducer, candidates);		addCandidateSlices(fusedProducer, candidates);

// 2e. If the operation being fused creates a value that is used as `outs`		// 2e. If the slice is for a destination operand, for example,
		nicolasvasilacheUnsubmitted Done Reply Inline Actions At this point, if the slice is for a destination operand, the IR is ill-formed because XXX (with a clear example with multiple loops and clear pointing out where in the IR the problem is and why). Problematic IR Example To correct this, we need to update Y and Z as follows: - iter_arg ... - destination of the slice ... Corrected IR example. nicolasvasilache: ``` At this point, if the slice is for a destination operand, the IR is ill-formed because XXX…
// in the tiled operation, the result of the unfused operation will be
// used in the `iter_args` of the tiled loop generated. When the
// operation is fused, this use in `iter_args` needs to be modified to
// use the destination of the fused operation. For example, starting
// with
//		//
// ```mlir		// ```mlir
// %0 = linalg.init_tensor ...		// %0 = linalg.init
// %1 = linalg.fill ... outs(%0:...)...		// %1 = linalg.fill .. outs(%0 : )
// %2 = linalg.matmul ... outs(%1:...)....		// %2 = scf.for .. iter_args(%arg0 = %1) {
		// %3 = scf.for .. iter_args(%arg1 = %arg0) {
		// %4 = tensor.extract_slice %arg1 [..]
		// .. = linalg.matmul .. outs(%4 : )
		// }
		// }
// ```		// ```
//		//
// First the `linalg.matmul` gets tiled		// the IR is currently
//		//
// ```mlir		// ```
// %0 = linalg.init_tensor		// %0 = linalg.init
// %1 = linalg.fill		// %1 = linalg.fill
// %2 = scf.for .... iter_args(%arg0 = %1)...		// %2 = scf.for .. iter_args(%arg0 = %1 /* incorrect value */ ) {
// ...		// %3 = scf.for .. iter_args(%arg1 = %arg0) {
// ... = linalg.matmul ...		// %4 = tensor.extract_slice %0 /* incorrect value */ [..]
//		// %5 = linalg.fill .. outs(%4 : )
		// .. = linalg.matmul .. outs(%5 : )
		// }
		// }
// ```		// ```
//		//
// When the `linalg.fill` gets fused, the `iter_args` needs to be		// The untiled `linalg.fill` is still used as the `init_value` since it
// modified		// was originally a destination operand of the untiled `linalg.matmul`.
		// When fusing an operand that is a destination operand.
		// - Update the iter_arg of the outer most loop to use the destination
		nicolasvasilacheUnsubmitted Not Done Reply Inline Actions Could you please highlight %arg0 = %1 /incorrect value/) and tensor.extract_slice %0 /* incorrect value / ? nicolasvasilache:* Could you please highlight ``` %arg0 = %1 /incorrect value/) ``` and ``` tensor.
		// of the untiled producer.
		// - Update the destination of the slice of the tiled producer generated
		// to use the same basic block argument as the slice that was used to
		// generate inplace the tiled implementation of the producer.
		// With this the IR will be.
//		//
// ```mlir
// %0 = linalg.init_tensor
// %1 = scf.for ... iter_args(%arg0 = %0)...
// ...
// %2 = linalg.fill ...
// %3 = linalg.matmul ... outs(%2: ...)...
// ```		// ```
TilingInterface unfusedProducerOp =		// %0 = linalg.init
cast<TilingInterface>(fusableProducer->getOwner());		// %1 = scf.for .. iter_args(%arg0 = %0 /* corrected value */ ) {
		nicolasvasilacheUnsubmitted Not Done Reply Inline Actions Could you please highlight %arg0 = %0 /now this is correct/) and tensor.extract_slice %arg1 /* now this is correct / ? nicolasvasilache:* Could you please highlight %arg0 = %0 /now this is correct/) and tensor.extract_slice %arg1…
scf::ForOp outerMostTiledLoop = tileAndFuseResult.loops.front();		// %2 = scf.for .. iter_args(%arg1 = %arg0) {
SmallVector<Value> unfusedProducerOpDestValues =		// %3 = tensor.extract_slice %arg1 /* corrected value */ [..]
unfusedProducerOp.getDestinationOperands(rewriter);		// %4 = linalg.fill .. outs(%3 : )
for (OpOperand &uses : unfusedProducerOp->getUses()) {		// .. = linalg.matmul .. outs(%4 : )
if (uses.getOwner() == outerMostTiledLoop.getOperation()) {		// }
unsigned resultNumber = uses.get().cast<OpResult>().getResultNumber();		// }
unsigned operandNumber = uses.getOperandNumber();		// ```
outerMostTiledLoop->setOperand(		// TODO: This can be modeled better with the `DestinationStyleOpInterface`.
operandNumber, unfusedProducerOpDestValues[resultNumber]);		// Update to use that when it does become available.
		scf::ForOp outerMostLoop = tileAndFuseResult.loops.front();
		Optional<unsigned> iterArgNumber;
		if (destinationIterArg) {
		iterArgNumber = outerMostLoop.getIterArgNumberForOpOperand(
		*destinationIterArg.value());
		}
		if (iterArgNumber) {
		unsigned resultNumber = fusableProducer.getResultNumber();
		if (auto producerOp =
		dyn_cast<TilingInterface>(fusableProducer.getOwner())) {
		SmallVector<Value> destination =
		producerOp.getDestinationOperands(rewriter);
		outerMostLoop.setIterArg(iterArgNumber.value(),
		destination[resultNumber]);
		}
		if (auto tiledAndFusedInterfaceOp =
		fusedProducerValue.value().getDefiningOp<TilingInterface>()) {
		scf::ForOp innerMostLoop = tileAndFuseResult.loops.back();
		nicolasvasilacheUnsubmitted Done Reply Inline Actions can we use (or add) some helper function to avoid magic indexings in clients of this op (there should already be something available). nicolasvasilache: can we use (or add) some helper function to avoid magic indexings in clients of this op (there…
		SmallVector<Value> destination =
		tiledAndFusedInterfaceOp.getDestinationOperands(rewriter);
		updateDestinationOperandsForTiledOp(
		rewriter, destination[resultNumber],
		innerMostLoop.getRegionIterArgs()[iterArgNumber.value()]);
}		}
}		}
}		}
replaceIterArgs(tileAndFuseResult.loops.front(),
tileAndFuseResult.loops.back(), rewriter);
return tileAndFuseResult;		return tileAndFuseResult;
}		}

//===----------------------------------------------------------------------===//		//===----------------------------------------------------------------------===//
// LowerToLoopsUsingSCFForOp		// LowerToLoopsUsingSCFForOp
//===----------------------------------------------------------------------===//		//===----------------------------------------------------------------------===//

FailureOr<SmallVector<scf::ForOp>>		FailureOr<SmallVector<scf::ForOp>>
Show All 32 Lines

mlir/lib/Dialect/SCF/Utils/Utils.cpp

Show All 34 Lines	struct LoopParams {
Value upperBound;		Value upperBound;
Value step;		Value step;
};		};
} // namespace		} // namespace

scf::ForOp		scf::ForOp
mlir::replaceLoopWithNewYields(OpBuilder &builder, scf::ForOp loop,		mlir::replaceLoopWithNewYields(OpBuilder &builder, scf::ForOp loop,
ValueRange newIterOperands,		ValueRange newIterOperands,
const NewYieldValueFn &newYieldValuesFn) {		const NewYieldValueFn &newYieldValuesFn,
		bool replaceIterOperandsUsesInLoop) {
// Create a new loop before the existing one, with the extra operands.		// Create a new loop before the existing one, with the extra operands.
OpBuilder::InsertionGuard g(builder);		OpBuilder::InsertionGuard g(builder);
builder.setInsertionPoint(loop);		builder.setInsertionPoint(loop);
auto operands = llvm::to_vector(loop.getIterOperands());		auto operands = llvm::to_vector(loop.getIterOperands());
operands.append(newIterOperands.begin(), newIterOperands.end());		operands.append(newIterOperands.begin(), newIterOperands.end());
scf::ForOp newLoop = builder.create<scf::ForOp>(		scf::ForOp newLoop = builder.create<scf::ForOp>(
loop.getLoc(), loop.getLowerBound(), loop.getUpperBound(), loop.getStep(),		loop.getLoc(), loop.getLowerBound(), loop.getUpperBound(), loop.getStep(),
operands, [](OpBuilder &, Location, Value, ValueRange) {});		operands, [](OpBuilder &, Location, Value, ValueRange) {});
Show All 22 Lines	mlir::replaceLoopWithNewYields(OpBuilder &builder, scf::ForOp loop,

// Remap the BlockArguments from the original loop to the new loop		// Remap the BlockArguments from the original loop to the new loop
// BlockArguments.		// BlockArguments.
ArrayRef<BlockArgument> bbArgs = loopBody->getArguments();		ArrayRef<BlockArgument> bbArgs = loopBody->getArguments();
for (auto it :		for (auto it :
llvm::zip(bbArgs, newLoopBody->getArguments().take_front(bbArgs.size())))		llvm::zip(bbArgs, newLoopBody->getArguments().take_front(bbArgs.size())))
std::get<0>(it).replaceAllUsesWith(std::get<1>(it));		std::get<0>(it).replaceAllUsesWith(std::get<1>(it));

		if (replaceIterOperandsUsesInLoop) {
// Replace all uses of `newIterOperands` with the corresponding basic block		// Replace all uses of `newIterOperands` with the corresponding basic block
// arguments.		// arguments.
for (auto it : llvm::zip(newIterOperands, newBBArgs)) {		for (auto it : llvm::zip(newIterOperands, newBBArgs)) {
std::get<0>(it).replaceUsesWithIf(std::get<1>(it), [&](OpOperand &use) {		std::get<0>(it).replaceUsesWithIf(std::get<1>(it), [&](OpOperand &use) {
Operation *user = use.getOwner();		Operation *user = use.getOwner();
return newLoop->isProperAncestor(user);		return newLoop->isProperAncestor(user);
});		});
}		}
		}

// Replace all uses of the original loop with corresponding values from the		// Replace all uses of the original loop with corresponding values from the
// new loop.		// new loop.
loop.replaceAllUsesWith(		loop.replaceAllUsesWith(
newLoop.getResults().take_front(loop.getNumResults()));		newLoop.getResults().take_front(loop.getNumResults()));

// Add a fake yield to the original loop body that just returns the		// Add a fake yield to the original loop body that just returns the
// BlockArguments corresponding to the iter_args. This makes it a no-op loop.		// BlockArguments corresponding to the iter_args. This makes it a no-op loop.
// The loop is dead. The caller is expected to erase it.		// The loop is dead. The caller is expected to erase it.
builder.setInsertionPointToEnd(loopBody);		builder.setInsertionPointToEnd(loopBody);
builder.create<scf::YieldOp>(loop->getLoc(), loop.getRegionIterArgs());		builder.create<scf::YieldOp>(loop->getLoc(), loop.getRegionIterArgs());

return newLoop;		return newLoop;
}		}

SmallVector<scf::ForOp> mlir::replaceLoopNestWithNewYields(		SmallVector<scf::ForOp> mlir::replaceLoopNestWithNewYields(
OpBuilder &builder, ArrayRef<scf::ForOp> loopNest,		OpBuilder &builder, ArrayRef<scf::ForOp> loopNest,
ValueRange newIterOperands, const NewYieldValueFn &newYieldValueFn) {		ValueRange newIterOperands, const NewYieldValueFn &newYieldValueFn,
		bool replaceIterOperandsUsesInLoop) {
if (loopNest.empty())		if (loopNest.empty())
return {};		return {};
SmallVector<scf::ForOp> newLoopNest(loopNest.size());		SmallVector<scf::ForOp> newLoopNest(loopNest.size());

newLoopNest.back() = replaceLoopWithNewYields(		newLoopNest.back() = replaceLoopWithNewYields(
builder, loopNest.back(), newIterOperands, newYieldValueFn);		builder, loopNest.back(), newIterOperands, newYieldValueFn);

for (unsigned loopDepth :		for (unsigned loopDepth :
llvm::reverse(llvm::seq<unsigned>(0, loopNest.size() - 1))) {		llvm::reverse(llvm::seq<unsigned>(0, loopNest.size() - 1))) {
NewYieldValueFn fn = [&](OpBuilder &innerBuilder, Location loc,		NewYieldValueFn fn = [&](OpBuilder &innerBuilder, Location loc,
ArrayRef<BlockArgument> innerNewBBArgs) {		ArrayRef<BlockArgument> innerNewBBArgs) {
SmallVector<Value> newYields(		SmallVector<Value> newYields(
newLoopNest[loopDepth + 1]->getResults().take_back(		newLoopNest[loopDepth + 1]->getResults().take_back(
newIterOperands.size()));		newIterOperands.size()));
return newYields;		return newYields;
};		};
newLoopNest[loopDepth] = replaceLoopWithNewYields(		newLoopNest[loopDepth] =
builder, loopNest[loopDepth], newIterOperands, fn);		replaceLoopWithNewYields(builder, loopNest[loopDepth], newIterOperands,
		fn, replaceIterOperandsUsesInLoop);
		if (!replaceIterOperandsUsesInLoop) {
		nicolasvasilacheUnsubmitted Not Done Reply Inline Actions Comments and examples needed here, I have no idea why this post-hoc patchup is happening here and why. Can this be achieved in a more idiomatic fashion, without post-hoc patchup, by e.g. splitting `replaceLoopWithNewYields` in multiple parts? nicolasvasilache: Comments and examples needed here, I have no idea why this post-hoc patchup is happening here…
		mravishankarAuthorUnsubmitted Done Reply Inline Actions I think this can be simplified. Instead of going bottom up from inner to outer, we can try making this go top down, from outer to inner... I can do this after the fact. Maybe not add to the patch right now? mravishankar: I think this can be simplified. Instead of going bottom up from inner to outer, we can try…
		nicolasvasilacheUnsubmitted Done Reply Inline Actions ok, but let's please do it as part of this list of patches. Before or after the NFC is fine, but please don't let this linger, approving conditioned on that cleanup arriving very soon. nicolasvasilache: ok, but let's please do it as part of this list of patches. Before or after the NFC is fine…
		/// The yield is expected to produce the following structure
		/// ```
		/// %0 = scf.for ... iter_args(%arg0 = %init) {
		/// %1 = scf.for ... iter_args(%arg1 = %arg0) {
		/// scf.yield %yield
		/// }
		/// }
		/// ```
		///
		/// Since the yield is propagated from the inside out, after the inner
		/// loop is processed the IR is in this form:
		///
		/// ```
		/// scf.for ... iter_args {
		/// %1 = scf.for ... iter_args(%arg1 = %init) {
		/// scf.yield %yield
		/// }
		/// }
		/// ```
		///
		/// If `replaceIterOperandsUsesInLoop` is true, there is nothing to do:
		/// `%init` will be replaced with `%arg0` when it is created for the
		/// outer loop. Otherwise this replacement has to be done explicitly.
		unsigned subLen = newIterOperands.size();
		unsigned subStart =
		newLoopNest[loopDepth + 1].getNumIterOperands() - subLen;
		auto resetOperands =
		newLoopNest[loopDepth + 1].getInitArgsMutable().slice(subStart,
		subLen);
		resetOperands.assign(
		newLoopNest[loopDepth].getRegionIterArgs().take_back(subLen));
		}
}		}
return newLoopNest;		return newLoopNest;
}		}

/// Outline a region with a single block into a new FuncOp.		/// Outline a region with a single block into a new FuncOp.
/// Assumes the FuncOp result types is the type of the yielded operands of the		/// Assumes the FuncOp result types is the type of the yielded operands of the
/// single block. This constraint makes it easy to determine the result.		/// single block. This constraint makes it easy to determine the result.
/// This method also clones the `arith::ConstantIndexOp` at the start of		/// This method also clones the `arith::ConstantIndexOp` at the start of
▲ Show 20 Lines • Show All 831 Lines • Show Last 20 Lines

mlir/test/Interfaces/TilingInterface/tile-and-fuse-using-interface.mlir

	Show All 24 Lines
	// CHECK-DAG: %[[RHS_TILE:.+]] = tensor.extract_slice %[[ARG1]][0, %[[IV1]]]			// CHECK-DAG: %[[RHS_TILE:.+]] = tensor.extract_slice %[[ARG1]][0, %[[IV1]]]
	// CHECK-DAG: %[[INIT_TILE:.+]] = tensor.extract_slice %[[ITERARG1]][%[[IV0]], %[[IV1]]]			// CHECK-DAG: %[[INIT_TILE:.+]] = tensor.extract_slice %[[ITERARG1]][%[[IV0]], %[[IV1]]]
	// CHECK: %[[FILL_TILE:.+]] = linalg.fill			// CHECK: %[[FILL_TILE:.+]] = linalg.fill
	// CHECK-SAME: outs(%[[INIT_TILE]] :			// CHECK-SAME: outs(%[[INIT_TILE]] :
	// CHECK: %[[GEMM_TILE:.+]] = linalg.matmul			// CHECK: %[[GEMM_TILE:.+]] = linalg.matmul
	// CHECK-SAME: ins(%[[LHS_TILE]], %[[RHS_TILE]] :			// CHECK-SAME: ins(%[[LHS_TILE]], %[[RHS_TILE]] :
	// CHECK-SAME: outs(%[[FILL_TILE]] :			// CHECK-SAME: outs(%[[FILL_TILE]] :
	// CHECK: %[[INSERT:.+]] = tensor.insert_slice %[[GEMM_TILE]] into %[[ITERARG1]][%[[IV0]], %[[IV1]]]			// CHECK: %[[INSERT:.+]] = tensor.insert_slice %[[GEMM_TILE]] into %[[ITERARG1]][%[[IV0]], %[[IV1]]]
	// CHECK scf.yield %[[INSERT]]			// CHECK: scf.yield %[[INSERT]]

	// -----			// -----

	func.func @gemm_generic_fusion(%arg0 : tensor<?x?xf32>, %arg1 : tensor<?x?xf32>,			func.func @gemm_generic_fusion(%arg0 : tensor<?x?xf32>, %arg1 : tensor<?x?xf32>,
	%arg2 : tensor<?xf32>) -> tensor<?x?xf32> {			%arg2 : tensor<?xf32>) -> tensor<?x?xf32> {
	%c0 = arith.constant 0 : index			%c0 = arith.constant 0 : index
	%c1 = arith.constant 1 : index			%c1 = arith.constant 1 : index
	%cst = arith.constant 0.0 : f32			%cst = arith.constant 0.0 : f32
	Show All 21 Lines
	// CHECK-SAME: %[[ARG2:[a-zA-Z0-9]+]]: tensor<?xf32>)			// CHECK-SAME: %[[ARG2:[a-zA-Z0-9]+]]: tensor<?xf32>)
	// CHECK: %[[INIT:.+]] = linalg.init_tensor			// CHECK: %[[INIT:.+]] = linalg.init_tensor
	// CHECK: scf.for %[[IV0:[a-zA-Z0-9]+]] =			// CHECK: scf.for %[[IV0:[a-zA-Z0-9]+]] =
	// CHECK-SAME: iter_args(%[[ITERARG0:.+]] = %[[INIT]])			// CHECK-SAME: iter_args(%[[ITERARG0:.+]] = %[[INIT]])
	// CHECK: scf.for %[[IV1:[a-zA-Z0-9]+]] =			// CHECK: scf.for %[[IV1:[a-zA-Z0-9]+]] =
	// CHECK-SAME: iter_args(%[[ITERARG1:.+]] = %[[ITERARG0]])			// CHECK-SAME: iter_args(%[[ITERARG1:.+]] = %[[ITERARG0]])
	// CHECK-DAG: %[[LHS_TILE:.+]] = tensor.extract_slice %[[ARG0]][%[[IV0]], 0]			// CHECK-DAG: %[[LHS_TILE:.+]] = tensor.extract_slice %[[ARG0]][%[[IV0]], 0]
	// CHECK-DAG: %[[RHS_TILE:.+]] = tensor.extract_slice %[[ARG1]][0, %[[IV1]]]			// CHECK-DAG: %[[RHS_TILE:.+]] = tensor.extract_slice %[[ARG1]][0, %[[IV1]]]
	// CHECK-DAG: %[[INIT_TILE:.+]] = tensor.extract_slice %[[ITERARG1]][%[[IV0]], %[[IV1]]]			// CHECK-DAG: %[[INIT_TILE:.+]] = tensor.extract_slice %[[INIT]][%[[IV0]], %[[IV1]]]
	// CHECK: %[[FILL_TILE:.+]] = linalg.fill			// CHECK: %[[FILL_TILE:.+]] = linalg.fill
	// CHECK-SAME: outs(%[[INIT_TILE]] :			// CHECK-SAME: outs(%[[INIT_TILE]] :
	// CHECK: %[[GEMM_TILE:.+]] = linalg.matmul			// CHECK: %[[GEMM_TILE:.+]] = linalg.matmul
	// CHECK-SAME: ins(%[[LHS_TILE]], %[[RHS_TILE]] :			// CHECK-SAME: ins(%[[LHS_TILE]], %[[RHS_TILE]] :
	// CHECK-SAME: outs(%[[FILL_TILE]] :			// CHECK-SAME: outs(%[[FILL_TILE]] :
	// CHECK-DAG: %[[BIAS_TILE:.+]] = tensor.extract_slice %[[ARG2]][%[[IV1]]]			// CHECK-DAG: %[[BIAS_TILE:.+]] = tensor.extract_slice %[[ARG2]][%[[IV1]]]
	// CHECK-DAG: %[[OUTS_TILE:.+]] = tensor.extract_slice %[[ITERARG1]][%[[IV0]], %[[IV1]]]			// CHECK-DAG: %[[OUTS_TILE:.+]] = tensor.extract_slice %[[ITERARG1]][%[[IV0]], %[[IV1]]]
	// CHECK: %[[GENERIC_TILE:.+]] = linalg.generic			// CHECK: %[[GENERIC_TILE:.+]] = linalg.generic
	// CHECK-SAME: ins(%[[GEMM_TILE]], %[[BIAS_TILE]] :			// CHECK-SAME: ins(%[[GEMM_TILE]], %[[BIAS_TILE]] :
	// CHECK-SAME: outs(%[[OUTS_TILE]] :			// CHECK-SAME: outs(%[[OUTS_TILE]] :
	// CHECK: %[[INSERT:.+]] = tensor.insert_slice %[[GENERIC_TILE]] into %[[ITERARG1]][%[[IV0]], %[[IV1]]]			// CHECK: %[[INSERT:.+]] = tensor.insert_slice %[[GENERIC_TILE]] into %[[ITERARG1]][%[[IV0]], %[[IV1]]]
	// CHECK scf.yield %[[INSERT]]			// CHECK: scf.yield %[[INSERT]]

	// -----			// -----

	func.func @gemm_gemm_fusion(%lhs0 : tensor<?x?xf32>, %rhs0 : tensor<?x?xf32>, %rhs1 : tensor<?x?xf32>) -> tensor<?x?xf32> {			func.func @gemm_gemm_fusion(%lhs0 : tensor<?x?xf32>, %rhs0 : tensor<?x?xf32>, %rhs1 : tensor<?x?xf32>) -> tensor<?x?xf32> {
	%c0 = arith.constant 0 : index			%c0 = arith.constant 0 : index
	%c1 = arith.constant 1 : index			%c1 = arith.constant 1 : index
	%cst = arith.constant 0.0 : f32			%cst = arith.constant 0.0 : f32
	%d0 = tensor.dim %lhs0, %c0 : tensor<?x?xf32>			%d0 = tensor.dim %lhs0, %c0 : tensor<?x?xf32>
	Show All 33 Lines
	// CHECK-DAG: %[[RHS1_TILE:.+]] = tensor.extract_slice %[[RHS1]][0, 0]			// CHECK-DAG: %[[RHS1_TILE:.+]] = tensor.extract_slice %[[RHS1]][0, 0]
	// CHECK-DAG: %[[INIT1_TILE:.+]] = tensor.extract_slice %[[ITERARG]][%[[IV]], 0]			// CHECK-DAG: %[[INIT1_TILE:.+]] = tensor.extract_slice %[[ITERARG]][%[[IV]], 0]
	// CHECK: %[[FILL1_TILE:.+]] = linalg.fill			// CHECK: %[[FILL1_TILE:.+]] = linalg.fill
	// CHECK-SAME: outs(%[[INIT1_TILE]] :			// CHECK-SAME: outs(%[[INIT1_TILE]] :
	// CHECK: %[[GEMM1_TILE:.+]] = linalg.matmul			// CHECK: %[[GEMM1_TILE:.+]] = linalg.matmul
	// CHECK-SAME: ins(%[[GEMM0_TILE]], %[[RHS1_TILE]] :			// CHECK-SAME: ins(%[[GEMM0_TILE]], %[[RHS1_TILE]] :
	// CHECK-SAME: outs(%[[FILL1_TILE]] :			// CHECK-SAME: outs(%[[FILL1_TILE]] :
	// CHECK: %[[INSERT:.+]] = tensor.insert_slice %[[GEMM1_TILE]] into %[[ITERARG]][%[[IV]], 0]			// CHECK: %[[INSERT:.+]] = tensor.insert_slice %[[GEMM1_TILE]] into %[[ITERARG]][%[[IV]], 0]
	// CHECK scf.yield %[[INSERT]]			// CHECK: scf.yield %[[INSERT]]

	// -----			// -----

	func.func @gemm_transpose_fusion(%arg0 : tensor<?x?xf32>, %arg1 : tensor<?x?xf32>) -> tensor<?x?xf32> {			func.func @gemm_transpose_fusion(%arg0 : tensor<?x?xf32>, %arg1 : tensor<?x?xf32>) -> tensor<?x?xf32> {
	%c0 = arith.constant 0 : index			%c0 = arith.constant 0 : index
	%c1 = arith.constant 1 : index			%c1 = arith.constant 1 : index
	%cst = arith.constant 0.0 : f32			%cst = arith.constant 0.0 : f32
	%d0 = tensor.dim %arg0, %c0 : tensor<?x?xf32>			%d0 = tensor.dim %arg0, %c0 : tensor<?x?xf32>
	Show All 35 Lines
	// CHECK: %[[GEMM_TILE:.+]] = linalg.matmul			// CHECK: %[[GEMM_TILE:.+]] = linalg.matmul
	// CHECK-SAME: ins(%[[LHS_TILE]], %[[RHS_TILE]] :			// CHECK-SAME: ins(%[[LHS_TILE]], %[[RHS_TILE]] :
	// CHECK-SAME: outs(%[[FILL_TILE]] :			// CHECK-SAME: outs(%[[FILL_TILE]] :
	// CHECK-DAG: %[[OUTS_TILE:.+]] = tensor.extract_slice %[[ITERARG1]][%[[IV1]], %[[IV0]]]			// CHECK-DAG: %[[OUTS_TILE:.+]] = tensor.extract_slice %[[ITERARG1]][%[[IV1]], %[[IV0]]]
	// CHECK: %[[GENERIC_TILE:.+]] = linalg.generic			// CHECK: %[[GENERIC_TILE:.+]] = linalg.generic
	// CHECK-SAME: ins(%[[GEMM_TILE]] :			// CHECK-SAME: ins(%[[GEMM_TILE]] :
	// CHECK-SAME: outs(%[[OUTS_TILE]] :			// CHECK-SAME: outs(%[[OUTS_TILE]] :
	// CHECK: %[[INSERT:.+]] = tensor.insert_slice %[[GENERIC_TILE]] into %[[ITERARG1]][%[[IV1]], %[[IV0]]]			// CHECK: %[[INSERT:.+]] = tensor.insert_slice %[[GENERIC_TILE]] into %[[ITERARG1]][%[[IV1]], %[[IV0]]]
	// CHECK scf.yield %[[INSERT]]			// CHECK: scf.yield %[[INSERT]]

	// -----			// -----

	func.func @interchange_matmul_fusion(%arg0 : tensor<?x?xf32>, %arg1 : tensor<?x?xf32>) -> tensor<?x?xf32> {			func.func @interchange_matmul_fusion(%arg0 : tensor<?x?xf32>, %arg1 : tensor<?x?xf32>) -> tensor<?x?xf32> {
	%c0 = arith.constant 0 : index			%c0 = arith.constant 0 : index
	%c1 = arith.constant 1 : index			%c1 = arith.constant 1 : index
	%d0 = tensor.dim %arg0, %c0 : tensor<?x?xf32>			%d0 = tensor.dim %arg0, %c0 : tensor<?x?xf32>
	%d1 = tensor.dim %arg1, %c1 : tensor<?x?xf32>			%d1 = tensor.dim %arg1, %c1 : tensor<?x?xf32>
	Show All 19 Lines
	// CHECK-SAME: %[[ARG1:[a-zA-Z0-9]+]]: tensor<?x?xf32>)			// CHECK-SAME: %[[ARG1:[a-zA-Z0-9]+]]: tensor<?x?xf32>)
	// CHECK: %[[INIT:.+]] = linalg.init_tensor			// CHECK: %[[INIT:.+]] = linalg.init_tensor
	// CHECK: scf.for %[[IV0:[a-zA-Z0-9]+]] =			// CHECK: scf.for %[[IV0:[a-zA-Z0-9]+]] =
	// CHECK-SAME: iter_args(%[[ITERARG0:.+]] = %[[INIT]])			// CHECK-SAME: iter_args(%[[ITERARG0:.+]] = %[[INIT]])
	// CHECK: scf.for %[[IV1:[a-zA-Z0-9]+]] =			// CHECK: scf.for %[[IV1:[a-zA-Z0-9]+]] =
	// CHECK-SAME: iter_args(%[[ITERARG1:.+]] = %[[ITERARG0]])			// CHECK-SAME: iter_args(%[[ITERARG1:.+]] = %[[ITERARG0]])
	// CHECK-DAG: %[[LHS_TILE:.+]] = tensor.extract_slice %[[ARG0]][%[[IV1]], 0]			// CHECK-DAG: %[[LHS_TILE:.+]] = tensor.extract_slice %[[ARG0]][%[[IV1]], 0]
	// CHECK-DAG: %[[RHS_TILE:.+]] = tensor.extract_slice %[[ARG1]][0, %[[IV0]]]			// CHECK-DAG: %[[RHS_TILE:.+]] = tensor.extract_slice %[[ARG1]][0, %[[IV0]]]
	// CHECK-DAG: %[[INIT_TILE:.+]] = tensor.extract_slice %[[ITERARG1]][%[[IV1]], %[[IV0]]]			// CHECK-DAG: %[[INIT_TILE:.+]] = tensor.extract_slice %[[INIT]][%[[IV1]], %[[IV0]]]
	// CHECK: %[[FILL_TILE:.+]] = linalg.fill			// CHECK: %[[FILL_TILE:.+]] = linalg.fill
	// CHECK-SAME: outs(%[[INIT_TILE]] :			// CHECK-SAME: outs(%[[INIT_TILE]] :
	// CHECK: %[[GEMM_TILE:.+]] = linalg.matmul			// CHECK: %[[GEMM_TILE:.+]] = linalg.matmul
	// CHECK-SAME: ins(%[[LHS_TILE]], %[[RHS_TILE]] :			// CHECK-SAME: ins(%[[LHS_TILE]], %[[RHS_TILE]] :
	// CHECK-SAME: outs(%[[FILL_TILE]] :			// CHECK-SAME: outs(%[[FILL_TILE]] :
	// CHECK: %[[INIT_TILE_2:.+]] = tensor.extract_slice %[[ITERARG1]][%[[IV1]], %[[IV0]]]			// CHECK: %[[INIT_TILE_2:.+]] = tensor.extract_slice %[[ITERARG1]][%[[IV1]], %[[IV0]]]
	// CHECK: %[[GENERIC_TILE:.+]] = linalg.generic			// CHECK: %[[GENERIC_TILE:.+]] = linalg.generic
	// CHECK-SAME: ins(%[[GEMM_TILE]] :			// CHECK-SAME: ins(%[[GEMM_TILE]] :
	// CHECK-SAME: outs(%[[INIT_TILE_2]] :			// CHECK-SAME: outs(%[[INIT_TILE_2]] :
	// CHECK: %[[INSERT:.+]] = tensor.insert_slice %[[GENERIC_TILE]] into %[[ITERARG1]][%[[IV1]], %[[IV0]]]			// CHECK: %[[INSERT:.+]] = tensor.insert_slice %[[GENERIC_TILE]] into %[[ITERARG1]][%[[IV1]], %[[IV0]]]
	// CHECK scf.yield %[[INSERT]]			// CHECK: scf.yield %[[INSERT]]

	// -----			// -----

	func.func @matmul_plus_matmul(%arg0: tensor<?x?xf32>, %arg1: tensor<?x?xf32>,			func.func @matmul_plus_matmul(%arg0: tensor<?x?xf32>, %arg1: tensor<?x?xf32>,
	%arg2: tensor<?x?xf32>) -> tensor<?x?xf32>{			%arg2: tensor<?x?xf32>) -> tensor<?x?xf32>{
	%c0 = arith.constant 0 : index			%c0 = arith.constant 0 : index
	%c1 = arith.constant 1 : index			%c1 = arith.constant 1 : index
	%0 = tensor.dim %arg2, %c0 : tensor<?x?xf32>			%0 = tensor.dim %arg2, %c0 : tensor<?x?xf32>
	▲ Show 20 Lines • Show All 163 Lines • Show Last 20 Lines

This is an archive of the discontinued LLVM Phabricator instance.

[mlir][TilingInterface] Fix `iter_args` handling in tile (and fuse).
ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 462989

mlir/include/mlir/Dialect/SCF/IR/SCFOps.td

mlir/include/mlir/Dialect/SCF/Utils/Utils.h

mlir/lib/Dialect/SCF/Transforms/TileUsingInterface.cpp

mlir/lib/Dialect/SCF/Utils/Utils.cpp

mlir/test/Interfaces/TilingInterface/tile-and-fuse-using-interface.mlir

This is an archive of the discontinued LLVM Phabricator instance.

[mlir][TilingInterface] Fix `iter_args` handling in tile (and fuse).ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 462989

mlir/include/mlir/Dialect/SCF/IR/SCFOps.td

mlir/include/mlir/Dialect/SCF/Utils/Utils.h

mlir/lib/Dialect/SCF/Transforms/TileUsingInterface.cpp

mlir/lib/Dialect/SCF/Utils/Utils.cpp

mlir/test/Interfaces/TilingInterface/tile-and-fuse-using-interface.mlir

[mlir][TilingInterface] Fix `iter_args` handling in tile (and fuse).
ClosedPublic