Diff 447950

mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h

	Show All 14 Lines
	#include "mlir/Dialect/Linalg/Utils/Utils.h"			#include "mlir/Dialect/Linalg/Utils/Utils.h"
	#include "mlir/Dialect/MemRef/IR/MemRef.h"			#include "mlir/Dialect/MemRef/IR/MemRef.h"
	#include "mlir/Dialect/SCF/Utils/Utils.h"			#include "mlir/Dialect/SCF/Utils/Utils.h"
	#include "mlir/Dialect/Tensor/IR/Tensor.h"			#include "mlir/Dialect/Tensor/IR/Tensor.h"
	#include "mlir/Dialect/Utils/StaticValueUtils.h"			#include "mlir/Dialect/Utils/StaticValueUtils.h"
	#include "mlir/Dialect/Vector/Transforms/VectorTransforms.h"			#include "mlir/Dialect/Vector/Transforms/VectorTransforms.h"
	#include "mlir/Dialect/X86Vector/Transforms.h"			#include "mlir/Dialect/X86Vector/Transforms.h"
	#include "mlir/IR/PatternMatch.h"			#include "mlir/IR/PatternMatch.h"
				#include "mlir/Interfaces/TilingInterface.h"
	#include "mlir/Transforms/DialectConversion.h"			#include "mlir/Transforms/DialectConversion.h"
	#include "llvm/ADT/SmallBitVector.h"			#include "llvm/ADT/SmallBitVector.h"
	#include "llvm/ADT/SmallSet.h"			#include "llvm/ADT/SmallSet.h"

	namespace mlir {			namespace mlir {
	namespace bufferization {			namespace bufferization {
	class BufferizeTypeConverter;			class BufferizeTypeConverter;
	} // namespace bufferization			} // namespace bufferization
	▲ Show 20 Lines • Show All 98 Lines • ▼ Show 20 Lines
	/// %7 = tensor.extract_slice %0[42, 0][86, 32][1, 1]			/// %7 = tensor.extract_slice %0[42, 0][86, 32][1, 1]
	/// %8 = tensor.extract_slice %6[42, 0][86, 64][1, 1]			/// %8 = tensor.extract_slice %6[42, 0][86, 64][1, 1]
	/// %9 = linalg.matmul ins(%7, %1 : tensor<86x32xf32>, tensor<32x64xf32>)			/// %9 = linalg.matmul ins(%7, %1 : tensor<86x32xf32>, tensor<32x64xf32>)
	/// outs(%8 : tensor<86x64xf32>)			/// outs(%8 : tensor<86x64xf32>)
	/// tensor.insert_slice %5 into %6[42, 0][86, 64][1, 1]			/// tensor.insert_slice %5 into %6[42, 0][86, 64][1, 1]
	///			///
	/// Note that there is no simplification other than constant propagation applied			/// Note that there is no simplification other than constant propagation applied
	/// to slice extraction and insertion.			/// to slice extraction and insertion.
	std::pair<LinalgOp, LinalgOp> splitOp(RewriterBase &rewriter, LinalgOp op,			std::pair<TilingInterface, TilingInterface> splitOp(RewriterBase &rewriter,
				TilingInterface op,
	unsigned dimension,			unsigned dimension,
	OpFoldResult splitPoint);			OpFoldResult splitPoint);

	/// Perform standalone tiling of a single LinalgOp by `tileSizes`.			/// Perform standalone tiling of a single LinalgOp by `tileSizes`.
	/// and permute the loop nest according to `interchangeVector`			/// and permute the loop nest according to `interchangeVector`
	/// The permutation is expressed as a list of integers that specify			/// The permutation is expressed as a list of integers that specify
	/// the new ordering of the loop nest. The length of `interchangeVector`			/// the new ordering of the loop nest. The length of `interchangeVector`
	/// must be equal to the length of `tileSizes`.			/// must be equal to the length of `tileSizes`.
	/// An empty vector is interpreted as the identity permutation and the			/// An empty vector is interpreted as the identity permutation and the
	/// transformation returns early.			/// transformation returns early.
	▲ Show 20 Lines • Show All 1,369 Lines • Show Last 20 Lines

mlir/include/mlir/Dialect/Linalg/Utils/Utils.h

Show First 20 Lines • Show All 171 Lines • ▼ Show 20 Lines	bool isProducerLastWriteOfView(const LinalgDependenceGraph &graph,
LinalgOp producer);		LinalgOp producer);

/// Checks whether fusing the specific `producer` of the `consumedView` is		/// Checks whether fusing the specific `producer` of the `consumedView` is
/// feasible. This checks `producer` is the last write of `consumedView` and		/// feasible. This checks `producer` is the last write of `consumedView` and
/// that no interleaved dependence would be violated (RAW, WAR or WAW).		/// that no interleaved dependence would be violated (RAW, WAR or WAW).
bool isFusableInto(const LinalgDependenceGraph &graph, LinalgOp consumer,		bool isFusableInto(const LinalgDependenceGraph &graph, LinalgOp consumer,
Value consumedView, LinalgOp producer);		Value consumedView, LinalgOp producer);

/// Creates a slice of `value` described by `offsets`, `sizes` and `strides`.
/// Emits a memref.subview when `value` has a memref type and a
/// tensor.extract_slice otherwise; in the latter case `value` is expected to be
/// a ranked tensor. Returns the value produced by the created operation.
Value createSlice(OpBuilder &builder, Location loc, Value value,
ArrayRef<OpFoldResult> offsets, ArrayRef<OpFoldResult> sizes,
ArrayRef<OpFoldResult> strides);

/// Computes tile offsets, given a list of loop `ivs` and `tileSizes`. In case a		/// Computes tile offsets, given a list of loop `ivs` and `tileSizes`. In case a
/// tile size is zero (i.e., no tiling), the corresponding offset is also zero.		/// tile size is zero (i.e., no tiling), the corresponding offset is also zero.
SmallVector<Value> computeTileOffsets(OpBuilder &b, Location loc,		SmallVector<Value> computeTileOffsets(OpBuilder &b, Location loc,
ValueRange ivs, ValueRange tileSizes);		ValueRange ivs, ValueRange tileSizes);

/// Computes tile sizes, given a list of `tileSizes` and dimension		/// Computes tile sizes, given a list of `tileSizes` and dimension
/// sizes (`sizeBounds`). In case a tile size is zero (i.e., no tiling), the		/// sizes (`sizeBounds`). In case a tile size is zero (i.e., no tiling), the
/// corresponding result size is the corresponding value from `sizeBounds`.		/// corresponding result size is the corresponding value from `sizeBounds`.
▲ Show 20 Lines • Show All 300 Lines • Show Last 20 Lines

mlir/include/mlir/Interfaces/TilingInterface.td

Show First 20 Lines • Show All 70 Lines • ▼ Show 20 Lines	let methods = [
The iteration space of the operation is returned by		The iteration space of the operation is returned by
`getIterationDomain`. The caller provides the information of the		`getIterationDomain`. The caller provides the information of the
tile within this iteration space whose implementation the		tile within this iteration space whose implementation the
caller needs.		caller needs.
- `dest` are the Value into which the result of the tiled		- `dest` are the Value into which the result of the tiled
operation is to be inserted into. The type of the `dest`		operation is to be inserted into. The type of the `dest`
Values is same as the types returned by		Values is same as the types returned by
`getDestinationOperands` method.		`getDestinationOperands` method.
- `offsets` provides the offset of the tile within the		- `offsets` provides the offset of the tile in the coordinate system
iteration space		of the original iteration space, i.e., if an iteration space
		dimension had non-zero offset, it must be included in the offset
		provided here (as opposed to zero-based offset "relative" to the
		iteration space).
- `sizes` provides the size of the tile.		- `sizes` provides the size of the tile.
- `tileDestOperands` specifies whether to also tile `dest` operands		- `tileDestOperands` specifies whether to also tile `dest` operands
or not. Avoiding tiling `dest` operands can be useful for		or not. Avoiding tiling `dest` operands can be useful for
composition with various looping container ops.		composition with various looping container ops.

The method returns the operation that is the tiled		The method returns the operation that is the tiled
implementation.		implementation.
}],		}],
▲ Show 20 Lines • Show All 47 Lines • ▼ Show 20 Lines	let methods = [
and fuse. The method returns failure if the operation can't be		and fuse. The method returns failure if the operation can't be
tiled to generate the result tile. In practical terms this		tiled to generate the result tile. In practical terms this
implies it cannot be tiled and fused with its consumers.		implies it cannot be tiled and fused with its consumers.

- `dest` are the Value into which the result of the tiled		- `dest` are the Value into which the result of the tiled
operation is to be inserted into. The type of the `dest`		operation is to be inserted into. The type of the `dest`
Values is same as the types returned by		Values is same as the types returned by
`getDestinationOperands` method.		`getDestinationOperands` method.
- `offsets` provides the offset of the tile within the		- `offsets` provides the offset of the tile in the coordinate system
iteration space		of the original iteration space, i.e., if an iteration space
		dimension had non-zero offset, it must be included in the offset
		provided here (as opposed to zero-based offset "relative" to the
		iteration space).
- `sizes` provides the size of the tile.		- `sizes` provides the size of the tile.
- `tileDestOperands` specifies whether to also tile `dest` operands		- `tileDestOperands` specifies whether to also tile `dest` operands
or not. Avoiding tiling `dest` operands can be useful for		or not. Avoiding tiling `dest` operands can be useful for
composition with various looping container ops.		composition with various looping container ops.
}],		}],
/retType=/"FailureOr<Value>",		/retType=/"FailureOr<Value>",
/methodName=/"generateResultTileValue",		/methodName=/"generateResultTileValue",
/args=/(ins		/args=/(ins
Show All 14 Lines

mlir/lib/Dialect/Linalg/TransformOps/LinalgTransformOps.cpp

Show First 20 Lines • Show All 734 Lines • ▼ Show 20 Lines	for (const auto &pair : llvm::zip(payload, splitPoints)) {
if (getDimension() >= linalgOp.getNumLoops()) {		if (getDimension() >= linalgOp.getNumLoops()) {
auto diag = emitSilenceableError() << "dimension " << getDimension()		auto diag = emitSilenceableError() << "dimension " << getDimension()
<< " does not exist in target op";		<< " does not exist in target op";
diag.attachNote(target->getLoc()) << "target op";		diag.attachNote(target->getLoc()) << "target op";
return diag;		return diag;
}		}

rewriter.setInsertionPoint(linalgOp);		rewriter.setInsertionPoint(linalgOp);
std::tie(first.emplace_back(), second.emplace_back()) =		std::tie(first.emplace_back(), second.emplace_back()) = linalg::splitOp(
linalg::splitOp(rewriter, linalgOp, getDimension(), std::get<1>(pair));		rewriter, cast<TilingInterface>(linalgOp.getOperation()),
		getDimension(), std::get<1>(pair));
}		}

results.set(getFirst().cast<OpResult>(), first);		results.set(getFirst().cast<OpResult>(), first);
results.set(getSecond().cast<OpResult>(), second);		results.set(getSecond().cast<OpResult>(), second);
return DiagnosedSilenceableFailure::success();		return DiagnosedSilenceableFailure::success();
}		}

void SplitOp::getEffects(		void SplitOp::getEffects(
▲ Show 20 Lines • Show All 347 Lines • Show Last 20 Lines

mlir/lib/Dialect/Linalg/Transforms/Split.cpp

	//===- Split.cpp - Structured op splitting --------------------------------===//			//===- Split.cpp - Structured op splitting --------------------------------===//
	//			//
	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.			// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
	// See https://llvm.org/LICENSE.txt for license information.			// See https://llvm.org/LICENSE.txt for license information.
	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception			// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
	//			//
	//===----------------------------------------------------------------------===//			//===----------------------------------------------------------------------===//

	#include "mlir/Dialect/Affine/IR/AffineOps.h"			#include "mlir/Dialect/Affine/IR/AffineOps.h"
	#include "mlir/Dialect/Linalg/Transforms/Transforms.h"			#include "mlir/Dialect/Linalg/Transforms/Transforms.h"
	#include "mlir/Dialect/Linalg/Utils/Utils.h"
	#include "mlir/Dialect/Utils/StaticValueUtils.h"			#include "mlir/Dialect/Utils/StaticValueUtils.h"
				#include "mlir/IR/AffineExpr.h"
				#include "mlir/IR/Attributes.h"
				#include "mlir/IR/BuiltinAttributes.h"
				#include "mlir/IR/OpDefinition.h"
				#include "mlir/Interfaces/TilingInterface.h"

	#include "llvm/ADT/STLExtras.h"			#include "llvm/ADT/STLExtras.h"
				#include "llvm/ADT/SmallVector.h"

	using namespace mlir;			using namespace mlir;
	using namespace mlir::linalg;			using namespace mlir::linalg;

	/// Extract the slices of `operands` supplied to the given operation `op` such
	/// that they are sufficient to execute the op for the subset of its iteration
	/// space defined by `splitIterationSpace`. The subset is a part of the original
	/// iteration space split at the given `dimension`. If `offset` is provided, it
	/// indicates the iterator value at which the dimension has been split and
	/// requires the "high" part starting at the given offset of the operands to be
	/// generated; otherwise, the "low" part with no offset is generated. Note that
	/// `operands` are not necessarily the actual operands of `op`.
	///
	/// Returns one value per input/output operand of `op`, in operand order: either
	/// a newly created slice or, when the operand cannot or need not be sliced,
	/// the operand of `op` itself.
	static SmallVector<Value>
	getOperandSlices(RewriterBase &b, Location loc, LinalgOp op,
	ValueRange splitIterationSpace, ValueRange operands,
	unsigned dimension, Value offset = nullptr) {
	SmallVector<Value> slices;
	slices.reserve(op.getNumInputsAndOutputs());
	for (OpOperand *opOperand : op.getInputAndOutputOperands()) {
	// `type` is null when the operand is not of a shaped type.
	auto type = opOperand->get().getType().dyn_cast<ShapedType>();
	AffineMap indexing = op.getTiedIndexingMap(opOperand);

	// If the type is not sliceable, or the slice is requested along the
	// dimension that is not used in indexing this type, just use the entire
	// operand.
	if (!type \|\| dimension >= indexing.getNumDims() \|\|
	!indexing.isFunctionOfDim(dimension)) {
	slices.push_back(opOperand->get());
	continue;
	}

	// Compute the size of the slice along each operand dimension by applying
	// the corresponding result expression of the indexing map to the split
	// iteration space.
	SmallVector<OpFoldResult> sizes;
	sizes.reserve(indexing.getNumResults());
	for (AffineExpr dimIndexing : indexing.getResults()) {
	sizes.push_back(makeComposedFoldedAffineApply(
	b, loc, dimIndexing,
	getAsOpFoldResult(llvm::to_vector(splitIterationSpace))));
	}
	// By default, the slice starts at zero and has unit strides.
	SmallVector<OpFoldResult> offsets(type.getRank(), b.getIndexAttr(0));
	SmallVector<OpFoldResult> strides(type.getRank(), b.getIndexAttr(1));

	// For the "high" part, set the split offset at `dimension` and map the
	// offsets through the indexing map of the operand.
	// NOTE(review): `offsets` is sized by the operand rank but is indexed with
	// the iteration-space `dimension`, and the guard above only bounds
	// `dimension` by the number of map dimensions -- confirm this cannot index
	// out of bounds when the op has more loops than the operand has dimensions.
	if (offset) {
	offsets[dimension] = offset;
	offsets = applyMapToValues(b, loc, indexing, offsets);
	}

	// Slice the entry of `operands` at the position of this op operand, which
	// may differ from the value currently used by `op`.
	slices.push_back(createSlice(b, loc,
	operands[opOperand->getOperandNumber()],
	offsets, sizes, strides));
	}

	return slices;
	}

	/// Creates a part of the given `op` split along the iteration space `dimension`			/// Creates a part of the given `op` split along the iteration space `dimension`
	/// with the given `size` and an optional `offset` (default 0). Makes slices			/// with the given `size` and an optional `offset` (default 0). Makes slices
	/// of operands, using the input operands of the original op and the output			/// of operands, using the input operands of the original op and the output
	/// operands provided as `resultOperands`. Expects `splitIterationSpace` to be			/// operands provided as `resultOperands`. Expects `offsets` and `sizes` to
	/// a list of values representing the shape of the iteration space of the			/// define the shape of the iteration space of the original op. Returns the
	/// original op and updates it to be the iteration space of the current part.			/// split-out op as well as the output operand values updated with the partial
	/// Returns the split-out op as well as the output operand values updated with			/// results produced by this op through `results`.
	/// the partial results produced by this op through `results`.			static TilingInterface
	static LinalgOp			createSplitPart(RewriterBase &b, Location loc, TilingInterface op,
	createSplitPart(RewriterBase &b, Location loc, LinalgOp op,			ArrayRef<OpFoldResult> offsets, ArrayRef<OpFoldResult> sizes,
	ValueRange resultOperands,			ValueRange resultOperands, unsigned dimension,
	llvm::MutableArrayRef<Value> splitIterationSpace,			OpFoldResult size, OpFoldResult offset,
	unsigned dimension, OpFoldResult size,			SmallVectorImpl<Value> &results) {
	SmallVectorImpl<Value> &results, Value offset = nullptr) {			// Iteration space of the current part.
	ImplicitLocOpBuilder implicit(op.getLoc(), b);			SmallVector<OpFoldResult> sizesCopy = llvm::to_vector(sizes);
	splitIterationSpace[dimension] = materializeOpFoldResult(implicit, size);			SmallVector<OpFoldResult> offsetsCopy = llvm::to_vector(offsets);
	SmallVector<Value> operands = llvm::to_vector(			sizesCopy[dimension] = size;
	llvm::map_range(op.getInputOperands(),			offsetsCopy[dimension] = offset;
	[](OpOperand *opOperand) { return opOperand->get(); }));
	llvm::append_range(operands, resultOperands);			// Create the part as if it were a single tile.
	operands = getOperandSlices(b, loc, op, splitIterationSpace, operands,			SmallVector<Operation *> tiled =
	dimension, offset);			op.getTiledImplementation(b, resultOperands, offsetsCopy, sizesCopy,
	Operation *part =			/tileDestOperands=/true);
	op.clone(b, loc, getTensorOutputTypes(op, operands), operands);			assert(tiled.size() == 1 && "expected a single result from tiling");
	results = insertSlicesBack(b, loc, op, operands, part->getResults());			auto part = cast<TilingInterface>(tiled.front());
	return cast<LinalgOp>(part);
				// Insert the results back and populate the `results` list.
				for (auto i : llvm::seq<unsigned>(0, part->getNumResults())) {
				SmallVector<OpFoldResult> resultOffsets, resultSizes;
				if (failed(op.getResultTilePosition(b, i, offsetsCopy, sizesCopy,
				resultOffsets, resultSizes)))
				return nullptr;
				SmallVector<OpFoldResult> resultStrides(resultOffsets.size(),
				b.getIndexAttr(1));
				Value inserted = b.create<tensor::InsertSliceOp>(
				loc, part->getResult(i), resultOperands[i], resultOffsets, resultSizes,
				resultStrides);
				results.push_back(inserted);
				}

				return part;
	}			}

	std::pair<LinalgOp, LinalgOp> linalg::splitOp(RewriterBase &rewriter,			std::pair<TilingInterface, TilingInterface>
	LinalgOp op, unsigned dimension,			linalg::splitOp(RewriterBase &rewriter, TilingInterface op, unsigned dimension,
	OpFoldResult splitPoint) {			OpFoldResult splitPoint) {
				// Compute the iteration space.
				SmallVector<Range> iterationSpace = op.getIterationDomain(rewriter);

	// Bail out on dimension overflow.			// Bail out on dimension overflow.
	if (dimension >= op.getNumLoops())			if (dimension >= iterationSpace.size())
	return std::make_pair(op, LinalgOp());			return std::make_pair(op, TilingInterface());

	// Compute the iteration space size as values.			SmallVector<OpFoldResult> offsets =
	SmallVector<Value, 4> allShapes =			getAsOpFoldResult(llvm::to_vector(llvm::map_range(
	op.createFlatListOfOperandDims(rewriter, op.getLoc());			iterationSpace, [](const Range &range) { return range.offset; })));
	AffineMap shapesToLoops = op.getShapesToLoopsMap();			SmallVector<OpFoldResult> sizes =
	SmallVector<Value, 4> iterationSpaceShapes =			getAsOpFoldResult(llvm::to_vector(llvm::map_range(
	applyMapToValues(rewriter, op.getLoc(), shapesToLoops, allShapes);			iterationSpace, [](const Range &range) { return range.size; })));

	// Update the iteration space to have `splitPoint` as the size of `dimension`			// Adjust the split point so that it doesn't overflow the size.
	// and use it to slice operands and results for a new, smaller instance of the			AffineExpr d0, d1, d2;
	// `op`. Adjust the size if necessary to prevent overflows. Insert the partial			bindDims(rewriter.getContext(), d0, d1, d2);
	// results back.
	OpFoldResult dimSize = getAsOpFoldResult(iterationSpaceShapes[dimension]);
	OpFoldResult minSplitPoint = makeComposedFoldedAffineMin(			OpFoldResult minSplitPoint = makeComposedFoldedAffineMin(
	rewriter, op->getLoc(),			rewriter, op.getLoc(),
	AffineMap::getMultiDimIdentityMap(/numDims=/2, rewriter.getContext()),			AffineMap::inferFromExprList(ArrayRef<AffineExpr>{d0, d1 + d2}).front(),
	{splitPoint, dimSize});			{splitPoint, offsets[dimension], sizes[dimension]});
	SmallVector<Value> splitIterationSpace =
	llvm::to_vector(iterationSpaceShapes);			// Compute the size of the second part. Return early if the second part would
	SmallVector<Value> originalResults = llvm::to_vector(			// have an empty iteration space.
	llvm::map_range(op.getOutputOperands(),
	[](OpOperand *opOperand) { return opOperand->get(); }));
	SmallVector<Value> firstResults;
	LinalgOp first = createSplitPart(rewriter, op.getLoc(), op, originalResults,
	splitIterationSpace, dimension,
	minSplitPoint, firstResults);

	// Update the iteration space to cover the remaining part of the original
	// space, then create another instance of the `op` in that space. The size of
	// the remaining part may become zero, but is never negative because of the
	// adjustment above.
	AffineExpr d0 = rewriter.getAffineDimExpr(0);
	AffineExpr d1 = rewriter.getAffineDimExpr(1);
	OpFoldResult remainingSize = makeComposedFoldedAffineApply(			OpFoldResult remainingSize = makeComposedFoldedAffineApply(
	rewriter, op.getLoc(), d0 - d1, {dimSize, minSplitPoint});			rewriter, op.getLoc(), d0 + d1 - d2,
				{iterationSpace[dimension].offset, iterationSpace[dimension].size,
				minSplitPoint});
				if (auto attr = remainingSize.dyn_cast<Attribute>()) {
				if (attr.cast<IntegerAttr>().getValue().isZero())
				return {op, TilingInterface()};
				}

				// Create the first part.
				SmallVector<Value> firstResults;
				TilingInterface firstPart = createSplitPart(
				rewriter, op.getLoc(), op, offsets, sizes,
				op.getDestinationOperands(rewriter), dimension, minSplitPoint,
				getAsOpFoldResult(iterationSpace[dimension].offset), firstResults);

				ftynseAuthorUnsubmitted Done Reply Inline Actions It wasn't clear to me from the documentation if the offset supplied to `getTiledImplementation` is relative to the iteration space as returned by the interface or should also include the offset along the dimension in the space (which may be non-zero in the interface). Please advise. ftynse: It wasn't clear to me from the documentation if the offset supplied to `getTiledImplementation`…
				mravishankarUnsubmitted Done Reply Inline Actions Good point! Let me throw the question right back at you. What would make more sense? The current implementation of how the `TilingInterface` is used is that, if the iteration space is `(lb0, ub0, step)`, the offset that is passed to `getTiledImplementation` is `lb0 + tileSize * tileID`. So it is not "relative to iteration space". But that can be changed, and indeed now is the time to change it. If relative makes more sense then we can change it, but if it is relative you also need the iteration space to get the absolute position (the same is true in reverse, though: if you want relative, you need the iteration space). mravishankar: Good point! let me throw the question right back at you. What would make more sense? The…
				ftynseAuthorUnsubmitted Done Reply Inline Actions I have a very slight preference for it to be relative. Two reasons: (1) this matches the conceptual model of the subsetting (insert/extractslice, subview) operations in which the offset/stride is relative to the operand that may be a subset itself; (2) it somehow feels that the client of the TilingInterface would rarely care about absolute positions. But, in the end, I am fine with both ways as long as there is a note in the documentation that says which one it is :) ftynse: I have a very slight preference for it to be relative. Two reasons: (1) this matches the…
				mravishankarUnsubmitted Done Reply Inline Actions Had an offline discussion on this. Staying with absolute documents the current state of the implementation. There is an argument to be made for both, so leaving it as absolute for now until we have more data on whether relative or absolute is better. mravishankar: Had an offline discussion on this. Staying with absolute documents the current state of the…
				// Need to pretend that the original op now takes as operands firstResults,
				// otherwise tiling interface implementation will take the wrong value to
				// produce data tiles.
				rewriter.updateRootInPlace(op, [&]() {
				unsigned numTotalOperands = op->getNumOperands();
				unsigned numOutputOperands = firstResults.size();
				op->setOperands(numTotalOperands - numOutputOperands, numOutputOperands,
				firstResults);
				});

				// Create the second part.
				OpFoldResult totalOffset = makeComposedFoldedAffineApply(
				rewriter, op.getLoc(), d0 + d1, {offsets[dimension], minSplitPoint});
	SmallVector<Value> secondResults;			SmallVector<Value> secondResults;
	ImplicitLocOpBuilder implicit(op.getLoc(), rewriter);			TilingInterface secondPart =
	Value splitPointValue = materializeOpFoldResult(implicit, minSplitPoint);			createSplitPart(rewriter, op.getLoc(), op, offsets, sizes, firstResults,
	LinalgOp second = createSplitPart(			dimension, remainingSize, totalOffset, secondResults);
	rewriter, op.getLoc(), op, firstResults, splitIterationSpace, dimension,
	remainingSize, secondResults, splitPointValue);

	// Fixup the linalg.index results in the second part.
	SmallVector<Value> ivAdditions;
	ivAdditions.resize(splitIterationSpace.size());
	ivAdditions[dimension] = splitPointValue;
	linalg::offsetIndices(rewriter, cast<LinalgOp>(second), ivAdditions);

	// Replace the original op with the results of the two newly created ops.			// Replace the original op with the results of the two newly created ops.
	rewriter.replaceOp(op, secondResults);			rewriter.replaceOp(op, secondResults);
	return std::make_pair(first, second);			return {firstPart, secondPart};
	}			}

mlir/lib/Dialect/Linalg/Utils/Utils.cpp

Show First 20 Lines • Show All 907 Lines • ▼ Show 20 Lines	auto sliceOp = TypeSwitch<ShapedType, Operation >(shapedType)
builder, loc, valueToTile, offsets, sizes, strides);		builder, loc, valueToTile, offsets, sizes, strides);
})		})
.Default([](ShapedType) -> Operation * {		.Default([](ShapedType) -> Operation * {
llvm_unreachable("Unexpected shaped type");		llvm_unreachable("Unexpected shaped type");
});		});
return sliceOp->getResult(0);		return sliceOp->getResult(0);
}		}

/// Builds a slice of `value` with the given `offsets`, `sizes` and `strides`:
/// a memref.subview for memref-typed values and a tensor.extract_slice for
/// ranked tensors. Any other type trips the assertion below.
Value createSlice(OpBuilder &builder, Location loc, Value value,
                  ArrayRef<OpFoldResult> offsets, ArrayRef<OpFoldResult> sizes,
                  ArrayRef<OpFoldResult> strides) {
  auto valueType = value.getType();
  const bool isMemRef = valueType.isa<MemRefType>();

  if (!isMemRef) {
    // Tensor path. No attempt is made here to compose the new extract_slice
    // with an extract_slice that may have produced `value`; this is
    // intentional.
    assert(valueType.isa<RankedTensorType>() &&
           "expected a ranked tensor type");
    return builder.create<tensor::ExtractSliceOp>(loc, value, offsets, sizes,
                                                  strides);
  }

  // Buffer path.
  return builder.create<memref::SubViewOp>(loc, value, offsets, sizes,
                                           strides);
}

SmallVector<Value> computeTileOffsets(OpBuilder &b, Location loc,		SmallVector<Value> computeTileOffsets(OpBuilder &b, Location loc,
ValueRange ivs, ValueRange tileSizes) {		ValueRange ivs, ValueRange tileSizes) {
SmallVector<Value> offsets;		SmallVector<Value> offsets;
for (unsigned idx = 0, idxIvs = 0, e = tileSizes.size(); idx < e; ++idx) {		for (unsigned idx = 0, idxIvs = 0, e = tileSizes.size(); idx < e; ++idx) {
LLVM_DEBUG(llvm::dbgs() << "makeTiledShapes: for loop#" << idx << "\n");		LLVM_DEBUG(llvm::dbgs() << "makeTiledShapes: for loop#" << idx << "\n");
bool isTiled = !isZero(tileSizes[idx]);		bool isTiled = !isZero(tileSizes[idx]);
offsets.push_back(		offsets.push_back(
isTiled ? ivs[idxIvs++]		isTiled ? ivs[idxIvs++]
▲ Show 20 Lines • Show All 168 Lines • Show Last 20 Lines

mlir/test/Dialect/Linalg/multisize-tiling-full.mlir

Show First 20 Lines • Show All 43 Lines • ▼ Show 20 Lines	func.func @two_d(%arg0: tensor<10x34xf32>,
// respectively, and in this order.		// respectively, and in this order.
// Check the full code for the first quadrant, the data flow for the second		// Check the full code for the first quadrant, the data flow for the second
// quadrant and only the overall code structure for the remaining quadrants.		// quadrant and only the overall code structure for the remaining quadrants.
// The canonicalizer is able to recover static shapes for linalg.generic		// The canonicalizer is able to recover static shapes for linalg.generic
// instances, use those to differentiate the quadrants.		// instances, use those to differentiate the quadrants.

// CHECK: %[[SLICE_1:.+]] = tensor.extract_slice %[[OUT]][0, 0] [4, 34] [1, 1]		// CHECK: %[[SLICE_1:.+]] = tensor.extract_slice %[[OUT]][0, 0] [4, 34] [1, 1]
// CHECK: scf.for %[[I1:.+]] = %{{.}} to %{{.}} step %{{.*}} iter_args(%[[ITERARG_1:.+]] = %[[SLICE_1]])		// CHECK: scf.for %[[I1:.+]] = %{{.}} to %{{.}} step %{{.*}} iter_args(%[[ITERARG_1:.+]] = %[[SLICE_1]])
// CHECK: %[[INSLICE_1:.+]] = tensor.extract_slice %[[IN]][%[[I1]], 0] [2, 34] [1, 1]
// CHECK: %[[OUTSLICE_1:.+]] = tensor.extract_slice %[[ITERARG_1]][%[[I1]], 0] [2, 34] [1, 1]		// CHECK: %[[OUTSLICE_1:.+]] = tensor.extract_slice %[[ITERARG_1]][%[[I1]], 0] [2, 34] [1, 1]

// CHECK: %[[SLICE_2:.+]] = tensor.extract_slice %[[OUTSLICE_1]][0, 0] [2, 16] [1, 1]		// CHECK: %[[SLICE_2:.+]] = tensor.extract_slice %[[ITERARG_1]][%[[I1]], 0] [2, 16] [1, 1]
// CHECK: %[[LOOPRES:.+]] = scf.for %[[I2:.+]] = %{{.}} to %{{.}} step %{{.*}} iter_args(%[[ITERARG_2:.+]] = %[[SLICE_2]])		// CHECK: %[[LOOPRES:.+]] = scf.for %[[I2:.+]] = %{{.}} to %{{.}} step %{{.*}} iter_args(%[[ITERARG_2:.+]] = %[[SLICE_2]])
// CHECK: %[[INSLICE_2:.+]] = tensor.extract_slice %[[INSLICE_1]][0, %[[I2]]] [2, 8] [1, 1]		// CHECK: %[[INSLICE_2:.+]] = tensor.extract_slice %[[IN]][%[[I1]], %[[I2]]] [2, 8] [1, 1]
// CHECK: %[[OUTSLICE_2:.+]] = tensor.extract_slice %[[ITERARG_2]][0, %[[I2]]] [2, 8] [1, 1]		// CHECK: %[[OUTSLICE_2:.+]] = tensor.extract_slice %[[ITERARG_2]][0, %[[I2]]] [2, 8] [1, 1]
// CHECK: %[[RESSLICE_1:.+]] = linalg.generic {{.*}} ins(%[[INSLICE_2]] : tensor<2x8xf32>) outs(%[[OUTSLICE_2]] : tensor<2x8xf32>)		// CHECK: %[[RESSLICE_1:.+]] = linalg.generic {{.*}} ins(%[[INSLICE_2]] : tensor<2x8xf32>) outs(%[[OUTSLICE_2]] : tensor<2x8xf32>)
// CHECK: %[[RESPARTIAL:.+]] = tensor.insert_slice %[[RESSLICE_1]] into %[[ITERARG_2]]		// CHECK: %[[RESPARTIAL:.+]] = tensor.insert_slice %[[RESSLICE_1]] into %[[ITERARG_2]]
// CHECK: scf.yield %[[RESPARTIAL]]		// CHECK: scf.yield %[[RESPARTIAL]]

// CHECK: %[[INSERTED:.+]] = tensor.insert_slice %[[LOOPRES]] into %[[OUTSLICE_1]][0, 0] [2, 16] [1, 1]		// CHECK: %[[INSERTED:.+]] = tensor.insert_slice %[[LOOPRES]] into %[[OUTSLICE_1]][%[[I1]], 0] [2, 16] [1, 1]
// CHECK: %[[OUTSLICE_3:.+]] = tensor.extract_slice %[[INSERTED]][0, 16] [2, 18] [1, 1]		// CHECK: %[[OUTSLICE_3:.+]] = tensor.extract_slice %[[INSERTED]][0, 16] [2, 18] [1, 1]
// CHECK: scf.for %{{.}} iter_args(%{{.}} = %[[OUTSLICE_3]])		// CHECK: scf.for %{{.}} iter_args(%{{.}} = %[[OUTSLICE_3]])
// CHECK-COUNT-2: tensor.extract_slice		// CHECK-COUNT-2: tensor.extract_slice
// CHECK: linalg.generic {{.}} ins(%{{.}} : tensor<2x9xf32>)		// CHECK: linalg.generic {{.}} ins(%{{.}} : tensor<2x9xf32>)
// CHECK: tensor.insert_slice		// CHECK: tensor.insert_slice
// CHECK: scf.yield		// CHECK: scf.yield
// CHECK: %[[INSERTED_2:.+]] = tensor.insert_slice %{{.*}} into %[[INSERTED]]		// CHECK: %[[INSERTED_2:.+]] = tensor.insert_slice %{{.*}} into %[[INSERTED]]
// CHECK: %[[INSERTED_3:.+]] = tensor.insert_slice %[[INSERTED_2]] into %[[ITERARG_1]]		// CHECK: %[[INSERTED_3:.+]] = tensor.insert_slice %[[INSERTED_2]] into %[[ITERARG_1]]
// CHECK: scf.yield %[[INSERTED_3]]		// CHECK: scf.yield %[[INSERTED_3]]

// CHECK: tensor.insert_slice		// CHECK: tensor.insert_slice
// CHECK: tensor.extract_slice		// CHECK: tensor.extract_slice
// CHECK: scf.for		// CHECK: scf.for
// CHECK-COUNT-3: tensor.extract_slice		// CHECK-COUNT-2: tensor.extract_slice
// CHECK: scf.for		// CHECK: scf.for
// CHECK-COUNT-2: tensor.extract_slice		// CHECK-COUNT-2: tensor.extract_slice
// CHECK: linalg.generic {{.}} ins(%{{.}} : tensor<3x8xf32>)		// CHECK: linalg.generic {{.}} ins(%{{.}} : tensor<3x8xf32>)
// CHECK: tensor.insert_slice		// CHECK: tensor.insert_slice
// CHECK: scf.yield		// CHECK: scf.yield
// CHECK: tensor.insert_slice		// CHECK: tensor.insert_slice
// CHECK: tensor.extract_slice		// CHECK: tensor.extract_slice
// CHECK: scf.for		// CHECK: scf.for
Show All 11 Lines

mlir/test/Dialect/Linalg/transform-op-split.mlir

// RUN: mlir-opt %s --test-transform-dialect-interpreter --split-input-file -verify-diagnostics | FileCheck %s		// RUN: mlir-opt %s --test-transform-dialect-interpreter --split-input-file -verify-diagnostics | FileCheck %s
// RUN: mlir-opt %s --test-transform-dialect-interpreter --canonicalize --split-input-file -verify-diagnostics | FileCheck %s --check-prefix=CANON

transform.with_pdl_patterns {		transform.with_pdl_patterns {
^bb0(%arg0: !pdl.operation):		^bb0(%arg0: !pdl.operation):
transform.sequence %arg0 {		transform.sequence %arg0 {
^bb1(%arg1: !pdl.operation):		^bb1(%arg1: !pdl.operation):
%0 = transform.structured.match ops{["linalg.generic"]} in %arg1		%0 = transform.structured.match ops{["linalg.generic"]} in %arg1
%1:2 = transform.structured.split %0 after 42 { dimension = 0 }		%1:2 = transform.structured.split %0 after 42 { dimension = 0 }
}		}
}		}

func.func private @elem(%arg0: f32, %arg1: index, %arg2: index) -> f32		func.func private @elem(%arg0: f32, %arg1: index, %arg2: index) -> f32

// CHECK: #[[$ADD_42_MAP:.+]] = affine_map<(d0) -> (d0 + 42)>		// CHECK: #[[$ADD_42_MAP:.+]] = affine_map<(d0) -> (d0 + 42)>
// CHECK: #[[$ADD_10_MAP:.+]] = affine_map<(d0) -> (d0 + 10)>

// CHECK-LABEL: @one_d_static		// CHECK-LABEL: @one_d_static
// CHECK-SAME: %[[IN:.+]]: tensor<100xf32>, %[[OUT:.+]]: tensor<100xf32>		// CHECK-SAME: %[[IN:.+]]: tensor<100xf32>, %[[OUT:.+]]: tensor<100xf32>
func.func @one_d_static(%arg0: tensor<100xf32>, %arg1: tensor<100xf32>) -> tensor<100xf32> {		func.func @one_d_static(%arg0: tensor<100xf32>, %arg1: tensor<100xf32>) -> tensor<100xf32> {
// CHECK: %[[IN_SLICE_LOW:.+]] = tensor.extract_slice %[[IN]][0] [42] [1] : tensor<100xf32> to tensor<42xf32>		// CHECK: %[[IN_SLICE_LOW:.+]] = tensor.extract_slice %[[IN]][0] [42] [1] : tensor<100xf32> to tensor<42xf32>
// CHECK: %[[OUT_SLICE_LOW:.+]] = tensor.extract_slice %[[OUT]][0] [42] [1] : tensor<100xf32> to tensor<42xf32>		// CHECK: %[[OUT_SLICE_LOW:.+]] = tensor.extract_slice %[[OUT]][0] [42] [1] : tensor<100xf32> to tensor<42xf32>
// CHECK: %[[RES_SLICE_LOW:.+]] = linalg.generic		// CHECK: %[[RES_SLICE_LOW:.+]] = linalg.generic
// CHECK: ins(%[[IN_SLICE_LOW]]		// CHECK: ins(%[[IN_SLICE_LOW]]
Show All 23 Lines	func.func @one_d_static(%arg0: tensor<100xf32>, %arg1: tensor<100xf32>) -> tensor<100xf32> {
} -> tensor<100xf32>		} -> tensor<100xf32>

// CHECK: return %[[RES]]		// CHECK: return %[[RES]]
return %0 : tensor<100xf32>		return %0 : tensor<100xf32>
}		}

// CHECK-LABEL: @one_d_static_overflow		// CHECK-LABEL: @one_d_static_overflow
// CHECK-SAME: %[[IN:.+]]: tensor<10xf32>, %[[OUT:.+]]: tensor<10xf32>		// CHECK-SAME: %[[IN:.+]]: tensor<10xf32>, %[[OUT:.+]]: tensor<10xf32>
// CANON-LABEL: @one_d_static_overflow
// CANON-SAME: %[[IN:.+]]: tensor<10xf32>, %[[OUT:.+]]: tensor<10xf32>
func.func @one_d_static_overflow(%arg0: tensor<10xf32>, %arg1: tensor<10xf32>) -> tensor<10xf32> {		func.func @one_d_static_overflow(%arg0: tensor<10xf32>, %arg1: tensor<10xf32>) -> tensor<10xf32> {
// CHECK: %[[IN_SLICE_LOW:.+]] = tensor.extract_slice %[[IN]][0] [10] [1] : tensor<10xf32> to tensor<10xf32>		// Folding is sufficiently powerful to detect the static overflow and avoid
// CHECK: %[[OUT_SLICE_LOW:.+]] = tensor.extract_slice %[[OUT]][0] [10] [1] : tensor<10xf32> to tensor<10xf32>		// the splitting altogether.
// CHECK: %[[RES_SLICE_LOW:.+]] = linalg.generic		// CHECK: %[[RES_SLICE_LOW:.+]] = linalg.generic
// CHECK: ins(%[[IN_SLICE_LOW]]		// CHECK: ins(%[[IN]]
// CHECK: outs(%[[OUT_SLICE_LOW]]		// CHECK: outs(%[[OUT]]
// CHECK: linalg.index 0		// CHECK: linalg.index 0
// CHECK: func.call @elem		// CHECK: func.call @elem
// CHECK: %[[RES_PARTIAL:.+]] = tensor.insert_slice %[[RES_SLICE_LOW]] into %[[OUT]][0] [10] [1]
//
// Due to overflow, the first part of the split computes everything and the
// insert/extract slices are folded away by the canonicalizer.
// CANON: %[[RES_PARTIAL:.+]] = linalg.generic
// CANON: ins(%[[IN]]
// CANON: outs(%[[OUT]]
// CANON: linalg.index 0
// CANON: func.call @elem
// The second part operates on zero-sized slices that are not currently
// folded away.
//
// CHECK: %[[IN_SLICE_HIGH:.+]] = tensor.extract_slice %[[IN]][10] [0] [1] : tensor<10xf32> to tensor<0xf32>
// CHECK: %[[OUT_SLICE_HIGH:.+]] = tensor.extract_slice %[[RES_PARTIAL]][10] [0] [1] : tensor<10xf32> to tensor<0xf32>
// CHECK: %[[RES_SLICE_HIGH:.+]] = linalg.generic
// CHECK: ins(%[[IN_SLICE_HIGH]]
// CHECK: outs(%[[OUT_SLICE_HIGH]]
// CHECK: %[[IDX:.+]] = linalg.index 0
// CHECK: affine.apply #[[$ADD_10_MAP]](%[[IDX]])
// CHECK: func.call @elem
// CHECK: %[[RES:.+]] = tensor.insert_slice %[[RES_SLICE_HIGH]] into %[[RES_PARTIAL]][10] [0] [1]
%0 = linalg.generic {		%0 = linalg.generic {
indexing_maps = [affine_map<(i) -> (i)>, affine_map<(i) -> (i)>],		indexing_maps = [affine_map<(i) -> (i)>, affine_map<(i) -> (i)>],
iterator_types = ["parallel"]		iterator_types = ["parallel"]
}		}
ins(%arg0: tensor<10xf32>) outs(%arg1: tensor<10xf32>) {		ins(%arg0: tensor<10xf32>) outs(%arg1: tensor<10xf32>) {
^bb0(%0: f32, %1: f32):		^bb0(%0: f32, %1: f32):
%i = linalg.index 0 : index		%i = linalg.index 0 : index
%call_res = func.call @elem(%0, %i, %i) : (f32, index, index) -> f32		%call_res = func.call @elem(%0, %i, %i) : (f32, index, index) -> f32
Show All 18 Lines

// CHECK: #[[$MAP_MIN_100:.+]] = affine_map<()[s0] -> (s0, 100)>		// CHECK: #[[$MAP_MIN_100:.+]] = affine_map<()[s0] -> (s0, 100)>
// CHECK: #[[$MAP_S_MINUS_100:.+]] = affine_map<()[s0] -> (-s0 + 100)>		// CHECK: #[[$MAP_S_MINUS_100:.+]] = affine_map<()[s0] -> (-s0 + 100)>

// CHECK-LABEL: @dynamic		// CHECK-LABEL: @dynamic
func.func @dynamic(%arg0: tensor<100xf32>, %arg1: tensor<100xf32>) -> tensor<100xf32> {		func.func @dynamic(%arg0: tensor<100xf32>, %arg1: tensor<100xf32>) -> tensor<100xf32> {
// CHECK: %[[SPLIT:.+]] = call @get_size		// CHECK: %[[SPLIT:.+]] = call @get_size
// CHECK: %[[SPLIT_LOW:.+]] = affine.min #[[$MAP_MIN_100]]()[%[[SPLIT]]		// CHECK: %[[SPLIT_LOW:.+]] = affine.min #[[$MAP_MIN_100]]()[%[[SPLIT]]
		// CHECK: %[[SPLIT_HIGH_1:.+]] = affine.apply #[[$MAP_S_MINUS_100]]()[%[[SPLIT_LOW]]]
// CHECK: %[[IN_SLICE_LOW:.+]] = tensor.extract_slice %[[IN:.+]][0] [%[[SPLIT_LOW]]] [1] : tensor<100xf32> to tensor<?xf32>		// CHECK: %[[IN_SLICE_LOW:.+]] = tensor.extract_slice %[[IN:.+]][0] [%[[SPLIT_LOW]]] [1] : tensor<100xf32> to tensor<?xf32>
// CHECK: %[[OUT_SLICE_LOW:.+]] = tensor.extract_slice %[[OUT:.+]][0] [%[[SPLIT_LOW]]] [1] : tensor<100xf32> to tensor<?xf32>		// CHECK: %[[OUT_SLICE_LOW:.+]] = tensor.extract_slice %[[OUT:.+]][0] [%[[SPLIT_LOW]]] [1] : tensor<100xf32> to tensor<?xf32>
// CHECK: %[[RES_SLICE_LOW:.+]] = linalg.generic		// CHECK: %[[RES_SLICE_LOW:.+]] = linalg.generic
// CHECK: ins(%[[IN_SLICE_LOW]]		// CHECK: ins(%[[IN_SLICE_LOW]]
// CHECK: outs(%[[OUT_SLICE_LOW]]		// CHECK: outs(%[[OUT_SLICE_LOW]]
// CHECK: %[[PARTIAL:.+]] = tensor.insert_slice %[[RES_SLICE_LOW]] into %[[OUT]][0] [%[[SPLIT_LOW]]] [1]		// CHECK: %[[PARTIAL:.+]] = tensor.insert_slice %[[RES_SLICE_LOW]] into %[[OUT]][0] [%[[SPLIT_LOW]]] [1]
//		//
// CHECK: %[[SPLIT_HIGH_1:.+]] = affine.apply #[[$MAP_S_MINUS_100]]()[%[[SPLIT_LOW]]]
// CHECK: %[[SPLIT_HIGH_2:.+]] = affine.apply #[[$MAP_S_MINUS_100]]()[%[[SPLIT_LOW]]]		// CHECK: %[[SPLIT_HIGH_2:.+]] = affine.apply #[[$MAP_S_MINUS_100]]()[%[[SPLIT_LOW]]]
// CHECK: %[[IN_SLICE_HIGH:.+]] = tensor.extract_slice %[[IN:.+]][%[[SPLIT_LOW]]] [%[[SPLIT_HIGH_2]]] [1] : tensor<100xf32> to tensor<?xf32>		// CHECK: %[[IN_SLICE_HIGH:.+]] = tensor.extract_slice %[[IN:.+]][%[[SPLIT_LOW]]] [%[[SPLIT_HIGH_2]]] [1] : tensor<100xf32> to tensor<?xf32>
// CHECK: %[[SPLIT_HIGH_3:.+]] = affine.apply #[[$MAP_S_MINUS_100]]()[%[[SPLIT_LOW]]]		// CHECK: %[[SPLIT_HIGH_3:.+]] = affine.apply #[[$MAP_S_MINUS_100]]()[%[[SPLIT_LOW]]]
// CHECK: %[[OUT_SLICE_HIGH:.+]] = tensor.extract_slice %[[PARTIAL:.+]][%[[SPLIT_LOW]]] [%[[SPLIT_HIGH_3]]] [1] : tensor<100xf32> to tensor<?xf32>		// CHECK: %[[OUT_SLICE_HIGH:.+]] = tensor.extract_slice %[[PARTIAL:.+]][%[[SPLIT_LOW]]] [%[[SPLIT_HIGH_3]]] [1] : tensor<100xf32> to tensor<?xf32>
// CHECK: %[[RES_SLICE_HIGH:.+]] = linalg.generic		// CHECK: %[[RES_SLICE_HIGH:.+]] = linalg.generic
// CHECK: ins(%[[IN_SLICE_HIGH]]		// CHECK: ins(%[[IN_SLICE_HIGH]]
// CHECK: outs(%[[OUT_SLICE_HIGH]]		// CHECK: outs(%[[OUT_SLICE_HIGH]]
// CHECK: tensor.insert_slice %[[RES_SLICE_HIGH]] into %[[PARTIAL]][%[[SPLIT_LOW]]] [%[[SPLIT_HIGH_3]]] [1]		// CHECK: %[[SPLIT_HIGH_4:.+]] = affine.apply #[[$MAP_S_MINUS_100]]()[%[[SPLIT_LOW]]]
		// CHECK: tensor.insert_slice %[[RES_SLICE_HIGH]] into %[[PARTIAL]][%[[SPLIT_LOW]]] [%[[SPLIT_HIGH_4]]] [1]
%0 = func.call @get_size() : () -> index		%0 = func.call @get_size() : () -> index
%1 = linalg.generic {		%1 = linalg.generic {
indexing_maps = [affine_map<(i) -> (i)>, affine_map<(i) -> (i)>],		indexing_maps = [affine_map<(i) -> (i)>, affine_map<(i) -> (i)>],
iterator_types = ["parallel"]		iterator_types = ["parallel"]
}		}
ins(%arg0: tensor<100xf32>) outs(%arg1: tensor<100xf32>) {		ins(%arg0: tensor<100xf32>) outs(%arg1: tensor<100xf32>) {
^bb0(%3: f32, %4: f32):		^bb0(%3: f32, %4: f32):
%5 = arith.addf %3, %4 : f32		%5 = arith.addf %3, %4 : f32
Show All 25 Lines	func.func @two_d(%arg0: tensor<10x34xf32>,
// CHECK: %[[OUT_1:.+]] = tensor.extract_slice %[[OUT:.+]][0, 0]		// CHECK: %[[OUT_1:.+]] = tensor.extract_slice %[[OUT:.+]][0, 0]
// CHECK: %[[RES_1:.+]] = linalg.generic		// CHECK: %[[RES_1:.+]] = linalg.generic
// CHECK-SAME: ins(%[[IN_1]] : tensor<4x34xf32>)		// CHECK-SAME: ins(%[[IN_1]] : tensor<4x34xf32>)
// CHECK-SAME: outs(%[[OUT_1]] : tensor<4x34xf32>)		// CHECK-SAME: outs(%[[OUT_1]] : tensor<4x34xf32>)
// CHECK: %[[PARTIAL_1:.+]] = tensor.insert_slice %[[RES_1]] into %[[OUT]]		// CHECK: %[[PARTIAL_1:.+]] = tensor.insert_slice %[[RES_1]] into %[[OUT]]
//		//
// CHECK: %[[IN_2:.+]] = tensor.extract_slice %[[IN]]		// CHECK: %[[IN_2:.+]] = tensor.extract_slice %[[IN]]
// CHECK: %[[OUT_2:.+]] = tensor.extract_slice %[[PARTIAL_1]]		// CHECK: %[[OUT_2:.+]] = tensor.extract_slice %[[PARTIAL_1]]
// CHECK: %[[IN_21:.+]] = tensor.extract_slice %[[IN_2]]		// Note that `extract_slice` taking a slice from another `extract_slice` result
// CHECK: %[[OUT_21:.+]] = tensor.extract_slice %[[OUT_2]]		// is folded to use the operand of the first `extract_slice`.
		// CHECK: %[[IN_21:.+]] = tensor.extract_slice %[[IN]]
		// CHECK: %[[OUT_21:.+]] = tensor.extract_slice %[[PARTIAL_1]]
// CHECK: %[[RES_21:.+]] = linalg.generic		// CHECK: %[[RES_21:.+]] = linalg.generic
// CHECK-SAME: ins(%[[IN_21]] : tensor<6x16xf32>)		// CHECK-SAME: ins(%[[IN_21]] : tensor<6x16xf32>)
// CHECK-SAME: outs(%[[OUT_21]] : tensor<6x16xf32>)		// CHECK-SAME: outs(%[[OUT_21]] : tensor<6x16xf32>)
// CHECK: %[[PARTIAL_21:.+]] = tensor.insert_slice %[[RES_21]] into %[[OUT_2]]		// CHECK: %[[PARTIAL_21:.+]] = tensor.insert_slice %[[RES_21]] into %[[OUT_2]]
//		//
// CHECK: %[[IN_22:.+]] = tensor.extract_slice %[[IN_2]]		// CHECK: %[[IN_22:.+]] = tensor.extract_slice %[[IN]]
// CHECK: %[[OUT_22:.+]] = tensor.extract_slice %[[PARTIAL_21]]		// CHECK: %[[OUT_22:.+]] = tensor.extract_slice %[[PARTIAL_21]]
// CHECK: %[[RES_22:.+]] = linalg.generic		// CHECK: %[[RES_22:.+]] = linalg.generic
// CHECK-SAME: ins(%[[IN_22]] : tensor<6x18xf32>)		// CHECK-SAME: ins(%[[IN_22]] : tensor<6x18xf32>)
// CHECK-SAME: outs(%[[OUT_22]] : tensor<6x18xf32>)		// CHECK-SAME: outs(%[[OUT_22]] : tensor<6x18xf32>)
// CHECK: %[[PARTIAL_22:.+]] = tensor.insert_slice %[[RES_22]] into %[[PARTIAL_21]]		// CHECK: %[[PARTIAL_22:.+]] = tensor.insert_slice %[[RES_22]] into %[[PARTIAL_21]]
// CHECK: %[[PARTIAL_2:.+]] = tensor.insert_slice %[[PARTIAL_22]] into %[[PARTIAL_1]]		// CHECK: %[[PARTIAL_2:.+]] = tensor.insert_slice %[[PARTIAL_22]] into %[[PARTIAL_1]]
%0 = linalg.generic {		%0 = linalg.generic {
indexing_maps = [affine_map<(i, j) -> (i, j)>,		indexing_maps = [affine_map<(i, j) -> (i, j)>,
▲ Show 20 Lines • Show All 134 Lines • Show Last 20 Lines

This is an archive of the discontinued LLVM Phabricator instance.

[mlir] Partially port splitting transform to TilingInterface
ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 447950

mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h

mlir/include/mlir/Dialect/Linalg/Utils/Utils.h

mlir/include/mlir/Interfaces/TilingInterface.td

mlir/lib/Dialect/Linalg/TransformOps/LinalgTransformOps.cpp

mlir/lib/Dialect/Linalg/Transforms/Split.cpp

mlir/lib/Dialect/Linalg/Utils/Utils.cpp

mlir/test/Dialect/Linalg/multisize-tiling-full.mlir

mlir/test/Dialect/Linalg/transform-op-split.mlir

This is an archive of the discontinued LLVM Phabricator instance.

[mlir] Partially port splitting transform to TilingInterface
ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 447950

mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h

mlir/include/mlir/Dialect/Linalg/Utils/Utils.h

mlir/include/mlir/Interfaces/TilingInterface.td

mlir/lib/Dialect/Linalg/TransformOps/LinalgTransformOps.cpp

mlir/lib/Dialect/Linalg/Transforms/Split.cpp

mlir/lib/Dialect/Linalg/Utils/Utils.cpp

mlir/test/Dialect/Linalg/multisize-tiling-full.mlir

mlir/test/Dialect/Linalg/transform-op-split.mlir

[mlir] Partially port splitting transform to TilingInterface
ClosedPublic