Diff 304735

mlir/include/mlir/Dialect/Linalg/Analysis/DependenceAnalysis.h

	Show All 39 Lines
	};			};

	/// Data structure for holding a dependence graph that operates on LinalgOp and			/// Data structure for holding a dependence graph that operates on LinalgOp and
	/// views as SSA values.			/// views as SSA values.
	class LinalgDependenceGraph {			class LinalgDependenceGraph {
	public:			public:
	struct LinalgOpView {			struct LinalgOpView {
	Operation *op;			Operation *op;
	Value view;			unsigned operandIndex;
				antiagainstUnsubmitted Done Reply Inline Actions Nit: I'd suggest to use `operandIndex`. Number usually means total count to me. antiagainst: Nit: I'd suggest to use `operandIndex`. Number usually means total count to me.
	};			};
	struct LinalgDependenceGraphElem {			struct LinalgDependenceGraphElem {
	// dependentOpView may be either:			// dependentOpView may be either:
	// 1. src in the case of dependencesIntoGraphs.			// 1. src in the case of dependencesIntoGraphs.
	// 2. dst in the case of dependencesFromDstGraphs.			// 2. dst in the case of dependencesFromDstGraphs.
	LinalgOpView dependentOpView;			LinalgOpView dependentOpView;
	// View in the op that is used to index in the graph:			// View in the op that is used to index in the graph:
	// 1. src in the case of dependencesFromDstGraphs.			// 1. src in the case of dependencesFromDstGraphs.
	// 2. dst in the case of dependencesIntoGraphs.			// 2. dst in the case of dependencesIntoGraphs.
	Value indexingView;			LinalgOpView indexingOpView;
	};			};
	using LinalgDependences = SmallVector<LinalgDependenceGraphElem, 8>;			using LinalgDependences = SmallVector<LinalgDependenceGraphElem, 8>;
	using DependenceGraph = DenseMap<Operation *, LinalgDependences>;			using DependenceGraph = DenseMap<Operation *, LinalgDependences>;
	using dependence_iterator = LinalgDependences::const_iterator;			using dependence_iterator = LinalgDependences::const_iterator;
	using dependence_range = iterator_range<dependence_iterator>;			using dependence_range = iterator_range<dependence_iterator>;

	enum DependenceType { RAR = 0, RAW, WAR, WAW, NumTypes };			enum DependenceType { RAR = 0, RAW, WAR, WAW, NumTypes };
	static StringRef getDependenceTypeStr(DependenceType depType);			static StringRef getDependenceTypeStr(DependenceType depType);
	▲ Show 20 Lines • Show All 111 Lines • Show Last 20 Lines

mlir/include/mlir/Dialect/Linalg/IR/LinalgStructuredOpsInterface.td

Show First 20 Lines • Show All 549 Lines • ▼ Show 20 Lines	InterfaceMethod<
return llvm::to_vector<4>(		return llvm::to_vector<4>(
llvm::map_range(inputOutputTypes, [](Type type) -> ShapedType {		llvm::map_range(inputOutputTypes, [](Type type) -> ShapedType {
return type.cast<ShapedType>();		return type.cast<ShapedType>();
}));		}));
}]		}]
>,		>,
InterfaceMethod<		InterfaceMethod<
/desc=/[{		/desc=/[{
Return the position of the shaped operand in the operand list.		Return the first position of the shaped operand in the operand list.
}],		}],
/retTy=/"Optional<unsigned>",		/retTy=/"Optional<unsigned>",
/methodName=/"getIndexOfShapedOperand",		/methodName=/"getIndexOfShapedOperand",
/args=/(ins "Value":$value),		/args=/(ins "Value":$value),
/methodBody=/"",		/methodBody=/"",
/defaultImplementation=/[{		/defaultImplementation=/[{
Optional<unsigned> inputIndex = getIndexOfInput(value);		Optional<unsigned> inputIndex = getIndexOfInput(value);
if (inputIndex.hasValue()) return inputIndex.getValue();		if (inputIndex.hasValue()) return inputIndex.getValue();
Optional<unsigned> outputIndex = getIndexOfOutputBuffer(value);		Optional<unsigned> outputIndex = getIndexOfOutputBuffer(value);
if (outputIndex.hasValue())		if (outputIndex.hasValue())
return $_op.getNumInputs() + outputIndex.getValue();		return $_op.getNumInputs() + outputIndex.getValue();
Optional<unsigned> initTensorIndex = getIndexOfInitTensor(value);		Optional<unsigned> initTensorIndex = getIndexOfInitTensor(value);
if (initTensorIndex.hasValue())		if (initTensorIndex.hasValue())
return $_op.getNumInputs() + $_op.getNumOutputBuffers() + initTensorIndex.getValue();		return $_op.getNumInputs() + $_op.getNumOutputBuffers() + initTensorIndex.getValue();
return llvm::None;		return llvm::None;
}]		}]
>,		>,
		InterfaceMethod<
		/desc=/[{
		Returns the operand index given the input index. Returns None
		hanchungUnsubmitted Done Reply Inline Actions s/then/the hanchung: s/then/the
		if the input index is invalid.
		}],
		/retTy=/"Optional<unsigned>",
		hanchungUnsubmitted Done Reply Inline Actions s/retTyp/retTy hanchung: s/retTyp/retTy
		/methodName=/"getOperandIndexForInputIndex",
		/args=/(ins "unsigned":$input_index),
		/methodBody=/"",
		/defaultImplementation=/[{
		if (input_index >= $_op.getNumInputs())
		return llvm::None;
		return input_index;
		hanchungUnsubmitted Done Reply Inline Actions format this? (see above examples) hanchung: format this? (see above examples)
		}]
		>,
		InterfaceMethod<
		/desc=/[{
		Returns the operand index given the output index. Returns None
		hanchungUnsubmitted Done Reply Inline Actions s/then/the hanchung: s/then/the
		if the output index is invalid.
		}],
		/retTy=/"Optional<unsigned>",
		hanchungUnsubmitted Done Reply Inline Actions s/retTyp/retTy hanchung: s/retTyp/retTy
		/methodName=/"getOperandIndexForOutputIndex",
		/args=/(ins "unsigned":$output_index),
		/methodBody=/"",
		/defaultImplementation=/[{
		if (output_index >= $_op.getNumOutputs())
		return llvm::None;
		return output_index + $_op.getNumInputs();
		}]
		>,
		InterfaceMethod<
		/desc=/[{
		Returns the input index given the operand index. Return None
		if the operand index doesn't correspond to an input.
		}],
		/retTy=/"Optional<unsigned>",
		/methodName=/"getInputIndex",
		/args=/(ins "unsigned":$operand_index),
		/methodBody=/"",
		/defaultImplementation=/[{
		if (operand_index >= $_op.getNumInputs())
		return llvm::None;
		return operand_index;
		}]
		>,
		InterfaceMethod<
		/desc=/[{
		Returns the output index given the operand index. Return None
		if the operand index doesn't correspond to an output.
		}],
		/retTy=/"Optional<unsigned>",
		/methodName=/"getOutputIndex",
		/args=/(ins "unsigned":$operand_index),
		/methodBody=/"",
		/defaultImplementation=/[{
		if (operand_index < $_op.getNumInputs() \|\|
		operand_index >= $_op.getNumInputs() + $_op.getNumOutputs())
		return llvm::None;
		return operand_index - $_op.getNumInputs();
		}]
		>,

//===------------------------------------------------------------------===//		//===------------------------------------------------------------------===//
// Other interface methods.		// Other interface methods.
//===------------------------------------------------------------------===//		//===------------------------------------------------------------------===//
InterfaceMethod<		InterfaceMethod<
/desc=/[{		/desc=/[{
Return the iterator types attribute within the current operation.		Return the iterator types attribute within the current operation.
}],		}],
▲ Show 20 Lines • Show All 204 Lines • Show Last 20 Lines

mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h

Show All 9 Lines
#define DIALECT_LINALG_TRANSFORMS_TRANSFORMS_H_		#define DIALECT_LINALG_TRANSFORMS_TRANSFORMS_H_

#include "mlir/Dialect/Linalg/Utils/Utils.h"		#include "mlir/Dialect/Linalg/Utils/Utils.h"
#include "mlir/Dialect/Vector/VectorOps.h"		#include "mlir/Dialect/Vector/VectorOps.h"
#include "mlir/IR/Identifier.h"		#include "mlir/IR/Identifier.h"
#include "mlir/IR/PatternMatch.h"		#include "mlir/IR/PatternMatch.h"
#include "mlir/Transforms/Bufferize.h"		#include "mlir/Transforms/Bufferize.h"
#include "llvm/ADT/SmallBitVector.h"		#include "llvm/ADT/SmallBitVector.h"
		#include "llvm/ADT/SmallSet.h"

namespace mlir {		namespace mlir {
class BufferizeTypeConverter;		class BufferizeTypeConverter;
class FrozenRewritePatternList;		class FrozenRewritePatternList;

namespace linalg {		namespace linalg {

struct LinalgFusionOptions;		struct LinalgFusionOptions;
▲ Show 20 Lines • Show All 398 Lines • ▼ Show 20 Lines	if (tensorResults.empty())
rewriter.eraseOp(op);		rewriter.eraseOp(op);
else		else
rewriter.replaceOp(op, tensorResults);		rewriter.replaceOp(op, tensorResults);
return success();		return success();
}		}
};		};

struct LinalgFusionOptions {		struct LinalgFusionOptions {
/// Optional list of operands indices to use for fusion. When unspecified,		/// List of operands indices to use for fusion.
/// only one fusion is done, i.e., the pattern returns after the first fusion.		llvm::SmallSet<unsigned, 1> indicesToFuse = {};
		antiagainstUnsubmitted Done Reply Inline Actions SmallSet? I'd assume typically we don't have a large number of indices to fuse. antiagainst: SmallSet? I'd assume typically we don't have a large number of indices to fuse.
Optional<DenseSet<unsigned>> indicesToFuse = None;
LinalgFusionOptions &setIndicesToFuse(ArrayRef<int64_t> operands) {		LinalgFusionOptions &setIndicesToFuse(ArrayRef<int64_t> operands) {
indicesToFuse = DenseSet<unsigned>();		indicesToFuse.insert(operands.begin(), operands.end());
indicesToFuse->insert(operands.begin(), operands.end());
return *this;		return *this;
}		}
};		};

struct LinalgBaseTileAndFusePattern : public RewritePattern {		struct LinalgBaseTileAndFusePattern : public RewritePattern {
LinalgBaseTileAndFusePattern(StringRef opName, MLIRContext *context,		LinalgBaseTileAndFusePattern(StringRef opName, MLIRContext *context,
const LinalgDependenceGraph &dependenceGraph,		const LinalgDependenceGraph &dependenceGraph,
LinalgTilingOptions tilingOptions,		LinalgTilingOptions tilingOptions,
▲ Show 20 Lines • Show All 333 Lines • Show Last 20 Lines

mlir/include/mlir/IR/AffineMap.h

	Show First 20 Lines • Show All 317 Lines • ▼ Show 20 Lines
	///			///
	/// Returns the map:			/// Returns the map:
	///			///
	/// ```mlir			/// ```mlir
	/// (i, j, k) -> (i, k, k, j, i, j)			/// (i, j, k) -> (i, k, k, j, i, j)
	/// ```			/// ```
	AffineMap concatAffineMaps(ArrayRef<AffineMap> maps);			AffineMap concatAffineMaps(ArrayRef<AffineMap> maps);

				AffineMap getProjectedMap(AffineMap map,
				ArrayRef<unsigned> projectedDimensions);

	inline raw_ostream &operator<<(raw_ostream &os, AffineMap map) {			inline raw_ostream &operator<<(raw_ostream &os, AffineMap map) {
	map.print(os);			map.print(os);
	return os;			return os;
	}			}
	} // end namespace mlir			} // end namespace mlir

	namespace llvm {			namespace llvm {

	Show All 21 Lines

mlir/lib/Dialect/Linalg/Analysis/DependenceAnalysis.cpp

Show First 20 Lines • Show All 102 Lines • ▼ Show 20 Lines	for (unsigned j = i + 1; j < e; ++j) {
addDependencesBetween(cast<LinalgOp>(ops[i]), cast<LinalgOp>(ops[j]));		addDependencesBetween(cast<LinalgOp>(ops[i]), cast<LinalgOp>(ops[j]));
}		}
}		}
}		}

void LinalgDependenceGraph::addDependenceElem(DependenceType dt,		void LinalgDependenceGraph::addDependenceElem(DependenceType dt,
LinalgOpView indexingOpView,		LinalgOpView indexingOpView,
LinalgOpView dependentOpView) {		LinalgOpView dependentOpView) {
LLVM_DEBUG(dbgs() << "\nAdd dep type " << getDependenceTypeStr(dt) << ":\t"		LLVM_DEBUG(dbgs() << "\nAdd dep type " << getDependenceTypeStr(dt) << ":\t ("
<< indexingOpView.op << " -> " << dependentOpView.op);		<< *indexingOpView.op << ", " << indexingOpView.operandIndex
		<< ") -> \n\t\t(" << *dependentOpView.op << ", "
		<< dependentOpView.operandIndex << ")");
dependencesFromGraphs[dt][indexingOpView.op].push_back(		dependencesFromGraphs[dt][indexingOpView.op].push_back(
LinalgDependenceGraphElem{dependentOpView, indexingOpView.view});		LinalgDependenceGraphElem{dependentOpView, indexingOpView});
dependencesIntoGraphs[dt][dependentOpView.op].push_back(		dependencesIntoGraphs[dt][dependentOpView.op].push_back(
LinalgDependenceGraphElem{indexingOpView, dependentOpView.view});		LinalgDependenceGraphElem{indexingOpView, dependentOpView});
}		}

LinalgDependenceGraph::dependence_range		LinalgDependenceGraph::dependence_range
LinalgDependenceGraph::getDependencesFrom(		LinalgDependenceGraph::getDependencesFrom(
LinalgOp src, LinalgDependenceGraph::DependenceType dt) const {		LinalgOp src, LinalgDependenceGraph::DependenceType dt) const {
return getDependencesFrom(src.getOperation(), dt);		return getDependencesFrom(src.getOperation(), dt);
}		}

Show All 17 Lines	LinalgDependenceGraph::getDependencesInto(
Operation *dst, LinalgDependenceGraph::DependenceType dt) const {		Operation *dst, LinalgDependenceGraph::DependenceType dt) const {
auto iter = dependencesIntoGraphs[dt].find(dst);		auto iter = dependencesIntoGraphs[dt].find(dst);
if (iter == dependencesIntoGraphs[dt].end())		if (iter == dependencesIntoGraphs[dt].end())
return llvm::make_range(nullptr, nullptr);		return llvm::make_range(nullptr, nullptr);
return llvm::make_range(iter->second.begin(), iter->second.end());		return llvm::make_range(iter->second.begin(), iter->second.end());
}		}

void LinalgDependenceGraph::addDependencesBetween(LinalgOp src, LinalgOp dst) {		void LinalgDependenceGraph::addDependencesBetween(LinalgOp src, LinalgOp dst) {
for (auto srcView : src.getOutputBuffers()) { // W		for (auto srcView : llvm::enumerate(src.getOutputBuffers())) { // W
		unsigned srcIndex =
		src.getOperandIndexForOutputIndex(srcView.index()).getValue();
// RAW graph		// RAW graph
for (auto dstView : dst.getInputBuffers()) { // R		for (auto dstView : llvm::enumerate(dst.getInputBuffers())) { // R
if (aliases.alias(srcView, dstView)) { // if alias, fill RAW		if (aliases.alias(srcView.value(),
		dstView.value())) { // if alias, fill RAW
		unsigned dstIndex =
		dst.getOperandIndexForInputIndex(dstView.index()).getValue();
addDependenceElem(DependenceType::RAW,		addDependenceElem(DependenceType::RAW,
LinalgOpView{src.getOperation(), srcView},		LinalgOpView{src.getOperation(), srcIndex},
LinalgOpView{dst.getOperation(), dstView});		LinalgOpView{dst.getOperation(), dstIndex});
}		}
}		}
// WAW graph		// WAW graph
for (auto dstView : dst.getOutputBuffers()) { // W		for (auto dstView : llvm::enumerate(dst.getOutputBuffers())) { // W
if (aliases.alias(srcView, dstView)) { // if alias, fill WAW		if (aliases.alias(srcView.value(),
		dstView.value())) { // if alias, fill WAW
		hanchungUnsubmitted Done Reply Inline Actions IIRC, "W" would be `index + number of inputs`, and "R" would be `number of inputs`. I feel we could miss any of one in a future change, so I think it would be good to have something like `getLinalgOpViewW(int idx, LinalgOp op)` and `getLinalgOpViewR(int idx, LinalgOp op)` and both of them will return a `LinalgOpView`. hanchung: IIRC, "W" would be `index + number of inputs`, and "R" would be `number of inputs`. I feel we…
		mravishankarAuthorUnsubmitted Done Reply Inline Actions Changed it to use the `getIndexOfShapedOperand` method. mravishankar: Changed it to use the `getIndexOfShapedOperand` method.
		hanchungUnsubmitted Done Reply Inline Actions nice! hanchung: nice!
		unsigned dstIndex =
		dst.getOperandIndexForOutputIndex(dstView.index()).getValue();
addDependenceElem(DependenceType::WAW,		addDependenceElem(DependenceType::WAW,
LinalgOpView{src.getOperation(), srcView},		LinalgOpView{src.getOperation(), srcIndex},
LinalgOpView{dst.getOperation(), dstView});		LinalgOpView{dst.getOperation(), dstIndex});
}		}
}		}
}		}
for (auto srcView : src.getInputBuffers()) { // R		for (auto srcView : llvm::enumerate(src.getInputBuffers())) { // R
		unsigned srcIndex =
		src.getOperandIndexForInputIndex(srcView.index()).getValue();
// RAR graph		// RAR graph
for (auto dstView : dst.getInputBuffers()) { // R		for (auto dstView : llvm::enumerate(dst.getInputBuffers())) { // R
if (aliases.alias(srcView, dstView)) { // if alias, fill RAR		if (aliases.alias(srcView.value(),
		dstView.value())) { // if alias, fill RAR
		unsigned dstIndex =
		dst.getOperandIndexForInputIndex(dstView.index()).getValue();
addDependenceElem(DependenceType::RAR,		addDependenceElem(DependenceType::RAR,
LinalgOpView{src.getOperation(), srcView},		LinalgOpView{src.getOperation(), srcIndex},
LinalgOpView{dst.getOperation(), dstView});		LinalgOpView{dst.getOperation(), dstIndex});
}		}
}		}
// WAR graph		// WAR graph
for (auto dstView : dst.getOutputBuffers()) { // W		for (auto dstView : llvm::enumerate(dst.getOutputBuffers())) { // W
if (aliases.alias(srcView, dstView)) { // if alias, fill WAR		if (aliases.alias(srcView.value(),
		dstView.value())) { // if alias, fill WAR
		unsigned dstIndex =
		dst.getOperandIndexForOutputIndex(dstView.index()).getValue();
addDependenceElem(DependenceType::WAR,		addDependenceElem(DependenceType::WAR,
LinalgOpView{src.getOperation(), srcView},		LinalgOpView{src.getOperation(), srcIndex},
LinalgOpView{dst.getOperation(), dstView});		LinalgOpView{dst.getOperation(), dstIndex});
}		}
}		}
}		}
}		}

SmallVector<Operation *, 8>		SmallVector<Operation *, 8>
LinalgDependenceGraph::findCoveringDependences(LinalgOp srcLinalgOp,		LinalgDependenceGraph::findCoveringDependences(LinalgOp srcLinalgOp,
LinalgOp dstLinalgOp) const {		LinalgOp dstLinalgOp) const {
Show All 31 Lines	LinalgDependenceGraph::findOperationsWithCoveringDependences(
// to an aliasing view on a src -> op -> dst path.		// to an aliasing view on a src -> op -> dst path.
// TODO: we are not considering paths yet, just interleaved positions.		// TODO: we are not considering paths yet, just interleaved positions.
for (auto dt : types) {		for (auto dt : types) {
for (auto dependence : getDependencesFrom(src, dt)) {		for (auto dependence : getDependencesFrom(src, dt)) {
auto interimPos = linalgOpPositions.lookup(dependence.dependentOpView.op);		auto interimPos = linalgOpPositions.lookup(dependence.dependentOpView.op);
// Skip if not interleaved.		// Skip if not interleaved.
if (interimPos >= dstPos \|\| interimPos <= srcPos)		if (interimPos >= dstPos \|\| interimPos <= srcPos)
continue;		continue;
if (view && !aliases.alias(view, dependence.indexingView))		linalg::LinalgOp consumer =
		cast<linalg::LinalgOp>(dependence.indexingOpView.op);
		Value consumerView =
		consumer.getShapedOperand(dependence.indexingOpView.operandIndex);
		if (view && !aliases.alias(view, consumerView))
continue;		continue;
		hanchungUnsubmitted Done Reply Inline Actions [optional] Do we want to capture the value for `consumer`? it's only used in a later statement -- `Value consumerView = ...` hanchung: [optional] Do we want to capture the value for `consumer`? it's only used in a later statement…
		mravishankarAuthorUnsubmitted Done Reply Inline Actions True, but the code is less readable IMO. mravishankar: True, but the code is less readable IMO.
auto *op = dependence.dependentOpView.op;		auto *op = dependence.dependentOpView.op;
LLVM_DEBUG(dbgs() << "\n***Found covering dependence of type "		LLVM_DEBUG(dbgs() << "\n***Found covering dependence of type "
<< getDependenceTypeStr(dt) << ": " << *src << " -> "		<< getDependenceTypeStr(dt) << ": " << *src << " -> "
<< *op << " on " << dependence.indexingView);		<< *op << " on " << consumerView);
res.push_back(op);		res.push_back(op);
}		}
}		}
return res;		return res;
}		}

bool LinalgDependenceGraph::hasDependenceFrom(		bool LinalgDependenceGraph::hasDependenceFrom(
LinalgOp srcLinalgOp, LinalgOp dstLinalgOp,		LinalgOp srcLinalgOp, LinalgOp dstLinalgOp,
▲ Show 20 Lines • Show All 71 Lines • Show Last 20 Lines

mlir/lib/Dialect/Linalg/Transforms/Fusion.cpp

Show All 18 Lines
#include "mlir/Dialect/Linalg/Transforms/Transforms.h"		#include "mlir/Dialect/Linalg/Transforms/Transforms.h"
#include "mlir/Dialect/Linalg/Utils/Utils.h"		#include "mlir/Dialect/Linalg/Utils/Utils.h"
#include "mlir/Dialect/StandardOps/EDSC/Intrinsics.h"		#include "mlir/Dialect/StandardOps/EDSC/Intrinsics.h"
#include "mlir/IR/AffineExpr.h"		#include "mlir/IR/AffineExpr.h"
#include "mlir/IR/AffineMap.h"		#include "mlir/IR/AffineMap.h"
#include "mlir/IR/Dominance.h"		#include "mlir/IR/Dominance.h"
#include "mlir/Support/LLVM.h"		#include "mlir/Support/LLVM.h"
#include "mlir/Transforms/GreedyPatternRewriteDriver.h"		#include "mlir/Transforms/GreedyPatternRewriteDriver.h"
#include "llvm/ADT/SetVector.h"		#include "llvm/ADT/MapVector.h"
#include "llvm/Support/CommandLine.h"		#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"		#include "llvm/Support/Debug.h"

		#include <set>

#define DEBUG_TYPE "linalg-fusion"		#define DEBUG_TYPE "linalg-fusion"

using namespace mlir;		using namespace mlir;
using namespace mlir::edsc;		using namespace mlir::edsc;
using namespace mlir::edsc::intrinsics;		using namespace mlir::edsc::intrinsics;
using namespace mlir::linalg;		using namespace mlir::linalg;

using llvm::dbgs;		using llvm::dbgs;
▲ Show 20 Lines • Show All 51 Lines • ▼ Show 20 Lines	static LinalgOp cloneWithLoopRanges(OpBuilder &b, Location loc, LinalgOp op,
SmallVector<Value, 8> clonedShapes;		SmallVector<Value, 8> clonedShapes;
clonedShapes.reserve(op.getNumShapedOperands());		clonedShapes.reserve(op.getNumShapedOperands());

// Iterate over the shape operands in order.		// Iterate over the shape operands in order.
// Extract the subranges from the linearized ranges.		// Extract the subranges from the linearized ranges.
for (auto en : llvm::enumerate(op.getShapedOperands())) {		for (auto en : llvm::enumerate(op.getShapedOperands())) {
unsigned shapedOperandIdx = en.index();		unsigned shapedOperandIdx = en.index();
AffineMap map = op.getIndexingMap(shapedOperandIdx);		AffineMap map = op.getIndexingMap(shapedOperandIdx);
LLVM_DEBUG(dbgs() << "shapedOperandIdx: " << shapedOperandIdx		LLVM_DEBUG(llvm::dbgs() << "shapedOperandIdx: " << shapedOperandIdx
<< " with indexingMap: " << map << "\n");		<< " with indexingMap: " << map << "\n");
SmallVector<Value, 4> offsets, sizes, strides;		SmallVector<Value, 4> offsets, sizes, strides;
inferShapeComponents(map, loopRanges, offsets, sizes, strides);		inferShapeComponents(map, loopRanges, offsets, sizes, strides);
Value shape = en.value();		Value shape = en.value();
Value sub = shape.getType().isa<MemRefType>()		Value sub = shape.getType().isa<MemRefType>()
? b.create<SubViewOp>(loc, shape, offsets, sizes, strides)		? b.create<SubViewOp>(loc, shape, offsets, sizes, strides)
.getResult()		.getResult()
: b.create<SubTensorOp>(loc, shape, offsets, sizes, strides)		: b.create<SubTensorOp>(loc, shape, offsets, sizes, strides)
.getResult();		.getResult();
▲ Show 20 Lines • Show All 56 Lines • ▼ Show 20 Lines	static ShapeDimension getShapeDefiningLoopRange(LinalgOp op,
unsigned loopDepth) {		unsigned loopDepth) {
auto maps = op.indexing_maps();		auto maps = op.indexing_maps();
// Iterate over the inputs and outputs in order.		// Iterate over the inputs and outputs in order.
// Extract the subranges from the linearized ranges.		// Extract the subranges from the linearized ranges.
SmallVector<Value, 8> ios(op.getInputsAndOutputBuffers());		SmallVector<Value, 8> ios(op.getInputsAndOutputBuffers());
for (auto en : llvm::enumerate(ios)) {		for (auto en : llvm::enumerate(ios)) {
unsigned idx = en.index();		unsigned idx = en.index();
auto map = maps[idx].cast<AffineMapAttr>().getValue();		auto map = maps[idx].cast<AffineMapAttr>().getValue();
LLVM_DEBUG(dbgs() << "getShapeDefiningLoopRange I/O idx: " << idx << "\n");		LLVM_DEBUG(llvm::dbgs()
LLVM_DEBUG(dbgs() << "getShapeDefiningLoopRange map: " << map << "\n");		<< "getShapeDefiningLoopRange I/O idx: " << idx << "\n");
		LLVM_DEBUG(llvm::dbgs()
		<< "getShapeDefiningLoopRange map: " << map << "\n");
Value shape = en.value();		Value shape = en.value();
SmallVector<Value, 8> shapeRanges(map.getNumResults(), nullptr);		SmallVector<Value, 8> shapeRanges(map.getNumResults(), nullptr);
for (auto en2 : llvm::enumerate(map.getResults())) {		for (auto en2 : llvm::enumerate(map.getResults())) {
if (loopDepth == en2.value().cast<AffineDimExpr>().getPosition()) {		if (loopDepth == en2.value().cast<AffineDimExpr>().getPosition()) {
LLVM_DEBUG(dbgs() << "getShapeDefiningLoopRange loopDepth: "		LLVM_DEBUG(llvm::dbgs() << "getShapeDefiningLoopRange loopDepth: "
<< loopDepth << "\n");		<< loopDepth << "\n");
LLVM_DEBUG(dbgs() << "getShapeDefiningLoopRange shape: " << shape		LLVM_DEBUG(llvm::dbgs()
<< "\n");		<< "getShapeDefiningLoopRange shape: " << shape << "\n");
return ShapeDimension{shape, static_cast<unsigned>(en2.index())};		return ShapeDimension{shape, static_cast<unsigned>(en2.index())};
}		}
}		}
}		}
llvm_unreachable("Expect to be able to extract a shape defining loop range");		llvm_unreachable("Expect to be able to extract a shape defining loop range");
}		}

/// Fuses the producer of `producerIdx` into the loop immediately enclosing		/// Fuses the producer of `producerIdx` into the loop immediately enclosing
Show All 14 Lines	assert((isa<SubViewOp>(shapeProducingOp) \|\|
isa<SubTensorOp>(shapeProducingOp)) &&		isa<SubTensorOp>(shapeProducingOp)) &&
"SubviewOp or SubTensorOp expected");		"SubviewOp or SubTensorOp expected");

// loopToOperandRangesMaps are permutations-only by construction:		// loopToOperandRangesMaps are permutations-only by construction:
// we can always identify a data dimension with a (at least one) loop		// we can always identify a data dimension with a (at least one) loop
// dimension.		// dimension.
// TODO: extend this with range inference.		// TODO: extend this with range inference.
AffineMap producerMap = producer.getOutputIndexingMap(producerIdx);		AffineMap producerMap = producer.getOutputIndexingMap(producerIdx);
LLVM_DEBUG(dbgs() << "Producer Idx: " << producerIdx		LLVM_DEBUG(llvm::dbgs() << "Producer Idx: " << producerIdx
<< ", producer map: " << producerMap << "\n");		<< ", producer map: " << producerMap << "\n");

unsigned nPar = producer.getNumParallelLoops();		unsigned nPar = producer.getNumParallelLoops();
unsigned nRed = producer.getNumReductionLoops();		unsigned nRed = producer.getNumReductionLoops();
unsigned nWin = producer.getNumWindowLoops();		unsigned nWin = producer.getNumWindowLoops();
SmallVector<Range, 8> loopRanges(nPar + nRed + nWin);		SmallVector<Range, 8> loopRanges(nPar + nRed + nWin);

// Iterate over dimensions identified by the producer map for `producerIdx`.		// Iterate over dimensions identified by the producer map for `producerIdx`.
// This defines a subset of the loop ranges that we need to complete later.		// This defines a subset of the loop ranges that we need to complete later.
Show All 31 Lines
// Some of these will be lifted in the future with better analysis.		// Some of these will be lifted in the future with better analysis.
static bool isStructurallyFusableProducer(LinalgOp producer, Value consumedView,		static bool isStructurallyFusableProducer(LinalgOp producer, Value consumedView,
LinalgOp consumer) {		LinalgOp consumer) {
assert(producer.hasBufferSemantics() &&		assert(producer.hasBufferSemantics() &&
"expected linalg op with buffer semantics");		"expected linalg op with buffer semantics");
assert(consumer.hasBufferSemantics() &&		assert(consumer.hasBufferSemantics() &&
"expected linalg op with buffer semantics");		"expected linalg op with buffer semantics");
if (producer.getNumOutputs() != 1) {		if (producer.getNumOutputs() != 1) {
LLVM_DEBUG(dbgs() << "\nNot structurally fusable (multi-output)");		LLVM_DEBUG(llvm::dbgs() << "\nNot structurally fusable (multi-output)");
return false;		return false;
}		}
// Only fuse when the producer block dominates.		// Only fuse when the producer block dominates.
DominanceInfo dom(producer.getOperation());		DominanceInfo dom(producer.getOperation());
if (!dom.dominates(producer.getOperation()->getBlock(),		if (!dom.dominates(producer.getOperation()->getBlock(),
consumer.getOperation()->getBlock())) {		consumer.getOperation()->getBlock())) {
LLVM_DEBUG(		LLVM_DEBUG(
dbgs()		llvm::dbgs()
<< "\nNot structurally fusable (producer block does not dominate)");		<< "\nNot structurally fusable (producer block does not dominate)");
return false;		return false;
}		}
return true;		return true;
}		}

bool mlir::linalg::isProducerLastWriteOfView(const LinalgDependenceGraph &graph,		bool mlir::linalg::isProducerLastWriteOfView(const LinalgDependenceGraph &graph,
LinalgOp consumer,		LinalgOp consumer,
Value consumedView,		Value consumedView,
LinalgOp producer) {		LinalgOp producer) {
assert(producer.hasBufferSemantics() &&		assert(producer.hasBufferSemantics() &&
"expected linalg op with buffer semantics");		"expected linalg op with buffer semantics");
assert(consumer.hasBufferSemantics() &&		assert(consumer.hasBufferSemantics() &&
"expected linalg op with buffer semantics");		"expected linalg op with buffer semantics");
// Make some simple structural checks that alleviate the need for more		// Make some simple structural checks that alleviate the need for more
// complex analyses.		// complex analyses.
if (!isStructurallyFusableProducer(producer, consumedView, consumer)) {		if (!isStructurallyFusableProducer(producer, consumedView, consumer)) {
LLVM_DEBUG(dbgs() << "\n***Not static last write due to structure:\t"		LLVM_DEBUG(llvm::dbgs() << "\n***Not static last write due to structure:\t"
<< *producer.getOperation());		<< *producer.getOperation());
return false;		return false;
}		}
// Check for any interleaved write to consumedView.		// Check for any interleaved write to consumedView.
if (!graph.findCoveringWrites(producer, consumer, consumedView).empty()) {		if (!graph.findCoveringWrites(producer, consumer, consumedView).empty()) {
LLVM_DEBUG(dbgs() << "\n***Not fusable due to interleaved write:\t"		LLVM_DEBUG(llvm::dbgs() << "\n***Not fusable due to interleaved write:\t"
<< *producer.getOperation());		<< *producer.getOperation());
return false;		return false;
}		}
return true;		return true;
}		}

bool mlir::linalg::isFusableInto(const LinalgDependenceGraph &graph,		bool mlir::linalg::isFusableInto(const LinalgDependenceGraph &graph,
LinalgOp consumer, Value consumedView,		LinalgOp consumer, Value consumedView,
LinalgOp producer) {		LinalgOp producer) {
assert(producer.hasBufferSemantics() &&		assert(producer.hasBufferSemantics() &&
"expected linalg op with buffer semantics");		"expected linalg op with buffer semantics");
assert(consumer.hasBufferSemantics() &&		assert(consumer.hasBufferSemantics() &&
"expected linalg op with buffer semantics");		"expected linalg op with buffer semantics");
if (!isProducerLastWriteOfView(graph, consumer, consumedView, producer))		if (!isProducerLastWriteOfView(graph, consumer, consumedView, producer))
return false;		return false;
// Check for any fusion-preventing dependence to any shape read/written that		// Check for any fusion-preventing dependence to any shape read/written that
// would violate dependences.		// would violate dependences.
if (!graph.findCoveringDependences(producer, consumer).empty()) {		if (!graph.findCoveringDependences(producer, consumer).empty()) {
LLVM_DEBUG(dbgs() << "\n***Not fusable due to an interleaved dependence:\t"		LLVM_DEBUG(llvm::dbgs()
		<< "\n***Not fusable due to an interleaved dependence:\t"
<< *producer.getOperation());		<< *producer.getOperation());
return false;		return false;
}		}
if (auto convOp = dyn_cast<linalg::ConvOp>(producer.getOperation())) {		if (auto convOp = dyn_cast<linalg::ConvOp>(producer.getOperation())) {
// TODO: add a level of indirection to linalg.generic.		// TODO: add a level of indirection to linalg.generic.
if (convOp.padding())		if (convOp.padding())
return false;		return false;
}		}
if (auto convOp = dyn_cast<linalg::ConvOp>(consumer.getOperation())) {		if (auto convOp = dyn_cast<linalg::ConvOp>(consumer.getOperation())) {
Show All 33 Lines
static Optional<LinalgDependenceGraph::LinalgDependenceGraphElem>		static Optional<LinalgDependenceGraph::LinalgDependenceGraphElem>
findFusableProducer(LinalgOp consumer, unsigned consumerIdx,		findFusableProducer(LinalgOp consumer, unsigned consumerIdx,
const LinalgDependenceGraph &dependenceGraph) {		const LinalgDependenceGraph &dependenceGraph) {
// Only consider RAW and WAW atm.		// Only consider RAW and WAW atm.
for (auto depType : {		for (auto depType : {
LinalgDependenceGraph::DependenceType::RAW,		LinalgDependenceGraph::DependenceType::RAW,
LinalgDependenceGraph::DependenceType::WAW,		LinalgDependenceGraph::DependenceType::WAW,
}) {		}) {
for (auto dependence :		for (auto dependence : llvm::make_filter_range(
dependenceGraph.getDependencesInto(consumer, depType)) {		dependenceGraph.getDependencesInto(consumer, depType),
		[consumerIdx](
		hanchungUnsubmitted Done Reply Inline Actions I think it's okay to pass without reference, because it is `unsigned` and we won't update the variable. It's a bit like passing an unsigned value without reference to a function. hanchung: I think it's okay to pass without reference, because it is `unsigned` and we won't update the…
		LinalgDependenceGraph::LinalgDependenceGraphElem elem) {
		return elem.indexingOpView.operandIndex == consumerIdx;
		})) {
auto producer = cast<LinalgOp>(dependence.dependentOpView.op);		auto producer = cast<LinalgOp>(dependence.dependentOpView.op);

// Check that the dependence is indeed on the input `consumerIdx` view.		// Check that the dependence is indeed on the input `consumerIdx` view.
auto consumedView = dependence.indexingView;		auto consumedView =
		consumer.getBuffer(dependence.indexingOpView.operandIndex);
if (!isSameSubView(consumer.getBuffer(consumerIdx), consumedView))		if (!isSameSubView(consumer.getBuffer(consumerIdx), consumedView))
continue;		continue;

// Consumer consumes this view, `isStructurallyFusableProducer` also		// Consumer consumes this view, `isStructurallyFusableProducer` also
// checks whether it is a strict subview of the producer view.		// checks whether it is a strict subview of the producer view.
auto producedView = dependence.dependentOpView.view;		auto producedView =
auto producerIdx =		producer.getBuffer(dependence.dependentOpView.operandIndex);
producer.getIndexOfOutputBuffer(producedView).getValue();		LLVM_DEBUG(llvm::dbgs()
// `consumerIdx` and `producerIdx` exist by construction.		<< "\n"
LLVM_DEBUG(dbgs() << "\n"
<< LinalgDependenceGraph::getDependenceTypeStr(depType)		<< LinalgDependenceGraph::getDependenceTypeStr(depType)
<< "producer: " << *producer.getOperation() << " view: "		<< "producer: " << *producer.getOperation()
<< producedView << " output index: " << producerIdx);		<< " view: " << producedView << " output index: "
(void)producerIdx;		<< dependence.dependentOpView.operandIndex -
		producer.getNumInputs()
		<< "\n");
		(void)producedView;

// Simple fusability checks.		// Simple fusability checks.
if (!isFusableInto(dependenceGraph, consumer, consumedView, producer))		if (!isFusableInto(dependenceGraph, consumer, consumedView, producer))
continue;		continue;

return dependence;		return dependence;
}		}
}		}
Show All 10 Lines	if (!fusableDependence)
return {};		return {};

LinalgOp producerOp = cast<LinalgOp>(fusableDependence->dependentOpView.op);		LinalgOp producerOp = cast<LinalgOp>(fusableDependence->dependentOpView.op);
// If producer is already in the same block as consumer, we are done.		// If producer is already in the same block as consumer, we are done.
if (consumer.getOperation()->getBlock() ==		if (consumer.getOperation()->getBlock() ==
producerOp.getOperation()->getBlock())		producerOp.getOperation()->getBlock())
return {};		return {};

Value producerView = fusableDependence->dependentOpView.view;		unsigned producerIdx = fusableDependence->dependentOpView.operandIndex -
Value consumerView = fusableDependence->indexingView;		producerOp.getNumInputs();
		Value consumerView = consumer.getShapedOperand(consumerIdx);

// Must be a subview or a slice to guarantee there are loops we can fuse		// Must be a subview or a slice to guarantee there are loops we can fuse
// into.		// into.
auto subView = consumerView.getDefiningOp<SubViewOp>();		auto subView = consumerView.getDefiningOp<SubViewOp>();
auto slice = consumerView.getDefiningOp<SliceOp>();		auto slice = consumerView.getDefiningOp<SliceOp>();
if (!subView && !slice) {		if (!subView && !slice) {
LLVM_DEBUG(dbgs() << "\nNot fusable (not a subview or slice)");		LLVM_DEBUG(llvm::dbgs() << "\nNot fusable (not a subview or slice)");
return {};		return {};
}		}

// Fuse `producer` just before `consumer`.		// Fuse `producer` just before `consumer`.
OpBuilder::InsertionGuard g(b);		OpBuilder::InsertionGuard g(b);
b.setInsertionPoint(consumer.getOperation());		b.setInsertionPoint(consumer.getOperation());
ScopedContext scope(b, consumer.getLoc());		ScopedContext scope(b, consumer.getLoc());
LLVM_DEBUG(dbgs() << "Fuse into consumer: " << *consumer << "\n");		LLVM_DEBUG(llvm::dbgs() << "Fuse into consumer: " << *consumer << "\n");
Optional<unsigned> producerIdxOpt =
producerOp.getIndexOfOutputBuffer(producerView);
assert(producerIdxOpt.hasValue() && "incorrect operand index");
unsigned producerIdx = producerIdxOpt.getValue();

auto fusedProducer = fuse(b, producerOp, producerIdx, consumer, consumerIdx);		auto fusedProducer = fuse(b, producerOp, producerIdx, consumer, consumerIdx);
return FusionInfo{producerOp, fusedProducer};		return FusionInfo{producerOp, fusedProducer};
}		}

/// Walk back use-def chain through scf::For yields.		/// Walk back use-def chain through scf::For yields.
/// Sets `producer` and `outputIndex` if it finds a producer LinalgOp		/// Sets `producer` and `outputIndex` if it finds a producer LinalgOp
static void getProducerOfTensor(Value tensor, LinalgOp &producer,		static void getProducerOfTensor(Value tensor, LinalgOp &producer,
Show All 27 Lines	Optional<FusionInfo> mlir::linalg::fuseProducerOfTensor(OpBuilder &b,
Value inputTensor = consumer.getInput(consumerIdx);		Value inputTensor = consumer.getInput(consumerIdx);
LinalgOp producerOp;		LinalgOp producerOp;
unsigned producerIdx;		unsigned producerIdx;
getProducerOfTensor(inputTensor, producerOp, producerIdx);		getProducerOfTensor(inputTensor, producerOp, producerIdx);

// Must be a subtensor to guarantee there are loops we can fuse into.		// Must be a subtensor to guarantee there are loops we can fuse into.
auto subTensor = inputTensor.getDefiningOp<SubTensorOp>();		auto subTensor = inputTensor.getDefiningOp<SubTensorOp>();
if (!subTensor \|\| !producerOp) {		if (!subTensor \|\| !producerOp) {
LLVM_DEBUG(dbgs() << "\nNot fusable (not a subtensor)");		LLVM_DEBUG(llvm::dbgs() << "\nNot fusable (not a subtensor)");
return {};		return {};
}		}

// If producer is already in the same block as consumer, we are done.		// If producer is already in the same block as consumer, we are done.
if (consumer.getOperation()->getBlock() ==		if (consumer.getOperation()->getBlock() ==
producerOp.getOperation()->getBlock())		producerOp.getOperation()->getBlock())
return {};		return {};

// Insert fused `producer` just before `consumer`.		// Insert fused `producer` just before `consumer`.
OpBuilder::InsertionGuard g(b);		OpBuilder::InsertionGuard g(b);
b.setInsertionPoint(consumer.getOperation());		b.setInsertionPoint(consumer.getOperation());
ScopedContext scope(b, consumer.getLoc());		ScopedContext scope(b, consumer.getLoc());
LLVM_DEBUG(dbgs() << "Fuse into consumer: " << *consumer << "\n");		LLVM_DEBUG(llvm::dbgs() << "Fuse into consumer: " << *consumer << "\n");
LinalgOp fusedProducer =		LinalgOp fusedProducer =
fuse(b, producerOp, producerIdx, consumer, consumerIdx);		fuse(b, producerOp, producerIdx, consumer, consumerIdx);

// Replace use.		// Replace use.
// Canonicalizations are not guaranteed to have happened before constructing		// Canonicalizations are not guaranteed to have happened before constructing
// `fusedProducer`. In the tensor case this can result in temporary type		// `fusedProducer`. In the tensor case this can result in temporary type
// mismatches. Insert a `tensor_cast` op to propagate the transformation		// mismatches. Insert a `tensor_cast` op to propagate the transformation
// invariant that types are compatible.		// invariant that types are compatible.
Value def = fusedProducer.getOperation()->getResult(producerIdx);		Value def = fusedProducer.getOperation()->getResult(producerIdx);
OpOperand &use = consumer.getOperation()->getOpOperand(consumerIdx);		OpOperand &use = consumer.getOperation()->getOpOperand(consumerIdx);
Type consumerType = use.get().getType();		Type consumerType = use.get().getType();
if (consumerType != def.getType())		if (consumerType != def.getType())
def = b.create<TensorCastOp>(fusedProducer.getLoc(), consumerType, def);		def = b.create<TensorCastOp>(fusedProducer.getLoc(), consumerType, def);
use.set(def);		use.set(def);
return FusionInfo{producerOp, fusedProducer};		return FusionInfo{producerOp, fusedProducer};
}		}

		/// Prune all dimensions that are of reduction iterator type from `map`.
		static AffineMap pruneReductionDimsFromMap(ArrayRef<Attribute> iteratorTypes,
		AffineMap map) {
		SmallVector<unsigned, 2> projectedDims;
		for (auto attr : llvm::enumerate(iteratorTypes)) {
		if (!isParallelIterator(attr.value()))
		projectedDims.push_back(attr.index());
		}
		return getProjectedMap(map, projectedDims);
		}

		using FusableOpDependencesTy = llvm::MapVector<
		Operation *,
		SmallVector<LinalgDependenceGraph::LinalgDependenceGraphElem, 1>>;

/// Returns the positions of the loop in `op` that can be tiled based on the		/// Returns the positions of the loop in `op` that can be tiled based on the
/// operations that are to be fused with it. For example, in a		/// operations that are to be fused with it. For example, in a
///		///
/// linalg.matmul ins(%a, %b : ...) outs(%c : ...)		/// linalg.matmul ins(%a, %b : ...) outs(%c : ...)
///		///
/// if the producer of %a needs to be fused with this op, only the `i` loop of		/// if the producer of %a needs to be fused with this op, only the `i` loop of
/// the matmul can be tiled while fusing. If producer of %a, and %b are to be		/// the matmul can be tiled while fusing. If producer of %a, and %b are to be
/// fused, then no loops can be tiled while fusing.		/// fused, then no loops can be tiled while fusing. The conditions used are:
static DenseSet<unsigned> collectTileAndFuseLoops(		/// 1. Only parallel loops can be used for tile + fuse. Find the number of
LinalgOp op, ArrayRef<LinalgDependenceGraph::LinalgDependenceGraphElem>		/// common outer parallel loops between the op and its producers being fused.
fusableDependences) {		/// 2. Of the parallel loops only some can be fused. Only those loops can be
		antiagainstUnsubmitted Done Reply Inline Actions A fusable loop's iteration space only touches ... antiagainst: A fusable loop's iteration space only touches ...
// 1. Only parallel loops can be used for tile + fuse. Find the number of		/// fused where the fusable loop's iteration space only touches one tile
// common outer parallel loops between the op and its producers being fused.		/// of the fused operation. This is because the producer (which is writing
		nicolasvasilacheUnsubmitted Done Reply Inline Actions It seems to me you are computing a simple projection of `map`. Is a composition of `map` with `AffineMap::getSubMap` appropriate for this? nicolasvasilache: It seems to me you are computing a simple projection of `map`. Is a composition of `map` with…
		mravishankarAuthorUnsubmitted Done Reply Inline Actions I couldn't work out how to use a compose to do this. Suggestions? mravishankar: I couldn't work out how to use a compose to do this. Suggestions?
		nicolasvasilacheUnsubmitted Done Reply Inline Actions On second look, not with `getSubMap` (which is already a composition with a permutation) but I think something like this could work for you: SmallVector<AffineExpr, 4> projection; for (auto attr : llvm::enumerate(iteratorTypes)) if (!isParallelIterator(attr.value())) projection.push_back(getAffineDimExpr(attr.index(), ctx)); unsigned projectionRank = projection.size(); projection.append(iteratorTypes.size() - projectionRank, getAffineConstant(0, ctx)); return AffineMap::get(projection.size(), 0, projectionRank, ctx).compose(map).getMajorSubMap(projectionRank); The first n-1 lines could be refactored into a new `AffineMap::getProjectionMap(ArrayRef<unsigned> positions, unsigned codomainRank)`. nicolasvasilache: On second look, not with `getSubMap` (which is already a composition with a permutation) but I…
		nicolasvasilacheUnsubmitted Done Reply Inline Actions actually 3 more things: 1. the proper expression is prob `map.compose(AffineMap::get(...))` 2. s/`!ifParallelIterator/isParallelIterator` 3. you may need to drop the 0 results of the output AffineMap for your use case. nicolasvasilache: actually 3 more things: 1. the proper expression is prob `map.compose(AffineMap::get(...))` 2.
		mravishankarAuthorUnsubmitted Done Reply Inline Actions Nice! It actually helped me concretize something I was stuck at downstream of this change. Added the method as you asked for. PTAL mravishankar: Nice! It actually helped me concretize something I was stuck at downstream of this change.
		/// the fused subview) has update semantics. To compute this,
		/// a. Find the mapping from iterations in the consumer that write to the
		/// same location as the iterations in the producer. To do so use
		/// - indexing map of the fused view in the consumer : consumerIndexMap
		/// - indexing map of the fused view in the producer : producerIndexMap
		/// consumerLoopToProducerLoop =
		/// inverse(producerIndexMap).compose(consumerIndexMap)
		///
		/// Since an inverse computation is needed, we need to consider the projection
		/// of the producerIndexMap w.r.t the parallel loops. The actual fusable loops
		antiagainstUnsubmitted Done Reply Inline Actions The actual fusable loops are .. antiagainst: The actual fusable loops are ..
		/// are the dimensions of the consumerLoopToProducerLoop map that correspond to
		/// parallel loops and appear in the result of the map
		///
		/// Example 1:
		/// linalg.fill(%c, %cst)
		/// linalg.matmul ins(%a, %b) outs(%c)
		/// Number of parallel loops : 2
		/// producerIndexMap = affine_map<(i, j) ->(i , j)>
		/// consumerIndexMap = affine_map<(i, j, k) -> (i, j)>
		/// consumerLoopToProducerLoop = affine_map<(i, j, k) -> (i, j)>
		/// Fused dimensions : i, j
		///
		/// Example 2:
		/// linalg.matmul ins(%a, %b) outs(%c)
		/// linalg.generic {indexing_maps = [affine_map<(i, j) -> (j, i)>, ...
		/// iterator_types = ["parallel", "parallel"]}
		/// ins(%c) ...
		///
		/// Number of parallel loops = 2:
		/// producerIndexMap (projected to parallel loops) =
		/// affine_map<(i, j) -> (i, j)>
		/// consumerLoopToProducerLoop2 = affine_map<(i, j) -> (j, i)>
		/// Fused dimensions : i, j
		///
		/// Example 3:
		/// linalg.copy(%s, %b)
		/// linalg.matmul ins(%a, %b) outs(%c)
		///
		/// Number of parallel loops = 2
		/// producerIndexMap : affine_map<(i, j) -> (i, j)>
		/// consumerLoopToProducerLoop = affine_map<(i, j, k) -> (k, j)>
		/// submap with only parallel loops = affine_map<(i, j) -> (j)>
		/// Fused dimensions : j
		static std::set<unsigned>
		nicolasvasilacheUnsubmitted Done Reply Inline Actions You probably want to document the case for which you have done all this: `A + A^T` where A comes from a matmul ? nicolasvasilache: You probably want to document the case for which you have done all this: ` A + A^T` where A…
		mravishankarAuthorUnsubmitted Done Reply Inline Actions Right now this case doesn't really come here. It is treated as an unfusable dependence. When I add the promotion (or test it on tensors) I can add that example. It is actually easier since we just need to take the closest outer parallel loops. mravishankar: Right now this case doesn't really come here. It is treated as an unfusable dependence. When I…
		collectTileAndFuseLoops(LinalgOp op,
		const FusableOpDependencesTy &fusableDependences) {
auto getNumOuterParallelLoops = [](LinalgOp linalgOp) {		auto getNumOuterParallelLoops = [](LinalgOp linalgOp) {
return linalgOp.iterator_types()		return linalgOp.iterator_types()
.getValue()		.getValue()
.take_while([](Attribute attr) -> bool {		.take_while([](Attribute attr) -> bool {
return attr.cast<StringAttr>().getValue() ==		return attr.cast<StringAttr>().getValue() ==
getParallelIteratorTypeName();		getParallelIteratorTypeName();
})		})
.size();		.size();
};		};

		LLVM_DEBUG({
		llvm::dbgs() << "Op : ";
		op.getOperation()->print(llvm::dbgs(), OpPrintingFlags().useLocalScope());
		llvm::dbgs() << "\n";
		});

size_t numOuterParallelLoops = getNumOuterParallelLoops(op);		size_t numOuterParallelLoops = getNumOuterParallelLoops(op);
for (auto dependence : fusableDependences) {		for (auto dependence : fusableDependences) {
		linalg::LinalgOp producer = cast<linalg::LinalgOp>(dependence.first);
numOuterParallelLoops =		numOuterParallelLoops =
std::min(numOuterParallelLoops, getNumOuterParallelLoops(cast<LinalgOp>(		std::min(numOuterParallelLoops, getNumOuterParallelLoops(producer));
dependence.dependentOpView.op)));
}		}

// Need to compute what tiled loops can be "fused". Given the precondition		std::set<unsigned> fusableLoops;
// that all indexing map for the producer view is a projected permutation, we		auto range = llvm::seq<unsigned>(0, numOuterParallelLoops);
// can assert that the producer iterates over the dimensions of the "fused		fusableLoops.insert(range.begin(), range.end());
// view" only once. To be used a fused loop the producer should use this loop
// to access the fused view. For example, consider
//
// ```
// linalg.add ins(%a, %b) outs(%c)
// linalg.matmul ins(%d, %c) outs(%e)
// ```
//
// if `linalg.add` has the semantics of `c = a + b`, then the following
// tile+fuse code is correct.
//
// ```
// for j ... += TSj
// %sa = subview %a[0, %j][...]
// %sb = subview %b[0, %j][...]
// %sc = subview %c[0, %j][...]
// %sd = subview %d[0, 0][...]
// %se = subview %e[0, %j][...]
// linalg.add ins(%sa, %sb) outs(%sc)
// linalg.matmul ins(%sd, %sc) outs(%se)
// ```
//
// On the other hand tiling along i would be incorrect
//
// ```
// for %i .. += TSi
// %sa = subview %a[%i, 0][...]
// %sb = subview %b[%i, 0][...]
// %sc = subview %c[%i, 0][...]
// %sc2 = subview %c[0, 0][...]
// %sd = subview %d[%i, 0][...]
// %se = subview %e[%i, 0][...]
// linalg.add ins(%sa, %sb) outs(%sc)
// linalg.matmul ins(%sd, %sc2) outs(%se)
// ```
//
// The write to the subview `%sc` in `linalg.add` is performed after the read
// from it using `%sc2` violating the RAW dependence of the original code. To
// find such loops indexing map of the fused view in the consumer op is
// used. For the above example, this indexing map is
//
// affine_map<(d0, d1, d2) -> (d2, d1)>
//
// Since d0 is not in the result expressions of this map, it is not treated as
// tile + fuse loop, (but d1 is).
//
// TODO: The above is probably restrictive and there might be a generalization
// of these that might allow for more fusion opportunities. Explore based on
// needs.
SmallVector<DenseSet<unsigned>, 1> commonTilableLoops;
for (auto dependence : fusableDependences) {		for (auto dependence : fusableDependences) {
unsigned consumerIdx =		LLVM_DEBUG({
op.getIndexOfShapedOperand(dependence.indexingView).getValue();		llvm::dbgs() << "\t fusable :";
AffineMap consumerAccess = op.getIndexingMap(consumerIdx);		for (unsigned i : fusableLoops)
// Previously asserted that the consumerAccess map is a projected		llvm::dbgs() << " " << i;
// permutation, so all results are known to be AffineDimExprs. To remove		llvm::dbgs() << "\n";
// this restriction walk the expression to find which dimensions of the		});
// consumer loop appear in the `consumerAccess`.		linalg::LinalgOp producer = cast<linalg::LinalgOp>(dependence.first);
DenseSet<unsigned> positions;
for (auto expr : consumerAccess.getResults())		assert(!dependence.second.empty() &&
positions.insert(expr.cast<AffineDimExpr>().getPosition());		"unexpected producer but not dependences");
commonTilableLoops.emplace_back(std::move(positions));		AffineMap producerIndexingMap = producer.getIndexingMap(
}		dependence.second.front().dependentOpView.operandIndex);
		AffineMap prunedProducerIndexingMap = pruneReductionDimsFromMap(
// 2. Of the outer parallel loops, only those loops can be tiled + fused as		producer.iterator_types().getValue(), producerIndexingMap);
// computed above for all the fused dependences can be used to tile and fuse.		if (!prunedProducerIndexingMap.isPermutation())
DenseSet<unsigned> tilableParallelLoops;		return {};
for (auto index : llvm::seq<unsigned>(0, numOuterParallelLoops)) {
if (llvm::all_of(commonTilableLoops,		AffineMap consumerIndexingMap = op.getIndexingMap(
[&](const DenseSet<unsigned> &tilableLoops) {		dependence.second.front().indexingOpView.operandIndex);
return tilableLoops.count(index);		if (consumerIndexingMap.getNumResults() !=
}))		prunedProducerIndexingMap.getNumResults())
tilableParallelLoops.insert(index);		return {};

		LLVM_DEBUG({
		llvm::dbgs() << "\t producerMap : ";
		producerIndexingMap.print(llvm::dbgs());
		llvm::dbgs() << " pruned : ";
		prunedProducerIndexingMap.print(llvm::dbgs());
		llvm::dbgs() << "\n";
		llvm::dbgs() << "\t consumerMap : ";
		consumerIndexingMap.print(llvm::dbgs());
		llvm::dbgs() << "\n";
		});

		AffineMap invProducerIndexMap =
		inversePermutation(prunedProducerIndexingMap);
		if (!invProducerIndexMap)
		return {};

		AffineMap consumerLoopToProducerLoop =
		invProducerIndexMap.compose(consumerIndexingMap);

		LLVM_DEBUG({
		llvm::dbgs() << "\t consumerLoopToProducerLoop : ";
		consumerLoopToProducerLoop.print(llvm::dbgs());
		});

		std::set<unsigned> candidates;
		for (AffineExpr expr : consumerLoopToProducerLoop.getResults()) {
		AffineDimExpr dimExpr = expr.dyn_cast<AffineDimExpr>();
		if (!dimExpr)
		continue;
		unsigned position = dimExpr.getPosition();
		if (fusableLoops.count(position))
		candidates.insert(position);
		}
		LLVM_DEBUG({
		llvm::dbgs() << "\t candidates :";
		for (unsigned i : candidates)
		llvm::dbgs() << " " << i;
		llvm::dbgs() << "\n";
		});
		if (candidates.empty())
		return {};
		std::swap(candidates, fusableLoops);
}		}
return tilableParallelLoops;
		return fusableLoops;
}		}

/// Find all dependences that are to be fusable.		/// Find all dependences that are to be fusable.
static Optional<		static FusableOpDependencesTy
SmallVector<LinalgDependenceGraph::LinalgDependenceGraphElem, 1>>
findAllFusableDependences(LinalgOp op,		findAllFusableDependences(LinalgOp op,
const LinalgDependenceGraph &dependenceGraph,		const LinalgDependenceGraph &dependenceGraph,
const LinalgFusionOptions &fusionOptions) {		const LinalgFusionOptions &fusionOptions) {
SmallVector<LinalgDependenceGraph::LinalgDependenceGraphElem, 1>		FusableOpDependencesTy fusableDependences;
		nicolasvasilacheUnsubmitted Done Reply Inline Actions The control flow under this loop is somewhat unclear to me. Some conditions skip the current operand, some conditions exit the functions and the last one breaks out of the loop. Can we restructure/fission this loop to first rule out the empty case ? Processing the data 2 or 3 times seems minor compared to the expected gain in readability. nicolasvasilache: The control flow under this loop is somewhat unclear to me. Some conditions skip the current…
		mravishankarAuthorUnsubmitted Done Reply Inline Actions Split the loop to first find the index of operand to fuse, and then do the fusion of those mravishankar: Split the loop to first find the index of operand to fuse, and then do the fusion of those
fusableDependences;		// TODO: Currently fusion would not be legal if the fusable dependence is to
for (auto operand : llvm::enumerate(op.getInputsAndOutputBuffers())) {		// the same producer but different indexing map in the consumer. Fix this, but
if (fusionOptions.indicesToFuse &&		// in the meanwhile disallow such a fusion.
!fusionOptions.indicesToFuse->count(operand.index()))		DenseMap<Operation *, AffineMap> fusedProducerIndexingMap;
continue;		for (auto operandIndex : fusionOptions.indicesToFuse) {
Optional<LinalgDependenceGraph::LinalgDependenceGraphElem>		auto fusableDependence =
fusableDependence =		findFusableProducer(op, operandIndex, dependenceGraph);
findFusableProducer(op, operand.index(), dependenceGraph);
if (!fusableDependence)		if (!fusableDependence)
continue;		return FusableOpDependencesTy{};
		LinalgOp producerOp = cast<LinalgOp>(fusableDependence->dependentOpView.op);
		// Do not fuse dependences that are to operations not in the same basic
		// block. This avoids moving fused operations across loops that might
		// themselves carry dependences, making the fusion illegal.
		if (producerOp.getOperation()->getBlock() !=
		nicolasvasilacheUnsubmitted Not Done Reply Inline Actions expose + reuse "areOpsInSameBlock" ? nicolasvasilache: expose + reuse "areOpsInSameBlock" ?
		op.getOperation()->getBlock()) {
		op.emitRemark("unhandled fusion of ops in different basic blocks");
		return FusableOpDependencesTy{};
		}
		hanchungUnsubmitted Not Done Reply Inline Actions Do we want tabs `\t` for indents? hanchung: Do we want tabs `\t` for indents?
		mravishankarAuthorUnsubmitted Done Reply Inline Actions Maybe, but it's a simple check here. Not sure where to put that function. mravishankar: Maybe, but it's a simple check here. Not sure where to put that function.
// Make sure that the indexing map of the view used for fusion in the		// Make sure that the indexing map of the view used for fusion in the
// producer is a projected permutation.		// producer is a projected permutation.
LinalgOp producerOp = cast<LinalgOp>(fusableDependence->dependentOpView.op);		unsigned producerIdx = fusableDependence->dependentOpView.operandIndex;
Value producerView = fusableDependence->dependentOpView.view;		AffineMap producerMap = producerOp.getIndexingMap(producerIdx);
unsigned producerIdx =
producerOp.getIndexOfOutputBuffer(producerView).getValue();
AffineMap producerMap = producerOp.getOutputIndexingMap(producerIdx);
if (!producerMap.isProjectedPermutation()) {		if (!producerMap.isProjectedPermutation()) {
op.emitError("unhandled non permutation indexing map for fused view in "		op.emitRemark("unhandled non permutation indexing map for fused view in "
"producer for operand at index ")		"producer for operand at index ")
<< operand.index();		<< operandIndex;
return llvm::None;		return FusableOpDependencesTy{};
}		}
Value consumerView = fusableDependence->indexingView;
unsigned consumerIdx = op.getIndexOfShapedOperand(consumerView).getValue();		unsigned consumerIdx = fusableDependence->indexingOpView.operandIndex;
if (!op.getIndexingMap(consumerIdx).isProjectedPermutation()) {		AffineMap consumerMap = op.getIndexingMap(consumerIdx);
op.emitError(		if (!consumerMap.isProjectedPermutation()) {
		op.emitRemark(
"unhandled case where indexing map for fused view in the consumer is "		"unhandled case where indexing map for fused view in the consumer is "
"not a projected permuration while fusing at index ")		"not a projected permutation while fusing at index ")
		nicolasvasilacheUnsubmitted Done Reply Inline Actions typo nicolasvasilache: typo
<< operand.index();		<< operandIndex;
return llvm::None;		return FusableOpDependencesTy{};
		}

		// Check if the producer is already a fusion candidate. Cannot fuse this
		// dependence if it has a different indexing map when used in the consumer.
		if (fusedProducerIndexingMap.count(producerOp.getOperation()) &&
		fusedProducerIndexingMap[producerOp.getOperation()] != consumerMap) {
		op.emitRemark("unhandled fusion to the same producer but with different "
		"indexing maps");
		return FusableOpDependencesTy{};
}		}
fusableDependences.push_back(*fusableDependence);		fusedProducerIndexingMap[producerOp.getOperation()] = consumerMap;
if (!fusionOptions.indicesToFuse)
break;		fusableDependences[producerOp.getOperation()].push_back(*fusableDependence);
}		}
return fusableDependences;		return fusableDependences;
}		}

static bool isZero(Value v) {		static bool isZero(Value v) {
if (auto cst = v.getDefiningOp<ConstantIndexOp>())		if (auto cst = v.getDefiningOp<ConstantIndexOp>())
return cst.getValue() == 0;		return cst.getValue() == 0;
return false;		return false;
Show All 13 Lines	if (!tilingOptions.interchangeVector.empty()) {
return llvm::None;		return llvm::None;
}		}

OpBuilder::InsertionGuard g(rewriter);		OpBuilder::InsertionGuard g(rewriter);
rewriter.setInsertionPoint(op);		rewriter.setInsertionPoint(op);
ScopedContext scope(rewriter, op.getLoc());		ScopedContext scope(rewriter, op.getLoc());

// Find all the producers.		// Find all the producers.
Optional<SmallVector<LinalgDependenceGraph::LinalgDependenceGraphElem, 1>>		FusableOpDependencesTy fusableDependences =
fusableDependencesOpt =
findAllFusableDependences(op, dependenceGraph, fusionOptions);		findAllFusableDependences(op, dependenceGraph, fusionOptions);
if (!fusableDependencesOpt)		if (fusableDependences.empty())
return llvm::None;		return llvm::None;
ArrayRef<LinalgDependenceGraph::LinalgDependenceGraphElem> fusableDependences(
*fusableDependencesOpt);

// Enforce the convention that "tiling by zero" skips tiling a particular		// Enforce the convention that "tiling by zero" skips tiling a particular
// dimension. This convention is significantly simpler to handle instead of		// dimension. This convention is significantly simpler to handle instead of
// adjusting affine maps to account for missing dimensions.		// adjusting affine maps to account for missing dimensions.
auto nLoops = op.getNumLoops();		auto nLoops = op.getNumLoops();
SmallVector<Value, 4> tileSizeVector =		SmallVector<Value, 4> tileSizeVector =
tilingOptions.tileSizeComputationFunction(rewriter, op);		tilingOptions.tileSizeComputationFunction(rewriter, op);
if (tileSizeVector.size() < nLoops) {		if (tileSizeVector.size() < nLoops) {
auto zero = std_constant_index(0);		auto zero = std_constant_index(0);
tileSizeVector.append(nLoops - tileSizeVector.size(), zero);		tileSizeVector.append(nLoops - tileSizeVector.size(), zero);
}		}

TiledAndFusedLinalgOps ret;		TiledAndFusedLinalgOps ret;

// Find the loops that can be tiled and fused.		// Find the loops that can be tiled and fused.
DenseSet<unsigned> tileFuseLoops =		std::set<unsigned> tileFuseLoops =
collectTileAndFuseLoops(op, fusableDependences);		collectTileAndFuseLoops(op, fusableDependences);

// If there are no fusable dependences or there are no tile+fusable loops,		// If there are no fusable dependences or there are no tile+fusable loops,
// just return.		// just return.
if (fusableDependences.empty() \|\| tileFuseLoops.empty()) {		if (tileFuseLoops.empty()) {
return llvm::None;		return llvm::None;
}		}
		hanchungUnsubmitted Not Done Reply Inline Actions nit: remove trivial braces It's good to fix to me because you touched this condition. But it's okay to leave it this way because it doesn't really relate to the change... hanchung: nit: remove trivial braces It's good to fix to me because you touched this condition. But it's…

// Get the tile sizes for the first and second tiling steps. For the first		// Get the tile sizes for the first and second tiling steps. For the first
// step the tile sizes are set to zero for the loops that aren't		// step the tile sizes are set to zero for the loops that aren't
// fused. Similarly for the second step, the tile sizes are set to zero for		// fused. Similarly for the second step, the tile sizes are set to zero for
// the loops that are fused. For example, if for the following input		// the loops that are fused. For example, if for the following input
//		//
// ```		// ```
// linalg.add ins(%a, %b) outs(%c)		// linalg.add ins(%a, %b) outs(%c)
Show All 24 Lines	Optional<TiledLinalgOp> firstTiledOp =
tileLinalgOp(rewriter, op, firstTilingOptions);		tileLinalgOp(rewriter, op, firstTilingOptions);
if (!firstTiledOp)		if (!firstTiledOp)
return llvm::None;		return llvm::None;
ret.op = firstTiledOp->op;		ret.op = firstTiledOp->op;
ret.fusedLoops.assign(firstTiledOp->loops.begin(), firstTiledOp->loops.end());		ret.fusedLoops.assign(firstTiledOp->loops.begin(), firstTiledOp->loops.end());

rewriter.setInsertionPoint(ret.op);		rewriter.setInsertionPoint(ret.op);
// Fuse the operands.		// Fuse the operands.
for (auto producer : enumerate(fusableDependences)) {		for (auto dependence : fusableDependences) {
LinalgOp producerOp = cast<LinalgOp>(producer.value().dependentOpView.op);		LinalgOp producerOp = cast<LinalgOp>(dependence.first);
unsigned producerIdx =		unsigned producerIdx =
producerOp.getIndexOfOutputBuffer(producer.value().dependentOpView.view)		dependence.second.front().dependentOpView.operandIndex;
.getValue();
unsigned consumerIdx =		unsigned consumerIdx =
op.getIndexOfShapedOperand(producer.value().indexingView).getValue();		dependence.second.front().indexingOpView.operandIndex;
LinalgOp fusedOp =		LinalgOp fusedOp = fuse(rewriter, producerOp,
		nicolasvasilache [Unsubmitted, Not Done]: Please add some named accessors to make this nicer to read.
		mravishankar [Author, Unsubmitted, Done]: I am not sure I understand what you mean by named accessors.
fuse(rewriter, producerOp, producerIdx, ret.op, consumerIdx);		producerOp.getOutputIndex(producerIdx).getValue(),
		ret.op, consumerIdx);
		nicolasvasilache [Unsubmitted, Done]: This seems to leak internal LinalgOp operand ordering. Can we restructure and use input/output idx to hide such logic below the LinalgStructuredOpsInterface?
ret.fusedProducers.push_back(fusedOp);		ret.fusedProducers.push_back(fusedOp);
ret.originalProducers.push_back(producerOp);		ret.originalProducers.push_back(producerOp);
}		}

if (!llvm::all_of(tileSizes, isZero)) {		if (!llvm::all_of(tileSizes, isZero)) {
// Tile the remaining loops of the root operation.		// Tile the remaining loops of the root operation.
LinalgTilingOptions secondTilingOptions = tilingOptions;		LinalgTilingOptions secondTilingOptions = tilingOptions;
// The distribution is done only for the tile+fused loops.		// The distribution is done only for the tile+fused loops.
Show All 31 Lines

mlir/lib/IR/AffineMap.cpp

//===- AffineMap.cpp - MLIR Affine Map Classes ----------------------------===//		//===- AffineMap.cpp - MLIR Affine Map Classes ----------------------------===//
//		//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.		// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.		// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception		// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//		//
//===----------------------------------------------------------------------===//		//===----------------------------------------------------------------------===//

#include "mlir/IR/AffineMap.h"		#include "mlir/IR/AffineMap.h"
#include "AffineMapDetail.h"		#include "AffineMapDetail.h"
#include "mlir/IR/Attributes.h"		#include "mlir/IR/Attributes.h"
#include "mlir/IR/StandardTypes.h"		#include "mlir/IR/StandardTypes.h"
#include "mlir/Support/LogicalResult.h"		#include "mlir/Support/LogicalResult.h"
#include "mlir/Support/MathExtras.h"		#include "mlir/Support/MathExtras.h"
		#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/StringRef.h"		#include "llvm/ADT/StringRef.h"
#include "llvm/Support/raw_ostream.h"		#include "llvm/Support/raw_ostream.h"

using namespace mlir;		using namespace mlir;

namespace {		namespace {

// AffineExprConstantFolder evaluates an affine expression using constant		// AffineExprConstantFolder evaluates an affine expression using constant
▲ Show 20 Lines • Show All 422 Lines • ▼ Show 20 Lines	for (auto m : maps) {

numSymbols += m.getNumSymbols();		numSymbols += m.getNumSymbols();
numDims = std::max(m.getNumDims(), numDims);		numDims = std::max(m.getNumDims(), numDims);
}		}
return AffineMap::get(numDims, numSymbols, results,		return AffineMap::get(numDims, numSymbols, results,
maps.front().getContext());		maps.front().getContext());
}		}

		/// Returns `map` with the dimensions listed in `projectedDimensions`
		/// projected out: every use of a projected dimension is replaced by the
		/// constant 0, and the remaining dimensions are renumbered consecutively so
		/// that the result has exactly one dimension per retained input dimension.
		/// Duplicate entries in `projectedDimensions` are counted once.
		///
		/// Example: projecting {1} out of (d0, d1, d2) -> (d0, d2) yields
		/// (d0, d1) -> (d0, d1).
		AffineMap mlir::getProjectedMap(AffineMap map,
		                                ArrayRef<unsigned> projectedDimensions) {
		  DenseSet<unsigned> projectedDims(projectedDimensions.begin(),
		                                   projectedDimensions.end());
		  MLIRContext *context = map.getContext();
		  // One replacement expression per dimension of `map`, in dimension order.
		  SmallVector<AffineExpr, 4> resultExprs;
		  // Position that the next retained dimension takes in the projected map.
		  // Using the original position here would reference dimensions that do not
		  // exist in the smaller map whenever a retained dimension follows a
		  // projected one.
		  unsigned numRetainedDims = 0;
		  for (unsigned dim = 0, numDims = map.getNumDims(); dim < numDims; ++dim) {
		    if (projectedDims.count(dim))
		      resultExprs.push_back(getAffineConstantExpr(0, context));
		    else
		      resultExprs.push_back(getAffineDimExpr(numRetainedDims++, context));
		  }
		  // Compose `map` with the replacement map so that each dimension of `map`
		  // is substituted by its entry in `resultExprs`.
		  return map.compose(
		      AffineMap::get(numRetainedDims, 0, resultExprs, context));
		}

//===----------------------------------------------------------------------===//		//===----------------------------------------------------------------------===//
// MutableAffineMap.		// MutableAffineMap.
//===----------------------------------------------------------------------===//		//===----------------------------------------------------------------------===//

MutableAffineMap::MutableAffineMap(AffineMap map)		MutableAffineMap::MutableAffineMap(AffineMap map)
: numDims(map.getNumDims()), numSymbols(map.getNumSymbols()),		: numDims(map.getNumDims()), numSymbols(map.getNumSymbols()),
context(map.getContext()) {		context(map.getContext()) {
for (auto result : map.getResults())		for (auto result : map.getResults())
Show All 34 Lines

mlir/test/Dialect/Linalg/fusion-pattern.mlir

	// RUN: mlir-opt %s -test-linalg-fusion-transform-patterns -canonicalize -cse -split-input-file \| FileCheck %s			// RUN: mlir-opt %s -test-linalg-fusion-transform-patterns -canonicalize -cse -split-input-file -verify-diagnostics \| FileCheck %s

	module {			module {
	func @basic_fusion(%arg0: memref<?x?xf32>, %arg1: memref<?x?xf32>,			func @basic_fusion(%arg0: memref<?x?xf32>, %arg1: memref<?x?xf32>,
	%arg2: memref<?x?xf32>) {			%arg2: memref<?x?xf32>) {
	%cst = constant 0.000000e+00 : f32			%cst = constant 0.000000e+00 : f32
	linalg.fill(%arg2, %cst) : memref<?x?xf32>, f32			linalg.fill(%arg2, %cst) : memref<?x?xf32>, f32
	linalg.matmul {__internal_linalg_transform__ = "basic_fusion"}			linalg.matmul {__internal_linalg_transform__ = "basic_fusion"}
	ins(%arg0, %arg1 : memref<?x?xf32>, memref<?x?xf32>)			ins(%arg0, %arg1 : memref<?x?xf32>, memref<?x?xf32>)
	▲ Show 20 Lines • Show All 280 Lines • ▼ Show 20 Lines
	// CHECK-SAME: ins(%[[SV6]], %[[SV7]]			// CHECK-SAME: ins(%[[SV6]], %[[SV7]]
	// CHECK-SAME: : memref<?x?xf32, #[[MAP1]]>, memref<?x?xf32, #[[MAP1]]>)			// CHECK-SAME: : memref<?x?xf32, #[[MAP1]]>, memref<?x?xf32, #[[MAP1]]>)
	// CHECK-SAME: outs(%[[SV8]] : memref<?x?xf32, #[[MAP1]]>)			// CHECK-SAME: outs(%[[SV8]] : memref<?x?xf32, #[[MAP1]]>)
	// CHECK: }			// CHECK: }
	// CHECK: }			// CHECK: }
	// CHECK: }			// CHECK: }
	// CHECK: linalg.matmul			// CHECK: linalg.matmul
	// CHECK-SAME: __internal_linalg_transform__ = "after_lhs_fusion_original"			// CHECK-SAME: __internal_linalg_transform__ = "after_lhs_fusion_original"

				// -----

				// Fusion test input: linalg.matmul writes its result into the allocated
				// buffer %2, and the linalg.generic below reads %2 as both of its inputs
				// using the same identity indexing map, adding the two values element-wise
				// into %arg2. The generic op carries the "transpose_fusion" marker.
				module {
				func @matmul_plus_matmul(%arg0: memref<?x?xf32>, %arg1: memref<?x?xf32>,
				%arg2: memref<?x?xf32>) {
				%c0 = constant 0 : index
				%c1 = constant 1 : index
				%0 = dim %arg2, %c0 : memref<?x?xf32>
				%1 = dim %arg2, %c1 : memref<?x?xf32>
				%2 = alloc(%0, %1) : memref<?x?xf32>
				linalg.matmul ins(%arg0, %arg1 : memref<?x?xf32>, memref<?x?xf32>)
				outs(%2 : memref<?x?xf32>)
				linalg.generic
				{indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>,
				affine_map<(d0, d1) -> (d0, d1)>,
				affine_map<(d0, d1) -> (d0, d1)>],
				iterator_types = ["parallel", "parallel"],
				__internal_linalg_transform__ = "transpose_fusion"}
				ins(%2, %2 : memref<?x?xf32>, memref<?x?xf32>)
				outs(%arg2 : memref<?x?xf32>) {
				^bb0(%arg3 : f32, %arg4 : f32, %arg5 : f32) :
				%3 = addf %arg3, %arg4 : f32
				linalg.yield %3 : f32
				}
				return
				}
				}
				// CHECK: func @matmul_plus_matmul
				// CHECK-SAME: %[[ARG0:[a-zA-Z0-9_]+]]: memref<?x?xf32>
				// CHECK-SAME: %[[ARG1:[a-zA-Z0-9_]+]]: memref<?x?xf32>
				// CHECK-SAME: %[[ARG2:[a-zA-Z0-9_]+]]: memref<?x?xf32>
				// CHECK: %[[T2:.+]] = alloc(%{{.}}, %{{.}}) : memref<?x?xf32>
				// CHECK: linalg.matmul
				// CHECK-SAME: after_transpose_fusion_original
				// CHECK: scf.parallel (%[[ARG3:[a-zA-Z0-9_]+]], %[[ARG4:.[a-zA-Z0-9_]+]])
				// CHECK: %[[T5:.+]] = subview %[[T2]][%[[ARG3]], %[[ARG4]]]
				// CHECK: %[[T6:.+]] = subview %[[ARG2]][%[[ARG3]], %[[ARG4]]]
				// CHECK: %[[T8:.+]] = subview %[[ARG0]][%[[ARG3]], 0]
				// CHECK: %[[T9:.+]] = subview %[[ARG1]][0, %[[ARG4]]]
				// CHECK: linalg.matmul
				// CHECK-SAME: after_transpose_fusion_producer
				// CHECK-SAME: ins(%[[T8]], %[[T9]]
				// CHECK-SAME: outs(%[[T5]]
				// CHECK-NOT: linalg.matmul
				// CHECK: linalg.generic
				// CHECK-SAME: ins(%[[T5]], %[[T5]]
				// CHECK-SAME: outs(%[[T6]]
				// CHECK-SAME: after_transpose_fusion

				// -----

				// Negative fusion test input: same structure as the previous test, but the
				// second use of the matmul result %2 in the linalg.generic is indexed with
				// the transposed map (d0, d1) -> (d1, d0) while the first use is indexed
				// with the identity map. The remark annotation placed directly above the
				// generic op states the diagnostic that is checked for this case.
				module {
				func @matmul_plus_transpose_matmul(%arg0: memref<?x?xf32>,
				%arg1: memref<?x?xf32>,
				%arg2: memref<?x?xf32>) {
				%c0 = constant 0 : index
				%c1 = constant 1 : index
				%0 = dim %arg2, %c0 : memref<?x?xf32>
				%1 = dim %arg2, %c1 : memref<?x?xf32>
				%2 = alloc(%0, %1) : memref<?x?xf32>
				linalg.matmul ins(%arg0, %arg1 : memref<?x?xf32>, memref<?x?xf32>)
				outs(%2 : memref<?x?xf32>)
				// expected-remark @+1 {{unhandled fusion to the same producer but with different indexing maps}}
				linalg.generic
				{indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>,
				affine_map<(d0, d1) -> (d1, d0)>,
				affine_map<(d0, d1) -> (d0, d1)>],
				iterator_types = ["parallel", "parallel"],
				__internal_linalg_transform__ = "transpose_fusion"}
				ins(%2, %2 : memref<?x?xf32>, memref<?x?xf32>)
				outs(%arg2 : memref<?x?xf32>) {
				^bb0(%arg3 : f32, %arg4 : f32, %arg5 : f32) :
				%3 = addf %arg3, %arg4 : f32
				linalg.yield %3 : f32
				}
				return
				}
				}

				// -----

				#map0 = affine_map<(d0)[s0] -> (32, -d0 + s0)>
				#map1 = affine_map<(d0)[s0] -> (64, -d0 + s0)>
				#map2 = affine_map<(d0)[s0] -> (16, -d0 + s0)>
				#map3 = affine_map<(d0, d1)[s0, s1] -> (d0 * s1 + s0 + d1)>
				// Negative fusion test input: linalg.fill on %arg2 sits at function level,
				// while the linalg.matmul marked "basic_fusion" operates on subviews of
				// %arg0, %arg1 and %arg2 nested inside an scf.parallel / scf.for loop nest
				// with steps 32, 64 and 16. The remark annotation placed directly above the
				// matmul states the diagnostic that is checked for this case.
				module {
				func @basic_no_fusion(%arg0: memref<?x?xf32>, %arg1: memref<?x?xf32>,
				%arg2: memref<?x?xf32>) {
				%c0 = constant 0 : index
				%c1 = constant 1 : index
				%c2 = constant 2 : index
				%c32 = constant 32 : index
				%c64 = constant 64 : index
				%c16 = constant 16 : index
				%cst = constant 0.000000e+00 : f32
				linalg.fill(%arg2, %cst) : memref<?x?xf32>, f32
				%0 = dim %arg0, %c0 : memref<?x?xf32>
				%1 = dim %arg1, %c1 : memref<?x?xf32>
				%2 = dim %arg0, %c1 : memref<?x?xf32>
				scf.parallel (%arg3, %arg4) = (%c0, %c0) to (%0, %1) step (%c32, %c64) {
				scf.for %arg5 = %c0 to %2 step %c16 {
				%3 = affine.min #map0(%arg3)[%0]
				%4 = affine.min #map1(%arg4)[%1]
				%5 = affine.min #map2(%arg5)[%2]
				%6 = subview %arg0[%arg3, %arg5] [%3, %5] [1, 1] : memref<?x?xf32> to memref<?x?xf32, #map3>
				%7 = subview %arg1[%arg5, %arg4] [%5, %4] [1, 1] : memref<?x?xf32> to memref<?x?xf32, #map3>
				%8 = subview %arg2[%arg3, %arg4] [%3, %4] [1, 1] : memref<?x?xf32> to memref<?x?xf32, #map3>
				// expected-remark @+1 {{unhandled fusion of ops in different basic blocks}}
				linalg.matmul {__internal_linalg_transform__ = "basic_fusion"}
				ins(%6, %7 : memref<?x?xf32, #map3>, memref<?x?xf32, #map3>)
				outs(%8 : memref<?x?xf32, #map3>)
				}
				scf.yield
				}
				return
				}
				}

mlir/test/lib/Transforms/TestLinalgFusionTransforms.cpp

Show All 37 Lines
static void fillFusionPatterns(MLIRContext *context,		static void fillFusionPatterns(MLIRContext *context,
const LinalgDependenceGraph &dependenceGraph,		const LinalgDependenceGraph &dependenceGraph,
OwningRewritePatternList &patterns) {		OwningRewritePatternList &patterns) {
patterns.insert<LinalgTileAndFusePattern<MatmulOp>>(		patterns.insert<LinalgTileAndFusePattern<MatmulOp>>(
context, dependenceGraph,		context, dependenceGraph,
LinalgTilingOptions()		LinalgTilingOptions()
.setTileSizes({32, 64, 16})		.setTileSizes({32, 64, 16})
.setLoopType(LinalgTilingLoopType::ParallelLoops),		.setLoopType(LinalgTilingLoopType::ParallelLoops),
LinalgFusionOptions(),		LinalgFusionOptions().setIndicesToFuse({2}),
LinalgMarker(Identifier::get("basic_fusion", context),		LinalgMarker(Identifier::get("basic_fusion", context),
Identifier::get("after_basic_fusion", context)),		Identifier::get("after_basic_fusion", context)),
LinalgMarker(ArrayRef<Identifier>(),		LinalgMarker(ArrayRef<Identifier>(),
Identifier::get("after_basic_fusion_producer", context)),		Identifier::get("after_basic_fusion_producer", context)),
LinalgMarker(ArrayRef<Identifier>(),		LinalgMarker(ArrayRef<Identifier>(),
Identifier::get("after_basic_fusion_original", context)));		Identifier::get("after_basic_fusion_original", context)));

patterns.insert<LinalgTileAndFusePattern<MatmulOp>>(		patterns.insert<LinalgTileAndFusePattern<MatmulOp>>(
Show All 31 Lines	patterns.insert<LinalgTileAndFusePattern<MatmulOp>>(
LinalgMarker(Identifier::get("two_operand_fusion", context),		LinalgMarker(Identifier::get("two_operand_fusion", context),
Identifier::get("after_two_operand_fusion", context)),		Identifier::get("after_two_operand_fusion", context)),
LinalgMarker(		LinalgMarker(
ArrayRef<Identifier>(),		ArrayRef<Identifier>(),
Identifier::get("after_two_operand_fusion_producer", context)),		Identifier::get("after_two_operand_fusion_producer", context)),
LinalgMarker(		LinalgMarker(
ArrayRef<Identifier>(),		ArrayRef<Identifier>(),
Identifier::get("after_two_operand_fusion_original", context)));		Identifier::get("after_two_operand_fusion_original", context)));

		patterns.insert<LinalgTileAndFusePattern<GenericOp>>(
		context, dependenceGraph,
		LinalgTilingOptions().setTileSizes({32, 64}).setLoopType(
		LinalgTilingLoopType::ParallelLoops),
		LinalgFusionOptions().setIndicesToFuse({0, 1}),
		LinalgMarker(Identifier::get("transpose_fusion", context),
		Identifier::get("after_transpose_fusion", context)),
		LinalgMarker(ArrayRef<Identifier>(),
		Identifier::get("after_transpose_fusion_producer", context)),
		LinalgMarker(
		ArrayRef<Identifier>(),
		Identifier::get("after_transpose_fusion_original", context)));
}		}

static void applyFusionPatterns(MLIRContext *context, FuncOp funcOp) {		static void applyFusionPatterns(MLIRContext *context, FuncOp funcOp) {
OwningRewritePatternList fusionPatterns;		OwningRewritePatternList fusionPatterns;
Aliases alias;		Aliases alias;
LinalgDependenceGraph dependenceGraph =		LinalgDependenceGraph dependenceGraph =
LinalgDependenceGraph::buildDependenceGraph(alias, funcOp);		LinalgDependenceGraph::buildDependenceGraph(alias, funcOp);
fillFusionPatterns(context, dependenceGraph, fusionPatterns);		fillFusionPatterns(context, dependenceGraph, fusionPatterns);
▲ Show 20 Lines • Show All 102 Lines • Show Last 20 Lines

This is an archive of the discontinued LLVM Phabricator instance.

[mlir][Linalg] Improve the logic to perform tile and fuse with better dependence tracking.
ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 304735

mlir/include/mlir/Dialect/Linalg/Analysis/DependenceAnalysis.h

mlir/include/mlir/Dialect/Linalg/IR/LinalgStructuredOpsInterface.td

mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h

mlir/include/mlir/IR/AffineMap.h

mlir/lib/Dialect/Linalg/Analysis/DependenceAnalysis.cpp

mlir/lib/Dialect/Linalg/Transforms/Fusion.cpp

mlir/lib/IR/AffineMap.cpp

mlir/test/Dialect/Linalg/fusion-pattern.mlir

mlir/test/lib/Transforms/TestLinalgFusionTransforms.cpp

This is an archive of the discontinued LLVM Phabricator instance.

[mlir][Linalg] Improve the logic to perform tile and fuse with better dependence tracking. ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 304735

mlir/include/mlir/Dialect/Linalg/Analysis/DependenceAnalysis.h

mlir/include/mlir/Dialect/Linalg/IR/LinalgStructuredOpsInterface.td

mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h

mlir/include/mlir/IR/AffineMap.h

mlir/lib/Dialect/Linalg/Analysis/DependenceAnalysis.cpp

mlir/lib/Dialect/Linalg/Transforms/Fusion.cpp

mlir/lib/IR/AffineMap.cpp

mlir/test/Dialect/Linalg/fusion-pattern.mlir

mlir/test/lib/Transforms/TestLinalgFusionTransforms.cpp

[mlir][Linalg] Improve the logic to perform tile and fuse with better dependence tracking.
ClosedPublic