Diff 261296

mlir/include/mlir/Dialect/Linalg/IR/LinalgTraits.h

Show First 20 Lines • Show All 345 Lines • ▼ Show 20 Lines	public:
}		}
};		};

/// This class provides the API for named Linalg StructuredOps.		/// This class provides the API for named Linalg StructuredOps.
template <typename ConcreteType>		template <typename ConcreteType>
class NamedStructuredOpTraits		class NamedStructuredOpTraits
: public OpTrait::TraitBase<ConcreteType, NamedStructuredOpTraits> {		: public OpTrait::TraitBase<ConcreteType, NamedStructuredOpTraits> {
public:		public:
llvm::Optional<SmallVector<StringRef, 8>> referenceIterators();		static SmallVector<StringRef, 8> referenceIterators(TypeRange inputTypes,
llvm::Optional<SmallVector<AffineMap, 8>> referenceIndexingMaps();		TypeRange outputTypes);
std::function<void(OpBuilder &, Location, ArrayRef<Value>)>
emitScalarImplementation();		static SmallVector<AffineMap, 8> referenceIndexingMaps(TypeRange inputTypes,
		TypeRange outputTypes);
};		};

} // namespace linalg		} // namespace linalg
} // namespace OpTrait		} // namespace OpTrait
} // namespace mlir		} // namespace mlir

#endif // MLIR_DIALECT_LINALG_LINALGTRAITS_H_		#endif // MLIR_DIALECT_LINALG_LINALGTRAITS_H_

mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp

Show All 27 Lines
#include "llvm/Support/MathExtras.h"		#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"		#include "llvm/Support/raw_ostream.h"

using namespace mlir;		using namespace mlir;
using namespace mlir::linalg;		using namespace mlir::linalg;

/// Forward declarations.		/// Forward declarations.
template <typename NamedStructuredOpType>		template <typename NamedStructuredOpType>
static void buildNamedStructuredOpRegion(Builder &builder,		static void buildNamedStructuredOpRegionAndAttributes(
OperationState &result,		Builder &builder, OperationState &result, TypeRange operandTypes,
TypeRange operandTypes,
TypeRange tensorResultTypes);		TypeRange tensorResultTypes);
template <typename NamedStructuredOpType>		template <typename NamedStructuredOpType>
static void printNamedStructuredOp(OpAsmPrinter &p, NamedStructuredOpType op);		static void printNamedStructuredOp(OpAsmPrinter &p, NamedStructuredOpType op);
template <typename NamedStructuredOpType>		template <typename NamedStructuredOpType>
static ParseResult parseNamedStructuredOp(OpAsmParser &parser,		static ParseResult parseNamedStructuredOp(OpAsmParser &parser,
OperationState &result);		OperationState &result);
template <typename NamedStructuredOpType>		template <typename NamedStructuredOpType>
static LogicalResult verifyNamedStructuredOp(NamedStructuredOpType op);		static LogicalResult verifyNamedStructuredOp(NamedStructuredOpType op);

▲ Show 20 Lines • Show All 1,032 Lines • ▼ Show 20 Lines	OpFoldResult TransposeOp::fold(ArrayRef<Attribute>) {
return {};		return {};
}		}

//===----------------------------------------------------------------------===//		//===----------------------------------------------------------------------===//
// Auto-generated Linalg named ops.		// Auto-generated Linalg named ops.
//===----------------------------------------------------------------------===//		//===----------------------------------------------------------------------===//

template <typename NamedStructuredOpType>		template <typename NamedStructuredOpType>
void buildNamedStructuredOpRegion(Builder &builder, OperationState &result,		void buildNamedStructuredOpRegionAndAttributes(Builder &builder,
		OperationState &result,
TypeRange operandTypes,		TypeRange operandTypes,
TypeRange tensorResultTypes) {		TypeRange tensorResultTypes) {
Region &region = *result.addRegion();		Region &region = *result.addRegion();
Block *body = new Block();		Block *body = new Block();
// TODO: atm all operands go through getElementTypeOrSelf,		// TODO: atm all operands go through getElementTypeOrSelf,
// reconsider when we have evidence we need to.		// reconsider when we have evidence we need to.
for (auto t : operandTypes)		for (auto t : operandTypes)
body->addArgument(getElementTypeOrSelf(t));		body->addArgument(getElementTypeOrSelf(t));
for (auto t : tensorResultTypes)		for (auto t : tensorResultTypes)
body->addArgument(getElementTypeOrSelf(t));		body->addArgument(getElementTypeOrSelf(t));
region.push_back(body);		region.push_back(body);

OpBuilder opBuilder(builder.getContext());		OpBuilder opBuilder(builder.getContext());
opBuilder.setInsertionPointToStart(&region.front());		opBuilder.setInsertionPointToStart(&region.front());
mlir::edsc::ScopedContext scope(opBuilder, builder.getUnknownLoc());		mlir::edsc::ScopedContext scope(opBuilder, builder.getUnknownLoc());
NamedStructuredOpType::regionBuilder(*body);		NamedStructuredOpType::regionBuilder(*body);

		auto indexingMaps = builder.getAffineMapArrayAttr(
		NamedStructuredOpType::referenceIndexingMaps(operandTypes,
		tensorResultTypes));
		result.addAttribute(getIndexingMapsAttrName(), indexingMaps);

		auto iterators =
		builder.getStrArrayAttr(NamedStructuredOpType::referenceIterators(
		operandTypes, tensorResultTypes));
		result.addAttribute(getIteratorTypesAttrName(), iterators);
}		}

template <typename NamedStructuredOpType>		template <typename NamedStructuredOpType>
static void printNamedStructuredOp(OpAsmPrinter &p, NamedStructuredOpType op) {		static void printNamedStructuredOp(OpAsmPrinter &p, NamedStructuredOpType op) {
		std::array<StringRef, 2> silentAttrNames{getIndexingMapsAttrName(),
		rriddle: Can we use std::array here instead?
		getIteratorTypesAttrName()};
p << op.getOperationName() << ' ';		p << op.getOperationName() << ' ';
p.printOptionalAttrDict(op.getAttrs());		p.printOptionalAttrDict(op.getAttrs(), silentAttrNames);
p << ' ' << op.getOperands();		p << ' ' << op.getOperands();
p << ": (" << op.getOperandTypes() << ")";		p << ": (" << op.getOperandTypes() << ")";
auto outputTensorTypes = op.getResultTypes();		auto outputTensorTypes = op.getResultTypes();
if (!outputTensorTypes.empty())		if (!outputTensorTypes.empty())
p << " -> (" << outputTensorTypes << ")";		p << " -> (" << outputTensorTypes << ")";
}		}

template <typename NamedStructuredOpType>		template <typename NamedStructuredOpType>
Show All 15 Lines	static ParseResult parseNamedStructuredOp(OpAsmParser &parser,
// outputs are specified in the result type.		// outputs are specified in the result type.
SmallVector<Type, 8> tensorResultTypes;		SmallVector<Type, 8> tensorResultTypes;
if (parser.parseOptionalArrowTypeList(tensorResultTypes))		if (parser.parseOptionalArrowTypeList(tensorResultTypes))
return failure();		return failure();

if (!tensorResultTypes.empty())		if (!tensorResultTypes.empty())
result.addTypes(tensorResultTypes);		result.addTypes(tensorResultTypes);

buildNamedStructuredOpRegion<NamedStructuredOpType>(		buildNamedStructuredOpRegionAndAttributes<NamedStructuredOpType>(
parser.getBuilder(), result, operandTypes, tensorResultTypes);		parser.getBuilder(), result, operandTypes, tensorResultTypes);

return parser.resolveOperands(operandsInfo, operandTypes,		return parser.resolveOperands(operandsInfo, operandTypes,
parser.getCurrentLocation(), result.operands);		parser.getCurrentLocation(), result.operands);
}		}

template <typename NamedStructuredOpType>		template <typename NamedStructuredOpType>
static LogicalResult verifyNamedStructuredOp(NamedStructuredOpType op) {		static LogicalResult verifyNamedStructuredOp(NamedStructuredOpType op) {
Show All 10 Lines

mlir/lib/Dialect/Linalg/Transforms/LinalgToLoops.cpp

Show First 20 Lines • Show All 72 Lines • ▼ Show 20 Lines	SmallVector<Value, 4> emitLoopRanges(OpBuilder &b, Location loc, AffineMap map,
SmallVector<Value, 4> res;		SmallVector<Value, 4> res;
for (unsigned idx = 0, e = map.getNumResults(); idx < e; ++idx) {		for (unsigned idx = 0, e = map.getNumResults(); idx < e; ++idx) {
res.push_back(		res.push_back(
linalg_range(std_constant_index(0), sizes[idx], std_constant_index(1)));		linalg_range(std_constant_index(0), sizes[idx], std_constant_index(1)));
}		}
return res;		return res;
}		}

template <typename OpType>		template <typename IndexedValueType, typename OpType>
static void		static void inlineRegionAndEmitStore(OpType op, ArrayRef<Value> indexedValues,
inlineRegionAndEmitStdStore(OpType op, ArrayRef<Value> indexedValues,
ArrayRef<SmallVector<Value, 8>> indexing,		ArrayRef<SmallVector<Value, 8>> indexing,
ArrayRef<Value> outputBuffers) {		ArrayRef<Value> outputBuffers) {
auto &b = ScopedContext::getBuilder();		auto &b = ScopedContext::getBuilder();
auto &block = op.region().front();		auto &block = op.region().front();
BlockAndValueMapping map;		BlockAndValueMapping map;
map.map(block.getArguments(), indexedValues);		map.map(block.getArguments(), indexedValues);
for (auto &op : block.without_terminator()) {		for (auto &op : block.without_terminator()) {
assert(op.getNumRegions() == 0 && "expected a non-nested region");		assert(op.getNumRegions() == 0 && "expected a non-nested region");
auto *newOp = b.clone(op, map);		auto *newOp = b.clone(op, map);
map.map(op.getResults(), newOp->getResults());		map.map(op.getResults(), newOp->getResults());
}		}

Operation &terminator = block.back();		Operation &terminator = block.back();
assert(isa<YieldOp>(terminator) &&		assert(isa<YieldOp>(terminator) &&
"expected an yield op in the end of the region");		"expected a yield op in the end of the region");
for (unsigned i = 0, e = terminator.getNumOperands(); i < e; ++i) {		for (unsigned i = 0, e = terminator.getNumOperands(); i < e; ++i) {
std_store(map.lookupOrDefault(terminator.getOperand(i)), outputBuffers[i],		IndexedValueType O(outputBuffers[i]);
ArrayRef<Value>{indexing[i].begin(), indexing[i].end()});		O(indexing[i]) = map.lookupOrDefault(terminator.getOperand(i));
}		}
}		}

// Returns a pair that contains input indices and output indices of a		// Returns a pair that contains input indices and output indices of a
// SingleInputPoolingOp `op`.		// SingleInputPoolingOp `op`.
struct InputAndOutputIndices {		struct InputAndOutputIndices {
SmallVector<Value, 8> inputs;		SmallVector<Value, 8> inputs;
SmallVector<Value, 8> outputs;		SmallVector<Value, 8> outputs;
};		};
template <typename SingleInputPoolingOp>		template <typename SingleInputPoolingOp>
static InputAndOutputIndices getInputAndOutputIndices(ArrayRef<Value> allIvs,		static InputAndOutputIndices getInputAndOutputIndices(ArrayRef<Value> allIvs,
SingleInputPoolingOp op) {		SingleInputPoolingOp op) {
auto &b = ScopedContext::getBuilder();		auto &b = ScopedContext::getBuilder();
auto loc = ScopedContext::getLocation();		auto loc = ScopedContext::getLocation();
auto mapsRange = op.indexing_maps().template getAsRange<AffineMapAttr>();		auto mapsRange = op.indexing_maps().template getAsRange<AffineMapAttr>();
auto maps = llvm::to_vector<8>(		auto maps = llvm::to_vector<8>(
llvm::map_range(mapsRange, [](AffineMapAttr a) { return a.getValue(); }));		llvm::map_range(mapsRange, [](AffineMapAttr a) { return a.getValue(); }));
return InputAndOutputIndices{		return InputAndOutputIndices{
makeCanonicalAffineApplies(b, loc, maps[0], allIvs),		makeCanonicalAffineApplies(b, loc, maps[0], allIvs),
makeCanonicalAffineApplies(b, loc, maps[2], allIvs)};		makeCanonicalAffineApplies(b, loc, maps[2], allIvs)};
}		}

namespace {		namespace {

// Generic loop emitter, to be specialized on an op-per op basis.		/// Emits the MLIR for the scalar part of the generic op by:
// TODO: Hook up to named ops interface and, later, retire when all named ops		/// 1. Emitting load ops for each input and output view in order. This is
		ftynse: Does "1." actually emit stores?
// are auto-generated.		/// achieved by applying the appropriate input or output map to the
		mravishankar: This is not necessary for this CL, but you only need to emit loads for output views if there is a use of the corresponding arguments. Something I have been wanting to address for a while now.
		/// enclosing induction variables.
		/// 2. Emitting a call to `op.fun()` that takes as arguments the scalars
		/// from point 1. above.
		/// 3. Emitting store ops to store the results of 2. to the output
		/// views.
		///
		ftynse: Nit: `//` -> `///`
		/// An example output may resemble:
		///
		/// ```
		/// loop.for %i = %c0 to %0 step %c1 {
		/// loop.for %j = %c0 to %1 step %c1 {
		/// loop.for %k = %c0 to %4 step %c1 {
		/// %11 = load %arg0[%i, %j] :
		/// memref<?x?xf32, stride_specification>
		/// %12 = load %arg1[%i, %j, %k] :
		/// memref<?x?x?xf32, stride_specification>
		/// %13 = load %arg2[%i, %k, %j] :
		/// memref<?x?x?xf32, stride_specification>
		/// %14:2 = call @foo(%11, %12, %13) : (f32, f32, f32) -> (f32, f32)
		/// store %14#0, %arg1[%i, %j, %k] :
		/// memref<?x?x?Xf32, stride_specification>
		/// store %14#1, %arg2[%i, %k, %j] :
		/// memref<?x?x?Xf32, stride_specification>
		/// }
		/// }
		/// }
		/// ```
		rriddle: nit: Please use /// for comments.
template <typename IndexedValueType, typename LinalgOpType>		template <typename IndexedValueType, typename LinalgOpType>
class LinalgScopedEmitter {		class LinalgScopedEmitter {
public:		public:
static void emitScalarImplementation(ArrayRef<Value> allIvs,		static void emitScalarImplementation(ArrayRef<Value> allIvs,
LinalgOpType linalgOp) {		LinalgOpType linalgOp) {
assert(linalgOp.hasBufferSemantics() &&		assert(linalgOp.hasBufferSemantics() &&
"expected linalg op with buffer semantics");		"expected linalg op with buffer semantics");
llvm_unreachable("NYI");		auto b = ScopedContext::getBuilder();
linalgOp.emitScalarImplementation()(ScopedContext::getBuilder(),		auto loc = ScopedContext::getLocation();
ScopedContext::getLocation(), allIvs);		unsigned nInputs = linalgOp.getNumInputs();
		unsigned nOutputs = linalgOp.getNumOutputs();
		SmallVector<Value, 4> indexedValues;
		indexedValues.reserve(nInputs + nOutputs);

		// TODO(mravishankar): Avoid the loads if the corresponding argument of the
		// region has no uses.
		// 1.a. Emit load from input views.
		for (unsigned i = 0; i < nInputs; ++i) {
		auto indexing = makeCanonicalAffineApplies(
		b, loc, linalgOp.getInputIndexingMap(i), allIvs);
		// Passing through IndexedValueType emits the proper load operation.
		indexedValues.push_back(IndexedValueType(linalgOp.getInput(i))(indexing));
		}
		// 1.b. Emit load from output views.
		for (unsigned i = 0; i < nOutputs; ++i) {
		auto indexing = makeCanonicalAffineApplies(
		b, loc, linalgOp.getOutputIndexingMap(i), allIvs);
		// Passing through IndexedValueType emits the proper load operation.
		indexedValues.push_back(
		IndexedValueType(linalgOp.getOutputBuffer(i))(indexing));
		}
		rriddle: Depends on what you need, but you could use mlir::inlineRegion. You just need to provide an InlinerInterface.
		nicolasvasilache (Author): Ack, keeping that for a global change in a separate revision. Thanks!

		// TODO(ntv): When a region inliner exists, use it.
		// 2. Inline region, currently only works for a single basic block.
		// 3. Emit store.
		SmallVector<SmallVector<Value, 8>, 8> indexing;
		SmallVector<Value, 8> outputBuffers;
		for (unsigned i = 0; i < nOutputs; ++i) {
		indexing.push_back(makeCanonicalAffineApplies(
		b, loc, linalgOp.getOutputIndexingMap(i), allIvs));
		outputBuffers.push_back(linalgOp.getOutputBuffer(i));
		}
		inlineRegionAndEmitStore<IndexedValueType>(linalgOp, indexedValues,
		indexing, outputBuffers);
}		}
};		};

template <typename IndexedValueType>		template <typename IndexedValueType>
class LinalgScopedEmitter<IndexedValueType, CopyOp> {		class LinalgScopedEmitter<IndexedValueType, CopyOp> {
public:		public:
static void emitScalarImplementation(ArrayRef<Value> allIvs, CopyOp copyOp) {		static void emitScalarImplementation(ArrayRef<Value> allIvs, CopyOp copyOp) {
assert(copyOp.hasBufferSemantics() &&		assert(copyOp.hasBufferSemantics() &&
▲ Show 20 Lines • Show All 79 Lines • ▼ Show 20 Lines	public:
}		}
};		};

template <typename IndexedValueType>		template <typename IndexedValueType>
class LinalgScopedEmitter<IndexedValueType, ConvOp> {		class LinalgScopedEmitter<IndexedValueType, ConvOp> {
public:		public:
/// Returns the input value of convOp. If the indices in `imIdx` is out of		/// Returns the input value of convOp. If the indices in `imIdx` is out of
/// boundary, returns 0 instead.		/// boundary, returns 0 instead.
static Value getConvOpInput(ConvOp convOp, IndexedValueType im,		static Value getConvOpInput(ConvOp convOp, StdIndexedValue im,
MutableArrayRef<Value> imIdx) {		MutableArrayRef<Value> imIdx) {
// TODO(ntv): add a level of indirection to linalg.generic.		// TODO(ntv): add a level of indirection to linalg.generic.
if (!convOp.padding())		if (!convOp.padding())
return im(imIdx);		return im(imIdx);

auto *context = ScopedContext::getContext();		auto *context = ScopedContext::getContext();
Value zeroIndex = std_constant_index(0);		Value zeroIndex = std_constant_index(0);
SmallVector<Value, 8> conds;		SmallVector<Value, 8> conds;
▲ Show 20 Lines • Show All 45 Lines • ▼ Show 20 Lines	static void emitScalarImplementation(ArrayRef<Value> allIvs, ConvOp convOp) {
auto maps = llvm::to_vector<8>(llvm::map_range(		auto maps = llvm::to_vector<8>(llvm::map_range(
mapsRange, [](AffineMapAttr a) { return a.getValue(); }));		mapsRange, [](AffineMapAttr a) { return a.getValue(); }));
SmallVector<Value, 8> fIdx(		SmallVector<Value, 8> fIdx(
makeCanonicalAffineApplies(b, loc, maps[0], allIvs));		makeCanonicalAffineApplies(b, loc, maps[0], allIvs));
SmallVector<Value, 8> imIdx(		SmallVector<Value, 8> imIdx(
makeCanonicalAffineApplies(b, loc, maps[1], allIvs));		makeCanonicalAffineApplies(b, loc, maps[1], allIvs));
SmallVector<Value, 8> oIdx(		SmallVector<Value, 8> oIdx(
makeCanonicalAffineApplies(b, loc, maps[2], allIvs));		makeCanonicalAffineApplies(b, loc, maps[2], allIvs));
IndexedValueType F(convOp.filter()), I(convOp.input()), O(convOp.output());
		// Padded conv involves an affine.max in the memory access which is not
		// allowed by affine.load. Override to always use an StdIndexedValue.
		bondhugula: A couple of typos here: access and StdIndexedValue
		bondhugula: I might have missed this but a test case exercising this path is probably missing.
		StdIndexedValue I(convOp.input());
		IndexedValueType F(convOp.filter()), O(convOp.output());

// Emit scalar form.		// Emit scalar form.
Value paddedInput = getConvOpInput(convOp, I, imIdx);		Value paddedInput = getConvOpInput(convOp, I, imIdx);
O(oIdx) += F(fIdx) * paddedInput;		O(oIdx) += F(fIdx) * paddedInput;
}		}
};		};

template <typename IndexedValueType>		template <typename IndexedValueType>
Show All 34 Lines	static void emitScalarImplementation(ArrayRef<Value> allIvs,
auto indices = getInputAndOutputIndices(allIvs, op);		auto indices = getInputAndOutputIndices(allIvs, op);
IndexedValueType input(op.input()), output(op.output());		IndexedValueType input(op.input()), output(op.output());

// Emit scalar form.		// Emit scalar form.
output(indices.outputs) += input(indices.inputs);		output(indices.outputs) += input(indices.inputs);
}		}
};		};

// Emits the MLIR for the scalar part of the generic op by:		/// Emits the MLIR for the scalar part of the indexed generic op by:
// 1. Emitting std_load and std_store ops for each input and output		/// 1. Emitting load ops for each input and output view in order. This is
// view in order. This is achieved by applying the appropriate input or		/// achieved by applying the appropriate input or output map to the
// output map to the enclosing induction variables.		/// enclosing induction variables.
// 2. Emitting a call to `op.fun()` that takes as arguments the scalars		/// 2. Emitting a call to `op.fun()` that takes as arguments the induction
// from point 1. above.		/// variables and the scalars from point 1. above.
// 3. Emitting std_store to store the results of 2. to the output		/// 3. Emitting store ops to store the results of 2. to the output views.
// views.		///
//		/// An example output may resemble:
// An example output may resemble:		///
//		/// ```
// ```		/// loop.for %i = %c0 to %0 step %c1 {
// loop.for %i = %c0 to %0 step %c1 {		/// loop.for %j = %c0 to %1 step %c1 {
// loop.for %j = %c0 to %1 step %c1 {		/// loop.for %k = %c0 to %4 step %c1 {
// loop.for %k = %c0 to %4 step %c1 {		/// %11 = load %arg0[%i, %j] :
// %11 = load %arg0[%i, %j] :		/// memref<?x?xf32, stride_specification>
// memref<?x?xf32, stride_specification>		/// %12 = load %arg1[%i, %j, %k] :
// %12 = load %arg1[%i, %j, %k] :		/// memref<?x?x?xf32, stride_specification>
// memref<?x?x?xf32, stride_specification>		/// %13 = load %arg2[%i, %k, %j] :
// %13 = load %arg2[%i, %k, %j] :		/// memref<?x?x?xf32, stride_specification>
// memref<?x?x?xf32, stride_specification>		/// %14:2 = call @foo(%i, %j, %k, %11, %12, %13) :
// %14:2 = call @foo(%11, %12, %13) : (f32, f32, f32) -> (f32, f32)		/// (index, index, index, f32, f32, f32) -> (f32, f32)
// store %14#0, %arg1[%i, %j, %k] :		/// store %14#0, %arg1[%i, %j, %k] :
// memref<?x?x?Xf32, stride_specification>		/// memref<?x?x?Xf32, stride_specification>
// store %14#1, %arg2[%i, %k, %j] :		/// store %14#1, %arg2[%i, %k, %j] :
// memref<?x?x?Xf32, stride_specification>		/// memref<?x?x?Xf32, stride_specification>
// }		/// }
// }		/// }
// }		/// }
// ```		/// ```
template <typename IndexedValueType>
class LinalgScopedEmitter<IndexedValueType, GenericOp> {
public:
static void emitScalarImplementation(ArrayRef<Value> allIvs,
GenericOp genericOp) {
assert(genericOp.hasBufferSemantics() &&
"expected linalg op with buffer semantics");
auto b = ScopedContext::getBuilder();
auto loc = ScopedContext::getLocation();
unsigned nInputs = genericOp.getNumInputs();
unsigned nOutputs = genericOp.getNumOutputs();
SmallVector<Value, 4> indexedValues(nInputs + nOutputs);

// 1.a. Emit std_load from input views.
for (unsigned i = 0; i < nInputs; ++i) {
auto indexing = makeCanonicalAffineApplies(
b, loc, genericOp.getInputIndexingMap(i), allIvs);
indexedValues[i] = std_load(genericOp.getInput(i), indexing);
}

// 1.b. Emit std_load from output views.
// TODO(mravishankar): Avoid the loads if the corresponding argument of the
// region has no uses.
for (unsigned i = 0; i < nOutputs; ++i) {
Value output = genericOp.getOutputBuffer(i);
auto indexing = makeCanonicalAffineApplies(
b, loc, genericOp.getOutputIndexingMap(i), allIvs);
indexedValues[nInputs + i] = std_load(output, indexing);
}

// TODO(ntv): When a region inliner exists, use it.
// 2. Inline region, currently only works for a single basic block.
// 3. Emit std_store.
SmallVector<SmallVector<Value, 8>, 8> indexing;
SmallVector<Value, 8> outputBuffers;
for (unsigned i = 0; i < nOutputs; ++i) {
indexing.push_back(makeCanonicalAffineApplies(
b, loc, genericOp.getOutputIndexingMap(i), allIvs));
outputBuffers.push_back(genericOp.getOutputBuffer(i));
}
inlineRegionAndEmitStdStore(genericOp, indexedValues, indexing,
outputBuffers);
}
};

// Emits the MLIR for the scalar part of the indexed generic op by:
// 1. Emitting std_load and std_store ops for each input and output view in
// order. This is achieved by applying the appropriate input or output map
// to the enclosing induction variables.
// 2. Emitting a call to `op.fun()` that takes as arguments the induction
// variables and the scalars from point 1. above.
// 3. Emitting std_store to store the results of 2. to the output views.
//
// An example output may resemble:
//
// ```
// loop.for %i = %c0 to %0 step %c1 {
// loop.for %j = %c0 to %1 step %c1 {
// loop.for %k = %c0 to %4 step %c1 {
// %11 = load %arg0[%i, %j] :
// memref<?x?xf32, stride_specification>
// %12 = load %arg1[%i, %j, %k] :
// memref<?x?x?xf32, stride_specification>
// %13 = load %arg2[%i, %k, %j] :
// memref<?x?x?xf32, stride_specification>
// %14:2 = call @foo(%i, %j, %k, %11, %12, %13) :
// (index, index, index, f32, f32, f32) -> (f32, f32)
// store %14#0, %arg1[%i, %j, %k] :
// memref<?x?x?Xf32, stride_specification>
// store %14#1, %arg2[%i, %k, %j] :
// memref<?x?x?Xf32, stride_specification>
// }
// }
// }
// ```
template <typename IndexedValueType>		template <typename IndexedValueType>
class LinalgScopedEmitter<IndexedValueType, IndexedGenericOp> {		class LinalgScopedEmitter<IndexedValueType, IndexedGenericOp> {
public:		public:
static void emitScalarImplementation(ArrayRef<Value> allIvs,		static void emitScalarImplementation(ArrayRef<Value> allIvs,
IndexedGenericOp indexedGenericOp) {		IndexedGenericOp indexedGenericOp) {
assert(indexedGenericOp.hasBufferSemantics() &&		assert(indexedGenericOp.hasBufferSemantics() &&
"expected linalg op with buffer semantics");		"expected linalg op with buffer semantics");
auto b = ScopedContext::getBuilder();		auto b = ScopedContext::getBuilder();
auto loc = ScopedContext::getLocation();		auto loc = ScopedContext::getLocation();
unsigned nInputs = indexedGenericOp.getNumInputs();		unsigned nInputs = indexedGenericOp.getNumInputs();
unsigned nOutputs = indexedGenericOp.getNumOutputs();		unsigned nOutputs = indexedGenericOp.getNumOutputs();
unsigned nLoops = allIvs.size();		unsigned nLoops = allIvs.size();
SmallVector<Value, 4> indexedValues(nLoops + nInputs + nOutputs);		SmallVector<Value, 4> indexedValues;
		indexedValues.reserve(nLoops + nInputs + nOutputs);
for (unsigned i = 0; i < nLoops; ++i) {		for (unsigned i = 0; i < nLoops; ++i)
indexedValues[i] = allIvs[i];		indexedValues.push_back(allIvs[i]);
}

// 1.a. Emit std_load from input views.		// TODO(mravishankar): Avoid the loads if the corresponding argument of the
		// region has no uses.
		// 1.a. Emit load from input views.
		mravishankar: Ack :)
for (unsigned i = 0; i < nInputs; ++i) {		for (unsigned i = 0; i < nInputs; ++i) {
Value input = indexedGenericOp.getInput(i);
auto indexing = makeCanonicalAffineApplies(		auto indexing = makeCanonicalAffineApplies(
b, loc, indexedGenericOp.getInputIndexingMap(i), allIvs);		b, loc, indexedGenericOp.getInputIndexingMap(i), allIvs);
indexedValues[nLoops + i] = std_load(input, indexing);		// Pass input i through IndexedValueType emits the proper load operation.
		indexedValues.push_back(
		IndexedValueType(indexedGenericOp.getInput(i))(indexing));
}		}
		// 1.b. Emit load from output views.
// 1.b. Emit std_load from output views.
for (unsigned i = 0; i < nOutputs; ++i) {		for (unsigned i = 0; i < nOutputs; ++i) {
Value output = indexedGenericOp.getOutputBuffer(i);
auto indexing = makeCanonicalAffineApplies(		auto indexing = makeCanonicalAffineApplies(
b, loc, indexedGenericOp.getOutputIndexingMap(i), allIvs);		b, loc, indexedGenericOp.getOutputIndexingMap(i), allIvs);
indexedValues[nLoops + nInputs + i] = std_load(output, indexing);		// Pass output i through IndexedValueType emits the proper load operation.
		indexedValues.push_back(
		IndexedValueType(indexedGenericOp.getOutputBuffer(i))(indexing));
}		}

// TODO(ntv): When a region inliner exists, use it.		// TODO(ntv): When a region inliner exists, use it.
// 2. Inline region, currently only works for a single basic block.		// 2. Inline region, currently only works for a single basic block.
// 3. Emit std_store.		// 3. Emit store.
SmallVector<SmallVector<Value, 8>, 8> indexing;		SmallVector<SmallVector<Value, 8>, 8> indexing;
SmallVector<Value, 8> outputBuffers;		SmallVector<Value, 8> outputBuffers;
for (unsigned i = 0; i < nOutputs; ++i) {		for (unsigned i = 0; i < nOutputs; ++i) {
indexing.push_back(makeCanonicalAffineApplies(		indexing.push_back(makeCanonicalAffineApplies(
b, loc, indexedGenericOp.getOutputIndexingMap(i), allIvs));		b, loc, indexedGenericOp.getOutputIndexingMap(i), allIvs));
outputBuffers.push_back(indexedGenericOp.getOutputBuffer(i));		outputBuffers.push_back(indexedGenericOp.getOutputBuffer(i));
}		}
inlineRegionAndEmitStdStore(indexedGenericOp, indexedValues, indexing,		inlineRegionAndEmitStore<IndexedValueType>(indexedGenericOp, indexedValues,
outputBuffers);		indexing, outputBuffers);
}		}
};		};

// This struct is for factoring out the implementation and support template		/// This struct is for factoring out the implementation and support template
// instantiations in the following 2 cases:		/// instantiations in the following 2 cases:
// 1. Appending to a list of patterns via RewritePatternList.		/// 1. Appending to a list of patterns via RewritePatternList.
// 2. Direct invocation via `linalgOpToLoops` and `linalgOpToAffineLoops`.		/// 2. Direct invocation via `linalgOpToLoops` and `linalgOpToAffineLoops`.
// The implementation must work both in DRR and inside a RewritePattern. As a		/// The implementation must work both in DRR and inside a RewritePattern. As a
// consequence, (1) it is only allowed to emit new ops if the match is		/// consequence, (1) it is only allowed to emit new ops if the match is
// guaranteed to be a success, (2) it is not allowed erase/replace, and (3) an		/// guaranteed to be a success, (2) it is not allowed erase/replace, and (3) an
// encompassing pattern must take care of the erasure logic.		/// encompassing pattern must take care of the erasure logic.
template <typename LoopTy, typename ConcreteOpTy>		template <typename LoopTy, typename ConcreteOpTy>
class LinalgOpToLoopsImpl {		class LinalgOpToLoopsImpl {
public:		public:
static Optional<LinalgLoops> doit(Operation *op, PatternRewriter &rewriter);		static Optional<LinalgLoops> doit(Operation *op, PatternRewriter &rewriter);
};		};

namespace {		namespace {
/// Helper struct to generate the loop nest for the op. This factored out here		/// Helper struct to generate the loop nest for the op. This factored out here
Show All 10 Lines	GenericLoopNestRangeBuilder<LoopTy>(allIvs, loopRanges)([&] {
SmallVector<Value, 4> allIvValues(allIvs.begin(), allIvs.end());		SmallVector<Value, 4> allIvValues(allIvs.begin(), allIvs.end());
LinalgScopedEmitter<IndexedValueTy,		LinalgScopedEmitter<IndexedValueTy,
ConcreteOpTy>::emitScalarImplementation(allIvValues,		ConcreteOpTy>::emitScalarImplementation(allIvValues,
linalgOp);		linalgOp);
});		});
}		}
};		};

/// Generates loops nest using loop.parallel. loop.parallel is only used for the		/// Generates loop nest using loop.parallel. loop.parallel is only used for the
/// outer parallel loops. All other loops are generated using loop.for		/// outer parallel loops. All other loops are generated using loop.for
/// operation.		/// operation.
template <typename ConcreteOpTy>		template <typename ConcreteOpTy>
class GenerateLoopNest<loop::ParallelOp, ConcreteOpTy> {		class GenerateLoopNest<loop::ParallelOp, ConcreteOpTy> {
public:		public:
using IndexedValueTy = StdIndexedValue;		using IndexedValueTy = StdIndexedValue;

static void doit(ConcreteOpTy linalgOp, ArrayRef<Value> loopRanges,		static void doit(ConcreteOpTy linalgOp, ArrayRef<Value> loopRanges,
▲ Show 20 Lines • Show All 103 Lines • ▼ Show 20 Lines	LogicalResult matchAndRewrite(Operation *op,
using Impl = LinalgOpToLoopsImpl<LoopType, ConcreteOp>;		using Impl = LinalgOpToLoopsImpl<LoopType, ConcreteOp>;
if (!Impl::doit(op, rewriter))		if (!Impl::doit(op, rewriter))
return failure();		return failure();
rewriter.eraseOp(op);		rewriter.eraseOp(op);
return success();		return success();
}		}
};		};

// Helper classes for type list expansion.		/// Helper classes for type list expansion.
template <typename LoopType, typename... LinalgOps>		template <typename LoopType, typename... LinalgOps>
class RewritePatternList;		class RewritePatternList;

template <typename LoopType>		template <typename LoopType>
class RewritePatternList<LoopType> {		class RewritePatternList<LoopType> {
public:		public:
static void build(OwningRewritePatternList &patterns, MLIRContext *ctx) {}		static void build(OwningRewritePatternList &patterns, MLIRContext *ctx) {}
};		};
Show All 11 Lines
template <typename LoopType>		template <typename LoopType>
void FillRewritePatterns(OwningRewritePatternList &patterns, MLIRContext *ctx) {		void FillRewritePatterns(OwningRewritePatternList &patterns, MLIRContext *ctx) {
RewritePatternList<LoopType,		RewritePatternList<LoopType,
#define GET_OP_LIST		#define GET_OP_LIST
#include "mlir/Dialect/Linalg/IR/LinalgStructuredOps.cpp.inc"		#include "mlir/Dialect/Linalg/IR/LinalgStructuredOps.cpp.inc"
>::build(patterns, ctx);		>::build(patterns, ctx);
}		}

// Local folding pattern for AffineApplyOp that we can apply greedily.		/// Local folding pattern for AffineApplyOp that we can apply greedily.
// This replaces AffineApplyOp by the proper value in cases where the associated		/// This replaces AffineApplyOp by the proper value in cases where the
// map is trivial. A trivial map here is defined as a map with a single result		/// associated map is trivial.
// and either:		/// A trivial map here is defined as a map with a single result and either:
// 1. Zero operand + returns a single AffineConstantExpr		/// 1. Zero operand + returns a single AffineConstantExpr
// 2. One operand + returns a single AffineDimExpr		/// 2. One operand + returns a single AffineDimExpr
// 3. One operands + returns a single AffineSymbolExpr		/// 3. One operand + returns a single AffineSymbolExpr
		bondhugulaUnsubmitted Done Reply Inline Actions Nit: typo operands -> operand. bondhugula: Nit: typo operands -> operand.
//		//
// In the first case, the AffineApplyOp is replaced by a new constant. In the		/// In the first case, the AffineApplyOp is replaced by a new constant. In the
// other cases, it is replaced by its unique operand.		/// other cases, it is replaced by its unique operand.
struct FoldAffineOp : public RewritePattern {		struct FoldAffineOp : public RewritePattern {
FoldAffineOp(MLIRContext *context)		FoldAffineOp(MLIRContext *context)
: RewritePattern(AffineApplyOp::getOperationName(), 0, context) {}		: RewritePattern(AffineApplyOp::getOperationName(), 0, context) {}

LogicalResult matchAndRewrite(Operation *op,		LogicalResult matchAndRewrite(Operation *op,
PatternRewriter &rewriter) const override {		PatternRewriter &rewriter) const override {
AffineApplyOp affineApplyOp = cast<AffineApplyOp>(op);		AffineApplyOp affineApplyOp = cast<AffineApplyOp>(op);
auto map = affineApplyOp.getAffineMap();		auto map = affineApplyOp.getAffineMap();
▲ Show 20 Lines • Show All 126 Lines • Show Last 20 Lines

mlir/test/Dialect/Linalg/affine.mlir

	// RUN: mlir-opt %s -convert-linalg-to-affine-loops \| FileCheck %s			// RUN: mlir-opt %s -convert-linalg-to-affine-loops \| FileCheck %s

	// Test that we can lower all the way to LLVM without crashing, don't check results here.			// Test that we can lower all the way to LLVM without crashing, don't check results here.
	// RUN: mlir-opt %s --convert-linalg-to-llvm -o=/dev/null 2>&1			// RUN: mlir-opt %s -convert-linalg-to-affine-loops -convert-linalg-to-llvm -o=/dev/null 2>&1

	// CHECK-DAG: #[[strided2D:.*]] = affine_map<(d0, d1)[s0, s1] -> (d0 * s1 + s0 + d1)>			// CHECK-DAG: #[[strided2D:.*]] = affine_map<(d0, d1)[s0, s1] -> (d0 * s1 + s0 + d1)>
	// CHECK-DAG: #[[strided3D:.*]] = affine_map<(d0, d1, d2)[s0, s1, s2] -> (d0 * s1 + s0 + d1 * s2 + d2)>			// CHECK-DAG: #[[strided3D:.*]] = affine_map<(d0, d1, d2)[s0, s1, s2] -> (d0 * s1 + s0 + d1 * s2 + d2)>

	// CHECK-DAG: #[[stride2Dilation1:.*]] = affine_map<(d0, d1) -> (d0 * 2 + d1)>			// CHECK-DAG: #[[stride2Dilation1:.*]] = affine_map<(d0, d1) -> (d0 * 2 + d1)>

				// CHECK-DAG: #[[clampMinMap:.*]] = affine_map<(d0) -> (d0, 0)>

	func @matmul(%arg0: memref<?xi8>, %M: index, %N: index, %K: index) {			func @matmul(%arg0: memref<?xi8>, %M: index, %N: index, %K: index) {
	%c0 = constant 0 : index			%c0 = constant 0 : index
	%c1 = constant 1 : index			%c1 = constant 1 : index
	%A = view %arg0[%c0][%M, %K] : memref<?xi8> to memref<?x?xf32, offset: ?, strides: [?, 1]>			%A = view %arg0[%c0][%M, %K] : memref<?xi8> to memref<?x?xf32, offset: ?, strides: [?, 1]>
	%B = view %arg0[%c0][%K, %N] : memref<?xi8> to memref<?x?xf32, offset: ?, strides: [?, 1]>			%B = view %arg0[%c0][%K, %N] : memref<?xi8> to memref<?x?xf32, offset: ?, strides: [?, 1]>
	%C = view %arg0[%c0][%M, %N] : memref<?xi8> to memref<?x?xf32, offset: ?, strides: [?, 1]>			%C = view %arg0[%c0][%M, %N] : memref<?xi8> to memref<?x?xf32, offset: ?, strides: [?, 1]>
	linalg.matmul(%A, %B, %C) : memref<?x?xf32, offset: ?, strides: [?, 1]>, memref<?x?xf32, offset: ?, strides: [?, 1]>, memref<?x?xf32, offset: ?, strides: [?, 1]>			linalg.matmul(%A, %B, %C) : memref<?x?xf32, offset: ?, strides: [?, 1]>, memref<?x?xf32, offset: ?, strides: [?, 1]>, memref<?x?xf32, offset: ?, strides: [?, 1]>
	return			return
	Show All 29 Lines
	// CHECK: %[[B:.*]] = dim %arg1, 0 : memref<?x?x?xf32, #[[strided3D]]>			// CHECK: %[[B:.*]] = dim %arg1, 0 : memref<?x?x?xf32, #[[strided3D]]>
	// CHECK: %[[X0:.*]] = dim %arg2, 1 : memref<?x?x?xf32, #[[strided3D]]>			// CHECK: %[[X0:.*]] = dim %arg2, 1 : memref<?x?x?xf32, #[[strided3D]]>
	// CHECK: affine.for %{{.*}} = 0 to %[[B]] {			// CHECK: affine.for %{{.*}} = 0 to %[[B]] {
	// CHECK: affine.for %{{.*}} = 0 to %[[X0]] {			// CHECK: affine.for %{{.*}} = 0 to %[[X0]] {
	// CHECK: affine.for %{{.*}} = 0 to %[[K]] {			// CHECK: affine.for %{{.*}} = 0 to %[[K]] {
	// CHECK: affine.for %{{.*}} = 0 to %[[Q]] {			// CHECK: affine.for %{{.*}} = 0 to %[[Q]] {
	// CHECK: affine.for %{{.*}} = 0 to %[[Z0]] {			// CHECK: affine.for %{{.*}} = 0 to %[[Z0]] {
	// CHECK: %[[SUM:.]] = affine.apply #[[stride2Dilation1]](%{{.}}, %{{.*}})			// CHECK: %[[SUM:.]] = affine.apply #[[stride2Dilation1]](%{{.}}, %{{.*}})

				func @conv_padding(%arg0: memref<?x?x?x?xf32>,
				%arg1: memref<?x?x?x?xf32>,
				%arg2: memref<?x?x?x?xf32>) {
				linalg.conv(%arg0, %arg1, %arg2) {dilations = [1, 1],
				padding = dense<[[0, 1], [1, 1]]> : tensor<2x2xi64>,
				strides = [1, 1]} :
				memref<?x?x?x?xf32>, memref<?x?x?x?xf32>, memref<?x?x?x?xf32>
				return
				}
				// CHECK-LABEL: func @conv_padding
				// CHECK: %{{.}}: memref<?x?x?x?xf32>, %{{.}}: memref<?x?x?x?xf32>, %{{.*}}: memref<?x?x?x?xf32>) {
				// CHECK: %[[ZERO:.*]] = constant 0.000000e+00 : f32
				// CHECK: %[[Z0:.*]] = dim %arg0, 0 : memref<?x?x?x?xf32>
				// CHECK: %[[Z1:.*]] = dim %arg0, 1 : memref<?x?x?x?xf32>
				// CHECK: %[[Q:.*]] = dim %arg0, 2 : memref<?x?x?x?xf32>
				// CHECK: %[[K:.*]] = dim %arg0, 3 : memref<?x?x?x?xf32>
				// CHECK: %[[B:.*]] = dim %arg1, 0 : memref<?x?x?x?xf32>
				// CHECK: %[[X0:.*]] = dim %arg2, 1 : memref<?x?x?x?xf32>
				// CHECK: %[[X1:.*]] = dim %arg2, 2 : memref<?x?x?x?xf32>
				// CHECK: affine.for %{{.*}} = 0 to %[[B]] {
				// CHECK: affine.for %{{.*}} = 0 to %[[X0]] {
				// CHECK: affine.for %{{.*}} = 0 to %[[X1]] {
				// CHECK: affine.for %{{.*}} = 0 to %[[K]] {
				// CHECK: affine.for %{{.*}} = 0 to %[[Q]] {
				// CHECK: affine.for %{{.*}} = 0 to %[[Z0]] {
				// CHECK: affine.for %{{.*}} = 0 to %[[Z1]] {
				// CHECK: %[[SUM0:.]] = affine.apply #{{.}}(%{{.}}, %{{.}})
				// CHECK: %[[SUM1:.]] = affine.apply #{{.}}(%{{.}}, %{{.}})
				// CHECK: %[[IDX:.*]] = affine.max #[[clampMinMap]](%[[SUM0]])
				// CHECK: %[[IDY:.*]] = affine.max #[[clampMinMap]](%[[SUM1]])
				// Padded conv involves an affine.max in the memory access which is not
				// allowed by affine.load. Override to always use an std.load.
				// CHECK: %{{.}} = load %{{.}}[%{{.}}, %[[IDX]], %[[IDY]], %{{.}}] : memref<?x?x?x?xf32>
				// CHECK: %{{.}} = select %{{.}}, %{{.}}, %{{.}} : f32
				// CHECK: %{{.}} = affine.load %{{.}}[%{{.}}, %{{.}}, %{{.}}, %{{.}}] : memref<?x?x?x?xf32>
				// CHECK: %{{.}} = mulf %{{.}}, %{{.*}} : f32
				// CHECK: %{{.}} = affine.load %{{.}}[%{{.}}, %{{.}}, %{{.}}, %{{.}}] : memref<?x?x?x?xf32>
				// CHECK: %{{.}} = addf %{{.}}, %{{.*}} : f32
				// CHECK: affine.store %{{.}}, %{{.}}[%{{.}}, %{{.}}, %{{.}}, %{{.}}] : memref<?x?x?x?xf32>

				//----------------------------------------------------------------------------//
				// Named ops to loops.
				//----------------------------------------------------------------------------//
				func @named_batch_matmul(%A: memref<?x?x?xf32>, %B: memref<?x?x?xf32>, %C: memref<?x?x?xf32>) {
				linalg.batch_matmul %A, %B, %C : (memref<?x?x?xf32>, memref<?x?x?xf32>, memref<?x?x?xf32>) -> ()
				return
				}
				// CHECK-LABEL: @named_batch_matmul
				// CHECK-SAME: %[[mA:[a-zA-Z0-9]+]]: memref<?x?x?xf32>
				// CHECK-SAME: %[[mB:[a-zA-Z0-9]+]]: memref<?x?x?xf32>
				// CHECK-SAME: %[[mC:[a-zA-Z0-9]+]]: memref<?x?x?xf32>
				// CHECK: %[[B:.*]] = dim %[[mA]], 0 : memref<?x?x?xf32>
				// CHECK: %[[M:.*]] = dim %[[mA]], 1 : memref<?x?x?xf32>
				// CHECK: %[[K:.*]] = dim %[[mA]], 2 : memref<?x?x?xf32>
				// CHECK: %[[N:.*]] = dim %[[mB]], 2 : memref<?x?x?xf32>
				// CHECK: affine.for %[[b:.*]] = 0 to %[[B]] {
				// CHECK: affine.for %[[m:.*]] = 0 to %[[M]] {
				// CHECK: affine.for %[[n:.*]] = 0 to %[[N]] {
				// CHECK: affine.for %[[k:.*]] = 0 to %[[K]] {
				// CHECK: %[[va:.*]] = affine.load %[[mA]][%[[b]], %[[m]], %[[k]]] : memref<?x?x?xf32>
				// CHECK: %[[vb:.*]] = affine.load %[[mB]][%[[b]], %[[k]], %[[n]]] : memref<?x?x?xf32>
				// CHECK: %[[vc:.*]] = affine.load %[[mC]][%[[b]], %[[m]], %[[n]]] : memref<?x?x?xf32>
				// CHECK: %[[inc:.*]] = mulf %[[va]], %[[vb]] : f32
				// CHECK: %[[res:.*]] = addf %[[vc]], %[[inc]] : f32
				// CHECK: affine.store %[[res]], %[[mC]][%[[b]], %[[m]], %[[n]]] : memref<?x?x?xf32>

mlir/test/Dialect/Linalg/loops.mlir

// RUN: mlir-opt %s -convert-linalg-to-loops \| FileCheck --check-prefix=CHECKLOOP %s		// RUN: mlir-opt %s -convert-linalg-to-loops \| FileCheck --check-prefix=CHECKLOOP %s
// RUN: mlir-opt %s -convert-linalg-to-parallel-loops \| FileCheck --check-prefix=CHECKPARALLEL %s		// RUN: mlir-opt %s -convert-linalg-to-parallel-loops \| FileCheck --check-prefix=CHECKPARALLEL %s

		bondhugulaUnsubmitted Done Reply Inline Actions Nit: I think you have a separate file affine.mlir for -convert-linalg-to-affine-loops? bondhugula: Nit: I think you have a separate file affine.mlir for -convert-linalg-to-affine-loops?
		ftynseUnsubmitted Done Reply Inline Actions Why do you need to disable pass threading here? ftynse: Why do you need to disable pass threading here?
// Test that we can lower all the way to LLVM without crashing, don't check results here.		// Test that we can lower all the way to LLVM without crashing, don't check results here.
// RUN: mlir-opt %s --convert-linalg-to-llvm -o=/dev/null 2>&1		// RUN: mlir-opt %s -convert-linalg-to-loops -convert-linalg-to-llvm -o=/dev/null 2>&1
		ftynseUnsubmitted Done Reply Inline Actions I'm surprised this ever worked on non-unix system ftynse: I'm surprised this ever worked on non-unix system
		nicolasvasilacheAuthorUnsubmitted Done Reply Inline Actions Hmm, I've picked this up from here IIRC nicolasvasilache: Hmm, I've picked this up from [here](https://github.com/llvm/llvm…

// CHECKLOOP-DAG: #[[strided1D:.*]] = affine_map<(d0)[s0] -> (d0 + s0)>		// CHECKLOOP-DAG: #[[strided1D:.*]] = affine_map<(d0)[s0] -> (d0 + s0)>
// CHECKLOOP-DAG: #[[strided2D:.*]] = affine_map<(d0, d1)[s0, s1] -> (d0 * s1 + s0 + d1)>		// CHECKLOOP-DAG: #[[strided2D:.*]] = affine_map<(d0, d1)[s0, s1] -> (d0 * s1 + s0 + d1)>
// CHECKLOOP-DAG: #[[strided3D:.*]] = affine_map<(d0, d1, d2)[s0, s1, s2] -> (d0 * s1 + s0 + d1 * s2 + d2)>		// CHECKLOOP-DAG: #[[strided3D:.*]] = affine_map<(d0, d1, d2)[s0, s1, s2] -> (d0 * s1 + s0 + d1 * s2 + d2)>
// CHECKLOOP-DAG: #[[strided4D:.*]] = affine_map<(d0, d1, d2, d3)[s0, s1, s2, s3] -> (d0 * s1 + s0 + d1 * s2 + d2 * s3 + d3)>		// CHECKLOOP-DAG: #[[strided4D:.*]] = affine_map<(d0, d1, d2, d3)[s0, s1, s2, s3] -> (d0 * s1 + s0 + d1 * s2 + d2 * s3 + d3)>
// CHECKLOOP-DAG: #[[clampMinMap:.*]] = affine_map<(d0) -> (d0, 0)>		// CHECKLOOP-DAG: #[[clampMinMap:.*]] = affine_map<(d0) -> (d0, 0)>

// CHECKLOOP-DAG: #[[Stride1Dilation1:.*]] = affine_map<(d0, d1) -> (d0 + d1)>		// CHECKLOOP-DAG: #[[Stride1Dilation1:.*]] = affine_map<(d0, d1) -> (d0 + d1)>
▲ Show 20 Lines • Show All 335 Lines • ▼ Show 20 Lines
// CHECKPARALLEL: %[[SUM1:.]] = affine.apply #[[Stride3Dilation5]](%{{.}}, %{{.*}})		// CHECKPARALLEL: %[[SUM1:.]] = affine.apply #[[Stride3Dilation5]](%{{.}}, %{{.*}})
// CHECKPARALLEL: %{{.}} = load %{{.}}[%{{.}}, %[[SUM0]], %[[SUM1]], %{{.}}] : memref<?x?x?x?xf32, #[[strided4D]]>		// CHECKPARALLEL: %{{.}} = load %{{.}}[%{{.}}, %[[SUM0]], %[[SUM1]], %{{.}}] : memref<?x?x?x?xf32, #[[strided4D]]>
// CHECKPARALLEL: %{{.}} = load %{{.}}[%{{.}}, %{{.}}, %{{.}}, %{{.}}] : memref<?x?x?x?xf32, #[[strided4D]]>		// CHECKPARALLEL: %{{.}} = load %{{.}}[%{{.}}, %{{.}}, %{{.}}, %{{.}}] : memref<?x?x?x?xf32, #[[strided4D]]>
// CHECKPARALLEL: %{{.}} = mulf %{{.}}, %{{.*}} : f32		// CHECKPARALLEL: %{{.}} = mulf %{{.}}, %{{.*}} : f32
// CHECKPARALLEL: %{{.}} = load %{{.}}[%{{.}}, %{{.}}, %{{.}}, %{{.}}] : memref<?x?x?x?xf32, #[[strided4D]]>		// CHECKPARALLEL: %{{.}} = load %{{.}}[%{{.}}, %{{.}}, %{{.}}, %{{.}}] : memref<?x?x?x?xf32, #[[strided4D]]>
// CHECKPARALLEL: %{{.}} = addf %{{.}}, %{{.*}} : f32		// CHECKPARALLEL: %{{.}} = addf %{{.}}, %{{.*}} : f32
// CHECKPARALLEL: store %{{.}}, %{{.}}[%{{.}}, %{{.}}, %{{.}}, %{{.}}] : memref<?x?x?x?xf32, #[[strided4D]]>		// CHECKPARALLEL: store %{{.}}, %{{.}}[%{{.}}, %{{.}}, %{{.}}, %{{.}}] : memref<?x?x?x?xf32, #[[strided4D]]>


func @conv_padding(%arg0: memref<?x?x?x?xf32>,		func @conv_padding(%arg0: memref<?x?x?x?xf32>,
%arg1: memref<?x?x?x?xf32>,		%arg1: memref<?x?x?x?xf32>,
%arg2: memref<?x?x?x?xf32>) {		%arg2: memref<?x?x?x?xf32>) {
linalg.conv(%arg0, %arg1, %arg2) {dilations = [1, 1],		linalg.conv(%arg0, %arg1, %arg2) {dilations = [1, 1],
padding = dense<[[0, 1], [1, 1]]> : tensor<2x2xi64>,		padding = dense<[[0, 1], [1, 1]]> : tensor<2x2xi64>,
strides = [1, 1]} :		strides = [1, 1]} :
memref<?x?x?x?xf32>, memref<?x?x?x?xf32>, memref<?x?x?x?xf32>		memref<?x?x?x?xf32>, memref<?x?x?x?xf32>, memref<?x?x?x?xf32>
return		return
▲ Show 20 Lines • Show All 483 Lines • ▼ Show 20 Lines	func @scalar_code(%arg0: memref<f32>, %arg1 : memref<f32>, %arg2 : memref<f32>)
} : memref<f32>, memref<f32>, memref<f32>		} : memref<f32>, memref<f32>, memref<f32>
return		return
}		}
// CHECKLOOP-LABEL: @scalar_code		// CHECKLOOP-LABEL: @scalar_code
// CHECKLOOP-SAME: %[[ARG0]]: memref<f32>		// CHECKLOOP-SAME: %[[ARG0]]: memref<f32>
// CHECKLOOP-SAME: %[[ARG1]]: memref<f32>		// CHECKLOOP-SAME: %[[ARG1]]: memref<f32>
// CHECKLOOP-SAME: %[[ARG2]]: memref<f32>		// CHECKLOOP-SAME: %[[ARG2]]: memref<f32>
// CHECKLOOP-NOT: loop.for		// CHECKLOOP-NOT: loop.for
// CHECKLOOP-DAG: load %[[ARG0]][]		// CHECKLOOP: load %[[ARG0]][]
// CHECKLOOP-DAG: load %[[ARG1]][]		// CHECKLOOP: load %[[ARG1]][]
// CHECKLOOP: addf		// CHECKLOOP: addf
// CHECKLOOP: store %{{.*}}, %[[ARG2]][]		// CHECKLOOP: store %{{.*}}, %[[ARG2]][]

// CHECKPARALLEL-LABEL: @scalar_code		// CHECKPARALLEL-LABEL: @scalar_code
// CHECKPARALLEL-SAME: %[[ARG0]]: memref<f32>		// CHECKPARALLEL-SAME: %[[ARG0]]: memref<f32>
// CHECKPARALLEL-SAME: %[[ARG1]]: memref<f32>		// CHECKPARALLEL-SAME: %[[ARG1]]: memref<f32>
// CHECKPARALLEL-SAME: %[[ARG2]]: memref<f32>		// CHECKPARALLEL-SAME: %[[ARG2]]: memref<f32>
// CHECKPARALLEL-NOT: loop.for		// CHECKPARALLEL-NOT: loop.for
// CHECKPARALLEL-DAG: load %[[ARG0]][]		// CHECKPARALLEL: load %[[ARG0]][]
// CHECKPARALLEL-DAG: load %[[ARG1]][]		// CHECKPARALLEL: load %[[ARG1]][]
// CHECKPARALLEL: addf		// CHECKPARALLEL: addf
// CHECKPARALLEL: store %{{.*}}, %[[ARG2]][]		// CHECKPARALLEL: store %{{.*}}, %[[ARG2]][]

		//----------------------------------------------------------------------------//
		// Named ops to loops.
		//----------------------------------------------------------------------------//
		func @named_batch_matmul(%A: memref<?x?x?xf32>, %B: memref<?x?x?xf32>, %C: memref<?x?x?xf32>) {
		linalg.batch_matmul %A, %B, %C : (memref<?x?x?xf32>, memref<?x?x?xf32>, memref<?x?x?xf32>) -> ()
		return
		}
		// CHECKLOOP-LABEL: @named_batch_matmul
		// CHECKLOOP-SAME: %[[mA:[a-zA-Z0-9]+]]: memref<?x?x?xf32>
		// CHECKLOOP-SAME: %[[mB:[a-zA-Z0-9]+]]: memref<?x?x?xf32>
		// CHECKLOOP-SAME: %[[mC:[a-zA-Z0-9]+]]: memref<?x?x?xf32>
		// CHECKLOOP: %[[B:.*]] = dim %[[mA]], 0 : memref<?x?x?xf32>
		// CHECKLOOP: %[[M:.*]] = dim %[[mA]], 1 : memref<?x?x?xf32>
		// CHECKLOOP: %[[K:.*]] = dim %[[mA]], 2 : memref<?x?x?xf32>
		// CHECKLOOP: %[[N:.*]] = dim %[[mB]], 2 : memref<?x?x?xf32>
		// CHECKLOOP: loop.for %[[b:.*]] = %{{.*}} to %[[B]] step %{{.*}} {
		// CHECKLOOP: loop.for %[[m:.*]] = %{{.*}} to %[[M]] step %{{.*}} {
		// CHECKLOOP: loop.for %[[n:.*]] = %{{.*}} to %[[N]] step %{{.*}} {
		// CHECKLOOP: loop.for %[[k:.*]] = %{{.*}} to %[[K]] step %{{.*}} {
		// CHECKLOOP: %[[va:.*]] = load %[[mA]][%[[b]], %[[m]], %[[k]]] : memref<?x?x?xf32>
		ftynseUnsubmitted Done Reply Inline Actions The comment in the code says loads are emitted in the same order as operands, so why "DAG" ? ftynse: The comment in the code says loads are emitted in the same order as operands, so why "DAG" ?
		// CHECKLOOP: %[[vb:.*]] = load %[[mB]][%[[b]], %[[k]], %[[n]]] : memref<?x?x?xf32>
		// CHECKLOOP: %[[vc:.*]] = load %[[mC]][%[[b]], %[[m]], %[[n]]] : memref<?x?x?xf32>
		// CHECKLOOP: %[[inc:.*]] = mulf %[[va]], %[[vb]] : f32
		ftynseUnsubmitted Done Reply Inline Actions DAG here looks like it would allow breaking use-def chains... ftynse: DAG here looks like it would allow breaking use-def chains...
		// CHECKLOOP: %[[res:.*]] = addf %[[vc]], %[[inc]] : f32
		// CHECKLOOP: store %[[res]], %[[mC]][%[[b]], %[[m]], %[[n]]] : memref<?x?x?xf32>

		// CHECKPARALLEL-LABEL: @named_batch_matmul
		// CHECKPARALLEL-SAME: %[[mA:[a-zA-Z0-9]+]]: memref<?x?x?xf32>
		// CHECKPARALLEL-SAME: %[[mB:[a-zA-Z0-9]+]]: memref<?x?x?xf32>
		// CHECKPARALLEL-SAME: %[[mC:[a-zA-Z0-9]+]]: memref<?x?x?xf32>
		// CHECKPARALLEL: %[[B:.*]] = dim %[[mA]], 0 : memref<?x?x?xf32>
		// CHECKPARALLEL: %[[M:.*]] = dim %[[mA]], 1 : memref<?x?x?xf32>
		// CHECKPARALLEL: %[[K:.*]] = dim %[[mA]], 2 : memref<?x?x?xf32>
		// CHECKPARALLEL: %[[N:.*]] = dim %[[mB]], 2 : memref<?x?x?xf32>
		// CHECKPARALLEL: loop.parallel (%[[b:.*]], %[[m:.*]], %[[n:.*]]) = ({{.*}}) to (%[[B]], %[[M]], %[[N]]) step ({{.*}}) {
		// CHECKPARALLEL: loop.for %[[k:.*]] = %{{.*}} to %[[K]] step %{{.*}} {
		// CHECKPARALLEL: %[[va:.*]] = load %[[mA]][%[[b]], %[[m]], %[[k]]] : memref<?x?x?xf32>
		// CHECKPARALLEL: %[[vb:.*]] = load %[[mB]][%[[b]], %[[k]], %[[n]]] : memref<?x?x?xf32>
		// CHECKPARALLEL: %[[vc:.*]] = load %[[mC]][%[[b]], %[[m]], %[[n]]] : memref<?x?x?xf32>
		// CHECKPARALLEL: %[[inc:.*]] = mulf %[[va]], %[[vb]] : f32
		// CHECKPARALLEL: %[[res:.*]] = addf %[[vc]], %[[inc]] : f32
		// CHECKPARALLEL: store %[[res]], %[[mC]][%[[b]], %[[m]], %[[n]]] : memref<?x?x?xf32>

mlir/test/mlir-linalg-ods-gen/test-linalg-ods-gen.tc

	// RUN: mlir-linalg-ods-gen %s -gen-ods-decl=1 \| FileCheck %s --check-prefix=ODS			// RUN: mlir-linalg-ods-gen %s -gen-ods-decl=1 \| FileCheck %s --check-prefix=ODS
	// RUN: mlir-linalg-ods-gen %s -gen-impl=1 \| FileCheck %s --check-prefix=IMPL			// RUN: mlir-linalg-ods-gen %s -gen-impl=1 \| FileCheck %s --check-prefix=IMPL

	// ODS-LABEL: def Test1Op : LinalgNamedStructured_Op<"test1", [			// ODS-LABEL: def Test1Op : LinalgNamedStructured_Op<"test1", [
	// ODS-NEXT: NInputs<2>			// ODS-NEXT: NInputs<2>
	// ODS-NEXT: NOutputs<1>			// ODS-NEXT: NOutputs<1>
	// ODS-NEXT: NamedStructuredOpTraits			// ODS-NEXT: NamedStructuredOpTraits
	// ODS-NEXT: SingleBlockImplicitTerminator<"YieldOp">			// ODS-NEXT: SingleBlockImplicitTerminator<"YieldOp">
	//			//
	// IMPL-LABEL: Test1Op::referenceIterators() {			// IMPL-LABEL: SmallVector<StringRef, 8> Test1Op::referenceIterators
	// IMPL-NEXT: { {{.}}Parallel{{.}}, {{.}}Reduction{{.}} }			// IMPL: { {{.}}Parallel{{.}}, {{.}}Reduction{{.}} }
	//			//
	// IMPL: Test1Op::referenceIndexingMaps() {			// IMPL: SmallVector<AffineMap, 8> Test1Op::referenceIndexingMaps
	// IMPL: AffineMap::get(2, 0, {d0, d1}, context),			// IMPL: AffineMap::get(2, 0, {d0, d1}, context),
	// IMPL-NEXT: AffineMap::get(2, 0, {d1}, context),			// IMPL-NEXT: AffineMap::get(2, 0, {d1}, context),
	// IMPL-NEXT: AffineMap::get(2, 0, {d0}, context) };			// IMPL-NEXT: AffineMap::get(2, 0, {d0}, context) };
	//			//
	// IMPL: Test1Op::regionBuilder(Block &block) {			// IMPL: void Test1Op::regionBuilder(Block &block) {
	// IMPL: Value [[a:.]](args[0]), [[b:.]](args[1]), [[c:.*]](args[2]);			// IMPL: Value [[a:.]](args[0]), [[b:.]](args[1]), [[c:.*]](args[2]);
	// IMPL: Value [[d:.*]] = std_mulf([[a]], [[b]]);			// IMPL: Value [[d:.*]] = std_mulf([[a]], [[b]]);
	// IMPL: Value [[e:.*]] = std_addf([[c]], [[d]]);			// IMPL: Value [[e:.*]] = std_addf([[c]], [[d]]);
	// IMPL: (linalg_yield(ValueRange{ [[e]] }));			// IMPL: (linalg_yield(ValueRange{ [[e]] }));
	//			//
	ods_def<Test1Op> :			ods_def<Test1Op> :
	def test1(A: f32(M, K), B: f32(K)) -> (C: f32(M)) {			def test1(A: f32(M, K), B: f32(K)) -> (C: f32(M)) {
	C(m) = std_addf<k>(std_mulf(A(m, k), B(k)));			C(m) = std_addf<k>(std_mulf(A(m, k), B(k)));
	}			}

	// ODS-LABEL: def Test2Op : LinalgNamedStructured_Op<"test2", [			// ODS-LABEL: def Test2Op : LinalgNamedStructured_Op<"test2", [
	// ODS-NEXT: NInputs<2>			// ODS-NEXT: NInputs<2>
	// ODS-NEXT: NOutputs<1>			// ODS-NEXT: NOutputs<1>
	// ODS-NEXT: NamedStructuredOpTraits			// ODS-NEXT: NamedStructuredOpTraits
	// ODS-NEXT: SingleBlockImplicitTerminator<"YieldOp">			// ODS-NEXT: SingleBlockImplicitTerminator<"YieldOp">
	//			//
	// IMPL-LABEL: Test2Op::referenceIterators() {			// IMPL-LABEL: SmallVector<StringRef, 8> Test2Op::referenceIterators
	// IMPL-NEXT: { {{.}}Parallel{{.}}, {{.}}Parallel{{.}}, {{.}}Reduction{{.}} }			// IMPL: { {{.}}Parallel{{.}}, {{.}}Parallel{{.}}, {{.}}Reduction{{.}} }
	//			//
	// IMPL: Test2Op::referenceIndexingMaps() {			// IMPL: SmallVector<AffineMap, 8> Test2Op::referenceIndexingMaps
	// IMPL: AffineMap::get(3, 0, {d0, d2}, context),			// IMPL: AffineMap::get(3, 0, {d0, d2}, context),
	// IMPL-NEXT: AffineMap::get(3, 0, {d2, d1}, context),			// IMPL-NEXT: AffineMap::get(3, 0, {d2, d1}, context),
	// IMPL-NEXT: AffineMap::get(3, 0, {d0, d1}, context) };			// IMPL-NEXT: AffineMap::get(3, 0, {d0, d1}, context) };
	//			//
	// IMPL: Test2Op::regionBuilder(Block &block) {			// IMPL: Test2Op::regionBuilder(Block &block) {
	// IMPL: Value [[a:.]](args[0]), [[b:.]](args[1]), [[c:.*]](args[2]);			// IMPL: Value [[a:.]](args[0]), [[b:.]](args[1]), [[c:.*]](args[2]);
	// IMPL: Value [[d:.*]] = std_mulf([[a]], [[b]]);			// IMPL: Value [[d:.*]] = std_mulf([[a]], [[b]]);
	// IMPL: Value [[e:.*]] = std_addf([[c]], [[d]]);			// IMPL: Value [[e:.*]] = std_addf([[c]], [[d]]);
	// IMPL: (linalg_yield(ValueRange{ [[e]] }));			// IMPL: (linalg_yield(ValueRange{ [[e]] }));
	//			//
	ods_def<Test2Op> :			ods_def<Test2Op> :
	def test2(A: f32(M, K), B: f32(K, N)) -> (C: f32(M, N)) {			def test2(A: f32(M, K), B: f32(K, N)) -> (C: f32(M, N)) {
	C(m, n) = std_addf<k>(std_mulf(A(m, k), B(k, n)));			C(m, n) = std_addf<k>(std_mulf(A(m, k), B(k, n)));
	}			}

	// ODS-LABEL: def Test3Op : LinalgNamedStructured_Op<"test3", [			// ODS-LABEL: def Test3Op : LinalgNamedStructured_Op<"test3", [
	// ODS-NEXT: NInputs<2>			// ODS-NEXT: NInputs<2>
	// ODS-NEXT: NOutputs<1>			// ODS-NEXT: NOutputs<1>
	// ODS-NEXT: NamedStructuredOpTraits			// ODS-NEXT: NamedStructuredOpTraits
	// ODS-NEXT: SingleBlockImplicitTerminator<"YieldOp">			// ODS-NEXT: SingleBlockImplicitTerminator<"YieldOp">
	//			//
	// IMPL-LABEL: Test3Op::referenceIterators() {			// IMPL-LABEL: SmallVector<StringRef, 8> Test3Op::referenceIterators
	// IMPL-NEXT: { {{.}}Parallel{{.}}, {{.}}Parallel{{.}}, {{.}}Reduction{{.}} }			// IMPL: { {{.}}Parallel{{.}}, {{.}}Parallel{{.}}, {{.}}Reduction{{.}} }
	//			//
	// IMPL: Test3Op::referenceIndexingMaps() {			// IMPL: SmallVector<AffineMap, 8> Test3Op::referenceIndexingMaps
	// IMPL: AffineMap::get(4, 0, {d0, d1, d3}, context),			// IMPL: AffineMap::get(4, 0, {d0, d1, d3}, context),
	// IMPL-NEXT: AffineMap::get(4, 0, {d3, d2}, context),			// IMPL-NEXT: AffineMap::get(4, 0, {d3, d2}, context),
	// IMPL-NEXT: AffineMap::get(4, 0, {d0, d1, d2}, context) };			// IMPL-NEXT: AffineMap::get(4, 0, {d0, d1, d2}, context) };
	//			//
	// IMPL: Test3Op::regionBuilder(Block &block) {			// IMPL: Test3Op::regionBuilder(Block &block) {
	// IMPL: Value [[a:.]](args[0]), [[b:.]](args[1]), [[c:.*]](args[2]);			// IMPL: Value [[a:.]](args[0]), [[b:.]](args[1]), [[c:.*]](args[2]);
	// IMPL: Value [[d:.*]] = std_mulf([[a]], [[b]]);			// IMPL: Value [[d:.*]] = std_mulf([[a]], [[b]]);
	// IMPL: Value [[e:.*]] = std_addf([[c]], [[d]]);			// IMPL: Value [[e:.*]] = std_addf([[c]], [[d]]);
	// IMPL: (linalg_yield(ValueRange{ [[e]] }));			// IMPL: (linalg_yield(ValueRange{ [[e]] }));
	//			//
	ods_def<Test3Op> :			ods_def<Test3Op> :
	def test3(A: f32(Batch, M, K), B: f32(K, N)) -> (C: f32(Batch, M, N)) {			def test3(A: f32(Batch, M, K), B: f32(K, N)) -> (C: f32(Batch, M, N)) {
	C(b, m, n) = std_addf<k>(std_mulf(A(b, m, k), B(k, n)));			C(b, m, n) = std_addf<k>(std_mulf(A(b, m, k), B(k, n)));
	}			}

mlir/tools/mlir-linalg-ods-gen/mlir-linalg-ods-gen.cpp

Show First 20 Lines • Show All 1,466 Lines • ▼ Show 20 Lines	SingleBlockImplicitTerminator<"YieldOp">]> {
let results = (outs Variadic<AnyRankedTensor>:$output_tensors);		let results = (outs Variadic<AnyRankedTensor>:$output_tensors);
let regions = (region SizedRegion<1>:$region);		let regions = (region SizedRegion<1>:$region);
let builders = [OpBuilder<		let builders = [OpBuilder<
"OpBuilder &b, OperationState &result, TypeRange outputTypes, "		"OpBuilder &b, OperationState &result, TypeRange outputTypes, "
# "ValueRange views",		# "ValueRange views",
[{{		[{{
result.addOperands(views);		result.addOperands(views);
result.addTypes(outputTypes);		result.addTypes(outputTypes);
buildNamedStructuredOpRegion<{0}>(		buildNamedStructuredOpRegionAndAttributes<{0}>(
b, result, TypeRange(views), outputTypes);		b, result, TypeRange(views), outputTypes);
}]>		}]>
];		];
let parser = [{		let parser = [{
return ::parseNamedStructuredOp<{0}>(parser, result);		return ::parseNamedStructuredOp<{0}>(parser, result);
}];		}];
let extraClassDeclaration = [{{		let extraClassDeclaration = [{{
llvm::Optional<SmallVector<StringRef, 8>> referenceIterators();		llvm::Optional<SmallVector<StringRef, 8>> referenceIterators();
		static SmallVector<StringRef, 8> referenceIterators(
		TypeRange inputTypes, TypeRange outputTypes);

llvm::Optional<SmallVector<AffineMap, 8>> referenceIndexingMaps();		llvm::Optional<SmallVector<AffineMap, 8>> referenceIndexingMaps();
		static SmallVector<AffineMap, 8> referenceIndexingMaps(
		TypeRange inputTypes, TypeRange outputTypes);

static void regionBuilder(Block &block);		static void regionBuilder(Block &block);
}];		}];
})FMT";		})FMT";

unsigned nInputs = 0, nOutputs = 0;		unsigned nInputs = 0, nOutputs = 0;
for (auto &t : registeredTensors) {		for (auto &t : registeredTensors) {
if (t.getValue().isOutput)		if (t.getValue().isOutput)
nOutputs++;		nOutputs++;
else		else
nInputs++;		nInputs++;
}		}

os << llvm::formatv(header, cppOpName, linalgOpName, nInputs, nOutputs);		os << llvm::formatv(header, cppOpName, linalgOpName, nInputs, nOutputs);
}		}

/// Print the C++ StructuredOpsInterface impl of `referenceIterators`.		/// Print the C++ StructuredOpsInterface impl of `referenceIterators`.
void TCParser::printReferenceIterators(llvm::raw_ostream &os,		void TCParser::printReferenceIterators(llvm::raw_ostream &os,
StringRef cppOpName,		StringRef cppOpName,
ComprehensionParsingState &state) {		ComprehensionParsingState &state) {
const char *referenceReferenceIteratorsFmt =		const char *referenceReferenceIteratorsFmt =
R"FMT(		R"FMT(
llvm::Optional<SmallVector<StringRef, 8>> {0}::referenceIterators() {		// This is temporary until we transition out of manually specified ops
		// that should be auto-generated with linalg-ods-gen.
		llvm::Optional<SmallVector<StringRef, 8>> {0}::referenceIterators() {{
		llvm_unreachable("Unexpected missing `iterator_types` attribute.");
		}
		SmallVector<StringRef, 8> {0}::referenceIterators(
		TypeRange inputTypes, TypeRange outputTypes) {
return SmallVector<StringRef, 8>{{ {1} };		return SmallVector<StringRef, 8>{{ {1} };
})FMT";		})FMT";

std::string iteratorsStr;		std::string iteratorsStr;
llvm::raw_string_ostream ss(iteratorsStr);		llvm::raw_string_ostream ss(iteratorsStr);
unsigned pos = 0;		unsigned pos = 0;
llvm::interleaveComma(		llvm::interleaveComma(
state.dims, ss, [&](std::pair<StringRef, AffineExpr> p) {		state.dims, ss, [&](std::pair<StringRef, AffineExpr> p) {
Show All 16 Lines	void TCParser::printReferenceIterators(llvm::raw_ostream &os,

os << llvm::formatv(referenceReferenceIteratorsFmt, cppOpName, iteratorsStr);		os << llvm::formatv(referenceReferenceIteratorsFmt, cppOpName, iteratorsStr);
}		}

/// Print the C++ StructuredOpsInterface impl of `referenceIndexingMaps`.		/// Print the C++ StructuredOpsInterface impl of `referenceIndexingMaps`.
void TCParser::printReferenceIndexingMaps(llvm::raw_ostream &os,		void TCParser::printReferenceIndexingMaps(llvm::raw_ostream &os,
StringRef cppOpName,		StringRef cppOpName,
ComprehensionParsingState &state) {		ComprehensionParsingState &state) {
		// 1. Generic string template for specifying reference indexing maps.
const char *referenceIndexingMapsFmt =		const char *referenceIndexingMapsFmt =
R"FMT(		R"FMT(
llvm::Optional<SmallVector<AffineMap, 8>> {0}::referenceIndexingMaps() {		// This is temporary until we transition out of manually specified ops that
MLIRContext *context = getContext();		// should be auto-generated with linalg-ods-gen.
		llvm::Optional<SmallVector<AffineMap, 8>> {0}::referenceIndexingMaps() {{
		llvm_unreachable("Unexpected missing `indexing_maps` attribute.");
		}
		SmallVector<AffineMap, 8> {0}::referenceIndexingMaps(
		TypeRange inputTypes, TypeRange outputTypes) {
		assert(!inputTypes.empty() && "At least one input expected");
		MLIRContext *context = (*inputTypes.begin()).getContext();
AffineExpr {1};		AffineExpr {1};
bindDims(context, {1});		bindDims(context, {1});
return SmallVector<AffineMap, 8>{{ {2} };		return SmallVector<AffineMap, 8>{{ {2} };
})FMT";		})FMT";

		// 2. Print a comma-separated list of identifiers for the AffineExpr in
		// `state.dims`. These will replace the `{1}` placeholder in both
		// `AffineExpr {1}` and `bindDims(context, {1})` ensuring the AffineExpr
		// identifiers are bound in the right order to the proper AffineDimExpr.
std::string dimsStr;		std::string dimsStr;
llvm::raw_string_ostream ss(dimsStr);		llvm::raw_string_ostream ss(dimsStr);
llvm::interleaveComma(		llvm::interleaveComma(
state.dims, ss,		state.dims, ss,
[&](std::pair<StringRef, AffineExpr> p) { ss << p.second; });		[&](std::pair<StringRef, AffineExpr> p) { ss << p.second; });
ss.flush();		ss.flush();

		// 3. Print a comma-separated list of AffineMap constructors that use the
		// identifiers from 1. The AffineExpr use the common arithmetic operators on
		// AffineExpr. These AffineMap constructors will replace the `{2}` placeholder
		// in return `SmallVector<AffineMap, 8>{{ {2} };`.
std::string mapsStr;		std::string mapsStr;
		bondhugulaUnsubmitted Done Reply Inline Actions A comment here and perhaps above will help. bondhugula: A comment here and perhaps above will help.
llvm::raw_string_ostream mapsStringStream(mapsStr);		llvm::raw_string_ostream mapsStringStream(mapsStr);
SmallVector<TensorUse, 4> orderedUses(state.orderedTensorArgs.size());		SmallVector<TensorUse, 4> orderedUses(state.orderedTensorArgs.size());
for (auto it : state.orderedTensorArgs)		for (const auto &it : state.orderedTensorArgs)
orderedUses[it.second] = it.first;		orderedUses[it.second] = it.first;
llvm::interleaveComma(orderedUses, mapsStringStream, [&](TensorUse u) {		llvm::interleaveComma(orderedUses, mapsStringStream, [&](TensorUse u) {
assert(u.indexingMap);		assert(u.indexingMap);
const char *mapFmt = "\n\tAffineMap::get({0}, 0, {1}, context)";		const char *mapFmt = "\n\tAffineMap::get({0}, 0, {1}, context)";
if (u.indexingMap.isEmpty()) {		if (u.indexingMap.isEmpty()) {
mapsStringStream << llvm::formatv(mapFmt, state.dims.size(), "context");		mapsStringStream << llvm::formatv(mapFmt, state.dims.size(), "context");
return;		return;
}		}

std::string exprsStr;		std::string exprsStr;
llvm::raw_string_ostream exprsStringStream(exprsStr);		llvm::raw_string_ostream exprsStringStream(exprsStr);
exprsStringStream << "{";		exprsStringStream << "{";
llvm::interleaveComma(u.indexingMap.getResults(), exprsStringStream);		llvm::interleaveComma(u.indexingMap.getResults(), exprsStringStream);
exprsStringStream << "}";		exprsStringStream << "}";
exprsStringStream.flush();		exprsStringStream.flush();

mapsStringStream << llvm::formatv(mapFmt, state.dims.size(), exprsStr);		mapsStringStream << llvm::formatv(mapFmt, state.dims.size(), exprsStr);
});		});
mapsStringStream.flush();		mapsStringStream.flush();

		// 4. Apply format to 1. using 2. and 3.
os << llvm::formatv(referenceIndexingMapsFmt, cppOpName, dimsStr, mapsStr);		os << llvm::formatv(referenceIndexingMapsFmt, cppOpName, dimsStr, mapsStr);
}		}

/// Print the C++ StructuredOpsInterface impl of `regionBuilder`.		/// Print the C++ StructuredOpsInterface impl of `regionBuilder`.
void TCParser::printRegionBuilder(llvm::raw_ostream &os, StringRef cppOpName,		void TCParser::printRegionBuilder(llvm::raw_ostream &os, StringRef cppOpName,
ComprehensionParsingState &state) {		ComprehensionParsingState &state) {
unsigned count = state.orderedTensorArgs.size();		unsigned count = state.orderedTensorArgs.size();
llvm::DenseMap<const TensorExpr *, unsigned> subExprsMap;		llvm::DenseMap<const TensorExpr *, unsigned> subExprsMap;
▲ Show 20 Lines • Show All 110 Lines • Show Last 20 Lines

This is an archive of the discontinued LLVM Phabricator instance.

[mlir][Linalg] Add support to lower named ops to loops.
Closed, Public

Details

Diff Detail

Event Timeline

Revision Contents

Diff 261296

mlir/include/mlir/Dialect/Linalg/IR/LinalgTraits.h

mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp

mlir/lib/Dialect/Linalg/Transforms/LinalgToLoops.cpp

mlir/test/Dialect/Linalg/affine.mlir

mlir/test/Dialect/Linalg/loops.mlir

mlir/test/mlir-linalg-ods-gen/test-linalg-ods-gen.tc

mlir/tools/mlir-linalg-ods-gen/mlir-linalg-ods-gen.cpp

This is an archive of the discontinued LLVM Phabricator instance.

[mlir][Linalg] Add support to lower named ops to loops. (Closed, Public)

Details

Diff Detail

Event Timeline

Revision Contents

Diff 261296

mlir/include/mlir/Dialect/Linalg/IR/LinalgTraits.h

mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp

mlir/lib/Dialect/Linalg/Transforms/LinalgToLoops.cpp

mlir/test/Dialect/Linalg/affine.mlir

mlir/test/Dialect/Linalg/loops.mlir

mlir/test/mlir-linalg-ods-gen/test-linalg-ods-gen.tc

mlir/tools/mlir-linalg-ods-gen/mlir-linalg-ods-gen.cpp

[mlir][Linalg] Add support to lower named ops to loops.
Closed, Public