Update linalg-to-loops lowering for pooling operations to perform
padding of the input when specified by the corresponding attribute.
Thanks for filling this gap! Added @hanchung, who knows the details a bit better, to the reviewers list.
FYI, this was added to linalg, but after using the padding attributes in IREE, the fact that pooling/conv ops are not tileable when padded made them untenable for our use case (in IREE). So AFAIK, IREE doesn't use the padding attributes. This change is still valid, but I think we want to deprecate the padding attributes in the future (@nicolasvasilache for comments). If you are relying on them, it would be good to know whether you could avoid using them.
mlir/lib/Dialect/Linalg/Transforms/Loops.cpp:402 — I am not clear whether this is actually the case. It is better to have Hanhan take a look at this (added him as a reviewer).
Hi Mahesh :)
I am experimenting with lowering onnx to linalg.
ONNX ops like conv and pooling have padding attributes very much like linalg does right now. So linalg attributes would be helpful.
https://github.com/onnx/onnx/blob/master/docs/Operators.md#attributes-41
It is also possible to do without the onnx padding attributes, so the deprecation of the padding attributes would not be a show-stopper for me.
Thanks for adding this! I left two concerns below. I stopped working on this because we might drop the padding attributes in the future, as @mravishankar mentioned. And there are cases I don't have answers for yet; we can iterate on one of the approaches for the padding value and revisit padding dimensions later.
FYI, in IREE we have a pass that extracts the padding attributes and creates a pad op, so we can fully parallelize the computation on conv ops and pooling ops.
mlir/lib/Dialect/Linalg/Transforms/Loops.cpp:401 — I think we have an issue with padding values. Ideally, the padding values for the pooling ops differ: we probably want the minimal int/float value for max_pooling, zero for sum_pooling, and the maximal int/float value for min_pooling. Another way is to have an initial value and use it as the padding value. If we don't have an initial value, we need different padding values for the different pooling ops.
mlir/lib/Dialect/Linalg/Transforms/Loops.cpp:402 — Yes, this is correct because we don't carry any batch/channel dimension to Linalg. I think we were considering carrying that information to Linalg but never reached a conclusion. The conv op does follow the TF semantics of having batch/window dims, but the pooling ops currently don't. The reason is that I was starting from HLO ops, which don't have such semantics. We can consider adding them, but for now that's not the case.
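The padding-value rule described in the first comment (use the identity element of each pooling reduction, so padded positions cannot affect the result) can be sketched in plain standalone C++. `PoolingKind` and `paddingValue` are illustrative names for this sketch, not identifiers from the patch:

```cpp
#include <limits>

// Hypothetical enum mirroring the three pooling reductions discussed above.
enum class PoolingKind { Max, Min, Sum };

// Neutral (padding) value for each pooling kind: the identity element of
// the reduction, so values read from padded positions are no-ops.
float paddingValue(PoolingKind kind) {
  switch (kind) {
  case PoolingKind::Max:
    return std::numeric_limits<float>::lowest(); // -FLT_MAX: max(x, -FLT_MAX) == x
  case PoolingKind::Min:
    return std::numeric_limits<float>::max();    // +FLT_MAX: min(x, +FLT_MAX) == x
  case PoolingKind::Sum:
    return 0.0f;                                 // additive identity
  }
  return 0.0f;
}
```

For integer pooling the same idea applies with `std::numeric_limits<int>::min()/max()` and zero.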
mlir/lib/Dialect/Linalg/Transforms/Loops.cpp:401 — Ah right, of course, that makes sense. I'll change `Value zero = std_constant(type, b.getZeroAttr(type));` to use a parameter instead.
mlir/lib/Dialect/Linalg/Transforms/Loops.cpp:401 — This makes sense to me without changing the op semantics. Let's do it this way to make progress. Thanks!
mlir/lib/Dialect/Linalg/Transforms/Loops.cpp:401 — OK, perfect. I'll follow up on this tomorrow.
Specialize the padding value depending on the semantics of the operation.
Add tests for integer pooling.
Remove the blank line between the function and comments.
Use /// instead of //