Diff 480234

mlir/include/mlir/Dialect/Affine/Utils.h

	//===- Utils.h - Affine dialect utilities ------------------------ C++ --===//			//===- Utils.h - Affine dialect utilities ------------------------ C++ --===//
	//			//
	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.			// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
	// See https://llvm.org/LICENSE.txt for license information.			// See https://llvm.org/LICENSE.txt for license information.
	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception			// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
	//			//
	//===----------------------------------------------------------------------===//			//===----------------------------------------------------------------------===//
	//			//
	// This header file declares a set of utilities for the affine dialect ops.			// This header file declares a set of utilities for the affine dialect ops.
	//			//
	//===----------------------------------------------------------------------===//			//===----------------------------------------------------------------------===//

	#ifndef MLIR_DIALECT_AFFINE_UTILS_H			#ifndef MLIR_DIALECT_AFFINE_UTILS_H
	#define MLIR_DIALECT_AFFINE_UTILS_H			#define MLIR_DIALECT_AFFINE_UTILS_H

	#include "mlir/Dialect/Affine/Analysis/AffineAnalysis.h"			#include "mlir/Dialect/Affine/Analysis/AffineAnalysis.h"
				#include "mlir/Dialect/Affine/IR/AffineOps.h"

	namespace mlir {			namespace mlir {

	class AffineForOp;			class AffineForOp;
	class AffineIfOp;			class AffineIfOp;
	class AffineParallelOp;			class AffineParallelOp;
	class DominanceInfo;			class DominanceInfo;
	class Operation;			class Operation;
	▲ Show 20 Lines • Show All 298 Lines • ▼ Show 20 Lines
	/// (noninclusive) and prior to `memOp` (e.g. on a control flow/op path			/// (noninclusive) and prior to `memOp` (e.g. on a control flow/op path
	/// between the operations) do not have the potential memory effect			/// between the operations) do not have the potential memory effect
	/// `EffectType` on `memOp`. `memOp` is an operation that reads or writes to			/// `EffectType` on `memOp`. `memOp` is an operation that reads or writes to
	/// a memref. For example, if `EffectType` is MemoryEffects::Write, this method			/// a memref. For example, if `EffectType` is MemoryEffects::Write, this method
	/// will check if there is no write to the memory between `start` and `memOp`			/// will check if there is no write to the memory between `start` and `memOp`
	/// that would change the read within `memOp`.			/// that would change the read within `memOp`.
	template <typename EffectType, typename T>			template <typename EffectType, typename T>
	bool hasNoInterveningEffect(Operation *start, T memOp);			bool hasNoInterveningEffect(Operation *start, T memOp);

				struct AffineValueExpr {
				explicit AffineValueExpr(AffineExpr e) : e(e) {}
				AffineValueExpr bind(Value v) {
				this->v = v;
				return *this;
				}
				AffineValueExpr bind(OpFoldResult v) {
				this->v = v;
				return *this;
				}
				operator AffineExpr() const { return e; }
				operator OpFoldResult() const { return v; }
				AffineExpr e;
				OpFoldResult v;
				};

				/// Helper struct to build simple AffineValueExprs with minimal type inference
				/// support. It holds a reference to an OpBuilder and a Location and forwards
				/// them to the makeComposedFoldedAffine{Apply,Min,Max} helpers.
				struct AffineBuilder {
				  AffineBuilder(OpBuilder &b, Location loc) : b(b), loc(loc) {}

				  /// Applies `lhs.e + rhs.e` to the operands bound to `lhs` and `rhs`.
				  OpFoldResult add(AffineValueExpr lhs, AffineValueExpr rhs) {
				    AffineExpr sum = lhs.e + rhs.e;
				    return makeComposedFoldedAffineApply(b, loc, {sum}, {lhs, rhs});
				  }

				  /// Applies `lhs.e - rhs.e` to the operands bound to `lhs` and `rhs`.
				  OpFoldResult sub(AffineValueExpr lhs, AffineValueExpr rhs) {
				    AffineExpr difference = lhs.e - rhs.e;
				    return makeComposedFoldedAffineApply(b, loc, {difference}, {lhs, rhs});
				  }

				  /// Applies `lhs.e * rhs.e` to the operands bound to `lhs` and `rhs`.
				  OpFoldResult mul(AffineValueExpr lhs, AffineValueExpr rhs) {
				    AffineExpr product = lhs.e * rhs.e;
				    return makeComposedFoldedAffineApply(b, loc, {product}, {lhs, rhs});
				  }

				  /// Applies `lhs.e.ceilDiv(rhs.e)` to the operands bound to `lhs` and `rhs`.
				  OpFoldResult ceil(AffineValueExpr lhs, AffineValueExpr rhs) {
				    AffineExpr quotient = lhs.e.ceilDiv(rhs.e);
				    return makeComposedFoldedAffineApply(b, loc, {quotient}, {lhs, rhs});
				  }

				  /// Builds an affine min over `vals`, using an identity map with one
				  /// dimension per entry of `vals`.
				  OpFoldResult min(ArrayRef<OpFoldResult> vals) {
				    AffineMap identity =
				        AffineMap::getMultiDimIdentityMap(vals.size(), b.getContext());
				    return makeComposedFoldedAffineMin(b, loc, identity, vals);
				  }

				  /// Builds an affine max over `vals`, using an identity map with one
				  /// dimension per entry of `vals`.
				  OpFoldResult max(ArrayRef<OpFoldResult> vals) {
				    AffineMap identity =
				        AffineMap::getMultiDimIdentityMap(vals.size(), b.getContext());
				    return makeComposedFoldedAffineMax(b, loc, identity, vals);
				  }

				private:
				  OpBuilder &b;
				  Location loc;
				};

	} // namespace mlir			} // namespace mlir

	#endif // MLIR_DIALECT_AFFINE_UTILS_H			#endif // MLIR_DIALECT_AFFINE_UTILS_H

mlir/lib/Dialect/Tensor/IR/CMakeLists.txt

	Show First 20 Lines • Show All 51 Lines • ▼ Show 20 Lines
	add_mlir_dialect_library(MLIRTensorTilingInterfaceImpl			add_mlir_dialect_library(MLIRTensorTilingInterfaceImpl
	TensorTilingInterfaceImpl.cpp			TensorTilingInterfaceImpl.cpp

	ADDITIONAL_HEADER_DIRS			ADDITIONAL_HEADER_DIRS
	${MLIR_MAIN_INCLUDE_DIR}/mlir/Dialect/Tensor			${MLIR_MAIN_INCLUDE_DIR}/mlir/Dialect/Tensor

	LINK_LIBS PUBLIC			LINK_LIBS PUBLIC
	MLIRAffineDialect			MLIRAffineDialect
				MLIRDialectUtils
	MLIRIR			MLIRIR
	MLIRLinalgDialect			MLIRLinalgDialect
	MLIRSCFDialect			MLIRSCFDialect
	MLIRSupport			MLIRSupport
	MLIRTensorDialect			MLIRTensorDialect
				MLIRTensorUtils
	MLIRTilingInterface			MLIRTilingInterface
	)			)

mlir/lib/Dialect/Tensor/IR/TensorTilingInterfaceImpl.cpp

//===- TensorTilingInterface.cpp - Tiling Interface models - C++ -------===//		//===- TensorTilingInterface.cpp - Tiling Interface models - C++ -------===//
//		//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.		// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.		// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception		// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//		//
//===----------------------------------------------------------------------===//		//===----------------------------------------------------------------------===//

#include "mlir/Dialect/Tensor/IR/TensorTilingInterfaceImpl.h"		#include "mlir/Dialect/Tensor/IR/TensorTilingInterfaceImpl.h"
#include "mlir/Dialect/Affine/IR/AffineOps.h"		#include "mlir/Dialect/Affine/IR/AffineOps.h"
		#include "mlir/Dialect/Affine/Utils.h"
#include "mlir/Dialect/Arith/Utils/Utils.h"		#include "mlir/Dialect/Arith/Utils/Utils.h"
#include "mlir/Dialect/Linalg/IR/Linalg.h"		#include "mlir/Dialect/Linalg/IR/Linalg.h"
#include "mlir/Dialect/SCF/IR/SCF.h"		#include "mlir/Dialect/SCF/IR/SCF.h"
#include "mlir/Dialect/Tensor/IR/Tensor.h"		#include "mlir/Dialect/Tensor/IR/Tensor.h"
		#include "mlir/Dialect/Tensor/Utils/Utils.h"
		#include "mlir/Dialect/Utils/IndexingUtils.h"
#include "mlir/Interfaces/TilingInterface.h"		#include "mlir/Interfaces/TilingInterface.h"

using namespace mlir;		using namespace mlir;
using namespace mlir::tensor;		using namespace mlir::tensor;

namespace {		namespace {

struct PadOpTiling : public TilingInterface::ExternalModel<PadOpTiling, PadOp> {		struct PadOpTiling : public TilingInterface::ExternalModel<PadOpTiling, PadOp> {
Show All 40 Lines	getResultTilePosition(Operation *op, OpBuilder &b, unsigned resultNumber,
SmallVector<OpFoldResult> &resultOffsets,		SmallVector<OpFoldResult> &resultOffsets,
SmallVector<OpFoldResult> &resultSizes) const {		SmallVector<OpFoldResult> &resultSizes) const {
resultOffsets.assign(offsets.begin(), offsets.end());		resultOffsets.assign(offsets.begin(), offsets.end());
resultSizes.assign(sizes.begin(), sizes.end());		resultSizes.assign(sizes.begin(), sizes.end());
return success();		return success();
}		}
};		};

		struct PackOpTiling
		: public TilingInterface::ExternalModel<PackOpTiling, PackOp> {
		cheliniUnsubmitted Done Reply Inline Actions Can we please rename this method to something like: `computeInversePermutationForDimsPos`? And please let's also update the comment. chelini: Can we please rename this method to something like: `computeInversePermutationForDimsPos`? And…

		/// Returns the iterator types of the loops used to tile the pack op: one
		/// `utils::IteratorType::parallel` entry per source dimension
		/// (`packOp.getSourceRank()` entries in total).
		SmallVector<utils::IteratorType> getLoopIteratorTypes(Operation *op) const {
		// Note that here we only consider untiled dimensions and outer tiled data
		// dimensions, the inner tiled data dimensions are materialized when
		nicolasvasilacheUnsubmitted Done Reply Inline Actions I have strong doubts that this is needed .. can we compose existing stuff instead rather than create new one-off functions? I already spotted this undoPermutationToVector and I suspect this is another instance. Please increase the doc significantly because I cannot tell what this wants to do. nicolasvasilache: I have strong doubts that this is needed .. can we compose existing stuff instead rather than…
		hanchungAuthorUnsubmitted Done Reply Inline Actions it's not needed after I revisited the semantics of outerDimsPerm. hanchung: it's not needed after I revisited the semantics of outerDimsPerm.
		// building the body of the operation.
		auto packOp = cast<PackOp>(op);
		// Every loop is marked parallel; the vector length equals the source rank.
		SmallVector<utils::IteratorType> iteratorTypes(
		packOp.getSourceRank(), utils::IteratorType::parallel);
		return iteratorTypes;
		}

		/// Returns the iteration domain of the pack op as one Range per source
		/// dimension. Each range has offset 0 and stride 1 (index constants created
		/// at the pack op's location) and a size read from the first reified result
		/// shape, `resultShape[0][dim]`.
		SmallVector<Range> getIterationDomain(Operation *op, OpBuilder &b) const {
		// Scoped guard on the builder's insertion point for the duration of this
		// function.
		OpBuilder::InsertionGuard guard(b);
		auto packOp = cast<PackOp>(op);
		Location loc = packOp.getLoc();
		int64_t rank = packOp.getSourceRank();
		Value zero = b.create<arith::ConstantIndexOp>(loc, 0);
		Value one = b.create<arith::ConstantIndexOp>(loc, 1);
		ReifiedRankedShapedTypeDims resultShape;
		// The status returned by reifyResultShapes is deliberately discarded.
		(void)packOp.reifyResultShapes(b, resultShape);
		SmallVector<Range> loopRanges(rank);
		// Only the first `rank` (source rank) dimensions of the result shape are
		// used as loop bounds.
		for (auto dim : llvm::seq<int64_t>(0, rank)) {
		loopRanges[dim].offset = zero;
		loopRanges[dim].stride = one;
		loopRanges[dim].size = resultShape[0][dim];
		}
		return loopRanges;
		}
		nicolasvasilacheUnsubmitted Done Reply Inline Actions can you go grab `AffineBuilder` from iree-dialects/Dialect/LinalgExt/Transforms/Utils.h and put it in a reusable place? This has been written one too many times. nicolasvasilache: can you go grab `AffineBuilder` from iree-dialects/Dialect/LinalgExt/Transforms/Utils.h and put…
		hanchungAuthorUnsubmitted Done Reply Inline Actions moved to Affine/Utils hanchung: moved to Affine/Utils

		rengolinUnsubmitted Done Reply Inline Actions typo: dimensions rengolin: typo: dimensions
		/// Creates the tiled version of the pack op for the tile given by `offsets`
		/// and `sizes`. It extracts a slice of the source and a slice of the
		/// destination, then builds a new PackOp on those slices with the original
		/// op's attributes. Returns a vector holding that single op, or an empty
		/// vector when getResultTilePosition fails.
		SmallVector<Operation *>
		getTiledImplementation(Operation *op, OpBuilder &b,
		ArrayRef<OpFoldResult> offsets,
		ArrayRef<OpFoldResult> sizes) const {
		auto packOp = cast<PackOp>(op);
		Location loc = packOp.getLoc();

		// The tiling is applied on interchanged dimensions. We have to undo the
		// interchange to map sizes and offsets to the original input.
		int64_t inputRank = packOp.getSourceRank();
		ArrayRef<int64_t> dimsToOuterBlock(packOp.getOuterDimsPerm());
		SmallVector<OpFoldResult> origOffsets(offsets.begin(), offsets.end());
		SmallVector<OpFoldResult> origSizes(sizes.begin(), sizes.end());
		// An empty outer_dims_perm means no interchange, so the copies above are
		// used unchanged.
		if (!dimsToOuterBlock.empty()) {
		SmallVector<int64_t> inversedPerm =
		invertPermutationVector(dimsToOuterBlock);
		applyPermutationToVector<OpFoldResult>(origOffsets, inversedPerm);
		applyPermutationToVector<OpFoldResult>(origSizes, inversedPerm);
		}

		DenseMap<int64_t, OpFoldResult> dimAndTileMapping =
		packOp.getDimAndTileMapping();
		SmallVector<OpFoldResult> srcDimValues =
		tensor::createDimValues(b, loc, packOp.getSource());
		// Offsets and sizes of the source slice, one entry per source dimension.
		SmallVector<OpFoldResult> inputIndices, inputSizes;
		for (auto dim : llvm::seq<int64_t>(0, inputRank)) {
		using AV = AffineValueExpr;
		cheliniUnsubmitted Done Reply Inline Actions (Optional) I find this method a bit too long. Maybe we should bring the lambdas out? chelini: (Optional) I find this method a bit too long. Maybe we should bring the lambdas out?
		AffineBuilder ab(b, loc);
		AffineExpr dim0, dim1, sym;
		bindDims(b.getContext(), dim0, dim1);
		bindSymbols(b.getContext(), sym);
		if (dimAndTileMapping.count(dim)) {
		// If the data dimension is tiled, the i-th index is the product of
		// offset_i and tile_i, and the i-th size is the product of sizes_i and
		// tile_i.
		auto avOffset = AV(dim0).bind(origOffsets[dim]);
		auto avSize = AV(dim0).bind(origSizes[dim]);
		auto avTileSize = AV(sym).bind(dimAndTileMapping[dim]);
		inputIndices.push_back(ab.mul(avOffset, avTileSize));
		inputSizes.push_back(ab.mul(avSize, avTileSize));
		} else {
		// Untiled dimension: offset and size carry over unchanged.
		inputIndices.push_back(origOffsets[dim]);
		inputSizes.push_back(origSizes[dim]);
		}

		// Limit the size of the input operand for incomplete tiles.
		// The size becomes min(size, srcDim - index).
		OpFoldResult dimSize = srcDimValues[dim];
		auto avDimSize = AV(dim0).bind(dimSize);
		auto avInputIdx = AV(dim1).bind(inputIndices.back());
		inputSizes.back() =
		cheliniUnsubmitted Done Reply Inline Actions We can use an `ArrayRef` here. chelini: We can use an `ArrayRef` here.
		ab.min({inputSizes.back(), ab.sub(avDimSize, avInputIdx)});
		}

		auto oneAttr = b.getI64IntegerAttr(1);
		SmallVector<OpFoldResult> strides(inputRank, oneAttr);

		// Operand order for the new op: source slice, destination slice, optional
		// padding value, then the inner tiles.
		SmallVector<Value> tiledOperands;
		tiledOperands.push_back(b.create<ExtractSliceOp>(
		loc, packOp.getSource(), inputIndices, inputSizes, strides));

		SmallVector<OpFoldResult> outputOffsets, outputSizes;
		if (failed(getResultTilePosition(op, b, 0, offsets, sizes, outputOffsets,
		outputSizes)))
		return {};

		// The destination has (dest rank - source rank) more dimensions than the
		// source; extend the unit strides to cover them.
		strides.append(packOp.getDestRank() - inputRank, oneAttr);
		auto extractSlice = b.create<ExtractSliceOp>(
		loc, packOp.getDest(), outputOffsets, outputSizes, strides);
		tiledOperands.push_back(extractSlice);

		if (auto val = packOp.getPaddingValue())
		tiledOperands.push_back(val);
		for (auto tile : packOp.getInnerTiles())
		tiledOperands.push_back(tile);

		// The result type of the tiled op is the type of the destination slice.
		Operation *tiledPackOp = b.create<PackOp>(
		loc, TypeRange{extractSlice.getType()}, tiledOperands, op->getAttrs());

		return {tiledPackOp};
		}

		/// Computes the offsets and sizes of the result tile that corresponds to
		/// the iteration-space tile `offsets`/`sizes`. `resultOffsets` receives
		/// `offsets` followed by zeros, and `resultSizes` receives `sizes` followed
		/// by the reified result dimensions, for the trailing
		/// (dest rank - source rank) dimensions. `resultNumber` is not read.
		/// Always returns success().
		LogicalResult
		getResultTilePosition(Operation *op, OpBuilder &b, unsigned resultNumber,
		ArrayRef<OpFoldResult> offsets,
		ArrayRef<OpFoldResult> sizes,
		SmallVector<OpFoldResult> &resultOffsets,
		SmallVector<OpFoldResult> &resultSizes) const {
		// The iteration domain is over outer dimensions of packed layout. In this
		// context, the outer dimensions of `resultOffsets` are `offsets`. The
		// inner dimensions of `resultOffsets` are zeros because tiling is not
		// applied to them.
		auto packOp = cast<PackOp>(op);
		int64_t inputRank = packOp.getSourceRank();
		int64_t outputRank = packOp.getDestRank();
		auto zeroAttr = b.getI64IntegerAttr(0);
		resultOffsets.assign(offsets.begin(), offsets.end());
		resultOffsets.append(outputRank - inputRank, zeroAttr);

		ReifiedRankedShapedTypeDims outputShape;
		// The status returned by reifyResultShapes is deliberately discarded.
		(void)packOp.reifyResultShapes(b, outputShape);
		resultSizes.assign(sizes.begin(), sizes.end());
		cheliniUnsubmitted Done Reply Inline Actions single stmt body, drop braces? chelini: single stmt body, drop braces?
		// The trailing dimensions take their full reified size.
		for (auto dataTileDim : llvm::seq<unsigned>(inputRank, outputRank))
		resultSizes.push_back(getAsOpFoldResult(outputShape[0][dataTileDim]));

		return success();
		}
		};

} // namespace		} // namespace

Operation *tensor::bubbleUpPadSlice(OpBuilder &b, tensor::PadOp padOp,		Operation *tensor::bubbleUpPadSlice(OpBuilder &b, tensor::PadOp padOp,
ArrayRef<OpFoldResult> offsets,		ArrayRef<OpFoldResult> offsets,
ArrayRef<OpFoldResult> sizes,		ArrayRef<OpFoldResult> sizes,
bool generateZeroSliceGuard) {		bool generateZeroSliceGuard) {
// Only constant padding value supported.		// Only constant padding value supported.
Value padValue = padOp.getConstantPaddingValue();		Value padValue = padOp.getConstantPaddingValue();
if (!padValue)		if (!padValue)
		rengolinUnsubmitted Done Reply Inline Actions This seems to be created from line 209: tiledOperands.push_back(b.create<ExtractSliceOp>( loc, packOp.getDest(), outputOffsets, outputSizes, strides)); Then some push_backs, then this constant indexing. It's confusing. If it's always from the same op, then it would be much clearer if this was just: auto extractOp = b.create<ExtractSliceOp>(...); tiledOperands.push_back(extractOp); ... Operation tiledPackOp = b.create<PackOp>(loc, {extractOp.getType()}, tiledOperands, op->getAttrs()); rengolin:* This seems to be created from line 209: ``` tiledOperands.push_back(b.
		hanchungAuthorUnsubmitted Done Reply Inline Actions good point, done! hanchung: good point, done!
return nullptr;		return nullptr;

// Helper variables and functions for various arithmetic operations. These		// Helper variables and functions for various arithmetic operations. These
// are used extensively for computing new offset/length and padding values.		// are used extensively for computing new offset/length and padding values.
Location loc = padOp->getLoc();		Location loc = padOp->getLoc();
AffineExpr dim0, dim1;		AffineExpr dim0, dim1;
bindDims(b.getContext(), dim0, dim1);		bindDims(b.getContext(), dim0, dim1);
// Add two integers.		// Add two integers.
auto addMap = AffineMap::get(2, 0, {dim0 + dim1});		auto addMap = AffineMap::get(2, 0, {dim0 + dim1});
auto add = [&](Value v1, Value v2) {		auto add = [&](Value v1, Value v2) {
		cheliniUnsubmitted Not Done Reply Inline Actions I did not quite get this comment. Specifically this sentence: `In this context, the outer dimensions of result tile position is the same.` chelini: I did not quite get this comment. Specifically this sentence: `In this context, the outer…
return b.createOrFold<AffineApplyOp>(loc, addMap, ValueRange{v1, v2});		return b.createOrFold<AffineApplyOp>(loc, addMap, ValueRange{v1, v2});
};		};
// Subtract two integers.		// Subtract two integers.
auto subMap = AffineMap::get(2, 0, {dim0 - dim1});		auto subMap = AffineMap::get(2, 0, {dim0 - dim1});
auto sub = [&](Value v1, Value v2) {		auto sub = [&](Value v1, Value v2) {
return b.createOrFold<AffineApplyOp>(loc, subMap, ValueRange{v1, v2});		return b.createOrFold<AffineApplyOp>(loc, subMap, ValueRange{v1, v2});
};		};
// Take the minimum of two integers.		// Take the minimum of two integers.
auto idMap = AffineMap::getMultiDimIdentityMap(2, b.getContext());		auto idMap = AffineMap::getMultiDimIdentityMap(2, b.getContext());
auto min = [&](Value v1, Value v2) {		auto min = [&](Value v1, Value v2) {
return b.createOrFold<AffineMinOp>(loc, idMap, ValueRange{v1, v2});		return b.createOrFold<AffineMinOp>(loc, idMap, ValueRange{v1, v2});
		cheliniUnsubmitted Not Done Reply Inline Actions I would just (void) here, like we did in line 119. `reifyResultShapes` does not fail. chelini: I would just (void) here, like we did in line 119. `reifyResultShapes` does not fail.
};		};
// Take the maximum of two integers.		// Take the maximum of two integers.
		cheliniUnsubmitted Not Done Reply Inline Actions Can you please add a comment here? It is not clear to me why we have these checks. chelini: Can you please add a comment here? It is not clear to me why we have these checks.
auto max = [&](Value v1, Value v2) {		auto max = [&](Value v1, Value v2) {
return b.createOrFold<AffineMaxOp>(loc, idMap, ValueRange{v1, v2});		return b.createOrFold<AffineMaxOp>(loc, idMap, ValueRange{v1, v2});
};		};
// Zero index-typed integer.		// Zero index-typed integer.
auto zero = b.create<arith::ConstantIndexOp>(loc, 0);		auto zero = b.create<arith::ConstantIndexOp>(loc, 0);

// Helper function for filling static/dynamic low/high padding indices		// Helper function for filling static/dynamic low/high padding indices
// vectors of PadOp.		// vectors of PadOp.
▲ Show 20 Lines • Show All 166 Lines • ▼ Show 20 Lines	Operation *tensor::bubbleUpPadSlice(OpBuilder &b, tensor::PadOp padOp,
}		}
return createPadOfExtractSlice();		return createPadOfExtractSlice();
}		}

void mlir::tensor::registerTilingInterfaceExternalModels(		void mlir::tensor::registerTilingInterfaceExternalModels(
DialectRegistry &registry) {		DialectRegistry &registry) {
registry.addExtension(+[](MLIRContext *ctx, TensorDialect *dialect) {		registry.addExtension(+[](MLIRContext *ctx, TensorDialect *dialect) {
tensor::PadOp::attachInterface<PadOpTiling>(*ctx);		tensor::PadOp::attachInterface<PadOpTiling>(*ctx);
		tensor::PackOp::attachInterface<PackOpTiling>(*ctx);
});		});
}		}

mlir/test/Dialect/Tensor/tiling.mlir

This file was added.

				// RUN: mlir-opt %s -test-transform-dialect-interpreter -canonicalize -cse -split-input-file | FileCheck %s

				// CHECK-DAG: #[[MAP0:.+]] = affine_map<(d0) -> (d0 * 32)>
				// CHECK-DAG: #[[MAP1:.+]] = affine_map<(d0) -> (d0 * -32 + 128, 64)>
				// CHECK-DAG: #[[MAP2:.+]] = affine_map<(d0) -> (d0 * -32 + 256, 128)>
				// CHECK: func.func @NC_to_NCnc
				// CHECK-SAME: %[[IN:.*]]: tensor<128x256xf32>,
				// CHECK-SAME: %[[OUT:.*]]: tensor<4x8x32x32xf32>) -> tensor<4x8x32x32xf32> {
				// CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index
				// CHECK-DAG: %[[C4:.*]] = arith.constant 4 : index
				// CHECK-DAG: %[[C8:.*]] = arith.constant 8 : index
				// CHECK-DAG: %[[C2:.*]] = arith.constant 2 : index
				// CHECK: %[[RES0:.*]] = scf.for %[[N:.*]] = %[[C0]] to %[[C4]] step %[[C2]] iter_args(%[[ITER0:.*]] = %[[OUT]]) -> (tensor<4x8x32x32xf32>) {
				// CHECK: %[[RES1:.+]] = scf.for %[[C:.*]] = %[[C0]] to %[[C8]] step %[[C4]] iter_args(%[[ITER1:.*]] = %[[ITER0]]) -> (tensor<4x8x32x32xf32>) {
				// CHECK-DAG: %[[IN_N:.+]] = affine.apply #[[MAP0]](%[[N]])
				// CHECK-DAG: %[[IN_N_SZ:.*]] = affine.min #[[MAP1]]
				// CHECK-DAG: %[[IN_C:.+]] = affine.apply #[[MAP0]](%[[C]])
				// CHECK-DAG: %[[IN_C_SZ:.*]] = affine.min #[[MAP2]]
				// CHECK: %[[SUB_IN:.*]] = tensor.extract_slice %[[IN]][%[[IN_N]], %[[IN_C]]] [%[[IN_N_SZ]], %[[IN_C_SZ]]] [1, 1] : tensor<128x256xf32> to tensor<?x?xf32>
				// CHECK: %[[SUB_OUT:.*]] = tensor.extract_slice %[[ITER1]][%[[N]], %[[C]], 0, 0] [2, 4, 32, 32] [1, 1, 1, 1] : tensor<4x8x32x32xf32> to tensor<2x4x32x32xf32>
				// CHECK: %[[CAST_OUT:.*]] = tensor.cast %[[SUB_OUT]]
				// CHECK: %[[SUB_RES:.*]] = tensor.pack
				// CHECK-SAME: %[[SUB_IN]] inner_dims_pos = [0, 1] inner_tiles = [32, 32] into %[[CAST_OUT]]
				// CHECK: %[[CAST_RES:.*]] = tensor.cast %[[SUB_RES]]
				// CHECK: %[[INSERT:.*]] = tensor.insert_slice %[[CAST_RES]] into %[[ITER1]]
				// CHECK: scf.yield %[[INSERT]] : tensor<4x8x32x32xf32>
				// CHECK: }
				// CHECK: scf.yield %[[RES1:.*]] : tensor<4x8x32x32xf32>
				// CHECK: }
				// CHECK: return %[[RES0:.*]] : tensor<4x8x32x32xf32>
				// CHECK: }
				func.func @NC_to_NCnc(%arg0: tensor<128x256xf32>, %arg1: tensor<4x8x32x32xf32>) -> tensor<4x8x32x32xf32> {
				%0 = tensor.pack %arg0 inner_dims_pos = [0, 1] inner_tiles = [32, 32] into %arg1 : tensor<128x256xf32> -> tensor<4x8x32x32xf32>
				return %0 : tensor<4x8x32x32xf32>
				}

				transform.sequence failures(propagate) {
				^bb0(%arg1: !pdl.operation):
				%0 = transform.structured.match ops{["tensor.pack"]} in %arg1
				%1, %loops:2 = transform.structured.tile_to_scf_for %0 [2, 4]
				}

				// -----

				// CHECK: #[[MAP0:.+]] = affine_map<(d0) -> (d0 * 8)>
				// CHECK: #[[MAP1:.+]] = affine_map<(d0) -> (d0 * -8 + 256, 16)>
				// CHECK: func.func @KC_to_CKkc
				// CHECK-SAME: %[[IN:[A-Za-z0-9]+]]:
				// CHECK-SAME: %[[OUT:[A-Za-z0-9]+]]:
				// CHECK-DAG: %[[C0:.+]] = arith.constant 0 : index
				// CHECK-DAG: %[[C2:.+]] = arith.constant 2 : index
				// CHECK-DAG: %[[C32:.+]] = arith.constant 32 : index
				// CHECK: scf.for %[[C:.+]] = %[[C0]] to %[[C32]] step %[[C2]]
				// CHECK-DAG: %[[IN_C:.+]] = affine.apply #[[MAP0]](%[[C]])
				// CHECK-DAG: %[[IN_C_SZ:.+]] = affine.min #[[MAP1]](%[[C]])
				// CHECK: %[[INPUT_SLICE:.+]] = tensor.extract_slice %[[IN]]
				// CHECK-SAME: [0, %[[IN_C]]] [128, %[[IN_C_SZ]]]
				// CHECK: %[[CAST_IN:.+]] = tensor.cast %[[INPUT_SLICE]]
				// CHECK: %[[OUTPUT_SLICE:.+]] = tensor.extract_slice %{{.+}}[%[[C]], 0, 0, 0] [2, 4, 32, 8]
				// CHECK: %[[CAST_OUT:.+]] = tensor.cast %[[OUTPUT_SLICE]]
				// CHECK: tensor.pack
				// CHECK-SAME: %[[CAST_IN]] outer_dims_perm = [1, 0] inner_dims_pos = [0, 1] inner_tiles = [32, 8]
				// CHECK-SAME: into %[[CAST_OUT]]
				func.func @KC_to_CKkc(%arg0: tensor<128x256xf32>, %arg1: tensor<32x4x32x8xf32>) -> tensor<32x4x32x8xf32> {
				%0 = tensor.pack %arg0 outer_dims_perm = [1, 0] inner_dims_pos = [0, 1] inner_tiles = [32, 8] into %arg1 : tensor<128x256xf32> -> tensor<32x4x32x8xf32>
				return %0 : tensor<32x4x32x8xf32>
				}

				transform.sequence failures(propagate) {
				^bb0(%arg1: !pdl.operation):
				%0 = transform.structured.match ops{["tensor.pack"]} in %arg1
				%1, %loops:2 = transform.structured.tile_to_scf_for %0 [2, 4]
				}

				// -----

				// CHECK-DAG: #[[MAP0:.+]] = affine_map<(d0) -> (d0 * 2)>
				// CHECK-DAG: #[[MAP1:.+]] = affine_map<(d0) -> (d0 * -2 + 15, 8)>
				// CHECK: func.func @pad_and_pack_static(
				// CHECK-SAME: %[[IN:.*]]: tensor<13x15xf32>,
				// CHECK-SAME: %[[OUT:.*]]: tensor<2x8x8x2xf32>,
				// CHECK-SAME: %[[PAD:.*]]: f32) -> tensor<2x8x8x2xf32> {
				// CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index
				// CHECK-DAG: %[[C4:.*]] = arith.constant 4 : index
				// CHECK-DAG: %[[C8:.*]] = arith.constant 8 : index
				// CHECK-DAG: %[[RES0:.*]] = scf.for %[[J:.*]] = %[[C0]] to %[[C8]] step %[[C4]] iter_args(%[[ITER1:.*]] = %[[OUT]]) -> (tensor<2x8x8x2xf32>) {
				// CHECK-DAG: %[[IN_J:.*]] = affine.apply #[[MAP0]](%[[J]])
				// CHECK-DAG: %[[IN_J_SZ:.*]] = affine.min #[[MAP1]](%[[J]])
				// CHECK: %[[SUB_IN:.*]] = tensor.extract_slice %[[IN]][0, %[[IN_J]]] [13, %[[IN_J_SZ]]] [1, 1]
				// CHECK: %[[CAST_IN:.*]] = tensor.cast %[[SUB_IN]]
				// CHECK: %[[SUB_OUT:.*]] = tensor.extract_slice %[[ITER1]][0, %[[J]], 0, 0] [2, 4, 8, 2] [1, 1, 1, 1]
				// CHECK: %[[CAST_OUT:.*]] = tensor.cast %[[SUB_OUT]]
				// CHECK: %[[SUB_RES:.*]] = tensor.pack
				// CHECK-SAME: %[[CAST_IN]] padding_value(%[[PAD]] : f32) inner_dims_pos = [0, 1] inner_tiles = [8, 2]
				// CHECK-SAME: into %[[CAST_OUT]]
				// CHECK: %[[CAST_RES:.*]] = tensor.cast %[[SUB_RES]]
				// CHECK: %[[INSERT:.*]] = tensor.insert_slice %[[CAST_RES]] into %[[ITER1]]
				// CHECK: scf.yield %[[INSERT]] : tensor<2x8x8x2xf32>
				// CHECK: }
				// CHECK: return %[[RES0:.*]] : tensor<2x8x8x2xf32>
				// CHECK: }
				func.func @pad_and_pack_static(%input: tensor<13x15xf32>, %output: tensor<2x8x8x2xf32>, %pad: f32) -> tensor<2x8x8x2xf32> {
				%0 = tensor.pack %input padding_value(%pad : f32) inner_dims_pos = [0, 1] inner_tiles = [8, 2] into %output : tensor<13x15xf32> -> tensor<2x8x8x2xf32>
				return %0 : tensor<2x8x8x2xf32>
				}

				transform.sequence failures(propagate) {
				^bb0(%arg1: !pdl.operation):
				%0 = transform.structured.match ops{["tensor.pack"]} in %arg1
				%1, %loops:2 = transform.structured.tile_to_scf_for %0 [2, 4]
				}

				// -----

				// CHECK-DAG: #[[MAP0:.+]] = affine_map<(d0)[s0] -> (-d0 + s0, 2)>
				// CHECK-DAG: #[[MAP1:.+]] = affine_map<(d0)[s0] -> (-d0 + s0, 4)>
				// CHECK-DAG: #[[MAP2:.+]] = affine_map<(d0) -> (d0 * 8)>
				// CHECK-DAG: #[[MAP3:.+]] = affine_map<(d0, d1)[s0] -> (d1 * -8 + s0, d0 * 8)>
				// CHECK-DAG: #[[MAP4:.+]] = affine_map<(d0) -> (d0 * 2)>
				// CHECK-DAG: #[[MAP5:.+]] = affine_map<(d0, d1)[s0] -> (d1 * -2 + s0, d0 * 2)>
				// CHECK: func.func @pad_and_pack_partially_dynamic(
				// CHECK-SAME: %[[IN:.*]]: tensor<?x?xf32>,
				// CHECK-SAME: %[[OUT:.*]]: tensor<?x?x8x2xf32>,
				// CHECK-SAME: %[[PAD:.*]]: f32) -> tensor<?x?x8x2xf32> {
				// CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index
				// CHECK-DAG: %[[C1:.*]] = arith.constant 1 : index
				// CHECK-DAG: %[[C2:.*]] = arith.constant 2 : index
				// CHECK-DAG: %[[C4:.*]] = arith.constant 4 : index
				// CHECK-DAG: %[[OUT_D0:.*]] = tensor.dim %[[OUT]], %[[C0]] : tensor<?x?x8x2xf32>
				// CHECK-DAG: %[[OUT_D1:.*]] = tensor.dim %[[OUT]], %[[C1]] : tensor<?x?x8x2xf32>
				// CHECK: %[[RES0:.*]] = scf.for %[[I:.*]] = %[[C0]] to %[[OUT_D0]] step %[[C2]] iter_args(%[[ITER0:.*]] = %[[OUT]]) -> (tensor<?x?x8x2xf32>) {
				// CHECK-DAG: %[[OUT_I_SZ:.*]] = affine.min #[[MAP0]](%[[I]])[%[[OUT_D0]]]
				// CHECK: %[[RES1:.*]] = scf.for %[[J:.*]] = %[[C0]] to %[[OUT_D1]] step %[[C4]] iter_args(%[[ITER1:.*]] = %[[ITER0]]) -> (tensor<?x?x8x2xf32>) {
				// CHECK-DAG: %[[OUT_J_SZ:.*]] = affine.min #[[MAP1]](%[[J]])[%[[OUT_D1]]]
				// CHECK-DAG: %[[IN_I:.*]] = affine.apply #[[MAP2]](%[[I]])
				// CHECK-DAG: %[[IN_I_SZ:.*]] = affine.min #[[MAP3]]
				// CHECK-DAG: %[[IN_J:.*]] = affine.apply #[[MAP4]](%[[J]])
				// CHECK-DAG: %[[IN_J_SZ:.*]] = affine.min #[[MAP5]]
				// CHECK: %[[SUB_IN:.*]] = tensor.extract_slice %[[IN]][%[[IN_I]], %[[IN_J]]] [%[[IN_I_SZ]], %[[IN_J_SZ]]] [1, 1] : tensor<?x?xf32> to tensor<?x?xf32>
				// CHECK: %[[SUB_OUT:.*]] = tensor.extract_slice %[[ITER1]][%[[I]], %[[J]], 0, 0] [%[[OUT_I_SZ]], %[[OUT_J_SZ]], 8, 2] [1, 1, 1, 1] : tensor<?x?x8x2xf32> to tensor<?x?x8x2xf32>
				// CHECK: %[[SUB_RES:.*]] = tensor.pack
				// CHECK-SAME: %[[SUB_IN]] padding_value(%[[PAD]] : f32) inner_dims_pos = [0, 1] inner_tiles = [8, 2]
				// CHECK-SAME: into %[[SUB_OUT]]
				// CHECK: %[[INSERT:.*]] = tensor.insert_slice %[[SUB_RES]] into %[[ITER1]]
				// CHECK: scf.yield %[[INSERT]] : tensor<?x?x8x2xf32>
				// CHECK: }
				// CHECK: scf.yield %[[RES1:.*]] : tensor<?x?x8x2xf32>
				// CHECK: }
				// CHECK: return %[[VAL_34:.*]] : tensor<?x?x8x2xf32>
				// CHECK: }
				func.func @pad_and_pack_partially_dynamic(%input: tensor<?x?xf32>, %output: tensor<?x?x8x2xf32>, %pad: f32) -> tensor<?x?x8x2xf32> {
				%0 = tensor.pack %input padding_value(%pad : f32) inner_dims_pos = [0, 1] inner_tiles = [8, 2] into %output : tensor<?x?xf32> -> tensor<?x?x8x2xf32>
				return %0 : tensor<?x?x8x2xf32>
				}

				transform.sequence failures(propagate) {
				^bb0(%arg1: !pdl.operation):
				%0 = transform.structured.match ops{["tensor.pack"]} in %arg1
				%1, %loops:2 = transform.structured.tile_to_scf_for %0 [2, 4]
				}

				// -----

				// CHECK-DAG: #[[MAP0:.+]] = affine_map<(d0)[s0] -> (-d0 + s0, 2)>
				// CHECK-DAG: #[[MAP1:.+]] = affine_map<(d0)[s0] -> (-d0 + s0, 4)>
				// CHECK-DAG: #[[MAP2:.+]] = affine_map<(d0)[s0] -> (d0 * s0)>
				// CHECK-DAG: #[[MAP3:.+]] = affine_map<(d0, d1)[s0, s1] -> (d0 * s0, -(d1 * s0) + s1)>
				// CHECK: func.func @pad_and_pack_fully_dynamic(
				// CHECK-SAME: %[[IN:.*]]: tensor<?x?xf32>,
				// CHECK-SAME: %[[OUT:.*]]: tensor<?x?x?x?xf32>,
				// CHECK-SAME: %[[PAD:.*]]: f32,
				// CHECK-SAME: %[[TILE_0:.*]]: index,
				// CHECK-SAME: %[[TILE_1:.*]]: index) -> tensor<?x?x?x?xf32> {
				// CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index
				// CHECK-DAG: %[[C1:.*]] = arith.constant 1 : index
				// CHECK-DAG: %[[C2:.*]] = arith.constant 2 : index
				// CHECK-DAG: %[[C3:.*]] = arith.constant 3 : index
				// CHECK-DAG: %[[C4:.*]] = arith.constant 4 : index
				// CHECK-DAG: %[[OUT_D0:.*]] = tensor.dim %[[OUT]], %[[C0]] : tensor<?x?x?x?xf32>
				// CHECK-DAG: %[[OUT_D1:.*]] = tensor.dim %[[OUT]], %[[C1]] : tensor<?x?x?x?xf32>
				// CHECK: %[[RES0:.*]] = scf.for %[[I:.*]] = %[[C0]] to %[[OUT_D0]] step %[[C2]] iter_args(%[[ITER0:.*]] = %[[OUT]]) -> (tensor<?x?x?x?xf32>) {
				// CHECK: %[[OUT_I_SZ:.*]] = affine.min #[[MAP0]](%[[I]])[%[[OUT_D0]]]
				// CHECK: %[[RES1:.*]] = scf.for %[[J:.*]] = %[[C0]] to %[[OUT_D1]] step %[[C4]] iter_args(%[[ITER1:.*]] = %[[ITER0]]) -> (tensor<?x?x?x?xf32>) {
				// CHECK: %[[OUT_J_SZ:.*]] = affine.min #[[MAP1]](%[[J]])[%[[OUT_D1]]]
				// CHECK: %[[IN_D0:.*]] = tensor.dim %[[IN]], %[[C0]]
				// CHECK: %[[IN_D1:.*]] = tensor.dim %[[IN]], %[[C1]]
				// CHECK: %[[IN_I:.*]] = affine.apply #[[MAP2]](%[[I]])[%[[TILE_0]]]
				// CHECK: %[[IN_I_SZ:.*]] = affine.min #[[MAP3]](%[[OUT_I_SZ]], %[[I]])[%[[TILE_0]], %[[IN_D0]]]
				// CHECK: %[[IN_J:.*]] = affine.apply #[[MAP2]](%[[J]])[%[[TILE_1]]]
				// CHECK: %[[IN_J_SZ:.*]] = affine.min #[[MAP3]](%[[OUT_J_SZ]], %[[J]])[%[[TILE_1]], %[[IN_D1]]]
				// CHECK: %[[SUB_IN:.*]] = tensor.extract_slice %[[IN]][%[[IN_I]], %[[IN_J]]] [%[[IN_I_SZ]], %[[IN_J_SZ]]] [1, 1] : tensor<?x?xf32> to tensor<?x?xf32>
				// CHECK: %[[OUT_D2:.+]] = tensor.dim %[[OUT]], %[[C2]]
				// CHECK: %[[OUT_D3:.+]] = tensor.dim %[[OUT]], %[[C3]]
				// CHECK: %[[SUB_OUT:.*]] = tensor.extract_slice %[[ITER1]][%[[I]], %[[J]], 0, 0] [%[[OUT_I_SZ]], %[[OUT_J_SZ]], %[[OUT_D2]], %[[OUT_D3]]] [1, 1, 1, 1] : tensor<?x?x?x?xf32> to tensor<?x?x?x?xf32>
				// CHECK: %[[PACK:.*]] = tensor.pack
				// CHECK-SAME: %[[SUB_IN]] padding_value(%[[PAD]] : f32) inner_dims_pos = [0, 1] inner_tiles = [%[[TILE_0]], %[[TILE_1]]]
				// CHECK-SAME: into %[[SUB_OUT]]
				// CHECK: %[[INSERT:.*]] = tensor.insert_slice %[[PACK]] into %[[ITER1]]
				// CHECK: scf.yield %[[INSERT]] : tensor<?x?x?x?xf32>
				// CHECK: }
				// CHECK: scf.yield %[[RES1:.*]] : tensor<?x?x?x?xf32>
				// CHECK: }
				// CHECK: return %[[RES0:.*]] : tensor<?x?x?x?xf32>
				// CHECK: }
				func.func @pad_and_pack_fully_dynamic(%source: tensor<?x?xf32>, %dest: tensor<?x?x?x?xf32>, %pad: f32, %tile_n : index, %tile_m : index) -> tensor<?x?x?x?xf32> {
				%0 = tensor.pack %source padding_value(%pad : f32) inner_dims_pos = [0, 1] inner_tiles = [%tile_n, %tile_m] into %dest : tensor<?x?xf32> -> tensor<?x?x?x?xf32>
				return %0 : tensor<?x?x?x?xf32>
				}

				transform.sequence failures(propagate) {
				^bb0(%arg1: !pdl.operation):
				%0 = transform.structured.match ops{["tensor.pack"]} in %arg1
				%1, %loops:2 = transform.structured.tile_to_scf_for %0 [2, 4]
				}

utils/bazel/llvm-project-overlay/mlir/BUILD.bazel

This file is larger than 256 KB, so syntax highlighting is disabled by default.

	Show First 20 Lines • Show All 5,367 Lines • ▼ Show 20 Lines

	cc_library(			cc_library(
	name = "TensorTilingInterfaceImpl",			name = "TensorTilingInterfaceImpl",
	srcs = ["lib/Dialect/Tensor/IR/TensorTilingInterfaceImpl.cpp"],			srcs = ["lib/Dialect/Tensor/IR/TensorTilingInterfaceImpl.cpp"],
	hdrs = ["include/mlir/Dialect/Tensor/IR/TensorTilingInterfaceImpl.h"],			hdrs = ["include/mlir/Dialect/Tensor/IR/TensorTilingInterfaceImpl.h"],
	includes = ["include"],			includes = ["include"],
	deps = [			deps = [
	":AffineDialect",			":AffineDialect",
				":AffineUtils",
	":ArithUtils",			":ArithUtils",
				":DialectUtils",
	":IR",			":IR",
	":LinalgDialect",			":LinalgDialect",
	":SCFDialect",			":SCFDialect",
	":TensorDialect",			":TensorDialect",
				":TensorUtils",
	":TilingInterface",			":TilingInterface",
	"//llvm:Support",			"//llvm:Support",
	],			],
	)			)

	cc_library(			cc_library(
	name = "TensorUtils",			name = "TensorUtils",
	srcs = ["lib/Dialect/Tensor/Utils/Utils.cpp"],			srcs = ["lib/Dialect/Tensor/Utils/Utils.cpp"],
	▲ Show 20 Lines • Show All 5,024 Lines • Show Last 20 Lines

This is an archive of the discontinued LLVM Phabricator instance.

[mlir][tensor] Implement TilingInterface for tensor.pack op.
ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 480234

mlir/include/mlir/Dialect/Affine/Utils.h

mlir/lib/Dialect/Tensor/IR/CMakeLists.txt

mlir/lib/Dialect/Tensor/IR/TensorTilingInterfaceImpl.cpp

mlir/test/Dialect/Tensor/tiling.mlir

utils/bazel/llvm-project-overlay/mlir/BUILD.bazel

This is an archive of the discontinued LLVM Phabricator instance.

[mlir][tensor] Implement TilingInterface for tensor.pack op.ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 480234

mlir/include/mlir/Dialect/Affine/Utils.h

mlir/lib/Dialect/Tensor/IR/CMakeLists.txt

mlir/lib/Dialect/Tensor/IR/TensorTilingInterfaceImpl.cpp

mlir/test/Dialect/Tensor/tiling.mlir

utils/bazel/llvm-project-overlay/mlir/BUILD.bazel

[mlir][tensor] Implement TilingInterface for tensor.pack op.
ClosedPublic