Diff 338006

mlir/include/mlir/Conversion/VectorToSCF/GradualVectorToSCF.h

This file was added.

				//===- GradualVectorToSCF.h - Convert vector to SCF dialect -----*- C++ -*-===//
				//
				// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
				// See https://llvm.org/LICENSE.txt for license information.
				// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
				//
				//===----------------------------------------------------------------------===//

				#ifndef MLIR_CONVERSION_VECTORTOSCF_GRADUALVECTORTOSCF_H_
				#define MLIR_CONVERSION_VECTORTOSCF_GRADUALVECTORTOSCF_H_

				#include "mlir/IR/PatternMatch.h"

				namespace mlir {
				class MLIRContext;
				class Pass;
				class RewritePatternSet;

				/// When lowering an N-d vector transfer op to an (N-1)-d vector transfer op,
				/// a temporary buffer is created through which individual (N-1)-d vectors are
				/// staged. This pattern can be applied multiple times, until the transfer op
				/// is 1-d.
				/// This is consistent with the lack of an LLVM instruction to dynamically
				/// index into an aggregate (see the Vector dialect lowering to LLVM deep dive).
				///
				/// An instruction such as:
				/// ```
				/// vector.transfer_write %vec, %A[%a, %b, %c] :
				/// vector<9x17x15xf32>, memref<?x?x?xf32>
				/// ```
				/// Lowers to pseudo-IR resembling (unpacking one dimension):
				/// ```
				/// %0 = alloca() : memref<vector<9x17x15xf32>>
				/// store %vec, %0[] : memref<vector<9x17x15xf32>>
				/// %1 = vector.type_cast %0 :
				/// memref<vector<9x17x15xf32>> to memref<9xvector<17x15xf32>>
				/// affine.for %I = 0 to 9 {
				/// %dim = dim %A, 0 : memref<?x?x?xf32>
				/// %add = affine.apply %I + %a
				/// %cmp = cmpi "slt", %add, %dim : index
				/// scf.if %cmp {
				/// %vec_2d = load %1[%I] : memref<9xvector<17x15xf32>>
				/// vector.transfer_write %vec_2d, %A[%add, %b, %c] :
				/// vector<17x15xf32>, memref<?x?x?xf32>
				/// ```
				///
				/// When applying the pattern a second time, the existing alloca() operation
				/// is reused and only a second vector.type_cast is added.

				/// Collect a set of patterns to convert from the Vector dialect to SCF + std.
				///
				/// The patterns are added to `patterns`; applying them (e.g. with the greedy
				/// pattern rewrite driver) is left to the caller.
				void populateGradualVectorToSCFConversionPatterns(RewritePatternSet &patterns);

				/// Create a pass to convert a subset of vector ops to SCF.
				///
				/// Returns a newly created pass instance owned by the returned pointer.
				std::unique_ptr<Pass> createGradualConvertVectorToSCFPass();

				} // namespace mlir

				#endif // MLIR_CONVERSION_VECTORTOSCF_GRADUALVECTORTOSCF_H_

mlir/lib/Conversion/VectorToSCF/GradualVectorToSCF.cpp

This file was added.

				//===- GradualVectorToSCF.cpp - Convert vector to SCF dialect -------------===//
				//
				// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
				// See https://llvm.org/LICENSE.txt for license information.
				// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
				//
				//===----------------------------------------------------------------------===//
				//
				// This file implements lowering of vector transfer operations to SCF.
				//
				//===----------------------------------------------------------------------===//

				#include <type_traits>

				#include "mlir/Conversion/VectorToSCF/GradualVectorToSCF.h"

				#include "../PassDetail.h"
				#include "mlir/Dialect/Affine/EDSC/Intrinsics.h"
				#include "mlir/Dialect/MemRef/EDSC/Intrinsics.h"
				#include "mlir/Dialect/SCF/EDSC/Intrinsics.h"
				#include "mlir/Dialect/StandardOps/EDSC/Intrinsics.h"
				#include "mlir/Dialect/Vector/EDSC/Intrinsics.h"
				#include "mlir/Dialect/Vector/VectorOps.h"
				#include "mlir/Dialect/Vector/VectorUtils.h"
				#include "mlir/IR/Builders.h"
				#include "mlir/Pass/Pass.h"
				#include "mlir/Transforms/GreedyPatternRewriteDriver.h"
				#include "mlir/Transforms/Passes.h"

				using namespace mlir;
				using namespace mlir::edsc;
				using namespace mlir::edsc::intrinsics;
				using vector::TransferReadOp;
				using vector::TransferWriteOp;

				namespace {

				/// Name of the attribute attached to transfer ops that have been prepared
				/// for, and are still undergoing, gradual lowering.
				static constexpr char kPassLabel[] = "__scf_lowering__";
				nicolasvasilacheUnsubmitted Not Done Reply Inline Actions `__vector_to_scf_lowering__` ? I imagine there could be some collision at some future point in time. nicolasvasilache: `__vector_to_scf_lowering__` ? I imagine there could be some collision at some future point in…

				/// Vector rank at which gradual lowering stops: transfer ops of this rank
				/// (1-D) are left for the target-specific lowering.
				static constexpr int64_t kTargetRank = 1;

				/// Given a MemRefType with VectorType element type, unpack one dimension from
				/// the VectorType into the MemRefType: the leading vector dimension becomes
				/// the new trailing memref dimension.
				///
				/// E.g.: memref<9xvector<5x6xf32>> --> memref<9x5xvector<6xf32>>
				///
				/// `type` must have a VectorType element type; this is asserted.
				/// NOTE(review): callers in this file only reach this function for vectors of
				/// rank > kTargetRank, so the remaining vector shape is presumably never
				/// empty -- confirm before reusing this helper elsewhere.
				static MemRefType unpackOneDim(MemRefType type) {
				  // The result is used unconditionally, so use `cast` (which asserts on a
				  // mismatch) rather than `dyn_cast` (which would silently yield null).
				  auto vectorType = type.getElementType().cast<VectorType>();
				  auto memrefShape = type.getShape();
				  SmallVector<int64_t, 8> newMemrefShape;
				  newMemrefShape.append(memrefShape.begin(), memrefShape.end());
				  newMemrefShape.push_back(vectorType.getDimSize(0));
				  return MemRefType::get(newMemrefShape,
				                         VectorType::get(vectorType.getShape().drop_front(),
				                                         vectorType.getElementType()));
				}

				/// Create a `memref.alloca` of the given type at the very start of the first
				/// region of the closest ancestor of `op` that has the
				/// AutomaticAllocationScope trait, and return the allocated value.
				///
				/// The builder of the active ScopedContext is used; its insertion point is
				/// restored before returning.
				// TODO: Parallelism and threadlocal considerations.
				static Value setAllocAtFunctionEntry(MemRefType type, Operation *op) {
				  Operation *allocScope =
				      op->getParentWithTrait<OpTrait::AutomaticAllocationScope>();
				  assert(allocScope && "Expected op to be inside automatic allocation scope");

				  OpBuilder &builder = ScopedContext::getBuilderRef();
				  // Restores the previous insertion point when this function returns.
				  OpBuilder::InsertionGuard restoreInsertionPoint(builder);
				  Block &entryBlock = allocScope->getRegion(0).front();
				  builder.setInsertionPointToStart(&entryBlock);
				  return memref_alloca(type);
				}

				/// Return the index of the `source` dimension that the next application of
				/// TransferOpConversion unpacks for the given vector transfer op: the rank of
				/// the transfer's shaped type minus the rank of its vector type.
				template <typename OpTy>
				static int64_t unpackedDim(OpTy xferOp) {
				  const int64_t sourceRank = xferOp.getShapedType().getRank();
				  const int64_t vectorRank = xferOp.getVectorType().getRank();
				  return sourceRank - vectorRank;
				}

				/// Calculate the indices for the new vector transfer op.
				nicolasvasilacheUnsubmitted Done Reply Inline Actions typo `indices` nicolasvasilache: typo `indices`
				///
				/// E.g.: transfer_read %A[%a, %b, %c, %d] ... : vector<5x4x3xf32> ...
				/// --> transfer_read %A[%a, %b + iv, %c, %d] ... vector<4x3xf32>
				/// ^^^^^^
				/// `iv` is the iteration variable of the (new) surrounding loop.
				///
				/// The indices of `xferOp` are appended to `indices`, and the entry at the
				/// to-be-unpacked dimension (see `unpackedDim`) is replaced by `index + iv`.
				/// NOTE(review): the replacement indexes `indices` from its start, so the
				/// vector is presumably expected to be empty on entry -- all callers in this
				/// file pass a fresh vector.
				template <typename OpTy>
				static void getXferIndices(OpTy xferOp, Value iv,
				                           SmallVector<Value, 8> &indices) {
				  typename OpTy::Adaptor adaptor(xferOp);
				  // Corresponding memref dim of the vector dim that is unpacked.
				  auto dim = unpackedDim(xferOp);
				  auto prevIndices = adaptor.indices();
				  indices.append(prevIndices.begin(), prevIndices.end());
				  // Only the EDSC `+` on Values is needed here; pull in just that operator
				  // instead of the whole `mlir::edsc::op` namespace.
				  using mlir::edsc::op::operator+;
				  indices[dim] = prevIndices[dim] + iv;
				nicolasvasilacheUnsubmitted Done Reply Inline Actions you could just do: using mlir::edsc::op::operator+; right above this line. nicolasvasilache: you could just do: ``` using mlir::edsc::op::operator+; ``` right above this line.
				}

				/// Emit the code for one unpacked slice of `xferOp`, guarded by a runtime
				/// bounds check when needed.
				///
				/// If dimension 0 of the transfer is marked in-bounds, `inBoundsCase` is
				/// invoked directly. Otherwise an `scf.if` is created that compares the size
				/// of the to-be-unpacked source dimension against `index + iv`:
				/// `inBoundsCase` populates the "then" region and `outOfBoundsCase` (if
				/// provided) the "else" region. Both regions are terminated with a yield.
				template <typename OpTy>
				static void generateInBoundsCheck(
				    OpTy xferOp, Value iv, PatternRewriter &rewriter,
				    function_ref<void(OpBuilder &, Location)> inBoundsCase,
				    function_ref<void(OpBuilder &, Location)> outOfBoundsCase = nullptr) {
				  // No runtime check needed if dim is guaranteed to be in-bounds.
				  if (xferOp.isDimInBounds(0)) {
				    inBoundsCase(rewriter, xferOp.getLoc());
				    return;
				  }

				  using namespace mlir::edsc::op;
				  // Source dimension that corresponds to the vector dimension being unpacked.
				  auto srcDim = unpackedDim(xferOp);
				  auto dimSize = memref_dim(xferOp.source(), std_constant_index(srcDim));
				  auto accessIdx = xferOp.indices()[srcDim] + iv;
				  auto isInBounds = std_cmpi_sgt(dimSize.value, accessIdx);

				  auto buildThen = [&](OpBuilder &builder, Location loc) {
				    inBoundsCase(builder, loc);
				    builder.create<scf::YieldOp>(xferOp.getLoc());
				  };
				  auto buildElse = [&](OpBuilder &builder, Location loc) {
				    // The out-of-bounds handler is optional.
				    if (outOfBoundsCase)
				      outOfBoundsCase(builder, loc);
				    builder.create<scf::YieldOp>(xferOp.getLoc());
				  };
				  rewriter.create<scf::IfOp>(xferOp.getLoc(), isInBounds, buildThen,
				                             buildElse);
				}

				/// Given an ArrayAttr, return a copy where the first element is dropped.
				/// A null `attr` is returned unchanged.
				static ArrayAttr dropFirstElem(PatternRewriter &rewriter, ArrayAttr attr) {
				  // Trivial single-statement body: no braces, per the LLVM coding style.
				  if (!attr)
				nicolasvasilacheUnsubmitted Done Reply Inline Actions in MLIR we omit trivial braces. nicolasvasilache: in MLIR we omit trivial braces.
				    return attr;
				  return ArrayAttr::get(rewriter.getContext(), attr.getValue().drop_front());
				}

				/// Codegen strategy, depending on the operation.
				///
				/// Only declared here. The specializations for TransferReadOp and
				/// TransferWriteOp provide the static hooks (`getBuffer`, `rewriteOp`,
				/// `handleOutOfBoundsDim`, `cleanup`) that TransferOpConversion calls.
				template <typename OpTy>
				struct Strategy;

				/// Code strategy for vector TransferReadOp.
				template <>
				struct Strategy<TransferReadOp> {
				/// Find the StoreOp that is used for writing the current TransferReadOp's
				/// result to the temporary buffer allocation.
				static memref::StoreOp getStoreOp(TransferReadOp xferOp) {
				assert(xferOp->hasOneUse() && "Expected exactly one use of TransferReadOp");
				auto storeOp = dyn_cast<memref::StoreOp>((*xferOp->use_begin()).getOwner());
				assert(storeOp && "Expected TransferReadOp result used by StoreOp");
				return storeOp;
				}

				/// Find the temporary buffer allocation. All labeled TransferReadOps are
				/// used like this, where %buf is either the buffer allocation or a type cast
				/// of the buffer allocation:
				/// ```
				/// %vec = vector.transfer_read ... { __scf_lowering__ } ...
				/// memref.store %vec, %buf[...] ...
				/// ```
				static Value getBuffer(TransferReadOp xferOp) {
				return getStoreOp(xferOp).getMemRef();
				}

				/// Retrieve the indices of the current StoreOp.
				static void getStoreIndices(TransferReadOp xferOp,
				SmallVector<Value, 8> &indices) {
				auto storeOp = getStoreOp(xferOp);
				auto prevIndices = memref::StoreOpAdaptor(storeOp).indices();
				indices.append(prevIndices.begin(), prevIndices.end());
				}

				/// Rewrite the TransferReadOp, assuming that there are no out-of-bounds
				/// accesses on the to-be-unpacked dimension.
				///
				/// 1. Generate a new (N-1)-d TransferReadOp using the loop iteration
				/// variable `iv`.
				/// 2. Store the result into the (already `vector.type_cast`ed) buffer.
				///
				/// E.g.:
				/// ```
				/// %vec = vector.transfer_read %A[%a+%i, %b, %c], %cst
				/// : memref<?x?x?xf32>, vector<4x3xf32>
				/// memref.store %vec, %buf[%i] : memref<5xvector<4x3xf32>>
				/// ```
				/// Is rewritten to:
				/// ```
				/// %casted = vector.type_cast %buf
				/// : memref<5xvector<4x3xf32>> to memref<5x4xvector<3xf32>>
				/// for %j = 0 to 4 {
				/// %vec = vector.transfer_read %A[%a+%i, %b+%j, %c], %cst
				/// : memref<?x?x?xf32>, vector<3xf32>
				/// memref.store %vec, %casted[%i, %j] : memref<5x4xvector<3xf32>>
				/// }
				/// ```
				///
				/// Note: The loop and type cast are generated in TransferOpConversion.
				/// The original TransferReadOp and store op are deleted in `cleanup`.
				static void rewriteOp(PatternRewriter &rewriter, TransferReadOp xferOp,
				Value buffer, Value iv) {
				SmallVector<Value, 8> storeIndices;
				getStoreIndices(xferOp, storeIndices);
				storeIndices.push_back(iv);

				SmallVector<Value, 8> xferIndices;
				getXferIndices(xferOp, iv, xferIndices);

				auto bufferType = buffer.getType().dyn_cast<ShapedType>();
				auto vecType = bufferType.getElementType().dyn_cast<VectorType>();
				auto map = getTransferMinorIdentityMap(xferOp.getShapedType(), vecType);
				auto inBoundsAttr = dropFirstElem(rewriter, xferOp.in_boundsAttr());
				auto newXfer = vector_transfer_read(vecType, xferOp.source(), xferIndices,
				AffineMapAttr::get(map),
				xferOp.padding(), Value(), inBoundsAttr)
				.value;

				if (vecType.getRank() > kTargetRank)
				newXfer.getDefiningOp()->setAttr(kPassLabel, rewriter.getUnitAttr());

				memref_store(newXfer, buffer, storeIndices);
				}

				/// Handle out-of-bounds accesses on the to-be-unpacked dimension: Write
				/// padding value to the temporary buffer.
				static void handleOutOfBoundsDim(PatternRewriter &rewriter,
				TransferReadOp xferOp, Value buffer,
				Value iv) {
				SmallVector<Value, 8> storeIndices;
				getStoreIndices(xferOp, storeIndices);
				storeIndices.push_back(iv);

				auto bufferType = buffer.getType().dyn_cast<ShapedType>();
				auto vecType = bufferType.getElementType().dyn_cast<VectorType>();
				auto vec = std_splat(vecType, xferOp.padding());
				memref_store(vec, buffer, storeIndices);
				}

				/// Cleanup after rewriting the op.
				static void cleanup(PatternRewriter &rewriter, TransferReadOp xferOp) {
				rewriter.eraseOp(getStoreOp(xferOp));
				rewriter.eraseOp(xferOp);
				}
				};

				/// Codegen strategy for vector TransferWriteOp.
				template <>
				struct Strategy<TransferWriteOp> {
				/// Find the temporary buffer allocation. All labeled TransferWriteOps are
				/// used like this, where %buf is either the buffer allocation or a type cast
				/// of the buffer allocation:
				/// ```
				/// %vec = memref.load %buf[...] ...
				/// vector.transfer_write %vec ... { __scf_lowering__ } ...
				/// ```
				static Value getBuffer(TransferWriteOp xferOp) {
				auto loadOp = xferOp.vector().getDefiningOp<memref::LoadOp>();
				assert(loadOp && "Expected transfer op vector produced by LoadOp");
				return loadOp.getMemRef();
				}

				/// Retrieve the indices of the current LoadOp.
				static void getLoadIndices(TransferWriteOp xferOp,
				SmallVector<Value, 8> &indices) {
				auto loadOp = xferOp.vector().getDefiningOp<memref::LoadOp>();
				auto prevIndices = memref::LoadOpAdaptor(loadOp).indices();
				indices.append(prevIndices.begin(), prevIndices.end());
				}

				/// Rewrite the TransferWriteOp, assuming that there are no out-of-bounds
				/// accesses on the to-be-unpacked dimension.
				///
				/// 1. Load an (N-1)-d vector from the (already `vector.type_cast`ed) buffer,
				/// using the loop iteration variable `iv`.
				/// 2. Generate a new (N-1)-d TransferWriteOp, writing the loaded vector back
				/// to memory.
				///
				/// Note: For more details, see comments on Strategy<TransferReadOp>.
				static void rewriteOp(PatternRewriter &rewriter, TransferWriteOp xferOp,
				Value buffer, Value iv) {
				SmallVector<Value, 8> loadIndices;
				getLoadIndices(xferOp, loadIndices);
				loadIndices.push_back(iv);

				SmallVector<Value, 8> xferIndices;
				getXferIndices(xferOp, iv, xferIndices);

				auto vec = memref_load(buffer, loadIndices);
				auto vecType = vec.value.getType().dyn_cast<VectorType>();
				auto map = getTransferMinorIdentityMap(xferOp.getShapedType(), vecType);
				auto inBoundsAttr = dropFirstElem(rewriter, xferOp.in_boundsAttr());
				auto newXfer =
				vector_transfer_write(Type(), vec, xferOp.source(), xferIndices,
				AffineMapAttr::get(map), Value(), inBoundsAttr);

				if (vecType.getRank() > kTargetRank)
				newXfer.op->setAttr(kPassLabel, rewriter.getUnitAttr());
				}

				/// Handle out-of-bounds accesses on the to-be-unpacked dimension.
				static void handleOutOfBoundsDim(PatternRewriter &rewriter,
				TransferWriteOp xferOp, Value buffer,
				Value iv) {}

				/// Cleanup after rewriting the op.
				static void cleanup(PatternRewriter &rewriter, TransferWriteOp xferOp) {
				rewriter.eraseOp(xferOp);
				}
				};

				/// Prepare a TransferReadOp for gradual lowering.
				///
				/// 1. Allocate a temporary buffer.
				/// 2. Label the TransferReadOp, marking it eligible for gradual lowering.
				/// 3. Store the result of the TransferReadOp into the temporary buffer.
				/// 4. Load the result from the temporary buffer and replace all uses of the
				/// original TransferReadOp with this load.
				///
				/// The pattern fails to match ops that are already labeled, whose vector rank
				/// is at most kTargetRank, that have a mask, or whose permutation map is not
				/// an identity map.
				///
				/// E.g.:
				/// ```
				/// %vec = vector.transfer_read %A[%a, %b, %c], %cst
				/// : memref<?x?x?xf32>, vector<5x4xf32>
				/// ```
				/// is rewritten to:
				/// ```
				/// %0 = memref.alloca() : memref<vector<5x4xf32>>
				/// %1 = vector.transfer_read %A[%a, %b, %c], %cst { __scf_lowering__ }
				/// : memref<?x?x?xf32>, vector<5x4xf32>
				/// memref.store %1, %0[] : memref<vector<5x4xf32>>
				/// %vec = memref.load %0[] : memref<vector<5x4xf32>>
				/// ```
				struct PrepareTransferReadConversion : public OpRewritePattern<TransferReadOp> {
				using OpRewritePattern<TransferReadOp>::OpRewritePattern;

				LogicalResult matchAndRewrite(TransferReadOp xferOp,
				PatternRewriter &rewriter) const override {
				// Already prepared: labeled ops are handled by TransferOpConversion.
				if (xferOp->hasAttr(kPassLabel))
				return failure();
				// Nothing to unpack once the target rank is reached.
				if (xferOp.getVectorType().getRank() <= kTargetRank)
				return failure();
				// Masked transfers are not handled by this pattern.
				if (xferOp.mask())
				return failure();
				// NOTE(review): isIdentity() presumably also rejects transfers whose source
				// rank exceeds the vector rank (as in the example above) -- confirm whether
				// a minor-identity check was intended.
				if (!xferOp.permutation_map().isIdentity())
				return failure();

				ScopedContext scope(rewriter, xferOp.getLoc());
				// 0-d memref holding the whole vector; later type-casted dimension by
				// dimension in TransferOpConversion.
				auto allocType = MemRefType::get({}, xferOp.getVectorType());
				auto buffer = setAllocAtFunctionEntry(allocType, xferOp);
				// The labeled copy feeds the store into the buffer; all uses of the
				// original op are redirected to a load from the buffer below.
				auto newXfer = xferOp.clone();
				nicolasvasilacheUnsubmitted Not Done Reply Inline Actions same comment re. inplace op update nicolasvasilache: same comment re. inplace op update
				springermAuthorUnsubmitted Done Reply Inline Actions There is no `rewriter.replaceAllUsesWith`. I couldn't find a way to replace all uses without cloning/creating a new op. `Value.replaceAllUsesWith` seems dangerous because it does not go through `rewriter`. springerm: There is no `rewriter.replaceAllUsesWith`. I couldn't find a way to replace all uses without…
				nicolasvasilacheUnsubmitted Not Done Reply Inline Actions If you do inplace update (i.e. to a first good approximation: if your return types don't change), then you just update in place and there is no need to replace anything: the use-def chains are already connected and don't change. So it's both shorter code and more efficient :) nicolasvasilache: If you do inplace update (i.e. to a first good approximation: if your return types don't…
				newXfer->setAttr(kPassLabel, rewriter.getUnitAttr());
				rewriter.insert(newXfer);
				memref_store(newXfer, buffer);
				rewriter.replaceOpWithNewOp<memref::LoadOp>(xferOp, buffer);

				return success();
				}
				};

				/// Prepare a TransferWriteOp for gradual lowering.
				///
				/// 1. Allocate a temporary buffer.
				/// 2. Store the vector into the buffer.
				/// 3. Load the vector from the buffer again.
				/// 4. Use the loaded vector as a TransferWriteOp operand and label the op,
				/// marking it eligible for gradual lowering via TransferOpConversion.
				///
				/// The pattern fails to match ops that are already labeled, whose vector rank
				/// is at most kTargetRank, that have a mask, or whose permutation map is not
				/// an identity map.
				///
				/// E.g.:
				/// ```
				/// vector.transfer_write %vec, %A[%a, %b, %c]
				/// : vector<5x4xf32>, memref<?x?x?xf32>
				/// ```
				/// is rewritten to:
				/// ```
				/// %0 = memref.alloca() : memref<vector<5x4xf32>>
				/// memref.store %vec, %0[] : memref<vector<5x4xf32>>
				/// %1 = memref.load %0[] : memref<vector<5x4xf32>>
				/// vector.transfer_write %1, %A[%a, %b, %c] { __scf_lowering__ }
				/// : vector<5x4xf32>, memref<?x?x?xf32>
				/// ```
				struct PrepareTransferWriteConversion
				: public OpRewritePattern<TransferWriteOp> {
				using OpRewritePattern<TransferWriteOp>::OpRewritePattern;

				LogicalResult matchAndRewrite(TransferWriteOp xferOp,
				PatternRewriter &rewriter) const override {
				// Already prepared: labeled ops are handled by TransferOpConversion.
				if (xferOp->hasAttr(kPassLabel))
				return failure();
				// Nothing to unpack once the target rank is reached.
				if (xferOp.getVectorType().getRank() <= kTargetRank)
				return failure();
				// Masked transfers are not handled by this pattern.
				if (xferOp.mask())
				return failure();
				// NOTE(review): isIdentity() presumably also rejects transfers whose source
				// rank exceeds the vector rank (as in the example above) -- confirm whether
				// a minor-identity check was intended.
				if (!xferOp.permutation_map().isIdentity())
				return failure();

				ScopedContext scope(rewriter, xferOp.getLoc());
				// 0-d memref holding the whole vector; later type-casted dimension by
				// dimension in TransferOpConversion.
				auto allocType = MemRefType::get({}, xferOp.getVectorType());
				auto buffer = setAllocAtFunctionEntry(allocType, xferOp);
				// Round-trip the vector through the buffer so that the labeled op's vector
				// operand is produced by a memref.load, as Strategy<TransferWriteOp> expects.
				memref_store(xferOp.vector(), buffer);
				auto loadedVec = memref_load(buffer);

				// Replace the op with a labeled copy that writes the loaded vector.
				auto newXfer = xferOp.clone();
				nicolasvasilacheUnsubmitted Done Reply Inline Actions This is usually considered dangerous and we prefer `rewriter.clone` to keep track of stuff. But this goes away if you use root updates. nicolasvasilache: This is usually considered dangerous and we prefer `rewriter.clone` to keep track of stuff. But…
				newXfer.vectorMutable().assign(loadedVec);
				nicolasvasilacheUnsubmitted Done Reply Inline Actions If types don't change, we usually prefer: rewriter.startRootUpdate(xferOp); ... // updates rewriter.finalizeRootUpdate(xferOp); an so no need to clone, erase ops etc. nicolasvasilache: If types don't change, we usually prefer: ``` rewriter.startRootUpdate(xferOp); ... // updates…
				newXfer->setAttr(kPassLabel, rewriter.getUnitAttr());
				rewriter.insert(newXfer);
				nicolasvasilacheUnsubmitted Done Reply Inline Actions This is usually considered dangerous and we prefer `rewriter.clone` to keep track of stuff. But this goes away if you use root updates. nicolasvasilache: This is usually considered dangerous and we prefer `rewriter.clone` to keep track of stuff. But…
				rewriter.eraseOp(xferOp);

				return success();
				nicolasvasilacheUnsubmitted Done Reply Inline Actions The 2 paths are almost identical code, can we create a single templated pattern ? nicolasvasilache: The 2 paths are almost identical code, can we create a single templated pattern ?
				springermAuthorUnsubmitted Done Reply Inline Actions You mean `PrepareTransferWriteConversion` and `PrepareTransferReadConversion`? What's identical is mostly the checks in the beginning of the function. I put those in a separate function. springerm: You mean `PrepareTransferWriteConversion` and `PrepareTransferReadConversion`? What's identical…
				nicolasvasilacheUnsubmitted Not Done Reply Inline Actions I was also thinking about some of the helper stuff: ScopedContext scope(rewriter, xferOp.getLoc()); auto allocType = MemRefType::get({}, xferOp.getVectorType()); auto buffer = setAllocAtFunctionEntry(allocType, xferOp); Seems like we could have something like: template <OpType> struct PrepareTransferConversion : public OpRewritePattern<OpType> { using OpRewritePattern<OpType>::OpRewritePattern; PrepareTransferConversion(Lambda doit) : _doit(doit) {} LogicalResult matchAndRewrite(OpType xferOp, PatternRewriter &rewriter) const override { if (xferOp->hasAttr(kPassLabel)) return failure(); if (xferOp.getVectorType().getRank() <= kTargetRank) return failure(); if (xferOp.mask()) return failure(); if (!xferOp.permutation_map().isIdentity()) return failure(); ScopedContext scope(rewriter, xferOp.getLoc()); auto allocType = MemRefType::get({}, xferOp.getVectorType()); auto buffer = setAllocAtFunctionEntry(allocType, xferOp); // ... other common things _doit(...); } If that sounds too convoluted feel free to ignore. nicolasvasilache: I was also thinking about some of the helper stuff: ``` ScopedContext scope(rewriter…
				}
				};

				/// Gradual lowering of vector transfer ops: Unpack one dimension.
				///
				/// 1. Unpack one dimension from the current buffer type and cast the buffer
				///    to that new type. E.g.:
				///    ```
				///    %vec = memref.load %0[%1] : memref<5xvector<4x3xf32>>
				///    vector.transfer_write %vec ...
				///    ```
				///    The following cast is generated:
				///    ```
				///    %casted = vector.type_cast %0
				///        : memref<5xvector<4x3xf32>> to memref<5x4xvector<3xf32>>
				///    ```
				/// 2. Generate a for loop over the newly unpacked (i.e. innermost) memref
				///    dimension and rewrite the transfer op according to the corresponding
				///    Strategy<OpTy>. If the to-be-unpacked dimension can be out-of-bounds,
				///    generate an if-check and handle both cases separately.
				/// 3. Clean up according to the corresponding Strategy<OpTy>.
				///
				/// Only ops labeled with kPassLabel are matched.
				template <typename OpTy>
				struct TransferOpConversion : public OpRewritePattern<OpTy> {
				  using OpRewritePattern<OpTy>::OpRewritePattern;

				  LogicalResult matchAndRewrite(OpTy xferOp,
				                                PatternRewriter &rewriter) const override {
				    if (!xferOp->hasAttr(kPassLabel))
				      return failure();

				    ScopedContext scope(rewriter, xferOp.getLoc());
				    // How the buffer can be found depends on OpTy.
				    auto buffer = Strategy<OpTy>::getBuffer(xferOp);
				    // The buffer is always a memref; `cast` asserts this instead of handing
				    // a null type to unpackOneDim.
				    auto bufferType = buffer.getType().template cast<MemRefType>();
				    auto castedType = unpackOneDim(bufferType);
				    auto casted = vector_type_cast(castedType, buffer);

				    // Iterate over the dimension that was just unpacked. unpackOneDim
				    // appends it as the last memref dimension, so dimension 0 is only
				    // correct on the first application (e.g. memref<5x4xvector<3xf32>> must
				    // loop over 4, not 5).
				    auto lb = std_constant_index(0).value;
				    auto ub = std_constant_index(
				                  castedType.getDimSize(castedType.getRank() - 1))
				                  .value;
				    affineLoopBuilder(lb, ub, 1, [&](Value iv) {
				      generateInBoundsCheck(
				          xferOp, iv, rewriter,
				          /*inBoundsCase=*/
				          [&](OpBuilder & /*b*/, Location /*loc*/) {
				            Strategy<OpTy>::rewriteOp(rewriter, xferOp, casted, iv);
				          },
				          /*outOfBoundsCase=*/
				          [&](OpBuilder & /*b*/, Location /*loc*/) {
				            Strategy<OpTy>::handleOutOfBoundsDim(rewriter, xferOp, casted, iv);
				          });
				    });

				    Strategy<OpTy>::cleanup(rewriter, xferOp);
				    return success();
				  }
				};

				} // namespace

				namespace mlir {

				/// Add the gradual vector-to-SCF lowering patterns to `patterns`: the two
				/// preparation patterns followed by the per-dimension unpacking patterns for
				/// transfer reads and transfer writes.
				void populateGradualVectorToSCFConversionPatterns(RewritePatternSet &patterns) {
				  MLIRContext *context = patterns.getContext();
				  // Preparation: stage the vector through a temporary buffer and label the op.
				  patterns.add<PrepareTransferReadConversion, PrepareTransferWriteConversion>(
				      context);
				  // Unpacking: peel one vector dimension off each labeled op.
				  patterns.add<TransferOpConversion<TransferReadOp>,
				               TransferOpConversion<TransferWriteOp>>(context);
				}

				namespace {

				/// Function pass that applies the gradual vector-to-SCF lowering patterns to
				/// the function with the greedy pattern rewrite driver. The driver's result
				/// is deliberately ignored.
				///
				/// The class is only used within this file (through
				/// createGradualConvertVectorToSCFPass), so it lives in an anonymous namespace
				/// to avoid exporting the symbol from `mlir`.
				/// NOTE(review): this reuses ConvertVectorToSCFBase, presumably the generated
				/// base of the existing vector-to-SCF pass -- confirm that sharing it is
				/// intended.
				struct ConvertGradualVectorToSCFPass
				    : public ConvertVectorToSCFBase<ConvertGradualVectorToSCFPass> {
				  void runOnFunction() override {
				    RewritePatternSet patterns(getFunction().getContext());
				    populateGradualVectorToSCFConversionPatterns(patterns);
				    (void)applyPatternsAndFoldGreedily(getFunction(), std::move(patterns));
				  }
				};

				} // namespace

				} // namespace mlir

				/// Create a new instance of the gradual vector-to-SCF lowering pass.
				std::unique_ptr<Pass> mlir::createGradualConvertVectorToSCFPass() {
				  std::unique_ptr<Pass> pass =
				      std::make_unique<ConvertGradualVectorToSCFPass>();
				  return pass;
				}

mlir/test/Integration/Dialect/Vector/CPU/test-transfer-read-2d.mlir

	// RUN: mlir-opt %s -convert-vector-to-scf -lower-affine -convert-scf-to-std -convert-vector-to-llvm -convert-std-to-llvm \| \			// RUN: mlir-opt %s -convert-vector-to-scf -lower-affine -convert-scf-to-std -convert-vector-to-llvm -convert-std-to-llvm \| \
	// RUN: mlir-cpu-runner -e entry -entry-point-result=void \			// RUN: mlir-cpu-runner -e entry -entry-point-result=void \
	// RUN: -shared-libs=%mlir_integration_test_dir/libmlir_c_runner_utils%shlibext \| \			// RUN: -shared-libs=%mlir_integration_test_dir/libmlir_c_runner_utils%shlibext \| \
	// RUN: FileCheck %s			// RUN: FileCheck %s

				// RUN: mlir-opt %s -test-gradual-convert-vector-to-scf -lower-affine -convert-scf-to-std -convert-vector-to-llvm -convert-std-to-llvm \| \
				// RUN: mlir-cpu-runner -e entry -entry-point-result=void \
				// RUN: -shared-libs=%mlir_integration_test_dir/libmlir_c_runner_utils%shlibext \| \
				// RUN: FileCheck %s

	func @transfer_read_2d(%A : memref<?x?xf32>, %base1: index, %base2: index) {			func @transfer_read_2d(%A : memref<?x?xf32>, %base1: index, %base2: index) {
	%fm42 = constant -42.0: f32			%fm42 = constant -42.0: f32
	%f = vector.transfer_read %A[%base1, %base2], %fm42			%f = vector.transfer_read %A[%base1, %base2], %fm42
	{permutation_map = affine_map<(d0, d1) -> (d0, d1)>} :			{permutation_map = affine_map<(d0, d1) -> (d0, d1)>} :
	memref<?x?xf32>, vector<4x9xf32>			memref<?x?xf32>, vector<4x9xf32>
	vector.print %f: vector<4x9xf32>			vector.print %f: vector<4x9xf32>
	return			return
	}			}
	▲ Show 20 Lines • Show All 46 Lines • Show Last 20 Lines

mlir/test/lib/Transforms/TestVectorTransforms.cpp

//===- TestVectorToVectorConversion.cpp - Test VectorTransfers lowering ---===//		//===- TestVectorToVectorConversion.cpp - Test VectorTransfers lowering ---===//
//		//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.		// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.		// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception		// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//		//
//===----------------------------------------------------------------------===//		//===----------------------------------------------------------------------===//

#include <type_traits>		#include <type_traits>

#include "mlir/Analysis/SliceAnalysis.h"		#include "mlir/Analysis/SliceAnalysis.h"
		#include "mlir/Conversion/VectorToSCF/GradualVectorToSCF.h"
		nicolasvasilacheUnsubmitted Done Reply Inline Actions In MLIR we usually use the term "progressive" for this: "progressive lowering" (not necessarily in code but def. in other communications like prez/posts etc.). Could you please update everywhere for consistency? nicolasvasilache: In MLIR we usually use the term "progressive" for this: "progressive lowering" (not necessarily…
#include "mlir/Dialect/Affine/IR/AffineOps.h"		#include "mlir/Dialect/Affine/IR/AffineOps.h"
#include "mlir/Dialect/Linalg/IR/LinalgOps.h"		#include "mlir/Dialect/Linalg/IR/LinalgOps.h"
#include "mlir/Dialect/MemRef/IR/MemRef.h"		#include "mlir/Dialect/MemRef/IR/MemRef.h"
#include "mlir/Dialect/SCF/SCF.h"		#include "mlir/Dialect/SCF/SCF.h"
#include "mlir/Dialect/StandardOps/IR/Ops.h"		#include "mlir/Dialect/StandardOps/IR/Ops.h"
#include "mlir/Dialect/Vector/VectorOps.h"		#include "mlir/Dialect/Vector/VectorOps.h"
#include "mlir/Dialect/Vector/VectorTransforms.h"		#include "mlir/Dialect/Vector/VectorTransforms.h"
#include "mlir/Pass/Pass.h"		#include "mlir/Pass/Pass.h"
▲ Show 20 Lines • Show All 350 Lines • ▼ Show 20 Lines	struct TestVectorTransferLoweringPatterns
}		}
void runOnFunction() override {		void runOnFunction() override {
RewritePatternSet patterns(&getContext());		RewritePatternSet patterns(&getContext());
populateVectorTransferLoweringPatterns(patterns);		populateVectorTransferLoweringPatterns(patterns);
(void)applyPatternsAndFoldGreedily(getFunction(), std::move(patterns));		(void)applyPatternsAndFoldGreedily(getFunction(), std::move(patterns));
}		}
};		};

		/// Test pass that greedily applies the gradual vector-to-SCF conversion
		/// patterns to a function.
		struct TestGradualVectorToSCFLoweringPatterns
		    : public PassWrapper<TestGradualVectorToSCFLoweringPatterns, FunctionPass> {
		  /// Register the dialects this pass declares as dependencies.
		  void getDependentDialects(DialectRegistry &registry) const override {
		    registry.insert<memref::MemRefDialect>();
		    registry.insert<scf::SCFDialect>();
		    registry.insert<AffineDialect>();
		  }

		  /// Collect the patterns and run the greedy driver; its result is ignored.
		  void runOnFunction() override {
		    MLIRContext *context = &getContext();
		    RewritePatternSet gradualPatterns(context);
		    populateGradualVectorToSCFConversionPatterns(gradualPatterns);
		    (void)applyPatternsAndFoldGreedily(getFunction(),
		                                       std::move(gradualPatterns));
		  }
		};

} // end anonymous namespace		} // end anonymous namespace

namespace mlir {		namespace mlir {
namespace test {		namespace test {
void registerTestVectorConversions() {		void registerTestVectorConversions() {
PassRegistration<TestVectorToVectorConversion> vectorToVectorPass(		PassRegistration<TestVectorToVectorConversion> vectorToVectorPass(
"test-vector-to-vector-conversion",		"test-vector-to-vector-conversion",
"Test conversion patterns between ops in the vector dialect");		"Test conversion patterns between ops in the vector dialect");
Show All 30 Lines	void registerTestVectorConversions() {

PassRegistration<TestVectorTransferOpt> transferOpOpt(		PassRegistration<TestVectorTransferOpt> transferOpOpt(
"test-vector-transferop-opt",		"test-vector-transferop-opt",
"Test optimization transformations for transfer ops");		"Test optimization transformations for transfer ops");

PassRegistration<TestVectorTransferLoweringPatterns> transferOpLoweringPass(		PassRegistration<TestVectorTransferLoweringPatterns> transferOpLoweringPass(
"test-vector-transfer-lowering-patterns",		"test-vector-transfer-lowering-patterns",
"Test conversion patterns to lower transfer ops to other vector ops");		"Test conversion patterns to lower transfer ops to other vector ops");

		PassRegistration<TestGradualVectorToSCFLoweringPatterns> transferOpToSCF(
		"test-gradual-convert-vector-to-scf",
		"Test conversion patterns to gradually lower transfer ops to SCF");
}		}
} // namespace test		} // namespace test
} // namespace mlir		} // namespace mlir

This is an archive of the discontinued LLVM Phabricator instance.

[mlir] Gradually lower vector to SCF
ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 338006

mlir/include/mlir/Conversion/VectorToSCF/GradualVectorToSCF.h

mlir/lib/Conversion/VectorToSCF/GradualVectorToSCF.cpp

mlir/test/Integration/Dialect/Vector/CPU/test-transfer-read-2d.mlir

mlir/test/lib/Transforms/TestVectorTransforms.cpp

This is an archive of the discontinued LLVM Phabricator instance.

[mlir] Gradually lower vector to SCF
ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 338006

mlir/include/mlir/Conversion/VectorToSCF/GradualVectorToSCF.h

mlir/lib/Conversion/VectorToSCF/GradualVectorToSCF.cpp

mlir/test/Integration/Dialect/Vector/CPU/test-transfer-read-2d.mlir

mlir/test/lib/Transforms/TestVectorTransforms.cpp

[mlir] Gradually lower vector to SCF
ClosedPublic