Diff 386432

mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h

Show All 10 Lines

#include "mlir/Conversion/VectorToSCF/VectorToSCF.h"		#include "mlir/Conversion/VectorToSCF/VectorToSCF.h"
#include "mlir/Dialect/Linalg/Utils/Utils.h"		#include "mlir/Dialect/Linalg/Utils/Utils.h"
#include "mlir/Dialect/MemRef/IR/MemRef.h"		#include "mlir/Dialect/MemRef/IR/MemRef.h"
#include "mlir/Dialect/SCF/Utils.h"		#include "mlir/Dialect/SCF/Utils.h"
#include "mlir/Dialect/Tensor/IR/Tensor.h"		#include "mlir/Dialect/Tensor/IR/Tensor.h"
#include "mlir/Dialect/Utils/StaticValueUtils.h"		#include "mlir/Dialect/Utils/StaticValueUtils.h"
#include "mlir/Dialect/Vector/VectorTransforms.h"		#include "mlir/Dialect/Vector/VectorTransforms.h"
		#include "mlir/Dialect/X86Vector/Transforms.h"
#include "mlir/IR/Identifier.h"		#include "mlir/IR/Identifier.h"
#include "mlir/IR/PatternMatch.h"		#include "mlir/IR/PatternMatch.h"
#include "mlir/Transforms/Bufferize.h"		#include "mlir/Transforms/Bufferize.h"
#include "llvm/ADT/SmallBitVector.h"		#include "llvm/ADT/SmallBitVector.h"
#include "llvm/ADT/SmallSet.h"		#include "llvm/ADT/SmallSet.h"

namespace mlir {		namespace mlir {
class BufferizeTypeConverter;		class BufferizeTypeConverter;
▲ Show 20 Lines • Show All 961 Lines • ▼ Show 20 Lines	struct LinalgVectorLoweringOptions {
}		}
/// Enable lowering of vector.transpose.		/// Enable lowering of vector.transpose.
/// In a progressive lowering of vectors, this would be the 7th step.		/// In a progressive lowering of vectors, this would be the 7th step.
bool transposeLowering = false;		bool transposeLowering = false;
LinalgVectorLoweringOptions &enableVectorTransposeLowering(bool val = true) {		LinalgVectorLoweringOptions &enableVectorTransposeLowering(bool val = true) {
transposeLowering = val;		transposeLowering = val;
return *this;		return *this;
}		}
		/// Enable AVX2-specific lowerings.
		/// Off by default. In LinalgStrategyLowerVectorsPass this flag is only
		/// consulted inside the `transposeLowering` step, so it has no effect unless
		/// vector.transpose lowering is enabled as well.
		bool avx2Lowering = false;
		/// Fluent setter for `avx2Lowering`; returns `*this` so calls can be chained.
		LinalgVectorLoweringOptions &enableAVX2Lowering(bool val = true) {
		avx2Lowering = val;
		return *this;
		}

/// Configure the post staged-patterns late vector.transfer to scf		/// Configure the post staged-patterns late vector.transfer to scf
/// conversion.		/// conversion.
VectorTransferToSCFOptions vectorTransferToSCFOptions;		VectorTransferToSCFOptions vectorTransferToSCFOptions;
LinalgVectorLoweringOptions &		LinalgVectorLoweringOptions &
setVectorTransferToSCFOptions(VectorTransferToSCFOptions options) {		setVectorTransferToSCFOptions(VectorTransferToSCFOptions options) {
vectorTransferToSCFOptions = options;		vectorTransferToSCFOptions = options;
return *this;		return *this;
}		}
/// Configure late vector transformations.		/// Configure late vector transformations.
vector::VectorTransformsOptions vectorTransformOptions;		vector::VectorTransformsOptions vectorTransformOptions;
LinalgVectorLoweringOptions &		LinalgVectorLoweringOptions &
setVectorTransformsOptions(vector::VectorTransformsOptions options) {		setVectorTransformsOptions(vector::VectorTransformsOptions options) {
vectorTransformOptions = options;		vectorTransformOptions = options;
return *this;		return *this;
}		}
		/// Configure specialized vector lowerings.
		/// These options are forwarded to
		/// x86vector::avx2::populateSpecializedTransposeLoweringPatterns by
		/// LinalgStrategyLowerVectorsPass when `avx2Lowering` is enabled.
		x86vector::avx2::LoweringOptions avx2LoweringOptions;
		/// Fluent setter for `avx2LoweringOptions`; stores a copy of `options` and
		/// returns `*this` so calls can be chained.
		LinalgVectorLoweringOptions &
		setAVX2LoweringOptions(x86vector::avx2::LoweringOptions options) {
		avx2LoweringOptions = options;
		return *this;
		}
};		};

//===----------------------------------------------------------------------===//		//===----------------------------------------------------------------------===//
// Transformations exposed as rewrite patterns.		// Transformations exposed as rewrite patterns.
//===----------------------------------------------------------------------===//		//===----------------------------------------------------------------------===//
/// Trait to check if T provides a `getOperationName` method.		/// Trait to check if T provides a `getOperationName` method.
template <typename T, typename... Args>		template <typename T, typename... Args>
using has_get_operation_name = decltype(T::getOperationName());		using has_get_operation_name = decltype(T::getOperationName());
▲ Show 20 Lines • Show All 332 Lines • Show Last 20 Lines

mlir/include/mlir/Dialect/X86Vector/Transforms.h

	//=- Transforms.h - X86Vector Dialect Transformation Entrypoints -- C++ --=//			//=- Transforms.h - X86Vector Dialect Transformation Entrypoints -- C++ --=//
	//			//
	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.			// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
	// See https://llvm.org/LICENSE.txt for license information.			// See https://llvm.org/LICENSE.txt for license information.
	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception			// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
	//			//
	//===----------------------------------------------------------------------===//			//===----------------------------------------------------------------------===//

	#ifndef MLIR_DIALECT_X86VECTOR_TRANSFORMS_H			#ifndef MLIR_DIALECT_X86VECTOR_TRANSFORMS_H
	#define MLIR_DIALECT_X86VECTOR_TRANSFORMS_H			#define MLIR_DIALECT_X86VECTOR_TRANSFORMS_H

				#include "mlir/IR/Value.h"

	namespace mlir {			namespace mlir {

				class ImplicitLocOpBuilder;
	class LLVMConversionTarget;			class LLVMConversionTarget;
	class LLVMTypeConverter;			class LLVMTypeConverter;
	class RewritePatternSet;			class RewritePatternSet;
	using OwningRewritePatternList = RewritePatternSet;			using OwningRewritePatternList = RewritePatternSet;

				namespace x86vector {

				/// Helper class to factor out the creation and extraction of masks from nibs.
				///
				/// `shuffle` / `permute` pack small bit-fields into a single 8-bit mask and
				/// `extractShuffle` / `extractPermute` recover the individual fields. The
				/// builders are `constexpr`, so a mask can be formed (and range-checked) at
				/// compile time.
				struct MaskHelper {
				  /// Packs four 2-bit selectors into one 8-bit mask.
				  /// b01 captures the lower 2 bits, b67 captures the higher 2 bits.
				  /// Meant to be used with instructions such as mm256ShufflePs.
				  ///
				  /// The selectors are `unsigned` template parameters so that the overflow
				  /// checks below cannot be satisfied by a negative value.
				  template <unsigned b67, unsigned b45, unsigned b23, unsigned b01>
				  static constexpr char shuffle() {
				    static_assert(b01 <= 0x03, "overflow");
				    static_assert(b23 <= 0x03, "overflow");
				    static_assert(b45 <= 0x03, "overflow");
				    static_assert(b67 <= 0x03, "overflow");
				    // The fields do not overlap, so OR-ing them is the same as adding them.
				    // The cast makes the narrowing from `unsigned` to `char` explicit.
				    return static_cast<char>((b67 << 6) | (b45 << 4) | (b23 << 2) | b01);
				  }

				  /// Splits a mask built by `shuffle` back into its four 2-bit selectors.
				  /// b01 captures the lower 2 bits, b67 captures the higher 2 bits.
				  static void extractShuffle(char mask, char &b01, char &b23, char &b45,
				                             char &b67) {
				    // Go through `unsigned char` so the result does not depend on whether
				    // plain `char` is signed on the host.
				    const unsigned bits = static_cast<unsigned char>(mask);
				    b67 = static_cast<char>((bits >> 6) & 0x03);
				    b45 = static_cast<char>((bits >> 4) & 0x03);
				    b23 = static_cast<char>((bits >> 2) & 0x03);
				    b01 = static_cast<char>(bits & 0x03);
				  }

				  /// Packs two 4-bit selectors into one 8-bit mask.
				  /// b03 captures the lower 4 bits, b47 captures the higher 4 bits.
				  /// Meant to be used with instructions such as mm256Permute2f128Ps.
				  template <unsigned b47, unsigned b03>
				  static constexpr char permute() {
				    static_assert(b03 <= 0x0f, "overflow");
				    static_assert(b47 <= 0x0f, "overflow");
				    return static_cast<char>((b47 << 4) | b03);
				  }

				  /// Splits a mask built by `permute` back into its two 4-bit selectors.
				  /// b03 captures the lower 4 bits, b47 captures the higher 4 bits.
				  static void extractPermute(char mask, char &b03, char &b47) {
				    const unsigned bits = static_cast<unsigned char>(mask);
				    b47 = static_cast<char>((bits >> 4) & 0x0f);
				    b03 = static_cast<char>(bits & 0x0f);
				  }
				};

				//===----------------------------------------------------------------------===//
				/// Helpers extracted from:
				/// - clang/lib/Headers/avxintrin.h
				/// - clang/test/CodeGen/X86/avx-builtins.c
				/// - clang/test/CodeGen/X86/avx2-builtins.c
				/// - clang/test/CodeGen/X86/avx-shuffle-builtins.c
				/// as well as the Intel Intrinsics Guide
				/// (https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html)
				/// make it easier to just implement known good lowerings.
				/// All intrinsics correspond 1-1 to the Intel definition.
				//===----------------------------------------------------------------------===//

				namespace avx2 {

				/// Lower to vector.shuffle v1, v2, [0, 8, 1, 9, 4, 12, 5, 13].
				/// Ops are created through `b`; the shuffle result is returned.
				Value mm256UnpackLoPs(ImplicitLocOpBuilder &b, Value v1, Value v2);

				/// Lower to vector.shuffle v1, v2, [2, 10, 3, 11, 6, 14, 7, 15].
				/// Ops are created through `b`; the shuffle result is returned.
				Value mm256UnpackHiPs(ImplicitLocOpBuilder &b, Value v1, Value v2);

				/// Lower to a vector.shuffle of v1 (= a) and v2 (= b) whose result is laid out
				/// as:   a a b b a a b b
				/// Takes an 8-bit mask made of four 2-bit selectors b01, b23, b45, b67 (see
				/// MaskHelper::shuffle); each selector picks a position in [0, 4):
				///             0:127              |              128:255
				///   a[b01] a[b23] b[b45] b[b67]  |  a[b01+4] a[b23+4] b[b45+4] b[b67+4]
				/// The emitted shuffle indices are
				///   [b01, b23, b45+8, b67+8, b01+4, b23+4, b45+12, b67+12].
				Value mm256ShufflePs(ImplicitLocOpBuilder &b, Value v1, Value v2, char mask);

				/// Lower to a vector.shuffle that builds the result from two 4-element halves,
				/// each chosen by one 4-bit field of `mask` (see MaskHelper::permute):
				///   - mask[0:3] chooses the first half of the result,
				///   - mask[4:7] chooses the second half of the result.
				/// Only the low 2 bits of each field carry a selector:
				///       0            1             2            3
				///   a[0:127] or a[128:255] or b[0:127] or b[128:255]
				/// which corresponds to shuffle indices 0..3, 4..7, 8..11 and 12..15.
				/// A field value above 3 is treated as unreachable by the implementation.
				Value mm256Permute2f128Ps(ImplicitLocOpBuilder &b, Value v1, Value v2,
				char mask);

				/// 4x8xf32-specific AVX2 transpose lowering.
				/// `vs` must hold exactly 4 values of type vector<8xf32> (checked by
				/// assertions); its entries are overwritten in place with the results.
				void transpose4x8xf32(ImplicitLocOpBuilder &ib, MutableArrayRef<Value> vs);

				/// 8x8xf32-specific AVX2 transpose lowering.
				/// `vs` must hold exactly 8 values of type vector<8xf32> (checked by
				/// assertions); its entries are overwritten in place with the results.
				void transpose8x8xf32(ImplicitLocOpBuilder &ib, MutableArrayRef<Value> vs);

				/// Structure to control the behavior of specialized AVX2 transpose lowering.
				dcaballeUnsubmitted Done Reply Inline Actions nit: avx2 -> AVX2 for consistency dcaballe: nit: avx2 -> AVX2 for consistency
				struct TransposeLoweringOptions {
				  /// Whether the specialized 4x8xf32 lowering may be applied. Off by default.
				  bool lower4x8xf32_{false};

				  /// Turns the 4x8xf32 lowering on (default) or off; returns `*this` so that
				  /// several settings can be chained.
				  TransposeLoweringOptions &lower4x8xf32(bool lower = true) {
				    this->lower4x8xf32_ = lower;
				    return *this;
				  }

				  /// Whether the specialized 8x8xf32 lowering may be applied. Off by default.
				  bool lower8x8xf32_{false};

				  /// Turns the 8x8xf32 lowering on (default) or off; returns `*this` so that
				  /// several settings can be chained.
				  TransposeLoweringOptions &lower8x8xf32(bool lower = true) {
				    this->lower8x8xf32_ = lower;
				    return *this;
				  }
				};

				/// Options for controlling specialized AVX2 lowerings.
				struct LoweringOptions {
				/// Configure specialized vector lowerings.
				TransposeLoweringOptions transposeOptions;
				LoweringOptions &setTransposeOptions(TransposeLoweringOptions options) {
				transposeOptions = options;
				return *this;
				}
				};

				/// Insert specialized transpose lowering patterns.
				/// `options` selects which shape-specific lowerings are allowed; a
				/// default-constructed `LoweringOptions` enables none of them. `benefit` is
				/// the pattern benefit passed on to the inserted pattern (10 by default).
				void populateSpecializedTransposeLoweringPatterns(
				RewritePatternSet &patterns, LoweringOptions options = LoweringOptions(),
				int benefit = 10);

				} // namespace avx2
				} // namespace x86vector

	/// Collect a set of patterns to lower X86Vector ops to ops that map to LLVM			/// Collect a set of patterns to lower X86Vector ops to ops that map to LLVM
	/// intrinsics.			/// intrinsics.
	void populateX86VectorLegalizeForLLVMExportPatterns(			void populateX86VectorLegalizeForLLVMExportPatterns(
	LLVMTypeConverter &converter, RewritePatternSet &patterns);			LLVMTypeConverter &converter, RewritePatternSet &patterns);

	/// Configure the target to support lowering X86Vector ops to ops that map to			/// Configure the target to support lowering X86Vector ops to ops that map to
	/// LLVM intrinsics.			/// LLVM intrinsics.
	void configureX86VectorLegalizeForExportTarget(LLVMConversionTarget &target);			void configureX86VectorLegalizeForExportTarget(LLVMConversionTarget &target);

	} // namespace mlir			} // namespace mlir

	#endif // MLIR_DIALECT_X86VECTOR_TRANSFORMS_H			#endif // MLIR_DIALECT_X86VECTOR_TRANSFORMS_H

mlir/lib/Dialect/Linalg/Transforms/CMakeLists.txt

Show First 20 Lines • Show All 45 Lines • ▼ Show 20 Lines	add_mlir_dialect_library(MLIRLinalgTransforms
MLIRPass		MLIRPass
MLIRStandard		MLIRStandard
MLIRStandardOpsTransforms		MLIRStandardOpsTransforms
MLIRStandardToLLVM		MLIRStandardToLLVM
MLIRTensor		MLIRTensor
MLIRTransforms		MLIRTransforms
MLIRTransformUtils		MLIRTransformUtils
MLIRVector		MLIRVector
		MLIRX86VectorTransforms
MLIRVectorToSCF		MLIRVectorToSCF
)		)

mlir/lib/Dialect/Linalg/Transforms/LinalgStrategyPasses.cpp

Show First 20 Lines • Show All 290 Lines • ▼ Show 20 Lines	struct LinalgStrategyLowerVectorsPass

void runOnFunction() override {		void runOnFunction() override {
auto funcOp = getFunction();		auto funcOp = getFunction();
if (!anchorFuncName.empty() && funcOp.getName() != anchorFuncName)		if (!anchorFuncName.empty() && funcOp.getName() != anchorFuncName)
return;		return;

MLIRContext *context = funcOp.getContext();		MLIRContext *context = funcOp.getContext();
RewritePatternSet patterns(context);		RewritePatternSet patterns(context);
vector::populateVectorToVectorCanonicalizationPatterns(patterns);		vector::populateVectorToVectorCanonicalizationPatterns(patterns);
		dcaballeUnsubmitted Done Reply Inline Actions what happened here? Should we remove it? dcaballe: what happened here? Should we remove it?
		nicolasvasilacheAuthorUnsubmitted Done Reply Inline Actions oops, thanks! nicolasvasilache: oops, thanks!
// In a progressive lowering of vectors, this would be the 1st step.		// In a progressive lowering of vectors, this would be the 1st step.
if (options.contractionLowering) {		if (options.contractionLowering) {
patterns.add<ContractionOpToOuterProductOpLowering,		patterns.add<ContractionOpToOuterProductOpLowering,
ContractionOpToMatmulOpLowering, ContractionOpLowering>(		ContractionOpToMatmulOpLowering, ContractionOpLowering>(
options.vectorTransformOptions, context);		options.vectorTransformOptions, context);
vector::populateVectorTransferPermutationMapLoweringPatterns(patterns);		vector::populateVectorTransferPermutationMapLoweringPatterns(patterns);
}		}
// In a progressive lowering of vectors, this would be the 2nd step.		// In a progressive lowering of vectors, this would be the 2nd step.
Show All 21 Lines	void runOnFunction() override {
// In a progressive lowering of vectors, this would be the 6th step.		// In a progressive lowering of vectors, this would be the 6th step.
if (options.shapeCastLowering) {		if (options.shapeCastLowering) {
vector::populateVectorShapeCastLoweringPatterns(patterns);		vector::populateVectorShapeCastLoweringPatterns(patterns);
}		}
// In a progressive lowering of vectors, this would be the 7th step.		// In a progressive lowering of vectors, this would be the 7th step.
if (options.transposeLowering) {		if (options.transposeLowering) {
vector::populateVectorTransposeLoweringPatterns(		vector::populateVectorTransposeLoweringPatterns(
patterns, options.vectorTransformOptions);		patterns, options.vectorTransformOptions);
		if (options.avx2Lowering)
		x86vector::avx2::populateSpecializedTransposeLoweringPatterns(
		patterns, options.avx2LoweringOptions, /*benefit=*/10);
}		}
(void)applyPatternsAndFoldGreedily(funcOp, std::move(patterns));		(void)applyPatternsAndFoldGreedily(funcOp, std::move(patterns));
}		}

LinalgVectorLoweringOptions options;		LinalgVectorLoweringOptions options;
LinalgTransformationFilter filter;		LinalgTransformationFilter filter;
};		};

▲ Show 20 Lines • Show All 80 Lines • Show Last 20 Lines

mlir/lib/Dialect/X86Vector/Transforms/AVXTranspose.cpp

This file was added.

				//===- AVXTranspose.cpp - Lower Vector transpose to AVX -------------------===//
				//
				// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
				// See https://llvm.org/LICENSE.txt for license information.
				// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
				//
				//===----------------------------------------------------------------------===//
				//
				// This file implements vector.transpose rewrites as AVX patterns for particular
				// sizes of interest.
				aartbikUnsubmitted Done Reply Inline Actions The filename seems to imply it is specific to AVX target and the transpose op. Perhaps make the L9 and L1 description more consistent with that? aartbik: The filename seems to imply it is specific to AVX target and the transpose op. Perhaps make the…
				//
				//===----------------------------------------------------------------------===//

				#include "mlir/Dialect/Vector/VectorOps.h"
				#include "mlir/Dialect/X86Vector/Transforms.h"
				#include "mlir/IR/ImplicitLocOpBuilder.h"
				#include "mlir/IR/Matchers.h"
				#include "mlir/IR/PatternMatch.h"

				using namespace mlir;
				using namespace mlir::vector;
				using namespace mlir::x86vector;
				using namespace mlir::x86vector::avx2;

				/// Emits vector.shuffle v1, v2, [0, 8, 1, 9, 4, 12, 5, 13] and returns its
				/// result.
				Value mlir::x86vector::avx2::mm256UnpackLoPs(ImplicitLocOpBuilder &b, Value v1,
				                                             Value v2) {
				  const int64_t unpackLoIndices[] = {0, 8, 1, 9, 4, 12, 5, 13};
				  return b.create<vector::ShuffleOp>(v1, v2,
				                                     ArrayRef<int64_t>(unpackLoIndices));
				}

				/// Emits vector.shuffle v1, v2, [2, 10, 3, 11, 6, 14, 7, 15] and returns its
				/// result.
				Value mlir::x86vector::avx2::mm256UnpackHiPs(ImplicitLocOpBuilder &b, Value v1,
				                                             Value v2) {
				  const int64_t unpackHiIndices[] = {2, 10, 3, 11, 6, 14, 7, 15};
				  return b.create<vector::ShuffleOp>(v1, v2,
				                                     ArrayRef<int64_t>(unpackHiIndices));
				}
				/// Result layout (a = v1, b = v2):   a a b b a a b b
				/// Takes an 8-bit mask holding four 2-bit selectors, each picking a position
				/// in [0, 4) of a or b:
				///             0:127              |              128:255
				///   a[b01] a[b23] b[b45] b[b67]  |  a[b01+4] a[b23+4] b[b45+4] b[b67+4]
				Value mlir::x86vector::avx2::mm256ShufflePs(ImplicitLocOpBuilder &b, Value v1,
				                                            Value v2, char mask) {
				  char selA0, selA1, selB0, selB1;
				  MaskHelper::extractShuffle(mask, selA0, selA1, selB0, selB1);

				  // The same four selectors are applied to both halves; the second half
				  // simply addresses elements 4 positions further. Indices taken from v2
				  // are offset by 8.
				  SmallVector<int64_t> shuffleMask;
				  for (int64_t halfOffset : {0, 4}) {
				    shuffleMask.push_back(selA0 + halfOffset);
				    shuffleMask.push_back(selA1 + halfOffset);
				    shuffleMask.push_back(selB0 + 8 + halfOffset);
				    shuffleMask.push_back(selB1 + 8 + halfOffset);
				  }
				  return b.create<vector::ShuffleOp>(v1, v2, shuffleMask);
				}

				// Each 4-bit field of the mask chooses one 4-element half for the result:
				//       0            1             2            3
				//   a[0:127] or a[128:255] or b[0:127] or b[128:255]
				// imm[0:3] chooses the first half of the result, imm[4:7] the second half;
				// only the values 0..3 are accepted for either field.
				Value mlir::x86vector::avx2::mm256Permute2f128Ps(ImplicitLocOpBuilder &b,
				                                                 Value v1, Value v2,
				                                                 char mask) {
				  char firstHalfSel, secondHalfSel;
				  MaskHelper::extractPermute(mask, firstHalfSel, secondHalfSel);

				  SmallVector<int64_t> shuffleMask;
				  for (char control : {firstHalfSel, secondHalfSel}) {
				    switch (control) {
				    case 0:
				      llvm::append_range(shuffleMask, ArrayRef<int64_t>{0, 1, 2, 3});
				      break;
				    case 1:
				      llvm::append_range(shuffleMask, ArrayRef<int64_t>{4, 5, 6, 7});
				      break;
				    case 2:
				      llvm::append_range(shuffleMask, ArrayRef<int64_t>{8, 9, 10, 11});
				      break;
				    case 3:
				      llvm::append_range(shuffleMask, ArrayRef<int64_t>{12, 13, 14, 15});
				      break;
				    default:
				      llvm_unreachable("control > 3 : overflow");
				    }
				  }
				  return b.create<vector::ShuffleOp>(v1, v2, shuffleMask);
				}

				/// AVX2 4x8xf32-specific transpose lowering using a "C intrinsics" model.
				void mlir::x86vector::avx2::transpose4x8xf32(ImplicitLocOpBuilder &ib,
				MutableArrayRef<Value> vs) {
				auto vt = VectorType::get({8}, Float32Type::get(ib.getContext()));
				#ifndef NDEBUG
				aartbikUnsubmitted Done Reply Inline Actions I am guessing this is there to avoid "unused variable" errors in no NDEBUG mode that removes the asserts? aartbik: I am guessing this is there to avoid "unused variable" errors in no NDEBUG mode that removes…
				nicolasvasilacheAuthorUnsubmitted Done Reply Inline Actions yup. nicolasvasilache: yup.
				mehdi_aminiUnsubmitted Not Done Reply Inline Actions The `#ifndef NDEBUG` alternative would also eliminate the two uniquing calls / lookup in prod here (the optimizer won't remove these side-effecting `get()`) mehdi_amini: The `#ifndef NDEBUG` alternative would also eliminate the two uniquing calls / lookup in prod…
				assert(vs.size() == 4 && "expects 4 vectors");
				assert(llvm::all_of(ValueRange{vs}.getTypes(),
				[&](Type t) { return t == vt; }) &&
				"expects all types to be vector<8xf32>");
				#endif

				Value T0 = mm256UnpackLoPs(ib, vs[0], vs[1]);
				Value T1 = mm256UnpackHiPs(ib, vs[0], vs[1]);
				Value T2 = mm256UnpackLoPs(ib, vs[2], vs[3]);
				Value T3 = mm256UnpackHiPs(ib, vs[2], vs[3]);
				dcaballeUnsubmitted Done Reply Inline Actions Just a philosophical comment on whether we want to promote this way of describing masks. Even though these are AVX2 specific intrinsics, this mask format is so misleading if you compare it with how the shuffle mask is represented in MLIR and LLVM... I guess, it will facilitate the portability of code based on AVX2 intrinsics. That's an important point. dcaballe: Just a philosophical comment on whether we want to promote this way of describing masks. Even…
				nicolasvasilacheAuthorUnsubmitted Done Reply Inline Actions In the end, masks lower to a single i8 and anyone can use `0xCE`. However I will strongly push back against writing the compiler this way because it is hyper-obfuscating (in fact, even C intrinsics have an MM_SHUFFLE macro to try and improve things). Depending on the instruction / instrinsic the 8bits are interpreted in various confusing ways.. My position is that the alternative is so terrible, that we should absolutely promote this way of describing masks. Translating existing code may be trickier but it will also force anyone doing this translation understand what it is doing. nicolasvasilache: In the end, masks lower to a single i8 and anyone can use `0xCE`. However I will strongly push…
				Value S0 = mm256ShufflePs(ib, T0, T2, MaskHelper::shuffle<1, 0, 1, 0>());
				Value S1 = mm256ShufflePs(ib, T0, T2, MaskHelper::shuffle<3, 2, 3, 2>());
				Value S2 = mm256ShufflePs(ib, T1, T3, MaskHelper::shuffle<1, 0, 1, 0>());
				Value S3 = mm256ShufflePs(ib, T1, T3, MaskHelper::shuffle<3, 2, 3, 2>());
				vs[0] = mm256Permute2f128Ps(ib, S0, S1, MaskHelper::permute<2, 0>());
				vs[1] = mm256Permute2f128Ps(ib, S2, S3, MaskHelper::permute<2, 0>());
				vs[2] = mm256Permute2f128Ps(ib, S0, S1, MaskHelper::permute<3, 1>());
				vs[3] = mm256Permute2f128Ps(ib, S2, S3, MaskHelper::permute<3, 1>());
				}

				/// AVX2 8x8xf32-specific transpose lowering using a "C intrinsics" model.
				///
				/// `vs` must hold exactly 8 values of type vector<8xf32>. The entries of `vs`
				/// are overwritten in place with the results of the unpack / shuffle /
				/// permute sequence below.
				void mlir::x86vector::avx2::transpose8x8xf32(ImplicitLocOpBuilder &ib,
				                                             MutableArrayRef<Value> vs) {
				#ifndef NDEBUG
				  // `vt` only feeds the assertions, so it is built in assert-enabled builds
				  // only (same scheme as transpose4x8xf32); release builds skip the type
				  // lookups entirely.
				  auto vt = VectorType::get({8}, Float32Type::get(ib.getContext()));
				  assert(vs.size() == 8 && "expects 8 vectors");
				  assert(llvm::all_of(ValueRange{vs}.getTypes(),
				                      [&](Type t) { return t == vt; }) &&
				         "expects all types to be vector<8xf32>");
				#endif

				  // Stage 1: unpack consecutive pairs of inputs.
				  Value T0 = mm256UnpackLoPs(ib, vs[0], vs[1]);
				  Value T1 = mm256UnpackHiPs(ib, vs[0], vs[1]);
				  Value T2 = mm256UnpackLoPs(ib, vs[2], vs[3]);
				  Value T3 = mm256UnpackHiPs(ib, vs[2], vs[3]);
				  Value T4 = mm256UnpackLoPs(ib, vs[4], vs[5]);
				  Value T5 = mm256UnpackHiPs(ib, vs[4], vs[5]);
				  Value T6 = mm256UnpackLoPs(ib, vs[6], vs[7]);
				  Value T7 = mm256UnpackHiPs(ib, vs[6], vs[7]);
				  // Stage 2: shuffle pairs of stage-1 results.
				  Value S0 = mm256ShufflePs(ib, T0, T2, MaskHelper::shuffle<1, 0, 1, 0>());
				  Value S1 = mm256ShufflePs(ib, T0, T2, MaskHelper::shuffle<3, 2, 3, 2>());
				  Value S2 = mm256ShufflePs(ib, T1, T3, MaskHelper::shuffle<1, 0, 1, 0>());
				  Value S3 = mm256ShufflePs(ib, T1, T3, MaskHelper::shuffle<3, 2, 3, 2>());
				  Value S4 = mm256ShufflePs(ib, T4, T6, MaskHelper::shuffle<1, 0, 1, 0>());
				  Value S5 = mm256ShufflePs(ib, T4, T6, MaskHelper::shuffle<3, 2, 3, 2>());
				  Value S6 = mm256ShufflePs(ib, T5, T7, MaskHelper::shuffle<1, 0, 1, 0>());
				  Value S7 = mm256ShufflePs(ib, T5, T7, MaskHelper::shuffle<3, 2, 3, 2>());
				  // Stage 3: combine 4-element halves of stage-2 results into the outputs.
				  vs[0] = mm256Permute2f128Ps(ib, S0, S4, MaskHelper::permute<2, 0>());
				  vs[1] = mm256Permute2f128Ps(ib, S1, S5, MaskHelper::permute<2, 0>());
				  vs[2] = mm256Permute2f128Ps(ib, S2, S6, MaskHelper::permute<2, 0>());
				  vs[3] = mm256Permute2f128Ps(ib, S3, S7, MaskHelper::permute<2, 0>());
				  vs[4] = mm256Permute2f128Ps(ib, S0, S4, MaskHelper::permute<3, 1>());
				  vs[5] = mm256Permute2f128Ps(ib, S1, S5, MaskHelper::permute<3, 1>());
				  vs[6] = mm256Permute2f128Ps(ib, S2, S6, MaskHelper::permute<3, 1>());
				  vs[7] = mm256Permute2f128Ps(ib, S3, S7, MaskHelper::permute<3, 1>());
				}

				/// Rewrite AVX2-specific 2-D vector.transpose, for the supported cases and
				/// depending on the `TransposeLoweringOptions`.
				///
				/// The pattern only fires for a [1, 0] permutation of a 4x8xf32 or 8x8xf32
				/// vector, and only when the matching option (`lower4x8xf32_` /
				/// `lower8x8xf32_`) is set. Every other transpose is left untouched.
				class TransposeOpLowering : public OpRewritePattern<vector::TransposeOp> {
				public:
				  using OpRewritePattern<vector::TransposeOp>::OpRewritePattern;

				  /// Stores a copy of `loweringOptions` and registers the pattern with the
				  /// given `benefit`.
				  TransposeOpLowering(LoweringOptions loweringOptions, MLIRContext *context,
				                      int benefit)
				      : OpRewritePattern<vector::TransposeOp>(context, benefit),
				        loweringOptions(loweringOptions) {}

				  LogicalResult matchAndRewrite(vector::TransposeOp op,
				                                PatternRewriter &rewriter) const override {
				    auto loc = op.getLoc();

				    VectorType srcType = op.getVectorType();
				    if (srcType.getRank() != 2)
				      return rewriter.notifyMatchFailure(op, "Not a 2-D transpose");

				    SmallVector<int64_t, 4> transp;
				    for (auto attr : op.transp())
				      transp.push_back(attr.cast<IntegerAttr>().getInt());
				    // Reject everything but the exact permutation [1, 0]. The two
				    // comparisons must be OR-ed: with `&&` a permutation such as [1, 1] or
				    // [0, 0] would be accepted.
				    if (transp.size() != 2 || transp[0] != 1 || transp[1] != 0)
				      return rewriter.notifyMatchFailure(op, "Not a 2-D transpose permutation");

				    // transpose4x8xf32 / transpose8x8xf32 assert vector<8xf32> operands, so
				    // other element types must not reach them.
				    if (!srcType.getElementType().isF32())
				      return rewriter.notifyMatchFailure(op, "Not an f32 transpose");

				    int64_t m = srcType.getShape().front(), n = srcType.getShape().back();

				    const TransposeLoweringOptions &transposeOptions =
				        loweringOptions.transposeOptions;
				    const bool lower4x8 = transposeOptions.lower4x8xf32_ && m == 4 && n == 8;
				    const bool lower8x8 = transposeOptions.lower8x8xf32_ && m == 8 && n == 8;
				    if (!lower4x8 && !lower8x8)
				      return failure();

				    ImplicitLocOpBuilder ib(loc, rewriter);

				    // Split the 2-D source into its m rows. The induction variable is
				    // int64_t to match the type of m.
				    SmallVector<Value> vs;
				    for (int64_t i = 0; i < m; ++i)
				      vs.push_back(ib.create<vector::ExtractOp>(op.vector(), i));

				    // Transpose the rows in place with the shape-specific helper.
				    if (lower4x8)
				      transpose4x8xf32(ib, vs);
				    else
				      transpose8x8xf32(ib, vs);

				    auto flattenedType =
				        VectorType::get({n * m}, op.getVectorType().getElementType());
				    auto transposedType =
				        VectorType::get({n, m}, op.getVectorType().getElementType());

				    // Reassemble the rows into a value of the source type, starting from a
				    // zero constant.
				    Value res = ib.create<arith::ConstantOp>(
				        op.getVectorType(), ib.getZeroAttr(op.getVectorType()));
				    for (int64_t i = 0; i < m; ++i)
				      res = ib.create<vector::InsertOp>(vs[i], res, i);

				    // The transposed form is still 4x8 and needs to be reinterpreted as 8x4
				    // via shape_casts. The 8x8 case already has the right type.
				    if (lower4x8) {
				      res = ib.create<vector::ShapeCastOp>(flattenedType, res);
				      res = ib.create<vector::ShapeCastOp>(transposedType, res);
				    }

				    rewriter.replaceOp(op, res);
				    return success();
				  }

				private:
				  /// Options deciding which shape-specific lowerings this pattern may apply.
				  LoweringOptions loweringOptions;
				};

				/// Adds the AVX2 transpose pattern to `patterns`, configured with `options`
				/// and registered with the given `benefit`.
				void mlir::x86vector::avx2::populateSpecializedTransposeLoweringPatterns(
				    RewritePatternSet &patterns, LoweringOptions options, int benefit) {
				  MLIRContext *context = patterns.getContext();
				  patterns.add<TransposeOpLowering>(options, context, benefit);
				}

mlir/lib/Dialect/X86Vector/Transforms/CMakeLists.txt

	add_mlir_dialect_library(MLIRX86VectorTransforms			add_mlir_dialect_library(MLIRX86VectorTransforms
				AVXTranspose.cpp
	LegalizeForLLVMExport.cpp			LegalizeForLLVMExport.cpp

	DEPENDS			DEPENDS
	MLIRX86VectorConversionsIncGen			MLIRX86VectorConversionsIncGen

	LINK_LIBS PUBLIC			LINK_LIBS PUBLIC
	MLIRArithmetic			MLIRArithmetic
	MLIRX86Vector			MLIRX86Vector
	MLIRIR			MLIRIR
	MLIRLLVMCommonConversion			MLIRLLVMCommonConversion
	MLIRLLVMIR			MLIRLLVMIR
				MLIRVector
	)			)

mlir/test/Dialect/Vector/vector-contract-matvec-transforms.mlir

	// RUN: mlir-opt %s -test-vector-contraction-conversion=vector-outerproduct=1 \| FileCheck %s			// RUN: mlir-opt %s -test-vector-contraction-lowering=vector-outerproduct=1 \| FileCheck %s

	#matvec_accesses = [			#matvec_accesses = [
	affine_map<(i, j) -> (i, j)>,			affine_map<(i, j) -> (i, j)>,
	affine_map<(i, j) -> (j)>,			affine_map<(i, j) -> (j)>,
	affine_map<(i, j) -> (i)>			affine_map<(i, j) -> (i)>
	]			]
	#matvec_trait = {			#matvec_trait = {
	indexing_maps = #matvec_accesses,			indexing_maps = #matvec_accesses,
	▲ Show 20 Lines • Show All 200 Lines • Show Last 20 Lines

mlir/test/Dialect/Vector/vector-contract-transforms.mlir

// RUN: mlir-opt %s -test-vector-contraction-conversion \| FileCheck %s		// RUN: mlir-opt %s -test-vector-contraction-lowering \| FileCheck %s
// RUN: mlir-opt %s -test-vector-contraction-conversion=vector-lower-matrix-intrinsics=1 \| FileCheck %s --check-prefix=MATRIX		// RUN: mlir-opt %s -test-vector-contraction-lowering=vector-lower-matrix-intrinsics=1 \| FileCheck %s --check-prefix=MATRIX
// RUN: mlir-opt %s -test-vector-contraction-conversion=vector-outerproduct=1 \| FileCheck %s --check-prefix=OUTERPRODUCT		// RUN: mlir-opt %s -test-vector-contraction-lowering=vector-outerproduct=1 \| FileCheck %s --check-prefix=OUTERPRODUCT
// RUN: mlir-opt %s -test-vector-contraction-conversion=vector-filter-outerproduct=1 \| FileCheck %s --check-prefix=FILTEROUTERPRODUCT		// RUN: mlir-opt %s -test-vector-contraction-lowering=vector-filter-outerproduct=1 \| FileCheck %s --check-prefix=FILTEROUTERPRODUCT

#dotp_accesses = [		#dotp_accesses = [
affine_map<(i) -> (i)>,		affine_map<(i) -> (i)>,
affine_map<(i) -> (i)>,		affine_map<(i) -> (i)>,
affine_map<(i) -> ()>		affine_map<(i) -> ()>
]		]
#dotp_trait = {		#dotp_trait = {
indexing_maps = #dotp_accesses,		indexing_maps = #dotp_accesses,
▲ Show 20 Lines • Show All 131 Lines • ▼ Show 20 Lines	#matmat_trait = {
iterator_types = ["parallel", "parallel", "reduction"]		iterator_types = ["parallel", "parallel", "reduction"]
}		}

// CHECK-LABEL: func @extract_contract4		// CHECK-LABEL: func @extract_contract4
// CHECK-SAME: %[[A:.*0]]: vector<2x2xf32>,		// CHECK-SAME: %[[A:.*0]]: vector<2x2xf32>,
// CHECK-SAME: %[[B:.*1]]: vector<2x2xf32>,		// CHECK-SAME: %[[B:.*1]]: vector<2x2xf32>,
// CHECK-SAME: %[[C:.*2]]: vector<2x2xf32>		// CHECK-SAME: %[[C:.*2]]: vector<2x2xf32>
// CHECK: %[[R:.*]] = arith.constant dense<0.000000e+00> : vector<2x2xf32>		// CHECK: %[[R:.*]] = arith.constant dense<0.000000e+00> : vector<2x2xf32>
// ... bunch of extract insert to transpose B into Bt		// CHECK: %[[Bt:.*]] = vector.transpose %arg1, [1, 0] : vector<2x2xf32> to vector<2x2xf32>
// CHECK: %[[Bt:.*]] = vector.insert %{{.*}}, %{{.*}} [1, 1] : f32 into vector<2x2xf32>
// CHECK: %[[T0:.*]] = vector.extract %[[A]][0] : vector<2x2xf32>		// CHECK: %[[T0:.*]] = vector.extract %[[A]][0] : vector<2x2xf32>
// CHECK: %[[T2:.*]] = vector.extract %[[Bt]][0] : vector<2x2xf32>		// CHECK: %[[T2:.*]] = vector.extract %[[Bt]][0] : vector<2x2xf32>
// CHECK: %[[T9:.*]] = arith.mulf %[[T0]], %[[T2]] : vector<2xf32>		// CHECK: %[[T9:.*]] = arith.mulf %[[T0]], %[[T2]] : vector<2xf32>
// CHECK: %[[T10:.*]] = vector.reduction "add", %[[T9]] : vector<2xf32> into f32		// CHECK: %[[T10:.*]] = vector.reduction "add", %[[T9]] : vector<2xf32> into f32
// CHECK: %[[T11:.*]] = vector.insert %[[T10]], %[[R]] [0, 0] : f32 into vector<2x2xf32>		// CHECK: %[[T11:.*]] = vector.insert %[[T10]], %[[R]] [0, 0] : f32 into vector<2x2xf32>
//		//
// CHECK: %[[T12:.*]] = vector.extract %[[Bt]][1] : vector<2x2xf32>		// CHECK: %[[T12:.*]] = vector.extract %[[Bt]][1] : vector<2x2xf32>
// CHECK: %[[T19:.*]] = arith.mulf %[[T0]], %[[T12]] : vector<2xf32>		// CHECK: %[[T19:.*]] = arith.mulf %[[T0]], %[[T12]] : vector<2xf32>
▲ Show 20 Lines • Show All 232 Lines • ▼ Show 20 Lines
// CHECK: %[[T1:.*]] = arith.muli %[[A]], %[[T0]] : vector<16xi32>		// CHECK: %[[T1:.*]] = arith.muli %[[A]], %[[T0]] : vector<16xi32>
// CHECK: %[[T2:.*]] = arith.addi %[[T1]], %[[C]] : vector<16xi32>		// CHECK: %[[T2:.*]] = arith.addi %[[T1]], %[[C]] : vector<16xi32>
// CHECK: return %[[T2]] : vector<16xi32>		// CHECK: return %[[T2]] : vector<16xi32>
func @axpy_int_add(%arg0: vector<16xi32>, %arg1: i32, %arg2: vector<16xi32>) -> vector<16xi32> {		func @axpy_int_add(%arg0: vector<16xi32>, %arg1: i32, %arg2: vector<16xi32>) -> vector<16xi32> {
%0 = vector.outerproduct %arg0, %arg1, %arg2: vector<16xi32>, i32		%0 = vector.outerproduct %arg0, %arg1, %arg2: vector<16xi32>, i32
return %0: vector<16xi32>		return %0: vector<16xi32>
}		}

// CHECK-LABEL: func @transpose23
// CHECK-SAME: %[[A:.*]]: vector<2x3xf32>
// CHECK: %[[Z:.*]] = arith.constant dense<0.000000e+00> : vector<3x2xf32>
// CHECK: %[[T0:.*]] = vector.extract %[[A]][0, 0] : vector<2x3xf32>
// CHECK: %[[T1:.*]] = vector.insert %[[T0]], %[[Z]] [0, 0] : f32 into vector<3x2xf32>
// CHECK: %[[T2:.*]] = vector.extract %[[A]][1, 0] : vector<2x3xf32>
// CHECK: %[[T3:.*]] = vector.insert %[[T2]], %[[T1]] [0, 1] : f32 into vector<3x2xf32>
// CHECK: %[[T4:.*]] = vector.extract %[[A]][0, 1] : vector<2x3xf32>
// CHECK: %[[T5:.*]] = vector.insert %[[T4]], %[[T3]] [1, 0] : f32 into vector<3x2xf32>
// CHECK: %[[T6:.*]] = vector.extract %[[A]][1, 1] : vector<2x3xf32>
// CHECK: %[[T7:.*]] = vector.insert %[[T6]], %[[T5]] [1, 1] : f32 into vector<3x2xf32>
// CHECK: %[[T8:.*]] = vector.extract %[[A]][0, 2] : vector<2x3xf32>
// CHECK: %[[T9:.*]] = vector.insert %[[T8]], %[[T7]] [2, 0] : f32 into vector<3x2xf32>
// CHECK: %[[T10:.*]] = vector.extract %[[A]][1, 2] : vector<2x3xf32>
// CHECK: %[[T11:.*]] = vector.insert %[[T10]], %[[T9]] [2, 1] : f32 into vector<3x2xf32>
// CHECK: return %[[T11]] : vector<3x2xf32>

func @transpose23(%arg0: vector<2x3xf32>) -> vector<3x2xf32> {
%0 = vector.transpose %arg0, [1, 0] : vector<2x3xf32> to vector<3x2xf32>
return %0 : vector<3x2xf32>
}

// CHECK-LABEL: func @nop_shape_cast		// CHECK-LABEL: func @nop_shape_cast
// CHECK-SAME: %[[A:.*]]: vector<16xf32>		// CHECK-SAME: %[[A:.*]]: vector<16xf32>
// CHECK: return %[[A]] : vector<16xf32>		// CHECK: return %[[A]] : vector<16xf32>

func @nop_shape_cast(%arg0: vector<16xf32>) -> vector<16xf32> {		func @nop_shape_cast(%arg0: vector<16xf32>) -> vector<16xf32> {
%0 = vector.shape_cast %arg0 : vector<16xf32> to vector<16xf32>		%0 = vector.shape_cast %arg0 : vector<16xf32> to vector<16xf32>
return %0 : vector<16xf32>		return %0 : vector<16xf32>
}		}
▲ Show 20 Lines • Show All 698 Lines • Show Last 20 Lines

mlir/test/Dialect/Vector/vector-flat-transforms.mlir

This file was deleted.

	// RUN: mlir-opt %s -test-vector-contraction-conversion=vector-flat-transpose=1 | FileCheck %s

	// Tests for lowering 2-D vector.transpose into vector.flat_transpose.
	//
	// TODO: having ShapeCastOp2DDownCastRewritePattern and
	// ShapeCastOp2DUpCastRewritePattern too early in the greedy rewriting
	// patterns misses opportunities to fold shape casts!

	// No shape cast folding expected.
	//
	// CHECK-LABEL: func @transpose44_44(
	// CHECK-SAME: %[[A:.*]]: vector<4x4xf32>
	// CHECK: %[[T0:.*]] = vector.extract %[[A]][0] : vector<4x4xf32>
	// CHECK: %[[T8:.*]] = vector.flat_transpose %{{.*}} {columns = 4 : i32, rows = 4 : i32} : vector<16xf32> -> vector<16xf32>
	// CHECK: %[[T9:.*]] = vector.extract_strided_slice %[[T8]] {offsets = [0], sizes = [4], strides = [1]} : vector<16xf32> to vector<4xf32>
	//
	func @transpose44_44(%arg0: vector<4x4xf32>) -> vector<4x4xf32> {
	%0 = vector.transpose %arg0, [1, 0] : vector<4x4xf32> to vector<4x4xf32>
	return %0 : vector<4x4xf32>
	}

	// Folds preceding shape cast as expected,
	// no following shape cast folding expected.
	//
	// FIXME: PR49590 - shape_cast not stable.
	//
	// CHECK-LABEL: func @transpose16_44(
	// CHECK-SAME: %[[A:.*]]: vector<16xf32>
	// HECK: %[[T0:.*]] = vector.flat_transpose %[[A]] {columns = 4 : i32, rows = 4 : i32} : vector<16xf32> -> vector<16xf32>
	// HECK: %[[T1:.*]] = vector.extract_strided_slice %[[T0]] {offsets = [0], sizes = [4], strides = [1]} : vector<16xf32> to vector<4xf32>
	//
	func @transpose16_44(%arg0: vector<16xf32>) -> vector<4x4xf32> {
	%0 = vector.shape_cast %arg0 : vector<16xf32> to vector<4x4xf32>
	%1 = vector.transpose %0, [1, 0] : vector<4x4xf32> to vector<4x4xf32>
	return %1 : vector<4x4xf32>
	}

	// No preceding shape cast folding expected,
	// but FAILS to fold following cast.
	//
	// CHECK-LABEL: func @transpose44_16(
	// CHECK-SAME: %[[A:.*]]: vector<4x4xf32>
	// CHECK: %[[T0:.*]] = vector.extract %[[A]][0] : vector<4x4xf32>
	// CHECK: %[[T8:.*]] = vector.flat_transpose %{{.*}} {columns = 4 : i32, rows = 4 : i32} : vector<16xf32> -> vector<16xf32>
	func @transpose44_16(%arg0: vector<4x4xf32>) -> vector<16xf32> {
	%0 = vector.transpose %arg0, [1, 0] : vector<4x4xf32> to vector<4x4xf32>
	%1 = vector.shape_cast %0 : vector<4x4xf32> to vector<16xf32>
	return %1 : vector<16xf32>
	}

	// Folds preceding shape cast as expected,
	// but FAILS to fold following cast.
	//
	// FIXME: PR49590 - shape_cast not stable.
	//
	// CHECK-LABEL: func @transpose16_16(
	// CHECK-SAME: %[[A:.*]]: vector<16xf32>
	// HECK: %[[T0:.*]] = vector.flat_transpose %[[A]] {columns = 4 : i32, rows = 4 : i32} : vector<16xf32> -> vector<16xf32>
	//
	func @transpose16_16(%arg0: vector<16xf32>) -> vector<16xf32> {
	%0 = vector.shape_cast %arg0 : vector<16xf32> to vector<4x4xf32>
	%1 = vector.transpose %0, [1, 0] : vector<4x4xf32> to vector<4x4xf32>
	%2 = vector.shape_cast %1 : vector<4x4xf32> to vector<16xf32>
	return %2 : vector<16xf32>
	}

mlir/test/Dialect/Vector/vector-mem-transforms.mlir

	// RUN: mlir-opt %s -test-vector-to-vector-conversion | FileCheck %s			// RUN: mlir-opt %s -test-vector-to-vector-lowering | FileCheck %s

	// CHECK-LABEL: func @maskedload0(			// CHECK-LABEL: func @maskedload0(
	// CHECK-SAME: %[[A0:.*]]: memref<?xf32>,			// CHECK-SAME: %[[A0:.*]]: memref<?xf32>,
	// CHECK-SAME: %[[A1:.*]]: vector<16xf32>) -> vector<16xf32> {			// CHECK-SAME: %[[A1:.*]]: vector<16xf32>) -> vector<16xf32> {
	// CHECK-DAG: %[[C:.*]] = arith.constant 0 : index			// CHECK-DAG: %[[C:.*]] = arith.constant 0 : index
	// CHECK-NEXT: %[[T:.*]] = vector.load %[[A0]][%[[C]]] : memref<?xf32>, vector<16xf32>			// CHECK-NEXT: %[[T:.*]] = vector.load %[[A0]][%[[C]]] : memref<?xf32>, vector<16xf32>
	// CHECK-NEXT: return %[[T]] : vector<16xf32>			// CHECK-NEXT: return %[[T]] : vector<16xf32>
	func @maskedload0(%base: memref<?xf32>, %pass_thru: vector<16xf32>) -> vector<16xf32> {			func @maskedload0(%base: memref<?xf32>, %pass_thru: vector<16xf32>) -> vector<16xf32> {
	▲ Show 20 Lines • Show All 179 Lines • Show Last 20 Lines

mlir/test/Dialect/Vector/vector-transforms.mlir

	// RUN: mlir-opt %s -test-vector-to-vector-conversion="unroll" | FileCheck %s			// RUN: mlir-opt %s -test-vector-to-vector-lowering="unroll" | FileCheck %s

	// CHECK-DAG: #[[MAP1:map[0-9]+]] = affine_map<(d0, d1, d2) -> (d1, d2)>			// CHECK-DAG: #[[MAP1:map[0-9]+]] = affine_map<(d0, d1, d2) -> (d1, d2)>

	// CHECK-LABEL: func @add4x2			// CHECK-LABEL: func @add4x2
	// CHECK: %[[S1:.*]] = vector.extract_strided_slice %{{.*}} {offsets = [0, 0], sizes = [2, 2], strides = [1, 1]} : vector<4x2xf32> to vector<2x2xf32>			// CHECK: %[[S1:.*]] = vector.extract_strided_slice %{{.*}} {offsets = [0, 0], sizes = [2, 2], strides = [1, 1]} : vector<4x2xf32> to vector<2x2xf32>
	// CHECK-NEXT: %[[S2:.*]] = vector.extract_strided_slice %{{.*}} {offsets = [0, 0], sizes = [2, 2], strides = [1, 1]} : vector<4x2xf32> to vector<2x2xf32>			// CHECK-NEXT: %[[S2:.*]] = vector.extract_strided_slice %{{.*}} {offsets = [0, 0], sizes = [2, 2], strides = [1, 1]} : vector<4x2xf32> to vector<2x2xf32>
	// CHECK-NEXT: %[[A1:.*]] = arith.addf %[[S1]], %[[S2]] : vector<2x2xf32>			// CHECK-NEXT: %[[A1:.*]] = arith.addf %[[S1]], %[[S2]] : vector<2x2xf32>
	// CHECK-NEXT: %[[VEC0:.*]] = vector.insert_strided_slice %[[A1]], %{{.*}} {offsets = [0, 0], strides = [1, 1]} : vector<2x2xf32> into vector<4x2xf32>			// CHECK-NEXT: %[[VEC0:.*]] = vector.insert_strided_slice %[[A1]], %{{.*}} {offsets = [0, 0], strides = [1, 1]} : vector<2x2xf32> into vector<4x2xf32>
	▲ Show 20 Lines • Show All 618 Lines • Show Last 20 Lines

mlir/test/Dialect/Vector/vector-transpose-lowering.mlir

This file was added.

				// RUN: mlir-opt %s -test-vector-transpose-lowering=eltwise=1 | FileCheck %s --check-prefix=ELTWISE
				// RUN: mlir-opt %s -test-vector-transpose-lowering=shuffle=1 | FileCheck %s --check-prefix=SHUFFLE
				// RUN: mlir-opt %s -test-vector-transpose-lowering=flat=1 | FileCheck %s --check-prefix=FLAT
				// RUN: mlir-opt %s -test-vector-transpose-lowering=avx2=1 | FileCheck %s --check-prefix=AVX2

				// ELTWISE-LABEL: func @transpose23
				// ELTWISE-SAME: %[[A:.*]]: vector<2x3xf32>
				// ELTWISE: %[[Z:.*]] = arith.constant dense<0.000000e+00> : vector<3x2xf32>
				// ELTWISE: %[[T0:.*]] = vector.extract %[[A]][0, 0] : vector<2x3xf32>
				// ELTWISE: %[[T1:.*]] = vector.insert %[[T0]], %[[Z]] [0, 0] : f32 into vector<3x2xf32>
				// ELTWISE: %[[T2:.*]] = vector.extract %[[A]][1, 0] : vector<2x3xf32>
				// ELTWISE: %[[T3:.*]] = vector.insert %[[T2]], %[[T1]] [0, 1] : f32 into vector<3x2xf32>
				// ELTWISE: %[[T4:.*]] = vector.extract %[[A]][0, 1] : vector<2x3xf32>
				// ELTWISE: %[[T5:.*]] = vector.insert %[[T4]], %[[T3]] [1, 0] : f32 into vector<3x2xf32>
				// ELTWISE: %[[T6:.*]] = vector.extract %[[A]][1, 1] : vector<2x3xf32>
				// ELTWISE: %[[T7:.*]] = vector.insert %[[T6]], %[[T5]] [1, 1] : f32 into vector<3x2xf32>
				// ELTWISE: %[[T8:.*]] = vector.extract %[[A]][0, 2] : vector<2x3xf32>
				// ELTWISE: %[[T9:.*]] = vector.insert %[[T8]], %[[T7]] [2, 0] : f32 into vector<3x2xf32>
				// ELTWISE: %[[T10:.*]] = vector.extract %[[A]][1, 2] : vector<2x3xf32>
				// ELTWISE: %[[T11:.*]] = vector.insert %[[T10]], %[[T9]] [2, 1] : f32 into vector<3x2xf32>
				// ELTWISE: return %[[T11]] : vector<3x2xf32>
				func @transpose23(%arg0: vector<2x3xf32>) -> vector<3x2xf32> {
				%0 = vector.transpose %arg0, [1, 0] : vector<2x3xf32> to vector<3x2xf32>
				return %0 : vector<3x2xf32>
				}

				// SHUFFLE-LABEL: func @transpose
				// FLAT-LABEL: func @transpose(
				func @transpose(%arg0: vector<2x4xf32>) -> vector<4x2xf32> {
				// SHUFFLE: vector.shape_cast %{{.*}} : vector<2x4xf32> to vector<8xf32>
				// 0 4
				// 0 1 2 3 1 5
				// 4 5 6 7 -> 2 6
				// 3 7
				// SHUFFLE-NEXT: vector.shuffle %{{.*}} [0, 4, 1, 5, 2, 6, 3, 7] : vector<8xf32>, vector<8xf32>
				// SHUFFLE-NEXT: vector.shape_cast %{{.*}} : vector<8xf32> to vector<4x2xf32>

				// FLAT: vector.shape_cast {{.*}} : vector<2x4xf32> to vector<8xf32>
				// FLAT: vector.flat_transpose %{{.*}} {columns = 2 : i32, rows = 4 : i32} : vector<8xf32> -> vector<8xf32>
				// FLAT: vector.shape_cast {{.*}} : vector<8xf32> to vector<4x2xf32>
				%0 = vector.transpose %arg0, [1, 0] : vector<2x4xf32> to vector<4x2xf32>
				return %0 : vector<4x2xf32>
				}

				// AVX2-LABEL: func @transpose4x8
				func @transpose4x8xf32(%arg0: vector<4x8xf32>) -> vector<8x4xf32> {
				// AVX2: vector.extract {{.*}}[0]
				// AVX2-NEXT: vector.extract {{.*}}[1]
				// AVX2-NEXT: vector.extract {{.*}}[2]
				// AVX2-NEXT: vector.extract {{.*}}[3]
				// AVX2-NEXT: vector.shuffle {{.*}} [0, 8, 1, 9, 4, 12, 5, 13] : vector<8xf32>, vector<8xf32>
				// AVX2-NEXT: vector.shuffle {{.*}} [2, 10, 3, 11, 6, 14, 7, 15] : vector<8xf32>, vector<8xf32>
				// AVX2-NEXT: vector.shuffle {{.*}} [0, 8, 1, 9, 4, 12, 5, 13] : vector<8xf32>, vector<8xf32>
				// AVX2-NEXT: vector.shuffle {{.*}} [2, 10, 3, 11, 6, 14, 7, 15] : vector<8xf32>, vector<8xf32>
				// AVX2-NEXT: vector.shuffle {{.*}} [0, 1, 8, 9, 4, 5, 12, 13] : vector<8xf32>, vector<8xf32>
				// AVX2-NEXT: vector.shuffle {{.*}} [2, 3, 10, 11, 6, 7, 14, 15] : vector<8xf32>, vector<8xf32>
				// AVX2-NEXT: vector.shuffle {{.*}} [0, 1, 8, 9, 4, 5, 12, 13] : vector<8xf32>, vector<8xf32>
				// AVX2-NEXT: vector.shuffle {{.*}} [2, 3, 10, 11, 6, 7, 14, 15] : vector<8xf32>, vector<8xf32>
				// AVX2-NEXT: vector.shuffle {{.*}} [0, 1, 2, 3, 8, 9, 10, 11] : vector<8xf32>, vector<8xf32>
				// AVX2-NEXT: vector.shuffle {{.*}} [0, 1, 2, 3, 8, 9, 10, 11] : vector<8xf32>, vector<8xf32>
				// AVX2-NEXT: vector.shuffle {{.*}} [4, 5, 6, 7, 12, 13, 14, 15] : vector<8xf32>, vector<8xf32>
				// AVX2-NEXT: vector.shuffle {{.*}} [4, 5, 6, 7, 12, 13, 14, 15] : vector<8xf32>, vector<8xf32>
				// AVX2-NEXT: vector.insert {{.*}}[0]
				// AVX2-NEXT: vector.insert {{.*}}[1]
				// AVX2-NEXT: vector.insert {{.*}}[2]
				// AVX2-NEXT: vector.insert {{.*}}[3]
				// AVX2-NEXT: vector.shape_cast {{.*}} vector<4x8xf32> to vector<32xf32>
				// AVX2-NEXT: vector.shape_cast {{.*}} vector<32xf32> to vector<8x4xf32>
				%0 = vector.transpose %arg0, [1, 0] : vector<4x8xf32> to vector<8x4xf32>
				return %0 : vector<8x4xf32>
				}

				// AVX2-LABEL: func @transpose8x8
				func @transpose8x8xf32(%arg0: vector<8x8xf32>) -> vector<8x8xf32> {
				// AVX2: vector.shuffle {{.*}} [0, 8, 1, 9, 4, 12, 5, 13] : vector<8xf32>, vector<8xf32>
				// AVX2-NEXT: vector.shuffle {{.*}} [2, 10, 3, 11, 6, 14, 7, 15] : vector<8xf32>, vector<8xf32>
				// AVX2-NEXT: vector.shuffle {{.*}} [0, 8, 1, 9, 4, 12, 5, 13] : vector<8xf32>, vector<8xf32>
				// AVX2-NEXT: vector.shuffle {{.*}} [2, 10, 3, 11, 6, 14, 7, 15] : vector<8xf32>, vector<8xf32>
				// AVX2-NEXT: vector.shuffle {{.*}} [0, 8, 1, 9, 4, 12, 5, 13] : vector<8xf32>, vector<8xf32>
				// AVX2-NEXT: vector.shuffle {{.*}} [2, 10, 3, 11, 6, 14, 7, 15] : vector<8xf32>, vector<8xf32>
				// AVX2-NEXT: vector.shuffle {{.*}} [0, 8, 1, 9, 4, 12, 5, 13] : vector<8xf32>, vector<8xf32>
				// AVX2-NEXT: vector.shuffle {{.*}} [2, 10, 3, 11, 6, 14, 7, 15] : vector<8xf32>, vector<8xf32>
				// AVX2-NEXT: vector.shuffle {{.*}} [0, 1, 8, 9, 4, 5, 12, 13] : vector<8xf32>, vector<8xf32>
				// AVX2-NEXT: vector.shuffle {{.*}} [2, 3, 10, 11, 6, 7, 14, 15] : vector<8xf32>, vector<8xf32>
				// AVX2-NEXT: vector.shuffle {{.*}} [0, 1, 8, 9, 4, 5, 12, 13] : vector<8xf32>, vector<8xf32>
				// AVX2-NEXT: vector.shuffle {{.*}} [2, 3, 10, 11, 6, 7, 14, 15] : vector<8xf32>, vector<8xf32>
				// AVX2-NEXT: vector.shuffle {{.*}} [0, 1, 8, 9, 4, 5, 12, 13] : vector<8xf32>, vector<8xf32>
				// AVX2-NEXT: vector.shuffle {{.*}} [2, 3, 10, 11, 6, 7, 14, 15] : vector<8xf32>, vector<8xf32>
				// AVX2-NEXT: vector.shuffle {{.*}} [0, 1, 8, 9, 4, 5, 12, 13] : vector<8xf32>, vector<8xf32>
				// AVX2-NEXT: vector.shuffle {{.*}} [2, 3, 10, 11, 6, 7, 14, 15] : vector<8xf32>, vector<8xf32>
				// AVX2-NEXT: vector.shuffle {{.*}} [0, 1, 2, 3, 8, 9, 10, 11] : vector<8xf32>, vector<8xf32>
				// AVX2-NEXT: vector.shuffle {{.*}} [0, 1, 2, 3, 8, 9, 10, 11] : vector<8xf32>, vector<8xf32>
				// AVX2-NEXT: vector.shuffle {{.*}} [0, 1, 2, 3, 8, 9, 10, 11] : vector<8xf32>, vector<8xf32>
				// AVX2-NEXT: vector.shuffle {{.*}} [0, 1, 2, 3, 8, 9, 10, 11] : vector<8xf32>, vector<8xf32>
				// AVX2-NEXT: vector.shuffle {{.*}} [4, 5, 6, 7, 12, 13, 14, 15] : vector<8xf32>, vector<8xf32>
				// AVX2-NEXT: vector.shuffle {{.*}} [4, 5, 6, 7, 12, 13, 14, 15] : vector<8xf32>, vector<8xf32>
				// AVX2-NEXT: vector.shuffle {{.*}} [4, 5, 6, 7, 12, 13, 14, 15] : vector<8xf32>, vector<8xf32>
				// AVX2-NEXT: vector.shuffle {{.*}} [4, 5, 6, 7, 12, 13, 14, 15] : vector<8xf32>, vector<8xf32>
				%0 = vector.transpose %arg0, [1, 0] : vector<8x8xf32> to vector<8x8xf32>
				return %0 : vector<8x8xf32>
				}

mlir/test/Dialect/Vector/vector-transpose-to-shuffle.mlir

This file was deleted.

	// RUN: mlir-opt %s -test-vector-contraction-conversion=vector-shuffle-transpose=1 | FileCheck %s

	// CHECK-LABEL: func @transpose
	func @transpose(%arg0: vector<2x4xf32>) -> vector<4x2xf32> {
	// CHECK: vector.shape_cast %{{.*}} : vector<2x4xf32> to vector<8xf32>
	// 0 4
	// 0 1 2 3 1 5
	// 4 5 6 7 -> 2 6
	// 3 7
	// CHECK: vector.shuffle %{{.*}} [0, 4, 1, 5, 2, 6, 3, 7] : vector<8xf32>, vector<8xf32>
	// CHECK: vector.shape_cast %{{.*}} : vector<8xf32> to vector<4x2xf32>
	%0 = vector.transpose %arg0, [1, 0] : vector<2x4xf32> to vector<4x2xf32>
	return %0 : vector<4x2xf32>
	}

mlir/test/lib/Dialect/Vector/TestVectorTransforms.cpp

//===- TestVectorToVectorConversion.cpp - Test VectorTransfers lowering ---===//		//===- TestVectorTransforms.cpp - Test Vector transforms and lowerings ----===//
//		//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.		// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.		// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception		// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//		//
//===----------------------------------------------------------------------===//		//===----------------------------------------------------------------------===//

#include <type_traits>		#include <type_traits>

#include "mlir/Analysis/SliceAnalysis.h"		#include "mlir/Analysis/SliceAnalysis.h"
#include "mlir/Dialect/Affine/IR/AffineOps.h"		#include "mlir/Dialect/Affine/IR/AffineOps.h"
#include "mlir/Dialect/Linalg/IR/LinalgOps.h"		#include "mlir/Dialect/Linalg/IR/LinalgOps.h"
		#include "mlir/Dialect/Linalg/Passes.h"
		#include "mlir/Dialect/Linalg/Transforms/Transforms.h"
#include "mlir/Dialect/MemRef/IR/MemRef.h"		#include "mlir/Dialect/MemRef/IR/MemRef.h"
#include "mlir/Dialect/SCF/SCF.h"		#include "mlir/Dialect/SCF/SCF.h"
#include "mlir/Dialect/StandardOps/IR/Ops.h"		#include "mlir/Dialect/StandardOps/IR/Ops.h"
#include "mlir/Dialect/Vector/VectorTransforms.h"		#include "mlir/Dialect/Vector/VectorTransforms.h"
#include "mlir/Pass/Pass.h"		#include "mlir/Pass/Pass.h"
		#include "mlir/Pass/PassManager.h"
#include "mlir/Transforms/GreedyPatternRewriteDriver.h"		#include "mlir/Transforms/GreedyPatternRewriteDriver.h"

using namespace mlir;		using namespace mlir;
		using namespace mlir::linalg;
using namespace mlir::vector;		using namespace mlir::vector;

namespace {		namespace {

struct TestVectorToVectorConversion		struct TestVectorToVectorLowering
: public PassWrapper<TestVectorToVectorConversion, FunctionPass> {		: public PassWrapper<TestVectorToVectorLowering, FunctionPass> {
TestVectorToVectorConversion() = default;		TestVectorToVectorLowering() = default;
TestVectorToVectorConversion(const TestVectorToVectorConversion &pass) {}		TestVectorToVectorLowering(const TestVectorToVectorLowering &pass) {}
StringRef getArgument() const final {		StringRef getArgument() const final {
return "test-vector-to-vector-conversion";		return "test-vector-to-vector-lowering";
}		}
StringRef getDescription() const final {		StringRef getDescription() const final {
return "Test conversion patterns between ops in the vector dialect";		return "Test lowering patterns between ops in the vector dialect";
}		}

void getDependentDialects(DialectRegistry &registry) const override {		void getDependentDialects(DialectRegistry &registry) const override {
registry.insert<AffineDialect>();		registry.insert<AffineDialect>();
}		}

Option<bool> unroll{*this, "unroll", llvm::cl::desc("Include unrolling"),		Option<bool> unroll{*this, "unroll", llvm::cl::desc("Include unrolling"),
llvm::cl::init(false)};		llvm::cl::init(false)};
▲ Show 20 Lines • Show All 47 Lines • ▼ Show 20 Lines	private:
}		}

static LogicalResult filter(Operation *op) {		static LogicalResult filter(Operation *op) {
return success(isa<arith::AddFOp, SelectOp, arith::CmpFOp, ContractionOp,		return success(isa<arith::AddFOp, SelectOp, arith::CmpFOp, ContractionOp,
TransferReadOp, TransferWriteOp>(op));		TransferReadOp, TransferWriteOp>(op));
}		}
};		};

struct TestVectorContractionConversion		struct TestVectorContractionLowering
: public PassWrapper<TestVectorContractionConversion, FunctionPass> {		: public PassWrapper<TestVectorContractionLowering, FunctionPass> {
StringRef getArgument() const final {		StringRef getArgument() const final {
return "test-vector-contraction-conversion";		return "test-vector-contraction-lowering";
}		}
StringRef getDescription() const final {		StringRef getDescription() const final {
return "Test conversion patterns that lower contract ops in the vector "		return "Test lowering patterns that lower contract ops in the vector "
"dialect";		"dialect";
}		}
TestVectorContractionConversion() = default;		TestVectorContractionLowering() = default;
TestVectorContractionConversion(const TestVectorContractionConversion &pass) {		TestVectorContractionLowering(const TestVectorContractionLowering &pass) {}
}

Option<bool> lowerToFlatMatrix{		Option<bool> lowerToFlatMatrix{
*this, "vector-lower-matrix-intrinsics",		*this, "vector-lower-matrix-intrinsics",
llvm::cl::desc("Lower vector.contract to llvm.intr.matrix.multiply"),		llvm::cl::desc("Lower vector.contract to llvm.intr.matrix.multiply"),
llvm::cl::init(false)};		llvm::cl::init(false)};
Option<bool> lowerToFlatTranspose{
*this, "vector-flat-transpose",
llvm::cl::desc("Lower 2-D vector.transpose to vector.flat_transpose"),
llvm::cl::init(false)};
Option<bool> lowerToShuffleTranspose{
*this, "vector-shuffle-transpose",
llvm::cl::desc("Lower 2-D vector.transpose to shape_cast + shuffle"),
llvm::cl::init(false)};
Option<bool> lowerToOuterProduct{		Option<bool> lowerToOuterProduct{
*this, "vector-outerproduct",		*this, "vector-outerproduct",
llvm::cl::desc("Lower vector.contract to vector.outerproduct"),		llvm::cl::desc("Lower vector.contract to vector.outerproduct"),
llvm::cl::init(false)};		llvm::cl::init(false)};
Option<bool> lowerToFilterOuterProduct{		Option<bool> lowerToFilterOuterProduct{
*this, "vector-filter-outerproduct",		*this, "vector-filter-outerproduct",
llvm::cl::desc("Lower vector.contract to vector.outerproduct but not for "		llvm::cl::desc("Lower vector.contract to vector.outerproduct but not for "
"vectors of size 4."),		"vectors of size 4."),
Show All 29 Lines	void runOnFunction() override {
}		}

// Test on all contract lowering patterns.		// Test on all contract lowering patterns.
VectorContractLowering contractLowering = VectorContractLowering::Dot;		VectorContractLowering contractLowering = VectorContractLowering::Dot;
if (lowerToFlatMatrix)		if (lowerToFlatMatrix)
contractLowering = VectorContractLowering::Matmul;		contractLowering = VectorContractLowering::Matmul;
VectorMultiReductionLowering vectorMultiReductionLowering =		VectorMultiReductionLowering vectorMultiReductionLowering =
VectorMultiReductionLowering::InnerParallel;		VectorMultiReductionLowering::InnerParallel;
VectorTransposeLowering transposeLowering =		VectorTransformsOptions options{contractLowering,
VectorTransposeLowering::EltWise;		vectorMultiReductionLowering,
if (lowerToFlatTranspose)		VectorTransposeLowering()};
transposeLowering = VectorTransposeLowering::Flat;
if (lowerToShuffleTranspose)
transposeLowering = VectorTransposeLowering::Shuffle;
VectorTransformsOptions options{
contractLowering, vectorMultiReductionLowering, transposeLowering};
populateVectorBroadcastLoweringPatterns(patterns);		populateVectorBroadcastLoweringPatterns(patterns);
populateVectorContractLoweringPatterns(patterns, options);		populateVectorContractLoweringPatterns(patterns, options);
populateVectorMaskOpLoweringPatterns(patterns);		populateVectorMaskOpLoweringPatterns(patterns);
if (!lowerToShuffleTranspose)
populateVectorShapeCastLoweringPatterns(patterns);		populateVectorShapeCastLoweringPatterns(patterns);
populateVectorTransposeLoweringPatterns(patterns, options);
(void)applyPatternsAndFoldGreedily(getFunction(), std::move(patterns));		(void)applyPatternsAndFoldGreedily(getFunction(), std::move(patterns));
}		}
};		};

		/// Test pass that lowers vector.transpose ops in isolation. The lowering
		/// flavor is chosen through the pass options below and applied by running
		/// the Linalg "strategy lower vectors" pass as a nested dynamic pipeline.
		struct TestVectorTransposeLowering
		    : public PassWrapper<TestVectorTransposeLowering, FunctionPass> {
		  StringRef getArgument() const final {
		    return "test-vector-transpose-lowering";
		  }
		  StringRef getDescription() const final {
		    return "Test lowering patterns that lower transpose ops in the vector "
		           "dialect";
		  }
		  TestVectorTransposeLowering() = default;
		  // The copy constructor deliberately copies no state: every Option member
		  // below is initialized by its in-class initializer against `*this`.
		  TestVectorTransposeLowering(const TestVectorTransposeLowering &pass) {}

		  Option<bool> lowerToEltwise{
		      *this, "eltwise",
		      llvm::cl::desc("Lower 2-D vector.transpose to eltwise insert/extract"),
		      llvm::cl::init(false)};
		  Option<bool> lowerToFlatTranspose{
		      *this, "flat",
		      llvm::cl::desc("Lower 2-D vector.transpose to vector.flat_transpose"),
		      llvm::cl::init(false)};
		  Option<bool> lowerToShuffleTranspose{
		      *this, "shuffle",
		      llvm::cl::desc("Lower 2-D vector.transpose to shape_cast + shuffle"),
		      llvm::cl::init(false)};
		  Option<bool> lowerToAvx2{
		      *this, "avx2",
		      llvm::cl::desc("Lower vector.transpose to avx2-specific patterns"),
		      llvm::cl::init(false)};

		  /// Builds the lowering options from the pass flags and runs the vector
		  /// lowering pass on the current function; signals pass failure if the
		  /// nested pipeline fails.
		  void runOnFunction() override {
		    // Test on one pattern in isolation.
		    // Explicitly disable shape_cast lowering.
		    LinalgVectorLoweringOptions options = LinalgVectorLoweringOptions()
		                                              .enableVectorTransposeLowering()
		                                              .enableShapeCastLowering(false);
		    // Each branch installs a fresh VectorTransformsOptions, so when several
		    // of eltwise/flat/shuffle are given, the last one checked here wins.
		    if (lowerToEltwise) {
		      options = options.setVectorTransformsOptions(
		          VectorTransformsOptions().setVectorTransposeLowering(
		              VectorTransposeLowering::EltWise));
		    }
		    if (lowerToFlatTranspose) {
		      options = options.setVectorTransformsOptions(
		          VectorTransformsOptions().setVectorTransposeLowering(
		              VectorTransposeLowering::Flat));
		    }
		    if (lowerToShuffleTranspose) {
		      options = options.setVectorTransformsOptions(
		          VectorTransformsOptions().setVectorTransposeLowering(
		              VectorTransposeLowering::Shuffle));
		    }
		    // AVX2 lowering is enabled on top of whatever transpose lowering was
		    // selected above, for the 4x8xf32 and 8x8xf32 shapes.
		    if (lowerToAvx2) {
		      options = options.enableAVX2Lowering().setAVX2LoweringOptions(
		          x86vector::avx2::LoweringOptions().setTransposeOptions(
		              x86vector::avx2::TransposeLoweringOptions()
		                  .lower4x8xf32()
		                  .lower8x8xf32()));
		    }

		    // No pattern set is populated locally; the lowering is delegated to the
		    // Linalg strategy pass, run as a dynamic pipeline on this function.
		    OpPassManager dynamicPM("builtin.func");
		    dynamicPM.addPass(createLinalgStrategyLowerVectorsPass(options));
		    if (failed(runPipeline(dynamicPM, getFunction())))
		      return signalPassFailure();
		  }
		};

struct TestVectorUnrollingPatterns		struct TestVectorUnrollingPatterns
: public PassWrapper<TestVectorUnrollingPatterns, FunctionPass> {		: public PassWrapper<TestVectorUnrollingPatterns, FunctionPass> {
StringRef getArgument() const final {		StringRef getArgument() const final {
return "test-vector-unrolling-patterns";		return "test-vector-unrolling-patterns";
}		}
StringRef getDescription() const final {		StringRef getDescription() const final {
return "Test conversion patterns to unroll contract ops in the vector "		return "Test lowering patterns to unroll contract ops in the vector "
"dialect";		"dialect";
}		}
TestVectorUnrollingPatterns() = default;		TestVectorUnrollingPatterns() = default;
TestVectorUnrollingPatterns(const TestVectorUnrollingPatterns &pass) {}		TestVectorUnrollingPatterns(const TestVectorUnrollingPatterns &pass) {}
void runOnFunction() override {		void runOnFunction() override {
MLIRContext *ctx = &getContext();		MLIRContext *ctx = &getContext();
RewritePatternSet patterns(ctx);		RewritePatternSet patterns(ctx);
populateVectorUnrollPatterns(		populateVectorUnrollPatterns(
▲ Show 20 Lines • Show All 42 Lines • ▼ Show 20 Lines
};		};

struct TestVectorDistributePatterns		struct TestVectorDistributePatterns
: public PassWrapper<TestVectorDistributePatterns, FunctionPass> {		: public PassWrapper<TestVectorDistributePatterns, FunctionPass> {
StringRef getArgument() const final {		StringRef getArgument() const final {
return "test-vector-distribute-patterns";		return "test-vector-distribute-patterns";
}		}
StringRef getDescription() const final {		StringRef getDescription() const final {
return "Test conversion patterns to distribute vector ops in the vector "		return "Test lowering patterns to distribute vector ops in the vector "
"dialect";		"dialect";
}		}
TestVectorDistributePatterns() = default;		TestVectorDistributePatterns() = default;
TestVectorDistributePatterns(const TestVectorDistributePatterns &pass) {}		TestVectorDistributePatterns(const TestVectorDistributePatterns &pass) {}
void getDependentDialects(DialectRegistry &registry) const override {		void getDependentDialects(DialectRegistry &registry) const override {
registry.insert<VectorDialect>();		registry.insert<VectorDialect>();
registry.insert<AffineDialect>();		registry.insert<AffineDialect>();
}		}
Show All 37 Lines	void runOnFunction() override {
(void)applyPatternsAndFoldGreedily(getFunction(), std::move(patterns));		(void)applyPatternsAndFoldGreedily(getFunction(), std::move(patterns));
}		}
};		};

struct TestVectorToLoopPatterns		struct TestVectorToLoopPatterns
: public PassWrapper<TestVectorToLoopPatterns, FunctionPass> {		: public PassWrapper<TestVectorToLoopPatterns, FunctionPass> {
StringRef getArgument() const final { return "test-vector-to-forloop"; }		StringRef getArgument() const final { return "test-vector-to-forloop"; }
StringRef getDescription() const final {		StringRef getDescription() const final {
return "Test conversion patterns to break up a vector op into a for loop";		return "Test lowering patterns to break up a vector op into a for loop";
}		}
TestVectorToLoopPatterns() = default;		TestVectorToLoopPatterns() = default;
TestVectorToLoopPatterns(const TestVectorToLoopPatterns &pass) {}		TestVectorToLoopPatterns(const TestVectorToLoopPatterns &pass) {}
void getDependentDialects(DialectRegistry &registry) const override {		void getDependentDialects(DialectRegistry &registry) const override {
registry.insert<VectorDialect>();		registry.insert<VectorDialect>();
registry.insert<AffineDialect>();		registry.insert<AffineDialect>();
}		}
Option<int32_t> multiplicity{		Option<int32_t> multiplicity{
▲ Show 20 Lines • Show All 46 Lines • ▼ Show 20 Lines	struct TestVectorTransferUnrollingPatterns
: public PassWrapper<TestVectorTransferUnrollingPatterns, FunctionPass> {		: public PassWrapper<TestVectorTransferUnrollingPatterns, FunctionPass> {
void getDependentDialects(DialectRegistry &registry) const override {		void getDependentDialects(DialectRegistry &registry) const override {
registry.insert<AffineDialect>();		registry.insert<AffineDialect>();
}		}
StringRef getArgument() const final {		StringRef getArgument() const final {
return "test-vector-transfer-unrolling-patterns";		return "test-vector-transfer-unrolling-patterns";
}		}
StringRef getDescription() const final {		StringRef getDescription() const final {
return "Test conversion patterns to unroll transfer ops in the vector "		return "Test lowering patterns to unroll transfer ops in the vector "
"dialect";		"dialect";
}		}
void runOnFunction() override {		void runOnFunction() override {
MLIRContext *ctx = &getContext();		MLIRContext *ctx = &getContext();
RewritePatternSet patterns(ctx);		RewritePatternSet patterns(ctx);
populateVectorUnrollPatterns(		populateVectorUnrollPatterns(
patterns,		patterns,
UnrollVectorOptions()		UnrollVectorOptions()
Show All 9 Lines

struct TestVectorTransferFullPartialSplitPatterns		struct TestVectorTransferFullPartialSplitPatterns
: public PassWrapper<TestVectorTransferFullPartialSplitPatterns,		: public PassWrapper<TestVectorTransferFullPartialSplitPatterns,
FunctionPass> {		FunctionPass> {
StringRef getArgument() const final {		StringRef getArgument() const final {
return "test-vector-transfer-full-partial-split";		return "test-vector-transfer-full-partial-split";
}		}
StringRef getDescription() const final {		StringRef getDescription() const final {
return "Test conversion patterns to split "		return "Test lowering patterns to split "
"transfer ops via scf.if + linalg ops";		"transfer ops via scf.if + linalg ops";
}		}
TestVectorTransferFullPartialSplitPatterns() = default;		TestVectorTransferFullPartialSplitPatterns() = default;
TestVectorTransferFullPartialSplitPatterns(		TestVectorTransferFullPartialSplitPatterns(
const TestVectorTransferFullPartialSplitPatterns &pass) {}		const TestVectorTransferFullPartialSplitPatterns &pass) {}

void getDependentDialects(DialectRegistry &registry) const override {		void getDependentDialects(DialectRegistry &registry) const override {
registry.insert<AffineDialect, linalg::LinalgDialect, memref::MemRefDialect,		registry.insert<AffineDialect, linalg::LinalgDialect, memref::MemRefDialect,
Show All 31 Lines	struct TestVectorTransferLoweringPatterns
: public PassWrapper<TestVectorTransferLoweringPatterns, FunctionPass> {		: public PassWrapper<TestVectorTransferLoweringPatterns, FunctionPass> {
void getDependentDialects(DialectRegistry &registry) const override {		void getDependentDialects(DialectRegistry &registry) const override {
registry.insert<memref::MemRefDialect>();		registry.insert<memref::MemRefDialect>();
}		}
StringRef getArgument() const final {		StringRef getArgument() const final {
return "test-vector-transfer-lowering-patterns";		return "test-vector-transfer-lowering-patterns";
}		}
StringRef getDescription() const final {		StringRef getDescription() const final {
return "Test conversion patterns to lower transfer ops to other vector ops";		return "Test lowering patterns to lower transfer ops to other vector ops";
}		}
void runOnFunction() override {		void runOnFunction() override {
RewritePatternSet patterns(&getContext());		RewritePatternSet patterns(&getContext());
populateVectorTransferLoweringPatterns(patterns);		populateVectorTransferLoweringPatterns(patterns);
populateVectorTransferPermutationMapLoweringPatterns(patterns);		populateVectorTransferPermutationMapLoweringPatterns(patterns);
(void)applyPatternsAndFoldGreedily(getFunction(), std::move(patterns));		(void)applyPatternsAndFoldGreedily(getFunction(), std::move(patterns));
}		}
};		};

struct TestVectorMultiReductionLoweringPatterns		struct TestVectorMultiReductionLoweringPatterns
: public PassWrapper<TestVectorMultiReductionLoweringPatterns,		: public PassWrapper<TestVectorMultiReductionLoweringPatterns,
FunctionPass> {		FunctionPass> {
TestVectorMultiReductionLoweringPatterns() = default;		TestVectorMultiReductionLoweringPatterns() = default;
TestVectorMultiReductionLoweringPatterns(		TestVectorMultiReductionLoweringPatterns(
const TestVectorMultiReductionLoweringPatterns &pass) {}		const TestVectorMultiReductionLoweringPatterns &pass) {}
void getDependentDialects(DialectRegistry &registry) const override {		void getDependentDialects(DialectRegistry &registry) const override {
registry.insert<memref::MemRefDialect>();		registry.insert<memref::MemRefDialect>();
}		}
StringRef getArgument() const final {		StringRef getArgument() const final {
return "test-vector-multi-reduction-lowering-patterns";		return "test-vector-multi-reduction-lowering-patterns";
}		}
StringRef getDescription() const final {		StringRef getDescription() const final {
return "Test conversion patterns to lower vector.multi_reduction to other "		return "Test lowering patterns to lower vector.multi_reduction to other "
"vector ops";		"vector ops";
}		}
Option<bool> useOuterReductions{		Option<bool> useOuterReductions{
*this, "use-outer-reductions",		*this, "use-outer-reductions",
llvm::cl::desc("Move reductions to outer most dimensions"),		llvm::cl::desc("Move reductions to outer most dimensions"),
llvm::cl::init(false)};		llvm::cl::init(false)};
void runOnFunction() override {		void runOnFunction() override {
RewritePatternSet patterns(&getContext());		RewritePatternSet patterns(&getContext());
Show All 16 Lines	void getDependentDialects(DialectRegistry &registry) const override {
registry.insert<memref::MemRefDialect, AffineDialect>();		registry.insert<memref::MemRefDialect, AffineDialect>();
}		}

StringRef getArgument() const final {		StringRef getArgument() const final {
return "test-vector-transfer-collapse-inner-most-dims";		return "test-vector-transfer-collapse-inner-most-dims";
}		}

StringRef getDescription() const final {		StringRef getDescription() const final {
return "Test conversion patterns that reducedes the rank of the vector "		return "Test lowering patterns that reducedes the rank of the vector "
"transfer memory and vector operands.";		"transfer memory and vector operands.";
}		}

void runOnFunction() override {		void runOnFunction() override {
RewritePatternSet patterns(&getContext());		RewritePatternSet patterns(&getContext());
populateVectorTransferCollapseInnerMostContiguousDimsPatterns(patterns);		populateVectorTransferCollapseInnerMostContiguousDimsPatterns(patterns);
(void)applyPatternsAndFoldGreedily(getFunction(), std::move(patterns));		(void)applyPatternsAndFoldGreedily(getFunction(), std::move(patterns));
}		}
Show All 15 Lines	void runOnFunction() override {
(void)applyPatternsAndFoldGreedily(getFunction(), std::move(patterns));		(void)applyPatternsAndFoldGreedily(getFunction(), std::move(patterns));
}		}
};		};

} // end anonymous namespace		} // end anonymous namespace

namespace mlir {		namespace mlir {
namespace test {		namespace test {
void registerTestVectorConversions() {		void registerTestVectorLowerings() {
PassRegistration<TestVectorToVectorConversion>();		PassRegistration<TestVectorToVectorLowering>();

		PassRegistration<TestVectorContractionLowering>();

PassRegistration<TestVectorContractionConversion>();		PassRegistration<TestVectorTransposeLowering>();

PassRegistration<TestVectorUnrollingPatterns>();		PassRegistration<TestVectorUnrollingPatterns>();

PassRegistration<TestVectorTransferUnrollingPatterns>();		PassRegistration<TestVectorTransferUnrollingPatterns>();

PassRegistration<TestVectorTransferFullPartialSplitPatterns>();		PassRegistration<TestVectorTransferFullPartialSplitPatterns>();

PassRegistration<TestVectorDistributePatterns>();		PassRegistration<TestVectorDistributePatterns>();
Show All 15 Lines

mlir/tools/mlir-opt/mlir-opt.cpp

Show First 20 Lines • Show All 101 Lines • ▼ Show 20 Lines
void registerTestNumberOfBlockExecutionsPass();		void registerTestNumberOfBlockExecutionsPass();
void registerTestNumberOfOperationExecutionsPass();		void registerTestNumberOfOperationExecutionsPass();
void registerTestOpaqueLoc();		void registerTestOpaqueLoc();
void registerTestPDLByteCodePass();		void registerTestPDLByteCodePass();
void registerTestPreparationPassWithAllowedMemrefResults();		void registerTestPreparationPassWithAllowedMemrefResults();
void registerTestRecursiveTypesPass();		void registerTestRecursiveTypesPass();
void registerTestSCFUtilsPass();		void registerTestSCFUtilsPass();
void registerTestSliceAnalysisPass();		void registerTestSliceAnalysisPass();
void registerTestVectorConversions();		void registerTestVectorLowerings();
} // namespace test		} // namespace test
} // namespace mlir		} // namespace mlir

namespace test {		namespace test {
void registerTestDialect(DialectRegistry &);		void registerTestDialect(DialectRegistry &);
} // namespace test		} // namespace test

#ifdef MLIR_INCLUDE_TESTS		#ifdef MLIR_INCLUDE_TESTS
▲ Show 20 Lines • Show All 73 Lines • ▼ Show 20 Lines	#endif
mlir::test::registerTestMemRefStrideCalculation();		mlir::test::registerTestMemRefStrideCalculation();
mlir::test::registerTestNumberOfBlockExecutionsPass();		mlir::test::registerTestNumberOfBlockExecutionsPass();
mlir::test::registerTestNumberOfOperationExecutionsPass();		mlir::test::registerTestNumberOfOperationExecutionsPass();
mlir::test::registerTestOpaqueLoc();		mlir::test::registerTestOpaqueLoc();
mlir::test::registerTestPDLByteCodePass();		mlir::test::registerTestPDLByteCodePass();
mlir::test::registerTestRecursiveTypesPass();		mlir::test::registerTestRecursiveTypesPass();
mlir::test::registerTestSCFUtilsPass();		mlir::test::registerTestSCFUtilsPass();
mlir::test::registerTestSliceAnalysisPass();		mlir::test::registerTestSliceAnalysisPass();
mlir::test::registerTestVectorConversions();		mlir::test::registerTestVectorLowerings();
}		}
#endif		#endif

int main(int argc, char **argv) {		int main(int argc, char **argv) {
registerAllPasses();		registerAllPasses();
#ifdef MLIR_INCLUDE_TESTS		#ifdef MLIR_INCLUDE_TESTS
registerTestPasses();		registerTestPasses();
#endif		#endif
Show All 9 Lines

utils/bazel/llvm-project-overlay/mlir/BUILD.bazel

Show First 20 Lines • Show All 1,452 Lines • ▼ Show 20 Lines	cc_library(
hdrs = ["include/mlir/Dialect/X86Vector/Transforms.h"],		hdrs = ["include/mlir/Dialect/X86Vector/Transforms.h"],
includes = ["include"],		includes = ["include"],
deps = [		deps = [
":ArithmeticDialect",		":ArithmeticDialect",
":IR",		":IR",
":LLVMCommonConversion",		":LLVMCommonConversion",
":LLVMDialect",		":LLVMDialect",
":StandardOps",		":StandardOps",
		":VectorOps",
":X86Vector",		":X86Vector",
"//llvm:Core",		"//llvm:Core",
"//llvm:Support",		"//llvm:Support",
],		],
)		)

gentbl_cc_library(		gentbl_cc_library(
name = "X86VectorConversionIncGen",		name = "X86VectorConversionIncGen",
▲ Show 20 Lines • Show All 4,927 Lines • ▼ Show 20 Lines	deps = [
":SCFTransforms",		":SCFTransforms",
":StandardOps",		":StandardOps",
":StandardOpsTransforms",		":StandardOpsTransforms",
":Support",		":Support",
":TensorDialect",		":TensorDialect",
":TransformUtils",		":TransformUtils",
":VectorOps",		":VectorOps",
":VectorToSCF",		":VectorToSCF",
		":X86VectorTransforms",
"//llvm:Support",		"//llvm:Support",
],		],
)		)

cc_library(		cc_library(
name = "ComprehensiveBufferize",		name = "ComprehensiveBufferize",
srcs = [		srcs = [
"lib/Dialect/Linalg/ComprehensiveBufferize/ComprehensiveBufferize.cpp",		"lib/Dialect/Linalg/ComprehensiveBufferize/ComprehensiveBufferize.cpp",
▲ Show 20 Lines • Show All 1,044 Lines • Show Last 20 Lines

utils/bazel/llvm-project-overlay/mlir/test/BUILD.bazel

Show First 20 Lines • Show All 478 Lines • ▼ Show 20 Lines	cc_library(
name = "TestVector",		name = "TestVector",
srcs = glob(["lib/Dialect/Vector/*.cpp"]),		srcs = glob(["lib/Dialect/Vector/*.cpp"]),
defines = ["MLIR_CUDA_CONVERSIONS_ENABLED"],		defines = ["MLIR_CUDA_CONVERSIONS_ENABLED"],
includes = ["lib/Dialect/Test"],		includes = ["lib/Dialect/Test"],
deps = [		deps = [
"//mlir:Affine",		"//mlir:Affine",
"//mlir:Analysis",		"//mlir:Analysis",
"//mlir:LinalgOps",		"//mlir:LinalgOps",
		"//mlir:LinalgTransforms",
"//mlir:MemRefDialect",		"//mlir:MemRefDialect",
"//mlir:Pass",		"//mlir:Pass",
"//mlir:SCFDialect",		"//mlir:SCFDialect",
"//mlir:StandardOps",		"//mlir:StandardOps",
"//mlir:TransformUtils",		"//mlir:TransformUtils",
"//mlir:VectorOps",		"//mlir:VectorOps",
"//mlir:VectorToSCF",		"//mlir:VectorToSCF",
],		],
Show All 28 Lines

This is an archive of the discontinued LLVM Phabricator instance.

[mlir][X86Vector] Add specialized vector.transpose lowering patterns for AVX2
ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 386432

mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h

mlir/include/mlir/Dialect/X86Vector/Transforms.h

mlir/lib/Dialect/Linalg/Transforms/CMakeLists.txt

mlir/lib/Dialect/Linalg/Transforms/LinalgStrategyPasses.cpp

mlir/lib/Dialect/X86Vector/Transforms/AVXTranspose.cpp

mlir/lib/Dialect/X86Vector/Transforms/CMakeLists.txt

mlir/test/Dialect/Vector/vector-contract-matvec-transforms.mlir

mlir/test/Dialect/Vector/vector-contract-transforms.mlir

mlir/test/Dialect/Vector/vector-flat-transforms.mlir

mlir/test/Dialect/Vector/vector-mem-transforms.mlir

mlir/test/Dialect/Vector/vector-transforms.mlir

mlir/test/Dialect/Vector/vector-transpose-lowering.mlir

mlir/test/Dialect/Vector/vector-transpose-to-shuffle.mlir

mlir/test/lib/Dialect/Vector/TestVectorTransforms.cpp

mlir/tools/mlir-opt/mlir-opt.cpp

utils/bazel/llvm-project-overlay/mlir/BUILD.bazel

utils/bazel/llvm-project-overlay/mlir/test/BUILD.bazel

This is an archive of the discontinued LLVM Phabricator instance.

[mlir][X86Vector] Add specialized vector.transpose lowering patterns for AVX2
ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 386432

mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h

mlir/include/mlir/Dialect/X86Vector/Transforms.h

mlir/lib/Dialect/Linalg/Transforms/CMakeLists.txt

mlir/lib/Dialect/Linalg/Transforms/LinalgStrategyPasses.cpp

mlir/lib/Dialect/X86Vector/Transforms/AVXTranspose.cpp

mlir/lib/Dialect/X86Vector/Transforms/CMakeLists.txt

mlir/test/Dialect/Vector/vector-contract-matvec-transforms.mlir

mlir/test/Dialect/Vector/vector-contract-transforms.mlir

mlir/test/Dialect/Vector/vector-flat-transforms.mlir

mlir/test/Dialect/Vector/vector-mem-transforms.mlir

mlir/test/Dialect/Vector/vector-transforms.mlir

mlir/test/Dialect/Vector/vector-transpose-lowering.mlir

mlir/test/Dialect/Vector/vector-transpose-to-shuffle.mlir

mlir/test/lib/Dialect/Vector/TestVectorTransforms.cpp

mlir/tools/mlir-opt/mlir-opt.cpp

utils/bazel/llvm-project-overlay/mlir/BUILD.bazel

utils/bazel/llvm-project-overlay/mlir/test/BUILD.bazel

[mlir][X86Vector] Add specialized vector.transpose lowering patterns for AVX2
ClosedPublic