Diff 301767

mlir/integration_test/Dialect/Vector/CPU/test-vector-distribute.mlir

	// RUN: mlir-opt %s -test-vector-distribute-patterns=distribution-multiplicity=32 \			// RUN: mlir-opt %s -test-vector-to-forloop -convert-vector-to-scf \
	// RUN: -convert-vector-to-scf -lower-affine -convert-scf-to-std -convert-vector-to-llvm \| \			// RUN: -lower-affine -convert-scf-to-std -convert-vector-to-llvm \| \
				nicolasvasilacheUnsubmitted Done Reply Inline Actions Could we add an extra RUN command that just does `mlir-opt %s -test-vector-to-forloop` and checks the presence of the forloop+vectors ? nicolasvasilache: Could we add an extra RUN command that just does `mlir-opt %s -test-vector-to-forloop` and…
	// RUN: mlir-cpu-runner -e main -entry-point-result=void \			// RUN: mlir-cpu-runner -e main -entry-point-result=void \
	// RUN: -shared-libs=%mlir_integration_test_dir/libmlir_runner_utils%shlibext \| \			// RUN: -shared-libs=%mlir_integration_test_dir/libmlir_runner_utils%shlibext \| \
	// RUN: FileCheck %s			// RUN: FileCheck %s

				nicolasvasilacheUnsubmitted Done Reply Inline Actions blank line to delimit commands ? nicolasvasilache: blank line to delimit commands ?
				// RUN: mlir-opt %s -convert-vector-to-scf -lower-affine \
				// RUN: -convert-scf-to-std -convert-vector-to-llvm \| mlir-cpu-runner -e main \
				// RUN: -entry-point-result=void \
				// RUN: -shared-libs=%mlir_integration_test_dir/libmlir_runner_utils%shlibext \| \
				// RUN: FileCheck %s

				// RUN: mlir-opt %s -test-vector-to-forloop \| FileCheck %s -check-prefix=TRANSFORM


	func @print_memref_f32(memref<*xf32>)			func @print_memref_f32(memref<*xf32>)

	func @alloc_1d_filled_inc_f32(%arg0: index, %arg1: f32) -> memref<?xf32> {			func @alloc_1d_filled_inc_f32(%arg0: index, %arg1: f32) -> memref<?xf32> {
	%c0 = constant 0 : index			%c0 = constant 0 : index
	%c1 = constant 1 : index			%c1 = constant 1 : index
	%0 = alloc(%arg0) : memref<?xf32>			%0 = alloc(%arg0) : memref<?xf32>
	scf.for %arg2 = %c0 to %arg0 step %c1 {			scf.for %arg2 = %c0 to %arg0 step %c1 {
	%tmp = index_cast %arg2 : index to i32			%tmp = index_cast %arg2 : index to i32
	%tmp1 = sitofp %tmp : i32 to f32			%tmp1 = sitofp %tmp : i32 to f32
	%tmp2 = addf %tmp1, %arg1 : f32			%tmp2 = addf %tmp1, %arg1 : f32
	store %tmp2, %0[%arg2] : memref<?xf32>			store %tmp2, %0[%arg2] : memref<?xf32>
	}			}
	return %0 : memref<?xf32>			return %0 : memref<?xf32>
	}			}

	func @vector_add_cycle(%id : index, %A: memref<?xf32>, %B: memref<?xf32>, %C: memref<?xf32>) {			// Large vector addf that can be broken down into a loop of smaller vector addf.
				aartbikUnsubmitted Not Done Reply Inline Actions Not in this CL, but probably during a later cleanup, I would rename the "distribution" part to something better. Loop distribution is typically reserved for for { for s1 -> s1 s2 for } s2 what is done here is more stripmining, blocking or tiling or chunking in 1-D, or something named like that. aartbik: Not in this CL, but probably during a later cleanup, I would rename the "distribution" part to…
				ThomasRaouxAuthorUnsubmitted Done Reply Inline Actions Makes sense. I can rename it in a future patch once we get more agreement on the design, ThomasRaoux: Makes sense. I can rename it in a future patch once we get more agreement on the design,
	%c0 = constant 0 : index
	%cf0 = constant 0.0 : f32
	%a = vector.transfer_read %A[%c0], %cf0: memref<?xf32>, vector<64xf32>
	%b = vector.transfer_read %B[%c0], %cf0: memref<?xf32>, vector<64xf32>
	%acc = addf %a, %b: vector<64xf32>
	vector.transfer_write %acc, %C[%c0]: vector<64xf32>, memref<?xf32>
	return
	}

	// Loop over a function containing a large vector add and distribute it so that
	// each iteration of the loop processes part of the vector operation.
	func @main() {			func @main() {
				%cf0 = constant 0.0 : f32
	%cf1 = constant 1.0 : f32			%cf1 = constant 1.0 : f32
	%cf2 = constant 2.0 : f32			%cf2 = constant 2.0 : f32
	%c0 = constant 0 : index			%c0 = constant 0 : index
	%c1 = constant 1 : index			%c1 = constant 1 : index
				%c32 = constant 32 : index
	%c64 = constant 64 : index			%c64 = constant 64 : index
	%out = alloc(%c64) : memref<?xf32>			%out = alloc(%c64) : memref<?xf32>
	%in1 = call @alloc_1d_filled_inc_f32(%c64, %cf1) : (index, f32) -> memref<?xf32>			%in1 = call @alloc_1d_filled_inc_f32(%c64, %cf1) : (index, f32) -> memref<?xf32>
	%in2 = call @alloc_1d_filled_inc_f32(%c64, %cf2) : (index, f32) -> memref<?xf32>			%in2 = call @alloc_1d_filled_inc_f32(%c64, %cf2) : (index, f32) -> memref<?xf32>
	scf.for %arg5 = %c0 to %c64 step %c1 {			// Check that the transformation correctly happened.
				aartbikUnsubmitted Not Done Reply Inline Actions c32 because 2x32=64, right? I am not super convinced I find this intermediate step much easier to understand than generating the chunked loop right away, but I hope you convince me in the discourse discussion aartbik: c32 because 2x32=64, right? I am not super convinced I find this intermediate step much easier…
				ThomasRaouxAuthorUnsubmitted Done Reply Inline Actions Correct, right now the extract_map expects contiguous IDs. (%arg5 : 32 goes from 0 to 31). About iterative vs all at once transformation let's keep talking on Discourse :) ThomasRaoux: Correct, right now the extract_map expects contiguous IDs. (%arg5 : 32 goes from 0 to 31).
				nicolasvasilacheUnsubmitted Not Done Reply Inline Actions As discussed on discourse, this is transient state internal to the test pass and should not be exposed. Let's please have the test do 2 things. Input IR: %a = vector.transfer_read %in1[%c0], %cf0: memref<?xf32>, vector<256xf32> %b = vector.transfer_read %in2[%c0], %cf0: memref<?xf32>, vector<256xf32> %acc = addf %a, %b: vector<256xf32> vector.transfer_write %acc, %out[%c0]: vector<256xf32>, memref<?xf32> Output IR: scf.for %arg5 = %c0 to %c256 step %c8 { %a = vector.transfer_read %in1[%arg5], %cf0: memref<?xf32>, vector<8xf32> %b = vector.transfer_read %in2[%arg5], %cf0: memref<?xf32>, vector<8xf32> %acc = addf %a, %b: vector<8xf32> vector.transfer_write %acc, %out[%arg5]: vector<8xf32>, memref<?xf32> } The test should also run with and without the application of the test pass and produce the same result. nicolasvasilache: As discussed on discourse, this is transient state internal to the test pass and should not be…
				ThomasRaouxAuthorUnsubmitted Done Reply Inline Actions Done. Starting from just the vector add and running with and without the transformation pass. ThomasRaoux: Done. Starting from just the vector add and running with and without the transformation pass.
	call @vector_add_cycle(%arg5, %in1, %in2, %out) : (index, memref<?xf32>, memref<?xf32>, memref<?xf32>) -> ()			// TRANSFORM: scf.for
	}			// TRANSFORM: vector.transfer_read {{.*}} : memref<?xf32>, vector<2xf32>
				// TRANSFORM: vector.transfer_read {{.*}} : memref<?xf32>, vector<2xf32>
				// TRANSFORM: %{{.}} = addf %{{.}}, %{{.*}} : vector<2xf32>
				// TRANSFORM: vector.transfer_write {{.*}} : vector<2xf32>, memref<?xf32>
				// TRANSFORM: }
				%a = vector.transfer_read %in1[%c0], %cf0: memref<?xf32>, vector<64xf32>
				%b = vector.transfer_read %in2[%c0], %cf0: memref<?xf32>, vector<64xf32>
				%acc = addf %a, %b: vector<64xf32>
				vector.transfer_write %acc, %out[%c0]: vector<64xf32>, memref<?xf32>
	%converted = memref_cast %out : memref<?xf32> to memref<*xf32>			%converted = memref_cast %out : memref<?xf32> to memref<*xf32>
	call @print_memref_f32(%converted): (memref<*xf32>) -> ()			call @print_memref_f32(%converted): (memref<*xf32>) -> ()
	// CHECK: Unranked{{.*}}data =			// CHECK: Unranked{{.*}}data =
	// CHECK: [			// CHECK: [
	// CHECK-SAME: 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27,			// CHECK-SAME: 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27,
	// CHECK-SAME: 29, 31, 33, 35, 37, 39, 41, 43, 45, 47, 49, 51,			// CHECK-SAME: 29, 31, 33, 35, 37, 39, 41, 43, 45, 47, 49, 51,
	// CHECK-SAME: 53, 55, 57, 59, 61, 63, 65, 67, 69, 71, 73, 75,			// CHECK-SAME: 53, 55, 57, 59, 61, 63, 65, 67, 69, 71, 73, 75,
	// CHECK-SAME: 77, 79, 81, 83, 85, 87, 89, 91, 93, 95, 97, 99,			// CHECK-SAME: 77, 79, 81, 83, 85, 87, 89, 91, 93, 95, 97, 99,
	// CHECK-SAME: 101, 103, 105, 107, 109, 111, 113, 115, 117, 119,			// CHECK-SAME: 101, 103, 105, 107, 109, 111, 113, 115, 117, 119,
	// CHECK-SAME: 121, 123, 125, 127, 129]			// CHECK-SAME: 121, 123, 125, 127, 129]
	dealloc %out : memref<?xf32>			dealloc %out : memref<?xf32>
	dealloc %in1 : memref<?xf32>			dealloc %in1 : memref<?xf32>
	dealloc %in2 : memref<?xf32>			dealloc %in2 : memref<?xf32>
	return			return
	}			}

mlir/lib/Dialect/Vector/VectorTransforms.cpp

Show First 20 Lines • Show All 2,520 Lines • ▼ Show 20 Lines	LogicalResult matchAndRewrite(vector::TransferReadOp read,
if (!read.getResult().hasOneUse())		if (!read.getResult().hasOneUse())
return failure();		return failure();
auto extract =		auto extract =
dyn_cast<vector::ExtractMapOp>(*read.getResult().getUsers().begin());		dyn_cast<vector::ExtractMapOp>(*read.getResult().getUsers().begin());
if (!extract)		if (!extract)
return failure();		return failure();
edsc::ScopedContext scope(rewriter, read.getLoc());		edsc::ScopedContext scope(rewriter, read.getLoc());
using mlir::edsc::op::operator+;		using mlir::edsc::op::operator+;
		using mlir::edsc::op::operator*;
using namespace mlir::edsc::intrinsics;		using namespace mlir::edsc::intrinsics;
SmallVector<Value, 4> indices(read.indices().begin(), read.indices().end());		SmallVector<Value, 4> indices(read.indices().begin(), read.indices().end());
indices.back() = indices.back() + extract.id();		indices.back() =
		indices.back() +
		(extract.id() *
		std_constant_index(extract.getResultType().getDimSize(0)));
Value newRead = vector_transfer_read(extract.getType(), read.memref(),		Value newRead = vector_transfer_read(extract.getType(), read.memref(),
indices, read.permutation_map(),		indices, read.permutation_map(),
read.padding(), ArrayAttr());		read.padding(), ArrayAttr());
Value dest = rewriter.create<ConstantOp>(		Value dest = rewriter.create<ConstantOp>(
read.getLoc(), read.getType(), rewriter.getZeroAttr(read.getType()));		read.getLoc(), read.getType(), rewriter.getZeroAttr(read.getType()));
newRead = rewriter.create<vector::InsertMapOp>(		newRead = rewriter.create<vector::InsertMapOp>(
read.getLoc(), newRead, dest, extract.id(), extract.multiplicity());		read.getLoc(), newRead, dest, extract.id(), extract.multiplicity());
rewriter.replaceOp(read, newRead);		rewriter.replaceOp(read, newRead);
return success();		return success();
}		}
};		};

struct TransferWriteInsertPattern		struct TransferWriteInsertPattern
: public OpRewritePattern<vector::TransferWriteOp> {		: public OpRewritePattern<vector::TransferWriteOp> {
TransferWriteInsertPattern(MLIRContext *context)		TransferWriteInsertPattern(MLIRContext *context)
: OpRewritePattern<vector::TransferWriteOp>(context) {}		: OpRewritePattern<vector::TransferWriteOp>(context) {}
LogicalResult matchAndRewrite(vector::TransferWriteOp write,		LogicalResult matchAndRewrite(vector::TransferWriteOp write,
PatternRewriter &rewriter) const override {		PatternRewriter &rewriter) const override {
auto insert = write.vector().getDefiningOp<vector::InsertMapOp>();		auto insert = write.vector().getDefiningOp<vector::InsertMapOp>();
if (!insert)		if (!insert)
return failure();		return failure();
edsc::ScopedContext scope(rewriter, write.getLoc());		edsc::ScopedContext scope(rewriter, write.getLoc());
using mlir::edsc::op::operator+;		using mlir::edsc::op::operator+;
		using mlir::edsc::op::operator*;
using namespace mlir::edsc::intrinsics;		using namespace mlir::edsc::intrinsics;
SmallVector<Value, 4> indices(write.indices().begin(),		SmallVector<Value, 4> indices(write.indices().begin(),
write.indices().end());		write.indices().end());
indices.back() = indices.back() + insert.id();		indices.back() =
		indices.back() +
		(insert.id() *
		std_constant_index(insert.getSourceVectorType().getDimSize(0)));
vector_transfer_write(insert.vector(), write.memref(), indices,		vector_transfer_write(insert.vector(), write.memref(), indices,
write.permutation_map(), ArrayAttr());		write.permutation_map(), ArrayAttr());
rewriter.eraseOp(write);		rewriter.eraseOp(write);
return success();		return success();
}		}
};		};

// TODO: Add pattern to rewrite ExtractSlices(ConstantMaskOp).		// TODO: Add pattern to rewrite ExtractSlices(ConstantMaskOp).
Show All 36 Lines

mlir/test/Dialect/Vector/vector-distribution.mlir

	// RUN: mlir-opt %s -test-vector-distribute-patterns=distribution-multiplicity=32 \| FileCheck %s			// RUN: mlir-opt %s -test-vector-distribute-patterns=distribution-multiplicity=32 -split-input-file \| FileCheck %s

	// CHECK-LABEL: func @distribute_vector_add			// CHECK-LABEL: func @distribute_vector_add
	mehdi_aminiUnsubmitted Not Done Reply Inline Actions Why removing the "-LABEL" here? mehdi_amini: Why removing the "-LABEL" here?
	ThomasRaouxAuthorUnsubmitted Done Reply Inline Actions That was done by mistake. I do need to change the CHECK-LABEL in the 3rd test otherwise the [[MAP]] variable gets reset after the CHECK-LABEL. ThomasRaoux: That was done by mistake. I do need to change the CHECK-LABEL in the 3rd test otherwise the…
	// CHECK-SAME: (%[[ID:.*]]: index			// CHECK-SAME: (%[[ID:.*]]: index
	// CHECK-NEXT: %[[ADDV:.]] = addf %{{.}}, %{{.*}} : vector<32xf32>			// CHECK-NEXT: %[[ADDV:.]] = addf %{{.}}, %{{.*}} : vector<32xf32>
	// CHECK-NEXT: %[[EXA:.]] = vector.extract_map %{{.}}[%[[ID]]] : vector<32xf32> to vector<1xf32>			// CHECK-NEXT: %[[EXA:.]] = vector.extract_map %{{.}}[%[[ID]]] : vector<32xf32> to vector<1xf32>
	// CHECK-NEXT: %[[EXB:.]] = vector.extract_map %{{.}}[%[[ID]]] : vector<32xf32> to vector<1xf32>			// CHECK-NEXT: %[[EXB:.]] = vector.extract_map %{{.}}[%[[ID]]] : vector<32xf32> to vector<1xf32>
	// CHECK-NEXT: %[[ADD:.*]] = addf %[[EXA]], %[[EXB]] : vector<1xf32>			// CHECK-NEXT: %[[ADD:.*]] = addf %[[EXA]], %[[EXB]] : vector<1xf32>
	// CHECK-NEXT: %[[INS:.*]] = vector.insert_map %[[ADD]], %[[ADDV]][%[[ID]]] : vector<1xf32> into vector<32xf32>			// CHECK-NEXT: %[[INS:.*]] = vector.insert_map %[[ADD]], %[[ADDV]][%[[ID]]] : vector<1xf32> into vector<32xf32>
	// CHECK-NEXT: return %[[INS]] : vector<32xf32>			// CHECK-NEXT: return %[[INS]] : vector<32xf32>
	func @distribute_vector_add(%id : index, %A: vector<32xf32>, %B: vector<32xf32>) -> vector<32xf32> {			func @distribute_vector_add(%id : index, %A: vector<32xf32>, %B: vector<32xf32>) -> vector<32xf32> {
	%0 = addf %A, %B : vector<32xf32>			%0 = addf %A, %B : vector<32xf32>
	return %0: vector<32xf32>			return %0: vector<32xf32>
	}			}

				// -----

	// CHECK-LABEL: func @vector_add_read_write			// CHECK-LABEL: func @vector_add_read_write
	// CHECK-SAME: (%[[ID:.*]]: index			// CHECK-SAME: (%[[ID:.*]]: index
	// CHECK: %[[EXA:.]] = vector.transfer_read %{{.}}[%[[ID]]], %{{.*}} : memref<32xf32>, vector<1xf32>			// CHECK: %[[EXA:.]] = vector.transfer_read %{{.}}[%[[ID]]], %{{.*}} : memref<32xf32>, vector<1xf32>
	// CHECK-NEXT: %[[EXB:.]] = vector.transfer_read %{{.}}[%[[ID]]], %{{.*}} : memref<32xf32>, vector<1xf32>			// CHECK-NEXT: %[[EXB:.]] = vector.transfer_read %{{.}}[%[[ID]]], %{{.*}} : memref<32xf32>, vector<1xf32>
	// CHECK-NEXT: %[[ADD1:.*]] = addf %[[EXA]], %[[EXB]] : vector<1xf32>			// CHECK-NEXT: %[[ADD1:.*]] = addf %[[EXA]], %[[EXB]] : vector<1xf32>
	// CHECK-NEXT: %[[EXC:.]] = vector.transfer_read %{{.}}[%[[ID]]], %{{.*}} : memref<32xf32>, vector<1xf32>			// CHECK-NEXT: %[[EXC:.]] = vector.transfer_read %{{.}}[%[[ID]]], %{{.*}} : memref<32xf32>, vector<1xf32>
	// CHECK-NEXT: %[[ADD2:.*]] = addf %[[ADD1]], %[[EXC]] : vector<1xf32>			// CHECK-NEXT: %[[ADD2:.*]] = addf %[[ADD1]], %[[EXC]] : vector<1xf32>
	// CHECK-NEXT: vector.transfer_write %[[ADD2]], %{{.*}}[%[[ID]]] : vector<1xf32>, memref<32xf32>			// CHECK-NEXT: vector.transfer_write %[[ADD2]], %{{.*}}[%[[ID]]] : vector<1xf32>, memref<32xf32>
	// CHECK-NEXT: return			// CHECK-NEXT: return
	func @vector_add_read_write(%id : index, %A: memref<32xf32>, %B: memref<32xf32>, %C: memref<32xf32>, %D: memref<32xf32>) {			func @vector_add_read_write(%id : index, %A: memref<32xf32>, %B: memref<32xf32>, %C: memref<32xf32>, %D: memref<32xf32>) {
	%c0 = constant 0 : index			%c0 = constant 0 : index
	%cf0 = constant 0.0 : f32			%cf0 = constant 0.0 : f32
	%a = vector.transfer_read %A[%c0], %cf0: memref<32xf32>, vector<32xf32>			%a = vector.transfer_read %A[%c0], %cf0: memref<32xf32>, vector<32xf32>
	%b = vector.transfer_read %B[%c0], %cf0: memref<32xf32>, vector<32xf32>			%b = vector.transfer_read %B[%c0], %cf0: memref<32xf32>, vector<32xf32>
	%acc = addf %a, %b: vector<32xf32>			%acc = addf %a, %b: vector<32xf32>
	%c = vector.transfer_read %C[%c0], %cf0: memref<32xf32>, vector<32xf32>			%c = vector.transfer_read %C[%c0], %cf0: memref<32xf32>, vector<32xf32>
	%d = addf %acc, %c: vector<32xf32>			%d = addf %acc, %c: vector<32xf32>
	vector.transfer_write %d, %D[%c0]: vector<32xf32>, memref<32xf32>			vector.transfer_write %d, %D[%c0]: vector<32xf32>, memref<32xf32>
	return			return
	}			}

	// CHECK-LABEL: func @vector_add_cycle			// -----

				// CHECK-DAG: #[[MAP0:map[0-9]+]] = affine_map<()[s0] -> (s0 * 2)>

				// CHECK: func @vector_add_cycle
	// CHECK-SAME: (%[[ID:.*]]: index			// CHECK-SAME: (%[[ID:.*]]: index
	// CHECK: %[[EXA:.]] = vector.transfer_read %{{.}}[%[[ID]]], %{{.*}} : memref<64xf32>, vector<2xf32>			// CHECK: %[[ID1:.*]] = affine.apply #[[MAP0]]()[%[[ID]]]
	// CHECK-NEXT: %[[EXB:.]] = vector.transfer_read %{{.}}[%[[ID]]], %{{.*}} : memref<64xf32>, vector<2xf32>			// CHECK-NEXT: %[[EXA:.]] = vector.transfer_read %{{.}}[%[[ID1]]], %{{.*}} : memref<64xf32>, vector<2xf32>
				// CHECK-NEXT: %[[ID2:.*]] = affine.apply #[[MAP0]]()[%[[ID]]]
				// CHECK-NEXT: %[[EXB:.]] = vector.transfer_read %{{.}}[%[[ID2]]], %{{.*}} : memref<64xf32>, vector<2xf32>
	// CHECK-NEXT: %[[ADD:.*]] = addf %[[EXA]], %[[EXB]] : vector<2xf32>			// CHECK-NEXT: %[[ADD:.*]] = addf %[[EXA]], %[[EXB]] : vector<2xf32>
	// CHECK-NEXT: vector.transfer_write %[[ADD]], %{{.*}}[%[[ID]]] : vector<2xf32>, memref<64xf32>			// CHECK-NEXT: %[[ID3:.*]] = affine.apply #[[MAP0]]()[%[[ID]]]
				// CHECK-NEXT: vector.transfer_write %[[ADD]], %{{.*}}[%[[ID3]]] : vector<2xf32>, memref<64xf32>
	// CHECK-NEXT: return			// CHECK-NEXT: return
	func @vector_add_cycle(%id : index, %A: memref<64xf32>, %B: memref<64xf32>, %C: memref<64xf32>) {			func @vector_add_cycle(%id : index, %A: memref<64xf32>, %B: memref<64xf32>, %C: memref<64xf32>) {
	%c0 = constant 0 : index			%c0 = constant 0 : index
	%cf0 = constant 0.0 : f32			%cf0 = constant 0.0 : f32
	%a = vector.transfer_read %A[%c0], %cf0: memref<64xf32>, vector<64xf32>			%a = vector.transfer_read %A[%c0], %cf0: memref<64xf32>, vector<64xf32>
	%b = vector.transfer_read %B[%c0], %cf0: memref<64xf32>, vector<64xf32>			%b = vector.transfer_read %B[%c0], %cf0: memref<64xf32>, vector<64xf32>
	%acc = addf %a, %b: vector<64xf32>			%acc = addf %a, %b: vector<64xf32>
	vector.transfer_write %acc, %C[%c0]: vector<64xf32>, memref<64xf32>			vector.transfer_write %acc, %C[%c0]: vector<64xf32>, memref<64xf32>
	return			return
	}			}

				// -----

	// Negative test to make sure nothing is done in case the vector size is not a			// Negative test to make sure nothing is done in case the vector size is not a
	// multiple of multiplicity.			// multiple of multiplicity.
	// CHECK-LABEL: func @vector_negative_test			// CHECK-LABEL: func @vector_negative_test
	// CHECK: %[[C0:.*]] = constant 0 : index			// CHECK: %[[C0:.*]] = constant 0 : index
	// CHECK: %[[EXA:.]] = vector.transfer_read %{{.}}[%[[C0]]], %{{.*}} : memref<64xf32>, vector<16xf32>			// CHECK: %[[EXA:.]] = vector.transfer_read %{{.}}[%[[C0]]], %{{.*}} : memref<64xf32>, vector<16xf32>
	// CHECK-NEXT: %[[EXB:.]] = vector.transfer_read %{{.}}[%[[C0]]], %{{.*}} : memref<64xf32>, vector<16xf32>			// CHECK-NEXT: %[[EXB:.]] = vector.transfer_read %{{.}}[%[[C0]]], %{{.*}} : memref<64xf32>, vector<16xf32>
	// CHECK-NEXT: %[[ADD:.*]] = addf %[[EXA]], %[[EXB]] : vector<16xf32>			// CHECK-NEXT: %[[ADD:.*]] = addf %[[EXA]], %[[EXB]] : vector<16xf32>
	// CHECK-NEXT: vector.transfer_write %[[ADD]], %{{.}}[%[[C0]]] {{.}} : vector<16xf32>, memref<64xf32>			// CHECK-NEXT: vector.transfer_write %[[ADD]], %{{.}}[%[[C0]]] {{.}} : vector<16xf32>, memref<64xf32>
	Show All 12 Lines

mlir/test/lib/Transforms/TestVectorTransforms.cpp

//===- TestVectorToVectorConversion.cpp - Test VectorTransfers lowering ---===//		//===- TestVectorToVectorConversion.cpp - Test VectorTransfers lowering ---===//
//		//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.		// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.		// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception		// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//		//
//===----------------------------------------------------------------------===//		//===----------------------------------------------------------------------===//

#include <type_traits>		#include <type_traits>

		#include "mlir/Analysis/SliceAnalysis.h"
#include "mlir/Dialect/Affine/IR/AffineOps.h"		#include "mlir/Dialect/Affine/IR/AffineOps.h"
#include "mlir/Dialect/Linalg/IR/LinalgOps.h"		#include "mlir/Dialect/Linalg/IR/LinalgOps.h"
#include "mlir/Dialect/SCF/SCF.h"		#include "mlir/Dialect/SCF/SCF.h"
#include "mlir/Dialect/StandardOps/IR/Ops.h"		#include "mlir/Dialect/StandardOps/IR/Ops.h"
#include "mlir/Dialect/Vector/VectorOps.h"		#include "mlir/Dialect/Vector/VectorOps.h"
#include "mlir/Dialect/Vector/VectorTransforms.h"		#include "mlir/Dialect/Vector/VectorTransforms.h"
#include "mlir/Pass/Pass.h"		#include "mlir/Pass/Pass.h"
#include "mlir/Transforms/GreedyPatternRewriteDriver.h"		#include "mlir/Transforms/GreedyPatternRewriteDriver.h"
▲ Show 20 Lines • Show All 161 Lines • ▼ Show 20 Lines	func.walk([&](AddFOp op) {
}		}
});		});
patterns.insert<PointwiseExtractPattern>(ctx);		patterns.insert<PointwiseExtractPattern>(ctx);
populateVectorToVectorTransformationPatterns(patterns, ctx);		populateVectorToVectorTransformationPatterns(patterns, ctx);
applyPatternsAndFoldGreedily(getFunction(), std::move(patterns));		applyPatternsAndFoldGreedily(getFunction(), std::move(patterns));
}		}
};		};

		// Test pass that takes the first eligible rank-1 vector `addf` in a function,
		// wraps it (together with the ops in its slice) in an `scf.for` loop running
		// from 0 to `multiplicity` with step 1, and distributes the op over the loop
		// induction variable before running the vector-to-vector patterns.
		struct TestVectorToLoopPatterns
		: public PassWrapper<TestVectorToLoopPatterns, FunctionPass> {
		TestVectorToLoopPatterns() = default;
		// User-provided copy constructor with an empty body: the `multiplicity`
		// option value is not copied from `pass` here.
		// NOTE(review): presumably required because the Option member is not
		// copyable -- confirm.
		TestVectorToLoopPatterns(const TestVectorToLoopPatterns &pass) {}
		// Declares the dialects this pass registers up front: Vector and Affine.
		void getDependentDialects(DialectRegistry &registry) const override {
		registry.insert<VectorDialect>();
		registry.insert<AffineDialect>();
		}
		// Command-line option `distribution-multiplicity` (default 32). It is used
		// below both as the divisor for the eligibility check and as the upper bound
		// of the generated loop.
		Option<int32_t> multiplicity{
		*this, "distribution-multiplicity",
		llvm::cl::desc("Set the multiplicity used for distributing vector"),
		llvm::cl::init(32)};
		void runOnFunction() override {
		MLIRContext *ctx = &getContext();
		OwningRewritePatternList patterns;
		FuncOp func = getFunction();
		func.walk([&](AddFOp op) {
		// Check that the operation type can be broken down into a loop: the result
		// must be a rank-1 vector whose element count is a multiple of
		// `multiplicity`. Ops that do not qualify are skipped and the walk goes on.
		// NOTE(review): a multiplicity of 0 would make the modulo below undefined.
		VectorType type = op.getType().dyn_cast<VectorType>();
		if (!type \|\| type.getRank() != 1 \|\|
		type.getNumElements() % multiplicity != 0)
		return mlir::WalkResult::advance();
		// Filter passed to getSlice: rejects constants, allocs and calls, so those
		// ops are not part of the slice and are therefore not moved into the loop.
		auto filterAlloc = [](Operation *op) {
		if (isa<ConstantOp, AllocOp, CallOp>(op))
		return false;
		return true;
		};
		// NOTE(review): presumably the backward + forward slice of `op` (including
		// `op` itself) restricted by the filter -- confirm against SliceAnalysis.h.
		auto dependentOps = getSlice(op, filterAlloc);
		// Create a loop and move instructions from the Op slice into the loop.
		// The builder is positioned at `op`, so the bounds and the loop are created
		// there: lower bound 0, step 1, upper bound `multiplicity`.
		OpBuilder builder(op);
		auto zero = builder.create<ConstantOp>(
		op.getLoc(), builder.getIndexType(),
		builder.getIntegerAttr(builder.getIndexType(), 0));
		auto one = builder.create<ConstantOp>(
		op.getLoc(), builder.getIndexType(),
		builder.getIntegerAttr(builder.getIndexType(), 1));
		auto numIter = builder.create<ConstantOp>(
		op.getLoc(), builder.getIndexType(),
		builder.getIntegerAttr(builder.getIndexType(), multiplicity));
		auto forOp = builder.create<scf::ForOp>(op.getLoc(), zero, numIter, one);
		// Each slice op is placed right before the loop body terminator, in the
		// order in which getSlice returned them.
		for (Operation *it : dependentOps) {
		it->moveBefore(forOp.getBody()->getTerminator());
		}
		// Break up the original op and let the patterns propagate. The loop
		// induction variable is passed as the distribution id.
		Optional<mlir::vector::DistributeOps> ops = distributPointwiseVectorOp(
		builder, op.getOperation(), forOp.getInductionVar(), multiplicity);
		if (ops.hasValue()) {
		// Redirect every use of the original result to the insert op's result,
		// except for the newly created extract and insert ops themselves.
		SmallPtrSet<Operation *, 1> extractOp({ops->extract, ops->insert});
		op.getResult().replaceAllUsesExcept(ops->insert.getResult(), extractOp);
		}
		// Stop walking after the first eligible addf: only one op per function is
		// transformed by this test pass.
		return mlir::WalkResult::interrupt();
		});
		// Run the extract-propagation pattern and the vector-to-vector patterns
		// greedily over the whole function.
		patterns.insert<PointwiseExtractPattern>(ctx);
		populateVectorToVectorTransformationPatterns(patterns, ctx);
		applyPatternsAndFoldGreedily(getFunction(), std::move(patterns));
		}
		};

struct TestVectorTransferUnrollingPatterns		struct TestVectorTransferUnrollingPatterns
: public PassWrapper<TestVectorTransferUnrollingPatterns, FunctionPass> {		: public PassWrapper<TestVectorTransferUnrollingPatterns, FunctionPass> {
void getDependentDialects(DialectRegistry &registry) const override {		void getDependentDialects(DialectRegistry &registry) const override {
registry.insert<AffineDialect>();		registry.insert<AffineDialect>();
}		}
void runOnFunction() override {		void runOnFunction() override {
MLIRContext *ctx = &getContext();		MLIRContext *ctx = &getContext();
OwningRewritePatternList patterns;		OwningRewritePatternList patterns;
▲ Show 20 Lines • Show All 63 Lines • ▼ Show 20 Lines	void registerTestVectorConversions() {
PassRegistration<TestVectorTransferFullPartialSplitPatterns>		PassRegistration<TestVectorTransferFullPartialSplitPatterns>
vectorTransformFullPartialPass("test-vector-transfer-full-partial-split",		vectorTransformFullPartialPass("test-vector-transfer-full-partial-split",
"Test conversion patterns to split "		"Test conversion patterns to split "
"transfer ops via scf.if + linalg ops");		"transfer ops via scf.if + linalg ops");
PassRegistration<TestVectorDistributePatterns> distributePass(		PassRegistration<TestVectorDistributePatterns> distributePass(
"test-vector-distribute-patterns",		"test-vector-distribute-patterns",
"Test conversion patterns to distribute vector ops in the vector "		"Test conversion patterns to distribute vector ops in the vector "
"dialect");		"dialect");
		PassRegistration<TestVectorToLoopPatterns> vectorToForLoop(
		"test-vector-to-forloop",
		"Test conversion patterns to break up a vector op into a for loop");
}		}
} // namespace mlir		} // namespace mlir

This is an archive of the discontinued LLVM Phabricator instance.

[mlir][vector] Improve vector distribute integration test and fix block distribution
ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 301767

mlir/integration_test/Dialect/Vector/CPU/test-vector-distribute.mlir

mlir/lib/Dialect/Vector/VectorTransforms.cpp

mlir/test/Dialect/Vector/vector-distribution.mlir

mlir/test/lib/Transforms/TestVectorTransforms.cpp

This is an archive of the discontinued LLVM Phabricator instance.

[mlir][vector] Improve vector distribute integration test and fix block distribution
ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 301767

mlir/integration_test/Dialect/Vector/CPU/test-vector-distribute.mlir

mlir/lib/Dialect/Vector/VectorTransforms.cpp

mlir/test/Dialect/Vector/vector-distribution.mlir

mlir/test/lib/Transforms/TestVectorTransforms.cpp

[mlir][vector] Improve vector distribute integration test and fix block distribution
ClosedPublic