Diff 296496

mlir/lib/Dialect/Vector/VectorOps.cpp

Show First 20 Lines • Show All 806 Lines • ▼ Show 20 Lines	while (insertOp || transposeOp) {
// `vector.insert` / `vector.transpose`.		// `vector.insert` / `vector.transpose`.
// Compute insert/transpose for the next iteration.		// Compute insert/transpose for the next iteration.
insertOp = insertionDest.getDefiningOp<vector::InsertOp>();		insertOp = insertionDest.getDefiningOp<vector::InsertOp>();
transposeOp = insertionDest.getDefiningOp<vector::TransposeOp>();		transposeOp = insertionDest.getDefiningOp<vector::TransposeOp>();
}		}
return Value();		return Value();
}		}

		/// Fold extractOp with scalar result coming from BroadcastOp.
		static Value foldExtractFromBroadcast(ExtractOp extractOp) {
		aartbikUnsubmitted Not Done Reply Inline Actions This transformation is not correct (as written). For example func @fold_extract_broadcast(%arg0: vector<4xf32>) -> f32 { %0 = vector.broadcast %arg0 : vector<4xf32> to vector<1x2x4xf32> %1 = vector.extract %0[0, 1, 2] : vector<1x2x4xf32> return %1 : f32 } will break. You will need to do a bit more analysis of the types (but in that case, you can probably generalize beyond scalars). aartbik: This transformation is not correct (as written). For example func @fold_extract_broadcast…
		ThomasRaouxAuthorUnsubmitted Done Reply Inline Actions Thanks for catching that. I missed that the broadcast source can be a vector. I generalized it to vectors: as long as the type of the broadcast source is the same as the extract result type, the transformation is correct. I added a test for the vector case and a negative test as well. ThomasRaoux: Thanks for catching that. I missed that the broadcast source can be a vector. I…
		auto broadcastOp = extractOp.vector().getDefiningOp<vector::BroadcastOp>();
		if (!broadcastOp)
		nicolasvasilacheUnsubmitted Not Done Reply Inline Actions Why worry about the types here? Shouldn't you just drop the `n-k first dimensions` from the extract and turn it into `vector.extract %a[2] : vector<4xf32> to f32`? Depending on the dimension of the extract op compared to `n-k` you have 3 cases. nicolasvasilache: Why worry about the types here? Shouldn't you just drop the `n-k first dimensions` from the…
		ThomasRaouxAuthorUnsubmitted Done Reply Inline Actions I was trying to only handle the case where the extract and broadcast cancel each other. I can handle also the case where the rank of broadcast source is greater than the rank of extract result. I don't think I can handle the case where the rank of vector result is greater than the rank of broadcast source since I would need to create a new broadcast operation, my understanding is that the fold method shouldn't create new operations? What do you think? ThomasRaoux: I was trying to only handle the case where the extract and broadcast cancel each other. I can…
		nicolasvasilacheUnsubmitted Not Done Reply Inline Actions Right, the third case would have to be a canonicalization pattern followed by DCE (if there are no other uses). It seems undesirable to have both a folding and a canonicalization for the overlap of the 3 cases. I'd say let's make the folding support the 2 cases it can with a TODO that if/when we want the third we should move all this to a canonicalization pattern ? nicolasvasilache: Right, the third case would have to be a canonicalization pattern followed by DCE (if there are…
		ThomasRaouxAuthorUnsubmitted Done Reply Inline Actions Sounds good, I added case 2, where the result rank is smaller than the broadcast source rank, and added a TODO for the case where the result rank is bigger than the broadcast source rank. ThomasRaoux: Sounds good, I added the case 2 where result rank is smaller than broadcast source rank and…
		return Value();
		if (extractOp.getType() == broadcastOp.getSourceType())
		return broadcastOp.source();
		auto getRank = [](Type type) {
		return type.isa<VectorType>() ? type.cast<VectorType>().getRank() : 0;
		};
		unsigned broadcasrSrcRank = getRank(broadcastOp.getSourceType());
		unsigned extractResultRank = getRank(extractOp.getType());
		if (extractResultRank < broadcasrSrcRank) {
		auto extractPos = extractVector<int64_t>(extractOp.position());
		unsigned rankDiff = broadcasrSrcRank - extractResultRank;
		extractPos.erase(
		extractPos.begin(),
		std::next(extractPos.begin(), extractPos.size() - rankDiff));
		extractOp.setOperand(broadcastOp.source());
		// OpBuilder is only used as a helper to build an I64ArrayAttr.
		OpBuilder b(extractOp.getContext());
		extractOp.setAttr(ExtractOp::getPositionAttrName(),
		b.getI64ArrayAttr(extractPos));
		return extractOp.getResult();
		}
		// TODO: In case the rank of the broadcast source is smaller than the rank of
		// the extract result this can be combined into a new broadcast op. This needs
		aartbikUnsubmitted Not Done Reply Inline Actions typo: broadcast aartbik: typo: broadcast
		// to be added as a canonicalization pattern if needed.
		return Value();
		}

OpFoldResult ExtractOp::fold(ArrayRef<Attribute>) {		OpFoldResult ExtractOp::fold(ArrayRef<Attribute>) {
if (succeeded(foldExtractOpFromExtractChain(*this)))		if (succeeded(foldExtractOpFromExtractChain(*this)))
return getResult();		return getResult();
if (succeeded(foldExtractOpFromTranspose(*this)))		if (succeeded(foldExtractOpFromTranspose(*this)))
return getResult();		return getResult();
if (auto val = foldExtractOpFromInsertChainAndTranspose(*this))		if (auto val = foldExtractOpFromInsertChainAndTranspose(*this))
return val;		return val;
		if (auto val = foldExtractFromBroadcast(*this))
		return val;
return OpFoldResult();		return OpFoldResult();
}		}

//===----------------------------------------------------------------------===//		//===----------------------------------------------------------------------===//
// ExtractSlicesOp		// ExtractSlicesOp
//===----------------------------------------------------------------------===//		//===----------------------------------------------------------------------===//

void ExtractSlicesOp::build(OpBuilder &builder, OperationState &result,		void ExtractSlicesOp::build(OpBuilder &builder, OperationState &result,
▲ Show 20 Lines • Show All 1,928 Lines • Show Last 20 Lines

mlir/test/Dialect/Vector/canonicalize.mlir

Show First 20 Lines • Show All 342 Lines • ▼ Show 20 Lines	func @fold_extract_transpose(
%4 = vector.transpose %b, [0, 2, 3, 1] : vector<3x6x5x6xf32> to vector<3x5x6x6xf32>		%4 = vector.transpose %b, [0, 2, 3, 1] : vector<3x6x5x6xf32> to vector<3x5x6x6xf32>
%5 = vector.extract %4[0, 1, 2] : vector<3x5x6x6xf32>		%5 = vector.extract %4[0, 1, 2] : vector<3x5x6x6xf32>

return %1, %3, %5 : vector<6xf32>, vector<6xf32>, vector<6xf32>		return %1, %3, %5 : vector<6xf32>, vector<6xf32>, vector<6xf32>
}		}

// -----		// -----

		// Extracting a scalar from a broadcast whose source is that same scalar:
		// the CHECK lines expect the function to return the broadcast source
		// directly, with no vector.extract left.
		// CHECK-LABEL: fold_extract_broadcast
		// CHECK-SAME: %[[A:.*]]: f32
		// CHECK: return %[[A]] : f32
		func @fold_extract_broadcast(%a : f32) -> f32 {
		%b = vector.broadcast %a : f32 to vector<1x2x4xf32>
		%r = vector.extract %b[0, 1, 2] : vector<1x2x4xf32>
		return %r : f32
		}

		// -----

		// Extracting a vector<4xf32> from a broadcast whose source is also a
		// vector<4xf32>: the extract result type equals the broadcast source type,
		// so the CHECK lines expect the source to be returned directly.
		// CHECK-LABEL: fold_extract_broadcast_vector
		// CHECK-SAME: %[[A:.*]]: vector<4xf32>
		// CHECK: return %[[A]] : vector<4xf32>
		func @fold_extract_broadcast_vector(%a : vector<4xf32>) -> vector<4xf32> {
		%b = vector.broadcast %a : vector<4xf32> to vector<1x2x4xf32>
		%r = vector.extract %b[0, 1] : vector<1x2x4xf32>
		return %r : vector<4xf32>
		}

		// -----

		// Extracting a scalar from a broadcast of a vector<4xf32>: the extract
		// result has lower rank than the broadcast source, so the CHECK lines expect
		// the extract to read from the source directly, keeping only the trailing
		// position index ([0, 1, 2] becomes [2]).
		// CHECK-LABEL: fold_extract_broadcast
		// CHECK-SAME: %[[A:.*]]: vector<4xf32>
		// CHECK: %[[R:.*]] = vector.extract %[[A]][2] : vector<4xf32>
		// CHECK: return %[[R]] : f32
		func @fold_extract_broadcast(%a : vector<4xf32>) -> f32 {
		%b = vector.broadcast %a : vector<4xf32> to vector<1x2x4xf32>
		%r = vector.extract %b[0, 1, 2] : vector<1x2x4xf32>
		return %r : f32
		}

		// -----

		// Negative test for extract_op folding when the type of broadcast source
		// doesn't match the type of vector.extract.
		// CHECK-LABEL: fold_extract_broadcast_negative
		// CHECK: %[[B:.*]] = vector.broadcast %{{.*}} : f32 to vector<1x2x4xf32>
		// CHECK: %[[R:.*]] = vector.extract %[[B]][0, 1] : vector<1x2x4xf32>
		// CHECK: return %[[R]] : vector<4xf32>
		func @fold_extract_broadcast_negative(%a : f32) -> vector<4xf32> {
		%b = vector.broadcast %a : f32 to vector<1x2x4xf32>
		%r = vector.extract %b[0, 1] : vector<1x2x4xf32>
		return %r : vector<4xf32>
		}

		// -----

// CHECK-LABEL: fold_vector_transfers		// CHECK-LABEL: fold_vector_transfers
func @fold_vector_transfers(%A: memref<?x8xf32>) -> (vector<4x8xf32>, vector<4x9xf32>) {		func @fold_vector_transfers(%A: memref<?x8xf32>) -> (vector<4x8xf32>, vector<4x9xf32>) {
%c0 = constant 0 : index		%c0 = constant 0 : index
%f0 = constant 0.0 : f32		%f0 = constant 0.0 : f32

// CHECK: vector.transfer_read %{{.*}} {masked = [true, false]}		// CHECK: vector.transfer_read %{{.*}} {masked = [true, false]}
%1 = vector.transfer_read %A[%c0, %c0], %f0 : memref<?x8xf32>, vector<4x8xf32>		%1 = vector.transfer_read %A[%c0, %c0], %f0 : memref<?x8xf32>, vector<4x8xf32>

▲ Show 20 Lines • Show All 54 Lines • Show Last 20 Lines

This is an archive of the discontinued LLVM Phabricator instance.

[mlir][vector] Fold extractOp coming from broadcastOp
ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 296496

mlir/lib/Dialect/Vector/VectorOps.cpp

mlir/test/Dialect/Vector/canonicalize.mlir

This is an archive of the discontinued LLVM Phabricator instance.

[mlir][vector] Fold extractOp coming from broadcastOp
ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 296496

mlir/lib/Dialect/Vector/VectorOps.cpp

mlir/test/Dialect/Vector/canonicalize.mlir

[mlir][vector] Fold extractOp coming from broadcastOp
ClosedPublic