This is an archive of the discontinued LLVM Phabricator instance.

[mlir][SCF] Add a ParallelCombiningOpInterface to decouple scf::PerformConcurrently from its contained operations
ClosedPublic

Authored by nicolasvasilache on Jun 29 2022, 2:39 PM.

Download Raw Diff

Details

Reviewers

ThomasRaoux
christopherbate
ftynse

Commits

rGb994d388aeb2: [mlir][SCF] Add a ParallelCombiningOpInterface to decouple scf…

Summary

This allows purging references of scf.ForeachThreadOp and scf.PerformConcurrentlyOp from
ParallelInsertSliceOp.
This will allow moving the op closer to tensor::InsertSliceOp with which it should share much more
code.

In the future, the decoupling will also allow extending the type of ops that can be used in the
parallel combinator as well as semantics related to multiple concurrent inserts to the same
result.

Diff Detail

Repository: rG LLVM Github Monorepo

Event Timeline

nicolasvasilache created this revision.Jun 29 2022, 2:39 PM

Herald added a project: Restricted Project. · View Herald TranscriptJun 29 2022, 2:39 PM

Herald added subscribers: bzcheeseman, sdasgup3, wenzhicui and 21 others. · View Herald Transcript

nicolasvasilache requested review of this revision.Jun 29 2022, 2:39 PM

Herald added a project: Restricted Project. · View Herald TranscriptJun 29 2022, 2:39 PM

Herald added a subscriber: stephenneuendorffer. · View Herald Transcript

Harbormaster completed remote builds in B172874: Diff 441173.Jun 29 2022, 3:51 PM

Add another helper method.

Harbormaster completed remote builds in B172997: Diff 441339.Jun 30 2022, 4:09 AM

Rebase + fixes.

Harbormaster completed remote builds in B173037: Diff 441394.Jun 30 2022, 8:00 AM

nicolasvasilache added a child revision: D128920: [mlir][Tensor] Move ParallelInsertSlice to the tensor dialect.Jun 30 2022, 8:23 AM

LGTM

This revision is now accepted and ready to land.Jun 30 2022, 9:20 AM

Closed by commit rGb994d388aeb2: [mlir][SCF] Add a ParallelCombiningOpInterface to decouple scf… (authored by nicolasvasilache). · Explain WhyJul 1 2022, 12:18 AM

This revision was automatically updated to reflect the committed changes.

nicolasvasilache added a commit: rGb994d388aeb2: [mlir][SCF] Add a ParallelCombiningOpInterface to decouple scf….

Herald added a subscriber: anlunx. · View Herald TranscriptJul 1 2022, 12:18 AM

Revision Contents

Path

Size

mlir/

include/

mlir/

Dialect/

SCF/

IR/

SCF.h

1 line

SCFOps.td

16 lines

Interfaces/

CMakeLists.txt

1 line

ParallelCombiningOpInterface.h

24 lines

ParallelCombiningOpInterface.td

71 lines

lib/

Dialect/

SCF/

IR/

CMakeLists.txt

1 line

SCF.cpp

33 lines

Transforms/

BufferizableOpInterfaceImpl.cpp

38 lines

Interfaces/

CMakeLists.txt

2 lines

ParallelCombiningOpInterface.cpp

18 lines

Diff 441339

mlir/include/mlir/Dialect/SCF/IR/SCF.h

	Show All 12 Lines
	#ifndef MLIR_DIALECT_SCF_SCF_H			#ifndef MLIR_DIALECT_SCF_SCF_H
	#define MLIR_DIALECT_SCF_SCF_H			#define MLIR_DIALECT_SCF_SCF_H

	#include "mlir/IR/Builders.h"			#include "mlir/IR/Builders.h"
	#include "mlir/IR/BuiltinTypes.h"			#include "mlir/IR/BuiltinTypes.h"
	#include "mlir/IR/RegionKindInterface.h"			#include "mlir/IR/RegionKindInterface.h"
	#include "mlir/Interfaces/ControlFlowInterfaces.h"			#include "mlir/Interfaces/ControlFlowInterfaces.h"
	#include "mlir/Interfaces/LoopLikeInterface.h"			#include "mlir/Interfaces/LoopLikeInterface.h"
				#include "mlir/Interfaces/ParallelCombiningOpInterface.h"
	#include "mlir/Interfaces/SideEffectInterfaces.h"			#include "mlir/Interfaces/SideEffectInterfaces.h"
	#include "mlir/Interfaces/ViewLikeInterface.h"			#include "mlir/Interfaces/ViewLikeInterface.h"

	namespace mlir {			namespace mlir {
	namespace scf {			namespace scf {
	void buildTerminatedBody(OpBuilder &builder, Location loc);			void buildTerminatedBody(OpBuilder &builder, Location loc);
	} // namespace scf			} // namespace scf
	} // namespace mlir			} // namespace mlir
	▲ Show 20 Lines • Show All 76 Lines • Show Last 20 Lines

mlir/include/mlir/Dialect/SCF/IR/SCFOps.td

Show All 10 Lines
//===----------------------------------------------------------------------===//		//===----------------------------------------------------------------------===//

#ifndef MLIR_DIALECT_SCF_SCFOPS		#ifndef MLIR_DIALECT_SCF_SCFOPS
#define MLIR_DIALECT_SCF_SCFOPS		#define MLIR_DIALECT_SCF_SCFOPS

include "mlir/Interfaces/ControlFlowInterfaces.td"		include "mlir/Interfaces/ControlFlowInterfaces.td"
include "mlir/Interfaces/LoopLikeInterface.td"		include "mlir/Interfaces/LoopLikeInterface.td"
include "mlir/IR/RegionKindInterface.td"		include "mlir/IR/RegionKindInterface.td"
		include "mlir/Interfaces/ParallelCombiningOpInterface.td"
include "mlir/Interfaces/SideEffectInterfaces.td"		include "mlir/Interfaces/SideEffectInterfaces.td"
include "mlir/Interfaces/ViewLikeInterface.td"		include "mlir/Interfaces/ViewLikeInterface.td"

def SCF_Dialect : Dialect {		def SCF_Dialect : Dialect {
let name = "scf";		let name = "scf";
let cppNamespace = "::mlir::scf";		let cppNamespace = "::mlir::scf";
let dependentDialects = ["arith::ArithmeticDialect"];		let dependentDialects = ["arith::ArithmeticDialect"];
let emitAccessorPrefix = kEmitAccessorPrefix_Prefixed;		let emitAccessorPrefix = kEmitAccessorPrefix_Prefixed;
▲ Show 20 Lines • Show All 436 Lines • ▼ Show 20 Lines

//===----------------------------------------------------------------------===//		//===----------------------------------------------------------------------===//
// PerformConcurrentlyOp		// PerformConcurrentlyOp
//===----------------------------------------------------------------------===//		//===----------------------------------------------------------------------===//

def PerformConcurrentlyOp : SCF_Op<"foreach_thread.perform_concurrently", [		def PerformConcurrentlyOp : SCF_Op<"foreach_thread.perform_concurrently", [
NoSideEffect,		NoSideEffect,
Terminator,		Terminator,
		DeclareOpInterfaceMethods<ParallelCombiningOpInterface>,
HasParent<"ForeachThreadOp">,		HasParent<"ForeachThreadOp">,
] # GraphRegionNoTerminator.traits> {		] # GraphRegionNoTerminator.traits> {
let summary = "terminates a `foreach_thread` block";		let summary = "terminates a `foreach_thread` block";
let description = [{		let description = [{
`scf.foreach_thread.perform_concurrently` is a designated terminator for		`scf.foreach_thread.perform_concurrently` is a designated terminator for
the `scf.foreach_thread` operation.		the `scf.foreach_thread` operation.

It has a single region with a single block that contains a flat list of ops.		It has a single region with a single block that contains a flat list of ops.
Show All 11 Lines	def PerformConcurrentlyOp : SCF_Op<"foreach_thread.perform_concurrently", [
let skipDefaultBuilders = 1;		let skipDefaultBuilders = 1;
let builders = [		let builders = [
OpBuilder<(ins)>,		OpBuilder<(ins)>,
];		];

// TODO: Add a `PerformConcurrentlyOpInterface` interface for ops that can		// TODO: Add a `PerformConcurrentlyOpInterface` interface for ops that can
// appear inside perform_concurrently.		// appear inside perform_concurrently.
let extraClassDeclaration = [{		let extraClassDeclaration = [{
SmallVector<Type> yieldedTypes();		::llvm::SmallVector<::mlir::Type> getYieldedTypes();
::llvm::iterator_range<Block::iterator> yieldingOps();		::llvm::iterator_range<::mlir::Block::iterator> getYieldingOps();
		::mlir::OpResult getParentResult(int64_t idx);
}];		}];
}		}

//===----------------------------------------------------------------------===//		//===----------------------------------------------------------------------===//
// ParallelInsertSliceOp		// ParallelInsertSliceOp
//===----------------------------------------------------------------------===//		//===----------------------------------------------------------------------===//

// TODO: Implement PerformConcurrentlyOpInterface.		// TODO: Implement PerformConcurrentlyOpInterface.
def ParallelInsertSliceOp : SCF_Op<"foreach_thread.parallel_insert_slice", [		def ParallelInsertSliceOp : SCF_Op<"foreach_thread.parallel_insert_slice", [
AttrSizedOperandSegments,		AttrSizedOperandSegments,
OffsetSizeAndStrideOpInterface,		OffsetSizeAndStrideOpInterface,
HasParent<"PerformConcurrentlyOp">]> {		// TODO: Cannot use an interface here atm, verify this manually for now.
		// HasParent<"ParallelCombiningOpInterface">
		]> {
let summary = [{		let summary = [{
Specify the tensor slice update of a single thread within the terminator of		Specify the tensor slice update of a single thread within the terminator of
an `scf.foreach_thread`.		an `scf.foreach_thread`.
}];		}];
let description = [{		let description = [{
The parent `scf.foreach_thread` returns values that are formed by aggregating		The parent `scf.foreach_thread` returns values that are formed by aggregating
the actions of all the ops contained within the `perform_concurrently`		the actions of all the ops contained within the `perform_concurrently`
terminator of all the threads, in some unspecified order.		terminator of all the threads, in some unspecified order.
▲ Show 20 Lines • Show All 43 Lines • ▼ Show 20 Lines	let extraClassDeclaration = [{
::mlir::ArrayAttr static_strides() { return getStaticStrides(); }		::mlir::ArrayAttr static_strides() { return getStaticStrides(); }

Type yieldedType() { return getDest().getType(); }		Type yieldedType() { return getDest().getType(); }

RankedTensorType getSourceType() {		RankedTensorType getSourceType() {
return getSource().getType().cast<RankedTensorType>();		return getSource().getType().cast<RankedTensorType>();
}		}

		ParallelCombiningOpInterface getParallelCombiningParent() {
		return dyn_cast<ParallelCombiningOpInterface>(
		getOperation()->getParentOp());
		}

/// Return the expected rank of each of the `static_offsets`, `static_sizes`		/// Return the expected rank of each of the `static_offsets`, `static_sizes`
/// and `static_strides` attributes.		/// and `static_strides` attributes.
std::array<unsigned, 3> getArrayAttrMaxRanks() {		std::array<unsigned, 3> getArrayAttrMaxRanks() {
unsigned rank = getSourceType().getRank();		unsigned rank = getSourceType().getRank();
return {rank, rank, rank};		return {rank, rank, rank};
}		}

/// Return the number of leading operands before `offsets`, `sizes` and		/// Return the number of leading operands before `offsets`, `sizes` and
▲ Show 20 Lines • Show All 454 Lines • Show Last 20 Lines

mlir/include/mlir/Interfaces/CMakeLists.txt

	add_mlir_interface(CallInterfaces)			add_mlir_interface(CallInterfaces)
	add_mlir_interface(CastInterfaces)			add_mlir_interface(CastInterfaces)
	add_mlir_interface(ControlFlowInterfaces)			add_mlir_interface(ControlFlowInterfaces)
	add_mlir_interface(CopyOpInterface)			add_mlir_interface(CopyOpInterface)
	add_mlir_interface(DerivedAttributeOpInterface)			add_mlir_interface(DerivedAttributeOpInterface)
	add_mlir_interface(InferIntRangeInterface)			add_mlir_interface(InferIntRangeInterface)
	add_mlir_interface(InferTypeOpInterface)			add_mlir_interface(InferTypeOpInterface)
	add_mlir_interface(LoopLikeInterface)			add_mlir_interface(LoopLikeInterface)
				add_mlir_interface(ParallelCombiningOpInterface)
	add_mlir_interface(SideEffectInterfaces)			add_mlir_interface(SideEffectInterfaces)
	add_mlir_interface(TilingInterface)			add_mlir_interface(TilingInterface)
	add_mlir_interface(VectorInterfaces)			add_mlir_interface(VectorInterfaces)
	add_mlir_interface(ViewLikeInterface)			add_mlir_interface(ViewLikeInterface)

	set(LLVM_TARGET_DEFINITIONS DataLayoutInterfaces.td)			set(LLVM_TARGET_DEFINITIONS DataLayoutInterfaces.td)
	mlir_tablegen(DataLayoutAttrInterface.h.inc -gen-attr-interface-decls)			mlir_tablegen(DataLayoutAttrInterface.h.inc -gen-attr-interface-decls)
	mlir_tablegen(DataLayoutAttrInterface.cpp.inc -gen-attr-interface-defs)			mlir_tablegen(DataLayoutAttrInterface.cpp.inc -gen-attr-interface-defs)
	Show All 21 Lines

mlir/include/mlir/Interfaces/ParallelCombiningOpInterface.h

This file was added.

				//===- ParallelCombiningOpInterface.h - Parallel combining op interface ---===//
				//
				// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
				// See https://llvm.org/LICENSE.txt for license information.
				// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
				//
				//===----------------------------------------------------------------------===//
				//
				// This file implements the operation interface for ops that parallel combining
				// operations.
				//
				//===----------------------------------------------------------------------===//

				#ifndef MLIR_INTERFACES_PARALLELCOMBININGOPINTERFACE_H_
				#define MLIR_INTERFACES_PARALLELCOMBININGOPINTERFACE_H_

				#include "mlir/IR/OpDefinition.h"

				namespace mlir {} // namespace mlir

				/// Include the generated interface declarations.
				#include "mlir/Interfaces/ParallelCombiningOpInterface.h.inc"

				#endif // MLIR_INTERFACES_PARALLELCOMBININGOPINTERFACE_H_

mlir/include/mlir/Interfaces/ParallelCombiningOpInterface.td

This file was added.

				//===- ParallelCombiningOpInterface.td - Parallel iface ----- tablegen --===//
				//
				// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
				// See https://llvm.org/LICENSE.txt for license information.
				// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
				//
				//===----------------------------------------------------------------------===//
				//
				// Defines the interface for ops that perform parallel combining operations.
				//
				//===----------------------------------------------------------------------===//

				#ifndef MLIR_INTERFACES_PARALLELCOMBININGOPINTERFACE
				#define MLIR_INTERFACES_PARALLELCOMBININGOPINTERFACE

				include "mlir/IR/OpBase.td"

				def ParallelCombiningOpInterface : OpInterface<"ParallelCombiningOpInterface"> {
				let description = [{
				A parallel combining op is an op with a region, that is not isolated from
				above and yields values to its parent op without itself returning an SSA
				value. The yielded values are determined by subvalues produced by the ops
				contained in the region (the `yieldingOps`) and combined in any unspecified
				order to produce the values yielded to the parent op.

				This is useful as a terminator to parallel operations that iterate over
				some set and return tensors while avoiding tight coupling between the
				iterating op, the combining op and the individual subtensor producing ops.
				}];
				let cppNamespace = "::mlir";

				let methods = [
				InterfaceMethod<
				/desc=/[{
				Return `idx`^th result of the parent operation.
				}],
				/retTy=/"::mlir::OpResult",
				/methodName=/"getParentResult",
				/args=/(ins "int64_t":$idx),
				/methodBody=/[{
				return $_op.getParentResult(idx);
				}]
				>,
				InterfaceMethod<
				/desc=/[{
				Return the contained ops that yield subvalues that this op combines to
				yield to its parent.
				}],
				/retTy=/"::llvm::iterator_range<Block::iterator>",
				/methodName=/"getYieldingOps",
				/args=/(ins),
				/methodBody=/[{
				return $_op.getYieldingOps();
				}]
				>,
				InterfaceMethod<
				/desc=/[{
				Return the contained ops that yield subvalues that this op combines to
				yield to its parent.
				}],
				/retTy=/"::llvm::SmallVector<::mlir::Type>",
				/methodName=/"getYieldedTypes",
				/args=/(ins),
				/methodBody=/[{
				return $_op.getYieldedTypes();
				}]
				>,
				];
				}

				#endif // MLIR_INTERFACES_PARALLELCOMBININGOPINTERFACE

mlir/lib/Dialect/SCF/IR/CMakeLists.txt

	add_mlir_dialect_library(MLIRSCFDialect			add_mlir_dialect_library(MLIRSCFDialect
	SCF.cpp			SCF.cpp

	ADDITIONAL_HEADER_DIRS			ADDITIONAL_HEADER_DIRS
	${MLIR_MAIN_INCLUDE_DIR}/mlir/SCF			${MLIR_MAIN_INCLUDE_DIR}/mlir/SCF

	DEPENDS			DEPENDS
	MLIRSCFOpsIncGen			MLIRSCFOpsIncGen

	LINK_LIBS PUBLIC			LINK_LIBS PUBLIC
	MLIRArithmeticDialect			MLIRArithmeticDialect
	MLIRBufferizationDialect			MLIRBufferizationDialect
	MLIRControlFlowDialect			MLIRControlFlowDialect
	MLIRIR			MLIRIR
	MLIRLoopLikeInterface			MLIRLoopLikeInterface
				MLIRParallelCombiningOpInterface
	MLIRSideEffectInterfaces			MLIRSideEffectInterfaces
	)			)

mlir/lib/Dialect/SCF/IR/SCF.cpp

Show First 20 Lines • Show All 1,055 Lines • ▼ Show 20 Lines	if (failed(getTerminator().verify()))
return failure();		return failure();

// Check that the body defines as single block argument for the thread index.		// Check that the body defines as single block argument for the thread index.
auto *body = getBody();		auto *body = getBody();
if (body->getNumArguments() != getRank())		if (body->getNumArguments() != getRank())
return emitOpError("region expects ") << getRank() << " arguments";		return emitOpError("region expects ") << getRank() << " arguments";

// Verify consistency between the result types and the terminator.		// Verify consistency between the result types and the terminator.
auto terminatorTypes = getTerminator().yieldedTypes();		auto terminatorTypes = getTerminator().getYieldedTypes();
auto opResults = getResults();		auto opResults = getResults();
if (opResults.size() != terminatorTypes.size())		if (opResults.size() != terminatorTypes.size())
return emitOpError("produces ")		return emitOpError("produces ")
<< opResults.size() << " results, but its terminator yields "		<< opResults.size() << " results, but its terminator yields "
<< terminatorTypes.size() << " value(s)";		<< terminatorTypes.size() << " value(s)";
unsigned i = 0;		unsigned i = 0;
for (auto e : llvm::zip(terminatorTypes, opResults)) {		for (auto e : llvm::zip(terminatorTypes, opResults)) {
if (std::get<0>(e) != std::get<1>(e).getType())		if (std::get<0>(e) != std::get<1>(e).getType())
▲ Show 20 Lines • Show All 104 Lines • ▼ Show 20 Lines	void ForeachThreadOp::build(

OpBuilder::InsertionGuard guard(builder);		OpBuilder::InsertionGuard guard(builder);
builder.setInsertionPointToStart(&bodyBlock);		builder.setInsertionPointToStart(&bodyBlock);
bodyBuilder(builder, result.location, bodyBlock.getArguments());		bodyBuilder(builder, result.location, bodyBlock.getArguments());
auto terminator =		auto terminator =
llvm::dyn_cast<PerformConcurrentlyOp>(bodyBlock.getTerminator());		llvm::dyn_cast<PerformConcurrentlyOp>(bodyBlock.getTerminator());
assert(terminator &&		assert(terminator &&
"expected bodyBuilder to create PerformConcurrentlyOp terminator");		"expected bodyBuilder to create PerformConcurrentlyOp terminator");
result.addTypes(terminator.yieldedTypes());		result.addTypes(terminator.getYieldedTypes());
}		}

// The ensureTerminator method generated by SingleBlockImplicitTerminator is		// The ensureTerminator method generated by SingleBlockImplicitTerminator is
// unaware of the fact that our terminator also needs a region to be		// unaware of the fact that our terminator also needs a region to be
// well-formed. We override it here to ensure that we do the right thing.		// well-formed. We override it here to ensure that we do the right thing.
void ForeachThreadOp::ensureTerminator(Region &region, OpBuilder &builder,		void ForeachThreadOp::ensureTerminator(Region &region, OpBuilder &builder,
Location loc) {		Location loc) {
OpTrait::SingleBlockImplicitTerminator<PerformConcurrentlyOp>::Impl<		OpTrait::SingleBlockImplicitTerminator<PerformConcurrentlyOp>::Impl<
Show All 17 Lines	ForeachThreadOp mlir::scf::getForeachThreadOpThreadIndexOwner(Value val) {
return dyn_cast<ForeachThreadOp>(containingOp);		return dyn_cast<ForeachThreadOp>(containingOp);
}		}

//===----------------------------------------------------------------------===//		//===----------------------------------------------------------------------===//
// ParallelInsertSliceOp		// ParallelInsertSliceOp
//===----------------------------------------------------------------------===//		//===----------------------------------------------------------------------===//

OpResult ParallelInsertSliceOp::getTiedOpResult() {		OpResult ParallelInsertSliceOp::getTiedOpResult() {
auto foreachThreadOp = getOperation()->getParentOfType<ForeachThreadOp>();		ParallelCombiningOpInterface parallelCombiningParent =
assert(foreachThreadOp && "unlinked ParallelInsertSliceOp");		getParallelCombiningParent();
PerformConcurrentlyOp performConcurrentlyOp = foreachThreadOp.getTerminator();		for (const auto &it :
for (const auto &it : llvm::enumerate(performConcurrentlyOp.yieldingOps())) {		llvm::enumerate(parallelCombiningParent.getYieldingOps())) {
Operation &nextOp = it.value();		Operation &nextOp = it.value();
if (&nextOp == getOperation())		if (&nextOp == getOperation())
return foreachThreadOp->getResult(it.index());		return parallelCombiningParent.getParentResult(it.index());
}		}
llvm_unreachable("ParallelInsertSliceOp not found");		llvm_unreachable("ParallelInsertSliceOp no tied OpResult found");
}		}

// Build a ParallelInsertSliceOp with mixed static and dynamic entries.		// Build a ParallelInsertSliceOp with mixed static and dynamic entries.
void ParallelInsertSliceOp::build(OpBuilder &b, OperationState &result,		void ParallelInsertSliceOp::build(OpBuilder &b, OperationState &result,
Value source, Value dest,		Value source, Value dest,
ArrayRef<OpFoldResult> offsets,		ArrayRef<OpFoldResult> offsets,
ArrayRef<OpFoldResult> sizes,		ArrayRef<OpFoldResult> sizes,
ArrayRef<OpFoldResult> strides,		ArrayRef<OpFoldResult> strides,
Show All 21 Lines	SmallVector<OpFoldResult> offsetValues = llvm::to_vector<4>(
llvm::map_range(offsets, [](Value v) -> OpFoldResult { return v; }));		llvm::map_range(offsets, [](Value v) -> OpFoldResult { return v; }));
SmallVector<OpFoldResult> sizeValues = llvm::to_vector<4>(		SmallVector<OpFoldResult> sizeValues = llvm::to_vector<4>(
llvm::map_range(sizes, [](Value v) -> OpFoldResult { return v; }));		llvm::map_range(sizes, [](Value v) -> OpFoldResult { return v; }));
SmallVector<OpFoldResult> strideValues = llvm::to_vector<4>(		SmallVector<OpFoldResult> strideValues = llvm::to_vector<4>(
llvm::map_range(strides, [](Value v) -> OpFoldResult { return v; }));		llvm::map_range(strides, [](Value v) -> OpFoldResult { return v; }));
build(b, result, source, dest, offsetValues, sizeValues, strideValues);		build(b, result, source, dest, offsetValues, sizeValues, strideValues);
}		}

		LogicalResult ParallelInsertSliceOp::verify() {
		if (!isa<ParallelCombiningOpInterface>(getOperation()->getParentOp()))
		return this->emitError("expected ParallelCombiningOpInterface parent, got:")
		<< *(getOperation()->getParentOp());
		return success();
		}

namespace {		namespace {
/// Pattern to rewrite a parallel_insert_slice op with constant arguments.		/// Pattern to rewrite a parallel_insert_slice op with constant arguments.
class ParallelInsertSliceOpConstantArgumentFolder final		class ParallelInsertSliceOpConstantArgumentFolder final
: public OpRewritePattern<ParallelInsertSliceOp> {		: public OpRewritePattern<ParallelInsertSliceOp> {
public:		public:
using OpRewritePattern<ParallelInsertSliceOp>::OpRewritePattern;		using OpRewritePattern<ParallelInsertSliceOp>::OpRewritePattern;

LogicalResult matchAndRewrite(ParallelInsertSliceOp insertSliceOp,		LogicalResult matchAndRewrite(ParallelInsertSliceOp insertSliceOp,
▲ Show 20 Lines • Show All 104 Lines • ▼ Show 20 Lines	ParseResult PerformConcurrentlyOp::parse(OpAsmParser &parser,
result.addRegion(std::move(region));		result.addRegion(std::move(region));

// Parse the optional attribute list.		// Parse the optional attribute list.
if (parser.parseOptionalAttrDict(result.attributes))		if (parser.parseOptionalAttrDict(result.attributes))
return failure();		return failure();
return success();		return success();
}		}

SmallVector<Type> PerformConcurrentlyOp::yieldedTypes() {		OpResult PerformConcurrentlyOp::getParentResult(int64_t idx) {
		return getOperation()->getParentOp()->getResult(idx);
		}

		SmallVector<Type> PerformConcurrentlyOp::getYieldedTypes() {
return llvm::to_vector<4>(		return llvm::to_vector<4>(
llvm::map_range(this->yieldingOps(), [](Operation &op) {		llvm::map_range(getYieldingOps(), [](Operation &op) {
auto insertSliceOp = dyn_cast<ParallelInsertSliceOp>(&op);		auto insertSliceOp = dyn_cast<ParallelInsertSliceOp>(&op);
return insertSliceOp ? insertSliceOp.yieldedType() : Type();		return insertSliceOp ? insertSliceOp.yieldedType() : Type();
}));		}));
}		}

llvm::iterator_range<Block::iterator> PerformConcurrentlyOp::yieldingOps() {		llvm::iterator_range<Block::iterator> PerformConcurrentlyOp::getYieldingOps() {
return getRegion().front().getOperations();		return getRegion().front().getOperations();
}		}

//===----------------------------------------------------------------------===//		//===----------------------------------------------------------------------===//
// IfOp		// IfOp
//===----------------------------------------------------------------------===//		//===----------------------------------------------------------------------===//

bool mlir::scf::insideMutuallyExclusiveBranches(Operation a, Operation b) {		bool mlir::scf::insideMutuallyExclusiveBranches(Operation a, Operation b) {
▲ Show 20 Lines • Show All 1,990 Lines • Show Last 20 Lines

mlir/lib/Dialect/SCF/Transforms/BufferizableOpInterfaceImpl.cpp

Show First 20 Lines • Show All 1,037 Lines • ▼ Show 20 Lines
struct ParallelInsertSliceOpInterface		struct ParallelInsertSliceOpInterface
: public BufferizableOpInterface::ExternalModel<		: public BufferizableOpInterface::ExternalModel<
ParallelInsertSliceOpInterface, ParallelInsertSliceOp> {		ParallelInsertSliceOpInterface, ParallelInsertSliceOp> {
SmallVector<OpResult> getAliasingOpResult(Operation *op, OpOperand &opOperand,		SmallVector<OpResult> getAliasingOpResult(Operation *op, OpOperand &opOperand,
const AnalysisState &state) const {		const AnalysisState &state) const {
if (&opOperand != &op->getOpOperand(1) /dest/)		if (&opOperand != &op->getOpOperand(1) /dest/)
return {};		return {};

// ParallelInsertSliceOp itself has no results. Tensors are returned via		// The i-th ParallelInsertSliceOp result is returned via its enclosing op..
// the parent op.
auto foreachThreadOp = op->getParentOfType<ForeachThreadOp>();
assert(foreachThreadOp &&
"could not find valid owner of parallel_insert_slice");

// The i-th ParallelInsertSliceOp result is returned via the i-th OpResult
// of the parent ForeachThreadOp.
Block *block = op->getBlock();		Block *block = op->getBlock();
unsigned int opIdx = 0;		unsigned int opIdx = 0;
for (ParallelInsertSliceOp insertOp :		for (ParallelInsertSliceOp insertOp :
block->getOps<ParallelInsertSliceOp>()) {		block->getOps<ParallelInsertSliceOp>()) {
if (insertOp.getOperation() == op)		if (insertOp.getOperation() == op)
break;		break;
++opIdx;		++opIdx;
}		}
assert(opIdx < foreachThreadOp->getNumResults() &&		ParallelCombiningOpInterface parallelCombiningParent =
"could not find op inside terminator op");		getParallelCombiningParent();
		return {parallelCombiningParent.getParentResult(opIdx)};
return {foreachThreadOp->getResult(opIdx)};
}		}

bool bufferizesToMemoryRead(Operation *op, OpOperand &opOperand,		bool bufferizesToMemoryRead(Operation *op, OpOperand &opOperand,
const AnalysisState &state) const {		const AnalysisState &state) const {
return true;		return true;
}		}

bool bufferizesToMemoryWrite(Operation *op, OpOperand &opOperand,		bool bufferizesToMemoryWrite(Operation *op, OpOperand &opOperand,
Show All 28 Lines	LogicalResult resolveConflicts(Operation *op, RewriterBase &rewriter,
// perform_concurrently {		// perform_concurrently {
// parallel_insert_slice %a into %b ...		// parallel_insert_slice %a into %b ...
// parallel_insert_slice %c into %copy ...		// parallel_insert_slice %c into %copy ...
// }		// }
// }		// }

OpBuilder::InsertionGuard g(rewriter);		OpBuilder::InsertionGuard g(rewriter);
auto insertOp = cast<ParallelInsertSliceOp>(op);		auto insertOp = cast<ParallelInsertSliceOp>(op);
auto foreachThreadOp = insertOp->getParentOfType<ForeachThreadOp>();		ParallelCombiningOpInterface parallelCombiningParent =
		getParallelCombiningParent();
		Operation *parallelIteratingOp = parallelCombiningParent->getParentOp();

// Nothing to do if the destination tensor is inplace.		// Nothing to do if the destination tensor is inplace.
assert(state.isInPlace(op->getOpOperand(0) /src/) &&		assert(state.isInPlace(op->getOpOperand(0) /src/) &&
"source is always in-place");		"source is always in-place");
if (state.isInPlace(op->getOpOperand(1) /dest/))		if (state.isInPlace(op->getOpOperand(1) /dest/))
return success();		return success();

// Find corresponding OpResult.		// Find corresponding OpResult.
OpResult opResult = insertOp.getTiedOpResult();		OpResult opResult = insertOp.getTiedOpResult();

// Insert tensor allocation right before the ForeachThreadOp.		// Insert tensor allocation right before the ForeachThreadOp.
rewriter.setInsertionPoint(foreachThreadOp);		rewriter.setInsertionPoint(parallelIteratingOp);
bool isYielded = state.isTensorYielded(opResult);		bool isYielded = state.isTensorYielded(opResult);
FailureOr<Value> alloc =		FailureOr<Value> alloc =
allocateTensorForShapedValue(rewriter, op->getLoc(), insertOp.getDest(),		allocateTensorForShapedValue(rewriter, op->getLoc(), insertOp.getDest(),
/escape=/isYielded, state.getOptions());		/escape=/isYielded, state.getOptions());
if (failed(alloc))		if (failed(alloc))
return failure();		return failure();

// Update destination operand.		// Update destination operand.
rewriter.updateRootInPlace(		rewriter.updateRootInPlace(
insertOp, [&]() { insertOp.getDestMutable().assign(*alloc); });		insertOp, [&]() { insertOp.getDestMutable().assign(*alloc); });

return success();		return success();
}		}

LogicalResult bufferize(Operation *op, RewriterBase &rewriter,		LogicalResult bufferize(Operation *op, RewriterBase &rewriter,
const BufferizationOptions &options) const {		const BufferizationOptions &options) const {
OpBuilder::InsertionGuard g(rewriter);		OpBuilder::InsertionGuard g(rewriter);
auto insertOp = cast<ParallelInsertSliceOp>(op);		auto insertOp = cast<ParallelInsertSliceOp>(op);
auto performConcurrentlyOp = cast<PerformConcurrentlyOp>(op->getParentOp());		ParallelCombiningOpInterface parallelCombiningParent =
auto foreachThreadOp =		getParallelCombiningParent();
cast<ForeachThreadOp>(performConcurrentlyOp->getParentOp());		Operation *parallelIteratingOp = parallelCombiningParent->getParentOp();

// Get destination buffer.		// Get destination buffer.
FailureOr<Value> destBuffer =		FailureOr<Value> destBuffer =
getBuffer(rewriter, insertOp.getDest(), options);		getBuffer(rewriter, insertOp.getDest(), options);
if (failed(destBuffer))		if (failed(destBuffer))
return failure();		return failure();

// Bufferize the ParallelInsertSliceOp outside of the PerformConcurrentlyOp.		// Bufferize the ParallelInsertSliceOp outside of `parallelCombiningParent`.
rewriter.setInsertionPoint(performConcurrentlyOp);		rewriter.setInsertionPoint(parallelCombiningParent);
FailureOr<Value> srcBuffer =		FailureOr<Value> srcBuffer =
getBuffer(rewriter, insertOp.getSource(), options);		getBuffer(rewriter, insertOp.getSource(), options);
if (failed(srcBuffer))		if (failed(srcBuffer))
return failure();		return failure();
Value subview = rewriter.create<memref::SubViewOp>(		Value subview = rewriter.create<memref::SubViewOp>(
insertOp.getLoc(), *destBuffer, insertOp.getMixedOffsets(),		insertOp.getLoc(), *destBuffer, insertOp.getMixedOffsets(),
insertOp.getMixedSizes(), insertOp.getMixedStrides());		insertOp.getMixedSizes(), insertOp.getMixedStrides());
// This memcpy will fold away if everything bufferizes in-place.		// This memcpy will fold away if everything bufferizes in-place.
if (failed(options.createMemCpy(rewriter, insertOp.getLoc(), *srcBuffer,		if (failed(options.createMemCpy(rewriter, insertOp.getLoc(), *srcBuffer,
subview)))		subview)))
return failure();		return failure();

// Replace all uses of ForeachThreadOp (just the corresponding result).		// Replace all uses of parallelIteratingOp (just the corresponding result).
rewriter.setInsertionPointAfter(foreachThreadOp);		rewriter.setInsertionPointAfter(parallelIteratingOp);
Value toTensorOp =		Value toTensorOp =
rewriter.create<ToTensorOp>(foreachThreadOp.getLoc(), *destBuffer);		rewriter.create<ToTensorOp>(parallelIteratingOp->getLoc(), *destBuffer);
// PerformConcurrentlyOp can have multiple ParallelInsertSliceOps.		// PerformConcurrentlyOp can have multiple ParallelInsertSliceOps.
SmallVector<OpOperand *> resultUses =		SmallVector<OpOperand *> resultUses =
llvm::to_vector(llvm::map_range(insertOp.getTiedOpResult().getUses(),		llvm::to_vector(llvm::map_range(insertOp.getTiedOpResult().getUses(),
[](OpOperand &use) { return &use; }));		[](OpOperand &use) { return &use; }));
for (OpOperand *use : resultUses) {		for (OpOperand *use : resultUses) {
rewriter.updateRootInPlace(use->getOwner(),		rewriter.updateRootInPlace(use->getOwner(),
[&]() { use->set(toTensorOp); });		[&]() { use->set(toTensorOp); });
}		}
▲ Show 20 Lines • Show All 103 Lines • Show Last 20 Lines

mlir/lib/Interfaces/CMakeLists.txt

	set(LLVM_OPTIONAL_SOURCES			set(LLVM_OPTIONAL_SOURCES
	CallInterfaces.cpp			CallInterfaces.cpp
	CastInterfaces.cpp			CastInterfaces.cpp
	ControlFlowInterfaces.cpp			ControlFlowInterfaces.cpp
	CopyOpInterface.cpp			CopyOpInterface.cpp
	DataLayoutInterfaces.cpp			DataLayoutInterfaces.cpp
	DerivedAttributeOpInterface.cpp			DerivedAttributeOpInterface.cpp
	InferIntRangeInterface.cpp			InferIntRangeInterface.cpp
	InferTypeOpInterface.cpp			InferTypeOpInterface.cpp
	LoopLikeInterface.cpp			LoopLikeInterface.cpp
				ParallelCombiningOpInterface.cpp
	SideEffectInterfaces.cpp			SideEffectInterfaces.cpp
	TilingInterface.cpp			TilingInterface.cpp
	VectorInterfaces.cpp			VectorInterfaces.cpp
	ViewLikeInterface.cpp			ViewLikeInterface.cpp
	)			)

	function(add_mlir_interface_library name)			function(add_mlir_interface_library name)
	add_mlir_library(MLIR${name}			add_mlir_library(MLIR${name}
	Show All 14 Lines
	add_mlir_interface_library(CallInterfaces)			add_mlir_interface_library(CallInterfaces)
	add_mlir_interface_library(CastInterfaces)			add_mlir_interface_library(CastInterfaces)
	add_mlir_interface_library(ControlFlowInterfaces)			add_mlir_interface_library(ControlFlowInterfaces)
	add_mlir_interface_library(CopyOpInterface)			add_mlir_interface_library(CopyOpInterface)
	add_mlir_interface_library(DataLayoutInterfaces)			add_mlir_interface_library(DataLayoutInterfaces)
	add_mlir_interface_library(DerivedAttributeOpInterface)			add_mlir_interface_library(DerivedAttributeOpInterface)
	add_mlir_interface_library(InferIntRangeInterface)			add_mlir_interface_library(InferIntRangeInterface)
	add_mlir_interface_library(InferTypeOpInterface)			add_mlir_interface_library(InferTypeOpInterface)
				add_mlir_interface_library(ParallelCombiningOpInterface)
	add_mlir_interface_library(SideEffectInterfaces)			add_mlir_interface_library(SideEffectInterfaces)
	add_mlir_interface_library(TilingInterface)			add_mlir_interface_library(TilingInterface)
	add_mlir_interface_library(VectorInterfaces)			add_mlir_interface_library(VectorInterfaces)
	add_mlir_interface_library(ViewLikeInterface)			add_mlir_interface_library(ViewLikeInterface)
	add_mlir_interface_library(LoopLikeInterface)			add_mlir_interface_library(LoopLikeInterface)

mlir/lib/Interfaces/ParallelCombiningOpInterface.cpp

This file was added.

				//===- ParallelCombiningOpInterface.cpp - Parallel combining op interface -===//
				//
				// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
				// See https://llvm.org/LICENSE.txt for license information.
				// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
				//
				//===----------------------------------------------------------------------===//

				#include "mlir/Interfaces/ParallelCombiningOpInterface.h"

				using namespace mlir;

				//===----------------------------------------------------------------------===//
				// ParallelCombiningOpInterface
				//===----------------------------------------------------------------------===//

				/// Include the definitions of the interface.
				#include "mlir/Interfaces/ParallelCombiningOpInterface.cpp.inc"

This is an archive of the discontinued LLVM Phabricator instance.

[mlir][SCF] Add a ParallelCombiningOpInterface to decouple scf::PerformConcurrently from its contained operations
ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 441339

mlir/include/mlir/Dialect/SCF/IR/SCF.h

mlir/include/mlir/Dialect/SCF/IR/SCFOps.td

mlir/include/mlir/Interfaces/CMakeLists.txt

mlir/include/mlir/Interfaces/ParallelCombiningOpInterface.h

mlir/include/mlir/Interfaces/ParallelCombiningOpInterface.td

mlir/lib/Dialect/SCF/IR/CMakeLists.txt

mlir/lib/Dialect/SCF/IR/SCF.cpp

mlir/lib/Dialect/SCF/Transforms/BufferizableOpInterfaceImpl.cpp

mlir/lib/Interfaces/CMakeLists.txt

mlir/lib/Interfaces/ParallelCombiningOpInterface.cpp

[mlir][SCF] Add a ParallelCombiningOpInterface to decouple scf::PerformConcurrently from its contained operations
ClosedPublic