Diff 337022

mlir/include/mlir/Dialect/Async/Passes.h

	Show All 16 Lines

	namespace mlir {			namespace mlir {

	std::unique_ptr<OperationPass<FuncOp>> createAsyncParallelForPass();			std::unique_ptr<OperationPass<FuncOp>> createAsyncParallelForPass();

	std::unique_ptr<OperationPass<FuncOp>>			std::unique_ptr<OperationPass<FuncOp>>
	createAsyncParallelForPass(int numWorkerThreads);			createAsyncParallelForPass(int numWorkerThreads);

	std::unique_ptr<OperationPass<FuncOp>> createAsyncRefCountingPass();			std::unique_ptr<OperationPass<ModuleOp>> createAsyncToAsyncRuntimePass();

	std::unique_ptr<OperationPass<FuncOp>> createAsyncRefCountingOptimizationPass();			std::unique_ptr<OperationPass<FuncOp>> createAsyncRuntimeRefCountingPass();

	std::unique_ptr<OperationPass<ModuleOp>> createAsyncToAsyncRuntimePass();			std::unique_ptr<OperationPass<FuncOp>> createAsyncRuntimeRefCountingOptPass();

	//===----------------------------------------------------------------------===//			//===----------------------------------------------------------------------===//
	// Registration			// Registration
	//===----------------------------------------------------------------------===//			//===----------------------------------------------------------------------===//

	/// Generate the code for registering passes.			/// Generate the code for registering passes.
	#define GEN_PASS_REGISTRATION			#define GEN_PASS_REGISTRATION
	#include "mlir/Dialect/Async/Passes.h.inc"			#include "mlir/Dialect/Async/Passes.h.inc"

	} // namespace mlir			} // namespace mlir

	#endif // MLIR_DIALECT_ASYNC_PASSES_H_			#endif // MLIR_DIALECT_ASYNC_PASSES_H_

mlir/include/mlir/Dialect/Async/Passes.td

Show All 18 Lines let options = [

Option<"numConcurrentAsyncExecute", "num-concurrent-async-execute", Option<"numConcurrentAsyncExecute", "num-concurrent-async-execute",

"int32_t", /*default=*/"4", "int32_t", /*default=*/"4",

"The number of async.execute operations that will be used for concurrent " "The number of async.execute operations that will be used for concurrent "

"loop execution."> "loop execution.">

]; ];

let dependentDialects = ["async::AsyncDialect", "scf::SCFDialect"]; let dependentDialects = ["async::AsyncDialect", "scf::SCFDialect"];

} }

def AsyncRefCounting : FunctionPass<"async-ref-counting"> { def AsyncToAsyncRuntime : Pass<"async-to-async-runtime", "ModuleOp"> {

let summary = "Automatic reference counting for Async dialect data types"; let summary = "Lower high level async operations (e.g. async.execute) to the"

let constructor = "mlir::createAsyncRefCountingPass()"; "explicit async.runtime and async.coro operations";

mehdi_aminiUnsubmitted

Done

let summary = "Lower high level async operations (e.g. async.execute) to the"

- "explicit async.rutime and async.coro operations";

+ "explicit async.runtime and async.coro operations";

let constructor = "mlir::createAsyncToAsyncRuntimePass()";

mehdi_amini:

let constructor = "mlir::createAsyncToAsyncRuntimePass()";

let dependentDialects = ["async::AsyncDialect"]; let dependentDialects = ["async::AsyncDialect"];

} }

def AsyncRefCountingOptimization : def AsyncRuntimeRefCounting : FunctionPass<"async-runtime-ref-counting"> {

FunctionPass<"async-ref-counting-optimization"> { let summary = "Automatic reference counting for Async runtime operations";

mehdi_aminiUnsubmitted

Done

I'd add here on which abstraction level it is intended to work (async.execute vs async.runtime/async.coro)

mehdi_amini: I'd add here on which abstraction level it is intended to work (`async.execute` vs `async.

let summary = "Optimize automatic reference counting operations for the" let description = [{

"Async dialect by removing redundant operations"; This pass works at the async runtime abtraction level, after all

let constructor = "mlir::createAsyncRefCountingOptimizationPass()"; `async.execute` and `async.await` operations are lowered to the async

runtime API calls, and async coroutine operations.

It relies on the LLVM coroutines switched-resume lowering semantics for

the correct placing of the reference counting operations.

See: https://llvm.org/docs/Coroutines.html#switched-resume-lowering

}];

let constructor = "mlir::createAsyncRuntimeRefCountingPass()";

let dependentDialects = ["async::AsyncDialect"]; let dependentDialects = ["async::AsyncDialect"];

} }

def AsyncToAsyncRuntime : Pass<"async-to-async-runtime", "ModuleOp"> { def AsyncRuntimeRefCountingOpt :

let summary = "Lower high level async operations (e.g. async.execute) to the" FunctionPass<"async-runtime-ref-counting-opt"> {

"explicit async.rutime and async.coro operations"; let summary = "Optimize automatic reference counting operations for the"

let constructor = "mlir::createAsyncToAsyncRuntimePass()"; "Async runtime by removing redundant operations";

let constructor = "mlir::createAsyncRuntimeRefCountingOptPass()";

let dependentDialects = ["async::AsyncDialect"]; let dependentDialects = ["async::AsyncDialect"];

} }

#endif // MLIR_DIALECT_ASYNC_PASSES #endif // MLIR_DIALECT_ASYNC_PASSES

mlir/lib/Dialect/Async/Transforms/AsyncRefCounting.cpp

This file was deleted.

	//===- AsyncRefCounting.cpp - Implementation of Async Ref Counting --------===//
	//
	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
	// See https://llvm.org/LICENSE.txt for license information.
	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
	//
	//===----------------------------------------------------------------------===//
	//
	// This file implements automatic reference counting for Async dialect data
	// types.
	//
	//===----------------------------------------------------------------------===//

	#include "PassDetail.h"
	#include "mlir/Analysis/Liveness.h"
	#include "mlir/Dialect/Async/IR/Async.h"
	#include "mlir/Dialect/Async/Passes.h"
	#include "mlir/Dialect/StandardOps/IR/Ops.h"
	#include "mlir/IR/PatternMatch.h"
	#include "mlir/Transforms/GreedyPatternRewriteDriver.h"
	#include "llvm/ADT/SmallSet.h"

	using namespace mlir;
	using namespace mlir::async;

	#define DEBUG_TYPE "async-ref-counting"

	namespace {

	/// Function pass that adds automatic reference counting to every block
	/// argument and operation result of a reference counted type.
	class AsyncRefCountingPass : public AsyncRefCountingBase<AsyncRefCountingPass> {
	public:
	AsyncRefCountingPass() = default;
	void runOnFunction() override;

	private:
	/// Adds an automatic reference counting to the `value`.
	///
	/// All values are semantically created with a reference count of +1 and it is
	/// the responsibility of the last async value user to drop reference count.
	///
	/// Async values created when:
	/// 1. Operation returns async result (e.g. the result of an
	/// `async.execute`).
	/// 2. Async value passed in as a block argument.
	///
	/// To implement automatic reference counting, we must insert a +1 reference
	/// before each `async.execute` operation using the value, and drop it after
	/// the last use inside the async body region (we currently drop the reference
	/// before the `async.yield` terminator).
	///
	/// Automatic reference counting algorithm outline:
	///
	/// 1. `ReturnLike` operations forward the reference counted values without
	/// modifying the reference count.
	///
	/// 2. Use liveness analysis to find blocks in the CFG where the lifetime of
	/// reference counted values ends, and insert `drop_ref` operations after
	/// the last use of the value.
	///
	/// 3. Insert `add_ref` before the `async.execute` operation capturing the
	/// value, and pairing `drop_ref` before the async body region terminator,
	/// to release the captured reference counted value when execution
	/// completes.
	///
	/// 4. If the reference counted value is passed only to some of the block
	/// successors, insert `drop_ref` operations in the beginning of the blocks
	/// that do not have reference counted value uses.
	///
	///
	/// Example:
	///
	/// %token = ...
	/// async.execute {
	/// async.await %token : !async.token // await #1
	/// async.yield
	/// }
	/// async.await %token : !async.token // await #2
	///
	/// Based on the liveness analysis await #2 is the last use of the %token,
	/// however the execution of the async region can be delayed, and to guarantee
	/// that the %token is still alive when await #1 executes we need to
	/// explicitly extend its lifetime using `add_ref` operation.
	///
	/// After automatic reference counting:
	///
	/// %token = ...
	///
	/// // Make sure that %token is alive inside async.execute.
	/// async.add_ref %token {count = 1 : i32} : !async.token
	///
	/// async.execute {
	/// async.await %token : !async.token // await #1
	///
	/// // Drop the extra reference added to keep %token alive.
	/// async.drop_ref %token {count = 1 : i32} : !async.token
	///
	/// async.yield
	/// }
	/// async.await %token : !async.token // await #2
	///
	/// // Drop the reference after the last use of %token.
	/// async.drop_ref %token {count = 1 : i32} : !async.token
	///
	/// Returns failure if the last user of the `value` in a block is a
	/// terminator that is not `ReturnLike`.
	LogicalResult addAutomaticRefCounting(Value value);
	};

	} // namespace

	/// Inserts the `add_ref` / `drop_ref` operations required to manage the
	/// lifetime of a single reference counted `value`:
	///   * a `drop_ref` right after the definition if the value has no uses;
	///   * a `drop_ref` after the last user in every block where the value dies;
	///   * a `drop_ref` at the start of every successor block that does not have
	///     the value in its live-in set;
	///   * an `add_ref` before / `drop_ref` inside every `async.execute`
	///     operation that uses or captures the value.
	///
	/// Returns failure (after emitting an error) if the last user of the value in
	/// some block is a terminator that is not `ReturnLike`.
	LogicalResult AsyncRefCountingPass::addAutomaticRefCounting(Value value) {
	  MLIRContext *ctx = value.getContext();
	  OpBuilder builder(ctx);

	  // Set insertion point after the operation producing a value, or at the
	  // beginning of the block if the value is defined by the block argument.
	  if (Operation *op = value.getDefiningOp())
	    builder.setInsertionPointAfter(op);
	  else
	    builder.setInsertionPointToStart(value.getParentBlock());

	  Location loc = value.getLoc();
	  auto i32 = IntegerType::get(ctx, 32);

	  // Drop the reference count immediately if the value has no uses.
	  if (value.getUses().empty()) {
	    builder.create<RuntimeDropRefOp>(loc, value, IntegerAttr::get(i32, 1));
	    return success();
	  }

	  // Use liveness analysis to find the placement of the `drop_ref` operation.
	  // Bind by reference: the analysis is cached by the pass manager and copying
	  // it for every processed value is wasted work.
	  auto &liveness = getAnalysis<Liveness>();

	  // We analyse only the blocks of the region that defines the `value`, and do
	  // not check nested blocks attached to operations.
	  //
	  // By analyzing only the `definingRegion` CFG we potentially lose an
	  // opportunity to drop the reference count earlier and can extend the
	  // lifetime of reference counted value longer than it is really required.
	  //
	  // We also assume that all nested regions finish their execution before the
	  // completion of the owner operation. The only exception to this rule is
	  // `async.execute` operation, which is handled explicitly below.
	  Region *definingRegion = value.getParentRegion();

	  // ------------------------------------------------------------------------ //
	  // Find blocks where the `value` dies: the value is in `liveIn` set and not
	  // in the `liveOut` set. We place `drop_ref` immediately after the last use
	  // of the `value` in such regions.
	  // ------------------------------------------------------------------------ //

	  // Last users of the `value` inside all blocks where the value dies.
	  llvm::SmallSet<Operation *, 4> lastUsers;

	  for (Block &block : definingRegion->getBlocks()) {
	    const LivenessBlockInfo *blockLiveness = liveness.getLiveness(&block);

	    // Value in live input set or was defined in the block.
	    bool liveIn = blockLiveness->isLiveIn(value) ||
	                  blockLiveness->getBlock() == value.getParentBlock();
	    if (!liveIn)
	      continue;

	    // Value is in the live out set.
	    bool liveOut = blockLiveness->isLiveOut(value);
	    if (liveOut)
	      continue;

	    // We proved that `value` dies in the `block`. Now find the last use of
	    // the `value` inside the `block`.

	    // Find any user of the `value` inside the block (including uses in nested
	    // regions attached to the operations in the block).
	    Operation *userInTheBlock = nullptr;
	    for (Operation *user : value.getUsers()) {
	      userInTheBlock = block.findAncestorOpInBlock(*user);
	      if (userInTheBlock)
	        break;
	    }

	    // Values with zero users are handled explicitly in the beginning; a value
	    // that dies in the block must have at least one use in that block.
	    assert(userInTheBlock && "value must have a user in the block");

	    // Find the last user of the `value` in the block.
	    Operation *lastUser = blockLiveness->getEndOperation(value, userInTheBlock);
	    assert(lastUsers.count(lastUser) == 0 && "last users must be unique");
	    lastUsers.insert(lastUser);
	  }

	  // Process all the last users of the `value` inside each block where the
	  // value dies.
	  for (Operation *lastUser : lastUsers) {
	    // Return like operations forward reference count.
	    if (lastUser->hasTrait<OpTrait::ReturnLike>())
	      continue;

	    // We can't currently handle other types of terminators.
	    if (lastUser->hasTrait<OpTrait::IsTerminator>())
	      return lastUser->emitError() << "async reference counting can't handle "
	                                      "terminators that are not ReturnLike";

	    // Add a drop_ref immediately after the last user.
	    builder.setInsertionPointAfter(lastUser);
	    builder.create<RuntimeDropRefOp>(loc, value, IntegerAttr::get(i32, 1));
	  }

	  // ------------------------------------------------------------------------ //
	  // Find blocks where the `value` is in `liveOut` set, however it is not in
	  // the `liveIn` set of all successors. If the `value` is not in the successor
	  // `liveIn` set, we add a `drop_ref` to the beginning of it.
	  // ------------------------------------------------------------------------ //

	  // Successors that we'll need a `drop_ref` for the `value`.
	  llvm::SmallSet<Block *, 4> dropRefSuccessors;

	  for (Block &block : definingRegion->getBlocks()) {
	    const LivenessBlockInfo *blockLiveness = liveness.getLiveness(&block);

	    // Skip the block if value is not in the `liveOut` set.
	    if (!blockLiveness->isLiveOut(value))
	      continue;

	    // Find successors that do not have `value` in the `liveIn` set.
	    for (Block *successor : block.getSuccessors()) {
	      const LivenessBlockInfo *succLiveness = liveness.getLiveness(successor);

	      if (!succLiveness->isLiveIn(value))
	        dropRefSuccessors.insert(successor);
	    }
	  }

	  // Drop reference in all successor blocks that do not have the `value` in
	  // their `liveIn` set.
	  for (Block *dropRefSuccessor : dropRefSuccessors) {
	    builder.setInsertionPointToStart(dropRefSuccessor);
	    builder.create<RuntimeDropRefOp>(loc, value, IntegerAttr::get(i32, 1));
	  }

	  // ------------------------------------------------------------------------ //
	  // Find all `async.execute` operation that take `value` as an operand
	  // (dependency token or async value), or capture implicitly by the nested
	  // region. Each `async.execute` operation will require `add_ref` operation
	  // to keep all captured values alive until it will finish its execution.
	  // ------------------------------------------------------------------------ //

	  llvm::SmallSet<ExecuteOp, 4> executeOperations;

	  auto trackAsyncExecute = [&](Operation *op) {
	    if (auto execute = dyn_cast<ExecuteOp>(op))
	      executeOperations.insert(execute);
	  };

	  for (Operation *user : value.getUsers()) {
	    // Follow parent operations up until the operation in the
	    // `definingRegion`.
	    while (user->getParentRegion() != definingRegion) {
	      trackAsyncExecute(user);
	      user = user->getParentOp();
	      assert(user != nullptr && "value user lies outside of the value region");
	    }

	    // Don't forget to process the parent in the `definingRegion` (can be the
	    // original user operation itself).
	    trackAsyncExecute(user);
	  }

	  // Process all `async.execute` operations capturing `value`.
	  for (ExecuteOp execute : executeOperations) {
	    // Add a reference before the execute operation to keep the reference
	    // counted value alive until the async region completes execution.
	    builder.setInsertionPoint(execute.getOperation());
	    builder.create<RuntimeAddRefOp>(loc, value, IntegerAttr::get(i32, 1));

	    // Drop the reference inside the async region before completion.
	    OpBuilder executeBuilder = OpBuilder::atBlockTerminator(execute.getBody());
	    executeBuilder.create<RuntimeDropRefOp>(loc, value,
	                                            IntegerAttr::get(i32, 1));
	  }

	  return success();
	}

	/// Pass entry point. Rejects functions that already contain explicit
	/// `add_ref` / `drop_ref` operations, then adds automatic reference counting
	/// to every reference counted block argument and operation result.
	///
	/// The pass stops at the first failure: continuing after a failure has been
	/// signaled would only mutate IR that is already known to be unsupported.
	void AsyncRefCountingPass::runOnFunction() {
	  FuncOp func = getFunction();

	  // Check that we do not have explicit `add_ref` or `drop_ref` in the IR
	  // because otherwise automatic reference counting will produce incorrect
	  // results.
	  WalkResult refCountingWalk = func.walk([&](Operation *op) -> WalkResult {
	    if (isa<RuntimeAddRefOp, RuntimeDropRefOp>(op))
	      return op->emitError() << "explicit reference counting is not supported";
	    return WalkResult::advance();
	  });

	  if (refCountingWalk.wasInterrupted())
	    return signalPassFailure();

	  // Add reference counting to block arguments.
	  WalkResult blockWalk = func.walk([&](Block *block) -> WalkResult {
	    for (BlockArgument arg : block->getArguments())
	      if (isRefCounted(arg.getType()))
	        if (failed(addAutomaticRefCounting(arg)))
	          return WalkResult::interrupt();

	    return WalkResult::advance();
	  });

	  if (blockWalk.wasInterrupted())
	    return signalPassFailure();

	  // Add reference counting to operation results.
	  WalkResult opWalk = func.walk([&](Operation *op) -> WalkResult {
	    for (Value result : op->getResults())
	      if (isRefCounted(result.getType()))
	        if (failed(addAutomaticRefCounting(result)))
	          return WalkResult::interrupt();

	    return WalkResult::advance();
	  });

	  if (opWalk.wasInterrupted())
	    signalPassFailure();
	}

	/// Creates a new instance of the automatic reference counting pass
	/// (`AsyncRefCountingPass`); ownership is returned to the caller.
	std::unique_ptr<OperationPass<FuncOp>> mlir::createAsyncRefCountingPass() {
	return std::make_unique<AsyncRefCountingPass>();
	}

mlir/lib/Dialect/Async/Transforms/AsyncRefCountingOptimization.cpp

This file was deleted.

	//===- AsyncRefCountingOptimization.cpp - Async Ref Counting --------------===//
	//
	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
	// See https://llvm.org/LICENSE.txt for license information.
	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
	//
	//===----------------------------------------------------------------------===//
	//
	// Optimize Async dialect reference counting operations.
	//
	//===----------------------------------------------------------------------===//

	#include "PassDetail.h"
	#include "mlir/Dialect/Async/IR/Async.h"
	#include "mlir/Dialect/Async/Passes.h"
	#include "llvm/ADT/SmallSet.h"

	using namespace mlir;
	using namespace mlir::async;

	#define DEBUG_TYPE "async-ref-counting"

	namespace {

	/// Function pass that removes redundant `add_ref` / `drop_ref` pairs for
	/// reference counted block arguments and operation results.
	class AsyncRefCountingOptimizationPass
	: public AsyncRefCountingOptimizationBase<
	AsyncRefCountingOptimizationPass> {
	public:
	AsyncRefCountingOptimizationPass() = default;
	void runOnFunction() override;

	private:
	/// Finds `add_ref` / `drop_ref` pairs with matching counts that operate on
	/// `value` within the same block and erases them when no "regular" user
	/// follows an `async.execute` user between the two operations.
	LogicalResult optimizeReferenceCounting(Value value);
	};

	} // namespace

	/// Erases cancellable `add_ref` / `drop_ref` pairs that modify the reference
	/// count of `value`.
	///
	/// A pair is cancellable when both operations are in the same block, the
	/// `drop_ref` follows the `add_ref`, their counts match, and between them
	/// there is no "regular" user of the value after an `async.execute` user.
	/// The function currently always returns success.
	LogicalResult
	AsyncRefCountingOptimizationPass::optimizeReferenceCounting(Value value) {
	  Region *definingRegion = value.getParentRegion();

	  // Find all users of the `value` inside each block, including operations
	  // that do not use `value` directly, but have a direct use inside nested
	  // region(s).
	  //
	  // Example:
	  //
	  //  ^bb1:
	  //    %token = ...
	  //    scf.if %cond {
	  //      ^bb2:
	  //      async.await %token : !async.token
	  //    }
	  //
	  // %token has a use inside ^bb2 (`async.await`) and inside ^bb1 (`scf.if`).
	  //
	  // In addition to the operation that uses the `value` we also keep track if
	  // this user is an `async.execute` operation itself, or has `async.execute`
	  // operations in the nested regions that do use the `value`.

	  struct UserInfo {
	    Operation *operation;
	    bool hasExecuteUser;
	  };

	  struct BlockUsersInfo {
	    llvm::SmallVector<RuntimeAddRefOp, 4> addRefs;
	    llvm::SmallVector<RuntimeDropRefOp, 4> dropRefs;
	    llvm::SmallVector<UserInfo, 4> users;
	  };

	  llvm::DenseMap<Block *, BlockUsersInfo> blockUsers;

	  auto updateBlockUsersInfo = [&](UserInfo user) {
	    BlockUsersInfo &info = blockUsers[user.operation->getBlock()];
	    info.users.push_back(user);

	    if (auto addRef = dyn_cast<RuntimeAddRefOp>(user.operation))
	      info.addRefs.push_back(addRef);
	    if (auto dropRef = dyn_cast<RuntimeDropRefOp>(user.operation))
	      info.dropRefs.push_back(dropRef);
	  };

	  for (Operation *user : value.getUsers()) {
	    bool isAsyncUser = isa<ExecuteOp>(user);

	    while (user->getParentRegion() != definingRegion) {
	      updateBlockUsersInfo({user, isAsyncUser});
	      user = user->getParentOp();
	      // Check the parent before inspecting it: `isa` must not be called on
	      // a null operation.
	      assert(user != nullptr && "value user lies outside of the value region");
	      isAsyncUser |= isa<ExecuteOp>(user);
	    }

	    updateBlockUsersInfo({user, isAsyncUser});
	  }

	  // Sort all operations found in the block.
	  auto preprocessBlockUsersInfo = [](BlockUsersInfo &info) -> BlockUsersInfo & {
	    auto isBeforeInBlock = [](Operation *a, Operation *b) -> bool {
	      return a->isBeforeInBlock(b);
	    };
	    llvm::sort(info.addRefs, isBeforeInBlock);
	    llvm::sort(info.dropRefs, isBeforeInBlock);
	    llvm::sort(info.users, [&](UserInfo a, UserInfo b) -> bool {
	      return isBeforeInBlock(a.operation, b.operation);
	    });

	    return info;
	  };

	  // Find and erase matching pairs of `add_ref` / `drop_ref` operations in the
	  // blocks that modify the reference count of the `value`.
	  for (auto &kv : blockUsers) {
	    BlockUsersInfo &info = preprocessBlockUsersInfo(kv.second);

	    // Find all cancellable pairs first and erase them later to keep all
	    // pointers in the `info` valid until the end.
	    //
	    // Mapping from `dropRef.getOperation()` to `addRef.getOperation()`.
	    llvm::SmallDenseMap<Operation *, Operation *> cancellable;

	    for (RuntimeAddRefOp addRef : info.addRefs) {
	      for (RuntimeDropRefOp dropRef : info.dropRefs) {
	        // `drop_ref` operation after the `add_ref` with matching count.
	        if (dropRef.count() != addRef.count() ||
	            dropRef->isBeforeInBlock(addRef.getOperation()))
	          continue;

	        // `drop_ref` was already marked for removal.
	        if (cancellable.count(dropRef.getOperation()))
	          continue;

	        // Check `value` users between `addRef` and `dropRef` in the `block`.
	        Operation *addRefOp = addRef.getOperation();
	        Operation *dropRefOp = dropRef.getOperation();

	        // If there is a "regular" user after the `async.execute` user it is
	        // unsafe to erase cancellable reference counting operations pair,
	        // because async region can complete before the "regular" user and
	        // destroy the reference counted value.
	        bool hasExecuteUser = false;
	        bool unsafeToCancel = false;

	        for (UserInfo &user : info.users) {
	          Operation *op = user.operation;

	          // `user` operation lies after `addRef` ...
	          if (op == addRefOp || op->isBeforeInBlock(addRefOp))
	            continue;
	          // ... and before `dropRef`.
	          if (op == dropRefOp || dropRefOp->isBeforeInBlock(op))
	            break;

	          bool isRegularUser = !user.hasExecuteUser;
	          bool isExecuteUser = user.hasExecuteUser;

	          // It is unsafe to cancel `addRef` / `dropRef` pair.
	          if (isRegularUser && hasExecuteUser) {
	            unsafeToCancel = true;
	            break;
	          }

	          hasExecuteUser |= isExecuteUser;
	        }

	        // Mark the pair of reference counting operations for removal.
	        if (!unsafeToCancel)
	          cancellable[dropRefOp] = addRefOp;

	        // If it is unsafe to cancel `addRef <-> dropRef` pair at this point,
	        // all the following pairs will be also unsafe.
	        break;
	      }
	    }

	    // Erase all cancellable `addRef <-> dropRef` operation pairs.
	    for (auto &dropAndAdd : cancellable) {
	      dropAndAdd.first->erase();
	      dropAndAdd.second->erase();
	    }
	  }

	  return success();
	}

	/// Pass entry point. Optimizes reference counting operations for every
	/// reference counted block argument and operation result in the function,
	/// and stops at the first failure instead of continuing after the pass
	/// failure has been signaled.
	void AsyncRefCountingOptimizationPass::runOnFunction() {
	  FuncOp func = getFunction();

	  // Optimize reference counting for values defined by block arguments.
	  WalkResult blockWalk = func.walk([&](Block *block) -> WalkResult {
	    for (BlockArgument arg : block->getArguments())
	      if (isRefCounted(arg.getType()))
	        if (failed(optimizeReferenceCounting(arg)))
	          return WalkResult::interrupt();

	    return WalkResult::advance();
	  });

	  if (blockWalk.wasInterrupted())
	    return signalPassFailure();

	  // Optimize reference counting for values defined by operation results.
	  WalkResult opWalk = func.walk([&](Operation *op) -> WalkResult {
	    for (Value result : op->getResults())
	      if (isRefCounted(result.getType()))
	        if (failed(optimizeReferenceCounting(result)))
	          return WalkResult::interrupt();

	    return WalkResult::advance();
	  });

	  if (opWalk.wasInterrupted())
	    signalPassFailure();
	}

	/// Creates a new instance of the reference counting optimization pass
	/// (`AsyncRefCountingOptimizationPass`); ownership is returned to the caller.
	std::unique_ptr<OperationPass<FuncOp>>
	mlir::createAsyncRefCountingOptimizationPass() {
	return std::make_unique<AsyncRefCountingOptimizationPass>();
	}

mlir/lib/Dialect/Async/Transforms/AsyncRuntimeRefCounting.cpp

This file was added.

//===- AsyncRuntimeRefCounting.cpp - Async Runtime Ref Counting -----------===//

// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.

// See https://llvm.org/LICENSE.txt for license information.

// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception

//===----------------------------------------------------------------------===//

// This file implements automatic reference counting for Async runtime

// operations and types.

//===----------------------------------------------------------------------===//

#include "PassDetail.h"

#include "mlir/Analysis/Liveness.h"

#include "mlir/Dialect/Async/IR/Async.h"

#include "mlir/Dialect/Async/Passes.h"

#include "mlir/Dialect/StandardOps/IR/Ops.h"

#include "mlir/IR/ImplicitLocOpBuilder.h"

#include "mlir/IR/PatternMatch.h"

#include "mlir/Transforms/GreedyPatternRewriteDriver.h"

#include "llvm/ADT/SmallSet.h"

using namespace mlir;

using namespace mlir::async;

#define DEBUG_TYPE "async-runtime-ref-counting"

namespace {

class AsyncRuntimeRefCountingPass

: public AsyncRuntimeRefCountingBase<AsyncRuntimeRefCountingPass> {

public:

AsyncRuntimeRefCountingPass() = default;

void runOnFunction() override;

private:

/// Adds an automatic reference counting to the `value`.

///

/// All values (token, group or value) are semantically created with a

/// reference count of +1 and it is the responsibility of the async value user

/// to place the `add_ref` and `drop_ref` operations to ensure that the value

/// is destroyed after the last use.

///

/// The function returns failure if it can't deduce the locations where

/// to place the reference counting operations.

///

/// Async values "semantically created" when:

/// 1. Operation returns async result (e.g. `async.runtime.create`)

/// 2. Async value passed in as a block argument (or function argument,

/// because function arguments are just entry block arguments)

///

mehdi_aminiUnsubmitted

Done

The parenthetical isn't clear to me: does it imply that a function argument in the function body is considered a new async value and starts with a ref count of 1?
Why isn't a function argument just whatever the caller provides?

mehdi_amini: The parenthesis isn't clear to me: does it imply that a function argument in the function body…

ezhulenevAuthorUnsubmitted

Done

I've reworded it a little bit and added documentation for passing args to functions.

ezhulenev: I've reworded it a little bit and added documentation for passing args to functions.

/// Passing async value as a function argument (or block argument) does not

/// really mean that a new async value is created, it only means that the

/// caller of a function transferred ownership of `+1` reference to the callee.

/// It is convenient to think that from the callee perspective async value was

/// "created" with `+1` reference by the block argument.

mehdi_aminiUnsubmitted

Done

/// #1 Insert `drop_ref` operations after last use of the `value`.

- /// #2 Insert `add_ref` operations before functions calls with referene

+ /// #2 Insert `add_ref` operations before functions calls with reference

/// counted `value` operand.

mehdi_amini:

///

/// Automatic reference counting algorithm outline:

///

/// #1 Insert `drop_ref` operations after last use of the `value`.

/// #2 Insert `add_ref` operations before functions calls with reference

/// counted `value` operand (newly created `+1` reference will be

/// transferred to the callee).

/// #3 Verify that divergent control flow does not lead to leaked reference

/// counted objects.

///

/// Async runtime reference counting optimization pass will optimize away

/// some of the redundant `add_ref` and `drop_ref` operations inserted by this

/// strategy (see `async-runtime-ref-counting-opt`).

LogicalResult addAutomaticRefCounting(Value value);

/// (#1) Adds the `drop_ref` operation after the last use of the `value`

/// relying on the liveness analysis.

///

/// If the `value` is in the block `liveIn` set and it is not in the block

/// `liveOut` set, it means that it "dies" in the block. We find the last

/// use of the value in such block and:

mehdi_aminiUnsubmitted

Done

If the function call is also the last use of this value in a block, is this needed?

mehdi_amini: If the function call is also the last use of this value in a block, is this needed?

ezhulenevAuthorUnsubmitted

Done

No, but to keep things simple it is still added there, and ref-counting-opt will remove it. This is now documented at the beginning.

ezhulenev: No, but to keeps things simple it is still added there, and ref-counting-opt will remove it.

///

/// 1. If the last user is a `ReturnLike` operation we do nothing, because

/// it forwards the ownership to the caller.

/// 2. Otherwise we add a `drop_ref` operation immediately after the last

/// use.

LogicalResult addDropRefAfterLastUse(Value value);

/// (#2) Adds the `add_ref` operation before the function call taking `value`

/// operand to ensure that the value passed to the function entry block

/// has a `+1` reference count.

LogicalResult addAddRefBeforeFunctionCall(Value value);

/// (#3) Verifies that if a block has a value in the `liveOut` set, then the

/// value is in `liveIn` set in all successors.

///

/// Example:

///

/// ^entry:

/// %token = async.runtime.create : !async.token

/// cond_br %cond, ^bb1, ^bb2

/// ^bb1:

/// async.runtime.await %token

/// return

/// ^bb2:

/// return

///

/// This CFG will be rejected because ^bb2 does not have `value` in the

/// `liveIn` set, and it will leak a reference counted object.

///

/// An exception to this rule are blocks with `async.coro.suspend` terminator,

/// because in Async to LLVM lowering it is guaranteed that the control flow

/// will jump into the resume block, and then follow into the cleanup and

/// suspend blocks.

///

/// Example:

///

/// ^entry(%value: !async.value<f32>):

/// async.runtime.await_and_resume %value, %hdl : !async.value<f32>

/// async.coro.suspend %ret, ^suspend, ^resume, ^cleanup

/// ^resume:

/// %0 = async.runtime.load %value

/// br ^cleanup

/// ^cleanup:

/// ...

/// ^suspend:

/// ...

///

/// Although cleanup and suspend blocks do not have the `value` in the

/// `liveIn` set, it is guaranteed that execution will eventually continue in

/// the resume block (we never explicitly destroy coroutines).

LogicalResult verifySuccessors(Value value);

};

} // namespace

LogicalResult AsyncRuntimeRefCountingPass::addDropRefAfterLastUse(Value value) {

OpBuilder builder(value.getContext());

Location loc = value.getLoc();

// Use liveness analysis to find the placement of the `drop_ref` operation.

auto &liveness = getAnalysis<Liveness>();

// We analyse only the blocks of the region that defines the `value`, and do

// not check nested blocks attached to operations.

// By analyzing only the `definingRegion` CFG we potentially lose an

// opportunity to drop the reference count earlier and can extend the lifetime

// of reference counted value longer than it is really required.

// We also assume that all nested regions finish their execution before the

// completion of the owner operation. The only exception to this rule is

// `async.execute` operation, and we verify that they are lowered to the

// `async.runtime` operations before adding automatic reference counting.

mehdi_aminiUnsubmitted

Done

One possible "optimization" may be to collect the block to consider from the users of the value first and iterate only these blocks?

I suspect we could also take advantage of this traversal of the uses ahead of time to collect them and avoid re-traversing them in every block where the value "dies" (line 170).

Something like:

DenseMap<Block *, Operation *> last_users;
for (Operation *user : value.getUsers()) {
   Block block = definingRegion->findAncestorOpInRegion(user->getParent());
   auto it = last_users.insert({block, user});
   if (!it.second && user->isBeforeInBlock(it.first.second))
     it.first.second = user;
}

for (const auto &block_and_last_user :  last_users) {
   Block *block = block_and_last_user.first;
   Operation lastUser = block_and_last_user.second;
  
   // check liveness now
  if (....)
   ....
   lastUsers.insert(lastUser);   
}

mehdi_amini: One possible "optimization" may be to collect the block to consider from the users of the value…

ezhulenevAuthorUnsubmitted

Done

Done. Also updated the documentation a bit to explain how/why it works.

ezhulenev: Done. Also updated the documentation a bit to explain how/why it works.

Region *definingRegion = value.getParentRegion();

// Last users of the `value` inside all blocks where the value dies.

llvm::SmallSet<Operation *, 4> lastUsers;

// Find blocks in the `definingRegion` that have users of the `value` (if

// there are multiple users in the block, which one will be selected is

// undefined). User operation might be not the actual user of the value, but

// the operation in the block that has a "real user" in one of the attached

// regions.

llvm::DenseMap<Block *, Operation *> usersInTheBlocks;

for (Operation *user : value.getUsers()) {
  // Map the block that directly holds the user onto its ancestor block inside
  // the `definingRegion`.
  Block *userBlock = user->getBlock();
  Block *ancestor = definingRegion->findAncestorBlockInRegion(*userBlock);

  // Check the ancestor block *before* it is dereferenced below.
  assert(ancestor && "ancestor block must be not null");

  // The recorded operation is either the user itself, or the operation in the
  // ancestor block that has the "real user" in one of its attached regions.
  Operation *ancestorOp = ancestor->findAncestorOpInBlock(*user);
  assert(ancestorOp && "ancestor op must be not null");

  // If the block has several users, later iterations overwrite earlier ones.
  usersInTheBlocks[ancestor] = ancestorOp;
}

// Find blocks where the `value` dies: the value is in `liveIn` set and not

// in the `liveOut` set. We place `drop_ref` immediately after the last use

// of the `value` in such regions (after handling few special cases).

// We do not traverse all the blocks in the `definingRegion`, because the

// `value` can be in the live in set only if it has users in the block, or it

// is defined in the block.

mehdi_aminiUnsubmitted

Done

What is the guarantee on this?
Isn't there valid IR that can hit this?

To take the example provided above and slightly tweaking it:

///   ^entry:
///     %token = async.runtime.create : !async.token
///     cond_br %cond, ^bb1, ^bb2
///   ^bb1:
///     async.runtime.await %token
///     return
///   ^bb2:
///     br ^bb1

The token is live in ^bb2 even if not used there.

mehdi_amini: What the guaranteed on this? Isn't there valid IR that can hit this? To take the example…

ezhulenevAuthorUnsubmitted

Done

Slightly updated the documentation, I think that @token_arg_cond_br_await_with_fallthough in the async-runtime-ref-counting.mlir tests exactly that.

ezhulenev: Slightly updated the documentation, I think that `@token_arg_cond_br_await_with_fallthough` in…

// Values with zero users (only definition) handled explicitly above.

for (auto &blockAndUser : usersInTheBlocks) {

Block *block = blockAndUser.getFirst();

Operation *userInTheBlock = blockAndUser.getSecond();

const LivenessBlockInfo *blockLiveness = liveness.getLiveness(block);

// Value must be in the live input set or defined in the block.

assert(blockLiveness->isLiveIn(value) ||

blockLiveness->getBlock() == value.getParentBlock());

// If value is in the live out set, it means it doesn't "die" in the block.

if (blockLiveness->isLiveOut(value))

continue;

// At this point we proved that `value` dies in the `block`. Find the last

// use of the `value` inside the `block`, this is where it "dies".

Operation *lastUser = blockLiveness->getEndOperation(value, userInTheBlock);

assert(lastUsers.count(lastUser) == 0 && "last users must be unique");

lastUsers.insert(lastUser);

}

// Process all the last users of the `value` inside each block where the value

// dies.

for (Operation *lastUser : lastUsers) {

// Return like operations forward reference count.

if (lastUser->hasTrait<OpTrait::ReturnLike>())

continue;

// We can't currently handle other types of terminators.

if (lastUser->hasTrait<OpTrait::IsTerminator>())

return lastUser->emitError() << "async reference counting can't handle "

"terminators that are not ReturnLike";

// Add a drop_ref immediately after the last user.

builder.setInsertionPointAfter(lastUser);

builder.create<RuntimeDropRefOp>(loc, value, builder.getI32IntegerAttr(1));

}

return success();

}

/// Inserts an `async.runtime.add_ref` (count = 1) immediately before every
/// `CallOp` that uses `value`, so that the value is passed to the function
/// entry block at `+1` reference. Always returns success.
LogicalResult
AsyncRuntimeRefCountingPass::addAddRefBeforeFunctionCall(Value value) {
  Location valueLoc = value.getLoc();
  OpBuilder refBuilder(value.getContext());

  for (Operation *consumer : value.getUsers()) {
    // Users that are not function calls do not need an extra reference here.
    if (isa<CallOp>(consumer)) {
      refBuilder.setInsertionPoint(consumer);
      refBuilder.create<RuntimeAddRefOp>(valueLoc, value,
                                         refBuilder.getI32IntegerAttr(1));
    }
  }

  return success();
}

/// Verifies the successors of every block in the region that defines `value`.
///
/// For a block that has `value` in its `liveOut` set, either all successors
/// must have `value` in their `liveIn` set, or none of them. A block whose
/// successors disagree is accepted only if its terminator is
/// `async.coro.suspend`; otherwise an error is emitted on the terminator and
/// failure is returned.
LogicalResult AsyncRuntimeRefCountingPass::verifySuccessors(Value value) {
  // Liveness analysis tells us in which blocks the `value` is live in/out.
  auto &liveness = getAnalysis<Liveness>();

  // Because we only add `drop_ref` operations to the region that defines the
  // `value` we can only process CFG for the same region.
  Region *definingRegion = value.getParentRegion();

  // Blocks are visited in region order, so the first offending block reported
  // does not depend on pointer values.
  for (Block &block : definingRegion->getBlocks()) {
    const LivenessBlockInfo *blockLiveness = liveness.getLiveness(&block);

    // Skip the block if value is not in the `liveOut` set.
    if (!blockLiveness->isLiveOut(value))
      continue;

    // Record whether there are successors with, and without, the `value` in
    // their `liveIn` set. Only the existence of each kind matters.
    bool hasLiveInSuccessor = false;
    bool hasNoLiveInSuccessor = false;
    for (Block *successor : block.getSuccessors()) {
      if (liveness.getLiveness(successor)->isLiveIn(value))
        hasLiveInSuccessor = true;
      else
        hasNoLiveInSuccessor = true;
    }

    // All successors agree on the `liveIn` property: nothing to verify.
    if (!hasLiveInSuccessor || !hasNoLiveInSuccessor)
      continue;

    // Divergent `liveIn` property is allowed only in blocks with the
    // async.coro.suspend terminator.
    Operation *terminator = block.getTerminator();
    if (isa<CoroSuspendOp>(terminator))
      continue;

    return terminator->emitOpError("successor have different `liveIn` property "
                                   "of the reference counted value: ");
  }

  return success();
}

/// Adds reference counting operations for a single reference counted `value`.
///
/// A value without uses gets a `drop_ref` right after its definition (or at
/// the start of its block for block arguments). Otherwise `drop_ref`
/// operations are placed after the last uses, `add_ref` operations are placed
/// before function calls, and finally the successors are verified. Returns
/// failure as soon as any of these steps fails.
LogicalResult
AsyncRuntimeRefCountingPass::addAutomaticRefCounting(Value value) {
  // Drop the reference count immediately if the value has no uses.
  if (value.getUses().empty()) {
    OpBuilder dropBuilder(value.getContext());

    // Insert after the operation producing the value, or at the beginning of
    // the block if the value is a block argument.
    if (Operation *producer = value.getDefiningOp())
      dropBuilder.setInsertionPointAfter(producer);
    else
      dropBuilder.setInsertionPointToStart(value.getParentBlock());

    dropBuilder.create<RuntimeDropRefOp>(value.getLoc(), value,
                                         dropBuilder.getI32IntegerAttr(1));
    return success();
  }

  // The steps run in this order and stop at the first failure:
  //   1. `drop_ref` operations based on the liveness analysis;
  //   2. `add_ref` operations before function calls;
  //   3. verification that the `value` is in `liveIn` set of all successors.
  if (failed(addDropRefAfterLastUse(value)) ||
      failed(addAddRefBeforeFunctionCall(value)) ||
      failed(verifySuccessors(value)))
    return failure();

  return success();
}

void AsyncRuntimeRefCountingPass::runOnFunction() {

FuncOp func = getFunction();

// Check that we do not have high level async operations in the IR because

// otherwise automatic reference counting will produce incorrect results after

// execute operations will be lowered to `async.runtime`

WalkResult executeOpWalk = func.walk([&](Operation *op) -> WalkResult {

if (!isa<ExecuteOp, AwaitOp, AwaitAllOp, YieldOp>(op))

return WalkResult::advance();

return op->emitError()

<< "async operations must be lowered to async runtime operations";

});

if (executeOpWalk.wasInterrupted()) {

signalPassFailure();

return;

}

// Add reference counting to block arguments.

WalkResult blockWalk = func.walk([&](Block *block) -> WalkResult {

for (BlockArgument arg : block->getArguments())

if (isRefCounted(arg.getType()))

if (failed(addAutomaticRefCounting(arg)))

return WalkResult::interrupt();

return WalkResult::advance();

});

if (blockWalk.wasInterrupted()) {

signalPassFailure();

return;

mehdi_aminiUnsubmitted

Done

Should we return here?

mehdi_amini: Should we return here?

}

// Add reference counting to operation results.

WalkResult opWalk = func.walk([&](Operation *op) -> WalkResult {

for (unsigned i = 0; i < op->getNumResults(); ++i)

if (isRefCounted(op->getResultTypes()[i]))

if (failed(addAutomaticRefCounting(op->getResult(i))))

return WalkResult::interrupt();

return WalkResult::advance();

});

if (opWalk.wasInterrupted())

signalPassFailure();

}

/// Creates a new `AsyncRuntimeRefCountingPass` and returns it through the
/// generic function pass interface.
std::unique_ptr<OperationPass<FuncOp>>
mlir::createAsyncRuntimeRefCountingPass() {
  std::unique_ptr<OperationPass<FuncOp>> pass =
      std::make_unique<AsyncRuntimeRefCountingPass>();
  return pass;
}

mlir/lib/Dialect/Async/Transforms/AsyncRuntimeRefCountingOpt.cpp

This file was added.

				//===- AsyncRuntimeRefCountingOpt.cpp - Async Ref Counting --------------===//
				//
				// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
				// See https://llvm.org/LICENSE.txt for license information.
				// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
				//
				//===----------------------------------------------------------------------===//
				//
				// Optimize Async dialect reference counting operations.
				//
				//===----------------------------------------------------------------------===//

				#include "PassDetail.h"
				#include "mlir/Dialect/Async/IR/Async.h"
				#include "mlir/Dialect/Async/Passes.h"
				#include "llvm/ADT/SmallSet.h"

				using namespace mlir;
				using namespace mlir::async;

				#define DEBUG_TYPE "async-ref-counting"

				namespace {

				/// Function pass that optimizes Async dialect reference counting operations
				/// by cancelling matching `add_ref` / `drop_ref` pairs.
				class AsyncRuntimeRefCountingOptPass
				    : public AsyncRuntimeRefCountingOptBase<AsyncRuntimeRefCountingOptPass> {
				public:
				  AsyncRuntimeRefCountingOptPass() = default;
				  void runOnFunction() override;

				private:
				  /// Finds pairs of `add_ref` / `drop_ref` operations on `value` that can be
				  /// cancelled and records them in `cancellable`, which maps the `drop_ref`
				  /// operation to its matching `add_ref` operation. Nothing is erased here.
				  LogicalResult optimizeReferenceCounting(
				      Value value, llvm::SmallDenseMap<Operation *, Operation *> &cancellable);
				};

				} // namespace

				LogicalResult AsyncRuntimeRefCountingOptPass::optimizeReferenceCounting(
				Value value, llvm::SmallDenseMap<Operation , Operation > &cancellable) {
				Region *definingRegion = value.getParentRegion();

				// Find all users of the `value` inside each block, including operations that
				// do not use `value` directly, but have a direct use inside nested region(s).
				//
				// Example:
				//
				// ^bb1:
				// %token = ...
				// scf.if %cond {
				// ^bb2:
				// async.runtime.await %token : !async.token
				// }
				//
				// %token has a use inside ^bb2 (`async.runtime.await`) and inside ^bb1
				// (`scf.if`).

				struct BlockUsersInfo {
				llvm::SmallVector<RuntimeAddRefOp, 4> addRefs;
				llvm::SmallVector<RuntimeDropRefOp, 4> dropRefs;
				llvm::SmallVector<Operation *, 4> users;
				};

				llvm::DenseMap<Block *, BlockUsersInfo> blockUsers;

				auto updateBlockUsersInfo = [&](Operation *user) {
				BlockUsersInfo &info = blockUsers[user->getBlock()];
				info.users.push_back(user);

				if (auto addRef = dyn_cast<RuntimeAddRefOp>(user))
				info.addRefs.push_back(addRef);
				if (auto dropRef = dyn_cast<RuntimeDropRefOp>(user))
				info.dropRefs.push_back(dropRef);
				};

				for (Operation *user : value.getUsers()) {
				while (user->getParentRegion() != definingRegion) {
				updateBlockUsersInfo(user);
				user = user->getParentOp();
				assert(user != nullptr && "value user lies outside of the value region");
				}

				updateBlockUsersInfo(user);
				}

				// Sort all operations found in the block by their position in the block,
				// and hand back the same `info` so the call can be used in an initializer.
				auto preprocessBlockUsersInfo = [](BlockUsersInfo &info) -> BlockUsersInfo & {
				  auto isBeforeInBlock = [](Operation *a, Operation *b) -> bool {
				    return a->isBeforeInBlock(b);
				  };
				  llvm::sort(info.addRefs, isBeforeInBlock);
				  llvm::sort(info.dropRefs, isBeforeInBlock);
				  llvm::sort(info.users, isBeforeInBlock);

				  return info;
				};

				// Find and erase matching pairs of `add_ref` / `drop_ref` operations in the
				// blocks that modify the reference count of the `value`.
				for (auto &kv : blockUsers) {
				BlockUsersInfo &info = preprocessBlockUsersInfo(kv.second);

				for (RuntimeAddRefOp addRef : info.addRefs) {
				for (RuntimeDropRefOp dropRef : info.dropRefs) {
				// `drop_ref` operation after the `add_ref` with matching count.
				if (dropRef.count() != addRef.count() \|\|
				dropRef->isBeforeInBlock(addRef.getOperation()))
				continue;
				mehdi_aminiUnsubmitted Done Reply Inline Actions Why does the `isBeforeInBlock` matters? I'd expect a drop_ref followed by an add_ref should be optimizable just like in the other direction? mehdi_amini: Why does the `isBeforeInBlock` matters? I'd expect a drop_ref followed by an add_ref should be…
				ezhulenevAuthorUnsubmitted Done Reply Inline Actions This might hide bugs at runtime, `drop_ref` can drop reference count to zero and delete the value, and the following `add_ref` must fail (with asan or msan, it likely lead to segfault in the regular build). ezhulenev: This might hide bugs at runtime, `drop_ref` can drop reference count to zero and delete the…
				mehdi_aminiUnsubmitted Done Reply Inline Actions If I understand correctly, you're saying that an incorrect input will not crash at runtime? It is fine in general to do that: most optimization have to assume that the input program is "correct" and optimize accordingly (LLVM optimizations will always do that). mehdi_amini: If I understand correctly, you're saying that an incorrect input will not crash at runtime? It…
				ezhulenevAuthorUnsubmitted Done Reply Inline Actions I'm not that confident in the correctness of reference counting pass to silently ignore this type of errors :) Also if the program is correct wrt reference counting such optimization opportunities should not exists. ezhulenev: I'm not that confident in the correctness of reference counting pass to silently ignore this…

				// Try to cancel the pair of `add_ref` and `drop_ref` operations.
				auto emplaced = cancellable.try_emplace(dropRef.getOperation(),
				addRef.getOperation());

				if (!emplaced.second) // `drop_ref` was already marked for removal
				continue; // go to the next `drop_ref`

				mehdi_aminiUnsubmitted Done Reply Inline Actions The double lookup in the map can be avoided: bool inserted = cancellable.insert({dropRef.getOperation(), addRef.getOperation()}).second; if (inserted) break; mehdi_amini: The double lookup in the map can be avoided: ``` bool inserted = cancellable.insert({dropRef.
				if (emplaced.second) // successfully cancelled `add_ref` <-> `drop_ref`
				break; // go to the next `add_ref`
				}
				}
				}

				return success();
				}

				void AsyncRuntimeRefCountingOptPass::runOnFunction() {
				FuncOp func = getFunction();

				// Mapping from `dropRef.getOperation()` to `addRef.getOperation()`.
				//
				// Find all cancellable pairs of operation and erase them in the end to keep
				// all iterators valid while we are walking the function operations.
				llvm::SmallDenseMap<Operation , Operation > cancellable;

				// Optimize reference counting for values defined by block arguments.
				WalkResult blockWalk = func.walk([&](Block *block) -> WalkResult {
				for (BlockArgument arg : block->getArguments())
				if (isRefCounted(arg.getType()))
				if (failed(optimizeReferenceCounting(arg, cancellable)))
				return WalkResult::interrupt();

				return WalkResult::advance();
				});

				if (blockWalk.wasInterrupted())
				signalPassFailure();

				// Optimize reference counting for values defined by operation results.
				WalkResult opWalk = func.walk([&](Operation *op) -> WalkResult {
				for (unsigned i = 0; i < op->getNumResults(); ++i)
				if (isRefCounted(op->getResultTypes()[i]))
				if (failed(optimizeReferenceCounting(op->getResult(i), cancellable)))
				return WalkResult::interrupt();

				return WalkResult::advance();
				});

				if (opWalk.wasInterrupted())
				signalPassFailure();

				LLVM_DEBUG({
				llvm::dbgs() << "Found " << cancellable.size()
				<< " cancellable reference counting operations\n";
				});

				// Erase all cancellable `add_ref <-> drop_ref` operation pairs.
				for (auto &kv : cancellable) {
				kv.first->erase();
				kv.second->erase();
				}
				}

				/// Creates a new `AsyncRuntimeRefCountingOptPass` and returns it through the
				/// generic function pass interface.
				std::unique_ptr<OperationPass<FuncOp>>
				mlir::createAsyncRuntimeRefCountingOptPass() {
				  std::unique_ptr<OperationPass<FuncOp>> pass =
				      std::make_unique<AsyncRuntimeRefCountingOptPass>();
				  return pass;
				}

mlir/lib/Dialect/Async/Transforms/CMakeLists.txt

	add_mlir_dialect_library(MLIRAsyncTransforms			add_mlir_dialect_library(MLIRAsyncTransforms
	AsyncParallelFor.cpp			AsyncParallelFor.cpp
	AsyncRefCounting.cpp			AsyncRuntimeRefCounting.cpp
	AsyncRefCountingOptimization.cpp			AsyncRuntimeRefCountingOpt.cpp
	AsyncToAsyncRuntime.cpp			AsyncToAsyncRuntime.cpp

	ADDITIONAL_HEADER_DIRS			ADDITIONAL_HEADER_DIRS
	${MLIR_MAIN_INCLUDE_DIR}/mlir/Dialect/Async			${MLIR_MAIN_INCLUDE_DIR}/mlir/Dialect/Async

	DEPENDS			DEPENDS
	MLIRAsyncPassIncGen			MLIRAsyncPassIncGen

	LINK_LIBS PUBLIC			LINK_LIBS PUBLIC
	MLIRIR			MLIRIR
	MLIRAsync			MLIRAsync
	MLIRSCF			MLIRSCF
	MLIRPass			MLIRPass
	MLIRTransforms			MLIRTransforms
	MLIRTransformUtils			MLIRTransformUtils
	)			)

mlir/test/Dialect/Async/async-ref-counting-optimization.mlir

This file was deleted.

	// RUN: mlir-opt %s -async-ref-counting-optimization | FileCheck %s

	// CHECK-LABEL: @cancellable_operations_0
	func @cancellable_operations_0(%arg0: !async.token) {
	// CHECK-NOT: async.runtime.add_ref
	// CHECK-NOT: async.runtime.drop_ref
	async.runtime.add_ref %arg0 {count = 1 : i32} : !async.token
	async.runtime.drop_ref %arg0 {count = 1 : i32} : !async.token
	// CHECK: return
	return
	}

	// CHECK-LABEL: @cancellable_operations_1
	func @cancellable_operations_1(%arg0: !async.token) {
	// CHECK-NOT: async.runtime.add_ref
	// CHECK: async.execute
	async.runtime.add_ref %arg0 {count = 1 : i32} : !async.token
	async.execute [%arg0] {
	// CHECK: async.runtime.drop_ref
	async.runtime.drop_ref %arg0 {count = 1 : i32} : !async.token
	// CHECK-NEXT: async.yield
	async.yield
	}
	// CHECK-NOT: async.runtime.drop_ref
	async.runtime.drop_ref %arg0 {count = 1 : i32} : !async.token
	// CHECK: return
	return
	}

	// CHECK-LABEL: @cancellable_operations_2
	func @cancellable_operations_2(%arg0: !async.token) {
	// CHECK: async.await
	// CHECK-NEXT: async.await
	// CHECK-NEXT: async.await
	// CHECK-NEXT: return
	async.runtime.add_ref %arg0 {count = 1 : i32} : !async.token
	async.await %arg0 : !async.token
	async.runtime.drop_ref %arg0 {count = 1 : i32} : !async.token
	async.await %arg0 : !async.token
	async.runtime.add_ref %arg0 {count = 1 : i32} : !async.token
	async.await %arg0 : !async.token
	async.runtime.drop_ref %arg0 {count = 1 : i32} : !async.token
	return
	}

	// CHECK-LABEL: @cancellable_operations_3
	func @cancellable_operations_3(%arg0: !async.token) {
	// CHECK-NOT: add_ref
	async.runtime.add_ref %arg0 {count = 1 : i32} : !async.token
	%token = async.execute {
	async.await %arg0 : !async.token
	// CHECK: async.runtime.drop_ref
	async.runtime.drop_ref %arg0 {count = 1 : i32} : !async.token
	async.yield
	}
	// CHECK-NOT: async.runtime.drop_ref
	async.runtime.drop_ref %arg0 {count = 1 : i32} : !async.token
	// CHECK: async.await
	async.await %arg0 : !async.token
	// CHECK: return
	return
	}

	// CHECK-LABEL: @not_cancellable_operations_0
	func @not_cancellable_operations_0(%arg0: !async.token, %arg1: i1) {
	// It is unsafe to cancel `add_ref` / `drop_ref` pair because it is possible
	// that the body of the `async.execute` operation will run before the await
	// operation in the function body, and will destroy the `%arg0` token.
	// CHECK: add_ref
	async.runtime.add_ref %arg0 {count = 1 : i32} : !async.token
	%token = async.execute {
	// CHECK: async.await
	async.await %arg0 : !async.token
	// CHECK: async.runtime.drop_ref
	async.runtime.drop_ref %arg0 {count = 1 : i32} : !async.token
	// CHECK: async.yield
	async.yield
	}
	// CHECK: async.await
	async.await %arg0 : !async.token
	// CHECK: drop_ref
	async.runtime.drop_ref %arg0 {count = 1 : i32} : !async.token
	// CHECK: return
	return
	}

	// CHECK-LABEL: @not_cancellable_operations_1
	func @not_cancellable_operations_1(%arg0: !async.token, %arg1: i1) {
	// Same reason as above, although `async.execute` is inside the nested
	// region or "regular" operation.
	//
	// NOTE: This test is not correct w.r.t. reference counting, and at runtime
	// would leak %arg0 value if %arg1 is false. IR like this will not be
	// constructed by automatic reference counting pass, because it would
	// place `async.runtime.add_ref` right before the `async.execute`
	// inside `scf.if`.

	// CHECK: async.runtime.add_ref
	async.runtime.add_ref %arg0 {count = 1 : i32} : !async.token
	scf.if %arg1 {
	%token = async.execute {
	async.await %arg0 : !async.token
	// CHECK: async.runtime.drop_ref
	async.runtime.drop_ref %arg0 {count = 1 : i32} : !async.token
	async.yield
	}
	}
	// CHECK: async.await
	async.await %arg0 : !async.token
	// CHECK: async.runtime.drop_ref
	async.runtime.drop_ref %arg0 {count = 1 : i32} : !async.token
	// CHECK: return
	return
	}

mlir/test/Dialect/Async/async-ref-counting.mlir

This file was deleted.

	// RUN: mlir-opt %s -async-ref-counting | FileCheck %s

	// CHECK-LABEL: @cond
	func private @cond() -> i1

	// CHECK-LABEL: @token_arg_no_uses
	func @token_arg_no_uses(%arg0: !async.token) {
	// CHECK: async.runtime.drop_ref %arg0 {count = 1 : i32}
	return
	}

	// CHECK-LABEL: @token_arg_conditional_await
	func @token_arg_conditional_await(%arg0: !async.token, %arg1: i1) {
	cond_br %arg1, ^bb1, ^bb2
	^bb1:
	// CHECK: async.runtime.drop_ref %arg0 {count = 1 : i32}
	return
	^bb2:
	// CHECK: async.await %arg0
	// CHECK: async.runtime.drop_ref %arg0 {count = 1 : i32}
	async.await %arg0 : !async.token
	return
	}

	// CHECK-LABEL: @token_no_uses
	func @token_no_uses() {
	// CHECK: %[[TOKEN:.*]] = async.execute
	// CHECK: async.runtime.drop_ref %[[TOKEN]] {count = 1 : i32}
	%token = async.execute {
	async.yield
	}
	return
	}

	// CHECK-LABEL: @token_return
	func @token_return() -> !async.token {
	// CHECK: %[[TOKEN:.*]] = async.execute
	%token = async.execute {
	async.yield
	}
	// CHECK: return %[[TOKEN]]
	return %token : !async.token
	}

	// CHECK-LABEL: @token_await
	func @token_await() {
	// CHECK: %[[TOKEN:.*]] = async.execute
	%token = async.execute {
	async.yield
	}
	// CHECK: async.await %[[TOKEN]]
	async.await %token : !async.token
	// CHECK: async.runtime.drop_ref %[[TOKEN]] {count = 1 : i32}
	// CHECK: return
	return
	}

	// CHECK-LABEL: @token_await_and_return
	func @token_await_and_return() -> !async.token {
	// CHECK: %[[TOKEN:.*]] = async.execute
	%token = async.execute {
	async.yield
	}
	// CHECK: async.await %[[TOKEN]]
	// CHECK-NOT: async.runtime.drop_ref
	async.await %token : !async.token
	// CHECK: return %[[TOKEN]]
	return %token : !async.token
	}

	// CHECK-LABEL: @token_await_inside_scf_if
	func @token_await_inside_scf_if(%arg0: i1) {
	// CHECK: %[[TOKEN:.*]] = async.execute
	%token = async.execute {
	async.yield
	}
	// CHECK: scf.if %arg0 {
	scf.if %arg0 {
	// CHECK: async.await %[[TOKEN]]
	async.await %token : !async.token
	}
	// CHECK: }
	// CHECK: async.runtime.drop_ref %[[TOKEN]] {count = 1 : i32}
	// CHECK: return
	return
	}

	// CHECK-LABEL: @token_conditional_await
	func @token_conditional_await(%arg0: i1) {
	// CHECK: %[[TOKEN:.*]] = async.execute
	%token = async.execute {
	async.yield
	}
	cond_br %arg0, ^bb1, ^bb2
	^bb1:
	// CHECK: async.runtime.drop_ref %[[TOKEN]] {count = 1 : i32}
	return
	^bb2:
	// CHECK: async.await %[[TOKEN]]
	// CHECK: async.runtime.drop_ref %[[TOKEN]] {count = 1 : i32}
	async.await %token : !async.token
	return
	}

	// CHECK-LABEL: @token_await_in_the_loop
	func @token_await_in_the_loop() {
	// CHECK: %[[TOKEN:.*]] = async.execute
	%token = async.execute {
	async.yield
	}
	br ^bb1
	^bb1:
	// CHECK: async.await %[[TOKEN]]
	async.await %token : !async.token
	%0 = call @cond(): () -> (i1)
	cond_br %0, ^bb1, ^bb2
	^bb2:
	// CHECK: async.runtime.drop_ref %[[TOKEN]] {count = 1 : i32}
	return
	}

	// CHECK-LABEL: @token_defined_in_the_loop
	func @token_defined_in_the_loop() {
	br ^bb1
	^bb1:
	// CHECK: %[[TOKEN:.*]] = async.execute
	%token = async.execute {
	async.yield
	}
	// CHECK: async.await %[[TOKEN]]
	// CHECK: async.runtime.drop_ref %[[TOKEN]] {count = 1 : i32}
	async.await %token : !async.token
	%0 = call @cond(): () -> (i1)
	cond_br %0, ^bb1, ^bb2
	^bb2:
	return
	}

	// CHECK-LABEL: @token_capture
	func @token_capture() {
	// CHECK: %[[TOKEN:.*]] = async.execute
	%token = async.execute {
	async.yield
	}

	// CHECK: async.runtime.add_ref %[[TOKEN]] {count = 1 : i32}
	// CHECK: %[[TOKEN_0:.*]] = async.execute
	%token_0 = async.execute {
	// CHECK: async.runtime.drop_ref %[[TOKEN]] {count = 1 : i32}
	// CHECK-NEXT: async.yield
	async.await %token : !async.token
	async.yield
	}
	// CHECK: async.runtime.drop_ref %[[TOKEN_0]] {count = 1 : i32}
	// CHECK: async.runtime.drop_ref %[[TOKEN]] {count = 1 : i32}
	// CHECK: return
	return
	}

	// CHECK-LABEL: @token_nested_capture
	func @token_nested_capture() {
	// CHECK: %[[TOKEN:.*]] = async.execute
	%token = async.execute {
	async.yield
	}

	// CHECK: async.runtime.add_ref %[[TOKEN]] {count = 1 : i32}
	// CHECK: %[[TOKEN_0:.*]] = async.execute
	%token_0 = async.execute {
	// CHECK: async.runtime.add_ref %[[TOKEN]] {count = 1 : i32}
	// CHECK: %[[TOKEN_1:.*]] = async.execute
	%token_1 = async.execute {
	// CHECK: async.runtime.add_ref %[[TOKEN]] {count = 1 : i32}
	// CHECK: %[[TOKEN_2:.*]] = async.execute
	%token_2 = async.execute {
	// CHECK: async.await %[[TOKEN]]
	// CHECK: async.runtime.drop_ref %[[TOKEN]] {count = 1 : i32}
	async.await %token : !async.token
	async.yield
	}
	// CHECK: async.runtime.drop_ref %[[TOKEN_2]] {count = 1 : i32}
	// CHECK: async.runtime.drop_ref %[[TOKEN]] {count = 1 : i32}
	async.yield
	}
	// CHECK: async.runtime.drop_ref %[[TOKEN_1]] {count = 1 : i32}
	// CHECK: async.runtime.drop_ref %[[TOKEN]] {count = 1 : i32}
	async.yield
	}
	// CHECK: async.runtime.drop_ref %[[TOKEN_0]] {count = 1 : i32}
	// CHECK: async.runtime.drop_ref %[[TOKEN]] {count = 1 : i32}
	// CHECK: return
	return
	}

	// CHECK-LABEL: @token_dependency
	func @token_dependency() {
	// CHECK: %[[TOKEN:.*]] = async.execute
	%token = async.execute {
	async.yield
	}

	// CHECK: async.runtime.add_ref %[[TOKEN]] {count = 1 : i32}
	// CHECK: %[[TOKEN_0:.*]] = async.execute
	%token_0 = async.execute[%token] {
	// CHECK: async.runtime.drop_ref %[[TOKEN]] {count = 1 : i32}
	// CHECK-NEXT: async.yield
	async.yield
	}

	// CHECK: async.await %[[TOKEN]]
	// CHECK: async.runtime.drop_ref %[[TOKEN]] {count = 1 : i32}
	async.await %token : !async.token
	// CHECK: async.await %[[TOKEN_0]]
	// CHECK: async.runtime.drop_ref %[[TOKEN_0]] {count = 1 : i32}
	async.await %token_0 : !async.token

	// CHECK: return
	return
	}

	// CHECK-LABEL: @value_operand
	func @value_operand() -> f32 {
	// CHECK: %[[TOKEN:.*]], %[[RESULTS:.*]] = async.execute
	%token, %results = async.execute -> !async.value<f32> {
	%0 = constant 0.0 : f32
	async.yield %0 : f32
	}

	// CHECK: async.runtime.add_ref %[[TOKEN]] {count = 1 : i32}
	// CHECK: async.runtime.add_ref %[[RESULTS]] {count = 1 : i32}
	// CHECK: %[[TOKEN_0:.*]] = async.execute
	%token_0 = async.execute[%token](%results as %arg0 : !async.value<f32>) {
	// CHECK: async.runtime.drop_ref %[[TOKEN]] {count = 1 : i32}
	// CHECK: async.runtime.drop_ref %[[RESULTS]] {count = 1 : i32}
	// CHECK: async.yield
	async.yield
	}

	// CHECK: async.await %[[TOKEN]]
	// CHECK: async.runtime.drop_ref %[[TOKEN]] {count = 1 : i32}
	async.await %token : !async.token

	// CHECK: async.await %[[TOKEN_0]]
	// CHECK: async.runtime.drop_ref %[[TOKEN_0]] {count = 1 : i32}
	async.await %token_0 : !async.token

	// CHECK: async.await %[[RESULTS]]
	// CHECK: async.runtime.drop_ref %[[RESULTS]] {count = 1 : i32}
	%0 = async.await %results : !async.value<f32>

	// CHECK: return
	return %0 : f32
	}

mlir/test/Dialect/Async/async-runtime-ref-counting-opt.mlir

This file was added.

				// RUN: mlir-opt %s -async-runtime-ref-counting-opt | FileCheck %s

				func private @consume_token(%arg0: !async.token)

				// CHECK-LABEL: @cancellable_operations_0
				func @cancellable_operations_0(%arg0: !async.token) {
				// CHECK-NOT: async.runtime.add_ref
				// CHECK-NOT: async.runtime.drop_ref
				async.runtime.add_ref %arg0 {count = 1 : i32} : !async.token
				async.runtime.drop_ref %arg0 {count = 1 : i32} : !async.token
				// CHECK: return
				return
				}

				// CHECK-LABEL: @cancellable_operations_1
				func @cancellable_operations_1(%arg0: !async.token) {
				// CHECK-NOT: async.runtime.add_ref
				async.runtime.add_ref %arg0 {count = 1 : i32} : !async.token
				// CHECK: call @consume_token
				call @consume_token(%arg0): (!async.token) -> ()
				// CHECK-NOT: async.runtime.drop_ref
				async.runtime.drop_ref %arg0 {count = 1 : i32} : !async.token
				// CHECK: return
				return
				}

				// CHECK-LABEL: @cancellable_operations_2
				func @cancellable_operations_2(%arg0: !async.token) {
				// CHECK: async.runtime.await
				// CHECK-NEXT: async.runtime.await
				// CHECK-NEXT: async.runtime.await
				// CHECK-NEXT: return
				async.runtime.add_ref %arg0 {count = 1 : i32} : !async.token
				async.runtime.await %arg0 : !async.token
				async.runtime.drop_ref %arg0 {count = 1 : i32} : !async.token
				async.runtime.await %arg0 : !async.token
				async.runtime.add_ref %arg0 {count = 1 : i32} : !async.token
				async.runtime.await %arg0 : !async.token
				async.runtime.drop_ref %arg0 {count = 1 : i32} : !async.token
				return
				}

				// CHECK-LABEL: @cancellable_operations_3
				func @cancellable_operations_3(%arg0: !async.token) {
				// CHECK-NOT: add_ref
				async.runtime.add_ref %arg0 {count = 1 : i32} : !async.token
				// CHECK: call @consume_token
				call @consume_token(%arg0): (!async.token) -> ()
				// CHECK-NOT: async.runtime.drop_ref
				async.runtime.drop_ref %arg0 {count = 1 : i32} : !async.token
				// CHECK: async.runtime.await
				async.runtime.await %arg0 : !async.token
				// CHECK: return
				return
				}

mlir/test/Dialect/Async/async-runtime-ref-counting.mlir

This file was added.

				// RUN: mlir-opt %s -async-runtime-ref-counting | FileCheck %s

				// CHECK-LABEL: @token
				func private @token() -> !async.token

				// CHECK-LABEL: @cond
				func private @cond() -> i1

				// CHECK-LABEL: @take_token
				func private @take_token(%arg0: !async.token)

				// Test input: a token function argument that is never used. The
				// expectation is a drop_ref (count = 1) of that argument in the output.
				// CHECK-LABEL: @token_arg_no_uses
				// CHECK: %[[TOKEN:.*]]: !async.token
				func @token_arg_no_uses(%arg0: !async.token) {
				// CHECK: async.runtime.drop_ref %[[TOKEN]] {count = 1 : i32}
				return
				}

				// Test input: a token produced by async.runtime.create that is never
				// used. The expectation is the create op followed by a drop_ref
				// (count = 1) of its result.
				// CHECK-LABEL: @token_value_no_uses
				func @token_value_no_uses() {
				// CHECK: %[[TOKEN:.*]] = async.runtime.create : !async.token
				// CHECK: async.runtime.drop_ref %[[TOKEN]] {count = 1 : i32}
				%0 = async.runtime.create : !async.token
				return
				}

				// Test input: a token returned from a call to @token that is never used.
				// The expectation is the call followed by a drop_ref (count = 1) of its
				// result.
				// CHECK-LABEL: @token_returned_no_uses
				func @token_returned_no_uses() {
				// CHECK: %[[TOKEN:.*]] = call @token
				// CHECK: async.runtime.drop_ref %[[TOKEN]] {count = 1 : i32}
				%0 = call @token() : () -> !async.token
				return
				}

				// Test input: a token function argument passed to @take_token. The
				// expectations are an add_ref (count = 1) of the argument followed later
				// by a drop_ref (count = 1) of the same argument.
				// CHECK-LABEL: @token_arg_to_func
				// CHECK: %[[TOKEN:.*]]: !async.token
				func @token_arg_to_func(%arg0: !async.token) {
				// CHECK: async.runtime.add_ref %[[TOKEN]] {count = 1 : i32} : !async.token
				call @take_token(%arg0): (!async.token) -> ()
				// CHECK: async.runtime.drop_ref %[[TOKEN]] {count = 1 : i32} : !async.token
				return
				}

				// Test input: a token produced by async.runtime.create and passed to
				// @take_token. The expectations are the create op, then an add_ref
				// (count = 1) of its result, then a drop_ref (count = 1) of it.
				// CHECK-LABEL: @token_value_to_func
				func @token_value_to_func() {
				// CHECK: %[[TOKEN:.*]] = async.runtime.create : !async.token
				%0 = async.runtime.create : !async.token
				// CHECK: async.runtime.add_ref %[[TOKEN]] {count = 1 : i32} : !async.token
				call @take_token(%0): (!async.token) -> ()
				// CHECK: async.runtime.drop_ref %[[TOKEN]] {count = 1 : i32}
				return
				}

				// Test input: %arg0 is awaited in ^bb2, which is reached both directly
				// from the entry cond_br and through ^bb1. The expectations require the
				// await in that join block to be followed by a drop_ref (count = 1) of
				// the token argument.
				// CHECK-LABEL: @token_arg_cond_br_await_with_fallthough
				// CHECK: %[[TOKEN:.*]]: !async.token
				func @token_arg_cond_br_await_with_fallthough(%arg0: !async.token, %arg1: i1) {
				// CHECK: cond_br
				// The successor captures use `.*` so that multi-character block names
				// such as bb1/bb2 can be bound; a lone `.` binds exactly one character.
				// CHECK-SAME: ^[[BB1:.*]], ^[[BB2:.*]]
				cond_br %arg1, ^bb1, ^bb2
				^bb1:
				// CHECK: ^[[BB1]]:
				// CHECK: br ^[[BB2]]
				br ^bb2
				^bb2:
				// CHECK: ^[[BB2]]:
				// CHECK: async.runtime.await %[[TOKEN]]
				// CHECK: async.runtime.drop_ref %[[TOKEN]] {count = 1 : i32}
				async.runtime.await %arg0 : !async.token
				return
				}

				// Test input: a token produced by async.runtime.create and returned
				// directly. The expectations match only the create op and the return of
				// that same value.
				// CHECK-LABEL: @token_simple_return
				func @token_simple_return() -> !async.token {
				// CHECK: %[[TOKEN:.*]] = async.runtime.create : !async.token
				%token = async.runtime.create : !async.token
				// CHECK: return %[[TOKEN]]
				return %token : !async.token
				}

				// Test input: a coroutine-shaped function (coro.id/begin/save/suspend/
				// free/end) that creates a token and returns it from the ^suspend block.
				// The two negative expectations below cover the whole function body:
				// neither drop_ref nor add_ref may appear in its output.
				// CHECK-LABEL: @token_coro_return
				// CHECK-NOT: async.runtime.drop_ref
				// CHECK-NOT: async.runtime.add_ref
				func @token_coro_return() -> !async.token {
				%token = async.runtime.create : !async.token
				%id = async.coro.id
				%hdl = async.coro.begin %id
				%saved = async.coro.save %hdl
				async.runtime.resume %hdl
				async.coro.suspend %saved, ^suspend, ^resume, ^cleanup
				^resume:
				br ^cleanup
				^cleanup:
				async.coro.free %id, %hdl
				br ^suspend
				^suspend:
				async.coro.end %hdl
				return %token : !async.token
				}

				// Test input: a coroutine-shaped function that calls await_and_resume on
				// its token argument before suspending. The expectations require a
				// drop_ref (count = 1) of the argument on the line immediately after the
				// await_and_resume op.
				// CHECK-LABEL: @token_coro_await_and_resume
				// CHECK: %[[TOKEN:.*]]: !async.token
				func @token_coro_await_and_resume(%arg0: !async.token) -> !async.token {
				%token = async.runtime.create : !async.token
				%id = async.coro.id
				%hdl = async.coro.begin %id
				%saved = async.coro.save %hdl
				// CHECK: async.runtime.await_and_resume %[[TOKEN]]
				async.runtime.await_and_resume %arg0, %hdl : !async.token
				// CHECK-NEXT: async.runtime.drop_ref %[[TOKEN]] {count = 1 : i32}
				async.coro.suspend %saved, ^suspend, ^resume, ^cleanup
				^resume:
				br ^cleanup
				^cleanup:
				async.coro.free %id, %hdl
				br ^suspend
				^suspend:
				async.coro.end %hdl
				return %token : !async.token
				}

				// Test input: a coroutine-shaped function that calls await_and_resume on
				// its !async.value<f32> argument, suspends, and in the ^resume block loads
				// the value and adds it to itself. The expectations require the load in
				// the resume block to be followed by a drop_ref (count = 1) of the value
				// argument, and then by the addf of the loaded result.
				// CHECK-LABEL: @value_coro_await_and_resume
				// CHECK: %[[VALUE:.*]]: !async.value<f32>
				func @value_coro_await_and_resume(%arg0: !async.value<f32>) -> !async.token {
				%token = async.runtime.create : !async.token
				%id = async.coro.id
				%hdl = async.coro.begin %id
				%saved = async.coro.save %hdl
				// CHECK: async.runtime.await_and_resume %[[VALUE]]
				async.runtime.await_and_resume %arg0, %hdl : !async.value<f32>
				// CHECK: async.coro.suspend
				// All three successor captures use `.*` so that multi-character block
				// names can be bound; a lone `.` binds exactly one character.
				// CHECK-SAME: ^[[SUSPEND:.*]], ^[[RESUME:.*]], ^[[CLEANUP:.*]]
				async.coro.suspend %saved, ^suspend, ^resume, ^cleanup
				^resume:
				// CHECK: ^[[RESUME]]:
				// CHECK: %[[LOADED:.*]] = async.runtime.load %[[VALUE]]
				// CHECK: async.runtime.drop_ref %[[VALUE]] {count = 1 : i32}
				%0 = async.runtime.load %arg0 : !async.value<f32>
				// CHECK: addf %[[LOADED]], %[[LOADED]]
				%1 = addf %0, %0 : f32
				br ^cleanup
				^cleanup:
				async.coro.free %id, %hdl
				br ^suspend
				^suspend:
				async.coro.end %hdl
				return %token : !async.token
				}

				// Test input: a coroutine-shaped private function with two suspension
				// points; the second one follows an await_and_resume on the token
				// argument. The expectations require a drop_ref (count = 1) of the
				// argument between the first and the second coro.suspend, and then
				// match set_available, coro.free and coro.end in their blocks.
				// NOTE(review): the block-name captures below (RESUME, RESUME_1, CLEANUP,
				// SUSPEND) are bound but never referenced again in this function.
				// CHECK-LABEL: @outlined_async_execute
				// CHECK: %[[TOKEN:.*]]: !async.token
				func private @outlined_async_execute(%arg0: !async.token) -> !async.token {
				%0 = async.runtime.create : !async.token
				%1 = async.coro.id
				%2 = async.coro.begin %1
				%3 = async.coro.save %2
				async.runtime.resume %2
				// CHECK: async.coro.suspend
				async.coro.suspend %3, ^suspend, ^resume, ^cleanup
				^resume:
				// CHECK: ^[[RESUME:.*]]:
				%4 = async.coro.save %2
				async.runtime.await_and_resume %arg0, %2 : !async.token
				// CHECK: async.runtime.drop_ref %[[TOKEN]] {count = 1 : i32}
				// CHECK: async.coro.suspend
				async.coro.suspend %4, ^suspend, ^resume_1, ^cleanup
				^resume_1:
				// CHECK: ^[[RESUME_1:.*]]:
				// CHECK: async.runtime.set_available
				async.runtime.set_available %0 : !async.token
				br ^cleanup
				^cleanup:
				// CHECK: ^[[CLEANUP:.*]]:
				// CHECK: async.coro.free
				async.coro.free %1, %2
				br ^suspend
				^suspend:
				// CHECK: ^[[SUSPEND:.*]]:
				// CHECK: async.coro.end
				async.coro.end %2
				return %0 : !async.token
				}

				// Test input: a token returned from @token whose only use is an await
				// nested inside an scf.if region. The expectations require the await to
				// stay inside the scf.if and a drop_ref (count = 1) of the token to
				// appear after the closing brace, before the return.
				// CHECK-LABEL: @token_await_inside_nested_region
				// CHECK: %[[ARG:.*]]: i1
				func @token_await_inside_nested_region(%arg0: i1) {
				// CHECK: %[[TOKEN:.*]] = call @token()
				%token = call @token() : () -> !async.token
				// CHECK: scf.if %[[ARG]] {
				scf.if %arg0 {
				// CHECK: async.runtime.await %[[TOKEN]]
				async.runtime.await %token : !async.token
				}
				// CHECK: }
				// CHECK: async.runtime.drop_ref %[[TOKEN]] {count = 1 : i32}
				// CHECK: return
				return
				}

				// Test input: a CFG loop (^bb1 branches back to itself via cond_br) in
				// which a token is obtained from @token and awaited on each iteration.
				// The expectations require the call, the await and a drop_ref
				// (count = 1) of the token to appear in that order before the exit
				// block's label.
				// CHECK-LABEL: @token_defined_in_the_loop
				func @token_defined_in_the_loop() {
				br ^bb1
				^bb1:
				// CHECK: ^[[BB1:.*]]:
				// CHECK: %[[TOKEN:.*]] = call @token()
				%token = call @token() : () -> !async.token
				// CHECK: async.runtime.await %[[TOKEN]]
				// CHECK: async.runtime.drop_ref %[[TOKEN]] {count = 1 : i32}
				async.runtime.await %token : !async.token
				%0 = call @cond(): () -> (i1)
				cond_br %0, ^bb1, ^bb2
				^bb2:
				// CHECK: ^[[BB2:.*]]:
				// CHECK: return
				return
				}

mlir/test/Integration/Dialect/Async/CPU/microbench-linalg-async-parallel-for.mlir

	// RUN: mlir-opt %s \			// RUN: mlir-opt %s \
	// RUN: -linalg-tile-to-parallel-loops="linalg-tile-sizes=256" \			// RUN: -linalg-tile-to-parallel-loops="linalg-tile-sizes=256" \
	// RUN: -async-parallel-for="num-concurrent-async-execute=4" \			// RUN: -async-parallel-for="num-concurrent-async-execute=4" \
	// RUN: -async-ref-counting \
	// RUN: -async-to-async-runtime \			// RUN: -async-to-async-runtime \
				// RUN: -async-runtime-ref-counting \
				// RUN: -async-runtime-ref-counting-opt \
	// RUN: -convert-async-to-llvm \			// RUN: -convert-async-to-llvm \
	// RUN: -lower-affine \			// RUN: -lower-affine \
	// RUN: -convert-linalg-to-loops \			// RUN: -convert-linalg-to-loops \
	// RUN: -convert-scf-to-std \			// RUN: -convert-scf-to-std \
	// RUN: -std-expand \			// RUN: -std-expand \
	// RUN: -convert-vector-to-llvm \			// RUN: -convert-vector-to-llvm \
	// RUN: -convert-std-to-llvm \			// RUN: -convert-std-to-llvm \
	// RUN: \| mlir-cpu-runner \			// RUN: \| mlir-cpu-runner \
	▲ Show 20 Lines • Show All 117 Lines • Show Last 20 Lines

mlir/test/Integration/Dialect/Async/CPU/test-async-parallel-for-1d.mlir

	// RUN: mlir-opt %s -async-parallel-for \			// RUN: mlir-opt %s -async-parallel-for \
	// RUN: -async-ref-counting \
	// RUN: -async-to-async-runtime \			// RUN: -async-to-async-runtime \
				// RUN: -async-runtime-ref-counting \
				// RUN: -async-runtime-ref-counting-opt \
	// RUN: -convert-async-to-llvm \			// RUN: -convert-async-to-llvm \
	// RUN: -convert-scf-to-std \			// RUN: -convert-scf-to-std \
	// RUN: -convert-std-to-llvm \			// RUN: -convert-std-to-llvm \
	// RUN: \| mlir-cpu-runner \			// RUN: \| mlir-cpu-runner \
	// RUN: -e entry -entry-point-result=void -O0 \			// RUN: -e entry -entry-point-result=void -O0 \
	// RUN: -shared-libs=%mlir_integration_test_dir/libmlir_runner_utils%shlibext \			// RUN: -shared-libs=%mlir_integration_test_dir/libmlir_runner_utils%shlibext \
	// RUN: -shared-libs=%mlir_integration_test_dir/libmlir_async_runtime%shlibext\			// RUN: -shared-libs=%mlir_integration_test_dir/libmlir_async_runtime%shlibext\
	// RUN: \| FileCheck %s --dump-input=always			// RUN: \| FileCheck %s --dump-input=always
	▲ Show 20 Lines • Show All 57 Lines • Show Last 20 Lines

mlir/test/Integration/Dialect/Async/CPU/test-async-parallel-for-2d.mlir

	// RUN: mlir-opt %s -async-parallel-for \			// RUN: mlir-opt %s -async-parallel-for \
	// RUN: -async-to-async-runtime \			// RUN: -async-to-async-runtime \
	// RUN: -async-ref-counting \			// RUN: -async-runtime-ref-counting \
				// RUN: -async-runtime-ref-counting-opt \
	// RUN: -convert-async-to-llvm \			// RUN: -convert-async-to-llvm \
	// RUN: -convert-scf-to-std \			// RUN: -convert-scf-to-std \
	// RUN: -convert-std-to-llvm \			// RUN: -convert-std-to-llvm \
	// RUN: \| mlir-cpu-runner \			// RUN: \| mlir-cpu-runner \
	// RUN: -e entry -entry-point-result=void -O0 \			// RUN: -e entry -entry-point-result=void -O0 \
	// RUN: -shared-libs=%mlir_integration_test_dir/libmlir_runner_utils%shlibext \			// RUN: -shared-libs=%mlir_integration_test_dir/libmlir_runner_utils%shlibext \
	// RUN: -shared-libs=%mlir_integration_test_dir/libmlir_async_runtime%shlibext\			// RUN: -shared-libs=%mlir_integration_test_dir/libmlir_async_runtime%shlibext\
	// RUN: \| FileCheck %s --dump-input=always			// RUN: \| FileCheck %s --dump-input=always
	▲ Show 20 Lines • Show All 84 Lines • Show Last 20 Lines

mlir/test/mlir-cpu-runner/async-group.mlir

	// RUN: mlir-opt %s -async-ref-counting \			// RUN: mlir-opt %s -async-to-async-runtime \
	// RUN: -async-to-async-runtime \			// RUN: -async-runtime-ref-counting \
				// RUN: -async-runtime-ref-counting-opt \
	// RUN: -convert-async-to-llvm \			// RUN: -convert-async-to-llvm \
	// RUN: -convert-std-to-llvm \			// RUN: -convert-std-to-llvm \
	// RUN: \| mlir-cpu-runner \			// RUN: \| mlir-cpu-runner \
	// RUN: -e main -entry-point-result=void -O0 \			// RUN: -e main -entry-point-result=void -O0 \
	// RUN: -shared-libs=%linalg_test_lib_dir/libmlir_c_runner_utils%shlibext \			// RUN: -shared-libs=%linalg_test_lib_dir/libmlir_c_runner_utils%shlibext \
	// RUN: -shared-libs=%linalg_test_lib_dir/libmlir_runner_utils%shlibext \			// RUN: -shared-libs=%linalg_test_lib_dir/libmlir_runner_utils%shlibext \
	// RUN: -shared-libs=%linalg_test_lib_dir/libmlir_async_runtime%shlibext \			// RUN: -shared-libs=%linalg_test_lib_dir/libmlir_async_runtime%shlibext \
	// RUN: \| FileCheck %s			// RUN: \| FileCheck %s
	Show All 32 Lines

mlir/test/mlir-cpu-runner/async-value.mlir

	// RUN: mlir-opt %s -async-ref-counting \			// RUN: mlir-opt %s -async-to-async-runtime \
	// RUN: -async-to-async-runtime \			// RUN: -async-runtime-ref-counting \
				// RUN: -async-runtime-ref-counting-opt \
	// RUN: -convert-async-to-llvm \			// RUN: -convert-async-to-llvm \
	// RUN: -convert-vector-to-llvm \			// RUN: -convert-vector-to-llvm \
	// RUN: -convert-std-to-llvm \			// RUN: -convert-std-to-llvm \
	// RUN: \| mlir-cpu-runner \			// RUN: \| mlir-cpu-runner \
	// RUN: -e main -entry-point-result=void -O0 \			// RUN: -e main -entry-point-result=void -O0 \
	// RUN: -shared-libs=%linalg_test_lib_dir/libmlir_c_runner_utils%shlibext \			// RUN: -shared-libs=%linalg_test_lib_dir/libmlir_c_runner_utils%shlibext \
	// RUN: -shared-libs=%linalg_test_lib_dir/libmlir_runner_utils%shlibext \			// RUN: -shared-libs=%linalg_test_lib_dir/libmlir_runner_utils%shlibext \
	// RUN: -shared-libs=%linalg_test_lib_dir/libmlir_async_runtime%shlibext \			// RUN: -shared-libs=%linalg_test_lib_dir/libmlir_async_runtime%shlibext \
	▲ Show 20 Lines • Show All 72 Lines • Show Last 20 Lines

mlir/test/mlir-cpu-runner/async.mlir

	// RUN: mlir-opt %s -async-ref-counting \			// RUN: mlir-opt %s -async-to-async-runtime \
	// RUN: -async-to-async-runtime \			// RUN: -async-runtime-ref-counting \
				// RUN: -async-runtime-ref-counting-opt \
	// RUN: -convert-async-to-llvm \			// RUN: -convert-async-to-llvm \
	// RUN: -convert-linalg-to-loops \			// RUN: -convert-linalg-to-loops \
	// RUN: -convert-scf-to-std \			// RUN: -convert-scf-to-std \
	// RUN: -convert-linalg-to-llvm \			// RUN: -convert-linalg-to-llvm \
	// RUN: -convert-std-to-llvm \			// RUN: -convert-std-to-llvm \
	// RUN: \| mlir-cpu-runner \			// RUN: \| mlir-cpu-runner \
	// RUN: -e main -entry-point-result=void -O0 \			// RUN: -e main -entry-point-result=void -O0 \
	// RUN: -shared-libs=%linalg_test_lib_dir/libmlir_c_runner_utils%shlibext \			// RUN: -shared-libs=%linalg_test_lib_dir/libmlir_c_runner_utils%shlibext \
	▲ Show 20 Lines • Show All 77 Lines • Show Last 20 Lines

This is an archive of the discontinued LLVM Phabricator instance.

[mlir] Async: add automatic reference counting at async.runtime operations level
ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 337022

mlir/include/mlir/Dialect/Async/Passes.h

mlir/include/mlir/Dialect/Async/Passes.td

mlir/lib/Dialect/Async/Transforms/AsyncRefCounting.cpp

mlir/lib/Dialect/Async/Transforms/AsyncRefCountingOptimization.cpp

mlir/lib/Dialect/Async/Transforms/AsyncRuntimeRefCounting.cpp

mlir/lib/Dialect/Async/Transforms/AsyncRuntimeRefCountingOpt.cpp

mlir/lib/Dialect/Async/Transforms/CMakeLists.txt

mlir/test/Dialect/Async/async-ref-counting-optimization.mlir

mlir/test/Dialect/Async/async-ref-counting.mlir

mlir/test/Dialect/Async/async-runtime-ref-counting-opt.mlir

mlir/test/Dialect/Async/async-runtime-ref-counting.mlir

mlir/test/Integration/Dialect/Async/CPU/microbench-linalg-async-parallel-for.mlir

mlir/test/Integration/Dialect/Async/CPU/test-async-parallel-for-1d.mlir

mlir/test/Integration/Dialect/Async/CPU/test-async-parallel-for-2d.mlir

mlir/test/mlir-cpu-runner/async-group.mlir

mlir/test/mlir-cpu-runner/async-value.mlir

mlir/test/mlir-cpu-runner/async.mlir

This is an archive of the discontinued LLVM Phabricator instance.

[mlir] Async: add automatic reference counting at async.runtime operations level
ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 337022

mlir/include/mlir/Dialect/Async/Passes.h

mlir/include/mlir/Dialect/Async/Passes.td

mlir/lib/Dialect/Async/Transforms/AsyncRefCounting.cpp

mlir/lib/Dialect/Async/Transforms/AsyncRefCountingOptimization.cpp

mlir/lib/Dialect/Async/Transforms/AsyncRuntimeRefCounting.cpp

mlir/lib/Dialect/Async/Transforms/AsyncRuntimeRefCountingOpt.cpp

mlir/lib/Dialect/Async/Transforms/CMakeLists.txt

mlir/test/Dialect/Async/async-ref-counting-optimization.mlir

mlir/test/Dialect/Async/async-ref-counting.mlir

mlir/test/Dialect/Async/async-runtime-ref-counting-opt.mlir

mlir/test/Dialect/Async/async-runtime-ref-counting.mlir

mlir/test/Integration/Dialect/Async/CPU/microbench-linalg-async-parallel-for.mlir

mlir/test/Integration/Dialect/Async/CPU/test-async-parallel-for-1d.mlir

mlir/test/Integration/Dialect/Async/CPU/test-async-parallel-for-2d.mlir

mlir/test/mlir-cpu-runner/async-group.mlir

mlir/test/mlir-cpu-runner/async-value.mlir

mlir/test/mlir-cpu-runner/async.mlir

[mlir] Async: add automatic reference counting at async.runtime operations level
ClosedPublic