diff --git a/mlir/include/mlir/Transforms/BufferAssignment.h b/mlir/include/mlir/Transforms/BufferAssignment.h new file mode 100644 --- /dev/null +++ b/mlir/include/mlir/Transforms/BufferAssignment.h @@ -0,0 +1,132 @@ +//===- BufferAssignment.h - Buffer Assignment Utilities ---------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This header file defines buffer assginment helper methods to compute proper +// and valid positions for placing Alloc and Dealloc operations. +// +//===----------------------------------------------------------------------===// + +#ifndef MLIR_TRANSFORM_BUFFERASSIGNMENT_H +#define MLIR_TRANSFORM_BUFFERASSIGNMENT_H + +#include "mlir/Analysis/Dominance.h" +#include "mlir/Analysis/Liveness.h" +#include "mlir/IR/Builders.h" +#include "mlir/IR/Operation.h" +#include "mlir/Support/LLVM.h" +#include "mlir/Transforms/DialectConversion.h" + +namespace mlir { + +/// Prepares a buffer assignment phase. It can place (user-defined) alloc +/// nodes. This simplifies the integration of the actual buffer-assignment +/// pass. Sample usage: +/// BufferAssignmentPlacer baHelper(regionOp); +/// -> determine alloc positions +/// auto allocPosition = baHelper.computeAllocPosition(value); +/// -> place alloc +/// allocBuilder.setInsertionPoint(positions.getAllocPosition()); +/// +/// alternatively: +/// -> place alloc +/// baHelper.insertAlloc(...); +/// Note: this class is intended to be used during legalization. In order +/// to move alloc and dealloc nodes into the right places you can use the +/// createBufferAssignmentPass() function. +class BufferAssignmentPlacer { +public: + /// Creates a new assignment builder. 
+ explicit BufferAssignmentPlacer(Operation *op); + + /// Returns the operation this analysis was constructed from. + Operation *getOperation() const { return operation; } + + /// Computes the actual position to place allocs for the given value. + OpBuilder::InsertPoint computeAllocPosition(Value value); + +private: + /// The operation this analysis was constructed from. + Operation *operation; +}; + +/// Helper conversion pattern that encapsulates a BufferAssignmentPlacer +/// instance. +template +class BufferAssignmentOpConversionPattern + : public OpConversionPattern { +public: + explicit BufferAssignmentOpConversionPattern( + MLIRContext *context_, + BufferAssignmentPlacer *bufferAssignment_ = nullptr, + PatternBenefit benefit_ = 1) + : OpConversionPattern(context_, benefit_), + bufferAssignment(bufferAssignment_) {} + +protected: + BufferAssignmentPlacer *bufferAssignment; +}; + +// Converts only the tensor-type function and block arguments to memref-type. +class FunctionAndBlockSignatureConverter + : public BufferAssignmentOpConversionPattern { +public: + using BufferAssignmentOpConversionPattern< + FuncOp>::BufferAssignmentOpConversionPattern; + + // Adding functions whose arguments are memref type to the set of legal + // operations. + static void addDynamicallyLegalFuncOp(ConversionTarget &target); + + // Performs the actual signature rewriting step. + LogicalResult + matchAndRewrite(FuncOp funcOp, ArrayRef operands, + ConversionPatternRewriter &rewriter) const final; +}; + +// This pattern converter transforms a non-void ReturnOpSourceTy into a void +// return of type ReturnOpTargetTy. It uses a copy operation of type CopyOpTy to +// copy the results to the output buffer. +template +class NonVoidToVoidReturnOpConverter + : public BufferAssignmentOpConversionPattern { +public: + using BufferAssignmentOpConversionPattern< + ReturnOpSourceTy>::BufferAssignmentOpConversionPattern; + + // Performs the actual return-op conversion step. 
LogicalResult + matchAndRewrite(ReturnOpSourceTy returnOp, ArrayRef<Value> operands, + ConversionPatternRewriter &rewriter) const final { + auto numReturnValues = returnOp.getNumOperands(); + auto funcOp = returnOp.template getParentOfType<FuncOp>(); + auto numFuncArgs = funcOp.getNumArguments(); + auto loc = returnOp.getLoc(); + + // Find the corresponding output buffer for each operand. + for (auto operand : llvm::enumerate(operands)) { + auto returnArgNumber = numFuncArgs - numReturnValues + operand.index(); + auto dstBuffer = funcOp.getArgument(returnArgNumber); + if (dstBuffer == operand.value()) + continue; + + // Insert the copy operation to copy before the return. + rewriter.setInsertionPoint( + returnOp.getOperation()->getBlock()->getTerminator()); + rewriter.create<CopyOpTy>(loc, operand.value(), + funcOp.getArgument(returnArgNumber)); + } + // Insert the new target return operation. + rewriter.replaceOpWithNewOp<ReturnOpTargetTy>(returnOp); + return success(); + }
However, the +// liveness analysis does not pay attention to aliases, which can occur due to +// branches (and their associated block arguments) in general. For this purpose, +// BufferAssignment firstly finds all possible aliases for a single value (using +// the BufferAssignmentAliasAnalysis class). Consider the following example: +// +// ^bb0(%arg0): +// cond_br %cond, ^bb1, ^bb2 +// ^bb1: +// br ^exit(%arg0) +// ^bb2: +// %new_value = ... +// br ^exit(%new_value) +// ^exit(%arg1): +// return %arg1; +// +// Using liveness information on its own would cause us to place the allocs and +// deallocs in the wrong block. This is due to the fact that %new_value will not +// be liveOut of its block. Instead, we have to place the alloc for %new_value +// in bb0 and its associated dealloc in exit. Using the class +// BufferAssignmentAliasAnalysis, we will find out that %new_value has a +// potential alias %arg1. In order to find the dealloc position we have to find +// all potential aliases, iterate over their uses and find the common +// post-dominator block. In this block we can safely be sure that %new_value +// will die and can use liveness information to determine the exact operation +// after which we have to insert the dealloc. Finding the alloc position is +// highly similar and non- obvious. Again, we have to consider all potential +// aliases and find the common dominator block to place the alloc. +// +// TODO(dfki): +// The current implementation does not support loops. The only thing that +// is currently missing is a high-level loop analysis that allows us to move +// allocs and deallocs outside of the loop blocks. 
+// +//===----------------------------------------------------------------------===// + +#include "mlir/Transforms/BufferAssignment.h" +#include "mlir/Dialect/StandardOps/IR/Ops.h" +#include "mlir/IR/Function.h" +#include "mlir/IR/Operation.h" +#include "mlir/Pass/Pass.h" + +using namespace mlir; + +namespace { + +//===----------------------------------------------------------------------===// +// BufferAssignmentAliasAnalysis +//===----------------------------------------------------------------------===// + +/// A straight-forward alias analysis which ensures that all aliases of all +/// values will be determined. This is a requirement for the BufferAssignment +/// class since you need to determine safe positions to place alloc and +/// deallocs. +class BufferAssignmentAliasAnalysis { +public: + using ValueSetT = SmallPtrSet; + +public: + /// Constructs a new alias analysis using the op provided. + BufferAssignmentAliasAnalysis(Operation *op) { build(op->getRegions()); } + + /// Finds all immediate and indirect aliases this value could potentially + /// have. Note that the resulting set will also contain the value provided as + /// it is an alias of itself. + ValueSetT resolve(Value value) const { + ValueSetT result; + resolveRecursive(value, result); + return result; + } + +private: + /// Recursively determines alias information for the given value. It stores + /// all newly found potential aliases in the given result set. + void resolveRecursive(Value value, ValueSetT &result) const { + if (!result.insert(value).second) + return; + auto it = aliases.find(value); + if (it == aliases.end()) + return; + for (auto alias : it->second) + resolveRecursive(alias, result); + } + + /// This function constructs a mapping from values to its immediate aliases. + /// It iterates over all blocks, gets their predecessors, determines the + /// values that will be passed to the corresponding block arguments and + /// inserts them into map. 
+ void build(MutableArrayRef regions) { + for (Region ®ion : regions) { + for (Block &block : region) { + // Iterate over all predecessor and get the mapped values to their + // corresponding block arguments values. + for (auto pred : block.getPredecessors()) { + // Determine the current successor index of the current predecessor. + unsigned successorIndex = std::distance( + pred->getSuccessors().begin(), + llvm::find_if(pred->getSuccessors(), [&](Block *successor) { + return successor == █ + })); + // Get the terminator and the values that will be passed to our block. + if (auto branchInterface = + dyn_cast(pred->getTerminator())) { + // Query the branch op interace to get the successor operands. + auto successorOps = + branchInterface.getSuccessorOperands(successorIndex); + if (successorOps.hasValue()) { + // Build the actual mapping of values to their immediate aliases. + for (auto arg : block.getArguments()) { + Value predecessorArgValue = + successorOps.getValue()[arg.getArgNumber()]; + aliases[predecessorArgValue].insert(arg); + } + } + } + } + } + } + } + + /// Maps values to all immediate aliases this value can have. + llvm::DenseMap aliases; +}; + +//===----------------------------------------------------------------------===// +// BufferAssignmentPositions +//===----------------------------------------------------------------------===// + +/// Stores proper alloc and dealloc positions to place dialect-specific alloc +/// and dealloc operations. +struct BufferAssignmentPositions { +public: + BufferAssignmentPositions() + : allocPosition(nullptr), deallocPosition(nullptr) {} + + /// Creates a new positions tuple including alloc and dealloc positions. + BufferAssignmentPositions(Operation *allocPosition, + Operation *deallocPosition) + : allocPosition(allocPosition), deallocPosition(deallocPosition) {} + + /// Returns the alloc position before which the alloc operation has to be + /// inserted. 
+ Operation *getAllocPosition() const { return allocPosition; } + + /// Returns the dealloc position after which the dealloc operation has to be + /// inserted. + Operation *getDeallocPosition() const { return deallocPosition; } + +private: + Operation *allocPosition; + Operation *deallocPosition; +}; + +//===----------------------------------------------------------------------===// +// BufferAssignmentAnalysis +//===----------------------------------------------------------------------===// + +// The main buffer assignment analysis used to place allocs and deallocs. +class BufferAssignmentAnalysis { +public: + using DeallocSetT = SmallPtrSet; + +public: + BufferAssignmentAnalysis(Operation *op) + : operation(op), liveness(op), dominators(op), postDominators(op), + aliases(op) {} + + /// Computes the actual positions to place allocs and deallocs for the given + /// value. + BufferAssignmentPositions computeAllocAndDeallocPositions(Value value) const { + if (value.use_empty()) { + return BufferAssignmentPositions(value.getDefiningOp(), + value.getDefiningOp()); + } + // Get all possible aliases + auto possibleValues = aliases.resolve(value); + return BufferAssignmentPositions(getAllocPosition(value, possibleValues), + getDeallocPosition(value, possibleValues)); + } + + /// Finds all associated dealloc nodes for the alloc nodes using alias + /// information. + DeallocSetT findAssociatedDeallocs(AllocOp alloc) const { + DeallocSetT result; + auto possibleValues = aliases.resolve(alloc); + for (auto alias : possibleValues) + for (auto user : alias.getUsers()) { + if (isa(user)) + result.insert(user); + } + return result; + } + + /// Dumps the buffer assignment information to the given stream. 
+ void print(raw_ostream &os) const { + os << "// ---- Buffer Assignment -----\n"; + + for (Region ®ion : operation->getRegions()) + for (Block &block : region) + for (Operation &operation : block) + for (Value result : operation.getResults()) { + BufferAssignmentPositions positions = + computeAllocAndDeallocPositions(result); + os << "Positions for "; + result.print(os); + os << "\n Alloc: "; + positions.getAllocPosition()->print(os); + os << "\n Dealloc: "; + positions.getDeallocPosition()->print(os); + os << "\n"; + } + } + +private: + /// Finds a proper placement block to store alloc/dealloc node according to + /// the algorithm described at the top of the file. It supports dominator and + /// post-dominator analyses via template arguments. + template + Block *findPlacementBlock(Value value, const AliasesT &aliases, + const DominatorT &doms) const { + assert(!value.isa() && "Cannot place a block argument"); + // Start with the current block the value is defined in. + Block *dom = value.getDefiningOp()->getBlock(); + // Iterate over all aliases and their uses to find a safe placement block + // according to the given dominator information. + for (auto alias : aliases) + for (auto user : alias.getUsers()) { + // Move upwards in the dominator tree to find an appropriate + // dominator block that takes the current use into account. + dom = doms.findNearestCommonDominator(dom, user->getBlock()); + } + return dom; + } + + /// Finds a proper alloc positions according to the algorithm described at the + /// top of the file. + template + Operation *getAllocPosition(Value value, const AliasesT &aliases) const { + // Determine the actual block to place the alloc and get liveness + // information. + auto placementBlock = findPlacementBlock(value, aliases, dominators); + auto livenessInfo = liveness.getLiveness(placementBlock); + + // We have to ensure that the alloc will be before the first use of all + // aliases of the given value. 
We first assume that there are no uses in the + // placementBlock and that we can safely place the alloc before the + // terminator at the end of the block. + Operation *startOperation = placementBlock->getTerminator(); + // Iterate over all aliases and ensure that the startOperation will point to + // the first operation of all potential aliases in the placementBlock. + for (auto alias : aliases) { + auto aliasStartOperation = livenessInfo->getStartOperation(alias); + // Check whether the aliasStartOperation lies in the desired block and + // whether it is before the current startOperation. If yes, this will be + // the new startOperation. + if (aliasStartOperation->getBlock() == placementBlock && + aliasStartOperation->isBeforeInBlock(startOperation)) + startOperation = aliasStartOperation; + } + // startOperation is the first operation before which we can safely store + // the alloc taking all potential aliases into account. + return startOperation; + } + + /// Finds a proper dealloc positions according to the algorithm described at + /// the top of the file. + template + Operation *getDeallocPosition(Value value, const AliasesT &aliases) const { + // Determine the actual block to place the dealloc and get liveness + // information. + auto placementBlock = findPlacementBlock(value, aliases, postDominators); + auto livenessInfo = liveness.getLiveness(placementBlock); + + // We have to ensure that the dealloc will be after the last use of all + // aliases of the given value. We first assume that there are no uses in the + // placementBlock and that we can safely place the dealloc at the beginning. + Operation *endOperation = &placementBlock->front(); + // Iterate over all aliases and ensure that the endOperation will point to + // the last operation of all potential aliases in the placementBlock. 
+ for (auto alias : aliases) { + auto aliasEndOperation = + livenessInfo->getEndOperation(alias, endOperation); + // Check whether the aliasEndOperation lies in the desired block and + // whether it is behind the current endOperation. If yes, this will be the + // new endOperation. + if (aliasEndOperation->getBlock() == placementBlock && + endOperation->isBeforeInBlock(aliasEndOperation)) + endOperation = aliasEndOperation; + } + // endOperation is the last operation behind which we can safely store the + // dealloc taking all potential aliases into account. + return endOperation; + } + + /// The operation this transformation was constructed from. + Operation *operation; + + /// The underlying liveness analysis to compute fine grained information about + /// alloc and dealloc positions. + Liveness liveness; + + /// The dominator analysis to place allocs in the appropriate blocks. + DominanceInfo dominators; + + /// The post dominator analysis to place deallocs in the appropriate blocks. + PostDominanceInfo postDominators; + + /// The internal alias analysis to ensure that allocs and deallocs take all + /// their potential aliases into account. + BufferAssignmentAliasAnalysis aliases; +}; + +//===----------------------------------------------------------------------===// +// BufferAssignmentPass +//===----------------------------------------------------------------------===// + +/// The actual buffer assignment pass that moves alloc and dealloc nodes into +/// the right positions. It uses the algorithm described at the top of the file. +// TODO(dfki): create a templated version that allows to match dialect-specific +// alloc/dealloc nodes and to insert dialect-specific dealloc node. +struct BufferAssignmentPass + : mlir::PassWrapper { + void runOnFunction() override { + // Get required analysis information first. + auto &analysis = getAnalysis(); + + // Compute an initial placement of all nodes. 
llvm::SmallDenseMap<Value, BufferAssignmentPositions, 16> placements;
+OpBuilder::InsertPoint +BufferAssignmentPlacer::computeAllocPosition(Value value) { + Operation *insertOp = value.getDefiningOp(); + assert(insertOp && "There is not a defining operation for the input value"); + OpBuilder opBuilder(insertOp); + return opBuilder.saveInsertionPoint(); +} + +//===----------------------------------------------------------------------===// +// FunctionAndBlockSignatureConverter +//===----------------------------------------------------------------------===// + +// Performs the actual signature rewriting step. +LogicalResult FunctionAndBlockSignatureConverter::matchAndRewrite( + FuncOp funcOp, ArrayRef operands, + ConversionPatternRewriter &rewriter) const { + auto toMemrefConverter = [&](Type t) -> Type { + if (auto tensorType = t.dyn_cast()) + return MemRefType::get(tensorType.getShape(), + tensorType.getElementType()); + return t; + }; + // Converting tensor-type function arguments to memref-type. + auto funcType = funcOp.getType(); + TypeConverter::SignatureConversion conversion(funcType.getNumInputs()); + for (auto argType : llvm::enumerate(funcType.getInputs())) + conversion.addInputs(argType.index(), toMemrefConverter(argType.value())); + for (auto resType : funcType.getResults()) + conversion.addInputs(toMemrefConverter(resType)); + rewriter.updateRootInPlace(funcOp, [&] { + funcOp.setType( + rewriter.getFunctionType(conversion.getConvertedTypes(), llvm::None)); + rewriter.applySignatureConversion(&funcOp.getBody(), conversion); + }); + // Converting tensor-type block arugments of all blocks inside the + // function region to memref-type except for the entry block. 
+ for (auto &block : funcOp.getBlocks()) { + if (block.isEntryBlock()) + continue; + for (int i = 0, e = block.getNumArguments(); i < e; ++i) { + auto oldArg = block.getArgument(i); + auto newArg = + block.insertArgument(i, toMemrefConverter(oldArg.getType())); + oldArg.replaceAllUsesWith(newArg); + block.eraseArgument(i + 1); + } + } + return success(); +} + +/// A helper method to make the functions, whose all block argument types are +/// Memref or non-shaped type, legal. BufferAssignmentPlacer expects all +/// function and block argument types are in Memref or non-shaped type. Using +/// this helper method and additionally, FunctionAndBlockSignatureConverter as a +/// pattern conversion make sure that the type of block arguments are compatible +/// with using BufferAssignmentPlacer. +void FunctionAndBlockSignatureConverter::addDynamicallyLegalFuncOp( + ConversionTarget &target) { + auto isLegalBlockArg = [](BlockArgument arg) -> bool { + auto type = arg.getType(); + return type.isa() || !type.isa(); + }; + target.addDynamicallyLegalOp([&](FuncOp funcOp) { + bool legality = true; + for (auto &block2 : funcOp.getBlocks()) { + legality &= llvm::all_of(block2.getArguments(), isLegalBlockArg); + if (!legality) + break; + } + return legality; + }); +} + +//===----------------------------------------------------------------------===// +// Buffer assignment pass registrations +//===----------------------------------------------------------------------===// + +namespace mlir { +void registerBufferAssignmentPass() { + PassRegistration( + "buffer-assignment", + "Executes buffer assignment pass to automatically move alloc and dealloc " + "operations into their proper positions"); +} +} // end namespace mlir diff --git a/mlir/lib/Transforms/CMakeLists.txt b/mlir/lib/Transforms/CMakeLists.txt --- a/mlir/lib/Transforms/CMakeLists.txt +++ b/mlir/lib/Transforms/CMakeLists.txt @@ -1,6 +1,7 @@ add_subdirectory(Utils) add_mlir_library(MLIRTransforms + BufferAssignment.cpp 
Canonicalizer.cpp CSE.cpp DialectConversion.cpp diff --git a/mlir/test/Transforms/buffer-assignment-prepration.mlir b/mlir/test/Transforms/buffer-assignment-prepration.mlir new file mode 100644 --- /dev/null +++ b/mlir/test/Transforms/buffer-assignment-prepration.mlir @@ -0,0 +1,95 @@ +// RUN: mlir-opt -test-buffer-assignment-preparation -allow-unregistered-dialect -split-input-file %s | FileCheck %s -dump-input-on-failure + +// CHECK-LABEL: func @func_signature_conversion +func @func_signature_conversion(%arg0: tensor<4x8xf32>) { + return +} +// CHECK: ({{.*}}: memref<4x8xf32>) { + +// ----- + +// CHECK-LABEL: func @non_void_to_void_return_op_converter +func @non_void_to_void_return_op_converter(%arg0: tensor<4x8xf32>) -> tensor<4x8xf32> { + return %arg0 : tensor<4x8xf32> +} +// CHECK: (%[[ARG0:.*]]: [[TYPE:.*]]<[[RANK:.*]]>, %[[RESULT:.*]]: [[TYPE]]<[[RANK]]>) { +// CHECK-NEXT: "buffer_assignment_test.copy"(%[[ARG0]], %[[RESULT]]) +// CHECK-NEXT: return + +// ----- + +// CHECK-LABEL: func @func_and_block_signature_conversion +func @func_and_block_signature_conversion(%arg0 : tensor<2xf32>, %cond : i1, %arg1: tensor<4x4xf32>) -> tensor<4x4xf32>{ + cond_br %cond, ^bb1, ^bb2 + ^bb1: + br ^exit(%arg0 : tensor<2xf32>) + ^bb2: + br ^exit(%arg0 : tensor<2xf32>) + ^exit(%arg2: tensor<2xf32>): + return %arg1 : tensor<4x4xf32> +} +// CHECK: (%[[ARG0:.*]]: [[ARG0_TYPE:.*]], %[[COND:.*]]: i1, %[[ARG1:.*]]: [[ARG1_TYPE:.*]], %[[RESULT:.*]]: [[RESULT_TYPE:.*]]) { +// CHECK: br ^[[EXIT_BLOCK:.*]](%[[ARG0]] : [[ARG0_TYPE]]) +// CHECK: br ^[[EXIT_BLOCK]](%[[ARG0]] : [[ARG0_TYPE]]) +// CHECK: ^[[EXIT_BLOCK]](%{{.*}}: [[ARG0_TYPE]]) +// CHECK-NEXT: "buffer_assignment_test.copy"(%[[ARG1]], %[[RESULT]]) +// CHECK-NEXT: return + +// ----- + +// CHECK-LABEL: func @compute_allocs_position_simple +func @compute_allocs_position_simple(%cond: i1, %arg0: tensor<2xf32>) -> tensor<2xf32>{ + %0 = "buffer_assignment_test.unary"(%arg0) : (tensor<2xf32>) -> tensor<2xf32> + %1 = 
"buffer_assignment_test.unary"(%0) : (tensor<2xf32>) -> tensor<2xf32> + return %1 : tensor<2xf32> +} +// CHECK: (%{{.*}}: {{.*}}, %[[ARG0:.*]]: memref<2xf32>, +// CHECK-NEXT: %[[FIRST_ALLOC:.*]] = alloc() +// CHECK-NEXT: "buffer_assignment_test.unary_lowered"(%[[ARG0]], %[[FIRST_ALLOC]]) +// CHECK-NEXT: %[[SECOND_ALLOC:.*]] = alloc() +// CHECK-NEXT: "buffer_assignment_test.unary_lowered"(%[[FIRST_ALLOC]], %[[SECOND_ALLOC]]) + +// ----- + +// CHECK-LABEL: func @compute_allocs_position +func @compute_allocs_position(%cond: i1, %arg0: tensor<2xf32>) -> tensor<2xf32>{ + %0 = "buffer_assignment_test.unary"(%arg0) : (tensor<2xf32>) -> tensor<2xf32> + %1 = "buffer_assignment_test.unary"(%0) : (tensor<2xf32>) -> tensor<2xf32> + cond_br %cond, ^bb1(%arg0, %0: tensor<2xf32>, tensor<2xf32>), + ^bb2(%0, %arg0: tensor<2xf32>, tensor<2xf32>) + ^bb1(%arg1 : tensor<2xf32>, %arg2 : tensor<2xf32>): + %2 = "buffer_assignment_test.unary"(%arg0) : (tensor<2xf32>) -> tensor<2xf32> + %3 = "buffer_assignment_test.unary"(%2) : (tensor<2xf32>) -> tensor<2xf32> + br ^exit(%arg1, %arg2 : tensor<2xf32>, tensor<2xf32>) + ^bb2(%arg3 : tensor<2xf32>, %arg4 : tensor<2xf32>): + %4 = "buffer_assignment_test.unary"(%arg0) : (tensor<2xf32>) -> tensor<2xf32> + %5 = "buffer_assignment_test.unary"(%4) : (tensor<2xf32>) -> tensor<2xf32> + br ^exit(%arg3, %arg4 : tensor<2xf32>, tensor<2xf32>) + ^exit(%arg5 : tensor<2xf32>, %arg6 : tensor<2xf32>): + %6 = "buffer_assignment_test.unary"(%arg0) : (tensor<2xf32>) -> tensor<2xf32> + %7 = "buffer_assignment_test.unary"(%6) : (tensor<2xf32>) -> tensor<2xf32> + return %7 : tensor<2xf32> +} +// CHECK: (%{{.*}}: {{.*}}, %[[ARG0:.*]]: memref<2xf32>, +// CHECK-NEXT: %[[ALLOC0:.*]] = alloc() +// CHECK-NEXT: "buffer_assignment_test.unary_lowered"(%[[ARG0]], %[[ALLOC0]]) +// CHECK-NEXT: %[[ALLOC1:.*]] = alloc() +// CHECK-NEXT: "buffer_assignment_test.unary_lowered"(%[[ALLOC0]], %[[ALLOC1]]) +// CHECK-NEXT: cond_br %{{.*}}, ^[[BB0:.*]]({{.*}}), ^[[BB1:.*]]( +// CHECK-NEXT: 
^[[BB0]] +// CHECK-NEXT: %[[ALLOC2:.*]] = alloc() +// CHECK-NEXT: "buffer_assignment_test.unary_lowered"(%[[ARG0]], %[[ALLOC2]]) +// CHECK-NEXT: %[[ALLOC3:.*]] = alloc() +// CHECK-NEXT: "buffer_assignment_test.unary_lowered"(%[[ALLOC2]], %[[ALLOC3]]) +// CHECK-NEXT: br ^[[EXIT:.*]]({{.*}}) +// CHECK-NEXT: ^[[BB1]] +// CHECK-NEXT: %[[ALLOC4:.*]] = alloc() +// CHECK-NEXT: "buffer_assignment_test.unary_lowered"(%[[ARG0]], %[[ALLOC4]]) +// CHECK-NEXT: %[[ALLOC5:.*]] = alloc() +// CHECK-NEXT: "buffer_assignment_test.unary_lowered"(%[[ALLOC4]], %[[ALLOC5]]) +// CHECK-NEXT: br ^[[EXIT]] +// CHECK-NEXT: ^[[EXIT]] +// CHECK-NEXT: %[[ALLOC6:.*]] = alloc() +// CHECK-NEXT: "buffer_assignment_test.unary_lowered"(%[[ARG0]], %[[ALLOC6]]) +// CHECK-NEXT: %[[ALLOC7:.*]] = alloc() +// CHECK-NEXT: "buffer_assignment_test.unary_lowered"(%[[ALLOC6]], %[[ALLOC7]]) diff --git a/mlir/test/Transforms/buffer-assignment.mlir b/mlir/test/Transforms/buffer-assignment.mlir new file mode 100644 --- /dev/null +++ b/mlir/test/Transforms/buffer-assignment.mlir @@ -0,0 +1,230 @@ +// RUN: mlir-opt -buffer-assignment -allow-unregistered-dialect -split-input-file %s | FileCheck %s -dump-input-on-failure + +// CHECK-LABEL: func @condBranch +func @condBranch(%arg0: i1, %arg1: memref<2xf32>, %arg2: memref<2xf32>) { + cond_br %arg0, ^bb1, ^bb2 +^bb1: + br ^bb3(%arg1 : memref<2xf32>) +^bb2: + %0 = alloc() : memref<2xf32> + "buffer_assignment_test.unary_lowered"(%arg1, %0) : (memref<2xf32>, memref<2xf32>) -> () + br ^bb3(%0 : memref<2xf32>) +^bb3(%1: memref<2xf32>): + "buffer_assignment_test.copy"(%1, %arg2) : (memref<2xf32>, memref<2xf32>) -> () + return +} +// CHECK-NEXT: %[[ALLOC:.*]] = alloc() +// CHECK-NEXT: cond_br +// CHECK: "buffer_assignment_test.copy +// CHECK-NEXT: dealloc %[[ALLOC]] +// CHECK-NEXT: return + +// ----- + +// CHECK-LABEL: func @emptyUsesValue +func @emptyUsesValue(%arg0: memref<4xf32>) { + %0 = alloc() : memref<4xf32> + return +} +// CHECK-NEXT: %[[ALLOC:.*]] = alloc() +// 
CHECK-NEXT: dealloc %[[ALLOC]] +// CHECK-NEXT: return + +// ----- + +// CHECK-LABEL: func @criticalEdge +func @criticalEdge(%arg0: i1, %arg1: memref<2xf32>, %arg2: memref<2xf32>) { + cond_br %arg0, ^bb1, ^bb2(%arg1 : memref<2xf32>) +^bb1: + %0 = alloc() : memref<2xf32> + "buffer_assignment_test.unary_lowered"(%arg1, %0) : (memref<2xf32>, memref<2xf32>) -> () + br ^bb2(%0 : memref<2xf32>) +^bb2(%1: memref<2xf32>): + "buffer_assignment_test.copy"(%1, %arg2) : (memref<2xf32>, memref<2xf32>) -> () + return +} +// CHECK-NEXT: %[[ALLOC:.*]] = alloc() +// CHECK-NEXT: cond_br +// CHECK: "buffer_assignment_test.copy +// CHECK-NEXT: dealloc %[[ALLOC]] +// CHECK-NEXT: return + +// ----- + +// CHECK-LABEL: func @invCriticalEdge +func @invCriticalEdge(%arg0: i1, %arg1: memref<2xf32>, %arg2: memref<2xf32>) { + %0 = alloc() : memref<2xf32> + "buffer_assignment_test.unary_lowered"(%arg1, %0) : (memref<2xf32>, memref<2xf32>) -> () + cond_br %arg0, ^bb1, ^bb2(%arg1 : memref<2xf32>) +^bb1: + br ^bb2(%0 : memref<2xf32>) +^bb2(%1: memref<2xf32>): + "buffer_assignment_test.copy"(%1, %arg2) : (memref<2xf32>, memref<2xf32>) -> () + return +} +// CHECK: dealloc +// CHECK-NEXT: return + +// ----- + +// CHECK-LABEL: func @ifElse +func @ifElse(%arg0: i1, %arg1: memref<2xf32>, %arg2: memref<2xf32>) { + %0 = alloc() : memref<2xf32> + "buffer_assignment_test.unary_lowered"(%arg1, %0) : (memref<2xf32>, memref<2xf32>) -> () + cond_br %arg0, ^bb1(%arg1, %0 : memref<2xf32>, memref<2xf32>), ^bb2(%0, %arg1 : memref<2xf32>, memref<2xf32>) +^bb1(%1: memref<2xf32>, %2: memref<2xf32>): + br ^bb3(%1, %2 : memref<2xf32>, memref<2xf32>) +^bb2(%3: memref<2xf32>, %4: memref<2xf32>): + br ^bb3(%3, %4 : memref<2xf32>, memref<2xf32>) +^bb3(%5: memref<2xf32>, %6: memref<2xf32>): + %7 = alloc() : memref<2xf32> + "buffer_assignment_test.unary_lowered"(%5, %7) : (memref<2xf32>, memref<2xf32>) -> () + "buffer_assignment_test.copy"(%7, %arg2) : (memref<2xf32>, memref<2xf32>) -> () + return +} +// CHECK-NEXT: 
%[[FIRST_ALLOC:.*]] = alloc() +// CHECK-NEXT: "buffer_assignment_test.unary_lowered" +// CHECK: %[[SECOND_ALLOC:.*]] = alloc() +// CHECK-NEXT: "buffer_assignment_test.unary_lowered" +// CHECK-NEXT: dealloc %[[FIRST_ALLOC]] +// CHECK-NEXT: "buffer_assignment_test.copy +// CHECK-NEXT: dealloc %[[SECOND_ALLOC]] +// CHECK-NEXT: return + +// ----- + +// CHECK-LABEL: func @ifElseNoUsers +func @ifElseNoUsers(%arg0: i1, %arg1: memref<2xf32>, %arg2: memref<2xf32>) { + %0 = alloc() : memref<2xf32> + "buffer_assignment_test.unary_lowered"(%arg1, %0) : (memref<2xf32>, memref<2xf32>) -> () + cond_br %arg0, ^bb1(%arg1, %0 : memref<2xf32>, memref<2xf32>), ^bb2(%0, %arg1 : memref<2xf32>, memref<2xf32>) +^bb1(%1: memref<2xf32>, %2: memref<2xf32>): + br ^bb3(%1, %2 : memref<2xf32>, memref<2xf32>) +^bb2(%3: memref<2xf32>, %4: memref<2xf32>): + br ^bb3(%3, %4 : memref<2xf32>, memref<2xf32>) +^bb3(%5: memref<2xf32>, %6: memref<2xf32>): + "buffer_assignment_test.copy"(%arg1, %arg2) : (memref<2xf32>, memref<2xf32>) -> () + return +} +// CHECK: dealloc +// CHECK-NEXT: return + +// ----- + +// CHECK-LABEL: func @ifElseNested +func @ifElseNested(%arg0: i1, %arg1: memref<2xf32>, %arg2: memref<2xf32>) { + %0 = alloc() : memref<2xf32> + "buffer_assignment_test.unary_lowered"(%arg1, %0) : (memref<2xf32>, memref<2xf32>) -> () + cond_br %arg0, ^bb1(%arg1, %0 : memref<2xf32>, memref<2xf32>), ^bb2(%0, %arg1 : memref<2xf32>, memref<2xf32>) +^bb1(%1: memref<2xf32>, %2: memref<2xf32>): + br ^bb5(%1, %2 : memref<2xf32>, memref<2xf32>) +^bb2(%3: memref<2xf32>, %4: memref<2xf32>): + cond_br %arg0, ^bb3(%3 : memref<2xf32>), ^bb4(%4 : memref<2xf32>) +^bb3(%5: memref<2xf32>): + br ^bb5(%5, %3 : memref<2xf32>, memref<2xf32>) +^bb4(%6: memref<2xf32>): + br ^bb5(%3, %6 : memref<2xf32>, memref<2xf32>) +^bb5(%7: memref<2xf32>, %8: memref<2xf32>): + %9 = alloc() : memref<2xf32> + "buffer_assignment_test.unary_lowered"(%7, %9) : (memref<2xf32>, memref<2xf32>) -> () + "buffer_assignment_test.copy"(%9, %arg2) : 
(memref<2xf32>, memref<2xf32>) -> () + return +} +// CHECK-NEXT: %[[FIRST_ALLOC:.*]] = alloc() +// CHECK-NEXT: "buffer_assignment_test.unary_lowered" +// CHECK: %[[SECOND_ALLOC:.*]] = alloc() +// CHECK-NEXT: "buffer_assignment_test.unary_lowered" +// CHECK-NEXT: dealloc %[[FIRST_ALLOC]] +// CHECK-NEXT: "buffer_assignment_test.copy +// CHECK-NEXT: dealloc %[[SECOND_ALLOC]] +// CHECK-NEXT: return + +// ----- + +// CHECK-LABEL: func @redundantOperations +func @redundantOperations(%arg0: memref<4xf32>) { + %0 = alloc() : memref<4xf32> + "buffer_assignment_test.unary_lowered"(%arg0, %0) : (memref<4xf32>, memref<4xf32>) -> () + %1 = alloc() : memref<4xf32> + "buffer_assignment_test.unary_lowered"(%0, %1) : (memref<4xf32>, memref<4xf32>) -> () + return +} +// CHECK: (%[[ARG0:.*]]: {{.*}}) +// CHECK-NEXT: %[[FIRST_ALLOC:.*]] = alloc() +// CHECK-NEXT: "buffer_assignment_test.unary_lowered"(%[[ARG0]], %[[FIRST_ALLOC]]) +// CHECK-NEXT: %[[SECOND_ALLOC:.*]] = alloc() +// CHECK-NEXT: "buffer_assignment_test.unary_lowered"(%[[FIRST_ALLOC]], %[[SECOND_ALLOC]]) +// CHECK-NEXT: dealloc +// CHECK-NEXT: dealloc +// CHECK-NEXT: return + +// ----- + +// CHECK-LABEL: func @moving_alloc_and_inserting_missing_dealloc +func @moving_alloc_and_inserting_missing_dealloc(%cond: i1, %arg0: memref<2xf32>, %arg1: memref<2xf32>){ + cond_br %cond, ^bb1, ^bb2 +^bb1: + %0 = alloc() : memref<2xf32> + "buffer_assignment_test.unary_lowered"(%arg0, %0) : (memref<2xf32>, memref<2xf32>) -> () + br ^exit(%0 : memref<2xf32>) +^bb2: + + %1 = alloc() : memref<2xf32> + "buffer_assignment_test.unary_lowered"(%arg0, %1) : (memref<2xf32>, memref<2xf32>) -> () + br ^exit(%1 : memref<2xf32>) +^exit(%arg2: memref<2xf32>): + "buffer_assignment_test.copy"(%arg2, %arg1) : (memref<2xf32>, memref<2xf32>) -> () + return +} +// CHECK-NEXT: %{{.*}} = alloc() +// CHECK-NEXT: %{{.*}} = alloc() +// CHECK: "buffer_assignment_test.copy" +// CHECK-NEXT: dealloc +// CHECK-NEXT: dealloc +// CHECK-NEXT: return + +// ----- + +// 
CHECK-LABEL: func @moving_invalid_dealloc_op_complex +func @moving_invalid_dealloc_op_complex(%cond: i1, %arg0: memref<2xf32>, %arg1: memref<2xf32>){ + cond_br %cond, ^bb1, ^bb2 +^bb1: + br ^exit(%arg0 : memref<2xf32>) +^bb2: + %1 = alloc() : memref<2xf32> + "buffer_assignment_test.unary_lowered"(%arg0, %1) : (memref<2xf32>, memref<2xf32>) -> () + dealloc %1 : memref<2xf32> + br ^exit(%1 : memref<2xf32>) +^exit(%arg2: memref<2xf32>): + "buffer_assignment_test.copy"(%arg2, %arg1) : (memref<2xf32>, memref<2xf32>) -> () + return +} +// CHECK-NEXT: %{{.*}} = alloc() +// CHECK: buffer_assignment_test.copy +// CHECK-NEXT: dealloc +// CHECK-NEXT: return + +// ----- + +// CHECK-LABEL: func @inserting_missing_dealloc_simple +func @inserting_missing_dealloc_simple(%arg0 : memref<2xf32>, %arg1: memref<2xf32>){ + %0 = alloc() : memref<2xf32> + "buffer_assignment_test.unary_lowered"(%arg0, %0) : (memref<2xf32>, memref<2xf32>) -> () + "buffer_assignment_test.copy"(%0, %arg1) : (memref<2xf32>, memref<2xf32>) -> () + return +} +// CHECK: buffer_assignment_test.copy +// CHECK-NEXT: dealloc + +// ----- + +// CHECK-LABEL: func @moving_invalid_dealloc_op +func @moving_invalid_dealloc_op(%arg0 : memref<2xf32>, %arg1: memref<2xf32>){ + %0 = alloc() : memref<2xf32> + "buffer_assignment_test.unary_lowered"(%arg0, %0) : (memref<2xf32>, memref<2xf32>) -> () + dealloc %0 : memref<2xf32> + "buffer_assignment_test.copy"(%0, %arg1) : (memref<2xf32>, memref<2xf32>) -> () + return +} +// CHECK: buffer_assignment_test.copy +// CHECK-NEXT: dealloc \ No newline at end of file diff --git a/mlir/test/lib/Transforms/CMakeLists.txt b/mlir/test/lib/Transforms/CMakeLists.txt --- a/mlir/test/lib/Transforms/CMakeLists.txt +++ b/mlir/test/lib/Transforms/CMakeLists.txt @@ -1,5 +1,6 @@ add_llvm_library(MLIRTestTransforms TestAllReduceLowering.cpp + TestBufferAssignment.cpp TestCallGraph.cpp TestConstantFold.cpp TestConvertGPUKernelToCubin.cpp diff --git a/mlir/test/lib/Transforms/TestBufferAssignment.cpp 
b/mlir/test/lib/Transforms/TestBufferAssignment.cpp new file mode 100644 --- /dev/null +++ b/mlir/test/lib/Transforms/TestBufferAssignment.cpp @@ -0,0 +1,157 @@ +//===- TestBufferAssignment.cpp - Test for buffer assignment ----*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements logic for testing buffer assignment including its +// utility converters. +// +//===----------------------------------------------------------------------===// + +#include "mlir/Dialect/StandardOps/IR/Ops.h" +#include "mlir/IR/Function.h" +#include "mlir/IR/Operation.h" +#include "mlir/Pass/Pass.h" +#include "mlir/Pass/PassManager.h" +#include "mlir/Transforms/BufferAssignment.h" + +using namespace mlir; + +namespace { +/// This pass tests two provided operation converters, +/// FunctionAndBlockSignatureConverter and NonVoidToVoidReturnOpConverter, for +/// Buffer Assignment. +struct TestBufferAssignmentPreparationPass + : mlir::PassWrapper<TestBufferAssignmentPreparationPass, FunctionPass> { + /// This dialect independent unary operation has been defined only for testing + /// buffer assignment. + class BufferAssignmentTestUnaryOp + : public Op<BufferAssignmentTestUnaryOp, OpTrait::OneResult, + OpTrait::OneOperand> { + public: + using Op::Op; + static StringRef getOperationName() { + return "buffer_assignment_test.unary"; + } + static void build(Builder *b, OperationState &state, Value source) { + state.addOperands(source); + } + }; + + /// This dialect independent lowered unary operation has been defined only for + /// testing buffer assignment. 
 + class BufferAssignmentTestUnaryLoweredOp + : public Op<BufferAssignmentTestUnaryLoweredOp, + OpTrait::NOperands<2>::Impl> { + public: + using Op::Op; + static StringRef getOperationName() { + return "buffer_assignment_test.unary_lowered"; + } + static void build(Builder *b, OperationState &state, Value source, + Value target) { + state.addOperands(source); + state.addOperands(target); + } + }; + + /// This dialect independent copy operation has been defined only for testing + /// NonVoidToVoidReturnOpConverter + class BufferAssignmentTestCopyOp + : public Op<BufferAssignmentTestCopyOp, OpTrait::NOperands<2>::Impl> { + public: + using Op::Op; + static StringRef getOperationName() { + return "buffer_assignment_test.copy"; + } + static void build(Builder *b, OperationState &state, Value from, Value to) { + state.addOperands(from); + state.addOperands(to); + } + }; + + /// A simple converter that legalizes a BufferAssignmentTestUnaryOp to a + /// BufferAssignmentTestUnaryLoweredOp and creates buffer allocation for + /// the result of the computation. + class TestUnaryOpConverter : public BufferAssignmentOpConversionPattern< + BufferAssignmentTestUnaryOp> { + public: + using BufferAssignmentOpConversionPattern< + BufferAssignmentTestUnaryOp>::BufferAssignmentOpConversionPattern; + + // Performs the actual legalization conversion step. + LogicalResult + matchAndRewrite(BufferAssignmentTestUnaryOp op, ArrayRef<Value> operands, + ConversionPatternRewriter &rewriter) const final { + // Create a new buffer allocation using the current BufferAssignmentPlacer + // instance. + auto result = op.getResult(); + auto result_type = result.getType().dyn_cast<ShapedType>(); + auto memref_type = + MemRefType::get(result_type.getShape(), result_type.getElementType()); + rewriter.restoreInsertionPoint( + bufferAssignment->computeAllocPosition(result)); + auto alloc = rewriter.create<AllocOp>(op.getLoc(), memref_type); + + // Create the lowered operation and replace the old operation with a + // reference to the allocated buffer. 
+ rewriter.create(op.getLoc(), + operands[0], alloc); + rewriter.replaceOp(op, {alloc}); + return success(); + } + }; + + void runOnFunction() override { + OwningRewritePatternList patterns; + auto funcOp = getOperation(); + auto context = funcOp.getContext(); + ConversionTarget target(*context); + BufferAssignmentPlacer bufferAssignmentPlacer(funcOp); + + // Specifying the legal and illegal operations. + context->allowUnregisteredDialects(true); + target.addIllegalOp(); + target.addLegalOp(); + target.addLegalOp(); + target.addLegalOp(); + target.addLegalOp(); + // TODO(dfki): ReturnOp can also be changed to TestReturnOp like + // BufferAssignmentTestCopyOp. + target.addDynamicallyLegalOp( + [](ReturnOp returnOp) { return returnOp.getNumOperands() == 0; }); + FunctionAndBlockSignatureConverter::addDynamicallyLegalFuncOp(target); + + // Adding patterns for testing this pass. + // clang-format off + patterns.insert< + FunctionAndBlockSignatureConverter, + TestUnaryOpConverter, + NonVoidToVoidReturnOpConverter + + >(context, &bufferAssignmentPlacer); + // clang-format on + + if (failed(applyPartialConversion(funcOp, target, patterns, nullptr))) { + funcOp.emitOpError() + << "Failed to apply buffer assignment preparation steps"; + } + }; +}; +} // end anonymous namespace + +namespace mlir { +/// This pass tests helper methods such as computeAllocPosition, +/// FunctionAndBlockSignatureConverter, NonVoidToVoidReturnOpConverter +/// conversion patterns. +void registerTestBufferAssignmentPreparationPass() { + PassRegistration( + "test-buffer-assignment-preparation", + "Tests buffer assignment helper methods including its " + "operation-conversion-patterns"); +} +} // end namespace mlir \ No newline at end of file diff --git a/mlir/tools/mlir-opt/mlir-opt.cpp b/mlir/tools/mlir-opt/mlir-opt.cpp --- a/mlir/tools/mlir-opt/mlir-opt.cpp +++ b/mlir/tools/mlir-opt/mlir-opt.cpp @@ -29,6 +29,7 @@ namespace mlir { // Defined in the test directory, no public header. 
+void registerBufferAssignmentPass(); void registerConvertToTargetEnvPass(); void registerInliner(); void registerMemRefBoundCheck(); @@ -41,6 +42,7 @@ void registerTestAffineDataCopyPass(); void registerTestAllReduceLoweringPass(); void registerTestAffineLoopUnswitchingPass(); +void registerTestBufferAssignmentPreparationPass(); void registerTestLinalgMatmulToVectorPass(); void registerTestLoopPermutationPass(); void registerTestCallGraphPass(); @@ -93,6 +95,7 @@ cl::desc("Allow operation with no registered dialects"), cl::init(false)); void registerTestPasses() { + registerBufferAssignmentPass(); registerConvertToTargetEnvPass(); registerInliner(); registerMemRefBoundCheck(); @@ -112,6 +115,7 @@ #if MLIR_CUDA_CONVERSIONS_ENABLED registerTestConvertGPUKernelToCubinPass(); #endif + registerTestBufferAssignmentPreparationPass(); registerTestDominancePass(); registerTestFunc(); registerTestGpuMemoryPromotionPass();