diff --git a/mlir/lib/Transforms/BufferPlacement.cpp b/mlir/lib/Transforms/BufferPlacement.cpp --- a/mlir/lib/Transforms/BufferPlacement.cpp +++ b/mlir/lib/Transforms/BufferPlacement.cpp @@ -7,14 +7,15 @@ //===----------------------------------------------------------------------===// // // This file implements logic for computing correct alloc and dealloc positions. -// The main class is the BufferPlacementPass class that implements the -// underlying algorithm. In order to put allocations and deallocations at safe -// positions, it is significantly important to put them into the correct blocks. -// However, the liveness analysis does not pay attention to aliases, which can -// occur due to branches (and their associated block arguments) in general. For -// this purpose, BufferPlacement firstly finds all possible aliases for a single -// value (using the BufferPlacementAliasAnalysis class). Consider the following -// example: +// Furthermore, buffer placement also adds required new alloc and copy +// operations to ensure that all buffers are deallocated. The main class is the +// BufferPlacementPass class that implements the underlying algorithm. In order +// to put allocations and deallocations at safe positions, it is significantly +// important to put them into the correct blocks. However, the liveness analysis +// does not pay attention to aliases, which can occur due to branches (and their +// associated block arguments) in general. For this purpose, BufferPlacement +// firstly finds all possible aliases for a single value (using the +// BufferPlacementAliasAnalysis class). Consider the following example: // // ^bb0(%arg0): // cond_br %cond, ^bb1, ^bb2 @@ -28,16 +29,23 @@ // // Using liveness information on its own would cause us to place the allocs and // deallocs in the wrong block. This is due to the fact that %new_value will not -// be liveOut of its block. Instead, we have to place the alloc for %new_value -// in bb0 and its associated dealloc in exit. 
Using the class -// BufferPlacementAliasAnalysis, we will find out that %new_value has a -// potential alias %arg1. In order to find the dealloc position we have to find -// all potential aliases, iterate over their uses and find the common -// post-dominator block. In this block we can safely be sure that %new_value -// will die and can use liveness information to determine the exact operation -// after which we have to insert the dealloc. Finding the alloc position is -// highly similar and non- obvious. Again, we have to consider all potential -// aliases and find the common dominator block to place the alloc. +// be liveOut of its block. Instead, we can place the alloc for %new_value +// in bb0 and its associated dealloc in exit. Alternatively, the alloc can stay +// (or even has to stay due to additional dependencies) at this location and we +// have to free the buffer in the same block, because it cannot be freed in the +// post dominator. However, this requires a new copy buffer for %arg1 that will +// contain the actual contents. Using the class BufferPlacementAliasAnalysis, we +// will find out that %new_value has a potential alias %arg1. In order to find +// the dealloc position we have to find all potential aliases, iterate over +// their uses and find the common post-dominator block (note that additional +// copies and buffers remove potential aliases and will influence the placement +// of the deallocs). In all cases, the computed block can be safely used to free +// the %new_value buffer (may be exit or bb2) as it will die and we can use +// liveness information to determine the exact operation after which we have to +// insert the dealloc. Finding the alloc position is similar and non-obvious. +// However, the algorithm supports moving allocs to other places and introducing +// copy buffers and placing deallocs in safe places to ensure that all buffers +// will be freed in the end. 
// // TODO: // The current implementation does not support loops and the resulting code will @@ -49,8 +57,11 @@ //===----------------------------------------------------------------------===// #include "mlir/Transforms/BufferPlacement.h" +#include "mlir/Dialect/Linalg/IR/LinalgOps.h" +#include "mlir/IR/Operation.h" #include "mlir/Pass/Pass.h" #include "mlir/Transforms/Passes.h" +#include "llvm/ADT/SetOperations.h" using namespace mlir; @@ -67,12 +78,21 @@ class BufferPlacementAliasAnalysis { public: using ValueSetT = SmallPtrSet; + using ValueMapT = llvm::DenseMap; public: /// Constructs a new alias analysis using the op provided. BufferPlacementAliasAnalysis(Operation *op) { build(op->getRegions()); } - /// Finds all immediate and indirect aliases this value could potentially + /// Find all immediate aliases this value could potentially have. + ValueMapT::const_iterator find(Value value) const { + return aliases.find(value); + } + + /// Returns the end iterator that can be used in combination with find. + ValueMapT::const_iterator end() const { return aliases.end(); } + + /// Find all immediate and indirect aliases this value could potentially /// have. Note that the resulting set will also contain the value provided as /// it is an alias of itself. ValueSetT resolve(Value value) const { @@ -81,6 +101,12 @@ return result; } + /// Removes the given values from all alias sets. + void remove(const SmallPtrSetImpl &aliasValues) { + for (auto &entry : aliases) + llvm::set_subtract(entry.second, aliasValues); + } + private: /// Recursively determines alias information for the given value. It stores /// all newly found potential aliases in the given result set. @@ -127,121 +153,313 @@ } /// Maps values to all immediate aliases this value can have. 
- llvm::DenseMap aliases; + ValueMapT aliases; }; //===----------------------------------------------------------------------===// -// BufferPlacementPositions +// BufferPlacement //===----------------------------------------------------------------------===// -/// Stores correct alloc and dealloc positions to place dialect-specific alloc -/// and dealloc operations. -struct BufferPlacementPositions { +// The main buffer placement analysis used to place allocs, copies and deallocs. +class BufferPlacement { public: - BufferPlacementPositions() - : allocPosition(nullptr), deallocPosition(nullptr) {} + using ValueSetT = BufferPlacementAliasAnalysis::ValueSetT; + + /// An intermediate representation of a single allocation node. + struct AllocEntry { + /// A reference to the associated allocation node. + Value allocValue; - /// Creates a new positions tuple including alloc and dealloc positions. - BufferPlacementPositions(Operation *allocPosition, Operation *deallocPosition) - : allocPosition(allocPosition), deallocPosition(deallocPosition) {} + /// The associated placement block in which the allocation should be + /// performed. + Block *placementBlock; - /// Returns the alloc position before which the alloc operation has to be - /// inserted. - Operation *getAllocPosition() const { return allocPosition; } + /// The associated dealloc operation (if any). + Operation *deallocOperation; + }; - /// Returns the dealloc position after which the dealloc operation has to be - /// inserted. - Operation *getDeallocPosition() const { return deallocPosition; } + using AllocEntryList = SmallVector; + +public: + BufferPlacement(Operation *op) + : operation(op), aliases(op), liveness(op), dominators(op), + postDominators(op) { + // Gather all allocation nodes + initBlockMapping(); + } + + /// Performs the actual placement/creation of all alloc, copy and dealloc + /// nodes. + void place() { + // Place all allocations. 
+ placeAllocs(); + // Add additional allocations and copies that are required. + introduceCopies(); + // Find all associated dealloc nodes. + findDeallocs(); + // Place deallocations for all allocation entries. + placeDeallocs(); + } private: - Operation *allocPosition; - Operation *deallocPosition; -}; + /// Initializes the internal block mapping by discovering allocation nodes. It + /// maps all allocation nodes to their initial block in which they can be + /// safely allocated. + void initBlockMapping() { + operation->walk([&](MemoryEffectOpInterface opInterface) { + // Try to find a single allocation result. + SmallVector effects; + opInterface.getEffects(effects); -//===----------------------------------------------------------------------===// -// BufferPlacementAnalysis -//===----------------------------------------------------------------------===// + SmallVector allocateResultEffects; + llvm::copy_if(effects, std::back_inserter(allocateResultEffects), + [=](MemoryEffects::EffectInstance &it) { + Value value = it.getValue(); + return isa(it.getEffect()) && + value && value.isa(); + }); + // If there is one result only, we will be able to move the allocation and + // (possibly existing) deallocation ops. + if (allocateResultEffects.size() != 1) + return; + // Get allocation result. + auto allocResult = allocateResultEffects[0].getValue().cast(); + // Find the initial allocation block and register this result. + allocs.push_back( + {allocResult, getInitialAllocBlock(allocResult), nullptr}); + }); + } -// The main buffer placement analysis used to place allocs and deallocs. -class BufferPlacementAnalysis { -public: - using DeallocSetT = SmallPtrSet; + /// Computes a valid allocation position in a dominator (if possible) for the + /// given allocation result. + Block *getInitialAllocBlock(OpResult result) { + // Get all allocation operands as these operands are important for the + // allocation operation. 
+ auto operands = result.getOwner()->getOperands(); + if (operands.size() < 1) + return findCommonDominator(result, aliases.resolve(result), dominators); + + // If this node has dependencies, check all dependent nodes with respect + // to a common post dominator in which all values are available. + ValueSetT dependencies(++operands.begin(), operands.end()); + return findCommonDominator(*operands.begin(), dependencies, postDominators); + } -public: - BufferPlacementAnalysis(Operation *op) - : operation(op), liveness(op), dominators(op), postDominators(op), - aliases(op) {} - - /// Computes the actual positions to place allocs and deallocs for the given - /// value. - BufferPlacementPositions - computeAllocAndDeallocPositions(OpResult result) const { - if (result.use_empty()) - return BufferPlacementPositions(result.getOwner(), result.getOwner()); - // Get all possible aliases. - auto possibleValues = aliases.resolve(result); - return BufferPlacementPositions(getAllocPosition(result, possibleValues), - getDeallocPosition(result, possibleValues)); + /// Finds correct alloc positions according to the algorithm described at + /// the top of the file for all alloc nodes that can be handled by this + /// analysis. + void placeAllocs() const { + for (auto &entry : allocs) { + Value alloc = entry.allocValue; + // Get the actual block to place the alloc and get liveness information + // for the placement block. + Block *placementBlock = entry.placementBlock; + // We have to ensure that we place the alloc before its first use in this + // block. + const LivenessBlockInfo *livenessInfo = + liveness.getLiveness(placementBlock); + Operation *startOperation = livenessInfo->getStartOperation(alloc); + // Check whether the start operation lies in the desired placement block. + // If not, we will use the terminator as this is the last operation in + // this block. 
+ if (startOperation->getBlock() != placementBlock) + startOperation = placementBlock->getTerminator(); + + // Move the alloc in front of the start operation. + Operation *allocOperation = alloc.getDefiningOp(); + allocOperation->moveBefore(startOperation); + } } - /// Finds all associated dealloc nodes for the alloc nodes using alias - /// information. - DeallocSetT findAssociatedDeallocs(OpResult allocResult) const { - DeallocSetT result; - auto possibleValues = aliases.resolve(allocResult); - for (Value alias : possibleValues) - for (Operation *op : alias.getUsers()) { - // Check for an existing memory effect interface. - auto effectInstance = dyn_cast(op); - if (!effectInstance) + /// Introduces required allocs and copy operations to avoid memory leaks. + void introduceCopies() { + // Initialize the set of block arguments that require a dedicated memory + // free operation since their arguments cannot be safely deallocated in a + // post dominator. + SmallPtrSet blockArgsToFree; + llvm::SmallDenseSet> visitedBlockArgs; + SmallVector, 8> toProcess; + + // Check dominance relation for proper dominance properties. If the given + // value node does not dominate an alias, we will have to create a copy in + // order to free all buffers that can potentially leak into a post + // dominator. + auto findUnsafeValues = [&](Value source, Block *definingBlock) { + auto it = aliases.find(source); + if (it == aliases.end()) + return; + for (Value value : it->second) { + auto blockArg = value.cast(); + if (blockArgsToFree.count(blockArg) > 0) continue; - // Check whether the associated value will be freed using the current - // operation. - SmallVector effects; - effectInstance.getEffectsOnValue(alias, effects); - if (llvm::any_of(effects, [=](MemoryEffects::EffectInstance &it) { - return isa(it.getEffect()); - })) - result.insert(op); + // Check whether we have to free this particular block argument. 
+ if (!dominators.dominates(definingBlock, blockArg.getOwner())) { + toProcess.emplace_back(blockArg, blockArg.getParentBlock()); + blockArgsToFree.insert(blockArg); + } else if (visitedBlockArgs.insert({blockArg, definingBlock}).second) + toProcess.emplace_back(blockArg, definingBlock); } - return result; + }; + + // Detect possibly unsafe aliases starting from all allocations. + for (auto &entry : allocs) + findUnsafeValues(entry.allocValue, entry.placementBlock); + + // Try to find block arguments that require an explicit free operation + // until we reach a fix point. + while (!toProcess.empty()) { + auto current = toProcess.pop_back_val(); + findUnsafeValues(std::get<0>(current), std::get<1>(current)); + } + + // Update buffer aliases to ensure that we free all buffers and block + // arguments at the correct locations. + aliases.remove(blockArgsToFree); + + // Add new allocs and additional copy operations. + for (BlockArgument blockArg : blockArgsToFree) { + Block *block = blockArg.getOwner(); + + // Allocate a buffer for the current block argument in the block of + // the associated value (which will be a predecessor block by + // definition). + for (auto it = block->pred_begin(), e = block->pred_end(); it != e; + ++it) { + // Get the terminator and the value that will be passed to our + // argument. + Operation *terminator = (*it)->getTerminator(); + auto successorOperand = + cast(terminator) + .getMutableSuccessorOperands(it.getSuccessorIndex()) + .getValue() + .slice(blockArg.getArgNumber(), 1); + Value sourceValue = ((OperandRange)successorOperand)[0]; + + // Create a new alloc at the current location of the terminator. + auto memRefType = sourceValue.getType().cast(); + OpBuilder builder(terminator); + + // Extract information about dynamically shaped types by + // extracting their dynamic dimensions. 
+ SmallVector dynamicOperands; + for (auto shapeElement : llvm::enumerate(memRefType.getShape())) { + if (!ShapedType::isDynamic(shapeElement.value())) + continue; + dynamicOperands.push_back(builder.create( + terminator->getLoc(), sourceValue, shapeElement.index())); + } + + // TODO: provide a generic interface to create dialect-specific + // Alloc and CopyOp nodes. + auto alloc = builder.create(terminator->getLoc(), memRefType, + dynamicOperands); + // Wire new alloc and successor operand. + successorOperand.assign(alloc); + // Create a new copy operation that copies to contents of the old + // allocation to the new one. + builder.create(terminator->getLoc(), sourceValue, + alloc); + } + + // Register the block argument to require a final dealloc. Note that + // we do not have to assign a block here since we do not want to + // move the allocation node to another location. + allocs.push_back({blockArg, nullptr, nullptr}); + } } - /// Dumps the buffer placement information to the given stream. - void print(raw_ostream &os) const { - os << "// ---- Buffer Placement -----\n"; - - for (Region ®ion : operation->getRegions()) - for (Block &block : region) - for (Operation &operation : block) - for (OpResult result : operation.getResults()) { - BufferPlacementPositions positions = - computeAllocAndDeallocPositions(result); - os << "Positions for "; - result.print(os); - os << "\n Alloc: "; - positions.getAllocPosition()->print(os); - os << "\n Dealloc: "; - positions.getDeallocPosition()->print(os); - os << "\n"; - } + /// Finds associated deallocs that can be linked to our allocation nodes (if + /// any). + void findDeallocs() { + for (auto &entry : allocs) { + auto userIt = + llvm::find_if(entry.allocValue.getUsers(), [&](Operation *user) { + auto effectInterface = dyn_cast(user); + if (!effectInterface) + return false; + // Try to find a free effect that is applied to one of our values + // that will be automatically freed by our pass. 
+ SmallVector effects; + effectInterface.getEffectsOnValue(entry.allocValue, effects); + return llvm::any_of( + effects, [&](MemoryEffects::EffectInstance &it) { + return isa(it.getEffect()); + }); + }); + // Assign the associated dealloc operation (if any). + if (userIt != entry.allocValue.user_end()) + entry.deallocOperation = *userIt; + } } -private: - /// Finds a correct placement block to store alloc/dealloc node according to - /// the algorithm described at the top of the file. It supports dominator and + /// Finds correct dealloc positions according to the algorithm described at + /// the top of the file for all alloc nodes and block arguments that can be + /// handled by this analysis. + void placeDeallocs() const { + // Move or insert deallocs using the previously computed information. + // These deallocations will be linked to their associated allocation nodes + // since they don't have any aliases that can (potentially) increase their + // liveness. + for (auto &entry : allocs) { + Value alloc = entry.allocValue; + auto aliasesSet = aliases.resolve(alloc); + assert(aliasesSet.size() > 0 && "must contain at least one alias"); + + // Determine the actual block to place the dealloc and get liveness + // information. + Block *placementBlock = + findCommonDominator(alloc, aliasesSet, postDominators); + const LivenessBlockInfo *livenessInfo = + liveness.getLiveness(placementBlock); + + // We have to ensure that the dealloc will be after the last use of all + // aliases of the given value. We first assume that there are no uses in + // the placementBlock and that we can safely place the dealloc at the + // beginning. + Operation *endOperation = &placementBlock->front(); + // Iterate over all aliases and ensure that the endOperation will point + // to the last operation of all potential aliases in the placementBlock. 
+ for (Value alias : aliasesSet) { + Operation *aliasEndOperation = + livenessInfo->getEndOperation(alias, endOperation); + // Check whether the aliasEndOperation lies in the desired block and + // whether it is behind the current endOperation. If yes, this will be + // the new endOperation. + if (aliasEndOperation->getBlock() == placementBlock && + endOperation->isBeforeInBlock(aliasEndOperation)) + endOperation = aliasEndOperation; + } + // endOperation is the last operation behind which we can safely store + // the dealloc taking all potential aliases into account. + + // If there is an existing dealloc, move it to the right place. + if (entry.deallocOperation) { + entry.deallocOperation->moveAfter(endOperation); + } else { + // If the Dealloc position is at the terminator operation of the block, + // then the value should escape from a deallocation. + Operation *nextOp = endOperation->getNextNode(); + if (!nextOp) + continue; + // If there is no dealloc node, insert one in the right place. + OpBuilder builder(nextOp); + builder.create(alloc.getLoc(), alloc); + } + } + } + + /// Finds a common dominator for the given value while taking the positions + /// of the values in the value set into account. It supports dominator and /// post-dominator analyses via template arguments. template - Block * - findPlacementBlock(OpResult result, - const BufferPlacementAliasAnalysis::ValueSetT &aliases, - const DominatorT &doms) const { + Block *findCommonDominator(Value value, const ValueSetT &values, + const DominatorT &doms) const { // Start with the current block the value is defined in. - Block *dom = result.getOwner()->getBlock(); + Block *dom = value.getParentBlock(); // Iterate over all aliases and their uses to find a safe placement block // according to the given dominator information. 
- for (Value alias : aliases) - for (Operation *user : alias.getUsers()) { + for (Value childValue : values) + for (Operation *user : childValue.getUsers()) { // Move upwards in the dominator tree to find an appropriate // dominator block that takes the current use into account. dom = doms.findNearestCommonDominator(dom, user->getBlock()); @@ -249,86 +467,24 @@ return dom; } - /// Finds a correct alloc position according to the algorithm described at - /// the top of the file. - Operation *getAllocPosition( - OpResult result, - const BufferPlacementAliasAnalysis::ValueSetT &aliases) const { - // Determine the actual block to place the alloc and get liveness - // information. - Block *placementBlock = findPlacementBlock(result, aliases, dominators); - const LivenessBlockInfo *livenessInfo = - liveness.getLiveness(placementBlock); - - // We have to ensure that the alloc will be before the first use of all - // aliases of the given value. We first assume that there are no uses in the - // placementBlock and that we can safely place the alloc before the - // terminator at the end of the block. - Operation *startOperation = placementBlock->getTerminator(); - // Iterate over all aliases and ensure that the startOperation will point to - // the first operation of all potential aliases in the placementBlock. - for (Value alias : aliases) { - Operation *aliasStartOperation = livenessInfo->getStartOperation(alias); - // Check whether the aliasStartOperation lies in the desired block and - // whether it is before the current startOperation. If yes, this will be - // the new startOperation. - if (aliasStartOperation->getBlock() == placementBlock && - aliasStartOperation->isBeforeInBlock(startOperation)) - startOperation = aliasStartOperation; - } - // startOperation is the first operation before which we can safely store - // the alloc taking all potential aliases into account. 
- return startOperation; - } - - /// Finds a correct dealloc position according to the algorithm described at - /// the top of the file. - Operation *getDeallocPosition( - OpResult result, - const BufferPlacementAliasAnalysis::ValueSetT &aliases) const { - // Determine the actual block to place the dealloc and get liveness - // information. - Block *placementBlock = findPlacementBlock(result, aliases, postDominators); - const LivenessBlockInfo *livenessInfo = - liveness.getLiveness(placementBlock); - - // We have to ensure that the dealloc will be after the last use of all - // aliases of the given value. We first assume that there are no uses in the - // placementBlock and that we can safely place the dealloc at the beginning. - Operation *endOperation = &placementBlock->front(); - // Iterate over all aliases and ensure that the endOperation will point to - // the last operation of all potential aliases in the placementBlock. - for (Value alias : aliases) { - Operation *aliasEndOperation = - livenessInfo->getEndOperation(alias, endOperation); - // Check whether the aliasEndOperation lies in the desired block and - // whether it is behind the current endOperation. If yes, this will be the - // new endOperation. - if (aliasEndOperation->getBlock() == placementBlock && - endOperation->isBeforeInBlock(aliasEndOperation)) - endOperation = aliasEndOperation; - } - // endOperation is the last operation behind which we can safely store the - // dealloc taking all potential aliases into account. - return endOperation; - } - /// The operation this transformation was constructed from. Operation *operation; - /// The underlying liveness analysis to compute fine grained information about - /// alloc and dealloc positions. + /// Alias information that can be updated during the insertion of copies. + BufferPlacementAliasAnalysis aliases; + + /// Maps allocation nodes to their associated blocks. 
+ AllocEntryList allocs; + + /// The underlying liveness analysis to compute fine grained information + /// about alloc and dealloc positions. Liveness liveness; - /// The dominator analysis to place allocs in the appropriate blocks. + /// The dominator analysis to place deallocs in the appropriate blocks. DominanceInfo dominators; /// The post dominator analysis to place deallocs in the appropriate blocks. PostDominanceInfo postDominators; - - /// The internal alias analysis to ensure that allocs and deallocs take all - /// their potential aliases into account. - BufferPlacementAliasAnalysis aliases; }; //===----------------------------------------------------------------------===// @@ -336,73 +492,16 @@ //===----------------------------------------------------------------------===// /// The actual buffer placement pass that moves alloc and dealloc nodes into -/// the right positions. It uses the algorithm described at the top of the file. +/// the right positions. It uses the algorithm described at the top of the +/// file. struct BufferPlacementPass : mlir::PassWrapper { - void runOnFunction() override { - // Get required analysis information first. - auto &analysis = getAnalysis(); - - // Compute an initial placement of all nodes. - llvm::SmallVector, 16> - placements; - getFunction().walk([&](MemoryEffectOpInterface op) { - // Try to find a single allocation result. - SmallVector effects; - op.getEffects(effects); - - SmallVector allocateResultEffects; - llvm::copy_if(effects, std::back_inserter(allocateResultEffects), - [=](MemoryEffects::EffectInstance &it) { - Value value = it.getValue(); - return isa(it.getEffect()) && - value && value.isa(); - }); - // If there is one result only, we will be able to move the allocation and - // (possibly existing) deallocation ops. - if (allocateResultEffects.size() == 1) { - // Insert allocation result. 
- auto allocResult = allocateResultEffects[0].getValue().cast(); - placements.emplace_back( - allocResult, analysis.computeAllocAndDeallocPositions(allocResult)); - } - }); - // Move alloc (and dealloc - if any) nodes into the right places and insert - // dealloc nodes if necessary. - for (auto &entry : placements) { - // Find already associated dealloc nodes. - OpResult alloc = entry.first; - auto deallocs = analysis.findAssociatedDeallocs(alloc); - if (deallocs.size() > 1) { - emitError(alloc.getLoc(), - "not supported number of associated dealloc operations"); - return; - } - - // Move alloc node to the right place. - BufferPlacementPositions &positions = entry.second; - Operation *allocOperation = alloc.getOwner(); - allocOperation->moveBefore(positions.getAllocPosition()); - - // If there is an existing dealloc, move it to the right place. - Operation *nextOp = positions.getDeallocPosition()->getNextNode(); - // If the Dealloc position is at the terminator operation of the block, - // then the value should escape from a deallocation. - if (!nextOp) { - assert(deallocs.empty() && - "There should be no dealloc for the returned buffer"); - continue; - } - if (deallocs.size()) { - (*deallocs.begin())->moveBefore(nextOp); - } else { - // If there is no dealloc node, insert one in the right place. - OpBuilder builder(nextOp); - builder.create(allocOperation->getLoc(), alloc); - } - } - }; + void runOnFunction() override { + // Place all required alloc, copy and dealloc nodes. 
+ BufferPlacement placement(getFunction()); + placement.place(); + } }; } // end anonymous namespace diff --git a/mlir/test/Transforms/buffer-placement.mlir b/mlir/test/Transforms/buffer-placement.mlir --- a/mlir/test/Transforms/buffer-placement.mlir +++ b/mlir/test/Transforms/buffer-placement.mlir @@ -1,7 +1,8 @@ // RUN: mlir-opt -buffer-placement -split-input-file %s | FileCheck %s -// This file checks the behaviour of BufferPlacement pass for moving Alloc and Dealloc -// operations and inserting the missing the DeallocOps in their correct positions. +// This file checks the behaviour of BufferPlacement pass for moving Alloc and +// Dealloc operations and inserting the missing DeallocOps in their correct +// positions. // Test Case: // bb0 @@ -9,8 +10,9 @@ // bb1 bb2 <- Initial position of AllocOp // \ / // bb3 -// BufferPlacement Expected Behaviour: It should move the existing AllocOp to the entry block, -// and insert a DeallocOp at the exit block after CopyOp since %1 is an alias for %0 and %arg1. +// BufferPlacement Expected Behaviour: It should move the existing AllocOp to +// the entry block, and insert a DeallocOp at the exit block after CopyOp since +// %1 is an alias for %0 and %arg1. #map0 = affine_map<(d0) -> (d0)> @@ -21,7 +23,11 @@ br ^bb3(%arg1 : memref<2xf32>) ^bb2: %0 = alloc() : memref<2xf32> - linalg.generic {args_in = 1 : i64, args_out = 1 : i64, indexing_maps = [#map0, #map0], iterator_types = ["parallel"]} %arg1, %0 { + linalg.generic { + args_in = 1 : i64, + args_out = 1 : i64, + indexing_maps = [#map0, #map0], + iterator_types = ["parallel"]} %arg1, %0 { ^bb0(%gen1_arg0: f32, %gen1_arg1: f32): %tmp1 = exp %gen1_arg0 : f32 linalg.yield %tmp1 : f32 @@ -40,8 +46,154 @@ // ----- +// Test Case: +// bb0 +// / \ +// bb1 bb2 <- Initial position of AllocOp +// \ / +// bb3 +// BufferPlacement Expected Behaviour: It should not move the existing AllocOp +// to any other block since the alloc has a dynamic dependency to block argument +// %0 in bb2. 
Since the dynamic type is passed to bb3 via the block argument %2, +// it is currently required to allocate a temporary buffer for %2 that gets +// copies of %arg0 and %1 with their appropriate shape dimensions. The copy +// buffer deallocation will be applied to %2 in block bb3. + +#map0 = affine_map<(d0) -> (d0)> + +// CHECK-LABEL: func @condBranchDynamicType +func @condBranchDynamicType( + %arg0: i1, + %arg1: memref, + %arg2: memref, + %arg3: index) { + cond_br %arg0, ^bb1, ^bb2(%arg3: index) +^bb1: + br ^bb3(%arg1 : memref) +^bb2(%0: index): + %1 = alloc(%0) : memref + linalg.generic { + args_in = 1 : i64, + args_out = 1 : i64, + indexing_maps = [#map0, #map0], + iterator_types = ["parallel"]} %arg1, %1 { + ^bb0(%gen1_arg0: f32, %gen1_arg1: f32): + %tmp1 = exp %gen1_arg0 : f32 + linalg.yield %tmp1 : f32 + }: memref, memref + br ^bb3(%1 : memref) +^bb3(%2: memref): + "linalg.copy"(%2, %arg2) : (memref, memref) -> () + return +} + +// CHECK-NEXT: cond_br +// CHECK: %[[DIM0:.*]] = dim +// CHECK-NEXT: %[[ALLOC0:.*]] = alloc(%[[DIM0]]) +// CHECK-NEXT: linalg.copy(%{{.*}}, %[[ALLOC0]]) +// CHECK: ^bb2(%[[IDX:.*]]:{{.*}}) +// CHECK-NEXT: %[[ALLOC1:.*]] = alloc(%[[IDX]]) +// CHECK-NEXT: linalg.generic +// CHECK: %[[DIM1:.*]] = dim %[[ALLOC1]] +// CHECK-NEXT: %[[ALLOC2:.*]] = alloc(%[[DIM1]]) +// CHECK-NEXT: linalg.copy(%[[ALLOC1]], %[[ALLOC2]]) +// CHECK-NEXT: dealloc %[[ALLOC1]] +// CHECK-NEXT: br ^bb3 +// CHECK-NEXT: ^bb3(%[[ALLOC3:.*]]:{{.*}}) +// CHECK: linalg.copy(%[[ALLOC3]], +// CHECK-NEXT: dealloc %[[ALLOC3]] +// CHECK-NEXT: return + +// ----- + +// Test Case: +// bb0 +// / \ +// bb1 bb2 <- Initial position of AllocOp +// | / \ +// | bb3 bb4 +// | \ / +// \ bb5 +// \ / +// bb6 +// | +// bb7 +// BufferPlacement Expected Behaviour: It should not move the existing AllocOp +// to any other block since the alloc has a dynamic dependency to block argument +// %0 in bb2. 
Since the dynamic type is passed to bb5 via the block argument %2 +// and to bb6 via block argument %3, it is currently required to allocate +// temporary buffers for %2 and %3 that get copies of %1 and %arg0 with their +// appropriate shape dimensions. The copy buffer deallocations will be applied +// to %2 in block bb5 and to %3 in block bb6. Furthermore, there should be no +// copy inserted for %4. + +#map0 = affine_map<(d0) -> (d0)> + +// CHECK-LABEL: func @condBranchDynamicTypeNested +func @condBranchDynamicTypeNested( + %arg0: i1, + %arg1: memref, + %arg2: memref, + %arg3: index) { + cond_br %arg0, ^bb1, ^bb2(%arg3: index) +^bb1: + br ^bb6(%arg1 : memref) +^bb2(%0: index): + %1 = alloc(%0) : memref + linalg.generic { + args_in = 1 : i64, + args_out = 1 : i64, + indexing_maps = [#map0, #map0], + iterator_types = ["parallel"]} %arg1, %1 { + ^bb0(%gen1_arg0: f32, %gen1_arg1: f32): + %tmp1 = exp %gen1_arg0 : f32 + linalg.yield %tmp1 : f32 + }: memref, memref + cond_br %arg0, ^bb3, ^bb4 +^bb3: + br ^bb5(%1 : memref) +^bb4: + br ^bb5(%1 : memref) +^bb5(%2: memref): + br ^bb6(%2 : memref) +^bb6(%3: memref): + br ^bb7(%3 : memref) +^bb7(%4: memref): + "linalg.copy"(%4, %arg2) : (memref, memref) -> () + return +} + +// CHECK-NEXT: cond_br +// CHECK: ^bb1 +// CHECK: %[[DIM0:.*]] = dim +// CHECK-NEXT: %[[ALLOC0:.*]] = alloc(%[[DIM0]]) +// CHECK-NEXT: linalg.copy(%{{.*}}, %[[ALLOC0]]) +// CHECK: ^bb2(%[[IDX:.*]]:{{.*}}) +// CHECK-NEXT: %[[ALLOC1:.*]] = alloc(%[[IDX]]) +// CHECK-NEXT: linalg.generic +// CHECK: cond_br +// CHECK: ^bb3: +// CHECK-NEXT: br ^bb5(%[[ALLOC1]]{{.*}}) +// CHECK: ^bb4: +// CHECK-NEXT: br ^bb5(%[[ALLOC1]]{{.*}}) +// CHECK-NEXT: ^bb5(%[[ALLOC2:.*]]:{{.*}}) +// CHECK: %[[DIM2:.*]] = dim %[[ALLOC2]] +// CHECK-NEXT: %[[ALLOC3:.*]] = alloc(%[[DIM2]]) +// CHECK-NEXT: linalg.copy(%[[ALLOC2]], %[[ALLOC3]]) +// CHECK-NEXT: dealloc %[[ALLOC1]] +// CHECK-NEXT: br ^bb6(%[[ALLOC3]]{{.*}}) +// CHECK-NEXT: ^bb6(%[[ALLOC4:.*]]:{{.*}}) +// CHECK-NEXT: br 
^bb7(%[[ALLOC4]]{{.*}}) +// CHECK-NEXT: ^bb7(%[[ALLOC5:.*]]:{{.*}}) +// CHECK: linalg.copy(%[[ALLOC5]], +// CHECK-NEXT: dealloc %[[ALLOC4]] +// CHECK-NEXT: return + +// ----- + // Test Case: Existing AllocOp with no users. -// BufferPlacement Expected Behaviour: It should insert a DeallocOp right before ReturnOp. +// BufferPlacement Expected Behaviour: It should insert a DeallocOp right before +// ReturnOp. // CHECK-LABEL: func @emptyUsesValue func @emptyUsesValue(%arg0: memref<4xf32>) { @@ -60,8 +212,9 @@ // | bb1 <- Initial position of AllocOp // \ / // bb2 -// BufferPlacement Expected Behaviour: It should move the existing AllocOp to the entry block -// and insert a DeallocOp at the exit block after CopyOp since %1 is an alias for %0 and %arg1. +// BufferPlacement Expected Behaviour: It should move the existing AllocOp to +// the entry block and insert a DeallocOp at the exit block after CopyOp since +// %1 is an alias for %0 and %arg1. #map0 = affine_map<(d0) -> (d0)> @@ -70,7 +223,11 @@ cond_br %arg0, ^bb1, ^bb2(%arg1 : memref<2xf32>) ^bb1: %0 = alloc() : memref<2xf32> - linalg.generic {args_in = 1 : i64, args_out = 1 : i64, indexing_maps = [#map0, #map0], iterator_types = ["parallel"]} %arg1, %0 { + linalg.generic { + args_in = 1 : i64, + args_out = 1 : i64, + indexing_maps = [#map0, #map0], + iterator_types = ["parallel"]} %arg1, %0 { ^bb0(%gen1_arg0: f32, %gen1_arg1: f32): %tmp1 = exp %gen1_arg0 : f32 linalg.yield %tmp1 : f32 @@ -95,15 +252,20 @@ // | bb1 // \ / // bb2 -// BufferPlacement Expected Behaviour: It shouldn't move the alloc position. It only inserts -// a DeallocOp at the exit block after CopyOp since %1 is an alias for %0 and %arg1. +// BufferPlacement Expected Behaviour: It shouldn't move the alloc position. It +// only inserts a DeallocOp at the exit block after CopyOp since %1 is an alias +// for %0 and %arg1. 
#map0 = affine_map<(d0) -> (d0)> // CHECK-LABEL: func @invCriticalEdge func @invCriticalEdge(%arg0: i1, %arg1: memref<2xf32>, %arg2: memref<2xf32>) { %0 = alloc() : memref<2xf32> - linalg.generic {args_in = 1 : i64, args_out = 1 : i64, indexing_maps = [#map0, #map0], iterator_types = ["parallel"]} %arg1, %0 { + linalg.generic { + args_in = 1 : i64, + args_out = 1 : i64, + indexing_maps = [#map0, #map0], + iterator_types = ["parallel"]} %arg1, %0 { ^bb0(%gen1_arg0: f32, %gen1_arg1: f32): %tmp1 = exp %gen1_arg0 : f32 linalg.yield %tmp1 : f32 @@ -127,28 +289,39 @@ // bb1 bb2 // \ / // bb3 <- Initial position of the second AllocOp -// BufferPlacement Expected Behaviour: It shouldn't move the AllocOps. It only inserts two missing DeallocOps in the exit block. -// %5 is an alias for %0. Therefore, the DeallocOp for %0 should occur after the last GenericOp. The Dealloc for %7 should -// happen after the CopyOp. +// BufferPlacement Expected Behaviour: It shouldn't move the AllocOps. It only +// inserts two missing DeallocOps in the exit block. %5 is an alias for %0. +// Therefore, the DeallocOp for %0 should occur after the last GenericOp. The +// Dealloc for %7 should happen after the CopyOp. 
#map0 = affine_map<(d0) -> (d0)> // CHECK-LABEL: func @ifElse func @ifElse(%arg0: i1, %arg1: memref<2xf32>, %arg2: memref<2xf32>) { %0 = alloc() : memref<2xf32> - linalg.generic {args_in = 1 : i64, args_out = 1 : i64, indexing_maps = [#map0, #map0], iterator_types = ["parallel"]} %arg1, %0 { + linalg.generic { + args_in = 1 : i64, + args_out = 1 : i64, + indexing_maps = [#map0, #map0], + iterator_types = ["parallel"]} %arg1, %0 { ^bb0(%gen1_arg0: f32, %gen1_arg1: f32): %tmp1 = exp %gen1_arg0 : f32 linalg.yield %tmp1 : f32 }: memref<2xf32>, memref<2xf32> - cond_br %arg0, ^bb1(%arg1, %0 : memref<2xf32>, memref<2xf32>), ^bb2(%0, %arg1 : memref<2xf32>, memref<2xf32>) + cond_br %arg0, + ^bb1(%arg1, %0 : memref<2xf32>, memref<2xf32>), + ^bb2(%0, %arg1 : memref<2xf32>, memref<2xf32>) ^bb1(%1: memref<2xf32>, %2: memref<2xf32>): br ^bb3(%1, %2 : memref<2xf32>, memref<2xf32>) ^bb2(%3: memref<2xf32>, %4: memref<2xf32>): br ^bb3(%3, %4 : memref<2xf32>, memref<2xf32>) ^bb3(%5: memref<2xf32>, %6: memref<2xf32>): %7 = alloc() : memref<2xf32> - linalg.generic {args_in = 1 : i64, args_out = 1 : i64, indexing_maps = [#map0, #map0], iterator_types = ["parallel"]} %5, %7 { + linalg.generic { + args_in = 1 : i64, + args_out = 1 : i64, + indexing_maps = [#map0, #map0], + iterator_types = ["parallel"]} %5, %7 { ^bb0(%gen2_arg0: f32, %gen2_arg1: f32): %tmp2 = exp %gen2_arg0 : f32 linalg.yield %tmp2 : f32 @@ -162,7 +335,7 @@ // CHECK: %[[SECOND_ALLOC:.*]] = alloc() // CHECK-NEXT: linalg.generic // CHECK: dealloc %[[FIRST_ALLOC]] -// CHECK-NEXT: linalg.copy +// CHECK: linalg.copy // CHECK-NEXT: dealloc %[[SECOND_ALLOC]] // CHECK-NEXT: return @@ -174,20 +347,27 @@ // bb1 bb2 // \ / // bb3 -// BufferPlacement Expected Behaviour: It shouldn't move the AllocOp. It only inserts a missing DeallocOp -// in the exit block since %5 or %6 are the latest aliases of %0. +// BufferPlacement Expected Behaviour: It shouldn't move the AllocOp. 
It only +// inserts a missing DeallocOp in the exit block since %5 or %6 are the latest +// aliases of %0. #map0 = affine_map<(d0) -> (d0)> // CHECK-LABEL: func @ifElseNoUsers func @ifElseNoUsers(%arg0: i1, %arg1: memref<2xf32>, %arg2: memref<2xf32>) { %0 = alloc() : memref<2xf32> - linalg.generic {args_in = 1 : i64, args_out = 1 : i64, indexing_maps = [#map0, #map0], iterator_types = ["parallel"]} %arg1, %0 { + linalg.generic { + args_in = 1 : i64, + args_out = 1 : i64, + indexing_maps = [#map0, #map0], + iterator_types = ["parallel"]} %arg1, %0 { ^bb0(%gen1_arg0: f32, %gen1_arg1: f32): %tmp1 = exp %gen1_arg0 : f32 linalg.yield %tmp1 : f32 }: memref<2xf32>, memref<2xf32> - cond_br %arg0, ^bb1(%arg1, %0 : memref<2xf32>, memref<2xf32>), ^bb2(%0, %arg1 : memref<2xf32>, memref<2xf32>) + cond_br %arg0, + ^bb1(%arg1, %0 : memref<2xf32>, memref<2xf32>), + ^bb2(%0, %arg1 : memref<2xf32>, memref<2xf32>) ^bb1(%1: memref<2xf32>, %2: memref<2xf32>): br ^bb3(%1, %2 : memref<2xf32>, memref<2xf32>) ^bb2(%3: memref<2xf32>, %4: memref<2xf32>): @@ -197,7 +377,8 @@ return } -// CHECK: dealloc +// CHECK-NEXT: %[[FIRST_ALLOC:.*]] = alloc() +// CHECK: dealloc %[[FIRST_ALLOC]] // CHECK-NEXT: return // ----- @@ -219,12 +400,18 @@ // CHECK-LABEL: func @ifElseNested func @ifElseNested(%arg0: i1, %arg1: memref<2xf32>, %arg2: memref<2xf32>) { %0 = alloc() : memref<2xf32> - linalg.generic {args_in = 1 : i64, args_out = 1 : i64, indexing_maps = [#map0, #map0], iterator_types = ["parallel"]} %arg1, %0 { + linalg.generic { + args_in = 1 : i64, + args_out = 1 : i64, + indexing_maps = [#map0, #map0], + iterator_types = ["parallel"]} %arg1, %0 { ^bb0(%gen1_arg0: f32, %gen1_arg1: f32): %tmp1 = exp %gen1_arg0 : f32 linalg.yield %tmp1 : f32 }: memref<2xf32>, memref<2xf32> - cond_br %arg0, ^bb1(%arg1, %0 : memref<2xf32>, memref<2xf32>), ^bb2(%0, %arg1 : memref<2xf32>, memref<2xf32>) + cond_br %arg0, + ^bb1(%arg1, %0 : memref<2xf32>, memref<2xf32>), + ^bb2(%0, %arg1 : memref<2xf32>, memref<2xf32>) 
^bb1(%1: memref<2xf32>, %2: memref<2xf32>): br ^bb5(%1, %2 : memref<2xf32>, memref<2xf32>) ^bb2(%3: memref<2xf32>, %4: memref<2xf32>): @@ -235,7 +422,11 @@ br ^bb5(%3, %6 : memref<2xf32>, memref<2xf32>) ^bb5(%7: memref<2xf32>, %8: memref<2xf32>): %9 = alloc() : memref<2xf32> - linalg.generic {args_in = 1 : i64, args_out = 1 : i64, indexing_maps = [#map0, #map0], iterator_types = ["parallel"]} %7, %9 { + linalg.generic { + args_in = 1 : i64, + args_out = 1 : i64, + indexing_maps = [#map0, #map0], + iterator_types = ["parallel"]} %7, %9 { ^bb0(%gen2_arg0: f32, %gen2_arg1: f32): %tmp2 = exp %gen2_arg0 : f32 linalg.yield %tmp2 : f32 @@ -249,28 +440,36 @@ // CHECK: %[[SECOND_ALLOC:.*]] = alloc() // CHECK-NEXT: linalg.generic // CHECK: dealloc %[[FIRST_ALLOC]] -// CHECK-NEXT: linalg.copy +// CHECK: linalg.copy // CHECK-NEXT: dealloc %[[SECOND_ALLOC]] // CHECK-NEXT: return // ----- // Test Case: Dead operations in a single block. -// BufferPlacement Expected Behaviour: It shouldn't move the AllocOps. It only inserts the two missing DeallocOps -// after the last GenericOp. +// BufferPlacement Expected Behaviour: It shouldn't move the AllocOps. It only +// inserts the two missing DeallocOps after the last GenericOp. 
#map0 = affine_map<(d0) -> (d0)> // CHECK-LABEL: func @redundantOperations func @redundantOperations(%arg0: memref<2xf32>) { %0 = alloc() : memref<2xf32> - linalg.generic {args_in = 1 : i64, args_out = 1 : i64, indexing_maps = [#map0, #map0], iterator_types = ["parallel"]} %arg0, %0 { + linalg.generic { + args_in = 1 : i64, + args_out = 1 : i64, + indexing_maps = [#map0, #map0], + iterator_types = ["parallel"]} %arg0, %0 { ^bb0(%gen1_arg0: f32, %gen1_arg1: f32): %tmp1 = exp %gen1_arg0 : f32 linalg.yield %tmp1 : f32 }: memref<2xf32>, memref<2xf32> %1 = alloc() : memref<2xf32> - linalg.generic {args_in = 1 : i64, args_out = 1 : i64, indexing_maps = [#map0, #map0], iterator_types = ["parallel"]} %0, %1 { + linalg.generic { + args_in = 1 : i64, + args_out = 1 : i64, + indexing_maps = [#map0, #map0], + iterator_types = ["parallel"]} %0, %1 { ^bb0(%gen2_arg0: f32, %gen2_arg1: f32): %tmp2 = exp %gen2_arg0 : f32 linalg.yield %tmp2 : f32 @@ -290,22 +489,30 @@ // ----- // Test Case: -// bb0 -// / \ -// Initial position of the first AllocOp -> bb1 bb2 <- Initial position of the second AllocOp -// \ / -// bb3 -// BufferPlacement Expected Behaviour: Both AllocOps should be moved to the entry block. Both missing DeallocOps should be moved to -// the exit block after CopyOp since %arg2 is an alias for %0 and %1. +// bb0 +// / \ +// Initial pos of the 1st AllocOp -> bb1 bb2 <- Initial pos of the 2nd AllocOp +// \ / +// bb3 +// BufferPlacement Expected Behaviour: Both AllocOps should be moved to the +// entry block. Both missing DeallocOps should be moved to the exit block after +// CopyOp since %arg2 is an alias for %0 and %1. 
#map0 = affine_map<(d0) -> (d0)> // CHECK-LABEL: func @moving_alloc_and_inserting_missing_dealloc -func @moving_alloc_and_inserting_missing_dealloc(%cond: i1, %arg0: memref<2xf32>, %arg1: memref<2xf32>){ +func @moving_alloc_and_inserting_missing_dealloc( + %cond: i1, + %arg0: memref<2xf32>, + %arg1: memref<2xf32>) { cond_br %cond, ^bb1, ^bb2 ^bb1: %0 = alloc() : memref<2xf32> - linalg.generic {args_in = 1 : i64, args_out = 1 : i64, indexing_maps = [#map0, #map0], iterator_types = ["parallel"]} %arg0, %0 { + linalg.generic { + args_in = 1 : i64, + args_out = 1 : i64, + indexing_maps = [#map0, #map0], + iterator_types = ["parallel"]} %arg0, %0 { ^bb0(%gen1_arg0: f32, %gen1_arg1: f32): %tmp1 = exp %gen1_arg0 : f32 linalg.yield %tmp1 : f32 @@ -313,7 +520,11 @@ br ^exit(%0 : memref<2xf32>) ^bb2: %1 = alloc() : memref<2xf32> - linalg.generic {args_in = 1 : i64, args_out = 1 : i64, indexing_maps = [#map0, #map0], iterator_types = ["parallel"]} %arg0, %1 { + linalg.generic { + args_in = 1 : i64, + args_out = 1 : i64, + indexing_maps = [#map0, #map0], + iterator_types = ["parallel"]} %arg0, %1 { ^bb0(%gen2_arg0: f32, %gen2_arg1: f32): %tmp2 = exp %gen2_arg0 : f32 linalg.yield %tmp2 : f32 @@ -333,25 +544,33 @@ // ----- -// Test Case: Invalid position of the DeallocOp. There is a user after deallocation. +// Test Case: Invalid position of the DeallocOp. There is a user after +// deallocation. // bb0 // / \ // bb1 bb2 <- Initial position of AllocOp // \ / // bb3 -// BufferPlacement Expected Behaviour: It should move the AllocOp to the entry block. -// The existing DeallocOp should be moved to exit block. +// BufferPlacement Expected Behaviour: It should move the AllocOp to the entry +// block. The existing DeallocOp should be moved to exit block. 
#map0 = affine_map<(d0) -> (d0)> // CHECK-LABEL: func @moving_invalid_dealloc_op_complex -func @moving_invalid_dealloc_op_complex(%cond: i1, %arg0: memref<2xf32>, %arg1: memref<2xf32>){ +func @moving_invalid_dealloc_op_complex( + %cond: i1, + %arg0: memref<2xf32>, + %arg1: memref<2xf32>) { cond_br %cond, ^bb1, ^bb2 ^bb1: br ^exit(%arg0 : memref<2xf32>) ^bb2: %1 = alloc() : memref<2xf32> - linalg.generic {args_in = 1 : i64, args_out = 1 : i64, indexing_maps = [#map0, #map0], iterator_types = ["parallel"]} %arg0, %1 { + linalg.generic { + args_in = 1 : i64, + args_out = 1 : i64, + indexing_maps = [#map0, #map0], + iterator_types = ["parallel"]} %arg0, %1 { ^bb0(%gen1_arg0: f32, %gen1_arg1: f32): %tmp1 = exp %gen1_arg0 : f32 linalg.yield %tmp1 : f32 @@ -375,9 +594,15 @@ #map0 = affine_map<(d0) -> (d0)> // CHECK-LABEL: func @inserting_missing_dealloc_simple -func @inserting_missing_dealloc_simple(%arg0 : memref<2xf32>, %arg1: memref<2xf32>){ +func @inserting_missing_dealloc_simple( + %arg0 : memref<2xf32>, + %arg1: memref<2xf32>) { %0 = alloc() : memref<2xf32> - linalg.generic {args_in = 1 : i64, args_out = 1 : i64, indexing_maps = [#map0, #map0], iterator_types = ["parallel"]} %arg0, %0 { + linalg.generic { + args_in = 1 : i64, + args_out = 1 : i64, + indexing_maps = [#map0, #map0], + iterator_types = ["parallel"]} %arg0, %0 { ^bb0(%gen1_arg0: f32, %gen1_arg1: f32): %tmp1 = exp %gen1_arg0 : f32 linalg.yield %tmp1 : f32 @@ -391,14 +616,19 @@ // ----- -// Test Case: Moving invalid DeallocOp (there is a user after deallocation) in a single block. +// Test Case: Moving invalid DeallocOp (there is a user after deallocation) in a +// single block. 
#map0 = affine_map<(d0) -> (d0)> // CHECK-LABEL: func @moving_invalid_dealloc_op -func @moving_invalid_dealloc_op(%arg0 : memref<2xf32>, %arg1: memref<2xf32>){ +func @moving_invalid_dealloc_op(%arg0 : memref<2xf32>, %arg1: memref<2xf32>) { %0 = alloc() : memref<2xf32> - linalg.generic {args_in = 1 : i64, args_out = 1 : i64, indexing_maps = [#map0, #map0], iterator_types = ["parallel"]} %arg0, %0 { + linalg.generic { + args_in = 1 : i64, + args_out = 1 : i64, + indexing_maps = [#map0, #map0], + iterator_types = ["parallel"]} %arg0, %0 { ^bb0(%gen1_arg0: f32, %gen1_arg1: f32): %tmp1 = exp %gen1_arg0 : f32 linalg.yield %tmp1 : f32