diff --git a/mlir/lib/Transforms/BufferPlacement.cpp b/mlir/lib/Transforms/BufferPlacement.cpp
--- a/mlir/lib/Transforms/BufferPlacement.cpp
+++ b/mlir/lib/Transforms/BufferPlacement.cpp
@@ -47,13 +47,6 @@
 // copy buffers and placing deallocs in safe places to ensure that all buffers
 // will be freed in the end.
 //
-// TODO:
-// The current implementation does not support loops and the resulting code will
-// be invalid with respect to program semantics. The only thing that is
-// currently missing is a high-level loop analysis that allows us to move allocs
-// and deallocs outside of the loop blocks. Furthermore, it doesn't also accept
-// functions which return buffers already.
-//
 //===----------------------------------------------------------------------===//
 
 #include "mlir/Transforms/BufferPlacement.h"
@@ -76,6 +69,22 @@
   }
 }
 
+/// Wrapper for the actual `RegionBranchOpInterface.getSuccessorRegions`
+/// function that initializes the required `operandAttributes` array.
+void getSuccessorRegions(RegionBranchOpInterface regionInterface,
+                         llvm::Optional<unsigned> index,
+                         SmallVectorImpl<RegionSuccessor> &successors) {
+  // Create an empty attribute for each operand to comply with the
+  // `getSuccessorRegions` interface definition that requires a single
+  // attribute per operand.
+  SmallVector<Attribute, 2> operandAttributes(
+      regionInterface.getOperation()->getNumOperands());
+
+  // Get all successor regions using the temporarily allocated
+  // `operandAttributes`.
+  regionInterface.getSuccessorRegions(index, operandAttributes, successors);
+}
+
 namespace {
 //===----------------------------------------------------------------------===//
 // BufferPlacementAliasAnalysis
 //===----------------------------------------------------------------------===//
@@ -165,16 +174,10 @@
     // Query the RegionBranchOpInterface to find potential successor regions.
     op->walk([&](RegionBranchOpInterface regionInterface) {
-      // Create an empty attribute for each operand to comply with the
-      // `getSuccessorRegions` interface definition that requires a single
-      // attribute per operand.
-      SmallVector<Attribute, 2> operandAttributes(
-          regionInterface.getOperation()->getNumOperands());
-
       // Extract all entry regions and wire all initial entry successor inputs.
       SmallVector<RegionSuccessor, 2> entrySuccessors;
-      regionInterface.getSuccessorRegions(/*index=*/llvm::None,
-                                          operandAttributes, entrySuccessors);
+      getSuccessorRegions(regionInterface, /*index=*/llvm::None,
+                          entrySuccessors);
       for (RegionSuccessor &entrySuccessor : entrySuccessors) {
         // Wire the entry region's successor arguments with the initial
         // successor inputs.
@@ -190,8 +193,8 @@
       // Iterate over all successor region entries that are reachable from the
       // current region.
       SmallVector<RegionSuccessor, 2> successorRegions;
-      regionInterface.getSuccessorRegions(
-          region.getRegionNumber(), operandAttributes, successorRegions);
+      getSuccessorRegions(regionInterface, region.getRegionNumber(),
+                          successorRegions);
       for (RegionSuccessor &successorRegion : successorRegions) {
         // Iterate over all immediate terminator operations and wire the
         // successor inputs with the operands of each terminator.
@@ -208,6 +211,118 @@
   ValueMapT aliases;
 };
 
+/// A straight-forward program analysis which detects loop backedges induced by
+/// explicit control flow and `RegionBranchOpInterface` ops.
+class Backedges {
+public:
+  using BlockSetT = SmallPtrSet<Block *, 16>;
+  using BackedgeSetT = llvm::DenseSet<std::pair<Block *, Block *>>;
+
+public:
+  /// Constructs a new backedges analysis using the op provided.
+  Backedges(Operation *op) { recurse(op, op->getBlock()); }
+
+  /// Returns the start iterator to loop over all backedges.
+  BackedgeSetT::const_iterator begin() const { return edgeSet.begin(); }
+
+  /// Returns the end iterator to loop over all backedges.
+  BackedgeSetT::const_iterator end() const { return edgeSet.end(); }
+
+private:
+  /// Enters the current block and inserts a backedge into the `edgeSet` if we
+  /// have already visited the current block. The inserted edge links the given
+  /// `predecessor` with the `current` block.
+  bool enter(Block &current, Block *predecessor) {
+    bool inserted = visited.insert(&current).second;
+    if (!inserted)
+      edgeSet.insert(std::make_pair(predecessor, &current));
+    return inserted;
+  }
+
+  /// Leaves the current block.
+  void exit(Block &current) { visited.erase(&current); }
+
+  /// Recurses into the given operation while taking all attached regions into
+  /// account.
+  void recurse(Operation *op, Block *predecessor) {
+    Block *current = op->getBlock();
+    // If the current op implements the `BranchOpInterface`, there can be
+    // cycles in the scope of all successor blocks.
+    if (isa<BranchOpInterface>(op)) {
+      for (Block *succ : current->getSuccessors())
+        recurse(*succ, current, [=]() {});
+    }
+
+    // If the current op implements the `RegionBranchOpInterface`, there can be
+    // cycles in the scope of all attached regions.
+    if (auto regionInterface = dyn_cast<RegionBranchOpInterface>(op)) {
+      // Extract and recurse into all entry regions that this operation can
+      // directly branch to.
+      SmallVector<RegionSuccessor, 2> entrySuccessors;
+      getSuccessorRegions(regionInterface, /*index=*/llvm::None,
+                          entrySuccessors);
+      for (RegionSuccessor &entrySuccessor : entrySuccessors)
+        recurse(regionInterface, current, entrySuccessor);
+    } else {
+      // If not, recurse into all distinct regions. Note that there cannot be
+      // any control-flow edges between those regions.
+      for (Region &region : op->getRegions())
+        recurse(region.front(), current, [=]() {});
+    }
+  }
+
+  /// Recurses into control-flow structures that rely on the
+  /// `RegionBranchOpInterface` instead of explicit control flow.
+  void recurse(RegionBranchOpInterface regionInterface, Block *predecessor,
+               RegionSuccessor &regionSuccessor) {
+    // If this region successor branches to its parent operation, we can safely
+    // ignore it since it cannot introduce any cycles.
+    Region *successor = regionSuccessor.getSuccessor();
+    if (!successor)
+      return;
+
+    // Start with the entry block of the region and recurse into all nested
+    // region successors.
+    Block &entryBlock = successor->front();
+    recurse(entryBlock, predecessor, [&]() {
+      // Extract all successors for the current region using the
+      // `RegionBranchOpInterface` and recurse into each region successor.
+      SmallVector<RegionSuccessor, 2> successorRegions;
+      getSuccessorRegions(regionInterface, successor->getRegionNumber(),
+                          successorRegions);
+      for (RegionSuccessor &regionSuccessor : successorRegions)
+        recurse(regionInterface, &entryBlock, regionSuccessor);
+    });
+  }
+
+  /// Recurses into explicit control-flow structures that are given by
+  /// the successor relation defined on the block level.
+  template <typename FuncT>
+  void recurse(Block &block, Block *predecessor, const FuncT &recurseOp) {
+    // Try to enter the current block. If this is not possible, we are
+    // currently processing this block and can safely return here.
+    if (!enter(block, predecessor))
+      return;
+
+    // Recurse into all operations and successor blocks.
+    for (auto &op : block.getOperations())
+      recurse(&op, predecessor);
+
+    // Invoke the user-defined recursion operation while the current block is
+    // still active.
+    recurseOp();
+
+    // Leave the current block.
+    exit(block);
+  }
+
+  /// Stores all blocks that are currently visited and on the processing
+  /// stack.
+  BlockSetT visited;
+
+  /// Stores all backedges in the format (source, target).
+  BackedgeSetT edgeSet;
+};
+
 //===----------------------------------------------------------------------===//
 // BufferPlacement
 //===----------------------------------------------------------------------===//
@@ -251,6 +366,8 @@
     findDeallocs();
     // Place deallocations for all allocation entries.
     placeDeallocs();
+    // Place deallocations for all backedges.
+    placeBackedgeDeallocs();
   }
 
 private:
@@ -337,11 +454,10 @@
   }
 
   /// Introduces required allocs and copy operations to avoid memory leaks.
+  /// This function initializes the set of values that require a dedicated
+  /// memory free operation since their operands cannot be safely deallocated
+  /// in a post dominator.
   void introduceCopies() {
-    // Initialize the set of values that require a dedicated memory free
-    // operation since their operands cannot be safely deallocated in a post
-    // dominator.
-    SmallPtrSet<Value, 2> valuesToFree;
     llvm::SmallDenseSet<std::tuple<Value, Block *>> visitedValues;
     SmallVector<std::tuple<Value, Block *>, 8> toProcess;
@@ -479,17 +595,12 @@
                  MutableArrayRef<Region> regions,
                  const TPredicate &regionPredicate,
                  const TOperandProvider &operandProvider) {
-    // Create an empty attribute for each operand to comply with the
-    // `getSuccessorRegions` interface definition that requires a single
-    // attribute per operand.
-    SmallVector<Attribute, 2> operandAttributes(
-        regionInterface.getOperation()->getNumOperands());
     for (Region &region : regions) {
       // Query the regionInterface to get all successor regions of the current
       // one.
       SmallVector<RegionSuccessor, 2> successorRegions;
-      regionInterface.getSuccessorRegions(region.getRegionNumber(),
-                                          operandAttributes, successorRegions);
+      getSuccessorRegions(regionInterface, region.getRegionNumber(),
+                          successorRegions);
       // Try to find a matching region successor.
       RegionSuccessor *regionSuccessor =
           llvm::find_if(successorRegions, regionPredicate);
@@ -623,6 +734,102 @@
     }
   }
 
+  /// Inserts and places deallocation operations for buffers that have to be
+  /// freed before returning to other parts of the program via backedges.
+  void placeBackedgeDeallocs() {
+    // Determine and loop over all backedges starting with the operation this
+    // analysis operates on.
+    Backedges backedges(operation);
+    for (auto &edge : backedges) {
+      Block *source = edge.first;
+      Block *target = edge.second;
+
+      // Get the terminator of the source block and determine whether it uses
+      // a `BranchOpInterface` (works on explicit control flow) or a
+      // `RegionBranchOpInterface`.
+      Operation *terminator = source->getTerminator();
+      if (auto branchInterface = dyn_cast<BranchOpInterface>(terminator)) {
+        // Find the corresponding successor entry and get its operands.
+        auto it = llvm::find(source->getSuccessors(), target);
+        auto successorOperands =
+            branchInterface.getSuccessorOperands(it.getIndex());
+        // Ignore backedges that do not pass any values to their successor
+        // blocks.
+        if (!successorOperands.hasValue())
+          continue;
+        placeDeallocsForBackedgeBranch(terminator, target->getArguments(),
+                                       successorOperands.getValue());
+      } else {
+        // If this terminator does not operate on explicit control flow, it has
+        // to be a `RegionBranchOpInterface` in order to be recognized by the
+        // `Backedges` analysis.
+        assert(terminator->hasTrait<OpTrait::ReturnLike>() &&
+               terminator->getParentRegion() &&
+               "Not supported backedge terminator");
+        auto regionInterface = cast<RegionBranchOpInterface>(
+            terminator->getParentRegion()->getParentOp());
+        Region *sourceRegion = source->getParent();
+        // Get all successor regions of the current source region.
+        SmallVector<RegionSuccessor, 2> successorRegions;
+        getSuccessorRegions(regionInterface, sourceRegion->getRegionNumber(),
+                            successorRegions);
+        // Find the region successor that corresponds to the source region.
+        RegionSuccessor *regionSuccessor =
+            llvm::find_if(successorRegions, [&](RegionSuccessor &succ) {
+              return succ.getSuccessor() == sourceRegion;
+            });
+        assert(regionSuccessor != successorRegions.end() &&
+               "Invalid region successor backedge");
+        placeDeallocsForBackedgeBranch(terminator,
+                                       regionSuccessor->getSuccessorInputs(),
+                                       terminator->getOperands());
+      }
+    }
+  }
+
+  /// Places deallocation operations for all iteration arguments that have to
+  /// be freed. An iteration argument is a `BlockArgument` that is reachable
+  /// via a backedge. The list of operands represents the values that are
+  /// passed to the iteration arguments when following the backedge.
+  template <typename IterationArgumentsT, typename OperandsT>
+  void placeDeallocsForBackedgeBranch(Operation *operation,
+                                      const IterationArgumentsT &iterationArgs,
+                                      const OperandsT &operands) {
+    for (auto it : llvm::zip(iterationArgs, operands)) {
+      // Skip iteration arguments that are not buffers.
+      Value iterationArg = std::get<0>(it);
+      if (!iterationArg.getType().isa<MemRefType>())
+        continue;
+
+      // Extract the operand that is passed via the backedge and check whether
+      // the value has been defined in the scope of this loop. If not, we are
+      // not allowed to place a dealloc here since the buffer will be
+      // automatically freed in its post dominator anyway.
+      Value value = std::get<1>(it);
+      Block *domBlock = iterationArg.getParentBlock();
+      Block *postDomBlock = value.getParentBlock();
+      if (dominators.properlyDominates(postDomBlock, domBlock))
+        continue;
+
+      // Test all aliases of the iteration argument for whether they belong to
+      // the loop and have been freed. If the current operand value is not
+      // contained in this set, we have to place a free operation in all cases.
+      auto iterationValueAliases = aliases.resolve(iterationArg);
+      if (iterationValueAliases.count(value) &&
+          !llvm::any_of(iterationValueAliases, [&](Value alias) {
+            return dominators.dominates(domBlock, alias.getParentBlock()) &&
+                   postDominators.postDominates(postDomBlock,
+                                                alias.getParentBlock()) &&
+                   valuesToFree.count(alias);
+          }))
+        continue;
+
+      // Insert a dealloc for the corresponding iteration argument.
+      OpBuilder builder(operation);
+      builder.create<DeallocOp>(operation->getLoc(), iterationArg);
+    }
+  }
+
   /// Finds a common dominator for the given value while taking the positions
   /// of the values in the value set into account. It supports dominator and
   /// post-dominator analyses via template arguments.
@@ -651,6 +858,9 @@
   /// Maps allocation nodes to their associated blocks.
   AllocEntryList allocs;
 
+  /// The set of all values that require copy and specific free operations.
+  ValueSetT valuesToFree;
+
   /// The underlying liveness analysis to compute fine grained information
   /// about alloc and dealloc positions.
   Liveness liveness;
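
For readers skimming the review, a minimal sketch of how the new `Backedges` analysis is consumed; this is illustrative only and not part of the patch, and `funcOp` is a hypothetical operation handle:

  // Detect all loop backedges reachable from `funcOp`, whether they stem from
  // explicit `br`/`cond_br` control flow or from ops implementing the
  // `RegionBranchOpInterface` (such as `scf.for`).
  Backedges backedges(funcOp);
  for (const std::pair<Block *, Block *> &edge : backedges) {
    Block *source = edge.first;  // block whose terminator closes the cycle
    Block *target = edge.second; // header-like block that is re-entered
    // `placeBackedgeDeallocs` inspects `source`'s terminator and frees
    // buffer-typed iteration arguments of `target` right before the jump.
  }

Storing edges as (source, target) pairs lets `placeBackedgeDeallocs` handle both terminator kinds uniformly: the source block determines where deallocs are inserted, and the target block provides the iteration arguments.
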
diff --git a/mlir/test/Transforms/buffer-placement.mlir b/mlir/test/Transforms/buffer-placement.mlir
--- a/mlir/test/Transforms/buffer-placement.mlir
+++ b/mlir/test/Transforms/buffer-placement.mlir
@@ -1125,3 +1125,312 @@
 // CHECK: %[[ALLOCA:.*]] = alloca(%arg0, %arg1)
 // CHECK-NEXT: scf.yield %[[ALLOC0]]
 // CHECK: return %[[ALLOC1]]
+
+// -----
+
+// Test Case: structured control-flow loop using a nested alloc.
+// The alloc position of %3 will not be changed, but the iteration argument
+// %iterBuf has to be freed before yielding %3 to avoid memory leaks.
+
+// CHECK-LABEL: func @loop_alloc
+func @loop_alloc(
+  %lb: index,
+  %ub: index,
+  %step: index,
+  %buf: memref<2xf32>,
+  %res: memref<2xf32>) {
+  %0 = alloc() : memref<2xf32>
+  %1 = scf.for %i = %lb to %ub step %step
+    iter_args(%iterBuf = %buf) -> memref<2xf32> {
+    %2 = cmpi "eq", %i, %ub : index
+    %3 = alloc() : memref<2xf32>
+    scf.yield %3 : memref<2xf32>
+  }
+  "linalg.copy"(%1, %res) : (memref<2xf32>, memref<2xf32>) -> ()
+  return
+}
+
+// CHECK: %[[ALLOC0:.*]] = alloc()
+// CHECK-NEXT: dealloc %[[ALLOC0]]
+// CHECK: %[[ALLOC1:.*]] = scf.for {{.*}} iter_args(%[[IALLOC:.*]] =
+// CHECK: %[[ALLOC2:.*]] = alloc()
+// CHECK: %[[ALLOC3:.*]] = alloc()
+// CHECK: linalg.copy(%[[ALLOC2]], %[[ALLOC3]])
+// CHECK: dealloc %[[ALLOC2]]
+// CHECK: dealloc %[[IALLOC]]
+// CHECK: scf.yield %[[ALLOC3]]
+// CHECK: linalg.copy(%[[ALLOC1]], %arg4)
+// CHECK-NEXT: dealloc %[[ALLOC1]]
+
+// -----
+
+// Test Case: structured control-flow loop with a nested if operation.
+// The loop yields buffers that have been defined outside of the loop and the
+// backedges only use the iteration arguments (or one of their aliases).
+// Therefore, we do not have to (and are not allowed to) free any buffers
+// that are passed via the backedges.
+
+// CHECK-LABEL: func @loop_nested_if_no_alloc
+func @loop_nested_if_no_alloc(
+  %lb: index,
+  %ub: index,
+  %step: index,
+  %buf: memref<2xf32>,
+  %res: memref<2xf32>) {
+  %0 = alloc() : memref<2xf32>
+  %1 = scf.for %i = %lb to %ub step %step
+    iter_args(%iterBuf = %buf) -> memref<2xf32> {
+    %2 = cmpi "eq", %i, %ub : index
+    %3 = scf.if %2 -> (memref<2xf32>) {
+      scf.yield %0 : memref<2xf32>
+    } else {
+      scf.yield %iterBuf : memref<2xf32>
+    }
+    scf.yield %3 : memref<2xf32>
+  }
+  "linalg.copy"(%1, %res) : (memref<2xf32>, memref<2xf32>) -> ()
+  return
+}
+
+// CHECK: %[[ALLOC0:.*]] = alloc()
+// CHECK-NEXT: %[[ALLOC1:.*]] = scf.for {{.*}} iter_args(%[[IALLOC:.*]] =
+// CHECK: %[[ALLOC2:.*]] = scf.if
+// CHECK: scf.yield %[[ALLOC0]]
+// CHECK: scf.yield %[[IALLOC]]
+// CHECK: scf.yield %[[ALLOC2]]
+// CHECK: linalg.copy(%[[ALLOC1]], %arg4)
+// CHECK: dealloc %[[ALLOC0]]
+
+// -----
+
+// Test Case: structured control-flow loop with a nested if operation using
+// a deeply nested buffer allocation.
+// Since the innermost allocation happens in a divergent branch, we have to
+// introduce additional copies for the nested if operation. Since the loop's
+// yield operation "returns" %3, it will return a newly allocated buffer.
+// Therefore, we have to free the iteration argument %iterBuf before
+// "returning" %3.
+
+// CHECK-LABEL: func @loop_nested_if_alloc
+func @loop_nested_if_alloc(
+  %lb: index,
+  %ub: index,
+  %step: index,
+  %buf: memref<2xf32>) -> memref<2xf32> {
+  %0 = alloc() : memref<2xf32>
+  %1 = scf.for %i = %lb to %ub step %step
+    iter_args(%iterBuf = %buf) -> memref<2xf32> {
+    %2 = cmpi "eq", %i, %ub : index
+    %3 = scf.if %2 -> (memref<2xf32>) {
+      %4 = alloc() : memref<2xf32>
+      scf.yield %4 : memref<2xf32>
+    } else {
+      scf.yield %0 : memref<2xf32>
+    }
+    scf.yield %3 : memref<2xf32>
+  }
+  return %1 : memref<2xf32>
+}
+
+// CHECK: %[[ALLOC0:.*]] = alloc()
+// CHECK-NEXT: %[[ALLOC1:.*]] = scf.for {{.*}} iter_args(%[[IALLOC:.*]] =
+// CHECK: %[[ALLOC2:.*]] = scf.if
+
+// CHECK: %[[ALLOC3:.*]] = alloc()
+// CHECK-NEXT: %[[ALLOC4:.*]] = alloc()
+// CHECK-NEXT: linalg.copy(%[[ALLOC3]], %[[ALLOC4]])
+// CHECK-NEXT: dealloc %[[ALLOC3]]
+// CHECK-NEXT: scf.yield %[[ALLOC4]]
+
+// CHECK: %[[ALLOC5:.*]] = alloc()
+// CHECK-NEXT: linalg.copy(%[[ALLOC0]], %[[ALLOC5]])
+// CHECK-NEXT: scf.yield %[[ALLOC5]]
+
+// CHECK: %[[ALLOC6:.*]] = alloc()
+// CHECK-NEXT: linalg.copy(%[[ALLOC2]], %[[ALLOC6]])
+// CHECK-NEXT: dealloc %[[ALLOC2]]
+// CHECK-NEXT: dealloc %[[IALLOC]]
+// CHECK-NEXT: scf.yield %[[ALLOC6]]
+
+// CHECK: dealloc %[[ALLOC0]]
+// CHECK-NEXT: return %[[ALLOC1]]
+
+// -----
+
+// Test Case: several nested structured control-flow loops with a deeply
+// nested buffer allocation inside an if operation.
+// Same behavior as in loop_nested_if_alloc: we have to insert deallocations
+// before each yield in all loops recursively.
+
+// CHECK-LABEL: func @loop_nested_alloc
+func @loop_nested_alloc(
+  %lb: index,
+  %ub: index,
+  %step: index,
+  %buf: memref<2xf32>,
+  %res: memref<2xf32>) {
+  %0 = alloc() : memref<2xf32>
+  %1 = scf.for %i = %lb to %ub step %step
+    iter_args(%iterBuf = %buf) -> memref<2xf32> {
+    %2 = scf.for %i2 = %lb to %ub step %step
+      iter_args(%iterBuf2 = %iterBuf) -> memref<2xf32> {
+      %3 = scf.for %i3 = %lb to %ub step %step
+        iter_args(%iterBuf3 = %iterBuf2) -> memref<2xf32> {
+        %4 = cmpi "eq", %i, %ub : index
+        %5 = scf.if %4 -> (memref<2xf32>) {
+          %6 = alloc() : memref<2xf32>
+          scf.yield %6 : memref<2xf32>
+        } else {
+          scf.yield %iterBuf3 : memref<2xf32>
+        }
+        scf.yield %5 : memref<2xf32>
+      }
+      scf.yield %3 : memref<2xf32>
+    }
+    scf.yield %2 : memref<2xf32>
+  }
+  "linalg.copy"(%1, %res) : (memref<2xf32>, memref<2xf32>) -> ()
+  return
+}
+
+// CHECK: %[[ALLOC0:.*]] = alloc()
+// CHECK-NEXT: dealloc %[[ALLOC0]]
+// CHECK-NEXT: %[[ALLOC1:.*]] = scf.for {{.*}} iter_args(%[[IALLOC0:.*]] =
+// CHECK: %[[ALLOC2:.*]] = scf.for {{.*}} iter_args(%[[IALLOC1:.*]] =
+// CHECK: %[[ALLOC3:.*]] = scf.for {{.*}} iter_args(%[[IALLOC2:.*]] =
+// CHECK: %[[ALLOC4:.*]] = scf.if
+// CHECK-NEXT: %[[ALLOC5:.*]] = alloc()
+// CHECK-NEXT: %[[ALLOC6:.*]] = alloc()
+// CHECK-NEXT: linalg.copy(%[[ALLOC5]], %[[ALLOC6]])
+// CHECK-NEXT: dealloc %[[ALLOC5]]
+// CHECK-NEXT: scf.yield %[[ALLOC6]]
+
+// CHECK: %[[ALLOC7:.*]] = alloc()
+// CHECK-NEXT: linalg.copy(%[[IALLOC2]], %[[ALLOC7]])
+// CHECK-NEXT: scf.yield %[[ALLOC7]]
+
+// CHECK: %[[ALLOC8:.*]] = alloc()
+// CHECK-NEXT: linalg.copy(%[[ALLOC4]], %[[ALLOC8]])
+// CHECK-NEXT: dealloc %[[ALLOC4]]
+// CHECK-NEXT: dealloc %[[IALLOC2]]
+// CHECK-NEXT: scf.yield %[[ALLOC8]]
+
+// CHECK: %[[ALLOC9:.*]] = alloc()
+// CHECK-NEXT: linalg.copy(%[[ALLOC3]], %[[ALLOC9]])
+// CHECK-NEXT: dealloc %[[ALLOC3]]
+// CHECK-NEXT: dealloc %[[IALLOC1]]
+// CHECK-NEXT: scf.yield %[[ALLOC9]]
+
+// CHECK: %[[ALLOC10:.*]] = alloc()
+// CHECK-NEXT: linalg.copy(%[[ALLOC2]], %[[ALLOC10]])
+// CHECK-NEXT: dealloc %[[ALLOC2]]
+// CHECK-NEXT: dealloc %[[IALLOC0]]
+// CHECK-NEXT: scf.yield %[[ALLOC10]]
+
+// CHECK: linalg.copy(%[[ALLOC1]], %arg4)
+// CHECK-NEXT: dealloc %[[ALLOC1]]
+
+// -----
+
+// Test Case: explicit control-flow loop with a dynamically allocated buffer.
+// Since %alloc1 flows back to the loop header, we have to free the iteration
+// argument %buff before returning to the header.
+
+// CHECK-LABEL: func @loop_dynalloc
+func @loop_dynalloc(
+  %arg0 : i32,
+  %arg1 : i32,
+  %arg2: memref<?xf32>,
+  %arg3: memref<?xf32>) {
+  %const0 = constant 0 : i32
+  br ^loopHeader(%const0, %arg2 : i32, memref<?xf32>)
+
+^loopHeader(%i : i32, %buff : memref<?xf32>):
+  %lessThan = cmpi "slt", %i, %arg1 : i32
+  cond_br %lessThan,
+    ^loopBody(%i, %buff : i32, memref<?xf32>),
+    ^exit(%buff : memref<?xf32>)
+
+^loopBody(%val : i32, %buff2: memref<?xf32>):
+  %const1 = constant 1 : i32
+  %inc = addi %val, %const1 : i32
+  %size = std.index_cast %inc : i32 to index
+  %alloc1 = alloc(%size) : memref<?xf32>
+  br ^loopHeader(%inc, %alloc1 : i32, memref<?xf32>)
+
+^exit(%buff3 : memref<?xf32>):
+  "linalg.copy"(%buff3, %arg3) : (memref<?xf32>, memref<?xf32>) -> ()
+  return
+}
+
+// CHECK: %[[DIM0:.*]] = dim %arg2
+// CHECK-NEXT: %[[ALLOC0:.*]] = alloc(%[[DIM0]])
+// CHECK-NEXT: linalg.copy(%arg2, %[[ALLOC0]])
+// CHECK-NEXT: br ^bb1({{.*}}, %[[ALLOC0]] : {{.*}})
+
+// CHECK: ^bb1({{.*}}, %[[IALLOC0:.*]]: {{.*}}):
+// CHECK: cond_br {{.*}}, ^bb2({{.*}}, %[[IALLOC0]] : {{.*}}),
+// CHECK-SAME: ^bb3(%[[IALLOC0]] : {{.*}})
+
+// CHECK: ^bb2
+// CHECK: %[[DIM1:.*]] = index_cast
+// CHECK-NEXT: %[[ALLOC1:.*]] = alloc(%[[DIM1]])
+// CHECK: %[[DIM2:.*]] = dim %[[ALLOC1]]
+// CHECK-NEXT: %[[ALLOC2:.*]] = alloc(%[[DIM2]])
+// CHECK-NEXT: linalg.copy(%[[ALLOC1]], %[[ALLOC2]])
+// CHECK-NEXT: dealloc %[[ALLOC1]]
+// CHECK-NEXT: dealloc %[[IALLOC0]]
+// CHECK-NEXT: br ^bb1({{.*}}, %[[ALLOC2]] : {{.*}})
+
+// CHECK: ^bb3(%[[ALLOC3:.*]]: {{.*}}):
+// CHECK-NEXT: linalg.copy(%[[ALLOC3]], %arg3)
+// CHECK-NEXT: dealloc %[[IALLOC0]]
+
+// -----
+
+// Test Case: explicit control-flow loop with a dynamically allocated buffer.
+// Since %alloc1 flows to the do-while loop header, we have to free the
+// iteration argument %buff2 before returning to the loop body.
+
+// CHECK-LABEL: func @do_loop_alloc
+func @do_loop_alloc(
+  %arg0 : i32,
+  %arg1 : i32,
+  %arg2: memref<2xf32>,
+  %arg3: memref<2xf32>) {
+  %const0 = constant 0 : i32
+  br ^loopBody(%const0, %arg2 : i32, memref<2xf32>)
+
+^loopBody(%val : i32, %buff2: memref<2xf32>):
+  %const1 = constant 1 : i32
+  %inc = addi %val, %const1 : i32
+  %alloc1 = alloc() : memref<2xf32>
+  br ^loopHeader(%inc, %alloc1 : i32, memref<2xf32>)
+
+^loopHeader(%i : i32, %buff : memref<2xf32>):
+  %lessThan = cmpi "slt", %i, %arg1 : i32
+  cond_br %lessThan,
+    ^loopBody(%i, %buff : i32, memref<2xf32>),
+    ^exit(%buff : memref<2xf32>)
+
+^exit(%buff3 : memref<2xf32>):
+  "linalg.copy"(%buff3, %arg3) : (memref<2xf32>, memref<2xf32>) -> ()
+  return
+}
+
+// CHECK: br ^bb1({{.*}}, %arg2
+
+// CHECK: ^bb1({{.*}}, %[[IALLOC0:.*]]: {{.*}}):
+// CHECK: %[[ALLOC0:.*]] = alloc()
+// CHECK-NEXT: br ^bb2({{.*}}, %[[ALLOC0]] :
+
+// CHECK: ^bb2({{.*}}, %[[IALLOC1:.*]]: {{.*}}):
+// CHECK: dealloc %[[IALLOC0]]
+// CHECK-NEXT: cond_br {{.*}}, ^bb1({{.*}}, %[[IALLOC1]] : {{.*}}),
+// CHECK-SAME: ^bb3(%[[IALLOC1]] :
+
+// CHECK: ^bb3(%[[ALLOC2:.*]]: {{.*}}):
+// CHECK-NEXT: linalg.copy(%[[ALLOC2]], %arg3)
+// CHECK-NEXT: dealloc %[[ALLOC0]]
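
The heart of the `Backedges` analysis above is the enter/exit discipline: a block sits in `visited` exactly while it is on the DFS processing stack, so any edge into such a block closes a cycle. The same idea, reduced to a self-contained sketch over a plain integer digraph (a stand-in for MLIR blocks, not code from this patch):

  #include <cstdio>
  #include <set>
  #include <utility>
  #include <vector>

  using Edge = std::pair<int, int>; // (source, target), as in `edgeSet`

  // Mirrors `enter`/`exit`: a node is inserted into `onStack` when the DFS
  // reaches it and erased when the DFS unwinds; reaching a node that is
  // already on the stack records a backedge from its predecessor.
  static void dfs(int node, int pred,
                  const std::vector<std::vector<int>> &succs,
                  std::set<int> &onStack, std::set<Edge> &backedges) {
    if (!onStack.insert(node).second) {
      backedges.insert({pred, node}); // `enter` failed: cycle detected
      return;
    }
    for (int succ : succs[node])
      dfs(succ, node, succs, onStack, backedges);
    onStack.erase(node); // `exit`: other DFS paths may enter again
  }

  int main() {
    // 0 -> 1 -> 2 -> {1, 3}: node 1 acts as a loop header, and 2 -> 1 is
    // the backedge.
    std::vector<std::vector<int>> succs = {{1}, {2}, {1, 3}, {}};
    std::set<int> onStack;
    std::set<Edge> backedges;
    dfs(/*node=*/0, /*pred=*/-1, succs, onStack, backedges);
    for (const Edge &e : backedges)
      std::printf("backedge: %d -> %d\n", e.first, e.second);
    return 0;
  }

This prints `backedge: 2 -> 1`. The patch differs only in how edges are discovered: successors come either from `Block::getSuccessors()` or from the `RegionBranchOpInterface` region successors, which is why `Backedges::recurse` is overloaded for both.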