diff --git a/mlir/lib/Transforms/BufferPlacement.cpp b/mlir/lib/Transforms/BufferPlacement.cpp
--- a/mlir/lib/Transforms/BufferPlacement.cpp
+++ b/mlir/lib/Transforms/BufferPlacement.cpp
@@ -48,11 +48,10 @@
 // will be freed in the end.
 //
 // TODO:
-// The current implementation does not support loops and the resulting code will
-// be invalid with respect to program semantics. The only thing that is
-// currently missing is a high-level loop analysis that allows us to move allocs
-// and deallocs outside of the loop blocks. Furthermore, it doesn't also accept
-// functions which return buffers already.
+// The current implementation does not support explicit-control-flow loops and
+// the resulting code will be invalid with respect to program semantics.
+// However, structured control-flow loops are fully supported. Furthermore, it
+// doesn't accept functions which return buffers already.
 //
 //===----------------------------------------------------------------------===//
 
@@ -77,6 +76,22 @@
   }
 }
 
+/// Wrapper for the actual `RegionBranchOpInterface.getSuccessorRegions`
+/// function that initializes the required `operandAttributes` array.
+static void getSuccessorRegions(RegionBranchOpInterface regionInterface,
+                                llvm::Optional<unsigned> index,
+                                SmallVectorImpl<RegionSuccessor> &successors) {
+  // Create a list of null attributes for each operand to comply with the
+  // `getSuccessorRegions` interface definition that requires a single
+  // attribute per operand.
+  SmallVector<Attribute, 2> operandAttributes(
+      regionInterface.getOperation()->getNumOperands());
+
+  // Get all successor regions using the temporarily allocated
+  // `operandAttributes`.
+  regionInterface.getSuccessorRegions(index, operandAttributes, successors);
+}
+
 namespace {
 //===----------------------------------------------------------------------===//
 // BufferPlacementAliasAnalysis
@@ -166,16 +181,10 @@
 
     // Query the RegionBranchOpInterface to find potential successor regions.
     op->walk([&](RegionBranchOpInterface regionInterface) {
-      // Create an empty attribute for each operand to comply with the
-      // `getSuccessorRegions` interface definition that requires a single
-      // attribute per operand.
-      SmallVector<Attribute, 2> operandAttributes(
-          regionInterface.getOperation()->getNumOperands());
-
       // Extract all entry regions and wire all initial entry successor inputs.
       SmallVector<RegionSuccessor, 2> entrySuccessors;
-      regionInterface.getSuccessorRegions(/*index=*/llvm::None,
-                                          operandAttributes, entrySuccessors);
+      getSuccessorRegions(regionInterface, /*index=*/llvm::None,
+                          entrySuccessors);
       for (RegionSuccessor &entrySuccessor : entrySuccessors) {
         // Wire the entry region's successor arguments with the initial
         // successor inputs.
@@ -191,8 +200,8 @@
         // Iterate over all successor region entries that are reachable from the
         // current region.
         SmallVector<RegionSuccessor, 2> successorRegions;
-        regionInterface.getSuccessorRegions(
-            region.getRegionNumber(), operandAttributes, successorRegions);
+        getSuccessorRegions(regionInterface, region.getRegionNumber(),
+                            successorRegions);
        for (RegionSuccessor &successorRegion : successorRegions) {
           // Iterate over all immediate terminator operations and wire the
           // successor inputs with the operands of each terminator.
@@ -209,6 +218,83 @@
   ValueMapT aliases;
 };
 
+//===----------------------------------------------------------------------===//
+// Backedges
+//===----------------------------------------------------------------------===//
+
+/// A straight-forward program analysis which detects loop backedges induced by
+/// explicit control flow.
+class Backedges {
+public:
+  using BlockSetT = SmallPtrSet<Block *, 16>;
+  using BackedgeSetT = llvm::DenseSet<std::pair<Block *, Block *>>;
+
+public:
+  /// Constructs a new backedges analysis using the op provided.
+  Backedges(Operation *op) { recurse(op, op->getBlock()); }
+
+  /// Returns the number of backedges formed by explicit control flow.
+  size_t size() const { return edgeSet.size(); }
+
+  /// Returns the start iterator to loop over all backedges.
+  BackedgeSetT::const_iterator begin() const { return edgeSet.begin(); }
+
+  /// Returns the end iterator to loop over all backedges.
+  BackedgeSetT::const_iterator end() const { return edgeSet.end(); }
+
+private:
+  /// Enters the current block and inserts a backedge into the `edgeSet` if we
+  /// have already visited the current block. The inserted edge links the given
+  /// `predecessor` with the `current` block.
+  bool enter(Block &current, Block *predecessor) {
+    bool inserted = visited.insert(&current).second;
+    if (!inserted)
+      edgeSet.insert(std::make_pair(predecessor, &current));
+    return inserted;
+  }
+
+  /// Leaves the current block.
+  void exit(Block &current) { visited.erase(&current); }
+
+  /// Recurses into the given operation while taking all attached regions into
+  /// account.
+  void recurse(Operation *op, Block *predecessor) {
+    Block *current = op->getBlock();
+    // If the current op implements the `BranchOpInterface`, there can be
+    // cycles in the scope of all successor blocks.
+    if (isa<BranchOpInterface>(op)) {
+      for (Block *succ : current->getSuccessors())
+        recurse(*succ, current);
+    }
+    // Recurse into all distinct regions and check for explicit control-flow
+    // loops.
+    for (Region &region : op->getRegions())
+      recurse(region.front(), current);
+  }
+
+  /// Recurses into explicit control-flow structures that are given by
+  /// the successor relation defined on the block level.
+  void recurse(Block &block, Block *predecessor) {
+    // Try to enter the current block. If this is not possible, we are
+    // currently processing this block and can safely return here.
+    if (!enter(block, predecessor))
+      return;
+
+    // Recurse into all operations and successor blocks.
+    for (auto &op : block.getOperations())
+      recurse(&op, predecessor);
+
+    // Leave the current block.
+    exit(block);
+  }
+
+  /// Stores all blocks that are currently visited and on the processing stack.
+  BlockSetT visited;
+
+  /// Stores all backedges in the format (source, target).
+  BackedgeSetT edgeSet;
+};
+
 //===----------------------------------------------------------------------===//
 // BufferPlacement
 //===----------------------------------------------------------------------===//
@@ -357,9 +443,14 @@
       for (Value value : it->second) {
         if (valuesToFree.count(value) > 0)
           continue;
-        // Check whether we have to free this particular block argument.
-        if (!dominators.dominates(definingBlock, value.getParentBlock())) {
-          toProcess.emplace_back(value, value.getParentBlock());
+        Block *parentBlock = value.getParentBlock();
+        // Check whether we have to free this particular block argument or
+        // generic value. We have to free the current alias if it is either
+        // defined in a non-dominated block or it is defined in the same block
+        // but the current value is not dominated by the source value.
+        if (!dominators.dominates(definingBlock, parentBlock) ||
+            (definingBlock == parentBlock && value.isa<BlockArgument>())) {
+          toProcess.emplace_back(value, parentBlock);
           valuesToFree.insert(value);
         } else if (visitedValues.insert(std::make_tuple(value, definingBlock))
                        .second)
@@ -431,22 +522,42 @@
     // argument belongs to the first block in a region and the parent operation
     // implements the RegionBranchOpInterface.
     Region *argRegion = block->getParent();
+    Operation *parentOp = argRegion->getParentOp();
     RegionBranchOpInterface regionInterface;
     if (!argRegion || &argRegion->front() != block ||
-        !(regionInterface =
-              dyn_cast<RegionBranchOpInterface>(argRegion->getParentOp())))
+        !(regionInterface = dyn_cast<RegionBranchOpInterface>(parentOp)))
       return;
 
     introduceCopiesForRegionSuccessors(
-        regionInterface, argRegion->getParentOp()->getRegions(),
+        regionInterface, argRegion->getParentOp()->getRegions(), blockArg,
         [&](RegionSuccessor &successorRegion) {
           // Find a predecessor of our argRegion.
           return successorRegion.getSuccessor() == argRegion;
-        },
-        [&](RegionSuccessor &successorRegion) {
-          // The operand index will be the argument number.
-          return blockArg.getArgNumber();
         });
+
+    // Check whether the block argument belongs to an entry region of the
+    // parent operation. In this case, we have to introduce an additional copy
+    // for the buffer that is passed to the argument.
+    SmallVector<RegionSuccessor, 2> successorRegions;
+    getSuccessorRegions(regionInterface, llvm::None, successorRegions);
+    auto *it =
+        llvm::find_if(successorRegions, [&](RegionSuccessor &successorRegion) {
+          return successorRegion.getSuccessor() == argRegion;
+        });
+    if (it == successorRegions.end())
+      return;
+
+    // Determine the actual operand to introduce a copy for and rewire the
+    // operand to point to the copy instead.
+    Value operand =
+        regionInterface.getSuccessorEntryOperands(argRegion->getRegionNumber())
+            [llvm::find(it->getSuccessorInputs(), blockArg).getIndex()];
+    Value copy = introduceBufferCopy(operand, parentOp);
+
+    auto op = llvm::find(parentOp->getOperands(), operand);
+    assert(op != parentOp->getOperands().end() &&
+           "parentOp does not contain operand");
+    parentOp->setOperand(op.getIndex(), copy);
   }
 
   /// Introduces temporary allocs in front of all associated nested-region
@@ -455,42 +566,34 @@
     // Get the actual result index in the scope of the parent terminator.
     Operation *operation = value.getDefiningOp();
     auto regionInterface = cast<RegionBranchOpInterface>(operation);
-    introduceCopiesForRegionSuccessors(
-        regionInterface, operation->getRegions(),
-        [&](RegionSuccessor &successorRegion) {
-          // Determine whether this region has a successor entry that leaves
-          // this region by returning to its parent operation.
-          return !successorRegion.getSuccessor();
-        },
-        [&](RegionSuccessor &successorRegion) {
-          // Find the associated success input index.
-          return llvm::find(successorRegion.getSuccessorInputs(), value)
-              .getIndex();
-        });
+    // Filter successors that return to the parent operation.
+    auto regionPredicate = [&](RegionSuccessor &successorRegion) {
+      // If the RegionSuccessor has no associated successor, it will return to
+      // its parent operation.
+      return !successorRegion.getSuccessor();
+    };
+    // Introduce a copy for all region "results" that are returned to the parent
+    // operation. This is required since the parent's result value has been
+    // considered critical. Therefore, the algorithm assumes that a copy of a
+    // previously allocated buffer is returned by the operation (like in the
+    // case of a block argument).
+    introduceCopiesForRegionSuccessors(regionInterface, operation->getRegions(),
+                                       value, regionPredicate);
   }
 
   /// Introduces buffer copies for all terminators in the given regions. The
   /// regionPredicate is applied to every successor region in order to restrict
-  /// the copies to specific regions. Thereby, the operandProvider is invoked
-  /// for each matching region successor and determines the operand index that
-  /// requires a buffer copy.
-  template <typename TPredicate, typename TOperandProvider>
-  void
-  introduceCopiesForRegionSuccessors(RegionBranchOpInterface regionInterface,
-                                     MutableArrayRef<Region> regions,
-                                     const TPredicate &regionPredicate,
-                                     const TOperandProvider &operandProvider) {
-    // Create an empty attribute for each operand to comply with the
-    // `getSuccessorRegions` interface definition that requires a single
-    // attribute per operand.
-    SmallVector<Attribute, 2> operandAttributes(
-        regionInterface.getOperation()->getNumOperands());
+  /// the copies to specific regions.
+  template <typename TPredicate>
+  void introduceCopiesForRegionSuccessors(
+      RegionBranchOpInterface regionInterface, MutableArrayRef<Region> regions,
+      Value argValue, const TPredicate &regionPredicate) {
     for (Region &region : regions) {
       // Query the regionInterface to get all successor regions of the current
       // one.
       SmallVector<RegionSuccessor, 2> successorRegions;
-      regionInterface.getSuccessorRegions(region.getRegionNumber(),
-                                          operandAttributes, successorRegions);
+      getSuccessorRegions(regionInterface, region.getRegionNumber(),
+                          successorRegions);
       // Try to find a matching region successor.
       RegionSuccessor *regionSuccessor =
           llvm::find_if(successorRegions, regionPredicate);
@@ -498,7 +601,9 @@
         continue;
       // Get the operand index in the context of the current successor input
       // bindings.
-      auto operandIndex = operandProvider(*regionSuccessor);
+      size_t operandIndex =
+          llvm::find(regionSuccessor->getSuccessorInputs(), argValue)
+              .getIndex();
 
       // Iterate over all immediate terminator operations to introduce
       // new buffer allocations. Thereby, the appropriate terminator operand
@@ -518,6 +623,16 @@
   /// its content into the newly allocated buffer. The terminator operation is
   /// used to insert the alloc and copy operations at the right places.
   Value introduceBufferCopy(Value sourceValue, Operation *terminator) {
+    // Avoid multiple copies of the same source value. This can happen in the
+    // presence of loops when a branch acts as a backedge while also having
+    // another successor that returns to its parent operation. Note that
+    // copying copied buffers can introduce memory leaks since the invariant of
+    // BufferPlacement assumes that a buffer will be only copied once into a
+    // temporary buffer. Hence, the construction of copy chains introduces
+    // additional allocations that are not tracked automatically by the
+    // algorithm.
+    if (copiedValues.contains(sourceValue))
+      return sourceValue;
     // Create a new alloc at the current location of the terminator.
     auto memRefType = sourceValue.getType().cast<MemRefType>();
     OpBuilder builder(terminator);
@@ -541,6 +656,8 @@
     // allocation to the new one.
     builder.create<linalg::CopyOp>(terminator->getLoc(), sourceValue, alloc);
+    // Remember the copy of the original source value.
+    copiedValues.insert(alloc);
     return alloc;
   }
 
@@ -652,6 +769,9 @@
   /// Maps allocation nodes to their associated blocks.
   AllocEntryList allocs;
 
+  // Stores already copied allocations to avoid additional copies of copies.
+  ValueSetT copiedValues;
+
   /// The underlying liveness analysis to compute fine grained information
   /// about alloc and dealloc positions.
   Liveness liveness;
@@ -673,6 +793,14 @@
 struct BufferPlacementPass : BufferPlacementBase<BufferPlacementPass> {
 
   void runOnFunction() override {
+    // Ensure that there are only supported loops.
+    Backedges backedges(getFunction());
+    if (backedges.size()) {
+      getFunction().emitError(
+          "Structured control-flow loops are supported only.");
+      return;
+    }
+
     // Place all required alloc, copy and dealloc nodes.
     BufferPlacement placement(getFunction());
     placement.place();
diff --git a/mlir/test/Transforms/buffer-placement.mlir b/mlir/test/Transforms/buffer-placement.mlir
--- a/mlir/test/Transforms/buffer-placement.mlir
+++ b/mlir/test/Transforms/buffer-placement.mlir
@@ -1125,3 +1125,291 @@
 // CHECK: %[[ALLOCA:.*]] = alloca(%arg0, %arg1)
 // CHECK-NEXT: scf.yield %[[ALLOC0]]
 // CHECK: return %[[ALLOC1]]
+
+// -----
+
+// Test Case: structured control-flow loop using a nested alloc.
+// The alloc position of %3 will not be changed, but the iteration argument
+// %iterBuf has to be freed before yielding %3 to avoid memory leaks.
+
+// CHECK-LABEL: func @loop_alloc
+func @loop_alloc(
+  %lb: index,
+  %ub: index,
+  %step: index,
+  %buf: memref<2xf32>,
+  %res: memref<2xf32>) {
+  %0 = alloc() : memref<2xf32>
+  %1 = scf.for %i = %lb to %ub step %step
+    iter_args(%iterBuf = %buf) -> memref<2xf32> {
+    %2 = cmpi "eq", %i, %ub : index
+    %3 = alloc() : memref<2xf32>
+    scf.yield %3 : memref<2xf32>
+  }
+  "linalg.copy"(%1, %res) : (memref<2xf32>, memref<2xf32>) -> ()
+  return
+}
+
+// CHECK: %[[ALLOC0:.*]] = alloc()
+// CHECK-NEXT: dealloc %[[ALLOC0]]
+// CHECK-NEXT: %[[ALLOC1:.*]] = alloc()
+// CHECK: linalg.copy(%arg3, %[[ALLOC1]])
+// CHECK: %[[ALLOC2:.*]] = scf.for {{.*}} iter_args(%[[IALLOC:.*]] = %[[ALLOC1]]
+// CHECK: cmpi
+// CHECK: dealloc %[[IALLOC]]
+// CHECK: %[[ALLOC3:.*]] = alloc()
+// CHECK: %[[ALLOC4:.*]] = alloc()
+// CHECK: linalg.copy(%[[ALLOC3]], %[[ALLOC4]])
+// CHECK: dealloc %[[ALLOC3]]
+// CHECK: scf.yield %[[ALLOC4]]
+// CHECK: }
+// CHECK: linalg.copy(%[[ALLOC2]], %arg4)
+// CHECK-NEXT: dealloc %[[ALLOC2]]
+
+// -----
+
+// Test Case: structured control-flow loop with a nested if operation.
+// The loop yields buffers that have been defined outside of the loop and the
+// backedges only use the iteration arguments (or one of their aliases).
+// Therefore, we do not have to (and are not allowed to) free any buffers
+// that are passed via the backedges.
+
+// CHECK-LABEL: func @loop_nested_if_no_alloc
+func @loop_nested_if_no_alloc(
+  %lb: index,
+  %ub: index,
+  %step: index,
+  %buf: memref<2xf32>,
+  %res: memref<2xf32>) {
+  %0 = alloc() : memref<2xf32>
+  %1 = scf.for %i = %lb to %ub step %step
+    iter_args(%iterBuf = %buf) -> memref<2xf32> {
+    %2 = cmpi "eq", %i, %ub : index
+    %3 = scf.if %2 -> (memref<2xf32>) {
+      scf.yield %0 : memref<2xf32>
+    } else {
+      scf.yield %iterBuf : memref<2xf32>
+    }
+    scf.yield %3 : memref<2xf32>
+  }
+  "linalg.copy"(%1, %res) : (memref<2xf32>, memref<2xf32>) -> ()
+  return
+}
+
+// CHECK: %[[ALLOC0:.*]] = alloc()
+// CHECK-NEXT: %[[ALLOC1:.*]] = scf.for {{.*}} iter_args(%[[IALLOC:.*]] =
+// CHECK: %[[ALLOC2:.*]] = scf.if
+// CHECK: scf.yield %[[ALLOC0]]
+// CHECK: scf.yield %[[IALLOC]]
+// CHECK: scf.yield %[[ALLOC2]]
+// CHECK: linalg.copy(%[[ALLOC1]], %arg4)
+// CHECK: dealloc %[[ALLOC0]]
+
+// -----
+
+// Test Case: structured control-flow loop with a nested if operation using
+// a deeply nested buffer allocation.
+// Since the innermost allocation happens in a divergent branch, we have to
+// introduce additional copies for the nested if operation. Since the loop's
+// yield operation "returns" %3, it will return a newly allocated buffer.
+// Therefore, we have to free the iteration argument %iterBuf before
+// "returning" %3.
+
+// CHECK-LABEL: func @loop_nested_if_alloc
+func @loop_nested_if_alloc(
+  %lb: index,
+  %ub: index,
+  %step: index,
+  %buf: memref<2xf32>) -> memref<2xf32> {
+  %0 = alloc() : memref<2xf32>
+  %1 = scf.for %i = %lb to %ub step %step
+    iter_args(%iterBuf = %buf) -> memref<2xf32> {
+    %2 = cmpi "eq", %i, %ub : index
+    %3 = scf.if %2 -> (memref<2xf32>) {
+      %4 = alloc() : memref<2xf32>
+      scf.yield %4 : memref<2xf32>
+    } else {
+      scf.yield %0 : memref<2xf32>
+    }
+    scf.yield %3 : memref<2xf32>
+  }
+  return %1 : memref<2xf32>
+}
+
+// CHECK: %[[ALLOC0:.*]] = alloc()
+// CHECK: %[[ALLOC1:.*]] = alloc()
+// CHECK-NEXT: linalg.copy(%arg3, %[[ALLOC1]])
+// CHECK-NEXT: %[[ALLOC2:.*]] = scf.for {{.*}} iter_args(%[[IALLOC:.*]] = %[[ALLOC1]]
+// CHECK: dealloc %[[IALLOC]]
+// CHECK: %[[ALLOC3:.*]] = scf.if
+
+// CHECK: %[[ALLOC4:.*]] = alloc()
+// CHECK-NEXT: %[[ALLOC5:.*]] = alloc()
+// CHECK-NEXT: linalg.copy(%[[ALLOC4]], %[[ALLOC5]])
+// CHECK-NEXT: dealloc %[[ALLOC4]]
+// CHECK-NEXT: scf.yield %[[ALLOC5]]
+
+// CHECK: %[[ALLOC6:.*]] = alloc()
+// CHECK-NEXT: linalg.copy(%[[ALLOC0]], %[[ALLOC6]])
+// CHECK-NEXT: scf.yield %[[ALLOC6]]
+
+// CHECK: %[[ALLOC7:.*]] = alloc()
+// CHECK-NEXT: linalg.copy(%[[ALLOC3:.*]], %[[ALLOC7]])
+// CHECK-NEXT: dealloc %[[ALLOC3]]
+// CHECK-NEXT: scf.yield %[[ALLOC7]]
+
+// CHECK: dealloc %[[ALLOC0]]
+// CHECK-NEXT: return %[[ALLOC2]]
+
+// -----
+
+// Test Case: several nested structured control-flow loops with a deeply nested
+// buffer allocation inside an if operation.
+// Same behavior as in loop_nested_if_alloc: we have to insert deallocations
+// before each yield in all loops recursively.
+ +// CHECK-LABEL: func @loop_nested_alloc +func @loop_nested_alloc( + %lb: index, + %ub: index, + %step: index, + %buf: memref<2xf32>, + %res: memref<2xf32>) { + %0 = alloc() : memref<2xf32> + %1 = scf.for %i = %lb to %ub step %step + iter_args(%iterBuf = %buf) -> memref<2xf32> { + %2 = scf.for %i2 = %lb to %ub step %step + iter_args(%iterBuf2 = %iterBuf) -> memref<2xf32> { + %3 = scf.for %i3 = %lb to %ub step %step + iter_args(%iterBuf3 = %iterBuf2) -> memref<2xf32> { + %4 = alloc() : memref<2xf32> + %5 = cmpi "eq", %i, %ub : index + %6 = scf.if %5 -> (memref<2xf32>) { + %7 = alloc() : memref<2xf32> + scf.yield %7 : memref<2xf32> + } else { + scf.yield %iterBuf3 : memref<2xf32> + } + scf.yield %6 : memref<2xf32> + } + scf.yield %3 : memref<2xf32> + } + scf.yield %2 : memref<2xf32> + } + "linalg.copy"(%1, %res) : (memref<2xf32>, memref<2xf32>) -> () + return +} + +// CHECK: %[[ALLOC0:.*]] = alloc() +// CHECK-NEXT: dealloc %[[ALLOC0]] +// CHECK-NEXT: %[[ALLOC1:.*]] = alloc() +// CHECK-NEXT: linalg.copy(%arg3, %[[ALLOC1]]) +// CHECK-NEXT: %[[VAL_7:.*]] = scf.for {{.*}} iter_args(%[[IALLOC0:.*]] = %[[ALLOC1]]) +// CHECK: %[[ALLOC2:.*]] = alloc() +// CHECK-NEXT: linalg.copy(%[[IALLOC0]], %[[ALLOC2]]) +// CHECK-NEXT: dealloc %[[IALLOC0]] +// CHECK-NEXT: %[[ALLOC3:.*]] = scf.for {{.*}} iter_args(%[[IALLOC1:.*]] = %[[ALLOC2]]) +// CHECK: %[[ALLOC5:.*]] = alloc() +// CHECK-NEXT: linalg.copy(%[[IALLOC1]], %[[ALLOC5]]) +// CHECK-NEXT: dealloc %[[IALLOC1]] + +// CHECK: %[[ALLOC6:.*]] = scf.for {{.*}} iter_args(%[[IALLOC2:.*]] = %[[ALLOC5]]) +// CHECK: %[[ALLOC8:.*]] = alloc() +// CHECK-NEXT: dealloc %[[ALLOC8]] +// CHECK: %[[ALLOC9:.*]] = scf.if + +// CHECK: %[[ALLOC11:.*]] = alloc() +// CHECK-NEXT: %[[ALLOC12:.*]] = alloc() +// CHECK-NEXT: linalg.copy(%[[ALLOC11]], %[[ALLOC12]]) +// CHECK-NEXT: dealloc %[[ALLOC11]] +// CHECK-NEXT: scf.yield %[[ALLOC12]] + +// CHECK: %[[ALLOC13:.*]] = alloc() +// CHECK-NEXT: linalg.copy(%[[IALLOC2]], %[[ALLOC13]]) +// CHECK-NEXT: scf.yield %[[ALLOC13]] + +// CHECK: dealloc %[[IALLOC2]] +// CHECK-NEXT: %[[ALLOC10:.*]] = alloc() +// CHECK-NEXT: linalg.copy(%[[ALLOC9]], %[[ALLOC10]]) +// CHECK-NEXT: dealloc %[[ALLOC9]] +// CHECK-NEXT: scf.yield %[[ALLOC10]] + +// CHECK: %[[ALLOC7:.*]] = alloc() +// CHECK-NEXT: linalg.copy(%[[ALLOC6]], %[[ALLOC7]]) +// CHECK-NEXT: dealloc %[[ALLOC6]] +// CHECK-NEXT: scf.yield %[[ALLOC7]] + +// CHECK: %[[ALLOC4:.*]] = alloc() +// CHECK-NEXT: linalg.copy(%[[ALLOC3]], %[[ALLOC4]]) +// CHECK-NEXT: dealloc %[[ALLOC3]] +// CHECK-NEXT: scf.yield %[[ALLOC4]] + +// CHECK: linalg.copy(%[[VAL_7]], %arg4) +// CHECK-NEXT: dealloc %[[VAL_7]] + +// ----- + +// Test Case: explicit control-flow loop with a dynamically allocated buffer. +// The BufferPlacement transformation should fail on this explicit +// control-flow loop since they are not supported. 
+
+// CHECK-LABEL: func @loop_dynalloc
+// expected-error@+1 {{Structured control-flow loops are supported only}}
+func @loop_dynalloc(
+  %arg0 : i32,
+  %arg1 : i32,
+  %arg2: memref<?xf32>,
+  %arg3: memref<?xf32>) {
+  %const0 = constant 0 : i32
+  br ^loopHeader(%const0, %arg2 : i32, memref<?xf32>)
+
+^loopHeader(%i : i32, %buff : memref<?xf32>):
+  %lessThan = cmpi "slt", %i, %arg1 : i32
+  cond_br %lessThan,
+    ^loopBody(%i, %buff : i32, memref<?xf32>),
+    ^exit(%buff : memref<?xf32>)
+
+^loopBody(%val : i32, %buff2: memref<?xf32>):
+  %const1 = constant 1 : i32
+  %inc = addi %val, %const1 : i32
+  %size = std.index_cast %inc : i32 to index
+  %alloc1 = alloc(%size) : memref<?xf32>
+  br ^loopHeader(%inc, %alloc1 : i32, memref<?xf32>)
+
+^exit(%buff3 : memref<?xf32>):
+  "linalg.copy"(%buff3, %arg3) : (memref<?xf32>, memref<?xf32>) -> ()
+  return
+}
+
+// -----
+
+// Test Case: explicit control-flow loop with a statically allocated buffer.
+// The BufferPlacement transformation should fail on this explicit
+// control-flow loop since such loops are not supported.
+
+// CHECK-LABEL: func @do_loop_alloc
+// expected-error@+1 {{Structured control-flow loops are supported only}}
+func @do_loop_alloc(
+  %arg0 : i32,
+  %arg1 : i32,
+  %arg2: memref<2xf32>,
+  %arg3: memref<2xf32>) {
+  %const0 = constant 0 : i32
+  br ^loopBody(%const0, %arg2 : i32, memref<2xf32>)
+
+^loopBody(%val : i32, %buff2: memref<2xf32>):
+  %const1 = constant 1 : i32
+  %inc = addi %val, %const1 : i32
+  %alloc1 = alloc() : memref<2xf32>
+  br ^loopHeader(%inc, %alloc1 : i32, memref<2xf32>)
+
+^loopHeader(%i : i32, %buff : memref<2xf32>):
+  %lessThan = cmpi "slt", %i, %arg1 : i32
+  cond_br %lessThan,
+    ^loopBody(%i, %buff : i32, memref<2xf32>),
+    ^exit(%buff : memref<2xf32>)
+
+^exit(%buff3 : memref<2xf32>):
+  "linalg.copy"(%buff3, %arg3) : (memref<2xf32>, memref<2xf32>) -> ()
+  return
+}