diff --git a/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp b/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp --- a/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp +++ b/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp @@ -215,7 +215,6 @@ void orderNodes(); Loop *getAdjustedLoop(RegionNode *RN); - unsigned getAdjustedLoopDepth(RegionNode *RN); void analyzeLoops(RegionNode *N); @@ -324,65 +323,108 @@ return LI->getLoopFor(RN->getEntry()); } -/// Use the exit block to determine the loop depth if RN is a SubRegion. -unsigned StructurizeCFG::getAdjustedLoopDepth(RegionNode *RN) { - if (RN->isSubRegion()) { - Region *SubR = RN->getNodeAs(); - return LI->getLoopDepth(SubR->getExit()); - } - - return LI->getLoopDepth(RN->getEntry()); -} - -/// Build up the general order of nodes +/// Build up the general order of nodes, by performing a topology sort of the +/// parent region's nodes, while ensuring that there is no outer loop node +/// between any two inner loop nodes. void StructurizeCFG::orderNodes() { - ReversePostOrderTraversal RPOT(ParentRegion); - SmallDenseMap LoopBlocks; + SmallVector POT; + SmallDenseMap LoopSizes; + for (RegionNode *RN : post_order(ParentRegion)) { + POT.push_back(RN); - // The reverse post-order traversal of the list gives us an ordering close - // to what we want. The only problem with it is that sometimes backedges - // for outer loops will be visited before backedges for inner loops. - for (RegionNode *RN : RPOT) { + // Accumulate the number of nodes inside the region that belong to a loop. Loop *Loop = getAdjustedLoop(RN); - ++LoopBlocks[Loop]; + ++LoopSizes[Loop]; } - - unsigned CurrentLoopDepth = 0; + // A quick exit for the case where all nodes belong to the same loop (or no + // loop at all). + if (LoopSizes.size() <= 1U) { + Order.assign(POT.begin(), POT.end()); + return; + } + Order.resize(POT.size()); + + // The post-order traversal of the list gives us an ordering close to what we + // want. The only problem with it is that sometimes backedges for outer loops + // will be visited before backedges for inner loops. So now we fix that by + // inserting the nodes in order, while making sure that encountered inner loop + // are complete before their parents (outer loops). + + SmallVector WorkList; + // Get the size of the outermost region (the nodes that don't belong to any + // loop inside ParentRegion). + unsigned ZeroCurrentLoopSize = 0U; + auto LSI = LoopSizes.find(nullptr); + unsigned *CurrentLoopSize = + LSI != LoopSizes.end() ? &LSI->second : &ZeroCurrentLoopSize; Loop *CurrentLoop = nullptr; - for (auto I = RPOT.begin(), E = RPOT.end(); I != E; ++I) { - RegionNode *RN = cast(*I); - unsigned LoopDepth = getAdjustedLoopDepth(RN); - if (is_contained(Order, *I)) - continue; + // The "skipped" list is actually located at the (reversed) beginning of the + // POT. This saves us the use of an intermediate container. + // Note that there is always enough room, for the skipped nodes, before the + // current location, as we have just passed at least that amount of nodes. + + auto Begin = POT.rbegin(); + auto I = Begin, SkippedEnd = Begin; + auto O = Order.rbegin(), OE = Order.rend(); + while (O != OE) { + // If we have any skipped nodes, then erase the gap between the end of the + // "skipped" list, and the current location. + if (SkippedEnd != Begin) { + POT.erase(I.base(), SkippedEnd.base()); + I = SkippedEnd = Begin = POT.rbegin(); + } - if (LoopDepth < CurrentLoopDepth) { - // Make sure we have visited all blocks in this loop before moving back to - // the outer loop. + // Keep processing outer loops, in order (from inner most, to outer). + if (!WorkList.empty()) { + CurrentLoop = WorkList.pop_back_val(); + CurrentLoopSize = &LoopSizes.find(CurrentLoop)->second; + } - auto LoopI = I; - while (unsigned &BlockCount = LoopBlocks[CurrentLoop]) { - LoopI++; - if (getAdjustedLoop(cast(*LoopI)) == CurrentLoop) { - --BlockCount; - Order.push_back(*LoopI); + // Keep processing loops while only going deeper (into inner loops). + do { + assert(I != POT.rend()); + RegionNode *RN = *I++; + + Loop *L = getAdjustedLoop(RN); + if (L != CurrentLoop) { + // If L is a loop inside CurrentLoop, then CurrentLoop must be the + // parent of L. + // To prove this, we will contradict the opposite: + // Let P be the parent of L. If CurrentLoop is the parent of P, then + // the header of P must have been processed already, as it must + // dominate the other blocks of P (otherwise P is an irreducible loop, + // and won't be recorded in the LoopInfo), especially L (inside). But + // then CurrentLoop must have been updated to P at the time of + // processing the header of P, which conflicts with the assumption + // that CurrentLoop is not P. + + // If L is not a loop inside CurrentLoop, then skip RN. + if (!L || L->getParentLoop() != CurrentLoop) { + // Skip the node by pushing it to the end of the "skipped" list. + *SkippedEnd++ = RN; + continue; } + + // If we still haven't processed all the nodes that belong to + // CurrentLoop, then make sure we come back later, to finish the job, by + // pushing it to the WorkList. + if (*CurrentLoopSize) + WorkList.push_back(CurrentLoop); + + CurrentLoop = L; + CurrentLoopSize = &LoopSizes.find(CurrentLoop)->second; } - } - CurrentLoop = getAdjustedLoop(RN); - if (CurrentLoop) - LoopBlocks[CurrentLoop]--; + assert(O != OE); + *O++ = RN; - CurrentLoopDepth = LoopDepth; - Order.push_back(*I); + // If we have finished processing the current loop, then we are done here. + --*CurrentLoopSize; + } while (*CurrentLoopSize); } - - // This pass originally used a post-order traversal and then operated on - // the list in reverse. Now that we are using a reverse post-order traversal - // rather than re-working the whole pass to operate on the list in order, - // we just reverse the list and continue to operate on it in reverse. - std::reverse(Order.begin(), Order.end()); + assert(WorkList.empty()); + assert(SkippedEnd == Begin); } /// Determine the end of the loops diff --git a/llvm/test/Transforms/StructurizeCFG/interleaved-loop-order.ll b/llvm/test/Transforms/StructurizeCFG/interleaved-loop-order.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/StructurizeCFG/interleaved-loop-order.ll @@ -0,0 +1,262 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -S -structurizecfg %s -o - | FileCheck %s + +; This test have an outer loop containing an inner loop, +; for which there is an interleaved post-order traversal. +; +; This used to produce incorrect code. +; For example %outer.loop.body used to branched to %inner.loop.end +; (instead of %inner.loop.header). + +define i1 @test_nested(i32 %x, i1 %b1, i1 %b2, i1 %b3) { +; CHECK-LABEL: @test_nested( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[B3_INV:%.*]] = xor i1 [[B3:%.*]], true +; CHECK-NEXT: br label [[OUTER_LOOP_HEADER:%.*]] +; CHECK: Flow12: +; CHECK-NEXT: br i1 [[TMP3:%.*]], label [[EXIT_TRUE:%.*]], label [[FLOW13:%.*]] +; CHECK: exit.true: +; CHECK-NEXT: br label [[FLOW13]] +; CHECK: Flow13: +; CHECK-NEXT: br i1 [[TMP2:%.*]], label [[NEWDEFAULT:%.*]], label [[FLOW14:%.*]] +; CHECK: NewDefault: +; CHECK-NEXT: br label [[EXIT_FALSE:%.*]] +; CHECK: Flow14: +; CHECK-NEXT: [[TMP0:%.*]] = phi i1 [ false, [[EXIT_FALSE]] ], [ true, [[FLOW13]] ] +; CHECK-NEXT: br label [[EXIT:%.*]] +; CHECK: exit.false: +; CHECK-NEXT: br label [[FLOW14]] +; CHECK: outer.loop.header: +; CHECK-NEXT: br i1 [[B1:%.*]], label [[OUTER_LOOP_BODY:%.*]], label [[FLOW3:%.*]] +; CHECK: outer.loop.body: +; CHECK-NEXT: br label [[INNER_LOOP_HEADER:%.*]] +; CHECK: Flow3: +; CHECK-NEXT: [[TMP1:%.*]] = phi i1 [ [[TMP16:%.*]], [[FLOW11:%.*]] ], [ true, [[OUTER_LOOP_HEADER]] ] +; CHECK-NEXT: [[TMP2]] = phi i1 [ [[TMP12:%.*]], [[FLOW11]] ], [ false, [[OUTER_LOOP_HEADER]] ] +; CHECK-NEXT: [[TMP3]] = phi i1 [ false, [[FLOW11]] ], [ true, [[OUTER_LOOP_HEADER]] ] +; CHECK-NEXT: br i1 [[TMP1]], label [[FLOW12:%.*]], label [[OUTER_LOOP_HEADER]] +; CHECK: inner.loop.header: +; CHECK-NEXT: [[TMP4:%.*]] = phi i1 [ [[TMP8:%.*]], [[FLOW4:%.*]] ], [ false, [[OUTER_LOOP_BODY]] ] +; CHECK-NEXT: br i1 [[B2:%.*]], label [[INNER_LOOP_BODY:%.*]], label [[FLOW4]] +; CHECK: Flow6: +; CHECK-NEXT: [[TMP5:%.*]] = phi i1 [ false, [[INNER_LOOP_LATCH:%.*]] ], [ true, [[LEAFBLOCK:%.*]] ] +; CHECK-NEXT: br label [[FLOW5:%.*]] +; CHECK: Flow7: +; CHECK-NEXT: br i1 [[TMP10:%.*]], label [[INNER_LOOP_END:%.*]], label [[FLOW8:%.*]] +; CHECK: inner.loop.end: +; CHECK-NEXT: br label [[FLOW8]] +; CHECK: inner.loop.body: +; CHECK-NEXT: br i1 [[B3_INV]], label [[INNER_LOOP_BODY_ELSE:%.*]], label [[FLOW:%.*]] +; CHECK: inner.loop.body.else: +; CHECK-NEXT: br label [[FLOW]] +; CHECK: Flow: +; CHECK-NEXT: [[TMP6:%.*]] = phi i1 [ false, [[INNER_LOOP_BODY_ELSE]] ], [ true, [[INNER_LOOP_BODY]] ] +; CHECK-NEXT: br i1 [[TMP6]], label [[INNER_LOOP_BODY_THEN:%.*]], label [[INNER_LOOP_COND:%.*]] +; CHECK: inner.loop.body.then: +; CHECK-NEXT: br label [[INNER_LOOP_COND]] +; CHECK: Flow4: +; CHECK-NEXT: [[TMP7:%.*]] = phi i1 [ [[TMP17:%.*]], [[FLOW5]] ], [ true, [[INNER_LOOP_HEADER]] ] +; CHECK-NEXT: [[TMP8]] = phi i1 [ [[TMP18:%.*]], [[FLOW5]] ], [ [[TMP4]], [[INNER_LOOP_HEADER]] ] +; CHECK-NEXT: [[TMP9:%.*]] = phi i1 [ [[TMP19:%.*]], [[FLOW5]] ], [ false, [[INNER_LOOP_HEADER]] ] +; CHECK-NEXT: [[TMP10]] = phi i1 [ false, [[FLOW5]] ], [ true, [[INNER_LOOP_HEADER]] ] +; CHECK-NEXT: br i1 [[TMP7]], label [[FLOW7:%.*]], label [[INNER_LOOP_HEADER]] +; CHECK: inner.loop.cond: +; CHECK-NEXT: br label [[NODEBLOCK:%.*]] +; CHECK: NodeBlock: +; CHECK-NEXT: [[PIVOT:%.*]] = icmp slt i32 [[X:%.*]], 1 +; CHECK-NEXT: br i1 [[PIVOT]], label [[LEAFBLOCK]], label [[FLOW5]] +; CHECK: Flow8: +; CHECK-NEXT: [[TMP11:%.*]] = phi i1 [ true, [[INNER_LOOP_END]] ], [ false, [[FLOW7]] ] +; CHECK-NEXT: br i1 [[TMP9]], label [[LEAFBLOCK1:%.*]], label [[FLOW9:%.*]] +; CHECK: LeafBlock1: +; CHECK-NEXT: [[SWITCHLEAF2:%.*]] = icmp eq i32 [[X]], 1 +; CHECK-NEXT: br i1 [[SWITCHLEAF2]], label [[INNER_LOOP_BREAK:%.*]], label [[FLOW10:%.*]] +; CHECK: LeafBlock: +; CHECK-NEXT: [[SWITCHLEAF:%.*]] = icmp eq i32 [[X]], 0 +; CHECK-NEXT: br i1 [[SWITCHLEAF]], label [[INNER_LOOP_LATCH]], label [[FLOW6:%.*]] +; CHECK: Flow9: +; CHECK-NEXT: [[TMP12]] = phi i1 [ [[TMP14:%.*]], [[FLOW10]] ], [ [[TMP8]], [[FLOW8]] ] +; CHECK-NEXT: [[TMP13:%.*]] = phi i1 [ [[TMP15:%.*]], [[FLOW10]] ], [ [[TMP11]], [[FLOW8]] ] +; CHECK-NEXT: br i1 [[TMP13]], label [[OUTER_LOOP_CLEANUP:%.*]], label [[FLOW11]] +; CHECK: inner.loop.break: +; CHECK-NEXT: br label [[FLOW10]] +; CHECK: Flow10: +; CHECK-NEXT: [[TMP14]] = phi i1 [ false, [[INNER_LOOP_BREAK]] ], [ true, [[LEAFBLOCK1]] ] +; CHECK-NEXT: [[TMP15]] = phi i1 [ true, [[INNER_LOOP_BREAK]] ], [ [[TMP11]], [[LEAFBLOCK1]] ] +; CHECK-NEXT: br label [[FLOW9]] +; CHECK: outer.loop.cleanup: +; CHECK-NEXT: br label [[OUTER_LOOP_LATCH:%.*]] +; CHECK: Flow11: +; CHECK-NEXT: [[TMP16]] = phi i1 [ false, [[OUTER_LOOP_LATCH]] ], [ true, [[FLOW9]] ] +; CHECK-NEXT: br label [[FLOW3]] +; CHECK: outer.loop.latch: +; CHECK-NEXT: br label [[FLOW11]] +; CHECK: Flow5: +; CHECK-NEXT: [[TMP17]] = phi i1 [ [[TMP5]], [[FLOW6]] ], [ true, [[NODEBLOCK]] ] +; CHECK-NEXT: [[TMP18]] = phi i1 [ [[TMP5]], [[FLOW6]] ], [ [[TMP4]], [[NODEBLOCK]] ] +; CHECK-NEXT: [[TMP19]] = phi i1 [ false, [[FLOW6]] ], [ true, [[NODEBLOCK]] ] +; CHECK-NEXT: br label [[FLOW4]] +; CHECK: inner.loop.latch: +; CHECK-NEXT: br label [[FLOW6]] +; CHECK: exit: +; CHECK-NEXT: ret i1 [[TMP0]] +; +entry: + br label %outer.loop.header + +exit.true: ; preds = %outer.loop.header + br label %exit + +exit.false: ; preds = %inner.loop.cond + br label %exit + +outer.loop.header: ; preds = %outer.loop.latch, %entry + br i1 %b1, label %outer.loop.body, label %exit.true + +outer.loop.body: ; preds = %outer.loop.header + br label %inner.loop.header + +inner.loop.header: ; preds = %inner.loop.latch, %outer.loop.body + br i1 %b2, label %inner.loop.body, label %inner.loop.end + +inner.loop.end: ; preds = %inner.loop.header + br label %outer.loop.cleanup + +inner.loop.body: ; preds = %inner.loop.header + br i1 %b3, label %inner.loop.body.then, label %inner.loop.body.else + +inner.loop.body.else: ; preds = %inner.loop.body + br label %inner.loop.cond + +inner.loop.body.then: ; preds = %inner.loop.body + br label %inner.loop.cond + +inner.loop.cond: ; preds = %inner.loop.body.then, %inner.loop.body.else + switch i32 %x, label %exit.false [ + i32 0, label %inner.loop.latch + i32 1, label %inner.loop.break + ] + +inner.loop.break: ; preds = %inner.loop.cond + br label %outer.loop.cleanup + +outer.loop.cleanup: ; preds = %inner.loop.break, %inner.loop.end + br label %outer.loop.latch + +outer.loop.latch: ; preds = %outer.loop.cleanup + br label %outer.loop.header + +inner.loop.latch: ; preds = %inner.loop.cond + br label %inner.loop.header + +exit: ; preds = %exit.false, %exit.true + %r = phi i1 [ true, %exit.true ], [ false, %exit.false ] + ret i1 %r +} + +; This test checks sibling loops that by default have an +; interleaved post-order traversal. + +define void @test_siblings(i1 %b1, i1 %b2, i1 %b3, i1 %b4, i1 %b5, i1 %b6, i1 %b7, i1 %b8, i1 %b9) { +; CHECK-LABEL: @test_siblings( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[B9_INV:%.*]] = xor i1 [[B9:%.*]], true +; CHECK-NEXT: [[B6_INV:%.*]] = xor i1 [[B6:%.*]], true +; CHECK-NEXT: [[B2_INV:%.*]] = xor i1 [[B2:%.*]], true +; CHECK-NEXT: [[B8_INV:%.*]] = xor i1 [[B8:%.*]], true +; CHECK-NEXT: [[B5_INV:%.*]] = xor i1 [[B5:%.*]], true +; CHECK-NEXT: [[B3_INV:%.*]] = xor i1 [[B3:%.*]], true +; CHECK-NEXT: [[B4_INV:%.*]] = xor i1 [[B4:%.*]], true +; CHECK-NEXT: [[B1_INV:%.*]] = xor i1 [[B1:%.*]], true +; CHECK-NEXT: br i1 [[B1_INV]], label [[IF_ELSE:%.*]], label [[FLOW:%.*]] +; CHECK: if.else: +; CHECK-NEXT: br label [[FLOW]] +; CHECK: Flow: +; CHECK-NEXT: [[TMP0:%.*]] = phi i1 [ [[TMP0]], [[FLOW1:%.*]] ], [ [[B2]], [[IF_ELSE]] ], [ false, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[TMP1:%.*]] = phi i1 [ [[TMP5:%.*]], [[FLOW1]] ], [ [[B2_INV]], [[IF_ELSE]] ], [ false, [[ENTRY]] ] +; CHECK-NEXT: [[TMP2:%.*]] = phi i1 [ false, [[FLOW1]] ], [ false, [[IF_ELSE]] ], [ true, [[ENTRY]] ] +; CHECK-NEXT: br i1 [[TMP2]], label [[LOOP1_HEADER:%.*]], label [[FLOW1]] +; CHECK: loop1.header: +; CHECK-NEXT: br i1 [[B3_INV]], label [[LOOP1_BODY:%.*]], label [[FLOW2:%.*]] +; CHECK: Flow2: +; CHECK-NEXT: [[TMP3:%.*]] = phi i1 [ true, [[LOOP1_BODY]] ], [ [[TMP1]], [[LOOP1_HEADER]] ] +; CHECK-NEXT: [[TMP4:%.*]] = phi i1 [ [[B5_INV]], [[LOOP1_BODY]] ], [ [[B3]], [[LOOP1_HEADER]] ] +; CHECK-NEXT: br i1 [[TMP4]], label [[LOOP1_LATCH:%.*]], label [[FLOW3:%.*]] +; CHECK: loop1.latch: +; CHECK-NEXT: br label [[FLOW3]] +; CHECK: Flow1: +; CHECK-NEXT: [[TMP5]] = phi i1 [ [[TMP6:%.*]], [[FLOW3]] ], [ [[TMP1]], [[FLOW]] ] +; CHECK-NEXT: br i1 true, label [[FLOW4:%.*]], label [[FLOW]] +; CHECK: loop1.body: +; CHECK-NEXT: br label [[FLOW2]] +; CHECK: Flow3: +; CHECK-NEXT: [[TMP6]] = phi i1 [ false, [[LOOP1_LATCH]] ], [ [[TMP3]], [[FLOW2]] ] +; CHECK-NEXT: br label [[FLOW1]] +; CHECK: Flow4: +; CHECK-NEXT: [[TMP7:%.*]] = phi i1 [ false, [[FLOW5:%.*]] ], [ [[TMP5]], [[FLOW1]] ] +; CHECK-NEXT: br i1 [[TMP7]], label [[LOOP2_HEADER:%.*]], label [[FLOW5]] +; CHECK: loop2.header: +; CHECK-NEXT: br i1 [[B6_INV]], label [[LOOP2_BODY:%.*]], label [[FLOW6:%.*]] +; CHECK: Flow5: +; CHECK-NEXT: [[TMP8:%.*]] = phi i1 [ [[TMP11:%.*]], [[FLOW7:%.*]] ], [ false, [[FLOW4]] ] +; CHECK-NEXT: br i1 true, label [[FLOW8:%.*]], label [[FLOW4]] +; CHECK: loop2.body: +; CHECK-NEXT: br label [[FLOW6]] +; CHECK: Flow6: +; CHECK-NEXT: [[TMP9:%.*]] = phi i1 [ true, [[LOOP2_BODY]] ], [ false, [[LOOP2_HEADER]] ] +; CHECK-NEXT: [[TMP10:%.*]] = phi i1 [ [[B7:%.*]], [[LOOP2_BODY]] ], [ [[B6]], [[LOOP2_HEADER]] ] +; CHECK-NEXT: br i1 [[TMP10]], label [[LOOP2_LATCH:%.*]], label [[FLOW7]] +; CHECK: loop2.latch: +; CHECK-NEXT: br label [[FLOW7]] +; CHECK: Flow7: +; CHECK-NEXT: [[TMP11]] = phi i1 [ false, [[LOOP2_LATCH]] ], [ [[TMP9]], [[FLOW6]] ] +; CHECK-NEXT: br label [[FLOW5]] +; CHECK: Flow8: +; CHECK-NEXT: [[TMP12:%.*]] = phi i1 [ false, [[FLOW10:%.*]] ], [ [[TMP0]], [[FLOW5]] ] +; CHECK-NEXT: [[TMP13:%.*]] = phi i1 [ false, [[FLOW10]] ], [ [[TMP8]], [[FLOW5]] ] +; CHECK-NEXT: br i1 [[TMP13]], label [[LOOP3_HEADER:%.*]], label [[FLOW9:%.*]] +; CHECK: loop3.header: +; CHECK-NEXT: br label [[FLOW9]] +; CHECK: Flow9: +; CHECK-NEXT: [[TMP14:%.*]] = phi i1 [ true, [[LOOP3_HEADER]] ], [ [[TMP12]], [[FLOW8]] ] +; CHECK-NEXT: br i1 [[TMP14]], label [[LOOP3_LATCH:%.*]], label [[FLOW10]] +; CHECK: loop3.latch: +; CHECK-NEXT: br label [[FLOW10]] +; CHECK: Flow10: +; CHECK-NEXT: br i1 true, label [[EXIT:%.*]], label [[FLOW8]] +; CHECK: exit: +; CHECK-NEXT: ret void +; +entry: + br i1 %b1, label %loop1.header, label %if.else + +if.else: + br i1 %b2, label %loop3.latch, label %loop2.header + +loop1.header: + br i1 %b3, label %loop1.latch, label %loop1.body + +loop1.latch: + br i1 %b4, label %loop1.header, label %exit + +loop1.body: + br i1 %b5, label %loop2.header, label %loop1.latch + +loop2.header: + br i1 %b6, label %loop2.latch, label %loop2.body + +loop2.body: + br i1 %b7, label %loop2.latch, label %loop3.header + +loop2.latch: + br i1 %b8, label %loop2.header, label %exit + +loop3.header: + br label %loop3.latch + +loop3.latch: + br i1 %b9, label %loop3.header, label %exit + +exit: + ret void +}