Index: llvm/lib/Transforms/Scalar/StructurizeCFG.cpp =================================================================== --- llvm/lib/Transforms/Scalar/StructurizeCFG.cpp +++ llvm/lib/Transforms/Scalar/StructurizeCFG.cpp @@ -215,7 +215,6 @@ void orderNodes(); Loop *getAdjustedLoop(RegionNode *RN); - unsigned getAdjustedLoopDepth(RegionNode *RN); void analyzeLoops(RegionNode *N); @@ -324,65 +323,108 @@ return LI->getLoopFor(RN->getEntry()); } -/// Use the exit block to determine the loop depth if RN is a SubRegion. -unsigned StructurizeCFG::getAdjustedLoopDepth(RegionNode *RN) { - if (RN->isSubRegion()) { - Region *SubR = RN->getNodeAs(); - return LI->getLoopDepth(SubR->getExit()); - } - - return LI->getLoopDepth(RN->getEntry()); -} - -/// Build up the general order of nodes +/// Build up the general order of nodes, by performing a topology sort of the +/// parent region's nodes, while ensuring that there is no outer loop node +/// between any two inner loop nodes. void StructurizeCFG::orderNodes() { - ReversePostOrderTraversal RPOT(ParentRegion); - SmallDenseMap LoopBlocks; + SmallVector POT; + SmallDenseMap LoopSizes; + for (RegionNode *RN : post_order(ParentRegion)) { + POT.push_back(RN); - // The reverse post-order traversal of the list gives us an ordering close - // to what we want. The only problem with it is that sometimes backedges - // for outer loops will be visited before backedges for inner loops. - for (RegionNode *RN : RPOT) { + // Accumulate the number of nodes inside the region that belong to a loop. Loop *Loop = getAdjustedLoop(RN); - ++LoopBlocks[Loop]; + ++LoopSizes[Loop]; } - - unsigned CurrentLoopDepth = 0; + // A quick exit for the case where all nodes belong to the same loop (or no + // loop at all). + if (LoopSizes.size() <= 1U) { + Order.assign(POT.begin(), POT.end()); + return; + } + Order.resize(POT.size()); + + // The post-order traversal of the list gives us an ordering close to what we + // want. The only problem with it is that sometimes backedges for outer loops + // will be visited before backedges for inner loops. So now we fix that by + // inserting the nodes in order, while making sure that encountered inner loop + // are complete before their parents (outer loops). + + SmallVector WorkList; + // Get the size of the outermost region (the nodes that don't belong to any + // loop inside ParentRegion). + unsigned ZeroCurrentLoopSize = 0U; + auto LSI = LoopSizes.find(nullptr); + unsigned *CurrentLoopSize = + LSI != LoopSizes.end() ? &LSI->second : &ZeroCurrentLoopSize; Loop *CurrentLoop = nullptr; - for (auto I = RPOT.begin(), E = RPOT.end(); I != E; ++I) { - RegionNode *RN = cast(*I); - unsigned LoopDepth = getAdjustedLoopDepth(RN); - if (is_contained(Order, *I)) - continue; + // The "skipped" list is actually located at the (reversed) beginning of the + // POT. This saves us the use of an intermediate container. + // Note that there is always enough room, for the skipped nodes, before the + // current location, as we have just passed at least that amount of nodes. + + auto Begin = POT.rbegin(); + auto I = Begin, SkippedEnd = Begin; + auto O = Order.rbegin(), OE = Order.rend(); + while (O != OE) { + // If we have any skipped nodes, then erase the gap between the end of the + // "skipped" list, and the current location. + if (SkippedEnd != Begin) { + POT.erase(I.base(), SkippedEnd.base()); + I = SkippedEnd = Begin = POT.rbegin(); + } - if (LoopDepth < CurrentLoopDepth) { - // Make sure we have visited all blocks in this loop before moving back to - // the outer loop. + // Keep processing outer loops, in order (from inner most, to outer). + if (!WorkList.empty()) { + CurrentLoop = WorkList.pop_back_val(); + CurrentLoopSize = &LoopSizes.find(CurrentLoop)->second; + } - auto LoopI = I; - while (unsigned &BlockCount = LoopBlocks[CurrentLoop]) { - LoopI++; - if (getAdjustedLoop(cast(*LoopI)) == CurrentLoop) { - --BlockCount; - Order.push_back(*LoopI); + // Keep processing loops while only going deeper (into inner loops). + do { + assert(I != POT.rend()); + RegionNode *RN = *I++; + + Loop *L = getAdjustedLoop(RN); + if (L != CurrentLoop) { + // If L is a loop inside CurrentLoop, then CurrentLoop must be the + // parent of L. + // To prove this, we will contradict the opposite: + // Let P be the parent of L. If CurrentLoop is the parent of P, then + // the header of P must have been processed already, as it must + // dominate the other blocks of P (otherwise P is an irreducible loop, + // and won't be recorded in the LoopInfo), especially L (inside). But + // then CurrentLoop must have been updated to P at the time of + // processing the header of P, which conflicts with the assumption + // that CurrentLoop is not P. + + // If L is not a loop inside CurrentLoop, then skip RN. + if (!L || L->getParentLoop() != CurrentLoop) { + // Skip the node by pushing it to the end of the "skipped" list. + *SkippedEnd++ = RN; + continue; } + + // If we still haven't processed all the nodes that belong to + // CurrentLoop, then make sure we come back later, to finish the job, by + // pushing it to the WorkList. + if (*CurrentLoopSize) + WorkList.push_back(CurrentLoop); + + CurrentLoop = L; + CurrentLoopSize = &LoopSizes.find(CurrentLoop)->second; } - } - CurrentLoop = getAdjustedLoop(RN); - if (CurrentLoop) - LoopBlocks[CurrentLoop]--; + assert(O != OE); + *O++ = RN; - CurrentLoopDepth = LoopDepth; - Order.push_back(*I); + // If we have finished processing the current loop, then we are done here. + --*CurrentLoopSize; + } while (*CurrentLoopSize); } - - // This pass originally used a post-order traversal and then operated on - // the list in reverse. Now that we are using a reverse post-order traversal - // rather than re-working the whole pass to operate on the list in order, - // we just reverse the list and continue to operate on it in reverse. - std::reverse(Order.begin(), Order.end()); + assert(WorkList.empty()); + assert(SkippedEnd == Begin); } /// Determine the end of the loops Index: llvm/test/Transforms/StructurizeCFG/nested-loop-order-bug.ll =================================================================== --- /dev/null +++ llvm/test/Transforms/StructurizeCFG/nested-loop-order-bug.ll @@ -0,0 +1,160 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -S -structurizecfg %s -o - | FileCheck %s + +; This test used to produce incorrect code. +; For example %outer.loop.body used to branched to %inner.loop.end +; (instead of %inner.loop.header). + +define i1 @test(i32 %x) { +; CHECK-LABEL: @test( +; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[OUTER_LOOP_HEADER:%.*]] +; CHECK: Flow12: +; CHECK-NEXT: br i1 [[TMP3:%.*]], label [[EXIT_TRUE:%.*]], label [[FLOW13:%.*]] +; CHECK: exit.true: +; CHECK-NEXT: br label [[FLOW13]] +; CHECK: Flow13: +; CHECK-NEXT: br i1 [[TMP2:%.*]], label [[NEWDEFAULT:%.*]], label [[FLOW14:%.*]] +; CHECK: NewDefault: +; CHECK-NEXT: br label [[EXIT_FALSE:%.*]] +; CHECK: Flow14: +; CHECK-NEXT: [[TMP0:%.*]] = phi i1 [ false, [[EXIT_FALSE]] ], [ true, [[FLOW13]] ] +; CHECK-NEXT: br label [[EXIT:%.*]] +; CHECK: exit.false: +; CHECK-NEXT: br label [[FLOW14]] +; CHECK: outer.loop.header: +; CHECK-NEXT: [[COND:%.*]] = call i1 @foo() +; CHECK-NEXT: br i1 [[COND]], label [[OUTER_LOOP_BODY:%.*]], label [[FLOW3:%.*]] +; CHECK: outer.loop.body: +; CHECK-NEXT: br label [[INNER_LOOP_HEADER:%.*]] +; CHECK: Flow3: +; CHECK-NEXT: [[TMP1:%.*]] = phi i1 [ [[TMP17:%.*]], [[FLOW11:%.*]] ], [ true, [[OUTER_LOOP_HEADER]] ] +; CHECK-NEXT: [[TMP2]] = phi i1 [ [[TMP13:%.*]], [[FLOW11]] ], [ false, [[OUTER_LOOP_HEADER]] ] +; CHECK-NEXT: [[TMP3]] = phi i1 [ false, [[FLOW11]] ], [ true, [[OUTER_LOOP_HEADER]] ] +; CHECK-NEXT: br i1 [[TMP1]], label [[FLOW12:%.*]], label [[OUTER_LOOP_HEADER]] +; CHECK: inner.loop.header: +; CHECK-NEXT: [[TMP4:%.*]] = phi i1 [ [[TMP9:%.*]], [[FLOW4:%.*]] ], [ false, [[OUTER_LOOP_BODY]] ] +; CHECK-NEXT: [[COND1:%.*]] = call i1 @foo() +; CHECK-NEXT: br i1 [[COND1]], label [[INNER_LOOP_BODY:%.*]], label [[FLOW4]] +; CHECK: Flow6: +; CHECK-NEXT: [[TMP5:%.*]] = phi i1 [ false, [[INNER_LOOP_LATCH:%.*]] ], [ true, [[LEAFBLOCK:%.*]] ] +; CHECK-NEXT: br label [[FLOW5:%.*]] +; CHECK: Flow7: +; CHECK-NEXT: br i1 [[TMP11:%.*]], label [[INNER_LOOP_END:%.*]], label [[FLOW8:%.*]] +; CHECK: inner.loop.end: +; CHECK-NEXT: br label [[FLOW8]] +; CHECK: inner.loop.body: +; CHECK-NEXT: [[COND2:%.*]] = call i1 @foo() +; CHECK-NEXT: [[TMP6:%.*]] = xor i1 [[COND2]], true +; CHECK-NEXT: br i1 [[TMP6]], label [[INNER_LOOP_BODY_ELSE:%.*]], label [[FLOW:%.*]] +; CHECK: inner.loop.body.else: +; CHECK-NEXT: br label [[FLOW]] +; CHECK: Flow: +; CHECK-NEXT: [[TMP7:%.*]] = phi i1 [ false, [[INNER_LOOP_BODY_ELSE]] ], [ true, [[INNER_LOOP_BODY]] ] +; CHECK-NEXT: br i1 [[TMP7]], label [[INNER_LOOP_BODY_THEN:%.*]], label [[INNER_LOOP_COND:%.*]] +; CHECK: inner.loop.body.then: +; CHECK-NEXT: br label [[INNER_LOOP_COND]] +; CHECK: Flow4: +; CHECK-NEXT: [[TMP8:%.*]] = phi i1 [ [[TMP18:%.*]], [[FLOW5]] ], [ true, [[INNER_LOOP_HEADER]] ] +; CHECK-NEXT: [[TMP9]] = phi i1 [ [[TMP19:%.*]], [[FLOW5]] ], [ [[TMP4]], [[INNER_LOOP_HEADER]] ] +; CHECK-NEXT: [[TMP10:%.*]] = phi i1 [ [[TMP20:%.*]], [[FLOW5]] ], [ false, [[INNER_LOOP_HEADER]] ] +; CHECK-NEXT: [[TMP11]] = phi i1 [ false, [[FLOW5]] ], [ true, [[INNER_LOOP_HEADER]] ] +; CHECK-NEXT: br i1 [[TMP8]], label [[FLOW7:%.*]], label [[INNER_LOOP_HEADER]] +; CHECK: inner.loop.cond: +; CHECK-NEXT: br label [[NODEBLOCK:%.*]] +; CHECK: NodeBlock: +; CHECK-NEXT: [[PIVOT:%.*]] = icmp slt i32 [[X:%.*]], 1 +; CHECK-NEXT: br i1 [[PIVOT]], label [[LEAFBLOCK]], label [[FLOW5]] +; CHECK: Flow8: +; CHECK-NEXT: [[TMP12:%.*]] = phi i1 [ true, [[INNER_LOOP_END]] ], [ false, [[FLOW7]] ] +; CHECK-NEXT: br i1 [[TMP10]], label [[LEAFBLOCK1:%.*]], label [[FLOW9:%.*]] +; CHECK: LeafBlock1: +; CHECK-NEXT: [[SWITCHLEAF2:%.*]] = icmp eq i32 [[X]], 1 +; CHECK-NEXT: br i1 [[SWITCHLEAF2]], label [[INNER_LOOP_BREAK:%.*]], label [[FLOW10:%.*]] +; CHECK: LeafBlock: +; CHECK-NEXT: [[SWITCHLEAF:%.*]] = icmp eq i32 [[X]], 0 +; CHECK-NEXT: br i1 [[SWITCHLEAF]], label [[INNER_LOOP_LATCH]], label [[FLOW6:%.*]] +; CHECK: Flow9: +; CHECK-NEXT: [[TMP13]] = phi i1 [ [[TMP15:%.*]], [[FLOW10]] ], [ [[TMP9]], [[FLOW8]] ] +; CHECK-NEXT: [[TMP14:%.*]] = phi i1 [ [[TMP16:%.*]], [[FLOW10]] ], [ [[TMP12]], [[FLOW8]] ] +; CHECK-NEXT: br i1 [[TMP14]], label [[OUTER_LOOP_CLEANUP:%.*]], label [[FLOW11]] +; CHECK: inner.loop.break: +; CHECK-NEXT: br label [[FLOW10]] +; CHECK: Flow10: +; CHECK-NEXT: [[TMP15]] = phi i1 [ false, [[INNER_LOOP_BREAK]] ], [ true, [[LEAFBLOCK1]] ] +; CHECK-NEXT: [[TMP16]] = phi i1 [ true, [[INNER_LOOP_BREAK]] ], [ [[TMP12]], [[LEAFBLOCK1]] ] +; CHECK-NEXT: br label [[FLOW9]] +; CHECK: outer.loop.cleanup: +; CHECK-NEXT: br label [[OUTER_LOOP_LATCH:%.*]] +; CHECK: Flow11: +; CHECK-NEXT: [[TMP17]] = phi i1 [ false, [[OUTER_LOOP_LATCH]] ], [ true, [[FLOW9]] ] +; CHECK-NEXT: br label [[FLOW3]] +; CHECK: outer.loop.latch: +; CHECK-NEXT: br label [[FLOW11]] +; CHECK: Flow5: +; CHECK-NEXT: [[TMP18]] = phi i1 [ [[TMP5]], [[FLOW6]] ], [ true, [[NODEBLOCK]] ] +; CHECK-NEXT: [[TMP19]] = phi i1 [ [[TMP5]], [[FLOW6]] ], [ [[TMP4]], [[NODEBLOCK]] ] +; CHECK-NEXT: [[TMP20]] = phi i1 [ false, [[FLOW6]] ], [ true, [[NODEBLOCK]] ] +; CHECK-NEXT: br label [[FLOW4]] +; CHECK: inner.loop.latch: +; CHECK-NEXT: br label [[FLOW6]] +; CHECK: exit: +; CHECK-NEXT: ret i1 [[TMP0]] +; +entry: + br label %outer.loop.header + +exit.true: ; preds = %outer.loop.header + br label %exit + +exit.false: ; preds = %inner.loop.cond + br label %exit + +outer.loop.header: ; preds = %outer.loop.latch, %entry + %cond = call i1 @foo() + br i1 %cond, label %outer.loop.body, label %exit.true + +outer.loop.body: ; preds = %outer.loop.header + br label %inner.loop.header + +inner.loop.header: ; preds = %inner.loop.latch, %outer.loop.body + %cond1 = call i1 @foo() + br i1 %cond1, label %inner.loop.body, label %inner.loop.end + +inner.loop.end: ; preds = %inner.loop.header + br label %outer.loop.cleanup + +inner.loop.body: ; preds = %inner.loop.header + %cond2 = call i1 @foo() + br i1 %cond2, label %inner.loop.body.then, label %inner.loop.body.else + +inner.loop.body.else: ; preds = %inner.loop.body + br label %inner.loop.cond + +inner.loop.body.then: ; preds = %inner.loop.body + br label %inner.loop.cond + +inner.loop.cond: ; preds = %inner.loop.body.then, %inner.loop.body.else + switch i32 %x, label %exit.false [ + i32 0, label %inner.loop.latch + i32 1, label %inner.loop.break + ] + +inner.loop.break: ; preds = %inner.loop.cond + br label %outer.loop.cleanup + +outer.loop.cleanup: ; preds = %inner.loop.break, %inner.loop.end + br label %outer.loop.latch + +outer.loop.latch: ; preds = %outer.loop.cleanup + br label %outer.loop.header + +inner.loop.latch: ; preds = %inner.loop.cond + br label %inner.loop.header + +exit: ; preds = %exit.false, %exit.true + %r = phi i1 [ true, %exit.true ], [ false, %exit.false ] + ret i1 %r +} + +declare i1 @foo()