Index: llvm/lib/Transforms/Scalar/StructurizeCFG.cpp =================================================================== --- llvm/lib/Transforms/Scalar/StructurizeCFG.cpp +++ llvm/lib/Transforms/Scalar/StructurizeCFG.cpp @@ -136,6 +136,31 @@ bool resultIsRememberedBlock() { return ResultIsRemembered; } }; +/// Performs a topology sort of a given region's nodes, while ensuring that +/// there is no outer loop node between any two inner loop nodes. +class RegionSorter { + LoopInfo *LI; + SmallVectorImpl &Out; + SmallVector POT; + SmallDenseMap LoopSizes; + SmallVector WorkList; + Loop *CurrentLoop; + + unsigned *lookupLoopSize(Loop *L) { + auto I = LoopSizes.find(L); + return I != LoopSizes.end() ? &I->second : nullptr; + } + + bool pushPathToAncestor(Loop *L, Loop *Ancestor); + + Loop *getAdjustedLoop(RegionNode *RN); + +public: + RegionSorter(LoopInfo *LI, SmallVectorImpl &O) : LI(LI), Out(O) {} + + void run(Region *R); +}; + /// Transforms the control flow graph on one single entry/exit region /// at a time. /// @@ -214,9 +239,6 @@ void orderNodes(); - Loop *getAdjustedLoop(RegionNode *RN); - unsigned getAdjustedLoopDepth(RegionNode *RN); - void analyzeLoops(RegionNode *N); Value *buildCondition(BranchInst *Term, unsigned Idx, bool Invert); @@ -315,7 +337,7 @@ } /// Use the exit block to determine the loop if RN is a SubRegion. -Loop *StructurizeCFG::getAdjustedLoop(RegionNode *RN) { +Loop *RegionSorter::getAdjustedLoop(RegionNode *RN) { if (RN->isSubRegion()) { Region *SubRegion = RN->getNodeAs(); return LI->getLoopFor(SubRegion->getExit()); @@ -324,65 +346,101 @@ return LI->getLoopFor(RN->getEntry()); } -/// Use the exit block to determine the loop depth if RN is a SubRegion. -unsigned StructurizeCFG::getAdjustedLoopDepth(RegionNode *RN) { - if (RN->isSubRegion()) { - Region *SubR = RN->getNodeAs(); - return LI->getLoopDepth(SubR->getExit()); - } +/// Insert all the ancestors of L up to Ancestor. +bool RegionSorter::pushPathToAncestor(Loop *L, Loop *Ancestor) { + if (L != Ancestor) { + if (!L) + return false; - return LI->getLoopDepth(RN->getEntry()); + if (!pushPathToAncestor(L->getParentLoop(), Ancestor)) + return false; + } + // If the loop has unprocessed nodes associated to it, then push it to the + // WorkList. + if (LoopSizes.lookup(L)) + WorkList.push_back(L); + return true; } -/// Build up the general order of nodes -void StructurizeCFG::orderNodes() { - ReversePostOrderTraversal RPOT(ParentRegion); - SmallDenseMap LoopBlocks; +void RegionSorter::run(Region *R) { + for (RegionNode *RN : post_order(R)) { + POT.push_back(RN); - // The reverse post-order traversal of the list gives us an ordering close - // to what we want. The only problem with it is that sometimes backedges - // for outer loops will be visited before backedges for inner loops. - for (RegionNode *RN : RPOT) { + // Accumulate the number of nodes inside the region that belong to a loop. Loop *Loop = getAdjustedLoop(RN); - ++LoopBlocks[Loop]; + ++LoopSizes[Loop]; } + Out.resize(POT.size()); - unsigned CurrentLoopDepth = 0; - Loop *CurrentLoop = nullptr; - for (auto I = RPOT.begin(), E = RPOT.end(); I != E; ++I) { - RegionNode *RN = cast(*I); - unsigned LoopDepth = getAdjustedLoopDepth(RN); - - if (is_contained(Order, *I)) - continue; - - if (LoopDepth < CurrentLoopDepth) { - // Make sure we have visited all blocks in this loop before moving back to - // the outer loop. + if (POT.empty()) + return; - auto LoopI = I; - while (unsigned &BlockCount = LoopBlocks[CurrentLoop]) { - LoopI++; - if (getAdjustedLoop(cast(*LoopI)) == CurrentLoop) { - --BlockCount; - Order.push_back(*LoopI); + // The post-order traversal of the list gives us an ordering close to what we + // want. The only problem with it is that sometimes backedges for outer loops + // will be visited before backedges for inner loops. + + auto Begin = POT.rbegin(); + auto SkippedEnd = Begin; + auto I = Begin; + auto O = Out.rbegin(), OE = Out.rend(); + CurrentLoop = nullptr; + unsigned *CurrentLoopSize = lookupLoopSize(CurrentLoop); + while (true) { + // Keep processing loops while only going deeper (into inner loops). + while (true) { + assert(I != POT.rend()); + RegionNode *RN = *I++; + + Loop *L = getAdjustedLoop(RN); + if (L != CurrentLoop) { + // If CurrentLoop is not ancestor of L, then skip RN. + if (!L || !pushPathToAncestor(L->getParentLoop(), CurrentLoop)) { + // Push the node to the end of the "skipped" list. + *SkippedEnd++ = RN; + continue; } + + CurrentLoop = L; + CurrentLoopSize = lookupLoopSize(CurrentLoop); } + + assert(O != Out.rend()); + *O++ = RN; + + // If we have finished processing the current loop, then we are done here. + if (!--*CurrentLoopSize) + break; } - CurrentLoop = getAdjustedLoop(RN); - if (CurrentLoop) - LoopBlocks[CurrentLoop]--; + if (O == OE) + break; + + // The "skipped" list is actually located at the (reversed) beginning of the + // POT. This saves us the use of an intermediate container. + // Note that there is always enough room, for the skipped nodes, before the + // current location, as we have just passed at least that amount of nodes. + + // If we have any skipped nodes, then erase the gap between the end of the + // "skipped" list, and the current location. + if (SkippedEnd != Begin) { + POT.erase(I.base(), SkippedEnd.base()); + I = SkippedEnd = Begin = POT.rbegin(); + } - CurrentLoopDepth = LoopDepth; - Order.push_back(*I); + // Keep processing outer loops, in order (from inner most, to outer). + if (!WorkList.empty()) { + CurrentLoop = WorkList.pop_back_val(); + CurrentLoopSize = lookupLoopSize(CurrentLoop); + } } + assert(WorkList.empty()); + assert(SkippedEnd == Begin); +} - // This pass originally used a post-order traversal and then operated on - // the list in reverse. Now that we are using a reverse post-order traversal - // rather than re-working the whole pass to operate on the list in order, - // we just reverse the list and continue to operate on it in reverse. - std::reverse(Order.begin(), Order.end()); +/// Build up the general order of nodes +void StructurizeCFG::orderNodes() { + RegionSorter RS(LI, Order); + RS.run(ParentRegion); } /// Determine the end of the loops Index: llvm/test/Transforms/StructurizeCFG/pr41703.ll =================================================================== --- /dev/null +++ llvm/test/Transforms/StructurizeCFG/pr41703.ll @@ -0,0 +1,158 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -S -structurizecfg %s -o - | FileCheck %s + +; This test used to produce incorrect code. + +define i1 @test(i32 %x) { +; CHECK-LABEL: @test( +; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[OUTER_LOOP_HEADER:%.*]] +; CHECK: Flow12: +; CHECK-NEXT: br i1 [[TMP3:%.*]], label [[EXIT_TRUE:%.*]], label [[FLOW13:%.*]] +; CHECK: exit.true: +; CHECK-NEXT: br label [[FLOW13]] +; CHECK: Flow13: +; CHECK-NEXT: br i1 [[TMP2:%.*]], label [[NEWDEFAULT:%.*]], label [[FLOW14:%.*]] +; CHECK: NewDefault: +; CHECK-NEXT: br label [[EXIT_FALSE:%.*]] +; CHECK: Flow14: +; CHECK-NEXT: [[TMP0:%.*]] = phi i1 [ false, [[EXIT_FALSE]] ], [ true, [[FLOW13]] ] +; CHECK-NEXT: br label [[EXIT:%.*]] +; CHECK: exit.false: +; CHECK-NEXT: br label [[FLOW14]] +; CHECK: outer.loop.header: +; CHECK-NEXT: [[COND:%.*]] = call i1 @foo() +; CHECK-NEXT: br i1 [[COND]], label [[OUTER_LOOP_BODY:%.*]], label [[FLOW3:%.*]] +; CHECK: outer.loop.body: +; CHECK-NEXT: br label [[INNER_LOOP_HEADER:%.*]] +; CHECK: Flow3: +; CHECK-NEXT: [[TMP1:%.*]] = phi i1 [ [[TMP17:%.*]], [[FLOW11:%.*]] ], [ true, [[OUTER_LOOP_HEADER]] ] +; CHECK-NEXT: [[TMP2]] = phi i1 [ [[TMP13:%.*]], [[FLOW11]] ], [ false, [[OUTER_LOOP_HEADER]] ] +; CHECK-NEXT: [[TMP3]] = phi i1 [ false, [[FLOW11]] ], [ true, [[OUTER_LOOP_HEADER]] ] +; CHECK-NEXT: br i1 [[TMP1]], label [[FLOW12:%.*]], label [[OUTER_LOOP_HEADER]] +; CHECK: inner.loop.header: +; CHECK-NEXT: [[TMP4:%.*]] = phi i1 [ [[TMP9:%.*]], [[FLOW4:%.*]] ], [ false, [[OUTER_LOOP_BODY]] ] +; CHECK-NEXT: [[COND1:%.*]] = call i1 @foo() +; CHECK-NEXT: br i1 [[COND1]], label [[INNER_LOOP_BODY:%.*]], label [[FLOW4]] +; CHECK: Flow6: +; CHECK-NEXT: [[TMP5:%.*]] = phi i1 [ false, [[INNER_LOOP_LATCH:%.*]] ], [ true, [[LEAFBLOCK:%.*]] ] +; CHECK-NEXT: br label [[FLOW5:%.*]] +; CHECK: Flow7: +; CHECK-NEXT: br i1 [[TMP11:%.*]], label [[INNER_LOOP_END:%.*]], label [[FLOW8:%.*]] +; CHECK: inner.loop.end: +; CHECK-NEXT: br label [[FLOW8]] +; CHECK: inner.loop.body: +; CHECK-NEXT: [[COND2:%.*]] = call i1 @foo() +; CHECK-NEXT: [[TMP6:%.*]] = xor i1 [[COND2]], true +; CHECK-NEXT: br i1 [[TMP6]], label [[INNER_LOOP_BODY_ELSE:%.*]], label [[FLOW:%.*]] +; CHECK: inner.loop.body.else: +; CHECK-NEXT: br label [[FLOW]] +; CHECK: Flow: +; CHECK-NEXT: [[TMP7:%.*]] = phi i1 [ false, [[INNER_LOOP_BODY_ELSE]] ], [ true, [[INNER_LOOP_BODY]] ] +; CHECK-NEXT: br i1 [[TMP7]], label [[INNER_LOOP_BODY_THEN:%.*]], label [[INNER_LOOP_COND:%.*]] +; CHECK: inner.loop.body.then: +; CHECK-NEXT: br label [[INNER_LOOP_COND]] +; CHECK: Flow4: +; CHECK-NEXT: [[TMP8:%.*]] = phi i1 [ [[TMP18:%.*]], [[FLOW5]] ], [ true, [[INNER_LOOP_HEADER]] ] +; CHECK-NEXT: [[TMP9]] = phi i1 [ [[TMP19:%.*]], [[FLOW5]] ], [ [[TMP4]], [[INNER_LOOP_HEADER]] ] +; CHECK-NEXT: [[TMP10:%.*]] = phi i1 [ [[TMP20:%.*]], [[FLOW5]] ], [ false, [[INNER_LOOP_HEADER]] ] +; CHECK-NEXT: [[TMP11]] = phi i1 [ false, [[FLOW5]] ], [ true, [[INNER_LOOP_HEADER]] ] +; CHECK-NEXT: br i1 [[TMP8]], label [[FLOW7:%.*]], label [[INNER_LOOP_HEADER]] +; CHECK: inner.loop.cond: +; CHECK-NEXT: br label [[NODEBLOCK:%.*]] +; CHECK: NodeBlock: +; CHECK-NEXT: [[PIVOT:%.*]] = icmp slt i32 [[X:%.*]], 1 +; CHECK-NEXT: br i1 [[PIVOT]], label [[LEAFBLOCK]], label [[FLOW5]] +; CHECK: Flow8: +; CHECK-NEXT: [[TMP12:%.*]] = phi i1 [ true, [[INNER_LOOP_END]] ], [ false, [[FLOW7]] ] +; CHECK-NEXT: br i1 [[TMP10]], label [[LEAFBLOCK1:%.*]], label [[FLOW9:%.*]] +; CHECK: LeafBlock1: +; CHECK-NEXT: [[SWITCHLEAF2:%.*]] = icmp eq i32 [[X]], 1 +; CHECK-NEXT: br i1 [[SWITCHLEAF2]], label [[INNER_LOOP_BREAK:%.*]], label [[FLOW10:%.*]] +; CHECK: LeafBlock: +; CHECK-NEXT: [[SWITCHLEAF:%.*]] = icmp eq i32 [[X]], 0 +; CHECK-NEXT: br i1 [[SWITCHLEAF]], label [[INNER_LOOP_LATCH]], label [[FLOW6:%.*]] +; CHECK: Flow9: +; CHECK-NEXT: [[TMP13]] = phi i1 [ [[TMP15:%.*]], [[FLOW10]] ], [ [[TMP9]], [[FLOW8]] ] +; CHECK-NEXT: [[TMP14:%.*]] = phi i1 [ [[TMP16:%.*]], [[FLOW10]] ], [ [[TMP12]], [[FLOW8]] ] +; CHECK-NEXT: br i1 [[TMP14]], label [[OUTER_LOOP_CLEANUP:%.*]], label [[FLOW11]] +; CHECK: inner.loop.break: +; CHECK-NEXT: br label [[FLOW10]] +; CHECK: Flow10: +; CHECK-NEXT: [[TMP15]] = phi i1 [ false, [[INNER_LOOP_BREAK]] ], [ true, [[LEAFBLOCK1]] ] +; CHECK-NEXT: [[TMP16]] = phi i1 [ true, [[INNER_LOOP_BREAK]] ], [ [[TMP12]], [[LEAFBLOCK1]] ] +; CHECK-NEXT: br label [[FLOW9]] +; CHECK: outer.loop.cleanup: +; CHECK-NEXT: br label [[OUTER_LOOP_LATCH:%.*]] +; CHECK: Flow11: +; CHECK-NEXT: [[TMP17]] = phi i1 [ false, [[OUTER_LOOP_LATCH]] ], [ true, [[FLOW9]] ] +; CHECK-NEXT: br label [[FLOW3]] +; CHECK: outer.loop.latch: +; CHECK-NEXT: br label [[FLOW11]] +; CHECK: Flow5: +; CHECK-NEXT: [[TMP18]] = phi i1 [ [[TMP5]], [[FLOW6]] ], [ true, [[NODEBLOCK]] ] +; CHECK-NEXT: [[TMP19]] = phi i1 [ [[TMP5]], [[FLOW6]] ], [ [[TMP4]], [[NODEBLOCK]] ] +; CHECK-NEXT: [[TMP20]] = phi i1 [ false, [[FLOW6]] ], [ true, [[NODEBLOCK]] ] +; CHECK-NEXT: br label [[FLOW4]] +; CHECK: inner.loop.latch: +; CHECK-NEXT: br label [[FLOW6]] +; CHECK: exit: +; CHECK-NEXT: ret i1 [[TMP0]] +; +entry: + br label %outer.loop.header + +exit.true: ; preds = %outer.loop.header + br label %exit + +exit.false: ; preds = %inner.loop.cond + br label %exit + +outer.loop.header: ; preds = %outer.loop.latch, %entry + %cond = call i1 @foo() + br i1 %cond, label %outer.loop.body, label %exit.true + +outer.loop.body: ; preds = %outer.loop.header + br label %inner.loop.header + +inner.loop.header: ; preds = %inner.loop.latch, %outer.loop.body + %cond1 = call i1 @foo() + br i1 %cond1, label %inner.loop.body, label %inner.loop.end + +inner.loop.end: ; preds = %inner.loop.header + br label %outer.loop.cleanup + +inner.loop.body: ; preds = %inner.loop.header + %cond2 = call i1 @foo() + br i1 %cond2, label %inner.loop.body.then, label %inner.loop.body.else + +inner.loop.body.else: ; preds = %inner.loop.body + br label %inner.loop.cond + +inner.loop.body.then: ; preds = %inner.loop.body + br label %inner.loop.cond + +inner.loop.cond: ; preds = %inner.loop.body.then, %inner.loop.body.else + switch i32 %x, label %exit.false [ + i32 0, label %inner.loop.latch + i32 1, label %inner.loop.break + ] + +inner.loop.break: ; preds = %inner.loop.cond + br label %outer.loop.cleanup + +outer.loop.cleanup: ; preds = %inner.loop.break, %inner.loop.end + br label %outer.loop.latch + +outer.loop.latch: ; preds = %outer.loop.cleanup + br label %outer.loop.header + +inner.loop.latch: ; preds = %inner.loop.cond + br label %inner.loop.header + +exit: ; preds = %exit.false, %exit.true + %r = phi i1 [ true, %exit.true ], [ false, %exit.false ] + ret i1 %r +} + +declare i1 @foo()