Index: llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp =================================================================== --- llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp +++ llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp @@ -792,9 +792,9 @@ /// original loop, multiple cloned sibling loops may be created. All of them /// are returned so that the newly introduced loop nest roots can be /// identified. -static Loop *buildClonedLoops(Loop &OrigL, ArrayRef ExitBlocks, - const ValueToValueMapTy &VMap, LoopInfo &LI, - SmallVectorImpl &NonChildClonedLoops) { +static void buildClonedLoops(Loop &OrigL, ArrayRef ExitBlocks, + const ValueToValueMapTy &VMap, LoopInfo &LI, + SmallVectorImpl &NonChildClonedLoops) { Loop *ClonedL = nullptr; auto *OrigPH = OrigL.getLoopPreheader(); @@ -887,6 +887,7 @@ } else { LI.addTopLevelLoop(ClonedL); } + NonChildClonedLoops.push_back(ClonedL); ClonedL->reserveBlocks(BlocksInClonedLoop.size()); // We don't want to just add the cloned loop blocks based on how we @@ -1040,9 +1041,6 @@ NonChildClonedLoops.push_back(cloneLoopNest( *ChildL, ExitLoopMap.lookup(ClonedChildHeader), VMap, LI)); } - - // Return the main cloned loop if any. - return ClonedL; } static void @@ -1608,8 +1606,7 @@ // different from the original structure due to the simplified CFG. This also // handles inserting all the cloned blocks into the correct loops. SmallVector NonChildClonedLoops; - Loop *ClonedL = - buildClonedLoops(L, ExitBlocks, VMap, LI, NonChildClonedLoops); + buildClonedLoops(L, ExitBlocks, VMap, LI, NonChildClonedLoops); // Delete anything that was made dead in the original loop due to // unswitching. @@ -1638,7 +1635,7 @@ // also need to cover any intervening loops. We add all of these loops to // a list and sort them by loop depth to achieve this without updating // unnecessary loops. - auto UpdateLCSSA = [&](Loop &UpdateL) { + auto UpdateLoop = [&](Loop &UpdateL) { #ifndef NDEBUG UpdateL.verifyLoop(); for (Loop *ChildL : UpdateL) { @@ -1647,51 +1644,41 @@ "Perturbed a child loop's LCSSA form!"); } #endif + // First build LCSSA for this loop so that we can preserve it when + // forming dedicated exits. We don't want to perturb some other loop's + // LCSSA while doing that CFG edit. formLCSSA(UpdateL, DT, &LI, nullptr); + + // For loops reached by this loop's original exit blocks we may + // introduced new, non-dedicated exits. At least try to re-form dedicated + // exits for these loops. This may fail if they couldn't have dedicated + // exits to start with. + formDedicatedExitBlocks(&UpdateL, &DT, &LI, /*PreserveLCSSA*/ true); }; // For non-child cloned loops and hoisted loops, we just need to update LCSSA // and we can do it in any order as they don't nest relative to each other. - for (Loop *UpdatedL : llvm::concat(NonChildClonedLoops, HoistedLoops)) - UpdateLCSSA(*UpdatedL); + // + // Also check if any of the loops we have updated have become top-level loops + // as that will necessitate widening the outer loop scope. + for (Loop *UpdatedL : + llvm::concat(NonChildClonedLoops, HoistedLoops)) { + UpdateLoop(*UpdatedL); + if (!UpdatedL->getParentLoop()) + OuterExitL = nullptr; + } + if (IsStillLoop) { + UpdateLoop(L); + if (!L.getParentLoop()) + OuterExitL = nullptr; + } // If the original loop had exit blocks, walk up through the outer most loop // of those exit blocks to update LCSSA and form updated dedicated exits. - if (OuterExitL != &L) { - SmallVector OuterLoops; - // We start with the cloned loop and the current loop if they are loops and - // move toward OuterExitL. Also, if either the cloned loop or the current - // loop have become top level loops we need to walk all the way out. - if (ClonedL) { - OuterLoops.push_back(ClonedL); - if (!ClonedL->getParentLoop()) - OuterExitL = nullptr; - } - if (IsStillLoop) { - OuterLoops.push_back(&L); - if (!L.getParentLoop()) - OuterExitL = nullptr; - } - // Grab all of the enclosing loops now. + if (OuterExitL != &L) for (Loop *OuterL = ParentL; OuterL != OuterExitL; OuterL = OuterL->getParentLoop()) - OuterLoops.push_back(OuterL); - - // Finally, update our list of outer loops. This is nicely ordered to work - // inside-out. - for (Loop *OuterL : OuterLoops) { - // First build LCSSA for this loop so that we can preserve it when - // forming dedicated exits. We don't want to perturb some other loop's - // LCSSA while doing that CFG edit. - UpdateLCSSA(*OuterL); - - // For loops reached by this loop's original exit blocks we may - // introduced new, non-dedicated exits. At least try to re-form dedicated - // exits for these loops. This may fail if they couldn't have dedicated - // exits to start with. - formDedicatedExitBlocks(OuterL, &DT, &LI, /*PreserveLCSSA*/ true); - } - } + UpdateLoop(*OuterL); #ifndef NDEBUG // Verify the entire loop structure to catch any incorrect updates before we Index: llvm/test/Transforms/SimpleLoopUnswitch/nontrivial-unswitch.ll =================================================================== --- llvm/test/Transforms/SimpleLoopUnswitch/nontrivial-unswitch.ll +++ llvm/test/Transforms/SimpleLoopUnswitch/nontrivial-unswitch.ll @@ -2562,3 +2562,104 @@ exit: ret void } + +; Non-trivial loop unswitching where there are two invariant conditions, but the +; second one is only in the cloned copy of the loop after unswitching. +define i32 @test24(i1* %ptr, i1 %cond1, i1 %cond2) { +; CHECK-LABEL: @test24( +entry: + br label %loop_begin +; CHECK-NEXT: entry: +; CHECK-NEXT: br i1 %cond1, label %entry.split.us, label %entry.split + +loop_begin: + br i1 %cond1, label %loop_a, label %loop_b + +loop_a: + br i1 %cond2, label %loop_a_a, label %loop_a_c +; The second unswitched condition. +; +; CHECK: entry.split.us: +; CHECK-NEXT: br i1 %cond2, label %entry.split.us.split.us, label %entry.split.us.split + +loop_a_a: + call void @a() + br label %latch +; The 'loop_a_a' unswitched loop. +; +; CHECK: entry.split.us.split.us: +; CHECK-NEXT: br label %loop_begin.us.us +; +; CHECK: loop_begin.us.us: +; CHECK-NEXT: br label %loop_a.us.us +; +; CHECK: loop_a.us.us: +; CHECK-NEXT: br label %loop_a_a.us.us +; +; CHECK: loop_a_a.us.us: +; CHECK-NEXT: call void @a() +; CHECK-NEXT: br label %latch.us.us +; +; CHECK: latch.us.us: +; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr +; CHECK-NEXT: br i1 %[[V]], label %loop_begin.us.us, label %loop_exit.split.us.split.us +; +; CHECK: loop_exit.split.us.split.us: +; CHECK-NEXT: br label %loop_exit.split + +loop_a_c: + call void @c() + br label %latch +; The 'loop_a_c' unswitched loop. +; +; CHECK: entry.split.us.split: +; CHECK-NEXT: br label %loop_begin.us +; +; CHECK: loop_begin.us: +; CHECK-NEXT: br label %loop_a.us +; +; CHECK: loop_a.us: +; CHECK-NEXT: br label %loop_a_c.us +; +; CHECK: loop_a_c.us: +; CHECK-NEXT: call void @c() +; CHECK-NEXT: br label %latch +; +; CHECK: latch.us: +; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr +; CHECK-NEXT: br i1 %[[V]], label %loop_begin.us, label %loop_exit.split.us.split +; +; CHECK: loop_exit.split.us.split: +; CHECK-NEXT: br label %loop_exit.split + +loop_b: + call void @b() + br label %latch +; The 'loop_b' unswitched loop. +; +; CHECK: entry.split: +; CHECK-NEXT: br label %loop_begin +; +; CHECK: loop_begin: +; CHECK-NEXT: br label %loop_b +; +; CHECK: loop_b: +; CHECK-NEXT: call void @b() +; CHECK-NEXT: br label %latch +; +; CHECK: latch: +; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr +; CHECK-NEXT: br i1 %[[V]], label %loop_begin, label %loop_exit.split +; +; CHECK: loop_exit.split: +; CHECK-NEXT: br label %loop_exit + +latch: + %v = load i1, i1* %ptr + br i1 %v, label %loop_begin, label %loop_exit + +loop_exit: + ret i32 0 +; CHECK: loop_exit: +; CHECK-NEXT: ret +} \ No newline at end of file