Index: llvm/lib/Passes/PassBuilder.cpp =================================================================== --- llvm/lib/Passes/PassBuilder.cpp +++ llvm/lib/Passes/PassBuilder.cpp @@ -397,6 +397,12 @@ // the other we have is `LoopInstSimplify`. LoopPassManager LPM1(DebugLogging), LPM2(DebugLogging); + // Simplify the loop body. We do this initially to clean up after other loop + // passes run, either when iterating on a loop or on inner loops with + // implications on the outer loop. + LPM1.addPass(LoopInstSimplifyPass()); + LPM1.addPass(LoopSimplifyCFGPass()); + // Rotate Loop - disable header duplication at -Oz LPM1.addPass(LoopRotatePass(Level != Oz)); LPM1.addPass(LICMPass()); Index: llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp =================================================================== --- llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp +++ llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp @@ -1466,7 +1466,7 @@ static bool unswitchInvariantBranch( Loop &L, BranchInst &BI, DominatorTree &DT, LoopInfo &LI, AssumptionCache &AC, - function_ref)> NonTrivialUnswitchCB) { + function_ref)> UnswitchCB) { assert(BI.isConditional() && "Can only unswitch a conditional branch!"); assert(L.isLoopInvariant(BI.getCondition()) && "Can only unswitch an invariant branch condition!"); @@ -1706,7 +1706,7 @@ for (Loop *UpdatedL : llvm::concat(NonChildClonedLoops, HoistedLoops)) if (UpdatedL->getParentLoop() == ParentL) SibLoops.push_back(UpdatedL); - NonTrivialUnswitchCB(IsStillLoop, SibLoops); + UnswitchCB(IsStillLoop, SibLoops); ++NumBranches; return true; @@ -1754,23 +1754,27 @@ static bool unswitchLoop(Loop &L, DominatorTree &DT, LoopInfo &LI, AssumptionCache &AC, TargetTransformInfo &TTI, bool NonTrivial, - function_ref)> NonTrivialUnswitchCB) { + function_ref)> UnswitchCB) { assert(L.isRecursivelyLCSSAForm(DT, LI) && "Loops must be in LCSSA form before unswitching."); - bool Changed = false; // Must be in loop simplified form: we need a preheader and dedicated exits. if (!L.isLoopSimplifyForm()) return false; // Try trivial unswitch first before loop over other basic blocks in the loop. - Changed |= unswitchAllTrivialConditions(L, DT, LI); + if (unswitchAllTrivialConditions(L, DT, LI)) { + // If we unswitched successfully we will want to clean up the loop before + // processing it further so just mark it as unswitched and return. + UnswitchCB(/*CurrentLoopValid*/ true, {}); + return true; + } // If we're not doing non-trivial unswitching, we're done. We both accept // a parameter but also check a local flag that can be used for testing // a debugging. if (!NonTrivial && !EnableNonTrivialUnswitch) - return Changed; + return false; // Collect all remaining invariant branch conditions within this loop (as // opposed to an inner loop which would be handled when visiting that inner @@ -1785,7 +1789,7 @@ // If we didn't find any candidates, we're done. if (UnswitchCandidates.empty()) - return Changed; + return false; // Check if there are irreducible CFG cycles in this loop. If so, we cannot // easily unswitch non-trivial edges out of the loop. Doing so might turn the @@ -1796,7 +1800,7 @@ LoopBlocksRPO RPOT(&L); RPOT.perform(&LI); if (containsIrreducibleCFG(RPOT, LI)) - return Changed; + return false; LLVM_DEBUG( dbgs() << "Considering " << UnswitchCandidates.size() @@ -1824,10 +1828,10 @@ continue; if (I.getType()->isTokenTy() && I.isUsedOutsideOfBlock(BB)) - return Changed; + return false; if (auto CS = CallSite(&I)) if (CS.isConvergent() || CS.cannotDuplicate()) - return Changed; + return false; Cost += TTI.getUserCost(&I); } @@ -1898,18 +1902,17 @@ } } - if (BestUnswitchCost < UnswitchThreshold) { - LLVM_DEBUG(dbgs() << " Trying to unswitch non-trivial (cost = " - << BestUnswitchCost << ") branch: " << *BestUnswitchTI - << "\n"); - Changed |= unswitchInvariantBranch(L, cast(*BestUnswitchTI), DT, - LI, AC, NonTrivialUnswitchCB); - } else { + if (BestUnswitchCost >= UnswitchThreshold) { LLVM_DEBUG(dbgs() << "Cannot unswitch, lowest cost found: " << BestUnswitchCost << "\n"); + return false; } - return Changed; + LLVM_DEBUG(dbgs() << " Trying to unswitch non-trivial (cost = " + << BestUnswitchCost << ") branch: " << *BestUnswitchTI + << "\n"); + return unswitchInvariantBranch(L, cast(*BestUnswitchTI), DT, LI, + AC, UnswitchCB); } PreservedAnalyses SimpleLoopUnswitchPass::run(Loop &L, LoopAnalysisManager &AM, @@ -1925,10 +1928,11 @@ // after it has been deleted. std::string LoopName = L.getName(); - auto NonTrivialUnswitchCB = [&L, &U, &LoopName](bool CurrentLoopValid, - ArrayRef NewLoops) { + auto UnswitchCB = [&L, &U, &LoopName](bool CurrentLoopValid, + ArrayRef NewLoops) { // If we did a non-trivial unswitch, we have added new (cloned) loops. - U.addSiblingLoops(NewLoops); + if (!NewLoops.empty()) + U.addSiblingLoops(NewLoops); // If the current loop remains valid, we should revisit it to catch any // other unswitch opportunities. Otherwise, we need to mark it as deleted. @@ -1939,7 +1943,7 @@ }; if (!unswitchLoop(L, AR.DT, AR.LI, AR.AC, AR.TTI, NonTrivial, - NonTrivialUnswitchCB)) + UnswitchCB)) return PreservedAnalyses::all(); // Historically this pass has had issues with the dominator tree so verify it @@ -1987,8 +1991,8 @@ auto &AC = getAnalysis().getAssumptionCache(F); auto &TTI = getAnalysis().getTTI(F); - auto NonTrivialUnswitchCB = [&L, &LPM](bool CurrentLoopValid, - ArrayRef NewLoops) { + auto UnswitchCB = [&L, &LPM](bool CurrentLoopValid, + ArrayRef NewLoops) { // If we did a non-trivial unswitch, we have added new (cloned) loops. for (auto *NewL : NewLoops) LPM.addLoop(*NewL); @@ -2003,7 +2007,7 @@ }; bool Changed = - unswitchLoop(*L, DT, LI, AC, TTI, NonTrivial, NonTrivialUnswitchCB); + unswitchLoop(*L, DT, LI, AC, TTI, NonTrivial, UnswitchCB); // If anything was unswitched, also clear any cached information about this // loop. Index: llvm/test/Other/new-pm-defaults.ll =================================================================== --- llvm/test/Other/new-pm-defaults.ll +++ llvm/test/Other/new-pm-defaults.ll @@ -145,6 +145,8 @@ ; CHECK-O-NEXT: Running analysis: ScalarEvolutionAnalysis ; CHECK-O-NEXT: Running analysis: InnerAnalysisManagerProxy ; CHECK-O-NEXT: Starting Loop pass manager run. +; CHECK-O-NEXT: Running pass: LoopInstSimplifyPass +; CHECK-O-NEXT: Running pass: LoopSimplifyCFGPass ; CHECK-O-NEXT: Running pass: LoopRotatePass ; CHECK-O-NEXT: Running pass: LICM ; CHECK-O-NEXT: Running analysis: OuterAnalysisManagerProxy Index: llvm/test/Other/new-pm-thinlto-defaults.ll =================================================================== --- llvm/test/Other/new-pm-thinlto-defaults.ll +++ llvm/test/Other/new-pm-thinlto-defaults.ll @@ -129,6 +129,8 @@ ; CHECK-O-NEXT: Running analysis: ScalarEvolutionAnalysis ; CHECK-O-NEXT: Running analysis: InnerAnalysisManagerProxy ; CHECK-O-NEXT: Starting Loop pass manager run. +; CHECK-O-NEXT: Running pass: LoopInstSimplifyPass +; CHECK-O-NEXT: Running pass: LoopSimplifyCFGPass ; CHECK-O-NEXT: Running pass: LoopRotatePass ; CHECK-O-NEXT: Running pass: LICM ; CHECK-O-NEXT: Running analysis: OuterAnalysisManagerProxy Index: llvm/test/Transforms/SimpleLoopUnswitch/trivial-unswitch-iteration.ll =================================================================== --- /dev/null +++ llvm/test/Transforms/SimpleLoopUnswitch/trivial-unswitch-iteration.ll @@ -0,0 +1,41 @@ +; RUN: opt -passes='loop(loop-instsimplify,simplify-cfg,unswitch),verify' -S < %s | FileCheck %s + +declare void @some_func() noreturn + +define i32 @test1(i32* %var, i1 %cond1, i1 %cond2) { +; CHECK-LABEL: @test1( +entry: + br label %loop_begin +; CHECK-NEXT: entry: +; CHECK-NEXT: br i1 %{{.*}}, label %entry.split, label %loop_exit.split +; +; CHECK: entry.split: +; CHECK-NEXT: br i1 %{{.*}}, label %entry.split.split, label %loop_exit +; +; CHECK: entry.split.split: +; CHECK-NEXT: br label %do_something + +loop_begin: + br i1 %cond1, label %continue, label %loop_exit ; first trivial condition + +continue: + %var_val = load i32, i32* %var + %var_cond = trunc i32 %var_val to i1 + %maybe_cond = select i1 %cond1, i1 %cond2, i1 %var_cond + br i1 %maybe_cond, label %do_something, label %loop_exit ; second trivial condition + +do_something: + call void @some_func() noreturn nounwind + br label %loop_begin +; CHECK: do_something: +; CHECK-NEXT: call +; CHECK-NEXT: br label %do_something + +loop_exit: + ret i32 0 +; CHECK: loop_exit: +; CHECK-NEXT: br label %loop_exit.split +; +; CHECK: loop_exit.split: +; CHECK-NEXT: ret +} \ No newline at end of file