Index: llvm/trunk/lib/Passes/PassBuilder.cpp =================================================================== --- llvm/trunk/lib/Passes/PassBuilder.cpp +++ llvm/trunk/lib/Passes/PassBuilder.cpp @@ -390,13 +390,21 @@ // Add the primary loop simplification pipeline. // FIXME: Currently this is split into two loop pass pipelines because we run - // some function passes in between them. These can and should be replaced by - // loop pass equivalenst but those aren't ready yet. Specifically, - // `SimplifyCFGPass` and `InstCombinePass` are used. We have - // `LoopSimplifyCFGPass` which isn't yet powerful enough, and the closest to - // the other we have is `LoopInstSimplify`. + // some function passes in between them. These can and should be removed + // and/or replaced by scheduling the loop pass equivalents in the correct + // positions. But those equivalent passes aren't powerful enough yet. + // Specifically, `SimplifyCFGPass` and `InstCombinePass` are currently still + // used. We have `LoopSimplifyCFGPass` which isn't yet powerful enough yet to + // fully replace `SimplifyCFGPass`, and the closest to the other we have is + // `LoopInstSimplify`. LoopPassManager LPM1(DebugLogging), LPM2(DebugLogging); + // Simplify the loop body. We do this initially to clean up after other loop + // passes run, either when iterating on a loop or on inner loops with + // implications on the outer loop. + LPM1.addPass(LoopInstSimplifyPass()); + LPM1.addPass(LoopSimplifyCFGPass()); + // Rotate Loop - disable header duplication at -Oz LPM1.addPass(LoopRotatePass(Level != Oz)); LPM1.addPass(LICMPass()); Index: llvm/trunk/lib/Transforms/IPO/PassManagerBuilder.cpp =================================================================== --- llvm/trunk/lib/Transforms/IPO/PassManagerBuilder.cpp +++ llvm/trunk/lib/Transforms/IPO/PassManagerBuilder.cpp @@ -138,10 +138,10 @@ cl::Hidden, cl::desc("Disable shrink-wrap library calls")); -static cl::opt - EnableSimpleLoopUnswitch("enable-simple-loop-unswitch", cl::init(false), - cl::Hidden, - cl::desc("Enable the simple loop unswitch pass.")); +static cl::opt EnableSimpleLoopUnswitch( + "enable-simple-loop-unswitch", cl::init(false), cl::Hidden, + cl::desc("Enable the simple loop unswitch pass. Also enables independent " + "cleanup passes integrated into the loop pass manager pipeline.")); static cl::opt EnableGVNSink( "enable-gvn-sink", cl::init(false), cl::Hidden, @@ -335,6 +335,15 @@ MPM.add(createTailCallEliminationPass()); // Eliminate tail calls MPM.add(createCFGSimplificationPass()); // Merge & remove BBs MPM.add(createReassociatePass()); // Reassociate expressions + + // Begin the loop pass pipeline. + if (EnableSimpleLoopUnswitch) { + // The simple loop unswitch pass relies on separate cleanup passes. Schedule + // them first so when we re-process a loop they run before other loop + // passes. + MPM.add(createLoopInstSimplifyPass()); + MPM.add(createLoopSimplifyCFGPass()); + } // Rotate Loop - disable header duplication at -Oz MPM.add(createLoopRotatePass(SizeLevel == 2 ? 0 : -1)); MPM.add(createLICMPass()); // Hoist loop invariants @@ -342,20 +351,26 @@ MPM.add(createSimpleLoopUnswitchLegacyPass()); else MPM.add(createLoopUnswitchPass(SizeLevel || OptLevel < 3, DivergentTarget)); + // FIXME: We break the loop pass pipeline here in order to do full + // simplify-cfg. Eventually loop-simplifycfg should be enhanced to replace the + // need for this. MPM.add(createCFGSimplificationPass()); addInstructionCombiningPass(MPM); + // We resume loop passes creating a second loop pipeline here. MPM.add(createIndVarSimplifyPass()); // Canonicalize indvars MPM.add(createLoopIdiomPass()); // Recognize idioms like memset. addExtensionsToPM(EP_LateLoopOptimizations, MPM); MPM.add(createLoopDeletionPass()); // Delete dead loops if (EnableLoopInterchange) { + // FIXME: These are function passes and break the loop pass pipeline. MPM.add(createLoopInterchangePass()); // Interchange loops MPM.add(createCFGSimplificationPass()); } if (!DisableUnrollLoops) MPM.add(createSimpleLoopUnrollPass(OptLevel)); // Unroll small loops addExtensionsToPM(EP_LoopOptimizerEnd, MPM); + // This ends the loop pass pipelines. if (OptLevel > 1) { MPM.add(createMergedLoadStoreMotionPass()); // Merge ld/st in diamonds Index: llvm/trunk/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp =================================================================== --- llvm/trunk/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp +++ llvm/trunk/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp @@ -1466,7 +1466,7 @@ static bool unswitchInvariantBranch( Loop &L, BranchInst &BI, DominatorTree &DT, LoopInfo &LI, AssumptionCache &AC, - function_ref)> NonTrivialUnswitchCB) { + function_ref)> UnswitchCB) { assert(BI.isConditional() && "Can only unswitch a conditional branch!"); assert(L.isLoopInvariant(BI.getCondition()) && "Can only unswitch an invariant branch condition!"); @@ -1706,7 +1706,7 @@ for (Loop *UpdatedL : llvm::concat(NonChildClonedLoops, HoistedLoops)) if (UpdatedL->getParentLoop() == ParentL) SibLoops.push_back(UpdatedL); - NonTrivialUnswitchCB(IsStillLoop, SibLoops); + UnswitchCB(IsStillLoop, SibLoops); ++NumBranches; return true; @@ -1754,23 +1754,27 @@ static bool unswitchLoop(Loop &L, DominatorTree &DT, LoopInfo &LI, AssumptionCache &AC, TargetTransformInfo &TTI, bool NonTrivial, - function_ref)> NonTrivialUnswitchCB) { + function_ref)> UnswitchCB) { assert(L.isRecursivelyLCSSAForm(DT, LI) && "Loops must be in LCSSA form before unswitching."); - bool Changed = false; // Must be in loop simplified form: we need a preheader and dedicated exits. if (!L.isLoopSimplifyForm()) return false; // Try trivial unswitch first before loop over other basic blocks in the loop. - Changed |= unswitchAllTrivialConditions(L, DT, LI); + if (unswitchAllTrivialConditions(L, DT, LI)) { + // If we unswitched successfully we will want to clean up the loop before + // processing it further so just mark it as unswitched and return. + UnswitchCB(/*CurrentLoopValid*/ true, {}); + return true; + } // If we're not doing non-trivial unswitching, we're done. We both accept // a parameter but also check a local flag that can be used for testing // a debugging. if (!NonTrivial && !EnableNonTrivialUnswitch) - return Changed; + return false; // Collect all remaining invariant branch conditions within this loop (as // opposed to an inner loop which would be handled when visiting that inner @@ -1785,7 +1789,7 @@ // If we didn't find any candidates, we're done. if (UnswitchCandidates.empty()) - return Changed; + return false; // Check if there are irreducible CFG cycles in this loop. If so, we cannot // easily unswitch non-trivial edges out of the loop. Doing so might turn the @@ -1796,7 +1800,7 @@ LoopBlocksRPO RPOT(&L); RPOT.perform(&LI); if (containsIrreducibleCFG(RPOT, LI)) - return Changed; + return false; LLVM_DEBUG( dbgs() << "Considering " << UnswitchCandidates.size() @@ -1824,10 +1828,10 @@ continue; if (I.getType()->isTokenTy() && I.isUsedOutsideOfBlock(BB)) - return Changed; + return false; if (auto CS = CallSite(&I)) if (CS.isConvergent() || CS.cannotDuplicate()) - return Changed; + return false; Cost += TTI.getUserCost(&I); } @@ -1898,18 +1902,17 @@ } } - if (BestUnswitchCost < UnswitchThreshold) { - LLVM_DEBUG(dbgs() << " Trying to unswitch non-trivial (cost = " - << BestUnswitchCost << ") branch: " << *BestUnswitchTI - << "\n"); - Changed |= unswitchInvariantBranch(L, cast(*BestUnswitchTI), DT, - LI, AC, NonTrivialUnswitchCB); - } else { + if (BestUnswitchCost >= UnswitchThreshold) { LLVM_DEBUG(dbgs() << "Cannot unswitch, lowest cost found: " << BestUnswitchCost << "\n"); + return false; } - return Changed; + LLVM_DEBUG(dbgs() << " Trying to unswitch non-trivial (cost = " + << BestUnswitchCost << ") branch: " << *BestUnswitchTI + << "\n"); + return unswitchInvariantBranch(L, cast(*BestUnswitchTI), DT, LI, + AC, UnswitchCB); } PreservedAnalyses SimpleLoopUnswitchPass::run(Loop &L, LoopAnalysisManager &AM, @@ -1925,10 +1928,11 @@ // after it has been deleted. std::string LoopName = L.getName(); - auto NonTrivialUnswitchCB = [&L, &U, &LoopName](bool CurrentLoopValid, - ArrayRef NewLoops) { + auto UnswitchCB = [&L, &U, &LoopName](bool CurrentLoopValid, + ArrayRef NewLoops) { // If we did a non-trivial unswitch, we have added new (cloned) loops. - U.addSiblingLoops(NewLoops); + if (!NewLoops.empty()) + U.addSiblingLoops(NewLoops); // If the current loop remains valid, we should revisit it to catch any // other unswitch opportunities. Otherwise, we need to mark it as deleted. @@ -1939,7 +1943,7 @@ }; if (!unswitchLoop(L, AR.DT, AR.LI, AR.AC, AR.TTI, NonTrivial, - NonTrivialUnswitchCB)) + UnswitchCB)) return PreservedAnalyses::all(); // Historically this pass has had issues with the dominator tree so verify it @@ -1987,8 +1991,8 @@ auto &AC = getAnalysis().getAssumptionCache(F); auto &TTI = getAnalysis().getTTI(F); - auto NonTrivialUnswitchCB = [&L, &LPM](bool CurrentLoopValid, - ArrayRef NewLoops) { + auto UnswitchCB = [&L, &LPM](bool CurrentLoopValid, + ArrayRef NewLoops) { // If we did a non-trivial unswitch, we have added new (cloned) loops. for (auto *NewL : NewLoops) LPM.addLoop(*NewL); @@ -2003,7 +2007,7 @@ }; bool Changed = - unswitchLoop(*L, DT, LI, AC, TTI, NonTrivial, NonTrivialUnswitchCB); + unswitchLoop(*L, DT, LI, AC, TTI, NonTrivial, UnswitchCB); // If anything was unswitched, also clear any cached information about this // loop. Index: llvm/trunk/test/Other/new-pm-defaults.ll =================================================================== --- llvm/trunk/test/Other/new-pm-defaults.ll +++ llvm/trunk/test/Other/new-pm-defaults.ll @@ -145,6 +145,8 @@ ; CHECK-O-NEXT: Running analysis: ScalarEvolutionAnalysis ; CHECK-O-NEXT: Running analysis: InnerAnalysisManagerProxy ; CHECK-O-NEXT: Starting Loop pass manager run. +; CHECK-O-NEXT: Running pass: LoopInstSimplifyPass +; CHECK-O-NEXT: Running pass: LoopSimplifyCFGPass ; CHECK-O-NEXT: Running pass: LoopRotatePass ; CHECK-O-NEXT: Running pass: LICM ; CHECK-O-NEXT: Running analysis: OuterAnalysisManagerProxy Index: llvm/trunk/test/Other/new-pm-thinlto-defaults.ll =================================================================== --- llvm/trunk/test/Other/new-pm-thinlto-defaults.ll +++ llvm/trunk/test/Other/new-pm-thinlto-defaults.ll @@ -129,6 +129,8 @@ ; CHECK-O-NEXT: Running analysis: ScalarEvolutionAnalysis ; CHECK-O-NEXT: Running analysis: InnerAnalysisManagerProxy ; CHECK-O-NEXT: Starting Loop pass manager run. +; CHECK-O-NEXT: Running pass: LoopInstSimplifyPass +; CHECK-O-NEXT: Running pass: LoopSimplifyCFGPass ; CHECK-O-NEXT: Running pass: LoopRotatePass ; CHECK-O-NEXT: Running pass: LICM ; CHECK-O-NEXT: Running analysis: OuterAnalysisManagerProxy Index: llvm/trunk/test/Transforms/SimpleLoopUnswitch/trivial-unswitch-iteration.ll =================================================================== --- llvm/trunk/test/Transforms/SimpleLoopUnswitch/trivial-unswitch-iteration.ll +++ llvm/trunk/test/Transforms/SimpleLoopUnswitch/trivial-unswitch-iteration.ll @@ -0,0 +1,41 @@ +; RUN: opt -passes='loop(loop-instsimplify,simplify-cfg,unswitch),verify' -S < %s | FileCheck %s + +declare void @some_func() noreturn + +define i32 @test1(i32* %var, i1 %cond1, i1 %cond2) { +; CHECK-LABEL: @test1( +entry: + br label %loop_begin +; CHECK-NEXT: entry: +; CHECK-NEXT: br i1 %{{.*}}, label %entry.split, label %loop_exit.split +; +; CHECK: entry.split: +; CHECK-NEXT: br i1 %{{.*}}, label %entry.split.split, label %loop_exit +; +; CHECK: entry.split.split: +; CHECK-NEXT: br label %do_something + +loop_begin: + br i1 %cond1, label %continue, label %loop_exit ; first trivial condition + +continue: + %var_val = load i32, i32* %var + %var_cond = trunc i32 %var_val to i1 + %maybe_cond = select i1 %cond1, i1 %cond2, i1 %var_cond + br i1 %maybe_cond, label %do_something, label %loop_exit ; second trivial condition + +do_something: + call void @some_func() noreturn nounwind + br label %loop_begin +; CHECK: do_something: +; CHECK-NEXT: call +; CHECK-NEXT: br label %do_something + +loop_exit: + ret i32 0 +; CHECK: loop_exit: +; CHECK-NEXT: br label %loop_exit.split +; +; CHECK: loop_exit.split: +; CHECK-NEXT: ret +} \ No newline at end of file