Index: llvm/trunk/lib/Passes/PassBuilder.cpp
===================================================================
--- llvm/trunk/lib/Passes/PassBuilder.cpp
+++ llvm/trunk/lib/Passes/PassBuilder.cpp
@@ -390,13 +390,21 @@
 
   // Add the primary loop simplification pipeline.
   // FIXME: Currently this is split into two loop pass pipelines because we run
-  // some function passes in between them. These can and should be replaced by
-  // loop pass equivalenst but those aren't ready yet. Specifically,
-  // `SimplifyCFGPass` and `InstCombinePass` are used. We have
-  // `LoopSimplifyCFGPass` which isn't yet powerful enough, and the closest to
-  // the other we have is `LoopInstSimplify`.
+  // some function passes in between them. These can and should be removed
+  // and/or replaced by scheduling the loop pass equivalents in the correct
+  // positions. But those equivalent passes aren't powerful enough yet.
+  // Specifically, `SimplifyCFGPass` and `InstCombinePass` are currently still
+  // used. We have `LoopSimplifyCFGPass` which isn't yet powerful enough yet to
+  // fully replace `SimplifyCFGPass`, and the closest to the other we have is
+  // `LoopInstSimplify`.
   LoopPassManager LPM1(DebugLogging), LPM2(DebugLogging);
 
+  // Simplify the loop body. We do this initially to clean up after other loop
+  // passes run, either when iterating on a loop or on inner loops with
+  // implications on the outer loop.
+  LPM1.addPass(LoopInstSimplifyPass());
+  LPM1.addPass(LoopSimplifyCFGPass());
+
   // Rotate Loop - disable header duplication at -Oz
   LPM1.addPass(LoopRotatePass(Level != Oz));
   LPM1.addPass(LICMPass());
Index: llvm/trunk/lib/Transforms/IPO/PassManagerBuilder.cpp
===================================================================
--- llvm/trunk/lib/Transforms/IPO/PassManagerBuilder.cpp
+++ llvm/trunk/lib/Transforms/IPO/PassManagerBuilder.cpp
@@ -138,10 +138,10 @@
                               cl::Hidden,
                               cl::desc("Disable shrink-wrap library calls"));
 
-static cl::opt<bool>
-    EnableSimpleLoopUnswitch("enable-simple-loop-unswitch", cl::init(false),
-                             cl::Hidden,
-                             cl::desc("Enable the simple loop unswitch pass."));
+static cl::opt<bool> EnableSimpleLoopUnswitch(
+    "enable-simple-loop-unswitch", cl::init(false), cl::Hidden,
+    cl::desc("Enable the simple loop unswitch pass. Also enables independent "
+             "cleanup passes integrated into the loop pass manager pipeline."));
 
 static cl::opt<bool> EnableGVNSink(
     "enable-gvn-sink", cl::init(false), cl::Hidden,
@@ -335,6 +335,15 @@
   MPM.add(createTailCallEliminationPass()); // Eliminate tail calls
   MPM.add(createCFGSimplificationPass());     // Merge & remove BBs
   MPM.add(createReassociatePass());           // Reassociate expressions
+
+  // Begin the loop pass pipeline.
+  if (EnableSimpleLoopUnswitch) {
+    // The simple loop unswitch pass relies on separate cleanup passes. Schedule
+    // them first so when we re-process a loop they run before other loop
+    // passes.
+    MPM.add(createLoopInstSimplifyPass());
+    MPM.add(createLoopSimplifyCFGPass());
+  }
   // Rotate Loop - disable header duplication at -Oz
   MPM.add(createLoopRotatePass(SizeLevel == 2 ? 0 : -1));
   MPM.add(createLICMPass());                  // Hoist loop invariants
@@ -342,20 +351,26 @@
     MPM.add(createSimpleLoopUnswitchLegacyPass());
   else
     MPM.add(createLoopUnswitchPass(SizeLevel || OptLevel < 3, DivergentTarget));
+  // FIXME: We break the loop pass pipeline here in order to do full
+  // simplify-cfg. Eventually loop-simplifycfg should be enhanced to replace the
+  // need for this.
   MPM.add(createCFGSimplificationPass());
   addInstructionCombiningPass(MPM);
+  // We resume loop passes creating a second loop pipeline here.
   MPM.add(createIndVarSimplifyPass());        // Canonicalize indvars
   MPM.add(createLoopIdiomPass());             // Recognize idioms like memset.
   addExtensionsToPM(EP_LateLoopOptimizations, MPM);
   MPM.add(createLoopDeletionPass());          // Delete dead loops
 
   if (EnableLoopInterchange) {
+    // FIXME: These are function passes and break the loop pass pipeline.
     MPM.add(createLoopInterchangePass()); // Interchange loops
     MPM.add(createCFGSimplificationPass());
   }
   if (!DisableUnrollLoops)
     MPM.add(createSimpleLoopUnrollPass(OptLevel));    // Unroll small loops
   addExtensionsToPM(EP_LoopOptimizerEnd, MPM);
+  // This ends the loop pass pipelines.
 
   if (OptLevel > 1) {
     MPM.add(createMergedLoadStoreMotionPass()); // Merge ld/st in diamonds
Index: llvm/trunk/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp
===================================================================
--- llvm/trunk/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp
+++ llvm/trunk/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp
@@ -1466,7 +1466,7 @@
 static bool unswitchInvariantBranch(
     Loop &L, BranchInst &BI, DominatorTree &DT, LoopInfo &LI,
     AssumptionCache &AC,
-    function_ref<void(bool, ArrayRef<Loop *>)> NonTrivialUnswitchCB) {
+    function_ref<void(bool, ArrayRef<Loop *>)> UnswitchCB) {
   assert(BI.isConditional() && "Can only unswitch a conditional branch!");
   assert(L.isLoopInvariant(BI.getCondition()) &&
          "Can only unswitch an invariant branch condition!");
@@ -1706,7 +1706,7 @@
   for (Loop *UpdatedL : llvm::concat<Loop *>(NonChildClonedLoops, HoistedLoops))
     if (UpdatedL->getParentLoop() == ParentL)
       SibLoops.push_back(UpdatedL);
-  NonTrivialUnswitchCB(IsStillLoop, SibLoops);
+  UnswitchCB(IsStillLoop, SibLoops);
 
   ++NumBranches;
   return true;
@@ -1754,23 +1754,27 @@
 static bool
 unswitchLoop(Loop &L, DominatorTree &DT, LoopInfo &LI, AssumptionCache &AC,
              TargetTransformInfo &TTI, bool NonTrivial,
-             function_ref<void(bool, ArrayRef<Loop *>)> NonTrivialUnswitchCB) {
+             function_ref<void(bool, ArrayRef<Loop *>)> UnswitchCB) {
   assert(L.isRecursivelyLCSSAForm(DT, LI) &&
          "Loops must be in LCSSA form before unswitching.");
-  bool Changed = false;
 
   // Must be in loop simplified form: we need a preheader and dedicated exits.
   if (!L.isLoopSimplifyForm())
     return false;
 
   // Try trivial unswitch first before loop over other basic blocks in the loop.
-  Changed |= unswitchAllTrivialConditions(L, DT, LI);
+  if (unswitchAllTrivialConditions(L, DT, LI)) {
+    // If we unswitched successfully we will want to clean up the loop before
+    // processing it further so just mark it as unswitched and return.
+    UnswitchCB(/*CurrentLoopValid*/ true, {});
+    return true;
+  }
 
   // If we're not doing non-trivial unswitching, we're done. We both accept
   // a parameter but also check a local flag that can be used for testing
   // a debugging.
   if (!NonTrivial && !EnableNonTrivialUnswitch)
-    return Changed;
+    return false;
 
   // Collect all remaining invariant branch conditions within this loop (as
   // opposed to an inner loop which would be handled when visiting that inner
@@ -1785,7 +1789,7 @@
 
   // If we didn't find any candidates, we're done.
   if (UnswitchCandidates.empty())
-    return Changed;
+    return false;
 
   // Check if there are irreducible CFG cycles in this loop. If so, we cannot
   // easily unswitch non-trivial edges out of the loop. Doing so might turn the
@@ -1796,7 +1800,7 @@
   LoopBlocksRPO RPOT(&L);
   RPOT.perform(&LI);
   if (containsIrreducibleCFG<const BasicBlock *>(RPOT, LI))
-    return Changed;
+    return false;
 
   LLVM_DEBUG(
       dbgs() << "Considering " << UnswitchCandidates.size()
@@ -1824,10 +1828,10 @@
         continue;
 
       if (I.getType()->isTokenTy() && I.isUsedOutsideOfBlock(BB))
-        return Changed;
+        return false;
       if (auto CS = CallSite(&I))
         if (CS.isConvergent() || CS.cannotDuplicate())
-          return Changed;
+          return false;
 
       Cost += TTI.getUserCost(&I);
     }
@@ -1898,18 +1902,17 @@
     }
   }
 
-  if (BestUnswitchCost < UnswitchThreshold) {
-    LLVM_DEBUG(dbgs() << "  Trying to unswitch non-trivial (cost = "
-                      << BestUnswitchCost << ") branch: " << *BestUnswitchTI
-                      << "\n");
-    Changed |= unswitchInvariantBranch(L, cast<BranchInst>(*BestUnswitchTI), DT,
-                                       LI, AC, NonTrivialUnswitchCB);
-  } else {
+  if (BestUnswitchCost >= UnswitchThreshold) {
     LLVM_DEBUG(dbgs() << "Cannot unswitch, lowest cost found: "
                       << BestUnswitchCost << "\n");
+    return false;
   }
 
-  return Changed;
+  LLVM_DEBUG(dbgs() << "  Trying to unswitch non-trivial (cost = "
+                    << BestUnswitchCost << ") branch: " << *BestUnswitchTI
+                    << "\n");
+  return unswitchInvariantBranch(L, cast<BranchInst>(*BestUnswitchTI), DT, LI,
+                                 AC, UnswitchCB);
 }
 
 PreservedAnalyses SimpleLoopUnswitchPass::run(Loop &L, LoopAnalysisManager &AM,
@@ -1925,10 +1928,11 @@
   // after it has been deleted.
   std::string LoopName = L.getName();
 
-  auto NonTrivialUnswitchCB = [&L, &U, &LoopName](bool CurrentLoopValid,
-                                                  ArrayRef<Loop *> NewLoops) {
+  auto UnswitchCB = [&L, &U, &LoopName](bool CurrentLoopValid,
+                                        ArrayRef<Loop *> NewLoops) {
     // If we did a non-trivial unswitch, we have added new (cloned) loops.
-    U.addSiblingLoops(NewLoops);
+    if (!NewLoops.empty())
+      U.addSiblingLoops(NewLoops);
 
     // If the current loop remains valid, we should revisit it to catch any
     // other unswitch opportunities. Otherwise, we need to mark it as deleted.
@@ -1939,7 +1943,7 @@
   };
 
   if (!unswitchLoop(L, AR.DT, AR.LI, AR.AC, AR.TTI, NonTrivial,
-                    NonTrivialUnswitchCB))
+                    UnswitchCB))
     return PreservedAnalyses::all();
 
   // Historically this pass has had issues with the dominator tree so verify it
@@ -1987,8 +1991,8 @@
   auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
   auto &TTI = getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
 
-  auto NonTrivialUnswitchCB = [&L, &LPM](bool CurrentLoopValid,
-                                         ArrayRef<Loop *> NewLoops) {
+  auto UnswitchCB = [&L, &LPM](bool CurrentLoopValid,
+                               ArrayRef<Loop *> NewLoops) {
     // If we did a non-trivial unswitch, we have added new (cloned) loops.
     for (auto *NewL : NewLoops)
       LPM.addLoop(*NewL);
@@ -2003,7 +2007,7 @@
   };
 
   bool Changed =
-      unswitchLoop(*L, DT, LI, AC, TTI, NonTrivial, NonTrivialUnswitchCB);
+      unswitchLoop(*L, DT, LI, AC, TTI, NonTrivial, UnswitchCB);
 
   // If anything was unswitched, also clear any cached information about this
   // loop.
Index: llvm/trunk/test/Other/new-pm-defaults.ll
===================================================================
--- llvm/trunk/test/Other/new-pm-defaults.ll
+++ llvm/trunk/test/Other/new-pm-defaults.ll
@@ -145,6 +145,8 @@
 ; CHECK-O-NEXT: Running analysis: ScalarEvolutionAnalysis
 ; CHECK-O-NEXT: Running analysis: InnerAnalysisManagerProxy
 ; CHECK-O-NEXT: Starting Loop pass manager run.
+; CHECK-O-NEXT: Running pass: LoopInstSimplifyPass
+; CHECK-O-NEXT: Running pass: LoopSimplifyCFGPass
 ; CHECK-O-NEXT: Running pass: LoopRotatePass
 ; CHECK-O-NEXT: Running pass: LICM
 ; CHECK-O-NEXT: Running analysis: OuterAnalysisManagerProxy
Index: llvm/trunk/test/Other/new-pm-thinlto-defaults.ll
===================================================================
--- llvm/trunk/test/Other/new-pm-thinlto-defaults.ll
+++ llvm/trunk/test/Other/new-pm-thinlto-defaults.ll
@@ -129,6 +129,8 @@
 ; CHECK-O-NEXT: Running analysis: ScalarEvolutionAnalysis
 ; CHECK-O-NEXT: Running analysis: InnerAnalysisManagerProxy
 ; CHECK-O-NEXT: Starting Loop pass manager run.
+; CHECK-O-NEXT: Running pass: LoopInstSimplifyPass
+; CHECK-O-NEXT: Running pass: LoopSimplifyCFGPass
 ; CHECK-O-NEXT: Running pass: LoopRotatePass
 ; CHECK-O-NEXT: Running pass: LICM
 ; CHECK-O-NEXT: Running analysis: OuterAnalysisManagerProxy
Index: llvm/trunk/test/Transforms/SimpleLoopUnswitch/trivial-unswitch-iteration.ll
===================================================================
--- llvm/trunk/test/Transforms/SimpleLoopUnswitch/trivial-unswitch-iteration.ll
+++ llvm/trunk/test/Transforms/SimpleLoopUnswitch/trivial-unswitch-iteration.ll
@@ -0,0 +1,41 @@
+; RUN: opt -passes='loop(loop-instsimplify,simplify-cfg,unswitch),verify<loops>' -S < %s | FileCheck %s
+
+declare void @some_func() noreturn
+
+define i32 @test1(i32* %var, i1 %cond1, i1 %cond2) {
+; CHECK-LABEL: @test1(
+entry:
+  br label %loop_begin
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br i1 %{{.*}}, label %entry.split, label %loop_exit.split
+;
+; CHECK:       entry.split:
+; CHECK-NEXT:    br i1 %{{.*}}, label %entry.split.split, label %loop_exit
+;
+; CHECK:       entry.split.split:
+; CHECK-NEXT:    br label %do_something
+
+loop_begin:
+  br i1 %cond1, label %continue, label %loop_exit ; first trivial condition
+
+continue:
+  %var_val = load i32, i32* %var
+  %var_cond = trunc i32 %var_val to i1
+  %maybe_cond = select i1 %cond1, i1 %cond2, i1 %var_cond
+  br i1 %maybe_cond, label %do_something, label %loop_exit ; second trivial condition
+
+do_something:
+  call void @some_func() noreturn nounwind
+  br label %loop_begin
+; CHECK:       do_something:
+; CHECK-NEXT:    call
+; CHECK-NEXT:    br label %do_something
+
+loop_exit:
+  ret i32 0
+; CHECK:       loop_exit:
+; CHECK-NEXT:    br label %loop_exit.split
+;
+; CHECK:       loop_exit.split:
+; CHECK-NEXT:    ret
+}
\ No newline at end of file