Index: lib/Transforms/Scalar/LoopPredication.cpp
===================================================================
--- lib/Transforms/Scalar/LoopPredication.cpp
+++ lib/Transforms/Scalar/LoopPredication.cpp
@@ -177,7 +177,9 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/Transforms/Scalar/LoopPredication.h"
+#include "llvm/ADT/SmallSet.h"
 #include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/AliasAnalysis.h"
 #include "llvm/Analysis/BranchProbabilityInfo.h"
 #include "llvm/Analysis/GuardUtils.h"
 #include "llvm/Analysis/LoopInfo.h"
@@ -246,6 +248,7 @@
     }
   };
 
+  AliasAnalysis *AA;
   ScalarEvolution *SE;
   BranchProbabilityInfo *BPI;
 
@@ -254,6 +257,12 @@
   BasicBlock *Preheader;
   LoopICmp LatchCheck;
 
+  /// The set of all instructions within the loop which have been proven to
+  /// produce the same result across all iterations of the given loop. Note
+  /// that this does not say anything about whether the instruction is safe to
+  /// move. For instance, "udiv i32 %arg, 0" could be in this set.
+  SmallPtrSet<Value *, 8> InvariantExpressions;
+
   bool isSupportedStep(const SCEV* Step);
   Optional<LoopICmp> parseLoopICmp(ICmpInst *ICI) {
     return parseLoopICmp(ICI->getPredicate(), ICI->getOperand(0),
@@ -264,7 +273,7 @@
 
   Optional<LoopICmp> parseLoopLatchICmp();
 
-  bool CanExpand(const SCEV* S);
+  bool CanExpand(const SCEV* S, const Instruction *At);
   Value *expandCheck(SCEVExpander &Expander, IRBuilder<> &Builder,
                      ICmpInst::Predicate Pred, const SCEV *LHS,
                      const SCEV *RHS);
@@ -307,8 +316,9 @@
   Optional<LoopICmp> generateLoopLatchCheck(Type *RangeCheckType);
 
 public:
-  LoopPredication(ScalarEvolution *SE, BranchProbabilityInfo *BPI)
-      : SE(SE), BPI(BPI){};
+  LoopPredication(AliasAnalysis *AA, ScalarEvolution *SE,
+                  BranchProbabilityInfo *BPI)
+      : AA(AA), SE(SE), BPI(BPI){};
   bool runOnLoop(Loop *L);
 };
 
@@ -329,8 +339,9 @@
       return false;
     auto *SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
     BranchProbabilityInfo &BPI =
-        getAnalysis<BranchProbabilityInfoWrapperPass>().getBPI();
-    LoopPredication LP(SE, &BPI);
+        getAnalysis<BranchProbabilityInfoWrapperPass>().getBPI();
+    auto *AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
+    LoopPredication LP(AA, SE, &BPI);
     return LP.runOnLoop(L);
   }
 };
@@ -356,7 +367,7 @@
       AM.getResult<FunctionAnalysisManagerLoopProxy>(L, AR).getManager();
   Function *F = L.getHeader()->getParent();
   auto *BPI = FAM.getCachedResult<BranchProbabilityAnalysis>(*F);
-  LoopPredication LP(&AR.SE, BPI);
+  LoopPredication LP(&AR.AA, &AR.SE, BPI);
   if (!LP.runOnLoop(&L))
     return PreservedAnalyses::all();
 
@@ -394,15 +405,21 @@
   Type *Ty = LHS->getType();
   assert(Ty == RHS->getType() && "expandCheck operands have different types?");
 
-  if (SE->isLoopEntryGuardedByCond(L, Pred, LHS, RHS))
-    return Builder.getTrue();
-  if (SE->isLoopEntryGuardedByCond(L, ICmpInst::getInversePredicate(Pred),
-                                   LHS, RHS))
-    return Builder.getFalse();
+  if (SE->isLoopInvariant(LHS, L) && SE->isLoopInvariant(RHS, L)) {
+    if (SE->isLoopEntryGuardedByCond(L, Pred, LHS, RHS))
+      return Builder.getTrue();
+    if (SE->isLoopEntryGuardedByCond(L, ICmpInst::getInversePredicate(Pred),
+                                     LHS, RHS))
+      return Builder.getFalse();
+  }
 
   Instruction *InsertAt = &*Builder.GetInsertPoint();
+  if (SE->isLoopInvariant(LHS, L) && SE->isLoopInvariant(RHS, L))
+    InsertAt = L->getLoopPreheader()->getTerminator();
   Value *LHSV = Expander.expandCodeFor(LHS, Ty, InsertAt);
   Value *RHSV = Expander.expandCodeFor(RHS, Ty, InsertAt);
+  IRBuilder<>::InsertPointGuard G(Builder);
+  Builder.SetInsertPoint(InsertAt);
   return Builder.CreateICmp(Pred, LHSV, RHSV);
 }
 
@@ -438,8 +455,39 @@
   return Step->isOne() || (Step->isAllOnesValue() && EnableCountDownLoop);
 }
 
-bool LoopPredication::CanExpand(const SCEV* S) {
-  return SE->isLoopInvariant(S, L) && isSafeToExpand(S, *SE);
+bool LoopPredication::CanExpand(const SCEV* S, const Instruction *At) {
+  if (SE->isLoopInvariant(S, L) && isSafeToExpand(S, *SE))
+    return true;
+
+  // Handling expressions which produce invariant results, but *haven't* yet
+  // been removed from the loop serves two important purposes.
+  // 1) Most importantly, it resolves a pass ordering cycle which would
+  // otherwise need us to iterate licm, loop-predication, and either
+  // loop-unswitch or loop-peeling to make progress on examples with lots of
+  // predicable range checks in a row. (Since, in the general case, we can't
+  // hoist the length checks until the dominating checks have been discharged
+  // as we can't prove doing so is safe.)
+  // 2) As a nice side effect, this exposes the value of peeling or unswitching
+  // much more obviously in the IR. Otherwise, the cost modeling for other
+  // transforms would end up needing to duplicate all of this logic to model a
+  // check which becomes predictable based on a modeled peel or unswitch.
+  //
+  // The cost of doing so in the worst case is an extra load in the loop to
+  // materialize the loop invariant test value instead of checking against the
+  // original IV which is presumably in a register inside the loop. Such cases
+  // are presumably rare, and hint at missing opportunities for other passes.
+  LLVM_DEBUG(S->dump());
+  LLVM_DEBUG(dbgs() << isSafeToExpandAt(S, At, *SE) << "\n");
+  if (SE->getLoopDisposition(S, L) == ScalarEvolution::LoopInvariant) {
+    LLVM_DEBUG(dbgs() << "IV1\n");
+    return isSafeToExpandAt(S, At, *SE);
+  }
+  if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(S))
+    if (InvariantExpressions.count(U->getValue())) {
+      LLVM_DEBUG(dbgs() << "IV2\n");
+      return isSafeToExpandAt(S, At, *SE);
+    }
+  return false;
 }
 
 Optional<Value *> LoopPredication::widenICmpRangeCheckIncrementingLoop(
@@ -461,8 +509,10 @@
   const SCEV *RHS =
       SE->getAddExpr(SE->getMinusSCEV(GuardLimit, GuardStart),
                      SE->getMinusSCEV(LatchStart, SE->getOne(Ty)));
-  if (!CanExpand(GuardStart) || !CanExpand(GuardLimit) ||
-      !CanExpand(LatchLimit) || !CanExpand(RHS)) {
+  Instruction *InsertAt = &*Builder.GetInsertPoint();
+  Instruction *PHTerm = &*L->getLoopPreheader()->getTerminator();
+  if (!CanExpand(GuardStart, PHTerm) || !CanExpand(GuardLimit, InsertAt) ||
+      !CanExpand(LatchLimit, PHTerm) || !CanExpand(RHS, InsertAt)) {
     LLVM_DEBUG(dbgs() << "Can't expand limit check!\n");
     return None;
   }
@@ -477,6 +527,10 @@
       expandCheck(Expander, Builder, LimitCheckPred, LatchLimit, RHS);
   auto *FirstIterationCheck = expandCheck(Expander, Builder, RangeCheck.Pred,
                                           GuardStart, GuardLimit);
+  if (L->isLoopInvariant(LimitCheck) && L->isLoopInvariant(FirstIterationCheck))
+    InsertAt = L->getLoopPreheader()->getTerminator();
+  IRBuilder<>::InsertPointGuard G(Builder);
+  Builder.SetInsertPoint(InsertAt);
   return Builder.CreateAnd(FirstIterationCheck, LimitCheck);
 }
 
@@ -487,8 +541,9 @@
   const SCEV *GuardStart = RangeCheck.IV->getStart();
   const SCEV *GuardLimit = RangeCheck.Limit;
   const SCEV *LatchLimit = LatchCheck.Limit;
-  if (!CanExpand(GuardStart) || !CanExpand(GuardLimit) ||
-      !CanExpand(LatchLimit)) {
+  Instruction *InsertAt = &*Builder.GetInsertPoint();
+  if (!CanExpand(GuardStart, InsertAt) || !CanExpand(GuardLimit, InsertAt) ||
+      !CanExpand(LatchLimit, InsertAt)) {
     LLVM_DEBUG(dbgs() << "Can't expand limit check!\n");
     return None;
   }
@@ -512,6 +567,10 @@
                                           GuardStart, GuardLimit);
   auto *LimitCheck = expandCheck(Expander, Builder, LimitCheckPred,
                                  LatchLimit, SE->getOne(Ty));
+  if (L->isLoopInvariant(LimitCheck) && L->isLoopInvariant(FirstIterationCheck))
+    InsertAt = L->getLoopPreheader()->getTerminator();
+  IRBuilder<>::InsertPointGuard G(Builder);
+  Builder.SetInsertPoint(InsertAt);
   return Builder.CreateAnd(FirstIterationCheck, LimitCheck);
 }
 
@@ -586,6 +645,7 @@
                                         Value *Condition,
                                         SCEVExpander &Expander,
                                         IRBuilder<> &Builder) {
+  IRBuilder<>::InsertPointGuard G(Builder);
   unsigned NumWidened = 0;
   // The guard condition is expected to be in form of:
   // cond1 && cond2 && cond3 ...
@@ -608,6 +668,8 @@
     }
 
     if (ICmpInst *ICI = dyn_cast<ICmpInst>(Condition)) {
+      IRBuilder<>::InsertPointGuard G(Builder);
+      Builder.SetInsertPoint(ICI);
       if (auto NewRangeCheck = widenICmpRangeCheck(ICI, Expander,
                                                    Builder)) {
         Checks.push_back(NewRangeCheck.getValue());
@@ -838,6 +900,32 @@
   return true;
 }
 
+/// Return true for instructions which can cheaply be proven to produce a
+/// single fixed result across all iterations on which they might execute.
+/// IMPORTANT: This does not prove that the instruction *does* execute or is
+/// safe to move in any way. It only proves that all executions of the given
+/// instruction must return the same value across all loop iterations.
+static bool IsInvariantExpression(Loop *L, Instruction *I,
+                                  AliasAnalysis *AA,
+                                  SmallPtrSetImpl<Value *> &InvExprs) {
+  if (L->isLoopInvariant(I))
+    return true;
+
+  for (Value *Op : I->operands())
+    if (!L->isLoopInvariant(Op) &&
+        !InvExprs.count(Op))
+      return false;
+
+  if (const auto *LI = dyn_cast<LoadInst>(I)) {
+    if (!LI->isUnordered())
+      return false;
+    if (AA->pointsToConstantMemory(LI->getOperand(0)) ||
+        LI->getMetadata(LLVMContext::MD_invariant_load) != nullptr)
+      return true;
+  }
+  return !I->mayHaveSideEffects() && !I->mayReadOrWriteMemory();
+}
+
 bool LoopPredication::runOnLoop(Loop *Loop) {
   L = Loop;
 
@@ -892,6 +980,28 @@
   if (Guards.empty() && GuardsAsWidenableBranches.empty())
     return false;
 
+  // Populate the set of invariant expressions for the given loop. This is in
+  // the worst case O(Instructions) in the function (since we have to consider
+  // uses outside the loop to reject them).
+  assert(InvariantExpressions.empty());
+  SmallVector<Value *, 8> Worklist;
+  for (auto *BB : L->blocks())
+    for (auto &I : *BB)
+      if (IsInvariantExpression(L, &I, AA, InvariantExpressions))
+        Worklist.push_back(&I);
+  while (!Worklist.empty()) {
+    Value *Item = Worklist.pop_back_val();
+    InvariantExpressions.insert(Item);
+    for (User *U : Item->users()) {
+      Instruction *User = cast<Instruction>(U);
+      if (!L->contains(User->getParent()))
+        continue;
+      if (!IsInvariantExpression(L, User, AA, InvariantExpressions))
+        continue;
+      Worklist.push_back(User);
+    }
+  }
+
   SCEVExpander Expander(*SE, *DL, "loop-predication");
 
   bool Changed = false;
@@ -900,5 +1010,6 @@
   for (auto *Guard : GuardsAsWidenableBranches)
     Changed |= widenWidenableBranchGuardConditions(Guard, Expander);
 
+  InvariantExpressions.clear();
   return Changed;
 }
Index: test/Transforms/LoopPredication/invariant_load.ll
===================================================================
--- test/Transforms/LoopPredication/invariant_load.ll
+++ test/Transforms/LoopPredication/invariant_load.ll
@@ -78,8 +78,10 @@
 ; CHECK-NEXT:    [[UNKNOWN:%.*]] = load volatile i1, i1* @UNKNOWN
 ; CHECK-NEXT:    call void (i1, ...) @llvm.experimental.guard(i1 [[UNKNOWN]]) [ "deopt"() ]
 ; CHECK-NEXT:    [[LEN:%.*]] = load i32, i32* [[LENGTH:%.*]], align 4, !invariant.load !0
-; CHECK-NEXT:    [[WITHIN_BOUNDS:%.*]] = icmp ult i32 [[I]], [[LEN]]
-; CHECK-NEXT:    call void (i1, ...) @llvm.experimental.guard(i1 [[WITHIN_BOUNDS]], i32 9) [ "deopt"() ]
+; CHECK-NEXT:    [[TMP0:%.*]] = icmp ule i32 [[N]], [[LEN]]
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp ult i32 0, [[LEN]]
+; CHECK-NEXT:    [[TMP2:%.*]] = and i1 [[TMP1]], [[TMP0]]
+; CHECK-NEXT:    call void (i1, ...) @llvm.experimental.guard(i1 [[TMP2]], i32 9) [ "deopt"() ]
 ; CHECK-NEXT:    [[I_I64:%.*]] = zext i32 [[I]] to i64
 ; CHECK-NEXT:    [[ARRAY_I_PTR:%.*]] = getelementptr inbounds i32, i32* [[ARRAY:%.*]], i64 [[I_I64]]
 ; CHECK-NEXT:    [[ARRAY_I:%.*]] = load i32, i32* [[ARRAY_I_PTR]], align 4
@@ -139,8 +141,10 @@
 ; CHECK-NEXT:    [[UNKNOWN:%.*]] = load volatile i1, i1* @UNKNOWN
 ; CHECK-NEXT:    call void (i1, ...) @llvm.experimental.guard(i1 [[UNKNOWN]]) [ "deopt"() ]
 ; CHECK-NEXT:    [[LEN:%.*]] = load i32, i32* @Length, align 4
-; CHECK-NEXT:    [[WITHIN_BOUNDS:%.*]] = icmp ult i32 [[I]], [[LEN]]
-; CHECK-NEXT:    call void (i1, ...) @llvm.experimental.guard(i1 [[WITHIN_BOUNDS]], i32 9) [ "deopt"() ]
+; CHECK-NEXT:    [[TMP0:%.*]] = icmp ule i32 [[N]], [[LEN]]
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp ult i32 0, [[LEN]]
+; CHECK-NEXT:    [[TMP2:%.*]] = and i1 [[TMP1]], [[TMP0]]
+; CHECK-NEXT:    call void (i1, ...) @llvm.experimental.guard(i1 [[TMP2]], i32 9) [ "deopt"() ]
 ; CHECK-NEXT:    [[I_I64:%.*]] = zext i32 [[I]] to i64
 ; CHECK-NEXT:    [[ARRAY_I_PTR:%.*]] = getelementptr inbounds i32, i32* [[ARRAY:%.*]], i64 [[I_I64]]
 ; CHECK-NEXT:    [[ARRAY_I:%.*]] = load i32, i32* [[ARRAY_I_PTR]], align 4
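For reference, the InvariantExpressions population added to runOnLoop above is a seed-and-propagate fixed point. The standalone sketch below restates that idea over toy types; "Inst", "isCandidate", and the boolean flags are illustrative stand-ins for llvm::Instruction and the checks in IsInvariantExpression (the invariant-load and aliasing cases are collapsed into a single TouchesMemory flag), not real LLVM APIs.

// Sketch only: not part of the patch.
#include <set>
#include <vector>

struct Inst {
  std::vector<Inst *> Operands;   // values this instruction reads
  std::vector<Inst *> Users;      // in-loop instructions reading this value
  bool DefinedOutsideLoop = false;
  bool HasSideEffects = false;
  bool TouchesMemory = false;
};

// An instruction qualifies once every operand is defined outside the loop or
// already known to be invariant, and it neither reads nor writes memory.
static bool isCandidate(const Inst *I, const std::set<const Inst *> &Inv) {
  if (I->DefinedOutsideLoop)
    return true;
  for (const Inst *Op : I->Operands)
    if (!Op->DefinedOutsideLoop && !Inv.count(Op))
      return false;
  return !I->HasSideEffects && !I->TouchesMemory;
}

// Seed with instructions that qualify on their own, then propagate to their
// users until no further instruction can be added (a fixed point).
std::set<const Inst *> computeInvariantExpressions(const std::vector<Inst *> &LoopInsts) {
  std::set<const Inst *> Inv;
  std::vector<const Inst *> Worklist;
  for (const Inst *I : LoopInsts)
    if (isCandidate(I, Inv))
      Worklist.push_back(I);
  while (!Worklist.empty()) {
    const Inst *I = Worklist.back();
    Worklist.pop_back();
    if (!Inv.insert(I).second)
      continue; // already classified; avoids revisiting its users
    for (const Inst *U : I->Users)
      if (isCandidate(U, Inv))
        Worklist.push_back(U);
  }
  return Inv;
}

The worst case is a full pass over the loop body per propagation wave, which matches the O(Instructions) bound noted in the comment in runOnLoop.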