Index: include/llvm/Analysis/InstructionSimplify.h =================================================================== --- include/llvm/Analysis/InstructionSimplify.h +++ include/llvm/Analysis/InstructionSimplify.h @@ -39,10 +39,11 @@ template class AnalysisManager; template class ArrayRef; class AssumptionCache; -class DominatorTree; -class Instruction; class DataLayout; +class DominatorTree; class FastMathFlags; +class Instruction; +class KBCache; struct LoopStandardAnalysisResults; class OptimizationRemarkEmitter; class Pass; @@ -55,6 +56,7 @@ const TargetLibraryInfo *TLI = nullptr; const DominatorTree *DT = nullptr; AssumptionCache *AC = nullptr; + KBCache *KBC = nullptr; const Instruction *CxtI = nullptr; SimplifyQuery(const DataLayout &DL, const Instruction *CXTI = nullptr) @@ -63,8 +65,9 @@ SimplifyQuery(const DataLayout &DL, const TargetLibraryInfo *TLI, const DominatorTree *DT = nullptr, AssumptionCache *AC = nullptr, + KBCache *KBC = nullptr, const Instruction *CXTI = nullptr) - : DL(DL), TLI(TLI), DT(DT), AC(AC), CxtI(CXTI) {} + : DL(DL), TLI(TLI), DT(DT), AC(AC), KBC(KBC), CxtI(CXTI) {} SimplifyQuery getWithInstruction(Instruction *I) const { SimplifyQuery Copy(*this); Copy.CxtI = I; @@ -217,7 +220,8 @@ bool replaceAndRecursivelySimplify(Instruction *I, Value *SimpleV, const TargetLibraryInfo *TLI = nullptr, const DominatorTree *DT = nullptr, - AssumptionCache *AC = nullptr); + AssumptionCache *AC = nullptr, + KBCache *KBC = nullptr); /// Recursively attempt to simplify an instruction. /// @@ -228,7 +232,8 @@ bool recursivelySimplifyInstruction(Instruction *I, const TargetLibraryInfo *TLI = nullptr, const DominatorTree *DT = nullptr, - AssumptionCache *AC = nullptr); + AssumptionCache *AC = nullptr, + KBCache *KBC = nullptr); // These helper functions return a SimplifyQuery structure that contains as // many of the optional analysis we use as are currently valid. This is the // strongly preferred way of constructing SimplifyQuery in passes. Index: include/llvm/Analysis/ValueTracking.h =================================================================== --- include/llvm/Analysis/ValueTracking.h +++ include/llvm/Analysis/ValueTracking.h @@ -42,6 +42,15 @@ enum ID : unsigned; } + // Declare the known-bits cache, and a deleter type so that we can make + // a unique_ptr of KBCache while allowing it to remain incomplete. + class KBCache; + struct KBCacheDeleter { + void operator()(KBCache *); + }; + + std::unique_ptr makeKnownBitsCache(); + /// Determine which bits of V are known to be either zero or one and return /// them in the KnownZero/KnownOne bit sets. /// @@ -55,12 +64,14 @@ AssumptionCache *AC = nullptr, const Instruction *CxtI = nullptr, const DominatorTree *DT = nullptr, + KBCache *KBC = nullptr, OptimizationRemarkEmitter *ORE = nullptr); /// Returns the known bits rather than passing by reference. KnownBits computeKnownBits(const Value *V, const DataLayout &DL, unsigned Depth = 0, AssumptionCache *AC = nullptr, const Instruction *CxtI = nullptr, - const DominatorTree *DT = nullptr); + const DominatorTree *DT = nullptr, + KBCache *KBC = nullptr); /// Compute known bits from the range metadata. 
/// \p KnownZero the set of bits that are known to be zero /// \p KnownOne the set of bits that are known to be one @@ -71,7 +82,8 @@ const DataLayout &DL, AssumptionCache *AC = nullptr, const Instruction *CxtI = nullptr, - const DominatorTree *DT = nullptr); + const DominatorTree *DT = nullptr, + KBCache *KBC = nullptr); /// Return true if the given value is known to have exactly one bit set when /// defined. For vectors return true if every element is known to be a power @@ -82,7 +94,8 @@ bool OrZero = false, unsigned Depth = 0, AssumptionCache *AC = nullptr, const Instruction *CxtI = nullptr, - const DominatorTree *DT = nullptr); + const DominatorTree *DT = nullptr, + KBCache *KBC = nullptr); /// Return true if the given value is known to be non-zero when defined. For /// vectors, return true if every element is known to be non-zero when @@ -93,35 +106,40 @@ bool isKnownNonZero(const Value *V, const DataLayout &DL, unsigned Depth = 0, AssumptionCache *AC = nullptr, const Instruction *CxtI = nullptr, - const DominatorTree *DT = nullptr); + const DominatorTree *DT = nullptr, + KBCache *KBC = nullptr); /// Returns true if the give value is known to be non-negative. bool isKnownNonNegative(const Value *V, const DataLayout &DL, unsigned Depth = 0, AssumptionCache *AC = nullptr, const Instruction *CxtI = nullptr, - const DominatorTree *DT = nullptr); + const DominatorTree *DT = nullptr, + KBCache *KBC = nullptr); /// Returns true if the given value is known be positive (i.e. non-negative /// and non-zero). bool isKnownPositive(const Value *V, const DataLayout &DL, unsigned Depth = 0, AssumptionCache *AC = nullptr, const Instruction *CxtI = nullptr, - const DominatorTree *DT = nullptr); + const DominatorTree *DT = nullptr, + KBCache *KBC = nullptr); /// Returns true if the given value is known be negative (i.e. non-positive /// and non-zero). bool isKnownNegative(const Value *V, const DataLayout &DL, unsigned Depth = 0, AssumptionCache *AC = nullptr, const Instruction *CxtI = nullptr, - const DominatorTree *DT = nullptr); + const DominatorTree *DT = nullptr, + KBCache *KBC = nullptr); /// Return true if the given values are known to be non-equal when defined. /// Supports scalar integer types only. bool isKnownNonEqual(const Value *V1, const Value *V2, const DataLayout &DL, AssumptionCache *AC = nullptr, const Instruction *CxtI = nullptr, - const DominatorTree *DT = nullptr); + const DominatorTree *DT = nullptr, + KBCache *KBC = nullptr); /// Return true if 'V & Mask' is known to be zero. We use this predicate to /// simplify operations downstream. Mask is known to be zero for bits that V @@ -136,7 +154,8 @@ const DataLayout &DL, unsigned Depth = 0, AssumptionCache *AC = nullptr, const Instruction *CxtI = nullptr, - const DominatorTree *DT = nullptr); + const DominatorTree *DT = nullptr, + KBCache *KBC = nullptr); /// Return the number of times the sign bit of the register is replicated into /// the other bits. We know that at least 1 bit is always equal to the sign @@ -148,7 +167,8 @@ unsigned ComputeNumSignBits(const Value *Op, const DataLayout &DL, unsigned Depth = 0, AssumptionCache *AC = nullptr, const Instruction *CxtI = nullptr, - const DominatorTree *DT = nullptr); + const DominatorTree *DT = nullptr, + KBCache *KBC = nullptr); /// This function computes the integer multiple of Base that equals V. If /// successful, it returns true and returns the multiple in Multiple. 
If @@ -374,24 +394,28 @@ const DataLayout &DL, AssumptionCache *AC, const Instruction *CxtI, - const DominatorTree *DT); + const DominatorTree *DT, + KBCache *KBC = nullptr); OverflowResult computeOverflowForUnsignedAdd(const Value *LHS, const Value *RHS, const DataLayout &DL, AssumptionCache *AC, const Instruction *CxtI, - const DominatorTree *DT); + const DominatorTree *DT, + KBCache *KBC = nullptr); OverflowResult computeOverflowForSignedAdd(const Value *LHS, const Value *RHS, const DataLayout &DL, AssumptionCache *AC = nullptr, const Instruction *CxtI = nullptr, - const DominatorTree *DT = nullptr); + const DominatorTree *DT = nullptr, + KBCache *KBC = nullptr); /// This version also leverages the sign bit of Add if known. OverflowResult computeOverflowForSignedAdd(const AddOperator *Add, const DataLayout &DL, AssumptionCache *AC = nullptr, const Instruction *CxtI = nullptr, - const DominatorTree *DT = nullptr); + const DominatorTree *DT = nullptr, + KBCache *KBC = nullptr); /// Returns true if the arithmetic part of the \p II 's result is /// used only along the paths control dependent on the computation @@ -524,7 +548,8 @@ unsigned Depth = 0, AssumptionCache *AC = nullptr, const Instruction *CxtI = nullptr, - const DominatorTree *DT = nullptr); + const DominatorTree *DT = nullptr, + KBCache *KBC = nullptr); } // end namespace llvm #endif Index: include/llvm/Transforms/Utils/Local.h =================================================================== --- include/llvm/Transforms/Utils/Local.h +++ include/llvm/Transforms/Utils/Local.h @@ -39,6 +39,7 @@ class PHINode; class AllocaInst; class AssumptionCache; +class KBCache; class ConstantExpr; class DataLayout; class TargetLibraryInfo; @@ -182,14 +183,16 @@ const DataLayout &DL, const Instruction *CxtI = nullptr, AssumptionCache *AC = nullptr, - const DominatorTree *DT = nullptr); + const DominatorTree *DT = nullptr, + KBCache *KBC = nullptr); /// Try to infer an alignment for the specified pointer. static inline unsigned getKnownAlignment(Value *V, const DataLayout &DL, const Instruction *CxtI = nullptr, AssumptionCache *AC = nullptr, - const DominatorTree *DT = nullptr) { - return getOrEnforceKnownAlignment(V, 0, DL, CxtI, AC, DT); + const DominatorTree *DT = nullptr, + KBCache *KBC = nullptr) { + return getOrEnforceKnownAlignment(V, 0, DL, CxtI, AC, DT, KBC); } /// Given a getelementptr instruction/constantexpr, emit the code necessary to Index: lib/Analysis/InstructionSimplify.cpp =================================================================== --- lib/Analysis/InstructionSimplify.cpp +++ lib/Analysis/InstructionSimplify.cpp @@ -690,7 +690,7 @@ unsigned BitWidth = Op1->getType()->getScalarSizeInBits(); KnownBits Known(BitWidth); - computeKnownBits(Op1, Known, Q.DL, 0, Q.AC, Q.CxtI, Q.DT); + computeKnownBits(Op1, Known, Q.DL, 0, Q.AC, Q.CxtI, Q.DT, Q.KBC); if (Known.Zero.isMaxSignedValue()) { // Op1 is either 0 or the minimum signed value. If the sub is NSW, then // Op1 must be 0 because negating the minimum signed value is undefined. @@ -1311,7 +1311,7 @@ // the number of bits in the type, the shift is undefined. 
unsigned BitWidth = Op1->getType()->getScalarSizeInBits(); KnownBits Known(BitWidth); - computeKnownBits(Op1, Known, Q.DL, 0, Q.AC, Q.CxtI, Q.DT); + computeKnownBits(Op1, Known, Q.DL, 0, Q.AC, Q.CxtI, Q.DT, Q.KBC); if (Known.One.getLimitedValue() >= BitWidth) return UndefValue::get(Op0->getType()); @@ -1345,7 +1345,7 @@ if (isExact) { unsigned BitWidth = Op0->getType()->getScalarSizeInBits(); KnownBits Op0Known(BitWidth); - computeKnownBits(Op0, Op0Known, Q.DL, /*Depth=*/0, Q.AC, Q.CxtI, Q.DT); + computeKnownBits(Op0, Op0Known, Q.DL, /*Depth=*/0, Q.AC, Q.CxtI, Q.DT, Q.KBC); if (Op0Known.One[0]) return Op0; } @@ -1416,7 +1416,8 @@ return X; // Arithmetic shifting an all-sign-bit value is a no-op. - unsigned NumSignBits = ComputeNumSignBits(Op0, Q.DL, 0, Q.AC, Q.CxtI, Q.DT); + unsigned NumSignBits = + ComputeNumSignBits(Op0, Q.DL, 0, Q.AC, Q.CxtI, Q.DT, Q.KBC); if (NumSignBits == Op0->getType()->getScalarSizeInBits()) return Op0; @@ -1775,10 +1776,10 @@ if (match(Op0, m_Neg(m_Specific(Op1))) || match(Op1, m_Neg(m_Specific(Op0)))) { if (isKnownToBeAPowerOfTwo(Op0, Q.DL, /*OrZero*/ true, 0, Q.AC, Q.CxtI, - Q.DT)) + Q.DT, Q.KBC)) return Op0; if (isKnownToBeAPowerOfTwo(Op1, Q.DL, /*OrZero*/ true, 0, Q.AC, Q.CxtI, - Q.DT)) + Q.DT, Q.KBC)) return Op1; } @@ -1943,10 +1944,12 @@ match(A, m_Add(m_Value(V1), m_Value(V2)))) { // Add commutes, try both ways. if (V1 == B && - MaskedValueIsZero(V2, C2->getValue(), Q.DL, 0, Q.AC, Q.CxtI, Q.DT)) + MaskedValueIsZero(V2, C2->getValue(), Q.DL, 0, Q.AC, Q.CxtI, Q.DT, + Q.KBC)) return A; if (V2 == B && - MaskedValueIsZero(V1, C2->getValue(), Q.DL, 0, Q.AC, Q.CxtI, Q.DT)) + MaskedValueIsZero(V1, C2->getValue(), Q.DL, 0, Q.AC, Q.CxtI, Q.DT, + Q.KBC)) return A; } // Or commutes, try both ways. @@ -1954,10 +1957,12 @@ match(B, m_Add(m_Value(V1), m_Value(V2)))) { // Add commutes, try both ways. 
if (V1 == A && - MaskedValueIsZero(V2, C1->getValue(), Q.DL, 0, Q.AC, Q.CxtI, Q.DT)) + MaskedValueIsZero(V2, C1->getValue(), Q.DL, 0, Q.AC, Q.CxtI, Q.DT, + Q.KBC)) return B; if (V2 == A && - MaskedValueIsZero(V1, C1->getValue(), Q.DL, 0, Q.AC, Q.CxtI, Q.DT)) + MaskedValueIsZero(V1, C1->getValue(), Q.DL, 0, Q.AC, Q.CxtI, Q.DT, + Q.KBC)) return B; } } @@ -2343,16 +2348,16 @@ return getTrue(ITy); case ICmpInst::ICMP_EQ: case ICmpInst::ICMP_ULE: - if (isKnownNonZero(LHS, Q.DL, 0, Q.AC, Q.CxtI, Q.DT)) + if (isKnownNonZero(LHS, Q.DL, 0, Q.AC, Q.CxtI, Q.DT, Q.KBC)) return getFalse(ITy); break; case ICmpInst::ICMP_NE: case ICmpInst::ICMP_UGT: - if (isKnownNonZero(LHS, Q.DL, 0, Q.AC, Q.CxtI, Q.DT)) + if (isKnownNonZero(LHS, Q.DL, 0, Q.AC, Q.CxtI, Q.DT, Q.KBC)) return getTrue(ITy); break; case ICmpInst::ICMP_SLT: { - KnownBits LHSKnown = computeKnownBits(LHS, Q.DL, 0, Q.AC, Q.CxtI, Q.DT); + KnownBits LHSKnown = computeKnownBits(LHS, Q.DL, 0, Q.AC, Q.CxtI, Q.DT, Q.KBC); if (LHSKnown.isNegative()) return getTrue(ITy); if (LHSKnown.isNonNegative()) @@ -2360,16 +2365,16 @@ break; } case ICmpInst::ICMP_SLE: { - KnownBits LHSKnown = computeKnownBits(LHS, Q.DL, 0, Q.AC, Q.CxtI, Q.DT); + KnownBits LHSKnown = computeKnownBits(LHS, Q.DL, 0, Q.AC, Q.CxtI, Q.DT, Q.KBC); if (LHSKnown.isNegative()) return getTrue(ITy); if (LHSKnown.isNonNegative() && - isKnownNonZero(LHS, Q.DL, 0, Q.AC, Q.CxtI, Q.DT)) + isKnownNonZero(LHS, Q.DL, 0, Q.AC, Q.CxtI, Q.DT, Q.KBC)) return getFalse(ITy); break; } case ICmpInst::ICMP_SGE: { - KnownBits LHSKnown = computeKnownBits(LHS, Q.DL, 0, Q.AC, Q.CxtI, Q.DT); + KnownBits LHSKnown = computeKnownBits(LHS, Q.DL, 0, Q.AC, Q.CxtI, Q.DT, Q.KBC); if (LHSKnown.isNegative()) return getFalse(ITy); if (LHSKnown.isNonNegative()) @@ -2377,11 +2382,11 @@ break; } case ICmpInst::ICMP_SGT: { - KnownBits LHSKnown = computeKnownBits(LHS, Q.DL, 0, Q.AC, Q.CxtI, Q.DT); + KnownBits LHSKnown = computeKnownBits(LHS, Q.DL, 0, Q.AC, Q.CxtI, Q.DT, Q.KBC); if (LHSKnown.isNegative()) return getFalse(ITy); if (LHSKnown.isNonNegative() && - isKnownNonZero(LHS, Q.DL, 0, Q.AC, Q.CxtI, Q.DT)) + isKnownNonZero(LHS, Q.DL, 0, Q.AC, Q.CxtI, Q.DT, Q.KBC)) return getTrue(ITy); break; } @@ -2668,8 +2673,8 @@ return getTrue(ITy); if (Pred == ICmpInst::ICMP_SLT || Pred == ICmpInst::ICMP_SGE) { - KnownBits RHSKnown = computeKnownBits(RHS, Q.DL, 0, Q.AC, Q.CxtI, Q.DT); - KnownBits YKnown = computeKnownBits(Y, Q.DL, 0, Q.AC, Q.CxtI, Q.DT); + KnownBits RHSKnown = computeKnownBits(RHS, Q.DL, 0, Q.AC, Q.CxtI, Q.DT, Q.KBC); + KnownBits YKnown = computeKnownBits(Y, Q.DL, 0, Q.AC, Q.CxtI, Q.DT, Q.KBC); if (RHSKnown.isNonNegative() && YKnown.isNegative()) return Pred == ICmpInst::ICMP_SLT ? getTrue(ITy) : getFalse(ITy); if (RHSKnown.isNegative() || YKnown.isNonNegative()) @@ -2684,8 +2689,8 @@ return getFalse(ITy); if (Pred == ICmpInst::ICMP_SGT || Pred == ICmpInst::ICMP_SLE) { - KnownBits LHSKnown = computeKnownBits(LHS, Q.DL, 0, Q.AC, Q.CxtI, Q.DT); - KnownBits YKnown = computeKnownBits(Y, Q.DL, 0, Q.AC, Q.CxtI, Q.DT); + KnownBits LHSKnown = computeKnownBits(LHS, Q.DL, 0, Q.AC, Q.CxtI, Q.DT, Q.KBC); + KnownBits YKnown = computeKnownBits(Y, Q.DL, 0, Q.AC, Q.CxtI, Q.DT, Q.KBC); if (LHSKnown.isNonNegative() && YKnown.isNegative()) return Pred == ICmpInst::ICMP_SGT ? 
getTrue(ITy) : getFalse(ITy); if (LHSKnown.isNegative() || YKnown.isNonNegative()) @@ -2740,7 +2745,7 @@ break; case ICmpInst::ICMP_SGT: case ICmpInst::ICMP_SGE: { - KnownBits Known = computeKnownBits(RHS, Q.DL, 0, Q.AC, Q.CxtI, Q.DT); + KnownBits Known = computeKnownBits(RHS, Q.DL, 0, Q.AC, Q.CxtI, Q.DT, Q.KBC); if (!Known.isNonNegative()) break; LLVM_FALLTHROUGH; @@ -2751,7 +2756,7 @@ return getFalse(ITy); case ICmpInst::ICMP_SLT: case ICmpInst::ICMP_SLE: { - KnownBits Known = computeKnownBits(RHS, Q.DL, 0, Q.AC, Q.CxtI, Q.DT); + KnownBits Known = computeKnownBits(RHS, Q.DL, 0, Q.AC, Q.CxtI, Q.DT, Q.KBC); if (!Known.isNonNegative()) break; LLVM_FALLTHROUGH; @@ -2770,7 +2775,7 @@ break; case ICmpInst::ICMP_SGT: case ICmpInst::ICMP_SGE: { - KnownBits Known = computeKnownBits(LHS, Q.DL, 0, Q.AC, Q.CxtI, Q.DT); + KnownBits Known = computeKnownBits(LHS, Q.DL, 0, Q.AC, Q.CxtI, Q.DT, Q.KBC); if (!Known.isNonNegative()) break; LLVM_FALLTHROUGH; @@ -2781,7 +2786,7 @@ return getTrue(ITy); case ICmpInst::ICMP_SLT: case ICmpInst::ICMP_SLE: { - KnownBits Known = computeKnownBits(LHS, Q.DL, 0, Q.AC, Q.CxtI, Q.DT); + KnownBits Known = computeKnownBits(LHS, Q.DL, 0, Q.AC, Q.CxtI, Q.DT, Q.KBC); if (!Known.isNonNegative()) break; LLVM_FALLTHROUGH; @@ -3316,7 +3321,7 @@ // icmp eq|ne X, Y -> false|true if X != Y if ((Pred == ICmpInst::ICMP_EQ || Pred == ICmpInst::ICMP_NE) && - isKnownNonEqual(LHS, RHS, Q.DL, Q.AC, Q.CxtI, Q.DT)) { + isKnownNonEqual(LHS, RHS, Q.DL, Q.AC, Q.CxtI, Q.DT, Q.KBC)) { LLVMContext &Ctx = LHS->getType()->getContext(); return Pred == ICmpInst::ICMP_NE ? ConstantInt::getTrue(Ctx) : ConstantInt::getFalse(Ctx); @@ -3374,7 +3379,7 @@ if (match(RHS, m_APInt(RHSVal))) { unsigned BitWidth = RHSVal->getBitWidth(); KnownBits LHSKnown(BitWidth); - computeKnownBits(LHS, LHSKnown, Q.DL, /*Depth=*/0, Q.AC, Q.CxtI, Q.DT); + computeKnownBits(LHS, LHSKnown, Q.DL, /*Depth=*/0, Q.AC, Q.CxtI, Q.DT, Q.KBC); if (LHSKnown.Zero.intersects(*RHSVal) || !LHSKnown.One.isSubsetOf(*RHSVal)) return Pred == ICmpInst::ICMP_EQ ? 
ConstantInt::getFalse(ITy) @@ -4682,7 +4687,7 @@ if (!Result && I->getType()->isIntOrIntVectorTy()) { unsigned BitWidth = I->getType()->getScalarSizeInBits(); KnownBits Known(BitWidth); - computeKnownBits(I, Known, Q.DL, /*Depth*/ 0, Q.AC, I, Q.DT, ORE); + computeKnownBits(I, Known, Q.DL, /*Depth*/ 0, Q.AC, I, Q.DT, Q.KBC, ORE); if (Known.isConstant()) Result = ConstantInt::get(I->getType(), Known.getConstant()); } @@ -4707,7 +4712,8 @@ static bool replaceAndRecursivelySimplifyImpl(Instruction *I, Value *SimpleV, const TargetLibraryInfo *TLI, const DominatorTree *DT, - AssumptionCache *AC) { + AssumptionCache *AC, + KBCache *KBC) { bool Simplified = false; SmallSetVector Worklist; const DataLayout &DL = I->getModule()->getDataLayout(); @@ -4763,17 +4769,17 @@ bool llvm::recursivelySimplifyInstruction(Instruction *I, const TargetLibraryInfo *TLI, const DominatorTree *DT, - AssumptionCache *AC) { - return replaceAndRecursivelySimplifyImpl(I, nullptr, TLI, DT, AC); + AssumptionCache *AC, KBCache *KBC) { + return replaceAndRecursivelySimplifyImpl(I, nullptr, TLI, DT, AC, KBC); } bool llvm::replaceAndRecursivelySimplify(Instruction *I, Value *SimpleV, const TargetLibraryInfo *TLI, const DominatorTree *DT, - AssumptionCache *AC) { + AssumptionCache *AC, KBCache *KBC) { assert(I != SimpleV && "replaceAndRecursivelySimplify(X,X) is not valid!"); assert(SimpleV && "Must provide a simplified value."); - return replaceAndRecursivelySimplifyImpl(I, SimpleV, TLI, DT, AC); + return replaceAndRecursivelySimplifyImpl(I, SimpleV, TLI, DT, AC, KBC); } namespace llvm { Index: lib/Analysis/ValueTracking.cpp =================================================================== --- lib/Analysis/ValueTracking.cpp +++ lib/Analysis/ValueTracking.cpp @@ -38,6 +38,7 @@ #include "llvm/IR/Operator.h" #include "llvm/IR/PatternMatch.h" #include "llvm/IR/Statepoint.h" +#include "llvm/IR/ValueMap.h" #include "llvm/Support/Debug.h" #include "llvm/Support/KnownBits.h" #include "llvm/Support/MathExtras.h" @@ -60,6 +61,9 @@ DontImproveNonNegativePhiBits("dont-improve-non-negative-phi-bits", cl::Hidden, cl::init(true)); +static cl::opt +UseCache("cache-known-bits", cl::Hidden, cl::init(true)); + /// Returns the bitwidth of the given scalar or pointer type. For vector types, /// returns the element type's bitwidth. static unsigned getBitWidth(Type *Ty, const DataLayout &DL) { @@ -79,6 +83,7 @@ AssumptionCache *AC; const Instruction *CxtI; const DominatorTree *DT; + KBCache *KBC; // Unlike the other analyses, this may be a nullptr because not all clients // provide it currently. OptimizationRemarkEmitter *ORE; @@ -94,18 +99,29 @@ std::array Excluded; unsigned NumExcluded; + // Is this query potentially sensitive on the context instruction. 
+ bool *ContextSensitive; + Query(const DataLayout &DL, AssumptionCache *AC, const Instruction *CxtI, - const DominatorTree *DT, OptimizationRemarkEmitter *ORE = nullptr) - : DL(DL), AC(AC), CxtI(CxtI), DT(DT), ORE(ORE), NumExcluded(0) {} + const DominatorTree *DT, KBCache *KBC, + OptimizationRemarkEmitter *ORE = nullptr) + : DL(DL), AC(AC), CxtI(CxtI), DT(DT), KBC(KBC), ORE(ORE), + NumExcluded(0), ContextSensitive(nullptr) {} Query(const Query &Q, const Value *NewExcl) - : DL(Q.DL), AC(Q.AC), CxtI(Q.CxtI), DT(Q.DT), ORE(Q.ORE), - NumExcluded(Q.NumExcluded) { + : DL(Q.DL), AC(Q.AC), CxtI(Q.CxtI), DT(Q.DT), KBC(Q.KBC), + ORE(Q.ORE), NumExcluded(Q.NumExcluded), + ContextSensitive(Q.ContextSensitive) { Excluded = Q.Excluded; Excluded[NumExcluded++] = NewExcl; assert(NumExcluded <= Excluded.size()); } + Query(const Query &Q, bool *NewCS) + : DL(Q.DL), AC(Q.AC), CxtI(Q.CxtI), DT(Q.DT), KBC(Q.KBC), + ORE(Q.ORE), Excluded(Q.Excluded), NumExcluded(Q.NumExcluded), + ContextSensitive(NewCS) {} + bool isExcluded(const Value *Value) const { if (NumExcluded == 0) return false; @@ -131,16 +147,64 @@ return nullptr; } +namespace llvm { +class KBCache { + ValueMap<const Value *, KnownBits> KnownMap; + +public: + void cache(const Value *V, const KnownBits &Known) { + KnownMap[V] = Known; + } + + bool find(const Value *V, KnownBits &Known) { + auto KBI = KnownMap.find(V); + if (KBI == KnownMap.end()) + return false; + + Known = KBI->second; + return true; + } +}; + +void KBCacheDeleter::operator()(KBCache *KBC) { + delete KBC; +} + +std::unique_ptr<KBCache, KBCacheDeleter> makeKnownBitsCache() { + return std::unique_ptr<KBCache, KBCacheDeleter>(new KBCache); +} +} // namespace llvm + +static void computeKnownBitsNotCached(const Value *V, KnownBits &Known, + unsigned Depth, const Query &Q); + static void computeKnownBits(const Value *V, KnownBits &Known, - unsigned Depth, const Query &Q); + unsigned Depth, const Query &Q) { + if (Q.KBC && UseCache && Q.KBC->find(V, Known)) + return; + + bool ContextSensitive; + ::computeKnownBitsNotCached(V, Known, Depth, Query(Q, &ContextSensitive)); + + // If this query is context-sensitive, then so is its parent query.
+ if (ContextSensitive) { + if (Q.ContextSensitive) + *Q.ContextSensitive = true; + + return; + } + + if (Q.KBC && UseCache) + Q.KBC->cache(V, Known); +} void llvm::computeKnownBits(const Value *V, KnownBits &Known, const DataLayout &DL, unsigned Depth, AssumptionCache *AC, const Instruction *CxtI, - const DominatorTree *DT, + const DominatorTree *DT, KBCache *KBC, OptimizationRemarkEmitter *ORE) { ::computeKnownBits(V, Known, Depth, - Query(DL, AC, safeCxtI(V, CxtI), DT, ORE)); + Query(DL, AC, safeCxtI(V, CxtI), DT, KBC, ORE)); } static KnownBits computeKnownBits(const Value *V, unsigned Depth, @@ -149,14 +213,14 @@ KnownBits llvm::computeKnownBits(const Value *V, const DataLayout &DL, unsigned Depth, AssumptionCache *AC, const Instruction *CxtI, - const DominatorTree *DT) { - return ::computeKnownBits(V, Depth, Query(DL, AC, safeCxtI(V, CxtI), DT)); + const DominatorTree *DT, KBCache *KBC) { + return ::computeKnownBits(V, Depth, Query(DL, AC, safeCxtI(V, CxtI), DT, KBC)); } bool llvm::haveNoCommonBitsSet(const Value *LHS, const Value *RHS, const DataLayout &DL, AssumptionCache *AC, const Instruction *CxtI, - const DominatorTree *DT) { + const DominatorTree *DT, KBCache *KBC) { assert(LHS->getType() == RHS->getType() && "LHS and RHS should have the same type"); assert(LHS->getType()->isIntOrIntVectorTy() && @@ -164,8 +228,8 @@ IntegerType *IT = cast(LHS->getType()->getScalarType()); KnownBits LHSKnown(IT->getBitWidth()); KnownBits RHSKnown(IT->getBitWidth()); - computeKnownBits(LHS, LHSKnown, DL, 0, AC, CxtI, DT); - computeKnownBits(RHS, RHSKnown, DL, 0, AC, CxtI, DT); + computeKnownBits(LHS, LHSKnown, DL, 0, AC, CxtI, DT, KBC); + computeKnownBits(RHS, RHSKnown, DL, 0, AC, CxtI, DT, KBC); return (LHSKnown.Zero | RHSKnown.Zero).isAllOnesValue(); } @@ -177,43 +241,43 @@ bool OrZero, unsigned Depth, AssumptionCache *AC, const Instruction *CxtI, - const DominatorTree *DT) { + const DominatorTree *DT, KBCache *KBC) { return ::isKnownToBeAPowerOfTwo(V, OrZero, Depth, - Query(DL, AC, safeCxtI(V, CxtI), DT)); + Query(DL, AC, safeCxtI(V, CxtI), DT, KBC)); } static bool isKnownNonZero(const Value *V, unsigned Depth, const Query &Q); bool llvm::isKnownNonZero(const Value *V, const DataLayout &DL, unsigned Depth, AssumptionCache *AC, const Instruction *CxtI, - const DominatorTree *DT) { - return ::isKnownNonZero(V, Depth, Query(DL, AC, safeCxtI(V, CxtI), DT)); + const DominatorTree *DT, KBCache *KBC) { + return ::isKnownNonZero(V, Depth, Query(DL, AC, safeCxtI(V, CxtI), DT, KBC)); } bool llvm::isKnownNonNegative(const Value *V, const DataLayout &DL, unsigned Depth, AssumptionCache *AC, const Instruction *CxtI, - const DominatorTree *DT) { - KnownBits Known = computeKnownBits(V, DL, Depth, AC, CxtI, DT); + const DominatorTree *DT, KBCache *KBC) { + KnownBits Known = computeKnownBits(V, DL, Depth, AC, CxtI, DT, KBC); return Known.isNonNegative(); } bool llvm::isKnownPositive(const Value *V, const DataLayout &DL, unsigned Depth, AssumptionCache *AC, const Instruction *CxtI, - const DominatorTree *DT) { + const DominatorTree *DT, KBCache *KBC) { if (auto *CI = dyn_cast(V)) return CI->getValue().isStrictlyPositive(); // TODO: We'd doing two recursive queries here. We should factor this such // that only a single query is needed. 
- return isKnownNonNegative(V, DL, Depth, AC, CxtI, DT) && - isKnownNonZero(V, DL, Depth, AC, CxtI, DT); + return isKnownNonNegative(V, DL, Depth, AC, CxtI, DT, KBC) && + isKnownNonZero(V, DL, Depth, AC, CxtI, DT, KBC); } bool llvm::isKnownNegative(const Value *V, const DataLayout &DL, unsigned Depth, AssumptionCache *AC, const Instruction *CxtI, - const DominatorTree *DT) { - KnownBits Known = computeKnownBits(V, DL, Depth, AC, CxtI, DT); + const DominatorTree *DT, KBCache *KBC) { + KnownBits Known = computeKnownBits(V, DL, Depth, AC, CxtI, DT, KBC); return Known.isNegative(); } @@ -222,10 +286,10 @@ bool llvm::isKnownNonEqual(const Value *V1, const Value *V2, const DataLayout &DL, AssumptionCache *AC, const Instruction *CxtI, - const DominatorTree *DT) { + const DominatorTree *DT, KBCache *KBC) { return ::isKnownNonEqual(V1, V2, Query(DL, AC, safeCxtI(V1, safeCxtI(V2, CxtI)), - DT)); + DT, KBC)); } static bool MaskedValueIsZero(const Value *V, const APInt &Mask, unsigned Depth, @@ -234,9 +298,10 @@ bool llvm::MaskedValueIsZero(const Value *V, const APInt &Mask, const DataLayout &DL, unsigned Depth, AssumptionCache *AC, - const Instruction *CxtI, const DominatorTree *DT) { + const Instruction *CxtI, const DominatorTree *DT, + KBCache *KBC) { return ::MaskedValueIsZero(V, Mask, Depth, - Query(DL, AC, safeCxtI(V, CxtI), DT)); + Query(DL, AC, safeCxtI(V, CxtI), DT, KBC)); } static unsigned ComputeNumSignBits(const Value *V, unsigned Depth, @@ -245,8 +310,8 @@ unsigned llvm::ComputeNumSignBits(const Value *V, const DataLayout &DL, unsigned Depth, AssumptionCache *AC, const Instruction *CxtI, - const DominatorTree *DT) { - return ::ComputeNumSignBits(V, Depth, Query(DL, AC, safeCxtI(V, CxtI), DT)); + const DominatorTree *DT, KBCache *KBC) { + return ::ComputeNumSignBits(V, Depth, Query(DL, AC, safeCxtI(V, CxtI), DT, KBC)); } static void computeKnownBitsAddSub(bool Add, const Value *Op0, const Value *Op1, @@ -446,6 +511,16 @@ return false; } +static bool isValidAssumeForContext(const Instruction *Inv, + const Query &Q) { + // The fact that we're checking an assumption means that this query is + // potentially context sensitive... 
+ if (Q.ContextSensitive) + *Q.ContextSensitive = true; + + return isValidAssumeForContext(Inv, Q.CxtI, Q.DT); +} + bool llvm::isValidAssumeForContext(const Instruction *Inv, const Instruction *CxtI, const DominatorTree *DT) { @@ -524,13 +599,13 @@ Value *Arg = I->getArgOperand(0); - if (Arg == V && isValidAssumeForContext(I, Q.CxtI, Q.DT)) { + if (Arg == V && isValidAssumeForContext(I, Q)) { assert(BitWidth == 1 && "assume operand is not i1?"); Known.setAllOnes(); return; } if (match(Arg, m_Not(m_Specific(V))) && - isValidAssumeForContext(I, Q.CxtI, Q.DT)) { + isValidAssumeForContext(I, Q)) { assert(BitWidth == 1 && "assume operand is not i1?"); Known.setAllZero(); return; @@ -549,7 +624,7 @@ ConstantInt *C; // assume(v = a) if (match(Arg, m_c_ICmp(Pred, m_V, m_Value(A))) && - Pred == ICmpInst::ICMP_EQ && isValidAssumeForContext(I, Q.CxtI, Q.DT)) { + Pred == ICmpInst::ICMP_EQ && isValidAssumeForContext(I, Q)) { KnownBits RHSKnown(BitWidth); computeKnownBits(A, RHSKnown, Depth+1, Query(Q, I)); Known.Zero |= RHSKnown.Zero; @@ -558,7 +633,7 @@ } else if (match(Arg, m_c_ICmp(Pred, m_c_And(m_V, m_Value(B)), m_Value(A))) && Pred == ICmpInst::ICMP_EQ && - isValidAssumeForContext(I, Q.CxtI, Q.DT)) { + isValidAssumeForContext(I, Q)) { KnownBits RHSKnown(BitWidth); computeKnownBits(A, RHSKnown, Depth+1, Query(Q, I)); KnownBits MaskKnown(BitWidth); @@ -572,7 +647,7 @@ } else if (match(Arg, m_c_ICmp(Pred, m_Not(m_c_And(m_V, m_Value(B))), m_Value(A))) && Pred == ICmpInst::ICMP_EQ && - isValidAssumeForContext(I, Q.CxtI, Q.DT)) { + isValidAssumeForContext(I, Q)) { KnownBits RHSKnown(BitWidth); computeKnownBits(A, RHSKnown, Depth+1, Query(Q, I)); KnownBits MaskKnown(BitWidth); @@ -586,7 +661,7 @@ } else if (match(Arg, m_c_ICmp(Pred, m_c_Or(m_V, m_Value(B)), m_Value(A))) && Pred == ICmpInst::ICMP_EQ && - isValidAssumeForContext(I, Q.CxtI, Q.DT)) { + isValidAssumeForContext(I, Q)) { KnownBits RHSKnown(BitWidth); computeKnownBits(A, RHSKnown, Depth+1, Query(Q, I)); KnownBits BKnown(BitWidth); @@ -600,7 +675,7 @@ } else if (match(Arg, m_c_ICmp(Pred, m_Not(m_c_Or(m_V, m_Value(B))), m_Value(A))) && Pred == ICmpInst::ICMP_EQ && - isValidAssumeForContext(I, Q.CxtI, Q.DT)) { + isValidAssumeForContext(I, Q)) { KnownBits RHSKnown(BitWidth); computeKnownBits(A, RHSKnown, Depth+1, Query(Q, I)); KnownBits BKnown(BitWidth); @@ -614,7 +689,7 @@ } else if (match(Arg, m_c_ICmp(Pred, m_c_Xor(m_V, m_Value(B)), m_Value(A))) && Pred == ICmpInst::ICMP_EQ && - isValidAssumeForContext(I, Q.CxtI, Q.DT)) { + isValidAssumeForContext(I, Q)) { KnownBits RHSKnown(BitWidth); computeKnownBits(A, RHSKnown, Depth+1, Query(Q, I)); KnownBits BKnown(BitWidth); @@ -631,7 +706,7 @@ } else if (match(Arg, m_c_ICmp(Pred, m_Not(m_c_Xor(m_V, m_Value(B))), m_Value(A))) && Pred == ICmpInst::ICMP_EQ && - isValidAssumeForContext(I, Q.CxtI, Q.DT)) { + isValidAssumeForContext(I, Q)) { KnownBits RHSKnown(BitWidth); computeKnownBits(A, RHSKnown, Depth+1, Query(Q, I)); KnownBits BKnown(BitWidth); @@ -648,7 +723,7 @@ } else if (match(Arg, m_c_ICmp(Pred, m_Shl(m_V, m_ConstantInt(C)), m_Value(A))) && Pred == ICmpInst::ICMP_EQ && - isValidAssumeForContext(I, Q.CxtI, Q.DT)) { + isValidAssumeForContext(I, Q)) { KnownBits RHSKnown(BitWidth); computeKnownBits(A, RHSKnown, Depth+1, Query(Q, I)); // For those bits in RHS that are known, we can propagate them to known @@ -661,7 +736,7 @@ } else if (match(Arg, m_c_ICmp(Pred, m_Not(m_Shl(m_V, m_ConstantInt(C))), m_Value(A))) && Pred == ICmpInst::ICMP_EQ && - isValidAssumeForContext(I, Q.CxtI, Q.DT)) { + 
isValidAssumeForContext(I, Q)) { KnownBits RHSKnown(BitWidth); computeKnownBits(A, RHSKnown, Depth+1, Query(Q, I)); // For those bits in RHS that are known, we can propagate them inverted @@ -676,7 +751,7 @@ m_AShr(m_V, m_ConstantInt(C))), m_Value(A))) && Pred == ICmpInst::ICMP_EQ && - isValidAssumeForContext(I, Q.CxtI, Q.DT)) { + isValidAssumeForContext(I, Q)) { KnownBits RHSKnown(BitWidth); computeKnownBits(A, RHSKnown, Depth+1, Query(Q, I)); // For those bits in RHS that are known, we can propagate them to known @@ -689,7 +764,7 @@ m_AShr(m_V, m_ConstantInt(C)))), m_Value(A))) && Pred == ICmpInst::ICMP_EQ && - isValidAssumeForContext(I, Q.CxtI, Q.DT)) { + isValidAssumeForContext(I, Q)) { KnownBits RHSKnown(BitWidth); computeKnownBits(A, RHSKnown, Depth+1, Query(Q, I)); // For those bits in RHS that are known, we can propagate them inverted @@ -699,7 +774,7 @@ // assume(v >=_s c) where c is non-negative } else if (match(Arg, m_ICmp(Pred, m_V, m_Value(A))) && Pred == ICmpInst::ICMP_SGE && - isValidAssumeForContext(I, Q.CxtI, Q.DT)) { + isValidAssumeForContext(I, Q)) { KnownBits RHSKnown(BitWidth); computeKnownBits(A, RHSKnown, Depth+1, Query(Q, I)); @@ -710,7 +785,7 @@ // assume(v >_s c) where c is at least -1. } else if (match(Arg, m_ICmp(Pred, m_V, m_Value(A))) && Pred == ICmpInst::ICMP_SGT && - isValidAssumeForContext(I, Q.CxtI, Q.DT)) { + isValidAssumeForContext(I, Q)) { KnownBits RHSKnown(BitWidth); computeKnownBits(A, RHSKnown, Depth+1, Query(Q, I)); @@ -721,7 +796,7 @@ // assume(v <=_s c) where c is negative } else if (match(Arg, m_ICmp(Pred, m_V, m_Value(A))) && Pred == ICmpInst::ICMP_SLE && - isValidAssumeForContext(I, Q.CxtI, Q.DT)) { + isValidAssumeForContext(I, Q)) { KnownBits RHSKnown(BitWidth); computeKnownBits(A, RHSKnown, Depth+1, Query(Q, I)); @@ -732,7 +807,7 @@ // assume(v <_s c) where c is non-positive } else if (match(Arg, m_ICmp(Pred, m_V, m_Value(A))) && Pred == ICmpInst::ICMP_SLT && - isValidAssumeForContext(I, Q.CxtI, Q.DT)) { + isValidAssumeForContext(I, Q)) { KnownBits RHSKnown(BitWidth); computeKnownBits(A, RHSKnown, Depth+1, Query(Q, I)); @@ -743,7 +818,7 @@ // assume(v <=_u c) } else if (match(Arg, m_ICmp(Pred, m_V, m_Value(A))) && Pred == ICmpInst::ICMP_ULE && - isValidAssumeForContext(I, Q.CxtI, Q.DT)) { + isValidAssumeForContext(I, Q)) { KnownBits RHSKnown(BitWidth); computeKnownBits(A, RHSKnown, Depth+1, Query(Q, I)); @@ -752,7 +827,7 @@ // assume(v <_u c) } else if (match(Arg, m_ICmp(Pred, m_V, m_Value(A))) && Pred == ICmpInst::ICMP_ULT && - isValidAssumeForContext(I, Q.CxtI, Q.DT)) { + isValidAssumeForContext(I, Q)) { KnownBits RHSKnown(BitWidth); computeKnownBits(A, RHSKnown, Depth+1, Query(Q, I)); @@ -1482,8 +1557,8 @@ /// where V is a vector, known zero, and known one values are the /// same width as the vector element, and the bit is set only if it is true /// for all of the elements in the vector. -void computeKnownBits(const Value *V, KnownBits &Known, unsigned Depth, - const Query &Q) { +void computeKnownBitsNotCached(const Value *V, KnownBits &Known, unsigned Depth, + const Query &Q) { assert(V && "No Value?"); assert(Depth <= MaxDepth && "Limit Search Depth"); unsigned BitWidth = Known.getBitWidth(); @@ -3472,7 +3547,8 @@ const DataLayout &DL, AssumptionCache *AC, const Instruction *CxtI, - const DominatorTree *DT) { + const DominatorTree *DT, + KBCache *KBC) { // Multiplying n * m significant bits yields a result of n + m significant // bits. 
If the total number of significant bits does not exceed the // result bit width (minus 1), there is no overflow. @@ -3482,8 +3558,8 @@ unsigned BitWidth = LHS->getType()->getScalarSizeInBits(); KnownBits LHSKnown(BitWidth); KnownBits RHSKnown(BitWidth); - computeKnownBits(LHS, LHSKnown, DL, /*Depth=*/0, AC, CxtI, DT); - computeKnownBits(RHS, RHSKnown, DL, /*Depth=*/0, AC, CxtI, DT); + computeKnownBits(LHS, LHSKnown, DL, /*Depth=*/0, AC, CxtI, DT, KBC); + computeKnownBits(RHS, RHSKnown, DL, /*Depth=*/0, AC, CxtI, DT, KBC); // Note that underestimating the number of zero bits gives a more // conservative answer. unsigned ZeroBits = LHSKnown.countMinLeadingZeros() + @@ -3519,10 +3595,11 @@ const DataLayout &DL, AssumptionCache *AC, const Instruction *CxtI, - const DominatorTree *DT) { - KnownBits LHSKnown = computeKnownBits(LHS, DL, /*Depth=*/0, AC, CxtI, DT); + const DominatorTree *DT, + KBCache *KBC) { + KnownBits LHSKnown = computeKnownBits(LHS, DL, /*Depth=*/0, AC, CxtI, DT, KBC); if (LHSKnown.isNonNegative() || LHSKnown.isNegative()) { - KnownBits RHSKnown = computeKnownBits(RHS, DL, /*Depth=*/0, AC, CxtI, DT); + KnownBits RHSKnown = computeKnownBits(RHS, DL, /*Depth=*/0, AC, CxtI, DT, KBC); if (LHSKnown.isNegative() && RHSKnown.isNegative()) { // The sign bit is set in both cases: this MUST overflow. @@ -3591,7 +3668,8 @@ const DataLayout &DL, AssumptionCache *AC, const Instruction *CxtI, - const DominatorTree *DT) { + const DominatorTree *DT, + KBCache *KBC) { if (Add && Add->hasNoSignedWrap()) { return OverflowResult::NeverOverflows; } @@ -3610,12 +3688,12 @@ // // Since the carry into the most significant position is always equal to // the carry out of the addition, there is no signed overflow. - if (ComputeNumSignBits(LHS, DL, 0, AC, CxtI, DT) > 1 && - ComputeNumSignBits(RHS, DL, 0, AC, CxtI, DT) > 1) + if (ComputeNumSignBits(LHS, DL, 0, AC, CxtI, DT, KBC) > 1 && + ComputeNumSignBits(RHS, DL, 0, AC, CxtI, DT, KBC) > 1) return OverflowResult::NeverOverflows; - KnownBits LHSKnown = computeKnownBits(LHS, DL, /*Depth=*/0, AC, CxtI, DT); - KnownBits RHSKnown = computeKnownBits(RHS, DL, /*Depth=*/0, AC, CxtI, DT); + KnownBits LHSKnown = computeKnownBits(LHS, DL, /*Depth=*/0, AC, CxtI, DT, KBC); + KnownBits RHSKnown = computeKnownBits(RHS, DL, /*Depth=*/0, AC, CxtI, DT, KBC); if (checkRippleForSignedAdd(LHSKnown, RHSKnown)) return OverflowResult::NeverOverflows; @@ -3633,7 +3711,7 @@ bool LHSOrRHSKnownNegative = (LHSKnown.isNegative() || RHSKnown.isNegative()); if (LHSOrRHSKnownNonNegative || LHSOrRHSKnownNegative) { - KnownBits AddKnown = computeKnownBits(Add, DL, /*Depth=*/0, AC, CxtI, DT); + KnownBits AddKnown = computeKnownBits(Add, DL, /*Depth=*/0, AC, CxtI, DT, KBC); if ((AddKnown.isNonNegative() && LHSOrRHSKnownNonNegative) || (AddKnown.isNegative() && LHSOrRHSKnownNegative)) { return OverflowResult::NeverOverflows; @@ -3709,9 +3787,10 @@ const DataLayout &DL, AssumptionCache *AC, const Instruction *CxtI, - const DominatorTree *DT) { + const DominatorTree *DT, + KBCache *KBC) { return ::computeOverflowForSignedAdd(Add->getOperand(0), Add->getOperand(1), - Add, DL, AC, CxtI, DT); + Add, DL, AC, CxtI, DT, KBC); } OverflowResult llvm::computeOverflowForSignedAdd(const Value *LHS, @@ -3719,8 +3798,9 @@ const DataLayout &DL, AssumptionCache *AC, const Instruction *CxtI, - const DominatorTree *DT) { - return ::computeOverflowForSignedAdd(LHS, RHS, nullptr, DL, AC, CxtI, DT); + const DominatorTree *DT, + KBCache *KBC) { + return ::computeOverflowForSignedAdd(LHS, RHS, nullptr, DL, AC, 
CxtI, DT, KBC); } bool llvm::isGuaranteedToTransferExecutionToSuccessor(const Instruction *I) { @@ -4241,7 +4321,7 @@ const Value *LHS, const Value *RHS, const DataLayout &DL, unsigned Depth, AssumptionCache *AC, const Instruction *CxtI, - const DominatorTree *DT) { + const DominatorTree *DT, KBCache *KBC) { assert(!LHS->getType()->isVectorTy() && "TODO: extend to handle vectors!"); if (ICmpInst::isTrueWhenEqual(Pred) && LHS == RHS) return true; @@ -4304,7 +4384,8 @@ const Value *ARHS, const Value *BLHS, const Value *BRHS, const DataLayout &DL, unsigned Depth, AssumptionCache *AC, - const Instruction *CxtI, const DominatorTree *DT) { + const Instruction *CxtI, const DominatorTree *DT, + KBCache *KBC) { switch (Pred) { default: return None; @@ -4312,16 +4393,18 @@ case CmpInst::ICMP_SLT: case CmpInst::ICMP_SLE: if (isTruePredicate(CmpInst::ICMP_SLE, BLHS, ALHS, DL, Depth, AC, CxtI, - DT) && - isTruePredicate(CmpInst::ICMP_SLE, ARHS, BRHS, DL, Depth, AC, CxtI, DT)) + DT, KBC) && + isTruePredicate(CmpInst::ICMP_SLE, ARHS, BRHS, DL, Depth, AC, CxtI, DT, + KBC)) return true; return None; case CmpInst::ICMP_ULT: case CmpInst::ICMP_ULE: if (isTruePredicate(CmpInst::ICMP_ULE, BLHS, ALHS, DL, Depth, AC, CxtI, - DT) && - isTruePredicate(CmpInst::ICMP_ULE, ARHS, BRHS, DL, Depth, AC, CxtI, DT)) + DT, KBC) && + isTruePredicate(CmpInst::ICMP_ULE, ARHS, BRHS, DL, Depth, AC, CxtI, DT, + KBC)) return true; return None; } @@ -4387,7 +4470,7 @@ const DataLayout &DL, bool InvertAPred, unsigned Depth, AssumptionCache *AC, const Instruction *CxtI, - const DominatorTree *DT) { + const DominatorTree *DT, KBCache *KBC) { // A mismatch occurs when we compare a scalar cmp to a vector cmp, for example. if (LHS->getType() != RHS->getType()) return None; @@ -4440,7 +4523,7 @@ if (APred == BPred) return isImpliedCondOperands(APred, ALHS, ARHS, BLHS, BRHS, DL, Depth, AC, - CxtI, DT); + CxtI, DT, KBC); return None; } Index: lib/Transforms/InstCombine/InstCombineAddSub.cpp =================================================================== --- lib/Transforms/InstCombine/InstCombineAddSub.cpp +++ lib/Transforms/InstCombine/InstCombineAddSub.cpp @@ -1111,7 +1111,7 @@ return replaceInstUsesWith(I, V); // A+B --> A|B iff A and B have no bits set in common. 
- if (haveNoCommonBitsSet(LHS, RHS, DL, &AC, &I, &DT)) + if (haveNoCommonBitsSet(LHS, RHS, DL, &AC, &I, &DT, KBC.get())) return BinaryOperator::CreateOr(LHS, RHS); if (Constant *CRHS = dyn_cast(RHS)) { Index: lib/Transforms/InstCombine/InstCombineAndOrXor.cpp =================================================================== --- lib/Transforms/InstCombine/InstCombineAndOrXor.cpp +++ lib/Transforms/InstCombine/InstCombineAndOrXor.cpp @@ -1611,16 +1611,16 @@ Value *Masked = nullptr; if (LAnd->getOperand(0) == RAnd->getOperand(0) && isKnownToBeAPowerOfTwo(LAnd->getOperand(1), DL, false, 0, &AC, CxtI, - &DT) && + &DT, KBC.get()) && isKnownToBeAPowerOfTwo(RAnd->getOperand(1), DL, false, 0, &AC, CxtI, - &DT)) { + &DT, KBC.get())) { Mask = Builder->CreateOr(LAnd->getOperand(1), RAnd->getOperand(1)); Masked = Builder->CreateAnd(LAnd->getOperand(0), Mask); } else if (LAnd->getOperand(1) == RAnd->getOperand(1) && isKnownToBeAPowerOfTwo(LAnd->getOperand(0), DL, false, 0, &AC, - CxtI, &DT) && + CxtI, &DT, KBC.get()) && isKnownToBeAPowerOfTwo(RAnd->getOperand(0), DL, false, 0, &AC, - CxtI, &DT)) { + CxtI, &DT, KBC.get())) { Mask = Builder->CreateOr(LAnd->getOperand(0), RAnd->getOperand(0)); Masked = Builder->CreateAnd(LAnd->getOperand(1), Mask); } Index: lib/Transforms/InstCombine/InstCombineCalls.cpp =================================================================== --- lib/Transforms/InstCombine/InstCombineCalls.cpp +++ lib/Transforms/InstCombine/InstCombineCalls.cpp @@ -167,8 +167,8 @@ } Instruction *InstCombiner::SimplifyMemTransfer(MemIntrinsic *MI) { - unsigned DstAlign = getKnownAlignment(MI->getArgOperand(0), DL, MI, &AC, &DT); - unsigned SrcAlign = getKnownAlignment(MI->getArgOperand(1), DL, MI, &AC, &DT); + unsigned DstAlign = getKnownAlignment(MI->getArgOperand(0), DL, MI, &AC, &DT, KBC.get()); + unsigned SrcAlign = getKnownAlignment(MI->getArgOperand(1), DL, MI, &AC, &DT, KBC.get()); unsigned MinAlign = std::min(DstAlign, SrcAlign); unsigned CopyAlign = MI->getAlignment(); @@ -246,7 +246,7 @@ } Instruction *InstCombiner::SimplifyMemSet(MemSetInst *MI) { - unsigned Alignment = getKnownAlignment(MI->getDest(), DL, MI, &AC, &DT); + unsigned Alignment = getKnownAlignment(MI->getDest(), DL, MI, &AC, &DT, KBC.get()); if (MI->getAlignment() < Alignment) { MI->setAlignment(ConstantInt::get(MI->getAlignmentType(), Alignment, false)); @@ -2126,7 +2126,7 @@ case Intrinsic::ppc_altivec_lvxl: // Turn PPC lvx -> load if the pointer is known aligned. if (getOrEnforceKnownAlignment(II->getArgOperand(0), 16, DL, II, &AC, - &DT) >= 16) { + &DT, KBC.get()) >= 16) { Value *Ptr = Builder->CreateBitCast(II->getArgOperand(0), PointerType::getUnqual(II->getType())); return new LoadInst(Ptr); @@ -2143,7 +2143,7 @@ case Intrinsic::ppc_altivec_stvxl: // Turn stvx -> store if the pointer is known aligned. if (getOrEnforceKnownAlignment(II->getArgOperand(1), 16, DL, II, &AC, - &DT) >= 16) { + &DT, KBC.get()) >= 16) { Type *OpPtrTy = PointerType::getUnqual(II->getArgOperand(0)->getType()); Value *Ptr = Builder->CreateBitCast(II->getArgOperand(1), OpPtrTy); @@ -2160,7 +2160,7 @@ case Intrinsic::ppc_qpx_qvlfs: // Turn PPC QPX qvlfs -> load if the pointer is known aligned. 
if (getOrEnforceKnownAlignment(II->getArgOperand(0), 16, DL, II, &AC, - &DT) >= 16) { + &DT, KBC.get()) >= 16) { Type *VTy = VectorType::get(Builder->getFloatTy(), II->getType()->getVectorNumElements()); Value *Ptr = Builder->CreateBitCast(II->getArgOperand(0), @@ -2172,7 +2172,7 @@ case Intrinsic::ppc_qpx_qvlfd: // Turn PPC QPX qvlfd -> load if the pointer is known aligned. if (getOrEnforceKnownAlignment(II->getArgOperand(0), 32, DL, II, &AC, - &DT) >= 32) { + &DT, KBC.get()) >= 32) { Value *Ptr = Builder->CreateBitCast(II->getArgOperand(0), PointerType::getUnqual(II->getType())); return new LoadInst(Ptr); @@ -2181,7 +2181,7 @@ case Intrinsic::ppc_qpx_qvstfs: // Turn PPC QPX qvstfs -> store if the pointer is known aligned. if (getOrEnforceKnownAlignment(II->getArgOperand(1), 16, DL, II, &AC, - &DT) >= 16) { + &DT, KBC.get()) >= 16) { Type *VTy = VectorType::get(Builder->getFloatTy(), II->getArgOperand(0)->getType()->getVectorNumElements()); Value *TOp = Builder->CreateFPTrunc(II->getArgOperand(0), VTy); @@ -2193,7 +2193,7 @@ case Intrinsic::ppc_qpx_qvstfd: // Turn PPC QPX qvstfd -> store if the pointer is known aligned. if (getOrEnforceKnownAlignment(II->getArgOperand(1), 32, DL, II, &AC, - &DT) >= 32) { + &DT, KBC.get()) >= 32) { Type *OpPtrTy = PointerType::getUnqual(II->getArgOperand(0)->getType()); Value *Ptr = Builder->CreateBitCast(II->getArgOperand(1), OpPtrTy); @@ -3048,7 +3048,7 @@ case Intrinsic::arm_neon_vst3lane: case Intrinsic::arm_neon_vst4lane: { unsigned MemAlign = - getKnownAlignment(II->getArgOperand(0), DL, II, &AC, &DT); + getKnownAlignment(II->getArgOperand(0), DL, II, &AC, &DT, KBC.get()); unsigned AlignArg = II->getNumArgOperands() - 1; ConstantInt *IntrAlign = dyn_cast(II->getArgOperand(AlignArg)); if (IntrAlign && IntrAlign->getZExtValue() < MemAlign) { Index: lib/Transforms/InstCombine/InstCombineCompares.cpp =================================================================== --- lib/Transforms/InstCombine/InstCombineCompares.cpp +++ lib/Transforms/InstCombine/InstCombineCompares.cpp @@ -4497,7 +4497,8 @@ // if A is a power of 2. if (match(Op0, m_And(m_Value(A), m_Not(m_Value(B)))) && match(Op1, m_Zero()) && - isKnownToBeAPowerOfTwo(A, DL, false, 0, &AC, &I, &DT) && I.isEquality()) + isKnownToBeAPowerOfTwo(A, DL, false, 0, &AC, &I, &DT, KBC.get()) && + I.isEquality()) return new ICmpInst(I.getInversePredicate(), Builder->CreateAnd(A, B), Op1); Index: lib/Transforms/InstCombine/InstCombineInternal.h =================================================================== --- lib/Transforms/InstCombine/InstCombineInternal.h +++ lib/Transforms/InstCombine/InstCombineInternal.h @@ -215,6 +215,7 @@ AssumptionCache &AC; TargetLibraryInfo &TLI; DominatorTree &DT; + std::unique_ptr KBC; const DataLayout &DL; const SimplifyQuery SQ; // Optional analyses. When non-null, these can both be used to do better @@ -230,7 +231,7 @@ const DataLayout &DL, LoopInfo *LI) : Worklist(Worklist), Builder(Builder), MinimizeSize(MinimizeSize), ExpensiveCombines(ExpensiveCombines), AA(AA), AC(AC), TLI(TLI), DT(DT), - DL(DL), SQ(DL, &TLI, &DT, &AC), LI(LI), MadeIRChange(false) {} + KBC(makeKnownBitsCache()), DL(DL), SQ(DL, &TLI, &DT, &AC, KBC.get()), LI(LI), MadeIRChange(false) {} /// \brief Run the combiner over the entire worklist until it is empty. 
/// @@ -542,26 +543,26 @@ bool MaskedValueIsZero(const Value *V, const APInt &Mask, unsigned Depth = 0, const Instruction *CxtI = nullptr) const { - return llvm::MaskedValueIsZero(V, Mask, DL, Depth, &AC, CxtI, &DT); + return llvm::MaskedValueIsZero(V, Mask, DL, Depth, &AC, CxtI, &DT, KBC.get()); } unsigned ComputeNumSignBits(const Value *Op, unsigned Depth = 0, const Instruction *CxtI = nullptr) const { - return llvm::ComputeNumSignBits(Op, DL, Depth, &AC, CxtI, &DT); + return llvm::ComputeNumSignBits(Op, DL, Depth, &AC, CxtI, &DT, KBC.get()); } OverflowResult computeOverflowForUnsignedMul(const Value *LHS, const Value *RHS, const Instruction *CxtI) const { - return llvm::computeOverflowForUnsignedMul(LHS, RHS, DL, &AC, CxtI, &DT); + return llvm::computeOverflowForUnsignedMul(LHS, RHS, DL, &AC, CxtI, &DT, KBC.get()); } OverflowResult computeOverflowForUnsignedAdd(const Value *LHS, const Value *RHS, const Instruction *CxtI) const { - return llvm::computeOverflowForUnsignedAdd(LHS, RHS, DL, &AC, CxtI, &DT); + return llvm::computeOverflowForUnsignedAdd(LHS, RHS, DL, &AC, CxtI, &DT, KBC.get()); } OverflowResult computeOverflowForSignedAdd(const Value *LHS, const Value *RHS, const Instruction *CxtI) const { - return llvm::computeOverflowForSignedAdd(LHS, RHS, DL, &AC, CxtI, &DT); + return llvm::computeOverflowForSignedAdd(LHS, RHS, DL, &AC, CxtI, &DT, KBC.get()); } /// Maximum size of array considered when transforming. Index: lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp =================================================================== --- lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp +++ lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp @@ -389,7 +389,7 @@ SmallVector ToDelete; if (MemTransferInst *Copy = isOnlyCopiedFromConstantGlobal(&AI, ToDelete)) { unsigned SourceAlign = getOrEnforceKnownAlignment( - Copy->getSource(), AI.getAlignment(), DL, &AI, &AC, &DT); + Copy->getSource(), AI.getAlignment(), DL, &AI, &AC, &DT, KBC.get()); if (AI.getAlignment() <= SourceAlign) { DEBUG(dbgs() << "Found alloca equal to global: " << AI << '\n'); DEBUG(dbgs() << " memcpy = " << *Copy << '\n'); @@ -950,7 +950,7 @@ // Attempt to improve the alignment. unsigned KnownAlign = getOrEnforceKnownAlignment( - Op, DL.getPrefTypeAlignment(LI.getType()), DL, &LI, &AC, &DT); + Op, DL.getPrefTypeAlignment(LI.getType()), DL, &LI, &AC, &DT, KBC.get()); unsigned LoadAlign = LI.getAlignment(); unsigned EffectiveLoadAlign = LoadAlign != 0 ? LoadAlign : DL.getABITypeAlignment(LI.getType()); @@ -1299,7 +1299,8 @@ // Attempt to improve the alignment. unsigned KnownAlign = getOrEnforceKnownAlignment( - Ptr, DL.getPrefTypeAlignment(Val->getType()), DL, &SI, &AC, &DT); + Ptr, DL.getPrefTypeAlignment(Val->getType()), DL, &SI, &AC, &DT, + KBC.get()); unsigned StoreAlign = SI.getAlignment(); unsigned EffectiveStoreAlign = StoreAlign != 0 ? 
StoreAlign : DL.getABITypeAlignment(Val->getType()); Index: lib/Transforms/InstCombine/InstCombineMulDivRem.cpp =================================================================== --- lib/Transforms/InstCombine/InstCombineMulDivRem.cpp +++ lib/Transforms/InstCombine/InstCombineMulDivRem.cpp @@ -1240,7 +1240,7 @@ return BO; } - if (isKnownToBeAPowerOfTwo(Op1, DL, /*OrZero*/ true, 0, &AC, &I, &DT)) { + if (isKnownToBeAPowerOfTwo(Op1, DL, /*OrZero*/ true, 0, &AC, &I, &DT, KBC.get())) { // X sdiv (1 << Y) -> X udiv (1 << Y) ( -> X u>> Y) // Safe because the only negative value (1 << Y) can take on is // INT_MIN, and X sdiv INT_MIN == X udiv INT_MIN == 0 if X doesn't have @@ -1487,7 +1487,8 @@ I.getType()); // X urem Y -> X and Y-1, where Y is a power of 2, - if (isKnownToBeAPowerOfTwo(Op1, DL, /*OrZero*/ true, 0, &AC, &I, &DT)) { + if (isKnownToBeAPowerOfTwo(Op1, DL, /*OrZero*/ true, 0, &AC, &I, &DT, + KBC.get())) { Constant *N1 = Constant::getAllOnesValue(I.getType()); Value *Add = Builder->CreateAdd(Op1, N1); return BinaryOperator::CreateAnd(Op0, Add); Index: lib/Transforms/InstCombine/InstCombinePHI.cpp =================================================================== --- lib/Transforms/InstCombine/InstCombinePHI.cpp +++ lib/Transforms/InstCombine/InstCombinePHI.cpp @@ -937,7 +937,7 @@ for (unsigned i = 0, e = PN.getNumIncomingValues(); i != e; ++i) { Instruction *CtxI = PN.getIncomingBlock(i)->getTerminator(); Value *VA = PN.getIncomingValue(i); - if (isKnownNonZero(VA, DL, 0, &AC, CtxI, &DT)) { + if (isKnownNonZero(VA, DL, 0, &AC, CtxI, &DT, KBC.get())) { if (!NonZeroConst) NonZeroConst = GetAnyNonZeroConstInt(PN); PN.setIncomingValue(i, NonZeroConst); Index: lib/Transforms/Scalar/JumpThreading.cpp =================================================================== --- lib/Transforms/Scalar/JumpThreading.cpp +++ lib/Transforms/Scalar/JumpThreading.cpp @@ -1903,7 +1903,7 @@ // phi translation. if (Value *IV = SimplifyInstruction( New, - {BB->getModule()->getDataLayout(), TLI, nullptr, nullptr, New})) { + {BB->getModule()->getDataLayout(), TLI, nullptr, nullptr, nullptr, New})) { ValueMapping[&*BI] = IV; if (!New->mayHaveSideEffects()) { New->deleteValue(); Index: lib/Transforms/Utils/Local.cpp =================================================================== --- lib/Transforms/Utils/Local.cpp +++ lib/Transforms/Utils/Local.cpp @@ -1034,13 +1034,14 @@ const DataLayout &DL, const Instruction *CxtI, AssumptionCache *AC, - const DominatorTree *DT) { + const DominatorTree *DT, + KBCache *KBC) { assert(V->getType()->isPointerTy() && "getOrEnforceKnownAlignment expects a pointer!"); unsigned BitWidth = DL.getPointerTypeSizeInBits(V->getType()); KnownBits Known(BitWidth); - computeKnownBits(V, Known, DL, 0, AC, CxtI, DT); + computeKnownBits(V, Known, DL, 0, AC, CxtI, DT, KBC); unsigned TrailZ = Known.countMinTrailingZeros(); // Avoid trouble with ridiculously large TrailZ values, such as Index: lib/Transforms/Utils/SimplifyInstructions.cpp =================================================================== --- lib/Transforms/Utils/SimplifyInstructions.cpp +++ lib/Transforms/Utils/SimplifyInstructions.cpp @@ -112,7 +112,8 @@ OptimizationRemarkEmitter *ORE = &getAnalysis().getORE(); const DataLayout &DL = F.getParent()->getDataLayout(); - const SimplifyQuery SQ(DL, TLI, DT, AC); + // TODO add a KBCache here + const SimplifyQuery SQ(DL, TLI, DT, AC, nullptr); return runImpl(F, SQ, ORE); } };
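Usage sketch (not part of the patch): the fragment below shows one way a client could create the cache with makeKnownBitsCache() and thread it through both the ValueTracking and InstructionSimplify entry points, mirroring what InstCombiner does above. It assumes only the patched signatures shown in the headers; the helper countKnownConstants and its parameters are invented for illustration, and `auto` is used for the cache handle (presumably std::unique_ptr<KBCache, KBCacheDeleter>, per the ValueTracking.h declarations).

// Sketch only: thread the known-bits cache through SimplifyQuery and the
// ValueTracking entry points added by this patch.
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/InstIterator.h"
#include "llvm/IR/Module.h"
#include "llvm/Support/KnownBits.h"

using namespace llvm;

static unsigned countKnownConstants(Function &F, const TargetLibraryInfo *TLI,
                                    const DominatorTree *DT,
                                    AssumptionCache *AC) {
  const DataLayout &DL = F.getParent()->getDataLayout();

  // One cache per run over read-only IR; it is keyed on Value pointers, so it
  // must be dropped (or rebuilt) once instructions are actually rewritten.
  auto KBC = makeKnownBitsCache();

  // The cache rides along inside SimplifyQuery for the Simplify* helpers...
  const SimplifyQuery SQ(DL, TLI, DT, AC, KBC.get());

  unsigned NumConstant = 0;
  for (Instruction &I : instructions(F)) {
    // ...and can also be handed directly to the ValueTracking entry points.
    if (I.getType()->isIntOrIntVectorTy()) {
      KnownBits Known =
          computeKnownBits(&I, DL, /*Depth=*/0, AC, &I, DT, KBC.get());
      if (Known.isConstant())
        ++NumConstant;
    }
    // Repeated queries over the same values now hit the cache, except for
    // results that depended on the context instruction (e.g. via assumes),
    // which the cached computeKnownBits wrapper deliberately does not store.
    (void)SimplifyInstruction(&I, SQ.getWithInstruction(&I));
  }
  return NumConstant;
}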
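The KBCacheDeleter indirection in ValueTracking.h exists so that headers such as InstCombineInternal.h can hold a std::unique_ptr to KBCache while the class itself stays defined inside ValueTracking.cpp. A standalone sketch of that idiom follows, using hypothetical Widget/WidgetDeleter names rather than anything from this patch:

// Illustration only: unique_ptr ownership of a forward-declared type via a
// named deleter whose call operator is defined where the type is complete.
#include <memory>

// --- header visible to clients ------------------------------------------
class Widget;                                // intentionally incomplete here
struct WidgetDeleter {
  void operator()(Widget *W);                // defined next to Widget
};
std::unique_ptr<Widget, WidgetDeleter> makeWidget();

// --- implementation file -------------------------------------------------
class Widget {};

void WidgetDeleter::operator()(Widget *W) { delete W; }

std::unique_ptr<Widget, WidgetDeleter> makeWidget() {
  return std::unique_ptr<Widget, WidgetDeleter>(new Widget);
}

// --- client code -----------------------------------------------------------
// With the default deleter, destroying the unique_ptr would instantiate
// `delete` on an incomplete type; the named deleter avoids that, which is why
// InstCombineInternal.h can own the cache while KBCache stays private to
// ValueTracking.cpp.
int main() {
  auto W = makeWidget();
  return W != nullptr ? 0 : 1;
}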