Index: include/llvm/Analysis/InstructionSimplify.h =================================================================== --- include/llvm/Analysis/InstructionSimplify.h +++ include/llvm/Analysis/InstructionSimplify.h @@ -39,10 +39,11 @@ template class AnalysisManager; template class ArrayRef; class AssumptionCache; -class DominatorTree; -class Instruction; class DataLayout; +class DominatorTree; class FastMathFlags; +class Instruction; +class KBCache; struct LoopStandardAnalysisResults; class OptimizationRemarkEmitter; class Pass; @@ -55,6 +56,7 @@ const TargetLibraryInfo *TLI = nullptr; const DominatorTree *DT = nullptr; AssumptionCache *AC = nullptr; + KBCache *KBC = nullptr; const Instruction *CxtI = nullptr; SimplifyQuery(const DataLayout &DL, const Instruction *CXTI = nullptr) @@ -63,8 +65,9 @@ SimplifyQuery(const DataLayout &DL, const TargetLibraryInfo *TLI, const DominatorTree *DT = nullptr, AssumptionCache *AC = nullptr, + KBCache *KBC = nullptr, const Instruction *CXTI = nullptr) - : DL(DL), TLI(TLI), DT(DT), AC(AC), CxtI(CXTI) {} + : DL(DL), TLI(TLI), DT(DT), AC(AC), KBC(KBC), CxtI(CXTI) {} SimplifyQuery getWithInstruction(Instruction *I) const { SimplifyQuery Copy(*this); Copy.CxtI = I; @@ -217,7 +220,8 @@ bool replaceAndRecursivelySimplify(Instruction *I, Value *SimpleV, const TargetLibraryInfo *TLI = nullptr, const DominatorTree *DT = nullptr, - AssumptionCache *AC = nullptr); + AssumptionCache *AC = nullptr, + KBCache *KBC = nullptr); /// Recursively attempt to simplify an instruction. /// @@ -228,7 +232,8 @@ bool recursivelySimplifyInstruction(Instruction *I, const TargetLibraryInfo *TLI = nullptr, const DominatorTree *DT = nullptr, - AssumptionCache *AC = nullptr); + AssumptionCache *AC = nullptr, + KBCache *KBC = nullptr); // These helper functions return a SimplifyQuery structure that contains as // many of the optional analysis we use as are currently valid. This is the // strongly preferred way of constructing SimplifyQuery in passes. Index: include/llvm/Analysis/ValueTracking.h =================================================================== --- include/llvm/Analysis/ValueTracking.h +++ include/llvm/Analysis/ValueTracking.h @@ -42,6 +42,15 @@ enum ID : unsigned; } + // Declare the known-bits cache, and a deleter type so that we can make + // a unique_ptr of KBCache while allowing it to remain incomplete. + class KBCache; + struct KBCacheDeleter { + void operator()(KBCache *); + }; + + std::unique_ptr makeKnownBitsCache(); + /// Determine which bits of V are known to be either zero or one and return /// them in the KnownZero/KnownOne bit sets. /// @@ -55,12 +64,14 @@ AssumptionCache *AC = nullptr, const Instruction *CxtI = nullptr, const DominatorTree *DT = nullptr, + KBCache *KBC = nullptr, OptimizationRemarkEmitter *ORE = nullptr); /// Returns the known bits rather than passing by reference. KnownBits computeKnownBits(const Value *V, const DataLayout &DL, unsigned Depth = 0, AssumptionCache *AC = nullptr, const Instruction *CxtI = nullptr, - const DominatorTree *DT = nullptr); + const DominatorTree *DT = nullptr, + KBCache *KBC = nullptr); /// Compute known bits from the range metadata. 
/// \p KnownZero the set of bits that are known to be zero /// \p KnownOne the set of bits that are known to be one @@ -71,7 +82,8 @@ const DataLayout &DL, AssumptionCache *AC = nullptr, const Instruction *CxtI = nullptr, - const DominatorTree *DT = nullptr); + const DominatorTree *DT = nullptr, + KBCache *KBC = nullptr); /// Return true if the given value is known to have exactly one bit set when /// defined. For vectors return true if every element is known to be a power @@ -82,7 +94,8 @@ bool OrZero = false, unsigned Depth = 0, AssumptionCache *AC = nullptr, const Instruction *CxtI = nullptr, - const DominatorTree *DT = nullptr); + const DominatorTree *DT = nullptr, + KBCache *KBC = nullptr); /// Return true if the given value is known to be non-zero when defined. For /// vectors, return true if every element is known to be non-zero when @@ -93,35 +106,40 @@ bool isKnownNonZero(const Value *V, const DataLayout &DL, unsigned Depth = 0, AssumptionCache *AC = nullptr, const Instruction *CxtI = nullptr, - const DominatorTree *DT = nullptr); + const DominatorTree *DT = nullptr, + KBCache *KBC = nullptr); /// Returns true if the give value is known to be non-negative. bool isKnownNonNegative(const Value *V, const DataLayout &DL, unsigned Depth = 0, AssumptionCache *AC = nullptr, const Instruction *CxtI = nullptr, - const DominatorTree *DT = nullptr); + const DominatorTree *DT = nullptr, + KBCache *KBC = nullptr); /// Returns true if the given value is known be positive (i.e. non-negative /// and non-zero). bool isKnownPositive(const Value *V, const DataLayout &DL, unsigned Depth = 0, AssumptionCache *AC = nullptr, const Instruction *CxtI = nullptr, - const DominatorTree *DT = nullptr); + const DominatorTree *DT = nullptr, + KBCache *KBC = nullptr); /// Returns true if the given value is known be negative (i.e. non-positive /// and non-zero). bool isKnownNegative(const Value *V, const DataLayout &DL, unsigned Depth = 0, AssumptionCache *AC = nullptr, const Instruction *CxtI = nullptr, - const DominatorTree *DT = nullptr); + const DominatorTree *DT = nullptr, + KBCache *KBC = nullptr); /// Return true if the given values are known to be non-equal when defined. /// Supports scalar integer types only. bool isKnownNonEqual(const Value *V1, const Value *V2, const DataLayout &DL, AssumptionCache *AC = nullptr, const Instruction *CxtI = nullptr, - const DominatorTree *DT = nullptr); + const DominatorTree *DT = nullptr, + KBCache *KBC = nullptr); /// Return true if 'V & Mask' is known to be zero. We use this predicate to /// simplify operations downstream. Mask is known to be zero for bits that V @@ -136,7 +154,8 @@ const DataLayout &DL, unsigned Depth = 0, AssumptionCache *AC = nullptr, const Instruction *CxtI = nullptr, - const DominatorTree *DT = nullptr); + const DominatorTree *DT = nullptr, + KBCache *KBC = nullptr); /// Return the number of times the sign bit of the register is replicated into /// the other bits. We know that at least 1 bit is always equal to the sign @@ -148,7 +167,8 @@ unsigned ComputeNumSignBits(const Value *Op, const DataLayout &DL, unsigned Depth = 0, AssumptionCache *AC = nullptr, const Instruction *CxtI = nullptr, - const DominatorTree *DT = nullptr); + const DominatorTree *DT = nullptr, + KBCache *KBC = nullptr); /// This function computes the integer multiple of Base that equals V. If /// successful, it returns true and returns the multiple in Multiple. 
If @@ -374,24 +394,28 @@ const DataLayout &DL, AssumptionCache *AC, const Instruction *CxtI, - const DominatorTree *DT); + const DominatorTree *DT, + KBCache *KBC = nullptr); OverflowResult computeOverflowForUnsignedAdd(const Value *LHS, const Value *RHS, const DataLayout &DL, AssumptionCache *AC, const Instruction *CxtI, - const DominatorTree *DT); + const DominatorTree *DT, + KBCache *KBC = nullptr); OverflowResult computeOverflowForSignedAdd(const Value *LHS, const Value *RHS, const DataLayout &DL, AssumptionCache *AC = nullptr, const Instruction *CxtI = nullptr, - const DominatorTree *DT = nullptr); + const DominatorTree *DT = nullptr, + KBCache *KBC = nullptr); /// This version also leverages the sign bit of Add if known. OverflowResult computeOverflowForSignedAdd(const AddOperator *Add, const DataLayout &DL, AssumptionCache *AC = nullptr, const Instruction *CxtI = nullptr, - const DominatorTree *DT = nullptr); + const DominatorTree *DT = nullptr, + KBCache *KBC = nullptr); /// Returns true if the arithmetic part of the \p II 's result is /// used only along the paths control dependent on the computation @@ -524,7 +548,8 @@ unsigned Depth = 0, AssumptionCache *AC = nullptr, const Instruction *CxtI = nullptr, - const DominatorTree *DT = nullptr); + const DominatorTree *DT = nullptr, + KBCache *KBC = nullptr); } // end namespace llvm #endif Index: include/llvm/Transforms/Utils/Local.h =================================================================== --- include/llvm/Transforms/Utils/Local.h +++ include/llvm/Transforms/Utils/Local.h @@ -39,6 +39,7 @@ class PHINode; class AllocaInst; class AssumptionCache; +class KBCache; class ConstantExpr; class DataLayout; class TargetLibraryInfo; @@ -182,14 +183,16 @@ const DataLayout &DL, const Instruction *CxtI = nullptr, AssumptionCache *AC = nullptr, - const DominatorTree *DT = nullptr); + const DominatorTree *DT = nullptr, + KBCache *KBC = nullptr); /// Try to infer an alignment for the specified pointer. static inline unsigned getKnownAlignment(Value *V, const DataLayout &DL, const Instruction *CxtI = nullptr, AssumptionCache *AC = nullptr, - const DominatorTree *DT = nullptr) { - return getOrEnforceKnownAlignment(V, 0, DL, CxtI, AC, DT); + const DominatorTree *DT = nullptr, + KBCache *KBC = nullptr) { + return getOrEnforceKnownAlignment(V, 0, DL, CxtI, AC, DT, KBC); } /// Given a getelementptr instruction/constantexpr, emit the code necessary to Index: lib/Analysis/InstructionSimplify.cpp =================================================================== --- lib/Analysis/InstructionSimplify.cpp +++ lib/Analysis/InstructionSimplify.cpp @@ -690,7 +690,7 @@ unsigned BitWidth = Op1->getType()->getScalarSizeInBits(); KnownBits Known(BitWidth); - computeKnownBits(Op1, Known, Q.DL, 0, Q.AC, Q.CxtI, Q.DT); + computeKnownBits(Op1, Known, Q.DL, 0, Q.AC, Q.CxtI, Q.DT, Q.KBC); if (Known.Zero.isMaxSignedValue()) { // Op1 is either 0 or the minimum signed value. If the sub is NSW, then // Op1 must be 0 because negating the minimum signed value is undefined. @@ -1311,7 +1311,7 @@ // the number of bits in the type, the shift is undefined. 
unsigned BitWidth = Op1->getType()->getScalarSizeInBits(); KnownBits Known(BitWidth); - computeKnownBits(Op1, Known, Q.DL, 0, Q.AC, Q.CxtI, Q.DT); + computeKnownBits(Op1, Known, Q.DL, 0, Q.AC, Q.CxtI, Q.DT, Q.KBC); if (Known.One.getLimitedValue() >= BitWidth) return UndefValue::get(Op0->getType()); @@ -1345,7 +1345,7 @@ if (isExact) { unsigned BitWidth = Op0->getType()->getScalarSizeInBits(); KnownBits Op0Known(BitWidth); - computeKnownBits(Op0, Op0Known, Q.DL, /*Depth=*/0, Q.AC, Q.CxtI, Q.DT); + computeKnownBits(Op0, Op0Known, Q.DL, /*Depth=*/0, Q.AC, Q.CxtI, Q.DT, Q.KBC); if (Op0Known.One[0]) return Op0; } @@ -1416,7 +1416,8 @@ return X; // Arithmetic shifting an all-sign-bit value is a no-op. - unsigned NumSignBits = ComputeNumSignBits(Op0, Q.DL, 0, Q.AC, Q.CxtI, Q.DT); + unsigned NumSignBits = + ComputeNumSignBits(Op0, Q.DL, 0, Q.AC, Q.CxtI, Q.DT, Q.KBC); if (NumSignBits == Op0->getType()->getScalarSizeInBits()) return Op0; @@ -1775,10 +1776,10 @@ if (match(Op0, m_Neg(m_Specific(Op1))) || match(Op1, m_Neg(m_Specific(Op0)))) { if (isKnownToBeAPowerOfTwo(Op0, Q.DL, /*OrZero*/ true, 0, Q.AC, Q.CxtI, - Q.DT)) + Q.DT, Q.KBC)) return Op0; if (isKnownToBeAPowerOfTwo(Op1, Q.DL, /*OrZero*/ true, 0, Q.AC, Q.CxtI, - Q.DT)) + Q.DT, Q.KBC)) return Op1; } @@ -1943,10 +1944,12 @@ match(A, m_Add(m_Value(V1), m_Value(V2)))) { // Add commutes, try both ways. if (V1 == B && - MaskedValueIsZero(V2, C2->getValue(), Q.DL, 0, Q.AC, Q.CxtI, Q.DT)) + MaskedValueIsZero(V2, C2->getValue(), Q.DL, 0, Q.AC, Q.CxtI, Q.DT, + Q.KBC)) return A; if (V2 == B && - MaskedValueIsZero(V1, C2->getValue(), Q.DL, 0, Q.AC, Q.CxtI, Q.DT)) + MaskedValueIsZero(V1, C2->getValue(), Q.DL, 0, Q.AC, Q.CxtI, Q.DT, + Q.KBC)) return A; } // Or commutes, try both ways. @@ -1954,10 +1957,12 @@ match(B, m_Add(m_Value(V1), m_Value(V2)))) { // Add commutes, try both ways. 
if (V1 == A && - MaskedValueIsZero(V2, C1->getValue(), Q.DL, 0, Q.AC, Q.CxtI, Q.DT)) + MaskedValueIsZero(V2, C1->getValue(), Q.DL, 0, Q.AC, Q.CxtI, Q.DT, + Q.KBC)) return B; if (V2 == A && - MaskedValueIsZero(V1, C1->getValue(), Q.DL, 0, Q.AC, Q.CxtI, Q.DT)) + MaskedValueIsZero(V1, C1->getValue(), Q.DL, 0, Q.AC, Q.CxtI, Q.DT, + Q.KBC)) return B; } } @@ -2343,16 +2348,16 @@ return getTrue(ITy); case ICmpInst::ICMP_EQ: case ICmpInst::ICMP_ULE: - if (isKnownNonZero(LHS, Q.DL, 0, Q.AC, Q.CxtI, Q.DT)) + if (isKnownNonZero(LHS, Q.DL, 0, Q.AC, Q.CxtI, Q.DT, Q.KBC)) return getFalse(ITy); break; case ICmpInst::ICMP_NE: case ICmpInst::ICMP_UGT: - if (isKnownNonZero(LHS, Q.DL, 0, Q.AC, Q.CxtI, Q.DT)) + if (isKnownNonZero(LHS, Q.DL, 0, Q.AC, Q.CxtI, Q.DT, Q.KBC)) return getTrue(ITy); break; case ICmpInst::ICMP_SLT: { - KnownBits LHSKnown = computeKnownBits(LHS, Q.DL, 0, Q.AC, Q.CxtI, Q.DT); + KnownBits LHSKnown = computeKnownBits(LHS, Q.DL, 0, Q.AC, Q.CxtI, Q.DT, Q.KBC); if (LHSKnown.isNegative()) return getTrue(ITy); if (LHSKnown.isNonNegative()) @@ -2360,16 +2365,16 @@ break; } case ICmpInst::ICMP_SLE: { - KnownBits LHSKnown = computeKnownBits(LHS, Q.DL, 0, Q.AC, Q.CxtI, Q.DT); + KnownBits LHSKnown = computeKnownBits(LHS, Q.DL, 0, Q.AC, Q.CxtI, Q.DT, Q.KBC); if (LHSKnown.isNegative()) return getTrue(ITy); if (LHSKnown.isNonNegative() && - isKnownNonZero(LHS, Q.DL, 0, Q.AC, Q.CxtI, Q.DT)) + isKnownNonZero(LHS, Q.DL, 0, Q.AC, Q.CxtI, Q.DT, Q.KBC)) return getFalse(ITy); break; } case ICmpInst::ICMP_SGE: { - KnownBits LHSKnown = computeKnownBits(LHS, Q.DL, 0, Q.AC, Q.CxtI, Q.DT); + KnownBits LHSKnown = computeKnownBits(LHS, Q.DL, 0, Q.AC, Q.CxtI, Q.DT, Q.KBC); if (LHSKnown.isNegative()) return getFalse(ITy); if (LHSKnown.isNonNegative()) @@ -2377,11 +2382,11 @@ break; } case ICmpInst::ICMP_SGT: { - KnownBits LHSKnown = computeKnownBits(LHS, Q.DL, 0, Q.AC, Q.CxtI, Q.DT); + KnownBits LHSKnown = computeKnownBits(LHS, Q.DL, 0, Q.AC, Q.CxtI, Q.DT, Q.KBC); if (LHSKnown.isNegative()) return getFalse(ITy); if (LHSKnown.isNonNegative() && - isKnownNonZero(LHS, Q.DL, 0, Q.AC, Q.CxtI, Q.DT)) + isKnownNonZero(LHS, Q.DL, 0, Q.AC, Q.CxtI, Q.DT, Q.KBC)) return getTrue(ITy); break; } @@ -2668,8 +2673,8 @@ return getTrue(ITy); if (Pred == ICmpInst::ICMP_SLT || Pred == ICmpInst::ICMP_SGE) { - KnownBits RHSKnown = computeKnownBits(RHS, Q.DL, 0, Q.AC, Q.CxtI, Q.DT); - KnownBits YKnown = computeKnownBits(Y, Q.DL, 0, Q.AC, Q.CxtI, Q.DT); + KnownBits RHSKnown = computeKnownBits(RHS, Q.DL, 0, Q.AC, Q.CxtI, Q.DT, Q.KBC); + KnownBits YKnown = computeKnownBits(Y, Q.DL, 0, Q.AC, Q.CxtI, Q.DT, Q.KBC); if (RHSKnown.isNonNegative() && YKnown.isNegative()) return Pred == ICmpInst::ICMP_SLT ? getTrue(ITy) : getFalse(ITy); if (RHSKnown.isNegative() || YKnown.isNonNegative()) @@ -2684,8 +2689,8 @@ return getFalse(ITy); if (Pred == ICmpInst::ICMP_SGT || Pred == ICmpInst::ICMP_SLE) { - KnownBits LHSKnown = computeKnownBits(LHS, Q.DL, 0, Q.AC, Q.CxtI, Q.DT); - KnownBits YKnown = computeKnownBits(Y, Q.DL, 0, Q.AC, Q.CxtI, Q.DT); + KnownBits LHSKnown = computeKnownBits(LHS, Q.DL, 0, Q.AC, Q.CxtI, Q.DT, Q.KBC); + KnownBits YKnown = computeKnownBits(Y, Q.DL, 0, Q.AC, Q.CxtI, Q.DT, Q.KBC); if (LHSKnown.isNonNegative() && YKnown.isNegative()) return Pred == ICmpInst::ICMP_SGT ? 
getTrue(ITy) : getFalse(ITy); if (LHSKnown.isNegative() || YKnown.isNonNegative()) @@ -2740,7 +2745,7 @@ break; case ICmpInst::ICMP_SGT: case ICmpInst::ICMP_SGE: { - KnownBits Known = computeKnownBits(RHS, Q.DL, 0, Q.AC, Q.CxtI, Q.DT); + KnownBits Known = computeKnownBits(RHS, Q.DL, 0, Q.AC, Q.CxtI, Q.DT, Q.KBC); if (!Known.isNonNegative()) break; LLVM_FALLTHROUGH; @@ -2751,7 +2756,7 @@ return getFalse(ITy); case ICmpInst::ICMP_SLT: case ICmpInst::ICMP_SLE: { - KnownBits Known = computeKnownBits(RHS, Q.DL, 0, Q.AC, Q.CxtI, Q.DT); + KnownBits Known = computeKnownBits(RHS, Q.DL, 0, Q.AC, Q.CxtI, Q.DT, Q.KBC); if (!Known.isNonNegative()) break; LLVM_FALLTHROUGH; @@ -2770,7 +2775,7 @@ break; case ICmpInst::ICMP_SGT: case ICmpInst::ICMP_SGE: { - KnownBits Known = computeKnownBits(LHS, Q.DL, 0, Q.AC, Q.CxtI, Q.DT); + KnownBits Known = computeKnownBits(LHS, Q.DL, 0, Q.AC, Q.CxtI, Q.DT, Q.KBC); if (!Known.isNonNegative()) break; LLVM_FALLTHROUGH; @@ -2781,7 +2786,7 @@ return getTrue(ITy); case ICmpInst::ICMP_SLT: case ICmpInst::ICMP_SLE: { - KnownBits Known = computeKnownBits(LHS, Q.DL, 0, Q.AC, Q.CxtI, Q.DT); + KnownBits Known = computeKnownBits(LHS, Q.DL, 0, Q.AC, Q.CxtI, Q.DT, Q.KBC); if (!Known.isNonNegative()) break; LLVM_FALLTHROUGH; @@ -3316,7 +3321,7 @@ // icmp eq|ne X, Y -> false|true if X != Y if ((Pred == ICmpInst::ICMP_EQ || Pred == ICmpInst::ICMP_NE) && - isKnownNonEqual(LHS, RHS, Q.DL, Q.AC, Q.CxtI, Q.DT)) { + isKnownNonEqual(LHS, RHS, Q.DL, Q.AC, Q.CxtI, Q.DT, Q.KBC)) { LLVMContext &Ctx = LHS->getType()->getContext(); return Pred == ICmpInst::ICMP_NE ? ConstantInt::getTrue(Ctx) : ConstantInt::getFalse(Ctx); @@ -3374,7 +3379,7 @@ if (match(RHS, m_APInt(RHSVal))) { unsigned BitWidth = RHSVal->getBitWidth(); KnownBits LHSKnown(BitWidth); - computeKnownBits(LHS, LHSKnown, Q.DL, /*Depth=*/0, Q.AC, Q.CxtI, Q.DT); + computeKnownBits(LHS, LHSKnown, Q.DL, /*Depth=*/0, Q.AC, Q.CxtI, Q.DT, Q.KBC); if (LHSKnown.Zero.intersects(*RHSVal) || !LHSKnown.One.isSubsetOf(*RHSVal)) return Pred == ICmpInst::ICMP_EQ ? 
ConstantInt::getFalse(ITy) @@ -4682,7 +4687,7 @@ if (!Result && I->getType()->isIntOrIntVectorTy()) { unsigned BitWidth = I->getType()->getScalarSizeInBits(); KnownBits Known(BitWidth); - computeKnownBits(I, Known, Q.DL, /*Depth*/ 0, Q.AC, I, Q.DT, ORE); + computeKnownBits(I, Known, Q.DL, /*Depth*/ 0, Q.AC, I, Q.DT, Q.KBC, ORE); if (Known.isConstant()) Result = ConstantInt::get(I->getType(), Known.getConstant()); } @@ -4707,7 +4712,8 @@ static bool replaceAndRecursivelySimplifyImpl(Instruction *I, Value *SimpleV, const TargetLibraryInfo *TLI, const DominatorTree *DT, - AssumptionCache *AC) { + AssumptionCache *AC, + KBCache *KBC) { bool Simplified = false; SmallSetVector Worklist; const DataLayout &DL = I->getModule()->getDataLayout(); @@ -4763,17 +4769,17 @@ bool llvm::recursivelySimplifyInstruction(Instruction *I, const TargetLibraryInfo *TLI, const DominatorTree *DT, - AssumptionCache *AC) { - return replaceAndRecursivelySimplifyImpl(I, nullptr, TLI, DT, AC); + AssumptionCache *AC, KBCache *KBC) { + return replaceAndRecursivelySimplifyImpl(I, nullptr, TLI, DT, AC, KBC); } bool llvm::replaceAndRecursivelySimplify(Instruction *I, Value *SimpleV, const TargetLibraryInfo *TLI, const DominatorTree *DT, - AssumptionCache *AC) { + AssumptionCache *AC, KBCache *KBC) { assert(I != SimpleV && "replaceAndRecursivelySimplify(X,X) is not valid!"); assert(SimpleV && "Must provide a simplified value."); - return replaceAndRecursivelySimplifyImpl(I, SimpleV, TLI, DT, AC); + return replaceAndRecursivelySimplifyImpl(I, SimpleV, TLI, DT, AC, KBC); } namespace llvm { Index: lib/Analysis/ValueTracking.cpp =================================================================== --- lib/Analysis/ValueTracking.cpp +++ lib/Analysis/ValueTracking.cpp @@ -38,6 +38,7 @@ #include "llvm/IR/Operator.h" #include "llvm/IR/PatternMatch.h" #include "llvm/IR/Statepoint.h" +#include "llvm/IR/ValueMap.h" #include "llvm/Support/Debug.h" #include "llvm/Support/KnownBits.h" #include "llvm/Support/MathExtras.h" @@ -60,6 +61,9 @@ DontImproveNonNegativePhiBits("dont-improve-non-negative-phi-bits", cl::Hidden, cl::init(true)); +static cl::opt +UseCache("cache-known-bits", cl::Hidden, cl::init(true)); + /// Returns the bitwidth of the given scalar or pointer type. For vector types, /// returns the element type's bitwidth. static unsigned getBitWidth(Type *Ty, const DataLayout &DL) { @@ -79,6 +83,7 @@ AssumptionCache *AC; const Instruction *CxtI; const DominatorTree *DT; + KBCache *KBC; // Unlike the other analyses, this may be a nullptr because not all clients // provide it currently. OptimizationRemarkEmitter *ORE; @@ -94,18 +99,29 @@ std::array Excluded; unsigned NumExcluded; + // Is this query potentially sensitive on the context instruction. 
+ bool *ContextSensitive; + Query(const DataLayout &DL, AssumptionCache *AC, const Instruction *CxtI, - const DominatorTree *DT, OptimizationRemarkEmitter *ORE = nullptr) - : DL(DL), AC(AC), CxtI(CxtI), DT(DT), ORE(ORE), NumExcluded(0) {} + const DominatorTree *DT, KBCache *KBC, + OptimizationRemarkEmitter *ORE = nullptr) + : DL(DL), AC(AC), CxtI(CxtI), DT(DT), KBC(KBC), ORE(ORE), + NumExcluded(0), ContextSensitive(nullptr) {} Query(const Query &Q, const Value *NewExcl) - : DL(Q.DL), AC(Q.AC), CxtI(Q.CxtI), DT(Q.DT), ORE(Q.ORE), - NumExcluded(Q.NumExcluded) { + : DL(Q.DL), AC(Q.AC), CxtI(Q.CxtI), DT(Q.DT), KBC(Q.KBC), + ORE(Q.ORE), NumExcluded(Q.NumExcluded), + ContextSensitive(Q.ContextSensitive) { Excluded = Q.Excluded; Excluded[NumExcluded++] = NewExcl; assert(NumExcluded <= Excluded.size()); } + Query(const Query &Q, bool *NewCS) + : DL(Q.DL), AC(Q.AC), CxtI(Q.CxtI), DT(Q.DT), KBC(Q.KBC), + ORE(Q.ORE), Excluded(Q.Excluded), NumExcluded(Q.NumExcluded), + ContextSensitive(NewCS) {} + bool isExcluded(const Value *Value) const { if (NumExcluded == 0) return false; @@ -131,16 +147,64 @@ return nullptr; } +namespace llvm { +class KBCache { + ValueMap<const Value *, KnownBits> KnownMap; + +public: + void cache(const Value *V, const KnownBits &Known) { + KnownMap[V] = Known; + } + + bool find(const Value *V, KnownBits &Known) { + auto KBI = KnownMap.find(V); + if (KBI == KnownMap.end()) + return false; + + Known = KBI->second; + return true; + } +}; + +void KBCacheDeleter::operator()(KBCache *KBC) { + delete KBC; +} + +std::unique_ptr<KBCache, KBCacheDeleter> makeKnownBitsCache() { + return std::unique_ptr<KBCache, KBCacheDeleter>(new KBCache); +} +} // namespace llvm + +static void computeKnownBitsNotCached(const Value *V, KnownBits &Known, + unsigned Depth, const Query &Q); + static void computeKnownBits(const Value *V, KnownBits &Known, - unsigned Depth, const Query &Q); + unsigned Depth, const Query &Q) { + if (Q.KBC && UseCache && Q.KBC->find(V, Known)) + return; + + bool ContextSensitive; + ::computeKnownBitsNotCached(V, Known, Depth, Query(Q, &ContextSensitive)); + + // If this query is context-sensitive, then so is its parent query.
+ if (ContextSensitive) { + if (Q.ContextSensitive) + *Q.ContextSensitive = true; + + return; + } + + if (Q.KBC && UseCache) + Q.KBC->cache(V, Known); +} void llvm::computeKnownBits(const Value *V, KnownBits &Known, const DataLayout &DL, unsigned Depth, AssumptionCache *AC, const Instruction *CxtI, - const DominatorTree *DT, + const DominatorTree *DT, KBCache *KBC, OptimizationRemarkEmitter *ORE) { ::computeKnownBits(V, Known, Depth, - Query(DL, AC, safeCxtI(V, CxtI), DT, ORE)); + Query(DL, AC, safeCxtI(V, CxtI), DT, KBC, ORE)); } static KnownBits computeKnownBits(const Value *V, unsigned Depth, @@ -149,14 +213,14 @@ KnownBits llvm::computeKnownBits(const Value *V, const DataLayout &DL, unsigned Depth, AssumptionCache *AC, const Instruction *CxtI, - const DominatorTree *DT) { - return ::computeKnownBits(V, Depth, Query(DL, AC, safeCxtI(V, CxtI), DT)); + const DominatorTree *DT, KBCache *KBC) { + return ::computeKnownBits(V, Depth, Query(DL, AC, safeCxtI(V, CxtI), DT, KBC)); } bool llvm::haveNoCommonBitsSet(const Value *LHS, const Value *RHS, const DataLayout &DL, AssumptionCache *AC, const Instruction *CxtI, - const DominatorTree *DT) { + const DominatorTree *DT, KBCache *KBC) { assert(LHS->getType() == RHS->getType() && "LHS and RHS should have the same type"); assert(LHS->getType()->isIntOrIntVectorTy() && @@ -164,8 +228,8 @@ IntegerType *IT = cast(LHS->getType()->getScalarType()); KnownBits LHSKnown(IT->getBitWidth()); KnownBits RHSKnown(IT->getBitWidth()); - computeKnownBits(LHS, LHSKnown, DL, 0, AC, CxtI, DT); - computeKnownBits(RHS, RHSKnown, DL, 0, AC, CxtI, DT); + computeKnownBits(LHS, LHSKnown, DL, 0, AC, CxtI, DT, KBC); + computeKnownBits(RHS, RHSKnown, DL, 0, AC, CxtI, DT, KBC); return (LHSKnown.Zero | RHSKnown.Zero).isAllOnesValue(); } @@ -177,43 +241,43 @@ bool OrZero, unsigned Depth, AssumptionCache *AC, const Instruction *CxtI, - const DominatorTree *DT) { + const DominatorTree *DT, KBCache *KBC) { return ::isKnownToBeAPowerOfTwo(V, OrZero, Depth, - Query(DL, AC, safeCxtI(V, CxtI), DT)); + Query(DL, AC, safeCxtI(V, CxtI), DT, KBC)); } static bool isKnownNonZero(const Value *V, unsigned Depth, const Query &Q); bool llvm::isKnownNonZero(const Value *V, const DataLayout &DL, unsigned Depth, AssumptionCache *AC, const Instruction *CxtI, - const DominatorTree *DT) { - return ::isKnownNonZero(V, Depth, Query(DL, AC, safeCxtI(V, CxtI), DT)); + const DominatorTree *DT, KBCache *KBC) { + return ::isKnownNonZero(V, Depth, Query(DL, AC, safeCxtI(V, CxtI), DT, KBC)); } bool llvm::isKnownNonNegative(const Value *V, const DataLayout &DL, unsigned Depth, AssumptionCache *AC, const Instruction *CxtI, - const DominatorTree *DT) { - KnownBits Known = computeKnownBits(V, DL, Depth, AC, CxtI, DT); + const DominatorTree *DT, KBCache *KBC) { + KnownBits Known = computeKnownBits(V, DL, Depth, AC, CxtI, DT, KBC); return Known.isNonNegative(); } bool llvm::isKnownPositive(const Value *V, const DataLayout &DL, unsigned Depth, AssumptionCache *AC, const Instruction *CxtI, - const DominatorTree *DT) { + const DominatorTree *DT, KBCache *KBC) { if (auto *CI = dyn_cast(V)) return CI->getValue().isStrictlyPositive(); // TODO: We'd doing two recursive queries here. We should factor this such // that only a single query is needed. 
- return isKnownNonNegative(V, DL, Depth, AC, CxtI, DT) && - isKnownNonZero(V, DL, Depth, AC, CxtI, DT); + return isKnownNonNegative(V, DL, Depth, AC, CxtI, DT, KBC) && + isKnownNonZero(V, DL, Depth, AC, CxtI, DT, KBC); } bool llvm::isKnownNegative(const Value *V, const DataLayout &DL, unsigned Depth, AssumptionCache *AC, const Instruction *CxtI, - const DominatorTree *DT) { - KnownBits Known = computeKnownBits(V, DL, Depth, AC, CxtI, DT); + const DominatorTree *DT, KBCache *KBC) { + KnownBits Known = computeKnownBits(V, DL, Depth, AC, CxtI, DT, KBC); return Known.isNegative(); } @@ -222,10 +286,10 @@ bool llvm::isKnownNonEqual(const Value *V1, const Value *V2, const DataLayout &DL, AssumptionCache *AC, const Instruction *CxtI, - const DominatorTree *DT) { + const DominatorTree *DT, KBCache *KBC) { return ::isKnownNonEqual(V1, V2, Query(DL, AC, safeCxtI(V1, safeCxtI(V2, CxtI)), - DT)); + DT, KBC)); } static bool MaskedValueIsZero(const Value *V, const APInt &Mask, unsigned Depth, @@ -234,9 +298,10 @@ bool llvm::MaskedValueIsZero(const Value *V, const APInt &Mask, const DataLayout &DL, unsigned Depth, AssumptionCache *AC, - const Instruction *CxtI, const DominatorTree *DT) { + const Instruction *CxtI, const DominatorTree *DT, + KBCache *KBC) { return ::MaskedValueIsZero(V, Mask, Depth, - Query(DL, AC, safeCxtI(V, CxtI), DT)); + Query(DL, AC, safeCxtI(V, CxtI), DT, KBC)); } static unsigned ComputeNumSignBits(const Value *V, unsigned Depth, @@ -245,8 +310,8 @@ unsigned llvm::ComputeNumSignBits(const Value *V, const DataLayout &DL, unsigned Depth, AssumptionCache *AC, const Instruction *CxtI, - const DominatorTree *DT) { - return ::ComputeNumSignBits(V, Depth, Query(DL, AC, safeCxtI(V, CxtI), DT)); + const DominatorTree *DT, KBCache *KBC) { + return ::ComputeNumSignBits(V, Depth, Query(DL, AC, safeCxtI(V, CxtI), DT, KBC)); } static void computeKnownBitsAddSub(bool Add, const Value *Op0, const Value *Op1, @@ -446,6 +511,16 @@ return false; } +static bool isValidAssumeForContext(const Instruction *Inv, + const Query &Q) { + // The fact that we're checking an assumption means that this query is + // potentially context sensitive... 
+ if (Q.ContextSensitive) + *Q.ContextSensitive = true; + + return isValidAssumeForContext(Inv, Q.CxtI, Q.DT); +} + bool llvm::isValidAssumeForContext(const Instruction *Inv, const Instruction *CxtI, const DominatorTree *DT) { @@ -524,13 +599,13 @@ Value *Arg = I->getArgOperand(0); - if (Arg == V && isValidAssumeForContext(I, Q.CxtI, Q.DT)) { + if (Arg == V && isValidAssumeForContext(I, Q)) { assert(BitWidth == 1 && "assume operand is not i1?"); Known.setAllOnes(); return; } if (match(Arg, m_Not(m_Specific(V))) && - isValidAssumeForContext(I, Q.CxtI, Q.DT)) { + isValidAssumeForContext(I, Q)) { assert(BitWidth == 1 && "assume operand is not i1?"); Known.setAllZero(); return; @@ -549,7 +624,7 @@ ConstantInt *C; // assume(v = a) if (match(Arg, m_c_ICmp(Pred, m_V, m_Value(A))) && - Pred == ICmpInst::ICMP_EQ && isValidAssumeForContext(I, Q.CxtI, Q.DT)) { + Pred == ICmpInst::ICMP_EQ && isValidAssumeForContext(I, Q)) { KnownBits RHSKnown(BitWidth); computeKnownBits(A, RHSKnown, Depth+1, Query(Q, I)); Known.Zero |= RHSKnown.Zero; @@ -558,7 +633,7 @@ } else if (match(Arg, m_c_ICmp(Pred, m_c_And(m_V, m_Value(B)), m_Value(A))) && Pred == ICmpInst::ICMP_EQ && - isValidAssumeForContext(I, Q.CxtI, Q.DT)) { + isValidAssumeForContext(I, Q)) { KnownBits RHSKnown(BitWidth); computeKnownBits(A, RHSKnown, Depth+1, Query(Q, I)); KnownBits MaskKnown(BitWidth); @@ -572,7 +647,7 @@ } else if (match(Arg, m_c_ICmp(Pred, m_Not(m_c_And(m_V, m_Value(B))), m_Value(A))) && Pred == ICmpInst::ICMP_EQ && - isValidAssumeForContext(I, Q.CxtI, Q.DT)) { + isValidAssumeForContext(I, Q)) { KnownBits RHSKnown(BitWidth); computeKnownBits(A, RHSKnown, Depth+1, Query(Q, I)); KnownBits MaskKnown(BitWidth); @@ -586,7 +661,7 @@ } else if (match(Arg, m_c_ICmp(Pred, m_c_Or(m_V, m_Value(B)), m_Value(A))) && Pred == ICmpInst::ICMP_EQ && - isValidAssumeForContext(I, Q.CxtI, Q.DT)) { + isValidAssumeForContext(I, Q)) { KnownBits RHSKnown(BitWidth); computeKnownBits(A, RHSKnown, Depth+1, Query(Q, I)); KnownBits BKnown(BitWidth); @@ -600,7 +675,7 @@ } else if (match(Arg, m_c_ICmp(Pred, m_Not(m_c_Or(m_V, m_Value(B))), m_Value(A))) && Pred == ICmpInst::ICMP_EQ && - isValidAssumeForContext(I, Q.CxtI, Q.DT)) { + isValidAssumeForContext(I, Q)) { KnownBits RHSKnown(BitWidth); computeKnownBits(A, RHSKnown, Depth+1, Query(Q, I)); KnownBits BKnown(BitWidth); @@ -614,7 +689,7 @@ } else if (match(Arg, m_c_ICmp(Pred, m_c_Xor(m_V, m_Value(B)), m_Value(A))) && Pred == ICmpInst::ICMP_EQ && - isValidAssumeForContext(I, Q.CxtI, Q.DT)) { + isValidAssumeForContext(I, Q)) { KnownBits RHSKnown(BitWidth); computeKnownBits(A, RHSKnown, Depth+1, Query(Q, I)); KnownBits BKnown(BitWidth); @@ -631,7 +706,7 @@ } else if (match(Arg, m_c_ICmp(Pred, m_Not(m_c_Xor(m_V, m_Value(B))), m_Value(A))) && Pred == ICmpInst::ICMP_EQ && - isValidAssumeForContext(I, Q.CxtI, Q.DT)) { + isValidAssumeForContext(I, Q)) { KnownBits RHSKnown(BitWidth); computeKnownBits(A, RHSKnown, Depth+1, Query(Q, I)); KnownBits BKnown(BitWidth); @@ -648,7 +723,7 @@ } else if (match(Arg, m_c_ICmp(Pred, m_Shl(m_V, m_ConstantInt(C)), m_Value(A))) && Pred == ICmpInst::ICMP_EQ && - isValidAssumeForContext(I, Q.CxtI, Q.DT)) { + isValidAssumeForContext(I, Q)) { KnownBits RHSKnown(BitWidth); computeKnownBits(A, RHSKnown, Depth+1, Query(Q, I)); // For those bits in RHS that are known, we can propagate them to known @@ -661,7 +736,7 @@ } else if (match(Arg, m_c_ICmp(Pred, m_Not(m_Shl(m_V, m_ConstantInt(C))), m_Value(A))) && Pred == ICmpInst::ICMP_EQ && - isValidAssumeForContext(I, Q.CxtI, Q.DT)) { + 
isValidAssumeForContext(I, Q)) { KnownBits RHSKnown(BitWidth); computeKnownBits(A, RHSKnown, Depth+1, Query(Q, I)); // For those bits in RHS that are known, we can propagate them inverted @@ -676,7 +751,7 @@ m_AShr(m_V, m_ConstantInt(C))), m_Value(A))) && Pred == ICmpInst::ICMP_EQ && - isValidAssumeForContext(I, Q.CxtI, Q.DT)) { + isValidAssumeForContext(I, Q)) { KnownBits RHSKnown(BitWidth); computeKnownBits(A, RHSKnown, Depth+1, Query(Q, I)); // For those bits in RHS that are known, we can propagate them to known @@ -689,7 +764,7 @@ m_AShr(m_V, m_ConstantInt(C)))), m_Value(A))) && Pred == ICmpInst::ICMP_EQ && - isValidAssumeForContext(I, Q.CxtI, Q.DT)) { + isValidAssumeForContext(I, Q)) { KnownBits RHSKnown(BitWidth); computeKnownBits(A, RHSKnown, Depth+1, Query(Q, I)); // For those bits in RHS that are known, we can propagate them inverted @@ -699,7 +774,7 @@ // assume(v >=_s c) where c is non-negative } else if (match(Arg, m_ICmp(Pred, m_V, m_Value(A))) && Pred == ICmpInst::ICMP_SGE && - isValidAssumeForContext(I, Q.CxtI, Q.DT)) { + isValidAssumeForContext(I, Q)) { KnownBits RHSKnown(BitWidth); computeKnownBits(A, RHSKnown, Depth+1, Query(Q, I)); @@ -710,7 +785,7 @@ // assume(v >_s c) where c is at least -1. } else if (match(Arg, m_ICmp(Pred, m_V, m_Value(A))) && Pred == ICmpInst::ICMP_SGT && - isValidAssumeForContext(I, Q.CxtI, Q.DT)) { + isValidAssumeForContext(I, Q)) { KnownBits RHSKnown(BitWidth); computeKnownBits(A, RHSKnown, Depth+1, Query(Q, I)); @@ -721,7 +796,7 @@ // assume(v <=_s c) where c is negative } else if (match(Arg, m_ICmp(Pred, m_V, m_Value(A))) && Pred == ICmpInst::ICMP_SLE && - isValidAssumeForContext(I, Q.CxtI, Q.DT)) { + isValidAssumeForContext(I, Q)) { KnownBits RHSKnown(BitWidth); computeKnownBits(A, RHSKnown, Depth+1, Query(Q, I)); @@ -732,7 +807,7 @@ // assume(v <_s c) where c is non-positive } else if (match(Arg, m_ICmp(Pred, m_V, m_Value(A))) && Pred == ICmpInst::ICMP_SLT && - isValidAssumeForContext(I, Q.CxtI, Q.DT)) { + isValidAssumeForContext(I, Q)) { KnownBits RHSKnown(BitWidth); computeKnownBits(A, RHSKnown, Depth+1, Query(Q, I)); @@ -743,7 +818,7 @@ // assume(v <=_u c) } else if (match(Arg, m_ICmp(Pred, m_V, m_Value(A))) && Pred == ICmpInst::ICMP_ULE && - isValidAssumeForContext(I, Q.CxtI, Q.DT)) { + isValidAssumeForContext(I, Q)) { KnownBits RHSKnown(BitWidth); computeKnownBits(A, RHSKnown, Depth+1, Query(Q, I)); @@ -752,7 +827,7 @@ // assume(v <_u c) } else if (match(Arg, m_ICmp(Pred, m_V, m_Value(A))) && Pred == ICmpInst::ICMP_ULT && - isValidAssumeForContext(I, Q.CxtI, Q.DT)) { + isValidAssumeForContext(I, Q)) { KnownBits RHSKnown(BitWidth); computeKnownBits(A, RHSKnown, Depth+1, Query(Q, I)); @@ -1482,8 +1557,8 @@ /// where V is a vector, known zero, and known one values are the /// same width as the vector element, and the bit is set only if it is true /// for all of the elements in the vector. -void computeKnownBits(const Value *V, KnownBits &Known, unsigned Depth, - const Query &Q) { +void computeKnownBitsNotCached(const Value *V, KnownBits &Known, unsigned Depth, + const Query &Q) { assert(V && "No Value?"); assert(Depth <= MaxDepth && "Limit Search Depth"); unsigned BitWidth = Known.getBitWidth(); @@ -3472,7 +3547,8 @@ const DataLayout &DL, AssumptionCache *AC, const Instruction *CxtI, - const DominatorTree *DT) { + const DominatorTree *DT, + KBCache *KBC) { // Multiplying n * m significant bits yields a result of n + m significant // bits. 
If the total number of significant bits does not exceed the // result bit width (minus 1), there is no overflow. @@ -3482,8 +3558,8 @@ unsigned BitWidth = LHS->getType()->getScalarSizeInBits(); KnownBits LHSKnown(BitWidth); KnownBits RHSKnown(BitWidth); - computeKnownBits(LHS, LHSKnown, DL, /*Depth=*/0, AC, CxtI, DT); - computeKnownBits(RHS, RHSKnown, DL, /*Depth=*/0, AC, CxtI, DT); + computeKnownBits(LHS, LHSKnown, DL, /*Depth=*/0, AC, CxtI, DT, KBC); + computeKnownBits(RHS, RHSKnown, DL, /*Depth=*/0, AC, CxtI, DT, KBC); // Note that underestimating the number of zero bits gives a more // conservative answer. unsigned ZeroBits = LHSKnown.countMinLeadingZeros() + @@ -3519,10 +3595,11 @@ const DataLayout &DL, AssumptionCache *AC, const Instruction *CxtI, - const DominatorTree *DT) { - KnownBits LHSKnown = computeKnownBits(LHS, DL, /*Depth=*/0, AC, CxtI, DT); + const DominatorTree *DT, + KBCache *KBC) { + KnownBits LHSKnown = computeKnownBits(LHS, DL, /*Depth=*/0, AC, CxtI, DT, KBC); if (LHSKnown.isNonNegative() || LHSKnown.isNegative()) { - KnownBits RHSKnown = computeKnownBits(RHS, DL, /*Depth=*/0, AC, CxtI, DT); + KnownBits RHSKnown = computeKnownBits(RHS, DL, /*Depth=*/0, AC, CxtI, DT, KBC); if (LHSKnown.isNegative() && RHSKnown.isNegative()) { // The sign bit is set in both cases: this MUST overflow. @@ -3591,7 +3668,8 @@ const DataLayout &DL, AssumptionCache *AC, const Instruction *CxtI, - const DominatorTree *DT) { + const DominatorTree *DT, + KBCache *KBC) { if (Add && Add->hasNoSignedWrap()) { return OverflowResult::NeverOverflows; } @@ -3610,12 +3688,12 @@ // // Since the carry into the most significant position is always equal to // the carry out of the addition, there is no signed overflow. - if (ComputeNumSignBits(LHS, DL, 0, AC, CxtI, DT) > 1 && - ComputeNumSignBits(RHS, DL, 0, AC, CxtI, DT) > 1) + if (ComputeNumSignBits(LHS, DL, 0, AC, CxtI, DT, KBC) > 1 && + ComputeNumSignBits(RHS, DL, 0, AC, CxtI, DT, KBC) > 1) return OverflowResult::NeverOverflows; - KnownBits LHSKnown = computeKnownBits(LHS, DL, /*Depth=*/0, AC, CxtI, DT); - KnownBits RHSKnown = computeKnownBits(RHS, DL, /*Depth=*/0, AC, CxtI, DT); + KnownBits LHSKnown = computeKnownBits(LHS, DL, /*Depth=*/0, AC, CxtI, DT, KBC); + KnownBits RHSKnown = computeKnownBits(RHS, DL, /*Depth=*/0, AC, CxtI, DT, KBC); if (checkRippleForSignedAdd(LHSKnown, RHSKnown)) return OverflowResult::NeverOverflows; @@ -3633,7 +3711,7 @@ bool LHSOrRHSKnownNegative = (LHSKnown.isNegative() || RHSKnown.isNegative()); if (LHSOrRHSKnownNonNegative || LHSOrRHSKnownNegative) { - KnownBits AddKnown = computeKnownBits(Add, DL, /*Depth=*/0, AC, CxtI, DT); + KnownBits AddKnown = computeKnownBits(Add, DL, /*Depth=*/0, AC, CxtI, DT, KBC); if ((AddKnown.isNonNegative() && LHSOrRHSKnownNonNegative) || (AddKnown.isNegative() && LHSOrRHSKnownNegative)) { return OverflowResult::NeverOverflows; @@ -3709,9 +3787,10 @@ const DataLayout &DL, AssumptionCache *AC, const Instruction *CxtI, - const DominatorTree *DT) { + const DominatorTree *DT, + KBCache *KBC) { return ::computeOverflowForSignedAdd(Add->getOperand(0), Add->getOperand(1), - Add, DL, AC, CxtI, DT); + Add, DL, AC, CxtI, DT, KBC); } OverflowResult llvm::computeOverflowForSignedAdd(const Value *LHS, @@ -3719,8 +3798,9 @@ const DataLayout &DL, AssumptionCache *AC, const Instruction *CxtI, - const DominatorTree *DT) { - return ::computeOverflowForSignedAdd(LHS, RHS, nullptr, DL, AC, CxtI, DT); + const DominatorTree *DT, + KBCache *KBC) { + return ::computeOverflowForSignedAdd(LHS, RHS, nullptr, DL, AC, 
CxtI, DT, KBC); } bool llvm::isGuaranteedToTransferExecutionToSuccessor(const Instruction *I) { @@ -4241,7 +4321,7 @@ const Value *LHS, const Value *RHS, const DataLayout &DL, unsigned Depth, AssumptionCache *AC, const Instruction *CxtI, - const DominatorTree *DT) { + const DominatorTree *DT, KBCache *KBC) { assert(!LHS->getType()->isVectorTy() && "TODO: extend to handle vectors!"); if (ICmpInst::isTrueWhenEqual(Pred) && LHS == RHS) return true; @@ -4304,7 +4384,8 @@ const Value *ARHS, const Value *BLHS, const Value *BRHS, const DataLayout &DL, unsigned Depth, AssumptionCache *AC, - const Instruction *CxtI, const DominatorTree *DT) { + const Instruction *CxtI, const DominatorTree *DT, + KBCache *KBC) { switch (Pred) { default: return None; @@ -4312,16 +4393,18 @@ case CmpInst::ICMP_SLT: case CmpInst::ICMP_SLE: if (isTruePredicate(CmpInst::ICMP_SLE, BLHS, ALHS, DL, Depth, AC, CxtI, - DT) && - isTruePredicate(CmpInst::ICMP_SLE, ARHS, BRHS, DL, Depth, AC, CxtI, DT)) + DT, KBC) && + isTruePredicate(CmpInst::ICMP_SLE, ARHS, BRHS, DL, Depth, AC, CxtI, DT, + KBC)) return true; return None; case CmpInst::ICMP_ULT: case CmpInst::ICMP_ULE: if (isTruePredicate(CmpInst::ICMP_ULE, BLHS, ALHS, DL, Depth, AC, CxtI, - DT) && - isTruePredicate(CmpInst::ICMP_ULE, ARHS, BRHS, DL, Depth, AC, CxtI, DT)) + DT, KBC) && + isTruePredicate(CmpInst::ICMP_ULE, ARHS, BRHS, DL, Depth, AC, CxtI, DT, + KBC)) return true; return None; } @@ -4387,7 +4470,7 @@ const DataLayout &DL, bool InvertAPred, unsigned Depth, AssumptionCache *AC, const Instruction *CxtI, - const DominatorTree *DT) { + const DominatorTree *DT, KBCache *KBC) { // A mismatch occurs when we compare a scalar cmp to a vector cmp, for example. if (LHS->getType() != RHS->getType()) return None; @@ -4440,7 +4523,7 @@ if (APred == BPred) return isImpliedCondOperands(APred, ALHS, ARHS, BLHS, BRHS, DL, Depth, AC, - CxtI, DT); + CxtI, DT, KBC); return None; } Index: lib/Transforms/InstCombine/InstCombineAddSub.cpp =================================================================== --- lib/Transforms/InstCombine/InstCombineAddSub.cpp +++ lib/Transforms/InstCombine/InstCombineAddSub.cpp @@ -1111,7 +1111,7 @@ return replaceInstUsesWith(I, V); // A+B --> A|B iff A and B have no bits set in common. 
- if (haveNoCommonBitsSet(LHS, RHS, DL, &AC, &I, &DT)) + if (haveNoCommonBitsSet(LHS, RHS, DL, &AC, &I, &DT, KBC.get())) return BinaryOperator::CreateOr(LHS, RHS); if (Constant *CRHS = dyn_cast(RHS)) { Index: lib/Transforms/InstCombine/InstCombineAndOrXor.cpp =================================================================== --- lib/Transforms/InstCombine/InstCombineAndOrXor.cpp +++ lib/Transforms/InstCombine/InstCombineAndOrXor.cpp @@ -1611,16 +1611,16 @@ Value *Masked = nullptr; if (LAnd->getOperand(0) == RAnd->getOperand(0) && isKnownToBeAPowerOfTwo(LAnd->getOperand(1), DL, false, 0, &AC, CxtI, - &DT) && + &DT, KBC.get()) && isKnownToBeAPowerOfTwo(RAnd->getOperand(1), DL, false, 0, &AC, CxtI, - &DT)) { + &DT, KBC.get())) { Mask = Builder->CreateOr(LAnd->getOperand(1), RAnd->getOperand(1)); Masked = Builder->CreateAnd(LAnd->getOperand(0), Mask); } else if (LAnd->getOperand(1) == RAnd->getOperand(1) && isKnownToBeAPowerOfTwo(LAnd->getOperand(0), DL, false, 0, &AC, - CxtI, &DT) && + CxtI, &DT, KBC.get()) && isKnownToBeAPowerOfTwo(RAnd->getOperand(0), DL, false, 0, &AC, - CxtI, &DT)) { + CxtI, &DT, KBC.get())) { Mask = Builder->CreateOr(LAnd->getOperand(0), RAnd->getOperand(0)); Masked = Builder->CreateAnd(LAnd->getOperand(1), Mask); } Index: lib/Transforms/InstCombine/InstCombineCalls.cpp =================================================================== --- lib/Transforms/InstCombine/InstCombineCalls.cpp +++ lib/Transforms/InstCombine/InstCombineCalls.cpp @@ -167,8 +167,8 @@ } Instruction *InstCombiner::SimplifyMemTransfer(MemIntrinsic *MI) { - unsigned DstAlign = getKnownAlignment(MI->getArgOperand(0), DL, MI, &AC, &DT); - unsigned SrcAlign = getKnownAlignment(MI->getArgOperand(1), DL, MI, &AC, &DT); + unsigned DstAlign = getKnownAlignment(MI->getArgOperand(0), DL, MI, &AC, &DT, KBC.get()); + unsigned SrcAlign = getKnownAlignment(MI->getArgOperand(1), DL, MI, &AC, &DT, KBC.get()); unsigned MinAlign = std::min(DstAlign, SrcAlign); unsigned CopyAlign = MI->getAlignment(); @@ -246,7 +246,7 @@ } Instruction *InstCombiner::SimplifyMemSet(MemSetInst *MI) { - unsigned Alignment = getKnownAlignment(MI->getDest(), DL, MI, &AC, &DT); + unsigned Alignment = getKnownAlignment(MI->getDest(), DL, MI, &AC, &DT, KBC.get()); if (MI->getAlignment() < Alignment) { MI->setAlignment(ConstantInt::get(MI->getAlignmentType(), Alignment, false)); @@ -2126,7 +2126,7 @@ case Intrinsic::ppc_altivec_lvxl: // Turn PPC lvx -> load if the pointer is known aligned. if (getOrEnforceKnownAlignment(II->getArgOperand(0), 16, DL, II, &AC, - &DT) >= 16) { + &DT, KBC.get()) >= 16) { Value *Ptr = Builder->CreateBitCast(II->getArgOperand(0), PointerType::getUnqual(II->getType())); return new LoadInst(Ptr); @@ -2143,7 +2143,7 @@ case Intrinsic::ppc_altivec_stvxl: // Turn stvx -> store if the pointer is known aligned. if (getOrEnforceKnownAlignment(II->getArgOperand(1), 16, DL, II, &AC, - &DT) >= 16) { + &DT, KBC.get()) >= 16) { Type *OpPtrTy = PointerType::getUnqual(II->getArgOperand(0)->getType()); Value *Ptr = Builder->CreateBitCast(II->getArgOperand(1), OpPtrTy); @@ -2160,7 +2160,7 @@ case Intrinsic::ppc_qpx_qvlfs: // Turn PPC QPX qvlfs -> load if the pointer is known aligned. 
if (getOrEnforceKnownAlignment(II->getArgOperand(0), 16, DL, II, &AC, - &DT) >= 16) { + &DT, KBC.get()) >= 16) { Type *VTy = VectorType::get(Builder->getFloatTy(), II->getType()->getVectorNumElements()); Value *Ptr = Builder->CreateBitCast(II->getArgOperand(0), @@ -2172,7 +2172,7 @@ case Intrinsic::ppc_qpx_qvlfd: // Turn PPC QPX qvlfd -> load if the pointer is known aligned. if (getOrEnforceKnownAlignment(II->getArgOperand(0), 32, DL, II, &AC, - &DT) >= 32) { + &DT, KBC.get()) >= 32) { Value *Ptr = Builder->CreateBitCast(II->getArgOperand(0), PointerType::getUnqual(II->getType())); return new LoadInst(Ptr); @@ -2181,7 +2181,7 @@ case Intrinsic::ppc_qpx_qvstfs: // Turn PPC QPX qvstfs -> store if the pointer is known aligned. if (getOrEnforceKnownAlignment(II->getArgOperand(1), 16, DL, II, &AC, - &DT) >= 16) { + &DT, KBC.get()) >= 16) { Type *VTy = VectorType::get(Builder->getFloatTy(), II->getArgOperand(0)->getType()->getVectorNumElements()); Value *TOp = Builder->CreateFPTrunc(II->getArgOperand(0), VTy); @@ -2193,7 +2193,7 @@ case Intrinsic::ppc_qpx_qvstfd: // Turn PPC QPX qvstfd -> store if the pointer is known aligned. if (getOrEnforceKnownAlignment(II->getArgOperand(1), 32, DL, II, &AC, - &DT) >= 32) { + &DT, KBC.get()) >= 32) { Type *OpPtrTy = PointerType::getUnqual(II->getArgOperand(0)->getType()); Value *Ptr = Builder->CreateBitCast(II->getArgOperand(1), OpPtrTy); @@ -3048,7 +3048,7 @@ case Intrinsic::arm_neon_vst3lane: case Intrinsic::arm_neon_vst4lane: { unsigned MemAlign = - getKnownAlignment(II->getArgOperand(0), DL, II, &AC, &DT); + getKnownAlignment(II->getArgOperand(0), DL, II, &AC, &DT, KBC.get()); unsigned AlignArg = II->getNumArgOperands() - 1; ConstantInt *IntrAlign = dyn_cast(II->getArgOperand(AlignArg)); if (IntrAlign && IntrAlign->getZExtValue() < MemAlign) { Index: lib/Transforms/InstCombine/InstCombineCompares.cpp =================================================================== --- lib/Transforms/InstCombine/InstCombineCompares.cpp +++ lib/Transforms/InstCombine/InstCombineCompares.cpp @@ -4497,7 +4497,8 @@ // if A is a power of 2. if (match(Op0, m_And(m_Value(A), m_Not(m_Value(B)))) && match(Op1, m_Zero()) && - isKnownToBeAPowerOfTwo(A, DL, false, 0, &AC, &I, &DT) && I.isEquality()) + isKnownToBeAPowerOfTwo(A, DL, false, 0, &AC, &I, &DT, KBC.get()) && + I.isEquality()) return new ICmpInst(I.getInversePredicate(), Builder->CreateAnd(A, B), Op1); Index: lib/Transforms/InstCombine/InstCombineInternal.h =================================================================== --- lib/Transforms/InstCombine/InstCombineInternal.h +++ lib/Transforms/InstCombine/InstCombineInternal.h @@ -215,6 +215,7 @@ AssumptionCache &AC; TargetLibraryInfo &TLI; DominatorTree &DT; + std::unique_ptr KBC; const DataLayout &DL; const SimplifyQuery SQ; // Optional analyses. When non-null, these can both be used to do better @@ -230,7 +231,7 @@ const DataLayout &DL, LoopInfo *LI) : Worklist(Worklist), Builder(Builder), MinimizeSize(MinimizeSize), ExpensiveCombines(ExpensiveCombines), AA(AA), AC(AC), TLI(TLI), DT(DT), - DL(DL), SQ(DL, &TLI, &DT, &AC), LI(LI), MadeIRChange(false) {} + KBC(makeKnownBitsCache()), DL(DL), SQ(DL, &TLI, &DT, &AC, KBC.get()), LI(LI), MadeIRChange(false) {} /// \brief Run the combiner over the entire worklist until it is empty. 
/// @@ -542,26 +543,26 @@ bool MaskedValueIsZero(const Value *V, const APInt &Mask, unsigned Depth = 0, const Instruction *CxtI = nullptr) const { - return llvm::MaskedValueIsZero(V, Mask, DL, Depth, &AC, CxtI, &DT); + return llvm::MaskedValueIsZero(V, Mask, DL, Depth, &AC, CxtI, &DT, KBC.get()); } unsigned ComputeNumSignBits(const Value *Op, unsigned Depth = 0, const Instruction *CxtI = nullptr) const { - return llvm::ComputeNumSignBits(Op, DL, Depth, &AC, CxtI, &DT); + return llvm::ComputeNumSignBits(Op, DL, Depth, &AC, CxtI, &DT, KBC.get()); } OverflowResult computeOverflowForUnsignedMul(const Value *LHS, const Value *RHS, const Instruction *CxtI) const { - return llvm::computeOverflowForUnsignedMul(LHS, RHS, DL, &AC, CxtI, &DT); + return llvm::computeOverflowForUnsignedMul(LHS, RHS, DL, &AC, CxtI, &DT, KBC.get()); } OverflowResult computeOverflowForUnsignedAdd(const Value *LHS, const Value *RHS, const Instruction *CxtI) const { - return llvm::computeOverflowForUnsignedAdd(LHS, RHS, DL, &AC, CxtI, &DT); + return llvm::computeOverflowForUnsignedAdd(LHS, RHS, DL, &AC, CxtI, &DT, KBC.get()); } OverflowResult computeOverflowForSignedAdd(const Value *LHS, const Value *RHS, const Instruction *CxtI) const { - return llvm::computeOverflowForSignedAdd(LHS, RHS, DL, &AC, CxtI, &DT); + return llvm::computeOverflowForSignedAdd(LHS, RHS, DL, &AC, CxtI, &DT, KBC.get()); } /// Maximum size of array considered when transforming. Index: lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp =================================================================== --- lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp +++ lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp @@ -389,7 +389,7 @@ SmallVector ToDelete; if (MemTransferInst *Copy = isOnlyCopiedFromConstantGlobal(&AI, ToDelete)) { unsigned SourceAlign = getOrEnforceKnownAlignment( - Copy->getSource(), AI.getAlignment(), DL, &AI, &AC, &DT); + Copy->getSource(), AI.getAlignment(), DL, &AI, &AC, &DT, KBC.get()); if (AI.getAlignment() <= SourceAlign) { DEBUG(dbgs() << "Found alloca equal to global: " << AI << '\n'); DEBUG(dbgs() << " memcpy = " << *Copy << '\n'); @@ -950,7 +950,7 @@ // Attempt to improve the alignment. unsigned KnownAlign = getOrEnforceKnownAlignment( - Op, DL.getPrefTypeAlignment(LI.getType()), DL, &LI, &AC, &DT); + Op, DL.getPrefTypeAlignment(LI.getType()), DL, &LI, &AC, &DT, KBC.get()); unsigned LoadAlign = LI.getAlignment(); unsigned EffectiveLoadAlign = LoadAlign != 0 ? LoadAlign : DL.getABITypeAlignment(LI.getType()); @@ -1299,7 +1299,8 @@ // Attempt to improve the alignment. unsigned KnownAlign = getOrEnforceKnownAlignment( - Ptr, DL.getPrefTypeAlignment(Val->getType()), DL, &SI, &AC, &DT); + Ptr, DL.getPrefTypeAlignment(Val->getType()), DL, &SI, &AC, &DT, + KBC.get()); unsigned StoreAlign = SI.getAlignment(); unsigned EffectiveStoreAlign = StoreAlign != 0 ? 
StoreAlign : DL.getABITypeAlignment(Val->getType()); Index: lib/Transforms/InstCombine/InstCombineMulDivRem.cpp =================================================================== --- lib/Transforms/InstCombine/InstCombineMulDivRem.cpp +++ lib/Transforms/InstCombine/InstCombineMulDivRem.cpp @@ -1240,7 +1240,7 @@ return BO; } - if (isKnownToBeAPowerOfTwo(Op1, DL, /*OrZero*/ true, 0, &AC, &I, &DT)) { + if (isKnownToBeAPowerOfTwo(Op1, DL, /*OrZero*/ true, 0, &AC, &I, &DT, KBC.get())) { // X sdiv (1 << Y) -> X udiv (1 << Y) ( -> X u>> Y) // Safe because the only negative value (1 << Y) can take on is // INT_MIN, and X sdiv INT_MIN == X udiv INT_MIN == 0 if X doesn't have @@ -1487,7 +1487,8 @@ I.getType()); // X urem Y -> X and Y-1, where Y is a power of 2, - if (isKnownToBeAPowerOfTwo(Op1, DL, /*OrZero*/ true, 0, &AC, &I, &DT)) { + if (isKnownToBeAPowerOfTwo(Op1, DL, /*OrZero*/ true, 0, &AC, &I, &DT, + KBC.get())) { Constant *N1 = Constant::getAllOnesValue(I.getType()); Value *Add = Builder->CreateAdd(Op1, N1); return BinaryOperator::CreateAnd(Op0, Add); Index: lib/Transforms/InstCombine/InstCombinePHI.cpp =================================================================== --- lib/Transforms/InstCombine/InstCombinePHI.cpp +++ lib/Transforms/InstCombine/InstCombinePHI.cpp @@ -937,7 +937,7 @@ for (unsigned i = 0, e = PN.getNumIncomingValues(); i != e; ++i) { Instruction *CtxI = PN.getIncomingBlock(i)->getTerminator(); Value *VA = PN.getIncomingValue(i); - if (isKnownNonZero(VA, DL, 0, &AC, CtxI, &DT)) { + if (isKnownNonZero(VA, DL, 0, &AC, CtxI, &DT, KBC.get())) { if (!NonZeroConst) NonZeroConst = GetAnyNonZeroConstInt(PN); PN.setIncomingValue(i, NonZeroConst); Index: lib/Transforms/Scalar/JumpThreading.cpp =================================================================== --- lib/Transforms/Scalar/JumpThreading.cpp +++ lib/Transforms/Scalar/JumpThreading.cpp @@ -1903,7 +1903,7 @@ // phi translation. if (Value *IV = SimplifyInstruction( New, - {BB->getModule()->getDataLayout(), TLI, nullptr, nullptr, New})) { + {BB->getModule()->getDataLayout(), TLI, nullptr, nullptr, nullptr, New})) { ValueMapping[&*BI] = IV; if (!New->mayHaveSideEffects()) { New->deleteValue(); Index: lib/Transforms/Utils/Local.cpp =================================================================== --- lib/Transforms/Utils/Local.cpp +++ lib/Transforms/Utils/Local.cpp @@ -1034,13 +1034,14 @@ const DataLayout &DL, const Instruction *CxtI, AssumptionCache *AC, - const DominatorTree *DT) { + const DominatorTree *DT, + KBCache *KBC) { assert(V->getType()->isPointerTy() && "getOrEnforceKnownAlignment expects a pointer!"); unsigned BitWidth = DL.getPointerTypeSizeInBits(V->getType()); KnownBits Known(BitWidth); - computeKnownBits(V, Known, DL, 0, AC, CxtI, DT); + computeKnownBits(V, Known, DL, 0, AC, CxtI, DT, KBC); unsigned TrailZ = Known.countMinTrailingZeros(); // Avoid trouble with ridiculously large TrailZ values, such as Index: lib/Transforms/Utils/SimplifyInstructions.cpp =================================================================== --- lib/Transforms/Utils/SimplifyInstructions.cpp +++ lib/Transforms/Utils/SimplifyInstructions.cpp @@ -112,7 +112,8 @@ OptimizationRemarkEmitter *ORE = &getAnalysis().getORE(); const DataLayout &DL = F.getParent()->getDataLayout(); - const SimplifyQuery SQ(DL, TLI, DT, AC); + // TODO add a KBCache here + const SimplifyQuery SQ(DL, TLI, DT, AC, nullptr); return runImpl(F, SQ, ORE); } };
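Usage sketch (not part of the patch): the fragment below shows one way a client could create the cache with makeKnownBitsCache() and thread it through both the ValueTracking and InstructionSimplify entry points, mirroring what InstCombiner does above. It assumes only the patched signatures shown in the headers; the helper countKnownConstants and its parameters are invented for illustration, and `auto` is used for the cache handle (presumably std::unique_ptr<KBCache, KBCacheDeleter>, per the ValueTracking.h declarations).

// Sketch only: thread the known-bits cache through SimplifyQuery and the
// ValueTracking entry points added by this patch.
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/InstIterator.h"
#include "llvm/IR/Module.h"
#include "llvm/Support/KnownBits.h"

using namespace llvm;

static unsigned countKnownConstants(Function &F, const TargetLibraryInfo *TLI,
                                    const DominatorTree *DT,
                                    AssumptionCache *AC) {
  const DataLayout &DL = F.getParent()->getDataLayout();

  // One cache per run over read-only IR; it is keyed on Value pointers, so it
  // must be dropped (or rebuilt) once instructions are actually rewritten.
  auto KBC = makeKnownBitsCache();

  // The cache rides along inside SimplifyQuery for the Simplify* helpers...
  const SimplifyQuery SQ(DL, TLI, DT, AC, KBC.get());

  unsigned NumConstant = 0;
  for (Instruction &I : instructions(F)) {
    // ...and can also be handed directly to the ValueTracking entry points.
    if (I.getType()->isIntOrIntVectorTy()) {
      KnownBits Known =
          computeKnownBits(&I, DL, /*Depth=*/0, AC, &I, DT, KBC.get());
      if (Known.isConstant())
        ++NumConstant;
    }
    // Repeated queries over the same values now hit the cache, except for
    // results that depended on the context instruction (e.g. via assumes),
    // which the cached computeKnownBits wrapper deliberately does not store.
    (void)SimplifyInstruction(&I, SQ.getWithInstruction(&I));
  }
  return NumConstant;
}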
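The KBCacheDeleter indirection in ValueTracking.h exists so that headers such as InstCombineInternal.h can hold a std::unique_ptr to KBCache while the class itself stays defined inside ValueTracking.cpp. A standalone sketch of that idiom follows, using hypothetical Widget/WidgetDeleter names rather than anything from this patch:

// Illustration only: unique_ptr ownership of a forward-declared type via a
// named deleter whose call operator is defined where the type is complete.
#include <memory>

// --- header visible to clients ------------------------------------------
class Widget;                                // intentionally incomplete here
struct WidgetDeleter {
  void operator()(Widget *W);                // defined next to Widget
};
std::unique_ptr<Widget, WidgetDeleter> makeWidget();

// --- implementation file -------------------------------------------------
class Widget {};

void WidgetDeleter::operator()(Widget *W) { delete W; }

std::unique_ptr<Widget, WidgetDeleter> makeWidget() {
  return std::unique_ptr<Widget, WidgetDeleter>(new Widget);
}

// --- client code -----------------------------------------------------------
// With the default deleter, destroying the unique_ptr would instantiate
// `delete` on an incomplete type; the named deleter avoids that, which is why
// InstCombineInternal.h can own the cache while KBCache stays private to
// ValueTracking.cpp.
int main() {
  auto W = makeWidget();
  return W != nullptr ? 0 : 1;
}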