Index: include/llvm/Analysis/InstructionSimplify.h
===================================================================
--- include/llvm/Analysis/InstructionSimplify.h
+++ include/llvm/Analysis/InstructionSimplify.h
@@ -50,28 +50,32 @@
   Value *SimplifyAddInst(Value *LHS, Value *RHS, bool isNSW, bool isNUW,
                          const DataLayout *TD = nullptr,
                          const TargetLibraryInfo *TLI = nullptr,
-                         const DominatorTree *DT = nullptr);
+                         const DominatorTree *DT = nullptr,
+                         const Instruction *CxtI = nullptr);

   /// SimplifySubInst - Given operands for a Sub, see if we can
   /// fold the result.  If not, this returns null.
   Value *SimplifySubInst(Value *LHS, Value *RHS, bool isNSW, bool isNUW,
                          const DataLayout *TD = nullptr,
                          const TargetLibraryInfo *TLI = nullptr,
-                         const DominatorTree *DT = nullptr);
+                         const DominatorTree *DT = nullptr,
+                         const Instruction *CxtI = nullptr);

   /// Given operands for an FAdd, see if we can fold the result.  If not, this
   /// returns null.
   Value *SimplifyFAddInst(Value *LHS, Value *RHS, FastMathFlags FMF,
                           const DataLayout *TD = nullptr,
                           const TargetLibraryInfo *TLI = nullptr,
-                          const DominatorTree *DT = nullptr);
+                          const DominatorTree *DT = nullptr,
+                          const Instruction *CxtI = nullptr);

   /// Given operands for an FSub, see if we can fold the result.  If not, this
   /// returns null.
   Value *SimplifyFSubInst(Value *LHS, Value *RHS, FastMathFlags FMF,
                           const DataLayout *TD = nullptr,
                           const TargetLibraryInfo *TLI = nullptr,
-                          const DominatorTree *DT = nullptr);
+                          const DominatorTree *DT = nullptr,
+                          const Instruction *CxtI = nullptr);

   /// Given operands for an FMul, see if we can fold the result.  If not, this
   /// returns null.
@@ -79,121 +83,139 @@
                           FastMathFlags FMF,
                           const DataLayout *TD = nullptr,
                           const TargetLibraryInfo *TLI = nullptr,
-                          const DominatorTree *DT = nullptr);
+                          const DominatorTree *DT = nullptr,
+                          const Instruction *CxtI = nullptr);

   /// SimplifyMulInst - Given operands for a Mul, see if we can
   /// fold the result.  If not, this returns null.
   Value *SimplifyMulInst(Value *LHS, Value *RHS, const DataLayout *TD = nullptr,
                          const TargetLibraryInfo *TLI = nullptr,
-                         const DominatorTree *DT = nullptr);
+                         const DominatorTree *DT = nullptr,
+                         const Instruction *CxtI = nullptr);

   /// SimplifySDivInst - Given operands for an SDiv, see if we can
   /// fold the result.  If not, this returns null.
   Value *SimplifySDivInst(Value *LHS, Value *RHS,
                           const DataLayout *TD = nullptr,
                           const TargetLibraryInfo *TLI = nullptr,
-                          const DominatorTree *DT = nullptr);
+                          const DominatorTree *DT = nullptr,
+                          const Instruction *CxtI = nullptr);

   /// SimplifyUDivInst - Given operands for a UDiv, see if we can
   /// fold the result.  If not, this returns null.
   Value *SimplifyUDivInst(Value *LHS, Value *RHS,
                           const DataLayout *TD = nullptr,
                           const TargetLibraryInfo *TLI = nullptr,
-                          const DominatorTree *DT = nullptr);
+                          const DominatorTree *DT = nullptr,
+                          const Instruction *CxtI = nullptr);

   /// SimplifyFDivInst - Given operands for an FDiv, see if we can
   /// fold the result.  If not, this returns null.
   Value *SimplifyFDivInst(Value *LHS, Value *RHS,
                           const DataLayout *TD = nullptr,
                           const TargetLibraryInfo *TLI = nullptr,
-                          const DominatorTree *DT = nullptr);
+                          const DominatorTree *DT = nullptr,
+                          const Instruction *CxtI = nullptr);

   /// SimplifySRemInst - Given operands for an SRem, see if we can
   /// fold the result.  If not, this returns null.
   Value *SimplifySRemInst(Value *LHS, Value *RHS,
                           const DataLayout *TD = nullptr,
                           const TargetLibraryInfo *TLI = nullptr,
-                          const DominatorTree *DT = nullptr);
+                          const DominatorTree *DT = nullptr,
+                          const Instruction *CxtI = nullptr);

   /// SimplifyURemInst - Given operands for a URem, see if we can
   /// fold the result.  If not, this returns null.
   Value *SimplifyURemInst(Value *LHS, Value *RHS,
                           const DataLayout *TD = nullptr,
                           const TargetLibraryInfo *TLI = nullptr,
-                          const DominatorTree *DT = nullptr);
+                          const DominatorTree *DT = nullptr,
+                          const Instruction *CxtI = nullptr);

   /// SimplifyFRemInst - Given operands for an FRem, see if we can
   /// fold the result.  If not, this returns null.
   Value *SimplifyFRemInst(Value *LHS, Value *RHS,
                           const DataLayout *TD = nullptr,
                           const TargetLibraryInfo *TLI = nullptr,
-                          const DominatorTree *DT = nullptr);
+                          const DominatorTree *DT = nullptr,
+                          const Instruction *CxtI = nullptr);

   /// SimplifyShlInst - Given operands for a Shl, see if we can
   /// fold the result.  If not, this returns null.
   Value *SimplifyShlInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW,
                          const DataLayout *TD = nullptr,
                          const TargetLibraryInfo *TLI = nullptr,
-                         const DominatorTree *DT = nullptr);
+                         const DominatorTree *DT = nullptr,
+                         const Instruction *CxtI = nullptr);

   /// SimplifyLShrInst - Given operands for a LShr, see if we can
   /// fold the result.  If not, this returns null.
   Value *SimplifyLShrInst(Value *Op0, Value *Op1, bool isExact,
                           const DataLayout *TD = nullptr,
                           const TargetLibraryInfo *TLI = nullptr,
-                          const DominatorTree *DT = nullptr);
+                          const DominatorTree *DT = nullptr,
+                          const Instruction *CxtI = nullptr);

   /// SimplifyAShrInst - Given operands for an AShr, see if we can
   /// fold the result.  If not, this returns null.
   Value *SimplifyAShrInst(Value *Op0, Value *Op1, bool isExact,
                           const DataLayout *TD = nullptr,
                           const TargetLibraryInfo *TLI = nullptr,
-                          const DominatorTree *DT = nullptr);
+                          const DominatorTree *DT = nullptr,
+                          const Instruction *CxtI = nullptr);

   /// SimplifyAndInst - Given operands for an And, see if we can
   /// fold the result.  If not, this returns null.
   Value *SimplifyAndInst(Value *LHS, Value *RHS, const DataLayout *TD = nullptr,
                          const TargetLibraryInfo *TLI = nullptr,
-                         const DominatorTree *DT = nullptr);
+                         const DominatorTree *DT = nullptr,
+                         const Instruction *CxtI = nullptr);

   /// SimplifyOrInst - Given operands for an Or, see if we can
   /// fold the result.  If not, this returns null.
   Value *SimplifyOrInst(Value *LHS, Value *RHS, const DataLayout *TD = nullptr,
                         const TargetLibraryInfo *TLI = nullptr,
-                        const DominatorTree *DT = nullptr);
+                        const DominatorTree *DT = nullptr,
+                        const Instruction *CxtI = nullptr);

   /// SimplifyXorInst - Given operands for a Xor, see if we can
   /// fold the result.  If not, this returns null.
   Value *SimplifyXorInst(Value *LHS, Value *RHS, const DataLayout *TD = nullptr,
                          const TargetLibraryInfo *TLI = nullptr,
-                         const DominatorTree *DT = nullptr);
+                         const DominatorTree *DT = nullptr,
+                         const Instruction *CxtI = nullptr);

   /// SimplifyICmpInst - Given operands for an ICmpInst, see if we can
   /// fold the result.  If not, this returns null.
   Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
                           const DataLayout *TD = nullptr,
                           const TargetLibraryInfo *TLI = nullptr,
-                          const DominatorTree *DT = nullptr);
+                          const DominatorTree *DT = nullptr,
+                          Instruction *CxtI = nullptr);

   /// SimplifyFCmpInst - Given operands for an FCmpInst, see if we can
   /// fold the result.  If not, this returns null.
   Value *SimplifyFCmpInst(unsigned Predicate, Value *LHS, Value *RHS,
                           const DataLayout *TD = nullptr,
                           const TargetLibraryInfo *TLI = nullptr,
-                          const DominatorTree *DT = nullptr);
+                          const DominatorTree *DT = nullptr,
+                          const Instruction *CxtI = nullptr);

   /// SimplifySelectInst - Given operands for a SelectInst, see if we can fold
   /// the result.  If not, this returns null.
   Value *SimplifySelectInst(Value *Cond, Value *TrueVal, Value *FalseVal,
                             const DataLayout *TD = nullptr,
                             const TargetLibraryInfo *TLI = nullptr,
-                            const DominatorTree *DT = nullptr);
+                            const DominatorTree *DT = nullptr,
+                            const Instruction *CxtI = nullptr);

   /// SimplifyGEPInst - Given operands for a GetElementPtrInst, see if we can
   /// fold the result.  If not, this returns null.
   Value *SimplifyGEPInst(ArrayRef<Value *> Ops, const DataLayout *TD = nullptr,
                          const TargetLibraryInfo *TLI = nullptr,
-                         const DominatorTree *DT = nullptr);
+                         const DominatorTree *DT = nullptr,
+                         const Instruction *CxtI = nullptr);

   /// SimplifyInsertValueInst - Given operands for an InsertValueInst, see if we
   /// can fold the result.  If not, this returns null.
@@ -201,13 +223,15 @@
                                  ArrayRef<unsigned> Idxs,
                                  const DataLayout *TD = nullptr,
                                  const TargetLibraryInfo *TLI = nullptr,
-                                 const DominatorTree *DT = nullptr);
+                                 const DominatorTree *DT = nullptr,
+                                 const Instruction *CxtI = nullptr);

   /// SimplifyTruncInst - Given operands for a TruncInst, see if we can fold
   /// the result.  If not, this returns null.
   Value *SimplifyTruncInst(Value *Op, Type *Ty, const DataLayout *TD = nullptr,
                            const TargetLibraryInfo *TLI = nullptr,
-                           const DominatorTree *DT = nullptr);
+                           const DominatorTree *DT = nullptr,
+                           const Instruction *CxtI = nullptr);

   //=== Helper functions for higher up the class hierarchy.
@@ -217,14 +241,16 @@
   Value *SimplifyCmpInst(unsigned Predicate, Value *LHS, Value *RHS,
                          const DataLayout *TD = nullptr,
                          const TargetLibraryInfo *TLI = nullptr,
-                         const DominatorTree *DT = nullptr);
+                         const DominatorTree *DT = nullptr,
+                         const Instruction *CxtI = nullptr);

   /// SimplifyBinOp - Given operands for a BinaryOperator, see if we can
   /// fold the result.  If not, this returns null.
   Value *SimplifyBinOp(unsigned Opcode, Value *LHS, Value *RHS,
                        const DataLayout *TD = nullptr,
                        const TargetLibraryInfo *TLI = nullptr,
-                       const DominatorTree *DT = nullptr);
+                       const DominatorTree *DT = nullptr,
+                       const Instruction *CxtI = nullptr);

   /// \brief Given a function and iterators over arguments, see if we can fold
   /// the result.
@@ -233,7 +259,8 @@
   Value *SimplifyCall(Value *V, User::op_iterator ArgBegin,
                       User::op_iterator ArgEnd, const DataLayout *TD = nullptr,
                       const TargetLibraryInfo *TLI = nullptr,
-                      const DominatorTree *DT = nullptr);
+                      const DominatorTree *DT = nullptr,
+                      const Instruction *CxtI = nullptr);

   /// \brief Given a function and set of arguments, see if we can fold the
   /// result.
@@ -242,7 +269,8 @@
   Value *SimplifyCall(Value *V, ArrayRef<Value *> Args,
                       const DataLayout *TD = nullptr,
                       const TargetLibraryInfo *TLI = nullptr,
-                      const DominatorTree *DT = nullptr);
+                      const DominatorTree *DT = nullptr,
+                      const Instruction *CxtI = nullptr);

   /// SimplifyInstruction - See if we can compute a simplified version of this
   /// instruction.  If not, this returns null.
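For illustration, a transformation pass visiting an instruction can forward that instruction through the new trailing parameter; passing nullptr preserves the old context-free behavior. The helper below is hypothetical (not part of this patch) and only demonstrates the intended calling convention:

  // Sketch only: the helper name is made up for illustration.
  static Value *trySimplifyWithContext(BinaryOperator *BO, const DataLayout *TD,
                                       const TargetLibraryInfo *TLI,
                                       const DominatorTree *DT) {
    // Passing BO itself as CxtI lets the simplifier use invariant
    // intrinsics known to hold at BO's program point.
    return SimplifyBinOp(BO->getOpcode(), BO->getOperand(0), BO->getOperand(1),
                         TD, TLI, DT, BO);
  }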
Index: include/llvm/Analysis/ValueTracking.h
===================================================================
--- include/llvm/Analysis/ValueTracking.h
+++ include/llvm/Analysis/ValueTracking.h
@@ -25,6 +25,7 @@
   class DataLayout;
   class StringRef;
   class MDNode;
+  class DominatorTree;
   class TargetLibraryInfo;

   /// Determine which bits of V are known to be either zero or one and return
@@ -36,7 +37,9 @@
   /// same width as the vector element, and the bit is set only if it is true
   /// for all of the elements in the vector.
   void computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne,
-                        const DataLayout *TD = nullptr, unsigned Depth = 0);
+                        const DataLayout *TD = nullptr, unsigned Depth = 0,
+                        const Instruction *CxtI = nullptr,
+                        const DominatorTree *DT = nullptr);
   /// Compute known bits from the range metadata.
   /// \p KnownZero the set of bits that are known to be zero
   void computeKnownBitsFromRangeMetadata(const MDNode &Ranges,
@@ -45,21 +48,26 @@
   /// ComputeSignBit - Determine whether the sign bit is known to be zero or
   /// one.  Convenience wrapper around computeKnownBits.
   void ComputeSignBit(Value *V, bool &KnownZero, bool &KnownOne,
-                      const DataLayout *TD = nullptr, unsigned Depth = 0);
+                      const DataLayout *TD = nullptr, unsigned Depth = 0,
+                      const Instruction *CxtI = nullptr,
+                      const DominatorTree *DT = nullptr);

   /// isKnownToBeAPowerOfTwo - Return true if the given value is known to have
   /// exactly one bit set when defined. For vectors return true if every
   /// element is known to be a power of two when defined. Supports values with
   /// integer or pointer type and vectors of integers. If 'OrZero' is set then
   /// returns true if the given value is either a power of two or zero.
-  bool isKnownToBeAPowerOfTwo(Value *V, bool OrZero = false, unsigned Depth = 0);
+  bool isKnownToBeAPowerOfTwo(Value *V, bool OrZero = false, unsigned Depth = 0,
+                              const Instruction *CxtI = nullptr,
+                              const DominatorTree *DT = nullptr);

   /// isKnownNonZero - Return true if the given value is known to be non-zero
   /// when defined. For vectors return true if every element is known to be
   /// non-zero when defined. Supports values with integer or pointer type and
   /// vectors of integers.
   bool isKnownNonZero(Value *V, const DataLayout *TD = nullptr,
-                      unsigned Depth = 0);
+                      unsigned Depth = 0, const Instruction *CxtI = nullptr,
+                      const DominatorTree *DT = nullptr);

   /// MaskedValueIsZero - Return true if 'V & Mask' is known to be zero.  We use
   /// this predicate to simplify operations downstream.  Mask is known to be
@@ -71,7 +79,9 @@
   /// same width as the vector element, and the bit is set only if it is true
   /// for all of the elements in the vector.
   bool MaskedValueIsZero(Value *V, const APInt &Mask,
-                         const DataLayout *TD = nullptr, unsigned Depth = 0);
+                         const DataLayout *TD = nullptr, unsigned Depth = 0,
+                         const Instruction *CxtI = nullptr,
+                         const DominatorTree *DT = nullptr);


   /// ComputeNumSignBits - Return the number of times the sign bit of the
@@ -83,7 +93,9 @@
   /// 'Op' must have a scalar integer type.
   ///
   unsigned ComputeNumSignBits(Value *Op, const DataLayout *TD = nullptr,
-                              unsigned Depth = 0);
+                              unsigned Depth = 0,
+                              const Instruction *CxtI = nullptr,
+                              const DominatorTree *DT = nullptr);

   /// ComputeMultiple - This function computes the integer multiple of Base that
   /// equals V.  If successful, it returns true and returns the multiple in
@@ -191,6 +203,13 @@
   /// and byval arguments.
   bool isKnownNonNull(const Value *V, const TargetLibraryInfo *TLI = nullptr);

+  /// Return true if it is valid to use the assumptions provided by an
+  /// invariant intrinsic, I, at the point in the control-flow identified by the
+  /// context instruction, CxtI.
+  bool isValidInvariantForContext(const Instruction *I, const Instruction *CxtI,
+                                  const DataLayout *DL = nullptr,
+                                  const DominatorTree *DT = nullptr);
+
 } // end namespace llvm

 #endif
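As a usage sketch (hypothetical helper, not part of the patch), a client that knows where a value is used can now ask for its known bits at that point; omitting the trailing arguments degrades gracefully:

  static APInt knownZeroAt(Value *V, const Instruction *UseSite,
                           const DataLayout *TD, const DominatorTree *DT) {
    unsigned BitWidth = V->getType()->getIntegerBitWidth();
    APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0);
    // With a context and dominator tree, invariants dominating UseSite may
    // contribute bits; without them, only structural facts are used.
    computeKnownBits(V, KnownZero, KnownOne, TD, 0, UseSite, DT);
    return KnownZero;
  }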
Index: include/llvm/Transforms/Utils/Local.h
===================================================================
--- include/llvm/Transforms/Utils/Local.h
+++ include/llvm/Transforms/Utils/Local.h
@@ -40,6 +40,7 @@
 class TargetTransformInfo;
 class DIBuilder;
 class AliasAnalysis;
+class DominatorTree;

 template<typename T> class SmallVectorImpl;

@@ -170,12 +171,16 @@
 /// and it is more than the alignment of the ultimate object, see if we can
 /// increase the alignment of the ultimate object, making this check succeed.
 unsigned getOrEnforceKnownAlignment(Value *V, unsigned PrefAlign,
-                                    const DataLayout *TD = nullptr);
+                                    const DataLayout *TD = nullptr,
+                                    const Instruction *CxtI = nullptr,
+                                    const DominatorTree *DT = nullptr);

 /// getKnownAlignment - Try to infer an alignment for the specified pointer.
 static inline unsigned getKnownAlignment(Value *V,
-                                         const DataLayout *TD = nullptr) {
-  return getOrEnforceKnownAlignment(V, 0, TD);
+                                         const DataLayout *TD = nullptr,
+                                         const Instruction *CxtI = nullptr,
+                                         const DominatorTree *DT = nullptr) {
+  return getOrEnforceKnownAlignment(V, 0, TD, CxtI, DT);
 }

 /// EmitGEPOffset - Given a getelementptr instruction/constantexpr, emit the
Index: lib/Analysis/BasicAliasAnalysis.cpp
===================================================================
--- lib/Analysis/BasicAliasAnalysis.cpp
+++ lib/Analysis/BasicAliasAnalysis.cpp
@@ -205,7 +205,8 @@
 /// represented in the result.
 static Value *GetLinearExpression(Value *V, APInt &Scale, APInt &Offset,
                                   ExtensionKind &Extension,
-                                  const DataLayout &DL, unsigned Depth) {
+                                  const DataLayout &DL, unsigned Depth,
+                                  DominatorTree *DT) {
   assert(V->getType()->isIntegerTy() && "Not an integer value");

   // Limit our recursion depth.
@@ -222,23 +223,24 @@
     case Instruction::Or:
       // X|C == X+C if all the bits in C are unset in X.  Otherwise we can't
       // analyze it.
-      if (!MaskedValueIsZero(BOp->getOperand(0), RHSC->getValue(), &DL))
+      if (!MaskedValueIsZero(BOp->getOperand(0), RHSC->getValue(), &DL, 0,
+                             BOp, DT))
         break;
       // FALL THROUGH.
     case Instruction::Add:
       V = GetLinearExpression(BOp->getOperand(0), Scale, Offset, Extension,
-                              DL, Depth+1);
+                              DL, Depth+1, DT);
       Offset += RHSC->getValue();
       return V;
     case Instruction::Mul:
       V = GetLinearExpression(BOp->getOperand(0), Scale, Offset, Extension,
-                              DL, Depth+1);
+                              DL, Depth+1, DT);
       Offset *= RHSC->getValue();
       Scale *= RHSC->getValue();
       return V;
     case Instruction::Shl:
       V = GetLinearExpression(BOp->getOperand(0), Scale, Offset, Extension,
-                              DL, Depth+1);
+                              DL, Depth+1, DT);
       Offset <<= RHSC->getValue().getLimitedValue();
       Scale <<= RHSC->getValue().getLimitedValue();
       return V;
@@ -259,7 +261,7 @@
     Extension = isa<SExtInst>(V) ? EK_SignExt : EK_ZeroExt;

     Value *Result = GetLinearExpression(CastOp, Scale, Offset, Extension,
-                                        DL, Depth+1);
+                                        DL, Depth+1, DT);
     Scale = Scale.zext(OldWidth);
     Offset = Offset.zext(OldWidth);

@@ -289,7 +291,8 @@
 static const Value *
 DecomposeGEPExpression(const Value *V, int64_t &BaseOffs,
                        SmallVectorImpl<VariableGEPIndex> &VarIndices,
-                       bool &MaxLookupReached, const DataLayout *DL) {
+                       bool &MaxLookupReached, const DataLayout *DL,
+                       DominatorTree *DT) {
   // Limit recursion depth to limit compile time in crazy cases.
   unsigned MaxLookup = MaxLookupSearchDepth;
   MaxLookupReached = false;
@@ -378,7 +381,7 @@
       // Use GetLinearExpression to decompose the index into a C1*V+C2 form.
       APInt IndexScale(Width, 0), IndexOffset(Width, 0);
       Index = GetLinearExpression(Index, IndexScale, IndexOffset, Extension,
-                                  *DL, 0);
+                                  *DL, 0, DT);

       // The GEP index scale ("Scale") scales C1*V+C2, yielding (C1*V+C2)*Scale.
       // This gives us an aggregate computation of (C1*Scale)*V + C2*Scale.
@@ -894,6 +897,10 @@
   bool GEP1MaxLookupReached;
   SmallVector<VariableGEPIndex, 4> GEP1VariableIndices;

+  DominatorTreeWrapperPass *DTWP =
+      getAnalysisIfAvailable<DominatorTreeWrapperPass>();
+  DominatorTree *DT = DTWP ? &DTWP->getDomTree() : nullptr;
+
   // If we have two gep instructions with must-alias or not-alias'ing base
   // pointers, figure out if the indexes to the GEP tell us anything about the
   // derived pointer.
@@ -917,10 +924,10 @@
     SmallVector<VariableGEPIndex, 4> GEP2VariableIndices;
     const Value *GEP2BasePtr =
       DecomposeGEPExpression(GEP2, GEP2BaseOffset, GEP2VariableIndices,
-                             GEP2MaxLookupReached, DL);
+                             GEP2MaxLookupReached, DL, DT);
     const Value *GEP1BasePtr =
       DecomposeGEPExpression(GEP1, GEP1BaseOffset, GEP1VariableIndices,
-                             GEP1MaxLookupReached, DL);
+                             GEP1MaxLookupReached, DL, DT);
     // DecomposeGEPExpression and GetUnderlyingObject should return the
     // same result except when DecomposeGEPExpression has no DataLayout.
     if (GEP1BasePtr != UnderlyingV1 || GEP2BasePtr != UnderlyingV2) {
@@ -949,14 +956,14 @@
     // about the relation of the resulting pointer.
     const Value *GEP1BasePtr =
       DecomposeGEPExpression(GEP1, GEP1BaseOffset, GEP1VariableIndices,
-                             GEP1MaxLookupReached, DL);
+                             GEP1MaxLookupReached, DL, DT);

     int64_t GEP2BaseOffset;
     bool GEP2MaxLookupReached;
     SmallVector<VariableGEPIndex, 4> GEP2VariableIndices;
     const Value *GEP2BasePtr =
       DecomposeGEPExpression(GEP2, GEP2BaseOffset, GEP2VariableIndices,
-                             GEP2MaxLookupReached, DL);
+                             GEP2MaxLookupReached, DL, DT);

     // DecomposeGEPExpression and GetUnderlyingObject should return the
     // same result except when DecomposeGEPExpression has no DataLayout.
@@ -995,7 +1002,7 @@
     const Value *GEP1BasePtr =
       DecomposeGEPExpression(GEP1, GEP1BaseOffset, GEP1VariableIndices,
-                             GEP1MaxLookupReached, DL);
+                             GEP1MaxLookupReached, DL, DT);

     // DecomposeGEPExpression and GetUnderlyingObject should return the
     // same result except when DecomposeGEPExpression has no DataLayout.
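The Local.h change composes with this: a memory transform can now pass the instruction being rewritten so that alignment inference may use dominating invariants. A hypothetical caller (sketch only, not from this patch):

  static unsigned alignmentAt(Value *Ptr, Instruction *MemI,
                              const DataLayout *TD, const DominatorTree *DT) {
    // Behaves like the old getKnownAlignment(Ptr, TD) when MemI and DT
    // are null.
    return getKnownAlignment(Ptr, TD, MemI, DT);
  }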
Index: lib/Analysis/InstructionSimplify.cpp
===================================================================
--- lib/Analysis/InstructionSimplify.cpp
+++ lib/Analysis/InstructionSimplify.cpp
@@ -45,9 +45,11 @@
   const DataLayout *DL;
   const TargetLibraryInfo *TLI;
   const DominatorTree *DT;
+  const Instruction *CxtI;

   Query(const DataLayout *DL, const TargetLibraryInfo *tli,
-        const DominatorTree *dt) : DL(DL), TLI(tli), DT(dt) {}
+        const DominatorTree *dt, const Instruction *cxti = nullptr)
+    : DL(DL), TLI(tli), DT(dt), CxtI(cxti) {}
 };

 static Value *SimplifyAndInst(Value *, Value *, const Query &, unsigned);
@@ -575,8 +577,8 @@
 Value *llvm::SimplifyAddInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW,
                              const DataLayout *DL, const TargetLibraryInfo *TLI,
-                             const DominatorTree *DT) {
-  return ::SimplifyAddInst(Op0, Op1, isNSW, isNUW, Query (DL, TLI, DT),
+                             const DominatorTree *DT, const Instruction *CxtI) {
+  return ::SimplifyAddInst(Op0, Op1, isNSW, isNUW, Query (DL, TLI, DT, CxtI),
                            RecursionLimit);
 }

@@ -769,8 +771,8 @@
 Value *llvm::SimplifySubInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW,
                              const DataLayout *DL, const TargetLibraryInfo *TLI,
-                             const DominatorTree *DT) {
-  return ::SimplifySubInst(Op0, Op1, isNSW, isNUW, Query (DL, TLI, DT),
+                             const DominatorTree *DT, const Instruction *CxtI) {
+  return ::SimplifySubInst(Op0, Op1, isNSW, isNUW, Query (DL, TLI, DT, CxtI),
                            RecursionLimit);
 }

@@ -947,28 +949,33 @@
 Value *llvm::SimplifyFAddInst(Value *Op0, Value *Op1, FastMathFlags FMF,
                               const DataLayout *DL,
                               const TargetLibraryInfo *TLI,
-                              const DominatorTree *DT) {
-  return ::SimplifyFAddInst(Op0, Op1, FMF, Query (DL, TLI, DT), RecursionLimit);
+                              const DominatorTree *DT, const Instruction *CxtI) {
+  return ::SimplifyFAddInst(Op0, Op1, FMF, Query (DL, TLI, DT, CxtI),
+                            RecursionLimit);
 }

 Value *llvm::SimplifyFSubInst(Value *Op0, Value *Op1, FastMathFlags FMF,
                               const DataLayout *DL,
                               const TargetLibraryInfo *TLI,
-                              const DominatorTree *DT) {
-  return ::SimplifyFSubInst(Op0, Op1, FMF, Query (DL, TLI, DT), RecursionLimit);
+                              const DominatorTree *DT, const Instruction *CxtI) {
+  return ::SimplifyFSubInst(Op0, Op1, FMF, Query (DL, TLI, DT, CxtI),
+                            RecursionLimit);
 }

 Value *llvm::SimplifyFMulInst(Value *Op0, Value *Op1, FastMathFlags FMF,
                               const DataLayout *DL,
                               const TargetLibraryInfo *TLI,
-                              const DominatorTree *DT) {
-  return ::SimplifyFMulInst(Op0, Op1, FMF, Query (DL, TLI, DT), RecursionLimit);
+                              const DominatorTree *DT,
+                              const Instruction *CxtI) {
+  return ::SimplifyFMulInst(Op0, Op1, FMF, Query (DL, TLI, DT, CxtI),
+                            RecursionLimit);
 }

 Value *llvm::SimplifyMulInst(Value *Op0, Value *Op1, const DataLayout *DL,
                              const TargetLibraryInfo *TLI,
-                             const DominatorTree *DT) {
-  return ::SimplifyMulInst(Op0, Op1, Query (DL, TLI, DT), RecursionLimit);
+                             const DominatorTree *DT, const Instruction *CxtI) {
+  return ::SimplifyMulInst(Op0, Op1, Query (DL, TLI, DT, CxtI),
+                           RecursionLimit);
 }

 /// SimplifyDiv - Given operands for an SDiv or UDiv, see if we can
@@ -1055,8 +1062,10 @@
 Value *llvm::SimplifySDivInst(Value *Op0, Value *Op1, const DataLayout *DL,
                               const TargetLibraryInfo *TLI,
-                              const DominatorTree *DT) {
-  return ::SimplifySDivInst(Op0, Op1, Query (DL, TLI, DT), RecursionLimit);
+                              const DominatorTree *DT,
+                              const Instruction *CxtI) {
+  return ::SimplifySDivInst(Op0, Op1, Query (DL, TLI, DT, CxtI),
+                            RecursionLimit);
 }

 /// SimplifyUDivInst - Given operands for a UDiv, see if we can
@@ -1071,8 +1080,10 @@
 Value *llvm::SimplifyUDivInst(Value *Op0, Value *Op1, const DataLayout *DL,
                               const TargetLibraryInfo *TLI,
-                              const DominatorTree *DT) {
-  return ::SimplifyUDivInst(Op0, Op1, Query (DL, TLI, DT), RecursionLimit);
+                              const DominatorTree *DT,
+                              const Instruction *CxtI) {
+  return ::SimplifyUDivInst(Op0, Op1, Query (DL, TLI, DT, CxtI),
+                            RecursionLimit);
 }

 static Value *SimplifyFDivInst(Value *Op0, Value *Op1, const Query &Q,
@@ -1090,8 +1101,10 @@
 Value *llvm::SimplifyFDivInst(Value *Op0, Value *Op1, const DataLayout *DL,
                               const TargetLibraryInfo *TLI,
-                              const DominatorTree *DT) {
-  return ::SimplifyFDivInst(Op0, Op1, Query (DL, TLI, DT), RecursionLimit);
+                              const DominatorTree *DT,
+                              const Instruction *CxtI) {
+  return ::SimplifyFDivInst(Op0, Op1, Query (DL, TLI, DT, CxtI),
+                            RecursionLimit);
 }

 /// SimplifyRem - Given operands for an SRem or URem, see if we can
@@ -1160,8 +1173,10 @@
 Value *llvm::SimplifySRemInst(Value *Op0, Value *Op1, const DataLayout *DL,
                               const TargetLibraryInfo *TLI,
-                              const DominatorTree *DT) {
-  return ::SimplifySRemInst(Op0, Op1, Query (DL, TLI, DT), RecursionLimit);
+                              const DominatorTree *DT,
+                              const Instruction *CxtI) {
+  return ::SimplifySRemInst(Op0, Op1, Query (DL, TLI, DT, CxtI),
+                            RecursionLimit);
 }

 /// SimplifyURemInst - Given operands for a URem, see if we can
@@ -1176,8 +1191,10 @@
 Value *llvm::SimplifyURemInst(Value *Op0, Value *Op1, const DataLayout *DL,
                               const TargetLibraryInfo *TLI,
-                              const DominatorTree *DT) {
-  return ::SimplifyURemInst(Op0, Op1, Query (DL, TLI, DT), RecursionLimit);
+                              const DominatorTree *DT,
+                              const Instruction *CxtI) {
+  return ::SimplifyURemInst(Op0, Op1, Query (DL, TLI, DT, CxtI),
+                            RecursionLimit);
 }

 static Value *SimplifyFRemInst(Value *Op0, Value *Op1, const Query &,
@@ -1195,8 +1212,10 @@
 Value *llvm::SimplifyFRemInst(Value *Op0, Value *Op1, const DataLayout *DL,
                               const TargetLibraryInfo *TLI,
-                              const DominatorTree *DT) {
-  return ::SimplifyFRemInst(Op0, Op1, Query (DL, TLI, DT), RecursionLimit);
+                              const DominatorTree *DT,
+                              const Instruction *CxtI) {
+  return ::SimplifyFRemInst(Op0, Op1, Query (DL, TLI, DT, CxtI),
+                            RecursionLimit);
 }

 /// isUndefShift - Returns true if a shift by \c Amount always yields undef.
@@ -1284,8 +1303,8 @@
 Value *llvm::SimplifyShlInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW,
                              const DataLayout *DL, const TargetLibraryInfo *TLI,
-                             const DominatorTree *DT) {
-  return ::SimplifyShlInst(Op0, Op1, isNSW, isNUW, Query (DL, TLI, DT),
+                             const DominatorTree *DT, const Instruction *CxtI) {
+  return ::SimplifyShlInst(Op0, Op1, isNSW, isNUW, Query (DL, TLI, DT, CxtI),
                            RecursionLimit);
 }

@@ -1316,8 +1335,9 @@
 Value *llvm::SimplifyLShrInst(Value *Op0, Value *Op1, bool isExact,
                               const DataLayout *DL,
                               const TargetLibraryInfo *TLI,
-                              const DominatorTree *DT) {
-  return ::SimplifyLShrInst(Op0, Op1, isExact, Query (DL, TLI, DT),
+                              const DominatorTree *DT,
+                              const Instruction *CxtI) {
+  return ::SimplifyLShrInst(Op0, Op1, isExact, Query (DL, TLI, DT, CxtI),
                             RecursionLimit);
 }

@@ -1352,8 +1372,9 @@
 Value *llvm::SimplifyAShrInst(Value *Op0, Value *Op1, bool isExact,
                               const DataLayout *DL,
                               const TargetLibraryInfo *TLI,
-                              const DominatorTree *DT) {
-  return ::SimplifyAShrInst(Op0, Op1, isExact, Query (DL, TLI, DT),
+                              const DominatorTree *DT,
+                              const Instruction *CxtI) {
+  return ::SimplifyAShrInst(Op0, Op1, isExact, Query (DL, TLI, DT, CxtI),
                             RecursionLimit);
 }

@@ -1407,9 +1428,9 @@
   // A & (-A) = A if A is a power of two or zero.
   if (match(Op0, m_Neg(m_Specific(Op1))) ||
       match(Op1, m_Neg(m_Specific(Op0)))) {
-    if (isKnownToBeAPowerOfTwo(Op0, /*OrZero*/true))
+    if (isKnownToBeAPowerOfTwo(Op0, /*OrZero*/true, 0, Q.CxtI, Q.DT))
       return Op0;
-    if (isKnownToBeAPowerOfTwo(Op1, /*OrZero*/true))
+    if (isKnownToBeAPowerOfTwo(Op1, /*OrZero*/true, 0, Q.CxtI, Q.DT))
       return Op1;
   }

@@ -1447,8 +1468,9 @@
 Value *llvm::SimplifyAndInst(Value *Op0, Value *Op1, const DataLayout *DL,
                              const TargetLibraryInfo *TLI,
-                             const DominatorTree *DT) {
-  return ::SimplifyAndInst(Op0, Op1, Query (DL, TLI, DT), RecursionLimit);
+                             const DominatorTree *DT, const Instruction *CxtI) {
+  return ::SimplifyAndInst(Op0, Op1, Query (DL, TLI, DT, CxtI),
+                           RecursionLimit);
 }

 /// SimplifyOrInst - Given operands for an Or, see if we can
@@ -1540,18 +1562,22 @@
     if ((C2->getValue() & (C2->getValue() + 1)) == 0 && // C2 == 0+1+
         match(A, m_Add(m_Value(V1), m_Value(V2)))) {
       // Add commutes, try both ways.
-      if (V1 == B && MaskedValueIsZero(V2, C2->getValue()))
+      if (V1 == B && MaskedValueIsZero(V2, C2->getValue(), Q.DL,
+                                       0, Q.CxtI, Q.DT))
         return A;
-      if (V2 == B && MaskedValueIsZero(V1, C2->getValue()))
+      if (V2 == B && MaskedValueIsZero(V1, C2->getValue(), Q.DL,
+                                       0, Q.CxtI, Q.DT))
         return A;
     }

     // Or commutes, try both ways.
     if ((C1->getValue() & (C1->getValue() + 1)) == 0 &&
         match(B, m_Add(m_Value(V1), m_Value(V2)))) {
       // Add commutes, try both ways.
-      if (V1 == A && MaskedValueIsZero(V2, C1->getValue()))
+      if (V1 == A && MaskedValueIsZero(V2, C1->getValue(), Q.DL,
+                                       0, Q.CxtI, Q.DT))
         return B;
-      if (V2 == A && MaskedValueIsZero(V1, C1->getValue()))
+      if (V2 == A && MaskedValueIsZero(V1, C1->getValue(), Q.DL,
+                                       0, Q.CxtI, Q.DT))
         return B;
     }
   }

@@ -1568,8 +1594,8 @@
 Value *llvm::SimplifyOrInst(Value *Op0, Value *Op1, const DataLayout *DL,
                             const TargetLibraryInfo *TLI,
-                            const DominatorTree *DT) {
-  return ::SimplifyOrInst(Op0, Op1, Query (DL, TLI, DT), RecursionLimit);
+                            const DominatorTree *DT, const Instruction *CxtI) {
+  return ::SimplifyOrInst(Op0, Op1, Query (DL, TLI, DT, CxtI), RecursionLimit);
 }

 /// SimplifyXorInst - Given operands for a Xor, see if we can
@@ -1623,8 +1649,8 @@
 Value *llvm::SimplifyXorInst(Value *Op0, Value *Op1, const DataLayout *DL,
                              const TargetLibraryInfo *TLI,
-                             const DominatorTree *DT) {
-  return ::SimplifyXorInst(Op0, Op1, Query (DL, TLI, DT), RecursionLimit);
+                             const DominatorTree *DT, const Instruction *CxtI) {
+  return ::SimplifyXorInst(Op0, Op1, Query (DL, TLI, DT, CxtI), RecursionLimit);
 }

 static Type *GetCompareTy(Value *Op) {
@@ -1878,40 +1904,44 @@
       return getTrue(ITy);
     case ICmpInst::ICMP_EQ:
     case ICmpInst::ICMP_ULE:
-      if (isKnownNonZero(LHS, Q.DL))
+      if (isKnownNonZero(LHS, Q.DL, 0, Q.CxtI, Q.DT))
        return getFalse(ITy);
      break;
    case ICmpInst::ICMP_NE:
    case ICmpInst::ICMP_UGT:
-      if (isKnownNonZero(LHS, Q.DL))
+      if (isKnownNonZero(LHS, Q.DL, 0, Q.CxtI, Q.DT))
        return getTrue(ITy);
      break;
    case ICmpInst::ICMP_SLT:
-      ComputeSignBit(LHS, LHSKnownNonNegative, LHSKnownNegative, Q.DL);
+      ComputeSignBit(LHS, LHSKnownNonNegative, LHSKnownNegative, Q.DL,
+                     0, Q.CxtI, Q.DT);
      if (LHSKnownNegative)
        return getTrue(ITy);
      if (LHSKnownNonNegative)
        return getFalse(ITy);
      break;
    case ICmpInst::ICMP_SLE:
-      ComputeSignBit(LHS, LHSKnownNonNegative, LHSKnownNegative, Q.DL);
+      ComputeSignBit(LHS, LHSKnownNonNegative, LHSKnownNegative, Q.DL,
+                     0, Q.CxtI, Q.DT);
      if (LHSKnownNegative)
        return getTrue(ITy);
-      if (LHSKnownNonNegative && isKnownNonZero(LHS, Q.DL))
+      if (LHSKnownNonNegative && isKnownNonZero(LHS, Q.DL, 0, Q.CxtI, Q.DT))
        return getFalse(ITy);
      break;
    case ICmpInst::ICMP_SGE:
-      ComputeSignBit(LHS, LHSKnownNonNegative, LHSKnownNegative, Q.DL);
+      ComputeSignBit(LHS, LHSKnownNonNegative, LHSKnownNegative, Q.DL,
+                     0, Q.CxtI, Q.DT);
      if (LHSKnownNegative)
        return getFalse(ITy);
      if (LHSKnownNonNegative)
        return getTrue(ITy);
      break;
    case ICmpInst::ICMP_SGT:
-      ComputeSignBit(LHS, LHSKnownNonNegative, LHSKnownNegative, Q.DL);
+      ComputeSignBit(LHS, LHSKnownNonNegative, LHSKnownNegative, Q.DL,
+                     0, Q.CxtI, Q.DT);
      if (LHSKnownNegative)
        return getFalse(ITy);
-      if (LHSKnownNonNegative && isKnownNonZero(LHS, Q.DL))
+      if (LHSKnownNonNegative && isKnownNonZero(LHS, Q.DL, 0, Q.CxtI, Q.DT))
        return getTrue(ITy);
      break;
    }

@@ -2181,10 +2211,10 @@
       uint32_t BitWidth = CI->getBitWidth();
       APInt LHSKnownZero(BitWidth, 0);
       APInt LHSKnownOne(BitWidth, 0);
-      computeKnownBits(LHS, LHSKnownZero, LHSKnownOne);
+      computeKnownBits(LHS, LHSKnownZero, LHSKnownOne, Q.DL, 0, Q.CxtI, Q.DT);
       APInt RHSKnownZero(BitWidth, 0);
       APInt RHSKnownOne(BitWidth, 0);
-      computeKnownBits(RHS, RHSKnownZero, RHSKnownOne);
+      computeKnownBits(RHS, RHSKnownZero, RHSKnownOne, Q.DL, 0, Q.CxtI, Q.DT);
       if (((LHSKnownOne & RHSKnownZero) != 0) ||
           ((LHSKnownZero & RHSKnownOne) != 0))
         return (Pred == ICmpInst::ICMP_EQ)
@@ -2286,7 +2316,8 @@
       break;
     case ICmpInst::ICMP_SGT:
     case ICmpInst::ICMP_SGE:
-      ComputeSignBit(RHS, KnownNonNegative, KnownNegative, Q.DL);
+      ComputeSignBit(RHS, KnownNonNegative, KnownNegative, Q.DL,
+                     0, Q.CxtI, Q.DT);
       if (!KnownNonNegative)
         break;
       // fall-through
@@ -2296,7 +2327,8 @@
       return getFalse(ITy);
     case ICmpInst::ICMP_SLT:
     case ICmpInst::ICMP_SLE:
-      ComputeSignBit(RHS, KnownNonNegative, KnownNegative, Q.DL);
+      ComputeSignBit(RHS, KnownNonNegative, KnownNegative, Q.DL,
+                     0, Q.CxtI, Q.DT);
       if (!KnownNonNegative)
         break;
       // fall-through
@@ -2315,7 +2347,8 @@
       break;
     case ICmpInst::ICMP_SGT:
     case ICmpInst::ICMP_SGE:
-      ComputeSignBit(LHS, KnownNonNegative, KnownNegative, Q.DL);
+      ComputeSignBit(LHS, KnownNonNegative, KnownNegative, Q.DL,
+                     0, Q.CxtI, Q.DT);
       if (!KnownNonNegative)
         break;
       // fall-through
@@ -2325,7 +2358,8 @@
       return getTrue(ITy);
     case ICmpInst::ICMP_SLT:
     case ICmpInst::ICMP_SLE:
-      ComputeSignBit(LHS, KnownNonNegative, KnownNegative, Q.DL);
+      ComputeSignBit(LHS, KnownNonNegative, KnownNegative, Q.DL,
+                     0, Q.CxtI, Q.DT);
       if (!KnownNonNegative)
         break;
       // fall-through
@@ -2610,8 +2644,9 @@
 Value *llvm::SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
                               const DataLayout *DL,
                               const TargetLibraryInfo *TLI,
-                              const DominatorTree *DT) {
-  return ::SimplifyICmpInst(Predicate, LHS, RHS, Query (DL, TLI, DT),
+                              const DominatorTree *DT,
+                              Instruction *CxtI) {
+  return ::SimplifyICmpInst(Predicate, LHS, RHS, Query (DL, TLI, DT, CxtI),
                             RecursionLimit);
 }

@@ -2707,8 +2742,9 @@
 Value *llvm::SimplifyFCmpInst(unsigned Predicate, Value *LHS, Value *RHS,
                               const DataLayout *DL,
                               const TargetLibraryInfo *TLI,
-                              const DominatorTree *DT) {
-  return ::SimplifyFCmpInst(Predicate, LHS, RHS, Query (DL, TLI, DT),
+                              const DominatorTree *DT,
+                              const Instruction *CxtI) {
+  return ::SimplifyFCmpInst(Predicate, LHS, RHS, Query (DL, TLI, DT, CxtI),
                             RecursionLimit);
 }

@@ -2746,9 +2782,10 @@
 Value *llvm::SimplifySelectInst(Value *Cond, Value *TrueVal, Value *FalseVal,
                                 const DataLayout *DL,
                                 const TargetLibraryInfo *TLI,
-                                const DominatorTree *DT) {
-  return ::SimplifySelectInst(Cond, TrueVal, FalseVal, Query (DL, TLI, DT),
-                              RecursionLimit);
+                                const DominatorTree *DT,
+                                const Instruction *CxtI) {
+  return ::SimplifySelectInst(Cond, TrueVal, FalseVal,
+                              Query (DL, TLI, DT, CxtI), RecursionLimit);
 }

 /// SimplifyGEPInst - Given operands for a GetElementPtrInst, see if we can
@@ -2792,8 +2829,8 @@
 Value *llvm::SimplifyGEPInst(ArrayRef<Value *> Ops, const DataLayout *DL,
                              const TargetLibraryInfo *TLI,
-                             const DominatorTree *DT) {
-  return ::SimplifyGEPInst(Ops, Query (DL, TLI, DT), RecursionLimit);
+                             const DominatorTree *DT, const Instruction *CxtI) {
+  return ::SimplifyGEPInst(Ops, Query (DL, TLI, DT, CxtI), RecursionLimit);
 }

 /// SimplifyInsertValueInst - Given operands for an InsertValueInst, see if we
@@ -2829,8 +2866,9 @@
                                      ArrayRef<unsigned> Idxs,
                                      const DataLayout *DL,
                                      const TargetLibraryInfo *TLI,
-                                     const DominatorTree *DT) {
-  return ::SimplifyInsertValueInst(Agg, Val, Idxs, Query (DL, TLI, DT),
+                                     const DominatorTree *DT,
+                                     const Instruction *CxtI) {
+  return ::SimplifyInsertValueInst(Agg, Val, Idxs, Query (DL, TLI, DT, CxtI),
                                    RecursionLimit);
 }

@@ -2877,8 +2915,9 @@
 Value *llvm::SimplifyTruncInst(Value *Op, Type *Ty, const DataLayout *DL,
                                const TargetLibraryInfo *TLI,
-                               const DominatorTree *DT) {
-  return ::SimplifyTruncInst(Op, Ty, Query (DL, TLI, DT), RecursionLimit);
+                               const DominatorTree *DT,
+                               const Instruction *CxtI) {
+  return ::SimplifyTruncInst(Op, Ty, Query (DL, TLI, DT, CxtI), RecursionLimit);
 }

 //=== Helper functions for higher up the class hierarchy.
@@ -2950,8 +2989,9 @@
 Value *llvm::SimplifyBinOp(unsigned Opcode, Value *LHS, Value *RHS,
                            const DataLayout *DL, const TargetLibraryInfo *TLI,
-                           const DominatorTree *DT) {
-  return ::SimplifyBinOp(Opcode, LHS, RHS, Query (DL, TLI, DT), RecursionLimit);
+                           const DominatorTree *DT, const Instruction *CxtI) {
+  return ::SimplifyBinOp(Opcode, LHS, RHS, Query (DL, TLI, DT, CxtI),
+                         RecursionLimit);
 }

 /// SimplifyCmpInst - Given operands for a CmpInst, see if we can
@@ -2965,8 +3005,8 @@
 Value *llvm::SimplifyCmpInst(unsigned Predicate, Value *LHS, Value *RHS,
                              const DataLayout *DL, const TargetLibraryInfo *TLI,
-                             const DominatorTree *DT) {
-  return ::SimplifyCmpInst(Predicate, LHS, RHS, Query (DL, TLI, DT),
+                             const DominatorTree *DT, const Instruction *CxtI) {
+  return ::SimplifyCmpInst(Predicate, LHS, RHS, Query (DL, TLI, DT, CxtI),
                            RecursionLimit);
 }

@@ -3041,15 +3081,15 @@
 Value *llvm::SimplifyCall(Value *V, User::op_iterator ArgBegin,
                           User::op_iterator ArgEnd, const DataLayout *DL,
                           const TargetLibraryInfo *TLI,
-                          const DominatorTree *DT) {
-  return ::SimplifyCall(V, ArgBegin, ArgEnd, Query(DL, TLI, DT),
+                          const DominatorTree *DT, const Instruction *CxtI) {
+  return ::SimplifyCall(V, ArgBegin, ArgEnd, Query(DL, TLI, DT, CxtI),
                         RecursionLimit);
 }

 Value *llvm::SimplifyCall(Value *V, ArrayRef<Value *> Args,
                           const DataLayout *DL, const TargetLibraryInfo *TLI,
-                          const DominatorTree *DT) {
-  return ::SimplifyCall(V, Args.begin(), Args.end(), Query(DL, TLI, DT),
+                          const DominatorTree *DT, const Instruction *CxtI) {
+  return ::SimplifyCall(V, Args.begin(), Args.end(), Query(DL, TLI, DT, CxtI),
                         RecursionLimit);
 }

@@ -3066,109 +3106,120 @@
     break;
   case Instruction::FAdd:
     Result = SimplifyFAddInst(I->getOperand(0), I->getOperand(1),
-                              I->getFastMathFlags(), DL, TLI, DT);
+                              I->getFastMathFlags(), DL, TLI, DT, I);
     break;
   case Instruction::Add:
     Result = SimplifyAddInst(I->getOperand(0), I->getOperand(1),
                              cast<BinaryOperator>(I)->hasNoSignedWrap(),
                              cast<BinaryOperator>(I)->hasNoUnsignedWrap(),
-                             DL, TLI, DT);
+                             DL, TLI, DT, I);
    break;
  case Instruction::FSub:
    Result = SimplifyFSubInst(I->getOperand(0), I->getOperand(1),
-                              I->getFastMathFlags(), DL, TLI, DT);
+                              I->getFastMathFlags(), DL, TLI, DT, I);
    break;
  case Instruction::Sub:
    Result = SimplifySubInst(I->getOperand(0), I->getOperand(1),
                             cast<BinaryOperator>(I)->hasNoSignedWrap(),
                             cast<BinaryOperator>(I)->hasNoUnsignedWrap(),
-                             DL, TLI, DT);
+                             DL, TLI, DT, I);
    break;
  case Instruction::FMul:
    Result = SimplifyFMulInst(I->getOperand(0), I->getOperand(1),
-                              I->getFastMathFlags(), DL, TLI, DT);
+                              I->getFastMathFlags(), DL, TLI, DT, I);
    break;
  case Instruction::Mul:
-    Result = SimplifyMulInst(I->getOperand(0), I->getOperand(1), DL, TLI, DT);
+    Result = SimplifyMulInst(I->getOperand(0), I->getOperand(1),
+                             DL, TLI, DT, I);
    break;
  case Instruction::SDiv:
-    Result = SimplifySDivInst(I->getOperand(0), I->getOperand(1), DL, TLI, DT);
+    Result = SimplifySDivInst(I->getOperand(0), I->getOperand(1),
+                              DL, TLI, DT, I);
    break;
  case Instruction::UDiv:
-    Result = SimplifyUDivInst(I->getOperand(0), I->getOperand(1), DL, TLI, DT);
+    Result = SimplifyUDivInst(I->getOperand(0), I->getOperand(1),
+                              DL, TLI, DT, I);
    break;
  case Instruction::FDiv:
-    Result = SimplifyFDivInst(I->getOperand(0), I->getOperand(1), DL, TLI, DT);
+    Result = SimplifyFDivInst(I->getOperand(0), I->getOperand(1),
+                              DL, TLI, DT, I);
    break;
  case Instruction::SRem:
-    Result = SimplifySRemInst(I->getOperand(0), I->getOperand(1), DL, TLI, DT);
+    Result = SimplifySRemInst(I->getOperand(0), I->getOperand(1),
+                              DL, TLI, DT, I);
    break;
  case Instruction::URem:
-    Result = SimplifyURemInst(I->getOperand(0), I->getOperand(1), DL, TLI, DT);
+    Result = SimplifyURemInst(I->getOperand(0), I->getOperand(1),
+                              DL, TLI, DT, I);
    break;
  case Instruction::FRem:
-    Result = SimplifyFRemInst(I->getOperand(0), I->getOperand(1), DL, TLI, DT);
+    Result = SimplifyFRemInst(I->getOperand(0), I->getOperand(1),
+                              DL, TLI, DT, I);
    break;
  case Instruction::Shl:
    Result = SimplifyShlInst(I->getOperand(0), I->getOperand(1),
                             cast<BinaryOperator>(I)->hasNoSignedWrap(),
                             cast<BinaryOperator>(I)->hasNoUnsignedWrap(),
-                             DL, TLI, DT);
+                             DL, TLI, DT, I);
    break;
  case Instruction::LShr:
    Result = SimplifyLShrInst(I->getOperand(0), I->getOperand(1),
                              cast<BinaryOperator>(I)->isExact(),
-                              DL, TLI, DT);
+                              DL, TLI, DT, I);
    break;
  case Instruction::AShr:
    Result = SimplifyAShrInst(I->getOperand(0), I->getOperand(1),
                              cast<BinaryOperator>(I)->isExact(),
-                              DL, TLI, DT);
+                              DL, TLI, DT, I);
    break;
  case Instruction::And:
-    Result = SimplifyAndInst(I->getOperand(0), I->getOperand(1), DL, TLI, DT);
+    Result = SimplifyAndInst(I->getOperand(0), I->getOperand(1),
+                             DL, TLI, DT, I);
    break;
  case Instruction::Or:
-    Result = SimplifyOrInst(I->getOperand(0), I->getOperand(1), DL, TLI, DT);
+    Result = SimplifyOrInst(I->getOperand(0), I->getOperand(1), DL, TLI, DT, I);
    break;
  case Instruction::Xor:
-    Result = SimplifyXorInst(I->getOperand(0), I->getOperand(1), DL, TLI, DT);
+    Result = SimplifyXorInst(I->getOperand(0), I->getOperand(1),
+                             DL, TLI, DT, I);
    break;
  case Instruction::ICmp:
    Result = SimplifyICmpInst(cast<ICmpInst>(I)->getPredicate(),
-                              I->getOperand(0), I->getOperand(1), DL, TLI, DT);
+                              I->getOperand(0), I->getOperand(1),
+                              DL, TLI, DT, I);
    break;
  case Instruction::FCmp:
    Result = SimplifyFCmpInst(cast<FCmpInst>(I)->getPredicate(),
-                              I->getOperand(0), I->getOperand(1), DL, TLI, DT);
+                              I->getOperand(0), I->getOperand(1),
+                              DL, TLI, DT, I);
    break;
  case Instruction::Select:
    Result = SimplifySelectInst(I->getOperand(0), I->getOperand(1),
-                                I->getOperand(2), DL, TLI, DT);
+                                I->getOperand(2), DL, TLI, DT, I);
    break;
  case Instruction::GetElementPtr: {
    SmallVector<Value *, 8> Ops(I->op_begin(), I->op_end());
-    Result = SimplifyGEPInst(Ops, DL, TLI, DT);
+    Result = SimplifyGEPInst(Ops, DL, TLI, DT, I);
    break;
  }
  case Instruction::InsertValue: {
    InsertValueInst *IV = cast<InsertValueInst>(I);
    Result = SimplifyInsertValueInst(IV->getAggregateOperand(),
                                     IV->getInsertedValueOperand(),
-                                     IV->getIndices(), DL, TLI, DT);
+                                     IV->getIndices(), DL, TLI, DT, I);
    break;
  }
  case Instruction::PHI:
-    Result = SimplifyPHINode(cast<PHINode>(I), Query (DL, TLI, DT));
+    Result = SimplifyPHINode(cast<PHINode>(I), Query (DL, TLI, DT, I));
    break;
  case Instruction::Call: {
    CallSite CS(cast<CallInst>(I));
    Result = SimplifyCall(CS.getCalledValue(), CS.arg_begin(), CS.arg_end(),
-                          DL, TLI, DT);
+                          DL, TLI, DT, I);
    break;
  }
  case Instruction::Trunc:
-    Result = SimplifyTruncInst(I->getOperand(0), I->getType(), DL, TLI, DT);
+    Result = SimplifyTruncInst(I->getOperand(0), I->getType(), DL, TLI, DT, I);
    break;
  }
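Note that SimplifyInstruction now threads the instruction itself as the context for every case in the switch above, so each query is anchored at the instruction being simplified. A minimal driver (illustrative only, not from this patch):

  static bool simplifyOne(Instruction *I, const DataLayout *DL,
                          const TargetLibraryInfo *TLI,
                          const DominatorTree *DT) {
    // Because I is its own context, an invariant that dominates I can
    // justify folding I even when I's operands alone are not enough.
    if (Value *V = SimplifyInstruction(I, DL, TLI, DT)) {
      I->replaceAllUsesWith(V);
      return true;
    }
    return false;
  }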
Index: lib/Analysis/Lint.cpp
===================================================================
--- lib/Analysis/Lint.cpp
+++ lib/Analysis/Lint.cpp
@@ -504,7 +504,7 @@
             "Undefined result: Shift count out of range", &I);
 }

-static bool isZero(Value *V, const DataLayout *DL) {
+static bool isZero(Value *V, const DataLayout *DL, DominatorTree *DT) {
   // Assume undef could be zero.
   if (isa<UndefValue>(V))
     return true;

   VectorType *VecTy = dyn_cast<VectorType>(V->getType());
   if (!VecTy) {
     unsigned BitWidth = V->getType()->getIntegerBitWidth();
     APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0);
-    computeKnownBits(V, KnownZero, KnownOne, DL);
+    computeKnownBits(V, KnownZero, KnownOne, DL,
+                     0, dyn_cast<Instruction>(V), DT);
     return KnownZero.isAllOnesValue();
   }

@@ -543,22 +544,22 @@
 }

 void Lint::visitSDiv(BinaryOperator &I) {
-  Assert1(!isZero(I.getOperand(1), DL),
+  Assert1(!isZero(I.getOperand(1), DL, DT),
           "Undefined behavior: Division by zero", &I);
 }

 void Lint::visitUDiv(BinaryOperator &I) {
-  Assert1(!isZero(I.getOperand(1), DL),
+  Assert1(!isZero(I.getOperand(1), DL, DT),
           "Undefined behavior: Division by zero", &I);
 }

 void Lint::visitSRem(BinaryOperator &I) {
-  Assert1(!isZero(I.getOperand(1), DL),
+  Assert1(!isZero(I.getOperand(1), DL, DT),
           "Undefined behavior: Division by zero", &I);
 }

 void Lint::visitURem(BinaryOperator &I) {
-  Assert1(!isZero(I.getOperand(1), DL),
+  Assert1(!isZero(I.getOperand(1), DL, DT),
           "Undefined behavior: Division by zero", &I);
 }
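Lint anchors the query at the divisor's own definition (the dyn_cast<Instruction>(V) above), so a dominating invariant asserting the divisor is non-zero can silence the division-by-zero diagnostic. A hypothetical helper showing the anchored form of the query:

  static bool divisorKnownNonZero(Value *Divisor, Instruction *DivI,
                                  const DataLayout *DL,
                                  const DominatorTree *DT) {
    // Anchoring the query at the division itself (rather than letting it
    // default to the divisor's definition) widens the set of invariants
    // that can apply.
    return isKnownNonZero(Divisor, DL, 0, DivI, DT);
  }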
Index: lib/Analysis/ScalarEvolution.cpp
===================================================================
--- lib/Analysis/ScalarEvolution.cpp
+++ lib/Analysis/ScalarEvolution.cpp
@@ -3395,7 +3395,7 @@
   // For a SCEVUnknown, ask ValueTracking.
   unsigned BitWidth = getTypeSizeInBits(U->getType());
   APInt Zeros(BitWidth, 0), Ones(BitWidth, 0);
-  computeKnownBits(U->getValue(), Zeros, Ones);
+  computeKnownBits(U->getValue(), Zeros, Ones, DL, 0, nullptr, DT);
   return Zeros.countTrailingOnes();
 }

@@ -3534,7 +3534,7 @@
   if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(S)) {
     // For a SCEVUnknown, ask ValueTracking.
     APInt Zeros(BitWidth, 0), Ones(BitWidth, 0);
-    computeKnownBits(U->getValue(), Zeros, Ones, DL);
+    computeKnownBits(U->getValue(), Zeros, Ones, DL, 0, nullptr, DT);
     if (Ones == ~Zeros + 1)
       return setUnsignedRange(U, ConservativeResult);
     return setUnsignedRange(U,
@@ -3686,7 +3686,7 @@
     // For a SCEVUnknown, ask ValueTracking.
     if (!U->getValue()->getType()->isIntegerTy() && !DL)
       return setSignedRange(U, ConservativeResult);
-    unsigned NS = ComputeNumSignBits(U->getValue(), DL);
+    unsigned NS = ComputeNumSignBits(U->getValue(), DL, 0, nullptr, DT);
     if (NS <= 1)
       return setSignedRange(U, ConservativeResult);
     return setSignedRange(U, ConservativeResult.intersectWith(
@@ -3793,7 +3793,8 @@
       unsigned TZ = A.countTrailingZeros();
       unsigned BitWidth = A.getBitWidth();
       APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0);
-      computeKnownBits(U->getOperand(0), KnownZero, KnownOne, DL);
+      computeKnownBits(U->getOperand(0), KnownZero, KnownOne, DL,
+                       0, nullptr, DT);
       APInt EffectiveMask =
           APInt::getLowBitsSet(BitWidth, BitWidth - LZ - TZ).shl(TZ);
Index: lib/Analysis/ValueTracking.cpp
===================================================================
--- lib/Analysis/ValueTracking.cpp
+++ lib/Analysis/ValueTracking.cpp
@@ -14,12 +14,14 @@

 #include "llvm/Analysis/ValueTracking.h"
 #include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/DenseSet.h"
 #include "llvm/Analysis/InstructionSimplify.h"
 #include "llvm/Analysis/MemoryBuiltins.h"
 #include "llvm/IR/CallSite.h"
 #include "llvm/IR/ConstantRange.h"
 #include "llvm/IR/Constants.h"
 #include "llvm/IR/DataLayout.h"
+#include "llvm/IR/Dominators.h"
 #include "llvm/IR/GetElementPtrTypeIterator.h"
 #include "llvm/IR/GlobalAlias.h"
 #include "llvm/IR/GlobalVariable.h"
@@ -45,10 +47,116 @@
   return TD ? TD->getPointerTypeSizeInBits(Ty) : 0;
 }

+// Many of these functions have internal versions that take an invariant
+// exclusion set. This is because of the potential for mutual recursion to
+// cause computeKnownBits to repeatedly visit the same invariant intrinsic. The
+// classic case of this is invariant(x = y), which will attempt to determine
+// bits in x from bits in y, which will attempt to determine bits in y from
+// bits in x, etc. Regarding the mutual recursion, computeKnownBits can call
+// isKnownNonZero, which calls computeKnownBits and ComputeSignBit and
+// isKnownToBeAPowerOfTwo (all of which can call computeKnownBits), and so on.
+typedef SmallPtrSet<const Value *, 8> ExclInvsSet;
+
+// Simplifying using an invariant can only be done in a particular control-flow
+// context (the context instruction provides that context). If an invariant and
+// the context instruction are not in the same block then the DT helps in
+// figuring out if we can use it.
+struct Query {
+  ExclInvsSet ExclInvs;
+  const Instruction *CxtI;
+  const DominatorTree *DT;
+
+  Query(const Instruction *CxtI = nullptr, const DominatorTree *DT = nullptr)
+    : CxtI(CxtI), DT(DT) {}
+
+  Query(const Query &Q, const Value *NewExcl)
+    : ExclInvs(Q.ExclInvs), CxtI(Q.CxtI), DT(Q.DT) {
+    ExclInvs.insert(NewExcl);
+  }
+};
+
+static const Instruction *findContext(Value *V, const Instruction *CxtI) {
+  if (CxtI) {
+    return CxtI;
+  } else if (const Instruction *I = dyn_cast<Instruction>(V)) {
+    return I;
+  } else if (const Argument *A = dyn_cast<Argument>(V)) {
+    const Function *F = A->getParent();
+    if (!F->empty() &&
+        F->getEntryBlock().getFirstInsertionPt() != F->getEntryBlock().end())
+      return F->getEntryBlock().getFirstInsertionPt();
+  }
+
+  return nullptr;
+}
+
+static void computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne,
+                             const DataLayout *TD, unsigned Depth,
+                             const Query &Q);
+
+void llvm::computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne,
+                            const DataLayout *TD, unsigned Depth,
+                            const Instruction *CxtI, const DominatorTree *DT) {
+  CxtI = findContext(V, CxtI);
+  ::computeKnownBits(V, KnownZero, KnownOne, TD, Depth, Query(CxtI, DT));
+}
+
+static void ComputeSignBit(Value *V, bool &KnownZero, bool &KnownOne,
+                           const DataLayout *TD, unsigned Depth,
+                           const Query &Q);
+
+void llvm::ComputeSignBit(Value *V, bool &KnownZero, bool &KnownOne,
+                          const DataLayout *TD, unsigned Depth,
+                          const Instruction *CxtI, const DominatorTree *DT) {
+  CxtI = findContext(V, CxtI);
+  ::ComputeSignBit(V, KnownZero, KnownOne, TD, Depth, Query(CxtI, DT));
+}
+
+static bool isKnownToBeAPowerOfTwo(Value *V, bool OrZero, unsigned Depth,
+                                   const Query &Q);
+
+bool llvm::isKnownToBeAPowerOfTwo(Value *V, bool OrZero, unsigned Depth,
+                                  const Instruction *CxtI,
+                                  const DominatorTree *DT) {
+  CxtI = findContext(V, CxtI);
+  return ::isKnownToBeAPowerOfTwo(V, OrZero, Depth, Query(CxtI, DT));
+}
+
+static bool isKnownNonZero(Value *V, const DataLayout *TD, unsigned Depth,
+                           const Query &Q);
+
+bool llvm::isKnownNonZero(Value *V, const DataLayout *TD, unsigned Depth,
+                          const Instruction *CxtI, const DominatorTree *DT) {
+  CxtI = findContext(V, CxtI);
+  return ::isKnownNonZero(V, TD, Depth, Query(CxtI, DT));
+}
+
+static bool MaskedValueIsZero(Value *V, const APInt &Mask,
+                              const DataLayout *TD, unsigned Depth,
+                              const Query &Q);
+
+bool llvm::MaskedValueIsZero(Value *V, const APInt &Mask,
+                             const DataLayout *TD, unsigned Depth,
+                             const Instruction *CxtI, const DominatorTree *DT) {
+  CxtI = findContext(V, CxtI);
+  return ::MaskedValueIsZero(V, Mask, TD, Depth, Query(CxtI, DT));
+}
+
+static unsigned ComputeNumSignBits(Value *V, const DataLayout *TD,
+                                   unsigned Depth, const Query &Q);
+
+unsigned llvm::ComputeNumSignBits(Value *V, const DataLayout *TD,
+                                  unsigned Depth, const Instruction *CxtI,
+                                  const DominatorTree *DT) {
+  CxtI = findContext(V, CxtI);
+  return ::ComputeNumSignBits(V, TD, Depth, Query(CxtI, DT));
+}
+
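All six public entry points above share one shape: resolve a default context via findContext (the value's own definition, or the entry block's first insertion point for an argument), then dispatch to the internal Query-based implementation with an empty exclusion set. As a sketch of the resulting equivalence (hypothetical helper):

  static bool nonZeroAtOwnDef(Instruction *I, const DataLayout *TD,
                              const DominatorTree *DT) {
    // Passing a null context is equivalent to passing I itself here,
    // because findContext falls back to the value's own definition.
    return isKnownNonZero(I, TD, 0, nullptr, DT);
  }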
 static void computeKnownBitsAddSub(bool Add, Value *Op0, Value *Op1, bool NSW,
                                    APInt &KnownZero, APInt &KnownOne,
                                    APInt &KnownZero2, APInt &KnownOne2,
-                                   const DataLayout *TD, unsigned Depth) {
+                                   const DataLayout *TD, unsigned Depth,
+                                   const Query &Q) {
   if (!Add) {
     if (ConstantInt *CLHS = dyn_cast<ConstantInt>(Op0)) {
       // We know that the top bits of C-X are clear if X contains less bits
@@ -59,7 +167,7 @@
       unsigned NLZ = (CLHS->getValue()+1).countLeadingZeros();
       // NLZ can't be BitWidth with no sign bit
       APInt MaskV = APInt::getHighBitsSet(BitWidth, NLZ+1);
-      llvm::computeKnownBits(Op1, KnownZero2, KnownOne2, TD, Depth+1);
+      computeKnownBits(Op1, KnownZero2, KnownOne2, TD, Depth+1, Q);

       // If all of the MaskV bits are known to be zero, then we know the
       // output top bits are zero, because we now know that the output is
@@ -80,10 +188,10 @@
   // result. For an add, this works with either operand. For a subtract,
   // this only works if the known zeros are in the right operand.
   APInt LHSKnownZero(BitWidth, 0), LHSKnownOne(BitWidth, 0);
-  llvm::computeKnownBits(Op0, LHSKnownZero, LHSKnownOne, TD, Depth+1);
+  computeKnownBits(Op0, LHSKnownZero, LHSKnownOne, TD, Depth+1, Q);
   unsigned LHSKnownZeroOut = LHSKnownZero.countTrailingOnes();

-  llvm::computeKnownBits(Op1, KnownZero2, KnownOne2, TD, Depth+1);
+  computeKnownBits(Op1, KnownZero2, KnownOne2, TD, Depth+1, Q);
   unsigned RHSKnownZeroOut = KnownZero2.countTrailingOnes();

   // Determine which operand has more trailing zeros, and use that
@@ -131,10 +239,11 @@
 static void computeKnownBitsMul(Value *Op0, Value *Op1, bool NSW,
                                 APInt &KnownZero, APInt &KnownOne,
                                 APInt &KnownZero2, APInt &KnownOne2,
-                                const DataLayout *TD, unsigned Depth) {
+                                const DataLayout *TD, unsigned Depth,
+                                const Query &Q) {
   unsigned BitWidth = KnownZero.getBitWidth();
-  computeKnownBits(Op1, KnownZero, KnownOne, TD, Depth+1);
-  computeKnownBits(Op0, KnownZero2, KnownOne2, TD, Depth+1);
+  computeKnownBits(Op1, KnownZero, KnownOne, TD, Depth+1, Q);
+  computeKnownBits(Op0, KnownZero2, KnownOne2, TD, Depth+1, Q);

   bool isKnownNegative = false;
   bool isKnownNonNegative = false;
@@ -155,9 +264,9 @@
       // negative or zero.
       if (!isKnownNonNegative)
         isKnownNegative = (isKnownNegativeOp1 && isKnownNonNegativeOp0 &&
-                           isKnownNonZero(Op0, TD, Depth)) ||
+                           isKnownNonZero(Op0, TD, Depth, Q)) ||
                           (isKnownNegativeOp0 && isKnownNonNegativeOp1 &&
-                           isKnownNonZero(Op1, TD, Depth));
+                           isKnownNonZero(Op1, TD, Depth, Q));
     }
   }

@@ -209,6 +318,249 @@
   KnownZero = APInt::getHighBitsSet(BitWidth, MinLeadingZeros);
 }

+static bool isEphemeralValueOf(Instruction *I, const Value *E) {
+  SmallVector<const Value *, 16> WorkSet(1, I);
+  DenseSet<const Value *> Visited;
+  DenseSet<const Value *> EphValues;
+
+  while (!WorkSet.empty()) {
+    const Value *V = WorkSet.pop_back_val();
+    if (!Visited.insert(V).second)
+      continue;
+
+    // If all uses of this value are ephemeral, then so is this value.
+    bool FoundNEUse = false;
+    for (const User *I : V->users())
+      if (!EphValues.count(I)) {
+        FoundNEUse = true;
+        break;
+      }
+
+    if (!FoundNEUse) {
+      if (V == E)
+        return true;
+
+      EphValues.insert(V);
+      if (const User *U = dyn_cast<User>(V))
+        for (User::const_op_iterator J = U->op_begin(), JE = U->op_end();
+             J != JE; ++J) {
+          if (isSafeToSpeculativelyExecute(*J))
+            WorkSet.push_back(*J);
+        }
+    }
+  }
+
+  return false;
+}
+
+// Is this an intrinsic that cannot be speculated but also cannot trap?
+static bool isInvariantLikeIntrinsic(const Instruction *I) {
+  if (const CallInst *CI = dyn_cast<CallInst>(I))
+    if (Function *F = CI->getCalledFunction())
+      switch (F->getIntrinsicID()) {
+      default: break;
+      // FIXME: This list is repeated from NoTTI::getIntrinsicCost.
+      case Intrinsic::dbg_declare:
+      case Intrinsic::dbg_value:
+      case Intrinsic::invariant:
+      case Intrinsic::invariant_start:
+      case Intrinsic::invariant_end:
+      case Intrinsic::lifetime_start:
+      case Intrinsic::lifetime_end:
+      case Intrinsic::objectsize:
+      case Intrinsic::ptr_annotation:
+      case Intrinsic::var_annotation:
+        return true;
+      }
+
+  return false;
+}
+
+static bool isValidInvariantForContext(Value *V, const Query &Q,
+                                       const DataLayout *DL) {
+  Instruction *Inv = cast<Instruction>(V);
+
+  // There are two restrictions on the use of an invariant:
+  //   1. The invariant must dominate the context (or the control flow must
+  //      reach the invariant whenever it reaches the context).
+  //   2. The context must not be in the invariant's set of ephemeral values
+  //      (otherwise we will use the invariant to prove that the condition
+  //      feeding the invariant is trivially true, thus causing the removal
+  //      of the invariant).
+
+  if (Q.DT) {
+    if (Q.DT->dominates(Inv, Q.CxtI)) {
+      return true;
+    } else if (Inv->getParent() == Q.CxtI->getParent()) {
+      // The context comes first, but they're both in the same block. Make
+      // sure there is nothing in between that might interrupt the control
+      // flow.
+      for (BasicBlock::const_iterator I =
+             std::next(BasicBlock::const_iterator(Q.CxtI)),
+           IE(Inv); I != IE; ++I)
+        if (!isSafeToSpeculativelyExecute(I, DL) &&
+            !isInvariantLikeIntrinsic(I))
+          return false;
+
+      return !isEphemeralValueOf(Inv, Q.CxtI);
+    }
+
+    return false;
+  }
+
+  // When we don't have a DT, we do a limited search...
+  if (Inv->getParent() == Q.CxtI->getParent()->getSinglePredecessor()) {
+    return true;
+  } else if (Inv->getParent() == Q.CxtI->getParent()) {
+    // Search forward from the invariant until we reach the context (or the
+    // end of the block); the common case is that the invariant will come
+    // first.
+    for (BasicBlock::iterator I = std::next(BasicBlock::iterator(Inv)),
+         IE = Inv->getParent()->end(); I != IE; ++I)
+      if (I == Q.CxtI)
+        return true;
+
+    // The context must come first...
+    for (BasicBlock::const_iterator I =
+           std::next(BasicBlock::const_iterator(Q.CxtI)),
+         IE(Inv); I != IE; ++I)
+      if (!isSafeToSpeculativelyExecute(I, DL) &&
+          !isInvariantLikeIntrinsic(I))
+        return false;
+
+    return !isEphemeralValueOf(Inv, Q.CxtI);
+  }
+
+  return false;
+}
+
+bool llvm::isValidInvariantForContext(const Instruction *I,
+                                      const Instruction *CxtI,
+                                      const DataLayout *DL,
+                                      const DominatorTree *DT) {
+  return ::isValidInvariantForContext(const_cast<Instruction *>(I),
+                                      Query(CxtI, DT), DL);
+}
+
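To make the two restrictions concrete: an invariant in a dominating block is always usable; an invariant later in the same block is usable only when everything between the context and the invariant is speculatable (so control flow is guaranteed to reach it); and an invariant may never justify the computation that feeds it. A hypothetical client check, wrapping the rules above:

  static bool canApplyInvariant(CallInst *Inv, Instruction *UseSite,
                                const DataLayout *DL,
                                const DominatorTree *DT) {
    // Applies the dominance and ephemeral-value rules implemented above.
    return isValidInvariantForContext(Inv, UseSite, DL, DT);
  }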
+ // For example: + // 0 V V V V + // 1 ptrtoint icmp ptrtoint and + // 2 icmp invariant and icmp + // 3 invariant icmp invariant + // 4 invariant + + for (User *I : V->users()) { + if (match(cast(I), + m_Intrinsic(m_Value())) && + !Q.ExclInvs.count(I)) { + if (isValidInvariantForContext(I, Q, DL)) + Invs.push_back(I); + } else if (Instruction *J = dyn_cast(I)) { + unsigned Op = J->getOpcode(); + if (!I->use_empty() && (Depth == 0 || + (Depth == 1 && (Op == Instruction::PtrToInt || + Op == Instruction::ICmp || + Op == Instruction::And)) || + (Depth == 2 && (Op == Instruction::ICmp || + Op == Instruction::And)) || + (Depth == 3 && (Op == Instruction::ICmp)) )) + findInvariantUsers(J, Invs, DL, Q, Depth+1); + } + } +} + +static void computeKnownBitsFromInvariant(Value *V, APInt &KnownZero, + APInt &KnownOne, + const DataLayout *DL, + unsigned Depth, const Query &Q) { + // Use of invariants is context-sensitive. If we don't have a context, we + // cannot use them! + if (!Q.CxtI) + return; + + unsigned BitWidth = KnownZero.getBitWidth(); + + SmallVector Invs; + findInvariantUsers(V, Invs, DL, Q); + + for (auto I : Invs) { + // Note: If you update these patterns you must also update + // findInvariantUsers because findInvariantUsers filters its search at each + // level based on opcode to limit the overall cost. + + if (match(I, m_Intrinsic(m_Specific(V)))) { + assert(BitWidth == 1 && "invariant operand is not i1?"); + KnownZero.clearAllBits(); + KnownOne.setAllBits(); + return; + } + + Value *A, *B; + CmpInst::Predicate Pred; + // invariant(v = a) + if ((match(I, m_Intrinsic( + m_ICmp(Pred, m_Specific(V), m_Value(A)))) || + match(I, m_Intrinsic( + m_ICmp(Pred, m_Value(A), m_Specific(V)))) || + match(I, m_Intrinsic( + m_ICmp(Pred, m_PtrToInt(m_Specific(V)), m_Value(A)))) || + match(I, m_Intrinsic( + m_ICmp(Pred, m_Value(A), m_PtrToInt(m_Specific(V)))))) && + Pred == ICmpInst::ICMP_EQ) { + APInt RHSKnownZero(BitWidth, 0), RHSKnownOne(BitWidth, 0); + computeKnownBits(A, RHSKnownZero, RHSKnownOne, DL, Depth+1, Query(Q, I)); + KnownZero |= RHSKnownZero; + KnownOne |= RHSKnownOne; + // invariant(v & b = a) + // FIXME: The pattern matching templates should understand commutative + // instructions so we don't need to write all permutations here. + } else if ((match(I, m_Intrinsic( + m_ICmp(Pred, + m_And(m_Specific(V), m_Value(B)), m_Value(A)))) || + match(I, m_Intrinsic( + m_ICmp(Pred, + m_And(m_Value(B), m_Specific(V)), m_Value(A)))) || + match(I, m_Intrinsic( + m_ICmp(Pred, + m_Value(A), m_And(m_Specific(V), m_Value(B))))) || + match(I, m_Intrinsic( + m_ICmp(Pred, + m_Value(A), m_And(m_Value(B), m_Specific(V))))) || + match(I, m_Intrinsic( + m_ICmp(Pred, + m_And(m_PtrToInt(m_Specific(V)), m_Value(B)), + m_Value(A)))) || + match(I, m_Intrinsic( + m_ICmp(Pred, + m_And(m_Value(B), m_PtrToInt(m_Specific(V))), + m_Value(A)))) || + match(I, m_Intrinsic( + m_ICmp(Pred, m_Value(A), + m_And(m_PtrToInt(m_Specific(V)), m_Value(B))))) || + match(I, m_Intrinsic( + m_ICmp(Pred, m_Value(A), + m_And(m_Value(B), m_PtrToInt(m_Specific(V))))))) && + Pred == ICmpInst::ICMP_EQ) { + APInt RHSKnownZero(BitWidth, 0), RHSKnownOne(BitWidth, 0); + computeKnownBits(A, RHSKnownZero, RHSKnownOne, DL, Depth+1, Query(Q, I)); + APInt MaskKnownZero(BitWidth, 0), MaskKnownOne(BitWidth, 0); + computeKnownBits(B, MaskKnownZero, MaskKnownOne, DL, Depth+1, Query(Q, I)); + + // For those bits in the mask that are known to be one, we can propagate + // known bits from the RHS to V. 
+      KnownZero |= RHSKnownZero & MaskKnownOne;
+      KnownOne |= RHSKnownOne & MaskKnownOne;
+    }
+  }
+}
+
 /// Determine which bits of V are known to be either zero or one and return
 /// them in the KnownZero/KnownOne bit sets.
 ///
@@ -224,8 +576,9 @@
 /// where V is a vector, known zero, and known one values are the
 /// same width as the vector element, and the bit is set only if it is true
 /// for all of the elements in the vector.
-void llvm::computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne,
-                            const DataLayout *TD, unsigned Depth) {
+void computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne,
+                      const DataLayout *TD, unsigned Depth,
+                      const Query &Q) {
   assert(V && "No Value?");
   assert(Depth <= MaxDepth && "Limit Search Depth");
   unsigned BitWidth = KnownZero.getBitWidth();
@@ -301,7 +654,7 @@
     if (GA->mayBeOverridden()) {
       KnownZero.clearAllBits(); KnownOne.clearAllBits();
     } else {
-      computeKnownBits(GA->getAliasee(), KnownZero, KnownOne, TD, Depth+1);
+      computeKnownBits(GA->getAliasee(), KnownZero, KnownOne, TD, Depth+1, Q);
     }
     return;
   }
@@ -322,6 +675,10 @@
     if (Align)
       KnownZero = APInt::getLowBitsSet(BitWidth, countTrailingZeros(Align));
+
+    // Don't give up yet... there might be an invariant that provides more
+    // information...
+    computeKnownBitsFromInvariant(V, KnownZero, KnownOne, TD, Depth, Q);
     return;
   }
@@ -331,6 +688,9 @@
   if (Depth == MaxDepth)
     return;  // Limit search depth.
+
+  // Check whether a nearby invariant intrinsic can determine some known bits.
+  computeKnownBitsFromInvariant(V, KnownZero, KnownOne, TD, Depth, Q);
+
   Operator *I = dyn_cast<Operator>(V);
   if (!I) return;
@@ -343,8 +703,8 @@
     break;
   case Instruction::And: {
     // If either the LHS or the RHS are Zero, the result is zero.
-    computeKnownBits(I->getOperand(1), KnownZero, KnownOne, TD, Depth+1);
-    computeKnownBits(I->getOperand(0), KnownZero2, KnownOne2, TD, Depth+1);
+    computeKnownBits(I->getOperand(1), KnownZero, KnownOne, TD, Depth+1, Q);
+    computeKnownBits(I->getOperand(0), KnownZero2, KnownOne2, TD, Depth+1, Q);

     // Output known-1 bits are only known if set in both the LHS & RHS.
     KnownOne &= KnownOne2;
@@ -353,8 +713,8 @@
     break;
   }
   case Instruction::Or: {
-    computeKnownBits(I->getOperand(1), KnownZero, KnownOne, TD, Depth+1);
-    computeKnownBits(I->getOperand(0), KnownZero2, KnownOne2, TD, Depth+1);
+    computeKnownBits(I->getOperand(1), KnownZero, KnownOne, TD, Depth+1, Q);
+    computeKnownBits(I->getOperand(0), KnownZero2, KnownOne2, TD, Depth+1, Q);

     // Output known-0 bits are only known if clear in both the LHS & RHS.
     KnownZero &= KnownZero2;
@@ -363,8 +723,8 @@
     break;
   }
   case Instruction::Xor: {
-    computeKnownBits(I->getOperand(1), KnownZero, KnownOne, TD, Depth+1);
-    computeKnownBits(I->getOperand(0), KnownZero2, KnownOne2, TD, Depth+1);
+    computeKnownBits(I->getOperand(1), KnownZero, KnownOne, TD, Depth+1, Q);
+    computeKnownBits(I->getOperand(0), KnownZero2, KnownOne2, TD, Depth+1, Q);

     // Output known-0 bits are known if clear or set in both the LHS & RHS.
     APInt KnownZeroOut = (KnownZero & KnownZero2) | (KnownOne & KnownOne2);
@@ -376,19 +736,20 @@
   case Instruction::Mul: {
     bool NSW = cast<OverflowingBinaryOperator>(I)->hasNoSignedWrap();
     computeKnownBitsMul(I->getOperand(0), I->getOperand(1), NSW,
-                        KnownZero, KnownOne, KnownZero2, KnownOne2, TD, Depth);
+                        KnownZero, KnownOne, KnownZero2, KnownOne2, TD,
+                        Depth, Q);
     break;
   }
   case Instruction::UDiv: {
     // For the purposes of computing leading zeros we can conservatively
     // treat a udiv as a logical right shift by the power of 2 known to
     // be less than the denominator.
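+    // For example (i8, illustrative): a numerator with two known leading
+    // zeros is at most 63 and a denominator with a known one at bit 4 is at
+    // least 16, so the quotient is at most 63/16 = 3 and has at least six
+    // leading zeros.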
-    computeKnownBits(I->getOperand(0), KnownZero2, KnownOne2, TD, Depth+1);
+    computeKnownBits(I->getOperand(0), KnownZero2, KnownOne2, TD, Depth+1, Q);
     unsigned LeadZ = KnownZero2.countLeadingOnes();

     KnownOne2.clearAllBits();
     KnownZero2.clearAllBits();
-    computeKnownBits(I->getOperand(1), KnownZero2, KnownOne2, TD, Depth+1);
+    computeKnownBits(I->getOperand(1), KnownZero2, KnownOne2, TD, Depth+1, Q);
     unsigned RHSUnknownLeadingOnes = KnownOne2.countLeadingZeros();
     if (RHSUnknownLeadingOnes != BitWidth)
       LeadZ = std::min(BitWidth,
@@ -398,9 +759,8 @@
     break;
   }
   case Instruction::Select:
-    computeKnownBits(I->getOperand(2), KnownZero, KnownOne, TD, Depth+1);
-    computeKnownBits(I->getOperand(1), KnownZero2, KnownOne2, TD,
-                     Depth+1);
+    computeKnownBits(I->getOperand(2), KnownZero, KnownOne, TD, Depth+1, Q);
+    computeKnownBits(I->getOperand(1), KnownZero2, KnownOne2, TD, Depth+1, Q);

     // Only known if known in both the LHS and RHS.
     KnownOne &= KnownOne2;
@@ -435,7 +795,7 @@
     assert(SrcBitWidth && "SrcBitWidth can't be zero");
     KnownZero = KnownZero.zextOrTrunc(SrcBitWidth);
     KnownOne = KnownOne.zextOrTrunc(SrcBitWidth);
-    computeKnownBits(I->getOperand(0), KnownZero, KnownOne, TD, Depth+1);
+    computeKnownBits(I->getOperand(0), KnownZero, KnownOne, TD, Depth+1, Q);
     KnownZero = KnownZero.zextOrTrunc(BitWidth);
     KnownOne = KnownOne.zextOrTrunc(BitWidth);
     // Any top bits are known to be zero.
@@ -449,7 +809,7 @@
     // TODO: For now, not handling conversions like:
     // (bitcast i64 %x to <2 x i32>)
         !I->getType()->isVectorTy()) {
-      computeKnownBits(I->getOperand(0), KnownZero, KnownOne, TD, Depth+1);
+      computeKnownBits(I->getOperand(0), KnownZero, KnownOne, TD, Depth+1, Q);
       break;
     }
     break;
@@ -460,7 +820,7 @@
     KnownZero = KnownZero.trunc(SrcBitWidth);
     KnownOne = KnownOne.trunc(SrcBitWidth);
-    computeKnownBits(I->getOperand(0), KnownZero, KnownOne, TD, Depth+1);
+    computeKnownBits(I->getOperand(0), KnownZero, KnownOne, TD, Depth+1, Q);
     KnownZero = KnownZero.zext(BitWidth);
     KnownOne = KnownOne.zext(BitWidth);
@@ -476,7 +836,7 @@
     // (shl X, C1) & C2 == 0   iff   (X & C2 >>u C1) == 0
     if (ConstantInt *SA = dyn_cast<ConstantInt>(I->getOperand(1))) {
       uint64_t ShiftAmt = SA->getLimitedValue(BitWidth);
-      computeKnownBits(I->getOperand(0), KnownZero, KnownOne, TD, Depth+1);
+      computeKnownBits(I->getOperand(0), KnownZero, KnownOne, TD, Depth+1, Q);
       KnownZero <<= ShiftAmt;
       KnownOne <<= ShiftAmt;
       KnownZero |= APInt::getLowBitsSet(BitWidth, ShiftAmt); // low bits known 0
@@ -490,7 +850,7 @@
       uint64_t ShiftAmt = SA->getLimitedValue(BitWidth);

       // Unsigned shift right.
-      computeKnownBits(I->getOperand(0), KnownZero,KnownOne, TD, Depth+1);
+      computeKnownBits(I->getOperand(0), KnownZero,KnownOne, TD, Depth+1, Q);
       KnownZero = APIntOps::lshr(KnownZero, ShiftAmt);
       KnownOne = APIntOps::lshr(KnownOne, ShiftAmt);
       // high bits known zero.
@@ -505,7 +865,7 @@
       uint64_t ShiftAmt = SA->getLimitedValue(BitWidth-1);

       // Signed shift right.
-      computeKnownBits(I->getOperand(0), KnownZero, KnownOne, TD, Depth+1);
+      computeKnownBits(I->getOperand(0), KnownZero, KnownOne, TD, Depth+1, Q);
       KnownZero = APIntOps::lshr(KnownZero, ShiftAmt);
       KnownOne = APIntOps::lshr(KnownOne, ShiftAmt);
@@ -521,14 +881,14 @@
     bool NSW = cast<OverflowingBinaryOperator>(I)->hasNoSignedWrap();
     computeKnownBitsAddSub(false, I->getOperand(0), I->getOperand(1), NSW,
                            KnownZero, KnownOne, KnownZero2, KnownOne2, TD,
-                           Depth);
+                           Depth, Q);
     break;
   }
   case Instruction::Add: {
     bool NSW = cast<OverflowingBinaryOperator>(I)->hasNoSignedWrap();
     computeKnownBitsAddSub(true, I->getOperand(0), I->getOperand(1), NSW,
                            KnownZero, KnownOne, KnownZero2, KnownOne2, TD,
-                           Depth);
+                           Depth, Q);
     break;
   }
   case Instruction::SRem:
@@ -536,7 +896,8 @@
       APInt RA = Rem->getValue().abs();
       if (RA.isPowerOf2()) {
         APInt LowBits = RA - 1;
-        computeKnownBits(I->getOperand(0), KnownZero2, KnownOne2, TD, Depth+1);
+        computeKnownBits(I->getOperand(0), KnownZero2, KnownOne2, TD,
+                         Depth+1, Q);

         // The low bits of the first operand are unchanged by the srem.
         KnownZero = KnownZero2 & LowBits;
@@ -561,7 +922,7 @@
     if (KnownZero.isNonNegative()) {
       APInt LHSKnownZero(BitWidth, 0), LHSKnownOne(BitWidth, 0);
       computeKnownBits(I->getOperand(0), LHSKnownZero, LHSKnownOne, TD,
-                       Depth+1);
+                       Depth+1, Q);
       // If it's known zero, our sign bit is also zero.
       if (LHSKnownZero.isNegative())
         KnownZero.setBit(BitWidth - 1);
@@ -574,7 +935,7 @@
       if (RA.isPowerOf2()) {
         APInt LowBits = (RA - 1);
         computeKnownBits(I->getOperand(0), KnownZero, KnownOne, TD,
-                         Depth+1);
+                         Depth+1, Q);
         KnownZero |= ~LowBits;
         KnownOne &= LowBits;
         break;
@@ -583,8 +944,8 @@

     // Since the result is less than or equal to either operand, any leading
     // zero bits in either operand must also exist in the result.
-    computeKnownBits(I->getOperand(0), KnownZero, KnownOne, TD, Depth+1);
-    computeKnownBits(I->getOperand(1), KnownZero2, KnownOne2, TD, Depth+1);
+    computeKnownBits(I->getOperand(0), KnownZero, KnownOne, TD, Depth+1, Q);
+    computeKnownBits(I->getOperand(1), KnownZero2, KnownOne2, TD, Depth+1, Q);

     unsigned Leaders = std::max(KnownZero.countLeadingOnes(),
                                 KnownZero2.countLeadingOnes());
@@ -608,7 +969,7 @@
     // to determine if we can prove known low zero bits.
     APInt LocalKnownZero(BitWidth, 0), LocalKnownOne(BitWidth, 0);
     computeKnownBits(I->getOperand(0), LocalKnownZero, LocalKnownOne, TD,
-                     Depth+1);
+                     Depth+1, Q);
     unsigned TrailZ = LocalKnownZero.countTrailingOnes();

     gep_type_iterator GTI = gep_type_begin(I);
@@ -644,7 +1005,7 @@
         unsigned GEPOpiBits = Index->getType()->getScalarSizeInBits();
         uint64_t TypeSize = TD ? TD->getTypeAllocSize(IndexedTy) : 1;
         LocalKnownZero = LocalKnownOne = APInt(GEPOpiBits, 0);
-        computeKnownBits(Index, LocalKnownZero, LocalKnownOne, TD, Depth+1);
+        computeKnownBits(Index, LocalKnownZero, LocalKnownOne, TD, Depth+1, Q);
         TrailZ = std::min(TrailZ,
                           unsigned(countTrailingZeros(TypeSize) +
                                    LocalKnownZero.countTrailingOnes()));
@@ -686,11 +1047,11 @@
           break;
         // Ok, we have a PHI of the form L op= R. Check for low
         // zero bits.
-        computeKnownBits(R, KnownZero2, KnownOne2, TD, Depth+1);
+        computeKnownBits(R, KnownZero2, KnownOne2, TD, Depth+1, Q);

         // We need to take the minimum number of known bits
         APInt KnownZero3(KnownZero), KnownOne3(KnownOne);
-        computeKnownBits(L, KnownZero3, KnownOne3, TD, Depth+1);
+        computeKnownBits(L, KnownZero3, KnownOne3, TD, Depth+1, Q);

         KnownZero = APInt::getLowBitsSet(BitWidth,
                                          std::min(KnownZero2.countTrailingOnes(),
@@ -722,7 +1083,7 @@
           // Recurse, but cap the recursion to one level, because we don't
           // want to waste time spinning around in loops.
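+          // For example (illustrative): a phi merging the constants 4
+          // (0b0100) and 12 (0b1100) keeps bit 2 known one and bits 0, 1,
+          // and 4 and up known zero; only bit 3, where the incoming values
+          // differ, is given up.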
           computeKnownBits(P->getIncomingValue(i), KnownZero2, KnownOne2, TD,
-                           MaxDepth-1);
+                           MaxDepth-1, Q);
           KnownZero &= KnownZero2;
           KnownOne &= KnownOne2;
           // If all bits have been ruled out, there's no need to check
@@ -774,19 +1135,19 @@
       case Intrinsic::sadd_with_overflow:
         computeKnownBitsAddSub(true, II->getArgOperand(0),
                                II->getArgOperand(1), false, KnownZero,
-                               KnownOne, KnownZero2, KnownOne2, TD, Depth);
+                               KnownOne, KnownZero2, KnownOne2, TD, Depth, Q);
         break;
       case Intrinsic::usub_with_overflow:
       case Intrinsic::ssub_with_overflow:
         computeKnownBitsAddSub(false, II->getArgOperand(0),
                                II->getArgOperand(1), false, KnownZero,
-                               KnownOne, KnownZero2, KnownOne2, TD, Depth);
+                               KnownOne, KnownZero2, KnownOne2, TD, Depth, Q);
         break;
       case Intrinsic::umul_with_overflow:
       case Intrinsic::smul_with_overflow:
         computeKnownBitsMul(II->getArgOperand(0), II->getArgOperand(1),
                             false, KnownZero, KnownOne,
-                            KnownZero2, KnownOne2, TD, Depth);
+                            KnownZero2, KnownOne2, TD, Depth, Q);
         break;
       }
     }
@@ -798,8 +1159,9 @@

 /// ComputeSignBit - Determine whether the sign bit is known to be zero or
 /// one. Convenience wrapper around computeKnownBits.
-void llvm::ComputeSignBit(Value *V, bool &KnownZero, bool &KnownOne,
-                          const DataLayout *TD, unsigned Depth) {
+void ComputeSignBit(Value *V, bool &KnownZero, bool &KnownOne,
+                    const DataLayout *TD, unsigned Depth,
+                    const Query &Q) {
   unsigned BitWidth = getBitWidth(V->getType(), TD);
   if (!BitWidth) {
     KnownZero = false;
@@ -808,7 +1170,7 @@
   }
   APInt ZeroBits(BitWidth, 0);
   APInt OneBits(BitWidth, 0);
-  computeKnownBits(V, ZeroBits, OneBits, TD, Depth);
+  computeKnownBits(V, ZeroBits, OneBits, TD, Depth, Q);
   KnownOne = OneBits[BitWidth - 1];
   KnownZero = ZeroBits[BitWidth - 1];
 }
@@ -817,7 +1179,8 @@
 /// bit set when defined. For vectors return true if every element is known to
 /// be a power of two when defined. Supports values with integer or pointer
 /// types and vectors of integers.
-bool llvm::isKnownToBeAPowerOfTwo(Value *V, bool OrZero, unsigned Depth) {
+bool isKnownToBeAPowerOfTwo(Value *V, bool OrZero, unsigned Depth,
+                            const Query &Q) {
   if (Constant *C = dyn_cast<Constant>(V)) {
     if (C->isNullValue())
       return OrZero;
@@ -844,19 +1207,20 @@
   // A shift of a power of two is a power of two or zero.
   if (OrZero && (match(V, m_Shl(m_Value(X), m_Value())) ||
                  match(V, m_Shr(m_Value(X), m_Value()))))
-    return isKnownToBeAPowerOfTwo(X, /*OrZero*/true, Depth);
+    return isKnownToBeAPowerOfTwo(X, /*OrZero*/true, Depth, Q);

   if (ZExtInst *ZI = dyn_cast<ZExtInst>(V))
-    return isKnownToBeAPowerOfTwo(ZI->getOperand(0), OrZero, Depth);
+    return isKnownToBeAPowerOfTwo(ZI->getOperand(0), OrZero, Depth, Q);

   if (SelectInst *SI = dyn_cast<SelectInst>(V))
-    return isKnownToBeAPowerOfTwo(SI->getTrueValue(), OrZero, Depth) &&
-           isKnownToBeAPowerOfTwo(SI->getFalseValue(), OrZero, Depth);
+    return
+      isKnownToBeAPowerOfTwo(SI->getTrueValue(), OrZero, Depth, Q) &&
+      isKnownToBeAPowerOfTwo(SI->getFalseValue(), OrZero, Depth, Q);

   if (OrZero && match(V, m_And(m_Value(X), m_Value(Y)))) {
     // A power of two and'd with anything is a power of two or zero.
-    if (isKnownToBeAPowerOfTwo(X, /*OrZero*/true, Depth) ||
-        isKnownToBeAPowerOfTwo(Y, /*OrZero*/true, Depth))
+    if (isKnownToBeAPowerOfTwo(X, /*OrZero*/true, Depth, Q) ||
+        isKnownToBeAPowerOfTwo(Y, /*OrZero*/true, Depth, Q))
       return true;
     // X & (-X) is always a power of two or zero.
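+    // For example (4-bit illustration): for x = 0b0110, -x = 0b1010, and
+    // x & -x = 0b0010, a single set bit.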
     if (match(X, m_Neg(m_Specific(Y))) || match(Y, m_Neg(m_Specific(X))))
@@ -871,19 +1235,19 @@
     if (OrZero || VOBO->hasNoUnsignedWrap() || VOBO->hasNoSignedWrap()) {
       if (match(X, m_And(m_Specific(Y), m_Value())) ||
           match(X, m_And(m_Value(), m_Specific(Y))))
-        if (isKnownToBeAPowerOfTwo(Y, OrZero, Depth))
+        if (isKnownToBeAPowerOfTwo(Y, OrZero, Depth, Q))
           return true;
       if (match(Y, m_And(m_Specific(X), m_Value())) ||
           match(Y, m_And(m_Value(), m_Specific(X))))
-        if (isKnownToBeAPowerOfTwo(X, OrZero, Depth))
+        if (isKnownToBeAPowerOfTwo(X, OrZero, Depth, Q))
           return true;

       unsigned BitWidth = V->getType()->getScalarSizeInBits();
       APInt LHSZeroBits(BitWidth, 0), LHSOneBits(BitWidth, 0);
-      computeKnownBits(X, LHSZeroBits, LHSOneBits, nullptr, Depth);
+      computeKnownBits(X, LHSZeroBits, LHSOneBits, nullptr, Depth, Q);

       APInt RHSZeroBits(BitWidth, 0), RHSOneBits(BitWidth, 0);
-      computeKnownBits(Y, RHSZeroBits, RHSOneBits, nullptr, Depth);
+      computeKnownBits(Y, RHSZeroBits, RHSOneBits, nullptr, Depth, Q);
       // If i8 V is a power of two or zero:
       //  ZeroBits: 1 1 1 0 1 1 1 1
       // ~ZeroBits: 0 0 0 1 0 0 0 0
@@ -900,7 +1264,8 @@
   // copying a sign bit (sdiv int_min, 2).
   if (match(V, m_Exact(m_LShr(m_Value(), m_Value()))) ||
       match(V, m_Exact(m_UDiv(m_Value(), m_Value())))) {
-    return isKnownToBeAPowerOfTwo(cast<Operator>(V)->getOperand(0), OrZero, Depth);
+    return isKnownToBeAPowerOfTwo(cast<Operator>(V)->getOperand(0), OrZero,
+                                  Depth, Q);
   }

   return false;
@@ -913,7 +1278,7 @@
 ///
 /// Currently this routine does not support vector GEPs.
 static bool isGEPKnownNonNull(GEPOperator *GEP, const DataLayout *DL,
-                              unsigned Depth) {
+                              unsigned Depth, const Query &Q) {
   if (!GEP->isInBounds() || GEP->getPointerAddressSpace() != 0)
     return false;

@@ -922,7 +1287,7 @@
   // If the base pointer is non-null, we cannot walk to a null address with an
   // inbounds GEP in address space zero.
-  if (isKnownNonZero(GEP->getPointerOperand(), DL, Depth))
+  if (isKnownNonZero(GEP->getPointerOperand(), DL, Depth, Q))
     return true;

   // Past this, if we don't have DataLayout, we can't do much.
@@ -965,7 +1330,7 @@
     if (Depth++ >= MaxDepth)
       continue;

-    if (isKnownNonZero(GTI.getOperand(), DL, Depth))
+    if (isKnownNonZero(GTI.getOperand(), DL, Depth, Q))
       return true;
   }

@@ -976,7 +1341,8 @@
 /// when defined. For vectors return true if every element is known to be
 /// non-zero when defined. Supports values with integer or pointer type and
 /// vectors of integers.
-bool llvm::isKnownNonZero(Value *V, const DataLayout *TD, unsigned Depth) {
+bool isKnownNonZero(Value *V, const DataLayout *TD, unsigned Depth,
+                    const Query &Q) {
   if (Constant *C = dyn_cast<Constant>(V)) {
     if (C->isNullValue())
       return false;
@@ -996,7 +1362,7 @@
     if (isKnownNonNull(V))
       return true;
     if (GEPOperator *GEP = dyn_cast<GEPOperator>(V))
-      if (isGEPKnownNonNull(GEP, TD, Depth))
+      if (isGEPKnownNonNull(GEP, TD, Depth, Q))
         return true;
   }

@@ -1005,11 +1371,12 @@
   // X | Y != 0 if X != 0 or Y != 0.
   Value *X = nullptr, *Y = nullptr;
   if (match(V, m_Or(m_Value(X), m_Value(Y))))
-    return isKnownNonZero(X, TD, Depth) || isKnownNonZero(Y, TD, Depth);
+    return isKnownNonZero(X, TD, Depth, Q) ||
+           isKnownNonZero(Y, TD, Depth, Q);

   // ext X != 0 if X != 0.
   if (isa<SExtInst>(V) || isa<ZExtInst>(V))
-    return isKnownNonZero(cast<Instruction>(V)->getOperand(0), TD, Depth);
+    return isKnownNonZero(cast<Instruction>(V)->getOperand(0), TD, Depth, Q);

   // shl X, Y != 0 if X is odd. Note that the value of the shift is undefined
   // if the lowest bit is shifted off the end.
@@ -1017,11 +1384,11 @@
     // shl nuw can't remove any non-zero bits.
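+    // For example, %r = shl nuw i8 %x, 3 is non-zero whenever %x is
+    // non-zero: nuw guarantees no set bit is shifted out the top.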
     OverflowingBinaryOperator *BO = cast<OverflowingBinaryOperator>(V);
     if (BO->hasNoUnsignedWrap())
-      return isKnownNonZero(X, TD, Depth);
+      return isKnownNonZero(X, TD, Depth, Q);

     APInt KnownZero(BitWidth, 0);
     APInt KnownOne(BitWidth, 0);
-    computeKnownBits(X, KnownZero, KnownOne, TD, Depth);
+    computeKnownBits(X, KnownZero, KnownOne, TD, Depth, Q);
     if (KnownOne[0])
       return true;
   }
@@ -1031,28 +1398,29 @@
     // shr exact can only shift out zero bits.
     PossiblyExactOperator *BO = cast<PossiblyExactOperator>(V);
     if (BO->isExact())
-      return isKnownNonZero(X, TD, Depth);
+      return isKnownNonZero(X, TD, Depth, Q);

     bool XKnownNonNegative, XKnownNegative;
-    ComputeSignBit(X, XKnownNonNegative, XKnownNegative, TD, Depth);
+    ComputeSignBit(X, XKnownNonNegative, XKnownNegative, TD, Depth, Q);
     if (XKnownNegative)
       return true;
   }
   // div exact can only produce a zero if the dividend is zero.
   else if (match(V, m_Exact(m_IDiv(m_Value(X), m_Value())))) {
-    return isKnownNonZero(X, TD, Depth);
+    return isKnownNonZero(X, TD, Depth, Q);
   }
   // X + Y.
   else if (match(V, m_Add(m_Value(X), m_Value(Y)))) {
     bool XKnownNonNegative, XKnownNegative;
     bool YKnownNonNegative, YKnownNegative;
-    ComputeSignBit(X, XKnownNonNegative, XKnownNegative, TD, Depth);
-    ComputeSignBit(Y, YKnownNonNegative, YKnownNegative, TD, Depth);
+    ComputeSignBit(X, XKnownNonNegative, XKnownNegative, TD, Depth, Q);
+    ComputeSignBit(Y, YKnownNonNegative, YKnownNegative, TD, Depth, Q);

     // If X and Y are both non-negative (as signed values) then their sum is not
     // zero unless both X and Y are zero.
     if (XKnownNonNegative && YKnownNonNegative)
-      if (isKnownNonZero(X, TD, Depth) || isKnownNonZero(Y, TD, Depth))
+      if (isKnownNonZero(X, TD, Depth, Q) ||
+          isKnownNonZero(Y, TD, Depth, Q))
        return true;

     // If X and Y are both negative (as signed values) then their sum is not
@@ -1063,20 +1431,22 @@
       APInt Mask = APInt::getSignedMaxValue(BitWidth);
       // The sign bit of X is set. If some other bit is set then X is not equal
       // to INT_MIN.
-      computeKnownBits(X, KnownZero, KnownOne, TD, Depth);
+      computeKnownBits(X, KnownZero, KnownOne, TD, Depth, Q);
       if ((KnownOne & Mask) != 0)
         return true;
       // The sign bit of Y is set. If some other bit is set then Y is not equal
       // to INT_MIN.
-      computeKnownBits(Y, KnownZero, KnownOne, TD, Depth);
+      computeKnownBits(Y, KnownZero, KnownOne, TD, Depth, Q);
       if ((KnownOne & Mask) != 0)
         return true;
     }

     // The sum of a non-negative number and a power of two is not zero.
-    if (XKnownNonNegative && isKnownToBeAPowerOfTwo(Y, /*OrZero*/false, Depth))
+    if (XKnownNonNegative &&
+        isKnownToBeAPowerOfTwo(Y, /*OrZero*/false, Depth, Q))
       return true;
-    if (YKnownNonNegative && isKnownToBeAPowerOfTwo(X, /*OrZero*/false, Depth))
+    if (YKnownNonNegative &&
+        isKnownToBeAPowerOfTwo(X, /*OrZero*/false, Depth, Q))
       return true;
   }
   // X * Y.
@@ -1085,20 +1455,21 @@
     // If X and Y are non-zero then so is X * Y as long as the multiplication
     // does not overflow.
     if ((BO->hasNoSignedWrap() || BO->hasNoUnsignedWrap()) &&
-        isKnownNonZero(X, TD, Depth) && isKnownNonZero(Y, TD, Depth))
+        isKnownNonZero(X, TD, Depth, Q) &&
+        isKnownNonZero(Y, TD, Depth, Q))
       return true;
   }
   // (C ? X : Y) != 0 if X != 0 and Y != 0.
   else if (SelectInst *SI = dyn_cast<SelectInst>(V)) {
-    if (isKnownNonZero(SI->getTrueValue(), TD, Depth) &&
-        isKnownNonZero(SI->getFalseValue(), TD, Depth))
+    if (isKnownNonZero(SI->getTrueValue(), TD, Depth, Q) &&
+        isKnownNonZero(SI->getFalseValue(), TD, Depth, Q))
       return true;
   }

   if (!BitWidth) return false;
   APInt KnownZero(BitWidth, 0);
   APInt KnownOne(BitWidth, 0);
-  computeKnownBits(V, KnownZero, KnownOne, TD, Depth);
+  computeKnownBits(V, KnownZero, KnownOne, TD, Depth, Q);
   return KnownOne != 0;
 }

@@ -1111,10 +1482,11 @@
 /// where V is a vector, the mask, known zero, and known one values are the
 /// same width as the vector element, and the bit is set only if it is true
 /// for all of the elements in the vector.
-bool llvm::MaskedValueIsZero(Value *V, const APInt &Mask,
-                             const DataLayout *TD, unsigned Depth) {
+bool MaskedValueIsZero(Value *V, const APInt &Mask,
+                       const DataLayout *TD, unsigned Depth,
+                       const Query &Q) {
   APInt KnownZero(Mask.getBitWidth(), 0), KnownOne(Mask.getBitWidth(), 0);
-  computeKnownBits(V, KnownZero, KnownOne, TD, Depth);
+  computeKnownBits(V, KnownZero, KnownOne, TD, Depth, Q);
   return (KnownZero & Mask) == Mask;
 }

@@ -1128,8 +1500,8 @@
 ///
 /// 'Op' must have a scalar integer type.
 ///
-unsigned llvm::ComputeNumSignBits(Value *V, const DataLayout *TD,
-                                  unsigned Depth) {
+unsigned ComputeNumSignBits(Value *V, const DataLayout *TD,
                            unsigned Depth, const Query &Q) {
   assert((TD || V->getType()->isIntOrIntVectorTy()) &&
         "ComputeNumSignBits requires a DataLayout object to operate "
         "on non-integer values!");
@@ -1150,10 +1522,10 @@
   default: break;
   case Instruction::SExt:
     Tmp = TyBits - U->getOperand(0)->getType()->getScalarSizeInBits();
-    return ComputeNumSignBits(U->getOperand(0), TD, Depth+1) + Tmp;
+    return ComputeNumSignBits(U->getOperand(0), TD, Depth+1, Q) + Tmp;

   case Instruction::AShr: {
-    Tmp = ComputeNumSignBits(U->getOperand(0), TD, Depth+1);
+    Tmp = ComputeNumSignBits(U->getOperand(0), TD, Depth+1, Q);
     // ashr X, C -> adds C sign bits. Vectors too.
     const APInt *ShAmt;
     if (match(U->getOperand(1), m_APInt(ShAmt))) {
@@ -1166,7 +1538,7 @@
     const APInt *ShAmt;
     if (match(U->getOperand(1), m_APInt(ShAmt))) {
       // shl destroys sign bits.
-      Tmp = ComputeNumSignBits(U->getOperand(0), TD, Depth+1);
+      Tmp = ComputeNumSignBits(U->getOperand(0), TD, Depth+1, Q);
       Tmp2 = ShAmt->getZExtValue();
       if (Tmp2 >= TyBits ||      // Bad shift.
           Tmp2 >= Tmp) break;    // Shifted all sign bits out.
@@ -1178,9 +1550,9 @@
   case Instruction::Or:
   case Instruction::Xor:    // NOT is handled here.
     // Logical binary ops preserve the number of sign bits at the worst.
-    Tmp = ComputeNumSignBits(U->getOperand(0), TD, Depth+1);
+    Tmp = ComputeNumSignBits(U->getOperand(0), TD, Depth+1, Q);
     if (Tmp != 1) {
-      Tmp2 = ComputeNumSignBits(U->getOperand(1), TD, Depth+1);
+      Tmp2 = ComputeNumSignBits(U->getOperand(1), TD, Depth+1, Q);
       FirstAnswer = std::min(Tmp, Tmp2);
       // We computed what we know about the sign bits as our first
       // answer. Now proceed to the generic code that uses
@@ -1189,22 +1561,22 @@
     break;

   case Instruction::Select:
-    Tmp = ComputeNumSignBits(U->getOperand(1), TD, Depth+1);
+    Tmp = ComputeNumSignBits(U->getOperand(1), TD, Depth+1, Q);
     if (Tmp == 1) return 1;  // Early out.
-    Tmp2 = ComputeNumSignBits(U->getOperand(2), TD, Depth+1);
+    Tmp2 = ComputeNumSignBits(U->getOperand(2), TD, Depth+1, Q);
     return std::min(Tmp, Tmp2);

   case Instruction::Add:
     // Add can have at most one carry bit. Thus we know that the output
     // is, at worst, one more bit than the inputs.
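+    // For example, two i8 operands with at least two sign bits each lie in
+    // [-64, 63], so their sum lies in [-128, 126] and still has at least
+    // one sign bit.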
-    Tmp = ComputeNumSignBits(U->getOperand(0), TD, Depth+1);
+    Tmp = ComputeNumSignBits(U->getOperand(0), TD, Depth+1, Q);
     if (Tmp == 1) return 1;  // Early out.

     // Special case decrementing a value (ADD X, -1):
     if (ConstantInt *CRHS = dyn_cast<ConstantInt>(U->getOperand(1)))
       if (CRHS->isAllOnesValue()) {
         APInt KnownZero(TyBits, 0), KnownOne(TyBits, 0);
-        computeKnownBits(U->getOperand(0), KnownZero, KnownOne, TD, Depth+1);
+        computeKnownBits(U->getOperand(0), KnownZero, KnownOne, TD, Depth+1, Q);

         // If the input is known to be 0 or 1, the output is 0/-1, which is all
         // sign bits set.
@@ -1217,19 +1589,19 @@
       return Tmp;
     }

-    Tmp2 = ComputeNumSignBits(U->getOperand(1), TD, Depth+1);
+    Tmp2 = ComputeNumSignBits(U->getOperand(1), TD, Depth+1, Q);
     if (Tmp2 == 1) return 1;
     return std::min(Tmp, Tmp2)-1;

   case Instruction::Sub:
-    Tmp2 = ComputeNumSignBits(U->getOperand(1), TD, Depth+1);
+    Tmp2 = ComputeNumSignBits(U->getOperand(1), TD, Depth+1, Q);
     if (Tmp2 == 1) return 1;

     // Handle NEG.
     if (ConstantInt *CLHS = dyn_cast<ConstantInt>(U->getOperand(0)))
       if (CLHS->isNullValue()) {
         APInt KnownZero(TyBits, 0), KnownOne(TyBits, 0);
-        computeKnownBits(U->getOperand(1), KnownZero, KnownOne, TD, Depth+1);
+        computeKnownBits(U->getOperand(1), KnownZero, KnownOne, TD, Depth+1, Q);
         // If the input is known to be 0 or 1, the output is 0/-1, which is all
         // sign bits set.
         if ((KnownZero | APInt(TyBits, 1)).isAllOnesValue())
@@ -1245,7 +1617,7 @@

     // Sub can have at most one carry bit. Thus we know that the output
     // is, at worst, one more bit than the inputs.
-    Tmp = ComputeNumSignBits(U->getOperand(0), TD, Depth+1);
+    Tmp = ComputeNumSignBits(U->getOperand(0), TD, Depth+1, Q);
     if (Tmp == 1) return 1;  // Early out.
     return std::min(Tmp, Tmp2)-1;

@@ -1256,11 +1628,12 @@

     // Take the minimum of all incoming values. This can't infinitely loop
     // because of our depth threshold.
-    Tmp = ComputeNumSignBits(PN->getIncomingValue(0), TD, Depth+1);
+    Tmp = ComputeNumSignBits(PN->getIncomingValue(0), TD, Depth+1, Q);
     for (unsigned i = 1, e = PN->getNumIncomingValues(); i != e; ++i) {
       if (Tmp == 1) return Tmp;
       Tmp = std::min(Tmp,
-                     ComputeNumSignBits(PN->getIncomingValue(i), TD, Depth+1));
+                     ComputeNumSignBits(PN->getIncomingValue(i), TD,
+                                        Depth+1, Q));
     }
     return Tmp;
   }
@@ -1275,7 +1648,7 @@
   // use this information.
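+  // For example, an i8 whose top three bits are known zero is at most 31,
+  // so it has at least three sign bits.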
   APInt KnownZero(TyBits, 0), KnownOne(TyBits, 0);
   APInt Mask;
-  computeKnownBits(V, KnownZero, KnownOne, TD, Depth);
+  computeKnownBits(V, KnownZero, KnownOne, TD, Depth, Q);

   if (KnownZero.isNegative()) {        // sign bit is 0
     Mask = KnownZero;

Index: lib/Transforms/InstCombine/InstCombine.h
===================================================================
--- lib/Transforms/InstCombine/InstCombine.h
+++ lib/Transforms/InstCombine/InstCombine.h
@@ -25,6 +25,7 @@
 namespace llvm {
 class CallSite;
 class DataLayout;
+class DominatorTree;
 class TargetLibraryInfo;
 class DbgDeclareInst;
 class MemIntrinsic;
@@ -88,6 +89,7 @@
     public InstVisitor<InstCombiner, Instruction *> {
   const DataLayout *DL;
   TargetLibraryInfo *TLI;
+  DominatorTree *DT; // not required
   bool MadeIRChange;
   LibCallSimplifier *Simplifier;
   bool MinimizeSize;
@@ -115,6 +117,8 @@
   void getAnalysisUsage(AnalysisUsage &AU) const override;

   const DataLayout *getDataLayout() const { return DL; }
+
+  DominatorTree *getDominatorTree() const { return DT; }

   TargetLibraryInfo *getTargetLibraryInfo() const { return TLI; }

@@ -148,7 +152,7 @@
   Value *FoldAndOfICmps(ICmpInst *LHS, ICmpInst *RHS);
   Value *FoldAndOfFCmps(FCmpInst *LHS, FCmpInst *RHS);
   Instruction *visitAnd(BinaryOperator &I);
-  Value *FoldOrOfICmps(ICmpInst *LHS, ICmpInst *RHS);
+  Value *FoldOrOfICmps(ICmpInst *LHS, ICmpInst *RHS, Instruction *CxtI);
   Value *FoldOrOfFCmps(FCmpInst *LHS, FCmpInst *RHS);
   Instruction *FoldOrWithConstants(BinaryOperator &I, Value *Op, Value *A,
                                    Value *B, Value *C);
@@ -246,8 +250,8 @@
   Instruction *transformZExtICmp(ICmpInst *ICI, Instruction &CI,
                                  bool DoXform = true);
   Instruction *transformSExtICmp(ICmpInst *ICI, Instruction &CI);
-  bool WillNotOverflowSignedAdd(Value *LHS, Value *RHS);
-  bool WillNotOverflowUnsignedAdd(Value *LHS, Value *RHS);
+  bool WillNotOverflowSignedAdd(Value *LHS, Value *RHS, Instruction *CxtI);
+  bool WillNotOverflowUnsignedAdd(Value *LHS, Value *RHS, Instruction *CxtI);
   Value *EmitGEPOffset(User *GEP);
   Instruction *scalarizePHI(ExtractElementInst &EI, PHINode *PN);
   Value *EvaluateInDifferentElementOrder(Value *V, ArrayRef<int> Mask);
@@ -316,16 +320,16 @@
   }

   void computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne,
-                        unsigned Depth = 0) const {
-    return llvm::computeKnownBits(V, KnownZero, KnownOne, DL, Depth);
+                        unsigned Depth = 0, Instruction *CxtI = nullptr) const {
+    return llvm::computeKnownBits(V, KnownZero, KnownOne, DL, Depth, CxtI, DT);
   }

   bool MaskedValueIsZero(Value *V, const APInt &Mask,
-                         unsigned Depth = 0) const {
-    return llvm::MaskedValueIsZero(V, Mask, DL, Depth);
+                         unsigned Depth = 0, Instruction *CxtI = nullptr) const {
+    return llvm::MaskedValueIsZero(V, Mask, DL, Depth, CxtI, DT);
   }
-  unsigned ComputeNumSignBits(Value *Op, unsigned Depth = 0) const {
-    return llvm::ComputeNumSignBits(Op, DL, Depth);
+  unsigned ComputeNumSignBits(Value *Op, unsigned Depth = 0,
+                              Instruction *CxtI = nullptr) const {
+    return llvm::ComputeNumSignBits(Op, DL, Depth, CxtI, DT);
   }

 private:
@@ -343,7 +347,8 @@
   /// SimplifyDemandedUseBits - Attempts to replace V with a simpler value
   /// based on the demanded bits.
   Value *SimplifyDemandedUseBits(Value *V, APInt DemandedMask, APInt &KnownZero,
-                                 APInt &KnownOne, unsigned Depth);
+                                 APInt &KnownOne, unsigned Depth,
+                                 Instruction *CxtI = nullptr);
   bool SimplifyDemandedBits(Use &U, APInt DemandedMask, APInt &KnownZero,
                             APInt &KnownOne, unsigned Depth = 0);
   /// Helper routine of SimplifyDemandedUseBits. It tries to simplify demanded
Index: lib/Transforms/InstCombine/InstCombineAddSub.cpp
===================================================================
--- lib/Transforms/InstCombine/InstCombineAddSub.cpp
+++ lib/Transforms/InstCombine/InstCombineAddSub.cpp
@@ -895,7 +895,8 @@
 /// This basically requires proving that the add in the original type would not
 /// overflow to change the sign bit or have a carry out.
 /// TODO: Handle this for Vectors.
-bool InstCombiner::WillNotOverflowSignedAdd(Value *LHS, Value *RHS) {
+bool InstCombiner::WillNotOverflowSignedAdd(Value *LHS, Value *RHS,
+                                            Instruction *CxtI) {
   // There are different heuristics we can use for this. Here are some simple
   // ones.

@@ -913,18 +914,19 @@
   //
   // Since the carry into the most significant position is always equal to
   // the carry out of the addition, there is no signed overflow.
-  if (ComputeNumSignBits(LHS) > 1 && ComputeNumSignBits(RHS) > 1)
+  if (ComputeNumSignBits(LHS, 0, CxtI) > 1 &&
+      ComputeNumSignBits(RHS, 0, CxtI) > 1)
     return true;

   if (IntegerType *IT = dyn_cast<IntegerType>(LHS->getType())) {
     int BitWidth = IT->getBitWidth();
     APInt LHSKnownZero(BitWidth, 0);
     APInt LHSKnownOne(BitWidth, 0);
-    computeKnownBits(LHS, LHSKnownZero, LHSKnownOne);
+    computeKnownBits(LHS, LHSKnownZero, LHSKnownOne, 0, CxtI);

     APInt RHSKnownZero(BitWidth, 0);
     APInt RHSKnownOne(BitWidth, 0);
-    computeKnownBits(RHS, RHSKnownZero, RHSKnownOne);
+    computeKnownBits(RHS, RHSKnownZero, RHSKnownOne, 0, CxtI);

     // Addition of two 2's complement numbers having opposite signs will never
     // overflow.
@@ -943,13 +945,14 @@

 /// WillNotOverflowUnsignedAdd - Return true if we can prove that:
 ///    (zext (add LHS, RHS))  ===  (add (zext LHS), (zext RHS))
-bool InstCombiner::WillNotOverflowUnsignedAdd(Value *LHS, Value *RHS) {
+bool InstCombiner::WillNotOverflowUnsignedAdd(Value *LHS, Value *RHS,
+                                              Instruction *CxtI) {
   // There are different heuristics we can use for this. Here is a simple one.
   // If the sign bit of LHS and that of RHS are both zero, no unsigned wrap.
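+  // For example, two i8 values with clear sign bits are each at most 127,
+  // so their sum is at most 254 and cannot wrap the 8-bit range.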
   bool LHSKnownNonNegative, LHSKnownNegative;
   bool RHSKnownNonNegative, RHSKnownNegative;
-  ComputeSignBit(LHS, LHSKnownNonNegative, LHSKnownNegative, DL, 0);
-  ComputeSignBit(RHS, RHSKnownNonNegative, RHSKnownNegative, DL, 0);
+  ComputeSignBit(LHS, LHSKnownNonNegative, LHSKnownNegative, DL, 0, CxtI, DT);
+  ComputeSignBit(RHS, RHSKnownNonNegative, RHSKnownNegative, DL, 0, CxtI, DT);
   if (LHSKnownNonNegative && RHSKnownNonNegative)
     return true;
@@ -1064,7 +1067,7 @@
     if (ExtendAmt) {
       APInt Mask = APInt::getHighBitsSet(TySizeBits, ExtendAmt);
-      if (!MaskedValueIsZero(XorLHS, Mask))
+      if (!MaskedValueIsZero(XorLHS, Mask, 0, &I))
         ExtendAmt = 0;
     }
@@ -1080,7 +1083,7 @@
       IntegerType *IT = cast<IntegerType>(I.getType());
       APInt LHSKnownOne(IT->getBitWidth(), 0);
       APInt LHSKnownZero(IT->getBitWidth(), 0);
-      computeKnownBits(XorLHS, LHSKnownZero, LHSKnownOne);
+      computeKnownBits(XorLHS, LHSKnownZero, LHSKnownOne, 0, &I);
       if ((XorRHS->getValue() | LHSKnownZero).isAllOnesValue())
         return BinaryOperator::CreateSub(ConstantExpr::getAdd(XorRHS, CI),
                                          XorLHS);
@@ -1133,11 +1136,11 @@
   if (IntegerType *IT = dyn_cast<IntegerType>(I.getType())) {
     APInt LHSKnownOne(IT->getBitWidth(), 0);
     APInt LHSKnownZero(IT->getBitWidth(), 0);
-    computeKnownBits(LHS, LHSKnownZero, LHSKnownOne);
+    computeKnownBits(LHS, LHSKnownZero, LHSKnownOne, 0, &I);
    if (LHSKnownZero != 0) {
       APInt RHSKnownOne(IT->getBitWidth(), 0);
       APInt RHSKnownZero(IT->getBitWidth(), 0);
-      computeKnownBits(RHS, RHSKnownZero, RHSKnownOne);
+      computeKnownBits(RHS, RHSKnownZero, RHSKnownOne, 0, &I);

       // No bits in common -> bitwise or.
       if ((LHSKnownZero|RHSKnownZero).isAllOnesValue())
@@ -1215,7 +1218,7 @@
         ConstantExpr::getTrunc(RHSC, LHSConv->getOperand(0)->getType());
       if (LHSConv->hasOneUse() &&
           ConstantExpr::getSExt(CI, I.getType()) == RHSC &&
-          WillNotOverflowSignedAdd(LHSConv->getOperand(0), CI)) {
+          WillNotOverflowSignedAdd(LHSConv->getOperand(0), CI, &I)) {
         // Insert the new, smaller add.
         Value *NewAdd = Builder->CreateNSWAdd(LHSConv->getOperand(0),
                                               CI, "addconv");
@@ -1231,7 +1234,7 @@
       if (LHSConv->getOperand(0)->getType()==RHSConv->getOperand(0)->getType()&&
          (LHSConv->hasOneUse() || RHSConv->hasOneUse()) &&
           WillNotOverflowSignedAdd(LHSConv->getOperand(0),
-                                   RHSConv->getOperand(0))) {
+                                   RHSConv->getOperand(0), &I)) {
         // Insert the new integer add.
         Value *NewAdd = Builder->CreateNSWAdd(LHSConv->getOperand(0),
                                               RHSConv->getOperand(0), "addconv");
@@ -1257,11 +1260,11 @@
   // TODO(jingyue): Consider WillNotOverflowSignedAdd and
   // WillNotOverflowUnsignedAdd to reduce the number of invocations of
   // computeKnownBits.
-  if (!I.hasNoSignedWrap() && WillNotOverflowSignedAdd(LHS, RHS)) {
+  if (!I.hasNoSignedWrap() && WillNotOverflowSignedAdd(LHS, RHS, &I)) {
     Changed = true;
     I.setHasNoSignedWrap(true);
   }
-  if (!I.hasNoUnsignedWrap() && WillNotOverflowUnsignedAdd(LHS, RHS)) {
+  if (!I.hasNoUnsignedWrap() && WillNotOverflowUnsignedAdd(LHS, RHS, &I)) {
     Changed = true;
     I.setHasNoUnsignedWrap(true);
   }
@@ -1318,7 +1321,7 @@
         ConstantExpr::getFPToSI(CFP, LHSConv->getOperand(0)->getType());
       if (LHSConv->hasOneUse() &&
           ConstantExpr::getSIToFP(CI, I.getType()) == CFP &&
-          WillNotOverflowSignedAdd(LHSConv->getOperand(0), CI)) {
+          WillNotOverflowSignedAdd(LHSConv->getOperand(0), CI, &I)) {
         // Insert the new integer add.
         Value *NewAdd = Builder->CreateNSWAdd(LHSConv->getOperand(0),
                                               CI, "addconv");
@@ -1334,7 +1337,7 @@
       if (LHSConv->getOperand(0)->getType()==RHSConv->getOperand(0)->getType()&&
          (LHSConv->hasOneUse() || RHSConv->hasOneUse()) &&
           WillNotOverflowSignedAdd(LHSConv->getOperand(0),
-                                   RHSConv->getOperand(0))) {
+                                   RHSConv->getOperand(0), &I)) {
         // Insert the new integer add.
         Value *NewAdd = Builder->CreateNSWAdd(LHSConv->getOperand(0),
                                               RHSConv->getOperand(0),"addconv");

Index: lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
===================================================================
--- lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
+++ lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
@@ -355,7 +355,7 @@
     if (isRunOfOnes(Mask, MB, ME)) {  // begin/end bit of run, inclusive
       uint32_t BitWidth = cast<IntegerType>(RHS->getType())->getBitWidth();
       APInt Mask(APInt::getLowBitsSet(BitWidth, MB-1));
-      if (MaskedValueIsZero(RHS, Mask))
+      if (MaskedValueIsZero(RHS, Mask, 0, &I))
         break;
     }
   }
@@ -1136,14 +1136,14 @@
       if (!Op0I->hasOneUse()) break;

       APInt NotAndRHS(~AndRHSMask);
-      if (MaskedValueIsZero(Op0LHS, NotAndRHS)) {
+      if (MaskedValueIsZero(Op0LHS, NotAndRHS, 0, &I)) {
         // Not masking anything out for the LHS, move to RHS.
         Value *NewRHS = Builder->CreateAnd(Op0RHS, AndRHS,
                                            Op0RHS->getName()+".masked");
         return BinaryOperator::Create(Op0I->getOpcode(), Op0LHS, NewRHS);
       }
       if (!isa<Constant>(Op0RHS) &&
-          MaskedValueIsZero(Op0RHS, NotAndRHS)) {
+          MaskedValueIsZero(Op0RHS, NotAndRHS, 0, &I)) {
         // Not masking anything out for the RHS, move to LHS.
         Value *NewLHS = Builder->CreateAnd(Op0LHS, AndRHS,
                                            Op0LHS->getName()+".masked");
@@ -1176,7 +1176,7 @@
         uint32_t Zeros = AndRHSMask.countLeadingZeros();
         APInt Mask = APInt::getLowBitsSet(BitWidth, BitWidth - Zeros);

-        if (MaskedValueIsZero(Op0LHS, Mask)) {
+        if (MaskedValueIsZero(Op0LHS, Mask, 0, &I)) {
           Value *NewNeg = Builder->CreateNeg(Op0RHS);
           return BinaryOperator::CreateAnd(NewNeg, AndRHS);
         }
@@ -1554,7 +1554,8 @@
 }

 /// FoldOrOfICmps - Fold (icmp)|(icmp) if possible.
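+/// For example, with power-of-two masks K1 = 1 and K2 = 2:
+///   ((A & 1) == 0) | ((A & 2) == 0)  -->  (A & 3) != 3
+/// (the or is true unless both low bits of A are set).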
-Value *InstCombiner::FoldOrOfICmps(ICmpInst *LHS, ICmpInst *RHS) {
+Value *InstCombiner::FoldOrOfICmps(ICmpInst *LHS, ICmpInst *RHS,
+                                   Instruction *CxtI) {
   ICmpInst::Predicate LHSCC = LHS->getPredicate(), RHSCC = RHS->getPredicate();

   // Fold (iszero(A & K1) | iszero(A & K2)) ->  (A & (K1 | K2)) != (K1 | K2)
@@ -1574,13 +1575,15 @@
     Value *Mask = nullptr;
     Value *Masked = nullptr;
     if (LAnd->getOperand(0) == RAnd->getOperand(0) &&
-        isKnownToBeAPowerOfTwo(LAnd->getOperand(1)) &&
-        isKnownToBeAPowerOfTwo(RAnd->getOperand(1))) {
+        isKnownToBeAPowerOfTwo(LAnd->getOperand(1), false, 0, CxtI, DT) &&
+        isKnownToBeAPowerOfTwo(RAnd->getOperand(1), false, 0, CxtI, DT)) {
       Mask = Builder->CreateOr(LAnd->getOperand(1), RAnd->getOperand(1));
       Masked = Builder->CreateAnd(LAnd->getOperand(0), Mask);
     } else if (LAnd->getOperand(1) == RAnd->getOperand(1) &&
-               isKnownToBeAPowerOfTwo(LAnd->getOperand(0)) &&
-               isKnownToBeAPowerOfTwo(RAnd->getOperand(0))) {
+               isKnownToBeAPowerOfTwo(LAnd->getOperand(0),
+                                      false, 0, CxtI, DT) &&
+               isKnownToBeAPowerOfTwo(RAnd->getOperand(0),
+                                      false, 0, CxtI, DT)) {
       Mask = Builder->CreateOr(LAnd->getOperand(0), RAnd->getOperand(0));
       Masked = Builder->CreateAnd(LAnd->getOperand(1), Mask);
     }
@@ -1973,7 +1976,7 @@
   // (X^C)|Y -> (X|Y)^C iff Y&C == 0
   if (Op0->hasOneUse() &&
       match(Op0, m_Xor(m_Value(A), m_ConstantInt(C1))) &&
-      MaskedValueIsZero(Op1, C1->getValue())) {
+      MaskedValueIsZero(Op1, C1->getValue(), 0, &I)) {
     Value *NOr = Builder->CreateOr(A, Op1);
     NOr->takeName(Op0);
     return BinaryOperator::CreateXor(NOr, C1);
@@ -1982,7 +1985,7 @@
   // Y|(X^C) -> (X|Y)^C iff Y&C == 0
   if (Op1->hasOneUse() &&
       match(Op1, m_Xor(m_Value(A), m_ConstantInt(C1))) &&
-      MaskedValueIsZero(Op0, C1->getValue())) {
+      MaskedValueIsZero(Op0, C1->getValue(), 0, &I)) {
     Value *NOr = Builder->CreateOr(A, Op0);
     NOr->takeName(Op0);
     return BinaryOperator::CreateXor(NOr, C1);
@@ -2000,14 +2003,18 @@
     // ((V | N) & C1) | (V & C2) --> (V|N) & (C1|C2)
     // iff (C1&C2) == 0 and (N&~C1) == 0
     if (match(A, m_Or(m_Value(V1), m_Value(V2))) &&
-        ((V1 == B && MaskedValueIsZero(V2, ~C1->getValue())) ||  // (V|N)
-         (V2 == B && MaskedValueIsZero(V1, ~C1->getValue()))))   // (N|V)
+        ((V1 == B &&
+          MaskedValueIsZero(V2, ~C1->getValue(), 0, &I)) ||  // (V|N)
+         (V2 == B &&
+          MaskedValueIsZero(V1, ~C1->getValue(), 0, &I))))   // (N|V)
      return BinaryOperator::CreateAnd(A,
                                 Builder->getInt(C1->getValue()|C2->getValue()));
     // Or commutes, try both ways.
     if (match(B, m_Or(m_Value(V1), m_Value(V2))) &&
-        ((V1 == A && MaskedValueIsZero(V2, ~C2->getValue())) ||  // (V|N)
-         (V2 == A && MaskedValueIsZero(V1, ~C2->getValue()))))   // (N|V)
+        ((V1 == A &&
+          MaskedValueIsZero(V2, ~C2->getValue(), 0, &I)) ||  // (V|N)
+         (V2 == A &&
+          MaskedValueIsZero(V1, ~C2->getValue(), 0, &I))))   // (N|V)
       return BinaryOperator::CreateAnd(B,
                                 Builder->getInt(C1->getValue()|C2->getValue()));
@@ -2138,7 +2145,7 @@

   if (ICmpInst *RHS = dyn_cast<ICmpInst>(I.getOperand(1)))
     if (ICmpInst *LHS = dyn_cast<ICmpInst>(I.getOperand(0)))
-      if (Value *Res = FoldOrOfICmps(LHS, RHS))
+      if (Value *Res = FoldOrOfICmps(LHS, RHS, &I))
         return ReplaceInstUsesWith(I, Res);

   // (fcmp uno x, c) | (fcmp uno y, c)  -> (fcmp uno x, y)
@@ -2169,7 +2176,7 @@
       // cast is otherwise not optimizable. This happens for vector sexts.
       if (ICmpInst *RHS = dyn_cast<ICmpInst>(Op1COp))
         if (ICmpInst *LHS = dyn_cast<ICmpInst>(Op0COp))
-          if (Value *Res = FoldOrOfICmps(LHS, RHS))
+          if (Value *Res = FoldOrOfICmps(LHS, RHS, &I))
             return CastInst::Create(Op0C->getOpcode(), Res, I.getType());

       // If this is or(cast(fcmp), cast(fcmp)), try to fold this even if the
@@ -2327,7 +2334,8 @@
         }
       } else if (Op0I->getOpcode() == Instruction::Or) {
         // (X|C1)^C2 -> X^(C1|C2) iff X&~C1 == 0
-        if (MaskedValueIsZero(Op0I->getOperand(0), Op0CI->getValue())) {
+        if (MaskedValueIsZero(Op0I->getOperand(0), Op0CI->getValue(),
+                              0, &I)) {
           Constant *NewRHS = ConstantExpr::getOr(Op0CI, RHS);
           // Anything in both C1 and C2 is known to be zero, remove it from
           // NewRHS.

Index: lib/Transforms/InstCombine/InstCombineCalls.cpp
===================================================================
--- lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -58,8 +58,8 @@
 }

 Instruction *InstCombiner::SimplifyMemTransfer(MemIntrinsic *MI) {
-  unsigned DstAlign = getKnownAlignment(MI->getArgOperand(0), DL);
-  unsigned SrcAlign = getKnownAlignment(MI->getArgOperand(1), DL);
+  unsigned DstAlign = getKnownAlignment(MI->getArgOperand(0), DL, MI, DT);
+  unsigned SrcAlign = getKnownAlignment(MI->getArgOperand(1), DL, MI, DT);
   unsigned MinAlign = std::min(DstAlign, SrcAlign);
   unsigned CopyAlign = MI->getAlignment();

@@ -154,7 +154,7 @@
 }

 Instruction *InstCombiner::SimplifyMemSet(MemSetInst *MI) {
-  unsigned Alignment = getKnownAlignment(MI->getDest(), DL);
+  unsigned Alignment = getKnownAlignment(MI->getDest(), DL, MI, DT);
   if (MI->getAlignment() < Alignment) {
     MI->setAlignment(ConstantInt::get(MI->getAlignmentType(),
                                       Alignment, false));
@@ -322,7 +322,7 @@
     uint32_t BitWidth = IT->getBitWidth();
     APInt KnownZero(BitWidth, 0);
     APInt KnownOne(BitWidth, 0);
-    computeKnownBits(II->getArgOperand(0), KnownZero, KnownOne);
+    computeKnownBits(II->getArgOperand(0), KnownZero, KnownOne, 0, II);
     unsigned TrailingZeros = KnownOne.countTrailingZeros();
     APInt Mask(APInt::getLowBitsSet(BitWidth, TrailingZeros));
     if ((Mask & KnownZero) == Mask)
@@ -340,7 +340,7 @@
     uint32_t BitWidth = IT->getBitWidth();
     APInt KnownZero(BitWidth, 0);
     APInt KnownOne(BitWidth, 0);
-    computeKnownBits(II->getArgOperand(0), KnownZero, KnownOne);
+    computeKnownBits(II->getArgOperand(0), KnownZero, KnownOne, 0, II);
     unsigned LeadingZeros = KnownOne.countLeadingZeros();
     APInt Mask(APInt::getHighBitsSet(BitWidth, LeadingZeros));
     if ((Mask & KnownZero) == Mask)
@@ -355,14 +355,14 @@
     uint32_t BitWidth = IT->getBitWidth();
     APInt LHSKnownZero(BitWidth, 0);
     APInt LHSKnownOne(BitWidth, 0);
-    computeKnownBits(LHS, LHSKnownZero, LHSKnownOne);
+    computeKnownBits(LHS, LHSKnownZero, LHSKnownOne, 0, II);
     bool LHSKnownNegative = LHSKnownOne[BitWidth - 1];
     bool LHSKnownPositive = LHSKnownZero[BitWidth - 1];

     if (LHSKnownNegative || LHSKnownPositive) {
       APInt RHSKnownZero(BitWidth, 0);
       APInt RHSKnownOne(BitWidth, 0);
-      computeKnownBits(RHS, RHSKnownZero, RHSKnownOne);
+      computeKnownBits(RHS, RHSKnownZero, RHSKnownOne, 0, II);
       bool RHSKnownNegative = RHSKnownOne[BitWidth - 1];
       bool RHSKnownPositive = RHSKnownZero[BitWidth - 1];
       if (LHSKnownNegative && RHSKnownNegative) {
@@ -426,7 +426,7 @@
     // can prove that it will never overflow.
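+    // For example, if both i8 arguments are known to have at least two sign
+    // bits, the sum always fits in i8, the overflow flag is known false,
+    // and the intrinsic can be replaced by a plain add nsw.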
     if (II->getIntrinsicID() == Intrinsic::sadd_with_overflow) {
       Value *LHS = II->getArgOperand(0), *RHS = II->getArgOperand(1);
-      if (WillNotOverflowSignedAdd(LHS, RHS)) {
+      if (WillNotOverflowSignedAdd(LHS, RHS, II)) {
         Value *Add = Builder->CreateNSWAdd(LHS, RHS);
         Add->takeName(&CI);
         Constant *V[] = {UndefValue::get(Add->getType()), Builder->getFalse()};
@@ -464,10 +464,10 @@

     APInt LHSKnownZero(BitWidth, 0);
     APInt LHSKnownOne(BitWidth, 0);
-    computeKnownBits(LHS, LHSKnownZero, LHSKnownOne);
+    computeKnownBits(LHS, LHSKnownZero, LHSKnownOne, 0, II);
     APInt RHSKnownZero(BitWidth, 0);
     APInt RHSKnownOne(BitWidth, 0);
-    computeKnownBits(RHS, RHSKnownZero, RHSKnownOne);
+    computeKnownBits(RHS, RHSKnownZero, RHSKnownOne, 0, II);

     // Get the largest possible values for each operand.
     APInt LHSMax = ~LHSKnownZero;
@@ -521,7 +521,8 @@
   case Intrinsic::ppc_altivec_lvx:
   case Intrinsic::ppc_altivec_lvxl:
     // Turn PPC lvx -> load if the pointer is known aligned.
-    if (getOrEnforceKnownAlignment(II->getArgOperand(0), 16, DL) >= 16) {
+    if (getOrEnforceKnownAlignment(II->getArgOperand(0), 16,
+                                   DL, II, DT) >= 16) {
       Value *Ptr = Builder->CreateBitCast(II->getArgOperand(0),
                                          PointerType::getUnqual(II->getType()));
       return new LoadInst(Ptr);
@@ -530,7 +531,8 @@
   case Intrinsic::ppc_altivec_stvx:
   case Intrinsic::ppc_altivec_stvxl:
     // Turn stvx -> store if the pointer is known aligned.
-    if (getOrEnforceKnownAlignment(II->getArgOperand(1), 16, DL) >= 16) {
+    if (getOrEnforceKnownAlignment(II->getArgOperand(1), 16,
+                                   DL, II, DT) >= 16) {
       Type *OpPtrTy =
         PointerType::getUnqual(II->getArgOperand(0)->getType());
       Value *Ptr = Builder->CreateBitCast(II->getArgOperand(1), OpPtrTy);
@@ -541,7 +543,8 @@
   case Intrinsic::x86_sse2_storeu_pd:
   case Intrinsic::x86_sse2_storeu_dq:
     // Turn X86 storeu -> store if the pointer is known aligned.
-    if (getOrEnforceKnownAlignment(II->getArgOperand(0), 16, DL) >= 16) {
+    if (getOrEnforceKnownAlignment(II->getArgOperand(0), 16,
+                                   DL, II, DT) >= 16) {
       Type *OpPtrTy =
         PointerType::getUnqual(II->getArgOperand(1)->getType());
       Value *Ptr = Builder->CreateBitCast(II->getArgOperand(0), OpPtrTy);
@@ -886,7 +889,7 @@
   case Intrinsic::arm_neon_vst2lane:
   case Intrinsic::arm_neon_vst3lane:
   case Intrinsic::arm_neon_vst4lane: {
-    unsigned MemAlign = getKnownAlignment(II->getArgOperand(0), DL);
+    unsigned MemAlign = getKnownAlignment(II->getArgOperand(0), DL, II, DT);
     unsigned AlignArg = II->getNumArgOperands() - 1;
     ConstantInt *IntrAlign = dyn_cast<ConstantInt>(II->getArgOperand(AlignArg));
     if (IntrAlign && IntrAlign->getZExtValue() < MemAlign) {

Index: lib/Transforms/InstCombine/InstCombineCasts.cpp
===================================================================
--- lib/Transforms/InstCombine/InstCombineCasts.cpp
+++ lib/Transforms/InstCombine/InstCombineCasts.cpp
@@ -335,7 +335,8 @@
 ///
 /// This function works on both vectors and scalars.
 ///
-static bool CanEvaluateTruncated(Value *V, Type *Ty) {
+static bool CanEvaluateTruncated(Value *V, Type *Ty, InstCombiner &IC,
+                                 Instruction *CxtI) {
   // We can always evaluate constants in another type.
   if (isa<Constant>(V))
     return true;
@@ -364,8 +365,8 @@
   case Instruction::Or:
   case Instruction::Xor:
     // These operators can all arbitrarily be extended or truncated.
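+    // For example, trunc (add i32 %a, %b) to i8 can instead be computed as
+    // add i8 (trunc %a), (trunc %b): the discarded high bits never affect
+    // the low eight bits of the sum.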
-    return CanEvaluateTruncated(I->getOperand(0), Ty) &&
-           CanEvaluateTruncated(I->getOperand(1), Ty);
+    return CanEvaluateTruncated(I->getOperand(0), Ty, IC, CxtI) &&
+           CanEvaluateTruncated(I->getOperand(1), Ty, IC, CxtI);

   case Instruction::UDiv:
   case Instruction::URem: {
@@ -374,10 +375,10 @@
     uint32_t BitWidth = Ty->getScalarSizeInBits();
     if (BitWidth < OrigBitWidth) {
       APInt Mask = APInt::getHighBitsSet(OrigBitWidth, OrigBitWidth-BitWidth);
-      if (MaskedValueIsZero(I->getOperand(0), Mask) &&
-          MaskedValueIsZero(I->getOperand(1), Mask)) {
-        return CanEvaluateTruncated(I->getOperand(0), Ty) &&
-               CanEvaluateTruncated(I->getOperand(1), Ty);
+      if (IC.MaskedValueIsZero(I->getOperand(0), Mask, 0, CxtI) &&
+          IC.MaskedValueIsZero(I->getOperand(1), Mask, 0, CxtI)) {
+        return CanEvaluateTruncated(I->getOperand(0), Ty, IC, CxtI) &&
+               CanEvaluateTruncated(I->getOperand(1), Ty, IC, CxtI);
       }
     }
     break;
@@ -388,7 +389,7 @@
     if (ConstantInt *CI = dyn_cast<ConstantInt>(I->getOperand(1))) {
       uint32_t BitWidth = Ty->getScalarSizeInBits();
       if (CI->getLimitedValue(BitWidth) < BitWidth)
-        return CanEvaluateTruncated(I->getOperand(0), Ty);
+        return CanEvaluateTruncated(I->getOperand(0), Ty, IC, CxtI);
     }
     break;
   case Instruction::LShr:
@@ -398,10 +399,10 @@
     if (ConstantInt *CI = dyn_cast<ConstantInt>(I->getOperand(1))) {
       uint32_t OrigBitWidth = OrigTy->getScalarSizeInBits();
       uint32_t BitWidth = Ty->getScalarSizeInBits();
-      if (MaskedValueIsZero(I->getOperand(0),
-            APInt::getHighBitsSet(OrigBitWidth, OrigBitWidth-BitWidth)) &&
+      if (IC.MaskedValueIsZero(I->getOperand(0),
+            APInt::getHighBitsSet(OrigBitWidth, OrigBitWidth-BitWidth), 0, CxtI) &&
           CI->getLimitedValue(BitWidth) < BitWidth) {
-        return CanEvaluateTruncated(I->getOperand(0), Ty);
+        return CanEvaluateTruncated(I->getOperand(0), Ty, IC, CxtI);
       }
     }
     break;
@@ -415,8 +416,8 @@
     return true;
   case Instruction::Select: {
     SelectInst *SI = cast<SelectInst>(I);
-    return CanEvaluateTruncated(SI->getTrueValue(), Ty) &&
-           CanEvaluateTruncated(SI->getFalseValue(), Ty);
+    return CanEvaluateTruncated(SI->getTrueValue(), Ty, IC, CxtI) &&
+           CanEvaluateTruncated(SI->getFalseValue(), Ty, IC, CxtI);
   }
   case Instruction::PHI: {
     // We can change a phi if we can change all operands.  Note that we never
     // get into trouble with cyclic PHIs here because we only consider
     // instructions with a single use.
     PHINode *PN = cast<PHINode>(I);
     for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
-      if (!CanEvaluateTruncated(PN->getIncomingValue(i), Ty))
+      if (!CanEvaluateTruncated(PN->getIncomingValue(i), Ty, IC, CxtI))
         return false;
     return true;
   }
@@ -453,7 +454,7 @@
   // expression tree to something weird like i93 unless the source is also
   // strange.
   if ((DestTy->isVectorTy() || ShouldChangeType(SrcTy, DestTy)) &&
-      CanEvaluateTruncated(Src, DestTy)) {
+      CanEvaluateTruncated(Src, DestTy, *this, &CI)) {

     // If this cast is a truncate, evaluating in a different type always
     // eliminates the cast, so it is always a win.
@@ -553,7 +554,7 @@
     // If Op1C is some other power of two, convert:
     uint32_t BitWidth = Op1C->getType()->getBitWidth();
     APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0);
-    computeKnownBits(ICI->getOperand(0), KnownZero, KnownOne);
+    computeKnownBits(ICI->getOperand(0), KnownZero, KnownOne, 0, &CI);

     APInt KnownZeroMask(~KnownZero);
     if (KnownZeroMask.isPowerOf2()) { // Exactly 1 possible 1?
@@ -601,8 +602,8 @@

       APInt KnownZeroLHS(BitWidth, 0), KnownOneLHS(BitWidth, 0);
       APInt KnownZeroRHS(BitWidth, 0), KnownOneRHS(BitWidth, 0);
-      computeKnownBits(LHS, KnownZeroLHS, KnownOneLHS);
-      computeKnownBits(RHS, KnownZeroRHS, KnownOneRHS);
+      computeKnownBits(LHS, KnownZeroLHS, KnownOneLHS, 0, &CI);
+      computeKnownBits(RHS, KnownZeroRHS, KnownOneRHS, 0, &CI);

       if (KnownZeroLHS == KnownZeroRHS && KnownOneLHS == KnownOneRHS) {
         APInt KnownBits = KnownZeroLHS | KnownOneLHS;
@@ -651,7 +652,8 @@
 /// clear the top bits anyway, doing this has no extra cost.
 ///
 /// This function works on both vectors and scalars.
-static bool CanEvaluateZExtd(Value *V, Type *Ty, unsigned &BitsToClear) {
+static bool CanEvaluateZExtd(Value *V, Type *Ty, unsigned &BitsToClear,
+                             InstCombiner &IC, Instruction *CxtI) {
   BitsToClear = 0;
   if (isa<Constant>(V))
     return true;
@@ -680,8 +682,8 @@
   case Instruction::Add:
   case Instruction::Sub:
   case Instruction::Mul:
-    if (!CanEvaluateZExtd(I->getOperand(0), Ty, BitsToClear) ||
-        !CanEvaluateZExtd(I->getOperand(1), Ty, Tmp))
+    if (!CanEvaluateZExtd(I->getOperand(0), Ty, BitsToClear, IC, CxtI) ||
+        !CanEvaluateZExtd(I->getOperand(1), Ty, Tmp, IC, CxtI))
       return false;
     // These can all be promoted if neither operand has 'bits to clear'.
     if (BitsToClear == 0 && Tmp == 0)
@@ -695,8 +697,9 @@
       // We use MaskedValueIsZero here for generality, but the case we care
      // about the most is constant RHS.
       unsigned VSize = V->getType()->getScalarSizeInBits();
-      if (MaskedValueIsZero(I->getOperand(1),
-                            APInt::getHighBitsSet(VSize, BitsToClear)))
+      if (IC.MaskedValueIsZero(I->getOperand(1),
+                               APInt::getHighBitsSet(VSize, BitsToClear),
+                               0, CxtI))
         return true;
     }

@@ -707,7 +710,7 @@
    // We can promote shl(x, cst) if we can promote x.  Since shl overwrites the
    // upper bits we can reduce BitsToClear by the shift amount.
     if (ConstantInt *Amt = dyn_cast<ConstantInt>(I->getOperand(1))) {
-      if (!CanEvaluateZExtd(I->getOperand(0), Ty, BitsToClear))
+      if (!CanEvaluateZExtd(I->getOperand(0), Ty, BitsToClear, IC, CxtI))
         return false;
       uint64_t ShiftAmt = Amt->getZExtValue();
       BitsToClear = ShiftAmt < BitsToClear ? BitsToClear - ShiftAmt : 0;
@@ -718,7 +721,7 @@
     // We can promote lshr(x, cst) if we can promote x.  This requires the
     // ultimate 'and' to clear out the high zero bits we're clearing out though.
     if (ConstantInt *Amt = dyn_cast<ConstantInt>(I->getOperand(1))) {
-      if (!CanEvaluateZExtd(I->getOperand(0), Ty, BitsToClear))
+      if (!CanEvaluateZExtd(I->getOperand(0), Ty, BitsToClear, IC, CxtI))
        return false;
       BitsToClear += Amt->getZExtValue();
       if (BitsToClear > V->getType()->getScalarSizeInBits())
@@ -728,8 +731,8 @@
     // Cannot promote variable LSHR.
     return false;
   case Instruction::Select:
-    if (!CanEvaluateZExtd(I->getOperand(1), Ty, Tmp) ||
-        !CanEvaluateZExtd(I->getOperand(2), Ty, BitsToClear) ||
+    if (!CanEvaluateZExtd(I->getOperand(1), Ty, Tmp, IC, CxtI) ||
+        !CanEvaluateZExtd(I->getOperand(2), Ty, BitsToClear, IC, CxtI) ||
         // TODO: If important, we could handle the case when the BitsToClear are
         // known zero in the disagreeing side.
         Tmp != BitsToClear)
@@ -741,10 +744,10 @@
     // get into trouble with cyclic PHIs here because we only consider
     // instructions with a single use.
     PHINode *PN = cast<PHINode>(I);
-    if (!CanEvaluateZExtd(PN->getIncomingValue(0), Ty, BitsToClear))
+    if (!CanEvaluateZExtd(PN->getIncomingValue(0), Ty, BitsToClear, IC, CxtI))
       return false;
     for (unsigned i = 1, e = PN->getNumIncomingValues(); i != e; ++i)
-      if (!CanEvaluateZExtd(PN->getIncomingValue(i), Ty, Tmp) ||
+      if (!CanEvaluateZExtd(PN->getIncomingValue(i), Ty, Tmp, IC, CxtI) ||
          // TODO: If important, we could handle the case when the BitsToClear
          // are known zero in the disagreeing input.
          Tmp != BitsToClear)
@@ -781,7 +784,7 @@
   // strange.
   unsigned BitsToClear;
   if ((DestTy->isVectorTy() || ShouldChangeType(SrcTy, DestTy)) &&
-      CanEvaluateZExtd(Src, DestTy, BitsToClear)) {
+      CanEvaluateZExtd(Src, DestTy, BitsToClear, *this, &CI)) {
     assert(BitsToClear < SrcTy->getScalarSizeInBits() &&
            "Unreasonable BitsToClear");

@@ -796,8 +799,10 @@

     // If the high bits are already filled with zeros, just replace this
     // cast with the result.
-    if (MaskedValueIsZero(Res, APInt::getHighBitsSet(DestBitSize,
-                                                     DestBitSize-SrcBitsKept)))
+    if (MaskedValueIsZero(Res,
+                          APInt::getHighBitsSet(DestBitSize,
+                                                DestBitSize-SrcBitsKept),
+                          0, &CI))
       return ReplaceInstUsesWith(CI, Res);

     // We need to emit an AND to clear the high bits.
@@ -921,7 +926,7 @@
       ICI->isEquality() && (Op1C->isZero() || Op1C->getValue().isPowerOf2())){
     unsigned BitWidth = Op1C->getType()->getBitWidth();
     APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0);
-    computeKnownBits(Op0, KnownZero, KnownOne);
+    computeKnownBits(Op0, KnownZero, KnownOne, 0, &CI);

     APInt KnownZeroMask(~KnownZero);
     if (KnownZeroMask.isPowerOf2()) {
@@ -1072,7 +1077,7 @@

     // If the high bits are already filled with sign bit, just replace this
     // cast with the result.
-    if (ComputeNumSignBits(Res) > DestBitSize - SrcBitSize)
+    if (ComputeNumSignBits(Res, 0, &CI) > DestBitSize - SrcBitSize)
       return ReplaceInstUsesWith(CI, Res);

     // We need to emit a shl + ashr to do the sign extend.

Index: lib/Transforms/InstCombine/InstCombineCompares.cpp
===================================================================
--- lib/Transforms/InstCombine/InstCombineCompares.cpp
+++ lib/Transforms/InstCombine/InstCombineCompares.cpp
@@ -1060,7 +1060,7 @@
   unsigned DstBits = LHSI->getType()->getPrimitiveSizeInBits(),
          SrcBits = LHSI->getOperand(0)->getType()->getPrimitiveSizeInBits();
   APInt KnownZero(SrcBits, 0), KnownOne(SrcBits, 0);
-  computeKnownBits(LHSI->getOperand(0), KnownZero, KnownOne);
+  computeKnownBits(LHSI->getOperand(0), KnownZero, KnownOne, 0, &ICI);

   // If all the high bits are known, we can do this xform.
   if ((KnownZero|KnownOne).countLeadingOnes() >= SrcBits-DstBits) {
@@ -1932,8 +1932,8 @@
   // sign-extended; check for that condition. For example, if CI2 is 2^31 and
   // the operands of the add are 64 bits wide, we need at least 33 sign bits.
   unsigned NeededSignBits = CI1->getBitWidth() - NewWidth + 1;
-  if (IC.ComputeNumSignBits(A) < NeededSignBits ||
-      IC.ComputeNumSignBits(B) < NeededSignBits)
+  if (IC.ComputeNumSignBits(A, 0, &I) < NeededSignBits ||
+      IC.ComputeNumSignBits(B, 0, &I) < NeededSignBits)
     return nullptr;

   // In order to replace the original add with a narrower
@@ -3112,7 +3112,8 @@
   // and       (A & ~B) != 0 --> (A & B) == 0
   // if A is a power of 2.
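+  // For example, with A = 4: A & ~B is either 4 or 0 depending only on
+  // bit 2 of B, so (A & ~B) != 0 holds exactly when (A & B) == 0.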
   if (match(Op0, m_And(m_Value(A), m_Not(m_Value(B)))) &&
-      match(Op1, m_Zero()) && isKnownToBeAPowerOfTwo(A) && I.isEquality())
+      match(Op1, m_Zero()) && isKnownToBeAPowerOfTwo(A, false, 0, &I, DT) &&
+      I.isEquality())
     return new ICmpInst(I.getInversePredicate(),
                         Builder->CreateAnd(A, B), Op1);

Index: lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
===================================================================
--- lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
+++ lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
@@ -268,7 +268,8 @@
   SmallVector<Instruction *, 4> ToDelete;
   if (MemTransferInst *Copy = isOnlyCopiedFromConstantGlobal(&AI, ToDelete)) {
     unsigned SourceAlign = getOrEnforceKnownAlignment(Copy->getSource(),
-                                                      AI.getAlignment(), DL);
+                                                      AI.getAlignment(),
+                                                      DL, &AI, DT);
     if (AI.getAlignment() <= SourceAlign) {
       DEBUG(dbgs() << "Found alloca equal to global: " << AI << '\n');
       DEBUG(dbgs() << "  memcpy = " << *Copy << '\n');
@@ -363,7 +364,8 @@
   // Attempt to improve the alignment.
   if (DL) {
     unsigned KnownAlign =
-      getOrEnforceKnownAlignment(Op, DL->getPrefTypeAlignment(LI.getType()),DL);
+      getOrEnforceKnownAlignment(Op, DL->getPrefTypeAlignment(LI.getType()),
+                                 DL, &LI, DT);
     unsigned LoadAlign = LI.getAlignment();
     unsigned EffectiveLoadAlign = LoadAlign != 0 ? LoadAlign :
       DL->getABITypeAlignment(LI.getType());
@@ -602,7 +604,7 @@
   if (DL) {
     unsigned KnownAlign =
       getOrEnforceKnownAlignment(Ptr, DL->getPrefTypeAlignment(Val->getType()),
-                                 DL);
+                                 DL, &SI, DT);
     unsigned StoreAlign = SI.getAlignment();
     unsigned EffectiveStoreAlign = StoreAlign != 0 ? StoreAlign :
       DL->getABITypeAlignment(Val->getType());

Index: lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
===================================================================
--- lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
+++ lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
@@ -25,7 +25,8 @@
 /// simplifyValueKnownNonZero - The specific integer value is used in a context
 /// where it is known to be non-zero.  If this allows us to simplify the
 /// computation, do so and return the new operand, otherwise return null.
-static Value *simplifyValueKnownNonZero(Value *V, InstCombiner &IC) {
+static Value *simplifyValueKnownNonZero(Value *V, InstCombiner &IC,
+                                        Instruction *CxtI) {
   // If V has multiple uses, then we would have to do more analysis to determine
   // if this is safe.  For example, the use could be in dynamically unreached
   // code.
@@ -39,7 +40,7 @@
   if (match(V, m_LShr(m_OneUse(m_Shl(m_Value(PowerOf2), m_Value(A))),
                       m_Value(B))) &&
       // The "1" can be any value known to be a power of 2.
-      isKnownToBeAPowerOfTwo(PowerOf2)) {
+      isKnownToBeAPowerOfTwo(PowerOf2, false, 0, CxtI, IC.getDominatorTree())) {
     A = IC.Builder->CreateSub(A, B);
     return IC.Builder->CreateShl(PowerOf2, A);
   }
@@ -47,10 +48,12 @@
   // (PowerOfTwo >>u B) --> isExact since shifting out the result would make it
   // inexact.  Similarly for <<.
   if (BinaryOperator *I = dyn_cast<BinaryOperator>(V))
    if (I->isLogicalShift() && isKnownToBeAPowerOfTwo(I->getOperand(0), false,
                                                      0, CxtI,
                                                      IC.getDominatorTree())) {
       // We know that this is an exact/nuw shift and that the input is a
       // non-zero context as well.
-      if (Value *V2 = simplifyValueKnownNonZero(I->getOperand(0), IC)) {
+      if (Value *V2 = simplifyValueKnownNonZero(I->getOperand(0), IC, CxtI)) {
         I->setOperand(0, V2);
         MadeChange = true;
       }
@@ -277,9 +280,9 @@
   APInt Negative2(I.getType()->getPrimitiveSizeInBits(), (uint64_t)-2, true);
   Value *BoolCast = nullptr, *OtherOp = nullptr;
-  if (MaskedValueIsZero(Op0, Negative2))
+  if (MaskedValueIsZero(Op0, Negative2, 0, &I))
     BoolCast = Op0, OtherOp = Op1;
-  else if (MaskedValueIsZero(Op1, Negative2))
+  else if (MaskedValueIsZero(Op1, Negative2, 0, &I))
     BoolCast = Op1, OtherOp = Op0;
   if (BoolCast) {
@@ -714,7 +717,7 @@
   Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
   // The RHS is known non-zero.
-  if (Value *V = simplifyValueKnownNonZero(I.getOperand(1), *this)) {
+  if (Value *V = simplifyValueKnownNonZero(I.getOperand(1), *this, &I)) {
     I.setOperand(1, V);
     return &I;
   }
@@ -1008,8 +1011,8 @@
   // unsigned inputs), turn this into a udiv.
   if (I.getType()->isIntegerTy()) {
     APInt Mask(APInt::getSignBit(I.getType()->getPrimitiveSizeInBits()));
-    if (MaskedValueIsZero(Op0, Mask)) {
-      if (MaskedValueIsZero(Op1, Mask)) {
+    if (MaskedValueIsZero(Op0, Mask, 0, &I)) {
+      if (MaskedValueIsZero(Op1, Mask, 0, &I)) {
         // X sdiv Y -> X udiv Y, iff X and Y don't have sign bit set
         return BinaryOperator::CreateUDiv(Op0, Op1, I.getName());
       }
@@ -1195,7 +1198,7 @@
   Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
   // The RHS is known non-zero.
-  if (Value *V = simplifyValueKnownNonZero(I.getOperand(1), *this)) {
+  if (Value *V = simplifyValueKnownNonZero(I.getOperand(1), *this, &I)) {
     I.setOperand(1, V);
     return &I;
   }
@@ -1242,7 +1245,7 @@
                                   I.getType());
   // X urem Y -> X and Y-1, where Y is a power of 2,
-  if (isKnownToBeAPowerOfTwo(Op1, /*OrZero*/true)) {
+  if (isKnownToBeAPowerOfTwo(Op1, /*OrZero*/true, 0, &I, DT)) {
     Constant *N1 = Constant::getAllOnesValue(I.getType());
     Value *Add = Builder->CreateAdd(Op1, N1);
     return BinaryOperator::CreateAnd(Op0, Add);
@@ -1285,7 +1288,8 @@
   // unsigned inputs), turn this into a urem.
   if (I.getType()->isIntegerTy()) {
     APInt Mask(APInt::getSignBit(I.getType()->getPrimitiveSizeInBits()));
-    if (MaskedValueIsZero(Op1, Mask) && MaskedValueIsZero(Op0, Mask)) {
+    if (MaskedValueIsZero(Op1, Mask, 0, &I) &&
+        MaskedValueIsZero(Op0, Mask, 0, &I)) {
       // X srem Y -> X urem Y, iff X and Y don't have sign bit set
       return BinaryOperator::CreateURem(Op0, Op1, I.getName());
     }
Index: lib/Transforms/InstCombine/InstCombineShifts.cpp
===================================================================
--- lib/Transforms/InstCombine/InstCombineShifts.cpp
+++ lib/Transforms/InstCombine/InstCombineShifts.cpp
@@ -68,7 +68,7 @@
 /// this succeeds, the GetShiftedValue function will be called to produce the
 /// value.
 static bool CanEvaluateShifted(Value *V, unsigned NumBits, bool isLeftShift,
-                               InstCombiner &IC) {
+                               InstCombiner &IC, Instruction *CxtI) {
   // We can always evaluate constants shifted.
   if (isa<Constant>(V))
     return true;
@@ -111,8 +111,8 @@
   case Instruction::Or:
   case Instruction::Xor:
     // Bitwise operators can all be arbitrarily evaluated shifted.
-    return CanEvaluateShifted(I->getOperand(0), NumBits, isLeftShift, IC) &&
-           CanEvaluateShifted(I->getOperand(1), NumBits, isLeftShift, IC);
+    return CanEvaluateShifted(I->getOperand(0), NumBits, isLeftShift, IC, I) &&
+           CanEvaluateShifted(I->getOperand(1), NumBits, isLeftShift, IC, I);
   case Instruction::Shl: {
     // We can often fold the shift into shifts-by-a-constant.
@@ -131,8 +131,9 @@
     // profitable unless we know the and'd out bits are already zero.
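     // e.g. for i32, ((X << 5) >>u 3) equals (X << 2) with its top three bits
     // cleared; if bits 27..29 of X are already known zero, no masking AND is
     // needed and the rewrite pays off.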
     if (CI->getZExtValue() > NumBits) {
       unsigned LowBits = TypeWidth - CI->getZExtValue();
-      if (MaskedValueIsZero(I->getOperand(0),
-            APInt::getLowBitsSet(TypeWidth, NumBits) << LowBits))
+      if (IC.MaskedValueIsZero(I->getOperand(0),
+            APInt::getLowBitsSet(TypeWidth, NumBits) << LowBits,
+            0, CxtI))
         return true;
     }
@@ -155,8 +156,9 @@
     // profitable unless we know the and'd out bits are already zero.
     if (CI->getValue().ult(TypeWidth) && CI->getZExtValue() > NumBits) {
       unsigned LowBits = CI->getZExtValue() - NumBits;
-      if (MaskedValueIsZero(I->getOperand(0),
-            APInt::getLowBitsSet(TypeWidth, NumBits) << LowBits))
+      if (IC.MaskedValueIsZero(I->getOperand(0),
+            APInt::getLowBitsSet(TypeWidth, NumBits) << LowBits,
+            0, CxtI))
         return true;
     }
@@ -164,8 +166,9 @@
   }
   case Instruction::Select: {
     SelectInst *SI = cast<SelectInst>(I);
-    return CanEvaluateShifted(SI->getTrueValue(), NumBits, isLeftShift, IC) &&
-           CanEvaluateShifted(SI->getFalseValue(), NumBits, isLeftShift, IC);
+    return CanEvaluateShifted(SI->getTrueValue(), NumBits, isLeftShift,
+                              IC, SI) &&
+           CanEvaluateShifted(SI->getFalseValue(), NumBits, isLeftShift, IC, SI);
   }
   case Instruction::PHI: {
     // We can change a phi if we can change all operands.  Note that we never
@@ -173,7 +176,8 @@
     // instructions with a single use.
     PHINode *PN = cast<PHINode>(I);
     for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
-      if (!CanEvaluateShifted(PN->getIncomingValue(i), NumBits, isLeftShift,IC))
+      if (!CanEvaluateShifted(PN->getIncomingValue(i), NumBits, isLeftShift,
+                              IC, PN))
         return false;
     return true;
   }
@@ -329,7 +333,7 @@
   // See if we can propagate this shift into the input, this covers the trivial
   // cast of lshr(shl(x,c1),c2) as well as other more complex cases.
   if (I.getOpcode() != Instruction::AShr &&
-      CanEvaluateShifted(Op0, COp1->getZExtValue(), isLeftShift, *this)) {
+      CanEvaluateShifted(Op0, COp1->getZExtValue(), isLeftShift, *this, &I)) {
     DEBUG(dbgs() << "ICE: GetShiftedValue propagating shift through expression"
               " to eliminate shift:\n  IN: " << *Op0 << "\n  SH: " << I <<"\n");
@@ -703,14 +707,15 @@
   // If the shifted-out value is known-zero, then this is a NUW shift.
   if (!I.hasNoUnsignedWrap() &&
       MaskedValueIsZero(I.getOperand(0),
-                        APInt::getHighBitsSet(Op1C->getBitWidth(), ShAmt))) {
+                        APInt::getHighBitsSet(Op1C->getBitWidth(), ShAmt),
+                        0, &I)) {
     I.setHasNoUnsignedWrap();
     return &I;
   }
   // If the shifted out value is all signbits, this is a NSW shift.
   if (!I.hasNoSignedWrap() &&
-      ComputeNumSignBits(I.getOperand(0)) > ShAmt) {
+      ComputeNumSignBits(I.getOperand(0), 0, &I) > ShAmt) {
     I.setHasNoSignedWrap();
     return &I;
   }
@@ -760,7 +765,8 @@
   // If the shifted-out value is known-zero, then this is an exact shift.
   if (!I.isExact() &&
-      MaskedValueIsZero(Op0,APInt::getLowBitsSet(Op1C->getBitWidth(),ShAmt))){
+      MaskedValueIsZero(Op0, APInt::getLowBitsSet(Op1C->getBitWidth(), ShAmt),
+                        0, &I)){
     I.setIsExact();
     return &I;
   }
@@ -804,7 +810,8 @@
   // If the shifted-out value is known-zero, then this is an exact shift.
   if (!I.isExact() &&
-      MaskedValueIsZero(Op0,APInt::getLowBitsSet(Op1C->getBitWidth(),ShAmt))){
+      MaskedValueIsZero(Op0,APInt::getLowBitsSet(Op1C->getBitWidth(),ShAmt),
+                        0, &I)){
     I.setIsExact();
     return &I;
   }
@@ -812,11 +819,12 @@
   // See if we can turn a signed shr into an unsigned shr.
   if (MaskedValueIsZero(Op0,
-                        APInt::getSignBit(I.getType()->getScalarSizeInBits())))
+                        APInt::getSignBit(I.getType()->getScalarSizeInBits()),
+                        0, &I))
     return BinaryOperator::CreateLShr(Op0, Op1);
   // Arithmetic shifting an all-sign-bit value is a no-op.
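   // (A value whose sign-bit count equals its bit width is 0 or -1, and an
   // in-range "ashr" maps both to themselves.)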
-  unsigned NumSignBits = ComputeNumSignBits(Op0);
+  unsigned NumSignBits = ComputeNumSignBits(Op0, 0, &I);
   if (NumSignBits == Op0->getType()->getScalarSizeInBits())
     return ReplaceInstUsesWith(I, Op0);
Index: lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
===================================================================
--- lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
+++ lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
@@ -57,7 +57,7 @@
   APInt DemandedMask(APInt::getAllOnesValue(BitWidth));
   Value *V = SimplifyDemandedUseBits(&Inst, DemandedMask,
-                                     KnownZero, KnownOne, 0);
+                                     KnownZero, KnownOne, 0, &Inst);
   if (!V) return false;
   if (V == &Inst) return true;
   ReplaceInstUsesWith(Inst, V);
@@ -71,7 +71,8 @@
                                         APInt &KnownZero, APInt &KnownOne,
                                         unsigned Depth) {
   Value *NewVal = SimplifyDemandedUseBits(U.get(), DemandedMask,
-                                          KnownZero, KnownOne, Depth);
+                                          KnownZero, KnownOne, Depth,
+                                          dyn_cast<Instruction>(U.getUser()));
   if (!NewVal) return false;
   U = NewVal;
   return true;
@@ -101,7 +102,8 @@
 /// in the context where the specified bits are demanded, but not for all users.
 Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
                                              APInt &KnownZero, APInt &KnownOne,
-                                             unsigned Depth) {
+                                             unsigned Depth,
+                                             Instruction *CxtI) {
   assert(V != nullptr && "Null pointer of Value???");
   assert(Depth <= 6 && "Limit Search Depth");
   uint32_t BitWidth = DemandedMask.getBitWidth();
@@ -144,7 +146,7 @@
   Instruction *I = dyn_cast<Instruction>(V);
   if (!I) {
-    computeKnownBits(V, KnownZero, KnownOne, Depth);
+    computeKnownBits(V, KnownZero, KnownOne, Depth, CxtI);
     return nullptr;        // Only analyze instructions.
   }
@@ -158,8 +160,10 @@
     // this instruction has a simpler value in that context.
     if (I->getOpcode() == Instruction::And) {
       // If either the LHS or the RHS are Zero, the result is zero.
-      computeKnownBits(I->getOperand(1), RHSKnownZero, RHSKnownOne, Depth+1);
-      computeKnownBits(I->getOperand(0), LHSKnownZero, LHSKnownOne, Depth+1);
+      computeKnownBits(I->getOperand(1), RHSKnownZero, RHSKnownOne, Depth+1,
+                       CxtI);
+      computeKnownBits(I->getOperand(0), LHSKnownZero, LHSKnownOne, Depth+1,
+                       CxtI);
       // If all of the demanded bits are known 1 on one side, return the other.
       // These bits cannot contribute to the result of the 'and' in this
@@ -180,8 +184,10 @@
      // only bits from X or Y are demanded.
      // If either the LHS or the RHS are One, the result is One.
-      computeKnownBits(I->getOperand(1), RHSKnownZero, RHSKnownOne, Depth+1);
-      computeKnownBits(I->getOperand(0), LHSKnownZero, LHSKnownOne, Depth+1);
+      computeKnownBits(I->getOperand(1), RHSKnownZero, RHSKnownOne, Depth+1,
+                       CxtI);
+      computeKnownBits(I->getOperand(0), LHSKnownZero, LHSKnownOne, Depth+1,
+                       CxtI);
      // If all of the demanded bits are known zero on one side, return the
      // other.  These bits cannot contribute to the result of the 'or' in this
@@ -205,8 +211,10 @@
      // We can simplify (X^Y) -> X or Y in the user's context if we know that
      // only bits from X or Y are demanded.
-      computeKnownBits(I->getOperand(1), RHSKnownZero, RHSKnownOne, Depth+1);
-      computeKnownBits(I->getOperand(0), LHSKnownZero, LHSKnownOne, Depth+1);
+      computeKnownBits(I->getOperand(1), RHSKnownZero, RHSKnownOne, Depth+1,
+                       CxtI);
+      computeKnownBits(I->getOperand(0), LHSKnownZero, LHSKnownOne, Depth+1,
+                       CxtI);
      // If all of the demanded bits are known zero on one side, return the
      // other.
@@ -217,7 +225,7 @@
    }
    // Compute the KnownZero/KnownOne bits to simplify things downstream.
-    computeKnownBits(I, KnownZero, KnownOne, Depth);
+    computeKnownBits(I, KnownZero, KnownOne, Depth, CxtI);
     return nullptr;
   }
@@ -230,7 +238,7 @@
   switch (I->getOpcode()) {
   default:
-    computeKnownBits(I, KnownZero, KnownOne, Depth);
+    computeKnownBits(I, KnownZero, KnownOne, Depth, CxtI);
     break;
   case Instruction::And:
     // If either the LHS or the RHS are Zero, the result is zero.
@@ -581,7 +589,7 @@
     // Otherwise just hand the sub off to computeKnownBits to fill in
     // the known zeros and ones.
-    computeKnownBits(V, KnownZero, KnownOne, Depth);
+    computeKnownBits(V, KnownZero, KnownOne, Depth, CxtI);
     // Turn this into a xor if LHS is 2^n-1 and the remaining bits are known
     // zero.
@@ -752,7 +760,8 @@
     // remainder is zero.
     if (DemandedMask.isNegative() && KnownZero.isNonNegative()) {
       APInt LHSKnownZero(BitWidth, 0), LHSKnownOne(BitWidth, 0);
-      computeKnownBits(I->getOperand(0), LHSKnownZero, LHSKnownOne, Depth+1);
+      computeKnownBits(I->getOperand(0), LHSKnownZero, LHSKnownOne, Depth+1,
+                       CxtI);
       // If it's known zero, our sign bit is also zero.
       if (LHSKnownZero.isNegative())
         KnownZero.setBit(KnownZero.getBitWidth() - 1);
@@ -814,7 +823,7 @@
       return nullptr;
     }
   }
-  computeKnownBits(V, KnownZero, KnownOne, Depth);
+  computeKnownBits(V, KnownZero, KnownOne, Depth, CxtI);
   break;
 }
Index: lib/Transforms/InstCombine/InstructionCombining.cpp
===================================================================
--- lib/Transforms/InstCombine/InstructionCombining.cpp
+++ lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -45,6 +45,7 @@
 #include "llvm/Analysis/ValueTracking.h"
 #include "llvm/IR/CFG.h"
 #include "llvm/IR/DataLayout.h"
+#include "llvm/IR/Dominators.h"
 #include "llvm/IR/GetElementPtrTypeIterator.h"
 #include "llvm/IR/IntrinsicInst.h"
 #include "llvm/IR/PatternMatch.h"
@@ -2835,6 +2836,11 @@
   DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
   DL = DLP ? &DLP->getDataLayout() : nullptr;
   TLI = &getAnalysis<TargetLibraryInfo>();
+
+  DominatorTreeWrapperPass *DTWP =
+      getAnalysisIfAvailable<DominatorTreeWrapperPass>();
+  DT = DTWP ? &DTWP->getDomTree() : nullptr;
+
   // Minimizing size?
   MinimizeSize = F.getAttributes().hasAttribute(AttributeSet::FunctionIndex,
                                                 Attribute::MinSize);
Index: lib/Transforms/Scalar/MemCpyOptimizer.cpp
===================================================================
--- lib/Transforms/Scalar/MemCpyOptimizer.cpp
+++ lib/Transforms/Scalar/MemCpyOptimizer.cpp
@@ -963,8 +963,10 @@
   // If it is greater than the memcpy, then we check to see if we can force the
   // source of the memcpy to the alignment we need.  If we fail, we bail out.
+  DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
   if (MDep->getAlignment() < ByValAlign &&
-      getOrEnforceKnownAlignment(MDep->getSource(), ByValAlign, DL) < ByValAlign)
+      getOrEnforceKnownAlignment(MDep->getSource(), ByValAlign,
+                                 DL, CS.getInstruction(), &DT) < ByValAlign)
     return false;
   // Verify that the copied-from memory doesn't change in between the memcpy and
Index: lib/Transforms/Utils/InlineFunction.cpp
===================================================================
--- lib/Transforms/Utils/InlineFunction.cpp
+++ lib/Transforms/Utils/InlineFunction.cpp
@@ -376,7 +376,7 @@
   // If the pointer is already known to be sufficiently aligned, or if we can
   // round it up to a larger alignment, then we don't need a temporary.
   if (getOrEnforceKnownAlignment(Arg, ByValAlignment,
-                                 IFI.DL) >= ByValAlignment)
+                                 IFI.DL, TheCall) >= ByValAlignment)
     return Arg;
   // Otherwise, we have to make a memcpy to get a safe alignment.  This is bad
Index: lib/Transforms/Utils/Local.cpp
===================================================================
--- lib/Transforms/Utils/Local.cpp
+++ lib/Transforms/Utils/Local.cpp
@@ -938,13 +938,15 @@
 /// and it is more than the alignment of the ultimate object, see if we can
 /// increase the alignment of the ultimate object, making this check succeed.
 unsigned llvm::getOrEnforceKnownAlignment(Value *V, unsigned PrefAlign,
-                                          const DataLayout *DL) {
+                                          const DataLayout *DL,
+                                          const Instruction *CxtI,
+                                          const DominatorTree *DT) {
   assert(V->getType()->isPointerTy() &&
          "getOrEnforceKnownAlignment expects a pointer!");
   unsigned BitWidth = DL ? DL->getPointerTypeSizeInBits(V->getType()) : 64;
   APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0);
-  computeKnownBits(V, KnownZero, KnownOne, DL);
+  computeKnownBits(V, KnownZero, KnownOne, DL, 0, CxtI, DT);
   unsigned TrailZ = KnownZero.countTrailingOnes();
   // Avoid trouble with ridiculously large TrailZ values, such as
Index: lib/Transforms/Utils/SimplifyCFG.cpp
===================================================================
--- lib/Transforms/Utils/SimplifyCFG.cpp
+++ lib/Transforms/Utils/SimplifyCFG.cpp
@@ -3208,11 +3208,11 @@
 /// EliminateDeadSwitchCases - Compute masked bits for the condition of a switch
 /// and use it to remove dead cases.
-static bool EliminateDeadSwitchCases(SwitchInst *SI) {
+static bool EliminateDeadSwitchCases(SwitchInst *SI, const DataLayout *DL) {
   Value *Cond = SI->getCondition();
   unsigned Bits = Cond->getType()->getIntegerBitWidth();
   APInt KnownZero(Bits, 0), KnownOne(Bits, 0);
-  computeKnownBits(Cond, KnownZero, KnownOne);
+  computeKnownBits(Cond, KnownZero, KnownOne, DL, 0, SI);
   // Gather dead cases.
   SmallVector<ConstantInt *, 8> DeadCases;
@@ -3942,7 +3942,7 @@
     return SimplifyCFG(BB, TTI, DL) | true;
   // Remove unreachable cases.
-  if (EliminateDeadSwitchCases(SI))
+  if (EliminateDeadSwitchCases(SI, DL))
     return SimplifyCFG(BB, TTI, DL) | true;
   if (ForwardSwitchConditionToPHI(SI))
Index: test/Transforms/InstCombine/invariants-loop-align.ll
===================================================================
--- /dev/null
+++ test/Transforms/InstCombine/invariants-loop-align.ll
@@ -0,0 +1,47 @@
+; RUN: opt -domtree -instcombine -loops -S < %s | FileCheck %s
+; Note: The -loops above can be anything that requires the domtree, and is
+; necessary to work around a pass-manager bug.
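+;
+; Each invariant below asserts that the low six bits of a pointer are zero,
+; i.e. that the pointer is 64-byte aligned, so instcombine should be able to
+; raise the alignment of the load and store in the loop to 64.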
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+; Function Attrs: nounwind uwtable
+define void @foo(i32* %a, i32* %b) #0 {
+entry:
+  %ptrint = ptrtoint i32* %a to i64
+  %maskedptr = and i64 %ptrint, 63
+  %maskcond = icmp eq i64 %maskedptr, 0
+  tail call void @llvm.invariant(i1 %maskcond)
+  %ptrint1 = ptrtoint i32* %b to i64
+  %maskedptr2 = and i64 %ptrint1, 63
+  %maskcond3 = icmp eq i64 %maskedptr2, 0
+  tail call void @llvm.invariant(i1 %maskcond3)
+  br label %for.body
+
+; CHECK-LABEL: @foo
+; CHECK: load i32* {{.*}} align 64
+; CHECK: store i32 {{.*}} align 64
+; CHECK: ret
+
+for.body:                                         ; preds = %entry, %for.body
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+  %arrayidx = getelementptr inbounds i32* %b, i64 %indvars.iv
+  %0 = load i32* %arrayidx, align 4
+  %add = add nsw i32 %0, 1
+  %arrayidx5 = getelementptr inbounds i32* %a, i64 %indvars.iv
+  store i32 %add, i32* %arrayidx5, align 4
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 16
+  %1 = trunc i64 %indvars.iv.next to i32
+  %cmp = icmp slt i32 %1, 1648
+  br i1 %cmp, label %for.body, label %for.end
+
+for.end:                                          ; preds = %for.body
+  ret void
+}
+
+; Function Attrs: nounwind
+declare void @llvm.invariant(i1) #1
+
+attributes #0 = { nounwind uwtable }
+attributes #1 = { nounwind }
+
Index: test/Transforms/InstCombine/invariants.ll
===================================================================
--- /dev/null
+++ test/Transforms/InstCombine/invariants.ll
@@ -0,0 +1,148 @@
+; RUN: opt < %s -instcombine -S | FileCheck %s
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+; Function Attrs: nounwind uwtable
+define i32 @foo1(i32* %a) #0 {
+entry:
+  %0 = load i32* %a, align 4
+
+; Check that the alignment has been upgraded and that the invariant has not
+; been removed:
+; CHECK-LABEL: @foo1
+; CHECK-DAG: load i32* %a, align 32
+; CHECK-DAG: call void @llvm.invariant
+; CHECK: ret i32
+
+  %ptrint = ptrtoint i32* %a to i64
+  %maskedptr = and i64 %ptrint, 31
+  %maskcond = icmp eq i64 %maskedptr, 0
+  tail call void @llvm.invariant(i1 %maskcond)
+
+  ret i32 %0
+}
+
+; Function Attrs: nounwind uwtable
+define i32 @foo2(i32* %a) #0 {
+entry:
+; Same check as in @foo1, but make sure it works if the invariant is first too.
+; CHECK-LABEL: @foo2
+; CHECK-DAG: load i32* %a, align 32
+; CHECK-DAG: call void @llvm.invariant
+; CHECK: ret i32
+
+  %ptrint = ptrtoint i32* %a to i64
+  %maskedptr = and i64 %ptrint, 31
+  %maskcond = icmp eq i64 %maskedptr, 0
+  tail call void @llvm.invariant(i1 %maskcond)
+
+  %0 = load i32* %a, align 4
+  ret i32 %0
+}
+
+; Function Attrs: nounwind
+declare void @llvm.invariant(i1) #1
+
+; Function Attrs: nounwind uwtable
+define i32 @bar1(i32 %a) #0 {
+entry:
+  %and1 = and i32 %a, 3
+
+; CHECK-LABEL: @bar1
+; CHECK: call void @llvm.invariant
+; CHECK: ret i32 1
+
+  %and = and i32 %a, 7
+  %cmp = icmp eq i32 %and, 1
+  tail call void @llvm.invariant(i1 %cmp)
+
+  ret i32 %and1
+}
+
+; Function Attrs: nounwind uwtable
+define i32 @bar2(i32 %a) #0 {
+entry:
+; CHECK-LABEL: @bar2
+; CHECK: call void @llvm.invariant
+; CHECK: ret i32 1
+
+  %and = and i32 %a, 7
+  %cmp = icmp eq i32 %and, 1
+  tail call void @llvm.invariant(i1 %cmp)
+
+  %and1 = and i32 %a, 3
+  ret i32 %and1
+}
+
+; Function Attrs: nounwind uwtable
+define i32 @bar3(i32 %a, i1 %x, i1 %y) #0 {
+entry:
+  %and1 = and i32 %a, 3
+
+; Don't be fooled by other invariants around.
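+; (%x and %y constrain nothing about %a; only the invariant on %and can
+; justify folding %and1 to 1.)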
+; CHECK-LABEL: @bar3
+; CHECK: call void @llvm.invariant
+; CHECK: ret i32 1
+
+  tail call void @llvm.invariant(i1 %x)
+
+  %and = and i32 %a, 7
+  %cmp = icmp eq i32 %and, 1
+  tail call void @llvm.invariant(i1 %cmp)
+
+  tail call void @llvm.invariant(i1 %y)
+
+  ret i32 %and1
+}
+
+; Function Attrs: nounwind uwtable
+define i32 @bar4(i32 %a, i32 %b) {
+entry:
+  %and1 = and i32 %b, 3
+
+; CHECK-LABEL: @bar4
+; CHECK: call void @llvm.invariant
+; CHECK: call void @llvm.invariant
+; CHECK: ret i32 1
+
+  %and = and i32 %a, 7
+  %cmp = icmp eq i32 %and, 1
+  tail call void @llvm.invariant(i1 %cmp)
+
+  %cmp2 = icmp eq i32 %a, %b
+  tail call void @llvm.invariant(i1 %cmp2)
+
+  ret i32 %and1
+}
+
+define i32 @icmp1(i32 %a) #0 {
+entry:
+  %cmp = icmp sgt i32 %a, 5
+  tail call void @llvm.invariant(i1 %cmp)
+  %conv = zext i1 %cmp to i32
+  ret i32 %conv
+
+; CHECK-LABEL: @icmp1
+; CHECK: call void @llvm.invariant
+; CHECK: ret i32 1
+
+}
+
+; Function Attrs: nounwind uwtable
+define i32 @icmp2(i32 %a) #0 {
+entry:
+  %cmp = icmp sgt i32 %a, 5
+  tail call void @llvm.invariant(i1 %cmp)
+  %0 = zext i1 %cmp to i32
+  %lnot.ext = xor i32 %0, 1
+  ret i32 %lnot.ext
+
+; CHECK-LABEL: @icmp2
+; CHECK: call void @llvm.invariant
+; CHECK: ret i32 0
+
+}
+
+attributes #0 = { nounwind uwtable }
+attributes #1 = { nounwind }
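
A minimal sketch, not part of the patch, of how a caller could use the new
context arguments; it assumes the post-patch computeKnownBits overload shown
in the Local.cpp hunk above, and the helper name isKnownMultipleOf8At is
hypothetical:

// Ask whether an integer value V is known to be a multiple of 8 at a
// particular program point.  Passing CxtI and DT lets ValueTracking use
// dominating facts (for example an llvm.invariant condition) that hold at
// CxtI but not necessarily at every use of V.
#include "llvm/ADT/APInt.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Instruction.h"

using namespace llvm;

static bool isKnownMultipleOf8At(Value *V, const DataLayout *DL,
                                 const Instruction *CxtI,
                                 const DominatorTree *DT) {
  // Assumes V has integer type, so the scalar size is non-zero.
  unsigned BitWidth = V->getType()->getScalarSizeInBits();
  APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0);
  // Depth 0 starts a fresh query; CxtI and DT are the new context parameters.
  computeKnownBits(V, KnownZero, KnownOne, DL, 0, CxtI, DT);
  // A multiple of 8 has its three low bits known zero.
  return KnownZero.countTrailingOnes() >= 3;
}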