diff --git a/llvm/include/llvm/Analysis/TargetTransformInfo.h b/llvm/include/llvm/Analysis/TargetTransformInfo.h --- a/llvm/include/llvm/Analysis/TargetTransformInfo.h +++ b/llvm/include/llvm/Analysis/TargetTransformInfo.h @@ -49,6 +49,7 @@ class Loop; class LoopInfo; class ProfileSummaryInfo; +class OptimizationRemarkEmitter; class SCEV; class ScalarEvolution; class StoreInst; @@ -1325,6 +1326,14 @@ /// @} + /// Improve \p Known for \p V with target specific known bits analysis. + /// \returns true if the target analyzed \p V, false otherwise. + bool computeKnownBits(const Value *V, KnownBits &Known, const DataLayout &DL, + unsigned Depth, AssumptionCache *AC, + const Instruction *CxtI, const DominatorTree *DT, + OptimizationRemarkEmitter *ORE, + bool UseInstrInfo) const; + private: /// Estimate the latency of specified instruction. /// Returns 1 as the default value. @@ -1604,6 +1613,12 @@ virtual unsigned getGISelRematGlobalCost() const = 0; virtual bool hasActiveVectorLength() const = 0; virtual int getInstructionLatency(const Instruction *I) = 0; + virtual bool + computeKnownBits(const TargetTransformInfo *TTI, const Value *V, + KnownBits &Known, const DataLayout &DL, unsigned Depth, + AssumptionCache *AC, const Instruction *CxtI, + const DominatorTree *DT, OptimizationRemarkEmitter *ORE, + bool UseInstrInfo) const = 0; }; template @@ -2123,6 +2138,15 @@ int getInstructionLatency(const Instruction *I) override { return Impl.getInstructionLatency(I); } + + bool computeKnownBits(const TargetTransformInfo *TTI, const Value *V, + KnownBits &Known, const DataLayout &DL, unsigned Depth, + AssumptionCache *AC, const Instruction *CxtI, + const DominatorTree *DT, OptimizationRemarkEmitter *ORE, + bool UseInstrInfo) const override { + return Impl.computeKnownBits(TTI, V, Known, DL, Depth, AC, CxtI, DT, ORE, + UseInstrInfo); + } }; template diff --git a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h --- a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h +++ b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h @@ -20,6 +20,7 @@ #include "llvm/IR/DataLayout.h" #include "llvm/IR/Function.h" #include "llvm/IR/GetElementPtrTypeIterator.h" +#include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Intrinsics.h" #include "llvm/IR/Operator.h" #include "llvm/IR/Type.h" @@ -1080,6 +1081,14 @@ return 1; } + + bool computeKnownBits(const TargetTransformInfo *TTI, const Value *V, + KnownBits &Known, const DataLayout &DL, unsigned Depth, + AssumptionCache *AC, const Instruction *CxtI, + const DominatorTree *DT, OptimizationRemarkEmitter *ORE, + bool UseInstrInfo) const { + return false; + } }; } // namespace llvm diff --git a/llvm/include/llvm/Analysis/ValueTracking.h b/llvm/include/llvm/Analysis/ValueTracking.h --- a/llvm/include/llvm/Analysis/ValueTracking.h +++ b/llvm/include/llvm/Analysis/ValueTracking.h @@ -43,6 +43,7 @@ class OptimizationRemarkEmitter; class StringRef; class TargetLibraryInfo; +class TargetTransformInfo; class Value; constexpr unsigned MaxAnalysisRecursionDepth = 6; @@ -61,7 +62,8 @@ const Instruction *CxtI = nullptr, const DominatorTree *DT = nullptr, OptimizationRemarkEmitter *ORE = nullptr, - bool UseInstrInfo = true); + bool UseInstrInfo = true, + const TargetTransformInfo *TTI = nullptr); /// Determine which bits of V are known to be either zero or one and return /// them in the KnownZero/KnownOne bit sets. 
@@ -77,7 +79,8 @@ const Instruction *CxtI = nullptr, const DominatorTree *DT = nullptr, OptimizationRemarkEmitter *ORE = nullptr, - bool UseInstrInfo = true); + bool UseInstrInfo = true, + const TargetTransformInfo *TTI = nullptr); /// Returns the known bits rather than passing by reference. KnownBits computeKnownBits(const Value *V, const DataLayout &DL, @@ -85,7 +88,8 @@ const Instruction *CxtI = nullptr, const DominatorTree *DT = nullptr, OptimizationRemarkEmitter *ORE = nullptr, - bool UseInstrInfo = true); + bool UseInstrInfo = true, + const TargetTransformInfo *TTI = nullptr); /// Returns the known bits rather than passing by reference. KnownBits computeKnownBits(const Value *V, const APInt &DemandedElts, @@ -94,7 +98,8 @@ const Instruction *CxtI = nullptr, const DominatorTree *DT = nullptr, OptimizationRemarkEmitter *ORE = nullptr, - bool UseInstrInfo = true); + bool UseInstrInfo = true, + const TargetTransformInfo *TTI = nullptr); /// Compute known bits from the range metadata. /// \p KnownZero the set of bits that are known to be zero @@ -108,7 +113,8 @@ AssumptionCache *AC = nullptr, const Instruction *CxtI = nullptr, const DominatorTree *DT = nullptr, - bool UseInstrInfo = true); + bool UseInstrInfo = true, + const TargetTransformInfo *TTI = nullptr); /// Return true if the given value is known to have exactly one bit set when /// defined. For vectors return true if every element is known to be a power @@ -120,7 +126,8 @@ AssumptionCache *AC = nullptr, const Instruction *CxtI = nullptr, const DominatorTree *DT = nullptr, - bool UseInstrInfo = true); + bool UseInstrInfo = true, + const TargetTransformInfo *TTI = nullptr); bool isOnlyUsedInZeroEqualityComparison(const Instruction *CxtI); @@ -134,7 +141,8 @@ AssumptionCache *AC = nullptr, const Instruction *CxtI = nullptr, const DominatorTree *DT = nullptr, - bool UseInstrInfo = true); + bool UseInstrInfo = true, + const TargetTransformInfo *TTI = nullptr); /// Return true if the two given values are negation. /// Currently can recoginze Value pair: @@ -148,7 +156,8 @@ AssumptionCache *AC = nullptr, const Instruction *CxtI = nullptr, const DominatorTree *DT = nullptr, - bool UseInstrInfo = true); + bool UseInstrInfo = true, + const TargetTransformInfo *TTI = nullptr); /// Returns true if the given value is known be positive (i.e. non-negative /// and non-zero). @@ -156,7 +165,8 @@ AssumptionCache *AC = nullptr, const Instruction *CxtI = nullptr, const DominatorTree *DT = nullptr, - bool UseInstrInfo = true); + bool UseInstrInfo = true, + const TargetTransformInfo *TTI = nullptr); /// Returns true if the given value is known be negative (i.e. non-positive /// and non-zero). @@ -172,7 +182,8 @@ AssumptionCache *AC = nullptr, const Instruction *CxtI = nullptr, const DominatorTree *DT = nullptr, - bool UseInstrInfo = true); + bool UseInstrInfo = true, + const TargetTransformInfo *TTI = nullptr); /// Return true if 'V & Mask' is known to be zero. We use this predicate to /// simplify operations downstream. Mask is known to be zero for bits that V @@ -188,7 +199,8 @@ unsigned Depth = 0, AssumptionCache *AC = nullptr, const Instruction *CxtI = nullptr, const DominatorTree *DT = nullptr, - bool UseInstrInfo = true); + bool UseInstrInfo = true, + const TargetTransformInfo *TTI = nullptr); /// Return the number of times the sign bit of the register is replicated into /// the other bits. 
We know that at least 1 bit is always equal to the sign @@ -201,7 +213,8 @@ unsigned Depth = 0, AssumptionCache *AC = nullptr, const Instruction *CxtI = nullptr, const DominatorTree *DT = nullptr, - bool UseInstrInfo = true); + bool UseInstrInfo = true, + const TargetTransformInfo *TTI = nullptr); /// This function computes the integer multiple of Base that equals V. If /// successful, it returns true and returns the multiple in Multiple. If @@ -493,47 +506,39 @@ NeverOverflows, }; - OverflowResult computeOverflowForUnsignedMul(const Value *LHS, - const Value *RHS, - const DataLayout &DL, - AssumptionCache *AC, - const Instruction *CxtI, - const DominatorTree *DT, - bool UseInstrInfo = true); - OverflowResult computeOverflowForSignedMul(const Value *LHS, const Value *RHS, - const DataLayout &DL, - AssumptionCache *AC, - const Instruction *CxtI, - const DominatorTree *DT, - bool UseInstrInfo = true); - OverflowResult computeOverflowForUnsignedAdd(const Value *LHS, - const Value *RHS, - const DataLayout &DL, - AssumptionCache *AC, - const Instruction *CxtI, - const DominatorTree *DT, - bool UseInstrInfo = true); - OverflowResult computeOverflowForSignedAdd(const Value *LHS, const Value *RHS, - const DataLayout &DL, - AssumptionCache *AC = nullptr, - const Instruction *CxtI = nullptr, - const DominatorTree *DT = nullptr); + OverflowResult computeOverflowForUnsignedMul( + const Value *LHS, const Value *RHS, const DataLayout &DL, + AssumptionCache *AC, const Instruction *CxtI, const DominatorTree *DT, + bool UseInstrInfo = true, const TargetTransformInfo *TTI = nullptr); + OverflowResult computeOverflowForSignedMul( + const Value *LHS, const Value *RHS, const DataLayout &DL, + AssumptionCache *AC, const Instruction *CxtI, const DominatorTree *DT, + bool UseInstrInfo = true, const TargetTransformInfo *TTI = nullptr); + OverflowResult computeOverflowForUnsignedAdd( + const Value *LHS, const Value *RHS, const DataLayout &DL, + AssumptionCache *AC, const Instruction *CxtI, const DominatorTree *DT, + bool UseInstrInfo = true, const TargetTransformInfo *TTI = nullptr); + OverflowResult computeOverflowForSignedAdd( + const Value *LHS, const Value *RHS, const DataLayout &DL, + AssumptionCache *AC = nullptr, const Instruction *CxtI = nullptr, + const DominatorTree *DT = nullptr, + const TargetTransformInfo *TTI = nullptr); /// This version also leverages the sign bit of Add if known. 
- OverflowResult computeOverflowForSignedAdd(const AddOperator *Add, - const DataLayout &DL, - AssumptionCache *AC = nullptr, - const Instruction *CxtI = nullptr, - const DominatorTree *DT = nullptr); - OverflowResult computeOverflowForUnsignedSub(const Value *LHS, const Value *RHS, - const DataLayout &DL, - AssumptionCache *AC, - const Instruction *CxtI, - const DominatorTree *DT); - OverflowResult computeOverflowForSignedSub(const Value *LHS, const Value *RHS, - const DataLayout &DL, - AssumptionCache *AC, - const Instruction *CxtI, - const DominatorTree *DT); + OverflowResult + computeOverflowForSignedAdd(const AddOperator *Add, const DataLayout &DL, + AssumptionCache *AC = nullptr, + const Instruction *CxtI = nullptr, + const DominatorTree *DT = nullptr, + const TargetTransformInfo *TTI = nullptr); + OverflowResult computeOverflowForUnsignedSub( + const Value *LHS, const Value *RHS, const DataLayout &DL, + AssumptionCache *AC, const Instruction *CxtI, const DominatorTree *DT, + const TargetTransformInfo *TTI = nullptr); + OverflowResult + computeOverflowForSignedSub(const Value *LHS, const Value *RHS, + const DataLayout &DL, AssumptionCache *AC, + const Instruction *CxtI, const DominatorTree *DT, + const TargetTransformInfo *TTI = nullptr); /// Returns true if the arithmetic part of the \p WO 's result is /// used only along the paths control dependent on the computation diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h b/llvm/include/llvm/CodeGen/BasicTTIImpl.h --- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h +++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h @@ -1877,6 +1877,14 @@ unsigned getVectorSplitCost() { return 1; } /// @} + bool computeKnownBits(const TargetTransformInfo *TTI, const Value *V, + KnownBits &Known, const DataLayout &DL, unsigned Depth, + AssumptionCache *AC, const Instruction *CxtI, + const DominatorTree *DT, OptimizationRemarkEmitter *ORE, + bool UseInstrInfo) const { + return BaseT::computeKnownBits(TTI, V, Known, DL, Depth, AC, CxtI, DT, ORE, + UseInstrInfo); + } }; /// Concrete BasicTTIImpl that can be used if no further customization diff --git a/llvm/include/llvm/Support/KnownBits.h b/llvm/include/llvm/Support/KnownBits.h --- a/llvm/include/llvm/Support/KnownBits.h +++ b/llvm/include/llvm/Support/KnownBits.h @@ -166,6 +166,16 @@ return *this; } + /// Return known bits for a sign extension or truncation of the value we're + /// tracking. + KnownBits sextOrTrunc(unsigned BitWidth) const { + if (BitWidth > getBitWidth()) + return sext(BitWidth); + if (BitWidth < getBitWidth()) + return trunc(BitWidth); + return *this; + } + /// Return a KnownBits with the extracted bits /// [bitPosition,bitPosition+numBits). KnownBits extractBits(unsigned NumBits, unsigned BitPosition) const { @@ -269,6 +279,8 @@ One.extractBits(NumBits, BitPosition)); } + static KnownBits computeForMul(const KnownBits &LHS, const KnownBits &RHS); + /// Update known bits based on ANDing with RHS. 
KnownBits &operator&=(const KnownBits &RHS); diff --git a/llvm/lib/Analysis/TargetTransformInfo.cpp b/llvm/lib/Analysis/TargetTransformInfo.cpp --- a/llvm/lib/Analysis/TargetTransformInfo.cpp +++ b/llvm/lib/Analysis/TargetTransformInfo.cpp @@ -1365,6 +1365,14 @@ } } +bool TargetTransformInfo::computeKnownBits( + const Value *V, KnownBits &Known, const DataLayout &DL, unsigned Depth, + AssumptionCache *AC, const Instruction *CxtI, const DominatorTree *DT, + OptimizationRemarkEmitter *ORE, bool UseInstrInfo) const { + return TTIImpl->computeKnownBits(this, V, Known, DL, Depth, AC, CxtI, DT, ORE, + UseInstrInfo); +} + TargetTransformInfo::Concept::~Concept() {} TargetIRAnalysis::TargetIRAnalysis() : TTICallback(&getDefaultTTI) {} diff --git a/llvm/lib/Analysis/ValueTracking.cpp b/llvm/lib/Analysis/ValueTracking.cpp --- a/llvm/lib/Analysis/ValueTracking.cpp +++ b/llvm/lib/Analysis/ValueTracking.cpp @@ -32,6 +32,7 @@ #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/OptimizationRemarkEmitter.h" #include "llvm/Analysis/TargetLibraryInfo.h" +#include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/IR/Argument.h" #include "llvm/IR/Attributes.h" #include "llvm/IR/BasicBlock.h" @@ -120,16 +121,20 @@ /// If true, it is safe to use metadata during simplification. InstrInfoQuery IIQ; + const TargetTransformInfo *TTI; + unsigned NumExcluded = 0; Query(const DataLayout &DL, AssumptionCache *AC, const Instruction *CxtI, const DominatorTree *DT, bool UseInstrInfo, - OptimizationRemarkEmitter *ORE = nullptr) - : DL(DL), AC(AC), CxtI(CxtI), DT(DT), ORE(ORE), IIQ(UseInstrInfo) {} + OptimizationRemarkEmitter *ORE = nullptr, + const TargetTransformInfo *TTI = nullptr) + : DL(DL), AC(AC), CxtI(CxtI), DT(DT), ORE(ORE), IIQ(UseInstrInfo), + TTI(TTI) {} Query(const Query &Q, const Value *NewExcl) : DL(Q.DL), AC(Q.AC), CxtI(Q.CxtI), DT(Q.DT), ORE(Q.ORE), IIQ(Q.IIQ), - NumExcluded(Q.NumExcluded) { + TTI(Q.TTI), NumExcluded(Q.NumExcluded) { Excluded = Q.Excluded; Excluded[NumExcluded++] = NewExcl; assert(NumExcluded <= Excluded.size()); @@ -221,18 +226,22 @@ const DataLayout &DL, unsigned Depth, AssumptionCache *AC, const Instruction *CxtI, const DominatorTree *DT, - OptimizationRemarkEmitter *ORE, bool UseInstrInfo) { - ::computeKnownBits(V, Known, Depth, - Query(DL, AC, safeCxtI(V, CxtI), DT, UseInstrInfo, ORE)); + OptimizationRemarkEmitter *ORE, bool UseInstrInfo, + const TargetTransformInfo *TTI) { + ::computeKnownBits( + V, Known, Depth, + Query(DL, AC, safeCxtI(V, CxtI), DT, UseInstrInfo, ORE, TTI)); } void llvm::computeKnownBits(const Value *V, const APInt &DemandedElts, KnownBits &Known, const DataLayout &DL, unsigned Depth, AssumptionCache *AC, const Instruction *CxtI, const DominatorTree *DT, - OptimizationRemarkEmitter *ORE, bool UseInstrInfo) { - ::computeKnownBits(V, DemandedElts, Known, Depth, - Query(DL, AC, safeCxtI(V, CxtI), DT, UseInstrInfo, ORE)); + OptimizationRemarkEmitter *ORE, bool UseInstrInfo, + const TargetTransformInfo *TTI) { + ::computeKnownBits( + V, DemandedElts, Known, Depth, + Query(DL, AC, safeCxtI(V, CxtI), DT, UseInstrInfo, ORE, TTI)); } static KnownBits computeKnownBits(const Value *V, const APInt &DemandedElts, @@ -241,14 +250,13 @@ static KnownBits computeKnownBits(const Value *V, unsigned Depth, const Query &Q); -KnownBits llvm::computeKnownBits(const Value *V, const DataLayout &DL, - unsigned Depth, AssumptionCache *AC, - const Instruction *CxtI, - const DominatorTree *DT, - OptimizationRemarkEmitter *ORE, - bool UseInstrInfo) { +KnownBits 
+llvm::computeKnownBits(const Value *V, const DataLayout &DL, unsigned Depth, + AssumptionCache *AC, const Instruction *CxtI, + const DominatorTree *DT, OptimizationRemarkEmitter *ORE, + bool UseInstrInfo, const TargetTransformInfo *TTI) { return ::computeKnownBits( - V, Depth, Query(DL, AC, safeCxtI(V, CxtI), DT, UseInstrInfo, ORE)); + V, Depth, Query(DL, AC, safeCxtI(V, CxtI), DT, UseInstrInfo, ORE, TTI)); } KnownBits llvm::computeKnownBits(const Value *V, const APInt &DemandedElts, @@ -256,16 +264,18 @@ AssumptionCache *AC, const Instruction *CxtI, const DominatorTree *DT, OptimizationRemarkEmitter *ORE, - bool UseInstrInfo) { + bool UseInstrInfo, + const TargetTransformInfo *TTI) { return ::computeKnownBits( V, DemandedElts, Depth, - Query(DL, AC, safeCxtI(V, CxtI), DT, UseInstrInfo, ORE)); + Query(DL, AC, safeCxtI(V, CxtI), DT, UseInstrInfo, ORE, TTI)); } bool llvm::haveNoCommonBitsSet(const Value *LHS, const Value *RHS, const DataLayout &DL, AssumptionCache *AC, const Instruction *CxtI, const DominatorTree *DT, - bool UseInstrInfo) { + bool UseInstrInfo, + const TargetTransformInfo *TTI) { assert(LHS->getType() == RHS->getType() && "LHS and RHS should have the same type"); assert(LHS->getType()->isIntOrIntVectorTy() && @@ -281,8 +291,10 @@ IntegerType *IT = cast(LHS->getType()->getScalarType()); KnownBits LHSKnown(IT->getBitWidth()); KnownBits RHSKnown(IT->getBitWidth()); - computeKnownBits(LHS, LHSKnown, DL, 0, AC, CxtI, DT, nullptr, UseInstrInfo); - computeKnownBits(RHS, RHSKnown, DL, 0, AC, CxtI, DT, nullptr, UseInstrInfo); + computeKnownBits(LHS, LHSKnown, DL, 0, AC, CxtI, DT, nullptr, UseInstrInfo, + TTI); + computeKnownBits(RHS, RHSKnown, DL, 0, AC, CxtI, DT, nullptr, UseInstrInfo, + TTI); return (LHSKnown.Zero | RHSKnown.Zero).isAllOnesValue(); } @@ -304,9 +316,11 @@ bool llvm::isKnownToBeAPowerOfTwo(const Value *V, const DataLayout &DL, bool OrZero, unsigned Depth, AssumptionCache *AC, const Instruction *CxtI, - const DominatorTree *DT, bool UseInstrInfo) { + const DominatorTree *DT, bool UseInstrInfo, + const TargetTransformInfo *TTI) { return ::isKnownToBeAPowerOfTwo( - V, OrZero, Depth, Query(DL, AC, safeCxtI(V, CxtI), DT, UseInstrInfo)); + V, OrZero, Depth, + Query(DL, AC, safeCxtI(V, CxtI), DT, UseInstrInfo, /*ORE=*/nullptr, TTI)); } static bool isKnownNonZero(const Value *V, const APInt &DemandedElts, @@ -316,30 +330,34 @@ bool llvm::isKnownNonZero(const Value *V, const DataLayout &DL, unsigned Depth, AssumptionCache *AC, const Instruction *CxtI, - const DominatorTree *DT, bool UseInstrInfo) { - return ::isKnownNonZero(V, Depth, - Query(DL, AC, safeCxtI(V, CxtI), DT, UseInstrInfo)); + const DominatorTree *DT, bool UseInstrInfo, + const TargetTransformInfo *TTI) { + return ::isKnownNonZero( + V, Depth, + Query(DL, AC, safeCxtI(V, CxtI), DT, UseInstrInfo, /*ORE=*/nullptr, TTI)); } bool llvm::isKnownNonNegative(const Value *V, const DataLayout &DL, unsigned Depth, AssumptionCache *AC, const Instruction *CxtI, const DominatorTree *DT, - bool UseInstrInfo) { + bool UseInstrInfo, + const TargetTransformInfo *TTI) { KnownBits Known = - computeKnownBits(V, DL, Depth, AC, CxtI, DT, nullptr, UseInstrInfo); + computeKnownBits(V, DL, Depth, AC, CxtI, DT, nullptr, UseInstrInfo, TTI); return Known.isNonNegative(); } bool llvm::isKnownPositive(const Value *V, const DataLayout &DL, unsigned Depth, AssumptionCache *AC, const Instruction *CxtI, - const DominatorTree *DT, bool UseInstrInfo) { + const DominatorTree *DT, bool UseInstrInfo, + const TargetTransformInfo *TTI) { if (auto *CI = 
dyn_cast(V)) return CI->getValue().isStrictlyPositive(); // TODO: We'd doing two recursive queries here. We should factor this such // that only a single query is needed. - return isKnownNonNegative(V, DL, Depth, AC, CxtI, DT, UseInstrInfo) && - isKnownNonZero(V, DL, Depth, AC, CxtI, DT, UseInstrInfo); + return isKnownNonNegative(V, DL, Depth, AC, CxtI, DT, UseInstrInfo, TTI) && + isKnownNonZero(V, DL, Depth, AC, CxtI, DT, UseInstrInfo, TTI); } bool llvm::isKnownNegative(const Value *V, const DataLayout &DL, unsigned Depth, @@ -355,10 +373,10 @@ bool llvm::isKnownNonEqual(const Value *V1, const Value *V2, const DataLayout &DL, AssumptionCache *AC, const Instruction *CxtI, const DominatorTree *DT, - bool UseInstrInfo) { + bool UseInstrInfo, const TargetTransformInfo *TTI) { return ::isKnownNonEqual(V1, V2, Query(DL, AC, safeCxtI(V1, safeCxtI(V2, CxtI)), DT, - UseInstrInfo, /*ORE=*/nullptr)); + UseInstrInfo, /*ORE=*/nullptr, TTI)); } static bool MaskedValueIsZero(const Value *V, const APInt &Mask, unsigned Depth, @@ -367,9 +385,11 @@ bool llvm::MaskedValueIsZero(const Value *V, const APInt &Mask, const DataLayout &DL, unsigned Depth, AssumptionCache *AC, const Instruction *CxtI, - const DominatorTree *DT, bool UseInstrInfo) { + const DominatorTree *DT, bool UseInstrInfo, + const TargetTransformInfo *TTI) { return ::MaskedValueIsZero( - V, Mask, Depth, Query(DL, AC, safeCxtI(V, CxtI), DT, UseInstrInfo)); + V, Mask, Depth, + Query(DL, AC, safeCxtI(V, CxtI), DT, UseInstrInfo, /*ORE=*/nullptr, TTI)); } static unsigned ComputeNumSignBits(const Value *V, const APInt &DemandedElts, @@ -391,9 +411,11 @@ unsigned llvm::ComputeNumSignBits(const Value *V, const DataLayout &DL, unsigned Depth, AssumptionCache *AC, const Instruction *CxtI, - const DominatorTree *DT, bool UseInstrInfo) { + const DominatorTree *DT, bool UseInstrInfo, + const TargetTransformInfo *TTI) { return ::ComputeNumSignBits( - V, Depth, Query(DL, AC, safeCxtI(V, CxtI), DT, UseInstrInfo)); + V, Depth, + Query(DL, AC, safeCxtI(V, CxtI), DT, UseInstrInfo, /*ORE=*/nullptr, TTI)); } static void computeKnownBitsAddSub(bool Add, const Value *Op0, const Value *Op1, @@ -415,7 +437,6 @@ const APInt &DemandedElts, KnownBits &Known, KnownBits &Known2, unsigned Depth, const Query &Q) { - unsigned BitWidth = Known.getBitWidth(); computeKnownBits(Op1, DemandedElts, Known, Depth + 1, Q); computeKnownBits(Op0, DemandedElts, Known2, Depth + 1, Q); @@ -433,7 +454,7 @@ bool isKnownNegativeOp0 = Known2.isNegative(); // The product of two numbers with the same sign is non-negative. isKnownNonNegative = (isKnownNegativeOp1 && isKnownNegativeOp0) || - (isKnownNonNegativeOp1 && isKnownNonNegativeOp0); + (isKnownNonNegativeOp1 && isKnownNonNegativeOp0); // The product of a negative number and a non-negative number is either // negative or zero. if (!isKnownNonNegative) @@ -444,78 +465,7 @@ } } - assert(!Known.hasConflict() && !Known2.hasConflict()); - // Compute a conservative estimate for high known-0 bits. - unsigned LeadZ = std::max(Known.countMinLeadingZeros() + - Known2.countMinLeadingZeros(), - BitWidth) - BitWidth; - LeadZ = std::min(LeadZ, BitWidth); - - // The result of the bottom bits of an integer multiply can be - // inferred by looking at the bottom bits of both operands and - // multiplying them together. - // We can infer at least the minimum number of known trailing bits - // of both operands. 
Depending on number of trailing zeros, we can - // infer more bits, because (a*b) <=> ((a/m) * (b/n)) * (m*n) assuming - // a and b are divisible by m and n respectively. - // We then calculate how many of those bits are inferrable and set - // the output. For example, the i8 mul: - // a = XXXX1100 (12) - // b = XXXX1110 (14) - // We know the bottom 3 bits are zero since the first can be divided by - // 4 and the second by 2, thus having ((12/4) * (14/2)) * (2*4). - // Applying the multiplication to the trimmed arguments gets: - // XX11 (3) - // X111 (7) - // ------- - // XX11 - // XX11 - // XX11 - // XX11 - // ------- - // XXXXX01 - // Which allows us to infer the 2 LSBs. Since we're multiplying the result - // by 8, the bottom 3 bits will be 0, so we can infer a total of 5 bits. - // The proof for this can be described as: - // Pre: (C1 >= 0) && (C1 < (1 << C5)) && (C2 >= 0) && (C2 < (1 << C6)) && - // (C7 == (1 << (umin(countTrailingZeros(C1), C5) + - // umin(countTrailingZeros(C2), C6) + - // umin(C5 - umin(countTrailingZeros(C1), C5), - // C6 - umin(countTrailingZeros(C2), C6)))) - 1) - // %aa = shl i8 %a, C5 - // %bb = shl i8 %b, C6 - // %aaa = or i8 %aa, C1 - // %bbb = or i8 %bb, C2 - // %mul = mul i8 %aaa, %bbb - // %mask = and i8 %mul, C7 - // => - // %mask = i8 ((C1*C2)&C7) - // Where C5, C6 describe the known bits of %a, %b - // C1, C2 describe the known bottom bits of %a, %b. - // C7 describes the mask of the known bits of the result. - APInt Bottom0 = Known.One; - APInt Bottom1 = Known2.One; - - // How many times we'd be able to divide each argument by 2 (shr by 1). - // This gives us the number of trailing zeros on the multiplication result. - unsigned TrailBitsKnown0 = (Known.Zero | Known.One).countTrailingOnes(); - unsigned TrailBitsKnown1 = (Known2.Zero | Known2.One).countTrailingOnes(); - unsigned TrailZero0 = Known.countMinTrailingZeros(); - unsigned TrailZero1 = Known2.countMinTrailingZeros(); - unsigned TrailZ = TrailZero0 + TrailZero1; - - // Figure out the fewest known-bits operand. - unsigned SmallestOperand = std::min(TrailBitsKnown0 - TrailZero0, - TrailBitsKnown1 - TrailZero1); - unsigned ResultBitsKnown = std::min(SmallestOperand + TrailZ, BitWidth); - - APInt BottomKnown = Bottom0.getLoBits(TrailBitsKnown0) * - Bottom1.getLoBits(TrailBitsKnown1); - - Known.resetAll(); - Known.Zero.setHighBits(LeadZ); - Known.Zero |= (~BottomKnown).getLoBits(ResultBitsKnown); - Known.One |= BottomKnown.getLoBits(ResultBitsKnown); + Known = KnownBits::computeForMul(Known, Known2); // Only make use of no-wrap flags if we failed to compute the sign bit // directly. This matters if the multiplication always overflows, in @@ -2012,6 +1962,10 @@ if (Depth == MaxAnalysisRecursionDepth) return; + if (Q.TTI && + Q.TTI->computeKnownBits(V, Known, Q.DL, Depth + 1, + Q.AC, Q.CxtI, Q.DT, Q.ORE, true)) + return; // A weak GlobalAlias is totally unknown. A non-weak GlobalAlias has // the bits of its aliasee. 
if (const GlobalAlias *GA = dyn_cast(V)) { @@ -4529,9 +4483,10 @@ static ConstantRange computeConstantRangeIncludingKnownBits( const Value *V, bool ForSigned, const DataLayout &DL, unsigned Depth, AssumptionCache *AC, const Instruction *CxtI, const DominatorTree *DT, - OptimizationRemarkEmitter *ORE = nullptr, bool UseInstrInfo = true) { - KnownBits Known = computeKnownBits( - V, DL, Depth, AC, CxtI, DT, ORE, UseInstrInfo); + OptimizationRemarkEmitter *ORE = nullptr, bool UseInstrInfo = true, + const TargetTransformInfo *TTI = nullptr) { + KnownBits Known = + computeKnownBits(V, DL, Depth, AC, CxtI, DT, ORE, UseInstrInfo, TTI); ConstantRange CR1 = ConstantRange::fromKnownBits(Known, ForSigned); ConstantRange CR2 = computeConstantRange(V, UseInstrInfo); ConstantRange::PreferredRangeType RangeType = @@ -4542,21 +4497,20 @@ OverflowResult llvm::computeOverflowForUnsignedMul( const Value *LHS, const Value *RHS, const DataLayout &DL, AssumptionCache *AC, const Instruction *CxtI, const DominatorTree *DT, - bool UseInstrInfo) { + bool UseInstrInfo, const TargetTransformInfo *TTI) { KnownBits LHSKnown = computeKnownBits(LHS, DL, /*Depth=*/0, AC, CxtI, DT, - nullptr, UseInstrInfo); + nullptr, UseInstrInfo, TTI); KnownBits RHSKnown = computeKnownBits(RHS, DL, /*Depth=*/0, AC, CxtI, DT, - nullptr, UseInstrInfo); + nullptr, UseInstrInfo, TTI); ConstantRange LHSRange = ConstantRange::fromKnownBits(LHSKnown, false); ConstantRange RHSRange = ConstantRange::fromKnownBits(RHSKnown, false); return mapOverflowResult(LHSRange.unsignedMulMayOverflow(RHSRange)); } -OverflowResult -llvm::computeOverflowForSignedMul(const Value *LHS, const Value *RHS, - const DataLayout &DL, AssumptionCache *AC, - const Instruction *CxtI, - const DominatorTree *DT, bool UseInstrInfo) { +OverflowResult llvm::computeOverflowForSignedMul( + const Value *LHS, const Value *RHS, const DataLayout &DL, + AssumptionCache *AC, const Instruction *CxtI, const DominatorTree *DT, + bool UseInstrInfo, const TargetTransformInfo *TTI) { // Multiplying n * m significant bits yields a result of n + m significant // bits. If the total number of significant bits does not exceed the // result bit width (minus 1), there is no overflow. @@ -4567,8 +4521,8 @@ // Note that underestimating the number of sign bits gives a more // conservative answer. - unsigned SignBits = ComputeNumSignBits(LHS, DL, 0, AC, CxtI, DT) + - ComputeNumSignBits(RHS, DL, 0, AC, CxtI, DT); + unsigned SignBits = ComputeNumSignBits(LHS, DL, 0, AC, CxtI, DT, TTI) + + ComputeNumSignBits(RHS, DL, 0, AC, CxtI, DT, TTI); // First handle the easy case: if we have enough sign bits there's // definitely no overflow. @@ -4586,9 +4540,9 @@ // E.g. mul i16 with 17 sign bits: 0xff00 * 0xff80 = 0x8000 // For simplicity we just check if at least one side is not negative. 
KnownBits LHSKnown = computeKnownBits(LHS, DL, /*Depth=*/0, AC, CxtI, DT, - nullptr, UseInstrInfo); + nullptr, UseInstrInfo, TTI); KnownBits RHSKnown = computeKnownBits(RHS, DL, /*Depth=*/0, AC, CxtI, DT, - nullptr, UseInstrInfo); + nullptr, UseInstrInfo, TTI); if (LHSKnown.isNonNegative() || RHSKnown.isNonNegative()) return OverflowResult::NeverOverflows; } @@ -4598,23 +4552,20 @@ OverflowResult llvm::computeOverflowForUnsignedAdd( const Value *LHS, const Value *RHS, const DataLayout &DL, AssumptionCache *AC, const Instruction *CxtI, const DominatorTree *DT, - bool UseInstrInfo) { + bool UseInstrInfo, const TargetTransformInfo *TTI) { ConstantRange LHSRange = computeConstantRangeIncludingKnownBits( - LHS, /*ForSigned=*/false, DL, /*Depth=*/0, AC, CxtI, DT, - nullptr, UseInstrInfo); + LHS, /*ForSigned=*/false, DL, /*Depth=*/0, AC, CxtI, DT, nullptr, + UseInstrInfo, TTI); ConstantRange RHSRange = computeConstantRangeIncludingKnownBits( - RHS, /*ForSigned=*/false, DL, /*Depth=*/0, AC, CxtI, DT, - nullptr, UseInstrInfo); + RHS, /*ForSigned=*/false, DL, /*Depth=*/0, AC, CxtI, DT, nullptr, + UseInstrInfo, TTI); return mapOverflowResult(LHSRange.unsignedAddMayOverflow(RHSRange)); } -static OverflowResult computeOverflowForSignedAdd(const Value *LHS, - const Value *RHS, - const AddOperator *Add, - const DataLayout &DL, - AssumptionCache *AC, - const Instruction *CxtI, - const DominatorTree *DT) { +static OverflowResult computeOverflowForSignedAdd( + const Value *LHS, const Value *RHS, const AddOperator *Add, + const DataLayout &DL, AssumptionCache *AC, const Instruction *CxtI, + const DominatorTree *DT, const TargetTransformInfo *TTI) { if (Add && Add->hasNoSignedWrap()) { return OverflowResult::NeverOverflows; } @@ -4633,14 +4584,16 @@ // // Since the carry into the most significant position is always equal to // the carry out of the addition, there is no signed overflow. 
- if (ComputeNumSignBits(LHS, DL, 0, AC, CxtI, DT) > 1 && - ComputeNumSignBits(RHS, DL, 0, AC, CxtI, DT) > 1) + if (ComputeNumSignBits(LHS, DL, 0, AC, CxtI, DT, TTI) > 1 && + ComputeNumSignBits(RHS, DL, 0, AC, CxtI, DT, TTI) > 1) return OverflowResult::NeverOverflows; ConstantRange LHSRange = computeConstantRangeIncludingKnownBits( - LHS, /*ForSigned=*/true, DL, /*Depth=*/0, AC, CxtI, DT); + LHS, /*ForSigned=*/true, DL, /*Depth=*/0, AC, CxtI, DT, /*ORE=*/nullptr, + TTI); ConstantRange RHSRange = computeConstantRangeIncludingKnownBits( - RHS, /*ForSigned=*/true, DL, /*Depth=*/0, AC, CxtI, DT); + RHS, /*ForSigned=*/true, DL, /*Depth=*/0, AC, CxtI, DT, /*ORE=*/nullptr, + TTI); OverflowResult OR = mapOverflowResult(LHSRange.signedAddMayOverflow(RHSRange)); if (OR != OverflowResult::MayOverflow) @@ -4662,7 +4615,8 @@ if (LHSOrRHSKnownNonNegative || LHSOrRHSKnownNegative) { KnownBits AddKnown(LHSRange.getBitWidth()); computeKnownBitsFromAssume( - Add, AddKnown, /*Depth=*/0, Query(DL, AC, CxtI, DT, true)); + Add, AddKnown, /*Depth=*/0, + Query(DL, AC, CxtI, DT, true, /*ORE=*/nullptr, TTI)); if ((AddKnown.isNonNegative() && LHSOrRHSKnownNonNegative) || (AddKnown.isNegative() && LHSOrRHSKnownNegative)) return OverflowResult::NeverOverflows; @@ -4671,12 +4625,10 @@ return OverflowResult::MayOverflow; } -OverflowResult llvm::computeOverflowForUnsignedSub(const Value *LHS, - const Value *RHS, - const DataLayout &DL, - AssumptionCache *AC, - const Instruction *CxtI, - const DominatorTree *DT) { +OverflowResult llvm::computeOverflowForUnsignedSub( + const Value *LHS, const Value *RHS, const DataLayout &DL, + AssumptionCache *AC, const Instruction *CxtI, const DominatorTree *DT, + const TargetTransformInfo *TTI) { // Checking for conditions implied by dominating conditions may be expensive. // Limit it to usub_with_overflow calls for now. if (match(CxtI, @@ -4688,18 +4640,18 @@ return OverflowResult::AlwaysOverflowsLow; } ConstantRange LHSRange = computeConstantRangeIncludingKnownBits( - LHS, /*ForSigned=*/false, DL, /*Depth=*/0, AC, CxtI, DT); + LHS, /*ForSigned=*/false, DL, /*Depth=*/0, AC, CxtI, DT, /*ORE=*/nullptr, + TTI); ConstantRange RHSRange = computeConstantRangeIncludingKnownBits( - RHS, /*ForSigned=*/false, DL, /*Depth=*/0, AC, CxtI, DT); + RHS, /*ForSigned=*/false, DL, /*Depth=*/0, AC, CxtI, DT, /*ORE=*/nullptr, + TTI); return mapOverflowResult(LHSRange.unsignedSubMayOverflow(RHSRange)); } -OverflowResult llvm::computeOverflowForSignedSub(const Value *LHS, - const Value *RHS, - const DataLayout &DL, - AssumptionCache *AC, - const Instruction *CxtI, - const DominatorTree *DT) { +OverflowResult llvm::computeOverflowForSignedSub( + const Value *LHS, const Value *RHS, const DataLayout &DL, + AssumptionCache *AC, const Instruction *CxtI, const DominatorTree *DT, + const TargetTransformInfo *TTI) { // If LHS and RHS each have at least two sign bits, the subtraction // cannot overflow. 
if (ComputeNumSignBits(LHS, DL, 0, AC, CxtI, DT) > 1 && @@ -4707,9 +4659,11 @@ return OverflowResult::NeverOverflows; ConstantRange LHSRange = computeConstantRangeIncludingKnownBits( - LHS, /*ForSigned=*/true, DL, /*Depth=*/0, AC, CxtI, DT); + LHS, /*ForSigned=*/true, DL, /*Depth=*/0, AC, CxtI, DT, /*ORE=*/nullptr, + TTI); ConstantRange RHSRange = computeConstantRangeIncludingKnownBits( - RHS, /*ForSigned=*/true, DL, /*Depth=*/0, AC, CxtI, DT); + RHS, /*ForSigned=*/true, DL, /*Depth=*/0, AC, CxtI, DT, /*ORE=*/nullptr, + TTI); return mapOverflowResult(LHSRange.signedSubMayOverflow(RHSRange)); } @@ -4992,22 +4946,21 @@ return ::isGuaranteedNotToBeUndefOrPoison(V, CtxI, DT, Depth, true); } -OverflowResult llvm::computeOverflowForSignedAdd(const AddOperator *Add, - const DataLayout &DL, - AssumptionCache *AC, - const Instruction *CxtI, - const DominatorTree *DT) { +OverflowResult +llvm::computeOverflowForSignedAdd(const AddOperator *Add, const DataLayout &DL, + AssumptionCache *AC, const Instruction *CxtI, + const DominatorTree *DT, + const TargetTransformInfo *TTI) { return ::computeOverflowForSignedAdd(Add->getOperand(0), Add->getOperand(1), - Add, DL, AC, CxtI, DT); + Add, DL, AC, CxtI, DT, TTI); } -OverflowResult llvm::computeOverflowForSignedAdd(const Value *LHS, - const Value *RHS, - const DataLayout &DL, - AssumptionCache *AC, - const Instruction *CxtI, - const DominatorTree *DT) { - return ::computeOverflowForSignedAdd(LHS, RHS, nullptr, DL, AC, CxtI, DT); +OverflowResult llvm::computeOverflowForSignedAdd( + const Value *LHS, const Value *RHS, const DataLayout &DL, + AssumptionCache *AC, const Instruction *CxtI, const DominatorTree *DT, + const TargetTransformInfo *TTI) { + return ::computeOverflowForSignedAdd(LHS, RHS, nullptr, DL, AC, CxtI, DT, + TTI); } bool llvm::isGuaranteedToTransferExecutionToSuccessor(const Instruction *I) { diff --git a/llvm/lib/Support/KnownBits.cpp b/llvm/lib/Support/KnownBits.cpp --- a/llvm/lib/Support/KnownBits.cpp +++ b/llvm/lib/Support/KnownBits.cpp @@ -163,6 +163,85 @@ return KnownAbs; } +KnownBits KnownBits::computeForMul(const KnownBits &LHS, const KnownBits &RHS) { + unsigned BitWidth = LHS.getBitWidth(); + + assert(!LHS.hasConflict() && !RHS.hasConflict()); + // Compute a conservative estimate for high known-0 bits. + unsigned LeadZ = + std::max(LHS.countMinLeadingZeros() + RHS.countMinLeadingZeros(), + BitWidth) - + BitWidth; + LeadZ = std::min(LeadZ, BitWidth); + + // The result of the bottom bits of an integer multiply can be + // inferred by looking at the bottom bits of both operands and + // multiplying them together. + // We can infer at least the minimum number of known trailing bits + // of both operands. Depending on number of trailing zeros, we can + // infer more bits, because (a*b) <=> ((a/m) * (b/n)) * (m*n) assuming + // a and b are divisible by m and n respectively. + // We then calculate how many of those bits are inferrable and set + // the output. For example, the i8 mul: + // a = XXXX1100 (12) + // b = XXXX1110 (14) + // We know the bottom 3 bits are zero since the first can be divided by + // 4 and the second by 2, thus having ((12/4) * (14/2)) * (2*4). + // Applying the multiplication to the trimmed arguments gets: + // XX11 (3) + // X111 (7) + // ------- + // XX11 + // XX11 + // XX11 + // XX11 + // ------- + // XXXXX01 + // Which allows us to infer the 2 LSBs. Since we're multiplying the result + // by 8, the bottom 3 bits will be 0, so we can infer a total of 5 bits. 
+ // The proof for this can be described as: + // Pre: (C1 >= 0) && (C1 < (1 << C5)) && (C2 >= 0) && (C2 < (1 << C6)) && + // (C7 == (1 << (umin(countTrailingZeros(C1), C5) + + // umin(countTrailingZeros(C2), C6) + + // umin(C5 - umin(countTrailingZeros(C1), C5), + // C6 - umin(countTrailingZeros(C2), C6)))) - 1) + // %aa = shl i8 %a, C5 + // %bb = shl i8 %b, C6 + // %aaa = or i8 %aa, C1 + // %bbb = or i8 %bb, C2 + // %mul = mul i8 %aaa, %bbb + // %mask = and i8 %mul, C7 + // => + // %mask = i8 ((C1*C2)&C7) + // Where C5, C6 describe the known bits of %a, %b + // C1, C2 describe the known bottom bits of %a, %b. + // C7 describes the mask of the known bits of the result. + APInt Bottom0 = LHS.One; + APInt Bottom1 = RHS.One; + + // How many times we'd be able to divide each argument by 2 (shr by 1). + // This gives us the number of trailing zeros on the multiplication result. + unsigned TrailBitsKnown0 = (LHS.Zero | LHS.One).countTrailingOnes(); + unsigned TrailBitsKnown1 = (RHS.Zero | RHS.One).countTrailingOnes(); + unsigned TrailZero0 = LHS.countMinTrailingZeros(); + unsigned TrailZero1 = RHS.countMinTrailingZeros(); + unsigned TrailZ = TrailZero0 + TrailZero1; + + // Figure out the fewest known-bits operand. + unsigned SmallestOperand = + std::min(TrailBitsKnown0 - TrailZero0, TrailBitsKnown1 - TrailZero1); + unsigned ResultBitsKnown = std::min(SmallestOperand + TrailZ, BitWidth); + + APInt BottomKnown = + Bottom0.getLoBits(TrailBitsKnown0) * Bottom1.getLoBits(TrailBitsKnown1); + + KnownBits Res(BitWidth); + Res.Zero.setHighBits(LeadZ); + Res.Zero |= (~BottomKnown).getLoBits(ResultBitsKnown); + Res.One = BottomKnown.getLoBits(ResultBitsKnown); + return Res; +} + KnownBits &KnownBits::operator&=(const KnownBits &RHS) { // Result bit is 0 if either operand bit is 0. Zero |= RHS.Zero; diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp --- a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp @@ -957,7 +957,7 @@ return nullptr; // TODO: Do we need to thread more context in here? 
- KnownBits Known = computeKnownBits(MaskOp, DL, 0, nullptr, II); + KnownBits Known = ::computeKnownBits(MaskOp, DL, 0, nullptr, II); if (Known.countMinLeadingOnes() < 32) return nullptr; diff --git a/llvm/unittests/Analysis/ValueTrackingTest.cpp b/llvm/unittests/Analysis/ValueTrackingTest.cpp --- a/llvm/unittests/Analysis/ValueTrackingTest.cpp +++ b/llvm/unittests/Analysis/ValueTrackingTest.cpp @@ -8,6 +8,8 @@ #include "llvm/Analysis/ValueTracking.h" #include "llvm/Analysis/AssumptionCache.h" +#include "llvm/Analysis/TargetLibraryInfo.h" +#include "llvm/Analysis/TargetTransformInfoImpl.h" #include "llvm/AsmParser/Parser.h" #include "llvm/IR/ConstantRange.h" #include "llvm/IR/Dominators.h" @@ -19,6 +21,7 @@ #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/KnownBits.h" #include "llvm/Support/SourceMgr.h" +#include "llvm/Target/TargetMachine.h" #include "gtest/gtest.h" using namespace llvm; @@ -1077,6 +1080,231 @@ EXPECT_EQ(Known.One.getZExtValue(), 0u); } +TEST_F(ComputeKnownBitsTest, ComputeKnownBitsAddWithRange) { + parseAssembly("define void @test(i64* %p) {\n" + " %A = load i64, i64* %p, !range !{i64 64, i64 65536}\n" + " %APlus512 = add i64 %A, 512\n" + " %c = icmp ugt i64 %APlus512, 523\n" + " call void @llvm.assume(i1 %c)\n" + " ret void\n" + "}\n" + "declare void @llvm.assume(i1)\n"); + AssumptionCache AC(*F); + KnownBits Known = computeKnownBits(A, M->getDataLayout(), /* Depth */ 0, &AC, + F->front().getTerminator()); + EXPECT_EQ(Known.Zero.getZExtValue(), ~(65536llu - 1)); + EXPECT_EQ(Known.One.getZExtValue(), 0u); + Instruction &APlus512 = findInstructionByName(F, "APlus512"); + Known = computeKnownBits(&APlus512, M->getDataLayout(), /* Depth */ 0, &AC, + F->front().getTerminator()); + // We know of one less zero because 512 may have produced a 1 that + // got carried all the way to the first trailing zero. + EXPECT_EQ(Known.Zero.getZExtValue(), (~(65536llu - 1)) << 1); + EXPECT_EQ(Known.One.getZExtValue(), 0u); +} + +// 512 + [32, 64) doesn't produce overlapping bits. +// Make sure we get all the individual bits properly. +TEST_F(ComputeKnownBitsTest, ComputeKnownBitsAddWithRangeNoOverlap) { + parseAssembly("define void @test(i64* %p) {\n" + " %A = load i64, i64* %p, !range !{i64 32, i64 64}\n" + " %APlus512 = add i64 %A, 512\n" + " %c = icmp ugt i64 %APlus512, 523\n" + " call void @llvm.assume(i1 %c)\n" + " ret void\n" + "}\n" + "declare void @llvm.assume(i1)\n"); + AssumptionCache AC(*F); + KnownBits Known = computeKnownBits(A, M->getDataLayout(), /* Depth */ 0, &AC, + F->front().getTerminator()); + EXPECT_EQ(Known.Zero.getZExtValue(), ~(64llu - 1)); + EXPECT_EQ(Known.One.getZExtValue(), 32u); + Instruction &APlus512 = findInstructionByName(F, "APlus512"); + Known = computeKnownBits(&APlus512, M->getDataLayout(), /* Depth */ 0, &AC, + F->front().getTerminator()); + EXPECT_EQ(Known.Zero.getZExtValue(), ~512llu & ~(64llu - 1)); + EXPECT_EQ(Known.One.getZExtValue(), 512u | 32u); +} + +namespace { +class TestGEPTTIImpl : public TargetTransformInfoImplCRTPBase<TestGEPTTIImpl> { + typedef TargetTransformInfoImplCRTPBase<TestGEPTTIImpl> BaseT; + friend BaseT; + +public: + // Override the handling of GEPs to compute the full bits. + // By default the generic analysis doesn't spend a lot of time + // on these as the only interesting information for most + // optimizations is the alignment.
+ bool computeKnownBits(const TargetTransformInfo *TTI, const Value *V, + KnownBits &Known, const DataLayout &DL, unsigned Depth, + AssumptionCache *AC, const Instruction *CxtI, + const DominatorTree *DT, OptimizationRemarkEmitter *ORE, + bool UseInstrInfo) const { + if (!isa<Operator>(V)) + return false; + const Operator *I = cast<Operator>(V); + unsigned BitWidth = Known.getBitWidth(); + + switch (I->getOpcode()) { + default: + return false; + case Instruction::GetElementPtr: { + // Analyze all of the subscripts of this getelementptr instruction + // to determine if we can prove known low zero bits. + KnownBits LocalKnown(BitWidth); + ::computeKnownBits(I->getOperand(0), LocalKnown, DL, Depth + 1, AC, CxtI, + DT, ORE, UseInstrInfo, TTI); + KnownBits AddrKnownBits(LocalKnown); + + unsigned TrailZ = LocalKnown.countMinTrailingZeros(); + + gep_type_iterator GTI = gep_type_begin(I); + // If the inbounds keyword is not present, the offsets are added to the + // base address with silently-wrapping two’s complement arithmetic. + bool IsInBounds = cast<GEPOperator>(I)->isInBounds(); + for (unsigned i = 1, e = I->getNumOperands(); i != e; ++i, ++GTI) { + if (TrailZ == 0 && AddrKnownBits.isUnknown()) + break; + Value *Index = I->getOperand(i); + + unsigned IndexBitWidth = Index->getType()->getScalarSizeInBits(); + KnownBits IndexBits(IndexBitWidth); + if (StructType *STy = GTI.getStructTypeOrNull()) { + // Handle struct member offset arithmetic. + + // Handle case when index is vector zeroinitializer + Constant *CIndex = cast<Constant>(Index); + if (CIndex->isZeroValue()) + continue; + + if (CIndex->getType()->isVectorTy()) + Index = CIndex->getSplatValue(); + + unsigned Idx = cast<ConstantInt>(Index)->getZExtValue(); + const StructLayout *SL = DL.getStructLayout(STy); + uint64_t Offset = SL->getElementOffset(Idx); + if (!AddrKnownBits.isUnknown()) { + IndexBits.Zero = ~Offset; + IndexBits.One = Offset; + } + TrailZ = std::min(TrailZ, countTrailingZeros(Offset)); + } else { + // Handle array index arithmetic. + Type *IndexedTy = GTI.getIndexedType(); + if (!IndexedTy->isSized()) { + TrailZ = 0; + AddrKnownBits.resetAll(); + break; + } + ::computeKnownBits(Index, IndexBits, DL, Depth + 1, AC, CxtI, DT, ORE, + UseInstrInfo, TTI); + TypeSize IndexTypeSize = DL.getTypeAllocSize(IndexedTy); + uint64_t TypeSizeInBytes = IndexTypeSize.getKnownMinSize(); + if (IndexTypeSize.isScalable()) + AddrKnownBits.resetAll(); + if (!AddrKnownBits.isUnknown()) { + // Multiply by current sizeof type. + // &A[i] == A + i * sizeof(*A[i]). + KnownBits ScalingFactor(IndexBitWidth); + ScalingFactor.Zero = ~TypeSizeInBytes; + ScalingFactor.One = TypeSizeInBytes; + IndexBits = KnownBits::computeForMul(IndexBits, ScalingFactor); + } + TrailZ = + std::min(TrailZ, unsigned(countTrailingZeros(TypeSizeInBytes) + + IndexBits.countMinTrailingZeros())); + } + if (AddrKnownBits.isUnknown()) + continue; + + // If the offsets have a different width from the pointer, according + // to the language reference we need to sign-extend or truncate them + // to the width of the pointer. + IndexBits = IndexBits.sextOrTrunc(BitWidth); + + AddrKnownBits = KnownBits::computeForAddSub( + /*Add=*/true, + /*NSW=*/IsInBounds, AddrKnownBits, IndexBits); + } + if (!AddrKnownBits.isUnknown()) + Known = AddrKnownBits; + else + Known.Zero.setLowBits(TrailZ); + return true; + } + } + } + +public: + explicit TestGEPTTIImpl(const Function &F) + : BaseT(F.getParent()->getDataLayout()) {} +}; +} // End anonymous namespace.
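For reference (not part of the patch): a minimal standalone sketch of what the new KnownBits::computeForMul returns for the i8 example worked through in the KnownBits.cpp comment above (a = XXXX1100, b = XXXX1110). The helper name computeForMulExample is illustrative only.

#include "llvm/Support/KnownBits.h"
#include <cassert>
using namespace llvm;

// Illustrative sketch mirroring the worked example in the computeForMul
// comment: only the low four bits of each operand are known.
static void computeForMulExample() {
  KnownBits A(8), B(8);
  A.One = APInt(8, 0x0C);  // a = XXXX1100 (12 in the low nibble)
  A.Zero = APInt(8, 0x03);
  B.One = APInt(8, 0x0E);  // b = XXXX1110 (14 in the low nibble)
  B.Zero = APInt(8, 0x01);
  KnownBits Mul = KnownBits::computeForMul(A, B);
  // 12 * 14 = 168 = 0b10101000, and three trailing zeros plus two bits of the
  // trimmed product are inferable, so the low five bits are known to be 01000.
  assert(Mul.One.getZExtValue() == 0x08 && Mul.Zero.getZExtValue() == 0x17);
}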
+ +TEST_F(ComputeKnownBitsTest, ComputeKnownBitsGEPWithRange) { + parseAssembly( + "define void @test(i64* %p) {\n" + " %A = load i64, i64* %p, !range !{i64 64, i64 65536}\n" + " %APtr = inttoptr i64 %A to float*" + " %APtrPlus512 = getelementptr float, float* %APtr, i32 128\n" + " %c = icmp ugt float* %APtrPlus512, inttoptr (i32 523 to float*)\n" + " call void @llvm.assume(i1 %c)\n" + " ret void\n" + "}\n" + "declare void @llvm.assume(i1)\n"); + AssumptionCache AC(*F); + TestGEPTTIImpl TTIImpl(*F); + TargetTransformInfo TTI(TTIImpl); + KnownBits Known = computeKnownBits( + A, M->getDataLayout(), /* Depth */ 0, &AC, F->front().getTerminator(), + /*DT=*/nullptr, /*ORE=*/nullptr, /*UseInstrInfo=*/true, &TTI); + EXPECT_EQ(Known.Zero.getZExtValue(), ~(65536llu - 1)); + EXPECT_EQ(Known.One.getZExtValue(), 0u); + Instruction &APtrPlus512 = findInstructionByName(F, "APtrPlus512"); + Known = computeKnownBits(&APtrPlus512, M->getDataLayout(), /* Depth */ 0, &AC, + F->front().getTerminator(), /*DT=*/nullptr, + /*ORE=*/nullptr, /*UseInstrInfo=*/true, &TTI); + // We know of one less zero because 512 may have produced a 1 that + // got carried all the way to the first trailing zero. + EXPECT_EQ(Known.Zero.getZExtValue(), ~(65536llu - 1) << 1); + EXPECT_EQ(Known.One.getZExtValue(), 0u); +} + +// 4*128 + [32, 64) doesn't produce overlapping bits. +// Make sure we get all the individual bits properly. +// This test is useful to check that we account for the scaling factor +// in the gep. Indeed, gep float, [32,64), 128 is not 128 + [32,64). +TEST_F(ComputeKnownBitsTest, ComputeKnownBitsGEPWithRangeNoOverlap) { + parseAssembly( + "define void @test(i64* %p) {\n" + " %A = load i64, i64* %p, !range !{i64 32, i64 64}\n" + " %APtr = inttoptr i64 %A to float*" + " %APtrPlus512 = getelementptr float, float* %APtr, i32 128\n" + " %c = icmp ugt float* %APtrPlus512, inttoptr (i32 523 to float*)\n" + " call void @llvm.assume(i1 %c)\n" + " ret void\n" + "}\n" + "declare void @llvm.assume(i1)\n"); + AssumptionCache AC(*F); + TestGEPTTIImpl TTIImpl(*F); + TargetTransformInfo TTI(TTIImpl); + KnownBits Known = computeKnownBits( + A, M->getDataLayout(), /* Depth */ 0, &AC, F->front().getTerminator(), + /*DT=*/nullptr, /*ORE=*/nullptr, /*UseInstrInfo=*/true, &TTI); + EXPECT_EQ(Known.Zero.getZExtValue(), ~(64llu - 1)); + EXPECT_EQ(Known.One.getZExtValue(), 32u); + Instruction &APtrPlus512 = findInstructionByName(F, "APtrPlus512"); + Known = computeKnownBits(&APtrPlus512, M->getDataLayout(), /* Depth */ 0, &AC, + F->front().getTerminator(), + /*DT=*/nullptr, /*ORE=*/nullptr, + /*UseInstrInfo=*/true, &TTI); + // We know of one less zero because 512 may have produced a 1 that + // got carried all the way to the first trailing zero. + EXPECT_EQ(Known.Zero.getZExtValue(), ~512llu & ~(64llu - 1)); + EXPECT_EQ(Known.One.getZExtValue(), 512u | 32u); +} + class IsBytewiseValueTest : public ValueTrackingTest, public ::testing::WithParamInterface< std::pair> {