diff --git a/llvm/include/llvm/Analysis/TargetTransformInfo.h b/llvm/include/llvm/Analysis/TargetTransformInfo.h --- a/llvm/include/llvm/Analysis/TargetTransformInfo.h +++ b/llvm/include/llvm/Analysis/TargetTransformInfo.h @@ -49,6 +49,7 @@ class Loop; class LoopInfo; class ProfileSummaryInfo; +class OptimizationRemarkEmitter; class SCEV; class ScalarEvolution; class StoreInst; @@ -1302,6 +1303,14 @@ /// @} + /// Improve \p Known for \p V with target specific known bits analysis. + /// \returns true if the target analyzed \p V, false otherwise. + bool computeKnownBits(Value *V, KnownBits &Known, const DataLayout &DL, + unsigned Depth, AssumptionCache *AC, + const Instruction *CxtI, const DominatorTree *DT, + OptimizationRemarkEmitter *ORE, + bool UseInstrInfo) const; + private: /// Estimate the latency of specified instruction. /// Returns 1 as the default value. @@ -1577,6 +1586,11 @@ virtual unsigned getGISelRematGlobalCost() const = 0; virtual bool hasActiveVectorLength() const = 0; virtual int getInstructionLatency(const Instruction *I) = 0; + virtual bool + computeKnownBits(const TargetTransformInfo *TTI, Value *V, KnownBits &Known, + const DataLayout &DL, unsigned Depth, AssumptionCache *AC, + const Instruction *CxtI, const DominatorTree *DT, + OptimizationRemarkEmitter *ORE, bool UseInstrInfo) const = 0; }; template @@ -2088,6 +2102,15 @@ int getInstructionLatency(const Instruction *I) override { return Impl.getInstructionLatency(I); } + + bool computeKnownBits(const TargetTransformInfo *TTI, Value *V, + KnownBits &Known, const DataLayout &DL, unsigned Depth, + AssumptionCache *AC, const Instruction *CxtI, + const DominatorTree *DT, OptimizationRemarkEmitter *ORE, + bool UseInstrInfo) const override { + return Impl.computeKnownBits(TTI, V, Known, DL, Depth, AC, CxtI, DT, ORE, + UseInstrInfo); + } }; template diff --git a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h --- a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h +++ b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h @@ -1068,6 +1068,14 @@ return 1; } + + bool computeKnownBits(const TargetTransformInfo *TTI, Value *V, + KnownBits &Known, const DataLayout &DL, unsigned Depth, + AssumptionCache *AC, const Instruction *CxtI, + const DominatorTree *DT, OptimizationRemarkEmitter *ORE, + bool UseInstrInfo) const { + return false; + } }; } // namespace llvm diff --git a/llvm/include/llvm/Analysis/ValueTracking.h b/llvm/include/llvm/Analysis/ValueTracking.h --- a/llvm/include/llvm/Analysis/ValueTracking.h +++ b/llvm/include/llvm/Analysis/ValueTracking.h @@ -43,6 +43,7 @@ class OptimizationRemarkEmitter; class StringRef; class TargetLibraryInfo; +class TargetTransformInfo; class Value; /// Determine which bits of V are known to be either zero or one and return @@ -53,237 +54,241 @@ /// where V is a vector, the known zero and known one values are the /// same width as the vector element, and the bit is set only if it is true /// for all of the elements in the vector. - void computeKnownBits(const Value *V, KnownBits &Known, - const DataLayout &DL, unsigned Depth = 0, - AssumptionCache *AC = nullptr, - const Instruction *CxtI = nullptr, - const DominatorTree *DT = nullptr, - OptimizationRemarkEmitter *ORE = nullptr, - bool UseInstrInfo = true); - - /// Determine which bits of V are known to be either zero or one and return - /// them in the KnownZero/KnownOne bit sets. 
- /// - /// This function is defined on values with integer type, values with pointer - /// type, and vectors of integers. In the case - /// where V is a vector, the known zero and known one values are the - /// same width as the vector element, and the bit is set only if it is true - /// for all of the demanded elements in the vector. - void computeKnownBits(const Value *V, const APInt &DemandedElts, - KnownBits &Known, const DataLayout &DL, - unsigned Depth = 0, AssumptionCache *AC = nullptr, - const Instruction *CxtI = nullptr, - const DominatorTree *DT = nullptr, - OptimizationRemarkEmitter *ORE = nullptr, - bool UseInstrInfo = true); - - /// Returns the known bits rather than passing by reference. - KnownBits computeKnownBits(const Value *V, const DataLayout &DL, - unsigned Depth = 0, AssumptionCache *AC = nullptr, - const Instruction *CxtI = nullptr, - const DominatorTree *DT = nullptr, - OptimizationRemarkEmitter *ORE = nullptr, - bool UseInstrInfo = true); - - /// Returns the known bits rather than passing by reference. - KnownBits computeKnownBits(const Value *V, const APInt &DemandedElts, - const DataLayout &DL, unsigned Depth = 0, - AssumptionCache *AC = nullptr, - const Instruction *CxtI = nullptr, - const DominatorTree *DT = nullptr, - OptimizationRemarkEmitter *ORE = nullptr, - bool UseInstrInfo = true); - - /// Compute known bits from the range metadata. - /// \p KnownZero the set of bits that are known to be zero - /// \p KnownOne the set of bits that are known to be one - void computeKnownBitsFromRangeMetadata(const MDNode &Ranges, - KnownBits &Known); - - /// Return true if LHS and RHS have no common bits set. - bool haveNoCommonBitsSet(const Value *LHS, const Value *RHS, - const DataLayout &DL, - AssumptionCache *AC = nullptr, - const Instruction *CxtI = nullptr, - const DominatorTree *DT = nullptr, - bool UseInstrInfo = true); - - /// Return true if the given value is known to have exactly one bit set when - /// defined. For vectors return true if every element is known to be a power - /// of two when defined. Supports values with integer or pointer type and - /// vectors of integers. If 'OrZero' is set, then return true if the given - /// value is either a power of two or zero. - bool isKnownToBeAPowerOfTwo(const Value *V, const DataLayout &DL, - bool OrZero = false, unsigned Depth = 0, - AssumptionCache *AC = nullptr, - const Instruction *CxtI = nullptr, - const DominatorTree *DT = nullptr, - bool UseInstrInfo = true); - - bool isOnlyUsedInZeroEqualityComparison(const Instruction *CxtI); - - /// Return true if the given value is known to be non-zero when defined. For - /// vectors, return true if every element is known to be non-zero when - /// defined. For pointers, if the context instruction and dominator tree are - /// specified, perform context-sensitive analysis and return true if the - /// pointer couldn't possibly be null at the specified instruction. - /// Supports values with integer or pointer type and vectors of integers. - bool isKnownNonZero(const Value *V, const DataLayout &DL, unsigned Depth = 0, - AssumptionCache *AC = nullptr, +void computeKnownBits(const Value *V, KnownBits &Known, const DataLayout &DL, + unsigned Depth = 0, AssumptionCache *AC = nullptr, const Instruction *CxtI = nullptr, const DominatorTree *DT = nullptr, - bool UseInstrInfo = true); - - /// Return true if the two given values are negation. 
- /// Currently can recoginze Value pair: - /// 1: if X = sub (0, Y) or Y = sub (0, X) - /// 2: if X = sub (A, B) and Y = sub (B, A) - bool isKnownNegation(const Value *X, const Value *Y, bool NeedNSW = false); - - /// Returns true if the give value is known to be non-negative. - bool isKnownNonNegative(const Value *V, const DataLayout &DL, - unsigned Depth = 0, - AssumptionCache *AC = nullptr, - const Instruction *CxtI = nullptr, - const DominatorTree *DT = nullptr, - bool UseInstrInfo = true); - - /// Returns true if the given value is known be positive (i.e. non-negative - /// and non-zero). - bool isKnownPositive(const Value *V, const DataLayout &DL, unsigned Depth = 0, - AssumptionCache *AC = nullptr, - const Instruction *CxtI = nullptr, - const DominatorTree *DT = nullptr, - bool UseInstrInfo = true); - - /// Returns true if the given value is known be negative (i.e. non-positive - /// and non-zero). - bool isKnownNegative(const Value *V, const DataLayout &DL, unsigned Depth = 0, - AssumptionCache *AC = nullptr, - const Instruction *CxtI = nullptr, - const DominatorTree *DT = nullptr, - bool UseInstrInfo = true); + OptimizationRemarkEmitter *ORE = nullptr, + bool UseInstrInfo = true, + const TargetTransformInfo *TTI = nullptr); + +/// Determine which bits of V are known to be either zero or one and return +/// them in the KnownZero/KnownOne bit sets. +/// +/// This function is defined on values with integer type, values with pointer +/// type, and vectors of integers. In the case +/// where V is a vector, the known zero and known one values are the +/// same width as the vector element, and the bit is set only if it is true +/// for all of the demanded elements in the vector. +void computeKnownBits(const Value *V, const APInt &DemandedElts, + KnownBits &Known, const DataLayout &DL, + unsigned Depth = 0, AssumptionCache *AC = nullptr, + const Instruction *CxtI = nullptr, + const DominatorTree *DT = nullptr, + OptimizationRemarkEmitter *ORE = nullptr, + bool UseInstrInfo = true, + const TargetTransformInfo *TTI = nullptr); - /// Return true if the given values are known to be non-equal when defined. - /// Supports scalar integer types only. - bool isKnownNonEqual(const Value *V1, const Value *V2, const DataLayout &DL, - AssumptionCache *AC = nullptr, - const Instruction *CxtI = nullptr, - const DominatorTree *DT = nullptr, - bool UseInstrInfo = true); +/// Returns the known bits rather than passing by reference. +KnownBits computeKnownBits(const Value *V, const DataLayout &DL, + unsigned Depth = 0, AssumptionCache *AC = nullptr, + const Instruction *CxtI = nullptr, + const DominatorTree *DT = nullptr, + OptimizationRemarkEmitter *ORE = nullptr, + bool UseInstrInfo = true, + const TargetTransformInfo *TTI = nullptr); - /// Return true if 'V & Mask' is known to be zero. We use this predicate to - /// simplify operations downstream. Mask is known to be zero for bits that V - /// cannot have. - /// - /// This function is defined on values with integer type, values with pointer - /// type, and vectors of integers. In the case - /// where V is a vector, the mask, known zero, and known one values are the - /// same width as the vector element, and the bit is set only if it is true - /// for all of the elements in the vector. - bool MaskedValueIsZero(const Value *V, const APInt &Mask, - const DataLayout &DL, - unsigned Depth = 0, AssumptionCache *AC = nullptr, +/// Returns the known bits rather than passing by reference. 
+KnownBits computeKnownBits(const Value *V, const APInt &DemandedElts, + const DataLayout &DL, unsigned Depth = 0, + AssumptionCache *AC = nullptr, + const Instruction *CxtI = nullptr, + const DominatorTree *DT = nullptr, + OptimizationRemarkEmitter *ORE = nullptr, + bool UseInstrInfo = true, + const TargetTransformInfo *TTI = nullptr); + +/// Compute known bits from the range metadata. +/// \p KnownZero the set of bits that are known to be zero +/// \p KnownOne the set of bits that are known to be one +void computeKnownBitsFromRangeMetadata(const MDNode &Ranges, KnownBits &Known); + +/// Return true if LHS and RHS have no common bits set. +bool haveNoCommonBitsSet(const Value *LHS, const Value *RHS, + const DataLayout &DL, AssumptionCache *AC = nullptr, const Instruction *CxtI = nullptr, const DominatorTree *DT = nullptr, - bool UseInstrInfo = true); - - /// Return the number of times the sign bit of the register is replicated into - /// the other bits. We know that at least 1 bit is always equal to the sign - /// bit (itself), but other cases can give us information. For example, - /// immediately after an "ashr X, 2", we know that the top 3 bits are all - /// equal to each other, so we return 3. For vectors, return the number of - /// sign bits for the vector element with the mininum number of known sign - /// bits. - unsigned ComputeNumSignBits(const Value *Op, const DataLayout &DL, - unsigned Depth = 0, AssumptionCache *AC = nullptr, - const Instruction *CxtI = nullptr, - const DominatorTree *DT = nullptr, - bool UseInstrInfo = true); - - /// This function computes the integer multiple of Base that equals V. If - /// successful, it returns true and returns the multiple in Multiple. If - /// unsuccessful, it returns false. Also, if V can be simplified to an - /// integer, then the simplified V is returned in Val. Look through sext only - /// if LookThroughSExt=true. - bool ComputeMultiple(Value *V, unsigned Base, Value *&Multiple, - bool LookThroughSExt = false, - unsigned Depth = 0); - - /// Map a call instruction to an intrinsic ID. Libcalls which have equivalent - /// intrinsics are treated as-if they were intrinsics. - Intrinsic::ID getIntrinsicForCallSite(const CallBase &CB, - const TargetLibraryInfo *TLI); - - /// Return true if we can prove that the specified FP value is never equal to - /// -0.0. - bool CannotBeNegativeZero(const Value *V, const TargetLibraryInfo *TLI, - unsigned Depth = 0); - - /// Return true if we can prove that the specified FP value is either NaN or - /// never less than -0.0. - /// - /// NaN --> true - /// +0 --> true - /// -0 --> true - /// x > +0 --> true - /// x < -0 --> false - bool CannotBeOrderedLessThanZero(const Value *V, const TargetLibraryInfo *TLI); - - /// Return true if the floating-point scalar value is not an infinity or if - /// the floating-point vector value has no infinities. Return false if a value - /// could ever be infinity. - bool isKnownNeverInfinity(const Value *V, const TargetLibraryInfo *TLI, - unsigned Depth = 0); - - /// Return true if the floating-point scalar value is not a NaN or if the - /// floating-point vector value has no NaN elements. Return false if a value - /// could ever be NaN. - bool isKnownNeverNaN(const Value *V, const TargetLibraryInfo *TLI, - unsigned Depth = 0); - - /// Return true if we can prove that the specified FP value's sign bit is 0. 
- /// - /// NaN --> true/false (depending on the NaN's sign bit) - /// +0 --> true - /// -0 --> false - /// x > +0 --> true - /// x < -0 --> false - bool SignBitMustBeZero(const Value *V, const TargetLibraryInfo *TLI); - - /// If the specified value can be set by repeating the same byte in memory, - /// return the i8 value that it is represented with. This is true for all i8 - /// values obviously, but is also true for i32 0, i32 -1, i16 0xF0F0, double - /// 0.0 etc. If the value can't be handled with a repeated byte store (e.g. - /// i16 0x1234), return null. If the value is entirely undef and padding, - /// return undef. - Value *isBytewiseValue(Value *V, const DataLayout &DL); - - /// Given an aggregate and an sequence of indices, see if the scalar value - /// indexed is already around as a register, for example if it were inserted - /// directly into the aggregate. - /// - /// If InsertBefore is not null, this function will duplicate (modified) - /// insertvalues when a part of a nested struct is extracted. - Value *FindInsertedValue(Value *V, - ArrayRef idx_range, - Instruction *InsertBefore = nullptr); - - /// Analyze the specified pointer to see if it can be expressed as a base - /// pointer plus a constant offset. Return the base and offset to the caller. - /// - /// This is a wrapper around Value::stripAndAccumulateConstantOffsets that - /// creates and later unpacks the required APInt. - inline Value *GetPointerBaseWithConstantOffset(Value *Ptr, int64_t &Offset, - const DataLayout &DL, - bool AllowNonInbounds = true) { - APInt OffsetAPInt(DL.getIndexTypeSizeInBits(Ptr->getType()), 0); - Value *Base = - Ptr->stripAndAccumulateConstantOffsets(DL, OffsetAPInt, AllowNonInbounds); - - Offset = OffsetAPInt.getSExtValue(); - return Base; - } + bool UseInstrInfo = true, + const TargetTransformInfo *TTI = nullptr); + +/// Return true if the given value is known to have exactly one bit set when +/// defined. For vectors return true if every element is known to be a power +/// of two when defined. Supports values with integer or pointer type and +/// vectors of integers. If 'OrZero' is set, then return true if the given +/// value is either a power of two or zero. +bool isKnownToBeAPowerOfTwo(const Value *V, const DataLayout &DL, + bool OrZero = false, unsigned Depth = 0, + AssumptionCache *AC = nullptr, + const Instruction *CxtI = nullptr, + const DominatorTree *DT = nullptr, + bool UseInstrInfo = true, + const TargetTransformInfo *TTI = nullptr); + +bool isOnlyUsedInZeroEqualityComparison(const Instruction *CxtI); + +/// Return true if the given value is known to be non-zero when defined. For +/// vectors, return true if every element is known to be non-zero when +/// defined. For pointers, if the context instruction and dominator tree are +/// specified, perform context-sensitive analysis and return true if the +/// pointer couldn't possibly be null at the specified instruction. +/// Supports values with integer or pointer type and vectors of integers. +bool isKnownNonZero(const Value *V, const DataLayout &DL, unsigned Depth = 0, + AssumptionCache *AC = nullptr, + const Instruction *CxtI = nullptr, + const DominatorTree *DT = nullptr, bool UseInstrInfo = true, + const TargetTransformInfo *TTI = nullptr); + +/// Return true if the two given values are negation. 
+/// Currently can recognize Value pair:
+/// 1: if X = sub (0, Y) or Y = sub (0, X)
+/// 2: if X = sub (A, B) and Y = sub (B, A)
+bool isKnownNegation(const Value *X, const Value *Y, bool NeedNSW = false);
+
+/// Returns true if the given value is known to be non-negative.
+bool isKnownNonNegative(const Value *V, const DataLayout &DL,
+                        unsigned Depth = 0, AssumptionCache *AC = nullptr,
+                        const Instruction *CxtI = nullptr,
+                        const DominatorTree *DT = nullptr,
+                        bool UseInstrInfo = true,
+                        const TargetTransformInfo *TTI = nullptr);
+
+/// Returns true if the given value is known to be positive (i.e. non-negative
+/// and non-zero).
+bool isKnownPositive(const Value *V, const DataLayout &DL, unsigned Depth = 0,
+                     AssumptionCache *AC = nullptr,
+                     const Instruction *CxtI = nullptr,
+                     const DominatorTree *DT = nullptr,
+                     bool UseInstrInfo = true,
+                     const TargetTransformInfo *TTI = nullptr);
+
+/// Returns true if the given value is known to be negative (i.e. non-positive
+/// and non-zero).
+bool isKnownNegative(const Value *V, const DataLayout &DL, unsigned Depth = 0,
+                     AssumptionCache *AC = nullptr,
+                     const Instruction *CxtI = nullptr,
+                     const DominatorTree *DT = nullptr,
+                     bool UseInstrInfo = true);
+
+/// Return true if the given values are known to be non-equal when defined.
+/// Supports scalar integer types only.
+bool isKnownNonEqual(const Value *V1, const Value *V2, const DataLayout &DL,
+                     AssumptionCache *AC = nullptr,
+                     const Instruction *CxtI = nullptr,
+                     const DominatorTree *DT = nullptr,
+                     bool UseInstrInfo = true,
+                     const TargetTransformInfo *TTI = nullptr);
+
+/// Return true if 'V & Mask' is known to be zero. We use this predicate to
+/// simplify operations downstream. Mask is known to be zero for bits that V
+/// cannot have.
+///
+/// This function is defined on values with integer type, values with pointer
+/// type, and vectors of integers. In the case
+/// where V is a vector, the mask, known zero, and known one values are the
+/// same width as the vector element, and the bit is set only if it is true
+/// for all of the elements in the vector.
+bool MaskedValueIsZero(const Value *V, const APInt &Mask, const DataLayout &DL,
+                       unsigned Depth = 0, AssumptionCache *AC = nullptr,
+                       const Instruction *CxtI = nullptr,
+                       const DominatorTree *DT = nullptr,
+                       bool UseInstrInfo = true,
+                       const TargetTransformInfo *TTI = nullptr);
+
+/// Return the number of times the sign bit of the register is replicated into
+/// the other bits. We know that at least 1 bit is always equal to the sign
+/// bit (itself), but other cases can give us information. For example,
+/// immediately after an "ashr X, 2", we know that the top 3 bits are all
+/// equal to each other, so we return 3. For vectors, return the number of
+/// sign bits for the vector element with the minimum number of known sign
+/// bits.
+unsigned ComputeNumSignBits(const Value *Op, const DataLayout &DL,
+                            unsigned Depth = 0, AssumptionCache *AC = nullptr,
+                            const Instruction *CxtI = nullptr,
+                            const DominatorTree *DT = nullptr,
+                            bool UseInstrInfo = true,
+                            const TargetTransformInfo *TTI = nullptr);
+
+/// This function computes the integer multiple of Base that equals V. If
+/// successful, it returns true and returns the multiple in Multiple. If
+/// unsuccessful, it returns false. Also, if V can be simplified to an
+/// integer, then the simplified V is returned in Val. Look through sext only
+/// if LookThroughSExt=true.
+bool ComputeMultiple(Value *V, unsigned Base, Value *&Multiple, + bool LookThroughSExt = false, unsigned Depth = 0); + +/// Map a call instruction to an intrinsic ID. Libcalls which have equivalent +/// intrinsics are treated as-if they were intrinsics. +Intrinsic::ID getIntrinsicForCallSite(const CallBase &CB, + const TargetLibraryInfo *TLI); + +/// Return true if we can prove that the specified FP value is never equal to +/// -0.0. +bool CannotBeNegativeZero(const Value *V, const TargetLibraryInfo *TLI, + unsigned Depth = 0); + +/// Return true if we can prove that the specified FP value is either NaN or +/// never less than -0.0. +/// +/// NaN --> true +/// +0 --> true +/// -0 --> true +/// x > +0 --> true +/// x < -0 --> false +bool CannotBeOrderedLessThanZero(const Value *V, const TargetLibraryInfo *TLI); + +/// Return true if the floating-point scalar value is not an infinity or if +/// the floating-point vector value has no infinities. Return false if a value +/// could ever be infinity. +bool isKnownNeverInfinity(const Value *V, const TargetLibraryInfo *TLI, + unsigned Depth = 0); + +/// Return true if the floating-point scalar value is not a NaN or if the +/// floating-point vector value has no NaN elements. Return false if a value +/// could ever be NaN. +bool isKnownNeverNaN(const Value *V, const TargetLibraryInfo *TLI, + unsigned Depth = 0); + +/// Return true if we can prove that the specified FP value's sign bit is 0. +/// +/// NaN --> true/false (depending on the NaN's sign bit) +/// +0 --> true +/// -0 --> false +/// x > +0 --> true +/// x < -0 --> false +bool SignBitMustBeZero(const Value *V, const TargetLibraryInfo *TLI); + +/// If the specified value can be set by repeating the same byte in memory, +/// return the i8 value that it is represented with. This is true for all i8 +/// values obviously, but is also true for i32 0, i32 -1, i16 0xF0F0, double +/// 0.0 etc. If the value can't be handled with a repeated byte store (e.g. +/// i16 0x1234), return null. If the value is entirely undef and padding, +/// return undef. +Value *isBytewiseValue(Value *V, const DataLayout &DL); + +/// Given an aggregate and an sequence of indices, see if the scalar value +/// indexed is already around as a register, for example if it were inserted +/// directly into the aggregate. +/// +/// If InsertBefore is not null, this function will duplicate (modified) +/// insertvalues when a part of a nested struct is extracted. +Value *FindInsertedValue(Value *V, ArrayRef idx_range, + Instruction *InsertBefore = nullptr); + +/// Analyze the specified pointer to see if it can be expressed as a base +/// pointer plus a constant offset. Return the base and offset to the caller. +/// +/// This is a wrapper around Value::stripAndAccumulateConstantOffsets that +/// creates and later unpacks the required APInt. 
+inline Value *GetPointerBaseWithConstantOffset(Value *Ptr, int64_t &Offset, + const DataLayout &DL, + bool AllowNonInbounds = true) { + APInt OffsetAPInt(DL.getIndexTypeSizeInBits(Ptr->getType()), 0); + Value *Base = + Ptr->stripAndAccumulateConstantOffsets(DL, OffsetAPInt, AllowNonInbounds); + + Offset = OffsetAPInt.getSExtValue(); + return Base; +} inline const Value * GetPointerBaseWithConstantOffset(const Value *Ptr, int64_t &Offset, const DataLayout &DL, @@ -489,47 +494,39 @@ NeverOverflows, }; - OverflowResult computeOverflowForUnsignedMul(const Value *LHS, - const Value *RHS, - const DataLayout &DL, - AssumptionCache *AC, - const Instruction *CxtI, - const DominatorTree *DT, - bool UseInstrInfo = true); - OverflowResult computeOverflowForSignedMul(const Value *LHS, const Value *RHS, - const DataLayout &DL, - AssumptionCache *AC, - const Instruction *CxtI, - const DominatorTree *DT, - bool UseInstrInfo = true); - OverflowResult computeOverflowForUnsignedAdd(const Value *LHS, - const Value *RHS, - const DataLayout &DL, - AssumptionCache *AC, - const Instruction *CxtI, - const DominatorTree *DT, - bool UseInstrInfo = true); - OverflowResult computeOverflowForSignedAdd(const Value *LHS, const Value *RHS, - const DataLayout &DL, - AssumptionCache *AC = nullptr, - const Instruction *CxtI = nullptr, - const DominatorTree *DT = nullptr); + OverflowResult computeOverflowForUnsignedMul( + const Value *LHS, const Value *RHS, const DataLayout &DL, + AssumptionCache *AC, const Instruction *CxtI, const DominatorTree *DT, + bool UseInstrInfo = true, const TargetTransformInfo *TTI = nullptr); + OverflowResult computeOverflowForSignedMul( + const Value *LHS, const Value *RHS, const DataLayout &DL, + AssumptionCache *AC, const Instruction *CxtI, const DominatorTree *DT, + bool UseInstrInfo = true, const TargetTransformInfo *TTI = nullptr); + OverflowResult computeOverflowForUnsignedAdd( + const Value *LHS, const Value *RHS, const DataLayout &DL, + AssumptionCache *AC, const Instruction *CxtI, const DominatorTree *DT, + bool UseInstrInfo = true, const TargetTransformInfo *TTI = nullptr); + OverflowResult computeOverflowForSignedAdd( + const Value *LHS, const Value *RHS, const DataLayout &DL, + AssumptionCache *AC = nullptr, const Instruction *CxtI = nullptr, + const DominatorTree *DT = nullptr, + const TargetTransformInfo *TTI = nullptr); /// This version also leverages the sign bit of Add if known. 
- OverflowResult computeOverflowForSignedAdd(const AddOperator *Add, - const DataLayout &DL, - AssumptionCache *AC = nullptr, - const Instruction *CxtI = nullptr, - const DominatorTree *DT = nullptr); - OverflowResult computeOverflowForUnsignedSub(const Value *LHS, const Value *RHS, - const DataLayout &DL, - AssumptionCache *AC, - const Instruction *CxtI, - const DominatorTree *DT); - OverflowResult computeOverflowForSignedSub(const Value *LHS, const Value *RHS, - const DataLayout &DL, - AssumptionCache *AC, - const Instruction *CxtI, - const DominatorTree *DT); + OverflowResult + computeOverflowForSignedAdd(const AddOperator *Add, const DataLayout &DL, + AssumptionCache *AC = nullptr, + const Instruction *CxtI = nullptr, + const DominatorTree *DT = nullptr, + const TargetTransformInfo *TTI = nullptr); + OverflowResult computeOverflowForUnsignedSub( + const Value *LHS, const Value *RHS, const DataLayout &DL, + AssumptionCache *AC, const Instruction *CxtI, const DominatorTree *DT, + const TargetTransformInfo *TTI = nullptr); + OverflowResult + computeOverflowForSignedSub(const Value *LHS, const Value *RHS, + const DataLayout &DL, AssumptionCache *AC, + const Instruction *CxtI, const DominatorTree *DT, + const TargetTransformInfo *TTI = nullptr); /// Returns true if the arithmetic part of the \p WO 's result is /// used only along the paths control dependent on the computation diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h b/llvm/include/llvm/CodeGen/BasicTTIImpl.h --- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h +++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h @@ -1882,6 +1882,14 @@ unsigned getVectorSplitCost() { return 1; } /// @} + bool computeKnownBits(const TargetTransformInfo *TTI, Value *V, + KnownBits &Known, const DataLayout &DL, unsigned Depth, + AssumptionCache *AC, const Instruction *CxtI, + const DominatorTree *DT, OptimizationRemarkEmitter *ORE, + bool UseInstrInfo) const { + return BaseT::computeKnownBits(TTI, V, Known, DL, Depth, AC, CxtI, DT, ORE, + UseInstrInfo); + } }; /// Concrete BasicTTIImpl that can be used if no further customization diff --git a/llvm/include/llvm/Support/KnownBits.h b/llvm/include/llvm/Support/KnownBits.h --- a/llvm/include/llvm/Support/KnownBits.h +++ b/llvm/include/llvm/Support/KnownBits.h @@ -166,6 +166,16 @@ return *this; } + /// Return known bits for a sign extension or truncation of the value we're + /// tracking. + KnownBits sextOrTrunc(unsigned BitWidth) const { + if (BitWidth > getBitWidth()) + return sext(BitWidth); + if (BitWidth < getBitWidth()) + return trunc(BitWidth); + return *this; + } + /// Return a KnownBits with the extracted bits /// [bitPosition,bitPosition+numBits). KnownBits extractBits(unsigned NumBits, unsigned BitPosition) const { @@ -241,6 +251,8 @@ static KnownBits computeForAddSub(bool Add, bool NSW, const KnownBits &LHS, KnownBits RHS); + static KnownBits computeForMul(const KnownBits &LHS, const KnownBits &RHS); + /// Update known bits based on ANDing with RHS. 
KnownBits &operator&=(const KnownBits &RHS); diff --git a/llvm/lib/Analysis/TargetTransformInfo.cpp b/llvm/lib/Analysis/TargetTransformInfo.cpp --- a/llvm/lib/Analysis/TargetTransformInfo.cpp +++ b/llvm/lib/Analysis/TargetTransformInfo.cpp @@ -1355,6 +1355,14 @@ } } +bool TargetTransformInfo::computeKnownBits( + Value *V, KnownBits &Known, const DataLayout &DL, unsigned Depth, + AssumptionCache *AC, const Instruction *CxtI, const DominatorTree *DT, + OptimizationRemarkEmitter *ORE, bool UseInstrInfo) const { + return TTIImpl->computeKnownBits(this, V, Known, DL, Depth, AC, CxtI, DT, ORE, + UseInstrInfo); +} + TargetTransformInfo::Concept::~Concept() {} TargetIRAnalysis::TargetIRAnalysis() : TTICallback(&getDefaultTTI) {} diff --git a/llvm/lib/Analysis/ValueTracking.cpp b/llvm/lib/Analysis/ValueTracking.cpp --- a/llvm/lib/Analysis/ValueTracking.cpp +++ b/llvm/lib/Analysis/ValueTracking.cpp @@ -32,6 +32,7 @@ #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/OptimizationRemarkEmitter.h" #include "llvm/Analysis/TargetLibraryInfo.h" +#include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/IR/Argument.h" #include "llvm/IR/Attributes.h" #include "llvm/IR/BasicBlock.h" @@ -122,16 +123,20 @@ /// If true, it is safe to use metadata during simplification. InstrInfoQuery IIQ; + const TargetTransformInfo *TTI; + unsigned NumExcluded = 0; Query(const DataLayout &DL, AssumptionCache *AC, const Instruction *CxtI, const DominatorTree *DT, bool UseInstrInfo, - OptimizationRemarkEmitter *ORE = nullptr) - : DL(DL), AC(AC), CxtI(CxtI), DT(DT), ORE(ORE), IIQ(UseInstrInfo) {} + OptimizationRemarkEmitter *ORE = nullptr, + const TargetTransformInfo *TTI = nullptr) + : DL(DL), AC(AC), CxtI(CxtI), DT(DT), ORE(ORE), IIQ(UseInstrInfo), + TTI(TTI) {} Query(const Query &Q, const Value *NewExcl) : DL(Q.DL), AC(Q.AC), CxtI(Q.CxtI), DT(Q.DT), ORE(Q.ORE), IIQ(Q.IIQ), - NumExcluded(Q.NumExcluded) { + TTI(Q.TTI), NumExcluded(Q.NumExcluded) { Excluded = Q.Excluded; Excluded[NumExcluded++] = NewExcl; assert(NumExcluded <= Excluded.size()); @@ -223,18 +228,22 @@ const DataLayout &DL, unsigned Depth, AssumptionCache *AC, const Instruction *CxtI, const DominatorTree *DT, - OptimizationRemarkEmitter *ORE, bool UseInstrInfo) { - ::computeKnownBits(V, Known, Depth, - Query(DL, AC, safeCxtI(V, CxtI), DT, UseInstrInfo, ORE)); + OptimizationRemarkEmitter *ORE, bool UseInstrInfo, + const TargetTransformInfo *TTI) { + ::computeKnownBits( + V, Known, Depth, + Query(DL, AC, safeCxtI(V, CxtI), DT, UseInstrInfo, ORE, TTI)); } void llvm::computeKnownBits(const Value *V, const APInt &DemandedElts, KnownBits &Known, const DataLayout &DL, unsigned Depth, AssumptionCache *AC, const Instruction *CxtI, const DominatorTree *DT, - OptimizationRemarkEmitter *ORE, bool UseInstrInfo) { - ::computeKnownBits(V, DemandedElts, Known, Depth, - Query(DL, AC, safeCxtI(V, CxtI), DT, UseInstrInfo, ORE)); + OptimizationRemarkEmitter *ORE, bool UseInstrInfo, + const TargetTransformInfo *TTI) { + ::computeKnownBits( + V, DemandedElts, Known, Depth, + Query(DL, AC, safeCxtI(V, CxtI), DT, UseInstrInfo, ORE, TTI)); } static KnownBits computeKnownBits(const Value *V, const APInt &DemandedElts, @@ -243,14 +252,13 @@ static KnownBits computeKnownBits(const Value *V, unsigned Depth, const Query &Q); -KnownBits llvm::computeKnownBits(const Value *V, const DataLayout &DL, - unsigned Depth, AssumptionCache *AC, - const Instruction *CxtI, - const DominatorTree *DT, - OptimizationRemarkEmitter *ORE, - bool UseInstrInfo) { +KnownBits +llvm::computeKnownBits(const 
Value *V, const DataLayout &DL, unsigned Depth, + AssumptionCache *AC, const Instruction *CxtI, + const DominatorTree *DT, OptimizationRemarkEmitter *ORE, + bool UseInstrInfo, const TargetTransformInfo *TTI) { return ::computeKnownBits( - V, Depth, Query(DL, AC, safeCxtI(V, CxtI), DT, UseInstrInfo, ORE)); + V, Depth, Query(DL, AC, safeCxtI(V, CxtI), DT, UseInstrInfo, ORE, TTI)); } KnownBits llvm::computeKnownBits(const Value *V, const APInt &DemandedElts, @@ -258,16 +266,18 @@ AssumptionCache *AC, const Instruction *CxtI, const DominatorTree *DT, OptimizationRemarkEmitter *ORE, - bool UseInstrInfo) { + bool UseInstrInfo, + const TargetTransformInfo *TTI) { return ::computeKnownBits( V, DemandedElts, Depth, - Query(DL, AC, safeCxtI(V, CxtI), DT, UseInstrInfo, ORE)); + Query(DL, AC, safeCxtI(V, CxtI), DT, UseInstrInfo, ORE, TTI)); } bool llvm::haveNoCommonBitsSet(const Value *LHS, const Value *RHS, const DataLayout &DL, AssumptionCache *AC, const Instruction *CxtI, const DominatorTree *DT, - bool UseInstrInfo) { + bool UseInstrInfo, + const TargetTransformInfo *TTI) { assert(LHS->getType() == RHS->getType() && "LHS and RHS should have the same type"); assert(LHS->getType()->isIntOrIntVectorTy() && @@ -283,8 +293,10 @@ IntegerType *IT = cast(LHS->getType()->getScalarType()); KnownBits LHSKnown(IT->getBitWidth()); KnownBits RHSKnown(IT->getBitWidth()); - computeKnownBits(LHS, LHSKnown, DL, 0, AC, CxtI, DT, nullptr, UseInstrInfo); - computeKnownBits(RHS, RHSKnown, DL, 0, AC, CxtI, DT, nullptr, UseInstrInfo); + computeKnownBits(LHS, LHSKnown, DL, 0, AC, CxtI, DT, nullptr, UseInstrInfo, + TTI); + computeKnownBits(RHS, RHSKnown, DL, 0, AC, CxtI, DT, nullptr, UseInstrInfo, + TTI); return (LHSKnown.Zero | RHSKnown.Zero).isAllOnesValue(); } @@ -306,9 +318,11 @@ bool llvm::isKnownToBeAPowerOfTwo(const Value *V, const DataLayout &DL, bool OrZero, unsigned Depth, AssumptionCache *AC, const Instruction *CxtI, - const DominatorTree *DT, bool UseInstrInfo) { + const DominatorTree *DT, bool UseInstrInfo, + const TargetTransformInfo *TTI) { return ::isKnownToBeAPowerOfTwo( - V, OrZero, Depth, Query(DL, AC, safeCxtI(V, CxtI), DT, UseInstrInfo)); + V, OrZero, Depth, + Query(DL, AC, safeCxtI(V, CxtI), DT, UseInstrInfo, /*ORE=*/nullptr, TTI)); } static bool isKnownNonZero(const Value *V, const APInt &DemandedElts, @@ -318,30 +332,34 @@ bool llvm::isKnownNonZero(const Value *V, const DataLayout &DL, unsigned Depth, AssumptionCache *AC, const Instruction *CxtI, - const DominatorTree *DT, bool UseInstrInfo) { - return ::isKnownNonZero(V, Depth, - Query(DL, AC, safeCxtI(V, CxtI), DT, UseInstrInfo)); + const DominatorTree *DT, bool UseInstrInfo, + const TargetTransformInfo *TTI) { + return ::isKnownNonZero( + V, Depth, + Query(DL, AC, safeCxtI(V, CxtI), DT, UseInstrInfo, /*ORE=*/nullptr, TTI)); } bool llvm::isKnownNonNegative(const Value *V, const DataLayout &DL, unsigned Depth, AssumptionCache *AC, const Instruction *CxtI, const DominatorTree *DT, - bool UseInstrInfo) { + bool UseInstrInfo, + const TargetTransformInfo *TTI) { KnownBits Known = - computeKnownBits(V, DL, Depth, AC, CxtI, DT, nullptr, UseInstrInfo); + computeKnownBits(V, DL, Depth, AC, CxtI, DT, nullptr, UseInstrInfo, TTI); return Known.isNonNegative(); } bool llvm::isKnownPositive(const Value *V, const DataLayout &DL, unsigned Depth, AssumptionCache *AC, const Instruction *CxtI, - const DominatorTree *DT, bool UseInstrInfo) { + const DominatorTree *DT, bool UseInstrInfo, + const TargetTransformInfo *TTI) { if (auto *CI = dyn_cast(V)) return 
CI->getValue().isStrictlyPositive(); // TODO: We'd doing two recursive queries here. We should factor this such // that only a single query is needed. - return isKnownNonNegative(V, DL, Depth, AC, CxtI, DT, UseInstrInfo) && - isKnownNonZero(V, DL, Depth, AC, CxtI, DT, UseInstrInfo); + return isKnownNonNegative(V, DL, Depth, AC, CxtI, DT, UseInstrInfo, TTI) && + isKnownNonZero(V, DL, Depth, AC, CxtI, DT, UseInstrInfo, TTI); } bool llvm::isKnownNegative(const Value *V, const DataLayout &DL, unsigned Depth, @@ -357,10 +375,10 @@ bool llvm::isKnownNonEqual(const Value *V1, const Value *V2, const DataLayout &DL, AssumptionCache *AC, const Instruction *CxtI, const DominatorTree *DT, - bool UseInstrInfo) { + bool UseInstrInfo, const TargetTransformInfo *TTI) { return ::isKnownNonEqual(V1, V2, Query(DL, AC, safeCxtI(V1, safeCxtI(V2, CxtI)), DT, - UseInstrInfo, /*ORE=*/nullptr)); + UseInstrInfo, /*ORE=*/nullptr, TTI)); } static bool MaskedValueIsZero(const Value *V, const APInt &Mask, unsigned Depth, @@ -369,9 +387,11 @@ bool llvm::MaskedValueIsZero(const Value *V, const APInt &Mask, const DataLayout &DL, unsigned Depth, AssumptionCache *AC, const Instruction *CxtI, - const DominatorTree *DT, bool UseInstrInfo) { + const DominatorTree *DT, bool UseInstrInfo, + const TargetTransformInfo *TTI) { return ::MaskedValueIsZero( - V, Mask, Depth, Query(DL, AC, safeCxtI(V, CxtI), DT, UseInstrInfo)); + V, Mask, Depth, + Query(DL, AC, safeCxtI(V, CxtI), DT, UseInstrInfo, /*ORE=*/nullptr, TTI)); } static unsigned ComputeNumSignBits(const Value *V, const APInt &DemandedElts, @@ -393,9 +413,11 @@ unsigned llvm::ComputeNumSignBits(const Value *V, const DataLayout &DL, unsigned Depth, AssumptionCache *AC, const Instruction *CxtI, - const DominatorTree *DT, bool UseInstrInfo) { + const DominatorTree *DT, bool UseInstrInfo, + const TargetTransformInfo *TTI) { return ::ComputeNumSignBits( - V, Depth, Query(DL, AC, safeCxtI(V, CxtI), DT, UseInstrInfo)); + V, Depth, + Query(DL, AC, safeCxtI(V, CxtI), DT, UseInstrInfo, /*ORE=*/nullptr, TTI)); } static void computeKnownBitsAddSub(bool Add, const Value *Op0, const Value *Op1, @@ -417,7 +439,6 @@ const APInt &DemandedElts, KnownBits &Known, KnownBits &Known2, unsigned Depth, const Query &Q) { - unsigned BitWidth = Known.getBitWidth(); computeKnownBits(Op1, DemandedElts, Known, Depth + 1, Q); computeKnownBits(Op0, DemandedElts, Known2, Depth + 1, Q); @@ -435,7 +456,7 @@ bool isKnownNegativeOp0 = Known2.isNegative(); // The product of two numbers with the same sign is non-negative. isKnownNonNegative = (isKnownNegativeOp1 && isKnownNegativeOp0) || - (isKnownNonNegativeOp1 && isKnownNonNegativeOp0); + (isKnownNonNegativeOp1 && isKnownNonNegativeOp0); // The product of a negative number and a non-negative number is either // negative or zero. if (!isKnownNonNegative) @@ -446,78 +467,7 @@ } } - assert(!Known.hasConflict() && !Known2.hasConflict()); - // Compute a conservative estimate for high known-0 bits. - unsigned LeadZ = std::max(Known.countMinLeadingZeros() + - Known2.countMinLeadingZeros(), - BitWidth) - BitWidth; - LeadZ = std::min(LeadZ, BitWidth); - - // The result of the bottom bits of an integer multiply can be - // inferred by looking at the bottom bits of both operands and - // multiplying them together. - // We can infer at least the minimum number of known trailing bits - // of both operands. Depending on number of trailing zeros, we can - // infer more bits, because (a*b) <=> ((a/m) * (b/n)) * (m*n) assuming - // a and b are divisible by m and n respectively. 
- // We then calculate how many of those bits are inferrable and set - // the output. For example, the i8 mul: - // a = XXXX1100 (12) - // b = XXXX1110 (14) - // We know the bottom 3 bits are zero since the first can be divided by - // 4 and the second by 2, thus having ((12/4) * (14/2)) * (2*4). - // Applying the multiplication to the trimmed arguments gets: - // XX11 (3) - // X111 (7) - // ------- - // XX11 - // XX11 - // XX11 - // XX11 - // ------- - // XXXXX01 - // Which allows us to infer the 2 LSBs. Since we're multiplying the result - // by 8, the bottom 3 bits will be 0, so we can infer a total of 5 bits. - // The proof for this can be described as: - // Pre: (C1 >= 0) && (C1 < (1 << C5)) && (C2 >= 0) && (C2 < (1 << C6)) && - // (C7 == (1 << (umin(countTrailingZeros(C1), C5) + - // umin(countTrailingZeros(C2), C6) + - // umin(C5 - umin(countTrailingZeros(C1), C5), - // C6 - umin(countTrailingZeros(C2), C6)))) - 1) - // %aa = shl i8 %a, C5 - // %bb = shl i8 %b, C6 - // %aaa = or i8 %aa, C1 - // %bbb = or i8 %bb, C2 - // %mul = mul i8 %aaa, %bbb - // %mask = and i8 %mul, C7 - // => - // %mask = i8 ((C1*C2)&C7) - // Where C5, C6 describe the known bits of %a, %b - // C1, C2 describe the known bottom bits of %a, %b. - // C7 describes the mask of the known bits of the result. - APInt Bottom0 = Known.One; - APInt Bottom1 = Known2.One; - - // How many times we'd be able to divide each argument by 2 (shr by 1). - // This gives us the number of trailing zeros on the multiplication result. - unsigned TrailBitsKnown0 = (Known.Zero | Known.One).countTrailingOnes(); - unsigned TrailBitsKnown1 = (Known2.Zero | Known2.One).countTrailingOnes(); - unsigned TrailZero0 = Known.countMinTrailingZeros(); - unsigned TrailZero1 = Known2.countMinTrailingZeros(); - unsigned TrailZ = TrailZero0 + TrailZero1; - - // Figure out the fewest known-bits operand. - unsigned SmallestOperand = std::min(TrailBitsKnown0 - TrailZero0, - TrailBitsKnown1 - TrailZero1); - unsigned ResultBitsKnown = std::min(SmallestOperand + TrailZ, BitWidth); - - APInt BottomKnown = Bottom0.getLoBits(TrailBitsKnown0) * - Bottom1.getLoBits(TrailBitsKnown1); - - Known.resetAll(); - Known.Zero.setHighBits(LeadZ); - Known.Zero |= (~BottomKnown).getLoBits(ResultBitsKnown); - Known.One |= BottomKnown.getLoBits(ResultBitsKnown); + Known = KnownBits::computeForMul(Known, Known2); // Only make use of no-wrap flags if we failed to compute the sign bit // directly. This matters if the multiplication always overflows, in @@ -1452,15 +1402,19 @@ // to determine if we can prove known low zero bits. KnownBits LocalKnown(BitWidth); computeKnownBits(I->getOperand(0), LocalKnown, Depth + 1, Q); + unsigned TrailZ = LocalKnown.countMinTrailingZeros(); gep_type_iterator GTI = gep_type_begin(I); + // If the inbounds keyword is not present, the offsets are added to the base + // address with silently-wrapping two’s complement arithmetic. + bool IsInBounds = cast(I)->isInBounds(); for (unsigned i = 1, e = I->getNumOperands(); i != e; ++i, ++GTI) { - // TrailZ can only become smaller, short-circuit if we hit zero. if (TrailZ == 0) break; - Value *Index = I->getOperand(i); + + unsigned IndexBitWidth = Index->getType()->getScalarSizeInBits(); if (StructType *STy = GTI.getStructTypeOrNull()) { // Handle struct member offset arithmetic. 
@@ -1488,12 +1442,10 @@
         uint64_t TypeSize = Q.DL.getTypeAllocSize(IndexedTy).getKnownMinSize();
         LocalKnown.Zero = LocalKnown.One = APInt(GEPOpiBits, 0);
         computeKnownBits(Index, LocalKnown, Depth + 1, Q);
-        TrailZ = std::min(TrailZ,
-                          unsigned(countTrailingZeros(TypeSize) +
-                                   LocalKnown.countMinTrailingZeros()));
+        TrailZ = std::min(TrailZ, unsigned(countTrailingZeros(TypeSize) +
+                                           LocalKnown.countMinTrailingZeros()));
       }
     }
-
     Known.Zero.setLowBits(TrailZ);
     break;
   }
@@ -2009,6 +1961,10 @@
   if (Depth == MaxDepth)
     return;
 
+  if (Q.TTI &&
+      Q.TTI->computeKnownBits(const_cast<Value *>(V), Known, Q.DL, Depth + 1,
+                              Q.AC, Q.CxtI, Q.DT, Q.ORE, true))
+    return;
   // A weak GlobalAlias is totally unknown. A non-weak GlobalAlias has
   // the bits of its aliasee.
   if (const GlobalAlias *GA = dyn_cast<GlobalAlias>(V)) {
@@ -4519,9 +4475,10 @@
 static ConstantRange computeConstantRangeIncludingKnownBits(
     const Value *V, bool ForSigned, const DataLayout &DL, unsigned Depth,
     AssumptionCache *AC, const Instruction *CxtI, const DominatorTree *DT,
-    OptimizationRemarkEmitter *ORE = nullptr, bool UseInstrInfo = true) {
-  KnownBits Known = computeKnownBits(
-      V, DL, Depth, AC, CxtI, DT, ORE, UseInstrInfo);
+    OptimizationRemarkEmitter *ORE = nullptr, bool UseInstrInfo = true,
+    const TargetTransformInfo *TTI = nullptr) {
+  KnownBits Known =
+      computeKnownBits(V, DL, Depth, AC, CxtI, DT, ORE, UseInstrInfo, TTI);
   ConstantRange CR1 = ConstantRange::fromKnownBits(Known, ForSigned);
   ConstantRange CR2 = computeConstantRange(V, UseInstrInfo);
   ConstantRange::PreferredRangeType RangeType =
@@ -4532,21 +4489,20 @@
 OverflowResult llvm::computeOverflowForUnsignedMul(
     const Value *LHS, const Value *RHS, const DataLayout &DL,
     AssumptionCache *AC, const Instruction *CxtI, const DominatorTree *DT,
-    bool UseInstrInfo) {
+    bool UseInstrInfo, const TargetTransformInfo *TTI) {
   KnownBits LHSKnown = computeKnownBits(LHS, DL, /*Depth=*/0, AC, CxtI, DT,
-                                        nullptr, UseInstrInfo);
+                                        nullptr, UseInstrInfo, TTI);
   KnownBits RHSKnown = computeKnownBits(RHS, DL, /*Depth=*/0, AC, CxtI, DT,
-                                        nullptr, UseInstrInfo);
+                                        nullptr, UseInstrInfo, TTI);
   ConstantRange LHSRange = ConstantRange::fromKnownBits(LHSKnown, false);
   ConstantRange RHSRange = ConstantRange::fromKnownBits(RHSKnown, false);
   return mapOverflowResult(LHSRange.unsignedMulMayOverflow(RHSRange));
 }
 
-OverflowResult
-llvm::computeOverflowForSignedMul(const Value *LHS, const Value *RHS,
-                                  const DataLayout &DL, AssumptionCache *AC,
-                                  const Instruction *CxtI,
-                                  const DominatorTree *DT, bool UseInstrInfo) {
+OverflowResult llvm::computeOverflowForSignedMul(
+    const Value *LHS, const Value *RHS, const DataLayout &DL,
+    AssumptionCache *AC, const Instruction *CxtI, const DominatorTree *DT,
+    bool UseInstrInfo, const TargetTransformInfo *TTI) {
   // Multiplying n * m significant bits yields a result of n + m significant
   // bits. If the total number of significant bits does not exceed the
   // result bit width (minus 1), there is no overflow.
@@ -4557,8 +4513,8 @@
 
   // Note that underestimating the number of sign bits gives a more
   // conservative answer.
-  unsigned SignBits = ComputeNumSignBits(LHS, DL, 0, AC, CxtI, DT) +
-                      ComputeNumSignBits(RHS, DL, 0, AC, CxtI, DT);
+  unsigned SignBits = ComputeNumSignBits(LHS, DL, 0, AC, CxtI, DT, true, TTI) +
+                      ComputeNumSignBits(RHS, DL, 0, AC, CxtI, DT, true, TTI);
 
   // First handle the easy case: if we have enough sign bits there's
   // definitely no overflow.
@@ -4576,9 +4532,9 @@ // E.g.
mul i16 with 17 sign bits: 0xff00 * 0xff80 = 0x8000 // For simplicity we just check if at least one side is not negative. KnownBits LHSKnown = computeKnownBits(LHS, DL, /*Depth=*/0, AC, CxtI, DT, - nullptr, UseInstrInfo); + nullptr, UseInstrInfo, TTI); KnownBits RHSKnown = computeKnownBits(RHS, DL, /*Depth=*/0, AC, CxtI, DT, - nullptr, UseInstrInfo); + nullptr, UseInstrInfo, TTI); if (LHSKnown.isNonNegative() || RHSKnown.isNonNegative()) return OverflowResult::NeverOverflows; } @@ -4588,23 +4544,20 @@ OverflowResult llvm::computeOverflowForUnsignedAdd( const Value *LHS, const Value *RHS, const DataLayout &DL, AssumptionCache *AC, const Instruction *CxtI, const DominatorTree *DT, - bool UseInstrInfo) { + bool UseInstrInfo, const TargetTransformInfo *TTI) { ConstantRange LHSRange = computeConstantRangeIncludingKnownBits( - LHS, /*ForSigned=*/false, DL, /*Depth=*/0, AC, CxtI, DT, - nullptr, UseInstrInfo); + LHS, /*ForSigned=*/false, DL, /*Depth=*/0, AC, CxtI, DT, nullptr, + UseInstrInfo, TTI); ConstantRange RHSRange = computeConstantRangeIncludingKnownBits( - RHS, /*ForSigned=*/false, DL, /*Depth=*/0, AC, CxtI, DT, - nullptr, UseInstrInfo); + RHS, /*ForSigned=*/false, DL, /*Depth=*/0, AC, CxtI, DT, nullptr, + UseInstrInfo, TTI); return mapOverflowResult(LHSRange.unsignedAddMayOverflow(RHSRange)); } -static OverflowResult computeOverflowForSignedAdd(const Value *LHS, - const Value *RHS, - const AddOperator *Add, - const DataLayout &DL, - AssumptionCache *AC, - const Instruction *CxtI, - const DominatorTree *DT) { +static OverflowResult computeOverflowForSignedAdd( + const Value *LHS, const Value *RHS, const AddOperator *Add, + const DataLayout &DL, AssumptionCache *AC, const Instruction *CxtI, + const DominatorTree *DT, const TargetTransformInfo *TTI) { if (Add && Add->hasNoSignedWrap()) { return OverflowResult::NeverOverflows; } @@ -4623,14 +4576,16 @@ // // Since the carry into the most significant position is always equal to // the carry out of the addition, there is no signed overflow. 
- if (ComputeNumSignBits(LHS, DL, 0, AC, CxtI, DT) > 1 && - ComputeNumSignBits(RHS, DL, 0, AC, CxtI, DT) > 1) + if (ComputeNumSignBits(LHS, DL, 0, AC, CxtI, DT, TTI) > 1 && + ComputeNumSignBits(RHS, DL, 0, AC, CxtI, DT, TTI) > 1) return OverflowResult::NeverOverflows; ConstantRange LHSRange = computeConstantRangeIncludingKnownBits( - LHS, /*ForSigned=*/true, DL, /*Depth=*/0, AC, CxtI, DT); + LHS, /*ForSigned=*/true, DL, /*Depth=*/0, AC, CxtI, DT, /*ORE=*/nullptr, + TTI); ConstantRange RHSRange = computeConstantRangeIncludingKnownBits( - RHS, /*ForSigned=*/true, DL, /*Depth=*/0, AC, CxtI, DT); + RHS, /*ForSigned=*/true, DL, /*Depth=*/0, AC, CxtI, DT, /*ORE=*/nullptr, + TTI); OverflowResult OR = mapOverflowResult(LHSRange.signedAddMayOverflow(RHSRange)); if (OR != OverflowResult::MayOverflow) @@ -4652,7 +4607,8 @@ if (LHSOrRHSKnownNonNegative || LHSOrRHSKnownNegative) { KnownBits AddKnown(LHSRange.getBitWidth()); computeKnownBitsFromAssume( - Add, AddKnown, /*Depth=*/0, Query(DL, AC, CxtI, DT, true)); + Add, AddKnown, /*Depth=*/0, + Query(DL, AC, CxtI, DT, true, /*ORE=*/nullptr, TTI)); if ((AddKnown.isNonNegative() && LHSOrRHSKnownNonNegative) || (AddKnown.isNegative() && LHSOrRHSKnownNegative)) return OverflowResult::NeverOverflows; @@ -4661,12 +4617,10 @@ return OverflowResult::MayOverflow; } -OverflowResult llvm::computeOverflowForUnsignedSub(const Value *LHS, - const Value *RHS, - const DataLayout &DL, - AssumptionCache *AC, - const Instruction *CxtI, - const DominatorTree *DT) { +OverflowResult llvm::computeOverflowForUnsignedSub( + const Value *LHS, const Value *RHS, const DataLayout &DL, + AssumptionCache *AC, const Instruction *CxtI, const DominatorTree *DT, + const TargetTransformInfo *TTI) { // Checking for conditions implied by dominating conditions may be expensive. // Limit it to usub_with_overflow calls for now. if (match(CxtI, @@ -4678,18 +4632,18 @@ return OverflowResult::AlwaysOverflowsLow; } ConstantRange LHSRange = computeConstantRangeIncludingKnownBits( - LHS, /*ForSigned=*/false, DL, /*Depth=*/0, AC, CxtI, DT); + LHS, /*ForSigned=*/false, DL, /*Depth=*/0, AC, CxtI, DT, /*ORE=*/nullptr, + TTI); ConstantRange RHSRange = computeConstantRangeIncludingKnownBits( - RHS, /*ForSigned=*/false, DL, /*Depth=*/0, AC, CxtI, DT); + RHS, /*ForSigned=*/false, DL, /*Depth=*/0, AC, CxtI, DT, /*ORE=*/nullptr, + TTI); return mapOverflowResult(LHSRange.unsignedSubMayOverflow(RHSRange)); } -OverflowResult llvm::computeOverflowForSignedSub(const Value *LHS, - const Value *RHS, - const DataLayout &DL, - AssumptionCache *AC, - const Instruction *CxtI, - const DominatorTree *DT) { +OverflowResult llvm::computeOverflowForSignedSub( + const Value *LHS, const Value *RHS, const DataLayout &DL, + AssumptionCache *AC, const Instruction *CxtI, const DominatorTree *DT, + const TargetTransformInfo *TTI) { // If LHS and RHS each have at least two sign bits, the subtraction // cannot overflow. 
if (ComputeNumSignBits(LHS, DL, 0, AC, CxtI, DT) > 1 && @@ -4697,9 +4651,11 @@ return OverflowResult::NeverOverflows; ConstantRange LHSRange = computeConstantRangeIncludingKnownBits( - LHS, /*ForSigned=*/true, DL, /*Depth=*/0, AC, CxtI, DT); + LHS, /*ForSigned=*/true, DL, /*Depth=*/0, AC, CxtI, DT, /*ORE=*/nullptr, + TTI); ConstantRange RHSRange = computeConstantRangeIncludingKnownBits( - RHS, /*ForSigned=*/true, DL, /*Depth=*/0, AC, CxtI, DT); + RHS, /*ForSigned=*/true, DL, /*Depth=*/0, AC, CxtI, DT, /*ORE=*/nullptr, + TTI); return mapOverflowResult(LHSRange.signedSubMayOverflow(RHSRange)); } @@ -4955,22 +4911,21 @@ return false; } -OverflowResult llvm::computeOverflowForSignedAdd(const AddOperator *Add, - const DataLayout &DL, - AssumptionCache *AC, - const Instruction *CxtI, - const DominatorTree *DT) { +OverflowResult +llvm::computeOverflowForSignedAdd(const AddOperator *Add, const DataLayout &DL, + AssumptionCache *AC, const Instruction *CxtI, + const DominatorTree *DT, + const TargetTransformInfo *TTI) { return ::computeOverflowForSignedAdd(Add->getOperand(0), Add->getOperand(1), - Add, DL, AC, CxtI, DT); + Add, DL, AC, CxtI, DT, TTI); } -OverflowResult llvm::computeOverflowForSignedAdd(const Value *LHS, - const Value *RHS, - const DataLayout &DL, - AssumptionCache *AC, - const Instruction *CxtI, - const DominatorTree *DT) { - return ::computeOverflowForSignedAdd(LHS, RHS, nullptr, DL, AC, CxtI, DT); +OverflowResult llvm::computeOverflowForSignedAdd( + const Value *LHS, const Value *RHS, const DataLayout &DL, + AssumptionCache *AC, const Instruction *CxtI, const DominatorTree *DT, + const TargetTransformInfo *TTI) { + return ::computeOverflowForSignedAdd(LHS, RHS, nullptr, DL, AC, CxtI, DT, + TTI); } bool llvm::isGuaranteedToTransferExecutionToSuccessor(const Instruction *I) { diff --git a/llvm/lib/Support/KnownBits.cpp b/llvm/lib/Support/KnownBits.cpp --- a/llvm/lib/Support/KnownBits.cpp +++ b/llvm/lib/Support/KnownBits.cpp @@ -83,6 +83,85 @@ return KnownOut; } +KnownBits KnownBits::computeForMul(const KnownBits &LHS, const KnownBits &RHS) { + unsigned BitWidth = LHS.getBitWidth(); + + assert(!LHS.hasConflict() && !RHS.hasConflict()); + // Compute a conservative estimate for high known-0 bits. + unsigned LeadZ = + std::max(LHS.countMinLeadingZeros() + RHS.countMinLeadingZeros(), + BitWidth) - + BitWidth; + LeadZ = std::min(LeadZ, BitWidth); + + // The result of the bottom bits of an integer multiply can be + // inferred by looking at the bottom bits of both operands and + // multiplying them together. + // We can infer at least the minimum number of known trailing bits + // of both operands. Depending on number of trailing zeros, we can + // infer more bits, because (a*b) <=> ((a/m) * (b/n)) * (m*n) assuming + // a and b are divisible by m and n respectively. + // We then calculate how many of those bits are inferrable and set + // the output. For example, the i8 mul: + // a = XXXX1100 (12) + // b = XXXX1110 (14) + // We know the bottom 3 bits are zero since the first can be divided by + // 4 and the second by 2, thus having ((12/4) * (14/2)) * (2*4). + // Applying the multiplication to the trimmed arguments gets: + // XX11 (3) + // X111 (7) + // ------- + // XX11 + // XX11 + // XX11 + // XX11 + // ------- + // XXXXX01 + // Which allows us to infer the 2 LSBs. Since we're multiplying the result + // by 8, the bottom 3 bits will be 0, so we can infer a total of 5 bits. 
+ // The proof for this can be described as: + // Pre: (C1 >= 0) && (C1 < (1 << C5)) && (C2 >= 0) && (C2 < (1 << C6)) && + // (C7 == (1 << (umin(countTrailingZeros(C1), C5) + + // umin(countTrailingZeros(C2), C6) + + // umin(C5 - umin(countTrailingZeros(C1), C5), + // C6 - umin(countTrailingZeros(C2), C6)))) - 1) + // %aa = shl i8 %a, C5 + // %bb = shl i8 %b, C6 + // %aaa = or i8 %aa, C1 + // %bbb = or i8 %bb, C2 + // %mul = mul i8 %aaa, %bbb + // %mask = and i8 %mul, C7 + // => + // %mask = i8 ((C1*C2)&C7) + // Where C5, C6 describe the known bits of %a, %b + // C1, C2 describe the known bottom bits of %a, %b. + // C7 describes the mask of the known bits of the result. + APInt Bottom0 = LHS.One; + APInt Bottom1 = RHS.One; + + // How many times we'd be able to divide each argument by 2 (shr by 1). + // This gives us the number of trailing zeros on the multiplication result. + unsigned TrailBitsKnown0 = (LHS.Zero | LHS.One).countTrailingOnes(); + unsigned TrailBitsKnown1 = (RHS.Zero | RHS.One).countTrailingOnes(); + unsigned TrailZero0 = LHS.countMinTrailingZeros(); + unsigned TrailZero1 = RHS.countMinTrailingZeros(); + unsigned TrailZ = TrailZero0 + TrailZero1; + + // Figure out the fewest known-bits operand. + unsigned SmallestOperand = + std::min(TrailBitsKnown0 - TrailZero0, TrailBitsKnown1 - TrailZero1); + unsigned ResultBitsKnown = std::min(SmallestOperand + TrailZ, BitWidth); + + APInt BottomKnown = + Bottom0.getLoBits(TrailBitsKnown0) * Bottom1.getLoBits(TrailBitsKnown1); + + KnownBits Res(BitWidth); + Res.Zero.setHighBits(LeadZ); + Res.Zero |= (~BottomKnown).getLoBits(ResultBitsKnown); + Res.One = BottomKnown.getLoBits(ResultBitsKnown); + return Res; +} + KnownBits &KnownBits::operator&=(const KnownBits &RHS) { // Result bit is 0 if either operand bit is 0. Zero |= RHS.Zero; diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp --- a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp @@ -956,7 +956,7 @@ return nullptr; // TODO: Do we need to thread more context in here? 
- KnownBits Known = computeKnownBits(MaskOp, DL, 0, nullptr, II); + KnownBits Known = ::computeKnownBits(MaskOp, DL, 0, nullptr, II); if (Known.countMinLeadingOnes() < 32) return nullptr; diff --git a/llvm/unittests/Analysis/ValueTrackingTest.cpp b/llvm/unittests/Analysis/ValueTrackingTest.cpp --- a/llvm/unittests/Analysis/ValueTrackingTest.cpp +++ b/llvm/unittests/Analysis/ValueTrackingTest.cpp @@ -8,7 +8,9 @@ #include "llvm/Analysis/ValueTracking.h" #include "llvm/Analysis/AssumptionCache.h" +#include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/AsmParser/Parser.h" +#include "llvm/CodeGen/BasicTTIImpl.h" #include "llvm/IR/ConstantRange.h" #include "llvm/IR/Function.h" #include "llvm/IR/InstIterator.h" @@ -18,6 +20,7 @@ #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/KnownBits.h" #include "llvm/Support/SourceMgr.h" +#include "llvm/Target/TargetMachine.h" #include "gtest/gtest.h" using namespace llvm; @@ -1013,6 +1016,222 @@ EXPECT_EQ(Known.One.getZExtValue(), 0u); } +TEST_F(ComputeKnownBitsTest, ComputeKnownBitsAddWithRange) { + parseAssembly("define void @test(i64* %p) {\n" + " %A = load i64, i64* %p, !range !{i64 64, i64 65536}\n" + " %APlus512 = add i64 %A, 512\n" + " %c = icmp ugt i64 %APlus512, 523\n" + " call void @llvm.assume(i1 %c)\n" + " ret void\n" + "}\n" + "declare void @llvm.assume(i1)\n"); + AssumptionCache AC(*F); + KnownBits Known = computeKnownBits(A, M->getDataLayout(), /* Depth */ 0, &AC, + F->front().getTerminator()); + EXPECT_EQ(Known.Zero.getZExtValue(), ~(65536llu - 1)); + EXPECT_EQ(Known.One.getZExtValue(), 0u); + Instruction &APlus512 = findInstructionByName(F, "APlus512"); + Known = computeKnownBits(&APlus512, M->getDataLayout(), /* Depth */ 0, &AC, + F->front().getTerminator()); + // We know of one less zero because 512 may have produced a 1 that + // got carried all the way to the first trailing zero. + EXPECT_EQ(Known.Zero.getZExtValue(), (~(65536llu - 1)) << 1); + EXPECT_EQ(Known.One.getZExtValue(), 0u); +} + +// 512 + [32, 64) doesn't produce overlapping bits. +// Make sure we get all the individual bits properly. 
+TEST_F(ComputeKnownBitsTest, ComputeKnownBitsAddWithRangeNoOverlap) {
+  parseAssembly("define void @test(i64* %p) {\n"
+                " %A = load i64, i64* %p, !range !{i64 32, i64 64}\n"
+                " %APlus512 = add i64 %A, 512\n"
+                " %c = icmp ugt i64 %APlus512, 523\n"
+                " call void @llvm.assume(i1 %c)\n"
+                " ret void\n"
+                "}\n"
+                "declare void @llvm.assume(i1)\n");
+  AssumptionCache AC(*F);
+  KnownBits Known = computeKnownBits(A, M->getDataLayout(), /* Depth */ 0, &AC,
+                                     F->front().getTerminator());
+  EXPECT_EQ(Known.Zero.getZExtValue(), ~(64llu - 1));
+  EXPECT_EQ(Known.One.getZExtValue(), 32u);
+  Instruction &APlus512 = findInstructionByName(F, "APlus512");
+  Known = computeKnownBits(&APlus512, M->getDataLayout(), /* Depth */ 0, &AC,
+                           F->front().getTerminator());
+  EXPECT_EQ(Known.Zero.getZExtValue(), ~512llu & ~(64llu - 1));
+  EXPECT_EQ(Known.One.getZExtValue(), 512u | 32u);
+}
+
+namespace {
+class TestGEPTTIImpl : public TargetTransformInfoImplCRTPBase<TestGEPTTIImpl> {
+  typedef TargetTransformInfoImplCRTPBase<TestGEPTTIImpl> BaseT;
+  friend BaseT;
+
+public:
+  bool computeKnownBits(const TargetTransformInfo *TTI, Value *V,
+                        KnownBits &Known, const DataLayout &DL, unsigned Depth,
+                        AssumptionCache *AC, const Instruction *CxtI,
+                        const DominatorTree *DT, OptimizationRemarkEmitter *ORE,
+                        bool UseInstrInfo) const {
+    if (!isa<Operator>(V))
+      return false;
+    Operator *I = cast<Operator>(V);
+    unsigned BitWidth = Known.getBitWidth();
+
+    switch (I->getOpcode()) {
+    default:
+      return false;
+    case Instruction::GetElementPtr: {
+      // Analyze all of the subscripts of this getelementptr instruction
+      // to determine if we can prove known low zero bits.
+      KnownBits LocalKnown(BitWidth);
+      ::computeKnownBits(I->getOperand(0), LocalKnown, DL, Depth + 1, AC, CxtI,
+                         DT, ORE, UseInstrInfo, TTI);
+      KnownBits AddrKnownBits(LocalKnown);
+
+      unsigned TrailZ = LocalKnown.countMinTrailingZeros();
+
+      gep_type_iterator GTI = gep_type_begin(I);
+      // If the inbounds keyword is not present, the offsets are added to the
+      // base address with silently-wrapping two's complement arithmetic.
+      bool IsInBounds = cast<GEPOperator>(I)->isInBounds();
+      for (unsigned i = 1, e = I->getNumOperands(); i != e; ++i, ++GTI) {
+        if (TrailZ == 0 && AddrKnownBits.isUnknown())
+          break;
+        Value *Index = I->getOperand(i);
+
+        unsigned IndexBitWidth = Index->getType()->getScalarSizeInBits();
+        KnownBits IndexBits(IndexBitWidth);
+        if (StructType *STy = GTI.getStructTypeOrNull()) {
+          // Handle struct member offset arithmetic.
+
+          // Handle case when index is vector zeroinitializer
+          Constant *CIndex = cast<Constant>(Index);
+          if (CIndex->isZeroValue())
+            continue;
+
+          if (CIndex->getType()->isVectorTy())
+            Index = CIndex->getSplatValue();
+
+          unsigned Idx = cast<ConstantInt>(Index)->getZExtValue();
+          const StructLayout *SL = DL.getStructLayout(STy);
+          uint64_t Offset = SL->getElementOffset(Idx);
+          if (!AddrKnownBits.isUnknown()) {
+            IndexBits.Zero = ~Offset;
+            IndexBits.One = Offset;
+          }
+          TrailZ = std::min(TrailZ, countTrailingZeros(Offset));
+        } else {
+          // Handle array index arithmetic.
+          Type *IndexedTy = GTI.getIndexedType();
+          if (!IndexedTy->isSized()) {
+            TrailZ = 0;
+            AddrKnownBits.resetAll();
+            break;
+          }
+          ::computeKnownBits(Index, IndexBits, DL, Depth + 1, AC, CxtI, DT,
+                             ORE, UseInstrInfo, TTI);
+          TypeSize IndexTypeSize = DL.getTypeAllocSize(IndexedTy);
+          uint64_t TypeSizeInBytes = IndexTypeSize.getKnownMinSize();
+          if (IndexTypeSize.isScalable())
+            AddrKnownBits.resetAll();
+          if (!AddrKnownBits.isUnknown()) {
+            // Multiply by current sizeof type.
+            // &A[i] == A + i * sizeof(*A[i]).
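
For intuition at this step (an aside, not part of the patch): in the float GEPs used by the tests below, TypeSizeInBytes is 4, so ScalingFactor is the exactly-known constant 4 and the multiply simply shifts the index's known bits up by two. With the constant index 128 from the tests:

    IndexBits     = 128  (0b1000'0000, fully known)
    ScalingFactor =   4  (0b100, fully known)
    computeForMul = 512  (0b10'0000'0000, fully known)

and that 512 is what computeForAddSub folds into the base pointer's known bits a few lines further down.
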
+            KnownBits ScalingFactor(IndexBitWidth);
+            ScalingFactor.Zero = ~TypeSizeInBytes;
+            ScalingFactor.One = TypeSizeInBytes;
+            IndexBits = KnownBits::computeForMul(IndexBits, ScalingFactor);
+          }
+          TrailZ =
+              std::min(TrailZ, unsigned(countTrailingZeros(TypeSizeInBytes) +
+                                        IndexBits.countMinTrailingZeros()));
+        }
+        if (AddrKnownBits.isUnknown())
+          continue;
+
+        // If the offsets have a different width from the pointer, according
+        // to the language reference we need to sign-extend or truncate them
+        // to the width of the pointer.
+        IndexBits = IndexBits.sextOrTrunc(BitWidth);
+
+        AddrKnownBits = KnownBits::computeForAddSub(
+            /*Add=*/true,
+            /*NSW=*/IsInBounds, AddrKnownBits, IndexBits);
+      }
+      if (!AddrKnownBits.isUnknown())
+        Known = AddrKnownBits;
+      else
+        Known.Zero.setLowBits(TrailZ);
+      return true;
+    }
+    }
+  }
+
+public:
+  explicit TestGEPTTIImpl(const Function &F)
+      : BaseT(F.getParent()->getDataLayout()) {}
+};
+} // End anonymous namespace.
+
+TEST_F(ComputeKnownBitsTest, ComputeKnownBitsGEPWithRange) {
+  parseAssembly(
+      "define void @test(i64* %p) {\n"
+      " %A = load i64, i64* %p, !range !{i64 64, i64 65536}\n"
+      " %APtr = inttoptr i64 %A to float*"
+      " %APtrPlus512 = getelementptr float, float* %APtr, i32 128\n"
+      " %c = icmp ugt float* %APtrPlus512, inttoptr (i32 523 to float*)\n"
+      " call void @llvm.assume(i1 %c)\n"
+      " ret void\n"
+      "}\n"
+      "declare void @llvm.assume(i1)\n");
+  AssumptionCache AC(*F);
+  TestGEPTTIImpl TTIImpl(*F);
+  TargetTransformInfo TTI(TTIImpl);
+  KnownBits Known = computeKnownBits(
+      A, M->getDataLayout(), /* Depth */ 0, &AC, F->front().getTerminator(),
+      /*DT=*/nullptr, /*ORE=*/nullptr, /*UseInstrInfo=*/true, &TTI);
+  EXPECT_EQ(Known.Zero.getZExtValue(), ~(65536llu - 1));
+  EXPECT_EQ(Known.One.getZExtValue(), 0u);
+  Instruction &APtrPlus512 = findInstructionByName(F, "APtrPlus512");
+  Known = computeKnownBits(&APtrPlus512, M->getDataLayout(), /* Depth */ 0, &AC,
+                           F->front().getTerminator(), /*DT=*/nullptr,
+                           /*ORE=*/nullptr, /*UseInstrInfo=*/true, &TTI);
+  // We know of one less zero because 512 may have produced a 1 that
+  // got carried all the way to the first trailing zero.
+  EXPECT_EQ(Known.Zero.getZExtValue(), ~(65536llu - 1) << 1);
+  EXPECT_EQ(Known.One.getZExtValue(), 0u);
+}
+
+// 4*128 + [32, 64) doesn't produce overlapping bits.
+// Make sure we get all the individual bits properly.
+// This test is useful to check that we account for the scaling factor
+// in the gep. Indeed, gep float, [32,64), 128 is not 128 + [32,64).
+TEST_F(ComputeKnownBitsTest, ComputeKnownBitsGEPWithRangeNoOverlap) {
+  parseAssembly(
+      "define void @test(i64* %p) {\n"
+      " %A = load i64, i64* %p, !range !{i64 32, i64 64}\n"
+      " %APtr = inttoptr i64 %A to float*"
+      " %APtrPlus512 = getelementptr float, float* %APtr, i32 128\n"
+      " %c = icmp ugt float* %APtrPlus512, inttoptr (i32 523 to float*)\n"
+      " call void @llvm.assume(i1 %c)\n"
+      " ret void\n"
+      "}\n"
+      "declare void @llvm.assume(i1)\n");
+  AssumptionCache AC(*F);
+  KnownBits Known = computeKnownBits(A, M->getDataLayout(), /* Depth */ 0, &AC,
+                                     F->front().getTerminator());
+  EXPECT_EQ(Known.Zero.getZExtValue(), ~(64llu - 1));
+  EXPECT_EQ(Known.One.getZExtValue(), 32u);
+  Instruction &APtrPlus512 = findInstructionByName(F, "APtrPlus512");
+  Known = computeKnownBits(&APtrPlus512, M->getDataLayout(), /* Depth */ 0, &AC,
+                           F->front().getTerminator());
+  // The scaled offset 4 * 128 = 512 does not overlap [32, 64), so no bit of
+  // the offset can carry into a bit of %A: every bit of the sum is known.
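
A quick aside on the masks asserted just below (illustrative, not part of the patch): every %A allowed by the !range metadata looks like 0b001x'xxxx (bit 5 set, bits 0-4 unknown, everything above clear), and the scaled GEP offset is 4 * 128 = 512 = 0b10'0000'0000, so the sum is 0b10'0010'0000 plus five unknown low bits:

    Known.One  = 512 | 32              = 0b10'0010'0000
    Known.Zero = ~(512 | 32 | 0b11111) = ~512 & ~(64 - 1)

which is exactly what the two EXPECT_EQs check.
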
+  EXPECT_EQ(Known.Zero.getZExtValue(), ~512llu & ~(64llu - 1));
+  EXPECT_EQ(Known.One.getZExtValue(), 512u | 32u);
+}
+
 class IsBytewiseValueTest : public ValueTrackingTest,
                             public ::testing::WithParamInterface< std::pair> {