Index: llvm/include/llvm/Analysis/ValueTracking.h =================================================================== --- llvm/include/llvm/Analysis/ValueTracking.h +++ llvm/include/llvm/Analysis/ValueTracking.h @@ -15,6 +15,7 @@ #define LLVM_ANALYSIS_VALUETRACKING_H #include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/DenseMap.h" #include "llvm/ADT/Optional.h" #include "llvm/ADT/SmallSet.h" #include "llvm/IR/Constants.h" @@ -54,237 +55,235 @@ /// where V is a vector, the known zero and known one values are the /// same width as the vector element, and the bit is set only if it is true /// for all of the elements in the vector. - void computeKnownBits(const Value *V, KnownBits &Known, - const DataLayout &DL, unsigned Depth = 0, - AssumptionCache *AC = nullptr, - const Instruction *CxtI = nullptr, - const DominatorTree *DT = nullptr, - OptimizationRemarkEmitter *ORE = nullptr, - bool UseInstrInfo = true); +void computeKnownBits(const Value *V, KnownBits &Known, const DataLayout &DL, + unsigned Depth = 0, AssumptionCache *AC = nullptr, + const Instruction *CxtI = nullptr, + const DominatorTree *DT = nullptr, + OptimizationRemarkEmitter *ORE = nullptr, + bool UseInstrInfo = true, + DenseMap *KnownValues = nullptr); + +/// Determine which bits of V are known to be either zero or one and return +/// them in the KnownZero/KnownOne bit sets. +/// +/// This function is defined on values with integer type, values with pointer +/// type, and vectors of integers. In the case +/// where V is a vector, the known zero and known one values are the +/// same width as the vector element, and the bit is set only if it is true +/// for all of the demanded elements in the vector. +void computeKnownBits(const Value *V, const APInt &DemandedElts, + KnownBits &Known, const DataLayout &DL, + unsigned Depth = 0, AssumptionCache *AC = nullptr, + const Instruction *CxtI = nullptr, + const DominatorTree *DT = nullptr, + OptimizationRemarkEmitter *ORE = nullptr, + bool UseInstrInfo = true, + DenseMap *KnownValues = nullptr); - /// Determine which bits of V are known to be either zero or one and return - /// them in the KnownZero/KnownOne bit sets. - /// - /// This function is defined on values with integer type, values with pointer - /// type, and vectors of integers. In the case - /// where V is a vector, the known zero and known one values are the - /// same width as the vector element, and the bit is set only if it is true - /// for all of the demanded elements in the vector. - void computeKnownBits(const Value *V, const APInt &DemandedElts, - KnownBits &Known, const DataLayout &DL, - unsigned Depth = 0, AssumptionCache *AC = nullptr, - const Instruction *CxtI = nullptr, - const DominatorTree *DT = nullptr, - OptimizationRemarkEmitter *ORE = nullptr, - bool UseInstrInfo = true); +/// Returns the known bits rather than passing by reference. +KnownBits computeKnownBits(const Value *V, const DataLayout &DL, + unsigned Depth = 0, AssumptionCache *AC = nullptr, + const Instruction *CxtI = nullptr, + const DominatorTree *DT = nullptr, + OptimizationRemarkEmitter *ORE = nullptr, + bool UseInstrInfo = true, + DenseMap *KnownValues = nullptr); - /// Returns the known bits rather than passing by reference. 
- KnownBits computeKnownBits(const Value *V, const DataLayout &DL, - unsigned Depth = 0, AssumptionCache *AC = nullptr, - const Instruction *CxtI = nullptr, - const DominatorTree *DT = nullptr, - OptimizationRemarkEmitter *ORE = nullptr, - bool UseInstrInfo = true); - - /// Returns the known bits rather than passing by reference. - KnownBits computeKnownBits(const Value *V, const APInt &DemandedElts, - const DataLayout &DL, unsigned Depth = 0, - AssumptionCache *AC = nullptr, - const Instruction *CxtI = nullptr, - const DominatorTree *DT = nullptr, - OptimizationRemarkEmitter *ORE = nullptr, - bool UseInstrInfo = true); - - /// Compute known bits from the range metadata. - /// \p KnownZero the set of bits that are known to be zero - /// \p KnownOne the set of bits that are known to be one - void computeKnownBitsFromRangeMetadata(const MDNode &Ranges, - KnownBits &Known); - - /// Return true if LHS and RHS have no common bits set. - bool haveNoCommonBitsSet(const Value *LHS, const Value *RHS, - const DataLayout &DL, +/// Returns the known bits rather than passing by reference. +KnownBits computeKnownBits(const Value *V, const APInt &DemandedElts, + const DataLayout &DL, unsigned Depth = 0, AssumptionCache *AC = nullptr, const Instruction *CxtI = nullptr, const DominatorTree *DT = nullptr, - bool UseInstrInfo = true); - - /// Return true if the given value is known to have exactly one bit set when - /// defined. For vectors return true if every element is known to be a power - /// of two when defined. Supports values with integer or pointer type and - /// vectors of integers. If 'OrZero' is set, then return true if the given - /// value is either a power of two or zero. - bool isKnownToBeAPowerOfTwo(const Value *V, const DataLayout &DL, - bool OrZero = false, unsigned Depth = 0, - AssumptionCache *AC = nullptr, - const Instruction *CxtI = nullptr, - const DominatorTree *DT = nullptr, - bool UseInstrInfo = true); - - bool isOnlyUsedInZeroEqualityComparison(const Instruction *CxtI); - - /// Return true if the given value is known to be non-zero when defined. For - /// vectors, return true if every element is known to be non-zero when - /// defined. For pointers, if the context instruction and dominator tree are - /// specified, perform context-sensitive analysis and return true if the - /// pointer couldn't possibly be null at the specified instruction. - /// Supports values with integer or pointer type and vectors of integers. - bool isKnownNonZero(const Value *V, const DataLayout &DL, unsigned Depth = 0, - AssumptionCache *AC = nullptr, - const Instruction *CxtI = nullptr, - const DominatorTree *DT = nullptr, - bool UseInstrInfo = true); - - /// Return true if the two given values are negation. - /// Currently can recoginze Value pair: - /// 1: if X = sub (0, Y) or Y = sub (0, X) - /// 2: if X = sub (A, B) and Y = sub (B, A) - bool isKnownNegation(const Value *X, const Value *Y, bool NeedNSW = false); - - /// Returns true if the give value is known to be non-negative. - bool isKnownNonNegative(const Value *V, const DataLayout &DL, - unsigned Depth = 0, - AssumptionCache *AC = nullptr, - const Instruction *CxtI = nullptr, - const DominatorTree *DT = nullptr, - bool UseInstrInfo = true); - - /// Returns true if the given value is known be positive (i.e. non-negative - /// and non-zero). 
- bool isKnownPositive(const Value *V, const DataLayout &DL, unsigned Depth = 0, - AssumptionCache *AC = nullptr, - const Instruction *CxtI = nullptr, - const DominatorTree *DT = nullptr, - bool UseInstrInfo = true); - - /// Returns true if the given value is known be negative (i.e. non-positive - /// and non-zero). - bool isKnownNegative(const Value *V, const DataLayout &DL, unsigned Depth = 0, - AssumptionCache *AC = nullptr, - const Instruction *CxtI = nullptr, - const DominatorTree *DT = nullptr, - bool UseInstrInfo = true); - - /// Return true if the given values are known to be non-equal when defined. - /// Supports scalar integer types only. - bool isKnownNonEqual(const Value *V1, const Value *V2, const DataLayout &DL, - AssumptionCache *AC = nullptr, - const Instruction *CxtI = nullptr, - const DominatorTree *DT = nullptr, - bool UseInstrInfo = true); - - /// Return true if 'V & Mask' is known to be zero. We use this predicate to - /// simplify operations downstream. Mask is known to be zero for bits that V - /// cannot have. - /// - /// This function is defined on values with integer type, values with pointer - /// type, and vectors of integers. In the case - /// where V is a vector, the mask, known zero, and known one values are the - /// same width as the vector element, and the bit is set only if it is true - /// for all of the elements in the vector. - bool MaskedValueIsZero(const Value *V, const APInt &Mask, - const DataLayout &DL, - unsigned Depth = 0, AssumptionCache *AC = nullptr, + OptimizationRemarkEmitter *ORE = nullptr, + bool UseInstrInfo = true, + DenseMap *KnownValues = nullptr); + +/// Compute known bits from the range metadata. +/// \p KnownZero the set of bits that are known to be zero +/// \p KnownOne the set of bits that are known to be one +void computeKnownBitsFromRangeMetadata(const MDNode &Ranges, KnownBits &Known); + +/// Return true if LHS and RHS have no common bits set. +bool haveNoCommonBitsSet(const Value *LHS, const Value *RHS, + const DataLayout &DL, AssumptionCache *AC = nullptr, const Instruction *CxtI = nullptr, const DominatorTree *DT = nullptr, bool UseInstrInfo = true); - /// Return the number of times the sign bit of the register is replicated into - /// the other bits. We know that at least 1 bit is always equal to the sign - /// bit (itself), but other cases can give us information. For example, - /// immediately after an "ashr X, 2", we know that the top 3 bits are all - /// equal to each other, so we return 3. For vectors, return the number of - /// sign bits for the vector element with the mininum number of known sign - /// bits. - unsigned ComputeNumSignBits(const Value *Op, const DataLayout &DL, - unsigned Depth = 0, AssumptionCache *AC = nullptr, - const Instruction *CxtI = nullptr, - const DominatorTree *DT = nullptr, - bool UseInstrInfo = true); - - /// Get the upper bound on bit size for this Value \p Op as a signed integer. - /// i.e. x == sext(trunc(x to MaxSignificantBits) to bitwidth(x)). - /// Similar to the APInt::getSignificantBits function. - unsigned ComputeMaxSignificantBits(const Value *Op, const DataLayout &DL, - unsigned Depth = 0, - AssumptionCache *AC = nullptr, - const Instruction *CxtI = nullptr, - const DominatorTree *DT = nullptr); - - /// Map a call instruction to an intrinsic ID. Libcalls which have equivalent - /// intrinsics are treated as-if they were intrinsics. 
- Intrinsic::ID getIntrinsicForCallSite(const CallBase &CB, - const TargetLibraryInfo *TLI); +/// Return true if the given value is known to have exactly one bit set when +/// defined. For vectors return true if every element is known to be a power +/// of two when defined. Supports values with integer or pointer type and +/// vectors of integers. If 'OrZero' is set, then return true if the given +/// value is either a power of two or zero. +bool isKnownToBeAPowerOfTwo(const Value *V, const DataLayout &DL, + bool OrZero = false, unsigned Depth = 0, + AssumptionCache *AC = nullptr, + const Instruction *CxtI = nullptr, + const DominatorTree *DT = nullptr, + bool UseInstrInfo = true); + +bool isOnlyUsedInZeroEqualityComparison(const Instruction *CxtI); + +/// Return true if the given value is known to be non-zero when defined. For +/// vectors, return true if every element is known to be non-zero when +/// defined. For pointers, if the context instruction and dominator tree are +/// specified, perform context-sensitive analysis and return true if the +/// pointer couldn't possibly be null at the specified instruction. +/// Supports values with integer or pointer type and vectors of integers. +bool isKnownNonZero(const Value *V, const DataLayout &DL, unsigned Depth = 0, + AssumptionCache *AC = nullptr, + const Instruction *CxtI = nullptr, + const DominatorTree *DT = nullptr, + bool UseInstrInfo = true); + +/// Return true if the two given values are negation. +/// Currently can recoginze Value pair: +/// 1: if X = sub (0, Y) or Y = sub (0, X) +/// 2: if X = sub (A, B) and Y = sub (B, A) +bool isKnownNegation(const Value *X, const Value *Y, bool NeedNSW = false); + +/// Returns true if the give value is known to be non-negative. +bool isKnownNonNegative(const Value *V, const DataLayout &DL, + unsigned Depth = 0, AssumptionCache *AC = nullptr, + const Instruction *CxtI = nullptr, + const DominatorTree *DT = nullptr, + bool UseInstrInfo = true); - /// Return true if we can prove that the specified FP value is never equal to - /// -0.0. - bool CannotBeNegativeZero(const Value *V, const TargetLibraryInfo *TLI, - unsigned Depth = 0); +/// Returns true if the given value is known be positive (i.e. non-negative +/// and non-zero). +bool isKnownPositive(const Value *V, const DataLayout &DL, unsigned Depth = 0, + AssumptionCache *AC = nullptr, + const Instruction *CxtI = nullptr, + const DominatorTree *DT = nullptr, + bool UseInstrInfo = true); + +/// Returns true if the given value is known be negative (i.e. non-positive +/// and non-zero). +bool isKnownNegative(const Value *V, const DataLayout &DL, unsigned Depth = 0, + AssumptionCache *AC = nullptr, + const Instruction *CxtI = nullptr, + const DominatorTree *DT = nullptr, + bool UseInstrInfo = true); + +/// Return true if the given values are known to be non-equal when defined. +/// Supports scalar integer types only. +bool isKnownNonEqual(const Value *V1, const Value *V2, const DataLayout &DL, + AssumptionCache *AC = nullptr, + const Instruction *CxtI = nullptr, + const DominatorTree *DT = nullptr, + bool UseInstrInfo = true); + +/// Return true if 'V & Mask' is known to be zero. We use this predicate to +/// simplify operations downstream. Mask is known to be zero for bits that V +/// cannot have. +/// +/// This function is defined on values with integer type, values with pointer +/// type, and vectors of integers. 
In the case +/// where V is a vector, the mask, known zero, and known one values are the +/// same width as the vector element, and the bit is set only if it is true +/// for all of the elements in the vector. +bool MaskedValueIsZero(const Value *V, const APInt &Mask, const DataLayout &DL, + unsigned Depth = 0, AssumptionCache *AC = nullptr, + const Instruction *CxtI = nullptr, + const DominatorTree *DT = nullptr, + bool UseInstrInfo = true); - /// Return true if we can prove that the specified FP value is either NaN or - /// never less than -0.0. - /// - /// NaN --> true - /// +0 --> true - /// -0 --> true - /// x > +0 --> true - /// x < -0 --> false - bool CannotBeOrderedLessThanZero(const Value *V, const TargetLibraryInfo *TLI); - - /// Return true if the floating-point scalar value is not an infinity or if - /// the floating-point vector value has no infinities. Return false if a value - /// could ever be infinity. - bool isKnownNeverInfinity(const Value *V, const TargetLibraryInfo *TLI, - unsigned Depth = 0); - - /// Return true if the floating-point scalar value is not a NaN or if the - /// floating-point vector value has no NaN elements. Return false if a value - /// could ever be NaN. - bool isKnownNeverNaN(const Value *V, const TargetLibraryInfo *TLI, - unsigned Depth = 0); - - /// Return true if we can prove that the specified FP value's sign bit is 0. - /// - /// NaN --> true/false (depending on the NaN's sign bit) - /// +0 --> true - /// -0 --> false - /// x > +0 --> true - /// x < -0 --> false - bool SignBitMustBeZero(const Value *V, const TargetLibraryInfo *TLI); - - /// If the specified value can be set by repeating the same byte in memory, - /// return the i8 value that it is represented with. This is true for all i8 - /// values obviously, but is also true for i32 0, i32 -1, i16 0xF0F0, double - /// 0.0 etc. If the value can't be handled with a repeated byte store (e.g. - /// i16 0x1234), return null. If the value is entirely undef and padding, - /// return undef. - Value *isBytewiseValue(Value *V, const DataLayout &DL); - - /// Given an aggregate and an sequence of indices, see if the scalar value - /// indexed is already around as a register, for example if it were inserted - /// directly into the aggregate. - /// - /// If InsertBefore is not null, this function will duplicate (modified) - /// insertvalues when a part of a nested struct is extracted. - Value *FindInsertedValue(Value *V, - ArrayRef idx_range, - Instruction *InsertBefore = nullptr); - - /// Analyze the specified pointer to see if it can be expressed as a base - /// pointer plus a constant offset. Return the base and offset to the caller. - /// - /// This is a wrapper around Value::stripAndAccumulateConstantOffsets that - /// creates and later unpacks the required APInt. - inline Value *GetPointerBaseWithConstantOffset(Value *Ptr, int64_t &Offset, - const DataLayout &DL, - bool AllowNonInbounds = true) { - APInt OffsetAPInt(DL.getIndexTypeSizeInBits(Ptr->getType()), 0); - Value *Base = - Ptr->stripAndAccumulateConstantOffsets(DL, OffsetAPInt, AllowNonInbounds); - - Offset = OffsetAPInt.getSExtValue(); - return Base; - } +/// Return the number of times the sign bit of the register is replicated into +/// the other bits. We know that at least 1 bit is always equal to the sign +/// bit (itself), but other cases can give us information. For example, +/// immediately after an "ashr X, 2", we know that the top 3 bits are all +/// equal to each other, so we return 3. 
For vectors, return the number of +/// sign bits for the vector element with the mininum number of known sign +/// bits. +unsigned ComputeNumSignBits(const Value *Op, const DataLayout &DL, + unsigned Depth = 0, AssumptionCache *AC = nullptr, + const Instruction *CxtI = nullptr, + const DominatorTree *DT = nullptr, + bool UseInstrInfo = true); + +/// Get the upper bound on bit size for this Value \p Op as a signed integer. +/// i.e. x == sext(trunc(x to MaxSignificantBits) to bitwidth(x)). +/// Similar to the APInt::getSignificantBits function. +unsigned ComputeMaxSignificantBits(const Value *Op, const DataLayout &DL, + unsigned Depth = 0, + AssumptionCache *AC = nullptr, + const Instruction *CxtI = nullptr, + const DominatorTree *DT = nullptr); + +/// Map a call instruction to an intrinsic ID. Libcalls which have equivalent +/// intrinsics are treated as-if they were intrinsics. +Intrinsic::ID getIntrinsicForCallSite(const CallBase &CB, + const TargetLibraryInfo *TLI); + +/// Return true if we can prove that the specified FP value is never equal to +/// -0.0. +bool CannotBeNegativeZero(const Value *V, const TargetLibraryInfo *TLI, + unsigned Depth = 0); + +/// Return true if we can prove that the specified FP value is either NaN or +/// never less than -0.0. +/// +/// NaN --> true +/// +0 --> true +/// -0 --> true +/// x > +0 --> true +/// x < -0 --> false +bool CannotBeOrderedLessThanZero(const Value *V, const TargetLibraryInfo *TLI); + +/// Return true if the floating-point scalar value is not an infinity or if +/// the floating-point vector value has no infinities. Return false if a value +/// could ever be infinity. +bool isKnownNeverInfinity(const Value *V, const TargetLibraryInfo *TLI, + unsigned Depth = 0); + +/// Return true if the floating-point scalar value is not a NaN or if the +/// floating-point vector value has no NaN elements. Return false if a value +/// could ever be NaN. +bool isKnownNeverNaN(const Value *V, const TargetLibraryInfo *TLI, + unsigned Depth = 0); + +/// Return true if we can prove that the specified FP value's sign bit is 0. +/// +/// NaN --> true/false (depending on the NaN's sign bit) +/// +0 --> true +/// -0 --> false +/// x > +0 --> true +/// x < -0 --> false +bool SignBitMustBeZero(const Value *V, const TargetLibraryInfo *TLI); + +/// If the specified value can be set by repeating the same byte in memory, +/// return the i8 value that it is represented with. This is true for all i8 +/// values obviously, but is also true for i32 0, i32 -1, i16 0xF0F0, double +/// 0.0 etc. If the value can't be handled with a repeated byte store (e.g. +/// i16 0x1234), return null. If the value is entirely undef and padding, +/// return undef. +Value *isBytewiseValue(Value *V, const DataLayout &DL); + +/// Given an aggregate and an sequence of indices, see if the scalar value +/// indexed is already around as a register, for example if it were inserted +/// directly into the aggregate. +/// +/// If InsertBefore is not null, this function will duplicate (modified) +/// insertvalues when a part of a nested struct is extracted. +Value *FindInsertedValue(Value *V, ArrayRef idx_range, + Instruction *InsertBefore = nullptr); + +/// Analyze the specified pointer to see if it can be expressed as a base +/// pointer plus a constant offset. Return the base and offset to the caller. +/// +/// This is a wrapper around Value::stripAndAccumulateConstantOffsets that +/// creates and later unpacks the required APInt. 
+inline Value *GetPointerBaseWithConstantOffset(Value *Ptr, int64_t &Offset, + const DataLayout &DL, + bool AllowNonInbounds = true) { + APInt OffsetAPInt(DL.getIndexTypeSizeInBits(Ptr->getType()), 0); + Value *Base = + Ptr->stripAndAccumulateConstantOffsets(DL, OffsetAPInt, AllowNonInbounds); + + Offset = OffsetAPInt.getSExtValue(); + return Base; +} inline const Value * GetPointerBaseWithConstantOffset(const Value *Ptr, int64_t &Offset, const DataLayout &DL, Index: llvm/include/llvm/InitializePasses.h =================================================================== --- llvm/include/llvm/InitializePasses.h +++ llvm/include/llvm/InitializePasses.h @@ -402,6 +402,7 @@ void initializeScalarizerLegacyPassPass(PassRegistry&); void initializeScavengerTestPass(PassRegistry&); void initializeScopedNoAliasAAWrapperPassPass(PassRegistry&); +void initializeSetAlignmentLegacyPassPass(PassRegistry &); void initializeSeparateConstOffsetFromGEPLegacyPassPass(PassRegistry &); void initializeShadowStackGCLoweringPass(PassRegistry&); void initializeShrinkWrapPass(PassRegistry&); Index: llvm/include/llvm/LinkAllPasses.h =================================================================== --- llvm/include/llvm/LinkAllPasses.h +++ llvm/include/llvm/LinkAllPasses.h @@ -164,6 +164,7 @@ (void) llvm::createRegionViewerPass(); (void) llvm::createSCCPPass(); (void) llvm::createSafeStackPass(); + (void)llvm::createSetAlignmentPass(); (void) llvm::createSROAPass(); (void) llvm::createSingleLoopExtractorPass(); (void) llvm::createStripSymbolsPass(); Index: llvm/include/llvm/Transforms/Scalar.h =================================================================== --- llvm/include/llvm/Transforms/Scalar.h +++ llvm/include/llvm/Transforms/Scalar.h @@ -560,6 +560,13 @@ // and scatter intrinsics with scalar code when target doesn't support them. // FunctionPass *createScalarizeMaskedMemIntrinLegacyPass(); + +//===----------------------------------------------------------------------===// +// +// createSetAlignmentPass - Set alignment information on instructions supporting +// it like load, store based on analysis of KnownBits. +// +FunctionPass *createSetAlignmentPass(); } // End llvm namespace #endif Index: llvm/include/llvm/Transforms/Scalar/SetLoadStoreAlignment.h =================================================================== --- /dev/null +++ llvm/include/llvm/Transforms/Scalar/SetLoadStoreAlignment.h @@ -0,0 +1,26 @@ +//===-- SetLoadStoreAlignment.h - Set alignment -----------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TRANSFORMS_SCALAR_SETLOADSTOREALIGNMENT_H +#define LLVM_TRANSFORMS_SCALAR_SETLOADSTOREALIGNMENT_H + +#include "llvm/IR/PassManager.h" + +namespace llvm { + +class Function; + +/// Set alignment on load/store instructions based on known bits analysis. 
+class SetLoadStoreAlignmentPass
+    : public PassInfoMixin<SetLoadStoreAlignmentPass> {
+public:
+  PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
+};
+} // namespace llvm
+
+#endif // LLVM_TRANSFORMS_SCALAR_SETLOADSTOREALIGNMENT_H
Index: llvm/lib/Analysis/ValueTracking.cpp
===================================================================
--- llvm/lib/Analysis/ValueTracking.cpp
+++ llvm/lib/Analysis/ValueTracking.cpp
@@ -120,10 +120,15 @@
   /// If true, it is safe to use metadata during simplification.
   InstrInfoQuery IIQ;
 
+  // Provides a cache of KnownBits already calculated.
+  DenseMap<const Value *, KnownBits> *KnownValues;
+
   Query(const DataLayout &DL, AssumptionCache *AC, const Instruction *CxtI,
         const DominatorTree *DT, bool UseInstrInfo,
-        OptimizationRemarkEmitter *ORE = nullptr)
-      : DL(DL), AC(AC), CxtI(CxtI), DT(DT), ORE(ORE), IIQ(UseInstrInfo) {}
+        OptimizationRemarkEmitter *ORE = nullptr,
+        DenseMap<const Value *, KnownBits> *KnownValues = nullptr)
+      : DL(DL), AC(AC), CxtI(CxtI), DT(DT), ORE(ORE), IIQ(UseInstrInfo),
+        KnownValues(KnownValues) {}
 };
 
 } // end anonymous namespace
@@ -222,18 +227,22 @@
                             const DataLayout &DL, unsigned Depth,
                             AssumptionCache *AC, const Instruction *CxtI,
                             const DominatorTree *DT,
-                            OptimizationRemarkEmitter *ORE, bool UseInstrInfo) {
-  ::computeKnownBits(V, Known, Depth,
-                     Query(DL, AC, safeCxtI(V, CxtI), DT, UseInstrInfo, ORE));
+                            OptimizationRemarkEmitter *ORE, bool UseInstrInfo,
+                            DenseMap<const Value *, KnownBits> *KnownValues) {
+  ::computeKnownBits(
+      V, Known, Depth,
+      Query(DL, AC, safeCxtI(V, CxtI), DT, UseInstrInfo, ORE, KnownValues));
 }
 
 void llvm::computeKnownBits(const Value *V, const APInt &DemandedElts,
                             KnownBits &Known, const DataLayout &DL,
                             unsigned Depth, AssumptionCache *AC,
                             const Instruction *CxtI, const DominatorTree *DT,
-                            OptimizationRemarkEmitter *ORE, bool UseInstrInfo) {
-  ::computeKnownBits(V, DemandedElts, Known, Depth,
-                     Query(DL, AC, safeCxtI(V, CxtI), DT, UseInstrInfo, ORE));
+                            OptimizationRemarkEmitter *ORE, bool UseInstrInfo,
+                            DenseMap<const Value *, KnownBits> *KnownValues) {
+  ::computeKnownBits(
+      V, DemandedElts, Known, Depth,
+      Query(DL, AC, safeCxtI(V, CxtI), DT, UseInstrInfo, ORE, KnownValues));
 }
 
 static KnownBits computeKnownBits(const Value *V, const APInt &DemandedElts,
@@ -247,9 +256,11 @@
                                    const Instruction *CxtI,
                                    const DominatorTree *DT,
                                    OptimizationRemarkEmitter *ORE,
-                                   bool UseInstrInfo) {
+                                   bool UseInstrInfo,
+                                   DenseMap<const Value *, KnownBits> *KnownValues) {
   return ::computeKnownBits(
-      V, Depth, Query(DL, AC, safeCxtI(V, CxtI), DT, UseInstrInfo, ORE));
+      V, Depth,
+      Query(DL, AC, safeCxtI(V, CxtI), DT, UseInstrInfo, ORE, KnownValues));
 }
 
 KnownBits llvm::computeKnownBits(const Value *V, const APInt &DemandedElts,
@@ -257,10 +268,11 @@
                                  AssumptionCache *AC, const Instruction *CxtI,
                                  const DominatorTree *DT,
                                  OptimizationRemarkEmitter *ORE,
-                                 bool UseInstrInfo) {
+                                 bool UseInstrInfo,
+                                 DenseMap<const Value *, KnownBits> *KnownValues) {
   return ::computeKnownBits(
       V, DemandedElts, Depth,
-      Query(DL, AC, safeCxtI(V, CxtI), DT, UseInstrInfo, ORE));
+      Query(DL, AC, safeCxtI(V, CxtI), DT, UseInstrInfo, ORE, KnownValues));
 }
 
 bool llvm::haveNoCommonBitsSet(const Value *LHS, const Value *RHS,
@@ -1912,6 +1924,14 @@
 /// for all of the demanded elements in the vector specified by DemandedElts.
 void computeKnownBits(const Value *V, const APInt &DemandedElts,
                       KnownBits &Known, unsigned Depth, const Query &Q) {
+  if (Q.KnownValues) {
+    auto it = Q.KnownValues->find(V);
+    if (it != Q.KnownValues->end()) {
+      // If we already know the result return.
+ Known = it->second; + return; + } + } if (!DemandedElts || isa(V->getType())) { // No demanded elts or V is a scalable vector, better to assume we don't // know anything. Index: llvm/lib/Passes/PassBuilder.cpp =================================================================== --- llvm/lib/Passes/PassBuilder.cpp +++ llvm/lib/Passes/PassBuilder.cpp @@ -207,6 +207,7 @@ #include "llvm/Transforms/Scalar/ScalarizeMaskedMemIntrin.h" #include "llvm/Transforms/Scalar/Scalarizer.h" #include "llvm/Transforms/Scalar/SeparateConstOffsetFromGEP.h" +#include "llvm/Transforms/Scalar/SetLoadStoreAlignment.h" #include "llvm/Transforms/Scalar/SimpleLoopUnswitch.h" #include "llvm/Transforms/Scalar/SimplifyCFG.h" #include "llvm/Transforms/Scalar/Sink.h" Index: llvm/lib/Passes/PassRegistry.def =================================================================== --- llvm/lib/Passes/PassRegistry.def +++ llvm/lib/Passes/PassRegistry.def @@ -357,6 +357,7 @@ FUNCTION_PASS("reg2mem", RegToMemPass()) FUNCTION_PASS("scalarize-masked-mem-intrin", ScalarizeMaskedMemIntrinPass()) FUNCTION_PASS("scalarizer", ScalarizerPass()) +FUNCTION_PASS("set-alignment", SetLoadStoreAlignmentPass()) FUNCTION_PASS("separate-const-offset-from-gep", SeparateConstOffsetFromGEPPass()) FUNCTION_PASS("sccp", SCCPPass()) FUNCTION_PASS("sink", SinkingPass()) Index: llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp =================================================================== --- llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp +++ llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp @@ -317,8 +317,10 @@ const NVPTXSubtarget &ST = *getTM().getSubtargetImpl(); addPass(createNVVMReflectPass(ST.getSmVersion())); - if (getOptLevel() != CodeGenOpt::None) + if (getOptLevel() != CodeGenOpt::None) { addPass(createNVPTXImageOptimizerPass()); + addPass(createSetAlignmentPass()); + } addPass(createNVPTXAssignValidGlobalNamesPass()); addPass(createGenericToNVVMPass()); Index: llvm/lib/Transforms/Scalar/CMakeLists.txt =================================================================== --- llvm/lib/Transforms/Scalar/CMakeLists.txt +++ llvm/lib/Transforms/Scalar/CMakeLists.txt @@ -72,6 +72,7 @@ SimpleLoopUnswitch.cpp SimplifyCFGPass.cpp Sink.cpp + SetLoadStoreAlignment.cpp SpeculativeExecution.cpp StraightLineStrengthReduce.cpp StructurizeCFG.cpp Index: llvm/lib/Transforms/Scalar/Scalar.cpp =================================================================== --- llvm/lib/Transforms/Scalar/Scalar.cpp +++ llvm/lib/Transforms/Scalar/Scalar.cpp @@ -97,6 +97,7 @@ initializeSROALegacyPassPass(Registry); initializeCFGSimplifyPassPass(Registry); initializeStructurizeCFGLegacyPassPass(Registry); + initializeSetAlignmentLegacyPassPass(Registry); initializeSimpleLoopUnswitchLegacyPassPass(Registry); initializeSinkingLegacyPassPass(Registry); initializeTailCallElimPass(Registry); Index: llvm/lib/Transforms/Scalar/SetLoadStoreAlignment.cpp =================================================================== --- /dev/null +++ llvm/lib/Transforms/Scalar/SetLoadStoreAlignment.cpp @@ -0,0 +1,186 @@ +//===- SetLoadStoreAlignment.cpp - LoadStore alignment Pass ---------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements a pass to set alignment information on load and store +// instructions. 
In order to do that, it applies the following strategy:
+// * First assume all phi nodes have all bits set to 0 to be able to handle
+//   cycles.
+// * Calculate known bits for every integer instruction.
+// * Every time we find an updated KnownBits, update all its uses.
+// * Run until we reach a fixed point.
+//
+// This approach makes it possible to handle cases with long arithmetic chains
+// as well as cases with cycles.
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Scalar/SetLoadStoreAlignment.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/Analysis/AssumptionCache.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/KnownBits.h"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Utils/Local.h"
+#include <algorithm>
+#include <utility>
+
+using namespace llvm;
+
+#define DEBUG_TYPE "set-alignment"
+
+static Align getKnownAlignment(const KnownBits &Known) {
+  unsigned TrailZ = Known.countMinTrailingZeros();
+
+  // Avoid trouble with ridiculously large TrailZ values, such as
+  // those computed from a null pointer.
+  // LLVM doesn't support alignments larger than (1 << MaxAlignmentExponent).
+  TrailZ = std::min(TrailZ, +Value::MaxAlignmentExponent);
+
+  Align Alignment = Align(1ull << std::min(Known.getBitWidth() - 1, TrailZ));
+  return Alignment;
+}
+
+static void
+calculateAndUpdateKnownBits(Instruction &I, const DataLayout &DL,
+                            DenseMap<const Value *, KnownBits> &FuncKnownBits,
+                            SmallSetVector<Instruction *, 16> &Worklist) {
+  if (!I.getType()->isIntOrIntVectorTy() && !I.getType()->isPtrOrPtrVectorTy())
+    return;
+
+  auto it = FuncKnownBits.find(&I);
+  Optional<KnownBits> previousValue;
+  if (it != FuncKnownBits.end()) {
+    previousValue = it->second;
+    FuncKnownBits.erase(it);
+  }
+
+  KnownBits Known = computeKnownBits(&I, DL, 0, nullptr, nullptr, nullptr,
+                                     nullptr, true, &FuncKnownBits);
+
+  FuncKnownBits[&I] = Known;
+  // If there is no update, return early and don't add uses to the worklist.
+  if (previousValue && *previousValue == Known)
+    return;
+  for (auto *U : I.users()) {
+    auto *UInst = dyn_cast<Instruction>(U);
+    if (!UInst)
+      continue;
+    if (FuncKnownBits.count(UInst))
+      Worklist.insert(UInst);
+  }
+}
+
+/// Returns the bitwidth of the given scalar or pointer type. For vector types,
+/// returns the element type's bitwidth.
+static unsigned getBitWidth(Type *Ty, const DataLayout &DL) {
+  if (unsigned BitWidth = Ty->getScalarSizeInBits())
+    return BitWidth;
+
+  return DL.getPointerTypeSizeInBits(Ty);
+}
+
+static bool SetLoadStoreAlignment(Function &F) {
+  bool changed = false;
+  DenseMap<const Value *, KnownBits> FuncKnownBits;
+  SmallSetVector<Instruction *, 16> Worklist;
+  const DataLayout &DL = F.getParent()->getDataLayout();
+
+  for (BasicBlock &BB : F) {
+    for (Instruction &I : BB) {
+      // Assume that phi nodes are all zero for the first iteration, then
+      // iterate to a fixed point to get the real known bits.
+      if (isa<PHINode>(I)) {
+        KnownBits allZero(getBitWidth(I.getType(), DL));
+        allZero.setAllZero();
+        FuncKnownBits.insert(std::make_pair(&I, allZero));
+        Worklist.insert(&I);
+      } else {
+        calculateAndUpdateKnownBits(I, DL, FuncKnownBits, Worklist);
+      }
+    }
+  }
+  // Continue to update until we reach a fixed point.
+  while (!Worklist.empty()) {
+    Instruction &I = *Worklist.pop_back_val();
+    calculateAndUpdateKnownBits(I, DL, FuncKnownBits, Worklist);
+  }
+
+  for (BasicBlock &BB : F) {
+    for (Instruction &I : BB) {
+      if (LoadInst *LI = dyn_cast<LoadInst>(&I)) {
+        Value *ptr = LI->getPointerOperand();
+        auto it = FuncKnownBits.find(ptr);
+        if (it != FuncKnownBits.end()) {
+          Align KnownAlign = getKnownAlignment(it->second);
+          if (KnownAlign > LI->getAlignment()) {
+            LI->setAlignment(KnownAlign);
+            changed = true;
+          }
+        }
+      } else if (StoreInst *SI = dyn_cast<StoreInst>(&I)) {
+        Value *ptr = SI->getPointerOperand();
+        auto it = FuncKnownBits.find(ptr);
+        if (it != FuncKnownBits.end()) {
+          Align KnownAlign = getKnownAlignment(it->second);
+          if (KnownAlign > SI->getAlignment()) {
+            SI->setAlignment(KnownAlign);
+            changed = true;
+          }
+        }
+      }
+    }
+  }
+  return changed;
+}
+
+PreservedAnalyses SetLoadStoreAlignmentPass::run(Function &F,
+                                                 FunctionAnalysisManager &AM) {
+  DenseMap<const Value *, KnownBits> FuncKnownBits;
+  SmallVector<Instruction *> Worklist;
+
+  if (!SetLoadStoreAlignment(F))
+    return PreservedAnalyses::all();
+
+  PreservedAnalyses PA;
+  PA.preserveSet<CFGAnalyses>();
+  return PA;
+}
+
+namespace {
+struct SetAlignmentLegacyPass : public FunctionPass {
+  static char ID;
+
+  void getAnalysisUsage(AnalysisUsage &AU) const override {
+    AU.setPreservesCFG();
+  }
+
+  SetAlignmentLegacyPass() : FunctionPass(ID) {
+    initializeSetAlignmentLegacyPassPass(*PassRegistry::getPassRegistry());
+  }
+
+  bool runOnFunction(Function &F) override {
+    if (skipFunction(F))
+      return false;
+    return SetLoadStoreAlignment(F);
+  }
+};
+} // namespace
+
+char SetAlignmentLegacyPass::ID = 0;
+INITIALIZE_PASS_BEGIN(SetAlignmentLegacyPass, "set-alignment",
+                      "Set alignment information", false, false)
+INITIALIZE_PASS_END(SetAlignmentLegacyPass, "set-alignment",
+                    "Set alignment information", false, false)
+
+// Public interface to the SetAlignment pass.
+FunctionPass *llvm::createSetAlignmentPass() {
+  return new SetAlignmentLegacyPass();
+}
Index: llvm/test/Transforms/SetLoadStoreAlignment/basic.ll
===================================================================
--- /dev/null
+++ llvm/test/Transforms/SetLoadStoreAlignment/basic.ll
@@ -0,0 +1,126 @@
+; RUN: opt -S --set-alignment < %s | FileCheck %s
+
+declare ptr @getPtr()
+
+; CHECK-LABEL: define float @phi_case
+; CHECK: load float, ptr %{{.*}}, align 32
+define float @phi_case(i64 %0, i1 %1) {
+  %3 = tail call align 32 ptr @getPtr()
+  br i1 %1, label %4, label %7
+
+4: ; preds = %2
+  %5 = shl i64 %0, 8
+  %6 = or i64 %5, 128
+  br label %7
+
+7: ; preds = %4, %2
+  %8 = phi i64 [ %6, %4 ], [ 0, %2 ]
+  %9 = getelementptr inbounds float, ptr %3, i64 %8
+  %10 = load float, ptr %9
+  ret float %10
+}
+
+; CHECK-LABEL: define float @nested_loops
+; CHECK: load float, ptr %{{.*}}, align 8
+define float @nested_loops(i64 %0, i1 %1) {
+  %3 = tail call align 32 ptr @getPtr()
+  %4 = shl i64 %0, 8
+  br i1 %1, label %5, label %18
+
+5: ; preds = %15, %2
+  %6 = phi i32 [ %16, %15 ], [ 0, %2 ]
+  %7 = phi i64 [ %12, %15 ], [ %4, %2 ]
+  br label %8
+
+8: ; preds = %8, %5
+  %9 = phi i32 [ 0, %5 ], [ %13, %8 ]
+  %10 = phi i64 [ %7, %5 ], [ %12, %8 ]
+  %11 = shl i64 %0, 1
+  %12 = add nsw i64 %11, %10
+  %13 = add nuw nsw i32 %9, 1
+  %14 = icmp eq i32 %13, 1000
+  br i1 %14, label %15, label %8
+
+15: ; preds = %8
+  %16 = add nuw nsw i32 %6, 1
+  %17 = icmp eq i32 %16, 1000
+  br i1 %17, label %21, label %5
+
+18: ; preds = %25, %2
+  %19 = phi i32 [ %26, %25 ], [ 0, %2 ]
+  %20 = phi i64 [ %32, %25 ], [ %4, %2 ]
+  br label %28
+
+21: ; preds = %25, %15
+  %22 = phi
i64 [ %12, %15 ], [ %32, %25 ] + %23 = getelementptr inbounds float, ptr %3, i64 %22 + %24 = load float, ptr %23 + ret float %24 + +25: ; preds = %28 + %26 = add nuw nsw i32 %19, 1 + %27 = icmp eq i32 %26, 1000 + br i1 %27, label %21, label %18 + +28: ; preds = %28, %18 + %29 = phi i32 [ 0, %18 ], [ %33, %28 ] + %30 = phi i64 [ %20, %18 ], [ %32, %28 ] + %31 = shl i64 %0, 16 + %32 = add nsw i64 %31, %30 + %33 = add nuw nsw i32 %29, 1 + %34 = icmp eq i32 %33, 1000 + br i1 %34, label %25, label %28 +} + +; CHECK-LABEL: define float @nested_loops_undef_init +; CHECK: load float, ptr %{{.*}}, align 4 +define float @nested_loops_undef_init(i64 %0, i1 %1) { + %3 = tail call align 32 ptr @getPtr() + %4 = shl i64 %0, 8 + br i1 %1, label %5, label %18 + +5: ; preds = %15, %2 + %6 = phi i32 [ %16, %15 ], [ 0, %2 ] + %7 = phi i64 [ %12, %15 ], [ undef, %2 ] + br label %8 + +8: ; preds = %8, %5 + %9 = phi i32 [ 0, %5 ], [ %13, %8 ] + %10 = phi i64 [ %7, %5 ], [ %12, %8 ] + %11 = shl i64 %0, 1 + %12 = add nsw i64 %11, %10 + %13 = add nuw nsw i32 %9, 1 + %14 = icmp eq i32 %13, 1000 + br i1 %14, label %15, label %8 + +15: ; preds = %8 + %16 = add nuw nsw i32 %6, 1 + %17 = icmp eq i32 %16, 1000 + br i1 %17, label %21, label %5 + +18: ; preds = %25, %2 + %19 = phi i32 [ %26, %25 ], [ 0, %2 ] + %20 = phi i64 [ %32, %25 ], [ %4, %2 ] + br label %28 + +21: ; preds = %25, %15 + %22 = phi i64 [ %12, %15 ], [ %32, %25 ] + %23 = getelementptr inbounds float, ptr %3, i64 %22 + %24 = load float, ptr %23, align 4 + ret float %24 + +25: ; preds = %28 + %26 = add nuw nsw i32 %19, 1 + %27 = icmp eq i32 %26, 1000 + br i1 %27, label %21, label %18 + +28: ; preds = %28, %18 + %29 = phi i32 [ 0, %18 ], [ %33, %28 ] + %30 = phi i64 [ %20, %18 ], [ %32, %28 ] + %31 = shl i64 %0, 16 + %32 = add nsw i64 %31, %30 + %33 = add nuw nsw i32 %29, 1 + %34 = icmp eq i32 %33, 1000 + br i1 %34, label %25, label %28 +} +
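
Usage note (illustrative only, not part of the patch): the new trailing KnownValues
parameter lets a caller share previously computed KnownBits between queries, which is
how SetLoadStoreAlignment.cpp keeps its fixed-point iteration cheap. Below is a minimal
sketch of such a caller. The helper name collectFunctionKnownBits and the choice to seed
the cache with every integer-typed instruction are assumptions made for illustration,
and the cache type DenseMap<const Value *, KnownBits> is assumed to match the parameter
added by this patch.

#include "llvm/ADT/DenseMap.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/InstIterator.h"
#include "llvm/Support/KnownBits.h"

using namespace llvm;

// Hypothetical helper: compute known bits once for every integer-typed
// instruction in F, letting later queries reuse earlier results via Cache.
static void collectFunctionKnownBits(Function &F,
                                     DenseMap<const Value *, KnownBits> &Cache) {
  const DataLayout &DL = F.getParent()->getDataLayout();
  for (Instruction &I : instructions(F)) {
    if (!I.getType()->isIntOrIntVectorTy())
      continue;
    // computeKnownBits consults Cache for operands that already have an
    // entry, so long instruction chains are not recomputed from scratch.
    KnownBits KB = computeKnownBits(&I, DL, /*Depth=*/0, /*AC=*/nullptr,
                                    /*CxtI=*/nullptr, /*DT=*/nullptr,
                                    /*ORE=*/nullptr, /*UseInstrInfo=*/true,
                                    &Cache);
    Cache[&I] = KB;
  }
}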