Index: llvm/include/llvm/Analysis/ValueTracking.h =================================================================== --- llvm/include/llvm/Analysis/ValueTracking.h +++ llvm/include/llvm/Analysis/ValueTracking.h @@ -15,6 +15,7 @@ #define LLVM_ANALYSIS_VALUETRACKING_H #include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/DenseMap.h" #include "llvm/ADT/Optional.h" #include "llvm/ADT/SmallSet.h" #include "llvm/IR/Constants.h" @@ -54,237 +55,235 @@ /// where V is a vector, the known zero and known one values are the /// same width as the vector element, and the bit is set only if it is true /// for all of the elements in the vector. - void computeKnownBits(const Value *V, KnownBits &Known, - const DataLayout &DL, unsigned Depth = 0, - AssumptionCache *AC = nullptr, - const Instruction *CxtI = nullptr, - const DominatorTree *DT = nullptr, - OptimizationRemarkEmitter *ORE = nullptr, - bool UseInstrInfo = true); +void computeKnownBits(const Value *V, KnownBits &Known, const DataLayout &DL, + unsigned Depth = 0, AssumptionCache *AC = nullptr, + const Instruction *CxtI = nullptr, + const DominatorTree *DT = nullptr, + OptimizationRemarkEmitter *ORE = nullptr, + bool UseInstrInfo = true, + DenseMap *KnownValues = nullptr); + +/// Determine which bits of V are known to be either zero or one and return +/// them in the KnownZero/KnownOne bit sets. +/// +/// This function is defined on values with integer type, values with pointer +/// type, and vectors of integers. In the case +/// where V is a vector, the known zero and known one values are the +/// same width as the vector element, and the bit is set only if it is true +/// for all of the demanded elements in the vector. +void computeKnownBits(const Value *V, const APInt &DemandedElts, + KnownBits &Known, const DataLayout &DL, + unsigned Depth = 0, AssumptionCache *AC = nullptr, + const Instruction *CxtI = nullptr, + const DominatorTree *DT = nullptr, + OptimizationRemarkEmitter *ORE = nullptr, + bool UseInstrInfo = true, + DenseMap *KnownValues = nullptr); - /// Determine which bits of V are known to be either zero or one and return - /// them in the KnownZero/KnownOne bit sets. - /// - /// This function is defined on values with integer type, values with pointer - /// type, and vectors of integers. In the case - /// where V is a vector, the known zero and known one values are the - /// same width as the vector element, and the bit is set only if it is true - /// for all of the demanded elements in the vector. - void computeKnownBits(const Value *V, const APInt &DemandedElts, - KnownBits &Known, const DataLayout &DL, - unsigned Depth = 0, AssumptionCache *AC = nullptr, - const Instruction *CxtI = nullptr, - const DominatorTree *DT = nullptr, - OptimizationRemarkEmitter *ORE = nullptr, - bool UseInstrInfo = true); +/// Returns the known bits rather than passing by reference. +KnownBits computeKnownBits(const Value *V, const DataLayout &DL, + unsigned Depth = 0, AssumptionCache *AC = nullptr, + const Instruction *CxtI = nullptr, + const DominatorTree *DT = nullptr, + OptimizationRemarkEmitter *ORE = nullptr, + bool UseInstrInfo = true, + DenseMap *KnownValues = nullptr); - /// Returns the known bits rather than passing by reference. 
- KnownBits computeKnownBits(const Value *V, const DataLayout &DL, - unsigned Depth = 0, AssumptionCache *AC = nullptr, - const Instruction *CxtI = nullptr, - const DominatorTree *DT = nullptr, - OptimizationRemarkEmitter *ORE = nullptr, - bool UseInstrInfo = true); - - /// Returns the known bits rather than passing by reference. - KnownBits computeKnownBits(const Value *V, const APInt &DemandedElts, - const DataLayout &DL, unsigned Depth = 0, - AssumptionCache *AC = nullptr, - const Instruction *CxtI = nullptr, - const DominatorTree *DT = nullptr, - OptimizationRemarkEmitter *ORE = nullptr, - bool UseInstrInfo = true); - - /// Compute known bits from the range metadata. - /// \p KnownZero the set of bits that are known to be zero - /// \p KnownOne the set of bits that are known to be one - void computeKnownBitsFromRangeMetadata(const MDNode &Ranges, - KnownBits &Known); - - /// Return true if LHS and RHS have no common bits set. - bool haveNoCommonBitsSet(const Value *LHS, const Value *RHS, - const DataLayout &DL, +/// Returns the known bits rather than passing by reference. +KnownBits computeKnownBits(const Value *V, const APInt &DemandedElts, + const DataLayout &DL, unsigned Depth = 0, AssumptionCache *AC = nullptr, const Instruction *CxtI = nullptr, const DominatorTree *DT = nullptr, - bool UseInstrInfo = true); - - /// Return true if the given value is known to have exactly one bit set when - /// defined. For vectors return true if every element is known to be a power - /// of two when defined. Supports values with integer or pointer type and - /// vectors of integers. If 'OrZero' is set, then return true if the given - /// value is either a power of two or zero. - bool isKnownToBeAPowerOfTwo(const Value *V, const DataLayout &DL, - bool OrZero = false, unsigned Depth = 0, - AssumptionCache *AC = nullptr, - const Instruction *CxtI = nullptr, - const DominatorTree *DT = nullptr, - bool UseInstrInfo = true); - - bool isOnlyUsedInZeroEqualityComparison(const Instruction *CxtI); - - /// Return true if the given value is known to be non-zero when defined. For - /// vectors, return true if every element is known to be non-zero when - /// defined. For pointers, if the context instruction and dominator tree are - /// specified, perform context-sensitive analysis and return true if the - /// pointer couldn't possibly be null at the specified instruction. - /// Supports values with integer or pointer type and vectors of integers. - bool isKnownNonZero(const Value *V, const DataLayout &DL, unsigned Depth = 0, - AssumptionCache *AC = nullptr, - const Instruction *CxtI = nullptr, - const DominatorTree *DT = nullptr, - bool UseInstrInfo = true); - - /// Return true if the two given values are negation. - /// Currently can recoginze Value pair: - /// 1: if X = sub (0, Y) or Y = sub (0, X) - /// 2: if X = sub (A, B) and Y = sub (B, A) - bool isKnownNegation(const Value *X, const Value *Y, bool NeedNSW = false); - - /// Returns true if the give value is known to be non-negative. - bool isKnownNonNegative(const Value *V, const DataLayout &DL, - unsigned Depth = 0, - AssumptionCache *AC = nullptr, - const Instruction *CxtI = nullptr, - const DominatorTree *DT = nullptr, - bool UseInstrInfo = true); - - /// Returns true if the given value is known be positive (i.e. non-negative - /// and non-zero). 
- bool isKnownPositive(const Value *V, const DataLayout &DL, unsigned Depth = 0, - AssumptionCache *AC = nullptr, - const Instruction *CxtI = nullptr, - const DominatorTree *DT = nullptr, - bool UseInstrInfo = true); - - /// Returns true if the given value is known be negative (i.e. non-positive - /// and non-zero). - bool isKnownNegative(const Value *V, const DataLayout &DL, unsigned Depth = 0, - AssumptionCache *AC = nullptr, - const Instruction *CxtI = nullptr, - const DominatorTree *DT = nullptr, - bool UseInstrInfo = true); - - /// Return true if the given values are known to be non-equal when defined. - /// Supports scalar integer types only. - bool isKnownNonEqual(const Value *V1, const Value *V2, const DataLayout &DL, - AssumptionCache *AC = nullptr, - const Instruction *CxtI = nullptr, - const DominatorTree *DT = nullptr, - bool UseInstrInfo = true); - - /// Return true if 'V & Mask' is known to be zero. We use this predicate to - /// simplify operations downstream. Mask is known to be zero for bits that V - /// cannot have. - /// - /// This function is defined on values with integer type, values with pointer - /// type, and vectors of integers. In the case - /// where V is a vector, the mask, known zero, and known one values are the - /// same width as the vector element, and the bit is set only if it is true - /// for all of the elements in the vector. - bool MaskedValueIsZero(const Value *V, const APInt &Mask, - const DataLayout &DL, - unsigned Depth = 0, AssumptionCache *AC = nullptr, + OptimizationRemarkEmitter *ORE = nullptr, + bool UseInstrInfo = true, + DenseMap *KnownValues = nullptr); + +/// Compute known bits from the range metadata. +/// \p KnownZero the set of bits that are known to be zero +/// \p KnownOne the set of bits that are known to be one +void computeKnownBitsFromRangeMetadata(const MDNode &Ranges, KnownBits &Known); + +/// Return true if LHS and RHS have no common bits set. +bool haveNoCommonBitsSet(const Value *LHS, const Value *RHS, + const DataLayout &DL, AssumptionCache *AC = nullptr, const Instruction *CxtI = nullptr, const DominatorTree *DT = nullptr, bool UseInstrInfo = true); - /// Return the number of times the sign bit of the register is replicated into - /// the other bits. We know that at least 1 bit is always equal to the sign - /// bit (itself), but other cases can give us information. For example, - /// immediately after an "ashr X, 2", we know that the top 3 bits are all - /// equal to each other, so we return 3. For vectors, return the number of - /// sign bits for the vector element with the mininum number of known sign - /// bits. - unsigned ComputeNumSignBits(const Value *Op, const DataLayout &DL, - unsigned Depth = 0, AssumptionCache *AC = nullptr, - const Instruction *CxtI = nullptr, - const DominatorTree *DT = nullptr, - bool UseInstrInfo = true); - - /// Get the upper bound on bit size for this Value \p Op as a signed integer. - /// i.e. x == sext(trunc(x to MaxSignificantBits) to bitwidth(x)). - /// Similar to the APInt::getSignificantBits function. - unsigned ComputeMaxSignificantBits(const Value *Op, const DataLayout &DL, - unsigned Depth = 0, - AssumptionCache *AC = nullptr, - const Instruction *CxtI = nullptr, - const DominatorTree *DT = nullptr); - - /// Map a call instruction to an intrinsic ID. Libcalls which have equivalent - /// intrinsics are treated as-if they were intrinsics. 
- Intrinsic::ID getIntrinsicForCallSite(const CallBase &CB, - const TargetLibraryInfo *TLI); +/// Return true if the given value is known to have exactly one bit set when +/// defined. For vectors return true if every element is known to be a power +/// of two when defined. Supports values with integer or pointer type and +/// vectors of integers. If 'OrZero' is set, then return true if the given +/// value is either a power of two or zero. +bool isKnownToBeAPowerOfTwo(const Value *V, const DataLayout &DL, + bool OrZero = false, unsigned Depth = 0, + AssumptionCache *AC = nullptr, + const Instruction *CxtI = nullptr, + const DominatorTree *DT = nullptr, + bool UseInstrInfo = true); + +bool isOnlyUsedInZeroEqualityComparison(const Instruction *CxtI); + +/// Return true if the given value is known to be non-zero when defined. For +/// vectors, return true if every element is known to be non-zero when +/// defined. For pointers, if the context instruction and dominator tree are +/// specified, perform context-sensitive analysis and return true if the +/// pointer couldn't possibly be null at the specified instruction. +/// Supports values with integer or pointer type and vectors of integers. +bool isKnownNonZero(const Value *V, const DataLayout &DL, unsigned Depth = 0, + AssumptionCache *AC = nullptr, + const Instruction *CxtI = nullptr, + const DominatorTree *DT = nullptr, + bool UseInstrInfo = true); + +/// Return true if the two given values are negation. +/// Currently can recoginze Value pair: +/// 1: if X = sub (0, Y) or Y = sub (0, X) +/// 2: if X = sub (A, B) and Y = sub (B, A) +bool isKnownNegation(const Value *X, const Value *Y, bool NeedNSW = false); + +/// Returns true if the give value is known to be non-negative. +bool isKnownNonNegative(const Value *V, const DataLayout &DL, + unsigned Depth = 0, AssumptionCache *AC = nullptr, + const Instruction *CxtI = nullptr, + const DominatorTree *DT = nullptr, + bool UseInstrInfo = true); - /// Return true if we can prove that the specified FP value is never equal to - /// -0.0. - bool CannotBeNegativeZero(const Value *V, const TargetLibraryInfo *TLI, - unsigned Depth = 0); +/// Returns true if the given value is known be positive (i.e. non-negative +/// and non-zero). +bool isKnownPositive(const Value *V, const DataLayout &DL, unsigned Depth = 0, + AssumptionCache *AC = nullptr, + const Instruction *CxtI = nullptr, + const DominatorTree *DT = nullptr, + bool UseInstrInfo = true); + +/// Returns true if the given value is known be negative (i.e. non-positive +/// and non-zero). +bool isKnownNegative(const Value *V, const DataLayout &DL, unsigned Depth = 0, + AssumptionCache *AC = nullptr, + const Instruction *CxtI = nullptr, + const DominatorTree *DT = nullptr, + bool UseInstrInfo = true); + +/// Return true if the given values are known to be non-equal when defined. +/// Supports scalar integer types only. +bool isKnownNonEqual(const Value *V1, const Value *V2, const DataLayout &DL, + AssumptionCache *AC = nullptr, + const Instruction *CxtI = nullptr, + const DominatorTree *DT = nullptr, + bool UseInstrInfo = true); + +/// Return true if 'V & Mask' is known to be zero. We use this predicate to +/// simplify operations downstream. Mask is known to be zero for bits that V +/// cannot have. +/// +/// This function is defined on values with integer type, values with pointer +/// type, and vectors of integers. 
In the case +/// where V is a vector, the mask, known zero, and known one values are the +/// same width as the vector element, and the bit is set only if it is true +/// for all of the elements in the vector. +bool MaskedValueIsZero(const Value *V, const APInt &Mask, const DataLayout &DL, + unsigned Depth = 0, AssumptionCache *AC = nullptr, + const Instruction *CxtI = nullptr, + const DominatorTree *DT = nullptr, + bool UseInstrInfo = true); - /// Return true if we can prove that the specified FP value is either NaN or - /// never less than -0.0. - /// - /// NaN --> true - /// +0 --> true - /// -0 --> true - /// x > +0 --> true - /// x < -0 --> false - bool CannotBeOrderedLessThanZero(const Value *V, const TargetLibraryInfo *TLI); - - /// Return true if the floating-point scalar value is not an infinity or if - /// the floating-point vector value has no infinities. Return false if a value - /// could ever be infinity. - bool isKnownNeverInfinity(const Value *V, const TargetLibraryInfo *TLI, - unsigned Depth = 0); - - /// Return true if the floating-point scalar value is not a NaN or if the - /// floating-point vector value has no NaN elements. Return false if a value - /// could ever be NaN. - bool isKnownNeverNaN(const Value *V, const TargetLibraryInfo *TLI, - unsigned Depth = 0); - - /// Return true if we can prove that the specified FP value's sign bit is 0. - /// - /// NaN --> true/false (depending on the NaN's sign bit) - /// +0 --> true - /// -0 --> false - /// x > +0 --> true - /// x < -0 --> false - bool SignBitMustBeZero(const Value *V, const TargetLibraryInfo *TLI); - - /// If the specified value can be set by repeating the same byte in memory, - /// return the i8 value that it is represented with. This is true for all i8 - /// values obviously, but is also true for i32 0, i32 -1, i16 0xF0F0, double - /// 0.0 etc. If the value can't be handled with a repeated byte store (e.g. - /// i16 0x1234), return null. If the value is entirely undef and padding, - /// return undef. - Value *isBytewiseValue(Value *V, const DataLayout &DL); - - /// Given an aggregate and an sequence of indices, see if the scalar value - /// indexed is already around as a register, for example if it were inserted - /// directly into the aggregate. - /// - /// If InsertBefore is not null, this function will duplicate (modified) - /// insertvalues when a part of a nested struct is extracted. - Value *FindInsertedValue(Value *V, - ArrayRef idx_range, - Instruction *InsertBefore = nullptr); - - /// Analyze the specified pointer to see if it can be expressed as a base - /// pointer plus a constant offset. Return the base and offset to the caller. - /// - /// This is a wrapper around Value::stripAndAccumulateConstantOffsets that - /// creates and later unpacks the required APInt. - inline Value *GetPointerBaseWithConstantOffset(Value *Ptr, int64_t &Offset, - const DataLayout &DL, - bool AllowNonInbounds = true) { - APInt OffsetAPInt(DL.getIndexTypeSizeInBits(Ptr->getType()), 0); - Value *Base = - Ptr->stripAndAccumulateConstantOffsets(DL, OffsetAPInt, AllowNonInbounds); - - Offset = OffsetAPInt.getSExtValue(); - return Base; - } +/// Return the number of times the sign bit of the register is replicated into +/// the other bits. We know that at least 1 bit is always equal to the sign +/// bit (itself), but other cases can give us information. For example, +/// immediately after an "ashr X, 2", we know that the top 3 bits are all +/// equal to each other, so we return 3. 
For vectors, return the number of +/// sign bits for the vector element with the mininum number of known sign +/// bits. +unsigned ComputeNumSignBits(const Value *Op, const DataLayout &DL, + unsigned Depth = 0, AssumptionCache *AC = nullptr, + const Instruction *CxtI = nullptr, + const DominatorTree *DT = nullptr, + bool UseInstrInfo = true); + +/// Get the upper bound on bit size for this Value \p Op as a signed integer. +/// i.e. x == sext(trunc(x to MaxSignificantBits) to bitwidth(x)). +/// Similar to the APInt::getSignificantBits function. +unsigned ComputeMaxSignificantBits(const Value *Op, const DataLayout &DL, + unsigned Depth = 0, + AssumptionCache *AC = nullptr, + const Instruction *CxtI = nullptr, + const DominatorTree *DT = nullptr); + +/// Map a call instruction to an intrinsic ID. Libcalls which have equivalent +/// intrinsics are treated as-if they were intrinsics. +Intrinsic::ID getIntrinsicForCallSite(const CallBase &CB, + const TargetLibraryInfo *TLI); + +/// Return true if we can prove that the specified FP value is never equal to +/// -0.0. +bool CannotBeNegativeZero(const Value *V, const TargetLibraryInfo *TLI, + unsigned Depth = 0); + +/// Return true if we can prove that the specified FP value is either NaN or +/// never less than -0.0. +/// +/// NaN --> true +/// +0 --> true +/// -0 --> true +/// x > +0 --> true +/// x < -0 --> false +bool CannotBeOrderedLessThanZero(const Value *V, const TargetLibraryInfo *TLI); + +/// Return true if the floating-point scalar value is not an infinity or if +/// the floating-point vector value has no infinities. Return false if a value +/// could ever be infinity. +bool isKnownNeverInfinity(const Value *V, const TargetLibraryInfo *TLI, + unsigned Depth = 0); + +/// Return true if the floating-point scalar value is not a NaN or if the +/// floating-point vector value has no NaN elements. Return false if a value +/// could ever be NaN. +bool isKnownNeverNaN(const Value *V, const TargetLibraryInfo *TLI, + unsigned Depth = 0); + +/// Return true if we can prove that the specified FP value's sign bit is 0. +/// +/// NaN --> true/false (depending on the NaN's sign bit) +/// +0 --> true +/// -0 --> false +/// x > +0 --> true +/// x < -0 --> false +bool SignBitMustBeZero(const Value *V, const TargetLibraryInfo *TLI); + +/// If the specified value can be set by repeating the same byte in memory, +/// return the i8 value that it is represented with. This is true for all i8 +/// values obviously, but is also true for i32 0, i32 -1, i16 0xF0F0, double +/// 0.0 etc. If the value can't be handled with a repeated byte store (e.g. +/// i16 0x1234), return null. If the value is entirely undef and padding, +/// return undef. +Value *isBytewiseValue(Value *V, const DataLayout &DL); + +/// Given an aggregate and an sequence of indices, see if the scalar value +/// indexed is already around as a register, for example if it were inserted +/// directly into the aggregate. +/// +/// If InsertBefore is not null, this function will duplicate (modified) +/// insertvalues when a part of a nested struct is extracted. +Value *FindInsertedValue(Value *V, ArrayRef idx_range, + Instruction *InsertBefore = nullptr); + +/// Analyze the specified pointer to see if it can be expressed as a base +/// pointer plus a constant offset. Return the base and offset to the caller. +/// +/// This is a wrapper around Value::stripAndAccumulateConstantOffsets that +/// creates and later unpacks the required APInt. 
+inline Value *GetPointerBaseWithConstantOffset(Value *Ptr, int64_t &Offset, + const DataLayout &DL, + bool AllowNonInbounds = true) { + APInt OffsetAPInt(DL.getIndexTypeSizeInBits(Ptr->getType()), 0); + Value *Base = + Ptr->stripAndAccumulateConstantOffsets(DL, OffsetAPInt, AllowNonInbounds); + + Offset = OffsetAPInt.getSExtValue(); + return Base; +} inline const Value * GetPointerBaseWithConstantOffset(const Value *Ptr, int64_t &Offset, const DataLayout &DL, Index: llvm/include/llvm/InitializePasses.h =================================================================== --- llvm/include/llvm/InitializePasses.h +++ llvm/include/llvm/InitializePasses.h @@ -402,6 +402,7 @@ void initializeScalarizerLegacyPassPass(PassRegistry&); void initializeScavengerTestPass(PassRegistry&); void initializeScopedNoAliasAAWrapperPassPass(PassRegistry&); +void initializeSetAlignmentLegacyPassPass(PassRegistry &); void initializeSeparateConstOffsetFromGEPLegacyPassPass(PassRegistry &); void initializeShadowStackGCLoweringPass(PassRegistry&); void initializeShrinkWrapPass(PassRegistry&); Index: llvm/include/llvm/LinkAllPasses.h =================================================================== --- llvm/include/llvm/LinkAllPasses.h +++ llvm/include/llvm/LinkAllPasses.h @@ -164,6 +164,7 @@ (void) llvm::createRegionViewerPass(); (void) llvm::createSCCPPass(); (void) llvm::createSafeStackPass(); + (void)llvm::createSetAlignmentPass(); (void) llvm::createSROAPass(); (void) llvm::createSingleLoopExtractorPass(); (void) llvm::createStripSymbolsPass(); Index: llvm/include/llvm/Transforms/Scalar.h =================================================================== --- llvm/include/llvm/Transforms/Scalar.h +++ llvm/include/llvm/Transforms/Scalar.h @@ -560,6 +560,13 @@ // and scatter intrinsics with scalar code when target doesn't support them. // FunctionPass *createScalarizeMaskedMemIntrinLegacyPass(); + +//===----------------------------------------------------------------------===// +// +// createSetAlignmentPass - Set alignment information on instructions supporting +// it like load, store based on analysis of KnownBits. +// +FunctionPass *createSetAlignmentPass(); } // End llvm namespace #endif Index: llvm/include/llvm/Transforms/Scalar/SetLoadStoreAlignment.h =================================================================== --- /dev/null +++ llvm/include/llvm/Transforms/Scalar/SetLoadStoreAlignment.h @@ -0,0 +1,26 @@ +//===-- SetLoadStoreAlignment.h - Set alignment -----------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TRANSFORMS_SCALAR_SETLOADSTOREALIGNMENT_H +#define LLVM_TRANSFORMS_SCALAR_SETLOADSTOREALIGNMENT_H + +#include "llvm/IR/PassManager.h" + +namespace llvm { + +class Function; + +/// Set alignment on load/store instructions based on known bits analysis. 
+class SetLoadStoreAlignmentPass
+    : public PassInfoMixin<SetLoadStoreAlignmentPass> {
+public:
+  PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
+};
+} // namespace llvm
+
+#endif // LLVM_TRANSFORMS_SCALAR_SETLOADSTOREALIGNMENT_H
Index: llvm/lib/Analysis/ValueTracking.cpp
===================================================================
--- llvm/lib/Analysis/ValueTracking.cpp
+++ llvm/lib/Analysis/ValueTracking.cpp
@@ -120,10 +120,15 @@
   /// If true, it is safe to use metadata during simplification.
   InstrInfoQuery IIQ;
 
+  // Provides a cache of KnownBits already calculated.
+  DenseMap<const Value *, KnownBits> *KnownValues;
+
   Query(const DataLayout &DL, AssumptionCache *AC, const Instruction *CxtI,
         const DominatorTree *DT, bool UseInstrInfo,
-        OptimizationRemarkEmitter *ORE = nullptr)
-      : DL(DL), AC(AC), CxtI(CxtI), DT(DT), ORE(ORE), IIQ(UseInstrInfo) {}
+        OptimizationRemarkEmitter *ORE = nullptr,
+        DenseMap<const Value *, KnownBits> *KnownValues = nullptr)
+      : DL(DL), AC(AC), CxtI(CxtI), DT(DT), ORE(ORE), IIQ(UseInstrInfo),
+        KnownValues(KnownValues) {}
 };
 
 } // end anonymous namespace
@@ -222,18 +227,22 @@
                             const DataLayout &DL, unsigned Depth,
                             AssumptionCache *AC, const Instruction *CxtI,
                             const DominatorTree *DT,
-                            OptimizationRemarkEmitter *ORE, bool UseInstrInfo) {
-  ::computeKnownBits(V, Known, Depth,
-                     Query(DL, AC, safeCxtI(V, CxtI), DT, UseInstrInfo, ORE));
+                            OptimizationRemarkEmitter *ORE, bool UseInstrInfo,
+                            DenseMap<const Value *, KnownBits> *KnownValues) {
+  ::computeKnownBits(
+      V, Known, Depth,
+      Query(DL, AC, safeCxtI(V, CxtI), DT, UseInstrInfo, ORE, KnownValues));
 }
 
 void llvm::computeKnownBits(const Value *V, const APInt &DemandedElts,
                             KnownBits &Known, const DataLayout &DL,
                             unsigned Depth, AssumptionCache *AC,
                             const Instruction *CxtI, const DominatorTree *DT,
-                            OptimizationRemarkEmitter *ORE, bool UseInstrInfo) {
-  ::computeKnownBits(V, DemandedElts, Known, Depth,
-                     Query(DL, AC, safeCxtI(V, CxtI), DT, UseInstrInfo, ORE));
+                            OptimizationRemarkEmitter *ORE, bool UseInstrInfo,
+                            DenseMap<const Value *, KnownBits> *KnownValues) {
+  ::computeKnownBits(
+      V, DemandedElts, Known, Depth,
+      Query(DL, AC, safeCxtI(V, CxtI), DT, UseInstrInfo, ORE, KnownValues));
 }
 
 static KnownBits computeKnownBits(const Value *V, const APInt &DemandedElts,
@@ -247,9 +256,11 @@
                                    const Instruction *CxtI,
                                    const DominatorTree *DT,
                                    OptimizationRemarkEmitter *ORE,
-                                   bool UseInstrInfo) {
+                                   bool UseInstrInfo,
+                                   DenseMap<const Value *, KnownBits> *KnownValues) {
   return ::computeKnownBits(
-      V, Depth, Query(DL, AC, safeCxtI(V, CxtI), DT, UseInstrInfo, ORE));
+      V, Depth,
+      Query(DL, AC, safeCxtI(V, CxtI), DT, UseInstrInfo, ORE, KnownValues));
 }
 
 KnownBits llvm::computeKnownBits(const Value *V, const APInt &DemandedElts,
@@ -257,10 +268,11 @@
                                  AssumptionCache *AC, const Instruction *CxtI,
                                  const DominatorTree *DT,
                                  OptimizationRemarkEmitter *ORE,
-                                 bool UseInstrInfo) {
+                                 bool UseInstrInfo,
+                                 DenseMap<const Value *, KnownBits> *KnownValues) {
   return ::computeKnownBits(
       V, DemandedElts, Depth,
-      Query(DL, AC, safeCxtI(V, CxtI), DT, UseInstrInfo, ORE));
+      Query(DL, AC, safeCxtI(V, CxtI), DT, UseInstrInfo, ORE, KnownValues));
 }
 
 bool llvm::haveNoCommonBitsSet(const Value *LHS, const Value *RHS,
@@ -1912,6 +1924,14 @@
 /// for all of the demanded elements in the vector specified by DemandedElts.
 void computeKnownBits(const Value *V, const APInt &DemandedElts,
                       KnownBits &Known, unsigned Depth, const Query &Q) {
+  if (Q.KnownValues) {
+    auto it = Q.KnownValues->find(V);
+    if (it != Q.KnownValues->end()) {
+      // If we already know the result return.
+ Known = it->second; + return; + } + } if (!DemandedElts || isa(V->getType())) { // No demanded elts or V is a scalable vector, better to assume we don't // know anything. Index: llvm/lib/Passes/PassBuilder.cpp =================================================================== --- llvm/lib/Passes/PassBuilder.cpp +++ llvm/lib/Passes/PassBuilder.cpp @@ -207,6 +207,7 @@ #include "llvm/Transforms/Scalar/ScalarizeMaskedMemIntrin.h" #include "llvm/Transforms/Scalar/Scalarizer.h" #include "llvm/Transforms/Scalar/SeparateConstOffsetFromGEP.h" +#include "llvm/Transforms/Scalar/SetLoadStoreAlignment.h" #include "llvm/Transforms/Scalar/SimpleLoopUnswitch.h" #include "llvm/Transforms/Scalar/SimplifyCFG.h" #include "llvm/Transforms/Scalar/Sink.h" Index: llvm/lib/Passes/PassRegistry.def =================================================================== --- llvm/lib/Passes/PassRegistry.def +++ llvm/lib/Passes/PassRegistry.def @@ -357,6 +357,7 @@ FUNCTION_PASS("reg2mem", RegToMemPass()) FUNCTION_PASS("scalarize-masked-mem-intrin", ScalarizeMaskedMemIntrinPass()) FUNCTION_PASS("scalarizer", ScalarizerPass()) +FUNCTION_PASS("set-alignment", SetLoadStoreAlignmentPass()) FUNCTION_PASS("separate-const-offset-from-gep", SeparateConstOffsetFromGEPPass()) FUNCTION_PASS("sccp", SCCPPass()) FUNCTION_PASS("sink", SinkingPass()) Index: llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp =================================================================== --- llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp +++ llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp @@ -317,8 +317,10 @@ const NVPTXSubtarget &ST = *getTM().getSubtargetImpl(); addPass(createNVVMReflectPass(ST.getSmVersion())); - if (getOptLevel() != CodeGenOpt::None) + if (getOptLevel() != CodeGenOpt::None) { addPass(createNVPTXImageOptimizerPass()); + addPass(createSetAlignmentPass()); + } addPass(createNVPTXAssignValidGlobalNamesPass()); addPass(createGenericToNVVMPass()); Index: llvm/lib/Transforms/Scalar/CMakeLists.txt =================================================================== --- llvm/lib/Transforms/Scalar/CMakeLists.txt +++ llvm/lib/Transforms/Scalar/CMakeLists.txt @@ -72,6 +72,7 @@ SimpleLoopUnswitch.cpp SimplifyCFGPass.cpp Sink.cpp + SetLoadStoreAlignment.cpp SpeculativeExecution.cpp StraightLineStrengthReduce.cpp StructurizeCFG.cpp Index: llvm/lib/Transforms/Scalar/Scalar.cpp =================================================================== --- llvm/lib/Transforms/Scalar/Scalar.cpp +++ llvm/lib/Transforms/Scalar/Scalar.cpp @@ -97,6 +97,7 @@ initializeSROALegacyPassPass(Registry); initializeCFGSimplifyPassPass(Registry); initializeStructurizeCFGLegacyPassPass(Registry); + initializeSetAlignmentLegacyPassPass(Registry); initializeSimpleLoopUnswitchLegacyPassPass(Registry); initializeSinkingLegacyPassPass(Registry); initializeTailCallElimPass(Registry); Index: llvm/lib/Transforms/Scalar/SetLoadStoreAlignment.cpp =================================================================== --- /dev/null +++ llvm/lib/Transforms/Scalar/SetLoadStoreAlignment.cpp @@ -0,0 +1,186 @@ +//===- SetLoadStoreAlignment.cpp - LoadStore alignment Pass ---------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements a pass to set alignment information on load and store +// instructions. 
In order to do that, it applies the following strategy:
+// * First assume all phi nodes have all bits set to 0 to be able to handle
+//   cycles.
+// * Calculate known bits for every integer instruction.
+// * Every time we find an updated KnownBits, update all its uses.
+// * Run until we reach a fixed point.
+//
+// This approach makes it possible to handle cases with long arithmetic chains
+// as well as cases with cycles.
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Scalar/SetLoadStoreAlignment.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/Analysis/AssumptionCache.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/KnownBits.h"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Utils/Local.h"
+#include <algorithm>
+#include <utility>
+
+using namespace llvm;
+
+#define DEBUG_TYPE "set-alignment"
+
+static Align getKnownAlignment(const KnownBits &Known) {
+  unsigned TrailZ = Known.countMinTrailingZeros();
+
+  // Avoid trouble with ridiculously large TrailZ values, such as
+  // those computed from a null pointer.
+  // LLVM doesn't support alignments larger than (1 << MaxAlignmentExponent).
+  TrailZ = std::min(TrailZ, +Value::MaxAlignmentExponent);
+
+  Align Alignment = Align(1ull << std::min(Known.getBitWidth() - 1, TrailZ));
+  return Alignment;
+}
+
+static void
+calculateAndUpdateKnownBits(Instruction &I, const DataLayout &DL,
+                            DenseMap<const Value *, KnownBits> &FuncKnownBits,
+                            SmallSetVector<Instruction *, 16> &Worklist) {
+  if (!I.getType()->isIntOrIntVectorTy() && !I.getType()->isPtrOrPtrVectorTy())
+    return;
+
+  auto it = FuncKnownBits.find(&I);
+  Optional<KnownBits> previousValue;
+  if (it != FuncKnownBits.end()) {
+    previousValue = it->second;
+    FuncKnownBits.erase(it);
+  }
+
+  KnownBits Known = computeKnownBits(&I, DL, 0, nullptr, nullptr, nullptr,
+                                     nullptr, true, &FuncKnownBits);
+
+  FuncKnownBits[&I] = Known;
+  // If there is no update, return early and don't add uses to the worklist.
+  if (previousValue && *previousValue == Known)
+    return;
+  for (auto *U : I.users()) {
+    auto *UInst = dyn_cast<Instruction>(U);
+    if (!UInst)
+      continue;
+    if (FuncKnownBits.count(UInst))
+      Worklist.insert(UInst);
+  }
+}
+
+/// Returns the bitwidth of the given scalar or pointer type. For vector types,
+/// returns the element type's bitwidth.
+static unsigned getBitWidth(Type *Ty, const DataLayout &DL) {
+  if (unsigned BitWidth = Ty->getScalarSizeInBits())
+    return BitWidth;
+
+  return DL.getPointerTypeSizeInBits(Ty);
+}
+
+static bool SetLoadStoreAlignment(Function &F) {
+  bool changed = false;
+  DenseMap<const Value *, KnownBits> FuncKnownBits;
+  SmallSetVector<Instruction *, 16> Worklist;
+  const DataLayout &DL = F.getParent()->getDataLayout();
+
+  for (BasicBlock &BB : F) {
+    for (Instruction &I : BB) {
+      // Assume that phi nodes are all zero for the first iteration, then
+      // iterate to a fixed point to get the real known bits.
+      if (isa<PHINode>(I)) {
+        KnownBits allZero(getBitWidth(I.getType(), DL));
+        allZero.setAllZero();
+        FuncKnownBits.insert(std::make_pair(&I, allZero));
+        Worklist.insert(&I);
+      } else {
+        calculateAndUpdateKnownBits(I, DL, FuncKnownBits, Worklist);
+      }
+    }
+  }
+  // Continue to update until we reach a fixed point.
+  while (!Worklist.empty()) {
+    Instruction &I = *Worklist.pop_back_val();
+    calculateAndUpdateKnownBits(I, DL, FuncKnownBits, Worklist);
+  }
+
+  for (BasicBlock &BB : F) {
+    for (Instruction &I : BB) {
+      if (LoadInst *LI = dyn_cast<LoadInst>(&I)) {
+        Value *ptr = LI->getPointerOperand();
+        auto it = FuncKnownBits.find(ptr);
+        if (it != FuncKnownBits.end()) {
+          Align KnownAlign = getKnownAlignment(it->second);
+          if (KnownAlign > LI->getAlignment()) {
+            LI->setAlignment(KnownAlign);
+            changed = true;
+          }
+        }
+      } else if (StoreInst *SI = dyn_cast<StoreInst>(&I)) {
+        Value *ptr = SI->getPointerOperand();
+        auto it = FuncKnownBits.find(ptr);
+        if (it != FuncKnownBits.end()) {
+          Align KnownAlign = getKnownAlignment(it->second);
+          if (KnownAlign > SI->getAlignment()) {
+            SI->setAlignment(KnownAlign);
+            changed = true;
+          }
+        }
+      }
+    }
+  }
+  return changed;
+}
+
+PreservedAnalyses SetLoadStoreAlignmentPass::run(Function &F,
+                                                 FunctionAnalysisManager &AM) {
+  DenseMap<const Value *, KnownBits> FuncKnownBits;
+  SmallVector<Instruction *> Worklist;
+
+  if (!SetLoadStoreAlignment(F))
+    return PreservedAnalyses::all();
+
+  PreservedAnalyses PA;
+  PA.preserveSet<CFGAnalyses>();
+  return PA;
+}
+
+namespace {
+struct SetAlignmentLegacyPass : public FunctionPass {
+  static char ID;
+
+  void getAnalysisUsage(AnalysisUsage &AU) const override {
+    AU.setPreservesCFG();
+  }
+
+  SetAlignmentLegacyPass() : FunctionPass(ID) {
+    initializeSetAlignmentLegacyPassPass(*PassRegistry::getPassRegistry());
+  }
+
+  bool runOnFunction(Function &F) override {
+    if (skipFunction(F))
+      return false;
+    return SetLoadStoreAlignment(F);
+  }
+};
+} // namespace
+
+char SetAlignmentLegacyPass::ID = 0;
+INITIALIZE_PASS_BEGIN(SetAlignmentLegacyPass, "set-alignment",
+                      "Set alignment information", false, false)
+INITIALIZE_PASS_END(SetAlignmentLegacyPass, "set-alignment",
+                    "Set alignment information", false, false)
+
+// Public interface to the SetAlignment pass.
+FunctionPass *llvm::createSetAlignmentPass() {
+  return new SetAlignmentLegacyPass();
+}
Index: llvm/test/Transforms/SetLoadStoreAlignment/basic.ll
===================================================================
--- /dev/null
+++ llvm/test/Transforms/SetLoadStoreAlignment/basic.ll
@@ -0,0 +1,126 @@
+; RUN: opt -S --set-alignment < %s | FileCheck %s
+
+declare ptr @getPtr()
+
+; CHECK-LABEL: define float @phi_case
+; CHECK: load float, ptr %{{.*}}, align 32
+define float @phi_case(i64 %0, i1 %1) {
+  %3 = tail call align 32 ptr @getPtr()
+  br i1 %1, label %4, label %7
+
+4: ; preds = %2
+  %5 = shl i64 %0, 8
+  %6 = or i64 %5, 128
+  br label %7
+
+7: ; preds = %4, %2
+  %8 = phi i64 [ %6, %4 ], [ 0, %2 ]
+  %9 = getelementptr inbounds float, ptr %3, i64 %8
+  %10 = load float, ptr %9
+  ret float %10
+}
+
+; CHECK-LABEL: define float @nested_loops
+; CHECK: load float, ptr %{{.*}}, align 8
+define float @nested_loops(i64 %0, i1 %1) {
+  %3 = tail call align 32 ptr @getPtr()
+  %4 = shl i64 %0, 8
+  br i1 %1, label %5, label %18
+
+5: ; preds = %15, %2
+  %6 = phi i32 [ %16, %15 ], [ 0, %2 ]
+  %7 = phi i64 [ %12, %15 ], [ %4, %2 ]
+  br label %8
+
+8: ; preds = %8, %5
+  %9 = phi i32 [ 0, %5 ], [ %13, %8 ]
+  %10 = phi i64 [ %7, %5 ], [ %12, %8 ]
+  %11 = shl i64 %0, 1
+  %12 = add nsw i64 %11, %10
+  %13 = add nuw nsw i32 %9, 1
+  %14 = icmp eq i32 %13, 1000
+  br i1 %14, label %15, label %8
+
+15: ; preds = %8
+  %16 = add nuw nsw i32 %6, 1
+  %17 = icmp eq i32 %16, 1000
+  br i1 %17, label %21, label %5
+
+18: ; preds = %25, %2
+  %19 = phi i32 [ %26, %25 ], [ 0, %2 ]
+  %20 = phi i64 [ %32, %25 ], [ %4, %2 ]
+  br label %28
+
+21: ; preds = %25, %15
+  %22 = phi
i64 [ %12, %15 ], [ %32, %25 ] + %23 = getelementptr inbounds float, ptr %3, i64 %22 + %24 = load float, ptr %23 + ret float %24 + +25: ; preds = %28 + %26 = add nuw nsw i32 %19, 1 + %27 = icmp eq i32 %26, 1000 + br i1 %27, label %21, label %18 + +28: ; preds = %28, %18 + %29 = phi i32 [ 0, %18 ], [ %33, %28 ] + %30 = phi i64 [ %20, %18 ], [ %32, %28 ] + %31 = shl i64 %0, 16 + %32 = add nsw i64 %31, %30 + %33 = add nuw nsw i32 %29, 1 + %34 = icmp eq i32 %33, 1000 + br i1 %34, label %25, label %28 +} + +; CHECK-LABEL: define float @nested_loops_undef_init +; CHECK: load float, ptr %{{.*}}, align 4 +define float @nested_loops_undef_init(i64 %0, i1 %1) { + %3 = tail call align 32 ptr @getPtr() + %4 = shl i64 %0, 8 + br i1 %1, label %5, label %18 + +5: ; preds = %15, %2 + %6 = phi i32 [ %16, %15 ], [ 0, %2 ] + %7 = phi i64 [ %12, %15 ], [ undef, %2 ] + br label %8 + +8: ; preds = %8, %5 + %9 = phi i32 [ 0, %5 ], [ %13, %8 ] + %10 = phi i64 [ %7, %5 ], [ %12, %8 ] + %11 = shl i64 %0, 1 + %12 = add nsw i64 %11, %10 + %13 = add nuw nsw i32 %9, 1 + %14 = icmp eq i32 %13, 1000 + br i1 %14, label %15, label %8 + +15: ; preds = %8 + %16 = add nuw nsw i32 %6, 1 + %17 = icmp eq i32 %16, 1000 + br i1 %17, label %21, label %5 + +18: ; preds = %25, %2 + %19 = phi i32 [ %26, %25 ], [ 0, %2 ] + %20 = phi i64 [ %32, %25 ], [ %4, %2 ] + br label %28 + +21: ; preds = %25, %15 + %22 = phi i64 [ %12, %15 ], [ %32, %25 ] + %23 = getelementptr inbounds float, ptr %3, i64 %22 + %24 = load float, ptr %23, align 4 + ret float %24 + +25: ; preds = %28 + %26 = add nuw nsw i32 %19, 1 + %27 = icmp eq i32 %26, 1000 + br i1 %27, label %21, label %18 + +28: ; preds = %28, %18 + %29 = phi i32 [ 0, %18 ], [ %33, %28 ] + %30 = phi i64 [ %20, %18 ], [ %32, %28 ] + %31 = shl i64 %0, 16 + %32 = add nsw i64 %31, %30 + %33 = add nuw nsw i32 %29, 1 + %34 = icmp eq i32 %33, 1000 + br i1 %34, label %25, label %28 +} +
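
Usage note (illustrative only, not part of the patch): the new trailing KnownValues
parameter lets a caller share previously computed KnownBits between queries, which is
how SetLoadStoreAlignment.cpp keeps its fixed-point iteration cheap. Below is a minimal
sketch of such a caller. The helper name collectFunctionKnownBits and the choice to seed
the cache with every integer-typed instruction are assumptions made for illustration,
and the cache type DenseMap<const Value *, KnownBits> is assumed to match the parameter
added by this patch.

#include "llvm/ADT/DenseMap.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/InstIterator.h"
#include "llvm/Support/KnownBits.h"

using namespace llvm;

// Hypothetical helper: compute known bits once for every integer-typed
// instruction in F, letting later queries reuse earlier results via Cache.
static void collectFunctionKnownBits(Function &F,
                                     DenseMap<const Value *, KnownBits> &Cache) {
  const DataLayout &DL = F.getParent()->getDataLayout();
  for (Instruction &I : instructions(F)) {
    if (!I.getType()->isIntOrIntVectorTy())
      continue;
    // computeKnownBits consults Cache for operands that already have an
    // entry, so long instruction chains are not recomputed from scratch.
    KnownBits KB = computeKnownBits(&I, DL, /*Depth=*/0, /*AC=*/nullptr,
                                    /*CxtI=*/nullptr, /*DT=*/nullptr,
                                    /*ORE=*/nullptr, /*UseInstrInfo=*/true,
                                    &Cache);
    Cache[&I] = KB;
  }
}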