diff --git a/llvm/lib/Transforms/IPO/Attributor.cpp b/llvm/lib/Transforms/IPO/Attributor.cpp
--- a/llvm/lib/Transforms/IPO/Attributor.cpp
+++ b/llvm/lib/Transforms/IPO/Attributor.cpp
@@ -2566,7 +2566,6 @@
   /// See AbstractAttribute::initialize(...).
   void initialize(Attributor &A) override {
     AAWillReturn::initialize(A);
-
     Function *F = getAnchorScope();
     if (!F || !A.isFunctionIPOAmendable(*F) || mayContainUnboundedCycle(*F, A))
       indicatePessimisticFixpoint();
@@ -5406,6 +5405,11 @@
     if (!PrivatizableType.getValue())
       return indicatePessimisticFixpoint();
 
+    // The dependence is optional so we don't give up once we give up on the
+    // alignment.
+    A.getAAFor<AAAlign>(*this, IRPosition::value(getAssociatedValue()),
+                        /* TrackDependence */ true, DepClassTy::OPTIONAL);
+
     // Avoid arguments with padding for now.
     if (!getIRPosition().hasAttr(Attribute::ByVal) &&
         !ArgumentPromotionPass::isDenselyPacked(PrivatizableType.getValue(),
@@ -5592,7 +5596,6 @@
   static void createInitialization(Type *PrivType, Value &Base, Function &F,
                                    unsigned ArgNo, Instruction &IP) {
     assert(PrivType && "Expected privatizable type!");
-
     IRBuilder<NoFolder> IRB(&IP);
     const DataLayout &DL = F.getParent()->getDataLayout();
 
@@ -5620,8 +5623,8 @@
 
   /// Extract values from \p Base according to the type \p PrivType at the
   /// call position \p ACS. The values are appended to \p ReplacementValues.
-  void createReplacementValues(Type *PrivType, AbstractCallSite ACS,
-                               Value *Base,
+  void createReplacementValues(Align Alignment, Type *PrivType,
+                               AbstractCallSite ACS, Value *Base,
                                SmallVectorImpl<Value *> &ReplacementValues) {
     assert(Base && "Expected base value!");
     assert(PrivType && "Expected privatizable type!");
@@ -5634,7 +5637,6 @@
       Base = BitCastInst::CreateBitOrPointerCast(Base, PrivType->getPointerTo(),
                                                  "", ACS.getInstruction());
 
-    // TODO: Improve the alignment of the loads.
     // Traverse the type, build GEPs and loads.
     if (auto *PrivStructType = dyn_cast<StructType>(PrivType)) {
       const StructLayout *PrivStructLayout = DL.getStructLayout(PrivStructType);
@@ -5644,7 +5646,7 @@
             constructPointer(PointeeTy->getPointerTo(), Base,
                              PrivStructLayout->getElementOffset(u), IRB, DL);
         LoadInst *L = new LoadInst(PointeeTy, Ptr, "", IP);
-        L->setAlignment(MaybeAlign(1));
+        L->setAlignment(Alignment);
         ReplacementValues.push_back(L);
       }
     } else if (auto *PrivArrayType = dyn_cast<ArrayType>(PrivType)) {
@@ -5655,12 +5657,12 @@
         Value *Ptr =
             constructPointer(PointeePtrTy, Base, u * PointeeTySize, IRB, DL);
         LoadInst *L = new LoadInst(PointeePtrTy, Ptr, "", IP);
-        L->setAlignment(MaybeAlign(1));
+        L->setAlignment(Alignment);
         ReplacementValues.push_back(L);
       }
     } else {
       LoadInst *L = new LoadInst(PrivType, Base, "", IP);
-      L->setAlignment(MaybeAlign(1));
+      L->setAlignment(Alignment);
       ReplacementValues.push_back(L);
     }
   }
@@ -5687,6 +5689,10 @@
 
     Argument *Arg = getAssociatedArgument();
 
+    // Query AAAlign attribute for alignment of associated argument to
+    // determine the best alignment of loads.
+    const auto &AlignAA = A.getAAFor<AAAlign>(*this, IRPosition::value(*Arg));
+
     // Callback to repair the associated function. A new alloca is placed at the
     // beginning and initialized with the values passed through arguments. The
     // new alloca replaces the use of the old pointer argument.
@@ -5709,9 +5715,13 @@
     // of the privatizable type are loaded prior to the call and passed to the
     // new function version.
     Attributor::ArgumentReplacementInfo::ACSRepairCBTy ACSRepairCB =
-        [=](const Attributor::ArgumentReplacementInfo &ARI,
-            AbstractCallSite ACS, SmallVectorImpl<Value *> &NewArgOperands) {
+        [=, &AlignAA](const Attributor::ArgumentReplacementInfo &ARI,
+                      AbstractCallSite ACS,
+                      SmallVectorImpl<Value *> &NewArgOperands) {
+          // When no alignment is specified for the load instruction,
+          // natural alignment is assumed.
           createReplacementValues(
+              assumeAligned(AlignAA.getAssumedAlign()),
               PrivatizableType.getValue(), ACS,
               ACS.getCallArgOperand(ARI.getReplacedArg().getArgNo()),
               NewArgOperands);
diff --git a/llvm/test/Transforms/Attributor/ArgumentPromotion/2008-02-01-ReturnAttrs.ll b/llvm/test/Transforms/Attributor/ArgumentPromotion/2008-02-01-ReturnAttrs.ll
--- a/llvm/test/Transforms/Attributor/ArgumentPromotion/2008-02-01-ReturnAttrs.ll
+++ b/llvm/test/Transforms/Attributor/ArgumentPromotion/2008-02-01-ReturnAttrs.ll
@@ -21,7 +21,7 @@
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[X_ADDR:%.*]] = alloca i32
 ; CHECK-NEXT:    store i32 [[X]], i32* [[X_ADDR]], align 4
-; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[X_ADDR]], align 1
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[X_ADDR]], align 4
 ; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @deref(i32 [[TMP0]])
 ; CHECK-NEXT:    ret i32 [[TMP1]]
 ;
diff --git a/llvm/test/Transforms/Attributor/ArgumentPromotion/X86/attributes.ll b/llvm/test/Transforms/Attributor/ArgumentPromotion/X86/attributes.ll
--- a/llvm/test/Transforms/Attributor/ArgumentPromotion/X86/attributes.ll
+++ b/llvm/test/Transforms/Attributor/ArgumentPromotion/X86/attributes.ll
@@ -67,7 +67,7 @@
 ; CHECK-NEXT:    [[TMP2:%.*]] = alloca <4 x i64>, align 32
 ; CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i64>* [[TMP]] to i8*
 ; CHECK-NEXT:    call void @llvm.memset.p0i8.i64(i8* nonnull writeonly align 32 dereferenceable(32) [[TMP3]], i8 0, i64 32, i1 false)
-; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x i64>, <4 x i64>* [[TMP]], align 1
+; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x i64>, <4 x i64>* [[TMP]], align 32
 ; CHECK-NEXT:    call fastcc void @promote_avx2(<4 x i64>* noalias nocapture nofree nonnull writeonly align 32 dereferenceable(32) [[TMP2]], <4 x i64> [[TMP0]])
 ; CHECK-NEXT:    [[TMP4:%.*]] = load <4 x i64>, <4 x i64>* [[TMP2]], align 32
 ; CHECK-NEXT:    store <4 x i64> [[TMP4]], <4 x i64>* [[ARG]], align 2
diff --git a/llvm/test/Transforms/Attributor/ArgumentPromotion/X86/min-legal-vector-width.ll b/llvm/test/Transforms/Attributor/ArgumentPromotion/X86/min-legal-vector-width.ll
--- a/llvm/test/Transforms/Attributor/ArgumentPromotion/X86/min-legal-vector-width.ll
+++ b/llvm/test/Transforms/Attributor/ArgumentPromotion/X86/min-legal-vector-width.ll
@@ -30,7 +30,7 @@
 ; CHECK-NEXT:    [[TMP2:%.*]] = alloca <8 x i64>, align 32
 ; CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i64>* [[TMP]] to i8*
 ; CHECK-NEXT:    call void @llvm.memset.p0i8.i64(i8* nonnull writeonly align 32 dereferenceable(64) [[TMP3]], i8 0, i64 32, i1 false)
-; CHECK-NEXT:    [[TMP0:%.*]] = load <8 x i64>, <8 x i64>* [[TMP]], align 1
+; CHECK-NEXT:    [[TMP0:%.*]] = load <8 x i64>, <8 x i64>* [[TMP]], align 64
 ; CHECK-NEXT:    call fastcc void @callee_avx512_legal512_prefer512_call_avx512_legal512_prefer512(<8 x i64>* noalias nocapture nofree nonnull writeonly align 64 dereferenceable(64) [[TMP2]], <8 x i64> [[TMP0]])
 ; CHECK-NEXT:    [[TMP4:%.*]] = load <8 x i64>, <8 x i64>* [[TMP2]], align 64
 ; CHECK-NEXT:    store <8 x i64> [[TMP4]], <8 x i64>* [[ARG]], align 2
@@ -72,7 +72,7 @@
 ; CHECK-NEXT:    [[TMP2:%.*]] = alloca <8 x i64>, align 32
 ; CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i64>* [[TMP]] to i8*
 ; CHECK-NEXT:    call void @llvm.memset.p0i8.i64(i8* nonnull writeonly align 32 dereferenceable(64) [[TMP3]], i8 0, i64 32, i1 false)
-; CHECK-NEXT:    [[TMP0:%.*]] = load <8 x i64>, <8 x i64>* [[TMP]], align 1
+; CHECK-NEXT:    [[TMP0:%.*]] = load <8 x i64>, <8 x i64>* [[TMP]], align 64
 ; CHECK-NEXT:    call fastcc void @callee_avx512_legal512_prefer256_call_avx512_legal512_prefer256(<8 x i64>* noalias nocapture nofree nonnull writeonly align 64 dereferenceable(64) [[TMP2]], <8 x i64> [[TMP0]])
 ; CHECK-NEXT:    [[TMP4:%.*]] = load <8 x i64>, <8 x i64>* [[TMP2]], align 64
 ; CHECK-NEXT:    store <8 x i64> [[TMP4]], <8 x i64>* [[ARG]], align 2
@@ -114,7 +114,7 @@
 ; CHECK-NEXT:    [[TMP2:%.*]] = alloca <8 x i64>, align 32
 ; CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i64>* [[TMP]] to i8*
 ; CHECK-NEXT:    call void @llvm.memset.p0i8.i64(i8* nonnull writeonly align 32 dereferenceable(64) [[TMP3]], i8 0, i64 32, i1 false)
-; CHECK-NEXT:    [[TMP0:%.*]] = load <8 x i64>, <8 x i64>* [[TMP]], align 1
+; CHECK-NEXT:    [[TMP0:%.*]] = load <8 x i64>, <8 x i64>* [[TMP]], align 64
 ; CHECK-NEXT:    call fastcc void @callee_avx512_legal512_prefer512_call_avx512_legal512_prefer256(<8 x i64>* noalias nocapture nofree nonnull writeonly align 64 dereferenceable(64) [[TMP2]], <8 x i64> [[TMP0]])
 ; CHECK-NEXT:    [[TMP4:%.*]] = load <8 x i64>, <8 x i64>* [[TMP2]], align 64
 ; CHECK-NEXT:    store <8 x i64> [[TMP4]], <8 x i64>* [[ARG]], align 2
@@ -156,7 +156,7 @@
 ; CHECK-NEXT:    [[TMP2:%.*]] = alloca <8 x i64>, align 32
 ; CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i64>* [[TMP]] to i8*
 ; CHECK-NEXT:    call void @llvm.memset.p0i8.i64(i8* nonnull writeonly align 32 dereferenceable(64) [[TMP3]], i8 0, i64 32, i1 false)
-; CHECK-NEXT:    [[TMP0:%.*]] = load <8 x i64>, <8 x i64>* [[TMP]], align 1
+; CHECK-NEXT:    [[TMP0:%.*]] = load <8 x i64>, <8 x i64>* [[TMP]], align 64
 ; CHECK-NEXT:    call fastcc void @callee_avx512_legal512_prefer256_call_avx512_legal512_prefer512(<8 x i64>* noalias nocapture nofree nonnull writeonly align 64 dereferenceable(64) [[TMP2]], <8 x i64> [[TMP0]])
 ; CHECK-NEXT:    [[TMP4:%.*]] = load <8 x i64>, <8 x i64>* [[TMP2]], align 64
 ; CHECK-NEXT:    store <8 x i64> [[TMP4]], <8 x i64>* [[ARG]], align 2
@@ -276,7 +276,7 @@
 ; CHECK-NEXT:    [[TMP2:%.*]] = alloca <8 x i64>, align 32
 ; CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i64>* [[TMP]] to i8*
 ; CHECK-NEXT:    call void @llvm.memset.p0i8.i64(i8* nonnull writeonly align 32 dereferenceable(64) [[TMP3]], i8 0, i64 32, i1 false)
-; CHECK-NEXT:    [[TMP0:%.*]] = load <8 x i64>, <8 x i64>* [[TMP]], align 1
+; CHECK-NEXT:    [[TMP0:%.*]] = load <8 x i64>, <8 x i64>* [[TMP]], align 64
 ; CHECK-NEXT:    call fastcc void @callee_avx2_legal256_prefer256_call_avx2_legal512_prefer256(<8 x i64>* noalias nocapture nofree nonnull writeonly align 64 dereferenceable(64) [[TMP2]], <8 x i64> [[TMP0]])
 ; CHECK-NEXT:    [[TMP4:%.*]] = load <8 x i64>, <8 x i64>* [[TMP2]], align 64
 ; CHECK-NEXT:    store <8 x i64> [[TMP4]], <8 x i64>* [[ARG]], align 2
@@ -318,7 +318,7 @@
 ; CHECK-NEXT:    [[TMP2:%.*]] = alloca <8 x i64>, align 32
 ; CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i64>* [[TMP]] to i8*
 ; CHECK-NEXT:    call void @llvm.memset.p0i8.i64(i8* nonnull writeonly align 32 dereferenceable(64) [[TMP3]], i8 0, i64 32, i1 false)
-; CHECK-NEXT:    [[TMP0:%.*]] = load <8 x i64>, <8 x i64>* [[TMP]], align 1
+; CHECK-NEXT:    [[TMP0:%.*]] = load <8 x i64>, <8 x i64>* [[TMP]], align 64
 ; CHECK-NEXT:    call fastcc void @callee_avx2_legal512_prefer256_call_avx2_legal256_prefer256(<8 x i64>* noalias nocapture nofree nonnull writeonly align 64 dereferenceable(64) [[TMP2]], <8 x i64> [[TMP0]])
 ; CHECK-NEXT:    [[TMP4:%.*]] = load <8 x i64>, <8 x i64>* [[TMP2]], align 64
 ; CHECK-NEXT:    store <8 x i64> [[TMP4]], <8 x i64>* [[ARG]], align 2
diff --git a/llvm/test/Transforms/Attributor/ArgumentPromotion/alignment.ll b/llvm/test/Transforms/Attributor/ArgumentPromotion/alignment.ll
--- a/llvm/test/Transforms/Attributor/ArgumentPromotion/alignment.ll
+++ b/llvm/test/Transforms/Attributor/ArgumentPromotion/alignment.ll
@@ -1,16 +1,17 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --scrub-attributes
-; RUN: opt -S -passes='attributor' -aa-pipeline='basic-aa' -attributor-disable=false -attributor-max-iterations-verify -attributor-max-iterations=2 < %s | FileCheck %s
+; RUN: opt -S -passes='attributor' -aa-pipeline='basic-aa' -attributor-disable=false -attributor-max-iterations-verify -attributor-max-iterations=7 < %s | FileCheck %s
 
+; Test1
 define void @f() {
 ; CHECK-LABEL: define {{[^@]+}}@f()
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[A:%.*]] = alloca i32, align 1
-; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 1
+; CHECK-NEXT:    [[A:%.*]] = alloca i32
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
 ; CHECK-NEXT:    call void @g(i32 [[TMP0]])
 ; CHECK-NEXT:    ret void
 ;
 entry:
-  %a = alloca i32, align 1
+  %a = alloca i32
   call void @g(i32* %a)
   ret void
 }
@@ -20,13 +21,72 @@
 ; CHECK-SAME: (i32 [[TMP0:%.*]])
 ; CHECK-NEXT:    [[A_PRIV:%.*]] = alloca i32
 ; CHECK-NEXT:    store i32 [[TMP0]], i32* [[A_PRIV]]
-; CHECK-NEXT:    [[AA:%.*]] = load i32, i32* [[A_PRIV]], align 1
+; CHECK-NEXT:    [[AA:%.*]] = load i32, i32* [[A_PRIV]], align 4
 ; CHECK-NEXT:    call void @z(i32 [[AA]])
 ; CHECK-NEXT:    ret void
 ;
-  %aa = load i32, i32* %a, align 1
+  %aa = load i32, i32* %a
   call void @z(i32 %aa)
   ret void
 }
 
 declare void @z(i32)
+
+; Test2
+define internal i32 @test(i32* %X, i64* %Y) {
+; CHECK-LABEL: define {{[^@]+}}@test
+; CHECK-SAME: (i32 [[TMP0:%.*]], i64 [[TMP1:%.*]])
+; CHECK-NEXT:    [[Y_PRIV:%.*]] = alloca i64
+; CHECK-NEXT:    store i64 [[TMP1]], i64* [[Y_PRIV]]
+; CHECK-NEXT:    [[X_PRIV:%.*]] = alloca i32
+; CHECK-NEXT:    store i32 [[TMP0]], i32* [[X_PRIV]]
+; CHECK-NEXT:    [[A:%.*]] = load i32, i32* [[X_PRIV]], align 4
+; CHECK-NEXT:    [[B:%.*]] = load i64, i64* [[Y_PRIV]], align 8
+; CHECK-NEXT:    [[C:%.*]] = add i32 [[A]], 1
+; CHECK-NEXT:    [[D:%.*]] = add i64 [[B]], 1
+;
+  %A = load i32, i32* %X
+  %B = load i64, i64* %Y
+  %C = add i32 %A, 1
+  %D = add i64 %B, 1
+  %cond = icmp sgt i64 %D, -1
+  br i1 %cond, label %Return1, label %Return2
+Return1:  
+  ret i32 %C
+Return2:
+  ret i32 %A
+}
+
+define internal i32 @caller(i32* %A) {
+; CHECK-LABEL: define {{[^@]+}}@caller
+; CHECK-SAME: (i32 [[TMP0:%.*]])
+; CHECK-NEXT:    [[A_PRIV:%.*]] = alloca i32
+; CHECK-NEXT:    store i32 [[TMP0]], i32* [[A_PRIV]]
+; CHECK-NEXT:    [[B:%.*]] = alloca i64
+; CHECK-NEXT:    store i64 1, i64* [[B]], align 8
+; CHECK-NEXT:    [[TMP2:%.*]] = load i32, i32* [[A_PRIV]], align 4
+; CHECK-NEXT:    [[TMP3:%.*]] = load i64, i64* [[B]], align 8
+; CHECK-NEXT:    [[C:%.*]] = call i32 @test(i32 [[TMP2]], i64 [[TMP3]])
+; CHECK-NEXT:    ret i32 [[C]]
+;
+  %B = alloca i64
+  store i64 1, i64* %B
+  %C = call i32 @test(i32* %A, i64* %B)
+  ret i32 %C
+}
+
+define i32 @callercaller() {
+; CHECK-LABEL: define {{[^@]+}}@callercaller()
+; CHECK-NEXT:    [[B:%.*]] = alloca i32
+; CHECK-NEXT:    store i32 2, i32* [[B]], align 4
+; CHECK-NEXT:    [[TMP1:%.*]] = load i32, i32* [[B]], align 4
+; CHECK-NEXT:    [[X:%.*]] = call i32 @caller(i32 [[TMP1]])
+; CHECK-NEXT:    ret i32 [[X]]
+;
+  %B = alloca i32
+  store i32 2, i32* %B
+  %X = call i32 @caller(i32* %B)
+  ret i32 %X
+}
+
+
diff --git a/llvm/test/Transforms/Attributor/ArgumentPromotion/attrs.ll b/llvm/test/Transforms/Attributor/ArgumentPromotion/attrs.ll
--- a/llvm/test/Transforms/Attributor/ArgumentPromotion/attrs.ll
+++ b/llvm/test/Transforms/Attributor/ArgumentPromotion/attrs.ll
@@ -48,10 +48,10 @@
 ; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S]], i32 0, i32 1
 ; CHECK-NEXT:    store i64 2, i64* [[TMP4]], align 4
 ; CHECK-NEXT:    [[S_CAST:%.*]] = bitcast %struct.ss* [[S]] to i32*
-; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[S_CAST]], align 1
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[S_CAST]], align 8
 ; CHECK-NEXT:    [[S_0_1:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S]], i32 0, i32 1
-; CHECK-NEXT:    [[TMP1:%.*]] = load i64, i64* [[S_0_1]], align 1
-; CHECK-NEXT:    [[TMP2:%.*]] = load i32, i32* [[X]], align 1
+; CHECK-NEXT:    [[TMP1:%.*]] = load i64, i64* [[S_0_1]], align 8
+; CHECK-NEXT:    [[TMP2:%.*]] = load i32, i32* [[X]], align 4
 ; CHECK-NEXT:    [[C:%.*]] = call i32 @f(i32 [[TMP0]], i64 [[TMP1]], i32 [[TMP2]], i32 zeroext 0)
 ; CHECK-NEXT:    ret i32 [[C]]
 ;
diff --git a/llvm/test/Transforms/Attributor/ArgumentPromotion/basictest.ll b/llvm/test/Transforms/Attributor/ArgumentPromotion/basictest.ll
--- a/llvm/test/Transforms/Attributor/ArgumentPromotion/basictest.ll
+++ b/llvm/test/Transforms/Attributor/ArgumentPromotion/basictest.ll
@@ -27,8 +27,8 @@
 ; CHECK-NEXT:    store i32 [[TMP0]], i32* [[B_PRIV]]
 ; CHECK-NEXT:    [[A:%.*]] = alloca i32
 ; CHECK-NEXT:    store i32 1, i32* [[A]], align 4
-; CHECK-NEXT:    [[TMP2:%.*]] = load i32, i32* [[A]], align 1
-; CHECK-NEXT:    [[TMP3:%.*]] = load i32, i32* [[B_PRIV]], align 1
+; CHECK-NEXT:    [[TMP2:%.*]] = load i32, i32* [[A]], align 4
+; CHECK-NEXT:    [[TMP3:%.*]] = load i32, i32* [[B_PRIV]], align 4
 ; CHECK-NEXT:    [[C:%.*]] = call i32 @test(i32 [[TMP2]], i32 [[TMP3]])
 ; CHECK-NEXT:    ret i32 [[C]]
 ;
@@ -42,7 +42,7 @@
 ; CHECK-LABEL: define {{[^@]+}}@callercaller()
 ; CHECK-NEXT:    [[B:%.*]] = alloca i32
 ; CHECK-NEXT:    store i32 2, i32* [[B]], align 4
-; CHECK-NEXT:    [[TMP1:%.*]] = load i32, i32* [[B]], align 1
+; CHECK-NEXT:    [[TMP1:%.*]] = load i32, i32* [[B]], align 4
 ; CHECK-NEXT:    [[X:%.*]] = call i32 @caller(i32 [[TMP1]])
 ; CHECK-NEXT:    ret i32 [[X]]
 ;
diff --git a/llvm/test/Transforms/Attributor/ArgumentPromotion/byval.ll b/llvm/test/Transforms/Attributor/ArgumentPromotion/byval.ll
--- a/llvm/test/Transforms/Attributor/ArgumentPromotion/byval.ll
+++ b/llvm/test/Transforms/Attributor/ArgumentPromotion/byval.ll
@@ -62,14 +62,14 @@
 ; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S]], i32 0, i32 1
 ; CHECK-NEXT:    store i64 2, i64* [[TMP4]], align 4
 ; CHECK-NEXT:    [[S_CAST:%.*]] = bitcast %struct.ss* [[S]] to i32*
-; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[S_CAST]], align 1
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[S_CAST]], align 8
 ; CHECK-NEXT:    [[S_0_1:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S]], i32 0, i32 1
-; CHECK-NEXT:    [[TMP1:%.*]] = load i64, i64* [[S_0_1]], align 1
+; CHECK-NEXT:    [[TMP1:%.*]] = load i64, i64* [[S_0_1]], align 8
 ; CHECK-NEXT:    [[C0:%.*]] = call i32 @f(i32 [[TMP0]], i64 [[TMP1]])
 ; CHECK-NEXT:    [[S_CAST1:%.*]] = bitcast %struct.ss* [[S]] to i32*
-; CHECK-NEXT:    [[TMP2:%.*]] = load i32, i32* [[S_CAST1]], align 1
+; CHECK-NEXT:    [[TMP2:%.*]] = load i32, i32* [[S_CAST1]], align 32
 ; CHECK-NEXT:    [[S_0_12:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S]], i32 0, i32 1
-; CHECK-NEXT:    [[TMP3:%.*]] = load i64, i64* [[S_0_12]], align 1
+; CHECK-NEXT:    [[TMP3:%.*]] = load i64, i64* [[S_0_12]], align 32
 ; CHECK-NEXT:    [[C1:%.*]] = call i32 @g(i32 [[TMP2]], i64 [[TMP3]])
 ; CHECK-NEXT:    [[A:%.*]] = add i32 [[C0]], [[C1]]
 ; CHECK-NEXT:    ret i32 [[A]]
diff --git a/llvm/test/Transforms/Attributor/ArgumentPromotion/control-flow2.ll b/llvm/test/Transforms/Attributor/ArgumentPromotion/control-flow2.ll
--- a/llvm/test/Transforms/Attributor/ArgumentPromotion/control-flow2.ll
+++ b/llvm/test/Transforms/Attributor/ArgumentPromotion/control-flow2.ll
@@ -29,7 +29,7 @@
 ; CHECK-LABEL: define {{[^@]+}}@foo()
 ; CHECK-NEXT:    [[A:%.*]] = alloca i32
 ; CHECK-NEXT:    store i32 17, i32* [[A]], align 4
-; CHECK-NEXT:    [[TMP1:%.*]] = load i32, i32* [[A]], align 1
+; CHECK-NEXT:    [[TMP1:%.*]] = load i32, i32* [[A]], align 4
 ; CHECK-NEXT:    [[X:%.*]] = call i32 @callee(i1 false, i32 [[TMP1]])
 ; CHECK-NEXT:    ret i32 [[X]]
 ;
diff --git a/llvm/test/Transforms/Attributor/ArgumentPromotion/inalloca.ll b/llvm/test/Transforms/Attributor/ArgumentPromotion/inalloca.ll
--- a/llvm/test/Transforms/Attributor/ArgumentPromotion/inalloca.ll
+++ b/llvm/test/Transforms/Attributor/ArgumentPromotion/inalloca.ll
@@ -61,9 +61,9 @@
 ; GLOBALOPT_ATTRIBUTOR-NEXT:    store i32 1, i32* [[F0]], align 4
 ; GLOBALOPT_ATTRIBUTOR-NEXT:    store i32 2, i32* [[F1]], align 4
 ; GLOBALOPT_ATTRIBUTOR-NEXT:    [[S_CAST:%.*]] = bitcast %struct.ss* [[S]] to i32*
-; GLOBALOPT_ATTRIBUTOR-NEXT:    [[TMP0:%.*]] = load i32, i32* [[S_CAST]], align 1
+; GLOBALOPT_ATTRIBUTOR-NEXT:    [[TMP0:%.*]] = load i32, i32* [[S_CAST]], align 4
 ; GLOBALOPT_ATTRIBUTOR-NEXT:    [[S_0_1:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S]], i32 0, i32 1
-; GLOBALOPT_ATTRIBUTOR-NEXT:    [[TMP1:%.*]] = load i32, i32* [[S_0_1]], align 1
+; GLOBALOPT_ATTRIBUTOR-NEXT:    [[TMP1:%.*]] = load i32, i32* [[S_0_1]], align 4
 ; GLOBALOPT_ATTRIBUTOR-NEXT:    [[R:%.*]] = call fastcc i32 @f(i32 [[TMP0]], i32 [[TMP1]])
 ; GLOBALOPT_ATTRIBUTOR-NEXT:    ret i32 [[R]]
 ;
diff --git a/llvm/test/Transforms/Attributor/ArgumentPromotion/profile.ll b/llvm/test/Transforms/Attributor/ArgumentPromotion/profile.ll
--- a/llvm/test/Transforms/Attributor/ArgumentPromotion/profile.ll
+++ b/llvm/test/Transforms/Attributor/ArgumentPromotion/profile.ll
@@ -8,7 +8,7 @@
 ; CHECK-LABEL: define {{[^@]+}}@caller()
 ; CHECK-NEXT:    [[X:%.*]] = alloca i32
 ; CHECK-NEXT:    store i32 42, i32* [[X]], align 4
-; CHECK-NEXT:    [[TMP1:%.*]] = load i32, i32* [[X]], align 1
+; CHECK-NEXT:    [[TMP1:%.*]] = load i32, i32* [[X]], align 4
 ; CHECK-NEXT:    call void @promote_i32_ptr(i32 [[TMP1]]), !prof !0
 ; CHECK-NEXT:    ret void
 ;
diff --git a/llvm/test/Transforms/Attributor/IPConstantProp/2009-09-24-byval-ptr.ll b/llvm/test/Transforms/Attributor/IPConstantProp/2009-09-24-byval-ptr.ll
--- a/llvm/test/Transforms/Attributor/IPConstantProp/2009-09-24-byval-ptr.ll
+++ b/llvm/test/Transforms/Attributor/IPConstantProp/2009-09-24-byval-ptr.ll
@@ -47,9 +47,9 @@
 ; CHECK-LABEL: define {{[^@]+}}@unions()
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[MYSTR_CAST:%.*]] = bitcast %struct.MYstr* @mystr to i8*
-; CHECK-NEXT:    [[TMP0:%.*]] = load i8, i8* [[MYSTR_CAST]], align 1
+; CHECK-NEXT:    [[TMP0:%.*]] = load i8, i8* [[MYSTR_CAST]], align 8
 ; CHECK-NEXT:    [[MYSTR_0_1:%.*]] = getelementptr [[STRUCT_MYSTR:%.*]], %struct.MYstr* @mystr, i32 0, i32 1
-; CHECK-NEXT:    [[TMP1:%.*]] = load i32, i32* [[MYSTR_0_1]], align 1
+; CHECK-NEXT:    [[TMP1:%.*]] = load i32, i32* [[MYSTR_0_1]], align 8
 ; CHECK-NEXT:    [[RESULT:%.*]] = call i32 @vfu2(i8 [[TMP0]], i32 [[TMP1]])
 ; CHECK-NEXT:    ret i32 [[RESULT]]
 ;
@@ -94,9 +94,9 @@
 ; CHECK-LABEL: define {{[^@]+}}@unions_v2()
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[MYSTR_CAST:%.*]] = bitcast %struct.MYstr* @mystr to i8*
-; CHECK-NEXT:    [[TMP0:%.*]] = load i8, i8* [[MYSTR_CAST]], align 1
+; CHECK-NEXT:    [[TMP0:%.*]] = load i8, i8* [[MYSTR_CAST]], align 8
 ; CHECK-NEXT:    [[MYSTR_0_1:%.*]] = getelementptr [[STRUCT_MYSTR:%.*]], %struct.MYstr* @mystr, i32 0, i32 1
-; CHECK-NEXT:    [[TMP1:%.*]] = load i32, i32* [[MYSTR_0_1]], align 1
+; CHECK-NEXT:    [[TMP1:%.*]] = load i32, i32* [[MYSTR_0_1]], align 8
 ; CHECK-NEXT:    [[RESULT:%.*]] = call i32 @vfu2_v2(i8 [[TMP0]], i32 [[TMP1]])
 ; CHECK-NEXT:    ret i32 [[RESULT]]
 ;