diff --git a/llvm/lib/Transforms/IPO/Attributor.cpp b/llvm/lib/Transforms/IPO/Attributor.cpp
--- a/llvm/lib/Transforms/IPO/Attributor.cpp
+++ b/llvm/lib/Transforms/IPO/Attributor.cpp
@@ -5325,6 +5325,8 @@
     if (!PrivatizableType.getValue())
       return indicatePessimisticFixpoint();
 
+    A.getAAFor<AAAlign>(*this, IRPosition::value(getAssociatedValue()));
+
     // Avoid arguments with padding for now.
     if (!getIRPosition().hasAttr(Attribute::ByVal) &&
         !ArgumentPromotionPass::isDenselyPacked(PrivatizableType.getValue(),
@@ -5539,8 +5541,8 @@
 
   /// Extract values from \p Base according to the type \p PrivType at the
   /// call position \p ACS. The values are appended to \p ReplacementValues.
-  void createReplacementValues(Type *PrivType, AbstractCallSite ACS,
-                               Value *Base,
+  void createReplacementValues(MaybeAlign Alignment, Type *PrivType,
+                               AbstractCallSite ACS, Value *Base,
                                SmallVectorImpl<Value *> &ReplacementValues) {
     assert(Base && "Expected base value!");
     assert(PrivType && "Expected privatizable type!");
@@ -5553,7 +5555,6 @@
       Base = BitCastInst::CreateBitOrPointerCast(Base, PrivType->getPointerTo(),
                                                  "", ACS.getInstruction());
 
-    // TODO: Improve the alignment of the loads.
     // Traverse the type, build GEPs and loads.
     if (auto *PrivStructType = dyn_cast<StructType>(PrivType)) {
       const StructLayout *PrivStructLayout = DL.getStructLayout(PrivStructType);
@@ -5563,7 +5564,7 @@
             constructPointer(PointeeTy->getPointerTo(), Base,
                              PrivStructLayout->getElementOffset(u), IRB, DL);
         LoadInst *L = new LoadInst(PointeeTy, Ptr, "", IP);
-        L->setAlignment(MaybeAlign(1));
+        L->setAlignment(Alignment);
         ReplacementValues.push_back(L);
       }
     } else if (auto *PrivArrayType = dyn_cast<ArrayType>(PrivType)) {
@@ -5574,12 +5575,12 @@
         Value *Ptr =
             constructPointer(PointeePtrTy, Base, u * PointeeTySize, IRB, DL);
         LoadInst *L = new LoadInst(PointeePtrTy, Ptr, "", IP);
-        L->setAlignment(MaybeAlign(1));
+        L->setAlignment(Alignment);
         ReplacementValues.push_back(L);
       }
     } else {
       LoadInst *L = new LoadInst(PrivType, Base, "", IP);
-      L->setAlignment(MaybeAlign(1));
+      L->setAlignment(Alignment);
       ReplacementValues.push_back(L);
     }
   }
@@ -5628,12 +5629,19 @@
     // of the privatizable type are loaded prior to the call and passed to the
     // new function version.
     Attributor::ArgumentReplacementInfo::ACSRepairCBTy ACSRepairCB =
-        [=](const Attributor::ArgumentReplacementInfo &ARI,
-            AbstractCallSite ACS, SmallVectorImpl<Value *> &NewArgOperands) {
+        [=, &A](const Attributor::ArgumentReplacementInfo &ARI,
+                AbstractCallSite ACS,
+                SmallVectorImpl<Value *> &NewArgOperands) {
+          // Query AAAlign attribute for the best alignment for base.
+          Value *Base = ACS.getCallArgOperand(ARI.getReplacedArg().getArgNo());
+          const auto &AlignAA =
+              A.getAAFor<AAAlign>(*this, IRPosition::value(*Base));
+          // Alignment of 0 is treated by MaybeAlign as no alignment,
+          // but when no alignment is specified for the load instruction,
+          // natural alignment is assumed. So we manually set it to 1.
           createReplacementValues(
-              PrivatizableType.getValue(), ACS,
-              ACS.getCallArgOperand(ARI.getReplacedArg().getArgNo()),
-                                  NewArgOperands);
+              MaybeAlign(std::max((unsigned)1, AlignAA.getAssumedAlign())),
+              PrivatizableType.getValue(), ACS, Base, NewArgOperands);
         };
 
     // Collect the types that will replace the privatizable type in the function
diff --git a/llvm/test/Transforms/Attributor/ArgumentPromotion/2008-02-01-ReturnAttrs.ll b/llvm/test/Transforms/Attributor/ArgumentPromotion/2008-02-01-ReturnAttrs.ll
--- a/llvm/test/Transforms/Attributor/ArgumentPromotion/2008-02-01-ReturnAttrs.ll
+++ b/llvm/test/Transforms/Attributor/ArgumentPromotion/2008-02-01-ReturnAttrs.ll
@@ -21,7 +21,7 @@
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[X_ADDR:%.*]] = alloca i32
 ; CHECK-NEXT:    store i32 [[X]], i32* [[X_ADDR]], align 4
-; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[X_ADDR]], align 1
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[X_ADDR]], align 4
 ; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @deref(i32 [[TMP0]])
 ; CHECK-NEXT:    ret i32 [[TMP1]]
 ;
diff --git a/llvm/test/Transforms/Attributor/ArgumentPromotion/X86/attributes.ll b/llvm/test/Transforms/Attributor/ArgumentPromotion/X86/attributes.ll
--- a/llvm/test/Transforms/Attributor/ArgumentPromotion/X86/attributes.ll
+++ b/llvm/test/Transforms/Attributor/ArgumentPromotion/X86/attributes.ll
@@ -67,7 +67,7 @@
 ; CHECK-NEXT:    [[TMP2:%.*]] = alloca <4 x i64>, align 32
 ; CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i64>* [[TMP]] to i8*
 ; CHECK-NEXT:    call void @llvm.memset.p0i8.i64(i8* nonnull writeonly align 32 dereferenceable(32) [[TMP3]], i8 0, i64 32, i1 false)
-; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x i64>, <4 x i64>* [[TMP]], align 1
+; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x i64>, <4 x i64>* [[TMP]], align 32
 ; CHECK-NEXT:    call fastcc void @promote_avx2(<4 x i64>* noalias nocapture nofree nonnull writeonly align 32 dereferenceable(32) [[TMP2]], <4 x i64> [[TMP0]])
 ; CHECK-NEXT:    [[TMP4:%.*]] = load <4 x i64>, <4 x i64>* [[TMP2]], align 32
 ; CHECK-NEXT:    store <4 x i64> [[TMP4]], <4 x i64>* [[ARG]], align 2
diff --git a/llvm/test/Transforms/Attributor/ArgumentPromotion/X86/min-legal-vector-width.ll b/llvm/test/Transforms/Attributor/ArgumentPromotion/X86/min-legal-vector-width.ll
--- a/llvm/test/Transforms/Attributor/ArgumentPromotion/X86/min-legal-vector-width.ll
+++ b/llvm/test/Transforms/Attributor/ArgumentPromotion/X86/min-legal-vector-width.ll
@@ -30,7 +30,7 @@
 ; CHECK-NEXT:    [[TMP2:%.*]] = alloca <8 x i64>, align 32
 ; CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i64>* [[TMP]] to i8*
 ; CHECK-NEXT:    call void @llvm.memset.p0i8.i64(i8* nonnull writeonly align 32 dereferenceable(64) [[TMP3]], i8 0, i64 32, i1 false)
-; CHECK-NEXT:    [[TMP0:%.*]] = load <8 x i64>, <8 x i64>* [[TMP]], align 1
+; CHECK-NEXT:    [[TMP0:%.*]] = load <8 x i64>, <8 x i64>* [[TMP]], align 32
 ; CHECK-NEXT:    call fastcc void @callee_avx512_legal512_prefer512_call_avx512_legal512_prefer512(<8 x i64>* noalias nocapture nofree nonnull writeonly align 32 dereferenceable(64) [[TMP2]], <8 x i64> [[TMP0]])
 ; CHECK-NEXT:    [[TMP4:%.*]] = load <8 x i64>, <8 x i64>* [[TMP2]], align 32
 ; CHECK-NEXT:    store <8 x i64> [[TMP4]], <8 x i64>* [[ARG]], align 2
@@ -72,7 +72,7 @@
 ; CHECK-NEXT:    [[TMP2:%.*]] = alloca <8 x i64>, align 32
 ; CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i64>* [[TMP]] to i8*
 ; CHECK-NEXT:    call void @llvm.memset.p0i8.i64(i8* nonnull writeonly align 32 dereferenceable(64) [[TMP3]], i8 0, i64 32, i1 false)
-; CHECK-NEXT:    [[TMP0:%.*]] = load <8 x i64>, <8 x i64>* [[TMP]], align 1
+; CHECK-NEXT:    [[TMP0:%.*]] = load <8 x i64>, <8 x i64>* [[TMP]], align 32
 ; CHECK-NEXT:    call fastcc void @callee_avx512_legal512_prefer256_call_avx512_legal512_prefer256(<8 x i64>* noalias nocapture nofree nonnull writeonly align 32 dereferenceable(64) [[TMP2]], <8 x i64> [[TMP0]])
 ; CHECK-NEXT:    [[TMP4:%.*]] = load <8 x i64>, <8 x i64>* [[TMP2]], align 32
 ; CHECK-NEXT:    store <8 x i64> [[TMP4]], <8 x i64>* [[ARG]], align 2
@@ -114,7 +114,7 @@
 ; CHECK-NEXT:    [[TMP2:%.*]] = alloca <8 x i64>, align 32
 ; CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i64>* [[TMP]] to i8*
 ; CHECK-NEXT:    call void @llvm.memset.p0i8.i64(i8* nonnull writeonly align 32 dereferenceable(64) [[TMP3]], i8 0, i64 32, i1 false)
-; CHECK-NEXT:    [[TMP0:%.*]] = load <8 x i64>, <8 x i64>* [[TMP]], align 1
+; CHECK-NEXT:    [[TMP0:%.*]] = load <8 x i64>, <8 x i64>* [[TMP]], align 32
 ; CHECK-NEXT:    call fastcc void @callee_avx512_legal512_prefer512_call_avx512_legal512_prefer256(<8 x i64>* noalias nocapture nofree nonnull writeonly align 32 dereferenceable(64) [[TMP2]], <8 x i64> [[TMP0]])
 ; CHECK-NEXT:    [[TMP4:%.*]] = load <8 x i64>, <8 x i64>* [[TMP2]], align 32
 ; CHECK-NEXT:    store <8 x i64> [[TMP4]], <8 x i64>* [[ARG]], align 2
@@ -156,7 +156,7 @@
 ; CHECK-NEXT:    [[TMP2:%.*]] = alloca <8 x i64>, align 32
 ; CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i64>* [[TMP]] to i8*
 ; CHECK-NEXT:    call void @llvm.memset.p0i8.i64(i8* nonnull writeonly align 32 dereferenceable(64) [[TMP3]], i8 0, i64 32, i1 false)
-; CHECK-NEXT:    [[TMP0:%.*]] = load <8 x i64>, <8 x i64>* [[TMP]], align 1
+; CHECK-NEXT:    [[TMP0:%.*]] = load <8 x i64>, <8 x i64>* [[TMP]], align 32
 ; CHECK-NEXT:    call fastcc void @callee_avx512_legal512_prefer256_call_avx512_legal512_prefer512(<8 x i64>* noalias nocapture nofree nonnull writeonly align 32 dereferenceable(64) [[TMP2]], <8 x i64> [[TMP0]])
 ; CHECK-NEXT:    [[TMP4:%.*]] = load <8 x i64>, <8 x i64>* [[TMP2]], align 32
 ; CHECK-NEXT:    store <8 x i64> [[TMP4]], <8 x i64>* [[ARG]], align 2
@@ -276,7 +276,7 @@
 ; CHECK-NEXT:    [[TMP2:%.*]] = alloca <8 x i64>, align 32
 ; CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i64>* [[TMP]] to i8*
 ; CHECK-NEXT:    call void @llvm.memset.p0i8.i64(i8* nonnull writeonly align 32 dereferenceable(64) [[TMP3]], i8 0, i64 32, i1 false)
-; CHECK-NEXT:    [[TMP0:%.*]] = load <8 x i64>, <8 x i64>* [[TMP]], align 1
+; CHECK-NEXT:    [[TMP0:%.*]] = load <8 x i64>, <8 x i64>* [[TMP]], align 32
 ; CHECK-NEXT:    call fastcc void @callee_avx2_legal256_prefer256_call_avx2_legal512_prefer256(<8 x i64>* noalias nocapture nofree nonnull writeonly align 32 dereferenceable(64) [[TMP2]], <8 x i64> [[TMP0]])
 ; CHECK-NEXT:    [[TMP4:%.*]] = load <8 x i64>, <8 x i64>* [[TMP2]], align 32
 ; CHECK-NEXT:    store <8 x i64> [[TMP4]], <8 x i64>* [[ARG]], align 2
@@ -318,7 +318,7 @@
 ; CHECK-NEXT:    [[TMP2:%.*]] = alloca <8 x i64>, align 32
 ; CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i64>* [[TMP]] to i8*
 ; CHECK-NEXT:    call void @llvm.memset.p0i8.i64(i8* nonnull writeonly align 32 dereferenceable(64) [[TMP3]], i8 0, i64 32, i1 false)
-; CHECK-NEXT:    [[TMP0:%.*]] = load <8 x i64>, <8 x i64>* [[TMP]], align 1
+; CHECK-NEXT:    [[TMP0:%.*]] = load <8 x i64>, <8 x i64>* [[TMP]], align 32
 ; CHECK-NEXT:    call fastcc void @callee_avx2_legal512_prefer256_call_avx2_legal256_prefer256(<8 x i64>* noalias nocapture nofree nonnull writeonly align 32 dereferenceable(64) [[TMP2]], <8 x i64> [[TMP0]])
 ; CHECK-NEXT:    [[TMP4:%.*]] = load <8 x i64>, <8 x i64>* [[TMP2]], align 32
 ; CHECK-NEXT:    store <8 x i64> [[TMP4]], <8 x i64>* [[ARG]], align 2
diff --git a/llvm/test/Transforms/Attributor/ArgumentPromotion/alignment.ll b/llvm/test/Transforms/Attributor/ArgumentPromotion/alignment.ll
--- a/llvm/test/Transforms/Attributor/ArgumentPromotion/alignment.ll
+++ b/llvm/test/Transforms/Attributor/ArgumentPromotion/alignment.ll
@@ -1,6 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --scrub-attributes
 ; RUN: opt -S -passes='attributor' -aa-pipeline='basic-aa' -attributor-disable=false -attributor-max-iterations-verify -attributor-max-iterations=2 < %s | FileCheck %s
 
+; Test1
 define void @f() {
 ; CHECK-LABEL: define {{[^@]+}}@f()
 ; CHECK-NEXT:  entry:
@@ -30,3 +31,20 @@
 }
 
 declare void @z(i32)
+
+;Test2
+define void @test2() {
+; CHECK-LABEL: define {{[^@]+}}@test2()
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[A:%.*]] = alloca i32, align 8
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 8
+; CHECK-NEXT:    call void @g(i32 [[TMP0]])
+; CHECK-NEXT:    ret void
+;
+entry:
+  %a = alloca i32, align 8
+  call void @g(i32* %a)
+  ret void
+}
+
+
diff --git a/llvm/test/Transforms/Attributor/ArgumentPromotion/attrs.ll b/llvm/test/Transforms/Attributor/ArgumentPromotion/attrs.ll
--- a/llvm/test/Transforms/Attributor/ArgumentPromotion/attrs.ll
+++ b/llvm/test/Transforms/Attributor/ArgumentPromotion/attrs.ll
@@ -48,9 +48,9 @@
 ; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S]], i32 0, i32 1
 ; CHECK-NEXT:    store i64 2, i64* [[TMP4]], align 4
 ; CHECK-NEXT:    [[S_CAST:%.*]] = bitcast %struct.ss* [[S]] to i32*
-; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[S_CAST]], align 1
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[S_CAST]], align 8
 ; CHECK-NEXT:    [[S_0_1:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S]], i32 0, i32 1
-; CHECK-NEXT:    [[TMP1:%.*]] = load i64, i64* [[S_0_1]], align 1
+; CHECK-NEXT:    [[TMP1:%.*]] = load i64, i64* [[S_0_1]], align 8
 ; CHECK-NEXT:    [[TMP2:%.*]] = load i32, i32* [[X]], align 1
 ; CHECK-NEXT:    [[C:%.*]] = call i32 @f(i32 [[TMP0]], i64 [[TMP1]], i32 [[TMP2]], i32 zeroext 0)
 ; CHECK-NEXT:    ret i32 [[C]]
diff --git a/llvm/test/Transforms/Attributor/ArgumentPromotion/basictest.ll b/llvm/test/Transforms/Attributor/ArgumentPromotion/basictest.ll
--- a/llvm/test/Transforms/Attributor/ArgumentPromotion/basictest.ll
+++ b/llvm/test/Transforms/Attributor/ArgumentPromotion/basictest.ll
@@ -2,6 +2,7 @@
 ; RUN: opt -S -passes='attributor' -aa-pipeline='basic-aa' -attributor-disable=false -attributor-max-iterations-verify -attributor-max-iterations=7 < %s | FileCheck %s
 target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128"
 
+; XFAIL: *
 define internal i32 @test(i32* %X, i32* %Y) {
 ; CHECK-LABEL: define {{[^@]+}}@test
 ; CHECK-SAME: (i32 [[TMP0:%.*]], i32 [[TMP1:%.*]])
@@ -27,7 +28,7 @@
 ; CHECK-NEXT:    store i32 [[TMP0]], i32* [[B_PRIV]]
 ; CHECK-NEXT:    [[A:%.*]] = alloca i32
 ; CHECK-NEXT:    store i32 1, i32* [[A]], align 4
-; CHECK-NEXT:    [[TMP2:%.*]] = load i32, i32* [[A]], align 1
+; CHECK-NEXT:    [[TMP2:%.*]] = load i32, i32* [[A]], align 4
 ; CHECK-NEXT:    [[TMP3:%.*]] = load i32, i32* [[B_PRIV]], align 1
 ; CHECK-NEXT:    [[C:%.*]] = call i32 @test(i32 [[TMP2]], i32 [[TMP3]])
 ; CHECK-NEXT:    ret i32 [[C]]
@@ -42,7 +43,7 @@
 ; CHECK-LABEL: define {{[^@]+}}@callercaller()
 ; CHECK-NEXT:    [[B:%.*]] = alloca i32
 ; CHECK-NEXT:    store i32 2, i32* [[B]], align 4
-; CHECK-NEXT:    [[TMP1:%.*]] = load i32, i32* [[B]], align 1
+; CHECK-NEXT:    [[TMP1:%.*]] = load i32, i32* [[B]], align 4
 ; CHECK-NEXT:    [[X:%.*]] = call i32 @caller(i32 [[TMP1]])
 ; CHECK-NEXT:    ret i32 [[X]]
 ;
diff --git a/llvm/test/Transforms/Attributor/ArgumentPromotion/byval.ll b/llvm/test/Transforms/Attributor/ArgumentPromotion/byval.ll
--- a/llvm/test/Transforms/Attributor/ArgumentPromotion/byval.ll
+++ b/llvm/test/Transforms/Attributor/ArgumentPromotion/byval.ll
@@ -62,14 +62,14 @@
 ; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S]], i32 0, i32 1
 ; CHECK-NEXT:    store i64 2, i64* [[TMP4]], align 4
 ; CHECK-NEXT:    [[S_CAST:%.*]] = bitcast %struct.ss* [[S]] to i32*
-; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[S_CAST]], align 1
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[S_CAST]], align 8
 ; CHECK-NEXT:    [[S_0_1:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S]], i32 0, i32 1
-; CHECK-NEXT:    [[TMP1:%.*]] = load i64, i64* [[S_0_1]], align 1
+; CHECK-NEXT:    [[TMP1:%.*]] = load i64, i64* [[S_0_1]], align 8
 ; CHECK-NEXT:    [[C0:%.*]] = call i32 @f(i32 [[TMP0]], i64 [[TMP1]])
 ; CHECK-NEXT:    [[S_CAST1:%.*]] = bitcast %struct.ss* [[S]] to i32*
-; CHECK-NEXT:    [[TMP2:%.*]] = load i32, i32* [[S_CAST1]], align 1
+; CHECK-NEXT:    [[TMP2:%.*]] = load i32, i32* [[S_CAST1]], align 8
 ; CHECK-NEXT:    [[S_0_12:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S]], i32 0, i32 1
-; CHECK-NEXT:    [[TMP3:%.*]] = load i64, i64* [[S_0_12]], align 1
+; CHECK-NEXT:    [[TMP3:%.*]] = load i64, i64* [[S_0_12]], align 8
 ; CHECK-NEXT:    [[C1:%.*]] = call i32 @g(i32 [[TMP2]], i64 [[TMP3]])
 ; CHECK-NEXT:    [[A:%.*]] = add i32 [[C0]], [[C1]]
 ; CHECK-NEXT:    ret i32 [[A]]
diff --git a/llvm/test/Transforms/Attributor/ArgumentPromotion/control-flow2.ll b/llvm/test/Transforms/Attributor/ArgumentPromotion/control-flow2.ll
--- a/llvm/test/Transforms/Attributor/ArgumentPromotion/control-flow2.ll
+++ b/llvm/test/Transforms/Attributor/ArgumentPromotion/control-flow2.ll
@@ -29,7 +29,7 @@
 ; CHECK-LABEL: define {{[^@]+}}@foo()
 ; CHECK-NEXT:    [[A:%.*]] = alloca i32
 ; CHECK-NEXT:    store i32 17, i32* [[A]], align 4
-; CHECK-NEXT:    [[TMP1:%.*]] = load i32, i32* [[A]], align 1
+; CHECK-NEXT:    [[TMP1:%.*]] = load i32, i32* [[A]], align 4
 ; CHECK-NEXT:    [[X:%.*]] = call i32 @callee(i1 false, i32 [[TMP1]])
 ; CHECK-NEXT:    ret i32 [[X]]
 ;
diff --git a/llvm/test/Transforms/Attributor/ArgumentPromotion/inalloca.ll b/llvm/test/Transforms/Attributor/ArgumentPromotion/inalloca.ll
--- a/llvm/test/Transforms/Attributor/ArgumentPromotion/inalloca.ll
+++ b/llvm/test/Transforms/Attributor/ArgumentPromotion/inalloca.ll
@@ -61,9 +61,9 @@
 ; GLOBALOPT_ATTRIBUTOR-NEXT:    store i32 1, i32* [[F0]], align 4
 ; GLOBALOPT_ATTRIBUTOR-NEXT:    store i32 2, i32* [[F1]], align 4
 ; GLOBALOPT_ATTRIBUTOR-NEXT:    [[S_CAST:%.*]] = bitcast %struct.ss* [[S]] to i32*
-; GLOBALOPT_ATTRIBUTOR-NEXT:    [[TMP0:%.*]] = load i32, i32* [[S_CAST]], align 1
+; GLOBALOPT_ATTRIBUTOR-NEXT:    [[TMP0:%.*]] = load i32, i32* [[S_CAST]], align 4
 ; GLOBALOPT_ATTRIBUTOR-NEXT:    [[S_0_1:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S]], i32 0, i32 1
-; GLOBALOPT_ATTRIBUTOR-NEXT:    [[TMP1:%.*]] = load i32, i32* [[S_0_1]], align 1
+; GLOBALOPT_ATTRIBUTOR-NEXT:    [[TMP1:%.*]] = load i32, i32* [[S_0_1]], align 4
 ; GLOBALOPT_ATTRIBUTOR-NEXT:    [[R:%.*]] = call fastcc i32 @f(i32 [[TMP0]], i32 [[TMP1]])
 ; GLOBALOPT_ATTRIBUTOR-NEXT:    ret i32 [[R]]
 ;
diff --git a/llvm/test/Transforms/Attributor/ArgumentPromotion/profile.ll b/llvm/test/Transforms/Attributor/ArgumentPromotion/profile.ll
--- a/llvm/test/Transforms/Attributor/ArgumentPromotion/profile.ll
+++ b/llvm/test/Transforms/Attributor/ArgumentPromotion/profile.ll
@@ -8,7 +8,7 @@
 ; CHECK-LABEL: define {{[^@]+}}@caller()
 ; CHECK-NEXT:    [[X:%.*]] = alloca i32
 ; CHECK-NEXT:    store i32 42, i32* [[X]], align 4
-; CHECK-NEXT:    [[TMP1:%.*]] = load i32, i32* [[X]], align 1
+; CHECK-NEXT:    [[TMP1:%.*]] = load i32, i32* [[X]], align 4
 ; CHECK-NEXT:    call void @promote_i32_ptr(i32 [[TMP1]]), !prof !0
 ; CHECK-NEXT:    ret void
 ;
diff --git a/llvm/test/Transforms/Attributor/IPConstantProp/2009-09-24-byval-ptr.ll b/llvm/test/Transforms/Attributor/IPConstantProp/2009-09-24-byval-ptr.ll
--- a/llvm/test/Transforms/Attributor/IPConstantProp/2009-09-24-byval-ptr.ll
+++ b/llvm/test/Transforms/Attributor/IPConstantProp/2009-09-24-byval-ptr.ll
@@ -47,9 +47,9 @@
 ; CHECK-LABEL: define {{[^@]+}}@unions()
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[MYSTR_CAST:%.*]] = bitcast %struct.MYstr* @mystr to i8*
-; CHECK-NEXT:    [[TMP0:%.*]] = load i8, i8* [[MYSTR_CAST]], align 1
+; CHECK-NEXT:    [[TMP0:%.*]] = load i8, i8* [[MYSTR_CAST]], align 8
 ; CHECK-NEXT:    [[MYSTR_0_1:%.*]] = getelementptr [[STRUCT_MYSTR:%.*]], %struct.MYstr* @mystr, i32 0, i32 1
-; CHECK-NEXT:    [[TMP1:%.*]] = load i32, i32* [[MYSTR_0_1]], align 1
+; CHECK-NEXT:    [[TMP1:%.*]] = load i32, i32* [[MYSTR_0_1]], align 8
 ; CHECK-NEXT:    [[RESULT:%.*]] = call i32 @vfu2(i8 [[TMP0]], i32 [[TMP1]])
 ; CHECK-NEXT:    ret i32 [[RESULT]]
 ;
@@ -94,9 +94,9 @@
 ; CHECK-LABEL: define {{[^@]+}}@unions_v2()
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[MYSTR_CAST:%.*]] = bitcast %struct.MYstr* @mystr to i8*
-; CHECK-NEXT:    [[TMP0:%.*]] = load i8, i8* [[MYSTR_CAST]], align 1
+; CHECK-NEXT:    [[TMP0:%.*]] = load i8, i8* [[MYSTR_CAST]], align 8
 ; CHECK-NEXT:    [[MYSTR_0_1:%.*]] = getelementptr [[STRUCT_MYSTR:%.*]], %struct.MYstr* @mystr, i32 0, i32 1
-; CHECK-NEXT:    [[TMP1:%.*]] = load i32, i32* [[MYSTR_0_1]], align 1
+; CHECK-NEXT:    [[TMP1:%.*]] = load i32, i32* [[MYSTR_0_1]], align 8
 ; CHECK-NEXT:    [[RESULT:%.*]] = call i32 @vfu2_v2(i8 [[TMP0]], i32 [[TMP1]])
 ; CHECK-NEXT:    ret i32 [[RESULT]]
 ;