diff --git a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp --- a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp +++ b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp @@ -5053,6 +5053,11 @@ if (!PrivatizableType.getValue()) return indicatePessimisticFixpoint(); + // The dependence is optional so we don't give up once we give up on the + // alignment. + A.getAAFor(*this, IRPosition::value(getAssociatedValue()), + /* TrackDependence */ true, DepClassTy::OPTIONAL); + // Avoid arguments with padding for now. if (!getIRPosition().hasAttr(Attribute::ByVal) && !ArgumentPromotionPass::isDenselyPacked(PrivatizableType.getValue(), @@ -5267,8 +5272,8 @@ /// Extract values from \p Base according to the type \p PrivType at the /// call position \p ACS. The values are appended to \p ReplacementValues. - void createReplacementValues(Type *PrivType, AbstractCallSite ACS, - Value *Base, + void createReplacementValues(Align Alignment, Type *PrivType, + AbstractCallSite ACS, Value *Base, SmallVectorImpl &ReplacementValues) { assert(Base && "Expected base value!"); assert(PrivType && "Expected privatizable type!"); @@ -5281,7 +5286,6 @@ Base = BitCastInst::CreateBitOrPointerCast(Base, PrivType->getPointerTo(), "", ACS.getInstruction()); - // TODO: Improve the alignment of the loads. // Traverse the type, build GEPs and loads. if (auto *PrivStructType = dyn_cast(PrivType)) { const StructLayout *PrivStructLayout = DL.getStructLayout(PrivStructType); @@ -5291,7 +5295,7 @@ constructPointer(PointeeTy->getPointerTo(), Base, PrivStructLayout->getElementOffset(u), IRB, DL); LoadInst *L = new LoadInst(PointeeTy, Ptr, "", IP); - L->setAlignment(Align(1)); + L->setAlignment(Alignment); ReplacementValues.push_back(L); } } else if (auto *PrivArrayType = dyn_cast(PrivType)) { @@ -5302,12 +5306,12 @@ Value *Ptr = constructPointer(PointeePtrTy, Base, u * PointeeTySize, IRB, DL); LoadInst *L = new LoadInst(PointeePtrTy, Ptr, "", IP); - L->setAlignment(Align(1)); + L->setAlignment(Alignment); ReplacementValues.push_back(L); } } else { LoadInst *L = new LoadInst(PrivType, Base, "", IP); - L->setAlignment(Align(1)); + L->setAlignment(Alignment); ReplacementValues.push_back(L); } } @@ -5333,6 +5337,9 @@ return ChangeStatus::UNCHANGED; Argument *Arg = getAssociatedArgument(); + // Query AAAlign attribute for alignment of associated argument to + // determine the best alignment of loads. + const auto &AlignAA = A.getAAFor(*this, IRPosition::value(*Arg)); // Callback to repair the associated function. A new alloca is placed at the // beginning and initialized with the values passed through arguments. The @@ -5356,9 +5363,13 @@ // of the privatizable type are loaded prior to the call and passed to the // new function version. Attributor::ArgumentReplacementInfo::ACSRepairCBTy ACSRepairCB = - [=](const Attributor::ArgumentReplacementInfo &ARI, - AbstractCallSite ACS, SmallVectorImpl &NewArgOperands) { + [=, &AlignAA](const Attributor::ArgumentReplacementInfo &ARI, + AbstractCallSite ACS, + SmallVectorImpl &NewArgOperands) { + // When no alignment is specified for the load instruction, + // natural alignment is assumed. createReplacementValues( + assumeAligned(AlignAA.getAssumedAlign()), PrivatizableType.getValue(), ACS, ACS.getCallArgOperand(ARI.getReplacedArg().getArgNo()), NewArgOperands); diff --git a/llvm/test/Transforms/Attributor/ArgumentPromotion/2008-02-01-ReturnAttrs.ll b/llvm/test/Transforms/Attributor/ArgumentPromotion/2008-02-01-ReturnAttrs.ll --- a/llvm/test/Transforms/Attributor/ArgumentPromotion/2008-02-01-ReturnAttrs.ll +++ b/llvm/test/Transforms/Attributor/ArgumentPromotion/2008-02-01-ReturnAttrs.ll @@ -44,7 +44,7 @@ ; IS__TUNIT_NPM-NEXT: entry: ; IS__TUNIT_NPM-NEXT: [[X_ADDR:%.*]] = alloca i32 ; IS__TUNIT_NPM-NEXT: store i32 [[X]], i32* [[X_ADDR]], align 4 -; IS__TUNIT_NPM-NEXT: [[TMP0:%.*]] = load i32, i32* [[X_ADDR]], align 1 +; IS__TUNIT_NPM-NEXT: [[TMP0:%.*]] = load i32, i32* [[X_ADDR]], align 4 ; IS__TUNIT_NPM-NEXT: [[TMP1:%.*]] = call i32 @deref(i32 [[TMP0]]) ; IS__TUNIT_NPM-NEXT: ret i32 [[TMP1]] ; diff --git a/llvm/test/Transforms/Attributor/ArgumentPromotion/X86/attributes.ll b/llvm/test/Transforms/Attributor/ArgumentPromotion/X86/attributes.ll --- a/llvm/test/Transforms/Attributor/ArgumentPromotion/X86/attributes.ll +++ b/llvm/test/Transforms/Attributor/ArgumentPromotion/X86/attributes.ll @@ -132,7 +132,7 @@ ; IS__TUNIT_NPM-NEXT: [[TMP2:%.*]] = alloca <4 x i64>, align 32 ; IS__TUNIT_NPM-NEXT: [[TMP3:%.*]] = bitcast <4 x i64>* [[TMP]] to i8* ; IS__TUNIT_NPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nocapture nonnull writeonly align 32 dereferenceable(32) [[TMP3]], i8 0, i64 32, i1 false) -; IS__TUNIT_NPM-NEXT: [[TMP0:%.*]] = load <4 x i64>, <4 x i64>* [[TMP]], align 1 +; IS__TUNIT_NPM-NEXT: [[TMP0:%.*]] = load <4 x i64>, <4 x i64>* [[TMP]], align 32 ; IS__TUNIT_NPM-NEXT: call fastcc void @promote_avx2(<4 x i64>* noalias nocapture nofree nonnull writeonly align 32 dereferenceable(32) [[TMP2]], <4 x i64> [[TMP0]]) ; IS__TUNIT_NPM-NEXT: [[TMP4:%.*]] = load <4 x i64>, <4 x i64>* [[TMP2]], align 32 ; IS__TUNIT_NPM-NEXT: store <4 x i64> [[TMP4]], <4 x i64>* [[ARG]], align 2 diff --git a/llvm/test/Transforms/Attributor/ArgumentPromotion/X86/min-legal-vector-width.ll b/llvm/test/Transforms/Attributor/ArgumentPromotion/X86/min-legal-vector-width.ll --- a/llvm/test/Transforms/Attributor/ArgumentPromotion/X86/min-legal-vector-width.ll +++ b/llvm/test/Transforms/Attributor/ArgumentPromotion/X86/min-legal-vector-width.ll @@ -54,7 +54,7 @@ ; IS__TUNIT_NPM-NEXT: [[TMP2:%.*]] = alloca <8 x i64>, align 32 ; IS__TUNIT_NPM-NEXT: [[TMP3:%.*]] = bitcast <8 x i64>* [[TMP]] to i8* ; IS__TUNIT_NPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nocapture nonnull writeonly align 32 dereferenceable(64) [[TMP3]], i8 0, i64 32, i1 false) -; IS__TUNIT_NPM-NEXT: [[TMP0:%.*]] = load <8 x i64>, <8 x i64>* [[TMP]], align 1 +; IS__TUNIT_NPM-NEXT: [[TMP0:%.*]] = load <8 x i64>, <8 x i64>* [[TMP]], align 64 ; IS__TUNIT_NPM-NEXT: call fastcc void @callee_avx512_legal512_prefer512_call_avx512_legal512_prefer512(<8 x i64>* noalias nocapture nofree nonnull writeonly align 64 dereferenceable(64) [[TMP2]], <8 x i64> [[TMP0]]) ; IS__TUNIT_NPM-NEXT: [[TMP4:%.*]] = load <8 x i64>, <8 x i64>* [[TMP2]], align 64 ; IS__TUNIT_NPM-NEXT: store <8 x i64> [[TMP4]], <8 x i64>* [[ARG]], align 2 @@ -141,7 +141,7 @@ ; IS__TUNIT_NPM-NEXT: [[TMP2:%.*]] = alloca <8 x i64>, align 32 ; IS__TUNIT_NPM-NEXT: [[TMP3:%.*]] = bitcast <8 x i64>* [[TMP]] to i8* ; IS__TUNIT_NPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nocapture nonnull writeonly align 32 dereferenceable(64) [[TMP3]], i8 0, i64 32, i1 false) -; IS__TUNIT_NPM-NEXT: [[TMP0:%.*]] = load <8 x i64>, <8 x i64>* [[TMP]], align 1 +; IS__TUNIT_NPM-NEXT: [[TMP0:%.*]] = load <8 x i64>, <8 x i64>* [[TMP]], align 64 ; IS__TUNIT_NPM-NEXT: call fastcc void @callee_avx512_legal512_prefer256_call_avx512_legal512_prefer256(<8 x i64>* noalias nocapture nofree nonnull writeonly align 64 dereferenceable(64) [[TMP2]], <8 x i64> [[TMP0]]) ; IS__TUNIT_NPM-NEXT: [[TMP4:%.*]] = load <8 x i64>, <8 x i64>* [[TMP2]], align 64 ; IS__TUNIT_NPM-NEXT: store <8 x i64> [[TMP4]], <8 x i64>* [[ARG]], align 2 @@ -228,7 +228,7 @@ ; IS__TUNIT_NPM-NEXT: [[TMP2:%.*]] = alloca <8 x i64>, align 32 ; IS__TUNIT_NPM-NEXT: [[TMP3:%.*]] = bitcast <8 x i64>* [[TMP]] to i8* ; IS__TUNIT_NPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nocapture nonnull writeonly align 32 dereferenceable(64) [[TMP3]], i8 0, i64 32, i1 false) -; IS__TUNIT_NPM-NEXT: [[TMP0:%.*]] = load <8 x i64>, <8 x i64>* [[TMP]], align 1 +; IS__TUNIT_NPM-NEXT: [[TMP0:%.*]] = load <8 x i64>, <8 x i64>* [[TMP]], align 64 ; IS__TUNIT_NPM-NEXT: call fastcc void @callee_avx512_legal512_prefer512_call_avx512_legal512_prefer256(<8 x i64>* noalias nocapture nofree nonnull writeonly align 64 dereferenceable(64) [[TMP2]], <8 x i64> [[TMP0]]) ; IS__TUNIT_NPM-NEXT: [[TMP4:%.*]] = load <8 x i64>, <8 x i64>* [[TMP2]], align 64 ; IS__TUNIT_NPM-NEXT: store <8 x i64> [[TMP4]], <8 x i64>* [[ARG]], align 2 @@ -315,7 +315,7 @@ ; IS__TUNIT_NPM-NEXT: [[TMP2:%.*]] = alloca <8 x i64>, align 32 ; IS__TUNIT_NPM-NEXT: [[TMP3:%.*]] = bitcast <8 x i64>* [[TMP]] to i8* ; IS__TUNIT_NPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nocapture nonnull writeonly align 32 dereferenceable(64) [[TMP3]], i8 0, i64 32, i1 false) -; IS__TUNIT_NPM-NEXT: [[TMP0:%.*]] = load <8 x i64>, <8 x i64>* [[TMP]], align 1 +; IS__TUNIT_NPM-NEXT: [[TMP0:%.*]] = load <8 x i64>, <8 x i64>* [[TMP]], align 64 ; IS__TUNIT_NPM-NEXT: call fastcc void @callee_avx512_legal512_prefer256_call_avx512_legal512_prefer512(<8 x i64>* noalias nocapture nofree nonnull writeonly align 64 dereferenceable(64) [[TMP2]], <8 x i64> [[TMP0]]) ; IS__TUNIT_NPM-NEXT: [[TMP4:%.*]] = load <8 x i64>, <8 x i64>* [[TMP2]], align 64 ; IS__TUNIT_NPM-NEXT: store <8 x i64> [[TMP4]], <8 x i64>* [[ARG]], align 2 @@ -570,7 +570,7 @@ ; IS__TUNIT_NPM-NEXT: [[TMP2:%.*]] = alloca <8 x i64>, align 32 ; IS__TUNIT_NPM-NEXT: [[TMP3:%.*]] = bitcast <8 x i64>* [[TMP]] to i8* ; IS__TUNIT_NPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nocapture nonnull writeonly align 32 dereferenceable(64) [[TMP3]], i8 0, i64 32, i1 false) -; IS__TUNIT_NPM-NEXT: [[TMP0:%.*]] = load <8 x i64>, <8 x i64>* [[TMP]], align 1 +; IS__TUNIT_NPM-NEXT: [[TMP0:%.*]] = load <8 x i64>, <8 x i64>* [[TMP]], align 64 ; IS__TUNIT_NPM-NEXT: call fastcc void @callee_avx2_legal256_prefer256_call_avx2_legal512_prefer256(<8 x i64>* noalias nocapture nofree nonnull writeonly align 64 dereferenceable(64) [[TMP2]], <8 x i64> [[TMP0]]) ; IS__TUNIT_NPM-NEXT: [[TMP4:%.*]] = load <8 x i64>, <8 x i64>* [[TMP2]], align 64 ; IS__TUNIT_NPM-NEXT: store <8 x i64> [[TMP4]], <8 x i64>* [[ARG]], align 2 @@ -657,7 +657,7 @@ ; IS__TUNIT_NPM-NEXT: [[TMP2:%.*]] = alloca <8 x i64>, align 32 ; IS__TUNIT_NPM-NEXT: [[TMP3:%.*]] = bitcast <8 x i64>* [[TMP]] to i8* ; IS__TUNIT_NPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nocapture nonnull writeonly align 32 dereferenceable(64) [[TMP3]], i8 0, i64 32, i1 false) -; IS__TUNIT_NPM-NEXT: [[TMP0:%.*]] = load <8 x i64>, <8 x i64>* [[TMP]], align 1 +; IS__TUNIT_NPM-NEXT: [[TMP0:%.*]] = load <8 x i64>, <8 x i64>* [[TMP]], align 64 ; IS__TUNIT_NPM-NEXT: call fastcc void @callee_avx2_legal512_prefer256_call_avx2_legal256_prefer256(<8 x i64>* noalias nocapture nofree nonnull writeonly align 64 dereferenceable(64) [[TMP2]], <8 x i64> [[TMP0]]) ; IS__TUNIT_NPM-NEXT: [[TMP4:%.*]] = load <8 x i64>, <8 x i64>* [[TMP2]], align 64 ; IS__TUNIT_NPM-NEXT: store <8 x i64> [[TMP4]], <8 x i64>* [[ARG]], align 2 diff --git a/llvm/test/Transforms/Attributor/ArgumentPromotion/alignment.ll b/llvm/test/Transforms/Attributor/ArgumentPromotion/alignment.ll --- a/llvm/test/Transforms/Attributor/ArgumentPromotion/alignment.ll +++ b/llvm/test/Transforms/Attributor/ArgumentPromotion/alignment.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --scrub-attributes -; RUN: opt -attributor -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=4 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_NPM,NOT_CGSCC_OPM,NOT_TUNIT_NPM,IS__TUNIT____,IS________OPM,IS__TUNIT_OPM -; RUN: opt -aa-pipeline=basic-aa -passes=attributor -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=4 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_OPM,NOT_CGSCC_NPM,NOT_TUNIT_OPM,IS__TUNIT____,IS________NPM,IS__TUNIT_NPM +; RUN: opt -attributor -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=11 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_NPM,NOT_CGSCC_OPM,NOT_TUNIT_NPM,IS__TUNIT____,IS________OPM,IS__TUNIT_OPM +; RUN: opt -aa-pipeline=basic-aa -passes=attributor -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=11 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_OPM,NOT_CGSCC_NPM,NOT_TUNIT_OPM,IS__TUNIT____,IS________NPM,IS__TUNIT_NPM ; RUN: opt -attributor-cgscc -attributor-manifest-internal -attributor-annotate-decl-cs -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_TUNIT_NPM,NOT_TUNIT_OPM,NOT_CGSCC_NPM,IS__CGSCC____,IS________OPM,IS__CGSCC_OPM ; RUN: opt -aa-pipeline=basic-aa -passes=attributor-cgscc -attributor-manifest-internal -attributor-annotate-decl-cs -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_TUNIT_NPM,NOT_TUNIT_OPM,NOT_CGSCC_OPM,IS__CGSCC____,IS________NPM,IS__CGSCC_NPM @@ -51,3 +51,113 @@ } declare void @z(i32) + +; Test2 +; Different alignemnt privatizable arguments +define internal i32 @test(i32* %X, i64* %Y) { +; IS__TUNIT_OPM-LABEL: define {{[^@]+}}@test +; IS__TUNIT_OPM-SAME: (i32* noalias nocapture nofree nonnull readonly align 4 dereferenceable(4) [[X:%.*]], i64* noalias nocapture nofree nonnull readonly align 8 dereferenceable(8) [[Y:%.*]]) +; IS__TUNIT_OPM-NEXT: [[A:%.*]] = load i32, i32* [[X]], align 4 +; IS__TUNIT_OPM-NEXT: [[B:%.*]] = load i64, i64* [[Y]], align 8 +; IS__TUNIT_OPM-NEXT: [[C:%.*]] = add i32 [[A]], 1 +; IS__TUNIT_OPM-NEXT: [[D:%.*]] = add i64 [[B]], 1 +; IS__TUNIT_OPM-NEXT: [[COND:%.*]] = icmp sgt i64 [[D]], -1 +; IS__TUNIT_OPM-NEXT: br i1 [[COND]], label [[RETURN1:%.*]], label [[RETURN2:%.*]] +; IS__TUNIT_OPM: Return1: +; IS__TUNIT_OPM-NEXT: ret i32 [[C]] +; IS__TUNIT_OPM: Return2: +; IS__TUNIT_OPM-NEXT: ret i32 [[A]] +; +; IS__TUNIT_NPM-LABEL: define {{[^@]+}}@test +; IS__TUNIT_NPM-SAME: (i32 [[TMP0:%.*]], i64 [[TMP1:%.*]]) +; IS__TUNIT_NPM-NEXT: [[Y_PRIV:%.*]] = alloca i64 +; IS__TUNIT_NPM-NEXT: store i64 [[TMP1]], i64* [[Y_PRIV]] +; IS__TUNIT_NPM-NEXT: [[X_PRIV:%.*]] = alloca i32 +; IS__TUNIT_NPM-NEXT: store i32 [[TMP0]], i32* [[X_PRIV]] +; IS__TUNIT_NPM-NEXT: [[A:%.*]] = load i32, i32* [[X_PRIV]], align 4 +; IS__TUNIT_NPM-NEXT: [[B:%.*]] = load i64, i64* [[Y_PRIV]], align 8 +; IS__TUNIT_NPM-NEXT: [[C:%.*]] = add i32 [[A]], 1 +; IS__TUNIT_NPM-NEXT: [[D:%.*]] = add i64 [[B]], 1 +; IS__TUNIT_NPM-NEXT: [[COND:%.*]] = icmp sgt i64 [[D]], -1 +; IS__TUNIT_NPM-NEXT: br i1 [[COND]], label [[RETURN1:%.*]], label [[RETURN2:%.*]] +; IS__TUNIT_NPM: Return1: +; IS__TUNIT_NPM-NEXT: ret i32 [[C]] +; IS__TUNIT_NPM: Return2: +; IS__TUNIT_NPM-NEXT: ret i32 [[A]] +; +; IS__CGSCC____-LABEL: define {{[^@]+}}@test +; IS__CGSCC____-SAME: (i32* nocapture nofree nonnull readonly align 4 dereferenceable(4) [[X:%.*]], i64* nocapture nofree nonnull readonly align 8 dereferenceable(8) [[Y:%.*]]) +; IS__CGSCC____-NEXT: [[A:%.*]] = load i32, i32* [[X]], align 4 +; IS__CGSCC____-NEXT: [[B:%.*]] = load i64, i64* [[Y]], align 8 +; IS__CGSCC____-NEXT: [[C:%.*]] = add i32 [[A]], 1 +; IS__CGSCC____-NEXT: [[D:%.*]] = add i64 [[B]], 1 +; IS__CGSCC____-NEXT: [[COND:%.*]] = icmp sgt i64 [[D]], -1 +; IS__CGSCC____-NEXT: br i1 [[COND]], label [[RETURN1:%.*]], label [[RETURN2:%.*]] +; IS__CGSCC____: Return1: +; IS__CGSCC____-NEXT: ret i32 [[C]] +; IS__CGSCC____: Return2: +; IS__CGSCC____-NEXT: ret i32 [[A]] +; + %A = load i32, i32* %X + %B = load i64, i64* %Y + %C = add i32 %A, 1 + %D = add i64 %B, 1 + %cond = icmp sgt i64 %D, -1 + br i1 %cond, label %Return1, label %Return2 +Return1: + ret i32 %C +Return2: + ret i32 %A +} + +define internal i32 @caller(i32* %A) { +; IS__TUNIT_OPM-LABEL: define {{[^@]+}}@caller +; IS__TUNIT_OPM-SAME: (i32* noalias nocapture nofree nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) +; IS__TUNIT_OPM-NEXT: [[B:%.*]] = alloca i64 +; IS__TUNIT_OPM-NEXT: store i64 1, i64* [[B]], align 8 +; IS__TUNIT_OPM-NEXT: [[C:%.*]] = call i32 @test(i32* noalias nocapture nofree nonnull readonly align 4 dereferenceable(4) [[A]], i64* noalias nocapture nofree nonnull readonly align 8 dereferenceable(8) [[B]]) +; IS__TUNIT_OPM-NEXT: ret i32 [[C]] +; +; IS__TUNIT_NPM-LABEL: define {{[^@]+}}@caller +; IS__TUNIT_NPM-SAME: (i32 [[TMP0:%.*]]) +; IS__TUNIT_NPM-NEXT: [[A_PRIV:%.*]] = alloca i32 +; IS__TUNIT_NPM-NEXT: store i32 [[TMP0]], i32* [[A_PRIV]] +; IS__TUNIT_NPM-NEXT: [[B:%.*]] = alloca i64 +; IS__TUNIT_NPM-NEXT: store i64 1, i64* [[B]], align 8 +; IS__TUNIT_NPM-NEXT: [[TMP2:%.*]] = load i32, i32* [[A_PRIV]], align 4 +; IS__TUNIT_NPM-NEXT: [[TMP3:%.*]] = load i64, i64* [[B]], align 8 +; IS__TUNIT_NPM-NEXT: [[C:%.*]] = call i32 @test(i32 [[TMP2]], i64 [[TMP3]]) +; IS__TUNIT_NPM-NEXT: ret i32 [[C]] +; +; IS__CGSCC____-LABEL: define {{[^@]+}}@caller +; IS__CGSCC____-SAME: (i32* nocapture nofree nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) +; IS__CGSCC____-NEXT: [[B:%.*]] = alloca i64 +; IS__CGSCC____-NEXT: store i64 1, i64* [[B]], align 8 +; IS__CGSCC____-NEXT: [[C:%.*]] = call i32 @test(i32* nocapture nofree nonnull readonly align 4 dereferenceable(4) [[A]], i64* noalias nocapture nofree nonnull readonly align 8 dereferenceable(8) [[B]]) +; IS__CGSCC____-NEXT: ret i32 [[C]] +; + %B = alloca i64 + store i64 1, i64* %B + %C = call i32 @test(i32* %A, i64* %B) + ret i32 %C +} + +define i32 @callercaller() { +; NOT_TUNIT_NPM-LABEL: define {{[^@]+}}@callercaller() +; NOT_TUNIT_NPM-NEXT: [[B:%.*]] = alloca i32 +; NOT_TUNIT_NPM-NEXT: store i32 2, i32* [[B]], align 4 +; NOT_TUNIT_NPM-NEXT: [[X:%.*]] = call i32 @caller(i32* noalias nocapture nofree nonnull readonly align 4 dereferenceable(4) [[B]]) +; NOT_TUNIT_NPM-NEXT: ret i32 [[X]] +; +; IS__TUNIT_NPM-LABEL: define {{[^@]+}}@callercaller() +; IS__TUNIT_NPM-NEXT: [[B:%.*]] = alloca i32 +; IS__TUNIT_NPM-NEXT: store i32 2, i32* [[B]], align 4 +; IS__TUNIT_NPM-NEXT: [[TMP1:%.*]] = load i32, i32* [[B]], align 4 +; IS__TUNIT_NPM-NEXT: [[X:%.*]] = call i32 @caller(i32 [[TMP1]]) +; IS__TUNIT_NPM-NEXT: ret i32 [[X]] +; + %B = alloca i32 + store i32 2, i32* %B + %X = call i32 @caller(i32* %B) + ret i32 %X +} diff --git a/llvm/test/Transforms/Attributor/ArgumentPromotion/attrs.ll b/llvm/test/Transforms/Attributor/ArgumentPromotion/attrs.ll --- a/llvm/test/Transforms/Attributor/ArgumentPromotion/attrs.ll +++ b/llvm/test/Transforms/Attributor/ArgumentPromotion/attrs.ll @@ -107,10 +107,10 @@ ; IS__TUNIT_NPM-NEXT: [[TMP4:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S]], i32 0, i32 1 ; IS__TUNIT_NPM-NEXT: store i64 2, i64* [[TMP4]], align 4 ; IS__TUNIT_NPM-NEXT: [[S_CAST:%.*]] = bitcast %struct.ss* [[S]] to i32* -; IS__TUNIT_NPM-NEXT: [[TMP0:%.*]] = load i32, i32* [[S_CAST]], align 1 +; IS__TUNIT_NPM-NEXT: [[TMP0:%.*]] = load i32, i32* [[S_CAST]], align 8 ; IS__TUNIT_NPM-NEXT: [[S_0_1:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S]], i32 0, i32 1 -; IS__TUNIT_NPM-NEXT: [[TMP1:%.*]] = load i64, i64* [[S_0_1]], align 1 -; IS__TUNIT_NPM-NEXT: [[TMP2:%.*]] = load i32, i32* [[X]], align 1 +; IS__TUNIT_NPM-NEXT: [[TMP1:%.*]] = load i64, i64* [[S_0_1]], align 8 +; IS__TUNIT_NPM-NEXT: [[TMP2:%.*]] = load i32, i32* [[X]], align 4 ; IS__TUNIT_NPM-NEXT: [[C:%.*]] = call i32 @f(i32 [[TMP0]], i64 [[TMP1]], i32 [[TMP2]], i32 zeroext 0) ; IS__TUNIT_NPM-NEXT: ret i32 [[C]] ; @@ -126,7 +126,7 @@ ; IS__CGSCC_OPM-NEXT: ret i32 [[C]] ; ; IS__CGSCC_NPM-LABEL: define {{[^@]+}}@test -; IS__CGSCC_NPM-SAME: (i32* nocapture nofree nonnull readonly dereferenceable(4) [[X:%.*]]) +; IS__CGSCC_NPM-SAME: (i32* nocapture nofree nonnull readonly align 4 dereferenceable(4) [[X:%.*]]) ; IS__CGSCC_NPM-NEXT: entry: ; IS__CGSCC_NPM-NEXT: [[S:%.*]] = alloca [[STRUCT_SS:%.*]] ; IS__CGSCC_NPM-NEXT: [[TMP1:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S]], i32 0, i32 0 @@ -136,8 +136,8 @@ ; IS__CGSCC_NPM-NEXT: [[S_CAST:%.*]] = bitcast %struct.ss* [[S]] to i32* ; IS__CGSCC_NPM-NEXT: [[TMP0:%.*]] = load i32, i32* [[S_CAST]], align 8 ; IS__CGSCC_NPM-NEXT: [[S_0_1:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S]], i32 0, i32 1 -; IS__CGSCC_NPM-NEXT: [[TMP1:%.*]] = load i64, i64* [[S_0_1]], align 1 -; IS__CGSCC_NPM-NEXT: [[TMP2:%.*]] = load i32, i32* [[X]], align 1 +; IS__CGSCC_NPM-NEXT: [[TMP1:%.*]] = load i64, i64* [[S_0_1]], align 8 +; IS__CGSCC_NPM-NEXT: [[TMP2:%.*]] = load i32, i32* [[X]], align 4 ; IS__CGSCC_NPM-NEXT: [[C:%.*]] = call i32 @f(i32 [[TMP0]], i64 [[TMP1]], i32 [[TMP2]]) ; IS__CGSCC_NPM-NEXT: ret i32 [[C]] ; diff --git a/llvm/test/Transforms/Attributor/ArgumentPromotion/basictest.ll b/llvm/test/Transforms/Attributor/ArgumentPromotion/basictest.ll --- a/llvm/test/Transforms/Attributor/ArgumentPromotion/basictest.ll +++ b/llvm/test/Transforms/Attributor/ArgumentPromotion/basictest.ll @@ -51,8 +51,8 @@ ; IS__TUNIT_NPM-NEXT: store i32 [[TMP0]], i32* [[B_PRIV]] ; IS__TUNIT_NPM-NEXT: [[A:%.*]] = alloca i32 ; IS__TUNIT_NPM-NEXT: store i32 1, i32* [[A]], align 4 -; IS__TUNIT_NPM-NEXT: [[TMP2:%.*]] = load i32, i32* [[A]], align 1 -; IS__TUNIT_NPM-NEXT: [[TMP3:%.*]] = load i32, i32* [[B_PRIV]], align 1 +; IS__TUNIT_NPM-NEXT: [[TMP2:%.*]] = load i32, i32* [[A]], align 4 +; IS__TUNIT_NPM-NEXT: [[TMP3:%.*]] = load i32, i32* [[B_PRIV]], align 4 ; IS__TUNIT_NPM-NEXT: [[C:%.*]] = call i32 @test(i32 [[TMP2]], i32 [[TMP3]]) ; IS__TUNIT_NPM-NEXT: ret i32 [[C]] ; @@ -79,7 +79,7 @@ ; IS__TUNIT_NPM-LABEL: define {{[^@]+}}@callercaller() ; IS__TUNIT_NPM-NEXT: [[B:%.*]] = alloca i32 ; IS__TUNIT_NPM-NEXT: store i32 2, i32* [[B]], align 4 -; IS__TUNIT_NPM-NEXT: [[TMP1:%.*]] = load i32, i32* [[B]], align 1 +; IS__TUNIT_NPM-NEXT: [[TMP1:%.*]] = load i32, i32* [[B]], align 4 ; IS__TUNIT_NPM-NEXT: [[X:%.*]] = call i32 @caller(i32 [[TMP1]]) ; IS__TUNIT_NPM-NEXT: ret i32 [[X]] ; diff --git a/llvm/test/Transforms/Attributor/ArgumentPromotion/byval-2.ll b/llvm/test/Transforms/Attributor/ArgumentPromotion/byval-2.ll --- a/llvm/test/Transforms/Attributor/ArgumentPromotion/byval-2.ll +++ b/llvm/test/Transforms/Attributor/ArgumentPromotion/byval-2.ll @@ -67,7 +67,7 @@ ; IS__CGSCC_OPM-NEXT: ret i32 0 ; ; IS__CGSCC_NPM-LABEL: define {{[^@]+}}@test -; IS__CGSCC_NPM-SAME: (i32* nocapture nofree nonnull readonly dereferenceable(4) [[X:%.*]]) +; IS__CGSCC_NPM-SAME: (i32* nocapture nofree nonnull readonly align 4 dereferenceable(4) [[X:%.*]]) ; IS__CGSCC_NPM-NEXT: entry: ; IS__CGSCC_NPM-NEXT: [[S:%.*]] = alloca [[STRUCT_SS:%.*]] ; IS__CGSCC_NPM-NEXT: [[TMP1:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S]], i32 0, i32 0 diff --git a/llvm/test/Transforms/Attributor/ArgumentPromotion/byval.ll b/llvm/test/Transforms/Attributor/ArgumentPromotion/byval.ll --- a/llvm/test/Transforms/Attributor/ArgumentPromotion/byval.ll +++ b/llvm/test/Transforms/Attributor/ArgumentPromotion/byval.ll @@ -123,14 +123,14 @@ ; IS__TUNIT_NPM-NEXT: [[TMP4:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S]], i32 0, i32 1 ; IS__TUNIT_NPM-NEXT: store i64 2, i64* [[TMP4]], align 4 ; IS__TUNIT_NPM-NEXT: [[S_CAST1:%.*]] = bitcast %struct.ss* [[S]] to i32* -; IS__TUNIT_NPM-NEXT: [[TMP0:%.*]] = load i32, i32* [[S_CAST1]], align 1 +; IS__TUNIT_NPM-NEXT: [[TMP0:%.*]] = load i32, i32* [[S_CAST1]], align 8 ; IS__TUNIT_NPM-NEXT: [[S_0_12:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S]], i32 0, i32 1 -; IS__TUNIT_NPM-NEXT: [[TMP1:%.*]] = load i64, i64* [[S_0_12]], align 1 +; IS__TUNIT_NPM-NEXT: [[TMP1:%.*]] = load i64, i64* [[S_0_12]], align 8 ; IS__TUNIT_NPM-NEXT: [[C0:%.*]] = call i32 @f(i32 [[TMP0]], i64 [[TMP1]]) ; IS__TUNIT_NPM-NEXT: [[S_CAST:%.*]] = bitcast %struct.ss* [[S]] to i32* -; IS__TUNIT_NPM-NEXT: [[TMP2:%.*]] = load i32, i32* [[S_CAST]], align 1 +; IS__TUNIT_NPM-NEXT: [[TMP2:%.*]] = load i32, i32* [[S_CAST]], align 32 ; IS__TUNIT_NPM-NEXT: [[S_0_1:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S]], i32 0, i32 1 -; IS__TUNIT_NPM-NEXT: [[TMP3:%.*]] = load i64, i64* [[S_0_1]], align 1 +; IS__TUNIT_NPM-NEXT: [[TMP3:%.*]] = load i64, i64* [[S_0_1]], align 32 ; IS__TUNIT_NPM-NEXT: [[C1:%.*]] = call i32 @g(i32 [[TMP2]], i64 [[TMP3]]) ; IS__TUNIT_NPM-NEXT: [[A:%.*]] = add i32 [[C0]], [[C1]] ; IS__TUNIT_NPM-NEXT: ret i32 [[A]] @@ -151,18 +151,18 @@ ; IS__CGSCC_NPM-NEXT: entry: ; IS__CGSCC_NPM-NEXT: [[S:%.*]] = alloca [[STRUCT_SS:%.*]] ; IS__CGSCC_NPM-NEXT: [[TMP1:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S]], i32 0, i32 0 -; IS__CGSCC_NPM-NEXT: store i32 1, i32* [[TMP1]], align 8 +; IS__CGSCC_NPM-NEXT: store i32 1, i32* [[TMP1]], align 32 ; IS__CGSCC_NPM-NEXT: [[TMP4:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S]], i32 0, i32 1 ; IS__CGSCC_NPM-NEXT: store i64 2, i64* [[TMP4]], align 4 ; IS__CGSCC_NPM-NEXT: [[S_CAST:%.*]] = bitcast %struct.ss* [[S]] to i32* -; IS__CGSCC_NPM-NEXT: [[TMP0:%.*]] = load i32, i32* [[S_CAST]], align 8 +; IS__CGSCC_NPM-NEXT: [[TMP0:%.*]] = load i32, i32* [[S_CAST]], align 32 ; IS__CGSCC_NPM-NEXT: [[S_0_1:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S]], i32 0, i32 1 -; IS__CGSCC_NPM-NEXT: [[TMP1:%.*]] = load i64, i64* [[S_0_1]], align 1 +; IS__CGSCC_NPM-NEXT: [[TMP1:%.*]] = load i64, i64* [[S_0_1]], align 8 ; IS__CGSCC_NPM-NEXT: [[C0:%.*]] = call i32 @f(i32 [[TMP0]], i64 [[TMP1]]) ; IS__CGSCC_NPM-NEXT: [[S_CAST1:%.*]] = bitcast %struct.ss* [[S]] to i32* -; IS__CGSCC_NPM-NEXT: [[TMP2:%.*]] = load i32, i32* [[S_CAST1]], align 8 +; IS__CGSCC_NPM-NEXT: [[TMP2:%.*]] = load i32, i32* [[S_CAST1]], align 32 ; IS__CGSCC_NPM-NEXT: [[S_0_12:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S]], i32 0, i32 1 -; IS__CGSCC_NPM-NEXT: [[TMP3:%.*]] = load i64, i64* [[S_0_12]], align 1 +; IS__CGSCC_NPM-NEXT: [[TMP3:%.*]] = load i64, i64* [[S_0_12]], align 32 ; IS__CGSCC_NPM-NEXT: [[C1:%.*]] = call i32 @g(i32 [[TMP2]], i64 [[TMP3]]) ; IS__CGSCC_NPM-NEXT: [[A:%.*]] = add i32 [[C0]], [[C1]] ; IS__CGSCC_NPM-NEXT: ret i32 [[A]] diff --git a/llvm/test/Transforms/Attributor/ArgumentPromotion/control-flow2.ll b/llvm/test/Transforms/Attributor/ArgumentPromotion/control-flow2.ll --- a/llvm/test/Transforms/Attributor/ArgumentPromotion/control-flow2.ll +++ b/llvm/test/Transforms/Attributor/ArgumentPromotion/control-flow2.ll @@ -56,7 +56,7 @@ ; IS__TUNIT_NPM-LABEL: define {{[^@]+}}@foo() ; IS__TUNIT_NPM-NEXT: [[A:%.*]] = alloca i32 ; IS__TUNIT_NPM-NEXT: store i32 17, i32* [[A]], align 4 -; IS__TUNIT_NPM-NEXT: [[TMP1:%.*]] = load i32, i32* [[A]], align 1 +; IS__TUNIT_NPM-NEXT: [[TMP1:%.*]] = load i32, i32* [[A]], align 4 ; IS__TUNIT_NPM-NEXT: [[X:%.*]] = call i32 @callee(i1 false, i32 [[TMP1]]) ; IS__TUNIT_NPM-NEXT: ret i32 [[X]] ; diff --git a/llvm/test/Transforms/Attributor/ArgumentPromotion/profile.ll b/llvm/test/Transforms/Attributor/ArgumentPromotion/profile.ll --- a/llvm/test/Transforms/Attributor/ArgumentPromotion/profile.ll +++ b/llvm/test/Transforms/Attributor/ArgumentPromotion/profile.ll @@ -17,7 +17,7 @@ ; IS__TUNIT_NPM-LABEL: define {{[^@]+}}@caller() ; IS__TUNIT_NPM-NEXT: [[X:%.*]] = alloca i32 ; IS__TUNIT_NPM-NEXT: store i32 42, i32* [[X]], align 4 -; IS__TUNIT_NPM-NEXT: [[TMP1:%.*]] = load i32, i32* [[X]], align 1 +; IS__TUNIT_NPM-NEXT: [[TMP1:%.*]] = load i32, i32* [[X]], align 4 ; IS__TUNIT_NPM-NEXT: call void @promote_i32_ptr(i32 [[TMP1]]), !prof !0 ; IS__TUNIT_NPM-NEXT: ret void ; diff --git a/llvm/test/Transforms/Attributor/IPConstantProp/2009-09-24-byval-ptr.ll b/llvm/test/Transforms/Attributor/IPConstantProp/2009-09-24-byval-ptr.ll --- a/llvm/test/Transforms/Attributor/IPConstantProp/2009-09-24-byval-ptr.ll +++ b/llvm/test/Transforms/Attributor/IPConstantProp/2009-09-24-byval-ptr.ll @@ -104,9 +104,9 @@ ; IS__TUNIT_NPM-LABEL: define {{[^@]+}}@unions() ; IS__TUNIT_NPM-NEXT: entry: ; IS__TUNIT_NPM-NEXT: [[MYSTR_CAST:%.*]] = bitcast %struct.MYstr* @mystr to i8* -; IS__TUNIT_NPM-NEXT: [[TMP0:%.*]] = load i8, i8* [[MYSTR_CAST]], align 1 +; IS__TUNIT_NPM-NEXT: [[TMP0:%.*]] = load i8, i8* [[MYSTR_CAST]], align 8 ; IS__TUNIT_NPM-NEXT: [[MYSTR_0_1:%.*]] = getelementptr [[STRUCT_MYSTR:%.*]], %struct.MYstr* @mystr, i32 0, i32 1 -; IS__TUNIT_NPM-NEXT: [[TMP1:%.*]] = load i32, i32* [[MYSTR_0_1]], align 1 +; IS__TUNIT_NPM-NEXT: [[TMP1:%.*]] = load i32, i32* [[MYSTR_0_1]], align 8 ; IS__TUNIT_NPM-NEXT: [[RESULT:%.*]] = call i32 @vfu2(i8 [[TMP0]], i32 [[TMP1]]) ; IS__TUNIT_NPM-NEXT: ret i32 [[RESULT]] ; @@ -192,9 +192,9 @@ ; IS__TUNIT_NPM-LABEL: define {{[^@]+}}@unions_v2() ; IS__TUNIT_NPM-NEXT: entry: ; IS__TUNIT_NPM-NEXT: [[MYSTR_CAST:%.*]] = bitcast %struct.MYstr* @mystr to i8* -; IS__TUNIT_NPM-NEXT: [[TMP0:%.*]] = load i8, i8* [[MYSTR_CAST]], align 1 +; IS__TUNIT_NPM-NEXT: [[TMP0:%.*]] = load i8, i8* [[MYSTR_CAST]], align 8 ; IS__TUNIT_NPM-NEXT: [[MYSTR_0_1:%.*]] = getelementptr [[STRUCT_MYSTR:%.*]], %struct.MYstr* @mystr, i32 0, i32 1 -; IS__TUNIT_NPM-NEXT: [[TMP1:%.*]] = load i32, i32* [[MYSTR_0_1]], align 1 +; IS__TUNIT_NPM-NEXT: [[TMP1:%.*]] = load i32, i32* [[MYSTR_0_1]], align 8 ; IS__TUNIT_NPM-NEXT: [[RESULT:%.*]] = call i32 @vfu2_v2(i8 [[TMP0]], i32 [[TMP1]]) ; IS__TUNIT_NPM-NEXT: ret i32 [[RESULT]] ; @@ -208,7 +208,7 @@ ; IS__CGSCC_NPM-NEXT: [[MYSTR_CAST1:%.*]] = bitcast %struct.MYstr* @mystr to i8* ; IS__CGSCC_NPM-NEXT: [[TMP0:%.*]] = load i8, i8* [[MYSTR_CAST1]], align 8 ; IS__CGSCC_NPM-NEXT: [[MYSTR_0_12:%.*]] = getelementptr [[STRUCT_MYSTR:%.*]], %struct.MYstr* @mystr, i32 0, i32 1 -; IS__CGSCC_NPM-NEXT: [[TMP1:%.*]] = load i32, i32* [[MYSTR_0_12]], align 1 +; IS__CGSCC_NPM-NEXT: [[TMP1:%.*]] = load i32, i32* [[MYSTR_0_12]], align 8 ; IS__CGSCC_NPM-NEXT: [[RESULT:%.*]] = call i32 @vfu2_v2(i8 [[TMP0]], i32 [[TMP1]]) ; IS__CGSCC_NPM-NEXT: ret i32 [[RESULT]] ; diff --git a/llvm/test/Transforms/Attributor/internal-noalias.ll b/llvm/test/Transforms/Attributor/internal-noalias.ll --- a/llvm/test/Transforms/Attributor/internal-noalias.ll +++ b/llvm/test/Transforms/Attributor/internal-noalias.ll @@ -155,8 +155,8 @@ ; IS__TUNIT_NPM-LABEL: define {{[^@]+}}@visible_local_2() ; IS__TUNIT_NPM-NEXT: [[B:%.*]] = alloca i32, align 4 ; IS__TUNIT_NPM-NEXT: store i32 5, i32* [[B]], align 4 -; IS__TUNIT_NPM-NEXT: [[TMP1:%.*]] = load i32, i32* [[B]], align 1 -; IS__TUNIT_NPM-NEXT: [[TMP2:%.*]] = load i32, i32* [[B]], align 1 +; IS__TUNIT_NPM-NEXT: [[TMP1:%.*]] = load i32, i32* [[B]], align 4 +; IS__TUNIT_NPM-NEXT: [[TMP2:%.*]] = load i32, i32* [[B]], align 4 ; IS__TUNIT_NPM-NEXT: [[CALL:%.*]] = call i32 @noalias_args_argmem_ro(i32 [[TMP1]], i32 [[TMP2]]) ; IS__TUNIT_NPM-NEXT: ret i32 [[CALL]] ; diff --git a/llvm/test/Transforms/Attributor/misc_crash.ll b/llvm/test/Transforms/Attributor/misc_crash.ll --- a/llvm/test/Transforms/Attributor/misc_crash.ll +++ b/llvm/test/Transforms/Attributor/misc_crash.ll @@ -77,7 +77,7 @@ } define void @func4() { -; CHECK-LABEL: define {{[^@]+}}@func4 +; CHECK-LABEL: define {{[^@]+}}@func4() ; CHECK-NEXT: call void @func5() ; CHECK-NEXT: ret void ; @@ -86,13 +86,13 @@ } define internal void @func5(i32 %0) { -; CHECK-LABEL: define {{[^@]+}}@func5 -; CHECK-NEXT: [[TMP1:%.*]] = alloca i8* -; CHECK-NEXT: br label %block +; CHECK-LABEL: define {{[^@]+}}@func5() +; CHECK-NEXT: [[TMP:%.*]] = alloca i8* +; CHECK-NEXT: br label [[BLOCK:%.*]] ; CHECK: block: -; CHECK-NEXT: store i8* blockaddress(@func5, %block), i8** [[TMP1]] -; CHECK-NEXT: [[TMP2:%.*]] = load i8*, i8** [[TMP1]] -; CHECK-NEXT: call void @func6(i8* [[TMP2]]) +; CHECK-NEXT: store i8* blockaddress(@func5, [[BLOCK]]), i8** [[TMP]], align 8 +; CHECK-NEXT: [[ADDR:%.*]] = load i8*, i8** [[TMP]], align 8 +; CHECK-NEXT: call void @func6(i8* [[ADDR]]) ; CHECK-NEXT: ret void ; %tmp = alloca i8*