diff --git a/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp b/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp --- a/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp +++ b/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp @@ -125,8 +125,11 @@ // original function so that we can tell the alias analysis implementation // what the new GEP/Load instructions we are inserting look like. // We need to keep the original loads for each argument and the elements - // of the argument that are accessed. - std::map<std::pair<Argument *, IndicesVector>, LoadInst *> OriginalLoads; + // of the argument that are accessed. The map also records whether the GEP + // used for the load in the callee is inbounds, so that the inbounds flag can + // be preserved on the GEPs created in callers. + std::map<std::pair<Argument *, IndicesVector>, std::pair<LoadInst *, bool>> + OriginalLoads; // Attribute - Keep track of the parameter attributes for the arguments // that we are *not* promoting. For the ones that we do promote, the parameter @@ -190,7 +193,11 @@ else // Take any load, we will use it only to update Alias Analysis OrigLoad = cast<LoadInst>(UI->user_back()); - OriginalLoads[std::make_pair(&*I, Indices)] = OrigLoad; + bool IsGepInbounds = false; + if (GEPOperator *GEPOp = dyn_cast<GEPOperator>(UI)) + IsGepInbounds = GEPOp->isInBounds(); + OriginalLoads[std::make_pair(&*I, Indices)] = + std::make_pair(OrigLoad, IsGepInbounds); } // Add a parameter to the function for each element passed in. @@ -267,8 +274,10 @@ Align StructAlign = *I->getParamAlign(); for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) { Idxs[1] = ConstantInt::get(Type::getInt32Ty(F->getContext()), i); - auto *Idx = - IRB.CreateGEP(STy, *AI, Idxs, (*AI)->getName() + "." + Twine(i)); + // It is safe to create inbounds GEP for byval argument since it is + // dereferenceable. + auto *Idx = IRB.CreateInBoundsGEP(STy, *AI, Idxs, + (*AI)->getName() + "." + Twine(i)); // TODO: Tell AA about the new values?
Align Alignment = commonAlignment(StructAlign, SL->getElementOffset(i)); @@ -285,7 +294,7 @@ for (const auto &ArgIndex : ArgIndices) { Value *V = *AI; LoadInst *OrigLoad = - OriginalLoads[std::make_pair(&*I, ArgIndex.second)]; + OriginalLoads[std::make_pair(&*I, ArgIndex.second)].first; if (!ArgIndex.second.empty()) { Ops.reserve(ArgIndex.second.size()); Type *ElTy = V->getType(); @@ -303,7 +312,16 @@ ElTy = GetElementPtrInst::getTypeAtIndex(ElTy, II); } // And create a GEP to extract those indices. - V = IRB.CreateGEP(ArgIndex.first, V, Ops, V->getName() + ".idx"); + bool IsCalleeArgGepInBound = + OriginalLoads[std::make_pair(&*I, ArgIndex.second)].second; + if (IsCalleeArgGepInBound) { + // Using preserved inbounds information of GEP in callee while + // creating GEP in caller. + V = IRB.CreateInBoundsGEP(ArgIndex.first, V, Ops, + V->getName() + ".idx"); + } else { + V = IRB.CreateGEP(ArgIndex.first, V, Ops, V->getName() + ".idx"); + } Ops.clear(); } // Since we're replacing a load make sure we take the alignment diff --git a/llvm/test/Transforms/ArgumentPromotion/attrs.ll b/llvm/test/Transforms/ArgumentPromotion/attrs.ll --- a/llvm/test/Transforms/ArgumentPromotion/attrs.ll +++ b/llvm/test/Transforms/ArgumentPromotion/attrs.ll @@ -41,9 +41,9 @@ ; CHECK-NEXT: store i32 1, i32* [[TMP1]], align 8 ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S]], i32 0, i32 1 ; CHECK-NEXT: store i64 2, i64* [[TMP4]], align 4 -; CHECK-NEXT: [[S_0:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S]], i32 0, i32 0 +; CHECK-NEXT: [[S_0:%.*]] = getelementptr inbounds [[STRUCT_SS]], %struct.ss* [[S]], i32 0, i32 0 ; CHECK-NEXT: [[S_0_VAL:%.*]] = load i32, i32* [[S_0]], align 4 -; CHECK-NEXT: [[S_1:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S]], i32 0, i32 1 +; CHECK-NEXT: [[S_1:%.*]] = getelementptr inbounds [[STRUCT_SS]], %struct.ss* [[S]], i32 0, i32 1 ; CHECK-NEXT: [[S_1_VAL:%.*]] = load i64, i64* [[S_1]], align 4 ; CHECK-NEXT: call void @f(i32 
[[S_0_VAL]], i64 [[S_1_VAL]], i32* byval(i32) align 4 [[X]], i32 zeroext 0) ; CHECK-NEXT: ret i32 0 diff --git a/llvm/test/Transforms/ArgumentPromotion/byval-2.ll b/llvm/test/Transforms/ArgumentPromotion/byval-2.ll --- a/llvm/test/Transforms/ArgumentPromotion/byval-2.ll +++ b/llvm/test/Transforms/ArgumentPromotion/byval-2.ll @@ -41,9 +41,9 @@ ; CHECK-NEXT: store i32 1, i32* [[TMP1]], align 8 ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S]], i32 0, i32 1 ; CHECK-NEXT: store i64 2, i64* [[TMP4]], align 4 -; CHECK-NEXT: [[S_0:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S]], i32 0, i32 0 +; CHECK-NEXT: [[S_0:%.*]] = getelementptr inbounds [[STRUCT_SS]], %struct.ss* [[S]], i32 0, i32 0 ; CHECK-NEXT: [[S_0_VAL:%.*]] = load i32, i32* [[S_0]], align 8 -; CHECK-NEXT: [[S_1:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S]], i32 0, i32 1 +; CHECK-NEXT: [[S_1:%.*]] = getelementptr inbounds [[STRUCT_SS]], %struct.ss* [[S]], i32 0, i32 1 ; CHECK-NEXT: [[S_1_VAL:%.*]] = load i64, i64* [[S_1]], align 4 ; CHECK-NEXT: call void @f(i32 [[S_0_VAL]], i64 [[S_1_VAL]], i32* byval(i32) align 4 [[X]]) ; CHECK-NEXT: ret i32 0 diff --git a/llvm/test/Transforms/ArgumentPromotion/byval.ll b/llvm/test/Transforms/ArgumentPromotion/byval.ll --- a/llvm/test/Transforms/ArgumentPromotion/byval.ll +++ b/llvm/test/Transforms/ArgumentPromotion/byval.ll @@ -78,14 +78,14 @@ ; CHECK-NEXT: store i32 1, i32* [[TMP1]], align 8 ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S]], i32 0, i32 1 ; CHECK-NEXT: store i64 2, i64* [[TMP4]], align 4 -; CHECK-NEXT: [[S_0:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S]], i32 0, i32 0 +; CHECK-NEXT: [[S_0:%.*]] = getelementptr inbounds [[STRUCT_SS]], %struct.ss* [[S]], i32 0, i32 0 ; CHECK-NEXT: [[S_0_VAL:%.*]] = load i32, i32* [[S_0]], align 4 -; CHECK-NEXT: [[S_1:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S]], i32 0, i32 1 +; CHECK-NEXT: [[S_1:%.*]] = getelementptr inbounds [[STRUCT_SS]], 
%struct.ss* [[S]], i32 0, i32 1 ; CHECK-NEXT: [[S_1_VAL:%.*]] = load i64, i64* [[S_1]], align 4 ; CHECK-NEXT: call void @f(i32 [[S_0_VAL]], i64 [[S_1_VAL]]) -; CHECK-NEXT: [[S_01:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S]], i32 0, i32 0 +; CHECK-NEXT: [[S_01:%.*]] = getelementptr inbounds [[STRUCT_SS]], %struct.ss* [[S]], i32 0, i32 0 ; CHECK-NEXT: [[S_01_VAL:%.*]] = load i32, i32* [[S_01]], align 32 -; CHECK-NEXT: [[S_12:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S]], i32 0, i32 1 +; CHECK-NEXT: [[S_12:%.*]] = getelementptr inbounds [[STRUCT_SS]], %struct.ss* [[S]], i32 0, i32 1 ; CHECK-NEXT: [[S_12_VAL:%.*]] = load i64, i64* [[S_12]], align 4 ; CHECK-NEXT: call void @g(i32 [[S_01_VAL]], i64 [[S_12_VAL]]) ; CHECK-NEXT: call void @h(%struct.ss* byval(%struct.ss) %S) diff --git a/llvm/test/Transforms/ArgumentPromotion/conditional-gep-and-load.ll b/llvm/test/Transforms/ArgumentPromotion/conditional-gep-and-load.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/ArgumentPromotion/conditional-gep-and-load.ll @@ -0,0 +1,63 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --scrub-attributes +; RUN: opt < %s -passes=argpromotion -S | FileCheck %s + +%struct.ss = type { i32, i64 } + +define internal void @f(%struct.ss* byval(%struct.ss) align 4 %b) nounwind { +; CHECK-LABEL: define {{[^@]+}}@f +; CHECK-SAME: (i32 [[B_0:%.*]], i64 [[B_1:%.*]]) #[[ATTR0:[0-9]+]] { +; CHECK-NEXT: [[B:%.*]] = alloca [[STRUCT_SS:%.*]], align 4 +; CHECK-NEXT: [[DOT0:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[B]], i32 0, i32 0 +; CHECK-NEXT: store i32 [[B_0]], i32* [[DOT0]], align 4 +; CHECK-NEXT: [[DOT1:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[B]], i32 0, i32 1 +; CHECK-NEXT: store i64 [[B_1]], i64* [[DOT1]], align 4 +; CHECK-NEXT: [[TMP1:%.*]] = icmp eq %struct.ss* [[B]], null +; CHECK-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP3:%.*]] +; CHECK: 2: +; CHECK-NEXT: [[TMP:%.*]] 
= getelementptr [[STRUCT_SS]], %struct.ss* [[B]], i32 0, i32 0 +; CHECK-NEXT: [[A:%.*]] = load i32, i32* [[TMP]], align 4 +; CHECK-NEXT: [[C:%.*]] = add i32 [[A]], 1 +; CHECK-NEXT: store i32 [[C]], i32* [[TMP]], align 4 +; CHECK-NEXT: ret void +; CHECK: 3: +; CHECK-NEXT: ret void +; +%1 = icmp eq %struct.ss* %b, null +br i1 %1, label %2, label %3 + +2: + %tmp = getelementptr %struct.ss, %struct.ss* %b, i32 0, i32 0 + %a = load i32, i32* %tmp, align 4 + %c = add i32 %a, 1 + store i32 %c, i32* %tmp, align 4 + ret void + +3: + ret void +} + +define i32 @main() nounwind { +; CHECK-LABEL: define {{[^@]+}}@main +; CHECK-SAME: () #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[S:%.*]] = alloca [[STRUCT_SS:%.*]], align 32 +; CHECK-NEXT: [[P:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S]], i32 0, i32 0 +; CHECK-NEXT: store i32 1, i32* [[P]], align 8 +; CHECK-NEXT: [[Q:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S]], i32 0, i32 1 +; CHECK-NEXT: store i64 2, i64* [[Q]], align 4 +; CHECK-NEXT: [[S_0:%.*]] = getelementptr inbounds [[STRUCT_SS]], %struct.ss* [[S]], i32 0, i32 0 +; CHECK-NEXT: [[S_0_VAL:%.*]] = load i32, i32* [[S_0]], align 4 +; CHECK-NEXT: [[S_1:%.*]] = getelementptr inbounds [[STRUCT_SS]], %struct.ss* [[S]], i32 0, i32 1 +; CHECK-NEXT: [[S_1_VAL:%.*]] = load i64, i64* [[S_1]], align 4 +; CHECK-NEXT: call void @f(i32 [[S_0_VAL]], i64 [[S_1_VAL]]) +; CHECK-NEXT: ret i32 0 +; +entry: + %S = alloca %struct.ss, align 32 + %p = getelementptr %struct.ss, %struct.ss* %S, i32 0, i32 0 + store i32 1, i32* %p, align 8 + %q = getelementptr %struct.ss, %struct.ss* %S, i32 0, i32 1 + store i64 2, i64* %q, align 4 + call void @f(%struct.ss* byval(%struct.ss) align 4 %S) nounwind + ret i32 0 +} diff --git a/llvm/test/Transforms/ArgumentPromotion/dbg.ll b/llvm/test/Transforms/ArgumentPromotion/dbg.ll --- a/llvm/test/Transforms/ArgumentPromotion/dbg.ll +++ b/llvm/test/Transforms/ArgumentPromotion/dbg.ll @@ -36,9 +36,9 @@ ; CHECK-NEXT: [[Y_VAL:%.*]] = load i32*, 
i32** [[Y]], align 8, [[DBG4:!dbg !.*]] ; CHECK-NEXT: [[Y_VAL_VAL:%.*]] = load i32, i32* [[Y_VAL]], align 8, [[DBG4]] ; CHECK-NEXT: call void @test(i32 [[Y_VAL_VAL]]), [[DBG4]] -; CHECK-NEXT: [[P_0:%.*]] = getelementptr [[STRUCT_PAIR:%.*]], %struct.pair* [[P]], i32 0, i32 0, [[DBG5:!dbg !.*]] +; CHECK-NEXT: [[P_0:%.*]] = getelementptr inbounds [[STRUCT_PAIR:%.*]], %struct.pair* [[P]], i32 0, i32 0, [[DBG5:!dbg !.*]] ; CHECK-NEXT: [[P_0_VAL:%.*]] = load i32, i32* [[P_0]], align 4, [[DBG5]] -; CHECK-NEXT: [[P_1:%.*]] = getelementptr [[STRUCT_PAIR]], %struct.pair* [[P]], i32 0, i32 1, [[DBG5]] +; CHECK-NEXT: [[P_1:%.*]] = getelementptr inbounds [[STRUCT_PAIR]], %struct.pair* [[P]], i32 0, i32 1, [[DBG5]] ; CHECK-NEXT: [[P_1_VAL:%.*]] = load i32, i32* [[P_1]], align 4, [[DBG5]] ; CHECK-NEXT: call void @test_byval(i32 [[P_0_VAL]], i32 [[P_1_VAL]]), [[DBG5]] ; CHECK-NEXT: ret void diff --git a/llvm/test/Transforms/ArgumentPromotion/fp80.ll b/llvm/test/Transforms/ArgumentPromotion/fp80.ll --- a/llvm/test/Transforms/ArgumentPromotion/fp80.ll +++ b/llvm/test/Transforms/ArgumentPromotion/fp80.ll @@ -16,7 +16,7 @@ ; CHECK-LABEL: define {{[^@]+}}@run() { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = tail call i8 @UseLongDoubleUnsafely(%union.u* byval([[UNION_U:%.*]]) align 16 bitcast (%struct.s* @b to %union.u*)) -; CHECK-NEXT: [[DOT0:%.*]] = getelementptr [[UNION_U]], %union.u* bitcast (%struct.s* @b to %union.u*), i32 0, i32 0 +; CHECK-NEXT: [[DOT0:%.*]] = getelementptr inbounds [[UNION_U]], %union.u* bitcast (%struct.s* @b to %union.u*), i32 0, i32 0 ; CHECK-NEXT: [[DOT0_VAL:%.*]] = load x86_fp80, x86_fp80* [[DOT0]], align 16 ; CHECK-NEXT: [[TMP1:%.*]] = tail call x86_fp80 @UseLongDoubleSafely(x86_fp80 [[DOT0_VAL]]) ; CHECK-NEXT: [[TMP2:%.*]] = call i64 @AccessPaddingOfStruct(%struct.Foo* byval([[STRUCT_FOO:%.*]]) @a) diff --git a/llvm/test/Transforms/ArgumentPromotion/pr32917.ll b/llvm/test/Transforms/ArgumentPromotion/pr32917.ll --- 
a/llvm/test/Transforms/ArgumentPromotion/pr32917.ll +++ b/llvm/test/Transforms/ArgumentPromotion/pr32917.ll @@ -10,7 +10,7 @@ ; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* @b, align 4 ; CHECK-NEXT: [[TMP2:%.*]] = sext i32 [[TMP1]] to i64 ; CHECK-NEXT: [[TMP3:%.*]] = inttoptr i64 [[TMP2]] to i32* -; CHECK-NEXT: [[DOTIDX:%.*]] = getelementptr i32, i32* [[TMP3]], i64 -1 +; CHECK-NEXT: [[DOTIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP3]], i64 -1 ; CHECK-NEXT: [[DOTIDX_VAL:%.*]] = load i32, i32* [[DOTIDX]], align 4 ; CHECK-NEXT: call fastcc void @fn1(i32 [[DOTIDX_VAL]]) ; CHECK-NEXT: ret i32 undef diff --git a/llvm/test/Transforms/ArgumentPromotion/preserve-inbounds-info-of-gep.ll b/llvm/test/Transforms/ArgumentPromotion/preserve-inbounds-info-of-gep.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/ArgumentPromotion/preserve-inbounds-info-of-gep.ll @@ -0,0 +1,67 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --scrub-attributes +; RUN: opt < %s -passes=argpromotion -S | FileCheck %s + +%T = type { i32, i32, i32, i32 } +@G = constant %T { i32 0, i32 0, i32 17, i32 25 } + +define internal i32 @f(%T* %p) { +; CHECK-LABEL: define {{[^@]+}}@f +; CHECK-SAME: (i32 [[P_0_2_VAL:%.*]], i32 [[P_0_3_VAL:%.*]]) +; CHECK-NEXT: entry: +; CHECK-NEXT: [[V:%.*]] = add i32 [[P_0_3_VAL]], [[P_0_2_VAL]] +; CHECK-NEXT: ret i32 [[V]] +; +entry: + %a.gep = getelementptr inbounds %T, %T* %p, i64 0, i32 3 + %b.gep = getelementptr inbounds %T, %T* %p, i64 0, i32 2 + %a = load i32, i32* %a.gep + %b = load i32, i32* %b.gep + %v = add i32 %a, %b + ret i32 %v +} + +define i32 @caller() { +; CHECK-LABEL: define {{[^@]+}}@caller() +; CHECK-NEXT: entry: +; CHECK-NEXT: [[G_IDX:%.*]] = getelementptr inbounds [[T:%.*]], %T* @G, i64 0, i32 2 +; CHECK-NEXT: [[G_IDX_VAL:%.*]] = load i32, i32* [[G_IDX]] +; CHECK-NEXT: [[G_IDX1:%.*]] = getelementptr inbounds [[T]], %T* @G, i64 0, i32 3 +; CHECK-NEXT: [[G_IDX1_VAL:%.*]] = load i32, 
i32* [[G_IDX1]] +; CHECK-NEXT: [[V:%.*]] = call i32 @f(i32 [[G_IDX_VAL]], i32 [[G_IDX1_VAL]]) +; CHECK-NEXT: ret i32 [[V]] +; +entry: + %v = call i32 @f(%T* @G) + ret i32 %v +} + +define internal i32 @g(%T* %p) { +; CHECK-LABEL: define {{[^@]+}}@g +; CHECK-SAME: (i32 [[P_0_2_VAL:%.*]], i32 [[P_0_3_VAL:%.*]]) +; CHECK-NEXT: entry: +; CHECK-NEXT: [[V:%.*]] = add i32 [[P_0_3_VAL]], [[P_0_2_VAL]] +; CHECK-NEXT: ret i32 [[V]] +; +entry: + %a.gep = getelementptr inbounds %T, %T* %p, i64 0, i32 3 + %b.gep = getelementptr %T, %T* %p, i64 0, i32 2 + %a = load i32, i32* %a.gep + %b = load i32, i32* %b.gep + %v = add i32 %a, %b + ret i32 %v +} + +define i32 @caller2() { +; CHECK-LABEL: define {{[^@]+}}@caller2() +; CHECK-NEXT: entry: +; CHECK-NEXT: [[G_IDX:%.*]] = getelementptr [[T:%.*]], %T* @G, i64 0, i32 2 +; CHECK-NEXT: [[G_IDX_VAL:%.*]] = load i32, i32* [[G_IDX]] +; CHECK-NEXT: [[G_IDX1:%.*]] = getelementptr inbounds [[T]], %T* @G, i64 0, i32 3 +; CHECK-NEXT: [[G_IDX1_VAL:%.*]] = load i32, i32* [[G_IDX1]] +; CHECK-NEXT: [[V:%.*]] = call i32 @g(i32 [[G_IDX_VAL]], i32 [[G_IDX1_VAL]]) +; CHECK-NEXT: ret i32 [[V]] +; +entry: + %v = call i32 @g(%T* @G) + ret i32 %v +}