Index: llvm/lib/Transforms/IPO/ArgumentPromotion.cpp
===================================================================
--- llvm/lib/Transforms/IPO/ArgumentPromotion.cpp
+++ llvm/lib/Transforms/IPO/ArgumentPromotion.cpp
@@ -46,7 +46,10 @@
 #include "llvm/Analysis/CallGraphSCCPass.h"
 #include "llvm/Analysis/LazyCallGraph.h"
 #include "llvm/Analysis/Loads.h"
+#include "llvm/Analysis/LoopInfo.h"
 #include "llvm/Analysis/MemoryLocation.h"
+#include "llvm/Analysis/MustExecute.h"
+#include "llvm/Analysis/PostDominators.h"
 #include "llvm/Analysis/TargetLibraryInfo.h"
 #include "llvm/Analysis/TargetTransformInfo.h"
 #include "llvm/IR/Argument.h"
@@ -620,20 +623,43 @@
     return true;
   };
 
-  // First, iterate the entry block and mark loads of (geps of) arguments as
-  // safe.
+  // First, iterate all the basic blocks that must be executed upon entry
+  // and mark loads of (geps of) arguments as safe.
   BasicBlock &EntryBlock = Arg->getParent()->front();
+  // The explorer's getters return non-owning pointers, so the analyses are
+  // owned here and built at most once, on first request.
+  std::unique_ptr<DominatorTree> FnDT;
+  std::unique_ptr<LoopInfo> FnLI;
+  std::unique_ptr<PostDominatorTree> FnPDT;
+  GetterTy<const LoopInfo> LIGetter = [&](const Function &F) {
+    assert(&F == Arg->getParent() && "Explorer left Arg's function");
+    if (!FnLI) {
+      FnDT = std::make_unique<DominatorTree>(const_cast<Function &>(F));
+      FnLI = std::make_unique<LoopInfo>(*FnDT);
+    }
+    return FnLI.get();
+  };
+  GetterTy<const PostDominatorTree> PDTGetter = [&](const Function &F) {
+    assert(&F == Arg->getParent() && "Explorer left Arg's function");
+    if (!FnPDT)
+      FnPDT = std::make_unique<PostDominatorTree>(const_cast<Function &>(F));
+    return FnPDT.get();
+  };
+  MustBeExecutedContextExplorer Explorer(true, LIGetter, PDTGetter);
   // Declare this here so we can reuse it
   IndicesVector Indices;
-  for (Instruction &I : EntryBlock)
-    if (LoadInst *LI = dyn_cast<LoadInst>(&I)) {
-      Value *V = LI->getPointerOperand();
-      if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(V)) {
+  for (auto EIt = Explorer.begin(&EntryBlock.front()),
+            EEnd = Explorer.end(&EntryBlock.front());
+       EIt != EEnd; ++EIt)
+    if (const LoadInst *LI = dyn_cast<LoadInst>(EIt.getCurrentInst())) {
+      const Value *V = LI->getPointerOperand();
+      if (const GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(V)) {
         V = GEP->getPointerOperand();
         if (V == Arg) {
           // This load actually loads (part of) Arg? Check the indices then.
           Indices.reserve(GEP->getNumIndices());
-          for (User::op_iterator II = GEP->idx_begin(), IE = GEP->idx_end();
+          for (User::const_op_iterator II = GEP->idx_begin(),
+                                       IE = GEP->idx_end();
                II != IE; ++II)
             if (ConstantInt *CI = dyn_cast<ConstantInt>(*II))
               Indices.push_back(CI->getSExtValue());
Index: llvm/test/Transforms/ArgumentPromotion/control-flow3.ll
===================================================================
--- /dev/null
+++ llvm/test/Transforms/ArgumentPromotion/control-flow3.ll
@@ -0,0 +1,82 @@
+; RUN: opt < %s -argpromotion -S | FileCheck %s
+; RUN: opt < %s -passes=argpromotion -S | FileCheck %s
+
+target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128"
+
+; CHECK-LABEL: define internal i32 @callee0(i32 %P.val)
+define internal i32 @callee0(i32* %P) {
+entry:
+  br label %bb1
+
+bb1:
+  br label %bb2
+
+bb2:
+  ; CHECK-NOT: load i32, i32* %P
+  %X = load i32, i32* %P
+  ret i32 %X
+}
+
+; CHECK-LABEL: define i32 @caller0() {
+define i32 @caller0() {
+  %A = alloca i32
+  store i32 17, i32* %A
+  ; CHECK: %A.val = load i32, i32* %A
+  ; CHECK: %X = call i32 @callee0(i32 %A.val)
+  %X = call i32 @callee0(i32* %A)
+  ret i32 %X
+}
+
+; CHECK-LABEL: define internal i32 @callee1(i1 %C, i32 %P.val)
+define internal i32 @callee1(i1 %C, i32* %P) {
+entry:
+  br label %bb1
+
+bb1:
+  br label %bb2
+
+bb2:
+  ; CHECK-NOT: load i32, i32* %P
+  %X = load i32, i32* %P
+  br i1 %C, label %bb2, label %exit
+
+exit:
+  ret i32 %X
+}
+
+; CHECK-LABEL: define i32 @caller1() {
+define i32 @caller1() {
+  %A = alloca i32
+  store i32 17, i32* %A
+  ; CHECK: %A.val = load i32, i32* %A
+  ; CHECK: %X = call i32 @callee1(i1 false, i32 %A.val)
+  %X = call i32 @callee1(i1 false, i32* %A)
+  ret i32 %X
+}
+
+; CHECK-LABEL: define internal i32 @callee2(i32* %P) {
+define internal i32 @callee2(i32* %P) {
+entry:
+  br label %bb1
+
+bb1:
+  %gep0 = getelementptr i32, i32* %P, i64 0
+  ; CHECK: %X = load i32, i32* %gep0
+  %X = load i32, i32* %gep0
+  br label %bb1
+
+bb2:
+  %gep1 = getelementptr i32, i32* %P, i64 1
+  ; CHECK: %Y = load i32, i32* %gep1
+  %Y = load i32, i32* %gep1
+  ret i32 %X
+}
+
+; CHECK-LABEL: define i32 @caller2() {
+define i32 @caller2() {
+  %A = alloca i32
+  store i32 17, i32* %A
+  ; CHECK: %X = call i32 @callee2(i32* %A)
+  %X = call i32 @callee2(i32* %A)
+  ret i32 %X
+}