Index: llvm/lib/Transforms/IPO/ArgumentPromotion.cpp =================================================================== --- llvm/lib/Transforms/IPO/ArgumentPromotion.cpp +++ llvm/lib/Transforms/IPO/ArgumentPromotion.cpp @@ -47,6 +47,7 @@ #include "llvm/Analysis/LazyCallGraph.h" #include "llvm/Analysis/Loads.h" #include "llvm/Analysis/MemoryLocation.h" +#include "llvm/Analysis/PostDominators.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/IR/Argument.h" @@ -569,7 +570,7 @@ /// elements of the aggregate in order to avoid exploding the number of /// arguments passed in. static bool isSafeToPromoteArgument(Argument *Arg, Type *ByValTy, AAResults &AAR, - unsigned MaxElements) { + const PostDominatorTree *PDT, unsigned MaxElements) { using GEPIndicesSet = std::set; // Quick exit for unused arguments @@ -620,45 +621,52 @@ return true; }; - // First, iterate the entry block and mark loads of (geps of) arguments as - // safe. - BasicBlock &EntryBlock = Arg->getParent()->front(); + // First, iterate the post-dominators of the entry block and mark loads of + // (geps of) arguments as safe. + Function* F = Arg->getParent(); + BasicBlock &EntryBlock = F->front(); + Instruction* EntryBlockFront = &EntryBlock.front(); + std::vector EntryPostDoms; + for (BasicBlock &BB: *F) + if (PDT->dominates(&BB.front(), EntryBlockFront)) + EntryPostDoms.push_back(&BB); // Declare this here so we can reuse it IndicesVector Indices; - for (Instruction &I : EntryBlock) - if (LoadInst *LI = dyn_cast(&I)) { - Value *V = LI->getPointerOperand(); - if (GetElementPtrInst *GEP = dyn_cast(V)) { - V = GEP->getPointerOperand(); - if (V == Arg) { - // This load actually loads (part of) Arg? Check the indices then. - Indices.reserve(GEP->getNumIndices()); - for (User::op_iterator II = GEP->idx_begin(), IE = GEP->idx_end(); - II != IE; ++II) - if (ConstantInt *CI = dyn_cast(*II)) - Indices.push_back(CI->getSExtValue()); - else - // We found a non-constant GEP index for this argument? Bail out - // right away, can't promote this argument at all. + for (BasicBlock *PostDomBB : EntryPostDoms) + for (Instruction &I : *PostDomBB) + if (LoadInst *LI = dyn_cast(&I)) { + Value *V = LI->getPointerOperand(); + if (GetElementPtrInst *GEP = dyn_cast(V)) { + V = GEP->getPointerOperand(); + if (V == Arg) { + // This load actually loads (part of) Arg? Check the indices then. + Indices.reserve(GEP->getNumIndices()); + for (User::op_iterator II = GEP->idx_begin(), IE = GEP->idx_end(); + II != IE; ++II) + if (ConstantInt *CI = dyn_cast(*II)) + Indices.push_back(CI->getSExtValue()); + else + // We found a non-constant GEP index for this argument? Bail out + // right away, can't promote this argument at all. + return false; + + if (!UpdateBaseTy(GEP->getSourceElementType())) return false; - if (!UpdateBaseTy(GEP->getSourceElementType())) + // Indices checked out, mark them as safe + markIndicesSafe(Indices, SafeToUnconditionallyLoad); + Indices.clear(); + } + } else if (V == Arg) { + // Direct loads are equivalent to a GEP with a single 0 index. + markIndicesSafe(IndicesVector(1, 0), SafeToUnconditionallyLoad); + + if (BaseTy && LI->getType() != BaseTy) return false; - // Indices checked out, mark them as safe - markIndicesSafe(Indices, SafeToUnconditionallyLoad); - Indices.clear(); + BaseTy = LI->getType(); } - } else if (V == Arg) { - // Direct loads are equivalent to a GEP with a single 0 index. - markIndicesSafe(IndicesVector(1, 0), SafeToUnconditionallyLoad); - - if (BaseTy && LI->getType() != BaseTy) - return false; - - BaseTy = LI->getType(); } - } // Now, iterate all uses of the argument to see if there are any uses that are // not (GEP+)loads, or any (GEP+)loads that are not safe to promote. @@ -685,7 +693,7 @@ // TODO: This runs the above loop over and over again for dead GEPs // Couldn't we just do increment the UI iterator earlier and erase the // use? - return isSafeToPromoteArgument(Arg, ByValTy, AAR, MaxElements); + return isSafeToPromoteArgument(Arg, ByValTy, AAR, PDT, MaxElements); } if (!UpdateBaseTy(GEP->getSourceElementType())) @@ -868,7 +876,8 @@ unsigned MaxElements, Optional> ReplaceCallSite, - const TargetTransformInfo &TTI) { + const TargetTransformInfo &TTI, + const PostDominatorTree* PDT) { // Don't perform argument promotion for naked functions; otherwise we can end // up removing parameters that are seemingly 'not used' as they are referred // to in the assembly. @@ -1003,7 +1012,7 @@ // Otherwise, see if we can promote the pointer to its value. Type *ByValTy = PtrArg->hasByValAttr() ? PtrArg->getParamByValType() : nullptr; - if (isSafeToPromoteArgument(PtrArg, ByValTy, AAR, MaxElements)) + if (isSafeToPromoteArgument(PtrArg, ByValTy, AAR, PDT, MaxElements)) ArgsToPromote.insert(PtrArg); } @@ -1041,8 +1050,9 @@ }; const TargetTransformInfo &TTI = FAM.getResult(OldF); + PostDominatorTree PDT(OldF); Function *NewF = - promoteArguments(&OldF, AARGetter, MaxElements, None, TTI); + promoteArguments(&OldF, AARGetter, MaxElements, None, TTI, &PDT); if (!NewF) continue; LocalChange = true; @@ -1147,8 +1157,9 @@ const TargetTransformInfo &TTI = getAnalysis().getTTI(*OldF); + const PostDominatorTree PDT(*OldF); if (Function *NewF = promoteArguments(OldF, AARGetter, MaxElements, - {ReplaceCallSite}, TTI)) { + {ReplaceCallSite}, TTI, &PDT)) { LocalChange = true; // Update the call graph for the newly promoted function. Index: llvm/test/Transforms/ArgumentPromotion/control-flow3.ll =================================================================== --- /dev/null +++ llvm/test/Transforms/ArgumentPromotion/control-flow3.ll @@ -0,0 +1,54 @@ +; RUN: opt < %s -argpromotion -S | FileCheck %s +; RUN: opt < %s -passes=argpromotion -S | FileCheck %s + +target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128" + +; CHECK-LABEL: define internal i32 @callee0(i32 %P.val) +define internal i32 @callee0(i32* %P) { +entry: + br label %bb1 + +bb1: + br label %bb2 + +bb2: + ; CHECK-NOT: load i32, i32* %P + %X = load i32, i32* %P + ret i32 %X +} + +; CHECK-LABEL: define i32 @caller0() { +define i32 @caller0() { + %A = alloca i32 + store i32 17, i32* %A + ; CHECK: %A.val = load i32, i32* %A + ; CHECK: %X = call i32 @callee0(i32 %A.val) + %X = call i32 @callee0(i32* %A) + ret i32 %X +} + +; CHECK: define internal i32 @callee1(i1 %C, i32 %P.val) +define internal i32 @callee1(i1 %C, i32* %P) { +entry: + br label %bb1 + +bb1: + br label %bb2 + +bb2: + ; CHECK-NOT: load i32, i32* %P + %X = load i32, i32* %P + br i1 %C, label %bb2, label %exit + +exit: + ret i32 %X +} + +define i32 @caller1() { + %A = alloca i32 + store i32 17, i32* %A + ; CHECK: %A.val = load i32, i32* %A + ; CHECK: %X = call i32 @callee1(i1 false, i32 %A.val) + %X = call i32 @callee1(i1 false, i32* %A) + ret i32 %X +}