Index: include/llvm/Analysis/LazyCallGraph.h =================================================================== --- include/llvm/Analysis/LazyCallGraph.h +++ include/llvm/Analysis/LazyCallGraph.h @@ -1002,7 +1002,9 @@ /// Doesn't recurse through any constants already in the \p Visited set, and /// updates that set with every constant visited. /// - /// For each defined function, calls \p Callback with that function. + /// For each defined function, calls \p Callback with that function. If \p + /// Callback returns true, the visit is considered complete and this returns + /// immediately. To visit the full worklist, simply always return false. template static void visitReferences(SmallVectorImpl &Worklist, SmallPtrSetImpl &Visited, @@ -1012,7 +1014,9 @@ if (Function *F = dyn_cast(C)) { if (!F->isDeclaration()) - Callback(*F); + if (Callback(*F)) + return; + continue; } Index: include/llvm/Transforms/IPO/ArgumentPromotion.h =================================================================== --- /dev/null +++ include/llvm/Transforms/IPO/ArgumentPromotion.h @@ -0,0 +1,31 @@ +//===- ArgumentPromotion.h - Promote by-reference arguments -----*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TRANSFORMS_IPO_ARGUMENTPROMOTION_H +#define LLVM_TRANSFORMS_IPO_ARGUMENTPROMOTION_H + +#include "llvm/Analysis/CGSCCPassManager.h" +#include "llvm/Analysis/LazyCallGraph.h" + +namespace llvm { + +/// Argument promotion pass. +/// +/// This pass walks the functions in each SCC and for each one tries to +/// transform it and all of its callers to replace indirect arguments with +/// direct (by-value) arguments. +class ArgumentPromotionPass : public PassInfoMixin { +public: + PreservedAnalyses run(LazyCallGraph::SCC &C, CGSCCAnalysisManager &AM, + LazyCallGraph &CG, CGSCCUpdateResult &UR); +}; + +} + +#endif Index: lib/Analysis/CGSCCPassManager.cpp =================================================================== --- lib/Analysis/CGSCCPassManager.cpp +++ lib/Analysis/CGSCCPassManager.cpp @@ -316,6 +316,9 @@ RetainedEdges.insert(&Referee); if (E->isCall()) DemotedCallTargets.insert(&Referee); + + // Walk all of the references. + return false; }); // First remove all of the edges that are no longer present in this function. Index: lib/Analysis/LazyCallGraph.cpp =================================================================== --- lib/Analysis/LazyCallGraph.cpp +++ lib/Analysis/LazyCallGraph.cpp @@ -80,6 +80,8 @@ // Process them (recursively) collecting every function found. visitReferences(Worklist, Visited, [&](Function &F) { addEdge(Edges, EdgeIndexMap, F, LazyCallGraph::Edge::Ref); + // We aren't finished until all references are visited. + return false; }); } @@ -156,6 +158,8 @@ "entry set.\n"); visitReferences(Worklist, Visited, [&](Function &F) { addEdge(EntryEdges, EntryIndexMap, F, LazyCallGraph::Edge::Ref); + // We aren't finished until all references are visited. + return false; }); } Index: lib/Passes/PassBuilder.cpp =================================================================== --- lib/Passes/PassBuilder.cpp +++ lib/Passes/PassBuilder.cpp @@ -61,6 +61,7 @@ #include "llvm/Target/TargetMachine.h" #include "llvm/Transforms/GCOVProfiler.h" #include "llvm/Transforms/IPO/AlwaysInliner.h" +#include "llvm/Transforms/IPO/ArgumentPromotion.h" #include "llvm/Transforms/IPO/ConstantMerge.h" #include "llvm/Transforms/IPO/CrossDSOCFI.h" #include "llvm/Transforms/IPO/DeadArgumentElimination.h" Index: lib/Passes/PassRegistry.def =================================================================== --- lib/Passes/PassRegistry.def +++ lib/Passes/PassRegistry.def @@ -85,6 +85,7 @@ #ifndef CGSCC_PASS #define CGSCC_PASS(NAME, CREATE_PASS) #endif +CGSCC_PASS("argpromotion", ArgumentPromotionPass()) CGSCC_PASS("invalidate", InvalidateAllAnalysesPass()) CGSCC_PASS("function-attrs", PostOrderFunctionAttrsPass()) CGSCC_PASS("inline", InlinerPass()) Index: lib/Transforms/IPO/ArgumentPromotion.cpp =================================================================== --- lib/Transforms/IPO/ArgumentPromotion.cpp +++ lib/Transforms/IPO/ArgumentPromotion.cpp @@ -29,7 +29,9 @@ // //===----------------------------------------------------------------------===// +#include "llvm/Transforms/IPO/ArgumentPromotion.h" #include "llvm/ADT/DepthFirstIterator.h" +#include "llvm/ADT/Optional.h" #include "llvm/ADT/Statistic.h" #include "llvm/ADT/StringExtras.h" #include "llvm/Analysis/AliasAnalysis.h" @@ -37,6 +39,7 @@ #include "llvm/Analysis/BasicAliasAnalysis.h" #include "llvm/Analysis/CallGraph.h" #include "llvm/Analysis/CallGraphSCCPass.h" +#include "llvm/Analysis/LazyCallGraph.h" #include "llvm/Analysis/Loads.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/IR/CFG.h" @@ -67,9 +70,11 @@ /// DoPromotion - This method actually performs the promotion of the specified /// arguments, and returns the new function. At this point, we know that it's /// safe to do so. -static CallGraphNode * +static Function * doPromotion(Function *F, SmallPtrSetImpl &ArgsToPromote, - SmallPtrSetImpl &ByValArgsToTransform, CallGraph &CG) { + SmallPtrSetImpl &ByValArgsToTransform, + Optional> + ReplaceCallSite) { // Start by computing a new prototype for the function, which is the same as // the old function, but has modified arguments. @@ -207,9 +212,6 @@ F->getParent()->getFunctionList().insert(F->getIterator(), NF); NF->takeName(F); - // Get a new callgraph node for NF. - CallGraphNode *NF_CGN = CG.getOrInsertFunction(NF); - // Loop over all of the callers of the function, transforming the call sites // to pass in the loaded pointers. // @@ -334,8 +336,8 @@ AttributesVec.clear(); // Update the callgraph to know that the callsite has been transformed. - CallGraphNode *CalleeNode = CG[Call->getParent()->getParent()]; - CalleeNode->replaceCallEdge(CS, CallSite(New), NF_CGN); + if (ReplaceCallSite) + (*ReplaceCallSite)(CS, CallSite(New)); if (!Call->use_empty()) { Call->replaceAllUsesWith(New); @@ -463,18 +465,7 @@ std::advance(I2, ArgIndices.size()); } - NF_CGN->stealCalledFunctionsFrom(CG[F]); - - // Now that the old function is dead, delete it. If there is a dangling - // reference to the CallgraphNode, just leave the dead function around for - // someone else to nuke. - CallGraphNode *CGN = CG[F]; - if (CGN->getNumReferences() == 0) - delete CG.removeFunctionFromModule(CGN); - else - F->setLinkage(Function::ExternalLinkage); - - return NF_CGN; + return NF; } /// AllCallersPassInValidPointerForArgument - Return true if we can prove that @@ -818,14 +809,13 @@ /// example, all callers are direct). If safe to promote some arguments, it /// calls the DoPromotion method. /// -static CallGraphNode * -promoteArguments(CallGraphNode *CGN, CallGraph &CG, - function_ref AARGetter, - unsigned MaxElements) { - Function *F = CGN->getFunction(); - +static Function * +promoteArguments(Function *F, function_ref AARGetter, + unsigned MaxElements, + Optional> + ReplaceCallSite) { // Make sure that it is local to this module. - if (!F || !F->hasLocalLinkage()) + if (!F->hasLocalLinkage()) return nullptr; // Don't promote arguments for variadic functions. Adding, removing, or @@ -950,7 +940,121 @@ if (ArgsToPromote.empty() && ByValArgsToTransform.empty()) return nullptr; - return doPromotion(F, ArgsToPromote, ByValArgsToTransform, CG); + return doPromotion(F, ArgsToPromote, ByValArgsToTransform, ReplaceCallSite); +} + +static void updateCGAfterRemovingRefs( + LazyCallGraph &G, LazyCallGraph::Node &N /*, + CGSCCAnalysisManager &AM, CGSCCUpdateResult &UR*/) { + typedef LazyCallGraph::Edge Edge; + typedef LazyCallGraph::Node Node; + typedef LazyCallGraph::RefSCC RefSCC; + + SmallPtrSet RefTargets; + for (Edge &E : N) + if (!E.isCall()) + RefTargets.insert(&E.getFunction()); + if (RefTargets.empty()) + return; + + SmallVector Worklist; + SmallPtrSet Visited; + for (Instruction &I : instructions(N.getFunction())) + for (Value *Op : I.operand_values()) + if (auto *C = dyn_cast(Op)) { + if (auto *F = dyn_cast(C)) { + if (RefTargets.erase(F)) + if (RefTargets.empty()) + // All targets accounted for, nothing to update. + return; + + // No need to further recurse through functions. + continue; + } + + // For arbitrary constants unique them into a worklist to recurse + // through. + if (Visited.insert(C).second) + Worklist.push_back(C); + } + + LazyCallGraph::visitReferences(Worklist, Visited, [&](Function &F) { + if (RefTargets.erase(&F)) + // We're finished visiting if we cover all ref targets. + return RefTargets.empty(); + + return false; + }); + + RefSCC &RC = *G.lookupRefSCC(N); + for (Function *TargetF : RefTargets) { + Node &TargetN = *G.lookup(*TargetF); + assert(&RC != G.lookupRefSCC(TargetN) && + "Cannot delete an edge within a RefSCC!"); + RC.removeOutgoingEdge(N, TargetN); + DEBUG(dbgs() << "Deleting outgoing edge from '" << N << "' to '" << TargetN + << "'\n"); + } +} + +PreservedAnalyses ArgumentPromotionPass::run(LazyCallGraph::SCC &C, + CGSCCAnalysisManager &AM, + LazyCallGraph &CG, + CGSCCUpdateResult &UR) { + bool Changed = false, LocalChange; + + // Iterate until we stop promoting from this SCC. + do { + LocalChange = false; + + for (LazyCallGraph::Node &N : C) { + Function &OldF = N.getFunction(); + + FunctionAnalysisManager &FAM = + AM.getResult(C, CG).getManager(); + // FIXME: This lambda must only be used with this function. We should + // skip the lambda and just get the AA results directly. + auto AARGetter = [&](Function &F) -> AAResults & { + assert(&F == &OldF && "Called with an unexpected function!"); + return FAM.getResult(F); + }; + + Function *NewF = promoteArguments(&OldF, AARGetter, 3u, None); + if (!NewF) + continue; + LocalChange = true; + + // Directly substitute the functions in the call graph. Note that this + // requires the old function to be completely dead and completely + // replaced by the new function. It does no call graph updates, it merely + // swaps out the particular function mapped to a particular node in the + // graph. + C.getOuterRefSCC().replaceNodeFunction(N, *NewF); + OldF.eraseFromParent(); + + // Promoting arguments may deleted some references to functions. We know + // that call edges are undisturbed and no new references can be + // introduced, so we can simply scan the function to account for all the + // remaining references and remove those no longer present. Note that + // this is just *removing* edges and so the order shouldn't matter. + for (User *U : NewF->users()) { + // Argument promotion only handles functions where all users are + // instructions. + auto &I = *cast(U); + + Function &UserF = *I.getParent()->getParent(); + LazyCallGraph::Node &UserN = *CG.lookup(UserF); + updateCGAfterRemovingRefs(CG, UserN); + } + } + + Changed |= LocalChange; + } while (LocalChange); + + if (!Changed) + return PreservedAnalyses::all(); + + return PreservedAnalyses::none(); } namespace { @@ -1019,9 +1123,31 @@ LocalChange = false; // Attempt to promote arguments from all functions in this SCC. for (CallGraphNode *OldNode : SCC) { - if (CallGraphNode *NewNode = - promoteArguments(OldNode, CG, AARGetter, MaxElements)) { + Function *OldF = OldNode->getFunction(); + if (!OldF) + continue; + + auto ReplaceCallSite = [&](CallSite OldCS, CallSite NewCS) { + Function *Caller = OldCS.getInstruction()->getParent()->getParent(); + CallGraphNode *NewCalleeNode = + CG.getOrInsertFunction(NewCS.getCalledFunction()); + CallGraphNode *CallerNode = CG[Caller]; + CallerNode->replaceCallEdge(OldCS, NewCS, NewCalleeNode); + }; + + if (Function *NewF = + promoteArguments(OldF, AARGetter, MaxElements, {ReplaceCallSite})) { LocalChange = true; + + // Update the call graph for the newly promoted function. + CallGraphNode *NewNode = CG.getOrInsertFunction(NewF); + NewNode->stealCalledFunctionsFrom(OldNode); + if (OldNode->getNumReferences() == 0) + delete CG.removeFunctionFromModule(OldNode); + else + OldF->setLinkage(Function::ExternalLinkage); + + // And updat ethe SCC we're iterating as well. SCC.ReplaceNode(OldNode, NewNode); } } Index: test/Transforms/ArgumentPromotion/aggregate-promote.ll =================================================================== --- test/Transforms/ArgumentPromotion/aggregate-promote.ll +++ test/Transforms/ArgumentPromotion/aggregate-promote.ll @@ -1,4 +1,5 @@ ; RUN: opt < %s -argpromotion -instcombine -S | not grep load +; RUN: opt < %s -passes='cgscc(argpromotion,function(instcombine))' -S | not grep load target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128" %QuadTy = type { i32, i32, i32, i32 } Index: test/Transforms/ArgumentPromotion/attrs.ll =================================================================== --- test/Transforms/ArgumentPromotion/attrs.ll +++ test/Transforms/ArgumentPromotion/attrs.ll @@ -1,4 +1,5 @@ ; RUN: opt < %s -argpromotion -S | grep zeroext +; RUN: opt < %s -passes=argpromotion -S | grep zeroext %struct.ss = type { i32, i64 } Index: test/Transforms/ArgumentPromotion/byval-2.ll =================================================================== --- test/Transforms/ArgumentPromotion/byval-2.ll +++ test/Transforms/ArgumentPromotion/byval-2.ll @@ -1,4 +1,5 @@ ; RUN: opt < %s -argpromotion -S | FileCheck %s +; RUN: opt < %s -passes=argpromotion -S | FileCheck %s ; Arg promotion eliminates the struct argument. ; FIXME: Should it eliminate the i32* argument? Index: test/Transforms/ArgumentPromotion/byval.ll =================================================================== --- test/Transforms/ArgumentPromotion/byval.ll +++ test/Transforms/ArgumentPromotion/byval.ll @@ -1,4 +1,5 @@ ; RUN: opt < %s -argpromotion -S | FileCheck %s +; RUN: opt < %s -passes=argpromotion -S | FileCheck %s target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128" Index: test/Transforms/ArgumentPromotion/callgraph-update.ll =================================================================== --- test/Transforms/ArgumentPromotion/callgraph-update.ll +++ test/Transforms/ArgumentPromotion/callgraph-update.ll @@ -1,4 +1,6 @@ ; RUN: opt < %s -argpromotion -simplifycfg -constmerge | llvm-dis +; RUN: opt < %s -passes='cgscc(argpromotion,function(simplify-cfg)),constmerge' | llvm-dis + target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128" target triple = "i386-apple-darwin10.0" Index: test/Transforms/ArgumentPromotion/chained.ll =================================================================== --- test/Transforms/ArgumentPromotion/chained.ll +++ test/Transforms/ArgumentPromotion/chained.ll @@ -1,4 +1,6 @@ ; RUN: opt < %s -argpromotion -instcombine -S | not grep load +; RUN: opt < %s -passes='cgscc(argpromotion,function(instcombine))' -S | not grep load + target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128" @G1 = constant i32 0 ; [#uses=1] Index: test/Transforms/ArgumentPromotion/control-flow.ll =================================================================== --- test/Transforms/ArgumentPromotion/control-flow.ll +++ test/Transforms/ArgumentPromotion/control-flow.ll @@ -1,5 +1,7 @@ ; RUN: opt < %s -argpromotion -S | \ ; RUN: not grep "load i32* null" +; RUN: opt < %s -passes=argpromotion -S | \ +; RUN: not grep "load i32* null" define internal i32 @callee(i1 %C, i32* %P) { br i1 %C, label %T, label %F Index: test/Transforms/ArgumentPromotion/control-flow2.ll =================================================================== --- test/Transforms/ArgumentPromotion/control-flow2.ll +++ test/Transforms/ArgumentPromotion/control-flow2.ll @@ -1,4 +1,5 @@ ; RUN: opt < %s -argpromotion -S | FileCheck %s +; RUN: opt < %s -passes=argpromotion -S | FileCheck %s ; CHECK: load i32, i32* %A target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128" Index: test/Transforms/ArgumentPromotion/crash.ll =================================================================== --- test/Transforms/ArgumentPromotion/crash.ll +++ test/Transforms/ArgumentPromotion/crash.ll @@ -1,4 +1,5 @@ -; RUN: opt -inline -argpromotion < %s +; RUN: opt -S -inline -argpromotion < %s +; RUN: opt -S -passes=inline,argpromotion < %s ; rdar://7879828 define void @foo() personality i32 (...)* @__gxx_personality_v0 { Index: test/Transforms/ArgumentPromotion/dbg.ll =================================================================== --- test/Transforms/ArgumentPromotion/dbg.ll +++ test/Transforms/ArgumentPromotion/dbg.ll @@ -1,4 +1,5 @@ ; RUN: opt < %s -argpromotion -S | FileCheck %s +; RUN: opt < %s -passes=argpromotion -S | FileCheck %s declare void @sink(i32) Index: test/Transforms/ArgumentPromotion/fp80.ll =================================================================== --- test/Transforms/ArgumentPromotion/fp80.ll +++ test/Transforms/ArgumentPromotion/fp80.ll @@ -1,4 +1,5 @@ ; RUN: opt < %s -argpromotion -S | FileCheck %s +; RUN: opt < %s -passes=argpromotion -S | FileCheck %s target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" Index: test/Transforms/ArgumentPromotion/inalloca.ll =================================================================== --- test/Transforms/ArgumentPromotion/inalloca.ll +++ test/Transforms/ArgumentPromotion/inalloca.ll @@ -1,4 +1,5 @@ ; RUN: opt %s -argpromotion -sroa -S | FileCheck %s +; RUN: opt %s -passes='argpromotion,function(sroa)' -S | FileCheck %s target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128" Index: test/Transforms/ArgumentPromotion/pr27568.ll =================================================================== --- test/Transforms/ArgumentPromotion/pr27568.ll +++ test/Transforms/ArgumentPromotion/pr27568.ll @@ -1,4 +1,5 @@ ; RUN: opt -S -argpromotion < %s | FileCheck %s +; RUN: opt -S -passes=argpromotion < %s | FileCheck %s target triple = "x86_64-pc-windows-msvc" define internal void @callee(i8*) { Index: test/Transforms/ArgumentPromotion/reserve-tbaa.ll =================================================================== --- test/Transforms/ArgumentPromotion/reserve-tbaa.ll +++ test/Transforms/ArgumentPromotion/reserve-tbaa.ll @@ -1,4 +1,5 @@ ; RUN: opt < %s -argpromotion -S +; RUN: opt < %s -passes=argpromotion -S ; PR17906 ; When we promote two arguments in a single function with different types, Index: test/Transforms/ArgumentPromotion/sret.ll =================================================================== --- test/Transforms/ArgumentPromotion/sret.ll +++ test/Transforms/ArgumentPromotion/sret.ll @@ -1,4 +1,5 @@ ; RUN: opt < %s -argpromotion -S | FileCheck %s +; RUN: opt < %s -passes=argpromotion -S | FileCheck %s target datalayout = "e-m:w-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-pc-windows-msvc" Index: test/Transforms/ArgumentPromotion/tail.ll =================================================================== --- test/Transforms/ArgumentPromotion/tail.ll +++ test/Transforms/ArgumentPromotion/tail.ll @@ -1,4 +1,5 @@ ; RUN: opt %s -argpromotion -S -o - | FileCheck %s +; RUN: opt %s -passes=argpromotion -S -o - | FileCheck %s ; PR14710 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" Index: test/Transforms/ArgumentPromotion/variadic.ll =================================================================== --- test/Transforms/ArgumentPromotion/variadic.ll +++ test/Transforms/ArgumentPromotion/variadic.ll @@ -1,4 +1,5 @@ ; RUN: opt < %s -argpromotion -S | FileCheck %s +; RUN: opt < %s -passes=argpromotion -S | FileCheck %s ; Unused arguments from variadic functions cannot be eliminated as that changes ; their classiciation according to the SysV amd64 ABI. Clang and other frontends