Index: lib/Transforms/IPO/Inliner.cpp =================================================================== --- lib/Transforms/IPO/Inliner.cpp +++ lib/Transforms/IPO/Inliner.cpp @@ -517,6 +517,20 @@ Function *Caller = CS.getCaller(); Function *Callee = CS.getCalledFunction(); + // Track whether this is the last use of a callee. + // This is used later to delete unused functions. + // TODO: Use this information to splice instead of clone a function. + bool LastCalleeUse = Callee && Callee->hasOneUse() && + Callee->hasLocalLinkage() && + // TODO: Can remove if in SCC now. + !SCCFunctions.count(Callee) && + + // The function may be apparently dead, but if + // there are indirect callgraph references to the + // node, we cannot delete it yet, this could + // invalidate the CGSCC iterator. + CG[Callee]->getNumReferences() == 1; + // If this call site is dead and it is to a readonly function, we should // just delete the call instead of trying to inline it, regardless of // size. This happens because IPSCCP propagates the result out of the @@ -531,7 +545,7 @@ } else { // We can only inline direct calls to non-declarations. if (!Callee || Callee->isDeclaration()) continue; - + // If this call site was obtained by inlining another function, verify // that the include path for the function did not include the callee // itself. If so, we'd be recursively inlining the same function, @@ -588,19 +602,15 @@ } } } - + // If we inlined or deleted the last possible call site to the function, // delete the function body now. - if (Callee && Callee->use_empty() && Callee->hasLocalLinkage() && - // TODO: Can remove if in SCC now. - !SCCFunctions.count(Callee) && - - // The function may be apparently dead, but if there are indirect - // callgraph references to the node, we cannot delete it yet, this - // could invalidate the CGSCC iterator. 
-          CG[Callee]->getNumReferences() == 0) {
+      if (LastCalleeUse) {
         DEBUG(dbgs() << " -> Deleting dead function: "
               << Callee->getName() << "\n");
+
+        assert(Callee->use_empty() && CG[Callee]->getNumReferences() == 0 &&
+               "Failed to update call graph after devirtualizing");
         CallGraphNode *CalleeNode = CG[Callee];
 
         // Remove any call graph edges from the callee to its callees.
Index: lib/Transforms/Utils/InlineFunction.cpp
===================================================================
--- lib/Transforms/Utils/InlineFunction.cpp
+++ lib/Transforms/Utils/InlineFunction.cpp
@@ -506,6 +506,47 @@
   return nullptr;
 }
 
+/// \brief Given a return instruction which is being inlined, and the inlined
+/// call-site, try to devirtualize any uses of the returned value if it points
+/// to a Function. Updates the call graph if any calls were devirtualized.
+///
+/// \param R       Return instruction of the inlined callee.
+/// \param TheCall The call or invoke being inlined; its uses are rewritten.
+/// \param IFI     Inlining state; nothing is done when it has no call graph.
+/// \returns true if every use of \p TheCall was replaced with the returned
+/// Function, false if nothing was changed and the caller must do the RAUW.
+static bool devirtualizedCallUsers(const ReturnInst *R,
+                                   Instruction *TheCall,
+                                   InlineFunctionInfo &IFI) {
+  if (!IFI.CG)
+    return false;
+  Function *CalledF = dyn_cast<Function>(R->getReturnValue());
+  if (!CalledF)
+    return false;
+  CallGraphNode *CallerNode = nullptr;
+  CallGraphNode *CalleeNode = nullptr;
+  for (auto UI = TheCall->use_begin(), UE = TheCall->use_end();
+       UI != UE;) {
+    // Step the iterator before U.set() re-points the use it refers to.
+    Use &U = *UI++;
+    U.set(CalledF);
+    CallSite CS(U.getUser());
+    // Only a use in the callee slot turns an indirect call into a direct one.
+    // A call that merely passes the value as an argument keeps its existing
+    // call graph edge.
+    if (!CS || !CS.isCallee(&U))
+      continue;
+    if (!CallerNode) {
+      // Lazily look up the call graph nodes on the first devirtualized call.
+      const Function *Caller = TheCall->getParent()->getParent();
+      CallerNode = (*IFI.CG)[Caller];
+      CalleeNode = IFI.CG->getOrInsertFunction(CalledF);
+    }
+    CallerNode->replaceCallEdge(CS, CS, CalleeNode);
+  }
+  return true;
+}
+
 /// InlineFunction - This function inlines the called function into the basic
 /// block of the caller. This returns false if it is not possible to inline
 /// this call.
The program is still in a well defined state if this occurs @@ -623,7 +653,7 @@ // have no dead or constant instructions leftover after inlining occurs // (which can happen, e.g., because an argument was constant), but we'll be // happy with whatever the cloner can do. - CloneAndPruneFunctionInto(Caller, CalledFunc, VMap, + CloneAndPruneFunctionInto(Caller, CalledFunc, VMap, /*ModuleLevelChanges=*/false, Returns, ".i", &InlinedFunctionInfo, IFI.DL, TheCall); @@ -852,7 +882,7 @@ ReturnInst *R = Returns[0]; if (TheCall == R->getReturnValue()) TheCall->replaceAllUsesWith(UndefValue::get(TheCall->getType())); - else + else if (!devirtualizedCallUsers(R, TheCall, IFI)) TheCall->replaceAllUsesWith(R->getReturnValue()); } // Since we are now done with the Call/Invoke, we can delete it. @@ -956,7 +986,7 @@ if (!TheCall->use_empty()) { if (TheCall == Returns[0]->getReturnValue()) TheCall->replaceAllUsesWith(UndefValue::get(TheCall->getType())); - else + else if (!devirtualizedCallUsers(Returns[0], TheCall, IFI)) TheCall->replaceAllUsesWith(Returns[0]->getReturnValue()); } Index: test/Transforms/Inline/devirtualize-2.ll =================================================================== --- test/Transforms/Inline/devirtualize-2.ll +++ test/Transforms/Inline/devirtualize-2.ll @@ -42,3 +42,60 @@ ; CHECK-LABEL: @test2( ; CHECK-NEXT: ret i32 41 + +; This is test1 but with multiple return blocks to test that +; we have coverage over all parts of the inliner which devirtualize +; based on return values from inlined calls. 
+define i32 @test3(i1 %cond) {
+  %funcall1_ = call fastcc i32 ()* (i1)* @f4a(i1 %cond)
+  %executecommandptr1_ = call i32 %funcall1_()
+  ret i32 %executecommandptr1_
+}
+; Callee with several basic blocks that returns @f5a on every path.
+define internal fastcc i32 ()* @f4a(i1 %cond) nounwind readnone {
+  br i1 %cond, label %l1, label %l2
+l2:
+  br label %l1
+l1:
+  ret i32 ()* @f5a
+}
+; @f4a returns this function's address; the checks expect its `ret i32 1`.
+define internal i32 @f5a() nounwind readnone {
+  ret i32 1
+}
+
+; CHECK-LABEL: @test3(i1 %cond)
+; CHECK: f4a.exit:
+; CHECK-NEXT: ret i32 1
+
+; This is test1 but with an invoke instead of a call.
+define i32 @test4(i1 %cond) {
+  %funcall1_ = invoke fastcc i32 ()* ()* @f1() to label %normal_bb unwind label %unwind_bb
+normal_bb:
+  %executecommandptr1_ = call i32 %funcall1_()
+  ret i32 %executecommandptr1_
+unwind_bb:
+  landingpad { i8*, i32 } personality i32 (...)* null
+          catch i8** null
+  unreachable
+}
+
+; CHECK-LABEL: @test4(i1 %cond)
+; CHECK: normal_bb:
+; CHECK-NEXT: ret i32 1
+
+; This is test3 but with an invoke instead of a call.
+define i32 @test5(i1 %cond) {
+  %funcall1_ = invoke fastcc i32 ()* (i1)* @f4a(i1 %cond) to label %normal_bb unwind label %unwind_bb
+normal_bb:
+  %executecommandptr1_ = call i32 %funcall1_()
+  ret i32 %executecommandptr1_
+unwind_bb:
+  landingpad { i8*, i32 } personality i32 (...)* null
+          catch i8** null
+  unreachable
+}
+
+; CHECK-LABEL: @test5(i1 %cond)
+; CHECK: normal_bb:
+; CHECK-NEXT: ret i32 1