Index: lib/Transforms/IPO/SampleProfile.cpp
===================================================================
--- lib/Transforms/IPO/SampleProfile.cpp
+++ lib/Transforms/IPO/SampleProfile.cpp
@@ -790,9 +790,8 @@
           // as a result, we do not have profile info for the branch
           // probability. We set the probability to 80% taken to indicate
           // that the static call is likely taken.
-          Instruction *DI = dyn_cast(
-              promoteIndirectCall(I, R->getValue(), 80, 100, false, ORE)
-                  ->stripPointerCasts());
+          Instruction *DI = promoteIndirectCall(
+              I, R->getValue(), 80, 100, false, ORE);
           PromotedInsns.insert(I);
           // If profile mismatches, we should not attempt to inline DI.
           if ((isa(DI) || isa(DI)) &&
Index: lib/Transforms/Instrumentation/IndirectCallPromotion.cpp
===================================================================
--- lib/Transforms/Instrumentation/IndirectCallPromotion.cpp
+++ lib/Transforms/Instrumentation/IndirectCallPromotion.cpp
@@ -461,11 +461,13 @@
 // MergeBB is the bottom BB of the if-then-else-diamond after the
 // transformation. For invoke instruction, the edges from DirectCallBB and
 // IndirectCallBB to MergeBB are removed before this call (during
-// createIfThenElse).
+// createIfThenElse). Returns the cloned direct call; \p CastInst receives the
+// instruction produced by insertCallRetCast() for that call.
 static Instruction *createDirectCallInst(const Instruction *Inst,
                                          Function *DirectCallee,
                                          BasicBlock *DirectCallBB,
-                                         BasicBlock *MergeBB) {
+                                         BasicBlock *MergeBB,
+                                         Instruction *&CastInst) {
   Instruction *NewInst = Inst->clone();
   if (CallInst *CI = dyn_cast(NewInst)) {
     CI->setCalledFunction(DirectCallee);
@@ -499,7 +501,8 @@
     }
   }
 
-  return insertCallRetCast(Inst, NewInst, DirectCallee);
+  CastInst = insertCallRetCast(Inst, NewInst, DirectCallee);
+  return NewInst;
 }
 
 // Create a PHI to unify the return values of calls.
@@ -559,15 +562,17 @@
   createIfThenElse(Inst, DirectCallee, Count, TotalCount, &DirectCallBB,
                    &IndirectCallBB, &MergeBB);
 
+  // If NewInst's return type differs from Inst's, CastInst is the cast of
+  // NewInst's result; otherwise it is NewInst itself. The PHI updates use it.
+  Instruction *CastInst = nullptr;
   Instruction *NewInst =
-      createDirectCallInst(Inst, DirectCallee, DirectCallBB, MergeBB);
+      createDirectCallInst(Inst, DirectCallee, DirectCallBB, MergeBB, CastInst);
 
   if (AttachProfToDirectCall) {
     SmallVector Weights;
     Weights.push_back(Count);
     MDBuilder MDB(NewInst->getContext());
-    if (Instruction *DI = dyn_cast(NewInst->stripPointerCasts()))
-      DI->setMetadata(LLVMContext::MD_prof, MDB.createBranchWeights(Weights));
+    NewInst->setMetadata(LLVMContext::MD_prof, MDB.createBranchWeights(Weights));
   }
 
   // Move Inst from MergeBB to IndirectCallBB.
@@ -589,10 +594,10 @@
     // We don't need to update the operand from NormalDest for DirectCallBB.
     // Pass nullptr here.
     fixupPHINodeForNormalDest(Inst, II->getNormalDest(), MergeBB,
-                              IndirectCallBB, NewInst);
+                              IndirectCallBB, CastInst);
   }
 
-  insertCallRetPHI(Inst, NewInst, DirectCallee);
+  insertCallRetPHI(Inst, CastInst, DirectCallee);
 
   DEBUG(dbgs() << "\n== Basic Blocks After ==\n");
   DEBUG(dbgs() << *BB << *DirectCallBB << *IndirectCallBB << *MergeBB << "\n");
Index: test/Transforms/SampleProfile/Inputs/indirect-call.prof
===================================================================
--- test/Transforms/SampleProfile/Inputs/indirect-call.prof
+++ test/Transforms/SampleProfile/Inputs/indirect-call.prof
@@ -24,3 +24,8 @@
 test_noinline_bitcast:3000:0
  1: foo_direct_i32:3000
   1: 3000
+return_arg_caller:3000:0
+ 1: foo_inline1:3000
+  11: 3000
+ 2: return_arg:3000
+  1: 3000
Index: test/Transforms/SampleProfile/indirect-call.ll
===================================================================
--- test/Transforms/SampleProfile/indirect-call.ll
+++ test/Transforms/SampleProfile/indirect-call.ll
@@ -92,6 +92,32 @@
   ret void
 }
 
+define i32* @return_arg(i32* readnone returned) !dbg !29{
+  ret i32* %0
+}
+
+; CHECK-LABEL: @return_arg_caller
+; The promoted indirect call returns its argument, which is itself the return
+; value of an earlier direct call. Check that both the direct call and the
+; promoted indirect call are inlined.
+define i32* @return_arg_caller(i32* (i32*)* nocapture) !dbg !30{
+; CHECK-NOT: call i32* @foo_inline1
+; CHECK: if.true.direct_targ:
+; CHECK-NOT: call
+; CHECK: if.false.orig_indirect:
+; CHECK: call
+  %2 = call i32* @foo_inline1(i32* null), !dbg !31
+  %cmp = icmp ne i32* %2, null
+  br i1 %cmp, label %then, label %else
+
+then:
+  %3 = tail call i32* %0(i32* %2), !dbg !32
+  ret i32* %3
+
+else:
+  ret i32* null
+}
+
 @x = global i32 0, align 4
 @y = global void ()* null, align 8
 
@@ -176,3 +202,7 @@
 !26 = distinct !DISubprogram(name: "test_noinline_bitcast", scope: !1, file: !1, line: 12, unit: !0)
 !27 = !DILocation(line: 13, scope: !26)
 !28 = distinct !DISubprogram(name: "foo_direct_i32", scope: !1, file: !1, line: 11, unit: !0)
+!29 = distinct !DISubprogram(name: "return_arg", scope: !1, file: !1, line: 11, unit: !0)
+!30 = distinct !DISubprogram(name: "return_arg_caller", scope: !1, file: !1, line: 11, unit: !0)
+!31 = !DILocation(line: 12, scope: !30)
+!32 = !DILocation(line: 13, scope: !30)