diff --git a/llvm/lib/Analysis/FunctionPropertiesAnalysis.cpp b/llvm/lib/Analysis/FunctionPropertiesAnalysis.cpp --- a/llvm/lib/Analysis/FunctionPropertiesAnalysis.cpp +++ b/llvm/lib/Analysis/FunctionPropertiesAnalysis.cpp @@ -129,7 +129,7 @@ FunctionPropertiesUpdater::FunctionPropertiesUpdater( FunctionPropertiesInfo &FPI, const CallBase &CB) : FPI(FPI), CallSiteBB(*CB.getParent()), Caller(*CallSiteBB.getParent()) { - + assert(isa(CB) || isa(CB)); // For BBs that are likely to change, we subtract from feature totals their // contribution. Some features, like max loop counts or depths, are left // invalid, as they will be updated post-inlining. @@ -146,6 +146,18 @@ // with the CB BB ('Entry') between which the inlined callee will be pasted. Successors.insert(succ_begin(&CallSiteBB), succ_end(&CallSiteBB)); + // Inlining only handles invoke and calls. If this is an invoke, and inlining + // it pulls another invoke, the original landing pad may get split, so as to + // share its content with other potential users. So the edge up to which we + // need to invalidate and then re-account BB data is the successors of the + // current landing pad. We can leave the current lp, too - if it doesn't get + // split, then it will be the place traversal stops. Either way, the + // discounted BBs will be checked if reachable and re-added. + if (const auto *II = dyn_cast(&CB)) { + const auto *UnwindDest = II->getUnwindDest(); + Successors.insert(succ_begin(UnwindDest), succ_end(UnwindDest)); + } + // Exclude the CallSiteBB, if it happens to be its own successor (1-BB loop). // We are only interested in BBs the graph moves past the callsite BB to // define the frontier past which we don't want to re-process BBs. Including @@ -165,46 +177,72 @@ } void FunctionPropertiesUpdater::finish(FunctionAnalysisManager &FAM) const { - SetVector Worklist; - - if (&CallSiteBB != &*Caller.begin()) { - FPI.reIncludeBB(*Caller.begin()); - Worklist.insert(&*Caller.begin()); - } - // Update feature values from the BBs that were copied from the callee, or // might have been modified because of inlining. The latter have been // subtracted in the FunctionPropertiesUpdater ctor. // There could be successors that were reached before but now are only // reachable from elsewhere in the CFG. - // Suppose the following diamond CFG (lines are arrows pointing down): + // One example is the following diamond CFG (lines are arrows pointing down): // A // / \ // B C + // | | + // | D + // | | + // | E // \ / - // D + // F // There's a call site in C that is inlined. Upon doing that, it turns out // it expands to // call void @llvm.trap() // unreachable - // D isn't reachable from C anymore, but we did discount it when we set up + // F isn't reachable from C anymore, but we did discount it when we set up // FunctionPropertiesUpdater, so we need to re-include it here. - + // At the same time, D and E were reachable before, but now are not anymore, + // so we need to leave D out (we discounted it at setup), and explicitly + // remove E. + SetVector Reinclude; + SetVector Unreachable; const auto &DT = FAM.getResult(const_cast(Caller)); - for (const auto *Succ : Successors) - if (DT.isReachableFromEntry(Succ) && Worklist.insert(Succ)) - FPI.reIncludeBB(*Succ); - auto I = Worklist.size(); - bool CSInsertion = Worklist.insert(&CallSiteBB); + if (&CallSiteBB != &*Caller.begin()) + Reinclude.insert(&*Caller.begin()); + + // Distribute the successors to the 2 buckets. + for (const auto *Succ : Successors) + if (DT.isReachableFromEntry(Succ)) + Reinclude.insert(Succ); + else + Unreachable.insert(Succ); + + // For reinclusion, we want to stop at the reachable successors, who are at + // the beginning of the worklist; but, starting from the callsite bb and + // ending at those successors, we also want to perform a traversal. + // IncludeSuccessorsMark is the index after which we include successors. + const auto IncludeSuccessorsMark = Reinclude.size(); + bool CSInsertion = Reinclude.insert(&CallSiteBB); (void)CSInsertion; assert(CSInsertion); - for (; I < Worklist.size(); ++I) { - const auto *BB = Worklist[I]; + for (size_t I = 0; I < Reinclude.size(); ++I) { + const auto *BB = Reinclude[I]; FPI.reIncludeBB(*BB); - for (const auto *Succ : successors(BB)) - Worklist.insert(Succ); + if (I >= IncludeSuccessorsMark) + Reinclude.insert(succ_begin(BB), succ_end(BB)); + } + + // For exclusion, we don't need to exclude the set of BBs that were successors + // before and are now unreachable, because we already did that at setup. For + // the rest, as long as a successor is unreachable, we want to explicitly + // exclude it. + const auto AlreadyExcludedMark = Unreachable.size(); + for (size_t I = 0; I < Unreachable.size(); ++I) { + const auto *U = Unreachable[I]; + if (I >= AlreadyExcludedMark) + FPI.updateForBB(*U, -1); + for (const auto *Succ : successors(U)) + if (!DT.isReachableFromEntry(Succ)) + Unreachable.insert(Succ); } const auto &LI = FAM.getResult(const_cast(Caller)); diff --git a/llvm/unittests/Analysis/FunctionPropertiesAnalysisTest.cpp b/llvm/unittests/Analysis/FunctionPropertiesAnalysisTest.cpp --- a/llvm/unittests/Analysis/FunctionPropertiesAnalysisTest.cpp +++ b/llvm/unittests/Analysis/FunctionPropertiesAnalysisTest.cpp @@ -636,10 +636,16 @@ cond.false: ; preds = %entry %call3 = call noundef i64 @f2() + br label %extra + +extra: + br label %extra2 + +extra2: br label %cond.end cond.end: ; preds = %cond.false, %cond.true - %cond = phi i64 [ %conv2, %cond.true ], [ %call3, %cond.false ] + %cond = phi i64 [ %conv2, %cond.true ], [ %call3, %extra ] ret i64 %cond } @@ -657,8 +663,8 @@ EXPECT_NE(CB, nullptr); FunctionPropertiesInfo ExpectedInitial; - ExpectedInitial.BasicBlockCount = 4; - ExpectedInitial.TotalInstructionCount = 7; + ExpectedInitial.BasicBlockCount = 6; + ExpectedInitial.TotalInstructionCount = 9; ExpectedInitial.BlocksReachedFromConditionalInstruction = 2; ExpectedInitial.Uses = 1; ExpectedInitial.DirectCallsToDefinedFunctions = 1; @@ -680,4 +686,63 @@ EXPECT_EQ(FPI, ExpectedFinal); } +TEST_F(FunctionPropertiesAnalysisTest, InvokeSkipLP) { + LLVMContext C; + std::unique_ptr M = makeLLVMModule(C, + R"IR( +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-pc-linux-gnu" + +define i64 @f1(i32 noundef %value) { +entry: + invoke fastcc void @f2() to label %cont unwind label %lpad +cont: + ret i64 1 +lpad: + %lp = landingpad i32 cleanup + br label %ehcleanup +ehcleanup: + resume i32 0 +} +define void @f2() { + invoke noundef void @f3() to label %exit unwind label %lpad +exit: + ret void +lpad: + %lp = landingpad i32 cleanup + resume i32 %lp +} +declare void @f3() +)IR"); + + // The outcome of inlining will be that lpad becomes unreachable. The landing + // pad of the invoke inherited from f2 will land on a new bb which will branch + // to a bb containing the body of lpad. + Function *F1 = M->getFunction("f1"); + CallBase *CB = findCall(*F1); + EXPECT_NE(CB, nullptr); + + FunctionPropertiesInfo ExpectedInitial; + ExpectedInitial.BasicBlockCount = 4; + ExpectedInitial.TotalInstructionCount = 5; + ExpectedInitial.BlocksReachedFromConditionalInstruction = 0; + ExpectedInitial.Uses = 1; + ExpectedInitial.DirectCallsToDefinedFunctions = 1; + + FunctionPropertiesInfo ExpectedFinal = ExpectedInitial; + ExpectedFinal.BasicBlockCount = 6; + ExpectedFinal.DirectCallsToDefinedFunctions = 0; + ExpectedFinal.TotalInstructionCount = 8; + + auto FPI = buildFPI(*F1); + EXPECT_EQ(FPI, ExpectedInitial); + + FunctionPropertiesUpdater FPU(FPI, *CB); + InlineFunctionInfo IFI; + auto IR = llvm::InlineFunction(*CB, IFI); + EXPECT_TRUE(IR.isSuccess()); + invalidate(*F1); + FPU.finish(FAM); + EXPECT_EQ(FPI, ExpectedFinal); +} } // end anonymous namespace