Index: clang/lib/CodeGen/BackendUtil.cpp =================================================================== --- clang/lib/CodeGen/BackendUtil.cpp +++ clang/lib/CodeGen/BackendUtil.cpp @@ -579,7 +579,9 @@ PMBuilder.Inliner = createFunctionInliningPass( CodeGenOpts.OptimizationLevel, CodeGenOpts.OptimizeSize, (!CodeGenOpts.SampleProfileFile.empty() && - CodeGenOpts.PrepareForThinLTO)); + CodeGenOpts.PrepareForThinLTO), + /*ExternalFunctionUseListsAreIncomplete=*/ + CodeGenOpts.PrepareForThinLTO || CodeGenOpts.PrepareForLTO); } PMBuilder.OptLevel = CodeGenOpts.OptimizationLevel; Index: llvm/include/llvm/Analysis/InlineCost.h =================================================================== --- llvm/include/llvm/Analysis/InlineCost.h +++ llvm/include/llvm/Analysis/InlineCost.h @@ -215,11 +215,13 @@ /// /// Also note that calling this function *dynamically* computes the cost of /// inlining the callsite. It is an expensive, heavyweight call. -InlineCost getInlineCost( - CallBase &Call, const InlineParams &Params, TargetTransformInfo &CalleeTTI, - std::function &GetAssumptionCache, - Optional> GetBFI, - ProfileSummaryInfo *PSI, OptimizationRemarkEmitter *ORE = nullptr); +InlineCost +getInlineCost(CallBase &Call, const InlineParams &Params, + TargetTransformInfo &CalleeTTI, + std::function &GetAssumptionCache, + Optional> GetBFI, + ProfileSummaryInfo *PSI, OptimizationRemarkEmitter *ORE = nullptr, + bool CallerInlinabilityMatters = true); /// Get an InlineCost with the callee explicitly specified. /// This allows you to calculate the cost of inlining a function via a @@ -231,7 +233,8 @@ TargetTransformInfo &CalleeTTI, std::function &GetAssumptionCache, Optional> GetBFI, - ProfileSummaryInfo *PSI, OptimizationRemarkEmitter *ORE); + ProfileSummaryInfo *PSI, OptimizationRemarkEmitter *ORE, + bool CallerInlinabilityMatters = true); /// Minimal filter to detect invalid constructs for inlining. 
InlineResult isInlineViable(Function &Callee); Index: llvm/include/llvm/Passes/PassBuilder.h =================================================================== --- llvm/include/llvm/Passes/PassBuilder.h +++ llvm/include/llvm/Passes/PassBuilder.h @@ -331,8 +331,8 @@ /// \p Phase indicates the current ThinLTO phase. ModulePassManager buildModuleSimplificationPipeline(OptimizationLevel Level, - ThinLTOPhase Phase, - bool DebugLogging = false); + bool DebugLogging = false, + ThinLTOPhase Phase = ThinLTOPhase::None); /// Construct the core LLVM module optimization pipeline. /// @@ -347,9 +347,10 @@ /// only intended for use when attempting to optimize code. If frontends /// require some transformations for semantic reasons, they should explicitly /// build them. - ModulePassManager buildModuleOptimizationPipeline(OptimizationLevel Level, - bool DebugLogging = false, - bool LTOPreLink = false); + ModulePassManager + buildModuleOptimizationPipeline(OptimizationLevel Level, + bool DebugLogging = false, + ThinLTOPhase Phase = ThinLTOPhase::None); /// Build a per-module default optimization pipeline. /// @@ -362,9 +363,10 @@ /// only intended for use when attempting to optimize code. If frontends /// require some transformations for semantic reasons, they should explicitly /// build them. - ModulePassManager buildPerModuleDefaultPipeline(OptimizationLevel Level, - bool DebugLogging = false, - bool LTOPreLink = false); + ModulePassManager + buildPerModuleDefaultPipeline(OptimizationLevel Level, + bool DebugLogging = false, + ThinLTOPhase Phase = ThinLTOPhase::None); /// Build a pre-link, ThinLTO-targeting default optimization pipeline to /// a pass manager. 
@@ -696,7 +698,8 @@ void addPGOInstrPasses(ModulePassManager &MPM, bool DebugLogging, OptimizationLevel Level, bool RunProfileGen, bool IsCS, std::string ProfileFile, - std::string ProfileRemappingFile); + std::string ProfileRemappingFile, + bool ExternalFunctionUseListsAreIncomplete); void invokePeepholeEPCallbacks(FunctionPassManager &, OptimizationLevel); // Extension Point callbacks Index: llvm/include/llvm/Transforms/IPO.h =================================================================== --- llvm/include/llvm/Transforms/IPO.h +++ llvm/include/llvm/Transforms/IPO.h @@ -103,10 +103,16 @@ /// The -inline-threshold command line option takes precedence over the /// threshold given here. Pass *createFunctionInliningPass(); -Pass *createFunctionInliningPass(int Threshold); -Pass *createFunctionInliningPass(unsigned OptLevel, unsigned SizeOptLevel, - bool DisableInlineHotCallSite); -Pass *createFunctionInliningPass(InlineParams &Params); +Pass * +createFunctionInliningPass(int Threshold, + bool ExternalFunctionUseListsAreIncomplete = false); +Pass * +createFunctionInliningPass(unsigned OptLevel, unsigned SizeOptLevel, + bool DisableInlineHotCallSite, + bool ExternalFunctionUseListsAreIncomplete = false); +Pass * +createFunctionInliningPass(InlineParams &Params, + bool ExternalFunctionUseListsAreIncomplete = false); //===----------------------------------------------------------------------===// /// createPruneEHPass - Return a new pass object which transforms invoke Index: llvm/include/llvm/Transforms/IPO/Inliner.h =================================================================== --- llvm/include/llvm/Transforms/IPO/Inliner.h +++ llvm/include/llvm/Transforms/IPO/Inliner.h @@ -93,12 +93,12 @@ /// passes be composed to achieve the same end result. 
class InlinerPass : public PassInfoMixin { public: - InlinerPass(InlineParams Params = getInlineParams()) - : Params(std::move(Params)) {} + InlinerPass(InlineParams Params = getInlineParams(), + bool ExternalFunctionUseListsAreIncomplete = false) + : Params(std::move(Params)), ExternalFunctionUseListsAreIncomplete( + ExternalFunctionUseListsAreIncomplete) {} ~InlinerPass(); - InlinerPass(InlinerPass &&Arg) - : Params(std::move(Arg.Params)), - ImportedFunctionsStats(std::move(Arg.ImportedFunctionsStats)) {} + InlinerPass(InlinerPass &&Arg) = default; PreservedAnalyses run(LazyCallGraph::SCC &C, CGSCCAnalysisManager &AM, LazyCallGraph &CG, CGSCCUpdateResult &UR); @@ -106,6 +106,9 @@ private: InlineParams Params; std::unique_ptr ImportedFunctionsStats; + // If true, new references may later appear on otherwise-unreferenced + // `extern` functions (e.g., in ThinLTO); an empty use list is inconclusive. + bool ExternalFunctionUseListsAreIncomplete; }; } // end namespace llvm Index: llvm/lib/Analysis/InlineCost.cpp =================================================================== --- llvm/lib/Analysis/InlineCost.cpp +++ llvm/lib/Analysis/InlineCost.cpp @@ -121,9 +121,7 @@ /// The OptimizationRemarkEmitter available for this compilation. OptimizationRemarkEmitter *ORE; - /// The candidate callsite being analyzed. Please do not use this to do - /// analysis in the caller function; we want the inline cost query to be - /// easily cacheable. Instead, use the cover function paramHasAttr. + /// The candidate callsite being analyzed. CallBase &CandidateCall; /// Extension points for handling callsite features. @@ -377,6 +375,10 @@ /// Attempt to evaluate indirect calls to boost its inline cost. const bool BoostIndirectCalls; + /// If true, inlining may be more conservative to take the caller's + /// inlinability into account. 
+ const bool CallerInlinabilityMatters; + /// Inlining cost measured in abstract units, accounts for all the /// instructions expected to be executed for a given function invocation. /// Instructions that are statically proven to be dead based on call-site @@ -626,12 +628,14 @@ std::function &GetAssumptionCache, Optional> &GetBFI, ProfileSummaryInfo *PSI, OptimizationRemarkEmitter *ORE, Function &Callee, - CallBase &Call, const InlineParams &Params, bool BoostIndirect = true) + CallBase &Call, const InlineParams &Params, bool BoostIndirect = true, + bool CallerInlinabilityMatters = true) : CallAnalyzer(TTI, GetAssumptionCache, GetBFI, PSI, ORE, Callee, Call), ComputeFullInlineCost(OptComputeFullInlineCost || Params.ComputeFullInlineCost || ORE), Params(Params), Threshold(Params.DefaultThreshold), - BoostIndirectCalls(BoostIndirect) {} + BoostIndirectCalls(BoostIndirect), + CallerInlinabilityMatters(CallerInlinabilityMatters) {} void dump(); virtual ~InlineCostCallAnalyzer() {} @@ -1164,11 +1168,22 @@ int VectorBonusPercent = TTI.getInlinerVectorBonusPercent(); int LastCallToStaticBonus = InlineConstants::LastCallToStaticBonus; - // Lambda to set all the above bonus and bonus percentages to 0. - auto DisallowAllBonuses = [&]() { + // Lambda to set all the above bonus and bonus percentages to 0, if doing so + // might help us inline the caller elsewhere. + auto SetBonusesForColdCalee = [&]() { + // Cold callsites should be kept small; disallow their bonuses. SingleBBBonusPercent = 0; VectorBonusPercent = 0; - LastCallToStaticBonus = 0; + + // While keeping LastCallToStaticBonus might result in code size reduction, + // it can cause the size of the caller to increase, which may prevent it + // from being inlined. + // + // FIXME: This is logically a part of our `shouldBeDeferred` logic in the + // main `Inliner` pass. Figuring out how to hoist some of this there (or + // sink it here, as the note there mentions) might be nice. 
+ if (CallerInlinabilityMatters) + LastCallToStaticBonus = 0; }; // Use the OptMinSizeThreshold or OptSizeThreshold knob if they are available @@ -1208,11 +1223,7 @@ Threshold = HotCallSiteThreshold.getValue(); } else if (isColdCallSite(Call, CallerBFI)) { LLVM_DEBUG(dbgs() << "Cold callsite.\n"); - // Do not apply bonuses for a cold callsite including the - // LastCallToStatic bonus. While this bonus might result in code size - // reduction, it can cause the size of a non-cold caller to increase - // preventing it from being inlined. - DisallowAllBonuses(); + SetBonusesForColdCalee(); Threshold = MinIfValid(Threshold, Params.ColdCallSiteThreshold); } else if (PSI) { // Use callee's global profile information only if we have no way of @@ -1225,11 +1236,7 @@ Threshold = MaxIfValid(Threshold, Params.HintThreshold); } else if (PSI->isFunctionEntryCold(&Callee)) { LLVM_DEBUG(dbgs() << "Cold callee.\n"); - // Do not apply bonuses for a cold callee including the - // LastCallToStatic bonus. While this bonus might result in code size - // reduction, it can cause the size of a non-cold caller to increase - // preventing it from being inlined. 
- DisallowAllBonuses(); + SetBonusesForColdCalee(); Threshold = MinIfValid(Threshold, Params.ColdThreshold); } } @@ -2107,9 +2114,11 @@ CallBase &Call, const InlineParams &Params, TargetTransformInfo &CalleeTTI, std::function &GetAssumptionCache, Optional> GetBFI, - ProfileSummaryInfo *PSI, OptimizationRemarkEmitter *ORE) { + ProfileSummaryInfo *PSI, OptimizationRemarkEmitter *ORE, + bool CallerInlinabilityMatters) { return getInlineCost(Call, Call.getCalledFunction(), Params, CalleeTTI, - GetAssumptionCache, GetBFI, PSI, ORE); + GetAssumptionCache, GetBFI, PSI, ORE, + CallerInlinabilityMatters); } InlineCost llvm::getInlineCost( @@ -2117,7 +2126,8 @@ TargetTransformInfo &CalleeTTI, std::function &GetAssumptionCache, Optional> GetBFI, - ProfileSummaryInfo *PSI, OptimizationRemarkEmitter *ORE) { + ProfileSummaryInfo *PSI, OptimizationRemarkEmitter *ORE, + bool CallerInlinabilityMatters) { // Cannot inline indirect calls. if (!Callee) @@ -2177,7 +2187,8 @@ << "... (caller:" << Caller->getName() << ")\n"); InlineCostCallAnalyzer CA(CalleeTTI, GetAssumptionCache, GetBFI, PSI, ORE, - *Callee, Call, Params); + *Callee, Call, Params, /*BoostIndirect=*/true, + CallerInlinabilityMatters); InlineResult ShouldInline = CA.analyze(); LLVM_DEBUG(CA.dump()); Index: llvm/lib/Passes/PassBuilder.cpp =================================================================== --- llvm/lib/Passes/PassBuilder.cpp +++ llvm/lib/Passes/PassBuilder.cpp @@ -572,11 +572,11 @@ return FPM; } -void PassBuilder::addPGOInstrPasses(ModulePassManager &MPM, bool DebugLogging, - PassBuilder::OptimizationLevel Level, - bool RunProfileGen, bool IsCS, - std::string ProfileFile, - std::string ProfileRemappingFile) { +void PassBuilder::addPGOInstrPasses( + ModulePassManager &MPM, bool DebugLogging, + PassBuilder::OptimizationLevel Level, bool RunProfileGen, bool IsCS, + std::string ProfileFile, std::string ProfileRemappingFile, + bool ExternalFunctionUseListsAreIncomplete) { assert(Level != OptimizationLevel::O0 
&& "Not expecting O0 here!"); // Generally running simplification passes and the inliner with an high // threshold results in smaller executables, but there may be cases where @@ -595,7 +595,7 @@ CGSCCPassManager CGPipeline(DebugLogging); - CGPipeline.addPass(InlinerPass(IP)); + CGPipeline.addPass(InlinerPass(IP, ExternalFunctionUseListsAreIncomplete)); FunctionPassManager FPM; FPM.addPass(SROA()); @@ -671,10 +671,12 @@ return getInlineParams(Level.getSpeedupLevel(), Level.getSizeLevel()); } -ModulePassManager -PassBuilder::buildModuleSimplificationPipeline(OptimizationLevel Level, - ThinLTOPhase Phase, - bool DebugLogging) { +static bool areFuncUseListsIncompleteDuring(PassBuilder::ThinLTOPhase Phase) { + return Phase != PassBuilder::ThinLTOPhase::None; +} + +ModulePassManager PassBuilder::buildModuleSimplificationPipeline( + OptimizationLevel Level, bool DebugLogging, ThinLTOPhase Phase) { ModulePassManager MPM(DebugLogging); bool HasSampleProfile = PGOOpt && (PGOOpt->Action == PGOOptions::SampleUse); @@ -787,7 +789,8 @@ addPGOInstrPasses(MPM, DebugLogging, Level, /* RunProfileGen */ PGOOpt->Action == PGOOptions::IRInstr, /* IsCS */ false, PGOOpt->ProfileFile, - PGOOpt->ProfileRemappingFile); + PGOOpt->ProfileRemappingFile, + areFuncUseListsIncompleteDuring(Phase)); MPM.addPass(PGOIndirectCallPromotion(false, false)); } if (PGOOpt && Phase != ThinLTOPhase::PostLink && @@ -826,7 +829,8 @@ if (Phase == ThinLTOPhase::PreLink && PGOOpt && PGOOpt->Action == PGOOptions::SampleUse) IP.HotCallSiteThreshold = 0; - MainCGPipeline.addPass(InlinerPass(IP)); + MainCGPipeline.addPass( + InlinerPass(IP, areFuncUseListsIncompleteDuring(Phase))); // Now deduce any function attributes based in the current code. 
MainCGPipeline.addPass(PostOrderFunctionAttrsPass()); @@ -857,7 +861,7 @@ } ModulePassManager PassBuilder::buildModuleOptimizationPipeline( - OptimizationLevel Level, bool DebugLogging, bool LTOPreLink) { + OptimizationLevel Level, bool DebugLogging, ThinLTOPhase Phase) { ModulePassManager MPM(DebugLogging); // Optimize globals now that the module is fully simplified. @@ -878,7 +882,7 @@ // may make globals referenced by available external functions dead and saves // running remaining passes on the eliminated functions. These should be // preserved during prelinking for link-time inlining decisions. - if (!LTOPreLink) + if (Phase != ThinLTOPhase::PreLink) MPM.addPass(EliminateAvailableExternallyPass()); if (EnableOrderFileInstrumentation) @@ -893,15 +897,17 @@ // sensitive PGO pass. We don't want to do this in LTOPreLink phrase as // cross-module inline has not been done yet. The context sensitive // instrumentation is after all the inlines are done. - if (!LTOPreLink && PGOOpt) { + if (Phase != ThinLTOPhase::PreLink && PGOOpt) { if (PGOOpt->CSAction == PGOOptions::CSIRInstr) addPGOInstrPasses(MPM, DebugLogging, Level, /* RunProfileGen */ true, /* IsCS */ true, PGOOpt->CSProfileGenFile, - PGOOpt->ProfileRemappingFile); + PGOOpt->ProfileRemappingFile, + areFuncUseListsIncompleteDuring(Phase)); else if (PGOOpt->CSAction == PGOOptions::CSIRUse) addPGOInstrPasses(MPM, DebugLogging, Level, /* RunProfileGen */ false, /* IsCS */ true, PGOOpt->ProfileFile, - PGOOpt->ProfileRemappingFile); + PGOOpt->ProfileRemappingFile, + areFuncUseListsIncompleteDuring(Phase)); } // Re-require GloblasAA here prior to function passes. This is particularly @@ -996,7 +1002,7 @@ // Split out cold code. Splitting is done late to avoid hiding context from // other optimizations and inadvertently regressing performance. The tradeoff // is that this has a higher code size cost than splitting early. 
- if (EnableHotColdSplit && !LTOPreLink) + if (EnableHotColdSplit && Phase != ThinLTOPhase::PreLink) MPM.addPass(HotColdSplittingPass()); // LoopSink pass sinks instructions hoisted by LICM, which serves as a @@ -1040,9 +1046,8 @@ return MPM; } -ModulePassManager -PassBuilder::buildPerModuleDefaultPipeline(OptimizationLevel Level, - bool DebugLogging, bool LTOPreLink) { +ModulePassManager PassBuilder::buildPerModuleDefaultPipeline( + OptimizationLevel Level, bool DebugLogging, ThinLTOPhase Phase) { assert(Level != OptimizationLevel::O0 && "Must request optimizations for the default pipeline!"); @@ -1059,11 +1064,10 @@ MPM.addPass(createModuleToFunctionPassAdaptor(AddDiscriminatorsPass())); // Add the core simplification pipeline. - MPM.addPass(buildModuleSimplificationPipeline(Level, ThinLTOPhase::None, - DebugLogging)); + MPM.addPass(buildModuleSimplificationPipeline(Level, DebugLogging, Phase)); // Now add the optimization pipeline. - MPM.addPass(buildModuleOptimizationPipeline(Level, DebugLogging, LTOPreLink)); + MPM.addPass(buildModuleOptimizationPipeline(Level, DebugLogging, Phase)); return MPM; } @@ -1089,8 +1093,8 @@ // If we are planning to perform ThinLTO later, we don't bloat the code with // unrolling/vectorization/... now. Just simplify the module as much as we // can. - MPM.addPass(buildModuleSimplificationPipeline(Level, ThinLTOPhase::PreLink, - DebugLogging)); + MPM.addPass(buildModuleSimplificationPipeline(Level, DebugLogging, + ThinLTOPhase::PreLink)); // Run partial inlining pass to partially inline functions that have // large bodies. @@ -1140,11 +1144,12 @@ MPM.addPass(ForceFunctionAttrsPass()); // Add the core simplification pipeline. - MPM.addPass(buildModuleSimplificationPipeline(Level, ThinLTOPhase::PostLink, - DebugLogging)); + MPM.addPass(buildModuleSimplificationPipeline(Level, DebugLogging, + ThinLTOPhase::PostLink)); // Now add the optimization pipeline. 
- MPM.addPass(buildModuleOptimizationPipeline(Level, DebugLogging)); + MPM.addPass(buildModuleOptimizationPipeline(Level, DebugLogging, + ThinLTOPhase::PostLink)); return MPM; } @@ -1156,7 +1161,7 @@ "Must request optimizations for the default pipeline!"); // FIXME: We should use a customized pre-link pipeline! return buildPerModuleDefaultPipeline(Level, DebugLogging, - /* LTOPreLink */ true); + ThinLTOPhase::PreLink); } ModulePassManager @@ -1291,11 +1296,13 @@ if (PGOOpt->CSAction == PGOOptions::CSIRInstr) addPGOInstrPasses(MPM, DebugLogging, Level, /* RunProfileGen */ true, /* IsCS */ true, PGOOpt->CSProfileGenFile, - PGOOpt->ProfileRemappingFile); + PGOOpt->ProfileRemappingFile, + /*ExternalFunctionUseListsAreIncomplete=*/false); else if (PGOOpt->CSAction == PGOOptions::CSIRUse) addPGOInstrPasses(MPM, DebugLogging, Level, /* RunProfileGen */ false, /* IsCS */ true, PGOOpt->ProfileFile, - PGOOpt->ProfileRemappingFile); + PGOOpt->ProfileRemappingFile, + /*ExternalFunctionUseListsAreIncomplete=*/false); } // Break up allocas Index: llvm/lib/Transforms/IPO/InlineSimple.cpp =================================================================== --- llvm/lib/Transforms/IPO/InlineSimple.cpp +++ llvm/lib/Transforms/IPO/InlineSimple.cpp @@ -45,8 +45,11 @@ initializeSimpleInlinerPass(*PassRegistry::getPassRegistry()); } - explicit SimpleInliner(InlineParams Params) - : LegacyInlinerBase(ID), Params(std::move(Params)) { + explicit SimpleInliner(InlineParams Params, + bool ExternalFunctionUseListsAreIncomplete) + : LegacyInlinerBase(ID), Params(std::move(Params)), + ExternalFunctionUseListsAreIncomplete( + ExternalFunctionUseListsAreIncomplete) { initializeSimpleInlinerPass(*PassRegistry::getPassRegistry()); } @@ -69,9 +72,17 @@ [&](Function &F) -> AssumptionCache & { return ACT->getAssumptionCache(F); }; + + Function &Caller = *CS.getCaller(); + bool CallerMayHaveUses = + ExternalFunctionUseListsAreIncomplete || !Caller.use_empty(); + bool CallerMayBeInlined = + 
CallerMayHaveUses && !Caller.hasFnAttribute(Attribute::NoInline); + return llvm::getInlineCost( cast(*CS.getInstruction()), Params, TTI, GetAssumptionCache, - /*GetBFI=*/None, PSI, RemarksEnabled ? &ORE : nullptr); + /*GetBFI=*/None, PSI, RemarksEnabled ? &ORE : nullptr, + /*CallerInlinabilityMatters=*/CallerMayBeInlined); } bool runOnSCC(CallGraphSCC &SCC) override; @@ -79,7 +90,9 @@ private: TargetTransformInfoWrapperPass *TTIWP; - + // If true (the conservative default), new references may later appear on + // otherwise-unreferenced `extern` functions, e.g., in ThinLTO. + bool ExternalFunctionUseListsAreIncomplete = true; }; } // end anonymous namespace @@ -97,21 +110,27 @@ Pass *llvm::createFunctionInliningPass() { return new SimpleInliner(); } -Pass *llvm::createFunctionInliningPass(int Threshold) { - return new SimpleInliner(llvm::getInlineParams(Threshold)); +Pass * +llvm::createFunctionInliningPass(int Threshold, + bool ExternalFunctionUseListsAreIncomplete) { + return new SimpleInliner(llvm::getInlineParams(Threshold), + ExternalFunctionUseListsAreIncomplete); } -Pass *llvm::createFunctionInliningPass(unsigned OptLevel, - unsigned SizeOptLevel, - bool DisableInlineHotCallSite) { +Pass * +llvm::createFunctionInliningPass(unsigned OptLevel, unsigned SizeOptLevel, + bool DisableInlineHotCallSite, + bool ExternalFunctionUseListsAreIncomplete) { auto Param = llvm::getInlineParams(OptLevel, SizeOptLevel); if (DisableInlineHotCallSite) Param.HotCallSiteThreshold = 0; - return new SimpleInliner(Param); + return new SimpleInliner(Param, ExternalFunctionUseListsAreIncomplete); } -Pass *llvm::createFunctionInliningPass(InlineParams &Params) { - return new SimpleInliner(Params); +Pass * +llvm::createFunctionInliningPass(InlineParams &Params, + bool ExternalFunctionUseListsAreIncomplete) { + return new SimpleInliner(Params, ExternalFunctionUseListsAreIncomplete); } bool SimpleInliner::runOnSCC(CallGraphSCC &SCC) { Index: llvm/lib/Transforms/IPO/Inliner.cpp 
=================================================================== --- llvm/lib/Transforms/IPO/Inliner.cpp +++ llvm/lib/Transforms/IPO/Inliner.cpp @@ -1015,9 +1015,17 @@ bool RemarksEnabled = Callee.getContext().getDiagHandlerPtr()->isMissedOptRemarkEnabled( DEBUG_TYPE); + + Function &Caller = *CS.getCaller(); + bool CallerMayHaveUses = + ExternalFunctionUseListsAreIncomplete || !Caller.use_empty(); + bool CallerMayBeInlined = + CallerMayHaveUses && !Caller.hasFnAttribute(Attribute::NoInline); + return getInlineCost(cast(*CS.getInstruction()), Params, CalleeTTI, GetAssumptionCache, {GetBFI}, PSI, - RemarksEnabled ? &ORE : nullptr); + RemarksEnabled ? &ORE : nullptr, + /*CallerInlinabilityMatters=*/CallerMayBeInlined); }; // Now process as many calls as we have within this caller in the sequnece. Index: llvm/lib/Transforms/IPO/PassManagerBuilder.cpp =================================================================== --- llvm/lib/Transforms/IPO/PassManagerBuilder.cpp +++ llvm/lib/Transforms/IPO/PassManagerBuilder.cpp @@ -318,7 +318,10 @@ // This should probably be lowered after performance testing. 
IP.HintThreshold = 325; - MPM.add(createFunctionInliningPass(IP)); + MPM.add(createFunctionInliningPass( + IP, + /*ExternalFunctionUseListsAreIncomplete=*/PrepareForLTO || + PrepareForThinLTO || PerformThinLTO)); MPM.add(createSROAPass()); MPM.add(createEarlyCSEPass()); // Catch trivial redundancies MPM.add(createCFGSimplificationPass()); // Merge & remove BBs Index: llvm/test/Transforms/Inline/bpi-cold-inlining.ll =================================================================== --- /dev/null +++ llvm/test/Transforms/Inline/bpi-cold-inlining.ll @@ -0,0 +1,76 @@ +; RUN: opt < %s -passes=inline -inline-cold-callsite-threshold=0 -S | FileCheck %s + +declare void @foo(i32) + +@a = external global i1 + +define internal void @callee1() { + call void @foo(i32 1) + ret void +} + +; CHECK-LABEL: define void @not_inlined_if_cold +define void @not_inlined_if_cold() { +entry: + %a = load i1, i1* @a + br i1 %a, label %if.then, label %if.end, !prof !0 + +if.then: + ; CHECK: call void @callee1() + call void @callee1() + br label %if.end + +if.end: + ; CHECK: ret void + ret void +} + +@not_inlined_if_cold_addr = global void ()* @not_inlined_if_cold + +define internal void @callee2() { + call void @foo(i32 2) + ret void +} + +; CHECK-LABEL: define void @gets_inlined_noinline_and_cold +define void @gets_inlined_noinline_and_cold() #0 { +entry: + %a = load i1, i1* @a + br i1 %a, label %if.then, label %if.end, !prof !0 + +if.then: + ; CHECK: call void @foo(i32 2) + call void @callee2() + br label %if.end + +if.end: + ; CHECK: ret void + ret void +} + +@gets_inlined_noinline_and_cold_addr = global void ()* @gets_inlined_noinline_and_cold + +define internal void @callee3() { + call void @foo(i32 3) + ret void +} + +; CHECK-LABEL: define void @gets_inlined_no_uses_and_cold +define void @gets_inlined_no_uses_and_cold() { +entry: + %a = load i1, i1* @a + br i1 %a, label %if.then, label %if.end, !prof !0 + +if.then: + ; CHECK: call void @foo(i32 3) + call void @callee3() + br label 
%if.end + +if.end: + ; CHECK: ret void + ret void +} + +attributes #0 = { noinline } + +!0 = !{!"branch_weights", i32 1, i32 2000}