Index: lib/Transforms/IPO/PartialInlining.cpp =================================================================== --- lib/Transforms/IPO/PartialInlining.cpp +++ lib/Transforms/IPO/PartialInlining.cpp @@ -16,8 +16,12 @@ #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/BlockFrequencyInfo.h" #include "llvm/Analysis/BranchProbabilityInfo.h" +#include "llvm/Analysis/InlineCost.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/OptimizationDiagnosticInfo.h" +#include "llvm/Analysis/ProfileSummaryInfo.h" +#include "llvm/Analysis/TargetLibraryInfo.h" +#include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/IR/CFG.h" #include "llvm/IR/DiagnosticInfo.h" #include "llvm/IR/Dominators.h" @@ -70,16 +74,25 @@ }; struct PartialInlinerImpl { - PartialInlinerImpl(InlineFunctionInfo IFI) : IFI(std::move(IFI)) {} + PartialInlinerImpl( + std::function *GetAC, + std::function *GTTI, + Optional> GBFI, + ProfileSummaryInfo *ProfSI) + : GetAssumptionCache(GetAC), GetTTI(GTTI), GetBFI(GBFI), PSI(ProfSI) {} bool run(Module &M); Function *unswitchFunction(Function *F); std::unique_ptr computeOutliningInfo(Function *F); private: - InlineFunctionInfo IFI; int NumPartialInlining = 0; + std::function *GetAssumptionCache; + std::function *GetTTI; + Optional> GetBFI; + ProfileSummaryInfo *PSI; + bool shouldPartialInline(CallSite CS, OptimizationRemarkEmitter &ORE); bool IsLimitReached() { return (MaxNumPartialInlining != -1 && NumPartialInlining >= MaxNumPartialInlining); @@ -94,18 +107,30 @@ void getAnalysisUsage(AnalysisUsage &AU) const override { AU.addRequired(); + AU.addRequired(); + AU.addRequired(); } bool runOnModule(Module &M) override { if (skipModule(M)) return false; AssumptionCacheTracker *ACT = &getAnalysis(); + TargetTransformInfoWrapperPass *TTIWP = + &getAnalysis(); + ProfileSummaryInfo *PSI = + getAnalysis().getPSI(); + std::function GetAssumptionCache = [&ACT](Function &F) -> AssumptionCache & { return ACT->getAssumptionCache(F); }; - InlineFunctionInfo IFI(nullptr, &GetAssumptionCache); - return PartialInlinerImpl(IFI).run(M); + + std::function GetTTI = + [&TTIWP](Function &F) -> TargetTransformInfo & { + return TTIWP->getTTI(F); + }; + + return PartialInlinerImpl(&GetAssumptionCache, &GetTTI, None, PSI).run(M); } }; } @@ -263,6 +288,49 @@ return OutliningInfo; } +bool PartialInlinerImpl::shouldPartialInline(CallSite CS, + OptimizationRemarkEmitter &ORE) { + // TODO : more sharing with shouldInline in Inliner.cpp + using namespace ore; + Instruction *Call = CS.getInstruction(); + Function *Callee = CS.getCalledFunction(); + Function *Caller = CS.getCaller(); + auto &CalleeTTI = (*GetTTI)(*Callee); + InlineCost IC = getInlineCost(CS, getInlineParams(), CalleeTTI, + *GetAssumptionCache, GetBFI, PSI); + + if (IC.isAlways()) { + ORE.emit(OptimizationRemarkAnalysis(DEBUG_TYPE, "AlwaysInline", Call) + << NV("Callee", Callee) + << " should always be fully inlined, not partially"); + return false; + } + + if (IC.isNever()) { + ORE.emit(OptimizationRemarkMissed(DEBUG_TYPE, "NeverInline", Call) + << NV("Callee", Callee) << " not partially inlined into " + << NV("Caller", Caller) + << " because it should never be inlined (cost=never)"); + return false; + } + + if (!IC) { + ORE.emit(OptimizationRemarkMissed(DEBUG_TYPE, "TooCostly", Call) + << NV("Callee", Callee) << " not partially inlined into " + << NV("Caller", Caller) << " because too costly to inline (cost=" + << NV("Cost", IC.getCost()) << ", threshold=" + << NV("Threshold", IC.getCostDelta() + IC.getCost()) << ")"); + return false; + } + + ORE.emit(OptimizationRemarkAnalysis(DEBUG_TYPE, "CanBePartiallyInlined", Call) + << NV("Callee", Callee) << " can be partially inlined into " + << NV("Caller", Caller) << " with cost=" << NV("Cost", IC.getCost()) + << " (threshold=" + << NV("Threshold", IC.getCostDelta() + IC.getCost()) << ")"); + return true; +} + Function *PartialInlinerImpl::unswitchFunction(Function *F) { if (F->hasAddressTaken()) @@ -277,7 +345,6 @@ // Clone the function, so that we can hack away on it. ValueToValueMapTy VMap; Function *DuplicateFunction = CloneFunction(F, VMap); - DuplicateFunction->setLinkage(GlobalValue::InternalLinkage); BasicBlock *NewReturnBlock = cast(VMap[OutliningInfo->ReturnBlock]); BasicBlock *NewNonReturnBlock = @@ -385,16 +452,19 @@ if (IsLimitReached()) continue; - NumPartialInlining++; - OptimizationRemarkEmitter ORE(CS.getCaller()); + if (!shouldPartialInline(CS, ORE)) + continue; + DebugLoc DLoc = CS.getInstruction()->getDebugLoc(); BasicBlock *Block = CS.getParent(); ORE.emit(OptimizationRemark(DEBUG_TYPE, "PartiallyInlined", DLoc, Block) << ore::NV("Callee", F) << " partially inlined into " << ore::NV("Caller", CS.getCaller())); + InlineFunctionInfo IFI(nullptr, GetAssumptionCache); InlineFunction(CS, IFI); + NumPartialInlining++; } // Ditch the duplicate, since we're done with it, and rewrite all remaining @@ -448,6 +518,8 @@ INITIALIZE_PASS_BEGIN(PartialInlinerLegacyPass, "partial-inliner", "Partial Inliner", false, false) INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker) +INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass) +INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass) INITIALIZE_PASS_END(PartialInlinerLegacyPass, "partial-inliner", "Partial Inliner", false, false) @@ -458,12 +530,25 @@ PreservedAnalyses PartialInlinerPass::run(Module &M, ModuleAnalysisManager &AM) { auto &FAM = AM.getResult(M).getManager(); + std::function GetAssumptionCache = [&FAM](Function &F) -> AssumptionCache & { return FAM.getResult(F); }; - InlineFunctionInfo IFI(nullptr, &GetAssumptionCache); - if (PartialInlinerImpl(IFI).run(M)) + + std::function GetBFI = + [&FAM](Function &F) -> BlockFrequencyInfo & { + return FAM.getResult(F); + }; + + std::function GetTTI = + [&FAM](Function &F) -> TargetTransformInfo & { + return FAM.getResult(F); + }; + + ProfileSummaryInfo *PSI = &AM.getResult(M); + + if (PartialInlinerImpl(&GetAssumptionCache, &GetTTI, {GetBFI}, PSI).run(M)) return PreservedAnalyses::none(); return PreservedAnalyses::all(); } Index: test/Transforms/CodeExtractor/PartialInlineOptRemark.ll =================================================================== --- test/Transforms/CodeExtractor/PartialInlineOptRemark.ll +++ test/Transforms/CodeExtractor/PartialInlineOptRemark.ll @@ -7,6 +7,8 @@ ; RUN: opt -S -passes=partial-inliner -pass-remarks=partial-inlining --disable-partial-inlining < %s 2>&1 | FileCheck --check-prefix=LIMIT %s ; RUN: opt -S -partial-inliner -pass-remarks=partial-inlining -max-partial-inlining=0 < %s 2>&1 | FileCheck --check-prefix=LIMIT %s ; RUN: opt -S -passes=partial-inliner -pass-remarks=partial-inlining -max-partial-inlining=0 < %s 2>&1 | FileCheck --check-prefix=LIMIT %s +; RUN: opt -S -partial-inliner -pass-remarks=partial-inlining -inline-threshold=0 < %s 2>&1 | FileCheck --check-prefix=LIMIT %s +; RUN: opt -S -passes=partial-inliner -pass-remarks=partial-inlining -inline-threshold=0 < %s 2>&1 | FileCheck --check-prefix=LIMIT %s define i32 @bar(i32 %arg) local_unnamed_addr #0 !dbg !5 { bb: @@ -30,6 +32,38 @@ ret i32 %tmp3, !dbg !19 } +define i32 @bar_noinline(i32 %arg) local_unnamed_addr #1 !dbg !5 { +bb: + %tmp = icmp slt i32 %arg, 0, !dbg !7 + br i1 %tmp, label %bb1, label %bb2, !dbg !8 + +bb1: ; preds = %bb + tail call void (...) @foo() #0, !dbg !9 + tail call void (...) @foo() #0, !dbg !10 + tail call void (...) @foo() #0, !dbg !11 + br label %bb2, !dbg !18 + +bb2: ; preds = %bb1, %bb + %tmp3 = phi i32 [ 0, %bb1 ], [ 1, %bb ] + ret i32 %tmp3, !dbg !19 +} + +define i32 @bar_alwaysinline(i32 %arg) local_unnamed_addr #2 !dbg !5 { +bb: + %tmp = icmp slt i32 %arg, 0, !dbg !7 + br i1 %tmp, label %bb1, label %bb2, !dbg !8 + +bb1: ; preds = %bb + tail call void (...) @foo() #0, !dbg !9 + tail call void (...) @foo() #0, !dbg !10 + tail call void (...) @foo() #0, !dbg !11 + br label %bb2, !dbg !18 + +bb2: ; preds = %bb1, %bb + %tmp3 = phi i32 [ 0, %bb1 ], [ 1, %bb ] + ret i32 %tmp3, !dbg !19 +} + ; Function Attrs: nounwind declare void @foo(...) local_unnamed_addr #0 @@ -37,12 +71,18 @@ define i32 @dummy_caller(i32 %arg) local_unnamed_addr #0 !dbg !20 { bb: ; CHECK:remark{{.*}}bar partially inlined into dummy_caller +; CHECK-NOT:remark{{.*}}bar_noinline partially inlined into dummy_caller +; CHECK-NOT:remark{{.*}}bar_alwaysinline partially inlined into dummy_caller ; LIMIT-NOT:remark{{.*}}bar partially inlined into dummy_caller %tmp = tail call i32 @bar(i32 %arg), !dbg !21 + %tmp2 = tail call i32 @bar_noinline(i32 %arg), !dbg !21 + %tmp3 = tail call i32 @bar_alwaysinline(i32 %arg), !dbg !21 ret i32 %tmp, !dbg !22 } attributes #0 = { nounwind } +attributes #1 = { noinline nounwind } +attributes #2 = { alwaysinline nounwind } !llvm.dbg.cu = !{!0} !llvm.module.flags = !{!3} Index: test/Transforms/CodeExtractor/PartialInlineOr.ll =================================================================== --- test/Transforms/CodeExtractor/PartialInlineOr.ll +++ test/Transforms/CodeExtractor/PartialInlineOr.ll @@ -3,7 +3,7 @@ ; RUN: opt < %s -partial-inliner -max-num-inline-blocks=2 -S | FileCheck --check-prefix=LIMIT %s ; RUN: opt < %s -passes=partial-inliner -max-num-inline-blocks=2 -S | FileCheck --check-prefix=LIMIT %s -; Function Attrs: noinline nounwind uwtable +; Function Attrs: nounwind uwtable define i32 @bar(i32 %arg) local_unnamed_addr #0 { bb: %tmp = icmp slt i32 %arg, 0 @@ -35,7 +35,7 @@ declare void @foo(...) local_unnamed_addr -; Function Attrs: noinline nounwind uwtable +; Function Attrs: nounwind uwtable define i32 @dummy_caller(i32 %arg) local_unnamed_addr #0 { bb: ; CHECK-LABEL: @dummy_caller @@ -89,7 +89,7 @@ ret i32 %tmp } -attributes #0 = { noinline nounwind uwtable } +attributes #0 = { nounwind uwtable } attributes #1 = { nounwind } !llvm.ident = !{!0}