Index: llvm/trunk/include/llvm/Analysis/OptimizationDiagnosticInfo.h
===================================================================
--- llvm/trunk/include/llvm/Analysis/OptimizationDiagnosticInfo.h
+++ llvm/trunk/include/llvm/Analysis/OptimizationDiagnosticInfo.h
@@ -16,11 +16,11 @@
 #define LLVM_IR_OPTIMIZATIONDIAGNOSTICINFO_H
 
 #include "llvm/ADT/Optional.h"
+#include "llvm/Analysis/BlockFrequencyInfo.h"
 #include "llvm/IR/PassManager.h"
 #include "llvm/Pass.h"
 
 namespace llvm {
-class BlockFrequencyInfo;
 class DebugLoc;
 class Function;
 class LLVMContext;
@@ -34,6 +34,19 @@
   OptimizationRemarkEmitter(Function *F, BlockFrequencyInfo *BFI)
       : F(F), BFI(BFI) {}
 
+  /// \brief This variant can be used to generate ORE on demand (without the
+  /// analysis pass).
+  ///
+  /// Note that this ctor has a very different cost depending on whether
+  /// F->getContext().getDiagnosticHotnessRequested() is on or not.  If it's
+  /// off, the operation is free.
+  ///
+  /// Whereas if DiagnosticHotnessRequested is on, it is a fairly expensive
+  /// operation since BFI and all its required analyses are computed.  This is
+  /// for example useful for CGSCC passes that can't use function analysis
+  /// passes in the old PM.
+  OptimizationRemarkEmitter(Function *F);
+
   OptimizationRemarkEmitter(OptimizationRemarkEmitter &&Arg)
       : F(Arg.F), BFI(Arg.BFI) {}
 
@@ -149,6 +162,9 @@
 
   BlockFrequencyInfo *BFI;
 
+  /// If we generate BFI on demand, we need to free it when ORE is freed.
+  std::unique_ptr<BlockFrequencyInfo> OwnedBFI;
+
   Optional<uint64_t> computeHotness(const Value *V);
 
   OptimizationRemarkEmitter(const OptimizationRemarkEmitter &) = delete;
Index: llvm/trunk/include/llvm/Transforms/IPO/InlinerPass.h
===================================================================
--- llvm/trunk/include/llvm/Transforms/IPO/InlinerPass.h
+++ llvm/trunk/include/llvm/Transforms/IPO/InlinerPass.h
@@ -27,6 +27,7 @@
 class CallSite;
 class DataLayout;
 class InlineCost;
+class OptimizationRemarkEmitter;
 class ProfileSummaryInfo;
 template <class PtrType, unsigned SmallSize> class SmallPtrSet;
 
Index: llvm/trunk/lib/Analysis/OptimizationDiagnosticInfo.cpp
===================================================================
--- llvm/trunk/lib/Analysis/OptimizationDiagnosticInfo.cpp
+++ llvm/trunk/lib/Analysis/OptimizationDiagnosticInfo.cpp
@@ -13,13 +13,37 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/Analysis/OptimizationDiagnosticInfo.h"
+#include "llvm/Analysis/BranchProbabilityInfo.h"
 #include "llvm/Analysis/LazyBlockFrequencyInfo.h"
 #include "llvm/Analysis/LoopInfo.h"
 #include "llvm/IR/DiagnosticInfo.h"
+#include "llvm/IR/Dominators.h"
 #include "llvm/IR/LLVMContext.h"
 
 using namespace llvm;
 
+OptimizationRemarkEmitter::OptimizationRemarkEmitter(Function *F)
+    : F(F), BFI(nullptr) {
+  if (!F->getContext().getDiagnosticHotnessRequested())
+    return;
+
+  // First create a dominator tree.
+  DominatorTree DT;
+  DT.recalculate(*F);
+
+  // Generate LoopInfo from it.
+  LoopInfo LI;
+  LI.analyze(DT);
+
+  // Then compute BranchProbabilityInfo.
+  BranchProbabilityInfo BPI;
+  BPI.calculate(*F, LI);
+
+  // Finally compute BFI.
+  OwnedBFI = llvm::make_unique<BlockFrequencyInfo>(*F, BPI, LI);
+  BFI = OwnedBFI.get();
+}
+
 Optional<uint64_t> OptimizationRemarkEmitter::computeHotness(const Value *V) {
   if (!BFI)
     return None;
Index: llvm/trunk/lib/Transforms/IPO/Inliner.cpp
===================================================================
--- llvm/trunk/lib/Transforms/IPO/Inliner.cpp
+++ llvm/trunk/lib/Transforms/IPO/Inliner.cpp
@@ -20,6 +20,7 @@
 #include "llvm/Analysis/BasicAliasAnalysis.h"
 #include "llvm/Analysis/CallGraph.h"
 #include "llvm/Analysis/InlineCost.h"
+#include "llvm/Analysis/OptimizationDiagnosticInfo.h"
 #include "llvm/Analysis/ProfileSummaryInfo.h"
 #include "llvm/Analysis/TargetLibraryInfo.h"
 #include "llvm/IR/CallSite.h"
@@ -237,11 +238,9 @@
   return true;
 }
 
-static void emitAnalysis(CallSite CS, const Twine &Msg) {
-  Function *Caller = CS.getCaller();
-  LLVMContext &Ctx = Caller->getContext();
-  DebugLoc DLoc = CS.getInstruction()->getDebugLoc();
-  emitOptimizationRemarkAnalysis(Ctx, DEBUG_TYPE, *Caller, DLoc, Msg);
+static void emitAnalysis(CallSite CS, OptimizationRemarkEmitter &ORE,
+                         const Twine &Msg) {
+  ORE.emitOptimizationRemarkAnalysis(DEBUG_TYPE, CS.getInstruction(), Msg);
 }
 
 /// Return true if inlining of CS can block the caller from being
@@ -323,22 +322,23 @@
 
 /// Return true if the inliner should attempt to inline at the given CallSite.
 static bool shouldInline(CallSite CS,
-                         function_ref<InlineCost(CallSite CS)> GetInlineCost) {
+                         function_ref<InlineCost(CallSite CS)> GetInlineCost,
+                         OptimizationRemarkEmitter &ORE) {
   InlineCost IC = GetInlineCost(CS);
 
   if (IC.isAlways()) {
     DEBUG(dbgs() << "    Inlining: cost=always"
                  << ", Call: " << *CS.getInstruction() << "\n");
-    emitAnalysis(CS, Twine(CS.getCalledFunction()->getName()) +
-                         " should always be inlined (cost=always)");
+    emitAnalysis(CS, ORE, Twine(CS.getCalledFunction()->getName()) +
+                              " should always be inlined (cost=always)");
     return true;
   }
 
   if (IC.isNever()) {
     DEBUG(dbgs() << "    NOT Inlining: cost=never"
                  << ", Call: " << *CS.getInstruction() << "\n");
-    emitAnalysis(CS, Twine(CS.getCalledFunction()->getName() +
-                           " should never be inlined (cost=never)"));
+    emitAnalysis(CS, ORE, Twine(CS.getCalledFunction()->getName() +
-                                " should never be inlined (cost=never)"));
     return false;
   }
 
@@ -347,10 +347,10 @@
     DEBUG(dbgs() << "    NOT Inlining: cost=" << IC.getCost()
                  << ", thres=" << (IC.getCostDelta() + IC.getCost())
                  << ", Call: " << *CS.getInstruction() << "\n");
-    emitAnalysis(CS, Twine(CS.getCalledFunction()->getName() +
-                           " too costly to inline (cost=") +
-                         Twine(IC.getCost()) + ", threshold=" +
-                         Twine(IC.getCostDelta() + IC.getCost()) + ")");
+    emitAnalysis(CS, ORE, Twine(CS.getCalledFunction()->getName() +
+                                " too costly to inline (cost=") +
+                              Twine(IC.getCost()) + ", threshold=" +
+                              Twine(IC.getCostDelta() + IC.getCost()) + ")");
     return false;
   }
 
@@ -359,20 +359,22 @@
     DEBUG(dbgs() << "    NOT Inlining: " << *CS.getInstruction()
                  << " Cost = " << IC.getCost()
                  << ", outer Cost = " << TotalSecondaryCost << '\n');
-    emitAnalysis(CS, Twine("Not inlining. Cost of inlining " +
-                           CS.getCalledFunction()->getName() +
-                           " increases the cost of inlining " +
-                           CS.getCaller()->getName() + " in other contexts"));
+    emitAnalysis(CS, ORE,
+                 Twine("Not inlining. Cost of inlining " +
+                       CS.getCalledFunction()->getName() +
+                       " increases the cost of inlining " +
+                       CS.getCaller()->getName() + " in other contexts"));
     return false;
   }
 
   DEBUG(dbgs() << "    Inlining: cost=" << IC.getCost()
                << ", thres=" << (IC.getCostDelta() + IC.getCost())
               << ", Call: " << *CS.getInstruction() << '\n');
-  emitAnalysis(
-      CS, CS.getCalledFunction()->getName() + Twine(" can be inlined into ") +
-              CS.getCaller()->getName() + " with cost=" + Twine(IC.getCost()) +
-              " (threshold=" + Twine(IC.getCostDelta() + IC.getCost()) + ")");
+  emitAnalysis(CS, ORE, CS.getCalledFunction()->getName() +
+                            Twine(" can be inlined into ") +
+                            CS.getCaller()->getName() + " with cost=" +
+                            Twine(IC.getCost()) + " (threshold=" +
+                            Twine(IC.getCostDelta() + IC.getCost()) + ")");
   return true;
 }
 
@@ -513,18 +515,21 @@
             InlineHistoryIncludes(Callee, InlineHistoryID, InlineHistory))
           continue;
 
-        LLVMContext &CallerCtx = Caller->getContext();
-
         // Get DebugLoc to report. CS will be invalid after Inliner.
         DebugLoc DLoc = CS.getInstruction()->getDebugLoc();
+        BasicBlock *Block = CS.getParent();
+        // FIXME for new PM: because of the old PM we currently generate ORE
+        // and in turn BFI on demand.  With the new PM, the ORE dependency
+        // should just become a regular analysis dependency.
+        OptimizationRemarkEmitter ORE(Caller);
 
        // If the policy determines that we should inline this function,
        // try to do so.
-        if (!shouldInline(CS, GetInlineCost)) {
-          emitOptimizationRemarkMissed(CallerCtx, DEBUG_TYPE, *Caller, DLoc,
-                                       Twine(Callee->getName() +
-                                             " will not be inlined into " +
-                                             Caller->getName()));
+        if (!shouldInline(CS, GetInlineCost, ORE)) {
+          ORE.emitOptimizationRemarkMissed(DEBUG_TYPE, DLoc, Block,
+                                           Twine(Callee->getName() +
+                                                 " will not be inlined into " +
+                                                 Caller->getName()));
          continue;
        }
 
@@ -532,17 +537,17 @@
        if (!InlineCallIfPossible(CS, InlineInfo, InlinedArrayAllocas,
                                  InlineHistoryID, InsertLifetime, AARGetter,
                                  ImportedFunctionsStats)) {
-          emitOptimizationRemarkMissed(CallerCtx, DEBUG_TYPE, *Caller, DLoc,
-                                       Twine(Callee->getName() +
-                                             " will not be inlined into " +
-                                             Caller->getName()));
+          ORE.emitOptimizationRemarkMissed(DEBUG_TYPE, DLoc, Block,
+                                           Twine(Callee->getName() +
+                                                 " will not be inlined into " +
+                                                 Caller->getName()));
          continue;
        }
        ++NumInlined;
 
        // Report the inline decision.
-        emitOptimizationRemark(
-            CallerCtx, DEBUG_TYPE, *Caller, DLoc,
+        ORE.emitOptimizationRemark(
+            DEBUG_TYPE, DLoc, Block,
            Twine(Callee->getName() + " inlined into " + Caller->getName()));
 
        // If inlining this function gave us any new call sites, throw them
Index: llvm/trunk/test/Transforms/Inline/optimization-remarks-with-hotness.ll
===================================================================
--- llvm/trunk/test/Transforms/Inline/optimization-remarks-with-hotness.ll
+++ llvm/trunk/test/Transforms/Inline/optimization-remarks-with-hotness.ll
@@ -0,0 +1,39 @@
+; RUN: opt < %s -inline -pass-remarks=inline -pass-remarks-missed=inline \
+; RUN:     -pass-remarks-analysis=inline -pass-remarks-with-hotness -S 2>&1 \
+; RUN:     | FileCheck %s
+
+; CHECK: foo should always be inlined (cost=always) (hotness: 30)
+; CHECK: foo inlined into bar (hotness: 30)
+; CHECK: foz should never be inlined (cost=never) (hotness: 30)
+; CHECK: foz will not be inlined into bar (hotness: 30)
+
+; Function Attrs: alwaysinline nounwind uwtable
+define i32 @foo() #0 !prof !1 {
+entry:
+  ret i32 4
+}
+
+; Function Attrs: noinline nounwind uwtable
+define i32 @foz() #1 !prof !2 {
+entry:
+  ret i32 2
+}
+
+; Function Attrs: nounwind uwtable
+define i32 @bar() !prof !3 {
+entry:
+  %call = call i32 @foo()
+  %call2 = call i32 @foz()
+  %mul = mul i32 %call, %call2
+  ret i32 %mul
+}
+
+attributes #0 = { alwaysinline }
+attributes #1 = { noinline }
+
+!llvm.ident = !{!0}
+
+!0 = !{!"clang version 3.5.0 "}
+!1 = !{!"function_entry_count", i64 10}
+!2 = !{!"function_entry_count", i64 20}
+!3 = !{!"function_entry_count", i64 30}
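
A note on the hotness values the test expects: all four remarks are attached to the two call sites in the entry block of @bar, whose function_entry_count is 30. Assuming the hotness is BFI's profile count for the block containing the call site (the entry count scaled by the block's frequency relative to the entry block, a factor of 1 here), every remark is reported with hotness 30 * 1 = 30; the entry counts of @foo (10) and @foz (20) do not enter into it.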
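
Usage sketch (illustrative, not part of the patch): the new OptimizationRemarkEmitter(Function *F) constructor is intended for old-PM passes that, like the Inliner above, cannot request the ORE function analysis. The minimal legacy pass below shows the pattern; the pass name, DEBUG_TYPE string, registration name, and remark text are invented for this example, while the ORE constructor and emitOptimizationRemarkAnalysis call are the ones exercised by the patch.

// Sketch of a hypothetical legacy-PM pass using the on-demand ORE ctor.
#include "llvm/Analysis/OptimizationDiagnosticInfo.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Instructions.h"
#include "llvm/Pass.h"

using namespace llvm;

#define DEBUG_TYPE "ore-ctor-sketch"

namespace {
struct ORECtorSketch : public FunctionPass {
  static char ID;
  ORECtorSketch() : FunctionPass(ID) {}

  bool runOnFunction(Function &F) override {
    // Build ORE on demand.  This is free unless hotness was requested
    // (e.g. -pass-remarks-with-hotness); in that case the constructor
    // computes DominatorTree, LoopInfo, BPI and finally BFI for F.
    OptimizationRemarkEmitter ORE(&F);

    // Emit an analysis remark for every call site; the remark carries the
    // hotness of the enclosing block whenever BFI is available.
    for (BasicBlock &BB : F)
      for (Instruction &I : BB)
        if (auto *CI = dyn_cast<CallInst>(&I))
          ORE.emitOptimizationRemarkAnalysis(DEBUG_TYPE, CI,
                                             "call site visited");
    return false;
  }
};
} // end anonymous namespace

char ORECtorSketch::ID = 0;
static RegisterPass<ORECtorSketch> X("ore-ctor-sketch", "ORE ctor sketch");

Creating ORE unconditionally is cheap here: when hotness diagnostics are not requested, the constructor returns before computing any analyses, which is exactly the cost asymmetry described in the header comment.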