Index: include/llvm/Analysis/OptimizationDiagnosticInfo.h
===================================================================
--- include/llvm/Analysis/OptimizationDiagnosticInfo.h
+++ include/llvm/Analysis/OptimizationDiagnosticInfo.h
@@ -16,11 +16,11 @@
 #define LLVM_IR_OPTIMIZATIONDIAGNOSTICINFO_H
 
 #include "llvm/ADT/Optional.h"
+#include "llvm/Analysis/BlockFrequencyInfo.h"
 #include "llvm/IR/PassManager.h"
 #include "llvm/Pass.h"
 
 namespace llvm {
-class BlockFrequencyInfo;
 class DebugLoc;
 class Function;
 class LLVMContext;
@@ -34,6 +34,10 @@
   OptimizationRemarkEmitter(Function *F, BlockFrequencyInfo *BFI)
       : F(F), BFI(BFI) {}
 
+  /// \brief On-demand variant, usable without the analysis pass; it computes
+  /// its own BlockFrequencyInfo for \p F if diagnostic hotness is requested.
+  OptimizationRemarkEmitter(Function *F);
+
   OptimizationRemarkEmitter(OptimizationRemarkEmitter &&Arg)
       : F(Arg.F), BFI(Arg.BFI) {}
 
@@ -58,6 +62,14 @@
   /// debug location from the Loop parameter \p L.
   void emitOptimizationRemark(const char *PassName, Loop *L, const Twine &Msg);
 
+  /// \brief Same as above but derives the debug location and the code region
+  /// from the debug location and the basic block of \p Inst, respectively.
+  void emitOptimizationRemark(const char *PassName, Instruction *Inst,
+                              const Twine &Msg) {
+    emitOptimizationRemark(PassName, Inst->getDebugLoc(), Inst->getParent(),
+                           Msg);
+  }
+
   /// Emit an optimization-missed message.
   ///
   /// \p PassName is the name of the pass emitting the message. If
@@ -73,6 +85,14 @@
   void emitOptimizationRemarkMissed(const char *PassName, Loop *L,
                                     const Twine &Msg);
 
+  /// \brief Same as above but derives the debug location and the code region
+  /// from the debug location and the basic block of \p Inst, respectively.
+  void emitOptimizationRemarkMissed(const char *PassName, Instruction *Inst,
+                                    const Twine &Msg) {
+    emitOptimizationRemarkMissed(PassName, Inst->getDebugLoc(),
+                                 Inst->getParent(), Msg);
+  }
+
   /// Emit an optimization analysis remark message.
   ///
   /// \p PassName is the name of the pass emitting the message. If
@@ -89,6 +109,14 @@
   void emitOptimizationRemarkAnalysis(const char *PassName, Loop *L,
                                       const Twine &Msg);
 
+  /// \brief Same as above but derives the debug location and the code region
+  /// from the debug location and the basic block of \p Inst, respectively.
+  void emitOptimizationRemarkAnalysis(const char *PassName, Instruction *Inst,
+                                      const Twine &Msg) {
+    emitOptimizationRemarkAnalysis(PassName, Inst->getDebugLoc(),
+                                   Inst->getParent(), Msg);
+  }
+
   /// \brief Emit an optimization analysis remark related to floating-point
   /// non-commutativity.
   ///
@@ -125,6 +153,9 @@
 
   BlockFrequencyInfo *BFI;
 
+  /// \brief If we generate BFI on demand, we need to free it when ORE is freed.
+  std::unique_ptr<BlockFrequencyInfo> OwnedBFI;
+
   Optional<uint64_t> computeHotness(const Value *V);
 
   OptimizationRemarkEmitter(const OptimizationRemarkEmitter &) = delete;
Index: include/llvm/Transforms/IPO/InlinerPass.h
===================================================================
--- include/llvm/Transforms/IPO/InlinerPass.h
+++ include/llvm/Transforms/IPO/InlinerPass.h
@@ -24,6 +24,7 @@
 class CallSite;
 class DataLayout;
 class InlineCost;
+class OptimizationRemarkEmitter;
 class ProfileSummaryInfo;
 template <class PtrType, unsigned SmallSize> class SmallPtrSet;
 
@@ -75,7 +76,7 @@
 
   /// shouldInline - Return true if the inliner should attempt to
   /// inline at the given CallSite.
-  bool shouldInline(CallSite CS);
+  bool shouldInline(CallSite CS, OptimizationRemarkEmitter &ORE);
   /// Return true if inlining of CS can block the caller from being
   /// inlined which is proved to be more beneficial. \p IC is the
   /// estimated inline cost associated with callsite \p CS.
Index: lib/Analysis/OptimizationDiagnosticInfo.cpp
===================================================================
--- lib/Analysis/OptimizationDiagnosticInfo.cpp
+++ lib/Analysis/OptimizationDiagnosticInfo.cpp
@@ -13,13 +13,39 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/Analysis/OptimizationDiagnosticInfo.h"
+#include "llvm/Analysis/BranchProbabilityInfo.h"
 #include "llvm/Analysis/LazyBlockFrequencyInfo.h"
 #include "llvm/Analysis/LoopInfo.h"
 #include "llvm/IR/DiagnosticInfo.h"
+#include "llvm/IR/Dominators.h"
 #include "llvm/IR/LLVMContext.h"
 
 using namespace llvm;
 
+/// On-demand constructor: when the context of \p F requests diagnostic
+/// hotness, compute and own a BlockFrequencyInfo; otherwise BFI stays null.
+OptimizationRemarkEmitter::OptimizationRemarkEmitter(Function *F)
+    : F(F), BFI(nullptr) {
+  if (!F->getContext().getDiagnosticHotnessRequested())
+    return;
+
+  // First create a dominator tree.
+  DominatorTree DT;
+  DT.recalculate(*F);
+
+  // Generate LoopInfo from it.
+  LoopInfo LI;
+  LI.analyze(DT);
+
+  // Then compute BranchProbabilityInfo.
+  BranchProbabilityInfo BPI;
+  BPI.calculate(*F, LI);
+
+  // Finally compute BFI.
+  OwnedBFI = llvm::make_unique<BlockFrequencyInfo>(*F, BPI, LI);
+  BFI = OwnedBFI.get();
+}
+
 Optional<uint64_t> OptimizationRemarkEmitter::computeHotness(const Value *V) {
   if (!BFI)
     return None;
Index: lib/Transforms/IPO/Inliner.cpp
===================================================================
--- lib/Transforms/IPO/Inliner.cpp
+++ lib/Transforms/IPO/Inliner.cpp
@@ -20,6 +20,7 @@
 #include "llvm/Analysis/BasicAliasAnalysis.h"
 #include "llvm/Analysis/CallGraph.h"
 #include "llvm/Analysis/InlineCost.h"
+#include "llvm/Analysis/OptimizationDiagnosticInfo.h"
 #include "llvm/Analysis/ProfileSummaryInfo.h"
 #include "llvm/Analysis/TargetLibraryInfo.h"
 #include "llvm/IR/CallSite.h"
@@ -222,11 +223,10 @@
   return true;
 }
 
-static void emitAnalysis(CallSite CS, const Twine &Msg) {
-  Function *Caller = CS.getCaller();
-  LLVMContext &Ctx = Caller->getContext();
-  DebugLoc DLoc = CS.getInstruction()->getDebugLoc();
-  emitOptimizationRemarkAnalysis(Ctx, DEBUG_TYPE, *Caller, DLoc, Msg);
+/// Emit an analysis remark for the call instruction of \p CS through \p ORE.
+static void emitAnalysis(CallSite CS, OptimizationRemarkEmitter &ORE,
+                         const Twine &Msg) {
+  ORE.emitOptimizationRemarkAnalysis(DEBUG_TYPE, CS.getInstruction(), Msg);
 }
 
 bool Inliner::shouldBeDeferred(Function *Caller, CallSite CS, InlineCost IC,
@@ -300,22 +300,22 @@
 }
 
 /// Return true if the inliner should attempt to inline at the given CallSite.
-bool Inliner::shouldInline(CallSite CS) {
+bool Inliner::shouldInline(CallSite CS, OptimizationRemarkEmitter &ORE) {
   InlineCost IC = getInlineCost(CS);
 
   if (IC.isAlways()) {
     DEBUG(dbgs() << " Inlining: cost=always"
                  << ", Call: " << *CS.getInstruction() << "\n");
-    emitAnalysis(CS, Twine(CS.getCalledFunction()->getName()) +
-                         " should always be inlined (cost=always)");
+    emitAnalysis(CS, ORE, Twine(CS.getCalledFunction()->getName()) +
+                              " should always be inlined (cost=always)");
     return true;
   }
 
   if (IC.isNever()) {
     DEBUG(dbgs() << " NOT Inlining: cost=never"
                  << ", Call: " << *CS.getInstruction() << "\n");
-    emitAnalysis(CS, Twine(CS.getCalledFunction()->getName() +
-                           " should never be inlined (cost=never)"));
+    emitAnalysis(CS, ORE, Twine(CS.getCalledFunction()->getName() +
+                                " should never be inlined (cost=never)"));
     return false;
   }
 
@@ -324,10 +324,10 @@
     DEBUG(dbgs() << " NOT Inlining: cost=" << IC.getCost()
                  << ", thres=" << (IC.getCostDelta() + IC.getCost())
                  << ", Call: " << *CS.getInstruction() << "\n");
-    emitAnalysis(CS, Twine(CS.getCalledFunction()->getName() +
-                           " too costly to inline (cost=") +
-                         Twine(IC.getCost()) + ", threshold=" +
-                         Twine(IC.getCostDelta() + IC.getCost()) + ")");
+    emitAnalysis(CS, ORE, Twine(CS.getCalledFunction()->getName() +
+                                " too costly to inline (cost=") +
+                              Twine(IC.getCost()) + ", threshold=" +
+                              Twine(IC.getCostDelta() + IC.getCost()) + ")");
     return false;
   }
 
@@ -336,20 +336,22 @@
     DEBUG(dbgs() << " NOT Inlining: " << *CS.getInstruction()
                  << " Cost = " << IC.getCost()
                  << ", outer Cost = " << TotalSecondaryCost << '\n');
-    emitAnalysis(CS, Twine("Not inlining. Cost of inlining " +
-                           CS.getCalledFunction()->getName() +
-                           " increases the cost of inlining " +
-                           CS.getCaller()->getName() + " in other contexts"));
+    emitAnalysis(CS, ORE,
+                 Twine("Not inlining. Cost of inlining " +
+                       CS.getCalledFunction()->getName() +
+                       " increases the cost of inlining " +
+                       CS.getCaller()->getName() + " in other contexts"));
     return false;
   }
 
   DEBUG(dbgs() << " Inlining: cost=" << IC.getCost()
                << ", thres=" << (IC.getCostDelta() + IC.getCost())
                << ", Call: " << *CS.getInstruction() << '\n');
-  emitAnalysis(
-      CS, CS.getCalledFunction()->getName() + Twine(" can be inlined into ") +
-              CS.getCaller()->getName() + " with cost=" + Twine(IC.getCost()) +
-              " (threshold=" + Twine(IC.getCostDelta() + IC.getCost()) + ")");
+  emitAnalysis(CS, ORE, CS.getCalledFunction()->getName() +
+                            Twine(" can be inlined into ") +
+                            CS.getCaller()->getName() + " with cost=" +
+                            Twine(IC.getCost()) + " (threshold=" +
+                            Twine(IC.getCostDelta() + IC.getCost()) + ")");
   return true;
 }
 
@@ -478,36 +480,39 @@
         if (InlineHistoryID != -1 &&
             InlineHistoryIncludes(Callee, InlineHistoryID, InlineHistory))
           continue;
-        
-        LLVMContext &CallerCtx = Caller->getContext();
 
         // Get DebugLoc to report. CS will be invalid after Inliner.
         DebugLoc DLoc = CS.getInstruction()->getDebugLoc();
+        BasicBlock *Block = CS.getParent();
+        // FIXME for new PM: because of the old PM we currently generate ORE and
+        // in turn BFI on demand. With the new PM, the ORE dependency should
+        // just become a regular analysis dependency.
+        OptimizationRemarkEmitter ORE(Caller);
 
         // If the policy determines that we should inline this function,
         // try to do so.
-        if (!shouldInline(CS)) {
-          emitOptimizationRemarkMissed(CallerCtx, DEBUG_TYPE, *Caller, DLoc,
-                                       Twine(Callee->getName() +
-                                             " will not be inlined into " +
-                                             Caller->getName()));
+        if (!shouldInline(CS, ORE)) {
+          ORE.emitOptimizationRemarkMissed(DEBUG_TYPE, DLoc, Block,
+                                           Twine(Callee->getName() +
+                                                 " will not be inlined into " +
+                                                 Caller->getName()));
           continue;
         }
 
         // Attempt to inline the function.
         if (!InlineCallIfPossible(*this, CS, InlineInfo, InlinedArrayAllocas,
                                   InlineHistoryID, InsertLifetime)) {
-          emitOptimizationRemarkMissed(CallerCtx, DEBUG_TYPE, *Caller, DLoc,
-                                       Twine(Callee->getName() +
-                                             " will not be inlined into " +
-                                             Caller->getName()));
+          ORE.emitOptimizationRemarkMissed(DEBUG_TYPE, DLoc, Block,
+                                           Twine(Callee->getName() +
+                                                 " will not be inlined into " +
+                                                 Caller->getName()));
           continue;
         }
         ++NumInlined;
 
         // Report the inline decision.
-        emitOptimizationRemark(
-            CallerCtx, DEBUG_TYPE, *Caller, DLoc,
+        ORE.emitOptimizationRemark(
+            DEBUG_TYPE, DLoc, Block,
             Twine(Callee->getName() + " inlined into " + Caller->getName()));
 
         // If inlining this function gave us any new call sites, throw them
Index: test/Transforms/Inline/optimization-remarks-with-hotness.ll
===================================================================
--- /dev/null
+++ test/Transforms/Inline/optimization-remarks-with-hotness.ll
@@ -0,0 +1,63 @@
+; RUN: opt < %s -inline -pass-remarks=inline -pass-remarks-missed=inline -pass-remarks-analysis=inline -pass-remarks-with-hotness -S 2>&1 | FileCheck %s
+
+; CHECK: foo should always be inlined (cost=always) (hotness: 30)
+; CHECK: foo inlined into bar (hotness: 30)
+; CHECK: foz should never be inlined (cost=never) (hotness: 30)
+; CHECK: foz will not be inlined into bar (hotness: 30)
+
+; Function Attrs: alwaysinline nounwind uwtable
+define i32 @foo(i32 %x, i32 %y) #0 !prof !1 {
+entry:
+  %x.addr = alloca i32, align 4
+  %y.addr = alloca i32, align 4
+  store i32 %x, i32* %x.addr, align 4
+  store i32 %y, i32* %y.addr, align 4
+  %0 = load i32, i32* %x.addr, align 4
+  %1 = load i32, i32* %y.addr, align 4
+  %add = add nsw i32 %0, %1
+  ret i32 %add
+}
+
+; Function Attrs: noinline nounwind uwtable
+define float @foz(i32 %x, i32 %y) #1 !prof !2 {
+entry:
+  %x.addr = alloca i32, align 4
+  %y.addr = alloca i32, align 4
+  store i32 %x, i32* %x.addr, align 4
+  store i32 %y, i32* %y.addr, align 4
+  %0 = load i32, i32* %x.addr, align 4
+  %1 = load i32, i32* %y.addr, align 4
+  %mul = mul nsw i32 %0, %1
+  %conv = sitofp i32 %mul to float
+  ret float %conv
+}
+
+; Function Attrs: nounwind uwtable
+define i32 @bar(i32 %j) #2 !prof !3 {
+entry:
+  %j.addr = alloca i32, align 4
+  store i32 %j, i32* %j.addr, align 4
+  %0 = load i32, i32* %j.addr, align 4
+  %1 = load i32, i32* %j.addr, align 4
+  %sub = sub nsw i32 %1, 2
+  %call = call i32 @foo(i32 %0, i32 %sub)
+  %conv = sitofp i32 %call to float
+  %2 = load i32, i32* %j.addr, align 4
+  %sub1 = sub nsw i32 %2, 2
+  %3 = load i32, i32* %j.addr, align 4
+  %call2 = call float @foz(i32 %sub1, i32 %3)
+  %mul = fmul float %conv, %call2
+  %conv3 = fptosi float %mul to i32
+  ret i32 %conv3
+}
+
+attributes #0 = { alwaysinline nounwind uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { noinline nounwind uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #2 = { nounwind uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+
+!llvm.ident = !{!0}
+
+!0 = !{!"clang version 3.5.0 "}
+!1 = !{!"function_entry_count", i64 10}
+!2 = !{!"function_entry_count", i64 20}
+!3 = !{!"function_entry_count", i64 30}