Index: llvm/include/llvm/Analysis/InlineCost.h =================================================================== --- llvm/include/llvm/Analysis/InlineCost.h +++ llvm/include/llvm/Analysis/InlineCost.h @@ -266,6 +266,17 @@ /// Minimal filter to detect invalid constructs for inlining. InlineResult isInlineViable(Function &Callee); + +// This pass is used to annotate instructions during the inline process for +// debugging and analysis +struct InlineCostAnnotationPrinterPass + : PassInfoMixin<InlineCostAnnotationPrinterPass> { + raw_ostream &OS; + + public: + explicit InlineCostAnnotationPrinterPass(raw_ostream &OS) : OS(OS) {} + PreservedAnalyses run(Function &F, FunctionAnalysisManager &FAM); +}; } // namespace llvm #endif Index: llvm/lib/Analysis/InlineCost.cpp =================================================================== --- llvm/lib/Analysis/InlineCost.cpp +++ llvm/lib/Analysis/InlineCost.cpp @@ -54,9 +54,9 @@ cl::ZeroOrMore, cl::desc("Default amount of inlining to perform")); -static cl::opt<bool> PrintDebugInstructionDeltas( - "print-instruction-deltas", cl::Hidden, cl::init(false), - cl::desc("Prints deltas of cost and threshold per instruction")); +static cl::opt<bool> PrintInstructionAnnotations( + "print-inline-cost-instruction-annotations", cl::Hidden, cl::init(false), + cl::desc("Annotate instructions based on inline analysis")); static cl::opt<int> InlineThreshold( "inline-threshold", cl::Hidden, cl::init(225), cl::ZeroOrMore, @@ -128,16 +128,6 @@ bool hasThresholdChanged() const { return ThresholdAfter != ThresholdBefore; } }; -class CostAnnotationWriter : public AssemblyAnnotationWriter { -public: - // This DenseMap stores the delta change in cost and threshold after - // accounting for the given instruction. - DenseMap<const Instruction *, InstructionCostDetail> CostThresholdMap; - - virtual void emitInstructionAnnot(const Instruction *I, - formatted_raw_ostream &OS); -}; - /// Carry out call site analysis, in order to evaluate inlinability. 
/// NOTE: the type is currently used as implementation detail of functions such /// as llvm::getInlineCost. Note the function_ref constructor parameters - the @@ -414,6 +404,16 @@ void dump(); }; +class InlineCostAnnotationWriter : public AssemblyAnnotationWriter { +private: + InlineCostCallAnalyzer *ICCA; + +public: + InlineCostAnnotationWriter(InlineCostCallAnalyzer *ICCA) : ICCA(ICCA) {} + virtual void emitInstructionAnnot(const Instruction *I, + formatted_raw_ostream &OS); +}; + /// FIXME: if it is necessary to derive from InlineCostCallAnalyzer, note /// the FIXME in onLoweredCall, when instantiating an InlineCostCallAnalyzer class InlineCostCallAnalyzer final : public CallAnalyzer { @@ -429,6 +429,11 @@ /// Tunable parameters that control the analysis. const InlineParams &Params; + // This DenseMap stores the delta change in cost and threshold after + // accounting for the given instruction. The map is filled only with the + // flag PrintInstructionAnnotations on. + DenseMap<const Instruction *, InstructionCostDetail> InstructionCostDetailMap; + /// Upper bound for the inlining cost. Bonuses are being applied to account /// for speculative "expected profit" of the inlining decision. int Threshold = 0; @@ -598,19 +603,19 @@ void onInstructionAnalysisStart(const Instruction *I) override { // This function is called to store the initial cost of inlining before // the given instruction was assessed. - if (!PrintDebugInstructionDeltas) + if (!PrintInstructionAnnotations) return; - Writer.CostThresholdMap[I].CostBefore = Cost; - Writer.CostThresholdMap[I].ThresholdBefore = Threshold; + InstructionCostDetailMap[I].CostBefore = Cost; + InstructionCostDetailMap[I].ThresholdBefore = Threshold; } void onInstructionAnalysisFinish(const Instruction *I) override { // This function is called to find new values of cost and threshold after // the instruction has been assessed. 
- if (!PrintDebugInstructionDeltas) + if (!PrintInstructionAnnotations) return; - Writer.CostThresholdMap[I].CostAfter = Cost; - Writer.CostThresholdMap[I].ThresholdAfter = Threshold; + InstructionCostDetailMap[I].CostAfter = Cost; + InstructionCostDetailMap[I].ThresholdAfter = Threshold; } InlineResult finalizeAnalysis() override { @@ -713,19 +718,46 @@ ComputeFullInlineCost(OptComputeFullInlineCost || Params.ComputeFullInlineCost || ORE), Params(Params), Threshold(Params.DefaultThreshold), - BoostIndirectCalls(BoostIndirect), IgnoreThreshold(IgnoreThreshold) {} + BoostIndirectCalls(BoostIndirect), IgnoreThreshold(IgnoreThreshold), + ICAWriter(this) {} - /// Annotation Writer for cost annotation - CostAnnotationWriter Writer; + /// Annotation Writer for instruction details + InlineCostAnnotationWriter ICAWriter; void dump(); + Optional<InstructionCostDetail> getCostDetails(const Instruction *I) { + if (InstructionCostDetailMap.find(I) != InstructionCostDetailMap.end()) + return InstructionCostDetailMap[I]; + return None; + } + virtual ~InlineCostCallAnalyzer() {} int getThreshold() { return Threshold; } int getCost() { return Cost; } }; } // namespace +void InlineCostAnnotationWriter::emitInstructionAnnot( + const Instruction *I, formatted_raw_ostream &OS) { + // The cost of inlining of the given instruction is printed always. + // The threshold delta is printed only when it is non-zero. It happens + // when we decided to give a bonus at a particular instruction. 
+ Optional<InstructionCostDetail> Record = ICCA->getCostDetails(I); + if (!Record) + OS << "; No cost analysis for the instruction"; + else { + OS << "; cost before = " << Record->CostBefore + << ", cost after = " << Record->CostAfter + << ", threshold before = " << Record->ThresholdBefore + << ", threshold after = " << Record->ThresholdAfter << ", "; + OS << "cost delta = " << Record->getCostDelta(); + if (Record->hasThresholdChanged()) + OS << ", threshold delta = " << Record->getThresholdDelta(); + } + OS << "\n"; +} + /// Test whether the given value is an Alloca-derived function argument. bool CallAnalyzer::isAllocaDerivedArg(Value *V) { return SROAArgValues.count(V); @@ -737,26 +769,6 @@ disableLoadElimination(); } -void CostAnnotationWriter::emitInstructionAnnot(const Instruction *I, - formatted_raw_ostream &OS) { - // The cost of inlining of the given instruction is printed always. - // The threshold delta is printed only when it is non-zero. It happens - // when we decided to give a bonus at a particular instruction. - if (CostThresholdMap.count(I) == 0) { - OS << "; No analysis for the instruction\n"; - return; - } - const auto &Record = CostThresholdMap[I]; - OS << "; cost before = " << Record.CostBefore - << ", cost after = " << Record.CostAfter - << ", threshold before = " << Record.ThresholdBefore - << ", threshold after = " << Record.ThresholdAfter << ", "; - OS << "cost delta = " << Record.getCostDelta(); - if (Record.hasThresholdChanged()) - OS << ", threshold delta = " << Record.getThresholdDelta(); - OS << "\n"; -} - /// If 'V' maps to a SROA candidate, disable SROA for it. void CallAnalyzer::disableSROA(Value *V) { if (auto *SROAArg = getSROAArgForValueOrNull(V)) { @@ -2159,8 +2171,8 @@ /// Dump stats about this call's analysis. 
LLVM_DUMP_METHOD void InlineCostCallAnalyzer::dump() { #define DEBUG_PRINT_STAT(x) dbgs() << " " #x ": " << x << "\n" - if (PrintDebugInstructionDeltas) - F.print(dbgs(), &Writer); + if (PrintInstructionAnnotations) + F.print(dbgs(), &ICAWriter); DEBUG_PRINT_STAT(NumConstantArgs); DEBUG_PRINT_STAT(NumConstantOffsetPtrArgs); DEBUG_PRINT_STAT(NumAllocaArgs); @@ -2496,3 +2508,35 @@ Params.LocallyHotCallSiteThreshold = LocallyHotCallSiteThreshold; return Params; } + +PreservedAnalyses +InlineCostAnnotationPrinterPass::run(Function &F, + FunctionAnalysisManager &FAM) { + PrintInstructionAnnotations = true; + std::function<AssumptionCache &(Function &)> GetAssumptionCache = [&]( + Function &F) -> AssumptionCache & { + return FAM.getResult<AssumptionAnalysis>(F); + }; + Module *M = F.getParent(); + ProfileSummaryInfo PSI(*M); + DataLayout DL(M); + TargetTransformInfo TTI(DL); + const InlineParams Params = llvm::getInlineParams(); + for (BasicBlock &BB : F) { + for (Instruction &I : BB) { + if (CallInst *CI = dyn_cast<CallInst>(&I)) { + Function *CalledFunction = CI->getCalledFunction(); + if (!CalledFunction || CalledFunction->isDeclaration()) + continue; + OptimizationRemarkEmitter ORE(CalledFunction); + InlineCostCallAnalyzer ICCA(*CalledFunction, *CI, Params, TTI, + GetAssumptionCache, nullptr, &PSI, &ORE); + ICCA.analyze(); + OS << " Analyzing call of " << CalledFunction->getName() + << "... 
(caller:" << CI->getCaller()->getName() << ")\n"; + ICCA.dump(); + } + } + } + return PreservedAnalyses::all(); +} Index: llvm/lib/Passes/PassRegistry.def =================================================================== --- llvm/lib/Passes/PassRegistry.def +++ llvm/lib/Passes/PassRegistry.def @@ -234,6 +234,7 @@ FUNCTION_PASS("print<postdomtree>", PostDominatorTreePrinterPass(dbgs())) FUNCTION_PASS("print<demanded-bits>", DemandedBitsPrinterPass(dbgs())) FUNCTION_PASS("print<domfrontier>", DominanceFrontierPrinterPass(dbgs())) +FUNCTION_PASS("print<inline-cost>", InlineCostAnnotationPrinterPass(dbgs())) FUNCTION_PASS("print<loops>", LoopPrinterPass(dbgs())) FUNCTION_PASS("print<memoryssa>", MemorySSAPrinterPass(dbgs())) FUNCTION_PASS("print<phi-values>", PhiValuesPrinterPass(dbgs())) Index: llvm/test/Transforms/Inline/debuginline-cost-delta.ll =================================================================== --- llvm/test/Transforms/Inline/debuginline-cost-delta.ll +++ llvm/test/Transforms/Inline/debuginline-cost-delta.ll @@ -1,32 +1,16 @@ -; Require asserts for -debug-only -; REQUIRES: asserts - -; RUN: opt < %s -inline -debug-only=inline-cost -disable-output -print-instruction-deltas 2>&1 | FileCheck %s +; RUN: opt < %s -passes="print<inline-cost>" 2>&1 | FileCheck %s ; CHECK: Analyzing call of callee1... 
(caller:foo) -; CHECK: define i32 @callee1(i32 %x) { -; CHECK: ; cost before = {{.*}}, cost after = {{.*}}, threshold before = {{.*}}, threshold after = {{.*}}, cost delta = 5 -; CHECK: %x1 = add i32 %x, 1 -; CHECK: ; cost before = {{.*}}, cost after = {{.*}}, threshold before = {{.*}}, threshold after = {{.*}}, cost delta = 5 -; CHECK: %x2 = add i32 %x1, 1 -; CHECK: ; cost before = {{.*}}, cost after = {{.*}}, threshold before = {{.*}}, threshold after = {{.*}}, cost delta = 5 -; CHECK: %x3 = add i32 %x2, 1 -; CHECK: ; cost before = {{.*}}, cost after = {{.*}}, threshold before = {{.*}}, threshold after = {{.*}}, cost delta = 0 -; CHECK: ret i32 %x3 -; CHECK: } -; CHECK: NumConstantArgs: 0 -; CHECK: NumConstantOffsetPtrArgs: 0 -; CHECK: NumAllocaArgs: 0 -; CHECK: NumConstantPtrCmps: 0 -; CHECK: NumConstantPtrDiffs: 0 -; CHECK: NumInstructionsSimplified: 1 -; CHECK: NumInstructions: 4 -; CHECK: SROACostSavings: 0 -; CHECK: SROACostSavingsLost: 0 -; CHECK: LoadEliminationCost: 0 -; CHECK: ContainsNoDuplicateCall: 0 -; CHECK: Cost: {{.*}} -; CHECK: Threshold: {{.*}} +; CHECK-NEXT: define i32 @callee1(i32 %x) { +; CHECK-NEXT: cost before = {{.*}}, cost after = {{.*}}, threshold before = {{.*}}, threshold after = {{.*}}, cost delta = {{.*}} +; CHECK-NEXT: %x1 = add i32 %x, 1 +; CHECK-NEXT: cost before = {{.*}}, cost after = {{.*}}, threshold before = {{.*}}, threshold after = {{.*}}, cost delta = {{.*}} +; CHECK-NEXT: %x2 = add i32 %x1, 1 +; CHECK-NEXT: cost before = {{.*}}, cost after = {{.*}}, threshold before = {{.*}}, threshold after = {{.*}}, cost delta = {{.*}} +; CHECK-NEXT: %x3 = add i32 %x2, 1 +; CHECK-NEXT: cost before = {{.*}}, cost after = {{.*}}, threshold before = {{.*}}, threshold after = {{.*}}, cost delta = {{.*}} +; CHECK-NEXT: ret i32 %x3 +; CHECK-NEXT: } define i32 @foo(i32 %y) { %x = call i32 @callee1(i32 %y) Index: llvm/test/Transforms/Inline/inline-cost-annotation-pass.ll =================================================================== --- 
/dev/null +++ llvm/test/Transforms/Inline/inline-cost-annotation-pass.ll @@ -0,0 +1,32 @@ +; RUN: opt < %s -passes="print<inline-cost>" 2>&1 | FileCheck %s + +; CHECK: Analyzing call of foo... (caller:main) +; CHECK: define i8 addrspace(1)** @foo() { +; CHECK: cost before = {{.*}}, cost after = {{.*}}, threshold before = {{.*}}, threshold after = {{.*}}, cost delta = {{.*}} +; CHECK: %1 = inttoptr i64 754974720 to i8 addrspace(1)** +; CHECK: cost before = {{.*}}, cost after = {{.*}}, threshold before = {{.*}}, threshold after = {{.*}}, cost delta = {{.*}} +; CHECK: ret i8 addrspace(1)** %1 +; CHECK: } +; CHECK: NumConstantArgs: {{.*}} +; CHECK: NumConstantOffsetPtrArgs: {{.*}} +; CHECK: NumAllocaArgs: {{.*}} +; CHECK: NumConstantPtrCmps: {{.*}} +; CHECK: NumConstantPtrDiffs: {{.*}} +; CHECK: NumInstructionsSimplified: {{.*}} +; CHECK: NumInstructions: {{.*}} +; CHECK: SROACostSavings: {{.*}} +; CHECK: SROACostSavingsLost: {{.*}} +; CHECK: LoadEliminationCost: {{.*}} +; CHECK: ContainsNoDuplicateCall: {{.*}} +; CHECK: Cost: {{.*}} +; CHECK: Threshold: {{.*}} + +define i8 addrspace(1)** @foo() { + %1 = inttoptr i64 754974720 to i8 addrspace(1)** + ret i8 addrspace(1)** %1 +} + +define i8 addrspace(1)** @main() { + %1 = call i8 addrspace(1)** @foo() + ret i8 addrspace(1)** %1 +} Index: llvm/test/Transforms/Inline/print-instructions-deltas-unfinished.ll =================================================================== --- llvm/test/Transforms/Inline/print-instructions-deltas-unfinished.ll +++ /dev/null @@ -1,22 +0,0 @@ -; Require asserts for -debug-only -; REQUIRES: asserts - -; This test ensures that the hadling of instructions which were not analyzed by -; '-print-instruction-deltas' flag due to the early exit was done correctly. 
- -; RUN: opt < %s -inline -debug-only=inline-cost -disable-output -print-instruction-deltas -inline-threshold=0 2>&1 | FileCheck %s - -; CHECK: No analysis for the instruction -; CHECK: ret void - -declare void @callee1() - -define void @bar() { - call void @callee1() - ret void -} - -define void @foo() { - call void @bar() - ret void -}