diff --git a/llvm/include/llvm/Analysis/InlineCost.h b/llvm/include/llvm/Analysis/InlineCost.h --- a/llvm/include/llvm/Analysis/InlineCost.h +++ b/llvm/include/llvm/Analysis/InlineCost.h @@ -141,6 +141,22 @@ operator const char *() const { return message; } }; +/// Counters describing the simplification opportunities and the instruction mix +/// observed during inline-cost analysis, kept in a standalone struct so they can +/// be consumed outside of the InlineCost call analysis. +struct InlineCostStats { + unsigned NumConstantArgs = 0; // Taken from SimplifiedValues.size(). + unsigned NumConstantOffsetPtrArgs = 0; // Taken from ConstantOffsetPtrs.size(). + unsigned NumAllocaArgs = 0; // Taken from SROAArgValues.size(). + unsigned NumConstantPtrCmps = 0; // Compares folded to a constant via known offsets. + unsigned NumConstantPtrDiffs = 0; // Subtractions folded to a constant via known offsets. + unsigned NumInstructionsSimplified = 0; // Instructions for which the visitor returned true. + unsigned NumInstructions = 0; // Non-ephemeral instructions visited. + unsigned NumVectorInstructions = 0; // Subset of NumInstructions counted as vector instructions. + + LLVM_DUMP_METHOD void dump(); // Prints the counters to dbgs(); defined only if !NDEBUG or LLVM_ENABLE_DUMP. +}; + /// Thresholds to tune inline cost analysis. The inline cost analysis decides /// the condition to apply a threshold and applies it. Otherwise, /// DefaultThreshold is used. If a threshold is Optional, it is applied only @@ -178,6 +194,9 @@ /// Compute inline cost even when the cost has exceeded the threshold. Optional ComputeFullInlineCost; + + /// If set, invoked with a copy of the InlineCostStats gathered while analyzing the call, so the caller of getInlineCost can observe them. + Optional> PassInlineCostStats; }; /// Generate the parameters to tune the inline cost analysis based only on the diff --git a/llvm/lib/Analysis/InlineCost.cpp b/llvm/lib/Analysis/InlineCost.cpp --- a/llvm/lib/Analysis/InlineCost.cpp +++ b/llvm/lib/Analysis/InlineCost.cpp @@ -150,8 +150,6 @@ /// Number of bytes allocated statically by the callee. uint64_t AllocatedSize = 0; - unsigned NumInstructions = 0; - unsigned NumVectorInstructions = 0; /// Bonus to be applied when percentage of vector instructions in callee is /// high (see more details in updateThreshold). @@ -177,6 +175,9 @@ /// cost must be added. DenseMap SROAArgCosts; + unsigned SROACostSavings = 0; + unsigned SROACostSavingsLost = 0; + /// Keep track of values which map to a pointer base and constant offset. 
DenseMap> ConstantOffsetPtrs; @@ -194,6 +195,8 @@ SmallPtrSet LoadAddrSet; int LoadEliminationCost = 0; + InlineCostStats InliningStats; + // Custom simplification helper routines. bool isAllocaDerivedArg(Value *V); bool lookupSROAArgAndCost(Value *V, Value *&Arg, @@ -307,16 +310,7 @@ int getThreshold() { return Threshold; } int getCost() { return Cost; } - // Keep a bunch of stats about the cost savings found so we can print them - // out when debugging. - unsigned NumConstantArgs = 0; - unsigned NumConstantOffsetPtrArgs = 0; - unsigned NumAllocaArgs = 0; - unsigned NumConstantPtrCmps = 0; - unsigned NumConstantPtrDiffs = 0; - unsigned NumInstructionsSimplified = 0; - unsigned SROACostSavings = 0; - unsigned SROACostSavingsLost = 0; + InlineCostStats getInlineCostStats() { return InliningStats; } void dump(); }; @@ -1012,7 +1006,7 @@ Constant *CRHS = ConstantInt::get(RHS->getContext(), RHSOffset); if (Constant *C = ConstantExpr::getICmp(I.getPredicate(), CLHS, CRHS)) { SimplifiedValues[&I] = C; - ++NumConstantPtrCmps; + ++InliningStats.NumConstantPtrCmps; return true; } } @@ -1058,7 +1052,7 @@ Constant *CRHS = ConstantInt::get(RHS->getContext(), RHSOffset); if (Constant *C = ConstantExpr::getSub(CLHS, CRHS)) { SimplifiedValues[&I] = C; - ++NumConstantPtrDiffs; + ++InliningStats.NumConstantPtrDiffs; return true; } } @@ -1597,9 +1591,9 @@ if (EphValues.count(&*I)) continue; - ++NumInstructions; + ++InliningStats.NumInstructions; if (isa(I) || I->getType()->isVectorTy()) - ++NumVectorInstructions; + ++InliningStats.NumVectorInstructions; // If the instruction simplified to a constant, there is no cost to this // instruction. Visit the instructions using our InstVisitor to account for @@ -1607,7 +1601,7 @@ // consumed the instruction in any way, and false if the instruction's base // cost should count against inlining. 
if (Base::visit(&*I)) - ++NumInstructionsSimplified; + ++InliningStats.NumInstructionsSimplified; else addCost(InlineConstants::InstrCost); @@ -1760,8 +1754,8 @@ // // FIXME: It would be nice to remove all such bonuses. At least it would be // nice to base the bonus values on something more scientific. - assert(NumInstructions == 0); - assert(NumVectorInstructions == 0); + assert(InliningStats.NumInstructions == 0); + assert(InliningStats.NumVectorInstructions == 0); // Update the threshold based on callsite properties updateThreshold(Call, F); @@ -1824,9 +1818,9 @@ } } } - NumConstantArgs = SimplifiedValues.size(); - NumConstantOffsetPtrArgs = ConstantOffsetPtrs.size(); - NumAllocaArgs = SROAArgValues.size(); + InliningStats.NumConstantArgs = SimplifiedValues.size(); + InliningStats.NumConstantOffsetPtrArgs = ConstantOffsetPtrs.size(); + InliningStats.NumAllocaArgs = SROAArgValues.size(); // FIXME: If a caller has multiple calls to a callee, we end up recomputing // the ephemeral values multiple times (and they're completely determined by @@ -1951,17 +1945,30 @@ // We applied the maximum possible vector bonus at the beginning. Now, // subtract the excess bonus, if any, from the Threshold before // comparing against Cost. - if (NumVectorInstructions <= NumInstructions / 10) + if (InliningStats.NumVectorInstructions <= InliningStats.NumInstructions / 10) Threshold -= VectorBonus; - else if (NumVectorInstructions <= NumInstructions / 2) + else if (InliningStats.NumVectorInstructions <= + InliningStats.NumInstructions / 2) Threshold -= VectorBonus/2; return Cost < std::max(1, Threshold); } #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) -/// Dump stats about this call's analysis. 
LLVM_DUMP_METHOD void CallAnalyzer::dump() { +#define DEBUG_PRINT_STAT(x) dbgs() << " " #x ": " << x << "\n" + InliningStats.dump(); + DEBUG_PRINT_STAT(ContainsNoDuplicateCall); + DEBUG_PRINT_STAT(SROACostSavings); + DEBUG_PRINT_STAT(SROACostSavingsLost); + DEBUG_PRINT_STAT(LoadEliminationCost); + DEBUG_PRINT_STAT(Cost); + DEBUG_PRINT_STAT(Threshold); +#undef DEBUG_PRINT_STAT +} + +/// Print the counters held in this InlineCostStats to dbgs(), one per line. +LLVM_DUMP_METHOD void InlineCostStats::dump() { #define DEBUG_PRINT_STAT(x) dbgs() << " " #x ": " << x << "\n" DEBUG_PRINT_STAT(NumConstantArgs); DEBUG_PRINT_STAT(NumConstantOffsetPtrArgs); @@ -1970,12 +1977,6 @@ DEBUG_PRINT_STAT(NumConstantPtrDiffs); DEBUG_PRINT_STAT(NumInstructionsSimplified); DEBUG_PRINT_STAT(NumInstructions); - DEBUG_PRINT_STAT(SROACostSavings); - DEBUG_PRINT_STAT(SROACostSavingsLost); - DEBUG_PRINT_STAT(LoadEliminationCost); - DEBUG_PRINT_STAT(ContainsNoDuplicateCall); - DEBUG_PRINT_STAT(Cost); - DEBUG_PRINT_STAT(Threshold); #undef DEBUG_PRINT_STAT } #endif @@ -2099,6 +2100,9 @@ Call, Params); InlineResult ShouldInline = CA.analyzeCall(Call); + if (Params.PassInlineCostStats) // Client supplied a callback: hand it a copy of the stats. + (*Params.PassInlineCostStats)(CA.getInlineCostStats()); + LLVM_DEBUG(CA.dump()); // Check if there was a reason to force inlining or no inlining.