Index: lib/Analysis/InlineCost.cpp =================================================================== --- lib/Analysis/InlineCost.cpp +++ lib/Analysis/InlineCost.cpp @@ -126,26 +126,38 @@ /// Tunable parameters that control the analysis. const InlineParams &Params; + /// Upper bound for the inlining cost. Bonuses are being applied to account + /// for speculative "expected profit" of the inlining decision. int Threshold; - int Cost; + + /// Inlining cost measured in abstract units, accounts for all the + /// instructions expected to be executed for a given function invocation. + /// Instructions that are statically proven to be dead based on call-site + /// arguments are not counted here. + int Cost = 0; + bool ComputeFullInlineCost; - bool IsCallerRecursive; - bool IsRecursiveCall; - bool ExposesReturnsTwice; - bool HasDynamicAlloca; - bool ContainsNoDuplicateCall; - bool HasReturn; - bool HasIndirectBr; - bool HasUninlineableIntrinsic; - bool InitsVargArgs; + bool IsCallerRecursive = false; + bool IsRecursiveCall = false; + bool ExposesReturnsTwice = false; + bool HasDynamicAlloca = false; + bool ContainsNoDuplicateCall = false; + bool HasReturn = false; + bool HasIndirectBr = false; + bool HasUninlineableIntrinsic = false; + bool InitsVargArgs = false; /// Number of bytes allocated statically by the callee. - uint64_t AllocatedSize; - unsigned NumInstructions, NumVectorInstructions; - int VectorBonus, TenPercentVectorBonus; - // Bonus to be applied when the callee has only one reachable basic block. - int SingleBBBonus; + uint64_t AllocatedSize = 0; + unsigned NumInstructions = 0; + unsigned NumVectorInstructions = 0; + + /// Bonus to be applied when percentage of vector instructions in callee is + /// high (see more details in updateThreshold). + int VectorBonus = 0; + /// Bonus to be applied when the callee has only one reachable basic block. + int SingleBBBonus = 0; /// While we walk the potentially-inlined instructions, we build up and /// maintain a mapping of simplified values specific to this callsite. The @@ -180,7 +192,7 @@ /// loads. bool EnableLoadElimination; SmallPtrSet LoadAddrSet; - int LoadEliminationCost; + int LoadEliminationCost = 0; // Custom simplification helper routines. bool isAllocaDerivedArg(Value *V); @@ -231,6 +243,11 @@ InlineResult analyzeBlock(BasicBlock *BB, SmallPtrSetImpl &EphValues); + /// Handle a capped 'int' increment for Cost. + void addCost(int64_t Inc, int64_t UpperBound = INT_MAX) { + Cost = std::min(UpperBound, Cost + Inc); + } + // Disable several entry points to the visitor so we don't accidentally use // them by declaring but not defining them here. void visit(Module *); @@ -279,18 +296,9 @@ : TTI(TTI), GetAssumptionCache(GetAssumptionCache), GetBFI(GetBFI), PSI(PSI), F(Callee), DL(F.getParent()->getDataLayout()), ORE(ORE), CandidateCS(CSArg), Params(Params), Threshold(Params.DefaultThreshold), - Cost(0), ComputeFullInlineCost(OptComputeFullInlineCost || - Params.ComputeFullInlineCost || ORE), - IsCallerRecursive(false), IsRecursiveCall(false), - ExposesReturnsTwice(false), HasDynamicAlloca(false), - ContainsNoDuplicateCall(false), HasReturn(false), HasIndirectBr(false), - HasUninlineableIntrinsic(false), InitsVargArgs(false), AllocatedSize(0), - NumInstructions(0), NumVectorInstructions(0), VectorBonus(0), - SingleBBBonus(0), EnableLoadElimination(true), LoadEliminationCost(0), - NumConstantArgs(0), NumConstantOffsetPtrArgs(0), NumAllocaArgs(0), - NumConstantPtrCmps(0), NumConstantPtrDiffs(0), - NumInstructionsSimplified(0), SROACostSavings(0), - SROACostSavingsLost(0) {} + ComputeFullInlineCost(OptComputeFullInlineCost || + Params.ComputeFullInlineCost || ORE), + EnableLoadElimination(true) {} InlineResult analyzeCall(CallSite CS); @@ -299,14 +307,14 @@ // Keep a bunch of stats about the cost savings found so we can print them // out when debugging. - unsigned NumConstantArgs; - unsigned NumConstantOffsetPtrArgs; - unsigned NumAllocaArgs; - unsigned NumConstantPtrCmps; - unsigned NumConstantPtrDiffs; - unsigned NumInstructionsSimplified; - unsigned SROACostSavings; - unsigned SROACostSavingsLost; + unsigned NumConstantArgs = 0; + unsigned NumConstantOffsetPtrArgs = 0; + unsigned NumAllocaArgs = 0; + unsigned NumConstantPtrCmps = 0; + unsigned NumConstantPtrDiffs = 0; + unsigned NumInstructionsSimplified = 0; + unsigned SROACostSavings = 0; + unsigned SROACostSavingsLost = 0; void dump(); }; @@ -341,7 +349,7 @@ void CallAnalyzer::disableSROA(DenseMap::iterator CostIt) { // If we're no longer able to perform SROA we need to undo its cost savings // and prevent subsequent analysis. - Cost += CostIt->second; + addCost(CostIt->second); SROACostSavings -= CostIt->second; SROACostSavingsLost += CostIt->second; SROAArgCosts.erase(CostIt); @@ -365,7 +373,7 @@ void CallAnalyzer::disableLoadElimination() { if (EnableLoadElimination) { - Cost += LoadEliminationCost; + addCost(LoadEliminationCost); LoadEliminationCost = 0; EnableLoadElimination = false; } @@ -720,7 +728,7 @@ case Instruction::FPToUI: case Instruction::FPToSI: if (TTI.getFPOpCost(I.getType()) == TargetTransformInfo::TCC_Expensive) - Cost += InlineConstants::CallPenalty; + addCost(InlineConstants::CallPenalty); break; default: break; @@ -1089,7 +1097,7 @@ // as such. if (I.getType()->isFloatingPointTy() && TTI.getFPOpCost(I.getType()) == TargetTransformInfo::TCC_Expensive) - Cost += InlineConstants::CallPenalty; + addCost(InlineConstants::CallPenalty); return false; } @@ -1228,7 +1236,7 @@ case Intrinsic::load_relative: // This is normally lowered to 4 LLVM instructions. - Cost += 3 * InlineConstants::InstrCost; + addCost(3 * InlineConstants::InstrCost); return false; case Intrinsic::memset: @@ -1257,12 +1265,12 @@ if (TTI.isLoweredToCall(F)) { // We account for the average 1 instruction per call argument setup // here. - Cost += CS.arg_size() * InlineConstants::InstrCost; + addCost(CS.arg_size() * InlineConstants::InstrCost); // Everything other than inline ASM will also have a significant cost // merely from making the call. if (!isa(CS.getCalledValue())) - Cost += InlineConstants::CallPenalty; + addCost(InlineConstants::CallPenalty); } if (!CS.onlyReadsMemory()) @@ -1276,7 +1284,7 @@ // First, pay the price of the argument setup. We account for the average // 1 instruction per call argument setup here. - Cost += CS.arg_size() * InlineConstants::InstrCost; + addCost(CS.arg_size() * InlineConstants::InstrCost); // Next, check if this happens to be an indirect function call to a known // function in this inline context. If not, we've done all we can. @@ -1438,7 +1446,7 @@ (int64_t)SI.getNumCases() * InlineConstants::InstrCost + Cost); if (CostLowerBound > Threshold && !ComputeFullInlineCost) { - Cost = CostLowerBound; + addCost((int64_t)SI.getNumCases() * InlineConstants::InstrCost); return false; } @@ -1452,7 +1460,7 @@ int64_t JTCost = (int64_t)JumpTableSize * InlineConstants::InstrCost + 4 * InlineConstants::InstrCost; - Cost = std::min((int64_t)CostUpperBound, JTCost + Cost); + addCost(JTCost, (int64_t)CostUpperBound); return false; } @@ -1473,7 +1481,7 @@ // n + n / 2 - 1 = n * 3 / 2 - 1 if (NumCaseCluster <= 3) { // Suppose a comparison includes one compare and one conditional branch. - Cost += NumCaseCluster * 2 * InlineConstants::InstrCost; + addCost(NumCaseCluster * 2 * InlineConstants::InstrCost); return false; } @@ -1481,7 +1489,7 @@ int64_t SwitchCost = ExpectedNumberOfCompare * 2 * InlineConstants::InstrCost; - Cost = std::min((int64_t)CostUpperBound, SwitchCost + Cost); + addCost(SwitchCost, (int64_t)CostUpperBound); return false; } @@ -1574,7 +1582,7 @@ if (Base::visit(&*I)) ++NumInstructionsSimplified; else - Cost += InlineConstants::InstrCost; + addCost(InlineConstants::InstrCost); using namespace ore; // If the visit this instruction detected an uninlinable pattern, abort. @@ -1619,7 +1627,7 @@ return IR; } - // Check if we've past the maximum possible threshold so we don't spin in + // Check if we've passed the maximum possible threshold so we don't spin in // huge basic blocks that will never inline. if (Cost >= Threshold && !ComputeFullInlineCost) return false; @@ -1745,7 +1753,7 @@ // Give out bonuses for the callsite, as the instructions setting them up // will be gone after inlining. - Cost -= getCallsiteCost(CS, DL); + addCost(-getCallsiteCost(CS, DL)); // If this function uses the coldcc calling convention, prefer not to inline // it. @@ -1909,7 +1917,7 @@ continue; NumLoops++; } - Cost += NumLoops * InlineConstants::CallPenalty; + addCost(NumLoops * InlineConstants::CallPenalty); } // We applied the maximum possible vector bonus at the beginning. Now,