[PartialInliner] Cost intrinsic calls through TargetTransformInfo.

Thread a GetTTI lookup into FunctionCloner and pass a TargetTransformInfo
pointer to computeBBInlineCost, so that intrinsic calls are costed with
TTI::getIntrinsicInstrCost (TCK_SizeAndLatency) rather than being handled
by the generic CallInst path that uses getCallsiteCost.

diff --git a/llvm/lib/Transforms/IPO/PartialInlining.cpp b/llvm/lib/Transforms/IPO/PartialInlining.cpp
--- a/llvm/lib/Transforms/IPO/PartialInlining.cpp
+++ b/llvm/lib/Transforms/IPO/PartialInlining.cpp
@@ -226,10 +226,13 @@
     // multi-region outlining.
     FunctionCloner(Function *F, FunctionOutliningInfo *OI,
                    OptimizationRemarkEmitter &ORE,
-                   function_ref<AssumptionCache *(Function &)> LookupAC);
+                   function_ref<AssumptionCache *(Function &)> LookupAC,
+                   function_ref<TargetTransformInfo &(Function &)> GetTTI);
     FunctionCloner(Function *F, FunctionOutliningMultiRegionInfo *OMRI,
                    OptimizationRemarkEmitter &ORE,
-                   function_ref<AssumptionCache *(Function &)> LookupAC);
+                   function_ref<AssumptionCache *(Function &)> LookupAC,
+                   function_ref<TargetTransformInfo &(Function &)> GetTTI);
+
     ~FunctionCloner();
 
     // Prepare for function outlining: making sure there is only
@@ -266,6 +269,7 @@
     std::unique_ptr<BlockFrequencyInfo> ClonedFuncBFI = nullptr;
     OptimizationRemarkEmitter &ORE;
     function_ref<AssumptionCache *(Function &)> LookupAC;
+    function_ref<TargetTransformInfo &(Function &)> GetTTI;
   };
 
 private:
@@ -334,7 +338,8 @@
   // Compute the 'InlineCost' of block BB. InlineCost is a proxy used to
   // approximate both the size and runtime cost (Note that in the current
   // inline cost analysis, there is no clear distinction there either).
-  static int computeBBInlineCost(BasicBlock *BB);
+  static int computeBBInlineCost(BasicBlock *BB,
+                                 TargetTransformInfo *TTI);
 
   std::unique_ptr<FunctionOutliningInfo> computeOutliningInfo(Function *F);
   std::unique_ptr<FunctionOutliningMultiRegionInfo>
@@ -448,9 +453,10 @@
 
   // Use the same computeBBInlineCost function to compute the cost savings of
   // the outlining the candidate region.
+  TargetTransformInfo *FTTI = &GetTTI(*F);
   int OverallFunctionCost = 0;
   for (auto &BB : *F)
-    OverallFunctionCost += computeBBInlineCost(&BB);
+    OverallFunctionCost += computeBBInlineCost(&BB, FTTI);
 
 #ifndef NDEBUG
   if (TracePartialInlining)
@@ -509,7 +515,8 @@
           continue;
         int OutlineRegionCost = 0;
         for (auto *BB : DominateVector)
-          OutlineRegionCost += computeBBInlineCost(BB);
+          OutlineRegionCost += computeBBInlineCost(BB,
+                                                   &GetTTI(*BB->getParent()));
 
 #ifndef NDEBUG
         if (TracePartialInlining)
@@ -843,7 +850,8 @@
 // TODO: Ideally we should share Inliner's InlineCost Analysis code.
 // For now use a simplified version. The returned 'InlineCost' will be used
 // to esimate the size cost as well as runtime cost of the BB.
-int PartialInlinerImpl::computeBBInlineCost(BasicBlock *BB) {
+int PartialInlinerImpl::computeBBInlineCost(BasicBlock *BB,
+                                            TargetTransformInfo *TTI) {
   int InlineCost = 0;
   const DataLayout &DL = BB->getParent()->getParent()->getDataLayout();
   for (Instruction &I : BB->instructionsWithoutDebug()) {
@@ -866,6 +874,23 @@
     if (I.isLifetimeStartOrEnd())
       continue;
 
+    // Cost intrinsic calls through TTI. This check must precede the generic
+    // CallInst handling below, which would charge them getCallsiteCost.
+    if (auto *II = dyn_cast<IntrinsicInst>(&I)) {
+      Intrinsic::ID IID = II->getIntrinsicID();
+      SmallVector<Type *, 4> Tys;
+      FastMathFlags FMF;
+      for (Value *Val : II->args())
+        Tys.push_back(Val->getType());
+
+      if (auto *FPMO = dyn_cast<FPMathOperator>(II))
+        FMF = FPMO->getFastMathFlags();
+
+      IntrinsicCostAttributes ICA(IID, II->getType(), Tys, FMF);
+      InlineCost += TTI->getIntrinsicInstrCost(ICA, TTI::TCK_SizeAndLatency);
+      continue;
+    }
+
     if (CallInst *CI = dyn_cast<CallInst>(&I)) {
       InlineCost += getCallsiteCost(*CI, DL);
       continue;
@@ -893,11 +918,13 @@
     BasicBlock* OutliningCallBB = FuncBBPair.second;
     // Now compute the cost of the call sequence to the outlined function
     // 'OutlinedFunction' in BB 'OutliningCallBB':
-    OutliningFuncCallCost += computeBBInlineCost(OutliningCallBB);
+    auto *OutlinedFuncTTI = &GetTTI(*OutlinedFunc);
+    OutliningFuncCallCost += computeBBInlineCost(OutliningCallBB,
+                                                 OutlinedFuncTTI);
 
     // Now compute the cost of the extracted/outlined function itself:
     for (BasicBlock &BB : *OutlinedFunc)
-      OutlinedFunctionCost += computeBBInlineCost(&BB);
+      OutlinedFunctionCost += computeBBInlineCost(&BB, OutlinedFuncTTI);
   }
   assert(OutlinedFunctionCost >= Cloner.OutlinedRegionCost &&
          "Outlined function cost should be no less than the outlined region");
@@ -962,8 +989,9 @@
 
 PartialInlinerImpl::FunctionCloner::FunctionCloner(
     Function *F, FunctionOutliningInfo *OI, OptimizationRemarkEmitter &ORE,
-    function_ref<AssumptionCache *(Function &)> LookupAC)
-    : OrigFunc(F), ORE(ORE), LookupAC(LookupAC) {
+    function_ref<AssumptionCache *(Function &)> LookupAC,
+    function_ref<TargetTransformInfo &(Function &)> GetTTI)
+    : OrigFunc(F), ORE(ORE), LookupAC(LookupAC), GetTTI(GetTTI) {
   ClonedOI = std::make_unique<FunctionOutliningInfo>();
 
   // Clone the function, so that we can hack away on it.
@@ -987,8 +1015,9 @@
 PartialInlinerImpl::FunctionCloner::FunctionCloner(
     Function *F, FunctionOutliningMultiRegionInfo *OI,
     OptimizationRemarkEmitter &ORE,
-    function_ref<AssumptionCache *(Function &)> LookupAC)
-    : OrigFunc(F), ORE(ORE), LookupAC(LookupAC) {
+    function_ref<AssumptionCache *(Function &)> LookupAC,
+    function_ref<TargetTransformInfo &(Function &)> GetTTI)
+    : OrigFunc(F), ORE(ORE), LookupAC(LookupAC), GetTTI(GetTTI) {
   ClonedOMRI = std::make_unique<FunctionOutliningMultiRegionInfo>();
 
   // Clone the function, so that we can hack away on it.
@@ -1099,10 +1128,10 @@
 
 bool PartialInlinerImpl::FunctionCloner::doMultiRegionFunctionOutlining() {
 
-  auto ComputeRegionCost = [](SmallVectorImpl<BasicBlock *> &Region) {
+  auto ComputeRegionCost = [&](SmallVectorImpl<BasicBlock *> &Region) {
     int Cost = 0;
     for (BasicBlock* BB : Region)
-      Cost += computeBBInlineCost(BB);
+      Cost += computeBBInlineCost(BB, &GetTTI(*BB->getParent()));
     return Cost;
   };
 
@@ -1196,9 +1225,11 @@
 
   // Gather up the blocks that we're going to extract.
   std::vector<BasicBlock *> ToExtract;
+  auto *ClonedFuncTTI = &GetTTI(*ClonedFunc);
   ToExtract.push_back(ClonedOI->NonReturnBlock);
   OutlinedRegionCost +=
-      PartialInlinerImpl::computeBBInlineCost(ClonedOI->NonReturnBlock);
+      PartialInlinerImpl::computeBBInlineCost(ClonedOI->NonReturnBlock,
+                                              ClonedFuncTTI);
   for (BasicBlock &BB : *ClonedFunc)
     if (!ToBeInlined(&BB) && &BB != ClonedOI->NonReturnBlock) {
       ToExtract.push_back(&BB);
@@ -1206,7 +1237,7 @@
       // into the outlined function which may make the outlining
       // overhead (the difference of the outlined function cost
       // and OutliningRegionCost) look larger.
-      OutlinedRegionCost += computeBBInlineCost(&BB);
+      OutlinedRegionCost += computeBBInlineCost(&BB, ClonedFuncTTI);
     }
 
   // Extract the body of the if.
@@ -1276,7 +1307,7 @@
     std::unique_ptr<FunctionOutliningMultiRegionInfo> OMRI =
         computeOutliningColdRegionsInfo(F, ORE);
     if (OMRI) {
-      FunctionCloner Cloner(F, OMRI.get(), ORE, LookupAssumptionCache);
+      FunctionCloner Cloner(F, OMRI.get(), ORE, LookupAssumptionCache, GetTTI);
 
 #ifndef NDEBUG
       if (TracePartialInlining) {
@@ -1309,7 +1340,7 @@
   if (!OI)
     return {false, nullptr};
 
-  FunctionCloner Cloner(F, OI.get(), ORE, LookupAssumptionCache);
+  FunctionCloner Cloner(F, OI.get(), ORE, LookupAssumptionCache, GetTTI);
   Cloner.NormalizeReturnBlock();
 
   Function *OutlinedFunction = Cloner.doSingleRegionFunctionOutlining();