Index: include/llvm/Analysis/BlockFrequencyInfo.h =================================================================== --- include/llvm/Analysis/BlockFrequencyInfo.h +++ include/llvm/Analysis/BlockFrequencyInfo.h @@ -20,6 +20,10 @@ namespace llvm { +// FIXME: Replace this brittle forward declaration with the include of the new +// PassManager.h when doing so doesn't break the PassManagerBuilder. +template +class AnalysisManager; class BranchProbabilityInfo; class LoopInfo; template class BlockFrequencyInfoImpl; @@ -85,6 +89,21 @@ void print(raw_ostream &OS, const Module *M) const override; }; +/// \brief Analysis pass that exposes the \c BlockFrequencyInfo for a function. +class BlockFrequencyAnalysis { + static char PassID; + + public: + typedef BlockFrequencyInfo Result; + + /// \brief Opaque, unique identifier for this analysis pass. + static void *ID() { return (void *)&PassID; } + + /// \brief Provide a name for the analysis for debugging and logging. + static StringRef name() { return "BlockFrequencyAnalysis"; } + + BlockFrequencyInfo run(Function &F, AnalysisManager *AM); +}; } #endif Index: include/llvm/Analysis/BranchProbabilityInfo.h =================================================================== --- include/llvm/Analysis/BranchProbabilityInfo.h +++ include/llvm/Analysis/BranchProbabilityInfo.h @@ -22,6 +22,10 @@ #include "llvm/Support/BranchProbability.h" namespace llvm { +// FIXME: Replace this brittle forward declaration with the include of the new +// PassManager.h when doing so doesn't break the PassManagerBuilder. +template +class AnalysisManager; class LoopInfo; class raw_ostream; @@ -172,6 +176,22 @@ void print(raw_ostream &OS, const Module *M = nullptr) const override; }; +/// \brief Analysis pass that exposes the \c BranchProbabilityInfo for a +/// function. +class BranchProbabilityAnalysis { + static char PassID; + + public: + typedef BranchProbabilityInfo Result; + + /// \brief Opaque, unique identifier for this analysis pass. + static void *ID() { return (void *)&PassID; } + + /// \brief Provide a name for the analysis for debugging and logging. + static StringRef name() { return "BranchProbabilityAnalysis"; } + + BranchProbabilityInfo run(Function &F, AnalysisManager *AM); +}; } #endif Index: include/llvm/Analysis/DynamicInstructionCount.h =================================================================== --- /dev/null +++ include/llvm/Analysis/DynamicInstructionCount.h @@ -0,0 +1,61 @@ +//===- DynamicInstructionCount.h - Dynamic Instruction Count Analysis +//----------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements a pass that computes the estimated dynamic instruction +// count of a function. The dynamic instruction count computed is per invocation +// of the function. This pass is intended to be used by the inliner and uses +// InlineCost to calculate the static instruction count. This is weighted by +// BlockFrequencyInfo to arrive at the dynamic instruction count. +// +//===----------------------------------------------------------------------===// +#ifndef LLVM_ANALYSIS_DYNAMIC_INSTRUCTION_COUNT_H +#define LLVM_ANALYSIS_DYNAMIC_INSTRUCTION_COUNT_H + +#include "llvm/IR/Function.h" +#include "llvm/IR/PassManager.h" + +namespace llvm { +class DynamicInstructionCount { + public: + uint64_t getCount() const { return Count; } + void setCount(uint64_t NewCount) { Count = NewCount; } + + private: + uint64_t Count; +}; + +/// \brief Analysis pass that calculates dynamic instr count for a function. +class DynamicInstructionCountAnalysis { + static char PassID; + + public: + typedef DynamicInstructionCount Result; + + /// \brief Opaque, unique identifier for this analysis pass. + static void *ID() { return (void *)&PassID; } + + /// \brief Provide a name for the analysis for debugging and logging. + static StringRef name() { return "DynamicInstructionCountAnalysis"; } + + Result run(Function &F, AnalysisManager *AM); +}; + +/// \brief Printer pass for the \c DynamicInstructionCountAnalysis results. +class DynamicInstructionCountPrinterPass { + raw_ostream &OS; + + public: + explicit DynamicInstructionCountPrinterPass(raw_ostream &OS) : OS(OS) {} + PreservedAnalyses run(Function &F, AnalysisManager *AM); + + static StringRef name() { return "DynamicInstructionCountPrinterPass"; } +}; +} +#endif Index: include/llvm/Analysis/InlineCost.h =================================================================== --- include/llvm/Analysis/InlineCost.h +++ include/llvm/Analysis/InlineCost.h @@ -15,6 +15,10 @@ #define LLVM_ANALYSIS_INLINECOST_H #include "llvm/Analysis/CallGraphSCCPass.h" +#include "llvm/Analysis/AssumptionCache.h" +#include "llvm/Analysis/TargetTransformInfo.h" +#include "llvm/IR/GetElementPtrTypeIterator.h" +#include "llvm/IR/InstVisitor.h" #include #include @@ -98,6 +102,157 @@ int getCostDelta() const { return Threshold - getCost(); } }; +class CallAnalyzer : public InstVisitor { + typedef InstVisitor Base; + friend class InstVisitor; + + /// The TargetTransformInfo available for this compilation. + const TargetTransformInfo &TTI; + + /// The cache of @llvm.assume intrinsics. + AssumptionCacheTracker *ACT; + + // The called function. + Function &F; + + // The candidate callsite being analyzed. Please do not use this to do + // analysis in the caller function; we want the inline cost query to be + // easily cacheable. Instead, use the cover function paramHasAttr. + CallSite *CandidateCS; + + int Threshold; + int Cost; + + bool IsCallerRecursive; + bool IsRecursiveCall; + bool ExposesReturnsTwice; + bool HasDynamicAlloca; + bool ContainsNoDuplicateCall; + bool HasReturn; + bool HasIndirectBr; + bool HasFrameEscape; + + /// Number of bytes allocated statically by the callee. + uint64_t AllocatedSize; + unsigned NumInstructions, NumVectorInstructions; + int FiftyPercentVectorBonus, TenPercentVectorBonus; + int VectorBonus; + + // While we walk the potentially-inlined instructions, we build up and + // maintain a mapping of simplified values specific to this callsite. The + // idea is to propagate any special information we have about arguments to + // this call through the inlinable section of the function, and account for + // likely simplifications post-inlining. The most important aspect we track + // is CFG altering simplifications -- when we prove a basic block dead, that + // can cause dramatic shifts in the cost of inlining a function. + DenseMap SimplifiedValues; + + // Keep track of the values which map back (through function arguments) to + // allocas on the caller stack which could be simplified through SROA. + DenseMap SROAArgValues; + + // The mapping of caller Alloca values to their accumulated cost savings. If + // we have to disable SROA for one of the allocas, this tells us how much + // cost must be added. + DenseMap SROAArgCosts; + + // Keep track of values which map to a pointer base and constant offset. + DenseMap > ConstantOffsetPtrs; + + // The call cost analyzer can be used to compute the cost of out-of-line + // functions as well. This flag tells whether it is used for inlining or + // not. + const bool Inlining; + + // Custom simplification helper routines. + bool isAllocaDerivedArg(Value *V); + bool lookupSROAArgAndCost(Value *V, Value *&Arg, + DenseMap::iterator &CostIt); + void disableSROA(DenseMap::iterator CostIt); + void disableSROA(Value *V); + void accumulateSROACost(DenseMap::iterator CostIt, + int InstructionCost); + bool isGEPOffsetConstant(GetElementPtrInst &GEP); + bool accumulateGEPOffset(GEPOperator &GEP, APInt &Offset); + bool simplifyCallSite(Function *F, CallSite CS); + ConstantInt *stripAndComputeInBoundsConstantOffsets(Value *&V); + + /// Return true if the given argument to the function being considered for + /// inlining has the given attribute set either at the call site or the + /// function declaration. Primarily used to inspect call site specific + /// attributes since these can be more precise than the ones on the callee + /// itself. + bool paramHasAttr(Argument *A, Attribute::AttrKind Attr); + + /// Return true if the given value is known non null within the callee if + /// inlined through this particular callsite. + bool isKnownNonNullInCallee(Value *V); + + // Custom analysis routines. + + // Disable several entry points to the visitor so we don't accidentally use + // them by declaring but not defining them here. + void visit(Module *); + void visit(Module &); + void visit(Function *); + void visit(Function &); + void visit(BasicBlock *); + void visit(BasicBlock &); + + // Provide base case for our instruction visit. + bool visitInstruction(Instruction &I); + + // Our visit overrides. + bool visitAlloca(AllocaInst &I); + bool visitPHI(PHINode &I); + bool visitGetElementPtr(GetElementPtrInst &I); + bool visitBitCast(BitCastInst &I); + bool visitPtrToInt(PtrToIntInst &I); + bool visitIntToPtr(IntToPtrInst &I); + bool visitCastInst(CastInst &I); + bool visitUnaryInstruction(UnaryInstruction &I); + bool visitCmpInst(CmpInst &I); + bool visitSub(BinaryOperator &I); + bool visitBinaryOperator(BinaryOperator &I); + bool visitLoad(LoadInst &I); + bool visitStore(StoreInst &I); + bool visitExtractValue(ExtractValueInst &I); + bool visitInsertValue(InsertValueInst &I); + bool visitCallSite(CallSite CS); + bool visitReturnInst(ReturnInst &RI); + bool visitBranchInst(BranchInst &BI); + bool visitSwitchInst(SwitchInst &SI); + bool visitIndirectBrInst(IndirectBrInst &IBI); + bool visitResumeInst(ResumeInst &RI); + bool visitCleanupReturnInst(CleanupReturnInst &RI); + bool visitCatchReturnInst(CatchReturnInst &RI); + bool visitUnreachableInst(UnreachableInst &I); + + public: + CallAnalyzer(const TargetTransformInfo &TTI, AssumptionCacheTracker *ACT, + Function &Callee, int Threshold, CallSite *CSArg, + bool Inlining = true); + + bool analyzeBlock(BasicBlock *BB, SmallPtrSetImpl &EphValues); + bool analyzeCall(CallSite CS); + + int getThreshold() { return Threshold; } + int getCost() { return Cost; } + + // Keep a bunch of stats about the cost savings found so we can print them + // out when debugging. + unsigned NumConstantArgs; + unsigned NumConstantOffsetPtrArgs; + unsigned NumAllocaArgs; + unsigned NumConstantPtrCmps; + unsigned NumConstantPtrDiffs; + unsigned NumInstructionsSimplified; + unsigned SROACostSavings; + unsigned SROACostSavingsLost; + + void dump(); +}; + /// \brief Cost analyzer used by inliner. class InlineCostAnalysis : public CallGraphSCCPass { TargetTransformInfoWrapperPass *TTIWP; Index: lib/Analysis/BlockFrequencyInfo.cpp =================================================================== --- lib/Analysis/BlockFrequencyInfo.cpp +++ lib/Analysis/BlockFrequencyInfo.cpp @@ -17,6 +17,7 @@ #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/Passes.h" #include "llvm/IR/CFG.h" +#include "llvm/IR/PassManager.h" #include "llvm/InitializePasses.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" @@ -211,3 +212,13 @@ BFI.calculate(F, BPI, LI); return false; } + +char BlockFrequencyAnalysis::PassID; + +BlockFrequencyInfo BlockFrequencyAnalysis::run(Function &F, + AnalysisManager *AM) { + BlockFrequencyInfo BFI; + BFI.calculate(F, AM->getResult(F), + AM->getResult(F)); + return BFI; +} Index: lib/Analysis/BranchProbabilityInfo.cpp =================================================================== --- lib/Analysis/BranchProbabilityInfo.cpp +++ lib/Analysis/BranchProbabilityInfo.cpp @@ -20,6 +20,7 @@ #include "llvm/IR/Instructions.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/Metadata.h" +#include "llvm/IR/PassManager.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" @@ -700,3 +701,12 @@ const Module *) const { BPI.print(OS); } + +char BranchProbabilityAnalysis::PassID; + +BranchProbabilityInfo BranchProbabilityAnalysis::run( + Function &F, AnalysisManager *AM) { + BranchProbabilityInfo BPI; + BPI.calculate(F, AM->getResult(F)); + return BPI; +} Index: lib/Analysis/CMakeLists.txt =================================================================== --- lib/Analysis/CMakeLists.txt +++ lib/Analysis/CMakeLists.txt @@ -25,6 +25,7 @@ DivergenceAnalysis.cpp DomPrinter.cpp DominanceFrontier.cpp + DynamicInstructionCount.cpp GlobalsModRef.cpp IVUsers.cpp InlineCost.cpp Index: lib/Analysis/DynamicInstructionCount.cpp =================================================================== --- /dev/null +++ lib/Analysis/DynamicInstructionCount.cpp @@ -0,0 +1,58 @@ +//===- DynamicInstructionCount.cpp - Dynamic Instruction Count analysis ---===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements dynamic instruction count analysis. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Analysis/DynamicInstructionCount.h" +#include "llvm/Analysis/BlockFrequencyInfo.h" +#include "llvm/Analysis/InlineCost.h" +#include "llvm/IR/PassManager.h" +#include "llvm/Support/Debug.h" + +using namespace llvm; + +char DynamicInstructionCountAnalysis::PassID; + +#define DEBUG_TYPE "dyninscount" + +DynamicInstructionCount DynamicInstructionCountAnalysis::run( + Function &F, AnalysisManager *AM) { + DynamicInstructionCount DIC; + CallAnalyzer CA(AM->getResult(F), nullptr, F, INT_MAX, + nullptr, false); + SmallPtrSet EphValues; + int Cost = 0, BBCost = 0; + uint64_t DynInsCount = 0; + BlockFrequencyInfo &BFI = AM->getResult(F); + for (BasicBlock &BB : F) { + CA.analyzeBlock(&BB, EphValues); + BBCost = CA.getCost() - Cost; + Cost = CA.getCost(); + BBCost /= InlineConstants::InstrCost; + BlockFrequency BF = BFI.getBlockFreq(&BB); + DEBUG(llvm::dbgs() << " BB = " << BB.getName() << " Cost = " << BBCost + << " Frequency = " << BF.getFrequency() << "...\n"); + DynInsCount += (BF.getFrequency() * BBCost); + } + uint64_t EntryFreq = BFI.getEntryFreq(); + DynInsCount = (DynInsCount + EntryFreq / 2) / EntryFreq; + DIC.setCount(DynInsCount); + return DIC; +} + +PreservedAnalyses DynamicInstructionCountPrinterPass::run( + Function &F, AnalysisManager *AM) { + DynamicInstructionCount DIC = + AM->getResult(F); + OS << "Dynamic instruction count of " << F.getName() << ": " << DIC.getCount() + << "\n"; + return PreservedAnalyses::all(); +} Index: lib/Analysis/InlineCost.cpp =================================================================== --- lib/Analysis/InlineCost.cpp +++ lib/Analysis/InlineCost.cpp @@ -39,161 +39,38 @@ STATISTIC(NumCallsAnalyzed, "Number of call sites analyzed"); -namespace { - -class CallAnalyzer : public InstVisitor { - typedef InstVisitor Base; - friend class InstVisitor; - - /// The TargetTransformInfo available for this compilation. - const TargetTransformInfo &TTI; - - /// The cache of @llvm.assume intrinsics. - AssumptionCacheTracker *ACT; - - // The called function. - Function &F; - - // The candidate callsite being analyzed. Please do not use this to do - // analysis in the caller function; we want the inline cost query to be - // easily cacheable. Instead, use the cover function paramHasAttr. - CallSite CandidateCS; - - int Threshold; - int Cost; - - bool IsCallerRecursive; - bool IsRecursiveCall; - bool ExposesReturnsTwice; - bool HasDynamicAlloca; - bool ContainsNoDuplicateCall; - bool HasReturn; - bool HasIndirectBr; - bool HasFrameEscape; - - /// Number of bytes allocated statically by the callee. - uint64_t AllocatedSize; - unsigned NumInstructions, NumVectorInstructions; - int FiftyPercentVectorBonus, TenPercentVectorBonus; - int VectorBonus; - - // While we walk the potentially-inlined instructions, we build up and - // maintain a mapping of simplified values specific to this callsite. The - // idea is to propagate any special information we have about arguments to - // this call through the inlinable section of the function, and account for - // likely simplifications post-inlining. The most important aspect we track - // is CFG altering simplifications -- when we prove a basic block dead, that - // can cause dramatic shifts in the cost of inlining a function. - DenseMap SimplifiedValues; - - // Keep track of the values which map back (through function arguments) to - // allocas on the caller stack which could be simplified through SROA. - DenseMap SROAArgValues; - - // The mapping of caller Alloca values to their accumulated cost savings. If - // we have to disable SROA for one of the allocas, this tells us how much - // cost must be added. - DenseMap SROAArgCosts; - - // Keep track of values which map to a pointer base and constant offset. - DenseMap > ConstantOffsetPtrs; - - // Custom simplification helper routines. - bool isAllocaDerivedArg(Value *V); - bool lookupSROAArgAndCost(Value *V, Value *&Arg, - DenseMap::iterator &CostIt); - void disableSROA(DenseMap::iterator CostIt); - void disableSROA(Value *V); - void accumulateSROACost(DenseMap::iterator CostIt, - int InstructionCost); - bool isGEPOffsetConstant(GetElementPtrInst &GEP); - bool accumulateGEPOffset(GEPOperator &GEP, APInt &Offset); - bool simplifyCallSite(Function *F, CallSite CS); - ConstantInt *stripAndComputeInBoundsConstantOffsets(Value *&V); - - /// Return true if the given argument to the function being considered for - /// inlining has the given attribute set either at the call site or the - /// function declaration. Primarily used to inspect call site specific - /// attributes since these can be more precise than the ones on the callee - /// itself. - bool paramHasAttr(Argument *A, Attribute::AttrKind Attr); - - /// Return true if the given value is known non null within the callee if - /// inlined through this particular callsite. - bool isKnownNonNullInCallee(Value *V); - - // Custom analysis routines. - bool analyzeBlock(BasicBlock *BB, SmallPtrSetImpl &EphValues); - - // Disable several entry points to the visitor so we don't accidentally use - // them by declaring but not defining them here. - void visit(Module *); void visit(Module &); - void visit(Function *); void visit(Function &); - void visit(BasicBlock *); void visit(BasicBlock &); - - // Provide base case for our instruction visit. - bool visitInstruction(Instruction &I); - - // Our visit overrides. - bool visitAlloca(AllocaInst &I); - bool visitPHI(PHINode &I); - bool visitGetElementPtr(GetElementPtrInst &I); - bool visitBitCast(BitCastInst &I); - bool visitPtrToInt(PtrToIntInst &I); - bool visitIntToPtr(IntToPtrInst &I); - bool visitCastInst(CastInst &I); - bool visitUnaryInstruction(UnaryInstruction &I); - bool visitCmpInst(CmpInst &I); - bool visitSub(BinaryOperator &I); - bool visitBinaryOperator(BinaryOperator &I); - bool visitLoad(LoadInst &I); - bool visitStore(StoreInst &I); - bool visitExtractValue(ExtractValueInst &I); - bool visitInsertValue(InsertValueInst &I); - bool visitCallSite(CallSite CS); - bool visitReturnInst(ReturnInst &RI); - bool visitBranchInst(BranchInst &BI); - bool visitSwitchInst(SwitchInst &SI); - bool visitIndirectBrInst(IndirectBrInst &IBI); - bool visitResumeInst(ResumeInst &RI); - bool visitCleanupReturnInst(CleanupReturnInst &RI); - bool visitCatchReturnInst(CatchReturnInst &RI); - bool visitUnreachableInst(UnreachableInst &I); - -public: - CallAnalyzer(const TargetTransformInfo &TTI, AssumptionCacheTracker *ACT, - Function &Callee, int Threshold, CallSite CSArg) - : TTI(TTI), ACT(ACT), F(Callee), CandidateCS(CSArg), Threshold(Threshold), - Cost(0), IsCallerRecursive(false), IsRecursiveCall(false), - ExposesReturnsTwice(false), HasDynamicAlloca(false), - ContainsNoDuplicateCall(false), HasReturn(false), HasIndirectBr(false), - HasFrameEscape(false), AllocatedSize(0), NumInstructions(0), - NumVectorInstructions(0), FiftyPercentVectorBonus(0), - TenPercentVectorBonus(0), VectorBonus(0), NumConstantArgs(0), - NumConstantOffsetPtrArgs(0), NumAllocaArgs(0), NumConstantPtrCmps(0), - NumConstantPtrDiffs(0), NumInstructionsSimplified(0), - SROACostSavings(0), SROACostSavingsLost(0) {} - - bool analyzeCall(CallSite CS); - - int getThreshold() { return Threshold; } - int getCost() { return Cost; } - - // Keep a bunch of stats about the cost savings found so we can print them - // out when debugging. - unsigned NumConstantArgs; - unsigned NumConstantOffsetPtrArgs; - unsigned NumAllocaArgs; - unsigned NumConstantPtrCmps; - unsigned NumConstantPtrDiffs; - unsigned NumInstructionsSimplified; - unsigned SROACostSavings; - unsigned SROACostSavingsLost; - - void dump(); -}; - -} // namespace +CallAnalyzer::CallAnalyzer(const TargetTransformInfo &TTI, + AssumptionCacheTracker *ACT, Function &Callee, + int Threshold, CallSite *CSArg, bool Inlining) + : TTI(TTI), + ACT(ACT), + F(Callee), + CandidateCS(CSArg), + Threshold(Threshold), + Cost(0), + IsCallerRecursive(false), + IsRecursiveCall(false), + ExposesReturnsTwice(false), + HasDynamicAlloca(false), + ContainsNoDuplicateCall(false), + HasReturn(false), + HasIndirectBr(false), + HasFrameEscape(false), + AllocatedSize(0), + NumInstructions(0), + NumVectorInstructions(0), + FiftyPercentVectorBonus(0), + TenPercentVectorBonus(0), + VectorBonus(0), + Inlining(Inlining), + NumConstantArgs(0), + NumConstantOffsetPtrArgs(0), + NumAllocaArgs(0), + NumConstantPtrCmps(0), + NumConstantPtrDiffs(0), + NumInstructionsSimplified(0), + SROACostSavings(0), + SROACostSavingsLost(0) {} /// \brief Test whether the given value is an Alloca-derived function argument. bool CallAnalyzer::isAllocaDerivedArg(Value *V) { @@ -516,7 +393,7 @@ bool CallAnalyzer::paramHasAttr(Argument *A, Attribute::AttrKind Attr) { unsigned ArgNo = A->getArgNo(); - return CandidateCS.paramHasAttr(ArgNo+1, Attr); + return CandidateCS ? CandidateCS->paramHasAttr(ArgNo + 1, Attr) : false; } bool CallAnalyzer::isKnownNonNullInCallee(Value *V) { @@ -758,7 +635,7 @@ } bool CallAnalyzer::visitCallSite(CallSite CS) { - if (CS.hasFnAttr(Attribute::ReturnsTwice) && + if (Inlining && CS.hasFnAttr(Attribute::ReturnsTwice) && !F.hasFnAttribute(Attribute::ReturnsTwice)) { // This aborts the entire analysis. ExposesReturnsTwice = true; @@ -791,7 +668,7 @@ } } - if (F == CS.getInstruction()->getParent()->getParent()) { + if (Inlining && F == CS.getInstruction()->getParent()->getParent()) { // This flag will fully abort the analysis, so don't bother with anything // else. IsRecursiveCall = true; @@ -823,15 +700,14 @@ // Next, check if this happens to be an indirect function call to a known // function in this inline context. If not, we've done all we can. Function *F = dyn_cast_or_null(SimplifiedValues.lookup(Callee)); - if (!F) - return Base::visitCallSite(CS); + if (!F || !Inlining) return Base::visitCallSite(CS); // If we have a constant that we are calling as a function, we can peer // through it and see the function target. This happens not infrequently // during devirtualization and so we want to give it a hefty bonus for // inlining, but cap that bonus in the event that inlining wouldn't pan // out. Pretend to inline the function, with a custom threshold. - CallAnalyzer CA(TTI, ACT, *F, InlineConstants::IndirectCallThreshold, CS); + CallAnalyzer CA(TTI, ACT, *F, InlineConstants::IndirectCallThreshold, &CS); if (CA.analyzeCall(CS)) { // We were able to inline the indirect call! Subtract the cost from the // bonus we want to apply, but don't go below zero. @@ -843,7 +719,7 @@ bool CallAnalyzer::visitReturnInst(ReturnInst &RI) { // At least one return instruction will be free after inlining. - bool Free = !HasReturn; + bool Free = !HasReturn && Inlining; HasReturn = true; return Free; } @@ -997,6 +873,8 @@ else Cost += InlineConstants::InstrCost; + if (!Inlining) continue; + // If the visit this instruction detected an uninlinable pattern, abort. if (IsRecursiveCall || ExposesReturnsTwice || HasDynamicAlloca || HasIndirectBr || HasFrameEscape) @@ -1401,7 +1279,7 @@ DEBUG(llvm::dbgs() << " Analyzing call of " << Callee->getName() << "...\n"); - CallAnalyzer CA(TTIWP->getTTI(*Callee), ACT, *Callee, Threshold, CS); + CallAnalyzer CA(TTIWP->getTTI(*Callee), ACT, *Callee, Threshold, &CS); bool ShouldInline = CA.analyzeCall(CS); DEBUG(CA.dump()); Index: lib/Passes/PassBuilder.cpp =================================================================== --- lib/Passes/PassBuilder.cpp +++ lib/Passes/PassBuilder.cpp @@ -17,7 +17,11 @@ #include "llvm/Passes/PassBuilder.h" #include "llvm/Analysis/AssumptionCache.h" +#include "llvm/Analysis/BlockFrequencyInfo.h" +#include "llvm/Analysis/BlockFrequencyInfoImpl.h" +#include "llvm/Analysis/BranchProbabilityInfo.h" #include "llvm/Analysis/CGSCCPassManager.h" +#include "llvm/Analysis/DynamicInstructionCount.h" #include "llvm/Analysis/LazyCallGraph.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/ScalarEvolution.h" Index: lib/Passes/PassRegistry.def =================================================================== --- lib/Passes/PassRegistry.def +++ lib/Passes/PassRegistry.def @@ -53,6 +53,9 @@ FUNCTION_ANALYSIS("assumptions", AssumptionAnalysis()) FUNCTION_ANALYSIS("domtree", DominatorTreeAnalysis()) FUNCTION_ANALYSIS("loops", LoopAnalysis()) +FUNCTION_ANALYSIS("branchprob", BranchProbabilityAnalysis()) +FUNCTION_ANALYSIS("blockfreq", BlockFrequencyAnalysis()) +FUNCTION_ANALYSIS("dyninscount", DynamicInstructionCountAnalysis()) FUNCTION_ANALYSIS("no-op-function", NoOpFunctionAnalysis()) FUNCTION_ANALYSIS("scalar-evolution", ScalarEvolutionAnalysis()) FUNCTION_ANALYSIS("targetlibinfo", TargetLibraryAnalysis()) @@ -72,6 +75,7 @@ FUNCTION_PASS("print", AssumptionPrinterPass(dbgs())) FUNCTION_PASS("print", DominatorTreePrinterPass(dbgs())) FUNCTION_PASS("print", LoopPrinterPass(dbgs())) +FUNCTION_PASS("print", DynamicInstructionCountPrinterPass(dbgs())) FUNCTION_PASS("print", ScalarEvolutionPrinterPass(dbgs())) FUNCTION_PASS("simplify-cfg", SimplifyCFGPass()) FUNCTION_PASS("sroa", SROA()) Index: test/Analysis/DynamicInstructionCount/basic.ll =================================================================== --- /dev/null +++ test/Analysis/DynamicInstructionCount/basic.ll @@ -0,0 +1,47 @@ +; RUN: opt < %s -passes='print' -disable-output 2>&1 | FileCheck %s + +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +define i32 @f0() { +; CHECK: Dynamic instruction count of f0: 1 +entry: + ret i32 0 +} + +define i32 @f1(i32 %i) { +; CHECK: Dynamic instruction count of f1: 2 +entry: + %add = add nsw i32 %i, 1 + ret i32 %add +} + +define void @f2(i32* nocapture %ptr) { +; CHECK: Dynamic instruction count of f2: 97 +entry: +; Dynamic instructions in this block = 0 (unconditional branch) + br label %for.body + +for.cond.cleanup: ; preds = %for.body +; Dynamic instructions in this block = 1 + ret void + +for.body: ; preds = %for.body, %entry +; Static instructions in this block = 3 (phi is not counted) +; Dynamic instructions in this block = 3 * 32 = 96 + %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %exitcond = icmp eq i64 %indvars.iv.next, 1000 + br i1 %exitcond, label %for.cond.cleanup, label %for.body +} + +define i32 @f3(i32 %i) { +; CHECK: Dynamic instruction count of f3: 3 +entry: + %cmp = icmp sgt i32 %i, 10 + br i1 %cmp, label %if.then, label %if.else +if.then: + ret i32 0 +if.else: + ret i32 1 +}