diff --git a/llvm/include/llvm/Transforms/IPO/Inliner.h b/llvm/include/llvm/Transforms/IPO/Inliner.h --- a/llvm/include/llvm/Transforms/IPO/Inliner.h +++ b/llvm/include/llvm/Transforms/IPO/Inliner.h @@ -13,7 +13,6 @@ #include "llvm/Analysis/CallGraphSCCPass.h" #include "llvm/Analysis/InlineCost.h" #include "llvm/Analysis/LazyCallGraph.h" -#include "llvm/IR/CallSite.h" #include "llvm/IR/PassManager.h" #include "llvm/Transforms/Utils/ImportedFunctionsInliningStatistics.h" #include @@ -51,15 +50,7 @@ /// This method must be implemented by the subclass to determine the cost of /// inlining the specified call site. If the cost returned is greater than /// the current inline threshold, the call site is not inlined. - // FIXME(mtrofin): remove this in favor of the CallBase-based one - virtual InlineCost getInlineCost(CallSite CS) = 0; - - /// This method must be implemented by the subclass to determine the cost of - /// inlining the specified call site. If the cost returned is greater than - /// the current inline threshold, the call site is not inlined. - virtual InlineCost getInlineCost(CallBase &CB) { - return getInlineCost(CallSite(&CB)); - } + virtual InlineCost getInlineCost(CallBase &CB) = 0; /// Remove dead functions. /// diff --git a/llvm/include/llvm/Transforms/Utils/Cloning.h b/llvm/include/llvm/Transforms/Utils/Cloning.h --- a/llvm/include/llvm/Transforms/Utils/Cloning.h +++ b/llvm/include/llvm/Transforms/Utils/Cloning.h @@ -228,10 +228,7 @@ /// and all varargs at the callsite will be passed to any calls to /// ForwardVarArgsTo. The caller of InlineFunction has to make sure any varargs /// are only used by ForwardVarArgsTo. -InlineResult InlineFunction(CallBase *CB, InlineFunctionInfo &IFI, - AAResults *CalleeAAR = nullptr, - bool InsertLifetime = true); -InlineResult InlineFunction(CallSite CS, InlineFunctionInfo &IFI, +InlineResult InlineFunction(CallBase &CB, InlineFunctionInfo &IFI, AAResults *CalleeAAR = nullptr, bool InsertLifetime = true, Function *ForwardVarArgsTo = nullptr); diff --git a/llvm/lib/CodeGen/SafeStack.cpp b/llvm/lib/CodeGen/SafeStack.cpp --- a/llvm/lib/CodeGen/SafeStack.cpp +++ b/llvm/lib/CodeGen/SafeStack.cpp @@ -33,7 +33,6 @@ #include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/IR/Argument.h" #include "llvm/IR/Attributes.h" -#include "llvm/IR/CallSite.h" #include "llvm/IR/ConstantRange.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DIBuilder.h" @@ -200,7 +199,7 @@ bool IsAccessSafe(Value *Addr, uint64_t Size, const Value *AllocaPtr, uint64_t AllocaSize); - bool ShouldInlinePointerAddress(CallSite &CS); + bool ShouldInlinePointerAddress(CallInst &CI); void TryInlinePointerAddress(); public: @@ -322,7 +321,7 @@ case Instruction::Call: case Instruction::Invoke: { - ImmutableCallSite CS(I); + const CallBase &CS = *cast(I); if (I->isLifetimeStartOrEnd()) continue; @@ -344,8 +343,8 @@ // FIXME: a more precise solution would require an interprocedural // analysis here, which would look at all uses of an argument inside // the function being called. 
- ImmutableCallSite::arg_iterator B = CS.arg_begin(), E = CS.arg_end(); - for (ImmutableCallSite::arg_iterator A = B; A != E; ++A) + auto B = CS.arg_begin(), E = CS.arg_end(); + for (auto A = B; A != E; ++A) if (A->get() == V) if (!(CS.doesNotCapture(A - B) && (CS.doesNotAccessMemory(A - B) || CS.doesNotAccessMemory()))) { @@ -705,34 +704,34 @@ } } -bool SafeStack::ShouldInlinePointerAddress(CallSite &CS) { - Function *Callee = CS.getCalledFunction(); - if (CS.hasFnAttr(Attribute::AlwaysInline) && +bool SafeStack::ShouldInlinePointerAddress(CallInst &CI) { + Function *Callee = CI.getCalledFunction(); + if (CI.hasFnAttr(Attribute::AlwaysInline) && isInlineViable(*Callee).isSuccess()) return true; if (Callee->isInterposable() || Callee->hasFnAttribute(Attribute::NoInline) || - CS.isNoInline()) + CI.isNoInline()) return false; return true; } void SafeStack::TryInlinePointerAddress() { - if (!isa(UnsafeStackPtr)) + auto *CI = dyn_cast(UnsafeStackPtr); + if (!CI) return; if(F.hasOptNone()) return; - CallSite CS(UnsafeStackPtr); - Function *Callee = CS.getCalledFunction(); + Function *Callee = CI->getCalledFunction(); if (!Callee || Callee->isDeclaration()) return; - if (!ShouldInlinePointerAddress(CS)) + if (!ShouldInlinePointerAddress(*CI)) return; InlineFunctionInfo IFI; - InlineFunction(CS, IFI); + InlineFunction(*CI, IFI); } bool SafeStack::run() { diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInline.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInline.cpp --- a/llvm/lib/Target/AMDGPU/AMDGPUInline.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUInline.cpp @@ -23,7 +23,6 @@ #include "llvm/Analysis/InlineCost.h" #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/Analysis/ValueTracking.h" -#include "llvm/IR/CallSite.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/Module.h" @@ -67,9 +66,9 @@ static char ID; // Pass identification, replacement for typeid - unsigned getInlineThreshold(CallSite CS) const; + unsigned getInlineThreshold(CallBase &CB) const; - InlineCost getInlineCost(CallSite CS) override; + InlineCost getInlineCost(CallBase &CB) override; bool runOnSCC(CallGraphSCC &SCC) override; @@ -106,13 +105,13 @@ LegacyInlinerBase::getAnalysisUsage(AU); } -unsigned AMDGPUInliner::getInlineThreshold(CallSite CS) const { +unsigned AMDGPUInliner::getInlineThreshold(CallBase &CB) const { int Thres = Params.DefaultThreshold; - Function *Caller = CS.getCaller(); + Function *Caller = CB.getCaller(); // Listen to the inlinehint attribute when it would increase the threshold // and the caller does not need to minimize its size. - Function *Callee = CS.getCalledFunction(); + Function *Callee = CB.getCalledFunction(); bool InlineHint = Callee && !Callee->isDeclaration() && Callee->hasFnAttribute(Attribute::InlineHint); if (InlineHint && Params.HintThreshold && Params.HintThreshold > Thres @@ -129,7 +128,7 @@ // Increase the inline threshold to allow inliniting in this case. uint64_t AllocaSize = 0; SmallPtrSet AIVisited; - for (Value *PtrArg : CS.args()) { + for (Value *PtrArg : CB.args()) { PointerType *Ty = dyn_cast(PtrArg->getType()); if (!Ty || (Ty->getAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS && Ty->getAddressSpace() != AMDGPUAS::FLAT_ADDRESS)) @@ -156,8 +155,8 @@ // Check if call is just a wrapper around another call. // In this case we only have call and ret instructions. 
-static bool isWrapperOnlyCall(CallSite CS) { - Function *Callee = CS.getCalledFunction(); +static bool isWrapperOnlyCall(CallBase &CB) { + Function *Callee = CB.getCalledFunction(); if (!Callee || Callee->size() != 1) return false; const BasicBlock &BB = Callee->getEntryBlock(); @@ -174,32 +173,32 @@ return false; } -InlineCost AMDGPUInliner::getInlineCost(CallSite CS) { - Function *Callee = CS.getCalledFunction(); - Function *Caller = CS.getCaller(); +InlineCost AMDGPUInliner::getInlineCost(CallBase &CB) { + Function *Callee = CB.getCalledFunction(); + Function *Caller = CB.getCaller(); if (!Callee || Callee->isDeclaration()) return llvm::InlineCost::getNever("undefined callee"); - if (CS.isNoInline()) + if (CB.isNoInline()) return llvm::InlineCost::getNever("noinline"); TargetTransformInfo &TTI = TTIWP->getTTI(*Callee); if (!TTI.areInlineCompatible(Caller, Callee)) return llvm::InlineCost::getNever("incompatible"); - if (CS.hasFnAttr(Attribute::AlwaysInline)) { + if (CB.hasFnAttr(Attribute::AlwaysInline)) { auto IsViable = isInlineViable(*Callee); if (IsViable.isSuccess()) return llvm::InlineCost::getAlways("alwaysinline viable"); return llvm::InlineCost::getNever(IsViable.getFailureReason()); } - if (isWrapperOnlyCall(CS)) + if (isWrapperOnlyCall(CB)) return llvm::InlineCost::getAlways("wrapper-only call"); InlineParams LocalParams = Params; - LocalParams.DefaultThreshold = (int)getInlineThreshold(CS); + LocalParams.DefaultThreshold = (int)getInlineThreshold(CB); bool RemarksEnabled = false; const auto &BBs = Caller->getBasicBlockList(); if (!BBs.empty()) { @@ -214,9 +213,9 @@ return ACT->getAssumptionCache(F); }; - auto IC = llvm::getInlineCost(cast<CallBase>(*CS.getInstruction()), Callee, - LocalParams, TTI, GetAssumptionCache, None, - GetTLI, PSI, RemarksEnabled ? &ORE : nullptr); + auto IC = + llvm::getInlineCost(CB, Callee, LocalParams, TTI, GetAssumptionCache, + None, GetTLI, PSI, RemarksEnabled ? &ORE : nullptr); if (IC && !IC.isAlways() && !Callee->hasFnAttribute(Attribute::InlineHint)) { // Single BB does not increase total BB amount, thus subtract 1 diff --git a/llvm/lib/Transforms/IPO/AlwaysInliner.cpp b/llvm/lib/Transforms/IPO/AlwaysInliner.cpp --- a/llvm/lib/Transforms/IPO/AlwaysInliner.cpp +++ b/llvm/lib/Transforms/IPO/AlwaysInliner.cpp @@ -16,7 +16,6 @@ #include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/InlineCost.h" #include "llvm/Analysis/TargetLibraryInfo.h" -#include "llvm/IR/CallSite.h" #include "llvm/IR/CallingConv.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/Instructions.h" @@ -43,7 +42,7 @@ }; InlineFunctionInfo IFI(/*cg=*/nullptr, &GetAssumptionCache); - SmallSetVector<CallSite, 16> Calls; + SmallSetVector<CallBase *, 16> Calls; bool Changed = false; SmallVector<Function *, 16> InlinedFunctions; for (Function &F : M) @@ -52,15 +51,15 @@ Calls.clear(); for (User *U : F.users()) - if (auto CS = CallSite(U)) - if (CS.getCalledFunction() == &F) - Calls.insert(CS); + if (auto *CB = dyn_cast<CallBase>(U)) + if (CB->getCalledFunction() == &F) + Calls.insert(CB); - for (CallSite CS : Calls) + for (CallBase *CB : Calls) // FIXME: We really shouldn't be able to fail to inline at this point! // We should do something to log or check the inline failures here. Changed |= - InlineFunction(CS, IFI, /*CalleeAAR=*/nullptr, InsertLifetime) + InlineFunction(*CB, IFI, /*CalleeAAR=*/nullptr, InsertLifetime) .isSuccess(); // Remember to try and delete this function afterward.
This both avoids @@ -117,7 +116,7 @@ static char ID; // Pass identification, replacement for typeid - InlineCost getInlineCost(CallSite CS) override; + InlineCost getInlineCost(CallBase &CB) override; using llvm::Pass::doFinalization; bool doFinalization(CallGraph &CG) override { @@ -152,8 +151,8 @@ /// computed here, but as we only expect to do this for relatively few and /// small functions which have the explicit attribute to force inlining, it is /// likely not worth it in practice. -InlineCost AlwaysInlinerLegacyPass::getInlineCost(CallSite CS) { - Function *Callee = CS.getCalledFunction(); +InlineCost AlwaysInlinerLegacyPass::getInlineCost(CallBase &CB) { + Function *Callee = CB.getCalledFunction(); // Only inline direct calls to functions with always-inline attributes // that are viable for inlining. @@ -164,7 +163,7 @@ if (Callee->isDeclaration()) return InlineCost::getNever("no definition"); - if (!CS.hasFnAttr(Attribute::AlwaysInline)) + if (!CB.hasFnAttr(Attribute::AlwaysInline)) return InlineCost::getNever("no alwaysinline attribute"); auto IsViable = isInlineViable(*Callee); diff --git a/llvm/lib/Transforms/IPO/InlineSimple.cpp b/llvm/lib/Transforms/IPO/InlineSimple.cpp --- a/llvm/lib/Transforms/IPO/InlineSimple.cpp +++ b/llvm/lib/Transforms/IPO/InlineSimple.cpp @@ -15,7 +15,6 @@ #include "llvm/Analysis/ProfileSummaryInfo.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/TargetTransformInfo.h" -#include "llvm/IR/CallSite.h" #include "llvm/IR/CallingConv.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/Instructions.h" @@ -52,26 +51,26 @@ static char ID; // Pass identification, replacement for typeid - InlineCost getInlineCost(CallSite CS) override { - Function *Callee = CS.getCalledFunction(); + InlineCost getInlineCost(CallBase &CB) override { + Function *Callee = CB.getCalledFunction(); TargetTransformInfo &TTI = TTIWP->getTTI(*Callee); bool RemarksEnabled = false; - const auto &BBs = CS.getCaller()->getBasicBlockList(); + const auto &BBs = CB.getCaller()->getBasicBlockList(); if (!BBs.empty()) { auto DI = OptimizationRemark(DEBUG_TYPE, "", DebugLoc(), &BBs.front()); if (DI.isEnabled()) RemarksEnabled = true; } - OptimizationRemarkEmitter ORE(CS.getCaller()); + OptimizationRemarkEmitter ORE(CB.getCaller()); std::function GetAssumptionCache = [&](Function &F) -> AssumptionCache & { return ACT->getAssumptionCache(F); }; - return llvm::getInlineCost( - cast(*CS.getInstruction()), Params, TTI, GetAssumptionCache, - /*GetBFI=*/None, GetTLI, PSI, RemarksEnabled ? &ORE : nullptr); + return llvm::getInlineCost(CB, Params, TTI, GetAssumptionCache, + /*GetBFI=*/None, GetTLI, PSI, + RemarksEnabled ? &ORE : nullptr); } bool runOnSCC(CallGraphSCC &SCC) override; diff --git a/llvm/lib/Transforms/IPO/Inliner.cpp b/llvm/lib/Transforms/IPO/Inliner.cpp --- a/llvm/lib/Transforms/IPO/Inliner.cpp +++ b/llvm/lib/Transforms/IPO/Inliner.cpp @@ -283,7 +283,7 @@ // Try to inline the function. Get the list of static allocas that were // inlined. 
- InlineResult IR = InlineFunction(&CS, IFI, &AAR, InsertLifetime); + InlineResult IR = InlineFunction(CS, IFI, &AAR, InsertLifetime); if (!IR.isSuccess()) return IR; @@ -1087,7 +1087,7 @@ using namespace ore; - InlineResult IR = InlineFunction(CS, IFI); + InlineResult IR = InlineFunction(*CS, IFI); if (!IR.isSuccess()) { setInlineRemark(*CS, std::string(IR.getFailureReason()) + "; " + inlineCostStr(*OIC)); diff --git a/llvm/lib/Transforms/IPO/PartialInlining.cpp b/llvm/lib/Transforms/IPO/PartialInlining.cpp --- a/llvm/lib/Transforms/IPO/PartialInlining.cpp +++ b/llvm/lib/Transforms/IPO/PartialInlining.cpp @@ -30,7 +30,6 @@ #include "llvm/IR/Attributes.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/CFG.h" -#include "llvm/IR/CallSite.h" #include "llvm/IR/DebugLoc.h" #include "llvm/IR/DiagnosticInfo.h" #include "llvm/IR/Dominators.h" @@ -284,9 +283,9 @@ // edges from the guarding entry blocks). BranchProbability getOutliningCallBBRelativeFreq(FunctionCloner &Cloner); - // Return true if the callee of CS should be partially inlined with + // Return true if the callee of CB should be partially inlined with // profit. - bool shouldPartialInline(CallSite CS, FunctionCloner &Cloner, + bool shouldPartialInline(CallBase &CB, FunctionCloner &Cloner, BlockFrequency WeightedOutliningRcost, OptimizationRemarkEmitter &ORE); @@ -305,26 +304,22 @@ NumPartialInlining >= MaxNumPartialInlining); } - static CallSite getCallSite(User *U) { - CallSite CS; - if (CallInst *CI = dyn_cast<CallInst>(U)) - CS = CallSite(CI); - else if (InvokeInst *II = dyn_cast<InvokeInst>(U)) - CS = CallSite(II); - else - llvm_unreachable("All uses must be calls"); - return CS; + static CallBase *getSupportedCallBase(User *U) { + if (isa<CallInst>(U) || isa<InvokeInst>(U)) + return cast<CallBase>(U); + llvm_unreachable("All uses must be calls"); + return nullptr; } - static CallSite getOneCallSiteTo(Function *F) { + static CallBase *getOneCallSiteTo(Function *F) { User *User = *F->user_begin(); - return getCallSite(User); + return getSupportedCallBase(User); } std::tuple<DebugLoc, BasicBlock *> getOneDebugLoc(Function *F) { - CallSite CS = getOneCallSiteTo(F); - DebugLoc DLoc = CS.getInstruction()->getDebugLoc(); - BasicBlock *Block = CS.getParent(); + CallBase *CB = getOneCallSiteTo(F); + DebugLoc DLoc = CB->getDebugLoc(); + BasicBlock *Block = CB->getParent(); return std::make_tuple(DLoc, Block); } @@ -767,31 +762,28 @@ } bool PartialInlinerImpl::shouldPartialInline( - CallSite CS, FunctionCloner &Cloner, - BlockFrequency WeightedOutliningRcost, + CallBase &CB, FunctionCloner &Cloner, BlockFrequency WeightedOutliningRcost, OptimizationRemarkEmitter &ORE) { using namespace ore; - Instruction *Call = CS.getInstruction(); - Function *Callee = CS.getCalledFunction(); + Function *Callee = CB.getCalledFunction(); assert(Callee == Cloner.ClonedFunc); if (SkipCostAnalysis) return isInlineViable(*Callee).isSuccess(); - Function *Caller = CS.getCaller(); + Function *Caller = CB.getCaller(); auto &CalleeTTI = (*GetTTI)(*Callee); bool RemarksEnabled = Callee->getContext().getDiagHandlerPtr()->isMissedOptRemarkEnabled( DEBUG_TYPE); - assert(Call && "invalid callsite for partial inline"); - InlineCost IC = getInlineCost(cast<CallBase>(*Call), getInlineParams(), - CalleeTTI, *GetAssumptionCache, GetBFI, *GetTLI, - PSI, RemarksEnabled ? &ORE : nullptr); + InlineCost IC = + getInlineCost(CB, getInlineParams(), CalleeTTI, *GetAssumptionCache, + GetBFI, *GetTLI, PSI, RemarksEnabled ?
&ORE : nullptr); if (IC.isAlways()) { ORE.emit([&]() { - return OptimizationRemarkAnalysis(DEBUG_TYPE, "AlwaysInline", Call) + return OptimizationRemarkAnalysis(DEBUG_TYPE, "AlwaysInline", &CB) << NV("Callee", Cloner.OrigFunc) << " should always be fully inlined, not partially"; }); @@ -800,7 +792,7 @@ if (IC.isNever()) { ORE.emit([&]() { - return OptimizationRemarkMissed(DEBUG_TYPE, "NeverInline", Call) + return OptimizationRemarkMissed(DEBUG_TYPE, "NeverInline", &CB) << NV("Callee", Cloner.OrigFunc) << " not partially inlined into " << NV("Caller", Caller) << " because it should never be inlined (cost=never)"; @@ -810,7 +802,7 @@ if (!IC) { ORE.emit([&]() { - return OptimizationRemarkAnalysis(DEBUG_TYPE, "TooCostly", Call) + return OptimizationRemarkAnalysis(DEBUG_TYPE, "TooCostly", &CB) << NV("Callee", Cloner.OrigFunc) << " not partially inlined into " << NV("Caller", Caller) << " because too costly to inline (cost=" << NV("Cost", IC.getCost()) << ", threshold=" @@ -821,14 +813,14 @@ const DataLayout &DL = Caller->getParent()->getDataLayout(); // The savings of eliminating the call: - int NonWeightedSavings = getCallsiteCost(cast(*Call), DL); + int NonWeightedSavings = getCallsiteCost(CB, DL); BlockFrequency NormWeightedSavings(NonWeightedSavings); // Weighted saving is smaller than weighted cost, return false if (NormWeightedSavings < WeightedOutliningRcost) { ORE.emit([&]() { return OptimizationRemarkAnalysis(DEBUG_TYPE, "OutliningCallcostTooHigh", - Call) + &CB) << NV("Callee", Cloner.OrigFunc) << " not partially inlined into " << NV("Caller", Caller) << " runtime overhead (overhead=" << NV("Overhead", (unsigned)WeightedOutliningRcost.getFrequency()) @@ -842,7 +834,7 @@ } ORE.emit([&]() { - return OptimizationRemarkAnalysis(DEBUG_TYPE, "CanBePartiallyInlined", Call) + return OptimizationRemarkAnalysis(DEBUG_TYPE, "CanBePartiallyInlined", &CB) << NV("Callee", Cloner.OrigFunc) << " can be partially inlined into " << NV("Caller", Caller) << " with cost=" << NV("Cost", IC.getCost()) << " (threshold=" @@ -954,15 +946,15 @@ }; for (User *User : Users) { - CallSite CS = getCallSite(User); - Function *Caller = CS.getCaller(); + CallBase *CB = getSupportedCallBase(User); + Function *Caller = CB->getCaller(); if (CurrentCaller != Caller) { CurrentCaller = Caller; ComputeCurrBFI(Caller); } else { assert(CurrentCallerBFI && "CallerBFI is not set"); } - BasicBlock *CallBB = CS.getInstruction()->getParent(); + BasicBlock *CallBB = CB->getParent(); auto Count = CurrentCallerBFI->getBlockProfileCount(CallBB); if (Count) CallSiteToProfCountMap[User] = *Count; @@ -1163,8 +1155,8 @@ Function *OutlinedFunc = CE.extractCodeRegion(CEAC); if (OutlinedFunc) { - CallSite OCS = PartialInlinerImpl::getOneCallSiteTo(OutlinedFunc); - BasicBlock *OutliningCallBB = OCS.getInstruction()->getParent(); + CallBase *OCS = PartialInlinerImpl::getOneCallSiteTo(OutlinedFunc); + BasicBlock *OutliningCallBB = OCS->getParent(); assert(OutliningCallBB->getParent() == ClonedFunc); OutlinedFunctions.push_back(std::make_pair(OutlinedFunc,OutliningCallBB)); NumColdRegionsOutlined++; @@ -1172,7 +1164,7 @@ if (MarkOutlinedColdCC) { OutlinedFunc->setCallingConv(CallingConv::Cold); - OCS.setCallingConv(CallingConv::Cold); + OCS->setCallingConv(CallingConv::Cold); } } else ORE.emit([&]() { @@ -1232,7 +1224,6 @@ if (OutlinedFunc) { BasicBlock *OutliningCallBB = PartialInlinerImpl::getOneCallSiteTo(OutlinedFunc) - .getInstruction() ->getParent(); assert(OutliningCallBB->getParent() == ClonedFunc); 
OutlinedFunctions.push_back(std::make_pair(OutlinedFunc, OutliningCallBB)); @@ -1399,25 +1390,25 @@ bool AnyInline = false; for (User *User : Users) { - CallSite CS = getCallSite(User); + CallBase *CB = getSupportedCallBase(User); if (IsLimitReached()) continue; - OptimizationRemarkEmitter CallerORE(CS.getCaller()); - if (!shouldPartialInline(CS, Cloner, WeightedRcost, CallerORE)) + OptimizationRemarkEmitter CallerORE(CB->getCaller()); + if (!shouldPartialInline(*CB, Cloner, WeightedRcost, CallerORE)) continue; // Construct remark before doing the inlining, as after successful inlining // the callsite is removed. - OptimizationRemark OR(DEBUG_TYPE, "PartiallyInlined", CS.getInstruction()); + OptimizationRemark OR(DEBUG_TYPE, "PartiallyInlined", CB); OR << ore::NV("Callee", Cloner.OrigFunc) << " partially inlined into " - << ore::NV("Caller", CS.getCaller()); + << ore::NV("Caller", CB->getCaller()); InlineFunctionInfo IFI(nullptr, GetAssumptionCache, PSI); // We can only forward varargs when we outlined a single region, else we // bail on vararg functions. - if (!InlineFunction(CS, IFI, nullptr, true, + if (!InlineFunction(*CB, IFI, nullptr, true, (Cloner.ClonedOI ? Cloner.OutlinedFunctions.back().first : nullptr)) .isSuccess()) diff --git a/llvm/lib/Transforms/IPO/SampleProfile.cpp b/llvm/lib/Transforms/IPO/SampleProfile.cpp --- a/llvm/lib/Transforms/IPO/SampleProfile.cpp +++ b/llvm/lib/Transforms/IPO/SampleProfile.cpp @@ -893,9 +893,11 @@ return it.first->second; } +// FIXME(CallSite): Parameter should be CallBase&, as it's assumed to be that, +// and non-null. bool SampleProfileLoader::inlineCallInstruction(Instruction *I) { assert(isa(I) || isa(I)); - CallSite CS(I); + CallBase &CS = *cast(I); Function *CalledFunction = CS.getCalledFunction(); assert(CalledFunction); DebugLoc DLoc = I->getDebugLoc(); diff --git a/llvm/lib/Transforms/Scalar/PlaceSafepoints.cpp b/llvm/lib/Transforms/Scalar/PlaceSafepoints.cpp --- a/llvm/lib/Transforms/Scalar/PlaceSafepoints.cpp +++ b/llvm/lib/Transforms/Scalar/PlaceSafepoints.cpp @@ -650,7 +650,7 @@ // Do the actual inlining InlineFunctionInfo IFI; - bool InlineStatus = InlineFunction(PollCall, IFI).isSuccess(); + bool InlineStatus = InlineFunction(*PollCall, IFI).isSuccess(); assert(InlineStatus && "inline must succeed"); (void)InlineStatus; // suppress warning in release-asserts diff --git a/llvm/lib/Transforms/Utils/InlineFunction.cpp b/llvm/lib/Transforms/Utils/InlineFunction.cpp --- a/llvm/lib/Transforms/Utils/InlineFunction.cpp +++ b/llvm/lib/Transforms/Utils/InlineFunction.cpp @@ -98,12 +98,6 @@ "attribute inference in inlined body"), cl::init(4)); -llvm::InlineResult llvm::InlineFunction(CallBase *CB, InlineFunctionInfo &IFI, - AAResults *CalleeAAR, - bool InsertLifetime) { - return InlineFunction(CallSite(CB), IFI, CalleeAAR, InsertLifetime); -} - namespace { /// A class for recording information about inlining a landing pad. @@ -781,12 +775,10 @@ /// When inlining a call site that has !llvm.mem.parallel_loop_access or /// llvm.access.group metadata, that metadata should be propagated to all /// memory-accessing cloned instructions. 
-static void PropagateParallelLoopAccessMetadata(CallSite CS, +static void PropagateParallelLoopAccessMetadata(CallBase &CB, ValueToValueMapTy &VMap) { - MDNode *M = - CS.getInstruction()->getMetadata(LLVMContext::MD_mem_parallel_loop_access); - MDNode *CallAccessGroup = - CS.getInstruction()->getMetadata(LLVMContext::MD_access_group); + MDNode *M = CB.getMetadata(LLVMContext::MD_mem_parallel_loop_access); + MDNode *CallAccessGroup = CB.getMetadata(LLVMContext::MD_access_group); if (!M && !CallAccessGroup) return; @@ -824,8 +816,8 @@ /// not be differentiated (and this would lead to miscompiles because the /// non-aliasing property communicated by the metadata could have /// call-site-specific control dependencies). -static void CloneAliasScopeMetadata(CallSite CS, ValueToValueMapTy &VMap) { - const Function *CalledFunc = CS.getCalledFunction(); +static void CloneAliasScopeMetadata(CallBase &CB, ValueToValueMapTy &VMap) { + const Function *CalledFunc = CB.getCalledFunction(); SetVector MD; // Note: We could only clone the metadata if it is already used in the @@ -900,13 +892,11 @@ // If the call site also had alias scope metadata (a list of scopes to // which instructions inside it might belong), propagate those scopes to // the inlined instructions. - if (MDNode *CSM = - CS.getInstruction()->getMetadata(LLVMContext::MD_alias_scope)) + if (MDNode *CSM = CB.getMetadata(LLVMContext::MD_alias_scope)) NewMD = MDNode::concatenate(NewMD, CSM); NI->setMetadata(LLVMContext::MD_alias_scope, NewMD); } else if (NI->mayReadOrWriteMemory()) { - if (MDNode *M = - CS.getInstruction()->getMetadata(LLVMContext::MD_alias_scope)) + if (MDNode *M = CB.getMetadata(LLVMContext::MD_alias_scope)) NI->setMetadata(LLVMContext::MD_alias_scope, M); } @@ -915,12 +905,11 @@ // If the call site also had noalias metadata (a list of scopes with // which instructions inside it don't alias), propagate those scopes to // the inlined instructions. - if (MDNode *CSM = - CS.getInstruction()->getMetadata(LLVMContext::MD_noalias)) + if (MDNode *CSM = CB.getMetadata(LLVMContext::MD_noalias)) NewMD = MDNode::concatenate(NewMD, CSM); NI->setMetadata(LLVMContext::MD_noalias, NewMD); } else if (NI->mayReadOrWriteMemory()) { - if (MDNode *M = CS.getInstruction()->getMetadata(LLVMContext::MD_noalias)) + if (MDNode *M = CB.getMetadata(LLVMContext::MD_noalias)) NI->setMetadata(LLVMContext::MD_noalias, M); } } @@ -930,16 +919,16 @@ /// then add new alias scopes for each noalias argument, tag the mapped noalias /// parameters with noalias metadata specifying the new scope, and tag all /// non-derived loads, stores and memory intrinsics with the new alias scopes. -static void AddAliasScopeMetadata(CallSite CS, ValueToValueMapTy &VMap, +static void AddAliasScopeMetadata(CallBase &CB, ValueToValueMapTy &VMap, const DataLayout &DL, AAResults *CalleeAAR) { if (!EnableNoAliasConversion) return; - const Function *CalledFunc = CS.getCalledFunction(); + const Function *CalledFunc = CB.getCalledFunction(); SmallVector NoAliasArgs; for (const Argument &Arg : CalledFunc->args()) - if (CS.paramHasAttr(Arg.getArgNo(), Attribute::NoAlias) && !Arg.use_empty()) + if (CB.paramHasAttr(Arg.getArgNo(), Attribute::NoAlias) && !Arg.use_empty()) NoAliasArgs.push_back(&Arg); if (NoAliasArgs.empty()) @@ -1072,7 +1061,7 @@ // completely describe the aliasing properties using alias.scope // metadata (and, thus, won't add any). 
if (const Argument *A = dyn_cast(V)) { - if (!CS.paramHasAttr(A->getArgNo(), Attribute::NoAlias)) + if (!CB.paramHasAttr(A->getArgNo(), Attribute::NoAlias)) UsesAliasingPtr = true; } else { UsesAliasingPtr = true; @@ -1164,9 +1153,9 @@ return false; } -static AttrBuilder IdentifyValidAttributes(CallSite CS) { +static AttrBuilder IdentifyValidAttributes(CallBase &CB) { - AttrBuilder AB(CS.getAttributes(), AttributeList::ReturnIndex); + AttrBuilder AB(CB.getAttributes(), AttributeList::ReturnIndex); if (AB.empty()) return AB; AttrBuilder Valid; @@ -1184,14 +1173,14 @@ return Valid; } -static void AddReturnAttributes(CallSite CS, ValueToValueMapTy &VMap) { +static void AddReturnAttributes(CallBase &CB, ValueToValueMapTy &VMap) { if (!UpdateReturnAttributes && !UpdateLoadMetadataDuringInlining) return; - AttrBuilder Valid = IdentifyValidAttributes(CS); + AttrBuilder Valid = IdentifyValidAttributes(CB); if (Valid.empty()) return; - auto *CalledFunction = CS.getCalledFunction(); + auto *CalledFunction = CB.getCalledFunction(); auto &Context = CalledFunction->getContext(); auto getExpectedRV = [&](Value *V) -> Instruction * { @@ -1247,14 +1236,14 @@ // with a differing value, the AttributeList's merge API honours the already // existing attribute value (i.e. attributes such as dereferenceable, // dereferenceable_or_null etc). See AttrBuilder::merge for more details. - if (auto *CB = dyn_cast(NewRetVal)) { - AttributeList AL = CB->getAttributes(); + if (auto *NewRetValCB = dyn_cast(NewRetVal)) { + AttributeList AL = NewRetValCB->getAttributes(); AttributeList NewAL = AL.addAttributes(Context, AttributeList::ReturnIndex, Valid); - CB->setAttributes(NewAL); + NewRetValCB->setAttributes(NewAL); } else { auto *NewLI = cast(NewRetVal); - if (CS.isReturnNonNull()) + if (CB.isReturnNonNull()) NewLI->setMetadata(LLVMContext::MD_nonnull, CreateMDNode(1)); // If the load already has a dereferenceable/dereferenceable_or_null // metadata, we should honour it. @@ -1267,41 +1256,40 @@ NewLI->setMetadata(LLVMContext::MD_dereferenceable_or_null, CreateMDNode(DerefOrNullBytes)); } - } } /// If the inlined function has non-byval align arguments, then /// add @llvm.assume-based alignment assumptions to preserve this information. -static void AddAlignmentAssumptions(CallSite CS, InlineFunctionInfo &IFI) { +static void AddAlignmentAssumptions(CallBase &CB, InlineFunctionInfo &IFI) { if (!PreserveAlignmentAssumptions || !IFI.GetAssumptionCache) return; - AssumptionCache *AC = &(*IFI.GetAssumptionCache)(*CS.getCaller()); - auto &DL = CS.getCaller()->getParent()->getDataLayout(); + AssumptionCache *AC = &(*IFI.GetAssumptionCache)(*CB.getCaller()); + auto &DL = CB.getCaller()->getParent()->getDataLayout(); // To avoid inserting redundant assumptions, we should check for assumptions // already in the caller. To do this, we might need a DT of the caller. DominatorTree DT; bool DTCalculated = false; - Function *CalledFunc = CS.getCalledFunction(); + Function *CalledFunc = CB.getCalledFunction(); for (Argument &Arg : CalledFunc->args()) { unsigned Align = Arg.getType()->isPointerTy() ? Arg.getParamAlignment() : 0; if (Align && !Arg.hasByValOrInAllocaAttr() && !Arg.hasNUses(0)) { if (!DTCalculated) { - DT.recalculate(*CS.getCaller()); + DT.recalculate(*CB.getCaller()); DTCalculated = true; } // If we can already prove the asserted alignment in the context of the // caller, then don't bother inserting the assumption. 
- Value *ArgVal = CS.getArgument(Arg.getArgNo()); - if (getKnownAlignment(ArgVal, DL, CS.getInstruction(), AC, &DT) >= Align) + Value *ArgVal = CB.getArgOperand(Arg.getArgNo()); + if (getKnownAlignment(ArgVal, DL, &CB, AC, &DT) >= Align) continue; - CallInst *NewAsmp = IRBuilder<>(CS.getInstruction()) - .CreateAlignmentAssumption(DL, ArgVal, Align); + CallInst *NewAsmp = + IRBuilder<>(&CB).CreateAlignmentAssumption(DL, ArgVal, Align); AC->registerAssumption(NewAsmp); } } @@ -1311,13 +1299,13 @@ /// update the specified callgraph to reflect the changes we made. /// Note that it's possible that not all code was copied over, so only /// some edges of the callgraph may remain. -static void UpdateCallGraphAfterInlining(CallSite CS, +static void UpdateCallGraphAfterInlining(CallBase &CB, Function::iterator FirstNewBlock, ValueToValueMapTy &VMap, InlineFunctionInfo &IFI) { CallGraph &CG = *IFI.CG; - const Function *Caller = CS.getCaller(); - const Function *Callee = CS.getCalledFunction(); + const Function *Caller = CB.getCaller(); + const Function *Callee = CB.getCalledFunction(); CallGraphNode *CalleeNode = CG[Callee]; CallGraphNode *CallerNode = CG[Caller]; @@ -1375,7 +1363,7 @@ // Update the call graph by deleting the edge from Callee to Caller. We must // do this after the loop above in case Caller and Callee are the same. - CallerNode->removeCallEdgeFor(*cast(CS.getInstruction())); + CallerNode->removeCallEdgeFor(*cast(&CB)); } static void HandleByValArgumentInit(Value *Dst, Value *Src, Module *M, @@ -1664,31 +1652,29 @@ /// instruction 'call B' is inlined, and 'B' calls 'C', then the call to 'C' now /// exists in the instruction stream. Similarly this will inline a recursive /// function by one level. -llvm::InlineResult llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI, +llvm::InlineResult llvm::InlineFunction(CallBase &CB, InlineFunctionInfo &IFI, AAResults *CalleeAAR, bool InsertLifetime, Function *ForwardVarArgsTo) { - Instruction *TheCall = CS.getInstruction(); - assert(TheCall->getParent() && TheCall->getFunction() - && "Instruction not in function!"); + assert(CB.getParent() && CB.getFunction() && "Instruction not in function!"); // FIXME: we don't inline callbr yet. - if (isa(TheCall)) + if (isa(CB)) return InlineResult::failure("We don't inline callbr yet."); // If IFI has any state in it, zap it before we fill it in. IFI.reset(); - Function *CalledFunc = CS.getCalledFunction(); + Function *CalledFunc = CB.getCalledFunction(); if (!CalledFunc || // Can't inline external function or indirect CalledFunc->isDeclaration()) // call! return InlineResult::failure("external or indirect"); // The inliner does not know how to inline through calls with operand bundles // in general ... - if (CS.hasOperandBundles()) { - for (int i = 0, e = CS.getNumOperandBundles(); i != e; ++i) { - uint32_t Tag = CS.getOperandBundleAt(i).getTagID(); + if (CB.hasOperandBundles()) { + for (int i = 0, e = CB.getNumOperandBundles(); i != e; ++i) { + uint32_t Tag = CB.getOperandBundleAt(i).getTagID(); // ... but it knows how to inline through "deopt" operand bundles ... if (Tag == LLVMContext::OB_deopt) continue; @@ -1702,9 +1688,9 @@ // If the call to the callee cannot throw, set the 'nounwind' flag on any // calls that we inline. 
- bool MarkNoUnwind = CS.doesNotThrow(); + bool MarkNoUnwind = CB.doesNotThrow(); - BasicBlock *OrigBB = TheCall->getParent(); + BasicBlock *OrigBB = CB.getParent(); Function *Caller = OrigBB->getParent(); // GC poses two hazards to inlining, which only occur when the callee has GC: @@ -1749,7 +1735,7 @@ EHPersonality Personality = classifyEHPersonality(CallerPersonality); if (isScopedEHPersonality(Personality)) { Optional ParentFunclet = - CS.getOperandBundle(LLVMContext::OB_funclet); + CB.getOperandBundle(LLVMContext::OB_funclet); if (ParentFunclet) CallSiteEHPad = cast(ParentFunclet->Inputs.front()); @@ -1782,7 +1768,7 @@ // Determine if we are dealing with a call in an EHPad which does not unwind // to caller. bool EHPadForCallUnwindsLocally = false; - if (CallSiteEHPad && CS.isCall()) { + if (CallSiteEHPad && isa(CB)) { UnwindDestMemoTy FuncletUnwindMap; Value *CallSiteUnwindDestToken = getUnwindDestToken(CallSiteEHPad, FuncletUnwindMap); @@ -1811,7 +1797,7 @@ // Calculate the vector of arguments to pass into the function cloner, which // matches up the formal to the actual argument values. - CallSite::arg_iterator AI = CS.arg_begin(); + auto AI = CB.arg_begin(); unsigned ArgNo = 0; for (Function::arg_iterator I = CalledFunc->arg_begin(), E = CalledFunc->arg_end(); I != E; ++I, ++AI, ++ArgNo) { @@ -1821,8 +1807,8 @@ // by them explicit. However, we don't do this if the callee is readonly // or readnone, because the copy would be unneeded: the callee doesn't // modify the struct. - if (CS.isByValArgument(ArgNo)) { - ActualArg = HandleByValArgument(ActualArg, TheCall, CalledFunc, IFI, + if (CB.isByValArgument(ArgNo)) { + ActualArg = HandleByValArgument(ActualArg, &CB, CalledFunc, IFI, CalledFunc->getParamAlignment(ArgNo)); if (ActualArg != *AI) ByValInit.push_back(std::make_pair(ActualArg, (Value*) *AI)); @@ -1835,13 +1821,13 @@ // Add alignment assumptions if necessary. We do this before the inlined // instructions are actually cloned into the caller so that we can easily // check what will be known at the start of the inlined code. - AddAlignmentAssumptions(CS, IFI); + AddAlignmentAssumptions(CB, IFI); AssumptionCache *AC = IFI.GetAssumptionCache ? &(*IFI.GetAssumptionCache)(*Caller) : nullptr; /// Preserve all attributes on of the call and its parameters. - salvageKnowledge(CS.getInstruction(), AC); + salvageKnowledge(&CB, AC); // We want the inliner to prune the code as it copies. We would LOVE to // have no dead or constant instructions leftover after inlining occurs @@ -1849,7 +1835,7 @@ // happy with whatever the cloner can do. CloneAndPruneFunctionInto(Caller, CalledFunc, VMap, /*ModuleLevelChanges=*/false, Returns, ".i", - &InlinedFunctionInfo, TheCall); + &InlinedFunctionInfo, &CB); // Remember the first block that is newly cloned over. FirstNewBlock = LastBlock; ++FirstNewBlock; @@ -1858,7 +1844,7 @@ updateCallerBFI(OrigBB, VMap, IFI.CallerBFI, IFI.CalleeBFI, CalledFunc->front()); - updateCallProfile(CalledFunc, VMap, CalledFunc->getEntryCount(), TheCall, + updateCallProfile(CalledFunc, VMap, CalledFunc->getEntryCount(), &CB, IFI.PSI, IFI.CallerBFI); // Inject byval arguments initialization. 
@@ -1867,21 +1853,22 @@ &*FirstNewBlock, IFI); Optional<OperandBundleUse> ParentDeopt = - CS.getOperandBundle(LLVMContext::OB_deopt); + CB.getOperandBundle(LLVMContext::OB_deopt); if (ParentDeopt) { SmallVector<OperandBundleDef, 2> OpDefs; for (auto &VH : InlinedFunctionInfo.OperandBundleCallSites) { - Instruction *I = dyn_cast_or_null<Instruction>(VH); - if (!I) continue; // instruction was DCE'd or RAUW'ed to undef + CallBase *ICS = dyn_cast_or_null<CallBase>(VH); + if (!ICS) + continue; // instruction was DCE'd or RAUW'ed to undef OpDefs.clear(); - CallSite ICS(I); - OpDefs.reserve(ICS.getNumOperandBundles()); + OpDefs.reserve(ICS->getNumOperandBundles()); - for (unsigned i = 0, e = ICS.getNumOperandBundles(); i < e; ++i) { - auto ChildOB = ICS.getOperandBundleAt(i); + for (unsigned COBi = 0, COBe = ICS->getNumOperandBundles(); COBi < COBe; + ++COBi) { + auto ChildOB = ICS->getOperandBundleAt(COBi); if (ChildOB.getTagID() != LLVMContext::OB_deopt) { // If the inlined call has other operand bundles, let them be OpDefs.emplace_back(ChildOB); @@ -1906,44 +1893,44 @@ } Instruction *NewI = nullptr; - if (isa<CallInst>(I)) - NewI = CallInst::Create(cast<CallInst>(I), OpDefs, I); - else if (isa<CallBrInst>(I)) - NewI = CallBrInst::Create(cast<CallBrInst>(I), OpDefs, I); + if (isa<CallInst>(ICS)) + NewI = CallInst::Create(cast<CallInst>(ICS), OpDefs, ICS); + else if (isa<CallBrInst>(ICS)) + NewI = CallBrInst::Create(cast<CallBrInst>(ICS), OpDefs, ICS); else - NewI = InvokeInst::Create(cast<InvokeInst>(I), OpDefs, I); + NewI = InvokeInst::Create(cast<InvokeInst>(ICS), OpDefs, ICS); // Note: the RAUW does the appropriate fixup in VMap, so we need to do // this even if the call returns void. - I->replaceAllUsesWith(NewI); + ICS->replaceAllUsesWith(NewI); VH = nullptr; - I->eraseFromParent(); + ICS->eraseFromParent(); } } // Update the callgraph if requested. if (IFI.CG) - UpdateCallGraphAfterInlining(CS, FirstNewBlock, VMap, IFI); + UpdateCallGraphAfterInlining(CB, FirstNewBlock, VMap, IFI); // For 'nodebug' functions, the associated DISubprogram is always null. // Conservatively avoid propagating the callsite debug location to // instructions inlined from a function whose DISubprogram is not null. - fixupLineNumbers(Caller, FirstNewBlock, TheCall, + fixupLineNumbers(Caller, FirstNewBlock, &CB, CalledFunc->getSubprogram() != nullptr); // Clone existing noalias metadata if necessary. - CloneAliasScopeMetadata(CS, VMap); + CloneAliasScopeMetadata(CB, VMap); // Add noalias metadata if necessary. - AddAliasScopeMetadata(CS, VMap, DL, CalleeAAR); + AddAliasScopeMetadata(CB, VMap, DL, CalleeAAR); // Clone return attributes on the callsite into the calls within the inlined // function which feed into its return value. - AddReturnAttributes(CS, VMap); + AddReturnAttributes(CB, VMap); // Propagate llvm.mem.parallel_loop_access if necessary. - PropagateParallelLoopAccessMetadata(CS, VMap); + PropagateParallelLoopAccessMetadata(CB, VMap); // Register any cloned assumptions.
if (IFI.GetAssumptionCache) @@ -2000,15 +1987,15 @@ SmallVector VarArgsToForward; SmallVector VarArgsAttrs; for (unsigned i = CalledFunc->getFunctionType()->getNumParams(); - i < CS.getNumArgOperands(); i++) { - VarArgsToForward.push_back(CS.getArgOperand(i)); - VarArgsAttrs.push_back(CS.getAttributes().getParamAttributes(i)); + i < CB.getNumArgOperands(); i++) { + VarArgsToForward.push_back(CB.getArgOperand(i)); + VarArgsAttrs.push_back(CB.getAttributes().getParamAttributes(i)); } bool InlinedMustTailCalls = false, InlinedDeoptimizeCalls = false; if (InlinedFunctionInfo.ContainsCalls) { CallInst::TailCallKind CallSiteTailKind = CallInst::TCK_None; - if (CallInst *CI = dyn_cast<CallInst>(TheCall)) + if (CallInst *CI = dyn_cast<CallInst>(&CB)) CallSiteTailKind = CI->getTailCallKind(); // For inlining purposes, the "notail" marker is the same as no marker. @@ -2170,7 +2157,7 @@ // any call instructions into invoke instructions. This is sensitive to which // funclet pads were top-level in the inlinee, so must be done before // rewriting the "parent pad" links. - if (auto *II = dyn_cast<InvokeInst>(TheCall)) { + if (auto *II = dyn_cast<InvokeInst>(&CB)) { BasicBlock *UnwindDest = II->getUnwindDest(); Instruction *FirstNonPHI = UnwindDest->getFirstNonPHI(); if (isa<LandingPadInst>(FirstNonPHI)) { @@ -2191,29 +2178,28 @@ // Add bundle operands to any top-level call sites. SmallVector<OperandBundleDef, 1> OpBundles; for (BasicBlock::iterator BBI = BB->begin(), E = BB->end(); BBI != E;) { - Instruction *I = &*BBI++; - CallSite CS(I); - if (!CS) + CallBase *I = dyn_cast<CallBase>(&*BBI++); + if (!I) continue; // Skip call sites which are nounwind intrinsics. auto *CalledFn = - dyn_cast<Function>(CS.getCalledValue()->stripPointerCasts()); - if (CalledFn && CalledFn->isIntrinsic() && CS.doesNotThrow()) + dyn_cast<Function>(I->getCalledValue()->stripPointerCasts()); + if (CalledFn && CalledFn->isIntrinsic() && I->doesNotThrow()) continue; // Skip call sites which already have a "funclet" bundle. - if (CS.getOperandBundle(LLVMContext::OB_funclet)) + if (I->getOperandBundle(LLVMContext::OB_funclet)) continue; - CS.getOperandBundlesAsDefs(OpBundles); + I->getOperandBundlesAsDefs(OpBundles); OpBundles.emplace_back("funclet", CallSiteEHPad); Instruction *NewInst; - if (CS.isCall()) - NewInst = CallInst::Create(cast<CallInst>(I), OpBundles, I); - else if (CS.isCallBr()) - NewInst = CallBrInst::Create(cast<CallBrInst>(I), OpBundles, I); + if (auto *CallI = dyn_cast<CallInst>(I)) + NewInst = CallInst::Create(CallI, OpBundles, CallI); + else if (auto *CallBrI = dyn_cast<CallBrInst>(I)) + NewInst = CallBrInst::Create(CallBrI, OpBundles, CallBrI); else NewInst = InvokeInst::Create(cast<InvokeInst>(I), OpBundles, I); NewInst->takeName(I); @@ -2252,7 +2238,7 @@ // caller (but terminate it instead). If the caller's return type does not // match the callee's return type, we also need to change the return type of // the intrinsic. - if (Caller->getReturnType() == TheCall->getType()) { + if (Caller->getReturnType() == CB.getType()) { auto NewEnd = llvm::remove_if(Returns, [](ReturnInst *RI) { return RI->getParent()->getTerminatingDeoptimizeCall() != nullptr; }); @@ -2311,7 +2297,7 @@ if (InlinedMustTailCalls) { // Check if we need to bitcast the result of any musttail calls. Type *NewRetTy = Caller->getReturnType(); - bool NeedBitCast = !TheCall->use_empty() && TheCall->getType() != NewRetTy; + bool NeedBitCast = !CB.use_empty() && CB.getType() != NewRetTy; // Handle the returns preceded by musttail calls separately. SmallVector NormalReturns; @@ -2360,30 +2346,29 @@ // the calling basic block.
if (Returns.size() == 1 && std::distance(FirstNewBlock, Caller->end()) == 1) { // Move all of the instructions right before the call. - OrigBB->getInstList().splice(TheCall->getIterator(), - FirstNewBlock->getInstList(), + OrigBB->getInstList().splice(CB.getIterator(), FirstNewBlock->getInstList(), FirstNewBlock->begin(), FirstNewBlock->end()); // Remove the cloned basic block. Caller->getBasicBlockList().pop_back(); // If the call site was an invoke instruction, add a branch to the normal // destination. - if (InvokeInst *II = dyn_cast(TheCall)) { - BranchInst *NewBr = BranchInst::Create(II->getNormalDest(), TheCall); + if (InvokeInst *II = dyn_cast(&CB)) { + BranchInst *NewBr = BranchInst::Create(II->getNormalDest(), &CB); NewBr->setDebugLoc(Returns[0]->getDebugLoc()); } // If the return instruction returned a value, replace uses of the call with // uses of the returned value. - if (!TheCall->use_empty()) { + if (!CB.use_empty()) { ReturnInst *R = Returns[0]; - if (TheCall == R->getReturnValue()) - TheCall->replaceAllUsesWith(UndefValue::get(TheCall->getType())); + if (&CB == R->getReturnValue()) + CB.replaceAllUsesWith(UndefValue::get(CB.getType())); else - TheCall->replaceAllUsesWith(R->getReturnValue()); + CB.replaceAllUsesWith(R->getReturnValue()); } // Since we are now done with the Call/Invoke, we can delete it. - TheCall->eraseFromParent(); + CB.eraseFromParent(); // Since we are now done with the return instruction, delete it also. Returns[0]->eraseFromParent(); @@ -2400,10 +2385,10 @@ // this is an invoke instruction or a call instruction. BasicBlock *AfterCallBB; BranchInst *CreatedBranchToNormalDest = nullptr; - if (InvokeInst *II = dyn_cast(TheCall)) { + if (InvokeInst *II = dyn_cast(&CB)) { // Add an unconditional branch to make this look like the CallInst case... - CreatedBranchToNormalDest = BranchInst::Create(II->getNormalDest(), TheCall); + CreatedBranchToNormalDest = BranchInst::Create(II->getNormalDest(), &CB); // Split the basic block. This guarantees that no PHI nodes will have to be // updated due to new incoming edges, and make the invoke case more @@ -2412,11 +2397,11 @@ OrigBB->splitBasicBlock(CreatedBranchToNormalDest->getIterator(), CalledFunc->getName() + ".exit"); - } else { // It's a call + } else { // It's a call // If this is a call instruction, we need to split the basic block that // the call lives in. // - AfterCallBB = OrigBB->splitBasicBlock(TheCall->getIterator(), + AfterCallBB = OrigBB->splitBasicBlock(CB.getIterator(), CalledFunc->getName() + ".exit"); } @@ -2449,12 +2434,12 @@ if (Returns.size() > 1) { // The PHI node should go at the front of the new basic block to merge all // possible incoming values. - if (!TheCall->use_empty()) { - PHI = PHINode::Create(RTy, Returns.size(), TheCall->getName(), + if (!CB.use_empty()) { + PHI = PHINode::Create(RTy, Returns.size(), CB.getName(), &AfterCallBB->front()); // Anything that used the result of the function call should now use the // PHI node as their operand. - TheCall->replaceAllUsesWith(PHI); + CB.replaceAllUsesWith(PHI); } // Loop over all of the return instructions adding entries to the PHI node @@ -2486,11 +2471,11 @@ } else if (!Returns.empty()) { // Otherwise, if there is exactly one return value, just replace anything // using the return value of the call with the computed value. 
- if (!TheCall->use_empty()) { - if (TheCall == Returns[0]->getReturnValue()) - TheCall->replaceAllUsesWith(UndefValue::get(TheCall->getType())); + if (!CB.use_empty()) { + if (&CB == Returns[0]->getReturnValue()) + CB.replaceAllUsesWith(UndefValue::get(CB.getType())); else - TheCall->replaceAllUsesWith(Returns[0]->getReturnValue()); + CB.replaceAllUsesWith(Returns[0]->getReturnValue()); } // Update PHI nodes that use the ReturnBB to use the AfterCallBB. @@ -2508,14 +2493,14 @@ // Delete the return instruction now and empty ReturnBB now. Returns[0]->eraseFromParent(); ReturnBB->eraseFromParent(); - } else if (!TheCall->use_empty()) { + } else if (!CB.use_empty()) { // No returns, but something is using the return value of the call. Just // nuke the result. - TheCall->replaceAllUsesWith(UndefValue::get(TheCall->getType())); + CB.replaceAllUsesWith(UndefValue::get(CB.getType())); } // Since we are now done with the Call/Invoke, we can delete it. - TheCall->eraseFromParent(); + CB.eraseFromParent(); // If we inlined any musttail calls and the original return is now // unreachable, delete it. It can only contain a bitcast and ret.
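Note (not part of the patch): a minimal standalone sketch of the CallSite-to-CallBase rewrite pattern applied throughout the diff above, for readers unfamiliar with the migration. The helper name is hypothetical and only illustrates the idiom; CallBase is the common base class of CallInst, InvokeInst and CallBrInst, so code that previously wrapped an Instruction in a CallSite can cast to CallBase and use its interface directly.

#include "llvm/IR/Function.h"
#include "llvm/IR/InstrTypes.h"   // llvm::CallBase
#include "llvm/IR/Instruction.h"
#include "llvm/Support/Casting.h"

using namespace llvm;

// Hypothetical helper: return the directly called function, or nullptr for
// non-call instructions and indirect calls.
static Function *getDirectCallee(Instruction *I) {
  // Old style:  CallSite CS(I); if (!CS) return nullptr; return CS.getCalledFunction();
  // New style:  cast to CallBase and query it directly.
  if (auto *CB = dyn_cast<CallBase>(I))
    return CB->getCalledFunction();
  return nullptr;
}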