diff --git a/llvm/include/llvm/Transforms/IPO/Inliner.h b/llvm/include/llvm/Transforms/IPO/Inliner.h --- a/llvm/include/llvm/Transforms/IPO/Inliner.h +++ b/llvm/include/llvm/Transforms/IPO/Inliner.h @@ -13,7 +13,6 @@ #include "llvm/Analysis/CallGraphSCCPass.h" #include "llvm/Analysis/InlineCost.h" #include "llvm/Analysis/LazyCallGraph.h" -#include "llvm/IR/CallSite.h" #include "llvm/IR/PassManager.h" #include "llvm/Transforms/Utils/ImportedFunctionsInliningStatistics.h" #include @@ -51,15 +50,7 @@ /// This method must be implemented by the subclass to determine the cost of /// inlining the specified call site. If the cost returned is greater than /// the current inline threshold, the call site is not inlined. - // FIXME(mtrofin): remove this in favor of the CallBase-based one - virtual InlineCost getInlineCost(CallSite CS) = 0; - - /// This method must be implemented by the subclass to determine the cost of - /// inlining the specified call site. If the cost returned is greater than - /// the current inline threshold, the call site is not inlined. - virtual InlineCost getInlineCost(CallBase &CB) { - return getInlineCost(CallSite(&CB)); - } + virtual InlineCost getInlineCost(CallBase &CB) = 0; /// Remove dead functions. /// diff --git a/llvm/include/llvm/Transforms/Utils/Cloning.h b/llvm/include/llvm/Transforms/Utils/Cloning.h --- a/llvm/include/llvm/Transforms/Utils/Cloning.h +++ b/llvm/include/llvm/Transforms/Utils/Cloning.h @@ -228,10 +228,7 @@ /// and all varargs at the callsite will be passed to any calls to /// ForwardVarArgsTo. The caller of InlineFunction has to make sure any varargs /// are only used by ForwardVarArgsTo. -InlineResult InlineFunction(CallBase *CB, InlineFunctionInfo &IFI, - AAResults *CalleeAAR = nullptr, - bool InsertLifetime = true); -InlineResult InlineFunction(CallSite CS, InlineFunctionInfo &IFI, +InlineResult InlineFunction(CallBase &CB, InlineFunctionInfo &IFI, AAResults *CalleeAAR = nullptr, bool InsertLifetime = true, Function *ForwardVarArgsTo = nullptr); diff --git a/llvm/lib/CodeGen/SafeStack.cpp b/llvm/lib/CodeGen/SafeStack.cpp --- a/llvm/lib/CodeGen/SafeStack.cpp +++ b/llvm/lib/CodeGen/SafeStack.cpp @@ -33,7 +33,6 @@ #include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/IR/Argument.h" #include "llvm/IR/Attributes.h" -#include "llvm/IR/CallSite.h" #include "llvm/IR/ConstantRange.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DIBuilder.h" @@ -200,7 +199,7 @@ bool IsAccessSafe(Value *Addr, uint64_t Size, const Value *AllocaPtr, uint64_t AllocaSize); - bool ShouldInlinePointerAddress(CallSite &CS); + bool ShouldInlinePointerAddress(CallInst &CI); void TryInlinePointerAddress(); public: @@ -322,7 +321,7 @@ case Instruction::Call: case Instruction::Invoke: { - ImmutableCallSite CS(I); + const CallBase &CS = *cast(I); if (I->isLifetimeStartOrEnd()) continue; @@ -344,8 +343,8 @@ // FIXME: a more precise solution would require an interprocedural // analysis here, which would look at all uses of an argument inside // the function being called. 
- ImmutableCallSite::arg_iterator B = CS.arg_begin(), E = CS.arg_end(); - for (ImmutableCallSite::arg_iterator A = B; A != E; ++A) + auto B = CS.arg_begin(), E = CS.arg_end(); + for (auto A = B; A != E; ++A) if (A->get() == V) if (!(CS.doesNotCapture(A - B) && (CS.doesNotAccessMemory(A - B) || CS.doesNotAccessMemory()))) { @@ -705,34 +704,34 @@ } } -bool SafeStack::ShouldInlinePointerAddress(CallSite &CS) { - Function *Callee = CS.getCalledFunction(); - if (CS.hasFnAttr(Attribute::AlwaysInline) && +bool SafeStack::ShouldInlinePointerAddress(CallInst &CI) { + Function *Callee = CI.getCalledFunction(); + if (CI.hasFnAttr(Attribute::AlwaysInline) && isInlineViable(*Callee).isSuccess()) return true; if (Callee->isInterposable() || Callee->hasFnAttribute(Attribute::NoInline) || - CS.isNoInline()) + CI.isNoInline()) return false; return true; } void SafeStack::TryInlinePointerAddress() { - if (!isa(UnsafeStackPtr)) + auto *CI = dyn_cast(UnsafeStackPtr); + if (!CI) return; if(F.hasOptNone()) return; - CallSite CS(UnsafeStackPtr); - Function *Callee = CS.getCalledFunction(); + Function *Callee = CI->getCalledFunction(); if (!Callee || Callee->isDeclaration()) return; - if (!ShouldInlinePointerAddress(CS)) + if (!ShouldInlinePointerAddress(*CI)) return; InlineFunctionInfo IFI; - InlineFunction(CS, IFI); + InlineFunction(*CI, IFI); } bool SafeStack::run() { diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInline.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInline.cpp --- a/llvm/lib/Target/AMDGPU/AMDGPUInline.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUInline.cpp @@ -23,7 +23,6 @@ #include "llvm/Analysis/InlineCost.h" #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/Analysis/ValueTracking.h" -#include "llvm/IR/CallSite.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/Module.h" @@ -67,9 +66,9 @@ static char ID; // Pass identification, replacement for typeid - unsigned getInlineThreshold(CallSite CS) const; + unsigned getInlineThreshold(CallBase &CB) const; - InlineCost getInlineCost(CallSite CS) override; + InlineCost getInlineCost(CallBase &CB) override; bool runOnSCC(CallGraphSCC &SCC) override; @@ -106,13 +105,13 @@ LegacyInlinerBase::getAnalysisUsage(AU); } -unsigned AMDGPUInliner::getInlineThreshold(CallSite CS) const { +unsigned AMDGPUInliner::getInlineThreshold(CallBase &CB) const { int Thres = Params.DefaultThreshold; - Function *Caller = CS.getCaller(); + Function *Caller = CB.getCaller(); // Listen to the inlinehint attribute when it would increase the threshold // and the caller does not need to minimize its size. - Function *Callee = CS.getCalledFunction(); + Function *Callee = CB.getCalledFunction(); bool InlineHint = Callee && !Callee->isDeclaration() && Callee->hasFnAttribute(Attribute::InlineHint); if (InlineHint && Params.HintThreshold && Params.HintThreshold > Thres @@ -129,7 +128,7 @@ // Increase the inline threshold to allow inliniting in this case. uint64_t AllocaSize = 0; SmallPtrSet AIVisited; - for (Value *PtrArg : CS.args()) { + for (Value *PtrArg : CB.args()) { PointerType *Ty = dyn_cast(PtrArg->getType()); if (!Ty || (Ty->getAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS && Ty->getAddressSpace() != AMDGPUAS::FLAT_ADDRESS)) @@ -156,8 +155,8 @@ // Check if call is just a wrapper around another call. // In this case we only have call and ret instructions. 
-static bool isWrapperOnlyCall(CallSite CS) { - Function *Callee = CS.getCalledFunction(); +static bool isWrapperOnlyCall(CallBase &CB) { + Function *Callee = CB.getCalledFunction(); if (!Callee || Callee->size() != 1) return false; const BasicBlock &BB = Callee->getEntryBlock(); @@ -174,32 +173,32 @@ return false; } -InlineCost AMDGPUInliner::getInlineCost(CallSite CS) { - Function *Callee = CS.getCalledFunction(); - Function *Caller = CS.getCaller(); +InlineCost AMDGPUInliner::getInlineCost(CallBase &CB) { + Function *Callee = CB.getCalledFunction(); + Function *Caller = CB.getCaller(); if (!Callee || Callee->isDeclaration()) return llvm::InlineCost::getNever("undefined callee"); - if (CS.isNoInline()) + if (CB.isNoInline()) return llvm::InlineCost::getNever("noinline"); TargetTransformInfo &TTI = TTIWP->getTTI(*Callee); if (!TTI.areInlineCompatible(Caller, Callee)) return llvm::InlineCost::getNever("incompatible"); - if (CS.hasFnAttr(Attribute::AlwaysInline)) { + if (CB.hasFnAttr(Attribute::AlwaysInline)) { auto IsViable = isInlineViable(*Callee); if (IsViable.isSuccess()) return llvm::InlineCost::getAlways("alwaysinline viable"); return llvm::InlineCost::getNever(IsViable.getFailureReason()); } - if (isWrapperOnlyCall(CS)) + if (isWrapperOnlyCall(CB)) return llvm::InlineCost::getAlways("wrapper-only call"); InlineParams LocalParams = Params; - LocalParams.DefaultThreshold = (int)getInlineThreshold(CS); + LocalParams.DefaultThreshold = (int)getInlineThreshold(CB); bool RemarksEnabled = false; const auto &BBs = Caller->getBasicBlockList(); if (!BBs.empty()) { @@ -214,9 +213,9 @@ return ACT->getAssumptionCache(F); }; - auto IC = llvm::getInlineCost(cast<CallBase>(*CS.getInstruction()), Callee, - LocalParams, TTI, GetAssumptionCache, None, - GetTLI, PSI, RemarksEnabled ? &ORE : nullptr); + auto IC = + llvm::getInlineCost(CB, Callee, LocalParams, TTI, GetAssumptionCache, + None, GetTLI, PSI, RemarksEnabled ? &ORE : nullptr); if (IC && !IC.isAlways() && !Callee->hasFnAttribute(Attribute::InlineHint)) { // Single BB does not increase total BB amount, thus subtract 1 diff --git a/llvm/lib/Transforms/IPO/AlwaysInliner.cpp b/llvm/lib/Transforms/IPO/AlwaysInliner.cpp --- a/llvm/lib/Transforms/IPO/AlwaysInliner.cpp +++ b/llvm/lib/Transforms/IPO/AlwaysInliner.cpp @@ -16,7 +16,6 @@ #include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/InlineCost.h" #include "llvm/Analysis/TargetLibraryInfo.h" -#include "llvm/IR/CallSite.h" #include "llvm/IR/CallingConv.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/Instructions.h" @@ -43,7 +42,7 @@ }; InlineFunctionInfo IFI(/*cg=*/nullptr, &GetAssumptionCache); - SmallSetVector<CallSite, 16> Calls; + SmallSetVector<CallBase *, 16> Calls; bool Changed = false; SmallVector<Function *, 16> InlinedFunctions; for (Function &F : M) @@ -52,15 +51,15 @@ Calls.clear(); for (User *U : F.users()) - if (auto CS = CallSite(U)) - if (CS.getCalledFunction() == &F) - Calls.insert(CS); + if (auto *CB = dyn_cast<CallBase>(U)) + if (CB->getCalledFunction() == &F) + Calls.insert(CB); - for (CallSite CS : Calls) + for (CallBase *CB : Calls) // FIXME: We really shouldn't be able to fail to inline at this point! // We should do something to log or check the inline failures here. Changed |= - InlineFunction(CS, IFI, /*CalleeAAR=*/nullptr, InsertLifetime) + InlineFunction(*CB, IFI, /*CalleeAAR=*/nullptr, InsertLifetime) .isSuccess(); // Remember to try and delete this function afterward.
This both avoids @@ -117,7 +116,7 @@ static char ID; // Pass identification, replacement for typeid - InlineCost getInlineCost(CallSite CS) override; + InlineCost getInlineCost(CallBase &CB) override; using llvm::Pass::doFinalization; bool doFinalization(CallGraph &CG) override { @@ -152,8 +151,8 @@ /// computed here, but as we only expect to do this for relatively few and /// small functions which have the explicit attribute to force inlining, it is /// likely not worth it in practice. -InlineCost AlwaysInlinerLegacyPass::getInlineCost(CallSite CS) { - Function *Callee = CS.getCalledFunction(); +InlineCost AlwaysInlinerLegacyPass::getInlineCost(CallBase &CB) { + Function *Callee = CB.getCalledFunction(); // Only inline direct calls to functions with always-inline attributes // that are viable for inlining. @@ -164,7 +163,7 @@ if (Callee->isDeclaration()) return InlineCost::getNever("no definition"); - if (!CS.hasFnAttr(Attribute::AlwaysInline)) + if (!CB.hasFnAttr(Attribute::AlwaysInline)) return InlineCost::getNever("no alwaysinline attribute"); auto IsViable = isInlineViable(*Callee); diff --git a/llvm/lib/Transforms/IPO/InlineSimple.cpp b/llvm/lib/Transforms/IPO/InlineSimple.cpp --- a/llvm/lib/Transforms/IPO/InlineSimple.cpp +++ b/llvm/lib/Transforms/IPO/InlineSimple.cpp @@ -15,7 +15,6 @@ #include "llvm/Analysis/ProfileSummaryInfo.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/TargetTransformInfo.h" -#include "llvm/IR/CallSite.h" #include "llvm/IR/CallingConv.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/Instructions.h" @@ -52,26 +51,26 @@ static char ID; // Pass identification, replacement for typeid - InlineCost getInlineCost(CallSite CS) override { - Function *Callee = CS.getCalledFunction(); + InlineCost getInlineCost(CallBase &CB) override { + Function *Callee = CB.getCalledFunction(); TargetTransformInfo &TTI = TTIWP->getTTI(*Callee); bool RemarksEnabled = false; - const auto &BBs = CS.getCaller()->getBasicBlockList(); + const auto &BBs = CB.getCaller()->getBasicBlockList(); if (!BBs.empty()) { auto DI = OptimizationRemark(DEBUG_TYPE, "", DebugLoc(), &BBs.front()); if (DI.isEnabled()) RemarksEnabled = true; } - OptimizationRemarkEmitter ORE(CS.getCaller()); + OptimizationRemarkEmitter ORE(CB.getCaller()); std::function GetAssumptionCache = [&](Function &F) -> AssumptionCache & { return ACT->getAssumptionCache(F); }; - return llvm::getInlineCost( - cast(*CS.getInstruction()), Params, TTI, GetAssumptionCache, - /*GetBFI=*/None, GetTLI, PSI, RemarksEnabled ? &ORE : nullptr); + return llvm::getInlineCost(CB, Params, TTI, GetAssumptionCache, + /*GetBFI=*/None, GetTLI, PSI, + RemarksEnabled ? &ORE : nullptr); } bool runOnSCC(CallGraphSCC &SCC) override; diff --git a/llvm/lib/Transforms/IPO/Inliner.cpp b/llvm/lib/Transforms/IPO/Inliner.cpp --- a/llvm/lib/Transforms/IPO/Inliner.cpp +++ b/llvm/lib/Transforms/IPO/Inliner.cpp @@ -283,7 +283,7 @@ // Try to inline the function. Get the list of static allocas that were // inlined. 
- InlineResult IR = InlineFunction(&CS, IFI, &AAR, InsertLifetime); + InlineResult IR = InlineFunction(CS, IFI, &AAR, InsertLifetime); if (!IR.isSuccess()) return IR; @@ -1087,7 +1087,7 @@ using namespace ore; - InlineResult IR = InlineFunction(CS, IFI); + InlineResult IR = InlineFunction(*CS, IFI); if (!IR.isSuccess()) { setInlineRemark(*CS, std::string(IR.getFailureReason()) + "; " + inlineCostStr(*OIC)); diff --git a/llvm/lib/Transforms/IPO/PartialInlining.cpp b/llvm/lib/Transforms/IPO/PartialInlining.cpp --- a/llvm/lib/Transforms/IPO/PartialInlining.cpp +++ b/llvm/lib/Transforms/IPO/PartialInlining.cpp @@ -30,7 +30,6 @@ #include "llvm/IR/Attributes.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/CFG.h" -#include "llvm/IR/CallSite.h" #include "llvm/IR/DebugLoc.h" #include "llvm/IR/DiagnosticInfo.h" #include "llvm/IR/Dominators.h" @@ -284,9 +283,9 @@ // edges from the guarding entry blocks). BranchProbability getOutliningCallBBRelativeFreq(FunctionCloner &Cloner); - // Return true if the callee of CS should be partially inlined with + // Return true if the callee of CB should be partially inlined with // profit. - bool shouldPartialInline(CallSite CS, FunctionCloner &Cloner, + bool shouldPartialInline(CallBase &CB, FunctionCloner &Cloner, BlockFrequency WeightedOutliningRcost, OptimizationRemarkEmitter &ORE); @@ -305,26 +304,22 @@ NumPartialInlining >= MaxNumPartialInlining); } - static CallSite getCallSite(User *U) { - CallSite CS; - if (CallInst *CI = dyn_cast<CallInst>(U)) - CS = CallSite(CI); - else if (InvokeInst *II = dyn_cast<InvokeInst>(U)) - CS = CallSite(II); - else - llvm_unreachable("All uses must be calls"); - return CS; + static CallBase *getSupportedCallBase(User *U) { + if (isa<CallInst>(U) || isa<InvokeInst>(U)) + return cast<CallBase>(U); + llvm_unreachable("All uses must be calls"); + return nullptr; } - static CallSite getOneCallSiteTo(Function *F) { + static CallBase *getOneCallSiteTo(Function *F) { User *User = *F->user_begin(); - return getCallSite(User); + return getSupportedCallBase(User); } std::tuple<DebugLoc, BasicBlock *> getOneDebugLoc(Function *F) { - CallSite CS = getOneCallSiteTo(F); - DebugLoc DLoc = CS.getInstruction()->getDebugLoc(); - BasicBlock *Block = CS.getParent(); + CallBase *CB = getOneCallSiteTo(F); + DebugLoc DLoc = CB->getDebugLoc(); + BasicBlock *Block = CB->getParent(); return std::make_tuple(DLoc, Block); } @@ -767,31 +762,28 @@ } bool PartialInlinerImpl::shouldPartialInline( - CallSite CS, FunctionCloner &Cloner, - BlockFrequency WeightedOutliningRcost, + CallBase &CB, FunctionCloner &Cloner, BlockFrequency WeightedOutliningRcost, OptimizationRemarkEmitter &ORE) { using namespace ore; - Instruction *Call = CS.getInstruction(); - Function *Callee = CS.getCalledFunction(); + Function *Callee = CB.getCalledFunction(); assert(Callee == Cloner.ClonedFunc); if (SkipCostAnalysis) return isInlineViable(*Callee).isSuccess(); - Function *Caller = CS.getCaller(); + Function *Caller = CB.getCaller(); auto &CalleeTTI = (*GetTTI)(*Callee); bool RemarksEnabled = Callee->getContext().getDiagHandlerPtr()->isMissedOptRemarkEnabled( DEBUG_TYPE); - assert(Call && "invalid callsite for partial inline"); - InlineCost IC = getInlineCost(cast<CallBase>(*Call), getInlineParams(), - CalleeTTI, *GetAssumptionCache, GetBFI, *GetTLI, - PSI, RemarksEnabled ? &ORE : nullptr); + InlineCost IC = + getInlineCost(CB, getInlineParams(), CalleeTTI, *GetAssumptionCache, + GetBFI, *GetTLI, PSI, RemarksEnabled ?
&ORE : nullptr); if (IC.isAlways()) { ORE.emit([&]() { - return OptimizationRemarkAnalysis(DEBUG_TYPE, "AlwaysInline", Call) + return OptimizationRemarkAnalysis(DEBUG_TYPE, "AlwaysInline", &CB) << NV("Callee", Cloner.OrigFunc) << " should always be fully inlined, not partially"; }); @@ -800,7 +792,7 @@ if (IC.isNever()) { ORE.emit([&]() { - return OptimizationRemarkMissed(DEBUG_TYPE, "NeverInline", Call) + return OptimizationRemarkMissed(DEBUG_TYPE, "NeverInline", &CB) << NV("Callee", Cloner.OrigFunc) << " not partially inlined into " << NV("Caller", Caller) << " because it should never be inlined (cost=never)"; @@ -810,7 +802,7 @@ if (!IC) { ORE.emit([&]() { - return OptimizationRemarkAnalysis(DEBUG_TYPE, "TooCostly", Call) + return OptimizationRemarkAnalysis(DEBUG_TYPE, "TooCostly", &CB) << NV("Callee", Cloner.OrigFunc) << " not partially inlined into " << NV("Caller", Caller) << " because too costly to inline (cost=" << NV("Cost", IC.getCost()) << ", threshold=" @@ -821,14 +813,14 @@ const DataLayout &DL = Caller->getParent()->getDataLayout(); // The savings of eliminating the call: - int NonWeightedSavings = getCallsiteCost(cast(*Call), DL); + int NonWeightedSavings = getCallsiteCost(CB, DL); BlockFrequency NormWeightedSavings(NonWeightedSavings); // Weighted saving is smaller than weighted cost, return false if (NormWeightedSavings < WeightedOutliningRcost) { ORE.emit([&]() { return OptimizationRemarkAnalysis(DEBUG_TYPE, "OutliningCallcostTooHigh", - Call) + &CB) << NV("Callee", Cloner.OrigFunc) << " not partially inlined into " << NV("Caller", Caller) << " runtime overhead (overhead=" << NV("Overhead", (unsigned)WeightedOutliningRcost.getFrequency()) @@ -842,7 +834,7 @@ } ORE.emit([&]() { - return OptimizationRemarkAnalysis(DEBUG_TYPE, "CanBePartiallyInlined", Call) + return OptimizationRemarkAnalysis(DEBUG_TYPE, "CanBePartiallyInlined", &CB) << NV("Callee", Cloner.OrigFunc) << " can be partially inlined into " << NV("Caller", Caller) << " with cost=" << NV("Cost", IC.getCost()) << " (threshold=" @@ -954,15 +946,15 @@ }; for (User *User : Users) { - CallSite CS = getCallSite(User); - Function *Caller = CS.getCaller(); + CallBase *CB = getSupportedCallBase(User); + Function *Caller = CB->getCaller(); if (CurrentCaller != Caller) { CurrentCaller = Caller; ComputeCurrBFI(Caller); } else { assert(CurrentCallerBFI && "CallerBFI is not set"); } - BasicBlock *CallBB = CS.getInstruction()->getParent(); + BasicBlock *CallBB = CB->getParent(); auto Count = CurrentCallerBFI->getBlockProfileCount(CallBB); if (Count) CallSiteToProfCountMap[User] = *Count; @@ -1163,8 +1155,8 @@ Function *OutlinedFunc = CE.extractCodeRegion(CEAC); if (OutlinedFunc) { - CallSite OCS = PartialInlinerImpl::getOneCallSiteTo(OutlinedFunc); - BasicBlock *OutliningCallBB = OCS.getInstruction()->getParent(); + CallBase *OCS = PartialInlinerImpl::getOneCallSiteTo(OutlinedFunc); + BasicBlock *OutliningCallBB = OCS->getParent(); assert(OutliningCallBB->getParent() == ClonedFunc); OutlinedFunctions.push_back(std::make_pair(OutlinedFunc,OutliningCallBB)); NumColdRegionsOutlined++; @@ -1172,7 +1164,7 @@ if (MarkOutlinedColdCC) { OutlinedFunc->setCallingConv(CallingConv::Cold); - OCS.setCallingConv(CallingConv::Cold); + OCS->setCallingConv(CallingConv::Cold); } } else ORE.emit([&]() { @@ -1232,7 +1224,6 @@ if (OutlinedFunc) { BasicBlock *OutliningCallBB = PartialInlinerImpl::getOneCallSiteTo(OutlinedFunc) - .getInstruction() ->getParent(); assert(OutliningCallBB->getParent() == ClonedFunc); 
OutlinedFunctions.push_back(std::make_pair(OutlinedFunc, OutliningCallBB)); @@ -1399,25 +1390,25 @@ bool AnyInline = false; for (User *User : Users) { - CallSite CS = getCallSite(User); + CallBase *CB = getSupportedCallBase(User); if (IsLimitReached()) continue; - OptimizationRemarkEmitter CallerORE(CS.getCaller()); - if (!shouldPartialInline(CS, Cloner, WeightedRcost, CallerORE)) + OptimizationRemarkEmitter CallerORE(CB->getCaller()); + if (!shouldPartialInline(*CB, Cloner, WeightedRcost, CallerORE)) continue; // Construct remark before doing the inlining, as after successful inlining // the callsite is removed. - OptimizationRemark OR(DEBUG_TYPE, "PartiallyInlined", CS.getInstruction()); + OptimizationRemark OR(DEBUG_TYPE, "PartiallyInlined", CB); OR << ore::NV("Callee", Cloner.OrigFunc) << " partially inlined into " - << ore::NV("Caller", CS.getCaller()); + << ore::NV("Caller", CB->getCaller()); InlineFunctionInfo IFI(nullptr, GetAssumptionCache, PSI); // We can only forward varargs when we outlined a single region, else we // bail on vararg functions. - if (!InlineFunction(CS, IFI, nullptr, true, + if (!InlineFunction(*CB, IFI, nullptr, true, (Cloner.ClonedOI ? Cloner.OutlinedFunctions.back().first : nullptr)) .isSuccess()) diff --git a/llvm/lib/Transforms/IPO/SampleProfile.cpp b/llvm/lib/Transforms/IPO/SampleProfile.cpp --- a/llvm/lib/Transforms/IPO/SampleProfile.cpp +++ b/llvm/lib/Transforms/IPO/SampleProfile.cpp @@ -893,9 +893,11 @@ return it.first->second; } +// FIXME(CallSite): Parameter should be CallBase&, as it's assumed to be that, +// and non-null. bool SampleProfileLoader::inlineCallInstruction(Instruction *I) { assert(isa(I) || isa(I)); - CallSite CS(I); + CallBase &CS = *cast(I); Function *CalledFunction = CS.getCalledFunction(); assert(CalledFunction); DebugLoc DLoc = I->getDebugLoc(); diff --git a/llvm/lib/Transforms/Scalar/PlaceSafepoints.cpp b/llvm/lib/Transforms/Scalar/PlaceSafepoints.cpp --- a/llvm/lib/Transforms/Scalar/PlaceSafepoints.cpp +++ b/llvm/lib/Transforms/Scalar/PlaceSafepoints.cpp @@ -650,7 +650,7 @@ // Do the actual inlining InlineFunctionInfo IFI; - bool InlineStatus = InlineFunction(PollCall, IFI).isSuccess(); + bool InlineStatus = InlineFunction(*PollCall, IFI).isSuccess(); assert(InlineStatus && "inline must succeed"); (void)InlineStatus; // suppress warning in release-asserts diff --git a/llvm/lib/Transforms/Utils/InlineFunction.cpp b/llvm/lib/Transforms/Utils/InlineFunction.cpp --- a/llvm/lib/Transforms/Utils/InlineFunction.cpp +++ b/llvm/lib/Transforms/Utils/InlineFunction.cpp @@ -98,12 +98,6 @@ "attribute inference in inlined body"), cl::init(4)); -llvm::InlineResult llvm::InlineFunction(CallBase *CB, InlineFunctionInfo &IFI, - AAResults *CalleeAAR, - bool InsertLifetime) { - return InlineFunction(CallSite(CB), IFI, CalleeAAR, InsertLifetime); -} - namespace { /// A class for recording information about inlining a landing pad. @@ -781,12 +775,10 @@ /// When inlining a call site that has !llvm.mem.parallel_loop_access or /// llvm.access.group metadata, that metadata should be propagated to all /// memory-accessing cloned instructions. 
-static void PropagateParallelLoopAccessMetadata(CallSite CS, +static void PropagateParallelLoopAccessMetadata(CallBase &CB, ValueToValueMapTy &VMap) { - MDNode *M = - CS.getInstruction()->getMetadata(LLVMContext::MD_mem_parallel_loop_access); - MDNode *CallAccessGroup = - CS.getInstruction()->getMetadata(LLVMContext::MD_access_group); + MDNode *M = CB.getMetadata(LLVMContext::MD_mem_parallel_loop_access); + MDNode *CallAccessGroup = CB.getMetadata(LLVMContext::MD_access_group); if (!M && !CallAccessGroup) return; @@ -824,8 +816,8 @@ /// not be differentiated (and this would lead to miscompiles because the /// non-aliasing property communicated by the metadata could have /// call-site-specific control dependencies). -static void CloneAliasScopeMetadata(CallSite CS, ValueToValueMapTy &VMap) { - const Function *CalledFunc = CS.getCalledFunction(); +static void CloneAliasScopeMetadata(CallBase &CB, ValueToValueMapTy &VMap) { + const Function *CalledFunc = CB.getCalledFunction(); SetVector MD; // Note: We could only clone the metadata if it is already used in the @@ -900,13 +892,11 @@ // If the call site also had alias scope metadata (a list of scopes to // which instructions inside it might belong), propagate those scopes to // the inlined instructions. - if (MDNode *CSM = - CS.getInstruction()->getMetadata(LLVMContext::MD_alias_scope)) + if (MDNode *CSM = CB.getMetadata(LLVMContext::MD_alias_scope)) NewMD = MDNode::concatenate(NewMD, CSM); NI->setMetadata(LLVMContext::MD_alias_scope, NewMD); } else if (NI->mayReadOrWriteMemory()) { - if (MDNode *M = - CS.getInstruction()->getMetadata(LLVMContext::MD_alias_scope)) + if (MDNode *M = CB.getMetadata(LLVMContext::MD_alias_scope)) NI->setMetadata(LLVMContext::MD_alias_scope, M); } @@ -915,12 +905,11 @@ // If the call site also had noalias metadata (a list of scopes with // which instructions inside it don't alias), propagate those scopes to // the inlined instructions. - if (MDNode *CSM = - CS.getInstruction()->getMetadata(LLVMContext::MD_noalias)) + if (MDNode *CSM = CB.getMetadata(LLVMContext::MD_noalias)) NewMD = MDNode::concatenate(NewMD, CSM); NI->setMetadata(LLVMContext::MD_noalias, NewMD); } else if (NI->mayReadOrWriteMemory()) { - if (MDNode *M = CS.getInstruction()->getMetadata(LLVMContext::MD_noalias)) + if (MDNode *M = CB.getMetadata(LLVMContext::MD_noalias)) NI->setMetadata(LLVMContext::MD_noalias, M); } } @@ -930,16 +919,16 @@ /// then add new alias scopes for each noalias argument, tag the mapped noalias /// parameters with noalias metadata specifying the new scope, and tag all /// non-derived loads, stores and memory intrinsics with the new alias scopes. -static void AddAliasScopeMetadata(CallSite CS, ValueToValueMapTy &VMap, +static void AddAliasScopeMetadata(CallBase &CB, ValueToValueMapTy &VMap, const DataLayout &DL, AAResults *CalleeAAR) { if (!EnableNoAliasConversion) return; - const Function *CalledFunc = CS.getCalledFunction(); + const Function *CalledFunc = CB.getCalledFunction(); SmallVector NoAliasArgs; for (const Argument &Arg : CalledFunc->args()) - if (CS.paramHasAttr(Arg.getArgNo(), Attribute::NoAlias) && !Arg.use_empty()) + if (CB.paramHasAttr(Arg.getArgNo(), Attribute::NoAlias) && !Arg.use_empty()) NoAliasArgs.push_back(&Arg); if (NoAliasArgs.empty()) @@ -1072,7 +1061,7 @@ // completely describe the aliasing properties using alias.scope // metadata (and, thus, won't add any). 
if (const Argument *A = dyn_cast(V)) { - if (!CS.paramHasAttr(A->getArgNo(), Attribute::NoAlias)) + if (!CB.paramHasAttr(A->getArgNo(), Attribute::NoAlias)) UsesAliasingPtr = true; } else { UsesAliasingPtr = true; @@ -1164,9 +1153,9 @@ return false; } -static AttrBuilder IdentifyValidAttributes(CallSite CS) { +static AttrBuilder IdentifyValidAttributes(CallBase &CB) { - AttrBuilder AB(CS.getAttributes(), AttributeList::ReturnIndex); + AttrBuilder AB(CB.getAttributes(), AttributeList::ReturnIndex); if (AB.empty()) return AB; AttrBuilder Valid; @@ -1184,14 +1173,14 @@ return Valid; } -static void AddReturnAttributes(CallSite CS, ValueToValueMapTy &VMap) { +static void AddReturnAttributes(CallBase &CB, ValueToValueMapTy &VMap) { if (!UpdateReturnAttributes && !UpdateLoadMetadataDuringInlining) return; - AttrBuilder Valid = IdentifyValidAttributes(CS); + AttrBuilder Valid = IdentifyValidAttributes(CB); if (Valid.empty()) return; - auto *CalledFunction = CS.getCalledFunction(); + auto *CalledFunction = CB.getCalledFunction(); auto &Context = CalledFunction->getContext(); auto getExpectedRV = [&](Value *V) -> Instruction * { @@ -1247,14 +1236,14 @@ // with a differing value, the AttributeList's merge API honours the already // existing attribute value (i.e. attributes such as dereferenceable, // dereferenceable_or_null etc). See AttrBuilder::merge for more details. - if (auto *CB = dyn_cast(NewRetVal)) { - AttributeList AL = CB->getAttributes(); + if (auto *NewRetValCB = dyn_cast(NewRetVal)) { + AttributeList AL = NewRetValCB->getAttributes(); AttributeList NewAL = AL.addAttributes(Context, AttributeList::ReturnIndex, Valid); - CB->setAttributes(NewAL); + NewRetValCB->setAttributes(NewAL); } else { auto *NewLI = cast(NewRetVal); - if (CS.isReturnNonNull()) + if (CB.isReturnNonNull()) NewLI->setMetadata(LLVMContext::MD_nonnull, CreateMDNode(1)); // If the load already has a dereferenceable/dereferenceable_or_null // metadata, we should honour it. @@ -1267,41 +1256,40 @@ NewLI->setMetadata(LLVMContext::MD_dereferenceable_or_null, CreateMDNode(DerefOrNullBytes)); } - } } /// If the inlined function has non-byval align arguments, then /// add @llvm.assume-based alignment assumptions to preserve this information. -static void AddAlignmentAssumptions(CallSite CS, InlineFunctionInfo &IFI) { +static void AddAlignmentAssumptions(CallBase &CB, InlineFunctionInfo &IFI) { if (!PreserveAlignmentAssumptions || !IFI.GetAssumptionCache) return; - AssumptionCache *AC = &(*IFI.GetAssumptionCache)(*CS.getCaller()); - auto &DL = CS.getCaller()->getParent()->getDataLayout(); + AssumptionCache *AC = &(*IFI.GetAssumptionCache)(*CB.getCaller()); + auto &DL = CB.getCaller()->getParent()->getDataLayout(); // To avoid inserting redundant assumptions, we should check for assumptions // already in the caller. To do this, we might need a DT of the caller. DominatorTree DT; bool DTCalculated = false; - Function *CalledFunc = CS.getCalledFunction(); + Function *CalledFunc = CB.getCalledFunction(); for (Argument &Arg : CalledFunc->args()) { unsigned Align = Arg.getType()->isPointerTy() ? Arg.getParamAlignment() : 0; if (Align && !Arg.hasByValOrInAllocaAttr() && !Arg.hasNUses(0)) { if (!DTCalculated) { - DT.recalculate(*CS.getCaller()); + DT.recalculate(*CB.getCaller()); DTCalculated = true; } // If we can already prove the asserted alignment in the context of the // caller, then don't bother inserting the assumption. 
- Value *ArgVal = CS.getArgument(Arg.getArgNo()); - if (getKnownAlignment(ArgVal, DL, CS.getInstruction(), AC, &DT) >= Align) + Value *ArgVal = CB.getArgOperand(Arg.getArgNo()); + if (getKnownAlignment(ArgVal, DL, &CB, AC, &DT) >= Align) continue; - CallInst *NewAsmp = IRBuilder<>(CS.getInstruction()) - .CreateAlignmentAssumption(DL, ArgVal, Align); + CallInst *NewAsmp = + IRBuilder<>(&CB).CreateAlignmentAssumption(DL, ArgVal, Align); AC->registerAssumption(NewAsmp); } } @@ -1311,13 +1299,13 @@ /// update the specified callgraph to reflect the changes we made. /// Note that it's possible that not all code was copied over, so only /// some edges of the callgraph may remain. -static void UpdateCallGraphAfterInlining(CallSite CS, +static void UpdateCallGraphAfterInlining(CallBase &CB, Function::iterator FirstNewBlock, ValueToValueMapTy &VMap, InlineFunctionInfo &IFI) { CallGraph &CG = *IFI.CG; - const Function *Caller = CS.getCaller(); - const Function *Callee = CS.getCalledFunction(); + const Function *Caller = CB.getCaller(); + const Function *Callee = CB.getCalledFunction(); CallGraphNode *CalleeNode = CG[Callee]; CallGraphNode *CallerNode = CG[Caller]; @@ -1375,7 +1363,7 @@ // Update the call graph by deleting the edge from Callee to Caller. We must // do this after the loop above in case Caller and Callee are the same. - CallerNode->removeCallEdgeFor(*cast(CS.getInstruction())); + CallerNode->removeCallEdgeFor(*cast(&CB)); } static void HandleByValArgumentInit(Value *Dst, Value *Src, Module *M, @@ -1664,31 +1652,29 @@ /// instruction 'call B' is inlined, and 'B' calls 'C', then the call to 'C' now /// exists in the instruction stream. Similarly this will inline a recursive /// function by one level. -llvm::InlineResult llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI, +llvm::InlineResult llvm::InlineFunction(CallBase &CB, InlineFunctionInfo &IFI, AAResults *CalleeAAR, bool InsertLifetime, Function *ForwardVarArgsTo) { - Instruction *TheCall = CS.getInstruction(); - assert(TheCall->getParent() && TheCall->getFunction() - && "Instruction not in function!"); + assert(CB.getParent() && CB.getFunction() && "Instruction not in function!"); // FIXME: we don't inline callbr yet. - if (isa(TheCall)) + if (isa(CB)) return InlineResult::failure("We don't inline callbr yet."); // If IFI has any state in it, zap it before we fill it in. IFI.reset(); - Function *CalledFunc = CS.getCalledFunction(); + Function *CalledFunc = CB.getCalledFunction(); if (!CalledFunc || // Can't inline external function or indirect CalledFunc->isDeclaration()) // call! return InlineResult::failure("external or indirect"); // The inliner does not know how to inline through calls with operand bundles // in general ... - if (CS.hasOperandBundles()) { - for (int i = 0, e = CS.getNumOperandBundles(); i != e; ++i) { - uint32_t Tag = CS.getOperandBundleAt(i).getTagID(); + if (CB.hasOperandBundles()) { + for (int i = 0, e = CB.getNumOperandBundles(); i != e; ++i) { + uint32_t Tag = CB.getOperandBundleAt(i).getTagID(); // ... but it knows how to inline through "deopt" operand bundles ... if (Tag == LLVMContext::OB_deopt) continue; @@ -1702,9 +1688,9 @@ // If the call to the callee cannot throw, set the 'nounwind' flag on any // calls that we inline. 
- bool MarkNoUnwind = CS.doesNotThrow(); + bool MarkNoUnwind = CB.doesNotThrow(); - BasicBlock *OrigBB = TheCall->getParent(); + BasicBlock *OrigBB = CB.getParent(); Function *Caller = OrigBB->getParent(); // GC poses two hazards to inlining, which only occur when the callee has GC: @@ -1749,7 +1735,7 @@ EHPersonality Personality = classifyEHPersonality(CallerPersonality); if (isScopedEHPersonality(Personality)) { Optional ParentFunclet = - CS.getOperandBundle(LLVMContext::OB_funclet); + CB.getOperandBundle(LLVMContext::OB_funclet); if (ParentFunclet) CallSiteEHPad = cast(ParentFunclet->Inputs.front()); @@ -1782,7 +1768,7 @@ // Determine if we are dealing with a call in an EHPad which does not unwind // to caller. bool EHPadForCallUnwindsLocally = false; - if (CallSiteEHPad && CS.isCall()) { + if (CallSiteEHPad && isa(CB)) { UnwindDestMemoTy FuncletUnwindMap; Value *CallSiteUnwindDestToken = getUnwindDestToken(CallSiteEHPad, FuncletUnwindMap); @@ -1811,7 +1797,7 @@ // Calculate the vector of arguments to pass into the function cloner, which // matches up the formal to the actual argument values. - CallSite::arg_iterator AI = CS.arg_begin(); + auto AI = CB.arg_begin(); unsigned ArgNo = 0; for (Function::arg_iterator I = CalledFunc->arg_begin(), E = CalledFunc->arg_end(); I != E; ++I, ++AI, ++ArgNo) { @@ -1821,8 +1807,8 @@ // by them explicit. However, we don't do this if the callee is readonly // or readnone, because the copy would be unneeded: the callee doesn't // modify the struct. - if (CS.isByValArgument(ArgNo)) { - ActualArg = HandleByValArgument(ActualArg, TheCall, CalledFunc, IFI, + if (CB.isByValArgument(ArgNo)) { + ActualArg = HandleByValArgument(ActualArg, &CB, CalledFunc, IFI, CalledFunc->getParamAlignment(ArgNo)); if (ActualArg != *AI) ByValInit.push_back(std::make_pair(ActualArg, (Value*) *AI)); @@ -1835,13 +1821,13 @@ // Add alignment assumptions if necessary. We do this before the inlined // instructions are actually cloned into the caller so that we can easily // check what will be known at the start of the inlined code. - AddAlignmentAssumptions(CS, IFI); + AddAlignmentAssumptions(CB, IFI); AssumptionCache *AC = IFI.GetAssumptionCache ? &(*IFI.GetAssumptionCache)(*Caller) : nullptr; /// Preserve all attributes on of the call and its parameters. - salvageKnowledge(CS.getInstruction(), AC); + salvageKnowledge(&CB, AC); // We want the inliner to prune the code as it copies. We would LOVE to // have no dead or constant instructions leftover after inlining occurs @@ -1849,7 +1835,7 @@ // happy with whatever the cloner can do. CloneAndPruneFunctionInto(Caller, CalledFunc, VMap, /*ModuleLevelChanges=*/false, Returns, ".i", - &InlinedFunctionInfo, TheCall); + &InlinedFunctionInfo, &CB); // Remember the first block that is newly cloned over. FirstNewBlock = LastBlock; ++FirstNewBlock; @@ -1858,7 +1844,7 @@ updateCallerBFI(OrigBB, VMap, IFI.CallerBFI, IFI.CalleeBFI, CalledFunc->front()); - updateCallProfile(CalledFunc, VMap, CalledFunc->getEntryCount(), TheCall, + updateCallProfile(CalledFunc, VMap, CalledFunc->getEntryCount(), &CB, IFI.PSI, IFI.CallerBFI); // Inject byval arguments initialization. 
@@ -1867,21 +1853,22 @@ &*FirstNewBlock, IFI); Optional<OperandBundleUse> ParentDeopt = - CS.getOperandBundle(LLVMContext::OB_deopt); + CB.getOperandBundle(LLVMContext::OB_deopt); if (ParentDeopt) { SmallVector<OperandBundleDef, 2> OpDefs; for (auto &VH : InlinedFunctionInfo.OperandBundleCallSites) { - Instruction *I = dyn_cast_or_null<Instruction>(VH); - if (!I) continue; // instruction was DCE'd or RAUW'ed to undef + CallBase *ICS = dyn_cast_or_null<CallBase>(VH); + if (!ICS) + continue; // instruction was DCE'd or RAUW'ed to undef OpDefs.clear(); - CallSite ICS(I); - OpDefs.reserve(ICS.getNumOperandBundles()); + OpDefs.reserve(ICS->getNumOperandBundles()); - for (unsigned i = 0, e = ICS.getNumOperandBundles(); i < e; ++i) { - auto ChildOB = ICS.getOperandBundleAt(i); + for (unsigned COBi = 0, COBe = ICS->getNumOperandBundles(); COBi < COBe; + ++COBi) { + auto ChildOB = ICS->getOperandBundleAt(COBi); if (ChildOB.getTagID() != LLVMContext::OB_deopt) { // If the inlined call has other operand bundles, let them be OpDefs.emplace_back(ChildOB); @@ -1906,44 +1893,44 @@ } Instruction *NewI = nullptr; - if (isa<CallInst>(I)) - NewI = CallInst::Create(cast<CallInst>(I), OpDefs, I); - else if (isa<CallBrInst>(I)) - NewI = CallBrInst::Create(cast<CallBrInst>(I), OpDefs, I); + if (isa<CallInst>(ICS)) + NewI = CallInst::Create(cast<CallInst>(ICS), OpDefs, ICS); + else if (isa<CallBrInst>(ICS)) + NewI = CallBrInst::Create(cast<CallBrInst>(ICS), OpDefs, ICS); else - NewI = InvokeInst::Create(cast<InvokeInst>(I), OpDefs, I); + NewI = InvokeInst::Create(cast<InvokeInst>(ICS), OpDefs, ICS); // Note: the RAUW does the appropriate fixup in VMap, so we need to do // this even if the call returns void. - I->replaceAllUsesWith(NewI); + ICS->replaceAllUsesWith(NewI); VH = nullptr; - I->eraseFromParent(); + ICS->eraseFromParent(); } } // Update the callgraph if requested. if (IFI.CG) - UpdateCallGraphAfterInlining(CS, FirstNewBlock, VMap, IFI); + UpdateCallGraphAfterInlining(CB, FirstNewBlock, VMap, IFI); // For 'nodebug' functions, the associated DISubprogram is always null. // Conservatively avoid propagating the callsite debug location to // instructions inlined from a function whose DISubprogram is not null. - fixupLineNumbers(Caller, FirstNewBlock, TheCall, + fixupLineNumbers(Caller, FirstNewBlock, &CB, CalledFunc->getSubprogram() != nullptr); // Clone existing noalias metadata if necessary. - CloneAliasScopeMetadata(CS, VMap); + CloneAliasScopeMetadata(CB, VMap); // Add noalias metadata if necessary. - AddAliasScopeMetadata(CS, VMap, DL, CalleeAAR); + AddAliasScopeMetadata(CB, VMap, DL, CalleeAAR); // Clone return attributes on the callsite into the calls within the inlined // function which feed into its return value. - AddReturnAttributes(CS, VMap); + AddReturnAttributes(CB, VMap); // Propagate llvm.mem.parallel_loop_access if necessary. - PropagateParallelLoopAccessMetadata(CS, VMap); + PropagateParallelLoopAccessMetadata(CB, VMap); // Register any cloned assumptions.
if (IFI.GetAssumptionCache) @@ -2000,15 +1987,15 @@ SmallVector VarArgsToForward; SmallVector VarArgsAttrs; for (unsigned i = CalledFunc->getFunctionType()->getNumParams(); - i < CS.getNumArgOperands(); i++) { - VarArgsToForward.push_back(CS.getArgOperand(i)); - VarArgsAttrs.push_back(CS.getAttributes().getParamAttributes(i)); + i < CB.getNumArgOperands(); i++) { + VarArgsToForward.push_back(CB.getArgOperand(i)); + VarArgsAttrs.push_back(CB.getAttributes().getParamAttributes(i)); } bool InlinedMustTailCalls = false, InlinedDeoptimizeCalls = false; if (InlinedFunctionInfo.ContainsCalls) { CallInst::TailCallKind CallSiteTailKind = CallInst::TCK_None; - if (CallInst *CI = dyn_cast<CallInst>(TheCall)) + if (CallInst *CI = dyn_cast<CallInst>(&CB)) CallSiteTailKind = CI->getTailCallKind(); // For inlining purposes, the "notail" marker is the same as no marker. @@ -2170,7 +2157,7 @@ // any call instructions into invoke instructions. This is sensitive to which // funclet pads were top-level in the inlinee, so must be done before // rewriting the "parent pad" links. - if (auto *II = dyn_cast<InvokeInst>(TheCall)) { + if (auto *II = dyn_cast<InvokeInst>(&CB)) { BasicBlock *UnwindDest = II->getUnwindDest(); Instruction *FirstNonPHI = UnwindDest->getFirstNonPHI(); if (isa<LandingPadInst>(FirstNonPHI)) { @@ -2191,29 +2178,28 @@ // Add bundle operands to any top-level call sites. SmallVector<OperandBundleDef, 1> OpBundles; for (BasicBlock::iterator BBI = BB->begin(), E = BB->end(); BBI != E;) { - Instruction *I = &*BBI++; - CallSite CS(I); - if (!CS) + CallBase *I = dyn_cast<CallBase>(&*BBI++); + if (!I) continue; // Skip call sites which are nounwind intrinsics. auto *CalledFn = - dyn_cast<Function>(CS.getCalledValue()->stripPointerCasts()); - if (CalledFn && CalledFn->isIntrinsic() && CS.doesNotThrow()) + dyn_cast<Function>(I->getCalledValue()->stripPointerCasts()); + if (CalledFn && CalledFn->isIntrinsic() && I->doesNotThrow()) continue; // Skip call sites which already have a "funclet" bundle. - if (CS.getOperandBundle(LLVMContext::OB_funclet)) + if (I->getOperandBundle(LLVMContext::OB_funclet)) continue; - CS.getOperandBundlesAsDefs(OpBundles); + I->getOperandBundlesAsDefs(OpBundles); OpBundles.emplace_back("funclet", CallSiteEHPad); Instruction *NewInst; - if (CS.isCall()) - NewInst = CallInst::Create(cast<CallInst>(I), OpBundles, I); - else if (CS.isCallBr()) - NewInst = CallBrInst::Create(cast<CallBrInst>(I), OpBundles, I); + if (auto *CallI = dyn_cast<CallInst>(I)) + NewInst = CallInst::Create(CallI, OpBundles, CallI); + else if (auto *CallBrI = dyn_cast<CallBrInst>(I)) + NewInst = CallBrInst::Create(CallBrI, OpBundles, CallBrI); else NewInst = InvokeInst::Create(cast<InvokeInst>(I), OpBundles, I); NewInst->takeName(I); @@ -2252,7 +2238,7 @@ // caller (but terminate it instead). If the caller's return type does not // match the callee's return type, we also need to change the return type of // the intrinsic. - if (Caller->getReturnType() == TheCall->getType()) { + if (Caller->getReturnType() == CB.getType()) { auto NewEnd = llvm::remove_if(Returns, [](ReturnInst *RI) { return RI->getParent()->getTerminatingDeoptimizeCall() != nullptr; }); @@ -2311,7 +2297,7 @@ if (InlinedMustTailCalls) { // Check if we need to bitcast the result of any musttail calls. Type *NewRetTy = Caller->getReturnType(); - bool NeedBitCast = !TheCall->use_empty() && TheCall->getType() != NewRetTy; + bool NeedBitCast = !CB.use_empty() && CB.getType() != NewRetTy; // Handle the returns preceded by musttail calls separately. SmallVector NormalReturns; @@ -2360,30 +2346,29 @@ // the calling basic block.
if (Returns.size() == 1 && std::distance(FirstNewBlock, Caller->end()) == 1) { // Move all of the instructions right before the call. - OrigBB->getInstList().splice(TheCall->getIterator(), - FirstNewBlock->getInstList(), + OrigBB->getInstList().splice(CB.getIterator(), FirstNewBlock->getInstList(), FirstNewBlock->begin(), FirstNewBlock->end()); // Remove the cloned basic block. Caller->getBasicBlockList().pop_back(); // If the call site was an invoke instruction, add a branch to the normal // destination. - if (InvokeInst *II = dyn_cast(TheCall)) { - BranchInst *NewBr = BranchInst::Create(II->getNormalDest(), TheCall); + if (InvokeInst *II = dyn_cast(&CB)) { + BranchInst *NewBr = BranchInst::Create(II->getNormalDest(), &CB); NewBr->setDebugLoc(Returns[0]->getDebugLoc()); } // If the return instruction returned a value, replace uses of the call with // uses of the returned value. - if (!TheCall->use_empty()) { + if (!CB.use_empty()) { ReturnInst *R = Returns[0]; - if (TheCall == R->getReturnValue()) - TheCall->replaceAllUsesWith(UndefValue::get(TheCall->getType())); + if (&CB == R->getReturnValue()) + CB.replaceAllUsesWith(UndefValue::get(CB.getType())); else - TheCall->replaceAllUsesWith(R->getReturnValue()); + CB.replaceAllUsesWith(R->getReturnValue()); } // Since we are now done with the Call/Invoke, we can delete it. - TheCall->eraseFromParent(); + CB.eraseFromParent(); // Since we are now done with the return instruction, delete it also. Returns[0]->eraseFromParent(); @@ -2400,10 +2385,10 @@ // this is an invoke instruction or a call instruction. BasicBlock *AfterCallBB; BranchInst *CreatedBranchToNormalDest = nullptr; - if (InvokeInst *II = dyn_cast(TheCall)) { + if (InvokeInst *II = dyn_cast(&CB)) { // Add an unconditional branch to make this look like the CallInst case... - CreatedBranchToNormalDest = BranchInst::Create(II->getNormalDest(), TheCall); + CreatedBranchToNormalDest = BranchInst::Create(II->getNormalDest(), &CB); // Split the basic block. This guarantees that no PHI nodes will have to be // updated due to new incoming edges, and make the invoke case more @@ -2412,11 +2397,11 @@ OrigBB->splitBasicBlock(CreatedBranchToNormalDest->getIterator(), CalledFunc->getName() + ".exit"); - } else { // It's a call + } else { // It's a call // If this is a call instruction, we need to split the basic block that // the call lives in. // - AfterCallBB = OrigBB->splitBasicBlock(TheCall->getIterator(), + AfterCallBB = OrigBB->splitBasicBlock(CB.getIterator(), CalledFunc->getName() + ".exit"); } @@ -2449,12 +2434,12 @@ if (Returns.size() > 1) { // The PHI node should go at the front of the new basic block to merge all // possible incoming values. - if (!TheCall->use_empty()) { - PHI = PHINode::Create(RTy, Returns.size(), TheCall->getName(), + if (!CB.use_empty()) { + PHI = PHINode::Create(RTy, Returns.size(), CB.getName(), &AfterCallBB->front()); // Anything that used the result of the function call should now use the // PHI node as their operand. - TheCall->replaceAllUsesWith(PHI); + CB.replaceAllUsesWith(PHI); } // Loop over all of the return instructions adding entries to the PHI node @@ -2486,11 +2471,11 @@ } else if (!Returns.empty()) { // Otherwise, if there is exactly one return value, just replace anything // using the return value of the call with the computed value. 
- if (!TheCall->use_empty()) { - if (TheCall == Returns[0]->getReturnValue()) - TheCall->replaceAllUsesWith(UndefValue::get(TheCall->getType())); + if (!CB.use_empty()) { + if (&CB == Returns[0]->getReturnValue()) + CB.replaceAllUsesWith(UndefValue::get(CB.getType())); else - TheCall->replaceAllUsesWith(Returns[0]->getReturnValue()); + CB.replaceAllUsesWith(Returns[0]->getReturnValue()); } // Update PHI nodes that use the ReturnBB to use the AfterCallBB. @@ -2508,14 +2493,14 @@ // Delete the return instruction now and empty ReturnBB now. Returns[0]->eraseFromParent(); ReturnBB->eraseFromParent(); - } else if (!TheCall->use_empty()) { + } else if (!CB.use_empty()) { // No returns, but something is using the return value of the call. Just // nuke the result. - TheCall->replaceAllUsesWith(UndefValue::get(TheCall->getType())); + CB.replaceAllUsesWith(UndefValue::get(CB.getType())); } // Since we are now done with the Call/Invoke, we can delete it. - TheCall->eraseFromParent(); + CB.eraseFromParent(); // If we inlined any musttail calls and the original return is now // unreachable, delete it. It can only contain a bitcast and ret.
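Note (not part of the patch): a minimal standalone sketch of the CallSite-to-CallBase rewrite pattern applied throughout the diff above, for readers unfamiliar with the migration. The helper name is hypothetical and only illustrates the idiom; CallBase is the common base class of CallInst, InvokeInst and CallBrInst, so code that previously wrapped an Instruction in a CallSite can cast to CallBase and use its interface directly.

#include "llvm/IR/Function.h"
#include "llvm/IR/InstrTypes.h"   // llvm::CallBase
#include "llvm/IR/Instruction.h"
#include "llvm/Support/Casting.h"

using namespace llvm;

// Hypothetical helper: return the directly called function, or nullptr for
// non-call instructions and indirect calls.
static Function *getDirectCallee(Instruction *I) {
  // Old style:  CallSite CS(I); if (!CS) return nullptr; return CS.getCalledFunction();
  // New style:  cast to CallBase and query it directly.
  if (auto *CB = dyn_cast<CallBase>(I))
    return CB->getCalledFunction();
  return nullptr;
}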