diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h
--- a/llvm/include/llvm/CodeGen/TargetLowering.h
+++ b/llvm/include/llvm/CodeGen/TargetLowering.h
@@ -1760,11 +1760,11 @@
     return isNoopAddrSpaceCast(SrcAS, DestAS);
   }
 
-  /// Return true if the pointer arguments to CI should be aligned by aligning
+  /// Return true if the pointer arguments to CB should be aligned by aligning
   /// the object whose address is being passed. If so then MinSize is set to the
   /// minimum size the object must be to be aligned and PrefAlign is set to the
   /// preferred alignment.
-  virtual bool shouldAlignPointerArgs(CallInst * /*CI*/, unsigned & /*MinSize*/,
+  virtual bool shouldAlignPointerArgs(CallBase * /*CB*/, unsigned & /*MinSize*/,
                                       unsigned & /*PrefAlign*/) const {
     return false;
   }
diff --git a/llvm/lib/CodeGen/CodeGenPrepare.cpp b/llvm/lib/CodeGen/CodeGenPrepare.cpp
--- a/llvm/lib/CodeGen/CodeGenPrepare.cpp
+++ b/llvm/lib/CodeGen/CodeGenPrepare.cpp
@@ -369,8 +369,8 @@
     bool optimizeInst(Instruction *I, bool &ModifiedDT);
     bool optimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
                             Type *AccessTy, unsigned AddrSpace);
-    bool optimizeInlineAsmInst(CallInst *CS);
-    bool optimizeCallInst(CallInst *CI, bool &ModifiedDT);
+    bool optimizeInlineAsmInst(CallBase *CB);
+    bool optimizeCallBase(CallBase *CB, bool &ModifiedDT);
     bool optimizeExt(Instruction *&I);
     bool optimizeExtUses(Instruction *I);
     bool optimizeLoadExt(LoadInst *Load);
@@ -1883,35 +1883,36 @@
   return true;
 }
 
-bool CodeGenPrepare::optimizeCallInst(CallInst *CI, bool &ModifiedDT) {
-  BasicBlock *BB = CI->getParent();
+bool CodeGenPrepare::optimizeCallBase(CallBase *CB, bool &ModifiedDT) {
+  BasicBlock *BB = CB->getParent();
 
   // Lower inline assembly if we can.
   // If we found an inline asm expession, and if the target knows how to
   // lower it to normal LLVM code, do so now.
-  if (isa<InlineAsm>(CI->getCalledValue())) {
-    if (TLI->ExpandInlineAsm(CI)) {
-      // Avoid invalidating the iterator.
-      CurInstIterator = BB->begin();
-      // Avoid processing instructions out of order, which could cause
-      // reuse before a value is defined.
-      SunkAddrs.clear();
-      return true;
+  if (auto *CI = dyn_cast<CallInst>(CB))
+    if (isa<InlineAsm>(CI->getCalledValue())) {
+      if (TLI->ExpandInlineAsm(CI)) {
+        // Avoid invalidating the iterator.
+        CurInstIterator = BB->begin();
+        // Avoid processing instructions out of order, which could cause
+        // reuse before a value is defined.
+        SunkAddrs.clear();
+        return true;
+      }
+      // Sink address computing for memory operands into the block.
+      if (optimizeInlineAsmInst(CI))
+        return true;
     }
-    // Sink address computing for memory operands into the block.
-    if (optimizeInlineAsmInst(CI))
-      return true;
-  }
 
   // Align the pointer arguments to this call if the target thinks it's a good
   // idea
   unsigned MinSize, PrefAlign;
-  if (TLI->shouldAlignPointerArgs(CI, MinSize, PrefAlign)) {
-    for (auto &Arg : CI->arg_operands()) {
+  if (TLI->shouldAlignPointerArgs(CB, MinSize, PrefAlign)) {
+    for (auto &Arg : CB->arg_operands()) {
       // We want to align both objects whose address is used directly and
       // objects whose address is used in casts and GEPs, though it only makes
-      // sense for GEPs if the offset is a multiple of the desired alignment and
-      // if size - offset meets the size threshold.
+      // sense for GEPs if the offset is a multiple of the desired alignment
+      // and if size - offset meets the size threshold.
       if (!Arg->getType()->isPointerTy())
         continue;
       APInt Offset(DL->getIndexSizeInBits(
@@ -1919,7 +1920,7 @@
                    0);
       Value *Val = Arg->stripAndAccumulateInBoundsConstantOffsets(*DL, Offset);
       uint64_t Offset2 = Offset.getLimitedValue();
-      if ((Offset2 & (PrefAlign-1)) != 0)
+      if ((Offset2 & (PrefAlign - 1)) != 0)
         continue;
       AllocaInst *AI;
       if ((AI = dyn_cast<AllocaInst>(Val)) && AI->getAlignment() < PrefAlign &&
@@ -1932,13 +1933,12 @@
       GlobalVariable *GV;
       if ((GV = dyn_cast<GlobalVariable>(Val)) && GV->canIncreaseAlignment() &&
           GV->getPointerAlignment(*DL) < PrefAlign &&
-          DL->getTypeAllocSize(GV->getValueType()) >=
-              MinSize + Offset2)
+          DL->getTypeAllocSize(GV->getValueType()) >= MinSize + Offset2)
         GV->setAlignment(MaybeAlign(PrefAlign));
     }
     // If this is a memcpy (or similar) then we may be able to improve the
     // alignment
-    if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(CI)) {
+    if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(CB)) {
       unsigned DestAlign = getKnownAlignment(MI->getDest(), *DL);
       if (DestAlign > MI->getDestAlignment())
         MI->setDestAlignment(DestAlign);
@@ -1954,16 +1954,16 @@
   // cold block. This interacts with our handling for loads and stores to
   // ensure that we can fold all uses of a potential addressing computation
   // into their uses. TODO: generalize this to work over profiling data
-  if (CI->hasFnAttr(Attribute::Cold) &&
-      !OptSize && !llvm::shouldOptimizeForSize(BB, PSI, BFI.get()))
-    for (auto &Arg : CI->arg_operands()) {
+  if (CB->hasFnAttr(Attribute::Cold) && !OptSize &&
+      !llvm::shouldOptimizeForSize(BB, PSI, BFI.get()))
+    for (auto &Arg : CB->arg_operands()) {
       if (!Arg->getType()->isPointerTy())
         continue;
       unsigned AS = Arg->getType()->getPointerAddressSpace();
-      return optimizeMemoryInst(CI, Arg, Arg->getType(), AS);
+      return optimizeMemoryInst(CB, Arg, Arg->getType(), AS);
     }
 
-  IntrinsicInst *II = dyn_cast<IntrinsicInst>(CI);
+  auto *II = dyn_cast<IntrinsicInst>(CB);
   if (II) {
     switch (II->getIntrinsicID()) {
     default: break;
@@ -1982,7 +1982,7 @@
       }
       Constant *RetVal = ConstantInt::getTrue(II->getContext());
       resetIteratorIfInvalidatedWhileCalling(BB, [&]() {
-        replaceAndRecursivelySimplify(CI, RetVal, TLInfo, nullptr);
+        replaceAndRecursivelySimplify(CB, RetVal, TLInfo, nullptr);
       });
       return true;
     }
@@ -1992,12 +1992,12 @@
       llvm_unreachable("llvm.is.constant.* should have been lowered already");
     case Intrinsic::aarch64_stlxr:
     case Intrinsic::aarch64_stxr: {
-      ZExtInst *ExtVal = dyn_cast<ZExtInst>(CI->getArgOperand(0));
+      ZExtInst *ExtVal = dyn_cast<ZExtInst>(CB->getArgOperand(0));
       if (!ExtVal || !ExtVal->hasOneUse() ||
-          ExtVal->getParent() == CI->getParent())
+          ExtVal->getParent() == CB->getParent())
         return false;
       // Sink a zext feeding stlxr/stxr before it, so it can be folded into it.
-      ExtVal->moveBefore(CI);
+      ExtVal->moveBefore(CB);
       // Mark this instruction as "inserted by CGP", so that other
       // optimizations don't touch it.
       InsertedInsts.insert(ExtVal);
@@ -2057,18 +2057,21 @@
   }
 
   // From here on out we're working with named functions.
-  if (!CI->getCalledFunction()) return false;
+  if (!CB->getCalledFunction())
+    return false;
 
   // Lower all default uses of _chk calls. This is very similar
   // to what InstCombineCalls does, but here we are only lowering calls
   // to fortified library functions (e.g. __memcpy_chk) that have the default
   // "don't know" as the objectsize. Anything else should be left alone.
-  FortifiedLibCallSimplifier Simplifier(TLInfo, true);
-  IRBuilder<> Builder(CI);
-  if (Value *V = Simplifier.optimizeCall(CI, Builder)) {
-    CI->replaceAllUsesWith(V);
-    CI->eraseFromParent();
-    return true;
+  if (auto *CI = dyn_cast<CallInst>(CB)) {
+    FortifiedLibCallSimplifier Simplifier(TLInfo, true);
+    IRBuilder<> Builder(CB);
+    if (Value *V = Simplifier.optimizeCall(CI, Builder)) {
+      CI->replaceAllUsesWith(V);
+      CI->eraseFromParent();
+      return true;
+    }
   }
 
   return false;
@@ -4537,13 +4540,12 @@
 
 /// Check to see if all uses of OpVal by the specified inline asm call are due
 /// to memory operands. If so, return true, otherwise return false.
-static bool IsOperandAMemoryOperand(CallInst *CI, InlineAsm *IA, Value *OpVal,
+static bool IsOperandAMemoryOperand(CallBase *CB, InlineAsm *IA, Value *OpVal,
                                     const TargetLowering &TLI,
                                     const TargetRegisterInfo &TRI) {
-  const Function *F = CI->getFunction();
-  TargetLowering::AsmOperandInfoVector TargetConstraints =
-      TLI.ParseConstraints(F->getParent()->getDataLayout(), &TRI,
-                           ImmutableCallSite(CI));
+  const Function *F = CB->getFunction();
+  TargetLowering::AsmOperandInfoVector TargetConstraints = TLI.ParseConstraints(
+      F->getParent()->getDataLayout(), &TRI, ImmutableCallSite(CB));
 
   for (unsigned i = 0, e = TargetConstraints.size(); i != e; ++i) {
     TargetLowering::AsmOperandInfo &OpInfo = TargetConstraints[i];
@@ -4620,21 +4622,19 @@
       continue;
     }
 
-    if (CallInst *CI = dyn_cast<CallInst>(UserI)) {
-      if (CI->hasFnAttr(Attribute::Cold)) {
+    if (CallBase *CB = dyn_cast<CallBase>(UserI)) {
+      if (CB->hasFnAttr(Attribute::Cold)) {
         // If this is a cold call, we can sink the addressing calculation into
-        // the cold path. See optimizeCallInst
-        bool OptForSize = OptSize ||
-          llvm::shouldOptimizeForSize(CI->getParent(), PSI, BFI);
-        if (!OptForSize)
+        // the cold path. See optimizeCallBase.
+        if (!(OptSize || shouldOptimizeForSize(CB->getParent(), PSI, BFI)))
          continue;
       }
 
-      InlineAsm *IA = dyn_cast<InlineAsm>(CI->getCalledValue());
+      InlineAsm *IA = dyn_cast<InlineAsm>(CB->getCalledValue());
       if (!IA) return true;
 
       // If this is a memory operand, we're cool, otherwise bail out.
-      if (!IsOperandAMemoryOperand(CI, IA, I, TLI, TRI))
+      if (!IsOperandAMemoryOperand(CB, IA, I, TLI, TRI))
        return true;
       continue;
    }
@@ -5186,13 +5186,13 @@
 
 /// If there are any memory operands, use OptimizeMemoryInst to sink their
 /// address computing into the block when possible / profitable.
-bool CodeGenPrepare::optimizeInlineAsmInst(CallInst *CS) {
+bool CodeGenPrepare::optimizeInlineAsmInst(CallBase *CB) {
   bool MadeChange = false;
 
   const TargetRegisterInfo *TRI =
-      TM->getSubtargetImpl(*CS->getFunction())->getRegisterInfo();
+      TM->getSubtargetImpl(*CB->getFunction())->getRegisterInfo();
   TargetLowering::AsmOperandInfoVector TargetConstraints =
-      TLI->ParseConstraints(*DL, TRI, CS);
+      TLI->ParseConstraints(*DL, TRI, ImmutableCallSite(CB));
   unsigned ArgNo = 0;
   for (unsigned i = 0, e = TargetConstraints.size(); i != e; ++i) {
     TargetLowering::AsmOperandInfo &OpInfo = TargetConstraints[i];
@@ -5202,8 +5202,8 @@
 
     if (OpInfo.ConstraintType == TargetLowering::C_Memory &&
         OpInfo.isIndirect) {
-      Value *OpVal = CS->getArgOperand(ArgNo++);
-      MadeChange |= optimizeMemoryInst(CS, OpVal, OpVal->getType(), ~0u);
+      Value *OpVal = CB->getArgOperand(ArgNo++);
+      MadeChange |= optimizeMemoryInst(CB, OpVal, OpVal->getType(), ~0u);
     } else if (OpInfo.Type == InlineAsm::isInput)
       ArgNo++;
   }
@@ -7245,7 +7245,8 @@
   case Instruction::AShr:
     return optimizeShiftInst(cast<BinaryOperator>(I));
   case Instruction::Call:
-    return optimizeCallInst(cast<CallInst>(I), ModifiedDT);
+  case Instruction::CallBr:
+    return optimizeCallBase(cast<CallBase>(I), ModifiedDT);
   case Instruction::Select:
     return optimizeSelectInst(cast<SelectInst>(I));
   case Instruction::ShuffleVector:
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.h b/llvm/lib/Target/ARM/ARMISelLowering.h
--- a/llvm/lib/Target/ARM/ARMISelLowering.h
+++ b/llvm/lib/Target/ARM/ARMISelLowering.h
@@ -515,7 +515,7 @@
       return true;
     }
 
-    bool shouldAlignPointerArgs(CallInst *CI, unsigned &MinSize,
+    bool shouldAlignPointerArgs(CallBase *CB, unsigned &MinSize,
                                 unsigned &PrefAlign) const override;
 
     /// createFastISel - This method returns a target specific FastISel object,
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp
--- a/llvm/lib/Target/ARM/ARMISelLowering.cpp
+++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -1770,9 +1770,9 @@
 // memcpy, and other memory intrinsics, typically tries to use LDM/STM if the
 // source/dest is aligned and the copy size is large enough. We therefore want
 // to align such objects passed to memory intrinsics.
-bool ARMTargetLowering::shouldAlignPointerArgs(CallInst *CI, unsigned &MinSize,
+bool ARMTargetLowering::shouldAlignPointerArgs(CallBase *CB, unsigned &MinSize,
                                                unsigned &PrefAlign) const {
-  if (!isa<MemIntrinsic>(CI))
+  if (!isa<MemIntrinsic>(CB))
     return false;
   MinSize = 8;
   // On ARM11 onwards (excluding M class) 8-byte aligned LDM is typically 1
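
Note (illustrative, not part of the patch): since the hook now takes a CallBase, an out-of-tree target override only needs a signature change; the body can stay the same. A minimal sketch, assuming a hypothetical free function and thresholds modelled on the ARM implementation above:

// Illustrative sketch only -- not taken from any in-tree target. Shows the
// shape of an override of the updated CallBase-based hook; the function name
// and the 8-byte values are assumptions.
#include "llvm/IR/InstrTypes.h"    // llvm::CallBase
#include "llvm/IR/IntrinsicInst.h" // llvm::MemIntrinsic
using namespace llvm;

static bool exampleShouldAlignPointerArgs(CallBase *CB, unsigned &MinSize,
                                          unsigned &PrefAlign) {
  // Only memory intrinsics (memcpy/memmove/memset) benefit from realigning
  // the objects passed to them; leave every other call site alone.
  if (!isa<MemIntrinsic>(CB))
    return false;
  MinSize = 8;   // assumed: only realign objects of at least 8 bytes
  PrefAlign = 8; // assumed: prefer 8-byte alignment for LDM/STM-style copies
  return true;
}

With this change CodeGenPrepare reaches the hook for both call and callbr sites, which is why the parameter is the common CallBase rather than CallInst.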