Index: docs/LangRef.rst
===================================================================
--- docs/LangRef.rst
+++ docs/LangRef.rst
@@ -1507,6 +1507,38 @@
 otherwise escaped) and the entire visible heap.  Deoptimization
 operand bundles do not capture their operands.
 
+The inliner knows how to inline through calls that have deoptimization
+operand bundles.  Just like inlining through a normal call site
+involves composing the normal and exceptional continuations, inlining
+through a call site with a deoptimization operand bundle needs to
+appropriately compose the "safe" deoptimization continuation.  The
+inliner does this by prepending the parent's deoptimization
+continuation to every deoptimization continuation in the inlined body.
+E.g. inlining ``@f`` into ``@g`` in the following example
+
+.. code-block:: llvm
+
+    define void @f() {
+      call void @x()  ;; no deopt state
+      call void @y() [ "deopt"(i32 10) ]
+      ret void
+    }
+
+    define void @g() {
+      call void @f() [ "deopt"(i32 20) ]
+      ret void
+    }
+
+will result in
+
+.. code-block:: llvm
+
+    define void @g() {
+      call void @x()  ;; still no deopt state
+      call void @y() [ "deopt"(i32 20, i32 10) ]
+      ret void
+    }
+
 .. _moduleasm:
 
 Module-Level Inline Assembly
Index: include/llvm/IR/CallSite.h
===================================================================
--- include/llvm/IR/CallSite.h
+++ include/llvm/IR/CallSite.h
@@ -211,10 +211,12 @@
   }
 
 #define CALLSITE_DELEGATE_GETTER(METHOD) \
-  InstrTy *II = getInstruction();    \
-  return isCall()                        \
-    ? cast<CallInst>(II)->METHOD         \
-    : cast<InvokeInst>(II)->METHOD
+  InstrTy *II = getInstruction();        \
+  if (isCall())                          \
+    return cast<CallInst>(II)->METHOD;   \
+  else                                   \
+    return cast<InvokeInst>(II)->METHOD
+
 
 #define CALLSITE_DELEGATE_SETTER(METHOD) \
   InstrTy *II = getInstruction();    \
@@ -387,6 +389,10 @@
     CALLSITE_DELEGATE_GETTER(getOperandBundle(ID));
   }
 
+  Instruction *cloneWithOperandBundles(ArrayRef<OperandBundleDef> OpB) {
+    CALLSITE_DELEGATE_GETTER(cloneWithOperandBundles(OpB));
+  }
+
 #undef CALLSITE_DELEGATE_GETTER
 #undef CALLSITE_DELEGATE_SETTER
 
Index: include/llvm/IR/InstrTypes.h
===================================================================
--- include/llvm/IR/InstrTypes.h
+++ include/llvm/IR/InstrTypes.h
@@ -1162,6 +1162,14 @@
   OperandBundleDefT() {}
   explicit OperandBundleDefT(StringRef Tag, const std::vector<InputTy> &Inputs)
       : Tag(Tag), Inputs(Inputs) {}
+
+  explicit OperandBundleDefT(StringRef Tag, std::vector<InputTy> &&Inputs)
+      : Tag(Tag), Inputs(std::move(Inputs)) {}
+
+  explicit OperandBundleDefT(const OperandBundleUse &OBU) {
+    Tag = OBU.getTagName();
+    Inputs.insert(Inputs.end(), OBU.Inputs.begin(), OBU.Inputs.end());
+  }
 };
 
 typedef OperandBundleDefT<Value *> OperandBundleDef;
Index: include/llvm/IR/Instructions.h
===================================================================
--- include/llvm/IR/Instructions.h
+++ include/llvm/IR/Instructions.h
@@ -1459,6 +1459,11 @@
                           BasicBlock *InsertAtEnd) {
     return new(1) CallInst(F, NameStr, InsertAtEnd);
   }
+
+  /// \brief Clone this call instruction with a different set of operand
+  /// bundles.
+  CallInst *cloneWithOperandBundles(ArrayRef<OperandBundleDef> Bundles);
+
   /// CreateMalloc - Generate the IR for a call to malloc:
   /// 1. Compute the malloc call's argument as the specified type's size,
   ///    possibly multiplied by the array size if the array size is not
@@ -3403,6 +3408,10 @@
                    InsertAtEnd);
   }
 
+  /// \brief Clone this invoke instruction with a different set of operand
+  /// bundles.
+  InvokeInst *cloneWithOperandBundles(ArrayRef<OperandBundleDef> Bundles);
+
   /// Provide fast operand accessors
   DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Value);
 
Index: include/llvm/Transforms/Utils/Cloning.h
===================================================================
--- include/llvm/Transforms/Utils/Cloning.h
+++ include/llvm/Transforms/Utils/Cloning.h
@@ -74,6 +74,10 @@
   /// size.
   bool ContainsDynamicAllocas;
 
+  /// All cloned call sites that have operand bundles attached are appended to
+  /// this vector.
+  std::vector<AssertingVH<Instruction>> OperandBundleCallSites;
+
   ClonedCodeInfo() : ContainsCalls(false), ContainsDynamicAllocas(false) {}
 };
 
Index: lib/IR/Instructions.cpp
===================================================================
--- lib/IR/Instructions.cpp
+++ lib/IR/Instructions.cpp
@@ -297,6 +297,17 @@
   SubclassOptionalData = CI.SubclassOptionalData;
 }
 
+CallInst *CallInst::cloneWithOperandBundles(ArrayRef<OperandBundleDef> OpB) {
+  std::vector<Value *> Args(op_begin(), op_begin() + getNumArgOperands());
+  auto *CI = CallInst::Create(getCalledValue(), Args, OpB, getName());
+  CI->setTailCallKind(getTailCallKind());
+  CI->setCallingConv(getCallingConv());
+  CI->setAttributes(getAttributes());
+  CI->setDebugLoc(getDebugLoc());
+  CI->SubclassOptionalData = SubclassOptionalData;
+  return CI;
+}
+
 void CallInst::addAttribute(unsigned i, Attribute::AttrKind attr) {
   AttributeSet PAL = getAttributes();
   PAL = PAL.addAttribute(getContext(), i, attr);
@@ -571,6 +580,18 @@
   SubclassOptionalData = II.SubclassOptionalData;
 }
 
+InvokeInst *
+InvokeInst::cloneWithOperandBundles(ArrayRef<OperandBundleDef> OpB) {
+  std::vector<Value *> Args(op_begin(), op_begin() + getNumArgOperands());
+  auto *II = InvokeInst::Create(getCalledValue(), getNormalDest(),
+                                getUnwindDest(), Args, OpB, getName());
+  II->setCallingConv(getCallingConv());
+  II->setAttributes(getAttributes());
+  II->setDebugLoc(getDebugLoc());
+  II->SubclassOptionalData = SubclassOptionalData;
+  return II;
+}
+
 BasicBlock *InvokeInst::getSuccessorV(unsigned idx) const {
   return getSuccessor(idx);
 }
Index: lib/Transforms/Utils/CloneFunction.cpp
===================================================================
--- lib/Transforms/Utils/CloneFunction.cpp
+++ lib/Transforms/Utils/CloneFunction.cpp
@@ -373,6 +373,12 @@
     VMap[&*II] = NewInst; // Add instruction map to value.
     NewBB->getInstList().push_back(NewInst);
     hasCalls |= (isa<CallInst>(II) && !isa<DbgInfoIntrinsic>(II));
+
+    if (CodeInfo)
+      if (auto CS = ImmutableCallSite(&*II))
+        if (CS.hasOperandBundles())
+          CodeInfo->OperandBundleCallSites.push_back(NewInst);
+
     if (const AllocaInst *AI = dyn_cast<AllocaInst>(II)) {
       if (isa<ConstantInt>(AI->getArraySize()))
         hasStaticAllocas = true;
@@ -444,7 +449,12 @@
       NewInst->setName(OldTI->getName()+NameSuffix);
     NewBB->getInstList().push_back(NewInst);
     VMap[OldTI] = NewInst;             // Add instruction map to value.
-    
+
+    if (CodeInfo)
+      if (auto CS = ImmutableCallSite(OldTI))
+        if (CS.hasOperandBundles())
+          CodeInfo->OperandBundleCallSites.push_back(NewInst);
+
     // Recursively clone any reachable successor blocks.
     const TerminatorInst *TI = BB->getTerminator();
     for (const BasicBlock *Succ : TI->successors())
Index: lib/Transforms/Utils/InlineFunction.cpp
===================================================================
--- lib/Transforms/Utils/InlineFunction.cpp
+++ lib/Transforms/Utils/InlineFunction.cpp
@@ -208,8 +208,13 @@
     // Create the new invoke instruction.
     ImmutableCallSite CS(CI);
     SmallVector<Value*, 8> InvokeArgs(CS.arg_begin(), CS.arg_end());
-    InvokeInst *II = InvokeInst::Create(CI->getCalledValue(), Split, UnwindEdge,
-                                        InvokeArgs, CI->getName(), BB);
+    SmallVector<OperandBundleDef, 1> OpBundles;
+    for (unsigned i = 0, e = CS.getNumOperandBundles(); i != e; ++i)
+      OpBundles.emplace_back(CS.getOperandBundleAt(i));
+
+    InvokeInst *II =
+        InvokeInst::Create(CI->getCalledValue(), Split, UnwindEdge, InvokeArgs,
+                           OpBundles, CI->getName(), BB);
     II->setDebugLoc(CI->getDebugLoc());
     II->setCallingConv(CI->getCallingConv());
     II->setAttributes(CI->getAttributes());
@@ -1030,8 +1035,13 @@
       CalledFunc->getFunctionType()->isVarArg()) return false;
 
   // The inliner does not know how to inline through calls with operand bundles.
-  if (CS.hasOperandBundles())
-    return false;
+  if (CS.hasOperandBundles()) {
+    bool CanInline =
+        CS.getNumOperandBundles() == 1 &&
+        CS.getOperandBundleAt(0).getTagID() == LLVMContext::OB_deopt;
+    if (!CanInline)
+      return false;
+  }
 
   // If the call to the callee cannot throw, set the 'nounwind' flag on any
   // calls that we inline.
@@ -1138,6 +1148,55 @@
       HandleByValArgumentInit(Init.first, Init.second, Caller->getParent(),
                               &*FirstNewBlock, IFI);
 
+    if (CS.hasOperandBundles()) {
+      auto ParentDeopt = CS.getOperandBundleAt(0);
+      assert(ParentDeopt.getTagID() == LLVMContext::OB_deopt &&
+             "Checked on entry!");
+
+      SmallVector<OperandBundleDef, 2> OpDefs;
+
+      for (auto &VH : InlinedFunctionInfo.OperandBundleCallSites) {
+        Instruction *I = VH;
+
+        OpDefs.clear();
+        CallSite ICS(I);
+        bool Found = false;
+        for (unsigned i = 0, e = ICS.getNumOperandBundles(); i < e; ++i) {
+          auto ChildOB = ICS.getOperandBundleAt(i);
+          if (ChildOB.getTagID() != LLVMContext::OB_deopt) {
+            // If the inlined call has other operand bundles, let them be
+            OpDefs.emplace_back(ChildOB);
+            continue;
+          }
+
+          // It may be useful to separate this logic (of handling operand
+          // bundles) out to a separate "policy" component if this gets crowded.
+
+          assert(!Found && "Only one deopt operand bundle allowed!");
+          Found = true;
+
+          // Prepend the parent's deoptimization continuation to the newly
+          // inlined call's deoptimization continuation.
+          std::vector<Value *> MergedDeoptArgs(ParentDeopt.Inputs.begin(),
+                                               ParentDeopt.Inputs.end());
+          MergedDeoptArgs.insert(MergedDeoptArgs.end(), ChildOB.Inputs.begin(),
+                                 ChildOB.Inputs.end());
+
+          OpDefs.emplace_back(ChildOB.getTagName(), std::move(MergedDeoptArgs));
+        }
+
+        Instruction *NewI = ICS.cloneWithOperandBundles(OpDefs);
+        I->getParent()->getInstList().insert(I->getIterator(), NewI);
+
+        // Note: the RAUW does the appropriate fixup in VMap, so we need to do
+        // this even if the call returns void.
+        I->replaceAllUsesWith(NewI);
+
+        VH = nullptr;
+        I->eraseFromParent();
+      }
+    }
+
     // Update the callgraph if requested.
     if (IFI.CG)
       UpdateCallGraphAfterInlining(CS, FirstNewBlock, VMap, IFI);
Index: test/Transforms/Inline/deopt-bundles.ll
===================================================================
--- /dev/null
+++ test/Transforms/Inline/deopt-bundles.ll
@@ -0,0 +1,97 @@
+; RUN: opt -S -always-inline < %s | FileCheck %s
+
+declare void @f()
+declare i32 @g()
+
+define i32 @callee_0() alwaysinline {
+ entry:
+  call void @f()
+  ret i32 2
+}
+
+define i32 @caller_0() {
+; CHECK-LABEL: @caller_0(
+ entry:
+; CHECK: entry:
+; CHECK-NEXT: call void @f()
+; CHECK-NEXT: ret i32 2
+  %x = call i32 @callee_0() [ "deopt"(i32 5) ]
+  ret i32 %x
+}
+
+define i32 @callee_1() alwaysinline {
+ entry:
+  call void @f() [ "deopt"() ]
+  call void @f() [ "deopt"(i32 0, i32 1) ]
+  call void @f() [ "deopt"(i32 0, i32 1), "foo"(double 0.0) ]
+  ret i32 2
+}
+
+define i32 @caller_1() {
+; CHECK-LABEL: @caller_1(
+ entry:
+; CHECK: entry:
+; CHECK-NEXT:  call void @f() [ "deopt"(i32 5) ]
+; CHECK-NEXT:  call void @f() [ "deopt"(i32 5, i32 0, i32 1) ]
+; CHECK-NEXT:  call void @f() [ "deopt"(i32 5, i32 0, i32 1), "foo"(double 0.000000e+00) ]
+; CHECK-NEXT:  ret i32 2
+
+  %x = call i32 @callee_1() [ "deopt"(i32 5) ]
+  ret i32 %x
+}
+
+define i32 @callee_2() alwaysinline {
+ entry:
+  %v = call i32 @g() [ "deopt"(i32 0, i32 1), "foo"(double 0.0) ]
+  ret i32 %v
+}
+
+define i32 @caller_2(i32 %val) {
+; CHECK-LABEL: @caller_2(
+ entry:
+; CHECK: entry:
+; CHECK-NEXT:   [[RVAL:%[^ ]+]] = call i32 @g() [ "deopt"(i32 %val, i32 0, i32 1), "foo"(double 0.000000e+00) ]
+; CHECK-NEXT:   ret i32 [[RVAL]]
+  %x = call i32 @callee_2() [ "deopt"(i32 %val) ]
+  ret i32 %x
+}
+
+define i32 @callee_3() alwaysinline {
+ entry:
+  %v = call i32 @g() [ "deopt"(i32 0, i32 1), "foo"(double 0.0) ]
+  ret i32 %v
+}
+
+define i32 @caller_3() personality i8 3 {
+; CHECK-LABEL: @caller_3(
+ entry:
+  %x = invoke i32 @callee_3() [ "deopt"(i32 7) ] to label %normal unwind label %unwind
+; CHECK: invoke i32 @g() [ "deopt"(i32 7, i32 0, i32 1), "foo"(double 0.000000e+00) ]
+
+ normal:
+  ret i32 %x
+
+ unwind:
+  %cleanup = landingpad i8 cleanup
+  ret i32 101
+}
+
+define i32 @callee_4() alwaysinline personality i8 3 {
+ entry:
+  %v = invoke i32 @g() [ "deopt"(i32 0, i32 1), "foo"(double 0.0) ] to label %normal unwind label %unwind
+
+ normal:
+  ret i32 %v
+
+ unwind:
+  %cleanup = landingpad i8 cleanup
+  ret i32 100
+}
+
+define i32 @caller_4() {
+; CHECK-LABEL: @caller_4(
+ entry:
+; CHECK: invoke i32 @g() [ "deopt"(i32 7, i32 0, i32 1), "foo"(double 0.000000e+00) ]
+  %x = call i32 @callee_4() [ "deopt"(i32 7) ]
+  ret i32 %x
+}