Index: docs/LangRef.rst
===================================================================
--- docs/LangRef.rst
+++ docs/LangRef.rst
@@ -1507,6 +1507,38 @@
 otherwise escaped) and the entire visible heap.  Deoptimization
 operand bundles do not capture their operands.
 
+The inliner knows how to inline through calls that have deoptimization
+operand bundles.  Just like inlining through a normal call site
+involves composing the normal and exceptional continuations, inlining
+through a call site with a deoptimization operand bundle needs to
+appropriately compose the "safe" deoptimization continuation.  The
+inliner does this by prepending the parent's deoptimization
+continuation to every deoptimization continuation in the inlined body.
+E.g. inlining ``@f`` into ``@g`` in the following example
+
+.. code-block:: llvm
+
+    define void @f() {
+      call void @x()  ;; no deopt state
+      call void @y() [ "deopt"(i32 10) ]
+      ret void
+    }
+
+    define void @g() {
+      call void @f() [ "deopt"(i32 20) ]
+      ret void
+    }
+
+will result in
+
+.. code-block:: llvm
+
+    define void @g() {
+      call void @x()  ;; still no deopt state
+      call void @y() [ "deopt"(i32 20, i32 10) ]
+      ret void
+    }
+
 .. _moduleasm:
 
 Module-Level Inline Assembly
Index: include/llvm/IR/CallSite.h
===================================================================
--- include/llvm/IR/CallSite.h
+++ include/llvm/IR/CallSite.h
@@ -211,10 +211,12 @@
 }
 
 #define CALLSITE_DELEGATE_GETTER(METHOD) \
-  InstrTy *II = getInstruction();        \
-  return isCall()                        \
-             ? cast<CallInst>(II)->METHOD \
-             : cast<InvokeInst>(II)->METHOD
+  InstrTy *II = getInstruction();        \
+  if (isCall())                          \
+    return cast<CallInst>(II)->METHOD;   \
+  else                                   \
+    return cast<InvokeInst>(II)->METHOD  \
+
 #define CALLSITE_DELEGATE_SETTER(METHOD) \
   InstrTy *II = getInstruction();        \
@@ -387,6 +389,10 @@
     CALLSITE_DELEGATE_GETTER(getOperandBundle(ID));
   }
 
+  Instruction *cloneWithOperandBundles(ArrayRef<OperandBundleDef> OpB) {
+    CALLSITE_DELEGATE_GETTER(cloneWithOperandBundles(OpB));
+  }
+
 #undef CALLSITE_DELEGATE_GETTER
 #undef CALLSITE_DELEGATE_SETTER
 
Index: include/llvm/IR/InstrTypes.h
===================================================================
--- include/llvm/IR/InstrTypes.h
+++ include/llvm/IR/InstrTypes.h
@@ -1162,6 +1162,14 @@
   OperandBundleDefT() {}
   explicit OperandBundleDefT(StringRef Tag, const std::vector<InputTy> &Inputs)
       : Tag(Tag), Inputs(Inputs) {}
+
+  explicit OperandBundleDefT(StringRef Tag, std::vector<InputTy> &&Inputs)
+      : Tag(Tag), Inputs(Inputs) {}
+
+  explicit OperandBundleDefT(const OperandBundleUse &OBU) {
+    Tag = OBU.getTagName();
+    Inputs.insert(Inputs.end(), OBU.Inputs.begin(), OBU.Inputs.end());
+  }
 };
 
 typedef OperandBundleDefT<Value *> OperandBundleDef;
Index: include/llvm/IR/Instructions.h
===================================================================
--- include/llvm/IR/Instructions.h
+++ include/llvm/IR/Instructions.h
@@ -1459,6 +1459,11 @@
                           BasicBlock *InsertAtEnd) {
     return new(1) CallInst(F, NameStr, InsertAtEnd);
   }
+
+  /// \brief Clone this call instruction with a different set of operand
+  /// bundles.
+  CallInst *cloneWithOperandBundles(ArrayRef<OperandBundleDef> Bundles);
+
   /// CreateMalloc - Generate the IR for a call to malloc:
   /// 1. Compute the malloc call's argument as the specified type's size,
   ///    possibly multiplied by the array size if the array size is not
@@ -3403,6 +3408,10 @@
                             InsertAtEnd);
   }
 
+  /// \brief Clone this invoke instruction with a different set of operand
+  /// bundles.
+  InvokeInst *cloneWithOperandBundles(ArrayRef<OperandBundleDef> Bundles);
+
   /// Provide fast operand accessors
   DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Value);
 
Index: include/llvm/Transforms/Utils/Cloning.h
===================================================================
--- include/llvm/Transforms/Utils/Cloning.h
+++ include/llvm/Transforms/Utils/Cloning.h
@@ -74,6 +74,10 @@
   /// size.
   bool ContainsDynamicAllocas;
 
+  /// All cloned call sites that have operand bundles attached are appended to
+  /// this vector.
+  std::vector<WeakVH> OperandBundleCallSites;
+
   ClonedCodeInfo() : ContainsCalls(false), ContainsDynamicAllocas(false) {}
 };
 
Index: lib/IR/Instructions.cpp
===================================================================
--- lib/IR/Instructions.cpp
+++ lib/IR/Instructions.cpp
@@ -297,6 +297,15 @@
   SubclassOptionalData = CI.SubclassOptionalData;
 }
 
+CallInst *CallInst::cloneWithOperandBundles(ArrayRef<OperandBundleDef> OpB) {
+  std::vector<Value *> Args(op_begin(), op_begin() + getNumArgOperands());
+  auto *CI = CallInst::Create(getCalledValue(), Args, OpB, getName());
+  CI->setTailCallKind(getTailCallKind());
+  CI->setCallingConv(getCallingConv());
+  CI->SubclassOptionalData = SubclassOptionalData;
+  return CI;
+}
+
 void CallInst::addAttribute(unsigned i, Attribute::AttrKind attr) {
   AttributeSet PAL = getAttributes();
   PAL = PAL.addAttribute(getContext(), i, attr);
@@ -571,6 +580,16 @@
   SubclassOptionalData = II.SubclassOptionalData;
 }
 
+InvokeInst *
+InvokeInst::cloneWithOperandBundles(ArrayRef<OperandBundleDef> OpB) {
+  std::vector<Value *> Args(op_begin(), op_begin() + getNumArgOperands());
+  auto *II = InvokeInst::Create(getCalledValue(), getNormalDest(),
+                                getUnwindDest(), Args, OpB, getName());
+  II->setCallingConv(getCallingConv());
+  II->SubclassOptionalData = SubclassOptionalData;
+  return II;
+}
+
 BasicBlock *InvokeInst::getSuccessorV(unsigned idx) const {
   return getSuccessor(idx);
 }
Index: lib/Transforms/Utils/CloneFunction.cpp
===================================================================
--- lib/Transforms/Utils/CloneFunction.cpp
+++ lib/Transforms/Utils/CloneFunction.cpp
@@ -373,6 +373,11 @@
       VMap[&*II] = NewInst; // Add instruction map to value.
       NewBB->getInstList().push_back(NewInst);
       hasCalls |= (isa<CallInst>(II) && !isa<DbgInfoIntrinsic>(II));
+
+      if (auto CS = ImmutableCallSite(&*II))
+        if (CS.hasOperandBundles())
+          CodeInfo->OperandBundleCallSites.push_back(NewInst);
+
       if (const AllocaInst *AI = dyn_cast<AllocaInst>(II)) {
         if (isa<ConstantInt>(AI->getArraySize()))
           hasStaticAllocas = true;
@@ -444,7 +449,11 @@
   NewInst->setName(OldTI->getName()+NameSuffix);
   NewBB->getInstList().push_back(NewInst);
   VMap[OldTI] = NewInst; // Add instruction map to value.
-
+
+  if (auto CS = ImmutableCallSite(OldTI))
+    if (CS.hasOperandBundles())
+      CodeInfo->OperandBundleCallSites.push_back(NewInst);
+
   // Recursively clone any reachable successor blocks.
   const TerminatorInst *TI = BB->getTerminator();
   for (const BasicBlock *Succ : TI->successors())
Index: lib/Transforms/Utils/InlineFunction.cpp
===================================================================
--- lib/Transforms/Utils/InlineFunction.cpp
+++ lib/Transforms/Utils/InlineFunction.cpp
@@ -208,8 +208,13 @@
     // Create the new invoke instruction.
     ImmutableCallSite CS(CI);
     SmallVector<Value*, 8> InvokeArgs(CS.arg_begin(), CS.arg_end());
-    InvokeInst *II = InvokeInst::Create(CI->getCalledValue(), Split, UnwindEdge,
-                                        InvokeArgs, CI->getName(), BB);
+    SmallVector<OperandBundleDef, 1> OpBundles;
+    for (unsigned i = 0, e = CS.getNumOperandBundles(); i != e; ++i)
+      OpBundles.emplace_back(CS.getOperandBundleAt(i));
+
+    InvokeInst *II =
+        InvokeInst::Create(CI->getCalledValue(), Split, UnwindEdge, InvokeArgs,
+                           OpBundles, CI->getName(), BB);
     II->setDebugLoc(CI->getDebugLoc());
     II->setCallingConv(CI->getCallingConv());
     II->setAttributes(CI->getAttributes());
@@ -1030,8 +1035,13 @@
       CalledFunc->getFunctionType()->isVarArg()) return false;
 
   // The inliner does not know how to inline through calls with operand bundles.
-  if (CS.hasOperandBundles())
-    return false;
+  if (CS.hasOperandBundles()) {
+    bool CanInline =
+        CS.getNumOperandBundles() == 1 &&
+        CS.getOperandBundleAt(0).getTagID() == LLVMContext::OB_deopt;
+    if (!CanInline)
+      return false;
+  }
 
   // If the call to the callee cannot throw, set the 'nounwind' flag on any
   // calls that we inline.
@@ -1138,6 +1148,55 @@
     HandleByValArgumentInit(Init.first, Init.second, Caller->getParent(),
                             &*FirstNewBlock, IFI);
 
+  if (CS.hasOperandBundles()) {
+    auto ParentDeopt = CS.getOperandBundleAt(0);
+    assert(ParentDeopt.getTagID() == LLVMContext::OB_deopt &&
+           "Checked on entry!");
+
+    SmallVector<OperandBundleDef, 2> OpDefs;
+
+    for (auto &VH : InlinedFunctionInfo.OperandBundleCallSites) {
+      Instruction *I = VH;
+
+      OpDefs.clear();
+      CallSite ICS(I);
+      bool Found = false;
+      for (unsigned i = 0, e = ICS.getNumOperandBundles(); i < e; ++i) {
+        auto ChildOB = ICS.getOperandBundleAt(i);
+        if (ChildOB.getTagID() != LLVMContext::OB_deopt) {
+          // If the inlined call has other operand bundles, let them be
+          OpDefs.emplace_back(ChildOB);
+          continue;
+        }
+
+        // It may be useful to separate this logic (of handling operand
+        // bundles) out to a separate "policy" component if this gets crowded.
+
+        assert(!Found && "Only one deopt operand bundle allowed!");
+        Found = true;
+
+        // Prepend the parent's deoptimization continuation to the newly
+        // inlined call's deoptimization continuation.
+        std::vector<Value *> MergedDeoptArgs(ParentDeopt.Inputs.begin(),
+                                             ParentDeopt.Inputs.end());
+        MergedDeoptArgs.insert(MergedDeoptArgs.end(), ChildOB.Inputs.begin(),
+                               ChildOB.Inputs.end());
+
+        OpDefs.emplace_back(ChildOB.getTagName(), std::move(MergedDeoptArgs));
+      }
+
+      Instruction *NewI = ICS.cloneWithOperandBundles(OpDefs);
+      I->getParent()->getInstList().insert(I->getIterator(), NewI);
+
+      // Note: the RAUW does the appropriate fixup in VMap, so we need to do
+      // this even if the call returns void.
+      I->replaceAllUsesWith(NewI);
+
+      VH = nullptr;
+      I->eraseFromParent();
+    }
+  }
+
   // Update the callgraph if requested.
   if (IFI.CG)
     UpdateCallGraphAfterInlining(CS, FirstNewBlock, VMap, IFI);
Index: test/Transforms/Inline/deopt-bundles.ll
===================================================================
--- /dev/null
+++ test/Transforms/Inline/deopt-bundles.ll
@@ -0,0 +1,97 @@
+; RUN: opt -S -always-inline < %s | FileCheck %s
+
+declare void @f()
+declare i32 @g()
+
+define i32 @callee_0() alwaysinline {
+ entry:
+  call void @f()
+  ret i32 2
+}
+
+define i32 @caller_0() {
+; CHECK-LABEL: @caller_0(
+ entry:
+; CHECK: entry:
+; CHECK-NEXT:  call void @f()
+; CHECK-NEXT:  ret i32 2
+  %x = call i32 @callee_0() [ "deopt"(i32 5) ]
+  ret i32 %x
+}
+
+define i32 @callee_1() alwaysinline {
+ entry:
+  call void @f() [ "deopt"() ]
+  call void @f() [ "deopt"(i32 0, i32 1) ]
+  call void @f() [ "deopt"(i32 0, i32 1), "foo"(double 0.0) ]
+  ret i32 2
+}
+
+define i32 @caller_1() {
+; CHECK-LABEL: @caller_1(
+ entry:
+; CHECK: entry:
+; CHECK-NEXT:  call void @f() [ "deopt"(i32 5) ]
+; CHECK-NEXT:  call void @f() [ "deopt"(i32 5, i32 0, i32 1) ]
+; CHECK-NEXT:  call void @f() [ "deopt"(i32 5, i32 0, i32 1), "foo"(double 0.000000e+00) ]
+; CHECK-NEXT:  ret i32 2
+
+  %x = call i32 @callee_1() [ "deopt"(i32 5) ]
+  ret i32 %x
+}
+
+define i32 @callee_2() alwaysinline {
+ entry:
+  %v = call i32 @g() [ "deopt"(i32 0, i32 1), "foo"(double 0.0) ]
+  ret i32 %v
+}
+
+define i32 @caller_2(i32 %val) {
+; CHECK-LABEL: @caller_2(
+ entry:
+; CHECK: entry:
+; CHECK-NEXT:  [[RVAL:%[^ ]+]] = call i32 @g() [ "deopt"(i32 %val, i32 0, i32 1), "foo"(double 0.000000e+00) ]
+; CHECK-NEXT:  ret i32 [[RVAL]]
+  %x = call i32 @callee_2() [ "deopt"(i32 %val) ]
+  ret i32 %x
+}
+
+define i32 @callee_3() alwaysinline {
+ entry:
+  %v = call i32 @g() [ "deopt"(i32 0, i32 1), "foo"(double 0.0) ]
+  ret i32 %v
+}
+
+define i32 @caller_3() personality i8 3 {
+; CHECK-LABEL: @caller_3(
+ entry:
+  %x = invoke i32 @callee_3() [ "deopt"(i32 7) ] to label %normal unwind label %unwind
+; CHECK: invoke i32 @g() [ "deopt"(i32 7, i32 0, i32 1), "foo"(double 0.000000e+00) ]
+
+ normal:
+  ret i32 %x
+
+ unwind:
+  %cleanup = landingpad i8 cleanup
+  ret i32 101
+}
+
+define i32 @callee_4() alwaysinline personality i8 3 {
+ entry:
+  %v = invoke i32 @g() [ "deopt"(i32 0, i32 1), "foo"(double 0.0) ] to label %normal unwind label %unwind
+
+ normal:
+  ret i32 %v
+
+ unwind:
+  %cleanup = landingpad i8 cleanup
+  ret i32 100
+}
+
+define i32 @caller_4() {
+; CHECK-LABEL: @caller_4(
+ entry:
+; CHECK: invoke i32 @g() [ "deopt"(i32 7, i32 0, i32 1), "foo"(double 0.000000e+00) ]
+  %x = call i32 @callee_4() [ "deopt"(i32 7) ]
+  ret i32 %x
+}