Index: docs/LangRef.rst =================================================================== --- docs/LangRef.rst +++ docs/LangRef.rst @@ -1509,6 +1509,46 @@ deoptimization, in which case control will not be returned to the compiled frame. +The inliner knows how to inline through calls that have deoptimization +operand bundles. Just like inlining through a normal call site +involves composing the normal and exceptional continuations, inlining +through a call site with a deoptimization operand bundle needs to +appropriately compose the "safe" deoptimization continuation. The +inliner does this by prepending the parent's deoptimization +continuation to every deoptimization continuation in the inlined body. +E.g. inlining ``@f`` into ``@g`` in the following example + +.. code-block:: llvm + + define void @f() { + call void @x() ;; no deopt state + call void @y() [ "deopt"(i32 10) ] + call void @y() [ "deopt"(i32 10), "unknown"(i8* null) ] + ret void + } + + define void @g() { + call void @f() [ "deopt"(i32 20) ] + ret void + } + +will result in + +.. code-block:: llvm + + define void @g() { + call void @x() ;; still no deopt state + call void @y() [ "deopt"(i32 20, i32 10) ] + call void @y() [ "deopt"(i32 20, i32 10), "unknown"(i8* null) ] + ret void + } + +It is the frontend's responsibility to structure or encode the +deoptimization state in a way that syntactically prepending the +caller's deoptimization state to the callee's deoptimization state is +semantically equivalent to composing the caller's deoptimization +continuation after the callee's deoptimization continuation. + .. 
_moduleasm: Module-Level Inline Assembly Index: include/llvm/IR/InstrTypes.h =================================================================== --- include/llvm/IR/InstrTypes.h +++ include/llvm/IR/InstrTypes.h @@ -1162,6 +1162,14 @@ OperandBundleDefT() {} explicit OperandBundleDefT(StringRef Tag, const std::vector<InputTy> &Inputs) : Tag(Tag), Inputs(Inputs) {} + + explicit OperandBundleDefT(StringRef Tag, std::vector<InputTy> &&Inputs) + : Tag(Tag), Inputs(Inputs) {} + + explicit OperandBundleDefT(const OperandBundleUse &OBU) { + Tag = OBU.getTagName(); + Inputs.insert(Inputs.end(), OBU.Inputs.begin(), OBU.Inputs.end()); + } }; typedef OperandBundleDefT<Value *> OperandBundleDef; Index: include/llvm/IR/Instructions.h =================================================================== --- include/llvm/IR/Instructions.h +++ include/llvm/IR/Instructions.h @@ -1459,6 +1459,15 @@ BasicBlock *InsertAtEnd) { return new(1) CallInst(F, NameStr, InsertAtEnd); } + + /// \brief Create a clone of \p CI with a different set of operand bundles and + /// insert it before CI. + /// + /// The returned call instruction is identical to \p CI in every way except that + /// the operand bundles for the new instruction are set to the operand bundles + /// in \p Bundles. + static CallInst *Create(CallInst *CI, ArrayRef<OperandBundleDef> Bundles); + /// CreateMalloc - Generate the IR for a call to malloc: /// 1. Compute the malloc call's argument as the specified type's size, /// possibly multiplied by the array size if the array size is not @@ -3403,6 +3412,14 @@ InsertAtEnd); } + /// \brief Create a clone of \p II with a different set of operand bundles and + /// insert it before II. + /// + /// The returned invoke instruction is identical to \p II in every way except + /// that the operand bundles for the new instruction are set to the operand + /// bundles in \p Bundles. 
+ static InvokeInst *Create(InvokeInst *II, ArrayRef<OperandBundleDef> Bundles); + /// Provide fast operand accessors DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Value); Index: include/llvm/Transforms/Utils/Cloning.h =================================================================== --- include/llvm/Transforms/Utils/Cloning.h +++ include/llvm/Transforms/Utils/Cloning.h @@ -74,6 +74,10 @@ /// size. bool ContainsDynamicAllocas; + /// All cloned call sites that have operand bundles attached are appended to + /// this vector. + std::vector<WeakVH> OperandBundleCallSites; + ClonedCodeInfo() : ContainsCalls(false), ContainsDynamicAllocas(false) {} }; Index: lib/IR/Instructions.cpp =================================================================== --- lib/IR/Instructions.cpp +++ lib/IR/Instructions.cpp @@ -297,6 +297,18 @@ SubclassOptionalData = CI.SubclassOptionalData; } +CallInst *CallInst::Create(CallInst *CI, ArrayRef<OperandBundleDef> OpB) { + CallSite CS(CI); + std::vector<Value *> Args(CS.arg_begin(), CS.arg_end()); + + auto *NewCI = + CallInst::Create(CI->getCalledValue(), Args, OpB, CI->getName(), CI); + NewCI->setTailCallKind(CI->getTailCallKind()); + NewCI->setCallingConv(CI->getCallingConv()); + NewCI->SubclassOptionalData = CI->SubclassOptionalData; + return NewCI; +} + void CallInst::addAttribute(unsigned i, Attribute::AttrKind attr) { AttributeSet PAL = getAttributes(); PAL = PAL.addAttribute(getContext(), i, attr); @@ -571,6 +583,18 @@ SubclassOptionalData = II.SubclassOptionalData; } +InvokeInst *InvokeInst::Create(InvokeInst *II, ArrayRef<OperandBundleDef> OpB) { + CallSite CS(II); + std::vector<Value *> Args(CS.arg_begin(), CS.arg_end()); + + auto *NewII = + InvokeInst::Create(II->getCalledValue(), II->getNormalDest(), + II->getUnwindDest(), Args, OpB, II->getName(), II); + NewII->setCallingConv(II->getCallingConv()); + NewII->SubclassOptionalData = II->SubclassOptionalData; + return NewII; +} + BasicBlock *InvokeInst::getSuccessorV(unsigned idx) const { return getSuccessor(idx); } Index: lib/Transforms/Utils/CloneFunction.cpp 
=================================================================== --- lib/Transforms/Utils/CloneFunction.cpp +++ lib/Transforms/Utils/CloneFunction.cpp @@ -380,6 +380,11 @@ VMap[&*II] = NewInst; // Add instruction map to value. NewBB->getInstList().push_back(NewInst); hasCalls |= (isa<CallInst>(II) && !isa<DbgInfoIntrinsic>(II)); + + if (auto CS = ImmutableCallSite(&*II)) + if (CS.hasOperandBundles()) + CodeInfo->OperandBundleCallSites.push_back(NewInst); + if (const AllocaInst *AI = dyn_cast<AllocaInst>(II)) { if (isa<ConstantInt>(AI->getArraySize())) hasStaticAllocas = true; @@ -451,7 +456,11 @@ NewInst->setName(OldTI->getName()+NameSuffix); NewBB->getInstList().push_back(NewInst); VMap[OldTI] = NewInst; // Add instruction map to value. - + + if (auto CS = ImmutableCallSite(OldTI)) + if (CS.hasOperandBundles()) + CodeInfo->OperandBundleCallSites.push_back(NewInst); + // Recursively clone any reachable successor blocks. const TerminatorInst *TI = BB->getTerminator(); for (const BasicBlock *Succ : TI->successors()) Index: lib/Transforms/Utils/InlineFunction.cpp =================================================================== --- lib/Transforms/Utils/InlineFunction.cpp +++ lib/Transforms/Utils/InlineFunction.cpp @@ -208,8 +208,21 @@ // Create the new invoke instruction. ImmutableCallSite CS(CI); SmallVector<Value*, 8> InvokeArgs(CS.arg_begin(), CS.arg_end()); - InvokeInst *II = InvokeInst::Create(CI->getCalledValue(), Split, UnwindEdge, - InvokeArgs, CI->getName(), BB); + SmallVector<OperandBundleDef, 1> OpBundles; + + for (unsigned i = 0, e = CS.getNumOperandBundles(); i != e; ++i) + // Copy the OperandBundleUse instances to OperandBundleDefs. These two are + // *different* representations of operand bundles: see the documentation + // in InstrTypes.h for more details. + OpBundles.emplace_back(CS.getOperandBundleAt(i)); + + // Note: we're round tripping operand bundles through memory here, and that + // can potentially be avoided with a cleverer API design that we do not have + // as of this time. 
+ + InvokeInst *II = + InvokeInst::Create(CI->getCalledValue(), Split, UnwindEdge, InvokeArgs, + OpBundles, CI->getName(), BB); II->setDebugLoc(CI->getDebugLoc()); II->setCallingConv(CI->getCallingConv()); II->setAttributes(CI->getAttributes()); @@ -1029,9 +1042,16 @@ CalledFunc->isDeclaration() || // call, or call to a vararg function! CalledFunc->getFunctionType()->isVarArg()) return false; - // The inliner does not know how to inline through calls with operand bundles. - if (CS.hasOperandBundles()) - return false; + // The inliner does not know how to inline through calls with operand bundles + // in general ... + if (CS.hasOperandBundles()) { + // ... but it knows how to inline through "deopt" operand bundles. + bool CanInline = + CS.getNumOperandBundles() == 1 && + CS.getOperandBundleAt(0).getTagID() == LLVMContext::OB_deopt; + if (!CanInline) + return false; + } // If the call to the callee cannot throw, set the 'nounwind' flag on any // calls that we inline. @@ -1138,6 +1158,53 @@ HandleByValArgumentInit(Init.first, Init.second, Caller->getParent(), &*FirstNewBlock, IFI); + if (CS.hasOperandBundles()) { + auto ParentDeopt = CS.getOperandBundleAt(0); + assert(ParentDeopt.getTagID() == LLVMContext::OB_deopt && + "Checked on entry!"); + + SmallVector<OperandBundleDef, 2> OpDefs; + + for (auto &VH : InlinedFunctionInfo.OperandBundleCallSites) { + Instruction *I = VH; + + OpDefs.clear(); + CallSite ICS(I); + for (unsigned i = 0, e = ICS.getNumOperandBundles(); i < e; ++i) { + auto ChildOB = ICS.getOperandBundleAt(i); + if (ChildOB.getTagID() != LLVMContext::OB_deopt) { + // If the inlined call has other operand bundles, let them be + OpDefs.emplace_back(ChildOB); + continue; + } + + // It may be useful to separate this logic (of handling operand + // bundles) out to a separate "policy" component if this gets crowded. + // Prepend the parent's deoptimization continuation to the newly + // inlined call's deoptimization continuation. 
+ std::vector<Value *> MergedDeoptArgs(ParentDeopt.Inputs.begin(), + ParentDeopt.Inputs.end()); + MergedDeoptArgs.insert(MergedDeoptArgs.end(), ChildOB.Inputs.begin(), + ChildOB.Inputs.end()); + + OpDefs.emplace_back("deopt", std::move(MergedDeoptArgs)); + } + + Instruction *NewI = nullptr; + if (isa<CallInst>(I)) + NewI = CallInst::Create(cast<CallInst>(I), OpDefs); + else + NewI = InvokeInst::Create(cast<InvokeInst>(I), OpDefs); + + // Note: the RAUW does the appropriate fixup in VMap, so we need to do + // this even if the call returns void. + I->replaceAllUsesWith(NewI); + + VH = nullptr; + I->eraseFromParent(); + } + } + // Update the callgraph if requested. if (IFI.CG) UpdateCallGraphAfterInlining(CS, FirstNewBlock, VMap, IFI); Index: test/Transforms/Inline/deopt-bundles.ll =================================================================== --- /dev/null +++ test/Transforms/Inline/deopt-bundles.ll @@ -0,0 +1,97 @@ +; RUN: opt -S -always-inline < %s | FileCheck %s + +declare void @f() +declare i32 @g() + +define i32 @callee_0() alwaysinline { + entry: + call void @f() + ret i32 2 +} + +define i32 @caller_0() { ; CHECK-LABEL: @caller_0( + entry: +; CHECK: entry: +; CHECK-NEXT: call void @f() +; CHECK-NEXT: ret i32 2 + %x = call i32 @callee_0() [ "deopt"(i32 5) ] + ret i32 %x +} + +define i32 @callee_1() alwaysinline { + entry: + call void @f() [ "deopt"() ] + call void @f() [ "deopt"(i32 0, i32 1) ] + call void @f() [ "deopt"(i32 0, i32 1), "foo"(double 0.0) ] + ret i32 2 +} + +define i32 @caller_1() { ; CHECK-LABEL: @caller_1( + entry: +; CHECK: entry: +; CHECK-NEXT: call void @f() [ "deopt"(i32 5) ] +; CHECK-NEXT: call void @f() [ "deopt"(i32 5, i32 0, i32 1) ] +; CHECK-NEXT: call void @f() [ "deopt"(i32 5, i32 0, i32 1), "foo"(double 0.000000e+00) ] +; CHECK-NEXT: ret i32 2 + + %x = call i32 @callee_1() [ "deopt"(i32 5) ] + ret i32 %x +} + +define i32 @callee_2() alwaysinline { + entry: + %v = call i32 @g() [ "deopt"(i32 0, i32 1), "foo"(double 0.0) ] + ret i32 %v +} + +define i32 @caller_2(i32
%val) { +; CHECK-LABEL: @caller_2( + entry: +; CHECK: entry: +; CHECK-NEXT: [[RVAL:%[^ ]+]] = call i32 @g() [ "deopt"(i32 %val, i32 0, i32 1), "foo"(double 0.000000e+00) ] +; CHECK-NEXT: ret i32 [[RVAL]] + %x = call i32 @callee_2() [ "deopt"(i32 %val) ] + ret i32 %x +} + +define i32 @callee_3() alwaysinline { + entry: + %v = call i32 @g() [ "deopt"(i32 0, i32 1), "foo"(double 0.0) ] + ret i32 %v +} + +define i32 @caller_3() personality i8 3 { +; CHECK-LABEL: @caller_3( + entry: + %x = invoke i32 @callee_3() [ "deopt"(i32 7) ] to label %normal unwind label %unwind +; CHECK: invoke i32 @g() [ "deopt"(i32 7, i32 0, i32 1), "foo"(double 0.000000e+00) ] + + normal: + ret i32 %x + + unwind: + %cleanup = landingpad i8 cleanup + ret i32 101 +} + +define i32 @callee_4() alwaysinline personality i8 3 { + entry: + %v = invoke i32 @g() [ "deopt"(i32 0, i32 1), "foo"(double 0.0) ] to label %normal unwind label %unwind + + normal: + ret i32 %v + + unwind: + %cleanup = landingpad i8 cleanup + ret i32 100 +} + +define i32 @caller_4() { +; CHECK-LABEL: @caller_4( + entry: +; CHECK: invoke i32 @g() [ "deopt"(i32 7, i32 0, i32 1), "foo"(double 0.000000e+00) ] + %x = call i32 @callee_4() [ "deopt"(i32 7) ] + ret i32 %x +}