Index: docs/LangRef.rst
===================================================================
--- docs/LangRef.rst
+++ docs/LangRef.rst
@@ -1534,6 +1534,8 @@
 More specific types of operand bundles are described below.
 
+.. _deopt_opbundles:
+
 Deoptimization Operand Bundles
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 
@@ -12102,6 +12104,62 @@
 This intrinsic does nothing, and it's removed by optimizers and ignored
 by codegen.
 
+'``llvm.experimental.deoptimize``' Intrinsic
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+
+::
+
+  declare type @llvm.experimental.deoptimize()
+
+Overview:
+"""""""""
+
+This intrinsic, together with :ref:`deoptimization operand bundles
+<deopt_opbundles>`, allows frontends to express transfer of control and
+frame-local state from one (typically more specialized, hence faster)
+version of a function into another (typically more generic, hence
+slower) version.
+
+In languages with a fully integrated managed runtime like Java this
+intrinsic can be used to implement "uncommon trap"-like functionality.
+In unmanaged languages like C and C++, this intrinsic can be used to
+represent the slow paths of specialized functions.
+
+
+Arguments:
+""""""""""
+
+None.
+
+Semantics:
+""""""""""
+
+The ``@llvm.experimental.deoptimize`` intrinsic executes an attached
+deoptimization continuation (denoted using a :ref:`deoptimization
+operand bundle <deopt_opbundles>`) and returns the value returned by
+the deoptimization continuation.
+
+Deoptimization continuations expressed using ``"deopt"`` operand bundles
+always continue execution to the end of the physical frame containing them,
+so all calls to ``@llvm.experimental.deoptimize`` must be in "tail position":
+
+   - ``@llvm.experimental.deoptimize`` cannot be invoked.
+   - The call must immediately precede a :ref:`ret <i_ret>` instruction.
+   - The ``ret`` instruction must return the value produced by the
+     ``@llvm.experimental.deoptimize`` call if there is one, or void.
+
+Note that the above restrictions imply that the return type for a call
+to ``@llvm.experimental.deoptimize`` will match the return type of its
+immediate caller.
+
+The inliner composes the ``"deopt"`` continuations of the caller into the
+``"deopt"`` continuations present in the inlinee, and also updates calls to
+this intrinsic to return directly from the frame of the function it inlined
+into.
+
 Stack Map Intrinsics
 --------------------
 
Index: include/llvm/IR/BasicBlock.h
===================================================================
--- include/llvm/IR/BasicBlock.h
+++ include/llvm/IR/BasicBlock.h
@@ -111,6 +111,14 @@
   TerminatorInst *getTerminator();
   const TerminatorInst *getTerminator() const;
 
+  /// \brief Returns the call instruction calling @llvm.experimental.deoptimize
+  /// prior to the terminating return instruction of this basic block, if such
+  /// a call is present.  Otherwise, returns null.
+  CallInst *getTerminatingDeoptimizeCall();
+  const CallInst *getTerminatingDeoptimizeCall() const {
+    return const_cast<BasicBlock *>(this)->getTerminatingDeoptimizeCall();
+  }
+
   /// \brief Returns the call instruction marked 'musttail' prior to the
   /// terminating return instruction of this basic block, if such a call is
   /// present.  Otherwise, returns null.
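Taken together, the tail-position rules above admit exactly one IR shape at
each deoptimization point. A minimal sketch of a well-formed use, assuming a
hypothetical specialized function ``@fast_path`` (the function name and the
``"deopt"`` state are illustrative, not part of the patch):

::

  declare i32 @llvm.experimental.deoptimize.i32()

  ; Hypothetical specialized function: the slow path abandons this frame
  ; and continues in a more generic version via the "deopt" continuation.
  define i32 @fast_path(i64 %x, i1 %guard) {
  entry:
    br i1 %guard, label %fast, label %slow

  fast:
    %r = trunc i64 %x to i32
    ret i32 %r

  slow:
    ; Tail position: the intrinsic is called (not invoked), it immediately
    ; precedes the ret, and the ret returns the value it produces.  Its i32
    ; return type matches the caller's, as the verifier below requires.
    %d = call i32 @llvm.experimental.deoptimize.i32() [ "deopt"(i32 7, i64 %x) ]
    ret i32 %d
  }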
Index: include/llvm/IR/CallSite.h
===================================================================
--- include/llvm/IR/CallSite.h
+++ include/llvm/IR/CallSite.h
@@ -453,6 +453,10 @@
     CALLSITE_DELEGATE_GETTER(getOperandBundle(ID));
   }
 
+  unsigned countOperandBundlesOfType(uint32_t ID) const {
+    CALLSITE_DELEGATE_GETTER(countOperandBundlesOfType(ID));
+  }
+
   IterTy arg_begin() const {
     CALLSITE_DELEGATE_GETTER(arg_begin());
   }
Index: include/llvm/IR/Intrinsics.td
===================================================================
--- include/llvm/IR/Intrinsics.td
+++ include/llvm/IR/Intrinsics.td
@@ -593,6 +593,9 @@
 def int_debugtrap : Intrinsic<[]>,
                     GCCBuiltin<"__builtin_debugtrap">;
 
+// Support for dynamic deoptimization (or de-specialization)
+def int_experimental_deoptimize : Intrinsic<[llvm_any_ty], [], []>;
+
 // NOP: calls/invokes to this intrinsic are removed by codegen
 def int_donothing : Intrinsic<[], [], [IntrNoMem]>;
 
Index: lib/IR/BasicBlock.cpp
===================================================================
--- lib/IR/BasicBlock.cpp
+++ lib/IR/BasicBlock.cpp
@@ -162,6 +162,21 @@
   return nullptr;
 }
 
+CallInst *BasicBlock::getTerminatingDeoptimizeCall() {
+  if (InstList.empty())
+    return nullptr;
+  auto *RI = dyn_cast<ReturnInst>(&InstList.back());
+  if (!RI || RI == &InstList.front())
+    return nullptr;
+
+  if (auto *CI = dyn_cast_or_null<CallInst>(RI->getPrevNode()))
+    if (Function *F = CI->getCalledFunction())
+      if (F->getIntrinsicID() == Intrinsic::experimental_deoptimize)
+        return CI;
+
+  return nullptr;
+}
+
 Instruction* BasicBlock::getFirstNonPHI() {
   for (Instruction &I : *this)
     if (!isa<PHINode>(I))
Index: lib/IR/Verifier.cpp
===================================================================
--- lib/IR/Verifier.cpp
+++ lib/IR/Verifier.cpp
@@ -4076,6 +4076,29 @@
            "masked_store: vector mask must be same length as data", CS);
     break;
   }
+
+  case Intrinsic::experimental_deoptimize: {
+    Assert(CS.isCall(), "experimental_deoptimize cannot be invoked", CS);
+    Assert(CS.countOperandBundlesOfType(LLVMContext::OB_deopt) == 1,
+           "experimental_deoptimize must have exactly one "
+           "\"deopt\" operand bundle");
+    Assert(CS.getType() == CS.getInstruction()->getFunction()->getReturnType(),
+           "experimental_deoptimize return type must match caller return type");
+
+    if (CS.isCall()) {
+      auto *DeoptCI = CS.getInstruction();
+      auto *RI = dyn_cast<ReturnInst>(DeoptCI->getNextNode());
+      Assert(RI,
+             "calls to experimental_deoptimize must be followed by a return");
+
+      if (!CS.getType()->isVoidTy() && RI)
+        Assert(RI->getReturnValue() == DeoptCI,
+               "calls to experimental_deoptimize must be followed by a return "
+               "of the value computed by experimental_deoptimize");
+    }
+
+    break;
+  }
 };
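The new ``BasicBlock::getTerminatingDeoptimizeCall`` and the verifier case
above are two views of the same invariant: a deoptimize call must sit
immediately before its block's ``ret``. A sketch of the two block shapes
involved (IR fragments only, not a complete module; ``@sideeffect`` is a
made-up name):

::

  ; Matched by getTerminatingDeoptimizeCall(): the call is the direct
  ; predecessor of the terminating ret (RI->getPrevNode()).
  matched:
    %v = call i8 @llvm.experimental.deoptimize.i8() [ "deopt"(i32 0) ]
    ret i8 %v

  ; Not matched, and rejected by the verifier ("must be followed by a
  ; return"): another instruction separates the call from the ret.
  unmatched:
    %w = call i8 @llvm.experimental.deoptimize.i8() [ "deopt"(i32 0) ]
    call void @sideeffect()
    ret i8 %w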
Index: lib/Transforms/Utils/InlineFunction.cpp
===================================================================
--- lib/Transforms/Utils/InlineFunction.cpp
+++ lib/Transforms/Utils/InlineFunction.cpp
@@ -1613,7 +1613,7 @@
       replaceDbgDeclareForAlloca(AI, AI, DIB, /*Deref=*/false);
   }
 
-  bool InlinedMustTailCalls = false;
+  bool InlinedMustTailCalls = false, InlinedDeoptimizeCalls = false;
   if (InlinedFunctionInfo.ContainsCalls) {
     CallInst::TailCallKind CallSiteTailKind = CallInst::TCK_None;
     if (CallInst *CI = dyn_cast<CallInst>(TheCall))
@@ -1626,6 +1626,10 @@
       if (!CI)
         continue;
 
+      if (Function *F = CI->getCalledFunction())
+        InlinedDeoptimizeCalls |=
+            F->getIntrinsicID() == Intrinsic::experimental_deoptimize;
+
       // We need to reduce the strength of any inlined tail calls.  For
       // musttail, we have to avoid introducing potential unbounded stack
       // growth.  For example, if functions 'f' and 'g' are mutually recursive
@@ -1799,6 +1803,45 @@
     }
   }
 
+  if (InlinedDeoptimizeCalls) {
+    Function *NewDeoptIntrinsic = nullptr;
+    if (Caller->getReturnType() != TheCall->getType())
+      NewDeoptIntrinsic = Intrinsic::getDeclaration(
+          Caller->getParent(), Intrinsic::experimental_deoptimize,
+          {Caller->getReturnType()});
+
+    SmallVector<ReturnInst *, 8> NormalReturns;
+    for (ReturnInst *RI : Returns) {
+      CallInst *DeoptCall = RI->getParent()->getTerminatingDeoptimizeCall();
+      if (!DeoptCall) {
+        NormalReturns.push_back(RI);
+        continue;
+      }
+      if (!NewDeoptIntrinsic)
+        continue;
+
+      auto *CurBB = RI->getParent();
+      RI->eraseFromParent();
+
+      SmallVector<OperandBundleDef, 1> OpBundles;
+      DeoptCall->getOperandBundlesAsDefs(OpBundles);
+      DeoptCall->eraseFromParent();
+      assert(!OpBundles.empty() &&
+             "Expected at least the deopt operand bundle");
+
+      IRBuilder<> Builder(CurBB);
+      Value *NewDeoptCall =
+          Builder.CreateCall(NewDeoptIntrinsic, {}, OpBundles);
+      if (NewDeoptCall->getType()->isVoidTy())
+        Builder.CreateRetVoid();
+      else
+        Builder.CreateRet(NewDeoptCall);
+    }
+
+    // Leave behind the normal returns so we can merge control flow.
+    std::swap(Returns, NormalReturns);
+  }
+
   // Handle any inlined musttail call sites.  In order for a new call site to
   // be musttail, the source of the clone and the inlined call site must have
   // been musttail.  Therefore it's safe to return without merging control into
Index: test/Transforms/Inline/deoptimize-intrinsic.ll
===================================================================
--- /dev/null
+++ test/Transforms/Inline/deoptimize-intrinsic.ll
@@ -0,0 +1,38 @@
+; RUN: opt -S -always-inline < %s | FileCheck %s
+
+declare i8 @llvm.experimental.deoptimize.i8()
+
+define i8 @callee(i1 %c) alwaysinline {
+  br i1 %c, label %left, label %right
+
+left:
+  %v = call i8 @llvm.experimental.deoptimize.i8() [ "deopt"(i32 1) ]
+  ret i8 %v
+
+right:
+  ret i8 0
+}
+
+define void @caller_0(i1 %c, i8* %ptr) {
+; CHECK-LABEL: @caller_0(
+entry:
+  %v = call i8 @callee(i1 %c) [ "deopt"(i32 2) ]
+; CHECK: left.i:
+; CHECK-NEXT: call void @llvm.experimental.deoptimize.isVoid() [ "deopt"(i32 2, i32 1) ]
+; CHECK-NEXT: ret void
+
+  store i8 %v, i8* %ptr
+  ret void
+}
+
+define i32 @caller_1(i1 %c, i8* %ptr) {
+; CHECK-LABEL: @caller_1(
+entry:
+  %v = call i8 @callee(i1 %c) [ "deopt"(i32 3) ]
+; CHECK: left.i:
+; CHECK-NEXT: %0 = call i32 @llvm.experimental.deoptimize.i32() [ "deopt"(i32 3, i32 1) ]
+; CHECK-NEXT: ret i32 %0
+
+  store i8 %v, i8* %ptr
+  ret i32 42
+}
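The CHECK lines above pin down only the deoptimizing block. For orientation,
after ``-always-inline`` the whole of ``@caller_1`` should look roughly like
the sketch below; block names and the exact CFG shape are illustrative, and
only the retyped intrinsic, the composed bundle ``"deopt"(i32 3, i32 1)``,
and the direct ``ret`` are actually guaranteed by the test:

::

  define i32 @caller_1(i1 %c, i8* %ptr) {
  entry:
    br i1 %c, label %left.i, label %right.i

  left.i:
    ; Caller state (i32 3) is prepended to callee state (i32 1), and the
    ; intrinsic is retyped from i8 to the caller's i32 return type so the
    ; call stays in tail position after inlining.
    %0 = call i32 @llvm.experimental.deoptimize.i32() [ "deopt"(i32 3, i32 1) ]
    ret i32 %0

  right.i:
    br label %callee.exit

  callee.exit:
    ; The sole normal return of @callee produced i8 0, which the merged
    ; control flow stores before the caller's own return.
    store i8 0, i8* %ptr
    ret i32 42
  }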
Index: test/Verifier/deoptimize-intrinsic.ll
===================================================================
--- /dev/null
+++ test/Verifier/deoptimize-intrinsic.ll
@@ -0,0 +1,42 @@
+; RUN: not opt -verify < %s 2>&1 | FileCheck %s
+
+declare i8 @llvm.experimental.deoptimize.i8()
+declare void @llvm.experimental.deoptimize.isVoid()
+
+declare void @unknown()
+
+define void @f_notail() {
+entry:
+  call void @llvm.experimental.deoptimize.isVoid() [ "deopt"() ]
+; CHECK: calls to experimental_deoptimize must be followed by a return
+  call void @unknown()
+  ret void
+}
+
+define void @f_nodeopt() {
+entry:
+  call void @llvm.experimental.deoptimize.isVoid()
+; CHECK: experimental_deoptimize must have exactly one "deopt" operand bundle
+  ret void
+}
+
+define void @f_invoke() personality i8 3 {
+entry:
+  invoke void @llvm.experimental.deoptimize.isVoid() to label %ok unwind label %not_ok
+; CHECK: experimental_deoptimize cannot be invoked
+
+ok:
+  ret void
+
+not_ok:
+  %0 = landingpad { i8*, i32 }
+          filter [0 x i8*] zeroinitializer
+  ret void
+}
+
+define i8 @f_incorrect_return() {
+entry:
+  %val = call i8 @llvm.experimental.deoptimize.i8() [ "deopt"() ]
+; CHECK: calls to experimental_deoptimize must be followed by a return of the value computed by experimental_deoptimize
+  ret i8 0
+}
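For contrast with the negative cases above, here is a call that satisfies
every rule the verifier enforces: exactly one ``"deopt"`` bundle, a call
rather than an invoke, a ``ret`` immediately after it returning its value,
and a return type matching the enclosing function. A minimal sketch, not one
of the patch's test files:

::

  declare i8 @llvm.experimental.deoptimize.i8()

  define i8 @f_correct() {
  entry:
    %val = call i8 @llvm.experimental.deoptimize.i8() [ "deopt"(i32 0) ]
    ret i8 %val
  }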