Index: docs/LangRef.rst
===================================================================
--- docs/LangRef.rst
+++ docs/LangRef.rst
@@ -1534,6 +1534,8 @@
 More specific types of operand bundles are described below.
 
+.. _deopt_opbundles:
+
 Deoptimization Operand Bundles
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 
@@ -12102,6 +12104,81 @@
 This intrinsic does nothing, and it's removed by optimizers and ignored by codegen.
 
+'``llvm.experimental.deoptimize``' Intrinsic
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+
+::
+
+      declare type0 @llvm.experimental.deoptimize(type1) [ "deopt"(...) ]
+
+Overview:
+"""""""""
+
+This intrinsic, together with :ref:`deoptimization operand bundles
+<deopt_opbundles>`, allows frontends to express transfer of control and
+frame-local state from the currently executing (typically more specialized,
+hence faster) version of a function into another (typically more generic,
+hence slower) version.
+
+In languages with a fully integrated managed runtime, like Java and JavaScript,
+this intrinsic can be used to implement "uncommon trap" or "side exit" like
+functionality.  In unmanaged languages like C and C++, this intrinsic can be
+used to represent the slow paths of specialized functions.
+
+Arguments:
+""""""""""
+
+The intrinsic takes one argument of arbitrary type.  See the
+:ref:`Lowering <deoptimize_lowering>` section for information on how this
+argument is used.
+
+Semantics:
+""""""""""
+
+The ``@llvm.experimental.deoptimize`` intrinsic executes an attached
+deoptimization continuation (denoted using a :ref:`deoptimization
+operand bundle <deopt_opbundles>`) and returns the value returned by
+the deoptimization continuation.  Defining the semantic properties of
+the continuation itself is out of scope of the language reference --
+as far as LLVM is concerned, the deoptimization continuation can
+invoke arbitrary side effects, including reading from and writing to
+the entire heap.
+
+Deoptimization continuations expressed using ``"deopt"`` operand bundles always
+continue execution to the end of the physical frame containing them, so all
+calls to ``@llvm.experimental.deoptimize`` must be in "tail position":
+
+   - ``@llvm.experimental.deoptimize`` cannot be invoked.
+   - The call must immediately precede a :ref:`ret <i_ret>` instruction.
+   - The ``ret`` instruction must return the value produced by the
+     ``@llvm.experimental.deoptimize`` call if there is one, or void.
+
+Note that the above restrictions imply that the return type of a call to
+``@llvm.experimental.deoptimize`` will always match the return type of its
+immediate caller.
+
+The inliner composes the ``"deopt"`` continuations of the caller into the
+``"deopt"`` continuations present in the inlinee, and also updates calls to
+this intrinsic to return directly from the frame of the function they were
+inlined into.
+
+.. _deoptimize_lowering:
+
+Lowering:
+"""""""""
+
+The generic specification of ``@llvm.experimental.deoptimize`` does not
+restrict itself to a specific lowering, but currently all calls to
+``@llvm.experimental.deoptimize`` are lowered to calls to the runtime symbol
+``__llvm_deoptimize``.  The call argument to the intrinsic is passed to
+``__llvm_deoptimize`` according to the calling convention on the
+``@llvm.experimental.deoptimize`` call.  It is the responsibility of the
+frontend to ensure that the ``__llvm_deoptimize`` runtime function is able to
+handle the incoming call arguments.
+
 
 Stack Map Intrinsics
 --------------------
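Illustration (not part of the patch): a minimal IR sketch of the "side exit" pattern the LangRef text above describes -- a guard branches to a block that deoptimizes in tail position. The function name, the intrinsic argument, and the ``"deopt"`` state are made up for this sketch::

      declare i32 @llvm.experimental.deoptimize.i32.i32(i32)

      ; Hypothetical specialized division: the zero-divisor case bails out to
      ; the generic implementation via the attached deoptimization continuation.
      define i32 @specialized_div(i32 %a, i32 %b) {
      entry:
        %is_zero = icmp eq i32 %b, 0
        br i1 %is_zero, label %deopt, label %fast

      deopt:
        ; Tail position: exactly one "deopt" bundle, the call immediately
        ; precedes the ret, the ret returns the call's value, and the call's
        ; type (i32) matches the return type of @specialized_div.
        %rv = call i32 @llvm.experimental.deoptimize.i32.i32(i32 0) [ "deopt"(i32 %a, i32 %b) ]
        ret i32 %rv

      fast:
        %q = sdiv i32 %a, %b
        ret i32 %q
      }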
Index: include/llvm/ADT/STLExtras.h
===================================================================
--- include/llvm/ADT/STLExtras.h
+++ include/llvm/ADT/STLExtras.h
@@ -386,6 +386,13 @@
   return std::find(Range.begin(), Range.end(), val);
 }
 
+/// Provide wrappers to std::remove_if which take ranges instead of having to
+/// pass begin/end explicitly.
+template <class R, class UnaryPredicate>
+auto remove_if(R &&Range, UnaryPredicate &&P) -> decltype(Range.begin()) {
+  return std::remove_if(Range.begin(), Range.end(), P);
+}
+
 //===----------------------------------------------------------------------===//
 //     Extra additions to <memory>
 //===----------------------------------------------------------------------===//
Index: include/llvm/IR/BasicBlock.h
===================================================================
--- include/llvm/IR/BasicBlock.h
+++ include/llvm/IR/BasicBlock.h
@@ -111,6 +111,14 @@
   TerminatorInst *getTerminator();
   const TerminatorInst *getTerminator() const;
 
+  /// \brief Returns the call instruction calling @llvm.experimental.deoptimize
+  /// prior to the terminating return instruction of this basic block, if such a
+  /// call is present.  Otherwise, returns null.
+  CallInst *getTerminatingDeoptimizeCall();
+  const CallInst *getTerminatingDeoptimizeCall() const {
+    return const_cast<BasicBlock *>(this)->getTerminatingDeoptimizeCall();
+  }
+
   /// \brief Returns the call instruction marked 'musttail' prior to the
   /// terminating return instruction of this basic block, if such a call is
   /// present.  Otherwise, returns null.
Index: include/llvm/IR/CallSite.h
===================================================================
--- include/llvm/IR/CallSite.h
+++ include/llvm/IR/CallSite.h
@@ -453,6 +453,10 @@
     CALLSITE_DELEGATE_GETTER(getOperandBundle(ID));
   }
 
+  unsigned countOperandBundlesOfType(uint32_t ID) const {
+    CALLSITE_DELEGATE_GETTER(countOperandBundlesOfType(ID));
+  }
+
   IterTy arg_begin() const {
     CALLSITE_DELEGATE_GETTER(arg_begin());
   }
Index: include/llvm/IR/Intrinsics.td
===================================================================
--- include/llvm/IR/Intrinsics.td
+++ include/llvm/IR/Intrinsics.td
@@ -593,6 +593,9 @@
 def int_debugtrap : Intrinsic<[]>,
                     GCCBuiltin<"__builtin_debugtrap">;
 
+// Support for dynamic deoptimization (or de-specialization)
+def int_experimental_deoptimize : Intrinsic<[llvm_any_ty], [llvm_any_ty], []>;
+
 // NOP: calls/invokes to this intrinsic are removed by codegen
 def int_donothing : Intrinsic<[], [], [IntrNoMem]>;
Index: lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
===================================================================
--- lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -5443,6 +5443,8 @@
     setValue(&I, N);
     return nullptr;
   }
+  case Intrinsic::experimental_deoptimize:
+    return "__llvm_deoptimize";
   }
 }
Index: lib/IR/BasicBlock.cpp
===================================================================
--- lib/IR/BasicBlock.cpp
+++ lib/IR/BasicBlock.cpp
@@ -162,6 +162,21 @@
   return nullptr;
 }
 
+CallInst *BasicBlock::getTerminatingDeoptimizeCall() {
+  if (InstList.empty())
+    return nullptr;
+  auto *RI = dyn_cast<ReturnInst>(&InstList.back());
+  if (!RI || RI == &InstList.front())
+    return nullptr;
+
+  if (auto *CI = dyn_cast_or_null<CallInst>(RI->getPrevNode()))
+    if (Function *F = CI->getCalledFunction())
+      if (F->getIntrinsicID() == Intrinsic::experimental_deoptimize)
+        return CI;
+
+  return nullptr;
+}
+
 Instruction* BasicBlock::getFirstNonPHI() {
   for (Instruction &I : *this)
     if (!isa<PHINode>(I))
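Note on the intrinsic definition above: since ``int_experimental_deoptimize`` is overloaded on both its return type and its argument type (both ``llvm_any_ty``), each distinct instantiation is declared under its own mangled name, and ``BasicBlock::getTerminatingDeoptimizeCall`` recognizes exactly the tail-position shape required by the LangRef text -- a call to one of these overloads immediately followed by the block's ``ret``. The declarations below are the ones used by the tests in this patch, shown only to make the mangling scheme explicit::

      declare i8   @llvm.experimental.deoptimize.i8.i32(i32)      ; returns i8,   takes i32
      declare i8   @llvm.experimental.deoptimize.i8.f64(double)   ; returns i8,   takes double
      declare void @llvm.experimental.deoptimize.isVoid.i32(i32)  ; returns void, takes i32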
Index: lib/IR/Verifier.cpp
===================================================================
--- lib/IR/Verifier.cpp
+++ lib/IR/Verifier.cpp
@@ -4082,6 +4082,29 @@
            "masked_store: vector mask must be same length as data", CS);
     break;
   }
+
+  case Intrinsic::experimental_deoptimize: {
+    Assert(CS.isCall(), "experimental_deoptimize cannot be invoked", CS);
+    Assert(CS.countOperandBundlesOfType(LLVMContext::OB_deopt) == 1,
+           "experimental_deoptimize must have exactly one "
+           "\"deopt\" operand bundle");
+    Assert(CS.getType() == CS.getInstruction()->getFunction()->getReturnType(),
+           "experimental_deoptimize return type must match caller return type");
+
+    if (CS.isCall()) {
+      auto *DeoptCI = CS.getInstruction();
+      auto *RI = dyn_cast<ReturnInst>(DeoptCI->getNextNode());
+      Assert(RI,
+             "calls to experimental_deoptimize must be followed by a return");
+
+      if (!CS.getType()->isVoidTy() && RI)
+        Assert(RI->getReturnValue() == DeoptCI,
+               "calls to experimental_deoptimize must be followed by a return "
+               "of the value computed by experimental_deoptimize");
+    }
+
+    break;
+  }
   };
 }
Index: lib/Target/X86/X86FastISel.cpp
===================================================================
--- lib/Target/X86/X86FastISel.cpp
+++ lib/Target/X86/X86FastISel.cpp
@@ -2713,6 +2713,8 @@
     updateValueMap(II, ResultReg);
     return true;
   }
+  case Intrinsic::experimental_deoptimize:
+    return lowerCallTo(II, "__llvm_deoptimize", II->getNumArgOperands());
   }
 }
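The verifier checks above encode the "tail position" restrictions from the LangRef hunk. For contrast with the negative tests added later in this patch, a minimal well-formed example of the void form (a sketch, not part of the patch)::

      declare void @llvm.experimental.deoptimize.isVoid.i32(i32)

      define void @f_wellformed() {
      entry:
        ; Called (not invoked), carries exactly one "deopt" bundle, and is
        ; immediately followed by a return; the intrinsic returns void, which
        ; matches the caller's return type, so the ret simply returns void.
        call void @llvm.experimental.deoptimize.isVoid.i32(i32 0) [ "deopt"() ]
        ret void
      }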
Index: lib/Transforms/Utils/InlineFunction.cpp
===================================================================
--- lib/Transforms/Utils/InlineFunction.cpp
+++ lib/Transforms/Utils/InlineFunction.cpp
@@ -1613,7 +1613,7 @@
       replaceDbgDeclareForAlloca(AI, AI, DIB, /*Deref=*/false);
   }
 
-  bool InlinedMustTailCalls = false;
+  bool InlinedMustTailCalls = false, InlinedDeoptimizeCalls = false;
   if (InlinedFunctionInfo.ContainsCalls) {
     CallInst::TailCallKind CallSiteTailKind = CallInst::TCK_None;
     if (CallInst *CI = dyn_cast<CallInst>(TheCall))
@@ -1626,6 +1626,10 @@
       if (!CI)
         continue;
 
+      if (Function *F = CI->getCalledFunction())
+        InlinedDeoptimizeCalls |=
+            F->getIntrinsicID() == Intrinsic::experimental_deoptimize;
+
       // We need to reduce the strength of any inlined tail calls.  For
       // musttail, we have to avoid introducing potential unbounded stack
       // growth.  For example, if functions 'f' and 'g' are mutually recursive
@@ -1799,6 +1803,56 @@
     }
   }
 
+  if (InlinedDeoptimizeCalls) {
+    // We need to at least remove the deoptimizing returns from the Return set,
+    // so that the control flow from those returns does not get merged into the
+    // caller (but terminate it instead).  If the caller's return type does not
+    // match the callee's return type, we also need to change the return type
+    // of the intrinsic.
+    if (Caller->getReturnType() == TheCall->getType()) {
+      auto NewEnd = remove_if(Returns, [](ReturnInst *RI) {
+        return RI->getParent()->getTerminatingDeoptimizeCall();
+      });
+      Returns.erase(NewEnd, Returns.end());
+    } else {
+      SmallVector<ReturnInst *, 8> NormalReturns;
+      for (ReturnInst *RI : Returns) {
+        CallInst *DeoptCall = RI->getParent()->getTerminatingDeoptimizeCall();
+        if (!DeoptCall) {
+          NormalReturns.push_back(RI);
+          continue;
+        }
+
+        auto *CurBB = RI->getParent();
+        RI->eraseFromParent();
+
+        Value *CallArg = *DeoptCall->arg_begin();
+        assert((DeoptCall->arg_end() - DeoptCall->arg_begin()) == 1 &&
+               "Only one call argument allowed");
+
+        Function *NewDeoptIntrinsic = Intrinsic::getDeclaration(
+            Caller->getParent(), Intrinsic::experimental_deoptimize,
+            {Caller->getReturnType(), CallArg->getType()});
+
+        SmallVector<OperandBundleDef, 1> OpBundles;
+        DeoptCall->getOperandBundlesAsDefs(OpBundles);
+        DeoptCall->eraseFromParent();
+        assert(!OpBundles.empty() &&
+               "Expected at least the deopt operand bundle");
+
+        IRBuilder<> Builder(CurBB);
+        Value *NewDeoptCall =
+            Builder.CreateCall(NewDeoptIntrinsic, {CallArg}, OpBundles);
+        if (NewDeoptCall->getType()->isVoidTy())
+          Builder.CreateRetVoid();
+        else
+          Builder.CreateRet(NewDeoptCall);
+      }
+
+      // Leave behind the normal returns so we can merge control flow.
+      std::swap(Returns, NormalReturns);
+    }
+  }
+
   // Handle any inlined musttail call sites.  In order for a new call site to be
   // musttail, the source of the clone and the inlined call site must have been
   // musttail.  Therefore it's safe to return without merging control into the
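A sketch of the transformation the hunk above implements (fragment only, not part of the patch): when a callee whose block deoptimizes is inlined into a caller with a different return type, the inlined intrinsic call is re-declared at the caller's return type, the caller's ``"deopt"`` state is prepended to the inlinee's, and the block returns directly instead of merging back into the caller. The ``@caller_1`` CHECK lines in the inline test below verify exactly this shape::

      ; Before inlining: @callee returns i8, the caller returns i32.
      ;   %v = call i8 @callee(i1 %c) [ "deopt"(i32 3) ]
      ;   (inside @callee:  %v = call i8 @llvm.experimental.deoptimize.i8.i32(i32 1) [ "deopt"(i32 1) ])
      ;
      ; After inlining, the deoptimizing block of @callee becomes:
      left.i:
        %0 = call i32 @llvm.experimental.deoptimize.i32.i32(i32 1) [ "deopt"(i32 3, i32 1) ]
        ret i32 %0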
Index: test/CodeGen/X86/deoptimize-intrinsic.ll
===================================================================
--- /dev/null
+++ test/CodeGen/X86/deoptimize-intrinsic.ll
@@ -0,0 +1,40 @@
+; RUN: llc -O3 < %s | FileCheck %s
+; RUN: llc -O0 < %s | FileCheck %s
+
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.11.0"
+
+declare i8 @llvm.experimental.deoptimize.i8.i32(i32)
+declare i8 @llvm.experimental.deoptimize.i8.f64(double)
+
+define i8 @caller_int(i1 %c) {
+; CHECK-LABEL: _caller_int:
+
+  br i1 %c, label %left, label %right
+
+left:
+; CHECK: movl $1, %edi
+; CHECK: callq ___llvm_deoptimize
+
+  %v = call i8 @llvm.experimental.deoptimize.i8.i32(i32 1) [ "deopt"(i32 1) ]
+  ret i8 %v
+
+right:
+  ret i8 0
+}
+
+define i8 @caller_double(i1 %c) {
+; CHECK-LABEL: _caller_double:
+
+  br i1 %c, label %left, label %right
+
+left:
+; CHECK: movsd {{[a-zA-Z0-9_]+}}(%rip), %xmm0 ## xmm0 = mem[0],zero
+; CHECK: callq ___llvm_deoptimize
+
+  %v = call i8 @llvm.experimental.deoptimize.i8.f64(double 1.0) [ "deopt"(i32 1) ]
+  ret i8 %v
+
+right:
+  ret i8 0
+}
Index: test/Transforms/Inline/deoptimize-intrinsic.ll
===================================================================
--- /dev/null
+++ test/Transforms/Inline/deoptimize-intrinsic.ll
@@ -0,0 +1,38 @@
+; RUN: opt -S -always-inline < %s | FileCheck %s
+
+declare i8 @llvm.experimental.deoptimize.i8.i32(i32)
+
+define i8 @callee(i1 %c) alwaysinline {
+  br i1 %c, label %left, label %right
+
+left:
+  %v = call i8 @llvm.experimental.deoptimize.i8.i32(i32 1) [ "deopt"(i32 1) ]
+  ret i8 %v
+
+right:
+  ret i8 0
+}
+
+define void @caller_0(i1 %c, i8* %ptr) {
+; CHECK-LABEL: @caller_0(
+entry:
+  %v = call i8 @callee(i1 %c) [ "deopt"(i32 2) ]
+; CHECK: left.i:
+; CHECK-NEXT:  call void @llvm.experimental.deoptimize.isVoid.i32(i32 1) [ "deopt"(i32 2, i32 1) ]
+; CHECK-NEXT:  ret void
+
+  store i8 %v, i8* %ptr
+  ret void
+}
+
+define i32 @caller_1(i1 %c, i8* %ptr) {
+; CHECK-LABEL: @caller_1(
+entry:
+  %v = call i8 @callee(i1 %c) [ "deopt"(i32 3) ]
+; CHECK: left.i:
+; CHECK-NEXT:  %0 = call i32 @llvm.experimental.deoptimize.i32.i32(i32 1) [ "deopt"(i32 3, i32 1) ]
+; CHECK-NEXT:  ret i32 %0
+
+  store i8 %v, i8* %ptr
+  ret i32 42
+}
Index: test/Verifier/deoptimize-intrinsic.ll
===================================================================
--- /dev/null
+++ test/Verifier/deoptimize-intrinsic.ll
@@ -0,0 +1,42 @@
+; RUN: not opt -verify < %s 2>&1 | FileCheck %s
+
+declare i8 @llvm.experimental.deoptimize.i8.i32(i32)
+declare void @llvm.experimental.deoptimize.isVoid.i32(i32)
+
+declare void @unknown()
+
+define void @f_notail() {
+entry:
+  call void @llvm.experimental.deoptimize.isVoid.i32(i32 0) [ "deopt"() ]
+; CHECK: calls to experimental_deoptimize must be followed by a return
+  call void @unknown()
+  ret void
+}
+
+define void @f_nodeopt() {
+entry:
+  call void @llvm.experimental.deoptimize.isVoid.i32(i32 0)
+; CHECK: experimental_deoptimize must have exactly one "deopt" operand bundle
+  ret void
+}
+
+define void @f_invoke() personality i8 3 {
+entry:
+  invoke void @llvm.experimental.deoptimize.isVoid.i32(i32 0) to label %ok unwind label %not_ok
+; CHECK: experimental_deoptimize cannot be invoked
+
+ok:
+  ret void
+
+not_ok:
+  %0 = landingpad { i8*, i32 }
+          filter [0 x i8*] zeroinitializer
+  ret void
+}
+
+define i8 @f_incorrect_return() {
+entry:
+  %val = call i8 @llvm.experimental.deoptimize.i8.i32(i32 0) [ "deopt"() ]
+; CHECK: calls to experimental_deoptimize must be followed by a return of the value computed by experimental_deoptimize
+  ret i8 0
+}