Index: llvm/lib/Analysis/InlineCost.cpp =================================================================== --- llvm/lib/Analysis/InlineCost.cpp +++ llvm/lib/Analysis/InlineCost.cpp @@ -136,6 +136,7 @@ bool HasReturn; bool HasIndirectBr; bool HasFrameEscape; + bool HasMustTailIntrinsic; bool UsesVarArgs; /// Number of bytes allocated statically by the callee. @@ -281,12 +282,13 @@ IsCallerRecursive(false), IsRecursiveCall(false), ExposesReturnsTwice(false), HasDynamicAlloca(false), ContainsNoDuplicateCall(false), HasReturn(false), HasIndirectBr(false), - HasFrameEscape(false), UsesVarArgs(false), AllocatedSize(0), NumInstructions(0), - NumVectorInstructions(0), VectorBonus(0), SingleBBBonus(0), - EnableLoadElimination(true), LoadEliminationCost(0), NumConstantArgs(0), - NumConstantOffsetPtrArgs(0), NumAllocaArgs(0), NumConstantPtrCmps(0), - NumConstantPtrDiffs(0), NumInstructionsSimplified(0), - SROACostSavings(0), SROACostSavingsLost(0) {} + HasFrameEscape(false), HasMustTailIntrinsic(false), UsesVarArgs(false), + AllocatedSize(0), NumInstructions(0), NumVectorInstructions(0), + VectorBonus(0), SingleBBBonus(0), EnableLoadElimination(true), + LoadEliminationCost(0), NumConstantArgs(0), NumConstantOffsetPtrArgs(0), + NumAllocaArgs(0), NumConstantPtrCmps(0), NumConstantPtrDiffs(0), + NumInstructionsSimplified(0), SROACostSavings(0), + SROACostSavingsLost(0) {} bool analyzeCall(CallSite CS); @@ -1203,17 +1205,27 @@ ExposesReturnsTwice = true; return false; } - if (CS.isCall() && cast<CallInst>(CS.getInstruction())->cannotDuplicate()) + CallInst *CI = dyn_cast<CallInst>(CS.getInstruction()); + if (CI && CI->cannotDuplicate()) ContainsNoDuplicateCall = true; if (Function *F = CS.getCalledFunction()) { + IntrinsicInst *II = dyn_cast<IntrinsicInst>(CS.getInstruction()); + if (II && CI && CI->isMustTailCall()) { + // Never inline functions with musttail intrinsics. Currently the only + // possible such intrinsic is llvm.icall.branch.funnel and it does not + // work with inlining. 
+ HasMustTailIntrinsic = true; + return false; + } + // When we have a concrete function, first try to simplify it directly. if (simplifyCallSite(F, CS)) return true; // Next check if it is an intrinsic we know about. // FIXME: Lift this into part of the InstVisitor. - if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(CS.getInstruction())) { + if (II) { switch (II->getIntrinsicID()) { default: if (!CS.onlyReadsMemory() && !isAssumeLikeIntrinsic(II)) @@ -1572,7 +1584,8 @@ using namespace ore; // If the visit this instruction detected an uninlinable pattern, abort. if (IsRecursiveCall || ExposesReturnsTwice || HasDynamicAlloca || - HasIndirectBr || HasFrameEscape || UsesVarArgs) { + HasIndirectBr || HasFrameEscape || HasMustTailIntrinsic || + UsesVarArgs) { if (ORE) ORE->emit([&]() { return OptimizationRemarkMissed(DEBUG_TYPE, "NeverInline", Index: llvm/test/Transforms/Inline/inline-with-intrinsic.ll =================================================================== --- /dev/null +++ llvm/test/Transforms/Inline/inline-with-intrinsic.ll @@ -0,0 +1,34 @@ +; Test that the inliner skips functions with musttail intrinsics. +; RUN: opt < %s -inline -S | FileCheck %s + +target datalayout = "e-p:64:64" +target triple = "x86_64-unknown-linux-gnu" + +declare void @llvm.icall.branch.funnel(...) + +; CHECK-LABEL: define void @fn( +define void @fn() { + call void (...) @bf() + ; CHECK: call void (...) @llvm.icall.branch.funnel + ret void +} + +define internal void @bf(...) { + call void (...) @llvm.icall.branch.funnel() + ret void +} +; CHECK-NOT: define internal void @bf( + +; CHECK-LABEL: define void @fn_musttail( +define void @fn_musttail() { + call void (...) @bf_musttail() + ; CHECK: call void (...) @bf_musttail( + ret void +} + +; CHECK-LABEL: define internal void @bf_musttail( +define internal void @bf_musttail(...) { + musttail call void (...) @llvm.icall.branch.funnel(...) + ; CHECK: musttail call void (...) 
@llvm.icall.branch.funnel( + ret void +} Index: llvm/test/Transforms/WholeProgramDevirt/branch-funnel.ll =================================================================== --- llvm/test/Transforms/WholeProgramDevirt/branch-funnel.ll +++ llvm/test/Transforms/WholeProgramDevirt/branch-funnel.ll @@ -1,6 +1,10 @@ ; RUN: opt -S -wholeprogramdevirt %s | FileCheck --check-prefixes=CHECK,RETP %s ; RUN: sed -e 's,+retpoline,-retpoline,g' %s | opt -S -wholeprogramdevirt | FileCheck --check-prefixes=CHECK,NORETP %s + ; RUN: opt -wholeprogramdevirt -wholeprogramdevirt-summary-action=export -wholeprogramdevirt-read-summary=%S/Inputs/export.yaml -wholeprogramdevirt-write-summary=%t -S -o - %s | FileCheck --check-prefixes=CHECK,RETP %s + +; RUN: opt -wholeprogramdevirt -wholeprogramdevirt-summary-action=export -wholeprogramdevirt-read-summary=%S/Inputs/export.yaml -wholeprogramdevirt-write-summary=%t -O3 -S -o - %s | FileCheck --check-prefixes=CHECK %s + ; RUN: FileCheck --check-prefix=SUMMARY %s < %t ; SUMMARY: TypeIdMap: @@ -89,7 +93,10 @@ declare i32 @vf4_1(i8* %this, i32 %arg) declare i32 @vf4_2(i8* %this, i32 %arg) -; CHECK: define i32 @fn1 + + +; CHECK-LABEL: define i32 @fn1 +; CHECK-NOT: call void (...) @llvm.icall.branch.funnel define i32 @fn1(i8* %obj) #0 { %vtableptr = bitcast i8* %obj to [1 x i8*]** %vtable = load [1 x i8*]*, [1 x i8*]** %vtableptr @@ -107,7 +114,8 @@ ret i32 %result } -; CHECK: define i32 @fn2 +; CHECK-LABEL: define i32 @fn2 +; CHECK-NOT: call void (...) @llvm.icall.branch.funnel define i32 @fn2(i8* %obj) #0 { %vtableptr = bitcast i8* %obj to [1 x i8*]** %vtable = load [1 x i8*]*, [1 x i8*]** %vtableptr @@ -122,7 +130,8 @@ ret i32 %result } -; CHECK: define i32 @fn3 +; CHECK-LABEL: define i32 @fn3 +; CHECK-NOT: call void (...) 
@llvm.icall.branch.funnel define i32 @fn3(i8* %obj) #0 { %vtableptr = bitcast i8* %obj to [1 x i8*]** %vtable = load [1 x i8*]*, [1 x i8*]** %vtableptr @@ -138,10 +147,9 @@ ret i32 %result } -; CHECK: define internal void @branch_funnel(i8* nest, ...) - +; CHECK-LABEL: define internal void @branch_funnel(i8* ; CHECK: define hidden void @__typeid_typeid1_0_branch_funnel(i8* nest, ...) -; CHECK-NEXT: call void (...) @llvm.icall.branch.funnel(i8* %0, i8* bitcast ([1 x i8*]* @vt1_1 to i8*), i32 (i8*, i32)* @vf1_1, i8* bitcast ([1 x i8*]* @vt1_2 to i8*), i32 (i8*, i32)* @vf1_2, ...) +; CHECK-NEXT: musttail call void (...) @llvm.icall.branch.funnel(i8* %0, i8* bitcast ([1 x i8*]* {{(nonnull )?}}@vt1_1 to i8*), i32 (i8*, i32)* {{(nonnull )?}}@vf1_1, i8* bitcast ([1 x i8*]* {{(nonnull )?}}@vt1_2 to i8*), i32 (i8*, i32)* {{(nonnull )?}}@vf1_2, ...) declare i1 @llvm.type.test(i8*, metadata) declare void @llvm.assume(i1)