Index: llvm/trunk/lib/Analysis/InlineCost.cpp =================================================================== --- llvm/trunk/lib/Analysis/InlineCost.cpp +++ llvm/trunk/lib/Analysis/InlineCost.cpp @@ -136,6 +136,7 @@ bool HasReturn; bool HasIndirectBr; bool HasFrameEscape; + bool UsesVarArgs; /// Number of bytes allocated statically by the callee. uint64_t AllocatedSize; @@ -280,7 +281,7 @@ IsCallerRecursive(false), IsRecursiveCall(false), ExposesReturnsTwice(false), HasDynamicAlloca(false), ContainsNoDuplicateCall(false), HasReturn(false), HasIndirectBr(false), - HasFrameEscape(false), AllocatedSize(0), NumInstructions(0), + HasFrameEscape(false), UsesVarArgs(false), AllocatedSize(0), NumInstructions(0), NumVectorInstructions(0), VectorBonus(0), SingleBBBonus(0), EnableLoadElimination(true), LoadEliminationCost(0), NumConstantArgs(0), NumConstantOffsetPtrArgs(0), NumAllocaArgs(0), NumConstantPtrCmps(0), @@ -1233,6 +1234,10 @@ case Intrinsic::localescape: HasFrameEscape = true; return false; + case Intrinsic::vastart: + case Intrinsic::vaend: + UsesVarArgs = true; + return false; } } @@ -1567,7 +1572,7 @@ using namespace ore; // If the visit this instruction detected an uninlinable pattern, abort. if (IsRecursiveCall || ExposesReturnsTwice || HasDynamicAlloca || - HasIndirectBr || HasFrameEscape) { + HasIndirectBr || HasFrameEscape || UsesVarArgs) { if (ORE) ORE->emit([&]() { return OptimizationRemarkMissed(DEBUG_TYPE, "NeverInline", Index: llvm/trunk/lib/Transforms/Utils/InlineFunction.cpp =================================================================== --- llvm/trunk/lib/Transforms/Utils/InlineFunction.cpp +++ llvm/trunk/lib/Transforms/Utils/InlineFunction.cpp @@ -1500,10 +1500,9 @@ IFI.reset(); Function *CalledFunc = CS.getCalledFunction(); - if (!CalledFunc || // Can't inline external function or indirect - CalledFunc->isDeclaration() || - (!ForwardVarArgsTo && CalledFunc->isVarArg())) // call, or call to a vararg function! 
- return false; + if (!CalledFunc || // Can't inline external function or indirect + CalledFunc->isDeclaration()) // call! + return false; // The inliner does not know how to inline through calls with operand bundles // in general ... @@ -1630,9 +1629,6 @@ auto &DL = Caller->getParent()->getDataLayout(); - assert((CalledFunc->arg_size() == CS.arg_size() || ForwardVarArgsTo) && - "Varargs calls can only be inlined if the Varargs are forwarded!"); - // Calculate the vector of arguments to pass into the function cloner, which // matches up the formal to the actual argument values. CallSite::arg_iterator AI = CS.arg_begin(); @@ -1833,6 +1829,26 @@ if (!CI) continue; + // Forward varargs from inlined call site to calls to the + // ForwardVarArgsTo function, if requested, and to musttail calls. + if (!VarArgsToForward.empty() && + ((ForwardVarArgsTo && + CI->getCalledFunction() == ForwardVarArgsTo) || + CI->isMustTailCall())) { + SmallVector<Value *, 6> Params(CI->arg_operands()); + Params.append(VarArgsToForward.begin(), VarArgsToForward.end()); + CallInst *Call = + CallInst::Create(CI->getCalledFunction() ? CI->getCalledFunction() + : CI->getCalledValue(), + Params, "", CI); + Call->setDebugLoc(CI->getDebugLoc()); + CI->replaceAllUsesWith(Call); + CI->eraseFromParent(); + // CI has been erased; keep processing the replacement call so the + // checks below never touch the freed instruction. + CI = Call; + } + if (Function *F = CI->getCalledFunction()) InlinedDeoptimizeCalls |= F->getIntrinsicID() == Intrinsic::experimental_deoptimize; @@ -1860,16 +1876,6 @@ // 'nounwind'. 
if (MarkNoUnwind) CI->setDoesNotThrow(); - - if (ForwardVarArgsTo && !VarArgsToForward.empty() && - CI->getCalledFunction() == ForwardVarArgsTo) { - SmallVector<Value*, 6> Params(CI->arg_operands()); - Params.append(VarArgsToForward.begin(), VarArgsToForward.end()); - CallInst *Call = CallInst::Create(CI->getCalledFunction(), Params, "", CI); - Call->setDebugLoc(CI->getDebugLoc()); - CI->replaceAllUsesWith(Call); - CI->eraseFromParent(); - } } } } Index: llvm/trunk/test/Transforms/Inline/inline-musttail-varargs.ll =================================================================== --- llvm/trunk/test/Transforms/Inline/inline-musttail-varargs.ll +++ llvm/trunk/test/Transforms/Inline/inline-musttail-varargs.ll @@ -1,23 +0,0 @@ -; RUN: opt < %s -inline -instcombine -S | FileCheck %s -; RUN: opt < %s -passes='cgscc(inline,function(instcombine))' -S | FileCheck %s - -; We can't inline this thunk yet, but one day we will be able to. And when we -; do, this test case will be ready. - -declare void @ext_method(i8*, i32) - -define linkonce_odr void @thunk(i8* %this, ...) { - %this_adj = getelementptr i8, i8* %this, i32 4 - musttail call void (i8*, ...) bitcast (void (i8*, i32)* @ext_method to void (i8*, ...)*)(i8* %this_adj, ...) - ret void -} - -define void @thunk_caller(i8* %p) { - call void (i8*, ...) @thunk(i8* %p, i32 42) - ret void -} -; CHECK-LABEL: define void @thunk_caller(i8* %p) -; CHECK: call void (i8*, ...) @thunk(i8* %p, i32 42) - -; FIXME: Inline the thunk. This should be significantly easier than inlining -; general varargs functions. 
Index: llvm/trunk/test/Transforms/Inline/inline-varargs.ll =================================================================== --- llvm/trunk/test/Transforms/Inline/inline-varargs.ll +++ llvm/trunk/test/Transforms/Inline/inline-varargs.ll @@ -0,0 +1,52 @@ +; RUN: opt < %s -inline -S | FileCheck %s +; RUN: opt < %s -passes='cgscc(inline,function(instcombine))' -S | FileCheck %s + +declare void @ext_method(i8*, i32) +declare void @vararg_fn(i8*, ...) + +define linkonce_odr void @thunk(i8* %this, ...) { + %this_adj = getelementptr i8, i8* %this, i32 4 + musttail call void (i8*, ...) bitcast (void (i8*, i32)* @ext_method to void (i8*, ...)*)(i8* %this_adj, ...) + ret void +} + +define void @thunk_caller(i8* %p) { + call void (i8*, ...) @thunk(i8* %p, i32 42) + ret void +} +; CHECK-LABEL: define void @thunk_caller(i8* %p) +; CHECK: call void (i8*, ...) bitcast (void (i8*, i32)* @ext_method to void (i8*, ...)*)(i8* %this_adj.i, i32 42) + +define void @test_callee_2(i8* %this, ...) { + %this_adj = getelementptr i8, i8* %this, i32 4 + musttail call void (i8*, ...) @vararg_fn(i8* %this_adj, ...) + ret void +} + +define void @test_caller_2(i8* %p) { + call void (i8*, ...) @test_callee_2(i8* %p) + ret void +} +; CHECK-LABEL: define void @test_caller_2(i8* %p) +; CHECK: call void (i8*, ...) @vararg_fn(i8* %this_adj.i) + + +define internal i32 @varg_accessed(...) { +entry: + %vargs = alloca i8*, align 8 + %vargs.ptr = bitcast i8** %vargs to i8* + call void @llvm.va_start(i8* %vargs.ptr) + %va1 = va_arg i8** %vargs, i32 + call void @llvm.va_end(i8* %vargs.ptr) + ret i32 %va1 +} + +define i32 @call_vargs() { + %res = call i32 (...) @varg_accessed(i32 10) + ret i32 %res +} +; CHECK-LABEL: @call_vargs +; CHECK: %res = call i32 (...) @varg_accessed(i32 10) + +declare void @llvm.va_start(i8*) +declare void @llvm.va_end(i8*)