Index: include/llvm/Transforms/Utils/Cloning.h =================================================================== --- include/llvm/Transforms/Utils/Cloning.h +++ include/llvm/Transforms/Utils/Cloning.h @@ -232,7 +232,8 @@ bool InlineFunction(InvokeInst *II, InlineFunctionInfo &IFI, AAResults *CalleeAAR = nullptr, bool InsertLifetime = true); bool InlineFunction(CallSite CS, InlineFunctionInfo &IFI, - AAResults *CalleeAAR = nullptr, bool InsertLifetime = true); + AAResults *CalleeAAR = nullptr, bool InsertLifetime = true, + Function *ForwardVarArgsTo = nullptr); /// \brief Clones a loop \p OrigLoop. Returns the loop and the blocks in \p /// Blocks. Index: include/llvm/Transforms/Utils/CodeExtractor.h =================================================================== --- include/llvm/Transforms/Utils/CodeExtractor.h +++ include/llvm/Transforms/Utils/CodeExtractor.h @@ -70,7 +70,8 @@ /// extra checking and transformations are enabled. CodeExtractor(ArrayRef BBs, DominatorTree *DT = nullptr, bool AggregateArgs = false, BlockFrequencyInfo *BFI = nullptr, - BranchProbabilityInfo *BPI = nullptr); + BranchProbabilityInfo *BPI = nullptr, + bool AllowVarargs = false); /// \brief Create a code extractor for a loop body. /// @@ -83,7 +84,8 @@ /// \brief Check to see if a block is valid for extraction. /// /// Blocks containing EHPads, allocas, invokes, or vastarts are not valid. - static bool isBlockValidForExtraction(const BasicBlock &BB); + static bool isBlockValidForExtraction(const BasicBlock &BB, + bool AllowVarargs); /// \brief Perform the extraction, returning the new function. /// Index: lib/Transforms/IPO/PartialInlining.cpp =================================================================== --- lib/Transforms/IPO/PartialInlining.cpp +++ lib/Transforms/IPO/PartialInlining.cpp @@ -813,9 +813,29 @@ // Extract the body of the if. OutlinedFunc = CodeExtractor(ToExtract, &DT, /*AggregateArgs*/ false, - ClonedFuncBFI.get(), &BPI) + ClonedFuncBFI.get(), &BPI, + /* AllowVarargs */ true) .extractCodeRegion(); + // For functions with varargs we must check that the varargs are forwarded + // to the outlined function. Unfortunately CodeExtractor does not provide + // a convenient way to access the non-extracted blocks, so we have to do + // the check after we created the outlined function. This means we have to + // cleanup if we find vastart. + if (ClonedFunc->isVarArg() && OutlinedFunc) + for (auto &BB : *ClonedFunc) + for (auto &I : BB) + if (const CallInst *CI = dyn_cast(&I)) + if (const Function *F = CI->getCalledFunction()) + if (F->getIntrinsicID() == Intrinsic::vastart) { + ClonedFunc->replaceAllUsesWith(OrigFunc); + ClonedFunc->eraseFromParent(); + ClonedFunc = nullptr; + OutlinedFunc->eraseFromParent(); + OutlinedFunc = nullptr; + return nullptr; + } + if (OutlinedFunc) { OutliningCallBB = PartialInlinerImpl::getOneCallSiteTo(OutlinedFunc) .getInstruction() @@ -829,8 +849,10 @@ PartialInlinerImpl::FunctionCloner::~FunctionCloner() { // Ditch the duplicate, since we're done with it, and rewrite all remaining // users (function pointers, etc.) back to the original function. - ClonedFunc->replaceAllUsesWith(OrigFunc); - ClonedFunc->eraseFromParent(); + if (ClonedFunc) { + ClonedFunc->replaceAllUsesWith(OrigFunc); + ClonedFunc->eraseFromParent(); + } if (!IsFunctionInlined) { // Remove the function that is speculatively created if there is no // reference. @@ -938,7 +960,7 @@ << ore::NV("Caller", CS.getCaller()); InlineFunctionInfo IFI(nullptr, GetAssumptionCache, PSI); - if (!InlineFunction(CS, IFI)) + if (!InlineFunction(CS, IFI, nullptr, true, Cloner.OutlinedFunc)) continue; ORE.emit(OR); Index: lib/Transforms/Utils/CodeExtractor.cpp =================================================================== --- lib/Transforms/Utils/CodeExtractor.cpp +++ lib/Transforms/Utils/CodeExtractor.cpp @@ -78,7 +78,8 @@ cl::desc("Aggregate arguments to code-extracted functions")); /// \brief Test whether a block is valid for extraction. -bool CodeExtractor::isBlockValidForExtraction(const BasicBlock &BB) { +bool CodeExtractor::isBlockValidForExtraction(const BasicBlock &BB, + bool AllowVarargs) { // Landing pads must be in the function where they were inserted for cleanup. if (BB.isEHPad()) return false; @@ -110,10 +111,13 @@ } } - // Don't hoist code containing allocas, invokes, or vastarts. + // Don't hoist code containing allocas or invokes. If explicitly requested, + // allow vastart. for (BasicBlock::const_iterator I = BB.begin(), E = BB.end(); I != E; ++I) { if (isa(I) || isa(I)) return false; + if (AllowVarargs) + continue; if (const CallInst *CI = dyn_cast(I)) if (const Function *F = CI->getCalledFunction()) if (F->getIntrinsicID() == Intrinsic::vastart) @@ -125,7 +129,8 @@ /// \brief Build a set of blocks to extract if the input blocks are viable. static SetVector -buildExtractionBlockSet(ArrayRef BBs, DominatorTree *DT) { +buildExtractionBlockSet(ArrayRef BBs, DominatorTree *DT, + bool AllowVarargs) { assert(!BBs.empty() && "The set of blocks to extract must be non-empty"); SetVector Result; @@ -138,7 +143,7 @@ if (!Result.insert(BB)) llvm_unreachable("Repeated basic blocks in extraction input"); - if (!CodeExtractor::isBlockValidForExtraction(*BB)) { + if (!CodeExtractor::isBlockValidForExtraction(*BB, AllowVarargs)) { Result.clear(); return Result; } @@ -160,15 +165,15 @@ CodeExtractor::CodeExtractor(ArrayRef BBs, DominatorTree *DT, bool AggregateArgs, BlockFrequencyInfo *BFI, - BranchProbabilityInfo *BPI) + BranchProbabilityInfo *BPI, bool AllowVarargs) : DT(DT), AggregateArgs(AggregateArgs || AggregateArgsOpt), BFI(BFI), - BPI(BPI), Blocks(buildExtractionBlockSet(BBs, DT)) {} + BPI(BPI), Blocks(buildExtractionBlockSet(BBs, DT, AllowVarargs)) {} CodeExtractor::CodeExtractor(DominatorTree &DT, Loop &L, bool AggregateArgs, BlockFrequencyInfo *BFI, BranchProbabilityInfo *BPI) : DT(&DT), AggregateArgs(AggregateArgs || AggregateArgsOpt), BFI(BFI), - BPI(BPI), Blocks(buildExtractionBlockSet(L.getBlocks(), &DT)) {} + BPI(BPI), Blocks(buildExtractionBlockSet(L.getBlocks(), &DT, false)) {} /// definedInRegion - Return true if the specified value is defined in the /// extracted region. @@ -594,7 +599,7 @@ paramTy.push_back(PointerType::getUnqual(StructTy)); } FunctionType *funcType = - FunctionType::get(RetTy, paramTy, false); + FunctionType::get(RetTy, paramTy, oldFunction->isVarArg()); // Create the new function Function *newFunction = Function::Create(funcType, Index: lib/Transforms/Utils/InlineFunction.cpp =================================================================== --- lib/Transforms/Utils/InlineFunction.cpp +++ lib/Transforms/Utils/InlineFunction.cpp @@ -1490,7 +1490,8 @@ /// exists in the instruction stream. Similarly this will inline a recursive /// function by one level. bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI, - AAResults *CalleeAAR, bool InsertLifetime) { + AAResults *CalleeAAR, bool InsertLifetime, + Function *ForwardVarArgsTo) { Instruction *TheCall = CS.getInstruction(); assert(TheCall->getParent() && TheCall->getFunction() && "Instruction not in function!"); @@ -1500,8 +1501,9 @@ Function *CalledFunc = CS.getCalledFunction(); if (!CalledFunc || // Can't inline external function or indirect - CalledFunc->isDeclaration() || // call, or call to a vararg function! - CalledFunc->getFunctionType()->isVarArg()) return false; + CalledFunc->isDeclaration() || + (!ForwardVarArgsTo && CalledFunc->isVarArg())) // call, or call to a vararg function! + return false; // The inliner does not know how to inline through calls with operand bundles // in general ... @@ -1628,8 +1630,8 @@ auto &DL = Caller->getParent()->getDataLayout(); - assert(CalledFunc->arg_size() == CS.arg_size() && - "No varargs calls can be inlined!"); + assert((CalledFunc->arg_size() == CS.arg_size() || ForwardVarArgsTo) && + "Varargs calls can only be inlined if the Varargs are forwarded!"); // Calculate the vector of arguments to pass into the function cloner, which // matches up the formal to the actual argument values. @@ -1652,7 +1654,6 @@ VMap[&*I] = ActualArg; } - // Add alignment assumptions if necessary. We do this before the inlined // instructions are actually cloned into the caller so that we can easily // check what will be known at the start of the inlined code. @@ -1811,6 +1812,11 @@ replaceDbgDeclareForAlloca(AI, AI, DIB, /*Deref=*/false); } + SmallVector VarArgsToForward; + for (unsigned i = CalledFunc->getFunctionType()->getNumParams(); + i < CS.getNumArgOperands(); i++) + VarArgsToForward.push_back(CS.getArgOperand(i)); + bool InlinedMustTailCalls = false, InlinedDeoptimizeCalls = false; if (InlinedFunctionInfo.ContainsCalls) { CallInst::TailCallKind CallSiteTailKind = CallInst::TCK_None; @@ -1819,7 +1825,8 @@ for (Function::iterator BB = FirstNewBlock, E = Caller->end(); BB != E; ++BB) { - for (Instruction &I : *BB) { + for (auto II = BB->begin(); II != BB->end();) { + Instruction &I = *II++; CallInst *CI = dyn_cast(&I); if (!CI) continue; @@ -1850,6 +1857,14 @@ // 'nounwind'. if (MarkNoUnwind) CI->setDoesNotThrow(); + + if (ForwardVarArgsTo && CI->getCalledFunction() == ForwardVarArgsTo) { + SmallVector Params(CI->arg_operands()); + Params.append(VarArgsToForward.begin(), VarArgsToForward.end()); + CallInst *call = CallInst::Create(CI->getCalledFunction(), Params, "", CI); + CI->replaceAllUsesWith(call); + CI->eraseFromParent(); + } } } } Index: test/Transforms/CodeExtractor/PartialInlineVarArg.ll =================================================================== --- /dev/null +++ test/Transforms/CodeExtractor/PartialInlineVarArg.ll @@ -0,0 +1,49 @@ +; RUN: opt < %s -partial-inliner -S -skip-partial-inlining-cost-analysis | FileCheck %s +; RUN: opt < %s -passes=partial-inliner -S -skip-partial-inlining-cost-analysis | FileCheck %s + +@stat = external global i32, align 4 + +define i32 @vararg(i32 %count, ...) { +entry: + %vargs = alloca i8*, align 8 + %stat1 = load i32, i32* @stat, align 4 + %cmp = icmp slt i32 %stat1, 0 + br i1 %cmp, label %bb2, label %bb1 + +bb1: ; preds = %entry + %vg1 = add nsw i32 %stat1, 1 + store i32 %vg1, i32* @stat, align 4 + %vargs1 = bitcast i8** %vargs to i8* + call void @llvm.va_start(i8* %vargs1) + %va1 = va_arg i8** %vargs, i32 + call void @foo(i32 %count, i32 %va1) #2 + call void @llvm.va_end(i8* %vargs1) + br label %bb2 + +bb2: ; preds = %bb1, %entry + %res = phi i32 [ 1, %bb1 ], [ 0, %entry ] + ret i32 %res +} + +declare void @foo(i32, i32) +declare void @llvm.va_start(i8*) +declare void @llvm.va_end(i8*) + +define i32 @caller1(i32 %arg) { +bb: + %tmp = tail call i32 (i32, ...) @vararg(i32 %arg) + ret i32 %tmp +} +; CHECK-LABEL: @caller1 +; CHECK: codeRepl.i: +; CHECK-NEXT: call void (i32, i8**, i32, ...) @vararg.1_bb1(i32 %stat1.i, i8** %vargs.i, i32 %arg) + +define i32 @caller2(i32 %arg, float %arg2) { +bb: + %tmp = tail call i32 (i32, ...) @vararg(i32 %arg, i32 10, float %arg2) + ret i32 %tmp +} + +; CHECK-LABEL: @caller2 +; CHECK: codeRepl.i: +; CHECK-NEXT: call void (i32, i8**, i32, ...) @vararg.1_bb1(i32 %stat1.i, i8** %vargs.i, i32 %arg, i32 10, float %arg2)