Index: include/llvm/Transforms/Utils/Cloning.h =================================================================== --- include/llvm/Transforms/Utils/Cloning.h +++ include/llvm/Transforms/Utils/Cloning.h @@ -227,12 +227,18 @@ /// *inlined* code to minimize the actual inserted code, it must not delete /// code in the caller as users of this routine may have pointers to /// instructions in the caller that need to remain stable. +/// +/// If ForwardVarArgsTo is passed, inlining a function with varargs is allowed +/// and all varargs at the callsite will be passed to any calls to +/// ForwardVarArgsTo. The caller of InlineFunction has to make sure any varargs +/// are only used by ForwardVarArgsTo. bool InlineFunction(CallInst *C, InlineFunctionInfo &IFI, AAResults *CalleeAAR = nullptr, bool InsertLifetime = true); bool InlineFunction(InvokeInst *II, InlineFunctionInfo &IFI, AAResults *CalleeAAR = nullptr, bool InsertLifetime = true); bool InlineFunction(CallSite CS, InlineFunctionInfo &IFI, - AAResults *CalleeAAR = nullptr, bool InsertLifetime = true); + AAResults *CalleeAAR = nullptr, bool InsertLifetime = true, + Function *ForwardVarArgsTo = nullptr); /// \brief Clones a loop \p OrigLoop. Returns the loop and the blocks in \p /// Blocks. Index: include/llvm/Transforms/Utils/CodeExtractor.h =================================================================== --- include/llvm/Transforms/Utils/CodeExtractor.h +++ include/llvm/Transforms/Utils/CodeExtractor.h @@ -56,6 +56,9 @@ BlockFrequencyInfo *BFI; BranchProbabilityInfo *BPI; + // If true, varargs functions can be extracted. + bool AllowVarArgs; + // Bits of intermediate state computed at various phases of extraction. SetVector Blocks; unsigned NumExitBlocks = std::numeric_limits::max(); @@ -67,10 +70,13 @@ /// Given a sequence of basic blocks where the first block in the sequence /// dominates the rest, prepare a code extractor object for pulling this /// sequence out into its new function. 
When a DominatorTree is also given, - /// extra checking and transformations are enabled. + /// extra checking and transformations are enabled. If AllowVarArgs is true, + /// vararg functions can be extracted. This is safe if all vararg handling + /// code is extracted, including vastart. CodeExtractor(ArrayRef BBs, DominatorTree *DT = nullptr, bool AggregateArgs = false, BlockFrequencyInfo *BFI = nullptr, - BranchProbabilityInfo *BPI = nullptr); + BranchProbabilityInfo *BPI = nullptr, + bool AllowVarArgs = false); /// \brief Create a code extractor for a loop body. /// @@ -83,7 +89,8 @@ /// \brief Check to see if a block is valid for extraction. /// /// Blocks containing EHPads, allocas, invokes, or vastarts are not valid. - static bool isBlockValidForExtraction(const BasicBlock &BB); + static bool isBlockValidForExtraction(const BasicBlock &BB, + bool AllowVarArgs); /// \brief Perform the extraction, returning the new function. /// Index: lib/Transforms/IPO/PartialInlining.cpp =================================================================== --- lib/Transforms/IPO/PartialInlining.cpp +++ lib/Transforms/IPO/PartialInlining.cpp @@ -149,7 +149,12 @@ // the return block. void NormalizeReturnBlock(); - // Do function outlining: + // Do function outlining. + // NOTE: For vararg functions that do the vararg handling in the outlined + // function, we temporarily generate IR that does not properly + // forward varargs to the outlined function. Calling InlineFunction + // will update calls to the outlined functions to properly forward + // the varargs. Function *doFunctionOutlining(); Function *OrigFunc = nullptr; @@ -813,9 +818,29 @@ // Extract the body of the if. OutlinedFunc = CodeExtractor(ToExtract, &DT, /*AggregateArgs*/ false, - ClonedFuncBFI.get(), &BPI) + ClonedFuncBFI.get(), &BPI, + /* AllowVarArgs */ true) .extractCodeRegion(); + // For functions with varargs we must check that the varargs are forwarded + // to the outlined function. 
Unfortunately CodeExtractor does not provide + // a convenient way to access the non-extracted blocks, so we have to do + // the check after we created the outlined function. This means we have to + // clean up if we find vastart. + if (ClonedFunc->isVarArg() && OutlinedFunc) + for (auto &BB : *ClonedFunc) + for (auto &I : BB) + if (const CallInst *CI = dyn_cast(&I)) + if (const Function *F = CI->getCalledFunction()) + if (F->getIntrinsicID() == Intrinsic::vastart) { + ClonedFunc->replaceAllUsesWith(OrigFunc); + ClonedFunc->eraseFromParent(); + ClonedFunc = nullptr; + OutlinedFunc->eraseFromParent(); + OutlinedFunc = nullptr; + return nullptr; + } + if (OutlinedFunc) { OutliningCallBB = PartialInlinerImpl::getOneCallSiteTo(OutlinedFunc) .getInstruction() @@ -829,8 +854,10 @@ PartialInlinerImpl::FunctionCloner::~FunctionCloner() { // Ditch the duplicate, since we're done with it, and rewrite all remaining // users (function pointers, etc.) back to the original function. - ClonedFunc->replaceAllUsesWith(OrigFunc); - ClonedFunc->eraseFromParent(); + if (ClonedFunc) { + ClonedFunc->replaceAllUsesWith(OrigFunc); + ClonedFunc->eraseFromParent(); + } if (!IsFunctionInlined) { // Remove the function that is speculatively created if there is no // reference. @@ -938,7 +965,7 @@ << ore::NV("Caller", CS.getCaller()); InlineFunctionInfo IFI(nullptr, GetAssumptionCache, PSI); - if (!InlineFunction(CS, IFI)) + if (!InlineFunction(CS, IFI, nullptr, true, Cloner.OutlinedFunc)) continue; ORE.emit(OR); Index: lib/Transforms/Utils/CodeExtractor.cpp =================================================================== --- lib/Transforms/Utils/CodeExtractor.cpp +++ lib/Transforms/Utils/CodeExtractor.cpp @@ -78,7 +78,8 @@ cl::desc("Aggregate arguments to code-extracted functions")); /// \brief Test whether a block is valid for extraction. 
-bool CodeExtractor::isBlockValidForExtraction(const BasicBlock &BB) { +bool CodeExtractor::isBlockValidForExtraction(const BasicBlock &BB, + bool AllowVarArgs) { // Landing pads must be in the function where they were inserted for cleanup. if (BB.isEHPad()) return false; @@ -110,10 +111,13 @@ } } - // Don't hoist code containing allocas, invokes, or vastarts. + // Don't hoist code containing allocas or invokes. If explicitly requested, + // allow vastart. for (BasicBlock::const_iterator I = BB.begin(), E = BB.end(); I != E; ++I) { if (isa(I) || isa(I)) return false; + if (AllowVarArgs) + continue; if (const CallInst *CI = dyn_cast(I)) if (const Function *F = CI->getCalledFunction()) if (F->getIntrinsicID() == Intrinsic::vastart) @@ -125,7 +129,8 @@ /// \brief Build a set of blocks to extract if the input blocks are viable. static SetVector -buildExtractionBlockSet(ArrayRef BBs, DominatorTree *DT) { +buildExtractionBlockSet(ArrayRef BBs, DominatorTree *DT, + bool AllowVarArgs) { assert(!BBs.empty() && "The set of blocks to extract must be non-empty"); SetVector Result; @@ -138,7 +143,7 @@ if (!Result.insert(BB)) llvm_unreachable("Repeated basic blocks in extraction input"); - if (!CodeExtractor::isBlockValidForExtraction(*BB)) { + if (!CodeExtractor::isBlockValidForExtraction(*BB, AllowVarArgs)) { Result.clear(); return Result; } @@ -160,15 +165,16 @@ CodeExtractor::CodeExtractor(ArrayRef BBs, DominatorTree *DT, bool AggregateArgs, BlockFrequencyInfo *BFI, - BranchProbabilityInfo *BPI) + BranchProbabilityInfo *BPI, bool AllowVarArgs) : DT(DT), AggregateArgs(AggregateArgs || AggregateArgsOpt), BFI(BFI), - BPI(BPI), Blocks(buildExtractionBlockSet(BBs, DT)) {} + BPI(BPI), AllowVarArgs(AllowVarArgs), + Blocks(buildExtractionBlockSet(BBs, DT, AllowVarArgs)) {} CodeExtractor::CodeExtractor(DominatorTree &DT, Loop &L, bool AggregateArgs, BlockFrequencyInfo *BFI, BranchProbabilityInfo *BPI) : DT(&DT), AggregateArgs(AggregateArgs || AggregateArgsOpt), BFI(BFI), - BPI(BPI), 
Blocks(buildExtractionBlockSet(L.getBlocks(), &DT)) {} + BPI(BPI), Blocks(buildExtractionBlockSet(L.getBlocks(), &DT, false)) {} /// definedInRegion - Return true if the specified value is defined in the /// extracted region. @@ -594,7 +600,8 @@ paramTy.push_back(PointerType::getUnqual(StructTy)); } FunctionType *funcType = - FunctionType::get(RetTy, paramTy, false); + FunctionType::get(RetTy, paramTy, + AllowVarArgs && oldFunction->isVarArg()); // Create the new function Function *newFunction = Function::Create(funcType, Index: lib/Transforms/Utils/InlineFunction.cpp =================================================================== --- lib/Transforms/Utils/InlineFunction.cpp +++ lib/Transforms/Utils/InlineFunction.cpp @@ -1490,7 +1490,8 @@ /// exists in the instruction stream. Similarly this will inline a recursive /// function by one level. bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI, - AAResults *CalleeAAR, bool InsertLifetime) { + AAResults *CalleeAAR, bool InsertLifetime, + Function *ForwardVarArgsTo) { Instruction *TheCall = CS.getInstruction(); assert(TheCall->getParent() && TheCall->getFunction() && "Instruction not in function!"); @@ -1500,8 +1501,9 @@ Function *CalledFunc = CS.getCalledFunction(); if (!CalledFunc || // Can't inline external function or indirect - CalledFunc->isDeclaration() || // call, or call to a vararg function! - CalledFunc->getFunctionType()->isVarArg()) return false; + CalledFunc->isDeclaration() || + (!ForwardVarArgsTo && CalledFunc->isVarArg())) // call, or call to a vararg function! + return false; // The inliner does not know how to inline through calls with operand bundles // in general ... 
@@ -1628,8 +1630,8 @@ auto &DL = Caller->getParent()->getDataLayout(); - assert(CalledFunc->arg_size() == CS.arg_size() && - "No varargs calls can be inlined!"); + assert((CalledFunc->arg_size() == CS.arg_size() || ForwardVarArgsTo) && + "Varargs calls can only be inlined if the Varargs are forwarded!"); // Calculate the vector of arguments to pass into the function cloner, which // matches up the formal to the actual argument values. @@ -1811,6 +1813,11 @@ replaceDbgDeclareForAlloca(AI, AI, DIB, /*Deref=*/false); } + SmallVector VarArgsToForward; + for (unsigned i = CalledFunc->getFunctionType()->getNumParams(); + i < CS.getNumArgOperands(); i++) + VarArgsToForward.push_back(CS.getArgOperand(i)); + bool InlinedMustTailCalls = false, InlinedDeoptimizeCalls = false; if (InlinedFunctionInfo.ContainsCalls) { CallInst::TailCallKind CallSiteTailKind = CallInst::TCK_None; @@ -1819,7 +1826,8 @@ for (Function::iterator BB = FirstNewBlock, E = Caller->end(); BB != E; ++BB) { - for (Instruction &I : *BB) { + for (auto II = BB->begin(); II != BB->end();) { + Instruction &I = *II++; CallInst *CI = dyn_cast(&I); if (!CI) continue; @@ -1850,6 +1858,14 @@ // 'nounwind'. 
if (MarkNoUnwind) CI->setDoesNotThrow(); + + if (ForwardVarArgsTo && CI->getCalledFunction() == ForwardVarArgsTo) { + SmallVector Params(CI->arg_operands()); + Params.append(VarArgsToForward.begin(), VarArgsToForward.end()); + CallInst *Call = CallInst::Create(CI->getCalledFunction(), Params, "", CI); + CI->replaceAllUsesWith(Call); + CI->eraseFromParent(); + } } } } Index: test/Transforms/CodeExtractor/PartialInlineVarArg.ll =================================================================== --- /dev/null +++ test/Transforms/CodeExtractor/PartialInlineVarArg.ll @@ -0,0 +1,49 @@ +; RUN: opt < %s -partial-inliner -S -skip-partial-inlining-cost-analysis | FileCheck %s +; RUN: opt < %s -passes=partial-inliner -S -skip-partial-inlining-cost-analysis | FileCheck %s + +@stat = external global i32, align 4 + +define i32 @vararg(i32 %count, ...) { +entry: + %vargs = alloca i8*, align 8 + %stat1 = load i32, i32* @stat, align 4 + %cmp = icmp slt i32 %stat1, 0 + br i1 %cmp, label %bb2, label %bb1 + +bb1: ; preds = %entry + %vg1 = add nsw i32 %stat1, 1 + store i32 %vg1, i32* @stat, align 4 + %vargs1 = bitcast i8** %vargs to i8* + call void @llvm.va_start(i8* %vargs1) + %va1 = va_arg i8** %vargs, i32 + call void @foo(i32 %count, i32 %va1) #2 + call void @llvm.va_end(i8* %vargs1) + br label %bb2 + +bb2: ; preds = %bb1, %entry + %res = phi i32 [ 1, %bb1 ], [ 0, %entry ] + ret i32 %res +} + +declare void @foo(i32, i32) +declare void @llvm.va_start(i8*) +declare void @llvm.va_end(i8*) + +define i32 @caller1(i32 %arg) { +bb: + %tmp = tail call i32 (i32, ...) @vararg(i32 %arg) + ret i32 %tmp +} +; CHECK-LABEL: @caller1 +; CHECK: codeRepl.i: +; CHECK-NEXT: call void (i32, i8**, i32, ...) @vararg.1_bb1(i32 %stat1.i, i8** %vargs.i, i32 %arg) + +define i32 @caller2(i32 %arg, float %arg2) { +bb: + %tmp = tail call i32 (i32, ...) @vararg(i32 %arg, i32 10, float %arg2) + ret i32 %tmp +} + +; CHECK-LABEL: @caller2 +; CHECK: codeRepl.i: +; CHECK-NEXT: call void (i32, i8**, i32, ...) 
@vararg.1_bb1(i32 %stat1.i, i8** %vargs.i, i32 %arg, i32 10, float %arg2)