diff --git a/llvm/lib/Transforms/Scalar/TailRecursionElimination.cpp b/llvm/lib/Transforms/Scalar/TailRecursionElimination.cpp --- a/llvm/lib/Transforms/Scalar/TailRecursionElimination.cpp +++ b/llvm/lib/Transforms/Scalar/TailRecursionElimination.cpp @@ -89,16 +89,6 @@ STATISTIC(NumRetDuped, "Number of return duplicated"); STATISTIC(NumAccumAdded, "Number of accumulators introduced"); -/// Scan the specified function for alloca instructions. -/// If it contains any dynamic allocas, returns false. -static bool canTRE(Function &F) { - // Because of PR962, we don't TRE dynamic allocas. - return llvm::all_of(instructions(F), [](Instruction &I) { - auto *AI = dyn_cast(&I); - return !AI || AI->isStaticAlloca(); - }); -} - namespace { struct AllocaDerivedValueTracker { // Start at a root value and walk its use-def chain to mark calls that use the @@ -185,11 +175,9 @@ }; } -static bool markTails(Function &F, bool &AllCallsAreTailCalls, - OptimizationRemarkEmitter *ORE) { +static bool markTails(Function &F, OptimizationRemarkEmitter *ORE) { if (F.callsFunctionThatReturnsTwice()) return false; - AllCallsAreTailCalls = true; // The local stack holds all alloca instructions and all byval arguments. AllocaDerivedValueTracker Tracker; @@ -272,11 +260,8 @@ } } - if (!IsNoTail && Escaped == UNESCAPED && !Tracker.AllocaUsers.count(CI)) { + if (!IsNoTail && Escaped == UNESCAPED && !Tracker.AllocaUsers.count(CI)) DeferredTails.push_back(CI); - } else { - AllCallsAreTailCalls = false; - } } for (auto *SuccBB : make_range(succ_begin(BB), succ_end(BB))) { @@ -313,8 +298,6 @@ LLVM_DEBUG(dbgs() << "Marked as tail call candidate: " << *CI << "\n"); CI->setTailCall(); Modified = true; - } else { - AllCallsAreTailCalls = false; } } @@ -326,6 +309,14 @@ /// instructions between the call and this instruction are movable. /// static bool canMoveAboveCall(Instruction *I, CallInst *CI, AliasAnalysis *AA) { + if (isa(I)) + return true; + + if (const IntrinsicInst *II = dyn_cast(I)) + if (II->getIntrinsicID() == Intrinsic::lifetime_end || + II->getIntrinsicID() == Intrinsic::assume) + return true; + // FIXME: We can move load/store/call/free instructions above the call if the // call does not mod/ref the memory location being processed. if (I->mayHaveSideEffects()) // This also handles volatile loads. @@ -366,8 +357,15 @@ (I->getOperand(0) != CI && I->getOperand(1) != CI)) return false; + Instruction *AccumulatorInstr = I; + + if (AccumulatorInstr->hasOneUse() && + isa(AccumulatorInstr->user_back())) + AccumulatorInstr = AccumulatorInstr->user_back(); + // The only user of this instruction we allow is a single return instruction. - if (!I->hasOneUse() || !isa(I->user_back())) + if (!AccumulatorInstr->hasOneUse() || + !isa(AccumulatorInstr->user_back())) return false; return true; @@ -392,7 +390,6 @@ // createTailRecurseLoopHeader the first time we find a call we can eliminate. BasicBlock *HeaderBB = nullptr; SmallVector ArgumentPHIs; - bool RemovableCallsMustBeMarkedTail = false; // PHI node to store our return value. PHINode *RetPN = nullptr; @@ -419,8 +416,7 @@ DomTreeUpdater &DTU) : F(F), TTI(TTI), AA(AA), ORE(ORE), DTU(DTU) {} - CallInst *findTRECandidate(Instruction *TI, - bool CannotTailCallElimCallsMarkedTail); + CallInst *findTRECandidate(Instruction *TI); void createTailRecurseLoopHeader(CallInst *CI); @@ -428,14 +424,14 @@ bool eliminateCall(CallInst *CI); - bool foldReturnAndProcessPred(ReturnInst *Ret, - bool CannotTailCallElimCallsMarkedTail); + bool foldReturnAndProcessPred(ReturnInst *Ret); - bool processReturningBlock(ReturnInst *Ret, - bool CannotTailCallElimCallsMarkedTail); + bool processReturningBlock(ReturnInst *Ret); void cleanupAndFinalize(); + bool canTRE(); + public: static bool eliminate(Function &F, const TargetTransformInfo *TTI, AliasAnalysis *AA, OptimizationRemarkEmitter *ORE, @@ -443,8 +439,7 @@ }; } // namespace -CallInst *TailRecursionEliminator::findTRECandidate( - Instruction *TI, bool CannotTailCallElimCallsMarkedTail) { +CallInst *TailRecursionEliminator::findTRECandidate(Instruction *TI) { BasicBlock *BB = TI->getParent(); if (&BB->front() == TI) // Make sure there is something before the terminator. @@ -464,11 +459,6 @@ --BBI; } - // If this call is marked as a tail call, and if there are dynamic allocas in - // the function, we cannot perform this optimization. - if (CI->isTailCall() && CannotTailCallElimCallsMarkedTail) - return nullptr; - // As a special case, detect code like this: // double fabs(double f) { return __builtin_fabs(f); } // a 'fabs' call // and disable this xform in this case, because the code generator will @@ -498,26 +488,13 @@ BranchInst *BI = BranchInst::Create(HeaderBB, NewEntry); BI->setDebugLoc(CI->getDebugLoc()); - // If this function has self recursive calls in the tail position where some - // are marked tail and some are not, only transform one flavor or another. - // We have to choose whether we move allocas in the entry block to the new - // entry block or not, so we can't make a good choice for both. We make this - // decision here based on whether the first call we found to remove is - // marked tail. - // NOTE: We could do slightly better here in the case that the function has - // no entry block allocas. - RemovableCallsMustBeMarkedTail = CI->isTailCall(); - - // If this tail call is marked 'tail' and if there are any allocas in the - // entry block, move them up to the new entry block. - if (RemovableCallsMustBeMarkedTail) - // Move all fixed sized allocas from HeaderBB to NewEntry. - for (BasicBlock::iterator OEBI = HeaderBB->begin(), E = HeaderBB->end(), - NEBI = NewEntry->begin(); - OEBI != E;) - if (AllocaInst *AI = dyn_cast(OEBI++)) - if (isa(AI->getArraySize())) - AI->moveBefore(&*NEBI); + // Move all fixed sized allocas from HeaderBB to NewEntry. + for (BasicBlock::iterator OEBI = HeaderBB->begin(), E = HeaderBB->end(), + NEBI = NewEntry->begin(); + OEBI != E;) + if (AllocaInst *AI = dyn_cast(OEBI++)) + if (isa(AI->getArraySize())) + AI->moveBefore(&*NEBI); // Now that we have created a new block, which jumps to the entry // block, insert a PHI node for each argument of the function. @@ -620,9 +597,6 @@ if (!HeaderBB) createTailRecurseLoopHeader(CI); - if (RemovableCallsMustBeMarkedTail && !CI->isTailCall()) - return false; - // Ok, now that we know we have a pseudo-entry block WITH all of the // required PHI nodes, add entries into the PHI node for the actual // parameters passed into the tail-recursive call. @@ -672,8 +646,7 @@ return true; } -bool TailRecursionEliminator::foldReturnAndProcessPred( - ReturnInst *Ret, bool CannotTailCallElimCallsMarkedTail) { +bool TailRecursionEliminator::foldReturnAndProcessPred(ReturnInst *Ret) { BasicBlock *BB = Ret->getParent(); bool Change = false; @@ -698,8 +671,7 @@ while (!UncondBranchPreds.empty()) { BranchInst *BI = UncondBranchPreds.pop_back_val(); BasicBlock *Pred = BI->getParent(); - if (CallInst *CI = - findTRECandidate(BI, CannotTailCallElimCallsMarkedTail)) { + if (CallInst *CI = findTRECandidate(BI)) { LLVM_DEBUG(dbgs() << "FOLDING: " << *BB << "INTO UNCOND BRANCH PRED: " << *Pred); FoldReturnIntoUncondBranch(Ret, BB, Pred, &DTU); @@ -720,9 +692,8 @@ return Change; } -bool TailRecursionEliminator::processReturningBlock( - ReturnInst *Ret, bool CannotTailCallElimCallsMarkedTail) { - CallInst *CI = findTRECandidate(Ret, CannotTailCallElimCallsMarkedTail); +bool TailRecursionEliminator::processReturningBlock(ReturnInst *Ret) { + CallInst *CI = findTRECandidate(Ret); if (!CI) return false; @@ -801,6 +772,70 @@ } } +/// Returns true if this is a block with "return", "unreachable" +/// or "unconditional branch" to other block with "return" +/// instruction. +static bool isTailBlock(BasicBlock *BB) { + Instruction *LastBlockInstr = BB->getTerminator(); + + // Check that last instruction is a "return", either "unreachable", + // either branch to other block containing "return". + if (ReturnInst *Ret = dyn_cast(LastBlockInstr)) + return true; + + if (isa(LastBlockInstr)) + return true; + + if (BranchInst *Branch = dyn_cast(LastBlockInstr)) { + if (Branch->isUnconditional()) + if (isa(Branch->getSuccessor(0)->getFirstNonPHIOrDbg())) + return true; + } + + return false; +} + +/// Checks that specified call instruction is in position +/// suitable for tail recursive elimination. +static bool isInTREPosition(CallInst *Inst, AliasAnalysis *AA) { + if (!isTailBlock(Inst->getParent())) + return false; + + BasicBlock::iterator BBI(Inst->getParent()->getTerminator()); + for (--BBI; &*BBI != Inst; --BBI) { + if (!canMoveAboveCall(&*BBI, Inst, AA) && + !canTransformAccumulatorRecursion(&*BBI, Inst)) + return false; + } + + return true; +} + +bool TailRecursionEliminator::canTRE() { + for (auto &BB : F) { + for (Instruction &I : BB) { + if (AllocaInst *AI = dyn_cast(&I)) { + // Because of PR962, we don't TRE dynamic allocas. + if (!AI->isStaticAlloca()) + return false; + } else if (CallInst *CI = dyn_cast(&I)) { + if (CI->getCalledFunction() == &F) { + if (!isInTREPosition(CI, AA)) + continue; + + // Do not do TRE if CI explicitly marked as NoTailcall or has Operand + // Bundles or not marked as TailCall. + if (CI->isNoTailCall() || CI->hasOperandBundles() || + !CI->isTailCall()) + return false; + } + } + } + } + + return true; +} + bool TailRecursionEliminator::eliminate(Function &F, const TargetTransformInfo *TTI, AliasAnalysis *AA, @@ -810,23 +845,18 @@ return false; bool MadeChange = false; - bool AllCallsAreTailCalls = false; - MadeChange |= markTails(F, AllCallsAreTailCalls, ORE); - if (!AllCallsAreTailCalls) - return MadeChange; + MadeChange |= markTails(F, ORE); // If this function is a varargs function, we won't be able to PHI the args // right, so don't even try to convert it... if (F.getFunctionType()->isVarArg()) return MadeChange; - // If false, we cannot perform TRE on tail calls marked with the 'tail' - // attribute, because doing so would cause the stack size to increase (real - // TRE would deallocate variable sized allocas, TRE doesn't). - bool CanTRETailMarkedCall = canTRE(F); - TailRecursionEliminator TRE(F, TTI, AA, ORE, DTU); + if (!TRE.canTRE()) + return MadeChange; + // Change any tail recursive calls to loops. // // FIXME: The code generator produces really bad code when an 'escaping @@ -836,9 +866,9 @@ for (Function::iterator BBI = F.begin(), E = F.end(); BBI != E; /*in loop*/) { BasicBlock *BB = &*BBI++; // foldReturnAndProcessPred may delete BB. if (ReturnInst *Ret = dyn_cast(BB->getTerminator())) { - bool Change = TRE.processReturningBlock(Ret, !CanTRETailMarkedCall); + bool Change = TRE.processReturningBlock(Ret); if (!Change && BB->getFirstNonPHIOrDbg() == Ret) - Change = TRE.foldReturnAndProcessPred(Ret, !CanTRETailMarkedCall); + Change = TRE.foldReturnAndProcessPred(Ret); MadeChange |= Change; } } diff --git a/llvm/test/Transforms/TailCallElim/basic.ll b/llvm/test/Transforms/TailCallElim/basic.ll --- a/llvm/test/Transforms/TailCallElim/basic.ll +++ b/llvm/test/Transforms/TailCallElim/basic.ll @@ -19,8 +19,8 @@ %A = alloca i32 ; [#uses=2] store i32 5, i32* %A call void @use(i32* %A) -; CHECK: tail call i32 @test1 - %X = tail call i32 @test1() ; [#uses=1] +; CHECK: call i32 @test1 + %X = call i32 @test1() ; [#uses=1] ret i32 %X } diff --git a/llvm/test/Transforms/TailCallElim/tre-noncapturing-alloca-calls.ll b/llvm/test/Transforms/TailCallElim/tre-noncapturing-alloca-calls.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/TailCallElim/tre-noncapturing-alloca-calls.ll @@ -0,0 +1,69 @@ +; RUN: opt < %s -tailcallelim -verify-dom-info -S | FileCheck %s + +; IR for that test was generated from the following C++ source: +; +;int count; +;__attribute__((noinline)) void globalIncrement(const int* param) { count += *param; } +; +;void test(int recurseCount) +;{ +; if (recurseCount == 0) return; +; int temp = 10; +; globalIncrement(&temp); +; test(recurseCount - 1); +;} +; + +@count = dso_local local_unnamed_addr global i32 0, align 4 + +; Function Attrs: nofree noinline norecurse nounwind uwtable +define dso_local void @_Z15globalIncrementPKi(i32* nocapture readonly %param) local_unnamed_addr #0 { +entry: + %0 = load i32, i32* %param, align 4 + %1 = load i32, i32* @count, align 4 + %add = add nsw i32 %1, %0 + store i32 %add, i32* @count, align 4 + ret void +} + +; Test that TRE could be done for recursive tail routine containing +; call to function receiving a pointer to local stack. + +; CHECK: void @_Z4testi +; CHECK: br label %tailrecurse +; CHECK: tailrecurse: +; CHECK-NOT: call void @_Z4testi +; CHECK: br label %tailrecurse +; CHECK-NOT: call void @_Z4testi +; CHECK: ret + +; Function Attrs: nounwind uwtable +define dso_local void @_Z4testi(i32 %recurseCount) local_unnamed_addr #1 { +entry: + %temp = alloca i32, align 4 + %cmp = icmp eq i32 %recurseCount, 0 + br i1 %cmp, label %return, label %if.end + +if.end: ; preds = %entry + %0 = bitcast i32* %temp to i8* + call void @llvm.lifetime.start.p0i8(i64 4, i8* nonnull %0) #6 + store i32 10, i32* %temp, align 4 + call void @_Z15globalIncrementPKi(i32* nonnull %temp) + %sub = add nsw i32 %recurseCount, -1 + call void @_Z4testi(i32 %sub) + call void @llvm.lifetime.end.p0i8(i64 4, i8* nonnull %0) #6 + br label %return + +return: ; preds = %entry, %if.end + ret void +} + +; Function Attrs: argmemonly nounwind willreturn +declare void @llvm.lifetime.start.p0i8(i64 immarg, i8* nocapture) #2 + +; Function Attrs: argmemonly nounwind willreturn +declare void @llvm.lifetime.end.p0i8(i64 immarg, i8* nocapture) #2 + +attributes #0 = { nofree noinline norecurse nounwind uwtable } +attributes #1 = { nounwind uwtable } +attributes #2 = { argmemonly nounwind willreturn }