Index: llvm/lib/Transforms/Scalar/TailRecursionElimination.cpp =================================================================== --- llvm/lib/Transforms/Scalar/TailRecursionElimination.cpp +++ llvm/lib/Transforms/Scalar/TailRecursionElimination.cpp @@ -445,142 +445,100 @@ return &*I; } -static CallInst *findTRECandidate(Instruction *TI, - bool CannotTailCallElimCallsMarkedTail, - const TargetTransformInfo *TTI) { - BasicBlock *BB = TI->getParent(); - Function *F = BB->getParent(); - - if (&BB->front() == TI) // Make sure there is something before the terminator. - return nullptr; +namespace { +class TailRecursionEliminator { + Function &F; + const TargetTransformInfo *TTI; + AliasAnalysis *AA; + OptimizationRemarkEmitter *ORE; + DomTreeUpdater &DTU; + + // The below are shared state we want to have available when eliminating any + // calls in the function. There values should be populated by + // createTailRecurseLoopHeader the first time we find a call we can eliminate. + BasicBlock *HeaderBB = nullptr; + SmallVector ArgumentPHIs; + bool RemovableCallsMustBeMarkedTail = false; + + TailRecursionEliminator(Function &F, const TargetTransformInfo *TTI, + AliasAnalysis *AA, OptimizationRemarkEmitter *ORE, + DomTreeUpdater &DTU) + : F(F), TTI(TTI), AA(AA), ORE(ORE), DTU(DTU) {} + + CallInst *findTRECandidate(Instruction *TI, + bool CannotTailCallElimCallsMarkedTail) { + BasicBlock *BB = TI->getParent(); + + // Make sure there is something before the terminator. + if (&BB->front() == TI) + return nullptr; - // Scan backwards from the return, checking to see if there is a tail call in - // this block. If so, set CI to it. - CallInst *CI = nullptr; - BasicBlock::iterator BBI(TI); - while (true) { - CI = dyn_cast(BBI); - if (CI && CI->getCalledFunction() == F) - break; - - if (BBI == BB->begin()) - return nullptr; // Didn't find a potential tail call. - --BBI; - } + // Scan backwards from the return, checking to see if there is a tail call + // in this block. If so, set CI to it. + CallInst *CI = nullptr; + BasicBlock::iterator BBI(TI); + while (true) { + CI = dyn_cast(BBI); + if (CI && CI->getCalledFunction() == &F) + break; - // If this call is marked as a tail call, and if there are dynamic allocas in - // the function, we cannot perform this optimization. - if (CI->isTailCall() && CannotTailCallElimCallsMarkedTail) - return nullptr; + if (BBI == BB->begin()) + return nullptr; // Didn't find a potential tail call. + --BBI; + } - // As a special case, detect code like this: - // double fabs(double f) { return __builtin_fabs(f); } // a 'fabs' call - // and disable this xform in this case, because the code generator will - // lower the call to fabs into inline code. - if (BB == &F->getEntryBlock() && - firstNonDbg(BB->front().getIterator()) == CI && - firstNonDbg(std::next(BB->begin())) == TI && CI->getCalledFunction() && - !TTI->isLoweredToCall(CI->getCalledFunction())) { - // A single-block function with just a call and a return. Check that - // the arguments match. - auto I = CI->arg_begin(), E = CI->arg_end(); - Function::arg_iterator FI = F->arg_begin(), - FE = F->arg_end(); - for (; I != E && FI != FE; ++I, ++FI) - if (*I != &*FI) break; - if (I == E && FI == FE) + // If this call is marked as a tail call, and if there are dynamic allocas + // in the function, we cannot perform this optimization. + if (CI->isTailCall() && CannotTailCallElimCallsMarkedTail) return nullptr; - } - return CI; -} - -static bool eliminateRecursiveTailCall( - CallInst *CI, ReturnInst *Ret, BasicBlock *&OldEntry, - bool &TailCallsAreMarkedTail, SmallVectorImpl &ArgumentPHIs, - AliasAnalysis *AA, OptimizationRemarkEmitter *ORE, DomTreeUpdater &DTU) { - // If we are introducing accumulator recursion to eliminate operations after - // the call instruction that are both associative and commutative, the initial - // value for the accumulator is placed in this variable. If this value is set - // then we actually perform accumulator recursion elimination instead of - // simple tail recursion elimination. If the operation is an LLVM instruction - // (eg: "add") then it is recorded in AccumulatorRecursionInstr. If not, then - // we are handling the case when the return instruction returns a constant C - // which is different to the constant returned by other return instructions - // (which is recorded in AccumulatorRecursionEliminationInitVal). This is a - // special case of accumulator recursion, the operation being "return C". - Value *AccumulatorRecursionEliminationInitVal = nullptr; - Instruction *AccumulatorRecursionInstr = nullptr; - - // Ok, we found a potential tail call. We can currently only transform the - // tail call if all of the instructions between the call and the return are - // movable to above the call itself, leaving the call next to the return. - // Check that this is the case now. - BasicBlock::iterator BBI(CI); - for (++BBI; &*BBI != Ret; ++BBI) { - if (canMoveAboveCall(&*BBI, CI, AA)) - continue; - - // If we can't move the instruction above the call, it might be because it - // is an associative and commutative operation that could be transformed - // using accumulator recursion elimination. Check to see if this is the - // case, and if so, remember the initial accumulator value for later. - if ((AccumulatorRecursionEliminationInitVal = - canTransformAccumulatorRecursion(&*BBI, CI))) { - // Yes, this is accumulator recursion. Remember which instruction - // accumulates. - AccumulatorRecursionInstr = &*BBI; - } else { - return false; // Otherwise, we cannot eliminate the tail recursion! + // As a special case, detect code like this: + // double fabs(double f) { return __builtin_fabs(f); } // a 'fabs' call + // and disable this xform in this case, because the code generator will + // lower the call to fabs into inline code. + if (BB == &F.getEntryBlock() && + firstNonDbg(BB->front().getIterator()) == CI && + firstNonDbg(std::next(BB->begin())) == TI && CI->getCalledFunction() && + !TTI->isLoweredToCall(CI->getCalledFunction())) { + // A single-block function with just a call and a return. Check that + // the arguments match. + auto I = CI->arg_begin(), E = CI->arg_end(); + Function::arg_iterator FI = F.arg_begin(), FE = F.arg_end(); + for (; I != E && FI != FE; ++I, ++FI) + if (*I != &*FI) + break; + if (I == E && FI == FE) + return nullptr; } - } - // We can only transform call/return pairs that either ignore the return value - // of the call and return void, ignore the value of the call and return a - // constant, return the value returned by the tail call, or that are being - // accumulator recursion variable eliminated. - if (Ret->getNumOperands() == 1 && Ret->getReturnValue() != CI && - !isa(Ret->getReturnValue()) && - AccumulatorRecursionEliminationInitVal == nullptr && - !getCommonReturnValue(nullptr, CI)) { - // One case remains that we are able to handle: the current return - // instruction returns a constant, and all other return instructions - // return a different constant. - if (!isDynamicConstant(Ret->getReturnValue(), CI, Ret)) - return false; // Current return instruction does not return a constant. - // Check that all other return instructions return a common constant. If - // so, record it in AccumulatorRecursionEliminationInitVal. - AccumulatorRecursionEliminationInitVal = getCommonReturnValue(Ret, CI); - if (!AccumulatorRecursionEliminationInitVal) - return false; + return CI; } - BasicBlock *BB = Ret->getParent(); - Function *F = BB->getParent(); - - using namespace ore; - ORE->emit([&]() { - return OptimizationRemark(DEBUG_TYPE, "tailcall-recursion", CI) - << "transforming tail recursion into loop"; - }); - - // OK! We can transform this tail call. If this is the first one found, - // create the new entry block, allowing us to branch back to the old entry. - if (!OldEntry) { - OldEntry = &F->getEntryBlock(); - BasicBlock *NewEntry = BasicBlock::Create(F->getContext(), "", F, OldEntry); - NewEntry->takeName(OldEntry); - OldEntry->setName("tailrecurse"); - BranchInst *BI = BranchInst::Create(OldEntry, NewEntry); + void createTailRecurseLoopHeader(CallInst *CI) { + HeaderBB = &F.getEntryBlock(); + BasicBlock *NewEntry = BasicBlock::Create(F.getContext(), "", &F, HeaderBB); + NewEntry->takeName(HeaderBB); + HeaderBB->setName("tailrecurse"); + BranchInst *BI = BranchInst::Create(HeaderBB, NewEntry); BI->setDebugLoc(CI->getDebugLoc()); + // If this function has self recursive calls in the tail position where some + // are marked tail and some are not, only transform one flavor or another. + // We have to choose whether we move allocas in the entry block to the new + // entry block or not, so we can't make a good choice for both. We make this + // decision here based on whether the first call we found to remove is + // marked tail. + // NOTE: We could do slightly better here in the case that the function has + // no entry block allocas. + RemovableCallsMustBeMarkedTail = CI->isTailCall(); + // If this tail call is marked 'tail' and if there are any allocas in the // entry block, move them up to the new entry block. - TailCallsAreMarkedTail = CI->isTailCall(); - if (TailCallsAreMarkedTail) - // Move all fixed sized allocas from OldEntry to NewEntry. - for (BasicBlock::iterator OEBI = OldEntry->begin(), E = OldEntry->end(), - NEBI = NewEntry->begin(); OEBI != E; ) + if (RemovableCallsMustBeMarkedTail) + // Move all fixed sized allocas from HeaderBB to NewEntry. + for (BasicBlock::iterator OEBI = HeaderBB->begin(), E = HeaderBB->end(), + NEBI = NewEntry->begin(); + OEBI != E;) if (AllocaInst *AI = dyn_cast(OEBI++)) if (isa(AI->getArraySize())) AI->moveBefore(&*NEBI); @@ -589,223 +547,293 @@ // block, insert a PHI node for each argument of the function. // For now, we initialize each PHI to only have the real arguments // which are passed in. - Instruction *InsertPos = &OldEntry->front(); - for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end(); - I != E; ++I) { + Instruction *InsertPos = &HeaderBB->front(); + for (Function::arg_iterator I = F.arg_begin(), E = F.arg_end(); I != E; + ++I) { PHINode *PN = PHINode::Create(I->getType(), 2, I->getName() + ".tr", InsertPos); I->replaceAllUsesWith(PN); // Everyone use the PHI node now! PN->addIncoming(&*I, NewEntry); ArgumentPHIs.push_back(PN); } - // The entry block was changed from OldEntry to NewEntry. + // The entry block was changed from HeaderBB to NewEntry. // The forward DominatorTree needs to be recalculated when the EntryBB is // changed. In this corner-case we recalculate the entire tree. DTU.recalculate(*NewEntry->getParent()); } - // If this function has self recursive calls in the tail position where some - // are marked tail and some are not, only transform one flavor or another. We - // have to choose whether we move allocas in the entry block to the new entry - // block or not, so we can't make a good choice for both. NOTE: We could do - // slightly better here in the case that the function has no entry block - // allocas. - if (TailCallsAreMarkedTail && !CI->isTailCall()) - return false; - - // Ok, now that we know we have a pseudo-entry block WITH all of the - // required PHI nodes, add entries into the PHI node for the actual - // parameters passed into the tail-recursive call. - for (unsigned i = 0, e = CI->getNumArgOperands(); i != e; ++i) - ArgumentPHIs[i]->addIncoming(CI->getArgOperand(i), BB); - - // If we are introducing an accumulator variable to eliminate the recursion, - // do so now. Note that we _know_ that no subsequent tail recursion - // eliminations will happen on this function because of the way the - // accumulator recursion predicate is set up. - // - if (AccumulatorRecursionEliminationInitVal) { - Instruction *AccRecInstr = AccumulatorRecursionInstr; + PHINode *insertAccumulator(Value *AccumulatorRecursionEliminationInitVal) { // Start by inserting a new PHI node for the accumulator. - pred_iterator PB = pred_begin(OldEntry), PE = pred_end(OldEntry); + pred_iterator PB = pred_begin(HeaderBB), PE = pred_end(HeaderBB); PHINode *AccPN = PHINode::Create( AccumulatorRecursionEliminationInitVal->getType(), - std::distance(PB, PE) + 1, "accumulator.tr", &OldEntry->front()); + std::distance(PB, PE) + 1, "accumulator.tr", &HeaderBB->front()); // Loop over all of the predecessors of the tail recursion block. For the // real entry into the function we seed the PHI with the initial value, // computed earlier. For any other existing branches to this block (due to // other tail recursions eliminated) the accumulator is not modified. - // Because we haven't added the branch in the current block to OldEntry yet, + // Because we haven't added the branch in the current block to HeaderBB yet, // it will not show up as a predecessor. for (pred_iterator PI = PB; PI != PE; ++PI) { BasicBlock *P = *PI; - if (P == &F->getEntryBlock()) + if (P == &F.getEntryBlock()) AccPN->addIncoming(AccumulatorRecursionEliminationInitVal, P); else AccPN->addIncoming(AccPN, P); } - if (AccRecInstr) { - // Add an incoming argument for the current block, which is computed by - // our associative and commutative accumulator instruction. - AccPN->addIncoming(AccRecInstr, BB); + return AccPN; + } - // Next, rewrite the accumulator recursion instruction so that it does not - // use the result of the call anymore, instead, use the PHI node we just - // inserted. - AccRecInstr->setOperand(AccRecInstr->getOperand(0) != CI, AccPN); - } else { - // Add an incoming argument for the current block, which is just the - // constant returned by the current return instruction. - AccPN->addIncoming(Ret->getReturnValue(), BB); + bool eliminateCall(CallInst *CI) { + ReturnInst *Ret = cast(CI->getParent()->getTerminator()); + + // If we are introducing accumulator recursion to eliminate operations after + // the call instruction that are both associative and commutative, the + // initial value for the accumulator is placed in this variable. If this + // value is set then we actually perform accumulator recursion elimination + // instead of simple tail recursion elimination. If the operation is an + // LLVM instruction (eg: "add") then it is recorded in + // AccumulatorRecursionInstr. If not, then we are handling the case when + // the return instruction returns a constant C which is different to the + // constant returned by other return instructions (which is recorded in + // AccumulatorRecursionEliminationInitVal). This is a special case of + // accumulator recursion, the operation being "return C". + Value *AccumulatorRecursionEliminationInitVal = nullptr; + Instruction *AccumulatorRecursionInstr = nullptr; + + // Ok, we found a potential tail call. We can currently only transform the + // tail call if all of the instructions between the call and the return are + // movable to above the call itself, leaving the call next to the return. + // Check that this is the case now. + BasicBlock::iterator BBI(CI); + for (++BBI; &*BBI != Ret; ++BBI) { + if (canMoveAboveCall(&*BBI, CI, AA)) + continue; + + // If we can't move the instruction above the call, it might be because it + // is an associative and commutative operation that could be transformed + // using accumulator recursion elimination. Check to see if this is the + // case, and if so, remember the initial accumulator value for later. + if ((AccumulatorRecursionEliminationInitVal = + canTransformAccumulatorRecursion(&*BBI, CI))) { + // Yes, this is accumulator recursion. Remember which instruction + // accumulates. + AccumulatorRecursionInstr = &*BBI; + } else { + return false; // Otherwise, we cannot eliminate the tail recursion! + } } - // Finally, rewrite any return instructions in the program to return the PHI - // node instead of the "initval" that they do currently. This loop will - // actually rewrite the return value we are destroying, but that's ok. - for (BasicBlock &BBI : *F) - if (ReturnInst *RI = dyn_cast(BBI.getTerminator())) - RI->setOperand(0, AccPN); - ++NumAccumAdded; - } + // We can only transform call/return pairs that either ignore the return + // value of the call and return void, ignore the value of the call and + // return a constant, return the value returned by the tail call, or that + // are being accumulator recursion variable eliminated. + if (Ret->getNumOperands() == 1 && Ret->getReturnValue() != CI && + !isa(Ret->getReturnValue()) && + AccumulatorRecursionEliminationInitVal == nullptr && + !getCommonReturnValue(nullptr, CI)) { + // One case remains that we are able to handle: the current return + // instruction returns a constant, and all other return instructions + // return a different constant. + if (!isDynamicConstant(Ret->getReturnValue(), CI, Ret)) + return false; // Current return instruction does not return a constant. + // Check that all other return instructions return a common constant. If + // so, record it in AccumulatorRecursionEliminationInitVal. + AccumulatorRecursionEliminationInitVal = getCommonReturnValue(Ret, CI); + if (!AccumulatorRecursionEliminationInitVal) + return false; + } - // Now that all of the PHI nodes are in place, remove the call and - // ret instructions, replacing them with an unconditional branch. - BranchInst *NewBI = BranchInst::Create(OldEntry, Ret); - NewBI->setDebugLoc(CI->getDebugLoc()); + BasicBlock *BB = Ret->getParent(); - BB->getInstList().erase(Ret); // Remove return. - BB->getInstList().erase(CI); // Remove call. - DTU.applyUpdates({{DominatorTree::Insert, BB, OldEntry}}); - ++NumEliminated; - return true; -} + using namespace ore; + ORE->emit([&]() { + return OptimizationRemark(DEBUG_TYPE, "tailcall-recursion", CI) + << "transforming tail recursion into loop"; + }); -static bool foldReturnAndProcessPred( - BasicBlock *BB, ReturnInst *Ret, BasicBlock *&OldEntry, - bool &TailCallsAreMarkedTail, SmallVectorImpl &ArgumentPHIs, - bool CannotTailCallElimCallsMarkedTail, const TargetTransformInfo *TTI, - AliasAnalysis *AA, OptimizationRemarkEmitter *ORE, DomTreeUpdater &DTU) { - bool Change = false; - - // Make sure this block is a trivial return block. - assert(BB->getFirstNonPHIOrDbg() == Ret && - "Trying to fold non-trivial return block"); - - // If the return block contains nothing but the return and PHI's, - // there might be an opportunity to duplicate the return in its - // predecessors and perform TRE there. Look for predecessors that end - // in unconditional branch and recursive call(s). - SmallVector UncondBranchPreds; - for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) { - BasicBlock *Pred = *PI; - Instruction *PTI = Pred->getTerminator(); - if (BranchInst *BI = dyn_cast(PTI)) - if (BI->isUnconditional()) - UncondBranchPreds.push_back(BI); - } + // OK! We can transform this tail call. If this is the first one found, + // create the new entry block, allowing us to branch back to the old entry. + if (!HeaderBB) + createTailRecurseLoopHeader(CI); - while (!UncondBranchPreds.empty()) { - BranchInst *BI = UncondBranchPreds.pop_back_val(); - BasicBlock *Pred = BI->getParent(); - if (CallInst *CI = findTRECandidate(BI, CannotTailCallElimCallsMarkedTail, TTI)){ - LLVM_DEBUG(dbgs() << "FOLDING: " << *BB - << "INTO UNCOND BRANCH PRED: " << *Pred); - ReturnInst *RI = FoldReturnIntoUncondBranch(Ret, BB, Pred, &DTU); - - // Cleanup: if all predecessors of BB have been eliminated by - // FoldReturnIntoUncondBranch, delete it. It is important to empty it, - // because the ret instruction in there is still using a value which - // eliminateRecursiveTailCall will attempt to remove. - if (!BB->hasAddressTaken() && pred_begin(BB) == pred_end(BB)) - DTU.deleteBB(BB); - - eliminateRecursiveTailCall(CI, RI, OldEntry, TailCallsAreMarkedTail, - ArgumentPHIs, AA, ORE, DTU); - ++NumRetDuped; - Change = true; - } - } + if (RemovableCallsMustBeMarkedTail && !CI->isTailCall()) + return false; - return Change; -} + // Ok, now that we know we have a pseudo-entry block WITH all of the + // required PHI nodes, add entries into the PHI node for the actual + // parameters passed into the tail-recursive call. + for (unsigned i = 0, e = CI->getNumArgOperands(); i != e; ++i) + ArgumentPHIs[i]->addIncoming(CI->getArgOperand(i), BB); -static bool processReturningBlock( - ReturnInst *Ret, BasicBlock *&OldEntry, bool &TailCallsAreMarkedTail, - SmallVectorImpl &ArgumentPHIs, - bool CannotTailCallElimCallsMarkedTail, const TargetTransformInfo *TTI, - AliasAnalysis *AA, OptimizationRemarkEmitter *ORE, DomTreeUpdater &DTU) { - CallInst *CI = findTRECandidate(Ret, CannotTailCallElimCallsMarkedTail, TTI); - if (!CI) - return false; + // If we are introducing an accumulator variable to eliminate the recursion, + // do so now. Note that we _know_ that no subsequent tail recursion + // eliminations will happen on this function because of the way the + // accumulator recursion predicate is set up. + // + if (AccumulatorRecursionEliminationInitVal) { + PHINode *AccPN = + insertAccumulator(AccumulatorRecursionEliminationInitVal); + + Instruction *AccRecInstr = AccumulatorRecursionInstr; + if (AccRecInstr) { + // Add an incoming argument for the current block, which is computed by + // our associative and commutative accumulator instruction. + AccPN->addIncoming(AccRecInstr, BB); + + // Next, rewrite the accumulator recursion instruction so that it does + // not use the result of the call anymore, instead, use the PHI node we + // just inserted. + AccRecInstr->setOperand(AccRecInstr->getOperand(0) != CI, AccPN); + } else { + // Add an incoming argument for the current block, which is just the + // constant returned by the current return instruction. + AccPN->addIncoming(Ret->getReturnValue(), BB); + } - return eliminateRecursiveTailCall(CI, Ret, OldEntry, TailCallsAreMarkedTail, - ArgumentPHIs, AA, ORE, DTU); -} + // Finally, rewrite any return instructions in the program to return the + // PHI node instead of the "initval" that they do currently. This loop + // will actually rewrite the return value we are destroying, but that's + // ok. + for (BasicBlock &BBI : F) + if (ReturnInst *RI = dyn_cast(BBI.getTerminator())) + RI->setOperand(0, AccPN); + ++NumAccumAdded; + } -static bool eliminateTailRecursion(Function &F, const TargetTransformInfo *TTI, - AliasAnalysis *AA, - OptimizationRemarkEmitter *ORE, - DomTreeUpdater &DTU) { - if (F.getFnAttribute("disable-tail-calls").getValueAsString() == "true") - return false; + // Now that all of the PHI nodes are in place, remove the call and + // ret instructions, replacing them with an unconditional branch. + BranchInst *NewBI = BranchInst::Create(HeaderBB, Ret); + NewBI->setDebugLoc(CI->getDebugLoc()); - bool MadeChange = false; - bool AllCallsAreTailCalls = false; - MadeChange |= markTails(F, AllCallsAreTailCalls, ORE); - if (!AllCallsAreTailCalls) - return MadeChange; + BB->getInstList().erase(Ret); // Remove return. + BB->getInstList().erase(CI); // Remove call. + DTU.applyUpdates({{DominatorTree::Insert, BB, HeaderBB}}); + ++NumEliminated; + return true; + } - // If this function is a varargs function, we won't be able to PHI the args - // right, so don't even try to convert it... - if (F.getFunctionType()->isVarArg()) - return false; + bool foldReturnAndProcessPred(BasicBlock *BB, ReturnInst *Ret, + bool CannotTailCallElimCallsMarkedTail) { + bool Change = false; + + // Make sure this block is a trivial return block. + assert(BB->getFirstNonPHIOrDbg() == Ret && + "Trying to fold non-trivial return block"); + + // If the return block contains nothing but the return and PHI's, + // there might be an opportunity to duplicate the return in its + // predecessors and perform TRE there. Look for predecessors that end + // in unconditional branch and recursive call(s). + SmallVector UncondBranchPreds; + for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) { + BasicBlock *Pred = *PI; + Instruction *PTI = Pred->getTerminator(); + if (BranchInst *BI = dyn_cast(PTI)) + if (BI->isUnconditional()) + UncondBranchPreds.push_back(BI); + } - BasicBlock *OldEntry = nullptr; - bool TailCallsAreMarkedTail = false; - SmallVector ArgumentPHIs; - - // If false, we cannot perform TRE on tail calls marked with the 'tail' - // attribute, because doing so would cause the stack size to increase (real - // TRE would deallocate variable sized allocas, TRE doesn't). - bool CanTRETailMarkedCall = canTRE(F); - - // Change any tail recursive calls to loops. - // - // FIXME: The code generator produces really bad code when an 'escaping - // alloca' is changed from being a static alloca to being a dynamic alloca. - // Until this is resolved, disable this transformation if that would ever - // happen. This bug is PR962. - for (Function::iterator BBI = F.begin(), E = F.end(); BBI != E; /*in loop*/) { - BasicBlock *BB = &*BBI++; // foldReturnAndProcessPred may delete BB. - if (ReturnInst *Ret = dyn_cast(BB->getTerminator())) { - bool Change = processReturningBlock(Ret, OldEntry, TailCallsAreMarkedTail, - ArgumentPHIs, !CanTRETailMarkedCall, - TTI, AA, ORE, DTU); - if (!Change && BB->getFirstNonPHIOrDbg() == Ret) - Change = foldReturnAndProcessPred( - BB, Ret, OldEntry, TailCallsAreMarkedTail, ArgumentPHIs, - !CanTRETailMarkedCall, TTI, AA, ORE, DTU); - MadeChange |= Change; + while (!UncondBranchPreds.empty()) { + BranchInst *BI = UncondBranchPreds.pop_back_val(); + BasicBlock *Pred = BI->getParent(); + if (CallInst *CI = + findTRECandidate(BI, CannotTailCallElimCallsMarkedTail)) { + LLVM_DEBUG(dbgs() << "FOLDING: " << *BB + << "INTO UNCOND BRANCH PRED: " << *Pred); + FoldReturnIntoUncondBranch(Ret, BB, Pred, &DTU); + + // Cleanup: if all predecessors of BB have been eliminated by + // FoldReturnIntoUncondBranch, delete it. It is important to empty it, + // because the ret instruction in there is still using a value which + // eliminateRecursiveTailCall will attempt to remove. + if (!BB->hasAddressTaken() && pred_begin(BB) == pred_end(BB)) + DTU.deleteBB(BB); + + eliminateCall(CI); + ++NumRetDuped; + Change = true; + } } + + return Change; + } + + bool processReturningBlock(ReturnInst *Ret, + bool CannotTailCallElimCallsMarkedTail) { + CallInst *CI = findTRECandidate(Ret, CannotTailCallElimCallsMarkedTail); + if (!CI) + return false; + + return eliminateCall(CI); } - // If we eliminated any tail recursions, it's possible that we inserted some - // silly PHI nodes which just merge an initial value (the incoming operand) - // with themselves. Check to see if we did and clean up our mess if so. This - // occurs when a function passes an argument straight through to its tail - // call. - for (PHINode *PN : ArgumentPHIs) { - // If the PHI Node is a dynamic constant, replace it with the value it is. - if (Value *PNV = SimplifyInstruction(PN, F.getParent()->getDataLayout())) { - PN->replaceAllUsesWith(PNV); - PN->eraseFromParent(); + void cleanupAndFinalize() { + // If we eliminated any tail recursions, it's possible that we inserted some + // silly PHI nodes which just merge an initial value (the incoming operand) + // with themselves. Check to see if we did and clean up our mess if so. + // This occurs when a function passes an argument straight through to its + // tail call. + for (PHINode *PN : ArgumentPHIs) { + // If the PHI Node is a dynamic constant, replace it with the value it is. + if (Value *PNV = + SimplifyInstruction(PN, F.getParent()->getDataLayout())) { + PN->replaceAllUsesWith(PNV); + PN->eraseFromParent(); + } } } - return MadeChange; -} +public: + static bool eliminate(Function &F, const TargetTransformInfo *TTI, + AliasAnalysis *AA, OptimizationRemarkEmitter *ORE, + DomTreeUpdater &DTU) { + if (F.getFnAttribute("disable-tail-calls").getValueAsString() == "true") + return false; + + bool MadeChange = false; + bool AllCallsAreTailCalls = false; + MadeChange |= markTails(F, AllCallsAreTailCalls, ORE); + if (!AllCallsAreTailCalls) + return MadeChange; + + // If this function is a varargs function, we won't be able to PHI the args + // right, so don't even try to convert it... + if (F.getFunctionType()->isVarArg()) + return false; + + // If false, we cannot perform TRE on tail calls marked with the 'tail' + // attribute, because doing so would cause the stack size to increase (real + // TRE would deallocate variable sized allocas, TRE doesn't). + bool CanTRETailMarkedCall = canTRE(F); + + TailRecursionEliminator TRE(F, TTI, AA, ORE, DTU); + + // Change any tail recursive calls to loops. + // + // FIXME: The code generator produces really bad code when an 'escaping + // alloca' is changed from being a static alloca to being a dynamic alloca. + // Until this is resolved, disable this transformation if that would ever + // happen. This bug is PR962. + for (Function::iterator BBI = F.begin(), E = F.end(); BBI != E; + /*in loop*/) { + BasicBlock *BB = &*BBI++; // foldReturnAndProcessPred may delete BB. + if (ReturnInst *Ret = dyn_cast(BB->getTerminator())) { + bool Change = TRE.processReturningBlock(Ret, !CanTRETailMarkedCall); + if (!Change && BB->getFirstNonPHIOrDbg() == Ret) + Change = TRE.foldReturnAndProcessPred(BB, Ret, !CanTRETailMarkedCall); + MadeChange |= Change; + } + } + + TRE.cleanupAndFinalize(); + + return MadeChange; + } +}; +} // namespace namespace { struct TailCallElim : public FunctionPass { @@ -836,7 +864,7 @@ // UpdateStrategy to Lazy if we find it profitable later. DomTreeUpdater DTU(DT, PDT, DomTreeUpdater::UpdateStrategy::Eager); - return eliminateTailRecursion( + return TailRecursionEliminator::eliminate( F, &getAnalysis().getTTI(F), &getAnalysis().getAAResults(), &getAnalysis().getORE(), DTU); @@ -869,7 +897,7 @@ // UpdateStrategy based on some test results. It is feasible to switch the // UpdateStrategy to Lazy if we find it profitable later. DomTreeUpdater DTU(DT, PDT, DomTreeUpdater::UpdateStrategy::Eager); - bool Changed = eliminateTailRecursion(F, &TTI, &AA, &ORE, DTU); + bool Changed = TailRecursionEliminator::eliminate(F, &TTI, &AA, &ORE, DTU); if (!Changed) return PreservedAnalyses::all();