Index: llvm/trunk/include/llvm/Transforms/Utils/BasicBlockUtils.h =================================================================== --- llvm/trunk/include/llvm/Transforms/Utils/BasicBlockUtils.h +++ llvm/trunk/include/llvm/Transforms/Utils/BasicBlockUtils.h @@ -229,7 +229,8 @@ /// value defined by a PHI, propagate the right value into the return. It /// returns the new return instruction in the predecessor. ReturnInst *FoldReturnIntoUncondBranch(ReturnInst *RI, BasicBlock *BB, - BasicBlock *Pred); + BasicBlock *Pred, + DomTreeUpdater *DTU = nullptr); /// Split the containing block at the specified instruction - everything before /// SplitBefore stays in the old basic block, and the rest of the instructions Index: llvm/trunk/lib/Transforms/Scalar/TailRecursionElimination.cpp =================================================================== --- llvm/trunk/lib/Transforms/Scalar/TailRecursionElimination.cpp +++ llvm/trunk/lib/Transforms/Scalar/TailRecursionElimination.cpp @@ -61,6 +61,7 @@ #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/Loads.h" #include "llvm/Analysis/OptimizationRemarkEmitter.h" +#include "llvm/Analysis/PostDominators.h" #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/IR/CFG.h" #include "llvm/IR/CallSite.h" @@ -68,6 +69,8 @@ #include "llvm/IR/DataLayout.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/DiagnosticInfo.h" +#include "llvm/IR/DomTreeUpdater.h" +#include "llvm/IR/Dominators.h" #include "llvm/IR/Function.h" #include "llvm/IR/InstIterator.h" #include "llvm/IR/Instructions.h" @@ -488,12 +491,10 @@ return CI; } -static bool eliminateRecursiveTailCall(CallInst *CI, ReturnInst *Ret, - BasicBlock *&OldEntry, - bool &TailCallsAreMarkedTail, - SmallVectorImpl &ArgumentPHIs, - AliasAnalysis *AA, - OptimizationRemarkEmitter *ORE) { +static bool eliminateRecursiveTailCall( + CallInst *CI, ReturnInst *Ret, BasicBlock *&OldEntry, + bool &TailCallsAreMarkedTail, SmallVectorImpl &ArgumentPHIs, + AliasAnalysis *AA, OptimizationRemarkEmitter *ORE, DomTreeUpdater &DTU) { // If we are introducing accumulator recursion to eliminate operations after // the call instruction that are both associative and commutative, the initial // value for the accumulator is placed in this variable. If this value is set @@ -593,6 +594,10 @@ PN->addIncoming(&*I, NewEntry); ArgumentPHIs.push_back(PN); } + // The entry block was changed from OldEntry to NewEntry. + // The forward DominatorTree needs to be recalculated when the EntryBB is + // changed. In this corner-case we recalculate the entire tree. + DTU.recalculate(*NewEntry->getParent()); } // If this function has self recursive calls in the tail position where some @@ -668,6 +673,7 @@ BB->getInstList().erase(Ret); // Remove return. BB->getInstList().erase(CI); // Remove call. + DTU.insertEdge(BB, OldEntry); ++NumEliminated; return true; } @@ -676,7 +682,7 @@ BasicBlock *BB, ReturnInst *Ret, BasicBlock *&OldEntry, bool &TailCallsAreMarkedTail, SmallVectorImpl &ArgumentPHIs, bool CannotTailCallElimCallsMarkedTail, const TargetTransformInfo *TTI, - AliasAnalysis *AA, OptimizationRemarkEmitter *ORE) { + AliasAnalysis *AA, OptimizationRemarkEmitter *ORE, DomTreeUpdater &DTU) { bool Change = false; // Make sure this block is a trivial return block. @@ -702,17 +708,17 @@ if (CallInst *CI = findTRECandidate(BI, CannotTailCallElimCallsMarkedTail, TTI)){ LLVM_DEBUG(dbgs() << "FOLDING: " << *BB << "INTO UNCOND BRANCH PRED: " << *Pred); - ReturnInst *RI = FoldReturnIntoUncondBranch(Ret, BB, Pred); + ReturnInst *RI = FoldReturnIntoUncondBranch(Ret, BB, Pred, &DTU); // Cleanup: if all predecessors of BB have been eliminated by // FoldReturnIntoUncondBranch, delete it. It is important to empty it, // because the ret instruction in there is still using a value which // eliminateRecursiveTailCall will attempt to remove. if (!BB->hasAddressTaken() && pred_begin(BB) == pred_end(BB)) - BB->eraseFromParent(); + DTU.deleteBB(BB); eliminateRecursiveTailCall(CI, RI, OldEntry, TailCallsAreMarkedTail, - ArgumentPHIs, AA, ORE); + ArgumentPHIs, AA, ORE, DTU); ++NumRetDuped; Change = true; } @@ -721,24 +727,23 @@ return Change; } -static bool processReturningBlock(ReturnInst *Ret, BasicBlock *&OldEntry, - bool &TailCallsAreMarkedTail, - SmallVectorImpl &ArgumentPHIs, - bool CannotTailCallElimCallsMarkedTail, - const TargetTransformInfo *TTI, - AliasAnalysis *AA, - OptimizationRemarkEmitter *ORE) { +static bool processReturningBlock( + ReturnInst *Ret, BasicBlock *&OldEntry, bool &TailCallsAreMarkedTail, + SmallVectorImpl &ArgumentPHIs, + bool CannotTailCallElimCallsMarkedTail, const TargetTransformInfo *TTI, + AliasAnalysis *AA, OptimizationRemarkEmitter *ORE, DomTreeUpdater &DTU) { CallInst *CI = findTRECandidate(Ret, CannotTailCallElimCallsMarkedTail, TTI); if (!CI) return false; return eliminateRecursiveTailCall(CI, Ret, OldEntry, TailCallsAreMarkedTail, - ArgumentPHIs, AA, ORE); + ArgumentPHIs, AA, ORE, DTU); } static bool eliminateTailRecursion(Function &F, const TargetTransformInfo *TTI, AliasAnalysis *AA, - OptimizationRemarkEmitter *ORE) { + OptimizationRemarkEmitter *ORE, + DomTreeUpdater &DTU) { if (F.getFnAttribute("disable-tail-calls").getValueAsString() == "true") return false; @@ -773,11 +778,11 @@ if (ReturnInst *Ret = dyn_cast(BB->getTerminator())) { bool Change = processReturningBlock(Ret, OldEntry, TailCallsAreMarkedTail, ArgumentPHIs, !CanTRETailMarkedCall, - TTI, AA, ORE); + TTI, AA, ORE, DTU); if (!Change && BB->getFirstNonPHIOrDbg() == Ret) - Change = foldReturnAndProcessPred(BB, Ret, OldEntry, - TailCallsAreMarkedTail, ArgumentPHIs, - !CanTRETailMarkedCall, TTI, AA, ORE); + Change = foldReturnAndProcessPred( + BB, Ret, OldEntry, TailCallsAreMarkedTail, ArgumentPHIs, + !CanTRETailMarkedCall, TTI, AA, ORE, DTU); MadeChange |= Change; } } @@ -810,16 +815,27 @@ AU.addRequired(); AU.addRequired(); AU.addPreserved(); + AU.addPreserved(); + AU.addPreserved(); } bool runOnFunction(Function &F) override { if (skipFunction(F)) return false; + auto *DTWP = getAnalysisIfAvailable(); + auto *DT = DTWP ? &DTWP->getDomTree() : nullptr; + auto *PDTWP = getAnalysisIfAvailable(); + auto *PDT = PDTWP ? &PDTWP->getPostDomTree() : nullptr; + // There is no noticable performance difference here between Lazy and Eager + // UpdateStrategy based on some test results. It is feasible to switch the + // UpdateStrategy to Lazy if we find it profitable later. + DomTreeUpdater DTU(DT, PDT, DomTreeUpdater::UpdateStrategy::Eager); + return eliminateTailRecursion( F, &getAnalysis().getTTI(F), &getAnalysis().getAAResults(), - &getAnalysis().getORE()); + &getAnalysis().getORE(), DTU); } }; } @@ -843,12 +859,19 @@ TargetTransformInfo &TTI = AM.getResult(F); AliasAnalysis &AA = AM.getResult(F); auto &ORE = AM.getResult(F); - - bool Changed = eliminateTailRecursion(F, &TTI, &AA, &ORE); + auto *DT = AM.getCachedResult(F); + auto *PDT = AM.getCachedResult(F); + // There is no noticable performance difference here between Lazy and Eager + // UpdateStrategy based on some test results. It is feasible to switch the + // UpdateStrategy to Lazy if we find it profitable later. + DomTreeUpdater DTU(DT, PDT, DomTreeUpdater::UpdateStrategy::Eager); + bool Changed = eliminateTailRecursion(F, &TTI, &AA, &ORE, DTU); if (!Changed) return PreservedAnalyses::all(); PreservedAnalyses PA; PA.preserve(); + PA.preserve(); + PA.preserve(); return PA; } Index: llvm/trunk/lib/Transforms/Utils/BasicBlockUtils.cpp =================================================================== --- llvm/trunk/lib/Transforms/Utils/BasicBlockUtils.cpp +++ llvm/trunk/lib/Transforms/Utils/BasicBlockUtils.cpp @@ -646,7 +646,8 @@ } ReturnInst *llvm::FoldReturnIntoUncondBranch(ReturnInst *RI, BasicBlock *BB, - BasicBlock *Pred) { + BasicBlock *Pred, + DomTreeUpdater *DTU) { Instruction *UncondBranch = Pred->getTerminator(); // Clone the return and add it to the end of the predecessor. Instruction *NewRet = RI->clone(); @@ -680,6 +681,10 @@ // longer branch to them. BB->removePredecessor(Pred); UncondBranch->eraseFromParent(); + + if (DTU) + DTU->deleteEdge(Pred, BB); + return cast(NewRet); } Index: llvm/trunk/test/Transforms/TailCallElim/2010-06-26-MultipleReturnValues.ll =================================================================== --- llvm/trunk/test/Transforms/TailCallElim/2010-06-26-MultipleReturnValues.ll +++ llvm/trunk/test/Transforms/TailCallElim/2010-06-26-MultipleReturnValues.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -tailcallelim -S | FileCheck %s +; RUN: opt < %s -tailcallelim -verify-dom-info -S | FileCheck %s ; PR7328 ; PR7506 define i32 @foo(i32 %x) { Index: llvm/trunk/test/Transforms/TailCallElim/EraseBB.ll =================================================================== --- llvm/trunk/test/Transforms/TailCallElim/EraseBB.ll +++ llvm/trunk/test/Transforms/TailCallElim/EraseBB.ll @@ -1,4 +1,4 @@ -; RUN: opt -tailcallelim -S < %s 2>&1 | FileCheck %s +; RUN: opt -tailcallelim -verify-dom-info -S < %s 2>&1 | FileCheck %s ; CHECK: add nsw i32 ; CHECK-NEXT: br label Index: llvm/trunk/test/Transforms/TailCallElim/accum_recursion.ll =================================================================== --- llvm/trunk/test/Transforms/TailCallElim/accum_recursion.ll +++ llvm/trunk/test/Transforms/TailCallElim/accum_recursion.ll @@ -1,5 +1,5 @@ -; RUN: opt < %s -tailcallelim -S | FileCheck %s -; RUN: opt < %s -passes=tailcallelim -S | FileCheck %s +; RUN: opt < %s -tailcallelim -verify-dom-info -S | FileCheck %s +; RUN: opt < %s -passes=tailcallelim -verify-dom-info -S | FileCheck %s define i32 @test1_factorial(i32 %x) { entry: Index: llvm/trunk/test/Transforms/TailCallElim/ackermann.ll =================================================================== --- llvm/trunk/test/Transforms/TailCallElim/ackermann.ll +++ llvm/trunk/test/Transforms/TailCallElim/ackermann.ll @@ -1,6 +1,6 @@ ; REQUIRES: asserts ; This function contains two tail calls, which should be eliminated -; RUN: opt < %s -tailcallelim -stats -disable-output 2>&1 | grep "2 tailcallelim" +; RUN: opt < %s -tailcallelim -verify-dom-info -stats -disable-output 2>&1 | grep "2 tailcallelim" define i32 @Ack(i32 %M.1, i32 %N.1) { entry: Index: llvm/trunk/test/Transforms/TailCallElim/basic.ll =================================================================== --- llvm/trunk/test/Transforms/TailCallElim/basic.ll +++ llvm/trunk/test/Transforms/TailCallElim/basic.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -tailcallelim -S | FileCheck %s +; RUN: opt < %s -tailcallelim -verify-dom-info -S | FileCheck %s declare void @noarg() declare void @use(i32*) Index: llvm/trunk/test/Transforms/TailCallElim/deopt-bundle.ll =================================================================== --- llvm/trunk/test/Transforms/TailCallElim/deopt-bundle.ll +++ llvm/trunk/test/Transforms/TailCallElim/deopt-bundle.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -tailcallelim -S | FileCheck %s +; RUN: opt < %s -tailcallelim -verify-dom-info -S | FileCheck %s define i32 @f_1(i32 %x) { ; CHECK-LABEL: @f_1( Index: llvm/trunk/test/Transforms/TailCallElim/dont_reorder_load.ll =================================================================== --- llvm/trunk/test/Transforms/TailCallElim/dont_reorder_load.ll +++ llvm/trunk/test/Transforms/TailCallElim/dont_reorder_load.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -tailcallelim -S | grep call | count 4 +; RUN: opt < %s -tailcallelim -verify-dom-info -S | grep call | count 4 ; PR4323 ; Several cases where tail call elimination should not move the load above the Index: llvm/trunk/test/Transforms/TailCallElim/dup_tail.ll =================================================================== --- llvm/trunk/test/Transforms/TailCallElim/dup_tail.ll +++ llvm/trunk/test/Transforms/TailCallElim/dup_tail.ll @@ -1,6 +1,6 @@ ; REQUIRES: asserts ; Duplicate the return into if.end to enable TCE. -; RUN: opt -tailcallelim -stats -disable-output < %s 2>&1 | FileCheck %s +; RUN: opt -tailcallelim -verify-dom-info -stats -disable-output < %s 2>&1 | FileCheck %s ; CHECK: Number of return duplicated Index: llvm/trunk/test/Transforms/TailCallElim/inf-recursion.ll =================================================================== --- llvm/trunk/test/Transforms/TailCallElim/inf-recursion.ll +++ llvm/trunk/test/Transforms/TailCallElim/inf-recursion.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -tailcallelim -S | FileCheck %s +; RUN: opt < %s -tailcallelim -verify-dom-info -S | FileCheck %s ; Don't turn this into an infinite loop, this is probably the implementation ; of fabs and we expect the codegen to lower fabs. Index: llvm/trunk/test/Transforms/TailCallElim/notail.ll =================================================================== --- llvm/trunk/test/Transforms/TailCallElim/notail.ll +++ llvm/trunk/test/Transforms/TailCallElim/notail.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -tailcallelim -S | FileCheck %s +; RUN: opt < %s -tailcallelim -verify-dom-info -S | FileCheck %s ; CHECK: tail call void @callee0() ; CHECK: notail call void @callee1() Index: llvm/trunk/test/Transforms/TailCallElim/opt-remarks-recursion.ll =================================================================== --- llvm/trunk/test/Transforms/TailCallElim/opt-remarks-recursion.ll +++ llvm/trunk/test/Transforms/TailCallElim/opt-remarks-recursion.ll @@ -1,4 +1,4 @@ -; RUN: opt %s -tailcallelim -pass-remarks=tailcallelim -o /dev/null 2>&1 | FileCheck %s +; RUN: opt %s -tailcallelim -verify-dom-info -pass-remarks=tailcallelim -o /dev/null 2>&1 | FileCheck %s ; RUN: opt %s -o /dev/null -passes='require,tailcallelim' -pass-remarks=tailcallelim 2>&1 | FileCheck %s ; CHECK: /home/davide/pat.c:2:20: transforming tail recursion into loop Index: llvm/trunk/test/Transforms/TailCallElim/reorder_load.ll =================================================================== --- llvm/trunk/test/Transforms/TailCallElim/reorder_load.ll +++ llvm/trunk/test/Transforms/TailCallElim/reorder_load.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -tailcallelim -S | FileCheck %s +; RUN: opt < %s -tailcallelim -verify-dom-info -S | FileCheck %s ; PR4323 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" Index: llvm/trunk/test/Transforms/TailCallElim/setjmp.ll =================================================================== --- llvm/trunk/test/Transforms/TailCallElim/setjmp.ll +++ llvm/trunk/test/Transforms/TailCallElim/setjmp.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -tailcallelim -S | FileCheck %s +; RUN: opt < %s -tailcallelim -verify-dom-info -S | FileCheck %s ; Test that we don't tail call in a functions that calls returns_twice ; functions.